Mercurial > projects > dil

--- a/trunk/src/cmd/Generate.d	Fri Jan 04 23:00:38 2008 +0100
+++ b/trunk/src/cmd/Generate.d	Sat Jan 05 16:53:27 2008 +0100
@@ -5,7 +5,7 @@
 module cmd.Generate;
 import dil.SyntaxTree;
 import dil.Token;
-import dil.Parser, dil.Lexer;
+import dil.Parser, dil.lexer.Lexer;
 import dil.File;
 import tango.io.Print;
 import common;
--- a/trunk/src/cmd/ImportGraph.d	Fri Jan 04 23:00:38 2008 +0100
+++ b/trunk/src/cmd/ImportGraph.d	Sat Jan 05 16:53:27 2008 +0100
@@ -6,7 +6,6 @@
 import dil.SyntaxTree;
 import dil.Declarations;
 import dil.Token;
-import dil.Parser, dil.Lexer;
 import dil.File;
 import dil.Module;
 import dil.Settings;
@@ -37,7 +36,7 @@
   string modulePath;
   foreach (path; importPaths)
   {
-    modulePath = path ~ (path[$-1] == dirSep ? "" : [dirSep]) ~ moduleFQN ~ ".d";
+    modulePath = path ~ dirSep ~ moduleFQN ~ ".d";
     // TODO: also check for *.di?
     if ((new FilePath(modulePath)).exists())
       return modulePath;
--- a/trunk/src/cmd/Statistics.d	Fri Jan 04 23:00:38 2008 +0100
+++ b/trunk/src/cmd/Statistics.d	Sat Jan 05 16:53:27 2008 +0100
@@ -5,7 +5,7 @@
 module cmd.Statistics;
 import dil.Token;
 import dil.File;
-import dil.Lexer;
+import dil.lexer.Lexer;
 import common;

 struct Statistics
--- a/trunk/src/dil/Lexer.d	Fri Jan 04 23:00:38 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2861 +0,0 @@
-/++
-  Author: Aziz Köksal
-  License: GPL3
-+/
-module dil.Lexer;
-import dil.Token;
-import dil.Information;
-import dil.Keywords;
-import dil.Identifier;
-import dil.Messages;
-import dil.HtmlEntities;
-import dil.CompilerInfo;
-import dil.IdTable;
-import dil.Unicode;
-import tango.stdc.stdlib : strtof, strtod, strtold;
-import tango.stdc.errno : errno, ERANGE;
-import tango.stdc.time : time_t, time, ctime;
-import tango.stdc.string : strlen;
-import common;
-
-public import dil.LexerFuncs;
-
-/++
-  The Lexer analyzes the characters of a source text and
-  produces a doubly-linked list of tokens.
-+/
-class Lexer
-{
-  Token* head;      /// The head of the doubly linked token list.
-  Token* tail;      /// The tail of the linked list. Set in scan().
-  Token* token;     /// Points to the current token in the token list.
-  string text;      /// The source text.
-  char[] filePath;  /// Path to the source text.
-  char* p;          /// Points to the current character in the source text.
-  char* end;        /// Points one character past the end of the source text.
-
-  // Members used for error messages:
-  InfoManager infoMan;
-  LexerError[] errors;
-  /// Always points to the beginning of the current line.
-  char* lineBegin;
-//   Token* newline;     /// Current newline token.
-  uint lineNum = 1;   /// Current, actual source text line number.
-  uint lineNum_hline; /// Line number set by #line.
-  uint inTokenString; /// > 0 if inside q{ }
-  char[] errorPath;   /// The path displayed in error messages.
-
-  /++
-    Creates a Lexer for the given source text.
-
-    The text is guaranteed to end with a zero character afterwards, the
-    token list is started with a HEAD token followed by a Newline token
-    (line number 1), and an optional shebang line is scanned.
-    Params:
-      text     = the UTF-8 source code.
-      filePath = the path to the source code; used for error messages.
-      infoMan  = stored in this.infoMan; may be null.
-  +/
-  this(string text, string filePath, InfoManager infoMan = null)
-  {
-    this.infoMan = infoMan;
-    this.errorPath = filePath;
-    this.filePath = filePath;
-
-    // Make sure the stored text is terminated by a zero character.
-    this.text = text;
-    bool hasTerminator = text.length != 0 && text[$-1] == 0;
-    if (!hasTerminator)
-    {
-      this.text.length = this.text.length + 1;
-      this.text[$-1] = 0;
-    }
-
-    // Cursor, line start and end marker all derive from the stored text.
-    auto first = this.text.ptr;
-    this.p = first;
-    this.lineBegin = first;
-    this.end = first + this.text.length;
-
-    // The list always starts with an empty HEAD token ...
-    auto headToken = new Token;
-    headToken.type = TOK.HEAD;
-    headToken.start = headToken.end = first;
-    this.head = headToken;
-
-    // ... directly followed by an empty Newline token for line 1.
-    auto newline = new Token;
-    newline.type = TOK.Newline;
-    newline.setWhitespaceFlag();
-    newline.start = newline.end = first;
-    newline.filePath = this.errorPath;
-    newline.lineNum = 1;
-    newline.lineNum_hline = 0;
-    newline.prev = headToken;
-    headToken.next = newline;
-    // The Newline token becomes the current token.
-    this.token = newline;
-
-    scanShebang();
-  }
-
-  /++
-    Frees every token of the list, from head up to the last scanned token.
-
-    The walk does not depend on tail being set, so a list that was not
-    scanned up to EOF is released completely as well.
-  +/
-  ~this()
-  {
-    auto token = head;
-    while (token !is null)
-    {
-      // Read the successor first; the node is gone after the delete.
-      auto next = token.next;
-      // An EOF token has to be the tail and has to terminate the list.
-      assert(token.type == TOK.EOF ? token == tail && next is null : 1);
-      delete token;
-      token = next;
-    }
-    // Do not keep pointers to freed tokens around.
-    head = tail = this.token = null;
-  }
-
-  /++
-    Scans the "shebang", which may optionally appear once at the
-    beginning of a file.
-    If the text at p starts with "#!", a whitespace-flagged TOK.Shebang
-    token is linked in after the current token; it spans from '#' up to
-    the position where isEndOfLine(p) holds.
-    Regexp: #![^\EndOfLine]*
-  +/
-  void scanShebang()
-  {
-    if (p[0] != '#' || p[1] != '!')
-      return;
-
-    auto shebang = new Token;
-    shebang.type = TOK.Shebang;
-    shebang.setWhitespaceFlag();
-    shebang.start = p;
-    // Skip "#!", then walk to the end of the line.
-    for (p += 2; !isEndOfLine(p); ++p)
-      if (!isascii(*p))
-        decodeUTF8();
-    shebang.end = p;
-    // Append after the current token; this.token itself is not advanced.
-    shebang.prev = this.token;
-    this.token.next = shebang;
-  }
-
-  /++
-    Stores the value of a special token (its text starts with "__") in t.
-
-    FILE yields the error path, LINE the error line number, VENDOR and
-    VERSION the compiler information, and DATE, TIME and TIMESTAMP are
-    cut out of the string returned by ctime() for the current time.
-    Any other token type is a programming error.
-  +/
-  void finalizeSpecialToken(ref Token t)
-  {
-    assert(t.srcText[0..2] == "__");
-    auto kind = t.type;
-    if (kind == TOK.FILE)
-      t.str = this.errorPath;
-    else if (kind == TOK.LINE)
-      t.uint_ = this.errorLineNumber(this.lineNum);
-    else if (kind == TOK.DATE || kind == TOK.TIME || kind == TOK.TIMESTAMP)
-    {
-      time_t now;
-      time(&now);
-      char* raw = ctime(&now);
-      // ctime() formats as "Www Mmm dd hh:mm:ss yyyy\n".
-      char[] stamp = raw[0 .. strlen(raw)];
-      if (kind == TOK.DATE)
-        t.str = stamp[4..11] ~ stamp[20..24] ~ \0; // "Mmm dd yyyy"
-      else if (kind == TOK.TIME)
-        t.str = stamp[11..19] ~ \0; // "hh:mm:ss"
-      else
-        t.str = stamp[0..24] ~ \0; // Everything but the trailing newline.
-    }
-    else if (kind == TOK.VENDOR)
-      t.str = VENDOR;
-    else if (kind == TOK.VERSION)
-      t.uint_ = VERSION_MAJOR*1000 + VERSION_MINOR;
-    else
-      assert(0);
-  }
-
-  /// Makes p the beginning of the current line; p must directly follow a newline.
-  private void setLineBegin(char* p)
-  {
-    // p lies inside the text and has at least one character in front of it ...
-    assert(text.ptr <= (p-1) && end > p);
-    // ... and that character is the end of a newline.
-    assert(isNewlineEnd(p - 1));
-    lineBegin = p;
-  }
-
-  /++
-    Moves t to its successor in the token list.
-    A successor that was already scanned is reused; otherwise a new token
-    is scanned and appended, unless t is the tail, in which case t is
-    left untouched.
-  +/
-  private void scanNext(ref Token* t)
-  {
-    assert(t !is null);
-    // Already scanned: just follow the link.
-    if (t.next)
-    {
-      t = t.next;
-      return;
-    }
-    // Nothing comes after the tail.
-    if (t == this.tail)
-      return;
-    // Scan a fresh token and append it behind t.
-    Token* scanned = new Token;
-    scan(*scanned);
-    scanned.prev = t;
-    t.next = scanned;
-    t = scanned;
-  }
-
-  /// Advance t one token forward (via scanNext); this.token is not changed.
-  void peek(ref Token* t)
-  {
-    scanNext(t);
-  }
-
-  /// Advance this.token to the next token in the source text and return its type.
-  TOK nextToken()
-  {
-    scanNext(this.token);
-    return this.token.type;
-  }
-
-  /++
-    Returns true if p points to the last character of a Newline.
-    That is '\n' or '\r', or the third byte of LS or PS when the two bytes
-    in front of it equal the first two bytes of LS.
-  +/
-  bool isNewlineEnd(char* p)
-  {
-    auto ch = *p;
-    if (ch == '\n' || ch == '\r')
-      return true;
-    bool lastByteMatches = ch == LS[2] || ch == PS[2];
-    // Only look behind when two more bytes exist in the text.
-    return lastByteMatches && text.ptr <= (p-2) &&
-           p[-2] == LS[0] && p[-1] == LS[1];
-  }
-
-  /++
-    This is the old scan method.
-    TODO: profile old and new to see which one is faster.
-
-    Scans a single token beginning at p and stores the result in t.
-    Leading whitespace (isspace) is skipped and its start is kept in t.ws.
-    After that the first character decides what is scanned: a newline,
-    an identifier/keyword/prefixed string literal, a number, a comment
-    or '/' operator, a literal, an operator, a '#' sequence or EOF.
-    Anything else is reported as an illegal character.
-    An EOF token (also "__EOF__") is remembered in tail.
-
-    Params:
-      t = the token to be filled in.
-  +/
-  public void scan(ref Token t)
-  in
-  {
-    assert(text.ptr <= p && p < end);
-  }
-  out
-  {
-    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type));
-    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type));
-  }
-  body
-  {
-    // Scan whitespace.
-    if (isspace(*p))
-    {
-      t.ws = p;
-      while (isspace(*++p))
-      {}
-    }
-
-    // Scan a token.
-    uint c = *p;
-    {
-      t.start = p;
-      // Newline.
-      switch (*p)
-      {
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      // Falls through: "\r" and "\r\n" are handled like "\n".
-      case '\n':
-        assert(isNewlineEnd(p));
-        ++p;
-        ++lineNum;
-        setLineBegin(p);
-//         this.newline = &t;
-        t.type = TOK.Newline;
-        t.setWhitespaceFlag();
-        t.filePath = this.errorPath;
-        t.lineNum = lineNum;
-        t.lineNum_hline = lineNum_hline;
-        t.end = p;
-        return;
-      default:
-        if (isUnicodeNewline(p))
-        {
-          // Move to the third byte of the newline sequence.
-          ++p; ++p;
-          goto case '\n';
-        }
-      }
-      // Identifier or string literal.
-      if (isidbeg(c))
-      {
-        // For r"..." p is advanced to the quote before scanning.
-        if (c == 'r' && p[1] == '"' && ++p)
-          return scanRawStringLiteral(t);
-        if (c == 'x' && p[1] == '"')
-          return scanHexStringLiteral(t);
-      version(D2)
-      {
-        if (c == 'q' && p[1] == '"')
-          return scanDelimitedStringLiteral(t);
-        if (c == 'q' && p[1] == '{')
-          return scanTokenStringLiteral(t);
-      }
-        // Scan identifier.
-      Lidentifier:
-        do
-        { c = *++p; }
-        while (isident(c) || !isascii(c) && isUnicodeAlpha())
-
-        t.end = p;
-
-        // The identifier table decides whether this is an identifier,
-        // a keyword, a special token or "__EOF__".
-        auto id = IdTable.lookup(t.srcText);
-        t.type = id.type;
-        t.ident = id;
-
-        if (t.type == TOK.Identifier || t.isKeyword)
-          return;
-        else if (t.isSpecialToken)
-          finalizeSpecialToken(t);
-        else if (t.type == TOK.EOF)
-        {
-          tail = &t;
-          assert(t.srcText == "__EOF__");
-        }
-        else
-          assert(0, "unexpected token type: " ~ Token.toString(t.type));
-        return;
-      }
-
-      if (isdigit(c))
-        return scanNumber(t);
-
-      // Comments and the operators /= and /.
-      if (c == '/')
-      {
-        c = *++p;
-        switch(c)
-        {
-        case '=':
-          ++p;
-          t.type = TOK.DivAssign;
-          t.end = p;
-          return;
-        case '+':
-          return scanNestedComment(t);
-        case '*':
-          return scanBlockComment(t);
-        case '/':
-          // Line comment: runs until isEndOfLine(p) holds.
-          while (!isEndOfLine(++p))
-            isascii(*p) || decodeUTF8();
-          t.type = TOK.Comment;
-          t.setWhitespaceFlag();
-          t.end = p;
-          return;
-        default:
-          t.type = TOK.Div;
-          t.end = p;
-          return;
-        }
-      }
-
-      // Convention for the operator cases below:
-      //   goto Lcommon  - p is on the last character of the token.
-      //   goto Lcommon2 - p is already one past the token.
-      switch (c)
-      {
-      case '\'':
-        return scanCharacterLiteral(t);
-      case '`':
-        return scanRawStringLiteral(t);
-      case '"':
-        return scanNormalStringLiteral(t);
-      case '\\':
-        // A run of escape sequences forms one String token.
-        char[] buffer;
-        do
-        {
-          c = scanEscapeSequence();
-          if (isascii(c))
-            buffer ~= c;
-          else
-            encodeUTF8(buffer, c);
-        } while (*p == '\\')
-        buffer ~= 0;
-        t.type = TOK.String;
-        t.str = buffer;
-        t.end = p;
-        return;
-      case '>': /* >  >=  >>  >>=  >>>  >>>= */
-        c = *++p;
-        switch (c)
-        {
-        case '=':
-          t.type = TOK.GreaterEqual;
-          goto Lcommon;
-        case '>':
-          if (p[1] == '>')
-          {
-            ++p;
-            if (p[1] == '=')
-            { ++p;
-              t.type = TOK.URShiftAssign;
-            }
-            else
-              t.type = TOK.URShift;
-          }
-          else if (p[1] == '=')
-          {
-            ++p;
-            t.type = TOK.RShiftAssign;
-          }
-          else
-            t.type = TOK.RShift;
-          goto Lcommon;
-        default:
-          t.type = TOK.Greater;
-          goto Lcommon2;
-        }
-        assert(0);
-      case '<': /* <  <=  <>  <>=  <<  <<= */
-        c = *++p;
-        switch (c)
-        {
-        case '=':
-          t.type = TOK.LessEqual;
-          goto Lcommon;
-        case '<':
-          if (p[1] == '=') {
-            ++p;
-            t.type = TOK.LShiftAssign;
-          }
-          else
-            t.type = TOK.LShift;
-          goto Lcommon;
-        case '>':
-          if (p[1] == '=') {
-            ++p;
-            t.type = TOK.LorEorG;
-          }
-          else
-            t.type = TOK.LorG;
-          goto Lcommon;
-        default:
-          t.type = TOK.Less;
-          goto Lcommon2;
-        }
-        assert(0);
-      case '!': /* !  !<  !>  !<=  !>=  !<>  !<>= */
-        c = *++p;
-        switch (c)
-        {
-        case '<':
-          c = *++p;
-          if (c == '>')
-          {
-            if (p[1] == '=') {
-              ++p;
-              t.type = TOK.Unordered;
-            }
-            else
-              t.type = TOK.UorE;
-          }
-          else if (c == '=')
-          {
-            t.type = TOK.UorG;
-          }
-          else {
-            t.type = TOK.UorGorE;
-            goto Lcommon2;
-          }
-          goto Lcommon;
-        case '>':
-          if (p[1] == '=')
-          {
-            ++p;
-            t.type = TOK.UorL;
-          }
-          else
-            t.type = TOK.UorLorE;
-          goto Lcommon;
-        case '=':
-          t.type = TOK.NotEqual;
-          goto Lcommon;
-        default:
-          t.type = TOK.Not;
-          goto Lcommon2;
-        }
-        assert(0);
-      case '.': /* .  .[0-9]  ..  ... */
-        if (p[1] == '.')
-        {
-          ++p;
-          if (p[1] == '.') {
-            ++p;
-            t.type = TOK.Ellipses;
-          }
-          else
-            t.type = TOK.Slice;
-        }
-        else if (isdigit(p[1]))
-        {
-          return scanReal(t);
-        }
-        else
-          t.type = TOK.Dot;
-        goto Lcommon;
-      case '|': /* |  ||  |= */
-        c = *++p;
-        if (c == '=')
-          t.type = TOK.OrAssign;
-        else if (c == '|')
-          t.type = TOK.OrLogical;
-        else {
-          t.type = TOK.OrBinary;
-          goto Lcommon2;
-        }
-        goto Lcommon;
-      case '&': /* &  &&  &= */
-        c = *++p;
-        if (c == '=')
-          t.type = TOK.AndAssign;
-        else if (c == '&')
-          t.type = TOK.AndLogical;
-        else {
-          t.type = TOK.AndBinary;
-          goto Lcommon2;
-        }
-        goto Lcommon;
-      case '+': /* +  ++  += */
-        c = *++p;
-        if (c == '=')
-          t.type = TOK.PlusAssign;
-        else if (c == '+')
-          t.type = TOK.PlusPlus;
-        else {
-          t.type = TOK.Plus;
-          goto Lcommon2;
-        }
-        goto Lcommon;
-      case '-': /* -  --  -= */
-        c = *++p;
-        if (c == '=')
-          t.type = TOK.MinusAssign;
-        else if (c == '-')
-          t.type = TOK.MinusMinus;
-        else {
-          t.type = TOK.Minus;
-          goto Lcommon2;
-        }
-        goto Lcommon;
-      case '=': /* =  == */
-        if (p[1] == '=') {
-          ++p;
-          t.type = TOK.Equal;
-        }
-        else
-          t.type = TOK.Assign;
-        goto Lcommon;
-      case '~': /* ~  ~= */
-         if (p[1] == '=') {
-           ++p;
-           t.type = TOK.CatAssign;
-         }
-         else
-           t.type = TOK.Tilde;
-         goto Lcommon;
-      case '*': /* *  *= */
-         if (p[1] == '=') {
-           ++p;
-           t.type = TOK.MulAssign;
-         }
-         else
-           t.type = TOK.Mul;
-         goto Lcommon;
-      case '^': /* ^  ^= */
-         if (p[1] == '=') {
-           ++p;
-           t.type = TOK.XorAssign;
-         }
-         else
-           t.type = TOK.Xor;
-         goto Lcommon;
-      case '%': /* %  %= */
-         if (p[1] == '=') {
-           ++p;
-           t.type = TOK.ModAssign;
-         }
-         else
-           t.type = TOK.Mod;
-         goto Lcommon;
-      // Single character tokens:
-      case '(':
-        t.type = TOK.LParen;
-        goto Lcommon;
-      case ')':
-        t.type = TOK.RParen;
-        goto Lcommon;
-      case '[':
-        t.type = TOK.LBracket;
-        goto Lcommon;
-      case ']':
-        t.type = TOK.RBracket;
-        goto Lcommon;
-      case '{':
-        t.type = TOK.LBrace;
-        goto Lcommon;
-      case '}':
-        t.type = TOK.RBrace;
-        goto Lcommon;
-      case ':':
-        t.type = TOK.Colon;
-        goto Lcommon;
-      case ';':
-        t.type = TOK.Semicolon;
-        goto Lcommon;
-      case '?':
-        t.type = TOK.Question;
-        goto Lcommon;
-      case ',':
-        t.type = TOK.Comma;
-        goto Lcommon;
-      case '$':
-        t.type = TOK.Dollar;
-      Lcommon:
-        ++p;
-      Lcommon2:
-        t.end = p;
-        return;
-      case '#':
-        return scanSpecialTokenSequence(t);
-      default:
-      }
-
-      // Check for EOF
-      if (isEOF(c))
-      {
-        assert(isEOF(*p), ""~*p);
-        t.type = TOK.EOF;
-        t.end = p;
-        tail = &t;
-        assert(t.start == t.end);
-        return;
-      }
-
-      // A non-ASCII character may still start an identifier.
-      if (!isascii(c))
-      {
-        c = decodeUTF8();
-        if (isUniAlpha(c))
-          goto Lidentifier;
-      }
-
-      error(t.start, MID.IllegalCharacter, cast(dchar)c);
-
-      // The offending character becomes a whitespace-flagged Illegal token.
-      ++p;
-      t.type = TOK.Illegal;
-      t.setWhitespaceFlag();
-      t.dchar_ = c;
-      t.end = p;
-      return;
-    }
-  }
-
-  /// Packs a string of 1 to 4 characters into a uint at compile time;
-  /// the last character ends up in the lowest byte (see the static assert below).
-  template toUint(char[] T)
-  {
-    static assert(0 < T.length && T.length <= 4);
-    static if (T.length == 1)
-      const uint toUint = T[0];
-    else
-      // Shift the first character to the top and recurse on the rest.
-      const uint toUint = (T[0] << ((T.length-1)*8)) | toUint!(T[1..$]);
-  }
-  static assert(toUint!("\xAA\xBB\xCC\xDD") == 0xAABBCCDD);
-
-  // Can't use this yet due to a bug in DMD (bug id=1534).
-  // Builds the text of a case statement for the packed value of str,
-  // which jumps to the given label.
-  // NOTE(review): tok is not part of the generated text; presumably the
-  // empty line was meant to hold the assignment of the token type - confirm.
-  template case_(char[] str, TOK tok, char[] label)
-  {
-    const char[] case_ =
-      `case `~toUint!(str).stringof~`:
-
-         goto `~label~`;`;
-  }
-
-  /// Case text for a 4 character token; jumps to the label Lcommon_4.
-  template case_L4(char[] str, TOK tok)
-  {
-    const char[] case_L4 = case_!(str, tok, "Lcommon_4");
-  }
-
-  /// Case text for a 3 character token; jumps to the label Lcommon_3.
-  template case_L3(char[] str, TOK tok)
-  {
-    const char[] case_L3 = case_!(str, tok, "Lcommon_3");
-  }
-
-  /// Case text for a 2 character token; jumps to the label Lcommon_2.
-  template case_L2(char[] str, TOK tok)
-  {
-    const char[] case_L2 = case_!(str, tok, "Lcommon_2");
-  }
-
-  /// Case text for a 1 character token; jumps to the label Lcommon.
-  /// The member carries the name of the template, so that case_L1!(...)
-  /// evaluates to the string like its siblings do.
-  template case_L1(char[] str, TOK tok)
-  {
-    const char[] case_L1 = case_!(str, tok, "Lcommon");
-  }
-
-  /++
-    Scans a single token beginning at p and stores the result in t.
-
-    Differs from scan() in how operators are recognized: up to four
-    characters at p are packed into one uint (first character in the
-    highest used byte) and compared against the 4, 3, 2 and finally
-    1 character tokens, longest first. The packed value is shifted right
-    by one byte before each shorter round.
-    Whitespace, newlines, identifiers, numbers, literals, EOF and illegal
-    characters are handled with the same code as in scan().
-
-    Params:
-      t = the token to be filled in.
-  +/
-  public void scan_(ref Token t)
-  in
-  {
-    assert(text.ptr <= p && p < end);
-  }
-  out
-  {
-    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type));
-    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type));
-  }
-  body
-  {
-    // Scan whitespace.
-    if (isspace(*p))
-    {
-      t.ws = p;
-      while (isspace(*++p))
-      {}
-    }
-
-    // Scan a token.
-    t.start = p;
-    // Newline.
-    switch (*p)
-    {
-    case '\r':
-      if (p[1] == '\n')
-        ++p;
-    // Falls through: "\r" and "\r\n" are handled like "\n".
-    case '\n':
-      assert(isNewlineEnd(p));
-      ++p;
-      ++lineNum;
-      setLineBegin(p);
-//       this.newline = &t;
-      t.type = TOK.Newline;
-      t.setWhitespaceFlag();
-      t.filePath = this.errorPath;
-      t.lineNum = lineNum;
-      t.lineNum_hline = lineNum_hline;
-      t.end = p;
-      return;
-    default:
-      if (isUnicodeNewline(p))
-      {
-        // Move to the third byte of the newline sequence.
-        ++p; ++p;
-        goto case '\n';
-      }
-    }
-
-    // Pack as many characters as are left (at most four) into c.
-    uint c = *p;
-    assert(end - p != 0);
-    switch (end - p)
-    {
-    case 1:
-      goto L1character;
-    case 2:
-      c <<= 8; c |= p[1];
-      goto L2characters;
-    case 3:
-      c <<= 8; c |= p[1]; c <<= 8; c |= p[2];
-      goto L3characters;
-    default:
-      version(BigEndian)
-        c = *cast(uint*)p;
-      else
-      {
-        c <<= 8; c |= p[1]; c <<= 8; c |= p[2]; c <<= 8; c |= p[3];
-        /+
-        c = *cast(uint*)p;
-        asm
-        {
-          mov EDX, c;
-          bswap EDX;
-          mov c, EDX;
-        }
-        +/
-      }
-    }
-
-    // 4 character tokens.
-    switch (c)
-    {
-    case toUint!(">>>="):
-      // Unsigned right shift assignment, the same type scan() produces.
-      t.type = TOK.URShiftAssign;
-      goto Lcommon_4;
-    case toUint!("!<>="):
-      t.type = TOK.Unordered;
-    Lcommon_4:
-      p += 4;
-      t.end = p;
-      return;
-    default:
-    }
-
-    c >>>= 8;
-  L3characters:
-    assert(p == t.start);
-    // 3 character tokens.
-    switch (c)
-    {
-    case toUint!(">>="):
-      t.type = TOK.RShiftAssign;
-      goto Lcommon_3;
-    case toUint!(">>>"):
-      t.type = TOK.URShift;
-      goto Lcommon_3;
-    case toUint!("<>="):
-      t.type = TOK.LorEorG;
-      goto Lcommon_3;
-    case toUint!("<<="):
-      t.type = TOK.LShiftAssign;
-      goto Lcommon_3;
-    case toUint!("!<="):
-      t.type = TOK.UorG;
-      goto Lcommon_3;
-    case toUint!("!>="):
-      t.type = TOK.UorL;
-      goto Lcommon_3;
-    case toUint!("!<>"):
-      t.type = TOK.UorE;
-      goto Lcommon_3;
-    case toUint!("..."):
-      t.type = TOK.Ellipses;
-    Lcommon_3:
-      p += 3;
-      t.end = p;
-      return;
-    default:
-    }
-
-    c >>>= 8;
-  L2characters:
-    assert(p == t.start);
-    // 2 character tokens.
-    switch (c)
-    {
-    case toUint!("/+"):
-      ++p; // Skip /
-      return scanNestedComment(t);
-    case toUint!("/*"):
-      ++p; // Skip /
-      return scanBlockComment(t);
-    case toUint!("//"):
-      ++p; // Skip /
-      assert(*p == '/');
-      // Line comment: runs until isEndOfLine(p) holds.
-      while (!isEndOfLine(++p))
-        isascii(*p) || decodeUTF8();
-      t.type = TOK.Comment;
-      t.setWhitespaceFlag();
-      t.end = p;
-      return;
-    case toUint!(">="):
-      t.type = TOK.GreaterEqual;
-      goto Lcommon_2;
-    case toUint!(">>"):
-      t.type = TOK.RShift;
-      goto Lcommon_2;
-    case toUint!("<<"):
-      t.type = TOK.LShift;
-      goto Lcommon_2;
-    case toUint!("<="):
-      t.type = TOK.LessEqual;
-      goto Lcommon_2;
-    case toUint!("<>"):
-      t.type = TOK.LorG;
-      goto Lcommon_2;
-    case toUint!("!<"):
-      t.type = TOK.UorGorE;
-      goto Lcommon_2;
-    case toUint!("!>"):
-      t.type = TOK.UorLorE;
-      goto Lcommon_2;
-    case toUint!("!="):
-      t.type = TOK.NotEqual;
-      goto Lcommon_2;
-    case toUint!(".."):
-      t.type = TOK.Slice;
-      goto Lcommon_2;
-    case toUint!("&&"):
-      t.type = TOK.AndLogical;
-      goto Lcommon_2;
-    case toUint!("&="):
-      t.type = TOK.AndAssign;
-      goto Lcommon_2;
-    case toUint!("||"):
-      t.type = TOK.OrLogical;
-      goto Lcommon_2;
-    case toUint!("|="):
-      t.type = TOK.OrAssign;
-      goto Lcommon_2;
-    case toUint!("++"):
-      t.type = TOK.PlusPlus;
-      goto Lcommon_2;
-    case toUint!("+="):
-      t.type = TOK.PlusAssign;
-      goto Lcommon_2;
-    case toUint!("--"):
-      t.type = TOK.MinusMinus;
-      goto Lcommon_2;
-    case toUint!("-="):
-      t.type = TOK.MinusAssign;
-      goto Lcommon_2;
-    case toUint!("=="):
-      t.type = TOK.Equal;
-      goto Lcommon_2;
-    case toUint!("~="):
-      t.type = TOK.CatAssign;
-      goto Lcommon_2;
-    case toUint!("*="):
-      t.type = TOK.MulAssign;
-      goto Lcommon_2;
-    case toUint!("/="):
-      t.type = TOK.DivAssign;
-      goto Lcommon_2;
-    case toUint!("^="):
-      t.type = TOK.XorAssign;
-      goto Lcommon_2;
-    case toUint!("%="):
-      t.type = TOK.ModAssign;
-    Lcommon_2:
-      p += 2;
-      t.end = p;
-      return;
-    default:
-    }
-
-    c >>>= 8;
-  L1character:
-    assert(p == t.start);
-    assert(*p == c, Format("p={0},c={1}", *p, cast(dchar)c));
-    // 1 character tokens.
-    // TODO: consider storing the token type in ptable.
-    switch (c)
-    {
-    case '\'':
-      return scanCharacterLiteral(t);
-    case '`':
-      return scanRawStringLiteral(t);
-    case '"':
-      return scanNormalStringLiteral(t);
-    case '\\':
-      // A run of escape sequences forms one String token.
-      char[] buffer;
-      do
-      {
-        c = scanEscapeSequence();
-        if (isascii(c))
-          buffer ~= c;
-        else
-          encodeUTF8(buffer, c);
-      } while (*p == '\\')
-      buffer ~= 0;
-      t.type = TOK.String;
-      t.str = buffer;
-      t.end = p;
-      return;
-    case '<':
-      t.type = TOK.Less;
-      goto Lcommon;
-    case '>':
-      t.type = TOK.Greater;
-      goto Lcommon;
-    case '^':
-      t.type = TOK.Xor;
-      goto Lcommon;
-    case '!':
-      t.type = TOK.Not;
-      goto Lcommon;
-    case '.':
-      if (isdigit(p[1]))
-        return scanReal(t);
-      t.type = TOK.Dot;
-      goto Lcommon;
-    case '&':
-      t.type = TOK.AndBinary;
-      goto Lcommon;
-    case '|':
-      t.type = TOK.OrBinary;
-      goto Lcommon;
-    case '+':
-      t.type = TOK.Plus;
-      goto Lcommon;
-    case '-':
-      t.type = TOK.Minus;
-      goto Lcommon;
-    case '=':
-      t.type = TOK.Assign;
-      goto Lcommon;
-    case '~':
-      t.type = TOK.Tilde;
-      goto Lcommon;
-    case '*':
-      t.type = TOK.Mul;
-      goto Lcommon;
-    case '/':
-      t.type = TOK.Div;
-      goto Lcommon;
-    case '%':
-      t.type = TOK.Mod;
-      goto Lcommon;
-    case '(':
-      t.type = TOK.LParen;
-      goto Lcommon;
-    case ')':
-      t.type = TOK.RParen;
-      goto Lcommon;
-    case '[':
-      t.type = TOK.LBracket;
-      goto Lcommon;
-    case ']':
-      t.type = TOK.RBracket;
-      goto Lcommon;
-    case '{':
-      t.type = TOK.LBrace;
-      goto Lcommon;
-    case '}':
-      t.type = TOK.RBrace;
-      goto Lcommon;
-    case ':':
-      t.type = TOK.Colon;
-      goto Lcommon;
-    case ';':
-      t.type = TOK.Semicolon;
-      goto Lcommon;
-    case '?':
-      t.type = TOK.Question;
-      goto Lcommon;
-    case ',':
-      t.type = TOK.Comma;
-      goto Lcommon;
-    case '$':
-      t.type = TOK.Dollar;
-    Lcommon:
-      ++p;
-      t.end = p;
-      return;
-    case '#':
-      return scanSpecialTokenSequence(t);
-    default:
-    }
-
-    assert(p == t.start);
-    assert(*p == c);
-
-    // TODO: consider moving isidbeg() and isdigit() up.
-    if (isidbeg(c))
-    {
-      // For r"..." p is advanced to the quote before scanning.
-      if (c == 'r' && p[1] == '"' && ++p)
-        return scanRawStringLiteral(t);
-      if (c == 'x' && p[1] == '"')
-        return scanHexStringLiteral(t);
-    version(D2)
-    {
-      if (c == 'q' && p[1] == '"')
-        return scanDelimitedStringLiteral(t);
-      if (c == 'q' && p[1] == '{')
-        return scanTokenStringLiteral(t);
-    }
-      // Scan identifier.
-    Lidentifier:
-      do
-      { c = *++p; }
-      while (isident(c) || !isascii(c) && isUnicodeAlpha())
-
-      t.end = p;
-
-      // The identifier table decides whether this is an identifier,
-      // a keyword, a special token or "__EOF__".
-      auto id = IdTable.lookup(t.srcText);
-      t.type = id.type;
-      t.ident = id;
-
-      if (t.type == TOK.Identifier || t.isKeyword)
-        return;
-      else if (t.isSpecialToken)
-        finalizeSpecialToken(t);
-      else if (t.type == TOK.EOF)
-      {
-        tail = &t;
-        assert(t.srcText == "__EOF__");
-      }
-      else
-        assert(0, "unexpected token type: " ~ Token.toString(t.type));
-      return;
-    }
-
-    if (isdigit(c))
-      return scanNumber(t);
-
-    // Check for EOF
-    if (isEOF(c))
-    {
-      assert(isEOF(*p), *p~"");
-      t.type = TOK.EOF;
-      t.end = p;
-      tail = &t;
-      assert(t.start == t.end);
-      return;
-    }
-
-    // A non-ASCII character may still start an identifier.
-    if (!isascii(c))
-    {
-      c = decodeUTF8();
-      if (isUniAlpha(c))
-        goto Lidentifier;
-    }
-
-    error(t.start, MID.IllegalCharacter, cast(dchar)c);
-
-    // The offending character becomes a whitespace-flagged Illegal token.
-    ++p;
-    t.type = TOK.Illegal;
-    t.setWhitespaceFlag();
-    t.dchar_ = c;
-    t.end = p;
-    return;
-  }
-
-  /++
-    Scans a block comment; p must point to the '*' of the opening "/*".
-    Line numbers are kept up to date for newlines inside the comment.
-    Reaching EOF reports MID.UnterminatedBlockComment at the start of
-    the comment. t becomes a whitespace-flagged TOK.Comment token.
-  +/
-  void scanBlockComment(ref Token t)
-  {
-    assert(p[-1] == '/' && *p == '*');
-    // Position of the comment start, needed for the error message.
-    auto startLineNum = lineNum;
-    auto startLineBegin = lineBegin;
-  Scanning:
-    for (;;)
-    {
-      ++p;
-      switch (*p)
-      {
-      case '*':
-        if (p[1] == '/')
-        {
-          p += 2; // Skip "*/".
-          break Scanning;
-        }
-        break;
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      // Falls through.
-      case '\n':
-        assert(isNewlineEnd(p));
-        ++lineNum;
-        setLineBegin(p+1);
-        break;
-      default:
-        if (isascii(*p))
-        {
-          if (isEOF(*p))
-          {
-            error(startLineNum, startLineBegin, t.start, MID.UnterminatedBlockComment);
-            break Scanning;
-          }
-        }
-        else if (isUnicodeNewlineChar(decodeUTF8()))
-          goto case '\n';
-      }
-    }
-    t.type = TOK.Comment;
-    t.setWhitespaceFlag();
-    t.end = p;
-  }
-
-  /++
-    Scans a nesting comment; p must point to the '+' of the opening "/+".
-    Every further "/+" raises the nesting level and every "+/" lowers it;
-    the comment ends when the level drops to zero.
-    Reaching EOF reports MID.UnterminatedNestedComment at the start of
-    the comment. t becomes a whitespace-flagged TOK.Comment token.
-  +/
-  void scanNestedComment(ref Token t)
-  {
-    assert(p[-1] == '/' && *p == '+');
-    // Position of the comment start, needed for the error message.
-    auto startLineNum = lineNum;
-    auto startLineBegin = lineBegin;
-    uint depth = 1;
-  Scanning:
-    for (;;)
-    {
-      ++p;
-      switch (*p)
-      {
-      case '/':
-        if (p[1] == '+')
-        {
-          ++p; // Skip the '+' of a nested opener.
-          ++depth;
-        }
-        break;
-      case '+':
-        if (p[1] == '/')
-        {
-          ++p; // Move onto the '/' of the closer.
-          --depth;
-          if (depth == 0)
-          {
-            ++p; // Step past the outermost closer.
-            break Scanning;
-          }
-        }
-        break;
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      // Falls through.
-      case '\n':
-        assert(isNewlineEnd(p));
-        ++lineNum;
-        setLineBegin(p+1);
-        break;
-      default:
-        if (isascii(*p))
-        {
-          if (isEOF(*p))
-          {
-            error(startLineNum, startLineBegin, t.start, MID.UnterminatedNestedComment);
-            break Scanning;
-          }
-        }
-        else if (isUnicodeNewlineChar(decodeUTF8()))
-          goto case '\n';
-      }
-    }
-    t.type = TOK.Comment;
-    t.setWhitespaceFlag();
-    t.end = p;
-  }
-
-  /++
-    Scans the optional postfix character of a string literal.
-    p must point directly behind the closing delimiter.
-    Returns: 'c', 'w' or 'd' (and skips it), or 0 if there is no postfix.
-  +/
-  char scanPostfix()
-  {
-    assert(p[-1] == '"' || p[-1] == '`' ||
-      { version(D2) return p[-1] == '}';
-               else return 0; }()
-    );
-    char postfix = *p;
-    if (postfix == 'c' || postfix == 'w' || postfix == 'd')
-    {
-      ++p;
-      return postfix;
-    }
-    return 0;
-  }
-
-  /++
-    Scans a double quoted string literal; p must point to the opening '"'.
-    The characters are collected in a buffer: escape sequences are replaced
-    by the value of scanEscapeSequence(), every newline is stored as '\n'
-    and non-ASCII characters are stored UTF-8 encoded.
-    t.str receives the buffer plus a terminating zero, t.pf the postfix.
-    A zero or _Z_ character before the closing quote reports
-    MID.UnterminatedString at the start of the literal.
-  +/
-  void scanNormalStringLiteral(ref Token t)
-  {
-    assert(*p == '"');
-    // Position of the literal start, needed for the error message.
-    auto tokenLineNum = lineNum;
-    auto tokenLineBegin = lineBegin;
-    t.type = TOK.String;
-    char[] buffer;
-    uint c;
-    while (1)
-    {
-      c = *++p;
-      switch (c)
-      {
-      case '"':
-        ++p;
-        t.pf = scanPostfix();
-      Lreturn:
-        t.str = buffer ~ '\0';
-        t.end = p;
-        return;
-      case '\\':
-        c = scanEscapeSequence();
-        // Step back one character, because the loop head advances p again.
-        --p;
-        if (isascii(c))
-          break;
-        encodeUTF8(buffer, c);
-        continue;
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      // Falls through.
-      case '\n':
-        assert(isNewlineEnd(p));
-        c = '\n'; // Convert Newline to \n.
-        ++lineNum;
-        setLineBegin(p+1);
-        break;
-      case 0, _Z_:
-        error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedString);
-        goto Lreturn;
-      default:
-        if (!isascii(c))
-        {
-          c = decodeUTF8();
-          if (isUnicodeNewlineChar(c))
-            goto case '\n';
-          encodeUTF8(buffer, c);
-          continue;
-        }
-      }
-      // Only ASCII characters get here; everything else was encoded above.
-      assert(isascii(c));
-      buffer ~= c;
-    }
-    assert(0);
-  }
-
-  /++
-    Scans a character literal; p must point to the opening apostrophe.
-    The value (an escape sequence or a single, possibly non-ASCII,
-    character) is stored in t.dchar_.
-    Reports MID.EmptyCharacterLiteral for '' and
-    MID.UnterminatedCharacterLiteral if the closing apostrophe is missing.
-  +/
-  void scanCharacterLiteral(ref Token t)
-  {
-    assert(*p == '\'');
-    t.type = TOK.CharLiteral;
-    ++p;
-    if (*p == '\\')
-      t.dchar_ = scanEscapeSequence();
-    else if (*p == '\'')
-      error(t.start, MID.EmptyCharacterLiteral);
-    else if (!isEndOfLine(p))
-    {
-      uint ch = *p;
-      if (!isascii(ch))
-        ch = decodeUTF8();
-      t.dchar_ = ch;
-      ++p;
-    }
-
-    // The literal has to be closed by an apostrophe.
-    if (*p != '\'')
-      error(t.start, MID.UnterminatedCharacterLiteral);
-    else
-      ++p;
-    t.end = p;
-  }
-
-  /++
-    Scans a raw string literal, i.e. `...` or r"...".
-    p must point to the opening delimiter: the back quote, or the double
-    quote following the 'r'.
-    The characters are collected unchanged, except that every newline is
-    stored as '\n'; non-ASCII characters are stored UTF-8 encoded.
-    t.str receives the buffer plus a terminating zero, t.pf the postfix.
-    A zero or _Z_ character before the closing delimiter reports
-    MID.UnterminatedRawString for r"..." and
-    MID.UnterminatedBackQuoteString for `...`.
-  +/
-  void scanRawStringLiteral(ref Token t)
-  {
-    assert(*p == '`' || *p == '"' && p[-1] == 'r');
-    // Position of the literal start, needed for the error message.
-    auto tokenLineNum = lineNum;
-    auto tokenLineBegin = lineBegin;
-    t.type = TOK.String;
-    uint delim = *p;
-    char[] buffer;
-    uint c;
-    while (1)
-    {
-      c = *++p;
-      switch (c)
-      {
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      // Falls through.
-      case '\n':
-        assert(isNewlineEnd(p));
-        c = '\n'; // Convert Newline to '\n'.
-        ++lineNum;
-        setLineBegin(p+1);
-        break;
-      case '`':
-      case '"':
-        if (c == delim)
-        {
-          ++p;
-          t.pf = scanPostfix();
-        Lreturn:
-          t.str = buffer ~ '\0';
-          t.end = p;
-          return;
-        }
-        // The other kind of quote is an ordinary character.
-        break;
-      case 0, _Z_:
-        // delim is the quote character itself, so r"..." is recognized by '"'.
-        error(tokenLineNum, tokenLineBegin, t.start,
-          delim == '"' ? MID.UnterminatedRawString : MID.UnterminatedBackQuoteString);
-        goto Lreturn;
-      default:
-        if (!isascii(c))
-        {
-          c = decodeUTF8();
-          if (isUnicodeNewlineChar(c))
-            goto case '\n';
-          encodeUTF8(buffer, c);
-          continue;
-        }
-      }
-      // Only ASCII characters get here; everything else was encoded above.
-      assert(isascii(c));
-      buffer ~= c;
-    }
-    assert(0);
-  }
-
-  /++
-    Scans a hex string literal of the form x"...".
-    Two hex digits form one byte of the string value. Characters accepted
-    by isspace() and newlines are skipped. Errors are reported for an odd
-    number of digits, for characters that are not hex digits and for a
-    missing closing quote.
-    On return t.str holds the zero-terminated bytes and t.end points
-    past the literal.
-  +/
-  void scanHexStringLiteral(ref Token t)
-  {
-    assert(p[0] == 'x' && p[1] == '"');
-    t.type = TOK.String;
-
-    // Position of the token start; used for errors that are reported
-    // after further lines may have been scanned.
-    auto tokenLineNum = lineNum;
-    auto tokenLineBegin = lineBegin;
-
-    uint c;
-    ubyte[] buffer;
-    ubyte h; // hex number
-    uint n; // number of hex digits
-
-    ++p;
-    assert(*p == '"');
-    while (1)
-    {
-      c = *++p;
-      switch (c)
-      {
-      case '"':
-        if (n & 1)
-          error(tokenLineNum, tokenLineBegin, t.start, MID.OddNumberOfDigitsInHexString);
-        ++p;
-        t.pf = scanPostfix();
-      Lreturn:
-        t.str = cast(string) (buffer ~= 0);
-        t.end = p;
-        return;
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      case '\n':
-        assert(isNewlineEnd(p));
-        ++lineNum;
-        setLineBegin(p+1);
-        continue;
-      default:
-        if (ishexad(c))
-        {
-          // Convert the digit character to its numeric value (0-15).
-          if (c <= '9')
-            c -= '0';
-          else if (c <= 'F')
-            c -= 'A' - 10;
-          else
-            c -= 'a' - 10;
-
-          if (n & 1)
-          { // Second digit of a pair: combine with the first and store the byte.
-            h <<= 4;
-            h |= c;
-            buffer ~= h;
-          }
-          else
-            h = cast(ubyte)c; // First digit of a pair.
-          ++n;
-          continue;
-        }
-        else if (isspace(c))
-          continue; // Skip spaces.
-        else if (isEOF(c))
-        {
-          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedHexString);
-          t.pf = 0;
-          goto Lreturn;
-        }
-        else
-        {
-          // Remember the start of the character; decodeUTF8() moves p.
-          auto errorAt = p;
-          if (!isascii(c))
-          {
-            c = decodeUTF8();
-            if (isUnicodeNewlineChar(c))
-              goto case '\n';
-          }
-          error(errorAt, MID.NonHexCharInHexString, cast(dchar)c);
-        }
-      }
-    }
-    assert(0);
-  }
-
-version(D2)
-{
-  /++
-    Scans a delimited string literal: q"..." (D2 only).
-    Three kinds of delimiters are handled:
-    *) a nesting pair: (), [], <> or {};
-    *) an identifier followed by a newline; the literal is closed by the
-       same identifier at the beginning of a line;
-    *) any other single character, which also closes the literal.
-    Every newline sequence is stored as a single '\n' in the string value.
-    On return t.str holds the zero-terminated value and t.end points
-    past the scanned text.
-  +/
-  void scanDelimitedStringLiteral(ref Token t)
-  {
-    assert(p[0] == 'q' && p[1] == '"');
-    t.type = TOK.String;
-
-    // Only needed by the error reports that are still marked as TODO below.
-    auto tokenLineNum = lineNum;
-    auto tokenLineBegin = lineBegin;
-
-    char[] buffer;
-    dchar opening_delim = 0, // 0 if no nested delimiter or '[', '(', '<', '{'
-          closing_delim; // Will be ']', ')', '>', '}',
-                         // the first character of an identifier or
-                         // any other Unicode/ASCII character.
-    char[] str_delim; // Identifier delimiter.
-    uint level = 1; // Counter for nestable delimiters.
-
-    ++p; ++p; // Skip q"
-    uint c = *p;
-    switch (c)
-    {
-    case '(':
-      opening_delim = c;
-      closing_delim = ')'; // c + 1
-      break;
-    case '[', '<', '{':
-      opening_delim = c;
-      closing_delim = c + 2; // Get to closing counterpart. Feature of ASCII table.
-      break;
-    default:
-      // Scans one newline at p. Returns '\n' if there was one, otherwise 0.
-      dchar scanNewline()
-      {
-        switch (*p)
-        {
-        case '\r':
-          if (p[1] == '\n')
-            ++p;
-        case '\n':
-          assert(isNewlineEnd(p));
-          ++p;
-          ++lineNum;
-          setLineBegin(p);
-          return '\n';
-        default:
-          if (isUnicodeNewline(p))
-          {
-            ++p; ++p;
-            goto case '\n';
-          }
-        }
-        return 0;
-      }
-      // Skip leading newlines:
-      while (scanNewline() != 0)
-      {}
-      assert(!isNewline(p));
-
-      char* begin = p;
-      c = *p;
-      closing_delim = c;
-      // TODO: Check for non-printable characters?
-      if (!isascii(c))
-      {
-        closing_delim = decodeUTF8();
-        if (!isUniAlpha(closing_delim))
-          break; // Not an identifier.
-      }
-      else if (!isidbeg(c))
-        break; // Not an identifier.
-
-      // Parse Identifier + EndOfLine
-      do
-      { c = *++p; }
-      while (isident(c) || !isascii(c) && isUnicodeAlpha())
-      // Store identifier
-      str_delim = begin[0..p-begin];
-      // Scan newline
-      if (scanNewline() == '\n')
-        --p; // Go back one because of "c = *++p;" in main loop.
-      else
-      {
-        // TODO: error(p, MID.ExpectedNewlineAfterIdentDelim);
-      }
-    }
-
-    // Returns true if the identifier delimiter starts at p
-    // and p is at the beginning of a line.
-    bool checkStringDelim(char* p)
-    {
-      assert(str_delim.length != 0);
-      // The newline after the opening identifier is not copied to the buffer.
-      // An empty buffer therefore means that no text has been scanned yet;
-      // it must not be indexed and is treated like the start of a line.
-      bool atLineStart = buffer.length == 0 || buffer[$-1] == '\n';
-      return atLineStart &&
-             end-p >= str_delim.length && // Check remaining length.
-             p[0..str_delim.length] == str_delim; // Compare.
-    }
-
-    while (1)
-    {
-      c = *++p;
-      switch (c)
-      {
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      case '\n':
-        assert(isNewlineEnd(p));
-        c = '\n'; // Convert Newline to '\n'.
-        ++lineNum;
-        setLineBegin(p+1);
-        break;
-      case 0, _Z_:
-        // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedDelimitedString);
-        goto Lreturn3;
-      default:
-        if (!isascii(c))
-        {
-          auto begin = p; // Start of the UTF-8 sequence.
-          c = decodeUTF8();
-          if (isUnicodeNewlineChar(c))
-            goto case '\n';
-          if (c == closing_delim)
-          {
-            if (str_delim.length)
-            {
-              if (checkStringDelim(begin))
-              {
-                p = begin + str_delim.length;
-                goto Lreturn2;
-              }
-            }
-            else
-            {
-              // A non-ASCII delimiter never nests.
-              assert(level == 1);
-              --level;
-              goto Lreturn;
-            }
-          }
-          encodeUTF8(buffer, c);
-          continue;
-        }
-        else
-        {
-          if (c == opening_delim)
-            ++level;
-          else if (c == closing_delim)
-          {
-            if (str_delim.length)
-            {
-              if (checkStringDelim(p))
-              {
-                p += str_delim.length;
-                goto Lreturn2;
-              }
-            }
-            else if (--level == 0)
-              goto Lreturn;
-          }
-        }
-      }
-      assert(isascii(c));
-      buffer ~= c;
-    }
-  Lreturn: // Character delimiter.
-    assert(c == closing_delim);
-    assert(level == 0);
-    ++p; // Skip closing delimiter.
-  Lreturn2: // String delimiter.
-    if (*p == '"')
-      ++p;
-    else
-    {
-      // TODO: error(p, MID.ExpectedDblQuoteAfterDelim, str_delim.length ? str_delim : closing_delim~"");
-    }
-
-    t.pf = scanPostfix();
-  Lreturn3: // Error.
-    t.str = buffer ~ '\0';
-    t.end = p;
-  }
-
-  /++
-    Scans a token string literal: q{...} (D2 only).
-    The content is scanned token by token with scan() until the matching
-    closing brace or EOF is reached. The scanned tokens are linked into a
-    list that is stored in t.tok_str. The string value (t.str) is the
-    source text between the braces, zero-terminated, with every newline
-    sequence replaced by a single '\n'.
-  +/
-  void scanTokenStringLiteral(ref Token t)
-  {
-    assert(p[0] == 'q' && p[1] == '{');
-    t.type = TOK.String;
-
-    // Only needed by the error report that is still marked as TODO below.
-    auto tokenLineNum = lineNum;
-    auto tokenLineBegin = lineBegin;
-
-    // A guard against changes to particular members:
-    // this.lineNum_hline and this.errorPath
-    ++inTokenString;
-
-    // Local copy of the line number at the start of the literal. Compared
-    // with this.lineNum below to find out whether newlines were scanned.
-    uint lineNum = this.lineNum;
-    uint level = 1; // Nesting level of braces.
-
-    ++p; ++p; // Skip q{
-
-    auto prev_t = &t;
-    Token* token;
-    while (1)
-    {
-      token = new Token;
-      scan(*token);
-      // Save the tokens in a doubly linked list.
-      // Could be useful for various tools.
-      token.prev = prev_t;
-      prev_t.next = token;
-      prev_t = token;
-      switch (token.type)
-      {
-      case TOK.LBrace:
-        ++level;
-        continue;
-      case TOK.RBrace:
-        if (--level == 0)
-        {
-          // Matching closing brace: detach the inner token list from t.
-          t.tok_str = t.next;
-          t.next = null;
-          break;
-        }
-        continue;
-      case TOK.EOF:
-        // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedTokenString);
-        t.tok_str = t.next;
-        t.next = token; // The EOF token becomes the successor of t.
-        break;
-      default:
-        continue;
-      }
-      break; // Exit loop.
-    }
-
-    assert(token.type == TOK.RBrace || token.type == TOK.EOF);
-    assert(token.type == TOK.RBrace && t.next is null ||
-           token.type == TOK.EOF && t.next !is null);
-
-    char[] buffer;
-    // token points to } or EOF
-    if (token.type == TOK.EOF)
-    {
-      t.end = token.start;
-      buffer = t.srcText[2..$].dup ~ '\0'; // [2..$] skips q{
-    }
-    else
-    {
-      // Assign to buffer before scanPostfix().
-      t.end = p;
-      buffer = t.srcText[2..$-1].dup ~ '\0'; // [2..$-1] skips q{ and }
-      t.pf = scanPostfix();
-      t.end = p; // Assign again because of postfix.
-    }
-    // Convert newlines to '\n'.
-    if (lineNum != this.lineNum)
-    {
-      assert(buffer[$-1] == '\0');
-      // The buffer is compacted in place: i is the read index, j the write index.
-      uint i, j;
-      for (; i < buffer.length; ++i)
-        switch (buffer[i])
-        {
-        case '\r':
-          // buffer[i+1] is in bounds because the buffer ends with '\0'.
-          if (buffer[i+1] == '\n')
-            ++i;
-        case '\n':
-          assert(isNewlineEnd(buffer.ptr + i));
-          buffer[j++] = '\n'; // Convert Newline to '\n'.
-          break;
-        default:
-          if (isUnicodeNewline(buffer.ptr + i))
-          {
-            ++i; ++i;
-            goto case '\n';
-          }
-          buffer[j++] = buffer[i]; // Copy.
-        }
-      buffer.length = j; // Adjust length.
-    }
-    assert(buffer[$-1] == '\0');
-    t.str = buffer;
-
-    --inTokenString;
-  }
-} // version(D2)
-
-  /++
-    Scans an escape sequence; *p must point to the backslash.
-    Handled forms: the single character escapes known to char2ev(),
-    \xHH, \uHHHH, \UHHHHHHHH, one to three octal digits and named
-    character entities (\&name;).
-    Returns: the value of the escape sequence, or REPLACEMENT_CHAR
-    after an error has been reported.
-    p is moved past the scanned characters.
-  +/
-  dchar scanEscapeSequence()
-  out(result)
-  { assert(isValidChar(result)); }
-  body
-  {
-    assert(*p == '\\');
-
-    auto sequenceStart = p; // Used for error reporting.
-
-    ++p;
-    uint c = char2ev(*p);
-    if (c)
-    {
-      ++p;
-      return c;
-    }
-
-    uint digits = 2; // Number of hex digits required; 2 for \x.
-
-    switch (*p)
-    {
-    case 'x':
-      assert(c == 0);
-      while (1)
-      {
-        ++p;
-        if (ishexad(*p))
-        {
-          c *= 16;
-          if (*p <= '9')
-            c += *p - '0';
-          else if (*p <= 'F')
-            c += *p - 'A' + 10;
-          else
-            c += *p - 'a' + 10;
-
-          if (!--digits)
-          {
-            ++p;
-            if (isValidChar(c))
-              return c; // Return valid escape value.
-
-            error(sequenceStart, MID.InvalidUnicodeEscapeSequence, sequenceStart[0..p-sequenceStart]);
-            break;
-          }
-          continue;
-        }
-
-        error(sequenceStart, MID.InsufficientHexDigits);
-        break;
-      }
-      break;
-    case 'u':
-      digits = 4;
-      goto case 'x';
-    case 'U':
-      digits = 8;
-      goto case 'x';
-    default:
-      if (isoctal(*p))
-      {
-        // One to three octal digits.
-        assert(c == 0);
-        c += *p - '0';
-        ++p;
-        if (!isoctal(*p))
-          return c;
-        c *= 8;
-        c += *p - '0';
-        ++p;
-        if (!isoctal(*p))
-          return c;
-        c *= 8;
-        c += *p - '0';
-        ++p;
-        return c; // Return valid escape value.
-      }
-      else if(*p == '&')
-      {
-        if (isalpha(*++p))
-        {
-          auto begin = p;
-          while (isalnum(*++p))
-          {}
-
-          if (*p == ';')
-          {
-            // Pass entity excluding '&' and ';'.
-            c = entity2Unicode(begin[0..p - begin]);
-            ++p; // Skip ;
-            if (c != 0xFFFF)
-              return c; // Return valid escape value.
-            else
-              error(sequenceStart, MID.UndefinedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
-          }
-          else
-            error(sequenceStart, MID.UnterminatedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
-        }
-        else
-          error(sequenceStart, MID.InvalidBeginHTMLEntity);
-      }
-      else if (isEndOfLine(p))
-        error(sequenceStart, MID.UndefinedEscapeSequence,
-          isEOF(*p) ? `\EOF` : `\NewLine`);
-      else
-      {
-        char[] str = `\`;
-        // Test the character at p. The variable c is always 0 here and
-        // must not be used to decide between ASCII and UTF-8.
-        if (isascii(*p))
-          str ~= *p;
-        else
-          encodeUTF8(str, decodeUTF8()); // Leaves p on the last byte of the sequence.
-        ++p;
-        // TODO: check for unprintable character?
-        error(sequenceStart, MID.UndefinedEscapeSequence, str);
-      }
-    }
-    return REPLACEMENT_CHAR; // Error: return replacement character.
-  }
-
-  /*
-    IntegerLiteral:= (Dec|Hex|Bin|Oct)Suffix?
-    Dec:= (0|[1-9][0-9_]*)
-    Hex:= 0[xX] HexDigits
-    Bin:= 0[bB][01_]+
-    Oct:= 0[0-7_]+
-    Suffix:= (L[uU]?|[uU]L?)
-    HexDigits:= [0-9a-fA-F_]+
-
-    Invalid: "0b_", "0x_", "._"
-  */
-  /++
-    Scans an integer literal; *p must point to its first digit.
-    The value is stored in t.ulong_ and t.type is set according to the
-    value and the suffix. If the characters turn out to belong to a
-    floating point literal, scanning is handed over to scanReal() or
-    scanHexReal().
-  +/
-  void scanNumber(ref Token t)
-  {
-    ulong ulong_; // Accumulated value.
-    bool overflow;
-    bool isDecimal;
-    size_t digits; // Number of hex or binary digits.
-
-    if (*p != '0')
-      goto LscanInteger;
-    ++p; // skip zero
-    // check for xX bB ...
-    switch (*p)
-    {
-    case 'x','X':
-      goto LscanHex;
-    case 'b','B':
-      goto LscanBinary;
-    case 'L':
-      if (p[1] == 'i')
-        goto LscanReal; // 0Li
-      break; // 0L
-    case '.':
-      if (p[1] == '.')
-        break; // 0..
-      // 0.
-    case 'i','f','F', // Imaginary and float literal suffixes.
-         'e', 'E':    // Float exponent.
-      goto LscanReal;
-    default:
-      if (*p == '_')
-        goto LscanOctal; // 0_
-      else if (isdigit(*p))
-      {
-        if (*p == '8' || *p == '9')
-          goto Loctal_hasDecimalDigits; // 08 or 09
-        else
-          goto Loctal_enter_loop; // 0[0-7]
-      }
-    }
-
-    // Number 0
-    assert(p[-1] == '0');
-    assert(*p != '_' && !isdigit(*p));
-    assert(ulong_ == 0);
-    isDecimal = true;
-    goto Lfinalize;
-
-  LscanInteger:
-    assert(*p != 0 && isdigit(*p));
-    isDecimal = true;
-    goto Lenter_loop_int;
-    while (1)
-    {
-      if (*++p == '_')
-        continue;
-      if (!isdigit(*p))
-        break;
-    Lenter_loop_int:
-      // ulong.max ends with the digit 5.
-      if (ulong_ < ulong.max/10 || (ulong_ == ulong.max/10 && *p <= '5'))
-      {
-        ulong_ *= 10;
-        ulong_ += *p - '0';
-        continue;
-      }
-      // Overflow: skip following digits.
-      // Underscores belong to the number and have to be skipped as well.
-      overflow = true;
-      while (isdigit(*++p) || *p == '_') {}
-      break;
-    }
-
-    // The number could be a float, so check overflow below.
-    switch (*p)
-    {
-    case '.':
-      if (p[1] != '.')
-        goto LscanReal;
-      break;
-    case 'L':
-      if (p[1] != 'i')
-        break;
-    case 'i', 'f', 'F', 'e', 'E':
-      goto LscanReal;
-    default:
-    }
-
-    if (overflow)
-      error(t.start, MID.OverflowDecimalNumber);
-
-    assert((isdigit(p[-1]) || p[-1] == '_') && !isdigit(*p) && *p != '_');
-    goto Lfinalize;
-
-  LscanHex:
-    assert(digits == 0);
-    assert(*p == 'x' || *p == 'X');
-    while (1)
-    {
-      if (*++p == '_')
-        continue;
-      if (!ishexad(*p))
-        break;
-      ++digits;
-      ulong_ *= 16;
-      if (*p <= '9')
-        ulong_ += *p - '0';
-      else if (*p <= 'F')
-        ulong_ += *p - 'A' + 10;
-      else
-        ulong_ += *p - 'a' + 10;
-    }
-
-    assert(ishexad(p[-1]) || p[-1] == '_' || p[-1] == 'x' || p[-1] == 'X');
-    assert(!ishexad(*p) && *p != '_');
-
-    switch (*p)
-    {
-    case '.':
-      if (p[1] == '.')
-        break;
-    case 'p', 'P':
-      return scanHexReal(t);
-    default:
-    }
-
-    // More than 16 hex digits do not fit into 64 bits.
-    if (digits == 0 || digits > 16)
-      error(t.start, digits == 0 ? MID.NoDigitsInHexNumber : MID.OverflowHexNumber);
-
-    goto Lfinalize;
-
-  LscanBinary:
-    assert(digits == 0);
-    assert(*p == 'b' || *p == 'B');
-    while (1)
-    {
-      if (*++p == '0')
-      {
-        ++digits;
-        ulong_ *= 2;
-      }
-      else if (*p == '1')
-      {
-        ++digits;
-        ulong_ *= 2;
-        ulong_ += *p - '0';
-      }
-      else if (*p == '_')
-        continue;
-      else
-        break;
-    }
-
-    if (digits == 0 || digits > 64)
-      error(t.start, digits == 0 ? MID.NoDigitsInBinNumber : MID.OverflowBinaryNumber);
-
-    assert(p[-1] == '0' || p[-1] == '1' || p[-1] == '_' || p[-1] == 'b' || p[-1] == 'B', p[-1] ~ "");
-    assert( !(*p == '0' || *p == '1' || *p == '_') );
-    goto Lfinalize;
-
-  LscanOctal:
-    assert(*p == '_');
-    while (1)
-    {
-      if (*++p == '_')
-        continue;
-      if (!isoctal(*p))
-        break;
-    Loctal_enter_loop:
-      // The value is multiplied by 8, so it must not exceed ulong.max/8.
-      // ulong.max/8*8 + 7 == ulong.max, hence any octal digit fits.
-      if (ulong_ <= ulong.max/8)
-      {
-        ulong_ *= 8;
-        ulong_ += *p - '0';
-        continue;
-      }
-      // Overflow: skip following digits.
-      overflow = true;
-      while (isoctal(*++p) || *p == '_') {}
-      break;
-    }
-
-    bool hasDecimalDigits;
-    if (isdigit(*p))
-    {
-    Loctal_hasDecimalDigits:
-      hasDecimalDigits = true;
-      while (isdigit(*++p) || *p == '_') {}
-    }
-
-    // The number could be a float, so check errors below.
-    switch (*p)
-    {
-    case '.':
-      if (p[1] != '.')
-        goto LscanReal;
-      break;
-    case 'L':
-      if (p[1] != 'i')
-        break;
-    case 'i', 'f', 'F', 'e', 'E':
-      goto LscanReal;
-    default:
-    }
-
-    if (hasDecimalDigits)
-      error(t.start, MID.OctalNumberHasDecimals);
-
-    if (overflow)
-      error(t.start, MID.OverflowOctalNumber);
-//     goto Lfinalize;
-
-  Lfinalize:
-    enum Suffix
-    {
-      None     = 0,
-      Unsigned = 1,
-      Long     = 2
-    }
-
-    // Scan optional suffix: L, Lu, LU, u, uL, U or UL.
-    Suffix suffix;
-    while (1)
-    {
-      switch (*p)
-      {
-      case 'L':
-        if (suffix & Suffix.Long)
-          break;
-        suffix |= Suffix.Long;
-        ++p;
-        continue;
-      case 'u', 'U':
-        if (suffix & Suffix.Unsigned)
-          break;
-        suffix |= Suffix.Unsigned;
-        ++p;
-        continue;
-      default:
-        break;
-      }
-      break;
-    }
-
-    // Determine type of Integer.
-    switch (suffix)
-    {
-    case Suffix.None:
-      if (ulong_ & 0x8000_0000_0000_0000)
-      {
-        if (isDecimal)
-          error(t.start, MID.OverflowDecimalSign);
-        t.type = TOK.Uint64;
-      }
-      else if (ulong_ & 0xFFFF_FFFF_0000_0000)
-        t.type = TOK.Int64;
-      else if (ulong_ & 0x8000_0000)
-        t.type = isDecimal ? TOK.Int64 : TOK.Uint32;
-      else
-        t.type = TOK.Int32;
-      break;
-    case Suffix.Unsigned:
-      if (ulong_ & 0xFFFF_FFFF_0000_0000)
-        t.type = TOK.Uint64;
-      else
-        t.type = TOK.Uint32;
-      break;
-    case Suffix.Long:
-      if (ulong_ & 0x8000_0000_0000_0000)
-      {
-        if (isDecimal)
-          error(t.start, MID.OverflowDecimalSign);
-        t.type = TOK.Uint64;
-      }
-      else
-        t.type = TOK.Int64;
-      break;
-    case Suffix.Unsigned | Suffix.Long:
-      t.type = TOK.Uint64;
-      break;
-    default:
-      assert(0);
-    }
-    t.ulong_ = ulong_;
-    t.end = p;
-    return;
-  LscanReal:
-    scanReal(t);
-    return;
-  }
-
-  /*
-    FloatLiteral:= Float[fFL]?i?
-    Float:= DecFloat | HexFloat
-    DecFloat:= ([0-9][0-9_]*[.][0-9_]*DecExponent?) | [.][0-9][0-9_]*DecExponent? | [0-9][0-9_]*DecExponent
-    DecExponent:= [eE][+-]?[0-9][0-9_]*
-    HexFloat:= 0[xX](HexDigits[.]HexDigits | [.][0-9a-fA-F]HexDigits? | HexDigits)HexExponent
-    HexExponent:= [pP][+-]?[0-9][0-9_]*
-  */
-  /// Scans the fraction and the exponent of a decimal floating point literal.
-  /// p points to the '.', to the exponent character or to a float suffix.
-  /// The text from t.start up to p is then converted by finalizeFloat().
-  void scanReal(ref Token t)
-  {
-    if (*p == '.')
-    {
-      assert(p[1] != '.');
-      // This function was called by scan() or scanNumber().
-      while (isdigit(*++p) || *p == '_') {}
-    }
-    else
-      // This function was called by scanNumber().
-      // Check that p points to a float suffix or to an exponent character.
-      assert(delegate ()
-        {
-          switch (*p)
-          {
-          case 'L':
-            if (p[1] != 'i')
-              return false;
-          case 'i', 'f', 'F', 'e', 'E':
-            return true;
-          default:
-          }
-          return false;
-        }()
-      );
-
-    // Scan exponent.
-    if (*p == 'e' || *p == 'E')
-    {
-      ++p;
-      if (*p == '-' || *p == '+')
-        ++p;
-      if (isdigit(*p))
-        while (isdigit(*++p) || *p == '_') {}
-      else
-        error(t.start, MID.FloatExpMustStartWithDigit);
-    }
-
-    // Copy whole number and remove underscores from buffer.
-    char[] buffer = t.start[0..p-t.start].dup;
-    uint j; // Write index; never ahead of the read position of the foreach.
-    foreach (c; buffer)
-      if (c != '_')
-        buffer[j++] = c;
-    buffer.length = j; // Adjust length.
-    buffer ~= 0; // Terminate for C functions.
-
-    finalizeFloat(t, buffer);
-  }
-
-  /// Scans the fraction and the exponent of a hexadecimal floating point literal.
-  /// p points to the '.' or to the exponent character ('p' or 'P').
-  /// The exponent is mandatory; when it is missing or malformed an error
-  /// is reported and the token gets the type TOK.Float32.
-  void scanHexReal(ref Token t)
-  {
-    assert(*p == '.' || *p == 'p' || *p == 'P');
-    MID mid; // The error to be reported at Lerr.
-    if (*p == '.')
-      while (ishexad(*++p) || *p == '_')
-      {}
-    // Decimal exponent is required.
-    if (*p != 'p' && *p != 'P')
-    {
-      mid = MID.HexFloatExponentRequired;
-      goto Lerr;
-    }
-    // Scan exponent
-    assert(*p == 'p' || *p == 'P');
-    ++p;
-    if (*p == '+' || *p == '-')
-      ++p;
-    if (!isdigit(*p))
-    {
-      mid = MID.HexFloatExpMustStartWithDigit;
-      goto Lerr;
-    }
-    while (isdigit(*++p) || *p == '_')
-    {}
-    // Copy whole number and remove underscores from buffer.
-    char[] buffer = t.start[0..p-t.start].dup;
-    uint j; // Write index; never ahead of the read position of the foreach.
-    foreach (c; buffer)
-      if (c != '_')
-        buffer[j++] = c;
-    buffer.length = j; // Adjust length.
-    buffer ~= 0; // Terminate for C functions.
-    finalizeFloat(t, buffer);
-    return;
-  Lerr:
-    t.type = TOK.Float32;
-    t.end = p;
-    error(t.start, mid);
-  }
-
-  /++
-    Converts the text of a floating point literal and scans its suffixes.
-    Params:
-      t = the token that receives the type, the value and the end position.
-      buffer = the zero-terminated text of the number without underscores.
-    The suffix 'f' or 'F' selects float, 'L' selects real and no suffix
-    selects double. A following 'i' turns the type into its imaginary
-    counterpart. An error is reported if the conversion function signals
-    a range error.
-  +/
-  void finalizeFloat(ref Token t, string buffer)
-  {
-    assert(buffer[$-1] == 0);
-    // The conversion functions only set errno on failure and never clear it.
-    // Reset it, so that a stale ERANGE from an earlier call is not
-    // mistaken for an overflow of this number.
-    errno(0);
-    // Float number is well-formed. Check suffixes and do conversion.
-    switch (*p)
-    {
-    case 'f', 'F':
-      t.type = TOK.Float32;
-      t.float_ = strtof(buffer.ptr, null);
-      ++p;
-      break;
-    case 'L':
-      t.type = TOK.Float80;
-      t.real_ = strtold(buffer.ptr, null);
-      ++p;
-      break;
-    default:
-      t.type = TOK.Float64;
-      t.double_ = strtod(buffer.ptr, null);
-    }
-    if (*p == 'i')
-    {
-      ++p;
-      t.type += 3; // Switch to imaginary counterpart.
-      assert(t.type == TOK.Imaginary32 ||
-             t.type == TOK.Imaginary64 ||
-             t.type == TOK.Imaginary80);
-    }
-    if (errno() == ERANGE)
-      error(t.start, MID.OverflowFloatNumber);
-    t.end = p;
-  }
-
-  /// Scan special token: #line Integer [Filespec] EndOfLine
-  ///
-  /// The integer and the optional filespec are stored as separate tokens in
-  /// t.tokLineNum and t.tokLineFilespec. Outside of token strings the
-  /// sequence also updates this.lineNum_hline and this.errorPath.
-  /// On a malformed sequence an error is reported and scanning stops at
-  /// the current position.
-  void scanSpecialTokenSequence(ref Token t)
-  {
-    assert(*p == '#');
-    t.type = TOK.HashLine;
-    t.setWhitespaceFlag();
-
-    MID mid; // The error to be reported at Lerr.
-    auto errorAtColumn = p;
-
-    ++p;
-    if (p[0] != 'l' || p[1] != 'i' || p[2] != 'n' || p[3] != 'e')
-    {
-      mid = MID.ExpectedIdentifierSTLine;
-      goto Lerr;
-    }
-    p += 3; // p points to the 'e' of "line"; the loop below increments first.
-
-    // TODO: #line58"path/file" is legal. Require spaces?
-    //       State.Space could be used for that purpose.
-    enum State
-    { /+Space,+/ Integer, Filespec, End }
-
-    State state = State.Integer;
-
-    while (!isEndOfLine(++p))
-    {
-      if (isspace(*p))
-        continue;
-      if (state == State.Integer)
-      {
-        if (!isdigit(*p))
-        {
-          errorAtColumn = p;
-          mid = MID.ExpectedIntegerAfterSTLine;
-          goto Lerr;
-        }
-        t.tokLineNum = new Token;
-        scan(*t.tokLineNum);
-        if (t.tokLineNum.type != TOK.Int32 && t.tokLineNum.type != TOK.Uint32)
-        {
-          errorAtColumn = t.tokLineNum.start;
-          mid = MID.ExpectedIntegerAfterSTLine;
-          goto Lerr;
-        }
-        --p; // Go one back because scan() advanced p past the integer.
-        state = State.Filespec;
-      }
-      else if (state == State.Filespec)
-      {
-        if (*p != '"')
-        {
-          errorAtColumn = p;
-          mid = MID.ExpectedFilespec;
-          goto Lerr;
-        }
-        t.tokLineFilespec = new Token;
-        t.tokLineFilespec.start = p;
-        t.tokLineFilespec.type = TOK.Filespec;
-        t.tokLineFilespec.setWhitespaceFlag();
-        while (*++p != '"')
-        {
-          if (isEndOfLine(p))
-          {
-            errorAtColumn = t.tokLineFilespec.start;
-            mid = MID.UnterminatedFilespec;
-            t.tokLineFilespec.end = p;
-            goto Lerr;
-          }
-          // Skip over a UTF-8 sequence; the decoded value is not needed.
-          isascii(*p) || decodeUTF8();
-        }
-        auto start = t.tokLineFilespec.start +1; // +1 skips '"'
-        t.tokLineFilespec.str = start[0 .. p - start];
-        t.tokLineFilespec.end = p + 1;
-        state = State.End;
-      }
-      else/+ if (state == State.End)+/
-      {
-        // Anything but whitespace after the filespec is an error.
-        mid = MID.UnterminatedSpecialToken;
-        goto Lerr;
-      }
-    }
-    assert(isEndOfLine(p));
-
-    if (state == State.Integer)
-    {
-      // The end of the line was reached before an integer was found.
-      errorAtColumn = p;
-      mid = MID.ExpectedIntegerAfterSTLine;
-      goto Lerr;
-    }
-
-    // Evaluate #line only when not in token string.
-    if (!inTokenString && t.tokLineNum)
-    {
-      // errorLineNumber() subtracts this offset from the real line number.
-      this.lineNum_hline = this.lineNum - t.tokLineNum.uint_ + 1;
-      if (t.tokLineFilespec)
-        this.errorPath = t.tokLineFilespec.str;
-    }
-    t.end = p;
-
-    return;
-  Lerr:
-    t.end = p;
-    error(errorAtColumn, mid);
-  }
-
-  /++
-    Creates a token of type TOK.Empty and links it into the token list
-    directly in front of t.
-    The new token has zero length and is located at the end of the
-    token that preceded t. The parser can use it for nodes in the AST
-    that don't consume an actual token from the source text.
-    Returns: the inserted token.
-  +/
-  Token* insertEmptyTokenBefore(Token* t)
-  {
-    assert(t !is null && t.prev !is null);
-    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type));
-    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type));
-
-    auto predecessor = t.prev;
-    auto dummy = new Token;
-    dummy.type = TOK.Empty;
-    dummy.end = predecessor.end;
-    dummy.start = dummy.end;
-    // Hook the dummy up with both of its neighbours.
-    dummy.prev = predecessor;
-    dummy.next = t;
-    predecessor.next = dummy;
-    t.prev = dummy;
-    return dummy;
-  }
-
-  /// Returns lineNum reduced by the offset stored in this.lineNum_hline.
-  uint errorLineNumber(uint lineNum)
-  {
-    auto hlineOffset = this.lineNum_hline;
-    return lineNum - hlineOffset;
-  }
-
-  /// Reports an error on the current line (this.lineNum, this.lineBegin).
-  /// columnPos is the position of the error in the source text, mid selects
-  /// the message and the variadic arguments are passed on for formatting.
-  void error(char* columnPos, MID mid, ...)
-  {
-    error_(this.lineNum, this.lineBegin, columnPos, mid, _arguments, _argptr);
-  }
-
-  /// Reports an error on an explicitly given line. Used when the line of
-  /// the error differs from the line that is currently being scanned.
-  /// The variadic arguments are passed on for formatting.
-  void error(uint lineNum, char* lineBegin, char* columnPos, MID mid, ...)
-  {
-    error_(lineNum, lineBegin, columnPos, mid, _arguments, _argptr);
-  }
-
-  /// Creates a LexerError and appends it to this.errors and,
-  /// if there is one, to the information manager.
-  /// The line number is mapped with errorLineNumber() before it is
-  /// stored in the location of the error.
-  void error_(uint lineNum, char* lineBegin, char* columnPos, MID mid,
-              TypeInfo[] _arguments, void* _argptr)
-  {
-    auto reportedLine = this.errorLineNumber(lineNum);
-    auto where = new Location(errorPath, reportedLine, lineBegin, columnPos);
-    auto message = Format(_arguments, _argptr, GetMsg(mid));
-    auto lexerError = new LexerError(where, message);
-    errors ~= lexerError;
-    if (infoMan is null)
-      return;
-    infoMan ~= lexerError;
-  }
-
-  /// Scans all remaining tokens up to EOF and returns the head of the token list.
-  Token* getTokens()
-  {
-    for (;;)
-      if (nextToken() == TOK.EOF)
-        break;
-    return head;
-  }
-
-  /// Keeps requesting tokens until the EOF token has been returned.
-  void scanAll()
-  {
-    auto tokType = nextToken();
-    while (tokType != TOK.EOF)
-      tokType = nextToken();
-  }
-
-  /// Returns the token that follows the two leading tokens of the list:
-  /// HEAD -> Newline -> First Token
-  Token* firstToken()
-  {
-    auto newline = this.head.next;
-    return newline.next;
-  }
-
-  /// Inserts every entry of keywords into table, using its str member as the key.
-  static void loadKeywords(ref Identifier[string] table)
-  {
-    foreach (keyword; keywords)
-    {
-      table[keyword.str] = keyword;
-    }
-  }
-
-  /// Checks whether str has the form of a D identifier: it is not empty,
-  /// doesn't start with a digit, decodes without errors and consists
-  /// only of identifier characters or non-ASCII Unicode alphas.
-  static bool isIdentifierString(char[] str)
-  {
-    if (str.length == 0)
-      return false;
-    if (isdigit(str[0]))
-      return false;
-
-    size_t pos = 0;
-    while (pos < str.length)
-    {
-      // decode() receives the position by reference and advances it.
-      auto ch = dil.Unicode.decode(str, pos);
-      if (ch == ERROR_CHAR)
-        return false;
-      bool isIdentChar = isident(ch) || !isascii(ch) && isUniAlpha(ch);
-      if (!isIdentChar)
-        return false;
-    }
-    return true;
-  }
-
-  /// Checks whether str is one of the reserved identifiers, i.e. a keyword
-  /// or a special token (__FILE__, __LINE__ etc.)
-  /// The lookup table is filled on the first call with a non-empty string.
-  static bool isReservedIdentifier(char[] str)
-  {
-    if (!str.length)
-      return false;
-
-    static Identifier[string] reserved_ids_table;
-    if (reserved_ids_table is null)
-      Lexer.loadKeywords(reserved_ids_table);
-
-    if (isIdentifierString(str))
-    {
-      auto entry = str in reserved_ids_table;
-      return entry !is null;
-    }
-    return false;
-  }
-
-  /++
-    Returns true if the current character to be decoded is
-    a Unicode alpha character.
-    The current pointer 'p' is not advanced if false is returned.
-    If true is returned, 'p' points to the last byte of the decoded sequence.
-    No error is reported for an invalid UTF-8 sequence; false is returned instead.
-  +/
-  bool isUnicodeAlpha()
-  {
-    assert(!isascii(*p), "check for ASCII char before calling decodeUTF8().");
-    // Work on a local copy, so that this.p stays untouched on failure.
-    char* p = this.p;
-    dchar d = *p;
-    ++p; // Move to second byte.
-    // Error if second byte is not a trail byte.
-    if (!isTrailByte(*p))
-      return false;
-    // Check for overlong sequences.
-    switch (d)
-    {
-    case 0xE0, 0xF0, 0xF8, 0xFC:
-      // The payload bits of the second byte that are selected by the
-      // lead byte mask are all zero.
-      if ((*p & d) == 0x80)
-        return false;
-    default:
-      if ((d & 0xFE) == 0xC0) // 1100000x
-        return false;
-    }
-    // Code snippets that are mixed in below.
-    const char[] checkNextByte = "if (!isTrailByte(*++p))"
-                                 "  return false;";
-    const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
-    // Decode
-    if ((d & 0b1110_0000) == 0b1100_0000)
-    { // Sequence of two bytes.
-      d &= 0b0001_1111;
-      mixin(appendSixBits);
-    }
-    else if ((d & 0b1111_0000) == 0b1110_0000)
-    { // Sequence of three bytes.
-      d &= 0b0000_1111;
-      mixin(appendSixBits ~
-            checkNextByte ~ appendSixBits);
-    }
-    else if ((d & 0b1111_1000) == 0b1111_0000)
-    { // Sequence of four bytes.
-      d &= 0b0000_0111;
-      mixin(appendSixBits ~
-            checkNextByte ~ appendSixBits ~
-            checkNextByte ~ appendSixBits);
-    }
-    else
-      return false;
-
-    assert(isTrailByte(*p));
-    if (!isValidChar(d) || !isUniAlpha(d))
-      return false;
-    // Only advance pointer if this is a Unicode alpha character.
-    this.p = p;
-    return true;
-  }
-
-  /// Decodes the next UTF-8 sequence.
-  ///
-  /// this.p must point to a non-ASCII byte. After a successful decode
-  /// this.p points to the last byte of the sequence.
-  /// Returns: the decoded character, or REPLACEMENT_CHAR after an
-  /// MID.InvalidUTF8Sequence error has been reported.
-  dchar decodeUTF8()
-  {
-    assert(!isascii(*p), "check for ASCII char before calling decodeUTF8().");
-    // Work on a local copy; this.p is still needed as the error position.
-    char* p = this.p;
-    dchar d = *p;
-
-    ++p; // Move to second byte.
-    // Error if second byte is not a trail byte.
-    if (!isTrailByte(*p))
-      goto Lerr2;
-
-    // Check for overlong sequences.
-    switch (d)
-    {
-    case 0xE0, // 11100000 100xxxxx
-         0xF0, // 11110000 1000xxxx
-         0xF8, // 11111000 10000xxx
-         0xFC: // 11111100 100000xx
-      if ((*p & d) == 0x80)
-        goto Lerr;
-    default:
-      if ((d & 0xFE) == 0xC0) // 1100000x
-        goto Lerr;
-    }
-
-    // Code snippets that are mixed in below.
-    const char[] checkNextByte = "if (!isTrailByte(*++p))"
-                                 "  goto Lerr2;";
-    const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
-
-    // Decode
-    if ((d & 0b1110_0000) == 0b1100_0000)
-    { // 110xxxxx 10xxxxxx
-      d &= 0b0001_1111;
-      mixin(appendSixBits);
-    }
-    else if ((d & 0b1111_0000) == 0b1110_0000)
-    { // 1110xxxx 10xxxxxx 10xxxxxx
-      d &= 0b0000_1111;
-      mixin(appendSixBits ~
-            checkNextByte ~ appendSixBits);
-    }
-    else if ((d & 0b1111_1000) == 0b1111_0000)
-    { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-      d &= 0b0000_0111;
-      mixin(appendSixBits ~
-            checkNextByte ~ appendSixBits ~
-            checkNextByte ~ appendSixBits);
-    }
-    else
-      // 5 and 6 byte UTF-8 sequences are not allowed yet.
-      // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-      // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-      goto Lerr;
-
-    assert(isTrailByte(*p));
-
-    if (!isValidChar(d))
-    {
-    Lerr:
-      // Three cases:
-      // *) the UTF-8 sequence was successfully decoded but the resulting
-      //    character is invalid.
-      //    p points to last trail byte in the sequence.
-      // *) the UTF-8 sequence is overlong.
-      //    p points to second byte in the sequence.
-      // *) the UTF-8 sequence has more than 4 bytes or starts with
-      //    a trail byte.
-      //    p points to second byte in the sequence.
-      assert(isTrailByte(*p));
-      // Move to next ASCII character or lead byte of a UTF-8 sequence.
-      while (p < (end-1) && isTrailByte(*p))
-        ++p;
-      --p;
-      assert(!isTrailByte(p[1]));
-    Lerr2:
-      // NOTE(review): when Lerr2 is reached by a goto, p points to the byte
-      // that failed the trail byte test, i.e. to a byte that is not part of
-      // the sequence. The callers in this class increment p after the call,
-      // so that byte appears to be skipped - confirm whether this is intended.
-      d = REPLACEMENT_CHAR;
-      error(this.p, MID.InvalidUTF8Sequence);
-    }
-
-    this.p = p;
-    return d;
-  }
-
-  /// Appends the UTF-8 encoding of d to str.
-  /// d must be a valid, non-ASCII character. Sequences of two, three and
-  /// four bytes are produced.
-  static void encodeUTF8(ref char[] str, dchar d)
-  {
-    assert(!isascii(d), "check for ASCII char before calling encodeUTF8().");
-    assert(isValidChar(d), "check if character is valid before calling encodeUTF8().");
-
-    char[6] bytes = void;
-    size_t count; // Number of bytes of the encoded sequence.
-    if (d < 0x800)
-    { // 110xxxxx 10xxxxxx
-      bytes[0] = 0xC0 | (d >> 6);
-      bytes[1] = 0x80 | (d & 0x3F);
-      count = 2;
-    }
-    else if (d < 0x10000)
-    { // 1110xxxx 10xxxxxx 10xxxxxx
-      bytes[0] = 0xE0 | (d >> 12);
-      bytes[1] = 0x80 | ((d >> 6) & 0x3F);
-      bytes[2] = 0x80 | (d & 0x3F);
-      count = 3;
-    }
-    else if (d < 0x200000)
-    { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-      bytes[0] = 0xF0 | (d >> 18);
-      bytes[1] = 0x80 | ((d >> 12) & 0x3F);
-      bytes[2] = 0x80 | ((d >> 6) & 0x3F);
-      bytes[3] = 0x80 | (d & 0x3F);
-      count = 4;
-    }
-    /+ // There are no 5 and 6 byte UTF-8 sequences yet.
-    else if (d < 0x4000000)
-    {
-      bytes[0] = 0xF8 | (d >> 24);
-      bytes[1] = 0x80 | ((d >> 18) & 0x3F);
-      bytes[2] = 0x80 | ((d >> 12) & 0x3F);
-      bytes[3] = 0x80 | ((d >> 6) & 0x3F);
-      bytes[4] = 0x80 | (d & 0x3F);
-      count = 5;
-    }
-    else if (d < 0x80000000)
-    {
-      bytes[0] = 0xFC | (d >> 30);
-      bytes[1] = 0x80 | ((d >> 24) & 0x3F);
-      bytes[2] = 0x80 | ((d >> 18) & 0x3F);
-      bytes[3] = 0x80 | ((d >> 12) & 0x3F);
-      bytes[4] = 0x80 | ((d >> 6) & 0x3F);
-      bytes[5] = 0x80 | (d & 0x3F);
-      count = 6;
-    }
-    +/
-    else
-      assert(0);
-    str ~= bytes[0..count];
-  }
-}
-
-unittest
-{
-  Stdout("Testing Lexer.\n");
-  struct Pair
-  {
-    char[] tokenText;
-    TOK type;
-  }
-  static Pair[] pairs = [
-    {"#!äöüß",  TOK.Shebang},       {"\n",      TOK.Newline},
-    {"//çay",   TOK.Comment},       {"\n",      TOK.Newline},
-                                    {"&",       TOK.AndBinary},
-    {"/*çağ*/", TOK.Comment},       {"&&",      TOK.AndLogical},
-    {"/+çak+/", TOK.Comment},       {"&=",      TOK.AndAssign},
-    {">",       TOK.Greater},       {"+",       TOK.Plus},
-    {">=",      TOK.GreaterEqual},  {"++",      TOK.PlusPlus},
-    {">>",      TOK.RShift},        {"+=",      TOK.PlusAssign},
-    {">>=",     TOK.RShiftAssign},  {"-",       TOK.Minus},
-    {">>>",     TOK.URShift},       {"--",      TOK.MinusMinus},
-    {">>>=",    TOK.URShiftAssign}, {"-=",      TOK.MinusAssign},
-    {"<",       TOK.Less},          {"=",       TOK.Assign},
-    {"<=",      TOK.LessEqual},     {"==",      TOK.Equal},
-    {"<>",      TOK.LorG},          {"~",       TOK.Tilde},
-    {"<>=",     TOK.LorEorG},       {"~=",      TOK.CatAssign},
-    {"<<",      TOK.LShift},        {"*",       TOK.Mul},
-    {"<<=",     TOK.LShiftAssign},  {"*=",      TOK.MulAssign},
-    {"!",       TOK.Not},           {"/",       TOK.Div},
-    {"!=",      TOK.NotEqual},      {"/=",      TOK.DivAssign},
-    {"!<",      TOK.UorGorE},       {"^",       TOK.Xor},
-    {"!>",      TOK.UorLorE},       {"^=",      TOK.XorAssign},
-    {"!<=",     TOK.UorG},          {"%",       TOK.Mod},
-    {"!>=",     TOK.UorL},          {"%=",      TOK.ModAssign},
-    {"!<>",     TOK.UorE},          {"(",       TOK.LParen},
-    {"!<>=",    TOK.Unordered},     {")",       TOK.RParen},
-    {".",       TOK.Dot},           {"[",       TOK.LBracket},
-    {"..",      TOK.Slice},         {"]",       TOK.RBracket},
-    {"...",     TOK.Ellipses},      {"{",       TOK.LBrace},
-    {"|",       TOK.OrBinary},      {"}",       TOK.RBrace},
-    {"||",      TOK.OrLogical},     {":",       TOK.Colon},
-    {"|=",      TOK.OrAssign},      {";",       TOK.Semicolon},
-    {"?",       TOK.Question},      {",",       TOK.Comma},
-    {"$",       TOK.Dollar},        {"cam",     TOK.Identifier},
-    {"çay",     TOK.Identifier},    {".0",      TOK.Float64},
-    {"0",       TOK.Int32},         {"\n",      TOK.Newline},
-    {"\r",      TOK.Newline},       {"\r\n",    TOK.Newline},
-    {"\u2028",  TOK.Newline},       {"\u2029",  TOK.Newline}
-  ];
-
-  char[] src;
-
-  // Join all token texts into a single string.
-  foreach (i, pair; pairs)
-    if (pair.type == TOK.Comment && pair.tokenText[1] == '/' || // Line comment.
-        pair.type == TOK.Shebang)
-    {
-      assert(pairs[i+1].type == TOK.Newline); // Must be followed by a newline.
-      src ~= pair.tokenText;
-    }
-    else
-      src ~= pair.tokenText ~ " ";
-
-  auto lx = new Lexer(src, "");
-  auto token = lx.getTokens();
-
-  uint i;
-  assert(token == lx.head);
-  assert(token.next.type == TOK.Newline);
-  token = token.next.next;
-  do
-  {
-    assert(i < pairs.length);
-    assert(token.srcText == pairs[i].tokenText, Format("Scanned '{0}' but expected '{1}'", token.srcText, pairs[i].tokenText));
-    ++i;
-    token = token.next;
-  } while (token.type != TOK.EOF)
-}
-
-unittest
-{
-  Stdout("Testing method Lexer.peek()\n");
-  string sourceText = "unittest { }";
-  auto lx = new Lexer(sourceText, null);
-
-  auto next = lx.head;
-  lx.peek(next);
-  assert(next.type == TOK.Newline);
-  lx.peek(next);
-  assert(next.type == TOK.Unittest);
-  lx.peek(next);
-  assert(next.type == TOK.LBrace);
-  lx.peek(next);
-  assert(next.type == TOK.RBrace);
-  lx.peek(next);
-  assert(next.type == TOK.EOF);
-
-  lx = new Lexer("", null);
-  next = lx.head;
-  lx.peek(next);
-  assert(next.type == TOK.Newline);
-  lx.peek(next);
-  assert(next.type == TOK.EOF);
-}
-
-unittest
-{
-  // Numbers unittest
-  // 0L 0ULi 0_L 0_UL 0x0U 0x0p2 0_Fi 0_e2 0_F 0_i
-  // 0u 0U 0uL 0UL 0L 0LU 0Lu
-  // 0Li 0f 0F 0fi 0Fi 0i
-  // 0b_1_LU 0b1000u
-  // 0x232Lu
-}
-
-/// ASCII character properties table.
-static const int ptable[256] = [
- 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-32, 0, 0x2200, 0, 0, 0, 0, 0x2700, 0, 0, 0, 0, 0, 0, 0, 0,
- 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0x3f00,
- 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0x5c00, 0, 0,16,
- 0, 0x70c, 0x80c,12,12,12, 0xc0c, 8, 8, 8, 8, 8, 8, 8, 0xa08, 8,
- 8, 8, 0xd08, 8, 0x908, 8, 0xb08, 8, 8, 8, 8, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-];
-
-enum CProperty
-{
-       Octal = 1,
-       Digit = 1<<1,
-         Hex = 1<<2,
-       Alpha = 1<<3,
-  Underscore = 1<<4,
-  Whitespace = 1<<5
-}
-
-const uint EVMask = 0xFF00; // Bit mask for escape value
-
-private alias CProperty CP;
-int isoctal(char c) { return ptable[c] & CP.Octal; }
-int isdigit(char c) { return ptable[c] & CP.Digit; }
-int ishexad(char c) { return ptable[c] & CP.Hex; }
-int isalpha(char c) { return ptable[c] & CP.Alpha; }
-int isalnum(char c) { return ptable[c] & (CP.Alpha | CP.Digit); }
-int isidbeg(char c) { return ptable[c] & (CP.Alpha | CP.Underscore); }
-int isident(char c) { return ptable[c] & (CP.Alpha | CP.Underscore | CP.Digit); }
-int isspace(char c) { return ptable[c] & CP.Whitespace; }
-int char2ev(char c) { return ptable[c] >> 8; /*(ptable[c] & EVMask) >> 8;*/ }
-int isascii(uint c) { return c < 128; }
-
-version(gen_ptable)
-static this()
-{
-  alias ptable p;
-  assert(p.length == 256);
-  // Initialize character properties table.
-  for (int i; i < p.length; ++i)
-  {
-    p[i] = 0; // Reset
-    if ('0' <= i && i <= '7')
-      p[i] |= CP.Octal;
-    if ('0' <= i && i <= '9')
-      p[i] |= CP.Digit;
-    if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
-      p[i] |= CP.Hex;
-    if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
-      p[i] |= CP.Alpha;
-    if (i == '_')
-      p[i] |= CP.Underscore;
-    if (i == ' ' || i == '\t' || i == '\v' || i == '\f')
-      p[i] |= CP.Whitespace;
-  }
-  // Store escape sequence values in second byte.
-  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");
-  p['\''] |= 39 << 8;
-  p['"'] |= 34 << 8;
-  p['?'] |= 63 << 8;
-  p['\\'] |= 92 << 8;
-  p['a'] |= 7 << 8;
-  p['b'] |= 8 << 8;
-  p['f'] |= 12 << 8;
-  p['n'] |= 10 << 8;
-  p['r'] |= 13 << 8;
-  p['t'] |= 9 << 8;
-  p['v'] |= 11 << 8;
-  // Print a formatted array literal.
-  char[] array = "[\n";
-  foreach (i, c; ptable)
-  {
-    array ~= Format((c>255?" 0x{0:x},":"{0,2},"), c) ~ (((i+1) % 16) ? "":"\n");
-  }
-  array[$-2..$] = "\n]";
-  Stdout(array).newline;
-}
--- a/trunk/src/dil/Module.d	Fri Jan 04 23:00:38 2008 +0100
+++ b/trunk/src/dil/Module.d	Sat Jan 05 16:53:27 2008 +0100
@@ -8,7 +8,7 @@
 import dil.Declarations;
 import dil.Parser;
 import dil.ImportParser;
-import dil.Lexer;
+import dil.lexer.Lexer;
 import dil.File;
 import dil.Scope;
 import dil.Symbol;
--- a/trunk/src/dil/Parser.d	Fri Jan 04 23:00:38 2008 +0100
+++ b/trunk/src/dil/Parser.d	Sat Jan 05 16:53:27 2008 +0100
@@ -3,7 +3,7 @@
   License: GPL3
 +/
 module dil.Parser;
-import dil.Lexer;
+import dil.lexer.Lexer;
 import dil.SyntaxTree;
 import dil.Token;
 import dil.Messages;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/dil/lexer/Lexer.d	Sat Jan 05 16:53:27 2008 +0100
@@ -0,0 +1,2861 @@
+/++
+  Author: Aziz Köksal
+  License: GPL3
++/
+module dil.lexer.Lexer;
+import dil.Token;
+import dil.Information;
+import dil.Keywords;
+import dil.Identifier;
+import dil.Messages;
+import dil.HtmlEntities;
+import dil.CompilerInfo;
+import dil.IdTable;
+import dil.Unicode;
+import tango.stdc.stdlib : strtof, strtod, strtold;
+import tango.stdc.errno : errno, ERANGE;
+import tango.stdc.time : time_t, time, ctime;
+import tango.stdc.string : strlen;
+import common;
+
+public import dil.LexerFuncs;
+
+/++
+  The Lexer analyzes the characters of a source text and
+  produces a doubly-linked list of tokens.
++/
+class Lexer
+{
+  Token* head;      /// The head of the doubly linked token list.
+  Token* tail;      /// The tail of the linked list. Set in scan().
+  Token* token;     /// Points to the current token in the token list.
+  string text;      /// The source text.
+  char[] filePath;  /// Path to the source text.
+  char* p;          /// Points to the current character in the source text.
+  char* end;        /// Points one character past the end of the source text.
+
+  // Members used for error messages:
+  InfoManager infoMan;
+  LexerError[] errors;
+  /// Always points to the beginning of the current line.
+  char* lineBegin;
+//   Token* newline;     /// Current newline token.
+  uint lineNum = 1;   /// Current, actual source text line number.
+  uint lineNum_hline; /// Line number set by #line.
+  uint inTokenString; /// > 0 if inside q{ }
+  char[] errorPath;   /// The path displayed in error messages.
+
+  /++
+    Creates a Lexer for text and links the initial HEAD and Newline tokens.
+    Params:
+      text     = the UTF-8 source code; a 0 character is appended if missing.
+      filePath = the path to the source code; used for error messages.
+      infoMan  = stored in this.infoMan.
+  +/
+  this(string text, string filePath, InfoManager infoMan = null)
+  {
+    this.infoMan = infoMan;
+    this.errorPath = this.filePath = filePath;
+
+    this.text = text;
+    if (text.length == 0 || text[$-1] != 0)
+    { // Guarantee a terminating 0 character.
+      this.text.length = this.text.length + 1;
+      this.text[$-1] = 0;
+    }
+
+    auto start = this.text.ptr;
+    this.p = this.lineBegin = start;
+    this.end = start + this.text.length;
+
+    // The list begins with an empty HEAD token...
+    auto first = new Token;
+    first.type = TOK.HEAD;
+    first.start = first.end = start;
+    this.head = first;
+    // ...followed by an empty Newline token for line 1.
+    auto nl = new Token;
+    nl.type = TOK.Newline;
+    nl.setWhitespaceFlag();
+    nl.start = nl.end = start;
+    nl.filePath = this.errorPath;
+    nl.lineNum = 1;
+    nl.lineNum_hline = 0;
+    first.next = nl;
+    nl.prev = first;
+    this.token = nl;
+    scanShebang();
+  }
+
+  ~this()
+  {
+    // Walk the list and free each token's predecessor once we are past it.
+    for (auto tok = head.next; tok !is null; tok = tok.next)
+    {
+      // An EOF token has to be the tail, i.e. the very last token.
+      assert(tok.type == TOK.EOF ? tok == tail && tok.next is null : 1);
+      delete tok.prev;
+    }
+    delete tail;
+  }
+
+  /++
+    Scans an optional "shebang" line; it may only appear at the start of a file.
+    Regexp: #![^\EndOfLine]*
+  +/
+  void scanShebang()
+  {
+    if (p[0] != '#' || p[1] != '!')
+      return;
+    auto shebang = new Token;
+    shebang.type = TOK.Shebang;
+    shebang.setWhitespaceFlag();
+    shebang.start = p;
+    ++p; // Now at '!'; the loop below pre-increments past it.
+    while (!isEndOfLine(++p))
+      isascii(*p) || decodeUTF8();
+    shebang.end = p;
+    // Append after the current token; this.token itself is not advanced.
+    this.token.next = shebang;
+    shebang.prev = this.token;
+  }
+
+  void finalizeSpecialToken(ref Token t) // Stores the value of a special token in t.str or t.uint_.
+  {
+    assert(t.srcText[0..2] == "__"); // Every special token starts with two underscores.
+    switch (t.type)
+    {
+    case TOK.FILE:
+      t.str = this.errorPath;
+      break;
+    case TOK.LINE:
+      t.uint_ = this.errorLineNumber(this.lineNum);
+      break;
+    case TOK.DATE,
+         TOK.TIME,
+         TOK.TIMESTAMP:
+      time_t time_val;
+      time(&time_val);
+      char* str = ctime(&time_val); // C ctime() format: "Www Mmm dd hh:mm:ss yyyy\n"
+      char[] time_str = str[0 .. strlen(str)];
+      switch (t.type)
+      {
+      case TOK.DATE:
+        time_str = time_str[4..11] ~ time_str[20..24] ~ \0; break; // "Mmm dd " ~ "yyyy", 0-terminated.
+      case TOK.TIME:
+        time_str = time_str[11..19] ~ \0; break; // "hh:mm:ss", 0-terminated.
+      case TOK.TIMESTAMP:
+        time_str = time_str[0..24] ~ \0; break; // Everything but the trailing "\n", 0-terminated.
+      default: assert(0);
+      }
+      t.str = time_str;
+      break;
+    case TOK.VENDOR:
+      t.str = VENDOR;
+      break;
+    case TOK.VERSION:
+      t.uint_ = VERSION_MAJOR*1000 + VERSION_MINOR;
+      break;
+    default:
+      assert(0); // The caller must only pass special tokens.
+    }
+  }
+
+  private void setLineBegin(char* p)
+  {
+    auto prev = p - 1; // The character just before the new line start.
+    assert(prev >= text.ptr && p < end); // Both must lie inside the text.
+    // A line may only begin directly after the end of a newline.
+    assert(isNewlineEnd(prev));
+    this.lineBegin = p;
+  }
+
+  private void scanNext(ref Token* t)
+  {
+    assert(t !is null);
+    if (t.next)
+    { // The successor was scanned earlier: just step forward.
+      t = t.next;
+      return;
+    }
+    // Nothing lies beyond the tail token; t stays where it is.
+    if (t == this.tail)
+      return;
+    // Scan a fresh token and append it to the list.
+    auto scanned = new Token;
+    scan(*scanned);
+    scanned.prev = t;
+    t.next = scanned;
+    t = scanned;
+  }
+
+  /// Advance t one token forward (via scanNext()); this.token is not touched.
+  void peek(ref Token* t)
+  {
+    scanNext(t);
+  }
+
+  /// Advance this.token to the next token in the source text and return its type.
+  TOK nextToken()
+  {
+    scanNext(this.token);
+    return this.token.type;
+  }
+
+  /// Returns true if p points to the last character of a Newline.
+  bool isNewlineEnd(char* p)
+  {
+    if (*p == '\n' || *p == '\r')
+      return true;
+    // LS and PS: compare the final byte, then the two bytes before it.
+    // Only LS[0..2] is tested; presumably PS begins with the same two bytes.
+    return (*p == LS[2] || *p == PS[2]) &&
+           (p-2) >= text.ptr &&
+           p[-1] == LS[1] && p[-2] == LS[0];
+  }
+
+  /++
+    Scans the next token at p into t. This is the old scan method (see scan_()).
+    TODO: profile old and new to see which one is faster.
+  +/
+  public void scan(ref Token t)
+  in
+  {
+    assert(text.ptr <= p && p < end);
+  }
+  out
+  {
+    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type));
+    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type));
+  }
+  body
+  {
+    // Scan whitespace.
+    if (isspace(*p))
+    {
+      t.ws = p; // Remember where the leading whitespace begins.
+      while (isspace(*++p))
+      {}
+    }
+
+    // Scan a token.
+    uint c = *p;
+    {
+      t.start = p;
+      // Newline.
+      switch (*p)
+      {
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // "\r\n" counts as a single newline.
+      case '\n': // NB: case '\r' falls through to here.
+        assert(isNewlineEnd(p));
+        ++p;
+        ++lineNum;
+        setLineBegin(p);
+//         this.newline = &t;
+        t.type = TOK.Newline;
+        t.setWhitespaceFlag();
+        t.filePath = this.errorPath;
+        t.lineNum = lineNum;
+        t.lineNum_hline = lineNum_hline;
+        t.end = p;
+        return;
+      default:
+        if (isUnicodeNewline(p))
+        {
+          ++p; ++p; // Move to the last byte of the Unicode newline.
+          goto case '\n';
+        }
+      }
+      // Identifier or string literal.
+      if (isidbeg(c))
+      {
+        if (c == 'r' && p[1] == '"' && ++p) // ++p skips the 'r' prefix.
+          return scanRawStringLiteral(t);
+        if (c == 'x' && p[1] == '"')
+          return scanHexStringLiteral(t);
+      version(D2)
+      {
+        if (c == 'q' && p[1] == '"')
+          return scanDelimitedStringLiteral(t);
+        if (c == 'q' && p[1] == '{')
+          return scanTokenStringLiteral(t);
+      }
+        // Scan identifier.
+      Lidentifier:
+        do
+        { c = *++p; }
+        while (isident(c) || !isascii(c) && isUnicodeAlpha())
+
+        t.end = p;
+
+        auto id = IdTable.lookup(t.srcText);
+        t.type = id.type;
+        t.ident = id;
+
+        if (t.type == TOK.Identifier || t.isKeyword)
+          return;
+        else if (t.isSpecialToken)
+          finalizeSpecialToken(t);
+        else if (t.type == TOK.EOF)
+        {
+          tail = &t; // "__EOF__" ends the token list like a real end of file.
+          assert(t.srcText == "__EOF__");
+        }
+        else
+          assert(0, "unexpected token type: " ~ Token.toString(t.type));
+        return;
+      }
+
+      if (isdigit(c))
+        return scanNumber(t);
+
+      if (c == '/')
+      {
+        c = *++p;
+        switch(c)
+        {
+        case '=':
+          ++p;
+          t.type = TOK.DivAssign;
+          t.end = p;
+          return;
+        case '+':
+          return scanNestedComment(t);
+        case '*':
+          return scanBlockComment(t);
+        case '/':
+          while (!isEndOfLine(++p))
+            isascii(*p) || decodeUTF8();
+          t.type = TOK.Comment;
+          t.setWhitespaceFlag();
+          t.end = p;
+          return;
+        default:
+          t.type = TOK.Div;
+          t.end = p;
+          return;
+        }
+      }
+
+      switch (c)
+      {
+      case '\'':
+        return scanCharacterLiteral(t);
+      case '`':
+        return scanRawStringLiteral(t);
+      case '"':
+        return scanNormalStringLiteral(t);
+      case '\\':
+        char[] buffer;
+        do
+        {
+          c = scanEscapeSequence();
+          if (isascii(c))
+            buffer ~= c;
+          else
+            encodeUTF8(buffer, c);
+        } while (*p == '\\') // Consecutive escape sequences form one string token.
+        buffer ~= 0;
+        t.type = TOK.String;
+        t.str = buffer;
+        t.end = p;
+        return;
+      case '>': /* >  >=  >>  >>=  >>>  >>>= */
+        c = *++p;
+        switch (c)
+        {
+        case '=':
+          t.type = TOK.GreaterEqual;
+          goto Lcommon;
+        case '>':
+          if (p[1] == '>')
+          {
+            ++p;
+            if (p[1] == '=')
+            { ++p;
+              t.type = TOK.URShiftAssign;
+            }
+            else
+              t.type = TOK.URShift;
+          }
+          else if (p[1] == '=')
+          {
+            ++p;
+            t.type = TOK.RShiftAssign;
+          }
+          else
+            t.type = TOK.RShift;
+          goto Lcommon;
+        default:
+          t.type = TOK.Greater;
+          goto Lcommon2;
+        }
+        assert(0);
+      case '<': /* <  <=  <>  <>=  <<  <<= */
+        c = *++p;
+        switch (c)
+        {
+        case '=':
+          t.type = TOK.LessEqual;
+          goto Lcommon;
+        case '<':
+          if (p[1] == '=') {
+            ++p;
+            t.type = TOK.LShiftAssign;
+          }
+          else
+            t.type = TOK.LShift;
+          goto Lcommon;
+        case '>':
+          if (p[1] == '=') {
+            ++p;
+            t.type = TOK.LorEorG;
+          }
+          else
+            t.type = TOK.LorG;
+          goto Lcommon;
+        default:
+          t.type = TOK.Less;
+          goto Lcommon2;
+        }
+        assert(0);
+      case '!': /* !  !<  !>  !<=  !>=  !<>  !<>= */
+        c = *++p;
+        switch (c)
+        {
+        case '<':
+          c = *++p;
+          if (c == '>')
+          {
+            if (p[1] == '=') {
+              ++p;
+              t.type = TOK.Unordered;
+            }
+            else
+              t.type = TOK.UorE;
+          }
+          else if (c == '=')
+          {
+            t.type = TOK.UorG;
+          }
+          else {
+            t.type = TOK.UorGorE;
+            goto Lcommon2;
+          }
+          goto Lcommon;
+        case '>':
+          if (p[1] == '=')
+          {
+            ++p;
+            t.type = TOK.UorL;
+          }
+          else
+            t.type = TOK.UorLorE;
+          goto Lcommon;
+        case '=':
+          t.type = TOK.NotEqual;
+          goto Lcommon;
+        default:
+          t.type = TOK.Not;
+          goto Lcommon2;
+        }
+        assert(0);
+      case '.': /* .  .[0-9]  ..  ... */
+        if (p[1] == '.')
+        {
+          ++p;
+          if (p[1] == '.') {
+            ++p;
+            t.type = TOK.Ellipses;
+          }
+          else
+            t.type = TOK.Slice;
+        }
+        else if (isdigit(p[1]))
+        {
+          return scanReal(t);
+        }
+        else
+          t.type = TOK.Dot;
+        goto Lcommon;
+      case '|': /* |  ||  |= */
+        c = *++p;
+        if (c == '=')
+          t.type = TOK.OrAssign;
+        else if (c == '|')
+          t.type = TOK.OrLogical;
+        else {
+          t.type = TOK.OrBinary;
+          goto Lcommon2;
+        }
+        goto Lcommon;
+      case '&': /* &  &&  &= */
+        c = *++p;
+        if (c == '=')
+          t.type = TOK.AndAssign;
+        else if (c == '&')
+          t.type = TOK.AndLogical;
+        else {
+          t.type = TOK.AndBinary;
+          goto Lcommon2;
+        }
+        goto Lcommon;
+      case '+': /* +  ++  += */
+        c = *++p;
+        if (c == '=')
+          t.type = TOK.PlusAssign;
+        else if (c == '+')
+          t.type = TOK.PlusPlus;
+        else {
+          t.type = TOK.Plus;
+          goto Lcommon2;
+        }
+        goto Lcommon;
+      case '-': /* -  --  -= */
+        c = *++p;
+        if (c == '=')
+          t.type = TOK.MinusAssign;
+        else if (c == '-')
+          t.type = TOK.MinusMinus;
+        else {
+          t.type = TOK.Minus;
+          goto Lcommon2;
+        }
+        goto Lcommon;
+      case '=': /* =  == */
+        if (p[1] == '=') {
+          ++p;
+          t.type = TOK.Equal;
+        }
+        else
+          t.type = TOK.Assign;
+        goto Lcommon;
+      case '~': /* ~  ~= */
+         if (p[1] == '=') {
+           ++p;
+           t.type = TOK.CatAssign;
+         }
+         else
+           t.type = TOK.Tilde;
+         goto Lcommon;
+      case '*': /* *  *= */
+         if (p[1] == '=') {
+           ++p;
+           t.type = TOK.MulAssign;
+         }
+         else
+           t.type = TOK.Mul;
+         goto Lcommon;
+      case '^': /* ^  ^= */
+         if (p[1] == '=') {
+           ++p;
+           t.type = TOK.XorAssign;
+         }
+         else
+           t.type = TOK.Xor;
+         goto Lcommon;
+      case '%': /* %  %= */
+         if (p[1] == '=') {
+           ++p;
+           t.type = TOK.ModAssign;
+         }
+         else
+           t.type = TOK.Mod;
+         goto Lcommon;
+      // Single character tokens:
+      case '(':
+        t.type = TOK.LParen;
+        goto Lcommon;
+      case ')':
+        t.type = TOK.RParen;
+        goto Lcommon;
+      case '[':
+        t.type = TOK.LBracket;
+        goto Lcommon;
+      case ']':
+        t.type = TOK.RBracket;
+        goto Lcommon;
+      case '{':
+        t.type = TOK.LBrace;
+        goto Lcommon;
+      case '}':
+        t.type = TOK.RBrace;
+        goto Lcommon;
+      case ':':
+        t.type = TOK.Colon;
+        goto Lcommon;
+      case ';':
+        t.type = TOK.Semicolon;
+        goto Lcommon;
+      case '?':
+        t.type = TOK.Question;
+        goto Lcommon;
+      case ',':
+        t.type = TOK.Comma;
+        goto Lcommon;
+      case '$':
+        t.type = TOK.Dollar;
+      Lcommon: // Entered with p on the last character of the token.
+        ++p;
+      Lcommon2: // Entered with p already one past the token.
+        t.end = p;
+        return;
+      case '#':
+        return scanSpecialTokenSequence(t);
+      default:
+      }
+
+      // Check for EOF
+      if (isEOF(c))
+      {
+        assert(isEOF(*p), ""~*p);
+        t.type = TOK.EOF;
+        t.end = p;
+        tail = &t; // The EOF token is the tail of the list; p is not advanced.
+        assert(t.start == t.end);
+        return;
+      }
+
+      if (!isascii(c))
+      {
+        c = decodeUTF8();
+        if (isUniAlpha(c))
+          goto Lidentifier; // A non-ASCII letter starts an identifier.
+      }
+
+      error(t.start, MID.IllegalCharacter, cast(dchar)c);
+
+      ++p;
+      t.type = TOK.Illegal;
+      t.setWhitespaceFlag();
+      t.dchar_ = c;
+      t.end = p;
+      return;
+    }
+  }
+
+  template toUint(char[] T) // Packs a string of 1 to 4 characters into a uint at compile time.
+  {
+    static assert(0 < T.length && T.length <= 4);
+    static if (T.length == 1)
+      const uint toUint = T[0];
+    else // Shift the first character above the packed value of the remaining ones.
+      const uint toUint = (T[0] << ((T.length-1)*8)) | toUint!(T[1..$]);
+  }
+  static assert(toUint!("\xAA\xBB\xCC\xDD") == 0xAABBCCDD); // First character ends up in the highest byte.
+
+  // Builds "case <n>: goto <label>;" source text. Can't use this yet due to a bug in DMD (bug id=1534).
+  template case_(char[] str, TOK tok, char[] label) // NOTE(review): tok is not used in the generated text yet.
+  {
+    const char[] case_ = // <n> is the toUint value of str, written out with .stringof.
+      `case `~toUint!(str).stringof~`:
+
+         goto `~label~`;`;
+  }
+
+  template case_L4(char[] str, TOK tok) // case_ with the jump target fixed to "Lcommon_4".
+  {
+    const char[] case_L4 = case_!(str, tok, "Lcommon_4");
+  }
+
+  template case_L3(char[] str, TOK tok) // case_ with the jump target fixed to "Lcommon_3".
+  {
+    const char[] case_L3 = case_!(str, tok, "Lcommon_3");
+  }
+
+  template case_L2(char[] str, TOK tok) // case_ with the jump target fixed to "Lcommon_2".
+  {
+    const char[] case_L2 = case_!(str, tok, "Lcommon_2");
+  }
+
+  template case_L1(char[] str, TOK tok) // case_ with the jump target fixed to "Lcommon".
+  { // The member must carry the template's own name, or case_L1!(...) does not yield the string.
+    const char[] case_L1 = case_!(str, tok, "Lcommon");
+  }
+
+  public void scan_(ref Token t)
+  in
+  {
+    assert(text.ptr <= p && p < end);
+  }
+  out
+  {
+    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type));
+    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type));
+  }
+  body
+  {
+    // Scan whitespace.
+    if (isspace(*p))
+    {
+      t.ws = p;
+      while (isspace(*++p))
+      {}
+    }
+
+    // Scan a token.
+    t.start = p;
+    // Newline.
+    switch (*p)
+    {
+    case '\r':
+      if (p[1] == '\n')
+        ++p;
+    case '\n':
+      assert(isNewlineEnd(p));
+      ++p;
+      ++lineNum;
+      setLineBegin(p);
+//       this.newline = &t;
+      t.type = TOK.Newline;
+      t.setWhitespaceFlag();
+      t.filePath = this.errorPath;
+      t.lineNum = lineNum;
+      t.lineNum_hline = lineNum_hline;
+      t.end = p;
+      return;
+    default:
+      if (isUnicodeNewline(p))
+      {
+        ++p; ++p;
+        goto case '\n';
+      }
+    }
+
+    uint c = *p;
+    assert(end - p != 0);
+    switch (end - p)
+    {
+    case 1:
+      goto L1character;
+    case 2:
+      c <<= 8; c |= p[1];
+      goto L2characters;
+    case 3:
+      c <<= 8; c |= p[1]; c <<= 8; c |= p[2];
+      goto L3characters;
+    default:
+      version(BigEndian)
+        c = *cast(uint*)p;
+      else
+      {
+        c <<= 8; c |= p[1]; c <<= 8; c |= p[2]; c <<= 8; c |= p[3];
+        /+
+        c = *cast(uint*)p;
+        asm
+        {
+          mov EDX, c;
+          bswap EDX;
+          mov c, EDX;
+        }
+        +/
+      }
+    }
+
+    // 4 character tokens.
+    switch (c)
+    {
+    case toUint!(">>>="):
+      t.type = TOK.RShiftAssign;
+      goto Lcommon_4;
+    case toUint!("!<>="):
+      t.type = TOK.Unordered;
+    Lcommon_4:
+      p += 4;
+      t.end = p;
+      return;
+    default:
+    }
+
+    c >>>= 8;
+  L3characters:
+    assert(p == t.start);
+    // 3 character tokens.
+    switch (c)
+    {
+    case toUint!(">>="):
+      t.type = TOK.RShiftAssign;
+      goto Lcommon_3;
+    case toUint!(">>>"):
+      t.type = TOK.URShift;
+      goto Lcommon_3;
+    case toUint!("<>="):
+      t.type = TOK.LorEorG;
+      goto Lcommon_3;
+    case toUint!("<<="):
+      t.type = TOK.LShiftAssign;
+      goto Lcommon_3;
+    case toUint!("!<="):
+      t.type = TOK.UorG;
+      goto Lcommon_3;
+    case toUint!("!>="):
+      t.type = TOK.UorL;
+      goto Lcommon_3;
+    case toUint!("!<>"):
+      t.type = TOK.UorE;
+      goto Lcommon_3;
+    case toUint!("..."):
+      t.type = TOK.Ellipses;
+    Lcommon_3:
+      p += 3;
+      t.end = p;
+      return;
+    default:
+    }
+
+    c >>>= 8;
+  L2characters:
+    assert(p == t.start);
+    // 2 character tokens.
+    switch (c)
+    {
+    case toUint!("/+"):
+      ++p; // Skip /
+      return scanNestedComment(t);
+    case toUint!("/*"):
+      ++p; // Skip /
+      return scanBlockComment(t);
+    case toUint!("//"):
+      ++p; // Skip /
+      assert(*p == '/');
+      while (!isEndOfLine(++p))
+        isascii(*p) || decodeUTF8();
+      t.type = TOK.Comment;
+      t.setWhitespaceFlag();
+      t.end = p;
+      return;
+    case toUint!(">="):
+      t.type = TOK.GreaterEqual;
+      goto Lcommon_2;
+    case toUint!(">>"):
+      t.type = TOK.RShift;
+      goto Lcommon_2;
+    case toUint!("<<"):
+      t.type = TOK.LShift;
+      goto Lcommon_2;
+    case toUint!("<="):
+      t.type = TOK.LessEqual;
+      goto Lcommon_2;
+    case toUint!("<>"):
+      t.type = TOK.LorG;
+      goto Lcommon_2;
+    case toUint!("!<"):
+      t.type = TOK.UorGorE;
+      goto Lcommon_2;
+    case toUint!("!>"):
+      t.type = TOK.UorLorE;
+      goto Lcommon_2;
+    case toUint!("!="):
+      t.type = TOK.NotEqual;
+      goto Lcommon_2;
+    case toUint!(".."):
+      t.type = TOK.Slice;
+      goto Lcommon_2;
+    case toUint!("&&"):
+      t.type = TOK.AndLogical;
+      goto Lcommon_2;
+    case toUint!("&="):
+      t.type = TOK.AndAssign;
+      goto Lcommon_2;
+    case toUint!("||"):
+      t.type = TOK.OrLogical;
+      goto Lcommon_2;
+    case toUint!("|="):
+      t.type = TOK.OrAssign;
+      goto Lcommon_2;
+    case toUint!("++"):
+      t.type = TOK.PlusPlus;
+      goto Lcommon_2;
+    case toUint!("+="):
+      t.type = TOK.PlusAssign;
+      goto Lcommon_2;
+    case toUint!("--"):
+      t.type = TOK.MinusMinus;
+      goto Lcommon_2;
+    case toUint!("-="):
+      t.type = TOK.MinusAssign;
+      goto Lcommon_2;
+    case toUint!("=="):
+      t.type = TOK.Equal;
+      goto Lcommon_2;
+    case toUint!("~="):
+      t.type = TOK.CatAssign;
+      goto Lcommon_2;
+    case toUint!("*="):
+      t.type = TOK.MulAssign;
+      goto Lcommon_2;
+    case toUint!("/="):
+      t.type = TOK.DivAssign;
+      goto Lcommon_2;
+    case toUint!("^="):
+      t.type = TOK.XorAssign;
+      goto Lcommon_2;
+    case toUint!("%="):
+      t.type = TOK.ModAssign;
+    Lcommon_2:
+      p += 2;
+      t.end = p;
+      return;
+    default:
+    }
+
+    c >>>= 8;
+  L1character:
+    assert(p == t.start);
+    assert(*p == c, Format("p={0},c={1}", *p, cast(dchar)c));
+    // 1 character tokens.
+    // TODO: consider storing the token type in ptable.
+    switch (c)
+    {
+    case '\'':
+      return scanCharacterLiteral(t);
+    case '`':
+      return scanRawStringLiteral(t);
+    case '"':
+      return scanNormalStringLiteral(t);
+    case '\\':
+      char[] buffer;
+      do
+      {
+        c = scanEscapeSequence();
+        if (isascii(c))
+          buffer ~= c;
+        else
+          encodeUTF8(buffer, c);
+      } while (*p == '\\')
+      buffer ~= 0;
+      t.type = TOK.String;
+      t.str = buffer;
+      t.end = p;
+      return;
+    case '<':
+      t.type = TOK.Greater;
+      goto Lcommon;
+    case '>':
+      t.type = TOK.Less;
+      goto Lcommon;
+    case '^':
+      t.type = TOK.Xor;
+      goto Lcommon;
+    case '!':
+      t.type = TOK.Not;
+      goto Lcommon;
+    case '.':
+      if (isdigit(p[1]))
+        return scanReal(t);
+      t.type = TOK.Dot;
+      goto Lcommon;
+    case '&':
+      t.type = TOK.AndBinary;
+      goto Lcommon;
+    case '|':
+      t.type = TOK.OrBinary;
+      goto Lcommon;
+    case '+':
+      t.type = TOK.Plus;
+      goto Lcommon;
+    case '-':
+      t.type = TOK.Minus;
+      goto Lcommon;
+    case '=':
+      t.type = TOK.Assign;
+      goto Lcommon;
+    case '~':
+      t.type = TOK.Tilde;
+      goto Lcommon;
+    case '*':
+      t.type = TOK.Mul;
+      goto Lcommon;
+    case '/':
+      t.type = TOK.Div;
+      goto Lcommon;
+    case '%':
+      t.type = TOK.Mod;
+      goto Lcommon;
+    case '(':
+      t.type = TOK.LParen;
+      goto Lcommon;
+    case ')':
+      t.type = TOK.RParen;
+      goto Lcommon;
+    case '[':
+      t.type = TOK.LBracket;
+      goto Lcommon;
+    case ']':
+      t.type = TOK.RBracket;
+      goto Lcommon;
+    case '{':
+      t.type = TOK.LBrace;
+      goto Lcommon;
+    case '}':
+      t.type = TOK.RBrace;
+      goto Lcommon;
+    case ':':
+      t.type = TOK.Colon;
+      goto Lcommon;
+    case ';':
+      t.type = TOK.Semicolon;
+      goto Lcommon;
+    case '?':
+      t.type = TOK.Question;
+      goto Lcommon;
+    case ',':
+      t.type = TOK.Comma;
+      goto Lcommon;
+    case '$':
+      t.type = TOK.Dollar;
+    Lcommon:
+      ++p;
+      t.end = p;
+      return;
+    case '#':
+      return scanSpecialTokenSequence(t);
+    default:
+    }
+
+    assert(p == t.start);
+    assert(*p == c);
+
+    // TODO: consider moving isidbeg() and isdigit() up.
+    if (isidbeg(c))
+    {
+      if (c == 'r' && p[1] == '"' && ++p)
+        return scanRawStringLiteral(t);
+      if (c == 'x' && p[1] == '"')
+        return scanHexStringLiteral(t);
+    version(D2)
+    {
+      if (c == 'q' && p[1] == '"')
+        return scanDelimitedStringLiteral(t);
+      if (c == 'q' && p[1] == '{')
+        return scanTokenStringLiteral(t);
+    }
+      // Scan identifier.
+    Lidentifier:
+      do
+      { c = *++p; }
+      while (isident(c) || !isascii(c) && isUnicodeAlpha())
+
+      t.end = p;
+
+      auto id = IdTable.lookup(t.srcText);
+      t.type = id.type;
+      t.ident = id;
+
+      if (t.type == TOK.Identifier || t.isKeyword)
+        return;
+      else if (t.isSpecialToken)
+        finalizeSpecialToken(t);
+      else if (t.type == TOK.EOF)
+      {
+        tail = &t;
+        assert(t.srcText == "__EOF__");
+      }
+      else
+        assert(0, "unexpected token type: " ~ Token.toString(t.type));
+      return;
+    }
+
+    if (isdigit(c))
+      return scanNumber(t);
+
+    // Check for EOF
+    if (isEOF(c))
+    {
+      assert(isEOF(*p), *p~"");
+      t.type = TOK.EOF;
+      t.end = p;
+      tail = &t;
+      assert(t.start == t.end);
+      return;
+    }
+
+    if (!isascii(c))
+    {
+      c = decodeUTF8();
+      if (isUniAlpha(c))
+        goto Lidentifier;
+    }
+
+    error(t.start, MID.IllegalCharacter, cast(dchar)c);
+
+    ++p;
+    t.type = TOK.Illegal;
+    t.setWhitespaceFlag();
+    t.dchar_ = c;
+    t.end = p;
+    return;
+  }
+
+  /// Scans a block comment (/* ... */) and stores it in t as a TOK.Comment
+  /// token that is flagged as whitespace.
+  ///
+  /// On entry p must point at the '*' of the comment opener. On exit p and
+  /// t.end point just past the closing "*/". If the end of the source text is
+  /// reached first, MID.UnterminatedBlockComment is reported for the position
+  /// where the comment started and p is left on the EOF character.
+  void scanBlockComment(ref Token t)
+  {
+    assert(p[-1] == '/' && *p == '*');
+    // Remember the line of the comment opener; p, lineNum and lineBegin move
+    // on while scanning, but an "unterminated" error must refer to the start.
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+  Loop:
+    while (1)
+    {
+      switch (*++p)
+      {
+      case '*':
+        if (p[1] != '/')
+          continue;
+        p += 2; // Skip the closing "*/".
+        break Loop;
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // "\r\n" counts as a single newline.
+      case '\n': // Also reached by fall-through from '\r' and by goto below.
+        assert(isNewlineEnd(p));
+        ++lineNum;
+        setLineBegin(p+1);
+        break;
+      default:
+        if (!isascii(*p))
+        {
+          // Non-ASCII byte: decode the character; a Unicode newline is
+          // handled like '\n'.
+          if (isUnicodeNewlineChar(decodeUTF8()))
+            goto case '\n';
+        }
+        else if (isEOF(*p))
+        {
+          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedBlockComment);
+          break Loop;
+        }
+      }
+    }
+    t.type = TOK.Comment;
+    t.setWhitespaceFlag();
+    t.end = p;
+    return;
+  }
+
+  /// Scans a nesting comment (opened by slash-plus, closed by plus-slash) and
+  /// stores it in t as a TOK.Comment token that is flagged as whitespace.
+  ///
+  /// On entry p must point at the '+' of the comment opener. Nested openers
+  /// increase a depth counter and closers decrease it; the comment ends when
+  /// the counter drops to zero. On exit p and t.end point just past the final
+  /// closer. If the end of the source text is reached first,
+  /// MID.UnterminatedNestedComment is reported for the position where the
+  /// comment started and p is left on the EOF character.
+  void scanNestedComment(ref Token t)
+  {
+    assert(p[-1] == '/' && *p == '+');
+    // Remember the line of the comment opener for error reporting.
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+    uint level = 1; // Current nesting depth; the opener already counts as 1.
+  Loop:
+    while (1)
+    {
+      switch (*++p)
+      {
+      case '/':
+        if (p[1] == '+')
+          ++p, ++level; // Nested opener: skip its '+' and go one level deeper.
+        continue;
+      case '+':
+        if (p[1] != '/')
+          continue;
+        ++p; // Move onto the '/' of the closer.
+        if (--level != 0)
+          continue; // Only an inner comment was closed.
+        ++p; // Outermost closer: move past it.
+        break Loop;
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // "\r\n" counts as a single newline.
+      case '\n': // Also reached by fall-through from '\r' and by goto below.
+        assert(isNewlineEnd(p));
+        ++lineNum;
+        setLineBegin(p+1);
+        continue;
+      default:
+        if (!isascii(*p))
+        {
+          // Non-ASCII byte: decode the character; a Unicode newline is
+          // handled like '\n'.
+          if (isUnicodeNewlineChar(decodeUTF8()))
+            goto case '\n';
+        }
+        else if (isEOF(*p))
+        {
+          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedNestedComment);
+          break Loop;
+        }
+      }
+    }
+    t.type = TOK.Comment;
+    t.setWhitespaceFlag();
+    t.end = p;
+    return;
+  }
+
+  /// Consumes an optional string postfix character ('c', 'w' or 'd').
+  ///
+  /// Must be called with p positioned directly behind the closing delimiter
+  /// of a string literal. Returns the postfix character and advances p past
+  /// it, or returns 0 and leaves p untouched when no postfix is present.
+  char scanPostfix()
+  {
+    assert(p[-1] == '"' || p[-1] == '`' ||
+      { version(D2) return p[-1] == '}';
+               else return 0; }()
+    );
+    char postfix = *p;
+    if (postfix == 'c' || postfix == 'w' || postfix == 'd')
+    {
+      ++p; // Consume the postfix.
+      return postfix;
+    }
+    return 0;
+  }
+
+  /// Scans a double-quoted string literal with escape sequences.
+  ///
+  /// On entry p must point at the opening '"'. The decoded value (escape
+  /// sequences resolved, every newline converted to '\n') is stored
+  /// zero-terminated in t.str, an optional postfix in t.pf, and t.end is set
+  /// to the position after the literal. If a 0 or _Z_ character is met before
+  /// the closing quote, MID.UnterminatedString is reported for the position
+  /// where the literal started.
+  void scanNormalStringLiteral(ref Token t)
+  {
+    assert(*p == '"');
+    // Remember the line of the opening quote for error reporting.
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+    t.type = TOK.String;
+    char[] buffer; // Receives the decoded string value.
+    uint c;
+    while (1)
+    {
+      c = *++p;
+      switch (c)
+      {
+      case '"':
+        ++p; // Skip the closing quote.
+        t.pf = scanPostfix();
+      Lreturn:
+        t.str = buffer ~ '\0';
+        t.end = p;
+        return;
+      case '\\':
+        c = scanEscapeSequence();
+        --p; // Step back one; the loop head pre-increments p again.
+        if (isascii(c))
+          break; // Appended as a single byte at the bottom of the loop.
+        encodeUTF8(buffer, c);
+        continue;
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // "\r\n" counts as a single newline.
+      case '\n': // Also reached by fall-through from '\r' and by goto below.
+        assert(isNewlineEnd(p));
+        c = '\n'; // Convert Newline to \n.
+        ++lineNum;
+        setLineBegin(p+1);
+        break;
+      case 0, _Z_:
+        error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedString);
+        goto Lreturn;
+      default:
+        if (!isascii(c))
+        {
+          c = decodeUTF8();
+          if (isUnicodeNewlineChar(c))
+            goto case '\n';
+          encodeUTF8(buffer, c);
+          continue;
+        }
+      }
+      // Only single-byte characters get here.
+      assert(isascii(c));
+      buffer ~= c;
+    }
+    assert(0);
+  }
+
+  /// Scans a character literal enclosed in single quotes.
+  ///
+  /// On entry p must point at the opening quote. The character value is
+  /// stored in t.dchar_ and t.end is set to the position after the literal.
+  /// Reports MID.EmptyCharacterLiteral when the quotes enclose nothing, and
+  /// MID.UnterminatedCharacterLiteral when no closing quote follows the
+  /// character.
+  void scanCharacterLiteral(ref Token t)
+  {
+    assert(*p == '\'');
+    ++p; // Skip the opening quote.
+    t.type = TOK.CharLiteral;
+    switch (*p)
+    {
+    case '\\':
+      t.dchar_ = scanEscapeSequence();
+      break;
+    case '\'':
+      error(t.start, MID.EmptyCharacterLiteral);
+      break;
+    default:
+      // No character to read at the end of a line; the check for the closing
+      // quote below then reports the literal as unterminated.
+      if (isEndOfLine(p))
+        break;
+      uint c = *p;
+      if (!isascii(c))
+        c = decodeUTF8();
+      t.dchar_ = c;
+      ++p;
+    }
+
+    if (*p == '\'')
+      ++p; // Skip the closing quote.
+    else
+      error(t.start, MID.UnterminatedCharacterLiteral);
+    t.end = p;
+  }
+
+  /// Scans a raw string literal, i.e. a back-quoted string or an r"..."
+  /// string. No escape sequences are interpreted.
+  ///
+  /// On entry p must point at the opening delimiter: the back-quote, or the
+  /// '"' that follows the 'r' prefix. The value (every newline converted to
+  /// '\n') is stored zero-terminated in t.str, an optional postfix in t.pf,
+  /// and t.end is set to the position after the literal. If a 0 or _Z_
+  /// character is met before the closing delimiter,
+  /// MID.UnterminatedRawString (for r"...") or
+  /// MID.UnterminatedBackQuoteString is reported for the position where the
+  /// literal started.
+  void scanRawStringLiteral(ref Token t)
+  {
+    assert(*p == '`' || *p == '"' && p[-1] == 'r');
+    // Remember the line of the opening delimiter for error reporting.
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+    t.type = TOK.String;
+    uint delim = *p; // Either '`' or '"'.
+    char[] buffer; // Receives the string value.
+    uint c;
+    while (1)
+    {
+      c = *++p;
+      switch (c)
+      {
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // "\r\n" counts as a single newline.
+      case '\n': // Also reached by fall-through from '\r' and by goto below.
+        assert(isNewlineEnd(p));
+        c = '\n'; // Convert Newline to '\n'.
+        ++lineNum;
+        setLineBegin(p+1);
+        break;
+      case '`':
+      case '"':
+        if (c == delim)
+        {
+          ++p; // Skip the closing delimiter.
+          t.pf = scanPostfix();
+        Lreturn:
+          t.str = buffer ~ '\0';
+          t.end = p;
+          return;
+        }
+        // The other quote character is ordinary content.
+        break;
+      case 0, _Z_:
+        // delim holds the quote character itself, never the 'r' prefix:
+        // '"' identifies an r"..." literal, '`' a back-quoted one.
+        error(tokenLineNum, tokenLineBegin, t.start,
+          delim == '"' ? MID.UnterminatedRawString : MID.UnterminatedBackQuoteString);
+        goto Lreturn;
+      default:
+        if (!isascii(c))
+        {
+          c = decodeUTF8();
+          if (isUnicodeNewlineChar(c))
+            goto case '\n';
+          encodeUTF8(buffer, c);
+          continue;
+        }
+      }
+      // Only single-byte characters get here.
+      assert(isascii(c));
+      buffer ~= c;
+    }
+    assert(0);
+  }
+
+  /// Scans a hex string literal of the form x"...".
+  ///
+  /// On entry p must point at the 'x'. Each pair of hex digits is combined
+  /// into one byte; whitespace and newlines between digits are skipped. The
+  /// bytes are stored zero-terminated in t.str, an optional postfix in t.pf,
+  /// and t.end is set to the position after the literal.
+  ///
+  /// Errors reported: MID.OddNumberOfDigitsInHexString,
+  /// MID.NonHexCharInHexString (scanning continues afterwards) and
+  /// MID.UnterminatedHexString.
+  void scanHexStringLiteral(ref Token t)
+  {
+    assert(p[0] == 'x' && p[1] == '"');
+    t.type = TOK.String;
+
+    // Remember the line where the literal starts for error reporting.
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+
+    uint c;
+    ubyte[] buffer;
+    ubyte h; // hex number
+    uint n; // number of hex digits
+
+    ++p;
+    assert(*p == '"');
+    while (1)
+    {
+      c = *++p;
+      switch (c)
+      {
+      case '"':
+        if (n & 1)
+          error(tokenLineNum, tokenLineBegin, t.start, MID.OddNumberOfDigitsInHexString);
+        ++p; // Skip the closing quote.
+        t.pf = scanPostfix();
+      Lreturn:
+        t.str = cast(string) (buffer ~= 0);
+        t.end = p;
+        return;
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // "\r\n" counts as a single newline.
+      case '\n': // Also reached by fall-through from '\r' and by goto below.
+        assert(isNewlineEnd(p));
+        ++lineNum;
+        setLineBegin(p+1);
+        continue;
+      default:
+        if (ishexad(c))
+        {
+          // Convert the digit character to its numeric value (0-15).
+          if (c <= '9')
+            c -= '0';
+          else if (c <= 'F')
+            c -= 'A' - 10;
+          else
+            c -= 'a' - 10;
+
+          if (n & 1)
+          {
+            // Second digit of a pair: combine with the first and emit a byte.
+            h <<= 4;
+            h |= c;
+            buffer ~= h;
+          }
+          else
+            h = cast(ubyte)c; // First digit of a pair.
+          ++n;
+          continue;
+        }
+        else if (isspace(c))
+          continue; // Skip spaces.
+        else if (isEOF(c))
+        {
+          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedHexString);
+          t.pf = 0;
+          goto Lreturn;
+        }
+        else
+        {
+          // Remember the position before decoding moves p.
+          auto errorAt = p;
+          if (!isascii(c))
+          {
+            c = decodeUTF8();
+            if (isUnicodeNewlineChar(c))
+              goto case '\n';
+          }
+          error(errorAt, MID.NonHexCharInHexString, cast(dchar)c);
+        }
+      }
+    }
+    assert(0);
+  }
+
+version(D2)
+{
+  /// Scans a delimited string literal of the form q"..." (D2 only).
+  ///
+  /// Three kinds of delimiters are handled:
+  /// $(UL
+  ///   $(LI nesting brackets: '(', '[', '<' or '{' with their counterparts;)
+  ///   $(LI an identifier followed by a newline; the string ends where the
+  ///        identifier reappears at the beginning of a line;)
+  ///   $(LI any other single character, which also closes the string.)
+  /// )
+  /// On entry p must point at the 'q'. The value (every newline converted to
+  /// '\n') is stored zero-terminated in t.str, an optional postfix in t.pf,
+  /// and t.end is set to the position after the literal. Error reporting for
+  /// malformed literals is still marked TODO in this function.
+  void scanDelimitedStringLiteral(ref Token t)
+  {
+    assert(p[0] == 'q' && p[1] == '"');
+    t.type = TOK.String;
+
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+
+    char[] buffer;
+    dchar opening_delim = 0, // 0 if no nested delimiter or '[', '(', '<', '{'
+          closing_delim; // Will be ']', ')', '>', '}',
+                         // the first character of an identifier or
+                         // any other Unicode/ASCII character.
+    char[] str_delim; // Identifier delimiter.
+    uint level = 1; // Counter for nestable delimiters.
+
+    ++p; ++p; // Skip q"
+    uint c = *p;
+    switch (c)
+    {
+    case '(':
+      opening_delim = c;
+      closing_delim = ')'; // c + 1
+      break;
+    case '[', '<', '{':
+      opening_delim = c;
+      closing_delim = c + 2; // Get to closing counterpart. Feature of ASCII table.
+      break;
+    default:
+      // Consumes one newline at p, if there is one. Returns '\n' if a newline
+      // was consumed and 0 otherwise.
+      dchar scanNewline()
+      {
+        switch (*p)
+        {
+        case '\r':
+          if (p[1] == '\n')
+            ++p;
+        case '\n':
+          assert(isNewlineEnd(p));
+          ++p;
+          ++lineNum;
+          setLineBegin(p);
+          return '\n';
+        default:
+          if (isUnicodeNewline(p))
+          {
+            ++p; ++p;
+            goto case '\n';
+          }
+        }
+        return 0;
+      }
+      // Skip leading newlines:
+      while (scanNewline() != 0)
+      {}
+      assert(!isNewline(p));
+
+      char* begin = p;
+      c = *p;
+      closing_delim = c;
+      // TODO: Check for non-printable characters?
+      if (!isascii(c))
+      {
+        closing_delim = decodeUTF8();
+        if (!isUniAlpha(closing_delim))
+          break; // Not an identifier.
+      }
+      else if (!isidbeg(c))
+        break; // Not an identifier.
+
+      // Parse Identifier + EndOfLine
+      do
+      { c = *++p; }
+      while (isident(c) || !isascii(c) && isUnicodeAlpha())
+      // Store identifier
+      str_delim = begin[0..p-begin];
+      // Scan newline
+      if (scanNewline() == '\n')
+        --p; // Go back one because of "c = *++p;" in main loop.
+      else
+      {
+        // TODO: error(p, MID.ExpectedNewlineAfterIdentDelim);
+      }
+    }
+
+    // Returns true if the identifier delimiter starts at p and p is at the
+    // beginning of a line.
+    bool checkStringDelim(char* p)
+    {
+      assert(str_delim.length != 0);
+      // The delimiter must be at the beginning of a line. That is the case
+      // when the last character copied to buffer is '\n', or when nothing has
+      // been copied yet: the newline after the opening identifier is not
+      // stored in buffer. Checking the length first also avoids indexing an
+      // empty array.
+      if ((buffer.length == 0 || buffer[$-1] == '\n') &&
+          end-p >= str_delim.length && // Check remaining length.
+          p[0..str_delim.length] == str_delim) // Compare.
+        return true;
+      return false;
+    }
+
+    while (1)
+    {
+      c = *++p;
+      switch (c)
+      {
+      case '\r':
+        if (p[1] == '\n')
+          ++p;
+      case '\n':
+        assert(isNewlineEnd(p));
+        c = '\n'; // Convert Newline to '\n'.
+        ++lineNum;
+        setLineBegin(p+1);
+        break;
+      case 0, _Z_:
+        // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedDelimitedString);
+        goto Lreturn3;
+      default:
+        if (!isascii(c))
+        {
+          auto begin = p; // Start of the multi-byte sequence.
+          c = decodeUTF8();
+          if (isUnicodeNewlineChar(c))
+            goto case '\n';
+          if (c == closing_delim)
+          {
+            if (str_delim.length)
+            {
+              if (checkStringDelim(begin))
+              {
+                p = begin + str_delim.length;
+                goto Lreturn2;
+              }
+            }
+            else
+            {
+              // A non-ASCII delimiter never nests.
+              assert(level == 1);
+              --level;
+              goto Lreturn;
+            }
+          }
+          encodeUTF8(buffer, c);
+          continue;
+        }
+        else
+        {
+          if (c == opening_delim)
+            ++level;
+          else if (c == closing_delim)
+          {
+            if (str_delim.length)
+            {
+              if (checkStringDelim(p))
+              {
+                p += str_delim.length;
+                goto Lreturn2;
+              }
+            }
+            else if (--level == 0)
+              goto Lreturn;
+          }
+        }
+      }
+      // Only single-byte characters get here.
+      assert(isascii(c));
+      buffer ~= c;
+    }
+  Lreturn: // Character delimiter.
+    assert(c == closing_delim);
+    assert(level == 0);
+    ++p; // Skip closing delimiter.
+  Lreturn2: // String delimiter.
+    if (*p == '"')
+      ++p;
+    else
+    {
+      // TODO: error(p, MID.ExpectedDblQuoteAfterDelim, str_delim.length ? str_delim : closing_delim~"");
+    }
+
+    t.pf = scanPostfix();
+  Lreturn3: // Error.
+    t.str = buffer ~ '\0';
+    t.end = p;
+  }
+
+  /// Scans a token string literal of the form q{...} (D2 only).
+  ///
+  /// On entry p must point at the 'q'. The content is lexed by calling scan()
+  /// repeatedly until the brace that matches the opening one, or EOF, is
+  /// found. The scanned tokens are linked into a list that is stored in
+  /// t.tok_str. The source text between the braces (every newline converted
+  /// to '\n') is stored zero-terminated in t.str, an optional postfix in
+  /// t.pf, and t.end is set to the position after the literal.
+  void scanTokenStringLiteral(ref Token t)
+  {
+    assert(p[0] == 'q' && p[1] == '{');
+    t.type = TOK.String;
+
+    auto tokenLineNum = lineNum;
+    auto tokenLineBegin = lineBegin;
+
+    // A guard against changes to particular members:
+    // this.lineNum_hline and this.errorPath
+    ++inTokenString;
+
+    // Line number at the start of the literal; compared with this.lineNum
+    // below to find out whether the literal spans several lines.
+    uint lineNum = this.lineNum;
+    uint level = 1; // Brace nesting depth; the opening brace counts as 1.
+
+    ++p; ++p; // Skip q{
+
+    auto prev_t = &t;
+    Token* token;
+    while (1)
+    {
+      token = new Token;
+      scan(*token);
+      // Save the tokens in a doubly linked list.
+      // Could be useful for various tools.
+      token.prev = prev_t;
+      prev_t.next = token;
+      prev_t = token;
+      switch (token.type)
+      {
+      case TOK.LBrace:
+        ++level;
+        continue;
+      case TOK.RBrace:
+        if (--level == 0)
+        {
+          // Matching brace found: hand the list over to tok_str and detach
+          // it from t.next.
+          t.tok_str = t.next;
+          t.next = null;
+          break;
+        }
+        continue;
+      case TOK.EOF:
+        // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedTokenString);
+        t.tok_str = t.next;
+        t.next = token; // The EOF token becomes the successor of t.
+        break;
+      default:
+        continue;
+      }
+      break; // Exit loop.
+    }
+
+    assert(token.type == TOK.RBrace || token.type == TOK.EOF);
+    assert(token.type == TOK.RBrace && t.next is null ||
+           token.type == TOK.EOF && t.next !is null);
+
+    char[] buffer;
+    // token points to } or EOF
+    if (token.type == TOK.EOF)
+    {
+      t.end = token.start;
+      // Drop the leading "q{".
+      buffer = t.srcText[2..$].dup ~ '\0';
+    }
+    else
+    {
+      // Assign to buffer before scanPostfix().
+      t.end = p;
+      // Drop the leading "q{" and the trailing "}".
+      buffer = t.srcText[2..$-1].dup ~ '\0';
+      t.pf = scanPostfix();
+      t.end = p; // Assign again because of postfix.
+    }
+    // Convert newlines to '\n'.
+    if (lineNum != this.lineNum)
+    {
+      assert(buffer[$-1] == '\0');
+      // i is the read index and j the write index of the in-place rewrite.
+      uint i, j;
+      for (; i < buffer.length; ++i)
+        switch (buffer[i])
+        {
+        case '\r':
+          if (buffer[i+1] == '\n')
+            ++i;
+        case '\n':
+          assert(isNewlineEnd(buffer.ptr + i));
+          buffer[j++] = '\n'; // Convert Newline to '\n'.
+          break;
+        default:
+          if (isUnicodeNewline(buffer.ptr + i))
+          {
+            ++i; ++i;
+            goto case '\n';
+          }
+          buffer[j++] = buffer[i]; // Copy.
+        }
+      buffer.length = j; // Adjust length.
+    }
+    assert(buffer[$-1] == '\0');
+    t.str = buffer;
+
+    --inTokenString;
+  }
+} // version(D2)
+
+  /// Scans an escape sequence and returns the character it stands for.
+  ///
+  /// On entry p must point at the backslash. Handled forms: the simple
+  /// escapes known to char2ev(), \x, \u and \U followed by 2, 4 and 8 hex
+  /// digits respectively, one to three octal digits, and named entities of
+  /// the form \&name; . On exit p points behind the sequence.
+  ///
+  /// Returns: the escape value, or REPLACEMENT_CHAR after reporting an error
+  /// for a malformed or undefined sequence.
+  dchar scanEscapeSequence()
+  out(result)
+  { assert(isValidChar(result)); }
+  body
+  {
+    assert(*p == '\\');
+
+    auto sequenceStart = p; // Used for error reporting.
+
+    ++p;
+    uint c = char2ev(*p);
+    if (c)
+    {
+      ++p;
+      return c;
+    }
+
+    uint digits = 2; // Number of hex digits required; 2 for \x.
+
+    switch (*p)
+    {
+    case 'x':
+      assert(c == 0);
+      while (1)
+      {
+        ++p;
+        if (ishexad(*p))
+        {
+          c *= 16;
+          if (*p <= '9')
+            c += *p - '0';
+          else if (*p <= 'F')
+            c += *p - 'A' + 10;
+          else
+            c += *p - 'a' + 10;
+
+          if (!--digits)
+          {
+            ++p;
+            if (isValidChar(c))
+              return c; // Return valid escape value.
+
+            error(sequenceStart, MID.InvalidUnicodeEscapeSequence, sequenceStart[0..p-sequenceStart]);
+            break;
+          }
+          continue;
+        }
+
+        error(sequenceStart, MID.InsufficientHexDigits);
+        break;
+      }
+      break;
+    case 'u':
+      digits = 4;
+      goto case 'x';
+    case 'U':
+      digits = 8;
+      goto case 'x';
+    default:
+      if (isoctal(*p))
+      {
+        // Up to three octal digits.
+        assert(c == 0);
+        c += *p - '0';
+        ++p;
+        if (!isoctal(*p))
+          return c;
+        c *= 8;
+        c += *p - '0';
+        ++p;
+        if (!isoctal(*p))
+          return c;
+        c *= 8;
+        c += *p - '0';
+        ++p;
+        return c; // Return valid escape value.
+      }
+      else if(*p == '&')
+      {
+        if (isalpha(*++p))
+        {
+          auto begin = p;
+          while (isalnum(*++p))
+          {}
+
+          if (*p == ';')
+          {
+            // Pass entity excluding '&' and ';'.
+            c = entity2Unicode(begin[0..p - begin]);
+            ++p; // Skip ;
+            if (c != 0xFFFF)
+              return c; // Return valid escape value.
+            else
+              error(sequenceStart, MID.UndefinedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
+          }
+          else
+            error(sequenceStart, MID.UnterminatedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
+        }
+        else
+          error(sequenceStart, MID.InvalidBeginHTMLEntity);
+      }
+      else if (isEndOfLine(p))
+        error(sequenceStart, MID.UndefinedEscapeSequence,
+          isEOF(*p) ? `\EOF` : `\NewLine`);
+      else
+      {
+        // Build the offending sequence for the error message. Test the
+        // character at p: c is always 0 at this point and says nothing about
+        // whether a multi-byte sequence follows the backslash.
+        char[] str = `\`;
+        if (isascii(*p))
+          str ~= *p;
+        else
+          encodeUTF8(str, decodeUTF8());
+        ++p;
+        // TODO: check for unprintable character?
+        error(sequenceStart, MID.UndefinedEscapeSequence, str);
+      }
+    }
+    return REPLACEMENT_CHAR; // Error: return replacement character.
+  }
+
+  /*
+    IntegerLiteral:= (Dec|Hex|Bin|Oct)Suffix?
+    Dec:= (0|[1-9][0-9_]*)
+    Hex:= 0[xX] HexDigits
+    Bin:= 0[bB][01_]+
+    Oct:= 0[0-7_]+
+    Suffix:= (L[uU]?|[uU]L?)
+    HexDigits:= [0-9a-fA-F_]+
+
+    Invalid: "0b_", "0x_", "._"
+  */
+  /// Scans an integer literal (decimal, hexadecimal, binary or octal) with
+  /// an optional L/u/U suffix, and hands over to scanReal() or scanHexReal()
+  /// as soon as the literal turns out to be a floating point number.
+  ///
+  /// On entry p must point at the first digit. For integers the value is
+  /// stored in t.ulong_, t.type is set to TOK.Int32, TOK.Uint32, TOK.Int64 or
+  /// TOK.Uint64 depending on value and suffix, and t.end is set to the
+  /// position after the literal. Overflowing or malformed numbers are
+  /// reported through error(); the whole literal is consumed nevertheless.
+  void scanNumber(ref Token t)
+  {
+    ulong ulong_;
+    bool overflow;
+    bool isDecimal;
+    size_t digits;
+
+    if (*p != '0')
+      goto LscanInteger;
+    ++p; // skip zero
+    // check for xX bB ...
+    switch (*p)
+    {
+    case 'x','X':
+      goto LscanHex;
+    case 'b','B':
+      goto LscanBinary;
+    case 'L':
+      if (p[1] == 'i')
+        goto LscanReal; // 0Li
+      break; // 0L
+    case '.':
+      if (p[1] == '.')
+        break; // 0..
+      // 0.
+    case 'i','f','F', // Imaginary and float literal suffixes.
+         'e', 'E':    // Float exponent.
+      goto LscanReal;
+    default:
+      if (*p == '_')
+        goto LscanOctal; // 0_
+      else if (isdigit(*p))
+      {
+        if (*p == '8' || *p == '9')
+          goto Loctal_hasDecimalDigits; // 08 or 09
+        else
+          goto Loctal_enter_loop; // 0[0-7]
+      }
+    }
+
+    // Number 0
+    assert(p[-1] == '0');
+    assert(*p != '_' && !isdigit(*p));
+    assert(ulong_ == 0);
+    isDecimal = true;
+    goto Lfinalize;
+
+  LscanInteger:
+    assert(*p != 0 && isdigit(*p));
+    isDecimal = true;
+    goto Lenter_loop_int;
+    while (1)
+    {
+      if (*++p == '_')
+        continue;
+      if (!isdigit(*p))
+        break;
+    Lenter_loop_int:
+      // Appending a digit is safe while ulong_*10 + digit <= ulong.max.
+      if (ulong_ < ulong.max/10 || (ulong_ == ulong.max/10 && *p <= '5'))
+      {
+        ulong_ *= 10;
+        ulong_ += *p - '0';
+        continue;
+      }
+      // Overflow: skip following digits and digit separators, so that the
+      // token covers the whole literal.
+      overflow = true;
+      while (isdigit(*++p) || *p == '_') {}
+      break;
+    }
+
+    // The number could be a float, so check overflow below.
+    switch (*p)
+    {
+    case '.':
+      if (p[1] != '.')
+        goto LscanReal;
+      break;
+    case 'L':
+      if (p[1] != 'i')
+        break;
+    case 'i', 'f', 'F', 'e', 'E':
+      goto LscanReal;
+    default:
+    }
+
+    if (overflow)
+      error(t.start, MID.OverflowDecimalNumber);
+
+    assert((isdigit(p[-1]) || p[-1] == '_') && !isdigit(*p) && *p != '_');
+    goto Lfinalize;
+
+  LscanHex:
+    assert(digits == 0);
+    assert(*p == 'x' || *p == 'X');
+    while (1)
+    {
+      if (*++p == '_')
+        continue;
+      if (!ishexad(*p))
+        break;
+      ++digits;
+      ulong_ *= 16;
+      if (*p <= '9')
+        ulong_ += *p - '0';
+      else if (*p <= 'F')
+        ulong_ += *p - 'A' + 10;
+      else
+        ulong_ += *p - 'a' + 10;
+    }
+
+    assert(ishexad(p[-1]) || p[-1] == '_' || p[-1] == 'x' || p[-1] == 'X');
+    assert(!ishexad(*p) && *p != '_');
+
+    switch (*p)
+    {
+    case '.':
+      if (p[1] == '.')
+        break;
+    case 'p', 'P':
+      return scanHexReal(t);
+    default:
+    }
+
+    // More than 16 hex digits cannot fit into 64 bits.
+    if (digits == 0 || digits > 16)
+      error(t.start, digits == 0 ? MID.NoDigitsInHexNumber : MID.OverflowHexNumber);
+
+    goto Lfinalize;
+
+  LscanBinary:
+    assert(digits == 0);
+    assert(*p == 'b' || *p == 'B');
+    while (1)
+    {
+      if (*++p == '0')
+      {
+        ++digits;
+        ulong_ *= 2;
+      }
+      else if (*p == '1')
+      {
+        ++digits;
+        ulong_ *= 2;
+        ulong_ += *p - '0';
+      }
+      else if (*p == '_')
+        continue;
+      else
+        break;
+    }
+
+    // More than 64 binary digits cannot fit into 64 bits.
+    if (digits == 0 || digits > 64)
+      error(t.start, digits == 0 ? MID.NoDigitsInBinNumber : MID.OverflowBinaryNumber);
+
+    assert(p[-1] == '0' || p[-1] == '1' || p[-1] == '_' || p[-1] == 'b' || p[-1] == 'B', p[-1] ~ "");
+    assert( !(*p == '0' || *p == '1' || *p == '_') );
+    goto Lfinalize;
+
+  LscanOctal:
+    assert(*p == '_');
+    while (1)
+    {
+      if (*++p == '_')
+        continue;
+      if (!isoctal(*p))
+        break;
+    Loctal_enter_loop:
+      // Appending a digit is safe while ulong_*8 + digit <= ulong.max.
+      // ulong.max/8 * 8 + 7 equals ulong.max, hence the bounds below.
+      if (ulong_ < ulong.max/8 || (ulong_ == ulong.max/8 && *p <= '7'))
+      {
+        ulong_ *= 8;
+        ulong_ += *p - '0';
+        continue;
+      }
+      // Overflow: skip following digits and digit separators.
+      overflow = true;
+      while (isoctal(*++p) || *p == '_') {}
+      break;
+    }
+
+    bool hasDecimalDigits;
+    if (isdigit(*p))
+    {
+    Loctal_hasDecimalDigits:
+      // An 8 or 9 was found. Consume the rest of the digits; this is either
+      // an error or the integer part of a float like 09.5 .
+      hasDecimalDigits = true;
+      while (isdigit(*++p) || *p == '_') {}
+    }
+
+    // The number could be a float, so check errors below.
+    switch (*p)
+    {
+    case '.':
+      if (p[1] != '.')
+        goto LscanReal;
+      break;
+    case 'L':
+      if (p[1] != 'i')
+        break;
+    case 'i', 'f', 'F', 'e', 'E':
+      goto LscanReal;
+    default:
+    }
+
+    if (hasDecimalDigits)
+      error(t.start, MID.OctalNumberHasDecimals);
+
+    if (overflow)
+      error(t.start, MID.OverflowOctalNumber);
+    // Fall through to Lfinalize.
+
+  Lfinalize:
+    enum Suffix
+    {
+      None     = 0,
+      Unsigned = 1,
+      Long     = 2
+    }
+
+    // Scan optional suffix: L, Lu, LU, u, uL, U or UL.
+    Suffix suffix;
+    while (1)
+    {
+      switch (*p)
+      {
+      case 'L':
+        if (suffix & Suffix.Long)
+          break;
+        suffix |= Suffix.Long;
+        ++p;
+        continue;
+      case 'u', 'U':
+        if (suffix & Suffix.Unsigned)
+          break;
+        suffix |= Suffix.Unsigned;
+        ++p;
+        continue;
+      default:
+        break;
+      }
+      break;
+    }
+
+    // Determine type of Integer.
+    switch (suffix)
+    {
+    case Suffix.None:
+      if (ulong_ & 0x8000_0000_0000_0000)
+      {
+        if (isDecimal)
+          error(t.start, MID.OverflowDecimalSign);
+        t.type = TOK.Uint64;
+      }
+      else if (ulong_ & 0xFFFF_FFFF_0000_0000)
+        t.type = TOK.Int64;
+      else if (ulong_ & 0x8000_0000)
+        t.type = isDecimal ? TOK.Int64 : TOK.Uint32;
+      else
+        t.type = TOK.Int32;
+      break;
+    case Suffix.Unsigned:
+      if (ulong_ & 0xFFFF_FFFF_0000_0000)
+        t.type = TOK.Uint64;
+      else
+        t.type = TOK.Uint32;
+      break;
+    case Suffix.Long:
+      if (ulong_ & 0x8000_0000_0000_0000)
+      {
+        if (isDecimal)
+          error(t.start, MID.OverflowDecimalSign);
+        t.type = TOK.Uint64;
+      }
+      else
+        t.type = TOK.Int64;
+      break;
+    case Suffix.Unsigned | Suffix.Long:
+      t.type = TOK.Uint64;
+      break;
+    default:
+      assert(0);
+    }
+    t.ulong_ = ulong_;
+    t.end = p;
+    return;
+  LscanReal:
+    scanReal(t);
+    return;
+  }
+
+  /*
+    FloatLiteral:= Float[fFL]?i?
+    Float:= DecFloat | HexFloat
+    DecFloat:= ([0-9][0-9_]*[.][0-9_]*DecExponent?) | [.][0-9][0-9_]*DecExponent? | [0-9][0-9_]*DecExponent
+    DecExponent:= [eE][+-]?[0-9][0-9_]*
+    HexFloat:= 0[xX](HexDigits[.]HexDigits | [.][0-9a-fA-F]HexDigits? | HexDigits)HexExponent
+    HexExponent:= [pP][+-]?[0-9][0-9_]*
+  */
+  /// Scans the remainder of a decimal floating point literal.
+  ///
+  /// On entry p points either at the '.' of the literal, or (when called by
+  /// scanNumber()) at a float suffix or exponent character. The fraction and
+  /// the exponent are consumed, the literal's text from t.start up to p is
+  /// copied with underscores removed, and finalizeFloat() is called to handle
+  /// the suffix and to convert the value. Reports
+  /// MID.FloatExpMustStartWithDigit if the exponent has no digits.
+  void scanReal(ref Token t)
+  {
+    if (*p == '.')
+    {
+      assert(p[1] != '.');
+      // This function was called by scan() or scanNumber().
+      while (isdigit(*++p) || *p == '_') {}
+    }
+    else
+      // This function was called by scanNumber().
+      assert(delegate ()
+        {
+          switch (*p)
+          {
+          case 'L':
+            if (p[1] != 'i')
+              return false;
+          case 'i', 'f', 'F', 'e', 'E':
+            return true;
+          default:
+          }
+          return false;
+        }()
+      );
+
+    // Scan exponent.
+    if (*p == 'e' || *p == 'E')
+    {
+      ++p;
+      if (*p == '-' || *p == '+')
+        ++p;
+      if (isdigit(*p))
+        while (isdigit(*++p) || *p == '_') {}
+      else
+        error(t.start, MID.FloatExpMustStartWithDigit);
+    }
+
+    // Copy whole number and remove underscores from buffer.
+    char[] buffer = t.start[0..p-t.start].dup;
+    uint j; // Write index of the in-place compaction.
+    foreach (c; buffer)
+      if (c != '_')
+        buffer[j++] = c;
+    buffer.length = j; // Adjust length.
+    buffer ~= 0; // Terminate for C functions.
+
+    finalizeFloat(t, buffer);
+  }
+
+  /// Scans the remainder of a hexadecimal floating point literal.
+  ///
+  /// On entry p must point at the '.' or at the 'p'/'P' exponent marker that
+  /// follows the hex digits. The exponent is mandatory and must start with a
+  /// decimal digit. On success the literal's text is copied with underscores
+  /// removed and passed to finalizeFloat(). On failure the token becomes a
+  /// TOK.Float32 ending at p, and MID.HexFloatExponentRequired or
+  /// MID.HexFloatExpMustStartWithDigit is reported.
+  void scanHexReal(ref Token t)
+  {
+    assert(*p == '.' || *p == 'p' || *p == 'P');
+    MID mid; // Message to report at Lerr.
+    if (*p == '.')
+      while (ishexad(*++p) || *p == '_')
+      {}
+    // Decimal exponent is required.
+    if (*p != 'p' && *p != 'P')
+    {
+      mid = MID.HexFloatExponentRequired;
+      goto Lerr;
+    }
+    // Scan exponent
+    assert(*p == 'p' || *p == 'P');
+    ++p;
+    if (*p == '+' || *p == '-')
+      ++p;
+    if (!isdigit(*p))
+    {
+      mid = MID.HexFloatExpMustStartWithDigit;
+      goto Lerr;
+    }
+    while (isdigit(*++p) || *p == '_')
+    {}
+    // Copy whole number and remove underscores from buffer.
+    char[] buffer = t.start[0..p-t.start].dup;
+    uint j; // Write index of the in-place compaction.
+    foreach (c; buffer)
+      if (c != '_')
+        buffer[j++] = c;
+    buffer.length = j; // Adjust length.
+    buffer ~= 0; // Terminate for C functions.
+    finalizeFloat(t, buffer);
+    return;
+  Lerr:
+    t.type = TOK.Float32;
+    t.end = p;
+    error(t.start, mid);
+  }
+
+  /// Completes a floating point token: scans the optional suffix, converts
+  /// the text to a number and sets the type of the token.
+  ///
+  /// Params:
+  ///   t      = the token to complete; t.type, the value member matching the
+  ///            type, and t.end are set.
+  ///   buffer = the zero-terminated text of the literal without underscores.
+  ///
+  /// A suffix 'f' or 'F' yields TOK.Float32, 'L' yields TOK.Float80, and no
+  /// suffix yields TOK.Float64. A following 'i' switches to the imaginary
+  /// counterpart. Reports MID.OverflowFloatNumber if the conversion function
+  /// signals a range error.
+  void finalizeFloat(ref Token t, string buffer)
+  {
+    assert(buffer[$-1] == 0);
+    // The strto* functions set errno only on failure and never clear it.
+    // Reset it, so that the ERANGE check below cannot pick up a stale value
+    // left behind by an earlier call.
+    errno(0);
+    // Float number is well-formed. Check suffixes and do conversion.
+    switch (*p)
+    {
+    case 'f', 'F':
+      t.type = TOK.Float32;
+      t.float_ = strtof(buffer.ptr, null);
+      ++p;
+      break;
+    case 'L':
+      t.type = TOK.Float80;
+      t.real_ = strtold(buffer.ptr, null);
+      ++p;
+      break;
+    default:
+      t.type = TOK.Float64;
+      t.double_ = strtod(buffer.ptr, null);
+    }
+    if (*p == 'i')
+    {
+      ++p;
+      t.type += 3; // Switch to imaginary counterpart.
+      assert(t.type == TOK.Imaginary32 ||
+             t.type == TOK.Imaginary64 ||
+             t.type == TOK.Imaginary80);
+    }
+    if (errno() == ERANGE)
+      error(t.start, MID.OverflowFloatNumber);
+    t.end = p;
+  }
+
+  /// Scan special token: #line Integer [Filespec] EndOfLine
+  ///
+  /// On entry p must point at the '#'. The token t becomes a TOK.HashLine
+  /// token flagged as whitespace. The integer is scanned into a new token
+  /// stored in t.tokLineNum, and the optional quoted file name into a new
+  /// TOK.Filespec token stored in t.tokLineFilespec. Unless the lexer is
+  /// inside a token string, the line number offset (lineNum_hline) and the
+  /// path used in error messages (errorPath) are updated accordingly.
+  ///
+  /// On a malformed sequence scanning stops, t.end is set to the current
+  /// position and one of the following is reported:
+  /// MID.ExpectedIdentifierSTLine, MID.ExpectedIntegerAfterSTLine,
+  /// MID.ExpectedFilespec, MID.UnterminatedFilespec or
+  /// MID.UnterminatedSpecialToken.
+  void scanSpecialTokenSequence(ref Token t)
+  {
+    assert(*p == '#');
+    t.type = TOK.HashLine;
+    t.setWhitespaceFlag();
+
+    MID mid; // Message to report at Lerr.
+    auto errorAtColumn = p; // Position to report at Lerr; defaults to '#'.
+
+    ++p;
+    if (p[0] != 'l' || p[1] != 'i' || p[2] != 'n' || p[3] != 'e')
+    {
+      mid = MID.ExpectedIdentifierSTLine;
+      goto Lerr;
+    }
+    p += 3; // Move onto the 'e' of "line"; the loop below pre-increments p.
+
+    // TODO: #line58"path/file" is legal. Require spaces?
+    //       State.Space could be used for that purpose.
+    enum State
+    { /+Space,+/ Integer, Filespec, End }
+
+    State state = State.Integer;
+
+    while (!isEndOfLine(++p))
+    {
+      if (isspace(*p))
+        continue;
+      if (state == State.Integer)
+      {
+        if (!isdigit(*p))
+        {
+          errorAtColumn = p;
+          mid = MID.ExpectedIntegerAfterSTLine;
+          goto Lerr;
+        }
+        t.tokLineNum = new Token;
+        scan(*t.tokLineNum);
+        // Only integers of 32 bits are accepted as line numbers.
+        if (t.tokLineNum.type != TOK.Int32 && t.tokLineNum.type != TOK.Uint32)
+        {
+          errorAtColumn = t.tokLineNum.start;
+          mid = MID.ExpectedIntegerAfterSTLine;
+          goto Lerr;
+        }
+        --p; // Go one back because scan() advanced p past the integer.
+        state = State.Filespec;
+      }
+      else if (state == State.Filespec)
+      {
+        if (*p != '"')
+        {
+          errorAtColumn = p;
+          mid = MID.ExpectedFilespec;
+          goto Lerr;
+        }
+        t.tokLineFilespec = new Token;
+        t.tokLineFilespec.start = p;
+        t.tokLineFilespec.type = TOK.Filespec;
+        t.tokLineFilespec.setWhitespaceFlag();
+        while (*++p != '"')
+        {
+          if (isEndOfLine(p))
+          {
+            errorAtColumn = t.tokLineFilespec.start;
+            mid = MID.UnterminatedFilespec;
+            t.tokLineFilespec.end = p;
+            goto Lerr;
+          }
+          // Step over a multi-byte character as a whole.
+          isascii(*p) || decodeUTF8();
+        }
+        auto start = t.tokLineFilespec.start +1; // +1 skips '"'
+        t.tokLineFilespec.str = start[0 .. p - start];
+        t.tokLineFilespec.end = p + 1;
+        state = State.End;
+      }
+      else/+ if (state == State.End)+/
+      {
+        // Anything but whitespace after the file name is an error.
+        mid = MID.UnterminatedSpecialToken;
+        goto Lerr;
+      }
+    }
+    assert(isEndOfLine(p));
+
+    // The line ended before an integer was found.
+    if (state == State.Integer)
+    {
+      errorAtColumn = p;
+      mid = MID.ExpectedIntegerAfterSTLine;
+      goto Lerr;
+    }
+
+    // Evaluate #line only when not in token string.
+    if (!inTokenString && t.tokLineNum)
+    {
+      this.lineNum_hline = this.lineNum - t.tokLineNum.uint_ + 1;
+      if (t.tokLineFilespec)
+        this.errorPath = t.tokLineFilespec.str;
+    }
+    t.end = p;
+
+    return;
+  Lerr:
+    t.end = p;
+    error(errorAtColumn, mid);
+  }
+
+  /++
+    Creates a zero-width token of type TOK.Empty and links it into the
+    token list directly in front of t.
+    Useful in the parsing phase for representing a node in the AST
+    that doesn't consume an actual token from the source text.
+    Returns: the newly inserted token.
+  +/
+  Token* insertEmptyTokenBefore(Token* t)
+  {
+    assert(t !is null && t.prev !is null);
+    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type));
+    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type));
+
+    auto before = t.prev;
+    auto dummy = new Token;
+    dummy.type = TOK.Empty;
+    // The dummy token is an empty span located where the previous token ends.
+    dummy.start = before.end;
+    dummy.end = before.end;
+    // Splice: before <-> dummy <-> t
+    dummy.prev = before;
+    dummy.next = t;
+    before.next = dummy;
+    t.prev = dummy;
+    return dummy;
+  }
+
+  /// Converts a line number of the source text into the line number
+  /// that is reported in error messages, by subtracting lineNum_hline
+  /// (the offset that is updated when a #line token is evaluated).
+  uint errorLineNumber(uint lineNum)
+  {
+    auto adjusted = lineNum - this.lineNum_hline;
+    return adjusted;
+  }
+
+  /// Reports the error identified by mid at columnPos, using the line
+  /// that is currently being scanned (this.lineNum and this.lineBegin).
+  /// The variadic arguments are forwarded to error_() where they are
+  /// used to format the message.
+  void error(char* columnPos, MID mid, ...)
+  {
+    error_(this.lineNum, this.lineBegin, columnPos, mid, _arguments, _argptr);
+  }
+
+  /// Reports the error identified by mid at columnPos on an explicitly
+  /// given line (lineNum and the pointer to its beginning, lineBegin).
+  /// The variadic arguments are forwarded to error_() where they are
+  /// used to format the message.
+  void error(uint lineNum, char* lineBegin, char* columnPos, MID mid, ...)
+  {
+    error_(lineNum, lineBegin, columnPos, mid, _arguments, _argptr);
+  }
+
+  /++
+    Creates a LexerError and records it.
+    The line number is first adjusted with errorLineNumber(), the message
+    text is looked up with GetMsg(mid) and formatted with the passed
+    variadic argument information.
+    The error is appended to this.errors and, if an information manager
+    is set, to infoMan as well.
+  +/
+  void error_(uint lineNum, char* lineBegin, char* columnPos, MID mid,
+              TypeInfo[] _arguments, void* _argptr)
+  {
+    auto location = new Location(errorPath, this.errorLineNumber(lineNum),
+                                 lineBegin, columnPos);
+    auto lexerError = new LexerError(location,
+                                     Format(_arguments, _argptr, GetMsg(mid)));
+    errors ~= lexerError;
+    if (infoMan is null)
+      return;
+    infoMan ~= lexerError;
+  }
+
+  /// Scans the whole text until EOF is encountered and
+  /// returns the head of the token list.
+  Token* getTokens()
+  {
+    for (;;)
+    {
+      if (nextToken() == TOK.EOF)
+        break;
+    }
+    return head;
+  }
+
+  /// Keeps requesting tokens with nextToken() until the EOF token
+  /// has been returned. The tokens themselves are discarded here.
+  void scanAll()
+  {
+    for (;;)
+    {
+      if (nextToken() == TOK.EOF)
+        return;
+    }
+  }
+
+  /// Returns the token that follows the two leading tokens of the list.
+  /// Layout of the list: HEAD -> Newline -> First Token
+  Token* firstToken()
+  {
+    auto leadingNewline = this.head.next;
+    return leadingNewline.next;
+  }
+
+  /// Inserts every entry of the keywords list into table,
+  /// using the entry's string (its str member) as the key.
+  static void loadKeywords(ref Identifier[string] table)
+  {
+    foreach (keyword; keywords)
+    {
+      table[keyword.str] = keyword;
+    }
+  }
+
+  /++
+    Returns true if str is a valid D identifier.
+    The string must not be empty and must not start with an ASCII digit.
+    Every decoded character must either be an ASCII identifier character
+    (letter, digit or underscore) or a non-ASCII Unicode alpha character.
+    Returns false as soon as decoding yields ERROR_CHAR.
+  +/
+  static bool isIdentifierString(char[] str)
+  {
+    if (str.length == 0 || isdigit(str[0]))
+      return false;
+    size_t idx;
+    do
+    {
+      auto c = dil.Unicode.decode(str, idx);
+      if (c == ERROR_CHAR)
+        return false;
+      // Consult the ASCII property table only for ASCII characters.
+      // isident() takes a char: handing it a larger code point would
+      // narrow the value and classify the character by its low byte
+      // (e.g. U+2030 would look like the digit '0').
+      if (isascii(c) ? !isident(cast(char)c) : !isUniAlpha(c))
+        return false;
+    } while (idx < str.length)
+    return true;
+  }
+
+  /// Returns true if str is a keyword or a special token (__FILE__, __LINE__ etc.)
+  /// An empty string, or one that is not a valid identifier, yields false.
+  static bool isReservedIdentifier(char[] str)
+  {
+    // Lookup table; filled on the first call that passes a non-empty string.
+    static Identifier[string] reserved_ids_table;
+
+    if (str.length == 0)
+      return false;
+    if (reserved_ids_table is null)
+      Lexer.loadKeywords(reserved_ids_table);
+    if (!isIdentifierString(str))
+      return false;
+
+    auto entry = str in reserved_ids_table;
+    return entry !is null;
+  }
+
+  /++
+    Returns true if the current character to be decoded is
+    a Unicode alpha character.
+    The UTF-8 sequence starting at this.p is decoded on a local copy of
+    the pointer. No error is reported for malformed sequences; false is
+    returned instead.
+    The current pointer 'p' is not advanced if false is returned.
+    If true is returned, 'p' points to the last trail byte of the sequence.
+  +/
+  bool isUnicodeAlpha()
+  {
+    assert(!isascii(*p), "check for ASCII char before calling isUnicodeAlpha().");
+    // Work on a copy; this.p is only written back on success.
+    char* p = this.p;
+    dchar d = *p;
+    ++p; // Move to second byte.
+    // Error if second byte is not a trail byte.
+    if (!isTrailByte(*p))
+      return false;
+    // Check for overlong sequences.
+    switch (d)
+    {
+    case 0xE0, // 11100000 100xxxxx
+         0xF0, // 11110000 1000xxxx
+         0xF8, // 11111000 10000xxx
+         0xFC: // 11111100 100000xx
+      if ((*p & d) == 0x80)
+        return false;
+      // Intentional fall-through to the default case.
+    default:
+      if ((d & 0xFE) == 0xC0) // 1100000x
+        return false;
+    }
+    // Code snippets that are mixed in below.
+    const char[] checkNextByte = "if (!isTrailByte(*++p))"
+                                 "  return false;";
+    const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
+    // Decode
+    if ((d & 0b1110_0000) == 0b1100_0000)
+    { // 110xxxxx 10xxxxxx
+      d &= 0b0001_1111;
+      mixin(appendSixBits);
+    }
+    else if ((d & 0b1111_0000) == 0b1110_0000)
+    { // 1110xxxx 10xxxxxx 10xxxxxx
+      d &= 0b0000_1111;
+      mixin(appendSixBits ~
+            checkNextByte ~ appendSixBits);
+    }
+    else if ((d & 0b1111_1000) == 0b1111_0000)
+    { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+      d &= 0b0000_0111;
+      mixin(appendSixBits ~
+            checkNextByte ~ appendSixBits ~
+            checkNextByte ~ appendSixBits);
+    }
+    else
+      // Sequences with more than 4 bytes, or a trail byte in lead position.
+      return false;
+
+    assert(isTrailByte(*p));
+    if (!isValidChar(d) || !isUniAlpha(d))
+      return false;
+    // Only advance pointer if this is a Unicode alpha character.
+    this.p = p;
+    return true;
+  }
+
+  /++
+    Decodes the next UTF-8 sequence.
+    The sequence starting at this.p (which must not be an ASCII character)
+    is decoded on a local copy of the pointer, which is written back to
+    this.p before returning.
+    On success this.p points to the last trail byte of the sequence.
+    If the sequence is malformed, overlong, longer than 4 bytes, or decodes
+    to an invalid character, MID.InvalidUTF8Sequence is reported at the
+    position where decoding started and REPLACEMENT_CHAR is returned.
+    Returns: the decoded character or REPLACEMENT_CHAR.
+  +/
+  dchar decodeUTF8()
+  {
+    assert(!isascii(*p), "check for ASCII char before calling decodeUTF8().");
+    // Local copy shadowing the member; this.p still marks the start
+    // of the sequence and is used for the error location below.
+    char* p = this.p;
+    dchar d = *p;
+
+    ++p; // Move to second byte.
+    // Error if second byte is not a trail byte.
+    if (!isTrailByte(*p))
+      goto Lerr2;
+
+    // Check for overlong sequences.
+    // Note: the case group has no break and falls through to default.
+    switch (d)
+    {
+    case 0xE0, // 11100000 100xxxxx
+         0xF0, // 11110000 1000xxxx
+         0xF8, // 11111000 10000xxx
+         0xFC: // 11111100 100000xx
+      if ((*p & d) == 0x80)
+        goto Lerr;
+    default:
+      if ((d & 0xFE) == 0xC0) // 1100000x
+        goto Lerr;
+    }
+
+    // Code snippets that are mixed in below.
+    const char[] checkNextByte = "if (!isTrailByte(*++p))"
+                                 "  goto Lerr2;";
+    const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
+
+    // Decode
+    if ((d & 0b1110_0000) == 0b1100_0000)
+    { // 110xxxxx 10xxxxxx
+      d &= 0b0001_1111;
+      mixin(appendSixBits);
+    }
+    else if ((d & 0b1111_0000) == 0b1110_0000)
+    { // 1110xxxx 10xxxxxx 10xxxxxx
+      d &= 0b0000_1111;
+      mixin(appendSixBits ~
+            checkNextByte ~ appendSixBits);
+    }
+    else if ((d & 0b1111_1000) == 0b1111_0000)
+    { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+      d &= 0b0000_0111;
+      mixin(appendSixBits ~
+            checkNextByte ~ appendSixBits ~
+            checkNextByte ~ appendSixBits);
+    }
+    else
+      // 5 and 6 byte UTF-8 sequences are not allowed yet.
+      // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+      // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+      goto Lerr;
+
+    assert(isTrailByte(*p));
+
+    if (!isValidChar(d))
+    {
+    Lerr:
+      // Three cases:
+      // *) the UTF-8 sequence was successfully decoded but the resulting
+      //    character is invalid.
+      //    p points to last trail byte in the sequence.
+      // *) the UTF-8 sequence is overlong.
+      //    p points to second byte in the sequence.
+      // *) the UTF-8 sequence has more than 4 bytes or starts with
+      //    a trail byte.
+      //    p points to second byte in the sequence.
+      assert(isTrailByte(*p));
+      // Move to next ASCII character or lead byte of a UTF-8 sequence.
+      while (p < (end-1) && isTrailByte(*p))
+        ++p;
+      --p;
+      assert(!isTrailByte(p[1]));
+    Lerr2:
+      // NOTE(review): when arriving here via 'goto Lerr2', p points to the
+      // byte that failed the trail-byte check, not to the byte before it.
+      // Callers that pre-increment p afterwards would skip that byte --
+      // confirm this is intended.
+      d = REPLACEMENT_CHAR;
+      error(this.p, MID.InvalidUTF8Sequence);
+    }
+
+    this.p = p;
+    return d;
+  }
+
+  /++
+    Appends the UTF-8 encoding of d to str.
+    d must be a valid, non-ASCII character (both conditions are asserted).
+    Sequences of 2, 3 or 4 bytes are produced; 5 and 6 byte sequences
+    (lead bytes 0xF8 and 0xFC) are deliberately not generated.
+  +/
+  static void encodeUTF8(ref char[] str, dchar d)
+  {
+    assert(!isascii(d), "check for ASCII char before calling encodeUTF8().");
+    assert(isValidChar(d), "check if character is valid before calling encodeUTF8().");
+
+    char[6] buf = void;
+    size_t len; // Number of bytes written to buf.
+    if (d < 0x800)
+    { // 110xxxxx 10xxxxxx
+      buf[0] = 0xC0 | (d >> 6);
+      buf[1] = 0x80 | (d & 0x3F);
+      len = 2;
+    }
+    else if (d < 0x10000)
+    { // 1110xxxx 10xxxxxx 10xxxxxx
+      buf[0] = 0xE0 | (d >> 12);
+      buf[1] = 0x80 | ((d >> 6) & 0x3F);
+      buf[2] = 0x80 | (d & 0x3F);
+      len = 3;
+    }
+    else if (d < 0x200000)
+    { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+      buf[0] = 0xF0 | (d >> 18);
+      buf[1] = 0x80 | ((d >> 12) & 0x3F);
+      buf[2] = 0x80 | ((d >> 6) & 0x3F);
+      buf[3] = 0x80 | (d & 0x3F);
+      len = 4;
+    }
+    else
+      // There are no 5 and 6 byte UTF-8 sequences yet.
+      assert(0);
+    str ~= buf[0 .. len];
+  }
+}
+
+// Scans a source text assembled from a table of token texts and checks
+// that the text of every scanned token matches the table entry.
+unittest
+{
+  Stdout("Testing Lexer.\n");
+  // A token's source text together with the token type it stands for.
+  struct Pair
+  {
+    char[] tokenText;
+    TOK type;
+  }
+  static Pair[] pairs = [
+    {"#!äöüß",  TOK.Shebang},       {"\n",      TOK.Newline},
+    {"//çay",   TOK.Comment},       {"\n",      TOK.Newline},
+                                    {"&",       TOK.AndBinary},
+    {"/*çağ*/", TOK.Comment},       {"&&",      TOK.AndLogical},
+    {"/+çak+/", TOK.Comment},       {"&=",      TOK.AndAssign},
+    {">",       TOK.Greater},       {"+",       TOK.Plus},
+    {">=",      TOK.GreaterEqual},  {"++",      TOK.PlusPlus},
+    {">>",      TOK.RShift},        {"+=",      TOK.PlusAssign},
+    {">>=",     TOK.RShiftAssign},  {"-",       TOK.Minus},
+    {">>>",     TOK.URShift},       {"--",      TOK.MinusMinus},
+    {">>>=",    TOK.URShiftAssign}, {"-=",      TOK.MinusAssign},
+    {"<",       TOK.Less},          {"=",       TOK.Assign},
+    {"<=",      TOK.LessEqual},     {"==",      TOK.Equal},
+    {"<>",      TOK.LorG},          {"~",       TOK.Tilde},
+    {"<>=",     TOK.LorEorG},       {"~=",      TOK.CatAssign},
+    {"<<",      TOK.LShift},        {"*",       TOK.Mul},
+    {"<<=",     TOK.LShiftAssign},  {"*=",      TOK.MulAssign},
+    {"!",       TOK.Not},           {"/",       TOK.Div},
+    {"!=",      TOK.NotEqual},      {"/=",      TOK.DivAssign},
+    {"!<",      TOK.UorGorE},       {"^",       TOK.Xor},
+    {"!>",      TOK.UorLorE},       {"^=",      TOK.XorAssign},
+    {"!<=",     TOK.UorG},          {"%",       TOK.Mod},
+    {"!>=",     TOK.UorL},          {"%=",      TOK.ModAssign},
+    {"!<>",     TOK.UorE},          {"(",       TOK.LParen},
+    {"!<>=",    TOK.Unordered},     {")",       TOK.RParen},
+    {".",       TOK.Dot},           {"[",       TOK.LBracket},
+    {"..",      TOK.Slice},         {"]",       TOK.RBracket},
+    {"...",     TOK.Ellipses},      {"{",       TOK.LBrace},
+    {"|",       TOK.OrBinary},      {"}",       TOK.RBrace},
+    {"||",      TOK.OrLogical},     {":",       TOK.Colon},
+    {"|=",      TOK.OrAssign},      {";",       TOK.Semicolon},
+    {"?",       TOK.Question},      {",",       TOK.Comma},
+    {"$",       TOK.Dollar},        {"cam",     TOK.Identifier},
+    {"çay",     TOK.Identifier},    {".0",      TOK.Float64},
+    {"0",       TOK.Int32},         {"\n",      TOK.Newline},
+    {"\r",      TOK.Newline},       {"\r\n",    TOK.Newline},
+    {"\u2028",  TOK.Newline},       {"\u2029",  TOK.Newline}
+  ];
+
+  char[] src;
+
+  // Join all token texts into a single string.
+  // Tokens are separated by a space, except for line comments and the
+  // shebang line: those are directly followed by their newline entry.
+  foreach (i, pair; pairs)
+    if (pair.type == TOK.Comment && pair.tokenText[1] == '/' || // Line comment.
+        pair.type == TOK.Shebang)
+    {
+      assert(pairs[i+1].type == TOK.Newline); // Must be followed by a newline.
+      src ~= pair.tokenText;
+    }
+    else
+      src ~= pair.tokenText ~ " ";
+
+  auto lx = new Lexer(src, "");
+  auto token = lx.getTokens();
+
+  uint i;
+  assert(token == lx.head);
+  assert(token.next.type == TOK.Newline);
+  // Skip the head token and the newline token that follows it.
+  token = token.next.next;
+  // Compare the source text of each scanned token with the table.
+  // NOTE(review): only the text is compared; pairs[i].type is not checked
+  // against token.type, and i == pairs.length is not asserted afterwards.
+  do
+  {
+    assert(i < pairs.length);
+    assert(token.srcText == pairs[i].tokenText, Format("Scanned '{0}' but expected '{1}'", token.srcText, pairs[i].tokenText));
+    ++i;
+    token = token.next;
+  } while (token.type != TOK.EOF)
+}
+
+// Checks the sequence of token types that Lexer.peek() steps through.
+unittest
+{
+  Stdout("Testing method Lexer.peek()\n");
+
+  // Starting at the head token, peek() must step through these types.
+  static TOK[] typesOfSource = [TOK.Newline, TOK.Unittest, TOK.LBrace,
+                                TOK.RBrace, TOK.EOF];
+  string sourceText = "unittest { }";
+  auto lx = new Lexer(sourceText, null);
+  auto next = lx.head;
+  foreach (expectedType; typesOfSource)
+  {
+    lx.peek(next);
+    assert(next.type == expectedType);
+  }
+
+  // An empty source text: only the newline token and EOF follow the head.
+  static TOK[] typesOfEmpty = [TOK.Newline, TOK.EOF];
+  lx = new Lexer("", null);
+  next = lx.head;
+  foreach (expectedType; typesOfEmpty)
+  {
+    lx.peek(next);
+    assert(next.type == expectedType);
+  }
+}
+
+// Placeholder: this unittest contains no assertions.
+// The comments below only list number literals.
+// NOTE(review): presumably these are candidates for future tests -- confirm.
+unittest
+{
+  // Numbers unittest
+  // 0L 0ULi 0_L 0_UL 0x0U 0x0p2 0_Fi 0_e2 0_F 0_i
+  // 0u 0U 0uL 0UL 0L 0LU 0Lu
+  // 0Li 0f 0F 0fi 0Fi 0i
+  // 0b_1_LU 0b1000u
+  // 0x232Lu
+}
+
+/// ASCII character properties table.
+///
+/// Indexed by the character's byte value; 16 entries per row.
+/// The low byte of an entry holds CProperty flags
+/// (e.g. 7 = Octal|Digit|Hex for '0'-'7', 12 = Hex|Alpha for 'A'-'F',
+/// 16 = Underscore for '_', 32 = Whitespace).
+/// The second byte holds the value of the escape sequence that is
+/// introduced by the character (e.g. 0xa08 for 'n': Alpha and value 10).
+/// All entries for byte values 128-255 are zero.
+/// The literal can be regenerated with the code in version(gen_ptable).
+static const int ptable[256] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+32, 0, 0x2200, 0, 0, 0, 0, 0x2700, 0, 0, 0, 0, 0, 0, 0, 0,
+ 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0x3f00,
+ 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0x5c00, 0, 0,16,
+ 0, 0x70c, 0x80c,12,12,12, 0xc0c, 8, 8, 8, 8, 8, 8, 8, 0xa08, 8,
+ 8, 8, 0xd08, 8, 0x908, 8, 0xb08, 8, 8, 8, 8, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+];
+
+/// Bit flags describing the properties of a character.
+/// They are stored in the low byte of the entries in ptable.
+enum CProperty
+{
+       Octal = 1,    /// '0'-'7'
+       Digit = 1<<1, /// '0'-'9'
+         Hex = 1<<2, /// '0'-'9', 'a'-'f', 'A'-'F'
+       Alpha = 1<<3, /// 'a'-'z', 'A'-'Z'
+  Underscore = 1<<4, /// '_'
+  Whitespace = 1<<5  /// ' ', '\t', '\v', '\f'
+}
+
+/// Selects the second byte of a ptable entry, where the escape value is
+/// stored. Note that char2ev() shifts the entry right by 8 bits without
+/// applying this mask (the masking variant is commented out there).
+const uint EVMask = 0xFF00; // Bit mask for escape value
+
+private alias CProperty CP;
+
+/// Nonzero if c is an octal digit.
+int isoctal(char c)
+{
+  return ptable[c] & CP.Octal;
+}
+
+/// Nonzero if c is a decimal digit.
+int isdigit(char c)
+{
+  return ptable[c] & CP.Digit;
+}
+
+/// Nonzero if c is a hexadecimal digit.
+int ishexad(char c)
+{
+  return ptable[c] & CP.Hex;
+}
+
+/// Nonzero if c is an ASCII letter.
+int isalpha(char c)
+{
+  return ptable[c] & CP.Alpha;
+}
+
+/// Nonzero if c is an ASCII letter or a decimal digit.
+int isalnum(char c)
+{
+  const int mask = CP.Alpha | CP.Digit;
+  return ptable[c] & mask;
+}
+
+/// Nonzero if c may begin an identifier (ASCII letter or underscore).
+int isidbeg(char c)
+{
+  const int mask = CP.Alpha | CP.Underscore;
+  return ptable[c] & mask;
+}
+
+/// Nonzero if c may appear in an identifier
+/// (ASCII letter, underscore or decimal digit).
+int isident(char c)
+{
+  const int mask = CP.Alpha | CP.Underscore | CP.Digit;
+  return ptable[c] & mask;
+}
+
+/// Nonzero if c is flagged as whitespace in ptable.
+int isspace(char c)
+{
+  return ptable[c] & CP.Whitespace;
+}
+
+/// Returns the escape value stored in the second byte of c's table entry.
+int char2ev(char c)
+{
+  return ptable[c] >> 8; /*(ptable[c] & EVMask) >> 8;*/
+}
+
+/// Returns 1 if c is in the ASCII range (0-127), otherwise 0.
+int isascii(uint c)
+{
+  return c <= 0x7F;
+}
+
+// Generator for the ptable literal; only compiled with version=gen_ptable.
+// It recomputes every table entry and prints the table to Stdout as an
+// array literal with 16 entries per line.
+version(gen_ptable)
+static this()
+{
+  alias ptable p;
+  assert(p.length == 256);
+  // Initialize character properties table.
+  for (int i; i < p.length; ++i)
+  {
+    p[i] = 0; // Reset
+    if ('0' <= i && i <= '7')
+      p[i] |= CP.Octal;
+    if ('0' <= i && i <= '9')
+      p[i] |= CP.Digit;
+    // isdigit() reads p[i], whose Digit flag was set just above.
+    if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
+      p[i] |= CP.Hex;
+    if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
+      p[i] |= CP.Alpha;
+    if (i == '_')
+      p[i] |= CP.Underscore;
+    if (i == ' ' || i == '\t' || i == '\v' || i == '\f')
+      p[i] |= CP.Whitespace;
+  }
+  // Store escape sequence values in second byte.
+  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");
+  p['\''] |= 39 << 8;
+  p['"'] |= 34 << 8;
+  p['?'] |= 63 << 8;
+  p['\\'] |= 92 << 8;
+  p['a'] |= 7 << 8;
+  p['b'] |= 8 << 8;
+  p['f'] |= 12 << 8;
+  p['n'] |= 10 << 8;
+  p['r'] |= 13 << 8;
+  p['t'] |= 9 << 8;
+  p['v'] |= 11 << 8;
+  // Print a formatted array literal.
+  char[] array = "[\n";
+  foreach (i, c; ptable)
+  {
+    // Values above 255 carry an escape value and are printed in hex.
+    // A newline is appended after every 16th entry.
+    array ~= Format((c>255?" 0x{0:x},":"{0,2},"), c) ~ (((i+1) % 16) ? "":"\n");
+  }
+  // Replace the trailing ",\n" of the last entry with the closing bracket.
+  array[$-2..$] = "\n]";
+  Stdout(array).newline;
+}
--- a/trunk/src/main.d	Fri Jan 04 23:00:38 2008 +0100
+++ b/trunk/src/main.d	Sat Jan 05 16:53:27 2008 +0100
@@ -5,7 +5,7 @@
 module main;

 import dil.Parser;
-import dil.Lexer;
+import dil.lexer.Lexer;
 import dil.Token;
 import dil.Messages;
 import dil.Settings;