view src/dil/lexer/Lexer.d @ 820:1d06b4aed7cf

Revised code in the first pass. Added code to handle anonymous unions and structs. Hope the idea will work. Added type to class Aggregate and isAnonymous to some other Symbol classes.
author Aziz K?ksal <aziz.koeksal@gmail.com>
date Fri, 14 Mar 2008 15:42:08 +0100
parents 372fa4fbbb1d
children d659f7aa055c
line wrap: on
line source

/++
  Author: Aziz Köksal
  License: GPL3
+/
module dil.lexer.Lexer;

import dil.lexer.Token;
import dil.lexer.Keywords;
import dil.lexer.Identifier;
import dil.lexer.IdTable;
import dil.Information;
import dil.Messages;
import dil.HtmlEntities;
import dil.CompilerInfo;
import dil.Unicode;
import dil.SourceText;
import dil.Time;
import common;

import tango.stdc.stdlib : strtof, strtod, strtold;
import tango.stdc.errno : errno, ERANGE;

public import dil.lexer.Funcs;

/// The Lexer analyzes the characters of a source text and
/// produces a doubly-linked list of tokens.
class Lexer
{
  SourceText srcText; /// The source text.
  char* p;            /// Points to the current character in the source text.
  char* end;          /// Points one character past the end of the source text.

  Token* head;  /// The head of the doubly linked token list.
  Token* tail;  /// The tail of the linked list. Set in scan().
  Token* token; /// Points to the current token in the token list.

  // Members used for error messages:
  InfoManager infoMan;
  LexerError[] errors;
  /// Always points to the first character of the current line.
  char* lineBegin;
//   Token* newline;     /// Current newline token.
  uint lineNum = 1;   /// Current, actual source text line number.
  uint lineNum_hline; /// Line number set by #line.
  uint inTokenString; /// > 0 if inside q{ }
  /// Holds the original file path and the modified one (by #line.)
  NewlineData.FilePaths* filePaths;

  /// Construct a Lexer object.
  /// Params:
  ///   srcText = the UTF-8 source code.
  ///   infoMan = used for collecting error messages.
  this(SourceText srcText, InfoManager infoMan = null)
  {
    this.srcText = srcText;
    this.infoMan = infoMan;

    assert(text.length && text[$-1] == 0, "source text has no sentinel character");
    this.p = text.ptr;
    this.end = this.p + text.length;
    this.lineBegin = this.p;

    this.head = new Token;
    this.head.kind = TOK.HEAD;
    this.head.start = this.head.end = this.p;
    this.token = this.head;
    // Initialize this.filePaths.
    newFilePath(this.srcText.filePath);
    // Add a newline as the first token after the head.
    auto newline = new Token;
    newline.kind = TOK.Newline;
    newline.setWhitespaceFlag();
    newline.start = newline.end = this.p;
    newline.newline.filePaths = this.filePaths;
    newline.newline.oriLineNum = 1;
    newline.newline.setLineNum = 0;
    // Link in.
    this.token.next = newline;
    newline.prev = this.token;
    this.token = newline;
//     this.newline = newline;
    scanShebang();
  }

  /// The destructor deletes the doubly-linked token list.
  ~this()
  {
    auto token = head.next;
    while (token !is null)
    {
      assert(token.kind == TOK.EOF ? token == tail && token.next is null : 1);
      delete token.prev;
      token = token.next;
    }
    delete tail;
  }

  char[] text()
  {
    return srcText.data;
  }

  /// The "shebang" may optionally appear once at the beginning of a file.
  /// Regexp: #![^\EndOfLine]*
  void scanShebang()
  {
    if (*p == '#' && p[1] == '!')
    {
      auto t = new Token;
      t.kind = TOK.Shebang;
      t.setWhitespaceFlag();
      t.start = p;
      ++p;
      while (!isEndOfLine(++p))
        isascii(*p) || decodeUTF8();
      t.end = p;
      this.token.next = t;
      t.prev = this.token;
    }
  }

  /// Sets the value of the special token.
  void finalizeSpecialToken(ref Token t)
  {
    assert(t.srcText[0..2] == "__");
    switch (t.kind)
    {
    case TOK.FILE:
      t.str = this.filePaths.setPath;
      break;
    case TOK.LINE:
      t.uint_ = this.errorLineNumber(this.lineNum);
      break;
    case TOK.DATE,
         TOK.TIME,
         TOK.TIMESTAMP:
      auto time_str = Time.toString();
      switch (t.kind)
      {
      case TOK.DATE:
        time_str = Time.month_day(time_str) ~ ' ' ~ Time.year(time_str); break;
      case TOK.TIME:
        time_str = Time.time(time_str); break;
      case TOK.TIMESTAMP:
        break; // time_str is the timestamp.
      default: assert(0);
      }
      time_str ~= '\0'; // Terminate with a zero.
      t.str = time_str;
      break;
    case TOK.VENDOR:
      t.str = VENDOR;
      break;
    case TOK.VERSION:
      t.uint_ = VERSION_MAJOR*1000 + VERSION_MINOR;
      break;
    default:
      assert(0);
    }
  }

  /// Sets a new file path.
  void newFilePath(char[] newPath)
  {
    auto paths = new NewlineData.FilePaths;
    paths.oriPath = this.srcText.filePath;
    paths.setPath = newPath;
    this.filePaths = paths;
  }

  private void setLineBegin(char* p)
  {
    // Check that we can look behind one character.
    assert((p-1) >= text.ptr && p < end);
    // Check that previous character is a newline.
    assert(isNewlineEnd(p - 1));
    this.lineBegin = p;
  }

  /// Scans the next token in the source text.
  ///
  /// Creates a new token if t.next is null and appends it to the list.
  private void scanNext(ref Token* t)
  {
    assert(t !is null);
    if (t.next)
    {
      t = t.next;
//       if (t.kind == TOK.Newline)
//         this.newline = t;
    }
    else if (t != this.tail)
    {
      Token* new_t = new Token;
      scan(*new_t);
      new_t.prev = t;
      t.next = new_t;
      t = new_t;
    }
  }

  /// Advance t one token forward.
  void peek(ref Token* t)
  {
    scanNext(t);
  }

  /// Advance to the next token in the source text.
  TOK nextToken()
  {
    scanNext(this.token);
    return this.token.kind;
  }

  /// Returns true if p points to the last character of a Newline.
  bool isNewlineEnd(char* p)
  {
    if (*p == '\n' || *p == '\r')
      return true;
    if (*p == LS[2] || *p == PS[2])
      if ((p-2) >= text.ptr)
        if (p[-1] == LS[1] && p[-2] == LS[0])
          return true;
    return false;
  }

  /// The main method which recognizes the characters that make up a token.
  ///
  /// Complicated tokens are scanned in separate methods.
  public void scan(ref Token t)
  in
  {
    assert(text.ptr <= p && p < end);
  }
  out
  {
    assert(text.ptr <= t.start && t.start < end, Token.toString(t.kind));
    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.kind));
  }
  body
  {
    // Scan whitespace.
    if (isspace(*p))
    {
      t.ws = p;
      while (isspace(*++p))
      {}
    }

    // Scan a token.
    uint c = *p;
    {
      t.start = p;
      // Newline.
      switch (*p)
      {
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        ++p;
        ++lineNum;
        setLineBegin(p);
//         this.newline = &t;
        t.kind = TOK.Newline;
        t.setWhitespaceFlag();
        t.newline.filePaths = this.filePaths;
        t.newline.oriLineNum = lineNum;
        t.newline.setLineNum = lineNum_hline;
        t.end = p;
        return;
      default:
        if (isUnicodeNewline(p))
        {
          ++p; ++p;
          goto case '\n';
        }
      }
      // Identifier or string literal.
      if (isidbeg(c))
      {
        if (c == 'r' && p[1] == '"' && ++p)
          return scanRawStringLiteral(t);
        if (c == 'x' && p[1] == '"')
          return scanHexStringLiteral(t);
      version(D2)
      {
        if (c == 'q' && p[1] == '"')
          return scanDelimitedStringLiteral(t);
        if (c == 'q' && p[1] == '{')
          return scanTokenStringLiteral(t);
      }
        // Scan identifier.
      Lidentifier:
        do
        { c = *++p; }
        while (isident(c) || !isascii(c) && isUnicodeAlpha())

        t.end = p;

        auto id = IdTable.lookup(t.srcText);
        t.kind = id.kind;
        t.ident = id;

        if (t.kind == TOK.Identifier || t.isKeyword)
          return;
        else if (t.isSpecialToken)
          finalizeSpecialToken(t);
        else if (t.kind == TOK.EOF)
        {
          tail = &t;
          assert(t.srcText == "__EOF__");
        }
        else
          assert(0, "unexpected token type: " ~ Token.toString(t.kind));
        return;
      }

      if (isdigit(c))
        return scanNumber(t);

      if (c == '/')
      {
        c = *++p;
        switch(c)
        {
        case '=':
          ++p;
          t.kind = TOK.DivAssign;
          t.end = p;
          return;
        case '+':
          return scanNestedComment(t);
        case '*':
          return scanBlockComment(t);
        case '/':
          while (!isEndOfLine(++p))
            isascii(*p) || decodeUTF8();
          t.kind = TOK.Comment;
          t.setWhitespaceFlag();
          t.end = p;
          return;
        default:
          t.kind = TOK.Div;
          t.end = p;
          return;
        }
      }

      switch (c)
      {
      case '\'':
        return scanCharacterLiteral(t);
      case '`':
        return scanRawStringLiteral(t);
      case '"':
        return scanNormalStringLiteral(t);
      case '\\':
        char[] buffer;
        do
        {
          bool isBinary;
          c = scanEscapeSequence(isBinary);
          if (isascii(c) || isBinary)
            buffer ~= c;
          else
            encodeUTF8(buffer, c);
        } while (*p == '\\')
        buffer ~= 0;
        t.kind = TOK.String;
        t.str = buffer;
        t.end = p;
        return;
      case '>': /* >  >=  >>  >>=  >>>  >>>= */
        c = *++p;
        switch (c)
        {
        case '=':
          t.kind = TOK.GreaterEqual;
          goto Lcommon;
        case '>':
          if (p[1] == '>')
          {
            ++p;
            if (p[1] == '=')
            { ++p;
              t.kind = TOK.URShiftAssign;
            }
            else
              t.kind = TOK.URShift;
          }
          else if (p[1] == '=')
          {
            ++p;
            t.kind = TOK.RShiftAssign;
          }
          else
            t.kind = TOK.RShift;
          goto Lcommon;
        default:
          t.kind = TOK.Greater;
          goto Lcommon2;
        }
        assert(0);
      case '<': /* <  <=  <>  <>=  <<  <<= */
        c = *++p;
        switch (c)
        {
        case '=':
          t.kind = TOK.LessEqual;
          goto Lcommon;
        case '<':
          if (p[1] == '=') {
            ++p;
            t.kind = TOK.LShiftAssign;
          }
          else
            t.kind = TOK.LShift;
          goto Lcommon;
        case '>':
          if (p[1] == '=') {
            ++p;
            t.kind = TOK.LorEorG;
          }
          else
            t.kind = TOK.LorG;
          goto Lcommon;
        default:
          t.kind = TOK.Less;
          goto Lcommon2;
        }
        assert(0);
      case '!': /* !  !<  !>  !<=  !>=  !<>  !<>= */
        c = *++p;
        switch (c)
        {
        case '<':
          c = *++p;
          if (c == '>')
          {
            if (p[1] == '=') {
              ++p;
              t.kind = TOK.Unordered;
            }
            else
              t.kind = TOK.UorE;
          }
          else if (c == '=')
          {
            t.kind = TOK.UorG;
          }
          else {
            t.kind = TOK.UorGorE;
            goto Lcommon2;
          }
          goto Lcommon;
        case '>':
          if (p[1] == '=')
          {
            ++p;
            t.kind = TOK.UorL;
          }
          else
            t.kind = TOK.UorLorE;
          goto Lcommon;
        case '=':
          t.kind = TOK.NotEqual;
          goto Lcommon;
        default:
          t.kind = TOK.Not;
          goto Lcommon2;
        }
        assert(0);
      case '.': /* .  .[0-9]  ..  ... */
        if (p[1] == '.')
        {
          ++p;
          if (p[1] == '.') {
            ++p;
            t.kind = TOK.Ellipses;
          }
          else
            t.kind = TOK.Slice;
        }
        else if (isdigit(p[1]))
        {
          return scanReal(t);
        }
        else
          t.kind = TOK.Dot;
        goto Lcommon;
      case '|': /* |  ||  |= */
        c = *++p;
        if (c == '=')
          t.kind = TOK.OrAssign;
        else if (c == '|')
          t.kind = TOK.OrLogical;
        else {
          t.kind = TOK.OrBinary;
          goto Lcommon2;
        }
        goto Lcommon;
      case '&': /* &  &&  &= */
        c = *++p;
        if (c == '=')
          t.kind = TOK.AndAssign;
        else if (c == '&')
          t.kind = TOK.AndLogical;
        else {
          t.kind = TOK.AndBinary;
          goto Lcommon2;
        }
        goto Lcommon;
      case '+': /* +  ++  += */
        c = *++p;
        if (c == '=')
          t.kind = TOK.PlusAssign;
        else if (c == '+')
          t.kind = TOK.PlusPlus;
        else {
          t.kind = TOK.Plus;
          goto Lcommon2;
        }
        goto Lcommon;
      case '-': /* -  --  -= */
        c = *++p;
        if (c == '=')
          t.kind = TOK.MinusAssign;
        else if (c == '-')
          t.kind = TOK.MinusMinus;
        else {
          t.kind = TOK.Minus;
          goto Lcommon2;
        }
        goto Lcommon;
      case '=': /* =  == */
        if (p[1] == '=') {
          ++p;
          t.kind = TOK.Equal;
        }
        else
          t.kind = TOK.Assign;
        goto Lcommon;
      case '~': /* ~  ~= */
         if (p[1] == '=') {
           ++p;
           t.kind = TOK.CatAssign;
         }
         else
           t.kind = TOK.Tilde;
         goto Lcommon;
      case '*': /* *  *= */
         if (p[1] == '=') {
           ++p;
           t.kind = TOK.MulAssign;
         }
         else
           t.kind = TOK.Mul;
         goto Lcommon;
      case '^': /* ^  ^= */
         if (p[1] == '=') {
           ++p;
           t.kind = TOK.XorAssign;
         }
         else
           t.kind = TOK.Xor;
         goto Lcommon;
      case '%': /* %  %= */
         if (p[1] == '=') {
           ++p;
           t.kind = TOK.ModAssign;
         }
         else
           t.kind = TOK.Mod;
         goto Lcommon;
      // Single character tokens:
      case '(':
        t.kind = TOK.LParen;
        goto Lcommon;
      case ')':
        t.kind = TOK.RParen;
        goto Lcommon;
      case '[':
        t.kind = TOK.LBracket;
        goto Lcommon;
      case ']':
        t.kind = TOK.RBracket;
        goto Lcommon;
      case '{':
        t.kind = TOK.LBrace;
        goto Lcommon;
      case '}':
        t.kind = TOK.RBrace;
        goto Lcommon;
      case ':':
        t.kind = TOK.Colon;
        goto Lcommon;
      case ';':
        t.kind = TOK.Semicolon;
        goto Lcommon;
      case '?':
        t.kind = TOK.Question;
        goto Lcommon;
      case ',':
        t.kind = TOK.Comma;
        goto Lcommon;
      case '$':
        t.kind = TOK.Dollar;
      Lcommon:
        ++p;
      Lcommon2:
        t.end = p;
        return;
      case '#':
        return scanSpecialTokenSequence(t);
      default:
      }

      // Check for EOF
      if (isEOF(c))
      {
        assert(isEOF(*p), ""~*p);
        t.kind = TOK.EOF;
        t.end = p;
        tail = &t;
        assert(t.start == t.end);
        return;
      }

      if (!isascii(c))
      {
        c = decodeUTF8();
        if (isUniAlpha(c))
          goto Lidentifier;
      }

      error(t.start, MID.IllegalCharacter, cast(dchar)c);

      ++p;
      t.kind = TOK.Illegal;
      t.setWhitespaceFlag();
      t.dchar_ = c;
      t.end = p;
      return;
    }
  }

  /// Converts a string literal to an integer.
  template toUint(char[] T)
  {
    static assert(0 < T.length && T.length <= 4);
    static if (T.length == 1)
      const uint toUint = T[0];
    else
      const uint toUint = (T[0] << ((T.length-1)*8)) | toUint!(T[1..$]);
  }
  static assert(toUint!("\xAA\xBB\xCC\xDD") == 0xAABBCCDD);

  /// Constructs case statements. E.g.:
  /// ---
  //// // case_!("<", "Less", "Lcommon") ->
  /// case 60u:
  ///   t.kind = TOK.Less;
  ///   goto Lcommon;
  /// ---
  /// Note:Can't use this yet due to a $(DMDBUG 1534, bug) in DMD.
  template case_(char[] str, char[] kind, char[] label)
  {
    const char[] case_ =
      `case `~toUint!(str).stringof~`:`
        `t.kind = TOK.`~kind~`;`
        `goto `~label~`;`;
  }
  //pragma(msg, case_!("<", "Less", "Lcommon"));

  template case_L4(char[] str, TOK kind)
  {
    const char[] case_L4 = case_!(str, kind, "Lcommon_4");
  }

  template case_L3(char[] str, TOK kind)
  {
    const char[] case_L3 = case_!(str, kind, "Lcommon_3");
  }

  template case_L2(char[] str, TOK kind)
  {
    const char[] case_L2 = case_!(str, kind, "Lcommon_2");
  }

  template case_L1(char[] str, TOK kind)
  {
    const char[] case_L3 = case_!(str, kind, "Lcommon");
  }

  /// An alternative scan method.
  /// Profiling shows it's a bit slower.
  public void scan_(ref Token t)
  in
  {
    assert(text.ptr <= p && p < end);
  }
  out
  {
    assert(text.ptr <= t.start && t.start < end, Token.toString(t.kind));
    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.kind));
  }
  body
  {
    // Scan whitespace.
    if (isspace(*p))
    {
      t.ws = p;
      while (isspace(*++p))
      {}
    }

    // Scan a token.
    t.start = p;
    // Newline.
    switch (*p)
    {
    case '\r':
      if (p[1] == '\n')
        ++p;
    case '\n':
      assert(isNewlineEnd(p));
      ++p;
      ++lineNum;
      setLineBegin(p);
//       this.newline = &t;
      t.kind = TOK.Newline;
      t.setWhitespaceFlag();
      t.newline.filePaths = this.filePaths;
      t.newline.oriLineNum = lineNum;
      t.newline.setLineNum = lineNum_hline;
      t.end = p;
      return;
    default:
      if (isUnicodeNewline(p))
      {
        ++p; ++p;
        goto case '\n';
      }
    }

    uint c = *p;
    assert(end - p != 0);
    switch (end - p)
    {
    case 1:
      goto L1character;
    case 2:
      c <<= 8; c |= p[1];
      goto L2characters;
    case 3:
      c <<= 8; c |= p[1]; c <<= 8; c |= p[2];
      goto L3characters;
    default:
      version(BigEndian)
        c = *cast(uint*)p;
      else
      {
        c <<= 8; c |= p[1]; c <<= 8; c |= p[2]; c <<= 8; c |= p[3];
        /+
        c = *cast(uint*)p;
        asm
        {
          mov EDX, c;
          bswap EDX;
          mov c, EDX;
        }
        +/
      }
    }

    // 4 character tokens.
    switch (c)
    {
    case toUint!(">>>="):
      t.kind = TOK.RShiftAssign;
      goto Lcommon_4;
    case toUint!("!<>="):
      t.kind = TOK.Unordered;
    Lcommon_4:
      p += 4;
      t.end = p;
      return;
    default:
    }

    c >>>= 8;
  L3characters:
    assert(p == t.start);
    // 3 character tokens.
    switch (c)
    {
    case toUint!(">>="):
      t.kind = TOK.RShiftAssign;
      goto Lcommon_3;
    case toUint!(">>>"):
      t.kind = TOK.URShift;
      goto Lcommon_3;
    case toUint!("<>="):
      t.kind = TOK.LorEorG;
      goto Lcommon_3;
    case toUint!("<<="):
      t.kind = TOK.LShiftAssign;
      goto Lcommon_3;
    case toUint!("!<="):
      t.kind = TOK.UorG;
      goto Lcommon_3;
    case toUint!("!>="):
      t.kind = TOK.UorL;
      goto Lcommon_3;
    case toUint!("!<>"):
      t.kind = TOK.UorE;
      goto Lcommon_3;
    case toUint!("..."):
      t.kind = TOK.Ellipses;
    Lcommon_3:
      p += 3;
      t.end = p;
      return;
    default:
    }

    c >>>= 8;
  L2characters:
    assert(p == t.start);
    // 2 character tokens.
    switch (c)
    {
    case toUint!("/+"):
      ++p; // Skip /
      return scanNestedComment(t);
    case toUint!("/*"):
      ++p; // Skip /
      return scanBlockComment(t);
    case toUint!("//"):
      ++p; // Skip /
      assert(*p == '/');
      while (!isEndOfLine(++p))
        isascii(*p) || decodeUTF8();
      t.kind = TOK.Comment;
      t.setWhitespaceFlag();
      t.end = p;
      return;
    case toUint!(">="):
      t.kind = TOK.GreaterEqual;
      goto Lcommon_2;
    case toUint!(">>"):
      t.kind = TOK.RShift;
      goto Lcommon_2;
    case toUint!("<<"):
      t.kind = TOK.LShift;
      goto Lcommon_2;
    case toUint!("<="):
      t.kind = TOK.LessEqual;
      goto Lcommon_2;
    case toUint!("<>"):
      t.kind = TOK.LorG;
      goto Lcommon_2;
    case toUint!("!<"):
      t.kind = TOK.UorGorE;
      goto Lcommon_2;
    case toUint!("!>"):
      t.kind = TOK.UorLorE;
      goto Lcommon_2;
    case toUint!("!="):
      t.kind = TOK.NotEqual;
      goto Lcommon_2;
    case toUint!(".."):
      t.kind = TOK.Slice;
      goto Lcommon_2;
    case toUint!("&&"):
      t.kind = TOK.AndLogical;
      goto Lcommon_2;
    case toUint!("&="):
      t.kind = TOK.AndAssign;
      goto Lcommon_2;
    case toUint!("||"):
      t.kind = TOK.OrLogical;
      goto Lcommon_2;
    case toUint!("|="):
      t.kind = TOK.OrAssign;
      goto Lcommon_2;
    case toUint!("++"):
      t.kind = TOK.PlusPlus;
      goto Lcommon_2;
    case toUint!("+="):
      t.kind = TOK.PlusAssign;
      goto Lcommon_2;
    case toUint!("--"):
      t.kind = TOK.MinusMinus;
      goto Lcommon_2;
    case toUint!("-="):
      t.kind = TOK.MinusAssign;
      goto Lcommon_2;
    case toUint!("=="):
      t.kind = TOK.Equal;
      goto Lcommon_2;
    case toUint!("~="):
      t.kind = TOK.CatAssign;
      goto Lcommon_2;
    case toUint!("*="):
      t.kind = TOK.MulAssign;
      goto Lcommon_2;
    case toUint!("/="):
      t.kind = TOK.DivAssign;
      goto Lcommon_2;
    case toUint!("^="):
      t.kind = TOK.XorAssign;
      goto Lcommon_2;
    case toUint!("%="):
      t.kind = TOK.ModAssign;
    Lcommon_2:
      p += 2;
      t.end = p;
      return;
    default:
    }

    c >>>= 8;
  L1character:
    assert(p == t.start);
    assert(*p == c, Format("p={0},c={1}", *p, cast(dchar)c));
    // 1 character tokens.
    // TODO: consider storing the token type in ptable.
    switch (c)
    {
    case '\'':
      return scanCharacterLiteral(t);
    case '`':
      return scanRawStringLiteral(t);
    case '"':
      return scanNormalStringLiteral(t);
    case '\\':
      char[] buffer;
      do
      {
        bool isBinary;
        c = scanEscapeSequence(isBinary);
        if (isascii(c) || isBinary)
          buffer ~= c;
        else
          encodeUTF8(buffer, c);
      } while (*p == '\\')
      buffer ~= 0;
      t.kind = TOK.String;
      t.str = buffer;
      t.end = p;
      return;
    case '<':
      t.kind = TOK.Greater;
      goto Lcommon;
    case '>':
      t.kind = TOK.Less;
      goto Lcommon;
    case '^':
      t.kind = TOK.Xor;
      goto Lcommon;
    case '!':
      t.kind = TOK.Not;
      goto Lcommon;
    case '.':
      if (isdigit(p[1]))
        return scanReal(t);
      t.kind = TOK.Dot;
      goto Lcommon;
    case '&':
      t.kind = TOK.AndBinary;
      goto Lcommon;
    case '|':
      t.kind = TOK.OrBinary;
      goto Lcommon;
    case '+':
      t.kind = TOK.Plus;
      goto Lcommon;
    case '-':
      t.kind = TOK.Minus;
      goto Lcommon;
    case '=':
      t.kind = TOK.Assign;
      goto Lcommon;
    case '~':
      t.kind = TOK.Tilde;
      goto Lcommon;
    case '*':
      t.kind = TOK.Mul;
      goto Lcommon;
    case '/':
      t.kind = TOK.Div;
      goto Lcommon;
    case '%':
      t.kind = TOK.Mod;
      goto Lcommon;
    case '(':
      t.kind = TOK.LParen;
      goto Lcommon;
    case ')':
      t.kind = TOK.RParen;
      goto Lcommon;
    case '[':
      t.kind = TOK.LBracket;
      goto Lcommon;
    case ']':
      t.kind = TOK.RBracket;
      goto Lcommon;
    case '{':
      t.kind = TOK.LBrace;
      goto Lcommon;
    case '}':
      t.kind = TOK.RBrace;
      goto Lcommon;
    case ':':
      t.kind = TOK.Colon;
      goto Lcommon;
    case ';':
      t.kind = TOK.Semicolon;
      goto Lcommon;
    case '?':
      t.kind = TOK.Question;
      goto Lcommon;
    case ',':
      t.kind = TOK.Comma;
      goto Lcommon;
    case '$':
      t.kind = TOK.Dollar;
    Lcommon:
      ++p;
      t.end = p;
      return;
    case '#':
      return scanSpecialTokenSequence(t);
    default:
    }

    assert(p == t.start);
    assert(*p == c);

    // TODO: consider moving isidbeg() and isdigit() up.
    if (isidbeg(c))
    {
      if (c == 'r' && p[1] == '"' && ++p)
        return scanRawStringLiteral(t);
      if (c == 'x' && p[1] == '"')
        return scanHexStringLiteral(t);
    version(D2)
    {
      if (c == 'q' && p[1] == '"')
        return scanDelimitedStringLiteral(t);
      if (c == 'q' && p[1] == '{')
        return scanTokenStringLiteral(t);
    }
      // Scan identifier.
    Lidentifier:
      do
      { c = *++p; }
      while (isident(c) || !isascii(c) && isUnicodeAlpha())

      t.end = p;

      auto id = IdTable.lookup(t.srcText);
      t.kind = id.kind;
      t.ident = id;

      if (t.kind == TOK.Identifier || t.isKeyword)
        return;
      else if (t.isSpecialToken)
        finalizeSpecialToken(t);
      else if (t.kind == TOK.EOF)
      {
        tail = &t;
        assert(t.srcText == "__EOF__");
      }
      else
        assert(0, "unexpected token type: " ~ Token.toString(t.kind));
      return;
    }

    if (isdigit(c))
      return scanNumber(t);

    // Check for EOF
    if (isEOF(c))
    {
      assert(isEOF(*p), *p~"");
      t.kind = TOK.EOF;
      t.end = p;
      tail = &t;
      assert(t.start == t.end);
      return;
    }

    if (!isascii(c))
    {
      c = decodeUTF8();
      if (isUniAlpha(c))
        goto Lidentifier;
    }

    error(t.start, MID.IllegalCharacter, cast(dchar)c);

    ++p;
    t.kind = TOK.Illegal;
    t.setWhitespaceFlag();
    t.dchar_ = c;
    t.end = p;
    return;
  }

  /// Scans a block comment.
  ///
  /// BlockComment := "/*" AnyChar* "*/"
  void scanBlockComment(ref Token t)
  {
    assert(p[-1] == '/' && *p == '*');
    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;
  Loop:
    while (1)
    {
      switch (*++p)
      {
      case '*':
        if (p[1] != '/')
          continue;
        p += 2;
        break Loop;
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        ++lineNum;
        setLineBegin(p+1);
        break;
      default:
        if (!isascii(*p))
        {
          if (isUnicodeNewlineChar(decodeUTF8()))
            goto case '\n';
        }
        else if (isEOF(*p))
        {
          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedBlockComment);
          break Loop;
        }
      }
    }
    t.kind = TOK.Comment;
    t.setWhitespaceFlag();
    t.end = p;
    return;
  }

  /// Scans a nested comment.
  ///
  /// NestedComment := "/+" (AnyChar* | NestedComment) "+/"
  void scanNestedComment(ref Token t)
  {
    assert(p[-1] == '/' && *p == '+');
    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;
    uint level = 1;
  Loop:
    while (1)
    {
      switch (*++p)
      {
      case '/':
        if (p[1] == '+')
          ++p, ++level;
        continue;
      case '+':
        if (p[1] != '/')
          continue;
        ++p;
        if (--level != 0)
          continue;
        ++p;
        break Loop;
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        ++lineNum;
        setLineBegin(p+1);
        continue;
      default:
        if (!isascii(*p))
        {
          if (isUnicodeNewlineChar(decodeUTF8()))
            goto case '\n';
        }
        else if (isEOF(*p))
        {
          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedNestedComment);
          break Loop;
        }
      }
    }
    t.kind = TOK.Comment;
    t.setWhitespaceFlag();
    t.end = p;
    return;
  }

  /// Scans the postfix character of a string literal.
  ///
  /// PostfixChar := "c" | "w" | "d"
  char scanPostfix()
  {
    assert(p[-1] == '"' || p[-1] == '`' ||
      { version(D2) return p[-1] == '}';
               else return 0; }()
    );
    switch (*p)
    {
    case 'c':
    case 'w':
    case 'd':
      return *p++;
    default:
      return 0;
    }
    assert(0);
  }

  /// Scans a normal string literal.
  ///
  /// NormalStringLiteral := "\"" Char* "\""
  void scanNormalStringLiteral(ref Token t)
  {
    assert(*p == '"');
    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;
    t.kind = TOK.String;
    char[] buffer;
    uint c;
    while (1)
    {
      c = *++p;
      switch (c)
      {
      case '"':
        ++p;
        t.pf = scanPostfix();
      Lreturn:
        t.str = buffer ~ '\0';
        t.end = p;
        return;
      case '\\':
        bool isBinary;
        c = scanEscapeSequence(isBinary);
        --p;
        if (isascii(c) || isBinary)
          buffer ~= c;
        else
          encodeUTF8(buffer, c);
        continue;
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        c = '\n'; // Convert Newline to \n.
        ++lineNum;
        setLineBegin(p+1);
        break;
      case 0, _Z_:
        error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedString);
        goto Lreturn;
      default:
        if (!isascii(c))
        {
          c = decodeUTF8();
          if (isUnicodeNewlineChar(c))
            goto case '\n';
          encodeUTF8(buffer, c);
          continue;
        }
      }
      assert(isascii(c));
      buffer ~= c;
    }
    assert(0);
  }

  /// Scans a character literal.
  ///
  /// CharLiteral := "'" Char "'"
  void scanCharacterLiteral(ref Token t)
  {
    assert(*p == '\'');
    ++p;
    t.kind = TOK.CharLiteral;
    switch (*p)
    {
    case '\\':
      bool notused;
      t.dchar_ = scanEscapeSequence(notused);
      break;
    case '\'':
      error(t.start, MID.EmptyCharacterLiteral);
      break;
    default:
      if (isEndOfLine(p))
        break;
      uint c = *p;
      if (!isascii(c))
        c = decodeUTF8();
      t.dchar_ = c;
      ++p;
    }

    if (*p == '\'')
      ++p;
    else
      error(t.start, MID.UnterminatedCharacterLiteral);
    t.end = p;
  }

  /// Scans a raw string literal.
  ///
  /// RawStringLiteral := "r\"" AnyChar* "\"" | "`" AnyChar* "`"
  void scanRawStringLiteral(ref Token t)
  {
    assert(*p == '`' || *p == '"' && p[-1] == 'r');
    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;
    t.kind = TOK.String;
    uint delim = *p;
    char[] buffer;
    uint c;
    while (1)
    {
      c = *++p;
      switch (c)
      {
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        c = '\n'; // Convert Newline to '\n'.
        ++lineNum;
        setLineBegin(p+1);
        break;
      case '`':
      case '"':
        if (c == delim)
        {
          ++p;
          t.pf = scanPostfix();
        Lreturn:
          t.str = buffer ~ '\0';
          t.end = p;
          return;
        }
        break;
      case 0, _Z_:
        error(tokenLineNum, tokenLineBegin, t.start,
          delim == 'r' ? MID.UnterminatedRawString : MID.UnterminatedBackQuoteString);
        goto Lreturn;
      default:
        if (!isascii(c))
        {
          c = decodeUTF8();
          if (isUnicodeNewlineChar(c))
            goto case '\n';
          encodeUTF8(buffer, c);
          continue;
        }
      }
      assert(isascii(c));
      buffer ~= c;
    }
    assert(0);
  }

  /// Scans a hexadecimal string literal.
  ///
  /// HexStringLiteral := "x\"" (HexChar HexChar)* "\""
  void scanHexStringLiteral(ref Token t)
  {
    assert(p[0] == 'x' && p[1] == '"');
    t.kind = TOK.String;

    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;

    uint c;
    ubyte[] buffer;
    ubyte h; // hex number
    uint n; // number of hex digits

    ++p;
    assert(*p == '"');
    while (1)
    {
      c = *++p;
      switch (c)
      {
      case '"':
        if (n & 1)
          error(tokenLineNum, tokenLineBegin, t.start, MID.OddNumberOfDigitsInHexString);
        ++p;
        t.pf = scanPostfix();
      Lreturn:
        t.str = cast(string) (buffer ~= 0);
        t.end = p;
        return;
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        ++lineNum;
        setLineBegin(p+1);
        continue;
      default:
        if (ishexad(c))
        {
          if (c <= '9')
            c -= '0';
          else if (c <= 'F')
            c -= 'A' - 10;
          else
            c -= 'a' - 10;

          if (n & 1)
          {
            h <<= 4;
            h |= c;
            buffer ~= h;
          }
          else
            h = cast(ubyte)c;
          ++n;
          continue;
        }
        else if (isspace(c))
          continue; // Skip spaces.
        else if (isEOF(c))
        {
          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedHexString);
          t.pf = 0;
          goto Lreturn;
        }
        else
        {
          auto errorAt = p;
          if (!isascii(c))
          {
            c = decodeUTF8();
            if (isUnicodeNewlineChar(c))
              goto case '\n';
          }
          error(errorAt, MID.NonHexCharInHexString, cast(dchar)c);
        }
      }
    }
    assert(0);
  }

version(DDoc)
{
  /// Scans a delimited string literal.
  void scanDelimitedStringLiteral(ref Token t);
  /// Scans a token string literal.
  ///
  /// TokenStringLiteral := "q{" Token* "}"
  void scanTokenStringLiteral(ref Token t);
}
else
version(D2)
{
  void scanDelimitedStringLiteral(ref Token t)
  {
    assert(p[0] == 'q' && p[1] == '"');
    t.kind = TOK.String;

    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;

    char[] buffer;
    dchar opening_delim = 0, // 0 if no nested delimiter or '[', '(', '<', '{'
          closing_delim; // Will be ']', ')', '>', '},
                         // the first character of an identifier or
                         // any other Unicode/ASCII character.
    char[] str_delim; // Identifier delimiter.
    uint level = 1; // Counter for nestable delimiters.

    ++p; ++p; // Skip q"
    uint c = *p;
    switch (c)
    {
    case '(':
      opening_delim = c;
      closing_delim = ')'; // c + 1
      break;
    case '[', '<', '{':
      opening_delim = c;
      closing_delim = c + 2; // Get to closing counterpart. Feature of ASCII table.
      break;
    default:
      dchar scanNewline()
      {
        switch (*p)
        {
        case '\r':
          if (p[1] == '\n')
            ++p;
        case '\n':
          assert(isNewlineEnd(p));
          ++p;
          ++lineNum;
          setLineBegin(p);
          return '\n';
        default:
          if (isUnicodeNewline(p))
          {
            ++p; ++p;
            goto case '\n';
          }
        }
        return 0;
      }
      // Skip leading newlines:
      while (scanNewline() != 0)
      {}
      assert(!isNewline(p));

      char* begin = p;
      c = *p;
      closing_delim = c;
      // TODO: Check for non-printable characters?
      if (!isascii(c))
      {
        closing_delim = decodeUTF8();
        if (!isUniAlpha(closing_delim))
          break; // Not an identifier.
      }
      else if (!isidbeg(c))
        break; // Not an identifier.

      // Parse Identifier + EndOfLine
      do
      { c = *++p; }
      while (isident(c) || !isascii(c) && isUnicodeAlpha())
      // Store identifier
      str_delim = begin[0..p-begin];
      // Scan newline
      if (scanNewline() == '\n')
        --p; // Go back one because of "c = *++p;" in main loop.
      else
      {
        // TODO: error(p, MID.ExpectedNewlineAfterIdentDelim);
      }
    }

    bool checkStringDelim(char* p)
    {
      assert(str_delim.length != 0);
      if (buffer[$-1] == '\n' && // Last character copied to buffer must be '\n'.
          end-p >= str_delim.length && // Check remaining length.
          p[0..str_delim.length] == str_delim) // Compare.
        return true;
      return false;
    }

    while (1)
    {
      c = *++p;
      switch (c)
      {
      case '\r':
        if (p[1] == '\n')
          ++p;
      case '\n':
        assert(isNewlineEnd(p));
        c = '\n'; // Convert Newline to '\n'.
        ++lineNum;
        setLineBegin(p+1);
        break;
      case 0, _Z_:
        // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedDelimitedString);
        goto Lreturn3;
      default:
        if (!isascii(c))
        {
          auto begin = p;
          c = decodeUTF8();
          if (isUnicodeNewlineChar(c))
            goto case '\n';
          if (c == closing_delim)
          {
            if (str_delim.length)
            {
              if (checkStringDelim(begin))
              {
                p = begin + str_delim.length;
                goto Lreturn2;
              }
            }
            else
            {
              assert(level == 1);
              --level;
              goto Lreturn;
            }
          }
          encodeUTF8(buffer, c);
          continue;
        }
        else
        {
          if (c == opening_delim)
            ++level;
          else if (c == closing_delim)
          {
            if (str_delim.length)
            {
              if (checkStringDelim(p))
              {
                p += str_delim.length;
                goto Lreturn2;
              }
            }
            else if (--level == 0)
              goto Lreturn;
          }
        }
      }
      assert(isascii(c));
      buffer ~= c;
    }
  Lreturn: // Character delimiter.
    assert(c == closing_delim);
    assert(level == 0);
    ++p; // Skip closing delimiter.
  Lreturn2: // String delimiter.
    if (*p == '"')
      ++p;
    else
    {
      // TODO: error(p, MID.ExpectedDblQuoteAfterDelim, str_delim.length ? str_delim : closing_delim~"");
    }

    t.pf = scanPostfix();
  Lreturn3: // Error.
    t.str = buffer ~ '\0';
    t.end = p;
  }

  void scanTokenStringLiteral(ref Token t)
  {
    assert(p[0] == 'q' && p[1] == '{');
    t.kind = TOK.String;

    auto tokenLineNum = lineNum;
    auto tokenLineBegin = lineBegin;

    // A guard against changes to particular members:
    // this.lineNum_hline and this.errorPath
    ++inTokenString;

    uint lineNum = this.lineNum;
    uint level = 1;

    ++p; ++p; // Skip q{

    auto prev_t = &t;
    Token* token;
    while (1)
    {
      token = new Token;
      scan(*token);
      // Save the tokens in a doubly linked list.
      // Could be useful for various tools.
      token.prev = prev_t;
      prev_t.next = token;
      prev_t = token;
      switch (token.kind)
      {
      case TOK.LBrace:
        ++level;
        continue;
      case TOK.RBrace:
        if (--level == 0)
        {
          t.tok_str = t.next;
          t.next = null;
          break;
        }
        continue;
      case TOK.EOF:
        // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedTokenString);
        t.tok_str = t.next;
        t.next = token;
        break;
      default:
        continue;
      }
      break; // Exit loop.
    }

    assert(token.kind == TOK.RBrace || token.kind == TOK.EOF);
    assert(token.kind == TOK.RBrace && t.next is null ||
           token.kind == TOK.EOF && t.next !is null);

    char[] buffer;
    // token points to } or EOF
    if (token.kind == TOK.EOF)
    {
      t.end = token.start;
      buffer = t.srcText[2..$].dup ~ '\0';
    }
    else
    {
      // Assign to buffer before scanPostfix().
      t.end = p;
      buffer = t.srcText[2..$-1].dup ~ '\0';
      t.pf = scanPostfix();
      t.end = p; // Assign again because of postfix.
    }
    // Convert newlines to '\n'.
    if (lineNum != this.lineNum)
    {
      assert(buffer[$-1] == '\0');
      uint i, j;
      for (; i < buffer.length; ++i)
        switch (buffer[i])
        {
        case '\r':
          if (buffer[i+1] == '\n')
            ++i;
        case '\n':
          assert(isNewlineEnd(buffer.ptr + i));
          buffer[j++] = '\n'; // Convert Newline to '\n'.
          break;
        default:
          if (isUnicodeNewline(buffer.ptr + i))
          {
            ++i; ++i;
            goto case '\n';
          }
          buffer[j++] = buffer[i]; // Copy.
        }
      buffer.length = j; // Adjust length.
    }
    assert(buffer[$-1] == '\0');
    t.str = buffer;

    --inTokenString;
  }
} // version(D2)

  /// Scans an escape sequence.
  ///
  /// EscapeSequence := "\" (Octal{1,3} | ("x" Hex{2}) |
  ///                       ("u" Hex{4}) | ("U" Hex{8}) |
  ///                       "'" | "\"" | "\\" | "?" | "a" |
  ///                       "b" | "f" | "n" | "r" | "t" | "v")
  /// Params:
  ///   isBinary = set to true for octal and hexadecimal escapes.
  /// Returns: the escape value.
  dchar scanEscapeSequence(ref bool isBinary)
  out(result)
  { assert(isValidChar(result)); }
  body
  {
    assert(*p == '\\');

    auto sequenceStart = p; // Used for error reporting.

    ++p;
    uint c = char2ev(*p);
    if (c)
    {
      ++p;
      return c;
    }

    uint digits = 2;

    switch (*p)
    {
    case 'x':
      isBinary = true;
    case_Unicode:
      assert(c == 0);
      assert(digits == 2 || digits == 4 || digits == 8);
      while (1)
      {
        ++p;
        if (ishexad(*p))
        {
          c *= 16;
          if (*p <= '9')
            c += *p - '0';
          else if (*p <= 'F')
            c += *p - 'A' + 10;
          else
            c += *p - 'a' + 10;

          if (--digits == 0)
          {
            ++p;
            if (isValidChar(c))
              return c; // Return valid escape value.

            error(sequenceStart, MID.InvalidUnicodeEscapeSequence,
                  sequenceStart[0..p-sequenceStart]);
            break;
          }
          continue;
        }

        error(sequenceStart, MID.InsufficientHexDigits,
              sequenceStart[0..p-sequenceStart]);
        break;
      }
      break;
    case 'u':
      digits = 4;
      goto case_Unicode;
    case 'U':
      digits = 8;
      goto case_Unicode;
    default:
      if (isoctal(*p))
      {
        isBinary = true;
        assert(c == 0);
        c += *p - '0';
        ++p;
        if (!isoctal(*p))
          return c;
        c *= 8;
        c += *p - '0';
        ++p;
        if (!isoctal(*p))
          return c;
        c *= 8;
        c += *p - '0';
        ++p;
        if (c > 0xFF)
          error(sequenceStart, MSG.InvalidOctalEscapeSequence,
                sequenceStart[0..p-sequenceStart]);
        return c; // Return valid escape value.
      }
      else if(*p == '&')
      {
        if (isalpha(*++p))
        {
          auto begin = p;
          while (isalnum(*++p))
          {}

          if (*p == ';')
          {
            // Pass entity excluding '&' and ';'.
            c = entity2Unicode(begin[0..p - begin]);
            ++p; // Skip ;
            if (c != 0xFFFF)
              return c; // Return valid escape value.
            else
              error(sequenceStart, MID.UndefinedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
          }
          else
            error(sequenceStart, MID.UnterminatedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
        }
        else
          error(sequenceStart, MID.InvalidBeginHTMLEntity);
      }
      else if (isEndOfLine(p))
        error(sequenceStart, MID.UndefinedEscapeSequence,
          isEOF(*p) ? `\EOF` : `\NewLine`);
      else
      {
        char[] str = `\`;
        if (isascii(c))
          str ~= *p;
        else
          encodeUTF8(str, decodeUTF8());
        ++p;
        // TODO: check for unprintable character?
        error(sequenceStart, MID.UndefinedEscapeSequence, str);
      }
    }
    return REPLACEMENT_CHAR; // Error: return replacement character.
  }

  /// Scans a number literal.
  ///
  /// $(PRE
  /// IntegerLiteral := (Dec|Hex|Bin|Oct)Suffix?
  /// Dec := (0|[1-9][0-9_]*)
  /// Hex := 0[xX][_]*[0-9a-zA-Z][0-9a-zA-Z_]*
  /// Bin := 0[bB][_]*[01][01_]*
  /// Oct := 0[0-7_]*
  /// Suffix := (L[uU]?|[uU]L?)
  /// )
  /// Invalid: "0b_", "0x_", "._" etc.
  void scanNumber(ref Token t)
  {
    ulong ulong_;
    bool overflow;
    bool isDecimal;
    size_t digits;

    if (*p != '0')
      goto LscanInteger;
    ++p; // skip zero
    // check for xX bB ...
    switch (*p)
    {
    case 'x','X':
      goto LscanHex;
    case 'b','B':
      goto LscanBinary;
    case 'L':
      if (p[1] == 'i')
        goto LscanReal; // 0Li
      break; // 0L
    case '.':
      if (p[1] == '.')
        break; // 0..
      // 0.
    case 'i','f','F', // Imaginary and float literal suffixes.
         'e', 'E':    // Float exponent.
      goto LscanReal;
    default:
      if (*p == '_')
        goto LscanOctal; // 0_
      else if (isdigit(*p))
      {
        if (*p == '8' || *p == '9')
          goto Loctal_hasDecimalDigits; // 08 or 09
        else
          goto Loctal_enter_loop; // 0[0-7]
      }
    }

    // Number 0
    assert(p[-1] == '0');
    assert(*p != '_' && !isdigit(*p));
    assert(ulong_ == 0);
    isDecimal = true;
    goto Lfinalize;

  LscanInteger:
    assert(*p != 0 && isdigit(*p));
    isDecimal = true;
    goto Lenter_loop_int;
    while (1)
    {
      if (*++p == '_')
        continue;
      if (!isdigit(*p))
        break;
    Lenter_loop_int:
      if (ulong_ < ulong.max/10 || (ulong_ == ulong.max/10 && *p <= '5'))
      {
        ulong_ *= 10;
        ulong_ += *p - '0';
        continue;
      }
      // Overflow: skip following digits.
      overflow = true;
      while (isdigit(*++p)) {}
      break;
    }

    // The number could be a float, so check overflow below.
    switch (*p)
    {
    case '.':
      if (p[1] != '.')
        goto LscanReal;
      break;
    case 'L':
      if (p[1] != 'i')
        break;
    case 'i', 'f', 'F', 'e', 'E':
      goto LscanReal;
    default:
    }

    if (overflow)
      error(t.start, MID.OverflowDecimalNumber);

    assert((isdigit(p[-1]) || p[-1] == '_') && !isdigit(*p) && *p != '_');
    goto Lfinalize;

  LscanHex:
    assert(digits == 0);
    assert(*p == 'x' || *p == 'X');
    while (1)
    {
      if (*++p == '_')
        continue;
      if (!ishexad(*p))
        break;
      ++digits;
      ulong_ *= 16;
      if (*p <= '9')
        ulong_ += *p - '0';
      else if (*p <= 'F')
        ulong_ += *p - 'A' + 10;
      else
        ulong_ += *p - 'a' + 10;
    }

    assert(ishexad(p[-1]) || p[-1] == '_' || p[-1] == 'x' || p[-1] == 'X');
    assert(!ishexad(*p) && *p != '_');

    switch (*p)
    {
    case '.':
      if (p[1] == '.')
        break;
    case 'p', 'P':
      return scanHexReal(t);
    default:
    }

    if (digits == 0 || digits > 16)
      error(t.start, digits == 0 ? MID.NoDigitsInHexNumber : MID.OverflowHexNumber);

    goto Lfinalize;

  LscanBinary:
    assert(digits == 0);
    assert(*p == 'b' || *p == 'B');
    while (1)
    {
      if (*++p == '0')
      {
        ++digits;
        ulong_ *= 2;
      }
      else if (*p == '1')
      {
        ++digits;
        ulong_ *= 2;
        ulong_ += *p - '0';
      }
      else if (*p == '_')
        continue; 
      else
        break;
    }

    if (digits == 0 || digits > 64)
      error(t.start, digits == 0 ? MID.NoDigitsInBinNumber : MID.OverflowBinaryNumber);

    assert(p[-1] == '0' || p[-1] == '1' || p[-1] == '_' || p[-1] == 'b' || p[-1] == 'B', p[-1] ~ "");
    assert( !(*p == '0' || *p == '1' || *p == '_') );
    goto Lfinalize;

  LscanOctal:
    assert(*p == '_');
    while (1)
    {
      if (*++p == '_')
        continue;
      if (!isoctal(*p))
        break;
    Loctal_enter_loop:
      if (ulong_ < ulong.max/2 || (ulong_ == ulong.max/2 && *p <= '1'))
      {
        ulong_ *= 8;
        ulong_ += *p - '0';
        continue;
      }
      // Overflow: skip following digits.
      overflow = true;
      while (isoctal(*++p)) {}
      break;
    }

    bool hasDecimalDigits;
    if (isdigit(*p))
    {
    Loctal_hasDecimalDigits:
      hasDecimalDigits = true;
      while (isdigit(*++p)) {}
    }

    // The number could be a float, so check errors below.
    switch (*p)
    {
    case '.':
      if (p[1] != '.')
        goto LscanReal;
      break;
    case 'L':
      if (p[1] != 'i')
        break;
    case 'i', 'f', 'F', 'e', 'E':
      goto LscanReal;
    default:
    }

    if (hasDecimalDigits)
      error(t.start, MID.OctalNumberHasDecimals);

    if (overflow)
      error(t.start, MID.OverflowOctalNumber);
//     goto Lfinalize;

  Lfinalize:
    enum Suffix
    {
      None     = 0,
      Unsigned = 1,
      Long     = 2
    }

    // Scan optional suffix: L, Lu, LU, u, uL, U or UL.
    Suffix suffix;
    while (1)
    {
      switch (*p)
      {
      case 'L':
        if (suffix & Suffix.Long)
          break;
        suffix |= Suffix.Long;
        ++p;
        continue;
      case 'u', 'U':
        if (suffix & Suffix.Unsigned)
          break;
        suffix |= Suffix.Unsigned;
        ++p;
        continue;
      default:
        break;
      }
      break;
    }

    // Determine type of Integer.
    switch (suffix)
    {
    case Suffix.None:
      if (ulong_ & 0x8000_0000_0000_0000)
      {
        if (isDecimal)
          error(t.start, MID.OverflowDecimalSign);
        t.kind = TOK.Uint64;
      }
      else if (ulong_ & 0xFFFF_FFFF_0000_0000)
        t.kind = TOK.Int64;
      else if (ulong_ & 0x8000_0000)
        t.kind = isDecimal ? TOK.Int64 : TOK.Uint32;
      else
        t.kind = TOK.Int32;
      break;
    case Suffix.Unsigned:
      if (ulong_ & 0xFFFF_FFFF_0000_0000)
        t.kind = TOK.Uint64;
      else
        t.kind = TOK.Uint32;
      break;
    case Suffix.Long:
      if (ulong_ & 0x8000_0000_0000_0000)
      {
        if (isDecimal)
          error(t.start, MID.OverflowDecimalSign);
        t.kind = TOK.Uint64;
      }
      else
        t.kind = TOK.Int64;
      break;
    case Suffix.Unsigned | Suffix.Long:
      t.kind = TOK.Uint64;
      break;
    default:
      assert(0);
    }
    t.ulong_ = ulong_;
    t.end = p;
    return;
  LscanReal:
    scanReal(t);
    return;
  }

  /// Scans a floating point number literal.
  ///
  /// $(PRE
  /// FloatLiteral := Float[fFL]?i?
  /// Float := DecFloat | HexFloat
  /// DecFloat := ([0-9][0-9_]*[.][0-9_]*DecExponent?) |
  ///             [.][0-9][0-9_]*DecExponent? | [0-9][0-9_]*DecExponent
  /// DecExponent := [eE][+-]?[0-9][0-9_]*
  /// HexFloat := 0[xX](HexDigits[.]HexDigits |
  ///                   [.][0-9a-zA-Z]HexDigits? |
  ///                   HexDigits)HexExponent
  /// HexExponent := [pP][+-]?[0-9][0-9_]*
  /// )
  void scanReal(ref Token t)
  {
    if (*p == '.')
    {
      assert(p[1] != '.');
      // This function was called by scan() or scanNumber().
      while (isdigit(*++p) || *p == '_') {}
    }
    else
      // This function was called by scanNumber().
      assert(delegate ()
        {
          switch (*p)
          {
          case 'L':
            if (p[1] != 'i')
              return false;
          case 'i', 'f', 'F', 'e', 'E':
            return true;
          default:
          }
          return false;
        }()
      );

    // Scan exponent.
    if (*p == 'e' || *p == 'E')
    {
      ++p;
      if (*p == '-' || *p == '+')
        ++p;
      if (isdigit(*p))
        while (isdigit(*++p) || *p == '_') {}
      else
        error(t.start, MID.FloatExpMustStartWithDigit);
    }

    // Copy whole number and remove underscores from buffer.
    char[] buffer = t.start[0..p-t.start].dup;
    uint j;
    foreach (c; buffer)
      if (c != '_')
        buffer[j++] = c;
    buffer.length = j; // Adjust length.
    buffer ~= 0; // Terminate for C functions.

    finalizeFloat(t, buffer);
  }

  /// Scans a hexadecimal floating point number literal.
  void scanHexReal(ref Token t)
  {
    assert(*p == '.' || *p == 'p' || *p == 'P');
    MID mid;
    if (*p == '.')
      while (ishexad(*++p) || *p == '_')
      {}
    // Decimal exponent is required.
    if (*p != 'p' && *p != 'P')
    {
      mid = MID.HexFloatExponentRequired;
      goto Lerr;
    }
    // Scan exponent
    assert(*p == 'p' || *p == 'P');
    ++p;
    if (*p == '+' || *p == '-')
      ++p;
    if (!isdigit(*p))
    {
      mid = MID.HexFloatExpMustStartWithDigit;
      goto Lerr;
    }
    while (isdigit(*++p) || *p == '_')
    {}
    // Copy whole number and remove underscores from buffer.
    char[] buffer = t.start[0..p-t.start].dup;
    uint j;
    foreach (c; buffer)
      if (c != '_')
        buffer[j++] = c;
    buffer.length = j; // Adjust length.
    buffer ~= 0; // Terminate for C functions.
    finalizeFloat(t, buffer);
    return;
  Lerr:
    t.kind = TOK.Float32;
    t.end = p;
    error(t.start, mid);
  }

  /// Sets the value of the token.
  /// Params:
  ///   t = receives the value.
  ///   buffer = the well-formed float number.
  void finalizeFloat(ref Token t, string buffer)
  {
    assert(buffer[$-1] == 0);
    // Float number is well-formed. Check suffixes and do conversion.
    switch (*p)
    {
    case 'f', 'F':
      t.kind = TOK.Float32;
      t.float_ = strtof(buffer.ptr, null);
      ++p;
      break;
    case 'L':
      t.kind = TOK.Float80;
      t.real_ = strtold(buffer.ptr, null);
      ++p;
      break;
    default:
      t.kind = TOK.Float64;
      t.double_ = strtod(buffer.ptr, null);
    }
    if (*p == 'i')
    {
      ++p;
      t.kind += 3; // Switch to imaginary counterpart.
      assert(t.kind == TOK.Imaginary32 ||
             t.kind == TOK.Imaginary64 ||
             t.kind == TOK.Imaginary80);
    }
    if (errno() == ERANGE)
      error(t.start, MID.OverflowFloatNumber);
    t.end = p;
  }

  /// Scans a special token sequence.
  ///
  /// SpecialTokenSequence := "#line" Integer Filespec? EndOfLine
  void scanSpecialTokenSequence(ref Token t)
  {
    assert(*p == '#');
    t.kind = TOK.HashLine;
    t.setWhitespaceFlag();

    MID mid;
    char* errorAtColumn = p;
    char* tokenEnd = ++p;

    if (!(p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e'))
    {
      mid = MID.ExpectedIdentifierSTLine;
      goto Lerr;
    }
    p += 3;
    tokenEnd = p + 1;

    // TODO: #line58"path/file" is legal. Require spaces?
    //       State.Space could be used for that purpose.
    enum State
    { /+Space,+/ Integer, Filespec, End }

    State state = State.Integer;

    while (!isEndOfLine(++p))
    {
      if (isspace(*p))
        continue;
      if (state == State.Integer)
      {
        if (!isdigit(*p))
        {
          errorAtColumn = p;
          mid = MID.ExpectedIntegerAfterSTLine;
          goto Lerr;
        }
        t.tokLineNum = new Token;
        scan(*t.tokLineNum);
        tokenEnd = p;
        if (t.tokLineNum.kind != TOK.Int32 && t.tokLineNum.kind != TOK.Uint32)
        {
          errorAtColumn = t.tokLineNum.start;
          mid = MID.ExpectedIntegerAfterSTLine;
          goto Lerr;
        }
        --p; // Go one back because scan() advanced p past the integer.
        state = State.Filespec;
      }
      else if (state == State.Filespec && *p == '"')
      { // MID.ExpectedFilespec is deprecated.
        // if (*p != '"')
        // {
        //   errorAtColumn = p;
        //   mid = MID.ExpectedFilespec;
        //   goto Lerr;
        // }
        t.tokLineFilespec = new Token;
        t.tokLineFilespec.start = p;
        t.tokLineFilespec.kind = TOK.Filespec;
        t.tokLineFilespec.setWhitespaceFlag();
        while (*++p != '"')
        {
          if (isEndOfLine(p))
          {
            errorAtColumn = t.tokLineFilespec.start;
            mid = MID.UnterminatedFilespec;
            t.tokLineFilespec.end = p;
            tokenEnd = p;
            goto Lerr;
          }
          isascii(*p) || decodeUTF8();
        }
        auto start = t.tokLineFilespec.start +1; // +1 skips '"'
        t.tokLineFilespec.str = start[0 .. p - start];
        t.tokLineFilespec.end = p + 1;
        tokenEnd = p + 1;
        state = State.End;
      }
      else/+ if (state == State.End)+/
      {
        mid = MID.UnterminatedSpecialToken;
        goto Lerr;
      }
    }
    assert(isEndOfLine(p));

    if (state == State.Integer)
    {
      errorAtColumn = p;
      mid = MID.ExpectedIntegerAfterSTLine;
      goto Lerr;
    }

    // Evaluate #line only when not in token string.
    if (!inTokenString && t.tokLineNum)
    {
      this.lineNum_hline = this.lineNum - t.tokLineNum.uint_ + 1;
      if (t.tokLineFilespec)
        newFilePath(t.tokLineFilespec.str);
    }
    p = tokenEnd;
    t.end = tokenEnd;

    return;
  Lerr:
    p = tokenEnd;
    t.end = tokenEnd;
    error(errorAtColumn, mid);
  }

  /// Inserts an empty dummy token (TOK.Empty) before t.
  ///
  /// Useful in the parsing phase for representing a node in the AST
  /// that doesn't consume an actual token from the source text.
  Token* insertEmptyTokenBefore(Token* t)
  {
    assert(t !is null && t.prev !is null);
    assert(text.ptr <= t.start && t.start < end, Token.toString(t.kind));
    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.kind));

    auto prev_t = t.prev;
    auto new_t = new Token;
    new_t.kind = TOK.Empty;
    new_t.start = new_t.end = prev_t.end;
    // Link in new token.
    prev_t.next = new_t;
    new_t.prev = prev_t;
    new_t.next = t;
    t.prev = new_t;
    return new_t;
  }

  /// Returns the error line number.
  uint errorLineNumber(uint lineNum)
  {
    return lineNum - this.lineNum_hline;
  }

  /// Forwards error parameters.
  void error(char* columnPos, char[] msg, ...)
  {
    error_(this.lineNum, this.lineBegin, columnPos, msg, _arguments, _argptr);
  }

  /// ditto
  void error(char* columnPos, MID mid, ...)
  {
    error_(this.lineNum, this.lineBegin, columnPos, GetMsg(mid), _arguments, _argptr);
  }

  /// ditto
  void error(uint lineNum, char* lineBegin, char* columnPos, MID mid, ...)
  {
    error_(lineNum, lineBegin, columnPos, GetMsg(mid), _arguments, _argptr);
  }

  /// Creates an error report and appends it to a list.
  /// Params:
  ///   lineNum = the line number.
  ///   lineBegin = points to the first character of the current line.
  ///   columnPos = points to the character where the error is located.
  ///   msg = the message.
  void error_(uint lineNum, char* lineBegin, char* columnPos, char[] msg,
              TypeInfo[] _arguments, Arg _argptr)
  {
    lineNum = this.errorLineNumber(lineNum);
    auto errorPath = this.filePaths.setPath;
    auto location = new Location(errorPath, lineNum, lineBegin, columnPos);
    msg = Format(_arguments, _argptr, msg);
    auto error = new LexerError(location, msg);
    errors ~= error;
    if (infoMan !is null)
      infoMan ~= error;
  }

  /// Scans the whole source text until EOF is encountered.
  void scanAll()
  {
    while (nextToken() != TOK.EOF)
    {}
  }

  /// Returns the first token of the source text.
  /// This can be the EOF token.
  /// Structure: HEAD -> Newline -> First Token
  Token* firstToken()
  {
    return this.head.next.next;
  }

  /// Returns true if str is a valid D identifier.
  static bool isIdentifierString(char[] str)
  {
    if (str.length == 0 || isdigit(str[0]))
      return false;
    size_t idx;
    do
    {
      auto c = dil.Unicode.decode(str, idx);
      if (c == ERROR_CHAR || !(isident(c) || !isascii(c) && isUniAlpha(c)))
        return false;
    } while (idx < str.length)
    return true;
  }

  /// Returns true if str is a keyword or
  /// a special token (__FILE__, __LINE__ etc.)
  static bool isReservedIdentifier(char[] str)
  {
    if (str.length == 0)
      return false;
    auto id = IdTable.inStatic(str);
    if (id is null || id.kind == TOK.Identifier)
      return false; // str is not in the table or a normal identifier.
    return true;
  }

  /// Returns true if this is a valid identifier and if it's not reserved.
  static bool isValidUnreservedIdentifier(char[] str)
  {
    return isIdentifierString(str) && !isReservedIdentifier(str);
  }

  /// Returns true if the current character to be decoded is
  /// a Unicode alpha character.
  ///
  /// The current pointer 'p' is not advanced if false is returned.
  bool isUnicodeAlpha()
  {
    assert(!isascii(*p), "check for ASCII char before calling decodeUTF8().");
    char* p = this.p;
    dchar d = *p;
    ++p; // Move to second byte.
    // Error if second byte is not a trail byte.
    if (!isTrailByte(*p))
      return false;
    // Check for overlong sequences.
    switch (d)
    {
    case 0xE0, 0xF0, 0xF8, 0xFC:
      if ((*p & d) == 0x80)
        return false;
    default:
      if ((d & 0xFE) == 0xC0) // 1100000x
        return false;
    }
    const char[] checkNextByte = "if (!isTrailByte(*++p))"
                                 "  return false;";
    const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
    // Decode
    if ((d & 0b1110_0000) == 0b1100_0000)
    {
      d &= 0b0001_1111;
      mixin(appendSixBits);
    }
    else if ((d & 0b1111_0000) == 0b1110_0000)
    {
      d &= 0b0000_1111;
      mixin(appendSixBits ~
            checkNextByte ~ appendSixBits);
    }
    else if ((d & 0b1111_1000) == 0b1111_0000)
    {
      d &= 0b0000_0111;
      mixin(appendSixBits ~
            checkNextByte ~ appendSixBits ~
            checkNextByte ~ appendSixBits);
    }
    else
      return false;

    assert(isTrailByte(*p));
    if (!isValidChar(d) || !isUniAlpha(d))
      return false;
    // Only advance pointer if this is a Unicode alpha character.
    this.p = p;
    return true;
  }

  /// Decodes the next UTF-8 sequence.
  dchar decodeUTF8()
  {
    assert(!isascii(*p), "check for ASCII char before calling decodeUTF8().");
    char* p = this.p;
    dchar d = *p;

    ++p; // Move to second byte.
    // Error if second byte is not a trail byte.
    if (!isTrailByte(*p))
      goto Lerr2;

    // Check for overlong sequences.
    switch (d)
    {
    case 0xE0, // 11100000 100xxxxx
         0xF0, // 11110000 1000xxxx
         0xF8, // 11111000 10000xxx
         0xFC: // 11111100 100000xx
      if ((*p & d) == 0x80)
        goto Lerr;
    default:
      if ((d & 0xFE) == 0xC0) // 1100000x
        goto Lerr;
    }

    const char[] checkNextByte = "if (!isTrailByte(*++p))"
                                 "  goto Lerr2;";
    const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";

    // Decode
    if ((d & 0b1110_0000) == 0b1100_0000)
    { // 110xxxxx 10xxxxxx
      d &= 0b0001_1111;
      mixin(appendSixBits);
    }
    else if ((d & 0b1111_0000) == 0b1110_0000)
    { // 1110xxxx 10xxxxxx 10xxxxxx
      d &= 0b0000_1111;
      mixin(appendSixBits ~
            checkNextByte ~ appendSixBits);
    }
    else if ((d & 0b1111_1000) == 0b1111_0000)
    { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      d &= 0b0000_0111;
      mixin(appendSixBits ~
            checkNextByte ~ appendSixBits ~
            checkNextByte ~ appendSixBits);
    }
    else
      // 5 and 6 byte UTF-8 sequences are not allowed yet.
      // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
      // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
      goto Lerr;

    assert(isTrailByte(*p));

    if (!isValidChar(d))
    {
    Lerr:
      // Three cases:
      // *) the UTF-8 sequence was successfully decoded but the resulting
      //    character is invalid.
      //    p points to last trail byte in the sequence.
      // *) the UTF-8 sequence is overlong.
      //    p points to second byte in the sequence.
      // *) the UTF-8 sequence has more than 4 bytes or starts with
      //    a trail byte.
      //    p points to second byte in the sequence.
      assert(isTrailByte(*p));
      // Move to next ASCII character or lead byte of a UTF-8 sequence.
      while (p < (end-1) && isTrailByte(*p))
        ++p;
      --p;
      assert(!isTrailByte(p[1]));
    Lerr2:
      d = REPLACEMENT_CHAR;
      error(this.p, MID.InvalidUTF8Sequence, formatBytes(this.p, p));
    }

    this.p = p;
    return d;
  }

  /// Encodes the character d and appends it to str.
  static void encodeUTF8(ref char[] str, dchar d)
  {
    assert(!isascii(d), "check for ASCII char before calling encodeUTF8().");
    assert(isValidChar(d), "check if character is valid before calling encodeUTF8().");

    char[6] b = void;
    if (d < 0x800)
    {
      b[0] = 0xC0 | (d >> 6);
      b[1] = 0x80 | (d & 0x3F);
      str ~= b[0..2];
    }
    else if (d < 0x10000)
    {
      b[0] = 0xE0 | (d >> 12);
      b[1] = 0x80 | ((d >> 6) & 0x3F);
      b[2] = 0x80 | (d & 0x3F);
      str ~= b[0..3];
    }
    else if (d < 0x200000)
    {
      b[0] = 0xF0 | (d >> 18);
      b[1] = 0x80 | ((d >> 12) & 0x3F);
      b[2] = 0x80 | ((d >> 6) & 0x3F);
      b[3] = 0x80 | (d & 0x3F);
      str ~= b[0..4];
    }
    /+ // There are no 5 and 6 byte UTF-8 sequences yet.
    else if (d < 0x4000000)
    {
      b[0] = 0xF8 | (d >> 24);
      b[1] = 0x80 | ((d >> 18) & 0x3F);
      b[2] = 0x80 | ((d >> 12) & 0x3F);
      b[3] = 0x80 | ((d >> 6) & 0x3F);
      b[4] = 0x80 | (d & 0x3F);
      str ~= b[0..5];
    }
    else if (d < 0x80000000)
    {
      b[0] = 0xFC | (d >> 30);
      b[1] = 0x80 | ((d >> 24) & 0x3F);
      b[2] = 0x80 | ((d >> 18) & 0x3F);
      b[3] = 0x80 | ((d >> 12) & 0x3F);
      b[4] = 0x80 | ((d >> 6) & 0x3F);
      b[5] = 0x80 | (d & 0x3F);
      str ~= b[0..6];
    }
    +/
    else
     assert(0);
  }

  /// Formats the bytes between start and end.
  /// Returns: e.g.: abc -> \x61\x62\x63
  static char[] formatBytes(char* start, char* end)
  {
    auto strLen = end-start;
    const formatLen = `\xXX`.length;
    char[] result = new char[strLen*formatLen]; // Reserve space.
    result.length = 0;
    foreach (c; cast(ubyte[])start[0..strLen])
      result ~= Format("\\x{:X}", c);
    return result;
  }

  /// Searches for an invalid UTF-8 sequence in str.
  /// Returns: a formatted string of the invalid sequence (e.g. \xC0\x80).
  static string findInvalidUTF8Sequence(string str)
  {
    char* p = str.ptr, end = p + str.length;
    while (p < end)
    {
      if (decode(p, end) == ERROR_CHAR)
      {
        auto begin = p;
        // Skip trail-bytes.
        while (++p < end && isTrailByte(*p))
        {}
        return Lexer.formatBytes(begin, p);
      }
    }
    assert(p == end);
    return "";
  }
}

/// Tests the lexer with a list of tokens.
unittest
{
  Stdout("Testing Lexer.\n");
  struct Pair
  {
    char[] tokenText;
    TOK kind;
  }
  static Pair[] pairs = [
    {"#!äöüß",  TOK.Shebang},       {"\n",      TOK.Newline},
    {"//çay",   TOK.Comment},       {"\n",      TOK.Newline},
                                    {"&",       TOK.AndBinary},
    {"/*çağ*/", TOK.Comment},       {"&&",      TOK.AndLogical},
    {"/+çak+/", TOK.Comment},       {"&=",      TOK.AndAssign},
    {">",       TOK.Greater},       {"+",       TOK.Plus},
    {">=",      TOK.GreaterEqual},  {"++",      TOK.PlusPlus},
    {">>",      TOK.RShift},        {"+=",      TOK.PlusAssign},
    {">>=",     TOK.RShiftAssign},  {"-",       TOK.Minus},
    {">>>",     TOK.URShift},       {"--",      TOK.MinusMinus},
    {">>>=",    TOK.URShiftAssign}, {"-=",      TOK.MinusAssign},
    {"<",       TOK.Less},          {"=",       TOK.Assign},
    {"<=",      TOK.LessEqual},     {"==",      TOK.Equal},
    {"<>",      TOK.LorG},          {"~",       TOK.Tilde},
    {"<>=",     TOK.LorEorG},       {"~=",      TOK.CatAssign},
    {"<<",      TOK.LShift},        {"*",       TOK.Mul},
    {"<<=",     TOK.LShiftAssign},  {"*=",      TOK.MulAssign},
    {"!",       TOK.Not},           {"/",       TOK.Div},
    {"!=",      TOK.NotEqual},      {"/=",      TOK.DivAssign},
    {"!<",      TOK.UorGorE},       {"^",       TOK.Xor},
    {"!>",      TOK.UorLorE},       {"^=",      TOK.XorAssign},
    {"!<=",     TOK.UorG},          {"%",       TOK.Mod},
    {"!>=",     TOK.UorL},          {"%=",      TOK.ModAssign},
    {"!<>",     TOK.UorE},          {"(",       TOK.LParen},
    {"!<>=",    TOK.Unordered},     {")",       TOK.RParen},
    {".",       TOK.Dot},           {"[",       TOK.LBracket},
    {"..",      TOK.Slice},         {"]",       TOK.RBracket},
    {"...",     TOK.Ellipses},      {"{",       TOK.LBrace},
    {"|",       TOK.OrBinary},      {"}",       TOK.RBrace},
    {"||",      TOK.OrLogical},     {":",       TOK.Colon},
    {"|=",      TOK.OrAssign},      {";",       TOK.Semicolon},
    {"?",       TOK.Question},      {",",       TOK.Comma},
    {"$",       TOK.Dollar},        {"cam",     TOK.Identifier},
    {"çay",     TOK.Identifier},    {".0",      TOK.Float64},
    {"0",       TOK.Int32},         {"\n",      TOK.Newline},
    {"\r",      TOK.Newline},       {"\r\n",    TOK.Newline},
    {"\u2028",  TOK.Newline},       {"\u2029",  TOK.Newline}
  ];

  char[] src;

  // Join all token texts into a single string.
  foreach (i, pair; pairs)
    if (pair.kind == TOK.Comment && pair.tokenText[1] == '/' || // Line comment.
        pair.kind == TOK.Shebang)
    {
      assert(pairs[i+1].kind == TOK.Newline); // Must be followed by a newline.
      src ~= pair.tokenText;
    }
    else
      src ~= pair.tokenText ~ " ";

  auto lx = new Lexer(new SourceText("", src));
  auto token = lx.getTokens();

  uint i;
  assert(token == lx.head);
  assert(token.next.kind == TOK.Newline);
  token = token.next.next;
  do
  {
    assert(i < pairs.length);
    assert(token.srcText == pairs[i].tokenText, Format("Scanned '{0}' but expected '{1}'", token.srcText, pairs[i].tokenText));
    ++i;
    token = token.next;
  } while (token.kind != TOK.EOF)
}

/// Tests the Lexer's peek() method.
unittest
{
  Stdout("Testing method Lexer.peek()\n");
  auto sourceText = new SourceText("", "unittest { }");
  auto lx = new Lexer(sourceText, null);

  auto next = lx.head;
  lx.peek(next);
  assert(next.kind == TOK.Newline);
  lx.peek(next);
  assert(next.kind == TOK.Unittest);
  lx.peek(next);
  assert(next.kind == TOK.LBrace);
  lx.peek(next);
  assert(next.kind == TOK.RBrace);
  lx.peek(next);
  assert(next.kind == TOK.EOF);

  lx = new Lexer(new SourceText("", ""));
  next = lx.head;
  lx.peek(next);
  assert(next.kind == TOK.Newline);
  lx.peek(next);
  assert(next.kind == TOK.EOF);
}

unittest
{
  // Numbers unittest
  // 0L 0ULi 0_L 0_UL 0x0U 0x0p2 0_Fi 0_e2 0_F 0_i
  // 0u 0U 0uL 0UL 0L 0LU 0Lu
  // 0Li 0f 0F 0fi 0Fi 0i
  // 0b_1_LU 0b1000u
  // 0x232Lu
}