comparison src/dil/lexer/Lexer.d @ 806:bcb74c9b895c

Moved out files in the trunk folder to the root.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sun, 09 Mar 2008 00:12:19 +0100
parents trunk/src/dil/lexer/Lexer.d@cb8040538772
children 49e32b5bc161
1 /++
2 Author: Aziz Köksal
3 License: GPL3
4 +/
5 module dil.lexer.Lexer;
6
7 import dil.lexer.Token;
8 import dil.lexer.Keywords;
9 import dil.lexer.Identifier;
10 import dil.lexer.IdTable;
11 import dil.Information;
12 import dil.Messages;
13 import dil.HtmlEntities;
14 import dil.CompilerInfo;
15 import dil.Unicode;
16 import dil.SourceText;
17 import dil.Time;
18 import common;
19
20 import tango.stdc.stdlib : strtof, strtod, strtold;
21 import tango.stdc.errno : errno, ERANGE;
22
23 public import dil.lexer.Funcs;
24
25 /// The Lexer analyzes the characters of a source text and
26 /// produces a doubly-linked list of tokens.
27 class Lexer
28 {
29 SourceText srcText; /// The source text.
30 char* p; /// Points to the current character in the source text.
31 char* end; /// Points one character past the end of the source text.
32
33 Token* head; /// The head of the doubly linked token list.
34 Token* tail; /// The tail of the linked list. Set in scan().
35 Token* token; /// Points to the current token in the token list.
36
37 // Members used for error messages:
38 InfoManager infoMan;
39 LexerError[] errors;
40 /// Always points to the first character of the current line.
41 char* lineBegin;
42 // Token* newline; /// Current newline token.
43 uint lineNum = 1; /// Current, actual source text line number.
44 uint lineNum_hline; /// Line number set by #line.
45 uint inTokenString; /// > 0 if inside q{ }
46 /// Holds the original file path and the modified one (by #line).
47 NewlineData.FilePaths* filePaths;
48
49 /// Construct a Lexer object.
50 /// Params:
51 /// srcText = the UTF-8 source code.
52 /// infoMan = used for collecting error messages.
53 this(SourceText srcText, InfoManager infoMan = null)
54 {
55 this.srcText = srcText;
56 this.infoMan = infoMan;
57
58 assert(text.length && text[$-1] == 0, "source text has no sentinel character");
59 this.p = text.ptr;
60 this.end = this.p + text.length;
61 this.lineBegin = this.p;
62
63 this.head = new Token;
64 this.head.kind = TOK.HEAD;
65 this.head.start = this.head.end = this.p;
66 this.token = this.head;
67 // Initialize this.filePaths.
68 newFilePath(this.srcText.filePath);
69 // Add a newline as the first token after the head.
70 auto newline = new Token;
71 newline.kind = TOK.Newline;
72 newline.setWhitespaceFlag();
73 newline.start = newline.end = this.p;
74 newline.newline.filePaths = this.filePaths;
75 newline.newline.oriLineNum = 1;
76 newline.newline.setLineNum = 0;
77 // Link in.
78 this.token.next = newline;
79 newline.prev = this.token;
80 this.token = newline;
81 // this.newline = newline;
82 scanShebang();
83 }
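// Editor's note: an illustrative usage sketch, not part of the original file.
// The way srcText is obtained is assumed here; the Lexer calls shown
// (scanAll(), firstToken(), nextToken()) are the ones declared further down.
// ---
// auto lexer = new Lexer(srcText); // srcText: a loaded SourceText instance
// lexer.scanAll(); // tokenize everything up to EOF
// for (auto tok = lexer.firstToken(); tok !is null; tok = tok.next)
// { /* inspect tok.kind and tok.srcText here */ }
// ---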
84
85 /// The destructor deletes the doubly-linked token list.
86 ~this()
87 {
88 auto token = head.next;
89 while (token !is null)
90 {
91 assert(token.kind == TOK.EOF ? token == tail && token.next is null : 1);
92 delete token.prev;
93 token = token.next;
94 }
95 delete tail;
96 }
97
98 char[] text()
99 {
100 return srcText.data;
101 }
102
103 /// The "shebang" may optionally appear once at the beginning of a file.
104 /// Regexp: #![^\EndOfLine]*
105 void scanShebang()
106 {
107 if (*p == '#' && p[1] == '!')
108 {
109 auto t = new Token;
110 t.kind = TOK.Shebang;
111 t.setWhitespaceFlag();
112 t.start = p;
113 ++p;
114 while (!isEndOfLine(++p))
115 isascii(*p) || decodeUTF8();
116 t.end = p;
117 this.token.next = t;
118 t.prev = this.token;
119 }
120 }
121
122 /// Sets the value of the special token.
123 void finalizeSpecialToken(ref Token t)
124 {
125 assert(t.srcText[0..2] == "__");
126 switch (t.kind)
127 {
128 case TOK.FILE:
129 t.str = this.filePaths.setPath;
130 break;
131 case TOK.LINE:
132 t.uint_ = this.errorLineNumber(this.lineNum);
133 break;
134 case TOK.DATE,
135 TOK.TIME,
136 TOK.TIMESTAMP:
137 auto time_str = Time.toString();
138 switch (t.kind)
139 {
140 case TOK.DATE:
141 time_str = Time.month_day(time_str) ~ ' ' ~ Time.year(time_str); break;
142 case TOK.TIME:
143 time_str = Time.time(time_str); break;
144 case TOK.TIMESTAMP:
145 break; // time_str is the timestamp.
146 default: assert(0);
147 }
148 time_str ~= '\0'; // Terminate with a zero.
149 t.str = time_str;
150 break;
151 case TOK.VENDOR:
152 t.str = VENDOR;
153 break;
154 case TOK.VERSION:
155 t.uint_ = VERSION_MAJOR*1000 + VERSION_MINOR;
156 break;
157 default:
158 assert(0);
159 }
160 }
161
162 /// Sets a new file path.
163 void newFilePath(char[] newPath)
164 {
165 auto paths = new NewlineData.FilePaths;
166 paths.oriPath = this.srcText.filePath;
167 paths.setPath = newPath;
168 this.filePaths = paths;
169 }
170
171 private void setLineBegin(char* p)
172 {
173 // Check that we can look behind one character.
174 assert((p-1) >= text.ptr && p < end);
175 // Check that previous character is a newline.
176 assert(isNewlineEnd(p - 1));
177 this.lineBegin = p;
178 }
179
180 /// Scans the next token in the source text.
181 ///
182 /// Creates a new token if t.next is null and appends it to the list.
183 private void scanNext(ref Token* t)
184 {
185 assert(t !is null);
186 if (t.next)
187 {
188 t = t.next;
189 // if (t.kind == TOK.Newline)
190 // this.newline = t;
191 }
192 else if (t != this.tail)
193 {
194 Token* new_t = new Token;
195 scan(*new_t);
196 new_t.prev = t;
197 t.next = new_t;
198 t = new_t;
199 }
200 }
201
202 /// Advance t one token forward.
203 void peek(ref Token* t)
204 {
205 scanNext(t);
206 }
207
208 /// Advance to the next token in the source text.
209 TOK nextToken()
210 {
211 scanNext(this.token);
212 return this.token.kind;
213 }
214
215 /// Returns true if p points to the last character of a Newline.
216 bool isNewlineEnd(char* p)
217 {
218 if (*p == '\n' || *p == '\r')
219 return true;
220 if (*p == LS[2] || *p == PS[2])
221 if ((p-2) >= text.ptr)
222 if (p[-1] == LS[1] && p[-2] == LS[0])
223 return true;
224 return false;
225 }
226
227 /// The main method which recognizes the characters that make up a token.
228 ///
229 /// Complicated tokens are scanned in separate methods.
230 public void scan(ref Token t)
231 in
232 {
233 assert(text.ptr <= p && p < end);
234 }
235 out
236 {
237 assert(text.ptr <= t.start && t.start < end, Token.toString(t.kind));
238 assert(text.ptr <= t.end && t.end <= end, Token.toString(t.kind));
239 }
240 body
241 {
242 // Scan whitespace.
243 if (isspace(*p))
244 {
245 t.ws = p;
246 while (isspace(*++p))
247 {}
248 }
249
250 // Scan a token.
251 uint c = *p;
252 {
253 t.start = p;
254 // Newline.
255 switch (*p)
256 {
257 case '\r':
258 if (p[1] == '\n')
259 ++p;
260 case '\n':
261 assert(isNewlineEnd(p));
262 ++p;
263 ++lineNum;
264 setLineBegin(p);
265 // this.newline = &t;
266 t.kind = TOK.Newline;
267 t.setWhitespaceFlag();
268 t.newline.filePaths = this.filePaths;
269 t.newline.oriLineNum = lineNum;
270 t.newline.setLineNum = lineNum_hline;
271 t.end = p;
272 return;
273 default:
274 if (isUnicodeNewline(p))
275 {
276 ++p; ++p;
277 goto case '\n';
278 }
279 }
280 // Identifier or string literal.
281 if (isidbeg(c))
282 {
283 if (c == 'r' && p[1] == '"' && ++p)
284 return scanRawStringLiteral(t);
285 if (c == 'x' && p[1] == '"')
286 return scanHexStringLiteral(t);
287 version(D2)
288 {
289 if (c == 'q' && p[1] == '"')
290 return scanDelimitedStringLiteral(t);
291 if (c == 'q' && p[1] == '{')
292 return scanTokenStringLiteral(t);
293 }
294 // Scan identifier.
295 Lidentifier:
296 do
297 { c = *++p; }
298 while (isident(c) || !isascii(c) && isUnicodeAlpha())
299
300 t.end = p;
301
302 auto id = IdTable.lookup(t.srcText);
303 t.kind = id.kind;
304 t.ident = id;
305
306 if (t.kind == TOK.Identifier || t.isKeyword)
307 return;
308 else if (t.isSpecialToken)
309 finalizeSpecialToken(t);
310 else if (t.kind == TOK.EOF)
311 {
312 tail = &t;
313 assert(t.srcText == "__EOF__");
314 }
315 else
316 assert(0, "unexpected token type: " ~ Token.toString(t.kind));
317 return;
318 }
319
320 if (isdigit(c))
321 return scanNumber(t);
322
323 if (c == '/')
324 {
325 c = *++p;
326 switch(c)
327 {
328 case '=':
329 ++p;
330 t.kind = TOK.DivAssign;
331 t.end = p;
332 return;
333 case '+':
334 return scanNestedComment(t);
335 case '*':
336 return scanBlockComment(t);
337 case '/':
338 while (!isEndOfLine(++p))
339 isascii(*p) || decodeUTF8();
340 t.kind = TOK.Comment;
341 t.setWhitespaceFlag();
342 t.end = p;
343 return;
344 default:
345 t.kind = TOK.Div;
346 t.end = p;
347 return;
348 }
349 }
350
351 switch (c)
352 {
353 case '\'':
354 return scanCharacterLiteral(t);
355 case '`':
356 return scanRawStringLiteral(t);
357 case '"':
358 return scanNormalStringLiteral(t);
359 case '\\':
360 char[] buffer;
361 do
362 {
363 bool isBinary;
364 c = scanEscapeSequence(isBinary);
365 if (isascii(c) || isBinary)
366 buffer ~= c;
367 else
368 encodeUTF8(buffer, c);
369 } while (*p == '\\')
370 buffer ~= 0;
371 t.kind = TOK.String;
372 t.str = buffer;
373 t.end = p;
374 return;
375 case '>': /* > >= >> >>= >>> >>>= */
376 c = *++p;
377 switch (c)
378 {
379 case '=':
380 t.kind = TOK.GreaterEqual;
381 goto Lcommon;
382 case '>':
383 if (p[1] == '>')
384 {
385 ++p;
386 if (p[1] == '=')
387 { ++p;
388 t.kind = TOK.URShiftAssign;
389 }
390 else
391 t.kind = TOK.URShift;
392 }
393 else if (p[1] == '=')
394 {
395 ++p;
396 t.kind = TOK.RShiftAssign;
397 }
398 else
399 t.kind = TOK.RShift;
400 goto Lcommon;
401 default:
402 t.kind = TOK.Greater;
403 goto Lcommon2;
404 }
405 assert(0);
406 case '<': /* < <= <> <>= << <<= */
407 c = *++p;
408 switch (c)
409 {
410 case '=':
411 t.kind = TOK.LessEqual;
412 goto Lcommon;
413 case '<':
414 if (p[1] == '=') {
415 ++p;
416 t.kind = TOK.LShiftAssign;
417 }
418 else
419 t.kind = TOK.LShift;
420 goto Lcommon;
421 case '>':
422 if (p[1] == '=') {
423 ++p;
424 t.kind = TOK.LorEorG;
425 }
426 else
427 t.kind = TOK.LorG;
428 goto Lcommon;
429 default:
430 t.kind = TOK.Less;
431 goto Lcommon2;
432 }
433 assert(0);
434 case '!': /* ! !< !> !<= !>= !<> !<>= */
435 c = *++p;
436 switch (c)
437 {
438 case '<':
439 c = *++p;
440 if (c == '>')
441 {
442 if (p[1] == '=') {
443 ++p;
444 t.kind = TOK.Unordered;
445 }
446 else
447 t.kind = TOK.UorE;
448 }
449 else if (c == '=')
450 {
451 t.kind = TOK.UorG;
452 }
453 else {
454 t.kind = TOK.UorGorE;
455 goto Lcommon2;
456 }
457 goto Lcommon;
458 case '>':
459 if (p[1] == '=')
460 {
461 ++p;
462 t.kind = TOK.UorL;
463 }
464 else
465 t.kind = TOK.UorLorE;
466 goto Lcommon;
467 case '=':
468 t.kind = TOK.NotEqual;
469 goto Lcommon;
470 default:
471 t.kind = TOK.Not;
472 goto Lcommon2;
473 }
474 assert(0);
475 case '.': /* . .[0-9] .. ... */
476 if (p[1] == '.')
477 {
478 ++p;
479 if (p[1] == '.') {
480 ++p;
481 t.kind = TOK.Ellipses;
482 }
483 else
484 t.kind = TOK.Slice;
485 }
486 else if (isdigit(p[1]))
487 {
488 return scanReal(t);
489 }
490 else
491 t.kind = TOK.Dot;
492 goto Lcommon;
493 case '|': /* | || |= */
494 c = *++p;
495 if (c == '=')
496 t.kind = TOK.OrAssign;
497 else if (c == '|')
498 t.kind = TOK.OrLogical;
499 else {
500 t.kind = TOK.OrBinary;
501 goto Lcommon2;
502 }
503 goto Lcommon;
504 case '&': /* & && &= */
505 c = *++p;
506 if (c == '=')
507 t.kind = TOK.AndAssign;
508 else if (c == '&')
509 t.kind = TOK.AndLogical;
510 else {
511 t.kind = TOK.AndBinary;
512 goto Lcommon2;
513 }
514 goto Lcommon;
515 case '+': /* + ++ += */
516 c = *++p;
517 if (c == '=')
518 t.kind = TOK.PlusAssign;
519 else if (c == '+')
520 t.kind = TOK.PlusPlus;
521 else {
522 t.kind = TOK.Plus;
523 goto Lcommon2;
524 }
525 goto Lcommon;
526 case '-': /* - -- -= */
527 c = *++p;
528 if (c == '=')
529 t.kind = TOK.MinusAssign;
530 else if (c == '-')
531 t.kind = TOK.MinusMinus;
532 else {
533 t.kind = TOK.Minus;
534 goto Lcommon2;
535 }
536 goto Lcommon;
537 case '=': /* = == */
538 if (p[1] == '=') {
539 ++p;
540 t.kind = TOK.Equal;
541 }
542 else
543 t.kind = TOK.Assign;
544 goto Lcommon;
545 case '~': /* ~ ~= */
546 if (p[1] == '=') {
547 ++p;
548 t.kind = TOK.CatAssign;
549 }
550 else
551 t.kind = TOK.Tilde;
552 goto Lcommon;
553 case '*': /* * *= */
554 if (p[1] == '=') {
555 ++p;
556 t.kind = TOK.MulAssign;
557 }
558 else
559 t.kind = TOK.Mul;
560 goto Lcommon;
561 case '^': /* ^ ^= */
562 if (p[1] == '=') {
563 ++p;
564 t.kind = TOK.XorAssign;
565 }
566 else
567 t.kind = TOK.Xor;
568 goto Lcommon;
569 case '%': /* % %= */
570 if (p[1] == '=') {
571 ++p;
572 t.kind = TOK.ModAssign;
573 }
574 else
575 t.kind = TOK.Mod;
576 goto Lcommon;
577 // Single character tokens:
578 case '(':
579 t.kind = TOK.LParen;
580 goto Lcommon;
581 case ')':
582 t.kind = TOK.RParen;
583 goto Lcommon;
584 case '[':
585 t.kind = TOK.LBracket;
586 goto Lcommon;
587 case ']':
588 t.kind = TOK.RBracket;
589 goto Lcommon;
590 case '{':
591 t.kind = TOK.LBrace;
592 goto Lcommon;
593 case '}':
594 t.kind = TOK.RBrace;
595 goto Lcommon;
596 case ':':
597 t.kind = TOK.Colon;
598 goto Lcommon;
599 case ';':
600 t.kind = TOK.Semicolon;
601 goto Lcommon;
602 case '?':
603 t.kind = TOK.Question;
604 goto Lcommon;
605 case ',':
606 t.kind = TOK.Comma;
607 goto Lcommon;
608 case '$':
609 t.kind = TOK.Dollar;
610 Lcommon:
611 ++p;
612 Lcommon2:
613 t.end = p;
614 return;
615 case '#':
616 return scanSpecialTokenSequence(t);
617 default:
618 }
619
620 // Check for EOF
621 if (isEOF(c))
622 {
623 assert(isEOF(*p), ""~*p);
624 t.kind = TOK.EOF;
625 t.end = p;
626 tail = &t;
627 assert(t.start == t.end);
628 return;
629 }
630
631 if (!isascii(c))
632 {
633 c = decodeUTF8();
634 if (isUniAlpha(c))
635 goto Lidentifier;
636 }
637
638 error(t.start, MID.IllegalCharacter, cast(dchar)c);
639
640 ++p;
641 t.kind = TOK.Illegal;
642 t.setWhitespaceFlag();
643 t.dchar_ = c;
644 t.end = p;
645 return;
646 }
647 }
648
649 /// Converts a string literal to an integer.
650 template toUint(char[] T)
651 {
652 static assert(0 < T.length && T.length <= 4);
653 static if (T.length == 1)
654 const uint toUint = T[0];
655 else
656 const uint toUint = (T[0] << ((T.length-1)*8)) | toUint!(T[1..$]);
657 }
658 static assert(toUint!("\xAA\xBB\xCC\xDD") == 0xAABBCCDD);
659
660 /// Constructs case statements. E.g.:
661 /// ---
662 /// // case_!("<", "Less", "Lcommon") ->
663 /// case 60u:
664 /// t.kind = TOK.Less;
665 /// goto Lcommon;
666 /// ---
667 /// Note: Can't use this yet due to a $(DMDBUG 1534, bug) in DMD.
668 template case_(char[] str, char[] kind, char[] label)
669 {
670 const char[] case_ =
671 `case `~toUint!(str).stringof~`:`
672 `t.kind = TOK.`~kind~`;`
673 `goto `~label~`;`;
674 }
675 //pragma(msg, case_!("<", "Less", "Lcommon"));
676
677 template case_L4(char[] str, TOK kind)
678 {
679 const char[] case_L4 = case_!(str, kind, "Lcommon_4");
680 }
681
682 template case_L3(char[] str, TOK kind)
683 {
684 const char[] case_L3 = case_!(str, kind, "Lcommon_3");
685 }
686
687 template case_L2(char[] str, TOK kind)
688 {
689 const char[] case_L2 = case_!(str, kind, "Lcommon_2");
690 }
691
692 template case_L1(char[] str, TOK kind)
693 {
694 const char[] case_L1 = case_!(str, kind, "Lcommon");
695 }
696
697 /// An alternative scan method.
698 /// Profiling shows it's a bit slower.
699 public void scan_(ref Token t)
700 in
701 {
702 assert(text.ptr <= p && p < end);
703 }
704 out
705 {
706 assert(text.ptr <= t.start && t.start < end, Token.toString(t.kind));
707 assert(text.ptr <= t.end && t.end <= end, Token.toString(t.kind));
708 }
709 body
710 {
711 // Scan whitespace.
712 if (isspace(*p))
713 {
714 t.ws = p;
715 while (isspace(*++p))
716 {}
717 }
718
719 // Scan a token.
720 t.start = p;
721 // Newline.
722 switch (*p)
723 {
724 case '\r':
725 if (p[1] == '\n')
726 ++p;
727 case '\n':
728 assert(isNewlineEnd(p));
729 ++p;
730 ++lineNum;
731 setLineBegin(p);
732 // this.newline = &t;
733 t.kind = TOK.Newline;
734 t.setWhitespaceFlag();
735 t.newline.filePaths = this.filePaths;
736 t.newline.oriLineNum = lineNum;
737 t.newline.setLineNum = lineNum_hline;
738 t.end = p;
739 return;
740 default:
741 if (isUnicodeNewline(p))
742 {
743 ++p; ++p;
744 goto case '\n';
745 }
746 }
747
748 uint c = *p;
749 assert(end - p != 0);
750 switch (end - p)
751 {
752 case 1:
753 goto L1character;
754 case 2:
755 c <<= 8; c |= p[1];
756 goto L2characters;
757 case 3:
758 c <<= 8; c |= p[1]; c <<= 8; c |= p[2];
759 goto L3characters;
760 default:
761 version(BigEndian)
762 c = *cast(uint*)p;
763 else
764 {
765 c <<= 8; c |= p[1]; c <<= 8; c |= p[2]; c <<= 8; c |= p[3];
766 /+
767 c = *cast(uint*)p;
768 asm
769 {
770 mov EDX, c;
771 bswap EDX;
772 mov c, EDX;
773 }
774 +/
775 }
776 }
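// Editor's note (added illustration): the switch above packs up to four source
// bytes into c in big-endian order, matching the toUint template defined
// earlier in this class, so whole operators can be matched with one integer test:
static assert(toUint!(">>>=") == 0x3E3E3E3D); // '>' == 0x3E, '=' == 0x3D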
777
778 // 4 character tokens.
779 switch (c)
780 {
781 case toUint!(">>>="):
782 t.kind = TOK.URShiftAssign;
783 goto Lcommon_4;
784 case toUint!("!<>="):
785 t.kind = TOK.Unordered;
786 Lcommon_4:
787 p += 4;
788 t.end = p;
789 return;
790 default:
791 }
792
793 c >>>= 8;
794 L3characters:
795 assert(p == t.start);
796 // 3 character tokens.
797 switch (c)
798 {
799 case toUint!(">>="):
800 t.kind = TOK.RShiftAssign;
801 goto Lcommon_3;
802 case toUint!(">>>"):
803 t.kind = TOK.URShift;
804 goto Lcommon_3;
805 case toUint!("<>="):
806 t.kind = TOK.LorEorG;
807 goto Lcommon_3;
808 case toUint!("<<="):
809 t.kind = TOK.LShiftAssign;
810 goto Lcommon_3;
811 case toUint!("!<="):
812 t.kind = TOK.UorG;
813 goto Lcommon_3;
814 case toUint!("!>="):
815 t.kind = TOK.UorL;
816 goto Lcommon_3;
817 case toUint!("!<>"):
818 t.kind = TOK.UorE;
819 goto Lcommon_3;
820 case toUint!("..."):
821 t.kind = TOK.Ellipses;
822 Lcommon_3:
823 p += 3;
824 t.end = p;
825 return;
826 default:
827 }
828
829 c >>>= 8;
830 L2characters:
831 assert(p == t.start);
832 // 2 character tokens.
833 switch (c)
834 {
835 case toUint!("/+"):
836 ++p; // Skip /
837 return scanNestedComment(t);
838 case toUint!("/*"):
839 ++p; // Skip /
840 return scanBlockComment(t);
841 case toUint!("//"):
842 ++p; // Skip /
843 assert(*p == '/');
844 while (!isEndOfLine(++p))
845 isascii(*p) || decodeUTF8();
846 t.kind = TOK.Comment;
847 t.setWhitespaceFlag();
848 t.end = p;
849 return;
850 case toUint!(">="):
851 t.kind = TOK.GreaterEqual;
852 goto Lcommon_2;
853 case toUint!(">>"):
854 t.kind = TOK.RShift;
855 goto Lcommon_2;
856 case toUint!("<<"):
857 t.kind = TOK.LShift;
858 goto Lcommon_2;
859 case toUint!("<="):
860 t.kind = TOK.LessEqual;
861 goto Lcommon_2;
862 case toUint!("<>"):
863 t.kind = TOK.LorG;
864 goto Lcommon_2;
865 case toUint!("!<"):
866 t.kind = TOK.UorGorE;
867 goto Lcommon_2;
868 case toUint!("!>"):
869 t.kind = TOK.UorLorE;
870 goto Lcommon_2;
871 case toUint!("!="):
872 t.kind = TOK.NotEqual;
873 goto Lcommon_2;
874 case toUint!(".."):
875 t.kind = TOK.Slice;
876 goto Lcommon_2;
877 case toUint!("&&"):
878 t.kind = TOK.AndLogical;
879 goto Lcommon_2;
880 case toUint!("&="):
881 t.kind = TOK.AndAssign;
882 goto Lcommon_2;
883 case toUint!("||"):
884 t.kind = TOK.OrLogical;
885 goto Lcommon_2;
886 case toUint!("|="):
887 t.kind = TOK.OrAssign;
888 goto Lcommon_2;
889 case toUint!("++"):
890 t.kind = TOK.PlusPlus;
891 goto Lcommon_2;
892 case toUint!("+="):
893 t.kind = TOK.PlusAssign;
894 goto Lcommon_2;
895 case toUint!("--"):
896 t.kind = TOK.MinusMinus;
897 goto Lcommon_2;
898 case toUint!("-="):
899 t.kind = TOK.MinusAssign;
900 goto Lcommon_2;
901 case toUint!("=="):
902 t.kind = TOK.Equal;
903 goto Lcommon_2;
904 case toUint!("~="):
905 t.kind = TOK.CatAssign;
906 goto Lcommon_2;
907 case toUint!("*="):
908 t.kind = TOK.MulAssign;
909 goto Lcommon_2;
910 case toUint!("/="):
911 t.kind = TOK.DivAssign;
912 goto Lcommon_2;
913 case toUint!("^="):
914 t.kind = TOK.XorAssign;
915 goto Lcommon_2;
916 case toUint!("%="):
917 t.kind = TOK.ModAssign;
918 Lcommon_2:
919 p += 2;
920 t.end = p;
921 return;
922 default:
923 }
924
925 c >>>= 8;
926 L1character:
927 assert(p == t.start);
928 assert(*p == c, Format("p={0},c={1}", *p, cast(dchar)c));
929 // 1 character tokens.
930 // TODO: consider storing the token type in ptable.
931 switch (c)
932 {
933 case '\'':
934 return scanCharacterLiteral(t);
935 case '`':
936 return scanRawStringLiteral(t);
937 case '"':
938 return scanNormalStringLiteral(t);
939 case '\\':
940 char[] buffer;
941 do
942 {
943 bool isBinary;
944 c = scanEscapeSequence(isBinary);
945 if (isascii(c) || isBinary)
946 buffer ~= c;
947 else
948 encodeUTF8(buffer, c);
949 } while (*p == '\\')
950 buffer ~= 0;
951 t.kind = TOK.String;
952 t.str = buffer;
953 t.end = p;
954 return;
955 case '<':
956 t.kind = TOK.Less;
957 goto Lcommon;
958 case '>':
959 t.kind = TOK.Greater;
960 goto Lcommon;
961 case '^':
962 t.kind = TOK.Xor;
963 goto Lcommon;
964 case '!':
965 t.kind = TOK.Not;
966 goto Lcommon;
967 case '.':
968 if (isdigit(p[1]))
969 return scanReal(t);
970 t.kind = TOK.Dot;
971 goto Lcommon;
972 case '&':
973 t.kind = TOK.AndBinary;
974 goto Lcommon;
975 case '|':
976 t.kind = TOK.OrBinary;
977 goto Lcommon;
978 case '+':
979 t.kind = TOK.Plus;
980 goto Lcommon;
981 case '-':
982 t.kind = TOK.Minus;
983 goto Lcommon;
984 case '=':
985 t.kind = TOK.Assign;
986 goto Lcommon;
987 case '~':
988 t.kind = TOK.Tilde;
989 goto Lcommon;
990 case '*':
991 t.kind = TOK.Mul;
992 goto Lcommon;
993 case '/':
994 t.kind = TOK.Div;
995 goto Lcommon;
996 case '%':
997 t.kind = TOK.Mod;
998 goto Lcommon;
999 case '(':
1000 t.kind = TOK.LParen;
1001 goto Lcommon;
1002 case ')':
1003 t.kind = TOK.RParen;
1004 goto Lcommon;
1005 case '[':
1006 t.kind = TOK.LBracket;
1007 goto Lcommon;
1008 case ']':
1009 t.kind = TOK.RBracket;
1010 goto Lcommon;
1011 case '{':
1012 t.kind = TOK.LBrace;
1013 goto Lcommon;
1014 case '}':
1015 t.kind = TOK.RBrace;
1016 goto Lcommon;
1017 case ':':
1018 t.kind = TOK.Colon;
1019 goto Lcommon;
1020 case ';':
1021 t.kind = TOK.Semicolon;
1022 goto Lcommon;
1023 case '?':
1024 t.kind = TOK.Question;
1025 goto Lcommon;
1026 case ',':
1027 t.kind = TOK.Comma;
1028 goto Lcommon;
1029 case '$':
1030 t.kind = TOK.Dollar;
1031 Lcommon:
1032 ++p;
1033 t.end = p;
1034 return;
1035 case '#':
1036 return scanSpecialTokenSequence(t);
1037 default:
1038 }
1039
1040 assert(p == t.start);
1041 assert(*p == c);
1042
1043 // TODO: consider moving isidbeg() and isdigit() up.
1044 if (isidbeg(c))
1045 {
1046 if (c == 'r' && p[1] == '"' && ++p)
1047 return scanRawStringLiteral(t);
1048 if (c == 'x' && p[1] == '"')
1049 return scanHexStringLiteral(t);
1050 version(D2)
1051 {
1052 if (c == 'q' && p[1] == '"')
1053 return scanDelimitedStringLiteral(t);
1054 if (c == 'q' && p[1] == '{')
1055 return scanTokenStringLiteral(t);
1056 }
1057 // Scan identifier.
1058 Lidentifier:
1059 do
1060 { c = *++p; }
1061 while (isident(c) || !isascii(c) && isUnicodeAlpha())
1062
1063 t.end = p;
1064
1065 auto id = IdTable.lookup(t.srcText);
1066 t.kind = id.kind;
1067 t.ident = id;
1068
1069 if (t.kind == TOK.Identifier || t.isKeyword)
1070 return;
1071 else if (t.isSpecialToken)
1072 finalizeSpecialToken(t);
1073 else if (t.kind == TOK.EOF)
1074 {
1075 tail = &t;
1076 assert(t.srcText == "__EOF__");
1077 }
1078 else
1079 assert(0, "unexpected token type: " ~ Token.toString(t.kind));
1080 return;
1081 }
1082
1083 if (isdigit(c))
1084 return scanNumber(t);
1085
1086 // Check for EOF
1087 if (isEOF(c))
1088 {
1089 assert(isEOF(*p), *p~"");
1090 t.kind = TOK.EOF;
1091 t.end = p;
1092 tail = &t;
1093 assert(t.start == t.end);
1094 return;
1095 }
1096
1097 if (!isascii(c))
1098 {
1099 c = decodeUTF8();
1100 if (isUniAlpha(c))
1101 goto Lidentifier;
1102 }
1103
1104 error(t.start, MID.IllegalCharacter, cast(dchar)c);
1105
1106 ++p;
1107 t.kind = TOK.Illegal;
1108 t.setWhitespaceFlag();
1109 t.dchar_ = c;
1110 t.end = p;
1111 return;
1112 }
1113
1114 /// Scans a block comment.
1115 ///
1116 /// BlockComment := "/*" AnyChar* "*/"
1117 void scanBlockComment(ref Token t)
1118 {
1119 assert(p[-1] == '/' && *p == '*');
1120 auto tokenLineNum = lineNum;
1121 auto tokenLineBegin = lineBegin;
1122 Loop:
1123 while (1)
1124 {
1125 switch (*++p)
1126 {
1127 case '*':
1128 if (p[1] != '/')
1129 continue;
1130 p += 2;
1131 break Loop;
1132 case '\r':
1133 if (p[1] == '\n')
1134 ++p;
1135 case '\n':
1136 assert(isNewlineEnd(p));
1137 ++lineNum;
1138 setLineBegin(p+1);
1139 break;
1140 default:
1141 if (!isascii(*p))
1142 {
1143 if (isUnicodeNewlineChar(decodeUTF8()))
1144 goto case '\n';
1145 }
1146 else if (isEOF(*p))
1147 {
1148 error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedBlockComment);
1149 break Loop;
1150 }
1151 }
1152 }
1153 t.kind = TOK.Comment;
1154 t.setWhitespaceFlag();
1155 t.end = p;
1156 return;
1157 }
1158
1159 /// Scans a nested comment.
1160 ///
1161 /// NestedComment := "/+" (AnyChar* | NestedComment) "+/"
1162 void scanNestedComment(ref Token t)
1163 {
1164 assert(p[-1] == '/' && *p == '+');
1165 auto tokenLineNum = lineNum;
1166 auto tokenLineBegin = lineBegin;
1167 uint level = 1;
1168 Loop:
1169 while (1)
1170 {
1171 switch (*++p)
1172 {
1173 case '/':
1174 if (p[1] == '+')
1175 ++p, ++level;
1176 continue;
1177 case '+':
1178 if (p[1] != '/')
1179 continue;
1180 ++p;
1181 if (--level != 0)
1182 continue;
1183 ++p;
1184 break Loop;
1185 case '\r':
1186 if (p[1] == '\n')
1187 ++p;
1188 case '\n':
1189 assert(isNewlineEnd(p));
1190 ++lineNum;
1191 setLineBegin(p+1);
1192 continue;
1193 default:
1194 if (!isascii(*p))
1195 {
1196 if (isUnicodeNewlineChar(decodeUTF8()))
1197 goto case '\n';
1198 }
1199 else if (isEOF(*p))
1200 {
1201 error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedNestedComment);
1202 break Loop;
1203 }
1204 }
1205 }
1206 t.kind = TOK.Comment;
1207 t.setWhitespaceFlag();
1208 t.end = p;
1209 return;
1210 }
1211
1212 /// Scans the postfix character of a string literal.
1213 ///
1214 /// PostfixChar := "c" | "w" | "d"
1215 char scanPostfix()
1216 {
1217 assert(p[-1] == '"' || p[-1] == '`' ||
1218 { version(D2) return p[-1] == '}';
1219 else return 0; }()
1220 );
1221 switch (*p)
1222 {
1223 case 'c':
1224 case 'w':
1225 case 'd':
1226 return *p++;
1227 default:
1228 return 0;
1229 }
1230 assert(0);
1231 }
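// Editor's note (added illustration, not in the original): the postfix picks
// the string element type at the use site, e.g.
// ---
// "abc"c // char[] literal; scanPostfix() returns 'c'
// "abc"w // wchar[] literal; returns 'w'
// "abc"d // dchar[] literal; returns 'd'
// "abc" // no postfix; returns 0
// ---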
1232
1233 /// Scans a normal string literal.
1234 ///
1235 /// NormalStringLiteral := "\"" Char* "\""
1236 void scanNormalStringLiteral(ref Token t)
1237 {
1238 assert(*p == '"');
1239 auto tokenLineNum = lineNum;
1240 auto tokenLineBegin = lineBegin;
1241 t.kind = TOK.String;
1242 char[] buffer;
1243 uint c;
1244 while (1)
1245 {
1246 c = *++p;
1247 switch (c)
1248 {
1249 case '"':
1250 ++p;
1251 t.pf = scanPostfix();
1252 Lreturn:
1253 t.str = buffer ~ '\0';
1254 t.end = p;
1255 return;
1256 case '\\':
1257 bool isBinary;
1258 c = scanEscapeSequence(isBinary);
1259 --p;
1260 if (isascii(c) || isBinary)
1261 buffer ~= c;
1262 else
1263 encodeUTF8(buffer, c);
1264 continue;
1265 case '\r':
1266 if (p[1] == '\n')
1267 ++p;
1268 case '\n':
1269 assert(isNewlineEnd(p));
1270 c = '\n'; // Convert Newline to \n.
1271 ++lineNum;
1272 setLineBegin(p+1);
1273 break;
1274 case 0, _Z_:
1275 error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedString);
1276 goto Lreturn;
1277 default:
1278 if (!isascii(c))
1279 {
1280 c = decodeUTF8();
1281 if (isUnicodeNewlineChar(c))
1282 goto case '\n';
1283 encodeUTF8(buffer, c);
1284 continue;
1285 }
1286 }
1287 assert(isascii(c));
1288 buffer ~= c;
1289 }
1290 assert(0);
1291 }
1292
1293 /// Scans a character literal.
1294 ///
1295 /// CharLiteral := "'" Char "'"
1296 void scanCharacterLiteral(ref Token t)
1297 {
1298 assert(*p == '\'');
1299 ++p;
1300 t.kind = TOK.CharLiteral;
1301 switch (*p)
1302 {
1303 case '\\':
1304 bool notused;
1305 t.dchar_ = scanEscapeSequence(notused);
1306 break;
1307 case '\'':
1308 error(t.start, MID.EmptyCharacterLiteral);
1309 break;
1310 default:
1311 if (isEndOfLine(p))
1312 break;
1313 uint c = *p;
1314 if (!isascii(c))
1315 c = decodeUTF8();
1316 t.dchar_ = c;
1317 ++p;
1318 }
1319
1320 if (*p == '\'')
1321 ++p;
1322 else
1323 error(t.start, MID.UnterminatedCharacterLiteral);
1324 t.end = p;
1325 }
1326
1327 /// Scans a raw string literal.
1328 ///
1329 /// RawStringLiteral := "r\"" AnyChar* "\"" | "`" AnyChar* "`"
1330 void scanRawStringLiteral(ref Token t)
1331 {
1332 assert(*p == '`' || *p == '"' && p[-1] == 'r');
1333 auto tokenLineNum = lineNum;
1334 auto tokenLineBegin = lineBegin;
1335 t.kind = TOK.String;
1336 uint delim = *p;
1337 char[] buffer;
1338 uint c;
1339 while (1)
1340 {
1341 c = *++p;
1342 switch (c)
1343 {
1344 case '\r':
1345 if (p[1] == '\n')
1346 ++p;
1347 case '\n':
1348 assert(isNewlineEnd(p));
1349 c = '\n'; // Convert Newline to '\n'.
1350 ++lineNum;
1351 setLineBegin(p+1);
1352 break;
1353 case '`':
1354 case '"':
1355 if (c == delim)
1356 {
1357 ++p;
1358 t.pf = scanPostfix();
1359 Lreturn:
1360 t.str = buffer ~ '\0';
1361 t.end = p;
1362 return;
1363 }
1364 break;
1365 case 0, _Z_:
1366 error(tokenLineNum, tokenLineBegin, t.start,
1367 delim == 'r' ? MID.UnterminatedRawString : MID.UnterminatedBackQuoteString);
1368 goto Lreturn;
1369 default:
1370 if (!isascii(c))
1371 {
1372 c = decodeUTF8();
1373 if (isUnicodeNewlineChar(c))
1374 goto case '\n';
1375 encodeUTF8(buffer, c);
1376 continue;
1377 }
1378 }
1379 assert(isascii(c));
1380 buffer ~= c;
1381 }
1382 assert(0);
1383 }
1384
1385 /// Scans a hexadecimal string literal.
1386 ///
1387 /// HexStringLiteral := "x\"" (HexChar HexChar)* "\""
1388 void scanHexStringLiteral(ref Token t)
1389 {
1390 assert(p[0] == 'x' && p[1] == '"');
1391 t.kind = TOK.String;
1392
1393 auto tokenLineNum = lineNum;
1394 auto tokenLineBegin = lineBegin;
1395
1396 uint c;
1397 ubyte[] buffer;
1398 ubyte h; // hex number
1399 uint n; // number of hex digits
1400
1401 ++p;
1402 assert(*p == '"');
1403 while (1)
1404 {
1405 c = *++p;
1406 switch (c)
1407 {
1408 case '"':
1409 if (n & 1)
1410 error(tokenLineNum, tokenLineBegin, t.start, MID.OddNumberOfDigitsInHexString);
1411 ++p;
1412 t.pf = scanPostfix();
1413 Lreturn:
1414 t.str = cast(string) (buffer ~= 0);
1415 t.end = p;
1416 return;
1417 case '\r':
1418 if (p[1] == '\n')
1419 ++p;
1420 case '\n':
1421 assert(isNewlineEnd(p));
1422 ++lineNum;
1423 setLineBegin(p+1);
1424 continue;
1425 default:
1426 if (ishexad(c))
1427 {
1428 if (c <= '9')
1429 c -= '0';
1430 else if (c <= 'F')
1431 c -= 'A' - 10;
1432 else
1433 c -= 'a' - 10;
1434
1435 if (n & 1)
1436 {
1437 h <<= 4;
1438 h |= c;
1439 buffer ~= h;
1440 }
1441 else
1442 h = cast(ubyte)c;
1443 ++n;
1444 continue;
1445 }
1446 else if (isspace(c))
1447 continue; // Skip spaces.
1448 else if (isEOF(c))
1449 {
1450 error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedHexString);
1451 t.pf = 0;
1452 goto Lreturn;
1453 }
1454 else
1455 {
1456 auto errorAt = p;
1457 if (!isascii(c))
1458 {
1459 c = decodeUTF8();
1460 if (isUnicodeNewlineChar(c))
1461 goto case '\n';
1462 }
1463 error(errorAt, MID.NonHexCharInHexString, cast(dchar)c);
1464 }
1465 }
1466 }
1467 assert(0);
1468 }
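// Editor's note (added illustration, not in the original): hex string literals
// pair hexadecimal digits into raw bytes and ignore whitespace, e.g.
// ---
// x"0A 1B2C" // t.str holds the bytes 0x0A, 0x1B, 0x2C
// x"123" // error: odd number of digits
// ---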
1469
1470 version(DDoc)
1471 {
1472 /// Scans a delimited string literal.
1473 void scanDelimitedStringLiteral(ref Token t);
1474 /// Scans a token string literal.
1475 ///
1476 /// TokenStringLiteral := "q{" Token* "}"
1477 void scanTokenStringLiteral(ref Token t);
1478 }
1479 else
1480 version(D2)
1481 {
1482 void scanDelimitedStringLiteral(ref Token t)
1483 {
1484 assert(p[0] == 'q' && p[1] == '"');
1485 t.kind = TOK.String;
1486
1487 auto tokenLineNum = lineNum;
1488 auto tokenLineBegin = lineBegin;
1489
1490 char[] buffer;
1491 dchar opening_delim = 0, // 0 if no nested delimiter or '[', '(', '<', '{'
1492 closing_delim; // Will be ']', ')', '>', '}',
1493 // the first character of an identifier or
1494 // any other Unicode/ASCII character.
1495 char[] str_delim; // Identifier delimiter.
1496 uint level = 1; // Counter for nestable delimiters.
1497
1498 ++p; ++p; // Skip q"
1499 uint c = *p;
1500 switch (c)
1501 {
1502 case '(':
1503 opening_delim = c;
1504 closing_delim = ')'; // c + 1
1505 break;
1506 case '[', '<', '{':
1507 opening_delim = c;
1508 closing_delim = c + 2; // Get to closing counterpart. Feature of ASCII table.
1509 break;
1510 default:
1511 dchar scanNewline()
1512 {
1513 switch (*p)
1514 {
1515 case '\r':
1516 if (p[1] == '\n')
1517 ++p;
1518 case '\n':
1519 assert(isNewlineEnd(p));
1520 ++p;
1521 ++lineNum;
1522 setLineBegin(p);
1523 return '\n';
1524 default:
1525 if (isUnicodeNewline(p))
1526 {
1527 ++p; ++p;
1528 goto case '\n';
1529 }
1530 }
1531 return 0;
1532 }
1533 // Skip leading newlines:
1534 while (scanNewline() != 0)
1535 {}
1536 assert(!isNewline(p));
1537
1538 char* begin = p;
1539 c = *p;
1540 closing_delim = c;
1541 // TODO: Check for non-printable characters?
1542 if (!isascii(c))
1543 {
1544 closing_delim = decodeUTF8();
1545 if (!isUniAlpha(closing_delim))
1546 break; // Not an identifier.
1547 }
1548 else if (!isidbeg(c))
1549 break; // Not an identifier.
1550
1551 // Parse Identifier + EndOfLine
1552 do
1553 { c = *++p; }
1554 while (isident(c) || !isascii(c) && isUnicodeAlpha())
1555 // Store identifier
1556 str_delim = begin[0..p-begin];
1557 // Scan newline
1558 if (scanNewline() == '\n')
1559 --p; // Go back one because of "c = *++p;" in main loop.
1560 else
1561 {
1562 // TODO: error(p, MID.ExpectedNewlineAfterIdentDelim);
1563 }
1564 }
1565
1566 bool checkStringDelim(char* p)
1567 {
1568 assert(str_delim.length != 0);
1569 if (buffer[$-1] == '\n' && // Last character copied to buffer must be '\n'.
1570 end-p >= str_delim.length && // Check remaining length.
1571 p[0..str_delim.length] == str_delim) // Compare.
1572 return true;
1573 return false;
1574 }
1575
1576 while (1)
1577 {
1578 c = *++p;
1579 switch (c)
1580 {
1581 case '\r':
1582 if (p[1] == '\n')
1583 ++p;
1584 case '\n':
1585 assert(isNewlineEnd(p));
1586 c = '\n'; // Convert Newline to '\n'.
1587 ++lineNum;
1588 setLineBegin(p+1);
1589 break;
1590 case 0, _Z_:
1591 // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedDelimitedString);
1592 goto Lreturn3;
1593 default:
1594 if (!isascii(c))
1595 {
1596 auto begin = p;
1597 c = decodeUTF8();
1598 if (isUnicodeNewlineChar(c))
1599 goto case '\n';
1600 if (c == closing_delim)
1601 {
1602 if (str_delim.length)
1603 {
1604 if (checkStringDelim(begin))
1605 {
1606 p = begin + str_delim.length;
1607 goto Lreturn2;
1608 }
1609 }
1610 else
1611 {
1612 assert(level == 1);
1613 --level;
1614 goto Lreturn;
1615 }
1616 }
1617 encodeUTF8(buffer, c);
1618 continue;
1619 }
1620 else
1621 {
1622 if (c == opening_delim)
1623 ++level;
1624 else if (c == closing_delim)
1625 {
1626 if (str_delim.length)
1627 {
1628 if (checkStringDelim(p))
1629 {
1630 p += str_delim.length;
1631 goto Lreturn2;
1632 }
1633 }
1634 else if (--level == 0)
1635 goto Lreturn;
1636 }
1637 }
1638 }
1639 assert(isascii(c));
1640 buffer ~= c;
1641 }
1642 Lreturn: // Character delimiter.
1643 assert(c == closing_delim);
1644 assert(level == 0);
1645 ++p; // Skip closing delimiter.
1646 Lreturn2: // String delimiter.
1647 if (*p == '"')
1648 ++p;
1649 else
1650 {
1651 // TODO: error(p, MID.ExpectedDblQuoteAfterDelim, str_delim.length ? str_delim : closing_delim~"");
1652 }
1653
1654 t.pf = scanPostfix();
1655 Lreturn3: // Error.
1656 t.str = buffer ~ '\0';
1657 t.end = p;
1658 }
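// Editor's note (added illustration, not in the original): examples of the
// delimiter forms handled above.
// ---
// q"(foo(bar))" // nestable delimiter: yields "foo(bar)"
// q"[a[0]]" // likewise with brackets: yields "a[0]"
// q"EOS
// one line of text
// EOS" // identifier delimiter: yields "one line of text\n"
// ---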
1659
1660 void scanTokenStringLiteral(ref Token t)
1661 {
1662 assert(p[0] == 'q' && p[1] == '{');
1663 t.kind = TOK.String;
1664
1665 auto tokenLineNum = lineNum;
1666 auto tokenLineBegin = lineBegin;
1667
1668 // A guard against changes to particular members:
1669 // this.lineNum_hline and this.errorPath
1670 ++inTokenString;
1671
1672 uint lineNum = this.lineNum;
1673 uint level = 1;
1674
1675 ++p; ++p; // Skip q{
1676
1677 auto prev_t = &t;
1678 Token* token;
1679 while (1)
1680 {
1681 token = new Token;
1682 scan(*token);
1683 // Save the tokens in a doubly linked list.
1684 // Could be useful for various tools.
1685 token.prev = prev_t;
1686 prev_t.next = token;
1687 prev_t = token;
1688 switch (token.kind)
1689 {
1690 case TOK.LBrace:
1691 ++level;
1692 continue;
1693 case TOK.RBrace:
1694 if (--level == 0)
1695 {
1696 t.tok_str = t.next;
1697 t.next = null;
1698 break;
1699 }
1700 continue;
1701 case TOK.EOF:
1702 // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedTokenString);
1703 t.tok_str = t.next;
1704 t.next = token;
1705 break;
1706 default:
1707 continue;
1708 }
1709 break; // Exit loop.
1710 }
1711
1712 assert(token.kind == TOK.RBrace || token.kind == TOK.EOF);
1713 assert(token.kind == TOK.RBrace && t.next is null ||
1714 token.kind == TOK.EOF && t.next !is null);
1715
1716 char[] buffer;
1717 // token points to } or EOF
1718 if (token.kind == TOK.EOF)
1719 {
1720 t.end = token.start;
1721 buffer = t.srcText[2..$].dup ~ '\0';
1722 }
1723 else
1724 {
1725 // Assign to buffer before scanPostfix().
1726 t.end = p;
1727 buffer = t.srcText[2..$-1].dup ~ '\0';
1728 t.pf = scanPostfix();
1729 t.end = p; // Assign again because of postfix.
1730 }
1731 // Convert newlines to '\n'.
1732 if (lineNum != this.lineNum)
1733 {
1734 assert(buffer[$-1] == '\0');
1735 uint i, j;
1736 for (; i < buffer.length; ++i)
1737 switch (buffer[i])
1738 {
1739 case '\r':
1740 if (buffer[i+1] == '\n')
1741 ++i;
1742 case '\n':
1743 assert(isNewlineEnd(buffer.ptr + i));
1744 buffer[j++] = '\n'; // Convert Newline to '\n'.
1745 break;
1746 default:
1747 if (isUnicodeNewline(buffer.ptr + i))
1748 {
1749 ++i; ++i;
1750 goto case '\n';
1751 }
1752 buffer[j++] = buffer[i]; // Copy.
1753 }
1754 buffer.length = j; // Adjust length.
1755 }
1756 assert(buffer[$-1] == '\0');
1757 t.str = buffer;
1758
1759 --inTokenString;
1760 }
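// Editor's note (added illustration, not in the original): a token string such
// as q{ foo(bar); } stores " foo(bar); " in t.str (with newlines normalized to
// '\n') and keeps the scanned inner tokens reachable through t.tok_str.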
1761 } // version(D2)
1762
1763 /// Scans an escape sequence.
1764 ///
1765 /// EscapeSequence := "\" (Octal{1,3} | ("x" Hex{2}) |
1766 /// ("u" Hex{4}) | ("U" Hex{8}) |
1767 /// "'" | "\"" | "\\" | "?" | "a" |
1768 /// "b" | "f" | "n" | "r" | "t" | "v")
1769 /// Params:
1770 /// isBinary = set to true for octal and hexadecimal escapes.
1771 /// Returns: the escape value.
1772 dchar scanEscapeSequence(ref bool isBinary)
1773 out(result)
1774 { assert(isValidChar(result)); }
1775 body
1776 {
1777 assert(*p == '\\');
1778
1779 auto sequenceStart = p; // Used for error reporting.
1780
1781 ++p;
1782 uint c = char2ev(*p);
1783 if (c)
1784 {
1785 ++p;
1786 return c;
1787 }
1788
1789 uint digits = 2;
1790
1791 switch (*p)
1792 {
1793 case 'x':
1794 isBinary = true;
1795 case_Unicode:
1796 assert(c == 0);
1797 assert(digits == 2 || digits == 4 || digits == 8);
1798 while (1)
1799 {
1800 ++p;
1801 if (ishexad(*p))
1802 {
1803 c *= 16;
1804 if (*p <= '9')
1805 c += *p - '0';
1806 else if (*p <= 'F')
1807 c += *p - 'A' + 10;
1808 else
1809 c += *p - 'a' + 10;
1810
1811 if (--digits == 0)
1812 {
1813 ++p;
1814 if (isValidChar(c))
1815 return c; // Return valid escape value.
1816
1817 error(sequenceStart, MID.InvalidUnicodeEscapeSequence,
1818 sequenceStart[0..p-sequenceStart]);
1819 break;
1820 }
1821 continue;
1822 }
1823
1824 error(sequenceStart, MID.InsufficientHexDigits,
1825 sequenceStart[0..p-sequenceStart]);
1826 break;
1827 }
1828 break;
1829 case 'u':
1830 digits = 4;
1831 goto case_Unicode;
1832 case 'U':
1833 digits = 8;
1834 goto case_Unicode;
1835 default:
1836 if (isoctal(*p))
1837 {
1838 isBinary = true;
1839 assert(c == 0);
1840 c += *p - '0';
1841 ++p;
1842 if (!isoctal(*p))
1843 return c;
1844 c *= 8;
1845 c += *p - '0';
1846 ++p;
1847 if (!isoctal(*p))
1848 return c;
1849 c *= 8;
1850 c += *p - '0';
1851 ++p;
1852 if (c > 0xFF)
1853 error(sequenceStart, MSG.InvalidOctalEscapeSequence,
1854 sequenceStart[0..p-sequenceStart]);
1855 return c; // Return valid escape value.
1856 }
1857 else if(*p == '&')
1858 {
1859 if (isalpha(*++p))
1860 {
1861 auto begin = p;
1862 while (isalnum(*++p))
1863 {}
1864
1865 if (*p == ';')
1866 {
1867 // Pass entity excluding '&' and ';'.
1868 c = entity2Unicode(begin[0..p - begin]);
1869 ++p; // Skip ;
1870 if (c != 0xFFFF)
1871 return c; // Return valid escape value.
1872 else
1873 error(sequenceStart, MID.UndefinedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
1874 }
1875 else
1876 error(sequenceStart, MID.UnterminatedHTMLEntity, sequenceStart[0 .. p - sequenceStart]);
1877 }
1878 else
1879 error(sequenceStart, MID.InvalidBeginHTMLEntity);
1880 }
1881 else if (isEndOfLine(p))
1882 error(sequenceStart, MID.UndefinedEscapeSequence,
1883 isEOF(*p) ? `\EOF` : `\NewLine`);
1884 else
1885 {
1886 char[] str = `\`;
1887 if (isascii(c))
1888 str ~= *p;
1889 else
1890 encodeUTF8(str, decodeUTF8());
1891 ++p;
1892 // TODO: check for unprintable character?
1893 error(sequenceStart, MID.UndefinedEscapeSequence, str);
1894 }
1895 }
1896 return REPLACEMENT_CHAR; // Error: return replacement character.
1897 }
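// Editor's note (added illustration, not in the original): a few escape
// sequences and the values produced above.
// ---
// \n // simple escape resolved by char2ev()
// \x41 // hex escape: returns 'A' (0x41), isBinary = true
// \101 // octal escape: returns 'A' (65), isBinary = true
// \u20AC // returns U+20AC, isBinary stays false
// \&amp; // named HTML entity: resolved via entity2Unicode()
// ---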
1898
1899 /// Scans a number literal.
1900 ///
1901 /// $(PRE
1902 /// IntegerLiteral := (Dec|Hex|Bin|Oct)Suffix?
1903 /// Dec := (0|[1-9][0-9_]*)
1904 /// Hex := 0[xX][_]*[0-9a-zA-Z][0-9a-zA-Z_]*
1905 /// Bin := 0[bB][_]*[01][01_]*
1906 /// Oct := 0[0-7_]*
1907 /// Suffix := (L[uU]?|[uU]L?)
1908 /// )
1909 /// Invalid: "0b_", "0x_", "._" etc.
1910 void scanNumber(ref Token t)
1911 {
1912 ulong ulong_;
1913 bool overflow;
1914 bool isDecimal;
1915 size_t digits;
1916
1917 if (*p != '0')
1918 goto LscanInteger;
1919 ++p; // skip zero
1920 // check for xX bB ...
1921 switch (*p)
1922 {
1923 case 'x','X':
1924 goto LscanHex;
1925 case 'b','B':
1926 goto LscanBinary;
1927 case 'L':
1928 if (p[1] == 'i')
1929 goto LscanReal; // 0Li
1930 break; // 0L
1931 case '.':
1932 if (p[1] == '.')
1933 break; // 0..
1934 // 0.
1935 case 'i','f','F', // Imaginary and float literal suffixes.
1936 'e', 'E': // Float exponent.
1937 goto LscanReal;
1938 default:
1939 if (*p == '_')
1940 goto LscanOctal; // 0_
1941 else if (isdigit(*p))
1942 {
1943 if (*p == '8' || *p == '9')
1944 goto Loctal_hasDecimalDigits; // 08 or 09
1945 else
1946 goto Loctal_enter_loop; // 0[0-7]
1947 }
1948 }
1949
1950 // Number 0
1951 assert(p[-1] == '0');
1952 assert(*p != '_' && !isdigit(*p));
1953 assert(ulong_ == 0);
1954 isDecimal = true;
1955 goto Lfinalize;
1956
1957 LscanInteger:
1958 assert(*p != 0 && isdigit(*p));
1959 isDecimal = true;
1960 goto Lenter_loop_int;
1961 while (1)
1962 {
1963 if (*++p == '_')
1964 continue;
1965 if (!isdigit(*p))
1966 break;
1967 Lenter_loop_int:
1968 if (ulong_ < ulong.max/10 || (ulong_ == ulong.max/10 && *p <= '5'))
1969 {
1970 ulong_ *= 10;
1971 ulong_ += *p - '0';
1972 continue;
1973 }
1974 // Overflow: skip following digits.
1975 overflow = true;
1976 while (isdigit(*++p)) {}
1977 break;
1978 }
1979
1980 // The number could be a float, so check overflow below.
1981 switch (*p)
1982 {
1983 case '.':
1984 if (p[1] != '.')
1985 goto LscanReal;
1986 break;
1987 case 'L':
1988 if (p[1] != 'i')
1989 break;
1990 case 'i', 'f', 'F', 'e', 'E':
1991 goto LscanReal;
1992 default:
1993 }
1994
1995 if (overflow)
1996 error(t.start, MID.OverflowDecimalNumber);
1997
1998 assert((isdigit(p[-1]) || p[-1] == '_') && !isdigit(*p) && *p != '_');
1999 goto Lfinalize;
2000
2001 LscanHex:
2002 assert(digits == 0);
2003 assert(*p == 'x' || *p == 'X');
2004 while (1)
2005 {
2006 if (*++p == '_')
2007 continue;
2008 if (!ishexad(*p))
2009 break;
2010 ++digits;
2011 ulong_ *= 16;
2012 if (*p <= '9')
2013 ulong_ += *p - '0';
2014 else if (*p <= 'F')
2015 ulong_ += *p - 'A' + 10;
2016 else
2017 ulong_ += *p - 'a' + 10;
2018 }
2019
2020 assert(ishexad(p[-1]) || p[-1] == '_' || p[-1] == 'x' || p[-1] == 'X');
2021 assert(!ishexad(*p) && *p != '_');
2022
2023 switch (*p)
2024 {
2025 case '.':
2026 if (p[1] == '.')
2027 break;
2028 case 'p', 'P':
2029 return scanHexReal(t);
2030 default:
2031 }
2032
2033 if (digits == 0 || digits > 16)
2034 error(t.start, digits == 0 ? MID.NoDigitsInHexNumber : MID.OverflowHexNumber);
2035
2036 goto Lfinalize;
2037
2038 LscanBinary:
2039 assert(digits == 0);
2040 assert(*p == 'b' || *p == 'B');
2041 while (1)
2042 {
2043 if (*++p == '0')
2044 {
2045 ++digits;
2046 ulong_ *= 2;
2047 }
2048 else if (*p == '1')
2049 {
2050 ++digits;
2051 ulong_ *= 2;
2052 ulong_ += *p - '0';
2053 }
2054 else if (*p == '_')
2055 continue;
2056 else
2057 break;
2058 }
2059
2060 if (digits == 0 || digits > 64)
2061 error(t.start, digits == 0 ? MID.NoDigitsInBinNumber : MID.OverflowBinaryNumber);
2062
2063 assert(p[-1] == '0' || p[-1] == '1' || p[-1] == '_' || p[-1] == 'b' || p[-1] == 'B', p[-1] ~ "");
2064 assert( !(*p == '0' || *p == '1' || *p == '_') );
2065 goto Lfinalize;
2066
2067 LscanOctal:
2068 assert(*p == '_');
2069 while (1)
2070 {
2071 if (*++p == '_')
2072 continue;
2073 if (!isoctal(*p))
2074 break;
2075 Loctal_enter_loop:
2076 if (ulong_ < ulong.max/2 || (ulong_ == ulong.max/2 && *p <= '1'))
2077 {
2078 ulong_ *= 8;
2079 ulong_ += *p - '0';
2080 continue;
2081 }
2082 // Overflow: skip following digits.
2083 overflow = true;
2084 while (isoctal(*++p)) {}
2085 break;
2086 }
2087
2088 bool hasDecimalDigits;
2089 if (isdigit(*p))
2090 {
2091 Loctal_hasDecimalDigits:
2092 hasDecimalDigits = true;
2093 while (isdigit(*++p)) {}
2094 }
2095
2096 // The number could be a float, so check errors below.
2097 switch (*p)
2098 {
2099 case '.':
2100 if (p[1] != '.')
2101 goto LscanReal;
2102 break;
2103 case 'L':
2104 if (p[1] != 'i')
2105 break;
2106 case 'i', 'f', 'F', 'e', 'E':
2107 goto LscanReal;
2108 default:
2109 }
2110
2111 if (hasDecimalDigits)
2112 error(t.start, MID.OctalNumberHasDecimals);
2113
2114 if (overflow)
2115 error(t.start, MID.OverflowOctalNumber);
2116 // goto Lfinalize;
2117
2118 Lfinalize:
2119 enum Suffix
2120 {
2121 None = 0,
2122 Unsigned = 1,
2123 Long = 2
2124 }
2125
2126 // Scan optional suffix: L, Lu, LU, u, uL, U or UL.
2127 Suffix suffix;
2128 while (1)
2129 {
2130 switch (*p)
2131 {
2132 case 'L':
2133 if (suffix & Suffix.Long)
2134 break;
2135 suffix |= Suffix.Long;
2136 ++p;
2137 continue;
2138 case 'u', 'U':
2139 if (suffix & Suffix.Unsigned)
2140 break;
2141 suffix |= Suffix.Unsigned;
2142 ++p;
2143 continue;
2144 default:
2145 break;
2146 }
2147 break;
2148 }
2149
2150 // Determine type of Integer.
2151 switch (suffix)
2152 {
2153 case Suffix.None:
2154 if (ulong_ & 0x8000_0000_0000_0000)
2155 {
2156 if (isDecimal)
2157 error(t.start, MID.OverflowDecimalSign);
2158 t.kind = TOK.Uint64;
2159 }
2160 else if (ulong_ & 0xFFFF_FFFF_0000_0000)
2161 t.kind = TOK.Int64;
2162 else if (ulong_ & 0x8000_0000)
2163 t.kind = isDecimal ? TOK.Int64 : TOK.Uint32;
2164 else
2165 t.kind = TOK.Int32;
2166 break;
2167 case Suffix.Unsigned:
2168 if (ulong_ & 0xFFFF_FFFF_0000_0000)
2169 t.kind = TOK.Uint64;
2170 else
2171 t.kind = TOK.Uint32;
2172 break;
2173 case Suffix.Long:
2174 if (ulong_ & 0x8000_0000_0000_0000)
2175 {
2176 if (isDecimal)
2177 error(t.start, MID.OverflowDecimalSign);
2178 t.kind = TOK.Uint64;
2179 }
2180 else
2181 t.kind = TOK.Int64;
2182 break;
2183 case Suffix.Unsigned | Suffix.Long:
2184 t.kind = TOK.Uint64;
2185 break;
2186 default:
2187 assert(0);
2188 }
2189 t.ulong_ = ulong_;
2190 t.end = p;
2191 return;
2192 LscanReal:
2193 scanReal(t);
2194 return;
2195 }
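// Editor's note (added illustration, not in the original): how Lfinalize maps
// value and suffix to a token kind.
// ---
// 2147483647 // fits in 31 bits, no suffix -> TOK.Int32
// 2147483648 // bit 31 set, decimal -> TOK.Int64
// 0x8000_0000 // bit 31 set, non-decimal -> TOK.Uint32
// 123u // Unsigned suffix -> TOK.Uint32
// 123UL // Unsigned and Long -> TOK.Uint64
// ---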
2196
2197 /// Scans a floating point number literal.
2198 ///
2199 /// $(PRE
2200 /// FloatLiteral := Float[fFL]?i?
2201 /// Float := DecFloat | HexFloat
2202 /// DecFloat := ([0-9][0-9_]*[.][0-9_]*DecExponent?) |
2203 /// [.][0-9][0-9_]*DecExponent? | [0-9][0-9_]*DecExponent
2204 /// DecExponent := [eE][+-]?[0-9][0-9_]*
2205 /// HexFloat := 0[xX](HexDigits[.]HexDigits |
2206 /// [.][0-9a-zA-Z]HexDigits? |
2207 /// HexDigits)HexExponent
2208 /// HexExponent := [pP][+-]?[0-9][0-9_]*
2209 /// )
2210 void scanReal(ref Token t)
2211 {
2212 if (*p == '.')
2213 {
2214 assert(p[1] != '.');
2215 // This function was called by scan() or scanNumber().
2216 while (isdigit(*++p) || *p == '_') {}
2217 }
2218 else
2219 // This function was called by scanNumber().
2220 assert(delegate ()
2221 {
2222 switch (*p)
2223 {
2224 case 'L':
2225 if (p[1] != 'i')
2226 return false;
2227 case 'i', 'f', 'F', 'e', 'E':
2228 return true;
2229 default:
2230 }
2231 return false;
2232 }()
2233 );
2234
2235 // Scan exponent.
2236 if (*p == 'e' || *p == 'E')
2237 {
2238 ++p;
2239 if (*p == '-' || *p == '+')
2240 ++p;
2241 if (isdigit(*p))
2242 while (isdigit(*++p) || *p == '_') {}
2243 else
2244 error(t.start, MID.FloatExpMustStartWithDigit);
2245 }
2246
2247 // Copy whole number and remove underscores from buffer.
2248 char[] buffer = t.start[0..p-t.start].dup;
2249 uint j;
2250 foreach (c; buffer)
2251 if (c != '_')
2252 buffer[j++] = c;
2253 buffer.length = j; // Adjust length.
2254 buffer ~= 0; // Terminate for C functions.
2255
2256 finalizeFloat(t, buffer);
2257 }
2258
2259 /// Scans a hexadecimal floating point number literal.
2260 void scanHexReal(ref Token t)
2261 {
2262 assert(*p == '.' || *p == 'p' || *p == 'P');
2263 MID mid;
2264 if (*p == '.')
2265 while (ishexad(*++p) || *p == '_')
2266 {}
2267 // Decimal exponent is required.
2268 if (*p != 'p' && *p != 'P')
2269 {
2270 mid = MID.HexFloatExponentRequired;
2271 goto Lerr;
2272 }
2273 // Scan exponent
2274 assert(*p == 'p' || *p == 'P');
2275 ++p;
2276 if (*p == '+' || *p == '-')
2277 ++p;
2278 if (!isdigit(*p))
2279 {
2280 mid = MID.HexFloatExpMustStartWithDigit;
2281 goto Lerr;
2282 }
2283 while (isdigit(*++p) || *p == '_')
2284 {}
2285 // Copy whole number and remove underscores from buffer.
2286 char[] buffer = t.start[0..p-t.start].dup;
2287 uint j;
2288 foreach (c; buffer)
2289 if (c != '_')
2290 buffer[j++] = c;
2291 buffer.length = j; // Adjust length.
2292 buffer ~= 0; // Terminate for C functions.
2293 finalizeFloat(t, buffer);
2294 return;
2295 Lerr:
2296 t.kind = TOK.Float32;
2297 t.end = p;
2298 error(t.start, mid);
2299 }
2300
2301 /// Sets the value of the token.
2302 /// Params:
2303 /// t = receives the value.
2304 /// buffer = the well-formed float number.
2305 void finalizeFloat(ref Token t, string buffer)
2306 {
2307 assert(buffer[$-1] == 0);
2308 // Float number is well-formed. Check suffixes and do conversion.
2309 switch (*p)
2310 {
2311 case 'f', 'F':
2312 t.kind = TOK.Float32;
2313 t.float_ = strtof(buffer.ptr, null);
2314 ++p;
2315 break;
2316 case 'L':
2317 t.kind = TOK.Float80;
2318 t.real_ = strtold(buffer.ptr, null);
2319 ++p;
2320 break;
2321 default:
2322 t.kind = TOK.Float64;
2323 t.double_ = strtod(buffer.ptr, null);
2324 }
2325 if (*p == 'i')
2326 {
2327 ++p;
2328 t.kind += 3; // Switch to imaginary counterpart.
2329 assert(t.kind == TOK.Imaginary32 ||
2330 t.kind == TOK.Imaginary64 ||
2331 t.kind == TOK.Imaginary80);
2332 }
2333 if (errno() == ERANGE)
2334 error(t.start, MID.OverflowFloatNumber);
2335 t.end = p;
2336 }
2337
2338 /// Scans a special token sequence.
2339 ///
2340 /// SpecialTokenSequence := "#line" Integer Filespec? EndOfLine
2341 void scanSpecialTokenSequence(ref Token t)
2342 {
2343 assert(*p == '#');
2344 t.kind = TOK.HashLine;
2345 t.setWhitespaceFlag();
2346
2347 MID mid;
2348 char* errorAtColumn = p;
2349 char* tokenEnd = ++p;
2350
2351 if (!(p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e'))
2352 {
2353 mid = MID.ExpectedIdentifierSTLine;
2354 goto Lerr;
2355 }
2356 p += 3;
2357 tokenEnd = p + 1;
2358
2359 // TODO: #line58"path/file" is legal. Require spaces?
2360 // State.Space could be used for that purpose.
2361 enum State
2362 { /+Space,+/ Integer, Filespec, End }
2363
2364 State state = State.Integer;
2365
2366 while (!isEndOfLine(++p))
2367 {
2368 if (isspace(*p))
2369 continue;
2370 if (state == State.Integer)
2371 {
2372 if (!isdigit(*p))
2373 {
2374 errorAtColumn = p;
2375 mid = MID.ExpectedIntegerAfterSTLine;
2376 goto Lerr;
2377 }
2378 t.tokLineNum = new Token;
2379 scan(*t.tokLineNum);
2380 tokenEnd = p;
2381 if (t.tokLineNum.kind != TOK.Int32 && t.tokLineNum.kind != TOK.Uint32)
2382 {
2383 errorAtColumn = t.tokLineNum.start;
2384 mid = MID.ExpectedIntegerAfterSTLine;
2385 goto Lerr;
2386 }
2387 --p; // Go one back because scan() advanced p past the integer.
2388 state = State.Filespec;
2389 }
2390 else if (state == State.Filespec && *p == '"')
2391 { // MID.ExpectedFilespec is deprecated.
2392 // if (*p != '"')
2393 // {
2394 // errorAtColumn = p;
2395 // mid = MID.ExpectedFilespec;
2396 // goto Lerr;
2397 // }
2398 t.tokLineFilespec = new Token;
2399 t.tokLineFilespec.start = p;
2400 t.tokLineFilespec.kind = TOK.Filespec;
2401 t.tokLineFilespec.setWhitespaceFlag();
2402 while (*++p != '"')
2403 {
2404 if (isEndOfLine(p))
2405 {
2406 errorAtColumn = t.tokLineFilespec.start;
2407 mid = MID.UnterminatedFilespec;
2408 t.tokLineFilespec.end = p;
2409 tokenEnd = p;
2410 goto Lerr;
2411 }
2412 isascii(*p) || decodeUTF8();
2413 }
2414 auto start = t.tokLineFilespec.start +1; // +1 skips '"'
2415 t.tokLineFilespec.str = start[0 .. p - start];
2416 t.tokLineFilespec.end = p + 1;
2417 tokenEnd = p + 1;
2418 state = State.End;
2419 }
2420 else/+ if (state == State.End)+/
2421 {
2422 mid = MID.UnterminatedSpecialToken;
2423 goto Lerr;
2424 }
2425 }
2426 assert(isEndOfLine(p));
2427
2428 if (state == State.Integer)
2429 {
2430 errorAtColumn = p;
2431 mid = MID.ExpectedIntegerAfterSTLine;
2432 goto Lerr;
2433 }
2434
2435 // Evaluate #line only when not in token string.
2436 if (!inTokenString && t.tokLineNum)
2437 {
2438 this.lineNum_hline = this.lineNum - t.tokLineNum.uint_ + 1;
2439 if (t.tokLineFilespec)
2440 newFilePath(t.tokLineFilespec.str);
2441 }
2442 p = tokenEnd;
2443 t.end = tokenEnd;
2444
2445 return;
2446 Lerr:
2447 p = tokenEnd;
2448 t.end = tokenEnd;
2449 error(errorAtColumn, mid);
2450 }
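// Editor's note (added illustration, not in the original): if "#line 42"
// appears on physical line 10, lineNum_hline becomes 10 - 42 + 1, so the next
// physical line (11) is reported by errorLineNumber() as 11 - (10 - 42 + 1) = 42.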
2451
2452 /// Inserts an empty dummy token (TOK.Empty) before t.
2453 ///
2454 /// Useful in the parsing phase for representing a node in the AST
2455 /// that doesn't consume an actual token from the source text.
2456 Token* insertEmptyTokenBefore(Token* t)
2457 {
2458 assert(t !is null && t.prev !is null);
2459 assert(text.ptr <= t.start && t.start < end, Token.toString(t.kind));
2460 assert(text.ptr <= t.end && t.end <= end, Token.toString(t.kind));
2461
2462 auto prev_t = t.prev;
2463 auto new_t = new Token;
2464 new_t.kind = TOK.Empty;
2465 new_t.start = new_t.end = prev_t.end;
2466 // Link in new token.
2467 prev_t.next = new_t;
2468 new_t.prev = prev_t;
2469 new_t.next = t;
2470 t.prev = new_t;
2471 return new_t;
2472 }
2473
2474 /// Returns the error line number.
2475 uint errorLineNumber(uint lineNum)
2476 {
2477 return lineNum - this.lineNum_hline;
2478 }
2479
2480 /// Forwards error parameters.
2481 void error(char* columnPos, char[] msg, ...)
2482 {
2483 error_(this.lineNum, this.lineBegin, columnPos, msg, _arguments, _argptr);
2484 }
2485
2486 /// ditto
2487 void error(char* columnPos, MID mid, ...)
2488 {
2489 error_(this.lineNum, this.lineBegin, columnPos, GetMsg(mid), _arguments, _argptr);
2490 }
2491
2492 /// ditto
2493 void error(uint lineNum, char* lineBegin, char* columnPos, MID mid, ...)
2494 {
2495 error_(lineNum, lineBegin, columnPos, GetMsg(mid), _arguments, _argptr);
2496 }
2497
2498 /// Creates an error report and appends it to a list.
2499 /// Params:
2500 /// lineNum = the line number.
2501 /// lineBegin = points to the first character of the current line.
2502 /// columnPos = points to the character where the error is located.
2503 /// msg = the message.
2504 void error_(uint lineNum, char* lineBegin, char* columnPos, char[] msg,
2505 TypeInfo[] _arguments, Arg _argptr)
2506 {
2507 lineNum = this.errorLineNumber(lineNum);
2508 auto errorPath = this.filePaths.setPath;
2509 auto location = new Location(errorPath, lineNum, lineBegin, columnPos);
2510 msg = Format(_arguments, _argptr, msg);
2511 auto error = new LexerError(location, msg);
2512 errors ~= error;
2513 if (infoMan !is null)
2514 infoMan ~= error;
2515 }
2516
2517 /// Scans the whole source text until EOF is encountered.
2518 void scanAll()
2519 {
2520 while (nextToken() != TOK.EOF)
2521 {}
2522 }
2523
2524 /// Returns the first token of the source text.
2525 /// This can be the EOF token.
2526 /// Structure: HEAD -> Newline -> First Token
2527 Token* firstToken()
2528 {
2529 return this.head.next.next;
2530 }
2531
2532 /// Returns true if str is a valid D identifier.
2533 static bool isIdentifierString(char[] str)
2534 {
2535 if (str.length == 0 || isdigit(str[0]))
2536 return false;
2537 size_t idx;
2538 do
2539 {
2540 auto c = dil.Unicode.decode(str, idx);
2541 if (c == ERROR_CHAR || !(isident(c) || !isascii(c) && isUniAlpha(c)))
2542 return false;
2543 } while (idx < str.length)
2544 return true;
2545 }
2546
2547 /// Returns true if str is a keyword or a special token (__FILE__, __LINE__ etc.)
2548 static bool isReservedIdentifier(char[] str)
2549 {
2550 if (!isIdentifierString(str))
2551 return false; // str is not a valid identifier.
2552
2553 auto id = IdTable.inStatic(str);
2554 if (id is null || id.kind == TOK.Identifier)
2555 return false; // str is not in the table or a normal identifier.
2556
2557 return true;
2558 }
2559
2560 /// Returns true if the current character to be decoded is
2561 /// a Unicode alpha character.
2562 ///
2563 /// The current pointer 'p' is not advanced if false is returned.
2564 bool isUnicodeAlpha()
2565 {
2566 assert(!isascii(*p), "check for ASCII char before calling isUnicodeAlpha().");
2567 char* p = this.p;
2568 dchar d = *p;
2569 ++p; // Move to second byte.
2570 // Error if second byte is not a trail byte.
2571 if (!isTrailByte(*p))
2572 return false;
2573 // Check for overlong sequences.
2574 switch (d)
2575 {
2576 case 0xE0, 0xF0, 0xF8, 0xFC:
2577 if ((*p & d) == 0x80)
2578 return false;
2579 default:
2580 if ((d & 0xFE) == 0xC0) // 1100000x
2581 return false;
2582 }
2583 const char[] checkNextByte = "if (!isTrailByte(*++p))"
2584 " return false;";
2585 const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
2586 // Decode
2587 if ((d & 0b1110_0000) == 0b1100_0000)
2588 {
2589 d &= 0b0001_1111;
2590 mixin(appendSixBits);
2591 }
2592 else if ((d & 0b1111_0000) == 0b1110_0000)
2593 {
2594 d &= 0b0000_1111;
2595 mixin(appendSixBits ~
2596 checkNextByte ~ appendSixBits);
2597 }
2598 else if ((d & 0b1111_1000) == 0b1111_0000)
2599 {
2600 d &= 0b0000_0111;
2601 mixin(appendSixBits ~
2602 checkNextByte ~ appendSixBits ~
2603 checkNextByte ~ appendSixBits);
2604 }
2605 else
2606 return false;
2607
2608 assert(isTrailByte(*p));
2609 if (!isValidChar(d) || !isUniAlpha(d))
2610 return false;
2611 // Only advance pointer if this is a Unicode alpha character.
2612 this.p = p;
2613 return true;
2614 }
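  // Example: with this.p on the first byte of "ç" (0xC3 0xA7), isUnicodeAlpha()
  // decodes U+00E7, which is a Unicode alpha, so it advances this.p to the
  // trail byte 0xA7 and returns true. For a non-alpha character such as
  // "→" (U+2192) it returns false and this.p stays where it was.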
2615
2616 /// Decodes the next UTF-8 sequence and returns the code point. Returns REPLACEMENT_CHAR and reports an error if the sequence is invalid.
2617 dchar decodeUTF8()
2618 {
2619 assert(!isascii(*p), "check for ASCII char before calling decodeUTF8().");
2620 char* p = this.p;
2621 dchar d = *p;
2622
2623 ++p; // Move to second byte.
2624 // Error if second byte is not a trail byte.
2625 if (!isTrailByte(*p))
2626 goto Lerr2;
2627
2628 // Check for overlong sequences.
2629 switch (d)
2630 {
2631 case 0xE0, // 11100000 100xxxxx
2632 0xF0, // 11110000 1000xxxx
2633 0xF8, // 11111000 10000xxx
2634 0xFC: // 11111100 100000xx
2635 if ((*p & d) == 0x80)
2636 goto Lerr;
2637 default:
2638 if ((d & 0xFE) == 0xC0) // 1100000x
2639 goto Lerr;
2640 }
2641
2642 const char[] checkNextByte = "if (!isTrailByte(*++p))"
2643 " goto Lerr2;";
2644 const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;";
2645
2646 // Decode
2647 if ((d & 0b1110_0000) == 0b1100_0000)
2648 { // 110xxxxx 10xxxxxx
2649 d &= 0b0001_1111;
2650 mixin(appendSixBits);
2651 }
2652 else if ((d & 0b1111_0000) == 0b1110_0000)
2653 { // 1110xxxx 10xxxxxx 10xxxxxx
2654 d &= 0b0000_1111;
2655 mixin(appendSixBits ~
2656 checkNextByte ~ appendSixBits);
2657 }
2658 else if ((d & 0b1111_1000) == 0b1111_0000)
2659 { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2660 d &= 0b0000_0111;
2661 mixin(appendSixBits ~
2662 checkNextByte ~ appendSixBits ~
2663 checkNextByte ~ appendSixBits);
2664 }
2665 else
2666 // 5 and 6 byte UTF-8 sequences are not allowed yet.
2667 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2668 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2669 goto Lerr;
2670
2671 assert(isTrailByte(*p));
2672
2673 if (!isValidChar(d))
2674 {
2675 Lerr:
2676 // Three cases:
2677 // *) the UTF-8 sequence was successfully decoded but the resulting
2678 // character is invalid.
2679 // p points to last trail byte in the sequence.
2680 // *) the UTF-8 sequence is overlong.
2681 // p points to second byte in the sequence.
2682 // *) the UTF-8 sequence has more than 4 bytes or starts with
2683 // a trail byte.
2684 // p points to second byte in the sequence.
2685 assert(isTrailByte(*p));
2686 // Move to next ASCII character or lead byte of a UTF-8 sequence.
2687 while (p < (end-1) && isTrailByte(*p))
2688 ++p;
2689 --p;
2690 assert(!isTrailByte(p[1]));
2691 Lerr2:
2692 d = REPLACEMENT_CHAR;
2693 error(this.p, MID.InvalidUTF8Sequence, formatBytes(this.p, p));
2694 }
2695
2696 this.p = p;
2697 return d;
2698 }
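  // Worked example of the decoding arithmetic for the two-byte sequence
  // 0xC3 0xA7 ("ç", U+00E7):
  //   d = 0xC3;                        // 1100_0011 matches the 110xxxxx branch.
  //   d &= 0b0001_1111;                // d == 0b0_0011 == 0x03
  //   d = d << 6 | 0xA7 & 0b0011_1111; // 0xC0 | 0x27 == 0xE7 == U+00E7
  // Afterwards this.p is left on the trail byte 0xA7.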
2699
2700 /// Encodes the character d and appends it to str.
2701 static void encodeUTF8(ref char[] str, dchar d)
2702 {
2703 assert(!isascii(d), "check for ASCII char before calling encodeUTF8().");
2704 assert(isValidChar(d), "check if character is valid before calling encodeUTF8().");
2705
2706 char[6] b = void;
2707 if (d < 0x800)
2708 {
2709 b[0] = 0xC0 | (d >> 6);
2710 b[1] = 0x80 | (d & 0x3F);
2711 str ~= b[0..2];
2712 }
2713 else if (d < 0x10000)
2714 {
2715 b[0] = 0xE0 | (d >> 12);
2716 b[1] = 0x80 | ((d >> 6) & 0x3F);
2717 b[2] = 0x80 | (d & 0x3F);
2718 str ~= b[0..3];
2719 }
2720 else if (d < 0x200000)
2721 {
2722 b[0] = 0xF0 | (d >> 18);
2723 b[1] = 0x80 | ((d >> 12) & 0x3F);
2724 b[2] = 0x80 | ((d >> 6) & 0x3F);
2725 b[3] = 0x80 | (d & 0x3F);
2726 str ~= b[0..4];
2727 }
2728 /+ // There are no 5 and 6 byte UTF-8 sequences yet.
2729 else if (d < 0x4000000)
2730 {
2731 b[0] = 0xF8 | (d >> 24);
2732 b[1] = 0x80 | ((d >> 18) & 0x3F);
2733 b[2] = 0x80 | ((d >> 12) & 0x3F);
2734 b[3] = 0x80 | ((d >> 6) & 0x3F);
2735 b[4] = 0x80 | (d & 0x3F);
2736 str ~= b[0..5];
2737 }
2738 else if (d < 0x80000000)
2739 {
2740 b[0] = 0xFC | (d >> 30);
2741 b[1] = 0x80 | ((d >> 24) & 0x3F);
2742 b[2] = 0x80 | ((d >> 18) & 0x3F);
2743 b[3] = 0x80 | ((d >> 12) & 0x3F);
2744 b[4] = 0x80 | ((d >> 6) & 0x3F);
2745 b[5] = 0x80 | (d & 0x3F);
2746 str ~= b[0..6];
2747 }
2748 +/
2749 else
2750 assert(0);
2751 }
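  // A minimal sketch checking the two-byte case: U+00E7 ("ç") must encode to
  // the bytes 0xC3 0xA7.
  unittest
  {
    char[] s;
    encodeUTF8(s, cast(dchar)0x00E7);
    assert(s == "\xC3\xA7");
  }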
2752
2753 /// Formats the bytes between start and end.
2754 /// Returns: the hex-escaped bytes, e.g.: abc -> \x61\x62\x63
2755 static char[] formatBytes(char* start, char* end)
2756 {
2757 auto strLen = end-start;
2758 const formatLen = `\xXX`.length;
2759 char[] result = new char[strLen*formatLen]; // Reserve space.
2760 result.length = 0;
2761 foreach (c; cast(ubyte[])start[0..strLen])
2762 result ~= Format("\\x{:X}", c);
2763 return result;
2764 }
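  // Sketch matching the Returns example above: "abc" formats to \x61\x62\x63.
  unittest
  {
    char[] s = "abc";
    assert(formatBytes(s.ptr, s.ptr + s.length) == `\x61\x62\x63`);
  }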
2765
2766 /// Searches for an invalid UTF-8 sequence in str.
2767 /// Returns: a formatted string of the invalid sequence (e.g. \xC0\x80).
2768 static string findInvalidUTF8Sequence(string str)
2769 {
2770 char* p = str.ptr, end = p + str.length;
2771 while (p < end)
2772 {
2773 if (decode(p, end) == ERROR_CHAR)
2774 {
2775 auto begin = p;
2776 // Skip trail-bytes.
2777 while (++p < end && isTrailByte(*p))
2778 {}
2779 return Lexer.formatBytes(begin, p);
2780 }
2781 }
2782 assert(p == end);
2783 return "";
2784 }
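  // Sketch (assumes dil.Unicode.decode reports the overlong sequence that the
  // documentation above uses as its example):
  unittest
  {
    assert(findInvalidUTF8Sequence("abc") == "");
    assert(findInvalidUTF8Sequence("ab\xC0\x80cd") == `\xC0\x80`);
  }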
2785 }
2786
2787 /// Tests the lexer with a list of tokens.
2788 unittest
2789 {
2790 Stdout("Testing Lexer.\n");
2791 struct Pair
2792 {
2793 char[] tokenText;
2794 TOK kind;
2795 }
2796 static Pair[] pairs = [
2797 {"#!äöüß", TOK.Shebang}, {"\n", TOK.Newline},
2798 {"//çay", TOK.Comment}, {"\n", TOK.Newline},
2799 {"&", TOK.AndBinary},
2800 {"/*çağ*/", TOK.Comment}, {"&&", TOK.AndLogical},
2801 {"/+çak+/", TOK.Comment}, {"&=", TOK.AndAssign},
2802 {">", TOK.Greater}, {"+", TOK.Plus},
2803 {">=", TOK.GreaterEqual}, {"++", TOK.PlusPlus},
2804 {">>", TOK.RShift}, {"+=", TOK.PlusAssign},
2805 {">>=", TOK.RShiftAssign}, {"-", TOK.Minus},
2806 {">>>", TOK.URShift}, {"--", TOK.MinusMinus},
2807 {">>>=", TOK.URShiftAssign}, {"-=", TOK.MinusAssign},
2808 {"<", TOK.Less}, {"=", TOK.Assign},
2809 {"<=", TOK.LessEqual}, {"==", TOK.Equal},
2810 {"<>", TOK.LorG}, {"~", TOK.Tilde},
2811 {"<>=", TOK.LorEorG}, {"~=", TOK.CatAssign},
2812 {"<<", TOK.LShift}, {"*", TOK.Mul},
2813 {"<<=", TOK.LShiftAssign}, {"*=", TOK.MulAssign},
2814 {"!", TOK.Not}, {"/", TOK.Div},
2815 {"!=", TOK.NotEqual}, {"/=", TOK.DivAssign},
2816 {"!<", TOK.UorGorE}, {"^", TOK.Xor},
2817 {"!>", TOK.UorLorE}, {"^=", TOK.XorAssign},
2818 {"!<=", TOK.UorG}, {"%", TOK.Mod},
2819 {"!>=", TOK.UorL}, {"%=", TOK.ModAssign},
2820 {"!<>", TOK.UorE}, {"(", TOK.LParen},
2821 {"!<>=", TOK.Unordered}, {")", TOK.RParen},
2822 {".", TOK.Dot}, {"[", TOK.LBracket},
2823 {"..", TOK.Slice}, {"]", TOK.RBracket},
2824 {"...", TOK.Ellipses}, {"{", TOK.LBrace},
2825 {"|", TOK.OrBinary}, {"}", TOK.RBrace},
2826 {"||", TOK.OrLogical}, {":", TOK.Colon},
2827 {"|=", TOK.OrAssign}, {";", TOK.Semicolon},
2828 {"?", TOK.Question}, {",", TOK.Comma},
2829 {"$", TOK.Dollar}, {"cam", TOK.Identifier},
2830 {"çay", TOK.Identifier}, {".0", TOK.Float64},
2831 {"0", TOK.Int32}, {"\n", TOK.Newline},
2832 {"\r", TOK.Newline}, {"\r\n", TOK.Newline},
2833 {"\u2028", TOK.Newline}, {"\u2029", TOK.Newline}
2834 ];
2835
2836 char[] src;
2837
2838 // Join all token texts into a single string.
2839 foreach (i, pair; pairs)
2840 if (pair.kind == TOK.Comment && pair.tokenText[1] == '/' || // Line comment.
2841 pair.kind == TOK.Shebang)
2842 {
2843 assert(pairs[i+1].kind == TOK.Newline); // Must be followed by a newline.
2844 src ~= pair.tokenText;
2845 }
2846 else
2847 src ~= pair.tokenText ~ " ";
2848
2849 auto lx = new Lexer(new SourceText("", src));
2850 auto token = lx.getTokens();
2851
2852 uint i;
2853 assert(token == lx.head);
2854 assert(token.next.kind == TOK.Newline);
2855 token = token.next.next;
2856 do
2857 {
2858 assert(i < pairs.length);
2859 assert(token.srcText == pairs[i].tokenText, Format("Scanned '{0}' but expected '{1}'", token.srcText, pairs[i].tokenText));
2860 ++i;
2861 token = token.next;
2862 } while (token.kind != TOK.EOF)
2863 }
2864
2865 /// Tests the Lexer's peek() method.
2866 unittest
2867 {
2868 Stdout("Testing method Lexer.peek()\n");
2869 auto sourceText = new SourceText("", "unittest { }");
2870 auto lx = new Lexer(sourceText, null);
2871
2872 auto next = lx.head;
2873 lx.peek(next);
2874 assert(next.kind == TOK.Newline);
2875 lx.peek(next);
2876 assert(next.kind == TOK.Unittest);
2877 lx.peek(next);
2878 assert(next.kind == TOK.LBrace);
2879 lx.peek(next);
2880 assert(next.kind == TOK.RBrace);
2881 lx.peek(next);
2882 assert(next.kind == TOK.EOF);
2883
2884 lx = new Lexer(new SourceText("", ""));
2885 next = lx.head;
2886 lx.peek(next);
2887 assert(next.kind == TOK.Newline);
2888 lx.peek(next);
2889 assert(next.kind == TOK.EOF);
2890 }
2891
2892 unittest
2893 {
2894 // Numbers unittest: sample literals (listed only; not scanned here).
2895 // 0L 0ULi 0_L 0_UL 0x0U 0x0p2 0_Fi 0_e2 0_F 0_i
2896 // 0u 0U 0uL 0UL 0L 0LU 0Lu
2897 // 0Li 0f 0F 0fi 0Fi 0i
2898 // 0b_1_LU 0b1000u
2899 // 0x232Lu
2900 }