Mercurial > projects > ddmd

diff dmd/Lexer.d @ 0:10317f0c89a5
Initial commit
author: korDen
date: Sat, 24 Oct 2009 08:42:06 +0400
children: 7427ded8caf7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dmd/Lexer.d	Sat Oct 24 08:42:06 2009 +0400
@@ -0,0 +1,2425 @@
+module dmd.Lexer;
+
+import dmd.StringTable;
+import dmd.OutBuffer;
+import dmd.Token;
+import dmd.Loc;
+import dmd.Module;
+import dmd.Identifier;
+import dmd.TOK;
+import dmd.Keyword;
+import dmd.StringValue;
+import dmd.Global;
+import dmd.Util;
+import dmd.Id;
+import dmd.Dchar;
+import dmd.Utf;
+
+import std.stdio : writeln;
+
+import core.stdc.ctype;
+import core.stdc.stdlib;
+import core.stdc.string;
+import core.stdc.stdio;
+import core.stdc.time;
+import core.stdc.errno;
+
+// Unicode code points that the lexer counts as line terminators
+// (see the PS/LS checks after decodeUTF() in the Lexer methods below).
+enum LS = 0x2028;	// UTF line separator
+enum PS = 0x2029;	// UTF paragraph separator
+
+// C-linkage global declared here but not referenced in the visible part of
+// this file. NOTE(review): presumably the C runtime's locale decimal-point
+// string, used when lexing floating point literals -- confirm against the
+// code that reads it.
+extern (C) extern
+{
+	__gshared char* __locale_decpoint;
+}
+
+/**
+ * Placeholder for the "is this code point a Unicode letter" test.
+ *
+ * The lexer calls this for every non-ASCII code point that could start or
+ * continue an identifier. It is not implemented yet: any call fails on
+ * assert(false), so source containing non-ASCII identifier characters
+ * cannot currently be lexed.
+ *
+ * Params:
+ *	u = decoded code point to classify
+ * Returns: never returns in its current form.
+ */
+int isUniAlpha(uint u)
+{
+	assert(false);
+}
+
+class Lexer
+{
+    // Shared by all Lexer instances.
+    static StringTable stringtable;	// keyword and identifier entries (see initKeywords, idPool)
+    static OutBuffer stringbuffer;	// scratch buffer used while assembling string literal contents
+    static Token* freelist;		// tokens released by nextToken(), linked through Token.next
+
+    Loc loc;			// for error messages
+
+    ubyte* base;	// pointer to start of buffer
+    ubyte* end;		// past end of buffer
+    ubyte* p;		// current character
+    Token token;		// current token; token.next chains any tokens already peeked
+    Module mod;			// module passed to the constructor
+    int doDocComment;		// collect doc comment information
+    int anyToken;		// !=0 means seen at least one token
+    int commentToken;		// !=0 means comments are TOKcomment's
+	
+	/// Static constructor: allocates the string table and scratch buffer
+	/// shared by every Lexer instance.
+	static this()
+	{
+		stringtable = new StringTable();
+		stringbuffer = new OutBuffer();
+	}
+	
+	/// Static destructor: explicitly destroys the shared string table.
+	/// Note that stringbuffer is not deleted here.
+	static ~this()
+	{
+		delete stringtable;
+	}
+
+    /**
+     * Set up a lexer over part of a source buffer.
+     *
+     * Params:
+     *	mod          = module being lexed; also used to initialise loc
+     *	base         = start of the source buffer
+     *	begoffset    = offset from base of the first byte to lex
+     *	endoffset    = offset from base marking the end of the buffer
+     *	doDocComment = non-zero to collect doc comment information
+     *	commentToken = non-zero to have comments returned as TOKcomment
+     *
+     * If the text starts with "#!", the whole first line (including its
+     * line terminator) is skipped and the line counter starts at 2.
+     * The skip loop stops on a 0 or 0x1A byte, so it relies on the buffer
+     * containing such a terminator.
+     */
+    this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
+	{
+		loc = Loc(mod, 1);
+
+		memset(&token, 0, token.sizeof);
+		this.base = base;
+		this.end  = base + endoffset;
+		p = base + begoffset;
+		this.mod = mod;
+		this.doDocComment = doDocComment;
+		this.anyToken = 0;
+		this.commentToken = commentToken;
+		//initKeywords();
+
+		/* If first line starts with '#!', ignore the line
+		 */
+		if (p[0] == '#' && p[1] == '!')
+		{
+			p += 2;
+		Lline:
+			while (1)
+			{
+				ubyte c = *p;
+				switch (c)
+				{
+				case '\n':
+					p++;
+					break Lline;
+
+				case '\r':
+					p++;
+					if (*p == '\n')
+						p++;
+					break Lline;
+
+				case 0:
+				case 0x1A:
+					// End of input: stay on the terminator so scan() reports EOF.
+					break Lline;
+
+				default:
+					if (c & 0x80)
+					{
+						uint u = decodeUTF();
+						if (u == PS || u == LS)
+						{
+							// The other decodeUTF() callers in this class
+							// advance p once more after it returns, i.e. p
+							// is still on the last byte of the sequence.
+							// Step past it so scanning does not resume in
+							// the middle of the separator.
+							p++;
+							break Lline;
+						}
+					}
+					p++;
+					break;		// keep scanning the '#!' line
+				}
+			}
+			loc.linnum = 2;
+		}
+	}
+
+version (DMDV2) {
+	/* Keyword table used when building with version DMDV2.
+	 * Each entry pairs a keyword's spelling with its token value;
+	 * initKeywords() enters them into stringtable and Token.tochars.
+	 * initKeywords() drops the last two entries when
+	 * global.params.Dversion == 1, so the order at the end matters.
+	 */
+	static Keyword[] keywords =
+	[
+	//    {	"",		TOK	},
+
+		{	"this",		TOK.TOKthis		},
+		{	"super",	TOK.TOKsuper	},
+		{	"assert",	TOK.TOKassert	},
+		{	"null",		TOK.TOKnull		},
+		{	"true",		TOK.TOKtrue		},
+		{	"false",	TOK.TOKfalse	},
+		{	"cast",		TOK.TOKcast		},
+		{	"new",		TOK.TOKnew		},
+		{	"delete",	TOK.TOKdelete	},
+		{	"throw",	TOK.TOKthrow	},
+		{	"module",	TOK.TOKmodule	},
+		{	"pragma",	TOK.TOKpragma	},
+		{	"typeof",	TOK.TOKtypeof	},
+		{	"typeid",	TOK.TOKtypeid	},
+
+		{	"template",	TOK.TOKtemplate	},
+
+		{	"void",		TOK.TOKvoid		},
+		{	"byte",		TOK.TOKint8		},
+		{	"ubyte",	TOK.TOKuns8		},
+		{	"short",	TOK.TOKint16	},
+		{	"ushort",	TOK.TOKuns16	},
+		{	"int",		TOK.TOKint32	},
+		{	"uint",		TOK.TOKuns32	},
+		{	"long",		TOK.TOKint64	},
+		{	"ulong",	TOK.TOKuns64	},
+		{	"cent",		TOK.TOKcent,	},
+		{	"ucent",	TOK.TOKucent,	},
+		{	"float",	TOK.TOKfloat32	},
+		{	"double",	TOK.TOKfloat64	},
+		{	"real",		TOK.TOKfloat80	},
+
+		{	"bool",		TOK.TOKbool		},
+		{	"char",		TOK.TOKchar		},
+		{	"wchar",	TOK.TOKwchar	},
+		{	"dchar",	TOK.TOKdchar	},
+
+		{	"ifloat",	TOK.TOKimaginary32	},
+		{	"idouble",	TOK.TOKimaginary64	},
+		{	"ireal",	TOK.TOKimaginary80	},
+
+		{	"cfloat",	TOK.TOKcomplex32	},
+		{	"cdouble",	TOK.TOKcomplex64	},
+		{	"creal",	TOK.TOKcomplex80	},
+
+		{	"delegate",	TOK.TOKdelegate	},
+		{	"function",	TOK.TOKfunction	},
+
+		{	"is",		TOK.TOKis		},
+		{	"if",		TOK.TOKif		},
+		{	"else",		TOK.TOKelse		},
+		{	"while",	TOK.TOKwhile	},
+		{	"for",		TOK.TOKfor		},
+		{	"do",		TOK.TOKdo		},
+		{	"switch",	TOK.TOKswitch	},
+		{	"case",		TOK.TOKcase		},
+		{	"default",	TOK.TOKdefault	},
+		{	"break",	TOK.TOKbreak	},
+		{	"continue",	TOK.TOKcontinue	},
+		{	"synchronized",	TOK.TOKsynchronized	},
+		{	"return",	TOK.TOKreturn	},
+		{	"goto",		TOK.TOKgoto		},
+		{	"try",		TOK.TOKtry		},
+		{	"catch",	TOK.TOKcatch	},
+		{	"finally",	TOK.TOKfinally	},
+		{	"with",		TOK.TOKwith		},
+		{	"asm",		TOK.TOKasm		},
+		{	"foreach",	TOK.TOKforeach	},
+		{	"foreach_reverse",	TOK.TOKforeach_reverse	},
+		{	"scope",	TOK.TOKscope	},
+
+		{	"struct",	TOK.TOKstruct	},
+		{	"class",	TOK.TOKclass	},
+		{	"interface",	TOK.TOKinterface	},
+		{	"union",	TOK.TOKunion	},
+		{	"enum",		TOK.TOKenum		},
+		{	"import",	TOK.TOKimport	},
+		{	"mixin",	TOK.TOKmixin	},
+		{	"static",	TOK.TOKstatic	},
+		{	"final",	TOK.TOKfinal	},
+		{	"const",	TOK.TOKconst	},
+		{	"typedef",	TOK.TOKtypedef	},
+		{	"alias",	TOK.TOKalias	},
+		{	"override",	TOK.TOKoverride	},
+		{	"abstract",	TOK.TOKabstract	},
+		{	"volatile",	TOK.TOKvolatile	},
+		{	"debug",	TOK.TOKdebug	},
+		{	"deprecated",	TOK.TOKdeprecated	},
+		{	"in",		TOK.TOKin		},
+		{	"out",		TOK.TOKout		},
+		{	"inout",	TOK.TOKinout	},
+		{	"lazy",		TOK.TOKlazy		},
+		{	"auto",		TOK.TOKauto		},
+
+		{	"align",	TOK.TOKalign	},
+		{	"extern",	TOK.TOKextern	},
+		{	"private",	TOK.TOKprivate	},
+		{	"package",	TOK.TOKpackage	},
+		{	"protected",	TOK.TOKprotected	},
+		{	"public",	TOK.TOKpublic	},
+		{	"export",	TOK.TOKexport	},
+
+		{	"body",		TOK.TOKbody		},
+		{	"invariant",	TOK.TOKinvariant	},
+		{	"unittest",	TOK.TOKunittest	},
+		{	"version",	TOK.TOKversion	},
+		//{	"manifest",	TOK.TOKmanifest	},
+
+		// Added after 1.0
+		{	"ref",		TOK.TOKref		},
+		{	"macro",	TOK.TOKmacro	},
+		{	"pure",		TOK.TOKpure		},
+		{	"nothrow",	TOK.TOKnothrow	},
+		{	"__thread",	TOK.TOKtls		},
+		{	"__gshared",	TOK.TOKgshared	},
+		{	"__traits",	TOK.TOKtraits	},
+		{	"__overloadset", TOK.TOKoverloadset	},
+		{	"__FILE__",	TOK.TOKfile		},
+		{	"__LINE__",	TOK.TOKline		},
+		{	"shared",	TOK.TOKshared	},
+		{	"immutable",	TOK.TOKimmutable	},
+	];
+} else {
+	/* Keyword table used when NOT building with version DMDV2.
+	 * Each entry pairs a keyword's spelling with its token value;
+	 * initKeywords() enters them into stringtable and Token.tochars.
+	 * initKeywords() drops the last two entries ("ref" and "macro" here)
+	 * when global.params.Dversion == 1.
+	 */
+	static Keyword[] keywords =
+	[
+	//    {	"",		TOK	},
+
+		{	"this",		TOK.TOKthis		},
+		{	"super",	TOK.TOKsuper	},
+		{	"assert",	TOK.TOKassert	},
+		{	"null",		TOK.TOKnull		},
+		{	"true",		TOK.TOKtrue		},
+		{	"false",	TOK.TOKfalse	},
+		{	"cast",		TOK.TOKcast		},
+		{	"new",		TOK.TOKnew		},
+		{	"delete",	TOK.TOKdelete	},
+		{	"throw",	TOK.TOKthrow	},
+		{	"module",	TOK.TOKmodule	},
+		{	"pragma",	TOK.TOKpragma	},
+		{	"typeof",	TOK.TOKtypeof	},
+		{	"typeid",	TOK.TOKtypeid	},
+
+		{	"template",	TOK.TOKtemplate	},
+
+		{	"void",		TOK.TOKvoid		},
+		{	"byte",		TOK.TOKint8		},
+		{	"ubyte",	TOK.TOKuns8		},
+		{	"short",	TOK.TOKint16	},
+		{	"ushort",	TOK.TOKuns16	},
+		{	"int",		TOK.TOKint32	},
+		{	"uint",		TOK.TOKuns32	},
+		{	"long",		TOK.TOKint64	},
+		{	"ulong",	TOK.TOKuns64	},
+		{	"cent",		TOK.TOKcent,	},
+		{	"ucent",	TOK.TOKucent,	},
+		{	"float",	TOK.TOKfloat32	},
+		{	"double",	TOK.TOKfloat64	},
+		{	"real",		TOK.TOKfloat80	},
+
+		{	"bool",		TOK.TOKbool		},
+		{	"char",		TOK.TOKchar		},
+		{	"wchar",	TOK.TOKwchar	},
+		{	"dchar",	TOK.TOKdchar	},
+
+		{	"ifloat",	TOK.TOKimaginary32	},
+		{	"idouble",	TOK.TOKimaginary64	},
+		{	"ireal",	TOK.TOKimaginary80	},
+
+		{	"cfloat",	TOK.TOKcomplex32	},
+		{	"cdouble",	TOK.TOKcomplex64	},
+		{	"creal",	TOK.TOKcomplex80	},
+
+		{	"delegate",	TOK.TOKdelegate	},
+		{	"function",	TOK.TOKfunction	},
+
+		{	"is",		TOK.TOKis		},
+		{	"if",		TOK.TOKif		},
+		{	"else",		TOK.TOKelse		},
+		{	"while",	TOK.TOKwhile	},
+		{	"for",		TOK.TOKfor		},
+		{	"do",		TOK.TOKdo		},
+		{	"switch",	TOK.TOKswitch	},
+		{	"case",		TOK.TOKcase		},
+		{	"default",	TOK.TOKdefault	},
+		{	"break",	TOK.TOKbreak	},
+		{	"continue",	TOK.TOKcontinue	},
+		{	"synchronized",	TOK.TOKsynchronized	},
+		{	"return",	TOK.TOKreturn	},
+		{	"goto",		TOK.TOKgoto		},
+		{	"try",		TOK.TOKtry		},
+		{	"catch",	TOK.TOKcatch	},
+		{	"finally",	TOK.TOKfinally	},
+		{	"with",		TOK.TOKwith		},
+		{	"asm",		TOK.TOKasm		},
+		{	"foreach",	TOK.TOKforeach	},
+		{	"foreach_reverse",	TOK.TOKforeach_reverse	},
+		{	"scope",	TOK.TOKscope	},
+
+		{	"struct",	TOK.TOKstruct	},
+		{	"class",	TOK.TOKclass	},
+		{	"interface",	TOK.TOKinterface	},
+		{	"union",	TOK.TOKunion	},
+		{	"enum",		TOK.TOKenum		},
+		{	"import",	TOK.TOKimport	},
+		{	"mixin",	TOK.TOKmixin	},
+		{	"static",	TOK.TOKstatic	},
+		{	"final",	TOK.TOKfinal	},
+		{	"const",	TOK.TOKconst	},
+		{	"typedef",	TOK.TOKtypedef	},
+		{	"alias",	TOK.TOKalias	},
+		{	"override",	TOK.TOKoverride	},
+		{	"abstract",	TOK.TOKabstract	},
+		{	"volatile",	TOK.TOKvolatile	},
+		{	"debug",	TOK.TOKdebug	},
+		{	"deprecated",	TOK.TOKdeprecated	},
+		{	"in",		TOK.TOKin		},
+		{	"out",		TOK.TOKout		},
+		{	"inout",	TOK.TOKinout	},
+		{	"lazy",		TOK.TOKlazy		},
+		{	"auto",		TOK.TOKauto		},
+
+		{	"align",	TOK.TOKalign	},
+		{	"extern",	TOK.TOKextern	},
+		{	"private",	TOK.TOKprivate	},
+		{	"package",	TOK.TOKpackage	},
+		{	"protected",	TOK.TOKprotected	},
+		{	"public",	TOK.TOKpublic	},
+		{	"export",	TOK.TOKexport	},
+
+		{	"body",		TOK.TOKbody		},
+		{	"invariant",	TOK.TOKinvariant	},
+		{	"unittest",	TOK.TOKunittest	},
+		{	"version",	TOK.TOKversion	},
+		//{	"manifest",	TOK.TOKmanifest	},
+
+		// Added after 1.0
+		{	"ref",		TOK.TOKref		},
+		{	"macro",	TOK.TOKmacro	},
+	];
+}
+
+	// Per-byte classification table, filled in by cmtable_init().
+	// Each entry is a bit set of the CM* flags below.
+	static ubyte cmtable[256];
+	enum CMoctal =	0x1;	// byte is an octal digit
+	enum  CMhex =	0x2;	// byte is a hexadecimal digit
+	enum  CMidchar =	0x4;	// byte is alphanumeric or '_'
+	
+	// Table lookups; each returns non-zero when the flag is set for c.
+	ubyte isoctal (ubyte c) { return cmtable[c] & CMoctal; }
+	ubyte ishex   (ubyte c) { return cmtable[c] & CMhex; }
+	ubyte isidchar(ubyte c) { return cmtable[c] & CMidchar; }
+
+	/**
+	 * Populate cmtable: for every byte value, OR in CMoctal for '0'..'7',
+	 * CMhex for hexadecimal digits, and CMidchar for alphanumerics and '_'.
+	 */
+	static void cmtable_init()
+	{
+		for (uint ch = 0; ch < cmtable.length; ++ch)
+		{
+			ubyte flags = 0;
+
+			if (ch >= '0' && ch <= '7')
+				flags |= CMoctal;
+
+			if (isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
+				flags |= CMhex;
+
+			if (ch == '_' || isalnum(ch))
+				flags |= CMidchar;
+
+			cmtable[ch] |= flags;
+		}
+	}
+
+    /**
+     * One-time setup of the lexer's shared tables.
+     *
+     * Fills cmtable, enters every keyword from the keywords table into
+     * stringtable as an Identifier carrying its token value, and fills
+     * Token.tochars with the printable spelling of each token.
+     * When global.params.Dversion == 1 the last two entries of the
+     * keywords table are left out.
+     */
+    static void initKeywords()
+	{
+		// size_t, not uint: .length is size_t and does not narrow implicitly.
+		size_t nkeywords = keywords.length;
+
+		if (global.params.Dversion == 1)
+			nkeywords -= 2;
+
+		cmtable_init();
+		
+		for (size_t u = 0; u < nkeywords; u++)
+		{
+			//printf("keyword[%d] = '%s'\n",u, keywords[u].name);
+			string s = keywords[u].name;
+			TOK v = keywords[u].value;
+			StringValue* sv = stringtable.insert(s);
+			sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v);
+
+			//printf("tochars[%d] = '%s'\n",v, s);
+			Token.tochars[v] = s;
+		}
+
+		Token.tochars[TOK.TOKeof]		= "EOF";
+		Token.tochars[TOK.TOKlcurly]		= "{";
+		Token.tochars[TOK.TOKrcurly]		= "}";
+		Token.tochars[TOK.TOKlparen]		= "(";
+		Token.tochars[TOK.TOKrparen]		= ")";
+		Token.tochars[TOK.TOKlbracket]		= "[";
+		Token.tochars[TOK.TOKrbracket]		= "]";
+		Token.tochars[TOK.TOKsemicolon]	= ";";
+		Token.tochars[TOK.TOKcolon]		= ":";
+		Token.tochars[TOK.TOKcomma]		= ",";
+		Token.tochars[TOK.TOKdot]		= ".";
+		Token.tochars[TOK.TOKxor]		= "^";
+		Token.tochars[TOK.TOKxorass]		= "^=";
+		Token.tochars[TOK.TOKassign]		= "=";
+		Token.tochars[TOK.TOKconstruct]	= "=";
+version (DMDV2) {
+		Token.tochars[TOK.TOKblit]		= "=";
+}
+		Token.tochars[TOK.TOKlt]		= "<";
+		Token.tochars[TOK.TOKgt]		= ">";
+		Token.tochars[TOK.TOKle]		= "<=";
+		Token.tochars[TOK.TOKge]		= ">=";
+		Token.tochars[TOK.TOKequal]		= "==";
+		Token.tochars[TOK.TOKnotequal]		= "!=";
+		Token.tochars[TOK.TOKnotidentity]	= "!is";
+		Token.tochars[TOK.TOKtobool]		= "!!";
+
+		Token.tochars[TOK.TOKunord]		= "!<>=";
+		Token.tochars[TOK.TOKue]		= "!<>";
+		Token.tochars[TOK.TOKlg]		= "<>";
+		Token.tochars[TOK.TOKleg]		= "<>=";
+		Token.tochars[TOK.TOKule]		= "!>";
+		Token.tochars[TOK.TOKul]		= "!>=";
+		Token.tochars[TOK.TOKuge]		= "!<";
+		Token.tochars[TOK.TOKug]		= "!<=";
+
+		Token.tochars[TOK.TOKnot]		= "!";
+		Token.tochars[TOK.TOKshl]		= "<<";
+		Token.tochars[TOK.TOKshr]		= ">>";
+		Token.tochars[TOK.TOKushr]		= ">>>";
+		Token.tochars[TOK.TOKadd]		= "+";
+		Token.tochars[TOK.TOKmin]		= "-";
+		Token.tochars[TOK.TOKmul]		= "*";
+		Token.tochars[TOK.TOKdiv]		= "/";
+		Token.tochars[TOK.TOKmod]		= "%";
+		Token.tochars[TOK.TOKslice]		= "..";
+		Token.tochars[TOK.TOKdotdotdot]	= "...";
+		Token.tochars[TOK.TOKand]		= "&";
+		Token.tochars[TOK.TOKandand]		= "&&";
+		Token.tochars[TOK.TOKor]		= "|";
+		Token.tochars[TOK.TOKoror]		= "||";
+		Token.tochars[TOK.TOKarray]		= "[]";
+		Token.tochars[TOK.TOKindex]		= "[i]";
+		Token.tochars[TOK.TOKaddress]		= "&";
+		Token.tochars[TOK.TOKstar]		= "*";
+		Token.tochars[TOK.TOKtilde]		= "~";
+		Token.tochars[TOK.TOKdollar]		= "$";
+		Token.tochars[TOK.TOKcast]		= "cast";
+		Token.tochars[TOK.TOKplusplus]		= "++";
+		Token.tochars[TOK.TOKminusminus]	= "--";
+		Token.tochars[TOK.TOKtype]		= "type";
+		Token.tochars[TOK.TOKquestion]		= "?";
+		Token.tochars[TOK.TOKneg]		= "-";
+		Token.tochars[TOK.TOKuadd]		= "+";
+		Token.tochars[TOK.TOKvar]		= "var";
+		Token.tochars[TOK.TOKaddass]		= "+=";
+		Token.tochars[TOK.TOKminass]		= "-=";
+		Token.tochars[TOK.TOKmulass]		= "*=";
+		Token.tochars[TOK.TOKdivass]		= "/=";
+		Token.tochars[TOK.TOKmodass]		= "%=";
+		Token.tochars[TOK.TOKshlass]		= "<<=";
+		Token.tochars[TOK.TOKshrass]		= ">>=";
+		Token.tochars[TOK.TOKushrass]		= ">>>=";
+		Token.tochars[TOK.TOKandass]		= "&=";
+		Token.tochars[TOK.TOKorass]		= "|=";
+		Token.tochars[TOK.TOKcatass]		= "~=";
+		Token.tochars[TOK.TOKcat]		= "~";
+		Token.tochars[TOK.TOKcall]		= "call";
+		Token.tochars[TOK.TOKidentity]		= "is";
+
+		Token.tochars[TOK.TOKidentifier]	= "identifier";
+		Token.tochars[TOK.TOKat]		= "@";
+
+		 // For debugging
+		Token.tochars[TOK.TOKdotexp]		= "dotexp";
+		Token.tochars[TOK.TOKdotti]		= "dotti";
+		Token.tochars[TOK.TOKdotvar]		= "dotvar";
+		Token.tochars[TOK.TOKdottype]		= "dottype";
+		Token.tochars[TOK.TOKsymoff]		= "symoff";
+		Token.tochars[TOK.TOKarraylength]	= "arraylength";
+		Token.tochars[TOK.TOKarrayliteral]	= "arrayliteral";
+		Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral";
+		Token.tochars[TOK.TOKstructliteral]	= "structliteral";
+		Token.tochars[TOK.TOKstring]		= "string";
+		Token.tochars[TOK.TOKdsymbol]		= "symbol";
+		Token.tochars[TOK.TOKtuple]		= "tuple";
+		Token.tochars[TOK.TOKdeclaration]	= "declaration";
+		Token.tochars[TOK.TOKdottd]		= "dottd";
+		Token.tochars[TOK.TOKon_scope_exit]	= "scope(exit)";
+		Token.tochars[TOK.TOKon_scope_success]	= "scope(success)";
+		Token.tochars[TOK.TOKon_scope_failure]	= "scope(failure)";
+	}
+
+    /**
+     * Return the Identifier stored in stringtable for s, creating and
+     * storing a TOKidentifier Identifier the first time s is seen.
+     */
+    static Identifier idPool(string s)
+	{
+		StringValue* entry = stringtable.update(s);
+
+		if (auto known = cast(Identifier) entry.ptrvalue)
+			return known;
+
+		auto created = new Identifier(entry.lstring.string_, TOK.TOKidentifier);
+		entry.ptrvalue = cast(void*) created;
+		return created;
+	}
+
+    /**
+     * Create a unique identifier from prefix s, numbering it with a
+     * counter that is incremented on every call.
+     */
+    static Identifier uniqueId(string s)
+	{
+		static int num;
+
+		num += 1;
+		return uniqueId(s, num);
+	}
+
+	/*********************************************
+	 * Create a unique identifier using the prefix s.
+	 *
+	 * The name is s immediately followed by num in decimal, and is
+	 * interned through idPool().
+	 *
+	 * Params:
+	 *	s   = prefix; must be short enough for prefix + number to fit
+	 *	      in the 32 byte scratch buffer (checked by assert)
+	 *	num = number appended to the prefix
+	 */
+    static Identifier uniqueId(string s, int num)
+	{
+		char[32] buffer;
+		size_t slen = s.length;
+
+		// 3 characters per byte of num is a safe bound on its printed
+		// width (sign included), plus one for the terminating 0.
+		assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof);
+
+		// "%.*s" takes an int precision followed by a char pointer, so
+		// pass those explicitly instead of pushing the D slice itself
+		// through the C varargs.
+		int len = sprintf(buffer.ptr, "%.*s%d", cast(int) slen, s.ptr, num);
+		assert(len >= 0);
+
+		return idPool(buffer[0..len].idup);
+	}
+
+    /**
+     * Advance to the next token and return its value.
+     *
+     * If a token was already produced by peek(), it is copied into the
+     * current token and its node is pushed onto freelist; otherwise a new
+     * token is scanned from the input.
+     */
+    TOK nextToken()
+	{
+		Token* queued = token.next;
+
+		if (queued is null)
+		{
+			scan(&token);
+			//token.print();
+			return token.value;
+		}
+
+		// The copy also carries over queued.next, keeping the rest of the
+		// look-ahead chain attached to the current token.
+		memcpy(&token, queued, Token.sizeof);
+		queued.next = freelist;
+		freelist = queued;
+
+		//token.print();
+		return token.value;
+	}
+
+	/***********************
+	 * Return the value of the token that follows the current one,
+	 * without consuming it.
+	 */
+    TOK peekNext()
+	{
+		Token* ahead = peek(&token);
+		return ahead.value;
+	}
+
+	/***********************
+	 * Look ahead two tokens: return the value of the token after the one
+	 * peekNext() reports, without consuming either.
+	 */
+    TOK peekNext2()
+	{
+		Token* first = peek(&token);
+		return peek(first).value;
+	}
+
+	/**
+	 * Scan the next token from the input at p and store it in *t.
+	 *
+	 * White space and line ends are skipped (updating loc.linnum), and a
+	 * '#' is handed to pragma_(). Comments are skipped unless commentToken
+	 * is set, in which case they are returned as TOKcomment. When
+	 * doDocComment is set, comments opening with a doubled marker
+	 * (slash-star-star, slash-plus-plus, or three slashes) are passed
+	 * to getDocComment().
+	 * A 0 or 0x1A byte produces TOKeof.
+	 *
+	 * Params:
+	 *	t = token to fill in; t.ptr is set to the first byte of the token
+	 */
+    void scan(Token* t)
+	{
+		uint lastLine = loc.linnum;
+		uint linnum;
+
+		t.blockComment = null;
+		t.lineComment = null;
+		while (1)
+		{
+			t.ptr = p;
+			//printf("p = %p, *p = '%c'\n",p,*p);
+			switch (*p)
+			{
+				case 0:
+				case 0x1A:
+				t.value = TOK.TOKeof;			// end of file
+				return;
+
+				case ' ':
+				case '\t':
+				case '\v':
+				case '\f':
+				p++;
+				continue;			// skip white space
+
+				case '\r':
+				p++;
+				if (*p != '\n')			// if CR stands by itself
+					loc.linnum++;
+				continue;			// skip white space
+
+				case '\n':
+				p++;
+				loc.linnum++;
+				continue;			// skip white space
+
+				case '0':  	case '1':   case '2':   case '3':   case '4':
+				case '5':  	case '6':   case '7':   case '8':   case '9':
+				t.value = number(t);
+				return;
+
+version (CSTRINGS) {
+				case '\'':
+				t.value = charConstant(t, 0);
+				return;
+
+				case '"':
+				t.value = stringConstant(t,0);
+				return;
+
+				case 'l':
+				case 'L':
+				if (p[1] == '\'')
+				{
+					p++;
+					t.value = charConstant(t, 1);
+					return;
+				}
+				else if (p[1] == '"')
+				{
+					p++;
+					t.value = stringConstant(t, 1);
+					return;
+				}
+} else {
+				case '\'':
+				t.value = charConstant(t,0);
+				return;
+
+				// r"..." : falls through into the '`' case below
+				case 'r':
+				if (p[1] != '"')
+					goto case_ident;
+				p++;
+				case '`':
+				t.value = wysiwygStringConstant(t, *p);
+				return;
+
+				case 'x':
+				if (p[1] != '"')
+					goto case_ident;
+				p++;
+				t.value = hexStringConstant(t);
+				return;
+
+version (DMDV2) {
+				case 'q':
+				if (p[1] == '"')
+				{
+					p++;
+					t.value = delimitedStringConstant(t);
+					return;
+				}
+				else if (p[1] == '{')
+				{
+					p++;
+					t.value = tokenStringConstant(t);
+					return;
+				}
+				else
+					goto case_ident;
+}
+
+				case '"':
+				t.value = escapeStringConstant(t,0);
+				return;
+version (TEXTUAL_ASSEMBLY_OUT) {
+} else {
+				case '\\':			// escaped string literal
+				{	uint c;
+				ubyte* pstart = p;
+
+				stringbuffer.reset();
+				do
+				{
+					p++;
+					switch (*p)
+					{
+					case 'u':
+					case 'U':
+					case '&':
+						c = escapeSequence();
+						stringbuffer.writeUTF8(c);
+						break;
+
+					default:
+						c = escapeSequence();
+						stringbuffer.writeByte(c);
+						break;
+					}
+				} while (*p == '\\');
+				t.len = stringbuffer.offset;
+				stringbuffer.writeByte(0);
+				// The token gets its own malloc'ed copy of the buffer contents.
+				char* cc = cast(char*)malloc(stringbuffer.offset);
+				memcpy(cc, stringbuffer.data, stringbuffer.offset);
+				t.ustring = cc;
+				t.postfix = 0;
+				t.value = TOK.TOKstring;
+				if (!global.params.useDeprecated)
+					error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart);
+				return;
+				}
+}
+				case 'l':
+				case 'L':
+}
+				case 'a':  	case 'b':   case 'c':   case 'd':   case 'e':
+				case 'f':  	case 'g':   case 'h':   case 'i':   case 'j':
+				case 'k':  	            case 'm':   case 'n':   case 'o':
+version (DMDV2) {
+				case 'p':  	/*case 'q': case 'r':*/ case 's':   case 't':
+} else {
+				case 'p':  	case 'q': /*case 'r':*/ case 's':   case 't':
+}
+				case 'u':  	case 'v':   case 'w': /*case 'x':*/ case 'y':
+				case 'z':
+				case 'A':  	case 'B':   case 'C':   case 'D':   case 'E':
+				case 'F':  	case 'G':   case 'H':   case 'I':   case 'J':
+				case 'K':  	            case 'M':   case 'N':   case 'O':
+				case 'P':  	case 'Q':   case 'R':   case 'S':   case 'T':
+				case 'U':  	case 'V':   case 'W':   case 'X':   case 'Y':
+				case 'Z':
+				case '_':
+				case_ident:
+				{   ubyte c;
+				StringValue *sv;
+				Identifier id;
+
+				do
+				{
+					c = *++p;
+				} while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
+				// Look the spelling up in stringtable; keywords were entered by
+				// initKeywords(), so id.value is the keyword token when it is one.
+				sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]);	///
+				id = cast(Identifier) sv.ptrvalue;
+				if (id is null)
+				{   id = new Identifier(sv.lstring.string_, TOK.TOKidentifier);
+					sv.ptrvalue = cast(void*)id;
+				}
+				t.ident = id;
+				t.value = cast(TOK) id.value;
+				anyToken = 1;
+				if (*t.ptr == '_')	// if special identifier token
+				{
+					static char date[11+1];
+					static char time[8+1];
+					static char timestamp[24+1];
+
+					if (!date[0])	// lazy evaluation
+					{   time_t tm;
+					char *p;	// local; shadows the member p inside this block
+
+					.time(&tm);
+					p = ctime(&tm);
+					assert(p);
+					sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
+					sprintf(time.ptr, "%.8s", p + 11);
+					sprintf(timestamp.ptr, "%.24s", p);
+					}
+
+///version (DMDV1) {
+///					if (mod && id == Id.FILE)
+///					{
+///					t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars());
+///					goto Lstr;
+///					}
+///					else if (mod && id == Id.LINE)
+///					{
+///					t.value = TOK.TOKint64v;
+///					t.uns64value = loc.linnum;
+///					}
+///					else
+///}
+					if (id == Id.DATE)
+					{
+					t.ustring = date.ptr;
+					goto Lstr;
+					}
+					else if (id == Id.TIME)
+					{
+					t.ustring = time.ptr;
+					goto Lstr;
+					}
+					else if (id == Id.VENDOR)
+					{
+					t.ustring = "Digital Mars D".ptr;
+					goto Lstr;
+					}
+					else if (id == Id.TIMESTAMP)
+					{
+					t.ustring = timestamp.ptr;
+					 Lstr:
+					t.value = TOK.TOKstring;
+					 Llen:
+					t.postfix = 0;
+					t.len = strlen(cast(char*)t.ustring);
+					}
+					else if (id == Id.VERSIONX)
+					{
+						uint major = 0;
+						uint minor = 0;
+
+						// Parse "major.minor" from global.version_, skipping its first character.
+						foreach (char cc; global.version_[1..$])
+						{
+							if (isdigit(cc))
+								minor = minor * 10 + cc - '0';
+							else if (cc == '.')
+							{
+								major = minor;
+								minor = 0;
+							}
+							else
+								break;
+						}
+						t.value = TOK.TOKint64v;
+						t.uns64value = major * 1000 + minor;
+					}
+///version (DMDV2) {
+					else if (id == Id.EOFX)
+					{
+					t.value = TOK.TOKeof;
+					// Advance scanner to end of file
+					while (!(*p == 0 || *p == 0x1A))
+						p++;
+					}
+///}
+				}
+				//printf("t.value = %d\n",t.value);
+				return;
+				}
+
+				case '/':
+				p++;
+				switch (*p)
+				{
+					case '=':
+						p++;
+						t.value = TOK.TOKdivass;
+						return;
+
+					case '*':
+						p++;
+						linnum = loc.linnum;
+						while (1)
+						{
+							while (1)
+							{
+								ubyte c = *p;
+								switch (c)
+								{
+									case '/':
+									break;
+
+									case '\n':
+									loc.linnum++;
+									p++;
+									continue;
+
+									case '\r':
+									p++;
+									if (*p != '\n')
+										loc.linnum++;
+									continue;
+
+									case 0:
+									case 0x1A:
+									error("unterminated /* */ comment");
+									p = end;
+									t.value = TOK.TOKeof;
+									return;
+
+									default:
+									if (c & 0x80)
+									{   uint u = decodeUTF();
+										if (u == PS || u == LS)
+										loc.linnum++;
+									}
+									p++;
+									continue;
+								}
+								break;
+							}
+							p++;
+							// Comment ends at a '/' preceded by '*', unless that '*' is the opening one
+							if (p[-2] == '*' && p - 3 != t.ptr)
+							break;
+						}
+						if (commentToken)
+						{
+							t.value = TOK.TOKcomment;
+							return;
+						}
+						else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
+						{   // if /** but not /**/
+							getDocComment(t, lastLine == linnum);
+						}
+						continue;
+
+					case '/':		// do // style comments
+						linnum = loc.linnum;
+						while (1)
+						{   ubyte c = *++p;
+							switch (c)
+							{
+							case '\n':
+								break;
+
+							case '\r':
+								if (p[1] == '\n')
+								p++;
+								break;
+
+							case 0:
+							case 0x1A:
+								if (commentToken)
+								{
+								p = end;
+								t.value = TOK.TOKcomment;
+								return;
+								}
+								if (doDocComment && t.ptr[2] == '/')
+								getDocComment(t, lastLine == linnum);
+								p = end;
+								t.value = TOK.TOKeof;
+								return;
+
+							default:
+								if (c & 0x80)
+								{   uint u = decodeUTF();
+								if (u == PS || u == LS)
+									break;
+								}
+								continue;
+							}
+							break;
+						}
+
+						if (commentToken)
+						{
+							p++;
+							loc.linnum++;
+							t.value = TOK.TOKcomment;
+							return;
+						}
+						if (doDocComment && t.ptr[2] == '/')
+							getDocComment(t, lastLine == linnum);
+
+						p++;
+						loc.linnum++;
+						continue;
+
+					case '+':
+					{
+						int nest;	// current nesting depth of /+ +/ comments
+
+						linnum = loc.linnum;
+						p++;
+						nest = 1;
+						while (1)
+						{   ubyte c = *p;
+							switch (c)
+							{
+							case '/':
+								p++;
+								if (*p == '+')
+								{
+								p++;
+								nest++;
+								}
+								continue;
+
+							case '+':
+								p++;
+								if (*p == '/')
+								{
+								p++;
+								if (--nest == 0)
+									break;
+								}
+								continue;
+
+							case '\r':
+								p++;
+								if (*p != '\n')
+								loc.linnum++;
+								continue;
+
+							case '\n':
+								loc.linnum++;
+								p++;
+								continue;
+
+							case 0:
+							case 0x1A:
+								error("unterminated /+ +/ comment");
+								p = end;
+								t.value = TOK.TOKeof;
+								return;
+
+							default:
+								if (c & 0x80)
+								{   uint u = decodeUTF();
+								if (u == PS || u == LS)
+									loc.linnum++;
+								}
+								p++;
+								continue;
+							}
+							break;
+						}
+						if (commentToken)
+						{
+							t.value = TOK.TOKcomment;
+							return;
+						}
+						if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
+						{   // if /++ but not /++/
+							getDocComment(t, lastLine == linnum);
+						}
+						continue;
+					}
+					
+					default:
+						break;	///
+				}
+				t.value = TOK.TOKdiv;
+				return;
+
+				case '.':
+				p++;
+				if (isdigit(*p))
+				{   /* Note that we don't allow ._1 and ._ as being
+					 * valid floating point numbers.
+					 */
+					p--;
+					t.value = inreal(t);
+				}
+				else if (p[0] == '.')
+				{
+					if (p[1] == '.')
+					{   p += 2;
+					t.value = TOK.TOKdotdotdot;
+					}
+					else
+					{   p++;
+					t.value = TOK.TOKslice;
+					}
+				}
+				else
+					t.value = TOK.TOKdot;
+				return;
+
+				case '&':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKandass;
+				}
+				else if (*p == '&')
+				{   p++;
+					t.value = TOK.TOKandand;
+				}
+				else
+					t.value = TOK.TOKand;
+				return;
+
+				case '|':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKorass;
+				}
+				else if (*p == '|')
+				{   p++;
+					t.value = TOK.TOKoror;
+				}
+				else
+					t.value = TOK.TOKor;
+				return;
+
+				case '-':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKminass;
+				}
+///		#if 0
+///				else if (*p == '>')
+///				{   p++;
+///					t.value = TOK.TOKarrow;
+///				}
+///		#endif
+				else if (*p == '-')
+				{   p++;
+					t.value = TOK.TOKminusminus;
+				}
+				else
+					t.value = TOK.TOKmin;
+				return;
+
+				case '+':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKaddass;
+				}
+				else if (*p == '+')
+				{   p++;
+					t.value = TOK.TOKplusplus;
+				}
+				else
+					t.value = TOK.TOKadd;
+				return;
+
+				case '<':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKle;			// <=
+				}
+				else if (*p == '<')
+				{   p++;
+					if (*p == '=')
+					{   p++;
+					t.value = TOK.TOKshlass;		// <<=
+					}
+					else
+					t.value = TOK.TOKshl;		// <<
+				}
+				else if (*p == '>')
+				{   p++;
+					if (*p == '=')
+					{   p++;
+					t.value = TOK.TOKleg;		// <>=
+					}
+					else
+					t.value = TOK.TOKlg;		// <>
+				}
+				else
+					t.value = TOK.TOKlt;			// <
+				return;
+
+				case '>':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKge;			// >=
+				}
+				else if (*p == '>')
+				{   p++;
+					if (*p == '=')
+					{   p++;
+					t.value = TOK.TOKshrass;		// >>=
+					}
+					else if (*p == '>')
+					{	p++;
+					if (*p == '=')
+					{   p++;
+						t.value = TOK.TOKushrass;	// >>>=
+					}
+					else
+						t.value = TOK.TOKushr;		// >>>
+					}
+					else
+					t.value = TOK.TOKshr;		// >>
+				}
+				else
+					t.value = TOK.TOKgt;			// >
+				return;
+
+				case '!':
+				p++;
+				if (*p == '=')
+				{   p++;
+					if (*p == '=' && global.params.Dversion == 1)
+					{	p++;
+					t.value = TOK.TOKnotidentity;	// !==
+					}
+					else
+					t.value = TOK.TOKnotequal;		// !=
+				}
+				else if (*p == '<')
+				{   p++;
+					if (*p == '>')
+					{	p++;
+					if (*p == '=')
+					{   p++;
+						t.value = TOK.TOKunord; // !<>=
+					}
+					else
+						t.value = TOK.TOKue;	// !<>
+					}
+					else if (*p == '=')
+					{	p++;
+					t.value = TOK.TOKug;	// !<=
+					}
+					else
+					t.value = TOK.TOKuge;	// !<
+				}
+				else if (*p == '>')
+				{   p++;
+					if (*p == '=')
+					{	p++;
+					t.value = TOK.TOKul;	// !>=
+					}
+					else
+					t.value = TOK.TOKule;	// !>
+				}
+				else
+					t.value = TOK.TOKnot;		// !
+				return;
+
+				case '=':
+				p++;
+				if (*p == '=')
+				{   p++;
+					if (*p == '=' && global.params.Dversion == 1)
+					{	p++;
+					t.value = TOK.TOKidentity;		// ===
+					}
+					else
+					t.value = TOK.TOKequal;		// ==
+				}
+				else
+					t.value = TOK.TOKassign;		// =
+				return;
+
+				case '~':
+				p++;
+				if (*p == '=')
+				{   p++;
+					t.value = TOK.TOKcatass;		// ~=
+				}
+				else
+					t.value = TOK.TOKtilde;		// ~
+				return;
+/*
+		#define SINGLE(c,tok) case c: p++; t.value = tok; return;
+
+				SINGLE('(',	TOKlparen)
+				SINGLE(')', TOKrparen)
+				SINGLE('[', TOKlbracket)
+				SINGLE(']', TOKrbracket)
+				SINGLE('{', TOKlcurly)
+				SINGLE('}', TOKrcurly)
+				SINGLE('?', TOKquestion)
+				SINGLE(',', TOKcomma)
+				SINGLE(';', TOKsemicolon)
+				SINGLE(':', TOKcolon)
+				SINGLE('$', TOKdollar)
+				SINGLE('@', TOKat)
+
+		#undef SINGLE
+
+		#define DOUBLE(c1,tok1,c2,tok2)		\
+				case c1:			\
+				p++;			\
+				if (*p == c2)		\
+				{   p++;		\
+					t.value = tok2;	\
+				}			\
+				else			\
+					t.value = tok1;	\
+				return;
+
+				DOUBLE('*', TOKmul, '=', TOKmulass)
+				DOUBLE('%', TOKmod, '=', TOKmodass)
+				DOUBLE('^', TOKxor, '=', TOKxorass)
+
+		#undef DOUBLE
+*/
+
+				// Single-character tokens
+				case '(': p++; t.value = TOK.TOKlparen; return;
+				case ')': p++; t.value = TOK.TOKrparen; return;
+				case '[': p++; t.value = TOK.TOKlbracket; return;
+				case ']': p++; t.value = TOK.TOKrbracket; return;
+				case '{': p++; t.value = TOK.TOKlcurly; return;
+				case '}': p++; t.value = TOK.TOKrcurly; return;
+				case '?': p++; t.value = TOK.TOKquestion; return;
+				case ',': p++; t.value = TOK.TOKcomma; return;
+				case ';': p++; t.value = TOK.TOKsemicolon; return;
+				case ':': p++; t.value = TOK.TOKcolon; return;
+				case '$': p++; t.value = TOK.TOKdollar; return;
+				case '@': p++; t.value = TOK.TOKat; return;
+
+				// Operators with an optional trailing '='
+				case '*':
+					p++;
+					if (*p == '=') {
+						p++;
+						t.value = TOK.TOKmulass;
+					} else {
+						t.value = TOK.TOKmul;
+					}
+					return;
+					
+				case '%':
+					p++;
+					if (*p == '=') {
+						p++;
+						t.value = TOK.TOKmodass;
+					} else {
+						t.value = TOK.TOKmod;
+					}
+					return;
+					
+				case '^':
+					p++;
+					if (*p == '=') {
+						p++;
+						t.value = TOK.TOKxorass;
+					} else {
+						t.value = TOK.TOKxor;
+					}
+					return;
+
+				case '#':
+				p++;
+				pragma_();
+				continue;
+
+				default:
+				{	ubyte c = *p;
+
+				if (c & 0x80)
+				{   uint u = decodeUTF();
+
+					// Check for start of unicode identifier
+					if (isUniAlpha(u))
+					goto case_ident;
+
+					if (u == PS || u == LS)
+					{
+					loc.linnum++;
+					p++;
+					continue;
+					}
+				}
+				// Anything else is reported and skipped one byte at a time
+				if (isprint(c))
+					error("unsupported char '%c'", c);
+				else
+					error("unsupported char 0x%02x", c);
+				p++;
+				continue;
+				}
+			}
+		}
+	}
+
+    /**
+     * Return the token following ct without consuming it.
+     *
+     * If ct has no successor yet, one is scanned from the input and linked
+     * in as ct.next, so repeated peeks of the same token return the same
+     * node.
+     */
+    Token* peek(Token* ct)
+	{
+		if (ct.next is null)
+		{
+			Token* fresh = new Token();
+
+			scan(fresh);
+			fresh.next = null;
+			ct.next = fresh;
+		}
+
+		return ct.next;
+	}
+
+    /**
+     * Look ahead from tk, which is taken to be just inside an opening
+     * parenthesis, and return the token following the matching ')'.
+     *
+     * The search also stops, returning the stopping token itself, at an
+     * unbalanced '}', at a ';' outside any curly braces, or at end of file.
+     */
+    Token* peekPastParen(Token* tk)
+	{
+		//printf("peekPastParen()\n");
+		int parens = 1;
+		int curlynest = 0;
+
+		for (;;)
+		{
+			tk = peek(tk);
+			//tk.print();
+			auto kind = tk.value;
+
+			if (kind == TOK.TOKlparen)
+			{
+				parens++;
+			}
+			else if (kind == TOK.TOKrparen)
+			{
+				if (--parens == 0)
+					return peek(tk);
+			}
+			else if (kind == TOK.TOKlcurly)
+			{
+				curlynest++;
+			}
+			else if (kind == TOK.TOKrcurly)
+			{
+				if (--curlynest < 0)
+					return tk;
+			}
+			else if (kind == TOK.TOKsemicolon)
+			{
+				if (curlynest == 0)
+					return tk;
+			}
+			else if (kind == TOK.TOKeof)
+			{
+				return tk;
+			}
+		}
+	}
+
+	/*******************************************
+	 * Parse escape sequence.
+	 */
+    uint escapeSequence()
+	{
+		// Decode one escape sequence. On entry p addresses the character that
+		// follows the backslash (the callers in this class have already
+		// consumed the backslash itself). Returns the value the escape denotes
+		// and leaves p just past the text that was consumed. Malformed
+		// sequences are reported through error() and a best-effort value is
+		// returned so lexing can go on.
+		uint c = *p;
+
+	version (TEXTUAL_ASSEMBLY_OUT) {
+		return c;
+	}
+		int n;
+		int ndigits;
+
+		switch (c)
+		{
+			// These characters stand for themselves.
+			case '\'':
+			case '"':
+			case '?':
+			case '\\':
+			Lconsume:
+				p++;
+				break;
+
+			// Single-letter escapes mapped to their control codes.
+			case 'a':	c = 7;		goto Lconsume;
+			case 'b':	c = 8;		goto Lconsume;
+			case 'f':	c = 12;		goto Lconsume;
+			case 'n':	c = 10;		goto Lconsume;
+			case 'r':	c = 13;		goto Lconsume;
+			case 't':	c = 9;		goto Lconsume;
+			case 'v':	c = 11;		goto Lconsume;
+
+			// Hex escapes: \uXXXX, \UXXXXXXXX and \xXX differ only in digit count.
+			case 'u':
+				ndigits = 4;
+				goto Lhex;
+			case 'U':
+				ndigits = 8;
+				goto Lhex;
+			case 'x':
+				ndigits = 2;
+			Lhex:
+				p++;
+				c = *p;
+				if (ishex(cast(ubyte)c))
+				{
+					uint v;
+
+					n = 0;
+					v = 0;
+					while (1)
+					{
+						// c is known to be a hex digit here; convert it to 0..15.
+						if (isdigit(c))
+							c -= '0';
+						else if (islower(c))
+							c -= 'a' - 10;
+						else
+							c -= 'A' - 10;
+						v = v * 16 + c;
+						c = *++p;
+						if (++n == ndigits)
+							break;
+						if (!ishex(cast(ubyte)c))
+						{
+							error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
+							break;
+						}
+					}
+					// \x yields a raw byte; \u and \U must be valid code points.
+					if (ndigits != 2 && !utf_isValidDchar(v))
+					{
+						error("invalid UTF character \\U%08x", v);
+						v = '?';	// recover with valid UTF character
+					}
+					c = v;
+				}
+				else
+					error("undefined escape hex sequence \\%c\n",c);
+				break;
+
+			case '&':			// named character entity
+				for (ubyte* idstart = ++p; true; p++)
+				{
+					switch (*p)
+					{
+					case ';':
+						c = HtmlNamedEntity(idstart, p - idstart);
+						if (c == ~0)
+						{
+							// Format the name as text: a ubyte[] passed to %s
+							// would be printed as a list of numbers.
+							error("unnamed character entity &%s;", cast(char[])idstart[0..(p - idstart)]);
+							c = ' ';
+						}
+						p++;
+						break;
+
+					default:
+						// Letters are allowed anywhere in the name; digits
+						// anywhere except as the first character.
+						if (isalpha(*p) ||
+						(p != idstart && isdigit(*p)))
+						continue;
+						error("unterminated named entity");
+						break;
+					}
+					break;
+				}
+				break;
+
+			case 0:
+			case 0x1A:			// end of file
+				// Nothing follows the backslash: yield the backslash itself
+				// and leave p on the end-of-file marker.
+				c = '\\';
+				break;
+
+			default:
+				if (isoctal(cast(ubyte)c))
+				{
+					uint v;
+
+					// Octal escape: one to three octal digits.
+					n = 0;
+					v = 0;
+					do
+					{
+						v = v * 8 + (c - '0');
+						c = *++p;
+					} while (++n < 3 && isoctal(cast(ubyte)c));
+					c = v;
+					if (c > 0xFF)
+						error("0%03o is larger than a byte", c);
+				}
+				else
+					error("undefined escape sequence \\%c\n",c);
+				break;
+		}
+		return c;
+	}
+
+    TOK wysiwygStringConstant(Token* t, int tc)
+	{
+		// Not implemented in this revision: every call stops at assert(false).
+		assert(false);
+	}
+
+    TOK hexStringConstant(Token* t)
+	{
+		// Not implemented in this revision: every call stops at assert(false).
+		assert(false);
+	}
+
+version (DMDV2) {
+    TOK delimitedStringConstant(Token* t)
+	{
+		// Only compiled under version (DMDV2). Not implemented in this
+		// revision: every call stops at assert(false).
+		assert(false);
+	}
+
+    TOK tokenStringConstant(Token* t)
+	{
+		// Only compiled under version (DMDV2). Not implemented in this
+		// revision: every call stops at assert(false).
+		assert(false);
+	}
+}
+    /**
+     * Lex a string literal in which backslash escapes are interpreted.
+     * The decoded bytes are collected in the shared stringbuffer; on the
+     * closing '"' they are copied into a malloc'ed, 0-terminated buffer stored
+     * in t.ustring (t.len excludes the terminator) and any postfix is read.
+     * Always returns TOK.TOKstring, also for an unterminated literal, which
+     * is reported and yields an empty string.
+     * The wide parameter is not used by this function.
+     */
+    TOK escapeStringConstant(Token* t, int wide)
+	{
+		uint c;
+		Loc start = loc;	// remembered for the "unterminated" message
+
+		p++;			// step over the first character (presumably the opening '"')
+		stringbuffer.reset();
+		while (true)
+		{
+			c = *p++;
+			switch (c)
+			{
+		version (TEXTUAL_ASSEMBLY_OUT) {
+		} else {
+				case '\\':
+					switch (*p)
+					{
+						// These escapes can denote any code point, so the
+						// result is written UTF-8 encoded.
+						case 'u':
+						case 'U':
+						case '&':
+						c = escapeSequence();
+						stringbuffer.writeUTF8(c);
+						continue;
+
+						// Every other escape is stored as a single byte below.
+						default:
+						c = escapeSequence();
+						break;
+					}
+					break;
+		}
+				case '\n':
+					loc.linnum++;
+					break;
+
+				case '\r':
+					// For "\r\n" drop the '\r'; the '\n' is handled on the next pass.
+					if (*p == '\n')
+						continue;	// ignore
+					c = '\n';	// treat EndOfLine as \n character
+					loc.linnum++;
+					break;
+
+				case '"':
+					// End of literal: the length is taken before the
+					// terminating 0 is appended, the copy includes it.
+					t.len = stringbuffer.offset;
+					stringbuffer.writeByte(0);
+					char* tmp = cast(char*)malloc(stringbuffer.offset);
+					memcpy(tmp, stringbuffer.data, stringbuffer.offset);
+					t.ustring = tmp;
+					stringPostfix(t);
+					return TOK.TOKstring;
+
+				case 0:
+				case 0x1A:
+					// End of input inside the literal: back up so p stays on
+					// the end marker, report, and return an empty string.
+					p--;
+					error("unterminated string constant starting at %s", start.toChars());
+					t.ustring = "".ptr;
+					t.len = 0;
+					t.postfix = 0;
+					return TOK.TOKstring;
+
+				default:
+					if (c & 0x80)
+					{
+						// Non-ASCII lead byte: back up and decode the whole
+						// sequence. The p++ afterwards suggests decodeUTF()
+						// leaves p on the last byte of the sequence - TODO confirm.
+						p--;
+						c = decodeUTF();
+						if (c == LS || c == PS)
+						{	c = '\n';
+						loc.linnum++;
+						}
+						p++;
+						stringbuffer.writeUTF8(c);
+						continue;
+					}
+					break;
+			}
+			// Reached via 'break' above: store the character as one byte.
+			stringbuffer.writeByte(c);
+		}
+		
+		assert(false);
+	}
+
+    /**
+     * Lex a character literal and store its value in t.uns64value.
+     * Returns TOKcharv, TOKwcharv or TOKdcharv depending on the escape form or
+     * on the decoded code point. On an unterminated literal an error is
+     * reported and the token kind chosen so far is returned.
+     * The wide parameter is not used by this function.
+     */
+    TOK charConstant(Token* t, int wide)
+	{
+		uint c;
+		TOK tk = TOKcharv;
+
+		//printf("Lexer.charConstant\n");
+		p++;			// step over the first character (presumably the opening quote)
+		c = *p++;
+		switch (c)
+		{
+		version (TEXTUAL_ASSEMBLY_OUT) {
+		} else {
+			case '\\':
+				// The form of the escape decides the resulting token kind.
+				switch (*p)
+				{
+				case 'u':
+					t.uns64value = escapeSequence();
+					tk = TOKwcharv;
+					break;
+
+				case 'U':
+				case '&':
+					t.uns64value = escapeSequence();
+					tk = TOKdcharv;
+					break;
+
+				default:
+					t.uns64value = escapeSequence();
+					break;
+				}
+				break;
+		}
+			case '\n':
+			L1:
+				loc.linnum++;
+				// falls through: a line break inside the literal is an error
+			case '\r':
+			case 0:
+			case 0x1A:
+			case '\'':
+				error("unterminated character constant");
+				return tk;
+
+			default:
+				if (c & 0x80)
+				{
+					// Non-ASCII lead byte: back up and decode the whole
+					// sequence. The p++ afterwards suggests decodeUTF()
+					// leaves p on the last byte of the sequence - TODO confirm.
+					p--;
+					c = decodeUTF();
+					p++;
+					if (c == LS || c == PS)
+						goto L1;
+					// Outside the surrogate range and below 0xFFFE the value
+					// is classified as wchar, everything else as dchar.
+					if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
+						tk = TOKwcharv;
+					else
+						tk = TOKdcharv;
+				}
+				t.uns64value = c;
+				break;
+		}
+
+		// Exactly one character has been read; a closing quote must follow.
+		if (*p != '\'')
+		{	
+			error("unterminated character constant");
+			return tk;
+		}
+		p++;
+		return tk;
+	}
+
+	/***************************************
+	 * Get postfix of string literal.
+	 */
+    void stringPostfix(Token* t)
+	{
+		// A 'c', 'w' or 'd' directly after the literal is recorded in
+		// t.postfix and consumed; anything else leaves p alone and records 0.
+		auto suffix = *p;
+
+		if (suffix == 'c' || suffix == 'w' || suffix == 'd')
+		{
+			t.postfix = suffix;
+			p++;
+		}
+		else
+		{
+			t.postfix = 0;
+		}
+	}
+
+    uint wchar_(uint u)
+	{
+		// Not implemented in this revision: every call stops at assert(false).
+		assert(false);
+	}
+	
+	/**************************************
+	 * Read in a number.
+	 * If it's an integer, store it in tok.TKutok.Vlong.
+	 *	integers can be decimal, octal or hex
+	 *	Handle the suffixes U, UL, LU, L, etc.
+	 * If it's double, store it in tok.TKutok.Vdouble.
+	 * Returns:
+	 *	TKnum
+	 *	TKdouble,...
+	 */
+
+    TOK number(Token* t)
+	{
+		// Outline:
+		//  1. A state machine copies the characters of the literal into
+		//     stringbuffer. As soon as the text turns out to be a real number
+		//     p is reset to the start and the work is handed to inreal().
+		//  2. The collected text is converted to an unsigned 64-bit value n,
+		//     reporting overflow.
+		//  3. Trailing u/U/l/L suffixes are consumed into 'flags'.
+		//  4. The token kind is picked from the flags and the magnitude of n,
+		//     and n is stored in t.uns64value.
+
+		// We use a state machine to collect numbers
+		enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
+		STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
+		STATE_hexh, STATE_error };
+		STATE state;
+
+		enum FLAGS
+		{
+			FLAGS_undefined = 0,
+			FLAGS_decimal  = 1,		// decimal
+			FLAGS_unsigned = 2,		// u or U suffix
+			FLAGS_long     = 4,		// l or L suffix
+		};
+		
+		FLAGS flags = FLAGS.FLAGS_decimal;
+
+		int i;
+		int base;
+		uint c;
+		ubyte *start;
+		TOK result;
+
+		//printf("Lexer.number()\n");
+		state = STATE.STATE_initial;
+		base = 0;
+		stringbuffer.reset();
+		start = p;
+		// Each pass looks at *p. 'break' out of the switch stores the
+		// character and advances; 'continue' skips storing (used for '_').
+		while (1)
+		{
+		c = *p;
+		switch (state)
+		{
+			case STATE.STATE_initial:		// opening state
+			if (c == '0')
+				state = STATE.STATE_0;
+			else
+				state = STATE.STATE_decimal;
+			break;
+
+			// Second character after a leading '0' selects the radix.
+			case STATE.STATE_0:
+			flags = (flags & ~FLAGS.FLAGS_decimal);
+			switch (c)
+			{
+version (ZEROH) {
+				case 'H':			// 0h
+				case 'h':
+				goto hexh;
+}
+				case 'X':
+				case 'x':
+				state = STATE.STATE_hex0;
+				break;
+
+				case '.':
+				if (p[1] == '.')	// .. is a separate token
+					goto done;
+				// otherwise falls through: "0." starts a real number
+				case 'i':
+				case 'f':
+				case 'F':
+				goto real_;
+version (ZEROH) {
+				case 'E':
+				case 'e':
+				goto case_hex;
+}
+				case 'B':
+				case 'b':
+				state = STATE.STATE_binary0;
+				break;
+
+				case '0': case '1': case '2': case '3':
+				case '4': case '5': case '6': case '7':
+				state = STATE.STATE_octal;
+				break;
+
+version (ZEROH) {
+				case '8': case '9': case 'A':
+				case 'C': case 'D': case 'F':
+				case 'a': case 'c': case 'd': case 'f':
+				case_hex:
+				state = STATE.STATE_hexh;
+				break;
+}
+				case '_':
+				state = STATE.STATE_octal;
+				p++;
+				continue;
+
+				case 'L':
+				if (p[1] == 'i')
+					goto real_;
+				goto done;
+
+				default:
+				goto done;
+			}
+			break;
+
+			case STATE.STATE_decimal:		// reading decimal number
+			if (!isdigit(c))
+			{
+version (ZEROH) {
+				if (ishex(c)
+				|| c == 'H' || c == 'h'
+				   )
+				goto hexh;
+}
+				if (c == '_')		// ignore embedded _
+				{	p++;
+				continue;
+				}
+				if (c == '.' && p[1] != '.')
+				goto real_;
+				else if (c == 'i' || c == 'f' || c == 'F' ||
+					 c == 'e' || c == 'E')
+				{
+			real_:	// It's a real number. Back up and rescan as a real
+				p = start;
+				return inreal(t);
+				}
+				else if (c == 'L' && p[1] == 'i')
+				goto real_;
+				goto done;
+			}
+			break;
+
+			// STATE_hex0 means "0x" was seen but no hex digit yet.
+			case STATE.STATE_hex0:		// reading hex number
+			case STATE.STATE_hex:
+			if (! ishex(cast(ubyte)c))
+			{
+				if (c == '_')		// ignore embedded _
+				{	p++;
+				continue;
+				}
+				if (c == '.' && p[1] != '.')
+				goto real_;
+				if (c == 'P' || c == 'p' || c == 'i')
+				goto real_;
+				if (state == STATE.STATE_hex0)
+				error("Hex digit expected, not '%c'", c);
+				goto done;
+			}
+			state = STATE.STATE_hex;
+			break;
+
+version (ZEROH) {
+			hexh:
+			state = STATE.STATE_hexh;
+			case STATE.STATE_hexh:		// parse numbers like 0FFh
+			if (!ishex(c))
+			{
+				if (c == 'H' || c == 'h')
+				{
+				p++;
+				base = 16;
+				goto done;
+				}
+				else
+				{
+				// Check for something like 1E3 or 0E24
+				if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) ||
+					memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset))
+					goto real_;
+				error("Hex digit expected, not '%c'", c);
+				goto done;
+				}
+			}
+			break;
+}
+
+			// STATE_octale records that an 8 or 9 appeared in an octal
+			// literal; the error is reported once, after the loop.
+			case STATE.STATE_octal:		// reading octal number
+			case STATE.STATE_octale:		// reading octal number with non-octal digits
+			if (!isoctal(cast(ubyte)c))
+			{
+version (ZEROH) {
+				if (ishex(c)
+				|| c == 'H' || c == 'h'
+				   )
+				goto hexh;
+}
+				if (c == '_')		// ignore embedded _
+				{	p++;
+				continue;
+				}
+				if (c == '.' && p[1] != '.')
+				goto real_;
+				if (c == 'i')
+				goto real_;
+				if (isdigit(c))
+				{
+				state = STATE.STATE_octale;
+				}
+				else
+				goto done;
+			}
+			break;
+
+			// STATE_binary0 means "0b" was seen but no binary digit yet.
+			case STATE.STATE_binary0:		// starting binary number
+			case STATE.STATE_binary:		// reading binary number
+			if (c != '0' && c != '1')
+			{
+version (ZEROH) {
+				if (ishex(c)
+				|| c == 'H' || c == 'h'
+				   )
+				goto hexh;
+}
+				if (c == '_')		// ignore embedded _
+				{	p++;
+				continue;
+				}
+				if (state == STATE.STATE_binary0)
+				{	error("binary digit expected");
+				state = STATE.STATE_error;
+				break;
+				}
+				else
+				goto done;
+			}
+			state = STATE.STATE_binary;
+			break;
+
+			case STATE.STATE_error:		// for error recovery
+			if (!isdigit(c))	// scan until non-digit
+				goto done;
+			break;
+
+			default:
+			assert(0);
+		}
+		stringbuffer.writeByte(c);
+		p++;
+		}
+	done:
+		stringbuffer.writeByte(0);		// terminate string
+		if (state == STATE.STATE_octale)
+		error("Octal digit expected");
+
+		ulong n;			// unsigned >=64 bit integer type
+
+		// Fast path: offset == 2 means one character plus the terminator,
+		// i.e. a single decimal digit.
+		if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0))
+		n = stringbuffer.data[0] - '0';
+		else
+		{
+		// Convert string to integer
+version (__DMC__) {
+		errno = 0;
+		n = strtoull(cast(char*)stringbuffer.data,null,base);
+		if (errno == ERANGE)
+			error("integer overflow");
+} else {
+		// Not everybody implements strtoull()
+		// Note: this local 'p' walks the collected text and hides the
+		// lexer's input pointer until the end of the enclosing else block.
+		char* p = cast(char*)stringbuffer.data;
+		int r = 10, d;
+
+		// Derive the radix from the prefix: 0x -> 16, 0b -> 2, 0<digit> -> 8.
+		if (*p == '0')
+		{
+			if (p[1] == 'x' || p[1] == 'X')
+			p += 2, r = 16;
+			else if (p[1] == 'b' || p[1] == 'B')
+			p += 2, r = 2;
+			else if (isdigit(p[1]))
+			p += 1, r = 8;
+		}
+
+		n = 0;
+		while (1)
+		{
+			if (*p >= '0' && *p <= '9')
+			d = *p - '0';
+			else if (*p >= 'a' && *p <= 'z')
+			d = *p - 'a' + 10;
+			else if (*p >= 'A' && *p <= 'Z')
+			d = *p - 'A' + 10;
+			else
+			break;
+			if (d >= r)
+			break;
+			// Overflow checks: the multiplication must be reversible and
+			// adding the digit must not wrap around.
+			ulong n2 = n * r;
+			//printf("n2 / r = %llx, n = %llx\n", n2/r, n);
+			if (n2 / r != n || n2 + d < n)
+			{
+			error ("integer overflow");
+			break;
+			}
+
+			n = n2 + d;
+			p++;
+		}
+}
+		if (n.sizeof > 8 &&
+			n > 0xFFFFFFFFFFFFFFFF)	// if n needs more than 64 bits
+			error("integer overflow");
+		}
+
+		// Parse trailing 'u', 'U', 'l' or 'L' in any combination
+		while (1)
+		{   FLAGS f;
+
+		switch (*p)
+		{   case 'U':
+			case 'u':
+			f = FLAGS.FLAGS_unsigned;
+			goto L1;
+
+			case 'l':
+			// The leading '1 ||' makes this error unconditional.
+			if (1 || !global.params.useDeprecated)
+				error("'l' suffix is deprecated, use 'L' instead");
+			// falls through: 'l' is still treated as the long suffix
+			case 'L':
+			f = FLAGS.FLAGS_long;
+			L1:
+			p++;
+			// The same kind of suffix given twice is rejected.
+			if (flags & f)
+				error("unrecognized token");
+			flags = (flags | f);
+			continue;
+			default:
+			break;
+		}
+		break;
+		}
+
+		// Choose the token kind from the suffix flags and the size of n.
+		switch (flags)
+		{
+		case FLAGS.FLAGS_undefined:
+			/* Octal or Hexadecimal constant.
+			 * First that fits: int, uint, long, ulong
+			 */
+			if (n & 0x8000000000000000)
+				result = TOK.TOKuns64v;
+			else if (n & 0xFFFFFFFF00000000)
+				result = TOK.TOKint64v;
+			else if (n & 0x80000000)
+				result = TOK.TOKuns32v;
+			else
+				result = TOK.TOKint32v;
+			break;
+
+		case FLAGS.FLAGS_decimal:
+			/* First that fits: int, long, long long
+			 */
+			if (n & 0x8000000000000000)
+			{	    error("signed integer overflow");
+				result = TOK.TOKuns64v;
+			}
+			else if (n & 0xFFFFFFFF80000000)
+				result = TOK.TOKint64v;
+			else
+				result = TOK.TOKint32v;
+			break;
+
+		case FLAGS.FLAGS_unsigned:
+		case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
+			/* First that fits: uint, ulong
+			 */
+			if (n & 0xFFFFFFFF00000000)
+				result = TOK.TOKuns64v;
+			else
+				result = TOK.TOKuns32v;
+			break;
+
+		case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
+			if (n & 0x8000000000000000)
+			{	    error("signed integer overflow");
+				result = TOK.TOKuns64v;
+			}
+			else
+				result = TOK.TOKint64v;
+			break;
+
+		case FLAGS.FLAGS_long:
+			if (n & 0x8000000000000000)
+				result = TOK.TOKuns64v;
+			else
+				result = TOK.TOKint64v;
+			break;
+
+		case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
+		case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
+			result = TOK.TOKuns64v;
+			break;
+
+		default:
+debug {
+			printf("%x\n",flags);
+}
+			assert(0);
+		}
+		t.uns64value = n;
+		return result;
+	}
+
+	/**************************************
+	 * Read in characters, converting them to real.
+	 * Bugs:
+	 *	Exponent overflow not detected.
+	 *	Too much requested precision is not detected.
+	 */
+    TOK inreal(Token* t)
+	in
+	{
+		// The literal must start with a digit or with '.'.
+		assert(*p == '.' || isdigit(*p));
+	}
+	out (result)
+	{
+		// Only floating point or imaginary token kinds may be returned.
+		switch (result)
+		{
+			case TOKfloat32v:
+			case TOKfloat64v:
+			case TOKfloat80v:
+			case TOKimaginary32v:
+			case TOKimaginary64v:
+			case TOKimaginary80v:
+				break;
+
+			default:
+				assert(0);
+		}
+	}
+	body
+	{
+		// Outline: a numbered state machine (dblstate) copies the text of the
+		// literal into stringbuffer, dropping '_' characters. The text is
+		// then converted with strtold() into t.float80value, and the suffix
+		// (f/F, l/L, i/I) selects the returned token kind.
+		int dblstate;
+		uint c;
+		char hex;			// is this a hexadecimal-floating-constant?
+		TOK result;
+
+		//printf("Lexer.inreal()\n");
+		stringbuffer.reset();
+		dblstate = 0;
+		hex = 0;
+	Lnext:
+		while (true)
+		{
+			// Get next char from input
+			c = *p++;
+			//printf("dblstate = %d, c = '%c'\n", dblstate, c);
+			// Inner loop: 'continue' re-examines the same character in the
+			// next state; 'break' accepts it, after which it is stored.
+			while (true)
+			{
+				switch (dblstate)
+				{
+					case 0:			// opening state
+						if (c == '0')
+						dblstate = 9;
+						else if (c == '.')
+						dblstate = 3;
+						else
+						dblstate = 1;
+						break;
+
+					case 9:			// character after a leading '0'
+						dblstate = 1;
+						if (c == 'X' || c == 'x')
+						{	
+							hex++;
+							break;
+						}
+						// otherwise falls through and is handled as state 1
+					case 1:			// digits to left of .
+					case 3:			// digits to right of .
+					case 7:			// continuing exponent digits
+						if (!isdigit(c) && !(hex && isxdigit(c)))
+						{
+							if (c == '_')
+								goto Lnext;	// ignore embedded '_'
+							// End of this run of digits: move to the next
+							// state (2, 4 or 8) and look at c again.
+							dblstate++;
+							continue;
+						}
+						break;
+
+					case 2:			// no more digits to left of .
+						if (c == '.')
+						{   
+							dblstate++;
+							break;
+						}
+						// no '.': falls through to look for an exponent
+					case 4:			// no more digits to right of .
+						if ((c == 'E' || c == 'e') ||
+							hex && (c == 'P' || c == 'p'))
+						{   
+							dblstate = 5;
+							hex = 0;	// exponent is always decimal
+							break;
+						}
+						if (hex)
+							error("binary-exponent-part required");
+						goto done;
+
+					case 5:			// looking immediately to right of E
+						dblstate++;
+						if (c == '-' || c == '+')
+							break;
+						// no sign: falls through, c must be the first exponent digit
+					case 6:			// 1st exponent digit expected
+						if (!isdigit(c))
+							error("exponent expected");
+						dblstate++;
+						break;
+
+					case 8:			// past end of exponent digits
+						goto done;
+				}
+				break;
+			}
+			stringbuffer.writeByte(c);
+		}
+	done:
+		// The character that ended the number was already consumed; put it back.
+		p--;
+
+		stringbuffer.writeByte(0);
+
+	version (_WIN32) { /// && __DMC__
+		// Force '.' as the decimal point during conversion; restored further down.
+		char* save = __locale_decpoint;
+		__locale_decpoint = cast(char*)".".ptr;
+	}
+		t.float80value = strtold(cast(char*)stringbuffer.data, null);
+
+		// The strtof()/strtod() results below are discarded; the calls are
+		// made so that errno shows whether the value fits the narrower type.
+		errno = 0;
+		switch (*p)
+		{
+		case 'F':
+		case 'f':
+			strtof(cast(char*)stringbuffer.data, null);
+			result = TOKfloat32v;
+			p++;
+			break;
+
+		default:
+			strtod(cast(char*)stringbuffer.data, null);
+			result = TOKfloat64v;
+			break;
+
+		case 'l':
+			if (!global.params.useDeprecated)
+				error("'l' suffix is deprecated, use 'L' instead");
+			// falls through: 'l' is still treated as the 80-bit suffix
+		case 'L':
+			result = TOKfloat80v;
+			p++;
+			break;
+		}
+		// A trailing 'i' (or deprecated 'I') turns the literal into an imaginary one.
+		if (*p == 'i' || *p == 'I')
+		{
+			if (!global.params.useDeprecated && *p == 'I')
+				error("'I' suffix is deprecated, use 'i' instead");
+			p++;
+			switch (result)
+			{
+				case TOKfloat32v:
+					result = TOKimaginary32v;
+					break;
+				case TOKfloat64v:
+					result = TOKimaginary64v;
+					break;
+				case TOKfloat80v:
+					result = TOKimaginary80v;
+					break;
+			}
+		}
+		
+	version (_WIN32) { ///&& __DMC__
+		__locale_decpoint = save;
+	}
+		if (errno == ERANGE)
+			error("number is not representable");
+
+		return result;
+	}
+
+	void error(T...)(string format, T t)
+	{
+		// Convenience overload: report at the lexer's current location (this.loc).
+		error(this.loc, format, t);
+	}
+
+    void error(T...)(Loc loc, string format, T t)
+	{
+		// Report an error at loc. The message is printed only when the lexer
+		// has a module and global.gag is not set; global.errors is
+		// incremented in every case.
+		if (mod && !global.gag)
+		{
+			// This local 'p' is the location text; it hides the lexer's
+			// input pointer inside this block.
+			string p = loc.toChars();
+			if (p.length != 0)
+				writef("%s: ", p);
+
+			writefln(format, t);
+
+			// The count is tested before this error is added to it.
+			if (global.errors >= 20)	// moderate blizzard of cascading messages
+				fatal();
+		}
+
+		global.errors++;
+	}
+	
+    void pragma_()
+	{
+		// Not implemented in this revision: every call stops at assert(false).
+		assert(false);
+	}
+
+    uint decodeUTF()
+	{
+		// Not implemented in this revision: every call stops at assert(false).
+		assert(false);
+	}
+
+    void getDocComment(Token* t, uint lineComment)
+	{
+		// Not implemented in this revision: every call stops at assert(false).
+		assert(false);
+	}
+
+    /**
+     * Check whether p is acceptable as an identifier.
+     * Returns true when p is non-empty, does not begin with an ASCII digit,
+     * decodes without error through utf_decodeChar(), and every character is
+     * '_', an ASCII letter or digit, or a character >= 0x80 accepted by
+     * isUniAlpha(). Returns false otherwise.
+     */
+    static bool isValidIdentifier(string p)
+	{
+		if (p.length == 0) {
+			return false;
+		}
+
+		if (p[0] >= '0' && p[0] <= '9') {		// beware of isdigit() on signed chars
+			return false;
+		}
+
+		size_t idx = 0;
+		while (idx < p.length)
+		{
+			dchar dc;
+
+			// A non-null result is treated as a decoding failure.
+			if (utf_decodeChar(p, &idx, &dc) !is null) {
+				return false;
+			}
+
+			if (dc >= 0x80) {
+				// Non-ASCII: only isUniAlpha() decides. isalnum() must not be
+				// consulted here, because the C ctype functions are defined
+				// only for values that fit in an unsigned char (or EOF).
+				if (!isUniAlpha(dc)) {
+					return false;
+				}
+			} else if (!isalnum(dc) && dc != '_') {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
+	/// TODO: reimplement based on strings
+    static ubyte* combineComments(ubyte* c1, ubyte* c2)
+	{
+		// Join two 0-terminated comment texts. When only one of them is
+		// non-null that pointer is returned unchanged (null if both are
+		// null). Otherwise the result is a newly malloc'ed buffer holding c1
+		// followed by c2, with a '\n' inserted between them when c1 is
+		// non-empty and does not already end in one.
+		if (!c1)
+			return c2;
+		if (!c2)
+			return c1;
+
+		size_t firstLen = strlen(cast(char*)c1);
+		size_t secondLen = strlen(cast(char*)c2);
+
+		// Room for both texts, an optional separator and the terminator.
+		ubyte* merged = cast(ubyte*)malloc(firstLen + 1 + secondLen + 1);
+		memcpy(merged, c1, firstLen);
+
+		size_t at = firstLen;	// write position for the second text
+		if (firstLen != 0 && c1[firstLen - 1] != '\n')
+			merged[at++] = '\n';
+
+		memcpy(merged + at, c2, secondLen);
+		merged[at + secondLen] = 0;
+		return merged;
+	}
+}
\ No newline at end of file
author	korDen
date	Sat, 24 Oct 2009 08:42:06 +0400
parents
children	7427ded8caf7