Mercurial > projects > ddmd
diff dmd/Lexer.d @ 0:10317f0c89a5
Initial commit
author | korDen |
---|---|
date | Sat, 24 Oct 2009 08:42:06 +0400 |
parents | |
children | 7427ded8caf7 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dmd/Lexer.d Sat Oct 24 08:42:06 2009 +0400 @@ -0,0 +1,2425 @@ +module dmd.Lexer; + +import dmd.StringTable; +import dmd.OutBuffer; +import dmd.Token; +import dmd.Loc; +import dmd.Module; +import dmd.Identifier; +import dmd.TOK; +import dmd.Keyword; +import dmd.StringValue; +import dmd.Global; +import dmd.Util; +import dmd.Id; +import dmd.Dchar; +import dmd.Utf; + +import std.stdio : writeln; + +import core.stdc.ctype; +import core.stdc.stdlib; +import core.stdc.string; +import core.stdc.stdio; +import core.stdc.time; +import core.stdc.errno; + +enum LS = 0x2028; // UTF line separator +enum PS = 0x2029; // UTF paragraph separator + +extern (C) extern +{ + __gshared char* __locale_decpoint; +} + +int isUniAlpha(uint u) +{ + assert(false); +} + +class Lexer +{ + static StringTable stringtable; + static OutBuffer stringbuffer; + static Token* freelist; + + Loc loc; // for error messages + + ubyte* base; // pointer to start of buffer + ubyte* end; // past end of buffer + ubyte* p; // current character + Token token; + Module mod; + int doDocComment; // collect doc comment information + int anyToken; // !=0 means seen at least one token + int commentToken; // !=0 means comments are TOKcomment's + + static this() + { + stringtable = new StringTable(); + stringbuffer = new OutBuffer(); + } + + static ~this() + { + delete stringtable; + } + + this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken) + { + loc = Loc(mod, 1); + + memset(&token,0,token.sizeof); + this.base = base; + this.end = base + endoffset; + p = base + begoffset; + this.mod = mod; + this.doDocComment = doDocComment; + this.anyToken = 0; + this.commentToken = commentToken; + //initKeywords(); + + /* If first line starts with '#!', ignore the line + */ + + if (p[0] == '#' && p[1] =='!') + { + p += 2; + while (1) + { + ubyte c = *p; + switch (c) + { + case '\n': + p++; + break; + + case '\r': + p++; + if (*p == '\n') + 
p++; + break; + + case 0: + case 0x1A: + break; + + default: + if (c & 0x80) + { + uint u = decodeUTF(); + if (u == PS || u == LS) + break; + } + p++; + continue; + } + break; + } + loc.linnum = 2; + } + } + +version (DMDV2) { + static Keyword[] keywords = + [ + // { "", TOK }, + + { "this", TOK.TOKthis }, + { "super", TOK.TOKsuper }, + { "assert", TOK.TOKassert }, + { "null", TOK.TOKnull }, + { "true", TOK.TOKtrue }, + { "false", TOK.TOKfalse }, + { "cast", TOK.TOKcast }, + { "new", TOK.TOKnew }, + { "delete", TOK.TOKdelete }, + { "throw", TOK.TOKthrow }, + { "module", TOK.TOKmodule }, + { "pragma", TOK.TOKpragma }, + { "typeof", TOK.TOKtypeof }, + { "typeid", TOK.TOKtypeid }, + + { "template", TOK.TOKtemplate }, + + { "void", TOK.TOKvoid }, + { "byte", TOK.TOKint8 }, + { "ubyte", TOK.TOKuns8 }, + { "short", TOK.TOKint16 }, + { "ushort", TOK.TOKuns16 }, + { "int", TOK.TOKint32 }, + { "uint", TOK.TOKuns32 }, + { "long", TOK.TOKint64 }, + { "ulong", TOK.TOKuns64 }, + { "cent", TOK.TOKcent, }, + { "ucent", TOK.TOKucent, }, + { "float", TOK.TOKfloat32 }, + { "double", TOK.TOKfloat64 }, + { "real", TOK.TOKfloat80 }, + + { "bool", TOK.TOKbool }, + { "char", TOK.TOKchar }, + { "wchar", TOK.TOKwchar }, + { "dchar", TOK.TOKdchar }, + + { "ifloat", TOK.TOKimaginary32 }, + { "idouble", TOK.TOKimaginary64 }, + { "ireal", TOK.TOKimaginary80 }, + + { "cfloat", TOK.TOKcomplex32 }, + { "cdouble", TOK.TOKcomplex64 }, + { "creal", TOK.TOKcomplex80 }, + + { "delegate", TOK.TOKdelegate }, + { "function", TOK.TOKfunction }, + + { "is", TOK.TOKis }, + { "if", TOK.TOKif }, + { "else", TOK.TOKelse }, + { "while", TOK.TOKwhile }, + { "for", TOK.TOKfor }, + { "do", TOK.TOKdo }, + { "switch", TOK.TOKswitch }, + { "case", TOK.TOKcase }, + { "default", TOK.TOKdefault }, + { "break", TOK.TOKbreak }, + { "continue", TOK.TOKcontinue }, + { "synchronized", TOK.TOKsynchronized }, + { "return", TOK.TOKreturn }, + { "goto", TOK.TOKgoto }, + { "try", TOK.TOKtry }, + { "catch", TOK.TOKcatch }, + { 
"finally", TOK.TOKfinally }, + { "with", TOK.TOKwith }, + { "asm", TOK.TOKasm }, + { "foreach", TOK.TOKforeach }, + { "foreach_reverse", TOK.TOKforeach_reverse }, + { "scope", TOK.TOKscope }, + + { "struct", TOK.TOKstruct }, + { "class", TOK.TOKclass }, + { "interface", TOK.TOKinterface }, + { "union", TOK.TOKunion }, + { "enum", TOK.TOKenum }, + { "import", TOK.TOKimport }, + { "mixin", TOK.TOKmixin }, + { "static", TOK.TOKstatic }, + { "final", TOK.TOKfinal }, + { "const", TOK.TOKconst }, + { "typedef", TOK.TOKtypedef }, + { "alias", TOK.TOKalias }, + { "override", TOK.TOKoverride }, + { "abstract", TOK.TOKabstract }, + { "volatile", TOK.TOKvolatile }, + { "debug", TOK.TOKdebug }, + { "deprecated", TOK.TOKdeprecated }, + { "in", TOK.TOKin }, + { "out", TOK.TOKout }, + { "inout", TOK.TOKinout }, + { "lazy", TOK.TOKlazy }, + { "auto", TOK.TOKauto }, + + { "align", TOK.TOKalign }, + { "extern", TOK.TOKextern }, + { "private", TOK.TOKprivate }, + { "package", TOK.TOKpackage }, + { "protected", TOK.TOKprotected }, + { "public", TOK.TOKpublic }, + { "export", TOK.TOKexport }, + + { "body", TOK.TOKbody }, + { "invariant", TOK.TOKinvariant }, + { "unittest", TOK.TOKunittest }, + { "version", TOK.TOKversion }, + //{ "manifest", TOK.TOKmanifest }, + + // Added after 1.0 + { "ref", TOK.TOKref }, + { "macro", TOK.TOKmacro }, + { "pure", TOK.TOKpure }, + { "nothrow", TOK.TOKnothrow }, + { "__thread", TOK.TOKtls }, + { "__gshared", TOK.TOKgshared }, + { "__traits", TOK.TOKtraits }, + { "__overloadset", TOK.TOKoverloadset }, + { "__FILE__", TOK.TOKfile }, + { "__LINE__", TOK.TOKline }, + { "shared", TOK.TOKshared }, + { "immutable", TOK.TOKimmutable }, + ]; +} else { + static Keyword[] keywords = + [ + // { "", TOK }, + + { "this", TOK.TOKthis }, + { "super", TOK.TOKsuper }, + { "assert", TOK.TOKassert }, + { "null", TOK.TOKnull }, + { "true", TOK.TOKtrue }, + { "false", TOK.TOKfalse }, + { "cast", TOK.TOKcast }, + { "new", TOK.TOKnew }, + { "delete", TOK.TOKdelete }, + { 
"throw", TOK.TOKthrow }, + { "module", TOK.TOKmodule }, + { "pragma", TOK.TOKpragma }, + { "typeof", TOK.TOKtypeof }, + { "typeid", TOK.TOKtypeid }, + + { "template", TOK.TOKtemplate }, + + { "void", TOK.TOKvoid }, + { "byte", TOK.TOKint8 }, + { "ubyte", TOK.TOKuns8 }, + { "short", TOK.TOKint16 }, + { "ushort", TOK.TOKuns16 }, + { "int", TOK.TOKint32 }, + { "uint", TOK.TOKuns32 }, + { "long", TOK.TOKint64 }, + { "ulong", TOK.TOKuns64 }, + { "cent", TOK.TOKcent, }, + { "ucent", TOK.TOKucent, }, + { "float", TOK.TOKfloat32 }, + { "double", TOK.TOKfloat64 }, + { "real", TOK.TOKfloat80 }, + + { "bool", TOK.TOKbool }, + { "char", TOK.TOKchar }, + { "wchar", TOK.TOKwchar }, + { "dchar", TOK.TOKdchar }, + + { "ifloat", TOK.TOKimaginary32 }, + { "idouble", TOK.TOKimaginary64 }, + { "ireal", TOK.TOKimaginary80 }, + + { "cfloat", TOK.TOKcomplex32 }, + { "cdouble", TOK.TOKcomplex64 }, + { "creal", TOK.TOKcomplex80 }, + + { "delegate", TOK.TOKdelegate }, + { "function", TOK.TOKfunction }, + + { "is", TOK.TOKis }, + { "if", TOK.TOKif }, + { "else", TOK.TOKelse }, + { "while", TOK.TOKwhile }, + { "for", TOK.TOKfor }, + { "do", TOK.TOKdo }, + { "switch", TOK.TOKswitch }, + { "case", TOK.TOKcase }, + { "default", TOK.TOKdefault }, + { "break", TOK.TOKbreak }, + { "continue", TOK.TOKcontinue }, + { "synchronized", TOK.TOKsynchronized }, + { "return", TOK.TOKreturn }, + { "goto", TOK.TOKgoto }, + { "try", TOK.TOKtry }, + { "catch", TOK.TOKcatch }, + { "finally", TOK.TOKfinally }, + { "with", TOK.TOKwith }, + { "asm", TOK.TOKasm }, + { "foreach", TOK.TOKforeach }, + { "foreach_reverse", TOK.TOKforeach_reverse }, + { "scope", TOK.TOKscope }, + + { "struct", TOK.TOKstruct }, + { "class", TOK.TOKclass }, + { "interface", TOK.TOKinterface }, + { "union", TOK.TOKunion }, + { "enum", TOK.TOKenum }, + { "import", TOK.TOKimport }, + { "mixin", TOK.TOKmixin }, + { "static", TOK.TOKstatic }, + { "final", TOK.TOKfinal }, + { "const", TOK.TOKconst }, + { "typedef", TOK.TOKtypedef }, + { "alias", 
TOK.TOKalias }, + { "override", TOK.TOKoverride }, + { "abstract", TOK.TOKabstract }, + { "volatile", TOK.TOKvolatile }, + { "debug", TOK.TOKdebug }, + { "deprecated", TOK.TOKdeprecated }, + { "in", TOK.TOKin }, + { "out", TOK.TOKout }, + { "inout", TOK.TOKinout }, + { "lazy", TOK.TOKlazy }, + { "auto", TOK.TOKauto }, + + { "align", TOK.TOKalign }, + { "extern", TOK.TOKextern }, + { "private", TOK.TOKprivate }, + { "package", TOK.TOKpackage }, + { "protected", TOK.TOKprotected }, + { "public", TOK.TOKpublic }, + { "export", TOK.TOKexport }, + + { "body", TOK.TOKbody }, + { "invariant", TOK.TOKinvariant }, + { "unittest", TOK.TOKunittest }, + { "version", TOK.TOKversion }, + //{ "manifest", TOK.TOKmanifest }, + + // Added after 1.0 + { "ref", TOK.TOKref }, + { "macro", TOK.TOKmacro }, + ]; +} + + static ubyte cmtable[256]; + enum CMoctal = 0x1; + enum CMhex = 0x2; + enum CMidchar = 0x4; + + ubyte isoctal (ubyte c) { return cmtable[c] & CMoctal; } + ubyte ishex (ubyte c) { return cmtable[c] & CMhex; } + ubyte isidchar(ubyte c) { return cmtable[c] & CMidchar; } + + static void cmtable_init() + { + for (uint c = 0; c < cmtable.length; c++) + { + if ('0' <= c && c <= '7') + cmtable[c] |= CMoctal; + if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) + cmtable[c] |= CMhex; + if (isalnum(c) || c == '_') + cmtable[c] |= CMidchar; + } + } + + static void initKeywords() + { + uint nkeywords = keywords.length; + + if (global.params.Dversion == 1) + nkeywords -= 2; + + cmtable_init(); + + for (uint u = 0; u < nkeywords; u++) + { + //printf("keyword[%d] = '%s'\n",u, keywords[u].name); + string s = keywords[u].name; + TOK v = keywords[u].value; + StringValue* sv = stringtable.insert(s); + sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v); + + //printf("tochars[%d] = '%s'\n",v, s); + Token.tochars[v] = s; + } + + Token.tochars[TOK.TOKeof] = "EOF"; + Token.tochars[TOK.TOKlcurly] = "{"; + Token.tochars[TOK.TOKrcurly] = "}"; + 
Token.tochars[TOK.TOKlparen] = "("; + Token.tochars[TOK.TOKrparen] = ")"; + Token.tochars[TOK.TOKlbracket] = "["; + Token.tochars[TOK.TOKrbracket] = "]"; + Token.tochars[TOK.TOKsemicolon] = ";"; + Token.tochars[TOK.TOKcolon] = ":"; + Token.tochars[TOK.TOKcomma] = ","; + Token.tochars[TOK.TOKdot] = "."; + Token.tochars[TOK.TOKxor] = "^"; + Token.tochars[TOK.TOKxorass] = "^="; + Token.tochars[TOK.TOKassign] = "="; + Token.tochars[TOK.TOKconstruct] = "="; +version (DMDV2) { + Token.tochars[TOK.TOKblit] = "="; +} + Token.tochars[TOK.TOKlt] = "<"; + Token.tochars[TOK.TOKgt] = ">"; + Token.tochars[TOK.TOKle] = "<="; + Token.tochars[TOK.TOKge] = ">="; + Token.tochars[TOK.TOKequal] = "=="; + Token.tochars[TOK.TOKnotequal] = "!="; + Token.tochars[TOK.TOKnotidentity] = "!is"; + Token.tochars[TOK.TOKtobool] = "!!"; + + Token.tochars[TOK.TOKunord] = "!<>="; + Token.tochars[TOK.TOKue] = "!<>"; + Token.tochars[TOK.TOKlg] = "<>"; + Token.tochars[TOK.TOKleg] = "<>="; + Token.tochars[TOK.TOKule] = "!>"; + Token.tochars[TOK.TOKul] = "!>="; + Token.tochars[TOK.TOKuge] = "!<"; + Token.tochars[TOK.TOKug] = "!<="; + + Token.tochars[TOK.TOKnot] = "!"; + Token.tochars[TOK.TOKtobool] = "!!"; + Token.tochars[TOK.TOKshl] = "<<"; + Token.tochars[TOK.TOKshr] = ">>"; + Token.tochars[TOK.TOKushr] = ">>>"; + Token.tochars[TOK.TOKadd] = "+"; + Token.tochars[TOK.TOKmin] = "-"; + Token.tochars[TOK.TOKmul] = "*"; + Token.tochars[TOK.TOKdiv] = "/"; + Token.tochars[TOK.TOKmod] = "%"; + Token.tochars[TOK.TOKslice] = ".."; + Token.tochars[TOK.TOKdotdotdot] = "..."; + Token.tochars[TOK.TOKand] = "&"; + Token.tochars[TOK.TOKandand] = "&&"; + Token.tochars[TOK.TOKor] = "|"; + Token.tochars[TOK.TOKoror] = "||"; + Token.tochars[TOK.TOKarray] = "[]"; + Token.tochars[TOK.TOKindex] = "[i]"; + Token.tochars[TOK.TOKaddress] = "&"; + Token.tochars[TOK.TOKstar] = "*"; + Token.tochars[TOK.TOKtilde] = "~"; + Token.tochars[TOK.TOKdollar] = "$"; + Token.tochars[TOK.TOKcast] = "cast"; + Token.tochars[TOK.TOKplusplus] = 
"++"; + Token.tochars[TOK.TOKminusminus] = "--"; + Token.tochars[TOK.TOKtype] = "type"; + Token.tochars[TOK.TOKquestion] = "?"; + Token.tochars[TOK.TOKneg] = "-"; + Token.tochars[TOK.TOKuadd] = "+"; + Token.tochars[TOK.TOKvar] = "var"; + Token.tochars[TOK.TOKaddass] = "+="; + Token.tochars[TOK.TOKminass] = "-="; + Token.tochars[TOK.TOKmulass] = "*="; + Token.tochars[TOK.TOKdivass] = "/="; + Token.tochars[TOK.TOKmodass] = "%="; + Token.tochars[TOK.TOKshlass] = "<<="; + Token.tochars[TOK.TOKshrass] = ">>="; + Token.tochars[TOK.TOKushrass] = ">>>="; + Token.tochars[TOK.TOKandass] = "&="; + Token.tochars[TOK.TOKorass] = "|="; + Token.tochars[TOK.TOKcatass] = "~="; + Token.tochars[TOK.TOKcat] = "~"; + Token.tochars[TOK.TOKcall] = "call"; + Token.tochars[TOK.TOKidentity] = "is"; + Token.tochars[TOK.TOKnotidentity] = "!is"; + + Token.tochars[TOK.TOKorass] = "|="; + Token.tochars[TOK.TOKidentifier] = "identifier"; + Token.tochars[TOK.TOKat] = "@"; + + // For debugging + Token.tochars[TOK.TOKdotexp] = "dotexp"; + Token.tochars[TOK.TOKdotti] = "dotti"; + Token.tochars[TOK.TOKdotvar] = "dotvar"; + Token.tochars[TOK.TOKdottype] = "dottype"; + Token.tochars[TOK.TOKsymoff] = "symoff"; + Token.tochars[TOK.TOKarraylength] = "arraylength"; + Token.tochars[TOK.TOKarrayliteral] = "arrayliteral"; + Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral"; + Token.tochars[TOK.TOKstructliteral] = "structliteral"; + Token.tochars[TOK.TOKstring] = "string"; + Token.tochars[TOK.TOKdsymbol] = "symbol"; + Token.tochars[TOK.TOKtuple] = "tuple"; + Token.tochars[TOK.TOKdeclaration] = "declaration"; + Token.tochars[TOK.TOKdottd] = "dottd"; + Token.tochars[TOK.TOKon_scope_exit] = "scope(exit)"; + Token.tochars[TOK.TOKon_scope_success] = "scope(success)"; + Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)"; + } + + static Identifier idPool(string s) + { + StringValue* sv = stringtable.update(s); + Identifier id = cast(Identifier) sv.ptrvalue; + if (id is null) + { + id = new 
Identifier(sv.lstring.string_, TOK.TOKidentifier); + sv.ptrvalue = cast(void*)id; + } + + return id; + } + + static Identifier uniqueId(string s) + { + static int num; + return uniqueId(s, ++num); + } + + /********************************************* + * Create a unique identifier using the prefix s. + */ + static Identifier uniqueId(string s, int num) + { + char buffer[32]; + size_t slen = s.length; + + assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof); + int len = sprintf(buffer.ptr, "%.*s%d", s, num); + + return idPool(buffer[0..len].idup); + } + + TOK nextToken() + { + Token *t; + + if (token.next) + { + t = token.next; + memcpy(&token, t, Token.sizeof); + t.next = freelist; + freelist = t; + } + else + { + scan(&token); + } + + //token.print(); + return token.value; + } + + /*********************** + * Look ahead at next token's value. + */ + TOK peekNext() + { + return peek(&token).value; + } + + TOK peekNext2() + { + assert(false); + } + + void scan(Token* t) + { + uint lastLine = loc.linnum; + uint linnum; + + t.blockComment = null; + t.lineComment = null; + while (1) + { + t.ptr = p; + //printf("p = %p, *p = '%c'\n",p,*p); + switch (*p) + { + case 0: + case 0x1A: + t.value = TOK.TOKeof; // end of file + return; + + case ' ': + case '\t': + case '\v': + case '\f': + p++; + continue; // skip white space + + case '\r': + p++; + if (*p != '\n') // if CR stands by itself + loc.linnum++; + continue; // skip white space + + case '\n': + p++; + loc.linnum++; + continue; // skip white space + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + t.value = number(t); + return; + +version (CSTRINGS) { + case '\'': + t.value = charConstant(t, 0); + return; + + case '"': + t.value = stringConstant(t,0); + return; + + case 'l': + case 'L': + if (p[1] == '\'') + { + p++; + t.value = charConstant(t, 1); + return; + } + else if (p[1] == '"') + { + p++; + t.value = stringConstant(t, 1); + return; + } +} else { + case 
'\'': + t.value = charConstant(t,0); + return; + + case 'r': + if (p[1] != '"') + goto case_ident; + p++; + case '`': + t.value = wysiwygStringConstant(t, *p); + return; + + case 'x': + if (p[1] != '"') + goto case_ident; + p++; + t.value = hexStringConstant(t); + return; + +version (DMDV2) { + case 'q': + if (p[1] == '"') + { + p++; + t.value = delimitedStringConstant(t); + return; + } + else if (p[1] == '{') + { + p++; + t.value = tokenStringConstant(t); + return; + } + else + goto case_ident; +} + + case '"': + t.value = escapeStringConstant(t,0); + return; +version (TEXTUAL_ASSEMBLY_OUT) { +} else { + case '\\': // escaped string literal + { uint c; + ubyte* pstart = p; + + stringbuffer.reset(); + do + { + p++; + switch (*p) + { + case 'u': + case 'U': + case '&': + c = escapeSequence(); + stringbuffer.writeUTF8(c); + break; + + default: + c = escapeSequence(); + stringbuffer.writeByte(c); + break; + } + } while (*p == '\\'); + t.len = stringbuffer.offset; + stringbuffer.writeByte(0); + char* cc = cast(char*)malloc(stringbuffer.offset); + memcpy(cc, stringbuffer.data, stringbuffer.offset); + t.ustring = cc; + t.postfix = 0; + t.value = TOK.TOKstring; + if (!global.params.useDeprecated) + error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart); + return; + } +} + case 'l': + case 'L': +} + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'm': case 'n': case 'o': +version (DMDV2) { + case 'p': /*case 'q': case 'r':*/ case 's': case 't': +} else { + case 'p': case 'q': /*case 'r':*/ case 's': case 't': +} + case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': + case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 
'Y': + case 'Z': + case '_': + case_ident: + { ubyte c; + StringValue *sv; + Identifier id; + + do + { + c = *++p; + } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); + sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); /// + id = cast(Identifier) sv.ptrvalue; + if (id is null) + { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); + sv.ptrvalue = cast(void*)id; + } + t.ident = id; + t.value = cast(TOK) id.value; + anyToken = 1; + if (*t.ptr == '_') // if special identifier token + { + static char date[11+1]; + static char time[8+1]; + static char timestamp[24+1]; + + if (!date[0]) // lazy evaluation + { time_t tm; + char *p; + + .time(&tm); + p = ctime(&tm); + assert(p); + sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20); + sprintf(time.ptr, "%.8s", p + 11); + sprintf(timestamp.ptr, "%.24s", p); + } + +///version (DMDV1) { +/// if (mod && id == Id.FILE) +/// { +/// t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars()); +/// goto Lstr; +/// } +/// else if (mod && id == Id.LINE) +/// { +/// t.value = TOK.TOKint64v; +/// t.uns64value = loc.linnum; +/// } +/// else +///} + if (id == Id.DATE) + { + t.ustring = date.ptr; + goto Lstr; + } + else if (id == Id.TIME) + { + t.ustring = time.ptr; + goto Lstr; + } + else if (id == Id.VENDOR) + { + t.ustring = "Digital Mars D".ptr; + goto Lstr; + } + else if (id == Id.TIMESTAMP) + { + t.ustring = timestamp.ptr; + Lstr: + t.value = TOK.TOKstring; + Llen: + t.postfix = 0; + t.len = strlen(cast(char*)t.ustring); + } + else if (id == Id.VERSIONX) + { + uint major = 0; + uint minor = 0; + + foreach (char cc; global.version_[1..$]) + { + if (isdigit(cc)) + minor = minor * 10 + cc - '0'; + else if (cc == '.') + { + major = minor; + minor = 0; + } + else + break; + } + t.value = TOK.TOKint64v; + t.uns64value = major * 1000 + minor; + } +///version (DMDV2) { + else if (id == Id.EOFX) + { + t.value = TOK.TOKeof; + // Advance scanner to end of file + while (!(*p == 0 || *p == 
0x1A)) + p++; + } +///} + } + //printf("t.value = %d\n",t.value); + return; + } + + case '/': + p++; + switch (*p) + { + case '=': + p++; + t.value = TOK.TOKdivass; + return; + + case '*': + p++; + linnum = loc.linnum; + while (1) + { + while (1) + { + ubyte c = *p; + switch (c) + { + case '/': + break; + + case '\n': + loc.linnum++; + p++; + continue; + + case '\r': + p++; + if (*p != '\n') + loc.linnum++; + continue; + + case 0: + case 0x1A: + error("unterminated /* */ comment"); + p = end; + t.value = TOK.TOKeof; + return; + + default: + if (c & 0x80) + { uint u = decodeUTF(); + if (u == PS || u == LS) + loc.linnum++; + } + p++; + continue; + } + break; + } + p++; + if (p[-2] == '*' && p - 3 != t.ptr) + break; + } + if (commentToken) + { + t.value = TOK.TOKcomment; + return; + } + else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) + { // if /** but not /**/ + getDocComment(t, lastLine == linnum); + } + continue; + + case '/': // do // style comments + linnum = loc.linnum; + while (1) + { ubyte c = *++p; + switch (c) + { + case '\n': + break; + + case '\r': + if (p[1] == '\n') + p++; + break; + + case 0: + case 0x1A: + if (commentToken) + { + p = end; + t.value = TOK.TOKcomment; + return; + } + if (doDocComment && t.ptr[2] == '/') + getDocComment(t, lastLine == linnum); + p = end; + t.value = TOK.TOKeof; + return; + + default: + if (c & 0x80) + { uint u = decodeUTF(); + if (u == PS || u == LS) + break; + } + continue; + } + break; + } + + if (commentToken) + { + p++; + loc.linnum++; + t.value = TOK.TOKcomment; + return; + } + if (doDocComment && t.ptr[2] == '/') + getDocComment(t, lastLine == linnum); + + p++; + loc.linnum++; + continue; + + case '+': + { + int nest; + + linnum = loc.linnum; + p++; + nest = 1; + while (1) + { ubyte c = *p; + switch (c) + { + case '/': + p++; + if (*p == '+') + { + p++; + nest++; + } + continue; + + case '+': + p++; + if (*p == '/') + { + p++; + if (--nest == 0) + break; + } + continue; + + case '\r': + p++; + if (*p != 
'\n') + loc.linnum++; + continue; + + case '\n': + loc.linnum++; + p++; + continue; + + case 0: + case 0x1A: + error("unterminated /+ +/ comment"); + p = end; + t.value = TOK.TOKeof; + return; + + default: + if (c & 0x80) + { uint u = decodeUTF(); + if (u == PS || u == LS) + loc.linnum++; + } + p++; + continue; + } + break; + } + if (commentToken) + { + t.value = TOK.TOKcomment; + return; + } + if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) + { // if /++ but not /++/ + getDocComment(t, lastLine == linnum); + } + continue; + } + + default: + break; /// + } + t.value = TOK.TOKdiv; + return; + + case '.': + p++; + if (isdigit(*p)) + { /* Note that we don't allow ._1 and ._ as being + * valid floating point numbers. + */ + p--; + t.value = inreal(t); + } + else if (p[0] == '.') + { + if (p[1] == '.') + { p += 2; + t.value = TOK.TOKdotdotdot; + } + else + { p++; + t.value = TOK.TOKslice; + } + } + else + t.value = TOK.TOKdot; + return; + + case '&': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKandass; + } + else if (*p == '&') + { p++; + t.value = TOK.TOKandand; + } + else + t.value = TOK.TOKand; + return; + + case '|': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKorass; + } + else if (*p == '|') + { p++; + t.value = TOK.TOKoror; + } + else + t.value = TOK.TOKor; + return; + + case '-': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKminass; + } +/// #if 0 +/// else if (*p == '>') +/// { p++; +/// t.value = TOK.TOKarrow; +/// } +/// #endif + else if (*p == '-') + { p++; + t.value = TOK.TOKminusminus; + } + else + t.value = TOK.TOKmin; + return; + + case '+': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKaddass; + } + else if (*p == '+') + { p++; + t.value = TOK.TOKplusplus; + } + else + t.value = TOK.TOKadd; + return; + + case '<': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKle; // <= + } + else if (*p == '<') + { p++; + if (*p == '=') + { p++; + t.value = TOK.TOKshlass; // <<= + } + else + t.value = TOK.TOKshl; // << + } + else 
if (*p == '>') + { p++; + if (*p == '=') + { p++; + t.value = TOK.TOKleg; // <>= + } + else + t.value = TOK.TOKlg; // <> + } + else + t.value = TOK.TOKlt; // < + return; + + case '>': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKge; // >= + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t.value = TOK.TOKshrass; // >>= + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t.value = TOK.TOKushrass; // >>>= + } + else + t.value = TOK.TOKushr; // >>> + } + else + t.value = TOK.TOKshr; // >> + } + else + t.value = TOK.TOKgt; // > + return; + + case '!': + p++; + if (*p == '=') + { p++; + if (*p == '=' && global.params.Dversion == 1) + { p++; + t.value = TOK.TOKnotidentity; // !== + } + else + t.value = TOK.TOKnotequal; // != + } + else if (*p == '<') + { p++; + if (*p == '>') + { p++; + if (*p == '=') + { p++; + t.value = TOK.TOKunord; // !<>= + } + else + t.value = TOK.TOKue; // !<> + } + else if (*p == '=') + { p++; + t.value = TOK.TOKug; // !<= + } + else + t.value = TOK.TOKuge; // !< + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t.value = TOK.TOKul; // !>= + } + else + t.value = TOK.TOKule; // !> + } + else + t.value = TOK.TOKnot; // ! 
+ return; + + case '=': + p++; + if (*p == '=') + { p++; + if (*p == '=' && global.params.Dversion == 1) + { p++; + t.value = TOK.TOKidentity; // === + } + else + t.value = TOK.TOKequal; // == + } + else + t.value = TOK.TOKassign; // = + return; + + case '~': + p++; + if (*p == '=') + { p++; + t.value = TOK.TOKcatass; // ~= + } + else + t.value = TOK.TOKtilde; // ~ + return; +/* + #define SINGLE(c,tok) case c: p++; t.value = tok; return; + + SINGLE('(', TOKlparen) + SINGLE(')', TOKrparen) + SINGLE('[', TOKlbracket) + SINGLE(']', TOKrbracket) + SINGLE('{', TOKlcurly) + SINGLE('}', TOKrcurly) + SINGLE('?', TOKquestion) + SINGLE(',', TOKcomma) + SINGLE(';', TOKsemicolon) + SINGLE(':', TOKcolon) + SINGLE('$', TOKdollar) + SINGLE('@', TOKat) + + #undef SINGLE + + #define DOUBLE(c1,tok1,c2,tok2) \ + case c1: \ + p++; \ + if (*p == c2) \ + { p++; \ + t.value = tok2; \ + } \ + else \ + t.value = tok1; \ + return; + + DOUBLE('*', TOKmul, '=', TOKmulass) + DOUBLE('%', TOKmod, '=', TOKmodass) + DOUBLE('^', TOKxor, '=', TOKxorass) + + #undef DOUBLE +*/ + + case '(': p++; t.value = TOK.TOKlparen; return; + case ')': p++; t.value = TOK.TOKrparen; return; + case '[': p++; t.value = TOK.TOKlbracket; return; + case ']': p++; t.value = TOK.TOKrbracket; return; + case '{': p++; t.value = TOK.TOKlcurly; return; + case '}': p++; t.value = TOK.TOKrcurly; return; + case '?': p++; t.value = TOK.TOKquestion; return; + case ',': p++; t.value = TOK.TOKcomma; return; + case ';': p++; t.value = TOK.TOKsemicolon; return; + case ':': p++; t.value = TOK.TOKcolon; return; + case '$': p++; t.value = TOK.TOKdollar; return; + case '@': p++; t.value = TOK.TOKat; return; + + case '*': + p++; + if (*p == '=') { + p++; + t.value = TOK.TOKmulass; + } else { + t.value = TOK.TOKmul; + } + return; + + case '%': + p++; + if (*p == '=') { + p++; + t.value = TOK.TOKmodass; + } else { + t.value = TOK.TOKmod; + } + return; + + case '^': + p++; + if (*p == '=') { + p++; + t.value = TOK.TOKxorass; + } else { + 
t.value = TOK.TOKxor; + } + return; + + case '#': + p++; + pragma_(); + continue; + + default: + { ubyte c = *p; + + if (c & 0x80) + { uint u = decodeUTF(); + + // Check for start of unicode identifier + if (isUniAlpha(u)) + goto case_ident; + + if (u == PS || u == LS) + { + loc.linnum++; + p++; + continue; + } + } + if (isprint(c)) + error("unsupported char '%c'", c); + else + error("unsupported char 0x%02x", c); + p++; + continue; + } + } + } + } + + Token* peek(Token* ct) + { + Token* t; + + if (ct.next) + t = ct.next; + else + { + t = new Token(); + scan(t); + t.next = null; + ct.next = t; + } + return t; + } + + Token* peekPastParen(Token* tk) + { + //printf("peekPastParen()\n"); + int parens = 1; + int curlynest = 0; + while (1) + { + tk = peek(tk); + //tk.print(); + switch (tk.value) + { + case TOK.TOKlparen: + parens++; + continue; + + case TOK.TOKrparen: + --parens; + if (parens) + continue; + tk = peek(tk); + break; + + case TOK.TOKlcurly: + curlynest++; + continue; + + case TOK.TOKrcurly: + if (--curlynest >= 0) + continue; + break; + + case TOK.TOKsemicolon: + if (curlynest) + continue; + break; + + case TOK.TOKeof: + break; + + default: + continue; + } + return tk; + } + } + + /******************************************* + * Parse escape sequence. 
*/
/* On entry `p` addresses the character that follows the backslash; on
 * return `p` has been advanced past the part of the sequence that was
 * consumed.
 *
 * Handled forms:
 *   \' \" \? \\            the character itself
 *   \a \b \f \n \r \t \v   control codes 7, 8, 12, 10, 13, 9, 11
 *   \xHH \uHHHH \UHHHHHHHH 2, 4 or 8 hex digits; the \u and \U forms are
 *                          checked with utf_isValidDchar() and replaced by
 *                          '?' when invalid
 *   \&name;                named character entity, resolved through
 *                          HtmlNamedEntity(); an unknown name is reported
 *                          and replaced by ' '
 *   \o \oo \ooo            up to three octal digits; values above 0xFF are
 *                          reported
 *   end of file (0, 0x1A)  yields '\\' and leaves `p` on the sentinel
 *
 * Anything else is reported as an undefined escape sequence and the
 * offending character is returned without being consumed.
 *
 * Returns: the decoded character value.
 */
uint escapeSequence()
{
    uint c = *p;

    version (TEXTUAL_ASSEMBLY_OUT) {
        // Escapes are not interpreted in this configuration: hand back the
        // raw character and leave `p` where it is.
        return c;
    }
    int n;
    int ndigits;

    switch (c)
    {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
            p++;
            break;

        case 'a':   c = 7;  goto Lconsume;
        case 'b':   c = 8;  goto Lconsume;
        case 'f':   c = 12; goto Lconsume;
        case 'n':   c = 10; goto Lconsume;
        case 'r':   c = 13; goto Lconsume;
        case 't':   c = 9;  goto Lconsume;
        case 'v':   c = 11; goto Lconsume;

        case 'u':
            ndigits = 4;
            goto Lhex;
        case 'U':
            ndigits = 8;
            goto Lhex;
        case 'x':
            ndigits = 2;
        Lhex:
            p++;
            c = *p;
            if (ishex(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                while (1)
                {
                    // Convert the current hex digit to its numeric value.
                    if (isdigit(c))
                        c -= '0';
                    else if (islower(c))
                        c -= 'a' - 10;
                    else
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(ubyte)c))
                    {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \x may produce any byte; \u and \U must name a valid code point.
                if (ndigits != 2 && !utf_isValidDchar(v))
                {   error("invalid UTF character \\U%08x", v);
                    v = '?';    // recover with valid UTF character
                }
                c = v;
            }
            else
                error("undefined escape hex sequence \\%c\n",c);
            break;

        case '&':           // named character entity
            for (ubyte* idstart = ++p; true; p++)
            {
                switch (*p)
                {
                    case ';':
                        c = HtmlNamedEntity(idstart, p - idstart);
                        if (c == ~0)
                        {
                            error("unnamed character entity &%s;", idstart[0..(p - idstart)]);
                            c = ' ';
                        }
                        p++;
                        break;

                    default:
                        // An entity name is a letter followed by letters or
                        // digits, so a digit is accepted everywhere except
                        // at the very first position (p == idstart).
                        if (isalpha(*p) ||
                            (p != idstart && isdigit(*p)))
                            continue;
                        error("unterminated named entity");
                        break;
                }
                break;      // leave the for loop once the switch is done
            }
            break;

        case 0:
        case 0x1A:          // end of file
            c = '\\';
            break;

        default:
            if (isoctal(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                do
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                } while (++n < 3 && isoctal(cast(ubyte)c));
                c = v;
                if (c > 0xFF)
                    error("0%03o is larger than a byte", c);
            }
            else
                error("undefined escape sequence \\%c\n",c);
            break;
    }
    return c;
}

/***************************************
 * Lex a wysiwyg string literal; scan() calls this for r"..." and `...`
 * with tc set to the opening character.
 * Not implemented yet: always fails the assertion.
 */
TOK wysiwygStringConstant(Token* t, int tc)
{
    assert(false);
}

/***************************************
 * Lex a hex string literal; scan() calls this for x"...".
 * Not implemented yet: always fails the assertion.
 */
TOK hexStringConstant(Token* t)
{
    assert(false);
}

version (DMDV2) {
/***************************************
 * Lex a delimited string literal; scan() calls this for q"...".
 * Not implemented yet: always fails the assertion.
 */
TOK delimitedStringConstant(Token* t)
{
    assert(false);
}

/***************************************
 * Lex a token string literal; scan() calls this for q{...}.
 * Not implemented yet: always fails the assertion.
 */
TOK tokenStringConstant(Token* t)
{
    assert(false);
}
}

/***************************************
 * Lex a double quoted string literal with escape sequences.
 * On entry p addresses the opening quote.  The decoded bytes are gathered
 * in stringbuffer and copied into a malloc'ed, 0 terminated buffer stored
 * in t.ustring; t.len excludes the terminator.  A trailing c, w or d
 * postfix is consumed by stringPostfix().
 * At end of file an error is reported, p is left on the end of file
 * character and an empty string is stored in the token.
 * The wide parameter is not used.
 * Returns:
 *      TOK.TOKstring
 */
TOK escapeStringConstant(Token* t, int wide)
{
    uint c;
    Loc start = loc;        // remembered for the "unterminated" message

    p++;
    stringbuffer.reset();
    while (true)
    {
        c = *p++;
        switch (c)
        {
            version (TEXTUAL_ASSEMBLY_OUT) {
            } else {
            case '\\':
                switch (*p)
                {
                    case 'u':
                    case 'U':
                    case '&':
                        // These escapes denote code points: store as UTF-8.
                        c = escapeSequence();
                        stringbuffer.writeUTF8(c);
                        continue;

                    default:
                        // Everything else is stored as a single byte below.
                        c = escapeSequence();
                        break;
                }
                break;
            }
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;       // ignore
                c = '\n';           // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case '"':
                t.len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                char* tmp = cast(char*)malloc(stringbuffer.offset);
                memcpy(tmp, stringbuffer.data, stringbuffer.offset);
                t.ustring = tmp;
                stringPostfix(t);
                return TOK.TOKstring;

            case 0:
            case 0x1A:
                p--;                // stay on the end of file character
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOK.TOKstring;

            default:
                if (c & 0x80)
                {
                    // Multi-byte UTF-8 sequence: back up to its first byte
                    // so decodeUTF() sees the whole sequence.
                    p--;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {   c = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(c);
    }

    assert(false);
}

/***************************************
 * Lex a character literal.
 * On entry p addresses the opening single quote.  The character value is
 * stored in t.uns64value.
 * If the literal is cut short by a line end, an empty '' or the end of
 * the file, "unterminated character constant" is reported.  In the end
 * of file case p is moved back onto the end of file character so that
 * the next scan() sees it and produces TOKeof instead of reading past
 * the end of the buffer (escapeStringConstant() does the same).
 * The wide parameter is not used.
 * Returns:
 *      TOKcharv, TOKwcharv or TOKdcharv depending on the character
 */
TOK charConstant(Token* t, int wide)
{
    uint c;
    TOK tk = TOKcharv;

    //printf("Lexer.charConstant\n");
    p++;
    c = *p++;
    switch (c)
    {
        version (TEXTUAL_ASSEMBLY_OUT) {
        } else {
        case '\\':
            switch (*p)
            {
                case 'u':
                    t.uns64value = escapeSequence();
                    tk = TOKwcharv;
                    break;

                case 'U':
                case '&':
                    t.uns64value = escapeSequence();
                    tk = TOKdcharv;
                    break;

                default:
                    t.uns64value = escapeSequence();
                    break;
            }
            break;
        }
        case '\n':
        L1:
            loc.linnum++;
            goto Lunterminated;

        case 0:
        case 0x1A:
            // c was fetched with *p++, so p is already past the end of
            // file character; step back onto it.
            p--;
            goto Lunterminated;

        case '\r':
        case '\'':
        Lunterminated:
            error("unterminated character constant");
            return tk;

        default:
            if (c & 0x80)
            {
                // Multi-byte UTF-8 sequence: decode from its first byte.
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS)
                    goto L1;
                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                    tk = TOKwcharv;
                else
                    tk = TOKdcharv;
            }
            t.uns64value = c;
            break;
    }

    if (*p != '\'')
    {
        error("unterminated character constant");
        return tk;
    }
    p++;
    return tk;
}

/***************************************
 * Get postfix of string literal.
 * A c, w or d directly after the closing quote is stored in t.postfix
 * and consumed; otherwise t.postfix is set to 0 and p is not moved.
 */
void stringPostfix(Token* t)
{
    switch (*p)
    {
        case 'c':
        case 'w':
        case 'd':
            t.postfix = *p;
            p++;
            break;

        default:
            t.postfix = 0;
            break;
    }
}

/***************************************
 * Not implemented yet: always fails the assertion.
 */
uint wchar_(uint u)
{
    assert(false);
}

/**************************************
 * Read in a number.
 * If it's an integer, store it in tok.TKutok.Vlong.
 *      integers can be decimal, octal or hex
 *      Handle the suffixes U, UL, LU, L, etc.
 * If it's double, store it in tok.TKutok.Vdouble.
 * Returns:
 *      TKnum
 *      TKdouble,...
+ */ + + TOK number(Token* t) + { + // We use a state machine to collect numbers + enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, + STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, + STATE_hexh, STATE_error }; + STATE state; + + enum FLAGS + { + FLAGS_undefined = 0, + FLAGS_decimal = 1, // decimal + FLAGS_unsigned = 2, // u or U suffix + FLAGS_long = 4, // l or L suffix + }; + + FLAGS flags = FLAGS.FLAGS_decimal; + + int i; + int base; + uint c; + ubyte *start; + TOK result; + + //printf("Lexer.number()\n"); + state = STATE.STATE_initial; + base = 0; + stringbuffer.reset(); + start = p; + while (1) + { + c = *p; + switch (state) + { + case STATE.STATE_initial: // opening state + if (c == '0') + state = STATE.STATE_0; + else + state = STATE.STATE_decimal; + break; + + case STATE.STATE_0: + flags = (flags & ~FLAGS.FLAGS_decimal); + switch (c) + { +version (ZEROH) { + case 'H': // 0h + case 'h': + goto hexh; +} + case 'X': + case 'x': + state = STATE.STATE_hex0; + break; + + case '.': + if (p[1] == '.') // .. is a separate token + goto done; + case 'i': + case 'f': + case 'F': + goto real_; +version (ZEROH) { + case 'E': + case 'e': + goto case_hex; +} + case 'B': + case 'b': + state = STATE.STATE_binary0; + break; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + state = STATE.STATE_octal; + break; + +version (ZEROH) { + case '8': case '9': case 'A': + case 'C': case 'D': case 'F': + case 'a': case 'c': case 'd': case 'f': + case_hex: + state = STATE.STATE_hexh; + break; +} + case '_': + state = STATE.STATE_octal; + p++; + continue; + + case 'L': + if (p[1] == 'i') + goto real_; + goto done; + + default: + goto done; + } + break; + + case STATE.STATE_decimal: // reading decimal number + if (!isdigit(c)) + { +version (ZEROH) { + if (ishex(c) + || c == 'H' || c == 'h' + ) + goto hexh; +} + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (c == '.' 
&& p[1] != '.') + goto real_; + else if (c == 'i' || c == 'f' || c == 'F' || + c == 'e' || c == 'E') + { + real_: // It's a real number. Back up and rescan as a real + p = start; + return inreal(t); + } + else if (c == 'L' && p[1] == 'i') + goto real_; + goto done; + } + break; + + case STATE.STATE_hex0: // reading hex number + case STATE.STATE_hex: + if (! ishex(cast(ubyte)c)) + { + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (c == '.' && p[1] != '.') + goto real_; + if (c == 'P' || c == 'p' || c == 'i') + goto real_; + if (state == STATE.STATE_hex0) + error("Hex digit expected, not '%c'", c); + goto done; + } + state = STATE.STATE_hex; + break; + +version (ZEROH) { + hexh: + state = STATE.STATE_hexh; + case STATE.STATE_hexh: // parse numbers like 0FFh + if (!ishex(c)) + { + if (c == 'H' || c == 'h') + { + p++; + base = 16; + goto done; + } + else + { + // Check for something like 1E3 or 0E24 + if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) || + memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset)) + goto real_; + error("Hex digit expected, not '%c'", c); + goto done; + } + } + break; +} + + case STATE.STATE_octal: // reading octal number + case STATE.STATE_octale: // reading octal number with non-octal digits + if (!isoctal(cast(ubyte)c)) + { +version (ZEROH) { + if (ishex(c) + || c == 'H' || c == 'h' + ) + goto hexh; +} + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (c == '.' 
&& p[1] != '.') + goto real_; + if (c == 'i') + goto real_; + if (isdigit(c)) + { + state = STATE.STATE_octale; + } + else + goto done; + } + break; + + case STATE.STATE_binary0: // starting binary number + case STATE.STATE_binary: // reading binary number + if (c != '0' && c != '1') + { +version (ZEROH) { + if (ishex(c) + || c == 'H' || c == 'h' + ) + goto hexh; +} + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (state == STATE.STATE_binary0) + { error("binary digit expected"); + state = STATE.STATE_error; + break; + } + else + goto done; + } + state = STATE.STATE_binary; + break; + + case STATE.STATE_error: // for error recovery + if (!isdigit(c)) // scan until non-digit + goto done; + break; + + default: + assert(0); + } + stringbuffer.writeByte(c); + p++; + } + done: + stringbuffer.writeByte(0); // terminate string + if (state == STATE.STATE_octale) + error("Octal digit expected"); + + ulong n; // unsigned >=64 bit integer type + + if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) + n = stringbuffer.data[0] - '0'; + else + { + // Convert string to integer +version (__DMC__) { + errno = 0; + n = strtoull(cast(char*)stringbuffer.data,null,base); + if (errno == ERANGE) + error("integer overflow"); +} else { + // Not everybody implements strtoull() + char* p = cast(char*)stringbuffer.data; + int r = 10, d; + + if (*p == '0') + { + if (p[1] == 'x' || p[1] == 'X') + p += 2, r = 16; + else if (p[1] == 'b' || p[1] == 'B') + p += 2, r = 2; + else if (isdigit(p[1])) + p += 1, r = 8; + } + + n = 0; + while (1) + { + if (*p >= '0' && *p <= '9') + d = *p - '0'; + else if (*p >= 'a' && *p <= 'z') + d = *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'Z') + d = *p - 'A' + 10; + else + break; + if (d >= r) + break; + ulong n2 = n * r; + //printf("n2 / r = %llx, n = %llx\n", n2/r, n); + if (n2 / r != n || n2 + d < n) + { + error ("integer overflow"); + break; + } + + n = n2 + d; + p++; + } +} + if (n.sizeof > 8 && + n > 
0xFFFFFFFFFFFFFFFF) // if n needs more than 64 bits + error("integer overflow"); + } + + // Parse trailing 'u', 'U', 'l' or 'L' in any combination + while (1) + { FLAGS f; + + switch (*p) + { case 'U': + case 'u': + f = FLAGS.FLAGS_unsigned; + goto L1; + + case 'l': + if (1 || !global.params.useDeprecated) + error("'l' suffix is deprecated, use 'L' instead"); + case 'L': + f = FLAGS.FLAGS_long; + L1: + p++; + if (flags & f) + error("unrecognized token"); + flags = (flags | f); + continue; + default: + break; + } + break; + } + + switch (flags) + { + case FLAGS.FLAGS_undefined: + /* Octal or Hexadecimal constant. + * First that fits: int, uint, long, ulong + */ + if (n & 0x8000000000000000) + result = TOK.TOKuns64v; + else if (n & 0xFFFFFFFF00000000) + result = TOK.TOKint64v; + else if (n & 0x80000000) + result = TOK.TOKuns32v; + else + result = TOK.TOKint32v; + break; + + case FLAGS.FLAGS_decimal: + /* First that fits: int, long, long long + */ + if (n & 0x8000000000000000) + { error("signed integer overflow"); + result = TOK.TOKuns64v; + } + else if (n & 0xFFFFFFFF80000000) + result = TOK.TOKint64v; + else + result = TOK.TOKint32v; + break; + + case FLAGS.FLAGS_unsigned: + case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned: + /* First that fits: uint, ulong + */ + if (n & 0xFFFFFFFF00000000) + result = TOK.TOKuns64v; + else + result = TOK.TOKuns32v; + break; + + case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long: + if (n & 0x8000000000000000) + { error("signed integer overflow"); + result = TOK.TOKuns64v; + } + else + result = TOK.TOKint64v; + break; + + case FLAGS.FLAGS_long: + if (n & 0x8000000000000000) + result = TOK.TOKuns64v; + else + result = TOK.TOKint64v; + break; + + case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: + case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: + result = TOK.TOKuns64v; + break; + + default: +debug { + printf("%x\n",flags); +} + assert(0); + } + t.uns64value = n; + return result; + } + + /************************************** 
+ * Read in characters, converting them to real. + * Bugs: + * Exponent overflow not detected. + * Too much requested precision is not detected. + */ + TOK inreal(Token* t) + in + { + assert(*p == '.' || isdigit(*p)); + } + out (result) + { + switch (result) + { + case TOKfloat32v: + case TOKfloat64v: + case TOKfloat80v: + case TOKimaginary32v: + case TOKimaginary64v: + case TOKimaginary80v: + break; + + default: + assert(0); + } + } + body + { + int dblstate; + uint c; + char hex; // is this a hexadecimal-floating-constant? + TOK result; + + //printf("Lexer.inreal()\n"); + stringbuffer.reset(); + dblstate = 0; + hex = 0; + Lnext: + while (true) + { + // Get next char from input + c = *p++; + //printf("dblstate = %d, c = '%c'\n", dblstate, c); + while (true) + { + switch (dblstate) + { + case 0: // opening state + if (c == '0') + dblstate = 9; + else if (c == '.') + dblstate = 3; + else + dblstate = 1; + break; + + case 9: + dblstate = 1; + if (c == 'X' || c == 'x') + { + hex++; + break; + } + case 1: // digits to left of . + case 3: // digits to right of . + case 7: // continuing exponent digits + if (!isdigit(c) && !(hex && isxdigit(c))) + { + if (c == '_') + goto Lnext; // ignore embedded '_' + dblstate++; + continue; + } + break; + + case 2: // no more digits to left of . + if (c == '.') + { + dblstate++; + break; + } + case 4: // no more digits to right of . 
+ if ((c == 'E' || c == 'e') || + hex && (c == 'P' || c == 'p')) + { + dblstate = 5; + hex = 0; // exponent is always decimal + break; + } + if (hex) + error("binary-exponent-part required"); + goto done; + + case 5: // looking immediately to right of E + dblstate++; + if (c == '-' || c == '+') + break; + case 6: // 1st exponent digit expected + if (!isdigit(c)) + error("exponent expected"); + dblstate++; + break; + + case 8: // past end of exponent digits + goto done; + } + break; + } + stringbuffer.writeByte(c); + } + done: + p--; + + stringbuffer.writeByte(0); + + version (_WIN32) { /// && __DMC__ + char* save = __locale_decpoint; + __locale_decpoint = cast(char*)".".ptr; + } + t.float80value = strtold(cast(char*)stringbuffer.data, null); + + errno = 0; + switch (*p) + { + case 'F': + case 'f': + strtof(cast(char*)stringbuffer.data, null); + result = TOKfloat32v; + p++; + break; + + default: + strtod(cast(char*)stringbuffer.data, null); + result = TOKfloat64v; + break; + + case 'l': + if (!global.params.useDeprecated) + error("'l' suffix is deprecated, use 'L' instead"); + case 'L': + result = TOKfloat80v; + p++; + break; + } + if (*p == 'i' || *p == 'I') + { + if (!global.params.useDeprecated && *p == 'I') + error("'I' suffix is deprecated, use 'i' instead"); + p++; + switch (result) + { + case TOKfloat32v: + result = TOKimaginary32v; + break; + case TOKfloat64v: + result = TOKimaginary64v; + break; + case TOKfloat80v: + result = TOKimaginary80v; + break; + } + } + + version (_WIN32) { ///&& __DMC__ + __locale_decpoint = save; + } + if (errno == ERANGE) + error("number is not representable"); + + return result; + } + + void error(T...)(string format, T t) + { + error(this.loc, format, t); + } + + void error(T...)(Loc loc, string format, T t) + { + if (mod && !global.gag) + { + string p = loc.toChars(); + if (p.length != 0) + writef("%s: ", p); + + writefln(format, t); + + if (global.errors >= 20) // moderate blizzard of cascading messages + fatal(); + } + + 
global.errors++; + } + + void pragma_() + { + assert(false); + } + + uint decodeUTF() + { + assert(false); + } + + void getDocComment(Token* t, uint lineComment) + { + assert(false); + } + + static bool isValidIdentifier(string p) + { + if (p.length == 0) { + return false; + } + + if (p[0] >= '0' && p[0] <= '9') { // beware of isdigit() on signed chars + return false; + } + + size_t idx = 0; + while (idx < p.length) + { + dchar dc; + + if (utf_decodeChar(p, &idx, &dc) !is null) { + return false; + } + + if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) { + return false; + } + } + + return true; + } + + /// TODO: reimplement based on strings + static ubyte* combineComments(ubyte* c1, ubyte* c2) + { + //printf("Lexer.combineComments('%s', '%s')\n", c1, c2); + + ubyte* c = c2; + + if (c1) + { + c = c1; + if (c2) + { + size_t len1 = strlen(cast(char*)c1); + size_t len2 = strlen(cast(char*)c2); + + c = cast(ubyte*)malloc(len1 + 1 + len2 + 1); + memcpy(c, c1, len1); + if (len1 && c1[len1 - 1] != '\n') + { + c[len1] = '\n'; + len1++; + } + memcpy(c + len1, c2, len2); + c[len1 + len2] = 0; + } + } + + return c; + } +} \ No newline at end of file