Mercurial > projects > ddmd
view dmd/Lexer.d @ 72:2e2a5c3f943a
reduced warnings by adding override to the methods
think this also normalizes different line endings used all over the place
author | Trass3r |
---|---|
date | Sat, 28 Aug 2010 16:19:48 +0200 |
parents | b7d29f613539 |
children | ef02e2e203c2 |
line wrap: on
line source
module dmd.Lexer;

import dmd.StringTable;
import dmd.OutBuffer;
import dmd.Token;
import dmd.Loc;
import dmd.Module;
import dmd.Identifier;
import dmd.TOK;
import dmd.Keyword;
import dmd.StringValue;
import dmd.Global;
import dmd.Util;
import dmd.Id;
import dmd.Dchar;
import dmd.Utf;

import std.stdio : writeln;

import core.memory;

import core.stdc.ctype;
import core.stdc.stdlib;
import core.stdc.string;
import core.stdc.stdio;
import core.stdc.time;
import core.stdc.errno;

enum LS = 0x2028;   // UTF line separator
enum PS = 0x2029;   // UTF paragraph separator

// External C symbol. NOTE(review): presumably the C runtime's locale
// decimal-point string; it is not referenced in this part of the file.
extern (C) extern
{
    __gshared char* __locale_decpoint;
}

/**
 * Placeholder for the "is this code point a Unicode letter" test.
 *
 * Not implemented: the body unconditionally fails its assertion. The lexer
 * calls it whenever it meets a byte with the high bit set while scanning
 * identifiers, so such input currently aborts.
 */
int isUniAlpha(uint u)
{
    assert(false);
}

/**
 * Lexical analyzer: turns a byte buffer into a stream of Tokens.
 *
 * The scanner stops on a 0 or 0x1A byte, so the buffer handed to the
 * constructor is expected to contain one of those as a terminator.
 */
class Lexer
{
    static StringTable stringtable;     // identifier/keyword intern table shared by all lexers
    static OutBuffer stringbuffer;      // scratch buffer used while assembling literals
    static Token* freelist;             // tokens released by nextToken()

    Loc loc;            // for error messages

    ubyte* base;        // pointer to start of buffer
    ubyte* end;         // past end of buffer
    ubyte* p;           // current character
    Token token;
    Module mod;
    int doDocComment;   // collect doc comment information
    int anyToken;       // !=0 means seen at least one token
    int commentToken;   // !=0 means comments are TOKcomment's

    /// Creates the shared string table and scratch buffer.
    static this()
    {
        stringtable = new StringTable();
        stringbuffer = new OutBuffer();
    }

    static ~this()
    {
        //delete stringtable;
    }

    /**
     * Sets up a lexer over part of a buffer.
     *
     * Params:
     *  mod           = module recorded in loc and used for diagnostics
     *  base          = start of the source buffer
     *  begoffset     = offset from base at which scanning starts
     *  endoffset     = offset from base of the end of the source
     *  doDocComment  = non-zero to collect documentation comments
     *  commentToken  = non-zero to return comments as TOKcomment tokens
     *
     * If the text starts with "#!" that whole first line is skipped and the
     * line counter starts at 2.
     */
    this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
    {
        loc = Loc(mod, 1);

        memset(&token,0,token.sizeof);
        this.base = base;
        this.end  = base + endoffset;
        p = base + begoffset;
        this.mod = mod;
        this.doDocComment = doDocComment;
        this.anyToken = 0;
        this.commentToken = commentToken;
        //initKeywords();

        /* If first line starts with '#!', ignore the line
         */

        if (p[0] == '#' && p[1] =='!')
        {
            p += 2;
            while (1)
            {
                ubyte c = *p;
                switch (c)
                {
                    case '\n':
                        p++;
                        break;

                    case '\r':
                        p++;
                        if (*p == '\n')
                            p++;
                        break;

                    case 0:
                    case 0x1A:
                        // end of input: leave p on the terminator
                        break;

                    default:
                        if (c & 0x80)
                        {
                            uint u = decodeUTF();
                            if (u == PS || u == LS)
                                break;
                        }
                        p++;
                        continue;
                }
                break;
            }
            loc.linnum = 2;
        }
    }

version (DMDV2)
{
    // Keyword spellings and their token values. initKeywords() drops the
    // last two entries when global.params.Dversion == 1.
    static Keyword[] keywords =
    [
        //  { "",               TOK },

        { "this", TOK.TOKthis },
        { "super", TOK.TOKsuper },
        { "assert", TOK.TOKassert },
        { "null", TOK.TOKnull },
        { "true", TOK.TOKtrue },
        { "false", TOK.TOKfalse },
        { "cast", TOK.TOKcast },
        { "new", TOK.TOKnew },
        { "delete", TOK.TOKdelete },
        { "throw", TOK.TOKthrow },
        { "module", TOK.TOKmodule },
        { "pragma", TOK.TOKpragma },
        { "typeof", TOK.TOKtypeof },
        { "typeid", TOK.TOKtypeid },

        { "template", TOK.TOKtemplate },

        { "void", TOK.TOKvoid },
        { "byte", TOK.TOKint8 },
        { "ubyte", TOK.TOKuns8 },
        { "short", TOK.TOKint16 },
        { "ushort", TOK.TOKuns16 },
        { "int", TOK.TOKint32 },
        { "uint", TOK.TOKuns32 },
        { "long", TOK.TOKint64 },
        { "ulong", TOK.TOKuns64 },
        { "cent", TOK.TOKcent, },
        { "ucent", TOK.TOKucent, },
        { "float", TOK.TOKfloat32 },
        { "double", TOK.TOKfloat64 },
        { "real", TOK.TOKfloat80 },

        { "bool", TOK.TOKbool },
        { "char", TOK.TOKchar },
        { "wchar", TOK.TOKwchar },
        { "dchar", TOK.TOKdchar },

        { "ifloat", TOK.TOKimaginary32 },
        { "idouble", TOK.TOKimaginary64 },
        { "ireal", TOK.TOKimaginary80 },

        { "cfloat", TOK.TOKcomplex32 },
        { "cdouble", TOK.TOKcomplex64 },
        { "creal", TOK.TOKcomplex80 },

        { "delegate", TOK.TOKdelegate },
        { "function", TOK.TOKfunction },

        { "is", TOK.TOKis },
        { "if", TOK.TOKif },
        { "else", TOK.TOKelse },
        { "while", TOK.TOKwhile },
        { "for", TOK.TOKfor },
        { "do", TOK.TOKdo },
        { "switch", TOK.TOKswitch },
        { "case", TOK.TOKcase },
        { "default", TOK.TOKdefault },
        { "break", TOK.TOKbreak },
        { "continue", TOK.TOKcontinue },
        { "synchronized", TOK.TOKsynchronized },
        { "return", TOK.TOKreturn },
        { "goto", TOK.TOKgoto },
        { "try", TOK.TOKtry },
        { "catch", TOK.TOKcatch },
        { "finally", TOK.TOKfinally },
        { "with", TOK.TOKwith },
        { "asm", TOK.TOKasm },
        { "foreach", TOK.TOKforeach },
        { "foreach_reverse", TOK.TOKforeach_reverse },
        { "scope", TOK.TOKscope },

        { "struct", TOK.TOKstruct },
        { "class", TOK.TOKclass },
        { "interface", TOK.TOKinterface },
        { "union", TOK.TOKunion },
        { "enum", TOK.TOKenum },
        { "import", TOK.TOKimport },
        { "mixin", TOK.TOKmixin },
        { "static", TOK.TOKstatic },
        { "final", TOK.TOKfinal },
        { "const", TOK.TOKconst },
        { "typedef", TOK.TOKtypedef },
        { "alias", TOK.TOKalias },
        { "override", TOK.TOKoverride },
        { "abstract", TOK.TOKabstract },
        { "volatile", TOK.TOKvolatile },
        { "debug", TOK.TOKdebug },
        { "deprecated", TOK.TOKdeprecated },
        { "in", TOK.TOKin },
        { "out", TOK.TOKout },
        { "inout", TOK.TOKinout },
        { "lazy", TOK.TOKlazy },
        { "auto", TOK.TOKauto },

        { "align", TOK.TOKalign },
        { "extern", TOK.TOKextern },
        { "private", TOK.TOKprivate },
        { "package", TOK.TOKpackage },
        { "protected", TOK.TOKprotected },
        { "public", TOK.TOKpublic },
        { "export", TOK.TOKexport },

        { "body", TOK.TOKbody },
        { "invariant", TOK.TOKinvariant },
        { "unittest", TOK.TOKunittest },
        { "version", TOK.TOKversion },
        //{ "manifest", TOK.TOKmanifest },

        // Added after 1.0
        { "ref", TOK.TOKref },
        { "macro", TOK.TOKmacro },
        { "pure", TOK.TOKpure },
        { "nothrow", TOK.TOKnothrow },
        { "__thread", TOK.TOKtls },
        { "__gshared", TOK.TOKgshared },
        { "__traits", TOK.TOKtraits },
        { "__overloadset", TOK.TOKoverloadset },
        { "__FILE__", TOK.TOKfile },
        { "__LINE__", TOK.TOKline },
        { "shared", TOK.TOKshared },
        { "immutable", TOK.TOKimmutable },
    ];
}
else
{
    // Keyword spellings and their token values. initKeywords() drops the
    // last two entries when global.params.Dversion == 1.
    static Keyword[] keywords =
    [
        //  { "",               TOK },

        { "this", TOK.TOKthis },
        { "super", TOK.TOKsuper },
        { "assert", TOK.TOKassert },
        { "null", TOK.TOKnull },
        { "true", TOK.TOKtrue },
        { "false", TOK.TOKfalse },
        { "cast", TOK.TOKcast },
        { "new", TOK.TOKnew },
        { "delete", TOK.TOKdelete },
        { "throw", TOK.TOKthrow },
        { "module", TOK.TOKmodule },
        { "pragma", TOK.TOKpragma },
        { "typeof", TOK.TOKtypeof },
        { "typeid", TOK.TOKtypeid },

        { "template", TOK.TOKtemplate },

        { "void", TOK.TOKvoid },
        { "byte", TOK.TOKint8 },
        { "ubyte", TOK.TOKuns8 },
        { "short", TOK.TOKint16 },
        { "ushort", TOK.TOKuns16 },
        { "int", TOK.TOKint32 },
        { "uint", TOK.TOKuns32 },
        { "long", TOK.TOKint64 },
        { "ulong", TOK.TOKuns64 },
        { "cent", TOK.TOKcent, },
        { "ucent", TOK.TOKucent, },
        { "float", TOK.TOKfloat32 },
        { "double", TOK.TOKfloat64 },
        { "real", TOK.TOKfloat80 },

        { "bool", TOK.TOKbool },
        { "char", TOK.TOKchar },
        { "wchar", TOK.TOKwchar },
        { "dchar", TOK.TOKdchar },

        { "ifloat", TOK.TOKimaginary32 },
        { "idouble", TOK.TOKimaginary64 },
        { "ireal", TOK.TOKimaginary80 },

        { "cfloat", TOK.TOKcomplex32 },
        { "cdouble", TOK.TOKcomplex64 },
        { "creal", TOK.TOKcomplex80 },

        { "delegate", TOK.TOKdelegate },
        { "function", TOK.TOKfunction },

        { "is", TOK.TOKis },
        { "if", TOK.TOKif },
        { "else", TOK.TOKelse },
        { "while", TOK.TOKwhile },
        { "for", TOK.TOKfor },
        { "do", TOK.TOKdo },
        { "switch", TOK.TOKswitch },
        { "case", TOK.TOKcase },
        { "default", TOK.TOKdefault },
        { "break", TOK.TOKbreak },
        { "continue", TOK.TOKcontinue },
        { "synchronized", TOK.TOKsynchronized },
        { "return", TOK.TOKreturn },
        { "goto", TOK.TOKgoto },
        { "try", TOK.TOKtry },
        { "catch", TOK.TOKcatch },
        { "finally", TOK.TOKfinally },
        { "with", TOK.TOKwith },
        { "asm", TOK.TOKasm },
        { "foreach", TOK.TOKforeach },
        { "foreach_reverse", TOK.TOKforeach_reverse },
        { "scope", TOK.TOKscope },

        { "struct", TOK.TOKstruct },
        { "class", TOK.TOKclass },
        { "interface", TOK.TOKinterface },
        { "union", TOK.TOKunion },
        { "enum", TOK.TOKenum },
        { "import", TOK.TOKimport },
        { "mixin", TOK.TOKmixin },
        { "static", TOK.TOKstatic },
        { "final", TOK.TOKfinal },
        { "const", TOK.TOKconst },
        { "typedef", TOK.TOKtypedef },
        { "alias", TOK.TOKalias },
        { "override", TOK.TOKoverride },
        { "abstract", TOK.TOKabstract },
        { "volatile", TOK.TOKvolatile },
        { "debug", TOK.TOKdebug },
        { "deprecated", TOK.TOKdeprecated },
        { "in", TOK.TOKin },
        { "out", TOK.TOKout },
        { "inout", TOK.TOKinout },
        { "lazy", TOK.TOKlazy },
        { "auto", TOK.TOKauto },

        { "align", TOK.TOKalign },
        { "extern", TOK.TOKextern },
        { "private", TOK.TOKprivate },
        { "package", TOK.TOKpackage },
        { "protected", TOK.TOKprotected },
        { "public", TOK.TOKpublic },
        { "export", TOK.TOKexport },

        { "body", TOK.TOKbody },
        { "invariant", TOK.TOKinvariant },
        { "unittest", TOK.TOKunittest },
        { "version", TOK.TOKversion },
        //{ "manifest", TOK.TOKmanifest },

        // Added after 1.0
        { "ref", TOK.TOKref },
        { "macro", TOK.TOKmacro },
    ];
}

    // Character classification table indexed by byte value; filled in by
    // cmtable_init(). Each entry is a bit set of the CM* flags below.
    static ubyte cmtable[256];
    enum CMoctal = 0x1;
    enum CMhex = 0x2;
    enum CMidchar = 0x4;

    /// Non-zero if c is an octal digit according to cmtable.
    ubyte isoctal (ubyte c) { return cmtable[c] & CMoctal; }
    /// Non-zero if c is a hexadecimal digit according to cmtable.
    ubyte ishex   (ubyte c) { return cmtable[c] & CMhex; }
    /// Non-zero if c may appear in an identifier according to cmtable.
    ubyte isidchar(ubyte c) { return cmtable[c] & CMidchar; }

    /// Fills cmtable with the octal / hex / identifier-character flags.
    static void cmtable_init()
    {
        for (uint c = 0; c < cmtable.length; c++)
        {
            if ('0' <= c && c <= '7')
                cmtable[c] |= CMoctal;
            if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
                cmtable[c] |= CMhex;
            if (isalnum(c) || c == '_')
                cmtable[c] |= CMidchar;
        }
    }

    /**
     * Registers every keyword in the string table, initialises cmtable and
     * fills Token.tochars with the printable spelling of each token.
     *
     * When global.params.Dversion is 1 the last two entries of the keyword
     * table are not registered.
     */
    static void initKeywords()
    {
        uint nkeywords = keywords.length;

        if (global.params.Dversion == 1)
            nkeywords -= 2;

        cmtable_init();

        for (uint u = 0; u < nkeywords; u++)
        {
            //printf("keyword[%d] = '%.*s'\n",u, keywords[u].name);
            string s = keywords[u].name;
            TOK v = keywords[u].value;
            StringValue* sv = stringtable.insert(s);
            sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v);

            //printf("tochars[%d] = '%s'\n",v, s);
            Token.tochars[v] = s;
        }

        Token.tochars[TOK.TOKeof] = "EOF";
        Token.tochars[TOK.TOKlcurly] = "{";
        Token.tochars[TOK.TOKrcurly] = "}";
        Token.tochars[TOK.TOKlparen] = "(";
        Token.tochars[TOK.TOKrparen] = ")";
        Token.tochars[TOK.TOKlbracket] = "[";
        Token.tochars[TOK.TOKrbracket] = "]";
        Token.tochars[TOK.TOKsemicolon] = ";";
        Token.tochars[TOK.TOKcolon] = ":";
        Token.tochars[TOK.TOKcomma] = ",";
        Token.tochars[TOK.TOKdot] = ".";
        Token.tochars[TOK.TOKxor] = "^";
        Token.tochars[TOK.TOKxorass] = "^=";
        Token.tochars[TOK.TOKassign] = "=";
        Token.tochars[TOK.TOKconstruct] = "=";
version (DMDV2) {
        Token.tochars[TOK.TOKblit] = "=";
}
        Token.tochars[TOK.TOKlt] = "<";
        Token.tochars[TOK.TOKgt] = ">";
        Token.tochars[TOK.TOKle] = "<=";
        Token.tochars[TOK.TOKge] = ">=";
        Token.tochars[TOK.TOKequal] = "==";
        Token.tochars[TOK.TOKnotequal] = "!=";
        Token.tochars[TOK.TOKnotidentity] = "!is";
        Token.tochars[TOK.TOKtobool] = "!!";

        Token.tochars[TOK.TOKunord] = "!<>=";
        Token.tochars[TOK.TOKue] = "!<>";
        Token.tochars[TOK.TOKlg] = "<>";
        Token.tochars[TOK.TOKleg] = "<>=";
        Token.tochars[TOK.TOKule] = "!>";
        Token.tochars[TOK.TOKul] = "!>=";
        Token.tochars[TOK.TOKuge] = "!<";
        Token.tochars[TOK.TOKug] = "!<=";

        Token.tochars[TOK.TOKnot] = "!";
        Token.tochars[TOK.TOKshl] = "<<";
        Token.tochars[TOK.TOKshr] = ">>";
        Token.tochars[TOK.TOKushr] = ">>>";
        Token.tochars[TOK.TOKadd] = "+";
        Token.tochars[TOK.TOKmin] = "-";
        Token.tochars[TOK.TOKmul] = "*";
        Token.tochars[TOK.TOKdiv] = "/";
        Token.tochars[TOK.TOKmod] = "%";
        Token.tochars[TOK.TOKslice] = "..";
        Token.tochars[TOK.TOKdotdotdot] = "...";
        Token.tochars[TOK.TOKand] = "&";
        Token.tochars[TOK.TOKandand] = "&&";
        Token.tochars[TOK.TOKor] = "|";
        Token.tochars[TOK.TOKoror] = "||";
        Token.tochars[TOK.TOKarray] = "[]";
        Token.tochars[TOK.TOKindex] = "[i]";
        Token.tochars[TOK.TOKaddress] = "&";
        Token.tochars[TOK.TOKstar] = "*";
        Token.tochars[TOK.TOKtilde] = "~";
        Token.tochars[TOK.TOKdollar] = "$";
        Token.tochars[TOK.TOKcast] = "cast";
        Token.tochars[TOK.TOKplusplus] = "++";
        Token.tochars[TOK.TOKminusminus] = "--";
        Token.tochars[TOK.TOKtype] = "type";
        Token.tochars[TOK.TOKquestion] = "?";
        Token.tochars[TOK.TOKneg] = "-";
        Token.tochars[TOK.TOKuadd] = "+";
        Token.tochars[TOK.TOKvar] = "var";
        Token.tochars[TOK.TOKaddass] = "+=";
        Token.tochars[TOK.TOKminass] = "-=";
        Token.tochars[TOK.TOKmulass] = "*=";
        Token.tochars[TOK.TOKdivass] = "/=";
        Token.tochars[TOK.TOKmodass] = "%=";
        Token.tochars[TOK.TOKshlass] = "<<=";
        Token.tochars[TOK.TOKshrass] = ">>=";
        Token.tochars[TOK.TOKushrass] = ">>>=";
        Token.tochars[TOK.TOKandass] = "&=";
        Token.tochars[TOK.TOKorass] = "|=";
        Token.tochars[TOK.TOKcatass] = "~=";
        Token.tochars[TOK.TOKcat] = "~";
        Token.tochars[TOK.TOKcall] = "call";
        Token.tochars[TOK.TOKidentity] = "is";

        Token.tochars[TOK.TOKidentifier] = "identifier";
        Token.tochars[TOK.TOKat] = "@";

        // For debugging
        Token.tochars[TOK.TOKdotexp] = "dotexp";
        Token.tochars[TOK.TOKdotti] = "dotti";
        Token.tochars[TOK.TOKdotvar] = "dotvar";
        Token.tochars[TOK.TOKdottype] = "dottype";
        Token.tochars[TOK.TOKsymoff] = "symoff";
        Token.tochars[TOK.TOKarraylength] = "arraylength";
        Token.tochars[TOK.TOKarrayliteral] = "arrayliteral";
        Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral";
        Token.tochars[TOK.TOKstructliteral] = "structliteral";
        Token.tochars[TOK.TOKstring] = "string";
        Token.tochars[TOK.TOKdsymbol] = "symbol";
        Token.tochars[TOK.TOKtuple] = "tuple";
        Token.tochars[TOK.TOKdeclaration] = "declaration";
        Token.tochars[TOK.TOKdottd] = "dottd";
        Token.tochars[TOK.TOKon_scope_exit] = "scope(exit)";
        Token.tochars[TOK.TOKon_scope_success] = "scope(success)";
        Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)";
    }

    /**
     * Returns the interned Identifier for s, creating a TOKidentifier
     * entry in the string table the first time s is seen.
     */
    static Identifier idPool(string s)
    {
        StringValue* sv = stringtable.update(s);
        Identifier id = cast(Identifier) sv.ptrvalue;
        if (id is null)
        {
            id = new Identifier(sv.lstring.string_, TOK.TOKidentifier);
            sv.ptrvalue = cast(void*)id;
        }

        return id;
    }

    /// Creates a unique identifier from prefix s and an internal counter.
    static Identifier uniqueId(string s)
    {
        static int num;
        return uniqueId(s, ++num);
    }

    /*********************************************
     * Create a unique identifier using the prefix s.
     *
     * The name is s followed by the decimal value of num. The prefix must
     * be short enough for the result to fit a 32-byte buffer (asserted).
     */
    static Identifier uniqueId(string s, int num)
    {
        char buffer[32];
        size_t slen = s.length;
        assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof);
        // "%.*s" consumes an int precision followed by a char pointer, so
        // pass both explicitly instead of relying on how a D slice happens
        // to be laid out when handed to a C variadic function.
        int len = sprintf(buffer.ptr, "%.*s%d", cast(int)slen, s.ptr, num);

        return idPool(buffer[0..len].idup);
    }

    /**
     * Advances to the next token and returns its value.
     *
     * A token already produced by look-ahead is copied into `token` and its
     * node is pushed on the free list; otherwise a new token is scanned.
     */
    TOK nextToken()
    {
        Token *t;

        if (token.next)
        {
            t = token.next;
            memcpy(&token, t, Token.sizeof);
            t.next = freelist;
            freelist = t;
        }
        else
        {
            scan(&token);
        }

        //token.print();
        return token.value;
    }

    /***********************
     * Look ahead at next token's value.
*/
    TOK peekNext()
    {
        return peek(&token).value;
    }

    /***********************
     * Look ahead two tokens and return that token's value.
     * The current token is left unchanged; the look-ahead tokens stay
     * chained behind it for later nextToken() calls.
     */
    TOK peekNext2()
    {
        Token* t = peek(&token);
        return peek(t).value;
    }

    /***********************
     * Scan the next token from the input into *t.
     *
     * White space and, unless commentToken is set, comments are skipped.
     * loc.linnum is advanced for every line ending consumed. A 0 or 0x1A
     * byte produces TOKeof without advancing p.
     */
    void scan(Token* t)
    {
        uint lastLine = loc.linnum;     // line on which this scan started
        uint linnum;                    // line on which the current comment started

        t.blockComment = null;
        t.lineComment = null;
        while (1)
        {
            t.ptr = p;
            //printf("p = %p, *p = '%c'\n",p,*p);
            switch (*p)
            {
                case 0:
                case 0x1A:
                    t.value = TOK.TOKeof;           // end of file
                    return;

                case ' ':
                case '\t':
                case '\v':
                case '\f':
                    p++;
                    continue;                       // skip white space

                case '\r':
                    p++;
                    if (*p != '\n')                 // if CR stands by itself
                        loc.linnum++;
                    continue;                       // skip white space

                case '\n':
                    p++;
                    loc.linnum++;
                    continue;                       // skip white space

                case '0':   case '1':   case '2':   case '3':   case '4':
                case '5':   case '6':   case '7':   case '8':   case '9':
                    t.value = number(t);
                    return;

version (CSTRINGS) {
                case '\'':
                    t.value = charConstant(t, 0);
                    return;

                case '"':
                    t.value = stringConstant(t,0);
                    return;

                case 'l':
                case 'L':
                    if (p[1] == '\'')
                    {
                        p++;
                        t.value = charConstant(t, 1);
                        return;
                    }
                    else if (p[1] == '"')
                    {
                        p++;
                        t.value = stringConstant(t, 1);
                        return;
                    }
} else {
                case '\'':
                    t.value = charConstant(t,0);
                    return;

                case 'r':
                    if (p[1] != '"')
                        goto case_ident;
                    p++;
                    // r"..." : falls through with p on the opening quote
                case '`':
                    t.value = wysiwygStringConstant(t, *p);
                    return;

                case 'x':
                    if (p[1] != '"')
                        goto case_ident;
                    p++;
                    t.value = hexStringConstant(t);
                    return;

version (DMDV2) {
                case 'q':
                    if (p[1] == '"')
                    {
                        p++;
                        t.value = delimitedStringConstant(t);
                        return;
                    }
                    else if (p[1] == '{')
                    {
                        p++;
                        t.value = tokenStringConstant(t);
                        return;
                    }
                    else
                        goto case_ident;
}

                case '"':
                    t.value = escapeStringConstant(t,0);
                    return;

version (TEXTUAL_ASSEMBLY_OUT) {
} else {
                case '\\':                  // escaped string literal
                {
                    uint c;
                    ubyte* pstart = p;

                    stringbuffer.reset();
                    do
                    {
                        p++;
                        switch (*p)
                        {
                            case 'u':
                            case 'U':
                            case '&':
                                c = escapeSequence();
                                stringbuffer.writeUTF8(c);
                                break;

                            default:
                                c = escapeSequence();
                                stringbuffer.writeByte(c);
                                break;
                        }
                    } while (*p == '\\');
                    t.len = stringbuffer.offset;
                    stringbuffer.writeByte(0);
                    char* cc = cast(char*)GC.malloc(stringbuffer.offset);
                    memcpy(cc, stringbuffer.data, stringbuffer.offset);
                    t.ustring = cc;
                    t.postfix = 0;
                    t.value = TOK.TOKstring;
                    if (!global.params.useDeprecated)
                        error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart);
                    return;
                }
}
                case 'l':
                case 'L':
}
                // Identifier start characters. Letters that may instead
                // introduce a string literal are handled by the cases above.
                case 'a':   case 'b':   case 'c':   case 'd':   case 'e':
                case 'f':   case 'g':   case 'h':   case 'i':   case 'j':
                case 'k':               case 'm':   case 'n':   case 'o':
version (DMDV2) {
                case 'p':   /*case 'q': case 'r':*/ case 's':   case 't':
} else {
                case 'p':   case 'q': /*case 'r':*/ case 's':   case 't':
}
                case 'u':   case 'v':   case 'w': /*case 'x':*/ case 'y':
                case 'z':
                case 'A':   case 'B':   case 'C':   case 'D':   case 'E':
                case 'F':   case 'G':   case 'H':   case 'I':   case 'J':
                case 'K':               case 'M':   case 'N':   case 'O':
                case 'P':   case 'Q':   case 'R':   case 'S':   case 'T':
                case 'U':   case 'V':   case 'W':   case 'X':   case 'Y':
                case 'Z':
                case '_':
                case_ident:
                {
                    ubyte c;
                    StringValue *sv;
                    Identifier id;

                    do
                    {
                        c = *++p;
                    } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
                    sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); ///
                    id = cast(Identifier) sv.ptrvalue;
                    if (id is null)
                    {
                        id = new Identifier(sv.lstring.string_, TOK.TOKidentifier);
                        sv.ptrvalue = cast(void*)id;
                    }
                    t.ident = id;
                    t.value = cast(TOK) id.value;
                    anyToken = 1;
                    if (*t.ptr == '_')      // if special identifier token
                    {
                        static char date[11+1];
                        static char time[8+1];
                        static char timestamp[24+1];

                        if (!date[0])       // lazy evaluation
                        {
                            time_t tm;
                            char *p;        // local; hides the member p inside this block
                            .time(&tm);
                            p = ctime(&tm);
                            assert(p);
                            sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
                            sprintf(time.ptr, "%.8s", p + 11);
                            sprintf(timestamp.ptr, "%.24s", p);
                        }

///version (DMDV1) {
///                     if (mod && id == Id.FILE)
///                     {
///                         t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars());
///                         goto Lstr;
///                     }
///                     else if (mod && id == Id.LINE)
///                     {
///                         t.value = TOK.TOKint64v;
///                         t.uns64value = loc.linnum;
///                     }
///                     else
///}
                        if (id == Id.DATE)
                        {
                            t.ustring = date.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.TIME)
                        {
                            t.ustring = time.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.VENDOR)
                        {
                            t.ustring = "Digital Mars D".ptr;
                            goto Lstr;
                        }
                        else if (id == Id.TIMESTAMP)
                        {
                            t.ustring = timestamp.ptr;
                        Lstr:
                            t.value = TOK.TOKstring;
                        Llen:
                            t.postfix = 0;
                            t.len = strlen(cast(char*)t.ustring);
                        }
                        else if (id == Id.VERSIONX)
                        {
                            uint major = 0;
                            uint minor = 0;

                            // parse "major.minor" from the version string,
                            // skipping its first character
                            foreach (char cc; global.version_[1..$])
                            {
                                if (isdigit(cc))
                                    minor = minor * 10 + cc - '0';
                                else if (cc == '.')
                                {
                                    major = minor;
                                    minor = 0;
                                }
                                else
                                    break;
                            }
                            t.value = TOK.TOKint64v;
                            t.uns64value = major * 1000 + minor;
                        }
///version (DMDV2) {
                        else if (id == Id.EOFX)
                        {
                            t.value = TOK.TOKeof;
                            // Advance scanner to end of file
                            while (!(*p == 0 || *p == 0x1A))
                                p++;
                        }
///}
                    }
                    //printf("t.value = %d\n",t.value);
                    return;
                }

                case '/':
                    p++;
                    switch (*p)
                    {
                        case '=':
                            p++;
                            t.value = TOK.TOKdivass;
                            return;

                        case '*':
                            // block comment
                            p++;
                            linnum = loc.linnum;
                            while (1)
                            {
                                while (1)
                                {
                                    ubyte c = *p;
                                    switch (c)
                                    {
                                        case '/':
                                            break;

                                        case '\n':
                                            loc.linnum++;
                                            p++;
                                            continue;

                                        case '\r':
                                            p++;
                                            if (*p != '\n')
                                                loc.linnum++;
                                            continue;

                                        case 0:
                                        case 0x1A:
                                            error("unterminated /* */ comment");
                                            p = end;
                                            t.value = TOK.TOKeof;
                                            return;

                                        default:
                                            if (c & 0x80)
                                            {
                                                uint u = decodeUTF();
                                                if (u == PS || u == LS)
                                                    loc.linnum++;
                                            }
                                            p++;
                                            continue;
                                    }
                                    break;
                                }
                                p++;
                                // a '/' ends the comment only when preceded by a '*'
                                // that is not the '*' of the opening delimiter
                                if (p[-2] == '*' && p - 3 != t.ptr)
                                    break;
                            }
                            if (commentToken)
                            {
                                t.value = TOK.TOKcomment;
                                return;
                            }
                            else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
                            {   // if /** but not /**/
                                getDocComment(t, lastLine == linnum);
                            }
                            continue;

                        case '/':           // do // style comments
                            linnum = loc.linnum;
                            while (1)
                            {
                                ubyte c = *++p;
                                switch (c)
                                {
                                    case '\n':
                                        break;

                                    case '\r':
                                        if (p[1] == '\n')
                                            p++;
                                        break;

                                    case 0:
                                    case 0x1A:
                                        if (commentToken)
                                        {
                                            p = end;
                                            t.value = TOK.TOKcomment;
                                            return;
                                        }
                                        if (doDocComment && t.ptr[2] == '/')
                                            getDocComment(t, lastLine == linnum);
                                        p = end;
                                        t.value = TOK.TOKeof;
                                        return;

                                    default:
                                        if (c & 0x80)
                                        {
                                            uint u = decodeUTF();
                                            if (u == PS || u == LS)
                                                break;
                                        }
                                        continue;
                                }
                                break;
                            }

                            if (commentToken)
                            {
                                p++;
                                loc.linnum++;
                                t.value = TOK.TOKcomment;
                                return;
                            }
                            if (doDocComment && t.ptr[2] == '/')
                                getDocComment(t, lastLine == linnum);

                            p++;
                            loc.linnum++;
                            continue;

                        case '+':
                        {
                            // nesting comment
                            int nest;

                            linnum = loc.linnum;
                            p++;
                            nest = 1;
                            while (1)
                            {
                                ubyte c = *p;
                                switch (c)
                                {
                                    case '/':
                                        p++;
                                        if (*p == '+')
                                        {
                                            p++;
                                            nest++;
                                        }
                                        continue;

                                    case '+':
                                        p++;
                                        if (*p == '/')
                                        {
                                            p++;
                                            if (--nest == 0)
                                                break;
                                        }
                                        continue;

                                    case '\r':
                                        p++;
                                        if (*p != '\n')
                                            loc.linnum++;
                                        continue;

                                    case '\n':
                                        loc.linnum++;
                                        p++;
                                        continue;

                                    case 0:
                                    case 0x1A:
                                        error("unterminated /+ +/ comment");
                                        p = end;
                                        t.value = TOK.TOKeof;
                                        return;

                                    default:
                                        if (c & 0x80)
                                        {
                                            uint u = decodeUTF();
                                            if (u == PS || u == LS)
                                                loc.linnum++;
                                        }
                                        p++;
                                        continue;
                                }
                                break;
                            }
                            if (commentToken)
                            {
                                t.value = TOK.TOKcomment;
                                return;
                            }
                            if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
                            {   // if /++ but not /++/
                                getDocComment(t, lastLine == linnum);
                            }
                            continue;
                        }

                        default:
                            break;  ///
                    }
                    t.value = TOK.TOKdiv;
                    return;

                case '.':
                    p++;
                    if (isdigit(*p))
                    {   /* Note that we don't allow ._1 and ._ as being
                         * valid floating point numbers.
                         */
                        p--;
                        t.value = inreal(t);
                    }
                    else if (p[0] == '.')
                    {
                        if (p[1] == '.')
                        {
                            p += 2;
                            t.value = TOK.TOKdotdotdot;
                        }
                        else
                        {
                            p++;
                            t.value = TOK.TOKslice;
                        }
                    }
                    else
                        t.value = TOK.TOKdot;
                    return;

                case '&':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKandass;
                    }
                    else if (*p == '&')
                    {
                        p++;
                        t.value = TOK.TOKandand;
                    }
                    else
                        t.value = TOK.TOKand;
                    return;

                case '|':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKorass;
                    }
                    else if (*p == '|')
                    {
                        p++;
                        t.value = TOK.TOKoror;
                    }
                    else
                        t.value = TOK.TOKor;
                    return;

                case '-':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKminass;
                    }
/// #if 0
///                 else if (*p == '>')
///                 {   p++;
///                     t.value = TOK.TOKarrow;
///                 }
/// #endif
                    else if (*p == '-')
                    {
                        p++;
                        t.value = TOK.TOKminusminus;
                    }
                    else
                        t.value = TOK.TOKmin;
                    return;

                case '+':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKaddass;
                    }
                    else if (*p == '+')
                    {
                        p++;
                        t.value = TOK.TOKplusplus;
                    }
                    else
                        t.value = TOK.TOKadd;
                    return;

                case '<':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKle;            // <=
                    }
                    else if (*p == '<')
                    {
                        p++;
                        if (*p == '=')
                        {
                            p++;
                            t.value = TOK.TOKshlass;    // <<=
                        }
                        else
                            t.value = TOK.TOKshl;       // <<
                    }
                    else if (*p == '>')
                    {
                        p++;
                        if (*p == '=')
                        {
                            p++;
                            t.value = TOK.TOKleg;       // <>=
                        }
                        else
                            t.value = TOK.TOKlg;        // <>
                    }
                    else
                        t.value = TOK.TOKlt;            // <
                    return;

                case '>':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKge;            // >=
                    }
                    else if (*p == '>')
                    {
                        p++;
                        if (*p == '=')
                        {
                            p++;
                            t.value = TOK.TOKshrass;    // >>=
                        }
                        else if (*p == '>')
                        {
                            p++;
                            if (*p == '=')
                            {
                                p++;
                                t.value = TOK.TOKushrass;   // >>>=
                            }
                            else
                                t.value = TOK.TOKushr;      // >>>
                        }
                        else
                            t.value = TOK.TOKshr;       // >>
                    }
                    else
                        t.value = TOK.TOKgt;            // >
                    return;

                case '!':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        if (*p == '=' && global.params.Dversion == 1)
                        {
                            p++;
                            t.value = TOK.TOKnotidentity;   // !==
                        }
                        else
                            t.value = TOK.TOKnotequal;      // !=
                    }
                    else if (*p == '<')
                    {
                        p++;
                        if (*p == '>')
                        {
                            p++;
                            if (*p == '=')
                            {
                                p++;
                                t.value = TOK.TOKunord; // !<>=
                            }
                            else
                                t.value = TOK.TOKue;    // !<>
                        }
                        else if (*p == '=')
                        {
                            p++;
                            t.value = TOK.TOKug;        // !<=
                        }
                        else
                            t.value = TOK.TOKuge;       // !<
                    }
                    else if (*p == '>')
                    {
                        p++;
                        if (*p == '=')
                        {
                            p++;
                            t.value = TOK.TOKul;        // !>=
                        }
                        else
                            t.value = TOK.TOKule;       // !>
                    }
                    else
                        t.value = TOK.TOKnot;           // !
                    return;

                case '=':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        if (*p == '=' && global.params.Dversion == 1)
                        {
                            p++;
                            t.value = TOK.TOKidentity;      // ===
                        }
                        else
                            t.value = TOK.TOKequal;         // ==
                    }
                    else
                        t.value = TOK.TOKassign;            // =
                    return;

                case '~':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKcatass;            // ~=
                    }
                    else
                        t.value = TOK.TOKtilde;             // ~
                    return;

/*
#define SINGLE(c,tok) case c: p++; t.value = tok; return;

                SINGLE('(', TOKlparen)
                SINGLE(')', TOKrparen)
                SINGLE('[', TOKlbracket)
                SINGLE(']', TOKrbracket)
                SINGLE('{', TOKlcurly)
                SINGLE('}', TOKrcurly)
                SINGLE('?', TOKquestion)
                SINGLE(',', TOKcomma)
                SINGLE(';', TOKsemicolon)
                SINGLE(':', TOKcolon)
                SINGLE('$', TOKdollar)
                SINGLE('@', TOKat)

#undef SINGLE

#define DOUBLE(c1,tok1,c2,tok2)     \
                case c1:            \
                    p++;            \
                    if (*p == c2)   \
                    {   p++;        \
                        t.value = tok2; \
                    }               \
                    else            \
                        t.value = tok1; \
                    return;

                DOUBLE('*', TOKmul, '=', TOKmulass)
                DOUBLE('%', TOKmod, '=', TOKmodass)
                DOUBLE('^', TOKxor, '=', TOKxorass)

#undef DOUBLE
*/

                case '(': p++; t.value = TOK.TOKlparen; return;
                case ')': p++; t.value = TOK.TOKrparen; return;
                case '[': p++; t.value = TOK.TOKlbracket; return;
                case ']': p++; t.value = TOK.TOKrbracket; return;
                case '{': p++; t.value = TOK.TOKlcurly; return;
                case '}': p++; t.value = TOK.TOKrcurly; return;
                case '?': p++; t.value = TOK.TOKquestion; return;
                case ',': p++; t.value = TOK.TOKcomma; return;
                case ';': p++; t.value = TOK.TOKsemicolon; return;
                case ':': p++; t.value = TOK.TOKcolon; return;
                case '$': p++; t.value = TOK.TOKdollar; return;
                case '@': p++; t.value = TOK.TOKat; return;

                case '*':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKmulass;
                    }
                    else
                    {
                        t.value = TOK.TOKmul;
                    }
                    return;

                case '%':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKmodass;
                    }
                    else
                    {
                        t.value = TOK.TOKmod;
                    }
                    return;

                case '^':
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.TOKxorass;
                    }
                    else
                    {
                        t.value = TOK.TOKxor;
                    }
                    return;

                case '#':
                    p++;
                    pragma_();
                    continue;

                default:
                {
                    ubyte c = *p;

                    if (c & 0x80)
                    {
                        uint u = decodeUTF();

                        // Check for start of unicode identifier
                        if (isUniAlpha(u))
                            goto case_ident;

                        if (u == PS || u == LS)
                        {
                            loc.linnum++;
                            p++;
                            continue;
                        }
                    }
                    if (isprint(c))
                        error("unsupported char '%c'", c);
                    else
                        error("unsupported char 0x%02x", c);
                    p++;
                    continue;
                }
            }
        }
    }

    /***********************
     * Return the token following ct, scanning it first if it has not been
     * read yet. Scanned tokens are chained through Token.next.
     */
    Token* peek(Token* ct)
    {
        Token* t;

        if (ct.next)
            t = ct.next;
        else
        {
            t = new Token();
            scan(t);
            t.next = null;
            ct.next = t;
        }
        return t;
    }

    /***********************
     * Starting just after an opening parenthesis, look ahead for the token
     * that follows the matching closing parenthesis.
     *
     * The search also stops at an unbalanced '}', at a ';' outside any
     * curly braces, or at end of file, and returns that token instead.
     */
    Token* peekPastParen(Token* tk)
    {
        //printf("peekPastParen()\n");
        int parens = 1;
        int curlynest = 0;
        while (1)
        {
            tk = peek(tk);
            //tk.print();
            switch (tk.value)
            {
                case TOK.TOKlparen:
                    parens++;
                    continue;

                case TOK.TOKrparen:
                    --parens;
                    if (parens)
                        continue;
                    tk = peek(tk);
                    break;

                case TOK.TOKlcurly:
                    curlynest++;
                    continue;

                case TOK.TOKrcurly:
                    if (--curlynest >= 0)
                        continue;
                    break;

                case TOK.TOKsemicolon:
                    if (curlynest)
                        continue;
                    break;

                case TOK.TOKeof:
                    break;

                default:
                    continue;
            }
            return tk;
        }
    }

    /*******************************************
     * Parse escape sequence.
*/
    uint escapeSequence()
    {
        // On entry p is on the character that follows the backslash.
        // Returns the character value the sequence stands for; on end of
        // file a backslash is returned and p is not advanced.
        uint c = *p;

version (TEXTUAL_ASSEMBLY_OUT) {
        return c;
}
        int n;
        int ndigits;

        switch (c)
        {
            case '\'':
            case '"':
            case '?':
            case '\\':
            Lconsume:
                p++;
                break;

            case 'a':   c = 7;      goto Lconsume;
            case 'b':   c = 8;      goto Lconsume;
            case 'f':   c = 12;     goto Lconsume;
            case 'n':   c = 10;     goto Lconsume;
            case 'r':   c = 13;     goto Lconsume;
            case 't':   c = 9;      goto Lconsume;
            case 'v':   c = 11;     goto Lconsume;

            case 'u':
                ndigits = 4;
                goto Lhex;
            case 'U':
                ndigits = 8;
                goto Lhex;
            case 'x':
                ndigits = 2;
            Lhex:
                p++;
                c = *p;
                if (ishex(cast(ubyte)c))
                {
                    uint v;

                    n = 0;
                    v = 0;
                    while (1)
                    {
                        if (isdigit(c))
                            c -= '0';
                        else if (islower(c))
                            c -= 'a' - 10;
                        else
                            c -= 'A' - 10;
                        v = v * 16 + c;
                        c = *++p;
                        if (++n == ndigits)
                            break;
                        if (!ishex(cast(ubyte)c))
                        {
                            error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                            break;
                        }
                    }
                    if (ndigits != 2 && !utf_isValidDchar(v))
                    {
                        error("invalid UTF character \\U%08x", v);
                        v = '?';    // recover with valid UTF character
                    }
                    c = v;
                }
                else
                    error("undefined escape hex sequence \\%c\n",c);
                break;

            case '&':           // named character entity
                for (ubyte* idstart = ++p; true; p++)
                {
                    switch (*p)
                    {
                        case ';':
                            c = HtmlNamedEntity(idstart, p - idstart);
                            if (c == ~0)
                            {
                                error("unnamed character entity &%s;", idstart[0..(p - idstart)]);
                                c = ' ';
                            }
                            p++;
                            break;

                        default:
                            if (isalpha(*p) || (p != idstart + 1 && isdigit(*p)))
                                continue;
                            error("unterminated named entity");
                            break;
                    }
                    break;
                }
                break;

            case 0:
            case 0x1A:          // end of file
                c = '\\';
                break;

            default:
                if (isoctal(cast(ubyte)c))
                {
                    uint v;

                    n = 0;
                    v = 0;
                    // up to three octal digits
                    do
                    {
                        v = v * 8 + (c - '0');
                        c = *++p;
                    } while (++n < 3 && isoctal(cast(ubyte)c));
                    c = v;
                    if (c > 0xFF)
                        error("0%03o is larger than a byte", c);
                }
                else
                    error("undefined escape sequence \\%c\n",c);
                break;
        }
        return c;
    }

    /**************************************
     * Lex a wysiwyg string (no escape processing), delimited by tc.
     * Input:
     *  p is on the opening delimiter
     * Line endings inside the literal are stored as a single '\n'.
     * The result is stored in t as a 0-terminated copy, followed by an
     * optional postfix character.
     */
    TOK wysiwygStringConstant(Token* t, int tc)
    {
        uint c;
        Loc start = loc;

        p++;
        stringbuffer.reset();
        while (true)
        {
            c = *p++;
            switch (c)
            {
                case '\n':
                    loc.linnum++;
                    break;

                case '\r':
                    if (*p == '\n')
                        continue;   // ignore
                    c = '\n';       // treat EndOfLine as \n character
                    loc.linnum++;
                    break;

                case 0:
                case 0x1A:
                    error("unterminated string constant starting at %s", start.toChars());
                    t.ustring = "".ptr;
                    t.len = 0;
                    t.postfix = 0;
                    return TOKstring;

                case '"':
                case '`':
                    if (c == tc)
                    {
                        t.len = stringbuffer.offset;
                        stringbuffer.writeByte(0);
                        char* tmp = cast(char*)GC.malloc(stringbuffer.offset);
                        memcpy(tmp, stringbuffer.data, stringbuffer.offset);
                        t.ustring = tmp;
                        stringPostfix(t);
                        return TOKstring;
                    }
                    break;

                default:
                    if (c & 0x80)
                    {
                        p--;
                        uint u = decodeUTF();
                        p++;
                        if (u == PS || u == LS)
                            loc.linnum++;
                        stringbuffer.writeUTF8(u);
                        continue;
                    }
                    break;
            }
            stringbuffer.writeByte(c);
        }

        assert(false);
    }

    /**************************************
     * Lex hex strings:
     *  x"0A ae 34FE BD"
     * Input:
     *  p is on the opening quote
     * White space between digits is skipped; each pair of hex digits
     * yields one byte. The result is stored in t as a 0-terminated copy.
     */
    TOK hexStringConstant(Token* t)
    {
        uint c;
        Loc start = loc;
        uint n = 0;     // number of hex digits seen so far
        uint v;         // value accumulated for the current byte

        p++;
        stringbuffer.reset();
        while (1)
        {
            c = *p++;
            switch (c)
            {
                case ' ':
                case '\t':
                case '\v':
                case '\f':
                    continue;           // skip white space

                case '\r':
                    if (*p == '\n')
                        continue;       // ignore
                    // Treat isolated '\r' as if it were a '\n'
                case '\n':
                    loc.linnum++;
                    continue;

                case 0:
                case 0x1A:
                    error("unterminated string constant starting at %s", start.toChars());
                    t.ustring = "".ptr;
                    t.len = 0;
                    t.postfix = 0;
                    return TOKstring;

                case '"':
                    if (n & 1)
                    {
                        error("odd number (%d) of hex characters in hex string", n);
                        stringbuffer.writeByte(v);
                    }
                    t.len = stringbuffer.offset;
                    stringbuffer.writeByte(0);
                    // GC-managed copy, as in the other string lexers of this
                    // class; a C malloc here would be unchecked and never freed.
                    char* mem = cast(char*)GC.malloc(stringbuffer.offset);
                    memcpy(mem, stringbuffer.data, stringbuffer.offset);
                    t.ustring = mem;
                    stringPostfix(t);
                    return TOKstring;

                default:
                    if (c >= '0' && c <= '9')
                        c -= '0';
                    else if (c >= 'a' && c <= 'f')
                        c -= 'a' - 10;
                    else if (c >= 'A' && c <= 'F')
                        c -= 'A' - 10;
                    else if (c & 0x80)
                    {
                        p--;
                        uint u = decodeUTF();
                        p++;
                        if (u == PS || u == LS)
                            loc.linnum++;
                        else
                            error("non-hex character \\u%x", u);
                    }
                    else
                        error("non-hex character '%c'", c);
                    if (n & 1)
                    {
                        v = (v << 4) | c;
                        stringbuffer.writeByte(v);
                    }
                    else
                        v = c;
                    n++;
                    break;
            }
        }
    }

version (DMDV2) {
/**************************************
     * Lex delimited strings:
     *  q"(foo(xxx))"   // "foo(xxx)"
     *  q"[foo(]"       // "foo("
     *  q"/foo]/"       // "foo]"
     *  q"HERE
     *  foo
     *  HERE"           // "foo\n"
     * Input:
     *  p is on the "
     */
    TOK delimitedStringConstant(Token* t)
    {
        uint c;
        Loc start = loc;
        uint delimleft = 0;         // opening delimiter; 0 until the first character is read
        uint delimright = 0;        // character that closes the string
        uint nest = 1;              // 1 when the delimiters nest (bracket pairs)
        uint nestcount;             // current nesting depth of bracket delimiters
        Identifier hereid = null;   // heredoc identifier, if the delimiter is one
        uint blankrol = 0;          // 1 while the rest of the heredoc opening line must be blank
        uint startline = 0;         // 1 right after a line ending

        p++;
        stringbuffer.reset();
        while (1)
        {
            c = *p++;
            //printf("c = '%c'\n", c);
            switch (c)
            {
                case '\n':
                Lnextline:
                    loc.linnum++;
                    startline = 1;
                    if (blankrol)
                    {
                        blankrol = 0;
                        continue;
                    }
                    if (hereid)
                    {
                        stringbuffer.writeUTF8(c);
                        continue;
                    }
                    break;

                case '\r':
                    if (*p == '\n')
                        continue;   // ignore
                    c = '\n';       // treat EndOfLine as \n character
                    goto Lnextline;

                case 0:
                case 0x1A:
                    goto Lerror;

                default:
                    if (c & 0x80)
                    {
                        p--;
                        c = decodeUTF();
                        p++;
                        if (c == PS || c == LS)
                            goto Lnextline;
                    }
                    break;
            }
            if (delimleft == 0)
            {
                // first character after the quote selects the delimiter
                delimleft = c;
                nest = 1;
                nestcount = 1;
                if (c == '(')
                    delimright = ')';
                else if (c == '{')
                    delimright = '}';
                else if (c == '[')
                    delimright = ']';
                else if (c == '<')
                    delimright = '>';
                else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
                {
                    // Start of identifier; must be a heredoc
                    Token t2;
                    p--;
                    scan(&t2);      // read in heredoc identifier
                    if (t2.value != TOKidentifier)
                    {
                        error("identifier expected for heredoc, not %s", t2.toChars());
                        delimright = c;
                    }
                    else
                    {
                        hereid = t2.ident;
                        //printf("hereid = '%s'\n", hereid.toChars());
                        blankrol = 1;
                    }
                    nest = 0;
                }
                else
                {
                    delimright = c;
                    nest = 0;
                    if (isspace(c))
                        error("delimiter cannot be whitespace");
                }
            }
            else
            {
                if (blankrol)
                {
                    error("heredoc rest of line should be blank");
                    blankrol = 0;
                    continue;
                }
                if (nest == 1)
                {
                    if (c == delimleft)
                        nestcount++;
                    else if (c == delimright)
                    {
                        nestcount--;
                        if (nestcount == 0)
                            goto Ldone;
                    }
                }
                else if (c == delimright)
                    goto Ldone;
                if (startline && isalpha(c) && hereid)
                {
                    Token t2;
                    ubyte* psave = p;
                    p--;
                    scan(&t2);      // read in possible heredoc identifier
                    //printf("endid = '%s'\n", t2.ident.toChars());
                    if (t2.value == TOKidentifier && t2.ident.equals(hereid))
                    {
                        /* should check that rest of line is blank
                         */
                        goto Ldone;
                    }
                    p = psave;      // not the terminator: rescan as ordinary text
                }
                stringbuffer.writeUTF8(c);
                startline = 0;
            }
        }

    Ldone:
        if (*p == '"')
            p++;
        else
            error("delimited string must end in %c\"", delimright);
        t.len = stringbuffer.offset;
        stringbuffer.writeByte(0);
        // GC-managed copy, as in the other string lexers of this class;
        // a C malloc here would be unchecked and never freed.
        char* mem = cast(char*)GC.malloc(stringbuffer.offset);
        memcpy(mem, stringbuffer.data, stringbuffer.offset);
        t.ustring = mem;
        stringPostfix(t);
        return TOKstring;

    Lerror:
        error("unterminated string constant starting at %s", start.toChars());
        t.ustring = "".ptr;
        t.len = 0;
        t.postfix = 0;
        return TOKstring;
    }

    /**************************************
     * Lex token strings:
     *  q{ foo(xxx) }   // " foo(xxx) "
     *  q{foo(}         // "foo("
     *  q{{foo}"}"}     // "{foo}"}""
     * Input:
     *  p is on the { (scan() has already stepped over the q)
     * The contents are scanned as tokens only to find the matching
     * closing brace; the stored string is the raw source text between
     * the braces.
     */
    TOK tokenStringConstant(Token* t)
    {
        uint nest = 1;
        Loc start = loc;
        ubyte* pstart = ++p;

        while (true)
        {
            Token tok;

            scan(&tok);
            switch (tok.value)
            {
                case TOKlcurly:
                    nest++;
                    continue;

                case TOKrcurly:
                    if (--nest == 0)
                        goto Ldone;
                    continue;

                case TOKeof:
                    goto Lerror;

                default:
                    continue;
            }
        }

    Ldone:
        t.len = p - 1 - pstart;     // exclude the closing brace
        char* tmp = cast(char*)GC.malloc(t.len + 1);
        memcpy(tmp, pstart, t.len);
        tmp[t.len] = 0;
        t.ustring = tmp;
        stringPostfix(t);
        return TOKstring;

    Lerror:
        error("unterminated token string constant starting at %s", start.toChars());
        t.ustring = "".ptr;
        t.len = 0;
        t.postfix = 0;
        return TOKstring;
    }
}

    /**************************************
     * Lex a double quoted string with escape sequences.
     * Input:
     *  p is on the opening quote
     * Line endings inside the literal are stored as a single '\n'.
     * The parameter wide is not used by this implementation.
     */
    TOK escapeStringConstant(Token* t, int wide)
    {
        uint c;
        Loc start = loc;

        p++;
        stringbuffer.reset();
        while (true)
        {
            c = *p++;
            switch (c)
            {
version (TEXTUAL_ASSEMBLY_OUT) {
} else {
                case '\\':
                    switch (*p)
                    {
                        case 'u':
                        case 'U':
                        case '&':
                            c = escapeSequence();
                            stringbuffer.writeUTF8(c);
                            continue;

                        default:
                            c = escapeSequence();
                            break;
                    }
                    break;
}
                case '\n':
                    loc.linnum++;
                    break;

                case '\r':
                    if (*p == '\n')
                        continue;   // ignore
                    c = '\n';       // treat EndOfLine as \n character
                    loc.linnum++;
                    break;

                case '"':
                    t.len = stringbuffer.offset;
                    stringbuffer.writeByte(0);
                    char* tmp = cast(char*)GC.malloc(stringbuffer.offset);
                    memcpy(tmp, stringbuffer.data, stringbuffer.offset);
                    t.ustring = tmp;
                    stringPostfix(t);
                    return TOK.TOKstring;

                case 0:
                case 0x1A:
                    p--;            // stay on the terminator
                    error("unterminated string constant starting at %s", start.toChars());
                    t.ustring = "".ptr;
                    t.len = 0;
                    t.postfix = 0;
                    return TOK.TOKstring;

                default:
                    if (c & 0x80)
                    {
                        p--;
                        c = decodeUTF();
                        if (c == LS || c == PS)
                        {
                            c = '\n';
                            loc.linnum++;
                        }
                        p++;
                        stringbuffer.writeUTF8(c);
                        continue;
                    }
                    break;
            }
            stringbuffer.writeByte(c);
        }

        assert(false);
    }

    /**************************************
     * Lex a character literal.
     * Input:
     *  p is on the opening '
     * Stores the character value in t.uns64value and returns TOKcharv,
     * TOKwcharv or TOKdcharv depending on the character or escape used.
     * The parameter wide is not used by this implementation.
     */
    TOK charConstant(Token* t, int wide)
    {
        uint c;
        TOK tk = TOKcharv;

        //printf("Lexer.charConstant\n");
        p++;
        c = *p++;
        switch (c)
        {
version (TEXTUAL_ASSEMBLY_OUT) {
} else {
            case '\\':
                switch (*p)
                {
                    case 'u':
                        t.uns64value = escapeSequence();
                        tk = TOKwcharv;
                        break;

                    case 'U':
                    case '&':
                        t.uns64value = escapeSequence();
                        tk = TOKdcharv;
                        break;

                    default:
                        t.uns64value = escapeSequence();
                        break;
                }
                break;
}
            case '\n':
            L1:
                loc.linnum++;
                // falls through to the error below
            case '\r':
            case 0:
            case 0x1A:
            case '\'':
                error("unterminated character constant");
                return tk;

            default:
                if (c & 0x80)
                {
                    p--;
                    c = decodeUTF();
                    p++;
                    if (c == LS || c == PS)
                        goto L1;
                    if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                        tk = TOKwcharv;
                    else
                        tk = TOKdcharv;
                }
                t.uns64value = c;
                break;
        }

        if (*p != '\'')
        {
            error("unterminated character constant");
            return tk;
        }
        p++;
        return tk;
    }

    /***************************************
     * Get postfix of string literal.
     * A following 'c', 'w' or 'd' is consumed and stored in t.postfix;
     * otherwise t.postfix is set to 0.
     */
    void stringPostfix(Token* t)
    {
        switch (*p)
        {
            case 'c':
            case 'w':
            case 'd':
                t.postfix = *p;
                p++;
                break;

            default:
                t.postfix = 0;
                break;
        }
    }

    /// Not implemented: unconditionally fails its assertion.
    uint wchar_(uint u)
    {
        assert(false);
    }

    /**************************************
     * Read in a number.
     * If it's an integer, store it in tok.TKutok.Vlong.
     *  integers can be decimal, octal or hex
     *  Handle the suffixes U, UL, LU, L, etc.
     * If it's double, store it in tok.TKutok.Vdouble.
     * Returns:
     *  TKnum
     *  TKdouble,...
*/ TOK number(Token* t) { // We use a state machine to collect numbers enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, STATE_hexh, STATE_error }; STATE state; enum FLAGS { FLAGS_undefined = 0, FLAGS_decimal = 1, // decimal FLAGS_unsigned = 2, // u or U suffix FLAGS_long = 4, // l or L suffix }; FLAGS flags = FLAGS.FLAGS_decimal; int i; int base; uint c; ubyte *start; TOK result; //printf("Lexer.number()\n"); state = STATE.STATE_initial; base = 0; stringbuffer.reset(); start = p; while (1) { c = *p; switch (state) { case STATE.STATE_initial: // opening state if (c == '0') state = STATE.STATE_0; else state = STATE.STATE_decimal; break; case STATE.STATE_0: flags = (flags & ~FLAGS.FLAGS_decimal); switch (c) { version (ZEROH) { case 'H': // 0h case 'h': goto hexh; } case 'X': case 'x': state = STATE.STATE_hex0; break; case '.': if (p[1] == '.') // .. is a separate token goto done; case 'i': case 'f': case 'F': goto real_; version (ZEROH) { case 'E': case 'e': goto case_hex; } case 'B': case 'b': state = STATE.STATE_binary0; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': state = STATE.STATE_octal; break; version (ZEROH) { case '8': case '9': case 'A': case 'C': case 'D': case 'F': case 'a': case 'c': case 'd': case 'f': case_hex: state = STATE.STATE_hexh; break; } case '_': state = STATE.STATE_octal; p++; continue; case 'L': if (p[1] == 'i') goto real_; goto done; default: goto done; } break; case STATE.STATE_decimal: // reading decimal number if (!isdigit(c)) { version (ZEROH) { if (ishex(c) || c == 'H' || c == 'h' ) goto hexh; } if (c == '_') // ignore embedded _ { p++; continue; } if (c == '.' && p[1] != '.') goto real_; else if (c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E') { real_: // It's a real number. 
Back up and rescan as a real p = start; return inreal(t); } else if (c == 'L' && p[1] == 'i') goto real_; goto done; } break; case STATE.STATE_hex0: // reading hex number case STATE.STATE_hex: if (! ishex(cast(ubyte)c)) { if (c == '_') // ignore embedded _ { p++; continue; } if (c == '.' && p[1] != '.') goto real_; if (c == 'P' || c == 'p' || c == 'i') goto real_; if (state == STATE.STATE_hex0) error("Hex digit expected, not '%c'", c); goto done; } state = STATE.STATE_hex; break; version (ZEROH) { hexh: state = STATE.STATE_hexh; case STATE.STATE_hexh: // parse numbers like 0FFh if (!ishex(c)) { if (c == 'H' || c == 'h') { p++; base = 16; goto done; } else { // Check for something like 1E3 or 0E24 if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) || memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset)) goto real_; error("Hex digit expected, not '%c'", c); goto done; } } break; } case STATE.STATE_octal: // reading octal number case STATE.STATE_octale: // reading octal number with non-octal digits if (!isoctal(cast(ubyte)c)) { version (ZEROH) { if (ishex(c) || c == 'H' || c == 'h' ) goto hexh; } if (c == '_') // ignore embedded _ { p++; continue; } if (c == '.' 
&& p[1] != '.') goto real_; if (c == 'i') goto real_; if (isdigit(c)) { state = STATE.STATE_octale; } else goto done; } break; case STATE.STATE_binary0: // starting binary number case STATE.STATE_binary: // reading binary number if (c != '0' && c != '1') { version (ZEROH) { if (ishex(c) || c == 'H' || c == 'h' ) goto hexh; } if (c == '_') // ignore embedded _ { p++; continue; } if (state == STATE.STATE_binary0) { error("binary digit expected"); state = STATE.STATE_error; break; } else goto done; } state = STATE.STATE_binary; break; case STATE.STATE_error: // for error recovery if (!isdigit(c)) // scan until non-digit goto done; break; default: assert(0); } stringbuffer.writeByte(c); p++; } done: stringbuffer.writeByte(0); // terminate string if (state == STATE.STATE_octale) error("Octal digit expected"); ulong n; // unsigned >=64 bit integer type if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) n = stringbuffer.data[0] - '0'; else { // Convert string to integer version (__DMC__) { errno = 0; n = strtoull(cast(char*)stringbuffer.data,null,base); if (errno == ERANGE) error("integer overflow"); } else { // Not everybody implements strtoull() char* p = cast(char*)stringbuffer.data; int r = 10, d; if (*p == '0') { if (p[1] == 'x' || p[1] == 'X') p += 2, r = 16; else if (p[1] == 'b' || p[1] == 'B') p += 2, r = 2; else if (isdigit(p[1])) p += 1, r = 8; } n = 0; while (1) { if (*p >= '0' && *p <= '9') d = *p - '0'; else if (*p >= 'a' && *p <= 'z') d = *p - 'a' + 10; else if (*p >= 'A' && *p <= 'Z') d = *p - 'A' + 10; else break; if (d >= r) break; ulong n2 = n * r; //printf("n2 / r = %llx, n = %llx\n", n2/r, n); if (n2 / r != n || n2 + d < n) { error ("integer overflow"); break; } n = n2 + d; p++; } } if (n.sizeof > 8 && n > 0xFFFFFFFFFFFFFFFF) // if n needs more than 64 bits error("integer overflow"); } // Parse trailing 'u', 'U', 'l' or 'L' in any combination while (1) { FLAGS f; switch (*p) { case 'U': case 'u': f = 
FLAGS.FLAGS_unsigned; goto L1; case 'l': if (1 || !global.params.useDeprecated) error("'l' suffix is deprecated, use 'L' instead"); case 'L': f = FLAGS.FLAGS_long; L1: p++; if (flags & f) error("unrecognized token"); flags = (flags | f); continue; default: break; } break; } switch (flags) { case FLAGS.FLAGS_undefined: /* Octal or Hexadecimal constant. * First that fits: int, uint, long, ulong */ if (n & 0x8000000000000000) result = TOK.TOKuns64v; else if (n & 0xFFFFFFFF00000000) result = TOK.TOKint64v; else if (n & 0x80000000) result = TOK.TOKuns32v; else result = TOK.TOKint32v; break; case FLAGS.FLAGS_decimal: /* First that fits: int, long, long long */ if (n & 0x8000000000000000) { error("signed integer overflow"); result = TOK.TOKuns64v; } else if (n & 0xFFFFFFFF80000000) result = TOK.TOKint64v; else result = TOK.TOKint32v; break; case FLAGS.FLAGS_unsigned: case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned: /* First that fits: uint, ulong */ if (n & 0xFFFFFFFF00000000) result = TOK.TOKuns64v; else result = TOK.TOKuns32v; break; case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long: if (n & 0x8000000000000000) { error("signed integer overflow"); result = TOK.TOKuns64v; } else result = TOK.TOKint64v; break; case FLAGS.FLAGS_long: if (n & 0x8000000000000000) result = TOK.TOKuns64v; else result = TOK.TOKint64v; break; case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: result = TOK.TOKuns64v; break; default: debug { printf("%x\n",flags); } assert(0); } t.uns64value = n; return result; } /************************************** * Read in characters, converting them to real. * Bugs: * Exponent overflow not detected. * Too much requested precision is not detected. */ TOK inreal(Token* t) in { assert(*p == '.' 
|| isdigit(*p)); } out (result) { switch (result) { case TOKfloat32v: case TOKfloat64v: case TOKfloat80v: case TOKimaginary32v: case TOKimaginary64v: case TOKimaginary80v: break; default: assert(0); } } body { int dblstate; uint c; char hex; // is this a hexadecimal-floating-constant? TOK result; //printf("Lexer.inreal()\n"); stringbuffer.reset(); dblstate = 0; hex = 0; Lnext: while (true) { // Get next char from input c = *p++; //printf("dblstate = %d, c = '%c'\n", dblstate, c); while (true) { switch (dblstate) { case 0: // opening state if (c == '0') dblstate = 9; else if (c == '.') dblstate = 3; else dblstate = 1; break; case 9: dblstate = 1; if (c == 'X' || c == 'x') { hex++; break; } case 1: // digits to left of . case 3: // digits to right of . case 7: // continuing exponent digits if (!isdigit(c) && !(hex && isxdigit(c))) { if (c == '_') goto Lnext; // ignore embedded '_' dblstate++; continue; } break; case 2: // no more digits to left of . if (c == '.') { dblstate++; break; } case 4: // no more digits to right of . 
if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p')) { dblstate = 5; hex = 0; // exponent is always decimal break; } if (hex) error("binary-exponent-part required"); goto done; case 5: // looking immediately to right of E dblstate++; if (c == '-' || c == '+') break; case 6: // 1st exponent digit expected if (!isdigit(c)) error("exponent expected"); dblstate++; break; case 8: // past end of exponent digits goto done; } break; } stringbuffer.writeByte(c); } done: p--; stringbuffer.writeByte(0); version (_WIN32) { /// && __DMC__ char* save = __locale_decpoint; __locale_decpoint = cast(char*)".".ptr; } t.float80value = strtold(cast(char*)stringbuffer.data, null); errno = 0; switch (*p) { case 'F': case 'f': strtof(cast(char*)stringbuffer.data, null); result = TOKfloat32v; p++; break; default: strtod(cast(char*)stringbuffer.data, null); result = TOKfloat64v; break; case 'l': if (!global.params.useDeprecated) error("'l' suffix is deprecated, use 'L' instead"); case 'L': result = TOKfloat80v; p++; break; } if (*p == 'i' || *p == 'I') { if (!global.params.useDeprecated && *p == 'I') error("'I' suffix is deprecated, use 'i' instead"); p++; switch (result) { case TOKfloat32v: result = TOKimaginary32v; break; case TOKfloat64v: result = TOKimaginary64v; break; case TOKfloat80v: result = TOKimaginary80v; break; } } version (_WIN32) { ///&& __DMC__ __locale_decpoint = save; } if (errno == ERANGE) error("number is not representable"); return result; } void error(T...)(string format, T t) { error(this.loc, format, t); } void error(T...)(Loc loc, string format, T t) { if (mod && !global.gag) { string p = loc.toChars(); if (p.length != 0) writef("%s: ", p); writefln(format, t); if (global.errors >= 20) // moderate blizzard of cascading messages fatal(); } global.errors++; } void pragma_() { assert(false); } /******************************************** * Decode UTF character. * Issue error messages for invalid sequences. 
* Return decoded character, advance p to last character in UTF sequence. */ uint decodeUTF() { dchar u; ubyte c; ubyte* s = p; size_t len; size_t idx; string msg; c = *s; assert(c & 0x80); // Check length of remaining string up to 6 UTF-8 characters for (len = 1; len < 6 && s[len]; len++) { ; } idx = 0; msg = utf_decodeChar(cast(string)s[0..len], &idx, &u); p += idx - 1; if (msg) { error("%s", msg); } return u; } void getDocComment(Token* t, uint lineComment) { assert(false); } static bool isValidIdentifier(string p) { if (p.length == 0) { return false; } if (p[0] >= '0' && p[0] <= '9') { // beware of isdigit() on signed chars return false; } size_t idx = 0; while (idx < p.length) { dchar dc; if (utf_decodeChar(p, &idx, &dc) !is null) { return false; } if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) { return false; } } return true; } /// TODO: reimplement based on strings static ubyte* combineComments(ubyte* c1, ubyte* c2) { //printf("Lexer.combineComments('%s', '%s')\n", c1, c2); ubyte* c = c2; if (c1) { c = c1; if (c2) { size_t len1 = strlen(cast(char*)c1); size_t len2 = strlen(cast(char*)c2); c = cast(ubyte*)GC.malloc(len1 + 1 + len2 + 1); memcpy(c, c1, len1); if (len1 && c1[len1 - 1] != '\n') { c[len1] = '\n'; len1++; } memcpy(c + len1, c2, len2); c[len1 + len2] = 0; } } return c; } }