Mercurial > projects > ldc
view dmd2/lexer.c @ 930:7985bb036db4
Follow the D ABI and pass the last arg in a register if it is a struct that fits.
author | Christian Kamm <kamm incasoftware de> |
---|---|
date | Tue, 03 Feb 2009 21:46:46 +0100 |
parents | 356e65836fb5 |
children | 638d16625da2 |
line wrap: on
line source
// Compiler implementation of the D programming language // Copyright (c) 1999-2008 by Digital Mars // All Rights Reserved // written by Walter Bright // http://www.digitalmars.com // License for redistribution is by either the Artistic License // in artistic.txt, or the GNU General Public License in gnu.txt. // See the included readme.txt for details. /* Lexical Analyzer */ #include <stdio.h> #include <string.h> #include <ctype.h> #include <stdarg.h> #include <errno.h> #include <wchar.h> #include <stdlib.h> #include <assert.h> #include <sys/time.h> #include <math.h> #ifdef IN_GCC #include <time.h> #include "mem.h" #else #if __GNUC__ #include <time.h> #endif #if IN_LLVM #include "mem.h" #elif _WIN32 #include "..\root\mem.h" #else #include "../root/mem.h" #endif #endif #include "stringtable.h" #include "lexer.h" #include "utf.h" #include "identifier.h" #include "id.h" #include "module.h" #if _WIN32 && __DMC__ // from \dm\src\include\setlocal.h extern "C" char * __cdecl __locale_decpoint; #endif extern int HtmlNamedEntity(unsigned char *p, int length); #define LS 0x2028 // UTF line separator #define PS 0x2029 // UTF paragraph separator /******************************************** * Do our own char maps */ static unsigned char cmtable[256]; const int CMoctal = 0x1; const int CMhex = 0x2; const int CMidchar = 0x4; inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; } inline unsigned char ishex (unsigned char c) { return cmtable[c] & CMhex; } inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; } static void cmtable_init() { for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++) { if ('0' <= c && c <= '7') cmtable[c] |= CMoctal; if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) cmtable[c] |= CMhex; if (isalnum(c) || c == '_') cmtable[c] |= CMidchar; } } /************************* Token **********************************************/ const char *Token::tochars[TOKMAX]; void *Token::operator new(size_t size) { Token *t; if (Lexer::freelist) { t = Lexer::freelist; Lexer::freelist = t->next; return t; } return ::operator new(size); } #ifdef DEBUG void Token::print() { fprintf(stdmsg, "%s\n", toChars()); } #endif const char *Token::toChars() { const char *p; static char buffer[3 + 3 * sizeof(value) + 1]; p = buffer; switch (value) { case TOKint32v: #if IN_GCC sprintf(buffer,"%d",(d_int32)int64value); #else sprintf(buffer,"%d",int32value); #endif break; case TOKuns32v: case TOKcharv: case TOKwcharv: case TOKdcharv: #if IN_GCC sprintf(buffer,"%uU",(d_uns32)uns64value); #else sprintf(buffer,"%uU",uns32value); #endif break; case TOKint64v: sprintf(buffer,"%lldL",int64value); break; case TOKuns64v: sprintf(buffer,"%lluUL",uns64value); break; #if IN_GCC case TOKfloat32v: case TOKfloat64v: case TOKfloat80v: float80value.format(buffer, sizeof(buffer)); break; case TOKimaginary32v: case TOKimaginary64v: case TOKimaginary80v: float80value.format(buffer, sizeof(buffer)); // %% buffer strcat(buffer, "i"); break; #else case TOKfloat32v: sprintf(buffer,"%Lgf", float80value); break; case TOKfloat64v: sprintf(buffer,"%Lg", float80value); break; case TOKfloat80v: sprintf(buffer,"%LgL", float80value); break; case TOKimaginary32v: sprintf(buffer,"%Lgfi", float80value); break; case TOKimaginary64v: sprintf(buffer,"%Lgi", float80value); break; case TOKimaginary80v: sprintf(buffer,"%LgLi", float80value); break; #endif case TOKstring: #if CSTRINGS p = string; #else { OutBuffer buf; buf.writeByte('"'); for (size_t i = 0; i < len; ) { unsigned c; utf_decodeChar((unsigned char *)ustring, len, &i, &c); switch (c) { case 0: break; case '"': case '\\': buf.writeByte('\\'); default: if (isprint(c)) buf.writeByte(c); else if (c <= 0x7F) buf.printf("\\x%02x", c); else if (c <= 0xFFFF) buf.printf("\\u%04x", c); else buf.printf("\\U%08x", c); continue; } break; } buf.writeByte('"'); if (postfix) buf.writeByte('"'); buf.writeByte(0); p = (char *)buf.extractData(); } #endif break; case TOKidentifier: case TOKenum: case TOKstruct: case TOKimport: CASE_BASIC_TYPES: p = ident->toChars(); break; default: p = toChars(value); break; } return p; } const char *Token::toChars(enum TOK value) { const char *p; static char buffer[3 + 3 * sizeof(value) + 1]; p = tochars[value]; if (!p) { sprintf(buffer,"TOK%d",value); p = buffer; } return p; } /*************************** Lexer ********************************************/ Token *Lexer::freelist = NULL; StringTable Lexer::stringtable; OutBuffer Lexer::stringbuffer; Lexer::Lexer(Module *mod, unsigned char *base, unsigned begoffset, unsigned endoffset, int doDocComment, int commentToken) : loc(mod, 1) { //printf("Lexer::Lexer(%p,%d)\n",base,length); //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); memset(&token,0,sizeof(token)); this->base = base; this->end = base + endoffset; p = base + begoffset; this->mod = mod; this->doDocComment = doDocComment; this->anyToken = 0; this->commentToken = commentToken; //initKeywords(); /* If first line starts with '#!', ignore the line */ if (p[0] == '#' && p[1] =='!') { p += 2; while (1) { unsigned char c = *p; switch (c) { case '\n': p++; break; case '\r': p++; if (*p == '\n') p++; break; case 0: case 0x1A: break; default: if (c & 0x80) { unsigned u = decodeUTF(); if (u == PS || u == LS) break; } p++; continue; } break; } loc.linnum = 2; } } void Lexer::error(const char *format, ...) { if (mod && !global.gag) { char *p = loc.toChars(); if (*p) fprintf(stdmsg, "%s: ", p); mem.free(p); va_list ap; va_start(ap, format); vfprintf(stdmsg, format, ap); va_end(ap); fprintf(stdmsg, "\n"); fflush(stdmsg); if (global.errors >= 20) // moderate blizzard of cascading messages fatal(); } global.errors++; } void Lexer::error(Loc loc, const char *format, ...) { if (mod && !global.gag) { char *p = loc.toChars(); if (*p) fprintf(stdmsg, "%s: ", p); mem.free(p); va_list ap; va_start(ap, format); vfprintf(stdmsg, format, ap); va_end(ap); fprintf(stdmsg, "\n"); fflush(stdmsg); if (global.errors >= 20) // moderate blizzard of cascading messages fatal(); } global.errors++; } TOK Lexer::nextToken() { Token *t; if (token.next) { t = token.next; memcpy(&token,t,sizeof(Token)); t->next = freelist; freelist = t; } else { scan(&token); } //token.print(); return token.value; } Token *Lexer::peek(Token *ct) { Token *t; if (ct->next) t = ct->next; else { t = new Token(); scan(t); t->next = NULL; ct->next = t; } return t; } /*********************** * Look ahead at next token's value. */ TOK Lexer::peekNext() { return peek(&token)->value; } /********************************* * tk is on the opening (. * Look ahead and return token that is past the closing ). */ Token *Lexer::peekPastParen(Token *tk) { //printf("peekPastParen()\n"); int parens = 1; int curlynest = 0; while (1) { tk = peek(tk); //tk->print(); switch (tk->value) { case TOKlparen: parens++; continue; case TOKrparen: --parens; if (parens) continue; tk = peek(tk); break; case TOKlcurly: curlynest++; continue; case TOKrcurly: if (--curlynest >= 0) continue; break; case TOKsemicolon: if (curlynest) continue; break; case TOKeof: break; default: continue; } return tk; } } /********************************** * Determine if string is a valid Identifier. * Placed here because of commonality with Lexer functionality. * Returns: * 0 invalid */ int Lexer::isValidIdentifier(char *p) { size_t len; size_t idx; if (!p || !*p) goto Linvalid; if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars goto Linvalid; len = strlen(p); idx = 0; while (p[idx]) { dchar_t dc; const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); if (q) goto Linvalid; if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) goto Linvalid; } return 1; Linvalid: return 0; } /**************************** * Turn next token in buffer into a token. */ void Lexer::scan(Token *t) { unsigned lastLine = loc.linnum; unsigned linnum; t->blockComment = NULL; t->lineComment = NULL; while (1) { t->ptr = p; //printf("p = %p, *p = '%c'\n",p,*p); switch (*p) { case 0: case 0x1A: t->value = TOKeof; // end of file return; case ' ': case '\t': case '\v': case '\f': p++; continue; // skip white space case '\r': p++; if (*p != '\n') // if CR stands by itself loc.linnum++; continue; // skip white space case '\n': p++; loc.linnum++; continue; // skip white space case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': t->value = number(t); return; #if CSTRINGS case '\'': t->value = charConstant(t, 0); return; case '"': t->value = stringConstant(t,0); return; case 'l': case 'L': if (p[1] == '\'') { p++; t->value = charConstant(t, 1); return; } else if (p[1] == '"') { p++; t->value = stringConstant(t, 1); return; } #else case '\'': t->value = charConstant(t,0); return; case 'r': if (p[1] != '"') goto case_ident; p++; case '`': t->value = wysiwygStringConstant(t, *p); return; case 'x': if (p[1] != '"') goto case_ident; p++; t->value = hexStringConstant(t); return; #if DMDV2 case 'q': if (p[1] == '"') { p++; t->value = delimitedStringConstant(t); return; } else if (p[1] == '{') { p++; t->value = tokenStringConstant(t); return; } else goto case_ident; #endif case '"': t->value = escapeStringConstant(t,0); return; case '\\': // escaped string literal { unsigned c; stringbuffer.reset(); do { p++; switch (*p) { case 'u': case 'U': case '&': c = escapeSequence(); stringbuffer.writeUTF8(c); break; default: c = escapeSequence(); stringbuffer.writeByte(c); break; } } while (*p == '\\'); t->len = stringbuffer.offset; stringbuffer.writeByte(0); t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); t->postfix = 0; t->value = TOKstring; return; } case 'l': case 'L': #endif case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'm': case 'n': case 'o': #if DMDV2 case 'p': /*case 'q': case 'r':*/ case 's': case 't': #else case 'p': case 'q': /*case 'r':*/ case 's': case 't': #endif case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': case_ident: { unsigned char c; StringValue *sv; Identifier *id; do { c = *++p; } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); sv = stringtable.update((char *)t->ptr, p - t->ptr); id = (Identifier *) sv->ptrvalue; if (!id) { id = new Identifier(sv->lstring.string,TOKidentifier); sv->ptrvalue = id; } t->ident = id; t->value = (enum TOK) id->value; anyToken = 1; if (*t->ptr == '_') // if special identifier token { static char date[11+1]; static char time[8+1]; static char timestamp[24+1]; if (!date[0]) // lazy evaluation { time_t t; char *p; ::time(&t); p = ctime(&t); assert(p); sprintf(date, "%.6s %.4s", p + 4, p + 20); sprintf(time, "%.8s", p + 11); sprintf(timestamp, "%.24s", p); } #if DMDV1 if (mod && id == Id::FILE) { t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); goto Lstring; } else if (mod && id == Id::LINE) { t->value = TOKint64v; t->uns64value = loc.linnum; } else #endif if (id == Id::DATE) { t->ustring = (unsigned char *)date; goto Lstring; } else if (id == Id::TIME) { t->ustring = (unsigned char *)time; goto Lstring; } else if (id == Id::VENDOR) { t->ustring = (unsigned char *)"LDC"; goto Lstring; } else if (id == Id::TIMESTAMP) { t->ustring = (unsigned char *)timestamp; Lstring: t->value = TOKstring; Llen: t->postfix = 0; t->len = strlen((char *)t->ustring); } else if (id == Id::VERSIONX) { unsigned major = 0; unsigned minor = 0; for (const char *p = global.version + 1; 1; p++) { char c = *p; if (isdigit(c)) minor = minor * 10 + c - '0'; else if (c == '.') { major = minor; minor = 0; } else break; } t->value = TOKint64v; t->uns64value = major * 1000 + minor; } #if DMDV2 else if (id == Id::EOFX) { t->value = TOKeof; // Advance scanner to end of file while (!(*p == 0 || *p == 0x1A)) p++; } #endif } //printf("t->value = %d\n",t->value); return; } case '/': p++; switch (*p) { case '=': p++; t->value = TOKdivass; return; case '*': p++; linnum = loc.linnum; while (1) { while (1) { unsigned char c = *p; switch (c) { case '/': break; case '\n': loc.linnum++; p++; continue; case '\r': p++; if (*p != '\n') loc.linnum++; continue; case 0: case 0x1A: error("unterminated /* */ comment"); p = end; t->value = TOKeof; return; default: if (c & 0x80) { unsigned u = decodeUTF(); if (u == PS || u == LS) loc.linnum++; } p++; continue; } break; } p++; if (p[-2] == '*' && p - 3 != t->ptr) break; } if (commentToken) { t->value = TOKcomment; return; } else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) { // if /** but not /**/ getDocComment(t, lastLine == linnum); } continue; case '/': // do // style comments linnum = loc.linnum; while (1) { unsigned char c = *++p; switch (c) { case '\n': break; case '\r': if (p[1] == '\n') p++; break; case 0: case 0x1A: if (commentToken) { p = end; t->value = TOKcomment; return; } if (doDocComment && t->ptr[2] == '/') getDocComment(t, lastLine == linnum); p = end; t->value = TOKeof; return; default: if (c & 0x80) { unsigned u = decodeUTF(); if (u == PS || u == LS) break; } continue; } break; } if (commentToken) { p++; loc.linnum++; t->value = TOKcomment; return; } if (doDocComment && t->ptr[2] == '/') getDocComment(t, lastLine == linnum); p++; loc.linnum++; continue; case '+': { int nest; linnum = loc.linnum; p++; nest = 1; while (1) { unsigned char c = *p; switch (c) { case '/': p++; if (*p == '+') { p++; nest++; } continue; case '+': p++; if (*p == '/') { p++; if (--nest == 0) break; } continue; case '\r': p++; if (*p != '\n') loc.linnum++; continue; case '\n': loc.linnum++; p++; continue; case 0: case 0x1A: error("unterminated /+ +/ comment"); p = end; t->value = TOKeof; return; default: if (c & 0x80) { unsigned u = decodeUTF(); if (u == PS || u == LS) loc.linnum++; } p++; continue; } break; } if (commentToken) { t->value = TOKcomment; return; } if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) { // if /++ but not /++/ getDocComment(t, lastLine == linnum); } continue; } } t->value = TOKdiv; return; case '.': p++; if (isdigit(*p)) { /* Note that we don't allow ._1 and ._ as being * valid floating point numbers. */ p--; t->value = inreal(t); } else if (p[0] == '.') { if (p[1] == '.') { p += 2; t->value = TOKdotdotdot; } else { p++; t->value = TOKslice; } } else t->value = TOKdot; return; case '&': p++; if (*p == '=') { p++; t->value = TOKandass; } else if (*p == '&') { p++; t->value = TOKandand; } else t->value = TOKand; return; case '|': p++; if (*p == '=') { p++; t->value = TOKorass; } else if (*p == '|') { p++; t->value = TOKoror; } else t->value = TOKor; return; case '-': p++; if (*p == '=') { p++; t->value = TOKminass; } #if 0 else if (*p == '>') { p++; t->value = TOKarrow; } #endif else if (*p == '-') { p++; t->value = TOKminusminus; } else t->value = TOKmin; return; case '+': p++; if (*p == '=') { p++; t->value = TOKaddass; } else if (*p == '+') { p++; t->value = TOKplusplus; } else t->value = TOKadd; return; case '<': p++; if (*p == '=') { p++; t->value = TOKle; // <= } else if (*p == '<') { p++; if (*p == '=') { p++; t->value = TOKshlass; // <<= } else t->value = TOKshl; // << } else if (*p == '>') { p++; if (*p == '=') { p++; t->value = TOKleg; // <>= } else t->value = TOKlg; // <> } else t->value = TOKlt; // < return; case '>': p++; if (*p == '=') { p++; t->value = TOKge; // >= } else if (*p == '>') { p++; if (*p == '=') { p++; t->value = TOKshrass; // >>= } else if (*p == '>') { p++; if (*p == '=') { p++; t->value = TOKushrass; // >>>= } else t->value = TOKushr; // >>> } else t->value = TOKshr; // >> } else t->value = TOKgt; // > return; case '!': p++; if (*p == '=') { p++; if (*p == '=' && global.params.Dversion == 1) { p++; t->value = TOKnotidentity; // !== } else t->value = TOKnotequal; // != } else if (*p == '<') { p++; if (*p == '>') { p++; if (*p == '=') { p++; t->value = TOKunord; // !<>= } else t->value = TOKue; // !<> } else if (*p == '=') { p++; t->value = TOKug; // !<= } else t->value = TOKuge; // !< } else if (*p == '>') { p++; if (*p == '=') { p++; t->value = TOKul; // !>= } else t->value = TOKule; // !> } else t->value = TOKnot; // ! return; case '=': p++; if (*p == '=') { p++; if (*p == '=' && global.params.Dversion == 1) { p++; t->value = TOKidentity; // === } else t->value = TOKequal; // == } else t->value = TOKassign; // = return; case '~': p++; if (*p == '=') { p++; t->value = TOKcatass; // ~= } else t->value = TOKtilde; // ~ return; #define SINGLE(c,tok) case c: p++; t->value = tok; return; SINGLE('(', TOKlparen) SINGLE(')', TOKrparen) SINGLE('[', TOKlbracket) SINGLE(']', TOKrbracket) SINGLE('{', TOKlcurly) SINGLE('}', TOKrcurly) SINGLE('?', TOKquestion) SINGLE(',', TOKcomma) SINGLE(';', TOKsemicolon) SINGLE(':', TOKcolon) SINGLE('$', TOKdollar) #undef SINGLE #define DOUBLE(c1,tok1,c2,tok2) \ case c1: \ p++; \ if (*p == c2) \ { p++; \ t->value = tok2; \ } \ else \ t->value = tok1; \ return; DOUBLE('*', TOKmul, '=', TOKmulass) DOUBLE('%', TOKmod, '=', TOKmodass) DOUBLE('^', TOKxor, '=', TOKxorass) #undef DOUBLE case '#': p++; pragma(); continue; default: { unsigned char c = *p; if (c & 0x80) { unsigned u = decodeUTF(); // Check for start of unicode identifier if (isUniAlpha(u)) goto case_ident; if (u == PS || u == LS) { loc.linnum++; p++; continue; } } if (isprint(c)) error("unsupported char '%c'", c); else error("unsupported char 0x%02x", c); p++; continue; } } } } /******************************************* * Parse escape sequence. */ unsigned Lexer::escapeSequence() { unsigned c; int n; int ndigits; c = *p; switch (c) { case '\'': case '"': case '?': case '\\': Lconsume: p++; break; case 'a': c = 7; goto Lconsume; case 'b': c = 8; goto Lconsume; case 'f': c = 12; goto Lconsume; case 'n': c = 10; goto Lconsume; case 'r': c = 13; goto Lconsume; case 't': c = 9; goto Lconsume; case 'v': c = 11; goto Lconsume; case 'u': ndigits = 4; goto Lhex; case 'U': ndigits = 8; goto Lhex; case 'x': ndigits = 2; Lhex: p++; c = *p; if (ishex(c)) { unsigned v; n = 0; v = 0; while (1) { if (isdigit(c)) c -= '0'; else if (islower(c)) c -= 'a' - 10; else c -= 'A' - 10; v = v * 16 + c; c = *++p; if (++n == ndigits) break; if (!ishex(c)) { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); break; } } if (ndigits != 2 && !utf_isValidDchar(v)) error("invalid UTF character \\U%08x", v); c = v; } else error("undefined escape hex sequence \\%c\n",c); break; case '&': // named character entity for (unsigned char *idstart = ++p; 1; p++) { switch (*p) { case ';': c = HtmlNamedEntity(idstart, p - idstart); if (c == ~0) { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); c = ' '; } p++; break; default: if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) continue; error("unterminated named entity"); break; } break; } break; case 0: case 0x1A: // end of file c = '\\'; break; default: if (isoctal(c)) { unsigned v; n = 0; v = 0; do { v = v * 8 + (c - '0'); c = *++p; } while (++n < 3 && isoctal(c)); c = v; if (c > 0xFF) error("0%03o is larger than a byte", c); } else error("undefined escape sequence \\%c\n",c); break; } return c; } /************************************** */ TOK Lexer::wysiwygStringConstant(Token *t, int tc) { unsigned c; Loc start = loc; p++; stringbuffer.reset(); while (1) { c = *p++; switch (c) { case '\n': loc.linnum++; break; case '\r': if (*p == '\n') continue; // ignore c = '\n'; // treat EndOfLine as \n character loc.linnum++; break; case 0: case 0x1A: error("unterminated string constant starting at %s", start.toChars()); t->ustring = (unsigned char *)""; t->len = 0; t->postfix = 0; return TOKstring; case '"': case '`': if (c == tc) { t->len = stringbuffer.offset; stringbuffer.writeByte(0); t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); stringPostfix(t); return TOKstring; } break; default: if (c & 0x80) { p--; unsigned u = decodeUTF(); p++; if (u == PS || u == LS) loc.linnum++; stringbuffer.writeUTF8(u); continue; } break; } stringbuffer.writeByte(c); } } /************************************** * Lex hex strings: * x"0A ae 34FE BD" */ TOK Lexer::hexStringConstant(Token *t) { unsigned c; Loc start = loc; unsigned n = 0; unsigned v; p++; stringbuffer.reset(); while (1) { c = *p++; switch (c) { case ' ': case '\t': case '\v': case '\f': continue; // skip white space case '\r': if (*p == '\n') continue; // ignore // Treat isolated '\r' as if it were a '\n' case '\n': loc.linnum++; continue; case 0: case 0x1A: error("unterminated string constant starting at %s", start.toChars()); t->ustring = (unsigned char *)""; t->len = 0; t->postfix = 0; return TOKstring; case '"': if (n & 1) { error("odd number (%d) of hex characters in hex string", n); stringbuffer.writeByte(v); } t->len = stringbuffer.offset; stringbuffer.writeByte(0); t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); stringPostfix(t); return TOKstring; default: if (c >= '0' && c <= '9') c -= '0'; else if (c >= 'a' && c <= 'f') c -= 'a' - 10; else if (c >= 'A' && c <= 'F') c -= 'A' - 10; else if (c & 0x80) { p--; unsigned u = decodeUTF(); p++; if (u == PS || u == LS) loc.linnum++; else error("non-hex character \\u%x", u); } else error("non-hex character '%c'", c); if (n & 1) { v = (v << 4) | c; stringbuffer.writeByte(v); } else v = c; n++; break; } } } #if DMDV2 /************************************** * Lex delimited strings: * q"(foo(xxx))" // "foo(xxx)" * q"[foo(]" // "foo(" * q"/foo]/" // "foo]" * q"HERE * foo * HERE" // "foo\n" * Input: * p is on the " */ TOK Lexer::delimitedStringConstant(Token *t) { unsigned c; Loc start = loc; unsigned delimleft = 0; unsigned delimright = 0; unsigned nest = 1; unsigned nestcount; Identifier *hereid = NULL; unsigned blankrol = 0; unsigned startline = 0; p++; stringbuffer.reset(); while (1) { c = *p++; //printf("c = '%c'\n", c); switch (c) { case '\n': Lnextline: loc.linnum++; startline = 1; if (blankrol) { blankrol = 0; continue; } if (hereid) { stringbuffer.writeUTF8(c); continue; } break; case '\r': if (*p == '\n') continue; // ignore c = '\n'; // treat EndOfLine as \n character goto Lnextline; case 0: case 0x1A: goto Lerror; default: if (c & 0x80) { p--; c = decodeUTF(); p++; if (c == PS || c == LS) goto Lnextline; } break; } if (delimleft == 0) { delimleft = c; nest = 1; nestcount = 1; if (c == '(') delimright = ')'; else if (c == '{') delimright = '}'; else if (c == '[') delimright = ']'; else if (c == '<') delimright = '>'; else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) { // Start of identifier; must be a heredoc Token t; p--; scan(&t); // read in heredoc identifier if (t.value != TOKidentifier) { error("identifier expected for heredoc, not %s", t.toChars()); delimright = c; } else { hereid = t.ident; //printf("hereid = '%s'\n", hereid->toChars()); blankrol = 1; } nest = 0; } else { delimright = c; nest = 0; } } else { if (blankrol) { error("heredoc rest of line should be blank"); blankrol = 0; continue; } if (nest == 1) { if (c == delimleft) nestcount++; else if (c == delimright) { nestcount--; if (nestcount == 0) goto Ldone; } } else if (c == delimright) goto Ldone; if (startline && isalpha(c)) { Token t; unsigned char *psave = p; p--; scan(&t); // read in possible heredoc identifier //printf("endid = '%s'\n", t.ident->toChars()); if (t.value == TOKidentifier && t.ident->equals(hereid)) { /* should check that rest of line is blank */ goto Ldone; } p = psave; } stringbuffer.writeUTF8(c); startline = 0; } } Ldone: if (*p == '"') p++; else error("delimited string must end in %c\"", delimright); t->len = stringbuffer.offset; stringbuffer.writeByte(0); t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); stringPostfix(t); return TOKstring; Lerror: error("unterminated string constant starting at %s", start.toChars()); t->ustring = (unsigned char *)""; t->len = 0; t->postfix = 0; return TOKstring; } /************************************** * Lex delimited strings: * q{ foo(xxx) } // " foo(xxx) " * q{foo(} // "foo(" * q{{foo}"}"} // "{foo}"}"" * Input: * p is on the q */ TOK Lexer::tokenStringConstant(Token *t) { unsigned nest = 1; Loc start = loc; unsigned char *pstart = ++p; while (1) { Token tok; scan(&tok); switch (tok.value) { case TOKlcurly: nest++; continue; case TOKrcurly: if (--nest == 0) goto Ldone; continue; case TOKeof: goto Lerror; default: continue; } } Ldone: t->len = p - 1 - pstart; t->ustring = (unsigned char *)mem.malloc(t->len + 1); memcpy(t->ustring, pstart, t->len); t->ustring[t->len] = 0; stringPostfix(t); return TOKstring; Lerror: error("unterminated token string constant starting at %s", start.toChars()); t->ustring = (unsigned char *)""; t->len = 0; t->postfix = 0; return TOKstring; } #endif /************************************** */ TOK Lexer::escapeStringConstant(Token *t, int wide) { unsigned c; Loc start = loc; p++; stringbuffer.reset(); while (1) { c = *p++; switch (c) { case '\\': switch (*p) { case 'u': case 'U': case '&': c = escapeSequence(); stringbuffer.writeUTF8(c); continue; default: c = escapeSequence(); break; } break; case '\n': loc.linnum++; break; case '\r': if (*p == '\n') continue; // ignore c = '\n'; // treat EndOfLine as \n character loc.linnum++; break; case '"': t->len = stringbuffer.offset; stringbuffer.writeByte(0); t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); stringPostfix(t); return TOKstring; case 0: case 0x1A: p--; error("unterminated string constant starting at %s", start.toChars()); t->ustring = (unsigned char *)""; t->len = 0; t->postfix = 0; return TOKstring; default: if (c & 0x80) { p--; c = decodeUTF(); if (c == LS || c == PS) { c = '\n'; loc.linnum++; } p++; stringbuffer.writeUTF8(c); continue; } break; } stringbuffer.writeByte(c); } } /************************************** */ TOK Lexer::charConstant(Token *t, int wide) { unsigned c; TOK tk = TOKcharv; //printf("Lexer::charConstant\n"); p++; c = *p++; switch (c) { case '\\': switch (*p) { case 'u': t->uns64value = escapeSequence(); tk = TOKwcharv; break; case 'U': case '&': t->uns64value = escapeSequence(); tk = TOKdcharv; break; default: t->uns64value = escapeSequence(); break; } break; case '\n': L1: loc.linnum++; case '\r': case 0: case 0x1A: case '\'': error("unterminated character constant"); return tk; default: if (c & 0x80) { p--; c = decodeUTF(); p++; if (c == LS || c == PS) goto L1; if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) tk = TOKwcharv; else tk = TOKdcharv; } t->uns64value = c; break; } if (*p != '\'') { error("unterminated character constant"); return tk; } p++; return tk; } /*************************************** * Get postfix of string literal. */ void Lexer::stringPostfix(Token *t) { switch (*p) { case 'c': case 'w': case 'd': t->postfix = *p; p++; break; default: t->postfix = 0; break; } } /*************************************** * Read \u or \U unicode sequence * Input: * u 'u' or 'U' */ #if 0 unsigned Lexer::wchar(unsigned u) { unsigned value; unsigned n; unsigned char c; unsigned nchars; nchars = (u == 'U') ? 8 : 4; value = 0; for (n = 0; 1; n++) { ++p; if (n == nchars) break; c = *p; if (!ishex(c)) { error("\\%c sequence must be followed by %d hex characters", u, nchars); break; } if (isdigit(c)) c -= '0'; else if (islower(c)) c -= 'a' - 10; else c -= 'A' - 10; value <<= 4; value |= c; } return value; } #endif /************************************** * Read in a number. * If it's an integer, store it in tok.TKutok.Vlong. * integers can be decimal, octal or hex * Handle the suffixes U, UL, LU, L, etc. * If it's double, store it in tok.TKutok.Vdouble. * Returns: * TKnum * TKdouble,... */ TOK Lexer::number(Token *t) { // We use a state machine to collect numbers enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, STATE_hexh, STATE_error }; enum STATE state; enum FLAGS { FLAGS_decimal = 1, // decimal FLAGS_unsigned = 2, // u or U suffix FLAGS_long = 4, // l or L suffix }; enum FLAGS flags = FLAGS_decimal; int i; int base; unsigned c; unsigned char *start; TOK result; //printf("Lexer::number()\n"); state = STATE_initial; base = 0; stringbuffer.reset(); start = p; while (1) { c = *p; switch (state) { case STATE_initial: // opening state if (c == '0') state = STATE_0; else state = STATE_decimal; break; case STATE_0: flags = (FLAGS) (flags & ~FLAGS_decimal); switch (c) { #if ZEROH case 'H': // 0h case 'h': goto hexh; #endif case 'X': case 'x': state = STATE_hex0; break; case '.': if (p[1] == '.') // .. is a separate token goto done; case 'i': case 'f': case 'F': goto real; #if ZEROH case 'E': case 'e': goto case_hex; #endif case 'B': case 'b': state = STATE_binary0; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': state = STATE_octal; break; #if ZEROH case '8': case '9': case 'A': case 'C': case 'D': case 'F': case 'a': case 'c': case 'd': case 'f': case_hex: state = STATE_hexh; break; #endif case '_': state = STATE_octal; p++; continue; case 'L': if (p[1] == 'i') goto real; goto done; default: goto done; } break; case STATE_decimal: // reading decimal number if (!isdigit(c)) { #if ZEROH if (ishex(c) || c == 'H' || c == 'h' ) goto hexh; #endif if (c == '_') // ignore embedded _ { p++; continue; } if (c == '.' && p[1] != '.') goto real; else if (c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E') { real: // It's a real number. Back up and rescan as a real p = start; return inreal(t); } else if (c == 'L' && p[1] == 'i') goto real; goto done; } break; case STATE_hex0: // reading hex number case STATE_hex: if (!ishex(c)) { if (c == '_') // ignore embedded _ { p++; continue; } if (c == '.' && p[1] != '.') goto real; if (c == 'P' || c == 'p' || c == 'i') goto real; if (state == STATE_hex0) error("Hex digit expected, not '%c'", c); goto done; } state = STATE_hex; break; #if ZEROH hexh: state = STATE_hexh; case STATE_hexh: // parse numbers like 0FFh if (!ishex(c)) { if (c == 'H' || c == 'h') { p++; base = 16; goto done; } else { // Check for something like 1E3 or 0E24 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) goto real; error("Hex digit expected, not '%c'", c); goto done; } } break; #endif case STATE_octal: // reading octal number case STATE_octale: // reading octal number with non-octal digits if (!isoctal(c)) { #if ZEROH if (ishex(c) || c == 'H' || c == 'h' ) goto hexh; #endif if (c == '_') // ignore embedded _ { p++; continue; } if (c == '.' && p[1] != '.') goto real; if (c == 'i') goto real; if (isdigit(c)) { state = STATE_octale; } else goto done; } break; case STATE_binary0: // starting binary number case STATE_binary: // reading binary number if (c != '0' && c != '1') { #if ZEROH if (ishex(c) || c == 'H' || c == 'h' ) goto hexh; #endif if (c == '_') // ignore embedded _ { p++; continue; } if (state == STATE_binary0) { error("binary digit expected"); state = STATE_error; break; } else goto done; } state = STATE_binary; break; case STATE_error: // for error recovery if (!isdigit(c)) // scan until non-digit goto done; break; default: assert(0); } stringbuffer.writeByte(c); p++; } done: stringbuffer.writeByte(0); // terminate string if (state == STATE_octale) error("Octal digit expected"); uinteger_t n; // unsigned >=64 bit integer type if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) n = stringbuffer.data[0] - '0'; else { // Convert string to integer #if __DMC__ errno = 0; n = strtoull((char *)stringbuffer.data,NULL,base); if (errno == ERANGE) error("integer overflow"); #else // Not everybody implements strtoull() char *p = (char *)stringbuffer.data; int r = 10, d; if (*p == '0') { if (p[1] == 'x' || p[1] == 'X') p += 2, r = 16; else if (p[1] == 'b' || p[1] == 'B') p += 2, r = 2; else if (isdigit(p[1])) p += 1, r = 8; } n = 0; while (1) { if (*p >= '0' && *p <= '9') d = *p - '0'; else if (*p >= 'a' && *p <= 'z') d = *p - 'a' + 10; else if (*p >= 'A' && *p <= 'Z') d = *p - 'A' + 10; else break; if (d >= r) break; uinteger_t n2 = n * r; //printf("n2 / r = %llx, n = %llx\n", n2/r, n); if (n2 / r != n || n2 + d < n) { error ("integer overflow"); break; } n = n2 + d; p++; } #endif if (sizeof(n) > 8 && n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits error("integer overflow"); } // Parse trailing 'u', 'U', 'l' or 'L' in any combination while (1) { unsigned char f; switch (*p) { case 'U': case 'u': f = FLAGS_unsigned; goto L1; case 'l': if (1 || !global.params.useDeprecated) error("'l' suffix is deprecated, use 'L' instead"); case 'L': f = FLAGS_long; L1: p++; if (flags & f) error("unrecognized token"); flags = (FLAGS) (flags | f); continue; default: break; } break; } switch (flags) { case 0: /* Octal or Hexadecimal constant. * First that fits: int, uint, long, ulong */ if (n & 0x8000000000000000LL) result = TOKuns64v; else if (n & 0xFFFFFFFF00000000LL) result = TOKint64v; else if (n & 0x80000000) result = TOKuns32v; else result = TOKint32v; break; case FLAGS_decimal: /* First that fits: int, long, long long */ if (n & 0x8000000000000000LL) { error("signed integer overflow"); result = TOKuns64v; } else if (n & 0xFFFFFFFF80000000LL) result = TOKint64v; else result = TOKint32v; break; case FLAGS_unsigned: case FLAGS_decimal | FLAGS_unsigned: /* First that fits: uint, ulong */ if (n & 0xFFFFFFFF00000000LL) result = TOKuns64v; else result = TOKuns32v; break; case FLAGS_decimal | FLAGS_long: if (n & 0x8000000000000000LL) { error("signed integer overflow"); result = TOKuns64v; } else result = TOKint64v; break; case FLAGS_long: if (n & 0x8000000000000000LL) result = TOKuns64v; else result = TOKint64v; break; case FLAGS_unsigned | FLAGS_long: case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: result = TOKuns64v; break; default: #ifdef DEBUG printf("%x\n",flags); #endif assert(0); } t->uns64value = n; return result; } /************************************** * Read in characters, converting them to real. * Bugs: * Exponent overflow not detected. * Too much requested precision is not detected. */ TOK Lexer::inreal(Token *t) #ifdef __DMC__ __in { assert(*p == '.' || isdigit(*p)); } __out (result) { switch (result) { case TOKfloat32v: case TOKfloat64v: case TOKfloat80v: case TOKimaginary32v: case TOKimaginary64v: case TOKimaginary80v: break; default: assert(0); } } __body #endif /* __DMC__ */ { int dblstate; unsigned c; char hex; // is this a hexadecimal-floating-constant? TOK result; //printf("Lexer::inreal()\n"); stringbuffer.reset(); dblstate = 0; hex = 0; Lnext: while (1) { // Get next char from input c = *p++; //printf("dblstate = %d, c = '%c'\n", dblstate, c); while (1) { switch (dblstate) { case 0: // opening state if (c == '0') dblstate = 9; else if (c == '.') dblstate = 3; else dblstate = 1; break; case 9: dblstate = 1; if (c == 'X' || c == 'x') { hex++; break; } case 1: // digits to left of . case 3: // digits to right of . case 7: // continuing exponent digits if (!isdigit(c) && !(hex && isxdigit(c))) { if (c == '_') goto Lnext; // ignore embedded '_' dblstate++; continue; } break; case 2: // no more digits to left of . if (c == '.') { dblstate++; break; } case 4: // no more digits to right of . if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p')) { dblstate = 5; hex = 0; // exponent is always decimal break; } if (hex) error("binary-exponent-part required"); goto done; case 5: // looking immediately to right of E dblstate++; if (c == '-' || c == '+') break; case 6: // 1st exponent digit expected if (!isdigit(c)) error("exponent expected"); dblstate++; break; case 8: // past end of exponent digits goto done; } break; } stringbuffer.writeByte(c); } done: p--; stringbuffer.writeByte(0); #if _WIN32 && __DMC__ char *save = __locale_decpoint; __locale_decpoint = "."; #endif #ifdef IN_GCC t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); #else t->float80value = strtold((char *)stringbuffer.data, NULL); #endif errno = 0; float strtofres; double strtodres; switch (*p) { case 'F': case 'f': #ifdef IN_GCC real_t::parse((char *)stringbuffer.data, real_t::Float); #else strtofres = strtof((char *)stringbuffer.data, NULL); // LDC change: don't error on gradual underflow if (errno == ERANGE && strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF) errno = 0; #endif result = TOKfloat32v; p++; break; default: #ifdef IN_GCC real_t::parse((char *)stringbuffer.data, real_t::Double); #else strtodres = strtod((char *)stringbuffer.data, NULL); // LDC change: don't error on gradual underflow if (errno == ERANGE && strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL) errno = 0; #endif result = TOKfloat64v; break; case 'l': if (!global.params.useDeprecated) error("'l' suffix is deprecated, use 'L' instead"); case 'L': result = TOKfloat80v; p++; break; } if (*p == 'i' || *p == 'I') { if (!global.params.useDeprecated && *p == 'I') error("'I' suffix is deprecated, use 'i' instead"); p++; switch (result) { case TOKfloat32v: result = TOKimaginary32v; break; case TOKfloat64v: result = TOKimaginary64v; break; case TOKfloat80v: result = TOKimaginary80v; break; } } #if _WIN32 && __DMC__ __locale_decpoint = save; #endif if (errno == ERANGE) error("number is not representable"); return result; } /********************************************* * Do pragma. * Currently, the only pragma supported is: * #line linnum [filespec] */ void Lexer::pragma() { Token tok; int linnum; char *filespec = NULL; Loc loc = this->loc; scan(&tok); if (tok.value != TOKidentifier || tok.ident != Id::line) goto Lerr; scan(&tok); if (tok.value == TOKint32v || tok.value == TOKint64v) linnum = tok.uns64value - 1; else goto Lerr; while (1) { switch (*p) { case 0: case 0x1A: case '\n': Lnewline: this->loc.linnum = linnum; if (filespec) this->loc.filename = filespec; return; case '\r': p++; if (*p != '\n') { p--; goto Lnewline; } continue; case ' ': case '\t': case '\v': case '\f': p++; continue; // skip white space case '_': if (mod && memcmp(p, "__FILE__", 8) == 0) { p += 8; filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); } continue; case '"': if (filespec) goto Lerr; stringbuffer.reset(); p++; while (1) { unsigned c; c = *p; switch (c) { case '\n': case '\r': case 0: case 0x1A: goto Lerr; case '"': stringbuffer.writeByte(0); filespec = mem.strdup((char *)stringbuffer.data); p++; break; default: if (c & 0x80) { unsigned u = decodeUTF(); if (u == PS || u == LS) goto Lerr; } stringbuffer.writeByte(c); p++; continue; } break; } continue; default: if (*p & 0x80) { unsigned u = decodeUTF(); if (u == PS || u == LS) goto Lnewline; } goto Lerr; } } Lerr: error(loc, "#line integer [\"filespec\"]\\n expected"); } /******************************************** * Decode UTF character. * Issue error messages for invalid sequences. * Return decoded character, advance p to last character in UTF sequence. */ unsigned Lexer::decodeUTF() { dchar_t u; unsigned char c; unsigned char *s = p; size_t len; size_t idx; const char *msg; c = *s; assert(c & 0x80); // Check length of remaining string up to 6 UTF-8 characters for (len = 1; len < 6 && s[len]; len++) ; idx = 0; msg = utf_decodeChar(s, len, &idx, &u); p += idx - 1; if (msg) { error("%s", msg); } return u; } /*************************************************** * Parse doc comment embedded between t->ptr and p. * Remove trailing blanks and tabs from lines. * Replace all newlines with \n. * Remove leading comment character from each line. * Decide if it's a lineComment or a blockComment. * Append to previous one for this token. */ void Lexer::getDocComment(Token *t, unsigned lineComment) { /* ct tells us which kind of comment it is: '/', '*', or '+' */ unsigned char ct = t->ptr[2]; /* Start of comment text skips over / * *, / + +, or / / / */ unsigned char *q = t->ptr + 3; // start of comment text unsigned char *qend = p; if (ct == '*' || ct == '+') qend -= 2; /* Scan over initial row of ****'s or ++++'s or ////'s */ for (; q < qend; q++) { if (*q != ct) break; } /* Remove trailing row of ****'s or ++++'s */ if (ct != '/') { for (; q < qend; qend--) { if (qend[-1] != ct) break; } } /* Comment is now [q .. qend]. * Canonicalize it into buf[]. */ OutBuffer buf; int linestart = 0; for (; q < qend; q++) { unsigned char c = *q; switch (c) { case '*': case '+': if (linestart && c == ct) { linestart = 0; /* Trim preceding whitespace up to preceding \n */ while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) buf.offset--; continue; } break; case ' ': case '\t': break; case '\r': if (q[1] == '\n') continue; // skip the \r goto Lnewline; default: if (c == 226) { // If LS or PS if (q[1] == 128 && (q[2] == 168 || q[2] == 169)) { q += 2; goto Lnewline; } } linestart = 0; break; Lnewline: c = '\n'; // replace all newlines with \n case '\n': linestart = 1; /* Trim trailing whitespace */ while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) buf.offset--; break; } buf.writeByte(c); } // Always end with a newline if (!buf.offset || buf.data[buf.offset - 1] != '\n') buf.writeByte('\n'); buf.writeByte(0); // It's a line comment if the start of the doc comment comes // after other non-whitespace on the same line. unsigned char** dc = (lineComment && anyToken) ? &t->lineComment : &t->blockComment; // Combine with previous doc comment, if any if (*dc) *dc = combineComments(*dc, (unsigned char *)buf.data); else *dc = (unsigned char *)buf.extractData(); } /******************************************** * Combine two document comments into one, * separated by a newline. */ unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) { //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); unsigned char *c = c2; if (c1) { c = c1; if (c2) { size_t len1 = strlen((char *)c1); size_t len2 = strlen((char *)c2); c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); memcpy(c, c1, len1); if (len1 && c1[len1 - 1] != '\n') { c[len1] = '\n'; len1++; } memcpy(c + len1, c2, len2); c[len1 + len2] = 0; } } return c; } /******************************************** * Create an identifier in the string table. */ Identifier *Lexer::idPool(const char *s) { size_t len = strlen(s); StringValue *sv = stringtable.update(s, len); Identifier *id = (Identifier *) sv->ptrvalue; if (!id) { id = new Identifier(sv->lstring.string, TOKidentifier); sv->ptrvalue = id; } return id; } /********************************************* * Create a unique identifier using the prefix s. */ Identifier *Lexer::uniqueId(const char *s, int num) { char buffer[32]; size_t slen = strlen(s); assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); sprintf(buffer, "%s%d", s, num); return idPool(buffer); } Identifier *Lexer::uniqueId(const char *s) { static int num; return uniqueId(s, ++num); } /**************************************** */ struct Keyword { const char *name; enum TOK value; }; static Keyword keywords[] = { // { "", TOK }, { "this", TOKthis }, { "super", TOKsuper }, { "assert", TOKassert }, { "null", TOKnull }, { "true", TOKtrue }, { "false", TOKfalse }, { "cast", TOKcast }, { "new", TOKnew }, { "delete", TOKdelete }, { "throw", TOKthrow }, { "module", TOKmodule }, { "pragma", TOKpragma }, { "typeof", TOKtypeof }, { "typeid", TOKtypeid }, { "template", TOKtemplate }, { "void", TOKvoid }, { "byte", TOKint8 }, { "ubyte", TOKuns8 }, { "short", TOKint16 }, { "ushort", TOKuns16 }, { "int", TOKint32 }, { "uint", TOKuns32 }, { "long", TOKint64 }, { "ulong", TOKuns64 }, { "cent", TOKcent, }, { "ucent", TOKucent, }, { "float", TOKfloat32 }, { "double", TOKfloat64 }, { "real", TOKfloat80 }, { "bool", TOKbool }, { "char", TOKchar }, { "wchar", TOKwchar }, { "dchar", TOKdchar }, { "ifloat", TOKimaginary32 }, { "idouble", TOKimaginary64 }, { "ireal", TOKimaginary80 }, { "cfloat", TOKcomplex32 }, { "cdouble", TOKcomplex64 }, { "creal", TOKcomplex80 }, { "delegate", TOKdelegate }, { "function", TOKfunction }, { "is", TOKis }, { "if", TOKif }, { "else", TOKelse }, { "while", TOKwhile }, { "for", TOKfor }, { "do", TOKdo }, { "switch", TOKswitch }, { "case", TOKcase }, { "default", TOKdefault }, { "break", TOKbreak }, { "continue", TOKcontinue }, { "synchronized", TOKsynchronized }, { "return", TOKreturn }, { "goto", TOKgoto }, { "try", TOKtry }, { "catch", TOKcatch }, { "finally", TOKfinally }, { "with", TOKwith }, { "asm", TOKasm }, { "foreach", TOKforeach }, { "foreach_reverse", TOKforeach_reverse }, { "scope", TOKscope }, { "struct", TOKstruct }, { "class", TOKclass }, { "interface", TOKinterface }, { "union", TOKunion }, { "enum", TOKenum }, { "import", TOKimport }, { "mixin", TOKmixin }, { "static", TOKstatic }, { "final", TOKfinal }, { "const", TOKconst }, { "typedef", TOKtypedef }, { "alias", TOKalias }, { "override", TOKoverride }, { "abstract", TOKabstract }, { "volatile", TOKvolatile }, { "debug", TOKdebug }, { "deprecated", TOKdeprecated }, { "in", TOKin }, { "out", TOKout }, { "inout", TOKinout }, { "lazy", TOKlazy }, { "auto", TOKauto }, { "align", TOKalign }, { "extern", TOKextern }, { "private", TOKprivate }, { "package", TOKpackage }, { "protected", TOKprotected }, { "public", TOKpublic }, { "export", TOKexport }, { "body", TOKbody }, { "invariant", TOKinvariant }, { "unittest", TOKunittest }, { "version", TOKversion }, //{ "manifest", TOKmanifest }, // Added after 1.0 { "ref", TOKref }, { "macro", TOKmacro }, #if DMDV2 { "pure", TOKpure }, { "nothrow", TOKnothrow }, { "__thread", TOKtls }, { "__traits", TOKtraits }, { "__overloadset", TOKoverloadset }, { "__FILE__", TOKfile }, { "__LINE__", TOKline }, { "shared", TOKshared }, { "immutable", TOKimmutable }, #endif }; int Token::isKeyword() { for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) { if (keywords[u].value == value) return 1; } return 0; } void Lexer::initKeywords() { StringValue *sv; unsigned u; enum TOK v; unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); if (global.params.Dversion == 1) nkeywords -= 2; cmtable_init(); for (u = 0; u < nkeywords; u++) { const char *s; //printf("keyword[%d] = '%s'\n",u, keywords[u].name); s = keywords[u].name; v = keywords[u].value; sv = stringtable.insert(s, strlen(s)); sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); //printf("tochars[%d] = '%s'\n",v, s); Token::tochars[v] = s; } Token::tochars[TOKeof] = "EOF"; Token::tochars[TOKlcurly] = "{"; Token::tochars[TOKrcurly] = "}"; Token::tochars[TOKlparen] = "("; Token::tochars[TOKrparen] = ")"; Token::tochars[TOKlbracket] = "["; Token::tochars[TOKrbracket] = "]"; Token::tochars[TOKsemicolon] = ";"; Token::tochars[TOKcolon] = ":"; Token::tochars[TOKcomma] = ","; Token::tochars[TOKdot] = "."; Token::tochars[TOKxor] = "^"; Token::tochars[TOKxorass] = "^="; Token::tochars[TOKassign] = "="; Token::tochars[TOKconstruct] = "="; #if DMDV2 Token::tochars[TOKblit] = "="; #endif Token::tochars[TOKlt] = "<"; Token::tochars[TOKgt] = ">"; Token::tochars[TOKle] = "<="; Token::tochars[TOKge] = ">="; Token::tochars[TOKequal] = "=="; Token::tochars[TOKnotequal] = "!="; Token::tochars[TOKnotidentity] = "!is"; Token::tochars[TOKtobool] = "!!"; Token::tochars[TOKunord] = "!<>="; Token::tochars[TOKue] = "!<>"; Token::tochars[TOKlg] = "<>"; Token::tochars[TOKleg] = "<>="; Token::tochars[TOKule] = "!>"; Token::tochars[TOKul] = "!>="; Token::tochars[TOKuge] = "!<"; Token::tochars[TOKug] = "!<="; Token::tochars[TOKnot] = "!"; Token::tochars[TOKtobool] = "!!"; Token::tochars[TOKshl] = "<<"; Token::tochars[TOKshr] = ">>"; Token::tochars[TOKushr] = ">>>"; Token::tochars[TOKadd] = "+"; Token::tochars[TOKmin] = "-"; Token::tochars[TOKmul] = "*"; Token::tochars[TOKdiv] = "/"; Token::tochars[TOKmod] = "%"; Token::tochars[TOKslice] = ".."; Token::tochars[TOKdotdotdot] = "..."; Token::tochars[TOKand] = "&"; Token::tochars[TOKandand] = "&&"; Token::tochars[TOKor] = "|"; Token::tochars[TOKoror] = "||"; Token::tochars[TOKarray] = "[]"; Token::tochars[TOKindex] = "[i]"; Token::tochars[TOKaddress] = "&"; Token::tochars[TOKstar] = "*"; Token::tochars[TOKtilde] = "~"; Token::tochars[TOKdollar] = "$"; Token::tochars[TOKcast] = "cast"; Token::tochars[TOKplusplus] = "++"; Token::tochars[TOKminusminus] = "--"; Token::tochars[TOKtype] = "type"; Token::tochars[TOKquestion] = "?"; Token::tochars[TOKneg] = "-"; Token::tochars[TOKuadd] = "+"; Token::tochars[TOKvar] = "var"; Token::tochars[TOKaddass] = "+="; Token::tochars[TOKminass] = "-="; Token::tochars[TOKmulass] = "*="; Token::tochars[TOKdivass] = "/="; Token::tochars[TOKmodass] = "%="; Token::tochars[TOKshlass] = "<<="; Token::tochars[TOKshrass] = ">>="; Token::tochars[TOKushrass] = ">>>="; Token::tochars[TOKandass] = "&="; Token::tochars[TOKorass] = "|="; Token::tochars[TOKcatass] = "~="; Token::tochars[TOKcat] = "~"; Token::tochars[TOKcall] = "call"; Token::tochars[TOKidentity] = "is"; Token::tochars[TOKnotidentity] = "!is"; Token::tochars[TOKorass] = "|="; Token::tochars[TOKidentifier] = "identifier"; // For debugging Token::tochars[TOKdotexp] = "dotexp"; Token::tochars[TOKdotti] = "dotti"; Token::tochars[TOKdotvar] = "dotvar"; Token::tochars[TOKdottype] = "dottype"; Token::tochars[TOKsymoff] = "symoff"; Token::tochars[TOKtypedot] = "typedot"; Token::tochars[TOKarraylength] = "arraylength"; Token::tochars[TOKarrayliteral] = "arrayliteral"; Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; Token::tochars[TOKstructliteral] = "structliteral"; Token::tochars[TOKstring] = "string"; Token::tochars[TOKdsymbol] = "symbol"; Token::tochars[TOKtuple] = "tuple"; Token::tochars[TOKdeclaration] = "declaration"; Token::tochars[TOKdottd] = "dottd"; Token::tochars[TOKon_scope_exit] = "scope(exit)"; }