Mercurial > projects > ldc
diff dmd/lexer.c @ 159:5acec6b2eef8 trunk
[svn r175] merged dmd 1.029
author | ChristianK |
---|---|
date | Thu, 01 May 2008 15:15:28 +0200 |
parents | 0ab29b838084 |
children | 2b72433d5c8c |
line wrap: on
line diff
--- a/dmd/lexer.c Thu May 01 13:33:02 2008 +0200 +++ b/dmd/lexer.c Thu May 01 15:15:28 2008 +0200 @@ -1,3019 +1,3064 @@ - -// Compiler implementation of the D programming language -// Copyright (c) 1999-2007 by Digital Mars -// All Rights Reserved -// written by Walter Bright -// http://www.digitalmars.com -// License for redistribution is by either the Artistic License -// in artistic.txt, or the GNU General Public License in gnu.txt. -// See the included readme.txt for details. - -/* Lexical Analyzer */ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <stdarg.h> -#include <errno.h> -#include <wchar.h> -#include <stdlib.h> -#include <assert.h> -#include <sys/time.h> - -#ifdef IN_GCC - -#include <time.h> -#include "mem.h" - -#else - -#if __GNUC__ -#include <time.h> -#endif - -#if IN_LLVM -#include "mem.h" -#elif _WIN32 -#include "..\root\mem.h" -#else -#include "../root/mem.h" -#endif -#endif - -#include "stringtable.h" - -#include "lexer.h" -#include "utf.h" -#include "identifier.h" -#include "id.h" -#include "module.h" - -#if _WIN32 && __DMC__ -// from \dm\src\include\setlocal.h -extern "C" char * __cdecl __locale_decpoint; -#endif - -extern int HtmlNamedEntity(unsigned char *p, int length); - -#define LS 0x2028 // UTF line separator -#define PS 0x2029 // UTF paragraph separator - -/******************************************** - * Do our own char maps - */ - -static unsigned char cmtable[256]; - -const int CMoctal = 0x1; -const int CMhex = 0x2; -const int CMidchar = 0x4; - -inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; } -inline unsigned char ishex (unsigned char c) { return cmtable[c] & CMhex; } -inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; } - -static void cmtable_init() -{ - for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++) - { - if ('0' <= c && c <= '7') - cmtable[c] |= CMoctal; - if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) - cmtable[c] |= CMhex; - if (isalnum(c) || c == '_') - cmtable[c] |= CMidchar; - } -} - - -/************************* Token **********************************************/ - -char *Token::tochars[TOKMAX]; - -void *Token::operator new(size_t size) -{ Token *t; - - if (Lexer::freelist) - { - t = Lexer::freelist; - Lexer::freelist = t->next; - return t; - } - - return ::operator new(size); -} - -#ifdef DEBUG -void Token::print() -{ - fprintf(stdmsg, "%s\n", toChars()); -} -#endif - -char *Token::toChars() -{ char *p; - static char buffer[3 + 3 * sizeof(value) + 1]; - - p = buffer; - switch (value) - { - case TOKint32v: -#if IN_GCC - sprintf(buffer,"%d",(d_int32)int64value); -#else - sprintf(buffer,"%d",int32value); -#endif - break; - - case TOKuns32v: - case TOKcharv: - case TOKwcharv: - case TOKdcharv: -#if IN_GCC - sprintf(buffer,"%uU",(d_uns32)uns64value); -#else - sprintf(buffer,"%uU",uns32value); -#endif - break; - - case TOKint64v: - sprintf(buffer,"%jdL",int64value); - break; - - case TOKuns64v: - sprintf(buffer,"%juUL",uns64value); - break; - -#if IN_GCC - case TOKfloat32v: - case TOKfloat64v: - case TOKfloat80v: - float80value.format(buffer, sizeof(buffer)); - break; - case TOKimaginary32v: - case TOKimaginary64v: - case TOKimaginary80v: - float80value.format(buffer, sizeof(buffer)); - // %% buffer - strcat(buffer, "i"); - break; -#else - case TOKfloat32v: - sprintf(buffer,"%Lgf", float80value); - break; - - case TOKfloat64v: - sprintf(buffer,"%Lg", float80value); - break; - - case TOKfloat80v: - sprintf(buffer,"%LgL", float80value); - break; - - case TOKimaginary32v: - sprintf(buffer,"%Lgfi", float80value); - break; - - case TOKimaginary64v: - sprintf(buffer,"%Lgi", float80value); - break; - - case TOKimaginary80v: - sprintf(buffer,"%LgLi", float80value); - break; -#endif - - case TOKstring: -#if CSTRINGS - p = string; -#else - { OutBuffer buf; - - buf.writeByte('"'); - for (size_t i = 0; i < len; ) - { unsigned c; - - utf_decodeChar((unsigned char *)ustring, len, &i, &c); - switch (c) - { - case 0: - break; - - case '"': - case '\\': - buf.writeByte('\\'); - default: - if (isprint(c)) - buf.writeByte(c); - else if (c <= 0x7F) - buf.printf("\\x%02x", c); - else if (c <= 0xFFFF) - buf.printf("\\u%04x", c); - else - buf.printf("\\U%08x", c); - continue; - } - break; - } - buf.writeByte('"'); - if (postfix) - buf.writeByte('"'); - buf.writeByte(0); - p = (char *)buf.extractData(); - } -#endif - break; - - case TOKidentifier: - case TOKenum: - case TOKstruct: - case TOKimport: - CASE_BASIC_TYPES: - p = ident->toChars(); - break; - - default: - p = toChars(value); - break; - } - return p; -} - -char *Token::toChars(enum TOK value) -{ char *p; - static char buffer[3 + 3 * sizeof(value) + 1]; - - p = tochars[value]; - if (!p) - { sprintf(buffer,"TOK%d",value); - p = buffer; - } - return p; -} - -/*************************** Lexer ********************************************/ - -Token *Lexer::freelist = NULL; -StringTable Lexer::stringtable; -OutBuffer Lexer::stringbuffer; - -Lexer::Lexer(Module *mod, - unsigned char *base, unsigned begoffset, unsigned endoffset, - int doDocComment, int commentToken) - : loc(mod, 1) -{ - //printf("Lexer::Lexer(%p,%d)\n",base,length); - //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); - memset(&token,0,sizeof(token)); - this->base = base; - this->end = base + endoffset; - p = base + begoffset; - this->mod = mod; - this->doDocComment = doDocComment; - this->anyToken = 0; - this->commentToken = commentToken; - //initKeywords(); - - /* If first line starts with '#!', ignore the line - */ - - if (p[0] == '#' && p[1] =='!') - { - p += 2; - while (1) - { unsigned char c = *p; - switch (c) - { - case '\n': - p++; - break; - - case '\r': - p++; - if (*p == '\n') - p++; - break; - - case 0: - case 0x1A: - break; - - default: - if (c & 0x80) - { unsigned u = decodeUTF(); - if (u == PS || u == LS) - break; - } - p++; - continue; - } - break; - } - loc.linnum = 2; - } -} - - -void Lexer::error(const char *format, ...) -{ - if (mod && !global.gag) - { - char *p = loc.toChars(); - if (*p) - fprintf(stdmsg, "%s: ", p); - mem.free(p); - - va_list ap; - va_start(ap, format); - vfprintf(stdmsg, format, ap); - va_end(ap); - - fprintf(stdmsg, "\n"); - fflush(stdmsg); - - if (global.errors >= 20) // moderate blizzard of cascading messages - fatal(); - } - global.errors++; -} - -void Lexer::error(Loc loc, const char *format, ...) -{ - if (mod && !global.gag) - { - char *p = loc.toChars(); - if (*p) - fprintf(stdmsg, "%s: ", p); - mem.free(p); - - va_list ap; - va_start(ap, format); - vfprintf(stdmsg, format, ap); - va_end(ap); - - fprintf(stdmsg, "\n"); - fflush(stdmsg); - - if (global.errors >= 20) // moderate blizzard of cascading messages - fatal(); - } - global.errors++; -} - -TOK Lexer::nextToken() -{ Token *t; - - if (token.next) - { - t = token.next; - memcpy(&token,t,sizeof(Token)); - t->next = freelist; - freelist = t; - } - else - { - scan(&token); - } - //token.print(); - return token.value; -} - -Token *Lexer::peek(Token *ct) -{ Token *t; - - if (ct->next) - t = ct->next; - else - { - t = new Token(); - scan(t); - t->next = NULL; - ct->next = t; - } - return t; -} - -/********************************* - * tk is on the opening (. - * Look ahead and return token that is past the closing ). - */ - -Token *Lexer::peekPastParen(Token *tk) -{ - //printf("peekPastParen()\n"); - int parens = 1; - int curlynest = 0; - while (1) - { - tk = peek(tk); - //tk->print(); - switch (tk->value) - { - case TOKlparen: - parens++; - continue; - - case TOKrparen: - --parens; - if (parens) - continue; - tk = peek(tk); - break; - - case TOKlcurly: - curlynest++; - continue; - - case TOKrcurly: - if (--curlynest >= 0) - continue; - break; - - case TOKsemicolon: - if (curlynest) - continue; - break; - - case TOKeof: - break; - - default: - continue; - } - return tk; - } -} - -/********************************** - * Determine if string is a valid Identifier. - * Placed here because of commonality with Lexer functionality. - * Returns: - * 0 invalid - */ - -int Lexer::isValidIdentifier(char *p) -{ - size_t len; - size_t idx; - - if (!p || !*p) - goto Linvalid; - - if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars - goto Linvalid; - - len = strlen(p); - idx = 0; - while (p[idx]) - { dchar_t dc; - - char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); - if (q) - goto Linvalid; - - if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) - goto Linvalid; - } - return 1; - -Linvalid: - return 0; -} - -/**************************** - * Turn next token in buffer into a token. - */ - -void Lexer::scan(Token *t) -{ - unsigned lastLine = loc.linnum; - unsigned linnum; - - t->blockComment = NULL; - t->lineComment = NULL; - while (1) - { - t->ptr = p; - //printf("p = %p, *p = '%c'\n",p,*p); - switch (*p) - { - case 0: - case 0x1A: - t->value = TOKeof; // end of file - return; - - case ' ': - case '\t': - case '\v': - case '\f': - p++; - continue; // skip white space - - case '\r': - p++; - if (*p != '\n') // if CR stands by itself - loc.linnum++; - continue; // skip white space - - case '\n': - p++; - loc.linnum++; - continue; // skip white space - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - t->value = number(t); - return; - -#if CSTRINGS - case '\'': - t->value = charConstant(t, 0); - return; - - case '"': - t->value = stringConstant(t,0); - return; - - case 'l': - case 'L': - if (p[1] == '\'') - { - p++; - t->value = charConstant(t, 1); - return; - } - else if (p[1] == '"') - { - p++; - t->value = stringConstant(t, 1); - return; - } -#else - case '\'': - t->value = charConstant(t,0); - return; - - case 'r': - if (p[1] != '"') - goto case_ident; - p++; - case '`': - t->value = wysiwygStringConstant(t, *p); - return; - - case 'x': - if (p[1] != '"') - goto case_ident; - p++; - t->value = hexStringConstant(t); - return; - -#if V2 - case 'q': - if (p[1] == '"') - { - p++; - t->value = delimitedStringConstant(t); - return; - } - else if (p[1] == '{') - { - p++; - t->value = tokenStringConstant(t); - return; - } - else - goto case_ident; -#endif - - case '"': - t->value = escapeStringConstant(t,0); - return; - - case '\\': // escaped string literal - { unsigned c; - - stringbuffer.reset(); - do - { - p++; - c = escapeSequence(); - stringbuffer.writeUTF8(c); - } while (*p == '\\'); - t->len = stringbuffer.offset; - stringbuffer.writeByte(0); - t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); - memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); - t->postfix = 0; - t->value = TOKstring; - return; - } - - case 'l': - case 'L': -#endif - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'm': case 'n': case 'o': -#if V2 - case 'p': /*case 'q': case 'r':*/ case 's': case 't': -#else - case 'p': case 'q': /*case 'r':*/ case 's': case 't': -#endif - case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': - case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case '_': - case_ident: - { unsigned char c; - StringValue *sv; - Identifier *id; - - do - { - c = *++p; - } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); - sv = stringtable.update((char *)t->ptr, p - t->ptr); - id = (Identifier *) sv->ptrvalue; - if (!id) - { id = new Identifier(sv->lstring.string,TOKidentifier); - sv->ptrvalue = id; - } - t->ident = id; - t->value = (enum TOK) id->value; - anyToken = 1; - if (*t->ptr == '_') // if special identifier token - { - static char date[11+1]; - static char time[8+1]; - static char timestamp[24+1]; - - if (!date[0]) // lazy evaluation - { time_t t; - char *p; - - ::time(&t); - p = ctime(&t); - assert(p); - sprintf(date, "%.6s %.4s", p + 4, p + 20); - sprintf(time, "%.8s", p + 11); - sprintf(timestamp, "%.24s", p); - } - - if (mod && id == Id::FILE) - { - t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); - goto Lstring; - } - else if (mod && id == Id::LINE) - { - t->value = TOKint64v; - t->uns64value = loc.linnum; - } - else if (id == Id::DATE) - { - t->ustring = (unsigned char *)date; - goto Lstring; - } - else if (id == Id::TIME) - { - t->ustring = (unsigned char *)time; - goto Lstring; - } - else if (id == Id::VENDOR) - { - t->ustring = (unsigned char *)"Digital Mars D"; - goto Lstring; - } - else if (id == Id::TIMESTAMP) - { - t->ustring = (unsigned char *)timestamp; - Lstring: - t->value = TOKstring; - Llen: - t->postfix = 0; - t->len = strlen((char *)t->ustring); - } - else if (id == Id::VERSIONX) - { unsigned major = 0; - unsigned minor = 0; - - for (char *p = global.version + 1; 1; p++) - { - char c = *p; - if (isdigit(c)) - minor = minor * 10 + c - '0'; - else if (c == '.') - { major = minor; - minor = 0; - } - else - break; - } - t->value = TOKint64v; - t->uns64value = major * 1000 + minor; - } - } - //printf("t->value = %d\n",t->value); - return; - } - - case '/': - p++; - switch (*p) - { - case '=': - p++; - t->value = TOKdivass; - return; - - case '*': - p++; - linnum = loc.linnum; - while (1) - { - while (1) - { unsigned char c = *p; - switch (c) - { - case '/': - break; - - case '\n': - loc.linnum++; - p++; - continue; - - case '\r': - p++; - if (*p != '\n') - loc.linnum++; - continue; - - case 0: - case 0x1A: - error("unterminated /* */ comment"); - p = end; - t->value = TOKeof; - return; - - default: - if (c & 0x80) - { unsigned u = decodeUTF(); - if (u == PS || u == LS) - loc.linnum++; - } - p++; - continue; - } - break; - } - p++; - if (p[-2] == '*' && p - 3 != t->ptr) - break; - } - if (commentToken) - { - t->value = TOKcomment; - return; - } - else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) - { // if /** but not /**/ - getDocComment(t, lastLine == linnum); - } - continue; - - case '/': // do // style comments - linnum = loc.linnum; - while (1) - { unsigned char c = *++p; - switch (c) - { - case '\n': - break; - - case '\r': - if (p[1] == '\n') - p++; - break; - - case 0: - case 0x1A: - if (commentToken) - { - p = end; - t->value = TOKcomment; - return; - } - if (doDocComment && t->ptr[2] == '/') - getDocComment(t, lastLine == linnum); - p = end; - t->value = TOKeof; - return; - - default: - if (c & 0x80) - { unsigned u = decodeUTF(); - if (u == PS || u == LS) - break; - } - continue; - } - break; - } - - if (commentToken) - { - p++; - loc.linnum++; - t->value = TOKcomment; - return; - } - if (doDocComment && t->ptr[2] == '/') - getDocComment(t, lastLine == linnum); - - p++; - loc.linnum++; - continue; - - case '+': - { int nest; - - linnum = loc.linnum; - p++; - nest = 1; - while (1) - { unsigned char c = *p; - switch (c) - { - case '/': - p++; - if (*p == '+') - { - p++; - nest++; - } - continue; - - case '+': - p++; - if (*p == '/') - { - p++; - if (--nest == 0) - break; - } - continue; - - case '\r': - p++; - if (*p != '\n') - loc.linnum++; - continue; - - case '\n': - loc.linnum++; - p++; - continue; - - case 0: - case 0x1A: - error("unterminated /+ +/ comment"); - p = end; - t->value = TOKeof; - return; - - default: - if (c & 0x80) - { unsigned u = decodeUTF(); - if (u == PS || u == LS) - loc.linnum++; - } - p++; - continue; - } - break; - } - if (commentToken) - { - t->value = TOKcomment; - return; - } - if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) - { // if /++ but not /++/ - getDocComment(t, lastLine == linnum); - } - continue; - } - } - t->value = TOKdiv; - return; - - case '.': - p++; - if (isdigit(*p)) - { /* Note that we don't allow ._1 and ._ as being - * valid floating point numbers. - */ - p--; - t->value = inreal(t); - } - else if (p[0] == '.') - { - if (p[1] == '.') - { p += 2; - t->value = TOKdotdotdot; - } - else - { p++; - t->value = TOKslice; - } - } - else - t->value = TOKdot; - return; - - case '&': - p++; - if (*p == '=') - { p++; - t->value = TOKandass; - } - else if (*p == '&') - { p++; - t->value = TOKandand; - } - else - t->value = TOKand; - return; - - case '|': - p++; - if (*p == '=') - { p++; - t->value = TOKorass; - } - else if (*p == '|') - { p++; - t->value = TOKoror; - } - else - t->value = TOKor; - return; - - case '-': - p++; - if (*p == '=') - { p++; - t->value = TOKminass; - } -#if 0 - else if (*p == '>') - { p++; - t->value = TOKarrow; - } -#endif - else if (*p == '-') - { p++; - t->value = TOKminusminus; - } - else - t->value = TOKmin; - return; - - case '+': - p++; - if (*p == '=') - { p++; - t->value = TOKaddass; - } - else if (*p == '+') - { p++; - t->value = TOKplusplus; - } - else - t->value = TOKadd; - return; - - case '<': - p++; - if (*p == '=') - { p++; - t->value = TOKle; // <= - } - else if (*p == '<') - { p++; - if (*p == '=') - { p++; - t->value = TOKshlass; // <<= - } - else - t->value = TOKshl; // << - } - else if (*p == '>') - { p++; - if (*p == '=') - { p++; - t->value = TOKleg; // <>= - } - else - t->value = TOKlg; // <> - } - else - t->value = TOKlt; // < - return; - - case '>': - p++; - if (*p == '=') - { p++; - t->value = TOKge; // >= - } - else if (*p == '>') - { p++; - if (*p == '=') - { p++; - t->value = TOKshrass; // >>= - } - else if (*p == '>') - { p++; - if (*p == '=') - { p++; - t->value = TOKushrass; // >>>= - } - else - t->value = TOKushr; // >>> - } - else - t->value = TOKshr; // >> - } - else - t->value = TOKgt; // > - return; - - case '!': - p++; - if (*p == '=') - { p++; - if (*p == '=' && global.params.Dversion == 1) - { p++; - t->value = TOKnotidentity; // !== - } - else - t->value = TOKnotequal; // != - } - else if (*p == '<') - { p++; - if (*p == '>') - { p++; - if (*p == '=') - { p++; - t->value = TOKunord; // !<>= - } - else - t->value = TOKue; // !<> - } - else if (*p == '=') - { p++; - t->value = TOKug; // !<= - } - else - t->value = TOKuge; // !< - } - else if (*p == '>') - { p++; - if (*p == '=') - { p++; - t->value = TOKul; // !>= - } - else - t->value = TOKule; // !> - } - else - t->value = TOKnot; // ! - return; - - case '=': - p++; - if (*p == '=') - { p++; - if (*p == '=' && global.params.Dversion == 1) - { p++; - t->value = TOKidentity; // === - } - else - t->value = TOKequal; // == - } - else - t->value = TOKassign; // = - return; - - case '~': - p++; - if (*p == '=') - { p++; - t->value = TOKcatass; // ~= - } - else - t->value = TOKtilde; // ~ - return; - -#define SINGLE(c,tok) case c: p++; t->value = tok; return; - - SINGLE('(', TOKlparen) - SINGLE(')', TOKrparen) - SINGLE('[', TOKlbracket) - SINGLE(']', TOKrbracket) - SINGLE('{', TOKlcurly) - SINGLE('}', TOKrcurly) - SINGLE('?', TOKquestion) - SINGLE(',', TOKcomma) - SINGLE(';', TOKsemicolon) - SINGLE(':', TOKcolon) - SINGLE('$', TOKdollar) - -#undef SINGLE - -#define DOUBLE(c1,tok1,c2,tok2) \ - case c1: \ - p++; \ - if (*p == c2) \ - { p++; \ - t->value = tok2; \ - } \ - else \ - t->value = tok1; \ - return; - - DOUBLE('*', TOKmul, '=', TOKmulass) - DOUBLE('%', TOKmod, '=', TOKmodass) - DOUBLE('^', TOKxor, '=', TOKxorass) - -#undef DOUBLE - - case '#': - p++; - pragma(); - continue; - - default: - { unsigned char c = *p; - - if (c & 0x80) - { unsigned u = decodeUTF(); - - // Check for start of unicode identifier - if (isUniAlpha(u)) - goto case_ident; - - if (u == PS || u == LS) - { - loc.linnum++; - p++; - continue; - } - } - if (isprint(c)) - error("unsupported char '%c'", c); - else - error("unsupported char 0x%02x", c); - p++; - continue; - } - } - } -} - -/******************************************* - * Parse escape sequence. - */ - -unsigned Lexer::escapeSequence() -{ unsigned c; - int n; - int ndigits; - - c = *p; - switch (c) - { - case '\'': - case '"': - case '?': - case '\\': - Lconsume: - p++; - break; - - case 'a': c = 7; goto Lconsume; - case 'b': c = 8; goto Lconsume; - case 'f': c = 12; goto Lconsume; - case 'n': c = 10; goto Lconsume; - case 'r': c = 13; goto Lconsume; - case 't': c = 9; goto Lconsume; - case 'v': c = 11; goto Lconsume; - - case 'u': - ndigits = 4; - goto Lhex; - case 'U': - ndigits = 8; - goto Lhex; - case 'x': - ndigits = 2; - Lhex: - p++; - c = *p; - if (ishex(c)) - { unsigned v; - - n = 0; - v = 0; - while (1) - { - if (isdigit(c)) - c -= '0'; - else if (islower(c)) - c -= 'a' - 10; - else - c -= 'A' - 10; - v = v * 16 + c; - c = *++p; - if (++n == ndigits) - break; - if (!ishex(c)) - { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); - break; - } - } - if (ndigits != 2 && !utf_isValidDchar(v)) - error("invalid UTF character \\U%08x", v); - c = v; - } - else - error("undefined escape hex sequence \\%c\n",c); - break; - - case '&': // named character entity - for (unsigned char *idstart = ++p; 1; p++) - { - switch (*p) - { - case ';': - c = HtmlNamedEntity(idstart, p - idstart); - if (c == ~0) - { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); - c = ' '; - } - p++; - break; - - default: - if (isalpha(*p) || - (p != idstart + 1 && isdigit(*p))) - continue; - error("unterminated named entity"); - break; - } - break; - } - break; - - case 0: - case 0x1A: // end of file - c = '\\'; - break; - - default: - if (isoctal(c)) - { unsigned char v; - - n = 0; - v = 0; - do - { - v = v * 8 + (c - '0'); - c = *++p; - } while (++n < 3 && isoctal(c)); - c = v; - } - else - error("undefined escape sequence \\%c\n",c); - break; - } - return c; -} - -/************************************** - */ - -TOK Lexer::wysiwygStringConstant(Token *t, int tc) -{ unsigned c; - Loc start = loc; - - p++; - stringbuffer.reset(); - while (1) - { - c = *p++; - switch (c) - { - case '\n': - loc.linnum++; - break; - - case '\r': - if (*p == '\n') - continue; // ignore - c = '\n'; // treat EndOfLine as \n character - loc.linnum++; - break; - - case 0: - case 0x1A: - error("unterminated string constant starting at %s", start.toChars()); - t->ustring = (unsigned char *)""; - t->len = 0; - t->postfix = 0; - return TOKstring; - - case '"': - case '`': - if (c == tc) - { - t->len = stringbuffer.offset; - stringbuffer.writeByte(0); - t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); - memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - return TOKstring; - } - break; - - default: - if (c & 0x80) - { p--; - unsigned u = decodeUTF(); - p++; - if (u == PS || u == LS) - loc.linnum++; - stringbuffer.writeUTF8(u); - continue; - } - break; - } - stringbuffer.writeByte(c); - } -} - -/************************************** - * Lex hex strings: - * x"0A ae 34FE BD" - */ - -TOK Lexer::hexStringConstant(Token *t) -{ unsigned c; - Loc start = loc; - unsigned n = 0; - unsigned v; - - p++; - stringbuffer.reset(); - while (1) - { - c = *p++; - switch (c) - { - case ' ': - case '\t': - case '\v': - case '\f': - continue; // skip white space - - case '\r': - if (*p == '\n') - continue; // ignore - // Treat isolated '\r' as if it were a '\n' - case '\n': - loc.linnum++; - continue; - - case 0: - case 0x1A: - error("unterminated string constant starting at %s", start.toChars()); - t->ustring = (unsigned char *)""; - t->len = 0; - t->postfix = 0; - return TOKstring; - - case '"': - if (n & 1) - { error("odd number (%d) of hex characters in hex string", n); - stringbuffer.writeByte(v); - } - t->len = stringbuffer.offset; - stringbuffer.writeByte(0); - t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); - memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - return TOKstring; - - default: - if (c >= '0' && c <= '9') - c -= '0'; - else if (c >= 'a' && c <= 'f') - c -= 'a' - 10; - else if (c >= 'A' && c <= 'F') - c -= 'A' - 10; - else if (c & 0x80) - { p--; - unsigned u = decodeUTF(); - p++; - if (u == PS || u == LS) - loc.linnum++; - else - error("non-hex character \\u%x", u); - } - else - error("non-hex character '%c'", c); - if (n & 1) - { v = (v << 4) | c; - stringbuffer.writeByte(v); - } - else - v = c; - n++; - break; - } - } -} - - -#if V2 -/************************************** - * Lex delimited strings: - * q"(foo(xxx))" // "foo(xxx)" - * q"[foo(]" // "foo(" - * q"/foo]/" // "foo]" - * q"HERE - * foo - * HERE" // "foo\n" - * Input: - * p is on the " - */ - -TOK Lexer::delimitedStringConstant(Token *t) -{ unsigned c; - Loc start = loc; - unsigned delimleft = 0; - unsigned delimright = 0; - unsigned nest = 1; - unsigned nestcount; - Identifier *hereid = NULL; - unsigned blankrol = 0; - unsigned startline = 0; - - p++; - stringbuffer.reset(); - while (1) - { - c = *p++; - //printf("c = '%c'\n", c); - switch (c) - { - case '\n': - Lnextline: -printf("Lnextline\n"); - loc.linnum++; - startline = 1; - if (blankrol) - { blankrol = 0; - continue; - } - if (hereid) - { - stringbuffer.writeUTF8(c); - continue; - } - break; - - case '\r': - if (*p == '\n') - continue; // ignore - c = '\n'; // treat EndOfLine as \n character - goto Lnextline; - - case 0: - case 0x1A: - goto Lerror; - - default: - if (c & 0x80) - { p--; - c = decodeUTF(); - p++; - if (c == PS || c == LS) - goto Lnextline; - } - break; - } - if (delimleft == 0) - { delimleft = c; - nest = 1; - nestcount = 1; - if (c == '(') - delimright = ')'; - else if (c == '{') - delimright = '}'; - else if (c == '[') - delimright = ']'; - else if (c == '<') - delimright = '>'; - else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) - { // Start of identifier; must be a heredoc - Token t; - p--; - scan(&t); // read in heredoc identifier - if (t.value != TOKidentifier) - { error("identifier expected for heredoc, not %s", t.toChars()); - delimright = c; - } - else - { hereid = t.ident; -printf("hereid = '%s'\n", hereid->toChars()); - blankrol = 1; - } - nest = 0; - } - else - { delimright = c; - nest = 0; - } - } - else - { - if (blankrol) - { error("heredoc rest of line should be blank"); - blankrol = 0; - continue; - } - if (nest == 1) - { - if (c == delimleft) - nestcount++; - else if (c == delimright) - { nestcount--; - if (nestcount == 0) - goto Ldone; - } - } - else if (c == delimright) - goto Ldone; - if (startline && isalpha(c)) - { Token t; - unsigned char *psave = p; - p--; - scan(&t); // read in possible heredoc identifier -printf("endid = '%s'\n", t.ident->toChars()); - if (t.value == TOKidentifier && t.ident->equals(hereid)) - { /* should check that rest of line is blank - */ -printf("done\n"); - goto Ldone; - } - p = psave; - } - stringbuffer.writeUTF8(c); - startline = 0; - } - } - -Ldone: - if (*p == '"') - p++; - else - error("delimited string must end in %c\"", delimright); - t->len = stringbuffer.offset; - stringbuffer.writeByte(0); - t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); - memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - return TOKstring; - -Lerror: - error("unterminated string constant starting at %s", start.toChars()); - t->ustring = (unsigned char *)""; - t->len = 0; - t->postfix = 0; - return TOKstring; -} - -/************************************** - * Lex delimited strings: - * q{ foo(xxx) } // " foo(xxx) " - * q{foo(} // "foo(" - * q{{foo}"}"} // "{foo}"}"" - * Input: - * p is on the q - */ - -TOK Lexer::tokenStringConstant(Token *t) -{ - unsigned nest = 1; - Loc start = loc; - unsigned char *pstart = ++p; - - while (1) - { Token tok; - - scan(&tok); - switch (tok.value) - { - case TOKlcurly: - nest++; - continue; - - case TOKrcurly: - if (--nest == 0) - goto Ldone; - continue; - - case TOKeof: - goto Lerror; - - default: - continue; - } - } - -Ldone: - t->len = p - 1 - pstart; - t->ustring = (unsigned char *)mem.malloc(t->len + 1); - memcpy(t->ustring, pstart, t->len); - t->ustring[t->len] = 0; - stringPostfix(t); - return TOKstring; - -Lerror: - error("unterminated token string constant starting at %s", start.toChars()); - t->ustring = (unsigned char *)""; - t->len = 0; - t->postfix = 0; - return TOKstring; -} - -#endif - - -/************************************** - */ - -TOK Lexer::escapeStringConstant(Token *t, int wide) -{ unsigned c; - Loc start = loc; - - p++; - stringbuffer.reset(); - while (1) - { - c = *p++; - switch (c) - { - case '\\': - switch (*p) - { - case 'u': - case 'U': - case '&': - c = escapeSequence(); - stringbuffer.writeUTF8(c); - continue; - - default: - c = escapeSequence(); - break; - } - break; - - case '\n': - loc.linnum++; - break; - - case '\r': - if (*p == '\n') - continue; // ignore - c = '\n'; // treat EndOfLine as \n character - loc.linnum++; - break; - - case '"': - t->len = stringbuffer.offset; - stringbuffer.writeByte(0); - t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); - memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - return TOKstring; - - case 0: - case 0x1A: - p--; - error("unterminated string constant starting at %s", start.toChars()); - t->ustring = (unsigned char *)""; - t->len = 0; - t->postfix = 0; - return TOKstring; - - default: - if (c & 0x80) - { - p--; - c = decodeUTF(); - if (c == LS || c == PS) - { c = '\n'; - loc.linnum++; - } - p++; - stringbuffer.writeUTF8(c); - continue; - } - break; - } - stringbuffer.writeByte(c); - } -} - -/************************************** - */ - -TOK Lexer::charConstant(Token *t, int wide) -{ - unsigned c; - TOK tk = TOKcharv; - - //printf("Lexer::charConstant\n"); - p++; - c = *p++; - switch (c) - { - case '\\': - switch (*p) - { - case 'u': - t->uns64value = escapeSequence(); - tk = TOKwcharv; - break; - - case 'U': - case '&': - t->uns64value = escapeSequence(); - tk = TOKdcharv; - break; - - default: - t->uns64value = escapeSequence(); - break; - } - break; - - case '\n': - L1: - loc.linnum++; - case '\r': - case 0: - case 0x1A: - case '\'': - error("unterminated character constant"); - return tk; - - default: - if (c & 0x80) - { - p--; - c = decodeUTF(); - p++; - if (c == LS || c == PS) - goto L1; - if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) - tk = TOKwcharv; - else - tk = TOKdcharv; - } - t->uns64value = c; - break; - } - - if (*p != '\'') - { error("unterminated character constant"); - return tk; - } - p++; - return tk; -} - -/*************************************** - * Get postfix of string literal. - */ - -void Lexer::stringPostfix(Token *t) -{ - switch (*p) - { - case 'c': - case 'w': - case 'd': - t->postfix = *p; - p++; - break; - - default: - t->postfix = 0; - break; - } -} - -/*************************************** - * Read \u or \U unicode sequence - * Input: - * u 'u' or 'U' - */ - -#if 0 -unsigned Lexer::wchar(unsigned u) -{ - unsigned value; - unsigned n; - unsigned char c; - unsigned nchars; - - nchars = (u == 'U') ? 8 : 4; - value = 0; - for (n = 0; 1; n++) - { - ++p; - if (n == nchars) - break; - c = *p; - if (!ishex(c)) - { error("\\%c sequence must be followed by %d hex characters", u, nchars); - break; - } - if (isdigit(c)) - c -= '0'; - else if (islower(c)) - c -= 'a' - 10; - else - c -= 'A' - 10; - value <<= 4; - value |= c; - } - return value; -} -#endif - -/************************************** - * Read in a number. - * If it's an integer, store it in tok.TKutok.Vlong. - * integers can be decimal, octal or hex - * Handle the suffixes U, UL, LU, L, etc. - * If it's double, store it in tok.TKutok.Vdouble. - * Returns: - * TKnum - * TKdouble,... - */ - -TOK Lexer::number(Token *t) -{ - // We use a state machine to collect numbers - enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, - STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, - STATE_hexh, STATE_error }; - enum STATE state; - - enum FLAGS - { FLAGS_decimal = 1, // decimal - FLAGS_unsigned = 2, // u or U suffix - FLAGS_long = 4, // l or L suffix - }; - enum FLAGS flags = FLAGS_decimal; - - int i; - int base; - unsigned c; - unsigned char *start; - TOK result; - - //printf("Lexer::number()\n"); - state = STATE_initial; - base = 0; - stringbuffer.reset(); - start = p; - while (1) - { - c = *p; - switch (state) - { - case STATE_initial: // opening state - if (c == '0') - state = STATE_0; - else - state = STATE_decimal; - break; - - case STATE_0: - flags = (FLAGS) (flags & ~FLAGS_decimal); - switch (c) - { -#if ZEROH - case 'H': // 0h - case 'h': - goto hexh; -#endif - case 'X': - case 'x': - state = STATE_hex0; - break; - - case '.': - if (p[1] == '.') // .. is a separate token - goto done; - case 'i': - case 'f': - case 'F': - goto real; -#if ZEROH - case 'E': - case 'e': - goto case_hex; -#endif - case 'B': - case 'b': - state = STATE_binary0; - break; - - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - state = STATE_octal; - break; - -#if ZEROH - case '8': case '9': case 'A': - case 'C': case 'D': case 'F': - case 'a': case 'c': case 'd': case 'f': - case_hex: - state = STATE_hexh; - break; -#endif - case '_': - state = STATE_octal; - p++; - continue; - - case 'L': - if (p[1] == 'i') - goto real; - goto done; - - default: - goto done; - } - break; - - case STATE_decimal: // reading decimal number - if (!isdigit(c)) - { -#if ZEROH - if (ishex(c) - || c == 'H' || c == 'h' - ) - goto hexh; -#endif - if (c == '_') // ignore embedded _ - { p++; - continue; - } - if (c == '.' && p[1] != '.') - goto real; - else if (c == 'i' || c == 'f' || c == 'F' || - c == 'e' || c == 'E') - { - real: // It's a real number. Back up and rescan as a real - p = start; - return inreal(t); - } - else if (c == 'L' && p[1] == 'i') - goto real; - goto done; - } - break; - - case STATE_hex0: // reading hex number - case STATE_hex: - if (!ishex(c)) - { - if (c == '_') // ignore embedded _ - { p++; - continue; - } - if (c == '.' && p[1] != '.') - goto real; - if (c == 'P' || c == 'p' || c == 'i') - goto real; - if (state == STATE_hex0) - error("Hex digit expected, not '%c'", c); - goto done; - } - state = STATE_hex; - break; - -#if ZEROH - hexh: - state = STATE_hexh; - case STATE_hexh: // parse numbers like 0FFh - if (!ishex(c)) - { - if (c == 'H' || c == 'h') - { - p++; - base = 16; - goto done; - } - else - { - // Check for something like 1E3 or 0E24 - if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || - memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) - goto real; - error("Hex digit expected, not '%c'", c); - goto done; - } - } - break; -#endif - - case STATE_octal: // reading octal number - case STATE_octale: // reading octal number with non-octal digits - if (!isoctal(c)) - { -#if ZEROH - if (ishex(c) - || c == 'H' || c == 'h' - ) - goto hexh; -#endif - if (c == '_') // ignore embedded _ - { p++; - continue; - } - if (c == '.' && p[1] != '.') - goto real; - if (c == 'i') - goto real; - if (isdigit(c)) - { - state = STATE_octale; - } - else - goto done; - } - break; - - case STATE_binary0: // starting binary number - case STATE_binary: // reading binary number - if (c != '0' && c != '1') - { -#if ZEROH - if (ishex(c) - || c == 'H' || c == 'h' - ) - goto hexh; -#endif - if (c == '_') // ignore embedded _ - { p++; - continue; - } - if (state == STATE_binary0) - { error("binary digit expected"); - state = STATE_error; - break; - } - else - goto done; - } - state = STATE_binary; - break; - - case STATE_error: // for error recovery - if (!isdigit(c)) // scan until non-digit - goto done; - break; - - default: - assert(0); - } - stringbuffer.writeByte(c); - p++; - } -done: - stringbuffer.writeByte(0); // terminate string - if (state == STATE_octale) - error("Octal digit expected"); - - uinteger_t n; // unsigned >=64 bit integer type - - if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) - n = stringbuffer.data[0] - '0'; - else - { - // Convert string to integer -#if __DMC__ - errno = 0; - n = strtoull((char *)stringbuffer.data,NULL,base); - if (errno == ERANGE) - error("integer overflow"); -#else - // Not everybody implements strtoull() - char *p = (char *)stringbuffer.data; - int r = 10, d; - - if (*p == '0') - { - if (p[1] == 'x' || p[1] == 'X') - p += 2, r = 16; - else if (p[1] == 'b' || p[1] == 'B') - p += 2, r = 2; - else if (isdigit(p[1])) - p += 1, r = 8; - } - - n = 0; - while (1) - { - if (*p >= '0' && *p <= '9') - d = *p - '0'; - else if (*p >= 'a' && *p <= 'z') - d = *p - 'a' + 10; - else if (*p >= 'A' && *p <= 'Z') - d = *p - 'A' + 10; - else - break; - if (d >= r) - break; - if (n && n * r + d <= n) - { - error ("integer overflow"); - break; - } - - n = n * r + d; - p++; - } -#endif - if (sizeof(n) > 8 && - n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits - error("integer overflow"); - } - - // Parse trailing 'u', 'U', 'l' or 'L' in any combination - while (1) - { unsigned char f; - - switch (*p) - { case 'U': - case 'u': - f = FLAGS_unsigned; - goto L1; - - case 'l': - if (1 || !global.params.useDeprecated) - error("'l' suffix is deprecated, use 'L' instead"); - case 'L': - f = FLAGS_long; - L1: - p++; - if (flags & f) - error("unrecognized token"); - flags = (FLAGS) (flags | f); - continue; - default: - break; - } - break; - } - - switch (flags) - { - case 0: - /* Octal or Hexadecimal constant. - * First that fits: int, uint, long, ulong - */ - if (n & 0x8000000000000000LL) - result = TOKuns64v; - else if (n & 0xFFFFFFFF00000000LL) - result = TOKint64v; - else if (n & 0x80000000) - result = TOKuns32v; - else - result = TOKint32v; - break; - - case FLAGS_decimal: - /* First that fits: int, long, long long - */ - if (n & 0x8000000000000000LL) - { error("signed integer overflow"); - result = TOKuns64v; - } - else if (n & 0xFFFFFFFF80000000LL) - result = TOKint64v; - else - result = TOKint32v; - break; - - case FLAGS_unsigned: - case FLAGS_decimal | FLAGS_unsigned: - /* First that fits: uint, ulong - */ - if (n & 0xFFFFFFFF00000000LL) - result = TOKuns64v; - else - result = TOKuns32v; - break; - - case FLAGS_decimal | FLAGS_long: - if (n & 0x8000000000000000LL) - { error("signed integer overflow"); - result = TOKuns64v; - } - else - result = TOKint64v; - break; - - case FLAGS_long: - if (n & 0x8000000000000000LL) - result = TOKuns64v; - else - result = TOKint64v; - break; - - case FLAGS_unsigned | FLAGS_long: - case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: - result = TOKuns64v; - break; - - default: - #ifdef DEBUG - printf("%x\n",flags); - #endif - assert(0); - } - t->uns64value = n; - return result; -} - -/************************************** - * Read in characters, converting them to real. - * Bugs: - * Exponent overflow not detected. - * Too much requested precision is not detected. - */ - -TOK Lexer::inreal(Token *t) -#ifdef __DMC__ -__in -{ - assert(*p == '.' || isdigit(*p)); -} -__out (result) -{ - switch (result) - { - case TOKfloat32v: - case TOKfloat64v: - case TOKfloat80v: - case TOKimaginary32v: - case TOKimaginary64v: - case TOKimaginary80v: - break; - - default: - assert(0); - } -} -__body -#endif /* __DMC__ */ -{ int dblstate; - unsigned c; - char hex; // is this a hexadecimal-floating-constant? - TOK result; - - //printf("Lexer::inreal()\n"); - stringbuffer.reset(); - dblstate = 0; - hex = 0; -Lnext: - while (1) - { - // Get next char from input - c = *p++; - //printf("dblstate = %d, c = '%c'\n", dblstate, c); - while (1) - { - switch (dblstate) - { - case 0: // opening state - if (c == '0') - dblstate = 9; - else if (c == '.') - dblstate = 3; - else - dblstate = 1; - break; - - case 9: - dblstate = 1; - if (c == 'X' || c == 'x') - { hex++; - break; - } - case 1: // digits to left of . - case 3: // digits to right of . - case 7: // continuing exponent digits - if (!isdigit(c) && !(hex && isxdigit(c))) - { - if (c == '_') - goto Lnext; // ignore embedded '_' - dblstate++; - continue; - } - break; - - case 2: // no more digits to left of . - if (c == '.') - { dblstate++; - break; - } - case 4: // no more digits to right of . - if ((c == 'E' || c == 'e') || - hex && (c == 'P' || c == 'p')) - { dblstate = 5; - hex = 0; // exponent is always decimal - break; - } - if (hex) - error("binary-exponent-part required"); - goto done; - - case 5: // looking immediately to right of E - dblstate++; - if (c == '-' || c == '+') - break; - case 6: // 1st exponent digit expected - if (!isdigit(c)) - error("exponent expected"); - dblstate++; - break; - - case 8: // past end of exponent digits - goto done; - } - break; - } - stringbuffer.writeByte(c); - } -done: - p--; - - stringbuffer.writeByte(0); - -#if _WIN32 && __DMC__ - char *save = __locale_decpoint; - __locale_decpoint = "."; -#endif -#ifdef IN_GCC - t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); -#else - t->float80value = strtold((char *)stringbuffer.data, NULL); -#endif - errno = 0; - switch (*p) - { - case 'F': - case 'f': -#ifdef IN_GCC - real_t::parse((char *)stringbuffer.data, real_t::Float); -#else - strtof((char *)stringbuffer.data, NULL); -#endif - result = TOKfloat32v; - p++; - break; - - default: -#ifdef IN_GCC - real_t::parse((char *)stringbuffer.data, real_t::Double); -#else - strtod((char *)stringbuffer.data, NULL); -#endif - result = TOKfloat64v; - break; - - case 'l': - if (!global.params.useDeprecated) - error("'l' suffix is deprecated, use 'L' instead"); - case 'L': - result = TOKfloat80v; - p++; - break; - } - if (*p == 'i' || *p == 'I') - { - if (!global.params.useDeprecated && *p == 'I') - error("'I' suffix is deprecated, use 'i' instead"); - p++; - switch (result) - { - case TOKfloat32v: - result = TOKimaginary32v; - break; - case TOKfloat64v: - result = TOKimaginary64v; - break; - case TOKfloat80v: - result = TOKimaginary80v; - break; - } - } -#if _WIN32 && __DMC__ - __locale_decpoint = save; -#endif - if (errno == ERANGE) - error("number is not representable"); - return result; -} - -/********************************************* - * Do pragma. - * Currently, the only pragma supported is: - * #line linnum [filespec] - */ - -void Lexer::pragma() -{ - Token tok; - int linnum; - char *filespec = NULL; - Loc loc = this->loc; - - scan(&tok); - if (tok.value != TOKidentifier || tok.ident != Id::line) - goto Lerr; - - scan(&tok); - if (tok.value == TOKint32v || tok.value == TOKint64v) - linnum = tok.uns64value - 1; - else - goto Lerr; - - while (1) - { - switch (*p) - { - case 0: - case 0x1A: - case '\n': - Lnewline: - this->loc.linnum = linnum; - if (filespec) - this->loc.filename = filespec; - return; - - case '\r': - p++; - if (*p != '\n') - { p--; - goto Lnewline; - } - continue; - - case ' ': - case '\t': - case '\v': - case '\f': - p++; - continue; // skip white space - - case '_': - if (mod && memcmp(p, "__FILE__", 8) == 0) - { - p += 8; - filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); - } - continue; - - case '"': - if (filespec) - goto Lerr; - stringbuffer.reset(); - p++; - while (1) - { unsigned c; - - c = *p; - switch (c) - { - case '\n': - case '\r': - case 0: - case 0x1A: - goto Lerr; - - case '"': - stringbuffer.writeByte(0); - filespec = mem.strdup((char *)stringbuffer.data); - p++; - break; - - default: - if (c & 0x80) - { unsigned u = decodeUTF(); - if (u == PS || u == LS) - goto Lerr; - } - stringbuffer.writeByte(c); - p++; - continue; - } - break; - } - continue; - - default: - if (*p & 0x80) - { unsigned u = decodeUTF(); - if (u == PS || u == LS) - goto Lnewline; - } - goto Lerr; - } - } - -Lerr: - error(loc, "#line integer [\"filespec\"]\\n expected"); -} - - -/******************************************** - * Decode UTF character. - * Issue error messages for invalid sequences. - * Return decoded character, advance p to last character in UTF sequence. - */ - -unsigned Lexer::decodeUTF() -{ - dchar_t u; - unsigned char c; - unsigned char *s = p; - size_t len; - size_t idx; - char *msg; - - c = *s; - assert(c & 0x80); - - // Check length of remaining string up to 6 UTF-8 characters - for (len = 1; len < 6 && s[len]; len++) - ; - - idx = 0; - msg = utf_decodeChar(s, len, &idx, &u); - p += idx - 1; - if (msg) - { - error("%s", msg); - } - return u; -} - - -/*************************************************** - * Parse doc comment embedded between t->ptr and p. - * Remove trailing blanks and tabs from lines. - * Replace all newlines with \n. - * Remove leading comment character from each line. - * Decide if it's a lineComment or a blockComment. - * Append to previous one for this token. - */ - -void Lexer::getDocComment(Token *t, unsigned lineComment) -{ - OutBuffer buf; - unsigned char ct = t->ptr[2]; - unsigned char *q = t->ptr + 3; // start of comment text - int linestart = 0; - - unsigned char *qend = p; - if (ct == '*' || ct == '+') - qend -= 2; - - /* Scan over initial row of ****'s or ++++'s or ////'s - */ - for (; q < qend; q++) - { - if (*q != ct) - break; - } - - /* Remove trailing row of ****'s or ++++'s - */ - if (ct != '/') - { - for (; q < qend; qend--) - { - if (qend[-1] != ct) - break; - } - } - - for (; q < qend; q++) - { - unsigned char c = *q; - - switch (c) - { - case '*': - case '+': - if (linestart && c == ct) - { linestart = 0; - /* Trim preceding whitespace up to preceding \n - */ - while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) - buf.offset--; - continue; - } - break; - - case ' ': - case '\t': - break; - - case '\r': - if (q[1] == '\n') - continue; // skip the \r - goto Lnewline; - - default: - if (c == 226) - { - // If LS or PS - if (q[1] == 128 && - (q[2] == 168 || q[2] == 169)) - { - q += 2; - goto Lnewline; - } - } - linestart = 0; - break; - - Lnewline: - c = '\n'; // replace all newlines with \n - case '\n': - linestart = 1; - - /* Trim trailing whitespace - */ - while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) - buf.offset--; - - break; - } - buf.writeByte(c); - } - - // Always end with a newline - if (!buf.offset || buf.data[buf.offset - 1] != '\n') - buf.writeByte('\n'); - - buf.writeByte(0); - - // It's a line comment if the start of the doc comment comes - // after other non-whitespace on the same line. - unsigned char** dc = (lineComment && anyToken) - ? &t->lineComment - : &t->blockComment; - - // Combine with previous doc comment, if any - if (*dc) - *dc = combineComments(*dc, (unsigned char *)buf.data); - else - *dc = (unsigned char *)buf.extractData(); -} - -/******************************************** - * Combine two document comments into one. - */ - -unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) -{ - unsigned char *c = c2; - - if (c1) - { c = c1; - if (c2) - { size_t len1 = strlen((char *)c1); - size_t len2 = strlen((char *)c2); - - c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); - memcpy(c, c1, len1); - c[len1] = '\n'; - memcpy(c + len1 + 1, c2, len2); - c[len1 + 1 + len2] = 0; - } - } - return c; -} - -/******************************************** - * Create an identifier in the string table. - */ - -Identifier *Lexer::idPool(const char *s) -{ unsigned len; - Identifier *id; - StringValue *sv; - - len = strlen(s); - sv = stringtable.update(s, len); - id = (Identifier *) sv->ptrvalue; - if (!id) - { - id = new Identifier(sv->lstring.string, TOKidentifier); - sv->ptrvalue = id; - } - return id; -} - -/**************************************** - */ - -struct Keyword -{ char *name; - enum TOK value; -}; - -static Keyword keywords[] = -{ -// { "", TOK }, - - { "this", TOKthis }, - { "super", TOKsuper }, - { "assert", TOKassert }, - { "null", TOKnull }, - { "true", TOKtrue }, - { "false", TOKfalse }, - { "cast", TOKcast }, - { "new", TOKnew }, - { "delete", TOKdelete }, - { "throw", TOKthrow }, - { "module", TOKmodule }, - { "pragma", TOKpragma }, - { "typeof", TOKtypeof }, - { "typeid", TOKtypeid }, - - { "template", TOKtemplate }, - - { "void", TOKvoid }, - { "byte", TOKint8 }, - { "ubyte", TOKuns8 }, - { "short", TOKint16 }, - { "ushort", TOKuns16 }, - { "int", TOKint32 }, - { "uint", TOKuns32 }, - { "long", TOKint64 }, - { "ulong", TOKuns64 }, - { "cent", TOKcent, }, - { "ucent", TOKucent, }, - { "float", TOKfloat32 }, - { "double", TOKfloat64 }, - { "real", TOKfloat80 }, - - { "bool", TOKbool }, - { "char", TOKchar }, - { "wchar", TOKwchar }, - { "dchar", TOKdchar }, - - { "ifloat", TOKimaginary32 }, - { "idouble", TOKimaginary64 }, - { "ireal", TOKimaginary80 }, - - { "cfloat", TOKcomplex32 }, - { "cdouble", TOKcomplex64 }, - { "creal", TOKcomplex80 }, - - { "delegate", TOKdelegate }, - { "function", TOKfunction }, - - { "is", TOKis }, - { "if", TOKif }, - { "else", TOKelse }, - { "while", TOKwhile }, - { "for", TOKfor }, - { "do", TOKdo }, - { "switch", TOKswitch }, - { "case", TOKcase }, - { "default", TOKdefault }, - { "break", TOKbreak }, - { "continue", TOKcontinue }, - { "synchronized", TOKsynchronized }, - { "return", TOKreturn }, - { "goto", TOKgoto }, - { "try", TOKtry }, - { "catch", TOKcatch }, - { "finally", TOKfinally }, - { "with", TOKwith }, - { "asm", TOKasm }, - { "foreach", TOKforeach }, - { "foreach_reverse", TOKforeach_reverse }, - { "scope", TOKscope }, - - { "struct", TOKstruct }, - { "class", TOKclass }, - { "interface", TOKinterface }, - { "union", TOKunion }, - { "enum", TOKenum }, - { "import", TOKimport }, - { "mixin", TOKmixin }, - { "static", TOKstatic }, - { "final", TOKfinal }, - { "const", TOKconst }, - { "typedef", TOKtypedef }, - { "alias", TOKalias }, - { "override", TOKoverride }, - { "abstract", TOKabstract }, - { "volatile", TOKvolatile }, - { "debug", TOKdebug }, - { "deprecated", TOKdeprecated }, - { "in", TOKin }, - { "out", TOKout }, - { "inout", TOKinout }, - { "lazy", TOKlazy }, - { "auto", TOKauto }, - - { "align", TOKalign }, - { "extern", TOKextern }, - { "private", TOKprivate }, - { "package", TOKpackage }, - { "protected", TOKprotected }, - { "public", TOKpublic }, - { "export", TOKexport }, - - { "body", TOKbody }, - { "invariant", TOKinvariant }, - { "unittest", TOKunittest }, - { "version", TOKversion }, - - // Added after 1.0 - { "ref", TOKref }, - { "macro", TOKmacro }, -#if V2 - { "__traits", TOKtraits }, -#endif -}; - -int Token::isKeyword() -{ - for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) - { - if (keywords[u].value == value) - return 1; - } - return 0; -} - -void Lexer::initKeywords() -{ StringValue *sv; - unsigned u; - enum TOK v; - unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); - - if (global.params.Dversion == 1) - nkeywords -= 2; - - cmtable_init(); - - for (u = 0; u < nkeywords; u++) - { char *s; - - //printf("keyword[%d] = '%s'\n",u, keywords[u].name); - s = keywords[u].name; - v = keywords[u].value; - sv = stringtable.insert(s, strlen(s)); - sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); - - //printf("tochars[%d] = '%s'\n",v, s); - Token::tochars[v] = s; - } - - Token::tochars[TOKeof] = "EOF"; - Token::tochars[TOKlcurly] = "{"; - Token::tochars[TOKrcurly] = "}"; - Token::tochars[TOKlparen] = "("; - Token::tochars[TOKrparen] = ")"; - Token::tochars[TOKlbracket] = "["; - Token::tochars[TOKrbracket] = "]"; - Token::tochars[TOKsemicolon] = ";"; - Token::tochars[TOKcolon] = ":"; - Token::tochars[TOKcomma] = ","; - Token::tochars[TOKdot] = "."; - Token::tochars[TOKxor] = "^"; - Token::tochars[TOKxorass] = "^="; - Token::tochars[TOKassign] = "="; - Token::tochars[TOKconstruct] = "="; - Token::tochars[TOKlt] = "<"; - Token::tochars[TOKgt] = ">"; - Token::tochars[TOKle] = "<="; - Token::tochars[TOKge] = ">="; - Token::tochars[TOKequal] = "=="; - Token::tochars[TOKnotequal] = "!="; - Token::tochars[TOKnotidentity] = "!is"; - Token::tochars[TOKtobool] = "!!"; - - Token::tochars[TOKunord] = "!<>="; - Token::tochars[TOKue] = "!<>"; - Token::tochars[TOKlg] = "<>"; - Token::tochars[TOKleg] = "<>="; - Token::tochars[TOKule] = "!>"; - Token::tochars[TOKul] = "!>="; - Token::tochars[TOKuge] = "!<"; - Token::tochars[TOKug] = "!<="; - - Token::tochars[TOKnot] = "!"; - Token::tochars[TOKtobool] = "!!"; - Token::tochars[TOKshl] = "<<"; - Token::tochars[TOKshr] = ">>"; - Token::tochars[TOKushr] = ">>>"; - Token::tochars[TOKadd] = "+"; - Token::tochars[TOKmin] = "-"; - Token::tochars[TOKmul] = "*"; - Token::tochars[TOKdiv] = "/"; - Token::tochars[TOKmod] = "%"; - Token::tochars[TOKslice] = ".."; - Token::tochars[TOKdotdotdot] = "..."; - Token::tochars[TOKand] = "&"; - Token::tochars[TOKandand] = "&&"; - Token::tochars[TOKor] = "|"; - Token::tochars[TOKoror] = "||"; - Token::tochars[TOKarray] = "[]"; - Token::tochars[TOKindex] = "[i]"; - Token::tochars[TOKaddress] = "&"; - Token::tochars[TOKstar] = "*"; - Token::tochars[TOKtilde] = "~"; - Token::tochars[TOKdollar] = "$"; - Token::tochars[TOKcast] = "cast"; - Token::tochars[TOKplusplus] = "++"; - Token::tochars[TOKminusminus] = "--"; - Token::tochars[TOKtype] = "type"; - Token::tochars[TOKquestion] = "?"; - Token::tochars[TOKneg] = "-"; - Token::tochars[TOKuadd] = "+"; - Token::tochars[TOKvar] = "var"; - Token::tochars[TOKaddass] = "+="; - Token::tochars[TOKminass] = "-="; - Token::tochars[TOKmulass] = "*="; - Token::tochars[TOKdivass] = "/="; - Token::tochars[TOKmodass] = "%="; - Token::tochars[TOKshlass] = "<<="; - Token::tochars[TOKshrass] = ">>="; - Token::tochars[TOKushrass] = ">>>="; - Token::tochars[TOKandass] = "&="; - Token::tochars[TOKorass] = "|="; - Token::tochars[TOKcatass] = "~="; - Token::tochars[TOKcat] = "~"; - Token::tochars[TOKcall] = "call"; - Token::tochars[TOKidentity] = "is"; - Token::tochars[TOKnotidentity] = "!is"; - - Token::tochars[TOKorass] = "|="; - Token::tochars[TOKidentifier] = "identifier"; - - // For debugging - Token::tochars[TOKdotexp] = "dotexp"; - Token::tochars[TOKdotti] = "dotti"; - Token::tochars[TOKdotvar] = "dotvar"; - Token::tochars[TOKdottype] = "dottype"; - Token::tochars[TOKsymoff] = "symoff"; - Token::tochars[TOKtypedot] = "typedot"; - Token::tochars[TOKarraylength] = "arraylength"; - Token::tochars[TOKarrayliteral] = "arrayliteral"; - Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; - Token::tochars[TOKstructliteral] = "structliteral"; - Token::tochars[TOKstring] = "string"; - Token::tochars[TOKdsymbol] = "symbol"; - Token::tochars[TOKtuple] = "tuple"; - Token::tochars[TOKdeclaration] = "declaration"; - Token::tochars[TOKdottd] = "dottd"; -} + +// Compiler implementation of the D programming language +// Copyright (c) 1999-2008 by Digital Mars +// All Rights Reserved +// written by Walter Bright +// http://www.digitalmars.com +// License for redistribution is by either the Artistic License +// in artistic.txt, or the GNU General Public License in gnu.txt. +// See the included readme.txt for details. + +/* Lexical Analyzer */ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#include <errno.h> +#include <wchar.h> +#include <stdlib.h> +#include <assert.h> +#include <sys/time.h> + +#ifdef IN_GCC + +#include <time.h> +#include "mem.h" + +#else + +#if __GNUC__ +#include <time.h> +#endif + +#if IN_LLVM +#include "mem.h" +#elif _WIN32 +#include "..\root\mem.h" +#else +#include "../root/mem.h" +#endif +#endif + +#include "stringtable.h" + +#include "lexer.h" +#include "utf.h" +#include "identifier.h" +#include "id.h" +#include "module.h" + +#if _WIN32 && __DMC__ +// from \dm\src\include\setlocal.h +extern "C" char * __cdecl __locale_decpoint; +#endif + +extern int HtmlNamedEntity(unsigned char *p, int length); + +#define LS 0x2028 // UTF line separator +#define PS 0x2029 // UTF paragraph separator + +/******************************************** + * Do our own char maps + */ + +static unsigned char cmtable[256]; + +const int CMoctal = 0x1; +const int CMhex = 0x2; +const int CMidchar = 0x4; + +inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; } +inline unsigned char ishex (unsigned char c) { return cmtable[c] & CMhex; } +inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; } + +static void cmtable_init() +{ + for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++) + { + if ('0' <= c && c <= '7') + cmtable[c] |= CMoctal; + if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) + cmtable[c] |= CMhex; + if (isalnum(c) || c == '_') + cmtable[c] |= CMidchar; + } +} + + +/************************* Token **********************************************/ + +char *Token::tochars[TOKMAX]; + +void *Token::operator new(size_t size) +{ Token *t; + + if (Lexer::freelist) + { + t = Lexer::freelist; + Lexer::freelist = t->next; + return t; + } + + return ::operator new(size); +} + +#ifdef DEBUG +void Token::print() +{ + fprintf(stdmsg, "%s\n", toChars()); +} +#endif + +char *Token::toChars() +{ char *p; + static char buffer[3 + 3 * sizeof(value) + 1]; + + p = buffer; + switch (value) + { + case TOKint32v: +#if IN_GCC + sprintf(buffer,"%d",(d_int32)int64value); +#else + sprintf(buffer,"%d",int32value); +#endif + break; + + case TOKuns32v: + case TOKcharv: + case TOKwcharv: + case TOKdcharv: +#if IN_GCC + sprintf(buffer,"%uU",(d_uns32)uns64value); +#else + sprintf(buffer,"%uU",uns32value); +#endif + break; + + case TOKint64v: + sprintf(buffer,"%jdL",int64value); + break; + + case TOKuns64v: + sprintf(buffer,"%juUL",uns64value); + break; + +#if IN_GCC + case TOKfloat32v: + case TOKfloat64v: + case TOKfloat80v: + float80value.format(buffer, sizeof(buffer)); + break; + case TOKimaginary32v: + case TOKimaginary64v: + case TOKimaginary80v: + float80value.format(buffer, sizeof(buffer)); + // %% buffer + strcat(buffer, "i"); + break; +#else + case TOKfloat32v: + sprintf(buffer,"%Lgf", float80value); + break; + + case TOKfloat64v: + sprintf(buffer,"%Lg", float80value); + break; + + case TOKfloat80v: + sprintf(buffer,"%LgL", float80value); + break; + + case TOKimaginary32v: + sprintf(buffer,"%Lgfi", float80value); + break; + + case TOKimaginary64v: + sprintf(buffer,"%Lgi", float80value); + break; + + case TOKimaginary80v: + sprintf(buffer,"%LgLi", float80value); + break; +#endif + + case TOKstring: +#if CSTRINGS + p = string; +#else + { OutBuffer buf; + + buf.writeByte('"'); + for (size_t i = 0; i < len; ) + { unsigned c; + + utf_decodeChar((unsigned char *)ustring, len, &i, &c); + switch (c) + { + case 0: + break; + + case '"': + case '\\': + buf.writeByte('\\'); + default: + if (isprint(c)) + buf.writeByte(c); + else if (c <= 0x7F) + buf.printf("\\x%02x", c); + else if (c <= 0xFFFF) + buf.printf("\\u%04x", c); + else + buf.printf("\\U%08x", c); + continue; + } + break; + } + buf.writeByte('"'); + if (postfix) + buf.writeByte('"'); + buf.writeByte(0); + p = (char *)buf.extractData(); + } +#endif + break; + + case TOKidentifier: + case TOKenum: + case TOKstruct: + case TOKimport: + CASE_BASIC_TYPES: + p = ident->toChars(); + break; + + default: + p = toChars(value); + break; + } + return p; +} + +char *Token::toChars(enum TOK value) +{ char *p; + static char buffer[3 + 3 * sizeof(value) + 1]; + + p = tochars[value]; + if (!p) + { sprintf(buffer,"TOK%d",value); + p = buffer; + } + return p; +} + +/*************************** Lexer ********************************************/ + +Token *Lexer::freelist = NULL; +StringTable Lexer::stringtable; +OutBuffer Lexer::stringbuffer; + +Lexer::Lexer(Module *mod, + unsigned char *base, unsigned begoffset, unsigned endoffset, + int doDocComment, int commentToken) + : loc(mod, 1) +{ + //printf("Lexer::Lexer(%p,%d)\n",base,length); + //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); + memset(&token,0,sizeof(token)); + this->base = base; + this->end = base + endoffset; + p = base + begoffset; + this->mod = mod; + this->doDocComment = doDocComment; + this->anyToken = 0; + this->commentToken = commentToken; + //initKeywords(); + + /* If first line starts with '#!', ignore the line + */ + + if (p[0] == '#' && p[1] =='!') + { + p += 2; + while (1) + { unsigned char c = *p; + switch (c) + { + case '\n': + p++; + break; + + case '\r': + p++; + if (*p == '\n') + p++; + break; + + case 0: + case 0x1A: + break; + + default: + if (c & 0x80) + { unsigned u = decodeUTF(); + if (u == PS || u == LS) + break; + } + p++; + continue; + } + break; + } + loc.linnum = 2; + } +} + + +void Lexer::error(const char *format, ...) +{ + if (mod && !global.gag) + { + char *p = loc.toChars(); + if (*p) + fprintf(stdmsg, "%s: ", p); + mem.free(p); + + va_list ap; + va_start(ap, format); + vfprintf(stdmsg, format, ap); + va_end(ap); + + fprintf(stdmsg, "\n"); + fflush(stdmsg); + + if (global.errors >= 20) // moderate blizzard of cascading messages + fatal(); + } + global.errors++; +} + +void Lexer::error(Loc loc, const char *format, ...) +{ + if (mod && !global.gag) + { + char *p = loc.toChars(); + if (*p) + fprintf(stdmsg, "%s: ", p); + mem.free(p); + + va_list ap; + va_start(ap, format); + vfprintf(stdmsg, format, ap); + va_end(ap); + + fprintf(stdmsg, "\n"); + fflush(stdmsg); + + if (global.errors >= 20) // moderate blizzard of cascading messages + fatal(); + } + global.errors++; +} + +TOK Lexer::nextToken() +{ Token *t; + + if (token.next) + { + t = token.next; + memcpy(&token,t,sizeof(Token)); + t->next = freelist; + freelist = t; + } + else + { + scan(&token); + } + //token.print(); + return token.value; +} + +Token *Lexer::peek(Token *ct) +{ Token *t; + + if (ct->next) + t = ct->next; + else + { + t = new Token(); + scan(t); + t->next = NULL; + ct->next = t; + } + return t; +} + +/********************************* + * tk is on the opening (. + * Look ahead and return token that is past the closing ). + */ + +Token *Lexer::peekPastParen(Token *tk) +{ + //printf("peekPastParen()\n"); + int parens = 1; + int curlynest = 0; + while (1) + { + tk = peek(tk); + //tk->print(); + switch (tk->value) + { + case TOKlparen: + parens++; + continue; + + case TOKrparen: + --parens; + if (parens) + continue; + tk = peek(tk); + break; + + case TOKlcurly: + curlynest++; + continue; + + case TOKrcurly: + if (--curlynest >= 0) + continue; + break; + + case TOKsemicolon: + if (curlynest) + continue; + break; + + case TOKeof: + break; + + default: + continue; + } + return tk; + } +} + +/********************************** + * Determine if string is a valid Identifier. + * Placed here because of commonality with Lexer functionality. + * Returns: + * 0 invalid + */ + +int Lexer::isValidIdentifier(char *p) +{ + size_t len; + size_t idx; + + if (!p || !*p) + goto Linvalid; + + if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars + goto Linvalid; + + len = strlen(p); + idx = 0; + while (p[idx]) + { dchar_t dc; + + char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); + if (q) + goto Linvalid; + + if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) + goto Linvalid; + } + return 1; + +Linvalid: + return 0; +} + +/**************************** + * Turn next token in buffer into a token. + */ + +void Lexer::scan(Token *t) +{ + unsigned lastLine = loc.linnum; + unsigned linnum; + + t->blockComment = NULL; + t->lineComment = NULL; + while (1) + { + t->ptr = p; + //printf("p = %p, *p = '%c'\n",p,*p); + switch (*p) + { + case 0: + case 0x1A: + t->value = TOKeof; // end of file + return; + + case ' ': + case '\t': + case '\v': + case '\f': + p++; + continue; // skip white space + + case '\r': + p++; + if (*p != '\n') // if CR stands by itself + loc.linnum++; + continue; // skip white space + + case '\n': + p++; + loc.linnum++; + continue; // skip white space + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + t->value = number(t); + return; + +#if CSTRINGS + case '\'': + t->value = charConstant(t, 0); + return; + + case '"': + t->value = stringConstant(t,0); + return; + + case 'l': + case 'L': + if (p[1] == '\'') + { + p++; + t->value = charConstant(t, 1); + return; + } + else if (p[1] == '"') + { + p++; + t->value = stringConstant(t, 1); + return; + } +#else + case '\'': + t->value = charConstant(t,0); + return; + + case 'r': + if (p[1] != '"') + goto case_ident; + p++; + case '`': + t->value = wysiwygStringConstant(t, *p); + return; + + case 'x': + if (p[1] != '"') + goto case_ident; + p++; + t->value = hexStringConstant(t); + return; + +#if V2 + case 'q': + if (p[1] == '"') + { + p++; + t->value = delimitedStringConstant(t); + return; + } + else if (p[1] == '{') + { + p++; + t->value = tokenStringConstant(t); + return; + } + else + goto case_ident; +#endif + + case '"': + t->value = escapeStringConstant(t,0); + return; + + case '\\': // escaped string literal + { unsigned c; + + stringbuffer.reset(); + do + { + p++; + switch (*p) + { + case 'u': + case 'U': + case '&': + c = escapeSequence(); + stringbuffer.writeUTF8(c); + break; + + default: + c = escapeSequence(); + stringbuffer.writeByte(c); + break; + } + } while (*p == '\\'); + t->len = stringbuffer.offset; + stringbuffer.writeByte(0); + t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); + memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); + t->postfix = 0; + t->value = TOKstring; + return; + } + + case 'l': + case 'L': +#endif + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'm': case 'n': case 'o': +#if V2 + case 'p': /*case 'q': case 'r':*/ case 's': case 't': +#else + case 'p': case 'q': /*case 'r':*/ case 's': case 't': +#endif + case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': + case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case '_': + case_ident: + { unsigned char c; + StringValue *sv; + Identifier *id; + + do + { + c = *++p; + } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); + sv = stringtable.update((char *)t->ptr, p - t->ptr); + id = (Identifier *) sv->ptrvalue; + if (!id) + { id = new Identifier(sv->lstring.string,TOKidentifier); + sv->ptrvalue = id; + } + t->ident = id; + t->value = (enum TOK) id->value; + anyToken = 1; + if (*t->ptr == '_') // if special identifier token + { + static char date[11+1]; + static char time[8+1]; + static char timestamp[24+1]; + + if (!date[0]) // lazy evaluation + { time_t t; + char *p; + + ::time(&t); + p = ctime(&t); + assert(p); + sprintf(date, "%.6s %.4s", p + 4, p + 20); + sprintf(time, "%.8s", p + 11); + sprintf(timestamp, "%.24s", p); + } + + if (mod && id == Id::FILE) + { + t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); + goto Lstring; + } + else if (mod && id == Id::LINE) + { + t->value = TOKint64v; + t->uns64value = loc.linnum; + } + else if (id == Id::DATE) + { + t->ustring = (unsigned char *)date; + goto Lstring; + } + else if (id == Id::TIME) + { + t->ustring = (unsigned char *)time; + goto Lstring; + } + else if (id == Id::VENDOR) + { + t->ustring = (unsigned char *)"Digital Mars D"; + goto Lstring; + } + else if (id == Id::TIMESTAMP) + { + t->ustring = (unsigned char *)timestamp; + Lstring: + t->value = TOKstring; + Llen: + t->postfix = 0; + t->len = strlen((char *)t->ustring); + } + else if (id == Id::VERSIONX) + { unsigned major = 0; + unsigned minor = 0; + + for (char *p = global.version + 1; 1; p++) + { + char c = *p; + if (isdigit(c)) + minor = minor * 10 + c - '0'; + else if (c == '.') + { major = minor; + minor = 0; + } + else + break; + } + t->value = TOKint64v; + t->uns64value = major * 1000 + minor; + } +#if V2 + else if (id == Id::EOFX) + { + t->value = TOKeof; + // Advance scanner to end of file + while (!(*p == 0 || *p == 0x1A)) + p++; + } +#endif + } + //printf("t->value = %d\n",t->value); + return; + } + + case '/': + p++; + switch (*p) + { + case '=': + p++; + t->value = TOKdivass; + return; + + case '*': + p++; + linnum = loc.linnum; + while (1) + { + while (1) + { unsigned char c = *p; + switch (c) + { + case '/': + break; + + case '\n': + loc.linnum++; + p++; + continue; + + case '\r': + p++; + if (*p != '\n') + loc.linnum++; + continue; + + case 0: + case 0x1A: + error("unterminated /* */ comment"); + p = end; + t->value = TOKeof; + return; + + default: + if (c & 0x80) + { unsigned u = decodeUTF(); + if (u == PS || u == LS) + loc.linnum++; + } + p++; + continue; + } + break; + } + p++; + if (p[-2] == '*' && p - 3 != t->ptr) + break; + } + if (commentToken) + { + t->value = TOKcomment; + return; + } + else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) + { // if /** but not /**/ + getDocComment(t, lastLine == linnum); + } + continue; + + case '/': // do // style comments + linnum = loc.linnum; + while (1) + { unsigned char c = *++p; + switch (c) + { + case '\n': + break; + + case '\r': + if (p[1] == '\n') + p++; + break; + + case 0: + case 0x1A: + if (commentToken) + { + p = end; + t->value = TOKcomment; + return; + } + if (doDocComment && t->ptr[2] == '/') + getDocComment(t, lastLine == linnum); + p = end; + t->value = TOKeof; + return; + + default: + if (c & 0x80) + { unsigned u = decodeUTF(); + if (u == PS || u == LS) + break; + } + continue; + } + break; + } + + if (commentToken) + { + p++; + loc.linnum++; + t->value = TOKcomment; + return; + } + if (doDocComment && t->ptr[2] == '/') + getDocComment(t, lastLine == linnum); + + p++; + loc.linnum++; + continue; + + case '+': + { int nest; + + linnum = loc.linnum; + p++; + nest = 1; + while (1) + { unsigned char c = *p; + switch (c) + { + case '/': + p++; + if (*p == '+') + { + p++; + nest++; + } + continue; + + case '+': + p++; + if (*p == '/') + { + p++; + if (--nest == 0) + break; + } + continue; + + case '\r': + p++; + if (*p != '\n') + loc.linnum++; + continue; + + case '\n': + loc.linnum++; + p++; + continue; + + case 0: + case 0x1A: + error("unterminated /+ +/ comment"); + p = end; + t->value = TOKeof; + return; + + default: + if (c & 0x80) + { unsigned u = decodeUTF(); + if (u == PS || u == LS) + loc.linnum++; + } + p++; + continue; + } + break; + } + if (commentToken) + { + t->value = TOKcomment; + return; + } + if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) + { // if /++ but not /++/ + getDocComment(t, lastLine == linnum); + } + continue; + } + } + t->value = TOKdiv; + return; + + case '.': + p++; + if (isdigit(*p)) + { /* Note that we don't allow ._1 and ._ as being + * valid floating point numbers. + */ + p--; + t->value = inreal(t); + } + else if (p[0] == '.') + { + if (p[1] == '.') + { p += 2; + t->value = TOKdotdotdot; + } + else + { p++; + t->value = TOKslice; + } + } + else + t->value = TOKdot; + return; + + case '&': + p++; + if (*p == '=') + { p++; + t->value = TOKandass; + } + else if (*p == '&') + { p++; + t->value = TOKandand; + } + else + t->value = TOKand; + return; + + case '|': + p++; + if (*p == '=') + { p++; + t->value = TOKorass; + } + else if (*p == '|') + { p++; + t->value = TOKoror; + } + else + t->value = TOKor; + return; + + case '-': + p++; + if (*p == '=') + { p++; + t->value = TOKminass; + } +#if 0 + else if (*p == '>') + { p++; + t->value = TOKarrow; + } +#endif + else if (*p == '-') + { p++; + t->value = TOKminusminus; + } + else + t->value = TOKmin; + return; + + case '+': + p++; + if (*p == '=') + { p++; + t->value = TOKaddass; + } + else if (*p == '+') + { p++; + t->value = TOKplusplus; + } + else + t->value = TOKadd; + return; + + case '<': + p++; + if (*p == '=') + { p++; + t->value = TOKle; // <= + } + else if (*p == '<') + { p++; + if (*p == '=') + { p++; + t->value = TOKshlass; // <<= + } + else + t->value = TOKshl; // << + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t->value = TOKleg; // <>= + } + else + t->value = TOKlg; // <> + } + else + t->value = TOKlt; // < + return; + + case '>': + p++; + if (*p == '=') + { p++; + t->value = TOKge; // >= + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t->value = TOKshrass; // >>= + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t->value = TOKushrass; // >>>= + } + else + t->value = TOKushr; // >>> + } + else + t->value = TOKshr; // >> + } + else + t->value = TOKgt; // > + return; + + case '!': + p++; + if (*p == '=') + { p++; + if (*p == '=' && global.params.Dversion == 1) + { p++; + t->value = TOKnotidentity; // !== + } + else + t->value = TOKnotequal; // != + } + else if (*p == '<') + { p++; + if (*p == '>') + { p++; + if (*p == '=') + { p++; + t->value = TOKunord; // !<>= + } + else + t->value = TOKue; // !<> + } + else if (*p == '=') + { p++; + t->value = TOKug; // !<= + } + else + t->value = TOKuge; // !< + } + else if (*p == '>') + { p++; + if (*p == '=') + { p++; + t->value = TOKul; // !>= + } + else + t->value = TOKule; // !> + } + else + t->value = TOKnot; // ! + return; + + case '=': + p++; + if (*p == '=') + { p++; + if (*p == '=' && global.params.Dversion == 1) + { p++; + t->value = TOKidentity; // === + } + else + t->value = TOKequal; // == + } + else + t->value = TOKassign; // = + return; + + case '~': + p++; + if (*p == '=') + { p++; + t->value = TOKcatass; // ~= + } + else + t->value = TOKtilde; // ~ + return; + +#define SINGLE(c,tok) case c: p++; t->value = tok; return; + + SINGLE('(', TOKlparen) + SINGLE(')', TOKrparen) + SINGLE('[', TOKlbracket) + SINGLE(']', TOKrbracket) + SINGLE('{', TOKlcurly) + SINGLE('}', TOKrcurly) + SINGLE('?', TOKquestion) + SINGLE(',', TOKcomma) + SINGLE(';', TOKsemicolon) + SINGLE(':', TOKcolon) + SINGLE('$', TOKdollar) + +#undef SINGLE + +#define DOUBLE(c1,tok1,c2,tok2) \ + case c1: \ + p++; \ + if (*p == c2) \ + { p++; \ + t->value = tok2; \ + } \ + else \ + t->value = tok1; \ + return; + + DOUBLE('*', TOKmul, '=', TOKmulass) + DOUBLE('%', TOKmod, '=', TOKmodass) + DOUBLE('^', TOKxor, '=', TOKxorass) + +#undef DOUBLE + + case '#': + p++; + pragma(); + continue; + + default: + { unsigned char c = *p; + + if (c & 0x80) + { unsigned u = decodeUTF(); + + // Check for start of unicode identifier + if (isUniAlpha(u)) + goto case_ident; + + if (u == PS || u == LS) + { + loc.linnum++; + p++; + continue; + } + } + if (isprint(c)) + error("unsupported char '%c'", c); + else + error("unsupported char 0x%02x", c); + p++; + continue; + } + } + } +} + +/******************************************* + * Parse escape sequence. + */ + +unsigned Lexer::escapeSequence() +{ unsigned c; + int n; + int ndigits; + + c = *p; + switch (c) + { + case '\'': + case '"': + case '?': + case '\\': + Lconsume: + p++; + break; + + case 'a': c = 7; goto Lconsume; + case 'b': c = 8; goto Lconsume; + case 'f': c = 12; goto Lconsume; + case 'n': c = 10; goto Lconsume; + case 'r': c = 13; goto Lconsume; + case 't': c = 9; goto Lconsume; + case 'v': c = 11; goto Lconsume; + + case 'u': + ndigits = 4; + goto Lhex; + case 'U': + ndigits = 8; + goto Lhex; + case 'x': + ndigits = 2; + Lhex: + p++; + c = *p; + if (ishex(c)) + { unsigned v; + + n = 0; + v = 0; + while (1) + { + if (isdigit(c)) + c -= '0'; + else if (islower(c)) + c -= 'a' - 10; + else + c -= 'A' - 10; + v = v * 16 + c; + c = *++p; + if (++n == ndigits) + break; + if (!ishex(c)) + { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); + break; + } + } + if (ndigits != 2 && !utf_isValidDchar(v)) + error("invalid UTF character \\U%08x", v); + c = v; + } + else + error("undefined escape hex sequence \\%c\n",c); + break; + + case '&': // named character entity + for (unsigned char *idstart = ++p; 1; p++) + { + switch (*p) + { + case ';': + c = HtmlNamedEntity(idstart, p - idstart); + if (c == ~0) + { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); + c = ' '; + } + p++; + break; + + default: + if (isalpha(*p) || + (p != idstart + 1 && isdigit(*p))) + continue; + error("unterminated named entity"); + break; + } + break; + } + break; + + case 0: + case 0x1A: // end of file + c = '\\'; + break; + + default: + if (isoctal(c)) + { unsigned v; + + n = 0; + v = 0; + do + { + v = v * 8 + (c - '0'); + c = *++p; + } while (++n < 3 && isoctal(c)); + c = v; + if (c > 0xFF) + error("0%03o is larger than a byte", c); + } + else + error("undefined escape sequence \\%c\n",c); + break; + } + return c; +} + +/************************************** + */ + +TOK Lexer::wysiwygStringConstant(Token *t, int tc) +{ unsigned c; + Loc start = loc; + + p++; + stringbuffer.reset(); + while (1) + { + c = *p++; + switch (c) + { + case '\n': + loc.linnum++; + break; + + case '\r': + if (*p == '\n') + continue; // ignore + c = '\n'; // treat EndOfLine as \n character + loc.linnum++; + break; + + case 0: + case 0x1A: + error("unterminated string constant starting at %s", start.toChars()); + t->ustring = (unsigned char *)""; + t->len = 0; + t->postfix = 0; + return TOKstring; + + case '"': + case '`': + if (c == tc) + { + t->len = stringbuffer.offset; + stringbuffer.writeByte(0); + t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); + memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); + stringPostfix(t); + return TOKstring; + } + break; + + default: + if (c & 0x80) + { p--; + unsigned u = decodeUTF(); + p++; + if (u == PS || u == LS) + loc.linnum++; + stringbuffer.writeUTF8(u); + continue; + } + break; + } + stringbuffer.writeByte(c); + } +} + +/************************************** + * Lex hex strings: + * x"0A ae 34FE BD" + */ + +TOK Lexer::hexStringConstant(Token *t) +{ unsigned c; + Loc start = loc; + unsigned n = 0; + unsigned v; + + p++; + stringbuffer.reset(); + while (1) + { + c = *p++; + switch (c) + { + case ' ': + case '\t': + case '\v': + case '\f': + continue; // skip white space + + case '\r': + if (*p == '\n') + continue; // ignore + // Treat isolated '\r' as if it were a '\n' + case '\n': + loc.linnum++; + continue; + + case 0: + case 0x1A: + error("unterminated string constant starting at %s", start.toChars()); + t->ustring = (unsigned char *)""; + t->len = 0; + t->postfix = 0; + return TOKstring; + + case '"': + if (n & 1) + { error("odd number (%d) of hex characters in hex string", n); + stringbuffer.writeByte(v); + } + t->len = stringbuffer.offset; + stringbuffer.writeByte(0); + t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); + memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); + stringPostfix(t); + return TOKstring; + + default: + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'a' && c <= 'f') + c -= 'a' - 10; + else if (c >= 'A' && c <= 'F') + c -= 'A' - 10; + else if (c & 0x80) + { p--; + unsigned u = decodeUTF(); + p++; + if (u == PS || u == LS) + loc.linnum++; + else + error("non-hex character \\u%x", u); + } + else + error("non-hex character '%c'", c); + if (n & 1) + { v = (v << 4) | c; + stringbuffer.writeByte(v); + } + else + v = c; + n++; + break; + } + } +} + + +#if V2 +/************************************** + * Lex delimited strings: + * q"(foo(xxx))" // "foo(xxx)" + * q"[foo(]" // "foo(" + * q"/foo]/" // "foo]" + * q"HERE + * foo + * HERE" // "foo\n" + * Input: + * p is on the " + */ + +TOK Lexer::delimitedStringConstant(Token *t) +{ unsigned c; + Loc start = loc; + unsigned delimleft = 0; + unsigned delimright = 0; + unsigned nest = 1; + unsigned nestcount; + Identifier *hereid = NULL; + unsigned blankrol = 0; + unsigned startline = 0; + + p++; + stringbuffer.reset(); + while (1) + { + c = *p++; + //printf("c = '%c'\n", c); + switch (c) + { + case '\n': + Lnextline: + loc.linnum++; + startline = 1; + if (blankrol) + { blankrol = 0; + continue; + } + if (hereid) + { + stringbuffer.writeUTF8(c); + continue; + } + break; + + case '\r': + if (*p == '\n') + continue; // ignore + c = '\n'; // treat EndOfLine as \n character + goto Lnextline; + + case 0: + case 0x1A: + goto Lerror; + + default: + if (c & 0x80) + { p--; + c = decodeUTF(); + p++; + if (c == PS || c == LS) + goto Lnextline; + } + break; + } + if (delimleft == 0) + { delimleft = c; + nest = 1; + nestcount = 1; + if (c == '(') + delimright = ')'; + else if (c == '{') + delimright = '}'; + else if (c == '[') + delimright = ']'; + else if (c == '<') + delimright = '>'; + else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) + { // Start of identifier; must be a heredoc + Token t; + p--; + scan(&t); // read in heredoc identifier + if (t.value != TOKidentifier) + { error("identifier expected for heredoc, not %s", t.toChars()); + delimright = c; + } + else + { hereid = t.ident; + //printf("hereid = '%s'\n", hereid->toChars()); + blankrol = 1; + } + nest = 0; + } + else + { delimright = c; + nest = 0; + } + } + else + { + if (blankrol) + { error("heredoc rest of line should be blank"); + blankrol = 0; + continue; + } + if (nest == 1) + { + if (c == delimleft) + nestcount++; + else if (c == delimright) + { nestcount--; + if (nestcount == 0) + goto Ldone; + } + } + else if (c == delimright) + goto Ldone; + if (startline && isalpha(c)) + { Token t; + unsigned char *psave = p; + p--; + scan(&t); // read in possible heredoc identifier + //printf("endid = '%s'\n", t.ident->toChars()); + if (t.value == TOKidentifier && t.ident->equals(hereid)) + { /* should check that rest of line is blank + */ + goto Ldone; + } + p = psave; + } + stringbuffer.writeUTF8(c); + startline = 0; + } + } + +Ldone: + if (*p == '"') + p++; + else + error("delimited string must end in %c\"", delimright); + t->len = stringbuffer.offset; + stringbuffer.writeByte(0); + t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); + memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); + stringPostfix(t); + return TOKstring; + +Lerror: + error("unterminated string constant starting at %s", start.toChars()); + t->ustring = (unsigned char *)""; + t->len = 0; + t->postfix = 0; + return TOKstring; +} + +/************************************** + * Lex delimited strings: + * q{ foo(xxx) } // " foo(xxx) " + * q{foo(} // "foo(" + * q{{foo}"}"} // "{foo}"}"" + * Input: + * p is on the q + */ + +TOK Lexer::tokenStringConstant(Token *t) +{ + unsigned nest = 1; + Loc start = loc; + unsigned char *pstart = ++p; + + while (1) + { Token tok; + + scan(&tok); + switch (tok.value) + { + case TOKlcurly: + nest++; + continue; + + case TOKrcurly: + if (--nest == 0) + goto Ldone; + continue; + + case TOKeof: + goto Lerror; + + default: + continue; + } + } + +Ldone: + t->len = p - 1 - pstart; + t->ustring = (unsigned char *)mem.malloc(t->len + 1); + memcpy(t->ustring, pstart, t->len); + t->ustring[t->len] = 0; + stringPostfix(t); + return TOKstring; + +Lerror: + error("unterminated token string constant starting at %s", start.toChars()); + t->ustring = (unsigned char *)""; + t->len = 0; + t->postfix = 0; + return TOKstring; +} + +#endif + + +/************************************** + */ + +TOK Lexer::escapeStringConstant(Token *t, int wide) +{ unsigned c; + Loc start = loc; + + p++; + stringbuffer.reset(); + while (1) + { + c = *p++; + switch (c) + { + case '\\': + switch (*p) + { + case 'u': + case 'U': + case '&': + c = escapeSequence(); + stringbuffer.writeUTF8(c); + continue; + + default: + c = escapeSequence(); + break; + } + break; + + case '\n': + loc.linnum++; + break; + + case '\r': + if (*p == '\n') + continue; // ignore + c = '\n'; // treat EndOfLine as \n character + loc.linnum++; + break; + + case '"': + t->len = stringbuffer.offset; + stringbuffer.writeByte(0); + t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); + memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); + stringPostfix(t); + return TOKstring; + + case 0: + case 0x1A: + p--; + error("unterminated string constant starting at %s", start.toChars()); + t->ustring = (unsigned char *)""; + t->len = 0; + t->postfix = 0; + return TOKstring; + + default: + if (c & 0x80) + { + p--; + c = decodeUTF(); + if (c == LS || c == PS) + { c = '\n'; + loc.linnum++; + } + p++; + stringbuffer.writeUTF8(c); + continue; + } + break; + } + stringbuffer.writeByte(c); + } +} + +/************************************** + */ + +TOK Lexer::charConstant(Token *t, int wide) +{ + unsigned c; + TOK tk = TOKcharv; + + //printf("Lexer::charConstant\n"); + p++; + c = *p++; + switch (c) + { + case '\\': + switch (*p) + { + case 'u': + t->uns64value = escapeSequence(); + tk = TOKwcharv; + break; + + case 'U': + case '&': + t->uns64value = escapeSequence(); + tk = TOKdcharv; + break; + + default: + t->uns64value = escapeSequence(); + break; + } + break; + + case '\n': + L1: + loc.linnum++; + case '\r': + case 0: + case 0x1A: + case '\'': + error("unterminated character constant"); + return tk; + + default: + if (c & 0x80) + { + p--; + c = decodeUTF(); + p++; + if (c == LS || c == PS) + goto L1; + if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) + tk = TOKwcharv; + else + tk = TOKdcharv; + } + t->uns64value = c; + break; + } + + if (*p != '\'') + { error("unterminated character constant"); + return tk; + } + p++; + return tk; +} + +/*************************************** + * Get postfix of string literal. + */ + +void Lexer::stringPostfix(Token *t) +{ + switch (*p) + { + case 'c': + case 'w': + case 'd': + t->postfix = *p; + p++; + break; + + default: + t->postfix = 0; + break; + } +} + +/*************************************** + * Read \u or \U unicode sequence + * Input: + * u 'u' or 'U' + */ + +#if 0 +unsigned Lexer::wchar(unsigned u) +{ + unsigned value; + unsigned n; + unsigned char c; + unsigned nchars; + + nchars = (u == 'U') ? 8 : 4; + value = 0; + for (n = 0; 1; n++) + { + ++p; + if (n == nchars) + break; + c = *p; + if (!ishex(c)) + { error("\\%c sequence must be followed by %d hex characters", u, nchars); + break; + } + if (isdigit(c)) + c -= '0'; + else if (islower(c)) + c -= 'a' - 10; + else + c -= 'A' - 10; + value <<= 4; + value |= c; + } + return value; +} +#endif + +/************************************** + * Read in a number. + * If it's an integer, store it in tok.TKutok.Vlong. + * integers can be decimal, octal or hex + * Handle the suffixes U, UL, LU, L, etc. + * If it's double, store it in tok.TKutok.Vdouble. + * Returns: + * TKnum + * TKdouble,... + */ + +TOK Lexer::number(Token *t) +{ + // We use a state machine to collect numbers + enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, + STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, + STATE_hexh, STATE_error }; + enum STATE state; + + enum FLAGS + { FLAGS_decimal = 1, // decimal + FLAGS_unsigned = 2, // u or U suffix + FLAGS_long = 4, // l or L suffix + }; + enum FLAGS flags = FLAGS_decimal; + + int i; + int base; + unsigned c; + unsigned char *start; + TOK result; + + //printf("Lexer::number()\n"); + state = STATE_initial; + base = 0; + stringbuffer.reset(); + start = p; + while (1) + { + c = *p; + switch (state) + { + case STATE_initial: // opening state + if (c == '0') + state = STATE_0; + else + state = STATE_decimal; + break; + + case STATE_0: + flags = (FLAGS) (flags & ~FLAGS_decimal); + switch (c) + { +#if ZEROH + case 'H': // 0h + case 'h': + goto hexh; +#endif + case 'X': + case 'x': + state = STATE_hex0; + break; + + case '.': + if (p[1] == '.') // .. is a separate token + goto done; + case 'i': + case 'f': + case 'F': + goto real; +#if ZEROH + case 'E': + case 'e': + goto case_hex; +#endif + case 'B': + case 'b': + state = STATE_binary0; + break; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + state = STATE_octal; + break; + +#if ZEROH + case '8': case '9': case 'A': + case 'C': case 'D': case 'F': + case 'a': case 'c': case 'd': case 'f': + case_hex: + state = STATE_hexh; + break; +#endif + case '_': + state = STATE_octal; + p++; + continue; + + case 'L': + if (p[1] == 'i') + goto real; + goto done; + + default: + goto done; + } + break; + + case STATE_decimal: // reading decimal number + if (!isdigit(c)) + { +#if ZEROH + if (ishex(c) + || c == 'H' || c == 'h' + ) + goto hexh; +#endif + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (c == '.' && p[1] != '.') + goto real; + else if (c == 'i' || c == 'f' || c == 'F' || + c == 'e' || c == 'E') + { + real: // It's a real number. Back up and rescan as a real + p = start; + return inreal(t); + } + else if (c == 'L' && p[1] == 'i') + goto real; + goto done; + } + break; + + case STATE_hex0: // reading hex number + case STATE_hex: + if (!ishex(c)) + { + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (c == '.' && p[1] != '.') + goto real; + if (c == 'P' || c == 'p' || c == 'i') + goto real; + if (state == STATE_hex0) + error("Hex digit expected, not '%c'", c); + goto done; + } + state = STATE_hex; + break; + +#if ZEROH + hexh: + state = STATE_hexh; + case STATE_hexh: // parse numbers like 0FFh + if (!ishex(c)) + { + if (c == 'H' || c == 'h') + { + p++; + base = 16; + goto done; + } + else + { + // Check for something like 1E3 or 0E24 + if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || + memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) + goto real; + error("Hex digit expected, not '%c'", c); + goto done; + } + } + break; +#endif + + case STATE_octal: // reading octal number + case STATE_octale: // reading octal number with non-octal digits + if (!isoctal(c)) + { +#if ZEROH + if (ishex(c) + || c == 'H' || c == 'h' + ) + goto hexh; +#endif + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (c == '.' && p[1] != '.') + goto real; + if (c == 'i') + goto real; + if (isdigit(c)) + { + state = STATE_octale; + } + else + goto done; + } + break; + + case STATE_binary0: // starting binary number + case STATE_binary: // reading binary number + if (c != '0' && c != '1') + { +#if ZEROH + if (ishex(c) + || c == 'H' || c == 'h' + ) + goto hexh; +#endif + if (c == '_') // ignore embedded _ + { p++; + continue; + } + if (state == STATE_binary0) + { error("binary digit expected"); + state = STATE_error; + break; + } + else + goto done; + } + state = STATE_binary; + break; + + case STATE_error: // for error recovery + if (!isdigit(c)) // scan until non-digit + goto done; + break; + + default: + assert(0); + } + stringbuffer.writeByte(c); + p++; + } +done: + stringbuffer.writeByte(0); // terminate string + if (state == STATE_octale) + error("Octal digit expected"); + + uinteger_t n; // unsigned >=64 bit integer type + + if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) + n = stringbuffer.data[0] - '0'; + else + { + // Convert string to integer +#if __DMC__ + errno = 0; + n = strtoull((char *)stringbuffer.data,NULL,base); + if (errno == ERANGE) + error("integer overflow"); +#else + // Not everybody implements strtoull() + char *p = (char *)stringbuffer.data; + int r = 10, d; + + if (*p == '0') + { + if (p[1] == 'x' || p[1] == 'X') + p += 2, r = 16; + else if (p[1] == 'b' || p[1] == 'B') + p += 2, r = 2; + else if (isdigit(p[1])) + p += 1, r = 8; + } + + n = 0; + while (1) + { + if (*p >= '0' && *p <= '9') + d = *p - '0'; + else if (*p >= 'a' && *p <= 'z') + d = *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'Z') + d = *p - 'A' + 10; + else + break; + if (d >= r) + break; + if (n && n * r + d <= n) + { + error ("integer overflow"); + break; + } + + n = n * r + d; + p++; + } +#endif + if (sizeof(n) > 8 && + n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits + error("integer overflow"); + } + + // Parse trailing 'u', 'U', 'l' or 'L' in any combination + while (1) + { unsigned char f; + + switch (*p) + { case 'U': + case 'u': + f = FLAGS_unsigned; + goto L1; + + case 'l': + if (1 || !global.params.useDeprecated) + error("'l' suffix is deprecated, use 'L' instead"); + case 'L': + f = FLAGS_long; + L1: + p++; + if (flags & f) + error("unrecognized token"); + flags = (FLAGS) (flags | f); + continue; + default: + break; + } + break; + } + + switch (flags) + { + case 0: + /* Octal or Hexadecimal constant. + * First that fits: int, uint, long, ulong + */ + if (n & 0x8000000000000000LL) + result = TOKuns64v; + else if (n & 0xFFFFFFFF00000000LL) + result = TOKint64v; + else if (n & 0x80000000) + result = TOKuns32v; + else + result = TOKint32v; + break; + + case FLAGS_decimal: + /* First that fits: int, long, long long + */ + if (n & 0x8000000000000000LL) + { error("signed integer overflow"); + result = TOKuns64v; + } + else if (n & 0xFFFFFFFF80000000LL) + result = TOKint64v; + else + result = TOKint32v; + break; + + case FLAGS_unsigned: + case FLAGS_decimal | FLAGS_unsigned: + /* First that fits: uint, ulong + */ + if (n & 0xFFFFFFFF00000000LL) + result = TOKuns64v; + else + result = TOKuns32v; + break; + + case FLAGS_decimal | FLAGS_long: + if (n & 0x8000000000000000LL) + { error("signed integer overflow"); + result = TOKuns64v; + } + else + result = TOKint64v; + break; + + case FLAGS_long: + if (n & 0x8000000000000000LL) + result = TOKuns64v; + else + result = TOKint64v; + break; + + case FLAGS_unsigned | FLAGS_long: + case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: + result = TOKuns64v; + break; + + default: + #ifdef DEBUG + printf("%x\n",flags); + #endif + assert(0); + } + t->uns64value = n; + return result; +} + +/************************************** + * Read in characters, converting them to real. + * Bugs: + * Exponent overflow not detected. + * Too much requested precision is not detected. + */ + +TOK Lexer::inreal(Token *t) +#ifdef __DMC__ +__in +{ + assert(*p == '.' || isdigit(*p)); +} +__out (result) +{ + switch (result) + { + case TOKfloat32v: + case TOKfloat64v: + case TOKfloat80v: + case TOKimaginary32v: + case TOKimaginary64v: + case TOKimaginary80v: + break; + + default: + assert(0); + } +} +__body +#endif /* __DMC__ */ +{ int dblstate; + unsigned c; + char hex; // is this a hexadecimal-floating-constant? + TOK result; + + //printf("Lexer::inreal()\n"); + stringbuffer.reset(); + dblstate = 0; + hex = 0; +Lnext: + while (1) + { + // Get next char from input + c = *p++; + //printf("dblstate = %d, c = '%c'\n", dblstate, c); + while (1) + { + switch (dblstate) + { + case 0: // opening state + if (c == '0') + dblstate = 9; + else if (c == '.') + dblstate = 3; + else + dblstate = 1; + break; + + case 9: + dblstate = 1; + if (c == 'X' || c == 'x') + { hex++; + break; + } + case 1: // digits to left of . + case 3: // digits to right of . + case 7: // continuing exponent digits + if (!isdigit(c) && !(hex && isxdigit(c))) + { + if (c == '_') + goto Lnext; // ignore embedded '_' + dblstate++; + continue; + } + break; + + case 2: // no more digits to left of . + if (c == '.') + { dblstate++; + break; + } + case 4: // no more digits to right of . + if ((c == 'E' || c == 'e') || + hex && (c == 'P' || c == 'p')) + { dblstate = 5; + hex = 0; // exponent is always decimal + break; + } + if (hex) + error("binary-exponent-part required"); + goto done; + + case 5: // looking immediately to right of E + dblstate++; + if (c == '-' || c == '+') + break; + case 6: // 1st exponent digit expected + if (!isdigit(c)) + error("exponent expected"); + dblstate++; + break; + + case 8: // past end of exponent digits + goto done; + } + break; + } + stringbuffer.writeByte(c); + } +done: + p--; + + stringbuffer.writeByte(0); + +#if _WIN32 && __DMC__ + char *save = __locale_decpoint; + __locale_decpoint = "."; +#endif +#ifdef IN_GCC + t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); +#else + t->float80value = strtold((char *)stringbuffer.data, NULL); +#endif + errno = 0; + switch (*p) + { + case 'F': + case 'f': +#ifdef IN_GCC + real_t::parse((char *)stringbuffer.data, real_t::Float); +#else + strtof((char *)stringbuffer.data, NULL); +#endif + result = TOKfloat32v; + p++; + break; + + default: +#ifdef IN_GCC + real_t::parse((char *)stringbuffer.data, real_t::Double); +#else + strtod((char *)stringbuffer.data, NULL); +#endif + result = TOKfloat64v; + break; + + case 'l': + if (!global.params.useDeprecated) + error("'l' suffix is deprecated, use 'L' instead"); + case 'L': + result = TOKfloat80v; + p++; + break; + } + if (*p == 'i' || *p == 'I') + { + if (!global.params.useDeprecated && *p == 'I') + error("'I' suffix is deprecated, use 'i' instead"); + p++; + switch (result) + { + case TOKfloat32v: + result = TOKimaginary32v; + break; + case TOKfloat64v: + result = TOKimaginary64v; + break; + case TOKfloat80v: + result = TOKimaginary80v; + break; + } + } +#if _WIN32 && __DMC__ + __locale_decpoint = save; +#endif + if (errno == ERANGE) + error("number is not representable"); + return result; +} + +/********************************************* + * Do pragma. + * Currently, the only pragma supported is: + * #line linnum [filespec] + */ + +void Lexer::pragma() +{ + Token tok; + int linnum; + char *filespec = NULL; + Loc loc = this->loc; + + scan(&tok); + if (tok.value != TOKidentifier || tok.ident != Id::line) + goto Lerr; + + scan(&tok); + if (tok.value == TOKint32v || tok.value == TOKint64v) + linnum = tok.uns64value - 1; + else + goto Lerr; + + while (1) + { + switch (*p) + { + case 0: + case 0x1A: + case '\n': + Lnewline: + this->loc.linnum = linnum; + if (filespec) + this->loc.filename = filespec; + return; + + case '\r': + p++; + if (*p != '\n') + { p--; + goto Lnewline; + } + continue; + + case ' ': + case '\t': + case '\v': + case '\f': + p++; + continue; // skip white space + + case '_': + if (mod && memcmp(p, "__FILE__", 8) == 0) + { + p += 8; + filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); + } + continue; + + case '"': + if (filespec) + goto Lerr; + stringbuffer.reset(); + p++; + while (1) + { unsigned c; + + c = *p; + switch (c) + { + case '\n': + case '\r': + case 0: + case 0x1A: + goto Lerr; + + case '"': + stringbuffer.writeByte(0); + filespec = mem.strdup((char *)stringbuffer.data); + p++; + break; + + default: + if (c & 0x80) + { unsigned u = decodeUTF(); + if (u == PS || u == LS) + goto Lerr; + } + stringbuffer.writeByte(c); + p++; + continue; + } + break; + } + continue; + + default: + if (*p & 0x80) + { unsigned u = decodeUTF(); + if (u == PS || u == LS) + goto Lnewline; + } + goto Lerr; + } + } + +Lerr: + error(loc, "#line integer [\"filespec\"]\\n expected"); +} + + +/******************************************** + * Decode UTF character. + * Issue error messages for invalid sequences. + * Return decoded character, advance p to last character in UTF sequence. + */ + +unsigned Lexer::decodeUTF() +{ + dchar_t u; + unsigned char c; + unsigned char *s = p; + size_t len; + size_t idx; + char *msg; + + c = *s; + assert(c & 0x80); + + // Check length of remaining string up to 6 UTF-8 characters + for (len = 1; len < 6 && s[len]; len++) + ; + + idx = 0; + msg = utf_decodeChar(s, len, &idx, &u); + p += idx - 1; + if (msg) + { + error("%s", msg); + } + return u; +} + + +/*************************************************** + * Parse doc comment embedded between t->ptr and p. + * Remove trailing blanks and tabs from lines. + * Replace all newlines with \n. + * Remove leading comment character from each line. + * Decide if it's a lineComment or a blockComment. + * Append to previous one for this token. + */ + +void Lexer::getDocComment(Token *t, unsigned lineComment) +{ + OutBuffer buf; + unsigned char ct = t->ptr[2]; + unsigned char *q = t->ptr + 3; // start of comment text + int linestart = 0; + + unsigned char *qend = p; + if (ct == '*' || ct == '+') + qend -= 2; + + /* Scan over initial row of ****'s or ++++'s or ////'s + */ + for (; q < qend; q++) + { + if (*q != ct) + break; + } + + /* Remove trailing row of ****'s or ++++'s + */ + if (ct != '/') + { + for (; q < qend; qend--) + { + if (qend[-1] != ct) + break; + } + } + + for (; q < qend; q++) + { + unsigned char c = *q; + + switch (c) + { + case '*': + case '+': + if (linestart && c == ct) + { linestart = 0; + /* Trim preceding whitespace up to preceding \n + */ + while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) + buf.offset--; + continue; + } + break; + + case ' ': + case '\t': + break; + + case '\r': + if (q[1] == '\n') + continue; // skip the \r + goto Lnewline; + + default: + if (c == 226) + { + // If LS or PS + if (q[1] == 128 && + (q[2] == 168 || q[2] == 169)) + { + q += 2; + goto Lnewline; + } + } + linestart = 0; + break; + + Lnewline: + c = '\n'; // replace all newlines with \n + case '\n': + linestart = 1; + + /* Trim trailing whitespace + */ + while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) + buf.offset--; + + break; + } + buf.writeByte(c); + } + + // Always end with a newline + if (!buf.offset || buf.data[buf.offset - 1] != '\n') + buf.writeByte('\n'); + + buf.writeByte(0); + + // It's a line comment if the start of the doc comment comes + // after other non-whitespace on the same line. + unsigned char** dc = (lineComment && anyToken) + ? &t->lineComment + : &t->blockComment; + + // Combine with previous doc comment, if any + if (*dc) + *dc = combineComments(*dc, (unsigned char *)buf.data); + else + *dc = (unsigned char *)buf.extractData(); +} + +/******************************************** + * Combine two document comments into one. + */ + +unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) +{ + unsigned char *c = c2; + + if (c1) + { c = c1; + if (c2) + { size_t len1 = strlen((char *)c1); + size_t len2 = strlen((char *)c2); + + c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); + memcpy(c, c1, len1); + c[len1] = '\n'; + memcpy(c + len1 + 1, c2, len2); + c[len1 + 1 + len2] = 0; + } + } + return c; +} + +/******************************************** + * Create an identifier in the string table. + */ + +Identifier *Lexer::idPool(const char *s) +{ + size_t len = strlen(s); + StringValue *sv = stringtable.update(s, len); + Identifier *id = (Identifier *) sv->ptrvalue; + if (!id) + { + id = new Identifier(sv->lstring.string, TOKidentifier); + sv->ptrvalue = id; + } + return id; +} + +/********************************************* + * Create a unique identifier using the prefix s. + */ + +Identifier *Lexer::uniqueId(const char *s, int num) +{ char buffer[32]; + size_t slen = strlen(s); + + assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); + sprintf(buffer, "%s%d", s, num); + return idPool(buffer); +} + +Identifier *Lexer::uniqueId(const char *s) +{ + static int num; + return uniqueId(s, ++num); +} + +/**************************************** + */ + +struct Keyword +{ char *name; + enum TOK value; +}; + +static Keyword keywords[] = +{ +// { "", TOK }, + + { "this", TOKthis }, + { "super", TOKsuper }, + { "assert", TOKassert }, + { "null", TOKnull }, + { "true", TOKtrue }, + { "false", TOKfalse }, + { "cast", TOKcast }, + { "new", TOKnew }, + { "delete", TOKdelete }, + { "throw", TOKthrow }, + { "module", TOKmodule }, + { "pragma", TOKpragma }, + { "typeof", TOKtypeof }, + { "typeid", TOKtypeid }, + + { "template", TOKtemplate }, + + { "void", TOKvoid }, + { "byte", TOKint8 }, + { "ubyte", TOKuns8 }, + { "short", TOKint16 }, + { "ushort", TOKuns16 }, + { "int", TOKint32 }, + { "uint", TOKuns32 }, + { "long", TOKint64 }, + { "ulong", TOKuns64 }, + { "cent", TOKcent, }, + { "ucent", TOKucent, }, + { "float", TOKfloat32 }, + { "double", TOKfloat64 }, + { "real", TOKfloat80 }, + + { "bool", TOKbool }, + { "char", TOKchar }, + { "wchar", TOKwchar }, + { "dchar", TOKdchar }, + + { "ifloat", TOKimaginary32 }, + { "idouble", TOKimaginary64 }, + { "ireal", TOKimaginary80 }, + + { "cfloat", TOKcomplex32 }, + { "cdouble", TOKcomplex64 }, + { "creal", TOKcomplex80 }, + + { "delegate", TOKdelegate }, + { "function", TOKfunction }, + + { "is", TOKis }, + { "if", TOKif }, + { "else", TOKelse }, + { "while", TOKwhile }, + { "for", TOKfor }, + { "do", TOKdo }, + { "switch", TOKswitch }, + { "case", TOKcase }, + { "default", TOKdefault }, + { "break", TOKbreak }, + { "continue", TOKcontinue }, + { "synchronized", TOKsynchronized }, + { "return", TOKreturn }, + { "goto", TOKgoto }, + { "try", TOKtry }, + { "catch", TOKcatch }, + { "finally", TOKfinally }, + { "with", TOKwith }, + { "asm", TOKasm }, + { "foreach", TOKforeach }, + { "foreach_reverse", TOKforeach_reverse }, + { "scope", TOKscope }, + + { "struct", TOKstruct }, + { "class", TOKclass }, + { "interface", TOKinterface }, + { "union", TOKunion }, + { "enum", TOKenum }, + { "import", TOKimport }, + { "mixin", TOKmixin }, + { "static", TOKstatic }, + { "final", TOKfinal }, + { "const", TOKconst }, + { "typedef", TOKtypedef }, + { "alias", TOKalias }, + { "override", TOKoverride }, + { "abstract", TOKabstract }, + { "volatile", TOKvolatile }, + { "debug", TOKdebug }, + { "deprecated", TOKdeprecated }, + { "in", TOKin }, + { "out", TOKout }, + { "inout", TOKinout }, + { "lazy", TOKlazy }, + { "auto", TOKauto }, + + { "align", TOKalign }, + { "extern", TOKextern }, + { "private", TOKprivate }, + { "package", TOKpackage }, + { "protected", TOKprotected }, + { "public", TOKpublic }, + { "export", TOKexport }, + + { "body", TOKbody }, + { "invariant", TOKinvariant }, + { "unittest", TOKunittest }, + { "version", TOKversion }, + //{ "manifest", TOKmanifest }, + + // Added after 1.0 + { "ref", TOKref }, + { "macro", TOKmacro }, +#if V2 + { "pure", TOKpure }, + { "nothrow", TOKnothrow }, + { "__traits", TOKtraits }, + { "__overloadset", TOKoverloadset }, +#endif +}; + +int Token::isKeyword() +{ + for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) + { + if (keywords[u].value == value) + return 1; + } + return 0; +} + +void Lexer::initKeywords() +{ StringValue *sv; + unsigned u; + enum TOK v; + unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); + + if (global.params.Dversion == 1) + nkeywords -= 2; + + cmtable_init(); + + for (u = 0; u < nkeywords; u++) + { char *s; + + //printf("keyword[%d] = '%s'\n",u, keywords[u].name); + s = keywords[u].name; + v = keywords[u].value; + sv = stringtable.insert(s, strlen(s)); + sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); + + //printf("tochars[%d] = '%s'\n",v, s); + Token::tochars[v] = s; + } + + Token::tochars[TOKeof] = "EOF"; + Token::tochars[TOKlcurly] = "{"; + Token::tochars[TOKrcurly] = "}"; + Token::tochars[TOKlparen] = "("; + Token::tochars[TOKrparen] = ")"; + Token::tochars[TOKlbracket] = "["; + Token::tochars[TOKrbracket] = "]"; + Token::tochars[TOKsemicolon] = ";"; + Token::tochars[TOKcolon] = ":"; + Token::tochars[TOKcomma] = ","; + Token::tochars[TOKdot] = "."; + Token::tochars[TOKxor] = "^"; + Token::tochars[TOKxorass] = "^="; + Token::tochars[TOKassign] = "="; + Token::tochars[TOKconstruct] = "="; +#if V2 + Token::tochars[TOKblit] = "="; +#endif + Token::tochars[TOKlt] = "<"; + Token::tochars[TOKgt] = ">"; + Token::tochars[TOKle] = "<="; + Token::tochars[TOKge] = ">="; + Token::tochars[TOKequal] = "=="; + Token::tochars[TOKnotequal] = "!="; + Token::tochars[TOKnotidentity] = "!is"; + Token::tochars[TOKtobool] = "!!"; + + Token::tochars[TOKunord] = "!<>="; + Token::tochars[TOKue] = "!<>"; + Token::tochars[TOKlg] = "<>"; + Token::tochars[TOKleg] = "<>="; + Token::tochars[TOKule] = "!>"; + Token::tochars[TOKul] = "!>="; + Token::tochars[TOKuge] = "!<"; + Token::tochars[TOKug] = "!<="; + + Token::tochars[TOKnot] = "!"; + Token::tochars[TOKtobool] = "!!"; + Token::tochars[TOKshl] = "<<"; + Token::tochars[TOKshr] = ">>"; + Token::tochars[TOKushr] = ">>>"; + Token::tochars[TOKadd] = "+"; + Token::tochars[TOKmin] = "-"; + Token::tochars[TOKmul] = "*"; + Token::tochars[TOKdiv] = "/"; + Token::tochars[TOKmod] = "%"; + Token::tochars[TOKslice] = ".."; + Token::tochars[TOKdotdotdot] = "..."; + Token::tochars[TOKand] = "&"; + Token::tochars[TOKandand] = "&&"; + Token::tochars[TOKor] = "|"; + Token::tochars[TOKoror] = "||"; + Token::tochars[TOKarray] = "[]"; + Token::tochars[TOKindex] = "[i]"; + Token::tochars[TOKaddress] = "&"; + Token::tochars[TOKstar] = "*"; + Token::tochars[TOKtilde] = "~"; + Token::tochars[TOKdollar] = "$"; + Token::tochars[TOKcast] = "cast"; + Token::tochars[TOKplusplus] = "++"; + Token::tochars[TOKminusminus] = "--"; + Token::tochars[TOKtype] = "type"; + Token::tochars[TOKquestion] = "?"; + Token::tochars[TOKneg] = "-"; + Token::tochars[TOKuadd] = "+"; + Token::tochars[TOKvar] = "var"; + Token::tochars[TOKaddass] = "+="; + Token::tochars[TOKminass] = "-="; + Token::tochars[TOKmulass] = "*="; + Token::tochars[TOKdivass] = "/="; + Token::tochars[TOKmodass] = "%="; + Token::tochars[TOKshlass] = "<<="; + Token::tochars[TOKshrass] = ">>="; + Token::tochars[TOKushrass] = ">>>="; + Token::tochars[TOKandass] = "&="; + Token::tochars[TOKorass] = "|="; + Token::tochars[TOKcatass] = "~="; + Token::tochars[TOKcat] = "~"; + Token::tochars[TOKcall] = "call"; + Token::tochars[TOKidentity] = "is"; + Token::tochars[TOKnotidentity] = "!is"; + + Token::tochars[TOKorass] = "|="; + Token::tochars[TOKidentifier] = "identifier"; + + // For debugging + Token::tochars[TOKdotexp] = "dotexp"; + Token::tochars[TOKdotti] = "dotti"; + Token::tochars[TOKdotvar] = "dotvar"; + Token::tochars[TOKdottype] = "dottype"; + Token::tochars[TOKsymoff] = "symoff"; + Token::tochars[TOKtypedot] = "typedot"; + Token::tochars[TOKarraylength] = "arraylength"; + Token::tochars[TOKarrayliteral] = "arrayliteral"; + Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; + Token::tochars[TOKstructliteral] = "structliteral"; + Token::tochars[TOKstring] = "string"; + Token::tochars[TOKdsymbol] = "symbol"; + Token::tochars[TOKtuple] = "tuple"; + Token::tochars[TOKdeclaration] = "declaration"; + Token::tochars[TOKdottd] = "dottd"; + Token::tochars[TOKon_scope_exit] = "scope(exit)"; +}