comparison dmd/Lexer.d @ 0:10317f0c89a5

Initial commit
author korDen
date Sat, 24 Oct 2009 08:42:06 +0400
parents
children 7427ded8caf7
comparison
equal deleted inserted replaced
-1:000000000000 0:10317f0c89a5
1 module dmd.Lexer;
2
3 import dmd.StringTable;
4 import dmd.OutBuffer;
5 import dmd.Token;
6 import dmd.Loc;
7 import dmd.Module;
8 import dmd.Identifier;
9 import dmd.TOK;
10 import dmd.Keyword;
11 import dmd.StringValue;
12 import dmd.Global;
13 import dmd.Util;
14 import dmd.Id;
15 import dmd.Dchar;
16 import dmd.Utf;
17
18 import std.stdio : writeln;
19
20 import core.stdc.ctype;
21 import core.stdc.stdlib;
22 import core.stdc.string;
23 import core.stdc.stdio;
24 import core.stdc.time;
25 import core.stdc.errno;
26
27 enum LS = 0x2028; // UTF line separator
28 enum PS = 0x2029; // UTF paragraph separator
29
// C-linkage global that is declared here but defined outside this module.
// NOTE(review): presumably the C runtime's locale decimal-point string -- confirm
// against the runtime this is linked with; nothing in this part of the file uses it.
30 extern (C) extern
31 {
32 __gshared char* __locale_decpoint;
33 }
34
/**
 * Placeholder for a Unicode alphabetic-character test on code point u
 * (judging by the name and by how scan() uses the result).
 *
 * Not implemented: the body unconditionally fails an assert, so any call
 * aborts. scan() calls this for bytes with the high bit set while lexing
 * identifiers, so such input reaches this assert.
 */
35 int isUniAlpha(uint u)
36 {
37 assert(false);
38 }
39
40 class Lexer
41 {
// --- State shared by all Lexer instances ---
42 static StringTable stringtable; // interned keywords/identifiers (filled by initKeywords, idPool, scan)
43 static OutBuffer stringbuffer; // scratch buffer; scan() uses it to build escaped string literals
44 static Token* freelist; // consumed look-ahead tokens are pushed here by nextToken()
45
// --- Per-instance state ---
46 Loc loc; // for error messages
47
48 ubyte* base; // pointer to start of buffer
49 ubyte* end; // past end of buffer
50 ubyte* p; // current character
51 Token token; // current token; token.next chains tokens already scanned by peek()
52 Module mod; // module handed to the constructor
53 int doDocComment; // collect doc comment information
54 int anyToken; // !=0 means seen at least one token
55 int commentToken; // !=0 means comments are TOKcomment's
56
// Static constructor: allocates the shared identifier table and the shared
// scratch buffer before any Lexer is used.
57 static this()
58 {
59 stringtable = new StringTable();
60 stringbuffer = new OutBuffer();
61 }
62
// Static destructor: explicitly destroys the shared identifier table.
// Note that stringbuffer is not deleted here.
63 static ~this()
64 {
65 delete stringtable;
66 }
67
/**
 * Create a lexer over a source buffer.
 *
 * Params:
 *  mod           = module recorded in loc and kept in this.mod
 *  base          = start of the source buffer
 *  begoffset     = offset from base at which scanning starts
 *  endoffset     = offset from base of the end of the buffer
 *  doDocComment  = stored as-is; non-zero makes scan() collect doc comments
 *  commentToken  = stored as-is; non-zero makes scan() return comments as tokens
 *
 * If the text at the start position begins with "#!", that whole line is
 * skipped and the line counter starts at 2.
 */
this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
{
    memset(&token, 0, token.sizeof);

    this.mod = mod;
    this.base = base;
    this.end = base + endoffset;
    this.p = base + begoffset;
    this.doDocComment = doDocComment;
    this.commentToken = commentToken;
    this.anyToken = 0;
    loc = Loc(mod, 1);
    //initKeywords();

    // Nothing more to do unless the first line is a "#!" line.
    if (!(p[0] == '#' && p[1] == '!'))
        return;

    p += 2;
    for (;;)
    {
        ubyte ch = *p;

        if (ch == '\n')
        {
            p++;
            break;
        }
        if (ch == '\r')
        {
            // Treat CR LF as a single line ending.
            p++;
            if (*p == '\n')
                p++;
            break;
        }
        if (ch == 0 || ch == 0x1A)
            break;              // end of file: leave p on the terminator

        if (ch & 0x80)
        {
            // Multi-byte sequence: a Unicode line/paragraph separator also ends the line.
            uint u = decodeUTF();
            if (u == PS || u == LS)
                break;
        }
        p++;
    }
    loc.linnum = 2;
}
122
// Keyword table: maps each reserved word to its TOK value. initKeywords()
// inserts these entries into stringtable and records their spelling in
// Token.tochars. Two variants are provided; the DMDV2 one is a superset that
// appends pure, nothrow, __thread, __gshared, __traits, __overloadset,
// __FILE__, __LINE__, shared and immutable.
// Entry order matters at the tail: initKeywords() drops the last two entries
// when global.params.Dversion == 1.
123 version (DMDV2) {
124 static Keyword[] keywords =
125 [
126 // { "", TOK },
127
128 { "this", TOK.TOKthis },
129 { "super", TOK.TOKsuper },
130 { "assert", TOK.TOKassert },
131 { "null", TOK.TOKnull },
132 { "true", TOK.TOKtrue },
133 { "false", TOK.TOKfalse },
134 { "cast", TOK.TOKcast },
135 { "new", TOK.TOKnew },
136 { "delete", TOK.TOKdelete },
137 { "throw", TOK.TOKthrow },
138 { "module", TOK.TOKmodule },
139 { "pragma", TOK.TOKpragma },
140 { "typeof", TOK.TOKtypeof },
141 { "typeid", TOK.TOKtypeid },
142
143 { "template", TOK.TOKtemplate },
144
145 { "void", TOK.TOKvoid },
146 { "byte", TOK.TOKint8 },
147 { "ubyte", TOK.TOKuns8 },
148 { "short", TOK.TOKint16 },
149 { "ushort", TOK.TOKuns16 },
150 { "int", TOK.TOKint32 },
151 { "uint", TOK.TOKuns32 },
152 { "long", TOK.TOKint64 },
153 { "ulong", TOK.TOKuns64 },
154 { "cent", TOK.TOKcent, },
155 { "ucent", TOK.TOKucent, },
156 { "float", TOK.TOKfloat32 },
157 { "double", TOK.TOKfloat64 },
158 { "real", TOK.TOKfloat80 },
159
160 { "bool", TOK.TOKbool },
161 { "char", TOK.TOKchar },
162 { "wchar", TOK.TOKwchar },
163 { "dchar", TOK.TOKdchar },
164
165 { "ifloat", TOK.TOKimaginary32 },
166 { "idouble", TOK.TOKimaginary64 },
167 { "ireal", TOK.TOKimaginary80 },
168
169 { "cfloat", TOK.TOKcomplex32 },
170 { "cdouble", TOK.TOKcomplex64 },
171 { "creal", TOK.TOKcomplex80 },
172
173 { "delegate", TOK.TOKdelegate },
174 { "function", TOK.TOKfunction },
175
176 { "is", TOK.TOKis },
177 { "if", TOK.TOKif },
178 { "else", TOK.TOKelse },
179 { "while", TOK.TOKwhile },
180 { "for", TOK.TOKfor },
181 { "do", TOK.TOKdo },
182 { "switch", TOK.TOKswitch },
183 { "case", TOK.TOKcase },
184 { "default", TOK.TOKdefault },
185 { "break", TOK.TOKbreak },
186 { "continue", TOK.TOKcontinue },
187 { "synchronized", TOK.TOKsynchronized },
188 { "return", TOK.TOKreturn },
189 { "goto", TOK.TOKgoto },
190 { "try", TOK.TOKtry },
191 { "catch", TOK.TOKcatch },
192 { "finally", TOK.TOKfinally },
193 { "with", TOK.TOKwith },
194 { "asm", TOK.TOKasm },
195 { "foreach", TOK.TOKforeach },
196 { "foreach_reverse", TOK.TOKforeach_reverse },
197 { "scope", TOK.TOKscope },
198
199 { "struct", TOK.TOKstruct },
200 { "class", TOK.TOKclass },
201 { "interface", TOK.TOKinterface },
202 { "union", TOK.TOKunion },
203 { "enum", TOK.TOKenum },
204 { "import", TOK.TOKimport },
205 { "mixin", TOK.TOKmixin },
206 { "static", TOK.TOKstatic },
207 { "final", TOK.TOKfinal },
208 { "const", TOK.TOKconst },
209 { "typedef", TOK.TOKtypedef },
210 { "alias", TOK.TOKalias },
211 { "override", TOK.TOKoverride },
212 { "abstract", TOK.TOKabstract },
213 { "volatile", TOK.TOKvolatile },
214 { "debug", TOK.TOKdebug },
215 { "deprecated", TOK.TOKdeprecated },
216 { "in", TOK.TOKin },
217 { "out", TOK.TOKout },
218 { "inout", TOK.TOKinout },
219 { "lazy", TOK.TOKlazy },
220 { "auto", TOK.TOKauto },
221
222 { "align", TOK.TOKalign },
223 { "extern", TOK.TOKextern },
224 { "private", TOK.TOKprivate },
225 { "package", TOK.TOKpackage },
226 { "protected", TOK.TOKprotected },
227 { "public", TOK.TOKpublic },
228 { "export", TOK.TOKexport },
229
230 { "body", TOK.TOKbody },
231 { "invariant", TOK.TOKinvariant },
232 { "unittest", TOK.TOKunittest },
233 { "version", TOK.TOKversion },
234 //{ "manifest", TOK.TOKmanifest },
235
236 // Added after 1.0
237 { "ref", TOK.TOKref },
238 { "macro", TOK.TOKmacro },
239 { "pure", TOK.TOKpure },
240 { "nothrow", TOK.TOKnothrow },
241 { "__thread", TOK.TOKtls },
242 { "__gshared", TOK.TOKgshared },
243 { "__traits", TOK.TOKtraits },
244 { "__overloadset", TOK.TOKoverloadset },
245 { "__FILE__", TOK.TOKfile },
246 { "__LINE__", TOK.TOKline },
247 { "shared", TOK.TOKshared },
248 { "immutable", TOK.TOKimmutable },
249 ];
250 } else {
251 static Keyword[] keywords =
252 [
253 // { "", TOK },
254
255 { "this", TOK.TOKthis },
256 { "super", TOK.TOKsuper },
257 { "assert", TOK.TOKassert },
258 { "null", TOK.TOKnull },
259 { "true", TOK.TOKtrue },
260 { "false", TOK.TOKfalse },
261 { "cast", TOK.TOKcast },
262 { "new", TOK.TOKnew },
263 { "delete", TOK.TOKdelete },
264 { "throw", TOK.TOKthrow },
265 { "module", TOK.TOKmodule },
266 { "pragma", TOK.TOKpragma },
267 { "typeof", TOK.TOKtypeof },
268 { "typeid", TOK.TOKtypeid },
269
270 { "template", TOK.TOKtemplate },
271
272 { "void", TOK.TOKvoid },
273 { "byte", TOK.TOKint8 },
274 { "ubyte", TOK.TOKuns8 },
275 { "short", TOK.TOKint16 },
276 { "ushort", TOK.TOKuns16 },
277 { "int", TOK.TOKint32 },
278 { "uint", TOK.TOKuns32 },
279 { "long", TOK.TOKint64 },
280 { "ulong", TOK.TOKuns64 },
281 { "cent", TOK.TOKcent, },
282 { "ucent", TOK.TOKucent, },
283 { "float", TOK.TOKfloat32 },
284 { "double", TOK.TOKfloat64 },
285 { "real", TOK.TOKfloat80 },
286
287 { "bool", TOK.TOKbool },
288 { "char", TOK.TOKchar },
289 { "wchar", TOK.TOKwchar },
290 { "dchar", TOK.TOKdchar },
291
292 { "ifloat", TOK.TOKimaginary32 },
293 { "idouble", TOK.TOKimaginary64 },
294 { "ireal", TOK.TOKimaginary80 },
295
296 { "cfloat", TOK.TOKcomplex32 },
297 { "cdouble", TOK.TOKcomplex64 },
298 { "creal", TOK.TOKcomplex80 },
299
300 { "delegate", TOK.TOKdelegate },
301 { "function", TOK.TOKfunction },
302
303 { "is", TOK.TOKis },
304 { "if", TOK.TOKif },
305 { "else", TOK.TOKelse },
306 { "while", TOK.TOKwhile },
307 { "for", TOK.TOKfor },
308 { "do", TOK.TOKdo },
309 { "switch", TOK.TOKswitch },
310 { "case", TOK.TOKcase },
311 { "default", TOK.TOKdefault },
312 { "break", TOK.TOKbreak },
313 { "continue", TOK.TOKcontinue },
314 { "synchronized", TOK.TOKsynchronized },
315 { "return", TOK.TOKreturn },
316 { "goto", TOK.TOKgoto },
317 { "try", TOK.TOKtry },
318 { "catch", TOK.TOKcatch },
319 { "finally", TOK.TOKfinally },
320 { "with", TOK.TOKwith },
321 { "asm", TOK.TOKasm },
322 { "foreach", TOK.TOKforeach },
323 { "foreach_reverse", TOK.TOKforeach_reverse },
324 { "scope", TOK.TOKscope },
325
326 { "struct", TOK.TOKstruct },
327 { "class", TOK.TOKclass },
328 { "interface", TOK.TOKinterface },
329 { "union", TOK.TOKunion },
330 { "enum", TOK.TOKenum },
331 { "import", TOK.TOKimport },
332 { "mixin", TOK.TOKmixin },
333 { "static", TOK.TOKstatic },
334 { "final", TOK.TOKfinal },
335 { "const", TOK.TOKconst },
336 { "typedef", TOK.TOKtypedef },
337 { "alias", TOK.TOKalias },
338 { "override", TOK.TOKoverride },
339 { "abstract", TOK.TOKabstract },
340 { "volatile", TOK.TOKvolatile },
341 { "debug", TOK.TOKdebug },
342 { "deprecated", TOK.TOKdeprecated },
343 { "in", TOK.TOKin },
344 { "out", TOK.TOKout },
345 { "inout", TOK.TOKinout },
346 { "lazy", TOK.TOKlazy },
347 { "auto", TOK.TOKauto },
348
349 { "align", TOK.TOKalign },
350 { "extern", TOK.TOKextern },
351 { "private", TOK.TOKprivate },
352 { "package", TOK.TOKpackage },
353 { "protected", TOK.TOKprotected },
354 { "public", TOK.TOKpublic },
355 { "export", TOK.TOKexport },
356
357 { "body", TOK.TOKbody },
358 { "invariant", TOK.TOKinvariant },
359 { "unittest", TOK.TOKunittest },
360 { "version", TOK.TOKversion },
361 //{ "manifest", TOK.TOKmanifest },
362
363 // Added after 1.0
364 { "ref", TOK.TOKref },
365 { "macro", TOK.TOKmacro },
366 ];
367 }
368
/// Per-byte character-class flags, indexed by byte value; filled in by cmtable_init().
static ubyte[256] cmtable;

/// Flag bits stored in cmtable.
enum CMoctal = 0x1;   // '0'..'7'
enum CMhex = 0x2;     // hexadecimal digit
enum CMidchar = 0x4;  // alphanumeric or '_'

/// Returns: non-zero (CMoctal) if c is flagged as an octal digit in cmtable.
ubyte isoctal(ubyte c)
{
    return cmtable[c] & CMoctal;
}

/// Returns: non-zero (CMhex) if c is flagged as a hexadecimal digit in cmtable.
ubyte ishex(ubyte c)
{
    return cmtable[c] & CMhex;
}

/// Returns: non-zero (CMidchar) if c is flagged as an identifier character in cmtable.
ubyte isidchar(ubyte c)
{
    return cmtable[c] & CMidchar;
}
377
/**
 * Fill cmtable: for every byte value, OR in CMoctal for '0'..'7', CMhex for
 * hexadecimal digits, and CMidchar for alphanumerics and '_'.
 * Existing bits in the table are kept (flags are OR-ed in, never cleared).
 */
static void cmtable_init()
{
    for (uint ch = 0; ch < cmtable.length; ++ch)
    {
        ubyte flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;

        if (isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
            flags |= CMhex;

        if (isalnum(ch) || ch == '_')
            flags |= CMidchar;

        cmtable[ch] |= flags;
    }
}
390
/**
 * One-time setup of the lexer's shared tables.
 *
 * Initialises the character-class table, inserts every keyword into
 * stringtable with an Identifier carrying its TOK value, and fills
 * Token.tochars with the printable spelling of each token kind.
 *
 * When global.params.Dversion == 1 the last two entries of the keyword
 * table are not registered.
 */
static void initKeywords()
{
    // .length is a size_t; storing it in a uint does not compile on 64-bit targets.
    size_t nkeywords = keywords.length;

    if (global.params.Dversion == 1)
        nkeywords -= 2;

    cmtable_init();

    for (size_t u = 0; u < nkeywords; u++)
    {
        //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
        string s = keywords[u].name;
        TOK v = keywords[u].value;
        StringValue* sv = stringtable.insert(s);
        sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v);

        //printf("tochars[%d] = '%s'\n",v, s);
        Token.tochars[v] = s;
    }

    // Punctuation and operators
    Token.tochars[TOK.TOKeof] = "EOF";
    Token.tochars[TOK.TOKlcurly] = "{";
    Token.tochars[TOK.TOKrcurly] = "}";
    Token.tochars[TOK.TOKlparen] = "(";
    Token.tochars[TOK.TOKrparen] = ")";
    Token.tochars[TOK.TOKlbracket] = "[";
    Token.tochars[TOK.TOKrbracket] = "]";
    Token.tochars[TOK.TOKsemicolon] = ";";
    Token.tochars[TOK.TOKcolon] = ":";
    Token.tochars[TOK.TOKcomma] = ",";
    Token.tochars[TOK.TOKdot] = ".";
    Token.tochars[TOK.TOKxor] = "^";
    Token.tochars[TOK.TOKxorass] = "^=";
    Token.tochars[TOK.TOKassign] = "=";
    Token.tochars[TOK.TOKconstruct] = "=";
    version (DMDV2)
    {
        Token.tochars[TOK.TOKblit] = "=";
    }
    Token.tochars[TOK.TOKlt] = "<";
    Token.tochars[TOK.TOKgt] = ">";
    Token.tochars[TOK.TOKle] = "<=";
    Token.tochars[TOK.TOKge] = ">=";
    Token.tochars[TOK.TOKequal] = "==";
    Token.tochars[TOK.TOKnotequal] = "!=";
    Token.tochars[TOK.TOKnotidentity] = "!is";
    Token.tochars[TOK.TOKtobool] = "!!";

    // Floating point "unordered" comparisons
    Token.tochars[TOK.TOKunord] = "!<>=";
    Token.tochars[TOK.TOKue] = "!<>";
    Token.tochars[TOK.TOKlg] = "<>";
    Token.tochars[TOK.TOKleg] = "<>=";
    Token.tochars[TOK.TOKule] = "!>";
    Token.tochars[TOK.TOKul] = "!>=";
    Token.tochars[TOK.TOKuge] = "!<";
    Token.tochars[TOK.TOKug] = "!<=";

    Token.tochars[TOK.TOKnot] = "!";
    Token.tochars[TOK.TOKshl] = "<<";
    Token.tochars[TOK.TOKshr] = ">>";
    Token.tochars[TOK.TOKushr] = ">>>";
    Token.tochars[TOK.TOKadd] = "+";
    Token.tochars[TOK.TOKmin] = "-";
    Token.tochars[TOK.TOKmul] = "*";
    Token.tochars[TOK.TOKdiv] = "/";
    Token.tochars[TOK.TOKmod] = "%";
    Token.tochars[TOK.TOKslice] = "..";
    Token.tochars[TOK.TOKdotdotdot] = "...";
    Token.tochars[TOK.TOKand] = "&";
    Token.tochars[TOK.TOKandand] = "&&";
    Token.tochars[TOK.TOKor] = "|";
    Token.tochars[TOK.TOKoror] = "||";
    Token.tochars[TOK.TOKarray] = "[]";
    Token.tochars[TOK.TOKindex] = "[i]";
    Token.tochars[TOK.TOKaddress] = "&";
    Token.tochars[TOK.TOKstar] = "*";
    Token.tochars[TOK.TOKtilde] = "~";
    Token.tochars[TOK.TOKdollar] = "$";
    Token.tochars[TOK.TOKcast] = "cast";
    Token.tochars[TOK.TOKplusplus] = "++";
    Token.tochars[TOK.TOKminusminus] = "--";
    Token.tochars[TOK.TOKtype] = "type";
    Token.tochars[TOK.TOKquestion] = "?";
    Token.tochars[TOK.TOKneg] = "-";
    Token.tochars[TOK.TOKuadd] = "+";
    Token.tochars[TOK.TOKvar] = "var";
    Token.tochars[TOK.TOKaddass] = "+=";
    Token.tochars[TOK.TOKminass] = "-=";
    Token.tochars[TOK.TOKmulass] = "*=";
    Token.tochars[TOK.TOKdivass] = "/=";
    Token.tochars[TOK.TOKmodass] = "%=";
    Token.tochars[TOK.TOKshlass] = "<<=";
    Token.tochars[TOK.TOKshrass] = ">>=";
    Token.tochars[TOK.TOKushrass] = ">>>=";
    Token.tochars[TOK.TOKandass] = "&=";
    Token.tochars[TOK.TOKorass] = "|=";
    Token.tochars[TOK.TOKcatass] = "~=";
    Token.tochars[TOK.TOKcat] = "~";
    Token.tochars[TOK.TOKcall] = "call";
    Token.tochars[TOK.TOKidentity] = "is";

    Token.tochars[TOK.TOKidentifier] = "identifier";
    Token.tochars[TOK.TOKat] = "@";

    // For debugging
    Token.tochars[TOK.TOKdotexp] = "dotexp";
    Token.tochars[TOK.TOKdotti] = "dotti";
    Token.tochars[TOK.TOKdotvar] = "dotvar";
    Token.tochars[TOK.TOKdottype] = "dottype";
    Token.tochars[TOK.TOKsymoff] = "symoff";
    Token.tochars[TOK.TOKarraylength] = "arraylength";
    Token.tochars[TOK.TOKarrayliteral] = "arrayliteral";
    Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral";
    Token.tochars[TOK.TOKstructliteral] = "structliteral";
    Token.tochars[TOK.TOKstring] = "string";
    Token.tochars[TOK.TOKdsymbol] = "symbol";
    Token.tochars[TOK.TOKtuple] = "tuple";
    Token.tochars[TOK.TOKdeclaration] = "declaration";
    Token.tochars[TOK.TOKdottd] = "dottd";
    Token.tochars[TOK.TOKon_scope_exit] = "scope(exit)";
    Token.tochars[TOK.TOKon_scope_success] = "scope(success)";
    Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)";
}
517
/**
 * Return the interned Identifier for the text s, creating it on first use.
 *
 * The string is looked up (or added) in stringtable; if no Identifier is
 * attached to that entry yet, a new one with token value TOKidentifier is
 * created and stored on the entry.
 */
static Identifier idPool(string s)
{
    StringValue* entry = stringtable.update(s);

    if (auto existing = cast(Identifier) entry.ptrvalue)
        return existing;

    Identifier created = new Identifier(entry.lstring.string_, TOK.TOKidentifier);
    entry.ptrvalue = cast(void*) created;
    return created;
}
530
/**
 * Create a unique identifier from the prefix s by appending the next value
 * of a counter shared by all calls to this overload (first call uses 1).
 */
static Identifier uniqueId(string s)
{
    static int counter;

    counter += 1;
    return uniqueId(s, counter);
}
536
/*********************************************
 * Create a unique identifier using the prefix s.
 *
 * The identifier text is s immediately followed by num in decimal, and it
 * is interned through idPool().
 *
 * Params:
 *  s   = prefix; must be short enough that prefix plus number fit in the
 *        32-byte scratch buffer (checked by assert)
 *  num = number appended to the prefix
 */
static Identifier uniqueId(string s, int num)
{
    char[32] buffer;
    size_t slen = s.length;

    assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof);

    // "%.*s" consumes an int precision followed by a char*. Pass both
    // explicitly: handing the D slice itself to C varargs only works where
    // the {length, ptr} pair happens to match that layout (32-bit targets).
    int len = sprintf(buffer.ptr, "%.*s%d", cast(int) slen, s.ptr, num);
    assert(len >= 0);

    return idPool(buffer[0 .. len].idup);
}
550
/**
 * Advance to the next token and return its kind.
 *
 * If a token has already been scanned ahead (token.next is set), it is
 * copied into the current token and its storage is pushed onto freelist;
 * otherwise a fresh token is scanned directly into the current token.
 */
TOK nextToken()
{
    Token* queued = token.next;

    if (queued is null)
    {
        scan(&token);
    }
    else
    {
        // The copy also carries over queued.next, keeping the rest of the look-ahead chain.
        memcpy(&token, queued, Token.sizeof);
        queued.next = freelist;
        freelist = queued;
    }

    //token.print();
    return token.value;
}
570
/***********************
 * Return the kind of the token that follows the current one,
 * without consuming it.
 */
TOK peekNext()
{
    Token* ahead = peek(&token);
    return ahead.value;
}
578
// Not implemented: the body unconditionally fails an assert, so any call aborts.
// NOTE(review): by its name this is meant to look two tokens ahead -- confirm before implementing.
579 TOK peekNext2()
580 {
581 assert(false);
582 }
583
/**
 * Scan one token starting at p and store the result in *t.
 *
 * White space is skipped, and so are comments unless commentToken is set
 * (in which case a comment is returned as TOKcomment). Line endings seen
 * on the way bump loc.linnum. On return t.ptr points at the first character
 * of the token and t.value holds its kind. Numbers, character constants and
 * the various string literal forms are delegated to helper methods;
 * identifiers and keywords are resolved through stringtable. A 0 or 0x1A
 * byte yields TOKeof. When doDocComment is set, doc comments are handed to
 * getDocComment().
 */
584 void scan(Token* t)
585 {
// lastLine is the line on which scanning began; linnum records the line a
// comment starts on. Their equality is what getDocComment() is told.
586 uint lastLine = loc.linnum;
587 uint linnum;
588
589 t.blockComment = null;
590 t.lineComment = null;
591 while (1)
592 {
593 t.ptr = p;
594 //printf("p = %p, *p = '%c'\n",p,*p);
595 switch (*p)
596 {
597 case 0:
598 case 0x1A:
599 t.value = TOK.TOKeof; // end of file
600 return;
601
602 case ' ':
603 case '\t':
604 case '\v':
605 case '\f':
606 p++;
607 continue; // skip white space
608
609 case '\r':
610 p++;
611 if (*p != '\n') // if CR stands by itself
612 loc.linnum++;
613 continue; // skip white space
614
615 case '\n':
616 p++;
617 loc.linnum++;
618 continue; // skip white space
619
620 case '0': case '1': case '2': case '3': case '4':
621 case '5': case '6': case '7': case '8': case '9':
622 t.value = number(t);
623 return;
624
625 version (CSTRINGS) {
626 case '\'':
627 t.value = charConstant(t, 0);
628 return;
629
630 case '"':
631 t.value = stringConstant(t,0);
632 return;
633
634 case 'l':
635 case 'L':
636 if (p[1] == '\'')
637 {
638 p++;
639 t.value = charConstant(t, 1);
640 return;
641 }
642 else if (p[1] == '"')
643 {
644 p++;
645 t.value = stringConstant(t, 1);
646 return;
647 }
648 } else {
649 case '\'':
650 t.value = charConstant(t,0);
651 return;
652
// r"..." and `...` are both wysiwyg strings; a plain 'r' starts an identifier.
653 case 'r':
654 if (p[1] != '"')
655 goto case_ident;
656 p++;
657 case '`':
658 t.value = wysiwygStringConstant(t, *p);
659 return;
660
// x"..." is a hex string; a plain 'x' starts an identifier.
661 case 'x':
662 if (p[1] != '"')
663 goto case_ident;
664 p++;
665 t.value = hexStringConstant(t);
666 return;
667
668 version (DMDV2) {
// q"..." is a delimited string, q{...} a token string; otherwise an identifier.
669 case 'q':
670 if (p[1] == '"')
671 {
672 p++;
673 t.value = delimitedStringConstant(t);
674 return;
675 }
676 else if (p[1] == '{')
677 {
678 p++;
679 t.value = tokenStringConstant(t);
680 return;
681 }
682 else
683 goto case_ident;
684 }
685
686 case '"':
687 t.value = escapeStringConstant(t,0);
688 return;
689 version (TEXTUAL_ASSEMBLY_OUT) {
690 } else {
691 case '\\': // escaped string literal
692 { uint c;
693 ubyte* pstart = p;
694
// Concatenate consecutive \-escapes into one string token built in stringbuffer.
695 stringbuffer.reset();
696 do
697 {
698 p++;
699 switch (*p)
700 {
701 case 'u':
702 case 'U':
703 case '&':
704 c = escapeSequence();
705 stringbuffer.writeUTF8(c);
706 break;
707
708 default:
709 c = escapeSequence();
710 stringbuffer.writeByte(c);
711 break;
712 }
713 } while (*p == '\\');
714 t.len = stringbuffer.offset;
715 stringbuffer.writeByte(0);
// The token keeps its own malloc'ed, 0-terminated copy of the buffer contents.
716 char* cc = cast(char*)malloc(stringbuffer.offset);
717 memcpy(cc, stringbuffer.data, stringbuffer.offset);
718 t.ustring = cc;
719 t.postfix = 0;
720 t.value = TOK.TOKstring;
721 if (!global.params.useDeprecated)
722 error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart);
723 return;
724 }
725 }
726 case 'l':
727 case 'L':
728 }
729 case 'a': case 'b': case 'c': case 'd': case 'e':
730 case 'f': case 'g': case 'h': case 'i': case 'j':
731 case 'k': case 'm': case 'n': case 'o':
732 version (DMDV2) {
733 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
734 } else {
735 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
736 }
737 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
738 case 'z':
739 case 'A': case 'B': case 'C': case 'D': case 'E':
740 case 'F': case 'G': case 'H': case 'I': case 'J':
741 case 'K': case 'M': case 'N': case 'O':
742 case 'P': case 'Q': case 'R': case 'S': case 'T':
743 case 'U': case 'V': case 'W': case 'X': case 'Y':
744 case 'Z':
745 case '_':
746 case_ident:
747 { ubyte c;
748 StringValue *sv;
749 Identifier id;
750
// Consume identifier characters, then intern the text [t.ptr, p) in stringtable.
751 do
752 {
753 c = *++p;
754 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
755 sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); ///
756 id = cast(Identifier) sv.ptrvalue;
757 if (id is null)
758 { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier);
759 sv.ptrvalue = cast(void*)id;
760 }
761 t.ident = id;
// Keywords were registered with their own TOK value, so this yields the keyword token.
762 t.value = cast(TOK) id.value;
763 anyToken = 1;
764 if (*t.ptr == '_') // if special identifier token
765 {
766 static char date[11+1];
767 static char time[8+1];
768 static char timestamp[24+1];
769
770 if (!date[0]) // lazy evaluation
771 { time_t tm;
772 char *p;
773
// Note: this local p shadows the lexer's p inside this block.
774 .time(&tm);
775 p = ctime(&tm);
776 assert(p);
777 sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
778 sprintf(time.ptr, "%.8s", p + 11);
779 sprintf(timestamp.ptr, "%.24s", p);
780 }
781
782 ///version (DMDV1) {
783 /// if (mod && id == Id.FILE)
784 /// {
785 /// t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars());
786 /// goto Lstr;
787 /// }
788 /// else if (mod && id == Id.LINE)
789 /// {
790 /// t.value = TOK.TOKint64v;
791 /// t.uns64value = loc.linnum;
792 /// }
793 /// else
794 ///}
795 if (id == Id.DATE)
796 {
797 t.ustring = date.ptr;
798 goto Lstr;
799 }
800 else if (id == Id.TIME)
801 {
802 t.ustring = time.ptr;
803 goto Lstr;
804 }
805 else if (id == Id.VENDOR)
806 {
807 t.ustring = "Digital Mars D".ptr;
808 goto Lstr;
809 }
810 else if (id == Id.TIMESTAMP)
811 {
812 t.ustring = timestamp.ptr;
813 Lstr:
814 t.value = TOK.TOKstring;
815 Llen:
816 t.postfix = 0;
817 t.len = strlen(cast(char*)t.ustring);
818 }
819 else if (id == Id.VERSIONX)
820 {
821 uint major = 0;
822 uint minor = 0;
823
// Parse "<major>.<minor>" from global.version_, skipping its first character;
// the token value is major * 1000 + minor.
824 foreach (char cc; global.version_[1..$])
825 {
826 if (isdigit(cc))
827 minor = minor * 10 + cc - '0';
828 else if (cc == '.')
829 {
830 major = minor;
831 minor = 0;
832 }
833 else
834 break;
835 }
836 t.value = TOK.TOKint64v;
837 t.uns64value = major * 1000 + minor;
838 }
839 ///version (DMDV2) {
840 else if (id == Id.EOFX)
841 {
842 t.value = TOK.TOKeof;
843 // Advance scanner to end of file
844 while (!(*p == 0 || *p == 0x1A))
845 p++;
846 }
847 ///}
848 }
849 //printf("t.value = %d\n",t.value);
850 return;
851 }
852
853 case '/':
854 p++;
855 switch (*p)
856 {
857 case '=':
858 p++;
859 t.value = TOK.TOKdivass;
860 return;
861
862 case '*':
863 p++;
864 linnum = loc.linnum;
// Outer loop: repeat until a '/' is found that is preceded by '*'.
865 while (1)
866 {
// Inner loop: advance to the next '/', counting line endings.
867 while (1)
868 {
869 ubyte c = *p;
870 switch (c)
871 {
872 case '/':
873 break;
874
875 case '\n':
876 loc.linnum++;
877 p++;
878 continue;
879
880 case '\r':
881 p++;
882 if (*p != '\n')
883 loc.linnum++;
884 continue;
885
886 case 0:
887 case 0x1A:
888 error("unterminated /* */ comment");
889 p = end;
890 t.value = TOK.TOKeof;
891 return;
892
893 default:
894 if (c & 0x80)
895 { uint u = decodeUTF();
896 if (u == PS || u == LS)
897 loc.linnum++;
898 }
899 p++;
900 continue;
901 }
902 break;
903 }
904 p++;
// The second test rejects "/*/", where the '*' is the one that opened the comment.
905 if (p[-2] == '*' && p - 3 != t.ptr)
906 break;
907 }
908 if (commentToken)
909 {
910 t.value = TOK.TOKcomment;
911 return;
912 }
913 else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
914 { // if /** but not /**/
915 getDocComment(t, lastLine == linnum);
916 }
917 continue;
918
919 case '/': // do // style comments
920 linnum = loc.linnum;
921 while (1)
922 { ubyte c = *++p;
923 switch (c)
924 {
925 case '\n':
926 break;
927
928 case '\r':
929 if (p[1] == '\n')
930 p++;
931 break;
932
933 case 0:
934 case 0x1A:
935 if (commentToken)
936 {
937 p = end;
938 t.value = TOK.TOKcomment;
939 return;
940 }
941 if (doDocComment && t.ptr[2] == '/')
942 getDocComment(t, lastLine == linnum);
943 p = end;
944 t.value = TOK.TOKeof;
945 return;
946
947 default:
948 if (c & 0x80)
949 { uint u = decodeUTF();
950 if (u == PS || u == LS)
951 break;
952 }
953 continue;
954 }
955 break;
956 }
957
958 if (commentToken)
959 {
960 p++;
961 loc.linnum++;
962 t.value = TOK.TOKcomment;
963 return;
964 }
965 if (doDocComment && t.ptr[2] == '/')
966 getDocComment(t, lastLine == linnum);
967
968 p++;
969 loc.linnum++;
970 continue;
971
972 case '+':
973 {
// Nesting comment: nest counts the currently open "/+" markers.
974 int nest;
975
976 linnum = loc.linnum;
977 p++;
978 nest = 1;
979 while (1)
980 { ubyte c = *p;
981 switch (c)
982 {
983 case '/':
984 p++;
985 if (*p == '+')
986 {
987 p++;
988 nest++;
989 }
990 continue;
991
992 case '+':
993 p++;
994 if (*p == '/')
995 {
996 p++;
997 if (--nest == 0)
998 break;
999 }
1000 continue;
1001
1002 case '\r':
1003 p++;
1004 if (*p != '\n')
1005 loc.linnum++;
1006 continue;
1007
1008 case '\n':
1009 loc.linnum++;
1010 p++;
1011 continue;
1012
1013 case 0:
1014 case 0x1A:
1015 error("unterminated /+ +/ comment");
1016 p = end;
1017 t.value = TOK.TOKeof;
1018 return;
1019
1020 default:
1021 if (c & 0x80)
1022 { uint u = decodeUTF();
1023 if (u == PS || u == LS)
1024 loc.linnum++;
1025 }
1026 p++;
1027 continue;
1028 }
1029 break;
1030 }
1031 if (commentToken)
1032 {
1033 t.value = TOK.TOKcomment;
1034 return;
1035 }
1036 if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
1037 { // if /++ but not /++/
1038 getDocComment(t, lastLine == linnum);
1039 }
1040 continue;
1041 }
1042
1043 default:
1044 break; ///
1045 }
1046 t.value = TOK.TOKdiv;
1047 return;
1048
1049 case '.':
1050 p++;
1051 if (isdigit(*p))
1052 { /* Note that we don't allow ._1 and ._ as being
1053 * valid floating point numbers.
1054 */
1055 p--;
1056 t.value = inreal(t);
1057 }
1058 else if (p[0] == '.')
1059 {
1060 if (p[1] == '.')
1061 { p += 2;
1062 t.value = TOK.TOKdotdotdot;
1063 }
1064 else
1065 { p++;
1066 t.value = TOK.TOKslice;
1067 }
1068 }
1069 else
1070 t.value = TOK.TOKdot;
1071 return;
1072
1073 case '&':
1074 p++;
1075 if (*p == '=')
1076 { p++;
1077 t.value = TOK.TOKandass;
1078 }
1079 else if (*p == '&')
1080 { p++;
1081 t.value = TOK.TOKandand;
1082 }
1083 else
1084 t.value = TOK.TOKand;
1085 return;
1086
1087 case '|':
1088 p++;
1089 if (*p == '=')
1090 { p++;
1091 t.value = TOK.TOKorass;
1092 }
1093 else if (*p == '|')
1094 { p++;
1095 t.value = TOK.TOKoror;
1096 }
1097 else
1098 t.value = TOK.TOKor;
1099 return;
1100
1101 case '-':
1102 p++;
1103 if (*p == '=')
1104 { p++;
1105 t.value = TOK.TOKminass;
1106 }
1107 /// #if 0
1108 /// else if (*p == '>')
1109 /// { p++;
1110 /// t.value = TOK.TOKarrow;
1111 /// }
1112 /// #endif
1113 else if (*p == '-')
1114 { p++;
1115 t.value = TOK.TOKminusminus;
1116 }
1117 else
1118 t.value = TOK.TOKmin;
1119 return;
1120
1121 case '+':
1122 p++;
1123 if (*p == '=')
1124 { p++;
1125 t.value = TOK.TOKaddass;
1126 }
1127 else if (*p == '+')
1128 { p++;
1129 t.value = TOK.TOKplusplus;
1130 }
1131 else
1132 t.value = TOK.TOKadd;
1133 return;
1134
1135 case '<':
1136 p++;
1137 if (*p == '=')
1138 { p++;
1139 t.value = TOK.TOKle; // <=
1140 }
1141 else if (*p == '<')
1142 { p++;
1143 if (*p == '=')
1144 { p++;
1145 t.value = TOK.TOKshlass; // <<=
1146 }
1147 else
1148 t.value = TOK.TOKshl; // <<
1149 }
1150 else if (*p == '>')
1151 { p++;
1152 if (*p == '=')
1153 { p++;
1154 t.value = TOK.TOKleg; // <>=
1155 }
1156 else
1157 t.value = TOK.TOKlg; // <>
1158 }
1159 else
1160 t.value = TOK.TOKlt; // <
1161 return;
1162
1163 case '>':
1164 p++;
1165 if (*p == '=')
1166 { p++;
1167 t.value = TOK.TOKge; // >=
1168 }
1169 else if (*p == '>')
1170 { p++;
1171 if (*p == '=')
1172 { p++;
1173 t.value = TOK.TOKshrass; // >>=
1174 }
1175 else if (*p == '>')
1176 { p++;
1177 if (*p == '=')
1178 { p++;
1179 t.value = TOK.TOKushrass; // >>>=
1180 }
1181 else
1182 t.value = TOK.TOKushr; // >>>
1183 }
1184 else
1185 t.value = TOK.TOKshr; // >>
1186 }
1187 else
1188 t.value = TOK.TOKgt; // >
1189 return;
1190
1191 case '!':
1192 p++;
1193 if (*p == '=')
1194 { p++;
1195 if (*p == '=' && global.params.Dversion == 1)
1196 { p++;
1197 t.value = TOK.TOKnotidentity; // !==
1198 }
1199 else
1200 t.value = TOK.TOKnotequal; // !=
1201 }
1202 else if (*p == '<')
1203 { p++;
1204 if (*p == '>')
1205 { p++;
1206 if (*p == '=')
1207 { p++;
1208 t.value = TOK.TOKunord; // !<>=
1209 }
1210 else
1211 t.value = TOK.TOKue; // !<>
1212 }
1213 else if (*p == '=')
1214 { p++;
1215 t.value = TOK.TOKug; // !<=
1216 }
1217 else
1218 t.value = TOK.TOKuge; // !<
1219 }
1220 else if (*p == '>')
1221 { p++;
1222 if (*p == '=')
1223 { p++;
1224 t.value = TOK.TOKul; // !>=
1225 }
1226 else
1227 t.value = TOK.TOKule; // !>
1228 }
1229 else
1230 t.value = TOK.TOKnot; // !
1231 return;
1232
1233 case '=':
1234 p++;
1235 if (*p == '=')
1236 { p++;
1237 if (*p == '=' && global.params.Dversion == 1)
1238 { p++;
1239 t.value = TOK.TOKidentity; // ===
1240 }
1241 else
1242 t.value = TOK.TOKequal; // ==
1243 }
1244 else
1245 t.value = TOK.TOKassign; // =
1246 return;
1247
1248 case '~':
1249 p++;
1250 if (*p == '=')
1251 { p++;
1252 t.value = TOK.TOKcatass; // ~=
1253 }
1254 else
1255 t.value = TOK.TOKtilde; // ~
1256 return;
1257 /*
1258 #define SINGLE(c,tok) case c: p++; t.value = tok; return;
1259
1260 SINGLE('(', TOKlparen)
1261 SINGLE(')', TOKrparen)
1262 SINGLE('[', TOKlbracket)
1263 SINGLE(']', TOKrbracket)
1264 SINGLE('{', TOKlcurly)
1265 SINGLE('}', TOKrcurly)
1266 SINGLE('?', TOKquestion)
1267 SINGLE(',', TOKcomma)
1268 SINGLE(';', TOKsemicolon)
1269 SINGLE(':', TOKcolon)
1270 SINGLE('$', TOKdollar)
1271 SINGLE('@', TOKat)
1272
1273 #undef SINGLE
1274
1275 #define DOUBLE(c1,tok1,c2,tok2) \
1276 case c1: \
1277 p++; \
1278 if (*p == c2) \
1279 { p++; \
1280 t.value = tok2; \
1281 } \
1282 else \
1283 t.value = tok1; \
1284 return;
1285
1286 DOUBLE('*', TOKmul, '=', TOKmulass)
1287 DOUBLE('%', TOKmod, '=', TOKmodass)
1288 DOUBLE('^', TOKxor, '=', TOKxorass)
1289
1290 #undef DOUBLE
1291 */
1292
// Hand-expanded equivalents of the SINGLE/DOUBLE macros quoted in the comment above.
1293 case '(': p++; t.value = TOK.TOKlparen; return;
1294 case ')': p++; t.value = TOK.TOKrparen; return;
1295 case '[': p++; t.value = TOK.TOKlbracket; return;
1296 case ']': p++; t.value = TOK.TOKrbracket; return;
1297 case '{': p++; t.value = TOK.TOKlcurly; return;
1298 case '}': p++; t.value = TOK.TOKrcurly; return;
1299 case '?': p++; t.value = TOK.TOKquestion; return;
1300 case ',': p++; t.value = TOK.TOKcomma; return;
1301 case ';': p++; t.value = TOK.TOKsemicolon; return;
1302 case ':': p++; t.value = TOK.TOKcolon; return;
1303 case '$': p++; t.value = TOK.TOKdollar; return;
1304 case '@': p++; t.value = TOK.TOKat; return;
1305
1306 case '*':
1307 p++;
1308 if (*p == '=') {
1309 p++;
1310 t.value = TOK.TOKmulass;
1311 } else {
1312 t.value = TOK.TOKmul;
1313 }
1314 return;
1315
1316 case '%':
1317 p++;
1318 if (*p == '=') {
1319 p++;
1320 t.value = TOK.TOKmodass;
1321 } else {
1322 t.value = TOK.TOKmod;
1323 }
1324 return;
1325
1326 case '^':
1327 p++;
1328 if (*p == '=') {
1329 p++;
1330 t.value = TOK.TOKxorass;
1331 } else {
1332 t.value = TOK.TOKxor;
1333 }
1334 return;
1335
1336 case '#':
1337 p++;
1338 pragma_();
1339 continue;
1340
1341 default:
1342 { ubyte c = *p;
1343
1344 if (c & 0x80)
1345 { uint u = decodeUTF();
1346
1347 // Check for start of unicode identifier
1348 if (isUniAlpha(u))
1349 goto case_ident;
1350
1351 if (u == PS || u == LS)
1352 {
1353 loc.linnum++;
1354 p++;
1355 continue;
1356 }
1357 }
// Anything else is reported and skipped; scanning resumes at the next byte.
1358 if (isprint(c))
1359 error("unsupported char '%c'", c);
1360 else
1361 error("unsupported char 0x%02x", c);
1362 p++;
1363 continue;
1364 }
1365 }
1366 }
1367 }
1368
/**
 * Return the token that follows ct without consuming anything.
 *
 * If ct already has a successor it is returned as-is; otherwise a new
 * Token is allocated, scanned from the input, and linked in as ct.next.
 */
Token* peek(Token* ct)
{
    Token* ahead = ct.next;

    if (ahead is null)
    {
        ahead = new Token();
        scan(ahead);
        ahead.next = null;
        ct.next = ahead;
    }
    return ahead;
}
1384
/**
 * Starting after tk (taken to be just past an opening parenthesis, since
 * the nesting count starts at 1), look ahead for the matching ')'.
 *
 * Returns: the token following the matching ')'. The search also stops
 * early, returning the stopping token itself, at end of file, at a '}'
 * that closes more braces than were opened during the search, or at a ';'
 * outside any braces opened during the search.
 */
Token* peekPastParen(Token* tk)
{
    //printf("peekPastParen()\n");
    int parens = 1;
    int curlynest = 0;

    for (;;)
    {
        tk = peek(tk);
        //tk.print();
        switch (tk.value)
        {
            case TOK.TOKlparen:
                parens++;
                break;

            case TOK.TOKrparen:
                if (--parens == 0)
                    return peek(tk);    // step past the matching ')'
                break;

            case TOK.TOKlcurly:
                curlynest++;
                break;

            case TOK.TOKrcurly:
                if (--curlynest < 0)
                    return tk;
                break;

            case TOK.TOKsemicolon:
                if (curlynest == 0)
                    return tk;
                break;

            case TOK.TOKeof:
                return tk;

            default:
                break;
        }
    }
}
1430
/*******************************************
 * Parse escape sequence.
 *
 * Reads the escape starting at `*p` and advances `p` past the characters
 * it consumes. Supported forms, as handled below:
 *   - single characters  ' " ? \ a b f n r t v
 *   - \xHH, \uHHHH, \UHHHHHHHH (exactly 2, 4 or 8 hex digits; fewer is an error)
 *   - \&name;  named character entity, resolved through HtmlNamedEntity()
 *   - up to three octal digits, which must not exceed 0xFF
 * At end of input (0 or 0x1A) a backslash is returned and `p` is not moved.
 *
 * Returns:
 *   the decoded character value; after an error, a recovery value.
 */
uint escapeSequence()
{
    uint c = *p;

    version (TEXTUAL_ASSEMBLY_OUT) {
        return c;
    }
    int n;
    int ndigits;

    switch (c)
    {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
            p++;
            break;

        case 'a':   c = 7;  goto Lconsume;
        case 'b':   c = 8;  goto Lconsume;
        case 'f':   c = 12; goto Lconsume;
        case 'n':   c = 10; goto Lconsume;
        case 'r':   c = 13; goto Lconsume;
        case 't':   c = 9;  goto Lconsume;
        case 'v':   c = 11; goto Lconsume;

        case 'u':
            ndigits = 4;
            goto Lhex;
        case 'U':
            ndigits = 8;
            goto Lhex;
        case 'x':
            ndigits = 2;
        Lhex:
            p++;
            c = *p;
            if (ishex(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                while (1)
                {
                    // Convert the current hex digit to its numeric value.
                    if (isdigit(c))
                        c -= '0';
                    else if (islower(c))
                        c -= 'a' - 10;
                    else
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(ubyte)c))
                    {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \x yields a raw byte; only \u and \U must be valid code points.
                if (ndigits != 2 && !utf_isValidDchar(v))
                {   error("invalid UTF character \\U%08x", v);
                    v = '?';    // recover with valid UTF character
                }
                c = v;
            }
            else
                error("undefined escape hex sequence \\%c\n",c);
            break;

        case '&':           // named character entity
            for (ubyte* idstart = ++p; true; p++)
            {
                switch (*p)
                {
                    case ';':
                        c = HtmlNamedEntity(idstart, p - idstart);
                        if (c == ~0)
                        {
                            // Cast to char[] so %s prints the entity name as
                            // text rather than as an array of byte values.
                            error("unnamed character entity &%s;", cast(char[])idstart[0 .. (p - idstart)]);
                            c = ' ';
                        }
                        p++;
                        break;

                    default:
                        if (isalpha(*p) ||
                            (p != idstart + 1 && isdigit(*p)))
                            continue;
                        error("unterminated named entity");
                        break;
                }
                break;
            }
            break;

        case 0:
        case 0x1A:          // end of file
            c = '\\';
            break;

        default:
            if (isoctal(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                do
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                } while (++n < 3 && isoctal(cast(ubyte)c));
                c = v;
                if (c > 0xFF)
                    error("0%03o is larger than a byte", c);
            }
            else
                error("undefined escape sequence \\%c\n",c);
            break;
    }
    return c;
}
1559
/// Not implemented here: the body is a single assertion that always fails.
TOK wysiwygStringConstant(Token* t, int tc)
{
    assert(0);
}
1564
/// Not implemented here: the body is a single assertion that always fails.
TOK hexStringConstant(Token* t)
{
    assert(0);
}
1569
version (DMDV2) {
/// Not implemented here: the body is a single assertion that always fails.
TOK delimitedStringConstant(Token* t)
{
    assert(0);
}

/// Not implemented here: the body is a single assertion that always fails.
TOK tokenStringConstant(Token* t)
{
    assert(0);
}
}
/**
 * Lex a string literal terminated by '"', processing backslash escapes.
 *
 * The decoded bytes are collected in `stringbuffer`. On the closing quote
 * they are copied, with a terminating 0, into malloc'ed memory stored in
 * `t.ustring`; `t.len` excludes the terminator, and the optional postfix
 * is read by stringPostfix(). If the input ends first, an error is
 * reported and an empty string is stored.
 *
 * The `wide` parameter is not used by this implementation.
 *
 * Returns: TOK.TOKstring in all cases.
 */
TOK escapeStringConstant(Token* t, int wide)
{
    Loc startLoc = loc;     // reported if the literal is never terminated
    uint ch;

    p++;
    stringbuffer.reset();
    for (;;)
    {
        ch = *p++;
        switch (ch)
        {
version (TEXTUAL_ASSEMBLY_OUT) {
} else {
            case '\\':
            {
                // Remember which escape this is before escapeSequence() moves p.
                ubyte selector = *p;
                ch = escapeSequence();
                if (selector == 'u' || selector == 'U' || selector == '&')
                {
                    // These escapes denote code points: store them as UTF-8.
                    stringbuffer.writeUTF8(ch);
                    continue;
                }
                break;
            }
}
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;       // ignore
                ch = '\n';          // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case '"':
            {
                t.len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                char* copy = cast(char*)malloc(stringbuffer.offset);
                memcpy(copy, stringbuffer.data, stringbuffer.offset);
                t.ustring = copy;
                stringPostfix(t);
                return TOK.TOKstring;
            }

            case 0:
            case 0x1A:
                p--;
                error("unterminated string constant starting at %s", startLoc.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOK.TOKstring;

            default:
                if (ch & 0x80)
                {
                    // Multi-byte UTF-8 sequence: back up and decode it whole.
                    p--;
                    ch = decodeUTF();
                    if (ch == LS || ch == PS)
                    {   ch = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(ch);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(ch);
    }

    assert(false);
}
1660
/**
 * Lex a character literal and store its value in `t.uns64value`.
 *
 * The token kind starts as TOKcharv and is widened as follows:
 *   - a \u escape gives TOKwcharv; \U and \& escapes give TOKdcharv;
 *   - a non-ASCII character gives TOKwcharv when it is below 0xD800 or in
 *     0xE000 .. 0xFFFD, otherwise TOKdcharv.
 * A line end, end of input, an empty literal or a missing closing quote
 * is reported as "unterminated character constant".
 *
 * The `wide` parameter is not used by this implementation.
 */
TOK charConstant(Token* t, int wide)
{
    TOK kind = TOKcharv;
    uint ch;

    //printf("Lexer.charConstant\n");
    p++;
    ch = *p++;
    switch (ch)
    {
version (TEXTUAL_ASSEMBLY_OUT) {
} else {
        case '\\':
        {
            // Remember which escape this is before escapeSequence() moves p.
            ubyte selector = *p;
            t.uns64value = escapeSequence();
            if (selector == 'u')
                kind = TOKwcharv;
            else if (selector == 'U' || selector == '&')
                kind = TOKdcharv;
            break;
        }
}
        case '\n':
        Lnewline:
            loc.linnum++;
            // falls through: a line end inside the literal is an error
        case '\r':
        case 0:
        case 0x1A:
        case '\'':
            error("unterminated character constant");
            return kind;

        default:
            if (ch & 0x80)
            {
                // Multi-byte UTF-8 sequence: back up and decode it whole.
                p--;
                ch = decodeUTF();
                p++;
                if (ch == LS || ch == PS)
                    goto Lnewline;
                if (ch < 0xD800 || (ch >= 0xE000 && ch < 0xFFFE))
                    kind = TOKwcharv;
                else
                    kind = TOKdcharv;
            }
            t.uns64value = ch;
            break;
    }

    if (*p == '\'')
        p++;
    else
        error("unterminated character constant");
    return kind;
}
1728
/***************************************
 * Get postfix of string literal.
 *
 * If the current character is 'c', 'w' or 'd' it is stored in
 * `t.postfix` and consumed; otherwise `t.postfix` is set to 0 and the
 * input position is left unchanged.
 */
void stringPostfix(Token* t)
{
    if (*p == 'c' || *p == 'w' || *p == 'd')
    {
        t.postfix = *p;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
1748
/// Not implemented here: the body is a single assertion that always fails.
uint wchar_(uint u)
{
    assert(0);
}
1753
1754 /**************************************
1755 * Read in a number.
1756 * If it's an integer, store it in t.uns64value.
1757 * integers can be decimal, octal, hex or binary
1758 * Handle the suffixes U, UL, LU, L, etc.
1759 * If it looks like a real, rewind and let inreal() scan it.
1760 * Returns:
1761 * TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for integers,
1762 * otherwise whatever inreal() returns
1763 */
1764
1765 TOK number(Token* t)
1766 {
1767 // We use a state machine to collect numbers
1768 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
1769 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
1770 STATE_hexh, STATE_error };
1771 STATE state;
1772
1773 enum FLAGS
1774 {
1775 FLAGS_undefined = 0,
1776 FLAGS_decimal = 1, // decimal
1777 FLAGS_unsigned = 2, // u or U suffix
1778 FLAGS_long = 4, // l or L suffix
1779 };
1780
// Assume decimal until a leading '0' is seen (the bit is cleared in STATE_0).
1781 FLAGS flags = FLAGS.FLAGS_decimal;
1782
// NOTE: `i` is declared but never referenced in this function.
1783 int i;
1784 int base;
1785 uint c;
1786 ubyte *start;
1787 TOK result;
1788
1789 //printf("Lexer.number()\n");
1790 state = STATE.STATE_initial;
1791 base = 0;
1792 stringbuffer.reset();
// Remember where the literal starts so the real_ path can rewind to it.
1793 start = p;
1794 while (1)
1795 {
1796 c = *p;
1797 switch (state)
1798 {
1799 case STATE.STATE_initial: // opening state
1800 if (c == '0')
1801 state = STATE.STATE_0;
1802 else
1803 state = STATE.STATE_decimal;
1804 break;
1805
// The character after a leading '0' selects the radix or a real literal.
1806 case STATE.STATE_0:
1807 flags = (flags & ~FLAGS.FLAGS_decimal);
1808 switch (c)
1809 {
1810 version (ZEROH) {
1811 case 'H': // 0h
1812 case 'h':
1813 goto hexh;
1814 }
1815 case 'X':
1816 case 'x':
1817 state = STATE.STATE_hex0;
1818 break;
1819
1820 case '.':
1821 if (p[1] == '.') // .. is a separate token
1822 goto done;
// otherwise falls through to the real-number cases below
1823 case 'i':
1824 case 'f':
1825 case 'F':
1826 goto real_;
1827 version (ZEROH) {
1828 case 'E':
1829 case 'e':
1830 goto case_hex;
1831 }
1832 case 'B':
1833 case 'b':
1834 state = STATE.STATE_binary0;
1835 break;
1836
1837 case '0': case '1': case '2': case '3':
1838 case '4': case '5': case '6': case '7':
1839 state = STATE.STATE_octal;
1840 break;
1841
1842 version (ZEROH) {
1843 case '8': case '9': case 'A':
1844 case 'C': case 'D': case 'F':
1845 case 'a': case 'c': case 'd': case 'f':
1846 case_hex:
1847 state = STATE.STATE_hexh;
1848 break;
1849 }
// "0_" continues as octal; the underscore itself is skipped, not buffered.
1850 case '_':
1851 state = STATE.STATE_octal;
1852 p++;
1853 continue;
1854
1855 case 'L':
1856 if (p[1] == 'i')
1857 goto real_;
1858 goto done;
1859
1860 default:
1861 goto done;
1862 }
1863 break;
1864
1865 case STATE.STATE_decimal: // reading decimal number
1866 if (!isdigit(c))
1867 {
1868 version (ZEROH) {
1869 if (ishex(c)
1870 || c == 'H' || c == 'h'
1871 )
1872 goto hexh;
1873 }
1874 if (c == '_') // ignore embedded _
1875 { p++;
1876 continue;
1877 }
1878 if (c == '.' && p[1] != '.')
1879 goto real_;
1880 else if (c == 'i' || c == 'f' || c == 'F' ||
1881 c == 'e' || c == 'E')
1882 {
// Shared target for every state that detects a real literal.
1883 real_: // It's a real number. Back up and rescan as a real
1884 p = start;
1885 return inreal(t);
1886 }
1887 else if (c == 'L' && p[1] == 'i')
1888 goto real_;
1889 goto done;
1890 }
1891 break;
1892
// STATE_hex0 means "0x" was seen but no hex digit yet.
1893 case STATE.STATE_hex0: // reading hex number
1894 case STATE.STATE_hex:
1895 if (! ishex(cast(ubyte)c))
1896 {
1897 if (c == '_') // ignore embedded _
1898 { p++;
1899 continue;
1900 }
1901 if (c == '.' && p[1] != '.')
1902 goto real_;
1903 if (c == 'P' || c == 'p' || c == 'i')
1904 goto real_;
1905 if (state == STATE.STATE_hex0)
1906 error("Hex digit expected, not '%c'", c);
1907 goto done;
1908 }
1909 state = STATE.STATE_hex;
1910 break;
1911
1912 version (ZEROH) {
1913 hexh:
1914 state = STATE.STATE_hexh;
1915 case STATE.STATE_hexh: // parse numbers like 0FFh
1916 if (!ishex(c))
1917 {
1918 if (c == 'H' || c == 'h')
1919 {
1920 p++;
1921 base = 16;
1922 goto done;
1923 }
1924 else
1925 {
1926 // Check for something like 1E3 or 0E24
1927 if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) ||
1928 memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset))
1929 goto real_;
1930 error("Hex digit expected, not '%c'", c);
1931 goto done;
1932 }
1933 }
1934 break;
1935 }
1936
1937 case STATE.STATE_octal: // reading octal number
1938 case STATE.STATE_octale: // reading octal number with non-octal digits
1939 if (!isoctal(cast(ubyte)c))
1940 {
1941 version (ZEROH) {
1942 if (ishex(c)
1943 || c == 'H' || c == 'h'
1944 )
1945 goto hexh;
1946 }
1947 if (c == '_') // ignore embedded _
1948 { p++;
1949 continue;
1950 }
1951 if (c == '.' && p[1] != '.')
1952 goto real_;
1953 if (c == 'i')
1954 goto real_;
// An 8 or 9 is still consumed; the error is reported after `done`.
1955 if (isdigit(c))
1956 {
1957 state = STATE.STATE_octale;
1958 }
1959 else
1960 goto done;
1961 }
1962 break;
1963
1964 case STATE.STATE_binary0: // starting binary number
1965 case STATE.STATE_binary: // reading binary number
1966 if (c != '0' && c != '1')
1967 {
1968 version (ZEROH) {
1969 if (ishex(c)
1970 || c == 'H' || c == 'h'
1971 )
1972 goto hexh;
1973 }
1974 if (c == '_') // ignore embedded _
1975 { p++;
1976 continue;
1977 }
1978 if (state == STATE.STATE_binary0)
1979 { error("binary digit expected");
1980 state = STATE.STATE_error;
1981 break;
1982 }
1983 else
1984 goto done;
1985 }
1986 state = STATE.STATE_binary;
1987 break;
1988
1989 case STATE.STATE_error: // for error recovery
1990 if (!isdigit(c)) // scan until non-digit
1991 goto done;
1992 break;
1993
1994 default:
1995 assert(0);
1996 }
// Reached only via `break` above: buffer the character and advance.
1997 stringbuffer.writeByte(c);
1998 p++;
1999 }
2000 done:
2001 stringbuffer.writeByte(0); // terminate string
2002 if (state == STATE.STATE_octale)
2003 error("Octal digit expected");
2004
2005 ulong n; // unsigned >=64 bit integer type
2006
// Fast path: a single digit (one buffered character plus the terminating 0).
2007 if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0))
2008 n = stringbuffer.data[0] - '0';
2009 else
2010 {
2011 // Convert string to integer
2012 version (__DMC__) {
2013 errno = 0;
2014 n = strtoull(cast(char*)stringbuffer.data,null,base);
2015 if (errno == ERANGE)
2016 error("integer overflow");
2017 } else {
2018 // Not everybody implements strtoull()
// NOTE: this local `p` shadows the lexer's member `p` until the end of this block.
2019 char* p = cast(char*)stringbuffer.data;
2020 int r = 10, d;
2021
// Derive the radix from the buffered prefix: 0x/0X, 0b/0B, or 0 followed by a digit.
2022 if (*p == '0')
2023 {
2024 if (p[1] == 'x' || p[1] == 'X')
2025 p += 2, r = 16;
2026 else if (p[1] == 'b' || p[1] == 'B')
2027 p += 2, r = 2;
2028 else if (isdigit(p[1]))
2029 p += 1, r = 8;
2030 }
2031
2032 n = 0;
2033 while (1)
2034 {
2035 if (*p >= '0' && *p <= '9')
2036 d = *p - '0';
2037 else if (*p >= 'a' && *p <= 'z')
2038 d = *p - 'a' + 10;
2039 else if (*p >= 'A' && *p <= 'Z')
2040 d = *p - 'A' + 10;
2041 else
2042 break;
2043 if (d >= r)
2044 break;
2045 ulong n2 = n * r;
2046 //printf("n2 / r = %llx, n = %llx\n", n2/r, n);
// Detect wrap-around in either the multiplication or the addition.
2047 if (n2 / r != n || n2 + d < n)
2048 {
2049 error ("integer overflow");
2050 break;
2051 }
2052
2053 n = n2 + d;
2054 p++;
2055 }
2056 }
2057 if (n.sizeof > 8 &&
2058 n > 0xFFFFFFFFFFFFFFFF) // if n needs more than 64 bits
2059 error("integer overflow");
2060 }
2061
2062 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2063 while (1)
2064 { FLAGS f;
2065
2066 switch (*p)
2067 { case 'U':
2068 case 'u':
2069 f = FLAGS.FLAGS_unsigned;
2070 goto L1;
2071
2072 case 'l':
// The `1 ||` makes this test always true, so a lowercase 'l' suffix is always reported.
2073 if (1 || !global.params.useDeprecated)
2074 error("'l' suffix is deprecated, use 'L' instead");
2075 case 'L':
2076 f = FLAGS.FLAGS_long;
2077 L1:
2078 p++;
// A suffix kind that was already seen is rejected.
2079 if (flags & f)
2080 error("unrecognized token");
2081 flags = (flags | f);
2082 continue;
2083 default:
2084 break;
2085 }
2086 break;
2087 }
2088
// Choose the token type from the radix/suffix flags and the magnitude of n.
2089 switch (flags)
2090 {
2091 case FLAGS.FLAGS_undefined:
2092 /* Octal or Hexadecimal constant.
2093 * First that fits: int, uint, long, ulong
2094 */
2095 if (n & 0x8000000000000000)
2096 result = TOK.TOKuns64v;
2097 else if (n & 0xFFFFFFFF00000000)
2098 result = TOK.TOKint64v;
2099 else if (n & 0x80000000)
2100 result = TOK.TOKuns32v;
2101 else
2102 result = TOK.TOKint32v;
2103 break;
2104
2105 case FLAGS.FLAGS_decimal:
2106 /* First that fits: int, long, long long
2107 */
2108 if (n & 0x8000000000000000)
2109 { error("signed integer overflow");
2110 result = TOK.TOKuns64v;
2111 }
2112 else if (n & 0xFFFFFFFF80000000)
2113 result = TOK.TOKint64v;
2114 else
2115 result = TOK.TOKint32v;
2116 break;
2117
2118 case FLAGS.FLAGS_unsigned:
2119 case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
2120 /* First that fits: uint, ulong
2121 */
2122 if (n & 0xFFFFFFFF00000000)
2123 result = TOK.TOKuns64v;
2124 else
2125 result = TOK.TOKuns32v;
2126 break;
2127
2128 case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
2129 if (n & 0x8000000000000000)
2130 { error("signed integer overflow");
2131 result = TOK.TOKuns64v;
2132 }
2133 else
2134 result = TOK.TOKint64v;
2135 break;
2136
2137 case FLAGS.FLAGS_long:
2138 if (n & 0x8000000000000000)
2139 result = TOK.TOKuns64v;
2140 else
2141 result = TOK.TOKint64v;
2142 break;
2143
2144 case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
2145 case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
2146 result = TOK.TOKuns64v;
2147 break;
2148
2149 default:
2150 debug {
2151 printf("%x\n",flags);
2152 }
2153 assert(0);
2154 }
2155 t.uns64value = n;
2156 return result;
2157 }
2158
2159 /**************************************
2160 * Read in characters, converting them to real.
2161 * The text is collected in stringbuffer (embedded '_' are dropped) and
 * converted with strtold() into t.float80value. The suffix selects the
 * result: f/F gives TOKfloat32v, L (or l) gives TOKfloat80v, no suffix
 * gives TOKfloat64v; a following i (or I) turns each into the matching
 * TOKimaginary*v.
 * Bugs:
2162 * Exponent overflow not detected.
2163 * Too much requested precision is not detected.
2164 */
2165 TOK inreal(Token* t)
2166 in
2167 {
2168 assert(*p == '.' || isdigit(*p));
2169 }
2170 out (result)
2171 {
2172 switch (result)
2173 {
2174 case TOKfloat32v:
2175 case TOKfloat64v:
2176 case TOKfloat80v:
2177 case TOKimaginary32v:
2178 case TOKimaginary64v:
2179 case TOKimaginary80v:
2180 break;
2181
2182 default:
2183 assert(0);
2184 }
2185 }
2186 body
2187 {
2188 int dblstate;
2189 uint c;
2190 char hex; // is this a hexadecimal-floating-constant?
2191 TOK result;
2192
2193 //printf("Lexer.inreal()\n");
2194 stringbuffer.reset();
2195 dblstate = 0;
2196 hex = 0;
// The outer loop fetches one character per iteration; `goto Lnext` (used for '_')
// restarts it without writing the character to stringbuffer.
2197 Lnext:
2198 while (true)
2199 {
2200 // Get next char from input
2201 c = *p++;
2202 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
// The inner loop lets a state change re-examine the same character via `continue`.
2203 while (true)
2204 {
2205 switch (dblstate)
2206 {
2207 case 0: // opening state
2208 if (c == '0')
2209 dblstate = 9;
2210 else if (c == '.')
2211 dblstate = 3;
2212 else
2213 dblstate = 1;
2214 break;
2215
// State 9: the character right after a leading '0'; an x/X marks a hex float.
2216 case 9:
2217 dblstate = 1;
2218 if (c == 'X' || c == 'x')
2219 {
2220 hex++;
2221 break;
2222 }
// otherwise falls through and is treated as a digit to the left of '.'
2223 case 1: // digits to left of .
2224 case 3: // digits to right of .
2225 case 7: // continuing exponent digits
2226 if (!isdigit(c) && !(hex && isxdigit(c)))
2227 {
2228 if (c == '_')
2229 goto Lnext; // ignore embedded '_'
// Not a digit: move to the following state and re-examine this character.
2230 dblstate++;
2231 continue;
2232 }
2233 break;
2234
2235 case 2: // no more digits to left of .
2236 if (c == '.')
2237 {
2238 dblstate++;
2239 break;
2240 }
2241 case 4: // no more digits to right of .
2242 if ((c == 'E' || c == 'e') ||
2243 hex && (c == 'P' || c == 'p'))
2244 {
2245 dblstate = 5;
2246 hex = 0; // exponent is always decimal
2247 break;
2248 }
2249 if (hex)
2250 error("binary-exponent-part required");
2251 goto done;
2252
2253 case 5: // looking immediately to right of E
2254 dblstate++;
2255 if (c == '-' || c == '+')
2256 break;
2257 case 6: // 1st exponent digit expected
2258 if (!isdigit(c))
2259 error("exponent expected");
2260 dblstate++;
2261 break;
2262
2263 case 8: // past end of exponent digits
2264 goto done;
2265 }
2266 break;
2267 }
2268 stringbuffer.writeByte(c);
2269 }
2270 done:
// Un-read the character that ended the literal.
2271 p--;
2272
2273 stringbuffer.writeByte(0);
2274
// Temporarily force '.' as the decimal point; the saved value is restored below.
2275 version (_WIN32) { /// && __DMC__
2276 char* save = __locale_decpoint;
2277 __locale_decpoint = cast(char*)".".ptr;
2278 }
2279 t.float80value = strtold(cast(char*)stringbuffer.data, null);
2280
// errno is cleared after strtold(), so only the strtof()/strtod() calls below
// can trigger the ERANGE check at the end.
2281 errno = 0;
2282 switch (*p)
2283 {
2284 case 'F':
2285 case 'f':
// Result discarded: called only for its effect on errno.
2286 strtof(cast(char*)stringbuffer.data, null);
2287 result = TOKfloat32v;
2288 p++;
2289 break;
2290
2291 default:
// Result discarded: called only for its effect on errno.
2292 strtod(cast(char*)stringbuffer.data, null);
2293 result = TOKfloat64v;
2294 break;
2295
2296 case 'l':
2297 if (!global.params.useDeprecated)
2298 error("'l' suffix is deprecated, use 'L' instead");
2299 case 'L':
2300 result = TOKfloat80v;
2301 p++;
2302 break;
2303 }
// An i/I suffix turns the real token kind into the matching imaginary one.
2304 if (*p == 'i' || *p == 'I')
2305 {
2306 if (!global.params.useDeprecated && *p == 'I')
2307 error("'I' suffix is deprecated, use 'i' instead");
2308 p++;
2309 switch (result)
2310 {
2311 case TOKfloat32v:
2312 result = TOKimaginary32v;
2313 break;
2314 case TOKfloat64v:
2315 result = TOKimaginary64v;
2316 break;
2317 case TOKfloat80v:
2318 result = TOKimaginary80v;
2319 break;
2320 }
2321 }
2322
2323 version (_WIN32) { ///&& __DMC__
2324 __locale_decpoint = save;
2325 }
2326 if (errno == ERANGE)
2327 error("number is not representable");
2328
2329 return result;
2330 }
2331
/// Report an error at the lexer's current location by forwarding to the
/// overload that takes an explicit Loc.
void error(T...)(string format, T t)
{
    error(loc, format, t);
}
2336
/**
 * Report an error at `loc`.
 *
 * The message is printed only when the lexer has a module and
 * `global.gag` is not set; in that case, once `global.errors` has
 * reached 20, fatal() is called. `global.errors` is incremented on
 * every call, printed or not.
 */
void error(T...)(Loc loc, string format, T t)
{
    if (mod && !global.gag)
    {
        string where = loc.toChars();
        if (where.length)
            writef("%s: ", where);

        writefln(format, t);

        // moderate blizzard of cascading messages
        if (global.errors >= 20)
            fatal();
    }

    global.errors++;
}
2353
/// Not implemented here: the body is a single assertion that always fails.
void pragma_()
{
    assert(0);
}
2358
/// Not implemented here: the body is a single assertion that always fails.
uint decodeUTF()
{
    assert(0);
}
2363
/// Not implemented here: the body is a single assertion that always fails.
void getDocComment(Token* t, uint lineComment)
{
    assert(0);
}
2368
/**
 * Check whether `p` is spelled like an identifier.
 *
 * Params:
 *   p = candidate text, decoded one character at a time with utf_decodeChar()
 *
 * Returns:
 *   false if `p` is empty, starts with an ASCII digit, fails to decode,
 *   or contains a character that is neither '_', an ASCII letter or
 *   digit, nor (for code points >= 0x80) accepted by isUniAlpha();
 *   true otherwise.
 */
static bool isValidIdentifier(string p)
{
    if (p.length == 0)
        return false;

    if (p[0] >= '0' && p[0] <= '9')     // beware of isdigit() on signed chars
        return false;

    size_t idx = 0;
    while (idx < p.length)
    {
        dchar dc;

        if (utf_decodeChar(p, &idx, &dc) !is null)
            return false;

        if (dc >= 0x80)
        {
            // Non-ASCII code points are outside the domain of the C ctype
            // functions, so they are judged by isUniAlpha() alone.
            if (!isUniAlpha(dc))
                return false;
        }
        else if (!(isalnum(dc) || dc == '_'))
        {
            return false;
        }
    }

    return true;
}
2395
/// TODO: reimplement based on strings
/**
 * Join two 0-terminated comment strings.
 *
 * If either argument is null the other one is returned as is (null when
 * both are null). Otherwise a new malloc'ed buffer is returned holding
 * `c1`, a '\n' if `c1` is non-empty and does not already end with one,
 * then `c2`, followed by a terminating 0.
 */
static ubyte* combineComments(ubyte* c1, ubyte* c2)
{
    //printf("Lexer.combineComments('%s', '%s')\n", c1, c2);

    if (!c1)
        return c2;
    if (!c2)
        return c1;

    size_t len1 = strlen(cast(char*)c1);
    size_t len2 = strlen(cast(char*)c2);

    // Room for both strings, an optional separating newline and the terminator.
    ubyte* merged = cast(ubyte*)malloc(len1 + 1 + len2 + 1);
    memcpy(merged, c1, len1);
    if (len1 && c1[len1 - 1] != '\n')
    {
        merged[len1] = '\n';
        len1++;
    }
    memcpy(merged + len1, c2, len2);
    merged[len1 + len2] = 0;

    return merged;
}
2425 }