0
|
1 module dmd.Lexer;
|
|
2
|
|
3 import dmd.StringTable;
|
|
4 import dmd.OutBuffer;
|
|
5 import dmd.Token;
|
|
6 import dmd.Loc;
|
|
7 import dmd.Module;
|
|
8 import dmd.Identifier;
|
|
9 import dmd.TOK;
|
|
10 import dmd.Keyword;
|
|
11 import dmd.StringValue;
|
|
12 import dmd.Global;
|
|
13 import dmd.Util;
|
|
14 import dmd.Id;
|
|
15 import dmd.Dchar;
|
|
16 import dmd.Utf;
|
|
17
|
|
18 import std.stdio : writeln;
|
|
19
|
4
|
20 import core.memory;
|
2
|
21
|
0
|
22 import core.stdc.ctype;
|
|
23 import core.stdc.stdlib;
|
|
24 import core.stdc.string;
|
|
25 import core.stdc.stdio;
|
|
26 import core.stdc.time;
|
|
27 import core.stdc.errno;
|
|
28
|
|
/// Unicode LINE SEPARATOR code point (U+2028).
enum LS = 0x2028;

/// Unicode PARAGRAPH SEPARATOR code point (U+2029).
enum PS = 0x2029;

// C-linkage global that is only declared here; its definition lives outside
// this module. NOTE(review): the name suggests the C runtime's locale
// decimal-point string -- confirm against the runtime in use.
extern (C) extern __gshared char* __locale_decpoint;
|
|
36
|
|
/**
 * Placeholder for the "is this code point a Unicode letter?" test that the
 * identifier scanner calls on decoded non-ASCII characters.
 *
 * Not implemented: the body is an unconditional assertion failure, so the
 * function never returns a value, whatever `u` is.
 *
 * Params:
 *      u = code point to classify (currently ignored)
 */
int isUniAlpha(uint u)
{
    assert(0);
}
|
|
41
|
|
42 class Lexer
|
|
43 {
|
|
// ---- state shared by all Lexer instances ----
static StringTable stringtable;     // identifier/keyword table, allocated in the static constructor
static OutBuffer stringbuffer;      // scratch buffer used while assembling string literals
static Token* freelist;             // singly linked list of Token nodes released by nextToken()

// ---- per-instance state ----
Loc loc;                // location used when reporting errors

ubyte* base;            // first byte of the source buffer
ubyte* end;             // one past the last byte of the source buffer
ubyte* p;               // byte currently being examined
Token token;            // the current token
Module mod;             // module passed to the constructor
int doDocComment;       // nonzero: collect documentation-comment text
int anyToken;           // nonzero once at least one token has been seen
int commentToken;       // nonzero: comments are returned as TOKcomment tokens
|
|
58
|
|
/// Static constructor: allocates the table and buffer shared by all lexers.
static this()
{
    stringtable = new StringTable;
    stringbuffer = new OutBuffer;
}

/// Static destructor: releases the shared string table.
/// (stringbuffer is not explicitly released here.)
static ~this()
{
    delete stringtable;
}
|
|
69
|
|
/**
 * Set up a lexer over part of an in-memory source buffer.
 *
 * Params:
 *      mod           = module being lexed; also used to build `loc`
 *      base          = start of the source buffer
 *      begoffset     = offset from `base` at which scanning begins
 *      endoffset     = offset from `base` of the end of the buffer
 *      doDocComment  = nonzero to collect documentation comments
 *      commentToken  = nonzero to have comments returned as tokens
 *
 * If the text at the starting position begins with "#!", that whole first
 * line is skipped and the line counter starts at 2.
 * Keyword registration (initKeywords) is not triggered from here.
 */
this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
{
    loc = Loc(mod, 1);

    memset(&token, 0, token.sizeof);
    this.mod = mod;
    this.base = base;
    this.end = base + endoffset;
    this.p = base + begoffset;
    this.doDocComment = doDocComment;
    this.commentToken = commentToken;
    this.anyToken = 0;

    // Nothing more to do unless the input opens with a "#!" line.
    if (p[0] != '#' || p[1] != '!')
        return;

    p += 2;
Lskip:
    for (;;)
    {
        ubyte ch = *p;
        switch (ch)
        {
            case '\n':
                p++;
                break Lskip;

            case '\r':
                // consume a lone CR or a CR LF pair
                p++;
                if (*p == '\n')
                    p++;
                break Lskip;

            case 0:
            case 0x1A:
                // end-of-input markers: stop without consuming them
                break Lskip;

            default:
                if (ch & 0x80)
                {
                    // Non-ASCII lead byte: a Unicode line/paragraph separator
                    // also ends the line. p is left wherever decodeUTF()
                    // (defined outside this view) put it.
                    uint u = decodeUTF();
                    if (u == PS || u == LS)
                        break Lskip;
                }
                p++;
                break;
        }
    }
    loc.linnum = 2;
}
|
|
124
|
|
/*
 * Reserved words and the token kind each one maps to. initKeywords()
 * registers these in `stringtable` and in Token.tochars.
 *
 * The order of the tail matters: initKeywords() ignores the LAST TWO entries
 * when global.params.Dversion == 1.
 */
version (DMDV2)
{
    static Keyword[] keywords =
    [
        { "this",            TOK.TOKthis },
        { "super",           TOK.TOKsuper },
        { "assert",          TOK.TOKassert },
        { "null",            TOK.TOKnull },
        { "true",            TOK.TOKtrue },
        { "false",           TOK.TOKfalse },
        { "cast",            TOK.TOKcast },
        { "new",             TOK.TOKnew },
        { "delete",          TOK.TOKdelete },
        { "throw",           TOK.TOKthrow },
        { "module",          TOK.TOKmodule },
        { "pragma",          TOK.TOKpragma },
        { "typeof",          TOK.TOKtypeof },
        { "typeid",          TOK.TOKtypeid },

        { "template",        TOK.TOKtemplate },

        // basic types
        { "void",            TOK.TOKvoid },
        { "byte",            TOK.TOKint8 },
        { "ubyte",           TOK.TOKuns8 },
        { "short",           TOK.TOKint16 },
        { "ushort",          TOK.TOKuns16 },
        { "int",             TOK.TOKint32 },
        { "uint",            TOK.TOKuns32 },
        { "long",            TOK.TOKint64 },
        { "ulong",           TOK.TOKuns64 },
        { "cent",            TOK.TOKcent },
        { "ucent",           TOK.TOKucent },
        { "float",           TOK.TOKfloat32 },
        { "double",          TOK.TOKfloat64 },
        { "real",            TOK.TOKfloat80 },

        { "bool",            TOK.TOKbool },
        { "char",            TOK.TOKchar },
        { "wchar",           TOK.TOKwchar },
        { "dchar",           TOK.TOKdchar },

        { "ifloat",          TOK.TOKimaginary32 },
        { "idouble",         TOK.TOKimaginary64 },
        { "ireal",           TOK.TOKimaginary80 },

        { "cfloat",          TOK.TOKcomplex32 },
        { "cdouble",         TOK.TOKcomplex64 },
        { "creal",           TOK.TOKcomplex80 },

        { "delegate",        TOK.TOKdelegate },
        { "function",        TOK.TOKfunction },

        // statements and expressions
        { "is",              TOK.TOKis },
        { "if",              TOK.TOKif },
        { "else",            TOK.TOKelse },
        { "while",           TOK.TOKwhile },
        { "for",             TOK.TOKfor },
        { "do",              TOK.TOKdo },
        { "switch",          TOK.TOKswitch },
        { "case",            TOK.TOKcase },
        { "default",         TOK.TOKdefault },
        { "break",           TOK.TOKbreak },
        { "continue",        TOK.TOKcontinue },
        { "synchronized",    TOK.TOKsynchronized },
        { "return",          TOK.TOKreturn },
        { "goto",            TOK.TOKgoto },
        { "try",             TOK.TOKtry },
        { "catch",           TOK.TOKcatch },
        { "finally",         TOK.TOKfinally },
        { "with",            TOK.TOKwith },
        { "asm",             TOK.TOKasm },
        { "foreach",         TOK.TOKforeach },
        { "foreach_reverse", TOK.TOKforeach_reverse },
        { "scope",           TOK.TOKscope },

        // declarations and storage classes
        { "struct",          TOK.TOKstruct },
        { "class",           TOK.TOKclass },
        { "interface",       TOK.TOKinterface },
        { "union",           TOK.TOKunion },
        { "enum",            TOK.TOKenum },
        { "import",          TOK.TOKimport },
        { "mixin",           TOK.TOKmixin },
        { "static",          TOK.TOKstatic },
        { "final",           TOK.TOKfinal },
        { "const",           TOK.TOKconst },
        { "typedef",         TOK.TOKtypedef },
        { "alias",           TOK.TOKalias },
        { "override",        TOK.TOKoverride },
        { "abstract",        TOK.TOKabstract },
        { "volatile",        TOK.TOKvolatile },
        { "debug",           TOK.TOKdebug },
        { "deprecated",      TOK.TOKdeprecated },
        { "in",              TOK.TOKin },
        { "out",             TOK.TOKout },
        { "inout",           TOK.TOKinout },
        { "lazy",            TOK.TOKlazy },
        { "auto",            TOK.TOKauto },

        { "align",           TOK.TOKalign },
        { "extern",          TOK.TOKextern },
        { "private",         TOK.TOKprivate },
        { "package",         TOK.TOKpackage },
        { "protected",       TOK.TOKprotected },
        { "public",          TOK.TOKpublic },
        { "export",          TOK.TOKexport },

        { "body",            TOK.TOKbody },
        { "invariant",       TOK.TOKinvariant },
        { "unittest",        TOK.TOKunittest },
        { "version",         TOK.TOKversion },
        // "manifest" is not registered (its entry is disabled).

        // Added after 1.0
        { "ref",             TOK.TOKref },
        { "macro",           TOK.TOKmacro },
        { "pure",            TOK.TOKpure },
        { "nothrow",         TOK.TOKnothrow },
        { "__thread",        TOK.TOKtls },
        { "__gshared",       TOK.TOKgshared },
        { "__traits",        TOK.TOKtraits },
        { "__overloadset",   TOK.TOKoverloadset },
        { "__FILE__",        TOK.TOKfile },
        { "__LINE__",        TOK.TOKline },
        { "shared",          TOK.TOKshared },
        { "immutable",       TOK.TOKimmutable },
    ];
}
else
{
    static Keyword[] keywords =
    [
        { "this",            TOK.TOKthis },
        { "super",           TOK.TOKsuper },
        { "assert",          TOK.TOKassert },
        { "null",            TOK.TOKnull },
        { "true",            TOK.TOKtrue },
        { "false",           TOK.TOKfalse },
        { "cast",            TOK.TOKcast },
        { "new",             TOK.TOKnew },
        { "delete",          TOK.TOKdelete },
        { "throw",           TOK.TOKthrow },
        { "module",          TOK.TOKmodule },
        { "pragma",          TOK.TOKpragma },
        { "typeof",          TOK.TOKtypeof },
        { "typeid",          TOK.TOKtypeid },

        { "template",        TOK.TOKtemplate },

        // basic types
        { "void",            TOK.TOKvoid },
        { "byte",            TOK.TOKint8 },
        { "ubyte",           TOK.TOKuns8 },
        { "short",           TOK.TOKint16 },
        { "ushort",          TOK.TOKuns16 },
        { "int",             TOK.TOKint32 },
        { "uint",            TOK.TOKuns32 },
        { "long",            TOK.TOKint64 },
        { "ulong",           TOK.TOKuns64 },
        { "cent",            TOK.TOKcent },
        { "ucent",           TOK.TOKucent },
        { "float",           TOK.TOKfloat32 },
        { "double",          TOK.TOKfloat64 },
        { "real",            TOK.TOKfloat80 },

        { "bool",            TOK.TOKbool },
        { "char",            TOK.TOKchar },
        { "wchar",           TOK.TOKwchar },
        { "dchar",           TOK.TOKdchar },

        { "ifloat",          TOK.TOKimaginary32 },
        { "idouble",         TOK.TOKimaginary64 },
        { "ireal",           TOK.TOKimaginary80 },

        { "cfloat",          TOK.TOKcomplex32 },
        { "cdouble",         TOK.TOKcomplex64 },
        { "creal",           TOK.TOKcomplex80 },

        { "delegate",        TOK.TOKdelegate },
        { "function",        TOK.TOKfunction },

        // statements and expressions
        { "is",              TOK.TOKis },
        { "if",              TOK.TOKif },
        { "else",            TOK.TOKelse },
        { "while",           TOK.TOKwhile },
        { "for",             TOK.TOKfor },
        { "do",              TOK.TOKdo },
        { "switch",          TOK.TOKswitch },
        { "case",            TOK.TOKcase },
        { "default",         TOK.TOKdefault },
        { "break",           TOK.TOKbreak },
        { "continue",        TOK.TOKcontinue },
        { "synchronized",    TOK.TOKsynchronized },
        { "return",          TOK.TOKreturn },
        { "goto",            TOK.TOKgoto },
        { "try",             TOK.TOKtry },
        { "catch",           TOK.TOKcatch },
        { "finally",         TOK.TOKfinally },
        { "with",            TOK.TOKwith },
        { "asm",             TOK.TOKasm },
        { "foreach",         TOK.TOKforeach },
        { "foreach_reverse", TOK.TOKforeach_reverse },
        { "scope",           TOK.TOKscope },

        // declarations and storage classes
        { "struct",          TOK.TOKstruct },
        { "class",           TOK.TOKclass },
        { "interface",       TOK.TOKinterface },
        { "union",           TOK.TOKunion },
        { "enum",            TOK.TOKenum },
        { "import",          TOK.TOKimport },
        { "mixin",           TOK.TOKmixin },
        { "static",          TOK.TOKstatic },
        { "final",           TOK.TOKfinal },
        { "const",           TOK.TOKconst },
        { "typedef",         TOK.TOKtypedef },
        { "alias",           TOK.TOKalias },
        { "override",        TOK.TOKoverride },
        { "abstract",        TOK.TOKabstract },
        { "volatile",        TOK.TOKvolatile },
        { "debug",           TOK.TOKdebug },
        { "deprecated",      TOK.TOKdeprecated },
        { "in",              TOK.TOKin },
        { "out",             TOK.TOKout },
        { "inout",           TOK.TOKinout },
        { "lazy",            TOK.TOKlazy },
        { "auto",            TOK.TOKauto },

        { "align",           TOK.TOKalign },
        { "extern",          TOK.TOKextern },
        { "private",         TOK.TOKprivate },
        { "package",         TOK.TOKpackage },
        { "protected",       TOK.TOKprotected },
        { "public",          TOK.TOKpublic },
        { "export",          TOK.TOKexport },

        { "body",            TOK.TOKbody },
        { "invariant",       TOK.TOKinvariant },
        { "unittest",        TOK.TOKunittest },
        { "version",         TOK.TOKversion },
        // "manifest" is not registered (its entry is disabled).

        // Added after 1.0
        { "ref",             TOK.TOKref },
        { "macro",           TOK.TOKmacro },
    ];
}
|
|
370
|
|
/*
 * Per-byte classification table: one flag set per possible byte value.
 * It starts out all zero and is filled in by cmtable_init().
 *
 * Declared with the D array syntax (`ubyte[256] name`); the C-style
 * declarator form (`ubyte name[256]`) is deprecated in D.
 */
static ubyte[256] cmtable;

enum CMoctal  = 0x1;    // byte is an octal digit
enum CMhex    = 0x2;    // byte is a hexadecimal digit
enum CMidchar = 0x4;    // byte may appear in an identifier

/// Returns: CMoctal if `c` is flagged as an octal digit in cmtable, else 0.
ubyte isoctal(ubyte c)
{
    return cmtable[c] & CMoctal;
}

/// Returns: CMhex if `c` is flagged as a hex digit in cmtable, else 0.
ubyte ishex(ubyte c)
{
    return cmtable[c] & CMhex;
}

/// Returns: CMidchar if `c` is flagged as an identifier character in cmtable, else 0.
ubyte isidchar(ubyte c)
{
    return cmtable[c] & CMidchar;
}
|
|
379
|
|
/**
 * Populate `cmtable` with the CMoctal / CMhex / CMidchar flags for every
 * byte value. Flags are OR-ed into the existing entries, so calling this
 * more than once leaves the table unchanged.
 */
static void cmtable_init()
{
    for (int ch = 0; ch < cmtable.length; ++ch)
    {
        ubyte flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;

        if (isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
            flags |= CMhex;

        if (isalnum(ch) || ch == '_')
            flags |= CMidchar;

        cmtable[ch] |= flags;
    }
}
|
|
392
|
|
/**
 * One-time lexer set-up:
 *  - fills the character classification table (cmtable_init),
 *  - inserts every active keyword into `stringtable`, attaching an
 *    Identifier that carries the keyword's token value,
 *  - fills Token.tochars with the printable spelling of each token.
 *
 * When global.params.Dversion == 1 the last two entries of `keywords`
 * are left out.
 */
static void initKeywords()
{
    // size_t rather than uint: .length is size_t, and narrowing it to uint
    // does not compile on 64-bit targets.
    size_t nkeywords = keywords.length;

    if (global.params.Dversion == 1)
        nkeywords -= 2;

    cmtable_init();

    for (size_t u = 0; u < nkeywords; u++)
    {
        string s = keywords[u].name;
        TOK v = keywords[u].value;

        // NOTE(review): assumes insert() yields a usable entry, i.e. that the
        // keyword has not been inserted before -- confirm against StringTable.
        StringValue* sv = stringtable.insert(s);
        sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v);

        Token.tochars[v] = s;
    }

    // Punctuation and operators
    Token.tochars[TOK.TOKeof]           = "EOF";
    Token.tochars[TOK.TOKlcurly]        = "{";
    Token.tochars[TOK.TOKrcurly]        = "}";
    Token.tochars[TOK.TOKlparen]        = "(";
    Token.tochars[TOK.TOKrparen]        = ")";
    Token.tochars[TOK.TOKlbracket]      = "[";
    Token.tochars[TOK.TOKrbracket]      = "]";
    Token.tochars[TOK.TOKsemicolon]     = ";";
    Token.tochars[TOK.TOKcolon]         = ":";
    Token.tochars[TOK.TOKcomma]         = ",";
    Token.tochars[TOK.TOKdot]           = ".";
    Token.tochars[TOK.TOKxor]           = "^";
    Token.tochars[TOK.TOKxorass]        = "^=";
    Token.tochars[TOK.TOKassign]        = "=";
    Token.tochars[TOK.TOKconstruct]     = "=";
version (DMDV2) {
    Token.tochars[TOK.TOKblit]          = "=";
}
    Token.tochars[TOK.TOKlt]            = "<";
    Token.tochars[TOK.TOKgt]            = ">";
    Token.tochars[TOK.TOKle]            = "<=";
    Token.tochars[TOK.TOKge]            = ">=";
    Token.tochars[TOK.TOKequal]         = "==";
    Token.tochars[TOK.TOKnotequal]      = "!=";
    Token.tochars[TOK.TOKnotidentity]   = "!is";
    Token.tochars[TOK.TOKtobool]        = "!!";

    Token.tochars[TOK.TOKunord]         = "!<>=";
    Token.tochars[TOK.TOKue]            = "!<>";
    Token.tochars[TOK.TOKlg]            = "<>";
    Token.tochars[TOK.TOKleg]           = "<>=";
    Token.tochars[TOK.TOKule]           = "!>";
    Token.tochars[TOK.TOKul]            = "!>=";
    Token.tochars[TOK.TOKuge]           = "!<";
    Token.tochars[TOK.TOKug]            = "!<=";

    Token.tochars[TOK.TOKnot]           = "!";
    Token.tochars[TOK.TOKshl]           = "<<";
    Token.tochars[TOK.TOKshr]           = ">>";
    Token.tochars[TOK.TOKushr]          = ">>>";
    Token.tochars[TOK.TOKadd]           = "+";
    Token.tochars[TOK.TOKmin]           = "-";
    Token.tochars[TOK.TOKmul]           = "*";
    Token.tochars[TOK.TOKdiv]           = "/";
    Token.tochars[TOK.TOKmod]           = "%";
    Token.tochars[TOK.TOKslice]         = "..";
    Token.tochars[TOK.TOKdotdotdot]     = "...";
    Token.tochars[TOK.TOKand]           = "&";
    Token.tochars[TOK.TOKandand]        = "&&";
    Token.tochars[TOK.TOKor]            = "|";
    Token.tochars[TOK.TOKoror]          = "||";
    Token.tochars[TOK.TOKarray]         = "[]";
    Token.tochars[TOK.TOKindex]         = "[i]";
    Token.tochars[TOK.TOKaddress]       = "&";
    Token.tochars[TOK.TOKstar]          = "*";
    Token.tochars[TOK.TOKtilde]         = "~";
    Token.tochars[TOK.TOKdollar]        = "$";
    Token.tochars[TOK.TOKcast]          = "cast";
    Token.tochars[TOK.TOKplusplus]      = "++";
    Token.tochars[TOK.TOKminusminus]    = "--";
    Token.tochars[TOK.TOKtype]          = "type";
    Token.tochars[TOK.TOKquestion]      = "?";
    Token.tochars[TOK.TOKneg]           = "-";
    Token.tochars[TOK.TOKuadd]          = "+";
    Token.tochars[TOK.TOKvar]           = "var";
    Token.tochars[TOK.TOKaddass]        = "+=";
    Token.tochars[TOK.TOKminass]        = "-=";
    Token.tochars[TOK.TOKmulass]        = "*=";
    Token.tochars[TOK.TOKdivass]        = "/=";
    Token.tochars[TOK.TOKmodass]        = "%=";
    Token.tochars[TOK.TOKshlass]        = "<<=";
    Token.tochars[TOK.TOKshrass]        = ">>=";
    Token.tochars[TOK.TOKushrass]       = ">>>=";
    Token.tochars[TOK.TOKandass]        = "&=";
    Token.tochars[TOK.TOKorass]         = "|=";
    Token.tochars[TOK.TOKcatass]        = "~=";
    Token.tochars[TOK.TOKcat]           = "~";
    Token.tochars[TOK.TOKcall]          = "call";
    Token.tochars[TOK.TOKidentity]      = "is";

    Token.tochars[TOK.TOKidentifier]    = "identifier";
    Token.tochars[TOK.TOKat]            = "@";

    // For debugging
    Token.tochars[TOK.TOKdotexp]            = "dotexp";
    Token.tochars[TOK.TOKdotti]             = "dotti";
    Token.tochars[TOK.TOKdotvar]            = "dotvar";
    Token.tochars[TOK.TOKdottype]           = "dottype";
    Token.tochars[TOK.TOKsymoff]            = "symoff";
    Token.tochars[TOK.TOKarraylength]       = "arraylength";
    Token.tochars[TOK.TOKarrayliteral]      = "arrayliteral";
    Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral";
    Token.tochars[TOK.TOKstructliteral]     = "structliteral";
    Token.tochars[TOK.TOKstring]            = "string";
    Token.tochars[TOK.TOKdsymbol]           = "symbol";
    Token.tochars[TOK.TOKtuple]             = "tuple";
    Token.tochars[TOK.TOKdeclaration]       = "declaration";
    Token.tochars[TOK.TOKdottd]             = "dottd";
    Token.tochars[TOK.TOKon_scope_exit]     = "scope(exit)";
    Token.tochars[TOK.TOKon_scope_success]  = "scope(success)";
    Token.tochars[TOK.TOKon_scope_failure]  = "scope(failure)";
}
|
|
519
|
|
/**
 * Look `s` up in the shared string table and return the Identifier attached
 * to its entry, creating and attaching a plain TOKidentifier Identifier when
 * the entry has none yet.
 *
 * Params:
 *      s = identifier text
 * Returns: the Identifier stored on the table entry for `s`
 */
static Identifier idPool(string s)
{
    StringValue* entry = stringtable.update(s);

    if (auto known = cast(Identifier) entry.ptrvalue)
        return known;

    auto created = new Identifier(entry.lstring.string_, TOK.TOKidentifier);
    entry.ptrvalue = cast(void*) created;
    return created;
}
|
|
532
|
|
/**
 * Create a unique identifier from the prefix `s`, numbering it with an
 * internal counter that is advanced on every call (first value used is 1).
 */
static Identifier uniqueId(string s)
{
    static int num;

    num += 1;
    return uniqueId(s, num);
}
|
|
538
|
|
/*********************************************
 * Create a unique identifier using the prefix s.
 *
 * The name is the prefix immediately followed by the decimal value of `num`,
 * and is interned through idPool().
 *
 * Params:
 *      s   = prefix text; must be short enough that prefix + number fit in
 *            the 32-byte scratch buffer (checked by the assert below)
 *      num = number appended to the prefix
 * Returns: the pooled Identifier for the generated name
 */
static Identifier uniqueId(string s, int num)
{
    char[32] buffer;
    size_t slen = s.length;

    // Worst-case room: prefix + 3 characters per byte of num + terminator.
    assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof);

    // "%.*s" consumes an int precision followed by a char pointer, so the
    // D string is passed as explicit (length, pointer) arguments rather than
    // as a slice, whose varargs layout is not portable.
    int len = sprintf(buffer.ptr, "%.*s%d", cast(int) slen, s.ptr, num);

    return idPool(buffer[0 .. len].idup);
}
|
|
552
|
|
/**
 * Advance to the next token and return its kind.
 *
 * If a token is already chained on `token.next`, it is copied into `token`
 * and its node is pushed onto `freelist`; otherwise a fresh token is
 * scanned from the input.
 */
TOK nextToken()
{
    Token* queued = token.next;

    if (queued is null)
    {
        scan(&token);
        return token.value;
    }

    // Reuse the already-scanned token, then recycle its node.
    memcpy(&token, queued, Token.sizeof);
    queued.next = freelist;
    freelist = queued;

    return token.value;
}
|
|
572
|
|
/***********************
 * Report the kind of the token that follows the current one, as obtained
 * from peek() on the current token.
 */
TOK peekNext()
{
    auto following = peek(&token);
    return following.value;
}
|
|
580
|
|
/**
 * Not implemented: the body is an unconditional assertion failure, so this
 * never returns a token kind.
 */
TOK peekNext2()
{
    assert(0);
}
|
|
585
|
|
586 void scan(Token* t)
|
|
587 {
|
|
588 uint lastLine = loc.linnum;
|
|
589 uint linnum;
|
|
590
|
|
591 t.blockComment = null;
|
|
592 t.lineComment = null;
|
|
593 while (1)
|
|
594 {
|
|
595 t.ptr = p;
|
|
596 //printf("p = %p, *p = '%c'\n",p,*p);
|
|
597 switch (*p)
|
|
598 {
|
|
599 case 0:
|
|
600 case 0x1A:
|
|
601 t.value = TOK.TOKeof; // end of file
|
|
602 return;
|
|
603
|
|
604 case ' ':
|
|
605 case '\t':
|
|
606 case '\v':
|
|
607 case '\f':
|
|
608 p++;
|
|
609 continue; // skip white space
|
|
610
|
|
611 case '\r':
|
|
612 p++;
|
|
613 if (*p != '\n') // if CR stands by itself
|
|
614 loc.linnum++;
|
|
615 continue; // skip white space
|
|
616
|
|
617 case '\n':
|
|
618 p++;
|
|
619 loc.linnum++;
|
|
620 continue; // skip white space
|
|
621
|
|
622 case '0': case '1': case '2': case '3': case '4':
|
|
623 case '5': case '6': case '7': case '8': case '9':
|
|
624 t.value = number(t);
|
|
625 return;
|
|
626
|
|
627 version (CSTRINGS) {
|
|
628 case '\'':
|
|
629 t.value = charConstant(t, 0);
|
|
630 return;
|
|
631
|
|
632 case '"':
|
|
633 t.value = stringConstant(t,0);
|
|
634 return;
|
|
635
|
|
636 case 'l':
|
|
637 case 'L':
|
|
638 if (p[1] == '\'')
|
|
639 {
|
|
640 p++;
|
|
641 t.value = charConstant(t, 1);
|
|
642 return;
|
|
643 }
|
|
644 else if (p[1] == '"')
|
|
645 {
|
|
646 p++;
|
|
647 t.value = stringConstant(t, 1);
|
|
648 return;
|
|
649 }
|
|
650 } else {
|
|
651 case '\'':
|
|
652 t.value = charConstant(t,0);
|
|
653 return;
|
|
654
|
|
655 case 'r':
|
|
656 if (p[1] != '"')
|
|
657 goto case_ident;
|
|
658 p++;
|
|
659 case '`':
|
|
660 t.value = wysiwygStringConstant(t, *p);
|
|
661 return;
|
|
662
|
|
663 case 'x':
|
|
664 if (p[1] != '"')
|
|
665 goto case_ident;
|
|
666 p++;
|
|
667 t.value = hexStringConstant(t);
|
|
668 return;
|
|
669
|
|
670 version (DMDV2) {
|
|
671 case 'q':
|
|
672 if (p[1] == '"')
|
|
673 {
|
|
674 p++;
|
|
675 t.value = delimitedStringConstant(t);
|
|
676 return;
|
|
677 }
|
|
678 else if (p[1] == '{')
|
|
679 {
|
|
680 p++;
|
|
681 t.value = tokenStringConstant(t);
|
|
682 return;
|
|
683 }
|
|
684 else
|
|
685 goto case_ident;
|
|
686 }
|
|
687
|
|
688 case '"':
|
|
689 t.value = escapeStringConstant(t,0);
|
|
690 return;
|
|
691 version (TEXTUAL_ASSEMBLY_OUT) {
|
|
692 } else {
|
|
693 case '\\': // escaped string literal
|
|
694 { uint c;
|
|
695 ubyte* pstart = p;
|
|
696
|
|
697 stringbuffer.reset();
|
|
698 do
|
|
699 {
|
|
700 p++;
|
|
701 switch (*p)
|
|
702 {
|
|
703 case 'u':
|
|
704 case 'U':
|
|
705 case '&':
|
|
706 c = escapeSequence();
|
|
707 stringbuffer.writeUTF8(c);
|
|
708 break;
|
|
709
|
|
710 default:
|
|
711 c = escapeSequence();
|
|
712 stringbuffer.writeByte(c);
|
|
713 break;
|
|
714 }
|
|
715 } while (*p == '\\');
|
|
716 t.len = stringbuffer.offset;
|
|
717 stringbuffer.writeByte(0);
|
2
|
718 char* cc = cast(char*)GC.malloc(stringbuffer.offset);
|
0
|
719 memcpy(cc, stringbuffer.data, stringbuffer.offset);
|
|
720 t.ustring = cc;
|
|
721 t.postfix = 0;
|
|
722 t.value = TOK.TOKstring;
|
|
723 if (!global.params.useDeprecated)
|
|
724 error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart);
|
|
725 return;
|
|
726 }
|
|
727 }
|
|
728 case 'l':
|
|
729 case 'L':
|
|
730 }
|
|
731 case 'a': case 'b': case 'c': case 'd': case 'e':
|
|
732 case 'f': case 'g': case 'h': case 'i': case 'j':
|
|
733 case 'k': case 'm': case 'n': case 'o':
|
|
734 version (DMDV2) {
|
|
735 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
|
|
736 } else {
|
|
737 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
|
|
738 }
|
|
739 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
|
|
740 case 'z':
|
|
741 case 'A': case 'B': case 'C': case 'D': case 'E':
|
|
742 case 'F': case 'G': case 'H': case 'I': case 'J':
|
|
743 case 'K': case 'M': case 'N': case 'O':
|
|
744 case 'P': case 'Q': case 'R': case 'S': case 'T':
|
|
745 case 'U': case 'V': case 'W': case 'X': case 'Y':
|
|
746 case 'Z':
|
|
747 case '_':
|
|
748 case_ident:
|
|
749 { ubyte c;
|
|
750 StringValue *sv;
|
|
751 Identifier id;
|
|
752
|
|
753 do
|
|
754 {
|
|
755 c = *++p;
|
|
756 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
|
|
757 sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); ///
|
|
758 id = cast(Identifier) sv.ptrvalue;
|
|
759 if (id is null)
|
|
760 { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier);
|
|
761 sv.ptrvalue = cast(void*)id;
|
|
762 }
|
|
763 t.ident = id;
|
|
764 t.value = cast(TOK) id.value;
|
|
765 anyToken = 1;
|
|
766 if (*t.ptr == '_') // if special identifier token
|
|
767 {
|
|
768 static char date[11+1];
|
|
769 static char time[8+1];
|
|
770 static char timestamp[24+1];
|
|
771
|
|
772 if (!date[0]) // lazy evaluation
|
|
773 { time_t tm;
|
|
774 char *p;
|
|
775
|
|
776 .time(&tm);
|
|
777 p = ctime(&tm);
|
|
778 assert(p);
|
|
779 sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
|
|
780 sprintf(time.ptr, "%.8s", p + 11);
|
|
781 sprintf(timestamp.ptr, "%.24s", p);
|
|
782 }
|
|
783
|
|
784 ///version (DMDV1) {
|
|
785 /// if (mod && id == Id.FILE)
|
|
786 /// {
|
|
787 /// t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars());
|
|
788 /// goto Lstr;
|
|
789 /// }
|
|
790 /// else if (mod && id == Id.LINE)
|
|
791 /// {
|
|
792 /// t.value = TOK.TOKint64v;
|
|
793 /// t.uns64value = loc.linnum;
|
|
794 /// }
|
|
795 /// else
|
|
796 ///}
|
|
797 if (id == Id.DATE)
|
|
798 {
|
|
799 t.ustring = date.ptr;
|
|
800 goto Lstr;
|
|
801 }
|
|
802 else if (id == Id.TIME)
|
|
803 {
|
|
804 t.ustring = time.ptr;
|
|
805 goto Lstr;
|
|
806 }
|
|
807 else if (id == Id.VENDOR)
|
|
808 {
|
|
809 t.ustring = "Digital Mars D".ptr;
|
|
810 goto Lstr;
|
|
811 }
|
|
812 else if (id == Id.TIMESTAMP)
|
|
813 {
|
|
814 t.ustring = timestamp.ptr;
|
|
815 Lstr:
|
|
816 t.value = TOK.TOKstring;
|
|
817 Llen:
|
|
818 t.postfix = 0;
|
|
819 t.len = strlen(cast(char*)t.ustring);
|
|
820 }
|
|
821 else if (id == Id.VERSIONX)
|
|
822 {
|
|
823 uint major = 0;
|
|
824 uint minor = 0;
|
|
825
|
|
826 foreach (char cc; global.version_[1..$])
|
|
827 {
|
|
828 if (isdigit(cc))
|
|
829 minor = minor * 10 + cc - '0';
|
|
830 else if (cc == '.')
|
|
831 {
|
|
832 major = minor;
|
|
833 minor = 0;
|
|
834 }
|
|
835 else
|
|
836 break;
|
|
837 }
|
|
838 t.value = TOK.TOKint64v;
|
|
839 t.uns64value = major * 1000 + minor;
|
|
840 }
|
|
841 ///version (DMDV2) {
|
|
842 else if (id == Id.EOFX)
|
|
843 {
|
|
844 t.value = TOK.TOKeof;
|
|
845 // Advance scanner to end of file
|
|
846 while (!(*p == 0 || *p == 0x1A))
|
|
847 p++;
|
|
848 }
|
|
849 ///}
|
|
850 }
|
|
851 //printf("t.value = %d\n",t.value);
|
|
852 return;
|
|
853 }
|
|
854
|
|
855 case '/':
|
|
856 p++;
|
|
857 switch (*p)
|
|
858 {
|
|
859 case '=':
|
|
860 p++;
|
|
861 t.value = TOK.TOKdivass;
|
|
862 return;
|
|
863
|
|
864 case '*':
|
|
865 p++;
|
|
866 linnum = loc.linnum;
|
|
867 while (1)
|
|
868 {
|
|
869 while (1)
|
|
870 {
|
|
871 ubyte c = *p;
|
|
872 switch (c)
|
|
873 {
|
|
874 case '/':
|
|
875 break;
|
|
876
|
|
877 case '\n':
|
|
878 loc.linnum++;
|
|
879 p++;
|
|
880 continue;
|
|
881
|
|
882 case '\r':
|
|
883 p++;
|
|
884 if (*p != '\n')
|
|
885 loc.linnum++;
|
|
886 continue;
|
|
887
|
|
888 case 0:
|
|
889 case 0x1A:
|
|
890 error("unterminated /* */ comment");
|
|
891 p = end;
|
|
892 t.value = TOK.TOKeof;
|
|
893 return;
|
|
894
|
|
895 default:
|
|
896 if (c & 0x80)
|
|
897 { uint u = decodeUTF();
|
|
898 if (u == PS || u == LS)
|
|
899 loc.linnum++;
|
|
900 }
|
|
901 p++;
|
|
902 continue;
|
|
903 }
|
|
904 break;
|
|
905 }
|
|
906 p++;
|
|
907 if (p[-2] == '*' && p - 3 != t.ptr)
|
|
908 break;
|
|
909 }
|
|
910 if (commentToken)
|
|
911 {
|
|
912 t.value = TOK.TOKcomment;
|
|
913 return;
|
|
914 }
|
|
915 else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
|
|
916 { // if /** but not /**/
|
|
917 getDocComment(t, lastLine == linnum);
|
|
918 }
|
|
919 continue;
|
|
920
|
|
921 case '/': // do // style comments
|
|
922 linnum = loc.linnum;
|
|
923 while (1)
|
|
924 { ubyte c = *++p;
|
|
925 switch (c)
|
|
926 {
|
|
927 case '\n':
|
|
928 break;
|
|
929
|
|
930 case '\r':
|
|
931 if (p[1] == '\n')
|
|
932 p++;
|
|
933 break;
|
|
934
|
|
935 case 0:
|
|
936 case 0x1A:
|
|
937 if (commentToken)
|
|
938 {
|
|
939 p = end;
|
|
940 t.value = TOK.TOKcomment;
|
|
941 return;
|
|
942 }
|
|
943 if (doDocComment && t.ptr[2] == '/')
|
|
944 getDocComment(t, lastLine == linnum);
|
|
945 p = end;
|
|
946 t.value = TOK.TOKeof;
|
|
947 return;
|
|
948
|
|
949 default:
|
|
950 if (c & 0x80)
|
|
951 { uint u = decodeUTF();
|
|
952 if (u == PS || u == LS)
|
|
953 break;
|
|
954 }
|
|
955 continue;
|
|
956 }
|
|
957 break;
|
|
958 }
|
|
959
|
|
960 if (commentToken)
|
|
961 {
|
|
962 p++;
|
|
963 loc.linnum++;
|
|
964 t.value = TOK.TOKcomment;
|
|
965 return;
|
|
966 }
|
|
967 if (doDocComment && t.ptr[2] == '/')
|
|
968 getDocComment(t, lastLine == linnum);
|
|
969
|
|
970 p++;
|
|
971 loc.linnum++;
|
|
972 continue;
|
|
973
|
|
974 case '+':
|
|
975 {
|
|
976 int nest;
|
|
977
|
|
978 linnum = loc.linnum;
|
|
979 p++;
|
|
980 nest = 1;
|
|
981 while (1)
|
|
982 { ubyte c = *p;
|
|
983 switch (c)
|
|
984 {
|
|
985 case '/':
|
|
986 p++;
|
|
987 if (*p == '+')
|
|
988 {
|
|
989 p++;
|
|
990 nest++;
|
|
991 }
|
|
992 continue;
|
|
993
|
|
994 case '+':
|
|
995 p++;
|
|
996 if (*p == '/')
|
|
997 {
|
|
998 p++;
|
|
999 if (--nest == 0)
|
|
1000 break;
|
|
1001 }
|
|
1002 continue;
|
|
1003
|
|
1004 case '\r':
|
|
1005 p++;
|
|
1006 if (*p != '\n')
|
|
1007 loc.linnum++;
|
|
1008 continue;
|
|
1009
|
|
1010 case '\n':
|
|
1011 loc.linnum++;
|
|
1012 p++;
|
|
1013 continue;
|
|
1014
|
|
1015 case 0:
|
|
1016 case 0x1A:
|
|
1017 error("unterminated /+ +/ comment");
|
|
1018 p = end;
|
|
1019 t.value = TOK.TOKeof;
|
|
1020 return;
|
|
1021
|
|
1022 default:
|
|
1023 if (c & 0x80)
|
|
1024 { uint u = decodeUTF();
|
|
1025 if (u == PS || u == LS)
|
|
1026 loc.linnum++;
|
|
1027 }
|
|
1028 p++;
|
|
1029 continue;
|
|
1030 }
|
|
1031 break;
|
|
1032 }
|
|
1033 if (commentToken)
|
|
1034 {
|
|
1035 t.value = TOK.TOKcomment;
|
|
1036 return;
|
|
1037 }
|
|
1038 if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
|
|
1039 { // if /++ but not /++/
|
|
1040 getDocComment(t, lastLine == linnum);
|
|
1041 }
|
|
1042 continue;
|
|
1043 }
|
|
1044
|
|
1045 default:
|
|
1046 break; ///
|
|
1047 }
|
|
1048 t.value = TOK.TOKdiv;
|
|
1049 return;
|
|
1050
|
|
1051 case '.':
|
|
1052 p++;
|
|
1053 if (isdigit(*p))
|
|
1054 { /* Note that we don't allow ._1 and ._ as being
|
|
1055 * valid floating point numbers.
|
|
1056 */
|
|
1057 p--;
|
|
1058 t.value = inreal(t);
|
|
1059 }
|
|
1060 else if (p[0] == '.')
|
|
1061 {
|
|
1062 if (p[1] == '.')
|
|
1063 { p += 2;
|
|
1064 t.value = TOK.TOKdotdotdot;
|
|
1065 }
|
|
1066 else
|
|
1067 { p++;
|
|
1068 t.value = TOK.TOKslice;
|
|
1069 }
|
|
1070 }
|
|
1071 else
|
|
1072 t.value = TOK.TOKdot;
|
|
1073 return;
|
|
1074
|
|
1075 case '&':
|
|
1076 p++;
|
|
1077 if (*p == '=')
|
|
1078 { p++;
|
|
1079 t.value = TOK.TOKandass;
|
|
1080 }
|
|
1081 else if (*p == '&')
|
|
1082 { p++;
|
|
1083 t.value = TOK.TOKandand;
|
|
1084 }
|
|
1085 else
|
|
1086 t.value = TOK.TOKand;
|
|
1087 return;
|
|
1088
|
|
1089 case '|':
|
|
1090 p++;
|
|
1091 if (*p == '=')
|
|
1092 { p++;
|
|
1093 t.value = TOK.TOKorass;
|
|
1094 }
|
|
1095 else if (*p == '|')
|
|
1096 { p++;
|
|
1097 t.value = TOK.TOKoror;
|
|
1098 }
|
|
1099 else
|
|
1100 t.value = TOK.TOKor;
|
|
1101 return;
|
|
1102
|
|
1103 case '-':
|
|
1104 p++;
|
|
1105 if (*p == '=')
|
|
1106 { p++;
|
|
1107 t.value = TOK.TOKminass;
|
|
1108 }
|
|
1109 /// #if 0
|
|
1110 /// else if (*p == '>')
|
|
1111 /// { p++;
|
|
1112 /// t.value = TOK.TOKarrow;
|
|
1113 /// }
|
|
1114 /// #endif
|
|
1115 else if (*p == '-')
|
|
1116 { p++;
|
|
1117 t.value = TOK.TOKminusminus;
|
|
1118 }
|
|
1119 else
|
|
1120 t.value = TOK.TOKmin;
|
|
1121 return;
|
|
1122
|
|
1123 case '+':
|
|
1124 p++;
|
|
1125 if (*p == '=')
|
|
1126 { p++;
|
|
1127 t.value = TOK.TOKaddass;
|
|
1128 }
|
|
1129 else if (*p == '+')
|
|
1130 { p++;
|
|
1131 t.value = TOK.TOKplusplus;
|
|
1132 }
|
|
1133 else
|
|
1134 t.value = TOK.TOKadd;
|
|
1135 return;
|
|
1136
|
|
1137 case '<':
|
|
1138 p++;
|
|
1139 if (*p == '=')
|
|
1140 { p++;
|
|
1141 t.value = TOK.TOKle; // <=
|
|
1142 }
|
|
1143 else if (*p == '<')
|
|
1144 { p++;
|
|
1145 if (*p == '=')
|
|
1146 { p++;
|
|
1147 t.value = TOK.TOKshlass; // <<=
|
|
1148 }
|
|
1149 else
|
|
1150 t.value = TOK.TOKshl; // <<
|
|
1151 }
|
|
1152 else if (*p == '>')
|
|
1153 { p++;
|
|
1154 if (*p == '=')
|
|
1155 { p++;
|
|
1156 t.value = TOK.TOKleg; // <>=
|
|
1157 }
|
|
1158 else
|
|
1159 t.value = TOK.TOKlg; // <>
|
|
1160 }
|
|
1161 else
|
|
1162 t.value = TOK.TOKlt; // <
|
|
1163 return;
|
|
1164
|
|
1165 case '>':
|
|
1166 p++;
|
|
1167 if (*p == '=')
|
|
1168 { p++;
|
|
1169 t.value = TOK.TOKge; // >=
|
|
1170 }
|
|
1171 else if (*p == '>')
|
|
1172 { p++;
|
|
1173 if (*p == '=')
|
|
1174 { p++;
|
|
1175 t.value = TOK.TOKshrass; // >>=
|
|
1176 }
|
|
1177 else if (*p == '>')
|
|
1178 { p++;
|
|
1179 if (*p == '=')
|
|
1180 { p++;
|
|
1181 t.value = TOK.TOKushrass; // >>>=
|
|
1182 }
|
|
1183 else
|
|
1184 t.value = TOK.TOKushr; // >>>
|
|
1185 }
|
|
1186 else
|
|
1187 t.value = TOK.TOKshr; // >>
|
|
1188 }
|
|
1189 else
|
|
1190 t.value = TOK.TOKgt; // >
|
|
1191 return;
|
|
1192
|
|
1193 case '!':
|
|
1194 p++;
|
|
1195 if (*p == '=')
|
|
1196 { p++;
|
|
1197 if (*p == '=' && global.params.Dversion == 1)
|
|
1198 { p++;
|
|
1199 t.value = TOK.TOKnotidentity; // !==
|
|
1200 }
|
|
1201 else
|
|
1202 t.value = TOK.TOKnotequal; // !=
|
|
1203 }
|
|
1204 else if (*p == '<')
|
|
1205 { p++;
|
|
1206 if (*p == '>')
|
|
1207 { p++;
|
|
1208 if (*p == '=')
|
|
1209 { p++;
|
|
1210 t.value = TOK.TOKunord; // !<>=
|
|
1211 }
|
|
1212 else
|
|
1213 t.value = TOK.TOKue; // !<>
|
|
1214 }
|
|
1215 else if (*p == '=')
|
|
1216 { p++;
|
|
1217 t.value = TOK.TOKug; // !<=
|
|
1218 }
|
|
1219 else
|
|
1220 t.value = TOK.TOKuge; // !<
|
|
1221 }
|
|
1222 else if (*p == '>')
|
|
1223 { p++;
|
|
1224 if (*p == '=')
|
|
1225 { p++;
|
|
1226 t.value = TOK.TOKul; // !>=
|
|
1227 }
|
|
1228 else
|
|
1229 t.value = TOK.TOKule; // !>
|
|
1230 }
|
|
1231 else
|
|
1232 t.value = TOK.TOKnot; // !
|
|
1233 return;
|
|
1234
|
|
1235 case '=':
|
|
1236 p++;
|
|
1237 if (*p == '=')
|
|
1238 { p++;
|
|
1239 if (*p == '=' && global.params.Dversion == 1)
|
|
1240 { p++;
|
|
1241 t.value = TOK.TOKidentity; // ===
|
|
1242 }
|
|
1243 else
|
|
1244 t.value = TOK.TOKequal; // ==
|
|
1245 }
|
|
1246 else
|
|
1247 t.value = TOK.TOKassign; // =
|
|
1248 return;
|
|
1249
|
|
1250 case '~':
|
|
1251 p++;
|
|
1252 if (*p == '=')
|
|
1253 { p++;
|
|
1254 t.value = TOK.TOKcatass; // ~=
|
|
1255 }
|
|
1256 else
|
|
1257 t.value = TOK.TOKtilde; // ~
|
|
1258 return;
|
|
1259 /*
|
|
1260 #define SINGLE(c,tok) case c: p++; t.value = tok; return;
|
|
1261
|
|
1262 SINGLE('(', TOKlparen)
|
|
1263 SINGLE(')', TOKrparen)
|
|
1264 SINGLE('[', TOKlbracket)
|
|
1265 SINGLE(']', TOKrbracket)
|
|
1266 SINGLE('{', TOKlcurly)
|
|
1267 SINGLE('}', TOKrcurly)
|
|
1268 SINGLE('?', TOKquestion)
|
|
1269 SINGLE(',', TOKcomma)
|
|
1270 SINGLE(';', TOKsemicolon)
|
|
1271 SINGLE(':', TOKcolon)
|
|
1272 SINGLE('$', TOKdollar)
|
|
1273 SINGLE('@', TOKat)
|
|
1274
|
|
1275 #undef SINGLE
|
|
1276
|
|
1277 #define DOUBLE(c1,tok1,c2,tok2) \
|
|
1278 case c1: \
|
|
1279 p++; \
|
|
1280 if (*p == c2) \
|
|
1281 { p++; \
|
|
1282 t.value = tok2; \
|
|
1283 } \
|
|
1284 else \
|
|
1285 t.value = tok1; \
|
|
1286 return;
|
|
1287
|
|
1288 DOUBLE('*', TOKmul, '=', TOKmulass)
|
|
1289 DOUBLE('%', TOKmod, '=', TOKmodass)
|
|
1290 DOUBLE('^', TOKxor, '=', TOKxorass)
|
|
1291
|
|
1292 #undef DOUBLE
|
|
1293 */
|
|
1294
|
|
1295 case '(': p++; t.value = TOK.TOKlparen; return;
|
|
1296 case ')': p++; t.value = TOK.TOKrparen; return;
|
|
1297 case '[': p++; t.value = TOK.TOKlbracket; return;
|
|
1298 case ']': p++; t.value = TOK.TOKrbracket; return;
|
|
1299 case '{': p++; t.value = TOK.TOKlcurly; return;
|
|
1300 case '}': p++; t.value = TOK.TOKrcurly; return;
|
|
1301 case '?': p++; t.value = TOK.TOKquestion; return;
|
|
1302 case ',': p++; t.value = TOK.TOKcomma; return;
|
|
1303 case ';': p++; t.value = TOK.TOKsemicolon; return;
|
|
1304 case ':': p++; t.value = TOK.TOKcolon; return;
|
|
1305 case '$': p++; t.value = TOK.TOKdollar; return;
|
|
1306 case '@': p++; t.value = TOK.TOKat; return;
|
|
1307
|
|
1308 case '*':
|
|
1309 p++;
|
|
1310 if (*p == '=') {
|
|
1311 p++;
|
|
1312 t.value = TOK.TOKmulass;
|
|
1313 } else {
|
|
1314 t.value = TOK.TOKmul;
|
|
1315 }
|
|
1316 return;
|
|
1317
|
|
1318 case '%':
|
|
1319 p++;
|
|
1320 if (*p == '=') {
|
|
1321 p++;
|
|
1322 t.value = TOK.TOKmodass;
|
|
1323 } else {
|
|
1324 t.value = TOK.TOKmod;
|
|
1325 }
|
|
1326 return;
|
|
1327
|
|
1328 case '^':
|
|
1329 p++;
|
|
1330 if (*p == '=') {
|
|
1331 p++;
|
|
1332 t.value = TOK.TOKxorass;
|
|
1333 } else {
|
|
1334 t.value = TOK.TOKxor;
|
|
1335 }
|
|
1336 return;
|
|
1337
|
|
1338 case '#':
|
|
1339 p++;
|
|
1340 pragma_();
|
|
1341 continue;
|
|
1342
|
|
1343 default:
|
|
1344 { ubyte c = *p;
|
|
1345
|
|
1346 if (c & 0x80)
|
|
1347 { uint u = decodeUTF();
|
|
1348
|
|
1349 // Check for start of unicode identifier
|
|
1350 if (isUniAlpha(u))
|
|
1351 goto case_ident;
|
|
1352
|
|
1353 if (u == PS || u == LS)
|
|
1354 {
|
|
1355 loc.linnum++;
|
|
1356 p++;
|
|
1357 continue;
|
|
1358 }
|
|
1359 }
|
|
1360 if (isprint(c))
|
|
1361 error("unsupported char '%c'", c);
|
|
1362 else
|
|
1363 error("unsupported char 0x%02x", c);
|
|
1364 p++;
|
|
1365 continue;
|
|
1366 }
|
|
1367 }
|
|
1368 }
|
|
1369 }
|
|
1370
|
|
/**
 * Return the token that follows `ct`, scanning it on demand.
 *
 * If `ct.next` is already set it is returned as-is. Otherwise a new Token
 * is allocated, filled in by scan(), linked in as `ct.next` and returned.
 */
Token* peek(Token* ct)
{
    if (ct.next)
        return ct.next;

    Token* fresh = new Token();
    scan(fresh);
    fresh.next = null;
    ct.next = fresh;
    return fresh;
}
|
|
1386
|
|
/**
 * Look ahead from `tk` (via peek()) for the ')' that balances an already
 * opened '(' and return the token that follows it.
 *
 * The paren depth starts at 1. The walk stops early, returning the
 * offending token itself, on:
 *   - a '}' that has no matching '{' inside the scanned range,
 *   - a ';' seen while not inside any '{ }' pair,
 *   - end of file.
 */
Token* peekPastParen(Token* tk)
{
    int parenDepth = 1;
    int braceDepth = 0;

    for (;;)
    {
        tk = peek(tk);
        switch (tk.value)
        {
            case TOK.TOKlparen:
                ++parenDepth;
                break;

            case TOK.TOKrparen:
                if (--parenDepth == 0)
                    return peek(tk);    // token right after the closing ')'
                break;

            case TOK.TOKlcurly:
                ++braceDepth;
                break;

            case TOK.TOKrcurly:
                if (--braceDepth < 0)
                    return tk;          // unbalanced '}'
                break;

            case TOK.TOKsemicolon:
                if (braceDepth == 0)
                    return tk;          // statement ended before the ')'
                break;

            case TOK.TOKeof:
                return tk;

            default:
                break;
        }
    }
}
|
|
1432
|
|
/*******************************************
 * Parse the escape sequence located at `p` (the character that follows
 * the backslash) and return the character value it denotes.
 *
 * Forms handled:
 *   ' " ? \                  the character itself
 *   a b f n r t v            control codes 7, 8, 12, 10, 13, 9, 11
 *   xHH  uHHHH  UHHHHHHHH    hexadecimal value (2, 4 or 8 digits)
 *   &name;                   named entity, resolved with HtmlNamedEntity()
 *   1 to 3 octal digits      octal value
 *
 * `p` is advanced past whatever was consumed. Two cases leave `p` where it
 * was: NUL / 0x1A (end of file), for which '\\' is returned, and an
 * unrecognised escape character, which is reported and returned unchanged.
 *
 * Malformed sequences are reported through error(); a value is returned
 * in every case.
 *
 * When version TEXTUAL_ASSEMBLY_OUT is set the character at `p` is returned
 * immediately without any decoding.
 */
uint escapeSequence()
{
    uint c = *p;

    version (TEXTUAL_ASSEMBLY_OUT) {
        return c;
    }
    int n;
    int ndigits;

    switch (c)
    {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
            p++;
            break;

        case 'a':   c = 7;  goto Lconsume;
        case 'b':   c = 8;  goto Lconsume;
        case 'f':   c = 12; goto Lconsume;
        case 'n':   c = 10; goto Lconsume;
        case 'r':   c = 13; goto Lconsume;
        case 't':   c = 9;  goto Lconsume;
        case 'v':   c = 11; goto Lconsume;

        case 'u':
            ndigits = 4;
            goto Lhex;
        case 'U':
            ndigits = 8;
            goto Lhex;
        case 'x':
            ndigits = 2;
        Lhex:
            p++;
            c = *p;
            if (ishex(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                while (1)
                {
                    // Convert the current hex digit to its numeric value.
                    if (isdigit(c))
                        c -= '0';
                    else if (islower(c))
                        c -= 'a' - 10;
                    else
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(ubyte)c))
                    {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \x may hold any byte value; \u and \U must be valid code points.
                if (ndigits != 2 && !utf_isValidDchar(v))
                {   error("invalid UTF character \\U%08x", v);
                    v = '?';    // recover with valid UTF character
                }
                c = v;
            }
            else
                error("undefined escape hex sequence \\%c\n",c);
            break;

        case '&':           // named character entity
            for (ubyte* idstart = ++p; true; p++)
            {
                switch (*p)
                {
                    case ';':
                        c = HtmlNamedEntity(idstart, p - idstart);
                        if (c == ~0)
                        {
                            // Cast to string so %s prints the entity name as
                            // text; a ubyte[] slice would be formatted as a
                            // list of numbers by the writefln-based error().
                            error("unnamed character entity &%s;", cast(string)idstart[0..(p - idstart)]);
                            c = ' ';
                        }
                        p++;
                        break;

                    default:
                        // Letters anywhere, digits anywhere but the first position.
                        if (isalpha(*p) ||
                            (p != idstart + 1 && isdigit(*p)))
                            continue;
                        error("unterminated named entity");
                        break;
                }
                break;
            }
            break;

        case 0:
        case 0x1A:          // end of file
            c = '\\';
            break;

        default:
            if (isoctal(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                do
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                } while (++n < 3 && isoctal(cast(ubyte)c));
                c = v;
                if (c > 0xFF)
                    error("0%03o is larger than a byte", c);
            }
            else
                error("undefined escape sequence \\%c\n",c);
            break;
    }
    return c;
}
|
|
1561
|
|
/// Placeholder with no implementation: every call fails the assertion.
TOK wysiwygStringConstant(Token* t, int tc)
{
    assert(0);
}
|
|
1566
|
|
/// Placeholder with no implementation: every call fails the assertion.
TOK hexStringConstant(Token* t)
{
    assert(0);
}
|
|
1571
|
|
version (DMDV2) {
/// Placeholder with no implementation: every call fails the assertion.
TOK delimitedStringConstant(Token* t)
{
    assert(0);
}

/// Placeholder with no implementation: every call fails the assertion.
TOK tokenStringConstant(Token* t)
{
    assert(0);
}
}
|
|
/**
 * Lex a double-quoted string literal in which backslash escapes are
 * interpreted. On entry `p` addresses the opening quote.
 *
 * The decoded bytes are collected in `stringbuffer`. At the closing quote
 * they are copied, together with a terminating 0, into GC-allocated memory
 * stored in `t.ustring`; `t.len` is the length without the terminator and
 * stringPostfix() then fills `t.postfix`.
 *
 * Line endings inside the literal bump `loc.linnum`; "\r\n" and a lone
 * '\r' are both stored as a single '\n', and the Unicode LS / PS
 * separators are stored as '\n' too.
 *
 * Reaching NUL or 0x1A first reports an unterminated string and yields an
 * empty string token, with `p` left on that terminator.
 *
 * Params:
 *   t    = token receiving ustring, len and postfix
 *   wide = not referenced in this body
 * Returns: TOK.TOKstring in every case.
 */
TOK escapeStringConstant(Token* t, int wide)
{
    Loc opened = loc;   // where the literal began, for the diagnostic
    uint ch;

    p++;                // step over the opening quote
    stringbuffer.reset();
    for (;;)
    {
        ch = *p++;
        switch (ch)
        {
            version (TEXTUAL_ASSEMBLY_OUT) {
            } else {
            case '\\':
            {
                // \u, \U and \& may denote any code point and are stored
                // UTF-8 encoded; every other escape is stored as one byte.
                ubyte lead = *p;
                ch = escapeSequence();
                if (lead == 'u' || lead == 'U' || lead == '&')
                {
                    stringbuffer.writeUTF8(ch);
                    continue;
                }
                break;
            }
            }
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;       // drop the '\r' of a "\r\n" pair
                ch = '\n';          // a lone '\r' counts as an end of line
                loc.linnum++;
                break;

            case '"':
            {
                t.len = stringbuffer.offset;
                stringbuffer.writeByte(0);

                // Copy including the terminator that was just appended.
                char* copy = cast(char*)GC.malloc(stringbuffer.offset);
                memcpy(copy, stringbuffer.data, stringbuffer.offset);
                t.ustring = copy;
                stringPostfix(t);
                return TOK.TOKstring;
            }

            case 0:
            case 0x1A:
                p--;                // stay on the end-of-file marker
                error("unterminated string constant starting at %s", opened.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOK.TOKstring;

            default:
                if (ch & 0x80)
                {
                    // Multi-byte sequence: back up so decodeUTF() sees the
                    // lead byte, then step past what it left under `p`.
                    p--;
                    ch = decodeUTF();
                    if (ch == LS || ch == PS)
                    {
                        ch = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(ch);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(ch);
    }

    assert(false);
}
|
|
1662
|
|
/**
 * Lex a single-quoted character literal. On entry `p` addresses the
 * opening quote.
 *
 * The character value is stored in `t.uns64value`. The returned token kind
 * starts as TOKcharv and is widened as follows:
 *   - escape beginning with 'u'                  -> TOKwcharv
 *   - escape beginning with 'U' or '&'           -> TOKdcharv
 *   - non-ASCII character below 0xD800 or in
 *     0xE000 .. 0xFFFD                           -> TOKwcharv
 *   - any other non-ASCII character              -> TOKdcharv
 *
 * A newline (including LS / PS), '\r', NUL, 0x1A or an immediate closing
 * quote in the character position, as well as a missing closing quote,
 * is reported as "unterminated character constant".
 *
 * Params:
 *   t    = token receiving the value
 *   wide = not referenced in this body
 */
TOK charConstant(Token* t, int wide)
{
    TOK kind = TOKcharv;
    uint ch;

    p++;            // step over the opening quote
    ch = *p++;
    switch (ch)
    {
        version (TEXTUAL_ASSEMBLY_OUT) {
        } else {
        case '\\':
        {
            // The escape's first character decides how wide the result is.
            ubyte lead = *p;
            t.uns64value = escapeSequence();
            if (lead == 'u')
                kind = TOKwcharv;
            else if (lead == 'U' || lead == '&')
                kind = TOKdcharv;
            break;
        }
        }
        case '\n':
        Lnewline:
            loc.linnum++;
            // falls through into the error cases below
        case '\r':
        case 0:
        case 0x1A:
        case '\'':
            error("unterminated character constant");
            return kind;

        default:
            if (ch & 0x80)
            {
                // Re-read the multi-byte sequence from its lead byte.
                p--;
                ch = decodeUTF();
                p++;
                if (ch == LS || ch == PS)
                    goto Lnewline;
                if (ch < 0xD800 || (ch >= 0xE000 && ch < 0xFFFE))
                    kind = TOKwcharv;
                else
                    kind = TOKdcharv;
            }
            t.uns64value = ch;
            break;
    }

    if (*p == '\'')
        p++;        // consume the closing quote
    else
        error("unterminated character constant");
    return kind;
}
|
|
1730
|
|
/***************************************
 * Read the optional postfix character of a string literal.
 *
 * If `p` addresses 'c', 'w' or 'd', that character is stored in
 * `t.postfix` and consumed; otherwise `t.postfix` is set to 0 and `p` is
 * left untouched.
 */
void stringPostfix(Token* t)
{
    ubyte ch = *p;

    if (ch == 'c' || ch == 'w' || ch == 'd')
    {
        t.postfix = ch;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
|
|
1750
|
|
/// Placeholder with no implementation: every call fails the assertion.
uint wchar_(uint u)
{
    assert(0);
}
|
|
1755
|
|
/**************************************
 * Lex a numeric literal starting at `p`.
 *
 * A state machine collects the characters of an integer literal (decimal,
 * octal with a leading 0, hex with 0x / 0X, binary with 0b / 0B) into
 * `stringbuffer`, skipping embedded '_'. As soon as the text turns out to
 * be a floating point literal ('.' not followed by another '.', an
 * exponent, an 'i' / 'f' / 'F' suffix, or "Li"), `p` is rewound to the
 * start and the work is handed to inreal().
 *
 * For integers the collected text is converted to a ulong with overflow
 * detection, any combination of 'u' / 'U' and 'l' / 'L' suffixes is
 * consumed, and the value is stored in `t.uns64value`.
 *
 * Returns:
 *   TOKint32v, TOKuns32v, TOKint64v or TOKuns64v depending on the value,
 *   the radix and the suffixes; or whatever inreal() returns.
 */
TOK number(Token* t)
{
    // Lexing states of the integer scanner.
    enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
        STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
        STATE_hexh, STATE_error }
    STATE state;

    enum FLAGS
    {
        FLAGS_undefined = 0,
        FLAGS_decimal   = 1,    // decimal
        FLAGS_unsigned  = 2,    // u or U suffix
        FLAGS_long      = 4,    // l or L suffix
    }

    FLAGS flags = FLAGS.FLAGS_decimal;

    int base;
    uint c;
    ubyte* start;
    TOK result;

    state = STATE.STATE_initial;
    base = 0;
    stringbuffer.reset();
    start = p;
    while (1)
    {
        c = *p;
        switch (state)
        {
            case STATE.STATE_initial:       // opening state
                state = (c == '0') ? STATE.STATE_0 : STATE.STATE_decimal;
                break;

            case STATE.STATE_0:
                // A leading 0 means the literal is not decimal.
                flags = (flags & ~FLAGS.FLAGS_decimal);
                switch (c)
                {
                    version (ZEROH) {
                    case 'H':               // 0h
                    case 'h':
                        goto hexh;
                    }
                    case 'X':
                    case 'x':
                        state = STATE.STATE_hex0;
                        break;

                    case '.':
                        if (p[1] == '.')    // .. is a separate token
                            goto done;
                        // otherwise falls through: it is a real
                    case 'i':
                    case 'f':
                    case 'F':
                        goto real_;
                    version (ZEROH) {
                    case 'E':
                    case 'e':
                        goto case_hex;
                    }
                    case 'B':
                    case 'b':
                        state = STATE.STATE_binary0;
                        break;

                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                        state = STATE.STATE_octal;
                        break;

                    version (ZEROH) {
                    case '8': case '9': case 'A':
                    case 'C': case 'D': case 'F':
                    case 'a': case 'c': case 'd': case 'f':
                    case_hex:
                        state = STATE.STATE_hexh;
                        break;
                    }
                    case '_':
                        state = STATE.STATE_octal;
                        p++;
                        continue;

                    case 'L':
                        if (p[1] == 'i')
                            goto real_;
                        goto done;

                    default:
                        goto done;
                }
                break;

            case STATE.STATE_decimal:       // reading decimal number
                if (!isdigit(c))
                {
                    version (ZEROH) {
                        if (ishex(c)
                            || c == 'H' || c == 'h'
                           )
                            goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    else if (c == 'i' || c == 'f' || c == 'F' ||
                             c == 'e' || c == 'E')
                    {
                    real_:  // It's a real number. Back up and rescan as a real
                        p = start;
                        return inreal(t);
                    }
                    else if (c == 'L' && p[1] == 'i')
                        goto real_;
                    goto done;
                }
                break;

            case STATE.STATE_hex0:          // just after 0x, no digit yet
            case STATE.STATE_hex:           // reading hex number
                if (!ishex(cast(ubyte)c))
                {
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    if (c == 'P' || c == 'p' || c == 'i')
                        goto real_;
                    if (state == STATE.STATE_hex0)
                        error("Hex digit expected, not '%c'", c);
                    goto done;
                }
                state = STATE.STATE_hex;
                break;

            version (ZEROH) {
            hexh:
                state = STATE.STATE_hexh;
            case STATE.STATE_hexh:          // parse numbers like 0FFh
                if (!ishex(c))
                {
                    if (c == 'H' || c == 'h')
                    {
                        p++;
                        base = 16;
                        goto done;
                    }
                    else
                    {
                        // Check for something like 1E3 or 0E24
                        if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) ||
                            memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset))
                            goto real_;
                        error("Hex digit expected, not '%c'", c);
                        goto done;
                    }
                }
                break;
            }

            case STATE.STATE_octal:         // reading octal number
            case STATE.STATE_octale:        // reading octal number with non-octal digits
                if (!isoctal(cast(ubyte)c))
                {
                    version (ZEROH) {
                        if (ishex(c)
                            || c == 'H' || c == 'h'
                           )
                            goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    if (c == 'i')
                        goto real_;
                    if (isdigit(c))
                    {
                        // 8 or 9: keep collecting, complain once at the end.
                        state = STATE.STATE_octale;
                    }
                    else
                        goto done;
                }
                break;

            case STATE.STATE_binary0:       // starting binary number
            case STATE.STATE_binary:        // reading binary number
                if (c != '0' && c != '1')
                {
                    version (ZEROH) {
                        if (ishex(c)
                            || c == 'H' || c == 'h'
                           )
                            goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (state == STATE.STATE_binary0)
                    {
                        error("binary digit expected");
                        state = STATE.STATE_error;
                        break;
                    }
                    else
                        goto done;
                }
                state = STATE.STATE_binary;
                break;

            case STATE.STATE_error:         // for error recovery
                if (!isdigit(c))            // scan until non-digit
                    goto done;
                break;

            default:
                assert(0);
        }
        stringbuffer.writeByte(c);
        p++;
    }
done:
    stringbuffer.writeByte(0);              // terminate string
    if (state == STATE.STATE_octale)
        error("Octal digit expected");

    ulong n;                                // unsigned >=64 bit integer type

    // Fast path: a single decimal digit (buffer holds digit + terminator).
    if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0))
        n = stringbuffer.data[0] - '0';
    else
    {
        // Convert string to integer
        version (__DMC__) {
            errno = 0;
            n = strtoull(cast(char*)stringbuffer.data,null,base);
            if (errno == ERANGE)
                error("integer overflow");
        } else {
            // Not everybody implements strtoull()
            char* s = cast(char*)stringbuffer.data;
            int radix = 10;
            int digit;

            // Derive the radix from the literal's prefix.
            if (*s == '0')
            {
                if (s[1] == 'x' || s[1] == 'X')
                {
                    s += 2;
                    radix = 16;
                }
                else if (s[1] == 'b' || s[1] == 'B')
                {
                    s += 2;
                    radix = 2;
                }
                else if (isdigit(s[1]))
                {
                    s += 1;
                    radix = 8;
                }
            }

            n = 0;
            for (;; s++)
            {
                if (*s >= '0' && *s <= '9')
                    digit = *s - '0';
                else if (*s >= 'a' && *s <= 'z')
                    digit = *s - 'a' + 10;
                else if (*s >= 'A' && *s <= 'Z')
                    digit = *s - 'A' + 10;
                else
                    break;
                if (digit >= radix)
                    break;

                // Detect wrap-around of both the multiply and the add.
                ulong scaled = n * radix;
                if (scaled / radix != n || scaled + digit < n)
                {
                    error ("integer overflow");
                    break;
                }

                n = scaled + digit;
            }
        }
        if (n.sizeof > 8 &&
            n > 0xFFFFFFFFFFFFFFFF)         // if n needs more than 64 bits
            error("integer overflow");
    }

    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    for (;;)
    {
        FLAGS f;
        ubyte sfx = *p;

        if (sfx == 'U' || sfx == 'u')
            f = FLAGS.FLAGS_unsigned;
        else if (sfx == 'l' || sfx == 'L')
        {
            if (sfx == 'l')
            {
                if (1 || !global.params.useDeprecated)
                    error("'l' suffix is deprecated, use 'L' instead");
            }
            f = FLAGS.FLAGS_long;
        }
        else
            break;

        p++;
        if (flags & f)                      // same suffix given twice
            error("unrecognized token");
        flags = (flags | f);
    }

    switch (flags)
    {
        case FLAGS.FLAGS_undefined:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000)
                result = TOK.TOKuns64v;
            else if (n & 0xFFFFFFFF00000000)
                result = TOK.TOKint64v;
            else if (n & 0x80000000)
                result = TOK.TOKuns32v;
            else
                result = TOK.TOKint32v;
            break;

        case FLAGS.FLAGS_decimal:
            /* First that fits: int, long, long long
             */
            if (n & 0x8000000000000000)
            {
                error("signed integer overflow");
                result = TOK.TOKuns64v;
            }
            else if (n & 0xFFFFFFFF80000000)
                result = TOK.TOKint64v;
            else
                result = TOK.TOKint32v;
            break;

        case FLAGS.FLAGS_unsigned:
        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000)
                result = TOK.TOKuns64v;
            else
                result = TOK.TOKuns32v;
            break;

        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
            if (n & 0x8000000000000000)
            {
                error("signed integer overflow");
                result = TOK.TOKuns64v;
            }
            else
                result = TOK.TOKint64v;
            break;

        case FLAGS.FLAGS_long:
            if (n & 0x8000000000000000)
                result = TOK.TOKuns64v;
            else
                result = TOK.TOKint64v;
            break;

        case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
            result = TOK.TOKuns64v;
            break;

        default:
            debug {
                printf("%x\n",flags);
            }
            assert(0);
    }
    t.uns64value = n;
    return result;
}
|
|
2160
|
|
/**************************************
 * Lex a floating point literal starting at `p`.
 *
 * The characters of the literal are collected in `stringbuffer` (embedded
 * '_' are skipped) and converted with strtold() into `t.float80value`.
 * A trailing 'f' / 'F' selects a 32-bit result, 'l' / 'L' an 80-bit one,
 * no suffix a 64-bit one; a following 'i' / 'I' turns the result into the
 * matching imaginary token.
 *
 * For the 32- and 64-bit cases strtof() / strtod() is run on the same text
 * only so that errno reports a value out of range for that type, which is
 * then diagnosed as "number is not representable".
 *
 * Bugs:
 *      Exponent overflow not detected.
 *      Too much requested precision is not detected.
 */
TOK inreal(Token* t)
in
{
    assert(*p == '.' || isdigit(*p));
}
out (result)
{
    switch (result)
    {
        case TOKfloat32v:
        case TOKfloat64v:
        case TOKfloat80v:
        case TOKimaginary32v:
        case TOKimaginary64v:
        case TOKimaginary80v:
            break;

        default:
            assert(0);
    }
}
body
{
    /* Scanner states:
     *   0 opening            9 seen leading '0'
     *   1 digits left of .   2 no more digits left of .
     *   3 digits right of .  4 no more digits right of .
     *   5 just after E/P     6 first exponent digit expected
     *   7 exponent digits    8 past end of exponent digits
     */
    int fsm;
    uint c;
    char hex;       // is this a hexadecimal-floating-constant?
    TOK kind;

    stringbuffer.reset();
    fsm = 0;
    hex = 0;
Lnext:
    while (true)
    {
        // Get next char from input
        c = *p++;
        while (true)
        {
            switch (fsm)
            {
                case 0:             // opening state
                    if (c == '0')
                        fsm = 9;
                    else if (c == '.')
                        fsm = 3;
                    else
                        fsm = 1;
                    break;

                case 9:
                    fsm = 1;
                    if (c == 'X' || c == 'x')
                    {
                        hex++;
                        break;
                    }
                    // not a hex prefix: treat as an ordinary digit position
                case 1:             // digits to left of .
                case 3:             // digits to right of .
                case 7:             // continuing exponent digits
                    if (!isdigit(c) && !(hex && isxdigit(c)))
                    {
                        if (c == '_')
                            continue Lnext;     // ignore embedded '_'
                        fsm++;                  // digit run ended; re-examine c
                        continue;
                    }
                    break;

                case 2:             // no more digits to left of .
                    if (c == '.')
                    {
                        fsm++;
                        break;
                    }
                    // no '.', so check for an exponent right away
                case 4:             // no more digits to right of .
                    if ((c == 'E' || c == 'e') ||
                        (hex && (c == 'P' || c == 'p')))
                    {
                        fsm = 5;
                        hex = 0;    // exponent is always decimal
                        break;
                    }
                    if (hex)
                        error("binary-exponent-part required");
                    goto done;

                case 5:             // looking immediately to right of E
                    fsm++;
                    if (c == '-' || c == '+')
                        break;
                    // no sign: c must already be the first exponent digit
                case 6:             // 1st exponent digit expected
                    if (!isdigit(c))
                        error("exponent expected");
                    fsm++;
                    break;

                case 8:             // past end of exponent digits
                    goto done;
            }
            break;
        }
        stringbuffer.writeByte(c);
    }
done:
    p--;            // give back the character that ended the literal

    stringbuffer.writeByte(0);

    version (_WIN32) { /// && __DMC__
        char* save = __locale_decpoint;
        __locale_decpoint = cast(char*)".".ptr;
    }
    t.float80value = strtold(cast(char*)stringbuffer.data, null);

    // The conversions below are run only for their effect on errno.
    errno = 0;
    switch (*p)
    {
        case 'F':
        case 'f':
            strtof(cast(char*)stringbuffer.data, null);
            kind = TOKfloat32v;
            p++;
            break;

        default:
            strtod(cast(char*)stringbuffer.data, null);
            kind = TOKfloat64v;
            break;

        case 'l':
            if (!global.params.useDeprecated)
                error("'l' suffix is deprecated, use 'L' instead");
            // falls through: 'l' is treated like 'L'
        case 'L':
            kind = TOKfloat80v;
            p++;
            break;
    }
    if (*p == 'i' || *p == 'I')
    {
        if (!global.params.useDeprecated && *p == 'I')
            error("'I' suffix is deprecated, use 'i' instead");
        p++;
        switch (kind)
        {
            case TOKfloat32v:
                kind = TOKimaginary32v;
                break;
            case TOKfloat64v:
                kind = TOKimaginary64v;
                break;
            case TOKfloat80v:
                kind = TOKimaginary80v;
                break;
        }
    }

    version (_WIN32) { ///&& __DMC__
        __locale_decpoint = save;
    }
    if (errno == ERANGE)
        error("number is not representable");

    return kind;
}
|
|
2333
|
|
/// Report an error at the lexer's current location (`this.loc`);
/// forwards to the overload that takes an explicit Loc.
void error(T...)(string format, T t)
{
    error(loc, format, t);
}
|
|
2338
|
|
/**
 * Report an error at `loc` and count it in `global.errors`.
 *
 * The message is printed only when the lexer has a module and
 * `global.gag` is not set; in that case fatal() is called once 20 or more
 * errors had already been counted before this one. The error counter is
 * incremented whether or not anything was printed.
 *
 * Params:
 *   loc    = location printed in front of the message (omitted when its
 *            text is empty)
 *   format = format string handed to writefln
 *   t      = arguments for `format`
 */
void error(T...)(Loc loc, string format, T t)
{
    if (mod && !global.gag)
    {
        string where = loc.toChars();
        if (where.length != 0)
        {
            writef("%s: ", where);
        }

        writefln(format, t);

        // moderate blizzard of cascading messages
        if (global.errors >= 20)
        {
            fatal();
        }
    }

    ++global.errors;
}
|
|
2355
|
|
/// Placeholder with no implementation: every call fails the assertion.
void pragma_()
{
    assert(0);
}
|
|
2360
|
|
/// Placeholder with no implementation: every call fails the assertion.
uint decodeUTF()
{
    assert(0);
}
|
|
2365
|
|
/// Placeholder with no implementation: every call fails the assertion.
void getDocComment(Token* t, uint lineComment)
{
    assert(0);
}
|
|
2370
|
|
/**
 * Check whether `p` is spelled like an identifier.
 *
 * Rules applied:
 *   - the string is not empty,
 *   - it does not start with an ASCII digit,
 *   - it decodes cleanly with utf_decodeChar() (a non-null result from
 *     that call is treated as a decoding failure),
 *   - every ASCII character is a letter, a digit or '_',
 *   - every non-ASCII character is accepted by isUniAlpha().
 *
 * Returns: true when all rules hold, false otherwise.
 */
static bool isValidIdentifier(string p)
{
    if (p.length == 0) {
        return false;
    }

    if (p[0] >= '0' && p[0] <= '9') {   // beware of isdigit() on signed chars
        return false;
    }

    size_t idx = 0;
    while (idx < p.length)
    {
        dchar dc;

        if (utf_decodeChar(p, &idx, &dc) !is null) {
            return false;
        }

        bool accepted;
        if (dc >= 0x80) {
            // Non-ASCII: only the Unicode test applies. isalnum() must not
            // see these values - the C ctype functions are defined only
            // for arguments representable as unsigned char (or EOF).
            accepted = isUniAlpha(dc) != 0;
        } else {
            accepted = isalnum(dc) || dc == '_';
        }

        if (!accepted) {
            return false;
        }
    }

    return true;
}
|
|
2397
|
|
/**
 * Join two 0-terminated comment strings.
 *
 * If either argument is null the other one is returned unchanged (so null
 * is returned when both are null). Otherwise a new GC-allocated,
 * 0-terminated buffer is returned that holds `c1`, a '\n' separator unless
 * `c1` is empty or already ends with one, and then `c2`.
 *
 * TODO: reimplement based on strings
 */
static ubyte* combineComments(ubyte* c1, ubyte* c2)
{
    if (!c1)
        return c2;
    if (!c2)
        return c1;

    size_t n1 = strlen(cast(char*)c1);
    size_t n2 = strlen(cast(char*)c2);

    // Room for both texts, an optional separator and the terminator.
    ubyte* joined = cast(ubyte*)GC.malloc(n1 + 1 + n2 + 1);
    size_t at = n1;

    memcpy(joined, c1, n1);
    if (n1 && c1[n1 - 1] != '\n')
        joined[at++] = '\n';
    memcpy(joined + at, c2, n2);
    joined[at + n2] = 0;

    return joined;
}
|
|
2427 } |