Mercurial > projects > ddmd
comparison dmd/Lexer.d @ 0:10317f0c89a5
Initial commit
author | korDen |
---|---|
date | Sat, 24 Oct 2009 08:42:06 +0400 |
parents | |
children | 7427ded8caf7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:10317f0c89a5 |
---|---|
1 module dmd.Lexer; | |
2 | |
3 import dmd.StringTable; | |
4 import dmd.OutBuffer; | |
5 import dmd.Token; | |
6 import dmd.Loc; | |
7 import dmd.Module; | |
8 import dmd.Identifier; | |
9 import dmd.TOK; | |
10 import dmd.Keyword; | |
11 import dmd.StringValue; | |
12 import dmd.Global; | |
13 import dmd.Util; | |
14 import dmd.Id; | |
15 import dmd.Dchar; | |
16 import dmd.Utf; | |
17 | |
18 import std.stdio : writeln; | |
19 | |
20 import core.stdc.ctype; | |
21 import core.stdc.stdlib; | |
22 import core.stdc.string; | |
23 import core.stdc.stdio; | |
24 import core.stdc.time; | |
25 import core.stdc.errno; | |
26 | |
// Unicode code points that this lexer treats as line breaks, in addition to
// CR and LF, when skipping the '#!' line and when scanning comments.
27 enum LS = 0x2028; // UTF line separator | |
28 enum PS = 0x2029; // UTF paragraph separator | |
29 | |
// Variable with C linkage that is only declared here; its definition lives
// outside this module. It is not referenced in the visible part of this file.
// NOTE(review): presumably the C runtime's locale decimal-point string -- confirm.
30 extern (C) extern | |
31 { | |
32 __gshared char* __locale_decpoint; | |
33 } | |
34 | |
// Placeholder: no classification is implemented; the body unconditionally
// fails with assert(false). scan() calls this on the result of decodeUTF()
// whenever an identifier contains a byte with the high bit set, so such input
// ends up at this assert.
35 int isUniAlpha(uint u) | |
36 { | |
37 assert(false); | |
38 } | |
39 | |
40 class Lexer | |
41 { | |
// State shared by every Lexer instance:
//   stringtable  - table of identifier/keyword text; each entry's ptrvalue
//                  holds the matching Identifier (see initKeywords/idPool).
//   stringbuffer - scratch buffer scan() uses to assemble escaped string literals.
//   freelist     - list (linked through Token.next) onto which nextToken()
//                  pushes look-ahead tokens after consuming them.
42 static StringTable stringtable; | |
43 static OutBuffer stringbuffer; | |
44 static Token* freelist; | |
45 | |
46 Loc loc; // for error messages | |
47 | |
48 ubyte* base; // pointer to start of buffer | |
49 ubyte* end; // past end of buffer | |
50 ubyte* p; // current character | |
// token is the current token (filled in by nextToken()); mod is the module
// handed to the constructor.
51 Token token; | |
52 Module mod; | |
53 int doDocComment; // collect doc comment information | |
54 int anyToken; // !=0 means seen at least one token | |
55 int commentToken; // !=0 means comments are TOKcomment's | |
56 | |
// Static constructor: allocates the string table and the string buffer that
// all Lexer instances share.
57 static this() | |
58 { | |
59 stringtable = new StringTable(); | |
60 stringbuffer = new OutBuffer(); | |
61 } | |
62 | |
// Static destructor: explicitly deletes the shared string table. The shared
// stringbuffer is not deleted here.
63 static ~this() | |
64 { | |
65 delete stringtable; | |
66 } | |
67 | |
// Creates a lexer over a byte buffer.
//   mod          - module stored in this.mod and used to build the initial Loc (line 1).
//   base         - start of the buffer.
//   begoffset    - byte offset from base at which scanning starts (initial p).
//   endoffset    - byte offset from base that becomes `end`.
//   doDocComment - stored as-is in this.doDocComment.
//   commentToken - stored as-is in this.commentToken.
// The current token is zero-filled and anyToken is cleared. If the text at p
// starts with "#!", the remainder of that line is skipped and loc.linnum is
// set to 2.
68 this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken) | |
69 { | |
70 loc = Loc(mod, 1); | |
71 | |
72 memset(&token,0,token.sizeof); | |
73 this.base = base; | |
74 this.end = base + endoffset; | |
75 p = base + begoffset; | |
76 this.mod = mod; | |
77 this.doDocComment = doDocComment; | |
78 this.anyToken = 0; | |
79 this.commentToken = commentToken; | |
80 //initKeywords(); | |
81 | |
82 /* If first line starts with '#!', ignore the line | |
83 */ | |
84 | |
85 if (p[0] == '#' && p[1] =='!') | |
86 { | |
87 p += 2; | |
// Walk to the end of the '#!' line. Each `break` inside the switch falls
// through to the `break` after it, which leaves the while loop; only the
// default case `continue`s.
88 while (1) | |
89 { | |
90 ubyte c = *p; | |
91 switch (c) | |
92 { | |
93 case '\n': | |
94 p++; | |
95 break; | |
96 | |
// CR, optionally followed by LF, is consumed as one line ending.
97 case '\r': | |
98 p++; | |
99 if (*p == '\n') | |
100 p++; | |
101 break; | |
102 | |
// A 0 or 0x1A byte ends the scan without advancing p.
103 case 0: | |
104 case 0x1A: | |
105 break; | |
106 | |
107 default: | |
// Byte with the high bit set: decode it and stop if it is PS or LS.
108 if (c & 0x80) | |
109 { | |
110 uint u = decodeUTF(); | |
111 if (u == PS || u == LS) | |
112 break; | |
113 } | |
114 p++; | |
115 continue; | |
116 } | |
117 break; | |
118 } | |
// Set on every exit from the loop above, including the 0 / 0x1A case.
119 loc.linnum = 2; | |
120 } | |
121 } | |
122 | |
// Keyword table: pairs each keyword spelling with its TOK value. Two variants
// are provided, selected by version (DMDV2). initKeywords() walks this table
// to fill stringtable and Token.tochars.
// The order of the tail matters: when global.params.Dversion == 1,
// initKeywords() leaves out the LAST TWO entries of whichever table is compiled in.
123 version (DMDV2) { | |
124 static Keyword[] keywords = | |
125 [ | |
126 // { "", TOK }, | |
127 | |
128 { "this", TOK.TOKthis }, | |
129 { "super", TOK.TOKsuper }, | |
130 { "assert", TOK.TOKassert }, | |
131 { "null", TOK.TOKnull }, | |
132 { "true", TOK.TOKtrue }, | |
133 { "false", TOK.TOKfalse }, | |
134 { "cast", TOK.TOKcast }, | |
135 { "new", TOK.TOKnew }, | |
136 { "delete", TOK.TOKdelete }, | |
137 { "throw", TOK.TOKthrow }, | |
138 { "module", TOK.TOKmodule }, | |
139 { "pragma", TOK.TOKpragma }, | |
140 { "typeof", TOK.TOKtypeof }, | |
141 { "typeid", TOK.TOKtypeid }, | |
142 | |
143 { "template", TOK.TOKtemplate }, | |
144 | |
145 { "void", TOK.TOKvoid }, | |
146 { "byte", TOK.TOKint8 }, | |
147 { "ubyte", TOK.TOKuns8 }, | |
148 { "short", TOK.TOKint16 }, | |
149 { "ushort", TOK.TOKuns16 }, | |
150 { "int", TOK.TOKint32 }, | |
151 { "uint", TOK.TOKuns32 }, | |
152 { "long", TOK.TOKint64 }, | |
153 { "ulong", TOK.TOKuns64 }, | |
154 { "cent", TOK.TOKcent, }, | |
155 { "ucent", TOK.TOKucent, }, | |
156 { "float", TOK.TOKfloat32 }, | |
157 { "double", TOK.TOKfloat64 }, | |
158 { "real", TOK.TOKfloat80 }, | |
159 | |
160 { "bool", TOK.TOKbool }, | |
161 { "char", TOK.TOKchar }, | |
162 { "wchar", TOK.TOKwchar }, | |
163 { "dchar", TOK.TOKdchar }, | |
164 | |
165 { "ifloat", TOK.TOKimaginary32 }, | |
166 { "idouble", TOK.TOKimaginary64 }, | |
167 { "ireal", TOK.TOKimaginary80 }, | |
168 | |
169 { "cfloat", TOK.TOKcomplex32 }, | |
170 { "cdouble", TOK.TOKcomplex64 }, | |
171 { "creal", TOK.TOKcomplex80 }, | |
172 | |
173 { "delegate", TOK.TOKdelegate }, | |
174 { "function", TOK.TOKfunction }, | |
175 | |
176 { "is", TOK.TOKis }, | |
177 { "if", TOK.TOKif }, | |
178 { "else", TOK.TOKelse }, | |
179 { "while", TOK.TOKwhile }, | |
180 { "for", TOK.TOKfor }, | |
181 { "do", TOK.TOKdo }, | |
182 { "switch", TOK.TOKswitch }, | |
183 { "case", TOK.TOKcase }, | |
184 { "default", TOK.TOKdefault }, | |
185 { "break", TOK.TOKbreak }, | |
186 { "continue", TOK.TOKcontinue }, | |
187 { "synchronized", TOK.TOKsynchronized }, | |
188 { "return", TOK.TOKreturn }, | |
189 { "goto", TOK.TOKgoto }, | |
190 { "try", TOK.TOKtry }, | |
191 { "catch", TOK.TOKcatch }, | |
192 { "finally", TOK.TOKfinally }, | |
193 { "with", TOK.TOKwith }, | |
194 { "asm", TOK.TOKasm }, | |
195 { "foreach", TOK.TOKforeach }, | |
196 { "foreach_reverse", TOK.TOKforeach_reverse }, | |
197 { "scope", TOK.TOKscope }, | |
198 | |
199 { "struct", TOK.TOKstruct }, | |
200 { "class", TOK.TOKclass }, | |
201 { "interface", TOK.TOKinterface }, | |
202 { "union", TOK.TOKunion }, | |
203 { "enum", TOK.TOKenum }, | |
204 { "import", TOK.TOKimport }, | |
205 { "mixin", TOK.TOKmixin }, | |
206 { "static", TOK.TOKstatic }, | |
207 { "final", TOK.TOKfinal }, | |
208 { "const", TOK.TOKconst }, | |
209 { "typedef", TOK.TOKtypedef }, | |
210 { "alias", TOK.TOKalias }, | |
211 { "override", TOK.TOKoverride }, | |
212 { "abstract", TOK.TOKabstract }, | |
213 { "volatile", TOK.TOKvolatile }, | |
214 { "debug", TOK.TOKdebug }, | |
215 { "deprecated", TOK.TOKdeprecated }, | |
216 { "in", TOK.TOKin }, | |
217 { "out", TOK.TOKout }, | |
218 { "inout", TOK.TOKinout }, | |
219 { "lazy", TOK.TOKlazy }, | |
220 { "auto", TOK.TOKauto }, | |
221 | |
222 { "align", TOK.TOKalign }, | |
223 { "extern", TOK.TOKextern }, | |
224 { "private", TOK.TOKprivate }, | |
225 { "package", TOK.TOKpackage }, | |
226 { "protected", TOK.TOKprotected }, | |
227 { "public", TOK.TOKpublic }, | |
228 { "export", TOK.TOKexport }, | |
229 | |
230 { "body", TOK.TOKbody }, | |
231 { "invariant", TOK.TOKinvariant }, | |
232 { "unittest", TOK.TOKunittest }, | |
233 { "version", TOK.TOKversion }, | |
234 //{ "manifest", TOK.TOKmanifest }, | |
235 | |
236 // Added after 1.0 | |
237 { "ref", TOK.TOKref }, | |
238 { "macro", TOK.TOKmacro }, | |
239 { "pure", TOK.TOKpure }, | |
240 { "nothrow", TOK.TOKnothrow }, | |
241 { "__thread", TOK.TOKtls }, | |
242 { "__gshared", TOK.TOKgshared }, | |
243 { "__traits", TOK.TOKtraits }, | |
244 { "__overloadset", TOK.TOKoverloadset }, | |
245 { "__FILE__", TOK.TOKfile }, | |
246 { "__LINE__", TOK.TOKline }, | |
247 { "shared", TOK.TOKshared }, | |
248 { "immutable", TOK.TOKimmutable }, | |
249 ]; | |
// Variant used when DMDV2 is not set: the same list, ending at "macro".
250 } else { | |
251 static Keyword[] keywords = | |
252 [ | |
253 // { "", TOK }, | |
254 | |
255 { "this", TOK.TOKthis }, | |
256 { "super", TOK.TOKsuper }, | |
257 { "assert", TOK.TOKassert }, | |
258 { "null", TOK.TOKnull }, | |
259 { "true", TOK.TOKtrue }, | |
260 { "false", TOK.TOKfalse }, | |
261 { "cast", TOK.TOKcast }, | |
262 { "new", TOK.TOKnew }, | |
263 { "delete", TOK.TOKdelete }, | |
264 { "throw", TOK.TOKthrow }, | |
265 { "module", TOK.TOKmodule }, | |
266 { "pragma", TOK.TOKpragma }, | |
267 { "typeof", TOK.TOKtypeof }, | |
268 { "typeid", TOK.TOKtypeid }, | |
269 | |
270 { "template", TOK.TOKtemplate }, | |
271 | |
272 { "void", TOK.TOKvoid }, | |
273 { "byte", TOK.TOKint8 }, | |
274 { "ubyte", TOK.TOKuns8 }, | |
275 { "short", TOK.TOKint16 }, | |
276 { "ushort", TOK.TOKuns16 }, | |
277 { "int", TOK.TOKint32 }, | |
278 { "uint", TOK.TOKuns32 }, | |
279 { "long", TOK.TOKint64 }, | |
280 { "ulong", TOK.TOKuns64 }, | |
281 { "cent", TOK.TOKcent, }, | |
282 { "ucent", TOK.TOKucent, }, | |
283 { "float", TOK.TOKfloat32 }, | |
284 { "double", TOK.TOKfloat64 }, | |
285 { "real", TOK.TOKfloat80 }, | |
286 | |
287 { "bool", TOK.TOKbool }, | |
288 { "char", TOK.TOKchar }, | |
289 { "wchar", TOK.TOKwchar }, | |
290 { "dchar", TOK.TOKdchar }, | |
291 | |
292 { "ifloat", TOK.TOKimaginary32 }, | |
293 { "idouble", TOK.TOKimaginary64 }, | |
294 { "ireal", TOK.TOKimaginary80 }, | |
295 | |
296 { "cfloat", TOK.TOKcomplex32 }, | |
297 { "cdouble", TOK.TOKcomplex64 }, | |
298 { "creal", TOK.TOKcomplex80 }, | |
299 | |
300 { "delegate", TOK.TOKdelegate }, | |
301 { "function", TOK.TOKfunction }, | |
302 | |
303 { "is", TOK.TOKis }, | |
304 { "if", TOK.TOKif }, | |
305 { "else", TOK.TOKelse }, | |
306 { "while", TOK.TOKwhile }, | |
307 { "for", TOK.TOKfor }, | |
308 { "do", TOK.TOKdo }, | |
309 { "switch", TOK.TOKswitch }, | |
310 { "case", TOK.TOKcase }, | |
311 { "default", TOK.TOKdefault }, | |
312 { "break", TOK.TOKbreak }, | |
313 { "continue", TOK.TOKcontinue }, | |
314 { "synchronized", TOK.TOKsynchronized }, | |
315 { "return", TOK.TOKreturn }, | |
316 { "goto", TOK.TOKgoto }, | |
317 { "try", TOK.TOKtry }, | |
318 { "catch", TOK.TOKcatch }, | |
319 { "finally", TOK.TOKfinally }, | |
320 { "with", TOK.TOKwith }, | |
321 { "asm", TOK.TOKasm }, | |
322 { "foreach", TOK.TOKforeach }, | |
323 { "foreach_reverse", TOK.TOKforeach_reverse }, | |
324 { "scope", TOK.TOKscope }, | |
325 | |
326 { "struct", TOK.TOKstruct }, | |
327 { "class", TOK.TOKclass }, | |
328 { "interface", TOK.TOKinterface }, | |
329 { "union", TOK.TOKunion }, | |
330 { "enum", TOK.TOKenum }, | |
331 { "import", TOK.TOKimport }, | |
332 { "mixin", TOK.TOKmixin }, | |
333 { "static", TOK.TOKstatic }, | |
334 { "final", TOK.TOKfinal }, | |
335 { "const", TOK.TOKconst }, | |
336 { "typedef", TOK.TOKtypedef }, | |
337 { "alias", TOK.TOKalias }, | |
338 { "override", TOK.TOKoverride }, | |
339 { "abstract", TOK.TOKabstract }, | |
340 { "volatile", TOK.TOKvolatile }, | |
341 { "debug", TOK.TOKdebug }, | |
342 { "deprecated", TOK.TOKdeprecated }, | |
343 { "in", TOK.TOKin }, | |
344 { "out", TOK.TOKout }, | |
345 { "inout", TOK.TOKinout }, | |
346 { "lazy", TOK.TOKlazy }, | |
347 { "auto", TOK.TOKauto }, | |
348 | |
349 { "align", TOK.TOKalign }, | |
350 { "extern", TOK.TOKextern }, | |
351 { "private", TOK.TOKprivate }, | |
352 { "package", TOK.TOKpackage }, | |
353 { "protected", TOK.TOKprotected }, | |
354 { "public", TOK.TOKpublic }, | |
355 { "export", TOK.TOKexport }, | |
356 | |
357 { "body", TOK.TOKbody }, | |
358 { "invariant", TOK.TOKinvariant }, | |
359 { "unittest", TOK.TOKunittest }, | |
360 { "version", TOK.TOKversion }, | |
361 //{ "manifest", TOK.TOKmanifest }, | |
362 | |
363 // Added after 1.0 | |
364 { "ref", TOK.TOKref }, | |
365 { "macro", TOK.TOKmacro }, | |
366 ]; | |
367 } | |
368 | |
// Character-class table indexed by byte value; each entry is a bit set of the
// CM* flags below. It is filled in by cmtable_init().
369 static ubyte cmtable[256]; | |
// Flag bits: octal digit, hexadecimal digit, identifier character.
370 enum CMoctal = 0x1; | |
371 enum CMhex = 0x2; | |
372 enum CMidchar = 0x4; | |
373 | |
// Character-class tests backed by cmtable. Each returns the masked flag bit
// (nonzero when c has the class, 0 otherwise), not a normalised 0/1 value.
374 ubyte isoctal (ubyte c) { return cmtable[c] & CMoctal; } | |
375 ubyte ishex (ubyte c) { return cmtable[c] & CMhex; } | |
376 ubyte isidchar(ubyte c) { return cmtable[c] & CMidchar; } | |
377 | |
// Fills cmtable for every index 0 .. cmtable.length-1:
//   CMoctal  for '0'..'7'
//   CMhex    for isdigit(c), 'a'..'f' and 'A'..'F'
//   CMidchar for isalnum(c) and '_'
// Bits are only OR-ed in; nothing is cleared, so calling this again leaves the
// table unchanged.
378 static void cmtable_init() | |
379 { | |
380 for (uint c = 0; c < cmtable.length; c++) | |
381 { | |
382 if ('0' <= c && c <= '7') | |
383 cmtable[c] |= CMoctal; | |
384 if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) | |
385 cmtable[c] |= CMhex; | |
386 if (isalnum(c) || c == '_') | |
387 cmtable[c] |= CMidchar; | |
388 } | |
389 } | |
390 | |
// Populates the shared keyword and token-name tables.
//   1. Calls cmtable_init().
//   2. For each entry of `keywords`, inserts the spelling into stringtable,
//      attaches a new Identifier carrying the keyword's TOK value to the
//      entry's ptrvalue, and records the spelling in Token.tochars.
//      When global.params.Dversion == 1 the last two entries of `keywords`
//      are left out.
//   3. Assigns display strings in Token.tochars for punctuation, operators and
//      a number of other token kinds.
// The only call visible in this part of the file is commented out in the
// Lexer constructor.
391 static void initKeywords() | |
392 { | |
393 uint nkeywords = keywords.length; | |
394 | |
// Drops the final two keyword-table entries for Dversion 1.
395 if (global.params.Dversion == 1) | |
396 nkeywords -= 2; | |
397 | |
398 cmtable_init(); | |
399 | |
400 for (uint u = 0; u < nkeywords; u++) | |
401 { | |
402 //printf("keyword[%d] = '%s'\n",u, keywords[u].name); | |
403 string s = keywords[u].name; | |
404 TOK v = keywords[u].value; | |
405 StringValue* sv = stringtable.insert(s); | |
406 sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v); | |
407 | |
408 //printf("tochars[%d] = '%s'\n",v, s); | |
409 Token.tochars[v] = s; | |
410 } | |
411 | |
412 Token.tochars[TOK.TOKeof] = "EOF"; | |
413 Token.tochars[TOK.TOKlcurly] = "{"; | |
414 Token.tochars[TOK.TOKrcurly] = "}"; | |
415 Token.tochars[TOK.TOKlparen] = "("; | |
416 Token.tochars[TOK.TOKrparen] = ")"; | |
417 Token.tochars[TOK.TOKlbracket] = "["; | |
418 Token.tochars[TOK.TOKrbracket] = "]"; | |
419 Token.tochars[TOK.TOKsemicolon] = ";"; | |
420 Token.tochars[TOK.TOKcolon] = ":"; | |
421 Token.tochars[TOK.TOKcomma] = ","; | |
422 Token.tochars[TOK.TOKdot] = "."; | |
423 Token.tochars[TOK.TOKxor] = "^"; | |
424 Token.tochars[TOK.TOKxorass] = "^="; | |
425 Token.tochars[TOK.TOKassign] = "="; | |
426 Token.tochars[TOK.TOKconstruct] = "="; | |
427 version (DMDV2) { | |
428 Token.tochars[TOK.TOKblit] = "="; | |
429 } | |
430 Token.tochars[TOK.TOKlt] = "<"; | |
431 Token.tochars[TOK.TOKgt] = ">"; | |
432 Token.tochars[TOK.TOKle] = "<="; | |
433 Token.tochars[TOK.TOKge] = ">="; | |
434 Token.tochars[TOK.TOKequal] = "=="; | |
435 Token.tochars[TOK.TOKnotequal] = "!="; | |
436 Token.tochars[TOK.TOKnotidentity] = "!is"; | |
437 Token.tochars[TOK.TOKtobool] = "!!"; | |
438 | |
439 Token.tochars[TOK.TOKunord] = "!<>="; | |
440 Token.tochars[TOK.TOKue] = "!<>"; | |
441 Token.tochars[TOK.TOKlg] = "<>"; | |
442 Token.tochars[TOK.TOKleg] = "<>="; | |
443 Token.tochars[TOK.TOKule] = "!>"; | |
444 Token.tochars[TOK.TOKul] = "!>="; | |
445 Token.tochars[TOK.TOKuge] = "!<"; | |
446 Token.tochars[TOK.TOKug] = "!<="; | |
447 | |
448 Token.tochars[TOK.TOKnot] = "!"; | |
// TOKtobool was already given this same string earlier in this function.
449 Token.tochars[TOK.TOKtobool] = "!!"; | |
450 Token.tochars[TOK.TOKshl] = "<<"; | |
451 Token.tochars[TOK.TOKshr] = ">>"; | |
452 Token.tochars[TOK.TOKushr] = ">>>"; | |
453 Token.tochars[TOK.TOKadd] = "+"; | |
454 Token.tochars[TOK.TOKmin] = "-"; | |
455 Token.tochars[TOK.TOKmul] = "*"; | |
456 Token.tochars[TOK.TOKdiv] = "/"; | |
457 Token.tochars[TOK.TOKmod] = "%"; | |
458 Token.tochars[TOK.TOKslice] = ".."; | |
459 Token.tochars[TOK.TOKdotdotdot] = "..."; | |
460 Token.tochars[TOK.TOKand] = "&"; | |
461 Token.tochars[TOK.TOKandand] = "&&"; | |
462 Token.tochars[TOK.TOKor] = "|"; | |
463 Token.tochars[TOK.TOKoror] = "||"; | |
464 Token.tochars[TOK.TOKarray] = "[]"; | |
465 Token.tochars[TOK.TOKindex] = "[i]"; | |
466 Token.tochars[TOK.TOKaddress] = "&"; | |
467 Token.tochars[TOK.TOKstar] = "*"; | |
468 Token.tochars[TOK.TOKtilde] = "~"; | |
469 Token.tochars[TOK.TOKdollar] = "$"; | |
470 Token.tochars[TOK.TOKcast] = "cast"; | |
471 Token.tochars[TOK.TOKplusplus] = "++"; | |
472 Token.tochars[TOK.TOKminusminus] = "--"; | |
473 Token.tochars[TOK.TOKtype] = "type"; | |
474 Token.tochars[TOK.TOKquestion] = "?"; | |
475 Token.tochars[TOK.TOKneg] = "-"; | |
476 Token.tochars[TOK.TOKuadd] = "+"; | |
477 Token.tochars[TOK.TOKvar] = "var"; | |
478 Token.tochars[TOK.TOKaddass] = "+="; | |
479 Token.tochars[TOK.TOKminass] = "-="; | |
480 Token.tochars[TOK.TOKmulass] = "*="; | |
481 Token.tochars[TOK.TOKdivass] = "/="; | |
482 Token.tochars[TOK.TOKmodass] = "%="; | |
483 Token.tochars[TOK.TOKshlass] = "<<="; | |
484 Token.tochars[TOK.TOKshrass] = ">>="; | |
485 Token.tochars[TOK.TOKushrass] = ">>>="; | |
486 Token.tochars[TOK.TOKandass] = "&="; | |
487 Token.tochars[TOK.TOKorass] = "|="; | |
488 Token.tochars[TOK.TOKcatass] = "~="; | |
489 Token.tochars[TOK.TOKcat] = "~"; | |
490 Token.tochars[TOK.TOKcall] = "call"; | |
491 Token.tochars[TOK.TOKidentity] = "is"; | |
// TOKnotidentity was already given this same string earlier in this function.
492 Token.tochars[TOK.TOKnotidentity] = "!is"; | |
493 | |
// TOKorass was already given this same string earlier in this function.
494 Token.tochars[TOK.TOKorass] = "|="; | |
495 Token.tochars[TOK.TOKidentifier] = "identifier"; | |
496 Token.tochars[TOK.TOKat] = "@"; | |
497 | |
498 // For debugging | |
499 Token.tochars[TOK.TOKdotexp] = "dotexp"; | |
500 Token.tochars[TOK.TOKdotti] = "dotti"; | |
501 Token.tochars[TOK.TOKdotvar] = "dotvar"; | |
502 Token.tochars[TOK.TOKdottype] = "dottype"; | |
503 Token.tochars[TOK.TOKsymoff] = "symoff"; | |
504 Token.tochars[TOK.TOKarraylength] = "arraylength"; | |
505 Token.tochars[TOK.TOKarrayliteral] = "arrayliteral"; | |
506 Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral"; | |
507 Token.tochars[TOK.TOKstructliteral] = "structliteral"; | |
508 Token.tochars[TOK.TOKstring] = "string"; | |
509 Token.tochars[TOK.TOKdsymbol] = "symbol"; | |
510 Token.tochars[TOK.TOKtuple] = "tuple"; | |
511 Token.tochars[TOK.TOKdeclaration] = "declaration"; | |
512 Token.tochars[TOK.TOKdottd] = "dottd"; | |
513 Token.tochars[TOK.TOKon_scope_exit] = "scope(exit)"; | |
514 Token.tochars[TOK.TOKon_scope_success] = "scope(success)"; | |
515 Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)"; | |
516 } | |
517 | |
// Returns the Identifier associated with the text s in the shared stringtable.
// If the table entry has no Identifier yet, a new one with TOK.TOKidentifier
// is created and stored in the entry, so later calls with the same entry get
// the same object.
// NOTE(review): stringtable.update() is presumably "look up, inserting if
// absent" -- confirm against StringTable.
518 static Identifier idPool(string s) | |
519 { | |
520 StringValue* sv = stringtable.update(s); | |
521 Identifier id = cast(Identifier) sv.ptrvalue; | |
522 if (id is null) | |
523 { | |
524 id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); | |
525 sv.ptrvalue = cast(void*)id; | |
526 } | |
527 | |
528 return id; | |
529 } | |
530 | |
// Convenience overload: forwards to uniqueId(s, n), where n comes from a
// function-local static counter that is incremented before each use (the
// first call passes 1).
531 static Identifier uniqueId(string s) | |
532 { | |
533 static int num; | |
534 return uniqueId(s, ++num); | |
535 } | |
536 | |
537 /********************************************* | |
538 * Create a unique identifier using the prefix s. | |
 * The identifier text is s immediately followed by the decimal form of num;
 * the resulting string is interned through idPool() and that Identifier is
 * returned.
 * s plus the digits of num must fit the 32-byte scratch buffer; this is
 * checked by the assert below.
 * The prefix is handed to sprintf as an explicit (int length, char* pointer)
 * pair, which is what "%.*s" consumes. Passing the D array itself depends on
 * its in-memory layout matching (int, char*), which does not hold where
 * size_t is wider than int.
539 */ | |
540 static Identifier uniqueId(string s, int num) | |
541 { | |
542 char buffer[32]; | |
543 size_t slen = s.length; | |
544 | |
545 assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof); | |
546 int len = sprintf(buffer.ptr, "%.*s%d", cast(int) slen, s.ptr, num); | |
547 | |
548 return idPool(buffer[0..len].idup); | |
549 } | |
550 | |
// Advances to the next token and returns its TOK value.
// If a look-ahead token is already linked at token.next, it is copied into
// `token` and its node is pushed onto the shared freelist; otherwise scan()
// is called to fill `token` from the input.
551 TOK nextToken() | |
552 { | |
553 Token *t; | |
554 | |
555 if (token.next) | |
556 { | |
557 t = token.next; | |
// The copy also brings over t's own `next` link, so any further queued
// look-ahead stays reachable from `token`.
558 memcpy(&token, t, Token.sizeof); | |
559 t.next = freelist; | |
560 freelist = t; | |
561 } | |
562 else | |
563 { | |
564 scan(&token); | |
565 } | |
566 | |
567 //token.print(); | |
568 return token.value; | |
569 } | |
570 | |
571 /*********************** | |
572 * Look ahead at next token's value. | |
573 */ | |
// Returns the value field of the Token that peek(&token) yields; peek() is
// defined outside the visible part of this file.
574 TOK peekNext() | |
575 { | |
576 return peek(&token).value; | |
577 } | |
578 | |
// Not implemented: the body unconditionally fails with assert(false).
579 TOK peekNext2() | |
580 { | |
581 assert(false); | |
582 } | |
583 | |
584 void scan(Token* t) | |
585 { | |
586 uint lastLine = loc.linnum; | |
587 uint linnum; | |
588 | |
589 t.blockComment = null; | |
590 t.lineComment = null; | |
591 while (1) | |
592 { | |
593 t.ptr = p; | |
594 //printf("p = %p, *p = '%c'\n",p,*p); | |
595 switch (*p) | |
596 { | |
597 case 0: | |
598 case 0x1A: | |
599 t.value = TOK.TOKeof; // end of file | |
600 return; | |
601 | |
602 case ' ': | |
603 case '\t': | |
604 case '\v': | |
605 case '\f': | |
606 p++; | |
607 continue; // skip white space | |
608 | |
609 case '\r': | |
610 p++; | |
611 if (*p != '\n') // if CR stands by itself | |
612 loc.linnum++; | |
613 continue; // skip white space | |
614 | |
615 case '\n': | |
616 p++; | |
617 loc.linnum++; | |
618 continue; // skip white space | |
619 | |
620 case '0': case '1': case '2': case '3': case '4': | |
621 case '5': case '6': case '7': case '8': case '9': | |
622 t.value = number(t); | |
623 return; | |
624 | |
625 version (CSTRINGS) { | |
626 case '\'': | |
627 t.value = charConstant(t, 0); | |
628 return; | |
629 | |
630 case '"': | |
631 t.value = stringConstant(t,0); | |
632 return; | |
633 | |
634 case 'l': | |
635 case 'L': | |
636 if (p[1] == '\'') | |
637 { | |
638 p++; | |
639 t.value = charConstant(t, 1); | |
640 return; | |
641 } | |
642 else if (p[1] == '"') | |
643 { | |
644 p++; | |
645 t.value = stringConstant(t, 1); | |
646 return; | |
647 } | |
648 } else { | |
649 case '\'': | |
650 t.value = charConstant(t,0); | |
651 return; | |
652 | |
653 case 'r': | |
654 if (p[1] != '"') | |
655 goto case_ident; | |
656 p++; | |
657 case '`': | |
658 t.value = wysiwygStringConstant(t, *p); | |
659 return; | |
660 | |
661 case 'x': | |
662 if (p[1] != '"') | |
663 goto case_ident; | |
664 p++; | |
665 t.value = hexStringConstant(t); | |
666 return; | |
667 | |
668 version (DMDV2) { | |
669 case 'q': | |
670 if (p[1] == '"') | |
671 { | |
672 p++; | |
673 t.value = delimitedStringConstant(t); | |
674 return; | |
675 } | |
676 else if (p[1] == '{') | |
677 { | |
678 p++; | |
679 t.value = tokenStringConstant(t); | |
680 return; | |
681 } | |
682 else | |
683 goto case_ident; | |
684 } | |
685 | |
686 case '"': | |
687 t.value = escapeStringConstant(t,0); | |
688 return; | |
689 version (TEXTUAL_ASSEMBLY_OUT) { | |
690 } else { | |
691 case '\\': // escaped string literal | |
692 { uint c; | |
693 ubyte* pstart = p; | |
694 | |
695 stringbuffer.reset(); | |
696 do | |
697 { | |
698 p++; | |
699 switch (*p) | |
700 { | |
701 case 'u': | |
702 case 'U': | |
703 case '&': | |
704 c = escapeSequence(); | |
705 stringbuffer.writeUTF8(c); | |
706 break; | |
707 | |
708 default: | |
709 c = escapeSequence(); | |
710 stringbuffer.writeByte(c); | |
711 break; | |
712 } | |
713 } while (*p == '\\'); | |
714 t.len = stringbuffer.offset; | |
715 stringbuffer.writeByte(0); | |
716 char* cc = cast(char*)malloc(stringbuffer.offset); | |
717 memcpy(cc, stringbuffer.data, stringbuffer.offset); | |
718 t.ustring = cc; | |
719 t.postfix = 0; | |
720 t.value = TOK.TOKstring; | |
721 if (!global.params.useDeprecated) | |
722 error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart); | |
723 return; | |
724 } | |
725 } | |
726 case 'l': | |
727 case 'L': | |
728 } | |
729 case 'a': case 'b': case 'c': case 'd': case 'e': | |
730 case 'f': case 'g': case 'h': case 'i': case 'j': | |
731 case 'k': case 'm': case 'n': case 'o': | |
732 version (DMDV2) { | |
733 case 'p': /*case 'q': case 'r':*/ case 's': case 't': | |
734 } else { | |
735 case 'p': case 'q': /*case 'r':*/ case 's': case 't': | |
736 } | |
737 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': | |
738 case 'z': | |
739 case 'A': case 'B': case 'C': case 'D': case 'E': | |
740 case 'F': case 'G': case 'H': case 'I': case 'J': | |
741 case 'K': case 'M': case 'N': case 'O': | |
742 case 'P': case 'Q': case 'R': case 'S': case 'T': | |
743 case 'U': case 'V': case 'W': case 'X': case 'Y': | |
744 case 'Z': | |
745 case '_': | |
746 case_ident: | |
747 { ubyte c; | |
748 StringValue *sv; | |
749 Identifier id; | |
750 | |
751 do | |
752 { | |
753 c = *++p; | |
754 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); | |
755 sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); /// | |
756 id = cast(Identifier) sv.ptrvalue; | |
757 if (id is null) | |
758 { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); | |
759 sv.ptrvalue = cast(void*)id; | |
760 } | |
761 t.ident = id; | |
762 t.value = cast(TOK) id.value; | |
763 anyToken = 1; | |
764 if (*t.ptr == '_') // if special identifier token | |
765 { | |
766 static char date[11+1]; | |
767 static char time[8+1]; | |
768 static char timestamp[24+1]; | |
769 | |
770 if (!date[0]) // lazy evaluation | |
771 { time_t tm; | |
772 char *p; | |
773 | |
774 .time(&tm); | |
775 p = ctime(&tm); | |
776 assert(p); | |
777 sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20); | |
778 sprintf(time.ptr, "%.8s", p + 11); | |
779 sprintf(timestamp.ptr, "%.24s", p); | |
780 } | |
781 | |
782 ///version (DMDV1) { | |
783 /// if (mod && id == Id.FILE) | |
784 /// { | |
785 /// t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars()); | |
786 /// goto Lstr; | |
787 /// } | |
788 /// else if (mod && id == Id.LINE) | |
789 /// { | |
790 /// t.value = TOK.TOKint64v; | |
791 /// t.uns64value = loc.linnum; | |
792 /// } | |
793 /// else | |
794 ///} | |
795 if (id == Id.DATE) | |
796 { | |
797 t.ustring = date.ptr; | |
798 goto Lstr; | |
799 } | |
800 else if (id == Id.TIME) | |
801 { | |
802 t.ustring = time.ptr; | |
803 goto Lstr; | |
804 } | |
805 else if (id == Id.VENDOR) | |
806 { | |
807 t.ustring = "Digital Mars D".ptr; | |
808 goto Lstr; | |
809 } | |
810 else if (id == Id.TIMESTAMP) | |
811 { | |
812 t.ustring = timestamp.ptr; | |
813 Lstr: | |
814 t.value = TOK.TOKstring; | |
815 Llen: | |
816 t.postfix = 0; | |
817 t.len = strlen(cast(char*)t.ustring); | |
818 } | |
819 else if (id == Id.VERSIONX) | |
820 { | |
821 uint major = 0; | |
822 uint minor = 0; | |
823 | |
824 foreach (char cc; global.version_[1..$]) | |
825 { | |
826 if (isdigit(cc)) | |
827 minor = minor * 10 + cc - '0'; | |
828 else if (cc == '.') | |
829 { | |
830 major = minor; | |
831 minor = 0; | |
832 } | |
833 else | |
834 break; | |
835 } | |
836 t.value = TOK.TOKint64v; | |
837 t.uns64value = major * 1000 + minor; | |
838 } | |
839 ///version (DMDV2) { | |
840 else if (id == Id.EOFX) | |
841 { | |
842 t.value = TOK.TOKeof; | |
843 // Advance scanner to end of file | |
844 while (!(*p == 0 || *p == 0x1A)) | |
845 p++; | |
846 } | |
847 ///} | |
848 } | |
849 //printf("t.value = %d\n",t.value); | |
850 return; | |
851 } | |
852 | |
853 case '/': | |
854 p++; | |
855 switch (*p) | |
856 { | |
857 case '=': | |
858 p++; | |
859 t.value = TOK.TOKdivass; | |
860 return; | |
861 | |
862 case '*': | |
863 p++; | |
864 linnum = loc.linnum; | |
865 while (1) | |
866 { | |
867 while (1) | |
868 { | |
869 ubyte c = *p; | |
870 switch (c) | |
871 { | |
872 case '/': | |
873 break; | |
874 | |
875 case '\n': | |
876 loc.linnum++; | |
877 p++; | |
878 continue; | |
879 | |
880 case '\r': | |
881 p++; | |
882 if (*p != '\n') | |
883 loc.linnum++; | |
884 continue; | |
885 | |
886 case 0: | |
887 case 0x1A: | |
888 error("unterminated /* */ comment"); | |
889 p = end; | |
890 t.value = TOK.TOKeof; | |
891 return; | |
892 | |
893 default: | |
894 if (c & 0x80) | |
895 { uint u = decodeUTF(); | |
896 if (u == PS || u == LS) | |
897 loc.linnum++; | |
898 } | |
899 p++; | |
900 continue; | |
901 } | |
902 break; | |
903 } | |
904 p++; | |
905 if (p[-2] == '*' && p - 3 != t.ptr) | |
906 break; | |
907 } | |
908 if (commentToken) | |
909 { | |
910 t.value = TOK.TOKcomment; | |
911 return; | |
912 } | |
913 else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) | |
914 { // if /** but not /**/ | |
915 getDocComment(t, lastLine == linnum); | |
916 } | |
917 continue; | |
918 | |
919 case '/': // do // style comments | |
920 linnum = loc.linnum; | |
921 while (1) | |
922 { ubyte c = *++p; | |
923 switch (c) | |
924 { | |
925 case '\n': | |
926 break; | |
927 | |
928 case '\r': | |
929 if (p[1] == '\n') | |
930 p++; | |
931 break; | |
932 | |
933 case 0: | |
934 case 0x1A: | |
935 if (commentToken) | |
936 { | |
937 p = end; | |
938 t.value = TOK.TOKcomment; | |
939 return; | |
940 } | |
941 if (doDocComment && t.ptr[2] == '/') | |
942 getDocComment(t, lastLine == linnum); | |
943 p = end; | |
944 t.value = TOK.TOKeof; | |
945 return; | |
946 | |
947 default: | |
948 if (c & 0x80) | |
949 { uint u = decodeUTF(); | |
950 if (u == PS || u == LS) | |
951 break; | |
952 } | |
953 continue; | |
954 } | |
955 break; | |
956 } | |
957 | |
958 if (commentToken) | |
959 { | |
960 p++; | |
961 loc.linnum++; | |
962 t.value = TOK.TOKcomment; | |
963 return; | |
964 } | |
965 if (doDocComment && t.ptr[2] == '/') | |
966 getDocComment(t, lastLine == linnum); | |
967 | |
968 p++; | |
969 loc.linnum++; | |
970 continue; | |
971 | |
972 case '+': | |
973 { | |
974 int nest; | |
975 | |
976 linnum = loc.linnum; | |
977 p++; | |
978 nest = 1; | |
979 while (1) | |
980 { ubyte c = *p; | |
981 switch (c) | |
982 { | |
983 case '/': | |
984 p++; | |
985 if (*p == '+') | |
986 { | |
987 p++; | |
988 nest++; | |
989 } | |
990 continue; | |
991 | |
992 case '+': | |
993 p++; | |
994 if (*p == '/') | |
995 { | |
996 p++; | |
997 if (--nest == 0) | |
998 break; | |
999 } | |
1000 continue; | |
1001 | |
1002 case '\r': | |
1003 p++; | |
1004 if (*p != '\n') | |
1005 loc.linnum++; | |
1006 continue; | |
1007 | |
1008 case '\n': | |
1009 loc.linnum++; | |
1010 p++; | |
1011 continue; | |
1012 | |
1013 case 0: | |
1014 case 0x1A: | |
1015 error("unterminated /+ +/ comment"); | |
1016 p = end; | |
1017 t.value = TOK.TOKeof; | |
1018 return; | |
1019 | |
1020 default: | |
1021 if (c & 0x80) | |
1022 { uint u = decodeUTF(); | |
1023 if (u == PS || u == LS) | |
1024 loc.linnum++; | |
1025 } | |
1026 p++; | |
1027 continue; | |
1028 } | |
1029 break; | |
1030 } | |
1031 if (commentToken) | |
1032 { | |
1033 t.value = TOK.TOKcomment; | |
1034 return; | |
1035 } | |
1036 if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) | |
1037 { // if /++ but not /++/ | |
1038 getDocComment(t, lastLine == linnum); | |
1039 } | |
1040 continue; | |
1041 } | |
1042 | |
1043 default: | |
1044 break; /// | |
1045 } | |
1046 t.value = TOK.TOKdiv; | |
1047 return; | |
1048 | |
1049 case '.': | |
1050 p++; | |
1051 if (isdigit(*p)) | |
1052 { /* Note that we don't allow ._1 and ._ as being | |
1053 * valid floating point numbers. | |
1054 */ | |
1055 p--; | |
1056 t.value = inreal(t); | |
1057 } | |
1058 else if (p[0] == '.') | |
1059 { | |
1060 if (p[1] == '.') | |
1061 { p += 2; | |
1062 t.value = TOK.TOKdotdotdot; | |
1063 } | |
1064 else | |
1065 { p++; | |
1066 t.value = TOK.TOKslice; | |
1067 } | |
1068 } | |
1069 else | |
1070 t.value = TOK.TOKdot; | |
1071 return; | |
1072 | |
1073 case '&': | |
1074 p++; | |
1075 if (*p == '=') | |
1076 { p++; | |
1077 t.value = TOK.TOKandass; | |
1078 } | |
1079 else if (*p == '&') | |
1080 { p++; | |
1081 t.value = TOK.TOKandand; | |
1082 } | |
1083 else | |
1084 t.value = TOK.TOKand; | |
1085 return; | |
1086 | |
1087 case '|': | |
1088 p++; | |
1089 if (*p == '=') | |
1090 { p++; | |
1091 t.value = TOK.TOKorass; | |
1092 } | |
1093 else if (*p == '|') | |
1094 { p++; | |
1095 t.value = TOK.TOKoror; | |
1096 } | |
1097 else | |
1098 t.value = TOK.TOKor; | |
1099 return; | |
1100 | |
1101 case '-': | |
1102 p++; | |
1103 if (*p == '=') | |
1104 { p++; | |
1105 t.value = TOK.TOKminass; | |
1106 } | |
1107 /// #if 0 | |
1108 /// else if (*p == '>') | |
1109 /// { p++; | |
1110 /// t.value = TOK.TOKarrow; | |
1111 /// } | |
1112 /// #endif | |
1113 else if (*p == '-') | |
1114 { p++; | |
1115 t.value = TOK.TOKminusminus; | |
1116 } | |
1117 else | |
1118 t.value = TOK.TOKmin; | |
1119 return; | |
1120 | |
1121 case '+': | |
1122 p++; | |
1123 if (*p == '=') | |
1124 { p++; | |
1125 t.value = TOK.TOKaddass; | |
1126 } | |
1127 else if (*p == '+') | |
1128 { p++; | |
1129 t.value = TOK.TOKplusplus; | |
1130 } | |
1131 else | |
1132 t.value = TOK.TOKadd; | |
1133 return; | |
1134 | |
1135 case '<': | |
1136 p++; | |
1137 if (*p == '=') | |
1138 { p++; | |
1139 t.value = TOK.TOKle; // <= | |
1140 } | |
1141 else if (*p == '<') | |
1142 { p++; | |
1143 if (*p == '=') | |
1144 { p++; | |
1145 t.value = TOK.TOKshlass; // <<= | |
1146 } | |
1147 else | |
1148 t.value = TOK.TOKshl; // << | |
1149 } | |
1150 else if (*p == '>') | |
1151 { p++; | |
1152 if (*p == '=') | |
1153 { p++; | |
1154 t.value = TOK.TOKleg; // <>= | |
1155 } | |
1156 else | |
1157 t.value = TOK.TOKlg; // <> | |
1158 } | |
1159 else | |
1160 t.value = TOK.TOKlt; // < | |
1161 return; | |
1162 | |
1163 case '>': | |
1164 p++; | |
1165 if (*p == '=') | |
1166 { p++; | |
1167 t.value = TOK.TOKge; // >= | |
1168 } | |
1169 else if (*p == '>') | |
1170 { p++; | |
1171 if (*p == '=') | |
1172 { p++; | |
1173 t.value = TOK.TOKshrass; // >>= | |
1174 } | |
1175 else if (*p == '>') | |
1176 { p++; | |
1177 if (*p == '=') | |
1178 { p++; | |
1179 t.value = TOK.TOKushrass; // >>>= | |
1180 } | |
1181 else | |
1182 t.value = TOK.TOKushr; // >>> | |
1183 } | |
1184 else | |
1185 t.value = TOK.TOKshr; // >> | |
1186 } | |
1187 else | |
1188 t.value = TOK.TOKgt; // > | |
1189 return; | |
1190 | |
1191 case '!': | |
1192 p++; | |
1193 if (*p == '=') | |
1194 { p++; | |
1195 if (*p == '=' && global.params.Dversion == 1) | |
1196 { p++; | |
1197 t.value = TOK.TOKnotidentity; // !== | |
1198 } | |
1199 else | |
1200 t.value = TOK.TOKnotequal; // != | |
1201 } | |
1202 else if (*p == '<') | |
1203 { p++; | |
1204 if (*p == '>') | |
1205 { p++; | |
1206 if (*p == '=') | |
1207 { p++; | |
1208 t.value = TOK.TOKunord; // !<>= | |
1209 } | |
1210 else | |
1211 t.value = TOK.TOKue; // !<> | |
1212 } | |
1213 else if (*p == '=') | |
1214 { p++; | |
1215 t.value = TOK.TOKug; // !<= | |
1216 } | |
1217 else | |
1218 t.value = TOK.TOKuge; // !< | |
1219 } | |
1220 else if (*p == '>') | |
1221 { p++; | |
1222 if (*p == '=') | |
1223 { p++; | |
1224 t.value = TOK.TOKul; // !>= | |
1225 } | |
1226 else | |
1227 t.value = TOK.TOKule; // !> | |
1228 } | |
1229 else | |
1230 t.value = TOK.TOKnot; // ! | |
1231 return; | |
1232 | |
1233 case '=': | |
1234 p++; | |
1235 if (*p == '=') | |
1236 { p++; | |
1237 if (*p == '=' && global.params.Dversion == 1) | |
1238 { p++; | |
1239 t.value = TOK.TOKidentity; // === | |
1240 } | |
1241 else | |
1242 t.value = TOK.TOKequal; // == | |
1243 } | |
1244 else | |
1245 t.value = TOK.TOKassign; // = | |
1246 return; | |
1247 | |
1248 case '~': | |
1249 p++; | |
1250 if (*p == '=') | |
1251 { p++; | |
1252 t.value = TOK.TOKcatass; // ~= | |
1253 } | |
1254 else | |
1255 t.value = TOK.TOKtilde; // ~ | |
1256 return; | |
1257 /* | |
1258 #define SINGLE(c,tok) case c: p++; t.value = tok; return; | |
1259 | |
1260 SINGLE('(', TOKlparen) | |
1261 SINGLE(')', TOKrparen) | |
1262 SINGLE('[', TOKlbracket) | |
1263 SINGLE(']', TOKrbracket) | |
1264 SINGLE('{', TOKlcurly) | |
1265 SINGLE('}', TOKrcurly) | |
1266 SINGLE('?', TOKquestion) | |
1267 SINGLE(',', TOKcomma) | |
1268 SINGLE(';', TOKsemicolon) | |
1269 SINGLE(':', TOKcolon) | |
1270 SINGLE('$', TOKdollar) | |
1271 SINGLE('@', TOKat) | |
1272 | |
1273 #undef SINGLE | |
1274 | |
1275 #define DOUBLE(c1,tok1,c2,tok2) \ | |
1276 case c1: \ | |
1277 p++; \ | |
1278 if (*p == c2) \ | |
1279 { p++; \ | |
1280 t.value = tok2; \ | |
1281 } \ | |
1282 else \ | |
1283 t.value = tok1; \ | |
1284 return; | |
1285 | |
1286 DOUBLE('*', TOKmul, '=', TOKmulass) | |
1287 DOUBLE('%', TOKmod, '=', TOKmodass) | |
1288 DOUBLE('^', TOKxor, '=', TOKxorass) | |
1289 | |
1290 #undef DOUBLE | |
1291 */ | |
1292 | |
1293 case '(': p++; t.value = TOK.TOKlparen; return; | |
1294 case ')': p++; t.value = TOK.TOKrparen; return; | |
1295 case '[': p++; t.value = TOK.TOKlbracket; return; | |
1296 case ']': p++; t.value = TOK.TOKrbracket; return; | |
1297 case '{': p++; t.value = TOK.TOKlcurly; return; | |
1298 case '}': p++; t.value = TOK.TOKrcurly; return; | |
1299 case '?': p++; t.value = TOK.TOKquestion; return; | |
1300 case ',': p++; t.value = TOK.TOKcomma; return; | |
1301 case ';': p++; t.value = TOK.TOKsemicolon; return; | |
1302 case ':': p++; t.value = TOK.TOKcolon; return; | |
1303 case '$': p++; t.value = TOK.TOKdollar; return; | |
1304 case '@': p++; t.value = TOK.TOKat; return; | |
1305 | |
1306 case '*': | |
1307 p++; | |
1308 if (*p == '=') { | |
1309 p++; | |
1310 t.value = TOK.TOKmulass; | |
1311 } else { | |
1312 t.value = TOK.TOKmul; | |
1313 } | |
1314 return; | |
1315 | |
1316 case '%': | |
1317 p++; | |
1318 if (*p == '=') { | |
1319 p++; | |
1320 t.value = TOK.TOKmodass; | |
1321 } else { | |
1322 t.value = TOK.TOKmod; | |
1323 } | |
1324 return; | |
1325 | |
1326 case '^': | |
1327 p++; | |
1328 if (*p == '=') { | |
1329 p++; | |
1330 t.value = TOK.TOKxorass; | |
1331 } else { | |
1332 t.value = TOK.TOKxor; | |
1333 } | |
1334 return; | |
1335 | |
1336 case '#': | |
1337 p++; | |
1338 pragma_(); | |
1339 continue; | |
1340 | |
1341 default: | |
1342 { ubyte c = *p; | |
1343 | |
1344 if (c & 0x80) | |
1345 { uint u = decodeUTF(); | |
1346 | |
1347 // Check for start of unicode identifier | |
1348 if (isUniAlpha(u)) | |
1349 goto case_ident; | |
1350 | |
1351 if (u == PS || u == LS) | |
1352 { | |
1353 loc.linnum++; | |
1354 p++; | |
1355 continue; | |
1356 } | |
1357 } | |
1358 if (isprint(c)) | |
1359 error("unsupported char '%c'", c); | |
1360 else | |
1361 error("unsupported char 0x%02x", c); | |
1362 p++; | |
1363 continue; | |
1364 } | |
1365 } | |
1366 } | |
1367 } | |
1368 | |
/***************************************
 * Look one token past ct without consuming anything.
 *
 * If ct already has a successor linked through ct.next, that token is
 * returned. Otherwise a new Token is allocated, filled in by scan(), and
 * linked in as ct.next, so repeated calls return the same token.
 */
Token* peek(Token* ct)
{
    if (ct.next is null)
    {
        Token* ahead = new Token();
        scan(ahead);
        ahead.next = null;
        ct.next = ahead;
    }
    return ct.next;
}
1384 | |
/***************************************
 * Look ahead (via peek()) for the token that follows the ')' matching an
 * already-seen '('; the paren nesting count therefore starts at 1.
 *
 * The scan also stops, returning the token it stopped on, at:
 *   - end of file,
 *   - a '}' that has no matching '{' inside the scanned range,
 *   - a ';' that is not nested inside any '{' ... '}'.
 */
Token* peekPastParen(Token* tk)
{
    //printf("peekPastParen()\n");
    int parenDepth = 1;
    int braceDepth = 0;

    for (;;)
    {
        tk = peek(tk);
        //tk.print();

        if (tk.value == TOK.TOKlparen)
        {
            parenDepth++;
        }
        else if (tk.value == TOK.TOKrparen)
        {
            // Matching ')' found: the answer is the token after it
            if (--parenDepth == 0)
                return peek(tk);
        }
        else if (tk.value == TOK.TOKlcurly)
        {
            braceDepth++;
        }
        else if (tk.value == TOK.TOKrcurly)
        {
            if (--braceDepth < 0)
                return tk;
        }
        else if (tk.value == TOK.TOKsemicolon)
        {
            if (braceDepth == 0)
                return tk;
        }
        else if (tk.value == TOK.TOKeof)
        {
            return tk;
        }
    }
}
1430 | |
/*******************************************
 * Parse escape sequence.
 *
 * On entry p points at the character following the backslash. p is
 * advanced past whatever the escape consumes.
 *
 * Handles the single-character escapes (\' \" \? \\ \a \b \f \n \r \t \v),
 * hex escapes (\xHH, \uHHHH, \UHHHHHHHH), named character entities
 * (\&name;) and octal escapes of up to three digits.
 *
 * Returns:
 *	the value of the escape. For an unrecognized escape an error is
 *	reported and the offending character is returned without advancing p.
 *	At end of file (0 or 0x1A) '\\' is returned and p is not advanced.
 */
uint escapeSequence()
{
    uint c = *p;

    version (TEXTUAL_ASSEMBLY_OUT) {
        return c;
    }
    int n;
    int ndigits;

    switch (c)
    {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
            p++;
            break;

        case 'a':   c = 7;      goto Lconsume;
        case 'b':   c = 8;      goto Lconsume;
        case 'f':   c = 12;     goto Lconsume;
        case 'n':   c = 10;     goto Lconsume;
        case 'r':   c = 13;     goto Lconsume;
        case 't':   c = 9;      goto Lconsume;
        case 'v':   c = 11;     goto Lconsume;

        case 'u':
            ndigits = 4;
            goto Lhex;
        case 'U':
            ndigits = 8;
            goto Lhex;
        case 'x':
            ndigits = 2;
        Lhex:
            p++;
            c = *p;
            if (ishex(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                // Accumulate exactly ndigits hex digits; fewer is an error
                while (1)
                {
                    if (isdigit(c))
                        c -= '0';
                    else if (islower(c))
                        c -= 'a' - 10;
                    else
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(ubyte)c))
                    {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \x yields a raw byte; \u and \U must be valid code points
                if (ndigits != 2 && !utf_isValidDchar(v))
                {   error("invalid UTF character \\U%08x", v);
                    v = '?';    // recover with valid UTF character
                }
                c = v;
            }
            else
                error("undefined escape hex sequence \\%c\n",c);
            break;

        case '&':               // named character entity
            for (ubyte* idstart = ++p; true; p++)
            {
                switch (*p)
                {
                    case ';':
                        c = HtmlNamedEntity(idstart, p - idstart);
                        if (c == ~0)
                        {
                            // Format the name as text: a ubyte[] slice passed
                            // to %s would be printed as an array of numbers.
                            error("unnamed character entity &%s;", cast(char[]) idstart[0 .. (p - idstart)]);
                            c = ' ';
                        }
                        p++;
                        break;

                    default:
                        if (isalpha(*p) ||
                            (p != idstart + 1 && isdigit(*p)))
                            continue;
                        error("unterminated named entity");
                        break;
                }
                break;
            }
            break;

        case 0:
        case 0x1A:              // end of file
            c = '\\';
            break;

        default:
            if (isoctal(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                // At most three octal digits
                do
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                } while (++n < 3 && isoctal(cast(ubyte)c));
                c = v;
                if (c > 0xFF)
                    error("0%03o is larger than a byte", c);
            }
            else
                error("undefined escape sequence \\%c\n",c);
            break;
    }
    return c;
}
1559 | |
/// Stub: the body is only `assert(0)`, so any call halts the program.
TOK wysiwygStringConstant(Token* t, int tc)
{
    assert(0);
}
1564 | |
/// Stub: the body is only `assert(0)`, so any call halts the program.
TOK hexStringConstant(Token* t)
{
    assert(0);
}
1569 | |
version (DMDV2)
{
    /// Stub: the body is only `assert(0)`, so any call halts the program.
    TOK delimitedStringConstant(Token* t)
    {
        assert(0);
    }

    /// Stub: the body is only `assert(0)`, so any call halts the program.
    TOK tokenStringConstant(Token* t)
    {
        assert(0);
    }
}
/***************************************
 * Lex a double-quoted string literal in which backslash escapes are
 * interpreted.
 *
 * On entry p points at the opening quote. The decoded bytes are gathered
 * in stringbuffer; on the closing quote they are copied, together with a
 * terminating 0, into a malloc'd buffer stored in t.ustring. t.len is the
 * length without the terminator, and stringPostfix() fills in t.postfix.
 *
 * Line endings inside the literal bump loc.linnum; "\r\n" and a lone '\r'
 * are both stored as '\n', as are the Unicode LS and PS separators.
 *
 * Hitting end of file (0 or 0x1A) reports an error and yields an empty
 * string token.
 *
 * Params:
 *	t = token to fill in
 *	wide = not used by this function
 * Returns:
 *	TOK.TOKstring
 */
TOK escapeStringConstant(Token* t, int wide)
{
    uint c;
    Loc start = loc;

    p++;
    stringbuffer.reset();
    while (true)
    {
        c = *p++;
        switch (c)
        {
            version (TEXTUAL_ASSEMBLY_OUT) {
            } else {
            case '\\':
                switch (*p)
                {
                    case 'u':
                    case 'U':
                    case '&':
                        // These escapes denote code points: store as UTF-8
                        c = escapeSequence();
                        stringbuffer.writeUTF8(c);
                        continue;

                    default:
                        // Everything else is stored as a single byte below
                        c = escapeSequence();
                        break;
                }
                break;
            }
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;   // ignore
                c = '\n';       // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case '"':
                t.len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                char* tmp = cast(char*)malloc(stringbuffer.offset);
                if (tmp is null)
                {
                    // malloc() signals failure with null; copying into it
                    // would be an invalid write, so stop cleanly instead.
                    printf("Error: out of memory\n");
                    exit(EXIT_FAILURE);
                }
                memcpy(tmp, stringbuffer.data, stringbuffer.offset);
                t.ustring = tmp;
                stringPostfix(t);
                return TOK.TOKstring;

            case 0:
            case 0x1A:
                p--;            // leave p on the end-of-file marker
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOK.TOKstring;

            default:
                if (c & 0x80)
                {
                    // Lead byte of a multi-byte sequence: back up and decode
                    p--;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {   c = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(c);
    }

    assert(false);
}
1660 | |
/***************************************
 * Lex a character literal. On entry p points at the opening quote.
 *
 * The character value is stored in t.uns64value. The returned token kind
 * starts as TOKcharv and is widened to TOKwcharv or TOKdcharv for \u
 * escapes, for \U and \& escapes, and for non-ASCII source characters
 * depending on their code point.
 *
 * A line ending, end of file, an empty literal ('') or a missing closing
 * quote reports "unterminated character constant".
 *
 * Params:
 *	t = token to fill in
 *	wide = not used by this function
 */
TOK charConstant(Token* t, int wide)
{
    TOK kind = TOKcharv;

    //printf("Lexer.charConstant\n");
    p++;
    uint ch = *p++;
    switch (ch)
    {
        version (TEXTUAL_ASSEMBLY_OUT) {
        } else {
        case '\\':
        {
            // Remember which escape this is before escapeSequence() moves p
            ubyte selector = *p;
            t.uns64value = escapeSequence();
            if (selector == 'u')
                kind = TOKwcharv;
            else if (selector == 'U' || selector == '&')
                kind = TOKdcharv;
            break;
        }
        }
        case '\n':
        Lnewline:
            loc.linnum++;
        case '\r':
        case 0:
        case 0x1A:
        case '\'':
            error("unterminated character constant");
            return kind;

        default:
            if (ch & 0x80)
            {
                // Multi-byte source character: back up and decode it
                p--;
                ch = decodeUTF();
                p++;
                if (ch == LS || ch == PS)
                    goto Lnewline;
                if (ch < 0xD800 || (ch >= 0xE000 && ch < 0xFFFE))
                    kind = TOKwcharv;
                else
                    kind = TOKdcharv;
            }
            t.uns64value = ch;
            break;
    }

    // Consume the closing quote, or complain if it is missing
    if (*p == '\'')
        p++;
    else
        error("unterminated character constant");
    return kind;
}
1728 | |
/***************************************
 * Get postfix of string literal.
 *
 * If the current character is 'c', 'w' or 'd' it is stored in t.postfix
 * and consumed; otherwise t.postfix is set to 0 and p is left alone.
 */
void stringPostfix(Token* t)
{
    auto suffix = *p;

    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t.postfix = suffix;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
1748 | |
/// Stub: the body is only `assert(0)`, so any call halts the program.
uint wchar_(uint u)
{
    assert(0);
}
1753 | |
/**************************************
 * Read in a number.
 *
 * Scans an integer literal starting at p. Integers can be decimal, octal,
 * hex or binary; embedded '_' characters are skipped.
 * The suffixes U, UL, LU, L, etc. are handled.
 * The value is stored in t.uns64value.
 *
 * If the text turns out to be a floating point literal, p is reset to the
 * start of the literal and the work is handed to inreal().
 *
 * Returns:
 *	TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer,
 *	otherwise whatever inreal() returns
 */

TOK number(Token* t)
{
    // We use a state machine to collect numbers
    enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
        STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
        STATE_hexh, STATE_error };
    STATE state;

    enum FLAGS
    {
        FLAGS_undefined = 0,
        FLAGS_decimal = 1,      // decimal
        FLAGS_unsigned = 2,     // u or U suffix
        FLAGS_long = 4,         // l or L suffix
    };

    FLAGS flags = FLAGS.FLAGS_decimal;

    int i;                      // not referenced in this function
    int base;                   // only consumed by the version (__DMC__) strtoull() path
    uint c;
    ubyte *start;               // where the literal begins, for rescanning as a real
    TOK result;

    //printf("Lexer.number()\n");
    state = STATE.STATE_initial;
    base = 0;
    stringbuffer.reset();
    start = p;
    // Each iteration classifies *p. Falling out of the switch with `break`
    // appends the character to stringbuffer and advances p; `continue`
    // advances without appending (used to drop '_').
    while (1)
    {
        c = *p;
        switch (state)
        {
            case STATE.STATE_initial:       // opening state
                if (c == '0')
                    state = STATE.STATE_0;
                else
                    state = STATE.STATE_decimal;
                break;

            case STATE.STATE_0:
                // A leading 0 means the literal is not treated as decimal
                flags = (flags & ~FLAGS.FLAGS_decimal);
                switch (c)
                {
                    version (ZEROH) {
                    case 'H':               // 0h
                    case 'h':
                        goto hexh;
                    }
                    case 'X':
                    case 'x':
                        state = STATE.STATE_hex0;
                        break;

                    case '.':
                        if (p[1] == '.')    // .. is a separate token
                            goto done;
                        // otherwise falls through: "0." starts a real
                    case 'i':
                    case 'f':
                    case 'F':
                        goto real_;
                    version (ZEROH) {
                    case 'E':
                    case 'e':
                        goto case_hex;
                    }
                    case 'B':
                    case 'b':
                        state = STATE.STATE_binary0;
                        break;

                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                        state = STATE.STATE_octal;
                        break;

                    version (ZEROH) {
                    case '8': case '9': case 'A':
                    case 'C': case 'D': case 'F':
                    case 'a': case 'c': case 'd': case 'f':
                    case_hex:
                        state = STATE.STATE_hexh;
                        break;
                    }
                    case '_':
                        state = STATE.STATE_octal;
                        p++;
                        continue;

                    case 'L':
                        if (p[1] == 'i')
                            goto real_;
                        goto done;

                    default:
                        goto done;
                }
                break;

            case STATE.STATE_decimal:       // reading decimal number
                if (!isdigit(c))
                {
                    version (ZEROH) {
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    else if (c == 'i' || c == 'f' || c == 'F' ||
                             c == 'e' || c == 'E')
                    {
                real_:  // It's a real number. Back up and rescan as a real
                        p = start;
                        return inreal(t);
                    }
                    else if (c == 'L' && p[1] == 'i')
                        goto real_;
                    goto done;
                }
                break;

            case STATE.STATE_hex0:          // reading hex number
            case STATE.STATE_hex:
                if (! ishex(cast(ubyte)c))
                {
                    if (c == '_')           // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    if (c == 'P' || c == 'p' || c == 'i')
                        goto real_;
                    // Still in hex0 means no digit followed the 0x prefix
                    if (state == STATE.STATE_hex0)
                        error("Hex digit expected, not '%c'", c);
                    goto done;
                }
                state = STATE.STATE_hex;
                break;

            version (ZEROH) {
            hexh:
                state = STATE.STATE_hexh;
            case STATE.STATE_hexh:          // parse numbers like 0FFh
                if (!ishex(c))
                {
                    if (c == 'H' || c == 'h')
                    {
                        p++;
                        base = 16;
                        goto done;
                    }
                    else
                    {
                        // Check for something like 1E3 or 0E24
                        if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) ||
                            memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset))
                            goto real_;
                        error("Hex digit expected, not '%c'", c);
                        goto done;
                    }
                }
                break;
            }

            case STATE.STATE_octal:         // reading octal number
            case STATE.STATE_octale:        // reading octal number with non-octal digits
                if (!isoctal(cast(ubyte)c))
                {
                    version (ZEROH) {
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    if (c == 'i')
                        goto real_;
                    if (isdigit(c))
                    {
                        // An 8 or 9: keep scanning, the error is reported at done:
                        state = STATE.STATE_octale;
                    }
                    else
                        goto done;
                }
                break;

            case STATE.STATE_binary0:       // starting binary number
            case STATE.STATE_binary:        // reading binary number
                if (c != '0' && c != '1')
                {
                    version (ZEROH) {
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (state == STATE.STATE_binary0)
                    {   error("binary digit expected");
                        state = STATE.STATE_error;
                        break;
                    }
                    else
                        goto done;
                }
                state = STATE.STATE_binary;
                break;

            case STATE.STATE_error:         // for error recovery
                if (!isdigit(c))            // scan until non-digit
                    goto done;
                break;

            default:
                assert(0);
        }
        stringbuffer.writeByte(c);
        p++;
    }
done:
    stringbuffer.writeByte(0);              // terminate string
    if (state == STATE.STATE_octale)
        error("Octal digit expected");

    ulong n;                                // unsigned >=64 bit integer type

    // Fast path: exactly one character plus the terminating 0 was collected
    if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0))
        n = stringbuffer.data[0] - '0';
    else
    {
        // Convert string to integer
        version (__DMC__) {
        errno = 0;
        n = strtoull(cast(char*)stringbuffer.data,null,base);
        if (errno == ERANGE)
            error("integer overflow");
        } else {
        // Not everybody implements strtoull()
        // NOTE: this local p shadows the lexer's p member until the end of
        // the enclosing else block.
        char* p = cast(char*)stringbuffer.data;
        int r = 10, d;

        // Work out the radix from the collected prefix
        if (*p == '0')
        {
            if (p[1] == 'x' || p[1] == 'X')
                p += 2, r = 16;
            else if (p[1] == 'b' || p[1] == 'B')
                p += 2, r = 2;
            else if (isdigit(p[1]))
                p += 1, r = 8;
        }

        n = 0;
        while (1)
        {
            if (*p >= '0' && *p <= '9')
                d = *p - '0';
            else if (*p >= 'a' && *p <= 'z')
                d = *p - 'a' + 10;
            else if (*p >= 'A' && *p <= 'Z')
                d = *p - 'A' + 10;
            else
                break;
            if (d >= r)
                break;
            ulong n2 = n * r;
            //printf("n2 / r = %llx, n = %llx\n", n2/r, n);
            // Either the multiply or the add wrapped around
            if (n2 / r != n || n2 + d < n)
            {
                error ("integer overflow");
                break;
            }

            n = n2 + d;
            p++;
        }
        }
        if (n.sizeof > 8 &&
            n > 0xFFFFFFFFFFFFFFFF)         // if n needs more than 64 bits
            error("integer overflow");
    }

    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    while (1)
    {   FLAGS f;

        switch (*p)
        {   case 'U':
            case 'u':
                f = FLAGS.FLAGS_unsigned;
                goto L1;

            case 'l':
                // The `1 ||` makes this error unconditional
                if (1 || !global.params.useDeprecated)
                    error("'l' suffix is deprecated, use 'L' instead");
            case 'L':
                f = FLAGS.FLAGS_long;
            L1:
                p++;
                // The same kind of suffix given twice
                if (flags & f)
                    error("unrecognized token");
                flags = (flags | f);
                continue;
            default:
                break;
        }
        break;
    }

    // Pick the token type from the radix/suffix flags and the magnitude of n
    switch (flags)
    {
        case FLAGS.FLAGS_undefined:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000)
                result = TOK.TOKuns64v;
            else if (n & 0xFFFFFFFF00000000)
                result = TOK.TOKint64v;
            else if (n & 0x80000000)
                result = TOK.TOKuns32v;
            else
                result = TOK.TOKint32v;
            break;

        case FLAGS.FLAGS_decimal:
            /* First that fits: int, long, long long
             */
            if (n & 0x8000000000000000)
            {   error("signed integer overflow");
                result = TOK.TOKuns64v;
            }
            else if (n & 0xFFFFFFFF80000000)
                result = TOK.TOKint64v;
            else
                result = TOK.TOKint32v;
            break;

        case FLAGS.FLAGS_unsigned:
        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000)
                result = TOK.TOKuns64v;
            else
                result = TOK.TOKuns32v;
            break;

        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
            if (n & 0x8000000000000000)
            {   error("signed integer overflow");
                result = TOK.TOKuns64v;
            }
            else
                result = TOK.TOKint64v;
            break;

        case FLAGS.FLAGS_long:
            if (n & 0x8000000000000000)
                result = TOK.TOKuns64v;
            else
                result = TOK.TOKint64v;
            break;

        case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
            result = TOK.TOKuns64v;
            break;

        default:
            debug {
                printf("%x\n",flags);
            }
            assert(0);
    }
    t.uns64value = n;
    return result;
}
2158 | |
/**************************************
 * Read in characters, converting them to real.
 *
 * On entry p points at the first character of the literal (a digit or
 * '.', see the in contract). The literal text, minus embedded '_', is
 * gathered in stringbuffer and converted with strtold(); the result is
 * stored in t.float80value. An optional f/F or l/L suffix and an optional
 * i/I imaginary suffix select the returned token kind.
 *
 * Returns:
 *	one of TOKfloat32v, TOKfloat64v, TOKfloat80v, TOKimaginary32v,
 *	TOKimaginary64v, TOKimaginary80v (enforced by the out contract)
 * Bugs:
 *	Exponent overflow not detected.
 *	Too much requested precision is not detected.
 */
TOK inreal(Token* t)
in
{
    assert(*p == '.' || isdigit(*p));
}
out (result)
{
    switch (result)
    {
        case TOKfloat32v:
        case TOKfloat64v:
        case TOKfloat80v:
        case TOKimaginary32v:
        case TOKimaginary64v:
        case TOKimaginary80v:
            break;

        default:
            assert(0);
    }
}
body
{
    int dblstate;               // state of the scanner, see the case comments below
    uint c;
    char hex;                   // is this a hexadecimal-floating-constant?
    TOK result;

    //printf("Lexer.inreal()\n");
    stringbuffer.reset();
    dblstate = 0;
    hex = 0;
    // `goto Lnext` restarts the outer loop, fetching the next character
    // without storing the current one.
Lnext:
    while (true)
    {
        // Get next char from input
        c = *p++;
        //printf("dblstate = %d, c = '%c'\n", dblstate, c);
        // The inner loop lets a state hand the same character on to the
        // next state: `dblstate++; continue;` re-dispatches on c.
        while (true)
        {
            switch (dblstate)
            {
                case 0:                     // opening state
                    if (c == '0')
                        dblstate = 9;
                    else if (c == '.')
                        dblstate = 3;
                    else
                        dblstate = 1;
                    break;

                case 9:                     // character right after a leading 0
                    dblstate = 1;
                    if (c == 'X' || c == 'x')
                    {
                        hex++;
                        break;
                    }
                    // otherwise falls through and is handled as state 1
                case 1:                     // digits to left of .
                case 3:                     // digits to right of .
                case 7:                     // continuing exponent digits
                    if (!isdigit(c) && !(hex && isxdigit(c)))
                    {
                        if (c == '_')
                            goto Lnext;     // ignore embedded '_'
                        dblstate++;
                        continue;
                    }
                    break;

                case 2:                     // no more digits to left of .
                    if (c == '.')
                    {
                        dblstate++;
                        break;
                    }
                    // no '.', falls through to look for an exponent
                case 4:                     // no more digits to right of .
                    if ((c == 'E' || c == 'e') ||
                        hex && (c == 'P' || c == 'p'))
                    {
                        dblstate = 5;
                        hex = 0;            // exponent is always decimal
                        break;
                    }
                    if (hex)
                        error("binary-exponent-part required");
                    goto done;

                case 5:                     // looking immediately to right of E
                    dblstate++;
                    if (c == '-' || c == '+')
                        break;
                    // no sign, falls through: c must be the first digit
                case 6:                     // 1st exponent digit expected
                    if (!isdigit(c))
                        error("exponent expected");
                    dblstate++;
                    break;

                case 8:                     // past end of exponent digits
                    goto done;
            }
            break;
        }
        stringbuffer.writeByte(c);
    }
done:
    p--;                        // un-read the character that ended the literal

    stringbuffer.writeByte(0);

    // NOTE(review): `_WIN32` does not look like one of D's predefined version
    // identifiers (those are `Windows`/`Win32`), so this block is presumably
    // compiled only when -version=_WIN32 is given - confirm.
    version (_WIN32) { /// && __DMC__
    char* save = __locale_decpoint;
    __locale_decpoint = cast(char*)".".ptr;
    }
    t.float80value = strtold(cast(char*)stringbuffer.data, null);

    // errno is cleared after strtold(), so only the strtof()/strtod() calls
    // below can cause the "number is not representable" error; their return
    // values are discarded. Nothing is re-run for an l/L suffix.
    errno = 0;
    switch (*p)
    {
        case 'F':
        case 'f':
            strtof(cast(char*)stringbuffer.data, null);
            result = TOKfloat32v;
            p++;
            break;

        default:
            strtod(cast(char*)stringbuffer.data, null);
            result = TOKfloat64v;
            break;

        case 'l':
            if (!global.params.useDeprecated)
                error("'l' suffix is deprecated, use 'L' instead");
        case 'L':
            result = TOKfloat80v;
            p++;
            break;
    }
    // Optional imaginary suffix
    if (*p == 'i' || *p == 'I')
    {
        if (!global.params.useDeprecated && *p == 'I')
            error("'I' suffix is deprecated, use 'i' instead");
        p++;
        switch (result)
        {
            case TOKfloat32v:
                result = TOKimaginary32v;
                break;
            case TOKfloat64v:
                result = TOKimaginary64v;
                break;
            case TOKfloat80v:
                result = TOKimaginary80v;
                break;
        }
    }

    version (_WIN32) { ///&& __DMC__
    __locale_decpoint = save;
    }
    if (errno == ERANGE)
        error("number is not representable");

    return result;
}
2331 | |
/***************************************
 * Report an error at the lexer's current location.
 * Forwards to the Loc-taking overload with this.loc.
 */
void error(T...)(string format, T t)
{
    error(loc, format, t);
}
2336 | |
/***************************************
 * Report an error at the given location and count it in global.errors.
 *
 * The message is printed only when the lexer has a module and
 * global.gag is not set: an optional "location: " prefix, then the
 * formatted message. If 20 or more errors were already recorded at that
 * point, fatal() is called. The error counter is bumped afterwards in
 * every case.
 */
void error(T...)(Loc loc, string format, T t)
{
    const bool silenced = !mod || global.gag;

    if (!silenced)
    {
        string where = loc.toChars();
        if (where.length > 0)
            writef("%s: ", where);

        writefln(format, t);

        // moderate blizzard of cascading messages
        if (global.errors >= 20)
            fatal();
    }

    global.errors++;
}
2353 | |
/// Stub: the body is only `assert(0)`, so any call halts the program.
void pragma_()
{
    assert(0);
}
2358 | |
/// Stub: the body is only `assert(0)`, so any call halts the program.
uint decodeUTF()
{
    assert(0);
}
2363 | |
/// Stub: the body is only `assert(0)`, so any call halts the program.
void getDocComment(Token* t, uint lineComment)
{
    assert(0);
}
2368 | |
/***************************************
 * Check whether p is spelled like an identifier.
 *
 * An identifier is non-empty, does not start with an ASCII digit, and
 * consists only of ASCII letters, ASCII digits, '_' and non-ASCII
 * characters accepted by isUniAlpha().
 *
 * Returns:
 *	false if p is empty, starts with a digit, fails to decode with
 *	utf_decodeChar(), or contains any other character; true otherwise
 */
static bool isValidIdentifier(string p)
{
    if (p.length == 0)
        return false;

    if (p[0] >= '0' && p[0] <= '9')     // beware of isdigit() on signed chars
        return false;

    size_t idx = 0;
    while (idx < p.length)
    {
        dchar dc;

        // A non-null result from utf_decodeChar() is treated as a decode failure
        if (utf_decodeChar(p, &idx, &dc) !is null)
            return false;

        if (dc >= 0x80)
        {
            // Never hand non-ASCII code points to isalnum(): the C ctype
            // functions are only defined for unsigned char values and EOF.
            if (!isUniAlpha(dc))
                return false;
        }
        else if (!(isalnum(dc) || dc == '_'))
        {
            return false;
        }
    }

    return true;
}
2395 | |
/// TODO: reimplement based on strings
/***************************************
 * Join two 0-terminated comment strings.
 *
 * If either argument is null the other one is returned as is (no copy).
 * Otherwise a new malloc'd, 0-terminated buffer is returned holding c1,
 * a '\n' if c1 is non-empty and does not already end in one, then c2.
 * The process exits with an error message if the allocation fails.
 */
static ubyte* combineComments(ubyte* c1, ubyte* c2)
{
    //printf("Lexer.combineComments('%s', '%s')\n", c1, c2);

    ubyte* c = c2;

    if (c1)
    {
        c = c1;
        if (c2)
        {
            size_t len1 = strlen(cast(char*)c1);
            size_t len2 = strlen(cast(char*)c2);

            // Room for c1, an optional '\n', c2 and the terminating 0
            c = cast(ubyte*)malloc(len1 + 1 + len2 + 1);
            if (c is null)
            {
                // malloc() signals failure with null; writing through it
                // would be an invalid access, so stop cleanly instead.
                printf("Error: out of memory\n");
                exit(EXIT_FAILURE);
            }
            memcpy(c, c1, len1);
            if (len1 && c1[len1 - 1] != '\n')
            {
                c[len1] = '\n';
                len1++;
            }
            memcpy(c + len1, c2, len2);
            c[len1 + len2] = 0;
        }
    }

    return c;
}
2425 } |