Mercurial > projects > ddmd
annotate dmd/Lexer.d @ 130:60bb0fe4563e
dmdfe 2.037 first main iteration
author | Eldar Insafutdinov <e.insafutdinov@gmail.com> |
---|---|
date | Thu, 09 Sep 2010 22:51:44 +0100 |
parents | e28b18c23469 |
children | af1bebfd96a4 |
rev | line source |
---|---|
0 | 1 module dmd.Lexer; |
2 | |
114 | 3 import dmd.common; |
0 | 4 import dmd.StringTable; |
5 import dmd.OutBuffer; | |
6 import dmd.Token; | |
7 import dmd.Loc; | |
8 import dmd.Module; | |
9 import dmd.Identifier; | |
10 import dmd.TOK; | |
11 import dmd.Keyword; | |
12 import dmd.StringValue; | |
13 import dmd.Global; | |
14 import dmd.Util; | |
15 import dmd.Id; | |
16 import dmd.Dchar; | |
17 import dmd.Utf; | |
18 | |
19 import std.stdio : writeln; | |
20 | |
4 | 21 import core.memory; |
2 | 22 |
0 | 23 import core.stdc.ctype; |
24 import core.stdc.stdlib; | |
25 import core.stdc.string; | |
26 import core.stdc.stdio; | |
27 import core.stdc.time; | |
28 import core.stdc.errno; | |
29 | |
/// Code point of the Unicode LINE SEPARATOR; the lexer treats it as a line break.
enum LS = 0x2028;
/// Code point of the Unicode PARAGRAPH SEPARATOR; the lexer treats it as a line break.
enum PS = 0x2029;

/// C-linkage global defined outside this module (declaration only).
extern (C) extern __gshared char* __locale_decpoint;
37 | |
/**
 * Placeholder for the "is this code point a universal alpha" predicate.
 *
 * Not implemented: every call trips the assertion, whatever `u` is.
 * The identifier scanner calls this for bytes with the high bit set, so
 * such input currently aborts here.
 */
int isUniAlpha(uint u)
{
    assert(0);
}
42 | |
43 class Lexer | |
44 { | |
45 static StringTable stringtable; | |
46 static OutBuffer stringbuffer; | |
47 static Token* freelist; | |
48 | |
49 Loc loc; // for error messages | |
50 | |
51 ubyte* base; // pointer to start of buffer | |
52 ubyte* end; // past end of buffer | |
53 ubyte* p; // current character | |
54 Token token; | |
55 Module mod; | |
56 int doDocComment; // collect doc comment information | |
57 int anyToken; // !=0 means seen at least one token | |
58 int commentToken; // !=0 means comments are TOKcomment's | |
59 | |
60 static this() | |
61 { | |
62 stringtable = new StringTable(); | |
63 stringbuffer = new OutBuffer(); | |
64 } | |
65 | |
66 static ~this() | |
67 { | |
34 | 68 //delete stringtable; |
0 | 69 } |
70 | |
71 this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken) | |
72 { | |
73 loc = Loc(mod, 1); | |
74 | |
75 memset(&token,0,token.sizeof); | |
76 this.base = base; | |
77 this.end = base + endoffset; | |
78 p = base + begoffset; | |
79 this.mod = mod; | |
80 this.doDocComment = doDocComment; | |
81 this.anyToken = 0; | |
82 this.commentToken = commentToken; | |
83 //initKeywords(); | |
84 | |
85 /* If first line starts with '#!', ignore the line | |
86 */ | |
87 | |
88 if (p[0] == '#' && p[1] =='!') | |
89 { | |
90 p += 2; | |
91 while (1) | |
92 { | |
93 ubyte c = *p; | |
94 switch (c) | |
95 { | |
96 case '\n': | |
97 p++; | |
98 break; | |
99 | |
100 case '\r': | |
101 p++; | |
102 if (*p == '\n') | |
103 p++; | |
104 break; | |
105 | |
106 case 0: | |
107 case 0x1A: | |
108 break; | |
109 | |
110 default: | |
111 if (c & 0x80) | |
112 { | |
113 uint u = decodeUTF(); | |
114 if (u == PS || u == LS) | |
115 break; | |
116 } | |
117 p++; | |
118 continue; | |
119 } | |
120 break; | |
121 } | |
122 loc.linnum = 2; | |
123 } | |
124 } | |
125 | |
126 version (DMDV2) { | |
127 static Keyword[] keywords = | |
128 [ | |
129 // { "", TOK }, | |
130 | |
131 { "this", TOK.TOKthis }, | |
132 { "super", TOK.TOKsuper }, | |
133 { "assert", TOK.TOKassert }, | |
134 { "null", TOK.TOKnull }, | |
135 { "true", TOK.TOKtrue }, | |
136 { "false", TOK.TOKfalse }, | |
137 { "cast", TOK.TOKcast }, | |
138 { "new", TOK.TOKnew }, | |
139 { "delete", TOK.TOKdelete }, | |
140 { "throw", TOK.TOKthrow }, | |
141 { "module", TOK.TOKmodule }, | |
142 { "pragma", TOK.TOKpragma }, | |
143 { "typeof", TOK.TOKtypeof }, | |
144 { "typeid", TOK.TOKtypeid }, | |
145 | |
146 { "template", TOK.TOKtemplate }, | |
147 | |
148 { "void", TOK.TOKvoid }, | |
149 { "byte", TOK.TOKint8 }, | |
150 { "ubyte", TOK.TOKuns8 }, | |
151 { "short", TOK.TOKint16 }, | |
152 { "ushort", TOK.TOKuns16 }, | |
153 { "int", TOK.TOKint32 }, | |
154 { "uint", TOK.TOKuns32 }, | |
155 { "long", TOK.TOKint64 }, | |
156 { "ulong", TOK.TOKuns64 }, | |
157 { "cent", TOK.TOKcent, }, | |
158 { "ucent", TOK.TOKucent, }, | |
159 { "float", TOK.TOKfloat32 }, | |
160 { "double", TOK.TOKfloat64 }, | |
161 { "real", TOK.TOKfloat80 }, | |
162 | |
163 { "bool", TOK.TOKbool }, | |
164 { "char", TOK.TOKchar }, | |
165 { "wchar", TOK.TOKwchar }, | |
166 { "dchar", TOK.TOKdchar }, | |
167 | |
168 { "ifloat", TOK.TOKimaginary32 }, | |
169 { "idouble", TOK.TOKimaginary64 }, | |
170 { "ireal", TOK.TOKimaginary80 }, | |
171 | |
172 { "cfloat", TOK.TOKcomplex32 }, | |
173 { "cdouble", TOK.TOKcomplex64 }, | |
174 { "creal", TOK.TOKcomplex80 }, | |
175 | |
176 { "delegate", TOK.TOKdelegate }, | |
177 { "function", TOK.TOKfunction }, | |
178 | |
179 { "is", TOK.TOKis }, | |
180 { "if", TOK.TOKif }, | |
181 { "else", TOK.TOKelse }, | |
182 { "while", TOK.TOKwhile }, | |
183 { "for", TOK.TOKfor }, | |
184 { "do", TOK.TOKdo }, | |
185 { "switch", TOK.TOKswitch }, | |
186 { "case", TOK.TOKcase }, | |
187 { "default", TOK.TOKdefault }, | |
188 { "break", TOK.TOKbreak }, | |
189 { "continue", TOK.TOKcontinue }, | |
190 { "synchronized", TOK.TOKsynchronized }, | |
191 { "return", TOK.TOKreturn }, | |
192 { "goto", TOK.TOKgoto }, | |
193 { "try", TOK.TOKtry }, | |
194 { "catch", TOK.TOKcatch }, | |
195 { "finally", TOK.TOKfinally }, | |
196 { "with", TOK.TOKwith }, | |
197 { "asm", TOK.TOKasm }, | |
198 { "foreach", TOK.TOKforeach }, | |
199 { "foreach_reverse", TOK.TOKforeach_reverse }, | |
200 { "scope", TOK.TOKscope }, | |
201 | |
202 { "struct", TOK.TOKstruct }, | |
203 { "class", TOK.TOKclass }, | |
204 { "interface", TOK.TOKinterface }, | |
205 { "union", TOK.TOKunion }, | |
206 { "enum", TOK.TOKenum }, | |
207 { "import", TOK.TOKimport }, | |
208 { "mixin", TOK.TOKmixin }, | |
209 { "static", TOK.TOKstatic }, | |
210 { "final", TOK.TOKfinal }, | |
211 { "const", TOK.TOKconst }, | |
212 { "typedef", TOK.TOKtypedef }, | |
213 { "alias", TOK.TOKalias }, | |
214 { "override", TOK.TOKoverride }, | |
215 { "abstract", TOK.TOKabstract }, | |
216 { "volatile", TOK.TOKvolatile }, | |
217 { "debug", TOK.TOKdebug }, | |
218 { "deprecated", TOK.TOKdeprecated }, | |
219 { "in", TOK.TOKin }, | |
220 { "out", TOK.TOKout }, | |
221 { "inout", TOK.TOKinout }, | |
222 { "lazy", TOK.TOKlazy }, | |
223 { "auto", TOK.TOKauto }, | |
224 | |
225 { "align", TOK.TOKalign }, | |
226 { "extern", TOK.TOKextern }, | |
227 { "private", TOK.TOKprivate }, | |
228 { "package", TOK.TOKpackage }, | |
229 { "protected", TOK.TOKprotected }, | |
230 { "public", TOK.TOKpublic }, | |
231 { "export", TOK.TOKexport }, | |
232 | |
233 { "body", TOK.TOKbody }, | |
234 { "invariant", TOK.TOKinvariant }, | |
235 { "unittest", TOK.TOKunittest }, | |
236 { "version", TOK.TOKversion }, | |
237 //{ "manifest", TOK.TOKmanifest }, | |
238 | |
239 // Added after 1.0 | |
240 { "ref", TOK.TOKref }, | |
241 { "macro", TOK.TOKmacro }, | |
242 { "pure", TOK.TOKpure }, | |
243 { "nothrow", TOK.TOKnothrow }, | |
244 { "__thread", TOK.TOKtls }, | |
245 { "__gshared", TOK.TOKgshared }, | |
246 { "__traits", TOK.TOKtraits }, | |
247 { "__overloadset", TOK.TOKoverloadset }, | |
248 { "__FILE__", TOK.TOKfile }, | |
249 { "__LINE__", TOK.TOKline }, | |
250 { "shared", TOK.TOKshared }, | |
251 { "immutable", TOK.TOKimmutable }, | |
252 ]; | |
253 } else { | |
254 static Keyword[] keywords = | |
255 [ | |
256 // { "", TOK }, | |
257 | |
258 { "this", TOK.TOKthis }, | |
259 { "super", TOK.TOKsuper }, | |
260 { "assert", TOK.TOKassert }, | |
261 { "null", TOK.TOKnull }, | |
262 { "true", TOK.TOKtrue }, | |
263 { "false", TOK.TOKfalse }, | |
264 { "cast", TOK.TOKcast }, | |
265 { "new", TOK.TOKnew }, | |
266 { "delete", TOK.TOKdelete }, | |
267 { "throw", TOK.TOKthrow }, | |
268 { "module", TOK.TOKmodule }, | |
269 { "pragma", TOK.TOKpragma }, | |
270 { "typeof", TOK.TOKtypeof }, | |
271 { "typeid", TOK.TOKtypeid }, | |
272 | |
273 { "template", TOK.TOKtemplate }, | |
274 | |
275 { "void", TOK.TOKvoid }, | |
276 { "byte", TOK.TOKint8 }, | |
277 { "ubyte", TOK.TOKuns8 }, | |
278 { "short", TOK.TOKint16 }, | |
279 { "ushort", TOK.TOKuns16 }, | |
280 { "int", TOK.TOKint32 }, | |
281 { "uint", TOK.TOKuns32 }, | |
282 { "long", TOK.TOKint64 }, | |
283 { "ulong", TOK.TOKuns64 }, | |
284 { "cent", TOK.TOKcent, }, | |
285 { "ucent", TOK.TOKucent, }, | |
286 { "float", TOK.TOKfloat32 }, | |
287 { "double", TOK.TOKfloat64 }, | |
288 { "real", TOK.TOKfloat80 }, | |
289 | |
290 { "bool", TOK.TOKbool }, | |
291 { "char", TOK.TOKchar }, | |
292 { "wchar", TOK.TOKwchar }, | |
293 { "dchar", TOK.TOKdchar }, | |
294 | |
295 { "ifloat", TOK.TOKimaginary32 }, | |
296 { "idouble", TOK.TOKimaginary64 }, | |
297 { "ireal", TOK.TOKimaginary80 }, | |
298 | |
299 { "cfloat", TOK.TOKcomplex32 }, | |
300 { "cdouble", TOK.TOKcomplex64 }, | |
301 { "creal", TOK.TOKcomplex80 }, | |
302 | |
303 { "delegate", TOK.TOKdelegate }, | |
304 { "function", TOK.TOKfunction }, | |
305 | |
306 { "is", TOK.TOKis }, | |
307 { "if", TOK.TOKif }, | |
308 { "else", TOK.TOKelse }, | |
309 { "while", TOK.TOKwhile }, | |
310 { "for", TOK.TOKfor }, | |
311 { "do", TOK.TOKdo }, | |
312 { "switch", TOK.TOKswitch }, | |
313 { "case", TOK.TOKcase }, | |
314 { "default", TOK.TOKdefault }, | |
315 { "break", TOK.TOKbreak }, | |
316 { "continue", TOK.TOKcontinue }, | |
317 { "synchronized", TOK.TOKsynchronized }, | |
318 { "return", TOK.TOKreturn }, | |
319 { "goto", TOK.TOKgoto }, | |
320 { "try", TOK.TOKtry }, | |
321 { "catch", TOK.TOKcatch }, | |
322 { "finally", TOK.TOKfinally }, | |
323 { "with", TOK.TOKwith }, | |
324 { "asm", TOK.TOKasm }, | |
325 { "foreach", TOK.TOKforeach }, | |
326 { "foreach_reverse", TOK.TOKforeach_reverse }, | |
327 { "scope", TOK.TOKscope }, | |
328 | |
329 { "struct", TOK.TOKstruct }, | |
330 { "class", TOK.TOKclass }, | |
331 { "interface", TOK.TOKinterface }, | |
332 { "union", TOK.TOKunion }, | |
333 { "enum", TOK.TOKenum }, | |
334 { "import", TOK.TOKimport }, | |
335 { "mixin", TOK.TOKmixin }, | |
336 { "static", TOK.TOKstatic }, | |
337 { "final", TOK.TOKfinal }, | |
338 { "const", TOK.TOKconst }, | |
339 { "typedef", TOK.TOKtypedef }, | |
340 { "alias", TOK.TOKalias }, | |
341 { "override", TOK.TOKoverride }, | |
342 { "abstract", TOK.TOKabstract }, | |
343 { "volatile", TOK.TOKvolatile }, | |
344 { "debug", TOK.TOKdebug }, | |
345 { "deprecated", TOK.TOKdeprecated }, | |
346 { "in", TOK.TOKin }, | |
347 { "out", TOK.TOKout }, | |
348 { "inout", TOK.TOKinout }, | |
349 { "lazy", TOK.TOKlazy }, | |
350 { "auto", TOK.TOKauto }, | |
351 | |
352 { "align", TOK.TOKalign }, | |
353 { "extern", TOK.TOKextern }, | |
354 { "private", TOK.TOKprivate }, | |
355 { "package", TOK.TOKpackage }, | |
356 { "protected", TOK.TOKprotected }, | |
357 { "public", TOK.TOKpublic }, | |
358 { "export", TOK.TOKexport }, | |
359 | |
360 { "body", TOK.TOKbody }, | |
361 { "invariant", TOK.TOKinvariant }, | |
362 { "unittest", TOK.TOKunittest }, | |
363 { "version", TOK.TOKversion }, | |
364 //{ "manifest", TOK.TOKmanifest }, | |
365 | |
366 // Added after 1.0 | |
367 { "ref", TOK.TOKref }, | |
368 { "macro", TOK.TOKmacro }, | |
369 ]; | |
370 } | |
371 | |
/// Character classification table indexed by byte value; each entry is a
/// bitwise OR of the CM* flags below. Filled in by cmtable_init().
static ubyte[256] cmtable;
enum CMoctal = 0x1;     /// flag: '0'..'7'
enum CMhex = 0x2;       /// flag: decimal digit or 'a'..'f' / 'A'..'F'
enum CMidchar = 0x4;    /// flag: alphanumeric or '_'

/// Non-zero if `c` is flagged as an octal digit in cmtable.
ubyte isoctal(ubyte c)
{
    return cmtable[c] & CMoctal;
}

/// Non-zero if `c` is flagged as a hexadecimal digit in cmtable.
ubyte ishex(ubyte c)
{
    return cmtable[c] & CMhex;
}

/// Non-zero if `c` is flagged as an identifier character in cmtable.
ubyte isidchar(ubyte c)
{
    return cmtable[c] & CMidchar;
}

/// Populate cmtable: OR the matching CM* flags into every entry.
static void cmtable_init()
{
    uint c = 0;
    while (c < cmtable.length)
    {
        const bool octalDigit = c >= '0' && c <= '7';
        const bool hexDigit = isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
        const bool identChar = isalnum(c) || c == '_';

        if (octalDigit)
            cmtable[c] |= CMoctal;
        if (hexDigit)
            cmtable[c] |= CMhex;
        if (identChar)
            cmtable[c] |= CMidchar;

        ++c;
    }
}
393 | |
394 static void initKeywords() | |
395 { | |
396 uint nkeywords = keywords.length; | |
397 | |
398 if (global.params.Dversion == 1) | |
399 nkeywords -= 2; | |
400 | |
401 cmtable_init(); | |
402 | |
403 for (uint u = 0; u < nkeywords; u++) | |
404 { | |
34 | 405 //printf("keyword[%d] = '%.*s'\n",u, keywords[u].name); |
0 | 406 string s = keywords[u].name; |
407 TOK v = keywords[u].value; | |
408 StringValue* sv = stringtable.insert(s); | |
409 sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v); | |
410 | |
411 //printf("tochars[%d] = '%s'\n",v, s); | |
412 Token.tochars[v] = s; | |
413 } | |
414 | |
415 Token.tochars[TOK.TOKeof] = "EOF"; | |
416 Token.tochars[TOK.TOKlcurly] = "{"; | |
417 Token.tochars[TOK.TOKrcurly] = "}"; | |
418 Token.tochars[TOK.TOKlparen] = "("; | |
419 Token.tochars[TOK.TOKrparen] = ")"; | |
420 Token.tochars[TOK.TOKlbracket] = "["; | |
421 Token.tochars[TOK.TOKrbracket] = "]"; | |
422 Token.tochars[TOK.TOKsemicolon] = ";"; | |
423 Token.tochars[TOK.TOKcolon] = ":"; | |
424 Token.tochars[TOK.TOKcomma] = ","; | |
425 Token.tochars[TOK.TOKdot] = "."; | |
426 Token.tochars[TOK.TOKxor] = "^"; | |
427 Token.tochars[TOK.TOKxorass] = "^="; | |
428 Token.tochars[TOK.TOKassign] = "="; | |
429 Token.tochars[TOK.TOKconstruct] = "="; | |
430 version (DMDV2) { | |
431 Token.tochars[TOK.TOKblit] = "="; | |
432 } | |
433 Token.tochars[TOK.TOKlt] = "<"; | |
434 Token.tochars[TOK.TOKgt] = ">"; | |
435 Token.tochars[TOK.TOKle] = "<="; | |
436 Token.tochars[TOK.TOKge] = ">="; | |
437 Token.tochars[TOK.TOKequal] = "=="; | |
438 Token.tochars[TOK.TOKnotequal] = "!="; | |
439 Token.tochars[TOK.TOKnotidentity] = "!is"; | |
440 Token.tochars[TOK.TOKtobool] = "!!"; | |
441 | |
442 Token.tochars[TOK.TOKunord] = "!<>="; | |
443 Token.tochars[TOK.TOKue] = "!<>"; | |
444 Token.tochars[TOK.TOKlg] = "<>"; | |
445 Token.tochars[TOK.TOKleg] = "<>="; | |
446 Token.tochars[TOK.TOKule] = "!>"; | |
447 Token.tochars[TOK.TOKul] = "!>="; | |
448 Token.tochars[TOK.TOKuge] = "!<"; | |
449 Token.tochars[TOK.TOKug] = "!<="; | |
450 | |
451 Token.tochars[TOK.TOKnot] = "!"; | |
452 Token.tochars[TOK.TOKtobool] = "!!"; | |
453 Token.tochars[TOK.TOKshl] = "<<"; | |
454 Token.tochars[TOK.TOKshr] = ">>"; | |
455 Token.tochars[TOK.TOKushr] = ">>>"; | |
456 Token.tochars[TOK.TOKadd] = "+"; | |
457 Token.tochars[TOK.TOKmin] = "-"; | |
458 Token.tochars[TOK.TOKmul] = "*"; | |
459 Token.tochars[TOK.TOKdiv] = "/"; | |
460 Token.tochars[TOK.TOKmod] = "%"; | |
461 Token.tochars[TOK.TOKslice] = ".."; | |
462 Token.tochars[TOK.TOKdotdotdot] = "..."; | |
463 Token.tochars[TOK.TOKand] = "&"; | |
464 Token.tochars[TOK.TOKandand] = "&&"; | |
465 Token.tochars[TOK.TOKor] = "|"; | |
466 Token.tochars[TOK.TOKoror] = "||"; | |
467 Token.tochars[TOK.TOKarray] = "[]"; | |
468 Token.tochars[TOK.TOKindex] = "[i]"; | |
469 Token.tochars[TOK.TOKaddress] = "&"; | |
470 Token.tochars[TOK.TOKstar] = "*"; | |
471 Token.tochars[TOK.TOKtilde] = "~"; | |
472 Token.tochars[TOK.TOKdollar] = "$"; | |
473 Token.tochars[TOK.TOKcast] = "cast"; | |
474 Token.tochars[TOK.TOKplusplus] = "++"; | |
475 Token.tochars[TOK.TOKminusminus] = "--"; | |
476 Token.tochars[TOK.TOKtype] = "type"; | |
477 Token.tochars[TOK.TOKquestion] = "?"; | |
478 Token.tochars[TOK.TOKneg] = "-"; | |
479 Token.tochars[TOK.TOKuadd] = "+"; | |
480 Token.tochars[TOK.TOKvar] = "var"; | |
481 Token.tochars[TOK.TOKaddass] = "+="; | |
482 Token.tochars[TOK.TOKminass] = "-="; | |
483 Token.tochars[TOK.TOKmulass] = "*="; | |
484 Token.tochars[TOK.TOKdivass] = "/="; | |
485 Token.tochars[TOK.TOKmodass] = "%="; | |
486 Token.tochars[TOK.TOKshlass] = "<<="; | |
487 Token.tochars[TOK.TOKshrass] = ">>="; | |
488 Token.tochars[TOK.TOKushrass] = ">>>="; | |
489 Token.tochars[TOK.TOKandass] = "&="; | |
490 Token.tochars[TOK.TOKorass] = "|="; | |
491 Token.tochars[TOK.TOKcatass] = "~="; | |
492 Token.tochars[TOK.TOKcat] = "~"; | |
493 Token.tochars[TOK.TOKcall] = "call"; | |
494 Token.tochars[TOK.TOKidentity] = "is"; | |
495 Token.tochars[TOK.TOKnotidentity] = "!is"; | |
496 | |
497 Token.tochars[TOK.TOKorass] = "|="; | |
498 Token.tochars[TOK.TOKidentifier] = "identifier"; | |
499 Token.tochars[TOK.TOKat] = "@"; | |
130
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
500 Token.tochars[TOK.TOKpow] = "^^"; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
501 //Token.tochars[TOK.TOKpowass] = "^^="; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
502 |
0 | 503 // For debugging |
73 | 504 Token.tochars[TOKerror] = "error"; |
0 | 505 Token.tochars[TOK.TOKdotexp] = "dotexp"; |
506 Token.tochars[TOK.TOKdotti] = "dotti"; | |
507 Token.tochars[TOK.TOKdotvar] = "dotvar"; | |
508 Token.tochars[TOK.TOKdottype] = "dottype"; | |
509 Token.tochars[TOK.TOKsymoff] = "symoff"; | |
510 Token.tochars[TOK.TOKarraylength] = "arraylength"; | |
511 Token.tochars[TOK.TOKarrayliteral] = "arrayliteral"; | |
512 Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral"; | |
513 Token.tochars[TOK.TOKstructliteral] = "structliteral"; | |
514 Token.tochars[TOK.TOKstring] = "string"; | |
515 Token.tochars[TOK.TOKdsymbol] = "symbol"; | |
516 Token.tochars[TOK.TOKtuple] = "tuple"; | |
517 Token.tochars[TOK.TOKdeclaration] = "declaration"; | |
518 Token.tochars[TOK.TOKdottd] = "dottd"; | |
519 Token.tochars[TOK.TOKon_scope_exit] = "scope(exit)"; | |
520 Token.tochars[TOK.TOKon_scope_success] = "scope(success)"; | |
521 Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)"; | |
522 } | |
523 | |
524 static Identifier idPool(string s) | |
525 { | |
526 StringValue* sv = stringtable.update(s); | |
527 Identifier id = cast(Identifier) sv.ptrvalue; | |
528 if (id is null) | |
529 { | |
530 id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); | |
531 sv.ptrvalue = cast(void*)id; | |
532 } | |
533 | |
534 return id; | |
535 } | |
536 | |
537 static Identifier uniqueId(string s) | |
538 { | |
539 static int num; | |
540 return uniqueId(s, ++num); | |
541 } | |
542 | |
543 /********************************************* | |
544 * Create a unique identifier using the prefix s. | |
545 */ | |
546 static Identifier uniqueId(string s, int num) | |
547 { | |
548 char buffer[32]; | |
549 size_t slen = s.length; | |
550 | |
551 assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof); | |
552 int len = sprintf(buffer.ptr, "%.*s%d", s, num); | |
553 | |
554 return idPool(buffer[0..len].idup); | |
555 } | |
556 | |
557 TOK nextToken() | |
558 { | |
559 Token *t; | |
560 | |
561 if (token.next) | |
562 { | |
563 t = token.next; | |
564 memcpy(&token, t, Token.sizeof); | |
565 t.next = freelist; | |
566 freelist = t; | |
567 } | |
568 else | |
569 { | |
570 scan(&token); | |
571 } | |
572 | |
573 //token.print(); | |
574 return token.value; | |
575 } | |
576 | |
577 /*********************** | |
578 * Look ahead at next token's value. | |
579 */ | |
580 TOK peekNext() | |
581 { | |
582 return peek(&token).value; | |
583 } | |
584 | |
585 TOK peekNext2() | |
586 { | |
587 assert(false); | |
588 } | |
589 | |
590 void scan(Token* t) | |
591 { | |
592 uint lastLine = loc.linnum; | |
593 uint linnum; | |
594 | |
595 t.blockComment = null; | |
596 t.lineComment = null; | |
597 while (1) | |
598 { | |
599 t.ptr = p; | |
600 //printf("p = %p, *p = '%c'\n",p,*p); | |
601 switch (*p) | |
602 { | |
603 case 0: | |
604 case 0x1A: | |
605 t.value = TOK.TOKeof; // end of file | |
606 return; | |
607 | |
608 case ' ': | |
609 case '\t': | |
610 case '\v': | |
611 case '\f': | |
612 p++; | |
613 continue; // skip white space | |
614 | |
615 case '\r': | |
616 p++; | |
617 if (*p != '\n') // if CR stands by itself | |
618 loc.linnum++; | |
619 continue; // skip white space | |
620 | |
621 case '\n': | |
622 p++; | |
623 loc.linnum++; | |
624 continue; // skip white space | |
625 | |
626 case '0': case '1': case '2': case '3': case '4': | |
627 case '5': case '6': case '7': case '8': case '9': | |
628 t.value = number(t); | |
629 return; | |
630 | |
631 version (CSTRINGS) { | |
632 case '\'': | |
633 t.value = charConstant(t, 0); | |
634 return; | |
635 | |
636 case '"': | |
637 t.value = stringConstant(t,0); | |
638 return; | |
639 | |
640 case 'l': | |
641 case 'L': | |
642 if (p[1] == '\'') | |
643 { | |
644 p++; | |
645 t.value = charConstant(t, 1); | |
646 return; | |
647 } | |
648 else if (p[1] == '"') | |
649 { | |
650 p++; | |
651 t.value = stringConstant(t, 1); | |
652 return; | |
653 } | |
654 } else { | |
655 case '\'': | |
656 t.value = charConstant(t,0); | |
657 return; | |
658 | |
659 case 'r': | |
660 if (p[1] != '"') | |
661 goto case_ident; | |
662 p++; | |
663 case '`': | |
664 t.value = wysiwygStringConstant(t, *p); | |
665 return; | |
666 | |
667 case 'x': | |
668 if (p[1] != '"') | |
669 goto case_ident; | |
670 p++; | |
671 t.value = hexStringConstant(t); | |
672 return; | |
673 | |
674 version (DMDV2) { | |
675 case 'q': | |
676 if (p[1] == '"') | |
677 { | |
678 p++; | |
679 t.value = delimitedStringConstant(t); | |
680 return; | |
681 } | |
682 else if (p[1] == '{') | |
683 { | |
684 p++; | |
685 t.value = tokenStringConstant(t); | |
686 return; | |
687 } | |
688 else | |
689 goto case_ident; | |
690 } | |
691 | |
692 case '"': | |
693 t.value = escapeStringConstant(t,0); | |
694 return; | |
695 version (TEXTUAL_ASSEMBLY_OUT) { | |
696 } else { | |
697 case '\\': // escaped string literal | |
698 { uint c; | |
699 ubyte* pstart = p; | |
700 | |
701 stringbuffer.reset(); | |
702 do | |
703 { | |
704 p++; | |
705 switch (*p) | |
706 { | |
707 case 'u': | |
708 case 'U': | |
709 case '&': | |
710 c = escapeSequence(); | |
711 stringbuffer.writeUTF8(c); | |
712 break; | |
713 | |
714 default: | |
715 c = escapeSequence(); | |
716 stringbuffer.writeByte(c); | |
717 break; | |
718 } | |
719 } while (*p == '\\'); | |
720 t.len = stringbuffer.offset; | |
721 stringbuffer.writeByte(0); | |
2 | 722 char* cc = cast(char*)GC.malloc(stringbuffer.offset); |
0 | 723 memcpy(cc, stringbuffer.data, stringbuffer.offset); |
724 t.ustring = cc; | |
725 t.postfix = 0; | |
726 t.value = TOK.TOKstring; | |
727 if (!global.params.useDeprecated) | |
728 error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart); | |
729 return; | |
730 } | |
731 } | |
732 case 'l': | |
733 case 'L': | |
734 } | |
735 case 'a': case 'b': case 'c': case 'd': case 'e': | |
736 case 'f': case 'g': case 'h': case 'i': case 'j': | |
737 case 'k': case 'm': case 'n': case 'o': | |
738 version (DMDV2) { | |
739 case 'p': /*case 'q': case 'r':*/ case 's': case 't': | |
740 } else { | |
741 case 'p': case 'q': /*case 'r':*/ case 's': case 't': | |
742 } | |
743 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': | |
744 case 'z': | |
745 case 'A': case 'B': case 'C': case 'D': case 'E': | |
746 case 'F': case 'G': case 'H': case 'I': case 'J': | |
747 case 'K': case 'M': case 'N': case 'O': | |
748 case 'P': case 'Q': case 'R': case 'S': case 'T': | |
749 case 'U': case 'V': case 'W': case 'X': case 'Y': | |
750 case 'Z': | |
751 case '_': | |
752 case_ident: | |
753 { ubyte c; | |
754 StringValue *sv; | |
755 Identifier id; | |
756 | |
757 do | |
758 { | |
759 c = *++p; | |
760 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); | |
761 sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); /// | |
762 id = cast(Identifier) sv.ptrvalue; | |
763 if (id is null) | |
764 { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); | |
765 sv.ptrvalue = cast(void*)id; | |
766 } | |
767 t.ident = id; | |
768 t.value = cast(TOK) id.value; | |
769 anyToken = 1; | |
770 if (*t.ptr == '_') // if special identifier token | |
771 { | |
772 static char date[11+1]; | |
773 static char time[8+1]; | |
774 static char timestamp[24+1]; | |
775 | |
776 if (!date[0]) // lazy evaluation | |
777 { time_t tm; | |
778 char *p; | |
779 | |
780 .time(&tm); | |
781 p = ctime(&tm); | |
782 assert(p); | |
783 sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20); | |
784 sprintf(time.ptr, "%.8s", p + 11); | |
785 sprintf(timestamp.ptr, "%.24s", p); | |
786 } | |
787 | |
788 ///version (DMDV1) { | |
789 /// if (mod && id == Id.FILE) | |
790 /// { | |
791 /// t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars()); | |
792 /// goto Lstr; | |
793 /// } | |
794 /// else if (mod && id == Id.LINE) | |
795 /// { | |
796 /// t.value = TOK.TOKint64v; | |
797 /// t.uns64value = loc.linnum; | |
798 /// } | |
799 /// else | |
800 ///} | |
801 if (id == Id.DATE) | |
802 { | |
803 t.ustring = date.ptr; | |
804 goto Lstr; | |
805 } | |
806 else if (id == Id.TIME) | |
807 { | |
808 t.ustring = time.ptr; | |
809 goto Lstr; | |
810 } | |
811 else if (id == Id.VENDOR) | |
812 { | |
813 t.ustring = "Digital Mars D".ptr; | |
814 goto Lstr; | |
815 } | |
816 else if (id == Id.TIMESTAMP) | |
817 { | |
818 t.ustring = timestamp.ptr; | |
819 Lstr: | |
820 t.value = TOK.TOKstring; | |
821 Llen: | |
822 t.postfix = 0; | |
823 t.len = strlen(cast(char*)t.ustring); | |
824 } | |
825 else if (id == Id.VERSIONX) | |
826 { | |
827 uint major = 0; | |
828 uint minor = 0; | |
829 | |
830 foreach (char cc; global.version_[1..$]) | |
831 { | |
832 if (isdigit(cc)) | |
833 minor = minor * 10 + cc - '0'; | |
834 else if (cc == '.') | |
835 { | |
836 major = minor; | |
837 minor = 0; | |
838 } | |
839 else | |
840 break; | |
841 } | |
842 t.value = TOK.TOKint64v; | |
843 t.uns64value = major * 1000 + minor; | |
844 } | |
845 ///version (DMDV2) { | |
846 else if (id == Id.EOFX) | |
847 { | |
848 t.value = TOK.TOKeof; | |
849 // Advance scanner to end of file | |
850 while (!(*p == 0 || *p == 0x1A)) | |
851 p++; | |
852 } | |
853 ///} | |
854 } | |
855 //printf("t.value = %d\n",t.value); | |
856 return; | |
857 } | |
858 | |
859 case '/': | |
860 p++; | |
861 switch (*p) | |
862 { | |
863 case '=': | |
864 p++; | |
865 t.value = TOK.TOKdivass; | |
866 return; | |
867 | |
868 case '*': | |
869 p++; | |
870 linnum = loc.linnum; | |
871 while (1) | |
872 { | |
873 while (1) | |
874 { | |
875 ubyte c = *p; | |
876 switch (c) | |
877 { | |
878 case '/': | |
879 break; | |
880 | |
881 case '\n': | |
882 loc.linnum++; | |
883 p++; | |
884 continue; | |
885 | |
886 case '\r': | |
887 p++; | |
888 if (*p != '\n') | |
889 loc.linnum++; | |
890 continue; | |
891 | |
892 case 0: | |
893 case 0x1A: | |
894 error("unterminated /* */ comment"); | |
895 p = end; | |
896 t.value = TOK.TOKeof; | |
897 return; | |
898 | |
899 default: | |
900 if (c & 0x80) | |
901 { uint u = decodeUTF(); | |
902 if (u == PS || u == LS) | |
903 loc.linnum++; | |
904 } | |
905 p++; | |
906 continue; | |
907 } | |
908 break; | |
909 } | |
910 p++; | |
911 if (p[-2] == '*' && p - 3 != t.ptr) | |
912 break; | |
913 } | |
914 if (commentToken) | |
915 { | |
916 t.value = TOK.TOKcomment; | |
917 return; | |
918 } | |
919 else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) | |
920 { // if /** but not /**/ | |
921 getDocComment(t, lastLine == linnum); | |
922 } | |
923 continue; | |
924 | |
925 case '/': // do // style comments | |
926 linnum = loc.linnum; | |
927 while (1) | |
928 { ubyte c = *++p; | |
929 switch (c) | |
930 { | |
931 case '\n': | |
932 break; | |
933 | |
934 case '\r': | |
935 if (p[1] == '\n') | |
936 p++; | |
937 break; | |
938 | |
939 case 0: | |
940 case 0x1A: | |
941 if (commentToken) | |
942 { | |
943 p = end; | |
944 t.value = TOK.TOKcomment; | |
945 return; | |
946 } | |
947 if (doDocComment && t.ptr[2] == '/') | |
948 getDocComment(t, lastLine == linnum); | |
949 p = end; | |
950 t.value = TOK.TOKeof; | |
951 return; | |
952 | |
953 default: | |
954 if (c & 0x80) | |
955 { uint u = decodeUTF(); | |
956 if (u == PS || u == LS) | |
957 break; | |
958 } | |
959 continue; | |
960 } | |
961 break; | |
962 } | |
963 | |
964 if (commentToken) | |
965 { | |
966 p++; | |
967 loc.linnum++; | |
968 t.value = TOK.TOKcomment; | |
969 return; | |
970 } | |
971 if (doDocComment && t.ptr[2] == '/') | |
972 getDocComment(t, lastLine == linnum); | |
973 | |
974 p++; | |
975 loc.linnum++; | |
976 continue; | |
977 | |
978 case '+': | |
979 { | |
980 int nest; | |
981 | |
982 linnum = loc.linnum; | |
983 p++; | |
984 nest = 1; | |
985 while (1) | |
986 { ubyte c = *p; | |
987 switch (c) | |
988 { | |
989 case '/': | |
990 p++; | |
991 if (*p == '+') | |
992 { | |
993 p++; | |
994 nest++; | |
995 } | |
996 continue; | |
997 | |
998 case '+': | |
999 p++; | |
1000 if (*p == '/') | |
1001 { | |
1002 p++; | |
1003 if (--nest == 0) | |
1004 break; | |
1005 } | |
1006 continue; | |
1007 | |
1008 case '\r': | |
1009 p++; | |
1010 if (*p != '\n') | |
1011 loc.linnum++; | |
1012 continue; | |
1013 | |
1014 case '\n': | |
1015 loc.linnum++; | |
1016 p++; | |
1017 continue; | |
1018 | |
1019 case 0: | |
1020 case 0x1A: | |
1021 error("unterminated /+ +/ comment"); | |
1022 p = end; | |
1023 t.value = TOK.TOKeof; | |
1024 return; | |
1025 | |
1026 default: | |
1027 if (c & 0x80) | |
1028 { uint u = decodeUTF(); | |
1029 if (u == PS || u == LS) | |
1030 loc.linnum++; | |
1031 } | |
1032 p++; | |
1033 continue; | |
1034 } | |
1035 break; | |
1036 } | |
1037 if (commentToken) | |
1038 { | |
1039 t.value = TOK.TOKcomment; | |
1040 return; | |
1041 } | |
1042 if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) | |
1043 { // if /++ but not /++/ | |
1044 getDocComment(t, lastLine == linnum); | |
1045 } | |
1046 continue; | |
1047 } | |
1048 | |
1049 default: | |
1050 break; /// | |
1051 } | |
1052 t.value = TOK.TOKdiv; | |
1053 return; | |
1054 | |
1055 case '.': | |
1056 p++; | |
1057 if (isdigit(*p)) | |
1058 { /* Note that we don't allow ._1 and ._ as being | |
1059 * valid floating point numbers. | |
1060 */ | |
1061 p--; | |
1062 t.value = inreal(t); | |
1063 } | |
1064 else if (p[0] == '.') | |
1065 { | |
1066 if (p[1] == '.') | |
1067 { p += 2; | |
1068 t.value = TOK.TOKdotdotdot; | |
1069 } | |
1070 else | |
1071 { p++; | |
1072 t.value = TOK.TOKslice; | |
1073 } | |
1074 } | |
1075 else | |
1076 t.value = TOK.TOKdot; | |
1077 return; | |
1078 | |
1079 case '&': | |
1080 p++; | |
1081 if (*p == '=') | |
1082 { p++; | |
1083 t.value = TOK.TOKandass; | |
1084 } | |
1085 else if (*p == '&') | |
1086 { p++; | |
1087 t.value = TOK.TOKandand; | |
1088 } | |
1089 else | |
1090 t.value = TOK.TOKand; | |
1091 return; | |
1092 | |
1093 case '|': | |
1094 p++; | |
1095 if (*p == '=') | |
1096 { p++; | |
1097 t.value = TOK.TOKorass; | |
1098 } | |
1099 else if (*p == '|') | |
1100 { p++; | |
1101 t.value = TOK.TOKoror; | |
1102 } | |
1103 else | |
1104 t.value = TOK.TOKor; | |
1105 return; | |
1106 | |
1107 case '-': | |
1108 p++; | |
1109 if (*p == '=') | |
1110 { p++; | |
1111 t.value = TOK.TOKminass; | |
1112 } | |
1113 /// #if 0 | |
1114 /// else if (*p == '>') | |
1115 /// { p++; | |
1116 /// t.value = TOK.TOKarrow; | |
1117 /// } | |
1118 /// #endif | |
1119 else if (*p == '-') | |
1120 { p++; | |
1121 t.value = TOK.TOKminusminus; | |
1122 } | |
1123 else | |
1124 t.value = TOK.TOKmin; | |
1125 return; | |
1126 | |
1127 case '+': | |
1128 p++; | |
1129 if (*p == '=') | |
1130 { p++; | |
1131 t.value = TOK.TOKaddass; | |
1132 } | |
1133 else if (*p == '+') | |
1134 { p++; | |
1135 t.value = TOK.TOKplusplus; | |
1136 } | |
1137 else | |
1138 t.value = TOK.TOKadd; | |
1139 return; | |
1140 | |
1141 case '<': | |
1142 p++; | |
1143 if (*p == '=') | |
1144 { p++; | |
1145 t.value = TOK.TOKle; // <= | |
1146 } | |
1147 else if (*p == '<') | |
1148 { p++; | |
1149 if (*p == '=') | |
1150 { p++; | |
1151 t.value = TOK.TOKshlass; // <<= | |
1152 } | |
1153 else | |
1154 t.value = TOK.TOKshl; // << | |
1155 } | |
1156 else if (*p == '>') | |
1157 { p++; | |
1158 if (*p == '=') | |
1159 { p++; | |
1160 t.value = TOK.TOKleg; // <>= | |
1161 } | |
1162 else | |
1163 t.value = TOK.TOKlg; // <> | |
1164 } | |
1165 else | |
1166 t.value = TOK.TOKlt; // < | |
1167 return; | |
1168 | |
1169 case '>': | |
1170 p++; | |
1171 if (*p == '=') | |
1172 { p++; | |
1173 t.value = TOK.TOKge; // >= | |
1174 } | |
1175 else if (*p == '>') | |
1176 { p++; | |
1177 if (*p == '=') | |
1178 { p++; | |
1179 t.value = TOK.TOKshrass; // >>= | |
1180 } | |
1181 else if (*p == '>') | |
1182 { p++; | |
1183 if (*p == '=') | |
1184 { p++; | |
1185 t.value = TOK.TOKushrass; // >>>= | |
1186 } | |
1187 else | |
1188 t.value = TOK.TOKushr; // >>> | |
1189 } | |
1190 else | |
1191 t.value = TOK.TOKshr; // >> | |
1192 } | |
1193 else | |
1194 t.value = TOK.TOKgt; // > | |
1195 return; | |
1196 | |
1197 case '!': | |
1198 p++; | |
1199 if (*p == '=') | |
1200 { p++; | |
1201 if (*p == '=' && global.params.Dversion == 1) | |
1202 { p++; | |
1203 t.value = TOK.TOKnotidentity; // !== | |
1204 } | |
1205 else | |
1206 t.value = TOK.TOKnotequal; // != | |
1207 } | |
1208 else if (*p == '<') | |
1209 { p++; | |
1210 if (*p == '>') | |
1211 { p++; | |
1212 if (*p == '=') | |
1213 { p++; | |
1214 t.value = TOK.TOKunord; // !<>= | |
1215 } | |
1216 else | |
1217 t.value = TOK.TOKue; // !<> | |
1218 } | |
1219 else if (*p == '=') | |
1220 { p++; | |
1221 t.value = TOK.TOKug; // !<= | |
1222 } | |
1223 else | |
1224 t.value = TOK.TOKuge; // !< | |
1225 } | |
1226 else if (*p == '>') | |
1227 { p++; | |
1228 if (*p == '=') | |
1229 { p++; | |
1230 t.value = TOK.TOKul; // !>= | |
1231 } | |
1232 else | |
1233 t.value = TOK.TOKule; // !> | |
1234 } | |
1235 else | |
1236 t.value = TOK.TOKnot; // ! | |
1237 return; | |
1238 | |
1239 case '=': | |
1240 p++; | |
1241 if (*p == '=') | |
1242 { p++; | |
1243 if (*p == '=' && global.params.Dversion == 1) | |
1244 { p++; | |
1245 t.value = TOK.TOKidentity; // === | |
1246 } | |
1247 else | |
1248 t.value = TOK.TOKequal; // == | |
1249 } | |
1250 else | |
1251 t.value = TOK.TOKassign; // = | |
1252 return; | |
1253 | |
1254 case '~': | |
1255 p++; | |
1256 if (*p == '=') | |
1257 { p++; | |
1258 t.value = TOK.TOKcatass; // ~= | |
1259 } | |
1260 else | |
1261 t.value = TOK.TOKtilde; // ~ | |
1262 return; | |
130
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1263 |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1264 version(DMDV2) { |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1265 case '^': |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1266 p++; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1267 if (*p == '^') |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1268 { p++; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1269 //static if (false) { |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1270 // if (*p == '=') |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1271 // { p++; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1272 // t.value = TOKpowass; // ^^= |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1273 // } |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1274 // else |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1275 //} |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1276 t.value = TOKpow; // ^^ |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1277 } |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1278 else if (*p == '=') |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1279 { p++; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1280 t.value = TOKxorass; // ^= |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1281 } |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1282 else |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1283 t.value = TOKxor; // ^ |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1284 return; |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1285 } |
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1286 |
0 | 1287 /* |
1288 #define SINGLE(c,tok) case c: p++; t.value = tok; return; | |
1289 | |
1290 SINGLE('(', TOKlparen) | |
1291 SINGLE(')', TOKrparen) | |
1292 SINGLE('[', TOKlbracket) | |
1293 SINGLE(']', TOKrbracket) | |
1294 SINGLE('{', TOKlcurly) | |
1295 SINGLE('}', TOKrcurly) | |
1296 SINGLE('?', TOKquestion) | |
1297 SINGLE(',', TOKcomma) | |
1298 SINGLE(';', TOKsemicolon) | |
1299 SINGLE(':', TOKcolon) | |
1300 SINGLE('$', TOKdollar) | |
1301 SINGLE('@', TOKat) | |
1302 | |
1303 #undef SINGLE | |
1304 | |
1305 #define DOUBLE(c1,tok1,c2,tok2) \ | |
1306 case c1: \ | |
1307 p++; \ | |
1308 if (*p == c2) \ | |
1309 { p++; \ | |
1310 t.value = tok2; \ | |
1311 } \ | |
1312 else \ | |
1313 t.value = tok1; \ | |
1314 return; | |
1315 | |
1316 DOUBLE('*', TOKmul, '=', TOKmulass) | |
1317 DOUBLE('%', TOKmod, '=', TOKmodass) | |
130
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1318 #if DMDV1 |
0 | 1319 DOUBLE('^', TOKxor, '=', TOKxorass) |
130
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1320 #endif |
0 | 1321 #undef DOUBLE |
1322 */ | |
1323 | |
1324 case '(': p++; t.value = TOK.TOKlparen; return; | |
1325 case ')': p++; t.value = TOK.TOKrparen; return; | |
1326 case '[': p++; t.value = TOK.TOKlbracket; return; | |
1327 case ']': p++; t.value = TOK.TOKrbracket; return; | |
1328 case '{': p++; t.value = TOK.TOKlcurly; return; | |
1329 case '}': p++; t.value = TOK.TOKrcurly; return; | |
1330 case '?': p++; t.value = TOK.TOKquestion; return; | |
1331 case ',': p++; t.value = TOK.TOKcomma; return; | |
1332 case ';': p++; t.value = TOK.TOKsemicolon; return; | |
1333 case ':': p++; t.value = TOK.TOKcolon; return; | |
1334 case '$': p++; t.value = TOK.TOKdollar; return; | |
1335 case '@': p++; t.value = TOK.TOKat; return; | |
1336 | |
1337 case '*': | |
1338 p++; | |
1339 if (*p == '=') { | |
1340 p++; | |
1341 t.value = TOK.TOKmulass; | |
1342 } else { | |
1343 t.value = TOK.TOKmul; | |
1344 } | |
1345 return; | |
1346 | |
1347 case '%': | |
1348 p++; | |
1349 if (*p == '=') { | |
1350 p++; | |
1351 t.value = TOK.TOKmodass; | |
1352 } else { | |
1353 t.value = TOK.TOKmod; | |
1354 } | |
1355 return; | |
130
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1356 version(DMDV1) { |
0 | 1357 case '^': |
1358 p++; | |
1359 if (*p == '=') { | |
1360 p++; | |
1361 t.value = TOK.TOKxorass; | |
1362 } else { | |
1363 t.value = TOK.TOKxor; | |
1364 } | |
1365 return; | |
130
60bb0fe4563e
dmdfe 2.037 first main iteration
Eldar Insafutdinov <e.insafutdinov@gmail.com>
parents:
114
diff
changeset
|
1366 } |
0 | 1367 case '#': |
1368 p++; | |
1369 pragma_(); | |
1370 continue; | |
1371 | |
1372 default: | |
1373 { ubyte c = *p; | |
1374 | |
1375 if (c & 0x80) | |
1376 { uint u = decodeUTF(); | |
1377 | |
1378 // Check for start of unicode identifier | |
1379 if (isUniAlpha(u)) | |
1380 goto case_ident; | |
1381 | |
1382 if (u == PS || u == LS) | |
1383 { | |
1384 loc.linnum++; | |
1385 p++; | |
1386 continue; | |
1387 } | |
1388 } | |
1389 if (isprint(c)) | |
1390 error("unsupported char '%c'", c); | |
1391 else | |
1392 error("unsupported char 0x%02x", c); | |
1393 p++; | |
1394 continue; | |
1395 } | |
1396 } | |
1397 } | |
1398 } | |
1399 | |
/**
 * Return the token that follows ct.
 *
 * If ct already has a successor linked through ct.next, that token is
 * returned as is.  Otherwise a fresh Token is allocated, filled in by
 * scan(), terminated with a null next pointer and appended to ct, so
 * that repeated peeks at the same token do not scan again.
 */
Token* peek(Token* ct)
{
    // Already looked ahead from here: reuse the cached token.
    if (ct.next)
        return ct.next;

    Token* ahead = new Token();
    scan(ahead);
    ahead.next = null;
    ct.next = ahead;
    return ahead;
}
1415 | |
/**
 * Look ahead from tk, tracking parenthesis and curly-brace nesting.
 *
 * The parenthesis depth starts at 1, i.e. one '(' is treated as already
 * open when the function is entered.  Tokens are fetched with peek(), so
 * the token stream itself is not consumed.
 *
 * Returns:
 *  - the token following the ')' that brings the parenthesis depth to 0;
 *  - a '}' that would make the curly nesting negative;
 *  - a ';' seen while not inside any curly braces;
 *  - the end-of-file token.
 */
Token* peekPastParen(Token* tk)
{
    //printf("peekPastParen()\n");
    int parenDepth = 1;
    int curlyDepth = 0;

    for (;;)
    {
        tk = peek(tk);
        //tk.print();
        switch (tk.value)
        {
            case TOK.TOKlparen:
                ++parenDepth;
                break;

            case TOK.TOKrparen:
                // Matching close paren found: hand back what comes after it.
                if (--parenDepth == 0)
                    return peek(tk);
                break;

            case TOK.TOKlcurly:
                ++curlyDepth;
                break;

            case TOK.TOKrcurly:
                // An unbalanced '}' ends the search.
                if (--curlyDepth < 0)
                    return tk;
                break;

            case TOK.TOKsemicolon:
                // A ';' only stops the search outside of curly braces.
                if (curlyDepth == 0)
                    return tk;
                break;

            case TOK.TOKeof:
                return tk;

            default:
                break;
        }
    }
}
1461 | |
/*******************************************
 * Parse escape sequence.
 *
 * Reads the escape selector from *p (the backslash itself is not looked
 * at here -- presumably the caller has already consumed it) and advances
 * p past the characters that belong to the escape.
 *
 * Handled forms:
 *  - \' \" \? \\            the character itself
 *  - \a \b \f \n \r \t \v   the usual control characters
 *  - \xHH \uHHHH \UHHHHHHHH hexadecimal value (2, 4 or 8 digits)
 *  - \&name;                named character entity
 *  - \o \oo \ooo            octal value (at most 3 digits)
 *  - NUL or 0x1A            yields '\\' without advancing p
 *
 * Returns: the decoded character value.  Malformed input is reported
 * through error() and a best-effort value is returned.
 */
uint escapeSequence()
{
    uint c = *p;

    version (TEXTUAL_ASSEMBLY_OUT) {
        return c;
    }

    switch (c)
    {
        // Characters that stand for themselves.
        case '\'':
        case '"':
        case '?':
        case '\\':
            p++;
            break;

        case 'a':   c = 7;  p++; break;
        case 'b':   c = 8;  p++; break;
        case 'f':   c = 12; p++; break;
        case 'n':   c = 10; p++; break;
        case 'r':   c = 13; p++; break;
        case 't':   c = 9;  p++; break;
        case 'v':   c = 11; p++; break;

        case 'u':
        case 'U':
        case 'x':
        {
            // Number of hex digits required by each form.
            int ndigits = (c == 'u') ? 4 : (c == 'U') ? 8 : 2;

            p++;
            c = *p;
            if (!ishex(cast(ubyte)c))
            {
                error("undefined escape hex sequence \\%c\n",c);
                break;
            }

            uint value = 0;
            int count = 0;
            for (;;)
            {
                // Convert the current hex digit to its numeric value.
                if (isdigit(c))
                    c -= '0';
                else if (islower(c))
                    c -= 'a' - 10;
                else
                    c -= 'A' - 10;
                value = value * 16 + c;
                c = *++p;
                if (++count == ndigits)
                    break;
                if (!ishex(cast(ubyte)c))
                {
                    error("escape hex sequence has %d hex digits instead of %d", count, ndigits);
                    break;
                }
            }
            // \x may produce any byte; \u and \U must be valid code points.
            if (ndigits != 2 && !utf_isValidDchar(value))
            {
                error("invalid UTF character \\U%08x", value);
                value = '?';    // recover with valid UTF character
            }
            c = value;
            break;
        }

        case '&':           // named character entity
        {
            ubyte* idstart = ++p;
            for (;;)
            {
                if (*p == ';')
                {
                    c = HtmlNamedEntity(idstart, p - idstart);
                    if (c == ~0)
                    {
                        error("unnamed character entity &%s;", idstart[0..(p - idstart)]);
                        c = ' ';
                    }
                    p++;
                    break;
                }
                if (!(isalpha(*p) ||
                      (p != idstart + 1 && isdigit(*p))))
                {
                    error("unterminated named entity");
                    break;
                }
                p++;
            }
            break;
        }

        case 0:
        case 0x1A:          // end of file
            c = '\\';
            break;

        default:
        {
            if (!isoctal(cast(ubyte)c))
            {
                error("undefined escape sequence \\%c\n",c);
                break;
            }

            // Up to three octal digits.
            uint value = 0;
            int count = 0;
            do
            {
                value = value * 8 + (c - '0');
                c = *++p;
            } while (++count < 3 && isoctal(cast(ubyte)c));
            c = value;
            if (c > 0xFF)
                error("0%03o is larger than a byte", c);
            break;
        }
    }
    return c;
}
1590 | |
/**************************************
 * Lex a "what you see is what you get" string, in which no escape
 * sequences are interpreted.
 *
 * Params:
 *  t  = token that receives the string (ustring, len, postfix)
 *  tc = terminating character; only '"' or '`' can end the string,
 *       and only when equal to tc
 *
 * The first p++ steps over one character (presumably the opening
 * delimiter).  Line ends are normalised: "\r\n" and a lone '\r' are both
 * stored as a single '\n'.  Non-ASCII sequences are decoded and written
 * back as UTF-8.
 *
 * Returns: TOKstring.  If NUL or 0x1A is reached before the terminator,
 * an error is reported and the token holds an empty string.
 */
TOK wysiwygStringConstant(Token* t, int tc)
{
    Loc start = loc;

    p++;
    stringbuffer.reset();
    for (;;)
    {
        uint ch = *p++;

        if (ch == '\n')
        {
            loc.linnum++;
        }
        else if (ch == '\r')
        {
            if (*p == '\n')
                continue;       // ignore, the '\n' is handled next round
            ch = '\n';          // treat EndOfLine as \n character
            loc.linnum++;
        }
        else if (ch == 0 || ch == 0x1A)
        {
            error("unterminated string constant starting at %s", start.toChars());
            t.ustring = "".ptr;
            t.len = 0;
            t.postfix = 0;
            return TOKstring;
        }
        else if ((ch == '"' || ch == '`') && ch == tc)
        {
            // Terminator: copy the collected bytes plus a trailing NUL.
            t.len = stringbuffer.offset;
            stringbuffer.writeByte(0);
            char* tmp = cast(char*)GC.malloc(stringbuffer.offset);
            memcpy(tmp, stringbuffer.data, stringbuffer.offset);
            t.ustring = tmp;
            stringPostfix(t);
            return TOKstring;
        }
        else if (ch & 0x80)
        {
            // Re-read the multi-byte sequence starting at its first byte.
            p--;
            uint u = decodeUTF();
            p++;
            if (u == PS || u == LS)
                loc.linnum++;
            stringbuffer.writeUTF8(u);
            continue;
        }
        stringbuffer.writeByte(ch);
    }

    assert(false);
}
1653 | |
/**************************************
 * Lex hex strings:
 *	x"0A ae 34FE BD"
 *
 * Params:
 *  t = token that receives the string (ustring, len, postfix)
 *
 * The first p++ steps over one character (presumably the opening '"').
 * Pairs of hex digits are combined into bytes; white space and line
 * ends between digits are skipped.  The Unicode line and paragraph
 * separators (LS, PS) are treated like any other line end: they bump the
 * line number and are skipped, instead of having their UTF-8 lead byte
 * mixed into the value as a bogus hex digit.
 *
 * Returns: TOKstring.  An odd number of digits, a non-hex character or a
 * missing closing quote is reported through error(); in the last case
 * the token holds an empty string.
 */
TOK hexStringConstant(Token* t)
{
    uint c;
    Loc start = loc;
    uint n = 0;     // number of hex digits seen so far
    uint v = 0;     // byte being assembled (high nibble first)

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        switch (c)
        {
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                continue;       // skip white space

            case '\r':
                // "\r\n" is counted once, when the '\n' is seen;
                // an isolated '\r' is treated as if it were a '\n'.
                if (*p != '\n')
                    loc.linnum++;
                continue;

            case '\n':
                loc.linnum++;
                continue;

            case 0:
            case 0x1A:
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOKstring;

            case '"':
                if (n & 1)
                {
                    error("odd number (%d) of hex characters in hex string", n);
                    stringbuffer.writeByte(v);
                }
                // Copy the collected bytes plus a trailing NUL.
                t.len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                void* mem = malloc(stringbuffer.offset);
                memcpy(mem, stringbuffer.data, stringbuffer.offset);
                t.ustring = cast(const(char)*)mem;
                stringPostfix(t);
                return TOKstring;

            default:
                if (c >= '0' && c <= '9')
                    c -= '0';
                else if (c >= 'a' && c <= 'f')
                    c -= 'a' - 10;
                else if (c >= 'A' && c <= 'F')
                    c -= 'A' - 10;
                else if (c & 0x80)
                {
                    // Re-read the multi-byte sequence from its first byte.
                    p--;
                    uint u = decodeUTF();
                    p++;
                    if (u == PS || u == LS)
                    {
                        // Unicode end of line: skip it, it is not a digit.
                        loc.linnum++;
                        continue;
                    }
                    error("non-hex character \\u%x", u);
                }
                else
                    error("non-hex character '%c'", c);

                if (n & 1)
                {
                    // Second nibble: complete the byte and store it.
                    v = (v << 4) | c;
                    stringbuffer.writeByte(v);
                }
                else
                    v = c;
                n++;
                break;
        }
    }
}
1737 | |
1738 version (DMDV2) { | |
/**************************************
 * Lex delimited strings:
 *	q"(foo(xxx))"   // "foo(xxx)"
 *	q"[foo(]"       // "foo("
 *	q"/foo]/"       // "foo]"
 *	q"HERE
 *	foo
 *	HERE"           // "foo\n"
 * Input:
 *	p is on the "
 *
 * Params:
 *  t = token that receives the string (ustring, len, postfix)
 *
 * The first character after the quote selects the delimiter:
 *  - ( { [ <      nesting delimiters, closed by the matching bracket
 *  - identifier   heredoc; the rest of that line must be blank and the
 *                 string ends at a line starting with the same identifier
 *  - anything else is its own closing delimiter
 *
 * The C character classification functions are only called for ASCII
 * values: c may hold a decoded code point, and passing a value that is
 * not representable as an unsigned char to them is undefined.
 *
 * Returns: TOKstring.  Errors are reported through error(); on end of
 * file the token holds an empty string.
 */
TOK delimitedStringConstant(Token* t)
{
    uint c;
    Loc start = loc;
    uint delimleft = 0;
    uint delimright = 0;
    uint nest = 1;
    uint nestcount;
    Identifier hereid = null;
    uint blankrol = 0;      // !=0: rest of the heredoc opening line must be blank
    uint startline = 0;     // !=0: c is the first character of a line

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        //printf("c = '%c'\n", c);
        switch (c)
        {
            case '\n':
            Lnextline:
                loc.linnum++;
                startline = 1;
                if (blankrol)
                {   blankrol = 0;
                    continue;
                }
                if (hereid)
                {
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;

            case '\r':
                if (*p == '\n')
                    continue;   // ignore
                c = '\n';       // treat EndOfLine as \n character
                goto Lnextline;

            case 0:
            case 0x1A:
                goto Lerror;

            default:
                if (c & 0x80)
                {   p--;
                    c = decodeUTF();
                    p++;
                    if (c == PS || c == LS)
                        goto Lnextline;
                }
                break;
        }
        if (delimleft == 0)
        {
            // First character: decide what kind of delimiter this is.
            delimleft = c;
            nest = 1;
            nestcount = 1;
            if (c == '(')
                delimright = ')';
            else if (c == '{')
                delimright = '}';
            else if (c == '[')
                delimright = ']';
            else if (c == '<')
                delimright = '>';
            else if ((c < 0x80 && isalpha(c)) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
            {
                // Start of identifier; must be a heredoc
                Token t2;
                p--;
                scan(&t2);      // read in heredoc identifier
                if (t2.value != TOKidentifier)
                {
                    error("identifier expected for heredoc, not %s", t2.toChars());
                    delimright = c;
                }
                else
                {
                    hereid = t2.ident;
                    //printf("hereid = '%s'\n", hereid.toChars());
                    blankrol = 1;
                }
                nest = 0;
            }
            else
            {
                delimright = c;
                nest = 0;
                if (c < 0x80 && isspace(c))
                    error("delimiter cannot be whitespace");
            }
        }
        else
        {
            if (blankrol)
            {
                error("heredoc rest of line should be blank");
                blankrol = 0;
                continue;
            }
            if (nest == 1)
            {
                if (c == delimleft)
                    nestcount++;
                else if (c == delimright)
                {   nestcount--;
                    if (nestcount == 0)
                        goto Ldone;
                }
            }
            else if (c == delimright)
                goto Ldone;
            // A heredoc identifier may start with a letter or '_' (the same
            // ASCII starts accepted when the heredoc was opened), so both
            // must be tried as the closing identifier.
            // NOTE(review): a closing identifier that starts with a
            // non-ASCII letter is still not recognised here.
            if (startline && hereid && c < 0x80 && (isalpha(c) || c == '_'))
            {
                Token t2;
                ubyte* psave = p;
                p--;
                scan(&t2);      // read in possible heredoc identifier
                //printf("endid = '%s'\n", t2.ident.toChars());
                if (t2.value == TOKidentifier && t2.ident.equals(hereid))
                {
                    /* should check that rest of line is blank
                     */
                    goto Ldone;
                }
                p = psave;      // not the terminator: rewind and keep the text
            }
            stringbuffer.writeUTF8(c);
            startline = 0;
        }
    }

Ldone:
    if (*p == '"')
        p++;
    else
        error("delimited string must end in %c\"", delimright);
    // Copy the collected bytes plus a trailing NUL.
    t.len = stringbuffer.offset;
    stringbuffer.writeByte(0);
    void* mem = malloc(stringbuffer.offset);
    memcpy(mem, stringbuffer.data, stringbuffer.offset);
    t.ustring = cast(const(char)*)mem;
    stringPostfix(t);
    return TOKstring;

Lerror:
    error("unterminated string constant starting at %s", start.toChars());
    t.ustring = "".ptr;
    t.len = 0;
    t.postfix = 0;
    return TOKstring;
}
1905 | |
8
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1906 /************************************** |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1907 * Lex delimited strings: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1908 * q{ foo(xxx) } // " foo(xxx) " |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1909 * q{foo(} // "foo(" |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1910 * q{{foo}"}"} // "{foo}"}"" |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1911 * Input: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1912 * p is on the q |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1913 */ |
0 | 1914 TOK tokenStringConstant(Token* t) |
1915 { | |
8
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1916 uint nest = 1; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1917 Loc start = loc; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1918 ubyte* pstart = ++p; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1919 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1920 while (true) |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1921 { |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1922 Token tok; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1923 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1924 scan(&tok); |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1925 switch (tok.value) |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1926 { |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1927 case TOKlcurly: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1928 nest++; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1929 continue; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1930 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1931 case TOKrcurly: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1932 if (--nest == 0) |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1933 goto Ldone; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1934 continue; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1935 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1936 case TOKeof: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1937 goto Lerror; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1938 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1939 default: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1940 continue; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1941 } |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1942 } |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1943 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1944 Ldone: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1945 t.len = p - 1 - pstart; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1946 char* tmp = cast(char*)GC.malloc(t.len + 1); |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1947 memcpy(tmp, pstart, t.len); |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1948 tmp[t.len] = 0; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1949 t.ustring = tmp; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1950 stringPostfix(t); |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1951 return TOKstring; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1952 |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1953 Lerror: |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1954 error("unterminated token string constant starting at %s", start.toChars()); |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1955 t.ustring = "".ptr; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1956 t.len = 0; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1957 t.postfix = 0; |
d42cd5917df4
wysiwyg strings, alias this, templates, TypeSlice implementation
dkoroskin <>
parents:
4
diff
changeset
|
1958 return TOKstring; |
0 | 1959 } |
1960 } | |
/**
 * Lex a double-quoted string literal in which backslash escapes are honoured.
 *
 * The character p addresses on entry is skipped (presumably the opening
 * quote), then characters are gathered in the shared stringbuffer until a
 * closing '"' is met. The token receives a GC-allocated copy of the text
 * including a terminating 0 byte (t.ustring), the length without that byte
 * (t.len), and the postfix recorded by stringPostfix().
 *
 * If 0 or 0x1A is met first, an error is reported, p is left on that
 * character and the token is set to an empty string with no postfix.
 *
 * Params:
 *  t    = token to fill in
 *  wide = not referenced by this implementation
 *
 * Returns: TOK.TOKstring on every path.
 */
TOK escapeStringConstant(Token* t, int wide)
{
    Loc opening = loc;      // remembered for the "unterminated" message

    p++;
    stringbuffer.reset();
    for (;;)
    {
        uint c = *p++;
        switch (c)
        {
version (TEXTUAL_ASSEMBLY_OUT) {
} else {
            case '\\':
                if (*p == 'u' || *p == 'U' || *p == '&')
                {
                    // These escapes are stored UTF-8 encoded rather than as one byte.
                    c = escapeSequence();
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                c = escapeSequence();
                break;
}
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;       // drop the '\r'; the '\n' is handled next time round
                c = '\n';           // a lone '\r' is stored as '\n'
                loc.linnum++;
                break;

            case '"':
            {
                t.len = stringbuffer.offset;    // length excludes the terminator
                stringbuffer.writeByte(0);
                char* copy = cast(char*)GC.malloc(stringbuffer.offset);
                memcpy(copy, stringbuffer.data, stringbuffer.offset);
                t.ustring = copy;
                stringPostfix(t);
                return TOK.TOKstring;
            }

            case 0:
            case 0x1A:
                p--;                // stay on the end-of-input character
                error("unterminated string constant starting at %s", opening.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOK.TOKstring;

            default:
                if (c & 0x80)
                {
                    // Lead byte of a multi-byte sequence: decode it from the start.
                    p--;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {
                        c = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(c);
    }

    assert(false);
}
2040 | |
/**
 * Lex a character literal.
 *
 * The character p addresses on entry is skipped, the following one is read
 * and classified, and the resulting value is stored in t.uns64value.
 * A closing '\'' must follow; otherwise an error is reported.
 *
 * Params:
 *  t    = token whose uns64value is filled in
 *  wide = not referenced by this implementation
 *
 * Returns: TOKcharv by default; TOKwcharv for a \u escape or a decoded
 *  value below 0xD800 or in 0xE000 .. 0xFFFD; TOKdcharv for \U and \&
 *  escapes and for every other decoded multi-byte value.
 */
TOK charConstant(Token* t, int wide)
{
    TOK kind = TOKcharv;

    //printf("Lexer.charConstant\n");
    p++;
    uint c = *p++;
    switch (c)
    {
version (TEXTUAL_ASSEMBLY_OUT) {
} else {
        case '\\':
        {
            // Look at the escape selector before escapeSequence() runs.
            auto selector = *p;
            t.uns64value = escapeSequence();
            if (selector == 'u')
                kind = TOKwcharv;
            else if (selector == 'U' || selector == '&')
                kind = TOKdcharv;
            break;
        }
}
        case '\n':
        L1:
            loc.linnum++;
            // falls through to the error report
        case '\r':
        case 0:
        case 0x1A:
        case '\'':
            error("unterminated character constant");
            return kind;

        default:
            if (c & 0x80)
            {
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS)
                    goto L1;        // Unicode line/paragraph separator: handled like '\n'
                bool wcharRange = c < 0xD800 || (c >= 0xE000 && c < 0xFFFE);
                kind = wcharRange ? TOKwcharv : TOKdcharv;
            }
            t.uns64value = c;
            break;
    }

    if (*p == '\'')
        p++;
    else
        error("unterminated character constant");
    return kind;
}
2108 | |
/***************************************
 * Record the postfix character of a string literal.
 *
 * If the character at p is 'c', 'w' or 'd', it is stored in t.postfix and
 * p moves past it. Otherwise t.postfix is set to 0 and p is unchanged.
 */
void stringPostfix(Token* t)
{
    auto suffix = *p;

    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t.postfix = suffix;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
2128 | |
/// Not implemented here: the body is an unconditional assertion failure.
uint wchar_(uint u)
{
    assert(0);
}
2133 | |
2134 /************************************** | |
2135 * Read in a number. | |
2136 * If it's an integer, store it in t.uns64value. | |
2137 * Integers can be decimal, octal, hex or binary. | |
2138 * Handle the suffixes U, UL, LU, L, etc. | |
2139 * If it's a real, rescan it with inreal(), which stores it in t.float80value. | |
2140 * Returns: | |
2141 * TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer, | |
2142 * otherwise whatever inreal() returns. | |
2143 */ | |
2144 | |
// Lex a numeric literal starting at p.
//
// A state machine copies the literal's characters into stringbuffer
// (embedded '_' are skipped). If the text turns out to be a real literal,
// p is rewound to the start and inreal() takes over. Otherwise the text is
// converted to an unsigned 64-bit value, the u/U/l/L suffixes are consumed,
// and the token kind is chosen from the suffix flags and the magnitude.
//
// Params:  t = token that receives the value in t.uns64value
// Returns: TOKint32v, TOKuns32v, TOKint64v or TOKuns64v, or inreal()'s result.
2145 TOK number(Token* t) | |
2146 { | |
2147 // We use a state machine to collect numbers | |
2148 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, | |
2149 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, | |
2150 STATE_hexh, STATE_error }; | |
2151 STATE state; | |
2152 | |
2153 enum FLAGS | |
2154 { | |
2155 FLAGS_undefined = 0, | |
2156 FLAGS_decimal = 1, // decimal | |
2157 FLAGS_unsigned = 2, // u or U suffix | |
2158 FLAGS_long = 4, // l or L suffix | |
2159 }; | |
2160 | |
2161 FLAGS flags = FLAGS.FLAGS_decimal; | |
2162 | |
2163 int i; | |
2164 int base; | |
2165 uint c; | |
2166 ubyte *start; | |
2167 TOK result; | |
2168 | |
2169 //printf("Lexer.number()\n"); | |
2170 state = STATE.STATE_initial; | |
2171 base = 0; | |
2172 stringbuffer.reset(); | |
2173 start = p; | |
// Each iteration inspects *p under the current state. Falling out of the
// switch appends c to stringbuffer and advances p; `continue` skips both
// unless the branch advanced p itself.
2174 while (1) | |
2175 { | |
2176 c = *p; | |
2177 switch (state) | |
2178 { | |
2179 case STATE.STATE_initial: // opening state | |
2180 if (c == '0') | |
2181 state = STATE.STATE_0; | |
2182 else | |
2183 state = STATE.STATE_decimal; | |
2184 break; | |
2185 | |
// A leading '0' has been stored: the literal is no longer flagged decimal,
// and the next character selects hex, binary, octal, a real, or the end.
2186 case STATE.STATE_0: | |
2187 flags = (flags & ~FLAGS.FLAGS_decimal); | |
2188 switch (c) | |
2189 { | |
2190 version (ZEROH) { | |
2191 case 'H': // 0h | |
2192 case 'h': | |
2193 goto hexh; | |
2194 } | |
2195 case 'X': | |
2196 case 'x': | |
2197 state = STATE.STATE_hex0; | |
2198 break; | |
2199 | |
2200 case '.': | |
2201 if (p[1] == '.') // .. is a separate token | |
2202 goto done; | |
2203 case 'i': | |
2204 case 'f': | |
2205 case 'F': | |
2206 goto real_; | |
2207 version (ZEROH) { | |
2208 case 'E': | |
2209 case 'e': | |
2210 goto case_hex; | |
2211 } | |
2212 case 'B': | |
2213 case 'b': | |
2214 state = STATE.STATE_binary0; | |
2215 break; | |
2216 | |
2217 case '0': case '1': case '2': case '3': | |
2218 case '4': case '5': case '6': case '7': | |
2219 state = STATE.STATE_octal; | |
2220 break; | |
2221 | |
2222 version (ZEROH) { | |
2223 case '8': case '9': case 'A': | |
2224 case 'C': case 'D': case 'F': | |
2225 case 'a': case 'c': case 'd': case 'f': | |
2226 case_hex: | |
2227 state = STATE.STATE_hexh; | |
2228 break; | |
2229 } | |
2230 case '_': | |
2231 state = STATE.STATE_octal; | |
2232 p++; | |
2233 continue; | |
2234 | |
2235 case 'L': | |
2236 if (p[1] == 'i') | |
2237 goto real_; | |
2238 goto done; | |
2239 | |
2240 default: | |
2241 goto done; | |
2242 } | |
2243 break; | |
2244 | |
2245 case STATE.STATE_decimal: // reading decimal number | |
2246 if (!isdigit(c)) | |
2247 { | |
2248 version (ZEROH) { | |
2249 if (ishex(c) | |
2250 || c == 'H' || c == 'h' | |
2251 ) | |
2252 goto hexh; | |
2253 } | |
2254 if (c == '_') // ignore embedded _ | |
2255 { p++; | |
2256 continue; | |
2257 } | |
2258 if (c == '.' && p[1] != '.') | |
2259 goto real_; | |
2260 else if (c == 'i' || c == 'f' || c == 'F' || | |
2261 c == 'e' || c == 'E') | |
2262 { | |
// real_ is also the target of gotos from the other states.
2263 real_: // It's a real number. Back up and rescan as a real | |
2264 p = start; | |
2265 return inreal(t); | |
2266 } | |
2267 else if (c == 'L' && p[1] == 'i') | |
2268 goto real_; | |
2269 goto done; | |
2270 } | |
2271 break; | |
2272 | |
// STATE_hex0 means "0x" was seen but no hex digit yet; the first digit
// moves the machine to STATE_hex.
2273 case STATE.STATE_hex0: // reading hex number | |
2274 case STATE.STATE_hex: | |
2275 if (! ishex(cast(ubyte)c)) | |
2276 { | |
2277 if (c == '_') // ignore embedded _ | |
2278 { p++; | |
2279 continue; | |
2280 } | |
2281 if (c == '.' && p[1] != '.') | |
2282 goto real_; | |
2283 if (c == 'P' || c == 'p' || c == 'i') | |
2284 goto real_; | |
2285 if (state == STATE.STATE_hex0) | |
2286 error("Hex digit expected, not '%c'", c); | |
2287 goto done; | |
2288 } | |
2289 state = STATE.STATE_hex; | |
2290 break; | |
2291 | |
2292 version (ZEROH) { | |
2293 hexh: | |
2294 state = STATE.STATE_hexh; | |
2295 case STATE.STATE_hexh: // parse numbers like 0FFh | |
2296 if (!ishex(c)) | |
2297 { | |
2298 if (c == 'H' || c == 'h') | |
2299 { | |
2300 p++; | |
2301 base = 16; | |
2302 goto done; | |
2303 } | |
2304 else | |
2305 { | |
2306 // Check for something like 1E3 or 0E24 | |
2307 if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) || | |
2308 memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset)) | |
2309 goto real_; | |
2310 error("Hex digit expected, not '%c'", c); | |
2311 goto done; | |
2312 } | |
2313 } | |
2314 break; | |
2315 } | |
2316 | |
// A digit 8 or 9 switches to STATE_octale so scanning can continue; the
// error for it is reported once, after `done`.
2317 case STATE.STATE_octal: // reading octal number | |
2318 case STATE.STATE_octale: // reading octal number with non-octal digits | |
2319 if (!isoctal(cast(ubyte)c)) | |
2320 { | |
2321 version (ZEROH) { | |
2322 if (ishex(c) | |
2323 || c == 'H' || c == 'h' | |
2324 ) | |
2325 goto hexh; | |
2326 } | |
2327 if (c == '_') // ignore embedded _ | |
2328 { p++; | |
2329 continue; | |
2330 } | |
2331 if (c == '.' && p[1] != '.') | |
2332 goto real_; | |
2333 if (c == 'i') | |
2334 goto real_; | |
2335 if (isdigit(c)) | |
2336 { | |
2337 state = STATE.STATE_octale; | |
2338 } | |
2339 else | |
2340 goto done; | |
2341 } | |
2342 break; | |
2343 | |
2344 case STATE.STATE_binary0: // starting binary number | |
2345 case STATE.STATE_binary: // reading binary number | |
2346 if (c != '0' && c != '1') | |
2347 { | |
2348 version (ZEROH) { | |
2349 if (ishex(c) | |
2350 || c == 'H' || c == 'h' | |
2351 ) | |
2352 goto hexh; | |
2353 } | |
2354 if (c == '_') // ignore embedded _ | |
2355 { p++; | |
2356 continue; | |
2357 } | |
2358 if (state == STATE.STATE_binary0) | |
2359 { error("binary digit expected"); | |
2360 state = STATE.STATE_error; | |
2361 break; | |
2362 } | |
2363 else | |
2364 goto done; | |
2365 } | |
2366 state = STATE.STATE_binary; | |
2367 break; | |
2368 | |
2369 case STATE.STATE_error: // for error recovery | |
2370 if (!isdigit(c)) // scan until non-digit | |
2371 goto done; | |
2372 break; | |
2373 | |
2374 default: | |
2375 assert(0); | |
2376 } | |
2377 stringbuffer.writeByte(c); | |
2378 p++; | |
2379 } | |
// The integer text (without underscores) is now in stringbuffer and p
// addresses the first character that was not stored.
2380 done: | |
2381 stringbuffer.writeByte(0); // terminate string | |
2382 if (state == STATE.STATE_octale) | |
2383 error("Octal digit expected"); | |
2384 | |
2385 ulong n; // unsigned >=64 bit integer type | |
2386 | |
// offset == 2 means one stored character plus the terminator: take the
// digit's value directly instead of running the general conversion.
2387 if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) | |
2388 n = stringbuffer.data[0] - '0'; | |
2389 else | |
2390 { | |
2391 // Convert string to integer | |
2392 version (__DMC__) { | |
2393 errno = 0; | |
2394 n = strtoull(cast(char*)stringbuffer.data,null,base); | |
2395 if (errno == ERANGE) | |
2396 error("integer overflow"); | |
2397 } else { | |
2398 // Not everybody implements strtoull() | |
// NOTE: this local `p` hides the Lexer member `p` until the enclosing
// else-block ends; it walks stringbuffer, not the source text.
2399 char* p = cast(char*)stringbuffer.data; | |
2400 int r = 10, d; | |
2401 | |
// Radix from the stored prefix: 0x/0X -> 16, 0b/0B -> 2,
// '0' followed by a digit -> 8, anything else stays 10.
2402 if (*p == '0') | |
2403 { | |
2404 if (p[1] == 'x' || p[1] == 'X') | |
2405 p += 2, r = 16; | |
2406 else if (p[1] == 'b' || p[1] == 'B') | |
2407 p += 2, r = 2; | |
2408 else if (isdigit(p[1])) | |
2409 p += 1, r = 8; | |
2410 } | |
2411 | |
2412 n = 0; | |
2413 while (1) | |
2414 { | |
2415 if (*p >= '0' && *p <= '9') | |
2416 d = *p - '0'; | |
2417 else if (*p >= 'a' && *p <= 'z') | |
2418 d = *p - 'a' + 10; | |
2419 else if (*p >= 'A' && *p <= 'Z') | |
2420 d = *p - 'A' + 10; | |
2421 else | |
2422 break; | |
2423 if (d >= r) | |
2424 break; | |
2425 ulong n2 = n * r; | |
2426 //printf("n2 / r = %llx, n = %llx\n", n2/r, n); | |
// Wrap-around detection: the multiplication wrapped if dividing back does
// not give n; the addition wrapped if the sum is smaller than n.
2427 if (n2 / r != n || n2 + d < n) | |
2428 { | |
2429 error ("integer overflow"); | |
2430 break; | |
2431 } | |
2432 | |
2433 n = n2 + d; | |
2434 p++; | |
2435 } | |
2436 } | |
2437 if (n.sizeof > 8 && | |
2438 n > 0xFFFFFFFFFFFFFFFF) // if n needs more than 64 bits | |
2439 error("integer overflow"); | |
2440 } | |
2441 | |
2442 // Parse trailing 'u', 'U', 'l' or 'L' in any combination | |
2443 while (1) | |
2444 { FLAGS f; | |
2445 | |
2446 switch (*p) | |
2447 { case 'U': | |
2448 case 'u': | |
2449 f = FLAGS.FLAGS_unsigned; | |
2450 goto L1; | |
2451 | |
2452 case 'l': | |
// The leading `1 ||` makes this condition always true, so a lower-case
// 'l' suffix is always reported, whatever useDeprecated says.
2453 if (1 || !global.params.useDeprecated) | |
2454 error("'l' suffix is deprecated, use 'L' instead"); | |
2455 case 'L': | |
2456 f = FLAGS.FLAGS_long; | |
2457 L1: | |
2458 p++; | |
// A suffix whose flag is already set (e.g. a second 'u') is an error.
2459 if (flags & f) | |
2460 error("unrecognized token"); | |
2461 flags = (flags | f); | |
2462 continue; | |
2463 default: | |
2464 break; | |
2465 } | |
2466 break; | |
2467 } | |
2468 | |
// Pick the token kind from the suffix/decimal flags and the bits set in n.
2469 switch (flags) | |
2470 { | |
2471 case FLAGS.FLAGS_undefined: | |
2472 /* Octal or Hexadecimal constant. | |
2473 * First that fits: int, uint, long, ulong | |
2474 */ | |
2475 if (n & 0x8000000000000000) | |
2476 result = TOK.TOKuns64v; | |
2477 else if (n & 0xFFFFFFFF00000000) | |
2478 result = TOK.TOKint64v; | |
2479 else if (n & 0x80000000) | |
2480 result = TOK.TOKuns32v; | |
2481 else | |
2482 result = TOK.TOKint32v; | |
2483 break; | |
2484 | |
2485 case FLAGS.FLAGS_decimal: | |
2486 /* First that fits: int, long, long long | |
2487 */ | |
2488 if (n & 0x8000000000000000) | |
2489 { error("signed integer overflow"); | |
2490 result = TOK.TOKuns64v; | |
2491 } | |
2492 else if (n & 0xFFFFFFFF80000000) | |
2493 result = TOK.TOKint64v; | |
2494 else | |
2495 result = TOK.TOKint32v; | |
2496 break; | |
2497 | |
2498 case FLAGS.FLAGS_unsigned: | |
2499 case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned: | |
2500 /* First that fits: uint, ulong | |
2501 */ | |
2502 if (n & 0xFFFFFFFF00000000) | |
2503 result = TOK.TOKuns64v; | |
2504 else | |
2505 result = TOK.TOKuns32v; | |
2506 break; | |
2507 | |
2508 case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long: | |
2509 if (n & 0x8000000000000000) | |
2510 { error("signed integer overflow"); | |
2511 result = TOK.TOKuns64v; | |
2512 } | |
2513 else | |
2514 result = TOK.TOKint64v; | |
2515 break; | |
2516 | |
2517 case FLAGS.FLAGS_long: | |
2518 if (n & 0x8000000000000000) | |
2519 result = TOK.TOKuns64v; | |
2520 else | |
2521 result = TOK.TOKint64v; | |
2522 break; | |
2523 | |
2524 case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: | |
2525 case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: | |
2526 result = TOK.TOKuns64v; | |
2527 break; | |
2528 | |
2529 default: | |
2530 debug { | |
2531 printf("%x\n",flags); | |
2532 } | |
2533 assert(0); | |
2534 } | |
2535 t.uns64value = n; | |
2536 return result; | |
2537 } | |
2538 | |
2539 /************************************** | |
2540 * Read in characters, converting them to real. | |
2541 * Bugs: | |
2542 * Exponent overflow not detected. | |
2543 * Too much requested precision is not detected. | |
2544 */ | |
// Lex a floating-point literal starting at p.
//
// A state machine (dblstate) copies the literal's characters into
// stringbuffer, dropping embedded '_'. The text is converted with strtold
// into t.float80value; an optional f/F/l/L suffix and an optional i/I
// imaginary suffix are then consumed to choose the token kind.
//
// Precondition (in): *p is '.' or a decimal digit.
// Postcondition (out): the result is one of the TOKfloat*/TOKimaginary* kinds.
2545 TOK inreal(Token* t) | |
2546 in | |
2547 { | |
2548 assert(*p == '.' || isdigit(*p)); | |
2549 } | |
2550 out (result) | |
2551 { | |
2552 switch (result) | |
2553 { | |
2554 case TOKfloat32v: | |
2555 case TOKfloat64v: | |
2556 case TOKfloat80v: | |
2557 case TOKimaginary32v: | |
2558 case TOKimaginary64v: | |
2559 case TOKimaginary80v: | |
2560 break; | |
2561 | |
2562 default: | |
2563 assert(0); | |
2564 } | |
2565 } | |
2566 body | |
2567 { | |
2568 int dblstate; | |
2569 uint c; | |
2570 char hex; // is this a hexadecimal-floating-constant? | |
2571 TOK result; | |
2572 | |
2573 //printf("Lexer.inreal()\n"); | |
2574 stringbuffer.reset(); | |
2575 dblstate = 0; | |
2576 hex = 0; | |
// Outer loop: fetch one character per iteration and append it to
// stringbuffer once the inner loop has settled on a state for it.
2577 Lnext: | |
2578 while (true) | |
2579 { | |
2580 // Get next char from input | |
2581 c = *p++; | |
2582 //printf("dblstate = %d, c = '%c'\n", dblstate, c); | |
// Inner loop: `continue` re-examines the same c under the new dblstate;
// `break` out of the switch leaves the inner loop so c gets stored.
2583 while (true) | |
2584 { | |
2585 switch (dblstate) | |
2586 { | |
2587 case 0: // opening state | |
2588 if (c == '0') | |
2589 dblstate = 9; | |
2590 else if (c == '.') | |
2591 dblstate = 3; | |
2592 else | |
2593 dblstate = 1; | |
2594 break; | |
2595 | |
// State 9: the character after a leading '0'. 'x'/'X' marks a hexadecimal
// literal; any other character falls through and is treated as in state 1.
2596 case 9: | |
2597 dblstate = 1; | |
2598 if (c == 'X' || c == 'x') | |
2599 { | |
2600 hex++; | |
2601 break; | |
2602 } | |
2603 case 1: // digits to left of . | |
2604 case 3: // digits to right of . | |
2605 case 7: // continuing exponent digits | |
2606 if (!isdigit(c) && !(hex && isxdigit(c))) | |
2607 { | |
// goto Lnext restarts the outer loop without reaching writeByte, so the
// '_' is not copied into stringbuffer.
2608 if (c == '_') | |
2609 goto Lnext; // ignore embedded '_' | |
2610 dblstate++; | |
2611 continue; | |
2612 } | |
2613 break; | |
2614 | |
2615 case 2: // no more digits to left of . | |
2616 if (c == '.') | |
2617 { | |
2618 dblstate++; | |
2619 break; | |
2620 } | |
2621 case 4: // no more digits to right of . | |
2622 if ((c == 'E' || c == 'e') || | |
2623 hex && (c == 'P' || c == 'p')) | |
2624 { | |
2625 dblstate = 5; | |
2626 hex = 0; // exponent is always decimal | |
2627 break; | |
2628 } | |
2629 if (hex) | |
2630 error("binary-exponent-part required"); | |
2631 goto done; | |
2632 | |
2633 case 5: // looking immediately to right of E | |
2634 dblstate++; | |
2635 if (c == '-' || c == '+') | |
2636 break; | |
2637 case 6: // 1st exponent digit expected | |
2638 if (!isdigit(c)) | |
2639 error("exponent expected"); | |
2640 dblstate++; | |
2641 break; | |
2642 | |
2643 case 8: // past end of exponent digits | |
2644 goto done; | |
79 | 2645 |
2646 default: | |
2647 assert(0, "inreal.dblstate has unexpected value"); | |
0 | 2648 } |
2649 break; | |
2650 } | |
2651 stringbuffer.writeByte(c); | |
2652 } | |
// The last character fetched by `*p++` is not part of the number: back up.
2653 done: | |
2654 p--; | |
2655 | |
2656 stringbuffer.writeByte(0); | |
2657 | |
// The saved decimal-point pointer is restored by the matching
// version (Windows) block further down.
114 | 2658 version (Windows) { /// && __DMC__ |
0 | 2659 char* save = __locale_decpoint; |
2660 __locale_decpoint = cast(char*)".".ptr; | |
2661 } | |
// NOTE(review): errno is cleared only after this strtold call, so a range
// error raised by strtold itself is not seen by the ERANGE check at the end.
2662 t.float80value = strtold(cast(char*)stringbuffer.data, null); | |
2663 | |
2664 errno = 0; | |
2665 switch (*p) | |
2666 { | |
2667 case 'F': | |
2668 case 'f': | |
// The strtof/strtod return values are discarded; the calls are presumably
// made only for their effect on errno.
2669 strtof(cast(char*)stringbuffer.data, null); | |
2670 result = TOKfloat32v; | |
2671 p++; | |
2672 break; | |
2673 | |
2674 default: | |
2675 strtod(cast(char*)stringbuffer.data, null); | |
2676 result = TOKfloat64v; | |
2677 break; | |
2678 | |
// NOTE(review): no conversion call is made on the l/L path after errno was
// cleared, so the ERANGE check below does not appear able to fire for these
// literals - confirm whether that is intended.
2679 case 'l': | |
2680 if (!global.params.useDeprecated) | |
2681 error("'l' suffix is deprecated, use 'L' instead"); | |
2682 case 'L': | |
2683 result = TOKfloat80v; | |
2684 p++; | |
2685 break; | |
2686 } | |
// An 'i' or 'I' suffix turns the float kind into the imaginary kind of the
// same width.
2687 if (*p == 'i' || *p == 'I') | |
2688 { | |
2689 if (!global.params.useDeprecated && *p == 'I') | |
2690 error("'I' suffix is deprecated, use 'i' instead"); | |
2691 p++; | |
2692 switch (result) | |
2693 { | |
2694 case TOKfloat32v: | |
2695 result = TOKimaginary32v; | |
2696 break; | |
2697 case TOKfloat64v: | |
2698 result = TOKimaginary64v; | |
2699 break; | |
2700 case TOKfloat80v: | |
2701 result = TOKimaginary80v; | |
2702 break; | |
2703 } | |
2704 } | |
2705 | |
114 | 2706 version (Windows) { ///&& __DMC__ |
0 | 2707 __locale_decpoint = save; |
2708 } | |
2709 if (errno == ERANGE) | |
2710 error("number is not representable"); | |
2711 | |
2712 return result; | |
2713 } | |
2714 | |
/**
 * Report an error at the lexer's current location.
 *
 * Forwards to the overload that takes an explicit Loc, passing this.loc.
 *
 * Params:
 *  format = format string for the message
 *  t      = arguments for the format string
 */
void error(T...)(string format, T t)
{
    Loc here = this.loc;
    error(here, format, t);
}
2719 | |
/**
 * Report an error at the given location.
 *
 * The message is printed only when a module is attached (mod) and
 * global.gag is not set. It is prefixed with loc.toChars() and ": " when
 * that string is non-empty. When global.errors has already reached 20,
 * fatal() is called after printing. global.errors is incremented whether
 * or not anything was printed.
 *
 * Params:
 *  loc    = location to report
 *  format = format string for the message
 *  t      = arguments for the format string
 */
void error(T...)(Loc loc, string format, T t)
{
    if (mod && !global.gag)
    {
        string where = loc.toChars();
        if (where.length)
            writef("%s: ", where);

        writefln(format, t);

        // moderate blizzard of cascading messages
        if (global.errors >= 20)
            fatal();
    }

    ++global.errors;
}
2736 | |
/// Not implemented here: the body is an unconditional assertion failure.
void pragma_()
{
    assert(0);
}
2741 | |
/********************************************
 * Decode the UTF-8 sequence that starts at p.
 *
 * At most 6 bytes are handed to utf_decodeChar, fewer if a 0 byte comes
 * first. Any message it returns is reported through error().
 * p is advanced by (idx - 1), where idx is the index utf_decodeChar
 * writes back - presumably leaving p on the last byte of the sequence.
 *
 * Returns: the decoded character as written by utf_decodeChar.
 */
uint decodeUTF()
{
    ubyte* seq = p;

    assert(*seq & 0x80);

    // Count the bytes available, up to 6, stopping at a 0 byte.
    size_t avail = 1;
    while (avail < 6 && seq[avail])
        avail++;

    dchar decoded;
    size_t consumed = 0;
    string problem = utf_decodeChar(cast(string)seq[0 .. avail], &consumed, &decoded);
    p += consumed - 1;
    if (problem)
    {
        error("%s", problem);
    }
    return decoded;
}
2773 | |
/// Not implemented here: the body is an unconditional assertion failure.
void getDocComment(Token* t, uint lineComment)
{
    assert(0);
}
2778 | |
/**
 * Check whether a string is spelled like an identifier.
 *
 * The string must be non-empty, must not start with an ASCII digit, must
 * decode cleanly with utf_decodeChar, and every decoded character must be
 * an ASCII letter or digit, '_', or a character >= 0x80 accepted by
 * isUniAlpha.
 *
 * Params:
 *  p = candidate identifier text
 *
 * Returns: true if every check passes, false otherwise.
 */
static bool isValidIdentifier(string p)
{
    if (p.length == 0)
        return false;

    if (p[0] >= '0' && p[0] <= '9')     // beware of isdigit() on signed chars
        return false;

    size_t idx = 0;
    while (idx < p.length)
    {
        dchar dc;

        if (utf_decodeChar(p, &idx, &dc) !is null)
            return false;

        // The C <ctype.h> functions are only defined for arguments that fit
        // in an unsigned char (or EOF), so isalnum() must never see a
        // code point >= 0x80; those are classified by isUniAlpha() alone.
        bool accepted;
        if (dc >= 0x80)
            accepted = isUniAlpha(dc) != 0;
        else
            accepted = isalnum(dc) != 0 || dc == '_';

        if (!accepted)
            return false;
    }

    return true;
}
2805 | |
/**
 * Join two comment texts into one.
 *
 * If either argument is null the other one is returned as is. Otherwise a
 * new buffer is allocated holding c1 followed by c2, with a single '\n'
 * inserted between them only when c1 is non-empty and does not already end
 * in '\n'. This avoids a blank line between the parts, and a leading
 * newline when c1 is empty.
 *
 * TODO: use normal string append when GC works
 *
 * Params:
 *  c1 = first comment, may be null
 *  c2 = second comment, may be null
 *
 * Returns: the combined text; it aliases c1 or c2 when nothing was copied.
 */
static string combineComments(const(char)[] c1, const(char)[] c2)
{
    //writef("Lexer.combineComments('%s', '%s')\n", c1, c2);

    if (c1 is null)
        return cast(string) c2;
    if (c2 is null)
        return cast(string) c1;

    const bool needNewline = c1.length != 0 && c1[$ - 1] != '\n';
    const size_t len1 = c1.length + (needNewline ? 1 : 0);
    const size_t total = len1 + c2.length;
    if (total == 0)
        return cast(string) c1;     // both empty: nothing to allocate

    // The buffer holds plain text, so the collector need not scan it for pointers.
    char[] c = (cast(char*) GC.malloc(total, GC.BlkAttr.NO_SCAN))[0 .. total];
    c[0 .. c1.length] = c1[];
    if (needNewline)
        c[c1.length] = '\n';
    c[len1 .. total] = c2[];

    return cast(string) c;
}
2828 } |