0
|
1 module dmd.Lexer;
|
|
2
|
|
3 import dmd.StringTable;
|
|
4 import dmd.OutBuffer;
|
|
5 import dmd.Token;
|
|
6 import dmd.Loc;
|
|
7 import dmd.Module;
|
|
8 import dmd.Identifier;
|
|
9 import dmd.TOK;
|
|
10 import dmd.Keyword;
|
|
11 import dmd.StringValue;
|
|
12 import dmd.Global;
|
|
13 import dmd.Util;
|
|
14 import dmd.Id;
|
|
15 import dmd.Dchar;
|
|
16 import dmd.Utf;
|
|
17
|
|
18 import std.stdio : writeln;
|
|
19
|
|
20 import core.stdc.ctype;
|
|
21 import core.stdc.stdlib;
|
|
22 import core.stdc.string;
|
|
23 import core.stdc.stdio;
|
|
24 import core.stdc.time;
|
|
25 import core.stdc.errno;
|
|
26
|
|
27 enum LS = 0x2028; // UTF line separator
|
|
28 enum PS = 0x2029; // UTF paragraph separator
|
|
29
|
|
30 extern (C) extern
|
|
31 {
|
|
32 __gshared char* __locale_decpoint;
|
|
33 }
|
|
34
|
|
/**
 * Placeholder for the "is this code point a Unicode letter" test.
 *
 * Not implemented: every call trips the assertion below. Lexer.scan()
 * calls this for identifier bytes with the high bit set, so lexing such
 * input currently fails here.
 *
 * Params:
 *  u = code point to classify (unused)
 */
int isUniAlpha(uint u)
{
    assert(false);
}
|
|
39
|
|
40 class Lexer
|
|
41 {
|
|
42 static StringTable stringtable;
|
|
43 static OutBuffer stringbuffer;
|
|
44 static Token* freelist;
|
|
45
|
|
46 Loc loc; // for error messages
|
|
47
|
|
48 ubyte* base; // pointer to start of buffer
|
|
49 ubyte* end; // past end of buffer
|
|
50 ubyte* p; // current character
|
|
51 Token token;
|
|
52 Module mod;
|
|
53 int doDocComment; // collect doc comment information
|
|
54 int anyToken; // !=0 means seen at least one token
|
|
55 int commentToken; // !=0 means comments are TOKcomment's
|
|
56
|
|
/// Module-load initialisation: allocates the identifier table and the
/// scratch string buffer shared by all Lexer instances.
static this()
{
    stringbuffer = new OutBuffer();
    stringtable = new StringTable();
}
|
|
62
|
|
/// Module-unload cleanup: releases the shared identifier table.
/// (stringbuffer is not released here.)
static ~this()
{
    delete stringtable;
}
|
|
67
|
|
/**
 * Set up a lexer over part of a source buffer.
 *
 * Params:
 *  mod          = module being lexed; also used for the initial Loc
 *  base         = start of the source buffer
 *  begoffset    = offset from base at which scanning starts
 *  endoffset    = offset from base of the position just past the buffer
 *  doDocComment = non-zero to collect doc comment information
 *  commentToken = non-zero to have comments returned as TOKcomment
 *
 * If the text at the starting position begins with "#!", that whole line
 * is skipped and the line number is set to 2.
 */
this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
{
    loc = Loc(mod, 1);

    memset(&token, 0, token.sizeof);

    this.mod = mod;
    this.base = base;
    this.end = base + endoffset;
    this.p = base + begoffset;
    this.doDocComment = doDocComment;
    this.commentToken = commentToken;
    this.anyToken = 0;
    //initKeywords();

    // Nothing more to do unless the first line is a "#!" line.
    if (p[0] != '#' || p[1] != '!')
        return;

    p += 2;
    for (;;)
    {
        ubyte ch = *p;

        if (ch == '\n')
        {
            p++;
            break;
        }
        if (ch == '\r')
        {
            // Treat CR LF as a single line ending
            p++;
            if (*p == '\n')
                p++;
            break;
        }
        if (ch == 0 || ch == 0x1A)
            break;              // end of input: leave p on the terminator

        if (ch & 0x80)
        {
            // Multi-byte sequence: Unicode line/paragraph separators also end the line
            uint u = decodeUTF();
            if (u == PS || u == LS)
                break;
        }
        p++;
    }
    loc.linnum = 2;
}
|
|
122
|
|
123 version (DMDV2) {
|
|
124 static Keyword[] keywords =
|
|
125 [
|
|
126 // { "", TOK },
|
|
127
|
|
128 { "this", TOK.TOKthis },
|
|
129 { "super", TOK.TOKsuper },
|
|
130 { "assert", TOK.TOKassert },
|
|
131 { "null", TOK.TOKnull },
|
|
132 { "true", TOK.TOKtrue },
|
|
133 { "false", TOK.TOKfalse },
|
|
134 { "cast", TOK.TOKcast },
|
|
135 { "new", TOK.TOKnew },
|
|
136 { "delete", TOK.TOKdelete },
|
|
137 { "throw", TOK.TOKthrow },
|
|
138 { "module", TOK.TOKmodule },
|
|
139 { "pragma", TOK.TOKpragma },
|
|
140 { "typeof", TOK.TOKtypeof },
|
|
141 { "typeid", TOK.TOKtypeid },
|
|
142
|
|
143 { "template", TOK.TOKtemplate },
|
|
144
|
|
145 { "void", TOK.TOKvoid },
|
|
146 { "byte", TOK.TOKint8 },
|
|
147 { "ubyte", TOK.TOKuns8 },
|
|
148 { "short", TOK.TOKint16 },
|
|
149 { "ushort", TOK.TOKuns16 },
|
|
150 { "int", TOK.TOKint32 },
|
|
151 { "uint", TOK.TOKuns32 },
|
|
152 { "long", TOK.TOKint64 },
|
|
153 { "ulong", TOK.TOKuns64 },
|
|
154 { "cent", TOK.TOKcent, },
|
|
155 { "ucent", TOK.TOKucent, },
|
|
156 { "float", TOK.TOKfloat32 },
|
|
157 { "double", TOK.TOKfloat64 },
|
|
158 { "real", TOK.TOKfloat80 },
|
|
159
|
|
160 { "bool", TOK.TOKbool },
|
|
161 { "char", TOK.TOKchar },
|
|
162 { "wchar", TOK.TOKwchar },
|
|
163 { "dchar", TOK.TOKdchar },
|
|
164
|
|
165 { "ifloat", TOK.TOKimaginary32 },
|
|
166 { "idouble", TOK.TOKimaginary64 },
|
|
167 { "ireal", TOK.TOKimaginary80 },
|
|
168
|
|
169 { "cfloat", TOK.TOKcomplex32 },
|
|
170 { "cdouble", TOK.TOKcomplex64 },
|
|
171 { "creal", TOK.TOKcomplex80 },
|
|
172
|
|
173 { "delegate", TOK.TOKdelegate },
|
|
174 { "function", TOK.TOKfunction },
|
|
175
|
|
176 { "is", TOK.TOKis },
|
|
177 { "if", TOK.TOKif },
|
|
178 { "else", TOK.TOKelse },
|
|
179 { "while", TOK.TOKwhile },
|
|
180 { "for", TOK.TOKfor },
|
|
181 { "do", TOK.TOKdo },
|
|
182 { "switch", TOK.TOKswitch },
|
|
183 { "case", TOK.TOKcase },
|
|
184 { "default", TOK.TOKdefault },
|
|
185 { "break", TOK.TOKbreak },
|
|
186 { "continue", TOK.TOKcontinue },
|
|
187 { "synchronized", TOK.TOKsynchronized },
|
|
188 { "return", TOK.TOKreturn },
|
|
189 { "goto", TOK.TOKgoto },
|
|
190 { "try", TOK.TOKtry },
|
|
191 { "catch", TOK.TOKcatch },
|
|
192 { "finally", TOK.TOKfinally },
|
|
193 { "with", TOK.TOKwith },
|
|
194 { "asm", TOK.TOKasm },
|
|
195 { "foreach", TOK.TOKforeach },
|
|
196 { "foreach_reverse", TOK.TOKforeach_reverse },
|
|
197 { "scope", TOK.TOKscope },
|
|
198
|
|
199 { "struct", TOK.TOKstruct },
|
|
200 { "class", TOK.TOKclass },
|
|
201 { "interface", TOK.TOKinterface },
|
|
202 { "union", TOK.TOKunion },
|
|
203 { "enum", TOK.TOKenum },
|
|
204 { "import", TOK.TOKimport },
|
|
205 { "mixin", TOK.TOKmixin },
|
|
206 { "static", TOK.TOKstatic },
|
|
207 { "final", TOK.TOKfinal },
|
|
208 { "const", TOK.TOKconst },
|
|
209 { "typedef", TOK.TOKtypedef },
|
|
210 { "alias", TOK.TOKalias },
|
|
211 { "override", TOK.TOKoverride },
|
|
212 { "abstract", TOK.TOKabstract },
|
|
213 { "volatile", TOK.TOKvolatile },
|
|
214 { "debug", TOK.TOKdebug },
|
|
215 { "deprecated", TOK.TOKdeprecated },
|
|
216 { "in", TOK.TOKin },
|
|
217 { "out", TOK.TOKout },
|
|
218 { "inout", TOK.TOKinout },
|
|
219 { "lazy", TOK.TOKlazy },
|
|
220 { "auto", TOK.TOKauto },
|
|
221
|
|
222 { "align", TOK.TOKalign },
|
|
223 { "extern", TOK.TOKextern },
|
|
224 { "private", TOK.TOKprivate },
|
|
225 { "package", TOK.TOKpackage },
|
|
226 { "protected", TOK.TOKprotected },
|
|
227 { "public", TOK.TOKpublic },
|
|
228 { "export", TOK.TOKexport },
|
|
229
|
|
230 { "body", TOK.TOKbody },
|
|
231 { "invariant", TOK.TOKinvariant },
|
|
232 { "unittest", TOK.TOKunittest },
|
|
233 { "version", TOK.TOKversion },
|
|
234 //{ "manifest", TOK.TOKmanifest },
|
|
235
|
|
236 // Added after 1.0
|
|
237 { "ref", TOK.TOKref },
|
|
238 { "macro", TOK.TOKmacro },
|
|
239 { "pure", TOK.TOKpure },
|
|
240 { "nothrow", TOK.TOKnothrow },
|
|
241 { "__thread", TOK.TOKtls },
|
|
242 { "__gshared", TOK.TOKgshared },
|
|
243 { "__traits", TOK.TOKtraits },
|
|
244 { "__overloadset", TOK.TOKoverloadset },
|
|
245 { "__FILE__", TOK.TOKfile },
|
|
246 { "__LINE__", TOK.TOKline },
|
|
247 { "shared", TOK.TOKshared },
|
|
248 { "immutable", TOK.TOKimmutable },
|
|
249 ];
|
|
250 } else {
|
|
251 static Keyword[] keywords =
|
|
252 [
|
|
253 // { "", TOK },
|
|
254
|
|
255 { "this", TOK.TOKthis },
|
|
256 { "super", TOK.TOKsuper },
|
|
257 { "assert", TOK.TOKassert },
|
|
258 { "null", TOK.TOKnull },
|
|
259 { "true", TOK.TOKtrue },
|
|
260 { "false", TOK.TOKfalse },
|
|
261 { "cast", TOK.TOKcast },
|
|
262 { "new", TOK.TOKnew },
|
|
263 { "delete", TOK.TOKdelete },
|
|
264 { "throw", TOK.TOKthrow },
|
|
265 { "module", TOK.TOKmodule },
|
|
266 { "pragma", TOK.TOKpragma },
|
|
267 { "typeof", TOK.TOKtypeof },
|
|
268 { "typeid", TOK.TOKtypeid },
|
|
269
|
|
270 { "template", TOK.TOKtemplate },
|
|
271
|
|
272 { "void", TOK.TOKvoid },
|
|
273 { "byte", TOK.TOKint8 },
|
|
274 { "ubyte", TOK.TOKuns8 },
|
|
275 { "short", TOK.TOKint16 },
|
|
276 { "ushort", TOK.TOKuns16 },
|
|
277 { "int", TOK.TOKint32 },
|
|
278 { "uint", TOK.TOKuns32 },
|
|
279 { "long", TOK.TOKint64 },
|
|
280 { "ulong", TOK.TOKuns64 },
|
|
281 { "cent", TOK.TOKcent, },
|
|
282 { "ucent", TOK.TOKucent, },
|
|
283 { "float", TOK.TOKfloat32 },
|
|
284 { "double", TOK.TOKfloat64 },
|
|
285 { "real", TOK.TOKfloat80 },
|
|
286
|
|
287 { "bool", TOK.TOKbool },
|
|
288 { "char", TOK.TOKchar },
|
|
289 { "wchar", TOK.TOKwchar },
|
|
290 { "dchar", TOK.TOKdchar },
|
|
291
|
|
292 { "ifloat", TOK.TOKimaginary32 },
|
|
293 { "idouble", TOK.TOKimaginary64 },
|
|
294 { "ireal", TOK.TOKimaginary80 },
|
|
295
|
|
296 { "cfloat", TOK.TOKcomplex32 },
|
|
297 { "cdouble", TOK.TOKcomplex64 },
|
|
298 { "creal", TOK.TOKcomplex80 },
|
|
299
|
|
300 { "delegate", TOK.TOKdelegate },
|
|
301 { "function", TOK.TOKfunction },
|
|
302
|
|
303 { "is", TOK.TOKis },
|
|
304 { "if", TOK.TOKif },
|
|
305 { "else", TOK.TOKelse },
|
|
306 { "while", TOK.TOKwhile },
|
|
307 { "for", TOK.TOKfor },
|
|
308 { "do", TOK.TOKdo },
|
|
309 { "switch", TOK.TOKswitch },
|
|
310 { "case", TOK.TOKcase },
|
|
311 { "default", TOK.TOKdefault },
|
|
312 { "break", TOK.TOKbreak },
|
|
313 { "continue", TOK.TOKcontinue },
|
|
314 { "synchronized", TOK.TOKsynchronized },
|
|
315 { "return", TOK.TOKreturn },
|
|
316 { "goto", TOK.TOKgoto },
|
|
317 { "try", TOK.TOKtry },
|
|
318 { "catch", TOK.TOKcatch },
|
|
319 { "finally", TOK.TOKfinally },
|
|
320 { "with", TOK.TOKwith },
|
|
321 { "asm", TOK.TOKasm },
|
|
322 { "foreach", TOK.TOKforeach },
|
|
323 { "foreach_reverse", TOK.TOKforeach_reverse },
|
|
324 { "scope", TOK.TOKscope },
|
|
325
|
|
326 { "struct", TOK.TOKstruct },
|
|
327 { "class", TOK.TOKclass },
|
|
328 { "interface", TOK.TOKinterface },
|
|
329 { "union", TOK.TOKunion },
|
|
330 { "enum", TOK.TOKenum },
|
|
331 { "import", TOK.TOKimport },
|
|
332 { "mixin", TOK.TOKmixin },
|
|
333 { "static", TOK.TOKstatic },
|
|
334 { "final", TOK.TOKfinal },
|
|
335 { "const", TOK.TOKconst },
|
|
336 { "typedef", TOK.TOKtypedef },
|
|
337 { "alias", TOK.TOKalias },
|
|
338 { "override", TOK.TOKoverride },
|
|
339 { "abstract", TOK.TOKabstract },
|
|
340 { "volatile", TOK.TOKvolatile },
|
|
341 { "debug", TOK.TOKdebug },
|
|
342 { "deprecated", TOK.TOKdeprecated },
|
|
343 { "in", TOK.TOKin },
|
|
344 { "out", TOK.TOKout },
|
|
345 { "inout", TOK.TOKinout },
|
|
346 { "lazy", TOK.TOKlazy },
|
|
347 { "auto", TOK.TOKauto },
|
|
348
|
|
349 { "align", TOK.TOKalign },
|
|
350 { "extern", TOK.TOKextern },
|
|
351 { "private", TOK.TOKprivate },
|
|
352 { "package", TOK.TOKpackage },
|
|
353 { "protected", TOK.TOKprotected },
|
|
354 { "public", TOK.TOKpublic },
|
|
355 { "export", TOK.TOKexport },
|
|
356
|
|
357 { "body", TOK.TOKbody },
|
|
358 { "invariant", TOK.TOKinvariant },
|
|
359 { "unittest", TOK.TOKunittest },
|
|
360 { "version", TOK.TOKversion },
|
|
361 //{ "manifest", TOK.TOKmanifest },
|
|
362
|
|
363 // Added after 1.0
|
|
364 { "ref", TOK.TOKref },
|
|
365 { "macro", TOK.TOKmacro },
|
|
366 ];
|
|
367 }
|
|
368
|
|
369 static ubyte cmtable[256];
|
|
370 enum CMoctal = 0x1;
|
|
371 enum CMhex = 0x2;
|
|
372 enum CMidchar = 0x4;
|
|
373
|
|
/// Non-zero when cmtable marks `c` as an octal digit (CMoctal bit set).
ubyte isoctal(ubyte c)
{
    return cmtable[c] & CMoctal;
}
|
|
/// Non-zero when cmtable marks `c` as a hexadecimal digit (CMhex bit set).
ubyte ishex(ubyte c)
{
    return cmtable[c] & CMhex;
}
|
|
/// Non-zero when cmtable marks `c` as an identifier character (CMidchar bit set).
ubyte isidchar(ubyte c)
{
    return cmtable[c] & CMidchar;
}
|
|
377
|
|
/**
 * Fill in the character classification table.
 *
 * For every byte value the matching bits are OR-ed into cmtable:
 * CMoctal for '0'..'7', CMhex for decimal digits and a-f / A-F,
 * CMidchar for alphanumerics (per C isalnum) and '_'.
 */
static void cmtable_init()
{
    for (uint ch = 0; ch < cmtable.length; ++ch)
    {
        ubyte flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;

        if (isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
            flags |= CMhex;

        if (ch == '_' || isalnum(ch))
            flags |= CMidchar;

        cmtable[ch] |= flags;
    }
}
|
|
390
|
|
/**
 * Register all keywords and fill in the token spelling table.
 *
 * Each keyword is inserted into stringtable with an Identifier carrying
 * its token value, so scanning an identifier that is a keyword yields the
 * keyword token. Token.tochars is filled with a printable spelling for
 * each keyword and for the operator and internal tokens listed below.
 * Also initialises the character classification table.
 *
 * When global.params.Dversion is 1 the last two entries of the keyword
 * table are left out.
 */
static void initKeywords()
{
    // .length is size_t; storing it in a uint does not compile on 64-bit targets
    size_t nkeywords = keywords.length;

    if (global.params.Dversion == 1)
        nkeywords -= 2;

    cmtable_init();

    for (size_t u = 0; u < nkeywords; u++)
    {
        //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
        string s = keywords[u].name;
        TOK v = keywords[u].value;
        StringValue* sv = stringtable.insert(s);
        sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v);

        //printf("tochars[%d] = '%s'\n",v, s);
        Token.tochars[v] = s;
    }

    // Punctuation and operators
    Token.tochars[TOK.TOKeof] = "EOF";
    Token.tochars[TOK.TOKlcurly] = "{";
    Token.tochars[TOK.TOKrcurly] = "}";
    Token.tochars[TOK.TOKlparen] = "(";
    Token.tochars[TOK.TOKrparen] = ")";
    Token.tochars[TOK.TOKlbracket] = "[";
    Token.tochars[TOK.TOKrbracket] = "]";
    Token.tochars[TOK.TOKsemicolon] = ";";
    Token.tochars[TOK.TOKcolon] = ":";
    Token.tochars[TOK.TOKcomma] = ",";
    Token.tochars[TOK.TOKdot] = ".";
    Token.tochars[TOK.TOKxor] = "^";
    Token.tochars[TOK.TOKxorass] = "^=";
    Token.tochars[TOK.TOKassign] = "=";
    Token.tochars[TOK.TOKconstruct] = "=";
version (DMDV2) {
    Token.tochars[TOK.TOKblit] = "=";
}
    Token.tochars[TOK.TOKlt] = "<";
    Token.tochars[TOK.TOKgt] = ">";
    Token.tochars[TOK.TOKle] = "<=";
    Token.tochars[TOK.TOKge] = ">=";
    Token.tochars[TOK.TOKequal] = "==";
    Token.tochars[TOK.TOKnotequal] = "!=";
    Token.tochars[TOK.TOKnotidentity] = "!is";
    Token.tochars[TOK.TOKtobool] = "!!";

    // Floating point "unordered" comparisons
    Token.tochars[TOK.TOKunord] = "!<>=";
    Token.tochars[TOK.TOKue] = "!<>";
    Token.tochars[TOK.TOKlg] = "<>";
    Token.tochars[TOK.TOKleg] = "<>=";
    Token.tochars[TOK.TOKule] = "!>";
    Token.tochars[TOK.TOKul] = "!>=";
    Token.tochars[TOK.TOKuge] = "!<";
    Token.tochars[TOK.TOKug] = "!<=";

    Token.tochars[TOK.TOKnot] = "!";
    Token.tochars[TOK.TOKshl] = "<<";
    Token.tochars[TOK.TOKshr] = ">>";
    Token.tochars[TOK.TOKushr] = ">>>";
    Token.tochars[TOK.TOKadd] = "+";
    Token.tochars[TOK.TOKmin] = "-";
    Token.tochars[TOK.TOKmul] = "*";
    Token.tochars[TOK.TOKdiv] = "/";
    Token.tochars[TOK.TOKmod] = "%";
    Token.tochars[TOK.TOKslice] = "..";
    Token.tochars[TOK.TOKdotdotdot] = "...";
    Token.tochars[TOK.TOKand] = "&";
    Token.tochars[TOK.TOKandand] = "&&";
    Token.tochars[TOK.TOKor] = "|";
    Token.tochars[TOK.TOKoror] = "||";
    Token.tochars[TOK.TOKarray] = "[]";
    Token.tochars[TOK.TOKindex] = "[i]";
    Token.tochars[TOK.TOKaddress] = "&";
    Token.tochars[TOK.TOKstar] = "*";
    Token.tochars[TOK.TOKtilde] = "~";
    Token.tochars[TOK.TOKdollar] = "$";
    Token.tochars[TOK.TOKcast] = "cast";
    Token.tochars[TOK.TOKplusplus] = "++";
    Token.tochars[TOK.TOKminusminus] = "--";
    Token.tochars[TOK.TOKtype] = "type";
    Token.tochars[TOK.TOKquestion] = "?";
    Token.tochars[TOK.TOKneg] = "-";
    Token.tochars[TOK.TOKuadd] = "+";
    Token.tochars[TOK.TOKvar] = "var";
    Token.tochars[TOK.TOKaddass] = "+=";
    Token.tochars[TOK.TOKminass] = "-=";
    Token.tochars[TOK.TOKmulass] = "*=";
    Token.tochars[TOK.TOKdivass] = "/=";
    Token.tochars[TOK.TOKmodass] = "%=";
    Token.tochars[TOK.TOKshlass] = "<<=";
    Token.tochars[TOK.TOKshrass] = ">>=";
    Token.tochars[TOK.TOKushrass] = ">>>=";
    Token.tochars[TOK.TOKandass] = "&=";
    Token.tochars[TOK.TOKorass] = "|=";
    Token.tochars[TOK.TOKcatass] = "~=";
    Token.tochars[TOK.TOKcat] = "~";
    Token.tochars[TOK.TOKcall] = "call";
    Token.tochars[TOK.TOKidentity] = "is";

    Token.tochars[TOK.TOKidentifier] = "identifier";
    Token.tochars[TOK.TOKat] = "@";

    // For debugging
    Token.tochars[TOK.TOKdotexp] = "dotexp";
    Token.tochars[TOK.TOKdotti] = "dotti";
    Token.tochars[TOK.TOKdotvar] = "dotvar";
    Token.tochars[TOK.TOKdottype] = "dottype";
    Token.tochars[TOK.TOKsymoff] = "symoff";
    Token.tochars[TOK.TOKarraylength] = "arraylength";
    Token.tochars[TOK.TOKarrayliteral] = "arrayliteral";
    Token.tochars[TOK.TOKassocarrayliteral] = "assocarrayliteral";
    Token.tochars[TOK.TOKstructliteral] = "structliteral";
    Token.tochars[TOK.TOKstring] = "string";
    Token.tochars[TOK.TOKdsymbol] = "symbol";
    Token.tochars[TOK.TOKtuple] = "tuple";
    Token.tochars[TOK.TOKdeclaration] = "declaration";
    Token.tochars[TOK.TOKdottd] = "dottd";
    Token.tochars[TOK.TOKon_scope_exit] = "scope(exit)";
    Token.tochars[TOK.TOKon_scope_success] = "scope(success)";
    Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)";
}
|
|
517
|
|
/**
 * Return the Identifier interned under the spelling `s`.
 *
 * The spelling is looked up in (or added to) stringtable. If the entry
 * has no Identifier attached yet, a new TOKidentifier Identifier is
 * created and stored on the entry, so later calls with the same
 * spelling return the same object.
 */
static Identifier idPool(string s)
{
    StringValue* entry = stringtable.update(s);

    Identifier pooled = cast(Identifier) entry.ptrvalue;
    if (pooled !is null)
        return pooled;

    pooled = new Identifier(entry.lstring.string_, TOK.TOKidentifier);
    entry.ptrvalue = cast(void*) pooled;
    return pooled;
}
|
|
530
|
|
/// Create a unique identifier from the prefix `s`, using the next value
/// of an internal counter as the numeric suffix.
static Identifier uniqueId(string s)
{
    static int counter;

    counter += 1;
    return uniqueId(s, counter);
}
|
|
536
|
|
/*********************************************
 * Create a unique identifier using the prefix s.
 *
 * The name is s followed by the decimal value of num, formatted into a
 * fixed 32-byte buffer and interned through idPool().
 *
 * Params:
 *  s   = identifier prefix; must be short enough for prefix plus number
 *        to fit the buffer (checked by assert)
 *  num = number appended to the prefix
 * Returns: the pooled Identifier for the combined name
 */
static Identifier uniqueId(string s, int num)
{
    char[32] buffer;
    size_t slen = s.length;

    assert(slen + num.sizeof * 3 + 1 <= buffer.sizeof);

    // "%.*s" consumes an int precision followed by a char*. A D string must
    // not be handed to a C variadic directly, so pass both parts explicitly.
    int len = snprintf(buffer.ptr, buffer.length, "%.*s%d", cast(int) slen, s.ptr, num);
    assert(len >= 0 && cast(size_t) len < buffer.length);

    return idPool(buffer[0 .. len].idup);
}
|
|
550
|
|
/**
 * Advance to the next token and return its value.
 *
 * If a look-ahead token is already queued on token.next, it is copied
 * into `token` and its node is pushed onto the free list; otherwise a
 * new token is scanned from the input.
 */
TOK nextToken()
{
    Token* queued = token.next;

    if (queued is null)
    {
        scan(&token);
    }
    else
    {
        memcpy(&token, queued, Token.sizeof);
        // Recycle the look-ahead node
        queued.next = freelist;
        freelist = queued;
    }

    //token.print();
    return token.value;
}
|
|
570
|
|
/***********************
 * Look ahead at next token's value.
 *
 * Returns: the value of the token that peek() yields for the current token.
 */
TOK peekNext()
{
    auto ahead = peek(&token);
    return ahead.value;
}
|
|
578
|
|
/// Not implemented: every call trips the assertion below.
/// NOTE(review): by its name this presumably looks two tokens ahead - confirm before implementing.
TOK peekNext2()
{
    assert(false);
}
|
|
583
|
|
584 void scan(Token* t)
|
|
585 {
|
|
586 uint lastLine = loc.linnum;
|
|
587 uint linnum;
|
|
588
|
|
589 t.blockComment = null;
|
|
590 t.lineComment = null;
|
|
591 while (1)
|
|
592 {
|
|
593 t.ptr = p;
|
|
594 //printf("p = %p, *p = '%c'\n",p,*p);
|
|
595 switch (*p)
|
|
596 {
|
|
597 case 0:
|
|
598 case 0x1A:
|
|
599 t.value = TOK.TOKeof; // end of file
|
|
600 return;
|
|
601
|
|
602 case ' ':
|
|
603 case '\t':
|
|
604 case '\v':
|
|
605 case '\f':
|
|
606 p++;
|
|
607 continue; // skip white space
|
|
608
|
|
609 case '\r':
|
|
610 p++;
|
|
611 if (*p != '\n') // if CR stands by itself
|
|
612 loc.linnum++;
|
|
613 continue; // skip white space
|
|
614
|
|
615 case '\n':
|
|
616 p++;
|
|
617 loc.linnum++;
|
|
618 continue; // skip white space
|
|
619
|
|
620 case '0': case '1': case '2': case '3': case '4':
|
|
621 case '5': case '6': case '7': case '8': case '9':
|
|
622 t.value = number(t);
|
|
623 return;
|
|
624
|
|
625 version (CSTRINGS) {
|
|
626 case '\'':
|
|
627 t.value = charConstant(t, 0);
|
|
628 return;
|
|
629
|
|
630 case '"':
|
|
631 t.value = stringConstant(t,0);
|
|
632 return;
|
|
633
|
|
634 case 'l':
|
|
635 case 'L':
|
|
636 if (p[1] == '\'')
|
|
637 {
|
|
638 p++;
|
|
639 t.value = charConstant(t, 1);
|
|
640 return;
|
|
641 }
|
|
642 else if (p[1] == '"')
|
|
643 {
|
|
644 p++;
|
|
645 t.value = stringConstant(t, 1);
|
|
646 return;
|
|
647 }
|
|
648 } else {
|
|
649 case '\'':
|
|
650 t.value = charConstant(t,0);
|
|
651 return;
|
|
652
|
|
653 case 'r':
|
|
654 if (p[1] != '"')
|
|
655 goto case_ident;
|
|
656 p++;
|
|
657 case '`':
|
|
658 t.value = wysiwygStringConstant(t, *p);
|
|
659 return;
|
|
660
|
|
661 case 'x':
|
|
662 if (p[1] != '"')
|
|
663 goto case_ident;
|
|
664 p++;
|
|
665 t.value = hexStringConstant(t);
|
|
666 return;
|
|
667
|
|
668 version (DMDV2) {
|
|
669 case 'q':
|
|
670 if (p[1] == '"')
|
|
671 {
|
|
672 p++;
|
|
673 t.value = delimitedStringConstant(t);
|
|
674 return;
|
|
675 }
|
|
676 else if (p[1] == '{')
|
|
677 {
|
|
678 p++;
|
|
679 t.value = tokenStringConstant(t);
|
|
680 return;
|
|
681 }
|
|
682 else
|
|
683 goto case_ident;
|
|
684 }
|
|
685
|
|
686 case '"':
|
|
687 t.value = escapeStringConstant(t,0);
|
|
688 return;
|
|
689 version (TEXTUAL_ASSEMBLY_OUT) {
|
|
690 } else {
|
|
691 case '\\': // escaped string literal
|
|
692 { uint c;
|
|
693 ubyte* pstart = p;
|
|
694
|
|
695 stringbuffer.reset();
|
|
696 do
|
|
697 {
|
|
698 p++;
|
|
699 switch (*p)
|
|
700 {
|
|
701 case 'u':
|
|
702 case 'U':
|
|
703 case '&':
|
|
704 c = escapeSequence();
|
|
705 stringbuffer.writeUTF8(c);
|
|
706 break;
|
|
707
|
|
708 default:
|
|
709 c = escapeSequence();
|
|
710 stringbuffer.writeByte(c);
|
|
711 break;
|
|
712 }
|
|
713 } while (*p == '\\');
|
|
714 t.len = stringbuffer.offset;
|
|
715 stringbuffer.writeByte(0);
|
|
716 char* cc = cast(char*)malloc(stringbuffer.offset);
|
|
717 memcpy(cc, stringbuffer.data, stringbuffer.offset);
|
|
718 t.ustring = cc;
|
|
719 t.postfix = 0;
|
|
720 t.value = TOK.TOKstring;
|
|
721 if (!global.params.useDeprecated)
|
|
722 error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart);
|
|
723 return;
|
|
724 }
|
|
725 }
|
|
726 case 'l':
|
|
727 case 'L':
|
|
728 }
|
|
729 case 'a': case 'b': case 'c': case 'd': case 'e':
|
|
730 case 'f': case 'g': case 'h': case 'i': case 'j':
|
|
731 case 'k': case 'm': case 'n': case 'o':
|
|
732 version (DMDV2) {
|
|
733 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
|
|
734 } else {
|
|
735 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
|
|
736 }
|
|
737 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
|
|
738 case 'z':
|
|
739 case 'A': case 'B': case 'C': case 'D': case 'E':
|
|
740 case 'F': case 'G': case 'H': case 'I': case 'J':
|
|
741 case 'K': case 'M': case 'N': case 'O':
|
|
742 case 'P': case 'Q': case 'R': case 'S': case 'T':
|
|
743 case 'U': case 'V': case 'W': case 'X': case 'Y':
|
|
744 case 'Z':
|
|
745 case '_':
|
|
746 case_ident:
|
|
747 { ubyte c;
|
|
748 StringValue *sv;
|
|
749 Identifier id;
|
|
750
|
|
751 do
|
|
752 {
|
|
753 c = *++p;
|
|
754 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
|
|
755 sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); ///
|
|
756 id = cast(Identifier) sv.ptrvalue;
|
|
757 if (id is null)
|
|
758 { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier);
|
|
759 sv.ptrvalue = cast(void*)id;
|
|
760 }
|
|
761 t.ident = id;
|
|
762 t.value = cast(TOK) id.value;
|
|
763 anyToken = 1;
|
|
764 if (*t.ptr == '_') // if special identifier token
|
|
765 {
|
|
766 static char date[11+1];
|
|
767 static char time[8+1];
|
|
768 static char timestamp[24+1];
|
|
769
|
|
770 if (!date[0]) // lazy evaluation
|
|
771 { time_t tm;
|
|
772 char *p;
|
|
773
|
|
774 .time(&tm);
|
|
775 p = ctime(&tm);
|
|
776 assert(p);
|
|
777 sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
|
|
778 sprintf(time.ptr, "%.8s", p + 11);
|
|
779 sprintf(timestamp.ptr, "%.24s", p);
|
|
780 }
|
|
781
|
|
782 ///version (DMDV1) {
|
|
783 /// if (mod && id == Id.FILE)
|
|
784 /// {
|
|
785 /// t.ustring = cast(ubyte*)(loc.filename ? loc.filename : mod.ident.toChars());
|
|
786 /// goto Lstr;
|
|
787 /// }
|
|
788 /// else if (mod && id == Id.LINE)
|
|
789 /// {
|
|
790 /// t.value = TOK.TOKint64v;
|
|
791 /// t.uns64value = loc.linnum;
|
|
792 /// }
|
|
793 /// else
|
|
794 ///}
|
|
795 if (id == Id.DATE)
|
|
796 {
|
|
797 t.ustring = date.ptr;
|
|
798 goto Lstr;
|
|
799 }
|
|
800 else if (id == Id.TIME)
|
|
801 {
|
|
802 t.ustring = time.ptr;
|
|
803 goto Lstr;
|
|
804 }
|
|
805 else if (id == Id.VENDOR)
|
|
806 {
|
|
807 t.ustring = "Digital Mars D".ptr;
|
|
808 goto Lstr;
|
|
809 }
|
|
810 else if (id == Id.TIMESTAMP)
|
|
811 {
|
|
812 t.ustring = timestamp.ptr;
|
|
813 Lstr:
|
|
814 t.value = TOK.TOKstring;
|
|
815 Llen:
|
|
816 t.postfix = 0;
|
|
817 t.len = strlen(cast(char*)t.ustring);
|
|
818 }
|
|
819 else if (id == Id.VERSIONX)
|
|
820 {
|
|
821 uint major = 0;
|
|
822 uint minor = 0;
|
|
823
|
|
824 foreach (char cc; global.version_[1..$])
|
|
825 {
|
|
826 if (isdigit(cc))
|
|
827 minor = minor * 10 + cc - '0';
|
|
828 else if (cc == '.')
|
|
829 {
|
|
830 major = minor;
|
|
831 minor = 0;
|
|
832 }
|
|
833 else
|
|
834 break;
|
|
835 }
|
|
836 t.value = TOK.TOKint64v;
|
|
837 t.uns64value = major * 1000 + minor;
|
|
838 }
|
|
839 ///version (DMDV2) {
|
|
840 else if (id == Id.EOFX)
|
|
841 {
|
|
842 t.value = TOK.TOKeof;
|
|
843 // Advance scanner to end of file
|
|
844 while (!(*p == 0 || *p == 0x1A))
|
|
845 p++;
|
|
846 }
|
|
847 ///}
|
|
848 }
|
|
849 //printf("t.value = %d\n",t.value);
|
|
850 return;
|
|
851 }
|
|
852
|
|
853 case '/':
|
|
854 p++;
|
|
855 switch (*p)
|
|
856 {
|
|
857 case '=':
|
|
858 p++;
|
|
859 t.value = TOK.TOKdivass;
|
|
860 return;
|
|
861
|
|
862 case '*':
|
|
863 p++;
|
|
864 linnum = loc.linnum;
|
|
865 while (1)
|
|
866 {
|
|
867 while (1)
|
|
868 {
|
|
869 ubyte c = *p;
|
|
870 switch (c)
|
|
871 {
|
|
872 case '/':
|
|
873 break;
|
|
874
|
|
875 case '\n':
|
|
876 loc.linnum++;
|
|
877 p++;
|
|
878 continue;
|
|
879
|
|
880 case '\r':
|
|
881 p++;
|
|
882 if (*p != '\n')
|
|
883 loc.linnum++;
|
|
884 continue;
|
|
885
|
|
886 case 0:
|
|
887 case 0x1A:
|
|
888 error("unterminated /* */ comment");
|
|
889 p = end;
|
|
890 t.value = TOK.TOKeof;
|
|
891 return;
|
|
892
|
|
893 default:
|
|
894 if (c & 0x80)
|
|
895 { uint u = decodeUTF();
|
|
896 if (u == PS || u == LS)
|
|
897 loc.linnum++;
|
|
898 }
|
|
899 p++;
|
|
900 continue;
|
|
901 }
|
|
902 break;
|
|
903 }
|
|
904 p++;
|
|
905 if (p[-2] == '*' && p - 3 != t.ptr)
|
|
906 break;
|
|
907 }
|
|
908 if (commentToken)
|
|
909 {
|
|
910 t.value = TOK.TOKcomment;
|
|
911 return;
|
|
912 }
|
|
913 else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
|
|
914 { // if /** but not /**/
|
|
915 getDocComment(t, lastLine == linnum);
|
|
916 }
|
|
917 continue;
|
|
918
|
|
919 case '/': // do // style comments
|
|
920 linnum = loc.linnum;
|
|
921 while (1)
|
|
922 { ubyte c = *++p;
|
|
923 switch (c)
|
|
924 {
|
|
925 case '\n':
|
|
926 break;
|
|
927
|
|
928 case '\r':
|
|
929 if (p[1] == '\n')
|
|
930 p++;
|
|
931 break;
|
|
932
|
|
933 case 0:
|
|
934 case 0x1A:
|
|
935 if (commentToken)
|
|
936 {
|
|
937 p = end;
|
|
938 t.value = TOK.TOKcomment;
|
|
939 return;
|
|
940 }
|
|
941 if (doDocComment && t.ptr[2] == '/')
|
|
942 getDocComment(t, lastLine == linnum);
|
|
943 p = end;
|
|
944 t.value = TOK.TOKeof;
|
|
945 return;
|
|
946
|
|
947 default:
|
|
948 if (c & 0x80)
|
|
949 { uint u = decodeUTF();
|
|
950 if (u == PS || u == LS)
|
|
951 break;
|
|
952 }
|
|
953 continue;
|
|
954 }
|
|
955 break;
|
|
956 }
|
|
957
|
|
958 if (commentToken)
|
|
959 {
|
|
960 p++;
|
|
961 loc.linnum++;
|
|
962 t.value = TOK.TOKcomment;
|
|
963 return;
|
|
964 }
|
|
965 if (doDocComment && t.ptr[2] == '/')
|
|
966 getDocComment(t, lastLine == linnum);
|
|
967
|
|
968 p++;
|
|
969 loc.linnum++;
|
|
970 continue;
|
|
971
|
|
972 case '+':
|
|
973 {
|
|
974 int nest;
|
|
975
|
|
976 linnum = loc.linnum;
|
|
977 p++;
|
|
978 nest = 1;
|
|
979 while (1)
|
|
980 { ubyte c = *p;
|
|
981 switch (c)
|
|
982 {
|
|
983 case '/':
|
|
984 p++;
|
|
985 if (*p == '+')
|
|
986 {
|
|
987 p++;
|
|
988 nest++;
|
|
989 }
|
|
990 continue;
|
|
991
|
|
992 case '+':
|
|
993 p++;
|
|
994 if (*p == '/')
|
|
995 {
|
|
996 p++;
|
|
997 if (--nest == 0)
|
|
998 break;
|
|
999 }
|
|
1000 continue;
|
|
1001
|
|
1002 case '\r':
|
|
1003 p++;
|
|
1004 if (*p != '\n')
|
|
1005 loc.linnum++;
|
|
1006 continue;
|
|
1007
|
|
1008 case '\n':
|
|
1009 loc.linnum++;
|
|
1010 p++;
|
|
1011 continue;
|
|
1012
|
|
1013 case 0:
|
|
1014 case 0x1A:
|
|
1015 error("unterminated /+ +/ comment");
|
|
1016 p = end;
|
|
1017 t.value = TOK.TOKeof;
|
|
1018 return;
|
|
1019
|
|
1020 default:
|
|
1021 if (c & 0x80)
|
|
1022 { uint u = decodeUTF();
|
|
1023 if (u == PS || u == LS)
|
|
1024 loc.linnum++;
|
|
1025 }
|
|
1026 p++;
|
|
1027 continue;
|
|
1028 }
|
|
1029 break;
|
|
1030 }
|
|
1031 if (commentToken)
|
|
1032 {
|
|
1033 t.value = TOK.TOKcomment;
|
|
1034 return;
|
|
1035 }
|
|
1036 if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
|
|
1037 { // if /++ but not /++/
|
|
1038 getDocComment(t, lastLine == linnum);
|
|
1039 }
|
|
1040 continue;
|
|
1041 }
|
|
1042
|
|
1043 default:
|
|
1044 break; ///
|
|
1045 }
|
|
1046 t.value = TOK.TOKdiv;
|
|
1047 return;
|
|
1048
|
|
1049 case '.':
|
|
1050 p++;
|
|
1051 if (isdigit(*p))
|
|
1052 { /* Note that we don't allow ._1 and ._ as being
|
|
1053 * valid floating point numbers.
|
|
1054 */
|
|
1055 p--;
|
|
1056 t.value = inreal(t);
|
|
1057 }
|
|
1058 else if (p[0] == '.')
|
|
1059 {
|
|
1060 if (p[1] == '.')
|
|
1061 { p += 2;
|
|
1062 t.value = TOK.TOKdotdotdot;
|
|
1063 }
|
|
1064 else
|
|
1065 { p++;
|
|
1066 t.value = TOK.TOKslice;
|
|
1067 }
|
|
1068 }
|
|
1069 else
|
|
1070 t.value = TOK.TOKdot;
|
|
1071 return;
|
|
1072
|
|
1073 case '&':
|
|
1074 p++;
|
|
1075 if (*p == '=')
|
|
1076 { p++;
|
|
1077 t.value = TOK.TOKandass;
|
|
1078 }
|
|
1079 else if (*p == '&')
|
|
1080 { p++;
|
|
1081 t.value = TOK.TOKandand;
|
|
1082 }
|
|
1083 else
|
|
1084 t.value = TOK.TOKand;
|
|
1085 return;
|
|
1086
|
|
1087 case '|':
|
|
1088 p++;
|
|
1089 if (*p == '=')
|
|
1090 { p++;
|
|
1091 t.value = TOK.TOKorass;
|
|
1092 }
|
|
1093 else if (*p == '|')
|
|
1094 { p++;
|
|
1095 t.value = TOK.TOKoror;
|
|
1096 }
|
|
1097 else
|
|
1098 t.value = TOK.TOKor;
|
|
1099 return;
|
|
1100
|
|
1101 case '-':
|
|
1102 p++;
|
|
1103 if (*p == '=')
|
|
1104 { p++;
|
|
1105 t.value = TOK.TOKminass;
|
|
1106 }
|
|
1107 /// #if 0
|
|
1108 /// else if (*p == '>')
|
|
1109 /// { p++;
|
|
1110 /// t.value = TOK.TOKarrow;
|
|
1111 /// }
|
|
1112 /// #endif
|
|
1113 else if (*p == '-')
|
|
1114 { p++;
|
|
1115 t.value = TOK.TOKminusminus;
|
|
1116 }
|
|
1117 else
|
|
1118 t.value = TOK.TOKmin;
|
|
1119 return;
|
|
1120
|
|
1121 case '+':
|
|
1122 p++;
|
|
1123 if (*p == '=')
|
|
1124 { p++;
|
|
1125 t.value = TOK.TOKaddass;
|
|
1126 }
|
|
1127 else if (*p == '+')
|
|
1128 { p++;
|
|
1129 t.value = TOK.TOKplusplus;
|
|
1130 }
|
|
1131 else
|
|
1132 t.value = TOK.TOKadd;
|
|
1133 return;
|
|
1134
|
|
1135 case '<':
|
|
1136 p++;
|
|
1137 if (*p == '=')
|
|
1138 { p++;
|
|
1139 t.value = TOK.TOKle; // <=
|
|
1140 }
|
|
1141 else if (*p == '<')
|
|
1142 { p++;
|
|
1143 if (*p == '=')
|
|
1144 { p++;
|
|
1145 t.value = TOK.TOKshlass; // <<=
|
|
1146 }
|
|
1147 else
|
|
1148 t.value = TOK.TOKshl; // <<
|
|
1149 }
|
|
1150 else if (*p == '>')
|
|
1151 { p++;
|
|
1152 if (*p == '=')
|
|
1153 { p++;
|
|
1154 t.value = TOK.TOKleg; // <>=
|
|
1155 }
|
|
1156 else
|
|
1157 t.value = TOK.TOKlg; // <>
|
|
1158 }
|
|
1159 else
|
|
1160 t.value = TOK.TOKlt; // <
|
|
1161 return;
|
|
1162
|
|
1163 case '>':
|
|
1164 p++;
|
|
1165 if (*p == '=')
|
|
1166 { p++;
|
|
1167 t.value = TOK.TOKge; // >=
|
|
1168 }
|
|
1169 else if (*p == '>')
|
|
1170 { p++;
|
|
1171 if (*p == '=')
|
|
1172 { p++;
|
|
1173 t.value = TOK.TOKshrass; // >>=
|
|
1174 }
|
|
1175 else if (*p == '>')
|
|
1176 { p++;
|
|
1177 if (*p == '=')
|
|
1178 { p++;
|
|
1179 t.value = TOK.TOKushrass; // >>>=
|
|
1180 }
|
|
1181 else
|
|
1182 t.value = TOK.TOKushr; // >>>
|
|
1183 }
|
|
1184 else
|
|
1185 t.value = TOK.TOKshr; // >>
|
|
1186 }
|
|
1187 else
|
|
1188 t.value = TOK.TOKgt; // >
|
|
1189 return;
|
|
1190
|
|
1191 case '!':
|
|
1192 p++;
|
|
1193 if (*p == '=')
|
|
1194 { p++;
|
|
1195 if (*p == '=' && global.params.Dversion == 1)
|
|
1196 { p++;
|
|
1197 t.value = TOK.TOKnotidentity; // !==
|
|
1198 }
|
|
1199 else
|
|
1200 t.value = TOK.TOKnotequal; // !=
|
|
1201 }
|
|
1202 else if (*p == '<')
|
|
1203 { p++;
|
|
1204 if (*p == '>')
|
|
1205 { p++;
|
|
1206 if (*p == '=')
|
|
1207 { p++;
|
|
1208 t.value = TOK.TOKunord; // !<>=
|
|
1209 }
|
|
1210 else
|
|
1211 t.value = TOK.TOKue; // !<>
|
|
1212 }
|
|
1213 else if (*p == '=')
|
|
1214 { p++;
|
|
1215 t.value = TOK.TOKug; // !<=
|
|
1216 }
|
|
1217 else
|
|
1218 t.value = TOK.TOKuge; // !<
|
|
1219 }
|
|
1220 else if (*p == '>')
|
|
1221 { p++;
|
|
1222 if (*p == '=')
|
|
1223 { p++;
|
|
1224 t.value = TOK.TOKul; // !>=
|
|
1225 }
|
|
1226 else
|
|
1227 t.value = TOK.TOKule; // !>
|
|
1228 }
|
|
1229 else
|
|
1230 t.value = TOK.TOKnot; // !
|
|
1231 return;
|
|
1232
|
|
1233 case '=':
|
|
1234 p++;
|
|
1235 if (*p == '=')
|
|
1236 { p++;
|
|
1237 if (*p == '=' && global.params.Dversion == 1)
|
|
1238 { p++;
|
|
1239 t.value = TOK.TOKidentity; // ===
|
|
1240 }
|
|
1241 else
|
|
1242 t.value = TOK.TOKequal; // ==
|
|
1243 }
|
|
1244 else
|
|
1245 t.value = TOK.TOKassign; // =
|
|
1246 return;
|
|
1247
|
|
1248 case '~':
|
|
1249 p++;
|
|
1250 if (*p == '=')
|
|
1251 { p++;
|
|
1252 t.value = TOK.TOKcatass; // ~=
|
|
1253 }
|
|
1254 else
|
|
1255 t.value = TOK.TOKtilde; // ~
|
|
1256 return;
|
|
1257 /*
|
|
1258 #define SINGLE(c,tok) case c: p++; t.value = tok; return;
|
|
1259
|
|
1260 SINGLE('(', TOKlparen)
|
|
1261 SINGLE(')', TOKrparen)
|
|
1262 SINGLE('[', TOKlbracket)
|
|
1263 SINGLE(']', TOKrbracket)
|
|
1264 SINGLE('{', TOKlcurly)
|
|
1265 SINGLE('}', TOKrcurly)
|
|
1266 SINGLE('?', TOKquestion)
|
|
1267 SINGLE(',', TOKcomma)
|
|
1268 SINGLE(';', TOKsemicolon)
|
|
1269 SINGLE(':', TOKcolon)
|
|
1270 SINGLE('$', TOKdollar)
|
|
1271 SINGLE('@', TOKat)
|
|
1272
|
|
1273 #undef SINGLE
|
|
1274
|
|
1275 #define DOUBLE(c1,tok1,c2,tok2) \
|
|
1276 case c1: \
|
|
1277 p++; \
|
|
1278 if (*p == c2) \
|
|
1279 { p++; \
|
|
1280 t.value = tok2; \
|
|
1281 } \
|
|
1282 else \
|
|
1283 t.value = tok1; \
|
|
1284 return;
|
|
1285
|
|
1286 DOUBLE('*', TOKmul, '=', TOKmulass)
|
|
1287 DOUBLE('%', TOKmod, '=', TOKmodass)
|
|
1288 DOUBLE('^', TOKxor, '=', TOKxorass)
|
|
1289
|
|
1290 #undef DOUBLE
|
|
1291 */
|
|
1292
|
|
1293 case '(': p++; t.value = TOK.TOKlparen; return;
|
|
1294 case ')': p++; t.value = TOK.TOKrparen; return;
|
|
1295 case '[': p++; t.value = TOK.TOKlbracket; return;
|
|
1296 case ']': p++; t.value = TOK.TOKrbracket; return;
|
|
1297 case '{': p++; t.value = TOK.TOKlcurly; return;
|
|
1298 case '}': p++; t.value = TOK.TOKrcurly; return;
|
|
1299 case '?': p++; t.value = TOK.TOKquestion; return;
|
|
1300 case ',': p++; t.value = TOK.TOKcomma; return;
|
|
1301 case ';': p++; t.value = TOK.TOKsemicolon; return;
|
|
1302 case ':': p++; t.value = TOK.TOKcolon; return;
|
|
1303 case '$': p++; t.value = TOK.TOKdollar; return;
|
|
1304 case '@': p++; t.value = TOK.TOKat; return;
|
|
1305
|
|
1306 case '*':
|
|
1307 p++;
|
|
1308 if (*p == '=') {
|
|
1309 p++;
|
|
1310 t.value = TOK.TOKmulass;
|
|
1311 } else {
|
|
1312 t.value = TOK.TOKmul;
|
|
1313 }
|
|
1314 return;
|
|
1315
|
|
1316 case '%':
|
|
1317 p++;
|
|
1318 if (*p == '=') {
|
|
1319 p++;
|
|
1320 t.value = TOK.TOKmodass;
|
|
1321 } else {
|
|
1322 t.value = TOK.TOKmod;
|
|
1323 }
|
|
1324 return;
|
|
1325
|
|
1326 case '^':
|
|
1327 p++;
|
|
1328 if (*p == '=') {
|
|
1329 p++;
|
|
1330 t.value = TOK.TOKxorass;
|
|
1331 } else {
|
|
1332 t.value = TOK.TOKxor;
|
|
1333 }
|
|
1334 return;
|
|
1335
|
|
1336 case '#':
|
|
1337 p++;
|
|
1338 pragma_();
|
|
1339 continue;
|
|
1340
|
|
1341 default:
|
|
1342 { ubyte c = *p;
|
|
1343
|
|
1344 if (c & 0x80)
|
|
1345 { uint u = decodeUTF();
|
|
1346
|
|
1347 // Check for start of unicode identifier
|
|
1348 if (isUniAlpha(u))
|
|
1349 goto case_ident;
|
|
1350
|
|
1351 if (u == PS || u == LS)
|
|
1352 {
|
|
1353 loc.linnum++;
|
|
1354 p++;
|
|
1355 continue;
|
|
1356 }
|
|
1357 }
|
|
1358 if (isprint(c))
|
|
1359 error("unsupported char '%c'", c);
|
|
1360 else
|
|
1361 error("unsupported char 0x%02x", c);
|
|
1362 p++;
|
|
1363 continue;
|
|
1364 }
|
|
1365 }
|
|
1366 }
|
|
1367 }
|
|
1368
|
|
/**
 * Return the token that follows ct, scanning it on first request.
 *
 * If ct already has a successor linked through ct.next, that token is
 * returned as is. Otherwise a new Token is allocated, filled in by scan(),
 * terminated with a null next pointer and linked in as ct.next, so a later
 * peek(ct) returns the same token without rescanning.
 *
 * Params:
 *  ct = token to look past
 * Returns:
 *  the token after ct
 */
Token* peek(Token* ct)
{
    if (ct.next)
        return ct.next;

    Token* ahead = new Token();
    scan(ahead);
    ahead.next = null;
    ct.next = ahead;
    return ahead;
}
|
|
1384
|
|
/**
 * Look ahead from tk, which is taken to be just inside an already opened
 * parenthesis (the depth counter starts at 1), and find where that
 * parenthesised group ends.
 *
 * Returns:
 *  - the token after the matching ')' when the group closes normally;
 *  - the offending token itself when scanning stops early: a '}' that closes
 *    more braces than were opened during the scan, a ';' seen outside any
 *    braces, or end of file.
 */
Token* peekPastParen(Token* tk)
{
    //printf("peekPastParen()\n");
    int parenDepth = 1;
    int braceDepth = 0;

    for (;;)
    {
        tk = peek(tk);
        //tk.print();
        switch (tk.value)
        {
            case TOK.TOKlparen:
                ++parenDepth;
                break;

            case TOK.TOKrparen:
                if (--parenDepth == 0)
                    return peek(tk);    // step past the matching ')'
                break;

            case TOK.TOKlcurly:
                ++braceDepth;
                break;

            case TOK.TOKrcurly:
                if (--braceDepth < 0)
                    return tk;          // unbalanced '}': give up here
                break;

            case TOK.TOKsemicolon:
                if (braceDepth == 0)
                    return tk;          // ';' only continues the scan inside braces
                break;

            case TOK.TOKeof:
                return tk;

            default:
                break;
        }
    }
}
|
|
1430
|
|
/*******************************************
 * Parse escape sequence.
 *
 * On entry p points at the character that follows the backslash. On return
 * p has been advanced past whatever part of the sequence was consumed.
 *
 * Handled forms:
 *  - \' \" \? \\            the character itself
 *  - \a \b \f \n \r \t \v   the corresponding control code
 *  - \xHH \uHHHH \UHHHHHHHH hex value with 2, 4 or 8 digits; too few digits
 *                           is reported and the digits read so far are used;
 *                           for \u and \U an invalid code point is reported
 *                           and replaced by '?'
 *  - \&name;                named character entity looked up through
 *                           HtmlNamedEntity(); an unknown name is reported
 *                           and replaced by ' '
 *  - \ooo                   one to three octal digits; values above 0xFF are
 *                           reported but still returned
 *  - NUL or 0x1A            yields '\\' without advancing p
 * Anything else is reported as an undefined escape sequence and the
 * character is returned unchanged, with p not advanced.
 *
 * Returns:
 *  the value denoted by the escape sequence
 */
uint escapeSequence()
{
    uint c = *p;

    version (TEXTUAL_ASSEMBLY_OUT) {
        return c;
    }
    int n;
    int ndigits;

    switch (c)
    {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
            p++;
            break;

        case 'a':   c = 7;  goto Lconsume;
        case 'b':   c = 8;  goto Lconsume;
        case 'f':   c = 12; goto Lconsume;
        case 'n':   c = 10; goto Lconsume;
        case 'r':   c = 13; goto Lconsume;
        case 't':   c = 9;  goto Lconsume;
        case 'v':   c = 11; goto Lconsume;

        case 'u':
            ndigits = 4;
            goto Lhex;
        case 'U':
            ndigits = 8;
            goto Lhex;
        case 'x':
            ndigits = 2;
        Lhex:
            p++;
            c = *p;
            if (ishex(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                while (1)
                {
                    // Fold the current hex digit into v.
                    if (isdigit(c))
                        c -= '0';
                    else if (islower(c))
                        c -= 'a' - 10;
                    else
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(ubyte)c))
                    {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \x may denote an arbitrary byte; \u and \U must be valid code points.
                if (ndigits != 2 && !utf_isValidDchar(v))
                {   error("invalid UTF character \\U%08x", v);
                    v = '?';    // recover with valid UTF character
                }
                c = v;
            }
            else
                error("undefined escape hex sequence \\%c\n",c);
            break;

        case '&':           // named character entity
            for (ubyte* idstart = ++p; true; p++)
            {
                switch (*p)
                {
                    case ';':
                        c = HtmlNamedEntity(idstart, p - idstart);
                        if (c == ~0)
                        {
                            // Pass the name as text: a ubyte[] slice would be
                            // formatted by %s as a list of numbers.
                            error("unnamed character entity &%s;", cast(char[])(idstart[0 .. (p - idstart)]));
                            c = ' ';
                        }
                        p++;
                        break;

                    default:
                        if (isalpha(*p) ||
                            (p != idstart + 1 && isdigit(*p)))
                            continue;
                        // c still holds '&' here and p stays on the offending character.
                        error("unterminated named entity");
                        break;
                }
                break;
            }
            break;

        case 0:
        case 0x1A:          // end of file
            c = '\\';
            break;

        default:
            if (isoctal(cast(ubyte)c))
            {
                uint v;

                n = 0;
                v = 0;
                do
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                } while (++n < 3 && isoctal(cast(ubyte)c));
                c = v;
                if (c > 0xFF)
                    error("0%03o is larger than a byte", c);
            }
            else
                error("undefined escape sequence \\%c\n",c);
            break;
    }
    return c;
}
|
|
1559
|
|
/// Placeholder: the body is not implemented and unconditionally fails an assertion.
TOK wysiwygStringConstant(Token* t, int tc)
{
    assert(0);
}
|
|
1564
|
|
/// Placeholder: the body is not implemented and unconditionally fails an assertion.
TOK hexStringConstant(Token* t)
{
    assert(0);
}
|
|
1569
|
|
version (DMDV2) {
    /// Placeholder: the body is not implemented and unconditionally fails an assertion.
    TOK delimitedStringConstant(Token* t)
    {
        assert(0);
    }

    /// Placeholder: the body is not implemented and unconditionally fails an assertion.
    TOK tokenStringConstant(Token* t)
    {
        assert(0);
    }
}
|
|
/**
 * Lex a double-quoted string literal in which backslash escapes are honoured.
 *
 * On entry p points at the opening quote. Characters are accumulated in the
 * shared stringbuffer until the closing '"' is found; the text is then copied
 * into a malloc'ed, NUL-terminated buffer stored in t.ustring, t.len is set
 * to the length excluding the terminator, and stringPostfix() records any
 * c/w/d suffix.
 *
 * Line handling: "\r\n" and a lone '\r' are stored as '\n'; LS and PS are
 * stored as '\n' as well. Each line break increments loc.linnum.
 *
 * If NUL or 0x1A is reached before the closing quote, an error naming the
 * starting location is reported, p is left on that character and an empty
 * string is produced.
 *
 * Params:
 *  t    = token to fill in
 *  wide = not used by this implementation
 * Returns:
 *  TOK.TOKstring in every case
 */
TOK escapeStringConstant(Token* t, int wide)
{
    uint c;
    Loc start = loc;

    p++;
    stringbuffer.reset();
    while (true)
    {
        c = *p++;
        switch (c)
        {
            version (TEXTUAL_ASSEMBLY_OUT) {
            } else {
            case '\\':
                switch (*p)
                {
                    case 'u':
                    case 'U':
                    case '&':
                        // These escapes denote code points: store them UTF-8 encoded.
                        c = escapeSequence();
                        stringbuffer.writeUTF8(c);
                        continue;

                    default:
                        // Every other escape is stored as a single byte below.
                        c = escapeSequence();
                        break;
                }
                break;
            }
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;   // ignore
                c = '\n';       // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case '"':
            {
                t.len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                char* tmp = cast(char*)malloc(stringbuffer.offset);
                if (tmp is null)
                {
                    // C malloc reports exhaustion by returning null; stop
                    // instead of letting memcpy write through a null pointer.
                    printf("Error: out of memory\n");
                    exit(EXIT_FAILURE);
                }
                memcpy(tmp, stringbuffer.data, stringbuffer.offset);
                t.ustring = tmp;
                stringPostfix(t);
                return TOK.TOKstring;
            }

            case 0:
            case 0x1A:
                p--;    // stay on the terminator so the caller sees it again
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "".ptr;
                t.len = 0;
                t.postfix = 0;
                return TOK.TOKstring;

            default:
                if (c & 0x80)
                {
                    // Multi-byte sequence: back up and decode from its first byte.
                    p--;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {   c = '\n';
                        loc.linnum++;
                    }
                    p++;
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(c);
    }

    assert(false);
}
|
|
1660
|
|
/**
 * Lex a single-quoted character literal.
 *
 * On entry p points at the opening quote. The character value is stored in
 * t.uns64value.
 *
 * The returned token kind is:
 *  - TOKwcharv for a \u escape, or a decoded non-ASCII character below 0xD800
 *    or in the range 0xE000 .. 0xFFFD;
 *  - TOKdcharv for a \U or \& escape, or any other decoded non-ASCII character;
 *  - TOKcharv otherwise.
 *
 * "unterminated character constant" is reported when the literal is empty,
 * hits a line break or end of file, or is not closed by a quote right after
 * the character; the kind determined so far is still returned.
 *
 * Params:
 *  t    = token to fill in
 *  wide = not used by this implementation
 */
TOK charConstant(Token* t, int wide)
{
    TOK kind = TOKcharv;

    //printf("Lexer.charConstant\n");
    p++;                    // skip the opening quote
    uint ch = *p++;
    switch (ch)
    {
        version (TEXTUAL_ASSEMBLY_OUT) {
        } else {
        case '\\':
        {
            // Note which escape this is before escapeSequence() moves p.
            ubyte lead = *p;
            t.uns64value = escapeSequence();
            if (lead == 'u')
                kind = TOKwcharv;
            else if (lead == 'U' || lead == '&')
                kind = TOKdcharv;
            break;
        }
        }
        case '\n':
            loc.linnum++;
            goto case '\r';

        case '\r':
        case 0:
        case 0x1A:
        case '\'':
            error("unterminated character constant");
            return kind;

        default:
            if (ch & 0x80)
            {
                // Multi-byte sequence: back up and decode from its first byte.
                p--;
                ch = decodeUTF();
                p++;
                if (ch == LS || ch == PS)
                {
                    // Unicode line breaks end the literal just like '\n'.
                    loc.linnum++;
                    error("unterminated character constant");
                    return kind;
                }
                kind = (ch < 0xD800 || (ch >= 0xE000 && ch < 0xFFFE))
                    ? TOKwcharv
                    : TOKdcharv;
            }
            t.uns64value = ch;
            break;
    }

    if (*p == '\'')
    {
        p++;
        return kind;
    }

    error("unterminated character constant");
    return kind;
}
|
|
1728
|
|
/***************************************
 * Get postfix of string literal.
 *
 * If the current character is 'c', 'w' or 'd' it is stored in t.postfix and
 * consumed; otherwise t.postfix is set to 0 and p is left untouched.
 */
void stringPostfix(Token* t)
{
    ubyte suffix = *p;

    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t.postfix = suffix;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
|
|
1748
|
|
/// Placeholder: the body is not implemented and unconditionally fails an assertion.
uint wchar_(uint u)
{
    assert(0);
}
|
|
1753
|
|
/**************************************
 * Read in a number.
 *
 * On entry p points at the first character of the literal. Integers may be
 * decimal, octal (leading 0), hexadecimal (0x) or binary (0b); embedded '_'
 * characters are skipped. The suffixes u/U and l/L are accepted in any
 * combination, each at most once.
 *
 * As soon as the text turns out to be a floating point literal, p is rewound
 * to the start and the work is handed to inreal().
 *
 * For integers the value is stored in t.uns64value.
 *
 * Returns:
 *  TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer, chosen from
 *  the value, the radix and the suffixes; otherwise whatever inreal() returns.
 */
TOK number(Token* t)
{
    // We use a state machine to collect numbers
    enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
        STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
        STATE_hexh, STATE_error }

    enum FLAGS
    {
        FLAGS_undefined = 0,
        FLAGS_decimal   = 1,    // decimal
        FLAGS_unsigned  = 2,    // u or U suffix
        FLAGS_long      = 4,    // l or L suffix
    }

    STATE state = STATE.STATE_initial;
    FLAGS flags = FLAGS.FLAGS_decimal;
    int base = 0;
    uint c;
    TOK result;

    //printf("Lexer.number()\n");
    stringbuffer.reset();
    ubyte* start = p;       // remembered so a real literal can be rescanned

    for (;;)
    {
        c = *p;
        switch (state)
        {
            case STATE.STATE_initial:       // opening state
                state = (c == '0') ? STATE.STATE_0 : STATE.STATE_decimal;
                break;

            case STATE.STATE_0:
                // A leading 0 means the literal is no longer plain decimal.
                flags = (flags & ~FLAGS.FLAGS_decimal);
                switch (c)
                {
                    version (ZEROH) {
                    case 'H':           // 0h
                    case 'h':
                        goto hexh;
                    }
                    case 'X':
                    case 'x':
                        state = STATE.STATE_hex0;
                        break;

                    case '.':
                        if (p[1] == '.')    // .. is a separate token
                            goto done;
                        goto real_;

                    case 'i':
                    case 'f':
                    case 'F':
                        goto real_;

                    version (ZEROH) {
                    case 'E':
                    case 'e':
                        goto case_hex;
                    }
                    case 'B':
                    case 'b':
                        state = STATE.STATE_binary0;
                        break;

                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                        state = STATE.STATE_octal;
                        break;

                    version (ZEROH) {
                    case '8': case '9': case 'A':
                    case 'C': case 'D': case 'F':
                    case 'a': case 'c': case 'd': case 'f':
                    case_hex:
                        state = STATE.STATE_hexh;
                        break;
                    }
                    case '_':
                        state = STATE.STATE_octal;
                        p++;
                        continue;

                    case 'L':
                        if (p[1] == 'i')
                            goto real_;
                        goto done;

                    default:
                        goto done;
                }
                break;

            case STATE.STATE_decimal:       // reading decimal number
                if (!isdigit(c))
                {
                    version (ZEROH) {
                        if (ishex(c)
                            || c == 'H' || c == 'h'
                           )
                            goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    else if (c == 'i' || c == 'f' || c == 'F' ||
                             c == 'e' || c == 'E')
                    {
            real_:  // It's a real number. Back up and rescan as a real
                        p = start;
                        return inreal(t);
                    }
                    else if (c == 'L' && p[1] == 'i')
                        goto real_;
                    goto done;
                }
                break;

            case STATE.STATE_hex0:          // reading hex number
            case STATE.STATE_hex:
                if (!ishex(cast(ubyte)c))
                {
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if ((c == '.' && p[1] != '.') ||
                        c == 'P' || c == 'p' || c == 'i')
                        goto real_;
                    // Still in hex0 means no digit followed the 0x prefix.
                    if (state == STATE.STATE_hex0)
                        error("Hex digit expected, not '%c'", c);
                    goto done;
                }
                state = STATE.STATE_hex;
                break;

            version (ZEROH) {
            hexh:
                state = STATE.STATE_hexh;
            case STATE.STATE_hexh:          // parse numbers like 0FFh
                if (!ishex(c))
                {
                    if (c == 'H' || c == 'h')
                    {
                        p++;
                        base = 16;
                        goto done;
                    }
                    else
                    {
                        // Check for something like 1E3 or 0E24
                        if (memchr(cast(char*)stringbuffer.data, 'E', stringbuffer.offset) ||
                            memchr(cast(char*)stringbuffer.data, 'e', stringbuffer.offset))
                            goto real_;
                        error("Hex digit expected, not '%c'", c);
                        goto done;
                    }
                }
                break;
            }

            case STATE.STATE_octal:         // reading octal number
            case STATE.STATE_octale:        // reading octal number with non-octal digits
                if (!isoctal(cast(ubyte)c))
                {
                    version (ZEROH) {
                        if (ishex(c)
                            || c == 'H' || c == 'h'
                           )
                            goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real_;
                    if (c == 'i')
                        goto real_;
                    if (!isdigit(c))
                        goto done;
                    // An 8 or 9: keep collecting, the error is reported at the end.
                    state = STATE.STATE_octale;
                }
                break;

            case STATE.STATE_binary0:       // starting binary number
            case STATE.STATE_binary:        // reading binary number
                if (c != '0' && c != '1')
                {
                    version (ZEROH) {
                        if (ishex(c)
                            || c == 'H' || c == 'h'
                           )
                            goto hexh;
                    }
                    if (c == '_')           // ignore embedded _
                    {
                        p++;
                        continue;
                    }
                    if (state != STATE.STATE_binary0)
                        goto done;
                    error("binary digit expected");
                    state = STATE.STATE_error;
                    break;
                }
                state = STATE.STATE_binary;
                break;

            case STATE.STATE_error:         // for error recovery
                if (!isdigit(c))            // scan until non-digit
                    goto done;
                break;

            default:
                assert(0);
        }
        stringbuffer.writeByte(c);
        p++;
    }

done:
    stringbuffer.writeByte(0);      // terminate string
    if (state == STATE.STATE_octale)
        error("Octal digit expected");

    ulong value;                    // unsigned >=64 bit integer type

    if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0))
    {
        // Exactly one digit plus the terminator: no conversion loop needed.
        value = stringbuffer.data[0] - '0';
    }
    else
    {
        // Convert string to integer
        version (__DMC__) {
            errno = 0;
            value = strtoull(cast(char*)stringbuffer.data,null,base);
            if (errno == ERANGE)
                error("integer overflow");
        } else {
            // Not everybody implements strtoull()
            char* s = cast(char*)stringbuffer.data;
            int radix = 10;
            int digit;

            if (*s == '0')
            {
                if (s[1] == 'x' || s[1] == 'X')
                {
                    s += 2;
                    radix = 16;
                }
                else if (s[1] == 'b' || s[1] == 'B')
                {
                    s += 2;
                    radix = 2;
                }
                else if (isdigit(s[1]))
                {
                    s += 1;
                    radix = 8;
                }
            }

            value = 0;
            for (;;)
            {
                if (*s >= '0' && *s <= '9')
                    digit = *s - '0';
                else if (*s >= 'a' && *s <= 'z')
                    digit = *s - 'a' + 10;
                else if (*s >= 'A' && *s <= 'Z')
                    digit = *s - 'A' + 10;
                else
                    break;
                if (digit >= radix)
                    break;
                ulong scaled = value * radix;
                //printf("n2 / r = %llx, n = %llx\n", n2/r, n);
                // The division detects a wrapped multiply, the comparison a wrapped add.
                if (scaled / radix != value || scaled + digit < value)
                {
                    error ("integer overflow");
                    break;
                }

                value = scaled + digit;
                s++;
            }
        }
        if (value.sizeof > 8 &&
            value > 0xFFFFFFFFFFFFFFFF)     // if n needs more than 64 bits
            error("integer overflow");
    }

    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
Lsuffixes:
    for (;;)
    {
        FLAGS suffix;

        switch (*p)
        {
            case 'U':
            case 'u':
                suffix = FLAGS.FLAGS_unsigned;
                break;

            case 'l':
                if (1 || !global.params.useDeprecated)
                    error("'l' suffix is deprecated, use 'L' instead");
                goto case 'L';

            case 'L':
                suffix = FLAGS.FLAGS_long;
                break;

            default:
                break Lsuffixes;
        }

        p++;
        if (flags & suffix)         // the same suffix given twice
            error("unrecognized token");
        flags = (flags | suffix);
    }

    switch (flags)
    {
        case FLAGS.FLAGS_undefined:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (value & 0x8000000000000000)
                result = TOK.TOKuns64v;
            else if (value & 0xFFFFFFFF00000000)
                result = TOK.TOKint64v;
            else if (value & 0x80000000)
                result = TOK.TOKuns32v;
            else
                result = TOK.TOKint32v;
            break;

        case FLAGS.FLAGS_decimal:
            /* First that fits: int, long, long long
             */
            if (value & 0x8000000000000000)
            {
                error("signed integer overflow");
                result = TOK.TOKuns64v;
            }
            else if (value & 0xFFFFFFFF80000000)
                result = TOK.TOKint64v;
            else
                result = TOK.TOKint32v;
            break;

        case FLAGS.FLAGS_unsigned:
        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
            /* First that fits: uint, ulong
             */
            if (value & 0xFFFFFFFF00000000)
                result = TOK.TOKuns64v;
            else
                result = TOK.TOKuns32v;
            break;

        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
            if (value & 0x8000000000000000)
            {
                error("signed integer overflow");
                result = TOK.TOKuns64v;
            }
            else
                result = TOK.TOKint64v;
            break;

        case FLAGS.FLAGS_long:
            if (value & 0x8000000000000000)
                result = TOK.TOKuns64v;
            else
                result = TOK.TOKint64v;
            break;

        case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
        case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
            result = TOK.TOKuns64v;
            break;

        default:
            debug {
                printf("%x\n",flags);
            }
            assert(0);
    }
    t.uns64value = value;
    return result;
}
|
|
2158
|
|
/**************************************
 * Read in characters, converting them to real.
 *
 * On entry p points at the first character of the literal, which must be a
 * digit or '.'. The literal text, with embedded '_' removed, is collected in
 * the shared stringbuffer and converted with strtold(); the value is stored
 * in t.float80value.
 *
 * Suffixes: f/F selects the 32 bit kind, L (or the deprecated l) the 80 bit
 * kind, no suffix the 64 bit kind; a following i (or the deprecated I) turns
 * the result into the matching imaginary kind.
 *
 * For the 32 and 64 bit kinds the text is converted a second time with
 * strtof()/strtod() solely to see whether errno becomes ERANGE, in which
 * case "number is not representable" is reported.
 *
 * Bugs:
 *  Exponent overflow not detected.
 *  Too much requested precision is not detected.
 */
TOK inreal(Token* t)
in
{
    assert(*p == '.' || isdigit(*p));
}
out (result)
{
    switch (result)
    {
        case TOKfloat32v:
        case TOKfloat64v:
        case TOKfloat80v:
        case TOKimaginary32v:
        case TOKimaginary64v:
        case TOKimaginary80v:
            break;

        default:
            assert(0);
    }
}
body
{
    int dblstate = 0;
    uint c;
    char hex = 0;       // is this a hexadecimal-floating-constant?
    TOK kind;

    //printf("Lexer.inreal()\n");
    stringbuffer.reset();

Lnext:
    while (true)
    {
        // Get next char from input
        c = *p++;
        //printf("dblstate = %d, c = '%c'\n", dblstate, c);

        // The inner loop lets a state change re-examine the same character.
        while (true)
        {
            switch (dblstate)
            {
                case 0:         // opening state
                    if (c == '0')
                        dblstate = 9;
                    else if (c == '.')
                        dblstate = 3;
                    else
                        dblstate = 1;
                    break;

                case 9:         // character right after a leading 0
                    dblstate = 1;
                    if (c == 'X' || c == 'x')
                    {
                        hex++;
                        break;
                    }
                    goto case 1;

                case 1:         // digits to left of .
                case 3:         // digits to right of .
                case 7:         // continuing exponent digits
                    if (!isdigit(c) && !(hex && isxdigit(c)))
                    {
                        if (c == '_')
                            continue Lnext; // ignore embedded '_'
                        dblstate++;
                        continue;
                    }
                    break;

                case 2:         // no more digits to left of .
                    if (c == '.')
                    {
                        dblstate++;
                        break;
                    }
                    goto case 4;

                case 4:         // no more digits to right of .
                    if ((c == 'E' || c == 'e') ||
                        (hex && (c == 'P' || c == 'p')))
                    {
                        dblstate = 5;
                        hex = 0;        // exponent is always decimal
                        break;
                    }
                    if (hex)
                        error("binary-exponent-part required");
                    goto done;

                case 5:         // looking immediately to right of E
                    dblstate++;
                    if (c == '-' || c == '+')
                        break;
                    goto case 6;

                case 6:         // 1st exponent digit expected
                    if (!isdigit(c))
                        error("exponent expected");
                    dblstate++;
                    break;

                case 8:         // past end of exponent digits
                    goto done;
            }
            break;
        }
        stringbuffer.writeByte(c);
    }

done:
    p--;    // un-read the character that ended the literal

    stringbuffer.writeByte(0);

    version (_WIN32) { /// && __DMC__
        char* save = __locale_decpoint;
        __locale_decpoint = cast(char*)".".ptr;
    }
    t.float80value = strtold(cast(char*)stringbuffer.data, null);

    // errno is cleared after strtold(), so only the calls below can set it.
    errno = 0;
    switch (*p)
    {
        case 'F':
        case 'f':
            strtof(cast(char*)stringbuffer.data, null);
            kind = TOKfloat32v;
            p++;
            break;

        case 'l':
            if (!global.params.useDeprecated)
                error("'l' suffix is deprecated, use 'L' instead");
            goto case 'L';

        case 'L':
            kind = TOKfloat80v;
            p++;
            break;

        default:
            strtod(cast(char*)stringbuffer.data, null);
            kind = TOKfloat64v;
            break;
    }

    if (*p == 'i' || *p == 'I')
    {
        if (!global.params.useDeprecated && *p == 'I')
            error("'I' suffix is deprecated, use 'i' instead");
        p++;

        // kind is one of the three real kinds here; map it to its imaginary twin.
        if (kind == TOKfloat32v)
            kind = TOKimaginary32v;
        else if (kind == TOKfloat64v)
            kind = TOKimaginary64v;
        else if (kind == TOKfloat80v)
            kind = TOKimaginary80v;
    }

    version (_WIN32) { ///&& __DMC__
        __locale_decpoint = save;
    }
    if (errno == ERANGE)
        error("number is not representable");

    return kind;
}
|
|
2331
|
|
/**
 * Report an error at the lexer's current source location.
 *
 * Forwards to the overload taking an explicit Loc, passing this.loc.
 *
 * Params:
 *  format = format string for the message
 *  t      = arguments referenced by format
 */
void error(T...)(string format, T t)
{
    Loc here = this.loc;
    error(here, format, t);
}
|
|
2336
|
|
/**
 * Report an error at the given source location.
 *
 * The message is printed only when the lexer has a module and global.gag is
 * not set. It is prefixed with "<location>: " when loc.toChars() is
 * non-empty, and fatal() is called once global.errors has already reached 20.
 * global.errors is incremented in every case, printed or not, provided
 * fatal() returns.
 *
 * Params:
 *  loc    = location to report
 *  format = format string for the message
 *  t      = arguments referenced by format
 */
void error(T...)(Loc loc, string format, T t)
{
    if (!mod || global.gag)
    {
        // Silent mode: only count the error.
        global.errors++;
        return;
    }

    string where = loc.toChars();
    if (where.length != 0)
        writef("%s: ", where);

    writefln(format, t);

    if (global.errors >= 20)    // moderate blizzard of cascading messages
        fatal();

    global.errors++;
}
|
|
2353
|
|
/// Placeholder: the body is not implemented and unconditionally fails an assertion.
void pragma_()
{
    assert(0);
}
|
|
2358
|
|
/// Placeholder: the body is not implemented and unconditionally fails an assertion.
uint decodeUTF()
{
    assert(0);
}
|
|
2363
|
|
/// Placeholder: the body is not implemented and unconditionally fails an assertion.
void getDocComment(Token* t, uint lineComment)
{
    assert(0);
}
|
|
2368
|
|
/**
 * Check whether p is spelled like an identifier.
 *
 * p is accepted when it is non-empty, does not start with an ASCII digit,
 * decodes cleanly with utf_decodeChar(), and every decoded character is an
 * ASCII letter or digit, '_', or a non-ASCII character for which
 * isUniAlpha() is true.
 *
 * Params:
 *  p = candidate identifier
 * Returns:
 *  true if p passes all of the checks above
 */
static bool isValidIdentifier(string p)
{
    if (p.length == 0)
        return false;

    if (p[0] >= '0' && p[0] <= '9')     // beware of isdigit() on signed chars
        return false;

    size_t idx = 0;
    while (idx < p.length)
    {
        dchar dc;

        // A non-null result signals a decoding error.
        if (utf_decodeChar(p, &idx, &dc) !is null)
            return false;

        if (dc >= 0x80)
        {
            // isalnum() is only defined for unsigned char values and EOF, so
            // code points outside ASCII must not be passed to it.
            if (!isUniAlpha(dc))
                return false;
        }
        else if (!isalnum(dc) && dc != '_')
        {
            return false;
        }
    }

    return true;
}
|
|
2395
|
|
/// TODO: reimplement based on strings
/**
 * Join two NUL-terminated comment texts.
 *
 * If either argument is null the other one is returned as is; no copy is
 * made. Otherwise a new malloc'ed, NUL-terminated buffer is returned that
 * holds c1 followed by c2, with a '\n' inserted between them when c1 is
 * non-empty and does not already end in one. The inputs are not modified and
 * not freed.
 *
 * If the allocation fails, "Error: out of memory" is printed and the process
 * exits with EXIT_FAILURE.
 *
 * Params:
 *  c1 = first comment, may be null
 *  c2 = second comment, may be null
 * Returns:
 *  the combined text, or null when both arguments are null
 */
static ubyte* combineComments(ubyte* c1, ubyte* c2)
{
    //printf("Lexer.combineComments('%s', '%s')\n", c1, c2);

    if (c1 is null)
        return c2;
    if (c2 is null)
        return c1;

    size_t len1 = strlen(cast(char*)c1);
    size_t len2 = strlen(cast(char*)c2);

    // Room for c1, an optional separating '\n', c2 and the terminator.
    ubyte* c = cast(ubyte*)malloc(len1 + 1 + len2 + 1);
    if (c is null)
    {
        // C malloc reports exhaustion by returning null; stop instead of
        // letting memcpy write through a null pointer.
        printf("Error: out of memory\n");
        exit(EXIT_FAILURE);
    }

    memcpy(c, c1, len1);
    if (len1 && c1[len1 - 1] != '\n')
    {
        c[len1] = '\n';
        len1++;
    }
    memcpy(c + len1, c2, len2);
    c[len1 + len2] = 0;

    return c;
}
|
|
2425 } |