Mercurial > projects > ldc
annotate dmd/lexer.c @ 1351:8d501abecd24
Initial (but disabled) fix for ticket #294. The part that actually fixes the bug is in an #if 0 block, as I'm afraid it will cause regressions. I'm most likely not going to be around tonight, and maybe not tomorrow either, so I'm pushing it in case someone wants to run some serious testing or investigate the problem noted in llvmhelpers.cpp (realignOffset).
author | Tomas Lindquist Olsen <tomas.l.olsen gmail com> |
---|---|
date | Thu, 14 May 2009 17:20:17 +0200 |
parents | 79758fd2f48a |
children | 8026319762be |
rev | line source |
---|---|
159 | 1 |
2 // Compiler implementation of the D programming language | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3 // Copyright (c) 1999-2009 by Digital Mars |
159 | 4 // All Rights Reserved |
5 // written by Walter Bright | |
6 // http://www.digitalmars.com | |
7 // License for redistribution is by either the Artistic License | |
8 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
9 // See the included readme.txt for details. | |
10 | |
1228
79758fd2f48a
Added Doxygen file.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1195
diff
changeset
|
11 #if IN_LLVM |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
12 #include <cmath> |
1228
79758fd2f48a
Added Doxygen file.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1195
diff
changeset
|
13 #endif |
872
aa953cc960b6
Apply BlueZeniX's patch for OpenSolaris compatibility. Fixes #158.
Christian Kamm <kamm incasoftware de>
parents:
846
diff
changeset
|
14 |
159 | 15 /* Lexical Analyzer */ |
16 | |
17 #include <stdio.h> | |
18 #include <string.h> | |
19 #include <ctype.h> | |
20 #include <stdarg.h> | |
21 #include <errno.h> | |
22 #include <wchar.h> | |
23 #include <stdlib.h> | |
24 #include <assert.h> | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
25 #include <time.h> // for time() and ctime() |
159 | 26 |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
27 #include "rmem.h" |
159 | 28 |
29 #include "stringtable.h" | |
30 | |
31 #include "lexer.h" | |
32 #include "utf.h" | |
33 #include "identifier.h" | |
34 #include "id.h" | |
35 #include "module.h" | |
36 | |
37 #if _WIN32 && __DMC__ | |
38 // from \dm\src\include\setlocal.h | |
39 extern "C" char * __cdecl __locale_decpoint; | |
40 #endif | |
41 | |
42 extern int HtmlNamedEntity(unsigned char *p, int length); | |
43 | |
44 #define LS 0x2028 // UTF line separator | |
45 #define PS 0x2029 // UTF paragraph separator | |
46 | |
/********************************************
 * Do our own char maps
 *
 * cmtable[] holds one flag byte per 8-bit character value.  The flags are
 * filled in by cmtable_init() and queried through isoctal(), ishex() and
 * isidchar(), each of which returns nonzero when the flag is set.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // '0'..'7'
const int CMhex    = 0x2;       // '0'..'9', 'a'..'f', 'A'..'F'
const int CMidchar = 0x4;       // ASCII letters, digits and '_'

inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Populate cmtable[].
 *
 * The classification is done with explicit ASCII ranges rather than
 * isdigit()/isalnum(): those depend on the current C locale, and a locale
 * that reports bytes >= 0x80 as alphanumeric would mark them as identifier
 * characters.  In the "C" locale the resulting table is the same as before.
 */
static void cmtable_init()
{
    for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++)
    {
        const bool digit = ('0' <= c && c <= '9');
        const bool lower = ('a' <= c && c <= 'z');
        const bool upper = ('A' <= c && c <= 'Z');

        if ('0' <= c && c <= '7')
            cmtable[c] |= CMoctal;
        if (digit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
            cmtable[c] |= CMhex;
        if (digit || lower || upper || c == '_')
            cmtable[c] |= CMidchar;
    }
}
73 | |
74 | |
75 /************************* Token **********************************************/ | |
76 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
77 const char *Token::tochars[TOKMAX]; |
159 | 78 |
79 void *Token::operator new(size_t size) | |
80 { Token *t; | |
81 | |
82 if (Lexer::freelist) | |
83 { | |
84 t = Lexer::freelist; | |
85 Lexer::freelist = t->next; | |
86 return t; | |
87 } | |
88 | |
89 return ::operator new(size); | |
90 } | |
91 | |
#ifdef DEBUG
/* Debug aid: write the text of this token, followed by a newline, to stdmsg. */
void Token::print()
{
    const char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
98 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
99 const char *Token::toChars() |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
100 { const char *p; |
159 | 101 static char buffer[3 + 3 * sizeof(value) + 1]; |
102 | |
103 p = buffer; | |
104 switch (value) | |
105 { | |
106 case TOKint32v: | |
107 sprintf(buffer,"%d",(d_int32)int64value); | |
108 break; | |
109 | |
110 case TOKuns32v: | |
111 case TOKcharv: | |
112 case TOKwcharv: | |
113 case TOKdcharv: | |
114 sprintf(buffer,"%uU",(d_uns32)uns64value); | |
115 break; | |
116 | |
117 case TOKint64v: | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
118 sprintf(buffer,"%jdL",int64value); |
159 | 119 break; |
120 | |
121 case TOKuns64v: | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
122 sprintf(buffer,"%juUL",uns64value); |
159 | 123 break; |
124 | |
125 #if IN_GCC | |
126 case TOKfloat32v: | |
127 case TOKfloat64v: | |
128 case TOKfloat80v: | |
129 float80value.format(buffer, sizeof(buffer)); | |
130 break; | |
131 case TOKimaginary32v: | |
132 case TOKimaginary64v: | |
133 case TOKimaginary80v: | |
134 float80value.format(buffer, sizeof(buffer)); | |
135 // %% buffer | |
136 strcat(buffer, "i"); | |
137 break; | |
138 #else | |
139 case TOKfloat32v: | |
140 sprintf(buffer,"%Lgf", float80value); | |
141 break; | |
142 | |
143 case TOKfloat64v: | |
144 sprintf(buffer,"%Lg", float80value); | |
145 break; | |
146 | |
147 case TOKfloat80v: | |
148 sprintf(buffer,"%LgL", float80value); | |
149 break; | |
150 | |
151 case TOKimaginary32v: | |
152 sprintf(buffer,"%Lgfi", float80value); | |
153 break; | |
154 | |
155 case TOKimaginary64v: | |
156 sprintf(buffer,"%Lgi", float80value); | |
157 break; | |
158 | |
159 case TOKimaginary80v: | |
160 sprintf(buffer,"%LgLi", float80value); | |
161 break; | |
162 #endif | |
163 | |
164 case TOKstring: | |
165 #if CSTRINGS | |
166 p = string; | |
167 #else | |
168 { OutBuffer buf; | |
169 | |
170 buf.writeByte('"'); | |
171 for (size_t i = 0; i < len; ) | |
172 { unsigned c; | |
173 | |
174 utf_decodeChar((unsigned char *)ustring, len, &i, &c); | |
175 switch (c) | |
176 { | |
177 case 0: | |
178 break; | |
179 | |
180 case '"': | |
181 case '\\': | |
182 buf.writeByte('\\'); | |
183 default: | |
184 if (isprint(c)) | |
185 buf.writeByte(c); | |
186 else if (c <= 0x7F) | |
187 buf.printf("\\x%02x", c); | |
188 else if (c <= 0xFFFF) | |
189 buf.printf("\\u%04x", c); | |
190 else | |
191 buf.printf("\\U%08x", c); | |
192 continue; | |
193 } | |
194 break; | |
195 } | |
196 buf.writeByte('"'); | |
197 if (postfix) | |
198 buf.writeByte('"'); | |
199 buf.writeByte(0); | |
200 p = (char *)buf.extractData(); | |
201 } | |
202 #endif | |
203 break; | |
204 | |
205 case TOKidentifier: | |
206 case TOKenum: | |
207 case TOKstruct: | |
208 case TOKimport: | |
209 CASE_BASIC_TYPES: | |
210 p = ident->toChars(); | |
211 break; | |
212 | |
213 default: | |
214 p = toChars(value); | |
215 break; | |
216 } | |
217 return p; | |
218 } | |
219 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
220 const char *Token::toChars(enum TOK value) |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
221 { const char *p; |
159 | 222 static char buffer[3 + 3 * sizeof(value) + 1]; |
223 | |
224 p = tochars[value]; | |
225 if (!p) | |
226 { sprintf(buffer,"TOK%d",value); | |
227 p = buffer; | |
228 } | |
229 return p; | |
230 } | |
231 | |
232 /*************************** Lexer ********************************************/ | |
233 | |
234 Token *Lexer::freelist = NULL; | |
235 StringTable Lexer::stringtable; | |
236 OutBuffer Lexer::stringbuffer; | |
237 | |
238 Lexer::Lexer(Module *mod, | |
239 unsigned char *base, unsigned begoffset, unsigned endoffset, | |
240 int doDocComment, int commentToken) | |
241 : loc(mod, 1) | |
242 { | |
243 //printf("Lexer::Lexer(%p,%d)\n",base,length); | |
244 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); | |
245 memset(&token,0,sizeof(token)); | |
246 this->base = base; | |
247 this->end = base + endoffset; | |
248 p = base + begoffset; | |
249 this->mod = mod; | |
250 this->doDocComment = doDocComment; | |
251 this->anyToken = 0; | |
252 this->commentToken = commentToken; | |
253 //initKeywords(); | |
254 | |
255 /* If first line starts with '#!', ignore the line | |
256 */ | |
257 | |
258 if (p[0] == '#' && p[1] =='!') | |
259 { | |
260 p += 2; | |
261 while (1) | |
262 { unsigned char c = *p; | |
263 switch (c) | |
264 { | |
265 case '\n': | |
266 p++; | |
267 break; | |
268 | |
269 case '\r': | |
270 p++; | |
271 if (*p == '\n') | |
272 p++; | |
273 break; | |
274 | |
275 case 0: | |
276 case 0x1A: | |
277 break; | |
278 | |
279 default: | |
280 if (c & 0x80) | |
281 { unsigned u = decodeUTF(); | |
282 if (u == PS || u == LS) | |
283 break; | |
284 } | |
285 p++; | |
286 continue; | |
287 } | |
288 break; | |
289 } | |
290 loc.linnum = 2; | |
291 } | |
292 } | |
293 | |
294 | |
295 void Lexer::error(const char *format, ...) | |
296 { | |
297 if (mod && !global.gag) | |
298 { | |
299 char *p = loc.toChars(); | |
300 if (*p) | |
301 fprintf(stdmsg, "%s: ", p); | |
302 mem.free(p); | |
303 | |
304 va_list ap; | |
305 va_start(ap, format); | |
306 vfprintf(stdmsg, format, ap); | |
307 va_end(ap); | |
308 | |
309 fprintf(stdmsg, "\n"); | |
310 fflush(stdmsg); | |
311 | |
312 if (global.errors >= 20) // moderate blizzard of cascading messages | |
313 fatal(); | |
314 } | |
315 global.errors++; | |
316 } | |
317 | |
318 void Lexer::error(Loc loc, const char *format, ...) | |
319 { | |
320 if (mod && !global.gag) | |
321 { | |
322 char *p = loc.toChars(); | |
323 if (*p) | |
324 fprintf(stdmsg, "%s: ", p); | |
325 mem.free(p); | |
326 | |
327 va_list ap; | |
328 va_start(ap, format); | |
329 vfprintf(stdmsg, format, ap); | |
330 va_end(ap); | |
331 | |
332 fprintf(stdmsg, "\n"); | |
333 fflush(stdmsg); | |
334 | |
335 if (global.errors >= 20) // moderate blizzard of cascading messages | |
336 fatal(); | |
337 } | |
338 global.errors++; | |
339 } | |
340 | |
341 TOK Lexer::nextToken() | |
342 { Token *t; | |
343 | |
344 if (token.next) | |
345 { | |
346 t = token.next; | |
347 memcpy(&token,t,sizeof(Token)); | |
348 t->next = freelist; | |
349 freelist = t; | |
350 } | |
351 else | |
352 { | |
353 scan(&token); | |
354 } | |
355 //token.print(); | |
356 return token.value; | |
357 } | |
358 | |
359 Token *Lexer::peek(Token *ct) | |
360 { Token *t; | |
361 | |
362 if (ct->next) | |
363 t = ct->next; | |
364 else | |
365 { | |
366 t = new Token(); | |
367 scan(t); | |
368 t->next = NULL; | |
369 ct->next = t; | |
370 } | |
371 return t; | |
372 } | |
373 | |
717
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
374 /*********************** |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
375 * Look ahead at next token's value. |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
376 */ |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
377 |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
378 TOK Lexer::peekNext() |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
379 { |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
380 return peek(&token)->value; |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
381 } |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
382 |
159 | 383 /********************************* |
384 * tk is on the opening (. | |
385 * Look ahead and return token that is past the closing ). | |
386 */ | |
387 | |
388 Token *Lexer::peekPastParen(Token *tk) | |
389 { | |
390 //printf("peekPastParen()\n"); | |
391 int parens = 1; | |
392 int curlynest = 0; | |
393 while (1) | |
394 { | |
395 tk = peek(tk); | |
396 //tk->print(); | |
397 switch (tk->value) | |
398 { | |
399 case TOKlparen: | |
400 parens++; | |
401 continue; | |
402 | |
403 case TOKrparen: | |
404 --parens; | |
405 if (parens) | |
406 continue; | |
407 tk = peek(tk); | |
408 break; | |
409 | |
410 case TOKlcurly: | |
411 curlynest++; | |
412 continue; | |
413 | |
414 case TOKrcurly: | |
415 if (--curlynest >= 0) | |
416 continue; | |
417 break; | |
418 | |
419 case TOKsemicolon: | |
420 if (curlynest) | |
421 continue; | |
422 break; | |
423 | |
424 case TOKeof: | |
425 break; | |
426 | |
427 default: | |
428 continue; | |
429 } | |
430 return tk; | |
431 } | |
432 } | |
433 | |
434 /********************************** | |
435 * Determine if string is a valid Identifier. | |
436 * Placed here because of commonality with Lexer functionality. | |
437 * Returns: | |
438 * 0 invalid | |
439 */ | |
440 | |
441 int Lexer::isValidIdentifier(char *p) | |
442 { | |
443 size_t len; | |
444 size_t idx; | |
445 | |
446 if (!p || !*p) | |
447 goto Linvalid; | |
448 | |
449 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars | |
450 goto Linvalid; | |
451 | |
452 len = strlen(p); | |
453 idx = 0; | |
454 while (p[idx]) | |
455 { dchar_t dc; | |
456 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
457 const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); |
159 | 458 if (q) |
459 goto Linvalid; | |
460 | |
461 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) | |
462 goto Linvalid; | |
463 } | |
464 return 1; | |
465 | |
466 Linvalid: | |
467 return 0; | |
468 } | |
469 | |
470 /**************************** | |
471 * Turn next token in buffer into a token. | |
472 */ | |
473 | |
// Lexer::scan: read the next token starting at the input pointer p and store it in *t.
// t->ptr is set to the first byte of the token.  White space and line ends are consumed
// inside the loop and never returned.  Comments are returned as TOKcomment only when
// commentToken is set; otherwise they are skipped, and documentation comments are passed
// to getDocComment() when doDocComment is set.  A line starting with # is handed to pragma().
// A 0 or 0x1A byte yields TOKeof.  loc.linnum is advanced as line ends are consumed.
474 void Lexer::scan(Token *t) | |
475 { | |
476 unsigned lastLine = loc.linnum; | |
477 unsigned linnum; | |
478 | |
479 t->blockComment = NULL; | |
480 t->lineComment = NULL; | |
481 while (1) | |
482 { | |
483 t->ptr = p; | |
484 //printf("p = %p, *p = '%c'\n",p,*p); | |
485 switch (*p) | |
486 { | |
487 case 0: | |
488 case 0x1A: | |
489 t->value = TOKeof; // end of file | |
490 return; | |
491 | |
492 case ' ': | |
493 case '\t': | |
494 case '\v': | |
495 case '\f': | |
496 p++; | |
497 continue; // skip white space | |
498 | |
// A CR directly followed by LF is counted as one line: the count happens when the LF is consumed.
499 case '\r': | |
500 p++; | |
501 if (*p != '\n') // if CR stands by itself | |
502 loc.linnum++; | |
503 continue; // skip white space | |
504 | |
505 case '\n': | |
506 p++; | |
507 loc.linnum++; | |
508 continue; // skip white space | |
509 | |
510 case '0': case '1': case '2': case '3': case '4': | |
511 case '5': case '6': case '7': case '8': case '9': | |
512 t->value = number(t); | |
513 return; | |
514 | |
515 #if CSTRINGS | |
516 case '\'': | |
517 t->value = charConstant(t, 0); | |
518 return; | |
519 | |
520 case '"': | |
521 t->value = stringConstant(t,0); | |
522 return; | |
523 | |
524 case 'l': | |
525 case 'L': | |
526 if (p[1] == '\'') | |
527 { | |
528 p++; | |
529 t->value = charConstant(t, 1); | |
530 return; | |
531 } | |
532 else if (p[1] == '"') | |
533 { | |
534 p++; | |
535 t->value = stringConstant(t, 1); | |
536 return; | |
537 } | |
538 #else | |
539 case '\'': | |
540 t->value = charConstant(t,0); | |
541 return; | |
542 | |
// An r followed by a double quote starts a wysiwyg string: step onto the quote and
// fall through to the backquote case, which passes the delimiter at p on.
543 case 'r': | |
544 if (p[1] != '"') | |
545 goto case_ident; | |
546 p++; | |
547 case '`': | |
548 t->value = wysiwygStringConstant(t, *p); | |
549 return; | |
550 | |
551 case 'x': | |
552 if (p[1] != '"') | |
553 goto case_ident; | |
554 p++; | |
555 t->value = hexStringConstant(t); | |
556 return; | |
557 | |
336 | 558 #if DMDV2 |
159 | 559 case 'q': |
560 if (p[1] == '"') | |
561 { | |
562 p++; | |
563 t->value = delimitedStringConstant(t); | |
564 return; | |
565 } | |
566 else if (p[1] == '{') | |
567 { | |
568 p++; | |
569 t->value = tokenStringConstant(t); | |
570 return; | |
571 } | |
572 else | |
573 goto case_ident; | |
574 #endif | |
575 | |
576 case '"': | |
577 t->value = escapeStringConstant(t,0); | |
578 return; | |
579 | |
580 case '\\': // escaped string literal | |
581 { unsigned c; | |
582 | |
583 stringbuffer.reset(); | |
584 do | |
585 { | |
586 p++; | |
587 switch (*p) | |
588 { | |
589 case 'u': | |
590 case 'U': | |
591 case '&': | |
592 c = escapeSequence(); | |
593 stringbuffer.writeUTF8(c); | |
594 break; | |
595 | |
596 default: | |
597 c = escapeSequence(); | |
598 stringbuffer.writeByte(c); | |
599 break; | |
600 } | |
601 } while (*p == '\\'); | |
602 t->len = stringbuffer.offset; | |
603 stringbuffer.writeByte(0); | |
604 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
605 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
606 t->postfix = 0; | |
607 t->value = TOKstring; | |
608 return; | |
609 } | |
610 | |
611 case 'l': | |
612 case 'L': | |
613 #endif | |
614 case 'a': case 'b': case 'c': case 'd': case 'e': | |
615 case 'f': case 'g': case 'h': case 'i': case 'j': | |
616 case 'k': case 'm': case 'n': case 'o': | |
336 | 617 #if DMDV2 |
159 | 618 case 'p': /*case 'q': case 'r':*/ case 's': case 't': |
619 #else | |
620 case 'p': case 'q': /*case 'r':*/ case 's': case 't': | |
621 #endif | |
622 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': | |
623 case 'z': | |
624 case 'A': case 'B': case 'C': case 'D': case 'E': | |
625 case 'F': case 'G': case 'H': case 'I': case 'J': | |
626 case 'K': case 'M': case 'N': case 'O': | |
627 case 'P': case 'Q': case 'R': case 'S': case 'T': | |
628 case 'U': case 'V': case 'W': case 'X': case 'Y': | |
629 case 'Z': | |
630 case '_': | |
631 case_ident: | |
632 { unsigned char c; | |
633 StringValue *sv; | |
634 Identifier *id; | |
635 | |
// Advance p past the identifier: bytes flagged by isidchar, or bytes with the high bit set
// whose decoded code point satisfies isUniAlpha.  The text is then looked up in stringtable,
// an Identifier is created on first sight, and the Identifier value becomes the token kind.
636 do | |
637 { | |
638 c = *++p; | |
639 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); | |
640 sv = stringtable.update((char *)t->ptr, p - t->ptr); | |
641 id = (Identifier *) sv->ptrvalue; | |
642 if (!id) | |
643 { id = new Identifier(sv->lstring.string,TOKidentifier); | |
644 sv->ptrvalue = id; | |
645 } | |
646 t->ident = id; | |
647 t->value = (enum TOK) id->value; | |
648 anyToken = 1; | |
649 if (*t->ptr == '_') // if special identifier token | |
650 { | |
651 static char date[11+1]; | |
652 static char time[8+1]; | |
653 static char timestamp[24+1]; | |
654 | |
// The three strings are cut out of one ctime() result the first time any
// identifier starting with an underscore is scanned.
655 if (!date[0]) // lazy evaluation | |
656 { time_t t; | |
657 char *p; | |
658 | |
659 ::time(&t); | |
660 p = ctime(&t); | |
661 assert(p); | |
662 sprintf(date, "%.6s %.4s", p + 4, p + 20); | |
663 sprintf(time, "%.8s", p + 11); | |
664 sprintf(timestamp, "%.24s", p); | |
665 } | |
666 | |
336 | 667 #if DMDV1 |
159 | 668 if (mod && id == Id::FILE) |
669 { | |
670 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
671 goto Lstr; |
159 | 672 } |
673 else if (mod && id == Id::LINE) | |
674 { | |
675 t->value = TOKint64v; | |
676 t->uns64value = loc.linnum; | |
677 } | |
336 | 678 else |
679 #endif | |
680 if (id == Id::DATE) | |
159 | 681 { |
682 t->ustring = (unsigned char *)date; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
683 goto Lstr; |
159 | 684 } |
685 else if (id == Id::TIME) | |
686 { | |
687 t->ustring = (unsigned char *)time; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
688 goto Lstr; |
159 | 689 } |
690 else if (id == Id::VENDOR) | |
691 { | |
664
eef8ac26c66c
Some missed LLVMDC -> LDC.
Christian Kamm <kamm incasoftware de>
parents:
658
diff
changeset
|
692 t->ustring = (unsigned char *)"LDC"; |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
693 goto Lstr; |
159 | 694 } |
695 else if (id == Id::TIMESTAMP) | |
696 { | |
697 t->ustring = (unsigned char *)timestamp; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
698 Lstr: |
159 | 699 t->value = TOKstring; |
700 Llen: | |
701 t->postfix = 0; | |
702 t->len = strlen((char *)t->ustring); | |
703 } | |
704 else if (id == Id::VERSIONX) | |
705 { unsigned major = 0; | |
706 unsigned minor = 0; | |
707 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
708 for (const char *p = global.version + 1; 1; p++) |
159 | 709 { |
710 char c = *p; | |
711 if (isdigit(c)) | |
712 minor = minor * 10 + c - '0'; | |
713 else if (c == '.') | |
714 { major = minor; | |
715 minor = 0; | |
716 } | |
717 else | |
718 break; | |
719 } | |
720 t->value = TOKint64v; | |
721 t->uns64value = major * 1000 + minor; | |
722 } | |
336 | 723 #if DMDV2 |
159 | 724 else if (id == Id::EOFX) |
725 { | |
726 t->value = TOKeof; | |
727 // Advance scanner to end of file | |
728 while (!(*p == 0 || *p == 0x1A)) | |
729 p++; | |
730 } | |
731 #endif | |
732 } | |
733 //printf("t->value = %d\n",t->value); | |
734 return; | |
735 } | |
736 | |
737 case '/': | |
738 p++; | |
739 switch (*p) | |
740 { | |
741 case '=': | |
742 p++; | |
743 t->value = TOKdivass; | |
744 return; | |
745 | |
746 case '*': | |
747 p++; | |
748 linnum = loc.linnum; | |
749 while (1) | |
750 { | |
751 while (1) | |
752 { unsigned char c = *p; | |
753 switch (c) | |
754 { | |
755 case '/': | |
756 break; | |
757 | |
758 case '\n': | |
759 loc.linnum++; | |
760 p++; | |
761 continue; | |
762 | |
763 case '\r': | |
764 p++; | |
765 if (*p != '\n') | |
766 loc.linnum++; | |
767 continue; | |
768 | |
769 case 0: | |
770 case 0x1A: | |
771 error("unterminated /* */ comment"); | |
772 p = end; | |
773 t->value = TOKeof; | |
774 return; | |
775 | |
776 default: | |
777 if (c & 0x80) | |
778 { unsigned u = decodeUTF(); | |
779 if (u == PS || u == LS) | |
780 loc.linnum++; | |
781 } | |
782 p++; | |
783 continue; | |
784 } | |
785 break; | |
786 } | |
// p is stepped past the slash just found.  The comment ends only if that slash is preceded
// by a star and the star is not the one belonging to the comment opener.
787 p++; | |
788 if (p[-2] == '*' && p - 3 != t->ptr) | |
789 break; | |
790 } | |
791 if (commentToken) | |
792 { | |
793 t->value = TOKcomment; | |
794 return; | |
795 } | |
796 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) | |
797 { // if /** but not /**/ | |
798 getDocComment(t, lastLine == linnum); | |
799 } | |
800 continue; | |
801 | |
802 case '/': // do // style comments | |
803 linnum = loc.linnum; | |
804 while (1) | |
805 { unsigned char c = *++p; | |
806 switch (c) | |
807 { | |
808 case '\n': | |
809 break; | |
810 | |
811 case '\r': | |
812 if (p[1] == '\n') | |
813 p++; | |
814 break; | |
815 | |
816 case 0: | |
817 case 0x1A: | |
818 if (commentToken) | |
819 { | |
820 p = end; | |
821 t->value = TOKcomment; | |
822 return; | |
823 } | |
824 if (doDocComment && t->ptr[2] == '/') | |
825 getDocComment(t, lastLine == linnum); | |
826 p = end; | |
827 t->value = TOKeof; | |
828 return; | |
829 | |
830 default: | |
831 if (c & 0x80) | |
832 { unsigned u = decodeUTF(); | |
833 if (u == PS || u == LS) | |
834 break; | |
835 } | |
836 continue; | |
837 } | |
838 break; | |
839 } | |
840 | |
841 if (commentToken) | |
842 { | |
843 p++; | |
844 loc.linnum++; | |
845 t->value = TOKcomment; | |
846 return; | |
847 } | |
848 if (doDocComment && t->ptr[2] == '/') | |
849 getDocComment(t, lastLine == linnum); | |
850 | |
851 p++; | |
852 loc.linnum++; | |
853 continue; | |
854 | |
// Nesting comment: nest counts the openers that have not been closed yet,
// and the comment ends when it drops to zero.
855 case '+': | |
856 { int nest; | |
857 | |
858 linnum = loc.linnum; | |
859 p++; | |
860 nest = 1; | |
861 while (1) | |
862 { unsigned char c = *p; | |
863 switch (c) | |
864 { | |
865 case '/': | |
866 p++; | |
867 if (*p == '+') | |
868 { | |
869 p++; | |
870 nest++; | |
871 } | |
872 continue; | |
873 | |
874 case '+': | |
875 p++; | |
876 if (*p == '/') | |
877 { | |
878 p++; | |
879 if (--nest == 0) | |
880 break; | |
881 } | |
882 continue; | |
883 | |
884 case '\r': | |
885 p++; | |
886 if (*p != '\n') | |
887 loc.linnum++; | |
888 continue; | |
889 | |
890 case '\n': | |
891 loc.linnum++; | |
892 p++; | |
893 continue; | |
894 | |
895 case 0: | |
896 case 0x1A: | |
897 error("unterminated /+ +/ comment"); | |
898 p = end; | |
899 t->value = TOKeof; | |
900 return; | |
901 | |
902 default: | |
903 if (c & 0x80) | |
904 { unsigned u = decodeUTF(); | |
905 if (u == PS || u == LS) | |
906 loc.linnum++; | |
907 } | |
908 p++; | |
909 continue; | |
910 } | |
911 break; | |
912 } | |
913 if (commentToken) | |
914 { | |
915 t->value = TOKcomment; | |
916 return; | |
917 } | |
918 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) | |
919 { // if /++ but not /++/ | |
920 getDocComment(t, lastLine == linnum); | |
921 } | |
922 continue; | |
923 } | |
924 } | |
925 t->value = TOKdiv; | |
926 return; | |
927 | |
928 case '.': | |
929 p++; | |
930 if (isdigit(*p)) | |
931 { /* Note that we don't allow ._1 and ._ as being | |
932 * valid floating point numbers. | |
933 */ | |
934 p--; | |
935 t->value = inreal(t); | |
936 } | |
937 else if (p[0] == '.') | |
938 { | |
939 if (p[1] == '.') | |
940 { p += 2; | |
941 t->value = TOKdotdotdot; | |
942 } | |
943 else | |
944 { p++; | |
945 t->value = TOKslice; | |
946 } | |
947 } | |
948 else | |
949 t->value = TOKdot; | |
950 return; | |
951 | |
952 case '&': | |
953 p++; | |
954 if (*p == '=') | |
955 { p++; | |
956 t->value = TOKandass; | |
957 } | |
958 else if (*p == '&') | |
959 { p++; | |
960 t->value = TOKandand; | |
961 } | |
962 else | |
963 t->value = TOKand; | |
964 return; | |
965 | |
966 case '|': | |
967 p++; | |
968 if (*p == '=') | |
969 { p++; | |
970 t->value = TOKorass; | |
971 } | |
972 else if (*p == '|') | |
973 { p++; | |
974 t->value = TOKoror; | |
975 } | |
976 else | |
977 t->value = TOKor; | |
978 return; | |
979 | |
980 case '-': | |
981 p++; | |
982 if (*p == '=') | |
983 { p++; | |
984 t->value = TOKminass; | |
985 } | |
986 #if 0 | |
987 else if (*p == '>') | |
988 { p++; | |
989 t->value = TOKarrow; | |
990 } | |
991 #endif | |
992 else if (*p == '-') | |
993 { p++; | |
994 t->value = TOKminusminus; | |
995 } | |
996 else | |
997 t->value = TOKmin; | |
998 return; | |
999 | |
1000 case '+': | |
1001 p++; | |
1002 if (*p == '=') | |
1003 { p++; | |
1004 t->value = TOKaddass; | |
1005 } | |
1006 else if (*p == '+') | |
1007 { p++; | |
1008 t->value = TOKplusplus; | |
1009 } | |
1010 else | |
1011 t->value = TOKadd; | |
1012 return; | |
1013 | |
1014 case '<': | |
1015 p++; | |
1016 if (*p == '=') | |
1017 { p++; | |
1018 t->value = TOKle; // <= | |
1019 } | |
1020 else if (*p == '<') | |
1021 { p++; | |
1022 if (*p == '=') | |
1023 { p++; | |
1024 t->value = TOKshlass; // <<= | |
1025 } | |
1026 else | |
1027 t->value = TOKshl; // << | |
1028 } | |
1029 else if (*p == '>') | |
1030 { p++; | |
1031 if (*p == '=') | |
1032 { p++; | |
1033 t->value = TOKleg; // <>= | |
1034 } | |
1035 else | |
1036 t->value = TOKlg; // <> | |
1037 } | |
1038 else | |
1039 t->value = TOKlt; // < | |
1040 return; | |
1041 | |
1042 case '>': | |
1043 p++; | |
1044 if (*p == '=') | |
1045 { p++; | |
1046 t->value = TOKge; // >= | |
1047 } | |
1048 else if (*p == '>') | |
1049 { p++; | |
1050 if (*p == '=') | |
1051 { p++; | |
1052 t->value = TOKshrass; // >>= | |
1053 } | |
1054 else if (*p == '>') | |
1055 { p++; | |
1056 if (*p == '=') | |
1057 { p++; | |
1058 t->value = TOKushrass; // >>>= | |
1059 } | |
1060 else | |
1061 t->value = TOKushr; // >>> | |
1062 } | |
1063 else | |
1064 t->value = TOKshr; // >> | |
1065 } | |
1066 else | |
1067 t->value = TOKgt; // > | |
1068 return; | |
1069 | |
1070 case '!': | |
1071 p++; | |
1072 if (*p == '=') | |
1073 { p++; | |
1074 if (*p == '=' && global.params.Dversion == 1) | |
1075 { p++; | |
1076 t->value = TOKnotidentity; // !== | |
1077 } | |
1078 else | |
1079 t->value = TOKnotequal; // != | |
1080 } | |
1081 else if (*p == '<') | |
1082 { p++; | |
1083 if (*p == '>') | |
1084 { p++; | |
1085 if (*p == '=') | |
1086 { p++; | |
1087 t->value = TOKunord; // !<>= | |
1088 } | |
1089 else | |
1090 t->value = TOKue; // !<> | |
1091 } | |
1092 else if (*p == '=') | |
1093 { p++; | |
1094 t->value = TOKug; // !<= | |
1095 } | |
1096 else | |
1097 t->value = TOKuge; // !< | |
1098 } | |
1099 else if (*p == '>') | |
1100 { p++; | |
1101 if (*p == '=') | |
1102 { p++; | |
1103 t->value = TOKul; // !>= | |
1104 } | |
1105 else | |
1106 t->value = TOKule; // !> | |
1107 } | |
1108 else | |
1109 t->value = TOKnot; // ! | |
1110 return; | |
1111 | |
1112 case '=': | |
1113 p++; | |
1114 if (*p == '=') | |
1115 { p++; | |
1116 if (*p == '=' && global.params.Dversion == 1) | |
1117 { p++; | |
1118 t->value = TOKidentity; // === | |
1119 } | |
1120 else | |
1121 t->value = TOKequal; // == | |
1122 } | |
1123 else | |
1124 t->value = TOKassign; // = | |
1125 return; | |
1126 | |
1127 case '~': | |
1128 p++; | |
1129 if (*p == '=') | |
1130 { p++; | |
1131 t->value = TOKcatass; // ~= | |
1132 } | |
1133 else | |
1134 t->value = TOKtilde; // ~ | |
1135 return; | |
1136 | |
1137 #define SINGLE(c,tok) case c: p++; t->value = tok; return; | |
1138 | |
1139 SINGLE('(', TOKlparen) | |
1140 SINGLE(')', TOKrparen) | |
1141 SINGLE('[', TOKlbracket) | |
1142 SINGLE(']', TOKrbracket) | |
1143 SINGLE('{', TOKlcurly) | |
1144 SINGLE('}', TOKrcurly) | |
1145 SINGLE('?', TOKquestion) | |
1146 SINGLE(',', TOKcomma) | |
1147 SINGLE(';', TOKsemicolon) | |
1148 SINGLE(':', TOKcolon) | |
1149 SINGLE('$', TOKdollar) | |
1150 | |
1151 #undef SINGLE | |
1152 | |
1153 #define DOUBLE(c1,tok1,c2,tok2) \ | |
1154 case c1: \ | |
1155 p++; \ | |
1156 if (*p == c2) \ | |
1157 { p++; \ | |
1158 t->value = tok2; \ | |
1159 } \ | |
1160 else \ | |
1161 t->value = tok1; \ | |
1162 return; | |
1163 | |
1164 DOUBLE('*', TOKmul, '=', TOKmulass) | |
1165 DOUBLE('%', TOKmod, '=', TOKmodass) | |
1166 DOUBLE('^', TOKxor, '=', TOKxorass) | |
1167 | |
1168 #undef DOUBLE | |
1169 | |
1170 case '#': | |
1171 p++; | |
1172 pragma(); | |
1173 continue; | |
1174 | |
// Any other byte.  One with the high bit set is decoded as UTF: it may start an identifier
// or be a Unicode line or paragraph separator.  Everything else is reported as unsupported
// and skipped.
1175 default: | |
1176 { unsigned char c = *p; | |
1177 | |
1178 if (c & 0x80) | |
1179 { unsigned u = decodeUTF(); | |
1180 | |
1181 // Check for start of unicode identifier | |
1182 if (isUniAlpha(u)) | |
1183 goto case_ident; | |
1184 | |
1185 if (u == PS || u == LS) | |
1186 { | |
1187 loc.linnum++; | |
1188 p++; | |
1189 continue; | |
1190 } | |
1191 } | |
1192 if (isprint(c)) | |
1193 error("unsupported char '%c'", c); | |
1194 else | |
1195 error("unsupported char 0x%02x", c); | |
1196 p++; | |
1197 continue; | |
1198 } | |
1199 } | |
1200 } | |
1201 } | |
1202 | |
1203 /******************************************* | |
1204 * Parse escape sequence. | |
1205 */ | |
1206 | |
1207 unsigned Lexer::escapeSequence() | |
1208 { unsigned c; | |
1209 int n; | |
1210 int ndigits; | |
1211 | |
1212 c = *p; | |
1213 switch (c) | |
1214 { | |
1215 case '\'': | |
1216 case '"': | |
1217 case '?': | |
1218 case '\\': | |
1219 Lconsume: | |
1220 p++; | |
1221 break; | |
1222 | |
1223 case 'a': c = 7; goto Lconsume; | |
1224 case 'b': c = 8; goto Lconsume; | |
1225 case 'f': c = 12; goto Lconsume; | |
1226 case 'n': c = 10; goto Lconsume; | |
1227 case 'r': c = 13; goto Lconsume; | |
1228 case 't': c = 9; goto Lconsume; | |
1229 case 'v': c = 11; goto Lconsume; | |
1230 | |
1231 case 'u': | |
1232 ndigits = 4; | |
1233 goto Lhex; | |
1234 case 'U': | |
1235 ndigits = 8; | |
1236 goto Lhex; | |
1237 case 'x': | |
1238 ndigits = 2; | |
1239 Lhex: | |
1240 p++; | |
1241 c = *p; | |
1242 if (ishex(c)) | |
1243 { unsigned v; | |
1244 | |
1245 n = 0; | |
1246 v = 0; | |
1247 while (1) | |
1248 { | |
1249 if (isdigit(c)) | |
1250 c -= '0'; | |
1251 else if (islower(c)) | |
1252 c -= 'a' - 10; | |
1253 else | |
1254 c -= 'A' - 10; | |
1255 v = v * 16 + c; | |
1256 c = *++p; | |
1257 if (++n == ndigits) | |
1258 break; | |
1259 if (!ishex(c)) | |
1260 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); | |
1261 break; | |
1262 } | |
1263 } | |
1264 if (ndigits != 2 && !utf_isValidDchar(v)) | |
1265 error("invalid UTF character \\U%08x", v); | |
1266 c = v; | |
1267 } | |
1268 else | |
1269 error("undefined escape hex sequence \\%c\n",c); | |
1270 break; | |
1271 | |
1272 case '&': // named character entity | |
1273 for (unsigned char *idstart = ++p; 1; p++) | |
1274 { | |
1275 switch (*p) | |
1276 { | |
1277 case ';': | |
1278 c = HtmlNamedEntity(idstart, p - idstart); | |
1279 if (c == ~0) | |
1280 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); | |
1281 c = ' '; | |
1282 } | |
1283 p++; | |
1284 break; | |
1285 | |
1286 default: | |
1287 if (isalpha(*p) || | |
1288 (p != idstart + 1 && isdigit(*p))) | |
1289 continue; | |
1290 error("unterminated named entity"); | |
1291 break; | |
1292 } | |
1293 break; | |
1294 } | |
1295 break; | |
1296 | |
1297 case 0: | |
1298 case 0x1A: // end of file | |
1299 c = '\\'; | |
1300 break; | |
1301 | |
1302 default: | |
1303 if (isoctal(c)) | |
1304 { unsigned v; | |
1305 | |
1306 n = 0; | |
1307 v = 0; | |
1308 do | |
1309 { | |
1310 v = v * 8 + (c - '0'); | |
1311 c = *++p; | |
1312 } while (++n < 3 && isoctal(c)); | |
1313 c = v; | |
1314 if (c > 0xFF) | |
1315 error("0%03o is larger than a byte", c); | |
1316 } | |
1317 else | |
1318 error("undefined escape sequence \\%c\n",c); | |
1319 break; | |
1320 } | |
1321 return c; | |
1322 } | |
1323 | |
1324 /************************************** | |
1325 */ | |
1326 | |
1327 TOK Lexer::wysiwygStringConstant(Token *t, int tc) | |
1328 { unsigned c; | |
1329 Loc start = loc; | |
1330 | |
1331 p++; | |
1332 stringbuffer.reset(); | |
1333 while (1) | |
1334 { | |
1335 c = *p++; | |
1336 switch (c) | |
1337 { | |
1338 case '\n': | |
1339 loc.linnum++; | |
1340 break; | |
1341 | |
1342 case '\r': | |
1343 if (*p == '\n') | |
1344 continue; // ignore | |
1345 c = '\n'; // treat EndOfLine as \n character | |
1346 loc.linnum++; | |
1347 break; | |
1348 | |
1349 case 0: | |
1350 case 0x1A: | |
1351 error("unterminated string constant starting at %s", start.toChars()); | |
1352 t->ustring = (unsigned char *)""; | |
1353 t->len = 0; | |
1354 t->postfix = 0; | |
1355 return TOKstring; | |
1356 | |
1357 case '"': | |
1358 case '`': | |
1359 if (c == tc) | |
1360 { | |
1361 t->len = stringbuffer.offset; | |
1362 stringbuffer.writeByte(0); | |
1363 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1364 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1365 stringPostfix(t); | |
1366 return TOKstring; | |
1367 } | |
1368 break; | |
1369 | |
1370 default: | |
1371 if (c & 0x80) | |
1372 { p--; | |
1373 unsigned u = decodeUTF(); | |
1374 p++; | |
1375 if (u == PS || u == LS) | |
1376 loc.linnum++; | |
1377 stringbuffer.writeUTF8(u); | |
1378 continue; | |
1379 } | |
1380 break; | |
1381 } | |
1382 stringbuffer.writeByte(c); | |
1383 } | |
1384 } | |
1385 | |
1386 /************************************** | |
1387 * Lex hex strings: | |
1388 * x"0A ae 34FE BD" | |
1389 */ | |
1390 | |
1391 TOK Lexer::hexStringConstant(Token *t) | |
1392 { unsigned c; | |
1393 Loc start = loc; | |
1394 unsigned n = 0; | |
1395 unsigned v; | |
1396 | |
1397 p++; | |
1398 stringbuffer.reset(); | |
1399 while (1) | |
1400 { | |
1401 c = *p++; | |
1402 switch (c) | |
1403 { | |
1404 case ' ': | |
1405 case '\t': | |
1406 case '\v': | |
1407 case '\f': | |
1408 continue; // skip white space | |
1409 | |
1410 case '\r': | |
1411 if (*p == '\n') | |
1412 continue; // ignore | |
1413 // Treat isolated '\r' as if it were a '\n' | |
1414 case '\n': | |
1415 loc.linnum++; | |
1416 continue; | |
1417 | |
1418 case 0: | |
1419 case 0x1A: | |
1420 error("unterminated string constant starting at %s", start.toChars()); | |
1421 t->ustring = (unsigned char *)""; | |
1422 t->len = 0; | |
1423 t->postfix = 0; | |
1424 return TOKstring; | |
1425 | |
1426 case '"': | |
1427 if (n & 1) | |
1428 { error("odd number (%d) of hex characters in hex string", n); | |
1429 stringbuffer.writeByte(v); | |
1430 } | |
1431 t->len = stringbuffer.offset; | |
1432 stringbuffer.writeByte(0); | |
1433 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1434 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1435 stringPostfix(t); | |
1436 return TOKstring; | |
1437 | |
1438 default: | |
1439 if (c >= '0' && c <= '9') | |
1440 c -= '0'; | |
1441 else if (c >= 'a' && c <= 'f') | |
1442 c -= 'a' - 10; | |
1443 else if (c >= 'A' && c <= 'F') | |
1444 c -= 'A' - 10; | |
1445 else if (c & 0x80) | |
1446 { p--; | |
1447 unsigned u = decodeUTF(); | |
1448 p++; | |
1449 if (u == PS || u == LS) | |
1450 loc.linnum++; | |
1451 else | |
1452 error("non-hex character \\u%x", u); | |
1453 } | |
1454 else | |
1455 error("non-hex character '%c'", c); | |
1456 if (n & 1) | |
1457 { v = (v << 4) | c; | |
1458 stringbuffer.writeByte(v); | |
1459 } | |
1460 else | |
1461 v = c; | |
1462 n++; | |
1463 break; | |
1464 } | |
1465 } | |
1466 } | |
1467 | |
1468 | |
336 | 1469 #if DMDV2 |
159 | 1470 /************************************** |
1471 * Lex delimited strings: | |
1472 * q"(foo(xxx))" // "foo(xxx)" | |
1473 * q"[foo(]" // "foo(" | |
1474 * q"/foo]/" // "foo]" | |
1475 * q"HERE | |
1476 * foo | |
1477 * HERE" // "foo\n" | |
1478 * Input: | |
1479 * p is on the " | |
1480 */ | |
1481 | |
1482 TOK Lexer::delimitedStringConstant(Token *t) | |
1483 { unsigned c; | |
1484 Loc start = loc; | |
1485 unsigned delimleft = 0; | |
1486 unsigned delimright = 0; | |
1487 unsigned nest = 1; | |
1488 unsigned nestcount; | |
1489 Identifier *hereid = NULL; | |
1490 unsigned blankrol = 0; | |
1491 unsigned startline = 0; | |
1492 | |
1493 p++; | |
1494 stringbuffer.reset(); | |
1495 while (1) | |
1496 { | |
1497 c = *p++; | |
1498 //printf("c = '%c'\n", c); | |
1499 switch (c) | |
1500 { | |
1501 case '\n': | |
1502 Lnextline: | |
1503 loc.linnum++; | |
1504 startline = 1; | |
1505 if (blankrol) | |
1506 { blankrol = 0; | |
1507 continue; | |
1508 } | |
1509 if (hereid) | |
1510 { | |
1511 stringbuffer.writeUTF8(c); | |
1512 continue; | |
1513 } | |
1514 break; | |
1515 | |
1516 case '\r': | |
1517 if (*p == '\n') | |
1518 continue; // ignore | |
1519 c = '\n'; // treat EndOfLine as \n character | |
1520 goto Lnextline; | |
1521 | |
1522 case 0: | |
1523 case 0x1A: | |
1524 goto Lerror; | |
1525 | |
1526 default: | |
1527 if (c & 0x80) | |
1528 { p--; | |
1529 c = decodeUTF(); | |
1530 p++; | |
1531 if (c == PS || c == LS) | |
1532 goto Lnextline; | |
1533 } | |
1534 break; | |
1535 } | |
1536 if (delimleft == 0) | |
1537 { delimleft = c; | |
1538 nest = 1; | |
1539 nestcount = 1; | |
1540 if (c == '(') | |
1541 delimright = ')'; | |
1542 else if (c == '{') | |
1543 delimright = '}'; | |
1544 else if (c == '[') | |
1545 delimright = ']'; | |
1546 else if (c == '<') | |
1547 delimright = '>'; | |
1548 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) | |
1549 { // Start of identifier; must be a heredoc | |
1550 Token t; | |
1551 p--; | |
1552 scan(&t); // read in heredoc identifier | |
1553 if (t.value != TOKidentifier) | |
1554 { error("identifier expected for heredoc, not %s", t.toChars()); | |
1555 delimright = c; | |
1556 } | |
1557 else | |
1558 { hereid = t.ident; | |
1559 //printf("hereid = '%s'\n", hereid->toChars()); | |
1560 blankrol = 1; | |
1561 } | |
1562 nest = 0; | |
1563 } | |
1564 else | |
1565 { delimright = c; | |
1566 nest = 0; | |
1567 } | |
1568 } | |
1569 else | |
1570 { | |
1571 if (blankrol) | |
1572 { error("heredoc rest of line should be blank"); | |
1573 blankrol = 0; | |
1574 continue; | |
1575 } | |
1576 if (nest == 1) | |
1577 { | |
1578 if (c == delimleft) | |
1579 nestcount++; | |
1580 else if (c == delimright) | |
1581 { nestcount--; | |
1582 if (nestcount == 0) | |
1583 goto Ldone; | |
1584 } | |
1585 } | |
1586 else if (c == delimright) | |
1587 goto Ldone; | |
1588 if (startline && isalpha(c)) | |
1589 { Token t; | |
1590 unsigned char *psave = p; | |
1591 p--; | |
1592 scan(&t); // read in possible heredoc identifier | |
1593 //printf("endid = '%s'\n", t.ident->toChars()); | |
1594 if (t.value == TOKidentifier && t.ident->equals(hereid)) | |
1595 { /* should check that rest of line is blank | |
1596 */ | |
1597 goto Ldone; | |
1598 } | |
1599 p = psave; | |
1600 } | |
1601 stringbuffer.writeUTF8(c); | |
1602 startline = 0; | |
1603 } | |
1604 } | |
1605 | |
1606 Ldone: | |
1607 if (*p == '"') | |
1608 p++; | |
1609 else | |
1610 error("delimited string must end in %c\"", delimright); | |
1611 t->len = stringbuffer.offset; | |
1612 stringbuffer.writeByte(0); | |
1613 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1614 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1615 stringPostfix(t); | |
1616 return TOKstring; | |
1617 | |
1618 Lerror: | |
1619 error("unterminated string constant starting at %s", start.toChars()); | |
1620 t->ustring = (unsigned char *)""; | |
1621 t->len = 0; | |
1622 t->postfix = 0; | |
1623 return TOKstring; | |
1624 } | |
1625 | |
1626 /************************************** | |
1627 * Lex delimited strings: | |
1628 * q{ foo(xxx) } // " foo(xxx) " | |
1629 * q{foo(} // "foo(" | |
1630 * q{{foo}"}"} // "{foo}"}"" | |
1631 * Input: | |
1632 * p is on the q | |
1633 */ | |
1634 | |
1635 TOK Lexer::tokenStringConstant(Token *t) | |
1636 { | |
1637 unsigned nest = 1; | |
1638 Loc start = loc; | |
1639 unsigned char *pstart = ++p; | |
1640 | |
1641 while (1) | |
1642 { Token tok; | |
1643 | |
1644 scan(&tok); | |
1645 switch (tok.value) | |
1646 { | |
1647 case TOKlcurly: | |
1648 nest++; | |
1649 continue; | |
1650 | |
1651 case TOKrcurly: | |
1652 if (--nest == 0) | |
1653 goto Ldone; | |
1654 continue; | |
1655 | |
1656 case TOKeof: | |
1657 goto Lerror; | |
1658 | |
1659 default: | |
1660 continue; | |
1661 } | |
1662 } | |
1663 | |
1664 Ldone: | |
1665 t->len = p - 1 - pstart; | |
1666 t->ustring = (unsigned char *)mem.malloc(t->len + 1); | |
1667 memcpy(t->ustring, pstart, t->len); | |
1668 t->ustring[t->len] = 0; | |
1669 stringPostfix(t); | |
1670 return TOKstring; | |
1671 | |
1672 Lerror: | |
1673 error("unterminated token string constant starting at %s", start.toChars()); | |
1674 t->ustring = (unsigned char *)""; | |
1675 t->len = 0; | |
1676 t->postfix = 0; | |
1677 return TOKstring; | |
1678 } | |
1679 | |
1680 #endif | |
1681 | |
1682 | |
1683 /************************************** | |
1684 */ | |
1685 | |
1686 TOK Lexer::escapeStringConstant(Token *t, int wide) | |
1687 { unsigned c; | |
1688 Loc start = loc; | |
1689 | |
1690 p++; | |
1691 stringbuffer.reset(); | |
1692 while (1) | |
1693 { | |
1694 c = *p++; | |
1695 switch (c) | |
1696 { | |
1697 case '\\': | |
1698 switch (*p) | |
1699 { | |
1700 case 'u': | |
1701 case 'U': | |
1702 case '&': | |
1703 c = escapeSequence(); | |
1704 stringbuffer.writeUTF8(c); | |
1705 continue; | |
1706 | |
1707 default: | |
1708 c = escapeSequence(); | |
1709 break; | |
1710 } | |
1711 break; | |
1712 | |
1713 case '\n': | |
1714 loc.linnum++; | |
1715 break; | |
1716 | |
1717 case '\r': | |
1718 if (*p == '\n') | |
1719 continue; // ignore | |
1720 c = '\n'; // treat EndOfLine as \n character | |
1721 loc.linnum++; | |
1722 break; | |
1723 | |
1724 case '"': | |
1725 t->len = stringbuffer.offset; | |
1726 stringbuffer.writeByte(0); | |
1727 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1728 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1729 stringPostfix(t); | |
1730 return TOKstring; | |
1731 | |
1732 case 0: | |
1733 case 0x1A: | |
1734 p--; | |
1735 error("unterminated string constant starting at %s", start.toChars()); | |
1736 t->ustring = (unsigned char *)""; | |
1737 t->len = 0; | |
1738 t->postfix = 0; | |
1739 return TOKstring; | |
1740 | |
1741 default: | |
1742 if (c & 0x80) | |
1743 { | |
1744 p--; | |
1745 c = decodeUTF(); | |
1746 if (c == LS || c == PS) | |
1747 { c = '\n'; | |
1748 loc.linnum++; | |
1749 } | |
1750 p++; | |
1751 stringbuffer.writeUTF8(c); | |
1752 continue; | |
1753 } | |
1754 break; | |
1755 } | |
1756 stringbuffer.writeByte(c); | |
1757 } | |
1758 } | |
1759 | |
1760 /************************************** | |
1761 */ | |
1762 | |
1763 TOK Lexer::charConstant(Token *t, int wide) | |
1764 { | |
1765 unsigned c; | |
1766 TOK tk = TOKcharv; | |
1767 | |
1768 //printf("Lexer::charConstant\n"); | |
1769 p++; | |
1770 c = *p++; | |
1771 switch (c) | |
1772 { | |
1773 case '\\': | |
1774 switch (*p) | |
1775 { | |
1776 case 'u': | |
1777 t->uns64value = escapeSequence(); | |
1778 tk = TOKwcharv; | |
1779 break; | |
1780 | |
1781 case 'U': | |
1782 case '&': | |
1783 t->uns64value = escapeSequence(); | |
1784 tk = TOKdcharv; | |
1785 break; | |
1786 | |
1787 default: | |
1788 t->uns64value = escapeSequence(); | |
1789 break; | |
1790 } | |
1791 break; | |
1792 | |
1793 case '\n': | |
1794 L1: | |
1795 loc.linnum++; | |
1796 case '\r': | |
1797 case 0: | |
1798 case 0x1A: | |
1799 case '\'': | |
1800 error("unterminated character constant"); | |
1801 return tk; | |
1802 | |
1803 default: | |
1804 if (c & 0x80) | |
1805 { | |
1806 p--; | |
1807 c = decodeUTF(); | |
1808 p++; | |
1809 if (c == LS || c == PS) | |
1810 goto L1; | |
1811 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) | |
1812 tk = TOKwcharv; | |
1813 else | |
1814 tk = TOKdcharv; | |
1815 } | |
1816 t->uns64value = c; | |
1817 break; | |
1818 } | |
1819 | |
1820 if (*p != '\'') | |
1821 { error("unterminated character constant"); | |
1822 return tk; | |
1823 } | |
1824 p++; | |
1825 return tk; | |
1826 } | |
1827 | |
1828 /*************************************** | |
1829 * Get postfix of string literal. | |
1830 */ | |
1831 | |
1832 void Lexer::stringPostfix(Token *t) | |
1833 { | |
1834 switch (*p) | |
1835 { | |
1836 case 'c': | |
1837 case 'w': | |
1838 case 'd': | |
1839 t->postfix = *p; | |
1840 p++; | |
1841 break; | |
1842 | |
1843 default: | |
1844 t->postfix = 0; | |
1845 break; | |
1846 } | |
1847 } | |
1848 | |
1849 /*************************************** | |
1850 * Read \u or \U unicode sequence | |
1851 * Input: | |
1852 * u 'u' or 'U' | |
1853 */ | |
1854 | |
1855 #if 0 | |
1856 unsigned Lexer::wchar(unsigned u) | |
1857 { | |
1858 unsigned value; | |
1859 unsigned n; | |
1860 unsigned char c; | |
1861 unsigned nchars; | |
1862 | |
1863 nchars = (u == 'U') ? 8 : 4; | |
1864 value = 0; | |
1865 for (n = 0; 1; n++) | |
1866 { | |
1867 ++p; | |
1868 if (n == nchars) | |
1869 break; | |
1870 c = *p; | |
1871 if (!ishex(c)) | |
1872 { error("\\%c sequence must be followed by %d hex characters", u, nchars); | |
1873 break; | |
1874 } | |
1875 if (isdigit(c)) | |
1876 c -= '0'; | |
1877 else if (islower(c)) | |
1878 c -= 'a' - 10; | |
1879 else | |
1880 c -= 'A' - 10; | |
1881 value <<= 4; | |
1882 value |= c; | |
1883 } | |
1884 return value; | |
1885 } | |
1886 #endif | |
1887 | |
1888 /************************************** | |
1889 * Read in a number. | |
1890 * If it's an integer, store it in tok.TKutok.Vlong. | |
1891 * integers can be decimal, octal or hex | |
1892 * Handle the suffixes U, UL, LU, L, etc. | |
1893 * If it's double, store it in tok.TKutok.Vdouble. | |
1894 * Returns: | |
1895 * TKnum | |
1896 * TKdouble,... | |
1897 */ | |
1898 | |
// Lex a numeric literal that starts at p.
//
// The characters of the literal are collected in stringbuffer by a state
// machine; embedded '_' characters are skipped and not collected. As soon
// as the text turns out to be a floating point literal (the 'real' label),
// p is reset to the start of the literal and the work is handed to inreal().
// Otherwise the collected text is converted to an integer, the u/U/l/L
// suffixes are parsed, and the token type is chosen from the suffixes and
// the magnitude of the value.
//
// Output: t->uns64value receives the integer value.
// Returns: TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer,
// or whatever inreal() returns for a floating point literal.
1899 TOK Lexer::number(Token *t) | |
1900 { | |
1901 // We use a state machine to collect numbers | |
1902 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, | |
1903 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, | |
1904 STATE_hexh, STATE_error }; | |
1905 enum STATE state; | |
1906 | |
1907 enum FLAGS | |
1908 { FLAGS_decimal = 1, // decimal | |
1909 FLAGS_unsigned = 2, // u or U suffix | |
1910 FLAGS_long = 4, // l or L suffix | |
1911 }; | |
1912 enum FLAGS flags = FLAGS_decimal; | |
1913 | |
1914 int i; | |
1915 int base; | |
1916 unsigned c; | |
1917 unsigned char *start; | |
1918 TOK result; | |
1919 | |
1920 //printf("Lexer::number()\n"); | |
1921 state = STATE_initial; | |
1922 base = 0; | |
1923 stringbuffer.reset(); | |
1924 start = p; | |
// Each pass looks at *p. A 'break' out of the state switch appends the
// character to stringbuffer and advances p; 'continue' (after a manual
// p++) drops the character; 'goto done' ends the literal at p.
1925 while (1) | |
1926 { | |
1927 c = *p; | |
1928 switch (state) | |
1929 { | |
1930 case STATE_initial: // opening state | |
1931 if (c == '0') | |
1932 state = STATE_0; | |
1933 else | |
1934 state = STATE_decimal; | |
1935 break; | |
1936 | |
// The character after a leading '0' selects hex, binary, octal or real.
1937 case STATE_0: | |
1938 flags = (FLAGS) (flags & ~FLAGS_decimal); | |
1939 switch (c) | |
1940 { | |
1941 #if ZEROH | |
1942 case 'H': // 0h | |
1943 case 'h': | |
1944 goto hexh; | |
1945 #endif | |
1946 case 'X': | |
1947 case 'x': | |
1948 state = STATE_hex0; | |
1949 break; | |
1950 | |
1951 case '.': | |
1952 if (p[1] == '.') // .. is a separate token | |
1953 goto done; | |
1954 case 'i': | |
1955 case 'f': | |
1956 case 'F': | |
1957 goto real; | |
1958 #if ZEROH | |
1959 case 'E': | |
1960 case 'e': | |
1961 goto case_hex; | |
1962 #endif | |
1963 case 'B': | |
1964 case 'b': | |
1965 state = STATE_binary0; | |
1966 break; | |
1967 | |
1968 case '0': case '1': case '2': case '3': | |
1969 case '4': case '5': case '6': case '7': | |
1970 state = STATE_octal; | |
1971 break; | |
1972 | |
1973 #if ZEROH | |
1974 case '8': case '9': case 'A': | |
1975 case 'C': case 'D': case 'F': | |
1976 case 'a': case 'c': case 'd': case 'f': | |
1977 case_hex: | |
1978 state = STATE_hexh; | |
1979 break; | |
1980 #endif | |
1981 case '_': | |
1982 state = STATE_octal; | |
1983 p++; | |
1984 continue; | |
1985 | |
1986 case 'L': | |
1987 if (p[1] == 'i') | |
1988 goto real; | |
1989 goto done; | |
1990 | |
1991 default: | |
1992 goto done; | |
1993 } | |
1994 break; | |
1995 | |
1996 case STATE_decimal: // reading decimal number | |
1997 if (!isdigit(c)) | |
1998 { | |
1999 #if ZEROH | |
2000 if (ishex(c) | |
2001 || c == 'H' || c == 'h' | |
2002 ) | |
2003 goto hexh; | |
2004 #endif | |
2005 if (c == '_') // ignore embedded _ | |
2006 { p++; | |
2007 continue; | |
2008 } | |
2009 if (c == '.' && p[1] != '.') | |
2010 goto real; | |
2011 else if (c == 'i' || c == 'f' || c == 'F' || | |
2012 c == 'e' || c == 'E') | |
2013 { | |
// Note: the other states jump to this label as well.
2014 real: // It's a real number. Back up and rescan as a real | |
2015 p = start; | |
2016 return inreal(t); | |
2017 } | |
2018 else if (c == 'L' && p[1] == 'i') | |
2019 goto real; | |
2020 goto done; | |
2021 } | |
2022 break; | |
2023 | |
// STATE_hex0 means "0x" was read but no hex digit yet; the first hex
// digit moves the machine to STATE_hex.
2024 case STATE_hex0: // reading hex number | |
2025 case STATE_hex: | |
2026 if (!ishex(c)) | |
2027 { | |
2028 if (c == '_') // ignore embedded _ | |
2029 { p++; | |
2030 continue; | |
2031 } | |
2032 if (c == '.' && p[1] != '.') | |
2033 goto real; | |
2034 if (c == 'P' || c == 'p' || c == 'i') | |
2035 goto real; | |
2036 if (state == STATE_hex0) | |
2037 error("Hex digit expected, not '%c'", c); | |
2038 goto done; | |
2039 } | |
2040 state = STATE_hex; | |
2041 break; | |
2042 | |
2043 #if ZEROH | |
2044 hexh: | |
2045 state = STATE_hexh; | |
2046 case STATE_hexh: // parse numbers like 0FFh | |
2047 if (!ishex(c)) | |
2048 { | |
2049 if (c == 'H' || c == 'h') | |
2050 { | |
2051 p++; | |
2052 base = 16; | |
2053 goto done; | |
2054 } | |
2055 else | |
2056 { | |
2057 // Check for something like 1E3 or 0E24 | |
2058 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || | |
2059 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) | |
2060 goto real; | |
2061 error("Hex digit expected, not '%c'", c); | |
2062 goto done; | |
2063 } | |
2064 } | |
2065 break; | |
2066 #endif | |
2067 | |
// STATE_octale remembers that an 8 or 9 was seen; the error is reported
// once, after the loop.
2068 case STATE_octal: // reading octal number | |
2069 case STATE_octale: // reading octal number with non-octal digits | |
2070 if (!isoctal(c)) | |
2071 { | |
2072 #if ZEROH | |
2073 if (ishex(c) | |
2074 || c == 'H' || c == 'h' | |
2075 ) | |
2076 goto hexh; | |
2077 #endif | |
2078 if (c == '_') // ignore embedded _ | |
2079 { p++; | |
2080 continue; | |
2081 } | |
2082 if (c == '.' && p[1] != '.') | |
2083 goto real; | |
2084 if (c == 'i') | |
2085 goto real; | |
2086 if (isdigit(c)) | |
2087 { | |
2088 state = STATE_octale; | |
2089 } | |
2090 else | |
2091 goto done; | |
2092 } | |
2093 break; | |
2094 | |
2095 case STATE_binary0: // starting binary number | |
2096 case STATE_binary: // reading binary number | |
2097 if (c != '0' && c != '1') | |
2098 { | |
2099 #if ZEROH | |
2100 if (ishex(c) | |
2101 || c == 'H' || c == 'h' | |
2102 ) | |
2103 goto hexh; | |
2104 #endif | |
2105 if (c == '_') // ignore embedded _ | |
2106 { p++; | |
2107 continue; | |
2108 } | |
2109 if (state == STATE_binary0) | |
2110 { error("binary digit expected"); | |
2111 state = STATE_error; | |
2112 break; | |
2113 } | |
2114 else | |
2115 goto done; | |
2116 } | |
2117 state = STATE_binary; | |
2118 break; | |
2119 | |
2120 case STATE_error: // for error recovery | |
2121 if (!isdigit(c)) // scan until non-digit | |
2122 goto done; | |
2123 break; | |
2124 | |
2125 default: | |
2126 assert(0); | |
2127 } | |
2128 stringbuffer.writeByte(c); | |
2129 p++; | |
2130 } | |
2131 done: | |
2132 stringbuffer.writeByte(0); // terminate string | |
2133 if (state == STATE_octale) | |
2134 error("Octal digit expected"); | |
2135 | |
2136 uinteger_t n; // unsigned >=64 bit integer type | |
2137 | |
// Shortcut for a single digit: offset 2 is one character plus the
// terminating 0 written above.
2138 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) | |
2139 n = stringbuffer.data[0] - '0'; | |
2140 else | |
2141 { | |
2142 // Convert string to integer | |
2143 #if __DMC__ | |
2144 errno = 0; | |
2145 n = strtoull((char *)stringbuffer.data,NULL,base); | |
2146 if (errno == ERANGE) | |
2147 error("integer overflow"); | |
2148 #else | |
2149 // Not everybody implements strtoull() | |
// Note: this local 'p' hides the member p (the source position) until
// the end of the enclosing else block.
2150 char *p = (char *)stringbuffer.data; | |
2151 int r = 10, d; | |
2152 | |
// Derive the radix from the 0x / 0b / 0 prefix of the collected text.
2153 if (*p == '0') | |
2154 { | |
2155 if (p[1] == 'x' || p[1] == 'X') | |
2156 p += 2, r = 16; | |
2157 else if (p[1] == 'b' || p[1] == 'B') | |
2158 p += 2, r = 2; | |
2159 else if (isdigit(p[1])) | |
2160 p += 1, r = 8; | |
2161 } | |
2162 | |
2163 n = 0; | |
2164 while (1) | |
2165 { | |
2166 if (*p >= '0' && *p <= '9') | |
2167 d = *p - '0'; | |
2168 else if (*p >= 'a' && *p <= 'z') | |
2169 d = *p - 'a' + 10; | |
2170 else if (*p >= 'A' && *p <= 'Z') | |
2171 d = *p - 'A' + 10; | |
2172 else | |
2173 break; | |
2174 if (d >= r) | |
2175 break; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2176 uinteger_t n2 = n * r; |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2177 //printf("n2 / r = %llx, n = %llx\n", n2/r, n); |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
// Overflow check: the multiplication wrapped if dividing does not give n
// back; the addition wrapped if the sum is smaller than n.
2178 if (n2 / r != n || n2 + d < n) |
159 | 2179 { |
2180 error ("integer overflow"); | |
2181 break; | |
2182 } | |
2183 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2184 n = n2 + d; |
159 | 2185 p++; |
2186 } | |
2187 #endif | |
2188 if (sizeof(n) > 8 && | |
2189 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits | |
2190 error("integer overflow"); | |
2191 } | |
2192 | |
2193 // Parse trailing 'u', 'U', 'l' or 'L' in any combination | |
2194 while (1) | |
2195 { unsigned char f; | |
2196 | |
2197 switch (*p) | |
2198 { case 'U': | |
2199 case 'u': | |
2200 f = FLAGS_unsigned; | |
2201 goto L1; | |
2202 | |
2203 case 'l': | |
// The '1 ||' makes this error unconditional.
2204 if (1 || !global.params.useDeprecated) | |
2205 error("'l' suffix is deprecated, use 'L' instead"); | |
2206 case 'L': | |
2207 f = FLAGS_long; | |
2208 L1: | |
2209 p++; | |
// The same kind of suffix given twice is an error.
2210 if (flags & f) | |
2211 error("unrecognized token"); | |
2212 flags = (FLAGS) (flags | f); | |
2213 continue; | |
2214 default: | |
2215 break; | |
2216 } | |
2217 break; | |
2218 } | |
2219 | |
// Choose the token type from the radix/suffix flags and the value.
2220 switch (flags) | |
2221 { | |
2222 case 0: | |
2223 /* Octal or Hexadecimal constant. | |
2224 * First that fits: int, uint, long, ulong | |
2225 */ | |
2226 if (n & 0x8000000000000000LL) | |
2227 result = TOKuns64v; | |
2228 else if (n & 0xFFFFFFFF00000000LL) | |
2229 result = TOKint64v; | |
2230 else if (n & 0x80000000) | |
2231 result = TOKuns32v; | |
2232 else | |
2233 result = TOKint32v; | |
2234 break; | |
2235 | |
2236 case FLAGS_decimal: | |
2237 /* First that fits: int, long, long long | |
2238 */ | |
2239 if (n & 0x8000000000000000LL) | |
2240 { error("signed integer overflow"); | |
2241 result = TOKuns64v; | |
2242 } | |
2243 else if (n & 0xFFFFFFFF80000000LL) | |
2244 result = TOKint64v; | |
2245 else | |
2246 result = TOKint32v; | |
2247 break; | |
2248 | |
2249 case FLAGS_unsigned: | |
2250 case FLAGS_decimal | FLAGS_unsigned: | |
2251 /* First that fits: uint, ulong | |
2252 */ | |
2253 if (n & 0xFFFFFFFF00000000LL) | |
2254 result = TOKuns64v; | |
2255 else | |
2256 result = TOKuns32v; | |
2257 break; | |
2258 | |
2259 case FLAGS_decimal | FLAGS_long: | |
2260 if (n & 0x8000000000000000LL) | |
2261 { error("signed integer overflow"); | |
2262 result = TOKuns64v; | |
2263 } | |
2264 else | |
2265 result = TOKint64v; | |
2266 break; | |
2267 | |
2268 case FLAGS_long: | |
2269 if (n & 0x8000000000000000LL) | |
2270 result = TOKuns64v; | |
2271 else | |
2272 result = TOKint64v; | |
2273 break; | |
2274 | |
2275 case FLAGS_unsigned | FLAGS_long: | |
2276 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: | |
2277 result = TOKuns64v; | |
2278 break; | |
2279 | |
2280 default: | |
2281 #ifdef DEBUG | |
2282 printf("%x\n",flags); | |
2283 #endif | |
2284 assert(0); | |
2285 } | |
2286 t->uns64value = n; | |
2287 return result; | |
2288 } | |
2289 | |
2290 /************************************** | |
2291 * Read in characters, converting them to real. | |
2292 * Bugs: | |
2293 * Exponent overflow not detected. | |
2294 * Too much requested precision is not detected. | |
2295 */ | |
2296 | |
2297 TOK Lexer::inreal(Token *t) | |
2298 #ifdef __DMC__ | |
2299 __in | |
2300 { | |
2301 assert(*p == '.' || isdigit(*p)); | |
2302 } | |
2303 __out (result) | |
2304 { | |
2305 switch (result) | |
2306 { | |
2307 case TOKfloat32v: | |
2308 case TOKfloat64v: | |
2309 case TOKfloat80v: | |
2310 case TOKimaginary32v: | |
2311 case TOKimaginary64v: | |
2312 case TOKimaginary80v: | |
2313 break; | |
2314 | |
2315 default: | |
2316 assert(0); | |
2317 } | |
2318 } | |
2319 __body | |
2320 #endif /* __DMC__ */ | |
2321 { int dblstate; | |
2322 unsigned c; | |
2323 char hex; // is this a hexadecimal-floating-constant? | |
2324 TOK result; | |
2325 | |
2326 //printf("Lexer::inreal()\n"); | |
2327 stringbuffer.reset(); | |
2328 dblstate = 0; | |
2329 hex = 0; | |
2330 Lnext: | |
2331 while (1) | |
2332 { | |
2333 // Get next char from input | |
2334 c = *p++; | |
2335 //printf("dblstate = %d, c = '%c'\n", dblstate, c); | |
2336 while (1) | |
2337 { | |
2338 switch (dblstate) | |
2339 { | |
2340 case 0: // opening state | |
2341 if (c == '0') | |
2342 dblstate = 9; | |
2343 else if (c == '.') | |
2344 dblstate = 3; | |
2345 else | |
2346 dblstate = 1; | |
2347 break; | |
2348 | |
2349 case 9: | |
2350 dblstate = 1; | |
2351 if (c == 'X' || c == 'x') | |
2352 { hex++; | |
2353 break; | |
2354 } | |
2355 case 1: // digits to left of . | |
2356 case 3: // digits to right of . | |
2357 case 7: // continuing exponent digits | |
2358 if (!isdigit(c) && !(hex && isxdigit(c))) | |
2359 { | |
2360 if (c == '_') | |
2361 goto Lnext; // ignore embedded '_' | |
2362 dblstate++; | |
2363 continue; | |
2364 } | |
2365 break; | |
2366 | |
2367 case 2: // no more digits to left of . | |
2368 if (c == '.') | |
2369 { dblstate++; | |
2370 break; | |
2371 } | |
2372 case 4: // no more digits to right of . | |
2373 if ((c == 'E' || c == 'e') || | |
2374 hex && (c == 'P' || c == 'p')) | |
2375 { dblstate = 5; | |
2376 hex = 0; // exponent is always decimal | |
2377 break; | |
2378 } | |
2379 if (hex) | |
2380 error("binary-exponent-part required"); | |
2381 goto done; | |
2382 | |
2383 case 5: // looking immediately to right of E | |
2384 dblstate++; | |
2385 if (c == '-' || c == '+') | |
2386 break; | |
2387 case 6: // 1st exponent digit expected | |
2388 if (!isdigit(c)) | |
2389 error("exponent expected"); | |
2390 dblstate++; | |
2391 break; | |
2392 | |
2393 case 8: // past end of exponent digits | |
2394 goto done; | |
2395 } | |
2396 break; | |
2397 } | |
2398 stringbuffer.writeByte(c); | |
2399 } | |
2400 done: | |
2401 p--; | |
2402 | |
2403 stringbuffer.writeByte(0); | |
2404 | |
2405 #if _WIN32 && __DMC__ | |
2406 char *save = __locale_decpoint; | |
2407 __locale_decpoint = "."; | |
2408 #endif | |
2409 #ifdef IN_GCC | |
2410 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); | |
2411 #else | |
2412 t->float80value = strtold((char *)stringbuffer.data, NULL); | |
2413 #endif | |
2414 errno = 0; | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2415 float strtofres; |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2416 double strtodres; |
159 | 2417 switch (*p) |
2418 { | |
2419 case 'F': | |
2420 case 'f': | |
2421 #ifdef IN_GCC | |
2422 real_t::parse((char *)stringbuffer.data, real_t::Float); | |
2423 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2424 strtofres = strtof((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2425 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2426 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2427 strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2428 errno = 0; |
159 | 2429 #endif |
2430 result = TOKfloat32v; | |
2431 p++; | |
2432 break; | |
2433 | |
2434 default: | |
2435 #ifdef IN_GCC | |
2436 real_t::parse((char *)stringbuffer.data, real_t::Double); | |
2437 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2438 strtodres = strtod((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2439 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2440 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2441 strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2442 errno = 0; |
159 | 2443 #endif |
2444 result = TOKfloat64v; | |
2445 break; | |
2446 | |
2447 case 'l': | |
2448 if (!global.params.useDeprecated) | |
2449 error("'l' suffix is deprecated, use 'L' instead"); | |
2450 case 'L': | |
2451 result = TOKfloat80v; | |
2452 p++; | |
2453 break; | |
2454 } | |
2455 if (*p == 'i' || *p == 'I') | |
2456 { | |
2457 if (!global.params.useDeprecated && *p == 'I') | |
2458 error("'I' suffix is deprecated, use 'i' instead"); | |
2459 p++; | |
2460 switch (result) | |
2461 { | |
2462 case TOKfloat32v: | |
2463 result = TOKimaginary32v; | |
2464 break; | |
2465 case TOKfloat64v: | |
2466 result = TOKimaginary64v; | |
2467 break; | |
2468 case TOKfloat80v: | |
2469 result = TOKimaginary80v; | |
2470 break; | |
2471 } | |
2472 } | |
2473 #if _WIN32 && __DMC__ | |
2474 __locale_decpoint = save; | |
2475 #endif | |
2476 if (errno == ERANGE) | |
2477 error("number is not representable"); | |
2478 return result; | |
2479 } | |
2480 | |
2481 /********************************************* | |
2482 * Do pragma. | |
2483 * Currently, the only pragma supported is: | |
2484 * #line linnum [filespec] | |
2485 */ | |
2486 | |
2487 void Lexer::pragma() | |
2488 { | |
2489 Token tok; | |
2490 int linnum; | |
2491 char *filespec = NULL; | |
2492 Loc loc = this->loc; | |
2493 | |
2494 scan(&tok); | |
2495 if (tok.value != TOKidentifier || tok.ident != Id::line) | |
2496 goto Lerr; | |
2497 | |
2498 scan(&tok); | |
2499 if (tok.value == TOKint32v || tok.value == TOKint64v) | |
2500 linnum = tok.uns64value - 1; | |
2501 else | |
2502 goto Lerr; | |
2503 | |
2504 while (1) | |
2505 { | |
2506 switch (*p) | |
2507 { | |
2508 case 0: | |
2509 case 0x1A: | |
2510 case '\n': | |
2511 Lnewline: | |
2512 this->loc.linnum = linnum; | |
2513 if (filespec) | |
2514 this->loc.filename = filespec; | |
2515 return; | |
2516 | |
2517 case '\r': | |
2518 p++; | |
2519 if (*p != '\n') | |
2520 { p--; | |
2521 goto Lnewline; | |
2522 } | |
2523 continue; | |
2524 | |
2525 case ' ': | |
2526 case '\t': | |
2527 case '\v': | |
2528 case '\f': | |
2529 p++; | |
2530 continue; // skip white space | |
2531 | |
2532 case '_': | |
2533 if (mod && memcmp(p, "__FILE__", 8) == 0) | |
2534 { | |
2535 p += 8; | |
2536 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); | |
2537 } | |
2538 continue; | |
2539 | |
2540 case '"': | |
2541 if (filespec) | |
2542 goto Lerr; | |
2543 stringbuffer.reset(); | |
2544 p++; | |
2545 while (1) | |
2546 { unsigned c; | |
2547 | |
2548 c = *p; | |
2549 switch (c) | |
2550 { | |
2551 case '\n': | |
2552 case '\r': | |
2553 case 0: | |
2554 case 0x1A: | |
2555 goto Lerr; | |
2556 | |
2557 case '"': | |
2558 stringbuffer.writeByte(0); | |
2559 filespec = mem.strdup((char *)stringbuffer.data); | |
2560 p++; | |
2561 break; | |
2562 | |
2563 default: | |
2564 if (c & 0x80) | |
2565 { unsigned u = decodeUTF(); | |
2566 if (u == PS || u == LS) | |
2567 goto Lerr; | |
2568 } | |
2569 stringbuffer.writeByte(c); | |
2570 p++; | |
2571 continue; | |
2572 } | |
2573 break; | |
2574 } | |
2575 continue; | |
2576 | |
2577 default: | |
2578 if (*p & 0x80) | |
2579 { unsigned u = decodeUTF(); | |
2580 if (u == PS || u == LS) | |
2581 goto Lnewline; | |
2582 } | |
2583 goto Lerr; | |
2584 } | |
2585 } | |
2586 | |
2587 Lerr: | |
2588 error(loc, "#line integer [\"filespec\"]\\n expected"); | |
2589 } | |
2590 | |
2591 | |
2592 /******************************************** | |
2593 * Decode UTF character. | |
2594 * Issue error messages for invalid sequences. | |
2595 * Return decoded character, advance p to last character in UTF sequence. | |
2596 */ | |
2597 | |
2598 unsigned Lexer::decodeUTF() | |
2599 { | |
2600 dchar_t u; | |
2601 unsigned char c; | |
2602 unsigned char *s = p; | |
2603 size_t len; | |
2604 size_t idx; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2605 const char *msg; |
159 | 2606 |
2607 c = *s; | |
2608 assert(c & 0x80); | |
2609 | |
2610 // Check length of remaining string up to 6 UTF-8 characters | |
2611 for (len = 1; len < 6 && s[len]; len++) | |
2612 ; | |
2613 | |
2614 idx = 0; | |
2615 msg = utf_decodeChar(s, len, &idx, &u); | |
2616 p += idx - 1; | |
2617 if (msg) | |
2618 { | |
2619 error("%s", msg); | |
2620 } | |
2621 return u; | |
2622 } | |
2623 | |
2624 | |
2625 /*************************************************** | |
2626 * Parse doc comment embedded between t->ptr and p. | |
2627 * Remove trailing blanks and tabs from lines. | |
2628 * Replace all newlines with \n. | |
2629 * Remove leading comment character from each line. | |
2630 * Decide if it's a lineComment or a blockComment. | |
2631 * Append to previous one for this token. | |
2632 */ | |
2633 | |
2634 void Lexer::getDocComment(Token *t, unsigned lineComment) | |
2635 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2636 /* ct tells us which kind of comment it is: '/', '*', or '+' |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2637 */ |
159 | 2638 unsigned char ct = t->ptr[2]; |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2639 |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2640 /* Start of comment text skips over / * *, / + +, or / / / |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2641 */ |
159 | 2642 unsigned char *q = t->ptr + 3; // start of comment text |
2643 | |
2644 unsigned char *qend = p; | |
2645 if (ct == '*' || ct == '+') | |
2646 qend -= 2; | |
2647 | |
2648 /* Scan over initial row of ****'s or ++++'s or ////'s | |
2649 */ | |
2650 for (; q < qend; q++) | |
2651 { | |
2652 if (*q != ct) | |
2653 break; | |
2654 } | |
2655 | |
2656 /* Remove trailing row of ****'s or ++++'s | |
2657 */ | |
2658 if (ct != '/') | |
2659 { | |
2660 for (; q < qend; qend--) | |
2661 { | |
2662 if (qend[-1] != ct) | |
2663 break; | |
2664 } | |
2665 } | |
2666 | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2667 /* Comment is now [q .. qend]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2668 * Canonicalize it into buf[]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2669 */ |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2670 OutBuffer buf; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2671 int linestart = 0; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2672 |
159 | 2673 for (; q < qend; q++) |
2674 { | |
2675 unsigned char c = *q; | |
2676 | |
2677 switch (c) | |
2678 { | |
2679 case '*': | |
2680 case '+': | |
2681 if (linestart && c == ct) | |
2682 { linestart = 0; | |
2683 /* Trim preceding whitespace up to preceding \n | |
2684 */ | |
2685 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2686 buf.offset--; | |
2687 continue; | |
2688 } | |
2689 break; | |
2690 | |
2691 case ' ': | |
2692 case '\t': | |
2693 break; | |
2694 | |
2695 case '\r': | |
2696 if (q[1] == '\n') | |
2697 continue; // skip the \r | |
2698 goto Lnewline; | |
2699 | |
2700 default: | |
2701 if (c == 226) | |
2702 { | |
2703 // If LS or PS | |
2704 if (q[1] == 128 && | |
2705 (q[2] == 168 || q[2] == 169)) | |
2706 { | |
2707 q += 2; | |
2708 goto Lnewline; | |
2709 } | |
2710 } | |
2711 linestart = 0; | |
2712 break; | |
2713 | |
2714 Lnewline: | |
2715 c = '\n'; // replace all newlines with \n | |
2716 case '\n': | |
2717 linestart = 1; | |
2718 | |
2719 /* Trim trailing whitespace | |
2720 */ | |
2721 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2722 buf.offset--; | |
2723 | |
2724 break; | |
2725 } | |
2726 buf.writeByte(c); | |
2727 } | |
2728 | |
2729 // Always end with a newline | |
2730 if (!buf.offset || buf.data[buf.offset - 1] != '\n') | |
2731 buf.writeByte('\n'); | |
2732 | |
2733 buf.writeByte(0); | |
2734 | |
2735 // It's a line comment if the start of the doc comment comes | |
2736 // after other non-whitespace on the same line. | |
2737 unsigned char** dc = (lineComment && anyToken) | |
2738 ? &t->lineComment | |
2739 : &t->blockComment; | |
2740 | |
2741 // Combine with previous doc comment, if any | |
2742 if (*dc) | |
2743 *dc = combineComments(*dc, (unsigned char *)buf.data); | |
2744 else | |
2745 *dc = (unsigned char *)buf.extractData(); | |
2746 } | |
2747 | |
2748 /******************************************** | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2749 * Combine two document comments into one, |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2750 * separated by a newline. |
159 | 2751 */ |
2752 | |
2753 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) | |
2754 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2755 //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2756 |
159 | 2757 unsigned char *c = c2; |
2758 | |
2759 if (c1) | |
2760 { c = c1; | |
2761 if (c2) | |
2762 { size_t len1 = strlen((char *)c1); | |
2763 size_t len2 = strlen((char *)c2); | |
2764 | |
2765 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); | |
2766 memcpy(c, c1, len1); | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2767 if (len1 && c1[len1 - 1] != '\n') |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2768 { c[len1] = '\n'; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2769 len1++; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2770 } |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2771 memcpy(c + len1, c2, len2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2772 c[len1 + len2] = 0; |
159 | 2773 } |
2774 } | |
2775 return c; | |
2776 } | |
2777 | |
2778 /******************************************** | |
2779 * Create an identifier in the string table. | |
2780 */ | |
2781 | |
2782 Identifier *Lexer::idPool(const char *s) | |
2783 { | |
2784 size_t len = strlen(s); | |
2785 StringValue *sv = stringtable.update(s, len); | |
2786 Identifier *id = (Identifier *) sv->ptrvalue; | |
2787 if (!id) | |
2788 { | |
2789 id = new Identifier(sv->lstring.string, TOKidentifier); | |
2790 sv->ptrvalue = id; | |
2791 } | |
2792 return id; | |
2793 } | |
2794 | |
2795 /********************************************* | |
2796 * Create a unique identifier using the prefix s. | |
2797 */ | |
2798 | |
2799 Identifier *Lexer::uniqueId(const char *s, int num) | |
2800 { char buffer[32]; | |
2801 size_t slen = strlen(s); | |
2802 | |
2803 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); | |
2804 sprintf(buffer, "%s%d", s, num); | |
2805 return idPool(buffer); | |
2806 } | |
2807 | |
2808 Identifier *Lexer::uniqueId(const char *s) | |
2809 { | |
2810 static int num; | |
2811 return uniqueId(s, ++num); | |
2812 } | |
2813 | |
2814 /**************************************** | |
2815 */ | |
2816 | |
2817 struct Keyword | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2818 { const char *name; |
159 | 2819 enum TOK value; |
2820 }; | |
2821 | |
2822 static Keyword keywords[] = | |
2823 { | |
2824 // { "", TOK }, | |
2825 | |
2826 { "this", TOKthis }, | |
2827 { "super", TOKsuper }, | |
2828 { "assert", TOKassert }, | |
2829 { "null", TOKnull }, | |
2830 { "true", TOKtrue }, | |
2831 { "false", TOKfalse }, | |
2832 { "cast", TOKcast }, | |
2833 { "new", TOKnew }, | |
2834 { "delete", TOKdelete }, | |
2835 { "throw", TOKthrow }, | |
2836 { "module", TOKmodule }, | |
2837 { "pragma", TOKpragma }, | |
2838 { "typeof", TOKtypeof }, | |
2839 { "typeid", TOKtypeid }, | |
2840 | |
2841 { "template", TOKtemplate }, | |
2842 | |
2843 { "void", TOKvoid }, | |
2844 { "byte", TOKint8 }, | |
2845 { "ubyte", TOKuns8 }, | |
2846 { "short", TOKint16 }, | |
2847 { "ushort", TOKuns16 }, | |
2848 { "int", TOKint32 }, | |
2849 { "uint", TOKuns32 }, | |
2850 { "long", TOKint64 }, | |
2851 { "ulong", TOKuns64 }, | |
2852 { "cent", TOKcent, }, | |
2853 { "ucent", TOKucent, }, | |
2854 { "float", TOKfloat32 }, | |
2855 { "double", TOKfloat64 }, | |
2856 { "real", TOKfloat80 }, | |
2857 | |
2858 { "bool", TOKbool }, | |
2859 { "char", TOKchar }, | |
2860 { "wchar", TOKwchar }, | |
2861 { "dchar", TOKdchar }, | |
2862 | |
2863 { "ifloat", TOKimaginary32 }, | |
2864 { "idouble", TOKimaginary64 }, | |
2865 { "ireal", TOKimaginary80 }, | |
2866 | |
2867 { "cfloat", TOKcomplex32 }, | |
2868 { "cdouble", TOKcomplex64 }, | |
2869 { "creal", TOKcomplex80 }, | |
2870 | |
2871 { "delegate", TOKdelegate }, | |
2872 { "function", TOKfunction }, | |
2873 | |
2874 { "is", TOKis }, | |
2875 { "if", TOKif }, | |
2876 { "else", TOKelse }, | |
2877 { "while", TOKwhile }, | |
2878 { "for", TOKfor }, | |
2879 { "do", TOKdo }, | |
2880 { "switch", TOKswitch }, | |
2881 { "case", TOKcase }, | |
2882 { "default", TOKdefault }, | |
2883 { "break", TOKbreak }, | |
2884 { "continue", TOKcontinue }, | |
2885 { "synchronized", TOKsynchronized }, | |
2886 { "return", TOKreturn }, | |
2887 { "goto", TOKgoto }, | |
2888 { "try", TOKtry }, | |
2889 { "catch", TOKcatch }, | |
2890 { "finally", TOKfinally }, | |
2891 { "with", TOKwith }, | |
2892 { "asm", TOKasm }, | |
2893 { "foreach", TOKforeach }, | |
2894 { "foreach_reverse", TOKforeach_reverse }, | |
2895 { "scope", TOKscope }, | |
2896 | |
2897 { "struct", TOKstruct }, | |
2898 { "class", TOKclass }, | |
2899 { "interface", TOKinterface }, | |
2900 { "union", TOKunion }, | |
2901 { "enum", TOKenum }, | |
2902 { "import", TOKimport }, | |
2903 { "mixin", TOKmixin }, | |
2904 { "static", TOKstatic }, | |
2905 { "final", TOKfinal }, | |
2906 { "const", TOKconst }, | |
2907 { "typedef", TOKtypedef }, | |
2908 { "alias", TOKalias }, | |
2909 { "override", TOKoverride }, | |
2910 { "abstract", TOKabstract }, | |
2911 { "volatile", TOKvolatile }, | |
2912 { "debug", TOKdebug }, | |
2913 { "deprecated", TOKdeprecated }, | |
2914 { "in", TOKin }, | |
2915 { "out", TOKout }, | |
2916 { "inout", TOKinout }, | |
2917 { "lazy", TOKlazy }, | |
2918 { "auto", TOKauto }, | |
2919 | |
2920 { "align", TOKalign }, | |
2921 { "extern", TOKextern }, | |
2922 { "private", TOKprivate }, | |
2923 { "package", TOKpackage }, | |
2924 { "protected", TOKprotected }, | |
2925 { "public", TOKpublic }, | |
2926 { "export", TOKexport }, | |
2927 | |
2928 { "body", TOKbody }, | |
2929 { "invariant", TOKinvariant }, | |
2930 { "unittest", TOKunittest }, | |
2931 { "version", TOKversion }, | |
2932 //{ "manifest", TOKmanifest }, | |
2933 | |
2934 // Added after 1.0 | |
2935 { "ref", TOKref }, | |
2936 { "macro", TOKmacro }, | |
336 | 2937 #if DMDV2 |
159 | 2938 { "pure", TOKpure }, |
2939 { "nothrow", TOKnothrow }, | |
336 | 2940 { "__thread", TOKtls }, |
159 | 2941 { "__traits", TOKtraits }, |
2942 { "__overloadset", TOKoverloadset }, | |
336 | 2943 { "__FILE__", TOKfile }, |
2944 { "__LINE__", TOKline }, | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2945 { "shared", TOKshared }, |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2946 { "immutable", TOKimmutable }, |
159 | 2947 #endif |
2948 }; | |
2949 | |
2950 int Token::isKeyword() | |
2951 { | |
2952 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) | |
2953 { | |
2954 if (keywords[u].value == value) | |
2955 return 1; | |
2956 } | |
2957 return 0; | |
2958 } | |
2959 | |
2960 void Lexer::initKeywords() | |
2961 { StringValue *sv; | |
2962 unsigned u; | |
2963 enum TOK v; | |
2964 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); | |
2965 | |
2966 if (global.params.Dversion == 1) | |
2967 nkeywords -= 2; | |
2968 | |
2969 cmtable_init(); | |
2970 | |
2971 for (u = 0; u < nkeywords; u++) | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2972 { const char *s; |
159 | 2973 |
2974 //printf("keyword[%d] = '%s'\n",u, keywords[u].name); | |
2975 s = keywords[u].name; | |
2976 v = keywords[u].value; | |
2977 sv = stringtable.insert(s, strlen(s)); | |
2978 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); | |
2979 | |
2980 //printf("tochars[%d] = '%s'\n",v, s); | |
2981 Token::tochars[v] = s; | |
2982 } | |
2983 | |
2984 Token::tochars[TOKeof] = "EOF"; | |
2985 Token::tochars[TOKlcurly] = "{"; | |
2986 Token::tochars[TOKrcurly] = "}"; | |
2987 Token::tochars[TOKlparen] = "("; | |
2988 Token::tochars[TOKrparen] = ")"; | |
2989 Token::tochars[TOKlbracket] = "["; | |
2990 Token::tochars[TOKrbracket] = "]"; | |
2991 Token::tochars[TOKsemicolon] = ";"; | |
2992 Token::tochars[TOKcolon] = ":"; | |
2993 Token::tochars[TOKcomma] = ","; | |
2994 Token::tochars[TOKdot] = "."; | |
2995 Token::tochars[TOKxor] = "^"; | |
2996 Token::tochars[TOKxorass] = "^="; | |
2997 Token::tochars[TOKassign] = "="; | |
2998 Token::tochars[TOKconstruct] = "="; | |
336 | 2999 #if DMDV2 |
159 | 3000 Token::tochars[TOKblit] = "="; |
3001 #endif | |
3002 Token::tochars[TOKlt] = "<"; | |
3003 Token::tochars[TOKgt] = ">"; | |
3004 Token::tochars[TOKle] = "<="; | |
3005 Token::tochars[TOKge] = ">="; | |
3006 Token::tochars[TOKequal] = "=="; | |
3007 Token::tochars[TOKnotequal] = "!="; | |
3008 Token::tochars[TOKnotidentity] = "!is"; | |
3009 Token::tochars[TOKtobool] = "!!"; | |
3010 | |
3011 Token::tochars[TOKunord] = "!<>="; | |
3012 Token::tochars[TOKue] = "!<>"; | |
3013 Token::tochars[TOKlg] = "<>"; | |
3014 Token::tochars[TOKleg] = "<>="; | |
3015 Token::tochars[TOKule] = "!>"; | |
3016 Token::tochars[TOKul] = "!>="; | |
3017 Token::tochars[TOKuge] = "!<"; | |
3018 Token::tochars[TOKug] = "!<="; | |
3019 | |
3020 Token::tochars[TOKnot] = "!"; | |
3021 Token::tochars[TOKtobool] = "!!"; | |
3022 Token::tochars[TOKshl] = "<<"; | |
3023 Token::tochars[TOKshr] = ">>"; | |
3024 Token::tochars[TOKushr] = ">>>"; | |
3025 Token::tochars[TOKadd] = "+"; | |
3026 Token::tochars[TOKmin] = "-"; | |
3027 Token::tochars[TOKmul] = "*"; | |
3028 Token::tochars[TOKdiv] = "/"; | |
3029 Token::tochars[TOKmod] = "%"; | |
3030 Token::tochars[TOKslice] = ".."; | |
3031 Token::tochars[TOKdotdotdot] = "..."; | |
3032 Token::tochars[TOKand] = "&"; | |
3033 Token::tochars[TOKandand] = "&&"; | |
3034 Token::tochars[TOKor] = "|"; | |
3035 Token::tochars[TOKoror] = "||"; | |
3036 Token::tochars[TOKarray] = "[]"; | |
3037 Token::tochars[TOKindex] = "[i]"; | |
3038 Token::tochars[TOKaddress] = "&"; | |
3039 Token::tochars[TOKstar] = "*"; | |
3040 Token::tochars[TOKtilde] = "~"; | |
3041 Token::tochars[TOKdollar] = "$"; | |
3042 Token::tochars[TOKcast] = "cast"; | |
3043 Token::tochars[TOKplusplus] = "++"; | |
3044 Token::tochars[TOKminusminus] = "--"; | |
3045 Token::tochars[TOKtype] = "type"; | |
3046 Token::tochars[TOKquestion] = "?"; | |
3047 Token::tochars[TOKneg] = "-"; | |
3048 Token::tochars[TOKuadd] = "+"; | |
3049 Token::tochars[TOKvar] = "var"; | |
3050 Token::tochars[TOKaddass] = "+="; | |
3051 Token::tochars[TOKminass] = "-="; | |
3052 Token::tochars[TOKmulass] = "*="; | |
3053 Token::tochars[TOKdivass] = "/="; | |
3054 Token::tochars[TOKmodass] = "%="; | |
3055 Token::tochars[TOKshlass] = "<<="; | |
3056 Token::tochars[TOKshrass] = ">>="; | |
3057 Token::tochars[TOKushrass] = ">>>="; | |
3058 Token::tochars[TOKandass] = "&="; | |
3059 Token::tochars[TOKorass] = "|="; | |
3060 Token::tochars[TOKcatass] = "~="; | |
3061 Token::tochars[TOKcat] = "~"; | |
3062 Token::tochars[TOKcall] = "call"; | |
3063 Token::tochars[TOKidentity] = "is"; | |
3064 Token::tochars[TOKnotidentity] = "!is"; | |
3065 | |
3066 Token::tochars[TOKorass] = "|="; | |
3067 Token::tochars[TOKidentifier] = "identifier"; | |
3068 | |
3069 // For debugging | |
3070 Token::tochars[TOKdotexp] = "dotexp"; | |
3071 Token::tochars[TOKdotti] = "dotti"; | |
3072 Token::tochars[TOKdotvar] = "dotvar"; | |
3073 Token::tochars[TOKdottype] = "dottype"; | |
3074 Token::tochars[TOKsymoff] = "symoff"; | |
3075 Token::tochars[TOKtypedot] = "typedot"; | |
3076 Token::tochars[TOKarraylength] = "arraylength"; | |
3077 Token::tochars[TOKarrayliteral] = "arrayliteral"; | |
3078 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; | |
3079 Token::tochars[TOKstructliteral] = "structliteral"; | |
3080 Token::tochars[TOKstring] = "string"; | |
3081 Token::tochars[TOKdsymbol] = "symbol"; | |
3082 Token::tochars[TOKtuple] = "tuple"; | |
3083 Token::tochars[TOKdeclaration] = "declaration"; | |
3084 Token::tochars[TOKdottd] = "dottd"; | |
3085 Token::tochars[TOKon_scope_exit] = "scope(exit)"; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3086 Token::tochars[TOKon_scope_success] = "scope(success)"; |
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3087 Token::tochars[TOKon_scope_failure] = "scope(failure)"; |
159 | 3088 } |