Mercurial > projects > ldc
annotate dmd/lexer.c @ 948:780530d1cad3
Revert templates to old behavior.
While emitting a template instantiation only once is good for compile times
and binary sizes, it doesn't work with linkonce linkage as inlined function
bodies could be discarded. Since we don't want to inhibit inlining, templates
are reverted to the previous behavior, where an instantiation is emitted for
each module using it.
In the future, a custom inlining pass may allow us to switch back to
common/weak linkage and reenable smart template instance emission.
author | Christian Kamm <kamm incasoftware de> |
---|---|
date | Sun, 08 Feb 2009 21:44:46 +0100 |
parents | aa953cc960b6 |
children | b30fe7e1dbb9 |
rev | line source |
---|---|
159 | 1 |
2 // Compiler implementation of the D programming language | |
3 // Copyright (c) 1999-2008 by Digital Mars | |
4 // All Rights Reserved | |
5 // written by Walter Bright | |
6 // http://www.digitalmars.com | |
7 // License for redistribution is by either the Artistic License | |
8 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
9 // See the included readme.txt for details. | |
10 | |
872
aa953cc960b6
Apply BlueZeniX's patch for OpenSolaris compatibility. Fixes #158.
Christian Kamm <kamm incasoftware de>
parents:
846
diff
changeset
|
11 #define __C99FEATURES__ 1 |
aa953cc960b6
Apply BlueZeniX's patch for OpenSolaris compatibility. Fixes #158.
Christian Kamm <kamm incasoftware de>
parents:
846
diff
changeset
|
12 |
159 | 13 /* Lexical Analyzer */ |
14 | |
15 #include <stdio.h> | |
16 #include <string.h> | |
17 #include <ctype.h> | |
18 #include <stdarg.h> | |
19 #include <errno.h> | |
20 #include <wchar.h> | |
21 #include <stdlib.h> | |
22 #include <assert.h> | |
23 #include <sys/time.h> | |
697
25a32766ed57
Missing include added.
Christian Kamm <kamm incasoftware de>
parents:
696
diff
changeset
|
24 #include <math.h> |
159 | 25 |
26 #ifdef IN_GCC | |
27 | |
28 #include <time.h> | |
29 #include "mem.h" | |
30 | |
31 #else | |
32 | |
33 #if __GNUC__ | |
34 #include <time.h> | |
35 #endif | |
36 | |
37 #if IN_LLVM | |
38 #include "mem.h" | |
39 #elif _WIN32 | |
40 #include "..\root\mem.h" | |
41 #else | |
42 #include "../root/mem.h" | |
43 #endif | |
44 #endif | |
45 | |
46 #include "stringtable.h" | |
47 | |
48 #include "lexer.h" | |
49 #include "utf.h" | |
50 #include "identifier.h" | |
51 #include "id.h" | |
52 #include "module.h" | |
53 | |
54 #if _WIN32 && __DMC__ | |
55 // from \dm\src\include\setlocal.h | |
56 extern "C" char * __cdecl __locale_decpoint; | |
57 #endif | |
58 | |
59 extern int HtmlNamedEntity(unsigned char *p, int length); | |
60 | |
61 #define LS 0x2028 // UTF line separator | |
62 #define PS 0x2029 // UTF paragraph separator | |
63 | |
64 /******************************************** | |
65 * Do our own char maps | |
66 */ | |
67 | |
/* Per-byte classification table. All entries start out zero (static
 * storage) and are filled in by cmtable_init(); until that has run the
 * three predicates below report false for every byte.
 */
static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // '0'..'7'
const int CMhex    = 0x2;       // '0'..'9', 'a'..'f', 'A'..'F'
const int CMidchar = 0x4;       // ASCII letter, digit or '_'

/* Each predicate returns the matching flag bit (non-zero) or 0. */
inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Fill in cmtable.
 * The classes are spelled out as explicit ASCII ranges instead of going
 * through isdigit()/isalnum(): those depend on the current C locale and
 * may accept bytes >= 0x80, which the lexer must leave to its UTF-8
 * decoding instead.
 */
static void cmtable_init()
{
    for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++)
    {
        const bool digit = ('0' <= c && c <= '9');
        const bool lower = ('a' <= c && c <= 'z');
        const bool upper = ('A' <= c && c <= 'Z');
        unsigned char flags = 0;

        if ('0' <= c && c <= '7')
            flags |= CMoctal;
        if (digit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
            flags |= CMhex;
        if (digit || lower || upper || c == '_')
            flags |= CMidchar;

        cmtable[c] = flags;
    }
}
90 | |
91 | |
92 /************************* Token **********************************************/ | |
93 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
// Printable spelling for each TOK value. Token::toChars(enum TOK) reads this
// table and falls back to a generated TOKnnn string where an entry is null.
94 const char *Token::tochars[TOKMAX]; |
159 | 95 |
96 void *Token::operator new(size_t size) | |
97 { Token *t; | |
98 | |
99 if (Lexer::freelist) | |
100 { | |
101 t = Lexer::freelist; | |
102 Lexer::freelist = t->next; | |
103 return t; | |
104 } | |
105 | |
106 return ::operator new(size); | |
107 } | |
108 | |
#ifdef DEBUG
/* Debug aid: write the text form of this token, followed by a newline,
 * to stdmsg.
 */
void Token::print()
{
    const char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
115 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
116 const char *Token::toChars() |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
117 { const char *p; |
159 | 118 static char buffer[3 + 3 * sizeof(value) + 1]; |
119 | |
120 p = buffer; | |
121 switch (value) | |
122 { | |
123 case TOKint32v: | |
124 #if IN_GCC | |
125 sprintf(buffer,"%d",(d_int32)int64value); | |
126 #else | |
127 sprintf(buffer,"%d",int32value); | |
128 #endif | |
129 break; | |
130 | |
131 case TOKuns32v: | |
132 case TOKcharv: | |
133 case TOKwcharv: | |
134 case TOKdcharv: | |
135 #if IN_GCC | |
136 sprintf(buffer,"%uU",(d_uns32)uns64value); | |
137 #else | |
138 sprintf(buffer,"%uU",uns32value); | |
139 #endif | |
140 break; | |
141 | |
142 case TOKint64v: | |
794
661384d6a936
Fix warnings on x86-64. By fvbommel.
Christian Kamm <kamm incasoftware de>
parents:
717
diff
changeset
|
143 sprintf(buffer,"%lldL",(long long)int64value); |
159 | 144 break; |
145 | |
146 case TOKuns64v: | |
794
661384d6a936
Fix warnings on x86-64. By fvbommel.
Christian Kamm <kamm incasoftware de>
parents:
717
diff
changeset
|
147 sprintf(buffer,"%lluUL",(unsigned long long)uns64value); |
159 | 148 break; |
149 | |
150 #if IN_GCC | |
151 case TOKfloat32v: | |
152 case TOKfloat64v: | |
153 case TOKfloat80v: | |
154 float80value.format(buffer, sizeof(buffer)); | |
155 break; | |
156 case TOKimaginary32v: | |
157 case TOKimaginary64v: | |
158 case TOKimaginary80v: | |
159 float80value.format(buffer, sizeof(buffer)); | |
160 // %% buffer | |
161 strcat(buffer, "i"); | |
162 break; | |
163 #else | |
164 case TOKfloat32v: | |
165 sprintf(buffer,"%Lgf", float80value); | |
166 break; | |
167 | |
168 case TOKfloat64v: | |
169 sprintf(buffer,"%Lg", float80value); | |
170 break; | |
171 | |
172 case TOKfloat80v: | |
173 sprintf(buffer,"%LgL", float80value); | |
174 break; | |
175 | |
176 case TOKimaginary32v: | |
177 sprintf(buffer,"%Lgfi", float80value); | |
178 break; | |
179 | |
180 case TOKimaginary64v: | |
181 sprintf(buffer,"%Lgi", float80value); | |
182 break; | |
183 | |
184 case TOKimaginary80v: | |
185 sprintf(buffer,"%LgLi", float80value); | |
186 break; | |
187 #endif | |
188 | |
189 case TOKstring: | |
190 #if CSTRINGS | |
191 p = string; | |
192 #else | |
193 { OutBuffer buf; | |
194 | |
195 buf.writeByte('"'); | |
196 for (size_t i = 0; i < len; ) | |
197 { unsigned c; | |
198 | |
199 utf_decodeChar((unsigned char *)ustring, len, &i, &c); | |
200 switch (c) | |
201 { | |
202 case 0: | |
203 break; | |
204 | |
205 case '"': | |
206 case '\\': | |
207 buf.writeByte('\\'); | |
208 default: | |
209 if (isprint(c)) | |
210 buf.writeByte(c); | |
211 else if (c <= 0x7F) | |
212 buf.printf("\\x%02x", c); | |
213 else if (c <= 0xFFFF) | |
214 buf.printf("\\u%04x", c); | |
215 else | |
216 buf.printf("\\U%08x", c); | |
217 continue; | |
218 } | |
219 break; | |
220 } | |
221 buf.writeByte('"'); | |
222 if (postfix) | |
223 buf.writeByte('"'); | |
224 buf.writeByte(0); | |
225 p = (char *)buf.extractData(); | |
226 } | |
227 #endif | |
228 break; | |
229 | |
230 case TOKidentifier: | |
231 case TOKenum: | |
232 case TOKstruct: | |
233 case TOKimport: | |
234 CASE_BASIC_TYPES: | |
235 p = ident->toChars(); | |
236 break; | |
237 | |
238 default: | |
239 p = toChars(value); | |
240 break; | |
241 } | |
242 return p; | |
243 } | |
244 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
245 const char *Token::toChars(enum TOK value) |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
246 { const char *p; |
159 | 247 static char buffer[3 + 3 * sizeof(value) + 1]; |
248 | |
249 p = tochars[value]; | |
250 if (!p) | |
251 { sprintf(buffer,"TOK%d",value); | |
252 p = buffer; | |
253 } | |
254 return p; | |
255 } | |
256 | |
257 /*************************** Lexer ********************************************/ | |
258 | |
// Head of the list of recycled Token objects, linked through Token::next;
// Lexer::nextToken pushes onto it and Token::operator new pops from it.
259 Token *Lexer::freelist = NULL; | |
// Table of identifier spellings; scan() keeps the Identifier created for
// each spelling in the ptrvalue of its entry.
260 StringTable Lexer::stringtable; | |
// Scratch buffer in which scan() assembles backslash-escaped string literals.
261 OutBuffer Lexer::stringbuffer; | |
262 | |
263 Lexer::Lexer(Module *mod, | |
264 unsigned char *base, unsigned begoffset, unsigned endoffset, | |
265 int doDocComment, int commentToken) | |
266 : loc(mod, 1) | |
267 { | |
268 //printf("Lexer::Lexer(%p,%d)\n",base,length); | |
269 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); | |
270 memset(&token,0,sizeof(token)); | |
271 this->base = base; | |
272 this->end = base + endoffset; | |
273 p = base + begoffset; | |
274 this->mod = mod; | |
275 this->doDocComment = doDocComment; | |
276 this->anyToken = 0; | |
277 this->commentToken = commentToken; | |
278 //initKeywords(); | |
279 | |
280 /* If first line starts with '#!', ignore the line | |
281 */ | |
282 | |
283 if (p[0] == '#' && p[1] =='!') | |
284 { | |
285 p += 2; | |
286 while (1) | |
287 { unsigned char c = *p; | |
288 switch (c) | |
289 { | |
290 case '\n': | |
291 p++; | |
292 break; | |
293 | |
294 case '\r': | |
295 p++; | |
296 if (*p == '\n') | |
297 p++; | |
298 break; | |
299 | |
300 case 0: | |
301 case 0x1A: | |
302 break; | |
303 | |
304 default: | |
305 if (c & 0x80) | |
306 { unsigned u = decodeUTF(); | |
307 if (u == PS || u == LS) | |
308 break; | |
309 } | |
310 p++; | |
311 continue; | |
312 } | |
313 break; | |
314 } | |
315 loc.linnum = 2; | |
316 } | |
317 } | |
318 | |
319 | |
320 void Lexer::error(const char *format, ...) | |
321 { | |
322 if (mod && !global.gag) | |
323 { | |
324 char *p = loc.toChars(); | |
325 if (*p) | |
326 fprintf(stdmsg, "%s: ", p); | |
327 mem.free(p); | |
328 | |
329 va_list ap; | |
330 va_start(ap, format); | |
331 vfprintf(stdmsg, format, ap); | |
332 va_end(ap); | |
333 | |
334 fprintf(stdmsg, "\n"); | |
335 fflush(stdmsg); | |
336 | |
337 if (global.errors >= 20) // moderate blizzard of cascading messages | |
338 fatal(); | |
339 } | |
340 global.errors++; | |
341 } | |
342 | |
343 void Lexer::error(Loc loc, const char *format, ...) | |
344 { | |
345 if (mod && !global.gag) | |
346 { | |
347 char *p = loc.toChars(); | |
348 if (*p) | |
349 fprintf(stdmsg, "%s: ", p); | |
350 mem.free(p); | |
351 | |
352 va_list ap; | |
353 va_start(ap, format); | |
354 vfprintf(stdmsg, format, ap); | |
355 va_end(ap); | |
356 | |
357 fprintf(stdmsg, "\n"); | |
358 fflush(stdmsg); | |
359 | |
360 if (global.errors >= 20) // moderate blizzard of cascading messages | |
361 fatal(); | |
362 } | |
363 global.errors++; | |
364 } | |
365 | |
366 TOK Lexer::nextToken() | |
367 { Token *t; | |
368 | |
369 if (token.next) | |
370 { | |
371 t = token.next; | |
372 memcpy(&token,t,sizeof(Token)); | |
373 t->next = freelist; | |
374 freelist = t; | |
375 } | |
376 else | |
377 { | |
378 scan(&token); | |
379 } | |
380 //token.print(); | |
381 return token.value; | |
382 } | |
383 | |
384 Token *Lexer::peek(Token *ct) | |
385 { Token *t; | |
386 | |
387 if (ct->next) | |
388 t = ct->next; | |
389 else | |
390 { | |
391 t = new Token(); | |
392 scan(t); | |
393 t->next = NULL; | |
394 ct->next = t; | |
395 } | |
396 return t; | |
397 } | |
398 | |
717
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
399 /*********************** |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
400 * Look ahead at next token's value. |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
401 */ |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
402 |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
403 TOK Lexer::peekNext() |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
404 { |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
405 return peek(&token)->value; |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
406 } |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
407 |
159 | 408 /********************************* |
409 * tk is on the opening (. | |
410 * Look ahead and return token that is past the closing ). | |
411 */ | |
412 | |
413 Token *Lexer::peekPastParen(Token *tk) | |
414 { | |
415 //printf("peekPastParen()\n"); | |
416 int parens = 1; | |
417 int curlynest = 0; | |
418 while (1) | |
419 { | |
420 tk = peek(tk); | |
421 //tk->print(); | |
422 switch (tk->value) | |
423 { | |
424 case TOKlparen: | |
425 parens++; | |
426 continue; | |
427 | |
428 case TOKrparen: | |
429 --parens; | |
430 if (parens) | |
431 continue; | |
432 tk = peek(tk); | |
433 break; | |
434 | |
435 case TOKlcurly: | |
436 curlynest++; | |
437 continue; | |
438 | |
439 case TOKrcurly: | |
440 if (--curlynest >= 0) | |
441 continue; | |
442 break; | |
443 | |
444 case TOKsemicolon: | |
445 if (curlynest) | |
446 continue; | |
447 break; | |
448 | |
449 case TOKeof: | |
450 break; | |
451 | |
452 default: | |
453 continue; | |
454 } | |
455 return tk; | |
456 } | |
457 } | |
458 | |
459 /********************************** | |
460 * Determine if string is a valid Identifier. | |
461 * Placed here because of commonality with Lexer functionality. | |
462 * Returns: | |
463 * 0 invalid | |
464 */ | |
465 | |
466 int Lexer::isValidIdentifier(char *p) | |
467 { | |
468 size_t len; | |
469 size_t idx; | |
470 | |
471 if (!p || !*p) | |
472 goto Linvalid; | |
473 | |
474 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars | |
475 goto Linvalid; | |
476 | |
477 len = strlen(p); | |
478 idx = 0; | |
479 while (p[idx]) | |
480 { dchar_t dc; | |
481 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
482 const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); |
159 | 483 if (q) |
484 goto Linvalid; | |
485 | |
486 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) | |
487 goto Linvalid; | |
488 } | |
489 return 1; | |
490 | |
491 Linvalid: | |
492 return 0; | |
493 } | |
494 | |
495 /**************************** | |
496 * Turn next token in buffer into a token. | |
497 */ | |
498 | |
/* Lexer::scan
 * Reads the next token from the input at p and stores it in *t.
 *  - t->blockComment and t->lineComment are cleared first. The loop then
 *    records the candidate token start in t->ptr on every pass and
 *    dispatches on the byte at p.
 *  - White space and line ends are skipped; loc.linnum is bumped for each
 *    line end, including the Unicode LS and PS separators.
 *  - Comments are skipped, unless commentToken is set, in which case each
 *    one is returned as TOKcomment. With doDocComment set, documentation
 *    comments are passed to getDocComment() together with a flag telling
 *    whether the comment started on the line scan() was entered on.
 *  - A NUL or 0x1A byte yields TOKeof without advancing p.
 * On return t->value holds the token kind, and the payload fields that
 * kind uses (ident, ustring with len and postfix, uns64value) are set.
 */
499 void Lexer::scan(Token *t) | |
500 { | |
501 unsigned lastLine = loc.linnum; | |
502 unsigned linnum; | |
503 | |
504 t->blockComment = NULL; | |
505 t->lineComment = NULL; | |
506 while (1) | |
507 { | |
508 t->ptr = p; | |
509 //printf("p = %p, *p = '%c'\n",p,*p); | |
510 switch (*p) | |
511 { | |
512 case 0: | |
513 case 0x1A: | |
514 t->value = TOKeof; // end of file | |
515 return; | |
516 | |
517 case ' ': | |
518 case '\t': | |
519 case '\v': | |
520 case '\f': | |
521 p++; | |
522 continue; // skip white space | |
523 | |
524 case '\r': | |
525 p++; | |
526 if (*p != '\n') // if CR stands by itself | |
527 loc.linnum++; | |
528 continue; // skip white space | |
529 | |
530 case '\n': | |
531 p++; | |
532 loc.linnum++; | |
533 continue; // skip white space | |
534 | |
535 case '0': case '1': case '2': case '3': case '4': | |
536 case '5': case '6': case '7': case '8': case '9': | |
537 t->value = number(t); | |
538 return; | |
539 | |
540 #if CSTRINGS | |
541 case '\'': | |
542 t->value = charConstant(t, 0); | |
543 return; | |
544 | |
545 case '"': | |
546 t->value = stringConstant(t,0); | |
547 return; | |
548 | |
549 case 'l': | |
550 case 'L': | |
551 if (p[1] == '\'') | |
552 { | |
553 p++; | |
554 t->value = charConstant(t, 1); | |
555 return; | |
556 } | |
557 else if (p[1] == '"') | |
558 { | |
559 p++; | |
560 t->value = stringConstant(t, 1); | |
561 return; | |
562 } | |
563 #else | |
564 case '\'': | |
565 t->value = charConstant(t,0); | |
566 return; | |
567 | |
568 case 'r': | |
569 if (p[1] != '"') | |
570 goto case_ident; | |
571 p++; | |
572 case '`': | |
573 t->value = wysiwygStringConstant(t, *p); | |
574 return; | |
575 | |
576 case 'x': | |
577 if (p[1] != '"') | |
578 goto case_ident; | |
579 p++; | |
580 t->value = hexStringConstant(t); | |
581 return; | |
582 | |
336 | 583 #if DMDV2 |
159 | 584 case 'q': |
585 if (p[1] == '"') | |
586 { | |
587 p++; | |
588 t->value = delimitedStringConstant(t); | |
589 return; | |
590 } | |
591 else if (p[1] == '{') | |
592 { | |
593 p++; | |
594 t->value = tokenStringConstant(t); | |
595 return; | |
596 } | |
597 else | |
598 goto case_ident; | |
599 #endif | |
600 | |
601 case '"': | |
602 t->value = escapeStringConstant(t,0); | |
603 return; | |
604 | |
605 case '\\': // escaped string literal | |
606 { unsigned c; | |
607 | |
608 stringbuffer.reset(); | |
609 do | |
610 { | |
611 p++; | |
612 switch (*p) | |
613 { | |
614 case 'u': | |
615 case 'U': | |
616 case '&': | |
617 c = escapeSequence(); | |
618 stringbuffer.writeUTF8(c); | |
619 break; | |
620 | |
621 default: | |
622 c = escapeSequence(); | |
623 stringbuffer.writeByte(c); | |
624 break; | |
625 } | |
626 } while (*p == '\\'); | |
627 t->len = stringbuffer.offset; | |
628 stringbuffer.writeByte(0); | |
629 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
630 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
631 t->postfix = 0; | |
632 t->value = TOKstring; | |
633 return; | |
634 } | |
635 | |
636 case 'l': | |
637 case 'L': | |
638 #endif | |
639 case 'a': case 'b': case 'c': case 'd': case 'e': | |
640 case 'f': case 'g': case 'h': case 'i': case 'j': | |
641 case 'k': case 'm': case 'n': case 'o': | |
336 | 642 #if DMDV2 |
159 | 643 case 'p': /*case 'q': case 'r':*/ case 's': case 't': |
644 #else | |
645 case 'p': case 'q': /*case 'r':*/ case 's': case 't': | |
646 #endif | |
647 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': | |
648 case 'z': | |
649 case 'A': case 'B': case 'C': case 'D': case 'E': | |
650 case 'F': case 'G': case 'H': case 'I': case 'J': | |
651 case 'K': case 'M': case 'N': case 'O': | |
652 case 'P': case 'Q': case 'R': case 'S': case 'T': | |
653 case 'U': case 'V': case 'W': case 'X': case 'Y': | |
654 case 'Z': | |
655 case '_': | |
// Identifier or keyword: consume the identifier characters, then look the
// spelling up in stringtable. A spelling seen for the first time gets a new
// Identifier of kind TOKidentifier; the token kind is taken from the Identifier.
656 case_ident: | |
657 { unsigned char c; | |
658 StringValue *sv; | |
659 Identifier *id; | |
660 | |
661 do | |
662 { | |
663 c = *++p; | |
664 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); | |
665 sv = stringtable.update((char *)t->ptr, p - t->ptr); | |
666 id = (Identifier *) sv->ptrvalue; | |
667 if (!id) | |
668 { id = new Identifier(sv->lstring.string,TOKidentifier); | |
669 sv->ptrvalue = id; | |
670 } | |
671 t->ident = id; | |
672 t->value = (enum TOK) id->value; | |
673 anyToken = 1; | |
674 if (*t->ptr == '_') // if special identifier token | |
675 { | |
676 static char date[11+1]; | |
677 static char time[8+1]; | |
678 static char timestamp[24+1]; | |
679 | |
680 if (!date[0]) // lazy evaluation | |
681 { time_t t; | |
682 char *p; | |
683 | |
684 ::time(&t); | |
685 p = ctime(&t); | |
686 assert(p); | |
// ctime() text is laid out as: Www Mmm dd hh:mm:ss yyyy
// so offset 4 is the month and day, 11 the time of day and 20 the year.
687 sprintf(date, "%.6s %.4s", p + 4, p + 20); | |
688 sprintf(time, "%.8s", p + 11); | |
689 sprintf(timestamp, "%.24s", p); | |
690 } | |
691 | |
336 | 692 #if DMDV1 |
159 | 693 if (mod && id == Id::FILE) |
694 { | |
695 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); | |
696 goto Lstring; | |
697 } | |
698 else if (mod && id == Id::LINE) | |
699 { | |
700 t->value = TOKint64v; | |
701 t->uns64value = loc.linnum; | |
702 } | |
336 | 703 else |
704 #endif | |
705 if (id == Id::DATE) | |
159 | 706 { |
707 t->ustring = (unsigned char *)date; | |
708 goto Lstring; | |
709 } | |
710 else if (id == Id::TIME) | |
711 { | |
712 t->ustring = (unsigned char *)time; | |
713 goto Lstring; | |
714 } | |
715 else if (id == Id::VENDOR) | |
716 { | |
664
eef8ac26c66c
Some missed LLVMDC -> LDC.
Christian Kamm <kamm incasoftware de>
parents:
658
diff
changeset
|
717 t->ustring = (unsigned char *)"LDC"; |
159 | 718 goto Lstring; |
719 } | |
720 else if (id == Id::TIMESTAMP) | |
721 { | |
722 t->ustring = (unsigned char *)timestamp; | |
723 Lstring: | |
724 t->value = TOKstring; | |
725 Llen: | |
726 t->postfix = 0; | |
727 t->len = strlen((char *)t->ustring); | |
728 } | |
729 else if (id == Id::VERSIONX) | |
730 { unsigned major = 0; | |
731 unsigned minor = 0; | |
732 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
733 for (const char *p = global.version + 1; 1; p++) |
159 | 734 { |
735 char c = *p; | |
736 if (isdigit(c)) | |
737 minor = minor * 10 + c - '0'; | |
738 else if (c == '.') | |
739 { major = minor; | |
740 minor = 0; | |
741 } | |
742 else | |
743 break; | |
744 } | |
745 t->value = TOKint64v; | |
746 t->uns64value = major * 1000 + minor; | |
747 } | |
336 | 748 #if DMDV2 |
159 | 749 else if (id == Id::EOFX) |
750 { | |
751 t->value = TOKeof; | |
752 // Advance scanner to end of file | |
753 while (!(*p == 0 || *p == 0x1A)) | |
754 p++; | |
755 } | |
756 #endif | |
757 } | |
758 //printf("t->value = %d\n",t->value); | |
759 return; | |
760 } | |
761 | |
762 case '/': | |
763 p++; | |
764 switch (*p) | |
765 { | |
766 case '=': | |
767 p++; | |
768 t->value = TOKdivass; | |
769 return; | |
770 | |
771 case '*': | |
772 p++; | |
773 linnum = loc.linnum; | |
774 while (1) | |
775 { | |
776 while (1) | |
777 { unsigned char c = *p; | |
778 switch (c) | |
779 { | |
780 case '/': | |
781 break; | |
782 | |
783 case '\n': | |
784 loc.linnum++; | |
785 p++; | |
786 continue; | |
787 | |
788 case '\r': | |
789 p++; | |
790 if (*p != '\n') | |
791 loc.linnum++; | |
792 continue; | |
793 | |
794 case 0: | |
795 case 0x1A: | |
796 error("unterminated /* */ comment"); | |
797 p = end; | |
798 t->value = TOKeof; | |
799 return; | |
800 | |
801 default: | |
802 if (c & 0x80) | |
803 { unsigned u = decodeUTF(); | |
804 if (u == PS || u == LS) | |
805 loc.linnum++; | |
806 } | |
807 p++; | |
808 continue; | |
809 } | |
810 break; | |
811 } | |
812 p++; | |
// p is now just past a slash. That slash closes the comment only if the byte
// before it is a star, and that star is not the one that opened the comment.
813 if (p[-2] == '*' && p - 3 != t->ptr) | |
814 break; | |
815 } | |
816 if (commentToken) | |
817 { | |
818 t->value = TOKcomment; | |
819 return; | |
820 } | |
821 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) | |
822 { // if /** but not /**/ | |
823 getDocComment(t, lastLine == linnum); | |
824 } | |
825 continue; | |
826 | |
827 case '/': // do // style comments | |
828 linnum = loc.linnum; | |
829 while (1) | |
830 { unsigned char c = *++p; | |
831 switch (c) | |
832 { | |
833 case '\n': | |
834 break; | |
835 | |
836 case '\r': | |
837 if (p[1] == '\n') | |
838 p++; | |
839 break; | |
840 | |
841 case 0: | |
842 case 0x1A: | |
843 if (commentToken) | |
844 { | |
845 p = end; | |
846 t->value = TOKcomment; | |
847 return; | |
848 } | |
849 if (doDocComment && t->ptr[2] == '/') | |
850 getDocComment(t, lastLine == linnum); | |
851 p = end; | |
852 t->value = TOKeof; | |
853 return; | |
854 | |
855 default: | |
856 if (c & 0x80) | |
857 { unsigned u = decodeUTF(); | |
858 if (u == PS || u == LS) | |
859 break; | |
860 } | |
861 continue; | |
862 } | |
863 break; | |
864 } | |
865 | |
866 if (commentToken) | |
867 { | |
868 p++; | |
869 loc.linnum++; | |
870 t->value = TOKcomment; | |
871 return; | |
872 } | |
873 if (doDocComment && t->ptr[2] == '/') | |
874 getDocComment(t, lastLine == linnum); | |
875 | |
876 p++; | |
877 loc.linnum++; | |
878 continue; | |
879 | |
// Nesting comment: nest counts the slash-plus openers still unclosed, and
// the comment ends when a plus-slash closer brings it down to 0.
880 case '+': | |
881 { int nest; | |
882 | |
883 linnum = loc.linnum; | |
884 p++; | |
885 nest = 1; | |
886 while (1) | |
887 { unsigned char c = *p; | |
888 switch (c) | |
889 { | |
890 case '/': | |
891 p++; | |
892 if (*p == '+') | |
893 { | |
894 p++; | |
895 nest++; | |
896 } | |
897 continue; | |
898 | |
899 case '+': | |
900 p++; | |
901 if (*p == '/') | |
902 { | |
903 p++; | |
904 if (--nest == 0) | |
905 break; | |
906 } | |
907 continue; | |
908 | |
909 case '\r': | |
910 p++; | |
911 if (*p != '\n') | |
912 loc.linnum++; | |
913 continue; | |
914 | |
915 case '\n': | |
916 loc.linnum++; | |
917 p++; | |
918 continue; | |
919 | |
920 case 0: | |
921 case 0x1A: | |
922 error("unterminated /+ +/ comment"); | |
923 p = end; | |
924 t->value = TOKeof; | |
925 return; | |
926 | |
927 default: | |
928 if (c & 0x80) | |
929 { unsigned u = decodeUTF(); | |
930 if (u == PS || u == LS) | |
931 loc.linnum++; | |
932 } | |
933 p++; | |
934 continue; | |
935 } | |
936 break; | |
937 } | |
938 if (commentToken) | |
939 { | |
940 t->value = TOKcomment; | |
941 return; | |
942 } | |
943 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) | |
944 { // if /++ but not /++/ | |
945 getDocComment(t, lastLine == linnum); | |
946 } | |
947 continue; | |
948 } | |
949 } | |
950 t->value = TOKdiv; | |
951 return; | |
952 | |
// A dot followed by a digit is handed to inreal(); otherwise the token is
// TOKdot, TOKslice (two dots) or TOKdotdotdot (three dots).
953 case '.': | |
954 p++; | |
955 if (isdigit(*p)) | |
956 { /* Note that we don't allow ._1 and ._ as being | |
957 * valid floating point numbers. | |
958 */ | |
959 p--; | |
960 t->value = inreal(t); | |
961 } | |
962 else if (p[0] == '.') | |
963 { | |
964 if (p[1] == '.') | |
965 { p += 2; | |
966 t->value = TOKdotdotdot; | |
967 } | |
968 else | |
969 { p++; | |
970 t->value = TOKslice; | |
971 } | |
972 } | |
973 else | |
974 t->value = TOKdot; | |
975 return; | |
976 | |
977 case '&': | |
978 p++; | |
979 if (*p == '=') | |
980 { p++; | |
981 t->value = TOKandass; | |
982 } | |
983 else if (*p == '&') | |
984 { p++; | |
985 t->value = TOKandand; | |
986 } | |
987 else | |
988 t->value = TOKand; | |
989 return; | |
990 | |
991 case '|': | |
992 p++; | |
993 if (*p == '=') | |
994 { p++; | |
995 t->value = TOKorass; | |
996 } | |
997 else if (*p == '|') | |
998 { p++; | |
999 t->value = TOKoror; | |
1000 } | |
1001 else | |
1002 t->value = TOKor; | |
1003 return; | |
1004 | |
1005 case '-': | |
1006 p++; | |
1007 if (*p == '=') | |
1008 { p++; | |
1009 t->value = TOKminass; | |
1010 } | |
1011 #if 0 | |
1012 else if (*p == '>') | |
1013 { p++; | |
1014 t->value = TOKarrow; | |
1015 } | |
1016 #endif | |
1017 else if (*p == '-') | |
1018 { p++; | |
1019 t->value = TOKminusminus; | |
1020 } | |
1021 else | |
1022 t->value = TOKmin; | |
1023 return; | |
1024 | |
1025 case '+': | |
1026 p++; | |
1027 if (*p == '=') | |
1028 { p++; | |
1029 t->value = TOKaddass; | |
1030 } | |
1031 else if (*p == '+') | |
1032 { p++; | |
1033 t->value = TOKplusplus; | |
1034 } | |
1035 else | |
1036 t->value = TOKadd; | |
1037 return; | |
1038 | |
1039 case '<': | |
1040 p++; | |
1041 if (*p == '=') | |
1042 { p++; | |
1043 t->value = TOKle; // <= | |
1044 } | |
1045 else if (*p == '<') | |
1046 { p++; | |
1047 if (*p == '=') | |
1048 { p++; | |
1049 t->value = TOKshlass; // <<= | |
1050 } | |
1051 else | |
1052 t->value = TOKshl; // << | |
1053 } | |
1054 else if (*p == '>') | |
1055 { p++; | |
1056 if (*p == '=') | |
1057 { p++; | |
1058 t->value = TOKleg; // <>= | |
1059 } | |
1060 else | |
1061 t->value = TOKlg; // <> | |
1062 } | |
1063 else | |
1064 t->value = TOKlt; // < | |
1065 return; | |
1066 | |
1067 case '>': | |
1068 p++; | |
1069 if (*p == '=') | |
1070 { p++; | |
1071 t->value = TOKge; // >= | |
1072 } | |
1073 else if (*p == '>') | |
1074 { p++; | |
1075 if (*p == '=') | |
1076 { p++; | |
1077 t->value = TOKshrass; // >>= | |
1078 } | |
1079 else if (*p == '>') | |
1080 { p++; | |
1081 if (*p == '=') | |
1082 { p++; | |
1083 t->value = TOKushrass; // >>>= | |
1084 } | |
1085 else | |
1086 t->value = TOKushr; // >>> | |
1087 } | |
1088 else | |
1089 t->value = TOKshr; // >> | |
1090 } | |
1091 else | |
1092 t->value = TOKgt; // > | |
1093 return; | |
1094 | |
1095 case '!': | |
1096 p++; | |
1097 if (*p == '=') | |
1098 { p++; | |
1099 if (*p == '=' && global.params.Dversion == 1) | |
1100 { p++; | |
1101 t->value = TOKnotidentity; // !== | |
1102 } | |
1103 else | |
1104 t->value = TOKnotequal; // != | |
1105 } | |
1106 else if (*p == '<') | |
1107 { p++; | |
1108 if (*p == '>') | |
1109 { p++; | |
1110 if (*p == '=') | |
1111 { p++; | |
1112 t->value = TOKunord; // !<>= | |
1113 } | |
1114 else | |
1115 t->value = TOKue; // !<> | |
1116 } | |
1117 else if (*p == '=') | |
1118 { p++; | |
1119 t->value = TOKug; // !<= | |
1120 } | |
1121 else | |
1122 t->value = TOKuge; // !< | |
1123 } | |
1124 else if (*p == '>') | |
1125 { p++; | |
1126 if (*p == '=') | |
1127 { p++; | |
1128 t->value = TOKul; // !>= | |
1129 } | |
1130 else | |
1131 t->value = TOKule; // !> | |
1132 } | |
1133 else | |
1134 t->value = TOKnot; // ! | |
1135 return; | |
1136 | |
1137 case '=': | |
1138 p++; | |
1139 if (*p == '=') | |
1140 { p++; | |
1141 if (*p == '=' && global.params.Dversion == 1) | |
1142 { p++; | |
1143 t->value = TOKidentity; // === | |
1144 } | |
1145 else | |
1146 t->value = TOKequal; // == | |
1147 } | |
1148 else | |
1149 t->value = TOKassign; // = | |
1150 return; | |
1151 | |
1152 case '~': | |
1153 p++; | |
1154 if (*p == '=') | |
1155 { p++; | |
1156 t->value = TOKcatass; // ~= | |
1157 } | |
1158 else | |
1159 t->value = TOKtilde; // ~ | |
1160 return; | |
1161 | |
1162 #define SINGLE(c,tok) case c: p++; t->value = tok; return; | |
1163 | |
1164 SINGLE('(', TOKlparen) | |
1165 SINGLE(')', TOKrparen) | |
1166 SINGLE('[', TOKlbracket) | |
1167 SINGLE(']', TOKrbracket) | |
1168 SINGLE('{', TOKlcurly) | |
1169 SINGLE('}', TOKrcurly) | |
1170 SINGLE('?', TOKquestion) | |
1171 SINGLE(',', TOKcomma) | |
1172 SINGLE(';', TOKsemicolon) | |
1173 SINGLE(':', TOKcolon) | |
1174 SINGLE('$', TOKdollar) | |
1175 | |
1176 #undef SINGLE | |
1177 | |
1178 #define DOUBLE(c1,tok1,c2,tok2) \ | |
1179 case c1: \ | |
1180 p++; \ | |
1181 if (*p == c2) \ | |
1182 { p++; \ | |
1183 t->value = tok2; \ | |
1184 } \ | |
1185 else \ | |
1186 t->value = tok1; \ | |
1187 return; | |
1188 | |
1189 DOUBLE('*', TOKmul, '=', TOKmulass) | |
1190 DOUBLE('%', TOKmod, '=', TOKmodass) | |
1191 DOUBLE('^', TOKxor, '=', TOKxorass) | |
1192 | |
1193 #undef DOUBLE | |
1194 | |
// A hash sign is handed to pragma(); scanning then resumes with whatever follows.
1195 case '#': | |
1196 p++; | |
1197 pragma(); | |
1198 continue; | |
1199 | |
// Any other byte: one with the high bit set is decoded, and may begin an
// identifier or be a Unicode line break. Everything else is reported as
// unsupported and skipped.
1200 default: | |
1201 { unsigned char c = *p; | |
1202 | |
1203 if (c & 0x80) | |
1204 { unsigned u = decodeUTF(); | |
1205 | |
1206 // Check for start of unicode identifier | |
1207 if (isUniAlpha(u)) | |
1208 goto case_ident; | |
1209 | |
1210 if (u == PS || u == LS) | |
1211 { | |
1212 loc.linnum++; | |
1213 p++; | |
1214 continue; | |
1215 } | |
1216 } | |
1217 if (isprint(c)) | |
1218 error("unsupported char '%c'", c); | |
1219 else | |
1220 error("unsupported char 0x%02x", c); | |
1221 p++; | |
1222 continue; | |
1223 } | |
1224 } | |
1225 } | |
1226 } | |
1227 | |
1228 /******************************************* | |
1229 * Parse escape sequence. | |
1230 */ | |
1231 | |
/* Lexer::escapeSequence
 * Decodes one escape sequence. On entry p is on the character that follows
 * the backslash; the decoded value is returned.
 *  - single quote, double quote, question mark, backslash: the character
 *    itself, consumed.
 *  - a b f n r t v: the control codes 7 8 12 10 13 9 11, consumed.
 *  - x, u, U: 2, 4 or 8 hex digits respectively. If a non-hex character
 *    turns up before the expected count is reached, an error is reported
 *    and the digits read so far are used. Values from u and U are also
 *    checked with utf_isValidDchar(). If no hex digit follows at all, an
 *    error is reported and the following character is returned unconsumed.
 *  - ampersand: a named entity up to a semicolon, looked up with
 *    HtmlNamedEntity(); an unknown name is reported and a space is
 *    returned instead. A name that is not closed by a semicolon is
 *    reported as unterminated.
 *  - octal digit: up to three octal digits; a value above 0xFF is reported.
 *  - NUL or 0x1A (end of file): a backslash is returned, p is not advanced.
 *  - anything else: reported as undefined; the character is returned and
 *    p is not advanced.
 */
1232 unsigned Lexer::escapeSequence() | |
1233 { unsigned c; | |
1234 int n; | |
1235 int ndigits; | |
1236 | |
1237 c = *p; | |
1238 switch (c) | |
1239 { | |
1240 case '\'': | |
1241 case '"': | |
1242 case '?': | |
1243 case '\\': | |
1244 Lconsume: | |
1245 p++; | |
1246 break; | |
1247 | |
1248 case 'a': c = 7; goto Lconsume; | |
1249 case 'b': c = 8; goto Lconsume; | |
1250 case 'f': c = 12; goto Lconsume; | |
1251 case 'n': c = 10; goto Lconsume; | |
1252 case 'r': c = 13; goto Lconsume; | |
1253 case 't': c = 9; goto Lconsume; | |
1254 case 'v': c = 11; goto Lconsume; | |
1255 | |
1256 case 'u': | |
1257 ndigits = 4; | |
1258 goto Lhex; | |
1259 case 'U': | |
1260 ndigits = 8; | |
1261 goto Lhex; | |
1262 case 'x': | |
1263 ndigits = 2; | |
1264 Lhex: | |
1265 p++; | |
1266 c = *p; | |
1267 if (ishex(c)) | |
1268 { unsigned v; | |
1269 | |
1270 n = 0; | |
1271 v = 0; | |
1272 while (1) | |
1273 { | |
1274 if (isdigit(c)) | |
1275 c -= '0'; | |
1276 else if (islower(c)) | |
1277 c -= 'a' - 10; | |
1278 else | |
1279 c -= 'A' - 10; | |
1280 v = v * 16 + c; | |
1281 c = *++p; | |
1282 if (++n == ndigits) | |
1283 break; | |
1284 if (!ishex(c)) | |
1285 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); | |
1286 break; | |
1287 } | |
1288 } | |
1289 if (ndigits != 2 && !utf_isValidDchar(v)) | |
1290 error("invalid UTF character \\U%08x", v); | |
1291 c = v; | |
1292 } | |
1293 else | |
1294 error("undefined escape hex sequence \\%c\n",c); | |
1295 break; | |
1296 | |
1297 case '&': // named character entity | |
1298 for (unsigned char *idstart = ++p; 1; p++) | |
1299 { | |
1300 switch (*p) | |
1301 { | |
1302 case ';': | |
1303 c = HtmlNamedEntity(idstart, p - idstart); | |
1304 if (c == ~0) | |
1305 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); | |
1306 c = ' '; | |
1307 } | |
1308 p++; | |
1309 break; | |
1310 | |
1311 default: | |
1312 if (isalpha(*p) || | |
1313 (p != idstart + 1 && isdigit(*p))) | |
1314 continue; | |
1315 error("unterminated named entity"); | |
1316 break; | |
1317 } | |
1318 break; | |
1319 } | |
1320 break; | |
1321 | |
1322 case 0: | |
1323 case 0x1A: // end of file | |
1324 c = '\\'; | |
1325 break; | |
1326 | |
1327 default: | |
1328 if (isoctal(c)) | |
1329 { unsigned v; | |
1330 | |
1331 n = 0; | |
1332 v = 0; | |
1333 do | |
1334 { | |
1335 v = v * 8 + (c - '0'); | |
1336 c = *++p; | |
1337 } while (++n < 3 && isoctal(c)); | |
1338 c = v; | |
1339 if (c > 0xFF) | |
1340 error("0%03o is larger than a byte", c); | |
1341 } | |
1342 else | |
1343 error("undefined escape sequence \\%c\n",c); | |
1344 break; | |
1345 } | |
1346 return c; | |
1347 } | |
1348 | |
1349 /************************************** | |
1350 */ | |
1351 | |
1352 TOK Lexer::wysiwygStringConstant(Token *t, int tc) | |
1353 { unsigned c; | |
1354 Loc start = loc; | |
1355 | |
1356 p++; | |
1357 stringbuffer.reset(); | |
1358 while (1) | |
1359 { | |
1360 c = *p++; | |
1361 switch (c) | |
1362 { | |
1363 case '\n': | |
1364 loc.linnum++; | |
1365 break; | |
1366 | |
1367 case '\r': | |
1368 if (*p == '\n') | |
1369 continue; // ignore | |
1370 c = '\n'; // treat EndOfLine as \n character | |
1371 loc.linnum++; | |
1372 break; | |
1373 | |
1374 case 0: | |
1375 case 0x1A: | |
1376 error("unterminated string constant starting at %s", start.toChars()); | |
1377 t->ustring = (unsigned char *)""; | |
1378 t->len = 0; | |
1379 t->postfix = 0; | |
1380 return TOKstring; | |
1381 | |
1382 case '"': | |
1383 case '`': | |
1384 if (c == tc) | |
1385 { | |
1386 t->len = stringbuffer.offset; | |
1387 stringbuffer.writeByte(0); | |
1388 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1389 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1390 stringPostfix(t); | |
1391 return TOKstring; | |
1392 } | |
1393 break; | |
1394 | |
1395 default: | |
1396 if (c & 0x80) | |
1397 { p--; | |
1398 unsigned u = decodeUTF(); | |
1399 p++; | |
1400 if (u == PS || u == LS) | |
1401 loc.linnum++; | |
1402 stringbuffer.writeUTF8(u); | |
1403 continue; | |
1404 } | |
1405 break; | |
1406 } | |
1407 stringbuffer.writeByte(c); | |
1408 } | |
1409 } | |
1410 | |
1411 /************************************** | |
1412 * Lex hex strings: | |
1413 * x"0A ae 34FE BD" | |
1414 */ | |
1415 | |
1416 TOK Lexer::hexStringConstant(Token *t) | |
1417 { unsigned c; | |
1418 Loc start = loc; | |
1419 unsigned n = 0; | |
1420 unsigned v; | |
1421 | |
1422 p++; | |
1423 stringbuffer.reset(); | |
1424 while (1) | |
1425 { | |
1426 c = *p++; | |
1427 switch (c) | |
1428 { | |
1429 case ' ': | |
1430 case '\t': | |
1431 case '\v': | |
1432 case '\f': | |
1433 continue; // skip white space | |
1434 | |
1435 case '\r': | |
1436 if (*p == '\n') | |
1437 continue; // ignore | |
1438 // Treat isolated '\r' as if it were a '\n' | |
1439 case '\n': | |
1440 loc.linnum++; | |
1441 continue; | |
1442 | |
1443 case 0: | |
1444 case 0x1A: | |
1445 error("unterminated string constant starting at %s", start.toChars()); | |
1446 t->ustring = (unsigned char *)""; | |
1447 t->len = 0; | |
1448 t->postfix = 0; | |
1449 return TOKstring; | |
1450 | |
1451 case '"': | |
1452 if (n & 1) | |
1453 { error("odd number (%d) of hex characters in hex string", n); | |
1454 stringbuffer.writeByte(v); | |
1455 } | |
1456 t->len = stringbuffer.offset; | |
1457 stringbuffer.writeByte(0); | |
1458 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1459 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1460 stringPostfix(t); | |
1461 return TOKstring; | |
1462 | |
1463 default: | |
1464 if (c >= '0' && c <= '9') | |
1465 c -= '0'; | |
1466 else if (c >= 'a' && c <= 'f') | |
1467 c -= 'a' - 10; | |
1468 else if (c >= 'A' && c <= 'F') | |
1469 c -= 'A' - 10; | |
1470 else if (c & 0x80) | |
1471 { p--; | |
1472 unsigned u = decodeUTF(); | |
1473 p++; | |
1474 if (u == PS || u == LS) | |
1475 loc.linnum++; | |
1476 else | |
1477 error("non-hex character \\u%x", u); | |
1478 } | |
1479 else | |
1480 error("non-hex character '%c'", c); | |
1481 if (n & 1) | |
1482 { v = (v << 4) | c; | |
1483 stringbuffer.writeByte(v); | |
1484 } | |
1485 else | |
1486 v = c; | |
1487 n++; | |
1488 break; | |
1489 } | |
1490 } | |
1491 } | |
1492 | |
1493 | |
336 | 1494 #if DMDV2 |
159 | 1495 /************************************** |
1496 * Lex delimited strings: | |
1497 * q"(foo(xxx))" // "foo(xxx)" | |
1498 * q"[foo(]" // "foo(" | |
1499 * q"/foo]/" // "foo]" | |
1500 * q"HERE | |
1501 * foo | |
1502 * HERE" // "foo\n" | |
1503 * Input: | |
1504 * p is on the " | |
1505 */ | |
1506 | |
1507 TOK Lexer::delimitedStringConstant(Token *t) | |
1508 { unsigned c; | |
1509 Loc start = loc; | |
1510 unsigned delimleft = 0; | |
1511 unsigned delimright = 0; | |
1512 unsigned nest = 1; | |
1513 unsigned nestcount; | |
1514 Identifier *hereid = NULL; | |
1515 unsigned blankrol = 0; | |
1516 unsigned startline = 0; | |
1517 | |
1518 p++; | |
1519 stringbuffer.reset(); | |
1520 while (1) | |
1521 { | |
1522 c = *p++; | |
1523 //printf("c = '%c'\n", c); | |
1524 switch (c) | |
1525 { | |
1526 case '\n': | |
1527 Lnextline: | |
1528 loc.linnum++; | |
1529 startline = 1; | |
1530 if (blankrol) | |
1531 { blankrol = 0; | |
1532 continue; | |
1533 } | |
1534 if (hereid) | |
1535 { | |
1536 stringbuffer.writeUTF8(c); | |
1537 continue; | |
1538 } | |
1539 break; | |
1540 | |
1541 case '\r': | |
1542 if (*p == '\n') | |
1543 continue; // ignore | |
1544 c = '\n'; // treat EndOfLine as \n character | |
1545 goto Lnextline; | |
1546 | |
1547 case 0: | |
1548 case 0x1A: | |
1549 goto Lerror; | |
1550 | |
1551 default: | |
1552 if (c & 0x80) | |
1553 { p--; | |
1554 c = decodeUTF(); | |
1555 p++; | |
1556 if (c == PS || c == LS) | |
1557 goto Lnextline; | |
1558 } | |
1559 break; | |
1560 } | |
1561 if (delimleft == 0) | |
1562 { delimleft = c; | |
1563 nest = 1; | |
1564 nestcount = 1; | |
1565 if (c == '(') | |
1566 delimright = ')'; | |
1567 else if (c == '{') | |
1568 delimright = '}'; | |
1569 else if (c == '[') | |
1570 delimright = ']'; | |
1571 else if (c == '<') | |
1572 delimright = '>'; | |
1573 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) | |
1574 { // Start of identifier; must be a heredoc | |
1575 Token t; | |
1576 p--; | |
1577 scan(&t); // read in heredoc identifier | |
1578 if (t.value != TOKidentifier) | |
1579 { error("identifier expected for heredoc, not %s", t.toChars()); | |
1580 delimright = c; | |
1581 } | |
1582 else | |
1583 { hereid = t.ident; | |
1584 //printf("hereid = '%s'\n", hereid->toChars()); | |
1585 blankrol = 1; | |
1586 } | |
1587 nest = 0; | |
1588 } | |
1589 else | |
1590 { delimright = c; | |
1591 nest = 0; | |
1592 } | |
1593 } | |
1594 else | |
1595 { | |
1596 if (blankrol) | |
1597 { error("heredoc rest of line should be blank"); | |
1598 blankrol = 0; | |
1599 continue; | |
1600 } | |
1601 if (nest == 1) | |
1602 { | |
1603 if (c == delimleft) | |
1604 nestcount++; | |
1605 else if (c == delimright) | |
1606 { nestcount--; | |
1607 if (nestcount == 0) | |
1608 goto Ldone; | |
1609 } | |
1610 } | |
1611 else if (c == delimright) | |
1612 goto Ldone; | |
1613 if (startline && isalpha(c)) | |
1614 { Token t; | |
1615 unsigned char *psave = p; | |
1616 p--; | |
1617 scan(&t); // read in possible heredoc identifier | |
1618 //printf("endid = '%s'\n", t.ident->toChars()); | |
1619 if (t.value == TOKidentifier && t.ident->equals(hereid)) | |
1620 { /* should check that rest of line is blank | |
1621 */ | |
1622 goto Ldone; | |
1623 } | |
1624 p = psave; | |
1625 } | |
1626 stringbuffer.writeUTF8(c); | |
1627 startline = 0; | |
1628 } | |
1629 } | |
1630 | |
1631 Ldone: | |
1632 if (*p == '"') | |
1633 p++; | |
1634 else | |
1635 error("delimited string must end in %c\"", delimright); | |
1636 t->len = stringbuffer.offset; | |
1637 stringbuffer.writeByte(0); | |
1638 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1639 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1640 stringPostfix(t); | |
1641 return TOKstring; | |
1642 | |
1643 Lerror: | |
1644 error("unterminated string constant starting at %s", start.toChars()); | |
1645 t->ustring = (unsigned char *)""; | |
1646 t->len = 0; | |
1647 t->postfix = 0; | |
1648 return TOKstring; | |
1649 } | |
1650 | |
1651 /************************************** | |
1652 * Lex delimited strings: | |
1653 * q{ foo(xxx) } // " foo(xxx) " | |
1654 * q{foo(} // "foo(" | |
1655 * q{{foo}"}"} // "{foo}"}"" | |
1656 * Input: | |
1657 * p is on the q | |
1658 */ | |
1659 | |
1660 TOK Lexer::tokenStringConstant(Token *t) | |
1661 { | |
1662 unsigned nest = 1; | |
1663 Loc start = loc; | |
1664 unsigned char *pstart = ++p; | |
1665 | |
1666 while (1) | |
1667 { Token tok; | |
1668 | |
1669 scan(&tok); | |
1670 switch (tok.value) | |
1671 { | |
1672 case TOKlcurly: | |
1673 nest++; | |
1674 continue; | |
1675 | |
1676 case TOKrcurly: | |
1677 if (--nest == 0) | |
1678 goto Ldone; | |
1679 continue; | |
1680 | |
1681 case TOKeof: | |
1682 goto Lerror; | |
1683 | |
1684 default: | |
1685 continue; | |
1686 } | |
1687 } | |
1688 | |
1689 Ldone: | |
1690 t->len = p - 1 - pstart; | |
1691 t->ustring = (unsigned char *)mem.malloc(t->len + 1); | |
1692 memcpy(t->ustring, pstart, t->len); | |
1693 t->ustring[t->len] = 0; | |
1694 stringPostfix(t); | |
1695 return TOKstring; | |
1696 | |
1697 Lerror: | |
1698 error("unterminated token string constant starting at %s", start.toChars()); | |
1699 t->ustring = (unsigned char *)""; | |
1700 t->len = 0; | |
1701 t->postfix = 0; | |
1702 return TOKstring; | |
1703 } | |
1704 | |
1705 #endif | |
1706 | |
1707 | |
1708 /************************************** | |
1709 */ | |
1710 | |
1711 TOK Lexer::escapeStringConstant(Token *t, int wide) | |
1712 { unsigned c; | |
1713 Loc start = loc; | |
1714 | |
1715 p++; | |
1716 stringbuffer.reset(); | |
1717 while (1) | |
1718 { | |
1719 c = *p++; | |
1720 switch (c) | |
1721 { | |
1722 case '\\': | |
1723 switch (*p) | |
1724 { | |
1725 case 'u': | |
1726 case 'U': | |
1727 case '&': | |
1728 c = escapeSequence(); | |
1729 stringbuffer.writeUTF8(c); | |
1730 continue; | |
1731 | |
1732 default: | |
1733 c = escapeSequence(); | |
1734 break; | |
1735 } | |
1736 break; | |
1737 | |
1738 case '\n': | |
1739 loc.linnum++; | |
1740 break; | |
1741 | |
1742 case '\r': | |
1743 if (*p == '\n') | |
1744 continue; // ignore | |
1745 c = '\n'; // treat EndOfLine as \n character | |
1746 loc.linnum++; | |
1747 break; | |
1748 | |
1749 case '"': | |
1750 t->len = stringbuffer.offset; | |
1751 stringbuffer.writeByte(0); | |
1752 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1753 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1754 stringPostfix(t); | |
1755 return TOKstring; | |
1756 | |
1757 case 0: | |
1758 case 0x1A: | |
1759 p--; | |
1760 error("unterminated string constant starting at %s", start.toChars()); | |
1761 t->ustring = (unsigned char *)""; | |
1762 t->len = 0; | |
1763 t->postfix = 0; | |
1764 return TOKstring; | |
1765 | |
1766 default: | |
1767 if (c & 0x80) | |
1768 { | |
1769 p--; | |
1770 c = decodeUTF(); | |
1771 if (c == LS || c == PS) | |
1772 { c = '\n'; | |
1773 loc.linnum++; | |
1774 } | |
1775 p++; | |
1776 stringbuffer.writeUTF8(c); | |
1777 continue; | |
1778 } | |
1779 break; | |
1780 } | |
1781 stringbuffer.writeByte(c); | |
1782 } | |
1783 } | |
1784 | |
1785 /************************************** | |
1786 */ | |
1787 | |
1788 TOK Lexer::charConstant(Token *t, int wide) | |
1789 { | |
1790 unsigned c; | |
1791 TOK tk = TOKcharv; | |
1792 | |
1793 //printf("Lexer::charConstant\n"); | |
1794 p++; | |
1795 c = *p++; | |
1796 switch (c) | |
1797 { | |
1798 case '\\': | |
1799 switch (*p) | |
1800 { | |
1801 case 'u': | |
1802 t->uns64value = escapeSequence(); | |
1803 tk = TOKwcharv; | |
1804 break; | |
1805 | |
1806 case 'U': | |
1807 case '&': | |
1808 t->uns64value = escapeSequence(); | |
1809 tk = TOKdcharv; | |
1810 break; | |
1811 | |
1812 default: | |
1813 t->uns64value = escapeSequence(); | |
1814 break; | |
1815 } | |
1816 break; | |
1817 | |
1818 case '\n': | |
1819 L1: | |
1820 loc.linnum++; | |
1821 case '\r': | |
1822 case 0: | |
1823 case 0x1A: | |
1824 case '\'': | |
1825 error("unterminated character constant"); | |
1826 return tk; | |
1827 | |
1828 default: | |
1829 if (c & 0x80) | |
1830 { | |
1831 p--; | |
1832 c = decodeUTF(); | |
1833 p++; | |
1834 if (c == LS || c == PS) | |
1835 goto L1; | |
1836 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) | |
1837 tk = TOKwcharv; | |
1838 else | |
1839 tk = TOKdcharv; | |
1840 } | |
1841 t->uns64value = c; | |
1842 break; | |
1843 } | |
1844 | |
1845 if (*p != '\'') | |
1846 { error("unterminated character constant"); | |
1847 return tk; | |
1848 } | |
1849 p++; | |
1850 return tk; | |
1851 } | |
1852 | |
1853 /*************************************** | |
1854 * Get postfix of string literal. | |
1855 */ | |
1856 | |
1857 void Lexer::stringPostfix(Token *t) | |
1858 { | |
1859 switch (*p) | |
1860 { | |
1861 case 'c': | |
1862 case 'w': | |
1863 case 'd': | |
1864 t->postfix = *p; | |
1865 p++; | |
1866 break; | |
1867 | |
1868 default: | |
1869 t->postfix = 0; | |
1870 break; | |
1871 } | |
1872 } | |
1873 | |
/***************************************
 * Read \u or \U unicode sequence
 * (compiled out with #if 0)
 * Input:
 *	u	'u' or 'U'
 * Returns:
 *	the value of the 4 ('u') or 8 ('U') hex digits read
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned nchars = (u == 'U') ? 8 : 4;
    unsigned value = 0;
    unsigned n = 0;

    while (1)
    {
	++p;
	if (n == nchars)
	    break;

	unsigned char c = *p;
	if (!ishex(c))
	{   error("\\%c sequence must be followed by %d hex characters", u, nchars);
	    break;
	}
	if (isdigit(c))
	    c -= '0';
	else if (islower(c))
	    c -= 'a' - 10;
	else
	    c -= 'A' - 10;
	value = (value << 4) | c;
	n++;
    }
    return value;
}
#endif
1912 | |
1913 /************************************** | |
1914 * Read in a number. | |
1915 * If it's an integer, store it in tok.TKutok.Vlong. | |
1916 * integers can be decimal, octal or hex | |
1917 * Handle the suffixes U, UL, LU, L, etc. | |
1918 * If it's double, store it in tok.TKutok.Vdouble. | |
1919 * Returns: | |
1920 * TKnum | |
1921 * TKdouble,... | |
1922 */ | |
1923 | |
1924 TOK Lexer::number(Token *t) | |
1925 { | |
1926 // We use a state machine to collect numbers | |
1927 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, | |
1928 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, | |
1929 STATE_hexh, STATE_error }; | |
1930 enum STATE state; | |
1931 | |
1932 enum FLAGS | |
1933 { FLAGS_decimal = 1, // decimal | |
1934 FLAGS_unsigned = 2, // u or U suffix | |
1935 FLAGS_long = 4, // l or L suffix | |
1936 }; | |
1937 enum FLAGS flags = FLAGS_decimal; | |
1938 | |
1939 int i; | |
1940 int base; | |
1941 unsigned c; | |
1942 unsigned char *start; | |
1943 TOK result; | |
1944 | |
1945 //printf("Lexer::number()\n"); | |
1946 state = STATE_initial; | |
1947 base = 0; | |
1948 stringbuffer.reset(); | |
1949 start = p; | |
1950 while (1) | |
1951 { | |
1952 c = *p; | |
1953 switch (state) | |
1954 { | |
1955 case STATE_initial: // opening state | |
1956 if (c == '0') | |
1957 state = STATE_0; | |
1958 else | |
1959 state = STATE_decimal; | |
1960 break; | |
1961 | |
1962 case STATE_0: | |
1963 flags = (FLAGS) (flags & ~FLAGS_decimal); | |
1964 switch (c) | |
1965 { | |
1966 #if ZEROH | |
1967 case 'H': // 0h | |
1968 case 'h': | |
1969 goto hexh; | |
1970 #endif | |
1971 case 'X': | |
1972 case 'x': | |
1973 state = STATE_hex0; | |
1974 break; | |
1975 | |
1976 case '.': | |
1977 if (p[1] == '.') // .. is a separate token | |
1978 goto done; | |
1979 case 'i': | |
1980 case 'f': | |
1981 case 'F': | |
1982 goto real; | |
1983 #if ZEROH | |
1984 case 'E': | |
1985 case 'e': | |
1986 goto case_hex; | |
1987 #endif | |
1988 case 'B': | |
1989 case 'b': | |
1990 state = STATE_binary0; | |
1991 break; | |
1992 | |
1993 case '0': case '1': case '2': case '3': | |
1994 case '4': case '5': case '6': case '7': | |
1995 state = STATE_octal; | |
1996 break; | |
1997 | |
1998 #if ZEROH | |
1999 case '8': case '9': case 'A': | |
2000 case 'C': case 'D': case 'F': | |
2001 case 'a': case 'c': case 'd': case 'f': | |
2002 case_hex: | |
2003 state = STATE_hexh; | |
2004 break; | |
2005 #endif | |
2006 case '_': | |
2007 state = STATE_octal; | |
2008 p++; | |
2009 continue; | |
2010 | |
2011 case 'L': | |
2012 if (p[1] == 'i') | |
2013 goto real; | |
2014 goto done; | |
2015 | |
2016 default: | |
2017 goto done; | |
2018 } | |
2019 break; | |
2020 | |
2021 case STATE_decimal: // reading decimal number | |
2022 if (!isdigit(c)) | |
2023 { | |
2024 #if ZEROH | |
2025 if (ishex(c) | |
2026 || c == 'H' || c == 'h' | |
2027 ) | |
2028 goto hexh; | |
2029 #endif | |
2030 if (c == '_') // ignore embedded _ | |
2031 { p++; | |
2032 continue; | |
2033 } | |
2034 if (c == '.' && p[1] != '.') | |
2035 goto real; | |
2036 else if (c == 'i' || c == 'f' || c == 'F' || | |
2037 c == 'e' || c == 'E') | |
2038 { | |
2039 real: // It's a real number. Back up and rescan as a real | |
2040 p = start; | |
2041 return inreal(t); | |
2042 } | |
2043 else if (c == 'L' && p[1] == 'i') | |
2044 goto real; | |
2045 goto done; | |
2046 } | |
2047 break; | |
2048 | |
2049 case STATE_hex0: // reading hex number | |
2050 case STATE_hex: | |
2051 if (!ishex(c)) | |
2052 { | |
2053 if (c == '_') // ignore embedded _ | |
2054 { p++; | |
2055 continue; | |
2056 } | |
2057 if (c == '.' && p[1] != '.') | |
2058 goto real; | |
2059 if (c == 'P' || c == 'p' || c == 'i') | |
2060 goto real; | |
2061 if (state == STATE_hex0) | |
2062 error("Hex digit expected, not '%c'", c); | |
2063 goto done; | |
2064 } | |
2065 state = STATE_hex; | |
2066 break; | |
2067 | |
2068 #if ZEROH | |
2069 hexh: | |
2070 state = STATE_hexh; | |
2071 case STATE_hexh: // parse numbers like 0FFh | |
2072 if (!ishex(c)) | |
2073 { | |
2074 if (c == 'H' || c == 'h') | |
2075 { | |
2076 p++; | |
2077 base = 16; | |
2078 goto done; | |
2079 } | |
2080 else | |
2081 { | |
2082 // Check for something like 1E3 or 0E24 | |
2083 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || | |
2084 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) | |
2085 goto real; | |
2086 error("Hex digit expected, not '%c'", c); | |
2087 goto done; | |
2088 } | |
2089 } | |
2090 break; | |
2091 #endif | |
2092 | |
2093 case STATE_octal: // reading octal number | |
2094 case STATE_octale: // reading octal number with non-octal digits | |
2095 if (!isoctal(c)) | |
2096 { | |
2097 #if ZEROH | |
2098 if (ishex(c) | |
2099 || c == 'H' || c == 'h' | |
2100 ) | |
2101 goto hexh; | |
2102 #endif | |
2103 if (c == '_') // ignore embedded _ | |
2104 { p++; | |
2105 continue; | |
2106 } | |
2107 if (c == '.' && p[1] != '.') | |
2108 goto real; | |
2109 if (c == 'i') | |
2110 goto real; | |
2111 if (isdigit(c)) | |
2112 { | |
2113 state = STATE_octale; | |
2114 } | |
2115 else | |
2116 goto done; | |
2117 } | |
2118 break; | |
2119 | |
2120 case STATE_binary0: // starting binary number | |
2121 case STATE_binary: // reading binary number | |
2122 if (c != '0' && c != '1') | |
2123 { | |
2124 #if ZEROH | |
2125 if (ishex(c) | |
2126 || c == 'H' || c == 'h' | |
2127 ) | |
2128 goto hexh; | |
2129 #endif | |
2130 if (c == '_') // ignore embedded _ | |
2131 { p++; | |
2132 continue; | |
2133 } | |
2134 if (state == STATE_binary0) | |
2135 { error("binary digit expected"); | |
2136 state = STATE_error; | |
2137 break; | |
2138 } | |
2139 else | |
2140 goto done; | |
2141 } | |
2142 state = STATE_binary; | |
2143 break; | |
2144 | |
2145 case STATE_error: // for error recovery | |
2146 if (!isdigit(c)) // scan until non-digit | |
2147 goto done; | |
2148 break; | |
2149 | |
2150 default: | |
2151 assert(0); | |
2152 } | |
2153 stringbuffer.writeByte(c); | |
2154 p++; | |
2155 } | |
2156 done: | |
2157 stringbuffer.writeByte(0); // terminate string | |
2158 if (state == STATE_octale) | |
2159 error("Octal digit expected"); | |
2160 | |
2161 uinteger_t n; // unsigned >=64 bit integer type | |
2162 | |
2163 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) | |
2164 n = stringbuffer.data[0] - '0'; | |
2165 else | |
2166 { | |
2167 // Convert string to integer | |
2168 #if __DMC__ | |
2169 errno = 0; | |
2170 n = strtoull((char *)stringbuffer.data,NULL,base); | |
2171 if (errno == ERANGE) | |
2172 error("integer overflow"); | |
2173 #else | |
2174 // Not everybody implements strtoull() | |
2175 char *p = (char *)stringbuffer.data; | |
2176 int r = 10, d; | |
2177 | |
2178 if (*p == '0') | |
2179 { | |
2180 if (p[1] == 'x' || p[1] == 'X') | |
2181 p += 2, r = 16; | |
2182 else if (p[1] == 'b' || p[1] == 'B') | |
2183 p += 2, r = 2; | |
2184 else if (isdigit(p[1])) | |
2185 p += 1, r = 8; | |
2186 } | |
2187 | |
2188 n = 0; | |
2189 while (1) | |
2190 { | |
2191 if (*p >= '0' && *p <= '9') | |
2192 d = *p - '0'; | |
2193 else if (*p >= 'a' && *p <= 'z') | |
2194 d = *p - 'a' + 10; | |
2195 else if (*p >= 'A' && *p <= 'Z') | |
2196 d = *p - 'A' + 10; | |
2197 else | |
2198 break; | |
2199 if (d >= r) | |
2200 break; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2201 uinteger_t n2 = n * r; |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2202 //printf("n2 / r = %llx, n = %llx\n", n2/r, n); |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2203 if (n2 / r != n || n2 + d < n) |
159 | 2204 { |
2205 error ("integer overflow"); | |
2206 break; | |
2207 } | |
2208 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2209 n = n2 + d; |
159 | 2210 p++; |
2211 } | |
2212 #endif | |
2213 if (sizeof(n) > 8 && | |
2214 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits | |
2215 error("integer overflow"); | |
2216 } | |
2217 | |
2218 // Parse trailing 'u', 'U', 'l' or 'L' in any combination | |
2219 while (1) | |
2220 { unsigned char f; | |
2221 | |
2222 switch (*p) | |
2223 { case 'U': | |
2224 case 'u': | |
2225 f = FLAGS_unsigned; | |
2226 goto L1; | |
2227 | |
2228 case 'l': | |
2229 if (1 || !global.params.useDeprecated) | |
2230 error("'l' suffix is deprecated, use 'L' instead"); | |
2231 case 'L': | |
2232 f = FLAGS_long; | |
2233 L1: | |
2234 p++; | |
2235 if (flags & f) | |
2236 error("unrecognized token"); | |
2237 flags = (FLAGS) (flags | f); | |
2238 continue; | |
2239 default: | |
2240 break; | |
2241 } | |
2242 break; | |
2243 } | |
2244 | |
2245 switch (flags) | |
2246 { | |
2247 case 0: | |
2248 /* Octal or Hexadecimal constant. | |
2249 * First that fits: int, uint, long, ulong | |
2250 */ | |
2251 if (n & 0x8000000000000000LL) | |
2252 result = TOKuns64v; | |
2253 else if (n & 0xFFFFFFFF00000000LL) | |
2254 result = TOKint64v; | |
2255 else if (n & 0x80000000) | |
2256 result = TOKuns32v; | |
2257 else | |
2258 result = TOKint32v; | |
2259 break; | |
2260 | |
2261 case FLAGS_decimal: | |
2262 /* First that fits: int, long, long long | |
2263 */ | |
2264 if (n & 0x8000000000000000LL) | |
2265 { error("signed integer overflow"); | |
2266 result = TOKuns64v; | |
2267 } | |
2268 else if (n & 0xFFFFFFFF80000000LL) | |
2269 result = TOKint64v; | |
2270 else | |
2271 result = TOKint32v; | |
2272 break; | |
2273 | |
2274 case FLAGS_unsigned: | |
2275 case FLAGS_decimal | FLAGS_unsigned: | |
2276 /* First that fits: uint, ulong | |
2277 */ | |
2278 if (n & 0xFFFFFFFF00000000LL) | |
2279 result = TOKuns64v; | |
2280 else | |
2281 result = TOKuns32v; | |
2282 break; | |
2283 | |
2284 case FLAGS_decimal | FLAGS_long: | |
2285 if (n & 0x8000000000000000LL) | |
2286 { error("signed integer overflow"); | |
2287 result = TOKuns64v; | |
2288 } | |
2289 else | |
2290 result = TOKint64v; | |
2291 break; | |
2292 | |
2293 case FLAGS_long: | |
2294 if (n & 0x8000000000000000LL) | |
2295 result = TOKuns64v; | |
2296 else | |
2297 result = TOKint64v; | |
2298 break; | |
2299 | |
2300 case FLAGS_unsigned | FLAGS_long: | |
2301 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: | |
2302 result = TOKuns64v; | |
2303 break; | |
2304 | |
2305 default: | |
2306 #ifdef DEBUG | |
2307 printf("%x\n",flags); | |
2308 #endif | |
2309 assert(0); | |
2310 } | |
2311 t->uns64value = n; | |
2312 return result; | |
2313 } | |
2314 | |
2315 /************************************** | |
2316 * Read in characters, converting them to real. | |
2317 * Bugs: | |
2318 * Exponent overflow not detected. | |
2319 * Too much requested precision is not detected. | |
2320 */ | |
2321 | |
2322 TOK Lexer::inreal(Token *t) | |
2323 #ifdef __DMC__ | |
2324 __in | |
2325 { | |
2326 assert(*p == '.' || isdigit(*p)); | |
2327 } | |
2328 __out (result) | |
2329 { | |
2330 switch (result) | |
2331 { | |
2332 case TOKfloat32v: | |
2333 case TOKfloat64v: | |
2334 case TOKfloat80v: | |
2335 case TOKimaginary32v: | |
2336 case TOKimaginary64v: | |
2337 case TOKimaginary80v: | |
2338 break; | |
2339 | |
2340 default: | |
2341 assert(0); | |
2342 } | |
2343 } | |
2344 __body | |
2345 #endif /* __DMC__ */ | |
2346 { int dblstate; | |
2347 unsigned c; | |
2348 char hex; // is this a hexadecimal-floating-constant? | |
2349 TOK result; | |
2350 | |
2351 //printf("Lexer::inreal()\n"); | |
2352 stringbuffer.reset(); | |
2353 dblstate = 0; | |
2354 hex = 0; | |
2355 Lnext: | |
2356 while (1) | |
2357 { | |
2358 // Get next char from input | |
2359 c = *p++; | |
2360 //printf("dblstate = %d, c = '%c'\n", dblstate, c); | |
2361 while (1) | |
2362 { | |
2363 switch (dblstate) | |
2364 { | |
2365 case 0: // opening state | |
2366 if (c == '0') | |
2367 dblstate = 9; | |
2368 else if (c == '.') | |
2369 dblstate = 3; | |
2370 else | |
2371 dblstate = 1; | |
2372 break; | |
2373 | |
2374 case 9: | |
2375 dblstate = 1; | |
2376 if (c == 'X' || c == 'x') | |
2377 { hex++; | |
2378 break; | |
2379 } | |
2380 case 1: // digits to left of . | |
2381 case 3: // digits to right of . | |
2382 case 7: // continuing exponent digits | |
2383 if (!isdigit(c) && !(hex && isxdigit(c))) | |
2384 { | |
2385 if (c == '_') | |
2386 goto Lnext; // ignore embedded '_' | |
2387 dblstate++; | |
2388 continue; | |
2389 } | |
2390 break; | |
2391 | |
2392 case 2: // no more digits to left of . | |
2393 if (c == '.') | |
2394 { dblstate++; | |
2395 break; | |
2396 } | |
2397 case 4: // no more digits to right of . | |
2398 if ((c == 'E' || c == 'e') || | |
2399 hex && (c == 'P' || c == 'p')) | |
2400 { dblstate = 5; | |
2401 hex = 0; // exponent is always decimal | |
2402 break; | |
2403 } | |
2404 if (hex) | |
2405 error("binary-exponent-part required"); | |
2406 goto done; | |
2407 | |
2408 case 5: // looking immediately to right of E | |
2409 dblstate++; | |
2410 if (c == '-' || c == '+') | |
2411 break; | |
2412 case 6: // 1st exponent digit expected | |
2413 if (!isdigit(c)) | |
2414 error("exponent expected"); | |
2415 dblstate++; | |
2416 break; | |
2417 | |
2418 case 8: // past end of exponent digits | |
2419 goto done; | |
2420 } | |
2421 break; | |
2422 } | |
2423 stringbuffer.writeByte(c); | |
2424 } | |
2425 done: | |
2426 p--; | |
2427 | |
2428 stringbuffer.writeByte(0); | |
2429 | |
2430 #if _WIN32 && __DMC__ | |
2431 char *save = __locale_decpoint; | |
2432 __locale_decpoint = "."; | |
2433 #endif | |
2434 #ifdef IN_GCC | |
2435 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); | |
2436 #else | |
2437 t->float80value = strtold((char *)stringbuffer.data, NULL); | |
2438 #endif | |
2439 errno = 0; | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2440 float strtofres; |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2441 double strtodres; |
159 | 2442 switch (*p) |
2443 { | |
2444 case 'F': | |
2445 case 'f': | |
2446 #ifdef IN_GCC | |
2447 real_t::parse((char *)stringbuffer.data, real_t::Float); | |
2448 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2449 strtofres = strtof((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2450 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2451 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2452 strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2453 errno = 0; |
159 | 2454 #endif |
2455 result = TOKfloat32v; | |
2456 p++; | |
2457 break; | |
2458 | |
2459 default: | |
2460 #ifdef IN_GCC | |
2461 real_t::parse((char *)stringbuffer.data, real_t::Double); | |
2462 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2463 strtodres = strtod((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2464 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2465 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2466 strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2467 errno = 0; |
159 | 2468 #endif |
2469 result = TOKfloat64v; | |
2470 break; | |
2471 | |
2472 case 'l': | |
2473 if (!global.params.useDeprecated) | |
2474 error("'l' suffix is deprecated, use 'L' instead"); | |
2475 case 'L': | |
2476 result = TOKfloat80v; | |
2477 p++; | |
2478 break; | |
2479 } | |
2480 if (*p == 'i' || *p == 'I') | |
2481 { | |
2482 if (!global.params.useDeprecated && *p == 'I') | |
2483 error("'I' suffix is deprecated, use 'i' instead"); | |
2484 p++; | |
2485 switch (result) | |
2486 { | |
2487 case TOKfloat32v: | |
2488 result = TOKimaginary32v; | |
2489 break; | |
2490 case TOKfloat64v: | |
2491 result = TOKimaginary64v; | |
2492 break; | |
2493 case TOKfloat80v: | |
2494 result = TOKimaginary80v; | |
2495 break; | |
2496 } | |
2497 } | |
2498 #if _WIN32 && __DMC__ | |
2499 __locale_decpoint = save; | |
2500 #endif | |
2501 if (errno == ERANGE) | |
2502 error("number is not representable"); | |
2503 return result; | |
2504 } | |
2505 | |
2506 /********************************************* | |
2507 * Do pragma. | |
2508 * Currently, the only pragma supported is: | |
2509 * #line linnum [filespec] | |
2510 */ | |
2511 | |
2512 void Lexer::pragma() | |
2513 { | |
2514 Token tok; | |
2515 int linnum; | |
2516 char *filespec = NULL; | |
2517 Loc loc = this->loc; | |
2518 | |
2519 scan(&tok); | |
2520 if (tok.value != TOKidentifier || tok.ident != Id::line) | |
2521 goto Lerr; | |
2522 | |
2523 scan(&tok); | |
2524 if (tok.value == TOKint32v || tok.value == TOKint64v) | |
2525 linnum = tok.uns64value - 1; | |
2526 else | |
2527 goto Lerr; | |
2528 | |
2529 while (1) | |
2530 { | |
2531 switch (*p) | |
2532 { | |
2533 case 0: | |
2534 case 0x1A: | |
2535 case '\n': | |
2536 Lnewline: | |
2537 this->loc.linnum = linnum; | |
2538 if (filespec) | |
2539 this->loc.filename = filespec; | |
2540 return; | |
2541 | |
2542 case '\r': | |
2543 p++; | |
2544 if (*p != '\n') | |
2545 { p--; | |
2546 goto Lnewline; | |
2547 } | |
2548 continue; | |
2549 | |
2550 case ' ': | |
2551 case '\t': | |
2552 case '\v': | |
2553 case '\f': | |
2554 p++; | |
2555 continue; // skip white space | |
2556 | |
2557 case '_': | |
2558 if (mod && memcmp(p, "__FILE__", 8) == 0) | |
2559 { | |
2560 p += 8; | |
2561 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); | |
2562 } | |
2563 continue; | |
2564 | |
2565 case '"': | |
2566 if (filespec) | |
2567 goto Lerr; | |
2568 stringbuffer.reset(); | |
2569 p++; | |
2570 while (1) | |
2571 { unsigned c; | |
2572 | |
2573 c = *p; | |
2574 switch (c) | |
2575 { | |
2576 case '\n': | |
2577 case '\r': | |
2578 case 0: | |
2579 case 0x1A: | |
2580 goto Lerr; | |
2581 | |
2582 case '"': | |
2583 stringbuffer.writeByte(0); | |
2584 filespec = mem.strdup((char *)stringbuffer.data); | |
2585 p++; | |
2586 break; | |
2587 | |
2588 default: | |
2589 if (c & 0x80) | |
2590 { unsigned u = decodeUTF(); | |
2591 if (u == PS || u == LS) | |
2592 goto Lerr; | |
2593 } | |
2594 stringbuffer.writeByte(c); | |
2595 p++; | |
2596 continue; | |
2597 } | |
2598 break; | |
2599 } | |
2600 continue; | |
2601 | |
2602 default: | |
2603 if (*p & 0x80) | |
2604 { unsigned u = decodeUTF(); | |
2605 if (u == PS || u == LS) | |
2606 goto Lnewline; | |
2607 } | |
2608 goto Lerr; | |
2609 } | |
2610 } | |
2611 | |
2612 Lerr: | |
2613 error(loc, "#line integer [\"filespec\"]\\n expected"); | |
2614 } | |
2615 | |
2616 | |
2617 /******************************************** | |
2618 * Decode UTF character. | |
2619 * Issue error messages for invalid sequences. | |
2620 * Return decoded character, advance p to last character in UTF sequence. | |
2621 */ | |
2622 | |
2623 unsigned Lexer::decodeUTF() | |
2624 { | |
2625 dchar_t u; | |
2626 unsigned char c; | |
2627 unsigned char *s = p; | |
2628 size_t len; | |
2629 size_t idx; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2630 const char *msg; |
159 | 2631 |
2632 c = *s; | |
2633 assert(c & 0x80); | |
2634 | |
2635 // Check length of remaining string up to 6 UTF-8 characters | |
2636 for (len = 1; len < 6 && s[len]; len++) | |
2637 ; | |
2638 | |
2639 idx = 0; | |
2640 msg = utf_decodeChar(s, len, &idx, &u); | |
2641 p += idx - 1; | |
2642 if (msg) | |
2643 { | |
2644 error("%s", msg); | |
2645 } | |
2646 return u; | |
2647 } | |
2648 | |
2649 | |
2650 /*************************************************** | |
2651 * Parse doc comment embedded between t->ptr and p. | |
2652 * Remove trailing blanks and tabs from lines. | |
2653 * Replace all newlines with \n. | |
2654 * Remove leading comment character from each line. | |
2655 * Decide if it's a lineComment or a blockComment. | |
2656 * Append to previous one for this token. | |
2657 */ | |
2658 | |
2659 void Lexer::getDocComment(Token *t, unsigned lineComment) | |
2660 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2661 /* ct tells us which kind of comment it is: '/', '*', or '+' |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2662 */ |
159 | 2663 unsigned char ct = t->ptr[2]; |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2664 |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2665 /* Start of comment text skips over / * *, / + +, or / / / |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2666 */ |
159 | 2667 unsigned char *q = t->ptr + 3; // start of comment text |
2668 | |
2669 unsigned char *qend = p; | |
2670 if (ct == '*' || ct == '+') | |
2671 qend -= 2; | |
2672 | |
2673 /* Scan over initial row of ****'s or ++++'s or ////'s | |
2674 */ | |
2675 for (; q < qend; q++) | |
2676 { | |
2677 if (*q != ct) | |
2678 break; | |
2679 } | |
2680 | |
2681 /* Remove trailing row of ****'s or ++++'s | |
2682 */ | |
2683 if (ct != '/') | |
2684 { | |
2685 for (; q < qend; qend--) | |
2686 { | |
2687 if (qend[-1] != ct) | |
2688 break; | |
2689 } | |
2690 } | |
2691 | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2692 /* Comment is now [q .. qend]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2693 * Canonicalize it into buf[]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2694 */ |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2695 OutBuffer buf; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2696 int linestart = 0; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2697 |
159 | 2698 for (; q < qend; q++) |
2699 { | |
2700 unsigned char c = *q; | |
2701 | |
2702 switch (c) | |
2703 { | |
2704 case '*': | |
2705 case '+': | |
2706 if (linestart && c == ct) | |
2707 { linestart = 0; | |
2708 /* Trim preceding whitespace up to preceding \n | |
2709 */ | |
2710 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2711 buf.offset--; | |
2712 continue; | |
2713 } | |
2714 break; | |
2715 | |
2716 case ' ': | |
2717 case '\t': | |
2718 break; | |
2719 | |
2720 case '\r': | |
2721 if (q[1] == '\n') | |
2722 continue; // skip the \r | |
2723 goto Lnewline; | |
2724 | |
2725 default: | |
2726 if (c == 226) | |
2727 { | |
2728 // If LS or PS | |
2729 if (q[1] == 128 && | |
2730 (q[2] == 168 || q[2] == 169)) | |
2731 { | |
2732 q += 2; | |
2733 goto Lnewline; | |
2734 } | |
2735 } | |
2736 linestart = 0; | |
2737 break; | |
2738 | |
2739 Lnewline: | |
2740 c = '\n'; // replace all newlines with \n | |
2741 case '\n': | |
2742 linestart = 1; | |
2743 | |
2744 /* Trim trailing whitespace | |
2745 */ | |
2746 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2747 buf.offset--; | |
2748 | |
2749 break; | |
2750 } | |
2751 buf.writeByte(c); | |
2752 } | |
2753 | |
2754 // Always end with a newline | |
2755 if (!buf.offset || buf.data[buf.offset - 1] != '\n') | |
2756 buf.writeByte('\n'); | |
2757 | |
2758 buf.writeByte(0); | |
2759 | |
2760 // It's a line comment if the start of the doc comment comes | |
2761 // after other non-whitespace on the same line. | |
2762 unsigned char** dc = (lineComment && anyToken) | |
2763 ? &t->lineComment | |
2764 : &t->blockComment; | |
2765 | |
2766 // Combine with previous doc comment, if any | |
2767 if (*dc) | |
2768 *dc = combineComments(*dc, (unsigned char *)buf.data); | |
2769 else | |
2770 *dc = (unsigned char *)buf.extractData(); | |
2771 } | |
2772 | |
2773 /******************************************** | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2774 * Combine two document comments into one, |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2775 * separated by a newline. |
159 | 2776 */ |
2777 | |
2778 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) | |
2779 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2780 //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2781 |
159 | 2782 unsigned char *c = c2; |
2783 | |
2784 if (c1) | |
2785 { c = c1; | |
2786 if (c2) | |
2787 { size_t len1 = strlen((char *)c1); | |
2788 size_t len2 = strlen((char *)c2); | |
2789 | |
2790 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); | |
2791 memcpy(c, c1, len1); | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2792 if (len1 && c1[len1 - 1] != '\n') |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2793 { c[len1] = '\n'; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2794 len1++; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2795 } |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2796 memcpy(c + len1, c2, len2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2797 c[len1 + len2] = 0; |
159 | 2798 } |
2799 } | |
2800 return c; | |
2801 } | |
2802 | |
2803 /******************************************** | |
2804 * Create an identifier in the string table. | |
2805 */ | |
2806 | |
2807 Identifier *Lexer::idPool(const char *s) | |
2808 { | |
2809 size_t len = strlen(s); | |
2810 StringValue *sv = stringtable.update(s, len); | |
2811 Identifier *id = (Identifier *) sv->ptrvalue; | |
2812 if (!id) | |
2813 { | |
2814 id = new Identifier(sv->lstring.string, TOKidentifier); | |
2815 sv->ptrvalue = id; | |
2816 } | |
2817 return id; | |
2818 } | |
2819 | |
2820 /********************************************* | |
2821 * Create a unique identifier using the prefix s. | |
2822 */ | |
2823 | |
2824 Identifier *Lexer::uniqueId(const char *s, int num) | |
2825 { char buffer[32]; | |
2826 size_t slen = strlen(s); | |
2827 | |
2828 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); | |
2829 sprintf(buffer, "%s%d", s, num); | |
2830 return idPool(buffer); | |
2831 } | |
2832 | |
2833 Identifier *Lexer::uniqueId(const char *s) | |
2834 { | |
2835 static int num; | |
2836 return uniqueId(s, ++num); | |
2837 } | |
2838 | |
2839 /**************************************** | |
2840 */ | |
2841 | |
2842 struct Keyword | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2843 { const char *name; |
159 | 2844 enum TOK value; |
2845 }; | |
2846 | |
2847 static Keyword keywords[] = | |
2848 { | |
2849 // { "", TOK }, | |
2850 | |
2851 { "this", TOKthis }, | |
2852 { "super", TOKsuper }, | |
2853 { "assert", TOKassert }, | |
2854 { "null", TOKnull }, | |
2855 { "true", TOKtrue }, | |
2856 { "false", TOKfalse }, | |
2857 { "cast", TOKcast }, | |
2858 { "new", TOKnew }, | |
2859 { "delete", TOKdelete }, | |
2860 { "throw", TOKthrow }, | |
2861 { "module", TOKmodule }, | |
2862 { "pragma", TOKpragma }, | |
2863 { "typeof", TOKtypeof }, | |
2864 { "typeid", TOKtypeid }, | |
2865 | |
2866 { "template", TOKtemplate }, | |
2867 | |
2868 { "void", TOKvoid }, | |
2869 { "byte", TOKint8 }, | |
2870 { "ubyte", TOKuns8 }, | |
2871 { "short", TOKint16 }, | |
2872 { "ushort", TOKuns16 }, | |
2873 { "int", TOKint32 }, | |
2874 { "uint", TOKuns32 }, | |
2875 { "long", TOKint64 }, | |
2876 { "ulong", TOKuns64 }, | |
2877 { "cent", TOKcent, }, | |
2878 { "ucent", TOKucent, }, | |
2879 { "float", TOKfloat32 }, | |
2880 { "double", TOKfloat64 }, | |
2881 { "real", TOKfloat80 }, | |
2882 | |
2883 { "bool", TOKbool }, | |
2884 { "char", TOKchar }, | |
2885 { "wchar", TOKwchar }, | |
2886 { "dchar", TOKdchar }, | |
2887 | |
2888 { "ifloat", TOKimaginary32 }, | |
2889 { "idouble", TOKimaginary64 }, | |
2890 { "ireal", TOKimaginary80 }, | |
2891 | |
2892 { "cfloat", TOKcomplex32 }, | |
2893 { "cdouble", TOKcomplex64 }, | |
2894 { "creal", TOKcomplex80 }, | |
2895 | |
2896 { "delegate", TOKdelegate }, | |
2897 { "function", TOKfunction }, | |
2898 | |
2899 { "is", TOKis }, | |
2900 { "if", TOKif }, | |
2901 { "else", TOKelse }, | |
2902 { "while", TOKwhile }, | |
2903 { "for", TOKfor }, | |
2904 { "do", TOKdo }, | |
2905 { "switch", TOKswitch }, | |
2906 { "case", TOKcase }, | |
2907 { "default", TOKdefault }, | |
2908 { "break", TOKbreak }, | |
2909 { "continue", TOKcontinue }, | |
2910 { "synchronized", TOKsynchronized }, | |
2911 { "return", TOKreturn }, | |
2912 { "goto", TOKgoto }, | |
2913 { "try", TOKtry }, | |
2914 { "catch", TOKcatch }, | |
2915 { "finally", TOKfinally }, | |
2916 { "with", TOKwith }, | |
2917 { "asm", TOKasm }, | |
2918 { "foreach", TOKforeach }, | |
2919 { "foreach_reverse", TOKforeach_reverse }, | |
2920 { "scope", TOKscope }, | |
2921 | |
2922 { "struct", TOKstruct }, | |
2923 { "class", TOKclass }, | |
2924 { "interface", TOKinterface }, | |
2925 { "union", TOKunion }, | |
2926 { "enum", TOKenum }, | |
2927 { "import", TOKimport }, | |
2928 { "mixin", TOKmixin }, | |
2929 { "static", TOKstatic }, | |
2930 { "final", TOKfinal }, | |
2931 { "const", TOKconst }, | |
2932 { "typedef", TOKtypedef }, | |
2933 { "alias", TOKalias }, | |
2934 { "override", TOKoverride }, | |
2935 { "abstract", TOKabstract }, | |
2936 { "volatile", TOKvolatile }, | |
2937 { "debug", TOKdebug }, | |
2938 { "deprecated", TOKdeprecated }, | |
2939 { "in", TOKin }, | |
2940 { "out", TOKout }, | |
2941 { "inout", TOKinout }, | |
2942 { "lazy", TOKlazy }, | |
2943 { "auto", TOKauto }, | |
2944 | |
2945 { "align", TOKalign }, | |
2946 { "extern", TOKextern }, | |
2947 { "private", TOKprivate }, | |
2948 { "package", TOKpackage }, | |
2949 { "protected", TOKprotected }, | |
2950 { "public", TOKpublic }, | |
2951 { "export", TOKexport }, | |
2952 | |
2953 { "body", TOKbody }, | |
2954 { "invariant", TOKinvariant }, | |
2955 { "unittest", TOKunittest }, | |
2956 { "version", TOKversion }, | |
2957 //{ "manifest", TOKmanifest }, | |
2958 | |
2959 // Added after 1.0 | |
2960 { "ref", TOKref }, | |
2961 { "macro", TOKmacro }, | |
336 | 2962 #if DMDV2 |
159 | 2963 { "pure", TOKpure }, |
2964 { "nothrow", TOKnothrow }, | |
336 | 2965 { "__thread", TOKtls }, |
159 | 2966 { "__traits", TOKtraits }, |
2967 { "__overloadset", TOKoverloadset }, | |
336 | 2968 { "__FILE__", TOKfile }, |
2969 { "__LINE__", TOKline }, | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2970 { "shared", TOKshared }, |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2971 { "immutable", TOKimmutable }, |
159 | 2972 #endif |
2973 }; | |
2974 | |
2975 int Token::isKeyword() | |
2976 { | |
2977 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) | |
2978 { | |
2979 if (keywords[u].value == value) | |
2980 return 1; | |
2981 } | |
2982 return 0; | |
2983 } | |
2984 | |
2985 void Lexer::initKeywords() | |
2986 { StringValue *sv; | |
2987 unsigned u; | |
2988 enum TOK v; | |
2989 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); | |
2990 | |
2991 if (global.params.Dversion == 1) | |
2992 nkeywords -= 2; | |
2993 | |
2994 cmtable_init(); | |
2995 | |
2996 for (u = 0; u < nkeywords; u++) | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2997 { const char *s; |
159 | 2998 |
2999 //printf("keyword[%d] = '%s'\n",u, keywords[u].name); | |
3000 s = keywords[u].name; | |
3001 v = keywords[u].value; | |
3002 sv = stringtable.insert(s, strlen(s)); | |
3003 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); | |
3004 | |
3005 //printf("tochars[%d] = '%s'\n",v, s); | |
3006 Token::tochars[v] = s; | |
3007 } | |
3008 | |
3009 Token::tochars[TOKeof] = "EOF"; | |
3010 Token::tochars[TOKlcurly] = "{"; | |
3011 Token::tochars[TOKrcurly] = "}"; | |
3012 Token::tochars[TOKlparen] = "("; | |
3013 Token::tochars[TOKrparen] = ")"; | |
3014 Token::tochars[TOKlbracket] = "["; | |
3015 Token::tochars[TOKrbracket] = "]"; | |
3016 Token::tochars[TOKsemicolon] = ";"; | |
3017 Token::tochars[TOKcolon] = ":"; | |
3018 Token::tochars[TOKcomma] = ","; | |
3019 Token::tochars[TOKdot] = "."; | |
3020 Token::tochars[TOKxor] = "^"; | |
3021 Token::tochars[TOKxorass] = "^="; | |
3022 Token::tochars[TOKassign] = "="; | |
3023 Token::tochars[TOKconstruct] = "="; | |
336 | 3024 #if DMDV2 |
159 | 3025 Token::tochars[TOKblit] = "="; |
3026 #endif | |
3027 Token::tochars[TOKlt] = "<"; | |
3028 Token::tochars[TOKgt] = ">"; | |
3029 Token::tochars[TOKle] = "<="; | |
3030 Token::tochars[TOKge] = ">="; | |
3031 Token::tochars[TOKequal] = "=="; | |
3032 Token::tochars[TOKnotequal] = "!="; | |
3033 Token::tochars[TOKnotidentity] = "!is"; | |
3034 Token::tochars[TOKtobool] = "!!"; | |
3035 | |
3036 Token::tochars[TOKunord] = "!<>="; | |
3037 Token::tochars[TOKue] = "!<>"; | |
3038 Token::tochars[TOKlg] = "<>"; | |
3039 Token::tochars[TOKleg] = "<>="; | |
3040 Token::tochars[TOKule] = "!>"; | |
3041 Token::tochars[TOKul] = "!>="; | |
3042 Token::tochars[TOKuge] = "!<"; | |
3043 Token::tochars[TOKug] = "!<="; | |
3044 | |
3045 Token::tochars[TOKnot] = "!"; | |
3046 Token::tochars[TOKtobool] = "!!"; | |
3047 Token::tochars[TOKshl] = "<<"; | |
3048 Token::tochars[TOKshr] = ">>"; | |
3049 Token::tochars[TOKushr] = ">>>"; | |
3050 Token::tochars[TOKadd] = "+"; | |
3051 Token::tochars[TOKmin] = "-"; | |
3052 Token::tochars[TOKmul] = "*"; | |
3053 Token::tochars[TOKdiv] = "/"; | |
3054 Token::tochars[TOKmod] = "%"; | |
3055 Token::tochars[TOKslice] = ".."; | |
3056 Token::tochars[TOKdotdotdot] = "..."; | |
3057 Token::tochars[TOKand] = "&"; | |
3058 Token::tochars[TOKandand] = "&&"; | |
3059 Token::tochars[TOKor] = "|"; | |
3060 Token::tochars[TOKoror] = "||"; | |
3061 Token::tochars[TOKarray] = "[]"; | |
3062 Token::tochars[TOKindex] = "[i]"; | |
3063 Token::tochars[TOKaddress] = "&"; | |
3064 Token::tochars[TOKstar] = "*"; | |
3065 Token::tochars[TOKtilde] = "~"; | |
3066 Token::tochars[TOKdollar] = "$"; | |
3067 Token::tochars[TOKcast] = "cast"; | |
3068 Token::tochars[TOKplusplus] = "++"; | |
3069 Token::tochars[TOKminusminus] = "--"; | |
3070 Token::tochars[TOKtype] = "type"; | |
3071 Token::tochars[TOKquestion] = "?"; | |
3072 Token::tochars[TOKneg] = "-"; | |
3073 Token::tochars[TOKuadd] = "+"; | |
3074 Token::tochars[TOKvar] = "var"; | |
3075 Token::tochars[TOKaddass] = "+="; | |
3076 Token::tochars[TOKminass] = "-="; | |
3077 Token::tochars[TOKmulass] = "*="; | |
3078 Token::tochars[TOKdivass] = "/="; | |
3079 Token::tochars[TOKmodass] = "%="; | |
3080 Token::tochars[TOKshlass] = "<<="; | |
3081 Token::tochars[TOKshrass] = ">>="; | |
3082 Token::tochars[TOKushrass] = ">>>="; | |
3083 Token::tochars[TOKandass] = "&="; | |
3084 Token::tochars[TOKorass] = "|="; | |
3085 Token::tochars[TOKcatass] = "~="; | |
3086 Token::tochars[TOKcat] = "~"; | |
3087 Token::tochars[TOKcall] = "call"; | |
3088 Token::tochars[TOKidentity] = "is"; | |
3089 Token::tochars[TOKnotidentity] = "!is"; | |
3090 | |
3091 Token::tochars[TOKorass] = "|="; | |
3092 Token::tochars[TOKidentifier] = "identifier"; | |
3093 | |
3094 // For debugging | |
3095 Token::tochars[TOKdotexp] = "dotexp"; | |
3096 Token::tochars[TOKdotti] = "dotti"; | |
3097 Token::tochars[TOKdotvar] = "dotvar"; | |
3098 Token::tochars[TOKdottype] = "dottype"; | |
3099 Token::tochars[TOKsymoff] = "symoff"; | |
3100 Token::tochars[TOKtypedot] = "typedot"; | |
3101 Token::tochars[TOKarraylength] = "arraylength"; | |
3102 Token::tochars[TOKarrayliteral] = "arrayliteral"; | |
3103 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; | |
3104 Token::tochars[TOKstructliteral] = "structliteral"; | |
3105 Token::tochars[TOKstring] = "string"; | |
3106 Token::tochars[TOKdsymbol] = "symbol"; | |
3107 Token::tochars[TOKtuple] = "tuple"; | |
3108 Token::tochars[TOKdeclaration] = "declaration"; | |
3109 Token::tochars[TOKdottd] = "dottd"; | |
3110 Token::tochars[TOKon_scope_exit] = "scope(exit)"; | |
3111 } |