Mercurial > projects > ldc
annotate dmd/lexer.c @ 837:331a176c1f4f
Removed error on naked, not fully complete, but I'll be doing more work on it during this Christmas, and some things do work.
Fixed taking delegate of final class method. see mini/delegate3.d.
author | Tomas Lindquist Olsen <tomas.l.olsen@gmail.com> |
---|---|
date | Tue, 09 Dec 2008 14:07:30 +0100 |
parents | 661384d6a936 |
children | bc982f1ad106 |
rev | line source |
---|---|
159 | 1 |
2 // Compiler implementation of the D programming language | |
3 // Copyright (c) 1999-2008 by Digital Mars | |
4 // All Rights Reserved | |
5 // written by Walter Bright | |
6 // http://www.digitalmars.com | |
7 // License for redistribution is by either the Artistic License | |
8 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
9 // See the included readme.txt for details. | |
10 | |
11 /* Lexical Analyzer */ | |
12 | |
13 #include <stdio.h> | |
14 #include <string.h> | |
15 #include <ctype.h> | |
16 #include <stdarg.h> | |
17 #include <errno.h> | |
18 #include <wchar.h> | |
19 #include <stdlib.h> | |
20 #include <assert.h> | |
21 #include <sys/time.h> | |
697
25a32766ed57
Missing include added.
Christian Kamm <kamm incasoftware de>
parents:
696
diff
changeset
|
22 #include <math.h> |
159 | 23 |
24 #ifdef IN_GCC | |
25 | |
26 #include <time.h> | |
27 #include "mem.h" | |
28 | |
29 #else | |
30 | |
31 #if __GNUC__ | |
32 #include <time.h> | |
33 #endif | |
34 | |
35 #if IN_LLVM | |
36 #include "mem.h" | |
37 #elif _WIN32 | |
38 #include "..\root\mem.h" | |
39 #else | |
40 #include "../root/mem.h" | |
41 #endif | |
42 #endif | |
43 | |
44 #include "stringtable.h" | |
45 | |
46 #include "lexer.h" | |
47 #include "utf.h" | |
48 #include "identifier.h" | |
49 #include "id.h" | |
50 #include "module.h" | |
51 | |
52 #if _WIN32 && __DMC__ | |
53 // from \dm\src\include\setlocal.h | |
54 extern "C" char * __cdecl __locale_decpoint; | |
55 #endif | |
56 | |
57 extern int HtmlNamedEntity(unsigned char *p, int length); | |
58 | |
59 #define LS 0x2028 // UTF line separator | |
60 #define PS 0x2029 // UTF paragraph separator | |
61 | |
62 /******************************************** | |
63 * Do our own char maps | |
64 */ | |
65 | |
// Classification bits for each of the 256 possible byte values.
// Filled in by cmtable_init(); queried through the is*() helpers below.
static unsigned char cmtable[256];

const int CMoctal  = 0x1;   // byte is an octal digit
const int CMhex    = 0x2;   // byte is a hexadecimal digit
const int CMidchar = 0x4;   // byte may appear in an identifier

inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Populate cmtable[] with the CM* flags for every byte value.
 * Flags are OR-ed into whatever the table already holds.
 */
static void cmtable_init()
{
    const unsigned entries = sizeof(cmtable) / sizeof(cmtable[0]);
    unsigned ch = 0;

    while (ch < entries)
    {
        unsigned char flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;

        const bool lowerHex = (ch >= 'a' && ch <= 'f');
        const bool upperHex = (ch >= 'A' && ch <= 'F');
        if (isdigit(ch) || lowerHex || upperHex)
            flags |= CMhex;

        if (isalnum(ch) || ch == '_')
            flags |= CMidchar;

        cmtable[ch] |= flags;
        ++ch;
    }
}
88 | |
89 | |
90 /************************* Token **********************************************/ | |
91 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
92 const char *Token::tochars[TOKMAX]; |
159 | 93 |
94 void *Token::operator new(size_t size) | |
95 { Token *t; | |
96 | |
97 if (Lexer::freelist) | |
98 { | |
99 t = Lexer::freelist; | |
100 Lexer::freelist = t->next; | |
101 return t; | |
102 } | |
103 | |
104 return ::operator new(size); | |
105 } | |
106 | |
#ifdef DEBUG
/* Debug aid (compiled only when DEBUG is defined): writes the text
 * returned by toChars() followed by a newline to stdmsg.
 */
void Token::print()
{
    fprintf(stdmsg, "%s\n", toChars());
}
#endif
113 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
114 const char *Token::toChars() |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
115 { const char *p; |
159 | 116 static char buffer[3 + 3 * sizeof(value) + 1]; |
117 | |
118 p = buffer; | |
119 switch (value) | |
120 { | |
121 case TOKint32v: | |
122 #if IN_GCC | |
123 sprintf(buffer,"%d",(d_int32)int64value); | |
124 #else | |
125 sprintf(buffer,"%d",int32value); | |
126 #endif | |
127 break; | |
128 | |
129 case TOKuns32v: | |
130 case TOKcharv: | |
131 case TOKwcharv: | |
132 case TOKdcharv: | |
133 #if IN_GCC | |
134 sprintf(buffer,"%uU",(d_uns32)uns64value); | |
135 #else | |
136 sprintf(buffer,"%uU",uns32value); | |
137 #endif | |
138 break; | |
139 | |
140 case TOKint64v: | |
794
661384d6a936
Fix warnings on x86-64. By fvbommel.
Christian Kamm <kamm incasoftware de>
parents:
717
diff
changeset
|
141 sprintf(buffer,"%lldL",(long long)int64value); |
159 | 142 break; |
143 | |
144 case TOKuns64v: | |
794
661384d6a936
Fix warnings on x86-64. By fvbommel.
Christian Kamm <kamm incasoftware de>
parents:
717
diff
changeset
|
145 sprintf(buffer,"%lluUL",(unsigned long long)uns64value); |
159 | 146 break; |
147 | |
148 #if IN_GCC | |
149 case TOKfloat32v: | |
150 case TOKfloat64v: | |
151 case TOKfloat80v: | |
152 float80value.format(buffer, sizeof(buffer)); | |
153 break; | |
154 case TOKimaginary32v: | |
155 case TOKimaginary64v: | |
156 case TOKimaginary80v: | |
157 float80value.format(buffer, sizeof(buffer)); | |
158 // %% buffer | |
159 strcat(buffer, "i"); | |
160 break; | |
161 #else | |
162 case TOKfloat32v: | |
163 sprintf(buffer,"%Lgf", float80value); | |
164 break; | |
165 | |
166 case TOKfloat64v: | |
167 sprintf(buffer,"%Lg", float80value); | |
168 break; | |
169 | |
170 case TOKfloat80v: | |
171 sprintf(buffer,"%LgL", float80value); | |
172 break; | |
173 | |
174 case TOKimaginary32v: | |
175 sprintf(buffer,"%Lgfi", float80value); | |
176 break; | |
177 | |
178 case TOKimaginary64v: | |
179 sprintf(buffer,"%Lgi", float80value); | |
180 break; | |
181 | |
182 case TOKimaginary80v: | |
183 sprintf(buffer,"%LgLi", float80value); | |
184 break; | |
185 #endif | |
186 | |
187 case TOKstring: | |
188 #if CSTRINGS | |
189 p = string; | |
190 #else | |
191 { OutBuffer buf; | |
192 | |
193 buf.writeByte('"'); | |
194 for (size_t i = 0; i < len; ) | |
195 { unsigned c; | |
196 | |
197 utf_decodeChar((unsigned char *)ustring, len, &i, &c); | |
198 switch (c) | |
199 { | |
200 case 0: | |
201 break; | |
202 | |
203 case '"': | |
204 case '\\': | |
205 buf.writeByte('\\'); | |
206 default: | |
207 if (isprint(c)) | |
208 buf.writeByte(c); | |
209 else if (c <= 0x7F) | |
210 buf.printf("\\x%02x", c); | |
211 else if (c <= 0xFFFF) | |
212 buf.printf("\\u%04x", c); | |
213 else | |
214 buf.printf("\\U%08x", c); | |
215 continue; | |
216 } | |
217 break; | |
218 } | |
219 buf.writeByte('"'); | |
220 if (postfix) | |
221 buf.writeByte('"'); | |
222 buf.writeByte(0); | |
223 p = (char *)buf.extractData(); | |
224 } | |
225 #endif | |
226 break; | |
227 | |
228 case TOKidentifier: | |
229 case TOKenum: | |
230 case TOKstruct: | |
231 case TOKimport: | |
232 CASE_BASIC_TYPES: | |
233 p = ident->toChars(); | |
234 break; | |
235 | |
236 default: | |
237 p = toChars(value); | |
238 break; | |
239 } | |
240 return p; | |
241 } | |
242 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
243 const char *Token::toChars(enum TOK value) |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
244 { const char *p; |
159 | 245 static char buffer[3 + 3 * sizeof(value) + 1]; |
246 | |
247 p = tochars[value]; | |
248 if (!p) | |
249 { sprintf(buffer,"TOK%d",value); | |
250 p = buffer; | |
251 } | |
252 return p; | |
253 } | |
254 | |
255 /*************************** Lexer ********************************************/ | |
256 | |
257 Token *Lexer::freelist = NULL; | |
258 StringTable Lexer::stringtable; | |
259 OutBuffer Lexer::stringbuffer; | |
260 | |
/* Construct a lexer over the byte range [base + begoffset, base + endoffset).
 *
 *  mod           module being lexed; also used to initialise loc (line 1)
 *  base          start of the source buffer
 *  begoffset     offset from base at which scanning starts
 *  endoffset     offset from base stored as the end pointer
 *  doDocComment  stored; consulted by scan() when it meets doc comments
 *  commentToken  stored; consulted by scan() to return comments as tokens
 *
 * If the text starts with "#!", that whole first line is skipped and
 * loc.linnum is set to 2.
 */
Lexer::Lexer(Module *mod,
        unsigned char *base, unsigned begoffset, unsigned endoffset,
        int doDocComment, int commentToken)
    : loc(mod, 1)
{
    //printf("Lexer::Lexer(%p,%d)\n",base,length);
    //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
    memset(&token,0,sizeof(token));
    this->base = base;
    this->end  = base + endoffset;
    p = base + begoffset;
    this->mod = mod;
    this->doDocComment = doDocComment;
    this->anyToken = 0;
    this->commentToken = commentToken;
    //initKeywords();

    /* If first line starts with '#!', ignore the line
     */

    if (p[0] == '#' && p[1] =='!')
    {
        p += 2;
        while (1)
        {   unsigned char c = *p;
            switch (c)
            {
                case '\n':
                    p++;
                    break;

                case '\r':
                    p++;
                    if (*p == '\n')     // treat CR LF as a single line ending
                        p++;
                    break;

                case 0:
                case 0x1A:
                    // end of input: stop without advancing past the terminator
                    break;

                default:
                    if (c & 0x80)
                    {   unsigned u = decodeUTF();
                        if (u == PS || u == LS)
                            break;      // Unicode line/paragraph separator ends the line
                    }
                    p++;
                    continue;           // ordinary character: keep skipping
            }
            break;                      // reached only when the switch was left via break
        }
        loc.linnum = 2;
    }
}
316 | |
317 | |
318 void Lexer::error(const char *format, ...) | |
319 { | |
320 if (mod && !global.gag) | |
321 { | |
322 char *p = loc.toChars(); | |
323 if (*p) | |
324 fprintf(stdmsg, "%s: ", p); | |
325 mem.free(p); | |
326 | |
327 va_list ap; | |
328 va_start(ap, format); | |
329 vfprintf(stdmsg, format, ap); | |
330 va_end(ap); | |
331 | |
332 fprintf(stdmsg, "\n"); | |
333 fflush(stdmsg); | |
334 | |
335 if (global.errors >= 20) // moderate blizzard of cascading messages | |
336 fatal(); | |
337 } | |
338 global.errors++; | |
339 } | |
340 | |
341 void Lexer::error(Loc loc, const char *format, ...) | |
342 { | |
343 if (mod && !global.gag) | |
344 { | |
345 char *p = loc.toChars(); | |
346 if (*p) | |
347 fprintf(stdmsg, "%s: ", p); | |
348 mem.free(p); | |
349 | |
350 va_list ap; | |
351 va_start(ap, format); | |
352 vfprintf(stdmsg, format, ap); | |
353 va_end(ap); | |
354 | |
355 fprintf(stdmsg, "\n"); | |
356 fflush(stdmsg); | |
357 | |
358 if (global.errors >= 20) // moderate blizzard of cascading messages | |
359 fatal(); | |
360 } | |
361 global.errors++; | |
362 } | |
363 | |
364 TOK Lexer::nextToken() | |
365 { Token *t; | |
366 | |
367 if (token.next) | |
368 { | |
369 t = token.next; | |
370 memcpy(&token,t,sizeof(Token)); | |
371 t->next = freelist; | |
372 freelist = t; | |
373 } | |
374 else | |
375 { | |
376 scan(&token); | |
377 } | |
378 //token.print(); | |
379 return token.value; | |
380 } | |
381 | |
382 Token *Lexer::peek(Token *ct) | |
383 { Token *t; | |
384 | |
385 if (ct->next) | |
386 t = ct->next; | |
387 else | |
388 { | |
389 t = new Token(); | |
390 scan(t); | |
391 t->next = NULL; | |
392 ct->next = t; | |
393 } | |
394 return t; | |
395 } | |
396 | |
717
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
397 /*********************** |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
398 * Look ahead at next token's value. |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
399 */ |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
400 |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
401 TOK Lexer::peekNext() |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
402 { |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
403 return peek(&token)->value; |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
404 } |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
405 |
159 | 406 /********************************* |
407 * tk is on the opening (. | |
408 * Look ahead and return token that is past the closing ). | |
409 */ | |
410 | |
411 Token *Lexer::peekPastParen(Token *tk) | |
412 { | |
413 //printf("peekPastParen()\n"); | |
414 int parens = 1; | |
415 int curlynest = 0; | |
416 while (1) | |
417 { | |
418 tk = peek(tk); | |
419 //tk->print(); | |
420 switch (tk->value) | |
421 { | |
422 case TOKlparen: | |
423 parens++; | |
424 continue; | |
425 | |
426 case TOKrparen: | |
427 --parens; | |
428 if (parens) | |
429 continue; | |
430 tk = peek(tk); | |
431 break; | |
432 | |
433 case TOKlcurly: | |
434 curlynest++; | |
435 continue; | |
436 | |
437 case TOKrcurly: | |
438 if (--curlynest >= 0) | |
439 continue; | |
440 break; | |
441 | |
442 case TOKsemicolon: | |
443 if (curlynest) | |
444 continue; | |
445 break; | |
446 | |
447 case TOKeof: | |
448 break; | |
449 | |
450 default: | |
451 continue; | |
452 } | |
453 return tk; | |
454 } | |
455 } | |
456 | |
457 /********************************** | |
458 * Determine if string is a valid Identifier. | |
459 * Placed here because of commonality with Lexer functionality. | |
460 * Returns: | |
461 * 0 invalid | |
462 */ | |
463 | |
464 int Lexer::isValidIdentifier(char *p) | |
465 { | |
466 size_t len; | |
467 size_t idx; | |
468 | |
469 if (!p || !*p) | |
470 goto Linvalid; | |
471 | |
472 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars | |
473 goto Linvalid; | |
474 | |
475 len = strlen(p); | |
476 idx = 0; | |
477 while (p[idx]) | |
478 { dchar_t dc; | |
479 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
480 const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); |
159 | 481 if (q) |
482 goto Linvalid; | |
483 | |
484 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) | |
485 goto Linvalid; | |
486 } | |
487 return 1; | |
488 | |
489 Linvalid: | |
490 return 0; | |
491 } | |
492 | |
493 /**************************** | |
494 * Turn next token in buffer into a token. | |
495 */ | |
496 | |
/* Scan the next token from the input at p and store it in *t.
 *
 * White space, line endings and (unless commentToken is set) comments are
 * skipped by looping back to the top; every token-producing path sets
 * t->value and returns.  loc.linnum is advanced as line endings are
 * consumed.  When doDocComment is set, doc comments are passed to
 * getDocComment() instead of being silently dropped.
 */
void Lexer::scan(Token *t)
{
    unsigned lastLine = loc.linnum;     // line on entry; compared against a comment's starting line
    unsigned linnum;                    // line on which the current comment started

    t->blockComment = NULL;
    t->lineComment = NULL;
    while (1)
    {
        t->ptr = p;                     // remember where this candidate token starts
        //printf("p = %p, *p = '%c'\n",p,*p);
        switch (*p)
        {
            case 0:
            case 0x1A:
                t->value = TOKeof;                      // end of file
                return;

            case ' ':
            case '\t':
            case '\v':
            case '\f':
                p++;
                continue;                       // skip white space

            case '\r':
                p++;
                if (*p != '\n')                 // if CR stands by itself
                    loc.linnum++;
                continue;                       // skip white space

            case '\n':
                p++;
                loc.linnum++;
                continue;                       // skip white space

            case '0':   case '1':   case '2':   case '3':   case '4':
            case '5':   case '6':   case '7':   case '8':   case '9':
                t->value = number(t);
                return;

#if CSTRINGS
            case '\'':
                t->value = charConstant(t, 0);
                return;

            case '"':
                t->value = stringConstant(t,0);
                return;

            case 'l':
            case 'L':
                // l'..' / L'..' and l".." / L".." are handled here; anything
                // else falls through to the identifier cases below.
                if (p[1] == '\'')
                {
                    p++;
                    t->value = charConstant(t, 1);
                    return;
                }
                else if (p[1] == '"')
                {
                    p++;
                    t->value = stringConstant(t, 1);
                    return;
                }
#else
            case '\'':
                t->value = charConstant(t,0);
                return;

            case 'r':
                if (p[1] != '"')
                    goto case_ident;
                p++;
                // falls through: r"..." is lexed like a `...` string, delimited by *p
            case '`':
                t->value = wysiwygStringConstant(t, *p);
                return;

            case 'x':
                if (p[1] != '"')
                    goto case_ident;
                p++;
                t->value = hexStringConstant(t);
                return;

#if DMDV2
            case 'q':
                if (p[1] == '"')
                {
                    p++;
                    t->value = delimitedStringConstant(t);
                    return;
                }
                else if (p[1] == '{')
                {
                    p++;
                    t->value = tokenStringConstant(t);
                    return;
                }
                else
                    goto case_ident;
#endif

            case '"':
                t->value = escapeStringConstant(t,0);
                return;

            case '\\':                  // escaped string literal
            {   unsigned c;

                // Consecutive \-escapes are gathered into one string token.
                stringbuffer.reset();
                do
                {
                    p++;
                    switch (*p)
                    {
                        case 'u':
                        case 'U':
                        case '&':
                            // these escapes are written out UTF-8 encoded
                            c = escapeSequence();
                            stringbuffer.writeUTF8(c);
                            break;

                        default:
                            c = escapeSequence();
                            stringbuffer.writeByte(c);
                            break;
                    }
                } while (*p == '\\');
                t->len = stringbuffer.offset;   // length excludes the NUL written next
                stringbuffer.writeByte(0);
                t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                t->postfix = 0;
                t->value = TOKstring;
                return;
            }

            case 'l':
            case 'L':
#endif
            case 'a':   case 'b':   case 'c':   case 'd':   case 'e':
            case 'f':   case 'g':   case 'h':   case 'i':   case 'j':
            case 'k':               case 'm':   case 'n':   case 'o':
#if DMDV2
            case 'p':   /*case 'q': case 'r':*/ case 's':   case 't':
#else
            case 'p':   case 'q': /*case 'r':*/ case 's':   case 't':
#endif
            case 'u':   case 'v':   case 'w': /*case 'x':*/ case 'y':
            case 'z':
            case 'A':   case 'B':   case 'C':   case 'D':   case 'E':
            case 'F':   case 'G':   case 'H':   case 'I':   case 'J':
            case 'K':               case 'M':   case 'N':   case 'O':
            case 'P':   case 'Q':   case 'R':   case 'S':   case 'T':
            case 'U':   case 'V':   case 'W':   case 'X':   case 'Y':
            case 'Z':
            case '_':
            case_ident:
            {   unsigned char c;
                StringValue *sv;
                Identifier *id;

                // Consume identifier characters (ASCII id chars or Unicode alphas).
                do
                {
                    c = *++p;
                } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
                // Look the spelling up in the string table, creating an
                // Identifier for it the first time it is seen.
                sv = stringtable.update((char *)t->ptr, p - t->ptr);
                id = (Identifier *) sv->ptrvalue;
                if (!id)
                {   id = new Identifier(sv->lstring.string,TOKidentifier);
                    sv->ptrvalue = id;
                }
                t->ident = id;
                t->value = (enum TOK) id->value;
                anyToken = 1;
                if (*t->ptr == '_')     // if special identifier token
                {
                    static char date[11+1];
                    static char time[8+1];
                    static char timestamp[24+1];

                    if (!date[0])       // lazy evaluation
                    {   time_t t;
                        char *p;

                        // Slice the date, time and full stamp out of ctime()'s text.
                        ::time(&t);
                        p = ctime(&t);
                        assert(p);
                        sprintf(date, "%.6s %.4s", p + 4, p + 20);
                        sprintf(time, "%.8s", p + 11);
                        sprintf(timestamp, "%.24s", p);
                    }

#if DMDV1
                    if (mod && id == Id::FILE)
                    {
                        t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
                        goto Lstring;
                    }
                    else if (mod && id == Id::LINE)
                    {
                        t->value = TOKint64v;
                        t->uns64value = loc.linnum;
                    }
                    else
#endif
                    if (id == Id::DATE)
                    {
                        t->ustring = (unsigned char *)date;
                        goto Lstring;
                    }
                    else if (id == Id::TIME)
                    {
                        t->ustring = (unsigned char *)time;
                        goto Lstring;
                    }
                    else if (id == Id::VENDOR)
                    {
                        t->ustring = (unsigned char *)"LDC";
                        goto Lstring;
                    }
                    else if (id == Id::TIMESTAMP)
                    {
                        t->ustring = (unsigned char *)timestamp;
                     Lstring:
                        // Shared tail: turn the token into a string literal
                        // whose text is whatever t->ustring points at.
                        t->value = TOKstring;
                     Llen:
                        t->postfix = 0;
                        t->len = strlen((char *)t->ustring);
                    }
                    else if (id == Id::VERSIONX)
                    {   unsigned major = 0;
                        unsigned minor = 0;

                        // Parse digits and '.' from global.version, skipping
                        // its first character; result is major * 1000 + minor.
                        for (const char *p = global.version + 1; 1; p++)
                        {
                            char c = *p;
                            if (isdigit(c))
                                minor = minor * 10 + c - '0';
                            else if (c == '.')
                            {   major = minor;
                                minor = 0;
                            }
                            else
                                break;
                        }
                        t->value = TOKint64v;
                        t->uns64value = major * 1000 + minor;
                    }
#if DMDV2
                    else if (id == Id::EOFX)
                    {
                        t->value = TOKeof;
                        // Advance scanner to end of file
                        while (!(*p == 0 || *p == 0x1A))
                            p++;
                    }
#endif
                }
                //printf("t->value = %d\n",t->value);
                return;
            }

            case '/':
                p++;
                switch (*p)
                {
                    case '=':
                        p++;
                        t->value = TOKdivass;
                        return;

                    case '*':
                        p++;
                        linnum = loc.linnum;
                        while (1)
                        {
                            // Inner loop: advance to the next '/' (or end of input).
                            while (1)
                            {   unsigned char c = *p;
                                switch (c)
                                {
                                    case '/':
                                        break;

                                    case '\n':
                                        loc.linnum++;
                                        p++;
                                        continue;

                                    case '\r':
                                        p++;
                                        if (*p != '\n')
                                            loc.linnum++;
                                        continue;

                                    case 0:
                                    case 0x1A:
                                        error("unterminated /* */ comment");
                                        p = end;
                                        t->value = TOKeof;
                                        return;

                                    default:
                                        if (c & 0x80)
                                        {   unsigned u = decodeUTF();
                                            if (u == PS || u == LS)
                                                loc.linnum++;
                                        }
                                        p++;
                                        continue;
                                }
                                break;
                            }
                            p++;
                            // The '/' closes the comment only if preceded by '*',
                            // and that '*' is not the one that opened it ("/*/").
                            if (p[-2] == '*' && p - 3 != t->ptr)
                                break;
                        }
                        if (commentToken)
                        {
                            t->value = TOKcomment;
                            return;
                        }
                        else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
                        {   // if /** but not /**/
                            getDocComment(t, lastLine == linnum);
                        }
                        continue;

                    case '/':           // do // style comments
                        linnum = loc.linnum;
                        while (1)
                        {   unsigned char c = *++p;
                            switch (c)
                            {
                                case '\n':
                                    break;

                                case '\r':
                                    if (p[1] == '\n')
                                        p++;
                                    break;

                                case 0:
                                case 0x1A:
                                    // comment runs to end of input
                                    if (commentToken)
                                    {
                                        p = end;
                                        t->value = TOKcomment;
                                        return;
                                    }
                                    if (doDocComment && t->ptr[2] == '/')
                                        getDocComment(t, lastLine == linnum);
                                    p = end;
                                    t->value = TOKeof;
                                    return;

                                default:
                                    if (c & 0x80)
                                    {   unsigned u = decodeUTF();
                                        if (u == PS || u == LS)
                                            break;
                                    }
                                    continue;
                            }
                            break;
                        }

                        if (commentToken)
                        {
                            p++;
                            loc.linnum++;
                            t->value = TOKcomment;
                            return;
                        }
                        if (doDocComment && t->ptr[2] == '/')
                            getDocComment(t, lastLine == linnum);

                        // step past the line terminator that ended the comment
                        p++;
                        loc.linnum++;
                        continue;

                    case '+':
                    {   int nest;

                        linnum = loc.linnum;
                        p++;
                        nest = 1;               // depth of /+ +/ nesting
                        while (1)
                        {   unsigned char c = *p;
                            switch (c)
                            {
                                case '/':
                                    p++;
                                    if (*p == '+')
                                    {
                                        p++;
                                        nest++;
                                    }
                                    continue;

                                case '+':
                                    p++;
                                    if (*p == '/')
                                    {
                                        p++;
                                        if (--nest == 0)
                                            break;
                                    }
                                    continue;

                                case '\r':
                                    p++;
                                    if (*p != '\n')
                                        loc.linnum++;
                                    continue;

                                case '\n':
                                    loc.linnum++;
                                    p++;
                                    continue;

                                case 0:
                                case 0x1A:
                                    error("unterminated /+ +/ comment");
                                    p = end;
                                    t->value = TOKeof;
                                    return;

                                default:
                                    if (c & 0x80)
                                    {   unsigned u = decodeUTF();
                                        if (u == PS || u == LS)
                                            loc.linnum++;
                                    }
                                    p++;
                                    continue;
                            }
                            break;
                        }
                        if (commentToken)
                        {
                            t->value = TOKcomment;
                            return;
                        }
                        if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
                        {   // if /++ but not /++/
                            getDocComment(t, lastLine == linnum);
                        }
                        continue;
                    }
                }
                t->value = TOKdiv;
                return;

            case '.':
                p++;
                if (isdigit(*p))
                {   /* Note that we don't allow ._1 and ._ as being
                     * valid floating point numbers.
                     */
                    p--;
                    t->value = inreal(t);
                }
                else if (p[0] == '.')
                {
                    if (p[1] == '.')
                    {   p += 2;
                        t->value = TOKdotdotdot;
                    }
                    else
                    {   p++;
                        t->value = TOKslice;
                    }
                }
                else
                    t->value = TOKdot;
                return;

            case '&':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKandass;
                }
                else if (*p == '&')
                {   p++;
                    t->value = TOKandand;
                }
                else
                    t->value = TOKand;
                return;

            case '|':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKorass;
                }
                else if (*p == '|')
                {   p++;
                    t->value = TOKoror;
                }
                else
                    t->value = TOKor;
                return;

            case '-':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKminass;
                }
#if 0
                else if (*p == '>')
                {   p++;
                    t->value = TOKarrow;
                }
#endif
                else if (*p == '-')
                {   p++;
                    t->value = TOKminusminus;
                }
                else
                    t->value = TOKmin;
                return;

            case '+':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKaddass;
                }
                else if (*p == '+')
                {   p++;
                    t->value = TOKplusplus;
                }
                else
                    t->value = TOKadd;
                return;

            case '<':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKle;                   // <=
                }
                else if (*p == '<')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshlass;           // <<=
                    }
                    else
                        t->value = TOKshl;              // <<
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKleg;              // <>=
                    }
                    else
                        t->value = TOKlg;               // <>
                }
                else
                    t->value = TOKlt;                   // <
                return;

            case '>':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKge;                   // >=
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshrass;           // >>=
                    }
                    else if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKushrass;      // >>>=
                        }
                        else
                            t->value = TOKushr;         // >>>
                    }
                    else
                        t->value = TOKshr;              // >>
                }
                else
                    t->value = TOKgt;                   // >
                return;

            case '!':
                p++;
                if (*p == '=')
                {   p++;
                    if (*p == '=' && global.params.Dversion == 1)
                    {   p++;
                        t->value = TOKnotidentity;      // !==
                    }
                    else
                        t->value = TOKnotequal;         // !=
                }
                else if (*p == '<')
                {   p++;
                    if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKunord; // !<>=
                        }
                        else
                            t->value = TOKue;   // !<>
                    }
                    else if (*p == '=')
                    {   p++;
                        t->value = TOKug;       // !<=
                    }
                    else
                        t->value = TOKuge;      // !<
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKul;       // !>=
                    }
                    else
                        t->value = TOKule;      // !>
                }
                else
                    t->value = TOKnot;          // !
                return;

            case '=':
                p++;
                if (*p == '=')
                {   p++;
                    if (*p == '=' && global.params.Dversion == 1)
                    {   p++;
                        t->value = TOKidentity;         // ===
                    }
                    else
                        t->value = TOKequal;            // ==
                }
                else
                    t->value = TOKassign;               // =
                return;

            case '~':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKcatass;               // ~=
                }
                else
                    t->value = TOKtilde;                // ~
                return;

// Single-character tokens: consume the character and return its token.
#define SINGLE(c,tok) case c: p++; t->value = tok; return;

            SINGLE('(', TOKlparen)
            SINGLE(')', TOKrparen)
            SINGLE('[', TOKlbracket)
            SINGLE(']', TOKrbracket)
            SINGLE('{', TOKlcurly)
            SINGLE('}', TOKrcurly)
            SINGLE('?', TOKquestion)
            SINGLE(',', TOKcomma)
            SINGLE(';', TOKsemicolon)
            SINGLE(':', TOKcolon)
            SINGLE('$', TOKdollar)

#undef SINGLE

// Tokens that are c1 alone (tok1) or c1 followed by c2 (tok2).
#define DOUBLE(c1,tok1,c2,tok2)         \
            case c1:                    \
                p++;                    \
                if (*p == c2)           \
                {   p++;                \
                    t->value = tok2;    \
                }                       \
                else                    \
                    t->value = tok1;    \
                return;

            DOUBLE('*', TOKmul, '=', TOKmulass)
            DOUBLE('%', TOKmod, '=', TOKmodass)
            DOUBLE('^', TOKxor, '=', TOKxorass)

#undef DOUBLE

            case '#':
                p++;
                pragma();
                continue;

            default:
            {   unsigned char c = *p;

                if (c & 0x80)
                {   unsigned u = decodeUTF();

                    // Check for start of unicode identifier
                    if (isUniAlpha(u))
                        goto case_ident;

                    if (u == PS || u == LS)
                    {
                        loc.linnum++;
                        p++;
                        continue;
                    }
                }
                // Anything else is reported and skipped one byte at a time.
                if (isprint(c))
                    error("unsupported char '%c'", c);
                else
                    error("unsupported char 0x%02x", c);
                p++;
                continue;
            }
        }
    }
}
1225 | |
1226 /******************************************* | |
1227 * Parse escape sequence. | |
1228 */ | |
1229 | |
/* Parse one escape sequence and return the character value it denotes.
 *
 * On entry p points at the character that selects the escape (the first
 * switch below dispatches on *p).  On return p has been advanced past the
 * characters that were consumed; at end of input (0 or 0x1A) nothing is
 * consumed and '\\' is returned.  Malformed sequences are reported through
 * error() and a value is still returned.
 */
unsigned Lexer::escapeSequence()
{   unsigned c;
    int n;
    int ndigits;

    c = *p;
    switch (c)
    {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
                // shared exit: consume the selector character, return c
                p++;
                break;

        case 'a':       c = 7;          goto Lconsume;
        case 'b':       c = 8;          goto Lconsume;
        case 'f':       c = 12;         goto Lconsume;
        case 'n':       c = 10;         goto Lconsume;
        case 'r':       c = 13;         goto Lconsume;
        case 't':       c = 9;          goto Lconsume;
        case 'v':       c = 11;         goto Lconsume;

        case 'u':
                ndigits = 4;
                goto Lhex;
        case 'U':
                ndigits = 8;
                goto Lhex;
        case 'x':
                ndigits = 2;
        Lhex:
                // Read up to ndigits hexadecimal digits into v.
                p++;
                c = *p;
                if (ishex(c))
                {   unsigned v;

                    n = 0;
                    v = 0;
                    while (1)
                    {
                        // convert the hex digit in c to its numeric value
                        if (isdigit(c))
                            c -= '0';
                        else if (islower(c))
                            c -= 'a' - 10;
                        else
                            c -= 'A' - 10;
                        v = v * 16 + c;
                        c = *++p;
                        if (++n == ndigits)
                            break;
                        if (!ishex(c))
                        {   error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                            break;
                        }
                    }
                    // \u and \U values are validated; \x values are not.
                    if (ndigits != 2 && !utf_isValidDchar(v))
                        error("invalid UTF character \\U%08x", v);
                    c = v;
                }
                else
                    error("undefined escape hex sequence \\%c\n",c);
                break;

        case '&':                       // named character entity
                // Scan forward to the terminating ';' and look the name up.
                for (unsigned char *idstart = ++p; 1; p++)
                {
                    switch (*p)
                    {
                        case ';':
                            c = HtmlNamedEntity(idstart, p - idstart);
                            if (c == ~0)
                            {   error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
                                c = ' ';
                            }
                            p++;
                            break;

                        default:
                            if (isalpha(*p) ||
                                (p != idstart + 1 && isdigit(*p)))
                                continue;
                            error("unterminated named entity");
                            break;
                    }
                    break;
                }
                break;

        case 0:
        case 0x1A:                      // end of file
                c = '\\';
                break;

        default:
                if (isoctal(c))
                {   unsigned v;

                    // Read at most three octal digits.
                    n = 0;
                    v = 0;
                    do
                    {
                        v = v * 8 + (c - '0');
                        c = *++p;
                    } while (++n < 3 && isoctal(c));
                    c = v;
                    if (c > 0xFF)
                        error("0%03o is larger than a byte", c);
                }
                else
                    error("undefined escape sequence \\%c\n",c);
                break;
    }
    return c;
}
1346 | |
1347 /************************************** | |
1348 */ | |
1349 | |
1350 TOK Lexer::wysiwygStringConstant(Token *t, int tc) | |
1351 { unsigned c; | |
1352 Loc start = loc; | |
1353 | |
1354 p++; | |
1355 stringbuffer.reset(); | |
1356 while (1) | |
1357 { | |
1358 c = *p++; | |
1359 switch (c) | |
1360 { | |
1361 case '\n': | |
1362 loc.linnum++; | |
1363 break; | |
1364 | |
1365 case '\r': | |
1366 if (*p == '\n') | |
1367 continue; // ignore | |
1368 c = '\n'; // treat EndOfLine as \n character | |
1369 loc.linnum++; | |
1370 break; | |
1371 | |
1372 case 0: | |
1373 case 0x1A: | |
1374 error("unterminated string constant starting at %s", start.toChars()); | |
1375 t->ustring = (unsigned char *)""; | |
1376 t->len = 0; | |
1377 t->postfix = 0; | |
1378 return TOKstring; | |
1379 | |
1380 case '"': | |
1381 case '`': | |
1382 if (c == tc) | |
1383 { | |
1384 t->len = stringbuffer.offset; | |
1385 stringbuffer.writeByte(0); | |
1386 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1387 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1388 stringPostfix(t); | |
1389 return TOKstring; | |
1390 } | |
1391 break; | |
1392 | |
1393 default: | |
1394 if (c & 0x80) | |
1395 { p--; | |
1396 unsigned u = decodeUTF(); | |
1397 p++; | |
1398 if (u == PS || u == LS) | |
1399 loc.linnum++; | |
1400 stringbuffer.writeUTF8(u); | |
1401 continue; | |
1402 } | |
1403 break; | |
1404 } | |
1405 stringbuffer.writeByte(c); | |
1406 } | |
1407 } | |
1408 | |
1409 /************************************** | |
1410 * Lex hex strings: | |
1411 * x"0A ae 34FE BD" | |
1412 */ | |
1413 | |
1414 TOK Lexer::hexStringConstant(Token *t) | |
1415 { unsigned c; | |
1416 Loc start = loc; | |
1417 unsigned n = 0; | |
1418 unsigned v; | |
1419 | |
1420 p++; | |
1421 stringbuffer.reset(); | |
1422 while (1) | |
1423 { | |
1424 c = *p++; | |
1425 switch (c) | |
1426 { | |
1427 case ' ': | |
1428 case '\t': | |
1429 case '\v': | |
1430 case '\f': | |
1431 continue; // skip white space | |
1432 | |
1433 case '\r': | |
1434 if (*p == '\n') | |
1435 continue; // ignore | |
1436 // Treat isolated '\r' as if it were a '\n' | |
1437 case '\n': | |
1438 loc.linnum++; | |
1439 continue; | |
1440 | |
1441 case 0: | |
1442 case 0x1A: | |
1443 error("unterminated string constant starting at %s", start.toChars()); | |
1444 t->ustring = (unsigned char *)""; | |
1445 t->len = 0; | |
1446 t->postfix = 0; | |
1447 return TOKstring; | |
1448 | |
1449 case '"': | |
1450 if (n & 1) | |
1451 { error("odd number (%d) of hex characters in hex string", n); | |
1452 stringbuffer.writeByte(v); | |
1453 } | |
1454 t->len = stringbuffer.offset; | |
1455 stringbuffer.writeByte(0); | |
1456 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1457 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1458 stringPostfix(t); | |
1459 return TOKstring; | |
1460 | |
1461 default: | |
1462 if (c >= '0' && c <= '9') | |
1463 c -= '0'; | |
1464 else if (c >= 'a' && c <= 'f') | |
1465 c -= 'a' - 10; | |
1466 else if (c >= 'A' && c <= 'F') | |
1467 c -= 'A' - 10; | |
1468 else if (c & 0x80) | |
1469 { p--; | |
1470 unsigned u = decodeUTF(); | |
1471 p++; | |
1472 if (u == PS || u == LS) | |
1473 loc.linnum++; | |
1474 else | |
1475 error("non-hex character \\u%x", u); | |
1476 } | |
1477 else | |
1478 error("non-hex character '%c'", c); | |
1479 if (n & 1) | |
1480 { v = (v << 4) | c; | |
1481 stringbuffer.writeByte(v); | |
1482 } | |
1483 else | |
1484 v = c; | |
1485 n++; | |
1486 break; | |
1487 } | |
1488 } | |
1489 } | |
1490 | |
1491 | |
336 | 1492 #if DMDV2 |
159 | 1493 /************************************** |
1494 * Lex delimited strings: | |
1495 * q"(foo(xxx))" // "foo(xxx)" | |
1496 * q"[foo(]" // "foo(" | |
1497 * q"/foo]/" // "foo]" | |
1498 * q"HERE | |
1499 * foo | |
1500 * HERE" // "foo\n" | |
1501 * Input: | |
1502 * p is on the " | |
1503 */ | |
1504 | |
1505 TOK Lexer::delimitedStringConstant(Token *t) | |
1506 { unsigned c; | |
1507 Loc start = loc; | |
1508 unsigned delimleft = 0; | |
1509 unsigned delimright = 0; | |
1510 unsigned nest = 1; | |
1511 unsigned nestcount; | |
1512 Identifier *hereid = NULL; | |
1513 unsigned blankrol = 0; | |
1514 unsigned startline = 0; | |
1515 | |
1516 p++; | |
1517 stringbuffer.reset(); | |
1518 while (1) | |
1519 { | |
1520 c = *p++; | |
1521 //printf("c = '%c'\n", c); | |
1522 switch (c) | |
1523 { | |
1524 case '\n': | |
1525 Lnextline: | |
1526 loc.linnum++; | |
1527 startline = 1; | |
1528 if (blankrol) | |
1529 { blankrol = 0; | |
1530 continue; | |
1531 } | |
1532 if (hereid) | |
1533 { | |
1534 stringbuffer.writeUTF8(c); | |
1535 continue; | |
1536 } | |
1537 break; | |
1538 | |
1539 case '\r': | |
1540 if (*p == '\n') | |
1541 continue; // ignore | |
1542 c = '\n'; // treat EndOfLine as \n character | |
1543 goto Lnextline; | |
1544 | |
1545 case 0: | |
1546 case 0x1A: | |
1547 goto Lerror; | |
1548 | |
1549 default: | |
1550 if (c & 0x80) | |
1551 { p--; | |
1552 c = decodeUTF(); | |
1553 p++; | |
1554 if (c == PS || c == LS) | |
1555 goto Lnextline; | |
1556 } | |
1557 break; | |
1558 } | |
1559 if (delimleft == 0) | |
1560 { delimleft = c; | |
1561 nest = 1; | |
1562 nestcount = 1; | |
1563 if (c == '(') | |
1564 delimright = ')'; | |
1565 else if (c == '{') | |
1566 delimright = '}'; | |
1567 else if (c == '[') | |
1568 delimright = ']'; | |
1569 else if (c == '<') | |
1570 delimright = '>'; | |
1571 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) | |
1572 { // Start of identifier; must be a heredoc | |
1573 Token t; | |
1574 p--; | |
1575 scan(&t); // read in heredoc identifier | |
1576 if (t.value != TOKidentifier) | |
1577 { error("identifier expected for heredoc, not %s", t.toChars()); | |
1578 delimright = c; | |
1579 } | |
1580 else | |
1581 { hereid = t.ident; | |
1582 //printf("hereid = '%s'\n", hereid->toChars()); | |
1583 blankrol = 1; | |
1584 } | |
1585 nest = 0; | |
1586 } | |
1587 else | |
1588 { delimright = c; | |
1589 nest = 0; | |
1590 } | |
1591 } | |
1592 else | |
1593 { | |
1594 if (blankrol) | |
1595 { error("heredoc rest of line should be blank"); | |
1596 blankrol = 0; | |
1597 continue; | |
1598 } | |
1599 if (nest == 1) | |
1600 { | |
1601 if (c == delimleft) | |
1602 nestcount++; | |
1603 else if (c == delimright) | |
1604 { nestcount--; | |
1605 if (nestcount == 0) | |
1606 goto Ldone; | |
1607 } | |
1608 } | |
1609 else if (c == delimright) | |
1610 goto Ldone; | |
1611 if (startline && isalpha(c)) | |
1612 { Token t; | |
1613 unsigned char *psave = p; | |
1614 p--; | |
1615 scan(&t); // read in possible heredoc identifier | |
1616 //printf("endid = '%s'\n", t.ident->toChars()); | |
1617 if (t.value == TOKidentifier && t.ident->equals(hereid)) | |
1618 { /* should check that rest of line is blank | |
1619 */ | |
1620 goto Ldone; | |
1621 } | |
1622 p = psave; | |
1623 } | |
1624 stringbuffer.writeUTF8(c); | |
1625 startline = 0; | |
1626 } | |
1627 } | |
1628 | |
1629 Ldone: | |
1630 if (*p == '"') | |
1631 p++; | |
1632 else | |
1633 error("delimited string must end in %c\"", delimright); | |
1634 t->len = stringbuffer.offset; | |
1635 stringbuffer.writeByte(0); | |
1636 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1637 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1638 stringPostfix(t); | |
1639 return TOKstring; | |
1640 | |
1641 Lerror: | |
1642 error("unterminated string constant starting at %s", start.toChars()); | |
1643 t->ustring = (unsigned char *)""; | |
1644 t->len = 0; | |
1645 t->postfix = 0; | |
1646 return TOKstring; | |
1647 } | |
1648 | |
1649 /************************************** | |
1650 * Lex delimited strings: | |
1651 * q{ foo(xxx) } // " foo(xxx) " | |
1652 * q{foo(} // "foo(" | |
1653 * q{{foo}"}"} // "{foo}"}"" | |
1654 * Input: | |
1655 * p is on the q | |
1656 */ | |
1657 | |
1658 TOK Lexer::tokenStringConstant(Token *t) | |
1659 { | |
1660 unsigned nest = 1; | |
1661 Loc start = loc; | |
1662 unsigned char *pstart = ++p; | |
1663 | |
1664 while (1) | |
1665 { Token tok; | |
1666 | |
1667 scan(&tok); | |
1668 switch (tok.value) | |
1669 { | |
1670 case TOKlcurly: | |
1671 nest++; | |
1672 continue; | |
1673 | |
1674 case TOKrcurly: | |
1675 if (--nest == 0) | |
1676 goto Ldone; | |
1677 continue; | |
1678 | |
1679 case TOKeof: | |
1680 goto Lerror; | |
1681 | |
1682 default: | |
1683 continue; | |
1684 } | |
1685 } | |
1686 | |
1687 Ldone: | |
1688 t->len = p - 1 - pstart; | |
1689 t->ustring = (unsigned char *)mem.malloc(t->len + 1); | |
1690 memcpy(t->ustring, pstart, t->len); | |
1691 t->ustring[t->len] = 0; | |
1692 stringPostfix(t); | |
1693 return TOKstring; | |
1694 | |
1695 Lerror: | |
1696 error("unterminated token string constant starting at %s", start.toChars()); | |
1697 t->ustring = (unsigned char *)""; | |
1698 t->len = 0; | |
1699 t->postfix = 0; | |
1700 return TOKstring; | |
1701 } | |
1702 | |
1703 #endif | |
1704 | |
1705 | |
1706 /************************************** | |
1707 */ | |
1708 | |
1709 TOK Lexer::escapeStringConstant(Token *t, int wide) | |
1710 { unsigned c; | |
1711 Loc start = loc; | |
1712 | |
1713 p++; | |
1714 stringbuffer.reset(); | |
1715 while (1) | |
1716 { | |
1717 c = *p++; | |
1718 switch (c) | |
1719 { | |
1720 case '\\': | |
1721 switch (*p) | |
1722 { | |
1723 case 'u': | |
1724 case 'U': | |
1725 case '&': | |
1726 c = escapeSequence(); | |
1727 stringbuffer.writeUTF8(c); | |
1728 continue; | |
1729 | |
1730 default: | |
1731 c = escapeSequence(); | |
1732 break; | |
1733 } | |
1734 break; | |
1735 | |
1736 case '\n': | |
1737 loc.linnum++; | |
1738 break; | |
1739 | |
1740 case '\r': | |
1741 if (*p == '\n') | |
1742 continue; // ignore | |
1743 c = '\n'; // treat EndOfLine as \n character | |
1744 loc.linnum++; | |
1745 break; | |
1746 | |
1747 case '"': | |
1748 t->len = stringbuffer.offset; | |
1749 stringbuffer.writeByte(0); | |
1750 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1751 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1752 stringPostfix(t); | |
1753 return TOKstring; | |
1754 | |
1755 case 0: | |
1756 case 0x1A: | |
1757 p--; | |
1758 error("unterminated string constant starting at %s", start.toChars()); | |
1759 t->ustring = (unsigned char *)""; | |
1760 t->len = 0; | |
1761 t->postfix = 0; | |
1762 return TOKstring; | |
1763 | |
1764 default: | |
1765 if (c & 0x80) | |
1766 { | |
1767 p--; | |
1768 c = decodeUTF(); | |
1769 if (c == LS || c == PS) | |
1770 { c = '\n'; | |
1771 loc.linnum++; | |
1772 } | |
1773 p++; | |
1774 stringbuffer.writeUTF8(c); | |
1775 continue; | |
1776 } | |
1777 break; | |
1778 } | |
1779 stringbuffer.writeByte(c); | |
1780 } | |
1781 } | |
1782 | |
1783 /************************************** | |
1784 */ | |
1785 | |
1786 TOK Lexer::charConstant(Token *t, int wide) | |
1787 { | |
1788 unsigned c; | |
1789 TOK tk = TOKcharv; | |
1790 | |
1791 //printf("Lexer::charConstant\n"); | |
1792 p++; | |
1793 c = *p++; | |
1794 switch (c) | |
1795 { | |
1796 case '\\': | |
1797 switch (*p) | |
1798 { | |
1799 case 'u': | |
1800 t->uns64value = escapeSequence(); | |
1801 tk = TOKwcharv; | |
1802 break; | |
1803 | |
1804 case 'U': | |
1805 case '&': | |
1806 t->uns64value = escapeSequence(); | |
1807 tk = TOKdcharv; | |
1808 break; | |
1809 | |
1810 default: | |
1811 t->uns64value = escapeSequence(); | |
1812 break; | |
1813 } | |
1814 break; | |
1815 | |
1816 case '\n': | |
1817 L1: | |
1818 loc.linnum++; | |
1819 case '\r': | |
1820 case 0: | |
1821 case 0x1A: | |
1822 case '\'': | |
1823 error("unterminated character constant"); | |
1824 return tk; | |
1825 | |
1826 default: | |
1827 if (c & 0x80) | |
1828 { | |
1829 p--; | |
1830 c = decodeUTF(); | |
1831 p++; | |
1832 if (c == LS || c == PS) | |
1833 goto L1; | |
1834 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) | |
1835 tk = TOKwcharv; | |
1836 else | |
1837 tk = TOKdcharv; | |
1838 } | |
1839 t->uns64value = c; | |
1840 break; | |
1841 } | |
1842 | |
1843 if (*p != '\'') | |
1844 { error("unterminated character constant"); | |
1845 return tk; | |
1846 } | |
1847 p++; | |
1848 return tk; | |
1849 } | |
1850 | |
1851 /*************************************** | |
1852 * Get postfix of string literal. | |
1853 */ | |
1854 | |
1855 void Lexer::stringPostfix(Token *t) | |
1856 { | |
1857 switch (*p) | |
1858 { | |
1859 case 'c': | |
1860 case 'w': | |
1861 case 'd': | |
1862 t->postfix = *p; | |
1863 p++; | |
1864 break; | |
1865 | |
1866 default: | |
1867 t->postfix = 0; | |
1868 break; | |
1869 } | |
1870 } | |
1871 | |
/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *      u       'u' or 'U'
 * This function is compiled out (#if 0).
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned nchars = (u == 'U') ? 8 : 4;
    unsigned value = 0;
    unsigned count = 0;

    while (1)
    {
        ++p;
        if (count == nchars)
            break;

        unsigned char digit = *p;
        if (!ishex(digit))
        {
            error("\\%c sequence must be followed by %d hex characters", u, nchars);
            break;
        }
        if (isdigit(digit))
            digit -= '0';
        else if (islower(digit))
            digit -= 'a' - 10;
        else
            digit -= 'A' - 10;
        value = (value << 4) | digit;
        count++;
    }
    return value;
}
#endif
1910 | |
1911 /************************************** | |
1912 * Read in a number. | |
1913 * If it's an integer, store it in tok.TKutok.Vlong. | |
1914 * integers can be decimal, octal or hex | |
1915 * Handle the suffixes U, UL, LU, L, etc. | |
1916 * If it's double, store it in tok.TKutok.Vdouble. | |
1917 * Returns: | |
1918 * TKnum | |
1919 * TKdouble,... | |
1920 */ | |
1921 | |
// Lex a numeric literal that starts at p.
// The text is first collected into stringbuffer by a state machine, then
// converted to an integer, then the u/U/l/L suffixes are parsed, and finally
// the token type is chosen from the suffix flags and the size of the value.
// The value is stored in t->uns64value. If the text turns out to be a
// floating point literal, p is reset to the start and inreal() lexes it.
1922 TOK Lexer::number(Token *t) | |
1923 { | |
1924 // We use a state machine to collect numbers | |
1925 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, | |
1926 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, | |
1927 STATE_hexh, STATE_error }; | |
1928 enum STATE state; | |
1929 | |
1930 enum FLAGS | |
1931 { FLAGS_decimal = 1, // decimal | |
1932 FLAGS_unsigned = 2, // u or U suffix | |
1933 FLAGS_long = 4, // l or L suffix | |
1934 }; | |
1935 enum FLAGS flags = FLAGS_decimal; | |
1936 | |
1937 int i; | |
1938 int base; | |
1939 unsigned c; | |
1940 unsigned char *start; | |
1941 TOK result; | |
1942 | |
1943 //printf("Lexer::number()\n"); | |
1944 state = STATE_initial; | |
1945 base = 0; | |
1946 stringbuffer.reset(); | |
1947 start = p; | |
// Scan phase: each character is judged against the current state. A
// 'break' out of the state switch accepts the character (it is appended to
// stringbuffer and p advances); an embedded '_' is skipped without being
// stored; 'goto done' ends the literal without consuming the character.
1948 while (1) | |
1949 { | |
1950 c = *p; | |
1951 switch (state) | |
1952 { | |
1953 case STATE_initial: // opening state | |
1954 if (c == '0') | |
1955 state = STATE_0; | |
1956 else | |
1957 state = STATE_decimal; | |
1958 break; | |
1959 | |
// STATE_0: a leading '0' has been stored; the second character selects
// hex, binary, octal or real, or ends the literal.
1960 case STATE_0: | |
1961 flags = (FLAGS) (flags & ~FLAGS_decimal); | |
1962 switch (c) | |
1963 { | |
1964 #if ZEROH | |
1965 case 'H': // 0h | |
1966 case 'h': | |
1967 goto hexh; | |
1968 #endif | |
1969 case 'X': | |
1970 case 'x': | |
1971 state = STATE_hex0; | |
1972 break; | |
1973 | |
1974 case '.': | |
1975 if (p[1] == '.') // .. is a separate token | |
1976 goto done; | |
// otherwise falls through to the real number cases
1977 case 'i': | |
1978 case 'f': | |
1979 case 'F': | |
1980 goto real; | |
1981 #if ZEROH | |
1982 case 'E': | |
1983 case 'e': | |
1984 goto case_hex; | |
1985 #endif | |
1986 case 'B': | |
1987 case 'b': | |
1988 state = STATE_binary0; | |
1989 break; | |
1990 | |
1991 case '0': case '1': case '2': case '3': | |
1992 case '4': case '5': case '6': case '7': | |
1993 state = STATE_octal; | |
1994 break; | |
1995 | |
1996 #if ZEROH | |
1997 case '8': case '9': case 'A': | |
1998 case 'C': case 'D': case 'F': | |
1999 case 'a': case 'c': case 'd': case 'f': | |
2000 case_hex: | |
2001 state = STATE_hexh; | |
2002 break; | |
2003 #endif | |
2004 case '_': | |
2005 state = STATE_octal; | |
2006 p++; | |
2007 continue; | |
2008 | |
2009 case 'L': | |
2010 if (p[1] == 'i') | |
2011 goto real; | |
2012 goto done; | |
2013 | |
2014 default: | |
2015 goto done; | |
2016 } | |
2017 break; | |
2018 | |
2019 case STATE_decimal: // reading decimal number | |
2020 if (!isdigit(c)) | |
2021 { | |
2022 #if ZEROH | |
2023 if (ishex(c) | |
2024 || c == 'H' || c == 'h' | |
2025 ) | |
2026 goto hexh; | |
2027 #endif | |
2028 if (c == '_') // ignore embedded _ | |
2029 { p++; | |
2030 continue; | |
2031 } | |
2032 if (c == '.' && p[1] != '.') | |
2033 goto real; | |
2034 else if (c == 'i' || c == 'f' || c == 'F' || | |
2035 c == 'e' || c == 'E') | |
2036 { | |
// 'real' is also the target of gotos from the other states
2037 real: // It's a real number. Back up and rescan as a real | |
2038 p = start; | |
2039 return inreal(t); | |
2040 } | |
2041 else if (c == 'L' && p[1] == 'i') | |
2042 goto real; | |
2043 goto done; | |
2044 } | |
2045 break; | |
2046 | |
// STATE_hex0 means no hex digit has been seen after the 0x prefix yet
2047 case STATE_hex0: // reading hex number | |
2048 case STATE_hex: | |
2049 if (!ishex(c)) | |
2050 { | |
2051 if (c == '_') // ignore embedded _ | |
2052 { p++; | |
2053 continue; | |
2054 } | |
2055 if (c == '.' && p[1] != '.') | |
2056 goto real; | |
2057 if (c == 'P' || c == 'p' || c == 'i') | |
2058 goto real; | |
2059 if (state == STATE_hex0) | |
2060 error("Hex digit expected, not '%c'", c); | |
2061 goto done; | |
2062 } | |
2063 state = STATE_hex; | |
2064 break; | |
2065 | |
2066 #if ZEROH | |
2067 hexh: | |
2068 state = STATE_hexh; | |
2069 case STATE_hexh: // parse numbers like 0FFh | |
2070 if (!ishex(c)) | |
2071 { | |
2072 if (c == 'H' || c == 'h') | |
2073 { | |
2074 p++; | |
2075 base = 16; | |
2076 goto done; | |
2077 } | |
2078 else | |
2079 { | |
2080 // Check for something like 1E3 or 0E24 | |
2081 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || | |
2082 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) | |
2083 goto real; | |
2084 error("Hex digit expected, not '%c'", c); | |
2085 goto done; | |
2086 } | |
2087 } | |
2088 break; | |
2089 #endif | |
2090 | |
// Digits 8 and 9 are accepted here but switch to STATE_octale, which is
// reported as "Octal digit expected" once scanning is done.
2091 case STATE_octal: // reading octal number | |
2092 case STATE_octale: // reading octal number with non-octal digits | |
2093 if (!isoctal(c)) | |
2094 { | |
2095 #if ZEROH | |
2096 if (ishex(c) | |
2097 || c == 'H' || c == 'h' | |
2098 ) | |
2099 goto hexh; | |
2100 #endif | |
2101 if (c == '_') // ignore embedded _ | |
2102 { p++; | |
2103 continue; | |
2104 } | |
2105 if (c == '.' && p[1] != '.') | |
2106 goto real; | |
2107 if (c == 'i') | |
2108 goto real; | |
2109 if (isdigit(c)) | |
2110 { | |
2111 state = STATE_octale; | |
2112 } | |
2113 else | |
2114 goto done; | |
2115 } | |
2116 break; | |
2117 | |
2118 case STATE_binary0: // starting binary number | |
2119 case STATE_binary: // reading binary number | |
2120 if (c != '0' && c != '1') | |
2121 { | |
2122 #if ZEROH | |
2123 if (ishex(c) | |
2124 || c == 'H' || c == 'h' | |
2125 ) | |
2126 goto hexh; | |
2127 #endif | |
2128 if (c == '_') // ignore embedded _ | |
2129 { p++; | |
2130 continue; | |
2131 } | |
2132 if (state == STATE_binary0) | |
2133 { error("binary digit expected"); | |
2134 state = STATE_error; | |
2135 break; | |
2136 } | |
2137 else | |
2138 goto done; | |
2139 } | |
2140 state = STATE_binary; | |
2141 break; | |
2142 | |
2143 case STATE_error: // for error recovery | |
2144 if (!isdigit(c)) // scan until non-digit | |
2145 goto done; | |
2146 break; | |
2147 | |
2148 default: | |
2149 assert(0); | |
2150 } | |
2151 stringbuffer.writeByte(c); | |
2152 p++; | |
2153 } | |
// Conversion phase: stringbuffer now holds the literal text, including
// any 0x / 0b / 0 prefix.
2154 done: | |
2155 stringbuffer.writeByte(0); // terminate string | |
2156 if (state == STATE_octale) | |
2157 error("Octal digit expected"); | |
2158 | |
2159 uinteger_t n; // unsigned >=64 bit integer type | |
2160 | |
// Shortcut for a single stored character (offset 2 counts the terminator)
2161 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) | |
2162 n = stringbuffer.data[0] - '0'; | |
2163 else | |
2164 { | |
2165 // Convert string to integer | |
2166 #if __DMC__ | |
2167 errno = 0; | |
2168 n = strtoull((char *)stringbuffer.data,NULL,base); | |
2169 if (errno == ERANGE) | |
2170 error("integer overflow"); | |
2171 #else | |
2172 // Not everybody implements strtoull() | |
// NOTE: this local p shadows the member p until the end of this else block
2173 char *p = (char *)stringbuffer.data; | |
2174 int r = 10, d; | |
2175 | |
// Pick the radix r from the prefix and skip the prefix
2176 if (*p == '0') | |
2177 { | |
2178 if (p[1] == 'x' || p[1] == 'X') | |
2179 p += 2, r = 16; | |
2180 else if (p[1] == 'b' || p[1] == 'B') | |
2181 p += 2, r = 2; | |
2182 else if (isdigit(p[1])) | |
2183 p += 1, r = 8; | |
2184 } | |
2185 | |
2186 n = 0; | |
// Accumulate digits until a character that is not a digit of radix r
2187 while (1) | |
2188 { | |
2189 if (*p >= '0' && *p <= '9') | |
2190 d = *p - '0'; | |
2191 else if (*p >= 'a' && *p <= 'z') | |
2192 d = *p - 'a' + 10; | |
2193 else if (*p >= 'A' && *p <= 'Z') | |
2194 d = *p - 'A' + 10; | |
2195 else | |
2196 break; | |
2197 if (d >= r) | |
2198 break; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2199 uinteger_t n2 = n * r; |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2200 //printf("n2 / r = %llx, n = %llx\n", n2/r, n); |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
// Wrap-around check: the multiplication must divide back to n and adding
// the digit must not make the sum smaller than n
2201 if (n2 / r != n || n2 + d < n) |
159 | 2202 { |
2203 error ("integer overflow"); | |
2204 break; | |
2205 } | |
2206 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2207 n = n2 + d; |
159 | 2208 p++; |
2209 } | |
2210 #endif | |
2211 if (sizeof(n) > 8 && | |
2212 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits | |
2213 error("integer overflow"); | |
2214 } | |
2215 | |
// Suffix phase: each of the unsigned and long flags may be given once;
// a repeated suffix is reported as "unrecognized token".
2216 // Parse trailing 'u', 'U', 'l' or 'L' in any combination | |
2217 while (1) | |
2218 { unsigned char f; | |
2219 | |
2220 switch (*p) | |
2221 { case 'U': | |
2222 case 'u': | |
2223 f = FLAGS_unsigned; | |
2224 goto L1; | |
2225 | |
2226 case 'l': | |
// the leading '1 ||' makes this error unconditional
2227 if (1 || !global.params.useDeprecated) | |
2228 error("'l' suffix is deprecated, use 'L' instead"); | |
2229 case 'L': | |
2230 f = FLAGS_long; | |
2231 L1: | |
2232 p++; | |
2233 if (flags & f) | |
2234 error("unrecognized token"); | |
2235 flags = (FLAGS) (flags | f); | |
2236 continue; | |
2237 default: | |
2238 break; | |
2239 } | |
2240 break; | |
2241 } | |
2242 | |
// Typing phase: choose the token from the suffix flags and the bits set
// in n. FLAGS_decimal was cleared above for every literal starting with 0.
2243 switch (flags) | |
2244 { | |
2245 case 0: | |
2246 /* Octal or Hexadecimal constant. | |
2247 * First that fits: int, uint, long, ulong | |
2248 */ | |
2249 if (n & 0x8000000000000000LL) | |
2250 result = TOKuns64v; | |
2251 else if (n & 0xFFFFFFFF00000000LL) | |
2252 result = TOKint64v; | |
2253 else if (n & 0x80000000) | |
2254 result = TOKuns32v; | |
2255 else | |
2256 result = TOKint32v; | |
2257 break; | |
2258 | |
2259 case FLAGS_decimal: | |
2260 /* First that fits: int, long, long long | |
2261 */ | |
2262 if (n & 0x8000000000000000LL) | |
2263 { error("signed integer overflow"); | |
2264 result = TOKuns64v; | |
2265 } | |
2266 else if (n & 0xFFFFFFFF80000000LL) | |
2267 result = TOKint64v; | |
2268 else | |
2269 result = TOKint32v; | |
2270 break; | |
2271 | |
2272 case FLAGS_unsigned: | |
2273 case FLAGS_decimal | FLAGS_unsigned: | |
2274 /* First that fits: uint, ulong | |
2275 */ | |
2276 if (n & 0xFFFFFFFF00000000LL) | |
2277 result = TOKuns64v; | |
2278 else | |
2279 result = TOKuns32v; | |
2280 break; | |
2281 | |
2282 case FLAGS_decimal | FLAGS_long: | |
2283 if (n & 0x8000000000000000LL) | |
2284 { error("signed integer overflow"); | |
2285 result = TOKuns64v; | |
2286 } | |
2287 else | |
2288 result = TOKint64v; | |
2289 break; | |
2290 | |
2291 case FLAGS_long: | |
2292 if (n & 0x8000000000000000LL) | |
2293 result = TOKuns64v; | |
2294 else | |
2295 result = TOKint64v; | |
2296 break; | |
2297 | |
2298 case FLAGS_unsigned | FLAGS_long: | |
2299 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: | |
2300 result = TOKuns64v; | |
2301 break; | |
2302 | |
2303 default: | |
2304 #ifdef DEBUG | |
2305 printf("%x\n",flags); | |
2306 #endif | |
2307 assert(0); | |
2308 } | |
2309 t->uns64value = n; | |
2310 return result; | |
2311 } | |
2312 | |
2313 /************************************** | |
2314 * Read in characters, converting them to real. | |
2315 * Bugs: | |
2316 * Exponent overflow not detected. | |
2317 * Too much requested precision is not detected. | |
2318 */ | |
2319 | |
// Lex a floating point literal that starts at p.
// The literal text is collected into stringbuffer by a state machine
// (dblstate), converted into t->float80value, and then the f/F/l/L and
// i/I suffixes select which float or imaginary token is returned.
2320 TOK Lexer::inreal(Token *t) | |
// The __in/__out/__body contract section below is only compiled under __DMC__
2321 #ifdef __DMC__ | |
2322 __in | |
2323 { | |
2324 assert(*p == '.' || isdigit(*p)); | |
2325 } | |
2326 __out (result) | |
2327 { | |
2328 switch (result) | |
2329 { | |
2330 case TOKfloat32v: | |
2331 case TOKfloat64v: | |
2332 case TOKfloat80v: | |
2333 case TOKimaginary32v: | |
2334 case TOKimaginary64v: | |
2335 case TOKimaginary80v: | |
2336 break; | |
2337 | |
2338 default: | |
2339 assert(0); | |
2340 } | |
2341 } | |
2342 __body | |
2343 #endif /* __DMC__ */ | |
2344 { int dblstate; | |
2345 unsigned c; | |
2346 char hex; // is this a hexadecimal-floating-constant? | |
2347 TOK result; | |
2348 | |
2349 //printf("Lexer::inreal()\n"); | |
2350 stringbuffer.reset(); | |
2351 dblstate = 0; | |
2352 hex = 0; | |
// Outer loop: fetch one character. Inner loop: run the state machine on it;
// 'continue' re-examines the same character in the next state, 'break'
// accepts it so that it is appended to stringbuffer. 'goto Lnext' drops
// the character without storing it.
2353 Lnext: | |
2354 while (1) | |
2355 { | |
2356 // Get next char from input | |
2357 c = *p++; | |
2358 //printf("dblstate = %d, c = '%c'\n", dblstate, c); | |
2359 while (1) | |
2360 { | |
2361 switch (dblstate) | |
2362 { | |
2363 case 0: // opening state | |
2364 if (c == '0') | |
2365 dblstate = 9; | |
2366 else if (c == '.') | |
2367 dblstate = 3; | |
2368 else | |
2369 dblstate = 1; | |
2370 break; | |
2371 | |
// state 9: a leading '0' was seen; an x or X here marks a hex float,
// anything else is handled as state 1
2372 case 9: | |
2373 dblstate = 1; | |
2374 if (c == 'X' || c == 'x') | |
2375 { hex++; | |
2376 break; | |
2377 } | |
2378 case 1: // digits to left of . | |
2379 case 3: // digits to right of . | |
2380 case 7: // continuing exponent digits | |
2381 if (!isdigit(c) && !(hex && isxdigit(c))) | |
2382 { | |
2383 if (c == '_') | |
2384 goto Lnext; // ignore embedded '_' | |
2385 dblstate++; | |
2386 continue; | |
2387 } | |
2388 break; | |
2389 | |
2390 case 2: // no more digits to left of . | |
2391 if (c == '.') | |
2392 { dblstate++; | |
2393 break; | |
2394 } | |
2395 case 4: // no more digits to right of . | |
2396 if ((c == 'E' || c == 'e') || | |
2397 hex && (c == 'P' || c == 'p')) | |
2398 { dblstate = 5; | |
2399 hex = 0; // exponent is always decimal | |
2400 break; | |
2401 } | |
2402 if (hex) | |
2403 error("binary-exponent-part required"); | |
2404 goto done; | |
2405 | |
2406 case 5: // looking immediately to right of E | |
2407 dblstate++; | |
2408 if (c == '-' || c == '+') | |
2409 break; | |
2410 case 6: // 1st exponent digit expected | |
2411 if (!isdigit(c)) | |
2412 error("exponent expected"); | |
2413 dblstate++; | |
2414 break; | |
2415 | |
2416 case 8: // past end of exponent digits | |
2417 goto done; | |
2418 } | |
2419 break; | |
2420 } | |
2421 stringbuffer.writeByte(c); | |
2422 } | |
// The character that ended the literal was already consumed by *p++
// above; step back onto it.
2423 done: | |
2424 p--; | |
2425 | |
2426 stringbuffer.writeByte(0); | |
2427 | |
2428 #if _WIN32 && __DMC__ | |
2429 char *save = __locale_decpoint; | |
2430 __locale_decpoint = "."; | |
2431 #endif | |
2432 #ifdef IN_GCC | |
2433 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); | |
2434 #else | |
2435 t->float80value = strtold((char *)stringbuffer.data, NULL); | |
2436 #endif | |
// errno is reset after the long double conversion above, so the ERANGE
// test at the end of the function only reflects the conversions made in
// the switch below.
2437 errno = 0; | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2438 float strtofres; |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2439 double strtodres; |
// The suffix selects the token type. The text is converted again at float
// or double precision; the result of that conversion is only inspected
// for range checking.
159 | 2440 switch (*p) |
2441 { | |
2442 case 'F': | |
2443 case 'f': | |
2444 #ifdef IN_GCC | |
2445 real_t::parse((char *)stringbuffer.data, real_t::Float); | |
2446 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2447 strtofres = strtof((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2448 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2449 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2450 strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2451 errno = 0; |
159 | 2452 #endif |
2453 result = TOKfloat32v; | |
2454 p++; | |
2455 break; | |
2456 | |
// no suffix character: the literal is a double and p is not advanced
2457 default: | |
2458 #ifdef IN_GCC | |
2459 real_t::parse((char *)stringbuffer.data, real_t::Double); | |
2460 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2461 strtodres = strtod((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2462 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2463 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2464 strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2465 errno = 0; |
159 | 2466 #endif |
2467 result = TOKfloat64v; | |
2468 break; | |
2469 | |
2470 case 'l': | |
2471 if (!global.params.useDeprecated) | |
2472 error("'l' suffix is deprecated, use 'L' instead"); | |
2473 case 'L': | |
2474 result = TOKfloat80v; | |
2475 p++; | |
2476 break; | |
2477 } | |
// A trailing i or I turns the float token into the matching imaginary token
2478 if (*p == 'i' || *p == 'I') | |
2479 { | |
2480 if (!global.params.useDeprecated && *p == 'I') | |
2481 error("'I' suffix is deprecated, use 'i' instead"); | |
2482 p++; | |
2483 switch (result) | |
2484 { | |
2485 case TOKfloat32v: | |
2486 result = TOKimaginary32v; | |
2487 break; | |
2488 case TOKfloat64v: | |
2489 result = TOKimaginary64v; | |
2490 break; | |
2491 case TOKfloat80v: | |
2492 result = TOKimaginary80v; | |
2493 break; | |
2494 } | |
2495 } | |
2496 #if _WIN32 && __DMC__ | |
2497 __locale_decpoint = save; | |
2498 #endif | |
2499 if (errno == ERANGE) | |
2500 error("number is not representable"); | |
2501 return result; | |
2502 } | |
2503 | |
2504 /********************************************* | |
2505 * Do pragma. | |
2506 * Currently, the only pragma supported is: | |
2507 * #line linnum [filespec] | |
2508 */ | |
2509 | |
2510 void Lexer::pragma() | |
2511 { | |
2512 Token tok; | |
2513 int linnum; | |
2514 char *filespec = NULL; | |
2515 Loc loc = this->loc; | |
2516 | |
2517 scan(&tok); | |
2518 if (tok.value != TOKidentifier || tok.ident != Id::line) | |
2519 goto Lerr; | |
2520 | |
2521 scan(&tok); | |
2522 if (tok.value == TOKint32v || tok.value == TOKint64v) | |
2523 linnum = tok.uns64value - 1; | |
2524 else | |
2525 goto Lerr; | |
2526 | |
2527 while (1) | |
2528 { | |
2529 switch (*p) | |
2530 { | |
2531 case 0: | |
2532 case 0x1A: | |
2533 case '\n': | |
2534 Lnewline: | |
2535 this->loc.linnum = linnum; | |
2536 if (filespec) | |
2537 this->loc.filename = filespec; | |
2538 return; | |
2539 | |
2540 case '\r': | |
2541 p++; | |
2542 if (*p != '\n') | |
2543 { p--; | |
2544 goto Lnewline; | |
2545 } | |
2546 continue; | |
2547 | |
2548 case ' ': | |
2549 case '\t': | |
2550 case '\v': | |
2551 case '\f': | |
2552 p++; | |
2553 continue; // skip white space | |
2554 | |
2555 case '_': | |
2556 if (mod && memcmp(p, "__FILE__", 8) == 0) | |
2557 { | |
2558 p += 8; | |
2559 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); | |
2560 } | |
2561 continue; | |
2562 | |
2563 case '"': | |
2564 if (filespec) | |
2565 goto Lerr; | |
2566 stringbuffer.reset(); | |
2567 p++; | |
2568 while (1) | |
2569 { unsigned c; | |
2570 | |
2571 c = *p; | |
2572 switch (c) | |
2573 { | |
2574 case '\n': | |
2575 case '\r': | |
2576 case 0: | |
2577 case 0x1A: | |
2578 goto Lerr; | |
2579 | |
2580 case '"': | |
2581 stringbuffer.writeByte(0); | |
2582 filespec = mem.strdup((char *)stringbuffer.data); | |
2583 p++; | |
2584 break; | |
2585 | |
2586 default: | |
2587 if (c & 0x80) | |
2588 { unsigned u = decodeUTF(); | |
2589 if (u == PS || u == LS) | |
2590 goto Lerr; | |
2591 } | |
2592 stringbuffer.writeByte(c); | |
2593 p++; | |
2594 continue; | |
2595 } | |
2596 break; | |
2597 } | |
2598 continue; | |
2599 | |
2600 default: | |
2601 if (*p & 0x80) | |
2602 { unsigned u = decodeUTF(); | |
2603 if (u == PS || u == LS) | |
2604 goto Lnewline; | |
2605 } | |
2606 goto Lerr; | |
2607 } | |
2608 } | |
2609 | |
2610 Lerr: | |
2611 error(loc, "#line integer [\"filespec\"]\\n expected"); | |
2612 } | |
2613 | |
2614 | |
2615 /******************************************** | |
2616 * Decode UTF character. | |
2617 * Issue error messages for invalid sequences. | |
2618 * Return decoded character, advance p to last character in UTF sequence. | |
2619 */ | |
2620 | |
2621 unsigned Lexer::decodeUTF() | |
2622 { | |
2623 dchar_t u; | |
2624 unsigned char c; | |
2625 unsigned char *s = p; | |
2626 size_t len; | |
2627 size_t idx; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2628 const char *msg; |
159 | 2629 |
2630 c = *s; | |
2631 assert(c & 0x80); | |
2632 | |
2633 // Check length of remaining string up to 6 UTF-8 characters | |
2634 for (len = 1; len < 6 && s[len]; len++) | |
2635 ; | |
2636 | |
2637 idx = 0; | |
2638 msg = utf_decodeChar(s, len, &idx, &u); | |
2639 p += idx - 1; | |
2640 if (msg) | |
2641 { | |
2642 error("%s", msg); | |
2643 } | |
2644 return u; | |
2645 } | |
2646 | |
2647 | |
2648 /*************************************************** | |
2649 * Parse doc comment embedded between t->ptr and p. | |
2650 * Remove trailing blanks and tabs from lines. | |
2651 * Replace all newlines with \n. | |
2652 * Remove leading comment character from each line. | |
2653 * Decide if it's a lineComment or a blockComment. | |
2654 * Append to previous one for this token. | |
2655 */ | |
2656 | |
2657 void Lexer::getDocComment(Token *t, unsigned lineComment) | |
2658 { | |
2659 OutBuffer buf; | |
2660 unsigned char ct = t->ptr[2]; | |
2661 unsigned char *q = t->ptr + 3; // start of comment text | |
2662 int linestart = 0; | |
2663 | |
2664 unsigned char *qend = p; | |
2665 if (ct == '*' || ct == '+') | |
2666 qend -= 2; | |
2667 | |
2668 /* Scan over initial row of ****'s or ++++'s or ////'s | |
2669 */ | |
2670 for (; q < qend; q++) | |
2671 { | |
2672 if (*q != ct) | |
2673 break; | |
2674 } | |
2675 | |
2676 /* Remove trailing row of ****'s or ++++'s | |
2677 */ | |
2678 if (ct != '/') | |
2679 { | |
2680 for (; q < qend; qend--) | |
2681 { | |
2682 if (qend[-1] != ct) | |
2683 break; | |
2684 } | |
2685 } | |
2686 | |
2687 for (; q < qend; q++) | |
2688 { | |
2689 unsigned char c = *q; | |
2690 | |
2691 switch (c) | |
2692 { | |
2693 case '*': | |
2694 case '+': | |
2695 if (linestart && c == ct) | |
2696 { linestart = 0; | |
2697 /* Trim preceding whitespace up to preceding \n | |
2698 */ | |
2699 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2700 buf.offset--; | |
2701 continue; | |
2702 } | |
2703 break; | |
2704 | |
2705 case ' ': | |
2706 case '\t': | |
2707 break; | |
2708 | |
2709 case '\r': | |
2710 if (q[1] == '\n') | |
2711 continue; // skip the \r | |
2712 goto Lnewline; | |
2713 | |
2714 default: | |
2715 if (c == 226) | |
2716 { | |
2717 // If LS or PS | |
2718 if (q[1] == 128 && | |
2719 (q[2] == 168 || q[2] == 169)) | |
2720 { | |
2721 q += 2; | |
2722 goto Lnewline; | |
2723 } | |
2724 } | |
2725 linestart = 0; | |
2726 break; | |
2727 | |
2728 Lnewline: | |
2729 c = '\n'; // replace all newlines with \n | |
2730 case '\n': | |
2731 linestart = 1; | |
2732 | |
2733 /* Trim trailing whitespace | |
2734 */ | |
2735 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2736 buf.offset--; | |
2737 | |
2738 break; | |
2739 } | |
2740 buf.writeByte(c); | |
2741 } | |
2742 | |
2743 // Always end with a newline | |
2744 if (!buf.offset || buf.data[buf.offset - 1] != '\n') | |
2745 buf.writeByte('\n'); | |
2746 | |
2747 buf.writeByte(0); | |
2748 | |
2749 // It's a line comment if the start of the doc comment comes | |
2750 // after other non-whitespace on the same line. | |
2751 unsigned char** dc = (lineComment && anyToken) | |
2752 ? &t->lineComment | |
2753 : &t->blockComment; | |
2754 | |
2755 // Combine with previous doc comment, if any | |
2756 if (*dc) | |
2757 *dc = combineComments(*dc, (unsigned char *)buf.data); | |
2758 else | |
2759 *dc = (unsigned char *)buf.extractData(); | |
2760 } | |
2761 | |
2762 /******************************************** | |
2763 * Combine two document comments into one. | |
2764 */ | |
2765 | |
2766 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) | |
2767 { | |
2768 unsigned char *c = c2; | |
2769 | |
2770 if (c1) | |
2771 { c = c1; | |
2772 if (c2) | |
2773 { size_t len1 = strlen((char *)c1); | |
2774 size_t len2 = strlen((char *)c2); | |
2775 | |
2776 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); | |
2777 memcpy(c, c1, len1); | |
2778 c[len1] = '\n'; | |
2779 memcpy(c + len1 + 1, c2, len2); | |
2780 c[len1 + 1 + len2] = 0; | |
2781 } | |
2782 } | |
2783 return c; | |
2784 } | |
2785 | |
2786 /******************************************** | |
2787 * Create an identifier in the string table. | |
2788 */ | |
2789 | |
2790 Identifier *Lexer::idPool(const char *s) | |
2791 { | |
2792 size_t len = strlen(s); | |
2793 StringValue *sv = stringtable.update(s, len); | |
2794 Identifier *id = (Identifier *) sv->ptrvalue; | |
2795 if (!id) | |
2796 { | |
2797 id = new Identifier(sv->lstring.string, TOKidentifier); | |
2798 sv->ptrvalue = id; | |
2799 } | |
2800 return id; | |
2801 } | |
2802 | |
2803 /********************************************* | |
2804 * Create a unique identifier using the prefix s. | |
2805 */ | |
2806 | |
2807 Identifier *Lexer::uniqueId(const char *s, int num) | |
2808 { char buffer[32]; | |
2809 size_t slen = strlen(s); | |
2810 | |
2811 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); | |
2812 sprintf(buffer, "%s%d", s, num); | |
2813 return idPool(buffer); | |
2814 } | |
2815 | |
2816 Identifier *Lexer::uniqueId(const char *s) | |
2817 { | |
2818 static int num; | |
2819 return uniqueId(s, ++num); | |
2820 } | |
2821 | |
2822 /**************************************** | |
2823 */ | |
2824 | |
2825 struct Keyword | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2826 { const char *name; |
159 | 2827 enum TOK value; |
2828 }; | |
2829 | |
2830 static Keyword keywords[] = | |
2831 { | |
2832 // { "", TOK }, | |
2833 | |
2834 { "this", TOKthis }, | |
2835 { "super", TOKsuper }, | |
2836 { "assert", TOKassert }, | |
2837 { "null", TOKnull }, | |
2838 { "true", TOKtrue }, | |
2839 { "false", TOKfalse }, | |
2840 { "cast", TOKcast }, | |
2841 { "new", TOKnew }, | |
2842 { "delete", TOKdelete }, | |
2843 { "throw", TOKthrow }, | |
2844 { "module", TOKmodule }, | |
2845 { "pragma", TOKpragma }, | |
2846 { "typeof", TOKtypeof }, | |
2847 { "typeid", TOKtypeid }, | |
2848 | |
2849 { "template", TOKtemplate }, | |
2850 | |
2851 { "void", TOKvoid }, | |
2852 { "byte", TOKint8 }, | |
2853 { "ubyte", TOKuns8 }, | |
2854 { "short", TOKint16 }, | |
2855 { "ushort", TOKuns16 }, | |
2856 { "int", TOKint32 }, | |
2857 { "uint", TOKuns32 }, | |
2858 { "long", TOKint64 }, | |
2859 { "ulong", TOKuns64 }, | |
2860 { "cent", TOKcent, }, | |
2861 { "ucent", TOKucent, }, | |
2862 { "float", TOKfloat32 }, | |
2863 { "double", TOKfloat64 }, | |
2864 { "real", TOKfloat80 }, | |
2865 | |
2866 { "bool", TOKbool }, | |
2867 { "char", TOKchar }, | |
2868 { "wchar", TOKwchar }, | |
2869 { "dchar", TOKdchar }, | |
2870 | |
2871 { "ifloat", TOKimaginary32 }, | |
2872 { "idouble", TOKimaginary64 }, | |
2873 { "ireal", TOKimaginary80 }, | |
2874 | |
2875 { "cfloat", TOKcomplex32 }, | |
2876 { "cdouble", TOKcomplex64 }, | |
2877 { "creal", TOKcomplex80 }, | |
2878 | |
2879 { "delegate", TOKdelegate }, | |
2880 { "function", TOKfunction }, | |
2881 | |
2882 { "is", TOKis }, | |
2883 { "if", TOKif }, | |
2884 { "else", TOKelse }, | |
2885 { "while", TOKwhile }, | |
2886 { "for", TOKfor }, | |
2887 { "do", TOKdo }, | |
2888 { "switch", TOKswitch }, | |
2889 { "case", TOKcase }, | |
2890 { "default", TOKdefault }, | |
2891 { "break", TOKbreak }, | |
2892 { "continue", TOKcontinue }, | |
2893 { "synchronized", TOKsynchronized }, | |
2894 { "return", TOKreturn }, | |
2895 { "goto", TOKgoto }, | |
2896 { "try", TOKtry }, | |
2897 { "catch", TOKcatch }, | |
2898 { "finally", TOKfinally }, | |
2899 { "with", TOKwith }, | |
2900 { "asm", TOKasm }, | |
2901 { "foreach", TOKforeach }, | |
2902 { "foreach_reverse", TOKforeach_reverse }, | |
2903 { "scope", TOKscope }, | |
2904 | |
2905 { "struct", TOKstruct }, | |
2906 { "class", TOKclass }, | |
2907 { "interface", TOKinterface }, | |
2908 { "union", TOKunion }, | |
2909 { "enum", TOKenum }, | |
2910 { "import", TOKimport }, | |
2911 { "mixin", TOKmixin }, | |
2912 { "static", TOKstatic }, | |
2913 { "final", TOKfinal }, | |
2914 { "const", TOKconst }, | |
2915 { "typedef", TOKtypedef }, | |
2916 { "alias", TOKalias }, | |
2917 { "override", TOKoverride }, | |
2918 { "abstract", TOKabstract }, | |
2919 { "volatile", TOKvolatile }, | |
2920 { "debug", TOKdebug }, | |
2921 { "deprecated", TOKdeprecated }, | |
2922 { "in", TOKin }, | |
2923 { "out", TOKout }, | |
2924 { "inout", TOKinout }, | |
2925 { "lazy", TOKlazy }, | |
2926 { "auto", TOKauto }, | |
2927 | |
2928 { "align", TOKalign }, | |
2929 { "extern", TOKextern }, | |
2930 { "private", TOKprivate }, | |
2931 { "package", TOKpackage }, | |
2932 { "protected", TOKprotected }, | |
2933 { "public", TOKpublic }, | |
2934 { "export", TOKexport }, | |
2935 | |
2936 { "body", TOKbody }, | |
2937 { "invariant", TOKinvariant }, | |
2938 { "unittest", TOKunittest }, | |
2939 { "version", TOKversion }, | |
2940 //{ "manifest", TOKmanifest }, | |
2941 | |
2942 // Added after 1.0 | |
2943 { "ref", TOKref }, | |
2944 { "macro", TOKmacro }, | |
336 | 2945 #if DMDV2 |
159 | 2946 { "pure", TOKpure }, |
2947 { "nothrow", TOKnothrow }, | |
336 | 2948 { "__thread", TOKtls }, |
159 | 2949 { "__traits", TOKtraits }, |
2950 { "__overloadset", TOKoverloadset }, | |
336 | 2951 { "__FILE__", TOKfile }, |
2952 { "__LINE__", TOKline }, | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2953 { "shared", TOKshared }, |
159 | 2954 #endif |
2955 }; | |
2956 | |
2957 int Token::isKeyword() | |
2958 { | |
2959 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) | |
2960 { | |
2961 if (keywords[u].value == value) | |
2962 return 1; | |
2963 } | |
2964 return 0; | |
2965 } | |
2966 | |
2967 void Lexer::initKeywords() | |
2968 { StringValue *sv; | |
2969 unsigned u; | |
2970 enum TOK v; | |
2971 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); | |
2972 | |
2973 if (global.params.Dversion == 1) | |
2974 nkeywords -= 2; | |
2975 | |
2976 cmtable_init(); | |
2977 | |
2978 for (u = 0; u < nkeywords; u++) | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2979 { const char *s; |
159 | 2980 |
2981 //printf("keyword[%d] = '%s'\n",u, keywords[u].name); | |
2982 s = keywords[u].name; | |
2983 v = keywords[u].value; | |
2984 sv = stringtable.insert(s, strlen(s)); | |
2985 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); | |
2986 | |
2987 //printf("tochars[%d] = '%s'\n",v, s); | |
2988 Token::tochars[v] = s; | |
2989 } | |
2990 | |
2991 Token::tochars[TOKeof] = "EOF"; | |
2992 Token::tochars[TOKlcurly] = "{"; | |
2993 Token::tochars[TOKrcurly] = "}"; | |
2994 Token::tochars[TOKlparen] = "("; | |
2995 Token::tochars[TOKrparen] = ")"; | |
2996 Token::tochars[TOKlbracket] = "["; | |
2997 Token::tochars[TOKrbracket] = "]"; | |
2998 Token::tochars[TOKsemicolon] = ";"; | |
2999 Token::tochars[TOKcolon] = ":"; | |
3000 Token::tochars[TOKcomma] = ","; | |
3001 Token::tochars[TOKdot] = "."; | |
3002 Token::tochars[TOKxor] = "^"; | |
3003 Token::tochars[TOKxorass] = "^="; | |
3004 Token::tochars[TOKassign] = "="; | |
3005 Token::tochars[TOKconstruct] = "="; | |
336 | 3006 #if DMDV2 |
159 | 3007 Token::tochars[TOKblit] = "="; |
3008 #endif | |
3009 Token::tochars[TOKlt] = "<"; | |
3010 Token::tochars[TOKgt] = ">"; | |
3011 Token::tochars[TOKle] = "<="; | |
3012 Token::tochars[TOKge] = ">="; | |
3013 Token::tochars[TOKequal] = "=="; | |
3014 Token::tochars[TOKnotequal] = "!="; | |
3015 Token::tochars[TOKnotidentity] = "!is"; | |
3016 Token::tochars[TOKtobool] = "!!"; | |
3017 | |
3018 Token::tochars[TOKunord] = "!<>="; | |
3019 Token::tochars[TOKue] = "!<>"; | |
3020 Token::tochars[TOKlg] = "<>"; | |
3021 Token::tochars[TOKleg] = "<>="; | |
3022 Token::tochars[TOKule] = "!>"; | |
3023 Token::tochars[TOKul] = "!>="; | |
3024 Token::tochars[TOKuge] = "!<"; | |
3025 Token::tochars[TOKug] = "!<="; | |
3026 | |
3027 Token::tochars[TOKnot] = "!"; | |
3028 Token::tochars[TOKtobool] = "!!"; | |
3029 Token::tochars[TOKshl] = "<<"; | |
3030 Token::tochars[TOKshr] = ">>"; | |
3031 Token::tochars[TOKushr] = ">>>"; | |
3032 Token::tochars[TOKadd] = "+"; | |
3033 Token::tochars[TOKmin] = "-"; | |
3034 Token::tochars[TOKmul] = "*"; | |
3035 Token::tochars[TOKdiv] = "/"; | |
3036 Token::tochars[TOKmod] = "%"; | |
3037 Token::tochars[TOKslice] = ".."; | |
3038 Token::tochars[TOKdotdotdot] = "..."; | |
3039 Token::tochars[TOKand] = "&"; | |
3040 Token::tochars[TOKandand] = "&&"; | |
3041 Token::tochars[TOKor] = "|"; | |
3042 Token::tochars[TOKoror] = "||"; | |
3043 Token::tochars[TOKarray] = "[]"; | |
3044 Token::tochars[TOKindex] = "[i]"; | |
3045 Token::tochars[TOKaddress] = "&"; | |
3046 Token::tochars[TOKstar] = "*"; | |
3047 Token::tochars[TOKtilde] = "~"; | |
3048 Token::tochars[TOKdollar] = "$"; | |
3049 Token::tochars[TOKcast] = "cast"; | |
3050 Token::tochars[TOKplusplus] = "++"; | |
3051 Token::tochars[TOKminusminus] = "--"; | |
3052 Token::tochars[TOKtype] = "type"; | |
3053 Token::tochars[TOKquestion] = "?"; | |
3054 Token::tochars[TOKneg] = "-"; | |
3055 Token::tochars[TOKuadd] = "+"; | |
3056 Token::tochars[TOKvar] = "var"; | |
3057 Token::tochars[TOKaddass] = "+="; | |
3058 Token::tochars[TOKminass] = "-="; | |
3059 Token::tochars[TOKmulass] = "*="; | |
3060 Token::tochars[TOKdivass] = "/="; | |
3061 Token::tochars[TOKmodass] = "%="; | |
3062 Token::tochars[TOKshlass] = "<<="; | |
3063 Token::tochars[TOKshrass] = ">>="; | |
3064 Token::tochars[TOKushrass] = ">>>="; | |
3065 Token::tochars[TOKandass] = "&="; | |
3066 Token::tochars[TOKorass] = "|="; | |
3067 Token::tochars[TOKcatass] = "~="; | |
3068 Token::tochars[TOKcat] = "~"; | |
3069 Token::tochars[TOKcall] = "call"; | |
3070 Token::tochars[TOKidentity] = "is"; | |
3071 Token::tochars[TOKnotidentity] = "!is"; | |
3072 | |
3073 Token::tochars[TOKorass] = "|="; | |
3074 Token::tochars[TOKidentifier] = "identifier"; | |
3075 | |
3076 // For debugging | |
3077 Token::tochars[TOKdotexp] = "dotexp"; | |
3078 Token::tochars[TOKdotti] = "dotti"; | |
3079 Token::tochars[TOKdotvar] = "dotvar"; | |
3080 Token::tochars[TOKdottype] = "dottype"; | |
3081 Token::tochars[TOKsymoff] = "symoff"; | |
3082 Token::tochars[TOKtypedot] = "typedot"; | |
3083 Token::tochars[TOKarraylength] = "arraylength"; | |
3084 Token::tochars[TOKarrayliteral] = "arrayliteral"; | |
3085 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; | |
3086 Token::tochars[TOKstructliteral] = "structliteral"; | |
3087 Token::tochars[TOKstring] = "string"; | |
3088 Token::tochars[TOKdsymbol] = "symbol"; | |
3089 Token::tochars[TOKtuple] = "tuple"; | |
3090 Token::tochars[TOKdeclaration] = "declaration"; | |
3091 Token::tochars[TOKdottd] = "dottd"; | |
3092 Token::tochars[TOKon_scope_exit] = "scope(exit)"; | |
3093 } |