159
|
1
|
|
2 // Compiler implementation of the D programming language
|
|
3 // Copyright (c) 1999-2008 by Digital Mars
|
|
4 // All Rights Reserved
|
|
5 // written by Walter Bright
|
|
6 // http://www.digitalmars.com
|
|
7 // License for redistribution is by either the Artistic License
|
|
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
|
|
9 // See the included readme.txt for details.
|
|
10
|
|
11 /* Lexical Analyzer */
|
|
12
|
|
13 #include <stdio.h>
|
|
14 #include <string.h>
|
|
15 #include <ctype.h>
|
|
16 #include <stdarg.h>
|
|
17 #include <errno.h>
|
|
18 #include <wchar.h>
|
|
19 #include <stdlib.h>
|
|
20 #include <assert.h>
|
|
21 #include <sys/time.h>
|
|
22
|
|
23 #ifdef IN_GCC
|
|
24
|
|
25 #include <time.h>
|
|
26 #include "mem.h"
|
|
27
|
|
28 #else
|
|
29
|
|
30 #if __GNUC__
|
|
31 #include <time.h>
|
|
32 #endif
|
|
33
|
|
34 #if IN_LLVM
|
|
35 #include "mem.h"
|
|
36 #elif _WIN32
|
|
37 #include "..\root\mem.h"
|
|
38 #else
|
|
39 #include "../root/mem.h"
|
|
40 #endif
|
|
41 #endif
|
|
42
|
|
43 #include "stringtable.h"
|
|
44
|
|
45 #include "lexer.h"
|
|
46 #include "utf.h"
|
|
47 #include "identifier.h"
|
|
48 #include "id.h"
|
|
49 #include "module.h"
|
|
50
|
|
51 #if _WIN32 && __DMC__
|
|
52 // from \dm\src\include\setlocal.h
|
|
53 extern "C" char * __cdecl __locale_decpoint;
|
|
54 #endif
|
|
55
|
|
56 extern int HtmlNamedEntity(unsigned char *p, int length);
|
|
57
|
|
58 #define LS 0x2028 // UTF line separator
|
|
59 #define PS 0x2029 // UTF paragraph separator
|
|
60
|
|
/********************************************
 * Do our own char maps
 *
 * cmtable[] holds, for every byte value, a bit set of the CM* flags below.
 * The table is filled in by cmtable_init(); until that has run every
 * predicate returns 0.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // octal digit '0'..'7'
const int CMhex    = 0x2;       // hex digit '0'..'9', 'a'..'f', 'A'..'F'
const int CMidchar = 0x4;       // ASCII letter, digit or '_'

// Each predicate returns the flag bit itself (non-zero) when c has the property.
inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Fill in cmtable[].
 *
 * The classification is done with explicit ASCII ranges rather than
 * isdigit()/isalnum(): those depend on the current C locale, and in a
 * single-byte locale they may report bytes >= 0x80 as alphanumeric. Such
 * bytes must never be flagged here, because the scanner treats every byte
 * with the high bit set as the start of a UTF-8 sequence.
 * Flags are OR-ed in, so calling this more than once is harmless.
 */
static void cmtable_init()
{
    for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++)
    {
        const bool digit = ('0' <= c && c <= '9');
        const bool lower = ('a' <= c && c <= 'z');
        const bool upper = ('A' <= c && c <= 'Z');

        if ('0' <= c && c <= '7')
            cmtable[c] |= CMoctal;
        if (digit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
            cmtable[c] |= CMhex;
        if (digit || lower || upper || c == '_')
            cmtable[c] |= CMidchar;
    }
}
|
|
87
|
|
88
|
|
89 /************************* Token **********************************************/
|
|
90
|
|
// Spelling of each token kind, indexed by TOK value. An entry that is still
// NULL makes Token::toChars(enum TOK) fall back to a generated "TOK<n>" string.
char *Token::tochars[TOKMAX];
|
|
92
|
|
93 void *Token::operator new(size_t size)
|
|
94 { Token *t;
|
|
95
|
|
96 if (Lexer::freelist)
|
|
97 {
|
|
98 t = Lexer::freelist;
|
|
99 Lexer::freelist = t->next;
|
|
100 return t;
|
|
101 }
|
|
102
|
|
103 return ::operator new(size);
|
|
104 }
|
|
105
|
|
#ifdef DEBUG
// Debug aid: write the text of this token, followed by a newline, to stdmsg.
void Token::print()
{
    char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
|
|
112
|
|
113 char *Token::toChars()
|
|
114 { char *p;
|
|
115 static char buffer[3 + 3 * sizeof(value) + 1];
|
|
116
|
|
117 p = buffer;
|
|
118 switch (value)
|
|
119 {
|
|
120 case TOKint32v:
|
|
121 #if IN_GCC
|
|
122 sprintf(buffer,"%d",(d_int32)int64value);
|
|
123 #else
|
|
124 sprintf(buffer,"%d",int32value);
|
|
125 #endif
|
|
126 break;
|
|
127
|
|
128 case TOKuns32v:
|
|
129 case TOKcharv:
|
|
130 case TOKwcharv:
|
|
131 case TOKdcharv:
|
|
132 #if IN_GCC
|
|
133 sprintf(buffer,"%uU",(d_uns32)uns64value);
|
|
134 #else
|
|
135 sprintf(buffer,"%uU",uns32value);
|
|
136 #endif
|
|
137 break;
|
|
138
|
|
139 case TOKint64v:
|
|
140 sprintf(buffer,"%jdL",int64value);
|
|
141 break;
|
|
142
|
|
143 case TOKuns64v:
|
|
144 sprintf(buffer,"%juUL",uns64value);
|
|
145 break;
|
|
146
|
|
147 #if IN_GCC
|
|
148 case TOKfloat32v:
|
|
149 case TOKfloat64v:
|
|
150 case TOKfloat80v:
|
|
151 float80value.format(buffer, sizeof(buffer));
|
|
152 break;
|
|
153 case TOKimaginary32v:
|
|
154 case TOKimaginary64v:
|
|
155 case TOKimaginary80v:
|
|
156 float80value.format(buffer, sizeof(buffer));
|
|
157 // %% buffer
|
|
158 strcat(buffer, "i");
|
|
159 break;
|
|
160 #else
|
|
161 case TOKfloat32v:
|
|
162 sprintf(buffer,"%Lgf", float80value);
|
|
163 break;
|
|
164
|
|
165 case TOKfloat64v:
|
|
166 sprintf(buffer,"%Lg", float80value);
|
|
167 break;
|
|
168
|
|
169 case TOKfloat80v:
|
|
170 sprintf(buffer,"%LgL", float80value);
|
|
171 break;
|
|
172
|
|
173 case TOKimaginary32v:
|
|
174 sprintf(buffer,"%Lgfi", float80value);
|
|
175 break;
|
|
176
|
|
177 case TOKimaginary64v:
|
|
178 sprintf(buffer,"%Lgi", float80value);
|
|
179 break;
|
|
180
|
|
181 case TOKimaginary80v:
|
|
182 sprintf(buffer,"%LgLi", float80value);
|
|
183 break;
|
|
184 #endif
|
|
185
|
|
186 case TOKstring:
|
|
187 #if CSTRINGS
|
|
188 p = string;
|
|
189 #else
|
|
190 { OutBuffer buf;
|
|
191
|
|
192 buf.writeByte('"');
|
|
193 for (size_t i = 0; i < len; )
|
|
194 { unsigned c;
|
|
195
|
|
196 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
|
|
197 switch (c)
|
|
198 {
|
|
199 case 0:
|
|
200 break;
|
|
201
|
|
202 case '"':
|
|
203 case '\\':
|
|
204 buf.writeByte('\\');
|
|
205 default:
|
|
206 if (isprint(c))
|
|
207 buf.writeByte(c);
|
|
208 else if (c <= 0x7F)
|
|
209 buf.printf("\\x%02x", c);
|
|
210 else if (c <= 0xFFFF)
|
|
211 buf.printf("\\u%04x", c);
|
|
212 else
|
|
213 buf.printf("\\U%08x", c);
|
|
214 continue;
|
|
215 }
|
|
216 break;
|
|
217 }
|
|
218 buf.writeByte('"');
|
|
219 if (postfix)
|
|
220 buf.writeByte('"');
|
|
221 buf.writeByte(0);
|
|
222 p = (char *)buf.extractData();
|
|
223 }
|
|
224 #endif
|
|
225 break;
|
|
226
|
|
227 case TOKidentifier:
|
|
228 case TOKenum:
|
|
229 case TOKstruct:
|
|
230 case TOKimport:
|
|
231 CASE_BASIC_TYPES:
|
|
232 p = ident->toChars();
|
|
233 break;
|
|
234
|
|
235 default:
|
|
236 p = toChars(value);
|
|
237 break;
|
|
238 }
|
|
239 return p;
|
|
240 }
|
|
241
|
|
242 char *Token::toChars(enum TOK value)
|
|
243 { char *p;
|
|
244 static char buffer[3 + 3 * sizeof(value) + 1];
|
|
245
|
|
246 p = tochars[value];
|
|
247 if (!p)
|
|
248 { sprintf(buffer,"TOK%d",value);
|
|
249 p = buffer;
|
|
250 }
|
|
251 return p;
|
|
252 }
|
|
253
|
|
254 /*************************** Lexer ********************************************/
|
|
255
|
|
// Singly linked list of Tokens available for reuse: Lexer::nextToken() pushes
// consumed look-ahead tokens here and Token::operator new pops them.
Token *Lexer::freelist = NULL;
// Table in which Lexer::scan() looks up / registers the text of identifiers.
StringTable Lexer::stringtable;
// Scratch buffer in which the contents of a string literal are assembled.
OutBuffer Lexer::stringbuffer;
|
|
259
|
|
260 Lexer::Lexer(Module *mod,
|
|
261 unsigned char *base, unsigned begoffset, unsigned endoffset,
|
|
262 int doDocComment, int commentToken)
|
|
263 : loc(mod, 1)
|
|
264 {
|
|
265 //printf("Lexer::Lexer(%p,%d)\n",base,length);
|
|
266 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
|
|
267 memset(&token,0,sizeof(token));
|
|
268 this->base = base;
|
|
269 this->end = base + endoffset;
|
|
270 p = base + begoffset;
|
|
271 this->mod = mod;
|
|
272 this->doDocComment = doDocComment;
|
|
273 this->anyToken = 0;
|
|
274 this->commentToken = commentToken;
|
|
275 //initKeywords();
|
|
276
|
|
277 /* If first line starts with '#!', ignore the line
|
|
278 */
|
|
279
|
|
280 if (p[0] == '#' && p[1] =='!')
|
|
281 {
|
|
282 p += 2;
|
|
283 while (1)
|
|
284 { unsigned char c = *p;
|
|
285 switch (c)
|
|
286 {
|
|
287 case '\n':
|
|
288 p++;
|
|
289 break;
|
|
290
|
|
291 case '\r':
|
|
292 p++;
|
|
293 if (*p == '\n')
|
|
294 p++;
|
|
295 break;
|
|
296
|
|
297 case 0:
|
|
298 case 0x1A:
|
|
299 break;
|
|
300
|
|
301 default:
|
|
302 if (c & 0x80)
|
|
303 { unsigned u = decodeUTF();
|
|
304 if (u == PS || u == LS)
|
|
305 break;
|
|
306 }
|
|
307 p++;
|
|
308 continue;
|
|
309 }
|
|
310 break;
|
|
311 }
|
|
312 loc.linnum = 2;
|
|
313 }
|
|
314 }
|
|
315
|
|
316
|
|
317 void Lexer::error(const char *format, ...)
|
|
318 {
|
|
319 if (mod && !global.gag)
|
|
320 {
|
|
321 char *p = loc.toChars();
|
|
322 if (*p)
|
|
323 fprintf(stdmsg, "%s: ", p);
|
|
324 mem.free(p);
|
|
325
|
|
326 va_list ap;
|
|
327 va_start(ap, format);
|
|
328 vfprintf(stdmsg, format, ap);
|
|
329 va_end(ap);
|
|
330
|
|
331 fprintf(stdmsg, "\n");
|
|
332 fflush(stdmsg);
|
|
333
|
|
334 if (global.errors >= 20) // moderate blizzard of cascading messages
|
|
335 fatal();
|
|
336 }
|
|
337 global.errors++;
|
|
338 }
|
|
339
|
|
340 void Lexer::error(Loc loc, const char *format, ...)
|
|
341 {
|
|
342 if (mod && !global.gag)
|
|
343 {
|
|
344 char *p = loc.toChars();
|
|
345 if (*p)
|
|
346 fprintf(stdmsg, "%s: ", p);
|
|
347 mem.free(p);
|
|
348
|
|
349 va_list ap;
|
|
350 va_start(ap, format);
|
|
351 vfprintf(stdmsg, format, ap);
|
|
352 va_end(ap);
|
|
353
|
|
354 fprintf(stdmsg, "\n");
|
|
355 fflush(stdmsg);
|
|
356
|
|
357 if (global.errors >= 20) // moderate blizzard of cascading messages
|
|
358 fatal();
|
|
359 }
|
|
360 global.errors++;
|
|
361 }
|
|
362
|
|
363 TOK Lexer::nextToken()
|
|
364 { Token *t;
|
|
365
|
|
366 if (token.next)
|
|
367 {
|
|
368 t = token.next;
|
|
369 memcpy(&token,t,sizeof(Token));
|
|
370 t->next = freelist;
|
|
371 freelist = t;
|
|
372 }
|
|
373 else
|
|
374 {
|
|
375 scan(&token);
|
|
376 }
|
|
377 //token.print();
|
|
378 return token.value;
|
|
379 }
|
|
380
|
|
381 Token *Lexer::peek(Token *ct)
|
|
382 { Token *t;
|
|
383
|
|
384 if (ct->next)
|
|
385 t = ct->next;
|
|
386 else
|
|
387 {
|
|
388 t = new Token();
|
|
389 scan(t);
|
|
390 t->next = NULL;
|
|
391 ct->next = t;
|
|
392 }
|
|
393 return t;
|
|
394 }
|
|
395
|
|
396 /*********************************
|
|
397 * tk is on the opening (.
|
|
398 * Look ahead and return token that is past the closing ).
|
|
399 */
|
|
400
|
|
401 Token *Lexer::peekPastParen(Token *tk)
|
|
402 {
|
|
403 //printf("peekPastParen()\n");
|
|
404 int parens = 1;
|
|
405 int curlynest = 0;
|
|
406 while (1)
|
|
407 {
|
|
408 tk = peek(tk);
|
|
409 //tk->print();
|
|
410 switch (tk->value)
|
|
411 {
|
|
412 case TOKlparen:
|
|
413 parens++;
|
|
414 continue;
|
|
415
|
|
416 case TOKrparen:
|
|
417 --parens;
|
|
418 if (parens)
|
|
419 continue;
|
|
420 tk = peek(tk);
|
|
421 break;
|
|
422
|
|
423 case TOKlcurly:
|
|
424 curlynest++;
|
|
425 continue;
|
|
426
|
|
427 case TOKrcurly:
|
|
428 if (--curlynest >= 0)
|
|
429 continue;
|
|
430 break;
|
|
431
|
|
432 case TOKsemicolon:
|
|
433 if (curlynest)
|
|
434 continue;
|
|
435 break;
|
|
436
|
|
437 case TOKeof:
|
|
438 break;
|
|
439
|
|
440 default:
|
|
441 continue;
|
|
442 }
|
|
443 return tk;
|
|
444 }
|
|
445 }
|
|
446
|
|
447 /**********************************
|
|
448 * Determine if string is a valid Identifier.
|
|
449 * Placed here because of commonality with Lexer functionality.
|
|
450 * Returns:
|
|
451 * 0 invalid
|
|
452 */
|
|
453
|
|
454 int Lexer::isValidIdentifier(char *p)
|
|
455 {
|
|
456 size_t len;
|
|
457 size_t idx;
|
|
458
|
|
459 if (!p || !*p)
|
|
460 goto Linvalid;
|
|
461
|
|
462 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
|
|
463 goto Linvalid;
|
|
464
|
|
465 len = strlen(p);
|
|
466 idx = 0;
|
|
467 while (p[idx])
|
|
468 { dchar_t dc;
|
|
469
|
|
470 char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
|
|
471 if (q)
|
|
472 goto Linvalid;
|
|
473
|
|
474 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
|
|
475 goto Linvalid;
|
|
476 }
|
|
477 return 1;
|
|
478
|
|
479 Linvalid:
|
|
480 return 0;
|
|
481 }
|
|
482
|
|
/****************************
 * Turn next token in buffer into a token.
 *
 * Scans forward from the current position p and fills in *t.
 * White space is skipped; comments are skipped as well unless commentToken
 * is set, in which case they are returned as TOKcomment. Every end of line
 * consumed (LF, CR, CR LF, U+2028, U+2029) advances loc.linnum.
 * t->ptr is set to the position at which the returned token starts.
 */

void Lexer::scan(Token *t)
{
    unsigned lastLine = loc.linnum;     // line number on entry to scan()
    unsigned linnum;                    // line on which the current comment started

    t->blockComment = NULL;
    t->lineComment = NULL;
    while (1)
    {
        // Re-armed on every iteration, so skipped white space and comments
        // never end up inside the token.
        t->ptr = p;
        //printf("p = %p, *p = '%c'\n",p,*p);
        switch (*p)
        {
            case 0:
            case 0x1A:
                // p is not advanced: further calls keep returning TOKeof
                t->value = TOKeof;                      // end of file
                return;

            case ' ':
            case '\t':
            case '\v':
            case '\f':
                p++;
                continue;                       // skip white space

            case '\r':
                p++;
                // For CR LF the line is counted by the '\n' case on the next iteration
                if (*p != '\n')                 // if CR stands by itself
                    loc.linnum++;
                continue;                       // skip white space

            case '\n':
                p++;
                loc.linnum++;
                continue;                       // skip white space

            case '0':   case '1':   case '2':   case '3':   case '4':
            case '5':   case '6':   case '7':   case '8':   case '9':
                t->value = number(t);
                return;

#if CSTRINGS
            case '\'':
                t->value = charConstant(t, 0);
                return;

            case '"':
                t->value = stringConstant(t,0);
                return;

            // l'...' / L'...' and l"..." / L"..." literals; any other 'l'/'L'
            // falls through to the identifier cases below
            case 'l':
            case 'L':
                if (p[1] == '\'')
                {
                    p++;
                    t->value = charConstant(t, 1);
                    return;
                }
                else if (p[1] == '"')
                {
                    p++;
                    t->value = stringConstant(t, 1);
                    return;
                }
#else
            case '\'':
                t->value = charConstant(t,0);
                return;

            case 'r':
                // r"..." is a wysiwyg string; a plain 'r' starts an identifier
                if (p[1] != '"')
                    goto case_ident;
                p++;
                // fall through with p on the opening quote
            case '`':
                // *p is the opening delimiter, which is also the closing one
                t->value = wysiwygStringConstant(t, *p);
                return;

            case 'x':
                // x"..." is a hex string; a plain 'x' starts an identifier
                if (p[1] != '"')
                    goto case_ident;
                p++;
                t->value = hexStringConstant(t);
                return;

#if V2
            case 'q':
                // q"..." is a delimited string, q{...} a token string;
                // a plain 'q' starts an identifier
                if (p[1] == '"')
                {
                    p++;
                    t->value = delimitedStringConstant(t);
                    return;
                }
                else if (p[1] == '{')
                {
                    p++;
                    t->value = tokenStringConstant(t);
                    return;
                }
                else
                    goto case_ident;
#endif

            case '"':
                t->value = escapeStringConstant(t,0);
                return;

            case '\\':                  // escaped string literal
            {   unsigned c;

                // A run of consecutive \escape sequences forms one string literal
                stringbuffer.reset();
                do
                {
                    p++;
                    switch (*p)
                    {
                        case 'u':
                        case 'U':
                        case '&':
                            // these yield a code point, stored UTF-8 encoded
                            c = escapeSequence();
                            stringbuffer.writeUTF8(c);
                            break;

                        default:
                            // all other escapes are stored as a single byte
                            c = escapeSequence();
                            stringbuffer.writeByte(c);
                            break;
                    }
                } while (*p == '\\');
                t->len = stringbuffer.offset;
                // the terminating 0 is copied too but not counted in t->len
                stringbuffer.writeByte(0);
                t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                t->postfix = 0;
                t->value = TOKstring;
                return;
            }

            case 'l':
            case 'L':
#endif
            case 'a':   case 'b':   case 'c':   case 'd':   case 'e':
            case 'f':   case 'g':   case 'h':   case 'i':   case 'j':
            case 'k':               case 'm':   case 'n':   case 'o':
#if V2
            case 'p':   /*case 'q': case 'r':*/ case 's':   case 't':
#else
            case 'p':   case 'q': /*case 'r':*/ case 's':   case 't':
#endif
            case 'u':   case 'v':   case 'w': /*case 'x':*/ case 'y':
            case 'z':
            case 'A':   case 'B':   case 'C':   case 'D':   case 'E':
            case 'F':   case 'G':   case 'H':   case 'I':   case 'J':
            case 'K':               case 'M':   case 'N':   case 'O':
            case 'P':   case 'Q':   case 'R':   case 'S':   case 'T':
            case 'U':   case 'V':   case 'W':   case 'X':   case 'Y':
            case 'Z':
            case '_':
            case_ident:
            {   unsigned char c;
                StringValue *sv;
                Identifier *id;

                // Consume ASCII identifier characters and UTF-8 encoded
                // characters accepted by isUniAlpha()
                do
                {
                    c = *++p;
                } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
                // Look the text up in the string table; the Identifier is
                // created the first time a given spelling is seen
                sv = stringtable.update((char *)t->ptr, p - t->ptr);
                id = (Identifier *) sv->ptrvalue;
                if (!id)
                {   id = new Identifier(sv->lstring.string,TOKidentifier);
                    sv->ptrvalue = id;
                }
                t->ident = id;
                // NOTE(review): presumably keywords are pre-registered with
                // their own token value, so this yields the keyword token --
                // confirm against the keyword initialization code
                t->value = (enum TOK) id->value;
                anyToken = 1;
                if (*t->ptr == '_')     // if special identifier token
                {
                    static char date[11+1];
                    static char time[8+1];
                    static char timestamp[24+1];

                    if (!date[0])       // lazy evaluation
                    {   time_t t;       // NB: hides the parameter t in this scope
                        char *p;        // NB: hides the member p in this scope

                        ::time(&t);
                        p = ctime(&t);
                        assert(p);
                        // ctime() yields "Www Mmm dd hh:mm:ss yyyy\n"
                        sprintf(date, "%.6s %.4s", p + 4, p + 20);     // "Mmm dd yyyy"
                        sprintf(time, "%.8s", p + 11);                 // "hh:mm:ss"
                        sprintf(timestamp, "%.24s", p);                // all but the '\n'
                    }

                    if (mod && id == Id::FILE)
                    {
                        t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
                        goto Lstring;
                    }
                    else if (mod && id == Id::LINE)
                    {
                        t->value = TOKint64v;
                        t->uns64value = loc.linnum;
                    }
                    else if (id == Id::DATE)
                    {
                        t->ustring = (unsigned char *)date;
                        goto Lstring;
                    }
                    else if (id == Id::TIME)
                    {
                        t->ustring = (unsigned char *)time;
                        goto Lstring;
                    }
                    else if (id == Id::VENDOR)
                    {
                        t->ustring = (unsigned char *)"Digital Mars D";
                        goto Lstring;
                    }
                    else if (id == Id::TIMESTAMP)
                    {
                        t->ustring = (unsigned char *)timestamp;
                     Lstring:
                        // shared tail: turn the token into a string literal
                        t->value = TOKstring;
                     Llen:
                        t->postfix = 0;
                        t->len = strlen((char *)t->ustring);
                    }
                    else if (id == Id::VERSIONX)
                    {   unsigned major = 0;
                        unsigned minor = 0;

                        // Parse "<digits>.<digits>" starting at the second
                        // character of global.version; stops at the first
                        // character that is neither a digit nor '.'
                        for (char *p = global.version + 1; 1; p++)
                        {
                            char c = *p;
                            if (isdigit(c))
                                minor = minor * 10 + c - '0';
                            else if (c == '.')
                            {   major = minor;
                                minor = 0;
                            }
                            else
                                break;
                        }
                        t->value = TOKint64v;
                        t->uns64value = major * 1000 + minor;
                    }
#if V2
                    else if (id == Id::EOFX)
                    {
                        t->value = TOKeof;
                        // Advance scanner to end of file
                        while (!(*p == 0 || *p == 0x1A))
                            p++;
                    }
#endif
                }
                //printf("t->value = %d\n",t->value);
                return;
            }

            case '/':
                p++;
                switch (*p)
                {
                    case '=':
                        p++;
                        t->value = TOKdivass;
                        return;

                    case '*':
                        // block comment
                        p++;
                        linnum = loc.linnum;
                        while (1)
                        {
                            // inner loop: advance to the next '/'
                            while (1)
                            {   unsigned char c = *p;
                                switch (c)
                                {
                                    case '/':
                                        break;

                                    case '\n':
                                        loc.linnum++;
                                        p++;
                                        continue;

                                    case '\r':
                                        p++;
                                        if (*p != '\n')
                                            loc.linnum++;
                                        continue;

                                    case 0:
                                    case 0x1A:
                                        error("unterminated /* */ comment");
                                        p = end;
                                        t->value = TOKeof;
                                        return;

                                    default:
                                        if (c & 0x80)
                                        {   unsigned u = decodeUTF();
                                            if (u == PS || u == LS)
                                                loc.linnum++;
                                        }
                                        p++;
                                        continue;
                                }
                                break;
                            }
                            p++;
                            // The comment ends at "*/", unless that '*' is the
                            // one of the opening "/*" (i.e. the text is "/*/")
                            if (p[-2] == '*' && p - 3 != t->ptr)
                                break;
                        }
                        if (commentToken)
                        {
                            t->value = TOKcomment;
                            return;
                        }
                        else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
                        {   // if /** but not /**/
                            getDocComment(t, lastLine == linnum);
                        }
                        continue;

                    case '/':           // do // style comments
                        linnum = loc.linnum;
                        while (1)
                        {   unsigned char c = *++p;
                            switch (c)
                            {
                                case '\n':
                                    break;

                                case '\r':
                                    if (p[1] == '\n')
                                        p++;
                                    break;

                                case 0:
                                case 0x1A:
                                    // comment runs up to the end of the file
                                    if (commentToken)
                                    {
                                        p = end;
                                        t->value = TOKcomment;
                                        return;
                                    }
                                    if (doDocComment && t->ptr[2] == '/')
                                        getDocComment(t, lastLine == linnum);
                                    p = end;
                                    t->value = TOKeof;
                                    return;

                                default:
                                    if (c & 0x80)
                                    {   unsigned u = decodeUTF();
                                        if (u == PS || u == LS)
                                            break;
                                    }
                                    continue;
                            }
                            break;
                        }

                        // Here p is on the last byte of the line terminator

                        if (commentToken)
                        {
                            p++;
                            loc.linnum++;
                            t->value = TOKcomment;
                            return;
                        }
                        if (doDocComment && t->ptr[2] == '/')
                            getDocComment(t, lastLine == linnum);

                        p++;
                        loc.linnum++;
                        continue;

                    case '+':
                    {   int nest;       // nesting depth of /+ +/ comments

                        linnum = loc.linnum;
                        p++;
                        nest = 1;
                        while (1)
                        {   unsigned char c = *p;
                            switch (c)
                            {
                                case '/':
                                    p++;
                                    if (*p == '+')
                                    {
                                        p++;
                                        nest++;
                                    }
                                    continue;

                                case '+':
                                    p++;
                                    if (*p == '/')
                                    {
                                        p++;
                                        if (--nest == 0)
                                            break;
                                    }
                                    continue;

                                case '\r':
                                    p++;
                                    if (*p != '\n')
                                        loc.linnum++;
                                    continue;

                                case '\n':
                                    loc.linnum++;
                                    p++;
                                    continue;

                                case 0:
                                case 0x1A:
                                    error("unterminated /+ +/ comment");
                                    p = end;
                                    t->value = TOKeof;
                                    return;

                                default:
                                    if (c & 0x80)
                                    {   unsigned u = decodeUTF();
                                        if (u == PS || u == LS)
                                            loc.linnum++;
                                    }
                                    p++;
                                    continue;
                            }
                            break;
                        }
                        if (commentToken)
                        {
                            t->value = TOKcomment;
                            return;
                        }
                        if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
                        {   // if /++ but not /++/
                            getDocComment(t, lastLine == linnum);
                        }
                        continue;
                    }
                }
                // a '/' not followed by '=', '*', '/' or '+'
                t->value = TOKdiv;
                return;

            case '.':
                p++;
                if (isdigit(*p))
                {   /* Note that we don't allow ._1 and ._ as being
                     * valid floating point numbers.
                     */
                    // back up so that inreal() sees the leading '.'
                    p--;
                    t->value = inreal(t);
                }
                else if (p[0] == '.')
                {
                    if (p[1] == '.')
                    {   p += 2;
                        t->value = TOKdotdotdot;
                    }
                    else
                    {   p++;
                        t->value = TOKslice;
                    }
                }
                else
                    t->value = TOKdot;
                return;

            case '&':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKandass;
                }
                else if (*p == '&')
                {   p++;
                    t->value = TOKandand;
                }
                else
                    t->value = TOKand;
                return;

            case '|':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKorass;
                }
                else if (*p == '|')
                {   p++;
                    t->value = TOKoror;
                }
                else
                    t->value = TOKor;
                return;

            case '-':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKminass;
                }
#if 0
                else if (*p == '>')
                {   p++;
                    t->value = TOKarrow;
                }
#endif
                else if (*p == '-')
                {   p++;
                    t->value = TOKminusminus;
                }
                else
                    t->value = TOKmin;
                return;

            case '+':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKaddass;
                }
                else if (*p == '+')
                {   p++;
                    t->value = TOKplusplus;
                }
                else
                    t->value = TOKadd;
                return;

            case '<':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKle;                   // <=
                }
                else if (*p == '<')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshlass;           // <<=
                    }
                    else
                        t->value = TOKshl;              // <<
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKleg;              // <>=
                    }
                    else
                        t->value = TOKlg;               // <>
                }
                else
                    t->value = TOKlt;                   // <
                return;

            case '>':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKge;                   // >=
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshrass;           // >>=
                    }
                    else if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKushrass;      // >>>=
                        }
                        else
                            t->value = TOKushr;         // >>>
                    }
                    else
                        t->value = TOKshr;              // >>
                }
                else
                    t->value = TOKgt;                   // >
                return;

            case '!':
                p++;
                if (*p == '=')
                {   p++;
                    if (*p == '=' && global.params.Dversion == 1)
                    {   p++;
                        t->value = TOKnotidentity;      // !==
                    }
                    else
                        t->value = TOKnotequal;         // !=
                }
                else if (*p == '<')
                {   p++;
                    if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKunord; // !<>=
                        }
                        else
                            t->value = TOKue;   // !<>
                    }
                    else if (*p == '=')
                    {   p++;
                        t->value = TOKug;       // !<=
                    }
                    else
                        t->value = TOKuge;      // !<
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKul;       // !>=
                    }
                    else
                        t->value = TOKule;      // !>
                }
                else
                    t->value = TOKnot;          // !
                return;

            case '=':
                p++;
                if (*p == '=')
                {   p++;
                    if (*p == '=' && global.params.Dversion == 1)
                    {   p++;
                        t->value = TOKidentity;         // ===
                    }
                    else
                        t->value = TOKequal;            // ==
                }
                else
                    t->value = TOKassign;               // =
                return;

            case '~':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKcatass;               // ~=
                }
                else
                    t->value = TOKtilde;                // ~
                return;

// Single-character tokens: consume the character and return tok
#define SINGLE(c,tok) case c: p++; t->value = tok; return;

            SINGLE('(', TOKlparen)
            SINGLE(')', TOKrparen)
            SINGLE('[', TOKlbracket)
            SINGLE(']', TOKrbracket)
            SINGLE('{', TOKlcurly)
            SINGLE('}', TOKrcurly)
            SINGLE('?', TOKquestion)
            SINGLE(',', TOKcomma)
            SINGLE(';', TOKsemicolon)
            SINGLE(':', TOKcolon)
            SINGLE('$', TOKdollar)

#undef SINGLE

// Tokens that are c1 alone (tok1) or c1 directly followed by c2 (tok2)
#define DOUBLE(c1,tok1,c2,tok2)         \
            case c1:                    \
                p++;                    \
                if (*p == c2)           \
                {   p++;                \
                    t->value = tok2;    \
                }                       \
                else                    \
                    t->value = tok1;    \
                return;

            DOUBLE('*', TOKmul, '=', TOKmulass)
            DOUBLE('%', TOKmod, '=', TOKmodass)
            DOUBLE('^', TOKxor, '=', TOKxorass)

#undef DOUBLE

            case '#':
                p++;
                pragma();
                continue;

            default:
            {   unsigned char c = *p;

                if (c & 0x80)
                {   unsigned u = decodeUTF();

                    // Check for start of unicode identifier
                    if (isUniAlpha(u))
                        goto case_ident;

                    // Unicode line / paragraph separators count as end of line
                    if (u == PS || u == LS)
                    {
                        loc.linnum++;
                        p++;
                        continue;
                    }
                }
                // Anything else is reported and skipped; c still holds the
                // byte that was at the start of the character
                if (isprint(c))
                    error("unsupported char '%c'", c);
                else
                    error("unsupported char 0x%02x", c);
                p++;
                continue;
            }
        }
    }
}
|
|
1212
|
|
1213 /*******************************************
|
|
1214 * Parse escape sequence.
|
|
1215 */
|
|
1216
|
|
1217 unsigned Lexer::escapeSequence()
|
|
1218 { unsigned c;
|
|
1219 int n;
|
|
1220 int ndigits;
|
|
1221
|
|
1222 c = *p;
|
|
1223 switch (c)
|
|
1224 {
|
|
1225 case '\'':
|
|
1226 case '"':
|
|
1227 case '?':
|
|
1228 case '\\':
|
|
1229 Lconsume:
|
|
1230 p++;
|
|
1231 break;
|
|
1232
|
|
1233 case 'a': c = 7; goto Lconsume;
|
|
1234 case 'b': c = 8; goto Lconsume;
|
|
1235 case 'f': c = 12; goto Lconsume;
|
|
1236 case 'n': c = 10; goto Lconsume;
|
|
1237 case 'r': c = 13; goto Lconsume;
|
|
1238 case 't': c = 9; goto Lconsume;
|
|
1239 case 'v': c = 11; goto Lconsume;
|
|
1240
|
|
1241 case 'u':
|
|
1242 ndigits = 4;
|
|
1243 goto Lhex;
|
|
1244 case 'U':
|
|
1245 ndigits = 8;
|
|
1246 goto Lhex;
|
|
1247 case 'x':
|
|
1248 ndigits = 2;
|
|
1249 Lhex:
|
|
1250 p++;
|
|
1251 c = *p;
|
|
1252 if (ishex(c))
|
|
1253 { unsigned v;
|
|
1254
|
|
1255 n = 0;
|
|
1256 v = 0;
|
|
1257 while (1)
|
|
1258 {
|
|
1259 if (isdigit(c))
|
|
1260 c -= '0';
|
|
1261 else if (islower(c))
|
|
1262 c -= 'a' - 10;
|
|
1263 else
|
|
1264 c -= 'A' - 10;
|
|
1265 v = v * 16 + c;
|
|
1266 c = *++p;
|
|
1267 if (++n == ndigits)
|
|
1268 break;
|
|
1269 if (!ishex(c))
|
|
1270 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
|
|
1271 break;
|
|
1272 }
|
|
1273 }
|
|
1274 if (ndigits != 2 && !utf_isValidDchar(v))
|
|
1275 error("invalid UTF character \\U%08x", v);
|
|
1276 c = v;
|
|
1277 }
|
|
1278 else
|
|
1279 error("undefined escape hex sequence \\%c\n",c);
|
|
1280 break;
|
|
1281
|
|
1282 case '&': // named character entity
|
|
1283 for (unsigned char *idstart = ++p; 1; p++)
|
|
1284 {
|
|
1285 switch (*p)
|
|
1286 {
|
|
1287 case ';':
|
|
1288 c = HtmlNamedEntity(idstart, p - idstart);
|
|
1289 if (c == ~0)
|
|
1290 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
|
|
1291 c = ' ';
|
|
1292 }
|
|
1293 p++;
|
|
1294 break;
|
|
1295
|
|
1296 default:
|
|
1297 if (isalpha(*p) ||
|
|
1298 (p != idstart + 1 && isdigit(*p)))
|
|
1299 continue;
|
|
1300 error("unterminated named entity");
|
|
1301 break;
|
|
1302 }
|
|
1303 break;
|
|
1304 }
|
|
1305 break;
|
|
1306
|
|
1307 case 0:
|
|
1308 case 0x1A: // end of file
|
|
1309 c = '\\';
|
|
1310 break;
|
|
1311
|
|
1312 default:
|
|
1313 if (isoctal(c))
|
|
1314 { unsigned v;
|
|
1315
|
|
1316 n = 0;
|
|
1317 v = 0;
|
|
1318 do
|
|
1319 {
|
|
1320 v = v * 8 + (c - '0');
|
|
1321 c = *++p;
|
|
1322 } while (++n < 3 && isoctal(c));
|
|
1323 c = v;
|
|
1324 if (c > 0xFF)
|
|
1325 error("0%03o is larger than a byte", c);
|
|
1326 }
|
|
1327 else
|
|
1328 error("undefined escape sequence \\%c\n",c);
|
|
1329 break;
|
|
1330 }
|
|
1331 return c;
|
|
1332 }
|
|
1333
|
|
1334 /**************************************
|
|
1335 */
|
|
1336
|
|
1337 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
|
|
1338 { unsigned c;
|
|
1339 Loc start = loc;
|
|
1340
|
|
1341 p++;
|
|
1342 stringbuffer.reset();
|
|
1343 while (1)
|
|
1344 {
|
|
1345 c = *p++;
|
|
1346 switch (c)
|
|
1347 {
|
|
1348 case '\n':
|
|
1349 loc.linnum++;
|
|
1350 break;
|
|
1351
|
|
1352 case '\r':
|
|
1353 if (*p == '\n')
|
|
1354 continue; // ignore
|
|
1355 c = '\n'; // treat EndOfLine as \n character
|
|
1356 loc.linnum++;
|
|
1357 break;
|
|
1358
|
|
1359 case 0:
|
|
1360 case 0x1A:
|
|
1361 error("unterminated string constant starting at %s", start.toChars());
|
|
1362 t->ustring = (unsigned char *)"";
|
|
1363 t->len = 0;
|
|
1364 t->postfix = 0;
|
|
1365 return TOKstring;
|
|
1366
|
|
1367 case '"':
|
|
1368 case '`':
|
|
1369 if (c == tc)
|
|
1370 {
|
|
1371 t->len = stringbuffer.offset;
|
|
1372 stringbuffer.writeByte(0);
|
|
1373 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
|
|
1374 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
|
|
1375 stringPostfix(t);
|
|
1376 return TOKstring;
|
|
1377 }
|
|
1378 break;
|
|
1379
|
|
1380 default:
|
|
1381 if (c & 0x80)
|
|
1382 { p--;
|
|
1383 unsigned u = decodeUTF();
|
|
1384 p++;
|
|
1385 if (u == PS || u == LS)
|
|
1386 loc.linnum++;
|
|
1387 stringbuffer.writeUTF8(u);
|
|
1388 continue;
|
|
1389 }
|
|
1390 break;
|
|
1391 }
|
|
1392 stringbuffer.writeByte(c);
|
|
1393 }
|
|
1394 }
|
|
1395
|
|
1396 /**************************************
|
|
1397 * Lex hex strings:
|
|
1398 * x"0A ae 34FE BD"
|
|
1399 */
|
|
1400
|
|
1401 TOK Lexer::hexStringConstant(Token *t)
|
|
1402 { unsigned c;
|
|
1403 Loc start = loc;
|
|
1404 unsigned n = 0;
|
|
1405 unsigned v;
|
|
1406
|
|
1407 p++;
|
|
1408 stringbuffer.reset();
|
|
1409 while (1)
|
|
1410 {
|
|
1411 c = *p++;
|
|
1412 switch (c)
|
|
1413 {
|
|
1414 case ' ':
|
|
1415 case '\t':
|
|
1416 case '\v':
|
|
1417 case '\f':
|
|
1418 continue; // skip white space
|
|
1419
|
|
1420 case '\r':
|
|
1421 if (*p == '\n')
|
|
1422 continue; // ignore
|
|
1423 // Treat isolated '\r' as if it were a '\n'
|
|
1424 case '\n':
|
|
1425 loc.linnum++;
|
|
1426 continue;
|
|
1427
|
|
1428 case 0:
|
|
1429 case 0x1A:
|
|
1430 error("unterminated string constant starting at %s", start.toChars());
|
|
1431 t->ustring = (unsigned char *)"";
|
|
1432 t->len = 0;
|
|
1433 t->postfix = 0;
|
|
1434 return TOKstring;
|
|
1435
|
|
1436 case '"':
|
|
1437 if (n & 1)
|
|
1438 { error("odd number (%d) of hex characters in hex string", n);
|
|
1439 stringbuffer.writeByte(v);
|
|
1440 }
|
|
1441 t->len = stringbuffer.offset;
|
|
1442 stringbuffer.writeByte(0);
|
|
1443 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
|
|
1444 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
|
|
1445 stringPostfix(t);
|
|
1446 return TOKstring;
|
|
1447
|
|
1448 default:
|
|
1449 if (c >= '0' && c <= '9')
|
|
1450 c -= '0';
|
|
1451 else if (c >= 'a' && c <= 'f')
|
|
1452 c -= 'a' - 10;
|
|
1453 else if (c >= 'A' && c <= 'F')
|
|
1454 c -= 'A' - 10;
|
|
1455 else if (c & 0x80)
|
|
1456 { p--;
|
|
1457 unsigned u = decodeUTF();
|
|
1458 p++;
|
|
1459 if (u == PS || u == LS)
|
|
1460 loc.linnum++;
|
|
1461 else
|
|
1462 error("non-hex character \\u%x", u);
|
|
1463 }
|
|
1464 else
|
|
1465 error("non-hex character '%c'", c);
|
|
1466 if (n & 1)
|
|
1467 { v = (v << 4) | c;
|
|
1468 stringbuffer.writeByte(v);
|
|
1469 }
|
|
1470 else
|
|
1471 v = c;
|
|
1472 n++;
|
|
1473 break;
|
|
1474 }
|
|
1475 }
|
|
1476 }
|
|
1477
|
|
1478
|
|
1479 #if V2
|
|
1480 /**************************************
|
|
1481 * Lex delimited strings:
|
|
1482 * q"(foo(xxx))" // "foo(xxx)"
|
|
1483 * q"[foo(]" // "foo("
|
|
1484 * q"/foo]/" // "foo]"
|
|
1485 * q"HERE
|
|
1486 * foo
|
|
1487 * HERE" // "foo\n"
|
|
1488 * Input:
|
|
1489 * p is on the "
|
|
1490 */
|
|
1491
|
|
1492 TOK Lexer::delimitedStringConstant(Token *t)
|
|
1493 { unsigned c;
|
|
1494 Loc start = loc;
|
|
1495 unsigned delimleft = 0;
|
|
1496 unsigned delimright = 0;
|
|
1497 unsigned nest = 1;
|
|
1498 unsigned nestcount;
|
|
1499 Identifier *hereid = NULL;
|
|
1500 unsigned blankrol = 0;
|
|
1501 unsigned startline = 0;
|
|
1502
|
|
1503 p++;
|
|
1504 stringbuffer.reset();
|
|
1505 while (1)
|
|
1506 {
|
|
1507 c = *p++;
|
|
1508 //printf("c = '%c'\n", c);
|
|
1509 switch (c)
|
|
1510 {
|
|
1511 case '\n':
|
|
1512 Lnextline:
|
|
1513 loc.linnum++;
|
|
1514 startline = 1;
|
|
1515 if (blankrol)
|
|
1516 { blankrol = 0;
|
|
1517 continue;
|
|
1518 }
|
|
1519 if (hereid)
|
|
1520 {
|
|
1521 stringbuffer.writeUTF8(c);
|
|
1522 continue;
|
|
1523 }
|
|
1524 break;
|
|
1525
|
|
1526 case '\r':
|
|
1527 if (*p == '\n')
|
|
1528 continue; // ignore
|
|
1529 c = '\n'; // treat EndOfLine as \n character
|
|
1530 goto Lnextline;
|
|
1531
|
|
1532 case 0:
|
|
1533 case 0x1A:
|
|
1534 goto Lerror;
|
|
1535
|
|
1536 default:
|
|
1537 if (c & 0x80)
|
|
1538 { p--;
|
|
1539 c = decodeUTF();
|
|
1540 p++;
|
|
1541 if (c == PS || c == LS)
|
|
1542 goto Lnextline;
|
|
1543 }
|
|
1544 break;
|
|
1545 }
|
|
1546 if (delimleft == 0)
|
|
1547 { delimleft = c;
|
|
1548 nest = 1;
|
|
1549 nestcount = 1;
|
|
1550 if (c == '(')
|
|
1551 delimright = ')';
|
|
1552 else if (c == '{')
|
|
1553 delimright = '}';
|
|
1554 else if (c == '[')
|
|
1555 delimright = ']';
|
|
1556 else if (c == '<')
|
|
1557 delimright = '>';
|
|
1558 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
|
|
1559 { // Start of identifier; must be a heredoc
|
|
1560 Token t;
|
|
1561 p--;
|
|
1562 scan(&t); // read in heredoc identifier
|
|
1563 if (t.value != TOKidentifier)
|
|
1564 { error("identifier expected for heredoc, not %s", t.toChars());
|
|
1565 delimright = c;
|
|
1566 }
|
|
1567 else
|
|
1568 { hereid = t.ident;
|
|
1569 //printf("hereid = '%s'\n", hereid->toChars());
|
|
1570 blankrol = 1;
|
|
1571 }
|
|
1572 nest = 0;
|
|
1573 }
|
|
1574 else
|
|
1575 { delimright = c;
|
|
1576 nest = 0;
|
|
1577 }
|
|
1578 }
|
|
1579 else
|
|
1580 {
|
|
1581 if (blankrol)
|
|
1582 { error("heredoc rest of line should be blank");
|
|
1583 blankrol = 0;
|
|
1584 continue;
|
|
1585 }
|
|
1586 if (nest == 1)
|
|
1587 {
|
|
1588 if (c == delimleft)
|
|
1589 nestcount++;
|
|
1590 else if (c == delimright)
|
|
1591 { nestcount--;
|
|
1592 if (nestcount == 0)
|
|
1593 goto Ldone;
|
|
1594 }
|
|
1595 }
|
|
1596 else if (c == delimright)
|
|
1597 goto Ldone;
|
|
1598 if (startline && isalpha(c))
|
|
1599 { Token t;
|
|
1600 unsigned char *psave = p;
|
|
1601 p--;
|
|
1602 scan(&t); // read in possible heredoc identifier
|
|
1603 //printf("endid = '%s'\n", t.ident->toChars());
|
|
1604 if (t.value == TOKidentifier && t.ident->equals(hereid))
|
|
1605 { /* should check that rest of line is blank
|
|
1606 */
|
|
1607 goto Ldone;
|
|
1608 }
|
|
1609 p = psave;
|
|
1610 }
|
|
1611 stringbuffer.writeUTF8(c);
|
|
1612 startline = 0;
|
|
1613 }
|
|
1614 }
|
|
1615
|
|
1616 Ldone:
|
|
1617 if (*p == '"')
|
|
1618 p++;
|
|
1619 else
|
|
1620 error("delimited string must end in %c\"", delimright);
|
|
1621 t->len = stringbuffer.offset;
|
|
1622 stringbuffer.writeByte(0);
|
|
1623 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
|
|
1624 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
|
|
1625 stringPostfix(t);
|
|
1626 return TOKstring;
|
|
1627
|
|
1628 Lerror:
|
|
1629 error("unterminated string constant starting at %s", start.toChars());
|
|
1630 t->ustring = (unsigned char *)"";
|
|
1631 t->len = 0;
|
|
1632 t->postfix = 0;
|
|
1633 return TOKstring;
|
|
1634 }
|
|
1635
|
|
1636 /**************************************
|
|
1637 * Lex delimited strings:
|
|
1638 * q{ foo(xxx) } // " foo(xxx) "
|
|
1639 * q{foo(} // "foo("
|
|
1640 * q{{foo}"}"} // "{foo}"}""
|
|
1641 * Input:
|
|
1642 * p is on the q
|
|
1643 */
|
|
1644
|
|
1645 TOK Lexer::tokenStringConstant(Token *t)
|
|
1646 {
|
|
1647 unsigned nest = 1;
|
|
1648 Loc start = loc;
|
|
1649 unsigned char *pstart = ++p;
|
|
1650
|
|
1651 while (1)
|
|
1652 { Token tok;
|
|
1653
|
|
1654 scan(&tok);
|
|
1655 switch (tok.value)
|
|
1656 {
|
|
1657 case TOKlcurly:
|
|
1658 nest++;
|
|
1659 continue;
|
|
1660
|
|
1661 case TOKrcurly:
|
|
1662 if (--nest == 0)
|
|
1663 goto Ldone;
|
|
1664 continue;
|
|
1665
|
|
1666 case TOKeof:
|
|
1667 goto Lerror;
|
|
1668
|
|
1669 default:
|
|
1670 continue;
|
|
1671 }
|
|
1672 }
|
|
1673
|
|
1674 Ldone:
|
|
1675 t->len = p - 1 - pstart;
|
|
1676 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
|
|
1677 memcpy(t->ustring, pstart, t->len);
|
|
1678 t->ustring[t->len] = 0;
|
|
1679 stringPostfix(t);
|
|
1680 return TOKstring;
|
|
1681
|
|
1682 Lerror:
|
|
1683 error("unterminated token string constant starting at %s", start.toChars());
|
|
1684 t->ustring = (unsigned char *)"";
|
|
1685 t->len = 0;
|
|
1686 t->postfix = 0;
|
|
1687 return TOKstring;
|
|
1688 }
|
|
1689
|
|
1690 #endif
|
|
1691
|
|
1692
|
|
1693 /**************************************
|
|
1694 */
|
|
1695
|
|
1696 TOK Lexer::escapeStringConstant(Token *t, int wide)
|
|
1697 { unsigned c;
|
|
1698 Loc start = loc;
|
|
1699
|
|
1700 p++;
|
|
1701 stringbuffer.reset();
|
|
1702 while (1)
|
|
1703 {
|
|
1704 c = *p++;
|
|
1705 switch (c)
|
|
1706 {
|
|
1707 case '\\':
|
|
1708 switch (*p)
|
|
1709 {
|
|
1710 case 'u':
|
|
1711 case 'U':
|
|
1712 case '&':
|
|
1713 c = escapeSequence();
|
|
1714 stringbuffer.writeUTF8(c);
|
|
1715 continue;
|
|
1716
|
|
1717 default:
|
|
1718 c = escapeSequence();
|
|
1719 break;
|
|
1720 }
|
|
1721 break;
|
|
1722
|
|
1723 case '\n':
|
|
1724 loc.linnum++;
|
|
1725 break;
|
|
1726
|
|
1727 case '\r':
|
|
1728 if (*p == '\n')
|
|
1729 continue; // ignore
|
|
1730 c = '\n'; // treat EndOfLine as \n character
|
|
1731 loc.linnum++;
|
|
1732 break;
|
|
1733
|
|
1734 case '"':
|
|
1735 t->len = stringbuffer.offset;
|
|
1736 stringbuffer.writeByte(0);
|
|
1737 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
|
|
1738 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
|
|
1739 stringPostfix(t);
|
|
1740 return TOKstring;
|
|
1741
|
|
1742 case 0:
|
|
1743 case 0x1A:
|
|
1744 p--;
|
|
1745 error("unterminated string constant starting at %s", start.toChars());
|
|
1746 t->ustring = (unsigned char *)"";
|
|
1747 t->len = 0;
|
|
1748 t->postfix = 0;
|
|
1749 return TOKstring;
|
|
1750
|
|
1751 default:
|
|
1752 if (c & 0x80)
|
|
1753 {
|
|
1754 p--;
|
|
1755 c = decodeUTF();
|
|
1756 if (c == LS || c == PS)
|
|
1757 { c = '\n';
|
|
1758 loc.linnum++;
|
|
1759 }
|
|
1760 p++;
|
|
1761 stringbuffer.writeUTF8(c);
|
|
1762 continue;
|
|
1763 }
|
|
1764 break;
|
|
1765 }
|
|
1766 stringbuffer.writeByte(c);
|
|
1767 }
|
|
1768 }
|
|
1769
|
|
1770 /**************************************
|
|
1771 */
|
|
1772
|
|
1773 TOK Lexer::charConstant(Token *t, int wide)
|
|
1774 {
|
|
1775 unsigned c;
|
|
1776 TOK tk = TOKcharv;
|
|
1777
|
|
1778 //printf("Lexer::charConstant\n");
|
|
1779 p++;
|
|
1780 c = *p++;
|
|
1781 switch (c)
|
|
1782 {
|
|
1783 case '\\':
|
|
1784 switch (*p)
|
|
1785 {
|
|
1786 case 'u':
|
|
1787 t->uns64value = escapeSequence();
|
|
1788 tk = TOKwcharv;
|
|
1789 break;
|
|
1790
|
|
1791 case 'U':
|
|
1792 case '&':
|
|
1793 t->uns64value = escapeSequence();
|
|
1794 tk = TOKdcharv;
|
|
1795 break;
|
|
1796
|
|
1797 default:
|
|
1798 t->uns64value = escapeSequence();
|
|
1799 break;
|
|
1800 }
|
|
1801 break;
|
|
1802
|
|
1803 case '\n':
|
|
1804 L1:
|
|
1805 loc.linnum++;
|
|
1806 case '\r':
|
|
1807 case 0:
|
|
1808 case 0x1A:
|
|
1809 case '\'':
|
|
1810 error("unterminated character constant");
|
|
1811 return tk;
|
|
1812
|
|
1813 default:
|
|
1814 if (c & 0x80)
|
|
1815 {
|
|
1816 p--;
|
|
1817 c = decodeUTF();
|
|
1818 p++;
|
|
1819 if (c == LS || c == PS)
|
|
1820 goto L1;
|
|
1821 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
|
|
1822 tk = TOKwcharv;
|
|
1823 else
|
|
1824 tk = TOKdcharv;
|
|
1825 }
|
|
1826 t->uns64value = c;
|
|
1827 break;
|
|
1828 }
|
|
1829
|
|
1830 if (*p != '\'')
|
|
1831 { error("unterminated character constant");
|
|
1832 return tk;
|
|
1833 }
|
|
1834 p++;
|
|
1835 return tk;
|
|
1836 }
|
|
1837
|
|
1838 /***************************************
|
|
1839 * Get postfix of string literal.
|
|
1840 */
|
|
1841
|
|
1842 void Lexer::stringPostfix(Token *t)
|
|
1843 {
|
|
1844 switch (*p)
|
|
1845 {
|
|
1846 case 'c':
|
|
1847 case 'w':
|
|
1848 case 'd':
|
|
1849 t->postfix = *p;
|
|
1850 p++;
|
|
1851 break;
|
|
1852
|
|
1853 default:
|
|
1854 t->postfix = 0;
|
|
1855 break;
|
|
1856 }
|
|
1857 }
|
|
1858
|
|
1859 /***************************************
|
|
1860 * Read \u or \U unicode sequence
|
|
1861 * Input:
|
|
1862 * u 'u' or 'U'
|
|
1863 */
|
|
1864
|
|
1865 #if 0
|
|
1866 unsigned Lexer::wchar(unsigned u)
|
|
1867 {
|
|
1868 unsigned value;
|
|
1869 unsigned n;
|
|
1870 unsigned char c;
|
|
1871 unsigned nchars;
|
|
1872
|
|
1873 nchars = (u == 'U') ? 8 : 4;
|
|
1874 value = 0;
|
|
1875 for (n = 0; 1; n++)
|
|
1876 {
|
|
1877 ++p;
|
|
1878 if (n == nchars)
|
|
1879 break;
|
|
1880 c = *p;
|
|
1881 if (!ishex(c))
|
|
1882 { error("\\%c sequence must be followed by %d hex characters", u, nchars);
|
|
1883 break;
|
|
1884 }
|
|
1885 if (isdigit(c))
|
|
1886 c -= '0';
|
|
1887 else if (islower(c))
|
|
1888 c -= 'a' - 10;
|
|
1889 else
|
|
1890 c -= 'A' - 10;
|
|
1891 value <<= 4;
|
|
1892 value |= c;
|
|
1893 }
|
|
1894 return value;
|
|
1895 }
|
|
1896 #endif
|
|
1897
|
|
1898 /**************************************
|
|
1899 * Read in a number.
|
|
1900 * If it's an integer, store it in tok.TKutok.Vlong.
|
|
1901 * integers can be decimal, octal or hex
|
|
1902 * Handle the suffixes U, UL, LU, L, etc.
|
|
1903 * If it's double, store it in tok.TKutok.Vdouble.
|
|
1904 * Returns:
|
|
1905 * TKnum
|
|
1906 * TKdouble,...
|
|
1907 */
|
|
1908
|
|
1909 TOK Lexer::number(Token *t)
|
|
1910 {
|
|
1911 // We use a state machine to collect numbers
|
|
1912 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
|
|
1913 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
|
|
1914 STATE_hexh, STATE_error };
|
|
1915 enum STATE state;
|
|
1916
|
|
1917 enum FLAGS
|
|
1918 { FLAGS_decimal = 1, // decimal
|
|
1919 FLAGS_unsigned = 2, // u or U suffix
|
|
1920 FLAGS_long = 4, // l or L suffix
|
|
1921 };
|
|
1922 enum FLAGS flags = FLAGS_decimal;
|
|
1923
|
|
1924 int i;
|
|
1925 int base;
|
|
1926 unsigned c;
|
|
1927 unsigned char *start;
|
|
1928 TOK result;
|
|
1929
|
|
1930 //printf("Lexer::number()\n");
|
|
1931 state = STATE_initial;
|
|
1932 base = 0;
|
|
1933 stringbuffer.reset();
|
|
1934 start = p;
|
|
1935 while (1)
|
|
1936 {
|
|
1937 c = *p;
|
|
1938 switch (state)
|
|
1939 {
|
|
1940 case STATE_initial: // opening state
|
|
1941 if (c == '0')
|
|
1942 state = STATE_0;
|
|
1943 else
|
|
1944 state = STATE_decimal;
|
|
1945 break;
|
|
1946
|
|
1947 case STATE_0:
|
|
1948 flags = (FLAGS) (flags & ~FLAGS_decimal);
|
|
1949 switch (c)
|
|
1950 {
|
|
1951 #if ZEROH
|
|
1952 case 'H': // 0h
|
|
1953 case 'h':
|
|
1954 goto hexh;
|
|
1955 #endif
|
|
1956 case 'X':
|
|
1957 case 'x':
|
|
1958 state = STATE_hex0;
|
|
1959 break;
|
|
1960
|
|
1961 case '.':
|
|
1962 if (p[1] == '.') // .. is a separate token
|
|
1963 goto done;
|
|
1964 case 'i':
|
|
1965 case 'f':
|
|
1966 case 'F':
|
|
1967 goto real;
|
|
1968 #if ZEROH
|
|
1969 case 'E':
|
|
1970 case 'e':
|
|
1971 goto case_hex;
|
|
1972 #endif
|
|
1973 case 'B':
|
|
1974 case 'b':
|
|
1975 state = STATE_binary0;
|
|
1976 break;
|
|
1977
|
|
1978 case '0': case '1': case '2': case '3':
|
|
1979 case '4': case '5': case '6': case '7':
|
|
1980 state = STATE_octal;
|
|
1981 break;
|
|
1982
|
|
1983 #if ZEROH
|
|
1984 case '8': case '9': case 'A':
|
|
1985 case 'C': case 'D': case 'F':
|
|
1986 case 'a': case 'c': case 'd': case 'f':
|
|
1987 case_hex:
|
|
1988 state = STATE_hexh;
|
|
1989 break;
|
|
1990 #endif
|
|
1991 case '_':
|
|
1992 state = STATE_octal;
|
|
1993 p++;
|
|
1994 continue;
|
|
1995
|
|
1996 case 'L':
|
|
1997 if (p[1] == 'i')
|
|
1998 goto real;
|
|
1999 goto done;
|
|
2000
|
|
2001 default:
|
|
2002 goto done;
|
|
2003 }
|
|
2004 break;
|
|
2005
|
|
2006 case STATE_decimal: // reading decimal number
|
|
2007 if (!isdigit(c))
|
|
2008 {
|
|
2009 #if ZEROH
|
|
2010 if (ishex(c)
|
|
2011 || c == 'H' || c == 'h'
|
|
2012 )
|
|
2013 goto hexh;
|
|
2014 #endif
|
|
2015 if (c == '_') // ignore embedded _
|
|
2016 { p++;
|
|
2017 continue;
|
|
2018 }
|
|
2019 if (c == '.' && p[1] != '.')
|
|
2020 goto real;
|
|
2021 else if (c == 'i' || c == 'f' || c == 'F' ||
|
|
2022 c == 'e' || c == 'E')
|
|
2023 {
|
|
2024 real: // It's a real number. Back up and rescan as a real
|
|
2025 p = start;
|
|
2026 return inreal(t);
|
|
2027 }
|
|
2028 else if (c == 'L' && p[1] == 'i')
|
|
2029 goto real;
|
|
2030 goto done;
|
|
2031 }
|
|
2032 break;
|
|
2033
|
|
2034 case STATE_hex0: // reading hex number
|
|
2035 case STATE_hex:
|
|
2036 if (!ishex(c))
|
|
2037 {
|
|
2038 if (c == '_') // ignore embedded _
|
|
2039 { p++;
|
|
2040 continue;
|
|
2041 }
|
|
2042 if (c == '.' && p[1] != '.')
|
|
2043 goto real;
|
|
2044 if (c == 'P' || c == 'p' || c == 'i')
|
|
2045 goto real;
|
|
2046 if (state == STATE_hex0)
|
|
2047 error("Hex digit expected, not '%c'", c);
|
|
2048 goto done;
|
|
2049 }
|
|
2050 state = STATE_hex;
|
|
2051 break;
|
|
2052
|
|
2053 #if ZEROH
|
|
2054 hexh:
|
|
2055 state = STATE_hexh;
|
|
2056 case STATE_hexh: // parse numbers like 0FFh
|
|
2057 if (!ishex(c))
|
|
2058 {
|
|
2059 if (c == 'H' || c == 'h')
|
|
2060 {
|
|
2061 p++;
|
|
2062 base = 16;
|
|
2063 goto done;
|
|
2064 }
|
|
2065 else
|
|
2066 {
|
|
2067 // Check for something like 1E3 or 0E24
|
|
2068 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
|
|
2069 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
|
|
2070 goto real;
|
|
2071 error("Hex digit expected, not '%c'", c);
|
|
2072 goto done;
|
|
2073 }
|
|
2074 }
|
|
2075 break;
|
|
2076 #endif
|
|
2077
|
|
2078 case STATE_octal: // reading octal number
|
|
2079 case STATE_octale: // reading octal number with non-octal digits
|
|
2080 if (!isoctal(c))
|
|
2081 {
|
|
2082 #if ZEROH
|
|
2083 if (ishex(c)
|
|
2084 || c == 'H' || c == 'h'
|
|
2085 )
|
|
2086 goto hexh;
|
|
2087 #endif
|
|
2088 if (c == '_') // ignore embedded _
|
|
2089 { p++;
|
|
2090 continue;
|
|
2091 }
|
|
2092 if (c == '.' && p[1] != '.')
|
|
2093 goto real;
|
|
2094 if (c == 'i')
|
|
2095 goto real;
|
|
2096 if (isdigit(c))
|
|
2097 {
|
|
2098 state = STATE_octale;
|
|
2099 }
|
|
2100 else
|
|
2101 goto done;
|
|
2102 }
|
|
2103 break;
|
|
2104
|
|
2105 case STATE_binary0: // starting binary number
|
|
2106 case STATE_binary: // reading binary number
|
|
2107 if (c != '0' && c != '1')
|
|
2108 {
|
|
2109 #if ZEROH
|
|
2110 if (ishex(c)
|
|
2111 || c == 'H' || c == 'h'
|
|
2112 )
|
|
2113 goto hexh;
|
|
2114 #endif
|
|
2115 if (c == '_') // ignore embedded _
|
|
2116 { p++;
|
|
2117 continue;
|
|
2118 }
|
|
2119 if (state == STATE_binary0)
|
|
2120 { error("binary digit expected");
|
|
2121 state = STATE_error;
|
|
2122 break;
|
|
2123 }
|
|
2124 else
|
|
2125 goto done;
|
|
2126 }
|
|
2127 state = STATE_binary;
|
|
2128 break;
|
|
2129
|
|
2130 case STATE_error: // for error recovery
|
|
2131 if (!isdigit(c)) // scan until non-digit
|
|
2132 goto done;
|
|
2133 break;
|
|
2134
|
|
2135 default:
|
|
2136 assert(0);
|
|
2137 }
|
|
2138 stringbuffer.writeByte(c);
|
|
2139 p++;
|
|
2140 }
|
|
2141 done:
|
|
2142 stringbuffer.writeByte(0); // terminate string
|
|
2143 if (state == STATE_octale)
|
|
2144 error("Octal digit expected");
|
|
2145
|
|
2146 uinteger_t n; // unsigned >=64 bit integer type
|
|
2147
|
|
2148 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
|
|
2149 n = stringbuffer.data[0] - '0';
|
|
2150 else
|
|
2151 {
|
|
2152 // Convert string to integer
|
|
2153 #if __DMC__
|
|
2154 errno = 0;
|
|
2155 n = strtoull((char *)stringbuffer.data,NULL,base);
|
|
2156 if (errno == ERANGE)
|
|
2157 error("integer overflow");
|
|
2158 #else
|
|
2159 // Not everybody implements strtoull()
|
|
2160 char *p = (char *)stringbuffer.data;
|
|
2161 int r = 10, d;
|
|
2162
|
|
2163 if (*p == '0')
|
|
2164 {
|
|
2165 if (p[1] == 'x' || p[1] == 'X')
|
|
2166 p += 2, r = 16;
|
|
2167 else if (p[1] == 'b' || p[1] == 'B')
|
|
2168 p += 2, r = 2;
|
|
2169 else if (isdigit(p[1]))
|
|
2170 p += 1, r = 8;
|
|
2171 }
|
|
2172
|
|
2173 n = 0;
|
|
2174 while (1)
|
|
2175 {
|
|
2176 if (*p >= '0' && *p <= '9')
|
|
2177 d = *p - '0';
|
|
2178 else if (*p >= 'a' && *p <= 'z')
|
|
2179 d = *p - 'a' + 10;
|
|
2180 else if (*p >= 'A' && *p <= 'Z')
|
|
2181 d = *p - 'A' + 10;
|
|
2182 else
|
|
2183 break;
|
|
2184 if (d >= r)
|
|
2185 break;
|
|
2186 if (n && n * r + d <= n)
|
|
2187 {
|
|
2188 error ("integer overflow");
|
|
2189 break;
|
|
2190 }
|
|
2191
|
|
2192 n = n * r + d;
|
|
2193 p++;
|
|
2194 }
|
|
2195 #endif
|
|
2196 if (sizeof(n) > 8 &&
|
|
2197 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
|
|
2198 error("integer overflow");
|
|
2199 }
|
|
2200
|
|
2201 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
|
|
2202 while (1)
|
|
2203 { unsigned char f;
|
|
2204
|
|
2205 switch (*p)
|
|
2206 { case 'U':
|
|
2207 case 'u':
|
|
2208 f = FLAGS_unsigned;
|
|
2209 goto L1;
|
|
2210
|
|
2211 case 'l':
|
|
2212 if (1 || !global.params.useDeprecated)
|
|
2213 error("'l' suffix is deprecated, use 'L' instead");
|
|
2214 case 'L':
|
|
2215 f = FLAGS_long;
|
|
2216 L1:
|
|
2217 p++;
|
|
2218 if (flags & f)
|
|
2219 error("unrecognized token");
|
|
2220 flags = (FLAGS) (flags | f);
|
|
2221 continue;
|
|
2222 default:
|
|
2223 break;
|
|
2224 }
|
|
2225 break;
|
|
2226 }
|
|
2227
|
|
2228 switch (flags)
|
|
2229 {
|
|
2230 case 0:
|
|
2231 /* Octal or Hexadecimal constant.
|
|
2232 * First that fits: int, uint, long, ulong
|
|
2233 */
|
|
2234 if (n & 0x8000000000000000LL)
|
|
2235 result = TOKuns64v;
|
|
2236 else if (n & 0xFFFFFFFF00000000LL)
|
|
2237 result = TOKint64v;
|
|
2238 else if (n & 0x80000000)
|
|
2239 result = TOKuns32v;
|
|
2240 else
|
|
2241 result = TOKint32v;
|
|
2242 break;
|
|
2243
|
|
2244 case FLAGS_decimal:
|
|
2245 /* First that fits: int, long, long long
|
|
2246 */
|
|
2247 if (n & 0x8000000000000000LL)
|
|
2248 { error("signed integer overflow");
|
|
2249 result = TOKuns64v;
|
|
2250 }
|
|
2251 else if (n & 0xFFFFFFFF80000000LL)
|
|
2252 result = TOKint64v;
|
|
2253 else
|
|
2254 result = TOKint32v;
|
|
2255 break;
|
|
2256
|
|
2257 case FLAGS_unsigned:
|
|
2258 case FLAGS_decimal | FLAGS_unsigned:
|
|
2259 /* First that fits: uint, ulong
|
|
2260 */
|
|
2261 if (n & 0xFFFFFFFF00000000LL)
|
|
2262 result = TOKuns64v;
|
|
2263 else
|
|
2264 result = TOKuns32v;
|
|
2265 break;
|
|
2266
|
|
2267 case FLAGS_decimal | FLAGS_long:
|
|
2268 if (n & 0x8000000000000000LL)
|
|
2269 { error("signed integer overflow");
|
|
2270 result = TOKuns64v;
|
|
2271 }
|
|
2272 else
|
|
2273 result = TOKint64v;
|
|
2274 break;
|
|
2275
|
|
2276 case FLAGS_long:
|
|
2277 if (n & 0x8000000000000000LL)
|
|
2278 result = TOKuns64v;
|
|
2279 else
|
|
2280 result = TOKint64v;
|
|
2281 break;
|
|
2282
|
|
2283 case FLAGS_unsigned | FLAGS_long:
|
|
2284 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
|
|
2285 result = TOKuns64v;
|
|
2286 break;
|
|
2287
|
|
2288 default:
|
|
2289 #ifdef DEBUG
|
|
2290 printf("%x\n",flags);
|
|
2291 #endif
|
|
2292 assert(0);
|
|
2293 }
|
|
2294 t->uns64value = n;
|
|
2295 return result;
|
|
2296 }
|
|
2297
|
|
2298 /**************************************
|
|
2299 * Read in characters, converting them to real.
|
|
2300 * Bugs:
|
|
2301 * Exponent overflow not detected.
|
|
2302 * Too much requested precision is not detected.
|
|
2303 */
|
|
2304
|
|
2305 TOK Lexer::inreal(Token *t)
|
|
2306 #ifdef __DMC__
|
|
2307 __in
|
|
2308 {
|
|
2309 assert(*p == '.' || isdigit(*p));
|
|
2310 }
|
|
2311 __out (result)
|
|
2312 {
|
|
2313 switch (result)
|
|
2314 {
|
|
2315 case TOKfloat32v:
|
|
2316 case TOKfloat64v:
|
|
2317 case TOKfloat80v:
|
|
2318 case TOKimaginary32v:
|
|
2319 case TOKimaginary64v:
|
|
2320 case TOKimaginary80v:
|
|
2321 break;
|
|
2322
|
|
2323 default:
|
|
2324 assert(0);
|
|
2325 }
|
|
2326 }
|
|
2327 __body
|
|
2328 #endif /* __DMC__ */
|
|
2329 { int dblstate;
|
|
2330 unsigned c;
|
|
2331 char hex; // is this a hexadecimal-floating-constant?
|
|
2332 TOK result;
|
|
2333
|
|
2334 //printf("Lexer::inreal()\n");
|
|
2335 stringbuffer.reset();
|
|
2336 dblstate = 0;
|
|
2337 hex = 0;
|
|
2338 Lnext:
|
|
2339 while (1)
|
|
2340 {
|
|
2341 // Get next char from input
|
|
2342 c = *p++;
|
|
2343 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
|
|
2344 while (1)
|
|
2345 {
|
|
2346 switch (dblstate)
|
|
2347 {
|
|
2348 case 0: // opening state
|
|
2349 if (c == '0')
|
|
2350 dblstate = 9;
|
|
2351 else if (c == '.')
|
|
2352 dblstate = 3;
|
|
2353 else
|
|
2354 dblstate = 1;
|
|
2355 break;
|
|
2356
|
|
2357 case 9:
|
|
2358 dblstate = 1;
|
|
2359 if (c == 'X' || c == 'x')
|
|
2360 { hex++;
|
|
2361 break;
|
|
2362 }
|
|
2363 case 1: // digits to left of .
|
|
2364 case 3: // digits to right of .
|
|
2365 case 7: // continuing exponent digits
|
|
2366 if (!isdigit(c) && !(hex && isxdigit(c)))
|
|
2367 {
|
|
2368 if (c == '_')
|
|
2369 goto Lnext; // ignore embedded '_'
|
|
2370 dblstate++;
|
|
2371 continue;
|
|
2372 }
|
|
2373 break;
|
|
2374
|
|
2375 case 2: // no more digits to left of .
|
|
2376 if (c == '.')
|
|
2377 { dblstate++;
|
|
2378 break;
|
|
2379 }
|
|
2380 case 4: // no more digits to right of .
|
|
2381 if ((c == 'E' || c == 'e') ||
|
|
2382 hex && (c == 'P' || c == 'p'))
|
|
2383 { dblstate = 5;
|
|
2384 hex = 0; // exponent is always decimal
|
|
2385 break;
|
|
2386 }
|
|
2387 if (hex)
|
|
2388 error("binary-exponent-part required");
|
|
2389 goto done;
|
|
2390
|
|
2391 case 5: // looking immediately to right of E
|
|
2392 dblstate++;
|
|
2393 if (c == '-' || c == '+')
|
|
2394 break;
|
|
2395 case 6: // 1st exponent digit expected
|
|
2396 if (!isdigit(c))
|
|
2397 error("exponent expected");
|
|
2398 dblstate++;
|
|
2399 break;
|
|
2400
|
|
2401 case 8: // past end of exponent digits
|
|
2402 goto done;
|
|
2403 }
|
|
2404 break;
|
|
2405 }
|
|
2406 stringbuffer.writeByte(c);
|
|
2407 }
|
|
2408 done:
|
|
2409 p--;
|
|
2410
|
|
2411 stringbuffer.writeByte(0);
|
|
2412
|
|
2413 #if _WIN32 && __DMC__
|
|
2414 char *save = __locale_decpoint;
|
|
2415 __locale_decpoint = ".";
|
|
2416 #endif
|
|
2417 #ifdef IN_GCC
|
|
2418 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
|
|
2419 #else
|
|
2420 t->float80value = strtold((char *)stringbuffer.data, NULL);
|
|
2421 #endif
|
|
2422 errno = 0;
|
|
2423 switch (*p)
|
|
2424 {
|
|
2425 case 'F':
|
|
2426 case 'f':
|
|
2427 #ifdef IN_GCC
|
|
2428 real_t::parse((char *)stringbuffer.data, real_t::Float);
|
|
2429 #else
|
|
2430 strtof((char *)stringbuffer.data, NULL);
|
|
2431 #endif
|
|
2432 result = TOKfloat32v;
|
|
2433 p++;
|
|
2434 break;
|
|
2435
|
|
2436 default:
|
|
2437 #ifdef IN_GCC
|
|
2438 real_t::parse((char *)stringbuffer.data, real_t::Double);
|
|
2439 #else
|
|
2440 strtod((char *)stringbuffer.data, NULL);
|
|
2441 #endif
|
|
2442 result = TOKfloat64v;
|
|
2443 break;
|
|
2444
|
|
2445 case 'l':
|
|
2446 if (!global.params.useDeprecated)
|
|
2447 error("'l' suffix is deprecated, use 'L' instead");
|
|
2448 case 'L':
|
|
2449 result = TOKfloat80v;
|
|
2450 p++;
|
|
2451 break;
|
|
2452 }
|
|
2453 if (*p == 'i' || *p == 'I')
|
|
2454 {
|
|
2455 if (!global.params.useDeprecated && *p == 'I')
|
|
2456 error("'I' suffix is deprecated, use 'i' instead");
|
|
2457 p++;
|
|
2458 switch (result)
|
|
2459 {
|
|
2460 case TOKfloat32v:
|
|
2461 result = TOKimaginary32v;
|
|
2462 break;
|
|
2463 case TOKfloat64v:
|
|
2464 result = TOKimaginary64v;
|
|
2465 break;
|
|
2466 case TOKfloat80v:
|
|
2467 result = TOKimaginary80v;
|
|
2468 break;
|
|
2469 }
|
|
2470 }
|
|
2471 #if _WIN32 && __DMC__
|
|
2472 __locale_decpoint = save;
|
|
2473 #endif
|
|
2474 if (errno == ERANGE)
|
|
2475 error("number is not representable");
|
|
2476 return result;
|
|
2477 }
|
|
2478
|
|
2479 /*********************************************
|
|
2480 * Do pragma.
|
|
2481 * Currently, the only pragma supported is:
|
|
2482 * #line linnum [filespec]
|
|
2483 */
|
|
2484
|
|
2485 void Lexer::pragma()
|
|
2486 {
|
|
2487 Token tok;
|
|
2488 int linnum;
|
|
2489 char *filespec = NULL;
|
|
2490 Loc loc = this->loc;
|
|
2491
|
|
2492 scan(&tok);
|
|
2493 if (tok.value != TOKidentifier || tok.ident != Id::line)
|
|
2494 goto Lerr;
|
|
2495
|
|
2496 scan(&tok);
|
|
2497 if (tok.value == TOKint32v || tok.value == TOKint64v)
|
|
2498 linnum = tok.uns64value - 1;
|
|
2499 else
|
|
2500 goto Lerr;
|
|
2501
|
|
2502 while (1)
|
|
2503 {
|
|
2504 switch (*p)
|
|
2505 {
|
|
2506 case 0:
|
|
2507 case 0x1A:
|
|
2508 case '\n':
|
|
2509 Lnewline:
|
|
2510 this->loc.linnum = linnum;
|
|
2511 if (filespec)
|
|
2512 this->loc.filename = filespec;
|
|
2513 return;
|
|
2514
|
|
2515 case '\r':
|
|
2516 p++;
|
|
2517 if (*p != '\n')
|
|
2518 { p--;
|
|
2519 goto Lnewline;
|
|
2520 }
|
|
2521 continue;
|
|
2522
|
|
2523 case ' ':
|
|
2524 case '\t':
|
|
2525 case '\v':
|
|
2526 case '\f':
|
|
2527 p++;
|
|
2528 continue; // skip white space
|
|
2529
|
|
2530 case '_':
|
|
2531 if (mod && memcmp(p, "__FILE__", 8) == 0)
|
|
2532 {
|
|
2533 p += 8;
|
|
2534 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
|
|
2535 }
|
|
2536 continue;
|
|
2537
|
|
2538 case '"':
|
|
2539 if (filespec)
|
|
2540 goto Lerr;
|
|
2541 stringbuffer.reset();
|
|
2542 p++;
|
|
2543 while (1)
|
|
2544 { unsigned c;
|
|
2545
|
|
2546 c = *p;
|
|
2547 switch (c)
|
|
2548 {
|
|
2549 case '\n':
|
|
2550 case '\r':
|
|
2551 case 0:
|
|
2552 case 0x1A:
|
|
2553 goto Lerr;
|
|
2554
|
|
2555 case '"':
|
|
2556 stringbuffer.writeByte(0);
|
|
2557 filespec = mem.strdup((char *)stringbuffer.data);
|
|
2558 p++;
|
|
2559 break;
|
|
2560
|
|
2561 default:
|
|
2562 if (c & 0x80)
|
|
2563 { unsigned u = decodeUTF();
|
|
2564 if (u == PS || u == LS)
|
|
2565 goto Lerr;
|
|
2566 }
|
|
2567 stringbuffer.writeByte(c);
|
|
2568 p++;
|
|
2569 continue;
|
|
2570 }
|
|
2571 break;
|
|
2572 }
|
|
2573 continue;
|
|
2574
|
|
2575 default:
|
|
2576 if (*p & 0x80)
|
|
2577 { unsigned u = decodeUTF();
|
|
2578 if (u == PS || u == LS)
|
|
2579 goto Lnewline;
|
|
2580 }
|
|
2581 goto Lerr;
|
|
2582 }
|
|
2583 }
|
|
2584
|
|
2585 Lerr:
|
|
2586 error(loc, "#line integer [\"filespec\"]\\n expected");
|
|
2587 }
|
|
2588
|
|
2589
|
|
2590 /********************************************
|
|
2591 * Decode UTF character.
|
|
2592 * Issue error messages for invalid sequences.
|
|
2593 * Return decoded character, advance p to last character in UTF sequence.
|
|
2594 */
|
|
2595
|
|
2596 unsigned Lexer::decodeUTF()
|
|
2597 {
|
|
2598 dchar_t u;
|
|
2599 unsigned char c;
|
|
2600 unsigned char *s = p;
|
|
2601 size_t len;
|
|
2602 size_t idx;
|
|
2603 char *msg;
|
|
2604
|
|
2605 c = *s;
|
|
2606 assert(c & 0x80);
|
|
2607
|
|
2608 // Check length of remaining string up to 6 UTF-8 characters
|
|
2609 for (len = 1; len < 6 && s[len]; len++)
|
|
2610 ;
|
|
2611
|
|
2612 idx = 0;
|
|
2613 msg = utf_decodeChar(s, len, &idx, &u);
|
|
2614 p += idx - 1;
|
|
2615 if (msg)
|
|
2616 {
|
|
2617 error("%s", msg);
|
|
2618 }
|
|
2619 return u;
|
|
2620 }
|
|
2621
|
|
2622
|
|
2623 /***************************************************
|
|
2624 * Parse doc comment embedded between t->ptr and p.
|
|
2625 * Remove trailing blanks and tabs from lines.
|
|
2626 * Replace all newlines with \n.
|
|
2627 * Remove leading comment character from each line.
|
|
2628 * Decide if it's a lineComment or a blockComment.
|
|
2629 * Append to previous one for this token.
|
|
2630 */
|
|
2631
|
|
2632 void Lexer::getDocComment(Token *t, unsigned lineComment)
|
|
2633 {
|
|
2634 OutBuffer buf;
|
|
2635 unsigned char ct = t->ptr[2];
|
|
2636 unsigned char *q = t->ptr + 3; // start of comment text
|
|
2637 int linestart = 0;
|
|
2638
|
|
2639 unsigned char *qend = p;
|
|
2640 if (ct == '*' || ct == '+')
|
|
2641 qend -= 2;
|
|
2642
|
|
2643 /* Scan over initial row of ****'s or ++++'s or ////'s
|
|
2644 */
|
|
2645 for (; q < qend; q++)
|
|
2646 {
|
|
2647 if (*q != ct)
|
|
2648 break;
|
|
2649 }
|
|
2650
|
|
2651 /* Remove trailing row of ****'s or ++++'s
|
|
2652 */
|
|
2653 if (ct != '/')
|
|
2654 {
|
|
2655 for (; q < qend; qend--)
|
|
2656 {
|
|
2657 if (qend[-1] != ct)
|
|
2658 break;
|
|
2659 }
|
|
2660 }
|
|
2661
|
|
2662 for (; q < qend; q++)
|
|
2663 {
|
|
2664 unsigned char c = *q;
|
|
2665
|
|
2666 switch (c)
|
|
2667 {
|
|
2668 case '*':
|
|
2669 case '+':
|
|
2670 if (linestart && c == ct)
|
|
2671 { linestart = 0;
|
|
2672 /* Trim preceding whitespace up to preceding \n
|
|
2673 */
|
|
2674 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
|
|
2675 buf.offset--;
|
|
2676 continue;
|
|
2677 }
|
|
2678 break;
|
|
2679
|
|
2680 case ' ':
|
|
2681 case '\t':
|
|
2682 break;
|
|
2683
|
|
2684 case '\r':
|
|
2685 if (q[1] == '\n')
|
|
2686 continue; // skip the \r
|
|
2687 goto Lnewline;
|
|
2688
|
|
2689 default:
|
|
2690 if (c == 226)
|
|
2691 {
|
|
2692 // If LS or PS
|
|
2693 if (q[1] == 128 &&
|
|
2694 (q[2] == 168 || q[2] == 169))
|
|
2695 {
|
|
2696 q += 2;
|
|
2697 goto Lnewline;
|
|
2698 }
|
|
2699 }
|
|
2700 linestart = 0;
|
|
2701 break;
|
|
2702
|
|
2703 Lnewline:
|
|
2704 c = '\n'; // replace all newlines with \n
|
|
2705 case '\n':
|
|
2706 linestart = 1;
|
|
2707
|
|
2708 /* Trim trailing whitespace
|
|
2709 */
|
|
2710 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
|
|
2711 buf.offset--;
|
|
2712
|
|
2713 break;
|
|
2714 }
|
|
2715 buf.writeByte(c);
|
|
2716 }
|
|
2717
|
|
2718 // Always end with a newline
|
|
2719 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
|
|
2720 buf.writeByte('\n');
|
|
2721
|
|
2722 buf.writeByte(0);
|
|
2723
|
|
2724 // It's a line comment if the start of the doc comment comes
|
|
2725 // after other non-whitespace on the same line.
|
|
2726 unsigned char** dc = (lineComment && anyToken)
|
|
2727 ? &t->lineComment
|
|
2728 : &t->blockComment;
|
|
2729
|
|
2730 // Combine with previous doc comment, if any
|
|
2731 if (*dc)
|
|
2732 *dc = combineComments(*dc, (unsigned char *)buf.data);
|
|
2733 else
|
|
2734 *dc = (unsigned char *)buf.extractData();
|
|
2735 }
|
|
2736
|
|
2737 /********************************************
|
|
2738 * Combine two document comments into one.
|
|
2739 */
|
|
2740
|
|
2741 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
|
|
2742 {
|
|
2743 unsigned char *c = c2;
|
|
2744
|
|
2745 if (c1)
|
|
2746 { c = c1;
|
|
2747 if (c2)
|
|
2748 { size_t len1 = strlen((char *)c1);
|
|
2749 size_t len2 = strlen((char *)c2);
|
|
2750
|
|
2751 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
|
|
2752 memcpy(c, c1, len1);
|
|
2753 c[len1] = '\n';
|
|
2754 memcpy(c + len1 + 1, c2, len2);
|
|
2755 c[len1 + 1 + len2] = 0;
|
|
2756 }
|
|
2757 }
|
|
2758 return c;
|
|
2759 }
|
|
2760
|
|
2761 /********************************************
|
|
2762 * Create an identifier in the string table.
|
|
2763 */
|
|
2764
|
|
2765 Identifier *Lexer::idPool(const char *s)
|
|
2766 {
|
|
2767 size_t len = strlen(s);
|
|
2768 StringValue *sv = stringtable.update(s, len);
|
|
2769 Identifier *id = (Identifier *) sv->ptrvalue;
|
|
2770 if (!id)
|
|
2771 {
|
|
2772 id = new Identifier(sv->lstring.string, TOKidentifier);
|
|
2773 sv->ptrvalue = id;
|
|
2774 }
|
|
2775 return id;
|
|
2776 }
|
|
2777
|
|
2778 /*********************************************
|
|
2779 * Create a unique identifier using the prefix s.
|
|
2780 */
|
|
2781
|
|
2782 Identifier *Lexer::uniqueId(const char *s, int num)
|
|
2783 { char buffer[32];
|
|
2784 size_t slen = strlen(s);
|
|
2785
|
|
2786 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
|
|
2787 sprintf(buffer, "%s%d", s, num);
|
|
2788 return idPool(buffer);
|
|
2789 }
|
|
2790
|
|
2791 Identifier *Lexer::uniqueId(const char *s)
|
|
2792 {
|
|
2793 static int num;
|
|
2794 return uniqueId(s, ++num);
|
|
2795 }
|
|
2796
|
|
2797 /****************************************
|
|
2798 */
|
|
2799
|
|
2800 struct Keyword
|
|
2801 { char *name;
|
|
2802 enum TOK value;
|
|
2803 };
|
|
2804
|
|
2805 static Keyword keywords[] =
|
|
2806 {
|
|
2807 // { "", TOK },
|
|
2808
|
|
2809 { "this", TOKthis },
|
|
2810 { "super", TOKsuper },
|
|
2811 { "assert", TOKassert },
|
|
2812 { "null", TOKnull },
|
|
2813 { "true", TOKtrue },
|
|
2814 { "false", TOKfalse },
|
|
2815 { "cast", TOKcast },
|
|
2816 { "new", TOKnew },
|
|
2817 { "delete", TOKdelete },
|
|
2818 { "throw", TOKthrow },
|
|
2819 { "module", TOKmodule },
|
|
2820 { "pragma", TOKpragma },
|
|
2821 { "typeof", TOKtypeof },
|
|
2822 { "typeid", TOKtypeid },
|
|
2823
|
|
2824 { "template", TOKtemplate },
|
|
2825
|
|
2826 { "void", TOKvoid },
|
|
2827 { "byte", TOKint8 },
|
|
2828 { "ubyte", TOKuns8 },
|
|
2829 { "short", TOKint16 },
|
|
2830 { "ushort", TOKuns16 },
|
|
2831 { "int", TOKint32 },
|
|
2832 { "uint", TOKuns32 },
|
|
2833 { "long", TOKint64 },
|
|
2834 { "ulong", TOKuns64 },
|
|
2835 { "cent", TOKcent, },
|
|
2836 { "ucent", TOKucent, },
|
|
2837 { "float", TOKfloat32 },
|
|
2838 { "double", TOKfloat64 },
|
|
2839 { "real", TOKfloat80 },
|
|
2840
|
|
2841 { "bool", TOKbool },
|
|
2842 { "char", TOKchar },
|
|
2843 { "wchar", TOKwchar },
|
|
2844 { "dchar", TOKdchar },
|
|
2845
|
|
2846 { "ifloat", TOKimaginary32 },
|
|
2847 { "idouble", TOKimaginary64 },
|
|
2848 { "ireal", TOKimaginary80 },
|
|
2849
|
|
2850 { "cfloat", TOKcomplex32 },
|
|
2851 { "cdouble", TOKcomplex64 },
|
|
2852 { "creal", TOKcomplex80 },
|
|
2853
|
|
2854 { "delegate", TOKdelegate },
|
|
2855 { "function", TOKfunction },
|
|
2856
|
|
2857 { "is", TOKis },
|
|
2858 { "if", TOKif },
|
|
2859 { "else", TOKelse },
|
|
2860 { "while", TOKwhile },
|
|
2861 { "for", TOKfor },
|
|
2862 { "do", TOKdo },
|
|
2863 { "switch", TOKswitch },
|
|
2864 { "case", TOKcase },
|
|
2865 { "default", TOKdefault },
|
|
2866 { "break", TOKbreak },
|
|
2867 { "continue", TOKcontinue },
|
|
2868 { "synchronized", TOKsynchronized },
|
|
2869 { "return", TOKreturn },
|
|
2870 { "goto", TOKgoto },
|
|
2871 { "try", TOKtry },
|
|
2872 { "catch", TOKcatch },
|
|
2873 { "finally", TOKfinally },
|
|
2874 { "with", TOKwith },
|
|
2875 { "asm", TOKasm },
|
|
2876 { "foreach", TOKforeach },
|
|
2877 { "foreach_reverse", TOKforeach_reverse },
|
|
2878 { "scope", TOKscope },
|
|
2879
|
|
2880 { "struct", TOKstruct },
|
|
2881 { "class", TOKclass },
|
|
2882 { "interface", TOKinterface },
|
|
2883 { "union", TOKunion },
|
|
2884 { "enum", TOKenum },
|
|
2885 { "import", TOKimport },
|
|
2886 { "mixin", TOKmixin },
|
|
2887 { "static", TOKstatic },
|
|
2888 { "final", TOKfinal },
|
|
2889 { "const", TOKconst },
|
|
2890 { "typedef", TOKtypedef },
|
|
2891 { "alias", TOKalias },
|
|
2892 { "override", TOKoverride },
|
|
2893 { "abstract", TOKabstract },
|
|
2894 { "volatile", TOKvolatile },
|
|
2895 { "debug", TOKdebug },
|
|
2896 { "deprecated", TOKdeprecated },
|
|
2897 { "in", TOKin },
|
|
2898 { "out", TOKout },
|
|
2899 { "inout", TOKinout },
|
|
2900 { "lazy", TOKlazy },
|
|
2901 { "auto", TOKauto },
|
|
2902
|
|
2903 { "align", TOKalign },
|
|
2904 { "extern", TOKextern },
|
|
2905 { "private", TOKprivate },
|
|
2906 { "package", TOKpackage },
|
|
2907 { "protected", TOKprotected },
|
|
2908 { "public", TOKpublic },
|
|
2909 { "export", TOKexport },
|
|
2910
|
|
2911 { "body", TOKbody },
|
|
2912 { "invariant", TOKinvariant },
|
|
2913 { "unittest", TOKunittest },
|
|
2914 { "version", TOKversion },
|
|
2915 //{ "manifest", TOKmanifest },
|
|
2916
|
|
2917 // Added after 1.0
|
|
2918 { "ref", TOKref },
|
|
2919 { "macro", TOKmacro },
|
|
2920 #if V2
|
|
2921 { "pure", TOKpure },
|
|
2922 { "nothrow", TOKnothrow },
|
|
2923 { "__traits", TOKtraits },
|
|
2924 { "__overloadset", TOKoverloadset },
|
|
2925 #endif
|
|
2926 };
|
|
2927
|
|
2928 int Token::isKeyword()
|
|
2929 {
|
|
2930 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
|
|
2931 {
|
|
2932 if (keywords[u].value == value)
|
|
2933 return 1;
|
|
2934 }
|
|
2935 return 0;
|
|
2936 }
|
|
2937
|
|
2938 void Lexer::initKeywords()
|
|
2939 { StringValue *sv;
|
|
2940 unsigned u;
|
|
2941 enum TOK v;
|
|
2942 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
|
|
2943
|
|
2944 if (global.params.Dversion == 1)
|
|
2945 nkeywords -= 2;
|
|
2946
|
|
2947 cmtable_init();
|
|
2948
|
|
2949 for (u = 0; u < nkeywords; u++)
|
|
2950 { char *s;
|
|
2951
|
|
2952 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
|
|
2953 s = keywords[u].name;
|
|
2954 v = keywords[u].value;
|
|
2955 sv = stringtable.insert(s, strlen(s));
|
|
2956 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
|
|
2957
|
|
2958 //printf("tochars[%d] = '%s'\n",v, s);
|
|
2959 Token::tochars[v] = s;
|
|
2960 }
|
|
2961
|
|
2962 Token::tochars[TOKeof] = "EOF";
|
|
2963 Token::tochars[TOKlcurly] = "{";
|
|
2964 Token::tochars[TOKrcurly] = "}";
|
|
2965 Token::tochars[TOKlparen] = "(";
|
|
2966 Token::tochars[TOKrparen] = ")";
|
|
2967 Token::tochars[TOKlbracket] = "[";
|
|
2968 Token::tochars[TOKrbracket] = "]";
|
|
2969 Token::tochars[TOKsemicolon] = ";";
|
|
2970 Token::tochars[TOKcolon] = ":";
|
|
2971 Token::tochars[TOKcomma] = ",";
|
|
2972 Token::tochars[TOKdot] = ".";
|
|
2973 Token::tochars[TOKxor] = "^";
|
|
2974 Token::tochars[TOKxorass] = "^=";
|
|
2975 Token::tochars[TOKassign] = "=";
|
|
2976 Token::tochars[TOKconstruct] = "=";
|
|
2977 #if V2
|
|
2978 Token::tochars[TOKblit] = "=";
|
|
2979 #endif
|
|
2980 Token::tochars[TOKlt] = "<";
|
|
2981 Token::tochars[TOKgt] = ">";
|
|
2982 Token::tochars[TOKle] = "<=";
|
|
2983 Token::tochars[TOKge] = ">=";
|
|
2984 Token::tochars[TOKequal] = "==";
|
|
2985 Token::tochars[TOKnotequal] = "!=";
|
|
2986 Token::tochars[TOKnotidentity] = "!is";
|
|
2987 Token::tochars[TOKtobool] = "!!";
|
|
2988
|
|
2989 Token::tochars[TOKunord] = "!<>=";
|
|
2990 Token::tochars[TOKue] = "!<>";
|
|
2991 Token::tochars[TOKlg] = "<>";
|
|
2992 Token::tochars[TOKleg] = "<>=";
|
|
2993 Token::tochars[TOKule] = "!>";
|
|
2994 Token::tochars[TOKul] = "!>=";
|
|
2995 Token::tochars[TOKuge] = "!<";
|
|
2996 Token::tochars[TOKug] = "!<=";
|
|
2997
|
|
2998 Token::tochars[TOKnot] = "!";
|
|
2999 Token::tochars[TOKtobool] = "!!";
|
|
3000 Token::tochars[TOKshl] = "<<";
|
|
3001 Token::tochars[TOKshr] = ">>";
|
|
3002 Token::tochars[TOKushr] = ">>>";
|
|
3003 Token::tochars[TOKadd] = "+";
|
|
3004 Token::tochars[TOKmin] = "-";
|
|
3005 Token::tochars[TOKmul] = "*";
|
|
3006 Token::tochars[TOKdiv] = "/";
|
|
3007 Token::tochars[TOKmod] = "%";
|
|
3008 Token::tochars[TOKslice] = "..";
|
|
3009 Token::tochars[TOKdotdotdot] = "...";
|
|
3010 Token::tochars[TOKand] = "&";
|
|
3011 Token::tochars[TOKandand] = "&&";
|
|
3012 Token::tochars[TOKor] = "|";
|
|
3013 Token::tochars[TOKoror] = "||";
|
|
3014 Token::tochars[TOKarray] = "[]";
|
|
3015 Token::tochars[TOKindex] = "[i]";
|
|
3016 Token::tochars[TOKaddress] = "&";
|
|
3017 Token::tochars[TOKstar] = "*";
|
|
3018 Token::tochars[TOKtilde] = "~";
|
|
3019 Token::tochars[TOKdollar] = "$";
|
|
3020 Token::tochars[TOKcast] = "cast";
|
|
3021 Token::tochars[TOKplusplus] = "++";
|
|
3022 Token::tochars[TOKminusminus] = "--";
|
|
3023 Token::tochars[TOKtype] = "type";
|
|
3024 Token::tochars[TOKquestion] = "?";
|
|
3025 Token::tochars[TOKneg] = "-";
|
|
3026 Token::tochars[TOKuadd] = "+";
|
|
3027 Token::tochars[TOKvar] = "var";
|
|
3028 Token::tochars[TOKaddass] = "+=";
|
|
3029 Token::tochars[TOKminass] = "-=";
|
|
3030 Token::tochars[TOKmulass] = "*=";
|
|
3031 Token::tochars[TOKdivass] = "/=";
|
|
3032 Token::tochars[TOKmodass] = "%=";
|
|
3033 Token::tochars[TOKshlass] = "<<=";
|
|
3034 Token::tochars[TOKshrass] = ">>=";
|
|
3035 Token::tochars[TOKushrass] = ">>>=";
|
|
3036 Token::tochars[TOKandass] = "&=";
|
|
3037 Token::tochars[TOKorass] = "|=";
|
|
3038 Token::tochars[TOKcatass] = "~=";
|
|
3039 Token::tochars[TOKcat] = "~";
|
|
3040 Token::tochars[TOKcall] = "call";
|
|
3041 Token::tochars[TOKidentity] = "is";
|
|
3042 Token::tochars[TOKnotidentity] = "!is";
|
|
3043
|
|
3044 Token::tochars[TOKorass] = "|=";
|
|
3045 Token::tochars[TOKidentifier] = "identifier";
|
|
3046
|
|
3047 // For debugging
|
|
3048 Token::tochars[TOKdotexp] = "dotexp";
|
|
3049 Token::tochars[TOKdotti] = "dotti";
|
|
3050 Token::tochars[TOKdotvar] = "dotvar";
|
|
3051 Token::tochars[TOKdottype] = "dottype";
|
|
3052 Token::tochars[TOKsymoff] = "symoff";
|
|
3053 Token::tochars[TOKtypedot] = "typedot";
|
|
3054 Token::tochars[TOKarraylength] = "arraylength";
|
|
3055 Token::tochars[TOKarrayliteral] = "arrayliteral";
|
|
3056 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
|
|
3057 Token::tochars[TOKstructliteral] = "structliteral";
|
|
3058 Token::tochars[TOKstring] = "string";
|
|
3059 Token::tochars[TOKdsymbol] = "symbol";
|
|
3060 Token::tochars[TOKtuple] = "tuple";
|
|
3061 Token::tochars[TOKdeclaration] = "declaration";
|
|
3062 Token::tochars[TOKdottd] = "dottd";
|
|
3063 Token::tochars[TOKon_scope_exit] = "scope(exit)";
|
|
3064 }
|