Mercurial > projects > ldc
annotate dmd/lexer.c @ 1650:40bd4a0d4870
Update to work with LLVM 2.7.
Removed use of dyn_cast, llvm now compiles
without exceptions and rtti by
default. We do need exceptions for the libconfig stuff, but rtti isn't
necessary (anymore).
Debug info needs to be rewritten, as in LLVM 2.7 the format has
completely changed. To have something to look at while rewriting, the
old code has been wrapped inside #ifndef DISABLE_DEBUG_INFO , this means
that you have to define this to compile at the moment.
Updated tango 0.99.9 patch to include updated EH runtime code, which is
needed for LLVM 2.7 as well.
author | Tomas Lindquist Olsen |
---|---|
date | Wed, 19 May 2010 12:42:32 +0200 |
parents | dbf7b54f542f |
children |
rev | line source |
---|---|
159 | 1 |
2 // Compiler implementation of the D programming language | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3 // Copyright (c) 1999-2009 by Digital Mars |
159 | 4 // All Rights Reserved |
5 // written by Walter Bright | |
6 // http://www.digitalmars.com | |
7 // License for redistribution is by either the Artistic License | |
8 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
9 // See the included readme.txt for details. | |
10 | |
1431
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
11 #if __sun && __SVR4 |
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
12 #define __C99FEATURES__ 1 // Needed on Solaris for NaN and more, LDC#313 |
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
13 #endif |
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
14 |
1228
79758fd2f48a
Added Doxygen file.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1195
diff
changeset
|
15 #if IN_LLVM |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
16 #include <cmath> |
1228
79758fd2f48a
Added Doxygen file.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1195
diff
changeset
|
17 #endif |
872
aa953cc960b6
Apply BlueZeniX's patch for OpenSolaris compatibility. Fixes #158.
Christian Kamm <kamm incasoftware de>
parents:
846
diff
changeset
|
18 |
159 | 19 /* Lexical Analyzer */ |
20 | |
21 #include <stdio.h> | |
22 #include <string.h> | |
23 #include <ctype.h> | |
24 #include <stdarg.h> | |
25 #include <errno.h> | |
26 #include <wchar.h> | |
27 #include <stdlib.h> | |
28 #include <assert.h> | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
29 #include <time.h> // for time() and ctime() |
159 | 30 |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
31 #include "rmem.h" |
159 | 32 |
33 #include "stringtable.h" | |
34 | |
35 #include "lexer.h" | |
36 #include "utf.h" | |
37 #include "identifier.h" | |
38 #include "id.h" | |
39 #include "module.h" | |
40 | |
41 #if _WIN32 && __DMC__ | |
42 // from \dm\src\include\setlocal.h | |
43 extern "C" char * __cdecl __locale_decpoint; | |
44 #endif | |
45 | |
46 extern int HtmlNamedEntity(unsigned char *p, int length); | |
47 | |
48 #define LS 0x2028 // UTF line separator | |
49 #define PS 0x2029 // UTF paragraph separator | |
50 | |
51 /******************************************** | |
52 * Do our own char maps | |
53 */ | |
54 | |
// Per-byte classification flags, indexed by character value.
// Populated by cmtable_init().
static unsigned char cmtable[256];

const int CMoctal  = 0x1;   // byte is an octal digit
const int CMhex    = 0x2;   // byte is a hexadecimal digit
const int CMidchar = 0x4;   // byte is alphanumeric or '_'

inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Fill in cmtable: for every byte value, OR in the flags that apply to it.
 * Existing bits are kept, so running it more than once changes nothing.
 */
static void cmtable_init()
{
    const unsigned count = sizeof(cmtable) / sizeof(cmtable[0]);

    for (unsigned ch = 0; ch != count; ++ch)
    {
        unsigned char flags = cmtable[ch];

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;

        const bool lowerHex = (ch >= 'a' && ch <= 'f');
        const bool upperHex = (ch >= 'A' && ch <= 'F');
        if (isdigit(ch) || lowerHex || upperHex)
            flags |= CMhex;

        if (ch == '_' || isalnum(ch))
            flags |= CMidchar;

        cmtable[ch] = flags;
    }
}
77 | |
78 | |
79 /************************* Token **********************************************/ | |
80 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
81 const char *Token::tochars[TOKMAX]; |
159 | 82 |
83 void *Token::operator new(size_t size) | |
84 { Token *t; | |
85 | |
86 if (Lexer::freelist) | |
87 { | |
88 t = Lexer::freelist; | |
89 Lexer::freelist = t->next; | |
90 return t; | |
91 } | |
92 | |
93 return ::operator new(size); | |
94 } | |
95 | |
#ifdef DEBUG
/* Debug aid: write this token's text, followed by a newline, to stdmsg. */
void Token::print()
{
    const char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
102 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
103 const char *Token::toChars() |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
104 { const char *p; |
159 | 105 static char buffer[3 + 3 * sizeof(value) + 1]; |
106 | |
107 p = buffer; | |
108 switch (value) | |
109 { | |
110 case TOKint32v: | |
111 sprintf(buffer,"%d",(d_int32)int64value); | |
112 break; | |
113 | |
114 case TOKuns32v: | |
115 case TOKcharv: | |
116 case TOKwcharv: | |
117 case TOKdcharv: | |
118 sprintf(buffer,"%uU",(d_uns32)uns64value); | |
119 break; | |
120 | |
121 case TOKint64v: | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
122 sprintf(buffer,"%jdL",int64value); |
159 | 123 break; |
124 | |
125 case TOKuns64v: | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
126 sprintf(buffer,"%juUL",uns64value); |
159 | 127 break; |
128 | |
129 #if IN_GCC | |
130 case TOKfloat32v: | |
131 case TOKfloat64v: | |
132 case TOKfloat80v: | |
133 float80value.format(buffer, sizeof(buffer)); | |
134 break; | |
135 case TOKimaginary32v: | |
136 case TOKimaginary64v: | |
137 case TOKimaginary80v: | |
138 float80value.format(buffer, sizeof(buffer)); | |
139 // %% buffer | |
140 strcat(buffer, "i"); | |
141 break; | |
142 #else | |
143 case TOKfloat32v: | |
144 sprintf(buffer,"%Lgf", float80value); | |
145 break; | |
146 | |
147 case TOKfloat64v: | |
148 sprintf(buffer,"%Lg", float80value); | |
149 break; | |
150 | |
151 case TOKfloat80v: | |
152 sprintf(buffer,"%LgL", float80value); | |
153 break; | |
154 | |
155 case TOKimaginary32v: | |
156 sprintf(buffer,"%Lgfi", float80value); | |
157 break; | |
158 | |
159 case TOKimaginary64v: | |
160 sprintf(buffer,"%Lgi", float80value); | |
161 break; | |
162 | |
163 case TOKimaginary80v: | |
164 sprintf(buffer,"%LgLi", float80value); | |
165 break; | |
166 #endif | |
167 | |
168 case TOKstring: | |
169 #if CSTRINGS | |
170 p = string; | |
171 #else | |
172 { OutBuffer buf; | |
173 | |
174 buf.writeByte('"'); | |
175 for (size_t i = 0; i < len; ) | |
176 { unsigned c; | |
177 | |
178 utf_decodeChar((unsigned char *)ustring, len, &i, &c); | |
179 switch (c) | |
180 { | |
181 case 0: | |
182 break; | |
183 | |
184 case '"': | |
185 case '\\': | |
186 buf.writeByte('\\'); | |
187 default: | |
188 if (isprint(c)) | |
189 buf.writeByte(c); | |
190 else if (c <= 0x7F) | |
191 buf.printf("\\x%02x", c); | |
192 else if (c <= 0xFFFF) | |
193 buf.printf("\\u%04x", c); | |
194 else | |
195 buf.printf("\\U%08x", c); | |
196 continue; | |
197 } | |
198 break; | |
199 } | |
200 buf.writeByte('"'); | |
201 if (postfix) | |
202 buf.writeByte('"'); | |
203 buf.writeByte(0); | |
204 p = (char *)buf.extractData(); | |
205 } | |
206 #endif | |
207 break; | |
208 | |
209 case TOKidentifier: | |
210 case TOKenum: | |
211 case TOKstruct: | |
212 case TOKimport: | |
1603
eae495e6ae8d
Merge DMD r248: implement Denis Koroskin's macro suggestion
Leandro Lucarella <llucax@gmail.com>
parents:
1587
diff
changeset
|
213 case BASIC_TYPES: |
159 | 214 p = ident->toChars(); |
215 break; | |
216 | |
217 default: | |
218 p = toChars(value); | |
219 break; | |
220 } | |
221 return p; | |
222 } | |
223 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
224 const char *Token::toChars(enum TOK value) |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
225 { const char *p; |
159 | 226 static char buffer[3 + 3 * sizeof(value) + 1]; |
227 | |
228 p = tochars[value]; | |
229 if (!p) | |
230 { sprintf(buffer,"TOK%d",value); | |
231 p = buffer; | |
232 } | |
233 return p; | |
234 } | |
235 | |
236 /*************************** Lexer ********************************************/ | |
237 | |
238 Token *Lexer::freelist = NULL; | |
239 StringTable Lexer::stringtable; | |
240 OutBuffer Lexer::stringbuffer; | |
241 | |
242 Lexer::Lexer(Module *mod, | |
243 unsigned char *base, unsigned begoffset, unsigned endoffset, | |
244 int doDocComment, int commentToken) | |
245 : loc(mod, 1) | |
246 { | |
247 //printf("Lexer::Lexer(%p,%d)\n",base,length); | |
248 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); | |
249 memset(&token,0,sizeof(token)); | |
250 this->base = base; | |
251 this->end = base + endoffset; | |
252 p = base + begoffset; | |
253 this->mod = mod; | |
254 this->doDocComment = doDocComment; | |
255 this->anyToken = 0; | |
256 this->commentToken = commentToken; | |
257 //initKeywords(); | |
258 | |
259 /* If first line starts with '#!', ignore the line | |
260 */ | |
261 | |
262 if (p[0] == '#' && p[1] =='!') | |
263 { | |
264 p += 2; | |
265 while (1) | |
266 { unsigned char c = *p; | |
267 switch (c) | |
268 { | |
269 case '\n': | |
270 p++; | |
271 break; | |
272 | |
273 case '\r': | |
274 p++; | |
275 if (*p == '\n') | |
276 p++; | |
277 break; | |
278 | |
279 case 0: | |
280 case 0x1A: | |
281 break; | |
282 | |
283 default: | |
284 if (c & 0x80) | |
285 { unsigned u = decodeUTF(); | |
286 if (u == PS || u == LS) | |
287 break; | |
288 } | |
289 p++; | |
290 continue; | |
291 } | |
292 break; | |
293 } | |
294 loc.linnum = 2; | |
295 } | |
296 } | |
297 | |
298 | |
299 void Lexer::error(const char *format, ...) | |
300 { | |
301 if (mod && !global.gag) | |
302 { | |
303 char *p = loc.toChars(); | |
304 if (*p) | |
305 fprintf(stdmsg, "%s: ", p); | |
306 mem.free(p); | |
307 | |
308 va_list ap; | |
309 va_start(ap, format); | |
310 vfprintf(stdmsg, format, ap); | |
311 va_end(ap); | |
312 | |
313 fprintf(stdmsg, "\n"); | |
314 fflush(stdmsg); | |
315 | |
316 if (global.errors >= 20) // moderate blizzard of cascading messages | |
317 fatal(); | |
318 } | |
319 global.errors++; | |
320 } | |
321 | |
322 void Lexer::error(Loc loc, const char *format, ...) | |
323 { | |
324 if (mod && !global.gag) | |
325 { | |
326 char *p = loc.toChars(); | |
327 if (*p) | |
328 fprintf(stdmsg, "%s: ", p); | |
329 mem.free(p); | |
330 | |
331 va_list ap; | |
332 va_start(ap, format); | |
333 vfprintf(stdmsg, format, ap); | |
334 va_end(ap); | |
335 | |
336 fprintf(stdmsg, "\n"); | |
337 fflush(stdmsg); | |
338 | |
339 if (global.errors >= 20) // moderate blizzard of cascading messages | |
340 fatal(); | |
341 } | |
342 global.errors++; | |
343 } | |
344 | |
345 TOK Lexer::nextToken() | |
346 { Token *t; | |
347 | |
348 if (token.next) | |
349 { | |
350 t = token.next; | |
351 memcpy(&token,t,sizeof(Token)); | |
352 t->next = freelist; | |
353 freelist = t; | |
354 } | |
355 else | |
356 { | |
357 scan(&token); | |
358 } | |
359 //token.print(); | |
360 return token.value; | |
361 } | |
362 | |
363 Token *Lexer::peek(Token *ct) | |
364 { Token *t; | |
365 | |
366 if (ct->next) | |
367 t = ct->next; | |
368 else | |
369 { | |
370 t = new Token(); | |
371 scan(t); | |
372 t->next = NULL; | |
373 ct->next = t; | |
374 } | |
375 return t; | |
376 } | |
377 | |
717
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
378 /*********************** |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
379 * Look ahead at next token's value. |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
380 */ |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
381 |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
382 TOK Lexer::peekNext() |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
383 { |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
384 return peek(&token)->value; |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
385 } |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
386 |
159 | 387 /********************************* |
388 * tk is on the opening (. | |
389 * Look ahead and return token that is past the closing ). | |
390 */ | |
391 | |
392 Token *Lexer::peekPastParen(Token *tk) | |
393 { | |
394 //printf("peekPastParen()\n"); | |
395 int parens = 1; | |
396 int curlynest = 0; | |
397 while (1) | |
398 { | |
399 tk = peek(tk); | |
400 //tk->print(); | |
401 switch (tk->value) | |
402 { | |
403 case TOKlparen: | |
404 parens++; | |
405 continue; | |
406 | |
407 case TOKrparen: | |
408 --parens; | |
409 if (parens) | |
410 continue; | |
411 tk = peek(tk); | |
412 break; | |
413 | |
414 case TOKlcurly: | |
415 curlynest++; | |
416 continue; | |
417 | |
418 case TOKrcurly: | |
419 if (--curlynest >= 0) | |
420 continue; | |
421 break; | |
422 | |
423 case TOKsemicolon: | |
424 if (curlynest) | |
425 continue; | |
426 break; | |
427 | |
428 case TOKeof: | |
429 break; | |
430 | |
431 default: | |
432 continue; | |
433 } | |
434 return tk; | |
435 } | |
436 } | |
437 | |
438 /********************************** | |
439 * Determine if string is a valid Identifier. | |
440 * Placed here because of commonality with Lexer functionality. | |
441 * Returns: | |
442 * 0 invalid | |
443 */ | |
444 | |
445 int Lexer::isValidIdentifier(char *p) | |
446 { | |
447 size_t len; | |
448 size_t idx; | |
449 | |
450 if (!p || !*p) | |
451 goto Linvalid; | |
452 | |
453 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars | |
454 goto Linvalid; | |
455 | |
456 len = strlen(p); | |
457 idx = 0; | |
458 while (p[idx]) | |
459 { dchar_t dc; | |
460 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
461 const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); |
159 | 462 if (q) |
463 goto Linvalid; | |
464 | |
465 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) | |
466 goto Linvalid; | |
467 } | |
468 return 1; | |
469 | |
470 Linvalid: | |
471 return 0; | |
472 } | |
473 | |
474 /**************************** | |
475 * Turn next token in buffer into a token. | |
476 */ | |
477 | |
478 void Lexer::scan(Token *t) | |
479 { | |
480 unsigned lastLine = loc.linnum; | |
481 unsigned linnum; | |
482 | |
483 t->blockComment = NULL; | |
484 t->lineComment = NULL; | |
485 while (1) | |
486 { | |
487 t->ptr = p; | |
488 //printf("p = %p, *p = '%c'\n",p,*p); | |
489 switch (*p) | |
490 { | |
491 case 0: | |
492 case 0x1A: | |
493 t->value = TOKeof; // end of file | |
494 return; | |
495 | |
496 case ' ': | |
497 case '\t': | |
498 case '\v': | |
499 case '\f': | |
500 p++; | |
501 continue; // skip white space | |
502 | |
503 case '\r': | |
504 p++; | |
505 if (*p != '\n') // if CR stands by itself | |
506 loc.linnum++; | |
507 continue; // skip white space | |
508 | |
509 case '\n': | |
510 p++; | |
511 loc.linnum++; | |
512 continue; // skip white space | |
513 | |
514 case '0': case '1': case '2': case '3': case '4': | |
515 case '5': case '6': case '7': case '8': case '9': | |
516 t->value = number(t); | |
517 return; | |
518 | |
519 #if CSTRINGS | |
520 case '\'': | |
521 t->value = charConstant(t, 0); | |
522 return; | |
523 | |
524 case '"': | |
525 t->value = stringConstant(t,0); | |
526 return; | |
527 | |
528 case 'l': | |
529 case 'L': | |
530 if (p[1] == '\'') | |
531 { | |
532 p++; | |
533 t->value = charConstant(t, 1); | |
534 return; | |
535 } | |
536 else if (p[1] == '"') | |
537 { | |
538 p++; | |
539 t->value = stringConstant(t, 1); | |
540 return; | |
541 } | |
542 #else | |
543 case '\'': | |
544 t->value = charConstant(t,0); | |
545 return; | |
546 | |
547 case 'r': | |
548 if (p[1] != '"') | |
549 goto case_ident; | |
550 p++; | |
551 case '`': | |
552 t->value = wysiwygStringConstant(t, *p); | |
553 return; | |
554 | |
555 case 'x': | |
556 if (p[1] != '"') | |
557 goto case_ident; | |
558 p++; | |
559 t->value = hexStringConstant(t); | |
560 return; | |
561 | |
336 | 562 #if DMDV2 |
159 | 563 case 'q': |
564 if (p[1] == '"') | |
565 { | |
566 p++; | |
567 t->value = delimitedStringConstant(t); | |
568 return; | |
569 } | |
570 else if (p[1] == '{') | |
571 { | |
572 p++; | |
573 t->value = tokenStringConstant(t); | |
574 return; | |
575 } | |
576 else | |
577 goto case_ident; | |
578 #endif | |
579 | |
580 case '"': | |
581 t->value = escapeStringConstant(t,0); | |
582 return; | |
583 | |
584 case '\\': // escaped string literal | |
585 { unsigned c; | |
586 | |
587 stringbuffer.reset(); | |
588 do | |
589 { | |
590 p++; | |
591 switch (*p) | |
592 { | |
593 case 'u': | |
594 case 'U': | |
595 case '&': | |
596 c = escapeSequence(); | |
597 stringbuffer.writeUTF8(c); | |
598 break; | |
599 | |
600 default: | |
601 c = escapeSequence(); | |
602 stringbuffer.writeByte(c); | |
603 break; | |
604 } | |
605 } while (*p == '\\'); | |
606 t->len = stringbuffer.offset; | |
607 stringbuffer.writeByte(0); | |
608 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
609 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
610 t->postfix = 0; | |
611 t->value = TOKstring; | |
612 return; | |
613 } | |
614 | |
615 case 'l': | |
616 case 'L': | |
617 #endif | |
618 case 'a': case 'b': case 'c': case 'd': case 'e': | |
619 case 'f': case 'g': case 'h': case 'i': case 'j': | |
620 case 'k': case 'm': case 'n': case 'o': | |
336 | 621 #if DMDV2 |
159 | 622 case 'p': /*case 'q': case 'r':*/ case 's': case 't': |
623 #else | |
624 case 'p': case 'q': /*case 'r':*/ case 's': case 't': | |
625 #endif | |
626 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': | |
627 case 'z': | |
628 case 'A': case 'B': case 'C': case 'D': case 'E': | |
629 case 'F': case 'G': case 'H': case 'I': case 'J': | |
630 case 'K': case 'M': case 'N': case 'O': | |
631 case 'P': case 'Q': case 'R': case 'S': case 'T': | |
632 case 'U': case 'V': case 'W': case 'X': case 'Y': | |
633 case 'Z': | |
634 case '_': | |
635 case_ident: | |
636 { unsigned char c; | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
637 |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
638 while (1) |
159 | 639 { |
640 c = *++p; | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
641 if (isidchar(c)) |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
642 continue; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
643 else if (c & 0x80) |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
644 { unsigned char *s = p; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
645 unsigned u = decodeUTF(); |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
646 if (isUniAlpha(u)) |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
647 continue; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
648 error("char 0x%04x not allowed in identifier", u); |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
649 p = s; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
650 } |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
651 break; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
652 } |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
653 |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
654 StringValue *sv = stringtable.update((char *)t->ptr, p - t->ptr); |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
655 Identifier *id = (Identifier *) sv->ptrvalue; |
159 | 656 if (!id) |
657 { id = new Identifier(sv->lstring.string,TOKidentifier); | |
658 sv->ptrvalue = id; | |
659 } | |
660 t->ident = id; | |
661 t->value = (enum TOK) id->value; | |
662 anyToken = 1; | |
663 if (*t->ptr == '_') // if special identifier token | |
664 { | |
665 static char date[11+1]; | |
666 static char time[8+1]; | |
667 static char timestamp[24+1]; | |
668 | |
669 if (!date[0]) // lazy evaluation | |
670 { time_t t; | |
671 char *p; | |
672 | |
673 ::time(&t); | |
674 p = ctime(&t); | |
675 assert(p); | |
676 sprintf(date, "%.6s %.4s", p + 4, p + 20); | |
677 sprintf(time, "%.8s", p + 11); | |
678 sprintf(timestamp, "%.24s", p); | |
679 } | |
680 | |
336 | 681 #if DMDV1 |
159 | 682 if (mod && id == Id::FILE) |
683 { | |
684 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
685 goto Lstr; |
159 | 686 } |
687 else if (mod && id == Id::LINE) | |
688 { | |
689 t->value = TOKint64v; | |
690 t->uns64value = loc.linnum; | |
691 } | |
336 | 692 else |
693 #endif | |
694 if (id == Id::DATE) | |
159 | 695 { |
696 t->ustring = (unsigned char *)date; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
697 goto Lstr; |
159 | 698 } |
699 else if (id == Id::TIME) | |
700 { | |
701 t->ustring = (unsigned char *)time; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
702 goto Lstr; |
159 | 703 } |
704 else if (id == Id::VENDOR) | |
705 { | |
664
eef8ac26c66c
Some missed LLVMDC -> LDC.
Christian Kamm <kamm incasoftware de>
parents:
658
diff
changeset
|
706 t->ustring = (unsigned char *)"LDC"; |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
707 goto Lstr; |
159 | 708 } |
709 else if (id == Id::TIMESTAMP) | |
710 { | |
711 t->ustring = (unsigned char *)timestamp; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
712 Lstr: |
159 | 713 t->value = TOKstring; |
714 Llen: | |
715 t->postfix = 0; | |
716 t->len = strlen((char *)t->ustring); | |
717 } | |
718 else if (id == Id::VERSIONX) | |
719 { unsigned major = 0; | |
720 unsigned minor = 0; | |
721 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
722 for (const char *p = global.version + 1; 1; p++) |
159 | 723 { |
724 char c = *p; | |
725 if (isdigit(c)) | |
726 minor = minor * 10 + c - '0'; | |
727 else if (c == '.') | |
728 { major = minor; | |
729 minor = 0; | |
730 } | |
731 else | |
732 break; | |
733 } | |
734 t->value = TOKint64v; | |
735 t->uns64value = major * 1000 + minor; | |
736 } | |
336 | 737 #if DMDV2 |
159 | 738 else if (id == Id::EOFX) |
739 { | |
740 t->value = TOKeof; | |
741 // Advance scanner to end of file | |
742 while (!(*p == 0 || *p == 0x1A)) | |
743 p++; | |
744 } | |
745 #endif | |
746 } | |
747 //printf("t->value = %d\n",t->value); | |
748 return; | |
749 } | |
750 | |
751 case '/': | |
752 p++; | |
753 switch (*p) | |
754 { | |
755 case '=': | |
756 p++; | |
757 t->value = TOKdivass; | |
758 return; | |
759 | |
760 case '*': | |
761 p++; | |
762 linnum = loc.linnum; | |
763 while (1) | |
764 { | |
765 while (1) | |
766 { unsigned char c = *p; | |
767 switch (c) | |
768 { | |
769 case '/': | |
770 break; | |
771 | |
772 case '\n': | |
773 loc.linnum++; | |
774 p++; | |
775 continue; | |
776 | |
777 case '\r': | |
778 p++; | |
779 if (*p != '\n') | |
780 loc.linnum++; | |
781 continue; | |
782 | |
783 case 0: | |
784 case 0x1A: | |
785 error("unterminated /* */ comment"); | |
786 p = end; | |
787 t->value = TOKeof; | |
788 return; | |
789 | |
790 default: | |
791 if (c & 0x80) | |
792 { unsigned u = decodeUTF(); | |
793 if (u == PS || u == LS) | |
794 loc.linnum++; | |
795 } | |
796 p++; | |
797 continue; | |
798 } | |
799 break; | |
800 } | |
801 p++; | |
802 if (p[-2] == '*' && p - 3 != t->ptr) | |
803 break; | |
804 } | |
805 if (commentToken) | |
806 { | |
807 t->value = TOKcomment; | |
808 return; | |
809 } | |
810 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) | |
811 { // if /** but not /**/ | |
812 getDocComment(t, lastLine == linnum); | |
813 } | |
814 continue; | |
815 | |
816 case '/': // do // style comments | |
817 linnum = loc.linnum; | |
818 while (1) | |
819 { unsigned char c = *++p; | |
820 switch (c) | |
821 { | |
822 case '\n': | |
823 break; | |
824 | |
825 case '\r': | |
826 if (p[1] == '\n') | |
827 p++; | |
828 break; | |
829 | |
830 case 0: | |
831 case 0x1A: | |
832 if (commentToken) | |
833 { | |
834 p = end; | |
835 t->value = TOKcomment; | |
836 return; | |
837 } | |
838 if (doDocComment && t->ptr[2] == '/') | |
839 getDocComment(t, lastLine == linnum); | |
840 p = end; | |
841 t->value = TOKeof; | |
842 return; | |
843 | |
844 default: | |
845 if (c & 0x80) | |
846 { unsigned u = decodeUTF(); | |
847 if (u == PS || u == LS) | |
848 break; | |
849 } | |
850 continue; | |
851 } | |
852 break; | |
853 } | |
854 | |
855 if (commentToken) | |
856 { | |
857 p++; | |
858 loc.linnum++; | |
859 t->value = TOKcomment; | |
860 return; | |
861 } | |
862 if (doDocComment && t->ptr[2] == '/') | |
863 getDocComment(t, lastLine == linnum); | |
864 | |
865 p++; | |
866 loc.linnum++; | |
867 continue; | |
868 | |
869 case '+': | |
870 { int nest; | |
871 | |
872 linnum = loc.linnum; | |
873 p++; | |
874 nest = 1; | |
875 while (1) | |
876 { unsigned char c = *p; | |
877 switch (c) | |
878 { | |
879 case '/': | |
880 p++; | |
881 if (*p == '+') | |
882 { | |
883 p++; | |
884 nest++; | |
885 } | |
886 continue; | |
887 | |
888 case '+': | |
889 p++; | |
890 if (*p == '/') | |
891 { | |
892 p++; | |
893 if (--nest == 0) | |
894 break; | |
895 } | |
896 continue; | |
897 | |
898 case '\r': | |
899 p++; | |
900 if (*p != '\n') | |
901 loc.linnum++; | |
902 continue; | |
903 | |
904 case '\n': | |
905 loc.linnum++; | |
906 p++; | |
907 continue; | |
908 | |
909 case 0: | |
910 case 0x1A: | |
911 error("unterminated /+ +/ comment"); | |
912 p = end; | |
913 t->value = TOKeof; | |
914 return; | |
915 | |
916 default: | |
917 if (c & 0x80) | |
918 { unsigned u = decodeUTF(); | |
919 if (u == PS || u == LS) | |
920 loc.linnum++; | |
921 } | |
922 p++; | |
923 continue; | |
924 } | |
925 break; | |
926 } | |
927 if (commentToken) | |
928 { | |
929 t->value = TOKcomment; | |
930 return; | |
931 } | |
932 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) | |
933 { // if /++ but not /++/ | |
934 getDocComment(t, lastLine == linnum); | |
935 } | |
936 continue; | |
937 } | |
938 } | |
939 t->value = TOKdiv; | |
940 return; | |
941 | |
942 case '.': | |
943 p++; | |
944 if (isdigit(*p)) | |
945 { /* Note that we don't allow ._1 and ._ as being | |
946 * valid floating point numbers. | |
947 */ | |
948 p--; | |
949 t->value = inreal(t); | |
950 } | |
951 else if (p[0] == '.') | |
952 { | |
953 if (p[1] == '.') | |
954 { p += 2; | |
955 t->value = TOKdotdotdot; | |
956 } | |
957 else | |
958 { p++; | |
959 t->value = TOKslice; | |
960 } | |
961 } | |
962 else | |
963 t->value = TOKdot; | |
964 return; | |
965 | |
966 case '&': | |
967 p++; | |
968 if (*p == '=') | |
969 { p++; | |
970 t->value = TOKandass; | |
971 } | |
972 else if (*p == '&') | |
973 { p++; | |
974 t->value = TOKandand; | |
975 } | |
976 else | |
977 t->value = TOKand; | |
978 return; | |
979 | |
980 case '|': | |
981 p++; | |
982 if (*p == '=') | |
983 { p++; | |
984 t->value = TOKorass; | |
985 } | |
986 else if (*p == '|') | |
987 { p++; | |
988 t->value = TOKoror; | |
989 } | |
990 else | |
991 t->value = TOKor; | |
992 return; | |
993 | |
994 case '-': | |
995 p++; | |
996 if (*p == '=') | |
997 { p++; | |
998 t->value = TOKminass; | |
999 } | |
1000 #if 0 | |
1001 else if (*p == '>') | |
1002 { p++; | |
1003 t->value = TOKarrow; | |
1004 } | |
1005 #endif | |
1006 else if (*p == '-') | |
1007 { p++; | |
1008 t->value = TOKminusminus; | |
1009 } | |
1010 else | |
1011 t->value = TOKmin; | |
1012 return; | |
1013 | |
1014 case '+': | |
1015 p++; | |
1016 if (*p == '=') | |
1017 { p++; | |
1018 t->value = TOKaddass; | |
1019 } | |
1020 else if (*p == '+') | |
1021 { p++; | |
1022 t->value = TOKplusplus; | |
1023 } | |
1024 else | |
1025 t->value = TOKadd; | |
1026 return; | |
1027 | |
1028 case '<': | |
1029 p++; | |
1030 if (*p == '=') | |
1031 { p++; | |
1032 t->value = TOKle; // <= | |
1033 } | |
1034 else if (*p == '<') | |
1035 { p++; | |
1036 if (*p == '=') | |
1037 { p++; | |
1038 t->value = TOKshlass; // <<= | |
1039 } | |
1040 else | |
1041 t->value = TOKshl; // << | |
1042 } | |
1043 else if (*p == '>') | |
1044 { p++; | |
1045 if (*p == '=') | |
1046 { p++; | |
1047 t->value = TOKleg; // <>= | |
1048 } | |
1049 else | |
1050 t->value = TOKlg; // <> | |
1051 } | |
1052 else | |
1053 t->value = TOKlt; // < | |
1054 return; | |
1055 | |
1056 case '>': | |
1057 p++; | |
1058 if (*p == '=') | |
1059 { p++; | |
1060 t->value = TOKge; // >= | |
1061 } | |
1062 else if (*p == '>') | |
1063 { p++; | |
1064 if (*p == '=') | |
1065 { p++; | |
1066 t->value = TOKshrass; // >>= | |
1067 } | |
1068 else if (*p == '>') | |
1069 { p++; | |
1070 if (*p == '=') | |
1071 { p++; | |
1072 t->value = TOKushrass; // >>>= | |
1073 } | |
1074 else | |
1075 t->value = TOKushr; // >>> | |
1076 } | |
1077 else | |
1078 t->value = TOKshr; // >> | |
1079 } | |
1080 else | |
1081 t->value = TOKgt; // > | |
1082 return; | |
1083 | |
1084 case '!': | |
1085 p++; | |
1086 if (*p == '=') | |
1087 { p++; | |
1088 if (*p == '=' && global.params.Dversion == 1) | |
1089 { p++; | |
1090 t->value = TOKnotidentity; // !== | |
1091 } | |
1092 else | |
1093 t->value = TOKnotequal; // != | |
1094 } | |
1095 else if (*p == '<') | |
1096 { p++; | |
1097 if (*p == '>') | |
1098 { p++; | |
1099 if (*p == '=') | |
1100 { p++; | |
1101 t->value = TOKunord; // !<>= | |
1102 } | |
1103 else | |
1104 t->value = TOKue; // !<> | |
1105 } | |
1106 else if (*p == '=') | |
1107 { p++; | |
1108 t->value = TOKug; // !<= | |
1109 } | |
1110 else | |
1111 t->value = TOKuge; // !< | |
1112 } | |
1113 else if (*p == '>') | |
1114 { p++; | |
1115 if (*p == '=') | |
1116 { p++; | |
1117 t->value = TOKul; // !>= | |
1118 } | |
1119 else | |
1120 t->value = TOKule; // !> | |
1121 } | |
1122 else | |
1123 t->value = TOKnot; // ! | |
1124 return; | |
1125 | |
1126 case '=': | |
1127 p++; | |
1128 if (*p == '=') | |
1129 { p++; | |
1130 if (*p == '=' && global.params.Dversion == 1) | |
1131 { p++; | |
1132 t->value = TOKidentity; // === | |
1133 } | |
1134 else | |
1135 t->value = TOKequal; // == | |
1136 } | |
1137 else | |
1138 t->value = TOKassign; // = | |
1139 return; | |
1140 | |
1141 case '~': | |
1142 p++; | |
1143 if (*p == '=') | |
1144 { p++; | |
1145 t->value = TOKcatass; // ~= | |
1146 } | |
1147 else | |
1148 t->value = TOKtilde; // ~ | |
1149 return; | |
1150 | |
1151 #define SINGLE(c,tok) case c: p++; t->value = tok; return; | |
1152 | |
1153 SINGLE('(', TOKlparen) | |
1154 SINGLE(')', TOKrparen) | |
1155 SINGLE('[', TOKlbracket) | |
1156 SINGLE(']', TOKrbracket) | |
1157 SINGLE('{', TOKlcurly) | |
1158 SINGLE('}', TOKrcurly) | |
1159 SINGLE('?', TOKquestion) | |
1160 SINGLE(',', TOKcomma) | |
1161 SINGLE(';', TOKsemicolon) | |
1162 SINGLE(':', TOKcolon) | |
1163 SINGLE('$', TOKdollar) | |
1164 | |
1165 #undef SINGLE | |
1166 | |
1167 #define DOUBLE(c1,tok1,c2,tok2) \ | |
1168 case c1: \ | |
1169 p++; \ | |
1170 if (*p == c2) \ | |
1171 { p++; \ | |
1172 t->value = tok2; \ | |
1173 } \ | |
1174 else \ | |
1175 t->value = tok1; \ | |
1176 return; | |
1177 | |
1178 DOUBLE('*', TOKmul, '=', TOKmulass) | |
1179 DOUBLE('%', TOKmod, '=', TOKmodass) | |
1180 DOUBLE('^', TOKxor, '=', TOKxorass) | |
1181 | |
1182 #undef DOUBLE | |
1183 | |
1184 case '#': | |
1185 p++; | |
1186 pragma(); | |
1187 continue; | |
1188 | |
1189 default: | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1190 { unsigned c = *p; |
159 | 1191 |
1192 if (c & 0x80) | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1193 { c = decodeUTF(); |
159 | 1194 |
1195 // Check for start of unicode identifier | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1196 if (isUniAlpha(c)) |
159 | 1197 goto case_ident; |
1198 | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1199 if (c == PS || c == LS) |
159 | 1200 { |
1201 loc.linnum++; | |
1202 p++; | |
1203 continue; | |
1204 } | |
1205 } | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1206 if (c < 0x80 && isprint(c)) |
159 | 1207 error("unsupported char '%c'", c); |
1208 else | |
1209 error("unsupported char 0x%02x", c); | |
1210 p++; | |
1211 continue; | |
1212 } | |
1213 } | |
1214 } | |
1215 } | |
1216 | |
1217 /******************************************* | |
1218 * Parse escape sequence. | |
1219 */ | |
1220 | |
1221 unsigned Lexer::escapeSequence() | |
1222 { unsigned c; | |
1223 int n; | |
1224 int ndigits; | |
1225 | |
1226 c = *p; | |
1227 switch (c) | |
1228 { | |
1229 case '\'': | |
1230 case '"': | |
1231 case '?': | |
1232 case '\\': | |
1233 Lconsume: | |
1234 p++; | |
1235 break; | |
1236 | |
1237 case 'a': c = 7; goto Lconsume; | |
1238 case 'b': c = 8; goto Lconsume; | |
1239 case 'f': c = 12; goto Lconsume; | |
1240 case 'n': c = 10; goto Lconsume; | |
1241 case 'r': c = 13; goto Lconsume; | |
1242 case 't': c = 9; goto Lconsume; | |
1243 case 'v': c = 11; goto Lconsume; | |
1244 | |
1245 case 'u': | |
1246 ndigits = 4; | |
1247 goto Lhex; | |
1248 case 'U': | |
1249 ndigits = 8; | |
1250 goto Lhex; | |
1251 case 'x': | |
1252 ndigits = 2; | |
1253 Lhex: | |
1254 p++; | |
1255 c = *p; | |
1256 if (ishex(c)) | |
1257 { unsigned v; | |
1258 | |
1259 n = 0; | |
1260 v = 0; | |
1261 while (1) | |
1262 { | |
1263 if (isdigit(c)) | |
1264 c -= '0'; | |
1265 else if (islower(c)) | |
1266 c -= 'a' - 10; | |
1267 else | |
1268 c -= 'A' - 10; | |
1269 v = v * 16 + c; | |
1270 c = *++p; | |
1271 if (++n == ndigits) | |
1272 break; | |
1273 if (!ishex(c)) | |
1274 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); | |
1275 break; | |
1276 } | |
1277 } | |
1278 if (ndigits != 2 && !utf_isValidDchar(v)) | |
1587 | 1279 { error("invalid UTF character \\U%08x", v); |
1280 v = '?'; // recover with valid UTF character | |
1281 } | |
159 | 1282 c = v; |
1283 } | |
1284 else | |
1285 error("undefined escape hex sequence \\%c\n",c); | |
1286 break; | |
1287 | |
1288 case '&': // named character entity | |
1289 for (unsigned char *idstart = ++p; 1; p++) | |
1290 { | |
1291 switch (*p) | |
1292 { | |
1293 case ';': | |
1294 c = HtmlNamedEntity(idstart, p - idstart); | |
1295 if (c == ~0) | |
1296 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); | |
1297 c = ' '; | |
1298 } | |
1299 p++; | |
1300 break; | |
1301 | |
1302 default: | |
1303 if (isalpha(*p) || | |
1304 (p != idstart + 1 && isdigit(*p))) | |
1305 continue; | |
1306 error("unterminated named entity"); | |
1307 break; | |
1308 } | |
1309 break; | |
1310 } | |
1311 break; | |
1312 | |
1313 case 0: | |
1314 case 0x1A: // end of file | |
1315 c = '\\'; | |
1316 break; | |
1317 | |
1318 default: | |
1319 if (isoctal(c)) | |
1320 { unsigned v; | |
1321 | |
1322 n = 0; | |
1323 v = 0; | |
1324 do | |
1325 { | |
1326 v = v * 8 + (c - '0'); | |
1327 c = *++p; | |
1328 } while (++n < 3 && isoctal(c)); | |
1329 c = v; | |
1330 if (c > 0xFF) | |
1331 error("0%03o is larger than a byte", c); | |
1332 } | |
1333 else | |
1334 error("undefined escape sequence \\%c\n",c); | |
1335 break; | |
1336 } | |
1337 return c; | |
1338 } | |
1339 | |
1340 /************************************** | |
1341 */ | |
1342 | |
1343 TOK Lexer::wysiwygStringConstant(Token *t, int tc) | |
1344 { unsigned c; | |
1345 Loc start = loc; | |
1346 | |
1347 p++; | |
1348 stringbuffer.reset(); | |
1349 while (1) | |
1350 { | |
1351 c = *p++; | |
1352 switch (c) | |
1353 { | |
1354 case '\n': | |
1355 loc.linnum++; | |
1356 break; | |
1357 | |
1358 case '\r': | |
1359 if (*p == '\n') | |
1360 continue; // ignore | |
1361 c = '\n'; // treat EndOfLine as \n character | |
1362 loc.linnum++; | |
1363 break; | |
1364 | |
1365 case 0: | |
1366 case 0x1A: | |
1367 error("unterminated string constant starting at %s", start.toChars()); | |
1368 t->ustring = (unsigned char *)""; | |
1369 t->len = 0; | |
1370 t->postfix = 0; | |
1371 return TOKstring; | |
1372 | |
1373 case '"': | |
1374 case '`': | |
1375 if (c == tc) | |
1376 { | |
1377 t->len = stringbuffer.offset; | |
1378 stringbuffer.writeByte(0); | |
1379 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1380 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1381 stringPostfix(t); | |
1382 return TOKstring; | |
1383 } | |
1384 break; | |
1385 | |
1386 default: | |
1387 if (c & 0x80) | |
1388 { p--; | |
1389 unsigned u = decodeUTF(); | |
1390 p++; | |
1391 if (u == PS || u == LS) | |
1392 loc.linnum++; | |
1393 stringbuffer.writeUTF8(u); | |
1394 continue; | |
1395 } | |
1396 break; | |
1397 } | |
1398 stringbuffer.writeByte(c); | |
1399 } | |
1400 } | |
1401 | |
1402 /************************************** | |
1403 * Lex hex strings: | |
1404 * x"0A ae 34FE BD" | |
1405 */ | |
1406 | |
1407 TOK Lexer::hexStringConstant(Token *t) | |
1408 { unsigned c; | |
1409 Loc start = loc; | |
1410 unsigned n = 0; | |
1411 unsigned v; | |
1412 | |
1413 p++; | |
1414 stringbuffer.reset(); | |
1415 while (1) | |
1416 { | |
1417 c = *p++; | |
1418 switch (c) | |
1419 { | |
1420 case ' ': | |
1421 case '\t': | |
1422 case '\v': | |
1423 case '\f': | |
1424 continue; // skip white space | |
1425 | |
1426 case '\r': | |
1427 if (*p == '\n') | |
1428 continue; // ignore | |
1429 // Treat isolated '\r' as if it were a '\n' | |
1430 case '\n': | |
1431 loc.linnum++; | |
1432 continue; | |
1433 | |
1434 case 0: | |
1435 case 0x1A: | |
1436 error("unterminated string constant starting at %s", start.toChars()); | |
1437 t->ustring = (unsigned char *)""; | |
1438 t->len = 0; | |
1439 t->postfix = 0; | |
1440 return TOKstring; | |
1441 | |
1442 case '"': | |
1443 if (n & 1) | |
1444 { error("odd number (%d) of hex characters in hex string", n); | |
1445 stringbuffer.writeByte(v); | |
1446 } | |
1447 t->len = stringbuffer.offset; | |
1448 stringbuffer.writeByte(0); | |
1449 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1450 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1451 stringPostfix(t); | |
1452 return TOKstring; | |
1453 | |
1454 default: | |
1455 if (c >= '0' && c <= '9') | |
1456 c -= '0'; | |
1457 else if (c >= 'a' && c <= 'f') | |
1458 c -= 'a' - 10; | |
1459 else if (c >= 'A' && c <= 'F') | |
1460 c -= 'A' - 10; | |
1461 else if (c & 0x80) | |
1462 { p--; | |
1463 unsigned u = decodeUTF(); | |
1464 p++; | |
1465 if (u == PS || u == LS) | |
1466 loc.linnum++; | |
1467 else | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1468 error("non-hex character \\u%04x", u); |
159 | 1469 } |
1470 else | |
1471 error("non-hex character '%c'", c); | |
1472 if (n & 1) | |
1473 { v = (v << 4) | c; | |
1474 stringbuffer.writeByte(v); | |
1475 } | |
1476 else | |
1477 v = c; | |
1478 n++; | |
1479 break; | |
1480 } | |
1481 } | |
1482 } | |
1483 | |
1484 | |
336 | 1485 #if DMDV2 |
159 | 1486 /************************************** |
1487 * Lex delimited strings: | |
1488 * q"(foo(xxx))" // "foo(xxx)" | |
1489 * q"[foo(]" // "foo(" | |
1490 * q"/foo]/" // "foo]" | |
1491 * q"HERE | |
1492 * foo | |
1493 * HERE" // "foo\n" | |
1494 * Input: | |
1495 * p is on the " | |
1496 */ | |
1497 | |
1498 TOK Lexer::delimitedStringConstant(Token *t) | |
1499 { unsigned c; | |
1500 Loc start = loc; | |
1501 unsigned delimleft = 0; | |
1502 unsigned delimright = 0; | |
1503 unsigned nest = 1; | |
1504 unsigned nestcount; | |
1505 Identifier *hereid = NULL; | |
1506 unsigned blankrol = 0; | |
1507 unsigned startline = 0; | |
1508 | |
1509 p++; | |
1510 stringbuffer.reset(); | |
1511 while (1) | |
1512 { | |
1513 c = *p++; | |
1514 //printf("c = '%c'\n", c); | |
1515 switch (c) | |
1516 { | |
1517 case '\n': | |
1518 Lnextline: | |
1519 loc.linnum++; | |
1520 startline = 1; | |
1521 if (blankrol) | |
1522 { blankrol = 0; | |
1523 continue; | |
1524 } | |
1525 if (hereid) | |
1526 { | |
1527 stringbuffer.writeUTF8(c); | |
1528 continue; | |
1529 } | |
1530 break; | |
1531 | |
1532 case '\r': | |
1533 if (*p == '\n') | |
1534 continue; // ignore | |
1535 c = '\n'; // treat EndOfLine as \n character | |
1536 goto Lnextline; | |
1537 | |
1538 case 0: | |
1539 case 0x1A: | |
1540 goto Lerror; | |
1541 | |
1542 default: | |
1543 if (c & 0x80) | |
1544 { p--; | |
1545 c = decodeUTF(); | |
1546 p++; | |
1547 if (c == PS || c == LS) | |
1548 goto Lnextline; | |
1549 } | |
1550 break; | |
1551 } | |
1552 if (delimleft == 0) | |
1553 { delimleft = c; | |
1554 nest = 1; | |
1555 nestcount = 1; | |
1556 if (c == '(') | |
1557 delimright = ')'; | |
1558 else if (c == '{') | |
1559 delimright = '}'; | |
1560 else if (c == '[') | |
1561 delimright = ']'; | |
1562 else if (c == '<') | |
1563 delimright = '>'; | |
1564 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) | |
1565 { // Start of identifier; must be a heredoc | |
1566 Token t; | |
1567 p--; | |
1568 scan(&t); // read in heredoc identifier | |
1569 if (t.value != TOKidentifier) | |
1570 { error("identifier expected for heredoc, not %s", t.toChars()); | |
1571 delimright = c; | |
1572 } | |
1573 else | |
1574 { hereid = t.ident; | |
1575 //printf("hereid = '%s'\n", hereid->toChars()); | |
1576 blankrol = 1; | |
1577 } | |
1578 nest = 0; | |
1579 } | |
1580 else | |
1581 { delimright = c; | |
1582 nest = 0; | |
1583 } | |
1584 } | |
1585 else | |
1586 { | |
1587 if (blankrol) | |
1588 { error("heredoc rest of line should be blank"); | |
1589 blankrol = 0; | |
1590 continue; | |
1591 } | |
1592 if (nest == 1) | |
1593 { | |
1594 if (c == delimleft) | |
1595 nestcount++; | |
1596 else if (c == delimright) | |
1597 { nestcount--; | |
1598 if (nestcount == 0) | |
1599 goto Ldone; | |
1600 } | |
1601 } | |
1602 else if (c == delimright) | |
1603 goto Ldone; | |
1604 if (startline && isalpha(c)) | |
1605 { Token t; | |
1606 unsigned char *psave = p; | |
1607 p--; | |
1608 scan(&t); // read in possible heredoc identifier | |
1609 //printf("endid = '%s'\n", t.ident->toChars()); | |
1610 if (t.value == TOKidentifier && t.ident->equals(hereid)) | |
1611 { /* should check that rest of line is blank | |
1612 */ | |
1613 goto Ldone; | |
1614 } | |
1615 p = psave; | |
1616 } | |
1617 stringbuffer.writeUTF8(c); | |
1618 startline = 0; | |
1619 } | |
1620 } | |
1621 | |
1622 Ldone: | |
1623 if (*p == '"') | |
1624 p++; | |
1625 else | |
1626 error("delimited string must end in %c\"", delimright); | |
1627 t->len = stringbuffer.offset; | |
1628 stringbuffer.writeByte(0); | |
1629 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1630 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1631 stringPostfix(t); | |
1632 return TOKstring; | |
1633 | |
1634 Lerror: | |
1635 error("unterminated string constant starting at %s", start.toChars()); | |
1636 t->ustring = (unsigned char *)""; | |
1637 t->len = 0; | |
1638 t->postfix = 0; | |
1639 return TOKstring; | |
1640 } | |
1641 | |
1642 /************************************** | |
1643 * Lex delimited strings: | |
1644 * q{ foo(xxx) } // " foo(xxx) " | |
1645 * q{foo(} // "foo(" | |
1646 * q{{foo}"}"} // "{foo}"}"" | |
1647 * Input: | |
1648 * p is on the q | |
1649 */ | |
1650 | |
1651 TOK Lexer::tokenStringConstant(Token *t) | |
1652 { | |
1653 unsigned nest = 1; | |
1654 Loc start = loc; | |
1655 unsigned char *pstart = ++p; | |
1656 | |
1657 while (1) | |
1658 { Token tok; | |
1659 | |
1660 scan(&tok); | |
1661 switch (tok.value) | |
1662 { | |
1663 case TOKlcurly: | |
1664 nest++; | |
1665 continue; | |
1666 | |
1667 case TOKrcurly: | |
1668 if (--nest == 0) | |
1669 goto Ldone; | |
1670 continue; | |
1671 | |
1672 case TOKeof: | |
1673 goto Lerror; | |
1674 | |
1675 default: | |
1676 continue; | |
1677 } | |
1678 } | |
1679 | |
1680 Ldone: | |
1681 t->len = p - 1 - pstart; | |
1682 t->ustring = (unsigned char *)mem.malloc(t->len + 1); | |
1683 memcpy(t->ustring, pstart, t->len); | |
1684 t->ustring[t->len] = 0; | |
1685 stringPostfix(t); | |
1686 return TOKstring; | |
1687 | |
1688 Lerror: | |
1689 error("unterminated token string constant starting at %s", start.toChars()); | |
1690 t->ustring = (unsigned char *)""; | |
1691 t->len = 0; | |
1692 t->postfix = 0; | |
1693 return TOKstring; | |
1694 } | |
1695 | |
1696 #endif | |
1697 | |
1698 | |
1699 /************************************** | |
1700 */ | |
1701 | |
1702 TOK Lexer::escapeStringConstant(Token *t, int wide) | |
1703 { unsigned c; | |
1704 Loc start = loc; | |
1705 | |
1706 p++; | |
1707 stringbuffer.reset(); | |
1708 while (1) | |
1709 { | |
1710 c = *p++; | |
1711 switch (c) | |
1712 { | |
1713 case '\\': | |
1714 switch (*p) | |
1715 { | |
1716 case 'u': | |
1717 case 'U': | |
1718 case '&': | |
1719 c = escapeSequence(); | |
1720 stringbuffer.writeUTF8(c); | |
1721 continue; | |
1722 | |
1723 default: | |
1724 c = escapeSequence(); | |
1725 break; | |
1726 } | |
1727 break; | |
1728 | |
1729 case '\n': | |
1730 loc.linnum++; | |
1731 break; | |
1732 | |
1733 case '\r': | |
1734 if (*p == '\n') | |
1735 continue; // ignore | |
1736 c = '\n'; // treat EndOfLine as \n character | |
1737 loc.linnum++; | |
1738 break; | |
1739 | |
1740 case '"': | |
1741 t->len = stringbuffer.offset; | |
1742 stringbuffer.writeByte(0); | |
1743 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1744 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1745 stringPostfix(t); | |
1746 return TOKstring; | |
1747 | |
1748 case 0: | |
1749 case 0x1A: | |
1750 p--; | |
1751 error("unterminated string constant starting at %s", start.toChars()); | |
1752 t->ustring = (unsigned char *)""; | |
1753 t->len = 0; | |
1754 t->postfix = 0; | |
1755 return TOKstring; | |
1756 | |
1757 default: | |
1758 if (c & 0x80) | |
1759 { | |
1760 p--; | |
1761 c = decodeUTF(); | |
1762 if (c == LS || c == PS) | |
1763 { c = '\n'; | |
1764 loc.linnum++; | |
1765 } | |
1766 p++; | |
1767 stringbuffer.writeUTF8(c); | |
1768 continue; | |
1769 } | |
1770 break; | |
1771 } | |
1772 stringbuffer.writeByte(c); | |
1773 } | |
1774 } | |
1775 | |
1776 /************************************** | |
1777 */ | |
1778 | |
1779 TOK Lexer::charConstant(Token *t, int wide) | |
1780 { | |
1781 unsigned c; | |
1782 TOK tk = TOKcharv; | |
1783 | |
1784 //printf("Lexer::charConstant\n"); | |
1785 p++; | |
1786 c = *p++; | |
1787 switch (c) | |
1788 { | |
1789 case '\\': | |
1790 switch (*p) | |
1791 { | |
1792 case 'u': | |
1793 t->uns64value = escapeSequence(); | |
1794 tk = TOKwcharv; | |
1795 break; | |
1796 | |
1797 case 'U': | |
1798 case '&': | |
1799 t->uns64value = escapeSequence(); | |
1800 tk = TOKdcharv; | |
1801 break; | |
1802 | |
1803 default: | |
1804 t->uns64value = escapeSequence(); | |
1805 break; | |
1806 } | |
1807 break; | |
1808 | |
1809 case '\n': | |
1810 L1: | |
1811 loc.linnum++; | |
1812 case '\r': | |
1813 case 0: | |
1814 case 0x1A: | |
1815 case '\'': | |
1816 error("unterminated character constant"); | |
1817 return tk; | |
1818 | |
1819 default: | |
1820 if (c & 0x80) | |
1821 { | |
1822 p--; | |
1823 c = decodeUTF(); | |
1824 p++; | |
1825 if (c == LS || c == PS) | |
1826 goto L1; | |
1827 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) | |
1828 tk = TOKwcharv; | |
1829 else | |
1830 tk = TOKdcharv; | |
1831 } | |
1832 t->uns64value = c; | |
1833 break; | |
1834 } | |
1835 | |
1836 if (*p != '\'') | |
1837 { error("unterminated character constant"); | |
1838 return tk; | |
1839 } | |
1840 p++; | |
1841 return tk; | |
1842 } | |
1843 | |
1844 /*************************************** | |
1845 * Get postfix of string literal. | |
1846 */ | |
1847 | |
1848 void Lexer::stringPostfix(Token *t) | |
1849 { | |
1850 switch (*p) | |
1851 { | |
1852 case 'c': | |
1853 case 'w': | |
1854 case 'd': | |
1855 t->postfix = *p; | |
1856 p++; | |
1857 break; | |
1858 | |
1859 default: | |
1860 t->postfix = 0; | |
1861 break; | |
1862 } | |
1863 } | |
1864 | |
/***************************************
 * Read \u or \U unicode sequence
 * (disabled code, kept under #if 0)
 * Input:
 *      u       'u' or 'U'
 *      p       is on the 'u' or 'U'; the hex digits follow it
 * Returns:
 *      the value of the 4 ('u') or 8 ('U') hex digits; if a non-hex
 *      character is met first, an error is reported and the value
 *      collected so far is returned
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    unsigned wanted = (u == 'U') ? 8 : 4;       // digits required
    unsigned result = 0;
    unsigned seen = 0;                          // digits read so far

    while (1)
    {
        ++p;
        if (seen == wanted)
            break;

        unsigned char digit = *p;
        if (!ishex(digit))
        {
            error("\\%c sequence must be followed by %d hex characters", u, wanted);
            break;
        }
        if (isdigit(digit))
            digit -= '0';
        else if (islower(digit))
            digit -= 'a' - 10;
        else
            digit -= 'A' - 10;
        result = (result << 4) | digit;
        seen++;
    }
    return result;
}
#endif
1903 | |
/**************************************
 * Read in a number.
 * If it's an integer, store it in tok.TKutok.Vlong.
 *      integers can be decimal, octal or hex
 *      Handle the suffixes U, UL, LU, L, etc.
 * If it's double, store it in tok.TKutok.Vdouble.
 * Returns:
 *      TKnum
 *      TKdouble,...
 *
 * In terms of this implementation: the digits are collected into
 * stringbuffer by a state machine and converted to an integer, which is
 * stored in t->uns64value; the returned token kind (TOKint32v,
 * TOKuns32v, TOKint64v or TOKuns64v) is chosen from the value and the
 * suffixes.  As soon as the text turns out to be a floating point
 * literal, p is reset to the start of the number and the result of
 * inreal(t) is returned instead.
 */

TOK Lexer::number(Token *t)
{
    // We use a state machine to collect numbers
    enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
        STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
        STATE_hexh, STATE_error };
    enum STATE state;

    enum FLAGS
    {   FLAGS_decimal  = 1,             // decimal
        FLAGS_unsigned = 2,             // u or U suffix
        FLAGS_long     = 4,             // l or L suffix
    };
    enum FLAGS flags = FLAGS_decimal;

    int i;
    int base;
    unsigned c;
    unsigned char *start;
    TOK result;

    //printf("Lexer::number()\n");
    state = STATE_initial;
    base = 0;
    stringbuffer.reset();
    start = p;                          // remembered so that `real:` can rescan
    while (1)
    {
        // Each iteration looks at *p.  A `break` out of the state switch
        // appends c to stringbuffer and advances p; a `continue` skips
        // the append (used for '_', which advances p itself).
        c = *p;
        switch (state)
        {
            case STATE_initial:         // opening state
                if (c == '0')
                    state = STATE_0;
                else
                    state = STATE_decimal;
                break;

            case STATE_0:
                // a leading 0 means the literal is not decimal
                flags = (FLAGS) (flags & ~FLAGS_decimal);
                switch (c)
                {
#if ZEROH
                    case 'H':                   // 0h
                    case 'h':
                        goto hexh;
#endif
                    case 'X':
                    case 'x':
                        state = STATE_hex0;
                        break;

                    case '.':
                        if (p[1] == '.')        // .. is a separate token
                            goto done;
                        // else fall through: it is a real number
                    case 'i':
                    case 'f':
                    case 'F':
                        goto real;
#if ZEROH
                    case 'E':
                    case 'e':
                        goto case_hex;
#endif
                    case 'B':
                    case 'b':
                        state = STATE_binary0;
                        break;

                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                        state = STATE_octal;
                        break;

#if ZEROH
                    case '8': case '9': case 'A':
                    case 'C': case 'D': case 'F':
                    case 'a': case 'c': case 'd': case 'f':
                    case_hex:
                        state = STATE_hexh;
                        break;
#endif
                    case '_':
                        state = STATE_octal;
                        p++;
                        continue;

                    case 'L':
                        if (p[1] == 'i')
                            goto real;
                        goto done;

                    default:
                        goto done;
                }
                break;

            case STATE_decimal:         // reading decimal number
                if (!isdigit(c))
                {
#if ZEROH
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
#endif
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real;
                    else if (c == 'i' || c == 'f' || c == 'F' ||
                             c == 'e' || c == 'E')
                    {
            real:       // It's a real number. Back up and rescan as a real
                        p = start;
                        return inreal(t);
                    }
                    else if (c == 'L' && p[1] == 'i')
                        goto real;
                    goto done;
                }
                break;

            case STATE_hex0:            // reading hex number
            case STATE_hex:
                if (!ishex(c))
                {
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real;
                    if (c == 'P' || c == 'p' || c == 'i')
                        goto real;
                    // still in STATE_hex0: no hex digit followed the 0x
                    if (state == STATE_hex0)
                        error("Hex digit expected, not '%c'", c);
                    goto done;
                }
                state = STATE_hex;
                break;

#if ZEROH
            hexh:
                state = STATE_hexh;
            case STATE_hexh:            // parse numbers like 0FFh
                if (!ishex(c))
                {
                    if (c == 'H' || c == 'h')
                    {
                        p++;
                        base = 16;
                        goto done;
                    }
                    else
                    {
                        // Check for something like 1E3 or 0E24
                        if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
                            memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
                            goto real;
                        error("Hex digit expected, not '%c'", c);
                        goto done;
                    }
                }
                break;
#endif

            case STATE_octal:           // reading octal number
            case STATE_octale:          // reading octal number with non-octal digits
                if (!isoctal(c))
                {
#if ZEROH
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
#endif
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real;
                    if (c == 'i')
                        goto real;
                    if (isdigit(c))
                    {
                        // 8 or 9: keep collecting, the error is
                        // reported once at `done`
                        state = STATE_octale;
                    }
                    else
                        goto done;
                }
                break;

            case STATE_binary0:         // starting binary number
            case STATE_binary:          // reading binary number
                if (c != '0' && c != '1')
                {
#if ZEROH
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
#endif
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (state == STATE_binary0)
                    {   error("binary digit expected");
                        state = STATE_error;
                        break;
                    }
                    else
                        goto done;
                }
                state = STATE_binary;
                break;

            case STATE_error:           // for error recovery
                if (!isdigit(c))        // scan until non-digit
                    goto done;
                break;

            default:
                assert(0);
        }
        stringbuffer.writeByte(c);
        p++;
    }
done:
    stringbuffer.writeByte(0);          // terminate string
    if (state == STATE_octale)
        error("Octal digit expected");

    uinteger_t n;                       // unsigned >=64 bit integer type

    // offset == 2 means one collected character plus the terminating 0:
    // a single digit needs no conversion loop
    if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
        n = stringbuffer.data[0] - '0';
    else
    {
        // Convert string to integer
#if __DMC__
        errno = 0;
        n = strtoull((char *)stringbuffer.data,NULL,base);
        if (errno == ERANGE)
            error("integer overflow");
#else
        // Not everybody implements strtoull()
        // NOTE: this local p hides the member p (the source pointer)
        // until the end of the enclosing block
        char *p = (char *)stringbuffer.data;
        int r = 10, d;

        // pick the radix from the prefix and step over it
        if (*p == '0')
        {
            if (p[1] == 'x' || p[1] == 'X')
                p += 2, r = 16;
            else if (p[1] == 'b' || p[1] == 'B')
                p += 2, r = 2;
            else if (isdigit(p[1]))
                p += 1, r = 8;
        }

        n = 0;
        while (1)
        {
            if (*p >= '0' && *p <= '9')
                d = *p - '0';
            else if (*p >= 'a' && *p <= 'z')
                d = *p - 'a' + 10;
            else if (*p >= 'A' && *p <= 'Z')
                d = *p - 'A' + 10;
            else
                break;
            if (d >= r)
                break;
            uinteger_t n2 = n * r;
            //printf("n2 / r = %llx, n = %llx\n", n2/r, n);
            // the multiplication wrapped if dividing does not give n
            // back; the addition wrapped if the sum is below n
            if (n2 / r != n || n2 + d < n)
            {
                error ("integer overflow");
                break;
            }

            n = n2 + d;
            p++;
        }
#endif
        if (sizeof(n) > 8 &&
            n > 0xFFFFFFFFFFFFFFFFULL)  // if n needs more than 64 bits
            error("integer overflow");
    }

    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    while (1)
    {   unsigned char f;

        switch (*p)
        {   case 'U':
            case 'u':
                f = FLAGS_unsigned;
                goto L1;

            case 'l':
                // the `1 ||` makes this error unconditional
                if (1 || !global.params.useDeprecated)
                    error("'l' suffix is deprecated, use 'L' instead");
            case 'L':
                f = FLAGS_long;
            L1:
                p++;
                // the same kind of suffix given twice
                if (flags & f)
                    error("unrecognized token");
                flags = (FLAGS) (flags | f);
                continue;
            default:
                break;
        }
        break;
    }

    // Choose the token kind from the radix/suffix flags and the value
    switch (flags)
    {
        case 0:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000LL)
                    result = TOKuns64v;
            else if (n & 0xFFFFFFFF00000000LL)
                    result = TOKint64v;
            else if (n & 0x80000000)
                    result = TOKuns32v;
            else
                    result = TOKint32v;
            break;

        case FLAGS_decimal:
            /* First that fits: int, long, long long
             */
            if (n & 0x8000000000000000LL)
            {       error("signed integer overflow");
                    result = TOKuns64v;
            }
            else if (n & 0xFFFFFFFF80000000LL)
                    result = TOKint64v;
            else
                    result = TOKint32v;
            break;

        case FLAGS_unsigned:
        case FLAGS_decimal | FLAGS_unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000LL)
                    result = TOKuns64v;
            else
                    result = TOKuns32v;
            break;

        case FLAGS_decimal | FLAGS_long:
            if (n & 0x8000000000000000LL)
            {       error("signed integer overflow");
                    result = TOKuns64v;
            }
            else
                    result = TOKint64v;
            break;

        case FLAGS_long:
            if (n & 0x8000000000000000LL)
                    result = TOKuns64v;
            else
                    result = TOKint64v;
            break;

        case FLAGS_unsigned | FLAGS_long:
        case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
            result = TOKuns64v;
            break;

        default:
            #ifdef DEBUG
                printf("%x\n",flags);
            #endif
            assert(0);
    }
    t->uns64value = n;
    return result;
}
2305 | |
2306 /************************************** | |
2307 * Read in characters, converting them to real. | |
2308 * Bugs: | |
2309 * Exponent overflow not detected. | |
2310 * Too much requested precision is not detected. | |
2311 */ | |
2312 | |
2313 TOK Lexer::inreal(Token *t) | |
2314 #ifdef __DMC__ | |
2315 __in | |
2316 { | |
2317 assert(*p == '.' || isdigit(*p)); | |
2318 } | |
2319 __out (result) | |
2320 { | |
2321 switch (result) | |
2322 { | |
2323 case TOKfloat32v: | |
2324 case TOKfloat64v: | |
2325 case TOKfloat80v: | |
2326 case TOKimaginary32v: | |
2327 case TOKimaginary64v: | |
2328 case TOKimaginary80v: | |
2329 break; | |
2330 | |
2331 default: | |
2332 assert(0); | |
2333 } | |
2334 } | |
2335 __body | |
2336 #endif /* __DMC__ */ | |
2337 { int dblstate; | |
2338 unsigned c; | |
2339 char hex; // is this a hexadecimal-floating-constant? | |
2340 TOK result; | |
2341 | |
2342 //printf("Lexer::inreal()\n"); | |
2343 stringbuffer.reset(); | |
2344 dblstate = 0; | |
2345 hex = 0; | |
2346 Lnext: | |
2347 while (1) | |
2348 { | |
2349 // Get next char from input | |
2350 c = *p++; | |
2351 //printf("dblstate = %d, c = '%c'\n", dblstate, c); | |
2352 while (1) | |
2353 { | |
2354 switch (dblstate) | |
2355 { | |
2356 case 0: // opening state | |
2357 if (c == '0') | |
2358 dblstate = 9; | |
2359 else if (c == '.') | |
2360 dblstate = 3; | |
2361 else | |
2362 dblstate = 1; | |
2363 break; | |
2364 | |
2365 case 9: | |
2366 dblstate = 1; | |
2367 if (c == 'X' || c == 'x') | |
2368 { hex++; | |
2369 break; | |
2370 } | |
2371 case 1: // digits to left of . | |
2372 case 3: // digits to right of . | |
2373 case 7: // continuing exponent digits | |
2374 if (!isdigit(c) && !(hex && isxdigit(c))) | |
2375 { | |
2376 if (c == '_') | |
2377 goto Lnext; // ignore embedded '_' | |
2378 dblstate++; | |
2379 continue; | |
2380 } | |
2381 break; | |
2382 | |
2383 case 2: // no more digits to left of . | |
2384 if (c == '.') | |
2385 { dblstate++; | |
2386 break; | |
2387 } | |
2388 case 4: // no more digits to right of . | |
2389 if ((c == 'E' || c == 'e') || | |
2390 hex && (c == 'P' || c == 'p')) | |
2391 { dblstate = 5; | |
2392 hex = 0; // exponent is always decimal | |
2393 break; | |
2394 } | |
2395 if (hex) | |
2396 error("binary-exponent-part required"); | |
2397 goto done; | |
2398 | |
2399 case 5: // looking immediately to right of E | |
2400 dblstate++; | |
2401 if (c == '-' || c == '+') | |
2402 break; | |
2403 case 6: // 1st exponent digit expected | |
2404 if (!isdigit(c)) | |
2405 error("exponent expected"); | |
2406 dblstate++; | |
2407 break; | |
2408 | |
2409 case 8: // past end of exponent digits | |
2410 goto done; | |
2411 } | |
2412 break; | |
2413 } | |
2414 stringbuffer.writeByte(c); | |
2415 } | |
2416 done: | |
2417 p--; | |
2418 | |
2419 stringbuffer.writeByte(0); | |
2420 | |
2421 #if _WIN32 && __DMC__ | |
2422 char *save = __locale_decpoint; | |
2423 __locale_decpoint = "."; | |
2424 #endif | |
2425 #ifdef IN_GCC | |
2426 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); | |
2427 #else | |
2428 t->float80value = strtold((char *)stringbuffer.data, NULL); | |
2429 #endif | |
2430 errno = 0; | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2431 float strtofres; |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2432 double strtodres; |
159 | 2433 switch (*p) |
2434 { | |
2435 case 'F': | |
2436 case 'f': | |
2437 #ifdef IN_GCC | |
2438 real_t::parse((char *)stringbuffer.data, real_t::Float); | |
2439 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2440 strtofres = strtof((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2441 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2442 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2443 strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2444 errno = 0; |
159 | 2445 #endif |
2446 result = TOKfloat32v; | |
2447 p++; | |
2448 break; | |
2449 | |
2450 default: | |
2451 #ifdef IN_GCC | |
2452 real_t::parse((char *)stringbuffer.data, real_t::Double); | |
2453 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2454 strtodres = strtod((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2455 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2456 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2457 strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2458 errno = 0; |
159 | 2459 #endif |
2460 result = TOKfloat64v; | |
2461 break; | |
2462 | |
2463 case 'l': | |
2464 if (!global.params.useDeprecated) | |
2465 error("'l' suffix is deprecated, use 'L' instead"); | |
2466 case 'L': | |
2467 result = TOKfloat80v; | |
2468 p++; | |
2469 break; | |
2470 } | |
2471 if (*p == 'i' || *p == 'I') | |
2472 { | |
2473 if (!global.params.useDeprecated && *p == 'I') | |
2474 error("'I' suffix is deprecated, use 'i' instead"); | |
2475 p++; | |
2476 switch (result) | |
2477 { | |
2478 case TOKfloat32v: | |
2479 result = TOKimaginary32v; | |
2480 break; | |
2481 case TOKfloat64v: | |
2482 result = TOKimaginary64v; | |
2483 break; | |
2484 case TOKfloat80v: | |
2485 result = TOKimaginary80v; | |
2486 break; | |
2487 } | |
2488 } | |
2489 #if _WIN32 && __DMC__ | |
2490 __locale_decpoint = save; | |
2491 #endif | |
2492 if (errno == ERANGE) | |
2493 error("number is not representable"); | |
2494 return result; | |
2495 } | |
2496 | |
2497 /********************************************* | |
2498 * Do pragma. | |
2499 * Currently, the only pragma supported is: | |
2500 * #line linnum [filespec] | |
2501 */ | |
2502 | |
2503 void Lexer::pragma() | |
2504 { | |
2505 Token tok; | |
2506 int linnum; | |
2507 char *filespec = NULL; | |
2508 Loc loc = this->loc; | |
2509 | |
2510 scan(&tok); | |
2511 if (tok.value != TOKidentifier || tok.ident != Id::line) | |
2512 goto Lerr; | |
2513 | |
2514 scan(&tok); | |
2515 if (tok.value == TOKint32v || tok.value == TOKint64v) | |
2516 linnum = tok.uns64value - 1; | |
2517 else | |
2518 goto Lerr; | |
2519 | |
2520 while (1) | |
2521 { | |
2522 switch (*p) | |
2523 { | |
2524 case 0: | |
2525 case 0x1A: | |
2526 case '\n': | |
2527 Lnewline: | |
2528 this->loc.linnum = linnum; | |
2529 if (filespec) | |
2530 this->loc.filename = filespec; | |
2531 return; | |
2532 | |
2533 case '\r': | |
2534 p++; | |
2535 if (*p != '\n') | |
2536 { p--; | |
2537 goto Lnewline; | |
2538 } | |
2539 continue; | |
2540 | |
2541 case ' ': | |
2542 case '\t': | |
2543 case '\v': | |
2544 case '\f': | |
2545 p++; | |
2546 continue; // skip white space | |
2547 | |
2548 case '_': | |
2549 if (mod && memcmp(p, "__FILE__", 8) == 0) | |
2550 { | |
2551 p += 8; | |
2552 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); | |
2553 } | |
2554 continue; | |
2555 | |
2556 case '"': | |
2557 if (filespec) | |
2558 goto Lerr; | |
2559 stringbuffer.reset(); | |
2560 p++; | |
2561 while (1) | |
2562 { unsigned c; | |
2563 | |
2564 c = *p; | |
2565 switch (c) | |
2566 { | |
2567 case '\n': | |
2568 case '\r': | |
2569 case 0: | |
2570 case 0x1A: | |
2571 goto Lerr; | |
2572 | |
2573 case '"': | |
2574 stringbuffer.writeByte(0); | |
2575 filespec = mem.strdup((char *)stringbuffer.data); | |
2576 p++; | |
2577 break; | |
2578 | |
2579 default: | |
2580 if (c & 0x80) | |
2581 { unsigned u = decodeUTF(); | |
2582 if (u == PS || u == LS) | |
2583 goto Lerr; | |
2584 } | |
2585 stringbuffer.writeByte(c); | |
2586 p++; | |
2587 continue; | |
2588 } | |
2589 break; | |
2590 } | |
2591 continue; | |
2592 | |
2593 default: | |
2594 if (*p & 0x80) | |
2595 { unsigned u = decodeUTF(); | |
2596 if (u == PS || u == LS) | |
2597 goto Lnewline; | |
2598 } | |
2599 goto Lerr; | |
2600 } | |
2601 } | |
2602 | |
2603 Lerr: | |
2604 error(loc, "#line integer [\"filespec\"]\\n expected"); | |
2605 } | |
2606 | |
2607 | |
2608 /******************************************** | |
2609 * Decode UTF character. | |
2610 * Issue error messages for invalid sequences. | |
2611 * Return decoded character, advance p to last character in UTF sequence. | |
2612 */ | |
2613 | |
2614 unsigned Lexer::decodeUTF() | |
2615 { | |
2616 dchar_t u; | |
2617 unsigned char c; | |
2618 unsigned char *s = p; | |
2619 size_t len; | |
2620 size_t idx; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2621 const char *msg; |
159 | 2622 |
2623 c = *s; | |
2624 assert(c & 0x80); | |
2625 | |
2626 // Check length of remaining string up to 6 UTF-8 characters | |
2627 for (len = 1; len < 6 && s[len]; len++) | |
2628 ; | |
2629 | |
2630 idx = 0; | |
2631 msg = utf_decodeChar(s, len, &idx, &u); | |
2632 p += idx - 1; | |
2633 if (msg) | |
2634 { | |
2635 error("%s", msg); | |
2636 } | |
2637 return u; | |
2638 } | |
2639 | |
2640 | |
2641 /*************************************************** | |
2642 * Parse doc comment embedded between t->ptr and p. | |
2643 * Remove trailing blanks and tabs from lines. | |
2644 * Replace all newlines with \n. | |
2645 * Remove leading comment character from each line. | |
2646 * Decide if it's a lineComment or a blockComment. | |
2647 * Append to previous one for this token. | |
2648 */ | |
2649 | |
2650 void Lexer::getDocComment(Token *t, unsigned lineComment) | |
2651 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2652 /* ct tells us which kind of comment it is: '/', '*', or '+' |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2653 */ |
159 | 2654 unsigned char ct = t->ptr[2]; |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2655 |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2656 /* Start of comment text skips over / * *, / + +, or / / / |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2657 */ |
159 | 2658 unsigned char *q = t->ptr + 3; // start of comment text |
2659 | |
2660 unsigned char *qend = p; | |
2661 if (ct == '*' || ct == '+') | |
2662 qend -= 2; | |
2663 | |
2664 /* Scan over initial row of ****'s or ++++'s or ////'s | |
2665 */ | |
2666 for (; q < qend; q++) | |
2667 { | |
2668 if (*q != ct) | |
2669 break; | |
2670 } | |
2671 | |
2672 /* Remove trailing row of ****'s or ++++'s | |
2673 */ | |
2674 if (ct != '/') | |
2675 { | |
2676 for (; q < qend; qend--) | |
2677 { | |
2678 if (qend[-1] != ct) | |
2679 break; | |
2680 } | |
2681 } | |
2682 | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2683 /* Comment is now [q .. qend]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2684 * Canonicalize it into buf[]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2685 */ |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2686 OutBuffer buf; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2687 int linestart = 0; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2688 |
159 | 2689 for (; q < qend; q++) |
2690 { | |
2691 unsigned char c = *q; | |
2692 | |
2693 switch (c) | |
2694 { | |
2695 case '*': | |
2696 case '+': | |
2697 if (linestart && c == ct) | |
2698 { linestart = 0; | |
2699 /* Trim preceding whitespace up to preceding \n | |
2700 */ | |
2701 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2702 buf.offset--; | |
2703 continue; | |
2704 } | |
2705 break; | |
2706 | |
2707 case ' ': | |
2708 case '\t': | |
2709 break; | |
2710 | |
2711 case '\r': | |
2712 if (q[1] == '\n') | |
2713 continue; // skip the \r | |
2714 goto Lnewline; | |
2715 | |
2716 default: | |
2717 if (c == 226) | |
2718 { | |
2719 // If LS or PS | |
2720 if (q[1] == 128 && | |
2721 (q[2] == 168 || q[2] == 169)) | |
2722 { | |
2723 q += 2; | |
2724 goto Lnewline; | |
2725 } | |
2726 } | |
2727 linestart = 0; | |
2728 break; | |
2729 | |
2730 Lnewline: | |
2731 c = '\n'; // replace all newlines with \n | |
2732 case '\n': | |
2733 linestart = 1; | |
2734 | |
2735 /* Trim trailing whitespace | |
2736 */ | |
2737 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2738 buf.offset--; | |
2739 | |
2740 break; | |
2741 } | |
2742 buf.writeByte(c); | |
2743 } | |
2744 | |
2745 // Always end with a newline | |
2746 if (!buf.offset || buf.data[buf.offset - 1] != '\n') | |
2747 buf.writeByte('\n'); | |
2748 | |
2749 buf.writeByte(0); | |
2750 | |
2751 // It's a line comment if the start of the doc comment comes | |
2752 // after other non-whitespace on the same line. | |
2753 unsigned char** dc = (lineComment && anyToken) | |
2754 ? &t->lineComment | |
2755 : &t->blockComment; | |
2756 | |
2757 // Combine with previous doc comment, if any | |
2758 if (*dc) | |
2759 *dc = combineComments(*dc, (unsigned char *)buf.data); | |
2760 else | |
2761 *dc = (unsigned char *)buf.extractData(); | |
2762 } | |
2763 | |
2764 /******************************************** | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2765 * Combine two document comments into one, |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2766 * separated by a newline. |
159 | 2767 */ |
2768 | |
2769 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) | |
2770 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2771 //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2772 |
159 | 2773 unsigned char *c = c2; |
2774 | |
2775 if (c1) | |
2776 { c = c1; | |
2777 if (c2) | |
2778 { size_t len1 = strlen((char *)c1); | |
2779 size_t len2 = strlen((char *)c2); | |
2780 | |
2781 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); | |
2782 memcpy(c, c1, len1); | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2783 if (len1 && c1[len1 - 1] != '\n') |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2784 { c[len1] = '\n'; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2785 len1++; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2786 } |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2787 memcpy(c + len1, c2, len2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2788 c[len1 + len2] = 0; |
159 | 2789 } |
2790 } | |
2791 return c; | |
2792 } | |
2793 | |
2794 /******************************************** | |
2795 * Create an identifier in the string table. | |
2796 */ | |
2797 | |
2798 Identifier *Lexer::idPool(const char *s) | |
2799 { | |
2800 size_t len = strlen(s); | |
2801 StringValue *sv = stringtable.update(s, len); | |
2802 Identifier *id = (Identifier *) sv->ptrvalue; | |
2803 if (!id) | |
2804 { | |
2805 id = new Identifier(sv->lstring.string, TOKidentifier); | |
2806 sv->ptrvalue = id; | |
2807 } | |
2808 return id; | |
2809 } | |
2810 | |
2811 /********************************************* | |
2812 * Create a unique identifier using the prefix s. | |
2813 */ | |
2814 | |
2815 Identifier *Lexer::uniqueId(const char *s, int num) | |
2816 { char buffer[32]; | |
2817 size_t slen = strlen(s); | |
2818 | |
2819 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); | |
2820 sprintf(buffer, "%s%d", s, num); | |
2821 return idPool(buffer); | |
2822 } | |
2823 | |
2824 Identifier *Lexer::uniqueId(const char *s) | |
2825 { | |
2826 static int num; | |
2827 return uniqueId(s, ++num); | |
2828 } | |
2829 | |
2830 /**************************************** | |
2831 */ | |
2832 | |
2833 struct Keyword | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2834 { const char *name; |
159 | 2835 enum TOK value; |
2836 }; | |
2837 | |
2838 static Keyword keywords[] = | |
2839 { | |
2840 // { "", TOK }, | |
2841 | |
2842 { "this", TOKthis }, | |
2843 { "super", TOKsuper }, | |
2844 { "assert", TOKassert }, | |
2845 { "null", TOKnull }, | |
2846 { "true", TOKtrue }, | |
2847 { "false", TOKfalse }, | |
2848 { "cast", TOKcast }, | |
2849 { "new", TOKnew }, | |
2850 { "delete", TOKdelete }, | |
2851 { "throw", TOKthrow }, | |
2852 { "module", TOKmodule }, | |
2853 { "pragma", TOKpragma }, | |
2854 { "typeof", TOKtypeof }, | |
2855 { "typeid", TOKtypeid }, | |
2856 | |
2857 { "template", TOKtemplate }, | |
2858 | |
2859 { "void", TOKvoid }, | |
2860 { "byte", TOKint8 }, | |
2861 { "ubyte", TOKuns8 }, | |
2862 { "short", TOKint16 }, | |
2863 { "ushort", TOKuns16 }, | |
2864 { "int", TOKint32 }, | |
2865 { "uint", TOKuns32 }, | |
2866 { "long", TOKint64 }, | |
2867 { "ulong", TOKuns64 }, | |
2868 { "cent", TOKcent, }, | |
2869 { "ucent", TOKucent, }, | |
2870 { "float", TOKfloat32 }, | |
2871 { "double", TOKfloat64 }, | |
2872 { "real", TOKfloat80 }, | |
2873 | |
2874 { "bool", TOKbool }, | |
2875 { "char", TOKchar }, | |
2876 { "wchar", TOKwchar }, | |
2877 { "dchar", TOKdchar }, | |
2878 | |
2879 { "ifloat", TOKimaginary32 }, | |
2880 { "idouble", TOKimaginary64 }, | |
2881 { "ireal", TOKimaginary80 }, | |
2882 | |
2883 { "cfloat", TOKcomplex32 }, | |
2884 { "cdouble", TOKcomplex64 }, | |
2885 { "creal", TOKcomplex80 }, | |
2886 | |
2887 { "delegate", TOKdelegate }, | |
2888 { "function", TOKfunction }, | |
2889 | |
2890 { "is", TOKis }, | |
2891 { "if", TOKif }, | |
2892 { "else", TOKelse }, | |
2893 { "while", TOKwhile }, | |
2894 { "for", TOKfor }, | |
2895 { "do", TOKdo }, | |
2896 { "switch", TOKswitch }, | |
2897 { "case", TOKcase }, | |
2898 { "default", TOKdefault }, | |
2899 { "break", TOKbreak }, | |
2900 { "continue", TOKcontinue }, | |
2901 { "synchronized", TOKsynchronized }, | |
2902 { "return", TOKreturn }, | |
2903 { "goto", TOKgoto }, | |
2904 { "try", TOKtry }, | |
2905 { "catch", TOKcatch }, | |
2906 { "finally", TOKfinally }, | |
2907 { "with", TOKwith }, | |
2908 { "asm", TOKasm }, | |
2909 { "foreach", TOKforeach }, | |
2910 { "foreach_reverse", TOKforeach_reverse }, | |
2911 { "scope", TOKscope }, | |
2912 | |
2913 { "struct", TOKstruct }, | |
2914 { "class", TOKclass }, | |
2915 { "interface", TOKinterface }, | |
2916 { "union", TOKunion }, | |
2917 { "enum", TOKenum }, | |
2918 { "import", TOKimport }, | |
2919 { "mixin", TOKmixin }, | |
2920 { "static", TOKstatic }, | |
2921 { "final", TOKfinal }, | |
2922 { "const", TOKconst }, | |
2923 { "typedef", TOKtypedef }, | |
2924 { "alias", TOKalias }, | |
2925 { "override", TOKoverride }, | |
2926 { "abstract", TOKabstract }, | |
2927 { "volatile", TOKvolatile }, | |
2928 { "debug", TOKdebug }, | |
2929 { "deprecated", TOKdeprecated }, | |
2930 { "in", TOKin }, | |
2931 { "out", TOKout }, | |
2932 { "inout", TOKinout }, | |
2933 { "lazy", TOKlazy }, | |
2934 { "auto", TOKauto }, | |
2935 | |
2936 { "align", TOKalign }, | |
2937 { "extern", TOKextern }, | |
2938 { "private", TOKprivate }, | |
2939 { "package", TOKpackage }, | |
2940 { "protected", TOKprotected }, | |
2941 { "public", TOKpublic }, | |
2942 { "export", TOKexport }, | |
2943 | |
2944 { "body", TOKbody }, | |
2945 { "invariant", TOKinvariant }, | |
2946 { "unittest", TOKunittest }, | |
2947 { "version", TOKversion }, | |
2948 //{ "manifest", TOKmanifest }, | |
2949 | |
2950 // Added after 1.0 | |
2951 { "ref", TOKref }, | |
2952 { "macro", TOKmacro }, | |
336 | 2953 #if DMDV2 |
159 | 2954 { "pure", TOKpure }, |
2955 { "nothrow", TOKnothrow }, | |
336 | 2956 { "__thread", TOKtls }, |
159 | 2957 { "__traits", TOKtraits }, |
2958 { "__overloadset", TOKoverloadset }, | |
336 | 2959 { "__FILE__", TOKfile }, |
2960 { "__LINE__", TOKline }, | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2961 { "shared", TOKshared }, |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2962 { "immutable", TOKimmutable }, |
159 | 2963 #endif |
2964 }; | |
2965 | |
2966 int Token::isKeyword() | |
2967 { | |
2968 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) | |
2969 { | |
2970 if (keywords[u].value == value) | |
2971 return 1; | |
2972 } | |
2973 return 0; | |
2974 } | |
2975 | |
2976 void Lexer::initKeywords() | |
2977 { StringValue *sv; | |
2978 unsigned u; | |
2979 enum TOK v; | |
2980 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); | |
2981 | |
2982 if (global.params.Dversion == 1) | |
2983 nkeywords -= 2; | |
2984 | |
2985 cmtable_init(); | |
2986 | |
2987 for (u = 0; u < nkeywords; u++) | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2988 { const char *s; |
159 | 2989 |
2990 //printf("keyword[%d] = '%s'\n",u, keywords[u].name); | |
2991 s = keywords[u].name; | |
2992 v = keywords[u].value; | |
2993 sv = stringtable.insert(s, strlen(s)); | |
2994 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); | |
2995 | |
2996 //printf("tochars[%d] = '%s'\n",v, s); | |
2997 Token::tochars[v] = s; | |
2998 } | |
2999 | |
3000 Token::tochars[TOKeof] = "EOF"; | |
3001 Token::tochars[TOKlcurly] = "{"; | |
3002 Token::tochars[TOKrcurly] = "}"; | |
3003 Token::tochars[TOKlparen] = "("; | |
3004 Token::tochars[TOKrparen] = ")"; | |
3005 Token::tochars[TOKlbracket] = "["; | |
3006 Token::tochars[TOKrbracket] = "]"; | |
3007 Token::tochars[TOKsemicolon] = ";"; | |
3008 Token::tochars[TOKcolon] = ":"; | |
3009 Token::tochars[TOKcomma] = ","; | |
3010 Token::tochars[TOKdot] = "."; | |
3011 Token::tochars[TOKxor] = "^"; | |
3012 Token::tochars[TOKxorass] = "^="; | |
3013 Token::tochars[TOKassign] = "="; | |
3014 Token::tochars[TOKconstruct] = "="; | |
336 | 3015 #if DMDV2 |
159 | 3016 Token::tochars[TOKblit] = "="; |
3017 #endif | |
3018 Token::tochars[TOKlt] = "<"; | |
3019 Token::tochars[TOKgt] = ">"; | |
3020 Token::tochars[TOKle] = "<="; | |
3021 Token::tochars[TOKge] = ">="; | |
3022 Token::tochars[TOKequal] = "=="; | |
3023 Token::tochars[TOKnotequal] = "!="; | |
3024 Token::tochars[TOKnotidentity] = "!is"; | |
3025 Token::tochars[TOKtobool] = "!!"; | |
3026 | |
3027 Token::tochars[TOKunord] = "!<>="; | |
3028 Token::tochars[TOKue] = "!<>"; | |
3029 Token::tochars[TOKlg] = "<>"; | |
3030 Token::tochars[TOKleg] = "<>="; | |
3031 Token::tochars[TOKule] = "!>"; | |
3032 Token::tochars[TOKul] = "!>="; | |
3033 Token::tochars[TOKuge] = "!<"; | |
3034 Token::tochars[TOKug] = "!<="; | |
3035 | |
3036 Token::tochars[TOKnot] = "!"; | |
3037 Token::tochars[TOKtobool] = "!!"; | |
3038 Token::tochars[TOKshl] = "<<"; | |
3039 Token::tochars[TOKshr] = ">>"; | |
3040 Token::tochars[TOKushr] = ">>>"; | |
3041 Token::tochars[TOKadd] = "+"; | |
3042 Token::tochars[TOKmin] = "-"; | |
3043 Token::tochars[TOKmul] = "*"; | |
3044 Token::tochars[TOKdiv] = "/"; | |
3045 Token::tochars[TOKmod] = "%"; | |
3046 Token::tochars[TOKslice] = ".."; | |
3047 Token::tochars[TOKdotdotdot] = "..."; | |
3048 Token::tochars[TOKand] = "&"; | |
3049 Token::tochars[TOKandand] = "&&"; | |
3050 Token::tochars[TOKor] = "|"; | |
3051 Token::tochars[TOKoror] = "||"; | |
3052 Token::tochars[TOKarray] = "[]"; | |
3053 Token::tochars[TOKindex] = "[i]"; | |
3054 Token::tochars[TOKaddress] = "&"; | |
3055 Token::tochars[TOKstar] = "*"; | |
3056 Token::tochars[TOKtilde] = "~"; | |
3057 Token::tochars[TOKdollar] = "$"; | |
3058 Token::tochars[TOKcast] = "cast"; | |
3059 Token::tochars[TOKplusplus] = "++"; | |
3060 Token::tochars[TOKminusminus] = "--"; | |
3061 Token::tochars[TOKtype] = "type"; | |
3062 Token::tochars[TOKquestion] = "?"; | |
3063 Token::tochars[TOKneg] = "-"; | |
3064 Token::tochars[TOKuadd] = "+"; | |
3065 Token::tochars[TOKvar] = "var"; | |
3066 Token::tochars[TOKaddass] = "+="; | |
3067 Token::tochars[TOKminass] = "-="; | |
3068 Token::tochars[TOKmulass] = "*="; | |
3069 Token::tochars[TOKdivass] = "/="; | |
3070 Token::tochars[TOKmodass] = "%="; | |
3071 Token::tochars[TOKshlass] = "<<="; | |
3072 Token::tochars[TOKshrass] = ">>="; | |
3073 Token::tochars[TOKushrass] = ">>>="; | |
3074 Token::tochars[TOKandass] = "&="; | |
3075 Token::tochars[TOKorass] = "|="; | |
3076 Token::tochars[TOKcatass] = "~="; | |
3077 Token::tochars[TOKcat] = "~"; | |
3078 Token::tochars[TOKcall] = "call"; | |
3079 Token::tochars[TOKidentity] = "is"; | |
3080 Token::tochars[TOKnotidentity] = "!is"; | |
3081 | |
3082 Token::tochars[TOKorass] = "|="; | |
3083 Token::tochars[TOKidentifier] = "identifier"; | |
3084 | |
3085 // For debugging | |
1587 | 3086 Token::tochars[TOKerror] = "error"; |
159 | 3087 Token::tochars[TOKdotexp] = "dotexp"; |
3088 Token::tochars[TOKdotti] = "dotti"; | |
3089 Token::tochars[TOKdotvar] = "dotvar"; | |
3090 Token::tochars[TOKdottype] = "dottype"; | |
3091 Token::tochars[TOKsymoff] = "symoff"; | |
3092 Token::tochars[TOKarraylength] = "arraylength"; | |
3093 Token::tochars[TOKarrayliteral] = "arrayliteral"; | |
3094 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; | |
3095 Token::tochars[TOKstructliteral] = "structliteral"; | |
3096 Token::tochars[TOKstring] = "string"; | |
3097 Token::tochars[TOKdsymbol] = "symbol"; | |
3098 Token::tochars[TOKtuple] = "tuple"; | |
3099 Token::tochars[TOKdeclaration] = "declaration"; | |
3100 Token::tochars[TOKdottd] = "dottd"; | |
3101 Token::tochars[TOKon_scope_exit] = "scope(exit)"; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3102 Token::tochars[TOKon_scope_success] = "scope(success)"; |
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3103 Token::tochars[TOKon_scope_failure] = "scope(failure)"; |
159 | 3104 } |