Mercurial > projects > ldc
annotate dmd/lexer.c @ 1614:dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
bugzilla 3455 Some Unicode characters not allowed in identifiers.
---
dmd/lexer.c | 34 ++++++++++++++++++++++------------
1 files changed, 22 insertions(+), 12 deletions(-)
author | Leandro Lucarella <llucax@gmail.com> |
---|---|
date | Wed, 06 Jan 2010 15:18:21 -0300 |
parents | eae495e6ae8d |
children |
rev | line source |
---|---|
159 | 1 |
2 // Compiler implementation of the D programming language | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3 // Copyright (c) 1999-2009 by Digital Mars |
159 | 4 // All Rights Reserved |
5 // written by Walter Bright | |
6 // http://www.digitalmars.com | |
7 // License for redistribution is by either the Artistic License | |
8 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
9 // See the included readme.txt for details. | |
10 | |
1431
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
11 #if __sun && __SVR4 |
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
12 #define __C99FEATURES__ 1 // Needed on Solaris for NaN and more, LDC#313 |
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
13 #endif |
5f6f0929ee4c
Define __C99FEATURES__ in lexer.c for Solaris. Fixes #313.
Christian Kamm <kamm incasoftware de>
parents:
1367
diff
changeset
|
14 |
1228
79758fd2f48a
Added Doxygen file.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1195
diff
changeset
|
15 #if IN_LLVM |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
16 #include <cmath> |
1228
79758fd2f48a
Added Doxygen file.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1195
diff
changeset
|
17 #endif |
872
aa953cc960b6
Apply BlueZeniX's patch for OpenSolaris compatibility. Fixes #158.
Christian Kamm <kamm incasoftware de>
parents:
846
diff
changeset
|
18 |
159 | 19 /* Lexical Analyzer */ |
20 | |
21 #include <stdio.h> | |
22 #include <string.h> | |
23 #include <ctype.h> | |
24 #include <stdarg.h> | |
25 #include <errno.h> | |
26 #include <wchar.h> | |
27 #include <stdlib.h> | |
28 #include <assert.h> | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
29 #include <time.h> // for time() and ctime() |
159 | 30 |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
31 #include "rmem.h" |
159 | 32 |
33 #include "stringtable.h" | |
34 | |
35 #include "lexer.h" | |
36 #include "utf.h" | |
37 #include "identifier.h" | |
38 #include "id.h" | |
39 #include "module.h" | |
40 | |
41 #if _WIN32 && __DMC__ | |
42 // from \dm\src\include\setlocal.h | |
43 extern "C" char * __cdecl __locale_decpoint; | |
44 #endif | |
45 | |
46 extern int HtmlNamedEntity(unsigned char *p, int length); | |
47 | |
48 #define LS 0x2028 // UTF line separator | |
49 #define PS 0x2029 // UTF paragraph separator | |
50 | |
51 /******************************************** | |
52 * Do our own char maps | |
53 */ | |
54 | |
// Per-byte classification table; entries are OR-combinations of the CM* bits.
// It starts zeroed (static storage) and is populated by cmtable_init().
static unsigned char cmtable[256];

const int CMoctal  = 0x1;   // byte is an octal digit '0'..'7'
const int CMhex    = 0x2;   // byte is a hexadecimal digit
const int CMidchar = 0x4;   // byte is alphanumeric or '_'

// Each predicate returns the matching CM* bit (non-zero) or 0.
inline unsigned char isoctal(unsigned char c)
{
    return cmtable[c] & CMoctal;
}

inline unsigned char ishex(unsigned char c)
{
    return cmtable[c] & CMhex;
}

inline unsigned char isidchar(unsigned char c)
{
    return cmtable[c] & CMidchar;
}

/********************************************
 * Fill cmtable with the classification bits for every byte value.
 * Bits are OR-ed into the existing entry, so calling this more than
 * once leaves the table unchanged.
 */
static void cmtable_init()
{
    const unsigned entries = sizeof(cmtable) / sizeof(cmtable[0]);

    for (unsigned ch = 0; ch != entries; ++ch)
    {
        unsigned char bits = cmtable[ch];

        if (ch >= '0' && ch <= '7')
            bits |= CMoctal;

        const bool lowerHex = (ch >= 'a' && ch <= 'f');
        const bool upperHex = (ch >= 'A' && ch <= 'F');
        if (isdigit(ch) || lowerHex || upperHex)
            bits |= CMhex;

        if (isalnum(ch) || ch == '_')
            bits |= CMidchar;

        cmtable[ch] = bits;
    }
}
77 | |
78 | |
79 /************************* Token **********************************************/ | |
80 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
81 const char *Token::tochars[TOKMAX]; |
159 | 82 |
83 void *Token::operator new(size_t size) | |
84 { Token *t; | |
85 | |
86 if (Lexer::freelist) | |
87 { | |
88 t = Lexer::freelist; | |
89 Lexer::freelist = t->next; | |
90 return t; | |
91 } | |
92 | |
93 return ::operator new(size); | |
94 } | |
95 | |
96 #ifdef DEBUG | |
97 void Token::print() | |
98 { | |
99 fprintf(stdmsg, "%s\n", toChars()); | |
100 } | |
101 #endif | |
102 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
103 const char *Token::toChars() |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
104 { const char *p; |
159 | 105 static char buffer[3 + 3 * sizeof(value) + 1]; |
106 | |
107 p = buffer; | |
108 switch (value) | |
109 { | |
110 case TOKint32v: | |
111 sprintf(buffer,"%d",(d_int32)int64value); | |
112 break; | |
113 | |
114 case TOKuns32v: | |
115 case TOKcharv: | |
116 case TOKwcharv: | |
117 case TOKdcharv: | |
118 sprintf(buffer,"%uU",(d_uns32)uns64value); | |
119 break; | |
120 | |
121 case TOKint64v: | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
122 sprintf(buffer,"%jdL",int64value); |
159 | 123 break; |
124 | |
125 case TOKuns64v: | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
872
diff
changeset
|
126 sprintf(buffer,"%juUL",uns64value); |
159 | 127 break; |
128 | |
129 #if IN_GCC | |
130 case TOKfloat32v: | |
131 case TOKfloat64v: | |
132 case TOKfloat80v: | |
133 float80value.format(buffer, sizeof(buffer)); | |
134 break; | |
135 case TOKimaginary32v: | |
136 case TOKimaginary64v: | |
137 case TOKimaginary80v: | |
138 float80value.format(buffer, sizeof(buffer)); | |
139 // %% buffer | |
140 strcat(buffer, "i"); | |
141 break; | |
142 #else | |
143 case TOKfloat32v: | |
144 sprintf(buffer,"%Lgf", float80value); | |
145 break; | |
146 | |
147 case TOKfloat64v: | |
148 sprintf(buffer,"%Lg", float80value); | |
149 break; | |
150 | |
151 case TOKfloat80v: | |
152 sprintf(buffer,"%LgL", float80value); | |
153 break; | |
154 | |
155 case TOKimaginary32v: | |
156 sprintf(buffer,"%Lgfi", float80value); | |
157 break; | |
158 | |
159 case TOKimaginary64v: | |
160 sprintf(buffer,"%Lgi", float80value); | |
161 break; | |
162 | |
163 case TOKimaginary80v: | |
164 sprintf(buffer,"%LgLi", float80value); | |
165 break; | |
166 #endif | |
167 | |
168 case TOKstring: | |
169 #if CSTRINGS | |
170 p = string; | |
171 #else | |
172 { OutBuffer buf; | |
173 | |
174 buf.writeByte('"'); | |
175 for (size_t i = 0; i < len; ) | |
176 { unsigned c; | |
177 | |
178 utf_decodeChar((unsigned char *)ustring, len, &i, &c); | |
179 switch (c) | |
180 { | |
181 case 0: | |
182 break; | |
183 | |
184 case '"': | |
185 case '\\': | |
186 buf.writeByte('\\'); | |
187 default: | |
188 if (isprint(c)) | |
189 buf.writeByte(c); | |
190 else if (c <= 0x7F) | |
191 buf.printf("\\x%02x", c); | |
192 else if (c <= 0xFFFF) | |
193 buf.printf("\\u%04x", c); | |
194 else | |
195 buf.printf("\\U%08x", c); | |
196 continue; | |
197 } | |
198 break; | |
199 } | |
200 buf.writeByte('"'); | |
201 if (postfix) | |
202 buf.writeByte('"'); | |
203 buf.writeByte(0); | |
204 p = (char *)buf.extractData(); | |
205 } | |
206 #endif | |
207 break; | |
208 | |
209 case TOKidentifier: | |
210 case TOKenum: | |
211 case TOKstruct: | |
212 case TOKimport: | |
1603
eae495e6ae8d
Merge DMD r248: implement Denis Koroskin's macro suggestion
Leandro Lucarella <llucax@gmail.com>
parents:
1587
diff
changeset
|
213 case BASIC_TYPES: |
159 | 214 p = ident->toChars(); |
215 break; | |
216 | |
217 default: | |
218 p = toChars(value); | |
219 break; | |
220 } | |
221 return p; | |
222 } | |
223 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
224 const char *Token::toChars(enum TOK value) |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
225 { const char *p; |
159 | 226 static char buffer[3 + 3 * sizeof(value) + 1]; |
227 | |
228 p = tochars[value]; | |
229 if (!p) | |
230 { sprintf(buffer,"TOK%d",value); | |
231 p = buffer; | |
232 } | |
233 return p; | |
234 } | |
235 | |
236 /*************************** Lexer ********************************************/ | |
237 | |
238 Token *Lexer::freelist = NULL; | |
239 StringTable Lexer::stringtable; | |
240 OutBuffer Lexer::stringbuffer; | |
241 | |
242 Lexer::Lexer(Module *mod, | |
243 unsigned char *base, unsigned begoffset, unsigned endoffset, | |
244 int doDocComment, int commentToken) | |
245 : loc(mod, 1) | |
246 { | |
247 //printf("Lexer::Lexer(%p,%d)\n",base,length); | |
248 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); | |
249 memset(&token,0,sizeof(token)); | |
250 this->base = base; | |
251 this->end = base + endoffset; | |
252 p = base + begoffset; | |
253 this->mod = mod; | |
254 this->doDocComment = doDocComment; | |
255 this->anyToken = 0; | |
256 this->commentToken = commentToken; | |
257 //initKeywords(); | |
258 | |
259 /* If first line starts with '#!', ignore the line | |
260 */ | |
261 | |
262 if (p[0] == '#' && p[1] =='!') | |
263 { | |
264 p += 2; | |
265 while (1) | |
266 { unsigned char c = *p; | |
267 switch (c) | |
268 { | |
269 case '\n': | |
270 p++; | |
271 break; | |
272 | |
273 case '\r': | |
274 p++; | |
275 if (*p == '\n') | |
276 p++; | |
277 break; | |
278 | |
279 case 0: | |
280 case 0x1A: | |
281 break; | |
282 | |
283 default: | |
284 if (c & 0x80) | |
285 { unsigned u = decodeUTF(); | |
286 if (u == PS || u == LS) | |
287 break; | |
288 } | |
289 p++; | |
290 continue; | |
291 } | |
292 break; | |
293 } | |
294 loc.linnum = 2; | |
295 } | |
296 } | |
297 | |
298 | |
299 void Lexer::error(const char *format, ...) | |
300 { | |
301 if (mod && !global.gag) | |
302 { | |
303 char *p = loc.toChars(); | |
304 if (*p) | |
305 fprintf(stdmsg, "%s: ", p); | |
306 mem.free(p); | |
307 | |
308 va_list ap; | |
309 va_start(ap, format); | |
310 vfprintf(stdmsg, format, ap); | |
311 va_end(ap); | |
312 | |
313 fprintf(stdmsg, "\n"); | |
314 fflush(stdmsg); | |
315 | |
316 if (global.errors >= 20) // moderate blizzard of cascading messages | |
317 fatal(); | |
318 } | |
319 global.errors++; | |
320 } | |
321 | |
322 void Lexer::error(Loc loc, const char *format, ...) | |
323 { | |
324 if (mod && !global.gag) | |
325 { | |
326 char *p = loc.toChars(); | |
327 if (*p) | |
328 fprintf(stdmsg, "%s: ", p); | |
329 mem.free(p); | |
330 | |
331 va_list ap; | |
332 va_start(ap, format); | |
333 vfprintf(stdmsg, format, ap); | |
334 va_end(ap); | |
335 | |
336 fprintf(stdmsg, "\n"); | |
337 fflush(stdmsg); | |
338 | |
339 if (global.errors >= 20) // moderate blizzard of cascading messages | |
340 fatal(); | |
341 } | |
342 global.errors++; | |
343 } | |
344 | |
345 TOK Lexer::nextToken() | |
346 { Token *t; | |
347 | |
348 if (token.next) | |
349 { | |
350 t = token.next; | |
351 memcpy(&token,t,sizeof(Token)); | |
352 t->next = freelist; | |
353 freelist = t; | |
354 } | |
355 else | |
356 { | |
357 scan(&token); | |
358 } | |
359 //token.print(); | |
360 return token.value; | |
361 } | |
362 | |
363 Token *Lexer::peek(Token *ct) | |
364 { Token *t; | |
365 | |
366 if (ct->next) | |
367 t = ct->next; | |
368 else | |
369 { | |
370 t = new Token(); | |
371 scan(t); | |
372 t->next = NULL; | |
373 ct->next = t; | |
374 } | |
375 return t; | |
376 } | |
377 | |
717
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
378 /*********************** |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
379 * Look ahead at next token's value. |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
380 */ |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
381 |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
382 TOK Lexer::peekNext() |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
383 { |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
384 return peek(&token)->value; |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
385 } |
a26b0c5d5942
Merged DMD 1.036.
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
697
diff
changeset
|
386 |
159 | 387 /********************************* |
388 * tk is on the opening (. | |
389 * Look ahead and return token that is past the closing ). | |
390 */ | |
391 | |
392 Token *Lexer::peekPastParen(Token *tk) | |
393 { | |
394 //printf("peekPastParen()\n"); | |
395 int parens = 1; | |
396 int curlynest = 0; | |
397 while (1) | |
398 { | |
399 tk = peek(tk); | |
400 //tk->print(); | |
401 switch (tk->value) | |
402 { | |
403 case TOKlparen: | |
404 parens++; | |
405 continue; | |
406 | |
407 case TOKrparen: | |
408 --parens; | |
409 if (parens) | |
410 continue; | |
411 tk = peek(tk); | |
412 break; | |
413 | |
414 case TOKlcurly: | |
415 curlynest++; | |
416 continue; | |
417 | |
418 case TOKrcurly: | |
419 if (--curlynest >= 0) | |
420 continue; | |
421 break; | |
422 | |
423 case TOKsemicolon: | |
424 if (curlynest) | |
425 continue; | |
426 break; | |
427 | |
428 case TOKeof: | |
429 break; | |
430 | |
431 default: | |
432 continue; | |
433 } | |
434 return tk; | |
435 } | |
436 } | |
437 | |
438 /********************************** | |
439 * Determine if string is a valid Identifier. | |
440 * Placed here because of commonality with Lexer functionality. | |
441 * Returns: | |
442 * 0 invalid | |
443 */ | |
444 | |
445 int Lexer::isValidIdentifier(char *p) | |
446 { | |
447 size_t len; | |
448 size_t idx; | |
449 | |
450 if (!p || !*p) | |
451 goto Linvalid; | |
452 | |
453 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars | |
454 goto Linvalid; | |
455 | |
456 len = strlen(p); | |
457 idx = 0; | |
458 while (p[idx]) | |
459 { dchar_t dc; | |
460 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
461 const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); |
159 | 462 if (q) |
463 goto Linvalid; | |
464 | |
465 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) | |
466 goto Linvalid; | |
467 } | |
468 return 1; | |
469 | |
470 Linvalid: | |
471 return 0; | |
472 } | |
473 | |
474 /**************************** | |
475 * Turn next token in buffer into a token. | |
476 */ | |
477 | |
478 void Lexer::scan(Token *t) | |
479 { | |
480 unsigned lastLine = loc.linnum; | |
481 unsigned linnum; | |
482 | |
483 t->blockComment = NULL; | |
484 t->lineComment = NULL; | |
485 while (1) | |
486 { | |
487 t->ptr = p; | |
488 //printf("p = %p, *p = '%c'\n",p,*p); | |
489 switch (*p) | |
490 { | |
491 case 0: | |
492 case 0x1A: | |
493 t->value = TOKeof; // end of file | |
494 return; | |
495 | |
496 case ' ': | |
497 case '\t': | |
498 case '\v': | |
499 case '\f': | |
500 p++; | |
501 continue; // skip white space | |
502 | |
503 case '\r': | |
504 p++; | |
505 if (*p != '\n') // if CR stands by itself | |
506 loc.linnum++; | |
507 continue; // skip white space | |
508 | |
509 case '\n': | |
510 p++; | |
511 loc.linnum++; | |
512 continue; // skip white space | |
513 | |
514 case '0': case '1': case '2': case '3': case '4': | |
515 case '5': case '6': case '7': case '8': case '9': | |
516 t->value = number(t); | |
517 return; | |
518 | |
519 #if CSTRINGS | |
520 case '\'': | |
521 t->value = charConstant(t, 0); | |
522 return; | |
523 | |
524 case '"': | |
525 t->value = stringConstant(t,0); | |
526 return; | |
527 | |
528 case 'l': | |
529 case 'L': | |
530 if (p[1] == '\'') | |
531 { | |
532 p++; | |
533 t->value = charConstant(t, 1); | |
534 return; | |
535 } | |
536 else if (p[1] == '"') | |
537 { | |
538 p++; | |
539 t->value = stringConstant(t, 1); | |
540 return; | |
541 } | |
542 #else | |
543 case '\'': | |
544 t->value = charConstant(t,0); | |
545 return; | |
546 | |
547 case 'r': | |
548 if (p[1] != '"') | |
549 goto case_ident; | |
550 p++; | |
551 case '`': | |
552 t->value = wysiwygStringConstant(t, *p); | |
553 return; | |
554 | |
555 case 'x': | |
556 if (p[1] != '"') | |
557 goto case_ident; | |
558 p++; | |
559 t->value = hexStringConstant(t); | |
560 return; | |
561 | |
336 | 562 #if DMDV2 |
159 | 563 case 'q': |
564 if (p[1] == '"') | |
565 { | |
566 p++; | |
567 t->value = delimitedStringConstant(t); | |
568 return; | |
569 } | |
570 else if (p[1] == '{') | |
571 { | |
572 p++; | |
573 t->value = tokenStringConstant(t); | |
574 return; | |
575 } | |
576 else | |
577 goto case_ident; | |
578 #endif | |
579 | |
580 case '"': | |
581 t->value = escapeStringConstant(t,0); | |
582 return; | |
583 | |
584 case '\\': // escaped string literal | |
585 { unsigned c; | |
586 | |
587 stringbuffer.reset(); | |
588 do | |
589 { | |
590 p++; | |
591 switch (*p) | |
592 { | |
593 case 'u': | |
594 case 'U': | |
595 case '&': | |
596 c = escapeSequence(); | |
597 stringbuffer.writeUTF8(c); | |
598 break; | |
599 | |
600 default: | |
601 c = escapeSequence(); | |
602 stringbuffer.writeByte(c); | |
603 break; | |
604 } | |
605 } while (*p == '\\'); | |
606 t->len = stringbuffer.offset; | |
607 stringbuffer.writeByte(0); | |
608 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
609 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
610 t->postfix = 0; | |
611 t->value = TOKstring; | |
612 return; | |
613 } | |
614 | |
615 case 'l': | |
616 case 'L': | |
617 #endif | |
618 case 'a': case 'b': case 'c': case 'd': case 'e': | |
619 case 'f': case 'g': case 'h': case 'i': case 'j': | |
620 case 'k': case 'm': case 'n': case 'o': | |
336 | 621 #if DMDV2 |
159 | 622 case 'p': /*case 'q': case 'r':*/ case 's': case 't': |
623 #else | |
624 case 'p': case 'q': /*case 'r':*/ case 's': case 't': | |
625 #endif | |
626 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': | |
627 case 'z': | |
628 case 'A': case 'B': case 'C': case 'D': case 'E': | |
629 case 'F': case 'G': case 'H': case 'I': case 'J': | |
630 case 'K': case 'M': case 'N': case 'O': | |
631 case 'P': case 'Q': case 'R': case 'S': case 'T': | |
632 case 'U': case 'V': case 'W': case 'X': case 'Y': | |
633 case 'Z': | |
634 case '_': | |
635 case_ident: | |
636 { unsigned char c; | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
637 |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
638 while (1) |
159 | 639 { |
640 c = *++p; | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
641 if (isidchar(c)) |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
642 continue; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
643 else if (c & 0x80) |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
644 { unsigned char *s = p; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
645 unsigned u = decodeUTF(); |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
646 if (isUniAlpha(u)) |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
647 continue; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
648 error("char 0x%04x not allowed in identifier", u); |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
649 p = s; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
650 } |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
651 break; |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
652 } |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
653 |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
654 StringValue *sv = stringtable.update((char *)t->ptr, p - t->ptr); |
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
655 Identifier *id = (Identifier *) sv->ptrvalue; |
159 | 656 if (!id) |
657 { id = new Identifier(sv->lstring.string,TOKidentifier); | |
658 sv->ptrvalue = id; | |
659 } | |
660 t->ident = id; | |
661 t->value = (enum TOK) id->value; | |
662 anyToken = 1; | |
663 if (*t->ptr == '_') // if special identifier token | |
664 { | |
665 static char date[11+1]; | |
666 static char time[8+1]; | |
667 static char timestamp[24+1]; | |
668 | |
669 if (!date[0]) // lazy evaluation | |
670 { time_t t; | |
671 char *p; | |
672 | |
673 ::time(&t); | |
674 p = ctime(&t); | |
675 assert(p); | |
676 sprintf(date, "%.6s %.4s", p + 4, p + 20); | |
677 sprintf(time, "%.8s", p + 11); | |
678 sprintf(timestamp, "%.24s", p); | |
679 } | |
680 | |
336 | 681 #if DMDV1 |
159 | 682 if (mod && id == Id::FILE) |
683 { | |
684 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
685 goto Lstr; |
159 | 686 } |
687 else if (mod && id == Id::LINE) | |
688 { | |
689 t->value = TOKint64v; | |
690 t->uns64value = loc.linnum; | |
691 } | |
336 | 692 else |
693 #endif | |
694 if (id == Id::DATE) | |
159 | 695 { |
696 t->ustring = (unsigned char *)date; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
697 goto Lstr; |
159 | 698 } |
699 else if (id == Id::TIME) | |
700 { | |
701 t->ustring = (unsigned char *)time; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
702 goto Lstr; |
159 | 703 } |
704 else if (id == Id::VENDOR) | |
705 { | |
664
eef8ac26c66c
Some missed LLVMDC -> LDC.
Christian Kamm <kamm incasoftware de>
parents:
658
diff
changeset
|
706 t->ustring = (unsigned char *)"LDC"; |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
707 goto Lstr; |
159 | 708 } |
709 else if (id == Id::TIMESTAMP) | |
710 { | |
711 t->ustring = (unsigned char *)timestamp; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
712 Lstr: |
159 | 713 t->value = TOKstring; |
714 Llen: | |
715 t->postfix = 0; | |
716 t->len = strlen((char *)t->ustring); | |
717 } | |
718 else if (id == Id::VERSIONX) | |
719 { unsigned major = 0; | |
720 unsigned minor = 0; | |
721 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
722 for (const char *p = global.version + 1; 1; p++) |
159 | 723 { |
724 char c = *p; | |
725 if (isdigit(c)) | |
726 minor = minor * 10 + c - '0'; | |
727 else if (c == '.') | |
728 { major = minor; | |
729 minor = 0; | |
730 } | |
731 else | |
732 break; | |
733 } | |
734 t->value = TOKint64v; | |
735 t->uns64value = major * 1000 + minor; | |
736 } | |
336 | 737 #if DMDV2 |
159 | 738 else if (id == Id::EOFX) |
739 { | |
740 t->value = TOKeof; | |
741 // Advance scanner to end of file | |
742 while (!(*p == 0 || *p == 0x1A)) | |
743 p++; | |
744 } | |
745 #endif | |
746 } | |
747 //printf("t->value = %d\n",t->value); | |
748 return; | |
749 } | |
750 | |
751 case '/': | |
752 p++; | |
753 switch (*p) | |
754 { | |
755 case '=': | |
756 p++; | |
757 t->value = TOKdivass; | |
758 return; | |
759 | |
760 case '*': | |
761 p++; | |
762 linnum = loc.linnum; | |
763 while (1) | |
764 { | |
765 while (1) | |
766 { unsigned char c = *p; | |
767 switch (c) | |
768 { | |
769 case '/': | |
770 break; | |
771 | |
772 case '\n': | |
773 loc.linnum++; | |
774 p++; | |
775 continue; | |
776 | |
777 case '\r': | |
778 p++; | |
779 if (*p != '\n') | |
780 loc.linnum++; | |
781 continue; | |
782 | |
783 case 0: | |
784 case 0x1A: | |
785 error("unterminated /* */ comment"); | |
786 p = end; | |
787 t->value = TOKeof; | |
788 return; | |
789 | |
790 default: | |
791 if (c & 0x80) | |
792 { unsigned u = decodeUTF(); | |
793 if (u == PS || u == LS) | |
794 loc.linnum++; | |
795 } | |
796 p++; | |
797 continue; | |
798 } | |
799 break; | |
800 } | |
801 p++; | |
802 if (p[-2] == '*' && p - 3 != t->ptr) | |
803 break; | |
804 } | |
805 if (commentToken) | |
806 { | |
807 t->value = TOKcomment; | |
808 return; | |
809 } | |
810 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) | |
811 { // if /** but not /**/ | |
812 getDocComment(t, lastLine == linnum); | |
813 } | |
814 continue; | |
815 | |
816 case '/': // do // style comments | |
817 linnum = loc.linnum; | |
818 while (1) | |
819 { unsigned char c = *++p; | |
820 switch (c) | |
821 { | |
822 case '\n': | |
823 break; | |
824 | |
825 case '\r': | |
826 if (p[1] == '\n') | |
827 p++; | |
828 break; | |
829 | |
830 case 0: | |
831 case 0x1A: | |
832 if (commentToken) | |
833 { | |
834 p = end; | |
835 t->value = TOKcomment; | |
836 return; | |
837 } | |
838 if (doDocComment && t->ptr[2] == '/') | |
839 getDocComment(t, lastLine == linnum); | |
840 p = end; | |
841 t->value = TOKeof; | |
842 return; | |
843 | |
844 default: | |
845 if (c & 0x80) | |
846 { unsigned u = decodeUTF(); | |
847 if (u == PS || u == LS) | |
848 break; | |
849 } | |
850 continue; | |
851 } | |
852 break; | |
853 } | |
854 | |
855 if (commentToken) | |
856 { | |
857 p++; | |
858 loc.linnum++; | |
859 t->value = TOKcomment; | |
860 return; | |
861 } | |
862 if (doDocComment && t->ptr[2] == '/') | |
863 getDocComment(t, lastLine == linnum); | |
864 | |
865 p++; | |
866 loc.linnum++; | |
867 continue; | |
868 | |
869 case '+': | |
870 { int nest; | |
871 | |
872 linnum = loc.linnum; | |
873 p++; | |
874 nest = 1; | |
875 while (1) | |
876 { unsigned char c = *p; | |
877 switch (c) | |
878 { | |
879 case '/': | |
880 p++; | |
881 if (*p == '+') | |
882 { | |
883 p++; | |
884 nest++; | |
885 } | |
886 continue; | |
887 | |
888 case '+': | |
889 p++; | |
890 if (*p == '/') | |
891 { | |
892 p++; | |
893 if (--nest == 0) | |
894 break; | |
895 } | |
896 continue; | |
897 | |
898 case '\r': | |
899 p++; | |
900 if (*p != '\n') | |
901 loc.linnum++; | |
902 continue; | |
903 | |
904 case '\n': | |
905 loc.linnum++; | |
906 p++; | |
907 continue; | |
908 | |
909 case 0: | |
910 case 0x1A: | |
911 error("unterminated /+ +/ comment"); | |
912 p = end; | |
913 t->value = TOKeof; | |
914 return; | |
915 | |
916 default: | |
917 if (c & 0x80) | |
918 { unsigned u = decodeUTF(); | |
919 if (u == PS || u == LS) | |
920 loc.linnum++; | |
921 } | |
922 p++; | |
923 continue; | |
924 } | |
925 break; | |
926 } | |
927 if (commentToken) | |
928 { | |
929 t->value = TOKcomment; | |
930 return; | |
931 } | |
932 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) | |
933 { // if /++ but not /++/ | |
934 getDocComment(t, lastLine == linnum); | |
935 } | |
936 continue; | |
937 } | |
938 } | |
939 t->value = TOKdiv; | |
940 return; | |
941 | |
942 case '.': | |
943 p++; | |
944 if (isdigit(*p)) | |
945 { /* Note that we don't allow ._1 and ._ as being | |
946 * valid floating point numbers. | |
947 */ | |
948 p--; | |
949 t->value = inreal(t); | |
950 } | |
951 else if (p[0] == '.') | |
952 { | |
953 if (p[1] == '.') | |
954 { p += 2; | |
955 t->value = TOKdotdotdot; | |
956 } | |
957 else | |
958 { p++; | |
959 t->value = TOKslice; | |
960 } | |
961 } | |
962 else | |
963 t->value = TOKdot; | |
964 return; | |
965 | |
966 case '&': | |
967 p++; | |
968 if (*p == '=') | |
969 { p++; | |
970 t->value = TOKandass; | |
971 } | |
972 else if (*p == '&') | |
973 { p++; | |
974 t->value = TOKandand; | |
975 } | |
976 else | |
977 t->value = TOKand; | |
978 return; | |
979 | |
980 case '|': | |
981 p++; | |
982 if (*p == '=') | |
983 { p++; | |
984 t->value = TOKorass; | |
985 } | |
986 else if (*p == '|') | |
987 { p++; | |
988 t->value = TOKoror; | |
989 } | |
990 else | |
991 t->value = TOKor; | |
992 return; | |
993 | |
994 case '-': | |
995 p++; | |
996 if (*p == '=') | |
997 { p++; | |
998 t->value = TOKminass; | |
999 } | |
1000 #if 0 | |
1001 else if (*p == '>') | |
1002 { p++; | |
1003 t->value = TOKarrow; | |
1004 } | |
1005 #endif | |
1006 else if (*p == '-') | |
1007 { p++; | |
1008 t->value = TOKminusminus; | |
1009 } | |
1010 else | |
1011 t->value = TOKmin; | |
1012 return; | |
1013 | |
1014 case '+': | |
1015 p++; | |
1016 if (*p == '=') | |
1017 { p++; | |
1018 t->value = TOKaddass; | |
1019 } | |
1020 else if (*p == '+') | |
1021 { p++; | |
1022 t->value = TOKplusplus; | |
1023 } | |
1024 else | |
1025 t->value = TOKadd; | |
1026 return; | |
1027 | |
1028 case '<': | |
1029 p++; | |
1030 if (*p == '=') | |
1031 { p++; | |
1032 t->value = TOKle; // <= | |
1033 } | |
1034 else if (*p == '<') | |
1035 { p++; | |
1036 if (*p == '=') | |
1037 { p++; | |
1038 t->value = TOKshlass; // <<= | |
1039 } | |
1040 else | |
1041 t->value = TOKshl; // << | |
1042 } | |
1043 else if (*p == '>') | |
1044 { p++; | |
1045 if (*p == '=') | |
1046 { p++; | |
1047 t->value = TOKleg; // <>= | |
1048 } | |
1049 else | |
1050 t->value = TOKlg; // <> | |
1051 } | |
1052 else | |
1053 t->value = TOKlt; // < | |
1054 return; | |
1055 | |
1056 case '>': | |
1057 p++; | |
1058 if (*p == '=') | |
1059 { p++; | |
1060 t->value = TOKge; // >= | |
1061 } | |
1062 else if (*p == '>') | |
1063 { p++; | |
1064 if (*p == '=') | |
1065 { p++; | |
1066 t->value = TOKshrass; // >>= | |
1067 } | |
1068 else if (*p == '>') | |
1069 { p++; | |
1070 if (*p == '=') | |
1071 { p++; | |
1072 t->value = TOKushrass; // >>>= | |
1073 } | |
1074 else | |
1075 t->value = TOKushr; // >>> | |
1076 } | |
1077 else | |
1078 t->value = TOKshr; // >> | |
1079 } | |
1080 else | |
1081 t->value = TOKgt; // > | |
1082 return; | |
1083 | |
1084 case '!': | |
1085 p++; | |
1086 if (*p == '=') | |
1087 { p++; | |
1088 if (*p == '=' && global.params.Dversion == 1) | |
1089 { p++; | |
1090 t->value = TOKnotidentity; // !== | |
1091 } | |
1092 else | |
1093 t->value = TOKnotequal; // != | |
1094 } | |
1095 else if (*p == '<') | |
1096 { p++; | |
1097 if (*p == '>') | |
1098 { p++; | |
1099 if (*p == '=') | |
1100 { p++; | |
1101 t->value = TOKunord; // !<>= | |
1102 } | |
1103 else | |
1104 t->value = TOKue; // !<> | |
1105 } | |
1106 else if (*p == '=') | |
1107 { p++; | |
1108 t->value = TOKug; // !<= | |
1109 } | |
1110 else | |
1111 t->value = TOKuge; // !< | |
1112 } | |
1113 else if (*p == '>') | |
1114 { p++; | |
1115 if (*p == '=') | |
1116 { p++; | |
1117 t->value = TOKul; // !>= | |
1118 } | |
1119 else | |
1120 t->value = TOKule; // !> | |
1121 } | |
1122 else | |
1123 t->value = TOKnot; // ! | |
1124 return; | |
1125 | |
1126 case '=': | |
1127 p++; | |
1128 if (*p == '=') | |
1129 { p++; | |
1130 if (*p == '=' && global.params.Dversion == 1) | |
1131 { p++; | |
1132 t->value = TOKidentity; // === | |
1133 } | |
1134 else | |
1135 t->value = TOKequal; // == | |
1136 } | |
1137 else | |
1138 t->value = TOKassign; // = | |
1139 return; | |
1140 | |
1141 case '~': | |
1142 p++; | |
1143 if (*p == '=') | |
1144 { p++; | |
1145 t->value = TOKcatass; // ~= | |
1146 } | |
1147 else | |
1148 t->value = TOKtilde; // ~ | |
1149 return; | |
1150 | |
1151 #define SINGLE(c,tok) case c: p++; t->value = tok; return; | |
1152 | |
1153 SINGLE('(', TOKlparen) | |
1154 SINGLE(')', TOKrparen) | |
1155 SINGLE('[', TOKlbracket) | |
1156 SINGLE(']', TOKrbracket) | |
1157 SINGLE('{', TOKlcurly) | |
1158 SINGLE('}', TOKrcurly) | |
1159 SINGLE('?', TOKquestion) | |
1160 SINGLE(',', TOKcomma) | |
1161 SINGLE(';', TOKsemicolon) | |
1162 SINGLE(':', TOKcolon) | |
1163 SINGLE('$', TOKdollar) | |
1164 | |
1165 #undef SINGLE | |
1166 | |
1167 #define DOUBLE(c1,tok1,c2,tok2) \ | |
1168 case c1: \ | |
1169 p++; \ | |
1170 if (*p == c2) \ | |
1171 { p++; \ | |
1172 t->value = tok2; \ | |
1173 } \ | |
1174 else \ | |
1175 t->value = tok1; \ | |
1176 return; | |
1177 | |
1178 DOUBLE('*', TOKmul, '=', TOKmulass) | |
1179 DOUBLE('%', TOKmod, '=', TOKmodass) | |
1180 DOUBLE('^', TOKxor, '=', TOKxorass) | |
1181 | |
1182 #undef DOUBLE | |
1183 | |
1184 case '#': | |
1185 p++; | |
1186 pragma(); | |
1187 continue; | |
1188 | |
1189 default: | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1190 { unsigned c = *p; |
159 | 1191 |
1192 if (c & 0x80) | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1193 { c = decodeUTF(); |
159 | 1194 |
1195 // Check for start of unicode identifier | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1196 if (isUniAlpha(c)) |
159 | 1197 goto case_ident; |
1198 | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1199 if (c == PS || c == LS) |
159 | 1200 { |
1201 loc.linnum++; | |
1202 p++; | |
1203 continue; | |
1204 } | |
1205 } | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1206 if (c < 0x80 && isprint(c)) |
159 | 1207 error("unsupported char '%c'", c); |
1208 else | |
1209 error("unsupported char 0x%02x", c); | |
1210 p++; | |
1211 continue; | |
1212 } | |
1213 } | |
1214 } | |
1215 } | |
1216 | |
1217 /******************************************* | |
1218 * Parse escape sequence. | |
1219 */ | |
1220 | |
1221 unsigned Lexer::escapeSequence() | |
1222 { unsigned c; | |
1223 int n; | |
1224 int ndigits; | |
1225 | |
1226 c = *p; | |
1227 switch (c) | |
1228 { | |
1229 case '\'': | |
1230 case '"': | |
1231 case '?': | |
1232 case '\\': | |
1233 Lconsume: | |
1234 p++; | |
1235 break; | |
1236 | |
1237 case 'a': c = 7; goto Lconsume; | |
1238 case 'b': c = 8; goto Lconsume; | |
1239 case 'f': c = 12; goto Lconsume; | |
1240 case 'n': c = 10; goto Lconsume; | |
1241 case 'r': c = 13; goto Lconsume; | |
1242 case 't': c = 9; goto Lconsume; | |
1243 case 'v': c = 11; goto Lconsume; | |
1244 | |
1245 case 'u': | |
1246 ndigits = 4; | |
1247 goto Lhex; | |
1248 case 'U': | |
1249 ndigits = 8; | |
1250 goto Lhex; | |
1251 case 'x': | |
1252 ndigits = 2; | |
1253 Lhex: | |
1254 p++; | |
1255 c = *p; | |
1256 if (ishex(c)) | |
1257 { unsigned v; | |
1258 | |
1259 n = 0; | |
1260 v = 0; | |
1261 while (1) | |
1262 { | |
1263 if (isdigit(c)) | |
1264 c -= '0'; | |
1265 else if (islower(c)) | |
1266 c -= 'a' - 10; | |
1267 else | |
1268 c -= 'A' - 10; | |
1269 v = v * 16 + c; | |
1270 c = *++p; | |
1271 if (++n == ndigits) | |
1272 break; | |
1273 if (!ishex(c)) | |
1274 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); | |
1275 break; | |
1276 } | |
1277 } | |
1278 if (ndigits != 2 && !utf_isValidDchar(v)) | |
1587 | 1279 { error("invalid UTF character \\U%08x", v); |
1280 v = '?'; // recover with valid UTF character | |
1281 } | |
159 | 1282 c = v; |
1283 } | |
1284 else | |
1285 error("undefined escape hex sequence \\%c\n",c); | |
1286 break; | |
1287 | |
1288 case '&': // named character entity | |
1289 for (unsigned char *idstart = ++p; 1; p++) | |
1290 { | |
1291 switch (*p) | |
1292 { | |
1293 case ';': | |
1294 c = HtmlNamedEntity(idstart, p - idstart); | |
1295 if (c == ~0) | |
1296 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); | |
1297 c = ' '; | |
1298 } | |
1299 p++; | |
1300 break; | |
1301 | |
1302 default: | |
1303 if (isalpha(*p) || | |
1304 (p != idstart + 1 && isdigit(*p))) | |
1305 continue; | |
1306 error("unterminated named entity"); | |
1307 break; | |
1308 } | |
1309 break; | |
1310 } | |
1311 break; | |
1312 | |
1313 case 0: | |
1314 case 0x1A: // end of file | |
1315 c = '\\'; | |
1316 break; | |
1317 | |
1318 default: | |
1319 if (isoctal(c)) | |
1320 { unsigned v; | |
1321 | |
1322 n = 0; | |
1323 v = 0; | |
1324 do | |
1325 { | |
1326 v = v * 8 + (c - '0'); | |
1327 c = *++p; | |
1328 } while (++n < 3 && isoctal(c)); | |
1329 c = v; | |
1330 if (c > 0xFF) | |
1331 error("0%03o is larger than a byte", c); | |
1332 } | |
1333 else | |
1334 error("undefined escape sequence \\%c\n",c); | |
1335 break; | |
1336 } | |
1337 return c; | |
1338 } | |
1339 | |
1340 /************************************** | |
1341 */ | |
1342 | |
1343 TOK Lexer::wysiwygStringConstant(Token *t, int tc) | |
1344 { unsigned c; | |
1345 Loc start = loc; | |
1346 | |
1347 p++; | |
1348 stringbuffer.reset(); | |
1349 while (1) | |
1350 { | |
1351 c = *p++; | |
1352 switch (c) | |
1353 { | |
1354 case '\n': | |
1355 loc.linnum++; | |
1356 break; | |
1357 | |
1358 case '\r': | |
1359 if (*p == '\n') | |
1360 continue; // ignore | |
1361 c = '\n'; // treat EndOfLine as \n character | |
1362 loc.linnum++; | |
1363 break; | |
1364 | |
1365 case 0: | |
1366 case 0x1A: | |
1367 error("unterminated string constant starting at %s", start.toChars()); | |
1368 t->ustring = (unsigned char *)""; | |
1369 t->len = 0; | |
1370 t->postfix = 0; | |
1371 return TOKstring; | |
1372 | |
1373 case '"': | |
1374 case '`': | |
1375 if (c == tc) | |
1376 { | |
1377 t->len = stringbuffer.offset; | |
1378 stringbuffer.writeByte(0); | |
1379 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1380 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1381 stringPostfix(t); | |
1382 return TOKstring; | |
1383 } | |
1384 break; | |
1385 | |
1386 default: | |
1387 if (c & 0x80) | |
1388 { p--; | |
1389 unsigned u = decodeUTF(); | |
1390 p++; | |
1391 if (u == PS || u == LS) | |
1392 loc.linnum++; | |
1393 stringbuffer.writeUTF8(u); | |
1394 continue; | |
1395 } | |
1396 break; | |
1397 } | |
1398 stringbuffer.writeByte(c); | |
1399 } | |
1400 } | |
1401 | |
1402 /************************************** | |
1403 * Lex hex strings: | |
1404 * x"0A ae 34FE BD" | |
1405 */ | |
1406 | |
1407 TOK Lexer::hexStringConstant(Token *t) | |
1408 { unsigned c; | |
1409 Loc start = loc; | |
1410 unsigned n = 0; | |
1411 unsigned v; | |
1412 | |
1413 p++; | |
1414 stringbuffer.reset(); | |
1415 while (1) | |
1416 { | |
1417 c = *p++; | |
1418 switch (c) | |
1419 { | |
1420 case ' ': | |
1421 case '\t': | |
1422 case '\v': | |
1423 case '\f': | |
1424 continue; // skip white space | |
1425 | |
1426 case '\r': | |
1427 if (*p == '\n') | |
1428 continue; // ignore | |
1429 // Treat isolated '\r' as if it were a '\n' | |
1430 case '\n': | |
1431 loc.linnum++; | |
1432 continue; | |
1433 | |
1434 case 0: | |
1435 case 0x1A: | |
1436 error("unterminated string constant starting at %s", start.toChars()); | |
1437 t->ustring = (unsigned char *)""; | |
1438 t->len = 0; | |
1439 t->postfix = 0; | |
1440 return TOKstring; | |
1441 | |
1442 case '"': | |
1443 if (n & 1) | |
1444 { error("odd number (%d) of hex characters in hex string", n); | |
1445 stringbuffer.writeByte(v); | |
1446 } | |
1447 t->len = stringbuffer.offset; | |
1448 stringbuffer.writeByte(0); | |
1449 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1450 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1451 stringPostfix(t); | |
1452 return TOKstring; | |
1453 | |
1454 default: | |
1455 if (c >= '0' && c <= '9') | |
1456 c -= '0'; | |
1457 else if (c >= 'a' && c <= 'f') | |
1458 c -= 'a' - 10; | |
1459 else if (c >= 'A' && c <= 'F') | |
1460 c -= 'A' - 10; | |
1461 else if (c & 0x80) | |
1462 { p--; | |
1463 unsigned u = decodeUTF(); | |
1464 p++; | |
1465 if (u == PS || u == LS) | |
1466 loc.linnum++; | |
1467 else | |
1614
dbf7b54f542f
Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed...
Leandro Lucarella <llucax@gmail.com>
parents:
1603
diff
changeset
|
1468 error("non-hex character \\u%04x", u); |
159 | 1469 } |
1470 else | |
1471 error("non-hex character '%c'", c); | |
1472 if (n & 1) | |
1473 { v = (v << 4) | c; | |
1474 stringbuffer.writeByte(v); | |
1475 } | |
1476 else | |
1477 v = c; | |
1478 n++; | |
1479 break; | |
1480 } | |
1481 } | |
1482 } | |
1483 | |
1484 | |
336 | 1485 #if DMDV2 |
159 | 1486 /************************************** |
1487 * Lex delimited strings: | |
1488 * q"(foo(xxx))" // "foo(xxx)" | |
1489 * q"[foo(]" // "foo(" | |
1490 * q"/foo]/" // "foo]" | |
1491 * q"HERE | |
1492 * foo | |
1493 * HERE" // "foo\n" | |
1494 * Input: | |
1495 * p is on the " | |
1496 */ | |
1497 | |
1498 TOK Lexer::delimitedStringConstant(Token *t) | |
1499 { unsigned c; | |
1500 Loc start = loc; | |
1501 unsigned delimleft = 0; | |
1502 unsigned delimright = 0; | |
1503 unsigned nest = 1; | |
1504 unsigned nestcount; | |
1505 Identifier *hereid = NULL; | |
1506 unsigned blankrol = 0; | |
1507 unsigned startline = 0; | |
1508 | |
1509 p++; | |
1510 stringbuffer.reset(); | |
1511 while (1) | |
1512 { | |
1513 c = *p++; | |
1514 //printf("c = '%c'\n", c); | |
1515 switch (c) | |
1516 { | |
1517 case '\n': | |
1518 Lnextline: | |
1519 loc.linnum++; | |
1520 startline = 1; | |
1521 if (blankrol) | |
1522 { blankrol = 0; | |
1523 continue; | |
1524 } | |
1525 if (hereid) | |
1526 { | |
1527 stringbuffer.writeUTF8(c); | |
1528 continue; | |
1529 } | |
1530 break; | |
1531 | |
1532 case '\r': | |
1533 if (*p == '\n') | |
1534 continue; // ignore | |
1535 c = '\n'; // treat EndOfLine as \n character | |
1536 goto Lnextline; | |
1537 | |
1538 case 0: | |
1539 case 0x1A: | |
1540 goto Lerror; | |
1541 | |
1542 default: | |
1543 if (c & 0x80) | |
1544 { p--; | |
1545 c = decodeUTF(); | |
1546 p++; | |
1547 if (c == PS || c == LS) | |
1548 goto Lnextline; | |
1549 } | |
1550 break; | |
1551 } | |
1552 if (delimleft == 0) | |
1553 { delimleft = c; | |
1554 nest = 1; | |
1555 nestcount = 1; | |
1556 if (c == '(') | |
1557 delimright = ')'; | |
1558 else if (c == '{') | |
1559 delimright = '}'; | |
1560 else if (c == '[') | |
1561 delimright = ']'; | |
1562 else if (c == '<') | |
1563 delimright = '>'; | |
1564 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) | |
1565 { // Start of identifier; must be a heredoc | |
1566 Token t; | |
1567 p--; | |
1568 scan(&t); // read in heredoc identifier | |
1569 if (t.value != TOKidentifier) | |
1570 { error("identifier expected for heredoc, not %s", t.toChars()); | |
1571 delimright = c; | |
1572 } | |
1573 else | |
1574 { hereid = t.ident; | |
1575 //printf("hereid = '%s'\n", hereid->toChars()); | |
1576 blankrol = 1; | |
1577 } | |
1578 nest = 0; | |
1579 } | |
1580 else | |
1581 { delimright = c; | |
1582 nest = 0; | |
1583 } | |
1584 } | |
1585 else | |
1586 { | |
1587 if (blankrol) | |
1588 { error("heredoc rest of line should be blank"); | |
1589 blankrol = 0; | |
1590 continue; | |
1591 } | |
1592 if (nest == 1) | |
1593 { | |
1594 if (c == delimleft) | |
1595 nestcount++; | |
1596 else if (c == delimright) | |
1597 { nestcount--; | |
1598 if (nestcount == 0) | |
1599 goto Ldone; | |
1600 } | |
1601 } | |
1602 else if (c == delimright) | |
1603 goto Ldone; | |
1604 if (startline && isalpha(c)) | |
1605 { Token t; | |
1606 unsigned char *psave = p; | |
1607 p--; | |
1608 scan(&t); // read in possible heredoc identifier | |
1609 //printf("endid = '%s'\n", t.ident->toChars()); | |
1610 if (t.value == TOKidentifier && t.ident->equals(hereid)) | |
1611 { /* should check that rest of line is blank | |
1612 */ | |
1613 goto Ldone; | |
1614 } | |
1615 p = psave; | |
1616 } | |
1617 stringbuffer.writeUTF8(c); | |
1618 startline = 0; | |
1619 } | |
1620 } | |
1621 | |
1622 Ldone: | |
1623 if (*p == '"') | |
1624 p++; | |
1625 else | |
1626 error("delimited string must end in %c\"", delimright); | |
1627 t->len = stringbuffer.offset; | |
1628 stringbuffer.writeByte(0); | |
1629 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1630 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1631 stringPostfix(t); | |
1632 return TOKstring; | |
1633 | |
1634 Lerror: | |
1635 error("unterminated string constant starting at %s", start.toChars()); | |
1636 t->ustring = (unsigned char *)""; | |
1637 t->len = 0; | |
1638 t->postfix = 0; | |
1639 return TOKstring; | |
1640 } | |
1641 | |
1642 /************************************** | |
1643 * Lex delimited strings: | |
1644 * q{ foo(xxx) } // " foo(xxx) " | |
1645 * q{foo(} // "foo(" | |
1646 * q{{foo}"}"} // "{foo}"}"" | |
1647 * Input: | |
1648 * p is on the q | |
1649 */ | |
1650 | |
1651 TOK Lexer::tokenStringConstant(Token *t) | |
1652 { | |
1653 unsigned nest = 1; | |
1654 Loc start = loc; | |
1655 unsigned char *pstart = ++p; | |
1656 | |
1657 while (1) | |
1658 { Token tok; | |
1659 | |
1660 scan(&tok); | |
1661 switch (tok.value) | |
1662 { | |
1663 case TOKlcurly: | |
1664 nest++; | |
1665 continue; | |
1666 | |
1667 case TOKrcurly: | |
1668 if (--nest == 0) | |
1669 goto Ldone; | |
1670 continue; | |
1671 | |
1672 case TOKeof: | |
1673 goto Lerror; | |
1674 | |
1675 default: | |
1676 continue; | |
1677 } | |
1678 } | |
1679 | |
1680 Ldone: | |
1681 t->len = p - 1 - pstart; | |
1682 t->ustring = (unsigned char *)mem.malloc(t->len + 1); | |
1683 memcpy(t->ustring, pstart, t->len); | |
1684 t->ustring[t->len] = 0; | |
1685 stringPostfix(t); | |
1686 return TOKstring; | |
1687 | |
1688 Lerror: | |
1689 error("unterminated token string constant starting at %s", start.toChars()); | |
1690 t->ustring = (unsigned char *)""; | |
1691 t->len = 0; | |
1692 t->postfix = 0; | |
1693 return TOKstring; | |
1694 } | |
1695 | |
1696 #endif | |
1697 | |
1698 | |
1699 /************************************** | |
1700 */ | |
1701 | |
1702 TOK Lexer::escapeStringConstant(Token *t, int wide) | |
1703 { unsigned c; | |
1704 Loc start = loc; | |
1705 | |
1706 p++; | |
1707 stringbuffer.reset(); | |
1708 while (1) | |
1709 { | |
1710 c = *p++; | |
1711 switch (c) | |
1712 { | |
1713 case '\\': | |
1714 switch (*p) | |
1715 { | |
1716 case 'u': | |
1717 case 'U': | |
1718 case '&': | |
1719 c = escapeSequence(); | |
1720 stringbuffer.writeUTF8(c); | |
1721 continue; | |
1722 | |
1723 default: | |
1724 c = escapeSequence(); | |
1725 break; | |
1726 } | |
1727 break; | |
1728 | |
1729 case '\n': | |
1730 loc.linnum++; | |
1731 break; | |
1732 | |
1733 case '\r': | |
1734 if (*p == '\n') | |
1735 continue; // ignore | |
1736 c = '\n'; // treat EndOfLine as \n character | |
1737 loc.linnum++; | |
1738 break; | |
1739 | |
1740 case '"': | |
1741 t->len = stringbuffer.offset; | |
1742 stringbuffer.writeByte(0); | |
1743 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); | |
1744 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); | |
1745 stringPostfix(t); | |
1746 return TOKstring; | |
1747 | |
1748 case 0: | |
1749 case 0x1A: | |
1750 p--; | |
1751 error("unterminated string constant starting at %s", start.toChars()); | |
1752 t->ustring = (unsigned char *)""; | |
1753 t->len = 0; | |
1754 t->postfix = 0; | |
1755 return TOKstring; | |
1756 | |
1757 default: | |
1758 if (c & 0x80) | |
1759 { | |
1760 p--; | |
1761 c = decodeUTF(); | |
1762 if (c == LS || c == PS) | |
1763 { c = '\n'; | |
1764 loc.linnum++; | |
1765 } | |
1766 p++; | |
1767 stringbuffer.writeUTF8(c); | |
1768 continue; | |
1769 } | |
1770 break; | |
1771 } | |
1772 stringbuffer.writeByte(c); | |
1773 } | |
1774 } | |
1775 | |
1776 /************************************** | |
1777 */ | |
1778 | |
1779 TOK Lexer::charConstant(Token *t, int wide) | |
1780 { | |
1781 unsigned c; | |
1782 TOK tk = TOKcharv; | |
1783 | |
1784 //printf("Lexer::charConstant\n"); | |
1785 p++; | |
1786 c = *p++; | |
1787 switch (c) | |
1788 { | |
1789 case '\\': | |
1790 switch (*p) | |
1791 { | |
1792 case 'u': | |
1793 t->uns64value = escapeSequence(); | |
1794 tk = TOKwcharv; | |
1795 break; | |
1796 | |
1797 case 'U': | |
1798 case '&': | |
1799 t->uns64value = escapeSequence(); | |
1800 tk = TOKdcharv; | |
1801 break; | |
1802 | |
1803 default: | |
1804 t->uns64value = escapeSequence(); | |
1805 break; | |
1806 } | |
1807 break; | |
1808 | |
1809 case '\n': | |
1810 L1: | |
1811 loc.linnum++; | |
1812 case '\r': | |
1813 case 0: | |
1814 case 0x1A: | |
1815 case '\'': | |
1816 error("unterminated character constant"); | |
1817 return tk; | |
1818 | |
1819 default: | |
1820 if (c & 0x80) | |
1821 { | |
1822 p--; | |
1823 c = decodeUTF(); | |
1824 p++; | |
1825 if (c == LS || c == PS) | |
1826 goto L1; | |
1827 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) | |
1828 tk = TOKwcharv; | |
1829 else | |
1830 tk = TOKdcharv; | |
1831 } | |
1832 t->uns64value = c; | |
1833 break; | |
1834 } | |
1835 | |
1836 if (*p != '\'') | |
1837 { error("unterminated character constant"); | |
1838 return tk; | |
1839 } | |
1840 p++; | |
1841 return tk; | |
1842 } | |
1843 | |
1844 /*************************************** | |
1845 * Get postfix of string literal. | |
1846 */ | |
1847 | |
1848 void Lexer::stringPostfix(Token *t) | |
1849 { | |
1850 switch (*p) | |
1851 { | |
1852 case 'c': | |
1853 case 'w': | |
1854 case 'd': | |
1855 t->postfix = *p; | |
1856 p++; | |
1857 break; | |
1858 | |
1859 default: | |
1860 t->postfix = 0; | |
1861 break; | |
1862 } | |
1863 } | |
1864 | |
1865 /*************************************** | |
1866 * Read \u or \U unicode sequence | |
1867 * Input: | |
1868 * u 'u' or 'U' | |
1869 */ | |
1870 | |
1871 #if 0 | |
1872 unsigned Lexer::wchar(unsigned u) | |
1873 { | |
1874 unsigned value; | |
1875 unsigned n; | |
1876 unsigned char c; | |
1877 unsigned nchars; | |
1878 | |
1879 nchars = (u == 'U') ? 8 : 4; | |
1880 value = 0; | |
1881 for (n = 0; 1; n++) | |
1882 { | |
1883 ++p; | |
1884 if (n == nchars) | |
1885 break; | |
1886 c = *p; | |
1887 if (!ishex(c)) | |
1888 { error("\\%c sequence must be followed by %d hex characters", u, nchars); | |
1889 break; | |
1890 } | |
1891 if (isdigit(c)) | |
1892 c -= '0'; | |
1893 else if (islower(c)) | |
1894 c -= 'a' - 10; | |
1895 else | |
1896 c -= 'A' - 10; | |
1897 value <<= 4; | |
1898 value |= c; | |
1899 } | |
1900 return value; | |
1901 } | |
1902 #endif | |
1903 | |
1904 /************************************** | |
1905 * Read in a number. | |
1906 * If it's an integer, store it in tok.TKutok.Vlong. | |
1907 * integers can be decimal, octal or hex | |
1908 * Handle the suffixes U, UL, LU, L, etc. | |
1909 * If it's double, store it in tok.TKutok.Vdouble. | |
1910 * Returns: | |
1911 * TKnum | |
1912 * TKdouble,... | |
1913 */ | |
1914 | |
// Lex an integer literal starting at p.
//
// A state machine copies the characters of the literal into stringbuffer
// (embedded '_' characters are dropped), the collected text is converted to
// an integer, the u/U/l/L suffixes are consumed, and the token kind is chosen
// from the value and the suffix/decimal flags.  If the text turns out to be a
// floating point literal, p is reset to the start and inreal() lexes it.
//
// The value is stored in t->uns64value; the return value is TOKint32v,
// TOKuns32v, TOKint64v or TOKuns64v (or whatever inreal() returns).
1915 TOK Lexer::number(Token *t) | |
1916 { | |
1917 // We use a state machine to collect numbers | |
1918 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, | |
1919 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, | |
1920 STATE_hexh, STATE_error }; | |
1921 enum STATE state; | |
1922 | |
1923 enum FLAGS | |
1924 { FLAGS_decimal = 1, // decimal | |
1925 FLAGS_unsigned = 2, // u or U suffix | |
1926 FLAGS_long = 4, // l or L suffix | |
1927 }; | |
1928 enum FLAGS flags = FLAGS_decimal; | |
1929 | |
1930 int i; | |
1931 int base; | |
1932 unsigned c; | |
1933 unsigned char *start; | |
1934 TOK result; | |
1935 | |
1936 //printf("Lexer::number()\n"); | |
1937 state = STATE_initial; | |
1938 base = 0; | |
1939 stringbuffer.reset(); | |
1940 start = p; | |
// Each pass examines *p.  A 'break' out of the state switch appends the
// character to stringbuffer and advances p; 'continue' is used to skip a
// character without storing it; 'goto done' ends the integer.
1941 while (1) | |
1942 { | |
1943 c = *p; | |
1944 switch (state) | |
1945 { | |
1946 case STATE_initial: // opening state | |
1947 if (c == '0') | |
1948 state = STATE_0; | |
1949 else | |
1950 state = STATE_decimal; | |
1951 break; | |
1952 | |
// A leading '0' has been stored: the literal is no longer treated as
// decimal, and this character selects hex, binary, octal or real.
1953 case STATE_0: | |
1954 flags = (FLAGS) (flags & ~FLAGS_decimal); | |
1955 switch (c) | |
1956 { | |
1957 #if ZEROH | |
1958 case 'H': // 0h | |
1959 case 'h': | |
1960 goto hexh; | |
1961 #endif | |
1962 case 'X': | |
1963 case 'x': | |
1964 state = STATE_hex0; | |
1965 break; | |
1966 | |
// A single '.' falls through into the cases below and is lexed as a real.
1967 case '.': | |
1968 if (p[1] == '.') // .. is a separate token | |
1969 goto done; | |
1970 case 'i': | |
1971 case 'f': | |
1972 case 'F': | |
1973 goto real; | |
1974 #if ZEROH | |
1975 case 'E': | |
1976 case 'e': | |
1977 goto case_hex; | |
1978 #endif | |
1979 case 'B': | |
1980 case 'b': | |
1981 state = STATE_binary0; | |
1982 break; | |
1983 | |
1984 case '0': case '1': case '2': case '3': | |
1985 case '4': case '5': case '6': case '7': | |
1986 state = STATE_octal; | |
1987 break; | |
1988 | |
1989 #if ZEROH | |
1990 case '8': case '9': case 'A': | |
1991 case 'C': case 'D': case 'F': | |
1992 case 'a': case 'c': case 'd': case 'f': | |
1993 case_hex: | |
1994 state = STATE_hexh; | |
1995 break; | |
1996 #endif | |
1997 case '_': | |
1998 state = STATE_octal; | |
1999 p++; | |
2000 continue; | |
2001 | |
2002 case 'L': | |
2003 if (p[1] == 'i') | |
2004 goto real; | |
2005 goto done; | |
2006 | |
2007 default: | |
2008 goto done; | |
2009 } | |
2010 break; | |
2011 | |
2012 case STATE_decimal: // reading decimal number | |
2013 if (!isdigit(c)) | |
2014 { | |
2015 #if ZEROH | |
2016 if (ishex(c) | |
2017 || c == 'H' || c == 'h' | |
2018 ) | |
2019 goto hexh; | |
2020 #endif | |
2021 if (c == '_') // ignore embedded _ | |
2022 { p++; | |
2023 continue; | |
2024 } | |
2025 if (c == '.' && p[1] != '.') | |
2026 goto real; | |
2027 else if (c == 'i' || c == 'f' || c == 'F' || | |
2028 c == 'e' || c == 'E') | |
2029 { | |
// The 'real' label is also the target of gotos from the other states.
2030 real: // It's a real number. Back up and rescan as a real | |
2031 p = start; | |
2032 return inreal(t); | |
2033 } | |
2034 else if (c == 'L' && p[1] == 'i') | |
2035 goto real; | |
2036 goto done; | |
2037 } | |
2038 break; | |
2039 | |
// STATE_hex0 means no hex digit has been seen yet after the "0x" prefix;
// the first hex digit moves to STATE_hex.
2040 case STATE_hex0: // reading hex number | |
2041 case STATE_hex: | |
2042 if (!ishex(c)) | |
2043 { | |
2044 if (c == '_') // ignore embedded _ | |
2045 { p++; | |
2046 continue; | |
2047 } | |
2048 if (c == '.' && p[1] != '.') | |
2049 goto real; | |
2050 if (c == 'P' || c == 'p' || c == 'i') | |
2051 goto real; | |
2052 if (state == STATE_hex0) | |
2053 error("Hex digit expected, not '%c'", c); | |
2054 goto done; | |
2055 } | |
2056 state = STATE_hex; | |
2057 break; | |
2058 | |
2059 #if ZEROH | |
2060 hexh: | |
2061 state = STATE_hexh; | |
2062 case STATE_hexh: // parse numbers like 0FFh | |
2063 if (!ishex(c)) | |
2064 { | |
2065 if (c == 'H' || c == 'h') | |
2066 { | |
2067 p++; | |
2068 base = 16; | |
2069 goto done; | |
2070 } | |
2071 else | |
2072 { | |
2073 // Check for something like 1E3 or 0E24 | |
2074 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || | |
2075 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) | |
2076 goto real; | |
2077 error("Hex digit expected, not '%c'", c); | |
2078 goto done; | |
2079 } | |
2080 } | |
2081 break; | |
2082 #endif | |
2083 | |
// A digit 8 or 9 switches to STATE_octale, which is reported as an error
// once the whole literal has been collected.
2084 case STATE_octal: // reading octal number | |
2085 case STATE_octale: // reading octal number with non-octal digits | |
2086 if (!isoctal(c)) | |
2087 { | |
2088 #if ZEROH | |
2089 if (ishex(c) | |
2090 || c == 'H' || c == 'h' | |
2091 ) | |
2092 goto hexh; | |
2093 #endif | |
2094 if (c == '_') // ignore embedded _ | |
2095 { p++; | |
2096 continue; | |
2097 } | |
2098 if (c == '.' && p[1] != '.') | |
2099 goto real; | |
2100 if (c == 'i') | |
2101 goto real; | |
2102 if (isdigit(c)) | |
2103 { | |
2104 state = STATE_octale; | |
2105 } | |
2106 else | |
2107 goto done; | |
2108 } | |
2109 break; | |
2110 | |
2111 case STATE_binary0: // starting binary number | |
2112 case STATE_binary: // reading binary number | |
2113 if (c != '0' && c != '1') | |
2114 { | |
2115 #if ZEROH | |
2116 if (ishex(c) | |
2117 || c == 'H' || c == 'h' | |
2118 ) | |
2119 goto hexh; | |
2120 #endif | |
2121 if (c == '_') // ignore embedded _ | |
2122 { p++; | |
2123 continue; | |
2124 } | |
2125 if (state == STATE_binary0) | |
2126 { error("binary digit expected"); | |
2127 state = STATE_error; | |
2128 break; | |
2129 } | |
2130 else | |
2131 goto done; | |
2132 } | |
2133 state = STATE_binary; | |
2134 break; | |
2135 | |
2136 case STATE_error: // for error recovery | |
2137 if (!isdigit(c)) // scan until non-digit | |
2138 goto done; | |
2139 break; | |
2140 | |
2141 default: | |
2142 assert(0); | |
2143 } | |
2144 stringbuffer.writeByte(c); | |
2145 p++; | |
2146 } | |
2147 done: | |
2148 stringbuffer.writeByte(0); // terminate string | |
2149 if (state == STATE_octale) | |
2150 error("Octal digit expected"); | |
2151 | |
2152 uinteger_t n; // unsigned >=64 bit integer type | |
2153 | |
// offset includes the terminating 0 written above, so offset == 2 means
// exactly one character was collected: take its value directly.
2154 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) | |
2155 n = stringbuffer.data[0] - '0'; | |
2156 else | |
2157 { | |
2158 // Convert string to integer | |
2159 #if __DMC__ | |
2160 errno = 0; | |
2161 n = strtoull((char *)stringbuffer.data,NULL,base); | |
2162 if (errno == ERANGE) | |
2163 error("integer overflow"); | |
2164 #else | |
2165 // Not everybody implements strtoull() | |
// NOTE: this local p walks the collected text and shadows the Lexer member p
// until the end of the enclosing block.
2166 char *p = (char *)stringbuffer.data; | |
2167 int r = 10, d; | |
2168 | |
// Pick the radix from the prefix: 0x/0X -> 16, 0b/0B -> 2, 0<digit> -> 8.
2169 if (*p == '0') | |
2170 { | |
2171 if (p[1] == 'x' || p[1] == 'X') | |
2172 p += 2, r = 16; | |
2173 else if (p[1] == 'b' || p[1] == 'B') | |
2174 p += 2, r = 2; | |
2175 else if (isdigit(p[1])) | |
2176 p += 1, r = 8; | |
2177 } | |
2178 | |
2179 n = 0; | |
2180 while (1) | |
2181 { | |
2182 if (*p >= '0' && *p <= '9') | |
2183 d = *p - '0'; | |
2184 else if (*p >= 'a' && *p <= 'z') | |
2185 d = *p - 'a' + 10; | |
2186 else if (*p >= 'A' && *p <= 'Z') | |
2187 d = *p - 'A' + 10; | |
2188 else | |
2189 break; | |
2190 if (d >= r) | |
2191 break; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2192 uinteger_t n2 = n * r; |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2193 //printf("n2 / r = %llx, n = %llx\n", n2/r, n); |
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
// Overflow check: either the multiplication did not round-trip, or adding
// the digit wrapped around.
2194 if (n2 / r != n || n2 + d < n) |
159 | 2195 { |
2196 error ("integer overflow"); | |
2197 break; | |
2198 } | |
2199 | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2200 n = n2 + d; |
159 | 2201 p++; |
2202 } | |
2203 #endif | |
2204 if (sizeof(n) > 8 && | |
2205 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits | |
2206 error("integer overflow"); | |
2207 } | |
2208 | |
2209 // Parse trailing 'u', 'U', 'l' or 'L' in any combination | |
// From here on p is the Lexer member again, positioned just after the digits.
2210 while (1) | |
2211 { unsigned char f; | |
2212 | |
2213 switch (*p) | |
2214 { case 'U': | |
2215 case 'u': | |
2216 f = FLAGS_unsigned; | |
2217 goto L1; | |
2218 | |
// The leading '1 ||' makes the condition below always true, so the error is
// reported unconditionally; control then falls through into case 'L'.
2219 case 'l': | |
2220 if (1 || !global.params.useDeprecated) | |
2221 error("'l' suffix is deprecated, use 'L' instead"); | |
2222 case 'L': | |
2223 f = FLAGS_long; | |
2224 L1: | |
2225 p++; | |
// A suffix kind that is already set has been given twice.
2226 if (flags & f) | |
2227 error("unrecognized token"); | |
2228 flags = (FLAGS) (flags | f); | |
2229 continue; | |
2230 default: | |
2231 break; | |
2232 } | |
2233 break; | |
2234 } | |
2235 | |
// Choose the token kind from the combination of decimal/unsigned/long flags
// and the magnitude of n.
2236 switch (flags) | |
2237 { | |
2238 case 0: | |
2239 /* Octal or Hexadecimal constant. | |
2240 * First that fits: int, uint, long, ulong | |
2241 */ | |
2242 if (n & 0x8000000000000000LL) | |
2243 result = TOKuns64v; | |
2244 else if (n & 0xFFFFFFFF00000000LL) | |
2245 result = TOKint64v; | |
2246 else if (n & 0x80000000) | |
2247 result = TOKuns32v; | |
2248 else | |
2249 result = TOKint32v; | |
2250 break; | |
2251 | |
2252 case FLAGS_decimal: | |
2253 /* First that fits: int, long, long long | |
2254 */ | |
2255 if (n & 0x8000000000000000LL) | |
2256 { error("signed integer overflow"); | |
2257 result = TOKuns64v; | |
2258 } | |
2259 else if (n & 0xFFFFFFFF80000000LL) | |
2260 result = TOKint64v; | |
2261 else | |
2262 result = TOKint32v; | |
2263 break; | |
2264 | |
2265 case FLAGS_unsigned: | |
2266 case FLAGS_decimal | FLAGS_unsigned: | |
2267 /* First that fits: uint, ulong | |
2268 */ | |
2269 if (n & 0xFFFFFFFF00000000LL) | |
2270 result = TOKuns64v; | |
2271 else | |
2272 result = TOKuns32v; | |
2273 break; | |
2274 | |
2275 case FLAGS_decimal | FLAGS_long: | |
2276 if (n & 0x8000000000000000LL) | |
2277 { error("signed integer overflow"); | |
2278 result = TOKuns64v; | |
2279 } | |
2280 else | |
2281 result = TOKint64v; | |
2282 break; | |
2283 | |
2284 case FLAGS_long: | |
2285 if (n & 0x8000000000000000LL) | |
2286 result = TOKuns64v; | |
2287 else | |
2288 result = TOKint64v; | |
2289 break; | |
2290 | |
2291 case FLAGS_unsigned | FLAGS_long: | |
2292 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: | |
2293 result = TOKuns64v; | |
2294 break; | |
2295 | |
2296 default: | |
2297 #ifdef DEBUG | |
2298 printf("%x\n",flags); | |
2299 #endif | |
2300 assert(0); | |
2301 } | |
2302 t->uns64value = n; | |
2303 return result; | |
2304 } | |
2305 | |
2306 /************************************** | |
2307 * Read in characters, converting them to real. | |
2308 * Bugs: | |
2309 * Exponent overflow not detected. | |
2310 * Too much requested precision is not detected. | |
2311 */ | |
2312 | |
2313 TOK Lexer::inreal(Token *t) | |
2314 #ifdef __DMC__ | |
2315 __in | |
2316 { | |
2317 assert(*p == '.' || isdigit(*p)); | |
2318 } | |
2319 __out (result) | |
2320 { | |
2321 switch (result) | |
2322 { | |
2323 case TOKfloat32v: | |
2324 case TOKfloat64v: | |
2325 case TOKfloat80v: | |
2326 case TOKimaginary32v: | |
2327 case TOKimaginary64v: | |
2328 case TOKimaginary80v: | |
2329 break; | |
2330 | |
2331 default: | |
2332 assert(0); | |
2333 } | |
2334 } | |
2335 __body | |
2336 #endif /* __DMC__ */ | |
2337 { int dblstate; | |
2338 unsigned c; | |
2339 char hex; // is this a hexadecimal-floating-constant? | |
2340 TOK result; | |
2341 | |
2342 //printf("Lexer::inreal()\n"); | |
2343 stringbuffer.reset(); | |
2344 dblstate = 0; | |
2345 hex = 0; | |
2346 Lnext: | |
2347 while (1) | |
2348 { | |
2349 // Get next char from input | |
2350 c = *p++; | |
2351 //printf("dblstate = %d, c = '%c'\n", dblstate, c); | |
2352 while (1) | |
2353 { | |
2354 switch (dblstate) | |
2355 { | |
2356 case 0: // opening state | |
2357 if (c == '0') | |
2358 dblstate = 9; | |
2359 else if (c == '.') | |
2360 dblstate = 3; | |
2361 else | |
2362 dblstate = 1; | |
2363 break; | |
2364 | |
2365 case 9: | |
2366 dblstate = 1; | |
2367 if (c == 'X' || c == 'x') | |
2368 { hex++; | |
2369 break; | |
2370 } | |
2371 case 1: // digits to left of . | |
2372 case 3: // digits to right of . | |
2373 case 7: // continuing exponent digits | |
2374 if (!isdigit(c) && !(hex && isxdigit(c))) | |
2375 { | |
2376 if (c == '_') | |
2377 goto Lnext; // ignore embedded '_' | |
2378 dblstate++; | |
2379 continue; | |
2380 } | |
2381 break; | |
2382 | |
2383 case 2: // no more digits to left of . | |
2384 if (c == '.') | |
2385 { dblstate++; | |
2386 break; | |
2387 } | |
2388 case 4: // no more digits to right of . | |
2389 if ((c == 'E' || c == 'e') || | |
2390 hex && (c == 'P' || c == 'p')) | |
2391 { dblstate = 5; | |
2392 hex = 0; // exponent is always decimal | |
2393 break; | |
2394 } | |
2395 if (hex) | |
2396 error("binary-exponent-part required"); | |
2397 goto done; | |
2398 | |
2399 case 5: // looking immediately to right of E | |
2400 dblstate++; | |
2401 if (c == '-' || c == '+') | |
2402 break; | |
2403 case 6: // 1st exponent digit expected | |
2404 if (!isdigit(c)) | |
2405 error("exponent expected"); | |
2406 dblstate++; | |
2407 break; | |
2408 | |
2409 case 8: // past end of exponent digits | |
2410 goto done; | |
2411 } | |
2412 break; | |
2413 } | |
2414 stringbuffer.writeByte(c); | |
2415 } | |
2416 done: | |
2417 p--; | |
2418 | |
2419 stringbuffer.writeByte(0); | |
2420 | |
2421 #if _WIN32 && __DMC__ | |
2422 char *save = __locale_decpoint; | |
2423 __locale_decpoint = "."; | |
2424 #endif | |
2425 #ifdef IN_GCC | |
2426 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); | |
2427 #else | |
2428 t->float80value = strtold((char *)stringbuffer.data, NULL); | |
2429 #endif | |
2430 errno = 0; | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2431 float strtofres; |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2432 double strtodres; |
159 | 2433 switch (*p) |
2434 { | |
2435 case 'F': | |
2436 case 'f': | |
2437 #ifdef IN_GCC | |
2438 real_t::parse((char *)stringbuffer.data, real_t::Float); | |
2439 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2440 strtofres = strtof((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2441 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2442 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2443 strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2444 errno = 0; |
159 | 2445 #endif |
2446 result = TOKfloat32v; | |
2447 p++; | |
2448 break; | |
2449 | |
2450 default: | |
2451 #ifdef IN_GCC | |
2452 real_t::parse((char *)stringbuffer.data, real_t::Double); | |
2453 #else | |
696
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2454 strtodres = strtod((char *)stringbuffer.data, NULL); |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2455 // LDC change: don't error on gradual underflow |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2456 if (errno == ERANGE && |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2457 strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL) |
48f462341528
Fix issues with gradual underflow and strtof, strtod on Mac.
Christian Kamm <kamm incasoftware de>
parents:
664
diff
changeset
|
2458 errno = 0; |
159 | 2459 #endif |
2460 result = TOKfloat64v; | |
2461 break; | |
2462 | |
2463 case 'l': | |
2464 if (!global.params.useDeprecated) | |
2465 error("'l' suffix is deprecated, use 'L' instead"); | |
2466 case 'L': | |
2467 result = TOKfloat80v; | |
2468 p++; | |
2469 break; | |
2470 } | |
2471 if (*p == 'i' || *p == 'I') | |
2472 { | |
2473 if (!global.params.useDeprecated && *p == 'I') | |
2474 error("'I' suffix is deprecated, use 'i' instead"); | |
2475 p++; | |
2476 switch (result) | |
2477 { | |
2478 case TOKfloat32v: | |
2479 result = TOKimaginary32v; | |
2480 break; | |
2481 case TOKfloat64v: | |
2482 result = TOKimaginary64v; | |
2483 break; | |
2484 case TOKfloat80v: | |
2485 result = TOKimaginary80v; | |
2486 break; | |
2487 } | |
2488 } | |
2489 #if _WIN32 && __DMC__ | |
2490 __locale_decpoint = save; | |
2491 #endif | |
2492 if (errno == ERANGE) | |
2493 error("number is not representable"); | |
2494 return result; | |
2495 } | |
2496 | |
2497 /********************************************* | |
2498 * Do pragma. | |
2499 * Currently, the only pragma supported is: | |
2500 * #line linnum [filespec] | |
2501 */ | |
2502 | |
2503 void Lexer::pragma() | |
2504 { | |
2505 Token tok; | |
2506 int linnum; | |
2507 char *filespec = NULL; | |
2508 Loc loc = this->loc; | |
2509 | |
2510 scan(&tok); | |
2511 if (tok.value != TOKidentifier || tok.ident != Id::line) | |
2512 goto Lerr; | |
2513 | |
2514 scan(&tok); | |
2515 if (tok.value == TOKint32v || tok.value == TOKint64v) | |
2516 linnum = tok.uns64value - 1; | |
2517 else | |
2518 goto Lerr; | |
2519 | |
2520 while (1) | |
2521 { | |
2522 switch (*p) | |
2523 { | |
2524 case 0: | |
2525 case 0x1A: | |
2526 case '\n': | |
2527 Lnewline: | |
2528 this->loc.linnum = linnum; | |
2529 if (filespec) | |
2530 this->loc.filename = filespec; | |
2531 return; | |
2532 | |
2533 case '\r': | |
2534 p++; | |
2535 if (*p != '\n') | |
2536 { p--; | |
2537 goto Lnewline; | |
2538 } | |
2539 continue; | |
2540 | |
2541 case ' ': | |
2542 case '\t': | |
2543 case '\v': | |
2544 case '\f': | |
2545 p++; | |
2546 continue; // skip white space | |
2547 | |
2548 case '_': | |
2549 if (mod && memcmp(p, "__FILE__", 8) == 0) | |
2550 { | |
2551 p += 8; | |
2552 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); | |
2553 } | |
2554 continue; | |
2555 | |
2556 case '"': | |
2557 if (filespec) | |
2558 goto Lerr; | |
2559 stringbuffer.reset(); | |
2560 p++; | |
2561 while (1) | |
2562 { unsigned c; | |
2563 | |
2564 c = *p; | |
2565 switch (c) | |
2566 { | |
2567 case '\n': | |
2568 case '\r': | |
2569 case 0: | |
2570 case 0x1A: | |
2571 goto Lerr; | |
2572 | |
2573 case '"': | |
2574 stringbuffer.writeByte(0); | |
2575 filespec = mem.strdup((char *)stringbuffer.data); | |
2576 p++; | |
2577 break; | |
2578 | |
2579 default: | |
2580 if (c & 0x80) | |
2581 { unsigned u = decodeUTF(); | |
2582 if (u == PS || u == LS) | |
2583 goto Lerr; | |
2584 } | |
2585 stringbuffer.writeByte(c); | |
2586 p++; | |
2587 continue; | |
2588 } | |
2589 break; | |
2590 } | |
2591 continue; | |
2592 | |
2593 default: | |
2594 if (*p & 0x80) | |
2595 { unsigned u = decodeUTF(); | |
2596 if (u == PS || u == LS) | |
2597 goto Lnewline; | |
2598 } | |
2599 goto Lerr; | |
2600 } | |
2601 } | |
2602 | |
2603 Lerr: | |
2604 error(loc, "#line integer [\"filespec\"]\\n expected"); | |
2605 } | |
2606 | |
2607 | |
2608 /******************************************** | |
2609 * Decode UTF character. | |
2610 * Issue error messages for invalid sequences. | |
2611 * Return decoded character, advance p to last character in UTF sequence. | |
2612 */ | |
2613 | |
2614 unsigned Lexer::decodeUTF() | |
2615 { | |
2616 dchar_t u; | |
2617 unsigned char c; | |
2618 unsigned char *s = p; | |
2619 size_t len; | |
2620 size_t idx; | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2621 const char *msg; |
159 | 2622 |
2623 c = *s; | |
2624 assert(c & 0x80); | |
2625 | |
2626 // Check length of remaining string up to 6 UTF-8 characters | |
2627 for (len = 1; len < 6 && s[len]; len++) | |
2628 ; | |
2629 | |
2630 idx = 0; | |
2631 msg = utf_decodeChar(s, len, &idx, &u); | |
2632 p += idx - 1; | |
2633 if (msg) | |
2634 { | |
2635 error("%s", msg); | |
2636 } | |
2637 return u; | |
2638 } | |
2639 | |
2640 | |
2641 /*************************************************** | |
2642 * Parse doc comment embedded between t->ptr and p. | |
2643 * Remove trailing blanks and tabs from lines. | |
2644 * Replace all newlines with \n. | |
2645 * Remove leading comment character from each line. | |
2646 * Decide if it's a lineComment or a blockComment. | |
2647 * Append to previous one for this token. | |
2648 */ | |
2649 | |
2650 void Lexer::getDocComment(Token *t, unsigned lineComment) | |
2651 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2652 /* ct tells us which kind of comment it is: '/', '*', or '+' |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2653 */ |
159 | 2654 unsigned char ct = t->ptr[2]; |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2655 |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2656 /* Start of comment text skips over / * *, / + +, or / / / |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2657 */ |
159 | 2658 unsigned char *q = t->ptr + 3; // start of comment text |
2659 | |
2660 unsigned char *qend = p; | |
2661 if (ct == '*' || ct == '+') | |
2662 qend -= 2; | |
2663 | |
2664 /* Scan over initial row of ****'s or ++++'s or ////'s | |
2665 */ | |
2666 for (; q < qend; q++) | |
2667 { | |
2668 if (*q != ct) | |
2669 break; | |
2670 } | |
2671 | |
2672 /* Remove trailing row of ****'s or ++++'s | |
2673 */ | |
2674 if (ct != '/') | |
2675 { | |
2676 for (; q < qend; qend--) | |
2677 { | |
2678 if (qend[-1] != ct) | |
2679 break; | |
2680 } | |
2681 } | |
2682 | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2683 /* Comment is now [q .. qend]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2684 * Canonicalize it into buf[]. |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2685 */ |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2686 OutBuffer buf; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2687 int linestart = 0; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2688 |
159 | 2689 for (; q < qend; q++) |
2690 { | |
2691 unsigned char c = *q; | |
2692 | |
2693 switch (c) | |
2694 { | |
2695 case '*': | |
2696 case '+': | |
2697 if (linestart && c == ct) | |
2698 { linestart = 0; | |
2699 /* Trim preceding whitespace up to preceding \n | |
2700 */ | |
2701 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2702 buf.offset--; | |
2703 continue; | |
2704 } | |
2705 break; | |
2706 | |
2707 case ' ': | |
2708 case '\t': | |
2709 break; | |
2710 | |
2711 case '\r': | |
2712 if (q[1] == '\n') | |
2713 continue; // skip the \r | |
2714 goto Lnewline; | |
2715 | |
2716 default: | |
2717 if (c == 226) | |
2718 { | |
2719 // If LS or PS | |
2720 if (q[1] == 128 && | |
2721 (q[2] == 168 || q[2] == 169)) | |
2722 { | |
2723 q += 2; | |
2724 goto Lnewline; | |
2725 } | |
2726 } | |
2727 linestart = 0; | |
2728 break; | |
2729 | |
2730 Lnewline: | |
2731 c = '\n'; // replace all newlines with \n | |
2732 case '\n': | |
2733 linestart = 1; | |
2734 | |
2735 /* Trim trailing whitespace | |
2736 */ | |
2737 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) | |
2738 buf.offset--; | |
2739 | |
2740 break; | |
2741 } | |
2742 buf.writeByte(c); | |
2743 } | |
2744 | |
2745 // Always end with a newline | |
2746 if (!buf.offset || buf.data[buf.offset - 1] != '\n') | |
2747 buf.writeByte('\n'); | |
2748 | |
2749 buf.writeByte(0); | |
2750 | |
2751 // It's a line comment if the start of the doc comment comes | |
2752 // after other non-whitespace on the same line. | |
2753 unsigned char** dc = (lineComment && anyToken) | |
2754 ? &t->lineComment | |
2755 : &t->blockComment; | |
2756 | |
2757 // Combine with previous doc comment, if any | |
2758 if (*dc) | |
2759 *dc = combineComments(*dc, (unsigned char *)buf.data); | |
2760 else | |
2761 *dc = (unsigned char *)buf.extractData(); | |
2762 } | |
2763 | |
2764 /******************************************** | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2765 * Combine two document comments into one, |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2766 * separated by a newline. |
159 | 2767 */ |
2768 | |
2769 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) | |
2770 { | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2771 //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2772 |
159 | 2773 unsigned char *c = c2; |
2774 | |
2775 if (c1) | |
2776 { c = c1; | |
2777 if (c2) | |
2778 { size_t len1 = strlen((char *)c1); | |
2779 size_t len2 = strlen((char *)c2); | |
2780 | |
2781 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); | |
2782 memcpy(c, c1, len1); | |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2783 if (len1 && c1[len1 - 1] != '\n') |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2784 { c[len1] = '\n'; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2785 len1++; |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2786 } |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2787 memcpy(c + len1, c2, len2); |
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2788 c[len1 + len2] = 0; |
159 | 2789 } |
2790 } | |
2791 return c; | |
2792 } | |
2793 | |
2794 /******************************************** | |
2795 * Create an identifier in the string table. | |
2796 */ | |
2797 | |
2798 Identifier *Lexer::idPool(const char *s) | |
2799 { | |
2800 size_t len = strlen(s); | |
2801 StringValue *sv = stringtable.update(s, len); | |
2802 Identifier *id = (Identifier *) sv->ptrvalue; | |
2803 if (!id) | |
2804 { | |
2805 id = new Identifier(sv->lstring.string, TOKidentifier); | |
2806 sv->ptrvalue = id; | |
2807 } | |
2808 return id; | |
2809 } | |
2810 | |
2811 /********************************************* | |
2812 * Create a unique identifier using the prefix s. | |
2813 */ | |
2814 | |
2815 Identifier *Lexer::uniqueId(const char *s, int num) | |
2816 { char buffer[32]; | |
2817 size_t slen = strlen(s); | |
2818 | |
2819 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); | |
2820 sprintf(buffer, "%s%d", s, num); | |
2821 return idPool(buffer); | |
2822 } | |
2823 | |
2824 Identifier *Lexer::uniqueId(const char *s) | |
2825 { | |
2826 static int num; | |
2827 return uniqueId(s, ++num); | |
2828 } | |
2829 | |
2830 /**************************************** | |
2831 */ | |
2832 | |
2833 struct Keyword | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2834 { const char *name; |
159 | 2835 enum TOK value; |
2836 }; | |
2837 | |
2838 static Keyword keywords[] = | |
2839 { | |
2840 // { "", TOK }, | |
2841 | |
2842 { "this", TOKthis }, | |
2843 { "super", TOKsuper }, | |
2844 { "assert", TOKassert }, | |
2845 { "null", TOKnull }, | |
2846 { "true", TOKtrue }, | |
2847 { "false", TOKfalse }, | |
2848 { "cast", TOKcast }, | |
2849 { "new", TOKnew }, | |
2850 { "delete", TOKdelete }, | |
2851 { "throw", TOKthrow }, | |
2852 { "module", TOKmodule }, | |
2853 { "pragma", TOKpragma }, | |
2854 { "typeof", TOKtypeof }, | |
2855 { "typeid", TOKtypeid }, | |
2856 | |
2857 { "template", TOKtemplate }, | |
2858 | |
2859 { "void", TOKvoid }, | |
2860 { "byte", TOKint8 }, | |
2861 { "ubyte", TOKuns8 }, | |
2862 { "short", TOKint16 }, | |
2863 { "ushort", TOKuns16 }, | |
2864 { "int", TOKint32 }, | |
2865 { "uint", TOKuns32 }, | |
2866 { "long", TOKint64 }, | |
2867 { "ulong", TOKuns64 }, | |
2868 { "cent", TOKcent, }, | |
2869 { "ucent", TOKucent, }, | |
2870 { "float", TOKfloat32 }, | |
2871 { "double", TOKfloat64 }, | |
2872 { "real", TOKfloat80 }, | |
2873 | |
2874 { "bool", TOKbool }, | |
2875 { "char", TOKchar }, | |
2876 { "wchar", TOKwchar }, | |
2877 { "dchar", TOKdchar }, | |
2878 | |
2879 { "ifloat", TOKimaginary32 }, | |
2880 { "idouble", TOKimaginary64 }, | |
2881 { "ireal", TOKimaginary80 }, | |
2882 | |
2883 { "cfloat", TOKcomplex32 }, | |
2884 { "cdouble", TOKcomplex64 }, | |
2885 { "creal", TOKcomplex80 }, | |
2886 | |
2887 { "delegate", TOKdelegate }, | |
2888 { "function", TOKfunction }, | |
2889 | |
2890 { "is", TOKis }, | |
2891 { "if", TOKif }, | |
2892 { "else", TOKelse }, | |
2893 { "while", TOKwhile }, | |
2894 { "for", TOKfor }, | |
2895 { "do", TOKdo }, | |
2896 { "switch", TOKswitch }, | |
2897 { "case", TOKcase }, | |
2898 { "default", TOKdefault }, | |
2899 { "break", TOKbreak }, | |
2900 { "continue", TOKcontinue }, | |
2901 { "synchronized", TOKsynchronized }, | |
2902 { "return", TOKreturn }, | |
2903 { "goto", TOKgoto }, | |
2904 { "try", TOKtry }, | |
2905 { "catch", TOKcatch }, | |
2906 { "finally", TOKfinally }, | |
2907 { "with", TOKwith }, | |
2908 { "asm", TOKasm }, | |
2909 { "foreach", TOKforeach }, | |
2910 { "foreach_reverse", TOKforeach_reverse }, | |
2911 { "scope", TOKscope }, | |
2912 | |
2913 { "struct", TOKstruct }, | |
2914 { "class", TOKclass }, | |
2915 { "interface", TOKinterface }, | |
2916 { "union", TOKunion }, | |
2917 { "enum", TOKenum }, | |
2918 { "import", TOKimport }, | |
2919 { "mixin", TOKmixin }, | |
2920 { "static", TOKstatic }, | |
2921 { "final", TOKfinal }, | |
2922 { "const", TOKconst }, | |
2923 { "typedef", TOKtypedef }, | |
2924 { "alias", TOKalias }, | |
2925 { "override", TOKoverride }, | |
2926 { "abstract", TOKabstract }, | |
2927 { "volatile", TOKvolatile }, | |
2928 { "debug", TOKdebug }, | |
2929 { "deprecated", TOKdeprecated }, | |
2930 { "in", TOKin }, | |
2931 { "out", TOKout }, | |
2932 { "inout", TOKinout }, | |
2933 { "lazy", TOKlazy }, | |
2934 { "auto", TOKauto }, | |
2935 | |
2936 { "align", TOKalign }, | |
2937 { "extern", TOKextern }, | |
2938 { "private", TOKprivate }, | |
2939 { "package", TOKpackage }, | |
2940 { "protected", TOKprotected }, | |
2941 { "public", TOKpublic }, | |
2942 { "export", TOKexport }, | |
2943 | |
2944 { "body", TOKbody }, | |
2945 { "invariant", TOKinvariant }, | |
2946 { "unittest", TOKunittest }, | |
2947 { "version", TOKversion }, | |
2948 //{ "manifest", TOKmanifest }, | |
2949 | |
2950 // Added after 1.0 | |
2951 { "ref", TOKref }, | |
2952 { "macro", TOKmacro }, | |
336 | 2953 #if DMDV2 |
159 | 2954 { "pure", TOKpure }, |
2955 { "nothrow", TOKnothrow }, | |
336 | 2956 { "__thread", TOKtls }, |
159 | 2957 { "__traits", TOKtraits }, |
2958 { "__overloadset", TOKoverloadset }, | |
336 | 2959 { "__FILE__", TOKfile }, |
2960 { "__LINE__", TOKline }, | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2961 { "shared", TOKshared }, |
846
bc982f1ad106
Merged DMD 1.037 frontend
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
794
diff
changeset
|
2962 { "immutable", TOKimmutable }, |
159 | 2963 #endif |
2964 }; | |
2965 | |
2966 int Token::isKeyword() | |
2967 { | |
2968 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) | |
2969 { | |
2970 if (keywords[u].value == value) | |
2971 return 1; | |
2972 } | |
2973 return 0; | |
2974 } | |
2975 | |
2976 void Lexer::initKeywords() | |
2977 { StringValue *sv; | |
2978 unsigned u; | |
2979 enum TOK v; | |
2980 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); | |
2981 | |
2982 if (global.params.Dversion == 1) | |
2983 nkeywords -= 2; | |
2984 | |
2985 cmtable_init(); | |
2986 | |
2987 for (u = 0; u < nkeywords; u++) | |
658
50383e476c7e
Upgraded frontend to DMD 1.035
Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
parents:
366
diff
changeset
|
2988 { const char *s; |
159 | 2989 |
2990 //printf("keyword[%d] = '%s'\n",u, keywords[u].name); | |
2991 s = keywords[u].name; | |
2992 v = keywords[u].value; | |
2993 sv = stringtable.insert(s, strlen(s)); | |
2994 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); | |
2995 | |
2996 //printf("tochars[%d] = '%s'\n",v, s); | |
2997 Token::tochars[v] = s; | |
2998 } | |
2999 | |
3000 Token::tochars[TOKeof] = "EOF"; | |
3001 Token::tochars[TOKlcurly] = "{"; | |
3002 Token::tochars[TOKrcurly] = "}"; | |
3003 Token::tochars[TOKlparen] = "("; | |
3004 Token::tochars[TOKrparen] = ")"; | |
3005 Token::tochars[TOKlbracket] = "["; | |
3006 Token::tochars[TOKrbracket] = "]"; | |
3007 Token::tochars[TOKsemicolon] = ";"; | |
3008 Token::tochars[TOKcolon] = ":"; | |
3009 Token::tochars[TOKcomma] = ","; | |
3010 Token::tochars[TOKdot] = "."; | |
3011 Token::tochars[TOKxor] = "^"; | |
3012 Token::tochars[TOKxorass] = "^="; | |
3013 Token::tochars[TOKassign] = "="; | |
3014 Token::tochars[TOKconstruct] = "="; | |
336 | 3015 #if DMDV2 |
159 | 3016 Token::tochars[TOKblit] = "="; |
3017 #endif | |
3018 Token::tochars[TOKlt] = "<"; | |
3019 Token::tochars[TOKgt] = ">"; | |
3020 Token::tochars[TOKle] = "<="; | |
3021 Token::tochars[TOKge] = ">="; | |
3022 Token::tochars[TOKequal] = "=="; | |
3023 Token::tochars[TOKnotequal] = "!="; | |
3024 Token::tochars[TOKnotidentity] = "!is"; | |
3025 Token::tochars[TOKtobool] = "!!"; | |
3026 | |
3027 Token::tochars[TOKunord] = "!<>="; | |
3028 Token::tochars[TOKue] = "!<>"; | |
3029 Token::tochars[TOKlg] = "<>"; | |
3030 Token::tochars[TOKleg] = "<>="; | |
3031 Token::tochars[TOKule] = "!>"; | |
3032 Token::tochars[TOKul] = "!>="; | |
3033 Token::tochars[TOKuge] = "!<"; | |
3034 Token::tochars[TOKug] = "!<="; | |
3035 | |
3036 Token::tochars[TOKnot] = "!"; | |
3037 Token::tochars[TOKtobool] = "!!"; | |
3038 Token::tochars[TOKshl] = "<<"; | |
3039 Token::tochars[TOKshr] = ">>"; | |
3040 Token::tochars[TOKushr] = ">>>"; | |
3041 Token::tochars[TOKadd] = "+"; | |
3042 Token::tochars[TOKmin] = "-"; | |
3043 Token::tochars[TOKmul] = "*"; | |
3044 Token::tochars[TOKdiv] = "/"; | |
3045 Token::tochars[TOKmod] = "%"; | |
3046 Token::tochars[TOKslice] = ".."; | |
3047 Token::tochars[TOKdotdotdot] = "..."; | |
3048 Token::tochars[TOKand] = "&"; | |
3049 Token::tochars[TOKandand] = "&&"; | |
3050 Token::tochars[TOKor] = "|"; | |
3051 Token::tochars[TOKoror] = "||"; | |
3052 Token::tochars[TOKarray] = "[]"; | |
3053 Token::tochars[TOKindex] = "[i]"; | |
3054 Token::tochars[TOKaddress] = "&"; | |
3055 Token::tochars[TOKstar] = "*"; | |
3056 Token::tochars[TOKtilde] = "~"; | |
3057 Token::tochars[TOKdollar] = "$"; | |
3058 Token::tochars[TOKcast] = "cast"; | |
3059 Token::tochars[TOKplusplus] = "++"; | |
3060 Token::tochars[TOKminusminus] = "--"; | |
3061 Token::tochars[TOKtype] = "type"; | |
3062 Token::tochars[TOKquestion] = "?"; | |
3063 Token::tochars[TOKneg] = "-"; | |
3064 Token::tochars[TOKuadd] = "+"; | |
3065 Token::tochars[TOKvar] = "var"; | |
3066 Token::tochars[TOKaddass] = "+="; | |
3067 Token::tochars[TOKminass] = "-="; | |
3068 Token::tochars[TOKmulass] = "*="; | |
3069 Token::tochars[TOKdivass] = "/="; | |
3070 Token::tochars[TOKmodass] = "%="; | |
3071 Token::tochars[TOKshlass] = "<<="; | |
3072 Token::tochars[TOKshrass] = ">>="; | |
3073 Token::tochars[TOKushrass] = ">>>="; | |
3074 Token::tochars[TOKandass] = "&="; | |
3075 Token::tochars[TOKorass] = "|="; | |
3076 Token::tochars[TOKcatass] = "~="; | |
3077 Token::tochars[TOKcat] = "~"; | |
3078 Token::tochars[TOKcall] = "call"; | |
3079 Token::tochars[TOKidentity] = "is"; | |
3080 Token::tochars[TOKnotidentity] = "!is"; | |
3081 | |
3082 Token::tochars[TOKorass] = "|="; | |
3083 Token::tochars[TOKidentifier] = "identifier"; | |
3084 | |
3085 // For debugging | |
1587 | 3086 Token::tochars[TOKerror] = "error"; |
159 | 3087 Token::tochars[TOKdotexp] = "dotexp"; |
3088 Token::tochars[TOKdotti] = "dotti"; | |
3089 Token::tochars[TOKdotvar] = "dotvar"; | |
3090 Token::tochars[TOKdottype] = "dottype"; | |
3091 Token::tochars[TOKsymoff] = "symoff"; | |
3092 Token::tochars[TOKarraylength] = "arraylength"; | |
3093 Token::tochars[TOKarrayliteral] = "arrayliteral"; | |
3094 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; | |
3095 Token::tochars[TOKstructliteral] = "structliteral"; | |
3096 Token::tochars[TOKstring] = "string"; | |
3097 Token::tochars[TOKdsymbol] = "symbol"; | |
3098 Token::tochars[TOKtuple] = "tuple"; | |
3099 Token::tochars[TOKdeclaration] = "declaration"; | |
3100 Token::tochars[TOKdottd] = "dottd"; | |
3101 Token::tochars[TOKon_scope_exit] = "scope(exit)"; | |
1195
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3102 Token::tochars[TOKon_scope_success] = "scope(success)"; |
e961851fb8be
Merged DMD 1.042.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1165
diff
changeset
|
3103 Token::tochars[TOKon_scope_failure] = "scope(failure)"; |
159 | 3104 } |