comparison dmd2/lexer.c @ 758:f04dde6e882c

Added initial D2 support, D2 frontend and changes to codegen to make things compile.
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Tue, 11 Nov 2008 01:38:48 +0100
parents
children 356e65836fb5
comparison
equal deleted inserted replaced
757:2c730d530c98 758:f04dde6e882c
1
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
10
11 /* Lexical Analyzer */
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <errno.h>
18 #include <wchar.h>
19 #include <stdlib.h>
20 #include <assert.h>
21 #include <sys/time.h>
22 #include <math.h>
23
24 #ifdef IN_GCC
25
26 #include <time.h>
27 #include "mem.h"
28
29 #else
30
31 #if __GNUC__
32 #include <time.h>
33 #endif
34
35 #if IN_LLVM
36 #include "mem.h"
37 #elif _WIN32
38 #include "..\root\mem.h"
39 #else
40 #include "../root/mem.h"
41 #endif
42 #endif
43
44 #include "stringtable.h"
45
46 #include "lexer.h"
47 #include "utf.h"
48 #include "identifier.h"
49 #include "id.h"
50 #include "module.h"
51
52 #if _WIN32 && __DMC__
53 // from \dm\src\include\setlocal.h
54 extern "C" char * __cdecl __locale_decpoint;
55 #endif
56
57 extern int HtmlNamedEntity(unsigned char *p, int length);
58
59 #define LS 0x2028 // UTF line separator
60 #define PS 0x2029 // UTF paragraph separator
61
/********************************************
 * Private character classification table.
 *
 * cmtable[] holds one flag byte per 8-bit character value; the CM* bits
 * mark octal digits, hex digits and identifier characters. The table is
 * all zero until cmtable_init() has been run.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // '0'..'7'
const int CMhex    = 0x2;       // decimal digit or a-f / A-F
const int CMidchar = 0x4;       // alphanumeric or '_'

/* Each predicate returns the matching flag bit (non-zero) or 0. */
inline unsigned char isoctal(unsigned char c)
{
    return cmtable[c] & CMoctal;
}

inline unsigned char ishex(unsigned char c)
{
    return cmtable[c] & CMhex;
}

inline unsigned char isidchar(unsigned char c)
{
    return cmtable[c] & CMidchar;
}

/* Populate cmtable[]; flags are OR-ed in, so repeated calls are harmless. */
static void cmtable_init()
{
    const unsigned entries = sizeof(cmtable) / sizeof(cmtable[0]);

    for (unsigned ch = 0; ch != entries; ++ch)
    {
        unsigned char flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;
        if (isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
            flags |= CMhex;
        if (ch == '_' || isalnum(ch))
            flags |= CMidchar;

        cmtable[ch] |= flags;
    }
}
88
89
90 /************************* Token **********************************************/
91
/* Spelling of each token, indexed by TOK value. Entries that are NULL make
 * Token::toChars(enum TOK) fall back to a generated "TOK<n>" name.
 */
92 const char *Token::tochars[TOKMAX];
93
94 void *Token::operator new(size_t size)
95 { Token *t;
96
97 if (Lexer::freelist)
98 {
99 t = Lexer::freelist;
100 Lexer::freelist = t->next;
101 return t;
102 }
103
104 return ::operator new(size);
105 }
106
#ifdef DEBUG
/* Debug aid: write this token's spelling, followed by a newline, to stdmsg. */
void Token::print()
{
    const char *text = toChars();

    fprintf(stdmsg, "%s\n", text);
}
#endif
113
114 const char *Token::toChars()
115 { const char *p;
116 static char buffer[3 + 3 * sizeof(value) + 1];
117
118 p = buffer;
119 switch (value)
120 {
121 case TOKint32v:
122 #if IN_GCC
123 sprintf(buffer,"%d",(d_int32)int64value);
124 #else
125 sprintf(buffer,"%d",int32value);
126 #endif
127 break;
128
129 case TOKuns32v:
130 case TOKcharv:
131 case TOKwcharv:
132 case TOKdcharv:
133 #if IN_GCC
134 sprintf(buffer,"%uU",(d_uns32)uns64value);
135 #else
136 sprintf(buffer,"%uU",uns32value);
137 #endif
138 break;
139
140 case TOKint64v:
141 sprintf(buffer,"%lldL",int64value);
142 break;
143
144 case TOKuns64v:
145 sprintf(buffer,"%lluUL",uns64value);
146 break;
147
148 #if IN_GCC
149 case TOKfloat32v:
150 case TOKfloat64v:
151 case TOKfloat80v:
152 float80value.format(buffer, sizeof(buffer));
153 break;
154 case TOKimaginary32v:
155 case TOKimaginary64v:
156 case TOKimaginary80v:
157 float80value.format(buffer, sizeof(buffer));
158 // %% buffer
159 strcat(buffer, "i");
160 break;
161 #else
162 case TOKfloat32v:
163 sprintf(buffer,"%Lgf", float80value);
164 break;
165
166 case TOKfloat64v:
167 sprintf(buffer,"%Lg", float80value);
168 break;
169
170 case TOKfloat80v:
171 sprintf(buffer,"%LgL", float80value);
172 break;
173
174 case TOKimaginary32v:
175 sprintf(buffer,"%Lgfi", float80value);
176 break;
177
178 case TOKimaginary64v:
179 sprintf(buffer,"%Lgi", float80value);
180 break;
181
182 case TOKimaginary80v:
183 sprintf(buffer,"%LgLi", float80value);
184 break;
185 #endif
186
187 case TOKstring:
188 #if CSTRINGS
189 p = string;
190 #else
191 { OutBuffer buf;
192
193 buf.writeByte('"');
194 for (size_t i = 0; i < len; )
195 { unsigned c;
196
197 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
198 switch (c)
199 {
200 case 0:
201 break;
202
203 case '"':
204 case '\\':
205 buf.writeByte('\\');
206 default:
207 if (isprint(c))
208 buf.writeByte(c);
209 else if (c <= 0x7F)
210 buf.printf("\\x%02x", c);
211 else if (c <= 0xFFFF)
212 buf.printf("\\u%04x", c);
213 else
214 buf.printf("\\U%08x", c);
215 continue;
216 }
217 break;
218 }
219 buf.writeByte('"');
220 if (postfix)
221 buf.writeByte('"');
222 buf.writeByte(0);
223 p = (char *)buf.extractData();
224 }
225 #endif
226 break;
227
228 case TOKidentifier:
229 case TOKenum:
230 case TOKstruct:
231 case TOKimport:
232 CASE_BASIC_TYPES:
233 p = ident->toChars();
234 break;
235
236 default:
237 p = toChars(value);
238 break;
239 }
240 return p;
241 }
242
243 const char *Token::toChars(enum TOK value)
244 { const char *p;
245 static char buffer[3 + 3 * sizeof(value) + 1];
246
247 p = tochars[value];
248 if (!p)
249 { sprintf(buffer,"TOK%d",value);
250 p = buffer;
251 }
252 return p;
253 }
254
255 /*************************** Lexer ********************************************/
256
/* Static state shared by all Lexer instances. */
// Head of the list of recycled Token objects (filled by nextToken(),
// drained by Token::operator new).
257 Token *Lexer::freelist = NULL;
// Table through which scan() maps identifier spellings to Identifier objects.
258 StringTable Lexer::stringtable;
// Scratch buffer in which the contents of string literals are assembled.
259 OutBuffer Lexer::stringbuffer;
260
261 Lexer::Lexer(Module *mod,
262 unsigned char *base, unsigned begoffset, unsigned endoffset,
263 int doDocComment, int commentToken)
264 : loc(mod, 1)
265 {
266 //printf("Lexer::Lexer(%p,%d)\n",base,length);
267 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
268 memset(&token,0,sizeof(token));
269 this->base = base;
270 this->end = base + endoffset;
271 p = base + begoffset;
272 this->mod = mod;
273 this->doDocComment = doDocComment;
274 this->anyToken = 0;
275 this->commentToken = commentToken;
276 //initKeywords();
277
278 /* If first line starts with '#!', ignore the line
279 */
280
281 if (p[0] == '#' && p[1] =='!')
282 {
283 p += 2;
284 while (1)
285 { unsigned char c = *p;
286 switch (c)
287 {
288 case '\n':
289 p++;
290 break;
291
292 case '\r':
293 p++;
294 if (*p == '\n')
295 p++;
296 break;
297
298 case 0:
299 case 0x1A:
300 break;
301
302 default:
303 if (c & 0x80)
304 { unsigned u = decodeUTF();
305 if (u == PS || u == LS)
306 break;
307 }
308 p++;
309 continue;
310 }
311 break;
312 }
313 loc.linnum = 2;
314 }
315 }
316
317
318 void Lexer::error(const char *format, ...)
319 {
320 if (mod && !global.gag)
321 {
322 char *p = loc.toChars();
323 if (*p)
324 fprintf(stdmsg, "%s: ", p);
325 mem.free(p);
326
327 va_list ap;
328 va_start(ap, format);
329 vfprintf(stdmsg, format, ap);
330 va_end(ap);
331
332 fprintf(stdmsg, "\n");
333 fflush(stdmsg);
334
335 if (global.errors >= 20) // moderate blizzard of cascading messages
336 fatal();
337 }
338 global.errors++;
339 }
340
341 void Lexer::error(Loc loc, const char *format, ...)
342 {
343 if (mod && !global.gag)
344 {
345 char *p = loc.toChars();
346 if (*p)
347 fprintf(stdmsg, "%s: ", p);
348 mem.free(p);
349
350 va_list ap;
351 va_start(ap, format);
352 vfprintf(stdmsg, format, ap);
353 va_end(ap);
354
355 fprintf(stdmsg, "\n");
356 fflush(stdmsg);
357
358 if (global.errors >= 20) // moderate blizzard of cascading messages
359 fatal();
360 }
361 global.errors++;
362 }
363
364 TOK Lexer::nextToken()
365 { Token *t;
366
367 if (token.next)
368 {
369 t = token.next;
370 memcpy(&token,t,sizeof(Token));
371 t->next = freelist;
372 freelist = t;
373 }
374 else
375 {
376 scan(&token);
377 }
378 //token.print();
379 return token.value;
380 }
381
382 Token *Lexer::peek(Token *ct)
383 { Token *t;
384
385 if (ct->next)
386 t = ct->next;
387 else
388 {
389 t = new Token();
390 scan(t);
391 t->next = NULL;
392 ct->next = t;
393 }
394 return t;
395 }
396
397 /***********************
398 * Look ahead at next token's value.
399 */
400
401 TOK Lexer::peekNext()
402 {
403 return peek(&token)->value;
404 }
405
406 /*********************************
407 * tk is on the opening (.
408 * Look ahead and return token that is past the closing ).
409 */
410
411 Token *Lexer::peekPastParen(Token *tk)
412 {
413 //printf("peekPastParen()\n");
414 int parens = 1;
415 int curlynest = 0;
416 while (1)
417 {
418 tk = peek(tk);
419 //tk->print();
420 switch (tk->value)
421 {
422 case TOKlparen:
423 parens++;
424 continue;
425
426 case TOKrparen:
427 --parens;
428 if (parens)
429 continue;
430 tk = peek(tk);
431 break;
432
433 case TOKlcurly:
434 curlynest++;
435 continue;
436
437 case TOKrcurly:
438 if (--curlynest >= 0)
439 continue;
440 break;
441
442 case TOKsemicolon:
443 if (curlynest)
444 continue;
445 break;
446
447 case TOKeof:
448 break;
449
450 default:
451 continue;
452 }
453 return tk;
454 }
455 }
456
457 /**********************************
458 * Determine if string is a valid Identifier.
459 * Placed here because of commonality with Lexer functionality.
460 * Returns:
461 * 0 invalid
462 */
463
464 int Lexer::isValidIdentifier(char *p)
465 {
466 size_t len;
467 size_t idx;
468
469 if (!p || !*p)
470 goto Linvalid;
471
472 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
473 goto Linvalid;
474
475 len = strlen(p);
476 idx = 0;
477 while (p[idx])
478 { dchar_t dc;
479
480 const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
481 if (q)
482 goto Linvalid;
483
484 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
485 goto Linvalid;
486 }
487 return 1;
488
489 Linvalid:
490 return 0;
491 }
492
493 /****************************
494 * Turn next token in buffer into a token.
495 */
496
/* Scan the input at p and fill in *t with the next token.
 *
 * White space and comments are skipped (loc.linnum is kept up to date while
 * doing so), unless commentToken is set, in which case a comment is returned
 * as TOKcomment. When doDocComment is set, documentation comments are handed
 * to getDocComment(). A 0 or 0x1A byte yields TOKeof. On return p is
 * positioned after the token, except at end of file where it stays on the
 * terminating byte (or is set to `end` after an unterminated comment).
 */
497 void Lexer::scan(Token *t)
498 {
// Line number on entry; compared with a comment's starting line when calling getDocComment()
499 unsigned lastLine = loc.linnum;
500 unsigned linnum;
501
502 t->blockComment = NULL;
503 t->lineComment = NULL;
504 while (1)
505 {
// Remember where this (candidate) token starts
506 t->ptr = p;
507 //printf("p = %p, *p = '%c'\n",p,*p);
508 switch (*p)
509 {
510 case 0:
511 case 0x1A:
512 t->value = TOKeof; // end of file
513 return;
514
515 case ' ':
516 case '\t':
517 case '\v':
518 case '\f':
519 p++;
520 continue; // skip white space
521
522 case '\r':
523 p++;
// For a CR LF pair the line is counted when the LF is seen
524 if (*p != '\n') // if CR stands by itself
525 loc.linnum++;
526 continue; // skip white space
527
528 case '\n':
529 p++;
530 loc.linnum++;
531 continue; // skip white space
532
533 case '0': case '1': case '2': case '3': case '4':
534 case '5': case '6': case '7': case '8': case '9':
535 t->value = number(t);
536 return;
537
538 #if CSTRINGS
539 case '\'':
540 t->value = charConstant(t, 0);
541 return;
542
543 case '"':
544 t->value = stringConstant(t,0);
545 return;
546
547 case 'l':
548 case 'L':
549 if (p[1] == '\'')
550 {
551 p++;
552 t->value = charConstant(t, 1);
553 return;
554 }
555 else if (p[1] == '"')
556 {
557 p++;
558 t->value = stringConstant(t, 1);
559 return;
560 }
561 #else
562 case '\'':
563 t->value = charConstant(t,0);
564 return;
565
// r"..." : an 'r' not followed by a quote is an ordinary identifier start
566 case 'r':
567 if (p[1] != '"')
568 goto case_ident;
569 p++;
// falls through: *p (either " or `) is passed as the terminating character
570 case '`':
571 t->value = wysiwygStringConstant(t, *p);
572 return;
573
// x"..." : hex string
574 case 'x':
575 if (p[1] != '"')
576 goto case_ident;
577 p++;
578 t->value = hexStringConstant(t);
579 return;
580
581 #if DMDV2
// q"..." : delimited string, q{...} : token string
582 case 'q':
583 if (p[1] == '"')
584 {
585 p++;
586 t->value = delimitedStringConstant(t);
587 return;
588 }
589 else if (p[1] == '{')
590 {
591 p++;
592 t->value = tokenStringConstant(t);
593 return;
594 }
595 else
596 goto case_ident;
597 #endif
598
599 case '"':
600 t->value = escapeStringConstant(t,0);
601 return;
602
// A run of consecutive backslash escapes outside quotes forms one string token
603 case '\\': // escaped string literal
604 { unsigned c;
605
606 stringbuffer.reset();
607 do
608 {
609 p++;
610 switch (*p)
611 {
// \u, \U and \& produce a code point that is stored as UTF-8
612 case 'u':
613 case 'U':
614 case '&':
615 c = escapeSequence();
616 stringbuffer.writeUTF8(c);
617 break;
618
619 default:
620 c = escapeSequence();
621 stringbuffer.writeByte(c);
622 break;
623 }
624 } while (*p == '\\');
625 t->len = stringbuffer.offset;
626 stringbuffer.writeByte(0);
627 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
628 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
629 t->postfix = 0;
630 t->value = TOKstring;
631 return;
632 }
633
634 case 'l':
635 case 'L':
636 #endif
637 case 'a': case 'b': case 'c': case 'd': case 'e':
638 case 'f': case 'g': case 'h': case 'i': case 'j':
639 case 'k': case 'm': case 'n': case 'o':
640 #if DMDV2
641 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
642 #else
643 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
644 #endif
645 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
646 case 'z':
647 case 'A': case 'B': case 'C': case 'D': case 'E':
648 case 'F': case 'G': case 'H': case 'I': case 'J':
649 case 'K': case 'M': case 'N': case 'O':
650 case 'P': case 'Q': case 'R': case 'S': case 'T':
651 case 'U': case 'V': case 'W': case 'X': case 'Y':
652 case 'Z':
653 case '_':
654 case_ident:
655 { unsigned char c;
656 StringValue *sv;
657 Identifier *id;
658
// Consume the rest of the identifier (ASCII id characters or Unicode letters)
659 do
660 {
661 c = *++p;
662 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
// Look the spelling up in the shared table, creating the Identifier on first use
663 sv = stringtable.update((char *)t->ptr, p - t->ptr);
664 id = (Identifier *) sv->ptrvalue;
665 if (!id)
666 { id = new Identifier(sv->lstring.string,TOKidentifier);
667 sv->ptrvalue = id;
668 }
669 t->ident = id;
// The token value comes from the Identifier (TOKidentifier for ones created here)
670 t->value = (enum TOK) id->value;
671 anyToken = 1;
672 if (*t->ptr == '_') // if special identifier token
673 {
674 static char date[11+1];
675 static char time[8+1];
676 static char timestamp[24+1];
677
// The three strings are cut out of one ctime() result the first time through
678 if (!date[0]) // lazy evaluation
679 { time_t t;
680 char *p;
681
682 ::time(&t);
683 p = ctime(&t);
684 assert(p);
685 sprintf(date, "%.6s %.4s", p + 4, p + 20);
686 sprintf(time, "%.8s", p + 11);
687 sprintf(timestamp, "%.24s", p);
688 }
689
690 #if DMDV1
691 if (mod && id == Id::FILE)
692 {
693 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
694 goto Lstring;
695 }
696 else if (mod && id == Id::LINE)
697 {
698 t->value = TOKint64v;
699 t->uns64value = loc.linnum;
700 }
701 else
702 #endif
703 if (id == Id::DATE)
704 {
705 t->ustring = (unsigned char *)date;
706 goto Lstring;
707 }
708 else if (id == Id::TIME)
709 {
710 t->ustring = (unsigned char *)time;
711 goto Lstring;
712 }
713 else if (id == Id::VENDOR)
714 {
715 t->ustring = (unsigned char *)"LDC";
716 goto Lstring;
717 }
718 else if (id == Id::TIMESTAMP)
719 {
720 t->ustring = (unsigned char *)timestamp;
// Shared tail for every special identifier that becomes a string token
721 Lstring:
722 t->value = TOKstring;
723 Llen:
724 t->postfix = 0;
725 t->len = strlen((char *)t->ustring);
726 }
727 else if (id == Id::VERSIONX)
// Parses "major.minor" digits from global.version (skipping its first character)
// and yields major * 1000 + minor
728 { unsigned major = 0;
729 unsigned minor = 0;
730
731 for (const char *p = global.version + 1; 1; p++)
732 {
733 char c = *p;
734 if (isdigit(c))
735 minor = minor * 10 + c - '0';
736 else if (c == '.')
737 { major = minor;
738 minor = 0;
739 }
740 else
741 break;
742 }
743 t->value = TOKint64v;
744 t->uns64value = major * 1000 + minor;
745 }
746 #if DMDV2
747 else if (id == Id::EOFX)
748 {
749 t->value = TOKeof;
750 // Advance scanner to end of file
751 while (!(*p == 0 || *p == 0x1A))
752 p++;
753 }
754 #endif
755 }
756 //printf("t->value = %d\n",t->value);
757 return;
758 }
759
760 case '/':
761 p++;
762 switch (*p)
763 {
764 case '=':
765 p++;
766 t->value = TOKdivass;
767 return;
768
// Block comment: the outer loop repeats until a '/' preceded by '*' is found
769 case '*':
770 p++;
771 linnum = loc.linnum;
772 while (1)
773 {
774 while (1)
775 { unsigned char c = *p;
776 switch (c)
777 {
778 case '/':
779 break;
780
781 case '\n':
782 loc.linnum++;
783 p++;
784 continue;
785
786 case '\r':
787 p++;
788 if (*p != '\n')
789 loc.linnum++;
790 continue;
791
792 case 0:
793 case 0x1A:
794 error("unterminated /* */ comment");
795 p = end;
796 t->value = TOKeof;
797 return;
798
799 default:
800 if (c & 0x80)
801 { unsigned u = decodeUTF();
802 if (u == PS || u == LS)
803 loc.linnum++;
804 }
805 p++;
806 continue;
807 }
808 break;
809 }
810 p++;
// The second test rejects "/*/": the '*' must not be the one that opened the comment
811 if (p[-2] == '*' && p - 3 != t->ptr)
812 break;
813 }
814 if (commentToken)
815 {
816 t->value = TOKcomment;
817 return;
818 }
819 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
820 { // if /** but not /**/
821 getDocComment(t, lastLine == linnum);
822 }
823 continue;
824
825 case '/': // do // style comments
826 linnum = loc.linnum;
827 while (1)
828 { unsigned char c = *++p;
829 switch (c)
830 {
831 case '\n':
832 break;
833
834 case '\r':
835 if (p[1] == '\n')
836 p++;
837 break;
838
839 case 0:
840 case 0x1A:
841 if (commentToken)
842 {
843 p = end;
844 t->value = TOKcomment;
845 return;
846 }
847 if (doDocComment && t->ptr[2] == '/')
848 getDocComment(t, lastLine == linnum);
849 p = end;
850 t->value = TOKeof;
851 return;
852
853 default:
854 if (c & 0x80)
855 { unsigned u = decodeUTF();
856 if (u == PS || u == LS)
857 break;
858 }
859 continue;
860 }
861 break;
862 }
863
// Here p is on the last character of the line terminator
864 if (commentToken)
865 {
866 p++;
867 loc.linnum++;
868 t->value = TOKcomment;
869 return;
870 }
871 if (doDocComment && t->ptr[2] == '/')
872 getDocComment(t, lastLine == linnum);
873
874 p++;
875 loc.linnum++;
876 continue;
877
// Nesting comment: `nest` counts the currently open /+ ... +/ pairs
878 case '+':
879 { int nest;
880
881 linnum = loc.linnum;
882 p++;
883 nest = 1;
884 while (1)
885 { unsigned char c = *p;
886 switch (c)
887 {
888 case '/':
889 p++;
890 if (*p == '+')
891 {
892 p++;
893 nest++;
894 }
895 continue;
896
897 case '+':
898 p++;
899 if (*p == '/')
900 {
901 p++;
902 if (--nest == 0)
903 break;
904 }
905 continue;
906
907 case '\r':
908 p++;
909 if (*p != '\n')
910 loc.linnum++;
911 continue;
912
913 case '\n':
914 loc.linnum++;
915 p++;
916 continue;
917
918 case 0:
919 case 0x1A:
920 error("unterminated /+ +/ comment");
921 p = end;
922 t->value = TOKeof;
923 return;
924
925 default:
926 if (c & 0x80)
927 { unsigned u = decodeUTF();
928 if (u == PS || u == LS)
929 loc.linnum++;
930 }
931 p++;
932 continue;
933 }
934 break;
935 }
936 if (commentToken)
937 {
938 t->value = TOKcomment;
939 return;
940 }
941 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
942 { // if /++ but not /++/
943 getDocComment(t, lastLine == linnum);
944 }
945 continue;
946 }
947 }
948 t->value = TOKdiv;
949 return;
950
951 case '.':
952 p++;
953 if (isdigit(*p))
954 { /* Note that we don't allow ._1 and ._ as being
955 * valid floating point numbers.
956 */
957 p--;
958 t->value = inreal(t);
959 }
960 else if (p[0] == '.')
961 {
962 if (p[1] == '.')
963 { p += 2;
964 t->value = TOKdotdotdot;
965 }
966 else
967 { p++;
968 t->value = TOKslice;
969 }
970 }
971 else
972 t->value = TOKdot;
973 return;
974
975 case '&':
976 p++;
977 if (*p == '=')
978 { p++;
979 t->value = TOKandass;
980 }
981 else if (*p == '&')
982 { p++;
983 t->value = TOKandand;
984 }
985 else
986 t->value = TOKand;
987 return;
988
989 case '|':
990 p++;
991 if (*p == '=')
992 { p++;
993 t->value = TOKorass;
994 }
995 else if (*p == '|')
996 { p++;
997 t->value = TOKoror;
998 }
999 else
1000 t->value = TOKor;
1001 return;
1002
1003 case '-':
1004 p++;
1005 if (*p == '=')
1006 { p++;
1007 t->value = TOKminass;
1008 }
1009 #if 0
1010 else if (*p == '>')
1011 { p++;
1012 t->value = TOKarrow;
1013 }
1014 #endif
1015 else if (*p == '-')
1016 { p++;
1017 t->value = TOKminusminus;
1018 }
1019 else
1020 t->value = TOKmin;
1021 return;
1022
1023 case '+':
1024 p++;
1025 if (*p == '=')
1026 { p++;
1027 t->value = TOKaddass;
1028 }
1029 else if (*p == '+')
1030 { p++;
1031 t->value = TOKplusplus;
1032 }
1033 else
1034 t->value = TOKadd;
1035 return;
1036
1037 case '<':
1038 p++;
1039 if (*p == '=')
1040 { p++;
1041 t->value = TOKle; // <=
1042 }
1043 else if (*p == '<')
1044 { p++;
1045 if (*p == '=')
1046 { p++;
1047 t->value = TOKshlass; // <<=
1048 }
1049 else
1050 t->value = TOKshl; // <<
1051 }
1052 else if (*p == '>')
1053 { p++;
1054 if (*p == '=')
1055 { p++;
1056 t->value = TOKleg; // <>=
1057 }
1058 else
1059 t->value = TOKlg; // <>
1060 }
1061 else
1062 t->value = TOKlt; // <
1063 return;
1064
1065 case '>':
1066 p++;
1067 if (*p == '=')
1068 { p++;
1069 t->value = TOKge; // >=
1070 }
1071 else if (*p == '>')
1072 { p++;
1073 if (*p == '=')
1074 { p++;
1075 t->value = TOKshrass; // >>=
1076 }
1077 else if (*p == '>')
1078 { p++;
1079 if (*p == '=')
1080 { p++;
1081 t->value = TOKushrass; // >>>=
1082 }
1083 else
1084 t->value = TOKushr; // >>>
1085 }
1086 else
1087 t->value = TOKshr; // >>
1088 }
1089 else
1090 t->value = TOKgt; // >
1091 return;
1092
1093 case '!':
1094 p++;
1095 if (*p == '=')
1096 { p++;
// "!==" is only recognized when global.params.Dversion is 1
1097 if (*p == '=' && global.params.Dversion == 1)
1098 { p++;
1099 t->value = TOKnotidentity; // !==
1100 }
1101 else
1102 t->value = TOKnotequal; // !=
1103 }
1104 else if (*p == '<')
1105 { p++;
1106 if (*p == '>')
1107 { p++;
1108 if (*p == '=')
1109 { p++;
1110 t->value = TOKunord; // !<>=
1111 }
1112 else
1113 t->value = TOKue; // !<>
1114 }
1115 else if (*p == '=')
1116 { p++;
1117 t->value = TOKug; // !<=
1118 }
1119 else
1120 t->value = TOKuge; // !<
1121 }
1122 else if (*p == '>')
1123 { p++;
1124 if (*p == '=')
1125 { p++;
1126 t->value = TOKul; // !>=
1127 }
1128 else
1129 t->value = TOKule; // !>
1130 }
1131 else
1132 t->value = TOKnot; // !
1133 return;
1134
1135 case '=':
1136 p++;
1137 if (*p == '=')
1138 { p++;
// "===" is only recognized when global.params.Dversion is 1
1139 if (*p == '=' && global.params.Dversion == 1)
1140 { p++;
1141 t->value = TOKidentity; // ===
1142 }
1143 else
1144 t->value = TOKequal; // ==
1145 }
1146 else
1147 t->value = TOKassign; // =
1148 return;
1149
1150 case '~':
1151 p++;
1152 if (*p == '=')
1153 { p++;
1154 t->value = TOKcatass; // ~=
1155 }
1156 else
1157 t->value = TOKtilde; // ~
1158 return;
1159
// SINGLE expands to a case for a one-character token
1160 #define SINGLE(c,tok) case c: p++; t->value = tok; return;
1161
1162 SINGLE('(', TOKlparen)
1163 SINGLE(')', TOKrparen)
1164 SINGLE('[', TOKlbracket)
1165 SINGLE(']', TOKrbracket)
1166 SINGLE('{', TOKlcurly)
1167 SINGLE('}', TOKrcurly)
1168 SINGLE('?', TOKquestion)
1169 SINGLE(',', TOKcomma)
1170 SINGLE(';', TOKsemicolon)
1171 SINGLE(':', TOKcolon)
1172 SINGLE('$', TOKdollar)
1173
1174 #undef SINGLE
1175
// DOUBLE expands to a case for a token that is c1 alone (tok1) or c1 followed by c2 (tok2)
1176 #define DOUBLE(c1,tok1,c2,tok2) \
1177 case c1: \
1178 p++; \
1179 if (*p == c2) \
1180 { p++; \
1181 t->value = tok2; \
1182 } \
1183 else \
1184 t->value = tok1; \
1185 return;
1186
1187 DOUBLE('*', TOKmul, '=', TOKmulass)
1188 DOUBLE('%', TOKmod, '=', TOKmodass)
1189 DOUBLE('^', TOKxor, '=', TOKxorass)
1190
1191 #undef DOUBLE
1192
// '#' hands control to pragma(); no token is produced for it here
1193 case '#':
1194 p++;
1195 pragma();
1196 continue;
1197
1198 default:
1199 { unsigned char c = *p;
1200
1201 if (c & 0x80)
1202 { unsigned u = decodeUTF();
1203
1204 // Check for start of unicode identifier
1205 if (isUniAlpha(u))
1206 goto case_ident;
1207
1208 if (u == PS || u == LS)
1209 {
1210 loc.linnum++;
1211 p++;
1212 continue;
1213 }
1214 }
// Anything else is reported and skipped; scanning then resumes
1215 if (isprint(c))
1216 error("unsupported char '%c'", c);
1217 else
1218 error("unsupported char 0x%02x", c);
1219 p++;
1220 continue;
1221 }
1222 }
1223 }
1224 }
1225
1226 /*******************************************
1227 * Parse escape sequence.
1228 */
1229
1230 unsigned Lexer::escapeSequence()
1231 { unsigned c;
1232 int n;
1233 int ndigits;
1234
1235 c = *p;
1236 switch (c)
1237 {
1238 case '\'':
1239 case '"':
1240 case '?':
1241 case '\\':
1242 Lconsume:
1243 p++;
1244 break;
1245
1246 case 'a': c = 7; goto Lconsume;
1247 case 'b': c = 8; goto Lconsume;
1248 case 'f': c = 12; goto Lconsume;
1249 case 'n': c = 10; goto Lconsume;
1250 case 'r': c = 13; goto Lconsume;
1251 case 't': c = 9; goto Lconsume;
1252 case 'v': c = 11; goto Lconsume;
1253
1254 case 'u':
1255 ndigits = 4;
1256 goto Lhex;
1257 case 'U':
1258 ndigits = 8;
1259 goto Lhex;
1260 case 'x':
1261 ndigits = 2;
1262 Lhex:
1263 p++;
1264 c = *p;
1265 if (ishex(c))
1266 { unsigned v;
1267
1268 n = 0;
1269 v = 0;
1270 while (1)
1271 {
1272 if (isdigit(c))
1273 c -= '0';
1274 else if (islower(c))
1275 c -= 'a' - 10;
1276 else
1277 c -= 'A' - 10;
1278 v = v * 16 + c;
1279 c = *++p;
1280 if (++n == ndigits)
1281 break;
1282 if (!ishex(c))
1283 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1284 break;
1285 }
1286 }
1287 if (ndigits != 2 && !utf_isValidDchar(v))
1288 error("invalid UTF character \\U%08x", v);
1289 c = v;
1290 }
1291 else
1292 error("undefined escape hex sequence \\%c\n",c);
1293 break;
1294
1295 case '&': // named character entity
1296 for (unsigned char *idstart = ++p; 1; p++)
1297 {
1298 switch (*p)
1299 {
1300 case ';':
1301 c = HtmlNamedEntity(idstart, p - idstart);
1302 if (c == ~0)
1303 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
1304 c = ' ';
1305 }
1306 p++;
1307 break;
1308
1309 default:
1310 if (isalpha(*p) ||
1311 (p != idstart + 1 && isdigit(*p)))
1312 continue;
1313 error("unterminated named entity");
1314 break;
1315 }
1316 break;
1317 }
1318 break;
1319
1320 case 0:
1321 case 0x1A: // end of file
1322 c = '\\';
1323 break;
1324
1325 default:
1326 if (isoctal(c))
1327 { unsigned v;
1328
1329 n = 0;
1330 v = 0;
1331 do
1332 {
1333 v = v * 8 + (c - '0');
1334 c = *++p;
1335 } while (++n < 3 && isoctal(c));
1336 c = v;
1337 if (c > 0xFF)
1338 error("0%03o is larger than a byte", c);
1339 }
1340 else
1341 error("undefined escape sequence \\%c\n",c);
1342 break;
1343 }
1344 return c;
1345 }
1346
1347 /**************************************
1348 */
1349
1350 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
1351 { unsigned c;
1352 Loc start = loc;
1353
1354 p++;
1355 stringbuffer.reset();
1356 while (1)
1357 {
1358 c = *p++;
1359 switch (c)
1360 {
1361 case '\n':
1362 loc.linnum++;
1363 break;
1364
1365 case '\r':
1366 if (*p == '\n')
1367 continue; // ignore
1368 c = '\n'; // treat EndOfLine as \n character
1369 loc.linnum++;
1370 break;
1371
1372 case 0:
1373 case 0x1A:
1374 error("unterminated string constant starting at %s", start.toChars());
1375 t->ustring = (unsigned char *)"";
1376 t->len = 0;
1377 t->postfix = 0;
1378 return TOKstring;
1379
1380 case '"':
1381 case '`':
1382 if (c == tc)
1383 {
1384 t->len = stringbuffer.offset;
1385 stringbuffer.writeByte(0);
1386 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1387 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1388 stringPostfix(t);
1389 return TOKstring;
1390 }
1391 break;
1392
1393 default:
1394 if (c & 0x80)
1395 { p--;
1396 unsigned u = decodeUTF();
1397 p++;
1398 if (u == PS || u == LS)
1399 loc.linnum++;
1400 stringbuffer.writeUTF8(u);
1401 continue;
1402 }
1403 break;
1404 }
1405 stringbuffer.writeByte(c);
1406 }
1407 }
1408
1409 /**************************************
1410 * Lex hex strings:
1411 * x"0A ae 34FE BD"
1412 */
1413
1414 TOK Lexer::hexStringConstant(Token *t)
1415 { unsigned c;
1416 Loc start = loc;
1417 unsigned n = 0;
1418 unsigned v;
1419
1420 p++;
1421 stringbuffer.reset();
1422 while (1)
1423 {
1424 c = *p++;
1425 switch (c)
1426 {
1427 case ' ':
1428 case '\t':
1429 case '\v':
1430 case '\f':
1431 continue; // skip white space
1432
1433 case '\r':
1434 if (*p == '\n')
1435 continue; // ignore
1436 // Treat isolated '\r' as if it were a '\n'
1437 case '\n':
1438 loc.linnum++;
1439 continue;
1440
1441 case 0:
1442 case 0x1A:
1443 error("unterminated string constant starting at %s", start.toChars());
1444 t->ustring = (unsigned char *)"";
1445 t->len = 0;
1446 t->postfix = 0;
1447 return TOKstring;
1448
1449 case '"':
1450 if (n & 1)
1451 { error("odd number (%d) of hex characters in hex string", n);
1452 stringbuffer.writeByte(v);
1453 }
1454 t->len = stringbuffer.offset;
1455 stringbuffer.writeByte(0);
1456 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1457 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1458 stringPostfix(t);
1459 return TOKstring;
1460
1461 default:
1462 if (c >= '0' && c <= '9')
1463 c -= '0';
1464 else if (c >= 'a' && c <= 'f')
1465 c -= 'a' - 10;
1466 else if (c >= 'A' && c <= 'F')
1467 c -= 'A' - 10;
1468 else if (c & 0x80)
1469 { p--;
1470 unsigned u = decodeUTF();
1471 p++;
1472 if (u == PS || u == LS)
1473 loc.linnum++;
1474 else
1475 error("non-hex character \\u%x", u);
1476 }
1477 else
1478 error("non-hex character '%c'", c);
1479 if (n & 1)
1480 { v = (v << 4) | c;
1481 stringbuffer.writeByte(v);
1482 }
1483 else
1484 v = c;
1485 n++;
1486 break;
1487 }
1488 }
1489 }
1490
1491
1492 #if DMDV2
1493 /**************************************
1494 * Lex delimited strings:
1495 * q"(foo(xxx))" // "foo(xxx)"
1496 * q"[foo(]" // "foo("
1497 * q"/foo]/" // "foo]"
1498 * q"HERE
1499 * foo
1500 * HERE" // "foo\n"
1501 * Input:
1502 * p is on the "
1503 */
1504
1505 TOK Lexer::delimitedStringConstant(Token *t)
1506 { unsigned c;
1507 Loc start = loc;
1508 unsigned delimleft = 0;
1509 unsigned delimright = 0;
1510 unsigned nest = 1;
1511 unsigned nestcount;
1512 Identifier *hereid = NULL;
1513 unsigned blankrol = 0;
1514 unsigned startline = 0;
1515
1516 p++;
1517 stringbuffer.reset();
1518 while (1)
1519 {
1520 c = *p++;
1521 //printf("c = '%c'\n", c);
1522 switch (c)
1523 {
1524 case '\n':
1525 Lnextline:
1526 loc.linnum++;
1527 startline = 1;
1528 if (blankrol)
1529 { blankrol = 0;
1530 continue;
1531 }
1532 if (hereid)
1533 {
1534 stringbuffer.writeUTF8(c);
1535 continue;
1536 }
1537 break;
1538
1539 case '\r':
1540 if (*p == '\n')
1541 continue; // ignore
1542 c = '\n'; // treat EndOfLine as \n character
1543 goto Lnextline;
1544
1545 case 0:
1546 case 0x1A:
1547 goto Lerror;
1548
1549 default:
1550 if (c & 0x80)
1551 { p--;
1552 c = decodeUTF();
1553 p++;
1554 if (c == PS || c == LS)
1555 goto Lnextline;
1556 }
1557 break;
1558 }
1559 if (delimleft == 0)
1560 { delimleft = c;
1561 nest = 1;
1562 nestcount = 1;
1563 if (c == '(')
1564 delimright = ')';
1565 else if (c == '{')
1566 delimright = '}';
1567 else if (c == '[')
1568 delimright = ']';
1569 else if (c == '<')
1570 delimright = '>';
1571 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1572 { // Start of identifier; must be a heredoc
1573 Token t;
1574 p--;
1575 scan(&t); // read in heredoc identifier
1576 if (t.value != TOKidentifier)
1577 { error("identifier expected for heredoc, not %s", t.toChars());
1578 delimright = c;
1579 }
1580 else
1581 { hereid = t.ident;
1582 //printf("hereid = '%s'\n", hereid->toChars());
1583 blankrol = 1;
1584 }
1585 nest = 0;
1586 }
1587 else
1588 { delimright = c;
1589 nest = 0;
1590 }
1591 }
1592 else
1593 {
1594 if (blankrol)
1595 { error("heredoc rest of line should be blank");
1596 blankrol = 0;
1597 continue;
1598 }
1599 if (nest == 1)
1600 {
1601 if (c == delimleft)
1602 nestcount++;
1603 else if (c == delimright)
1604 { nestcount--;
1605 if (nestcount == 0)
1606 goto Ldone;
1607 }
1608 }
1609 else if (c == delimright)
1610 goto Ldone;
1611 if (startline && isalpha(c))
1612 { Token t;
1613 unsigned char *psave = p;
1614 p--;
1615 scan(&t); // read in possible heredoc identifier
1616 //printf("endid = '%s'\n", t.ident->toChars());
1617 if (t.value == TOKidentifier && t.ident->equals(hereid))
1618 { /* should check that rest of line is blank
1619 */
1620 goto Ldone;
1621 }
1622 p = psave;
1623 }
1624 stringbuffer.writeUTF8(c);
1625 startline = 0;
1626 }
1627 }
1628
1629 Ldone:
1630 if (*p == '"')
1631 p++;
1632 else
1633 error("delimited string must end in %c\"", delimright);
1634 t->len = stringbuffer.offset;
1635 stringbuffer.writeByte(0);
1636 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1637 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1638 stringPostfix(t);
1639 return TOKstring;
1640
1641 Lerror:
1642 error("unterminated string constant starting at %s", start.toChars());
1643 t->ustring = (unsigned char *)"";
1644 t->len = 0;
1645 t->postfix = 0;
1646 return TOKstring;
1647 }
1648
1649 /**************************************
1650 * Lex delimited strings:
1651 * q{ foo(xxx) } // " foo(xxx) "
1652 * q{foo(} // "foo("
1653 * q{{foo}"}"} // "{foo}"}""
1654 * Input:
1655 * p is on the q
1656 */
1657
1658 TOK Lexer::tokenStringConstant(Token *t)
1659 {
1660 unsigned nest = 1;
1661 Loc start = loc;
1662 unsigned char *pstart = ++p;
1663
1664 while (1)
1665 { Token tok;
1666
1667 scan(&tok);
1668 switch (tok.value)
1669 {
1670 case TOKlcurly:
1671 nest++;
1672 continue;
1673
1674 case TOKrcurly:
1675 if (--nest == 0)
1676 goto Ldone;
1677 continue;
1678
1679 case TOKeof:
1680 goto Lerror;
1681
1682 default:
1683 continue;
1684 }
1685 }
1686
1687 Ldone:
1688 t->len = p - 1 - pstart;
1689 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1690 memcpy(t->ustring, pstart, t->len);
1691 t->ustring[t->len] = 0;
1692 stringPostfix(t);
1693 return TOKstring;
1694
1695 Lerror:
1696 error("unterminated token string constant starting at %s", start.toChars());
1697 t->ustring = (unsigned char *)"";
1698 t->len = 0;
1699 t->postfix = 0;
1700 return TOKstring;
1701 }
1702
1703 #endif
1704
1705
1706 /**************************************
1707 */
1708
1709 TOK Lexer::escapeStringConstant(Token *t, int wide)
1710 { unsigned c;
1711 Loc start = loc;
1712
1713 p++;
1714 stringbuffer.reset();
1715 while (1)
1716 {
1717 c = *p++;
1718 switch (c)
1719 {
1720 case '\\':
1721 switch (*p)
1722 {
1723 case 'u':
1724 case 'U':
1725 case '&':
1726 c = escapeSequence();
1727 stringbuffer.writeUTF8(c);
1728 continue;
1729
1730 default:
1731 c = escapeSequence();
1732 break;
1733 }
1734 break;
1735
1736 case '\n':
1737 loc.linnum++;
1738 break;
1739
1740 case '\r':
1741 if (*p == '\n')
1742 continue; // ignore
1743 c = '\n'; // treat EndOfLine as \n character
1744 loc.linnum++;
1745 break;
1746
1747 case '"':
1748 t->len = stringbuffer.offset;
1749 stringbuffer.writeByte(0);
1750 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1751 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1752 stringPostfix(t);
1753 return TOKstring;
1754
1755 case 0:
1756 case 0x1A:
1757 p--;
1758 error("unterminated string constant starting at %s", start.toChars());
1759 t->ustring = (unsigned char *)"";
1760 t->len = 0;
1761 t->postfix = 0;
1762 return TOKstring;
1763
1764 default:
1765 if (c & 0x80)
1766 {
1767 p--;
1768 c = decodeUTF();
1769 if (c == LS || c == PS)
1770 { c = '\n';
1771 loc.linnum++;
1772 }
1773 p++;
1774 stringbuffer.writeUTF8(c);
1775 continue;
1776 }
1777 break;
1778 }
1779 stringbuffer.writeByte(c);
1780 }
1781 }
1782
1783 /**************************************
1784 */
1785
1786 TOK Lexer::charConstant(Token *t, int wide)
1787 {
1788 unsigned c;
1789 TOK tk = TOKcharv;
1790
1791 //printf("Lexer::charConstant\n");
1792 p++;
1793 c = *p++;
1794 switch (c)
1795 {
1796 case '\\':
1797 switch (*p)
1798 {
1799 case 'u':
1800 t->uns64value = escapeSequence();
1801 tk = TOKwcharv;
1802 break;
1803
1804 case 'U':
1805 case '&':
1806 t->uns64value = escapeSequence();
1807 tk = TOKdcharv;
1808 break;
1809
1810 default:
1811 t->uns64value = escapeSequence();
1812 break;
1813 }
1814 break;
1815
1816 case '\n':
1817 L1:
1818 loc.linnum++;
1819 case '\r':
1820 case 0:
1821 case 0x1A:
1822 case '\'':
1823 error("unterminated character constant");
1824 return tk;
1825
1826 default:
1827 if (c & 0x80)
1828 {
1829 p--;
1830 c = decodeUTF();
1831 p++;
1832 if (c == LS || c == PS)
1833 goto L1;
1834 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1835 tk = TOKwcharv;
1836 else
1837 tk = TOKdcharv;
1838 }
1839 t->uns64value = c;
1840 break;
1841 }
1842
1843 if (*p != '\'')
1844 { error("unterminated character constant");
1845 return tk;
1846 }
1847 p++;
1848 return tk;
1849 }
1850
1851 /***************************************
1852 * Get postfix of string literal.
1853 */
1854
1855 void Lexer::stringPostfix(Token *t)
1856 {
1857 switch (*p)
1858 {
1859 case 'c':
1860 case 'w':
1861 case 'd':
1862 t->postfix = *p;
1863 p++;
1864 break;
1865
1866 default:
1867 t->postfix = 0;
1868 break;
1869 }
1870 }
1871
/***************************************
 * Read the hex digits of a \u or \U unicode sequence.
 * (Compiled out: the whole function sits inside #if 0.)
 * Input:
 *      u       'u' or 'U'; 'U' expects 8 hex digits, anything else 4
 * Returns:
 *      the accumulated value; if a non-hex character is met first an
 *      error is reported and the value gathered so far is returned.
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned digits = (u == 'U') ? 8 : 4;
    unsigned result = 0;
    unsigned seen = 0;

    for (;;)
    {
        ++p;                            // step before looking, as p starts on the 'u'/'U'
        if (seen == digits)
            break;

        const unsigned char ch = *p;
        if (!ishex(ch))
        {
            error("\\%c sequence must be followed by %d hex characters", u, digits);
            break;
        }

        unsigned nibble;
        if (isdigit(ch))
            nibble = ch - '0';
        else if (islower(ch))
            nibble = ch - 'a' + 10;
        else
            nibble = ch - 'A' + 10;

        result = (result << 4) | nibble;
        seen++;
    }
    return result;
}
#endif
1910
1911 /**************************************
1912 * Read in a number.
1913 * If it's an integer, store it in tok.TKutok.Vlong.
1914 * integers can be decimal, octal or hex
1915 * Handle the suffixes U, UL, LU, L, etc.
1916 * If it's double, store it in tok.TKutok.Vdouble.
1917 * Returns:
1918 * TKnum
1919 * TKdouble,...
1920 */
1921
// Lex an integer literal starting at p.
// Characters are run through a small state machine and the accepted ones are
// copied into stringbuffer. As soon as something that marks a real literal is
// seen, p is rewound to the start and the work is handed to inreal().
// Otherwise the collected text is converted to an integer, any u/U/l/L
// suffixes are consumed, the value is stored in t->uns64value and one of
// TOKint32v / TOKuns32v / TOKint64v / TOKuns64v is returned.
1922 TOK Lexer::number(Token *t)
1923 {
1924 // We use a state machine to collect numbers
1925 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
1926 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
1927 STATE_hexh, STATE_error };
1928 enum STATE state;
1929
// Bit flags describing the literal; the combination picks the result type
// in the switch at the end of the function.
1930 enum FLAGS
1931 { FLAGS_decimal = 1, // decimal
1932 FLAGS_unsigned = 2, // u or U suffix
1933 FLAGS_long = 4, // l or L suffix
1934 };
1935 enum FLAGS flags = FLAGS_decimal;
1936
1937 int i;
1938 int base;
1939 unsigned c;
1940 unsigned char *start;
1941 TOK result;
1942
1943 //printf("Lexer::number()\n");
1944 state = STATE_initial;
1945 base = 0;
1946 stringbuffer.reset();
// Remember where the literal began so the 'real' path can rescan from here.
1947 start = p;
// Each iteration inspects *p. A 'break' out of the state switch stores the
// character and advances; 'continue' (used for '_') advances without storing.
1948 while (1)
1949 {
1950 c = *p;
1951 switch (state)
1952 {
1953 case STATE_initial: // opening state
1954 if (c == '0')
1955 state = STATE_0;
1956 else
1957 state = STATE_decimal;
1958 break;
1959
// A leading '0' has been stored: the literal is no longer flagged decimal,
// and the next character selects hex, binary or octal, or ends the literal.
1960 case STATE_0:
1961 flags = (FLAGS) (flags & ~FLAGS_decimal);
1962 switch (c)
1963 {
1964 #if ZEROH
1965 case 'H': // 0h
1966 case 'h':
1967 goto hexh;
1968 #endif
1969 case 'X':
1970 case 'x':
1971 state = STATE_hex0;
1972 break;
1973
// '.' falls through into the real-number cases below unless it is the
// first half of "..".
1974 case '.':
1975 if (p[1] == '.') // .. is a separate token
1976 goto done;
1977 case 'i':
1978 case 'f':
1979 case 'F':
1980 goto real;
1981 #if ZEROH
1982 case 'E':
1983 case 'e':
1984 goto case_hex;
1985 #endif
1986 case 'B':
1987 case 'b':
1988 state = STATE_binary0;
1989 break;
1990
1991 case '0': case '1': case '2': case '3':
1992 case '4': case '5': case '6': case '7':
1993 state = STATE_octal;
1994 break;
1995
1996 #if ZEROH
1997 case '8': case '9': case 'A':
1998 case 'C': case 'D': case 'F':
1999 case 'a': case 'c': case 'd': case 'f':
2000 case_hex:
2001 state = STATE_hexh;
2002 break;
2003 #endif
2004 case '_':
2005 state = STATE_octal;
2006 p++;
2007 continue;
2008
// "0Li" is an imaginary real; a plain 'L' is left for the suffix loop.
2009 case 'L':
2010 if (p[1] == 'i')
2011 goto real;
2012 goto done;
2013
2014 default:
2015 goto done;
2016 }
2017 break;
2018
2019 case STATE_decimal: // reading decimal number
2020 if (!isdigit(c))
2021 {
2022 #if ZEROH
2023 if (ishex(c)
2024 || c == 'H' || c == 'h'
2025 )
2026 goto hexh;
2027 #endif
2028 if (c == '_') // ignore embedded _
2029 { p++;
2030 continue;
2031 }
2032 if (c == '.' && p[1] != '.')
2033 goto real;
2034 else if (c == 'i' || c == 'f' || c == 'F' ||
2035 c == 'e' || c == 'E')
2036 {
// 'real' is also the target of gotos from the other states.
2037 real: // It's a real number. Back up and rescan as a real
2038 p = start;
2039 return inreal(t);
2040 }
2041 else if (c == 'L' && p[1] == 'i')
2042 goto real;
2043 goto done;
2044 }
2045 break;
2046
// STATE_hex0 means "0x" was seen but no hex digit yet; ending the literal
// in that state is reported as an error.
2047 case STATE_hex0: // reading hex number
2048 case STATE_hex:
2049 if (!ishex(c))
2050 {
2051 if (c == '_') // ignore embedded _
2052 { p++;
2053 continue;
2054 }
2055 if (c == '.' && p[1] != '.')
2056 goto real;
2057 if (c == 'P' || c == 'p' || c == 'i')
2058 goto real;
2059 if (state == STATE_hex0)
2060 error("Hex digit expected, not '%c'", c);
2061 goto done;
2062 }
2063 state = STATE_hex;
2064 break;
2065
2066 #if ZEROH
2067 hexh:
2068 state = STATE_hexh;
2069 case STATE_hexh: // parse numbers like 0FFh
2070 if (!ishex(c))
2071 {
2072 if (c == 'H' || c == 'h')
2073 {
2074 p++;
2075 base = 16;
2076 goto done;
2077 }
2078 else
2079 {
2080 // Check for something like 1E3 or 0E24
2081 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
2082 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
2083 goto real;
2084 error("Hex digit expected, not '%c'", c);
2085 goto done;
2086 }
2087 }
2088 break;
2089 #endif
2090
// A digit 8 or 9 inside an octal literal is still consumed, but the state
// switches to STATE_octale so that an error is reported once at 'done'.
2091 case STATE_octal: // reading octal number
2092 case STATE_octale: // reading octal number with non-octal digits
2093 if (!isoctal(c))
2094 {
2095 #if ZEROH
2096 if (ishex(c)
2097 || c == 'H' || c == 'h'
2098 )
2099 goto hexh;
2100 #endif
2101 if (c == '_') // ignore embedded _
2102 { p++;
2103 continue;
2104 }
2105 if (c == '.' && p[1] != '.')
2106 goto real;
2107 if (c == 'i')
2108 goto real;
2109 if (isdigit(c))
2110 {
2111 state = STATE_octale;
2112 }
2113 else
2114 goto done;
2115 }
2116 break;
2117
2118 case STATE_binary0: // starting binary number
2119 case STATE_binary: // reading binary number
2120 if (c != '0' && c != '1')
2121 {
2122 #if ZEROH
2123 if (ishex(c)
2124 || c == 'H' || c == 'h'
2125 )
2126 goto hexh;
2127 #endif
2128 if (c == '_') // ignore embedded _
2129 { p++;
2130 continue;
2131 }
2132 if (state == STATE_binary0)
2133 { error("binary digit expected");
2134 state = STATE_error;
2135 break;
2136 }
2137 else
2138 goto done;
2139 }
2140 state = STATE_binary;
2141 break;
2142
2143 case STATE_error: // for error recovery
2144 if (!isdigit(c)) // scan until non-digit
2145 goto done;
2146 break;
2147
2148 default:
2149 assert(0);
2150 }
2151 stringbuffer.writeByte(c);
2152 p++;
2153 }
// Scanning finished: stringbuffer holds the literal's text (without any '_').
2154 done:
2155 stringbuffer.writeByte(0); // terminate string
2156 if (state == STATE_octale)
2157 error("Octal digit expected");
2158
2159 uinteger_t n; // unsigned >=64 bit integer type
2160
// Fast path: exactly one character plus the terminator, i.e. a single digit.
2161 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
2162 n = stringbuffer.data[0] - '0';
2163 else
2164 {
2165 // Convert string to integer
2166 #if __DMC__
2167 errno = 0;
2168 n = strtoull((char *)stringbuffer.data,NULL,base);
2169 if (errno == ERANGE)
2170 error("integer overflow");
2171 #else
2172 // Not everybody implements strtoull()
// Note: this local 'p' shadows the lexer's member 'p' until the end of the
// enclosing else-block; it walks the collected text, not the source.
2173 char *p = (char *)stringbuffer.data;
2174 int r = 10, d;
2175
// Pick the radix from the prefix: 0x / 0b / leading 0 followed by a digit.
2176 if (*p == '0')
2177 {
2178 if (p[1] == 'x' || p[1] == 'X')
2179 p += 2, r = 16;
2180 else if (p[1] == 'b' || p[1] == 'B')
2181 p += 2, r = 2;
2182 else if (isdigit(p[1]))
2183 p += 1, r = 8;
2184 }
2185
2186 n = 0;
2187 while (1)
2188 {
2189 if (*p >= '0' && *p <= '9')
2190 d = *p - '0';
2191 else if (*p >= 'a' && *p <= 'z')
2192 d = *p - 'a' + 10;
2193 else if (*p >= 'A' && *p <= 'Z')
2194 d = *p - 'A' + 10;
2195 else
2196 break;
2197 if (d >= r)
2198 break;
// Overflow check: the multiply must be reversible and the add must not wrap.
2199 uinteger_t n2 = n * r;
2200 //printf("n2 / r = %llx, n = %llx\n", n2/r, n);
2201 if (n2 / r != n || n2 + d < n)
2202 {
2203 error ("integer overflow");
2204 break;
2205 }
2206
2207 n = n2 + d;
2208 p++;
2209 }
2210 #endif
2211 if (sizeof(n) > 8 &&
2212 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
2213 error("integer overflow");
2214 }
2215
2216 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2217 while (1)
2218 { unsigned char f;
2219
2220 switch (*p)
2221 { case 'U':
2222 case 'u':
2223 f = FLAGS_unsigned;
2224 goto L1;
2225
// The leading "1 ||" makes this error unconditional: the useDeprecated
// setting has no effect here. Control then falls through to 'L'.
2226 case 'l':
2227 if (1 || !global.params.useDeprecated)
2228 error("'l' suffix is deprecated, use 'L' instead");
2229 case 'L':
2230 f = FLAGS_long;
2231 L1:
2232 p++;
// Seeing the same kind of suffix twice is reported as an error.
2233 if (flags & f)
2234 error("unrecognized token");
2235 flags = (FLAGS) (flags | f);
2236 continue;
2237 default:
2238 break;
2239 }
2240 break;
2241 }
2242
// Choose the token type from the flag combination and the magnitude of n.
2243 switch (flags)
2244 {
2245 case 0:
2246 /* Octal or Hexadecimal constant.
2247 * First that fits: int, uint, long, ulong
2248 */
2249 if (n & 0x8000000000000000LL)
2250 result = TOKuns64v;
2251 else if (n & 0xFFFFFFFF00000000LL)
2252 result = TOKint64v;
2253 else if (n & 0x80000000)
2254 result = TOKuns32v;
2255 else
2256 result = TOKint32v;
2257 break;
2258
2259 case FLAGS_decimal:
2260 /* First that fits: int, long, long long
2261 */
2262 if (n & 0x8000000000000000LL)
2263 { error("signed integer overflow");
2264 result = TOKuns64v;
2265 }
2266 else if (n & 0xFFFFFFFF80000000LL)
2267 result = TOKint64v;
2268 else
2269 result = TOKint32v;
2270 break;
2271
2272 case FLAGS_unsigned:
2273 case FLAGS_decimal | FLAGS_unsigned:
2274 /* First that fits: uint, ulong
2275 */
2276 if (n & 0xFFFFFFFF00000000LL)
2277 result = TOKuns64v;
2278 else
2279 result = TOKuns32v;
2280 break;
2281
2282 case FLAGS_decimal | FLAGS_long:
2283 if (n & 0x8000000000000000LL)
2284 { error("signed integer overflow");
2285 result = TOKuns64v;
2286 }
2287 else
2288 result = TOKint64v;
2289 break;
2290
2291 case FLAGS_long:
2292 if (n & 0x8000000000000000LL)
2293 result = TOKuns64v;
2294 else
2295 result = TOKint64v;
2296 break;
2297
2298 case FLAGS_unsigned | FLAGS_long:
2299 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
2300 result = TOKuns64v;
2301 break;
2302
2303 default:
2304 #ifdef DEBUG
2305 printf("%x\n",flags);
2306 #endif
2307 assert(0);
2308 }
2309 t->uns64value = n;
2310 return result;
2311 }
2312
2313 /**************************************
2314 * Read in characters, converting them to real.
2315 * Bugs:
2316 * Exponent overflow not detected.
2317 * Too much requested precision is not detected.
2318 */
2319
// Lex a floating point literal starting at p.
// The literal's characters (minus embedded '_') are gathered in stringbuffer
// by a numbered state machine, converted into t->float80value, and the
// suffix (f/F, l/L, i/I) decides which TOKfloat*v / TOKimaginary*v is returned.
2320 TOK Lexer::inreal(Token *t)
// The __in/__out contract blocks below are compiled only when __DMC__ is defined.
2321 #ifdef __DMC__
2322 __in
2323 {
2324 assert(*p == '.' || isdigit(*p));
2325 }
2326 __out (result)
2327 {
2328 switch (result)
2329 {
2330 case TOKfloat32v:
2331 case TOKfloat64v:
2332 case TOKfloat80v:
2333 case TOKimaginary32v:
2334 case TOKimaginary64v:
2335 case TOKimaginary80v:
2336 break;
2337
2338 default:
2339 assert(0);
2340 }
2341 }
2342 __body
2343 #endif /* __DMC__ */
2344 { int dblstate;
2345 unsigned c;
2346 char hex; // is this a hexadecimal-floating-constant?
2347 TOK result;
2348
2349 //printf("Lexer::inreal()\n");
2350 stringbuffer.reset();
2351 dblstate = 0;
2352 hex = 0;
// Lnext is re-entered by goto to drop an embedded '_' without storing it.
2353 Lnext:
2354 while (1)
2355 {
2356 // Get next char from input
2357 c = *p++;
2358 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
// The inner loop lets a state pass the same character on to the following
// state: 'continue' re-dispatches c, 'break' accepts it.
2359 while (1)
2360 {
2361 switch (dblstate)
2362 {
2363 case 0: // opening state
2364 if (c == '0')
2365 dblstate = 9;
2366 else if (c == '.')
2367 dblstate = 3;
2368 else
2369 dblstate = 1;
2370 break;
2371
// State 9: the character right after a leading '0'. An 'x'/'X' turns on
// hex mode; anything else is treated as state 1 (falls through).
2372 case 9:
2373 dblstate = 1;
2374 if (c == 'X' || c == 'x')
2375 { hex++;
2376 break;
2377 }
2378 case 1: // digits to left of .
2379 case 3: // digits to right of .
2380 case 7: // continuing exponent digits
2381 if (!isdigit(c) && !(hex && isxdigit(c)))
2382 {
2383 if (c == '_')
2384 goto Lnext; // ignore embedded '_'
// End of this run of digits: move to the next state and look at c again.
2385 dblstate++;
2386 continue;
2387 }
2388 break;
2389
2390 case 2: // no more digits to left of .
2391 if (c == '.')
2392 { dblstate++;
2393 break;
2394 }
2395 case 4: // no more digits to right of .
2396 if ((c == 'E' || c == 'e') ||
2397 hex && (c == 'P' || c == 'p'))
2398 { dblstate = 5;
2399 hex = 0; // exponent is always decimal
2400 break;
2401 }
// hex is still set here only if no exponent marker was accepted.
2402 if (hex)
2403 error("binary-exponent-part required");
2404 goto done;
2405
2406 case 5: // looking immediately to right of E
2407 dblstate++;
2408 if (c == '-' || c == '+')
2409 break;
2410 case 6: // 1st exponent digit expected
2411 if (!isdigit(c))
2412 error("exponent expected");
2413 dblstate++;
2414 break;
2415
2416 case 8: // past end of exponent digits
2417 goto done;
2418 }
2419 break;
2420 }
2421 stringbuffer.writeByte(c);
2422 }
2423 done:
// Un-read the character that ended the literal.
2424 p--;
2425
2426 stringbuffer.writeByte(0);
2427
2428 #if _WIN32 && __DMC__
2429 char *save = __locale_decpoint;
2430 __locale_decpoint = ".";
2431 #endif
2432 #ifdef IN_GCC
2433 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
2434 #else
2435 t->float80value = strtold((char *)stringbuffer.data, NULL);
2436 #endif
// errno is cleared after the conversion above, so the ERANGE test at the end
// can only be triggered by the strtof/strtod calls in the switch below.
// Those calls are made for their range check; the token value stays the
// float80value computed above.
2437 errno = 0;
2438 float strtofres;
2439 double strtodres;
2440 switch (*p)
2441 {
2442 case 'F':
2443 case 'f':
2444 #ifdef IN_GCC
2445 real_t::parse((char *)stringbuffer.data, real_t::Float);
2446 #else
2447 strtofres = strtof((char *)stringbuffer.data, NULL);
2448 // LDC change: don't error on gradual underflow
2449 if (errno == ERANGE &&
2450 strtofres != 0 && strtofres != HUGE_VALF && strtofres != -HUGE_VALF)
2451 errno = 0;
2452 #endif
2453 result = TOKfloat32v;
2454 p++;
2455 break;
2456
// No suffix: the literal is a double. p is not advanced in this branch.
2457 default:
2458 #ifdef IN_GCC
2459 real_t::parse((char *)stringbuffer.data, real_t::Double);
2460 #else
2461 strtodres = strtod((char *)stringbuffer.data, NULL);
2462 // LDC change: don't error on gradual underflow
2463 if (errno == ERANGE &&
2464 strtodres != 0 && strtodres != HUGE_VAL && strtodres != -HUGE_VAL)
2465 errno = 0;
2466 #endif
2467 result = TOKfloat64v;
2468 break;
2469
// 'l' falls through to 'L'. Nothing is re-parsed in this branch, so errno
// stays 0 and no range error is reported for this suffix.
2470 case 'l':
2471 if (!global.params.useDeprecated)
2472 error("'l' suffix is deprecated, use 'L' instead");
2473 case 'L':
2474 result = TOKfloat80v;
2475 p++;
2476 break;
2477 }
// A trailing 'i' (or deprecated 'I') turns the result into the matching
// imaginary token kind.
2478 if (*p == 'i' || *p == 'I')
2479 {
2480 if (!global.params.useDeprecated && *p == 'I')
2481 error("'I' suffix is deprecated, use 'i' instead");
2482 p++;
2483 switch (result)
2484 {
2485 case TOKfloat32v:
2486 result = TOKimaginary32v;
2487 break;
2488 case TOKfloat64v:
2489 result = TOKimaginary64v;
2490 break;
2491 case TOKfloat80v:
2492 result = TOKimaginary80v;
2493 break;
2494 }
2495 }
2496 #if _WIN32 && __DMC__
2497 __locale_decpoint = save;
2498 #endif
2499 if (errno == ERANGE)
2500 error("number is not representable");
2501 return result;
2502 }
2503
2504 /*********************************************
2505 * Do pragma.
2506 * Currently, the only pragma supported is:
2507 * #line linnum [filespec]
2508 */
2509
2510 void Lexer::pragma()
2511 {
2512 Token tok;
2513 int linnum;
2514 char *filespec = NULL;
2515 Loc loc = this->loc;
2516
2517 scan(&tok);
2518 if (tok.value != TOKidentifier || tok.ident != Id::line)
2519 goto Lerr;
2520
2521 scan(&tok);
2522 if (tok.value == TOKint32v || tok.value == TOKint64v)
2523 linnum = tok.uns64value - 1;
2524 else
2525 goto Lerr;
2526
2527 while (1)
2528 {
2529 switch (*p)
2530 {
2531 case 0:
2532 case 0x1A:
2533 case '\n':
2534 Lnewline:
2535 this->loc.linnum = linnum;
2536 if (filespec)
2537 this->loc.filename = filespec;
2538 return;
2539
2540 case '\r':
2541 p++;
2542 if (*p != '\n')
2543 { p--;
2544 goto Lnewline;
2545 }
2546 continue;
2547
2548 case ' ':
2549 case '\t':
2550 case '\v':
2551 case '\f':
2552 p++;
2553 continue; // skip white space
2554
2555 case '_':
2556 if (mod && memcmp(p, "__FILE__", 8) == 0)
2557 {
2558 p += 8;
2559 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
2560 }
2561 continue;
2562
2563 case '"':
2564 if (filespec)
2565 goto Lerr;
2566 stringbuffer.reset();
2567 p++;
2568 while (1)
2569 { unsigned c;
2570
2571 c = *p;
2572 switch (c)
2573 {
2574 case '\n':
2575 case '\r':
2576 case 0:
2577 case 0x1A:
2578 goto Lerr;
2579
2580 case '"':
2581 stringbuffer.writeByte(0);
2582 filespec = mem.strdup((char *)stringbuffer.data);
2583 p++;
2584 break;
2585
2586 default:
2587 if (c & 0x80)
2588 { unsigned u = decodeUTF();
2589 if (u == PS || u == LS)
2590 goto Lerr;
2591 }
2592 stringbuffer.writeByte(c);
2593 p++;
2594 continue;
2595 }
2596 break;
2597 }
2598 continue;
2599
2600 default:
2601 if (*p & 0x80)
2602 { unsigned u = decodeUTF();
2603 if (u == PS || u == LS)
2604 goto Lnewline;
2605 }
2606 goto Lerr;
2607 }
2608 }
2609
2610 Lerr:
2611 error(loc, "#line integer [\"filespec\"]\\n expected");
2612 }
2613
2614
2615 /********************************************
2616 * Decode UTF character.
2617 * Issue error messages for invalid sequences.
2618 * Return decoded character, advance p to last character in UTF sequence.
2619 */
2620
2621 unsigned Lexer::decodeUTF()
2622 {
2623 dchar_t u;
2624 unsigned char c;
2625 unsigned char *s = p;
2626 size_t len;
2627 size_t idx;
2628 const char *msg;
2629
2630 c = *s;
2631 assert(c & 0x80);
2632
2633 // Check length of remaining string up to 6 UTF-8 characters
2634 for (len = 1; len < 6 && s[len]; len++)
2635 ;
2636
2637 idx = 0;
2638 msg = utf_decodeChar(s, len, &idx, &u);
2639 p += idx - 1;
2640 if (msg)
2641 {
2642 error("%s", msg);
2643 }
2644 return u;
2645 }
2646
2647
2648 /***************************************************
2649 * Parse doc comment embedded between t->ptr and p.
2650 * Remove trailing blanks and tabs from lines.
2651 * Replace all newlines with \n.
2652 * Remove leading comment character from each line.
2653 * Decide if it's a lineComment or a blockComment.
2654 * Append to previous one for this token.
2655 */
2656
2657 void Lexer::getDocComment(Token *t, unsigned lineComment)
2658 {
2659 OutBuffer buf;
2660 unsigned char ct = t->ptr[2];
2661 unsigned char *q = t->ptr + 3; // start of comment text
2662 int linestart = 0;
2663
2664 unsigned char *qend = p;
2665 if (ct == '*' || ct == '+')
2666 qend -= 2;
2667
2668 /* Scan over initial row of ****'s or ++++'s or ////'s
2669 */
2670 for (; q < qend; q++)
2671 {
2672 if (*q != ct)
2673 break;
2674 }
2675
2676 /* Remove trailing row of ****'s or ++++'s
2677 */
2678 if (ct != '/')
2679 {
2680 for (; q < qend; qend--)
2681 {
2682 if (qend[-1] != ct)
2683 break;
2684 }
2685 }
2686
2687 for (; q < qend; q++)
2688 {
2689 unsigned char c = *q;
2690
2691 switch (c)
2692 {
2693 case '*':
2694 case '+':
2695 if (linestart && c == ct)
2696 { linestart = 0;
2697 /* Trim preceding whitespace up to preceding \n
2698 */
2699 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2700 buf.offset--;
2701 continue;
2702 }
2703 break;
2704
2705 case ' ':
2706 case '\t':
2707 break;
2708
2709 case '\r':
2710 if (q[1] == '\n')
2711 continue; // skip the \r
2712 goto Lnewline;
2713
2714 default:
2715 if (c == 226)
2716 {
2717 // If LS or PS
2718 if (q[1] == 128 &&
2719 (q[2] == 168 || q[2] == 169))
2720 {
2721 q += 2;
2722 goto Lnewline;
2723 }
2724 }
2725 linestart = 0;
2726 break;
2727
2728 Lnewline:
2729 c = '\n'; // replace all newlines with \n
2730 case '\n':
2731 linestart = 1;
2732
2733 /* Trim trailing whitespace
2734 */
2735 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2736 buf.offset--;
2737
2738 break;
2739 }
2740 buf.writeByte(c);
2741 }
2742
2743 // Always end with a newline
2744 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2745 buf.writeByte('\n');
2746
2747 buf.writeByte(0);
2748
2749 // It's a line comment if the start of the doc comment comes
2750 // after other non-whitespace on the same line.
2751 unsigned char** dc = (lineComment && anyToken)
2752 ? &t->lineComment
2753 : &t->blockComment;
2754
2755 // Combine with previous doc comment, if any
2756 if (*dc)
2757 *dc = combineComments(*dc, (unsigned char *)buf.data);
2758 else
2759 *dc = (unsigned char *)buf.extractData();
2760 }
2761
2762 /********************************************
2763 * Combine two document comments into one.
2764 */
2765
2766 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
2767 {
2768 unsigned char *c = c2;
2769
2770 if (c1)
2771 { c = c1;
2772 if (c2)
2773 { size_t len1 = strlen((char *)c1);
2774 size_t len2 = strlen((char *)c2);
2775
2776 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
2777 memcpy(c, c1, len1);
2778 c[len1] = '\n';
2779 memcpy(c + len1 + 1, c2, len2);
2780 c[len1 + 1 + len2] = 0;
2781 }
2782 }
2783 return c;
2784 }
2785
2786 /********************************************
2787 * Create an identifier in the string table.
2788 */
2789
2790 Identifier *Lexer::idPool(const char *s)
2791 {
2792 size_t len = strlen(s);
2793 StringValue *sv = stringtable.update(s, len);
2794 Identifier *id = (Identifier *) sv->ptrvalue;
2795 if (!id)
2796 {
2797 id = new Identifier(sv->lstring.string, TOKidentifier);
2798 sv->ptrvalue = id;
2799 }
2800 return id;
2801 }
2802
2803 /*********************************************
2804 * Create a unique identifier using the prefix s.
2805 */
2806
2807 Identifier *Lexer::uniqueId(const char *s, int num)
2808 { char buffer[32];
2809 size_t slen = strlen(s);
2810
2811 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
2812 sprintf(buffer, "%s%d", s, num);
2813 return idPool(buffer);
2814 }
2815
2816 Identifier *Lexer::uniqueId(const char *s)
2817 {
2818 static int num;
2819 return uniqueId(s, ++num);
2820 }
2821
2822 /****************************************
2823 */
2824
2825 struct Keyword
2826 { const char *name;
2827 enum TOK value;
2828 };
2829
2830 static Keyword keywords[] =
2831 {
2832 // { "", TOK },
2833
2834 { "this", TOKthis },
2835 { "super", TOKsuper },
2836 { "assert", TOKassert },
2837 { "null", TOKnull },
2838 { "true", TOKtrue },
2839 { "false", TOKfalse },
2840 { "cast", TOKcast },
2841 { "new", TOKnew },
2842 { "delete", TOKdelete },
2843 { "throw", TOKthrow },
2844 { "module", TOKmodule },
2845 { "pragma", TOKpragma },
2846 { "typeof", TOKtypeof },
2847 { "typeid", TOKtypeid },
2848
2849 { "template", TOKtemplate },
2850
2851 { "void", TOKvoid },
2852 { "byte", TOKint8 },
2853 { "ubyte", TOKuns8 },
2854 { "short", TOKint16 },
2855 { "ushort", TOKuns16 },
2856 { "int", TOKint32 },
2857 { "uint", TOKuns32 },
2858 { "long", TOKint64 },
2859 { "ulong", TOKuns64 },
2860 { "cent", TOKcent, },
2861 { "ucent", TOKucent, },
2862 { "float", TOKfloat32 },
2863 { "double", TOKfloat64 },
2864 { "real", TOKfloat80 },
2865
2866 { "bool", TOKbool },
2867 { "char", TOKchar },
2868 { "wchar", TOKwchar },
2869 { "dchar", TOKdchar },
2870
2871 { "ifloat", TOKimaginary32 },
2872 { "idouble", TOKimaginary64 },
2873 { "ireal", TOKimaginary80 },
2874
2875 { "cfloat", TOKcomplex32 },
2876 { "cdouble", TOKcomplex64 },
2877 { "creal", TOKcomplex80 },
2878
2879 { "delegate", TOKdelegate },
2880 { "function", TOKfunction },
2881
2882 { "is", TOKis },
2883 { "if", TOKif },
2884 { "else", TOKelse },
2885 { "while", TOKwhile },
2886 { "for", TOKfor },
2887 { "do", TOKdo },
2888 { "switch", TOKswitch },
2889 { "case", TOKcase },
2890 { "default", TOKdefault },
2891 { "break", TOKbreak },
2892 { "continue", TOKcontinue },
2893 { "synchronized", TOKsynchronized },
2894 { "return", TOKreturn },
2895 { "goto", TOKgoto },
2896 { "try", TOKtry },
2897 { "catch", TOKcatch },
2898 { "finally", TOKfinally },
2899 { "with", TOKwith },
2900 { "asm", TOKasm },
2901 { "foreach", TOKforeach },
2902 { "foreach_reverse", TOKforeach_reverse },
2903 { "scope", TOKscope },
2904
2905 { "struct", TOKstruct },
2906 { "class", TOKclass },
2907 { "interface", TOKinterface },
2908 { "union", TOKunion },
2909 { "enum", TOKenum },
2910 { "import", TOKimport },
2911 { "mixin", TOKmixin },
2912 { "static", TOKstatic },
2913 { "final", TOKfinal },
2914 { "const", TOKconst },
2915 { "immutable", TOKimmutable },
2916 { "typedef", TOKtypedef },
2917 { "alias", TOKalias },
2918 { "override", TOKoverride },
2919 { "abstract", TOKabstract },
2920 { "volatile", TOKvolatile },
2921 { "debug", TOKdebug },
2922 { "deprecated", TOKdeprecated },
2923 { "in", TOKin },
2924 { "out", TOKout },
2925 { "inout", TOKinout },
2926 { "lazy", TOKlazy },
2927 { "auto", TOKauto },
2928
2929 { "align", TOKalign },
2930 { "extern", TOKextern },
2931 { "private", TOKprivate },
2932 { "package", TOKpackage },
2933 { "protected", TOKprotected },
2934 { "public", TOKpublic },
2935 { "export", TOKexport },
2936
2937 { "body", TOKbody },
2938 { "invariant", TOKinvariant },
2939 { "unittest", TOKunittest },
2940 { "version", TOKversion },
2941 //{ "manifest", TOKmanifest },
2942
2943 // Added after 1.0
2944 { "ref", TOKref },
2945 { "macro", TOKmacro },
2946 #if DMDV2
2947 { "pure", TOKpure },
2948 { "nothrow", TOKnothrow },
2949 { "__thread", TOKtls },
2950 { "__traits", TOKtraits },
2951 { "__overloadset", TOKoverloadset },
2952 { "__FILE__", TOKfile },
2953 { "__LINE__", TOKline },
2954 { "shared", TOKshared },
2955 #endif
2956 };
2957
2958 int Token::isKeyword()
2959 {
2960 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
2961 {
2962 if (keywords[u].value == value)
2963 return 1;
2964 }
2965 return 0;
2966 }
2967
2968 void Lexer::initKeywords()
2969 { StringValue *sv;
2970 unsigned u;
2971 enum TOK v;
2972 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
2973
2974 if (global.params.Dversion == 1)
2975 nkeywords -= 2;
2976
2977 cmtable_init();
2978
2979 for (u = 0; u < nkeywords; u++)
2980 { const char *s;
2981
2982 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
2983 s = keywords[u].name;
2984 v = keywords[u].value;
2985 sv = stringtable.insert(s, strlen(s));
2986 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
2987
2988 //printf("tochars[%d] = '%s'\n",v, s);
2989 Token::tochars[v] = s;
2990 }
2991
2992 Token::tochars[TOKeof] = "EOF";
2993 Token::tochars[TOKlcurly] = "{";
2994 Token::tochars[TOKrcurly] = "}";
2995 Token::tochars[TOKlparen] = "(";
2996 Token::tochars[TOKrparen] = ")";
2997 Token::tochars[TOKlbracket] = "[";
2998 Token::tochars[TOKrbracket] = "]";
2999 Token::tochars[TOKsemicolon] = ";";
3000 Token::tochars[TOKcolon] = ":";
3001 Token::tochars[TOKcomma] = ",";
3002 Token::tochars[TOKdot] = ".";
3003 Token::tochars[TOKxor] = "^";
3004 Token::tochars[TOKxorass] = "^=";
3005 Token::tochars[TOKassign] = "=";
3006 Token::tochars[TOKconstruct] = "=";
3007 #if DMDV2
3008 Token::tochars[TOKblit] = "=";
3009 #endif
3010 Token::tochars[TOKlt] = "<";
3011 Token::tochars[TOKgt] = ">";
3012 Token::tochars[TOKle] = "<=";
3013 Token::tochars[TOKge] = ">=";
3014 Token::tochars[TOKequal] = "==";
3015 Token::tochars[TOKnotequal] = "!=";
3016 Token::tochars[TOKnotidentity] = "!is";
3017 Token::tochars[TOKtobool] = "!!";
3018
3019 Token::tochars[TOKunord] = "!<>=";
3020 Token::tochars[TOKue] = "!<>";
3021 Token::tochars[TOKlg] = "<>";
3022 Token::tochars[TOKleg] = "<>=";
3023 Token::tochars[TOKule] = "!>";
3024 Token::tochars[TOKul] = "!>=";
3025 Token::tochars[TOKuge] = "!<";
3026 Token::tochars[TOKug] = "!<=";
3027
3028 Token::tochars[TOKnot] = "!";
3029 Token::tochars[TOKtobool] = "!!";
3030 Token::tochars[TOKshl] = "<<";
3031 Token::tochars[TOKshr] = ">>";
3032 Token::tochars[TOKushr] = ">>>";
3033 Token::tochars[TOKadd] = "+";
3034 Token::tochars[TOKmin] = "-";
3035 Token::tochars[TOKmul] = "*";
3036 Token::tochars[TOKdiv] = "/";
3037 Token::tochars[TOKmod] = "%";
3038 Token::tochars[TOKslice] = "..";
3039 Token::tochars[TOKdotdotdot] = "...";
3040 Token::tochars[TOKand] = "&";
3041 Token::tochars[TOKandand] = "&&";
3042 Token::tochars[TOKor] = "|";
3043 Token::tochars[TOKoror] = "||";
3044 Token::tochars[TOKarray] = "[]";
3045 Token::tochars[TOKindex] = "[i]";
3046 Token::tochars[TOKaddress] = "&";
3047 Token::tochars[TOKstar] = "*";
3048 Token::tochars[TOKtilde] = "~";
3049 Token::tochars[TOKdollar] = "$";
3050 Token::tochars[TOKcast] = "cast";
3051 Token::tochars[TOKplusplus] = "++";
3052 Token::tochars[TOKminusminus] = "--";
3053 Token::tochars[TOKtype] = "type";
3054 Token::tochars[TOKquestion] = "?";
3055 Token::tochars[TOKneg] = "-";
3056 Token::tochars[TOKuadd] = "+";
3057 Token::tochars[TOKvar] = "var";
3058 Token::tochars[TOKaddass] = "+=";
3059 Token::tochars[TOKminass] = "-=";
3060 Token::tochars[TOKmulass] = "*=";
3061 Token::tochars[TOKdivass] = "/=";
3062 Token::tochars[TOKmodass] = "%=";
3063 Token::tochars[TOKshlass] = "<<=";
3064 Token::tochars[TOKshrass] = ">>=";
3065 Token::tochars[TOKushrass] = ">>>=";
3066 Token::tochars[TOKandass] = "&=";
3067 Token::tochars[TOKorass] = "|=";
3068 Token::tochars[TOKcatass] = "~=";
3069 Token::tochars[TOKcat] = "~";
3070 Token::tochars[TOKcall] = "call";
3071 Token::tochars[TOKidentity] = "is";
3072 Token::tochars[TOKnotidentity] = "!is";
3073
3074 Token::tochars[TOKorass] = "|=";
3075 Token::tochars[TOKidentifier] = "identifier";
3076
3077 // For debugging
3078 Token::tochars[TOKdotexp] = "dotexp";
3079 Token::tochars[TOKdotti] = "dotti";
3080 Token::tochars[TOKdotvar] = "dotvar";
3081 Token::tochars[TOKdottype] = "dottype";
3082 Token::tochars[TOKsymoff] = "symoff";
3083 Token::tochars[TOKtypedot] = "typedot";
3084 Token::tochars[TOKarraylength] = "arraylength";
3085 Token::tochars[TOKarrayliteral] = "arrayliteral";
3086 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
3087 Token::tochars[TOKstructliteral] = "structliteral";
3088 Token::tochars[TOKstring] = "string";
3089 Token::tochars[TOKdsymbol] = "symbol";
3090 Token::tochars[TOKtuple] = "tuple";
3091 Token::tochars[TOKdeclaration] = "declaration";
3092 Token::tochars[TOKdottd] = "dottd";
3093 Token::tochars[TOKon_scope_exit] = "scope(exit)";
3094 }