comparison dmd/Lexer.d @ 178:e3afd1303184

Many small bugs fixed Made all classes derive from TObject to detect memory leaks (functionality is disabled for now) Began work on overriding backend memory allocations (to avoid memory leaks)
author korDen
date Sun, 17 Oct 2010 07:42:00 +0400
parents 94b6033c07f3
children
comparison
equal deleted inserted replaced
177:1475fd394c9e 178:e3afd1303184
338 assert(0); // should have been in table 338 assert(0); // should have been in table
339 } 339 }
340 return true; 340 return true;
341 } 341 }
342 342
343 class Lexer 343 import dmd.TObject;
344
345 class Lexer : TObject
344 { 346 {
345 Loc loc; // for error messages 347 Loc loc; // for error messages
346 348
347 ubyte* base; // pointer to start of buffer 349 ubyte* base; // pointer to start of buffer
348 ubyte* end; // past end of buffer 350 ubyte* end; // past end of buffer
353 int anyToken; // !=0 means seen at least one token 355 int anyToken; // !=0 means seen at least one token
354 int commentToken; // !=0 means comments are TOKcomment's 356 int commentToken; // !=0 means comments are TOKcomment's
355 357
356 this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken) 358 this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
357 { 359 {
360 register();
358 loc = Loc(mod, 1); 361 loc = Loc(mod, 1);
359 362
360 memset(&token,0,token.sizeof); 363 memset(&token,0,token.sizeof);
361 this.base = base; 364 this.base = base;
362 this.end = base + endoffset; 365 this.end = base + endoffset;
363 p = base + begoffset; 366 p = base + begoffset;
364 this.mod = mod; 367 this.mod = mod;
372 375
373 if (p[0] == '#' && p[1] =='!') 376 if (p[0] == '#' && p[1] =='!')
374 { 377 {
375 p += 2; 378 p += 2;
376 while (1) 379 while (1)
377 { 380 {
378 ubyte c = *p; 381 ubyte c = *p;
379 switch (c) 382 switch (c)
380 { 383 {
381 case '\n': 384 case '\n':
382 p++; 385 p++;
656 659
657 static __gshared ubyte cmtable[256]; 660 static __gshared ubyte cmtable[256];
658 enum CMoctal = 0x1; 661 enum CMoctal = 0x1;
659 enum CMhex = 0x2; 662 enum CMhex = 0x2;
660 enum CMidchar = 0x4; 663 enum CMidchar = 0x4;
661 664
662 ubyte isoctal (ubyte c) { return cmtable[c] & CMoctal; } 665 ubyte isoctal (ubyte c) { return cmtable[c] & CMoctal; }
663 ubyte ishex (ubyte c) { return cmtable[c] & CMhex; } 666 ubyte ishex (ubyte c) { return cmtable[c] & CMhex; }
664 ubyte isidchar(ubyte c) { return cmtable[c] & CMidchar; } 667 ubyte isidchar(ubyte c) { return cmtable[c] & CMidchar; }
665 668
666 static void cmtable_init() 669 static void cmtable_init()
673 cmtable[c] |= CMhex; 676 cmtable[c] |= CMhex;
674 if (isalnum(c) || c == '_') 677 if (isalnum(c) || c == '_')
675 cmtable[c] |= CMidchar; 678 cmtable[c] |= CMidchar;
676 } 679 }
677 } 680 }
678 681
679 static StringTable stringtable() 682 static ref StringTable stringtable()
680 { 683 {
681 return global.stringtable; 684 return global.stringtable;
682 } 685 }
683 686
684 static OutBuffer stringbuffer() 687 static OutBuffer stringbuffer()
685 { 688 {
686 return global.stringbuffer; 689 return global.stringbuffer;
687 } 690 }
688 691
689 static void initKeywords() 692 static void initKeywords()
690 { 693 {
691 uint nkeywords = keywords.length; 694 uint nkeywords = keywords.length;
692 695
693 if (global.params.Dversion == 1) 696 if (global.params.Dversion == 1)
694 nkeywords -= 2; 697 nkeywords -= 2;
695 698
696 cmtable_init(); 699 cmtable_init();
697 700
698 for (uint u = 0; u < nkeywords; u++) 701 for (uint u = 0; u < nkeywords; u++)
699 { 702 {
700 //printf("keyword[%d] = '%.*s'\n",u, keywords[u].name); 703 //printf("keyword[%d] = '%.*s'\n",u, keywords[u].name);
701 string s = keywords[u].name; 704 string s = keywords[u].name;
702 TOK v = keywords[u].value; 705 TOK v = keywords[u].value;
703 StringValue* sv = stringtable.insert(s); 706 Object* sv = stringtable.insert(s);
704 sv.ptrvalue = cast(void*) new Identifier(sv.lstring.string_, v); 707 *sv = new Identifier(s, v);
705 708
706 //printf("tochars[%d] = '%s'\n",v, s); 709 //printf("tochars[%d] = '%s'\n",v, s);
707 Token.tochars[v] = s; 710 Token.tochars[v] = s;
708 } 711 }
709 712
792 Token.tochars[TOK.TOKorass] = "|="; 795 Token.tochars[TOK.TOKorass] = "|=";
793 Token.tochars[TOK.TOKidentifier] = "identifier"; 796 Token.tochars[TOK.TOKidentifier] = "identifier";
794 Token.tochars[TOK.TOKat] = "@"; 797 Token.tochars[TOK.TOKat] = "@";
795 Token.tochars[TOK.TOKpow] = "^^"; 798 Token.tochars[TOK.TOKpow] = "^^";
796 Token.tochars[TOK.TOKpowass] = "^^="; 799 Token.tochars[TOK.TOKpowass] = "^^=";
797 800
798 // For debugging 801 // For debugging
799 Token.tochars[TOKerror] = "error"; 802 Token.tochars[TOKerror] = "error";
800 Token.tochars[TOK.TOKdotexp] = "dotexp"; 803 Token.tochars[TOK.TOKdotexp] = "dotexp";
801 Token.tochars[TOK.TOKdotti] = "dotti"; 804 Token.tochars[TOK.TOKdotti] = "dotti";
802 Token.tochars[TOK.TOKdotvar] = "dotvar"; 805 Token.tochars[TOK.TOKdotvar] = "dotvar";
816 Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)"; 819 Token.tochars[TOK.TOKon_scope_failure] = "scope(failure)";
817 } 820 }
818 821
819 static Identifier idPool(string s) 822 static Identifier idPool(string s)
820 { 823 {
821 StringValue* sv = stringtable.update(s); 824 Object* sv = stringtable.update(s);
822 Identifier id = cast(Identifier) sv.ptrvalue; 825 Identifier id = cast(Identifier) *sv;
823 if (id is null) 826 if (id is null)
824 { 827 {
825 id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); 828 id = new Identifier(s, TOK.TOKidentifier);
826 sv.ptrvalue = cast(void*)id; 829 *sv = id;
827 } 830 }
828 831
829 return id; 832 return id;
830 } 833 }
831 834
1066 p = s; 1069 p = s;
1067 } 1070 }
1068 break; 1071 break;
1069 } 1072 }
1070 1073
1071 StringValue *sv = stringtable.update((cast(immutable(char)*)t.ptr)[0.. p - t.ptr]); 1074 auto s = cast(string)(t.ptr[0.. p - t.ptr]);
1072 Identifier id = cast(Identifier) sv.ptrvalue; 1075 Object* sv = stringtable.update(s);
1073 1076 Identifier id = cast(Identifier) *sv;
1077
1074 if (id is null) 1078 if (id is null)
1075 { id = new Identifier(sv.lstring.string_, TOK.TOKidentifier); 1079 {
1076 sv.ptrvalue = cast(void*)id; 1080 id = new Identifier(s, TOK.TOKidentifier);
1081 *sv = id;
1077 } 1082 }
1078 t.ident = id; 1083 t.ident = id;
1079 t.value = cast(TOK) id.value; 1084 t.value = cast(TOK) id.value;
1080 anyToken = 1; 1085 anyToken = 1;
1081 if (*t.ptr == '_') // if special identifier token 1086 if (*t.ptr == '_') // if special identifier token
1338 { // if /++ but not /++/ 1343 { // if /++ but not /++/
1339 getDocComment(t, lastLine == linnum); 1344 getDocComment(t, lastLine == linnum);
1340 } 1345 }
1341 continue; 1346 continue;
1342 } 1347 }
1343 1348
1344 default: 1349 default:
1345 break; /// 1350 break; ///
1346 } 1351 }
1347 t.value = TOK.TOKdiv; 1352 t.value = TOK.TOKdiv;
1348 return; 1353 return;
1553 t.value = TOK.TOKcatass; // ~= 1558 t.value = TOK.TOKcatass; // ~=
1554 } 1559 }
1555 else 1560 else
1556 t.value = TOK.TOKtilde; // ~ 1561 t.value = TOK.TOKtilde; // ~
1557 return; 1562 return;
1558 1563
1559 version(DMDV2) { 1564 version(DMDV2) {
1560 case '^': 1565 case '^':
1561 p++; 1566 p++;
1562 if (*p == '^') 1567 if (*p == '^')
1563 { p++; 1568 { p++;
1634 t.value = TOK.TOKmulass; 1639 t.value = TOK.TOKmulass;
1635 } else { 1640 } else {
1636 t.value = TOK.TOKmul; 1641 t.value = TOK.TOKmul;
1637 } 1642 }
1638 return; 1643 return;
1639 1644
1640 case '%': 1645 case '%':
1641 p++; 1646 p++;
1642 if (*p == '=') { 1647 if (*p == '=') {
1643 p++; 1648 p++;
1644 t.value = TOK.TOKmodass; 1649 t.value = TOK.TOKmodass;
1793 ndigits = 2; 1798 ndigits = 2;
1794 Lhex: 1799 Lhex:
1795 p++; 1800 p++;
1796 c = *p; 1801 c = *p;
1797 if (ishex(cast(ubyte)c)) 1802 if (ishex(cast(ubyte)c))
1798 { 1803 {
1799 uint v; 1804 uint v;
1800 1805
1801 n = 0; 1806 n = 0;
1802 v = 0; 1807 v = 0;
1803 while (1) 1808 while (1)
1833 switch (*p) 1838 switch (*p)
1834 { 1839 {
1835 case ';': 1840 case ';':
1836 c = HtmlNamedEntity(idstart, p - idstart); 1841 c = HtmlNamedEntity(idstart, p - idstart);
1837 if (c == ~0) 1842 if (c == ~0)
1838 { 1843 {
1839 error("unnamed character entity &%s;", idstart[0..(p - idstart)]); 1844 error("unnamed character entity &%s;", idstart[0..(p - idstart)]);
1840 c = ' '; 1845 c = ' ';
1841 } 1846 }
1842 p++; 1847 p++;
1843 break; 1848 break;
1858 c = '\\'; 1863 c = '\\';
1859 break; 1864 break;
1860 1865
1861 default: 1866 default:
1862 if (isoctal(cast(ubyte)c)) 1867 if (isoctal(cast(ubyte)c))
1863 { 1868 {
1864 uint v; 1869 uint v;
1865 1870
1866 n = 0; 1871 n = 0;
1867 v = 0; 1872 v = 0;
1868 do 1873 do
1938 } 1943 }
1939 break; 1944 break;
1940 } 1945 }
1941 stringbuffer.writeByte(c); 1946 stringbuffer.writeByte(c);
1942 } 1947 }
1943 1948
1944 assert(false); 1949 assert(false);
1945 } 1950 }
1946 1951
1947 /************************************** 1952 /**************************************
1948 * Lex hex strings: 1953 * Lex hex strings:
1984 t.postfix = 0; 1989 t.postfix = 0;
1985 return TOKstring; 1990 return TOKstring;
1986 1991
1987 case '"': 1992 case '"':
1988 if (n & 1) 1993 if (n & 1)
1989 { 1994 {
1990 error("odd number (%d) of hex characters in hex string", n); 1995 error("odd number (%d) of hex characters in hex string", n);
1991 stringbuffer.writeByte(v); 1996 stringbuffer.writeByte(v);
1992 } 1997 }
1993 t.len = stringbuffer.offset; 1998 t.len = stringbuffer.offset;
1994 stringbuffer.writeByte(0); 1999 stringbuffer.writeByte(0);
2094 goto Lnextline; 2099 goto Lnextline;
2095 } 2100 }
2096 break; 2101 break;
2097 } 2102 }
2098 if (delimleft == 0) 2103 if (delimleft == 0)
2099 { 2104 {
2100 delimleft = c; 2105 delimleft = c;
2101 nest = 1; 2106 nest = 1;
2102 nestcount = 1; 2107 nestcount = 1;
2103 if (c == '(') 2108 if (c == '(')
2104 delimright = ')'; 2109 delimright = ')';
2107 else if (c == '[') 2112 else if (c == '[')
2108 delimright = ']'; 2113 delimright = ']';
2109 else if (c == '<') 2114 else if (c == '<')
2110 delimright = '>'; 2115 delimright = '>';
2111 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) 2116 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
2112 { 2117 {
2113 // Start of identifier; must be a heredoc 2118 // Start of identifier; must be a heredoc
2114 Token t2; 2119 Token t2;
2115 p--; 2120 p--;
2116 scan(&t2); // read in heredoc identifier 2121 scan(&t2); // read in heredoc identifier
2117 if (t2.value != TOKidentifier) 2122 if (t2.value != TOKidentifier)
2118 { 2123 {
2119 error("identifier expected for heredoc, not %s", t2.toChars()); 2124 error("identifier expected for heredoc, not %s", t2.toChars());
2120 delimright = c; 2125 delimright = c;
2121 } 2126 }
2122 else 2127 else
2123 { 2128 {
2124 hereid = t2.ident; 2129 hereid = t2.ident;
2125 //printf("hereid = '%s'\n", hereid.toChars()); 2130 //printf("hereid = '%s'\n", hereid.toChars());
2126 blankrol = 1; 2131 blankrol = 1;
2127 } 2132 }
2128 nest = 0; 2133 nest = 0;
2129 } 2134 }
2130 else 2135 else
2131 { 2136 {
2132 delimright = c; 2137 delimright = c;
2133 nest = 0; 2138 nest = 0;
2134 if (isspace(c)) 2139 if (isspace(c))
2135 error("delimiter cannot be whitespace"); 2140 error("delimiter cannot be whitespace");
2136 } 2141 }
2137 } 2142 }
2138 else 2143 else
2139 { 2144 {
2140 if (blankrol) 2145 if (blankrol)
2141 { 2146 {
2142 error("heredoc rest of line should be blank"); 2147 error("heredoc rest of line should be blank");
2143 blankrol = 0; 2148 blankrol = 0;
2144 continue; 2149 continue;
2145 } 2150 }
2146 if (nest == 1) 2151 if (nest == 1)
2154 } 2159 }
2155 } 2160 }
2156 else if (c == delimright) 2161 else if (c == delimright)
2157 goto Ldone; 2162 goto Ldone;
2158 if (startline && isalpha(c) && hereid) 2163 if (startline && isalpha(c) && hereid)
2159 { 2164 {
2160 Token t2; 2165 Token t2;
2161 ubyte* psave = p; 2166 ubyte* psave = p;
2162 p--; 2167 p--;
2163 scan(&t2); // read in possible heredoc identifier 2168 scan(&t2); // read in possible heredoc identifier
2164 //printf("endid = '%s'\n", t2.ident.toChars()); 2169 //printf("endid = '%s'\n", t2.ident.toChars());
2165 if (t2.value == TOKidentifier && t2.ident.equals(hereid)) 2170 if (t2.value == TOKidentifier && t2.ident.equals(hereid))
2166 { 2171 {
2167 /* should check that rest of line is blank 2172 /* should check that rest of line is blank
2168 */ 2173 */
2169 goto Ldone; 2174 goto Ldone;
2170 } 2175 }
2171 p = psave; 2176 p = psave;
2209 uint nest = 1; 2214 uint nest = 1;
2210 Loc start = loc; 2215 Loc start = loc;
2211 ubyte* pstart = ++p; 2216 ubyte* pstart = ++p;
2212 2217
2213 while (true) 2218 while (true)
2214 { 2219 {
2215 Token tok; 2220 Token tok;
2216 2221
2217 scan(&tok); 2222 scan(&tok);
2218 switch (tok.value) 2223 switch (tok.value)
2219 { 2224 {
2325 } 2330 }
2326 break; 2331 break;
2327 } 2332 }
2328 stringbuffer.writeByte(c); 2333 stringbuffer.writeByte(c);
2329 } 2334 }
2330 2335
2331 assert(false); 2336 assert(false);
2332 } 2337 }
2333 2338
2334 TOK charConstant(Token* t, int wide) 2339 TOK charConstant(Token* t, int wide)
2335 { 2340 {
2389 t.uns64value = c; 2394 t.uns64value = c;
2390 break; 2395 break;
2391 } 2396 }
2392 2397
2393 if (*p != '\'') 2398 if (*p != '\'')
2394 { 2399 {
2395 error("unterminated character constant"); 2400 error("unterminated character constant");
2396 return tk; 2401 return tk;
2397 } 2402 }
2398 p++; 2403 p++;
2399 return tk; 2404 return tk;
2421 2426
2422 uint wchar_(uint u) 2427 uint wchar_(uint u)
2423 { 2428 {
2424 assert(false); 2429 assert(false);
2425 } 2430 }
2426 2431
2427 /************************************** 2432 /**************************************
2428 * Read in a number. 2433 * Read in a number.
2429 * If it's an integer, store it in tok.TKutok.Vlong. 2434 * If it's an integer, store it in tok.TKutok.Vlong.
2430 * integers can be decimal, octal or hex 2435 * integers can be decimal, octal or hex
2431 * Handle the suffixes U, UL, LU, L, etc. 2436 * Handle the suffixes U, UL, LU, L, etc.
2448 FLAGS_undefined = 0, 2453 FLAGS_undefined = 0,
2449 FLAGS_decimal = 1, // decimal 2454 FLAGS_decimal = 1, // decimal
2450 FLAGS_unsigned = 2, // u or U suffix 2455 FLAGS_unsigned = 2, // u or U suffix
2451 FLAGS_long = 4, // l or L suffix 2456 FLAGS_long = 4, // l or L suffix
2452 }; 2457 };
2453 2458
2454 FLAGS flags = FLAGS.FLAGS_decimal; 2459 FLAGS flags = FLAGS.FLAGS_decimal;
2455 2460
2456 int i; 2461 int i;
2457 int base; 2462 int base;
2458 uint c; 2463 uint c;
2887 break; 2892 break;
2888 2893
2889 case 9: 2894 case 9:
2890 dblstate = 1; 2895 dblstate = 1;
2891 if (c == 'X' || c == 'x') 2896 if (c == 'X' || c == 'x')
2892 { 2897 {
2893 hex++; 2898 hex++;
2894 break; 2899 break;
2895 } 2900 }
2896 case 1: // digits to left of . 2901 case 1: // digits to left of .
2897 case 3: // digits to right of . 2902 case 3: // digits to right of .
2905 } 2910 }
2906 break; 2911 break;
2907 2912
2908 case 2: // no more digits to left of . 2913 case 2: // no more digits to left of .
2909 if (c == '.') 2914 if (c == '.')
2910 { 2915 {
2911 dblstate++; 2916 dblstate++;
2912 break; 2917 break;
2913 } 2918 }
2914 case 4: // no more digits to right of . 2919 case 4: // no more digits to right of .
2915 if ((c == 'E' || c == 'e') || 2920 if ((c == 'E' || c == 'e') ||
2916 hex && (c == 'P' || c == 'p')) 2921 hex && (c == 'P' || c == 'p'))
2917 { 2922 {
2918 dblstate = 5; 2923 dblstate = 5;
2919 hex = 0; // exponent is always decimal 2924 hex = 0; // exponent is always decimal
2920 break; 2925 break;
2921 } 2926 }
2922 if (hex) 2927 if (hex)
2994 result = TOKimaginary80v; 2999 result = TOKimaginary80v;
2995 break; 3000 break;
2996 default: 3001 default:
2997 } 3002 }
2998 } 3003 }
2999 3004
3000 version (Windows) { ///&& __DMC__ 3005 version (Windows) { ///&& __DMC__
3001 __locale_decpoint = save; 3006 __locale_decpoint = save;
3002 } 3007 }
3003 if (errno == ERANGE) 3008 if (errno == ERANGE)
3004 error("number is not representable"); 3009 error("number is not representable");
3025 fatal(); 3030 fatal();
3026 } 3031 }
3027 3032
3028 global.errors++; 3033 global.errors++;
3029 } 3034 }
3030 3035
3031 /********************************************* 3036 /*********************************************
3032 * Do pragma. 3037 * Do pragma.
3033 * Currently, the only pragma supported is: 3038 * Currently, the only pragma supported is:
3034 * #line linnum [filespec] 3039 * #line linnum [filespec]
3035 */ 3040 */
3090 if (filespec) 3095 if (filespec)
3091 goto Lerr; 3096 goto Lerr;
3092 stringbuffer.reset(); 3097 stringbuffer.reset();
3093 p++; 3098 p++;
3094 while (1) 3099 while (1)
3095 { 3100 {
3096 uint c; 3101 uint c;
3097 3102
3098 c = *p; 3103 c = *p;
3099 switch (c) 3104 switch (c)
3100 { 3105 {
3110 p++; 3115 p++;
3111 break; 3116 break;
3112 3117
3113 default: 3118 default:
3114 if (c & 0x80) 3119 if (c & 0x80)
3115 { 3120 {
3116 uint u = decodeUTF(); 3121 uint u = decodeUTF();
3117 if (u == PS || u == LS) 3122 if (u == PS || u == LS)
3118 goto Lerr; 3123 goto Lerr;
3119 } 3124 }
3120 stringbuffer.writeByte(c); 3125 stringbuffer.writeByte(c);
3125 } 3130 }
3126 continue; 3131 continue;
3127 3132
3128 default: 3133 default:
3129 if (*p & 0x80) 3134 if (*p & 0x80)
3130 { 3135 {
3131 uint u = decodeUTF(); 3136 uint u = decodeUTF();
3132 if (u == PS || u == LS) 3137 if (u == PS || u == LS)
3133 goto Lnewline; 3138 goto Lnewline;
3134 } 3139 }
3135 goto Lerr; 3140 goto Lerr;
3304 static string combineComments(string c1, string c2) 3309 static string combineComments(string c1, string c2)
3305 { 3310 {
3306 //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); 3311 //printf("Lexer::combineComments('%s', '%s')\n", c1, c2);
3307 3312
3308 string c = c2; 3313 string c = c2;
3309 3314
3310 if (c1) 3315 if (c1)
3311 { 3316 {
3312 c = c1; 3317 c = c1;
3313 if (c2) 3318 if (c2)
3314 { 3319 {
3355 static string combineComments(const(char)[] c1, const(char)[] c2) 3360 static string combineComments(const(char)[] c1, const(char)[] c2)
3356 { 3361 {
3357 //writef("Lexer.combineComments('%s', '%s')\n", c1, c2); 3362 //writef("Lexer.combineComments('%s', '%s')\n", c1, c2);
3358 3363
3359 char[] c = cast(char[]) c2; 3364 char[] c = cast(char[]) c2;
3360 3365
3361 if (c1 !is null) 3366 if (c1 !is null)
3362 { 3367 {
3363 c = cast(char[]) c1; 3368 c = cast(char[]) c1;
3364 if (c2 !is null) 3369 if (c2 !is null)
3365 { 3370 {
3368 c[0..len1] = c1[]; 3373 c[0..len1] = c1[];
3369 c[len1++] = '\n'; 3374 c[len1++] = '\n';
3370 c[len1 .. len1 + c2.length] = c2[]; 3375 c[len1 .. len1 + c2.length] = c2[];
3371 } 3376 }
3372 } 3377 }
3373 3378
3374 return cast(string)c; 3379 return cast(string)c;
3375 } 3380 }
3376 } 3381 }