comparison trunk/src/dil/Lexer.d @ 502:4e14cd1b24da

Refactored code and added modules related to tabulated Identifiers. Rearranged members of struct Identifier and added new member ID identID. Moved idTableLookup to module dil.IdTable. Renamed module TokenIDs to TokensEnum. Added member Identifier* ident to struct Token. Changed string switches in Parser to integer switches using enum ID.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Tue, 11 Dec 2007 14:19:30 +0100
parents 52447db67938
children 996041463028
comparison
equal deleted inserted replaced
501:949a53332c66 502:4e14cd1b24da
8 import dil.Keywords; 8 import dil.Keywords;
9 import dil.Identifier; 9 import dil.Identifier;
10 import dil.Messages; 10 import dil.Messages;
11 import dil.HtmlEntities; 11 import dil.HtmlEntities;
12 import dil.CompilerInfo; 12 import dil.CompilerInfo;
13 import dil.IdTable;
13 import tango.stdc.stdlib : strtof, strtod, strtold; 14 import tango.stdc.stdlib : strtof, strtod, strtold;
14 import tango.stdc.errno : errno, ERANGE; 15 import tango.stdc.errno : errno, ERANGE;
15 import tango.stdc.time : time_t, time, ctime; 16 import tango.stdc.time : time_t, time, ctime;
16 import tango.stdc.string : strlen; 17 import tango.stdc.string : strlen;
17 import std.utf; 18 import std.utf;
20 21
21 public import dil.LexerFuncs; 22 public import dil.LexerFuncs;
22 23
23 /// U+FFFD = �. Used to replace invalid Unicode characters. 24 /// U+FFFD = �. Used to replace invalid Unicode characters.
24 const dchar REPLACEMENT_CHAR = '\uFFFD'; 25 const dchar REPLACEMENT_CHAR = '\uFFFD';
25
26 /// Global table of identifiers. Access must be synchronized.
27 private Identifier*[string] idTable;
28
29 static this()
30 {
31 foreach(ref k; keywords)
32 idTable[k.str] = &k;
33 }
34
35 Identifier* idTableLookup(string idString)
36 out(id)
37 { assert(id !is null); }
38 body
39 {
40 synchronized
41 {
42 Identifier** id = idString in idTable;
43 if (id)
44 return *id;
45 auto newID = Identifier(TOK.Identifier, idString);
46 idTable[idString] = newID;
47 return newID;
48 }
49 }
50 26
51 /++ 27 /++
52 The Lexer analyzes the characters of a source text and 28 The Lexer analyzes the characters of a source text and
53 produces a doubly-linked list of tokens. 29 produces a doubly-linked list of tokens.
54 +/ 30 +/
311 { c = *++p; } 287 { c = *++p; }
312 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) 288 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))
313 289
314 t.end = p; 290 t.end = p;
315 291
316 auto id = idTableLookup(t.srcText); 292 auto id = IdTable.lookup(t.srcText);
317 t.type = id.type; 293 t.type = id.type;
294 t.ident = id;
318 295
319 if (t.type == TOK.Identifier || t.isKeyword) 296 if (t.type == TOK.Identifier || t.isKeyword)
320 return; 297 return;
321 else if (t.isSpecialToken) 298 else if (t.isSpecialToken)
322 finalizeSpecialToken(t); 299 finalizeSpecialToken(t);
324 { 301 {
325 tail = &t; 302 tail = &t;
326 assert(t.srcText == "__EOF__"); 303 assert(t.srcText == "__EOF__");
327 } 304 }
328 else 305 else
329 assert(0, "unexpected token: " ~ t.srcText); 306 assert(0, "unexpected token type: " ~ Token.toString(t.type));
330 return; 307 return;
331 } 308 }
332 309
333 if (isdigit(c)) 310 if (isdigit(c))
334 return scanNumber(t); 311 return scanNumber(t);
1056 { c = *++p; } 1033 { c = *++p; }
1057 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) 1034 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))
1058 1035
1059 t.end = p; 1036 t.end = p;
1060 1037
1061 auto id = idTableLookup(t.srcText); 1038 auto id = IdTable.lookup(t.srcText);
1062 t.type = id.type; 1039 t.type = id.type;
1040 t.ident = id;
1063 1041
1064 if (t.type == TOK.Identifier || t.isKeyword) 1042 if (t.type == TOK.Identifier || t.isKeyword)
1065 return; 1043 return;
1066 else if (t.isSpecialToken) 1044 else if (t.isSpecialToken)
1067 finalizeSpecialToken(t); 1045 finalizeSpecialToken(t);
1069 { 1047 {
1070 tail = &t; 1048 tail = &t;
1071 assert(t.srcText == "__EOF__"); 1049 assert(t.srcText == "__EOF__");
1072 } 1050 }
1073 else 1051 else
1074 assert(0, "unexpected token: " ~ t.srcText); 1052 assert(0, "unexpected token type: " ~ Token.toString(t.type));
1075 return; 1053 return;
1076 } 1054 }
1077 1055
1078 if (isdigit(c)) 1056 if (isdigit(c))
1079 return scanNumber(t); 1057 return scanNumber(t);