Mercurial > projects > dil
comparison trunk/src/dil/Lexer.d @ 502:4e14cd1b24da
Refactored code and added modules related to tabulated Identifiers.
Rearranged members of struct Identifier and added new member ID identID.
Moved idTableLookup to module dil.IdTable.
Renamed module TokenIDs to TokensEnum.
Added member Identifier* ident to struct Token.
Changed string switches in Parser to integer switches using enum ID.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Tue, 11 Dec 2007 14:19:30 +0100 |
parents | 52447db67938 |
children | 996041463028 |
comparison
equal
deleted
inserted
replaced
501:949a53332c66 | 502:4e14cd1b24da |
---|---|
8 import dil.Keywords; | 8 import dil.Keywords; |
9 import dil.Identifier; | 9 import dil.Identifier; |
10 import dil.Messages; | 10 import dil.Messages; |
11 import dil.HtmlEntities; | 11 import dil.HtmlEntities; |
12 import dil.CompilerInfo; | 12 import dil.CompilerInfo; |
| | 13 import dil.IdTable; |
13 import tango.stdc.stdlib : strtof, strtod, strtold; | 14 import tango.stdc.stdlib : strtof, strtod, strtold; |
14 import tango.stdc.errno : errno, ERANGE; | 15 import tango.stdc.errno : errno, ERANGE; |
15 import tango.stdc.time : time_t, time, ctime; | 16 import tango.stdc.time : time_t, time, ctime; |
16 import tango.stdc.string : strlen; | 17 import tango.stdc.string : strlen; |
17 import std.utf; | 18 import std.utf; |
20 | 21 |
21 public import dil.LexerFuncs; | 22 public import dil.LexerFuncs; |
22 | 23 |
23 /// U+FFFD = �. Used to replace invalid Unicode characters. | 24 /// U+FFFD = �. Used to replace invalid Unicode characters. |
24 const dchar REPLACEMENT_CHAR = '\uFFFD'; | 25 const dchar REPLACEMENT_CHAR = '\uFFFD'; |
25 | |
26 /// Global table of identifiers. Access must be synchronized. | |
27 private Identifier*[string] idTable; | |
28 | |
29 static this() | |
30 { | |
31 foreach(ref k; keywords) | |
32 idTable[k.str] = &k; | |
33 } | |
34 | |
35 Identifier* idTableLookup(string idString) | |
36 out(id) | |
37 { assert(id !is null); } | |
38 body | |
39 { | |
40 synchronized | |
41 { | |
42 Identifier** id = idString in idTable; | |
43 if (id) | |
44 return *id; | |
45 auto newID = Identifier(TOK.Identifier, idString); | |
46 idTable[idString] = newID; | |
47 return newID; | |
48 } | |
49 } | |
50 | 26 |
51 /++ | 27 /++ |
52 The Lexer analyzes the characters of a source text and | 28 The Lexer analyzes the characters of a source text and |
53 produces a doubly-linked list of tokens. | 29 produces a doubly-linked list of tokens. |
54 +/ | 30 +/ |
311 { c = *++p; } | 287 { c = *++p; } |
312 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) | 288 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) |
313 | 289 |
314 t.end = p; | 290 t.end = p; |
315 | 291 |
316 auto id = idTableLookup(t.srcText); | 292 auto id = IdTable.lookup(t.srcText); |
317 t.type = id.type; | 293 t.type = id.type; |
| | 294 t.ident = id; |
318 | 295 |
319 if (t.type == TOK.Identifier || t.isKeyword) | 296 if (t.type == TOK.Identifier || t.isKeyword) |
320 return; | 297 return; |
321 else if (t.isSpecialToken) | 298 else if (t.isSpecialToken) |
322 finalizeSpecialToken(t); | 299 finalizeSpecialToken(t); |
324 { | 301 { |
325 tail = &t; | 302 tail = &t; |
326 assert(t.srcText == "__EOF__"); | 303 assert(t.srcText == "__EOF__"); |
327 } | 304 } |
328 else | 305 else |
329 assert(0, "unexpected token: " ~ t.srcText); | 306 assert(0, "unexpected token type: " ~ Token.toString(t.type)); |
330 return; | 307 return; |
331 } | 308 } |
332 | 309 |
333 if (isdigit(c)) | 310 if (isdigit(c)) |
334 return scanNumber(t); | 311 return scanNumber(t); |
1056 { c = *++p; } | 1033 { c = *++p; } |
1057 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) | 1034 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) |
1058 | 1035 |
1059 t.end = p; | 1036 t.end = p; |
1060 | 1037 |
1061 auto id = idTableLookup(t.srcText); | 1038 auto id = IdTable.lookup(t.srcText); |
1062 t.type = id.type; | 1039 t.type = id.type; |
| | 1040 t.ident = id; |
1063 | 1041 |
1064 if (t.type == TOK.Identifier || t.isKeyword) | 1042 if (t.type == TOK.Identifier || t.isKeyword) |
1065 return; | 1043 return; |
1066 else if (t.isSpecialToken) | 1044 else if (t.isSpecialToken) |
1067 finalizeSpecialToken(t); | 1045 finalizeSpecialToken(t); |
1069 { | 1047 { |
1070 tail = &t; | 1048 tail = &t; |
1071 assert(t.srcText == "__EOF__"); | 1049 assert(t.srcText == "__EOF__"); |
1072 } | 1050 } |
1073 else | 1051 else |
1074 assert(0, "unexpected token: " ~ t.srcText); | 1052 assert(0, "unexpected token type: " ~ Token.toString(t.type)); |
1075 return; | 1053 return; |
1076 } | 1054 } |
1077 | 1055 |
1078 if (isdigit(c)) | 1056 if (isdigit(c)) |
1079 return scanNumber(t); | 1057 return scanNumber(t); |