Mercurial > projects > dil
comparison trunk/src/dil/Lexer.d @ 499:52447db67938
Implemented global table of identifiers.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 09 Dec 2007 22:37:47 +0100 |
parents | 49c201b5c465 |
children | 4e14cd1b24da |
comparison
equal
deleted
inserted
replaced
498:49c201b5c465 | 499:52447db67938 |
---|---|
21 public import dil.LexerFuncs; | 21 public import dil.LexerFuncs; |
22 | 22 |
23 /// U+FFFD = �. Used to replace invalid Unicode characters. | 23 /// U+FFFD = �. Used to replace invalid Unicode characters. |
24 const dchar REPLACEMENT_CHAR = '\uFFFD'; | 24 const dchar REPLACEMENT_CHAR = '\uFFFD'; |
25 | 25 |
26 /// Global table of identifiers. Access must be synchronized. | |
27 private Identifier*[string] idTable; | |
28 | |
29 static this() | |
30 { | |
31 foreach(ref k; keywords) | |
32 idTable[k.str] = &k; | |
33 } | |
34 | |
35 Identifier* idTableLookup(string idString) | |
36 out(id) | |
37 { assert(id !is null); } | |
38 body | |
39 { | |
40 synchronized | |
41 { | |
42 Identifier** id = idString in idTable; | |
43 if (id) | |
44 return *id; | |
45 auto newID = Identifier(TOK.Identifier, idString); | |
46 idTable[idString] = newID; | |
47 return newID; | |
48 } | |
49 } | |
50 | |
51 /++ | |
52 The Lexer analyzes the characters of a source text and | |
53 produces a doubly-linked list of tokens. | |
54 +/ | |
26 class Lexer | 55 class Lexer |
27 { | 56 { |
28 Token* head; /// The head of the doubly linked token list. | 57 Token* head; /// The head of the doubly linked token list. |
29 Token* tail; /// The tail of the linked list. Set in scan(). | 58 Token* tail; /// The tail of the linked list. Set in scan(). |
30 Token* token; /// Points to the current token in the token list. | 59 Token* token; /// Points to the current token in the token list. |
41 uint lineNum = 1; /// Current, actual source text line number. | 70 uint lineNum = 1; /// Current, actual source text line number. |
42 uint lineNum_hline; /// Line number set by #line. | 71 uint lineNum_hline; /// Line number set by #line. |
43 uint inTokenString; /// > 0 if inside q{ } | 72 uint inTokenString; /// > 0 if inside q{ } |
44 char[] errorPath; /// The path displayed in error messages. | 73 char[] errorPath; /// The path displayed in error messages. |
45 | 74 |
46 Identifier[string] idtable; | |
47 | |
48 /++ | 75 /++ |
49 Construct a Lexer object. | 76 Construct a Lexer object. |
50 Params: | 77 Params: |
51 text = the UTF-8 source code. | 78 text = the UTF-8 source code. |
52 filePath = the path to the source code; used for error messages. | 79 filePath = the path to the source code; used for error messages. |
63 } | 90 } |
64 | 91 |
65 this.p = this.text.ptr; | 92 this.p = this.text.ptr; |
66 this.end = this.p + this.text.length; | 93 this.end = this.p + this.text.length; |
67 this.lineBegin = this.p; | 94 this.lineBegin = this.p; |
68 loadKeywords(this.idtable); | |
69 | 95 |
70 this.head = new Token; | 96 this.head = new Token; |
71 this.head.type = TOK.HEAD; | 97 this.head.type = TOK.HEAD; |
72 this.head.start = this.head.end = this.p; | 98 this.head.start = this.head.end = this.p; |
73 this.token = this.head; | 99 this.token = this.head; |
285 { c = *++p; } | 311 { c = *++p; } |
286 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) | 312 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) |
287 | 313 |
288 t.end = p; | 314 t.end = p; |
289 | 315 |
290 string str = t.srcText; | 316 auto id = idTableLookup(t.srcText); |
291 Identifier* id = str in idtable; | |
292 | |
293 if (!id) | |
294 { | |
295 idtable[str] = Identifier(TOK.Identifier, str); | |
296 id = str in idtable; | |
297 } | |
298 assert(id); | |
299 t.type = id.type; | 317 t.type = id.type; |
318 | |
300 if (t.type == TOK.Identifier || t.isKeyword) | 319 if (t.type == TOK.Identifier || t.isKeyword) |
301 return; | 320 return; |
302 else if (t.isSpecialToken) | 321 else if (t.isSpecialToken) |
303 finalizeSpecialToken(t); | 322 finalizeSpecialToken(t); |
304 else if (t.type == TOK.EOF) | 323 else if (t.type == TOK.EOF) |
1037 { c = *++p; } | 1056 { c = *++p; } |
1038 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) | 1057 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) |
1039 | 1058 |
1040 t.end = p; | 1059 t.end = p; |
1041 | 1060 |
1042 string str = t.srcText; | 1061 auto id = idTableLookup(t.srcText); |
1043 Identifier* id = str in idtable; | |
1044 | |
1045 if (!id) | |
1046 { | |
1047 idtable[str] = Identifier(TOK.Identifier, str); | |
1048 id = str in idtable; | |
1049 } | |
1050 assert(id); | |
1051 t.type = id.type; | 1062 t.type = id.type; |
1063 | |
1052 if (t.type == TOK.Identifier || t.isKeyword) | 1064 if (t.type == TOK.Identifier || t.isKeyword) |
1053 return; | 1065 return; |
1054 else if (t.isSpecialToken) | 1066 else if (t.isSpecialToken) |
1055 finalizeSpecialToken(t); | 1067 finalizeSpecialToken(t); |
1056 else if (t.type == TOK.EOF) | 1068 else if (t.type == TOK.EOF) |
2444 if (ident.length == 0) | 2456 if (ident.length == 0) |
2445 return false; | 2457 return false; |
2446 | 2458 |
2447 static Identifier[string] reserved_ids_table; | 2459 static Identifier[string] reserved_ids_table; |
2448 if (reserved_ids_table is null) | 2460 if (reserved_ids_table is null) |
2449 loadKeywords(reserved_ids_table); | 2461 Lexer.loadKeywords(reserved_ids_table); |
2450 | 2462 |
2451 size_t idx = 1; // Index to the 2nd character in ident. | 2463 size_t idx = 1; // Index to the 2nd character in ident. |
2452 dchar isFirstCharUniAlpha() | 2464 dchar isFirstCharUniAlpha() |
2453 { | 2465 { |
2454 idx = 0; | 2466 idx = 0; |