comparison trunk/src/dil/Lexer.d @ 499:52447db67938

Implemented global table of identifiers.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sun, 09 Dec 2007 22:37:47 +0100
parents 49c201b5c465
children 4e14cd1b24da
comparison
equal deleted inserted replaced
498:49c201b5c465 499:52447db67938
21 public import dil.LexerFuncs; 21 public import dil.LexerFuncs;
22 22
23 /// U+FFFD = �. Used to replace invalid Unicode characters. 23 /// U+FFFD = �. Used to replace invalid Unicode characters.
24 const dchar REPLACEMENT_CHAR = '\uFFFD'; 24 const dchar REPLACEMENT_CHAR = '\uFFFD';
25 25
26 /// Global table of identifiers. Access must be synchronized.
27 private Identifier*[string] idTable;
28
29 static this()
30 {
31 foreach(ref k; keywords)
32 idTable[k.str] = &k;
33 }
34
35 Identifier* idTableLookup(string idString)
36 out(id)
37 { assert(id !is null); }
38 body
39 {
40 synchronized
41 {
42 Identifier** id = idString in idTable;
43 if (id)
44 return *id;
45 auto newID = Identifier(TOK.Identifier, idString);
46 idTable[idString] = newID;
47 return newID;
48 }
49 }
50
51 /++
52 The Lexer analyzes the characters of a source text and
53 produces a doubly-linked list of tokens.
54 +/
26 class Lexer 55 class Lexer
27 { 56 {
28 Token* head; /// The head of the doubly linked token list. 57 Token* head; /// The head of the doubly linked token list.
29 Token* tail; /// The tail of the linked list. Set in scan(). 58 Token* tail; /// The tail of the linked list. Set in scan().
30 Token* token; /// Points to the current token in the token list. 59 Token* token; /// Points to the current token in the token list.
41 uint lineNum = 1; /// Current, actual source text line number. 70 uint lineNum = 1; /// Current, actual source text line number.
42 uint lineNum_hline; /// Line number set by #line. 71 uint lineNum_hline; /// Line number set by #line.
43 uint inTokenString; /// > 0 if inside q{ } 72 uint inTokenString; /// > 0 if inside q{ }
44 char[] errorPath; /// The path displayed in error messages. 73 char[] errorPath; /// The path displayed in error messages.
45 74
46 Identifier[string] idtable;
47
48 /++ 75 /++
49 Construct a Lexer object. 76 Construct a Lexer object.
50 Params: 77 Params:
51 text = the UTF-8 source code. 78 text = the UTF-8 source code.
52 filePath = the path to the source code; used for error messages. 79 filePath = the path to the source code; used for error messages.
63 } 90 }
64 91
65 this.p = this.text.ptr; 92 this.p = this.text.ptr;
66 this.end = this.p + this.text.length; 93 this.end = this.p + this.text.length;
67 this.lineBegin = this.p; 94 this.lineBegin = this.p;
68 loadKeywords(this.idtable);
69 95
70 this.head = new Token; 96 this.head = new Token;
71 this.head.type = TOK.HEAD; 97 this.head.type = TOK.HEAD;
72 this.head.start = this.head.end = this.p; 98 this.head.start = this.head.end = this.p;
73 this.token = this.head; 99 this.token = this.head;
285 { c = *++p; } 311 { c = *++p; }
286 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) 312 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))
287 313
288 t.end = p; 314 t.end = p;
289 315
290 string str = t.srcText; 316 auto id = idTableLookup(t.srcText);
291 Identifier* id = str in idtable;
292
293 if (!id)
294 {
295 idtable[str] = Identifier(TOK.Identifier, str);
296 id = str in idtable;
297 }
298 assert(id);
299 t.type = id.type; 317 t.type = id.type;
318
300 if (t.type == TOK.Identifier || t.isKeyword) 319 if (t.type == TOK.Identifier || t.isKeyword)
301 return; 320 return;
302 else if (t.isSpecialToken) 321 else if (t.isSpecialToken)
303 finalizeSpecialToken(t); 322 finalizeSpecialToken(t);
304 else if (t.type == TOK.EOF) 323 else if (t.type == TOK.EOF)
1037 { c = *++p; } 1056 { c = *++p; }
1038 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8())) 1057 while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))
1039 1058
1040 t.end = p; 1059 t.end = p;
1041 1060
1042 string str = t.srcText; 1061 auto id = idTableLookup(t.srcText);
1043 Identifier* id = str in idtable;
1044
1045 if (!id)
1046 {
1047 idtable[str] = Identifier(TOK.Identifier, str);
1048 id = str in idtable;
1049 }
1050 assert(id);
1051 t.type = id.type; 1062 t.type = id.type;
1063
1052 if (t.type == TOK.Identifier || t.isKeyword) 1064 if (t.type == TOK.Identifier || t.isKeyword)
1053 return; 1065 return;
1054 else if (t.isSpecialToken) 1066 else if (t.isSpecialToken)
1055 finalizeSpecialToken(t); 1067 finalizeSpecialToken(t);
1056 else if (t.type == TOK.EOF) 1068 else if (t.type == TOK.EOF)
2444 if (ident.length == 0) 2456 if (ident.length == 0)
2445 return false; 2457 return false;
2446 2458
2447 static Identifier[string] reserved_ids_table; 2459 static Identifier[string] reserved_ids_table;
2448 if (reserved_ids_table is null) 2460 if (reserved_ids_table is null)
2449 loadKeywords(reserved_ids_table); 2461 Lexer.loadKeywords(reserved_ids_table);
2450 2462
2451 size_t idx = 1; // Index to the 2nd character in ident. 2463 size_t idx = 1; // Index to the 2nd character in ident.
2452 dchar isFirstCharUniAlpha() 2464 dchar isFirstCharUniAlpha()
2453 { 2465 {
2454 idx = 0; 2466 idx = 0;