Mercurial > projects > dil
comparison trunk/src/dil/Token.d @ 485:ea8c7459f1c4
Changed a lot of things in the Lexer.
Newlines are tokenized now, instead of being treated as whitespace.
Newline tokens store location info as well, which makes quite a few functions
unnecessary. Added a static method getLocation() which returns a Location
instance for any given token. This will also be very useful for finding
the location of AST nodes (through Node.begin,) which is needed for reporting
parser and semantic errors and emitting documentation.
Removed rescanNewlines(), LocState, getState(), restoreState(),
evaluateHashLine() and updateErrorLoc().
Added isUnicodeNewlineChar(), isUnicodeNewline(), isNewline(), isNewlineEnd(),
isEndOfLine(), scanNewline(), getLocation() and error_().
Replaced some clunky expressions with isascii(), isNewlineEnd(), isEndOfLine(),
isUnicodeNewline(), isUnicodeNewlineChar().
Fix in scanNormalStringLiteral(): scanPostfix() must be before label Lreturn.
Fixed Lexer unittest.
Fix in parseDeclarationDefinitionsBlock(): 'token' should be 'begin'.
Added method isMultiline() to Token and added documentation comments.: File name too long
abort: file /home/aziz/dil/trunk/Changed a lot of things in the Lexer.
Newlines are tokenized now, instead of being treated as whitespace.
Newline tokens store location info as well, which makes quite a few functions
unnecessary. Added a static method getLocation() which returns a Location
instance for any given token. This will also be very useful for finding
the location of AST nodes (through Node.begin,) which is needed for reporting
parser and semantic errors and emitting documentation.
Removed rescanNewlines(), LocState, getState(), restoreState(),
evaluateHashLine() and updateErrorLoc().
Added isUnicodeNewlineChar(), isUnicodeNewline(), isNewline(), isNewlineEnd(),
isEndOfLine(), scanNewline(), getLocation() and error_().
Replaced some clunky expressions with isascii(), isNewlineEnd(), isEndOfLine(),
isUnicodeNewline(), isUnicodeNewlineChar().
Fix in scanNormalStringLiteral(): scanPostfix() must be before label Lreturn.
Fixed Lexer unittest.
Fix in parseDeclarationDefinitionsBlock(): 'token' should be 'begin'.
Added method isMultiline() to Token and added documentation comments.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Fri, 30 Nov 2007 20:17:29 +0100 |
parents | 9c69615a4876 |
children | bccca748d745 |
comparison
equal
deleted
inserted
replaced
484:265c0b655f18 | 485:ea8c7459f1c4 |
---|---|
5 module dil.Token; | 5 module dil.Token; |
6 import common; | 6 import common; |
7 import tango.stdc.stdlib : malloc, free; | 7 import tango.stdc.stdlib : malloc, free; |
8 import tango.core.Exception; | 8 import tango.core.Exception; |
9 | 9 |
10 struct Position | |
11 { | |
12 size_t loc; | |
13 size_t col; | |
14 } | |
15 | |
16 enum TOK : ushort | 10 enum TOK : ushort |
17 { | 11 { |
18 Invalid, | 12 Invalid, |
19 | 13 |
20 /// Flag for whitespace tokens that must be ignored in the parsing phase. | 14 /// Flag for whitespace tokens that must be ignored in the parsing phase. |
22 Illegal = 1 | Whitespace, | 16 Illegal = 1 | Whitespace, |
23 Comment = 2 | Whitespace, | 17 Comment = 2 | Whitespace, |
24 Shebang = 3 | Whitespace, | 18 Shebang = 3 | Whitespace, |
25 HashLine = 4 | Whitespace, | 19 HashLine = 4 | Whitespace, |
26 Filespec = 5 | Whitespace, | 20 Filespec = 5 | Whitespace, |
27 Empty = 6, | 21 Newline = 6 | Whitespace, |
28 | 22 Empty = 7, |
29 Identifier = 7, | 23 |
24 Identifier = 8, | |
30 String, | 25 String, |
31 CharLiteral, WCharLiteral, DCharLiteral, | 26 CharLiteral, WCharLiteral, DCharLiteral, |
32 | 27 |
33 // Special tokens | 28 // Special tokens |
34 FILE, | 29 FILE, |
117 alias TOK.Abstract KeywordsBegin; | 112 alias TOK.Abstract KeywordsBegin; |
118 alias TOK.With KeywordsEnd; | 113 alias TOK.With KeywordsEnd; |
119 | 114 |
120 struct Token | 115 struct Token |
121 { | 116 { |
122 TOK type; | 117 TOK type; /// The type of the token. |
123 // Position pos; | 118 /// Pointers to the next and previous tokens (doubly-linked list.) |
124 | |
125 Token* next, prev; | 119 Token* next, prev; |
126 | 120 |
127 char* ws; /// Start of whitespace characters before token. Null if no WS. | 121 char* ws; /// Start of whitespace characters before token. Null if no WS. |
128 char* start; /// Start of token in source text. | 122 char* start; /// Start of token in source text. |
129 char* end; /// Points one past the end of token in source text. | 123 char* end; /// Points one past the end of token in source text. |
130 | 124 |
131 union | 125 union |
132 { | 126 { |
127 /// For newline tokens. | |
133 struct | 128 struct |
134 { | 129 { |
135 Token* line_num; // #line number | 130 char[] filePath; |
136 Token* line_filespec; // #line number filespec | 131 uint lineNum; |
137 } | 132 uint lineNum_hline; |
133 } | |
134 /// For #line tokens. | |
138 struct | 135 struct |
139 { | 136 { |
137 Token* tokLineNum; /// #line number | |
138 Token* tokLineFilespec; /// #line number filespec | |
139 } | |
140 /// For string tokens. | |
141 struct | |
142 { | |
140 string str; | 143 string str; |
141 char pf; /// Postfix 'c', 'w' or 'd' | 144 char pf; /// Postfix 'c', 'w', 'd' or 0 for none. |
142 version(D2) | 145 version(D2) |
143 Token* tok_str; /// Points to the contents of a token string stored as a | 146 Token* tok_str; /// Points to the contents of a token string stored as a |
144 /// doubly linked list. The last token is always '}' or | 147 /// doubly linked list. The last token is always '}' or |
145 /// EOF in case end of source text is "q{" EOF. | 148 /// EOF in case end of source text is "q{" EOF. |
146 } | 149 } |
197 static string toString(TOK tok) | 200 static string toString(TOK tok) |
198 { | 201 { |
199 return tokToString[tok]; | 202 return tokToString[tok]; |
200 } | 203 } |
201 | 204 |
205 /++ | |
206 Returns true if this is a token which can have newlines in it. | |
207 These can be any string literal except for escape literals | |
208 and block and nested comments. | |
209 +/ | |
210 bool isMultiline() | |
211 { | |
212 return type == TOK.String && start[0] != '\\' || | |
213 type == TOK.Comment && start[1] != '/'; | |
214 } | |
215 | |
216 /// Returns true if this is a keyword token. | |
202 bool isKeyword() | 217 bool isKeyword() |
203 { | 218 { |
204 return KeywordsBegin <= type && type <= KeywordsEnd; | 219 return KeywordsBegin <= type && type <= KeywordsEnd; |
205 } | 220 } |
206 | 221 |
222 /// Returns true if this is a whitespace token. | |
207 bool isWhitespace() | 223 bool isWhitespace() |
208 { | 224 { |
209 return !!(type & TOK.Whitespace); | 225 return !!(type & TOK.Whitespace); |
210 } | 226 } |
211 | 227 |
228 /// Returns true if this is a special token. | |
212 bool isSpecialToken() | 229 bool isSpecialToken() |
213 { | 230 { |
214 return *start == '_' && type != TOK.Identifier; | 231 return *start == '_' && type != TOK.Identifier; |
215 } | 232 } |
216 | 233 |
217 version(D2) | 234 version(D2) |
218 { | 235 { |
236 /// Returns true if this is a token string literal. | |
219 bool isTokenStringLiteral() | 237 bool isTokenStringLiteral() |
220 { | 238 { |
221 return type == TOK.String && tok_str !is null; | 239 return type == TOK.String && tok_str !is null; |
222 } | 240 } |
223 } | 241 } |
254 } | 272 } |
255 | 273 |
256 void destructHashLineToken() | 274 void destructHashLineToken() |
257 { | 275 { |
258 assert(type == TOK.HashLine); | 276 assert(type == TOK.HashLine); |
259 delete line_num; | 277 delete tokLineNum; |
260 delete line_filespec; | 278 delete tokLineFilespec; |
261 } | 279 } |
262 | 280 |
263 version(D2) | 281 version(D2) |
264 { | 282 { |
265 void destructTokenStringLiteral() | 283 void destructTokenStringLiteral() |
278 } | 296 } |
279 } | 297 } |
280 } | 298 } |
281 } | 299 } |
282 | 300 |
283 const string[] tokToString = [ | 301 /// A table mapping each TOK to a string. |
302 private const string[] tokToString = [ | |
284 "Invalid", | 303 "Invalid", |
285 | 304 |
286 "Illegal", | 305 "Illegal", |
287 "Comment", | 306 "Comment", |
288 "#! /shebang/", | 307 "#! /shebang/", |
289 "#line", | 308 "#line", |
290 `"filespec"`, | 309 `"filespec"`, |
310 "Newline", | |
291 "Empty", | 311 "Empty", |
292 | 312 |
293 "Identifier", | 313 "Identifier", |
294 "String", | 314 "String", |
295 "CharLiteral", "WCharLiteral", "DCharLiteral", | 315 "CharLiteral", "WCharLiteral", "DCharLiteral", |
313 "{", | 333 "{", |
314 "}", | 334 "}", |
315 | 335 |
316 ".", "..", "...", | 336 ".", "..", "...", |
317 | 337 |
318 "Unordered", | 338 "!<>=", // Unordered |
319 "UorE", | 339 "!<>", // UorE |
320 "UorG", | 340 "!<=", // UorG |
321 "UorGorE", | 341 "!<", // UorGorE |
322 "UorL", | 342 "!>=", // UorL |
323 "UorLorE", | 343 "!>", // UorLorE |
324 "LorEorG", | 344 "<>=", // LorEorG |
325 "LorG", | 345 "<>", // LorG |
326 | 346 |
327 "=", "==", "!=", "!", | 347 "=", "==", "!=", "!", |
328 "<=", "<", | 348 "<=", "<", |
329 ">=", ">", | 349 ">=", ">", |
330 "<<=", "<<", | 350 "<<=", "<<", |