Mercurial > projects > dil
comparison trunk/src/dil/lexer/Token.d @ 596:39fac5531b85
Moved dil.Token to dil.lexer.Token.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 06 Jan 2008 00:25:29 +0100 |
parents | trunk/src/dil/Token.d@9e811db780a6 |
children | 4d50267f59c9 |
comparison
equal
deleted
inserted
replaced
595:f6e5bff58b10 | 596:39fac5531b85 |
---|---|
1 /++ | |
2 Author: Aziz Köksal | |
3 License: GPL3 | |
4 +/ | |
5 module dil.lexer.Token; | |
6 | |
7 import dil.Location; | |
8 import dil.Identifier; | |
9 import dil.lexer.Funcs; | |
10 import tango.stdc.stdlib : malloc, free; | |
11 import tango.core.Exception; | |
12 import common; | |
13 | |
14 public import dil.TokensEnum; | |
15 | |
16 /++ | |
17 A Token is a sequence of characters formed by the lexical analyzer. | |
Tokens are chained into a doubly-linked list through next/prev and refer | |
to the source text through the ws/start/end pointers. The anonymous union | |
holds the data that is specific to a kind of token. Instances are allocated | |
with malloc and released with free by the new/delete operators of this struct. | |
18 +/ | |
19 struct Token | |
20 { | |
/// Flags that can be set on a token. | |
21 enum Flags : ushort | |
22 { | |
23 None, | |
24 Whitespace = 1, /// Tokens with this flag are ignored by the Parser. | |
25 } | |
26 | |
27 TOK type; /// The type of the token. | |
28 Flags flags; /// The flags of the token. | |
29 /// Pointers to the next and previous tokens (doubly-linked list.) | |
30 Token* next, prev; | |
31 | |
32 char* ws; /// Start of whitespace characters before token. Null if no WS. | |
33 char* start; /// Start of token in source text. | |
34 char* end; /// Points one past the end of token in source text. | |
35 | |
/// Token-specific data. All members of the union share the same storage, | |
/// so only the member that matches the token's type may be read. | |
36 union | |
37 { | |
38 /// For newline tokens. | |
39 struct | |
40 { | |
41 char[] filePath; | |
42 uint lineNum; | |
43 uint lineNum_hline; | |
44 } | |
45 /// For #line tokens. | |
46 struct | |
47 { | |
48 Token* tokLineNum; /// #line number | |
49 Token* tokLineFilespec; /// #line number filespec | |
50 } | |
51 /// For string tokens. | |
52 struct | |
53 { | |
54 string str; | |
55 char pf; /// Postfix 'c', 'w', 'd' or 0 for none. | |
56 version(D2) | |
57 Token* tok_str; /// Points to the contents of a token string stored as a | |
58 /// doubly linked list. The last token is always '}' or | |
59 /// EOF in case end of source text is "q{" EOF. | |
60 } | |
61 Identifier* ident; | |
62 dchar dchar_; | |
63 long long_; | |
64 ulong ulong_; | |
65 int int_; | |
66 uint uint_; | |
67 float float_; | |
68 double double_; | |
69 real real_; | |
70 } | |
71 | |
/// 'identifier' is an alias for srcText(). | |
72 alias srcText identifier; | |
73 | |
74 /// Returns the text of the token, i.e. the slice start[0 .. end - start]. Asserts that start and end are not null. | |
75 string srcText() | |
76 { | |
77 assert(start && end); | |
78 return start[0 .. end - start]; | |
79 } | |
80 | |
81 /// Returns the preceding whitespace of the token, i.e. the slice ws[0 .. start - ws]. Asserts that ws and start are not null, so it must not be called on a token without preceding whitespace. | |
82 string wsChars() | |
83 { | |
84 assert(ws && start); | |
85 return ws[0 .. start - ws]; | |
86 } | |
87 | |
88 /// Find next non-whitespace token. Returns 'this' token if the next token is TOK.EOF or null. The out contract asserts that the result is never null. | |
89 Token* nextNWS() | |
90 out(token) | |
91 { | |
92 assert(token !is null); | |
93 } | |
94 body | |
95 { | |
96 auto token = next; | |
97 while (token !is null && token.isWhitespace) | |
98 token = token.next; | |
99 if (token is null || token.type == TOK.EOF) | |
100 return this; | |
101 return token; | |
102 } | |
103 | |
104 /// Find previous non-whitespace token. Returns 'this' token if the previous token is TOK.HEAD or null. The out contract asserts that the result is never null. | |
105 Token* prevNWS() | |
106 out(token) | |
107 { | |
108 assert(token !is null); | |
109 } | |
110 body | |
111 { | |
112 auto token = prev; | |
113 while (token !is null && token.isWhitespace) | |
114 token = token.prev; | |
115 if (token is null || token.type == TOK.HEAD) | |
116 return this; | |
117 return token; | |
118 } | |
119 | |
/// Returns the entry of the tokToString table for the given token type. | |
/// NOTE(review): tokToString is not declared in this module; presumably it comes from dil.TokensEnum - confirm. | |
120 static string toString(TOK tok) | |
121 { | |
122 return tokToString[tok]; | |
123 } | |
124 | |
125 /// Adds Flags.Whitespace to this token's flags. | |
126 void setWhitespaceFlag() | |
127 { | |
128 this.flags |= Flags.Whitespace; | |
129 } | |
130 | |
131 /++ | |
132 Returns true if this is a token that can have newlines in it. | |
133 These can be block and nested comments and any string literal | |
134 except for escape string literals. | |
The check looks at the source text: a string token is excluded if its | |
first character is a backslash, a comment token is excluded if its | |
second character is '/'. | |
135 +/ | |
136 bool isMultiline() | |
137 { | |
138 return type == TOK.String && start[0] != '\\' || | |
139 type == TOK.Comment && start[1] != '/'; | |
140 } | |
141 | |
142 /// Returns true if this is a keyword token, i.e. type lies in the inclusive range KeywordsBegin .. KeywordsEnd. | |
143 bool isKeyword() | |
144 { | |
145 return KeywordsBegin <= type && type <= KeywordsEnd; | |
146 } | |
147 | |
148 /// Returns true if this is an integral type token, i.e. type lies in the inclusive range IntegralTypeBegin .. IntegralTypeEnd. | |
149 bool isIntegralType() | |
150 { | |
151 return IntegralTypeBegin <= type && type <= IntegralTypeEnd; | |
152 } | |
153 | |
154 /// Returns true if this is a whitespace token, i.e. Flags.Whitespace is set in flags. | |
155 bool isWhitespace() | |
156 { | |
157 return !!(flags & Flags.Whitespace); | |
158 } | |
159 | |
160 /// Returns true if this is a special token, i.e. type lies in the inclusive range SpecialTokensBegin .. SpecialTokensEnd. | |
161 bool isSpecialToken() | |
162 { | |
163 return SpecialTokensBegin <= type && type <= SpecialTokensEnd; | |
164 } | |
165 | |
166 version(D2) | |
167 { | |
168 /// Returns true if this is a token string literal, i.e. a TOK.String token whose tok_str is not null. | |
169 bool isTokenStringLiteral() | |
170 { | |
171 return type == TOK.String && tok_str !is null; | |
172 } | |
173 } | |
174 | |
175 /// Returns true if this token starts a DeclarationDefinition. | |
176 bool isDeclDefStart() | |
177 { | |
178 return isDeclDefStartToken(type); | |
179 } | |
180 | |
181 /// Returns true if this token starts a Statement. | |
182 bool isStatementStart() | |
183 { | |
184 return isStatementStartToken(type); | |
185 } | |
186 | |
187 /// Returns true if this token starts an AsmInstruction. | |
188 bool isAsmInstructionStart() | |
189 { | |
190 return isAsmInstructionStartToken(type); | |
191 } | |
192 | |
/// Compares the type of this token with type2. Returns non-zero if they are equal. | |
193 int opEquals(TOK type2) | |
194 { | |
195 return type == type2; | |
196 } | |
197 | |
198 /// Returns the Location of this token. | |
/// The method walks back to the previous TOK.Newline token to get the file path | |
/// and line number, then walks forward to this token and rescans every multiline | |
/// token in between for newlines to find the actual line number and line begin. | |
199 Location getLocation() | |
200 { | |
// NOTE(review): the backward walk has no null check; it assumes that a Newline token always precedes this token - confirm against the lexer. | |
201 auto search_t = this.prev; | |
202 // Find previous newline token. | |
203 while (search_t.type != TOK.Newline) | |
204 search_t = search_t.prev; | |
205 auto filePath = search_t.filePath; | |
// NOTE(review): lineNum_hline is presumably the line offset introduced by a #line token - confirm. | |
206 auto lineNum = search_t.lineNum - search_t.lineNum_hline; | |
207 auto lineBegin = search_t.end; | |
208 // Determine actual line begin and line number. | |
209 while (1) | |
210 { | |
211 search_t = search_t.next; | |
212 if (search_t == this) | |
213 break; | |
214 // Multiline tokens must be rescanned for newlines. | |
215 if (search_t.isMultiline) | |
216 { | |
217 auto p = search_t.start, end = search_t.end; | |
218 while (p != end) | |
219 { | |
// NOTE(review): scanNewline is not defined in this module (presumably dil.lexer.Funcs); it presumably advances p past a newline when it finds one, otherwise p is advanced manually below - confirm. | |
220 if (scanNewline(p) == '\n') | |
221 { | |
222 lineBegin = p; | |
223 ++lineNum; | |
224 } | |
225 else | |
226 ++p; | |
227 } | |
228 } | |
229 } | |
230 return new Location(filePath, lineNum, lineBegin, this.start); | |
231 } | |
232 | |
/// Returns the number of lines this token spans: 1, plus one for every | |
/// newline found between start and end if this is a multiline token. | |
233 uint lineCount() | |
234 { | |
235 uint count = 1; | |
236 if (this.isMultiline) | |
237 { | |
238 auto p = this.start, end = this.end; | |
239 while (p != end) | |
240 { | |
// NOTE(review): relies on scanNewline advancing p when it reports a newline - confirm. | |
241 if (scanNewline(p) == '\n') | |
242 ++count; | |
243 else | |
244 ++p; | |
245 } | |
246 } | |
247 return count; | |
248 } | |
249 | |
250 /// Return the source text enclosed by the left and right token, from left.start up to right.end. Asserts that left ends before right starts, unless both are the same token. | |
251 static char[] textSpan(Token* left, Token* right) | |
252 { | |
253 assert(left.end <= right.start || left is right ); | |
254 return left.start[0 .. right.end - left.start]; | |
255 } | |
256 | |
/// Allocator: obtains size bytes with malloc and initializes the memory with Token.init. | |
/// Throws OutOfMemoryException if malloc returns null. | |
257 new(size_t size) | |
258 { | |
259 void* p = malloc(size); | |
260 if (p is null) | |
261 throw new OutOfMemoryException(__FILE__, __LINE__); | |
262 *cast(Token*)p = Token.init; | |
263 return p; | |
264 } | |
265 | |
/// Deallocator: deletes the tokens referenced by a #line token, or (D2 only) | |
/// the token list of a token string literal, and then frees the memory with free. | |
266 delete(void* p) | |
267 { | |
268 auto token = cast(Token*)p; | |
269 if (token) | |
270 { | |
271 if(token.type == TOK.HashLine) | |
272 token.destructHashLineToken(); | |
273 else | |
274 { | |
275 version(D2) | |
276 if (token.isTokenStringLiteral) | |
277 token.destructTokenStringLiteral(); | |
278 } | |
279 } | |
280 free(p); | |
281 } | |
282 | |
/// Deletes the two tokens referenced by a #line token (tokLineNum and tokLineFilespec). | |
283 void destructHashLineToken() | |
284 { | |
285 assert(type == TOK.HashLine); | |
286 delete tokLineNum; | |
287 delete tokLineFilespec; | |
288 } | |
289 | |
290 version(D2) | |
291 { | |
/// Deletes the tokens of a token string literal. The list starting at tok_str is | |
/// followed through next until null or a TOK.EOF token is reached; the EOF token | |
/// itself is not deleted. | |
292 void destructTokenStringLiteral() | |
293 { | |
294 assert(type == TOK.String); | |
295 assert(start && *start == 'q' && start[1] == '{'); | |
296 assert(tok_str !is null); | |
297 auto tok_it = tok_str; | |
298 auto tok_del = tok_str; | |
299 while (tok_it && tok_it.type != TOK.EOF) | |
300 { | |
// Advance the iterator first, because tok_del is freed by the delete below. | |
301 tok_it = tok_it.next; | |
302 assert(tok_del && tok_del.type != TOK.EOF); | |
303 delete tok_del; | |
304 tok_del = tok_it; | |
305 } | |
306 } | |
307 } | |
308 } | |
309 | |
310 /++ | |
Bundles the original and the #line-set path and line number of a newline. | |
311 Not used at the moment. Could be useful if more | |
312 info is needed about the location of nodes/tokens. | |
313 +/ | |
314 struct NewlineInfo | |
315 { | |
316 char[] oriPath; /// Original path to the source text. | |
317 char[] setPath; /// Path set by #line. | |
318 uint oriLineNum; /// Actual line number in the source text. | |
319 uint setLineNum; /// Delta line number set by #line. | |
320 } | |
321 | |
322 /// Returns true if a token of type tok starts a DeclarationDefinition. | |
/// This is the case for the token types listed in the case statement and for | |
/// every type in the inclusive range IntegralTypeBegin .. IntegralTypeEnd. | |
323 bool isDeclDefStartToken(TOK tok) | |
324 { | |
325 switch (tok) | |
326 { | |
327 alias TOK T; | |
328 case T.Align, T.Pragma, T.Export, T.Private, T.Package, T.Protected, | |
329 T.Public, T.Extern, T.Deprecated, T.Override, T.Abstract, | |
330 T.Synchronized, T.Static, T.Final, T.Const, T.Invariant/*D 2.0*/, | |
331 T.Auto, T.Scope, T.Alias, T.Typedef, T.Import, T.Enum, T.Class, | |
332 T.Interface, T.Struct, T.Union, T.This, T.Tilde, T.Unittest, T.Debug, | |
333 T.Version, T.Template, T.New, T.Delete, T.Mixin, T.Semicolon, | |
334 T.Identifier, T.Dot, T.Typeof: | |
335 return true; | |
336 default: | |
// Integral type tokens are matched by a range check instead of being listed. | |
337 if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd) | |
338 return true; | |
339 } | |
340 return false; | |
341 } | |
342 | |
343 /// Returns true if a token of type tok starts a Statement. | |
/// This is the case for the token types listed in the case statement and for | |
/// every type in the inclusive ranges IntegralTypeBegin .. IntegralTypeEnd and | |
/// SpecialTokensBegin .. SpecialTokensEnd. | |
344 bool isStatementStartToken(TOK tok) | |
345 { | |
346 switch (tok) | |
347 { | |
348 alias TOK T; | |
349 case T.Align, T.Extern, T.Final, T.Const, T.Auto, T.Identifier, T.Dot, | |
350 T.Typeof, T.If, T.While, T.Do, T.For, T.Foreach, T.Foreach_reverse, | |
351 T.Switch, T.Case, T.Default, T.Continue, T.Break, T.Return, T.Goto, | |
352 T.With, T.Synchronized, T.Try, T.Throw, T.Scope, T.Volatile, T.Asm, | |
353 T.Pragma, T.Mixin, T.Static, T.Debug, T.Version, T.Alias, T.Semicolon, | |
354 T.Enum, T.Class, T.Interface, T.Struct, T.Union, T.LBrace, T.Typedef, | |
355 T.This, T.Super, T.Null, T.True, T.False, T.Int32, T.Int64, T.Uint32, | |
356 T.Uint64, T.Float32, T.Float64, T.Float80, T.Imaginary32, | |
357 T.Imaginary64, T.Imaginary80, T.CharLiteral, T.String, T.LBracket, | |
358 T.Function, T.Delegate, T.Assert, T.Import, T.Typeid, T.Is, T.LParen, | |
359 T.Traits/*D2.0*/, T.AndBinary, T.PlusPlus, T.MinusMinus, T.Mul, | |
360 T.Minus, T.Plus, T.Not, T.Tilde, T.New, T.Delete, T.Cast: | |
361 return true; | |
362 default: | |
// Integral type tokens and special tokens are matched by range checks instead of being listed. | |
363 if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd || | |
364 SpecialTokensBegin <= tok && tok <= SpecialTokensEnd) | |
365 return true; | |
366 } | |
367 return false; | |
368 } | |
369 | |
370 /// Returns true if a token of type tok starts an AsmInstruction, i.e. if tok is one of In, Int, Out, Identifier, Align or Semicolon. | |
371 bool isAsmInstructionStartToken(TOK tok) | |
372 { | |
373 switch(tok) | |
374 { | |
375 alias TOK T; | |
376 case T.In, T.Int, T.Out, T.Identifier, T.Align, T.Semicolon: | |
377 return true; | |
// Every other token type leaves the switch and reaches the 'return false' below. | |
378 default: | |
379 } | |
380 return false; | |
381 } | |