comparison trunk/src/dil/lexer/Token.d @ 596:39fac5531b85

Moved dil.Token to dil.lexer.Token.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sun, 06 Jan 2008 00:25:29 +0100
parents trunk/src/dil/Token.d@9e811db780a6
children 4d50267f59c9
/++
  Author: Aziz Köksal
  License: GPL3
+/
module dil.lexer.Token;

import dil.Location;
import dil.Identifier;
import dil.lexer.Funcs;
import tango.stdc.stdlib : malloc, free;
import tango.core.Exception;
import common;

public import dil.TokensEnum;

/++
  A Token is a sequence of characters formed by the lexical analyzer.
+/
struct Token
{
  enum Flags : ushort
  {
    None,
    Whitespace = 1, /// Tokens with this flag are ignored by the Parser.
  }

  TOK type; /// The type of the token.
  Flags flags; /// The flags of the token.
  /// Pointers to the next and previous tokens (doubly-linked list.)
  Token* next, prev;

  char* ws; /// Start of whitespace characters before token. Null if no WS.
  char* start; /// Start of token in source text.
  char* end; /// Points one past the end of token in source text.

  union
  {
    /// For newline tokens.
    struct
    {
      char[] filePath;
      uint lineNum;
      uint lineNum_hline;
    }
    /// For #line tokens.
    struct
    {
      Token* tokLineNum; /// #line number
      Token* tokLineFilespec; /// #line number filespec
    }
    /// For string tokens.
    struct
    {
      string str;
      char pf; /// Postfix 'c', 'w', 'd' or 0 for none.
    version(D2)
      Token* tok_str; /// Points to the contents of a token string stored as a
                      /// doubly linked list. The last token is always '}' or
                      /// EOF in case end of source text is "q{" EOF.
    }
    Identifier* ident;
    dchar dchar_;
    long long_;
    ulong ulong_;
    int int_;
    uint uint_;
    float float_;
    double double_;
    real real_;
  }
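
  /+ Illustrative sketch (not part of the original source): which union member
     is meaningful depends on `type`. The newline and string cases follow the
     comments above; the identifier/integer cases are assumptions about how the
     Lexer fills in the scalar members. `t` is a hypothetical Token*.
       switch (t.type)
       {
       case TOK.Newline:    auto line = t.lineNum; break; // newline bookkeeping
       case TOK.String:     auto s = t.str; auto postfix = t.pf; break;
       case TOK.Identifier: auto id = t.ident; break;     // assumption
       case TOK.Int32:      auto i = t.int_; break;       // assumption
       default: break;
       }
  +/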

  alias srcText identifier;

  /// Returns the text of the token.
  string srcText()
  {
    assert(start && end);
    return start[0 .. end - start];
  }

  /// Returns the preceding whitespace of the token.
  string wsChars()
  {
    assert(ws && start);
    return ws[0 .. start - ws];
  }
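
  /+ Illustrative sketch (not part of the original source): both methods return
     slices into the source buffer, not copies, so a token's exact spelling and
     its leading whitespace can be recovered directly. `t` is a hypothetical
     Token* produced by the Lexer.
       auto spelling  = t.srcText();               // e.g. "foo"
       auto leadingWS = t.ws ? t.wsChars() : null; // whitespace before it, if any
  +/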

  /// Finds the next non-whitespace token.
  /// Returns 'this' token if the next token is TOK.EOF or null.
  Token* nextNWS()
  out(token)
  {
    assert(token !is null);
  }
  body
  {
    auto token = next;
    while (token !is null && token.isWhitespace)
      token = token.next;
    if (token is null || token.type == TOK.EOF)
      return this;
    return token;
  }

  /// Finds the previous non-whitespace token.
  /// Returns 'this' token if the previous token is TOK.HEAD or null.
  Token* prevNWS()
  out(token)
  {
    assert(token !is null);
  }
  body
  {
    auto token = prev;
    while (token !is null && token.isWhitespace)
      token = token.prev;
    if (token is null || token.type == TOK.HEAD)
      return this;
    return token;
  }
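
  /+ Illustrative sketch (not part of the original source): both methods are
     guaranteed by their out-contracts to return a non-null token, falling back
     to the receiver itself when only whitespace and EOF/HEAD remain. `t` is a
     hypothetical Token* somewhere in a list built by the Lexer.
       auto after  = t.nextNWS(); // first non-whitespace token after t,
                                  // or t itself at the end of the list.
       auto before = t.prevNWS(); // analogous, towards TOK.HEAD.
  +/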

  static string toString(TOK tok)
  {
    return tokToString[tok];
  }

  /// Adds Flags.Whitespace to this token's flags.
  void setWhitespaceFlag()
  {
    this.flags |= Flags.Whitespace;
  }

  /++
    Returns true if this is a token that can have newlines in it.
    These can be block and nested comments and any string literal
    except for escape string literals.
  +/
  bool isMultiline()
  {
    return type == TOK.String && start[0] != '\\' ||
           type == TOK.Comment && start[1] != '/';
  }

  /// Returns true if this is a keyword token.
  bool isKeyword()
  {
    return KeywordsBegin <= type && type <= KeywordsEnd;
  }

  /// Returns true if this is an integral type token.
  bool isIntegralType()
  {
    return IntegralTypeBegin <= type && type <= IntegralTypeEnd;
  }

  /// Returns true if this is a whitespace token.
  bool isWhitespace()
  {
    return !!(flags & Flags.Whitespace);
  }

  /// Returns true if this is a special token.
  bool isSpecialToken()
  {
    return SpecialTokensBegin <= type && type <= SpecialTokensEnd;
  }

  version(D2)
  {
    /// Returns true if this is a token string literal.
    bool isTokenStringLiteral()
    {
      return type == TOK.String && tok_str !is null;
    }
  }

  /// Returns true if this token starts a DeclarationDefinition.
  bool isDeclDefStart()
  {
    return isDeclDefStartToken(type);
  }

  /// Returns true if this token starts a Statement.
  bool isStatementStart()
  {
    return isStatementStartToken(type);
  }

  /// Returns true if this token starts an AsmInstruction.
  bool isAsmInstructionStart()
  {
    return isAsmInstructionStartToken(type);
  }

  int opEquals(TOK type2)
  {
    return type == type2;
  }

  /// Returns the Location of this token.
  Location getLocation()
  {
    auto search_t = this.prev;
    // Find previous newline token.
    while (search_t.type != TOK.Newline)
      search_t = search_t.prev;
    auto filePath = search_t.filePath;
    auto lineNum = search_t.lineNum - search_t.lineNum_hline;
    auto lineBegin = search_t.end;
    // Determine actual line begin and line number.
    while (1)
    {
      search_t = search_t.next;
      if (search_t == this)
        break;
      // Multiline tokens must be rescanned for newlines.
      if (search_t.isMultiline)
      {
        auto p = search_t.start, end = search_t.end;
        while (p != end)
        {
          if (scanNewline(p) == '\n')
          {
            lineBegin = p;
            ++lineNum;
          }
          else
            ++p;
        }
      }
    }
    return new Location(filePath, lineNum, lineBegin, this.start);
  }
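
  /+ Illustrative sketch (not part of the original source): getLocation() walks
     back to the nearest TOK.Newline token and then forward again, rescanning
     multiline tokens, so it is relatively costly and is typically only called
     when a diagnostic has to be reported. `t` is a hypothetical Token*.
       auto loc = t.getLocation();
       // e.g. use loc to point an error message at t's file and line.
  +/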

  uint lineCount()
  {
    uint count = 1;
    if (this.isMultiline)
    {
      auto p = this.start, end = this.end;
      while (p != end)
      {
        if (scanNewline(p) == '\n')
          ++count;
        else
          ++p;
      }
    }
    return count;
  }

  /// Returns the source text enclosed by the left and right token.
  static char[] textSpan(Token* left, Token* right)
  {
    assert(left.end <= right.start || left is right);
    return left.start[0 .. right.end - left.start];
  }
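
  /+ Illustrative sketch (not part of the original source): textSpan() requires
     both tokens to come from the same source buffer, with `left` not after
     `right`. `first` and `last` are hypothetical pointers to the first and
     last token of some syntax node, e.g. as recorded by the Parser.
       auto snippet = Token.textSpan(first, last); // includes both tokens and
                                                   // everything between them.
  +/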

  new(size_t size)
  {
    void* p = malloc(size);
    if (p is null)
      throw new OutOfMemoryException(__FILE__, __LINE__);
    *cast(Token*)p = Token.init;
    return p;
  }

  delete(void* p)
  {
    auto token = cast(Token*)p;
    if (token)
    {
      if (token.type == TOK.HashLine)
        token.destructHashLineToken();
      else
      {
        version(D2)
        if (token.isTokenStringLiteral)
          token.destructTokenStringLiteral();
      }
    }
    free(p);
  }
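
  /+ Illustrative sketch (not part of the original source): because of the
     allocator overloads above, tokens live in malloc'ed memory rather than on
     the GC heap, so whoever builds the token list has to delete the tokens
     explicitly (presumably the Lexer does this when it is torn down).
       auto t = new Token; // Token.new: malloc + initialize with Token.init.
       // ... fill in and link t ...
       delete t;           // Token.delete: releases #line/token-string
                           // sub-tokens first, then frees t itself.
  +/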

  void destructHashLineToken()
  {
    assert(type == TOK.HashLine);
    delete tokLineNum;
    delete tokLineFilespec;
  }

  version(D2)
  {
    void destructTokenStringLiteral()
    {
      assert(type == TOK.String);
      assert(start && *start == 'q' && start[1] == '{');
      assert(tok_str !is null);
      auto tok_it = tok_str;
      auto tok_del = tok_str;
      while (tok_it && tok_it.type != TOK.EOF)
      {
        tok_it = tok_it.next;
        assert(tok_del && tok_del.type != TOK.EOF);
        delete tok_del;
        tok_del = tok_it;
      }
    }
  }
}

/++
  Not used at the moment. Could be useful if more
  info is needed about the location of nodes/tokens.
+/
struct NewlineInfo
{
  char[] oriPath; /// Original path to the source text.
  char[] setPath; /// Path set by #line.
  uint oriLineNum; /// Actual line number in the source text.
  uint setLineNum; /// Delta line number set by #line.
}

/// Returns true if this token starts a DeclarationDefinition.
bool isDeclDefStartToken(TOK tok)
{
  switch (tok)
  {
  alias TOK T;
  case T.Align, T.Pragma, T.Export, T.Private, T.Package, T.Protected,
       T.Public, T.Extern, T.Deprecated, T.Override, T.Abstract,
       T.Synchronized, T.Static, T.Final, T.Const, T.Invariant/*D 2.0*/,
       T.Auto, T.Scope, T.Alias, T.Typedef, T.Import, T.Enum, T.Class,
       T.Interface, T.Struct, T.Union, T.This, T.Tilde, T.Unittest, T.Debug,
       T.Version, T.Template, T.New, T.Delete, T.Mixin, T.Semicolon,
       T.Identifier, T.Dot, T.Typeof:
    return true;
  default:
    if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd)
      return true;
  }
  return false;
}

/// Returns true if this token starts a Statement.
bool isStatementStartToken(TOK tok)
{
  switch (tok)
  {
  alias TOK T;
  case T.Align, T.Extern, T.Final, T.Const, T.Auto, T.Identifier, T.Dot,
       T.Typeof, T.If, T.While, T.Do, T.For, T.Foreach, T.Foreach_reverse,
       T.Switch, T.Case, T.Default, T.Continue, T.Break, T.Return, T.Goto,
       T.With, T.Synchronized, T.Try, T.Throw, T.Scope, T.Volatile, T.Asm,
       T.Pragma, T.Mixin, T.Static, T.Debug, T.Version, T.Alias, T.Semicolon,
       T.Enum, T.Class, T.Interface, T.Struct, T.Union, T.LBrace, T.Typedef,
       T.This, T.Super, T.Null, T.True, T.False, T.Int32, T.Int64, T.Uint32,
       T.Uint64, T.Float32, T.Float64, T.Float80, T.Imaginary32,
       T.Imaginary64, T.Imaginary80, T.CharLiteral, T.String, T.LBracket,
       T.Function, T.Delegate, T.Assert, T.Import, T.Typeid, T.Is, T.LParen,
       T.Traits/*D2.0*/, T.AndBinary, T.PlusPlus, T.MinusMinus, T.Mul,
       T.Minus, T.Plus, T.Not, T.Tilde, T.New, T.Delete, T.Cast:
    return true;
  default:
    if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd ||
        SpecialTokensBegin <= tok && tok <= SpecialTokensEnd)
      return true;
  }
  return false;
}

/// Returns true if this token starts an AsmInstruction.
bool isAsmInstructionStartToken(TOK tok)
{
  switch (tok)
  {
  alias TOK T;
  case T.In, T.Int, T.Out, T.Identifier, T.Align, T.Semicolon:
    return true;
  default:
  }
  return false;
}