Mercurial > projects > dil
comparison trunk/src/Lexer.d @ 18:c48d2125f1e2
- Moved the code for scanning character literals to a separate function.
- Added the Problem class and the Messages table.
author | aziz |
---|---|
date | Sat, 23 Jun 2007 21:48:03 +0000 |
parents | 9bd0bac79479 |
children | f85832f9f24e |
comparison
equal
deleted
inserted
replaced
17:9bd0bac79479 | 18:c48d2125f1e2 |
---|---|
80 } | 80 } |
81 | 81 |
82 const char[3] LS = \u2028; | 82 const char[3] LS = \u2028; |
83 const char[3] PS = \u2029; | 83 const char[3] PS = \u2029; |
84 | 84 |
85 const dchar LSd = 0x2028; | |
86 const dchar PSd = 0x2029; | |
87 | |
88 /// Index into table of error messages. | |
89 enum MID | |
90 { | |
91 UnterminatedCharacterLiteral, | |
92 EmptyCharacterLiteral | |
93 } | |
94 | |
95 string[] Messages = [ | |
96 "unterminated character literal." | |
97 "empty character literal." | |
98 ]; | |
99 | |
100 class Problem | |
101 { | |
102 enum Type | |
103 { | |
104 Lexer, | |
105 Parser, | |
106 Semantic | |
107 } | |
108 | |
109 MID id; | |
110 Type type; | |
111 uint loc; | |
112 this(Type type, MID id, uint loc) | |
113 { | |
114 this.id = id; | |
115 this.type = type; | |
116 this.loc = loc; | |
117 } | |
118 } | |
119 | |
85 class Lexer | 120 class Lexer |
86 { | 121 { |
87 Token token; | 122 Token token; |
88 char[] text; | 123 char[] text; |
89 char* p; | 124 char* p; |
90 char* end; | 125 char* end; |
91 | 126 |
92 uint loc = 1; /// line of code | 127 uint loc = 1; /// line of code |
128 | |
129 Problem[] errors; | |
93 | 130 |
94 this(char[] text) | 131 this(char[] text) |
95 { | 132 { |
96 this.text = text; | 133 this.text = text; |
97 this.text.length = this.text.length + 1; | 134 this.text.length = this.text.length + 1; |
150 if (c == '/') | 187 if (c == '/') |
151 { | 188 { |
152 c = *++p; | 189 c = *++p; |
153 switch(c) | 190 switch(c) |
154 { | 191 { |
155 case '=': | 192 case '=': |
156 ++p; | 193 ++p; |
157 t.type = TOK.DivisionAssign; | 194 t.type = TOK.DivisionAssign; |
158 t.end = p; | 195 t.end = p; |
159 return; | 196 return; |
160 case '+': | 197 case '+': |
161 uint level = 1; | 198 uint level = 1; |
162 do | 199 do |
163 { | 200 { |
164 c = *++p; | 201 c = *++p; |
165 if (c == 0) | 202 if (c == 0) |
178 } while (1) | 215 } while (1) |
179 p += 2; | 216 p += 2; |
180 t.type = TOK.Comment; | 217 t.type = TOK.Comment; |
181 t.end = p; | 218 t.end = p; |
182 return; | 219 return; |
183 case '*': | 220 case '*': |
184 do | 221 do |
185 { | 222 { |
186 c = *++p; | 223 c = *++p; |
187 if (c == 0) | 224 if (c == 0) |
188 throw new Error("unterminated /* */ comment."); | 225 throw new Error("unterminated /* */ comment."); |
189 } while (c != '*' || p[1] != '/') | 226 } while (c != '*' || p[1] != '/') |
190 p += 2; | 227 p += 2; |
191 t.type = TOK.Comment; | 228 t.type = TOK.Comment; |
192 t.end = p; | 229 t.end = p; |
193 return; | 230 return; |
194 case '/': | 231 case '/': |
195 do | 232 do |
196 { | 233 { |
197 c = *++p; | 234 c = *++p; |
198 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) | 235 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
199 break; | 236 break; |
218 t.end = p; | 255 t.end = p; |
219 return; | 256 return; |
220 } | 257 } |
221 | 258 |
222 if (c == '\'') | 259 if (c == '\'') |
223 { | 260 return scanCharacterLiteral(t); |
224 do { | |
225 c = *++p; | |
226 if (c == 0) | |
227 throw new Error("unterminated character literal."); | |
228 if (c == '\\') | |
229 ++p; | |
230 } while (c != '\'') | |
231 ++p; | |
232 t.type = TOK.Character; | |
233 t.end = p; | |
234 return; | |
235 } | |
236 | 261 |
237 if (c & 128 && isUniAlpha(decodeUTF())) | 262 if (c & 128 && isUniAlpha(decodeUTF())) |
238 goto Lidentifier; | 263 goto Lidentifier; |
239 c = *++p; | 264 c = *++p; |
240 } | 265 } |
266 } | |
267 | |
268 void scanCharacterLiteral(ref Token t) | |
269 { | |
270 assert(*p == '\''); | |
271 MID id = MID.UnterminatedCharacterLiteral; | |
272 uint c = *++p; | |
273 switch(c) | |
274 { | |
275 case '\\': | |
276 ++p; | |
277 if (*p != '\'') | |
278 goto Lerr; | |
279 break; | |
280 case 0, 161, '\n', '\r': | |
281 goto Lerr; | |
282 case '\'': | |
283 id = MID.EmptyCharacterLiteral; | |
284 goto Lerr; | |
285 default: | |
286 if (c & 128) | |
287 { | |
288 c = decodeUTF(); | |
289 if (c == LSd || c == PSd) | |
290 goto Lerr; | |
291 t.chr = c; | |
292 } | |
293 } | |
294 | |
295 if (*p != '\'') | |
296 Lerr: | |
297 error(id); | |
298 ++p; | |
299 t.type = TOK.Character; | |
300 t.end = p; | |
241 } | 301 } |
242 | 302 |
243 void scanNumber(ref Token t) | 303 void scanNumber(ref Token t) |
244 { | 304 { |
245 while (isdigit(*++p)) {} | 305 while (isdigit(*++p)) {} |
255 d = std.utf.decode(p[0 .. end-p], idx); | 315 d = std.utf.decode(p[0 .. end-p], idx); |
256 p += idx -1; | 316 p += idx -1; |
257 return d; | 317 return d; |
258 } | 318 } |
259 | 319 |
320 void error(MID id) | |
321 { | |
322 errors ~= new Problem(Problem.Type.Lexer, id, loc); | |
323 } | |
324 | |
260 public TOK nextToken() | 325 public TOK nextToken() |
261 { | 326 { |
262 scan(this.token); | 327 scan(this.token); |
263 return this.token.type; | 328 return this.token.type; |
264 } | 329 } |