comparison trunk/src/Lexer.d @ 18:c48d2125f1e2

- Moved code for scanning character literals to separate function. - Added class Problem and Messages table.
author aziz
date Sat, 23 Jun 2007 21:48:03 +0000
parents 9bd0bac79479
children f85832f9f24e
comparison
equal deleted inserted replaced
17:9bd0bac79479 18:c48d2125f1e2
80 } 80 }
81 81
82 const char[3] LS = \u2028; 82 const char[3] LS = \u2028;
83 const char[3] PS = \u2029; 83 const char[3] PS = \u2029;
84 84
85 const dchar LSd = 0x2028; /// Code point U+2028 as a dchar; LS above holds the same code point as a char[3].
86 const dchar PSd = 0x2029; /// Code point U+2029 as a dchar; PS above holds the same code point as a char[3].
87
88 /// Index into the table of error messages (Messages).
89 enum MID
90 {
91 UnterminatedCharacterLiteral, /// Default id used by scanCharacterLiteral() when it reports a problem.
92 EmptyCharacterLiteral /// Used by scanCharacterLiteral() when the opening quote is directly followed by another quote.
93 }
94
95 string[] Messages = [ // Error message texts in MID order, one array element per MID member.
96 "unterminated character literal.", // MID.UnterminatedCharacterLiteral
97 "empty character literal." // MID.EmptyCharacterLiteral
98 ];
99
100 class Problem // One recorded problem: a message id, a Type category and a location.
101 {
102 enum Type // Category of a problem; presumably the compiler stage that reported it — confirm.
103 {
104 Lexer,
105 Parser,
106 Semantic
107 }
108
109 MID id; // Message identifier (MID is documented as an index into the error message table).
110 Type type; // Category of this problem.
111 uint loc; // Location of the problem; Lexer.error() passes the lexer's loc ("line of code") here.
112 this(Type type, MID id, uint loc) // Stores the three arguments in the fields of the same names.
113 {
114 this.id = id;
115 this.type = type;
116 this.loc = loc;
117 }
118 }
119
85 class Lexer 120 class Lexer
86 { 121 {
87 Token token; 122 Token token;
88 char[] text; 123 char[] text;
89 char* p; 124 char* p;
90 char* end; 125 char* end;
91 126
92 uint loc = 1; /// line of code 127 uint loc = 1; /// line of code
128
129 Problem[] errors;
93 130
94 this(char[] text) 131 this(char[] text)
95 { 132 {
96 this.text = text; 133 this.text = text;
97 this.text.length = this.text.length + 1; 134 this.text.length = this.text.length + 1;
150 if (c == '/') 187 if (c == '/')
151 { 188 {
152 c = *++p; 189 c = *++p;
153 switch(c) 190 switch(c)
154 { 191 {
155 case '=': 192 case '=':
156 ++p; 193 ++p;
157 t.type = TOK.DivisionAssign; 194 t.type = TOK.DivisionAssign;
158 t.end = p; 195 t.end = p;
159 return; 196 return;
160 case '+': 197 case '+':
161 uint level = 1; 198 uint level = 1;
162 do 199 do
163 { 200 {
164 c = *++p; 201 c = *++p;
165 if (c == 0) 202 if (c == 0)
178 } while (1) 215 } while (1)
179 p += 2; 216 p += 2;
180 t.type = TOK.Comment; 217 t.type = TOK.Comment;
181 t.end = p; 218 t.end = p;
182 return; 219 return;
183 case '*': 220 case '*':
184 do 221 do
185 { 222 {
186 c = *++p; 223 c = *++p;
187 if (c == 0) 224 if (c == 0)
188 throw new Error("unterminated /* */ comment."); 225 throw new Error("unterminated /* */ comment.");
189 } while (c != '*' || p[1] != '/') 226 } while (c != '*' || p[1] != '/')
190 p += 2; 227 p += 2;
191 t.type = TOK.Comment; 228 t.type = TOK.Comment;
192 t.end = p; 229 t.end = p;
193 return; 230 return;
194 case '/': 231 case '/':
195 do 232 do
196 { 233 {
197 c = *++p; 234 c = *++p;
198 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) 235 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
199 break; 236 break;
218 t.end = p; 255 t.end = p;
219 return; 256 return;
220 } 257 }
221 258
222 if (c == '\'') 259 if (c == '\'')
223 { 260 return scanCharacterLiteral(t);
224 do {
225 c = *++p;
226 if (c == 0)
227 throw new Error("unterminated character literal.");
228 if (c == '\\')
229 ++p;
230 } while (c != '\'')
231 ++p;
232 t.type = TOK.Character;
233 t.end = p;
234 return;
235 }
236 261
237 if (c & 128 && isUniAlpha(decodeUTF())) 262 if (c & 128 && isUniAlpha(decodeUTF()))
238 goto Lidentifier; 263 goto Lidentifier;
239 c = *++p; 264 c = *++p;
240 } 265 }
266 }
267
268 void scanCharacterLiteral(ref Token t) // Scans a character literal starting at p; sets t.type and t.end and records problems via error().
269 {
270 assert(*p == '\''); // Precondition: p is on the opening quote.
271 MID id = MID.UnterminatedCharacterLiteral; // Problem id used on the Lerr path unless overridden below.
272 uint c = *++p; // First character after the opening quote.
273 switch(c)
274 {
275 case '\\':
276 ++p; // Step onto the character that follows the backslash.
277 if (*p != '\'') // NOTE(review): only a backslash followed by a quote passes here; any other escape is reported as a problem — confirm this is intended.
278 goto Lerr;
279 break;
280 case 0, 161, '\n', '\r': // NOTE(review): 161 looks unusual next to 0 and the newline characters — confirm the intended value.
281 goto Lerr;
282 case '\'':
283 id = MID.EmptyCharacterLiteral; // Nothing between the two quotes.
284 goto Lerr;
285 default:
286 if (c & 128) // High bit set: decode a non-ASCII character.
287 {
288 c = decodeUTF();
289 if (c == LSd || c == PSd) // U+2028 and U+2029 are rejected like '\n' and '\r' above.
290 goto Lerr;
291 t.chr = c; // NOTE(review): t.chr is only assigned on this non-ASCII path.
292 }
293 }
294
295 if (*p != '\'') // NOTE(review): on the default path p has not been moved past the scanned character (decodeUTF() ends with p += idx -1), so this appears to test that character rather than the one after it — verify.
296 Lerr: // This label is the body of the if above; the goto statements jump straight to error(id).
297 error(id);
298 ++p; // NOTE(review): also runs after a problem was recorded, including the case 0 path — confirm p cannot move past the end of the text.
299 t.type = TOK.Character;
300 t.end = p;
241 } 301 }
242 302
243 void scanNumber(ref Token t) 303 void scanNumber(ref Token t)
244 { 304 {
245 while (isdigit(*++p)) {} 305 while (isdigit(*++p)) {}
255 d = std.utf.decode(p[0 .. end-p], idx); 315 d = std.utf.decode(p[0 .. end-p], idx);
256 p += idx -1; 316 p += idx -1;
257 return d; 317 return d;
258 } 318 }
259 319
320 void error(MID id) // Appends a new Problem of type Lexer with the given id and the current loc to errors.
321 {
322 errors ~= new Problem(Problem.Type.Lexer, id, loc);
323 }
324
260 public TOK nextToken() 325 public TOK nextToken() // Calls scan() on this.token and returns the resulting token type.
261 { 326 {
262 scan(this.token); 327 scan(this.token);
263 return this.token.type; 328 return this.token.type;
264 } 329 }