Mercurial > projects > dang
comparison src/parser/Parser.d @ 206:d3c148ca429b
Major reorganization of files: all source now goes into src/, all documentation into docs/.
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 18:14:56 +0200 |
parents | |
children | e0551773a005 |
comparison
equal
deleted
inserted
replaced
205:8387cbaa85ab | 206:d3c148ca429b |
---|---|
1 module parser.Parser; | |
2 | |
3 import lexer.Lexer, | |
4 lexer.Token; | |
5 | |
6 import parser.Action; | |
7 | |
8 import basic.Message; | |
9 | |
10 import basic.SmallArray, | |
11 basic.SourceManager; | |
12 | |
13 import tango.io.Stdout, | |
14 Integer = tango.text.convert.Integer; | |
15 | |
16 class Parser | |
17 { | |
18 Action action; | |
19 MessageHandler messages; | |
20 alias Object Exp; | |
21 alias Object Stmt; | |
22 alias Object Decl; | |
23 alias Object Module; | |
24 | |
25 this(MessageHandler messages) | |
26 { | |
27 this.messages = messages; | |
28 } | |
29 | |
30 Module parse(SourceManager sm, Lexer lexer, Action act) | |
31 { | |
32 this.sm = sm; | |
33 this.lexer = lexer; | |
34 this.action = act; | |
35 | |
36 Module m; | |
37 if (lexer.peek.type == Tok.Module) | |
38 { | |
39 Token _module = lexer.next; | |
40 ModuleName name = parseModuleName(); | |
41 m = action.actOnModule(_module, sm.getText(name.asRange())); | |
42 require(Tok.Seperator); | |
43 } | |
44 else | |
45 { | |
46 SLoc loc = lexer.peek.location; | |
47 m = action.actOnImplicitModule(loc, sm.getFile(loc)); | |
48 } | |
49 | |
50 while (lexer.peek.type != Tok.EOF) | |
51 foreach (d; parseDeclDef()) | |
52 action.actOnModuleDecl(m, d); | |
53 | |
54 return m; | |
55 } | |
56 | |
57 private: | |
58 Decl[] parseDeclDef() | |
59 { | |
60 Token t = lexer.peek; | |
61 if (t.type == Tok.Import) | |
62 return parseImports(); | |
63 else | |
64 return [parseDecl()]; | |
65 } | |
66 | |
67 Decl parseDecl() | |
68 { | |
69 Token t = lexer.peek; | |
70 | |
71 if (t.isBasicType || t.isIdentifier) | |
72 { | |
73 Id type; | |
74 Id iden; | |
75 int len = peekParseType; | |
76 if(lexer.peek(len).type == Tok.Identifier && len != 0) | |
77 { | |
78 type = parseType; | |
79 parseDeclAfterInvalidType: | |
80 iden = Id(require(Tok.Identifier)); | |
81 Token next = lexer.peek(); | |
82 if (next.type == Tok.Seperator) | |
83 { | |
84 Token sep = lexer.next(); | |
85 return action.actOnDeclarator(type, iden, null); | |
86 } | |
87 else if (next.type == Tok.Assign) | |
88 { | |
89 Token assign = lexer.next(); | |
90 Exp exp = parseExpression(); | |
91 require(Tok.Seperator); | |
92 return action.actOnDeclarator(type, iden, exp); | |
93 } | |
94 else if (next.type == Tok.OpenParentheses) | |
95 return parseFunc(type, iden); | |
96 else | |
97 messages.report(UnexpectedTok, next.location).arg(next.getType); | |
98 } | |
99 t = lexer.peek(len); | |
100 messages.report(InvalidDeclType, t.location) | |
101 .arg(sm.getText(t.asRange)); | |
102 while(len--) | |
103 lexer.next; | |
104 while(lexer.peek.type != Tok.Identifier) | |
105 lexer.next; | |
106 type = Id(lexer.peek); | |
107 goto parseDeclAfterInvalidType; | |
108 } | |
109 else if (t.type == Tok.Struct) | |
110 { | |
111 Id type = Id(lexer.next); | |
112 Id iden = Id(require(Tok.Identifier)); | |
113 | |
114 return parseStruct(type, iden); | |
115 } | |
116 messages.report(UnexpectedTok, t.location) | |
117 .arg(t.getType) | |
118 .arg(Tok.Identifier) | |
119 .fatal(ExitLevel.Parser); | |
120 } | |
121 | |
122 /** | |
123 Parse a series of imports belonging to a single import token. | |
124 */ | |
125 Decl[] parseImports() | |
126 { | |
127 Token _import = require(Tok.Import); | |
128 SmallArray!(Decl) res; | |
129 void addToRes(Decl d) { res ~= d; } | |
130 | |
131 bool done = false; | |
132 while (!done && !on_a(Tok.Seperator)) | |
133 { | |
134 ModuleName mod = parseModuleName(); | |
135 Token tok = lexer.peek; | |
136 switch (tok.type) | |
137 { | |
138 case Tok.Comma: | |
139 // import A, B.C; | |
140 // parse another module-name | |
141 lexer.next(); | |
142 res ~= action.actOnImport(_import, mod, null); | |
143 break; | |
144 case Tok.Assign: | |
145 // import B = A.A; | |
146 // ^- must be a single identifier | |
147 // renamed import | |
148 if (mod.packages.length != 0) | |
149 { | |
150 SLoc loc = mod.packages[0].tok.location; | |
151 messages.report(RenameMustBeSingleIdent, loc); | |
152 } | |
153 //if (isStatic) | |
154 // error("Static imports cannot be renamed"); | |
155 lexer.next(); | |
156 Id name = mod.id; | |
157 mod = parseModuleName(); | |
158 // create from mod and rename to `name` | |
159 res ~= action.actOnImport(_import, mod, &name); | |
160 break; | |
161 case Tok.Colon: | |
162 // import A : a; | |
163 // selective imports, potentially import A : print = a | |
164 lexer.next(); | |
165 Decl d = action.actOnImport(_import, mod, null); | |
166 // do-while on a comma: | |
167 // add explicit symbol | |
168 do | |
169 { | |
170 Id sym = parseIdentifier(); | |
171 Id dummy; | |
172 Id* name = null; | |
173 if (skip(Tok.Assign)) | |
174 { | |
175 dummy = sym; | |
176 name = &dummy; | |
177 sym = parseIdentifier(); | |
178 } | |
179 action.addSelectiveImport(d, sym, name); | |
180 | |
181 } while (skip(Tok.Comma)); | |
182 require(Tok.Seperator); | |
183 res ~= d; | |
184 return res.safe(); | |
185 case Tok.Seperator: | |
186 done = true; | |
187 break; | |
188 default: | |
189 goto Lerror; | |
190 } | |
191 res ~= action.actOnImport(_import, mod, null); | |
192 } | |
193 | |
194 require(Tok.Seperator); | |
195 return res.safe(); | |
196 Lerror: | |
197 while (!on_a (Tok.Seperator)) | |
198 lexer.next(); | |
199 return res.safe(); | |
200 } | |
201 | |
202 /** | |
203 Parse struct | |
204 */ | |
205 Decl parseStruct(Id type, Id iden) | |
206 { | |
207 auto decl = action.actOnDeclarator(type, iden, null); | |
208 | |
209 require(Tok.OpenBrace); | |
210 | |
211 while(lexer.peek.isBasicType || lexer.peek.isIdentifier) | |
212 { | |
213 auto m_decl = parseDecl(); | |
214 action.actOnStructMember(decl, m_decl); | |
215 /* Id var_type = Id(lexer.next); | |
216 Id var_iden = Id(require(Tok.Identifier)); | |
217 Token next = lexer.peek(); | |
218 if (next.type == Tok.Seperator) | |
219 { | |
220 Token sep = lexer.next(); | |
221 action.actOnStructMember(decl, var_type, var_iden, null); | |
222 continue; | |
223 } | |
224 else if (next.type == Tok.Assign) | |
225 { | |
226 Token assign = lexer.next(); | |
227 Exp exp = parseExpression(); | |
228 require(Tok.Seperator); | |
229 action.actOnStructMember(decl, var_type, var_iden, exp); | |
230 continue; | |
231 } | |
232 messages.report(UnexpectedTok, next.location).arg(next.getType);*/ | |
233 } | |
234 | |
235 require(Tok.CloseBrace); | |
236 | |
237 return decl; | |
238 } | |
239 | |
240 /** | |
241 Parse statements. | |
242 | |
243 This is the place to attack! | |
244 */ | |
245 Stmt parseStatement() | |
246 { | |
247 Token t = lexer.peek; | |
248 | |
249 switch(t.type) | |
250 { | |
251 case Tok.Return: | |
252 Token ret = lexer.next; | |
253 Exp exp; | |
254 if (lexer.peek.type != Tok.Seperator) | |
255 exp = parseExpression(); | |
256 require(Tok.Seperator); | |
257 return action.actOnReturnStmt(ret, exp); | |
258 | |
259 /* | |
260 if (cond) | |
261 single statement | compound statement | |
262 [else | |
263 single statement | compound statement] | |
264 */ | |
265 case Tok.If: | |
266 Token _if = lexer.next(); | |
267 | |
268 require(Tok.OpenParentheses); | |
269 Exp cond = parseExpression(); | |
270 require(Tok.CloseParentheses); | |
271 | |
272 Stmt thenB = parseSingleOrCompoundStatement(); | |
273 | |
274 // if there is no else part we use the if as token, to have | |
275 // something than can be passed along | |
276 Token _else = _if; | |
277 Stmt elseB; | |
278 if (lexer.peek.type == Tok.Else) | |
279 { | |
280 _else = lexer.next; | |
281 elseB = parseSingleOrCompoundStatement(); | |
282 } | |
283 | |
284 return action.actOnIfStmt(_if, cond, thenB, _else, elseB); | |
285 | |
286 /* | |
287 while (cond) | |
288 single statement | compound statement | |
289 */ | |
290 case Tok.While: | |
291 Token _while = lexer.next; | |
292 require(Tok.OpenParentheses); | |
293 Exp cond = parseExpression(); | |
294 require(Tok.CloseParentheses); | |
295 Stmt bodyStmt = parseSingleOrCompoundStatement(); | |
296 return action.actOnWhileStmt(_while, cond, bodyStmt); | |
297 | |
298 /* | |
299 One of four things: | |
300 A declaration of a function/variable `type id ...` | |
301 A direct assignment `id = exp;` | |
302 An indirect assignment `id.id = exp` | |
303 Some sort of free standing expression | |
304 | |
305 The assignments should be handled as binary expressions? | |
306 */ | |
307 case Tok.Identifier: | |
308 Token iden = lexer.peek; | |
309 Token n = lexer.peek(1); | |
310 // Must be an decl, if we start with a basic type, or two | |
311 // identifiers in a row | |
312 if (iden.isBasicType() || iden.isIdentifier()) | |
313 { | |
314 if ( n.type == Tok.Star || n.type == Tok.OpenBracket) | |
315 { | |
316 int len = peekParseType; | |
317 if(lexer.peek(len).type == Tok.Identifier && len != 0) | |
318 return action.actOnDeclStmt(parseVarDecl()); | |
319 | |
320 Exp exp = parseExpression(); | |
321 require(Tok.Seperator); | |
322 return action.actOnExprStmt(exp); | |
323 } | |
324 | |
325 if (n.isIdentifier()) | |
326 return action.actOnDeclStmt(parseVarDecl()); | |
327 | |
328 // Expression: a.b, a = b, a(b) etc. | |
329 Exp exp = parseExpression(); | |
330 require(Tok.Seperator); | |
331 return action.actOnExprStmt(exp); | |
332 } | |
333 | |
334 case Tok.Switch: | |
335 messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType); | |
336 return null; | |
337 | |
338 default: | |
339 if (t.isBasicType()) | |
340 goto case Tok.Identifier; | |
341 if (t.type == Tok.Star) | |
342 { | |
343 auto exp = parseExpression(); | |
344 require(Tok.Seperator); | |
345 return action.actOnExprStmt(exp); | |
346 } | |
347 messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType); | |
348 return null; | |
349 } | |
350 messages.report(UnexpectedTok, t.location); | |
351 return null; | |
352 } | |
353 | |
354 Decl parseVarDecl() | |
355 { | |
356 // manually hardcoded to only support "type id [= exp];" | |
357 // as that is the only thing the codegen understands | |
358 Id type = parseType; | |
359 Id id = Id(lexer.next); | |
360 Exp init; | |
361 if (skip(Tok.Assign)) | |
362 init = parseExpression(); | |
363 require(Tok.Seperator); | |
364 Decl d = action.actOnDeclarator(type, id, init); | |
365 return d; | |
366 } | |
367 | |
368 /** | |
369 Parses a function/method given the already parsed return type and name | |
370 */ | |
371 Decl parseFunc(ref Id type, ref Id name) | |
372 { | |
373 Decl func = action.actOnStartOfFunctionDef(type, name); | |
374 parseFuncArgs(func); | |
375 | |
376 if(lexer.peek.type == Tok.Seperator) | |
377 { | |
378 lexer.next; | |
379 return func; | |
380 } | |
381 Stmt stmt = parseCompoundStatement(); | |
382 | |
383 return action.actOnEndOfFunction(func, stmt); | |
384 } | |
385 | |
386 /** | |
387 Parse the function arguments, assumes current token is (. | |
388 | |
389 Both the intitial paren and the ending paren is consumed. | |
390 */ | |
391 void parseFuncArgs(Decl func) | |
392 { | |
393 require(Tok.OpenParentheses); // Remove the "(" token. | |
394 | |
395 while(lexer.peek.type != Tok.CloseParentheses) | |
396 { | |
397 auto t = parseType(); | |
398 Id i; | |
399 if(lexer.peek.type == Tok.Identifier) | |
400 i = parseIdentifier(); | |
401 action.addFuncArg(func, t, i); | |
402 | |
403 if(lexer.peek.type == Tok.Comma) | |
404 lexer.next; | |
405 } | |
406 | |
407 require(Tok.CloseParentheses); // Remove the ")" | |
408 } | |
409 | |
410 /** | |
411 Parse either a block, or a single statement as allowed after if, while | |
412 and for. | |
413 */ | |
414 Stmt parseSingleOrCompoundStatement() | |
415 { | |
416 if (lexer.peek.type == Tok.OpenBrace) | |
417 return parseCompoundStatement(); | |
418 return parseStatement(); | |
419 } | |
420 | |
421 /** | |
422 Parses a function-body or similar, expects an opening brace to be the | |
423 current token. | |
424 | |
425 Will consume both the starting { and ending } | |
426 */ | |
427 Stmt parseCompoundStatement() | |
428 { | |
429 Token lbrace = require(Tok.OpenBrace); | |
430 SmallArray!(Stmt, 32) stmts; // Try to use the stack only | |
431 while (lexer.peek.type != Tok.CloseBrace) | |
432 stmts ~= parseStatement(); | |
433 Token rbrace = require(Tok.CloseBrace); | |
434 return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe()); | |
435 } | |
436 | |
437 Id parseIdentifier() | |
438 { | |
439 Token tok = lexer.next; | |
440 | |
441 if (tok.type is Tok.Identifier) | |
442 return Id(tok); | |
443 | |
444 messages.report(UnexpectedTokSingle, tok.location) | |
445 .arg(tok.getType) | |
446 .arg(Tok.Identifier); | |
447 } | |
448 | |
449 ModuleName parseModuleName() | |
450 { | |
451 auto id = parseIdentifier(); | |
452 ModuleName mod; | |
453 while (skip(Tok.Dot)) | |
454 { | |
455 mod.packages ~= id; | |
456 if (lexer.peek.type != Tok.Identifier) { | |
457 messages.report(ExpectedIdAfterPackage, lexer.peek.location); | |
458 goto Lerror; | |
459 } | |
460 id = parseIdentifier(); | |
461 } | |
462 mod.id = id; | |
463 return mod; | |
464 Lerror: | |
465 while (!skip(Tok.Seperator)) | |
466 lexer.next(); | |
467 return mod; | |
468 } | |
469 | |
470 | |
471 /** | |
472 Parse a type - this includes pointer and array(at some point) types. | |
473 */ | |
474 Id parseType() | |
475 { | |
476 Token type = lexer.next; | |
477 | |
478 Id currentType; | |
479 | |
480 if ( !(type.isBasicType || type.type == Tok.Identifier) ) | |
481 messages.report(InvalidType, type.location); | |
482 | |
483 currentType = Id(type); | |
484 type = lexer.peek; | |
485 | |
486 while(type.type == Tok.Star || type.type == Tok.OpenBracket) | |
487 { | |
488 if(type.type == Tok.Star) | |
489 { | |
490 currentType = PointerId(currentType); | |
491 lexer.next; | |
492 } | |
493 else | |
494 { | |
495 lexer.next; | |
496 if(lexer.peek.type == Tok.Integer) | |
497 currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer))); | |
498 require(Tok.CloseBracket); | |
499 | |
500 } | |
501 type = lexer.peek; | |
502 } | |
503 | |
504 return currentType; | |
505 } | |
506 | |
507 int peekParseType() | |
508 { | |
509 int i; | |
510 Token type = lexer.peek(i); | |
511 | |
512 Id currentType; | |
513 | |
514 if ( !(type.isBasicType || type.type == Tok.Identifier) ) | |
515 return 0; | |
516 | |
517 currentType = Id(type); | |
518 type = lexer.peek(++i); | |
519 | |
520 while(type.type == Tok.Star || type.type == Tok.OpenBracket) | |
521 { | |
522 if(type.type == Tok.Star) | |
523 { | |
524 i++; | |
525 } | |
526 else | |
527 { | |
528 if(lexer.peek(i++).type != Tok.OpenBracket) | |
529 return 0; | |
530 if(lexer.peek(i).type == Tok.Integer) | |
531 { | |
532 i++; | |
533 if(lexer.peek(i++).type != Tok.CloseBracket) | |
534 return 0; | |
535 } | |
536 else | |
537 if(lexer.peek(i++).type != Tok.CloseBracket) | |
538 return 0; | |
539 | |
540 } | |
541 type = lexer.peek(i); | |
542 } | |
543 | |
544 return i; | |
545 } | |
546 | |
547 private: | |
548 // -- Expression parsing -- // | |
549 Exp parsePostfixExp(Exp target) | |
550 { | |
551 switch(lexer.peek.type) | |
552 { | |
553 case Tok.Dot: | |
554 switch(lexer.peek(1).type) | |
555 { | |
556 case Tok.Identifier: | |
557 Token op = lexer.next; | |
558 Id member = Id(lexer.next); | |
559 Exp exp = action.actOnMemberReference(target, op.location, member); | |
560 return parsePostfixExp(exp); | |
561 default: | |
562 Token t = lexer.peek(1); | |
563 messages.report(ExpectedIdAfterDot, t.location); | |
564 } | |
565 case Tok.OpenBracket: | |
566 Token open = lexer.next; | |
567 Exp index = parseExpression(); | |
568 Token close = require(Tok.CloseBracket); | |
569 return action.actOnIndexEpr(target, open, index, close); | |
570 default: | |
571 return target; | |
572 } | |
573 } | |
574 | |
575 Exp parseExpression(int p = 0) | |
576 { | |
577 auto exp = P(); | |
578 Token next = lexer.peek(); | |
579 BinOp* op = null; | |
580 while ((op = binary(next.type)) != null && op.prec >= p) | |
581 { | |
582 lexer.next(); | |
583 int q = op.leftAssoc? 1 + op.prec : op.prec; | |
584 auto exp2 = parseExpression(q); | |
585 exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2); | |
586 next = lexer.peek(); | |
587 } | |
588 | |
589 return exp; | |
590 } | |
591 | |
592 Exp P() | |
593 { | |
594 Token next = lexer.next(); | |
595 if (auto op = unary(next.type)) | |
596 return action.actOnUnaryOp(next, parseExpression(op.prec)); | |
597 else if (next.type == Tok.OpenParentheses) | |
598 { | |
599 auto e = parseExpression(0); | |
600 require(Tok.CloseParentheses); | |
601 return e; | |
602 } | |
603 else if (next.type == Tok.Identifier) | |
604 { | |
605 Exp value = action.actOnIdentifierExp(Id(next)); | |
606 Exp iden = parsePostfixExp(value); | |
607 switch(lexer.peek.type) | |
608 { | |
609 case Tok.OpenParentheses: | |
610 Token lp = lexer.next; | |
611 SmallArray!(Exp, 8) args; | |
612 while(lexer.peek.type != Tok.CloseParentheses) | |
613 { | |
614 if(lexer.peek.type == Tok.Comma) | |
615 lexer.next; | |
616 args ~= parseExpression(); | |
617 } | |
618 | |
619 Token rp = lexer.next(); | |
620 return action.actOnCallExpr(iden, lp, args.unsafe(), rp); | |
621 | |
622 default: | |
623 return iden; | |
624 } | |
625 } | |
626 else if (next.type == Tok.Cast) | |
627 return parseCast(next); | |
628 else if (next.type == Tok.Integer) | |
629 return action.actOnNumericConstant(next); | |
630 else if (next.type == Tok.String) | |
631 return action.actOnStringExp(next); | |
632 | |
633 messages.report(ExpectedExp, next.location) | |
634 .fatal(ExitLevel.Parser); | |
635 return null; | |
636 } | |
637 | |
638 Exp parseCast(ref Token _cast) | |
639 { | |
640 require(Tok.OpenParentheses); | |
641 auto next = lexer.next; | |
642 if(!next.isBasicType && !next.isIdentifier) | |
643 messages.report(ExpectedCastType, next.location); | |
644 | |
645 require(Tok.CloseParentheses); | |
646 auto exp = P(); | |
647 return action.actOnCastExpr(_cast, Id(next), exp); | |
648 } | |
649 | |
650 struct UnOp | |
651 { | |
652 Tok tokenType; | |
653 int prec; | |
654 } | |
655 | |
656 static const UnOp[] _unary = | |
657 [ | |
658 {Tok.Minus, 4}, | |
659 {Tok.Star, 4} | |
660 ]; | |
661 UnOp* unary(Tok t) | |
662 { | |
663 foreach (ref op; _unary) | |
664 if (op.tokenType == t) | |
665 return &op; | |
666 return null; | |
667 } | |
668 | |
669 struct BinOp | |
670 { | |
671 Tok tokenType; | |
672 int prec; | |
673 bool leftAssoc; | |
674 Operator operator; | |
675 } | |
676 | |
677 static const BinOp[] _binary = | |
678 [ | |
679 {Tok.Assign, 1, false, Operator.Assign}, | |
680 | |
681 {Tok.Eq, 2, true, Operator.Eq}, | |
682 {Tok.Ne, 2, true, Operator.Ne}, | |
683 | |
684 {Tok.Lt, 2, true, Operator.Lt}, | |
685 {Tok.Le, 2, true, Operator.Le}, | |
686 {Tok.Gt, 2, true, Operator.Gt}, | |
687 {Tok.Ge, 2, true, Operator.Ge}, | |
688 | |
689 {Tok.Plus, 3, true, Operator.Add}, | |
690 {Tok.Minus, 3, true, Operator.Sub}, | |
691 | |
692 {Tok.Star, 5, true, Operator.Mul}, | |
693 {Tok.Slash, 5, true, Operator.Div}, | |
694 {Tok.Percent, 5, true, Operator.Mod} | |
695 ]; | |
696 BinOp* binary(Tok t) | |
697 { | |
698 foreach (ref op; _binary) | |
699 if (op.tokenType == t) | |
700 return &op; | |
701 return null; | |
702 } | |
703 | |
704 private: | |
705 | |
706 Token require(Tok t) | |
707 { | |
708 if (lexer.peek().type != t) | |
709 messages.report(UnexpectedTokSingle, lexer.peek.location) | |
710 .arg(lexer.peek.getType) | |
711 .arg(t); | |
712 return lexer.next(); | |
713 } | |
714 | |
715 bool skip(Tok t) | |
716 { | |
717 if (lexer.peek().type != t) | |
718 return false; | |
719 lexer.next(); | |
720 return true; | |
721 } | |
722 | |
723 bool on_a(Tok t) | |
724 { | |
725 return lexer.peek.type == t; | |
726 } | |
727 | |
728 Lexer lexer; | |
729 SourceManager sm; | |
730 } | |
731 |