comparison src/parser/Parser.d @ 206:d3c148ca429b

Major moving of files: all source now goes into src, all docs into docs.
author Anders Johnsen <skabet@gmail.com>
date Tue, 12 Aug 2008 18:14:56 +0200
parents
children e0551773a005
1 module parser.Parser;
2
3 import lexer.Lexer,
4 lexer.Token;
5
6 import parser.Action;
7
8 import basic.Message;
9
10 import basic.SmallArray,
11 basic.SourceManager;
12
13 import tango.io.Stdout,
14 Integer = tango.text.convert.Integer;
15
16 class Parser
17 {
18 Action action;
19 MessageHandler messages;
20 alias Object Exp;
21 alias Object Stmt;
22 alias Object Decl;
23 alias Object Module;
24
25 this(MessageHandler messages)
26 {
27 this.messages = messages;
28 }
29
30 Module parse(SourceManager sm, Lexer lexer, Action act)
31 {
32 this.sm = sm;
33 this.lexer = lexer;
34 this.action = act;
35
36 Module m;
37 if (lexer.peek.type == Tok.Module)
38 {
39 Token _module = lexer.next;
40 ModuleName name = parseModuleName();
41 m = action.actOnModule(_module, sm.getText(name.asRange()));
42 require(Tok.Seperator);
43 }
44 else
45 {
46 SLoc loc = lexer.peek.location;
47 m = action.actOnImplicitModule(loc, sm.getFile(loc));
48 }
49
50 while (lexer.peek.type != Tok.EOF)
51 foreach (d; parseDeclDef())
52 action.actOnModuleDecl(m, d);
53
54 return m;
55 }
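// Rough usage sketch for the entry point above. How SourceManager, Lexer and
// the Action implementation get constructed is not shown in this file, so the
// setup below is an assumption about the surrounding driver, not its real API:
//
//     auto parser = new Parser(messages);
//     Module mod = parser.parse(sm, lexer, new SomeAction); // hypothetical types
//
// parse() handles an optional `module a.b.c;` header (or synthesizes an
// implicit module name from the file), then loops over top-level declarations
// until EOF.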
56
57 private:
58 Decl[] parseDeclDef()
59 {
60 Token t = lexer.peek;
61 if (t.type == Tok.Import)
62 return parseImports();
63 else
64 return [parseDecl()];
65 }
66
67 Decl parseDecl()
68 {
69 Token t = lexer.peek;
70
71 if (t.isBasicType || t.isIdentifier)
72 {
73 Id type;
74 Id iden;
75 int len = peekParseType;
76 if(lexer.peek(len).type == Tok.Identifier && len != 0)
77 {
78 type = parseType;
79 parseDeclAfterInvalidType:
80 iden = Id(require(Tok.Identifier));
81 Token next = lexer.peek();
82 if (next.type == Tok.Seperator)
83 {
84 Token sep = lexer.next();
85 return action.actOnDeclarator(type, iden, null);
86 }
87 else if (next.type == Tok.Assign)
88 {
89 Token assign = lexer.next();
90 Exp exp = parseExpression();
91 require(Tok.Seperator);
92 return action.actOnDeclarator(type, iden, exp);
93 }
94 else if (next.type == Tok.OpenParentheses)
95 return parseFunc(type, iden);
96 else
97 messages.report(UnexpectedTok, next.location).arg(next.getType);
98 }
99 t = lexer.peek(len);
100 messages.report(InvalidDeclType, t.location)
101 .arg(sm.getText(t.asRange));
102 while(len--)
103 lexer.next;
104 while(lexer.peek.type != Tok.Identifier)
105 lexer.next;
106 type = Id(lexer.peek);
107 goto parseDeclAfterInvalidType;
108 }
109 else if (t.type == Tok.Struct)
110 {
111 Id type = Id(lexer.next);
112 Id iden = Id(require(Tok.Identifier));
113
114 return parseStruct(type, iden);
115 }
116 messages.report(UnexpectedTok, t.location)
117 .arg(t.getType)
118 .arg(Tok.Identifier)
119 .fatal(ExitLevel.Parser);
120 }
121
122 /**
123 Parse a series of imports belonging to a single import token.
124 */
125 Decl[] parseImports()
126 {
127 Token _import = require(Tok.Import);
128 SmallArray!(Decl) res;
129 void addToRes(Decl d) { res ~= d; }
130
131 bool done = false;
132 while (!done && !on_a(Tok.Seperator))
133 {
134 ModuleName mod = parseModuleName();
135 Token tok = lexer.peek;
136 switch (tok.type)
137 {
138 case Tok.Comma:
139 // import A, B.C;
140 // parse another module-name
141 lexer.next();
142 // the module itself is added once by the shared call after the switch
143 break;
144 case Tok.Assign:
145 // import B = A.A;
146 // ^- must be a single identifier
147 // renamed import
148 if (mod.packages.length != 0)
149 {
150 SLoc loc = mod.packages[0].tok.location;
151 messages.report(RenameMustBeSingleIdent, loc);
152 }
153 //if (isStatic)
154 // error("Static imports cannot be renamed");
155 lexer.next();
156 Id name = mod.id;
157 mod = parseModuleName();
158 // create from mod and rename to `name`
159 res ~= action.actOnImport(_import, mod, &name);
160 continue; // the renamed import was already added; skip the shared call below
161 case Tok.Colon:
162 // import A : a;
163 // selective imports, potentially import A : print = a
164 lexer.next();
165 Decl d = action.actOnImport(_import, mod, null);
166 // do-while on a comma:
167 // add explicit symbol
168 do
169 {
170 Id sym = parseIdentifier();
171 Id dummy;
172 Id* name = null;
173 if (skip(Tok.Assign))
174 {
175 dummy = sym;
176 name = &dummy;
177 sym = parseIdentifier();
178 }
179 action.addSelectiveImport(d, sym, name);
180
181 } while (skip(Tok.Comma));
182 require(Tok.Seperator);
183 res ~= d;
184 return res.safe();
185 case Tok.Seperator:
186 done = true;
187 break;
188 default:
189 goto Lerror;
190 }
191 res ~= action.actOnImport(_import, mod, null);
192 }
193
194 require(Tok.Seperator);
195 return res.safe();
196 Lerror:
197 while (!on_a (Tok.Seperator))
198 lexer.next();
199 return res.safe();
200 }
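// Illustrative inputs for the import loop above, matching its inline comments
// (these are examples of source being parsed, not new behaviour):
//
//     import A, B.C;            // Comma: several modules after one `import`
//     import B = A.A;           // Assign: renamed import; the name on the
//                               //         left must be a single identifier
//     import A : a, print = b;  // Colon: selective import, each symbol may
//                               //        itself be renamed with `=`
//     import A;                 // Seperator: plain single import
//
// Every accepted form ends at a Tok.Seperator (`;`); anything else takes the
// Lerror path, which skips ahead to the next `;` and returns what it has.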
201
202 /**
203 Parse a struct declaration - `struct` and the name have already been consumed by parseDecl.
204 */
205 Decl parseStruct(Id type, Id iden)
206 {
207 auto decl = action.actOnDeclarator(type, iden, null);
208
209 require(Tok.OpenBrace);
210
211 while(lexer.peek.isBasicType || lexer.peek.isIdentifier)
212 {
213 auto m_decl = parseDecl();
214 action.actOnStructMember(decl, m_decl);
215 /* Id var_type = Id(lexer.next);
216 Id var_iden = Id(require(Tok.Identifier));
217 Token next = lexer.peek();
218 if (next.type == Tok.Seperator)
219 {
220 Token sep = lexer.next();
221 action.actOnStructMember(decl, var_type, var_iden, null);
222 continue;
223 }
224 else if (next.type == Tok.Assign)
225 {
226 Token assign = lexer.next();
227 Exp exp = parseExpression();
228 require(Tok.Seperator);
229 action.actOnStructMember(decl, var_type, var_iden, exp);
230 continue;
231 }
232 messages.report(UnexpectedTok, next.location).arg(next.getType);*/
233 }
234
235 require(Tok.CloseBrace);
236
237 return decl;
238 }
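// Example input for parseStruct. parseDecl() has already consumed `struct`
// and the name, so for
//
//     struct Vec { int x; int y = 2; }
//
// this routine requires the `{`, parses each member through parseDecl(),
// registers it with actOnStructMember, and stops at the `}`.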
239
240 /**
241 Parse statements.
242
243 This is the place to attack!
244 */
245 Stmt parseStatement()
246 {
247 Token t = lexer.peek;
248
249 switch(t.type)
250 {
251 case Tok.Return:
252 Token ret = lexer.next;
253 Exp exp;
254 if (lexer.peek.type != Tok.Seperator)
255 exp = parseExpression();
256 require(Tok.Seperator);
257 return action.actOnReturnStmt(ret, exp);
258
259 /*
260 if (cond)
261 single statement | compound statement
262 [else
263 single statement | compound statement]
264 */
265 case Tok.If:
266 Token _if = lexer.next();
267
268 require(Tok.OpenParentheses);
269 Exp cond = parseExpression();
270 require(Tok.CloseParentheses);
271
272 Stmt thenB = parseSingleOrCompoundStatement();
273
274 // if there is no else part we use the if token, to have
275 // something that can be passed along
276 Token _else = _if;
277 Stmt elseB;
278 if (lexer.peek.type == Tok.Else)
279 {
280 _else = lexer.next;
281 elseB = parseSingleOrCompoundStatement();
282 }
283
284 return action.actOnIfStmt(_if, cond, thenB, _else, elseB);
285
286 /*
287 while (cond)
288 single statement | compound statement
289 */
290 case Tok.While:
291 Token _while = lexer.next;
292 require(Tok.OpenParentheses);
293 Exp cond = parseExpression();
294 require(Tok.CloseParentheses);
295 Stmt bodyStmt = parseSingleOrCompoundStatement();
296 return action.actOnWhileStmt(_while, cond, bodyStmt);
297
298 /*
299 One of four things:
300 A declaration of a function/variable `type id ...`
301 A direct assignment `id = exp;`
302 An indirect assignment `id.id = exp`
303 Some sort of free standing expression
304
305 The assignments should be handled as binary expressions?
306 */
307 case Tok.Identifier:
308 Token iden = lexer.peek;
309 Token n = lexer.peek(1);
310 // Must be a decl if we start with a basic type, or two
311 // identifiers in a row
312 if (iden.isBasicType() || iden.isIdentifier())
313 {
314 if ( n.type == Tok.Star || n.type == Tok.OpenBracket)
315 {
316 int len = peekParseType;
317 if(lexer.peek(len).type == Tok.Identifier && len != 0)
318 return action.actOnDeclStmt(parseVarDecl());
319
320 Exp exp = parseExpression();
321 require(Tok.Seperator);
322 return action.actOnExprStmt(exp);
323 }
324
325 if (n.isIdentifier())
326 return action.actOnDeclStmt(parseVarDecl());
327
328 // Expression: a.b, a = b, a(b) etc.
329 Exp exp = parseExpression();
330 require(Tok.Seperator);
331 return action.actOnExprStmt(exp);
332 }
333
334 case Tok.Switch:
335 messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType);
336 return null;
337
338 default:
339 if (t.isBasicType())
340 goto case Tok.Identifier;
341 if (t.type == Tok.Star)
342 {
343 auto exp = parseExpression();
344 require(Tok.Seperator);
345 return action.actOnExprStmt(exp);
346 }
347 messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType);
348 return null;
349 }
350 messages.report(UnexpectedTok, t.location);
351 return null;
352 }
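// How the Tok.Identifier case above disambiguates, sketched on typical inputs:
//
//     int x = 1;     // starts with a basic type             -> parseVarDecl()
//     MyType y;      // two identifiers in a row             -> parseVarDecl()
//     int* p;        // `*` or `[` after the first token: peekParseType
//     int[2] a;      //   finds a full type before an identifier -> decl
//     x = y + 1;     // anything else is an expression statement
//     foo(x);        //   (calls, assignments, member access, ...)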
353
354 Decl parseVarDecl()
355 {
356 // manually hardcoded to only support "type id [= exp];"
357 // as that is the only thing the codegen understands
358 Id type = parseType;
359 Id id = Id(lexer.next);
360 Exp init;
361 if (skip(Tok.Assign))
362 init = parseExpression();
363 require(Tok.Seperator);
364 Decl d = action.actOnDeclarator(type, id, init);
365 return d;
366 }
367
368 /**
369 Parses a function/method given the already parsed return type and name
370 */
371 Decl parseFunc(ref Id type, ref Id name)
372 {
373 Decl func = action.actOnStartOfFunctionDef(type, name);
374 parseFuncArgs(func);
375
376 if(lexer.peek.type == Tok.Seperator)
377 {
378 lexer.next;
379 return func;
380 }
381 Stmt stmt = parseCompoundStatement();
382
383 return action.actOnEndOfFunction(func, stmt);
384 }
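// parseFunc covers both a bare prototype and a full definition:
//
//     int add(int a, int b);                   // `;` right after the argument
//                                              // list: the Decl is returned as-is
//     int add(int a, int b) { return a + b; }  // otherwise a compound statement
//                                              // must follow and is attached via
//                                              // actOnEndOfFunction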
385
386 /**
387 Parse the function arguments; assumes the current token is (.
388
389 Both the initial paren and the ending paren are consumed.
390 */
391 void parseFuncArgs(Decl func)
392 {
393 require(Tok.OpenParentheses); // Remove the "(" token.
394
395 while(lexer.peek.type != Tok.CloseParentheses)
396 {
397 auto t = parseType();
398 Id i;
399 if(lexer.peek.type == Tok.Identifier)
400 i = parseIdentifier();
401 action.addFuncArg(func, t, i);
402
403 if(lexer.peek.type == Tok.Comma)
404 lexer.next;
405 }
406
407 require(Tok.CloseParentheses); // Remove the ")"
408 }
409
410 /**
411 Parse either a block, or a single statement as allowed after if, while
412 and for.
413 */
414 Stmt parseSingleOrCompoundStatement()
415 {
416 if (lexer.peek.type == Tok.OpenBrace)
417 return parseCompoundStatement();
418 return parseStatement();
419 }
420
421 /**
422 Parses a function-body or similar, expects an opening brace to be the
423 current token.
424
425 Will consume both the starting { and ending }
426 */
427 Stmt parseCompoundStatement()
428 {
429 Token lbrace = require(Tok.OpenBrace);
430 SmallArray!(Stmt, 32) stmts; // Try to use the stack only
431 while (lexer.peek.type != Tok.CloseBrace)
432 stmts ~= parseStatement();
433 Token rbrace = require(Tok.CloseBrace);
434 return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
435 }
436
437 Id parseIdentifier()
438 {
439 Token tok = lexer.next;
440
441 if (tok.type is Tok.Identifier)
442 return Id(tok);
443
444 messages.report(UnexpectedTokSingle, tok.location)
445 .arg(tok.getType)
446 .arg(Tok.Identifier);
447 }
448
449 ModuleName parseModuleName()
450 {
451 auto id = parseIdentifier();
452 ModuleName mod;
453 while (skip(Tok.Dot))
454 {
455 mod.packages ~= id;
456 if (lexer.peek.type != Tok.Identifier) {
457 messages.report(ExpectedIdAfterPackage, lexer.peek.location);
458 goto Lerror;
459 }
460 id = parseIdentifier();
461 }
462 mod.id = id;
463 return mod;
464 Lerror:
465 while (!skip(Tok.Seperator))
466 lexer.next();
467 return mod;
468 }
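// For `foo.bar.baz` the result is ModuleName(packages = [foo, bar], id = baz);
// a plain `foo` becomes ModuleName(packages = [], id = foo). On a missing
// identifier after a dot, everything up to the next `;` is skipped and the
// partial name is returned.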
469
470
471 /**
472 Parse a type - this includes pointer and array (at some point) types.
473 */
474 Id parseType()
475 {
476 Token type = lexer.next;
477
478 Id currentType;
479
480 if ( !(type.isBasicType || type.type == Tok.Identifier) )
481 messages.report(InvalidType, type.location);
482
483 currentType = Id(type);
484 type = lexer.peek;
485
486 while(type.type == Tok.Star || type.type == Tok.OpenBracket)
487 {
488 if(type.type == Tok.Star)
489 {
490 currentType = PointerId(currentType);
491 lexer.next;
492 }
493 else
494 {
495 lexer.next;
496 if(lexer.peek.type == Tok.Integer)
497 currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer)));
498 require(Tok.CloseBracket);
499
500 }
501 type = lexer.peek;
502 }
503
504 return currentType;
505 }
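// What the loop above builds, using the same wrappers as the code (Id,
// PointerId, ArrayId):
//
//     int        -> Id(int)
//     int*       -> PointerId(Id(int))
//     int**      -> PointerId(PointerId(Id(int)))
//     int[4]     -> ArrayId(Id(int), numeric constant 4)
//     int*[4]    -> ArrayId(PointerId(Id(int)), numeric constant 4)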
506
507 int peekParseType()
508 {
509 int i;
510 Token type = lexer.peek(i);
511
512 Id currentType;
513
514 if ( !(type.isBasicType || type.type == Tok.Identifier) )
515 return 0;
516
517 currentType = Id(type);
518 type = lexer.peek(++i);
519
520 while(type.type == Tok.Star || type.type == Tok.OpenBracket)
521 {
522 if(type.type == Tok.Star)
523 {
524 i++;
525 }
526 else
527 {
528 if(lexer.peek(i++).type != Tok.OpenBracket)
529 return 0;
530 if(lexer.peek(i).type == Tok.Integer)
531 {
532 i++;
533 if(lexer.peek(i++).type != Tok.CloseBracket)
534 return 0;
535 }
536 else
537 if(lexer.peek(i++).type != Tok.CloseBracket)
538 return 0;
539
540 }
541 type = lexer.peek(i);
542 }
543
544 return i;
545 }
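// peekParseType is the non-consuming twin of parseType: it counts how many
// tokens a well-formed type would occupy and returns 0 when the lookahead is
// not a type. For `int*[4] x;` it returns 5 (int, *, [, 4, ]), so the caller's
// check `lexer.peek(len).type == Tok.Identifier && len != 0` lands on `x` and
// the input is treated as a declaration.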
546
547 private:
548 // -- Expression parsing -- //
549 Exp parsePostfixExp(Exp target)
550 {
551 switch(lexer.peek.type)
552 {
553 case Tok.Dot:
554 switch(lexer.peek(1).type)
555 {
556 case Tok.Identifier:
557 Token op = lexer.next;
558 Id member = Id(lexer.next);
559 Exp exp = action.actOnMemberReference(target, op.location, member);
560 return parsePostfixExp(exp);
561 default:
562 Token t = lexer.peek(1);
563 messages.report(ExpectedIdAfterDot, t.location);
564 }
565 case Tok.OpenBracket:
566 Token open = lexer.next;
567 Exp index = parseExpression();
568 Token close = require(Tok.CloseBracket);
569 return action.actOnIndexEpr(target, open, index, close);
570 default:
571 return target;
572 }
573 }
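// Postfix chains handled above: `a.b.c` becomes nested member references (the
// Dot case recurses into parsePostfixExp), while `a[i]` becomes an index
// expression. Note that the index case returns without recursing, so only a
// single trailing `[...]` is picked up here.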
574
575 Exp parseExpression(int p = 0)
576 {
577 auto exp = P();
578 Token next = lexer.peek();
579 BinOp* op = null;
580 while ((op = binary(next.type)) != null && op.prec >= p)
581 {
582 lexer.next();
583 int q = op.leftAssoc? 1 + op.prec : op.prec;
584 auto exp2 = parseExpression(q);
585 exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2);
586 next = lexer.peek();
587 }
588
589 return exp;
590 }
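// Precedence climbing traced on `a = b + c * d`, using the _binary table
// below (Assign: prec 1, right-assoc; Plus: prec 3, left; Star: prec 5, left):
//
//     parseExpression(0)
//       P()                          -> a
//       Assign (1 >= 0), right-assoc -> q = 1
//       parseExpression(1)
//         P()                        -> b
//         Plus (3 >= 1), left-assoc  -> q = 4
//         parseExpression(4)
//           P()                      -> c
//           Star (5 >= 4)            -> q = 6
//           parseExpression(6)       -> d
//           result                   -> (c * d)
//         result                     -> (b + (c * d))
//       result                       -> (a = (b + (c * d)))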
591
592 Exp P()
593 {
594 Token next = lexer.next();
595 if (auto op = unary(next.type))
596 return action.actOnUnaryOp(next, parseExpression(op.prec));
597 else if (next.type == Tok.OpenParentheses)
598 {
599 auto e = parseExpression(0);
600 require(Tok.CloseParentheses);
601 return e;
602 }
603 else if (next.type == Tok.Identifier)
604 {
605 Exp value = action.actOnIdentifierExp(Id(next));
606 Exp iden = parsePostfixExp(value);
607 switch(lexer.peek.type)
608 {
609 case Tok.OpenParentheses:
610 Token lp = lexer.next;
611 SmallArray!(Exp, 8) args;
612 while(lexer.peek.type != Tok.CloseParentheses)
613 {
614 if(lexer.peek.type == Tok.Comma)
615 lexer.next;
616 args ~= parseExpression();
617 }
618
619 Token rp = lexer.next();
620 return action.actOnCallExpr(iden, lp, args.unsafe(), rp);
621
622 default:
623 return iden;
624 }
625 }
626 else if (next.type == Tok.Cast)
627 return parseCast(next);
628 else if (next.type == Tok.Integer)
629 return action.actOnNumericConstant(next);
630 else if (next.type == Tok.String)
631 return action.actOnStringExp(next);
632
633 messages.report(ExpectedExp, next.location)
634 .fatal(ExitLevel.Parser);
635 return null;
636 }
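// Unary operators bind with precedence 4 (see _unary below), so P() parses
// `-x * y` as -(x * y): the recursive parseExpression(4) call keeps consuming
// the higher-precedence `*` before actOnUnaryOp is applied.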
637
638 Exp parseCast(ref Token _cast)
639 {
640 require(Tok.OpenParentheses);
641 auto next = lexer.next;
642 if(!next.isBasicType && !next.isIdentifier)
643 messages.report(ExpectedCastType, next.location);
644
645 require(Tok.CloseParentheses);
646 auto exp = P();
647 return action.actOnCastExpr(_cast, Id(next), exp);
648 }
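// The cast operand is read with P() rather than parseExpression(), so in
// `cast(int) x + 1` the cast applies only to `x`; the surrounding
// parseExpression then applies the addition: (cast(int) x) + 1.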
649
650 struct UnOp
651 {
652 Tok tokenType;
653 int prec;
654 }
655
656 static const UnOp[] _unary =
657 [
658 {Tok.Minus, 4},
659 {Tok.Star, 4}
660 ];
661 UnOp* unary(Tok t)
662 {
663 foreach (ref op; _unary)
664 if (op.tokenType == t)
665 return &op;
666 return null;
667 }
668
669 struct BinOp
670 {
671 Tok tokenType;
672 int prec;
673 bool leftAssoc;
674 Operator operator;
675 }
676
677 static const BinOp[] _binary =
678 [
679 {Tok.Assign, 1, false, Operator.Assign},
680
681 {Tok.Eq, 2, true, Operator.Eq},
682 {Tok.Ne, 2, true, Operator.Ne},
683
684 {Tok.Lt, 2, true, Operator.Lt},
685 {Tok.Le, 2, true, Operator.Le},
686 {Tok.Gt, 2, true, Operator.Gt},
687 {Tok.Ge, 2, true, Operator.Ge},
688
689 {Tok.Plus, 3, true, Operator.Add},
690 {Tok.Minus, 3, true, Operator.Sub},
691
692 {Tok.Star, 5, true, Operator.Mul},
693 {Tok.Slash, 5, true, Operator.Div},
694 {Tok.Percent, 5, true, Operator.Mod}
695 ];
696 BinOp* binary(Tok t)
697 {
698 foreach (ref op; _binary)
699 if (op.tokenType == t)
700 return &op;
701 return null;
702 }
703
704 private:
705
706 Token require(Tok t)
707 {
708 if (lexer.peek().type != t)
709 messages.report(UnexpectedTokSingle, lexer.peek.location)
710 .arg(lexer.peek.getType)
711 .arg(t);
712 return lexer.next();
713 }
714
715 bool skip(Tok t)
716 {
717 if (lexer.peek().type != t)
718 return false;
719 lexer.next();
720 return true;
721 }
722
723 bool on_a(Tok t)
724 {
725 return lexer.peek.type == t;
726 }
727
728 Lexer lexer;
729 SourceManager sm;
730 }
731