Mercurial > projects > dang
view parser/Parser.d @ 44:495188f9078e new_gen
Big update - Moving towards a better, more separated parser
The parser no longer creates the AST directly, but through
callbacks (actions). This means the parser can be run with a different set
of actions that do something else.
The parser is not back to full strength yet, the main thing missing is the
various statements and structs.
Also added a SmallArray that uses the stack only until a given size is
exceeded, after which the array is copied onto the heap.
author | Anders Halager <halager@gmail.com> |
---|---|
date | Wed, 23 Apr 2008 00:57:45 +0200 |
parents | 858b9805843d |
children | 9bc660cbdbec |
line wrap: on
line source
module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import misc.Error;

import basic.SmallArray;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Recursive-descent parser.

  The parser does not build the AST itself; everything it recognizes is
  reported through an Action instance, so the same parse can drive
  different consumers.
 */
class Parser
{
    Action action;

    // The Action interface works on opaque handles; alias them so the
    // parser code reads naturally.
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;

public:
    /**
      Parse root-level declarations from the given lexer until EOF and
      return them all.
     */
    Decl[] parse(Lexer lexer)
    {
        this.lexer = lexer;
        action = new AstAction;

        Decl[] declarations;
        while (lexer.peek.type != Tok.EOF)
            declarations ~= parseRootDecl();
        return declarations;
    }

    /**
      Parse one root-level declaration: a plain variable, an initialized
      variable, or a function definition. Structs are recognized but not
      implemented yet.
     */
    Decl parseRootDecl()
    {
        Token t = lexer.peek;
        if (t.isBasicType || t.isIdentifier)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));

            Token next = lexer.peek();
            if (next.type == Tok.Seperator)
            {
                // "type name;" — declaration without initializer.
                Token sep = lexer.next();
                return action.actOnDeclarator(type, iden, null);
            }
            else if (next.type == Tok.Assign)
            {
                // "type name = expression;"
                Token assign = lexer.next();
                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnDeclarator(type, iden, exp);
            }
            else if (next.type == Tok.OpenParentheses)
            {
                // "type name(" — a function definition.
                return parseFunc(type, iden);
            }
            else
                throw error(__LINE__, PE.UnexpectedTok)
                    .tok(next)
                    .arg(next.getType);
        }
        else if (t.type == Tok.Struct)
        {
            // TODO: struct declarations are not parsed yet; falls
            // through to the error below.
        }
        char[] c = t.getType;
        throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(c);
    }

    /**
      Parse statements. This is the place to attack!

      Only return statements are implemented; the other statement kinds
      are recognized but currently produce null.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;
        switch (t.type)
        {
            case Tok.Return:
                Token ret = lexer.next;
                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            // TODO: if / while / assignment / switch statements are
            // not implemented yet.
            case Tok.If:
            case Tok.While:
            case Tok.Identifier:
            case Tok.Switch:
            default:
                return null;
        }
        // Note: the original had an unreachable error(...) call here;
        // every switch branch above returns, so it has been removed.
    }

    /**
      Parses a function/method given the already parsed return type and
      name. Reports start, arguments, body and end through the action.
     */
    Decl parseFunc(ref Id type, ref Id name)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name);
        parseFuncArgs(func);

        Stmt stmt = parseCompoundStatement();
        action.actOnEndOfFunction(func, stmt);

        return func;
    }

    /**
      Parse the function arguments, assumes current token is (.
      Both the initial paren and the ending paren is consumed.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        while (lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType();
            auto i = parseIdentifier();
            action.addFuncArg(func, t, i);
            if (lexer.peek.type == Tok.Comma)
                lexer.next;
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parses a function-body or similar, expects { to be current token.

      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);

        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while (lexer.peek.type != Tok.CloseBrace)
            stmts ~= parseStatement();

        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /// Consume and return an identifier token, or throw a parse error.
    Id parseIdentifier()
    {
        Token tok = lexer.next;

        if (tok.type is Tok.Identifier)
            return Id(tok);

        throw error(__LINE__, PE.UnexpectedTokSingle)
            .arg(tok.getType)
            .arg(Tok.Identifier)
            .tok(tok);
    }

    /// Consume and return a type token (basic type or identifier).
    Id parseType()
    {
        Token type = lexer.next;

        if (type.isBasicType || type.type == Tok.Identifier)
            return Id(type);

        // BUGFIX: the error was created but never thrown, letting a
        // non-void function fall off the end. Throw it like the other
        // parse methods do.
        char[] c = type.getType;
        throw error(__LINE__, "Unexpected token in Type parsing. Got %0")
            .arg(c);
    }

private:
    // -- Expression parsing -- //

    /**
      Parse a chain of member references ("a.b.c") starting from the
      already-parsed expression target.
     */
    Exp parseExpIdentifier(Exp target)
    {
        switch (lexer.peek.type)
        {
            case Tok.Dot:
                switch (lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = lexer.next;
                        Id member = Id(lexer.next);
                        Exp exp = action.actOnMemberReference(
                                target, op.location, member);
                        return parseExpIdentifier(exp);
                    default:
                        Token t = lexer.peek(1);
                        throw error(__LINE__,
                                "Expected identifier after '.'").tok(t);
                }
            default:
                return target;
        }
    }

    /**
      Precedence-climbing expression parser; only consumes binary
      operators whose precedence is at least p.
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            // A left-associative operator parses its right operand one
            // precedence level tighter.
            int q = op.leftAssoc ? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: unary operator application,
      parenthesized expression, identifier (possibly a call), or an
      integer literal.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return action.actOnUnaryOp(next, parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(next));
            Exp iden = parseExpIdentifier(value);

            switch (lexer.peek.type)
            {
                // TODO: Function calls are parsed but ignored
                case Tok.OpenParentheses:
                    lexer.next;
                    Exp[] args;
                    while (lexer.peek.type != Tok.CloseParentheses)
                    {
                        if (lexer.peek.type == Tok.Comma)
                            lexer.next;
                        args ~= parseExpression();
                    }
                    lexer.next();
                    return null; //new CallExp(iden, args);
                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Integer)
            return action.actOnNumericConstant(next);

        Stdout.formatln("{}", next.getType);
        assert(0, "Should not happen");
    }

    /// A unary operator and its precedence.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static const UnOp[] _unary = [{Tok.Sub, 4}];

    /// Look up the unary operator for a token type, or null.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator: precedence, associativity and Action operator.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    static const BinOp[] _binary =
    [
        {Tok.Eq,  2, true, Operator.Eq},
        {Tok.Ne,  2, true, Operator.Ne},
        {Tok.Lt,  2, true, Operator.Lt},
        {Tok.Le,  2, true, Operator.Le},
        {Tok.Gt,  2, true, Operator.Gt},
        {Tok.Ge,  2, true, Operator.Ge},

        {Tok.Add, 3, true, Operator.Add},
        {Tok.Sub, 3, true, Operator.Sub},

        {Tok.Mul, 5, true, Operator.Mul},
        {Tok.Div, 5, true, Operator.Div}
    ];

    /// Look up the binary operator for a token type, or null.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:
    /**
      Consume and return the current token if it has the given type;
      otherwise throw a parse error.
     */
    Token require(Tok t)
    {
        if (lexer.peek().type != t)
            throw error(__LINE__, PE.UnexpectedTokSingle)
                .arg(lexer.peek.getType)
                .arg(t);
        return lexer.next();
    }

    /// Consume the current token if it has the given type.
    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    /**
      Create (but do not throw) an Error for the given message, located
      at the current token. The parser source line is embedded so the
      failing rule can be found.
     */
    Error error(uint line, char[] errMsg)
    {
        Location loc = lexer.peek.location;
        auto e = new Error(
                "Parser.d(" ~ Integer.toString(line) ~ "): " ~ errMsg);
        e.loc(loc);
        return e;
    }

    /// Canned parse-error message templates.
    struct PE
    {
        static char[]
            UnexpectedTokMulti  = "Unexpected token, got %0 expected one of %1",
            UnexpectedTokSingle = "Unexpected token, got %0 expected %1",
            UnexpectedTok       = "Unexpected token %0";

        static char[]
            CaseValueMustBeInt  = "Cases can only be integer literals";
    }

    Lexer lexer;
}