Mercurial > projects > dang
view parser/Parser.d @ 37:858b9805843d new_gen
Bug-fixes
Void can now be used and is recognized as a keyword by lexer
Fixed a problem with casting on pointer types
The expression is now optional for a ReturnStmt (only legal in void funcs)
author | Anders Halager <halager@gmail.com> |
---|---|
date | Sun, 20 Apr 2008 23:53:05 +0200 |
parents | ce17bea8e9bd |
children | 495188f9078e |
line wrap: on
line source
module parser.Parser; import lexer.Lexer, lexer.Token; import ast.Exp, ast.Stmt, ast.Decl; import misc.Error; import tango.io.Stdout, Integer = tango.text.convert.Integer; class Parser { public: Decl[] parse(Lexer lexer) { this.lexer = lexer; Decl[] declarations; while(lexer.peek.type != Tok.EOF) { declarations ~= parseRootDecl; } return declarations; } Decl parseRootDecl() { Token t = lexer.peek; switch(t.type) { case Tok.Byte, Tok.Ubyte, Tok.Short, Tok.Ushort, Tok.Int, Tok.Uint, Tok.Long, Tok.Ulong, Tok.Float, Tok.Double, Tok.Bool, Tok.Void, Tok.Identifier: Identifier type = new Identifier(t); Token iden = lexer.peek(1); switch(iden.type) { case Tok.Identifier: Identifier identifier = new Identifier(iden); Token p = lexer.peek(2); switch(p.type) { case Tok.OpenParentheses: lexer.next; lexer.next; return parseFunc(type, identifier); case Tok.Seperator: lexer.next; lexer.next; require(Tok.Seperator); return new VarDecl(type, identifier, null); case Tok.Assign: lexer.next; lexer.next; lexer.next(); auto exp = parseExpression(); require(Tok.Seperator); return new VarDecl(type, identifier, exp); default: char[] c = p.getType; throw error(__LINE__, UnexpectedTokMulti) .tok(p) .arg(c) .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign); } break; default: char[] c = t.getType; throw error(__LINE__, UnexpectedTok).tok(iden).arg(c); } break; case Tok.Struct: lexer.next; Token iden = lexer.next; switch(iden.type) { case Tok.Identifier: Identifier identifier = new Identifier(iden); return new StructDecl (identifier, parseStruct()); default: throw error(__LINE__, "Expected struct identifier, but got %0").arg(iden.getType); } case Tok.EOF: return null; default: char[] c = t.getType; throw error(__LINE__, UnexpectedTok).tok(t).arg(c); } } Decl parseDecl() { Token t = lexer.peek; switch(t.type) { case Tok.Byte, Tok.Ubyte, Tok.Short, Tok.Ushort, Tok.Int, Tok.Uint, Tok.Long, Tok.Ulong, Tok.Float, Tok.Double, Tok.Bool, Tok.Void, Tok.Identifier: Identifier type = new Identifier(t); Token iden = lexer.peek(1); switch(iden.type) { case Tok.Identifier: Identifier identifier = new Identifier(iden); Token p = lexer.peek(2); switch(p.type) { case Tok.OpenParentheses: lexer.next; lexer.next; return parseFunc(type, identifier); case Tok.Seperator: lexer.next; lexer.next; require(Tok.Seperator); return new VarDecl(type, identifier, null); case Tok.Assign: lexer.next; lexer.next; lexer.next(); auto exp = parseExpression(); require(Tok.Seperator); return new VarDecl(type, identifier, exp); default: char[] c = p.getType; throw error(__LINE__, UnexpectedTokMulti) .tok(p) .arg(c) .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign); } break; default: char[] c = iden.getType; throw error(__LINE__, UnexpectedTokSingle) .tok(iden) .arg(c) .arg(Tok.Identifier); } break; case Tok.EOF: return null; default: char[] c = t.getType; throw error(__LINE__, UnexpectedTok).arg(c); } } VarDecl[] parseStruct() { VarDecl[] varDecls; require(Tok.OpenBrace); while(lexer.peek.type != Tok.CloseBrace) { varDecls ~= cast(VarDecl)parseDecl; } require(Tok.CloseBrace); return varDecls; } Stmt parseStatement() { Token t = lexer.peek; switch(t.type) { case Tok.Return: lexer.next; auto ret = new ReturnStmt(); if (!skip(Tok.Seperator)) { ret.exp = parseExpression(); require(Tok.Seperator); } return ret; case Tok.If: lexer.next; require(Tok.OpenParentheses); auto condition = parseExpression(); require(Tok.CloseParentheses); auto then_body = parseBlockOrSingleStmt(); Stmt[] else_body; if (lexer.peek.type == Tok.Else) { lexer.next; else_body = parseBlockOrSingleStmt(); } return new IfStmt(condition, then_body, else_body); case Tok.While: lexer.next; require(Tok.OpenParentheses); auto condition = parseExpression(); require(Tok.CloseParentheses); return new WhileStmt(condition, parseBlockOrSingleStmt()); case Tok.Identifier: Token n = lexer.peek(1); switch(n.type) { case Tok.Dot: Exp iden = parseExpIdentifier(new Identifier(lexer.next)); switch(lexer.peek.type) { case Tok.Assign: lexer.next; auto stmt = new ExpStmt(new AssignExp(iden , parseExpression())); require(Tok.Seperator); return stmt; break; } case Tok.Assign: lexer.next; lexer.next; auto stmt = new ExpStmt(new AssignExp(new Identifier(t), parseExpression())); require(Tok.Seperator); return stmt; break; case Tok.Identifier: auto decl = new DeclStmt(parseDecl()); return decl; default: auto e = new ExpStmt(parseExpression()); require(Tok.Seperator); return e; } break; case Tok.Switch: lexer.next; require(Tok.OpenParentheses); auto target = parseExpression(); auto res = new SwitchStmt(target); require(Tok.CloseParentheses); require(Tok.OpenBrace); while (true) { Stmt[] statements; if (skip(Tok.Default)) { require(Tok.Colon); statements.length = 0; while (lexer.peek.type != Tok.Case && lexer.peek.type != Tok.Default && lexer.peek.type != Tok.CloseBrace) statements ~= parseStatement(); res.setDefault(statements); continue; } Token _case = lexer.peek; if (_case.type != Tok.Case) break; lexer.next(); IntegerLit[] literals; do { Exp e = parseExpression(); IntegerLit lit = cast(IntegerLit)e; if (lit is null) throw error(__LINE__, CaseValueMustBeInt) .tok(_case); literals ~= lit; } while (skip(Tok.Comma)); require(Tok.Colon); while (lexer.peek.type != Tok.Case && lexer.peek.type != Tok.Default && lexer.peek.type != Tok.CloseBrace) statements ~= parseStatement(); res.addCase(literals, statements); if (lexer.peek.type == Tok.CloseBrace) break; } require(Tok.CloseBrace); return res; default: auto decl = new DeclStmt(parseDecl()); //require(Tok.Seperator); return decl; } return new Stmt(); } FuncDecl parseFunc(Identifier type, Identifier identifier) { VarDecl[] funcArgs = parseFuncArgs(); lexer.next; // Remove the "{" Stmt[] statements; while(lexer.peek.type != Tok.CloseBrace) statements ~= parseStatement(); lexer.next; // Remove "}" return new FuncDecl(type, identifier, funcArgs, statements); } VarDecl[] parseFuncArgs() { lexer.next; // Remove the "(" token. VarDecl[] funcArgs; while(lexer.peek.type != Tok.CloseParentheses) { auto t = parseType; auto i = parseIdentifier; funcArgs ~= new VarDecl(t, i); if(lexer.peek.type == Tok.Comma) lexer.next; } lexer.next; // Remove the ")" return funcArgs; } Identifier parseIdentifier() { Token identifier = lexer.next; switch(identifier.type) { case Tok.Identifier: return new Identifier(identifier); break; default: throw error(__LINE__, "Unexpected token in Identifier parsing. Got %0") .arg(identifier.getType) .tok(identifier); } } Identifier parseType() { Token type = lexer.next; switch(type.type) { case Tok.Byte, Tok.Ubyte, Tok.Short, Tok.Ushort, Tok.Int, Tok.Uint, Tok.Long, Tok.Ulong, Tok.Float, Tok.Double, Tok.Bool, Tok.Void, Tok.Identifier: return new Identifier(type); break; default: char[] c = type.getType; error(__LINE__, "Unexpected token in Type parsing. Got %0").arg(c); } } // -- Expression parsing -- // private: Exp parseExpIdentifier(Exp target) { switch(lexer.peek.type) { case Tok.Dot: switch(lexer.peek(1).type) { case Tok.Identifier: lexer.next; return parseExpIdentifier( new MemberLookup(target, new Identifier(lexer.next))); default: Token t = lexer.peek(1); throw error(__LINE__, "Expected identifier after '.'", &t); } default: return target; } } Exp parseExpression(int p = 0) { auto exp = P(); Token next = lexer.peek(); BinOp* op = null; while ((op = binary(next.type)) != null && op.prec >= p) { lexer.next(); int q = op.leftAssoc? 1 + op.prec : op.prec; auto exp2 = parseExpression(q); exp = new BinaryExp(op.operator, exp, exp2); next = lexer.peek(); } return exp; } Exp P() { Token next = lexer.next(); if (auto op = unary(next.type)) return new NegateExp(parseExpression(op.prec)); else if (next.type == Tok.OpenParentheses) { auto e = parseExpression(0); require(Tok.CloseParentheses); return e; } else if (next.type == Tok.Identifier) { Exp iden = parseExpIdentifier(new Identifier(next)); switch(lexer.peek.type) { case Tok.OpenParentheses: lexer.next; Exp[] args; while(lexer.peek.type != Tok.CloseParentheses) { if(lexer.peek.type == Tok.Comma) { lexer.next; } args ~= parseExpression(); } lexer.next(); return new CallExp(iden, args); default: return iden; } } else if (next.type == Tok.Integer) return new IntegerLit(next); Stdout.formatln("{}", next.getType); assert(0, "Should not happen"); } private Stmt[] parseBlockOrSingleStmt() { Stmt[] stmts; if (lexer.peek.type == Tok.OpenBrace) { lexer.next; while(lexer.peek.type != Tok.CloseBrace) stmts ~= parseStatement(); lexer.next; } else stmts ~= parseStatement(); return stmts; } struct UnOp { Tok tokenType; int prec; } static UnOp[] _unary = [{Tok.Sub, 4}]; UnOp* unary(Tok t) { foreach (ref op; _unary) if (op.tokenType == t) return &op; return null; } struct BinOp { Tok tokenType; int prec; bool leftAssoc; BinaryExp.Operator operator; } static BinOp[] _binary = [ {Tok.Eq, 2, true, BinaryExp.Operator.Eq}, {Tok.Ne, 2, true, BinaryExp.Operator.Ne}, {Tok.Lt, 2, true, BinaryExp.Operator.Lt}, {Tok.Le, 2, true, BinaryExp.Operator.Le}, {Tok.Gt, 2, true, BinaryExp.Operator.Gt}, {Tok.Ge, 2, true, BinaryExp.Operator.Ge}, {Tok.Add, 3, true, BinaryExp.Operator.Add}, {Tok.Sub, 3, true, BinaryExp.Operator.Sub}, {Tok.Mul, 5, true, BinaryExp.Operator.Mul}, {Tok.Div, 5, true, BinaryExp.Operator.Div} ]; BinOp* binary(Tok t) { foreach (ref op; _binary) if (op.tokenType == t) return &op; return null; } private: Token require(Tok t) { if (lexer.peek().type != t) throw error(__LINE__, UnexpectedTokSingle) .arg(lexer.peek.getType) .arg(t); return lexer.next(); } bool skip(Tok t) { if (lexer.peek().type != t) return false; lexer.next(); return true; } Error error(uint line, char[] errMsg, Token* tok = null) { Location loc; if (tok is null) loc = lexer.peek.location; else loc = tok.location; auto e = new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg); e.loc(loc); if (tok !is null) e.tok(*tok); return e; } static char[] UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1", UnexpectedTokSingle = "Unexpected token, got %0 expected %1", UnexpectedTok = "Unexpected token %0"; static char[] CaseValueMustBeInt = "Cases can only be integer literals"; Lexer lexer; }