Mercurial > projects > dang
view parser/Parser.d @ 33:084c2c147c4f new_gen
Improvements to the Error class.
* Now takes tokens instead of locations (a single loc can still be given)
* Some of the arg functions take arrays now
* Output is much better. Will print the line with all tokens given marked
author | Anders Halager <halager@gmail.com> |
---|---|
date | Sun, 20 Apr 2008 13:59:20 +0200 |
parents | 3147a52d1247 |
children | ce17bea8e9bd |
line wrap: on
line source
module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import ast.Exp,
       ast.Stmt,
       ast.Decl;

import misc.Error;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
 * Recursive-descent parser turning a Lexer's token stream into an AST of
 * Decl / Stmt / Exp nodes. Expressions are parsed with precedence climbing
 * driven by the _binary operator table (see parseExpression / P).
 *
 * Errors are reported through misc.Error; the __LINE__ passed to error()
 * identifies the reporting site inside this file, for parser debugging.
 */
class Parser
{
public:
    /// Parse the entire token stream into a list of top-level declarations.
    Decl[] parse(Lexer lexer)
    {
        this.lexer = lexer;
        Decl[] declarations;
        while (lexer.peek.type != Tok.EOF)
            declarations ~= parseRootDecl;
        return declarations;
    }

    /// Parse one declaration at module scope: a variable, function or struct.
    Decl parseRootDecl()
    {
        Token t = lexer.peek;
        switch (t.type)
        {
            // A type token starts either a variable or a function declaration.
            case Tok.Byte, Tok.Ubyte, Tok.Short, Tok.Ushort,
                 Tok.Int, Tok.Uint, Tok.Long, Tok.Ulong,
                 Tok.Float, Tok.Double, Tok.Bool, Tok.Identifier:
            {
                Identifier type = new Identifier(t);
                Token iden = lexer.peek(1);
                switch (iden.type)
                {
                    case Tok.Identifier:
                    {
                        Identifier identifier = new Identifier(iden);
                        // Third token decides: '(' => function, ';' => plain
                        // variable, '=' => initialized variable.
                        Token p = lexer.peek(2);
                        switch (p.type)
                        {
                            case Tok.OpenParentheses:
                                lexer.next; lexer.next;
                                return parseFunc(type, identifier);

                            case Tok.Seperator:
                                lexer.next; lexer.next;
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, null);

                            case Tok.Assign:
                                lexer.next; lexer.next; lexer.next();
                                auto exp = parseExpression();
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, exp);

                            default:
                                char[] c = p.getType;
                                throw error(__LINE__, UnexpectedTokMulti)
                                    .tok(p)
                                    .arg(c)
                                    .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign);
                        }
                    }
                    default:
                    {
                        // BUG FIX: previously reported the *type* token's kind
                        // (t.getType) while marking iden. Report the offending
                        // token and what was expected, matching parseDecl.
                        char[] c = iden.getType;
                        throw error(__LINE__, UnexpectedTokSingle)
                            .tok(iden)
                            .arg(c)
                            .arg(Tok.Identifier);
                    }
                }
            }
            case Tok.Struct:
            {
                lexer.next;
                Token iden = lexer.next;
                switch (iden.type)
                {
                    case Tok.Identifier:
                    {
                        Identifier identifier = new Identifier(iden);
                        return new StructDecl(identifier, parseStruct());
                    }
                    default:
                        throw error(__LINE__, "Expected struct identifier, but got %0")
                            .arg(iden.getType);
                }
            }
            case Tok.EOF:
                return null;

            default:
            {
                char[] c = t.getType;
                throw error(__LINE__, UnexpectedTok).tok(t).arg(c);
            }
        }
    }

    /// Parse one declaration inside a function or struct body.
    /// Same grammar as parseRootDecl minus the struct case.
    Decl parseDecl()
    {
        Token t = lexer.peek;
        switch (t.type)
        {
            case Tok.Byte, Tok.Ubyte, Tok.Short, Tok.Ushort,
                 Tok.Int, Tok.Uint, Tok.Long, Tok.Ulong,
                 Tok.Float, Tok.Double, Tok.Bool, Tok.Identifier:
            {
                Identifier type = new Identifier(t);
                Token iden = lexer.peek(1);
                switch (iden.type)
                {
                    case Tok.Identifier:
                    {
                        Identifier identifier = new Identifier(iden);
                        Token p = lexer.peek(2);
                        switch (p.type)
                        {
                            case Tok.OpenParentheses:
                                lexer.next; lexer.next;
                                return parseFunc(type, identifier);

                            case Tok.Seperator:
                                lexer.next; lexer.next;
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, null);

                            case Tok.Assign:
                                lexer.next; lexer.next; lexer.next();
                                auto exp = parseExpression();
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, exp);

                            default:
                                char[] c = p.getType;
                                throw error(__LINE__, UnexpectedTokMulti)
                                    .tok(p)
                                    .arg(c)
                                    .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign);
                        }
                    }
                    default:
                    {
                        char[] c = iden.getType;
                        throw error(__LINE__, UnexpectedTokSingle)
                            .tok(iden)
                            .arg(c)
                            .arg(Tok.Identifier);
                    }
                }
            }
            case Tok.EOF:
                return null;

            default:
            {
                // CONSISTENCY FIX: mark the offending token like parseRootDecl
                // does; the original omitted .tok(t) here.
                char[] c = t.getType;
                throw error(__LINE__, UnexpectedTok).tok(t).arg(c);
            }
        }
    }

    /// Parse a struct body: '{' VarDecl* '}'.
    VarDecl[] parseStruct()
    {
        VarDecl[] varDecls;
        require(Tok.OpenBrace);
        while (lexer.peek.type != Tok.CloseBrace)
            // NOTE(review): the cast yields null if parseDecl ever returns a
            // non-VarDecl (e.g. a nested function) — confirm intended.
            varDecls ~= cast(VarDecl)parseDecl;
        require(Tok.CloseBrace);
        return varDecls;
    }

    /// Parse a single statement: return / if / while / assignment /
    /// declaration / bare expression.
    Stmt parseStatement()
    {
        Token t = lexer.peek;
        switch (t.type)
        {
            case Tok.Return:
            {
                lexer.next;
                auto ret = new ReturnStmt();
                ret.exp = parseExpression();
                require(Tok.Seperator);
                return ret;
            }
            case Tok.If:
            {
                lexer.next;
                require(Tok.OpenParentheses);
                auto condition = parseExpression();
                require(Tok.CloseParentheses);
                auto then_body = parseBlockOrSingleStmt();
                Stmt[] else_body;
                if (lexer.peek.type == Tok.Else)
                {
                    lexer.next;
                    else_body = parseBlockOrSingleStmt();
                }
                return new IfStmt(condition, then_body, else_body);
            }
            case Tok.While:
            {
                lexer.next;
                require(Tok.OpenParentheses);
                auto condition = parseExpression();
                require(Tok.CloseParentheses);
                return new WhileStmt(condition, parseBlockOrSingleStmt());
            }
            case Tok.Identifier:
            {
                // Disambiguate on the token after the identifier.
                Token n = lexer.peek(1);
                switch (n.type)
                {
                    case Tok.Dot:
                    {
                        // Dotted target, e.g. "a.b.c = exp;".
                        Exp iden = parseExpIdentifier(new Identifier(lexer.next));
                        switch (lexer.peek.type)
                        {
                            case Tok.Assign:
                            {
                                lexer.next;
                                auto stmt = new ExpStmt(
                                    new AssignExp(iden, parseExpression()));
                                require(Tok.Seperator);
                                return stmt;
                            }
                            default:
                                // BUG FIX: there was no default here, so any
                                // token other than '=' after a dotted name
                                // crashed with a runtime SwitchError instead
                                // of producing a parse error.
                                throw error(__LINE__, UnexpectedTokSingle)
                                    .tok(lexer.peek)
                                    .arg(lexer.peek.getType)
                                    .arg(Tok.Assign);
                        }
                    }
                    case Tok.Assign:
                    {
                        lexer.next; lexer.next;
                        auto stmt = new ExpStmt(
                            new AssignExp(new Identifier(t), parseExpression()));
                        require(Tok.Seperator);
                        return stmt;
                    }
                    case Tok.Identifier:
                        // "Type name ..." — a local declaration.
                        return new DeclStmt(parseDecl());

                    default:
                    {
                        // Bare expression statement, e.g. a call.
                        auto e = new ExpStmt(parseExpression());
                        require(Tok.Seperator);
                        return e;
                    }
                }
            }
            default:
                // Statements starting with a type keyword are declarations.
                return new DeclStmt(parseDecl());
        }
    }

    /// Parse a function body; type and name are already consumed, '(' is next.
    FuncDecl parseFunc(Identifier type, Identifier identifier)
    {
        VarDecl[] funcArgs = parseFuncArgs();
        lexer.next; // consume '{'
        Stmt[] statements;
        while (lexer.peek.type != Tok.CloseBrace)
            statements ~= parseStatement();
        lexer.next; // consume '}'
        return new FuncDecl(type, identifier, funcArgs, statements);
    }

    /// Parse a comma-separated "type name" parameter list; '(' is next.
    VarDecl[] parseFuncArgs()
    {
        lexer.next; // consume '('
        VarDecl[] funcArgs;
        while (lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType;
            auto i = parseIdentifier;
            funcArgs ~= new VarDecl(t, i);
            if (lexer.peek.type == Tok.Comma)
                lexer.next;
        }
        lexer.next; // consume ')'
        return funcArgs;
    }

    /// Consume and return one identifier, or raise a parse error.
    Identifier parseIdentifier()
    {
        Token identifier = lexer.next;
        if (identifier.type == Tok.Identifier)
            return new Identifier(identifier);
        throw error(__LINE__, "Unexpected token in Identifier parsing. Got %0")
            .arg(identifier.getType)
            .tok(identifier);
    }

    /// Consume and return one type token (basic type or identifier).
    Identifier parseType()
    {
        Token type = lexer.next;
        switch (type.type)
        {
            case Tok.Byte, Tok.Ubyte, Tok.Short, Tok.Ushort,
                 Tok.Int, Tok.Uint, Tok.Long, Tok.Ulong,
                 Tok.Float, Tok.Double, Tok.Bool, Tok.Identifier:
                return new Identifier(type);

            default:
            {
                // BUG FIX: the Error was constructed but never thrown, letting
                // parseType fall off the end of a non-void function.
                char[] c = type.getType;
                throw error(__LINE__, "Unexpected token in Type parsing. Got %0")
                    .arg(c);
            }
        }
    }

    // -- Expression parsing -- //
private:
    /// Extend target with a chain of '.' member lookups ("a.b.c").
    Exp parseExpIdentifier(Exp target)
    {
        switch (lexer.peek.type)
        {
            case Tok.Dot:
                switch (lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        lexer.next; // consume '.'
                        return parseExpIdentifier(
                            new MemberLookup(target, new Identifier(lexer.next)));
                    default:
                    {
                        Token t = lexer.peek(1);
                        throw error(__LINE__, "Expected identifier after '.'", &t);
                    }
                }
            default:
                return target;
        }
    }

    /// Precedence-climbing expression parser; p is the minimum binding power
    /// an operator must have to be consumed at this level.
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            // Left-associative operators bind their right operand one level
            // tighter so "a - b - c" parses as "(a - b) - c".
            int q = op.leftAssoc ? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = new BinaryExp(op.operator, exp, exp2);
            next = lexer.peek();
        }
        return exp;
    }

    /// Primary / unary expression: -x, (exp), identifier, call, integer.
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return new NegateExp(parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp iden = parseExpIdentifier(new Identifier(next));
            switch (lexer.peek.type)
            {
                case Tok.OpenParentheses:
                {
                    lexer.next;
                    Exp[] args;
                    while (lexer.peek.type != Tok.CloseParentheses)
                    {
                        if (lexer.peek.type == Tok.Comma)
                            lexer.next;
                        args ~= parseExpression();
                    }
                    lexer.next(); // consume ')'
                    return new CallExp(iden, args);
                }
                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Integer)
            return new IntegerLit(next);

        // BUG FIX: previously dumped the token type to stdout and died on
        // assert(0); raise a proper parse error through the Error machinery.
        throw error(__LINE__, UnexpectedTok).tok(next).arg(next.getType);
    }

    /// Parse either "{ stmt* }" or a single statement; used by if/while.
    private Stmt[] parseBlockOrSingleStmt()
    {
        Stmt[] stmts;
        if (lexer.peek.type == Tok.OpenBrace)
        {
            lexer.next;
            while (lexer.peek.type != Tok.CloseBrace)
                stmts ~= parseStatement();
            lexer.next;
        }
        else
            stmts ~= parseStatement();
        return stmts;
    }

    /// Unary operator table entry.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static UnOp[] _unary = [{Tok.Sub, 4}];

    /// Look up t in the unary table; null when t is not a unary operator.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// Binary operator table entry.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        BinaryExp.Operator operator;
    }

    // Comparisons bind loosest (2), then additive (3), then multiplicative (5).
    static BinOp[] _binary = [
        {Tok.Eq,  2, true, BinaryExp.Operator.Eq},
        {Tok.Ne,  2, true, BinaryExp.Operator.Ne},
        {Tok.Lt,  2, true, BinaryExp.Operator.Lt},
        {Tok.Le,  2, true, BinaryExp.Operator.Le},
        {Tok.Gt,  2, true, BinaryExp.Operator.Gt},
        {Tok.Ge,  2, true, BinaryExp.Operator.Ge},
        {Tok.Add, 3, true, BinaryExp.Operator.Add},
        {Tok.Sub, 3, true, BinaryExp.Operator.Sub},
        {Tok.Mul, 5, true, BinaryExp.Operator.Mul},
        {Tok.Div, 5, true, BinaryExp.Operator.Div}
    ];

    /// Look up t in the binary table; null when t is not a binary operator.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:
    /// Consume the next token, raising a parse error if it is not of type t.
    void require(Tok t)
    {
        if (lexer.peek().type != t)
            // BUG FIX: the Error was built but never thrown, so require()
            // silently accepted ANY token and consumed it.
            throw error(__LINE__, UnexpectedTokSingle)
                .arg(lexer.peek.getType)
                .arg(t);
        lexer.next();
    }

    /// Build (but do not throw) a parse Error anchored at tok, or at the
    /// current lookahead when tok is null. line is the reporting __LINE__.
    Error error(uint line, char[] errMsg, Token* tok = null)
    {
        Location loc = (tok is null) ? lexer.peek.location : tok.location;
        auto e = new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~ errMsg);
        e.loc(loc);
        if (tok !is null)
            e.tok(*tok);
        return e;
    }

    // Message templates; %0/%1 are filled by Error.arg.
    // (Typo fix: "Unexpexted" -> "Unexpected" in both names and text.)
    static char[]
        UnexpectedTokMulti  = "Unexpected token, got %0 expected one of %1",
        UnexpectedTokSingle = "Unexpected token, got %0 expected %1",
        UnexpectedTok       = "Unexpected token %0";

    Lexer lexer;
}