Mercurial > projects > dang
view parser/Parser.d @ 80:682e20aa224f new_gen
Pointers working now - big YAY
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Fri, 02 May 2008 17:33:50 +0200 |
parents | ad956143dcdc |
children | 110c7e1c4ca2 |
line wrap: on
line source
module parser.Parser; import lexer.Lexer, lexer.Token; import parser.Action; import misc.Error; import basic.SmallArray; import tango.io.Stdout, Integer = tango.text.convert.Integer; class Parser { Action action; alias Object Exp; alias Object Stmt; alias Object Decl; public: Decl[] parse(Lexer lexer, Action act) { this.lexer = lexer; action = act; Decl[] declarations; while(lexer.peek.type != Tok.EOF) declarations ~= parseDecl(); return declarations; } Decl parseDecl() { Token t = lexer.peek; if (t.isBasicType || t.isIdentifier) { Id type = Id(lexer.next); Id iden = Id(require(Tok.Identifier)); Token next = lexer.peek(); if (next.type == Tok.Seperator) { Token sep = lexer.next(); return action.actOnDeclarator(type, iden, null); } else if (next.type == Tok.Assign) { Token assign = lexer.next(); Exp exp = parseExpression(); require(Tok.Seperator); return action.actOnDeclarator(type, iden, exp); } else if (next.type == Tok.OpenParentheses) return parseFunc(type, iden); else throw error(__LINE__, PE.UnexpectedTok) .tok(next) .arg(next.getType); } else if (t.type == Tok.Struct) { Id type = Id(lexer.next); Id iden = Id(require(Tok.Identifier)); return parseStruct(type, iden); } char[] c = t.getType; throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(c); } /** Parse struct */ Decl parseStruct(Id type, Id iden) { auto decl = action.actOnDeclarator(type, iden, null); require(Tok.OpenBrace); while(lexer.peek.isBasicType || lexer.peek.isIdentifier) { Id var_type = Id(lexer.next); Id var_iden = Id(require(Tok.Identifier)); Token next = lexer.peek(); if (next.type == Tok.Seperator) { Token sep = lexer.next(); action.actOnStructMember(decl, var_type, var_iden, null); continue; } else if (next.type == Tok.Assign) { Token assign = lexer.next(); Exp exp = parseExpression(); require(Tok.Seperator); action.actOnStructMember(decl, var_type, var_iden, exp); continue; } throw error(__LINE__, PE.UnexpectedTok) .tok(next).arg(next.getType); } require(Tok.CloseBrace); return decl; } /** Parse statements. This is the place to attack! */ Stmt parseStatement() { Token t = lexer.peek; switch(t.type) { case Tok.Return: Token ret = lexer.next; Exp exp; if (lexer.peek.type != Tok.Seperator) exp = parseExpression(); require(Tok.Seperator); return action.actOnReturnStmt(ret, exp); /* if (cond) single statement | compound statement [else single statement | compound statement] */ case Tok.If: Token _if = lexer.next(); require(Tok.OpenParentheses); Exp cond = parseExpression(); require(Tok.CloseParentheses); Stmt thenB = parseSingleOrCompoundStatement(); // if there is no else part we use the if as token, to have // something than can be passed along Token _else = _if; Stmt elseB; if (lexer.peek.type == Tok.Else) { _else = lexer.next; elseB = parseSingleOrCompoundStatement(); } return action.actOnIfStmt(_if, cond, thenB, _else, elseB); /* while (cond) single statement | compound statement */ case Tok.While: Token _while = lexer.next; require(Tok.OpenParentheses); Exp cond = parseExpression(); require(Tok.CloseParentheses); Stmt bodyStmt = parseSingleOrCompoundStatement(); return action.actOnWhileStmt(_while, cond, bodyStmt); /* One of four things: A declaration of a function/variable `type id ...` A direct assignment `id = exp;` An indirect assignment `id.id = exp` Some sort of free standing expression The assignments should be handled as binary expressions? */ case Tok.Identifier: Token iden = lexer.peek; Token n = lexer.peek(1); // Must be an decl, if we start with a basic type, or two // identifiers in a row if (iden.isBasicType() || iden.isIdentifier()) { if ( n.type == Tok.Star) return action.actOnDeclStmt(parseVarDecl()); if ( n.isIdentifier()) return action.actOnDeclStmt(parseVarDecl()); // Expression: a.b, a = b, a(b) etc. Exp exp = parseExpression(); require(Tok.Seperator); return action.actOnExprStmt(exp); break; } case Tok.Switch: throw error(__LINE__, ":(").tok(lexer.peek); return null; default: if (t.isBasicType()) goto case Tok.Identifier; if (t.type == Tok.Star) { auto exp = parseExpression(); require(Tok.Seperator); return action.actOnExprStmt(exp); } throw error(__LINE__, "Unexpexted begining of statement.").tok(lexer.peek); } throw error(__LINE__, "").tok(t); return null; } Decl parseVarDecl() { // manually hardcoded to only support "type id [= exp];" // as that is the only thing the codegen understands Id type = parseType; Id id = Id(lexer.next); Exp init; if (skip(Tok.Assign)) init = parseExpression(); require(Tok.Seperator); Decl d = action.actOnDeclarator(type, id, init); return d; } /** Parses a function/method given the already parsed return type and name */ Decl parseFunc(ref Id type, ref Id name) { Decl func = action.actOnStartOfFunctionDef(type, name); parseFuncArgs(func); Stmt stmt = parseCompoundStatement(); return action.actOnEndOfFunction(func, stmt); } /** Parse the function arguments, assumes current token is (. Both the intitial paren and the ending paren is consumed. */ void parseFuncArgs(Decl func) { require(Tok.OpenParentheses); // Remove the "(" token. while(lexer.peek.type != Tok.CloseParentheses) { auto t = parseType(); auto i = parseIdentifier(); action.addFuncArg(func, t, i); if(lexer.peek.type == Tok.Comma) lexer.next; } require(Tok.CloseParentheses); // Remove the ")" } /** Parse either a block, or a single statement as allowed after if, while and for. */ Stmt parseSingleOrCompoundStatement() { if (lexer.peek.type == Tok.OpenBrace) return parseCompoundStatement(); return parseStatement(); } /** Parses a function-body or similar, expects { to be current token. Will consume both the starting { and ending } */ Stmt parseCompoundStatement() { Token lbrace = require(Tok.OpenBrace); SmallArray!(Stmt, 32) stmts; // Try to use the stack only while (lexer.peek.type != Tok.CloseBrace) stmts ~= parseStatement(); Token rbrace = require(Tok.CloseBrace); return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe()); } Id parseIdentifier() { Token tok = lexer.next; if (tok.type is Tok.Identifier) return Id(tok); throw error(__LINE__, PE.UnexpectedTokSingle) .arg(tok.getType) .arg(Tok.Identifier) .tok(tok); } /** Parse a type - this includes pointer and array(at some point) types. */ Id parseType() { Token type = lexer.next; Id currentType; if ( !(type.isBasicType || type.type == Tok.Identifier) ) throw error(__LINE__, "Unexpected token in Type parsing. Got %0") .arg(type.getType) .tok(type); currentType = Id(type); type = lexer.peek; while(type.type == Tok.Star) { currentType = PointerId(currentType); lexer.next; type = lexer.peek; } return currentType; } private: // -- Expression parsing -- // Exp parseExpIdentifier(Exp target) { switch(lexer.peek.type) { case Tok.Dot: switch(lexer.peek(1).type) { case Tok.Identifier: Token op = lexer.next; Id member = Id(lexer.next); Exp exp = action.actOnMemberReference(target, op.location, member); return parseExpIdentifier(exp); default: Token t = lexer.peek(1); throw error(__LINE__, "Expected identifier after '.'").tok(t); } default: return target; } } Exp parseExpression(int p = 0) { auto exp = P(); Token next = lexer.peek(); BinOp* op = null; while ((op = binary(next.type)) != null && op.prec >= p) { lexer.next(); int q = op.leftAssoc? 1 + op.prec : op.prec; auto exp2 = parseExpression(q); exp = action.actOnBinaryOp(op.operator, exp, exp2); next = lexer.peek(); } return exp; } Exp P() { Token next = lexer.next(); if (auto op = unary(next.type)) return action.actOnUnaryOp(next, parseExpression(op.prec)); else if (next.type == Tok.OpenParentheses) { auto e = parseExpression(0); require(Tok.CloseParentheses); return e; } else if (next.type == Tok.Identifier) { Exp value = action.actOnIdentifierExp(Id(next)); Exp iden = parseExpIdentifier(value); switch(lexer.peek.type) { case Tok.OpenParentheses: Token lp = lexer.next; SmallArray!(Exp, 8) args; while(lexer.peek.type != Tok.CloseParentheses) { if(lexer.peek.type == Tok.Comma) lexer.next; args ~= parseExpression(); } Token rp = lexer.next(); return action.actOnCallExpr(iden, lp, args.unsafe(), rp); default: return iden; } } else if (next.type == Tok.Cast) return parseCast(); else if (next.type == Tok.Integer) return action.actOnNumericConstant(next); throw error(__LINE__, "Expected expression, not '"~next.getType~"'").tok(next); assert(0, "Should not happen"); } Exp parseCast() { require(Tok.OpenParentheses); auto next = lexer.next; if(!next.isBasicType && !next.isIdentifier) throw error(__LINE__, "Expected cast type, not "~next.get).tok(next); require(Tok.CloseParentheses); auto exp = P(); return action.actOnCastExpr(Id(next), exp); } struct UnOp { Tok tokenType; int prec; } static const UnOp[] _unary = [ {Tok.Minus, 4}, {Tok.Star, 4} ]; UnOp* unary(Tok t) { foreach (ref op; _unary) if (op.tokenType == t) return &op; return null; } struct BinOp { Tok tokenType; int prec; bool leftAssoc; Operator operator; } static const BinOp[] _binary = [ {Tok.Assign, 1, false, Operator.Assign}, {Tok.Eq, 2, true, Operator.Eq}, {Tok.Ne, 2, true, Operator.Ne}, {Tok.Lt, 2, true, Operator.Lt}, {Tok.Le, 2, true, Operator.Le}, {Tok.Gt, 2, true, Operator.Gt}, {Tok.Ge, 2, true, Operator.Ge}, {Tok.Plus, 3, true, Operator.Add}, {Tok.Minus, 3, true, Operator.Sub}, {Tok.Star, 5, true, Operator.Mul}, {Tok.Slash, 5, true, Operator.Div}, {Tok.Percent, 5, true, Operator.Mod} ]; BinOp* binary(Tok t) { foreach (ref op; _binary) if (op.tokenType == t) return &op; return null; } private: Token require(Tok t) { if (lexer.peek().type != t) throw error(__LINE__, PE.UnexpectedTokSingle) .arg(lexer.peek.getType) .arg(t) .tok(lexer.peek); return lexer.next(); } bool skip(Tok t) { if (lexer.peek().type != t) return false; lexer.next(); return true; } Error error(uint line, char[] errMsg) { Location loc = lexer.peek.location; auto e = new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg); e.loc(loc); return e; } struct PE { static char[] UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1", UnexpectedTokSingle = "Unexpected token, got %0 expected %1", UnexpectedTok = "Unexpected token %0"; static char[] CaseValueMustBeInt = "Cases can only be integer literals"; } Lexer lexer; }