Mercurial > projects > dang
diff parser/Parser.d @ 44:495188f9078e new_gen
Big update - Moving towards a better, more separated parser
The parser no longer creates the AST directly, but through
callbacks(actions). This means the parser can be run with a different set
of actions that do something else.
The parser is not back to full strength yet, the main thing missing is the
various statements and structs.
Also added a SmallArray that uses the stack only until a given size is
exceeded, after which the array is copied onto the heap.
author | Anders Halager <halager@gmail.com> |
---|---|
date | Wed, 23 Apr 2008 00:57:45 +0200 |
parents | 858b9805843d |
children | 9bc660cbdbec |
line wrap: on
line diff
--- a/parser/Parser.d Tue Apr 22 22:31:39 2008 +0200 +++ b/parser/Parser.d Wed Apr 23 00:57:45 2008 +0200 @@ -3,30 +3,33 @@ import lexer.Lexer, lexer.Token; -import ast.Exp, - ast.Stmt, - ast.Decl; +import parser.Action; import misc.Error; +import basic.SmallArray; + import tango.io.Stdout, Integer = tango.text.convert.Integer; class Parser { + Action action; + alias Object Exp; + alias Object Stmt; + alias Object Decl; public: Decl[] parse(Lexer lexer) { this.lexer = lexer; + action = new AstAction; Decl[] declarations; while(lexer.peek.type != Tok.EOF) - { - declarations ~= parseRootDecl; - } + declarations ~= parseRootDecl(); return declarations; } @@ -35,147 +38,42 @@ { Token t = lexer.peek; - switch(t.type) + if (t.isBasicType || t.isIdentifier) { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Void, - Tok.Identifier: - Identifier type = new Identifier(t); - - Token iden = lexer.peek(1); - - switch(iden.type) - { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - Token p = lexer.peek(2); - switch(p.type) - { - case Tok.OpenParentheses: - lexer.next; lexer.next; - return parseFunc(type, identifier); - case Tok.Seperator: - lexer.next; lexer.next; - require(Tok.Seperator); - return new VarDecl(type, identifier, null); - case Tok.Assign: - lexer.next; lexer.next; - lexer.next(); - auto exp = parseExpression(); - require(Tok.Seperator); - return new VarDecl(type, identifier, exp); - default: - char[] c = p.getType; - throw error(__LINE__, UnexpectedTokMulti) - .tok(p) - .arg(c) - .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign); - } - break; - default: - char[] c = t.getType; - throw error(__LINE__, UnexpectedTok).tok(iden).arg(c); - } - break; - case Tok.Struct: - lexer.next; - Token iden = lexer.next; - switch(iden.type) - { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - return new StructDecl (identifier, parseStruct()); 
- default: - throw error(__LINE__, "Expected struct identifier, but got %0").arg(iden.getType); - } - case Tok.EOF: - return null; - default: - char[] c = t.getType; - throw error(__LINE__, UnexpectedTok).tok(t).arg(c); + Id type = Id(lexer.next); + Id iden = Id(require(Tok.Identifier)); + Token next = lexer.peek(); + if (next.type == Tok.Seperator) + { + Token sep = lexer.next(); + return action.actOnDeclarator(type, iden, null); + } + else if (next.type == Tok.Assign) + { + Token assign = lexer.next(); + Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnDeclarator(type, iden, exp); + } + else if (next.type == Tok.OpenParentheses) + return parseFunc(type, iden); + else + throw error(__LINE__, PE.UnexpectedTok) + .tok(next) + .arg(next.getType); } + else if (t.type == Tok.Struct) + { + } + char[] c = t.getType; + throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(c); } - Decl parseDecl() - { - Token t = lexer.peek; - - switch(t.type) - { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Void, - Tok.Identifier: - Identifier type = new Identifier(t); - - Token iden = lexer.peek(1); + /** + Parse statements. 
- switch(iden.type) - { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - Token p = lexer.peek(2); - switch(p.type) - { - case Tok.OpenParentheses: - lexer.next; lexer.next; - return parseFunc(type, identifier); - case Tok.Seperator: - lexer.next; lexer.next; - require(Tok.Seperator); - return new VarDecl(type, identifier, null); - case Tok.Assign: - lexer.next; lexer.next; - lexer.next(); - auto exp = parseExpression(); - require(Tok.Seperator); - return new VarDecl(type, identifier, exp); - default: - char[] c = p.getType; - throw error(__LINE__, UnexpectedTokMulti) - .tok(p) - .arg(c) - .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign); - } - break; - default: - char[] c = iden.getType; - throw error(__LINE__, UnexpectedTokSingle) - .tok(iden) - .arg(c) - .arg(Tok.Identifier); - } - break; - case Tok.EOF: - return null; - default: - char[] c = t.getType; - throw error(__LINE__, UnexpectedTok).arg(c); - } - } - - VarDecl[] parseStruct() - { - VarDecl[] varDecls; - require(Tok.OpenBrace); - while(lexer.peek.type != Tok.CloseBrace) - { - varDecls ~= cast(VarDecl)parseDecl; - } - - require(Tok.CloseBrace); - return varDecls; - } - + This is the place to attack! 
+ */ Stmt parseStatement() { Token t = lexer.peek; @@ -183,212 +81,107 @@ switch(t.type) { case Tok.Return: - lexer.next; - auto ret = new ReturnStmt(); - if (!skip(Tok.Seperator)) - { - ret.exp = parseExpression(); - require(Tok.Seperator); - } - return ret; + Token ret = lexer.next; + Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnReturnStmt(ret, exp); case Tok.If: - lexer.next; - require(Tok.OpenParentheses); - auto condition = parseExpression(); - require(Tok.CloseParentheses); - - auto then_body = parseBlockOrSingleStmt(); - - Stmt[] else_body; - if (lexer.peek.type == Tok.Else) - { - lexer.next; - else_body = parseBlockOrSingleStmt(); - } - - return new IfStmt(condition, then_body, else_body); + return null; case Tok.While: - lexer.next; - require(Tok.OpenParentheses); - auto condition = parseExpression(); - require(Tok.CloseParentheses); - return new WhileStmt(condition, parseBlockOrSingleStmt()); + return null; case Tok.Identifier: - Token n = lexer.peek(1); - switch(n.type) - { - case Tok.Dot: - Exp iden = parseExpIdentifier(new Identifier(lexer.next)); - switch(lexer.peek.type) - { - case Tok.Assign: - lexer.next; - auto stmt = new ExpStmt(new AssignExp(iden , parseExpression())); - require(Tok.Seperator); - return stmt; - break; - } - case Tok.Assign: - lexer.next; - lexer.next; - auto stmt = new ExpStmt(new AssignExp(new Identifier(t), parseExpression())); - require(Tok.Seperator); - return stmt; - break; - case Tok.Identifier: - auto decl = new DeclStmt(parseDecl()); - return decl; - - default: - auto e = new ExpStmt(parseExpression()); - require(Tok.Seperator); - return e; - - } - break; + return null; case Tok.Switch: - lexer.next; - require(Tok.OpenParentheses); - auto target = parseExpression(); - auto res = new SwitchStmt(target); - require(Tok.CloseParentheses); - require(Tok.OpenBrace); - while (true) - { - Stmt[] statements; - if (skip(Tok.Default)) - { - require(Tok.Colon); - statements.length = 0; - while 
(lexer.peek.type != Tok.Case - && lexer.peek.type != Tok.Default - && lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - res.setDefault(statements); - continue; - } - - Token _case = lexer.peek; - if (_case.type != Tok.Case) - break; - lexer.next(); - - IntegerLit[] literals; - do - { - Exp e = parseExpression(); - IntegerLit lit = cast(IntegerLit)e; - if (lit is null) - throw error(__LINE__, CaseValueMustBeInt) - .tok(_case); - - literals ~= lit; - } - while (skip(Tok.Comma)); - require(Tok.Colon); - - while (lexer.peek.type != Tok.Case - && lexer.peek.type != Tok.Default - && lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - - res.addCase(literals, statements); - - if (lexer.peek.type == Tok.CloseBrace) - break; - } - require(Tok.CloseBrace); - return res; + return null; default: - auto decl = new DeclStmt(parseDecl()); - //require(Tok.Seperator); - return decl; + return null; } - return new Stmt(); + error(__LINE__, "").tok(t); + return null; } - FuncDecl parseFunc(Identifier type, Identifier identifier) + /** + Parses a function/method given the already parsed + */ + Decl parseFunc(ref Id type, ref Id name) { - VarDecl[] funcArgs = parseFuncArgs(); - - lexer.next; // Remove the "{" + Decl func = action.actOnStartOfFunctionDef(type, name); + parseFuncArgs(func); - Stmt[] statements; + Stmt stmt = parseCompoundStatement(); - while(lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - - lexer.next; // Remove "}" - - return new FuncDecl(type, identifier, funcArgs, statements); + action.actOnEndOfFunction(func, stmt); + return func; } - VarDecl[] parseFuncArgs() + /** + Parse the function arguments, assumes current token is (. + + Both the intitial paren and the ending paren is consumed. + */ + void parseFuncArgs(Decl func) { - lexer.next; // Remove the "(" token. - - VarDecl[] funcArgs; + require(Tok.OpenParentheses); // Remove the "(" token. 
while(lexer.peek.type != Tok.CloseParentheses) { - auto t = parseType; - auto i = parseIdentifier; - funcArgs ~= new VarDecl(t, i); + auto t = parseType(); + auto i = parseIdentifier(); + action.addFuncArg(func, t, i); if(lexer.peek.type == Tok.Comma) lexer.next; } - lexer.next; // Remove the ")" - - return funcArgs; + require(Tok.CloseParentheses); // Remove the ")" } - Identifier parseIdentifier() + /** + Parses a function-body or similar, expects { to be current token. + + Will consume both the starting { and ending } + */ + Stmt parseCompoundStatement() { - Token identifier = lexer.next; - - switch(identifier.type) - { - case Tok.Identifier: - return new Identifier(identifier); - break; - default: - throw error(__LINE__, "Unexpected token in Identifier parsing. Got %0") - .arg(identifier.getType) - .tok(identifier); - } + Token lbrace = require(Tok.OpenBrace); + SmallArray!(Stmt, 32) stmts; // Try to use the stack only + while (lexer.peek.type != Tok.CloseBrace) + stmts ~= parseStatement(); + Token rbrace = require(Tok.CloseBrace); + return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe()); } - Identifier parseType() + Id parseIdentifier() + { + Token tok = lexer.next; + + if (tok.type is Tok.Identifier) + return Id(tok); + + throw error(__LINE__, PE.UnexpectedTokSingle) + .arg(tok.getType) + .arg(Tok.Identifier) + .tok(tok); + } + + Id parseType() { Token type = lexer.next; - switch(type.type) - { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Void, - Tok.Identifier: - return new Identifier(type); - break; - default: - char[] c = type.getType; - error(__LINE__, "Unexpected token in Type parsing. Got %0").arg(c); - } + if (type.isBasicType || type.type == Tok.Identifier) + return Id(type); + + char[] c = type.getType; + error(__LINE__, "Unexpected token in Type parsing. 
Got %0").arg(c); } +private: // -- Expression parsing -- // -private: Exp parseExpIdentifier(Exp target) { switch(lexer.peek.type) @@ -397,12 +190,13 @@ switch(lexer.peek(1).type) { case Tok.Identifier: - lexer.next; - return parseExpIdentifier( - new MemberLookup(target, new Identifier(lexer.next))); + Token op = lexer.next; + Id member = Id(lexer.next); + Exp exp = action.actOnMemberReference(target, op.location, member); + return parseExpIdentifier(exp); default: Token t = lexer.peek(1); - throw error(__LINE__, "Expected identifier after '.'", &t); + throw error(__LINE__, "Expected identifier after '.'").tok(t); } default: return target; @@ -419,7 +213,7 @@ lexer.next(); int q = op.leftAssoc? 1 + op.prec : op.prec; auto exp2 = parseExpression(q); - exp = new BinaryExp(op.operator, exp, exp2); + exp = action.actOnBinaryOp(op.operator, exp, exp2); next = lexer.peek(); } @@ -430,7 +224,7 @@ { Token next = lexer.next(); if (auto op = unary(next.type)) - return new NegateExp(parseExpression(op.prec)); + return action.actOnUnaryOp(next, parseExpression(op.prec)); else if (next.type == Tok.OpenParentheses) { auto e = parseExpression(0); @@ -439,9 +233,11 @@ } else if (next.type == Tok.Identifier) { - Exp iden = parseExpIdentifier(new Identifier(next)); + Exp value = action.actOnIdentifierExp(Id(next)); + Exp iden = parseExpIdentifier(value); switch(lexer.peek.type) { + // TODO: Function calls are parsed but ignored case Tok.OpenParentheses: lexer.next; Exp[] args; @@ -455,42 +251,26 @@ } lexer.next(); - return new CallExp(iden, args); + return null;//new CallExp(iden, args); default: return iden; } } else if (next.type == Tok.Integer) - return new IntegerLit(next); + return action.actOnNumericConstant(next); Stdout.formatln("{}", next.getType); assert(0, "Should not happen"); } - private Stmt[] parseBlockOrSingleStmt() - { - Stmt[] stmts; - if (lexer.peek.type == Tok.OpenBrace) - { - lexer.next; - while(lexer.peek.type != Tok.CloseBrace) - stmts ~= parseStatement(); - 
lexer.next; - } - else - stmts ~= parseStatement(); - - return stmts; - } - struct UnOp { Tok tokenType; int prec; } - static UnOp[] _unary = [{Tok.Sub, 4}]; + static const UnOp[] _unary = [{Tok.Sub, 4}]; UnOp* unary(Tok t) { foreach (ref op; _unary) @@ -504,23 +284,24 @@ Tok tokenType; int prec; bool leftAssoc; - BinaryExp.Operator operator; + Operator operator; } - static BinOp[] _binary = + static const BinOp[] _binary = [ - {Tok.Eq, 2, true, BinaryExp.Operator.Eq}, - {Tok.Ne, 2, true, BinaryExp.Operator.Ne}, - {Tok.Lt, 2, true, BinaryExp.Operator.Lt}, - {Tok.Le, 2, true, BinaryExp.Operator.Le}, - {Tok.Gt, 2, true, BinaryExp.Operator.Gt}, - {Tok.Ge, 2, true, BinaryExp.Operator.Ge}, + {Tok.Eq, 2, true, Operator.Eq}, + {Tok.Ne, 2, true, Operator.Ne}, - {Tok.Add, 3, true, BinaryExp.Operator.Add}, - {Tok.Sub, 3, true, BinaryExp.Operator.Sub}, + {Tok.Lt, 2, true, Operator.Lt}, + {Tok.Le, 2, true, Operator.Le}, + {Tok.Gt, 2, true, Operator.Gt}, + {Tok.Ge, 2, true, Operator.Ge}, - {Tok.Mul, 5, true, BinaryExp.Operator.Mul}, - {Tok.Div, 5, true, BinaryExp.Operator.Div} + {Tok.Add, 3, true, Operator.Add}, + {Tok.Sub, 3, true, Operator.Sub}, + + {Tok.Mul, 5, true, Operator.Mul}, + {Tok.Div, 5, true, Operator.Div} ]; BinOp* binary(Tok t) { @@ -535,7 +316,7 @@ Token require(Tok t) { if (lexer.peek().type != t) - throw error(__LINE__, UnexpectedTokSingle) + throw error(__LINE__, PE.UnexpectedTokSingle) .arg(lexer.peek.getType) .arg(t); return lexer.next(); @@ -549,28 +330,25 @@ return true; } - Error error(uint line, char[] errMsg, Token* tok = null) + Error error(uint line, char[] errMsg) { - Location loc; - if (tok is null) - loc = lexer.peek.location; - else - loc = tok.location; + Location loc = lexer.peek.location; auto e = new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg); e.loc(loc); - if (tok !is null) - e.tok(*tok); return e; } - static char[] - UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1", - UnexpectedTokSingle = "Unexpected 
token, got %0 expected %1", - UnexpectedTok = "Unexpected token %0"; + struct PE + { + static char[] + UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1", + UnexpectedTokSingle = "Unexpected token, got %0 expected %1", + UnexpectedTok = "Unexpected token %0"; - static char[] - CaseValueMustBeInt = "Cases can only be integer literals"; + static char[] + CaseValueMustBeInt = "Cases can only be integer literals"; + } Lexer lexer; }