Mercurial > projects > dang
diff src/parser/Parser.d @ 206:d3c148ca429b
Major moving of files. all src now goes into src, all docs in docs.
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 18:14:56 +0200 |
parents | |
children | e0551773a005 |
line wrap: on
line diff
module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import basic.Message;

import basic.SmallArray,
       basic.SourceManager;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Recursive-descent parser.

  Consumes tokens from a Lexer and reports every recognized construct to
  an Action (semantic-analysis callback interface); errors go through the
  MessageHandler. The concrete node types are opaque here (aliased to
  Object), so this class builds nothing itself.
 */
class Parser
{
    Action action;
    MessageHandler messages;
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;
    alias Object Module;

    this(MessageHandler messages)
    {
        this.messages = messages;
    }

    /**
      Parse one whole source file.

      Handles an optional leading `module a.b.c;` declaration (otherwise an
      implicit module is created from the file name), then parses
      declarations until EOF, handing each to the action.
     */
    Module parse(SourceManager sm, Lexer lexer, Action act)
    {
        this.sm = sm;
        this.lexer = lexer;
        this.action = act;

        Module m;
        if (lexer.peek.type == Tok.Module)
        {
            Token _module = lexer.next;
            ModuleName name = parseModuleName();
            m = action.actOnModule(_module, sm.getText(name.asRange()));
            require(Tok.Seperator);
        }
        else
        {
            SLoc loc = lexer.peek.location;
            m = action.actOnImplicitModule(loc, sm.getFile(loc));
        }

        while (lexer.peek.type != Tok.EOF)
            foreach (d; parseDeclDef())
                action.actOnModuleDecl(m, d);

        return m;
    }

private:
    /**
      Parse one top-level construct: either a (possibly multi-module)
      import statement or a single declaration.
     */
    Decl[] parseDeclDef()
    {
        Token t = lexer.peek;
        if (t.type == Tok.Import)
            return parseImports();
        else
            return [parseDecl()];
    }

    /**
      Parse a declaration: variable (`T x;` / `T x = e;`), function
      (`T x(...)`) or struct. On an unrecognizable type it reports the
      error, skips ahead to the next identifier and retries.
     */
    Decl parseDecl()
    {
        Token t = lexer.peek;

        if (t.isBasicType || t.isIdentifier)
        {
            Id type;
            Id iden;
            int len = peekParseType;
            if (lexer.peek(len).type == Tok.Identifier && len != 0)
            {
                type = parseType;
parseDeclAfterInvalidType:
                iden = Id(require(Tok.Identifier));
                Token next = lexer.peek();
                if (next.type == Tok.Seperator)
                {
                    Token sep = lexer.next();
                    return action.actOnDeclarator(type, iden, null);
                }
                else if (next.type == Tok.Assign)
                {
                    Token assign = lexer.next();
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnDeclarator(type, iden, exp);
                }
                else if (next.type == Tok.OpenParentheses)
                    return parseFunc(type, iden);
                else
                    messages.report(UnexpectedTok, next.location).arg(next.getType);
            }
            // Error recovery: the lookahead did not form "type identifier".
            t = lexer.peek(len);
            messages.report(InvalidDeclType, t.location)
                .arg(sm.getText(t.asRange));
            while (len--)
                lexer.next;
            // Scan forward to the next identifier; bail out at EOF so a
            // truncated file cannot make this loop spin forever.
            while (lexer.peek.type != Tok.Identifier)
            {
                if (lexer.peek.type == Tok.EOF)
                    messages.report(UnexpectedTok, lexer.peek.location)
                        .arg(lexer.peek.getType)
                        .fatal(ExitLevel.Parser);
                lexer.next;
            }
            type = Id(lexer.peek);
            goto parseDeclAfterInvalidType;
        }
        else if (t.type == Tok.Struct)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));

            return parseStruct(type, iden);
        }
        messages.report(UnexpectedTok, t.location)
            .arg(t.getType)
            .arg(Tok.Identifier)
            .fatal(ExitLevel.Parser);
        return null; // not reached -- fatal() terminates the parser
    }

    /**
      Parse a series of imports belonging to a single import token.

      Handles plain imports (`import A, B.C;`), renamed imports
      (`import B = A.B;`) and selective imports (`import A : x, y = z;`).
     */
    Decl[] parseImports()
    {
        Token _import = require(Tok.Import);
        SmallArray!(Decl) res;

        bool done = false;
        while (!done && !on_a(Tok.Seperator))
        {
            ModuleName mod = parseModuleName();
            Token tok = lexer.peek;
            switch (tok.type)
            {
                case Tok.Comma:
                    // import A, B.C;
                    // register this module, then parse another module-name.
                    // (Register it here only -- previously it was re-added
                    // a second time at the bottom of the loop.)
                    lexer.next();
                    res ~= action.actOnImport(_import, mod, null);
                    break;
                case Tok.Assign:
                    // import B = A.A;
                    //        ^- must be a single identifier
                    // renamed import
                    if (mod.packages.length != 0)
                    {
                        SLoc loc = mod.packages[0].tok.location;
                        messages.report(RenameMustBeSingleIdent, loc);
                    }
                    lexer.next();
                    Id name = mod.id;
                    mod = parseModuleName();
                    // create from mod and rename to `name`; register only
                    // the renamed import (previously a second, un-renamed
                    // import of the same module was also added).
                    res ~= action.actOnImport(_import, mod, &name);
                    break;
                case Tok.Colon:
                    // import A : a;
                    // selective imports, potentially import A : print = a
                    lexer.next();
                    Decl d = action.actOnImport(_import, mod, null);
                    // do-while on a comma: add each explicit symbol,
                    // optionally renamed via `local = symbol`.
                    do
                    {
                        Id sym = parseIdentifier();
                        Id dummy;
                        Id* name = null;
                        if (skip(Tok.Assign))
                        {
                            dummy = sym;
                            name = &dummy;
                            sym = parseIdentifier();
                        }
                        action.addSelectiveImport(d, sym, name);

                    } while (skip(Tok.Comma));

                    require(Tok.Seperator);
                    res ~= d;
                    return res.safe();
                case Tok.Seperator:
                    res ~= action.actOnImport(_import, mod, null);
                    done = true;
                    break;
                default:
                    goto Lerror;
            }
        }

        require(Tok.Seperator);
        return res.safe();
Lerror:
        // Skip the rest of the statement; stop at EOF so broken input
        // cannot loop forever.
        while (!on_a(Tok.Seperator) && !on_a(Tok.EOF))
            lexer.next();
        return res.safe();
    }

    /**
      Parse a struct body. The `struct` keyword and the name have already
      been consumed by the caller; members are parsed as ordinary
      declarations and attached via the action.
     */
    Decl parseStruct(Id type, Id iden)
    {
        auto decl = action.actOnDeclarator(type, iden, null);

        require(Tok.OpenBrace);

        while (lexer.peek.isBasicType || lexer.peek.isIdentifier)
        {
            auto m_decl = parseDecl();
            action.actOnStructMember(decl, m_decl);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Parse a single statement: return, if, while, a declaration or a
      free-standing expression statement.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;

        switch (t.type)
        {
            case Tok.Return:
                Token ret = lexer.next;
                Exp exp;
                if (lexer.peek.type != Tok.Seperator)
                    exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            /*
              if (cond)
                  single statement | compound statement
              [else
                  single statement | compound statement]
             */
            case Tok.If:
                Token _if = lexer.next();

                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);

                Stmt thenB = parseSingleOrCompoundStatement();

                // if there is no else part we use the if as token, to have
                // something that can be passed along
                Token _else = _if;
                Stmt elseB;
                if (lexer.peek.type == Tok.Else)
                {
                    _else = lexer.next;
                    elseB = parseSingleOrCompoundStatement();
                }

                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);

            /*
              while (cond)
                  single statement | compound statement
             */
            case Tok.While:
                Token _while = lexer.next;
                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);
                Stmt bodyStmt = parseSingleOrCompoundStatement();
                return action.actOnWhileStmt(_while, cond, bodyStmt);

            /*
              One of four things:
                A declaration of a function/variable `type id ...`
                A direct assignment `id = exp;`
                An indirect assignment `id.id = exp`
                Some sort of free standing expression

              The assignments should be handled as binary expressions?
             */
            case Tok.Identifier:
                Token iden = lexer.peek;
                Token n = lexer.peek(1);
                // Must be a decl, if we start with a basic type, or two
                // identifiers in a row
                if (iden.isBasicType() || iden.isIdentifier())
                {
                    if (n.type == Tok.Star || n.type == Tok.OpenBracket)
                    {
                        // "T*" / "T[" -- only a decl if the whole type is
                        // followed by an identifier
                        int len = peekParseType;
                        if (lexer.peek(len).type == Tok.Identifier && len != 0)
                            return action.actOnDeclStmt(parseVarDecl());

                        Exp exp = parseExpression();
                        require(Tok.Seperator);
                        return action.actOnExprStmt(exp);
                    }

                    if (n.isIdentifier())
                        return action.actOnDeclStmt(parseVarDecl());

                    // Expression: a.b, a = b, a(b) etc.
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }
                // never reached: every path above returns (this case is
                // only entered with an identifier or, via the goto in
                // default, a basic type); falls through in D1.

            case Tok.Switch:
                messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType);
                return null;

            default:
                if (t.isBasicType())
                    goto case Tok.Identifier;
                if (t.type == Tok.Star)
                {
                    auto exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }
                messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType);
                return null;
        }
        // unreachable -- every switch branch returns; kept to satisfy the
        // compiler's return analysis
        messages.report(UnexpectedTok, t.location);
        return null;
    }

    /**
      Parse a variable declaration.

      Manually hardcoded to only support "type id [= exp];" as that is the
      only thing the codegen understands.
     */
    Decl parseVarDecl()
    {
        Id type = parseType;
        Id id = Id(lexer.next);
        Exp init;
        if (skip(Tok.Assign))
            init = parseExpression();
        require(Tok.Seperator);
        Decl d = action.actOnDeclarator(type, id, init);
        return d;
    }

    /**
      Parses a function/method given the already parsed return type and
      name. A trailing `;` instead of a body gives a bodyless declaration.
     */
    Decl parseFunc(ref Id type, ref Id name)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name);
        parseFuncArgs(func);

        if (lexer.peek.type == Tok.Seperator)
        {
            lexer.next;
            return func;
        }
        Stmt stmt = parseCompoundStatement();

        return action.actOnEndOfFunction(func, stmt);
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the initial paren and the ending paren is consumed.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        while (lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType();
            Id i;
            // the argument name is optional
            if (lexer.peek.type == Tok.Identifier)
                i = parseIdentifier();
            action.addFuncArg(func, t, i);

            if (lexer.peek.type == Tok.Comma)
                lexer.next;
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if,
      while and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (lexer.peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }

    /**
      Parses a function-body or similar, expects an opening brace to be
      the current token.

      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while (lexer.peek.type != Tok.CloseBrace)
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /**
      Consume one token and wrap it as an Id; reports an error if it is
      not an identifier.
     */
    Id parseIdentifier()
    {
        Token tok = lexer.next;

        if (tok.type is Tok.Identifier)
            return Id(tok);

        messages.report(UnexpectedTokSingle, tok.location)
            .arg(tok.getType)
            .arg(Tok.Identifier);
        // Error recovery: hand back the offending token anyway, like
        // require() does, instead of falling off the end of the function.
        return Id(tok);
    }

    /**
      Parse a dotted module name (`a.b.c`) into packages plus a final id.
     */
    ModuleName parseModuleName()
    {
        auto id = parseIdentifier();
        ModuleName mod;
        while (skip(Tok.Dot))
        {
            mod.packages ~= id;
            if (lexer.peek.type != Tok.Identifier) {
                messages.report(ExpectedIdAfterPackage, lexer.peek.location);
                goto Lerror;
            }
            id = parseIdentifier();
        }
        mod.id = id;
        return mod;
Lerror:
        // Skip the rest of the statement; stop at EOF so broken input
        // cannot loop forever.
        while (!skip(Tok.Seperator) && !on_a(Tok.EOF))
            lexer.next();
        return mod;
    }


    /**
      Parse a type - this includes pointer and array(at some point) types.
     */
    Id parseType()
    {
        Token type = lexer.next;

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            messages.report(InvalidType, type.location);

        currentType = Id(type);
        type = lexer.peek;

        // wrap the base type in pointer/array layers, innermost first
        while (type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if (type.type == Tok.Star)
            {
                currentType = PointerId(currentType);
                lexer.next;
            }
            else
            {
                lexer.next;
                if (lexer.peek.type == Tok.Integer)
                    currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer)));
                require(Tok.CloseBracket);

            }
            type = lexer.peek;
        }

        return currentType;
    }

    /**
      Lookahead-only version of parseType: returns the number of tokens a
      type starting at the current position would span, or 0 if the
      tokens do not form a valid type. Consumes nothing.
     */
    int peekParseType()
    {
        int i;
        Token type = lexer.peek(i);

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            return 0;

        currentType = Id(type);
        type = lexer.peek(++i);

        while (type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if (type.type == Tok.Star)
            {
                i++;
            }
            else
            {
                if (lexer.peek(i++).type != Tok.OpenBracket)
                    return 0;
                if (lexer.peek(i).type == Tok.Integer)
                {
                    i++;
                    if (lexer.peek(i++).type != Tok.CloseBracket)
                        return 0;
                }
                else
                    if (lexer.peek(i++).type != Tok.CloseBracket)
                        return 0;

            }
            type = lexer.peek(i);
        }

        return i;
    }

private:
    // -- Expression parsing -- //

    /**
      Parse postfix operations (member access `a.b` and indexing `a[i]`)
      applied to an already parsed target expression.
     */
    Exp parsePostfixExp(Exp target)
    {
        switch (lexer.peek.type)
        {
            case Tok.Dot:
                switch (lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = lexer.next;
                        Id member = Id(lexer.next);
                        Exp exp = action.actOnMemberReference(target, op.location, member);
                        return parsePostfixExp(exp);
                    default:
                        Token t = lexer.peek(1);
                        messages.report(ExpectedIdAfterDot, t.location);
                        // Error recovery: stop here. (Previously this fell
                        // through into the index-expression case below,
                        // consuming the dot as if it opened `[`.)
                        return target;
                }
            case Tok.OpenBracket:
                Token open = lexer.next;
                Exp index = parseExpression();
                Token close = require(Tok.CloseBracket);
                return action.actOnIndexEpr(target, open, index, close);
            default:
                return target;
        }
    }

    /**
      Precedence-climbing binary-expression parser; p is the minimum
      operator precedence accepted at this level.
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            // left-associative operators bind their right operand one
            // precedence level tighter
            int q = op.leftAssoc ? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: unary operators, parenthesized
      expressions, identifiers (with postfix ops and call syntax), casts
      and integer/string literals.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return action.actOnUnaryOp(next, parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(next));
            Exp iden = parsePostfixExp(value);
            switch (lexer.peek.type)
            {
                case Tok.OpenParentheses:
                    Token lp = lexer.next;
                    SmallArray!(Exp, 8) args;
                    while (lexer.peek.type != Tok.CloseParentheses)
                    {
                        if (lexer.peek.type == Tok.Comma)
                            lexer.next;
                        args ~= parseExpression();
                    }

                    Token rp = lexer.next();
                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);

                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Cast)
            return parseCast(next);
        else if (next.type == Tok.Integer)
            return action.actOnNumericConstant(next);
        else if (next.type == Tok.String)
            return action.actOnStringExp(next);

        messages.report(ExpectedExp, next.location)
            .fatal(ExitLevel.Parser);
        return null; // not reached -- fatal() terminates the parser
    }

    /**
      Parse `cast(T) exp`; the cast token has already been consumed.
     */
    Exp parseCast(ref Token _cast)
    {
        require(Tok.OpenParentheses);
        auto next = lexer.next;
        if (!next.isBasicType && !next.isIdentifier)
            messages.report(ExpectedCastType, next.location);

        require(Tok.CloseParentheses);
        auto exp = P();
        return action.actOnCastExpr(_cast, Id(next), exp);
    }

    /// A prefix operator and the precedence its operand is parsed with.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static const UnOp[] _unary =
    [
        {Tok.Minus, 4},
        {Tok.Star, 4}
    ];
    /// Return the unary-operator entry for t, or null.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator: precedence, associativity and semantic operator.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    static const BinOp[] _binary =
    [
        {Tok.Assign, 1, false, Operator.Assign},

        {Tok.Eq, 2, true, Operator.Eq},
        {Tok.Ne, 2, true, Operator.Ne},

        {Tok.Lt, 2, true, Operator.Lt},
        {Tok.Le, 2, true, Operator.Le},
        {Tok.Gt, 2, true, Operator.Gt},
        {Tok.Ge, 2, true, Operator.Ge},

        {Tok.Plus, 3, true, Operator.Add},
        {Tok.Minus, 3, true, Operator.Sub},

        {Tok.Star, 5, true, Operator.Mul},
        {Tok.Slash, 5, true, Operator.Div},
        {Tok.Percent, 5, true, Operator.Mod}
    ];
    /// Return the binary-operator entry for t, or null.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
      Consume and return the next token; reports an error (but still
      consumes) if it is not of type t.
     */
    Token require(Tok t)
    {
        if (lexer.peek().type != t)
            messages.report(UnexpectedTokSingle, lexer.peek.location)
                .arg(lexer.peek.getType)
                .arg(t);
        return lexer.next();
    }

    /// Consume the next token iff it is of type t; returns whether it was.
    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    /// True iff the next token is of type t (nothing is consumed).
    bool on_a(Tok t)
    {
        return lexer.peek.type == t;
    }

    Lexer lexer;
    SourceManager sm;
}