Mercurial > projects > dang
view src/parser/Parser.d @ 206:d3c148ca429b
Major moving of files. all src now goes into src, all docs in docs.
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 18:14:56 +0200 |
parents | |
children | e0551773a005 |
line wrap: on
line source
module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import basic.Message;

import basic.SmallArray,
       basic.SourceManager;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Recursive-descent parser.

  Pulls tokens from a Lexer and drives an Action (semantic callback
  interface) to build the output; errors are routed through the
  MessageHandler. The concrete node types are opaque here — Exp/Stmt/
  Decl/Module are all aliased to Object and only the Action knows what
  they really are.
 */
class Parser
{
    Action action;
    MessageHandler messages;
    // Opaque node handles — the Action implementation defines the real types.
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;
    alias Object Module;

    this(MessageHandler messages)
    {
        this.messages = messages;
    }

    /**
      Parse one source file into a Module.

      Stores sm/lexer/action in the instance for use by the private
      parse methods, consumes an optional leading `module a.b.c;`
      declaration (otherwise an implicit module named after the file is
      created), then parses declarations until EOF.
     */
    Module parse(SourceManager sm, Lexer lexer, Action act)
    {
        this.sm = sm;
        this.lexer = lexer;
        this.action = act;

        Module m;
        if (lexer.peek.type == Tok.Module)
        {
            Token _module = lexer.next;
            ModuleName name = parseModuleName();
            m = action.actOnModule(_module, sm.getText(name.asRange()));
            require(Tok.Seperator);
        }
        else
        {
            SLoc loc = lexer.peek.location;
            m = action.actOnImplicitModule(loc, sm.getFile(loc));
        }

        while (lexer.peek.type != Tok.EOF)
            foreach (d; parseDeclDef())
                action.actOnModuleDecl(m, d);

        return m;
    }

private:
    /**
      Parse a single top-level declaration entity.

      An import statement may expand to several Decls, hence the array
      return; everything else yields exactly one.
     */
    Decl[] parseDeclDef()
    {
        Token t = lexer.peek;
        if (t.type == Tok.Import)
            return parseImports();
        else
            return [parseDecl()];
    }

    /**
      Parse a variable, function or struct declaration.

      For `type name ...` the lookahead peekParseType decides whether a
      valid type starts here; on an invalid type the parser reports,
      skips ahead to the next Identifier and jumps back into the normal
      path via the parseDeclAfterInvalidType label (error recovery).
      The final report(...).fatal(...) path does not return a value —
      relies on fatal() terminating; D1 accepts the missing return.
     */
    Decl parseDecl()
    {
        Token t = lexer.peek;

        if (t.isBasicType || t.isIdentifier)
        {
            Id type;
            Id iden;
            // Number of tokens a well-formed type would occupy (0 = not a type).
            int len = peekParseType;
            if(lexer.peek(len).type == Tok.Identifier && len != 0)
            {
                type = parseType;
parseDeclAfterInvalidType:
                iden = Id(require(Tok.Identifier));
                Token next = lexer.peek();
                if (next.type == Tok.Seperator)
                {
                    // Plain declaration: `type name;`
                    Token sep = lexer.next();
                    return action.actOnDeclarator(type, iden, null);
                }
                else if (next.type == Tok.Assign)
                {
                    // Initialized declaration: `type name = exp;`
                    Token assign = lexer.next();
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnDeclarator(type, iden, exp);
                }
                else if (next.type == Tok.OpenParentheses)
                    // Function definition: `type name(...)`
                    return parseFunc(type, iden);
                else
                    messages.report(UnexpectedTok, next.location).arg(next.getType);
            }
            // Error recovery: the tokens here do not form a valid type.
            // Report, skip past them to the next Identifier and re-enter
            // the declaration path above with that token as the "type".
            t = lexer.peek(len);
            messages.report(InvalidDeclType, t.location)
                .arg(sm.getText(t.asRange));
            while(len--)
                lexer.next;
            while(lexer.peek.type != Tok.Identifier)
                lexer.next;
            type = Id(lexer.peek);
            goto parseDeclAfterInvalidType;
        }
        else if (t.type == Tok.Struct)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));
            return parseStruct(type, iden);
        }
        messages.report(UnexpectedTok, t.location)
            .arg(t.getType)
            .arg(Tok.Identifier)
            .fatal(ExitLevel.Parser);
    }

    /**
      Parse a series of imports belonging to a single import token.

      Handles `import A, B.C;`, renamed imports `import B = A.A;` and
      selective imports `import A : a, print = b;`.

      NOTE(review): on the Tok.Comma path the import is appended inside
      the switch AND again by the `res ~= ...` after the switch — looks
      like a double add for comma-separated lists; confirm against the
      Action implementation before changing.
     */
    Decl[] parseImports()
    {
        Token _import = require(Tok.Import);
        SmallArray!(Decl) res;
        void addToRes(Decl d) { res ~= d; }

        bool done = false;
        while (!done && !on_a(Tok.Seperator))
        {
            ModuleName mod = parseModuleName();
            Token tok = lexer.peek;
            switch (tok.type)
            {
                case Tok.Comma:
                    // import A, B.C;
                    // parse another module-name
                    lexer.next();
                    res ~= action.actOnImport(_import, mod, null);
                    break;
                case Tok.Assign:
                    // import B = A.A;
                    //        ^- must be a single identifier
                    // renamed import
                    if (mod.packages.length != 0)
                    {
                        SLoc loc = mod.packages[0].tok.location;
                        messages.report(RenameMustBeSingleIdent, loc);
                    }
                    //if (isStatic)
                    //    error("Static imports cannot be renamed");
                    lexer.next();
                    Id name = mod.id;
                    mod = parseModuleName();
                    // create from mod and rename to `name`
                    res ~= action.actOnImport(_import, mod, &name);
                    break;
                case Tok.Colon:
                    // import A : a;
                    // selective imports, potentially import A : print = a
                    lexer.next();
                    Decl d = action.actOnImport(_import, mod, null);
                    // do-while on a comma:
                    //   add explicit symbol, optionally renamed via `=`
                    do
                    {
                        Id sym = parseIdentifier();
                        Id dummy;
                        Id* name = null;
                        if (skip(Tok.Assign))
                        {
                            dummy = sym;
                            name = &dummy;
                            sym = parseIdentifier();
                        }
                        action.addSelectiveImport(d, sym, name);
                    } while (skip(Tok.Comma));
                    require(Tok.Seperator);
                    res ~= d;
                    return res.safe();
                case Tok.Seperator:
                    done = true;
                    break;
                default:
                    goto Lerror;
            }
            res ~= action.actOnImport(_import, mod, null);
        }
        require(Tok.Seperator);
        return res.safe();
Lerror:
        // Skip to the end of the statement and return whatever was parsed.
        while (!on_a (Tok.Seperator))
            lexer.next();
        return res.safe();
    }

    /**
      Parse struct body: members are full declarations (parseDecl) until
      a token that can no longer start one; `{` and `}` are consumed here.
     */
    Decl parseStruct(Id type, Id iden)
    {
        auto decl = action.actOnDeclarator(type, iden, null);
        require(Tok.OpenBrace);
        while(lexer.peek.isBasicType || lexer.peek.isIdentifier)
        {
            auto m_decl = parseDecl();
            action.actOnStructMember(decl, m_decl);
            /*
            Id var_type = Id(lexer.next);
            Id var_iden = Id(require(Tok.Identifier));
            Token next = lexer.peek();
            if (next.type == Tok.Seperator)
            {
                Token sep = lexer.next();
                action.actOnStructMember(decl, var_type, var_iden, null);
                continue;
            }
            else if (next.type == Tok.Assign)
            {
                Token assign = lexer.next();
                Exp exp = parseExpression();
                require(Tok.Seperator);
                action.actOnStructMember(decl, var_type, var_iden, exp);
                continue;
            }
            messages.report(UnexpectedTok, next.location).arg(next.getType);*/
        }
        require(Tok.CloseBrace);
        return decl;
    }

    /**
      Parse statements.

      This is the place to attack!

      NOTE(review): case Tok.Identifier ends without break/return on the
      path where the leading token is neither decl-start nor handled —
      D1 implicit fall-through into case Tok.Switch; since
      iden.isIdentifier() is true for Tok.Identifier that path looks
      unreachable, but verify before restructuring.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;
        switch(t.type)
        {
            case Tok.Return:
                // `return;` or `return exp;`
                Token ret = lexer.next;
                Exp exp;
                if (lexer.peek.type != Tok.Seperator)
                    exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            /*
               if (cond)
                 single statement | compound statement
               [else
                 single statement | compound statement]
             */
            case Tok.If:
                Token _if = lexer.next();
                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);
                Stmt thenB = parseSingleOrCompoundStatement();
                // if there is no else part we use the if as token, to have
                // something than can be passed along
                Token _else = _if;
                Stmt elseB;
                if (lexer.peek.type == Tok.Else)
                {
                    _else = lexer.next;
                    elseB = parseSingleOrCompoundStatement();
                }
                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);

            /*
               while (cond)
                 single statement | compound statement
             */
            case Tok.While:
                Token _while = lexer.next;
                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);
                Stmt bodyStmt = parseSingleOrCompoundStatement();
                return action.actOnWhileStmt(_while, cond, bodyStmt);

            /*
               One of four things:
               A declaration of a function/variable `type id ...`
               A direct assignment `id = exp;`
               An indirect assignment `id.id = exp`
               Some sort of free standing expression

               The assignments should be handled as binary expressions?
             */
            case Tok.Identifier:
                Token iden = lexer.peek;
                Token n = lexer.peek(1);
                // Must be an decl, if we start with a basic type, or two
                // identifiers in a row
                if (iden.isBasicType() || iden.isIdentifier())
                {
                    if ( n.type == Tok.Star || n.type == Tok.OpenBracket)
                    {
                        // `id*` / `id[` is ambiguous: pointer/array decl vs.
                        // multiplication/indexing — disambiguate by lookahead.
                        int len = peekParseType;
                        if(lexer.peek(len).type == Tok.Identifier && len != 0)
                            return action.actOnDeclStmt(parseVarDecl());

                        Exp exp = parseExpression();
                        require(Tok.Seperator);
                        return action.actOnExprStmt(exp);
                    }

                    if (n.isIdentifier())
                        return action.actOnDeclStmt(parseVarDecl());

                    // Expression: a.b, a = b, a(b) etc.
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }
            case Tok.Switch:
                // switch statements are not implemented yet.
                messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType);
                return null;
            default:
                if (t.isBasicType())
                    goto case Tok.Identifier;
                if (t.type == Tok.Star)
                {
                    // Statement starting with a dereference, e.g. `*p = ...;`
                    auto exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }
                messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType);
                return null;
        }
        messages.report(UnexpectedTok, t.location);
        return null;
    }

    Decl parseVarDecl()
    {
        // manually hardcoded to only support "type id [= exp];"
        // as that is the only thing the codegen understands
        Id type = parseType;
        Id id = Id(lexer.next);
        Exp init;
        if (skip(Tok.Assign))
            init = parseExpression();
        require(Tok.Seperator);
        Decl d = action.actOnDeclarator(type, id, init);
        return d;
    }

    /**
      Parses a function/method given the already parsed return type and name.

      A trailing `;` instead of a body yields a declaration-only function.
     */
    Decl parseFunc(ref Id type, ref Id name)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name);
        parseFuncArgs(func);

        if(lexer.peek.type == Tok.Seperator)
        {
            lexer.next;
            return func;
        }
        Stmt stmt = parseCompoundStatement();

        return action.actOnEndOfFunction(func, stmt);
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the intitial paren and the ending paren is consumed.
      Argument names are optional (prototype-style `f(int)` is accepted).
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        while(lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType();
            Id i;
            if(lexer.peek.type == Tok.Identifier)
                i = parseIdentifier();
            action.addFuncArg(func, t, i);

            if(lexer.peek.type == Tok.Comma)
                lexer.next;
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if,
      while and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (lexer.peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }

    /**
      Parses a function-body or similar, expects an opening brace to be
      the current token.

      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while (lexer.peek.type != Tok.CloseBrace)
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /**
      Consume and wrap an Identifier token.

      NOTE(review): the error path reports but falls off without a
      return value — presumably the report is fatal or the garbage Id is
      never used; confirm MessageHandler semantics before touching.
     */
    Id parseIdentifier()
    {
        Token tok = lexer.next;

        if (tok.type is Tok.Identifier)
            return Id(tok);

        messages.report(UnexpectedTokSingle, tok.location)
            .arg(tok.getType)
            .arg(Tok.Identifier);
    }

    /**
      Parse a dotted module name `a.b.c`: leading identifiers go into
      mod.packages, the last one becomes mod.id. On error, skips to the
      next `;` and returns the partial name.
     */
    ModuleName parseModuleName()
    {
        auto id = parseIdentifier();
        ModuleName mod;
        while (skip(Tok.Dot))
        {
            mod.packages ~= id;
            if (lexer.peek.type != Tok.Identifier)
            {
                messages.report(ExpectedIdAfterPackage, lexer.peek.location);
                goto Lerror;
            }
            id = parseIdentifier();
        }
        mod.id = id;
        return mod;
Lerror:
        while (!skip(Tok.Seperator))
            lexer.next();
        return mod;
    }

    /**
      Parse a type - this includes pointer and array(at some point) types.

      Consumes a basic type or identifier, then any number of `*`
      (pointer) and `[n]` / `[]` (array) suffixes, wrapping the Id as it
      goes.
     */
    Id parseType()
    {
        Token type = lexer.next;
        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            messages.report(InvalidType, type.location);

        currentType = Id(type);
        type = lexer.peek;

        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if(type.type == Tok.Star)
            {
                currentType = PointerId(currentType);
                lexer.next;
            }
            else
            {
                lexer.next;
                if(lexer.peek.type == Tok.Integer)
                    currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer)));
                require(Tok.CloseBracket);
            }
            type = lexer.peek;
        }

        return currentType;
    }

    /**
      Lookahead version of parseType: consumes nothing and returns the
      number of tokens a well-formed type starting at the current
      position would span, or 0 if no valid type starts here.
     */
    int peekParseType()
    {
        int i;
        Token type = lexer.peek(i);
        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            return 0;

        currentType = Id(type);
        type = lexer.peek(++i);

        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if(type.type == Tok.Star)
            {
                i++;
            }
            else
            {
                if(lexer.peek(i++).type != Tok.OpenBracket)
                    return 0;
                if(lexer.peek(i).type == Tok.Integer)
                {
                    i++;
                    if(lexer.peek(i++).type != Tok.CloseBracket)
                        return 0;
                }
                else if(lexer.peek(i++).type != Tok.CloseBracket)
                    return 0;
            }
            type = lexer.peek(i);
        }

        return i;
    }

private:
    // -- Expression parsing -- //

    /**
      Parse postfix operators on an already-parsed primary: member
      access `a.b` (recursing for chains) and indexing `a[exp]`.

      NOTE(review): the error branch under Tok.Dot reports and then
      (D1 implicit fall-through) continues into the Tok.OpenBracket
      case — confirm intended before adding a break.
     */
    Exp parsePostfixExp(Exp target)
    {
        switch(lexer.peek.type)
        {
            case Tok.Dot:
                switch(lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = lexer.next;
                        Id member = Id(lexer.next);
                        Exp exp = action.actOnMemberReference(target, op.location, member);
                        return parsePostfixExp(exp);
                    default:
                        Token t = lexer.peek(1);
                        messages.report(ExpectedIdAfterDot, t.location);
                }
            case Tok.OpenBracket:
                Token open = lexer.next;
                Exp index = parseExpression();
                Token close = require(Tok.CloseBracket);
                return action.actOnIndexEpr(target, open, index, close);
            default:
                return target;
        }
    }

    /**
      Precedence-climbing expression parser; p is the minimum binary
      operator precedence to consume. Left-associative operators recurse
      with prec+1, right-associative (Assign) with prec.
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2);
            next = lexer.peek();
        }
        return exp;
    }

    /**
      Parse a primary (prefix) expression: unary op, parenthesized
      expression, identifier (with postfix chain and optional call),
      cast, integer or string literal. Anything else is a fatal error.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return action.actOnUnaryOp(next, parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(next));
            Exp iden = parsePostfixExp(value);
            switch(lexer.peek.type)
            {
                case Tok.OpenParentheses:
                    // Call expression: gather comma-separated arguments.
                    Token lp = lexer.next;
                    SmallArray!(Exp, 8) args;
                    while(lexer.peek.type != Tok.CloseParentheses)
                    {
                        if(lexer.peek.type == Tok.Comma)
                            lexer.next;
                        args ~= parseExpression();
                    }

                    Token rp = lexer.next();
                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);

                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Cast)
            return parseCast(next);
        else if (next.type == Tok.Integer)
            return action.actOnNumericConstant(next);
        else if (next.type == Tok.String)
            return action.actOnStringExp(next);

        messages.report(ExpectedExp, next.location)
            .fatal(ExitLevel.Parser);
        return null;
    }

    /**
      Parse `cast(Type) unary-exp`; the `cast` token itself has already
      been consumed by the caller.
     */
    Exp parseCast(ref Token _cast)
    {
        require(Tok.OpenParentheses);
        auto next = lexer.next;
        if(!next.isBasicType && !next.isIdentifier)
            messages.report(ExpectedCastType, next.location);

        require(Tok.CloseParentheses);
        auto exp = P();
        return action.actOnCastExpr(_cast, Id(next), exp);
    }

    // Unary operator table: token and the precedence its operand binds at.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static const UnOp[] _unary =
    [
        {Tok.Minus, 4},
        {Tok.Star, 4}
    ];

    // Lookup: unary operator entry for token t, or null.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    // Binary operator table: token, precedence, associativity and the
    // semantic Operator handed to the Action.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    static const BinOp[] _binary =
    [
        {Tok.Assign,  1, false, Operator.Assign},
        {Tok.Eq,      2, true,  Operator.Eq},
        {Tok.Ne,      2, true,  Operator.Ne},
        {Tok.Lt,      2, true,  Operator.Lt},
        {Tok.Le,      2, true,  Operator.Le},
        {Tok.Gt,      2, true,  Operator.Gt},
        {Tok.Ge,      2, true,  Operator.Ge},
        {Tok.Plus,    3, true,  Operator.Add},
        {Tok.Minus,   3, true,  Operator.Sub},
        {Tok.Star,    5, true,  Operator.Mul},
        {Tok.Slash,   5, true,  Operator.Div},
        {Tok.Percent, 5, true,  Operator.Mod}
    ];

    // Lookup: binary operator entry for token t, or null.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:
    // Consume and return the next token, reporting if it is not of type t.
    // Note: the token is consumed and returned even on mismatch.
    Token require(Tok t)
    {
        if (lexer.peek().type != t)
            messages.report(UnexpectedTokSingle, lexer.peek.location)
                .arg(lexer.peek.getType)
                .arg(t);
        return lexer.next();
    }

    // Consume the next token iff it is of type t; returns whether it was.
    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    // True iff the current (unconsumed) token is of type t.
    bool on_a(Tok t)
    {
        return lexer.peek.type == t;
    }

    Lexer lexer;
    SourceManager sm;
}