Mercurial > projects > dang
view src/parser/Parser.d @ 209:42e663451371
Renamed some of the actions. Declarations now have their own action.
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 19:05:17 +0200 |
parents | e0551773a005 |
children |
line wrap: on
line source
module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import basic.Message,
       basic.Attribute;

import basic.SmallArray,
       basic.SourceManager;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Recursive-descent parser.

  The parser pulls tokens from a Lexer and never builds syntax nodes itself;
  every recognised construct is handed to the Action object, which returns
  opaque Object handles (aliased below as Exp, Stmt, Decl and Module).
  Problems are reported through the MessageHandler given to the constructor.
 */
class Parser
{
    Action action;
    MessageHandler messages;
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;
    alias Object Module;

    this(MessageHandler messages)
    {
        this.messages = messages;
    }

    /**
      Parse a whole module.

      If the first token is `module`, the module name is parsed and passed to
      actOnModule; otherwise actOnImplicitModule is called with the file of the
      first token. Every following declaration is handed to actOnModuleDecl.

      Params:
        sm    = source manager used to fetch token text and file names
        lexer = token source
        act   = receiver of all parse events

      Returns: the module handle produced by the action.
     */
    Module parse(SourceManager sm, Lexer lexer, Action act)
    {
        this.sm = sm;
        this.lexer = lexer;
        this.action = act;

        Module m;
        if (peek.type == Tok.Module)
        {
            Token _module = next();
            ModuleName name = parseModuleName();
            m = action.actOnModule(_module, sm.getText(name.asRange()));
            require(Tok.Seperator);
        }
        else
        {
            SLoc loc = peek.location;
            m = action.actOnImplicitModule(loc, sm.getFile(loc));
        }

        // Stack of attribute scopes; the top entry applies to the next decl.
        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            foreach (d; parseDeclDef(nes[$-1].a))
                action.actOnModuleDecl(m, d);

            nes = parseAttributeScope(nes);
        }

        return m;
    }

private:
    /**
      Parse one module-level item: either an import statement (which may
      yield several declarations) or a single declaration.
     */
    Decl[] parseDeclDef(Attribute a)
    {
        if ( isa (Tok.Import) )
            return parseImports();

        return [parseDecl(a)];
    }

    /**
      Parse a single declaration: struct, class, interface, alias, variable
      or function.

      Params:
        att = attributes in effect for this declaration

      Returns: the declaration handle from the action, or null when the
      current token cannot start a declaration (the token is then skipped).
     */
    Decl parseDecl(Attribute att)
    {
        switch(peek.type)
        {
            case Tok.Struct:
                next();
                Id iden = Id(require(Tok.Identifier));
                return parseStruct(iden, att);

            case Tok.Class:
                next();
                Id iden = Id(require(Tok.Identifier));
                return parseClass(iden, att);

            case Tok.Interface:
                next();
                Id iden = Id(require(Tok.Identifier));
                return parseInterface(iden, att);

            case Tok.Alias:
                next();
                auto decl = parseDecl(Attribute());
                return action.actOnAliasDecl(decl, att);

            case Tok.Identifier:
                Id type = parseType;
                Id iden = Id(require(Tok.Identifier));

                switch(peek.type)
                {
                    case Tok.Seperator:
                        next(); // consume the ';'
                        return action.actOnVarDecl(type, iden, null, att);

                    case Tok.Assign:
                        next(); // consume the '='
                        Exp exp = parseExpression();
                        require(Tok.Seperator);
                        return action.actOnVarDecl(type, iden, exp, att);

                    case Tok.OpenParentheses:
                        return parseFunc(type, iden, att);

                    default:
                        // Unexpected token after "type name": report it and
                        // still produce an uninitialised variable decl.
                        auto n1 = next();
                        isEOF(type.tok);
                        messages.report(UnexpectedTok, n1.location).arg(n1.get(sm));
                        return action.actOnVarDecl(type, iden, null, att);
                }
                messages.report(InvalidDeclType, peek.location)
                    .arg(sm.getText(peek.asRange));

            default:
                if (peek.isBasicType)
                    goto case Tok.Identifier;

                messages.report(UnexpectedTok, peek.location)
                    .arg(sm.getText(peek.asRange));

                next();
                return null;
        }
        messages.report(UnexpectedTok, peek.location)
            .arg(peek.get(sm))
            .arg(Tok.Identifier)
            .fatal(ExitLevel.Parser);
    }

    /**
      Parse the optional linkage part of an `extern` attribute.

      Expects the current token to be `extern`. When it is not followed by
      '(' nothing is consumed and Extern.D is returned. Otherwise `extern`,
      '(' and the linkage name (including the "++" of "C++") are consumed;
      the closing ')' is left for the caller to consume.
     */
    Extern parseLinkageType()
    {
        Extern e = Extern.D;
        if(peek(1).type != Tok.OpenParentheses)
            return e;

        next(); // the `extern` token
        next(); // the '('

        Token t = require(Tok.Identifier);

        switch(sm.getText(t.asRange))
        {
            case "C":
                if (peek(0).type == Tok.Plus &&
                    peek(1).type == Tok.Plus)
                {
                    e = Extern.CPlusPlus;
                    // Consume both '+' tokens, otherwise the ')' check
                    // below always fails for extern(C++).
                    next();
                    next();
                }
                else
                    e = Extern.C;
                break;
            case "D":
                break;
            case "Windows":
                e = Extern.Windows;
                break;
            case "Pascal":
                e = Extern.Pascal;
                break;
            case "System":
                e = Extern.System;
                break;
            default:
                messages.report(UnexpectedLinkType, t.location);
        }

        if (!isa(Tok.CloseParentheses))
            messages.report(UnexpectedTokSingle, peek.location)
                .arg(peek.get(sm))
                .arg(typeToString[Tok.CloseParentheses]);

        return e;
    }

    /**
      Parse a series of imports belonging to a single import token.

      Handles plain (`import A, B.C;`), renamed (`import B = A.A;`) and
      selective (`import A : a, print = b;`) imports. Each imported module is
      registered with the action exactly once.
     */
    Decl[] parseImports()
    {
        Token _import = require(Tok.Import);
        SmallArray!(Decl) res;

        bool done = false;
        while (!done && !isa(Tok.Seperator))
        {
            ModuleName mod = parseModuleName();
            Token tok = peek;
            switch (tok.type)
            {
                case Tok.Comma:
                    // import A, B.C;
                    // another module-name follows; `mod` itself is
                    // registered after the switch
                    next();
                    break;
                case Tok.Assign:
                    // import B = A.A;
                    //        ^- must be a single identifier
                    // renamed import
                    if (mod.packages.length != 0)
                    {
                        SLoc loc = mod.packages[0].tok.location;
                        messages.report(RenameMustBeSingleIdent, loc);
                    }
                    //if (isStatic)
                    //    error("Static imports cannot be renamed");
                    next();
                    Id name = mod.id;
                    mod = parseModuleName();
                    // create from mod and rename to `name`
                    res ~= action.actOnImport(_import, mod, &name);
                    // The renamed import is registered above; skip the
                    // plain registration after the switch so it is not
                    // imported a second time under its own name.
                    if (!skip(Tok.Comma))
                        done = true;
                    continue;
                case Tok.Colon:
                    // import A : a;
                    // selective imports, potentially import A : print = a
                    next();
                    Decl d = action.actOnImport(_import, mod, null);
                    // do-while on a comma:
                    //   add explicit symbol
                    do
                    {
                        Id sym = parseIdentifier();
                        Id dummy;
                        Id* name = null;
                        if (skip(Tok.Assign))
                        {
                            dummy = sym;
                            name = &dummy;
                            sym = parseIdentifier();
                        }
                        action.addSelectiveImport(d, sym, name);

                    } while (skip(Tok.Comma));
                    require(Tok.Seperator);
                    res ~= d;
                    return res.safe();
                case Tok.Seperator:
                    done = true;
                    break;
                default:
                    goto Lerror;
            }
            res ~= action.actOnImport(_import, mod, null);
        }

        require(Tok.Seperator);
        return res.safe();
Lerror:
        // Skip to the end of the statement; stop at EOF so that truncated
        // input cannot keep this loop spinning.
        while (!isa(Tok.Seperator) && !isa(Tok.EOF))
            next();
        return res.safe();
    }

    /**
      Parse interface

      The `interface` keyword and the name have already been consumed by the
      caller. Parses the optional list of super interfaces and the body.
     */
    Decl parseInterface(Id iden, Attribute att)
    {
        auto decl = action.actOnInterfaceDecl(iden, att);

        if (peek.type == Tok.Colon)
            // SuperInterfaces
        {
            next(); // Remove colon.

            Id identifier;

            // The identifier
            identifier = Id(require(Tok.Identifier));

            action.actOnInterfaceBaseClass(decl, identifier);

            // We should now have an optional list of items, each starting ','
            while (peek.type == Tok.Comma)
            {
                next(); // Remove comma

                // The identifier
                identifier = Id(require(Tok.Identifier));

                action.actOnInterfaceBaseClass(decl, identifier);
            }
        }

        require(Tok.OpenBrace);

        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            auto m_decl = parseDecl(nes[$-1].a);
            action.actOnInterfaceMember(decl, m_decl);

            nes = parseAttributeScope(nes);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Parse class

      The `class` keyword and the name have already been consumed by the
      caller. A member starting with `this` is parsed as a function whose
      return type is the class identifier.
     */
    Decl parseClass(Id iden, Attribute att)
    {
        auto decl = action.actOnClassDecl(iden, att);

        if (peek.type == Tok.Colon)
            // BaseClassList - Super class and interfaces(in that order)
        {
            next(); // Remove colon.

            // NOTE: the protection token is consumed but not passed on to
            // the action.
            Token protection;
            Id identifier;

            // First we expect an optional protection level.
            if (peek.isBaseClassProtection)
                protection = next();
            // Then the identifier
            identifier = Id(require(Tok.Identifier));

            action.actOnClassBaseClass(decl, identifier);

            // We should now have an optional list of items, each starting ','
            while (peek.type == Tok.Comma)
            {
                next(); // Remove comma

                // First we expect an optional protection level.
                if (peek.isBaseClassProtection)
                    protection = next();
                // Then the identifier
                identifier = Id(require(Tok.Identifier));

                action.actOnClassBaseClass(decl, identifier);
            }
        }

        require(Tok.OpenBrace);

        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            switch(peek.type)
            {
                case Tok.This:
                    auto id = Id(next);
                    auto m_decl = parseFunc(iden, id, nes[$-1].a);
                    action.actOnClassMember(decl, m_decl);
                    break;

                default:
                    auto m_decl = parseDecl(nes[$-1].a);
                    action.actOnClassMember(decl, m_decl);
            }

            nes = parseAttributeScope(nes);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Parse struct

      The `struct` keyword and the name have already been consumed by the
      caller.
     */
    Decl parseStruct(Id iden, Attribute att)
    {
        auto decl = action.actOnStructDecl(iden, att);

        require(Tok.OpenBrace);

        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            auto m_decl = parseDecl(nes[$-1].a);
            action.actOnStructMember(decl, m_decl);

            nes = parseAttributeScope(nes);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Create the initial attribute stack: one default entry marked Scope.
     */
    Att[] parseAttributeInit()
    {
        Att[] nes;
        nes ~= Att();
        nes[0].nested = Scope;
        return nes;
    }

    /**
      Unwind the attribute stack after a declaration has been parsed.

      Entries marked Single are dropped. Then, for each '}' while more than
      the base entry remains, entries are popped up to and including the
      nearest Scope entry; the '}' is consumed only when such an entry is
      popped.
     */
    Att[] parseAttributeScope(Att[] nes)
    {
        while ( nes[$-1].nested == Single )
            nes.length = nes.length - 1;

        while ( isa(Tok.CloseBrace) && nes.length > 1)
        {
            while ( nes.length > 1 )
            {
                if( nes[$-1].nested == Scope )
                {
                    nes.length = nes.length - 1;
                    next();
                    break;
                }
                nes.length = nes.length - 1;
            }
        }

        return nes;
    }

    /**
      Parse a run of attribute keywords, starting from the attributes in
      `last`.

      A trailing ':' marks the result as All, a trailing '{' as Scope
      (both are consumed); otherwise it is Single.
     */
    Att parseAttribute(Att last)
    {
        // Applies the attribute keyword at the current token and consumes it.
        Att _parseAttribute(Att last)
        {
            Att a = last;
            a.nested = Single;

            switch(peek.type)
            {
                case Tok.Public:
                    a.a.setProtection(Protection.Public);
                    break;
                case Tok.Private:
                    a.a.setProtection(Protection.Private);
                    break;
                case Tok.Package:
                    a.a.setProtection(Protection.Package);
                    break;
                case Tok.Protected:
                    a.a.setProtection(Protection.Protected);
                    break;
                case Tok.Export:
                    a.a.setProtection(Protection.Export);
                    break;
                case Tok.Static:
                    a.a.setStatic;
                    break;
                case Tok.Final:
                    a.a.setFinal;
                    break;
                case Tok.Const:
                    a.a.setConst;
                    break;
                case Tok.Abstract:
                    a.a.setAbstract;
                    break;
                case Tok.Override:
                    a.a.setOverride;
                    break;
                case Tok.Deprecated:
                    a.a.setDeprecated;
                    break;
                case Tok.Auto:
                    a.a.setAuto;
                    break;
                case Tok.Extern:
                    Extern e = parseLinkageType;
                    a.a.setExtern(e);
                    break;
            }
            // Consumes the keyword itself, or for extern(...) the ')'.
            next();

            return a;
        }

        Att a = _parseAttribute(last);

        while (peek.isAttribute)
        {
            a = parseAttribute(a);
        }

        if (peek.type == Tok.Colon)
        {
            a.nested = All;
            next();
        }
        else if (peek.type == Tok.OpenBrace)
        {
            a.nested = Scope;
            next();
        }

        return a;
    }

    /// How far an attribute entry reaches: one declaration, a { } block,
    /// or everything that follows (attribute followed by ':').
    enum : uint
    {
        Single,
        Scope,
        All
    }

    /// One entry of the attribute stack.
    struct Att
    {
        Attribute a;
        uint nested;
    }

    /**
      Parse statements.

      This is the place to attack!

      Returns: the statement handle from the action, or null when the
      current token cannot start a statement.
     */
    Stmt parseStatement()
    {
        switch (peek.type)
        {
            case Tok.Return:
                Token ret = next();
                Exp exp;
                if (peek.type != Tok.Seperator)
                    exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            case Tok.If:
                Token _if = next();

                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);

                Stmt thenB = parseSingleOrCompoundStatement();

                // if there is no else part we use the if as token, to have
                // something that can be passed along
                Token _else = _if;
                Stmt elseB;
                if (peek.type == Tok.Else)
                {
                    _else = next();
                    elseB = parseSingleOrCompoundStatement();
                }
                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);

            case Tok.While:
                Token _while = next();
                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);
                Stmt bodyStmt = parseSingleOrCompoundStatement();
                return action.actOnWhileStmt(_while, cond, bodyStmt);

            case Tok.For:
                Token _for = next();
                require(Tok.OpenParentheses);
                Stmt init;
                if ( isa(Tok.Seperator))
                    require(Tok.Seperator);
                else
                    init = parseStatement();

                Exp cond;
                if ( !isa(Tok.Seperator))
                    cond = parseExpression();
                require(Tok.Seperator);

                Exp incre;
                if ( !isa(Tok.CloseParentheses))
                    incre = parseExpression();
                require(Tok.CloseParentheses);

                Stmt bodyStmt = parseSingleOrCompoundStatement();
                return action.actOnForStmt(_for, init, cond, incre, bodyStmt);

            case Tok.Switch:
                auto t = next();
                require(Tok.OpenParentheses);
                auto target = parseExpression();
                auto res = action.actOnStartOfSwitchStmt(t, target);
                require(Tok.CloseParentheses);
                require(Tok.OpenBrace);
                while (true)
                {
                    Stmt[] statements;
                    if (isa(Tok.Default))
                    {
                        Token _default = next();
                        require(Tok.Colon);
                        statements.length = 0;
                        while (peek.type != Tok.Case
                                && peek.type != Tok.Default
                                && peek.type != Tok.CloseBrace)
                            statements ~= parseStatement();
                        action.actOnDefaultStmt(res, _default, statements);
                        continue;
                    }

                    Token _case = peek;
                    if (_case.type != Tok.Case)
                        break;
                    next();

                    Exp[] literals;
                    do
                    {
                        Exp e = parseExpression();
                        literals ~= e;
                    }
                    while (skip(Tok.Comma));
                    require(Tok.Colon);

                    while (peek.type != Tok.Case
                            && peek.type != Tok.Default
                            && peek.type != Tok.CloseBrace)
                        statements ~= parseStatement();

                    action.actOnCaseStmt(res, _case, literals, statements);

                    if (peek.type == Tok.CloseBrace)
                        break;
                }
                require(Tok.CloseBrace);
                return res;

            case Tok.Star:
                auto exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnExprStmt(exp);

            case Tok.Identifier:
                // Decide by lookahead whether this is a declaration
                // ("Type name", "Type* name", "Type function...") or an
                // expression statement.
                // If it's a '*' it must be a method. Otherwise it won't
                // make any sense.
                if (isa(Tok.Function, 1)   ||
                    isa(Tok.Identifier, 1) ||
                    isa(Tok.Star, 1))
                {
                    Attribute a;
                    return action.actOnDeclStmt(parseDecl(a));
                }

                if (isa(Tok.OpenBracket, 1))
                {
                    int i = 1;
                    while (isa(Tok.OpenBracket, i)  ||
                           isa(Tok.Star, i)         ||
                           isa(Tok.Identifier, i))
                    {
                        if (isa(Tok.Identifier, i))
                            return action.actOnDeclStmt(parseDecl(Attribute()));

                        i++;
                        if (isa(Tok.Star,i-1))
                            continue;
                        // Must be OpenBracket here..

                        if (isa(Tok.Integer, i))
                            i++;
                        else
                            if (isa(Tok.CloseBracket, i))
                                return action.actOnDeclStmt(parseDecl(Attribute()));
                            else
                                i++;

                        if (!isa(Tok.CloseBracket, i))
                            break;
                        i++;
                    }
                    if (isa(Tok.Function, i))
                        return action.actOnDeclStmt(parseDecl(Attribute()));
                }

                // Expression: a.b, a = b, a(b) etc.
                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnExprStmt(exp);

            case Tok.Void: // And all basic types
                return action.actOnDeclStmt(parseVarDecl());

            default:
                if (peek.isBasicType)
                    goto case Tok.Void;

                messages.report(UnexpectedBeginStmt, peek.location).arg(peek.get(sm));
                require(Tok.Seperator);
                return null;
        }
    }

    /**
      Parse a local variable declaration of the form "type id [= exp];".
     */
    Decl parseVarDecl()
    {
        // manually hardcoded to only support "type id [= exp];"
        // as that is the only thing the codegen understands
        Id type = parseType();
        Id id = Id(next());
        Exp init;
        if (skip(Tok.Assign))
            init = parseExpression();
        require(Tok.Seperator);
        Attribute att;
        Decl d = action.actOnVarDecl(type, id, init, att);
        return d;
    }

    /**
      Parses a function/method given the already parsed return type and name

      A parameter list followed directly by ';' yields a declaration without
      a body; otherwise a compound statement is parsed as the body.
     */
    Decl parseFunc(ref Id type, ref Id name, Attribute att)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name, att);
        parseFuncArgs(func);

        if(peek.type == Tok.Seperator)
        {
            next();
            return func;
        }
        Stmt stmt = parseCompoundStatement();

        return action.actOnEndOfFunction(func, stmt);
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the initial paren and the ending paren are consumed.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        // Also stop at EOF: parseType only reports a non-fatal error there,
        // so without this check an unterminated list would loop forever.
        while(peek.type != Tok.CloseParentheses && !isa(Tok.EOF))
        {
            auto t = parseType();
            Id i;
            if(peek.type == Tok.Identifier)
                i = parseIdentifier();
            action.addFuncArg(func, t, i);

            if(peek.type == Tok.Comma)
                next();
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if, while
      and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }

    /**
      Parses a function-body or similar, expects an opening brace to be the
      current token.

      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while ( !isa(Tok.CloseBrace) && !isa(Tok.EOF) )
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /**
      Consume the next token and wrap it in an Id.

      If the token is not an identifier an error is reported; the token is
      still wrapped and returned so that the caller can continue parsing
      instead of running off the end of this function.
     */
    Id parseIdentifier()
    {
        Token tok = next();

        if (tok.type is Tok.Identifier)
            return Id(tok);

        messages.report(UnexpectedTokSingle, tok.location)
            .arg(tok.get(sm))
            .arg(Tok.Identifier);
        return Id(tok);
    }

    /**
      Parse a dotted module name such as `a.b.c`.

      All identifiers but the last are stored in `packages`, the last one in
      `id`. On a '.' that is not followed by an identifier an error is
      reported and tokens are skipped up to and including the next ';'.
     */
    ModuleName parseModuleName()
    {
        auto id = parseIdentifier();
        ModuleName mod;
        while (skip(Tok.Dot))
        {
            mod.packages ~= id;
            if (peek.type != Tok.Identifier)
            {
                messages.report(ExpectedIdAfterPackage, peek.location);
                goto Lerror;
            }
            id = parseIdentifier();
        }
        mod.id = id;
        return mod;
Lerror:
        // Stop at EOF so truncated input cannot keep this loop spinning.
        while (!isa(Tok.EOF) && !skip(Tok.Seperator))
            next();
        return mod;
    }

    /**
      Parse a type - this includes pointer and array(at some point) types.

      Handles a basic type or identifier followed by any number of '*',
      '[N]' / '[]' and `function(...)` suffixes. Note that '[]' without a
      size is consumed but does not change the resulting type.
     */
    Id parseType()
    {
        Token type = next();

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            messages.report(InvalidType, type.location);

        currentType = Id(type);

        while(true)
        {
            switch(peek.type)
            {
                case Tok.Star:
                    currentType = PointerTypeId(currentType);
                    next();
                    break;
                case Tok.OpenBracket:
                    next();
                    if (isa(Tok.Integer))
                        currentType = StaticArrayTypeId(
                                currentType,
                                action.actOnNumericConstant(
                                    require(Tok.Integer)));
                    require(Tok.CloseBracket);
                    break;
                case Tok.Function:
                    next();

                    require(Tok.OpenParentheses); // Remove the "(" token.

                    DeclT[] decls;

                    // Also stop at EOF, see parseFuncArgs.
                    while(peek.type != Tok.CloseParentheses && !isa(Tok.EOF))
                    {
                        auto t = parseType();
                        Id i;
                        if(peek.type == Tok.Identifier)
                            i = parseIdentifier();

                        // Act on function type param
                        decls ~= action.actOnVarDecl(t, i, null, Attribute());

                        if(peek.type == Tok.Comma)
                            next();
                    }

                    currentType = FunctionTypeId(currentType, decls);

                    require(Tok.CloseParentheses); // Remove the ")"
                    break;
                default:
                    goto end;
            }
        }
end:
        return currentType;
    }

private:
    // -- Expression parsing -- //

    /**
      Parse any member references (`.name`) and index expressions (`[exp]`)
      following `target`, chaining them left to right.

      A '.' that is not followed by an identifier is reported and dropped;
      `target` is returned unchanged in that case.
     */
    Exp parsePostfixExp(Exp target)
    {
        switch(peek.type)
        {
            case Tok.Dot:
                switch(peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = next();
                        Id member = Id(next());
                        Exp exp = action.actOnMemberReference(target, op.location, member);
                        return parsePostfixExp(exp);
                    default:
                        Token t = peek(1);
                        messages.report(ExpectedIdAfterDot, t.location);
                        // Drop the stray '.' and stop here. Without this
                        // return, control fell into the '[' case below and
                        // treated the dot as an opening bracket.
                        next();
                        return target;
                }
            case Tok.OpenBracket:
                Token open = next();
                Exp index = parseExpression();
                Token close = require(Tok.CloseBracket);
                // Keep going so that a[0].b and a[0][1] are handled.
                return parsePostfixExp(
                        action.actOnIndexExpr(target, open, index, close));
            default:
                return target;
        }
    }

    /**
      Parse an expression by precedence climbing.

      Params:
        p = minimum precedence a binary operator must have to be consumed
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token n = peek();
        BinOp* op = null;
        while ((op = binary(n.type)) != null && op.prec >= p)
        {
            next();
            // Left-associative operators require a strictly higher
            // precedence on their right-hand side.
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(n.location, op.operator, exp, exp2);
            n = peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: a unary operation, a parenthesised
      expression, an identifier (with postfix parts and an optional call),
      `null`, a cast, an integer or string literal, an array literal or a
      `new` expression.

      Any other token is reported as a fatal ExpectedExp error.
     */
    Exp P()
    {
        Token n = next();
        if (auto op = unary(n.type))
            return action.actOnUnaryOp(n, parseExpression(op.prec));
        else if (n.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (n.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(n));
            Exp iden = parsePostfixExp(value);
            switch(peek.type)
            {
                case Tok.OpenParentheses:
                    Token lp = next();
                    SmallArray!(Exp, 8) args;
                    while(peek.type != Tok.CloseParentheses)
                    {
                        if(peek.type == Tok.Comma)
                            next();
                        args ~= parseExpression();
                    }

                    Token rp = next();
                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);

                default:
                    return iden;
            }
        }
        else if (n.type == Tok.Null)
            return action.actOnNullExpr(n.location);
        else if (n.type == Tok.Cast)
            return parseCast(n);
        else if (n.type == Tok.Integer)
            return action.actOnNumericConstant(n);
        else if (n.type == Tok.String)
            return action.actOnStringExp(n);
        else if (n.type == Tok.OpenBracket)
        {
            // Array literals
            Exp[] exps;
            exps ~= parseExpression();

            while (isa(Tok.Comma))
            {
                next();

                // Allow a trailing comma before the ']'
                if (isa(Tok.CloseBracket))
                    break;
                exps ~= parseExpression();
            }
            scope e = require(Tok.CloseBracket);
            return action.actOnArrayLiteralExpr(exps, n.location, e.location);
        }
        else if (n.type == Tok.New)
        {
            Exp[] allocator_args;
            Exp[] constructor_args;

            if ( isa(Tok.OpenParentheses))
            {
                next(); // Remove OpenParentheses

                if ( !isa(Tok.CloseParentheses ) )
                {
                    allocator_args ~= parseExpression;

                    while ( isa(Tok.Comma) )
                    {
                        next(); // Remove Comma

                        allocator_args ~= parseExpression;
                    }
                }
                require(Tok.CloseParentheses);
            }

            auto type = parseType;

            if ( isa(Tok.OpenParentheses))
            {
                next(); // Remove OpenParentheses

                if ( !isa(Tok.CloseParentheses ) )
                {
                    constructor_args ~= parseExpression;

                    while ( isa(Tok.Comma) )
                    {
                        next(); // Remove Comma

                        constructor_args ~= parseExpression;
                    }
                }
                require(Tok.CloseParentheses);
            }
            return action.actOnNewExpr(type, allocator_args, constructor_args);
        }

        messages.report(ExpectedExp, n.location)
            .fatal(ExitLevel.Parser);
        return null;
    }

    /**
      Parse the remainder of a cast expression; the `cast` token itself has
      already been consumed and is passed in as `_cast`.
     */
    Exp parseCast(ref Token _cast)
    {
        require(Tok.OpenParentheses);
        auto n = next();
        if(!n.isBasicType && !n.isIdentifier)
            messages.report(ExpectedCastType, n.location);

        require(Tok.CloseParentheses);
        auto exp = P();
        return action.actOnCastExpr(_cast, Id(n), exp);
    }

    /// A prefix operator and the precedence used for its operand.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static const UnOp[] _unary =
    [
        {Tok.Minus, 4},
        {Tok.Star, 4},
        {Tok.And, 4}
    ];

    /// Look up `t` in the unary operator table; null if it is not one.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator with its precedence and associativity.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    // NOTE(review): the precedences in the table do not follow the levels
    // listed in the comment inside it (e.g. shifts are at 8, above
    // '*' at 5) - confirm whether that is intended before changing it.
    static const BinOp[] _binary =
    [
        {Tok.Assign,        1, false, Operator.Assign},
        {Tok.PlusAssign,    1, false, Operator.AddAssign},
        {Tok.MinusAssign,   1, false, Operator.SubAssign},
        {Tok.StarAssign,    1, false, Operator.MulAssign},
        {Tok.SlashAssign,   1, false, Operator.DivAssign},
        {Tok.PercentAssign, 1, false, Operator.ModAssign},

        // =, += etc. 1
        // (need special-case for the ternary operator at this level)
        // ||, 2
        // &&, 3
        // |, 4
        // &, 5
        // ^, 6
        // ==, !=, is, !is, 7
        // <, <= etc, 7
        // in, 7
        // <<, >>, >>>, 8
        // +, -, ~, 9
        // *, /, %, 10
        // unary operators here

        {Tok.Eq,        2, true, Operator.Eq},
        {Tok.Ne,        2, true, Operator.Ne},

        {Tok.Lt,        2, true, Operator.Lt},
        {Tok.Le,        2, true, Operator.Le},
        {Tok.Gt,        2, true, Operator.Gt},
        {Tok.Ge,        2, true, Operator.Ge},

        {Tok.Plus,      3, true, Operator.Add},
        {Tok.Minus,     3, true, Operator.Sub},

        {Tok.Star,      5, true, Operator.Mul},
        {Tok.Slash,     5, true, Operator.Div},
        {Tok.Percent,   5, true, Operator.Mod},

        {Tok.LeftShift,             8, true, Operator.LeftShift},
        {Tok.RightShift,            8, true, Operator.RightShift},
        {Tok.UnsignedRightShift,    8, true, Operator.UnsignedRightShift}
    ];

    /// Look up `t` in the binary operator table; null if it is not one.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
      Consume and return the next token, reporting an error first if it is
      not of type `t`. Hitting EOF instead is reported as a fatal error.
     */
    Token require(Tok t)
    {
        if (!isa(t))
        {
            if (isa(Tok.EOF))
                messages.report(UnexpectedEOF,
                    [lexer.last.asRange][], [])
                    .arg(lexer.last.get(sm))
                    .fatal(ExitLevel.Parser);
            else
                messages.report(UnexpectedTokSingle, peek.location)
                    .arg(peek.get(sm))
                    .arg(typeToString[t]);
        }
        return next();
    }

    /// Consume the next token if it is of type `t`; tells whether it did.
    bool skip(Tok t)
    {
        if (peek().type != t)
            return false;
        next();
        return true;
    }

    /// True if the token `i` positions ahead is of type `t`.
    bool isa(Tok t, int i = 0)
    {
        return peek(i).type == t;
    }

    /**
      Report a fatal UnexpectedEOF error, located at `t`, if the current
      token is EOF. Always returns false.
     */
    bool isEOF(Token t)
    {
        if (isa(Tok.EOF))
            messages.report(UnexpectedEOF,
                    [t.asRange][], [])
                .arg(t.get(sm))
                .fatal(ExitLevel.Parser);
        return false;
    }

    /// Consume and return the next token from the lexer.
    Token next()
    {
        return lexer.next;
    }

    /// Look `i` tokens ahead without consuming anything.
    Token peek(int i = 0)
    {
        return lexer.peek(i);
    }

    Lexer lexer;
    SourceManager sm;
}