Mercurial > projects > dang
view parser/Parser.d @ 160:6cb2f4201e2a
Improved static arrays
Here is a list of some stuff that works
char[3] s = "hey"
char[3] s2 = s;
s2[1] = 98 // no support for chars, but 98 = 'b' :)
int[2] i;
i[0] = 2;
Still can't pass static arrays to functions
author | Anders Halager <halager@gmail.com> |
---|---|
date | Tue, 22 Jul 2008 13:29:20 +0200 |
parents | 57b0b4464a0b |
children | 01c2c49775ef |
line wrap: on
line source
module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import basic.Message,
       basic.Attribute;

import basic.SmallArray,
       basic.SourceManager;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Recursive-descent parser.

  The parser pulls tokens from a Lexer and never builds anything itself:
  every recognised construct is handed to the Action object, and whatever
  the action returns is passed along as an opaque Object (see the Exp,
  Stmt, Decl and Module aliases). Problems are reported through the
  MessageHandler given to the constructor.
 */
class Parser
{
    Action action;
    MessageHandler messages;
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;
    alias Object Module;

    /// Create a parser that reports its diagnostics to `messages`.
    this(MessageHandler messages)
    {
        this.messages = messages;
    }

    /**
      Parse one source file.

      If the file starts with a module declaration the module is created
      with actOnModule, otherwise actOnImplicitModule is called with the
      file of the first token. Every following declaration is passed to
      actOnModuleDecl until EOF is reached.

      Returns: the module object produced by the action.
     */
    Module parse(SourceManager sm, Lexer lexer, Action act)
    {
        this.sm = sm;
        this.lexer = lexer;
        this.action = act;

        Module m;
        if ( isa(Tok.Module) )
        {
            Token _module = next();
            ModuleName name = parseModuleName();
            m = action.actOnModule(_module, sm.getText(name.asRange()));
            require(Tok.Seperator);
        }
        else
        {
            SLoc loc = peek.location;
            m = action.actOnImplicitModule(loc, sm.getFile(loc));
        }

        // Stack of active attribute scopes, the innermost one is nes[$-1]
        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            foreach (d; parseDeclDef(nes[$-1].a))
                action.actOnModuleDecl(m, d);

            nes = parseAttributeScope(nes);
        }

        return m;
    }

private:
    /**
      Parse a module level item: either an import statement (which can
      give several declarations) or a single declaration.
     */
    Decl[] parseDeclDef(Attribute a)
    {
        if ( isa (Tok.Import) )
            return parseImports();

        return [parseDecl(a)];
    }

    /**
      Parse a variable, function, struct, class or interface declaration.

      Returns: the declaration from the action, or null if the tokens
      after "type identifier" were neither `;`, `=` nor `(`.
     */
    Decl parseDecl(Attribute att)
    {
        Token t = peek;

        if (t.isBasicType || t.isIdentifier)
        {
            Id type;
            Id iden;
            int len = peekParseType;
            if (peek(len).type == Tok.Identifier && len != 0)
            {
                type = parseType;
parseDeclAfterInvalidType:
                iden = Id(require(Tok.Identifier));
                if ( isa(Tok.Seperator) )
                {
                    next(); // Remove the ";"
                    return action.actOnDeclarator(type, iden, null, att);
                }
                else if ( isa(Tok.Assign) )
                {
                    next(); // Remove the "="
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnDeclarator(type, iden, exp, att);
                }
                else if ( isa(Tok.OpenParentheses) )
                    return parseFunc(type, iden, att);
                else
                {
                    auto n1 = next();
                    messages.report(UnexpectedTok, n1.location).arg(n1.getType);
                }
                return null;
            }

            // Not a valid type: report it, skip ahead to the next
            // identifier and continue as if that was the declaration.
            t = peek(len);
            messages.report(InvalidDeclType, t.location)
                .arg(sm.getText(t.asRange));
            while(len--)
                next();
            while( !isa(Tok.Identifier) && !isa(Tok.EOF))
                next();
            if ( isa(Tok.EOF ) )
                messages.report(UnexpectedTok, t.location)
                    .fatal(ExitLevel.Parser);

            type = Id(peek);
            goto parseDeclAfterInvalidType;
        }
        else if (t.type == Tok.Struct)
        {
            Id type = Id(next());
            Id iden = Id(require(Tok.Identifier));

            return parseStruct(type, iden, att);
        }
        else if (t.type == Tok.Class)
        {
            Id type = Id(next());
            Id iden = Id(require(Tok.Identifier));

            return parseClass(type, iden, att);
        }
        else if (t.type == Tok.Interface)
        {
            Id type = Id(next());
            Id iden = Id(require(Tok.Identifier));

            return parseInterface(type, iden, att);
        }
        messages.report(UnexpectedTok, t.location)
            .arg(t.getType)
            .arg(Tok.Identifier)
            .fatal(ExitLevel.Parser);

        // Only reached if the fatal report returns; never fall off the end
        return null;
    }

    /**
      Parse the optional linkage of an extern attribute: `extern` or
      `extern(Identifier)`.

      The current token must be `extern`. In both forms exactly one token
      of the attribute is left unconsumed (`extern` itself or the closing
      paren), because the caller always consumes one token after the
      attribute has been handled.

      Returns: the linkage, Extern.D if no linkage was given.
     */
    Extern parseLinkageType()
    {
        Extern e = Extern.D;
        if(peek(1).type != Tok.OpenParentheses)
            return e;

        next(); next(); // Remove "extern" and "("

        Token t = require(Tok.Identifier);

        switch(sm.getText(t.asRange))
        {
            case "C":
                if (peek(0).type == Tok.Plus &&
                    peek(1).type == Tok.Plus)
                {
                    e = Extern.CPlusPlus;
                    next(); next(); // Remove the "++" of "C++"
                }
                else
                    e = Extern.C;
                break;
            case "D":
                break;
            case "Windows":
                e = Extern.Windows;
                break;
            case "Pascal":
                e = Extern.Pascal;
                break;
            case "System":
                e = Extern.System;
                break;
            default:
                messages.report(UnexpectedLinkType, t.location);
        }

        // The ")" is checked but left for the caller to consume. Using
        // require here would make the caller eat the token after it.
        if ( !isa(Tok.CloseParentheses) )
            messages.report(UnexpectedTokSingle, peek.location)
                .arg(peek.getType)
                .arg(Tok.CloseParentheses);

        return e;
    }

    /**
      Parse a series of imports belonging to a single import token.

      Each imported module gives exactly one call to actOnImport.
     */
    Decl[] parseImports()
    {
        Token _import = require(Tok.Import);
        SmallArray!(Decl) res;

        bool done = false;
        while (!done && !isa(Tok.Seperator) && !isa(Tok.EOF))
        {
            ModuleName mod = parseModuleName();
            Token tok = peek;
            switch (tok.type)
            {
                case Tok.Comma:
                    // import A, B.C;
                    // parse another module-name, the import of `mod` is
                    // added after the switch
                    next();
                    break;
                case Tok.Assign:
                    // import B = A.A;
                    //        ^- must be a single identifier
                    // renamed import
                    if (mod.packages.length != 0)
                    {
                        SLoc loc = mod.packages[0].tok.location;
                        messages.report(RenameMustBeSingleIdent, loc);
                    }
                    //if (isStatic)
                    //    error("Static imports cannot be renamed");
                    next();
                    Id name = mod.id;
                    mod = parseModuleName();
                    // create from mod and rename to `name`
                    res ~= action.actOnImport(_import, mod, &name);
                    // The renamed import is complete, don't add a plain
                    // import of the same module as well
                    skip(Tok.Comma);
                    continue;
                case Tok.Colon:
                    // import A : a;
                    // selective imports, potentially import A : print = a
                    next();
                    Decl d = action.actOnImport(_import, mod, null);
                    // do-while on a comma:
                    //   add explicit symbol
                    do
                    {
                        Id sym = parseIdentifier();
                        Id dummy;
                        Id* name = null;
                        if (skip(Tok.Assign))
                        {
                            dummy = sym;
                            name = &dummy;
                            sym = parseIdentifier();
                        }
                        action.addSelectiveImport(d, sym, name);

                    } while (skip(Tok.Comma));
                    require(Tok.Seperator);
                    res ~= d;
                    return res.safe();
                case Tok.Seperator:
                    done = true;
                    break;
                default:
                    messages.report(UnexpectedTok, tok.location)
                        .arg(tok.getType);
                    goto Lerror;
            }
            res ~= action.actOnImport(_import, mod, null);
        }

        require(Tok.Seperator);
        return res.safe();
Lerror:
        // Skip the rest of the broken import, including its ";". Stop at
        // EOF instead of asking the lexer for more tokens forever.
        while (!isa(Tok.EOF) && !skip(Tok.Seperator))
            next();
        return res.safe();
    }

    /**
      Parse interface
     */
    Decl parseInterface(Id type, Id iden, Attribute att)
    {
        auto decl = action.actOnDeclarator(type, iden, null, att);

        if (peek.type == Tok.Colon)
            // SuperInterfaces
        {
            next(); // Remove colon.

            Id identifier;

            // The identifier
            identifier = Id(require(Tok.Identifier));

            action.actOnInterfaceBaseClass(decl, identifier);

            // We should now have an optional list of items, each starting ','
            while (peek.type == Tok.Comma)
            {
                next(); // Remove comma

                // The identifier
                identifier = Id(require(Tok.Identifier));

                action.actOnInterfaceBaseClass(decl, identifier);
            }
        }

        require(Tok.OpenBrace);

        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            auto m_decl = parseDecl(nes[$-1].a);
            action.actOnInterfaceMember(decl, m_decl);

            nes = parseAttributeScope(nes);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Parse class
     */
    Decl parseClass(Id type, Id iden, Attribute att)
    {
        auto decl = action.actOnDeclarator(type, iden, null, att);

        if (peek.type == Tok.Colon)
            // BaseClassList - Super class and interfaces(in that order)
        {
            next(); // Remove colon.

            Id identifier;

            // First we expect an optional protection level. It is
            // consumed but not passed on to the action.
            if (peek.isBaseClassProtection)
                next();
            // Then the identifier
            identifier = Id(require(Tok.Identifier));

            action.actOnClassBaseClass(decl, identifier);

            // We should now have an optional list of items, each starting ','
            while (peek.type == Tok.Comma)
            {
                next(); // Remove comma

                // First we expect an optional protection level.
                if (peek.isBaseClassProtection)
                    next();
                // Then the identifier
                identifier = Id(require(Tok.Identifier));

                action.actOnClassBaseClass(decl, identifier);
            }
        }

        require(Tok.OpenBrace);

        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            switch(peek.type)
            {
                case Tok.This:
                    // Constructor: parsed as a function named by the
                    // `this` token, with the class name as its type
                    auto id = Id(next);
                    auto m_decl = parseFunc(iden, id, nes[$-1].a);
                    action.actOnClassMember(decl, m_decl);
                    break;

                default:
                    auto m_decl = parseDecl(nes[$-1].a);
                    action.actOnClassMember(decl, m_decl);
            }

            nes = parseAttributeScope(nes);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Parse struct
     */
    Decl parseStruct(Id type, Id iden, Attribute att)
    {
        auto decl = action.actOnDeclarator(type, iden, null, att);

        require(Tok.OpenBrace);

        auto nes = parseAttributeInit;
        while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) )
        {
            while ( peek.isAttribute )
                nes ~= parseAttribute(nes[$-1]);

            auto m_decl = parseDecl(nes[$-1].a);
            action.actOnStructMember(decl, m_decl);

            nes = parseAttributeScope(nes);
        }

        require(Tok.CloseBrace);

        return decl;
    }

    /**
      Create a fresh attribute stack holding one default entry marked as
      a Scope, so the stack is never empty.
     */
    Att[] parseAttributeInit()
    {
        Att[] nes;
        nes ~= Att();
        nes[0].nested = Scope;
        return nes;
    }

    /**
      Pop the attributes that stop applying after a declaration.

      All trailing Single entries are removed. Then, for as long as the
      current token is a `}` and more than the bottom entry is left,
      entries are popped up to and including the nearest Scope entry and
      the `}` closing that scope is consumed.
     */
    Att[] parseAttributeScope(Att[] nes)
    {
        while ( nes[$-1].nested == Single )
            nes.length = nes.length - 1;

        while ( isa(Tok.CloseBrace) && nes.length > 1)
        {
            while ( nes.length > 1 )
            {
                if( nes[$-1].nested == Scope )
                {
                    nes.length = nes.length - 1;
                    next();
                    break;
                }
                nes.length = nes.length - 1;
            }
        }

        return nes;
    }

    /**
      Parse a run of attribute keywords, starting from the attributes in
      `last`.

      The result is marked All if the attributes are followed by `:`,
      Scope if followed by `{` (both are consumed), and Single otherwise.
     */
    Att parseAttribute(Att last)
    {
        // Apply the single attribute keyword at the current token
        Att _parseAttribute(Att base)
        {
            Att a = base;
            a.nested = Single;

            switch(peek.type)
            {
                case Tok.Public:
                    a.a.setProtection(Protection.Public);
                    break;
                case Tok.Private:
                    a.a.setProtection(Protection.Private);
                    break;
                case Tok.Package:
                    a.a.setProtection(Protection.Package);
                    break;
                case Tok.Protected:
                    a.a.setProtection(Protection.Protected);
                    break;
                case Tok.Export:
                    a.a.setProtection(Protection.Export);
                    break;
                case Tok.Static:
                    a.a.setStatic;
                    break;
                case Tok.Final:
                    a.a.setFinal;
                    break;
                case Tok.Const:
                    a.a.setConst;
                    break;
                case Tok.Abstract:
                    a.a.setAbstract;
                    break;
                case Tok.Override:
                    a.a.setOverride;
                    break;
                case Tok.Deprecated:
                    a.a.setDeprecated;
                    break;
                case Tok.Auto:
                    a.a.setAuto;
                    break;
                case Tok.Extern:
                    Extern e = parseLinkageType;
                    a.a.setExtern(e);
                    break;
                default:
                    // A switch without a matching case is a runtime
                    // error in D, so unknown attributes are just skipped
                    break;
            }
            // Remove the attribute keyword, or the ")" that
            // parseLinkageType leaves behind
            next();

            return a;
        }

        Att a = _parseAttribute(last);

        while (peek.isAttribute)
        {
            a = parseAttribute(a);
        }

        if (peek.type == Tok.Colon)
        {
            a.nested = All;
            next();
        }
        else if (peek.type == Tok.OpenBrace)
        {
            a.nested = Scope;
            next();
        }

        return a;
    }

    /// How far an attribute reaches, stored in Att.nested
    enum : uint
    {
        Single, /// Only the next declaration
        Scope,  /// Everything inside the following { }
        All     /// Everything after `attribute:`
    }

    /// An entry on the attribute stack
    struct Att
    {
        Attribute a;
        uint nested;
    }

    /**
      Parse statements.

      This is the place to attack!

      Returns: the statement from the action, or null if the current
      token can't start a statement.
     */
    Stmt parseStatement()
    {
        Token t = peek;

        if (t.isReturn)
        {
            Token ret = next();
            Exp exp;
            if (peek.type != Tok.Seperator)
                exp = parseExpression();
            require(Tok.Seperator);
            return action.actOnReturnStmt(ret, exp);
        }
        /*
           if (cond)
            single statement | compound statement
           [else
            single statement | compound statement]
         */
        else if (t.isIf)
        {
            Token _if = next();

            require(Tok.OpenParentheses);
            Exp cond = parseExpression();
            require(Tok.CloseParentheses);

            Stmt thenB = parseSingleOrCompoundStatement();

            // if there is no else part we use the if as token, to have
            // something than can be passed along
            Token _else = _if;
            Stmt elseB;
            if (peek.type == Tok.Else)
            {
                _else = next();
                elseB = parseSingleOrCompoundStatement();
            }

            return action.actOnIfStmt(_if, cond, thenB, _else, elseB);
        }
        /*
           while (cond)
            single statement | compound statement
         */
        else if (t.isWhile)
        {
            Token _while = next();
            require(Tok.OpenParentheses);
            Exp cond = parseExpression();
            require(Tok.CloseParentheses);
            Stmt bodyStmt = parseSingleOrCompoundStatement();
            return action.actOnWhileStmt(_while, cond, bodyStmt);
        }
        /*
           for ([init]; [cond]; [incre])
            single statement | compound statement
         */
        else if (t.isFor)
        {
            Token _for = next();
            require(Tok.OpenParentheses);
            Stmt init;
            // A non-empty init is a full statement and consumes its own ";"
            if ( isa(Tok.Seperator))
                require(Tok.Seperator);
            else
                init = parseStatement();

            Exp cond;
            if ( !isa(Tok.Seperator))
                cond = parseExpression();
            require(Tok.Seperator);

            Exp incre;
            if ( !isa(Tok.CloseParentheses))
                incre = parseExpression();
            require(Tok.CloseParentheses);

            Stmt bodyStmt = parseSingleOrCompoundStatement();
            return action.actOnForStmt(_for, init, cond, incre, bodyStmt);
        }
        else if (t.isBasicType || t.isIdentifier)
        {
            Token n = peek(1);
            // Must be an decl, if we start with a basic type, or two
            // identifiers in a row
            if ( n.type == Tok.Star || n.type == Tok.OpenBracket)
            {
                // "a * b" and "a[i]" can be both a type and an
                // expression, it is a declaration only if an identifier
                // follows the complete type
                int len = peekParseType;
                if(peek(len).type == Tok.Identifier && len != 0)
                    return action.actOnDeclStmt(parseVarDecl());

                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnExprStmt(exp);
            }

            if (n.isIdentifier())
                return action.actOnDeclStmt(parseVarDecl());

            // Expression: a.b, a = b, a(b) etc.
            Exp exp = parseExpression();
            require(Tok.Seperator);
            return action.actOnExprStmt(exp);
        }
        else if (t.isSwitch)
        {
            next();
            require(Tok.OpenParentheses);
            auto target = parseExpression();
            auto res = action.actOnStartOfSwitchStmt(t, target);
            require(Tok.CloseParentheses);
            require(Tok.OpenBrace);
            while (true)
            {
                Stmt[] statements;
                if (isa(Tok.Default))
                {
                    Token _default = next();
                    require(Tok.Colon);
                    statements.length = 0;
                    while ( !isEndOfCaseBody() )
                        statements ~= parseStatement();
                    action.actOnDefaultStmt(res, _default, statements);
                    continue;
                }

                Token _case = peek;
                if (_case.type != Tok.Case)
                    break;
                next();

                Exp[] literals;
                do
                {
                    Exp e = parseExpression();
                    literals ~= e;
                }
                while (skip(Tok.Comma));
                require(Tok.Colon);

                while ( !isEndOfCaseBody() )
                    statements ~= parseStatement();

                action.actOnCaseStmt(res, _case, literals, statements);

                if (peek.type == Tok.CloseBrace)
                    break;
            }
            require(Tok.CloseBrace);
            return res;
        }
        else if (t.type == Tok.Star)
        {
            // Statement starting with a dereference: *a = b;
            auto exp = parseExpression();
            require(Tok.Seperator);
            return action.actOnExprStmt(exp);
        }
        else
        {
            messages.report(UnexpectedBeginStmt, t.location).arg(t.getType);
            // Remove the offending token, otherwise the loops calling
            // parseStatement would see the same token again and never
            // finish. "}" and EOF are left, they end the enclosing block.
            if (t.type != Tok.CloseBrace && t.type != Tok.EOF)
                next();
            return null;
        }
    }

    /**
      True when the current token ends the statements of a case/default:
      another case, a default, the end of the switch, or EOF.
     */
    bool isEndOfCaseBody()
    {
        Tok type = peek.type;
        return type == Tok.Case
            || type == Tok.Default
            || type == Tok.CloseBrace
            || type == Tok.EOF;
    }

    /**
      Parse a local variable declaration: "type id [= exp];"
     */
    Decl parseVarDecl()
    {
        // manually hardcoded to only support "type id [= exp];"
        // as that is the only thing the codegen understands
        Id type = parseType();
        Id id = Id(next());
        Exp init;
        if (skip(Tok.Assign))
            init = parseExpression();
        require(Tok.Seperator);
        Attribute att;
        Decl d = action.actOnDeclarator(type, id, init, att);
        return d;
    }

    /**
      Parses a function/method given the already parsed return type and name

      A ";" after the argument list gives a function without a body, in
      which case actOnEndOfFunction is not called.
     */
    Decl parseFunc(ref Id type, ref Id name, Attribute att)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name, att);
        parseFuncArgs(func);

        if(peek.type == Tok.Seperator)
        {
            next();
            return func;
        }
        Stmt stmt = parseCompoundStatement();

        return action.actOnEndOfFunction(func, stmt);
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the intitial paren and the ending paren is consumed.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        // Stop at EOF too, a missing ")" must not loop forever
        while(peek.type != Tok.CloseParentheses && !isa(Tok.EOF))
        {
            auto t = parseType();
            Id i;
            // The argument name is optional
            if(peek.type == Tok.Identifier)
                i = parseIdentifier();
            action.addFuncArg(func, t, i);

            if(peek.type == Tok.Comma)
                next();
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if, while
      and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }

    /**
      Parses a function-body or similar, expects an opening brace to be the
      current token.

      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while ( !isa(Tok.CloseBrace) && !isa(Tok.EOF) )
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /**
      Consume one token, which is expected to be an identifier.

      If it is something else an error is reported and the offending
      token is returned wrapped in an Id, so the caller can keep parsing.
     */
    Id parseIdentifier()
    {
        Token tok = next();

        if (tok.type is Tok.Identifier)
            return Id(tok);

        messages.report(UnexpectedTokSingle, tok.location)
            .arg(tok.getType)
            .arg(Tok.Identifier);

        // Every path has to return something
        return Id(tok);
    }

    /**
      Parse a dotted module name: a.b.c

      All but the last identifier are stored in `packages`, the last one
      in `id`. If a dot isn't followed by an identifier an error is
      reported, the rest of the statement is skipped including its ";",
      and the name is returned without `id` being set.
     */
    ModuleName parseModuleName()
    {
        auto id = parseIdentifier();
        ModuleName mod;
        while (skip(Tok.Dot))
        {
            mod.packages ~= id;
            if (peek.type != Tok.Identifier) {
                messages.report(ExpectedIdAfterPackage, peek.location);
                goto Lerror;
            }
            id = parseIdentifier();
        }
        mod.id = id;
        return mod;
Lerror:
        // Stop at EOF, there might not be another ";"
        while (!isa(Tok.EOF) && !skip(Tok.Seperator))
            next();
        return mod;
    }

    /**
      Parse a type - this includes pointer and array(at some point) types.

      "*" wraps the type in a PointerId and "[integer]" in a
      StaticArrayId. An empty "[]" is consumed without changing the type.
     */
    Id parseType()
    {
        Token type = next();

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            messages.report(InvalidType, type.location);

        currentType = Id(type);
        type = peek;

        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if(type.type == Tok.Star)
            {
                currentType = PointerId(currentType);
                next();
            }
            else
            {
                next(); // Remove the "["
                if(peek.type == Tok.Integer)
                    currentType = StaticArrayId(
                            currentType,
                            action.actOnNumericConstant(
                                require(Tok.Integer)));
                require(Tok.CloseBracket);
            }
            type = peek;
        }

        return currentType;
    }

    /**
      Look ahead, without consuming anything, to see if a type starts at
      the current token.

      Returns: the number of tokens in the type, or 0 if the tokens don't
      form a type as accepted by parseType.
     */
    int peekParseType()
    {
        int i;
        Token type = peek(i);

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            return 0;

        type = peek(++i);

        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if(type.type == Tok.Star)
            {
                i++;
            }
            else
            {
                if(peek(i++).type != Tok.OpenBracket)
                    return 0;
                if(peek(i).type == Tok.Integer)
                {
                    i++;
                    if(peek(i++).type != Tok.CloseBracket)
                        return 0;
                }
                else
                    if(peek(i++).type != Tok.CloseBracket)
                        return 0;
            }
            type = peek(i);
        }

        return i;
    }

private:
    // -- Expression parsing -- //

    /**
      Parse what can follow a primary expression: member references
      (a.b, any number of them) or a single index expression (a[i]).

      Returns: `target` if there is no postfix, otherwise the expression
      built by the action.
     */
    Exp parsePostfixExp(Exp target)
    {
        switch(peek.type)
        {
            case Tok.Dot:
                switch(peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = next();
                        Id member = Id(next());
                        Exp exp = action.actOnMemberReference(target, op.location, member);
                        return parsePostfixExp(exp);
                    default:
                        Token t = peek(1);
                        messages.report(ExpectedIdAfterDot, t.location);
                        // Remove the dot and give up on this postfix.
                        // Without the return we would fall through to
                        // the OpenBracket case and treat the dot as "["
                        next();
                        return target;
                }
            case Tok.OpenBracket:
                Token open = next();
                Exp index = parseExpression();
                Token close = require(Tok.CloseBracket);
                return action.actOnIndexExpr(target, open, index, close);
            default:
                return target;
        }
    }

    /**
      Parse an expression using precedence climbing.

      Params:
        p = the lowest operator precedence that may be consumed, binary
            operators binding weaker than this are left for the caller
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token n = peek();
        BinOp* op = null;
        while ((op = binary(n.type)) != null && op.prec >= p)
        {
            next();
            // A left associative operator may not have itself as right
            // operand, so the right side must bind strictly tighter
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(n.location, op.operator, exp, exp2);
            n = peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: unary operator, parenthesised
      expression, identifier (with postfix and call), cast, integer,
      string or new-expression.

      Anything else is reported as a fatal ExpectedExp error.
     */
    Exp P()
    {
        Token n = next();
        if (auto op = unary(n.type))
            return action.actOnUnaryOp(n, parseExpression(op.prec));
        else if (n.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (n.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(n));
            Exp iden = parsePostfixExp(value);
            switch(peek.type)
            {
                case Tok.OpenParentheses:
                    Token lp = next();
                    SmallArray!(Exp, 8) args;
                    while(peek.type != Tok.CloseParentheses)
                    {
                        if(peek.type == Tok.Comma)
                            next();
                        args ~= parseExpression();
                    }

                    Token rp = next();
                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);

                default:
                    return iden;
            }
        }
        else if (n.type == Tok.Cast)
            return parseCast(n);
        else if (n.type == Tok.Integer)
            return action.actOnNumericConstant(n);
        else if (n.type == Tok.String)
            return action.actOnStringExp(n);
        else if (n.type == Tok.New)
        {
            // new [(allocator args)] type [(constructor args)]
            Exp[] allocator_args;
            Exp[] constructor_args;

            if ( isa(Tok.OpenParentheses))
            {
                next(); // Remove OpenParentheses

                if ( !isa(Tok.CloseParentheses ) )
                {
                    allocator_args ~= parseExpression;

                    while ( isa(Tok.Comma) )
                    {
                        next(); // Remove Comma

                        allocator_args ~= parseExpression;
                    }
                }
                require(Tok.CloseParentheses);
            }

            auto type = parseType;

            if ( isa(Tok.OpenParentheses))
            {
                next(); // Remove OpenParentheses

                if ( !isa(Tok.CloseParentheses ) )
                {
                    constructor_args ~= parseExpression;

                    while ( isa(Tok.Comma) )
                    {
                        next(); // Remove Comma

                        constructor_args ~= parseExpression;
                    }
                }
                require(Tok.CloseParentheses);
            }
            return action.actOnNewExpr(type, allocator_args, constructor_args);
        }

        messages.report(ExpectedExp, n.location)
            .fatal(ExitLevel.Parser);
        return null;
    }

    /**
      Parse the rest of a cast expression, the cast token itself has
      already been consumed and is given in `_cast`.

      The type to cast to is a single token: a basic type or an identifier.
     */
    Exp parseCast(ref Token _cast)
    {
        require(Tok.OpenParentheses);
        auto n = next();
        if(!n.isBasicType && !n.isIdentifier)
            messages.report(ExpectedCastType, n.location);

        require(Tok.CloseParentheses);
        auto exp = P();
        return action.actOnCastExpr(_cast, Id(n), exp);
    }

    /*
       Operator precedence, higher binds tighter. The values used in the
       two tables below follow this list:

       =, += etc. 1
       (need special-case for the ternary operator at this level)
       ||, 2
       &&, 3
       |, 4
       &, 5
       ^, 6
       ==, !=, is, !is, 7
       <, <= etc, 7
       in, 7
       <<, >>, >>>, 8
       +, -, ~, 9
       *, /, %, 10
       unary operators, 11
     */

    /// A prefix operator and the precedence its operand is parsed with
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    // Unary operators bind tighter than every binary operator, so that
    // "*p * 2" is "(*p) * 2" and "-a << 1" is "(-a) << 1"
    static const UnOp[] _unary =
    [
        {Tok.Minus, 11},
        {Tok.Star, 11}
    ];

    /// Returns: the unary operator for the token type, null if there is none
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator with its precedence and associativity
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    // Shifts must bind weaker than + and -, which in turn bind weaker
    // than *, / and %
    static const BinOp[] _binary =
    [
        {Tok.Assign,        1, false, Operator.Assign},
        {Tok.PlusAssign,    1, false, Operator.AddAssign},
        {Tok.MinusAssign,   1, false, Operator.SubAssign},
        {Tok.StarAssign,    1, false, Operator.MulAssign},
        {Tok.SlashAssign,   1, false, Operator.DivAssign},
        {Tok.PercentAssign, 1, false, Operator.ModAssign},

        {Tok.Eq,        7, true, Operator.Eq},
        {Tok.Ne,        7, true, Operator.Ne},

        {Tok.Lt,        7, true, Operator.Lt},
        {Tok.Le,        7, true, Operator.Le},
        {Tok.Gt,        7, true, Operator.Gt},
        {Tok.Ge,        7, true, Operator.Ge},

        {Tok.LeftShift,             8, true, Operator.LeftShift},
        {Tok.RightShift,            8, true, Operator.RightShift},
        {Tok.UnsignedRightShift,    8, true, Operator.UnsignedRightShift},

        {Tok.Plus,      9, true, Operator.Add},
        {Tok.Minus,     9, true, Operator.Sub},

        {Tok.Star,      10, true, Operator.Mul},
        {Tok.Slash,     10, true, Operator.Div},
        {Tok.Percent,   10, true, Operator.Mod}
    ];

    /// Returns: the binary operator for the token type, null if there is none
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
      Consume the next token, reporting an error if it isn't of type `t`.

      The token is consumed and returned even when it has the wrong type.
     */
    Token require(Tok t)
    {
        if (peek().type != t)
            messages.report(UnexpectedTokSingle, peek.location)
                .arg(peek.getType)
                .arg(t);
        return next();
    }

    /**
      Consume the next token if it is of type `t`.

      Returns: true if a token was consumed.
     */
    bool skip(Tok t)
    {
        if (peek().type != t)
            return false;
        next();
        return true;
    }

    /// Check the type of the current token without consuming it
    bool isa(Tok t)
    {
        return peek.type == t;
    }

    /// Consume and return the current token
    Token next()
    {
        return lexer.next;
    }

    /// Look `i` tokens ahead without consuming, 0 is the current token
    Token peek(int i = 0)
    {
        return lexer.peek(i);
    }

    Lexer lexer;
    SourceManager sm;
}