diff src/parser/Parser.d @ 206:d3c148ca429b

Major reorganization of files: all source now goes into src, all docs into docs.
author Anders Johnsen <skabet@gmail.com>
date Tue, 12 Aug 2008 18:14:56 +0200
parents
children e0551773a005
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parser/Parser.d	Tue Aug 12 18:14:56 2008 +0200
@@ -0,0 +1,731 @@
+module parser.Parser;
+
+import lexer.Lexer,
+       lexer.Token;
+
+import parser.Action;
+
+import basic.Message;
+
+import basic.SmallArray,
+       basic.SourceManager;
+
+import tango.io.Stdout,
+       Integer = tango.text.convert.Integer;
+
+class Parser
+{
+    Action action;
+    MessageHandler messages;
+    alias Object Exp;
+    alias Object Stmt;
+    alias Object Decl;
+    alias Object Module;
+
+    this(MessageHandler messages)
+    {
+        this.messages = messages;
+    }
+
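+    /**
+      Parse a whole source file: an optional module declaration
+      followed by declarations until end of file.
+     */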
+    Module parse(SourceManager sm, Lexer lexer, Action act)
+    {
+        this.sm = sm;
+        this.lexer = lexer;
+        this.action = act;
+
+        Module m;
+        if (lexer.peek.type == Tok.Module)
+        {
+            Token _module = lexer.next;
+            ModuleName name = parseModuleName();
+            m = action.actOnModule(_module, sm.getText(name.asRange()));
+            require(Tok.Seperator);
+        }
+        else
+        {
+            SLoc loc = lexer.peek.location;
+            m = action.actOnImplicitModule(loc, sm.getFile(loc));
+        }
+
+        while (lexer.peek.type != Tok.EOF)
+            foreach (d; parseDeclDef())
+                action.actOnModuleDecl(m, d);
+
+        return m;
+    }
+
+private:
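+    /**
+      Parse a top-level declaration: either a list of imports or a
+      single variable, function or struct declaration.
+     */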
+    Decl[] parseDeclDef()
+    {
+        Token t = lexer.peek;
+        if (t.type == Tok.Import)
+            return parseImports();
+        else
+            return [parseDecl()];
+    }
+
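+    /**
+      Parse a single declaration: a variable ("type id;" or
+      "type id = exp;"), a function definition, or a struct.
+     */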
+    Decl parseDecl()
+    {
+        Token t = lexer.peek;
+
+        if (t.isBasicType || t.isIdentifier)
+        {
+            Id type;
+            Id iden;
+            int len = peekParseType;
+            if(lexer.peek(len).type == Tok.Identifier && len != 0)
+            {
+                type = parseType;
+parseDeclAfterInvalidType:
+                iden = Id(require(Tok.Identifier));
+                Token next = lexer.peek();
+                if (next.type == Tok.Seperator)
+                {
+                    Token sep = lexer.next();
+                    return action.actOnDeclarator(type, iden, null);
+                }
+                else if (next.type == Tok.Assign)
+                {
+                    Token assign = lexer.next();
+                    Exp exp = parseExpression();
+                    require(Tok.Seperator);
+                    return action.actOnDeclarator(type, iden, exp);
+                }
+                else if (next.type == Tok.OpenParentheses)
+                    return parseFunc(type, iden);
+                else
+                    messages.report(UnexpectedTok, next.location).arg(next.getType);
+            }
+            t = lexer.peek(len);
+            messages.report(InvalidDeclType, t.location)
+                .arg(sm.getText(t.asRange));
+            while(len--)
+                lexer.next;
+            while(lexer.peek.type != Tok.Identifier)
+                lexer.next;
+            type = Id(lexer.peek);
+            goto parseDeclAfterInvalidType;
+        }
+        else if (t.type == Tok.Struct)
+        {
+            Id type = Id(lexer.next);
+            Id iden = Id(require(Tok.Identifier));
+            
+            return parseStruct(type, iden);
+        }
+        messages.report(UnexpectedTok, t.location)
+            .arg(t.getType)
+            .arg(Tok.Identifier)
+            .fatal(ExitLevel.Parser);
+    }
+
+    /**
+      Parse a series of imports belonging to a single import token.
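+
+      Handles the forms distinguished below, for example:
+        import A, B.C;            // several modules after one import
+        import B = A.A;           // renamed import
+        import A : a, print = b;  // selective import, optionally renamed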
+     */
+    Decl[] parseImports()
+    {
+        Token _import = require(Tok.Import);
+        SmallArray!(Decl) res;
+        void addToRes(Decl d) { res ~= d; }
+
+        bool done = false;
+        while (!done && !on_a(Tok.Seperator))
+        {
+            ModuleName mod = parseModuleName();
+            Token tok = lexer.peek;
+            switch (tok.type)
+            {
+                case Tok.Comma:
+                    // import A, B.C;
+                    // parse another module-name
+                    lexer.next();
+                    res ~= action.actOnImport(_import, mod, null);
+                    break;
+                case Tok.Assign:
+                    // import B = A.A;
+                    //        ^- must be a single identifier
+                    // renamed import
+                    if (mod.packages.length != 0)
+                    {
+                        SLoc loc = mod.packages[0].tok.location;
+                        messages.report(RenameMustBeSingleIdent, loc);
+                    }
+                    //if (isStatic)
+                    //    error("Static imports cannot be renamed");
+                    lexer.next();
+                    Id name = mod.id;
+                    mod = parseModuleName();
+                    // create from mod and rename to `name`
+                    res ~= action.actOnImport(_import, mod, &name);
+                    break;
+                case Tok.Colon:
+                    // import A : a;
+                    // selective imports, potentially import A : print = a
+                    lexer.next();
+                    Decl d = action.actOnImport(_import, mod, null);
+                    // do-while on a comma:
+                    //   add explicit symbol
+                    do
+                    {
+                        Id sym = parseIdentifier();
+                        Id dummy;
+                        Id* name = null;
+                        if (skip(Tok.Assign))
+                        {
+                            dummy = sym;
+                            name = &dummy;
+                            sym = parseIdentifier();
+                        }
+                        action.addSelectiveImport(d, sym, name);
+
+                    } while (skip(Tok.Comma));
+                    require(Tok.Seperator);
+                    res ~= d;
+                    return res.safe();
+                case Tok.Seperator:
+                    done = true;
+                    break;
+                default:
+                    goto Lerror;
+            }
+            res ~= action.actOnImport(_import, mod, null);
+        }
+
+        require(Tok.Seperator);
+        return res.safe();
+Lerror:
+        while (!on_a (Tok.Seperator))
+            lexer.next();
+        return res.safe();
+    }
+
+    /**
+      Parse struct
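+
+      Members are parsed with parseDecl, e.g.:
+        struct S { int x; int y = 2; }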
+     */
+    Decl parseStruct(Id type, Id iden)
+    {
+        auto decl = action.actOnDeclarator(type, iden, null);
+
+        require(Tok.OpenBrace);
+
+        while(lexer.peek.isBasicType || lexer.peek.isIdentifier)
+        {
+            auto m_decl = parseDecl();
+            action.actOnStructMember(decl, m_decl); 
+/*            Id var_type = Id(lexer.next);
+            Id var_iden = Id(require(Tok.Identifier));
+            Token next = lexer.peek();
+            if (next.type == Tok.Seperator)
+            {
+                Token sep = lexer.next();
+                action.actOnStructMember(decl, var_type, var_iden, null);
+                continue;
+            }
+            else if (next.type == Tok.Assign)
+            {
+                Token assign = lexer.next();
+                Exp exp = parseExpression();
+                require(Tok.Seperator);
+                action.actOnStructMember(decl, var_type, var_iden, exp);
+                continue;
+            }
+            messages.report(UnexpectedTok, next.location).arg(next.getType);*/
+        }
+
+        require(Tok.CloseBrace);
+        
+        return decl;
+    }
+
+    /**
+      Parse statements.
+
+      This is the place to attack!
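+
+      Currently handles return, if, while, declarations and expression
+      statements; anything else is reported as an error.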
+     */
+    Stmt parseStatement()
+    {
+        Token t = lexer.peek;
+
+        switch(t.type)
+        {
+            case Tok.Return:
+                Token ret = lexer.next;
+                Exp exp;
+                if (lexer.peek.type != Tok.Seperator)
+                    exp = parseExpression();
+                require(Tok.Seperator);
+                return action.actOnReturnStmt(ret, exp);
+
+            /*
+               if (cond)
+                single statement | compound statement
+               [else
+                single statement | compound statement]
+             */
+            case Tok.If:
+                Token _if = lexer.next();
+
+                require(Tok.OpenParentheses);
+                Exp cond = parseExpression();
+                require(Tok.CloseParentheses);
+
+                Stmt thenB = parseSingleOrCompoundStatement();
+
+                // if there is no else part we use the if token, to have
+                // something that can be passed along
+                Token _else = _if;
+                Stmt elseB;
+                if (lexer.peek.type == Tok.Else)
+                {
+                    _else = lexer.next;
+                    elseB = parseSingleOrCompoundStatement();
+                }
+
+                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);
+
+            /*
+               while (cond)
+                single statement | compound statement
+             */
+            case Tok.While:
+                Token _while = lexer.next;
+                require(Tok.OpenParentheses);
+                Exp cond = parseExpression();
+                require(Tok.CloseParentheses);
+                Stmt bodyStmt = parseSingleOrCompoundStatement();
+                return action.actOnWhileStmt(_while, cond, bodyStmt);
+
+            /*
+               One of four things:
+               A declaration of a function/variable `type id ...`
+               A direct assignment `id = exp;`
+               An indirect assignment `id.id = exp`
+               Some sort of free standing expression
+
+               The assignments should be handled as binary expressions?
+             */
+            case Tok.Identifier:
+                Token iden = lexer.peek;
+                Token n = lexer.peek(1);
+                // Must be a decl if we start with a basic type, or two
+                // identifiers in a row
+                if (iden.isBasicType() || iden.isIdentifier())
+                {
+                    if ( n.type == Tok.Star || n.type == Tok.OpenBracket)
+                    {
+                        int len = peekParseType;
+                        if(lexer.peek(len).type == Tok.Identifier && len != 0)
+                            return action.actOnDeclStmt(parseVarDecl());
+
+                        Exp exp = parseExpression();
+                        require(Tok.Seperator);
+                        return action.actOnExprStmt(exp);
+                    }
+                        
+                    if (n.isIdentifier())
+                        return action.actOnDeclStmt(parseVarDecl());
+
+                    // Expression: a.b, a = b, a(b) etc.
+                    Exp exp = parseExpression();
+                    require(Tok.Seperator);
+                    return action.actOnExprStmt(exp);
+                }
+
+            case Tok.Switch:
+                messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType);
+                return null;
+
+            default:
+                if (t.isBasicType())
+                    goto case Tok.Identifier;
+                if (t.type == Tok.Star)
+                {
+                    auto exp = parseExpression();
+                    require(Tok.Seperator);
+                    return action.actOnExprStmt(exp);
+                }
+                messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType);
+                return null;
+        }
+        messages.report(UnexpectedTok, t.location);
+        return null;
+    }
+
+    Decl parseVarDecl()
+    {
+        // manually hardcoded to only support "type id [= exp];"
+        // as that is the only thing the codegen understands
+        Id type = parseType;
+        Id id = Id(lexer.next);
+        Exp init;
+        if (skip(Tok.Assign))
+            init = parseExpression();
+        require(Tok.Seperator);
+        Decl d = action.actOnDeclarator(type, id, init);
+        return d;
+    }
+
+    /**
+      Parses a function/method given the already parsed return type and name
+     */
+    Decl parseFunc(ref Id type, ref Id name)
+    {
+        Decl func = action.actOnStartOfFunctionDef(type, name);
+        parseFuncArgs(func);
+
+        if(lexer.peek.type == Tok.Seperator)
+        {
+            lexer.next;
+            return func;
+        }
+        Stmt stmt = parseCompoundStatement();
+
+        return action.actOnEndOfFunction(func, stmt);
+    }
+
+    /**
+      Parse the function arguments; assumes the current token is (.
+
+      Both the initial paren and the ending paren are consumed.
+     */
+    void parseFuncArgs(Decl func)
+    {
+        require(Tok.OpenParentheses); // Remove the "(" token.
+
+        while(lexer.peek.type != Tok.CloseParentheses)
+        {
+            auto t = parseType();
+            Id i;
+            if(lexer.peek.type == Tok.Identifier)
+                i = parseIdentifier();
+            action.addFuncArg(func, t, i);
+
+            if(lexer.peek.type == Tok.Comma)
+                lexer.next;
+        }
+
+        require(Tok.CloseParentheses); // Remove the ")"
+    }
+
+    /**
+      Parse either a block or a single statement, as allowed after if, while
+      and for.
+     */
+    Stmt parseSingleOrCompoundStatement()
+    {
+        if (lexer.peek.type == Tok.OpenBrace)
+            return parseCompoundStatement();
+        return parseStatement();
+    }
+
+    /**
+      Parses a function body or similar; expects an opening brace to be the
+      current token.
+
+      Will consume both the starting { and the ending }.
+     */
+    Stmt parseCompoundStatement()
+    {
+        Token lbrace = require(Tok.OpenBrace);
+        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
+        while (lexer.peek.type != Tok.CloseBrace)
+            stmts ~= parseStatement();
+        Token rbrace = require(Tok.CloseBrace);
+        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
+    }
+
+    Id parseIdentifier()
+    {
+        Token tok = lexer.next;
+
+        if (tok.type is Tok.Identifier)
+            return Id(tok);
+
+        messages.report(UnexpectedTokSingle, tok.location)
+            .arg(tok.getType)
+            .arg(Tok.Identifier);
+    }
+
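+    /**
+      Parse a dot-separated module name, e.g. A.B.C - the leading
+      identifiers become the packages and the last one the module id.
+     */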
+    ModuleName parseModuleName()
+    {
+        auto id = parseIdentifier();
+        ModuleName mod;
+        while (skip(Tok.Dot))
+        {
+            mod.packages ~= id;
+            if (lexer.peek.type != Tok.Identifier) {
+                messages.report(ExpectedIdAfterPackage, lexer.peek.location);
+                goto Lerror;
+            }
+            id = parseIdentifier();
+        }
+        mod.id = id;
+        return mod;
+Lerror:
+        while (!skip(Tok.Seperator))
+            lexer.next();
+        return mod;
+    }
+
+
+    /**
+      Parse a type - this includes pointer and array (at some point) types.
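+
+      Examples: int, int*, int[10], MyType**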
+     */
+    Id parseType()
+    {
+        Token type = lexer.next;
+
+        Id currentType;
+
+        if ( !(type.isBasicType || type.type == Tok.Identifier) )
+            messages.report(InvalidType, type.location);
+
+        currentType = Id(type);
+        type = lexer.peek;
+
+        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
+        {
+            if(type.type == Tok.Star)
+            {
+                currentType = PointerId(currentType);
+                lexer.next;
+            }
+            else
+            {
+                lexer.next;
+                if(lexer.peek.type == Tok.Integer)
+                    currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer)));
+                require(Tok.CloseBracket);
+                
+            }
+            type = lexer.peek;
+        }
+
+        return currentType;
+    }
+
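+    /**
+      Look ahead to see if the coming tokens form a type, without
+      consuming anything. Returns the number of tokens the type spans,
+      or 0 if they do not form a valid type.
+     */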
+    int peekParseType()
+    {
+        int i;
+        Token type = lexer.peek(i);
+
+        Id currentType;
+
+        if ( !(type.isBasicType || type.type == Tok.Identifier) )
+            return 0;
+
+        currentType = Id(type);
+        type = lexer.peek(++i);
+
+        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
+        {
+            if(type.type == Tok.Star)
+            {
+                i++;
+            }
+            else
+            {
+                if(lexer.peek(i++).type != Tok.OpenBracket)
+                    return 0;
+                if(lexer.peek(i).type == Tok.Integer)
+                {
+                    i++;
+                    if(lexer.peek(i++).type != Tok.CloseBracket)    
+                        return 0;
+                }
+                else
+                    if(lexer.peek(i++).type != Tok.CloseBracket)
+                        return 0;
+                
+            }
+            type = lexer.peek(i);
+        }
+
+        return i;
+    }
+
+private:
+    // -- Expression parsing -- //
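+    /**
+      Parse postfix expressions on an already parsed target - member
+      access (target.id) and indexing (target[exp]).
+     */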
+    Exp parsePostfixExp(Exp target)
+    {
+        switch(lexer.peek.type)
+        {
+            case Tok.Dot:
+                switch(lexer.peek(1).type)
+                {
+                    case Tok.Identifier:
+                        Token op = lexer.next;
+                        Id member = Id(lexer.next);
+                        Exp exp = action.actOnMemberReference(target, op.location, member);
+                        return parsePostfixExp(exp);
+                    default:
+                        Token t = lexer.peek(1);
+                        messages.report(ExpectedIdAfterDot, t.location);
+                }
+            case Tok.OpenBracket:
+                Token open = lexer.next;
+                Exp index = parseExpression();
+                Token close = require(Tok.CloseBracket);
+                return action.actOnIndexEpr(target, open, index, close);
+            default:
+                return target;
+        }
+    }
+
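+    /**
+      Operator precedence parsing ("precedence climbing"). p is the
+      minimum precedence an operator must have to be consumed at this
+      level; precedences and associativity come from the _binary table.
+     */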
+    Exp parseExpression(int p = 0)
+    {
+        auto exp = P();
+        Token next = lexer.peek();
+        BinOp* op = null;
+        while ((op = binary(next.type)) != null && op.prec >= p)
+        {
+            lexer.next();
+            int q = op.leftAssoc? 1 + op.prec : op.prec;
+            auto exp2 = parseExpression(q);
+            exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2);
+            next = lexer.peek();
+        }
+
+        return exp;
+    }
+
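+    /**
+      Parse a primary expression: a unary operator followed by an
+      expression, a parenthesized expression, an identifier (with
+      postfix and call handling), a cast, or an integer/string literal.
+     */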
+    Exp P()
+    {
+        Token next = lexer.next();
+        if (auto op = unary(next.type))
+            return action.actOnUnaryOp(next, parseExpression(op.prec));
+        else if (next.type == Tok.OpenParentheses)
+        {
+            auto e = parseExpression(0);
+            require(Tok.CloseParentheses);
+            return e;
+        }
+        else if (next.type == Tok.Identifier)
+        {
+            Exp value = action.actOnIdentifierExp(Id(next));
+            Exp iden = parsePostfixExp(value);
+            switch(lexer.peek.type)
+            {
+                case Tok.OpenParentheses:
+                    Token lp = lexer.next;
+                    SmallArray!(Exp, 8) args;
+                    while(lexer.peek.type != Tok.CloseParentheses)
+                    {
+                        if(lexer.peek.type == Tok.Comma)
+                            lexer.next;
+                        args ~= parseExpression();
+                    }
+
+                    Token rp = lexer.next();
+                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);
+
+                default:
+                    return iden;
+            }
+        }
+        else if (next.type == Tok.Cast)
+            return parseCast(next);
+        else if (next.type == Tok.Integer)
+            return action.actOnNumericConstant(next);
+        else if (next.type == Tok.String)
+            return action.actOnStringExp(next);
+
+        messages.report(ExpectedExp, next.location)
+            .fatal(ExitLevel.Parser);
+        return null;
+    }
+
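+    /**
+      Parse a cast expression. The cast keyword has already been
+      consumed and is passed in as _cast; expects "( type )" followed
+      by a primary expression.
+     */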
+    Exp parseCast(ref Token _cast)
+    {
+        require(Tok.OpenParentheses);
+        auto next = lexer.next;
+        if(!next.isBasicType && !next.isIdentifier)
+            messages.report(ExpectedCastType, next.location);
+        
+        require(Tok.CloseParentheses);
+        auto exp = P();
+        return action.actOnCastExpr(_cast, Id(next), exp);
+    }
+
+    struct UnOp
+    {
+        Tok tokenType;
+        int prec;
+    }
+
+    static const UnOp[] _unary =
+    [
+        {Tok.Minus, 4},
+        {Tok.Star, 4}
+    ];
+    UnOp* unary(Tok t)
+    {
+        foreach (ref op; _unary)
+            if (op.tokenType == t)
+                return &op;
+        return null;
+    }
+
+    struct BinOp
+    {
+        Tok tokenType;
+        int prec;
+        bool leftAssoc;
+        Operator operator;
+    }
+
+    static const BinOp[] _binary =
+    [
+        {Tok.Assign,    1, false, Operator.Assign},
+
+        {Tok.Eq,        2, true, Operator.Eq},
+        {Tok.Ne,        2, true, Operator.Ne},
+
+        {Tok.Lt,        2, true, Operator.Lt},
+        {Tok.Le,        2, true, Operator.Le},
+        {Tok.Gt,        2, true, Operator.Gt},
+        {Tok.Ge,        2, true, Operator.Ge},
+
+        {Tok.Plus,      3, true, Operator.Add},
+        {Tok.Minus,     3, true, Operator.Sub},
+
+        {Tok.Star,      5, true, Operator.Mul},
+        {Tok.Slash,     5, true, Operator.Div},
+        {Tok.Percent,   5, true, Operator.Mod}
+    ];
+    BinOp* binary(Tok t)
+    {
+        foreach (ref op; _binary)
+            if (op.tokenType == t)
+                return &op;
+        return null;
+    }
+
+private:
+
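+    /**
+      Token helpers: require reports an error if the lookahead is not
+      of the given type (but consumes a token either way), skip only
+      consumes the token if it matches, and on_a just tests the
+      lookahead.
+     */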
+    Token require(Tok t)
+    {
+        if (lexer.peek().type != t)
+            messages.report(UnexpectedTokSingle, lexer.peek.location)
+                .arg(lexer.peek.getType)
+                .arg(t);
+        return lexer.next();
+    }
+
+    bool skip(Tok t)
+    {
+        if (lexer.peek().type != t)
+            return false;
+        lexer.next();
+        return true;
+    }
+
+    bool on_a(Tok t)
+    {
+        return lexer.peek.type == t;
+    }
+
+    Lexer lexer;
+    SourceManager sm;
+}
+