view src/parser/Parser.d @ 206:d3c148ca429b

Major moving of files. all src now goes into src, all docs in docs.
author Anders Johnsen <skabet@gmail.com>
date Tue, 12 Aug 2008 18:14:56 +0200
parents
children e0551773a005
line wrap: on
line source

module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import basic.Message;

import basic.SmallArray,
       basic.SourceManager;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Hand-written recursive-descent parser.

  The parser owns no AST representation of its own: every construct it
  recognizes is handed to an Action object (actOnModule, actOnDeclarator,
  actOnBinaryOp, ...) which builds and returns opaque node handles. The
  aliases below make that explicit — Exp/Stmt/Decl/Module are just Object
  references from the parser's point of view.

  Errors are routed through a MessageHandler; some are recoverable
  (the parser skips tokens and continues), others call .fatal().
 */
class Parser
{
    Action action;            // semantic-action sink; set per parse() call
    MessageHandler messages;  // diagnostic sink, shared across parses
    // AST node handles are opaque to the parser — only Action knows
    // their concrete types.
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;
    alias Object Module;

    this(MessageHandler messages)
    {
        this.messages = messages;
    }

    /**
      Parse one source file into a Module.

      Handles an optional leading `module a.b.c;` declaration; without one,
      an implicit module is created from the file name. Then loops over
      top-level declarations until EOF.
     */
    Module parse(SourceManager sm, Lexer lexer, Action act)
    {
        this.sm = sm;
        this.lexer = lexer;
        this.action = act;

        Module m;
        if (lexer.peek.type == Tok.Module)
        {
            Token _module = lexer.next;
            ModuleName name = parseModuleName();
            m = action.actOnModule(_module, sm.getText(name.asRange()));
            require(Tok.Seperator);
        }
        else
        {
            // No explicit module declaration — name the module after the file.
            SLoc loc = lexer.peek.location;
            m = action.actOnImplicitModule(loc, sm.getFile(loc));
        }

        while (lexer.peek.type != Tok.EOF)
            foreach (d; parseDeclDef())
                action.actOnModuleDecl(m, d);

        return m;
    }

private:
    /**
      Parse one top-level item. An import token may expand to several
      declarations (`import A, B;`), hence the array return.
     */
    Decl[] parseDeclDef()
    {
        Token t = lexer.peek;
        if (t.type == Tok.Import)
            return parseImports();
        else
            return [parseDecl()];
    }

    /**
      Parse a single declaration: a variable (`type id;` / `type id = exp;`),
      a function (`type id(...)`) or a struct.

      Error recovery: if the leading tokens do not form a valid
      `type identifier` prefix, the bad tokens are reported and skipped,
      the next identifier is reinterpreted as the type, and control jumps
      back to parseDeclAfterInvalidType to retry from the identifier.
     */
    Decl parseDecl()
    {
        Token t = lexer.peek;

        if (t.isBasicType || t.isIdentifier)
        {
            Id type;
            Id iden;
            // Speculatively measure how many tokens a type would consume
            // (0 means "not a type" — see peekParseType).
            int len = peekParseType;
            if(lexer.peek(len).type == Tok.Identifier && len != 0)
            {
                type = parseType;
parseDeclAfterInvalidType:
                iden = Id(require(Tok.Identifier));
                Token next = lexer.peek();
                if (next.type == Tok.Seperator)
                {
                    // Plain declaration without initializer.
                    Token sep = lexer.next();
                    return action.actOnDeclarator(type, iden, null);
                }
                else if (next.type == Tok.Assign)
                {
                    // Declaration with initializer expression.
                    Token assign = lexer.next();
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnDeclarator(type, iden, exp);
                }
                else if (next.type == Tok.OpenParentheses)
                    return parseFunc(type, iden);
                else
                    messages.report(UnexpectedTok, next.location).arg(next.getType);
            }
            // Recovery path: the tokens at hand are not a valid type.
            // Report, discard the measured tokens plus anything up to the
            // next identifier, treat that identifier as the type, and retry.
            // NOTE(review): the identifier used as `type` here is peeked but
            // not consumed, so the require(Tok.Identifier) after the goto
            // consumes that same token — confirm this double use is intended.
            t = lexer.peek(len);
            messages.report(InvalidDeclType, t.location)
                .arg(sm.getText(t.asRange));
            while(len--)
                lexer.next;
            while(lexer.peek.type != Tok.Identifier)
                lexer.next;
            type = Id(lexer.peek);
            goto parseDeclAfterInvalidType;
        }
        else if (t.type == Tok.Struct)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));
            
            return parseStruct(type, iden);
        }
        // Not a declaration at all — unrecoverable at this level.
        // fatal() does not return, so no return statement is needed here.
        messages.report(UnexpectedTok, t.location)
            .arg(t.getType)
            .arg(Tok.Identifier)
            .fatal(ExitLevel.Parser);
    }

    /**
      Parse a series of imports belonging to a single import token.

      Supports `import A, B.C;`, renamed imports `import B = A.A;` and
      selective imports `import A : a, print = b;`. On a malformed import
      list, Lerror skips to the next separator and returns what was
      collected so far.
     */
    Decl[] parseImports()
    {
        Token _import = require(Tok.Import);
        SmallArray!(Decl) res;
        void addToRes(Decl d) { res ~= d; }

        bool done = false;
        while (!done && !on_a(Tok.Seperator))
        {
            ModuleName mod = parseModuleName();
            Token tok = lexer.peek;
            switch (tok.type)
            {
                case Tok.Comma:
                    // import A, B.C;
                    // parse another module-name
                    lexer.next();
                    res ~= action.actOnImport(_import, mod, null);
                    break;
                case Tok.Assign:
                    // import B = A.A;
                    //        ^- must be a single identifier
                    // renamed import
                    if (mod.packages.length != 0)
                    {
                        SLoc loc = mod.packages[0].tok.location;
                        messages.report(RenameMustBeSingleIdent, loc);
                    }
                    //if (isStatic)
                    //    error("Static imports cannot be renamed");
                    lexer.next();
                    Id name = mod.id;
                    mod = parseModuleName();
                    // create from mod and rename to `name`
                    res ~= action.actOnImport(_import, mod, &name);
                    break;
                case Tok.Colon:
                    // import A : a;
                    // selective imports, potentially import A : print = a
                    lexer.next();
                    Decl d = action.actOnImport(_import, mod, null);
                    // do-while on a comma:
                    //   add explicit symbol
                    do
                    {
                        Id sym = parseIdentifier();
                        Id dummy;
                        Id* name = null;
                        if (skip(Tok.Assign))
                        {
                            // `print = a` — the parsed name was the rename
                            // target; the real symbol follows the '='.
                            dummy = sym;
                            name = &dummy;
                            sym = parseIdentifier();
                        }
                        action.addSelectiveImport(d, sym, name);

                    } while (skip(Tok.Comma));
                    require(Tok.Seperator);
                    res ~= d;
                    // Selective imports end the whole import statement.
                    return res.safe();
                case Tok.Seperator:
                    done = true;
                    break;
                default:
                    goto Lerror;
            }
            res ~= action.actOnImport(_import, mod, null);
        }

        require(Tok.Seperator);
        return res.safe();
Lerror:
        // Recovery: skip to the end of the statement.
        while (!on_a (Tok.Seperator))
            lexer.next();
        return res.safe();
    }

    /**
      Parse struct

      `type`/`iden` are the already-consumed `struct` keyword and struct
      name. Members are parsed with the general parseDecl and attached via
      actOnStructMember. The commented-out block below is the previous,
      more restrictive member parser kept for reference.
     */
    Decl parseStruct(Id type, Id iden)
    {
        auto decl = action.actOnDeclarator(type, iden, null);

        require(Tok.OpenBrace);

        while(lexer.peek.isBasicType || lexer.peek.isIdentifier)
        {
            auto m_decl = parseDecl();
            action.actOnStructMember(decl, m_decl); 
/*            Id var_type = Id(lexer.next);
            Id var_iden = Id(require(Tok.Identifier));
            Token next = lexer.peek();
            if (next.type == Tok.Seperator)
            {
                Token sep = lexer.next();
                action.actOnStructMember(decl, var_type, var_iden, null);
                continue;
            }
            else if (next.type == Tok.Assign)
            {
                Token assign = lexer.next();
                Exp exp = parseExpression();
                require(Tok.Seperator);
                action.actOnStructMember(decl, var_type, var_iden, exp);
                continue;
            }
            messages.report(UnexpectedTok, next.location).arg(next.getType);*/
        }

        require(Tok.CloseBrace);
        
        return decl;
    }

    /**
      Parse statements.

      This is the place to attack!

      Recognizes return / if / while, declarations, and free-standing
      expressions. NOTE(review): D1 switch cases fall through implicitly;
      if the guard inside `case Tok.Identifier` is ever false, control
      falls into `case Tok.Switch` and reports UnexpectedTok — confirm
      that is the intended diagnostic for that path.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Return:
                Token ret = lexer.next;
                Exp exp;
                // `return;` with no value is allowed.
                if (lexer.peek.type != Tok.Seperator)
                    exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            /*
               if (cond)
                single statement | compound statement
               [else
                single statement | compound statement]
             */
            case Tok.If:
                Token _if = lexer.next();

                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);

                Stmt thenB = parseSingleOrCompoundStatement();

                // if there is no else part we use the if as token, to have
                // something than can be passed along
                Token _else = _if;
                Stmt elseB;
                if (lexer.peek.type == Tok.Else)
                {
                    _else = lexer.next;
                    elseB = parseSingleOrCompoundStatement();
                }

                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);

            /*
               while (cond)
                single statement | compound statement
             */
            case Tok.While:
                Token _while = lexer.next;
                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);
                Stmt bodyStmt = parseSingleOrCompoundStatement();
                return action.actOnWhileStmt(_while, cond, bodyStmt);

            /*
               One of four things:
               A declaration of a function/variable `type id ...`
               A direct assignment `id = exp;`
               An indirect assignment `id.id = exp`
               Some sort of free standing expression

               The assignments should be handled as binary expressions?
             */
            case Tok.Identifier:
                Token iden = lexer.peek;
                Token n = lexer.peek(1);
                // Must be an decl, if we start with a basic type, or two
                // identifiers in a row
                if (iden.isBasicType() || iden.isIdentifier())
                {
                    if ( n.type == Tok.Star || n.type == Tok.OpenBracket)
                    {
                        // Could be `int* p;` or `a * b;` — disambiguate by
                        // checking whether a full type ending in an
                        // identifier follows.
                        int len = peekParseType;
                        if(lexer.peek(len).type == Tok.Identifier && len != 0)
                            return action.actOnDeclStmt(parseVarDecl());

                        Exp exp = parseExpression();
                        require(Tok.Seperator);
                        return action.actOnExprStmt(exp);
                    }
                        
                    if (n.isIdentifier())
                        return action.actOnDeclStmt(parseVarDecl());

                    // Expression: a.b, a = b, a(b) etc.
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }

            case Tok.Switch:
                // Not implemented — consume the token and diagnose.
                messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType);
                return null;

            default:
                if (t.isBasicType())
                    goto case Tok.Identifier;
                if (t.type == Tok.Star)
                {
                    // Statement beginning with a dereference, e.g. `*p = 1;`.
                    auto exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }
                messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType);
                return null;
        }
        messages.report(UnexpectedTok, t.location);
        return null;
    }

    /**
      Parse a local variable declaration `type id [= exp];`.
     */
    Decl parseVarDecl()
    {
        // manually hardcoded to only support "type id [= exp];"
        // as that is the only thing the codegen understands
        Id type = parseType;
        Id id = Id(lexer.next);
        Exp init;
        if (skip(Tok.Assign))
            init = parseExpression();
        require(Tok.Seperator);
        Decl d = action.actOnDeclarator(type, id, init);
        return d;
    }

    /**
      Parses a function/method given the already parsed return type and name

      A separator instead of a body yields a bodyless declaration
      (prototype); otherwise the compound-statement body is parsed and
      attached via actOnEndOfFunction.
     */
    Decl parseFunc(ref Id type, ref Id name)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name);
        parseFuncArgs(func);

        if(lexer.peek.type == Tok.Seperator)
        {
            lexer.next;
            return func;
        }
        Stmt stmt = parseCompoundStatement();

        return action.actOnEndOfFunction(func, stmt);
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the intitial paren and the ending paren is consumed.

      Parameter names are optional — `i` stays default when only a type
      is given.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        while(lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType();
            Id i;
            if(lexer.peek.type == Tok.Identifier)
                i = parseIdentifier();
            action.addFuncArg(func, t, i);

            if(lexer.peek.type == Tok.Comma)
                lexer.next;
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if, while
      and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (lexer.peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }

    /**
      Parses a function-body or similar, expects an opening brace to be the
      current token.
      
      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while (lexer.peek.type != Tok.CloseBrace)
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /**
      Consume one identifier token and wrap it as an Id.

      On any other token a diagnostic is reported. NOTE(review): unless
      report(...) aborts, control runs off the end without a return —
      relies on the diagnostic being fatal or on D1's lax return checking.
     */
    Id parseIdentifier()
    {
        Token tok = lexer.next;

        if (tok.type is Tok.Identifier)
            return Id(tok);

        messages.report(UnexpectedTokSingle, tok.location)
            .arg(tok.getType)
            .arg(Tok.Identifier);
    }

    /**
      Parse a dotted module name `a.b.c` into packages [a, b] and id c.

      On a missing identifier after a dot, skips to the next separator and
      returns the partial name.
     */
    ModuleName parseModuleName()
    {
        auto id = parseIdentifier();
        ModuleName mod;
        while (skip(Tok.Dot))
        {
            mod.packages ~= id;
            if (lexer.peek.type != Tok.Identifier) {
                messages.report(ExpectedIdAfterPackage, lexer.peek.location);
                goto Lerror;
            }
            id = parseIdentifier();
        }
        mod.id = id;
        return mod;
Lerror:
        while (!skip(Tok.Seperator))
            lexer.next();
        return mod;
    }


    /**
      Parse a type - this includes pointer and array(at some point) types.

      Consumes a basic type or identifier, then any number of `*` and
      `[n]` suffixes, wrapping with PointerId/ArrayId. Must stay in sync
      with peekParseType below.
     */
    Id parseType()
    {
        Token type = lexer.next;

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            messages.report(InvalidType, type.location);

        currentType = Id(type);
        type = lexer.peek;

        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if(type.type == Tok.Star)
            {
                currentType = PointerId(currentType);
                lexer.next;
            }
            else
            {
                // `[` — static array; size is optional.
                lexer.next;
                if(lexer.peek.type == Tok.Integer)
                    currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer)));
                require(Tok.CloseBracket);
                
            }
            type = lexer.peek;
        }

        return currentType;
    }

    /**
      Lookahead-only twin of parseType: returns how many tokens a type
      starting at the current position would consume, or 0 if the tokens
      do not form a type. Consumes nothing. Used to disambiguate
      declarations from expressions.
     */
    int peekParseType()
    {
        int i;
        Token type = lexer.peek(i);

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            return 0;

        currentType = Id(type);
        type = lexer.peek(++i);

        while(type.type == Tok.Star || type.type == Tok.OpenBracket)
        {
            if(type.type == Tok.Star)
            {
                i++;
            }
            else
            {
                if(lexer.peek(i++).type != Tok.OpenBracket)
                    return 0;
                if(lexer.peek(i).type == Tok.Integer)
                {
                    i++;
                    if(lexer.peek(i++).type != Tok.CloseBracket)    
                        return 0;
                }
                else
                    if(lexer.peek(i++).type != Tok.CloseBracket)
                        return 0;
                
            }
            type = lexer.peek(i);
        }

        return i;
    }

private:
    // -- Expression parsing -- //

    /**
      Parse postfix operators onto an already-parsed expression: member
      access `.id` (recursing for chains like a.b.c) and indexing `[exp]`.
      Returns `target` unchanged when no postfix follows.
     */
    Exp parsePostfixExp(Exp target)
    {
        switch(lexer.peek.type)
        {
            case Tok.Dot:
                switch(lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = lexer.next;
                        Id member = Id(lexer.next);
                        Exp exp = action.actOnMemberReference(target, op.location, member);
                        return parsePostfixExp(exp);
                    default:
                        Token t = lexer.peek(1);
                        messages.report(ExpectedIdAfterDot, t.location);
                }
            case Tok.OpenBracket:
                Token open = lexer.next;
                Exp index = parseExpression();
                Token close = require(Tok.CloseBracket);
                return action.actOnIndexEpr(target, open, index, close);
            default:
                return target;
        }
    }

    /**
      Precedence-climbing binary expression parser.

      `p` is the minimum operator precedence accepted at this level;
      left-associative operators recurse with prec+1, right-associative
      (only Assign in the table below) with prec.
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: unary op, parenthesized expression,
      identifier (with postfix and optional call arguments), cast,
      integer or string literal. Reports a fatal ExpectedExp otherwise.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return action.actOnUnaryOp(next, parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(next));
            Exp iden = parsePostfixExp(value);
            switch(lexer.peek.type)
            {
                case Tok.OpenParentheses:
                    // Call expression — collect comma-separated arguments.
                    Token lp = lexer.next;
                    SmallArray!(Exp, 8) args;
                    while(lexer.peek.type != Tok.CloseParentheses)
                    {
                        if(lexer.peek.type == Tok.Comma)
                            lexer.next;
                        args ~= parseExpression();
                    }

                    Token rp = lexer.next();
                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);

                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Cast)
            return parseCast(next);
        else if (next.type == Tok.Integer)
            return action.actOnNumericConstant(next);
        else if (next.type == Tok.String)
            return action.actOnStringExp(next);

        messages.report(ExpectedExp, next.location)
            .fatal(ExitLevel.Parser);
        return null;
    }

    /**
      Parse `cast(Type) exp`; the `cast` keyword is already consumed and
      passed in as `_cast`. The operand binds as a primary (P), not a
      full expression.
     */
    Exp parseCast(ref Token _cast)
    {
        require(Tok.OpenParentheses);
        auto next = lexer.next;
        if(!next.isBasicType && !next.isIdentifier)
            messages.report(ExpectedCastType, next.location);
        
        require(Tok.CloseParentheses);
        auto exp = P();
        return action.actOnCastExpr(_cast, Id(next), exp);
    }

    /// Prefix operator table entry: token and binding precedence.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    // Unary minus and dereference bind tighter than comparisons/additive
    // but looser than multiplicative (prec 5).
    static const UnOp[] _unary =
    [
        {Tok.Minus, 4},
        {Tok.Star, 4}
    ];
    /// Look up `t` in the unary table; null when not a prefix operator.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// Binary operator table entry: token, precedence, associativity and
    /// the semantic Operator handed to the Action.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    // Precedence levels: assignment (1, right-assoc) < comparison (2)
    // < additive (3) < multiplicative (5). Level 4 is taken by the
    // unary operators above.
    static const BinOp[] _binary =
    [
        {Tok.Assign,    1, false, Operator.Assign},

        {Tok.Eq,        2, true, Operator.Eq},
        {Tok.Ne,        2, true, Operator.Ne},

        {Tok.Lt,        2, true, Operator.Lt},
        {Tok.Le,        2, true, Operator.Le},
        {Tok.Gt,        2, true, Operator.Gt},
        {Tok.Ge,        2, true, Operator.Ge},

        {Tok.Plus,      3, true, Operator.Add},
        {Tok.Minus,     3, true, Operator.Sub},

        {Tok.Star,      5, true, Operator.Mul},
        {Tok.Slash,     5, true, Operator.Div},
        {Tok.Percent,   5, true, Operator.Mod}
    ];
    /// Look up `t` in the binary table; null when not a binary operator.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
      Consume and return the next token, reporting a diagnostic if it is
      not of type `t`. Note the token is consumed either way.
     */
    Token require(Tok t)
    {
        if (lexer.peek().type != t)
            messages.report(UnexpectedTokSingle, lexer.peek.location)
                .arg(lexer.peek.getType)
                .arg(t);
        return lexer.next();
    }

    /**
      Consume the next token only if it is of type `t`; returns whether
      it was consumed.
     */
    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    /// True when the next token is of type `t` (pure lookahead).
    bool on_a(Tok t)
    {
        return lexer.peek.type == t;
    }

    Lexer lexer;       // token source for the current parse
    SourceManager sm;  // maps locations back to source text
}