view parser/Parser.d @ 80:682e20aa224f new_gen

Pointers working now - big YAY
author Anders Johnsen <skabet@gmail.com>
date Fri, 02 May 2008 17:33:50 +0200
parents ad956143dcdc
children 110c7e1c4ca2
line wrap: on
line source

module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import misc.Error;

import basic.SmallArray;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

class Parser
{
    Action action;
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;

public:
    Decl[] parse(Lexer lexer, Action act)
    {
        this.lexer = lexer;
        action = act;

        Decl[] declarations;

        while(lexer.peek.type != Tok.EOF)
            declarations ~= parseDecl();

        return declarations;
    }

    Decl parseDecl()
    {
        Token t = lexer.peek;

        if (t.isBasicType || t.isIdentifier)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));
            Token next = lexer.peek();
            if (next.type == Tok.Seperator)
            {
                Token sep = lexer.next();
                return action.actOnDeclarator(type, iden, null);
            }
            else if (next.type == Tok.Assign)
            {
                Token assign = lexer.next();
                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnDeclarator(type, iden, exp);
            }
            else if (next.type == Tok.OpenParentheses)
                return parseFunc(type, iden);
            else
                throw error(__LINE__, PE.UnexpectedTok)
                    .tok(next)
                    .arg(next.getType);
        }
        else if (t.type == Tok.Struct)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));
            
            return parseStruct(type, iden);
        }
        char[] c = t.getType;
        throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(c);
    }

    /**
      Parse struct
     */
    Decl parseStruct(Id type, Id iden)
    {
        auto decl = action.actOnDeclarator(type, iden, null);

        require(Tok.OpenBrace);

        while(lexer.peek.isBasicType || lexer.peek.isIdentifier)
        {
            Id var_type = Id(lexer.next);
            Id var_iden = Id(require(Tok.Identifier));
            Token next = lexer.peek();
            if (next.type == Tok.Seperator)
            {
                Token sep = lexer.next();
                action.actOnStructMember(decl, var_type, var_iden, null);
                continue;
            }
            else if (next.type == Tok.Assign)
            {
                Token assign = lexer.next();
                Exp exp = parseExpression();
                require(Tok.Seperator);
                action.actOnStructMember(decl, var_type, var_iden, exp);
                continue;
            }
            throw error(__LINE__, PE.UnexpectedTok)
                .tok(next).arg(next.getType);
        }

        require(Tok.CloseBrace);
        
        return decl;
    }
    /**
      Parse statements.

      This is the place to attack!
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Return:
                Token ret = lexer.next;
                Exp exp;
                if (lexer.peek.type != Tok.Seperator)
                    exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            /*
               if (cond)
                single statement | compound statement
               [else
                single statement | compound statement]
             */
            case Tok.If:
                Token _if = lexer.next();

                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);

                Stmt thenB = parseSingleOrCompoundStatement();

                // if there is no else part we use the if as token, to have
                // something than can be passed along
                Token _else = _if;
                Stmt elseB;
                if (lexer.peek.type == Tok.Else)
                {
                    _else = lexer.next;
                    elseB = parseSingleOrCompoundStatement();
                }

                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);

            /*
               while (cond)
                single statement | compound statement
             */
            case Tok.While:
                Token _while = lexer.next;
                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);
                Stmt bodyStmt = parseSingleOrCompoundStatement();
                return action.actOnWhileStmt(_while, cond, bodyStmt);

            /*
               One of four things:
               A declaration of a function/variable `type id ...`
               A direct assignment `id = exp;`
               An indirect assignment `id.id = exp`
               Some sort of free standing expression

               The assignments should be handled as binary expressions?
             */
            case Tok.Identifier:
                Token iden = lexer.peek;
                Token n = lexer.peek(1);
                // Must be an decl, if we start with a basic type, or two
                // identifiers in a row
                if (iden.isBasicType() || iden.isIdentifier())
                {
                    if ( n.type == Tok.Star)
                        return action.actOnDeclStmt(parseVarDecl());
                        
                    if ( n.isIdentifier())
                        return action.actOnDeclStmt(parseVarDecl());

                    // Expression: a.b, a = b, a(b) etc.
                    Exp exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                    break;
                }

            case Tok.Switch:
                throw error(__LINE__, ":(").tok(lexer.peek);
                return null;

            default:
                if (t.isBasicType())
                    goto case Tok.Identifier;
                if (t.type == Tok.Star)
                {
                    auto exp = parseExpression();
                    require(Tok.Seperator);
                    return action.actOnExprStmt(exp);
                }
                    
                throw error(__LINE__, "Unexpexted begining of statement.").tok(lexer.peek);
        }
        throw error(__LINE__, "").tok(t);
        return null;
    }

    Decl parseVarDecl()
    {
        // manually hardcoded to only support "type id [= exp];"
        // as that is the only thing the codegen understands
        Id type = parseType;
        Id id = Id(lexer.next);
        Exp init;
        if (skip(Tok.Assign))
            init = parseExpression();
        require(Tok.Seperator);
        Decl d = action.actOnDeclarator(type, id, init);
        return d;
    }

    /**
      Parses a function/method given the already parsed return type and name
     */
    Decl parseFunc(ref Id type, ref Id name)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name);
        parseFuncArgs(func);

        Stmt stmt = parseCompoundStatement();

        return action.actOnEndOfFunction(func, stmt);
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the intitial paren and the ending paren is consumed.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        while(lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType();
            auto i = parseIdentifier();
            action.addFuncArg(func, t, i);

            if(lexer.peek.type == Tok.Comma)
                lexer.next;
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if, while
      and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (lexer.peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }
    /**
      Parses a function-body or similar, expects { to be current token.
      
      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while (lexer.peek.type != Tok.CloseBrace)
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    Id parseIdentifier()
    {
        Token tok = lexer.next;

        if (tok.type is Tok.Identifier)
            return Id(tok);

        throw error(__LINE__, PE.UnexpectedTokSingle)
            .arg(tok.getType)
            .arg(Tok.Identifier)
            .tok(tok);
    }

    /**
      Parse a type - this includes pointer and array(at some point) types.
     */
    Id parseType()
    {
        Token type = lexer.next;

        Id currentType;

        if ( !(type.isBasicType || type.type == Tok.Identifier) )
            throw error(__LINE__, "Unexpected token in Type parsing. Got %0")
                .arg(type.getType)
                .tok(type);

        currentType = Id(type);
        type = lexer.peek;

        while(type.type == Tok.Star)
        {
            currentType = PointerId(currentType);
            lexer.next;
            type = lexer.peek;
        }

        return currentType;
    }

private:
    // -- Expression parsing -- //
    Exp parseExpIdentifier(Exp target)
    {
        switch(lexer.peek.type)
        {
            case Tok.Dot:
                switch(lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        Token op = lexer.next;
                        Id member = Id(lexer.next);
                        Exp exp = action.actOnMemberReference(target, op.location, member);
                        return parseExpIdentifier(exp);
                    default:
                        Token t = lexer.peek(1);
                        throw error(__LINE__, "Expected identifier after '.'").tok(t);
                }
            default:
                return target;
        }
    }

    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return action.actOnUnaryOp(next, parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(next));
            Exp iden = parseExpIdentifier(value);
            switch(lexer.peek.type)
            {
                case Tok.OpenParentheses:
                    Token lp = lexer.next;
                    SmallArray!(Exp, 8) args;
                    while(lexer.peek.type != Tok.CloseParentheses)
                    {
                        if(lexer.peek.type == Tok.Comma)
                            lexer.next;
                        args ~= parseExpression();
                    }

                    Token rp = lexer.next();
                    return action.actOnCallExpr(iden, lp, args.unsafe(), rp);

                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Cast)
            return parseCast();
        else if (next.type == Tok.Integer)
            return action.actOnNumericConstant(next);

        throw error(__LINE__, "Expected expression, not '"~next.getType~"'").tok(next);
        assert(0, "Should not happen");
    }

    Exp parseCast()
    {
        require(Tok.OpenParentheses);
        auto next = lexer.next;
        if(!next.isBasicType && !next.isIdentifier)
            throw error(__LINE__, "Expected cast type, not "~next.get).tok(next);
        
        require(Tok.CloseParentheses);
        auto exp = P();
        return action.actOnCastExpr(Id(next), exp);
    }

    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static const UnOp[] _unary =
    [
        {Tok.Minus, 4},
        {Tok.Star, 4}
    ];
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    static const BinOp[] _binary =
    [
        {Tok.Assign,    1, false, Operator.Assign},

        {Tok.Eq,        2, true, Operator.Eq},
        {Tok.Ne,        2, true, Operator.Ne},

        {Tok.Lt,        2, true, Operator.Lt},
        {Tok.Le,        2, true, Operator.Le},
        {Tok.Gt,        2, true, Operator.Gt},
        {Tok.Ge,        2, true, Operator.Ge},

        {Tok.Plus,      3, true, Operator.Add},
        {Tok.Minus,     3, true, Operator.Sub},

        {Tok.Star,      5, true, Operator.Mul},
        {Tok.Slash,     5, true, Operator.Div},
        {Tok.Percent,   5, true, Operator.Mod}
    ];
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    Token require(Tok t)
    {
        if (lexer.peek().type != t)
            throw error(__LINE__, PE.UnexpectedTokSingle)
                .arg(lexer.peek.getType)
                .arg(t)
                .tok(lexer.peek);
        return lexer.next();
    }

    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    Error error(uint line, char[] errMsg)
    {
        Location loc = lexer.peek.location;
        auto e =
            new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg);
            e.loc(loc);
            return e;
    }

    struct PE
    {
        static char[]
            UnexpectedTokMulti  = "Unexpected token, got %0 expected one of %1",
            UnexpectedTokSingle = "Unexpected token, got %0 expected %1",
            UnexpectedTok       = "Unexpected token %0";

        static char[]
            CaseValueMustBeInt  = "Cases can only be integer literals";
    }

    Lexer lexer;
}