view parser/Parser.d @ 37:858b9805843d new_gen

Bug-fixes: Void can now be used and is recognized as a keyword by the lexer. Fixed a problem with casting on pointer types. The expression is now optional for a ReturnStmt (only legal in void funcs).
author Anders Halager <halager@gmail.com>
date Sun, 20 Apr 2008 23:53:05 +0200
parents ce17bea8e9bd
children 495188f9078e
line wrap: on
line source

module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import ast.Exp,
       ast.Stmt,
       ast.Decl;

import misc.Error;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

class Parser
{

public:
    /**
      Parse the complete token stream of the given lexer.

      Returns the list of top-level declarations, stopping at EOF.
     */
    Decl[] parse(Lexer lexer)
    {
        this.lexer = lexer;

        Decl[] declarations;

        while (lexer.peek.type != Tok.EOF)
            declarations ~= parseRootDecl;

        return declarations;
    }

    /**
      Parse one declaration at module scope.

      Struct declarations are only legal here; variables and functions
      are delegated to parseDecl so the two methods cannot drift apart.
      (The old copy of the var/func logic here reported the type token
      instead of the offending token in one error message.)
     */
    Decl parseRootDecl()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Struct:
                lexer.next;
                Token iden = lexer.next;
                switch(iden.type)
                {
                    case Tok.Identifier:
                        Identifier identifier = new Identifier(iden);
                        return new StructDecl(identifier, parseStruct());
                    default:
                        // iden is already consumed, so attach it explicitly
                        // to get the right error location.
                        throw error(__LINE__, "Expected struct identifier, but got %0")
                            .arg(iden.getType)
                            .tok(iden);
                }
            default:
                // Variables, functions, EOF and errors — same as at any
                // other scope.
                return parseDecl();
        }
    }

    /**
      Parse a variable or function declaration:

          type identifier ;
          type identifier = expression ;
          type identifier ( args ) { stmt* }

      Returns null at EOF.
     */
    Decl parseDecl()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Byte,  Tok.Ubyte,
                 Tok.Short, Tok.Ushort,
                 Tok.Int,   Tok.Uint,
                 Tok.Long,  Tok.Ulong,
                 Tok.Float, Tok.Double,
                 Tok.Bool,
                 Tok.Void,
                 Tok.Identifier:
                Identifier type = new Identifier(t);

                Token iden = lexer.peek(1);

                switch(iden.type)
                {
                    case Tok.Identifier:
                        Identifier identifier = new Identifier(iden);
                        // The token after "type identifier" decides what
                        // kind of declaration this is.
                        Token p = lexer.peek(2);
                        switch(p.type)
                        {
                            case Tok.OpenParentheses:
                                lexer.next; lexer.next;
                                return parseFunc(type, identifier);
                            case Tok.Seperator:
                                // Uninitialized variable.
                                lexer.next; lexer.next;
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, null);
                            case Tok.Assign:
                                lexer.next; lexer.next;
                                lexer.next(); // eat the "="
                                auto exp = parseExpression();
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, exp);
                            default:
                                throw error(__LINE__, UnexpectedTokMulti)
                                    .tok(p)
                                    .arg(p.getType)
                                    .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign);
                        }
                    default:
                        throw error(__LINE__, UnexpectedTokSingle)
                            .tok(iden)
                            .arg(iden.getType)
                            .arg(Tok.Identifier);
                }
            case Tok.EOF:
                return null;
            default:
                // Bug fix: this error previously lacked .tok(t), giving a
                // less precise location than its twin in parseRootDecl.
                throw error(__LINE__, UnexpectedTok).tok(t).arg(t.getType);
        }
    }

    /**
      Parse a struct body — "{" VarDecl* "}".
     */
    VarDecl[] parseStruct()
    {
        VarDecl[] varDecls;
        require(Tok.OpenBrace);
        while(lexer.peek.type != Tok.CloseBrace)
            varDecls ~= cast(VarDecl)parseDecl;

        require(Tok.CloseBrace);
        return varDecls;
    }

    /**
      Parse a single statement: return, if, while, switch, an
      assignment/expression statement or a declaration.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Return:
                lexer.next;
                auto ret = new ReturnStmt();
                // The return value is optional (only legal in void
                // functions — that check happens later, not here).
                if (!skip(Tok.Seperator))
                {
                    ret.exp = parseExpression();
                    require(Tok.Seperator);
                }
                return ret;

            case Tok.If:
                lexer.next;
                require(Tok.OpenParentheses);
                auto condition = parseExpression();
                require(Tok.CloseParentheses);

                auto then_body = parseBlockOrSingleStmt();

                Stmt[] else_body;
                if (lexer.peek.type == Tok.Else)
                {
                    lexer.next;
                    else_body = parseBlockOrSingleStmt();
                }

                return new IfStmt(condition, then_body, else_body);

            case Tok.While:
                lexer.next;
                require(Tok.OpenParentheses);
                auto condition = parseExpression();
                require(Tok.CloseParentheses);
                return new WhileStmt(condition, parseBlockOrSingleStmt());

            case Tok.Identifier:
                // One token of lookahead decides between member
                // assignment, plain assignment, declaration and a bare
                // expression statement.
                Token n = lexer.peek(1);
                switch(n.type)
                {
                    case Tok.Dot:
                        Exp iden = parseExpIdentifier(new Identifier(lexer.next));
                        switch(lexer.peek.type)
                        {
                            case Tok.Assign:
                                lexer.next;
                                auto stmt = new ExpStmt(new AssignExp(iden, parseExpression()));
                                require(Tok.Seperator);
                                return stmt;
                            default:
                                // Bug fix: there was no default case here,
                                // so a member access not followed by "="
                                // aborted with a runtime SwitchError
                                // instead of a parse error.
                                throw error(__LINE__, UnexpectedTokSingle)
                                    .arg(lexer.peek.getType)
                                    .arg(Tok.Assign);
                        }
                    case Tok.Assign:
                        lexer.next;
                        lexer.next; // eat the "="
                        auto stmt = new ExpStmt(new AssignExp(new Identifier(t), parseExpression()));
                        require(Tok.Seperator);
                        return stmt;
                    case Tok.Identifier:
                        // "identifier identifier" can only start a
                        // declaration.
                        return new DeclStmt(parseDecl());

                    default:
                        auto e = new ExpStmt(parseExpression());
                        require(Tok.Seperator);
                        return e;
                }

            case Tok.Switch:
                lexer.next;
                require(Tok.OpenParentheses);
                auto target = parseExpression();
                auto res = new SwitchStmt(target);
                require(Tok.CloseParentheses);
                require(Tok.OpenBrace);
                while (true)
                {
                    Stmt[] statements;
                    if (skip(Tok.Default))
                    {
                        require(Tok.Colon);
                        while (lexer.peek.type != Tok.Case
                                && lexer.peek.type != Tok.Default
                                && lexer.peek.type != Tok.CloseBrace)
                            statements ~= parseStatement();
                        res.setDefault(statements);
                        continue;
                    }

                    Token _case = lexer.peek;
                    if (_case.type != Tok.Case)
                        break;
                    lexer.next();

                    // A case may cover several comma-separated integer
                    // literals: "case 1, 2, 3:".
                    IntegerLit[] literals;
                    do
                    {
                        Exp e = parseExpression();
                        IntegerLit lit = cast(IntegerLit)e;
                        if (lit is null)
                            throw error(__LINE__, CaseValueMustBeInt)
                                .tok(_case);

                        literals ~= lit;
                    }
                    while (skip(Tok.Comma));
                    require(Tok.Colon);

                    while (lexer.peek.type != Tok.Case
                            && lexer.peek.type != Tok.Default
                            && lexer.peek.type != Tok.CloseBrace)
                        statements ~= parseStatement();

                    res.addCase(literals, statements);

                    if (lexer.peek.type == Tok.CloseBrace)
                        break;
                }
                require(Tok.CloseBrace);
                return res;

            default:
                // Anything else must be a declaration.
                return new DeclStmt(parseDecl());
        }
        // Not reachable — every case returns or throws — but kept so the
        // compiler sees a return on every path.
        return new Stmt();
    }

    /**
      Parse a function declaration. The return type and name have already
      been consumed by the caller; the current token is the "(".
     */
    FuncDecl parseFunc(Identifier type, Identifier identifier)
    {
        VarDecl[] funcArgs = parseFuncArgs();

        // Robustness fix: the braces used to be skipped blindly with
        // lexer.next; require gives a proper error on malformed input.
        require(Tok.OpenBrace);

        Stmt[] statements;

        while(lexer.peek.type != Tok.CloseBrace)
            statements ~= parseStatement();

        require(Tok.CloseBrace);

        return new FuncDecl(type, identifier, funcArgs, statements);
    }

    /**
      Parse "( type name, type name, ... )". The current token must be
      the "(".
     */
    VarDecl[] parseFuncArgs()
    {
        lexer.next; // eat the "(" — the caller already verified it

        VarDecl[] funcArgs;

        while(lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType;
            auto i = parseIdentifier;
            funcArgs ~= new VarDecl(t, i);

            skip(Tok.Comma);
        }

        lexer.next; // eat the ")"

        return funcArgs;
    }

    /**
      Consume one identifier token and wrap it in an Identifier node.
     */
    Identifier parseIdentifier()
    {
        Token identifier = lexer.next;

        switch(identifier.type)
        {
            case Tok.Identifier:
                return new Identifier(identifier);
            default:
                throw error(__LINE__, "Unexpected token in Identifier parsing. Got %0")
                    .arg(identifier.getType)
                    .tok(identifier);
        }
    }

    /**
      Parse a type: a built-in basic type or a user-defined identifier.
     */
    Identifier parseType()
    {
        Token type = lexer.next;

        switch(type.type)
        {
            case Tok.Byte,  Tok.Ubyte,
                 Tok.Short, Tok.Ushort,
                 Tok.Int,   Tok.Uint,
                 Tok.Long,  Tok.Ulong,
                 Tok.Float, Tok.Double,
                 Tok.Bool,
                 Tok.Void,
                 Tok.Identifier:
                return new Identifier(type);
            default:
                // Bug fix: the Error used to be created but never thrown,
                // letting control fall off the end of the function.
                throw error(__LINE__, "Unexpected token in Type parsing. Got %0")
                    .arg(type.getType)
                    .tok(type);
        }
    }

    // -- Expression parsing -- //
private:
    /**
      Parse a chain of member lookups — "a.b.c" — extending the already
      parsed target expression. Returns target unchanged when the next
      token is not a ".".
     */
    Exp parseExpIdentifier(Exp target)
    {
        switch(lexer.peek.type)
        {
            case Tok.Dot:
                switch(lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        lexer.next; // eat the "."
                        return parseExpIdentifier(
                                new MemberLookup(target, new Identifier(lexer.next)));
                    default:
                        Token t = lexer.peek(1);
                        throw error(__LINE__, "Expected identifier after '.'", &t);
                }
            default:
                return target;
        }
    }

    /**
      Precedence-climbing expression parser. p is the minimum operator
      precedence this call is allowed to consume.
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            // A left-associative operator requires a strictly higher
            // precedence on its right-hand side.
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = new BinaryExp(op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: a unary operator application, a
      parenthesized expression, an identifier (possibly a member lookup
      or a call) or an integer literal.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return new NegateExp(parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp iden = parseExpIdentifier(new Identifier(next));
            switch(lexer.peek.type)
            {
                case Tok.OpenParentheses:
                    // A call — parse the comma-separated argument list.
                    lexer.next;
                    Exp[] args;
                    while(lexer.peek.type != Tok.CloseParentheses)
                    {
                        skip(Tok.Comma);
                        args ~= parseExpression();
                    }

                    lexer.next(); // eat the ")"
                    return new CallExp(iden, args);

                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Integer)
            return new IntegerLit(next);

        Stdout.formatln("{}", next.getType);
        assert(0, "Should not happen");
    }

    /**
      Parse either a braced block of statements or a single statement —
      used for if/while bodies.
     */
    private Stmt[] parseBlockOrSingleStmt()
    {
        Stmt[] stmts;
        if (skip(Tok.OpenBrace))
        {
            while(lexer.peek.type != Tok.CloseBrace)
                stmts ~= parseStatement();
            lexer.next; // eat the "}"
        }
        else
            stmts ~= parseStatement();

        return stmts;
    }

    /// A unary prefix operator: its token and precedence.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static UnOp[] _unary = [{Tok.Sub, 4}];

    /// Look up the unary operator for t; null when t is not one.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator: token, precedence, associativity and the AST
    /// operator it maps to.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        BinaryExp.Operator operator;
    }

    // Comparisons bind loosest, then additive, then multiplicative.
    static BinOp[] _binary =
    [
        {Tok.Eq,        2, true, BinaryExp.Operator.Eq},
        {Tok.Ne,        2, true, BinaryExp.Operator.Ne},
        {Tok.Lt,        2, true, BinaryExp.Operator.Lt},
        {Tok.Le,        2, true, BinaryExp.Operator.Le},
        {Tok.Gt,        2, true, BinaryExp.Operator.Gt},
        {Tok.Ge,        2, true, BinaryExp.Operator.Ge},

        {Tok.Add,       3, true, BinaryExp.Operator.Add},
        {Tok.Sub,       3, true, BinaryExp.Operator.Sub},

        {Tok.Mul,       5, true, BinaryExp.Operator.Mul},
        {Tok.Div,       5, true, BinaryExp.Operator.Div}
    ];

    /// Look up the binary operator for t; null when t is not one.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
      Consume and return the next token if it has the given type;
      otherwise throw an "unexpected token" error at it.
     */
    Token require(Tok t)
    {
        if (lexer.peek().type != t)
            throw error(__LINE__, UnexpectedTokSingle)
                .arg(lexer.peek.getType)
                .arg(t);
        return lexer.next();
    }

    /**
      Consume the next token if it has the given type. Returns true when
      a token was consumed.
     */
    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    /**
      Create (but do not throw) an Error located at tok, or at the
      current token when tok is null. line is the line in Parser.d that
      raised the error — useful when debugging the parser itself.
     */
    Error error(uint line, char[] errMsg, Token* tok = null)
    {
        Location loc;
        if (tok is null)
            loc = lexer.peek.location;
        else
            loc = tok.location;
        auto e =
            new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg);
            e.loc(loc);
            if (tok !is null)
                e.tok(*tok);
            return e;
    }

    // Error message templates — %0, %1 are substituted via Error.arg.
    static char[]
        UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1",
        UnexpectedTokSingle = "Unexpected token, got %0 expected %1",
        UnexpectedTok = "Unexpected token %0";

    static char[]
        CaseValueMustBeInt = "Cases can only be integer literals";

    Lexer lexer;
}