view parser/Parser.d @ 33:084c2c147c4f new_gen

Improvements to the Error class. * Now takes tokens instead of locations (a single loc can still be given) * Some of the arg functions take arrays now * Output is much better. Will print the line with all tokens given marked
author Anders Halager <halager@gmail.com>
date Sun, 20 Apr 2008 13:59:20 +0200
parents 3147a52d1247
children ce17bea8e9bd
line wrap: on
line source

module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import ast.Exp,
       ast.Stmt,
       ast.Decl;

import misc.Error;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
 * Recursive descent parser.
 *
 * Consumes tokens from a Lexer and builds the AST (Decl/Stmt/Exp nodes).
 * The public entry point is parse(); all syntax errors are reported by
 * throwing misc.Error objects created through the error() helper, which
 * attaches source locations and offending tokens for nicer diagnostics.
 */
class Parser
{

public:
    /**
     * Parse a whole translation unit.
     *
     * Reads top-level declarations from the given lexer until EOF and
     * returns them in source order.
     */
    Decl[] parse(Lexer lexer)
    {
        this.lexer = lexer;

        Decl[] declarations;

        while(lexer.peek.type != Tok.EOF)
        {
            declarations ~= parseRootDecl;
        }

        return declarations;
    }

    /**
     * Parse one declaration at module scope: a variable declaration,
     * a function definition or a struct definition.
     * Returns null on EOF.
     */
    Decl parseRootDecl()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Byte,  Tok.Ubyte,
                 Tok.Short, Tok.Ushort,
                 Tok.Int,   Tok.Uint,
                 Tok.Long,  Tok.Ulong,
                 Tok.Float, Tok.Double,
                 Tok.Bool,
                 Tok.Identifier:
                Identifier type = new Identifier(t);

                Token iden = lexer.peek(1);

                switch(iden.type)
                {
                    case Tok.Identifier:
                        Identifier identifier = new Identifier(iden);
                        // The third token decides what kind of declaration
                        // this is: '(' => function, ';' => plain variable,
                        // '=' => initialized variable.
                        Token p = lexer.peek(2);
                        switch(p.type)
                        {
                            case Tok.OpenParentheses:
                                lexer.next; lexer.next;
                                return parseFunc(type, identifier);
                            case Tok.Seperator:
                                lexer.next; lexer.next;
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, null);
                            case Tok.Assign:
                                lexer.next; lexer.next;
                                lexer.next();
                                auto exp = parseExpression();
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, exp);
                            default:
                                char[] c = p.getType;
                                throw error(__LINE__, UnexpectedTokMulti)
                                    .tok(p)
                                    .arg(c)
                                    .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign);
                        }
                        break;
                    default:
                        // BUG FIX: report the offending token's own type
                        // (iden.getType); the original printed t.getType,
                        // which is the type token, not the bad one.
                        char[] c = iden.getType;
                        throw error(__LINE__, UnexpectedTok).tok(iden).arg(c);
                }
                break;
            case Tok.Struct:
                lexer.next;
                Token iden = lexer.next;
                switch(iden.type)
                {
                    case Tok.Identifier:
                        Identifier identifier = new Identifier(iden);
                        return new StructDecl (identifier, parseStruct());
                    default:
                        throw error(__LINE__, "Expected struct identifier, but got %0").arg(iden.getType);
                }
            case Tok.EOF:
                return null;
            default:
                char[] c = t.getType;
                throw error(__LINE__, UnexpectedTok).tok(t).arg(c);
        }
    }

    /**
     * Parse one declaration in a nested scope (function body or struct
     * body): a variable declaration or a nested function.
     * Returns null on EOF.
     */
    Decl parseDecl()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Byte,  Tok.Ubyte,
                 Tok.Short, Tok.Ushort,
                 Tok.Int,   Tok.Uint,
                 Tok.Long,  Tok.Ulong,
                 Tok.Float, Tok.Double,
                 Tok.Bool,
                 Tok.Identifier:
                Identifier type = new Identifier(t);

                Token iden = lexer.peek(1);

                switch(iden.type)
                {
                    case Tok.Identifier:
                        Identifier identifier = new Identifier(iden);
                        Token p = lexer.peek(2);
                        switch(p.type)
                        {
                            case Tok.OpenParentheses:
                                lexer.next; lexer.next;
                                return parseFunc(type, identifier);
                            case Tok.Seperator:
                                lexer.next; lexer.next;
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, null);
                            case Tok.Assign:
                                lexer.next; lexer.next;
                                lexer.next();
                                auto exp = parseExpression();
                                require(Tok.Seperator);
                                return new VarDecl(type, identifier, exp);
                            default:
                                char[] c = p.getType;
                                throw error(__LINE__, UnexpectedTokMulti)
                                    .tok(p)
                                    .arg(c)
                                    .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign);
                        }
                        break;
                    default:
                        char[] c = iden.getType;
                        throw error(__LINE__, UnexpectedTokSingle)
                            .tok(iden)
                            .arg(c)
                            .arg(Tok.Identifier);
                }
                break;
            case Tok.EOF:
                return null;
            default:
                // CONSISTENCY FIX: attach the offending token, as the
                // identical branch in parseRootDecl does.
                char[] c = t.getType;
                throw error(__LINE__, UnexpectedTok).tok(t).arg(c);
        }
    }

    /// Parse a brace-delimited struct body into its member variables.
    VarDecl[] parseStruct()
    {
        VarDecl[] varDecls;
        require(Tok.OpenBrace);
        while(lexer.peek.type != Tok.CloseBrace)
        {
            varDecls ~= cast(VarDecl)parseDecl;
        }

        require(Tok.CloseBrace);
        return varDecls;
    }

    /**
     * Parse a single statement: return, if/else, while, assignment,
     * expression statement, or a local declaration.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Return:
                lexer.next;
                auto ret = new ReturnStmt();
                ret.exp = parseExpression();
                require(Tok.Seperator);
                return ret;

            case Tok.If:
                lexer.next;
                require(Tok.OpenParentheses);
                auto condition = parseExpression();
                require(Tok.CloseParentheses);

                auto then_body = parseBlockOrSingleStmt();

                Stmt[] else_body;
                if (lexer.peek.type == Tok.Else)
                {
                    lexer.next;
                    else_body = parseBlockOrSingleStmt();
                }

                return new IfStmt(condition, then_body, else_body);

            case Tok.While:
                lexer.next;
                require(Tok.OpenParentheses);
                auto condition = parseExpression();
                require(Tok.CloseParentheses);
                return new WhileStmt(condition, parseBlockOrSingleStmt());

            case Tok.Identifier:
                // Disambiguate on the second token:
                //   "a.b..."  => member assignment
                //   "a = ..." => plain assignment
                //   "a b..."  => declaration (a is a type)
                //   otherwise => expression statement
                Token n = lexer.peek(1);
                switch(n.type)
                {
                    case Tok.Dot:
                        Exp iden = parseExpIdentifier(new Identifier(lexer.next));
                        switch(lexer.peek.type)
                        {
                            case Tok.Assign:
                                lexer.next;
                                auto stmt = new ExpStmt(new AssignExp(iden , parseExpression()));
                                require(Tok.Seperator);
                                return stmt;
                            default:
                                // BUG FIX: this switch had no default case,
                                // so anything but '=' after a member lookup
                                // raised a runtime SwitchError (and would
                                // otherwise have fallen through into the
                                // unrelated Tok.Assign case below).
                                throw error(__LINE__, UnexpectedTokSingle)
                                    .tok(lexer.peek)
                                    .arg(lexer.peek.getType)
                                    .arg(Tok.Assign);
                        }
                    case Tok.Assign:
                        lexer.next;
                        lexer.next;
                        auto stmt = new ExpStmt(new AssignExp(new Identifier(t), parseExpression()));
                        require(Tok.Seperator);
                        return stmt;
                    case Tok.Identifier:
                        auto decl = new DeclStmt(parseDecl());
                        return decl;

                    default:
                        auto e = new ExpStmt(parseExpression());
                        require(Tok.Seperator);
                        return e;

                }
                break;

            default:
                // Declarations starting with a builtin type keyword.
                auto decl = new DeclStmt(parseDecl());
                return decl;
        }
        // Unreachable; kept to satisfy the compiler's flow analysis.
        return new Stmt();
    }

    /**
     * Parse a function definition whose "type name(" prefix has already
     * been consumed: the argument list and the brace-delimited body.
     */
    FuncDecl parseFunc(Identifier type, Identifier identifier)
    {
        VarDecl[] funcArgs = parseFuncArgs();

        // ROBUSTNESS FIX: verify the braces instead of blindly consuming
        // whatever token is next (matches parseStruct).
        require(Tok.OpenBrace);

        Stmt[] statements;

        while(lexer.peek.type != Tok.CloseBrace)
            statements ~= parseStatement();

        require(Tok.CloseBrace);

        return new FuncDecl(type, identifier, funcArgs, statements);
    }

    /**
     * Parse a parenthesized, comma-separated list of "type name" pairs.
     * Assumes the '(' is the current token.
     */
    VarDecl[] parseFuncArgs()
    {
        lexer.next; // Remove the "(" token.

        VarDecl[] funcArgs;

        while(lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType;
            auto i = parseIdentifier;
            funcArgs ~= new VarDecl(t, i);

            if(lexer.peek.type == Tok.Comma)
                lexer.next;
        }

        lexer.next; // Remove the ")"

        return funcArgs;
    }

    /// Consume and return an identifier token, or fail with a diagnostic.
    Identifier parseIdentifier()
    {
        Token identifier = lexer.next;

        switch(identifier.type)
        {
            case Tok.Identifier:
                return new Identifier(identifier);
            default:
                throw error(__LINE__, "Unexpected token in Identifier parsing. Got %0")
                    .arg(identifier.getType)
                    .tok(identifier);
        }
    }

    /// Consume and return a type token (builtin type or identifier).
    Identifier parseType()
    {
        Token type = lexer.next;

        switch(type.type)
        {
            case Tok.Byte,  Tok.Ubyte,
                 Tok.Short, Tok.Ushort,
                 Tok.Int,   Tok.Uint,
                 Tok.Long,  Tok.Ulong,
                 Tok.Float, Tok.Double,
                 Tok.Bool,
                 Tok.Identifier:
                return new Identifier(type);
            default:
                // BUG FIX: the error was constructed but never thrown, so
                // the function fell off the end without returning a value.
                char[] c = type.getType;
                throw error(__LINE__, "Unexpected token in Type parsing. Got %0")
                    .tok(type)
                    .arg(c);
        }
    }

    // -- Expression parsing -- //
private:
    /**
     * Extend an already-parsed expression with any trailing ".member"
     * lookups, e.g. turning "a" into "a.b.c".
     */
    Exp parseExpIdentifier(Exp target)
    {
        switch(lexer.peek.type)
        {
            case Tok.Dot:
                switch(lexer.peek(1).type)
                {
                    case Tok.Identifier:
                        lexer.next;
                        return parseExpIdentifier(
                                new MemberLookup(target, new Identifier(lexer.next)));
                    default:
                        Token t = lexer.peek(1);
                        throw error(__LINE__, "Expected identifier after '.'", &t);
                }
            default:
                return target;
        }
    }

    /**
     * Operator precedence (precedence climbing) expression parser.
     * p is the minimum precedence an operator must have to be consumed
     * at this level.
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = new BinaryExp(op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    /**
     * Parse a primary expression: a unary-negated expression, a
     * parenthesized expression, an identifier / member lookup / call,
     * or an integer literal.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return new NegateExp(parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp iden = parseExpIdentifier(new Identifier(next));
            switch(lexer.peek.type)
            {
                case Tok.OpenParentheses:
                    // Call expression: parse the argument list.
                    lexer.next;
                    Exp[] args;
                    while(lexer.peek.type != Tok.CloseParentheses)
                    {
                        if(lexer.peek.type == Tok.Comma)
                        {
                            lexer.next;
                        }
                        args ~= parseExpression();
                    }

                    lexer.next();
                    return new CallExp(iden, args);

                default:
                    return iden;
            }
        }
        else if (next.type == Tok.Integer)
            return new IntegerLit(next);

        // BUG FIX: report a proper parse error instead of a debug print
        // followed by assert(0).
        throw error(__LINE__, UnexpectedTok).tok(next).arg(next.getType);
    }

    /**
     * Parse either a "{ ... }" block or a single statement, returning
     * the contained statements. Used for if/else and while bodies.
     */
    private Stmt[] parseBlockOrSingleStmt()
    {
        Stmt[] stmts;
        if (lexer.peek.type == Tok.OpenBrace)
        {
            lexer.next;
            while(lexer.peek.type != Tok.CloseBrace)
                stmts ~= parseStatement();
            lexer.next;
        }
        else
            stmts ~= parseStatement();

        return stmts;
    }

    /// A prefix operator together with its precedence.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static UnOp[] _unary = [{Tok.Sub, 4}];

    /// Look up the unary operator entry for t, or null if t is not one.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator: token, precedence, associativity and AST op.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        BinaryExp.Operator operator;
    }

    static BinOp[] _binary =
    [
        {Tok.Eq,        2, true, BinaryExp.Operator.Eq},
        {Tok.Ne,        2, true, BinaryExp.Operator.Ne},
        {Tok.Lt,        2, true, BinaryExp.Operator.Lt},
        {Tok.Le,        2, true, BinaryExp.Operator.Le},
        {Tok.Gt,        2, true, BinaryExp.Operator.Gt},
        {Tok.Ge,        2, true, BinaryExp.Operator.Ge},

        {Tok.Add,       3, true, BinaryExp.Operator.Add},
        {Tok.Sub,       3, true, BinaryExp.Operator.Sub},

        {Tok.Mul,       5, true, BinaryExp.Operator.Mul},
        {Tok.Div,       5, true, BinaryExp.Operator.Div}
    ];

    /// Look up the binary operator entry for t, or null if t is not one.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
     * Consume the next token, which must be of type t; otherwise throw
     * a diagnostic naming the expected and actual token types.
     */
    void require(Tok t)
    {
        if (lexer.peek().type != t)
            // BUG FIX: the error was constructed but never thrown, so a
            // mismatch was silently ignored and the wrong token consumed.
            throw error(__LINE__, UnexpectedTokSingle)
                .arg(lexer.peek.getType)
                .arg(t);
        lexer.next();
    }

    /**
     * Build (but do not throw) an Error tagged with the parser source
     * line that raised it. The location is taken from tok when given,
     * otherwise from the current lookahead token.
     */
    Error error(uint line, char[] errMsg, Token* tok = null)
    {
        Location loc;
        if (tok is null)
            loc = lexer.peek.location;
        else
            loc = tok.location;
        auto e =
            new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg);
        e.loc(loc);
        if (tok !is null)
            e.tok(*tok);
        return e;
    }

    // Diagnostic templates; %0, %1 are filled in via Error.arg.
    // (Spelling fixed from the original "Unexpexted".)
    static char[]
        UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1",
        UnexpectedTokSingle = "Unexpected token, got %0 expected %1",
        UnexpectedTok = "Unexpected token %0";

    Lexer lexer;
}