Mercurial > projects > dang

module parser.Parser;

import lexer.Lexer,
       lexer.Token;

import parser.Action;

import misc.Error;

import basic.SmallArray;

import tango.io.Stdout,
       Integer = tango.text.convert.Integer;

/**
  Recursive-descent parser.

  Tokens are pulled from a Lexer and every recognised construct is reported
  to an Action object, which builds whatever representation it wants. The
  parser itself only deals in opaque Exp / Stmt / Decl handles.

  Syntax errors are reported by throwing the Error built by error().
 */
class Parser
{
    Action action;
    alias Object Exp;
    alias Object Stmt;
    alias Object Decl;

public:
    /**
      Parse every root-level declaration until the lexer reports Tok.EOF.

      Installs a fresh AstAction as the current action before parsing.

      Returns: the declarations in the order they were parsed.
     */
    Decl[] parse(Lexer lexer)
    {
        this.lexer = lexer;
        action = new AstAction;

        Decl[] declarations;

        while(lexer.peek.type != Tok.EOF)
            declarations ~= parseRootDecl();

        return declarations;
    }

    /**
      Parse one root-level declaration.

      Accepted forms, where type is a basic type or an identifier:
        type name ;
        type name = expression ;
        type name ( args ) { body }

      Throws: an UnexpectedTok error for anything else.
     */
    Decl parseRootDecl()
    {
        Token t = lexer.peek;

        if (t.isBasicType || t.isIdentifier)
        {
            Id type = Id(lexer.next);
            Id iden = Id(require(Tok.Identifier));
            Token next = lexer.peek();
            if (next.type == Tok.Seperator)
            {
                lexer.next(); // consume the separator
                return action.actOnDeclarator(type, iden, null);
            }
            else if (next.type == Tok.Assign)
            {
                lexer.next(); // consume the assignment token
                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnDeclarator(type, iden, exp);
            }
            else if (next.type == Tok.OpenParentheses)
                return parseFunc(type, iden);
            else
                throw error(__LINE__, PE.UnexpectedTok)
                    .tok(next)
                    .arg(next.getType);
        }

        // Struct declarations (Tok.Struct) are not parsed yet, so they are
        // reported as unexpected tokens like everything else.
        char[] c = t.getType;
        throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(c);
    }

    /**
      Parse statements.

      This is the place to attack!

      Only return and if statements are handled at the moment. Any other
      token is reported as an error: returning without consuming it would
      make parseCompoundStatement loop forever on that token.
     */
    Stmt parseStatement()
    {
        Token t = lexer.peek;

        switch(t.type)
        {
            case Tok.Return:
                Token ret = lexer.next;
                Exp exp = parseExpression();
                require(Tok.Seperator);
                return action.actOnReturnStmt(ret, exp);

            /*
               if (cond)
                single statement | compound statement
               [else
                single statement | compound statement]
             */
            case Tok.If:
                Token _if = lexer.next();

                require(Tok.OpenParentheses);
                Exp cond = parseExpression();
                require(Tok.CloseParentheses);

                Stmt thenB = parseSingleOrCompoundStatement();

                // if there is no else part we use the if as token, to have
                // something that can be passed along
                Token _else = _if;
                Stmt elseB;
                if (lexer.peek.type == Tok.Else)
                {
                    _else = lexer.next;
                    elseB = parseSingleOrCompoundStatement();
                }

                return action.actOnIfStmt(_if, cond, thenB, _else, elseB);

            // TODO: while, switch and identifier-led statements are not
            // implemented yet; they end up in the error below.
            default:
                break;
        }

        throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(t.getType);
    }

    /**
      Parses a function/method given the already parsed return type and name.

      The current token is expected to be the ( that starts the argument
      list. The argument list and the body are both consumed.
     */
    Decl parseFunc(ref Id type, ref Id name)
    {
        Decl func = action.actOnStartOfFunctionDef(type, name);
        parseFuncArgs(func);

        Stmt stmt = parseCompoundStatement();

        action.actOnEndOfFunction(func, stmt);
        return func;
    }

    /**
      Parse the function arguments, assumes current token is (.

      Both the initial paren and the ending paren are consumed. Each
      argument is reported through action.addFuncArg. A comma after an
      argument is consumed if present but is not required.
     */
    void parseFuncArgs(Decl func)
    {
        require(Tok.OpenParentheses); // Remove the "(" token.

        while(lexer.peek.type != Tok.CloseParentheses)
        {
            auto t = parseType();
            auto i = parseIdentifier();
            action.addFuncArg(func, t, i);

            skip(Tok.Comma);
        }

        require(Tok.CloseParentheses); // Remove the ")"
    }

    /**
      Parse either a block, or a single statement as allowed after if, while
      and for.
     */
    Stmt parseSingleOrCompoundStatement()
    {
        if (lexer.peek.type == Tok.OpenBrace)
            return parseCompoundStatement();
        return parseStatement();
    }

    /**
      Parses a function-body or similar, expects { to be current token.

      Will consume both the starting { and ending }
     */
    Stmt parseCompoundStatement()
    {
        Token lbrace = require(Tok.OpenBrace);
        SmallArray!(Stmt, 32) stmts; // Try to use the stack only
        while (lexer.peek.type != Tok.CloseBrace)
            stmts ~= parseStatement();
        Token rbrace = require(Tok.CloseBrace);
        return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe());
    }

    /**
      Consume the next token and return it as an Id.

      Throws: an UnexpectedTokSingle error if it is not an identifier.
     */
    Id parseIdentifier()
    {
        Token tok = lexer.next;

        if (tok.type is Tok.Identifier)
            return Id(tok);

        throw error(__LINE__, PE.UnexpectedTokSingle)
            .arg(tok.getType)
            .arg(Tok.Identifier)
            .tok(tok);
    }

    /**
      Consume the next token and return it as a type Id.

      Basic types and identifiers are accepted.

      Throws: an error for any other token.
     */
    Id parseType()
    {
        Token type = lexer.next;

        if (type.isBasicType || type.type == Tok.Identifier)
            return Id(type);

        char[] c = type.getType;
        throw error(__LINE__, "Unexpected token in Type parsing. Got %0")
            .arg(c)
            .tok(type);
    }

private:
    // -- Expression parsing -- //

    /**
      Parse a possibly empty chain of ".member" accesses applied to target.

      Returns: target itself when no dot follows, otherwise the expression
      built by action.actOnMemberReference for the whole chain.
     */
    Exp parseExpIdentifier(Exp target)
    {
        while (lexer.peek.type == Tok.Dot)
        {
            Token after = lexer.peek(1);
            if (after.type != Tok.Identifier)
                throw error(__LINE__, "Expected identifier after '.'").tok(after);

            Token op = lexer.next;
            Id member = Id(lexer.next);
            target = action.actOnMemberReference(target, op.location, member);
        }
        return target;
    }

    /**
      Parse a binary expression by precedence climbing.

      Params:
        p = lowest operator precedence this call is allowed to consume
     */
    Exp parseExpression(int p = 0)
    {
        auto exp = P();
        Token next = lexer.peek();
        BinOp* op = null;
        while ((op = binary(next.type)) != null && op.prec >= p)
        {
            lexer.next();
            // For a left-associative operator the right operand may only
            // contain operators that bind strictly tighter.
            int q = op.leftAssoc? 1 + op.prec : op.prec;
            auto exp2 = parseExpression(q);
            exp = action.actOnBinaryOp(op.operator, exp, exp2);
            next = lexer.peek();
        }

        return exp;
    }

    /**
      Parse a primary expression: a unary operation, a parenthesised
      expression, an identifier (with optional member accesses and call
      arguments) or an integer literal.

      Throws: an UnexpectedTok error when the token cannot start an
      expression.
     */
    Exp P()
    {
        Token next = lexer.next();
        if (auto op = unary(next.type))
            return action.actOnUnaryOp(next, parseExpression(op.prec));
        else if (next.type == Tok.OpenParentheses)
        {
            auto e = parseExpression(0);
            require(Tok.CloseParentheses);
            return e;
        }
        else if (next.type == Tok.Identifier)
        {
            Exp value = action.actOnIdentifierExp(Id(next));
            Exp iden = parseExpIdentifier(value);

            if (lexer.peek.type != Tok.OpenParentheses)
                return iden;

            // TODO: Function calls are parsed but ignored
            lexer.next; // consume the "("
            Exp[] args;
            while(lexer.peek.type != Tok.CloseParentheses)
            {
                skip(Tok.Comma);
                args ~= parseExpression();
            }

            lexer.next(); // consume the ")"
            return null;//new CallExp(iden, args);
        }
        else if (next.type == Tok.Integer)
            return action.actOnNumericConstant(next);

        // Malformed input is a syntax error, not an internal invariant
        // violation, so report it like every other parse error.
        throw error(__LINE__, PE.UnexpectedTok).tok(next).arg(next.getType);
    }

    /// A prefix operator and the precedence its operand is parsed with.
    struct UnOp
    {
        Tok tokenType;
        int prec;
    }

    static const UnOp[] _unary = [{Tok.Sub, 4}];

    /// Look up t in the unary operator table; null if it is not one.
    UnOp* unary(Tok t)
    {
        foreach (ref op; _unary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

    /// A binary operator: its token, precedence, associativity and the
    /// Operator value handed to the action.
    struct BinOp
    {
        Tok tokenType;
        int prec;
        bool leftAssoc;
        Operator operator;
    }

    static const BinOp[] _binary =
    [
        {Tok.Eq,        2, true, Operator.Eq},
        {Tok.Ne,        2, true, Operator.Ne},

        {Tok.Lt,        2, true, Operator.Lt},
        {Tok.Le,        2, true, Operator.Le},
        {Tok.Gt,        2, true, Operator.Gt},
        {Tok.Ge,        2, true, Operator.Ge},

        {Tok.Add,       3, true, Operator.Add},
        {Tok.Sub,       3, true, Operator.Sub},

        {Tok.Mul,       5, true, Operator.Mul},
        {Tok.Div,       5, true, Operator.Div}
    ];

    /// Look up t in the binary operator table; null if it is not one.
    BinOp* binary(Tok t)
    {
        foreach (ref op; _binary)
            if (op.tokenType == t)
                return &op;
        return null;
    }

private:

    /**
      Consume and return the next token, which must be of type t.

      Throws: an UnexpectedTokSingle error, without consuming anything, when
      the next token has another type.
     */
    Token require(Tok t)
    {
        Token found = lexer.peek();
        if (found.type != t)
            throw error(__LINE__, PE.UnexpectedTokSingle)
                .arg(found.getType)
                .arg(t)
                .tok(found);
        return lexer.next();
    }

    /**
      Consume the next token if it is of type t.

      Returns: true if a token was consumed, false otherwise.
     */
    bool skip(Tok t)
    {
        if (lexer.peek().type != t)
            return false;
        lexer.next();
        return true;
    }

    /**
      Build (but do not throw) an Error located at the next unconsumed token.

      Params:
        line   = source line of the call site, normally __LINE__
        errMsg = message, may contain %0, %1 ... placeholders that callers
                 fill in with .arg()
     */
    Error error(uint line, char[] errMsg)
    {
        Location loc = lexer.peek.location;
        auto e =
            new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg);
        e.loc(loc);
        return e;
    }

    /// Parser error message templates.
    struct PE
    {
        static char[]
            UnexpectedTokMulti  = "Unexpected token, got %0 expected one of %1",
            UnexpectedTokSingle = "Unexpected token, got %0 expected %1",
            UnexpectedTok       = "Unexpected token %0";

        static char[]
            CaseValueMustBeInt  = "Cases can only be integer literals";
    }

    Lexer lexer;
}
author	Anders Halager <halager@gmail.com>
date	Wed, 23 Apr 2008 16:43:42 +0200
parents	495188f9078e
children	90fb4fdfefdd