Mercurial > projects > dang
diff parser/Parser.d @ 44:495188f9078e new_gen
Big update - Moving towards a better, more separated parser
The parser no longer creates the AST directly, but through
callbacks(actions). This means the parser can be run with a different set
of actions that do something else.
The parser is not back to full strength yet, the main thing missing is the
various statements and structs.
Also added a SmallArray that uses the stack only until a given size is
exceeded, after which the array is copied onto the heap.
author | Anders Halager <halager@gmail.com> |
---|---|
date | Wed, 23 Apr 2008 00:57:45 +0200 |
parents | 858b9805843d |
children | 9bc660cbdbec |
line wrap: on
line diff
--- a/parser/Parser.d Tue Apr 22 22:31:39 2008 +0200 +++ b/parser/Parser.d Wed Apr 23 00:57:45 2008 +0200 @@ -3,30 +3,33 @@ import lexer.Lexer, lexer.Token; -import ast.Exp, - ast.Stmt, - ast.Decl; +import parser.Action; import misc.Error; +import basic.SmallArray; + import tango.io.Stdout, Integer = tango.text.convert.Integer; class Parser { + Action action; + alias Object Exp; + alias Object Stmt; + alias Object Decl; public: Decl[] parse(Lexer lexer) { this.lexer = lexer; + action = new AstAction; Decl[] declarations; while(lexer.peek.type != Tok.EOF) - { - declarations ~= parseRootDecl; - } + declarations ~= parseRootDecl(); return declarations; } @@ -35,147 +38,42 @@ { Token t = lexer.peek; - switch(t.type) + if (t.isBasicType || t.isIdentifier) { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Void, - Tok.Identifier: - Identifier type = new Identifier(t); - - Token iden = lexer.peek(1); - - switch(iden.type) - { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - Token p = lexer.peek(2); - switch(p.type) - { - case Tok.OpenParentheses: - lexer.next; lexer.next; - return parseFunc(type, identifier); - case Tok.Seperator: - lexer.next; lexer.next; - require(Tok.Seperator); - return new VarDecl(type, identifier, null); - case Tok.Assign: - lexer.next; lexer.next; - lexer.next(); - auto exp = parseExpression(); - require(Tok.Seperator); - return new VarDecl(type, identifier, exp); - default: - char[] c = p.getType; - throw error(__LINE__, UnexpectedTokMulti) - .tok(p) - .arg(c) - .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign); - } - break; - default: - char[] c = t.getType; - throw error(__LINE__, UnexpectedTok).tok(iden).arg(c); - } - break; - case Tok.Struct: - lexer.next; - Token iden = lexer.next; - switch(iden.type) - { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - return new StructDecl (identifier, parseStruct()); 
- default: - throw error(__LINE__, "Expected struct identifier, but got %0").arg(iden.getType); - } - case Tok.EOF: - return null; - default: - char[] c = t.getType; - throw error(__LINE__, UnexpectedTok).tok(t).arg(c); + Id type = Id(lexer.next); + Id iden = Id(require(Tok.Identifier)); + Token next = lexer.peek(); + if (next.type == Tok.Seperator) + { + Token sep = lexer.next(); + return action.actOnDeclarator(type, iden, null); + } + else if (next.type == Tok.Assign) + { + Token assign = lexer.next(); + Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnDeclarator(type, iden, exp); + } + else if (next.type == Tok.OpenParentheses) + return parseFunc(type, iden); + else + throw error(__LINE__, PE.UnexpectedTok) + .tok(next) + .arg(next.getType); } + else if (t.type == Tok.Struct) + { + } + char[] c = t.getType; + throw error(__LINE__, PE.UnexpectedTok).tok(t).arg(c); } - Decl parseDecl() - { - Token t = lexer.peek; - - switch(t.type) - { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Void, - Tok.Identifier: - Identifier type = new Identifier(t); - - Token iden = lexer.peek(1); + /** + Parse statements. 
- switch(iden.type) - { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - Token p = lexer.peek(2); - switch(p.type) - { - case Tok.OpenParentheses: - lexer.next; lexer.next; - return parseFunc(type, identifier); - case Tok.Seperator: - lexer.next; lexer.next; - require(Tok.Seperator); - return new VarDecl(type, identifier, null); - case Tok.Assign: - lexer.next; lexer.next; - lexer.next(); - auto exp = parseExpression(); - require(Tok.Seperator); - return new VarDecl(type, identifier, exp); - default: - char[] c = p.getType; - throw error(__LINE__, UnexpectedTokMulti) - .tok(p) - .arg(c) - .arg(Tok.OpenParentheses, Tok.Seperator, Tok.Assign); - } - break; - default: - char[] c = iden.getType; - throw error(__LINE__, UnexpectedTokSingle) - .tok(iden) - .arg(c) - .arg(Tok.Identifier); - } - break; - case Tok.EOF: - return null; - default: - char[] c = t.getType; - throw error(__LINE__, UnexpectedTok).arg(c); - } - } - - VarDecl[] parseStruct() - { - VarDecl[] varDecls; - require(Tok.OpenBrace); - while(lexer.peek.type != Tok.CloseBrace) - { - varDecls ~= cast(VarDecl)parseDecl; - } - - require(Tok.CloseBrace); - return varDecls; - } - + This is the place to attack! 
+ */ Stmt parseStatement() { Token t = lexer.peek; @@ -183,212 +81,107 @@ switch(t.type) { case Tok.Return: - lexer.next; - auto ret = new ReturnStmt(); - if (!skip(Tok.Seperator)) - { - ret.exp = parseExpression(); - require(Tok.Seperator); - } - return ret; + Token ret = lexer.next; + Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnReturnStmt(ret, exp); case Tok.If: - lexer.next; - require(Tok.OpenParentheses); - auto condition = parseExpression(); - require(Tok.CloseParentheses); - - auto then_body = parseBlockOrSingleStmt(); - - Stmt[] else_body; - if (lexer.peek.type == Tok.Else) - { - lexer.next; - else_body = parseBlockOrSingleStmt(); - } - - return new IfStmt(condition, then_body, else_body); + return null; case Tok.While: - lexer.next; - require(Tok.OpenParentheses); - auto condition = parseExpression(); - require(Tok.CloseParentheses); - return new WhileStmt(condition, parseBlockOrSingleStmt()); + return null; case Tok.Identifier: - Token n = lexer.peek(1); - switch(n.type) - { - case Tok.Dot: - Exp iden = parseExpIdentifier(new Identifier(lexer.next)); - switch(lexer.peek.type) - { - case Tok.Assign: - lexer.next; - auto stmt = new ExpStmt(new AssignExp(iden , parseExpression())); - require(Tok.Seperator); - return stmt; - break; - } - case Tok.Assign: - lexer.next; - lexer.next; - auto stmt = new ExpStmt(new AssignExp(new Identifier(t), parseExpression())); - require(Tok.Seperator); - return stmt; - break; - case Tok.Identifier: - auto decl = new DeclStmt(parseDecl()); - return decl; - - default: - auto e = new ExpStmt(parseExpression()); - require(Tok.Seperator); - return e; - - } - break; + return null; case Tok.Switch: - lexer.next; - require(Tok.OpenParentheses); - auto target = parseExpression(); - auto res = new SwitchStmt(target); - require(Tok.CloseParentheses); - require(Tok.OpenBrace); - while (true) - { - Stmt[] statements; - if (skip(Tok.Default)) - { - require(Tok.Colon); - statements.length = 0; - while 
(lexer.peek.type != Tok.Case - && lexer.peek.type != Tok.Default - && lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - res.setDefault(statements); - continue; - } - - Token _case = lexer.peek; - if (_case.type != Tok.Case) - break; - lexer.next(); - - IntegerLit[] literals; - do - { - Exp e = parseExpression(); - IntegerLit lit = cast(IntegerLit)e; - if (lit is null) - throw error(__LINE__, CaseValueMustBeInt) - .tok(_case); - - literals ~= lit; - } - while (skip(Tok.Comma)); - require(Tok.Colon); - - while (lexer.peek.type != Tok.Case - && lexer.peek.type != Tok.Default - && lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - - res.addCase(literals, statements); - - if (lexer.peek.type == Tok.CloseBrace) - break; - } - require(Tok.CloseBrace); - return res; + return null; default: - auto decl = new DeclStmt(parseDecl()); - //require(Tok.Seperator); - return decl; + return null; } - return new Stmt(); + error(__LINE__, "").tok(t); + return null; } - FuncDecl parseFunc(Identifier type, Identifier identifier) + /** + Parses a function/method given the already parsed + */ + Decl parseFunc(ref Id type, ref Id name) { - VarDecl[] funcArgs = parseFuncArgs(); - - lexer.next; // Remove the "{" + Decl func = action.actOnStartOfFunctionDef(type, name); + parseFuncArgs(func); - Stmt[] statements; + Stmt stmt = parseCompoundStatement(); - while(lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - - lexer.next; // Remove "}" - - return new FuncDecl(type, identifier, funcArgs, statements); + action.actOnEndOfFunction(func, stmt); + return func; } - VarDecl[] parseFuncArgs() + /** + Parse the function arguments, assumes current token is (. + + Both the intitial paren and the ending paren is consumed. + */ + void parseFuncArgs(Decl func) { - lexer.next; // Remove the "(" token. - - VarDecl[] funcArgs; + require(Tok.OpenParentheses); // Remove the "(" token. 
while(lexer.peek.type != Tok.CloseParentheses) { - auto t = parseType; - auto i = parseIdentifier; - funcArgs ~= new VarDecl(t, i); + auto t = parseType(); + auto i = parseIdentifier(); + action.addFuncArg(func, t, i); if(lexer.peek.type == Tok.Comma) lexer.next; } - lexer.next; // Remove the ")" - - return funcArgs; + require(Tok.CloseParentheses); // Remove the ")" } - Identifier parseIdentifier() + /** + Parses a function-body or similar, expects { to be current token. + + Will consume both the starting { and ending } + */ + Stmt parseCompoundStatement() { - Token identifier = lexer.next; - - switch(identifier.type) - { - case Tok.Identifier: - return new Identifier(identifier); - break; - default: - throw error(__LINE__, "Unexpected token in Identifier parsing. Got %0") - .arg(identifier.getType) - .tok(identifier); - } + Token lbrace = require(Tok.OpenBrace); + SmallArray!(Stmt, 32) stmts; // Try to use the stack only + while (lexer.peek.type != Tok.CloseBrace) + stmts ~= parseStatement(); + Token rbrace = require(Tok.CloseBrace); + return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe()); } - Identifier parseType() + Id parseIdentifier() + { + Token tok = lexer.next; + + if (tok.type is Tok.Identifier) + return Id(tok); + + throw error(__LINE__, PE.UnexpectedTokSingle) + .arg(tok.getType) + .arg(Tok.Identifier) + .tok(tok); + } + + Id parseType() { Token type = lexer.next; - switch(type.type) - { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Void, - Tok.Identifier: - return new Identifier(type); - break; - default: - char[] c = type.getType; - error(__LINE__, "Unexpected token in Type parsing. Got %0").arg(c); - } + if (type.isBasicType || type.type == Tok.Identifier) + return Id(type); + + char[] c = type.getType; + error(__LINE__, "Unexpected token in Type parsing. 
Got %0").arg(c); } +private: // -- Expression parsing -- // -private: Exp parseExpIdentifier(Exp target) { switch(lexer.peek.type) @@ -397,12 +190,13 @@ switch(lexer.peek(1).type) { case Tok.Identifier: - lexer.next; - return parseExpIdentifier( - new MemberLookup(target, new Identifier(lexer.next))); + Token op = lexer.next; + Id member = Id(lexer.next); + Exp exp = action.actOnMemberReference(target, op.location, member); + return parseExpIdentifier(exp); default: Token t = lexer.peek(1); - throw error(__LINE__, "Expected identifier after '.'", &t); + throw error(__LINE__, "Expected identifier after '.'").tok(t); } default: return target; @@ -419,7 +213,7 @@ lexer.next(); int q = op.leftAssoc? 1 + op.prec : op.prec; auto exp2 = parseExpression(q); - exp = new BinaryExp(op.operator, exp, exp2); + exp = action.actOnBinaryOp(op.operator, exp, exp2); next = lexer.peek(); } @@ -430,7 +224,7 @@ { Token next = lexer.next(); if (auto op = unary(next.type)) - return new NegateExp(parseExpression(op.prec)); + return action.actOnUnaryOp(next, parseExpression(op.prec)); else if (next.type == Tok.OpenParentheses) { auto e = parseExpression(0); @@ -439,9 +233,11 @@ } else if (next.type == Tok.Identifier) { - Exp iden = parseExpIdentifier(new Identifier(next)); + Exp value = action.actOnIdentifierExp(Id(next)); + Exp iden = parseExpIdentifier(value); switch(lexer.peek.type) { + // TODO: Function calls are parsed but ignored case Tok.OpenParentheses: lexer.next; Exp[] args; @@ -455,42 +251,26 @@ } lexer.next(); - return new CallExp(iden, args); + return null;//new CallExp(iden, args); default: return iden; } } else if (next.type == Tok.Integer) - return new IntegerLit(next); + return action.actOnNumericConstant(next); Stdout.formatln("{}", next.getType); assert(0, "Should not happen"); } - private Stmt[] parseBlockOrSingleStmt() - { - Stmt[] stmts; - if (lexer.peek.type == Tok.OpenBrace) - { - lexer.next; - while(lexer.peek.type != Tok.CloseBrace) - stmts ~= parseStatement(); - 
lexer.next; - } - else - stmts ~= parseStatement(); - - return stmts; - } - struct UnOp { Tok tokenType; int prec; } - static UnOp[] _unary = [{Tok.Sub, 4}]; + static const UnOp[] _unary = [{Tok.Sub, 4}]; UnOp* unary(Tok t) { foreach (ref op; _unary) @@ -504,23 +284,24 @@ Tok tokenType; int prec; bool leftAssoc; - BinaryExp.Operator operator; + Operator operator; } - static BinOp[] _binary = + static const BinOp[] _binary = [ - {Tok.Eq, 2, true, BinaryExp.Operator.Eq}, - {Tok.Ne, 2, true, BinaryExp.Operator.Ne}, - {Tok.Lt, 2, true, BinaryExp.Operator.Lt}, - {Tok.Le, 2, true, BinaryExp.Operator.Le}, - {Tok.Gt, 2, true, BinaryExp.Operator.Gt}, - {Tok.Ge, 2, true, BinaryExp.Operator.Ge}, + {Tok.Eq, 2, true, Operator.Eq}, + {Tok.Ne, 2, true, Operator.Ne}, - {Tok.Add, 3, true, BinaryExp.Operator.Add}, - {Tok.Sub, 3, true, BinaryExp.Operator.Sub}, + {Tok.Lt, 2, true, Operator.Lt}, + {Tok.Le, 2, true, Operator.Le}, + {Tok.Gt, 2, true, Operator.Gt}, + {Tok.Ge, 2, true, Operator.Ge}, - {Tok.Mul, 5, true, BinaryExp.Operator.Mul}, - {Tok.Div, 5, true, BinaryExp.Operator.Div} + {Tok.Add, 3, true, Operator.Add}, + {Tok.Sub, 3, true, Operator.Sub}, + + {Tok.Mul, 5, true, Operator.Mul}, + {Tok.Div, 5, true, Operator.Div} ]; BinOp* binary(Tok t) { @@ -535,7 +316,7 @@ Token require(Tok t) { if (lexer.peek().type != t) - throw error(__LINE__, UnexpectedTokSingle) + throw error(__LINE__, PE.UnexpectedTokSingle) .arg(lexer.peek.getType) .arg(t); return lexer.next(); @@ -549,28 +330,25 @@ return true; } - Error error(uint line, char[] errMsg, Token* tok = null) + Error error(uint line, char[] errMsg) { - Location loc; - if (tok is null) - loc = lexer.peek.location; - else - loc = tok.location; + Location loc = lexer.peek.location; auto e = new Error("Parser.d(" ~ Integer.toString(line) ~ "): " ~errMsg); e.loc(loc); - if (tok !is null) - e.tok(*tok); return e; } - static char[] - UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1", - UnexpectedTokSingle = "Unexpected 
token, got %0 expected %1", - UnexpectedTok = "Unexpected token %0"; + struct PE + { + static char[] + UnexpectedTokMulti = "Unexpected token, got %0 expected one of %1", + UnexpectedTokSingle = "Unexpected token, got %0 expected %1", + UnexpectedTok = "Unexpected token %0"; - static char[] - CaseValueMustBeInt = "Cases can only be integer literals"; + static char[] + CaseValueMustBeInt = "Cases can only be integer literals"; + } Lexer lexer; }