changeset 41:f977aa28eb32 new_gen

Now using arrays instead of switch - should speed things up a notch!
author Anders Johnsen <skabet@gmail.com>
date Tue, 22 Apr 2008 19:30:51 +0200
parents 9fb190ad81a4
children 4e879f82dd64
files lexer/Lexer.d
diffstat 1 files changed, 166 insertions(+), 124 deletions(-) [+]
line wrap: on
line diff
--- a/lexer/Lexer.d	Tue Apr 22 00:31:57 2008 +0200
+++ b/lexer/Lexer.d	Tue Apr 22 19:30:51 2008 +0200
@@ -14,7 +14,40 @@
     this (DataSource source)
     {
         this.source = source;
-        this.position = 0;
+        position = 0;
+
+
+        charTable.length = 256;
+        foreach( char c ; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+            charTable[c] = CharType.Letter;
+
+        foreach( char c ; "0123456789")
+            charTable[c] = CharType.Number;
+
+        foreach( char c ; "(){};:.,=!<>+-*/")
+            charTable[c] = CharType.Symbol;
+
+        foreach( char c ; " \n")
+            charTable[c] = CharType.Whitespace;
+
+        symbolFunctions.length = 256;
+
+        symbolFunctions['('] = &openParentheses;
+        symbolFunctions[')'] = &closeParentheses;
+        symbolFunctions['{'] = &openBrace;
+        symbolFunctions['}'] = &closeBrace;
+        symbolFunctions[';'] = &seperator;
+        symbolFunctions[':'] = &colon;
+        symbolFunctions['.'] = &dot;
+        symbolFunctions[','] = &comma;
+        symbolFunctions['='] = &eq;
+        symbolFunctions['!'] = &ne;
+        symbolFunctions['<'] = &le;
+        symbolFunctions['>'] = &ge;
+        symbolFunctions['+'] = &add;
+        symbolFunctions['-'] = &sub;
+        symbolFunctions['*'] = &mul;
+        symbolFunctions['/'] = &div;
     }
 
     Token next ()
@@ -54,8 +87,130 @@
     {
         return this.errors;
     }
+
 private:
+    Token eq()
+    {
+        if(source.data[position] == '=')
+            return Token(Tok.Eq, Location(position++ - 1, source), 2);
+        return Token(Tok.Assign, Location(position - 1, source), 1);
+    }
+    Token openBrace() 
+    {
+        return Token(Tok.OpenBrace, Location(position - 1, source), 1);
+    }
+    Token openParentheses() 
+    {
+        return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1);
+    }
+    Token closeParentheses()
+    {
+        return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1);
+    }
+    Token closeBrace() 
+    {
+        return Token(Tok.CloseBrace, Location(position - 1, this.source), 1);
+    }
+    Token seperator()
+    {
+        Token t = Token(Tok.Seperator, Location(position - 1, source), 1);
+        return t;
+    }
+    Token colon()
+    {
+        return Token(Tok.Colon, Location(position - 1, this.source), 1);
+    }
+    Token dot() 
+    {
+        return Token(Tok.Dot, Location(position - 1, this.source), 1);
+    }
+    Token comma() 
+    {
+        return Token(Tok.Comma, Location(position - 1, this.source), 1);
+    }
+    Token ne() 
+    {
+        if(source.data[position] == '=')
+            return Token(Tok.Ne, Location(position++ - 1, this.source), 2);
+        return Token(Tok.Not, Location(position - 1, this.source), 1);
+    }
+    Token le()
+    {
+        if(source.data[position] == '=')
+            return Token(Tok.Le, Location(position++ - 1, this.source), 2);
+        return Token(Tok.Lt, Location(position - 1, this.source), 1);
+    }
+    Token ge() 
+    {
+        if(source.data[position] == '=')
+            return Token(Tok.Ge, Location(position++ - 1, this.source), 2);
+        return Token(Tok.Gt, Location(position - 1, this.source), 1);
+    }
+    Token add() 
+    {
+        return Token(Tok.Add, Location(position - 1, this.source), 1);
+    }
+    Token sub() 
+    {
+        return Token(Tok.Sub, Location(position - 1, this.source), 1);
+    }
+    Token mul() 
+    {
+        return Token(Tok.Mul, Location(position - 1, this.source), 1);
+    }
+    Token div() 
+    {
+        switch(source.data[position])
+        {
+            case '/':
+                while(getNextChar != CharType.EOF)
+                {
+                    if(source.data[position++] == '\n')
+                        return this.next;
+                }
+                return Token(Tok.EOF, Location(position, this.source), 0);
 
+            case '*':
+                position += 2;
+                while(getNextChar != CharType.EOF)
+                {
+                    ++position;
+                    if(source.data[position-2] == '*')
+                        if(source.data[position-1] == '/')
+                            return this.next;
+                }
+                throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
+
+            case '+':
+                position += 2;
+                int nesting = 1;
+                while(getNextChar != CharType.EOF)
+                {
+                    ++position;
+                    if(source.data[position-2] == '+')
+                        if(source.data[position-1] == '/')
+                        {
+                            position++;
+                            nesting--;
+                        }
+
+                    if(source.data[position-2] == '/')
+                        if(source.data[position-1] == '+')
+                        {
+                            nesting++;
+                            position++;
+                        }
+
+                    if(nesting == 0)
+                        return this.next;
+                }
+                throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
+
+            default:
+                return Token(Tok.Div, Location(position - 1, this.source), 1);
+        }
+    }
+    
     Token lexNumber ()
     {
         int i = 0;
@@ -69,97 +224,9 @@
 
     Token lexSymbol ()
     {
-        switch(source.data[position++])
-        {
-            case '(':
-                return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1);
-            case ')':
-                return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1);
-            case '{':
-                return Token(Tok.OpenBrace, Location(position - 1, this.source), 1);
-            case '}':
-                return Token(Tok.CloseBrace, Location(position - 1, this.source), 1);
-            case ';':
-                return Token(Tok.Seperator, Location(position - 1, this.source), 1);
-            case ':':
-                return Token(Tok.Colon, Location(position - 1, this.source), 1);
-            case '.':
-                return Token(Tok.Dot, Location(position - 1, this.source), 1);
-            case ',':
-                return Token(Tok.Comma, Location(position - 1, this.source), 1);
-            case '=':
-                if(source.data[position] == '=')
-                    return Token(Tok.Eq, Location(position++ - 1, this.source), 2);
-                return Token(Tok.Assign, Location(position - 1, this.source), 1);
-            case '!':
-                if(source.data[position] == '=')
-                    return Token(Tok.Ne, Location(position++ - 1, this.source), 2);
-                return Token(Tok.Not, Location(position - 1, this.source), 1);
-            case '<':
-                if(source.data[position] == '=')
-                    return Token(Tok.Le, Location(position++ - 1, this.source), 2);
-                return Token(Tok.Lt, Location(position - 1, this.source), 1);
-            case '>':
-                if(source.data[position] == '=')
-                    return Token(Tok.Ge, Location(position++ - 1, this.source), 2);
-                return Token(Tok.Gt, Location(position - 1, this.source), 1);
-            case '+':
-                return Token(Tok.Add, Location(position - 1, this.source), 1);
-            case '-':
-                return Token(Tok.Sub, Location(position - 1, this.source), 1);
-            case '*':
-                return Token(Tok.Mul, Location(position - 1, this.source), 1);
-            case '/':
-                switch(source.data[position])
-                {
-                    case '/':
-                        while(getNextChar != CharType.EOF)
-                        {
-                            if(source.data[position++] == '\n')
-                                return this.next;
-                        }
-                        return Token(Tok.EOF, Location(position, this.source), 0);
+        Token t = symbolFunctions[source.data[position++]]();
 
-                    case '*':
-                        position += 2;
-                        while(getNextChar != CharType.EOF)
-                        {
-                            ++position;
-                            if(source.data[position-2] == '*')
-                                if(source.data[position-1] == '/')
-                                   return this.next;
-                        }
-                        throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
-
-                    case '+':
-                        position += 2;
-                        int nesting = 1;
-                        while(getNextChar != CharType.EOF)
-                        {
-                            ++position;
-                            if(source.data[position-2] == '+')
-                                if(source.data[position-1] == '/')
-                                {
-                                    position++;
-                                    nesting--;
-                                }
-
-                            if(source.data[position-2] == '/')
-                                if(source.data[position-1] == '+')
-                                {
-                                    nesting++;
-                                    position++;
-                                }
-
-                            if(nesting == 0)
-                                return this.next;
-                        }
-                        throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
-
-                    default:
-                        return Token(Tok.Div, Location(position - 1, this.source), 1);
-                }
-        }
+        return t;
     }
 
     Token lexLetter ()
@@ -196,40 +263,12 @@
 
         char current = source.data[position + offset];
 
-        if (current >= 'A' && current <= 'Z' ||
-            current >= 'a' && current <= 'z' || current > 127)
-            return CharType.Letter;
-
-        if (current >= '0' && current <= '9')
-            return CharType.Number;
-
-        switch(current)
-        {
-            case ' ':
-            case '\n':
-                return CharType.Whitespace;
+        CharType c = charTable[current];
 
-            case '(':
-            case ')':
-            case '{':
-            case '}':
-            case ';':
-            case ':':
-            case '.':
-            case ',':
-            case '=':
-            case '!':
-            case '<':
-            case '>':
-            case '+':
-            case '-':
-            case '*':
-            case '/':
-                return CharType.Symbol;
+//        if(c == CharType.INVALID)
+  //          throw error(__LINE__, "Read invalid symbol: '%0'").arg(current);
 
-            default:
-                throw error(__LINE__, "Read invalid symbol: '%0'").arg(current);
-        }
+        return c;
 
     }
 
@@ -238,13 +277,16 @@
         return (new Error(msg)).loc(Location(position, source));
     }
 
+    int position;
     DataSource source;
-    int position;
     Error[] errors;
+    CharType[] charTable;
+    Token delegate()[] symbolFunctions;
 }
 
 enum CharType : ubyte
 {
+    INVALID,
     Letter,
     Number,
     Symbol,