diff lexer/Lexer.d @ 88:eb5b2c719a39 new_gen

Major change to locations, tokens and expressions. A location (now SourceLocation or SLoc) is only 32 bits in size - the disadvantage is that it can't find its own text; you have to go through the new SourceManager to do that. This has caused changes to a lot of stuff and the removal of DataSource and the old Location. Additionally, Exp has gotten some location stuff, so we can give proper error messages. This is not in Decl and Stmt yet, but that's coming too.
author Anders Halager <halager@gmail.com>
date Sun, 04 May 2008 18:13:46 +0200
parents 192da4976daa
children a49bb982a7b0
line wrap: on
line diff
--- a/lexer/Lexer.d	Sun May 04 12:58:02 2008 +0200
+++ b/lexer/Lexer.d	Sun May 04 18:13:46 2008 +0200
@@ -1,7 +1,7 @@
 module lexer.Lexer;
 
 import misc.Error,
-       misc.DataSource;
+       basic.SourceManager;
 
 import lexer.Token,
        lexer.Keyword;
@@ -20,29 +20,26 @@
 
     /**
       Create a new Lexer.
-
-      params:
-        source = The source to tokenize.
-
     */
-
-    this (DataSource source)
+    this(SourceLocation start, SourceManager src_mgr)
     {
-        this.source = source;
+        sm = src_mgr;
+        start_loc = start;
         position = 0;
+        source = sm.getRawData(start_loc);
 
 
         charTable.length = 256;
-        foreach( char c ; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
+        foreach (c; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
             charTable[c] = CharType.Letter;
 
-        foreach( char c ; "0123456789")
+        foreach (c; "0123456789")
             charTable[c] = CharType.Number;
 
-        foreach( char c ; "(){}[];:.,=!<>+-*/%")
+        foreach (c; "(){}[];:.,=!<>+-*/%")
             charTable[c] = CharType.Symbol;
 
-        foreach( char c ; " \n")
+        foreach (c; " \n")
             charTable[c] = CharType.Whitespace;
 
         symbolFunctions.length = 256;
@@ -75,13 +72,13 @@
       return: A Token - Token.type is TokType.EOF if there are
         no more tokens in the file.
       */
-    Token next ()
+    Token next()
     {
         switch (getNextChar)
         {
             case CharType.EOF:
-                Location l;
-                return Token (Tok.EOF, l, 0); 
+                SLoc loc;
+                return Token(Tok.EOF, loc, 0); 
 
             case CharType.Whitespace:
                 position += 1;
@@ -105,10 +102,10 @@
       return: A Token - Token.type is TokType.EOF if there are
         no more tokens in the file.
       */
-    Token peek ( int skip = 0)
+    Token peek(int skip = 0)
     {
         int oldPosition = this.position;
-        while(skip-- > 0)
+        while (skip-- > 0)
             this.next;
         Token t = this.next;
         this.position = oldPosition;
@@ -128,47 +125,47 @@
 private:
     Token eq()
     {
-        if(source.data[position] == '=')
-            return Token(Tok.Eq, Location(position++ - 1, source), 2);
-        return Token(Tok.Assign, Location(position - 1, source), 1);
+        if(source[position] == '=')
+            return Token(Tok.Eq, Loc(position++ - 1), 2);
+        return Token(Tok.Assign, Loc(position - 1), 1);
     }
     Token openBrace() 
     {
-        return Token(Tok.OpenBrace, Location(position - 1, source), 1);
+        return Token(Tok.OpenBrace, Loc(position - 1), 1);
     }
     Token closeBrace() 
     {
-        return Token(Tok.CloseBrace, Location(position - 1, this.source), 1);
+        return Token(Tok.CloseBrace, Loc(position - 1), 1);
     }
     Token openParentheses() 
     {
-        return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1);
+        return Token(Tok.OpenParentheses, Loc(position - 1), 1);
     }
     Token closeParentheses()
     {
-        return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1);
+        return Token(Tok.CloseParentheses, Loc(position - 1), 1);
     }
     Token openBracket() 
     {
-        return Token(Tok.OpenBracket, Location(position - 1, this.source), 1);
+        return Token(Tok.OpenBracket, Loc(position - 1), 1);
     }
     Token closeBracket()
     {
-        return Token(Tok.CloseBracket, Location(position - 1, source), 1);
+        return Token(Tok.CloseBracket, Loc(position - 1), 1);
     }
     Token seperator()
     {
-        return Token(Tok.Seperator, Location(position - 1, source), 1);
+        return Token(Tok.Seperator, Loc(position - 1), 1);
     }
     Token colon()
     {
-        return Token(Tok.Colon, Location(position - 1, this.source), 1);
+        return Token(Tok.Colon, Loc(position - 1), 1);
     }
     Token dot() 
     {
         int pos = 0;
         while(getNextChar(0) == CharType.Number || 
-              this.source.data[position + pos + 1] == '_')
+              this.source[position + pos + 1] == '_')
         {
             if(getNextChar(0) == CharType.Number)
             {
@@ -177,61 +174,61 @@
             }
             pos++;
         }
-        return Token(Tok.Dot, Location(position - 1, this.source), 1);
+        return Token(Tok.Dot, Loc(position - 1), 1);
     }
     Token comma() 
     {
-        return Token(Tok.Comma, Location(position - 1, this.source), 1);
+        return Token(Tok.Comma, Loc(position - 1), 1);
     }
     Token ne() 
     {
-        if(source.data[position] == '=')
-            return Token(Tok.Ne, Location(position++ - 1, this.source), 2);
-        return Token(Tok.Not, Location(position - 1, this.source), 1);
+        if(source[position] == '=')
+            return Token(Tok.Ne, Loc(position++ - 1), 2);
+        return Token(Tok.Not, Loc(position - 1), 1);
     }
     Token le()
     {
-        if(source.data[position] == '=')
-            return Token(Tok.Le, Location(position++ - 1, this.source), 2);
-        return Token(Tok.Lt, Location(position - 1, this.source), 1);
+        if(source[position] == '=')
+            return Token(Tok.Le, Loc(position++ - 1), 2);
+        return Token(Tok.Lt, Loc(position - 1), 1);
     }
     Token ge() 
     {
-        if(source.data[position] == '=')
-            return Token(Tok.Ge, Location(position++ - 1, this.source), 2);
-        return Token(Tok.Gt, Location(position - 1, this.source), 1);
+        if(source[position] == '=')
+            return Token(Tok.Ge, Loc(position++ - 1), 2);
+        return Token(Tok.Gt, Loc(position - 1), 1);
     }
     Token plus() 
     {
-        return Token(Tok.Plus, Location(position - 1, this.source), 1);
+        return Token(Tok.Plus, Loc(position - 1), 1);
     }
     Token minus() 
     {
-        return Token(Tok.Minus, Location(position - 1, this.source), 1);
+        return Token(Tok.Minus, Loc(position - 1), 1);
     }
     Token star() 
     {
-        return Token(Tok.Star, Location(position - 1, this.source), 1);
+        return Token(Tok.Star, Loc(position - 1), 1);
     }
     Token slash() 
     {
-        switch(source.data[position])
+        switch(source[position])
         {
             case '/':
                 while(getNextChar != CharType.EOF)
                 {
-                    if(source.data[position++] == '\n')
+                    if(source[position++] == '\n')
                         return this.next;
                 }
-                return Token(Tok.EOF, Location(position, this.source), 0);
+                return Token(Tok.EOF, Loc(position), 0);
 
             case '*':
                 position += 2;
                 while(getNextChar != CharType.EOF)
                 {
                     ++position;
-                    if(source.data[position-2] == '*')
-                        if(source.data[position-1] == '/')
+                    if(source[position-2] == '*')
+                        if(source[position-1] == '/')
                             return this.next;
                 }
                 throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
@@ -242,15 +239,15 @@
                 while(getNextChar != CharType.EOF)
                 {
                     ++position;
-                    if(source.data[position-2] == '+')
-                        if(source.data[position-1] == '/')
+                    if(source[position-2] == '+')
+                        if(source[position-1] == '/')
                         {
                             position++;
                             nesting--;
                         }
 
-                    if(source.data[position-2] == '/')
-                        if(source.data[position-1] == '+')
+                    if(source[position-2] == '/')
+                        if(source[position-1] == '+')
                         {
                             nesting++;
                             position++;
@@ -262,13 +259,13 @@
                 throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
 
             default:
-                return Token(Tok.Slash, Location(position - 1, this.source), 1);
+                return Token(Tok.Slash, Loc(position - 1), 1);
         }
     }
 
     Token percent() 
     {
-        return Token(Tok.Percent, Location(position - 1, this.source), 1);
+        return Token(Tok.Percent, Loc(position - 1), 1);
     }
     
     Token lexNumber ()
@@ -287,24 +284,24 @@
                 case CharType.Number:
                     break;
                 case CharType.Symbol:
-                    if(this.source.data[position+i] == '.')
+                    if(this.source[position+i] == '.')
                     {
                         if(dot)
                             throw error(__LINE__,"Only one '.' is allowed in an floating number")
-                                .tok(Token(Tok.Float, Location(position + i, this.source), 1));
+                                .tok(Token(Tok.Float, Loc(position + i), 1));
                         dot = true;
                         break;
                     }
                     end = true;
                     continue;
                 case CharType.Letter:
-                    if(this.source.data[position+i] == '_')
+                    if(this.source[position+i] == '_')
                         break;
-                    if (this.source.data[position+i] == 'e' || 
-                        this.source.data[position+i] == 'E')
+                    if (this.source[position+i] == 'e' || 
+                        this.source[position+i] == 'E')
                     {
                         if (e)
-                            throw error(__LINE__,"Only one '"~this.source.data[position+i]
+                            throw error(__LINE__,"Only one '"~this.source[position+i]
                                     ~"' is allowed in an floating number");
                         e = true;
                         break;
@@ -321,12 +318,12 @@
 
         position += i;
 
-        return Token(Tok.Integer, Location(position - i, this.source), i);
+        return Token(Tok.Integer, Loc(position - i), i);
     }
 
     Token lexSymbol ()
     {
-        Token t = symbolFunctions[source.data[position++]]();
+        Token t = symbolFunctions[source[position++]]();
 
         return t;
     }
@@ -344,11 +341,11 @@
             }
         }
 
-        Token t = Token(Tok.Identifier, Location(position, source), i);
+        Token t = Token(Tok.Identifier, Loc(), i);
 
         if (!hasNumber)
         {
-            char[] str = source.data[position .. position + i];
+            char[] str = source[position .. position + i];
             if(str in keywords)
                 t.type = keywords[str];
         }
@@ -360,10 +357,10 @@
 
     CharType getNextChar(int offset = 0)
     {
-        if (position + offset >= this.source.data.length)
+        if (position + offset >= this.source.length)
             return CharType.EOF;
 
-        char current = source.data[position + offset];
+        char current = source[position + offset];
 
         CharType c = charTable[current];
 
@@ -376,11 +373,20 @@
 
     Error error(uint line, char[] msg)
     {
-        return (new Error(msg)).loc(Location(position, source));
+        return (new Error(msg));//.loc(Loc(position));
     }
 
+    private final SourceLocation Loc(int pos = -1)
+    {
+        if (pos < 0)
+            return start_loc + position;
+        return start_loc + pos;
+    }
+
+    SourceManager sm;
+    SourceLocation start_loc;
     int position;
-    DataSource source;
+    char[] source;
     Error[] errors;
     CharType[] charTable;
     Token delegate()[] symbolFunctions;
@@ -396,3 +402,4 @@
 
     EOF
 }
+