changeset 104:7ff4bc2accf2 new_gen

Lexing more types of strings. All that's left now is to parse the string in the AST.
author Anders Johnsen <skabet@gmail.com>
date Wed, 21 May 2008 21:05:23 +0200
parents 09b4d74cb3f5
children f1282c5fe8e3
files ast/Exp.d lexer/Lexer.d lexer/Token.d parser/Action.d parser/Parser.d sema/AstAction.d tests/parser/string_1.d
diffstat 7 files changed, 116 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/ast/Exp.d	Thu May 08 10:54:29 2008 +0200
+++ b/ast/Exp.d	Wed May 21 21:05:23 2008 +0200
@@ -25,6 +25,7 @@
     AssignExp,
     CallExp,
     CastExp,
+    StringExp,
 }
 
 class Exp
@@ -436,6 +437,17 @@
     Exp exp;
 }
 
+class StringExp : Exp // AST expression node for a string literal
+{
+    this(SLoc loc, char[] str) // loc is forwarded to the Exp constructor; str is the literal's text
+    {
+        super(ExpType.StringExp, loc); // tag the node with the ExpType.StringExp member
+        this.str = str;
+    }
+
+    char[] str; // stored exactly as passed in; NOTE(review): presumably still raw source text (quotes, escapes) -- confirm
+}
+
 class PointerIdentifier : Identifier
 {
     this(Identifier pointerOf)
--- a/lexer/Lexer.d	Thu May 08 10:54:29 2008 +0200
+++ b/lexer/Lexer.d	Wed May 21 21:05:23 2008 +0200
@@ -37,12 +37,15 @@
         foreach (c; "0123456789")
             charTable[c] = CharType.Number;
 
-        foreach (c; "(){}[];:.,=!<>+-*/%")
+        foreach (c; "(){}[];:.,=!<>+-*/%\"`")
             charTable[c] = CharType.Symbol;
 
         foreach (c; " \n")
             charTable[c] = CharType.Whitespace;
 
+        foreach (c; "'\\")
+            charTable[c] = CharType.Other;
+
         symbolFunctions.length = 256;
 
         symbolFunctions['('] = &openParentheses;
@@ -64,6 +67,8 @@
         symbolFunctions['*'] = &star;
         symbolFunctions['/'] = &slash;
         symbolFunctions['%'] = &percent;
+        symbolFunctions['"'] = &string;
+        symbolFunctions['`'] = &string;
     }
 
     /**
@@ -93,6 +98,8 @@
 
             case CharType.Number:
                 return lexNumber;
+            case CharType.Other:
+                messages.report(UnexpectedTok, Loc(position)).fatal(ExitLevel.Lexer);
         }
     }
 
@@ -201,7 +208,7 @@
     {
         return Token(Tok.Star, Loc(position - 1), 1);
     }
-    Token slash() 
+    Token slash()
     {
         switch(source[position])
         {
@@ -220,7 +227,9 @@
                     ++position;
                     if(source[position-2] == '*')
                         if(source[position-1] == '/')
+                        {
                             return this.next;
+                        }
                 }
                 messages.report(UnexpectedEOFBlock,Loc(position));
 
@@ -258,6 +267,46 @@
     {
         return Token(Tok.Percent, Loc(position - 1), 1);
     }
+
+    /** Lexes one string literal ("..." with \-escapes, r"..." or `...` verbatim) as a Tok.String; fatal on EOF. */
+    Token string()
+    {
+        --position;                     // back up to the literal's first character
+        int start = position;
+        if(getNextChar() == CharType.Letter)
+            position++;                 // step over a one-letter prefix such as r or x
+        char end = '`';
+        switch(source[position])
+        {
+            case '"':
+                if(position > start && source[position-1] == 'r') // 'r' must be this token's own prefix
+                {
+                    end = '"';
+                    goto string_wys;
+                }
+                ++position;
+                while(getNextChar != CharType.EOF)
+                {
+                    ++position;
+                    if (source[position-1] == '"')
+                        return Token(Tok.String, Loc(start), position - start);
+                    else if (source[position-1] == '\\')
+                        position++;     // skip the escaped character so \" does not end the string
+                }
+                break;
+            case '`':
+string_wys:
+                ++position;             // step past the opening delimiter
+                while(getNextChar != CharType.EOF)
+                {
+                    ++position;
+                    if (source[position-1] == end)
+                        return Token(Tok.String, Loc(start), position - start);
+                }
+                break;
+        }
+        messages.report(UnexpectedEOFBlock, Loc(position)).fatal(ExitLevel.Lexer);
+    }
     
     Token lexNumber ()
     {
@@ -321,6 +370,12 @@
     {
         int i = 0;
         bool hasNumber = false;
+        if (source[position+1] == '"' ||
+            source[position+1] == '`')
+        {
+            ++position;
+            return string;
+        }
         while (getNextChar(++i) == CharType.Letter || 
                 getNextChar(i) == CharType.Number)
         {
@@ -385,6 +440,7 @@
     Number,
     Symbol,
     Whitespace,
+    Other,
 
     EOF
 }
--- a/lexer/Token.d	Thu May 08 10:54:29 2008 +0200
+++ b/lexer/Token.d	Wed May 21 21:05:23 2008 +0200
@@ -136,6 +136,8 @@
     Switch, Case, Default,
     Return, Cast,
 
+    String,
+
     Module, Import,
 
 }
@@ -194,6 +196,7 @@
         Tok.Seperator:"Seperator",
         Tok.Cast:"Cast",
         Tok.Module:"Module",
-        Tok.Import:"Import"
+        Tok.Import:"Import",
+        Tok.String:"String"
     ];
 }
--- a/parser/Action.d	Thu May 08 10:54:29 2008 +0200
+++ b/parser/Action.d	Wed May 21 21:05:23 2008 +0200
@@ -292,6 +292,14 @@
     }
 
     /**
+      Called by the parser when a string literal is used as an expression; this default returns null.
+     */
+    ExprT actOnStringExp(Token t)
+    {
+        return null;
+    }
+
+    /**
       Unary operator.
      */
     ExprT actOnUnaryOp(Token op, ExprT operand)
--- a/parser/Parser.d	Thu May 08 10:54:29 2008 +0200
+++ b/parser/Parser.d	Wed May 21 21:05:23 2008 +0200
@@ -627,6 +627,8 @@
             return parseCast(next);
         else if (next.type == Tok.Integer)
             return action.actOnNumericConstant(next);
+        else if (next.type == Tok.String)
+            return action.actOnStringExp(next);
 
         messages.report(ExpectedExp, next.location)
             .fatal(ExitLevel.Parser);
--- a/sema/AstAction.d	Thu May 08 10:54:29 2008 +0200
+++ b/sema/AstAction.d	Wed May 21 21:05:23 2008 +0200
@@ -173,6 +173,11 @@
         return new IntegerLit(c.location, sm.getText(c.asRange));
     }
 
+    override ExprT actOnStringExp(Token s) // wraps string token s in a StringExp AST node
+    {
+        return new StringExp(s.location, sm.getText(s.asRange)); // text is looked up via sm for the token's range
+    }
+
     override ExprT actOnIdentifierExp(Id id)
     {
         return identifierFromTok(id.tok);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/parser/string_1.d	Wed May 21 21:05:23 2008 +0200
@@ -0,0 +1,27 @@
+
+int main()
+{
+    /* All examples below are string literals taken from D's language site. */
+
+    char[4]     s1  = "food";
+
+    char[5]     s2  = r"hello";
+    char[15]    s3  = r"c:\root\foo.exe";
+    char[4]     s4  = r"ab\n";
+
+    char[5]     s5  = `hello`;
+    char[15]    s6  = `c:\root\foo.exe`;
+    char[4]     s7  = `ab\n`;
+    char[4]     s9  = `abn\`;
+
+    char[5]     s10 = "hello";
+    char[15]    s11 = "c:\\root\\foo.exe";
+    char[3]     s12 = "ab\n";
+    char[3]     s13 = "ab
+";
+
+    char[1]     s14 = x"0A";
+    char[6]     s15 = x"00 FBCD 32FD 0A";
+
+    return 0;
+}