changeset 414:9c69615a4876

Added method insertEmptyTokenBefore() to Lexer. Made some fixes, too. Fixed the while statement that skips to the next valid UTF-8 sequence.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Thu, 27 Sep 2007 10:28:08 +0200
parents 0fd78fdcb982
children b18b0cd8f8cd
files trunk/src/dil/Lexer.d trunk/src/dil/Parser.d trunk/src/dil/Token.d
diffstat 3 files changed, 51 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/dil/Lexer.d	Wed Sep 26 17:12:00 2007 +0200
+++ b/trunk/src/dil/Lexer.d	Thu Sep 27 10:28:08 2007 +0200
@@ -726,6 +726,15 @@
       else
       {
         c <<= 8; c |= p[1]; c <<= 8; c |= p[2]; c <<= 8; c |= p[3];
+        /+
+        c = *cast(uint*)p;
+        asm
+        {
+          mov EDX, c;
+          bswap EDX;
+          mov c, EDX;
+        }
+        +/
       }
     }
 
@@ -896,6 +905,7 @@
     assert(p == t.start);
     assert(*p == c, Format("p={0},c={1}", *p, cast(dchar)c));
     // 1 character tokens.
+    // TODO: consider storing the token type in ptable.
     switch (c)
     {
     case '\'':
@@ -2385,7 +2395,7 @@
     {
       error(MID.InvalidUTF8Sequence);
       // Skip to next valid utf-8 sequence
-      while (p < end && UTF8stride[*++p] != 0xFF) {}
+      while (++p < end && UTF8stride[*p] == 0xFF) {}
       --p;
       assert(p < end);
     }
@@ -2432,6 +2442,28 @@
     return State(this);
   }
 +/
+  /+
+    Insert a zero-length dummy token of type TOK.Empty before t; return it.
+    Useful in the parsing phase for representing a node in the AST
+    that doesn't consume an actual token from the source text.
+  +/
+  Token* insertEmptyTokenBefore(Token* t)
+  {
+    assert(t !is null && t.prev !is null); // A predecessor is required to link after.
+    assert(text.ptr <= t.start && t.start < end, Token.toString(t.type)); // t must start inside the text.
+    assert(text.ptr <= t.end && t.end <= end, Token.toString(t.type)); // t must end inside the text.
+
+    auto prev_t = t.prev;
+    auto new_t = new Token;
+    new_t.type = TOK.Empty;
+    new_t.start = new_t.end = prev_t.end; // Empty span located at the end of the previous token.
+    // Splice the new token into the doubly linked list between prev_t and t.
+    prev_t.next = new_t;
+    new_t.prev = prev_t;
+    new_t.next = t;
+    t.prev = new_t;
+    return new_t;
+  }
 
   private void scanNext(ref Token* t)
   {
--- a/trunk/src/dil/Parser.d	Wed Sep 26 17:12:00 2007 +0200
+++ b/trunk/src/dil/Parser.d	Thu Sep 27 10:28:08 2007 +0200
@@ -2966,7 +2966,12 @@
     default:
       error(MID.ExpectedButFound, "Expression", token.srcText);
       e = new EmptyExpression();
-      break;
+      if (!trying)
+      {
+        // Insert a dummy token and don't consume current one.
+        begin = lx.insertEmptyTokenBefore(token);
+        prevToken = begin;
+      }
     }
     set(e, begin);
     return e;
@@ -3633,6 +3638,12 @@
 
       error(MID.ExpectedButFound, "Expression", token.srcText);
       e = new EmptyExpression();
+      if (!trying)
+      {
+        // Insert a dummy token and don't consume current one.
+        begin = lx.insertEmptyTokenBefore(token);
+        prevToken = begin;
+      }
     }
     set(e, begin);
     return e;
--- a/trunk/src/dil/Token.d	Wed Sep 26 17:12:00 2007 +0200
+++ b/trunk/src/dil/Token.d	Thu Sep 27 10:28:08 2007 +0200
@@ -19,13 +19,14 @@
 
   /// Flag for whitespace tokens that must be ignored in the parsing phase.
   Whitespace = 0x8000,
-  Illegal = 1 | Whitespace,
-  Comment = 2 | Whitespace,
-  Shebang = 3 | Whitespace,
+  Illegal  = 1 | Whitespace,
+  Comment  = 2 | Whitespace,
+  Shebang  = 3 | Whitespace,
   HashLine = 4 | Whitespace,
   Filespec = 5 | Whitespace,
+  Empty    = 6,
 
-  Identifier = 6,
+  Identifier = 7,
   String,
   CharLiteral, WCharLiteral, DCharLiteral,
 
@@ -287,6 +288,7 @@
   "#! /shebang/",
   "#line",
   `"filespec"`,
+  "Empty",
 
   "Identifier",
   "String",