changeset 323:6259fb93e3dd

- Rewrote scanSpecialToken(). - Added Filespec to enum TOK. - Added another nested struct to union in Token. - Deleted ExpectedNormalStringLiteral from enum MID and added ExpectedFilespec and UnterminatedFilespec. Adapted lang_de.d and lang_en.d accordingly. - Fixed some code in scanNumber(). Added some assert statements.
author aziz
date Tue, 21 Aug 2007 15:26:00 +0000
parents ed4ef0173793
children c1820da1ca53
files trunk/src/Lexer.d trunk/src/Messages.d trunk/src/Token.d trunk/src/lang_de.d trunk/src/lang_en.d trunk/src/main.d
diffstat 6 files changed, 121 insertions(+), 89 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/Lexer.d	Mon Aug 20 19:59:04 2007 +0000
+++ b/trunk/src/Lexer.d	Tue Aug 21 15:26:00 2007 +0000
@@ -578,9 +578,7 @@
         t.end = p;
         return;
       case '#':
-        scanSpecialToken();
-        c = *p;
-        continue;
+        return scanSpecialToken(t);
       default:
       }
 
@@ -973,26 +971,28 @@
         break;
     case 'i','f','F', 'e', 'E': // Imaginary and float literal suffix
       goto LscanReal;
-    case '_':
-      ++p;
-      goto LscanOct;
     default:
-      if (isoctal(*p))
+      if (*p == '_' || isoctal(*p))
         goto LscanOct;
     }
 
-    ulong_ = p[-1];
+    // Number 0
+    assert(p[-1] == '0');
+    assert(ulong_ == 0);
     isDecimal = true;
     goto Lfinalize;
 
   LscanInteger:
+    assert(*p != 0 && isdigit(*p));
     isDecimal = true;
+    goto Lenter_loop_int;
     while (1)
     {
       if (*++p == '_')
         continue;
       if (!isdigit(*p))
         break;
+    Lenter_loop_int:
       if (ulong_ < ulong.max/10 || (ulong_ == ulong.max/10 && *p <= '5'))
       {
         ulong_ *= 10;
@@ -1028,6 +1028,7 @@
 
   LscanHex:
     assert(digits == 0);
+    assert(*p == 'x');
     while (1)
     {
       if (*++p == '_')
@@ -1071,6 +1072,7 @@
 
   LscanBin:
     assert(digits == 0);
+    assert(*p == 'b');
     while (1)
     {
       if (*++p == '0')
@@ -1098,12 +1100,16 @@
     goto Lfinalize;
 
   LscanOct:
+    assert(*p == '_' || isoctal(*p));
+    if (*p != '_')
+      goto Lenter_loop_oct;
     while (1)
     {
       if (*++p == '_')
         continue;
       if (!isoctal(*p))
         break;
+    Lenter_loop_oct:
       if (ulong_ < ulong.max/2 || (ulong_ == ulong.max/2 && *p <= '1'))
       {
         ulong_ *= 8;
@@ -1362,99 +1368,116 @@
   }
 
   /// Scan special token: #line Integer [Filespec] EndOfLine
-  // TODO: Handle case like: #line 0 #line 2
-  void scanSpecialToken()
+  void scanSpecialToken(ref Token t)
   {
     assert(*p == '#');
 
-    ++p;
+    t.type = TOK.HashLine;
+
     MID mid;
-    Token* t;
-    uint oldloc = this.loc, newloc;
 
-    peek(t);
-    if (!(this.loc == oldloc && p == t.start && t.type == TOK.Identifier && t.srcText == "line"))
+    ++p;
+    if (p[0] != 'l' || p[1] != 'i' || p[2] != 'n' || p[3] != 'e')
     {
-      this.loc = oldloc; // reset this.loc because we took a peek at the next token
-      mid = MID.ExpectedIdentifierSTLine;
+      mid = MID.ExpectedIdentifierSTLine; // The identifier "line" is missing after '#'.
       goto Lerr;
     }
-    p = t.end; // consume token
+    p += 3; // p is now on the 'e' of "line"; the loop below pre-increments.
+
+    enum State
+    { Number, Filespec, End }
 
-    peek(t);
-    if (this.loc == oldloc && t.type == TOK.Int32)
+    State state;
+
+  Loop:
+    while (1)
     {
-      newloc = t.uint_ - 1;
-      p = t.end;
+      switch (*++p)
+      {
+      case '\r':
+        if (p[1] == '\n')
+          ++p; // Treat "\r\n" as a single line terminator, then fall through.
+      case '\n', 0, _Z_:
+        break Loop;
+      case LS[0]:
+        if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
+        {
+          ++p; ++p; // NOTE(review): leaves p on the last byte of the LS/PS sequence - confirm the caller copes.
+          break Loop;
+        }
+        goto default;
+      default:
+        if (isspace(*p))
+          continue;
+        if (state == State.Number)
+        {
+          if (!isdigit(*p))
+          {
+            mid = MID.ExpectedNumberAfterSTLine;
+            goto Lerr;
+          }
+          t.line_num = new Token;
+          scan(*t.line_num);
+          --p; // Undo one step so the loop's *++p lands where scan() left p.
+          state = State.Filespec;
+        }
+        else if (state == State.Filespec)
+        {
+          if (*p != '"')
+          {
+            mid = MID.ExpectedFilespec;
+            goto Lerr;
+          }
+          t.line_filespec = new Token;
+          t.line_filespec.start = p;
+          t.line_filespec.type = TOK.Filespec;
+          while (1)
+          {
+            switch (*++p)
+            {
+            case '"':
+              break;
+            case LS[0]:
+              if (!(p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])))
+                goto default;
+            case '\r', '\n', 0, _Z_:
+              mid = MID.UnterminatedFilespec;
+              t.line_filespec.end = p;
+              goto Lerr;
+            default:
+              if (*p & 128)
+                decodeUTF8();
+              continue;
+            }
+            break; // Exit loop.
+          }
+          auto start = t.line_filespec.start +1; // +1 skips '"'
+          t.line_filespec.str = start[0 .. p - start];
+          t.line_filespec.end = p + 1;
+          state = State.End;
+        }
+        else/+ if (state == State.End)+/
+        {
+          mid = MID.UnterminatedSpecialToken;
+          goto Lerr;
+        }
+      }
     }
-    else
+
+    if (state == State.Number)
     {
-      this.loc = oldloc;
       mid = MID.ExpectedNumberAfterSTLine;
       goto Lerr;
     }
 
-    peek(t);
-    if (this.loc != oldloc)
-    {
-      this.loc = oldloc;
-      mid = MID.NewlineInSpecialToken;
-      goto Lerr;
-    }
-    if (t.type == TOK.String)
-    {
-      if (*t.start != '"')
-      {
-        mid = MID.ExpectedNormalStringLiteral;
-        goto Lerr;
-      }
-      fileName = t.srcText[1..$-1]; // contents of "..."
-      p = t.end;
-    }
-    else if (t.type == TOK.Identifier && t.srcText == "__FILE__")
-    {
-      p = t.end;
-    }
-/+
-    peek(t);
-    if (this.loc == oldloc && t.type != TOK.EOF)
-    {
-      mid = MID.UnterminatedSpecialToken;
-      goto Lerr;
-    }
-+/
-    while (1)
-    {
-      switch (*p)
-      {
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      case '\n':
-        ++p;
-        break;
-      case LS[0]:
-        if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-        {
-          p += 2;
-          break;
-        }
-      case 0, _Z_:
-        break;
-      default:
-        if (isspace(*p)) {
-          ++p;
-          continue;
-        }
-        mid = MID.UnterminatedSpecialToken;
-        goto Lerr;
-      }
-      break;
-    }
+    this.loc = t.line_num.uint_ - 1;
+    if (t.line_filespec)
+      this.fileName = t.line_filespec.str;
+    t.end = p;
 
-    this.loc = newloc;
     return;
   Lerr:
+    t.end = p;
     error(mid);
   }
 
--- a/trunk/src/Messages.d	Mon Aug 20 19:59:04 2007 +0000
+++ b/trunk/src/Messages.d	Tue Aug 21 15:26:00 2007 +0000
@@ -17,9 +17,9 @@
   EmptyCharacterLiteral,
   // #line
   ExpectedIdentifierSTLine,
-  ExpectedNormalStringLiteral,
   ExpectedNumberAfterSTLine,
-  NewlineInSpecialToken,
+  ExpectedFilespec,
+  UnterminatedFilespec,
   UnterminatedSpecialToken,
   // ""
   UnterminatedString,
--- a/trunk/src/Token.d	Mon Aug 20 19:59:04 2007 +0000
+++ b/trunk/src/Token.d	Tue Aug 21 15:26:00 2007 +0000
@@ -21,8 +21,9 @@
   Comment = 1 | Whitespace,
   Shebang = 2 | Whitespace,
   HashLine = 3 | Whitespace,
+  Filespec = 4 | Whitespace,
 
-  Identifier = 4,
+  Identifier = 5,
   String,
   Special,
   CharLiteral, WCharLiteral, DCharLiteral,
@@ -118,6 +119,11 @@
   {
     struct
     {
+      Token* line_num; // #line number
+      Token* line_filespec; // #line number filespec
+    }
+    struct
+    {
       string str;
       char pf;
     }
--- a/trunk/src/lang_de.d	Mon Aug 20 19:59:04 2007 +0000
+++ b/trunk/src/lang_de.d	Tue Aug 21 15:26:00 2007 +0000
@@ -12,9 +12,9 @@
   "leeres Zeichenliteral.",
   // #line
   "erwartete 'line' nach '#'.",
-  `die Dateispezifikation (filespec) muss in Anführungszeichen angegeben werden (z.B. "filespec".)`,
   "Ganzzahl nach #line erwartet.",
-  "Zeilenumbrüche innerhalb eines Special Token sind nicht erlaubt.",
+  `erwartete Dateispezifikation (z.B. "pfad\zur\datei".)`,
+  "unterminierte Dateispezifikation (filespec.)",
   "ein Special Token muss mit einem Zeilenumbruch abgeschlossen werden.",
   // ""
   "unterminiertes Zeichenkettenliteral.",
--- a/trunk/src/lang_en.d	Mon Aug 20 19:59:04 2007 +0000
+++ b/trunk/src/lang_en.d	Tue Aug 21 15:26:00 2007 +0000
@@ -12,9 +12,9 @@
   "empty character literal.",
   // #line
   "expected 'line' after '#'.",
-  `the filespec must be defined in a double quote string literal (e.g. "filespec".)`,
   "integer expected after #line",
-  "newline not allowed inside special token.",
+  `expected filespec string (e.g. "path\to\file".)`,
+  "unterminated filespec string.",
   "expected a terminating newline after special token.",
   // ""
   "unterminated string literal.",
--- a/trunk/src/main.d	Mon Aug 20 19:59:04 2007 +0000
+++ b/trunk/src/main.d	Tue Aug 21 15:26:00 2007 +0000
@@ -507,6 +507,9 @@
   case TOK.Shebang:
     writef(tags[DP.Shebang], srcText);
     break;
+  case TOK.HashLine:
+    writef("<hl>%s</hl>", srcText);
+    break;
   default:
     if (token.isKeyword())
       writef(tags[DP.Keyword], srcText);