changeset 32:d7011daa4740

- Added missing commas after the items in the messages table. - Renamed TOK.DivisionAssign to TOK.DivAssign. - Added TOK.Div and code for recognizing it. - Rewrote code that scans for block and nested comments. Counts newlines properly and issues messages on errors. - Replaced magic number '26' with constant _Z_.
author aziz
date Mon, 25 Jun 2007 17:45:02 +0000
parents 94f09f4e988e
children cf3047cf3cd2
files trunk/src/Lexer.d trunk/src/Token.d trunk/src/main.d
diffstat 3 files changed, 123 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/Lexer.d	Mon Jun 25 13:34:03 2007 +0000
+++ b/trunk/src/Lexer.d	Mon Jun 25 17:45:02 2007 +0000
@@ -88,6 +88,8 @@
 const dchar LSd = 0x2028;
 const dchar PSd = 0x2029;
 
+const uint _Z_ = 26; /// Control+Z
+
 /// Index into table of error messages.
 enum MID
 {
@@ -100,20 +102,28 @@
   // x""
   NonHexCharInHexString,
   OddNumberOfDigitsInHexString,
-  UnterminatedHexString
+  UnterminatedHexString,
+  // /* */ /+ +/
+  UnterminatedBlockComment,
+  UnterminatedNestedComment
+
 }
 
 string[] messages = [
-  "unterminated character literal."
+  "unterminated character literal.",
   "empty character literal.",
   // #line
-  "expected 'line' after '#'."
-  "newline not allowed inside special token."
+  "expected 'line' after '#'.",
+  "newline not allowed inside special token.",
   "expected newline after special token.",
   // x""
   "non-hex character '{1}' found in hex string.",
   "odd number of hex digits in hex string.",
-  "unterminated hex string."
+  "unterminated hex string.",
+  // /* */ /+ +/
+  "unterminated block comment (/* */).",
+  "unterminated nested comment (/+ +/)."
+
 ];
 
 class Problem
@@ -196,6 +206,12 @@
           ++loc;
         continue;
       }
+      else if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
+      {
+        p += 3;
+        c = *p;
+        continue;
+      }
 
       if (isidbeg(c))
       {
@@ -233,51 +249,115 @@
         {
         case '=':
           ++p;
-          t.type = TOK.DivisionAssign;
+          t.type = TOK.DivAssign;
           t.end = p;
           return;
         case '+':
           uint level = 1;
-          do
+          while (1)
+          {
+            c = *++p;
+            switch (c)
+            {
+            case '\r':
+              if (p[1] == '\n')
+                ++p;
+            case '\n':
+              ++loc;
+              continue;
+            case '/':
+              if (p[1] == '+')
+              {
+                ++p;
+                ++level;
+              }
+              continue;
+            case '+':
+              if (p[1] == '/')
+              {
+                ++p;
+                if (--level == 0)
+                {
+                  ++p;
+                LreturnNC:
+                  t.type = TOK.Comment;
+                  t.end = p;
+                  return;
+                }
+              }
+              continue;
+            case 0, _Z_:
+              error(MID.UnterminatedNestedComment);
+              goto LreturnNC;
+            case LS[0]:
+              if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) {
+                p += 2;
+                ++loc;
+              }
+              continue;
+            default:
+            }
+          }
+        case '*':
+          while (1)
           {
             c = *++p;
-            if (c == 0)
-              throw new Error("unterminated /+ +/ comment.");
-            else if (c == '/' && p[1] == '+')
-            {
-              ++p;
-              ++level;
-            }
-            else if (c == '+' && p[1] == '/')
+            switch (c)
             {
-              ++p;
-              if (--level == 0)
-                break;
+            case '\r':
+              if (p[1] == '\n')
+                ++p;
+            case '\n':
+              ++loc;
+              continue;
+            case '*':
+              if (p[1] == '/')
+              {
+                p += 2;
+            LreturnBC:
+                t.type = TOK.Comment;
+                t.end = p;
+                return;
+              }
+              break;
+            case LS[0]:
+              if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) {
+                p += 2;
+                ++loc;
+              }
+              break;
+            case 0, _Z_:
+              error(MID.UnterminatedBlockComment);
+              goto LreturnBC;
+            default:
             }
-          } while (1)
-          p += 2;
-          t.type = TOK.Comment;
-          t.end = p;
-          return;
-        case '*':
-          do
+          }
+          assert(0);
+        case '/':
+          while (1)
           {
             c = *++p;
-            if (c == 0)
-              throw new Error("unterminated /* */ comment.");
-          } while (c != '*' || p[1] != '/')
-          p += 2;
-          t.type = TOK.Comment;
-          t.end = p;
-          return;
-        case '/':
-          do
-          {
-            c = *++p;
-            if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
+            switch (c)
+            {
+            case '\r':
+              if (p[1] == '\n')
+                ++p;
+            case '\n':
+            case 0, _Z_:
               break;
-          } while (c != '\n' && c != 0)
-          t.type = TOK.Comment;
+            case LS[0]:
+              if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
+                break;
+              continue;
+            default:
+              continue;
+            }
+            t.type = TOK.Comment;
+            t.end = p;
+            return;
+          }
+        default:
+          t.type = TOK.Div;
           t.end = p;
           return;
         }
@@ -469,7 +549,7 @@
     case '\\':
       ++p;
       break;
-    case 0, 26, '\n', '\r':
+    case 0, _Z_, '\n', '\r':
       goto Lerr;
     case '\'':
       id = MID.EmptyCharacterLiteral;
@@ -550,7 +630,7 @@
           ++loc;
         }
         continue;
-      case 0, 26:
+      case 0, _Z_:
         mid = MID.UnterminatedHexString;
         goto Lerr;
       default:
--- a/trunk/src/Token.d	Mon Jun 25 13:34:03 2007 +0000
+++ b/trunk/src/Token.d	Mon Jun 25 17:45:02 2007 +0000
@@ -16,7 +16,6 @@
   Comment,
   String,
   Character,
-  DivisionAssign,
   Number,
 /* Braces */
   LParen,
@@ -33,6 +32,7 @@
   AndAssign, AndLogical, AndBinary,
   PlusAssign, PlusPlus, Plus,
   MinusAssign, MinusMinus, Minus,
+  DivAssign, Div,
   MulAssign, Mul,
   ModAssign, Mod,
   XorAssign, Xor,
--- a/trunk/src/main.d	Mon Jun 25 13:34:03 2007 +0000
+++ b/trunk/src/main.d	Mon Jun 25 17:45:02 2007 +0000
@@ -52,7 +52,7 @@
       case TOK.Character:
         writef("<cl>%s</cl>", span);
       break;
-      case TOK.DivisionAssign:
+      case TOK.DivAssign:
         writef("<op>%s</op>", span);
       break;
       case TOK.AndLogical: