Mercurial > projects > dil

diff trunk/src/Lexer.d @ 18:c48d2125f1e2
- Moved code for scanning character literals to separate function. - Added class Problem and Messages table.
author: aziz
date: Sat, 23 Jun 2007 21:48:03 +0000
parents: 9bd0bac79479
children: f85832f9f24e
--- a/trunk/src/Lexer.d	Sat Jun 23 20:12:03 2007 +0000
+++ b/trunk/src/Lexer.d	Sat Jun 23 21:48:03 2007 +0000
@@ -82,6 +82,41 @@
 const char[3] LS = \u2028;
 const char[3] PS = \u2029;
 
+const dchar LSd = 0x2028; /// U+2028 LINE SEPARATOR as one code point (LS above holds the same character as a char[3]).
+const dchar PSd = 0x2029; /// U+2029 PARAGRAPH SEPARATOR as one code point (PS above holds the same character as a char[3]).
+
+/// Index into the table of error messages (presumably the Messages array); member order must then match the order of the strings there.
+enum MID
+{
+  UnterminatedCharacterLiteral, /// Default problem id in scanCharacterLiteral(): used for every failure except the empty literal.
+  EmptyCharacterLiteral /// Used by scanCharacterLiteral() when the opening quote is directly followed by another quote.
+}
+
+string[] Messages = [ // Error message texts, one per MID member and in the same order as the enum.
+  "unterminated character literal.", // MID.UnterminatedCharacterLiteral -- the comma matters: adjacent string literals are concatenated into one element.
+  "empty character literal." // MID.EmptyCharacterLiteral
+];
+
+class Problem // One recorded diagnostic: where it came from (type), which message it is (id) and the line it refers to (loc).
+{
+  enum Type /// Origin of a problem; only Lexer is used in the code visible here.
+  {
+    Lexer,
+    Parser,
+    Semantic
+  }
+
+  MID id; /// Identifies the error message.
+  Type type; /// Origin of the problem.
+  uint loc; /// Line of code; Lexer.error() passes the lexer's current loc.
+  this(Type type, MID id, uint loc) // Stores the three arguments unchanged in the fields of the same name.
+  {
+    this.id = id;
+    this.type = type;
+    this.loc = loc;
+  }
+}
+
 class Lexer
 {
   Token token;
@@ -91,6 +126,8 @@
 
   uint loc = 1; /// line of code
 
+  Problem[] errors; /// Problems recorded so far; error() appends to this array.
+
   this(char[] text)
   {
     this.text = text;
@@ -152,12 +189,12 @@
         c = *++p;
         switch(c)
         {
-          case '=':
+        case '=':
           ++p;
           t.type = TOK.DivisionAssign;
           t.end = p;
           return;
-          case '+':
+        case '+':
           uint level = 1;
           do
           {
@@ -180,7 +217,7 @@
           t.type = TOK.Comment;
           t.end = p;
           return;
-          case '*':
+        case '*':
           do
           {
             c = *++p;
@@ -191,7 +228,7 @@
           t.type = TOK.Comment;
           t.end = p;
           return;
-          case '/':
+        case '/':
           do
           {
             c = *++p;
@@ -220,19 +257,7 @@
       }
 
       if (c == '\'')
-      {
-        do {
-          c = *++p;
-          if (c == 0)
-            throw new Error("unterminated character literal.");
-          if (c == '\\')
-            ++p;
-        } while (c != '\'')
-        ++p;
-        t.type = TOK.Character;
-        t.end = p;
-        return;
-      }
+        return scanCharacterLiteral(t);
 
       if (c & 128 && isUniAlpha(decodeUTF()))
         goto Lidentifier;
@@ -240,6 +265,41 @@
     }
   }
 
+  void scanCharacterLiteral(ref Token t) // Scans the character literal whose opening quote is at p. Sets t.type and t.end, and t.chr for an ordinary character; problems are recorded through error().
+  {
+    assert(*p == '\'');
+    MID id = MID.UnterminatedCharacterLiteral; // reported unless a more specific problem is detected below
+    uint c = *++p; // first byte after the opening quote
+    switch(c)
+    {
+    case '\\':
+      ++p; // NOTE(review): only the sequence \' passes the check below, and p is then left on that quote, so the token ends before the closing quote -- escape handling looks unfinished; left unchanged.
+      if (*p != '\'')
+        goto Lerr;
+      break;
+    case 0, 0x1A, '\n', '\r': // end of input (NUL or Ctrl+Z) or end of line before a closing quote; was 161 (0xA1), which is never a valid first byte
+      goto Lerr;
+    case '\'':
+      id = MID.EmptyCharacterLiteral; // nothing between the two quotes
+      goto Lerr;
+    default:
+      if (!(c & 128))
+        ++p; // single-byte character: step onto what has to be the closing quote
+      else if ((c = decodeUTF()) == LSd || c == PSd) // NOTE(review): assumes decodeUTF() leaves p where the quote check below expects it -- confirm
+        goto Lerr; // U+2028 and U+2029 terminate the line as well
+      t.chr = c; // now stored for single-byte characters too
+    }
+
+    if (*p != '\'')
+    Lerr:
+      error(id);
+    // Consume the closing quote (or the offending byte), but never step over the terminating NUL.
+    if (*p != 0)
+      ++p;
+    t.type = TOK.Character;
+    t.end = p;
+  }
+
   void scanNumber(ref Token t)
   {
     while (isdigit(*++p)) {}
@@ -257,6 +317,11 @@
     return d;
   }
 
+  void error(MID id) // Records a problem of type Lexer with message id `id` at the current line (loc) by appending it to `errors`.
+  {
+    errors ~= new Problem(Problem.Type.Lexer, id, loc);
+  }
+
   public TOK nextToken()
   {
     scan(this.token);
author	aziz
date	Sat, 23 Jun 2007 21:48:03 +0000
parents	9bd0bac79479
children	f85832f9f24e