changeset 12:0989206cf73c

- Added code to decode Unicode characters in identifiers.
author aziz
date Sat, 23 Jun 2007 10:02:00 +0000
parents dffcdaa7c47a
children e5211758b63c
files trunk/src/Lexer.d
diffstat 1 files changed, 15 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/Lexer.d	Sat Jun 23 08:54:00 2007 +0000
+++ b/trunk/src/Lexer.d	Sat Jun 23 10:02:00 2007 +0000
@@ -103,9 +103,10 @@
 
       if (isident(c) && !isdigit(c))
       {
+      Lidentifier:
         do
         { c = *++p; }
-        while (isident(c))
+        while (isident(c) || c & 128 && isUniAlpha(decodeUTF()))
         t.type = TOK.Identifier;
         t.end = p;
         return;
@@ -195,10 +196,23 @@
         t.end = p;
         return;
       }
+
+      if (c & 128 && isUniAlpha(decodeUTF()))
+        goto Lidentifier;
       c = *++p;
     }
   }
 
+  uint decodeUTF()
+  {
+    assert(*p & 128);
+    size_t idx;
+    uint d;
+    d = std.utf.decode(p[0 .. end-p], idx);
+    p += idx -1;
+    return d;
+  }
+
   public TOK nextToken()
   {
     scan(this.token);