# HG changeset patch
# User aziz
# Date 1182592920 0
# Node ID 0989206cf73cdef51cbac42d4473ffe08779d646
# Parent  dffcdaa7c47a211efe2cefb23f4cad340bb4b9a9
- Added code to decode Unicode characters in identifiers.

diff -r dffcdaa7c47a -r 0989206cf73c trunk/src/Lexer.d
--- a/trunk/src/Lexer.d	Sat Jun 23 08:54:00 2007 +0000
+++ b/trunk/src/Lexer.d	Sat Jun 23 10:02:00 2007 +0000
@@ -103,9 +103,10 @@
 
       if (isident(c) && !isdigit(c))
       {
+      Lidentifier:
         do
         { c = *++p; }
-        while (isident(c))
+        while (isident(c) || c & 128 && isUniAlpha(decodeUTF()))
         t.type = TOK.Identifier;
         t.end = p;
         return;
@@ -195,10 +196,23 @@
         t.end = p;
         return;
       }
+
+      if (c & 128 && isUniAlpha(decodeUTF()))
+        goto Lidentifier;
       c = *++p;
     }
   }
 
+  uint decodeUTF() // Decode the multi-byte UTF-8 sequence starting at p; returns whatever std.utf.decode yields (presumably the code point).
+  { // Side effect: advances p (see the `p +=` line below).
+    assert(*p & 128); // Precondition: the byte at p has its high bit set, i.e. it is not 7-bit ASCII.
+    size_t idx; // Index handed to decode; D default-initializes it to 0.
+    uint d; // Receives the decoded value.
+    d = std.utf.decode(p[0 .. end-p], idx); // Decode from the slice p..end (`end` is declared outside this hunk); decode presumably moves idx past the sequence and throws on malformed input - TODO confirm.
+    p += idx -1; // Stop ON the last byte of the sequence rather than past it: the call sites in this patch follow up with `*++p`.
+    return d; // NOTE(review): p stays advanced even if the caller's isUniAlpha() test then fails - confirm this is intended at the identifier loop, where t.end is set from p.
+  }
+
   public TOK nextToken()
   {
     scan(this.token);