# HG changeset patch # User aziz # Date 1182592920 0 # Node ID 0989206cf73cdef51cbac42d4473ffe08779d646 # Parent dffcdaa7c47a211efe2cefb23f4cad340bb4b9a9 - Added code to decode Unicode characters in identifiers. diff -r dffcdaa7c47a -r 0989206cf73c trunk/src/Lexer.d --- a/trunk/src/Lexer.d Sat Jun 23 08:54:00 2007 +0000 +++ b/trunk/src/Lexer.d Sat Jun 23 10:02:00 2007 +0000 @@ -103,9 +103,10 @@ if (isident(c) && !isdigit(c)) { + Lidentifier: do { c = *++p; } - while (isident(c)) + while (isident(c) || c & 128 && isUniAlpha(decodeUTF())) t.type = TOK.Identifier; t.end = p; return; @@ -195,10 +196,23 @@ t.end = p; return; } + + if (c & 128 && isUniAlpha(decodeUTF())) + goto Lidentifier; c = *++p; } } + uint decodeUTF() + { + assert(*p & 128); + size_t idx; + uint d; + d = std.utf.decode(p[0 .. end-p], idx); + p += idx -1; + return d; + } + public TOK nextToken() { scan(this.token);