Mercurial > projects > dil
changeset 12:0989206cf73c
- Added code to decode Unicode characters in identifiers.
author | aziz |
---|---|
date | Sat, 23 Jun 2007 10:02:00 +0000 |
parents | dffcdaa7c47a |
children | e5211758b63c |
files | trunk/src/Lexer.d |
diffstat | 1 files changed, 15 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/trunk/src/Lexer.d Sat Jun 23 08:54:00 2007 +0000
+++ b/trunk/src/Lexer.d Sat Jun 23 10:02:00 2007 +0000
@@ -103,9 +103,10 @@
 
       if (isident(c) && !isdigit(c))
       {
+      Lidentifier:
         do
         { c = *++p; }
-        while (isident(c))
+        while (isident(c) || c & 128 && isUniAlpha(decodeUTF()))
         t.type = TOK.Identifier;
         t.end = p;
         return;
@@ -195,10 +196,23 @@
         t.end = p;
         return;
       }
+
+      if (c & 128 && isUniAlpha(decodeUTF()))
+        goto Lidentifier;
       c = *++p;
     }
   }
 
+  uint decodeUTF()
+  {
+    assert(*p & 128);
+    size_t idx;
+    uint d;
+    d = std.utf.decode(p[0 .. end-p], idx);
+    p += idx -1;
+    return d;
+  }
+
   public TOK nextToken()
   {
     scan(this.token);