changeset 1614:dbf7b54f542f

Merge DMD r292: bugzilla 3455 Some Unicode characters not allowed in identifiers. --- dmd/lexer.c | 34 ++++++++++++++++++++++------------ 1 files changed, 22 insertions(+), 12 deletions(-)
author Leandro Lucarella <llucax@gmail.com>
date Wed, 06 Jan 2010 15:18:21 -0300
parents 8f50a13d09a0
children 3da302cc4966
files dmd/lexer.c
diffstat 1 files changed, 23 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/dmd/lexer.c	Wed Jan 06 15:18:21 2010 -0300
+++ b/dmd/lexer.c	Wed Jan 06 15:18:21 2010 -0300
@@ -634,15 +634,25 @@
 	    case '_':
 	    case_ident:
 	    {   unsigned char c;
-		StringValue *sv;
-		Identifier *id;
-
-		do
+
+		while (1)
 		{
 		    c = *++p;
-		} while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
-		sv = stringtable.update((char *)t->ptr, p - t->ptr);
-		id = (Identifier *) sv->ptrvalue;
+		    if (isidchar(c))
+			continue;
+		    else if (c & 0x80)
+		    {	unsigned char *s = p;
+			unsigned u = decodeUTF();
+			if (isUniAlpha(u))
+			    continue;
+			error("char 0x%04x not allowed in identifier", u);
+			p = s;
+		    }
+		    break;
+		}
+
+		StringValue *sv = stringtable.update((char *)t->ptr, p - t->ptr);
+		Identifier *id = (Identifier *) sv->ptrvalue;
 		if (!id)
 		{   id = new Identifier(sv->lstring.string,TOKidentifier);
 		    sv->ptrvalue = id;
@@ -1177,23 +1187,23 @@
 		continue;
 
 	    default:
-	    {	unsigned char c = *p;
+	    {	unsigned c = *p;
 
 		if (c & 0x80)
-		{   unsigned u = decodeUTF();
+		{   c = decodeUTF();
 
 		    // Check for start of unicode identifier
-		    if (isUniAlpha(u))
+		    if (isUniAlpha(c))
 			goto case_ident;
 
-		    if (u == PS || u == LS)
+		    if (c == PS || c == LS)
 		    {
 			loc.linnum++;
 			p++;
 			continue;
 		    }
 		}
-		if (isprint(c))
+		if (c < 0x80 && isprint(c))
 		    error("unsupported char '%c'", c);
 		else
 		    error("unsupported char 0x%02x", c);
@@ -1455,7 +1465,7 @@
 		    if (u == PS || u == LS)
 			loc.linnum++;
 		    else
-			error("non-hex character \\u%x", u);
+			error("non-hex character \\u%04x", u);
 		}
 		else
 		    error("non-hex character '%c'", c);