projects/dil: trunk/src/Lexer.d comparison

comparison trunk/src/Lexer.d @ 18:c48d2125f1e2

- Moved code for scanning character literals to a separate function. - Added class Problem and Messages table.

author	aziz
date	Sat, 23 Jun 2007 21:48:03 +0000
parents	9bd0bac79479
children	f85832f9f24e

comparison

equal deleted inserted replaced

-:9bd0bac79479
+:c48d2125f1e2
 }
 /// Unicode LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029),
 /// stored as three-element char arrays so they can be compared
 /// unit by unit against the raw source text (see the p[0..2] checks).
 const char[3] LS = \u2028;
 const char[3] PS = \u2029;
+/// The same two characters as single dchar code points, for comparing
+/// against a value that has already been decoded (e.g. by decodeUTF()).
+const dchar LSd = 0x2028;
+const dchar PSd = 0x2029;
+/// Index into table of error messages.
+/// Each member is meant to select the corresponding string in Messages,
+/// so the members here and the strings there have to stay in the same order.
+enum MID
+{
+// A character literal that is not closed by a single quote.
+UnterminatedCharacterLiteral,
+// A character literal with nothing between the quotes ('').
+EmptyCharacterLiteral
+}
+/// Table of error message texts, one entry per MID member and in the
+/// same order as the members of MID.
+/// The separating commas are essential: adjacent string literals are
+/// concatenated into a single string, which would leave the table one
+/// element short.
+string[] Messages = [
+  "unterminated character literal.", // MID.UnterminatedCharacterLiteral
+  "empty character literal."         // MID.EmptyCharacterLiteral
+];
+/// Record of a single problem: which stage reported it, which message
+/// it refers to, and the location value supplied by the reporter.
+class Problem
+{
+  /// The stage a problem is attributed to.
+  enum Type
+  {
+    Lexer,
+    Parser,
+    Semantic
+  }
+  MID id;    /// message identifier
+  Type type; /// reporting stage
+  uint loc;  /// location as passed in (the Lexer passes its line counter)
+  /// Stores the three arguments unchanged in the fields of the same name.
+  this(Type type, MID id, uint loc)
+  {
+    this.type = type;
+    this.id = id;
+    this.loc = loc;
+  }
+}
 class Lexer
 {
 Token token;
 char[] text;
 char* p;
 char* end;
 uint loc = 1; /// line of code
+Problem[] errors;
 this(char[] text)
 {
 this.text = text;
 this.text.length = this.text.length + 1;
 if (c == '/')
 {
 c = *++p;
 switch(c)
 {
 case '=':
 ++p;
 t.type = TOK.DivisionAssign;
 t.end = p;
 return;
 case '+':
 uint level = 1;
 do
 {
 c = *++p;
 if (c == 0)
 } while (1)
 p += 2;
 t.type = TOK.Comment;
 t.end = p;
 return;
 case '*':
 do
 {
 c = *++p;
 if (c == 0)
 throw new Error("unterminated /* */ comment.");
 } while (c != '*' || p[1] != '/')
 p += 2;
 t.type = TOK.Comment;
 t.end = p;
 return;
 case '/':
 do
 {
 c = *++p;
 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
 break;
 t.end = p;
 return;
 }
 if (c == '\'')
-{
+return scanCharacterLiteral(t);
-do {
-c = *++p;
-if (c == 0)
-throw new Error("unterminated character literal.");
-if (c == '\\')
-++p;
-} while (c != '\'')
-++p;
-t.type = TOK.Character;
-t.end = p;
-return;
-}
 if (c & 128 && isUniAlpha(decodeUTF()))
 goto Lidentifier;
 c = *++p;
 }
+}
+/// Scans a character literal into t.
+/// On entry p must point at the opening single quote. On exit t.type is
+/// TOK.Character and t.end is the position scanning stopped at.
+/// Problems are recorded through error() rather than thrown:
+/// UnterminatedCharacterLiteral by default, EmptyCharacterLiteral for ''.
+/// For a well-formed literal t.chr receives the character value.
+void scanCharacterLiteral(ref Token t)
+{
+  assert(*p == '\'');
+  MID id = MID.UnterminatedCharacterLiteral;
+  uint c = *++p;
+  switch(c)
+  {
+  case '\\':
+    // Only the escape sequence \' is recognised here; anything else
+    // after the backslash is reported as an error.
+    ++p;
+    if (*p != '\'')
+      goto Lerr;
+    t.chr = '\'';
+    break;
+  // NOTE(review): 161 looks odd next to 0 as an end-of-input marker;
+  // D's other end-of-file character is 0x1A (26) - confirm the intent.
+  case 0, 161, '\n', '\r':
+    goto Lerr;
+  case '\'':
+    id = MID.EmptyCharacterLiteral;
+    goto Lerr;
+  default:
+    if (c & 128)
+    {
+      // Multi-byte sequence. decodeUTF() presumably leaves p on the last
+      // byte of the sequence (p += idx - 1 in the decode helper) -
+      // TODO confirm.
+      c = decodeUTF();
+      if (c == LSd || c == PSd)
+        goto Lerr;
+    }
+    // Store the value for plain ASCII characters as well, not only for
+    // decoded ones.
+    t.chr = c;
+  }
+  // p still points at the (last byte of the) character; step past it so
+  // that the check below looks at the closing quote. Without this step a
+  // literal such as 'a' was always reported as unterminated.
+  ++p;
+  if (*p != '\'')
+  Lerr:
+    error(id);
+  // Consume the closing quote. On the error paths this skips whatever p
+  // stopped at. NOTE(review): that includes a terminating 0 or a newline.
+  ++p;
+  t.type = TOK.Character;
+  t.end = p;
 }
 void scanNumber(ref Token t)
 {
 while (isdigit(*++p)) {}
 d = std.utf.decode(p[0 .. end-p], idx);
 p += idx -1;
 return d;
 }
+/// Appends a Problem of type Lexer with message id `id`, tagged with the
+/// current value of loc, to the errors array.
+void error(MID id)
+{
+  Problem problem = new Problem(Problem.Type.Lexer, id, loc);
+  errors ~= problem;
+}
 /// Scans the next token into the lexer's token member and returns its type.
 public TOK nextToken()
 {
   scan(token);
   return token.type;
 }

Mercurial > projects > dil

comparison trunk/src/Lexer.d @ 18:c48d2125f1e2