annotate trunk/src/Lexer.d @ 12:0989206cf73c

- Added code to decode Unicode characters in identifiers.
author aziz
date Sat, 23 Jun 2007 10:02:00 +0000
parents dffcdaa7c47a
children e5211758b63c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
8ba2570de175 Initial import.
aziz
parents:
diff changeset
3 License: GPL2
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
8ba2570de175 Initial import.
aziz
parents:
diff changeset
5 module Lexer;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
6 import Token;
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
7 import std.stdio;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
8 import std.utf;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
9 import std.uni;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10
8ba2570de175 Initial import.
aziz
parents:
diff changeset
11 /// ASCII character properties table.
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
12 static const int ptable[256] = [
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
13 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,23,23,23,23,23,23,23,23,22,22, 0, 0, 0, 0, 0, 0,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
15 0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0,16,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
16 0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0, 0,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
20 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
21 ];
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
22
8ba2570de175 Initial import.
aziz
parents:
diff changeset
23 enum CProperty
8ba2570de175 Initial import.
aziz
parents:
diff changeset
24 {
1
f3cd3bfde4ba - Corrected some errors to make the file compile.
aziz
parents: 0
diff changeset
25 Octal = 1,
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
26 Digit = 1<<1,
8ba2570de175 Initial import.
aziz
parents:
diff changeset
27 Hex = 1<<2,
8ba2570de175 Initial import.
aziz
parents:
diff changeset
28 Alpha = 1<<3,
8ba2570de175 Initial import.
aziz
parents:
diff changeset
29 Identifier = 1<<4
8ba2570de175 Initial import.
aziz
parents:
diff changeset
30 }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
31
8ba2570de175 Initial import.
aziz
parents:
diff changeset
32 int isoctal(char c) { return ptable[c] & CProperty.Octal; }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
33 int isdigit(char c) { return ptable[c] & CProperty.Digit; }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
34 int ishexad(char c) { return ptable[c] & CProperty.Hex; }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
35 int isalpha(char c) { return ptable[c] & CProperty.Alpha; }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
36 int isalnum(char c) { return ptable[c] & (CProperty.Alpha | CProperty.Digit); }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
37 int isident(char c) { return ptable[c] & CProperty.Identifier; }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
38 /+
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
39 static this()
8ba2570de175 Initial import.
aziz
parents:
diff changeset
40 {
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
41 // Initialize character properties table.
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
42 for (int i; i < ptable.length; ++i)
8ba2570de175 Initial import.
aziz
parents:
diff changeset
43 {
8ba2570de175 Initial import.
aziz
parents:
diff changeset
44 if ('0' <= i && i <= '7')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
45 ptable[i] |= CProperty.Octal;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
46 if ('0' <= i && i <= '9')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
47 ptable[i] |= CProperty.Digit;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
48 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
49 ptable[i] |= CProperty.Hex;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
50 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
51 ptable[i] |= CProperty.Alpha;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
52 if (isalnum(i) || i == '_')
1
f3cd3bfde4ba - Corrected some errors to make the file compile.
aziz
parents: 0
diff changeset
53 ptable[i] |= CProperty.Identifier;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
54 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
55 // Print a formatted array literal.
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
56 char[] array = "[\n";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
57 for (int i; i < ptable.length; ++i)
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
58 {
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
59 char c = ptable[i];
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
60 array ~= std.string.format("%2d,", c, ((i+1) % 32) ? "":"\n");
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
61 }
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
62 array.length = array.length - 2; // remove ",\n"
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
63 array ~= "\n]";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
64 writefln(array);
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
65 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
66 +/
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
67
11
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
68 const char[3] LS = \u2028;
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
69 const char[3] PS = \u2029;
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
70
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
71 class Lexer
8ba2570de175 Initial import.
aziz
parents:
diff changeset
72 {
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
73 Token token;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
74 char[] text;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
75 char* p;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
76 char* end;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
77
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
78 this(char[] text)
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
79 {
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
80 this.text = text;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
81 this.text.length = this.text.length + 1;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
82 this.text[$-1] = 0;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
83
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
84 this.p = this.text.ptr;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
85 this.end = this.p + this.text.length;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
86 }
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
87
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
88 public void scan(out Token t)
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
89 {
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
90 assert(p < end);
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
91
10
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
92 uint c = *p;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
93
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
94 while(1)
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
95 {
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
96 t.start = p;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
97 if (c == 0)
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
98 {
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
99 t.type = TOK.EOF;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
100 t.end = p+1;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
101 return;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
102 }
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
103
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
104 if (isident(c) && !isdigit(c))
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
105 {
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
106 Lidentifier:
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
107 do
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
108 { c = *++p; }
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
109 while (isident(c) || c & 128 && isUniAlpha(decodeUTF()))
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
110 t.type = TOK.Identifier;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
111 t.end = p;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
112 return;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
113 }
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
114
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
115 if (c == '/')
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
116 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
117 c = *++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
118 if (c == '+')
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
119 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
120 uint level = 1;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
121 do
7
07e45c06a024 - Parsing nested comments correctly now.
aziz
parents: 5
diff changeset
122 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
123 c = *++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
124 if (c == 0)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
125 throw new Error("unterminated /+ +/ comment.");
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
126 else if (c == '/' && p[1] == '+')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
127 {
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
128 ++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
129 ++level;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
130 }
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
131 else if (c == '+' && p[1] == '/')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
132 {
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
133 ++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
134 if (--level == 0)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
135 break;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
136 }
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
137 } while (1)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
138 p += 2;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
139 t.type = TOK.Comment;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
140 t.end = p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
141 return;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
142 }
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
143 else if (c == '*')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
144 {
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
145 do
7
07e45c06a024 - Parsing nested comments correctly now.
aziz
parents: 5
diff changeset
146 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
147 c = *++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
148 if (c == 0)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
149 throw new Error("unterminated /* */ comment.");
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
150 } while (c != '*' || p[1] != '/')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
151 p += 2;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
152 t.type = TOK.Comment;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
153 t.end = p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
154 return;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
155 }
10
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
156 else if (c == '/')
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
157 {
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
158 do
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
159 {
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
160 c = *++p;
11
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
161 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
162 break;
10
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
163 } while (c != '\n' && c != 0)
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
164 t.type = TOK.Comment;
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
165 t.end = p;
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
166 return;
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
167 }
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
168 }
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
169
9
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
170 if (c == '"')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
171 {
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
172 do {
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
173 c = *++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
174 if (c == 0)
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
175 throw new Error("unterminated string literal.");
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
176 if (c == '\\')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
177 ++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
178 } while (c != '"')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
179 ++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
180 t.type = TOK.String;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
181 t.end = p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
182 return;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
183 }
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
184
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
185 if (c == '\'')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
186 {
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
187 do {
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
188 c = *++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
189 if (c == 0)
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
190 throw new Error("unterminated character literal.");
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
191 if (c == '\\')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
192 ++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
193 } while (c != '\'')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
194 ++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
195 t.type = TOK.Character;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
196 t.end = p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
197 return;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
198 }
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
199
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
200 if (c & 128 && isUniAlpha(decodeUTF()))
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
201 goto Lidentifier;
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
202 c = *++p;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
203 }
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
204 }
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
205
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
206 uint decodeUTF()
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
207 {
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
208 assert(*p & 128);
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
209 size_t idx;
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
210 uint d;
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
211 d = std.utf.decode(p[0 .. end-p], idx);
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
212 p += idx -1;
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
213 return d;
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
214 }
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
215
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
216 public TOK nextToken()
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
217 {
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
218 scan(this.token);
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
219 return this.token.type;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
220 }
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
221
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
222 Token[] getTokens()
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
223 {
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
224 Token[] tokens;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
225 while (nextToken() != TOK.EOF)
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
226 tokens ~= this.token;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
227 tokens ~= this.token;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
228 return tokens;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
229 }
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
230 }