Mercurial > projects > dil
annotate trunk/src/Lexer.d @ 23:1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
author | aziz |
---|---|
date | Sun, 24 Jun 2007 11:13:01 +0000 |
parents | b05fff8e2ce4 |
children | 903f91163f23 |
rev | line source |
---|---|
0 | 1 /++ |
2 Author: Aziz Köksal | |
3 License: GPL2 | |
4 +/ | |
5 module Lexer; | |
3 | 6 import Token; |
2 | 7 import std.stdio; |
4 | 8 import std.utf; |
9 import std.uni; | |
0 | 10 |
11 /// ASCII character properties table. | |
2 | 12 static const int ptable[256] = [ |
16 | 13 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0, |
13 | 14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | 15 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
13 | 16 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, |
17 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
18 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,16, | |
19 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
20 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, | |
21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
22 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
23 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
2 | 29 ]; |
0 | 30 |
31 enum CProperty | |
32 { | |
1 | 33 Octal = 1, |
0 | 34 Digit = 1<<1, |
35 Hex = 1<<2, | |
36 Alpha = 1<<3, | |
13 | 37 Underscore = 1<<4, |
16 | 38 Whitespace = 1<<5 |
0 | 39 } |
40 | |
13 | 41 private alias CProperty CP; |
42 int isoctal(char c) { return ptable[c] & CP.Octal; } | |
43 int isdigit(char c) { return ptable[c] & CP.Digit; } | |
44 int ishexad(char c) { return ptable[c] & CP.Hex; } | |
45 int isalpha(char c) { return ptable[c] & CP.Alpha; } | |
46 int isalnum(char c) { return ptable[c] & (CP.Alpha | CP.Digit); } | |
47 int isidbeg(char c) { return ptable[c] & (CP.Alpha | CP.Underscore); } | |
48 int isident(char c) { return ptable[c] & (CP.Alpha | CP.Underscore | CP.Digit); } | |
16 | 49 int isspace(char c) { return ptable[c] & CP.Whitespace; } |
13 | 50 |
51 version(gen_ptable) | |
0 | 52 static this() |
53 { | |
2 | 54 // Initialize character properties table. |
0 | 55 for (int i; i < ptable.length; ++i) |
56 { | |
13 | 57 ptable[i] = 0; |
0 | 58 if ('0' <= i && i <= '7') |
13 | 59 ptable[i] |= CP.Octal; |
0 | 60 if ('0' <= i && i <= '9') |
13 | 61 ptable[i] |= CP.Digit; |
0 | 62 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F') |
13 | 63 ptable[i] |= CP.Hex; |
0 | 64 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z') |
13 | 65 ptable[i] |= CP.Alpha; |
66 if (i == '_') | |
67 ptable[i] |= CP.Underscore; | |
16 | 68 if (i == ' ' || i == '\t' || i == '\v'|| i == '\f') |
69 ptable[i] |= CP.Whitespace; | |
0 | 70 } |
2 | 71 // Print a formatted array literal. |
72 char[] array = "[\n"; | |
73 for (int i; i < ptable.length; ++i) | |
74 { | |
13 | 75 int c = ptable[i]; |
76 array ~= std.string.format("%2d,", c, ((i+1) % 16) ? "":"\n"); | |
2 | 77 } |
13 | 78 array[$-2..$] = "\n]"; |
2 | 79 writefln(array); |
0 | 80 } |
81 | |
11 | 82 const char[3] LS = \u2028; |
83 const char[3] PS = \u2029; | |
84 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
85 const dchar LSd = 0x2028; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
86 const dchar PSd = 0x2029; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
87 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
88 /// Index into table of error messages. |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
89 enum MID |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
90 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
91 UnterminatedCharacterLiteral, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
92 EmptyCharacterLiteral |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
93 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
94 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
95 string[] Messages = [ |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
96 "unterminated character literal." |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
97 "empty character literal." |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
98 ]; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
99 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
100 class Problem |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
101 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
102 enum Type |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
103 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
104 Lexer, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
105 Parser, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
106 Semantic |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
107 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
108 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
109 MID id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
110 Type type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
111 uint loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
112 this(Type type, MID id, uint loc) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
113 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
114 this.id = id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
115 this.type = type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
116 this.loc = loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
117 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
118 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
119 |
0 | 120 class Lexer |
121 { | |
4 | 122 Token token; |
123 char[] text; | |
124 char* p; | |
125 char* end; | |
126 | |
17 | 127 uint loc = 1; /// line of code |
128 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
129 Problem[] errors; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
130 |
4 | 131 this(char[] text) |
132 { | |
133 this.text = text; | |
134 this.text.length = this.text.length + 1; | |
135 this.text[$-1] = 0; | |
136 | |
137 this.p = this.text.ptr; | |
138 this.end = this.p + this.text.length; | |
139 } | |
140 | |
141 public void scan(out Token t) | |
3 | 142 { |
4 | 143 assert(p < end); |
0 | 144 |
10 | 145 uint c = *p; |
4 | 146 |
5 | 147 while(1) |
4 | 148 { |
5 | 149 t.start = p; |
16 | 150 |
5 | 151 if (c == 0) |
152 { | |
17 | 153 ++p; |
5 | 154 t.type = TOK.EOF; |
17 | 155 t.end = p; |
5 | 156 return; |
157 } | |
4 | 158 |
17 | 159 if (c == '\n') |
160 { | |
161 c = *++p; | |
162 ++loc; | |
163 continue; | |
164 } | |
165 else if (c == '\r') | |
166 { | |
167 c = *++p; | |
168 if (c != '\n') | |
169 ++loc; | |
170 continue; | |
171 } | |
172 | |
13 | 173 if (isidbeg(c)) |
5 | 174 { |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
175 Lidentifier: |
5 | 176 do |
177 { c = *++p; } | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
178 while (isident(c) || c & 128 && isUniAlpha(decodeUTF())) |
5 | 179 t.type = TOK.Identifier; |
180 t.end = p; | |
181 return; | |
182 } | |
183 | |
15 | 184 if (isdigit(c)) |
185 return scanNumber(t); | |
186 | |
8 | 187 if (c == '/') |
5 | 188 { |
8 | 189 c = *++p; |
14 | 190 switch(c) |
5 | 191 { |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
192 case '=': |
14 | 193 ++p; |
194 t.type = TOK.DivisionAssign; | |
195 t.end = p; | |
196 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
197 case '+': |
8 | 198 uint level = 1; |
199 do | |
7 | 200 { |
8 | 201 c = *++p; |
202 if (c == 0) | |
203 throw new Error("unterminated /+ +/ comment."); | |
204 else if (c == '/' && p[1] == '+') | |
205 { | |
206 ++p; | |
207 ++level; | |
208 } | |
209 else if (c == '+' && p[1] == '/') | |
210 { | |
211 ++p; | |
212 if (--level == 0) | |
213 break; | |
214 } | |
215 } while (1) | |
216 p += 2; | |
217 t.type = TOK.Comment; | |
218 t.end = p; | |
219 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
220 case '*': |
8 | 221 do |
7 | 222 { |
8 | 223 c = *++p; |
224 if (c == 0) | |
225 throw new Error("unterminated /* */ comment."); | |
226 } while (c != '*' || p[1] != '/') | |
227 p += 2; | |
228 t.type = TOK.Comment; | |
229 t.end = p; | |
230 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
231 case '/': |
10 | 232 do |
233 { | |
234 c = *++p; | |
11 | 235 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
236 break; | |
10 | 237 } while (c != '\n' && c != 0) |
238 t.type = TOK.Comment; | |
239 t.end = p; | |
240 return; | |
241 } | |
5 | 242 } |
243 | |
9
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
244 if (c == '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
245 { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
246 do { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
247 c = *++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
248 if (c == 0) |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
249 throw new Error("unterminated string literal."); |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
250 if (c == '\\') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
251 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
252 } while (c != '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
253 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
254 t.type = TOK.String; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
255 t.end = p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
256 return; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
257 } |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
258 |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
259 if (c == '\'') |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
260 return scanCharacterLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
261 |
20 | 262 switch(c) |
263 { | |
22
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
264 case '.': |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
265 if (p[1] == '.') |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
266 { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
267 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
268 if (p[1] == '.') { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
269 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
270 t.type = TOK.Ellipses; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
271 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
272 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
273 t.type = TOK.Slice; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
274 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
275 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
276 t.type = TOK.Dot; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
277 goto Lcommon; |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
278 case '|': |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
279 c = *++p; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
280 if (c == '=') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
281 t.type = TOK.OrAssign; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
282 else if (c == '|') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
283 t.type = TOK.OrLogical; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
284 else |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
285 t.type = TOK.OrBinary; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
286 goto Lcommon; |
20 | 287 case '(': |
288 t.type = TOK.LParen; | |
289 goto Lcommon; | |
290 case ')': | |
291 t.type = TOK.RParen; | |
292 goto Lcommon; | |
293 case '[': | |
294 t.type = TOK.LBracket; | |
295 goto Lcommon; | |
296 case ']': | |
297 t.type = TOK.RBracket; | |
298 goto Lcommon; | |
299 case '{': | |
300 t.type = TOK.LBrace; | |
301 goto Lcommon; | |
302 case '}': | |
303 t.type = TOK.RBrace; | |
21
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
304 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
305 case ':': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
306 t.type = TOK.Colon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
307 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
308 case ';': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
309 t.type = TOK.Semicolon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
310 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
311 case '?': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
312 t.type = TOK.Question; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
313 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
314 case ',': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
315 t.type = TOK.Comma; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
316 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
317 case '$': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
318 t.type = TOK.Dollar; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
319 goto Lcommon; |
20 | 320 Lcommon: |
321 ++p; | |
322 t.end = p; | |
323 return; | |
324 default: | |
325 } | |
326 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
327 if (c & 128 && isUniAlpha(decodeUTF())) |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
328 goto Lidentifier; |
5 | 329 c = *++p; |
4 | 330 } |
3 | 331 } |
4 | 332 |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
333 void scanCharacterLiteral(ref Token t) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
334 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
335 assert(*p == '\''); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
336 MID id = MID.UnterminatedCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
337 uint c = *++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
338 switch(c) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
339 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
340 case '\\': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
341 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
342 break; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
343 case 0, 161, '\n', '\r': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
344 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
345 case '\'': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
346 id = MID.EmptyCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
347 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
348 default: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
349 if (c & 128) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
350 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
351 c = decodeUTF(); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
352 if (c == LSd || c == PSd) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
353 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
354 t.chr = c; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
355 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
356 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
357 |
19 | 358 ++p; |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
359 if (*p != '\'') |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
360 Lerr: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
361 error(id); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
362 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
363 t.type = TOK.Character; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
364 t.end = p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
365 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
366 |
15 | 367 void scanNumber(ref Token t) |
368 { | |
369 while (isdigit(*++p)) {} | |
370 t.type = TOK.Number; | |
371 t.end = p; | |
372 } | |
373 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
374 uint decodeUTF() |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
375 { |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
376 assert(*p & 128); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
377 size_t idx; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
378 uint d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
379 d = std.utf.decode(p[0 .. end-p], idx); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
380 p += idx -1; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
381 return d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
382 } |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
383 |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
384 void error(MID id) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
385 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
386 errors ~= new Problem(Problem.Type.Lexer, id, loc); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
387 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
388 |
3 | 389 public TOK nextToken() |
390 { | |
4 | 391 scan(this.token); |
392 return this.token.type; | |
393 } | |
394 | |
395 Token[] getTokens() | |
396 { | |
397 Token[] tokens; | |
398 while (nextToken() != TOK.EOF) | |
399 tokens ~= this.token; | |
400 tokens ~= this.token; | |
401 return tokens; | |
3 | 402 } |
5 | 403 } |