Mercurial > projects > dil
annotate trunk/src/Lexer.d @ 28:3a9daccf7d96
- Added table for identifiers to Lexer.
- Added keywords table.
- Added keywords to TOK.
author | aziz |
---|---|
date | Sun, 24 Jun 2007 17:19:03 +0000 |
parents | 43b6bf56f0e9 |
children | ef83eea26bbd |
rev | line source |
---|---|
0 | 1 /++ |
2 Author: Aziz Köksal | |
3 License: GPL2 | |
4 +/ | |
5 module Lexer; | |
3 | 6 import Token; |
28 | 7 import Keywords; |
8 import Identifier; | |
2 | 9 import std.stdio; |
4 | 10 import std.utf; |
11 import std.uni; | |
0 | 12 |
13 /// ASCII character properties table. | |
2 | 14 static const int ptable[256] = [ |
16 | 15 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0, |
13 | 16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | 17 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
13 | 18 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, |
19 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
20 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,16, | |
21 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
22 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, | |
23 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
2 | 31 ]; |
0 | 32 |
33 enum CProperty | |
34 { | |
1 | 35 Octal = 1, |
0 | 36 Digit = 1<<1, |
37 Hex = 1<<2, | |
38 Alpha = 1<<3, | |
13 | 39 Underscore = 1<<4, |
16 | 40 Whitespace = 1<<5 |
0 | 41 } |
42 | |
13 | 43 private alias CProperty CP; |
44 int isoctal(char c) { return ptable[c] & CP.Octal; } | |
45 int isdigit(char c) { return ptable[c] & CP.Digit; } | |
46 int ishexad(char c) { return ptable[c] & CP.Hex; } | |
47 int isalpha(char c) { return ptable[c] & CP.Alpha; } | |
48 int isalnum(char c) { return ptable[c] & (CP.Alpha | CP.Digit); } | |
49 int isidbeg(char c) { return ptable[c] & (CP.Alpha | CP.Underscore); } | |
50 int isident(char c) { return ptable[c] & (CP.Alpha | CP.Underscore | CP.Digit); } | |
16 | 51 int isspace(char c) { return ptable[c] & CP.Whitespace; } |
13 | 52 |
53 version(gen_ptable) | |
0 | 54 static this() |
55 { | |
2 | 56 // Initialize character properties table. |
0 | 57 for (int i; i < ptable.length; ++i) |
58 { | |
13 | 59 ptable[i] = 0; |
0 | 60 if ('0' <= i && i <= '7') |
13 | 61 ptable[i] |= CP.Octal; |
0 | 62 if ('0' <= i && i <= '9') |
13 | 63 ptable[i] |= CP.Digit; |
0 | 64 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F') |
13 | 65 ptable[i] |= CP.Hex; |
0 | 66 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z') |
13 | 67 ptable[i] |= CP.Alpha; |
68 if (i == '_') | |
69 ptable[i] |= CP.Underscore; | |
16 | 70 if (i == ' ' || i == '\t' || i == '\v'|| i == '\f') |
71 ptable[i] |= CP.Whitespace; | |
0 | 72 } |
2 | 73 // Print a formatted array literal. |
74 char[] array = "[\n"; | |
75 for (int i; i < ptable.length; ++i) | |
76 { | |
13 | 77 int c = ptable[i]; |
78 array ~= std.string.format("%2d,", c, ((i+1) % 16) ? "":"\n"); | |
2 | 79 } |
13 | 80 array[$-2..$] = "\n]"; |
2 | 81 writefln(array); |
0 | 82 } |
83 | |
11 | 84 const char[3] LS = \u2028; |
85 const char[3] PS = \u2029; | |
86 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
87 const dchar LSd = 0x2028; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
88 const dchar PSd = 0x2029; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
89 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
90 /// Index into table of error messages. |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
91 enum MID |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
92 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
93 UnterminatedCharacterLiteral, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
94 EmptyCharacterLiteral |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
95 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
96 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
97 string[] Messages = [ |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
98 "unterminated character literal." |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
99 "empty character literal." |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
100 ]; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
101 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
102 class Problem |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
103 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
104 enum Type |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
105 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
106 Lexer, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
107 Parser, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
108 Semantic |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
109 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
110 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
111 MID id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
112 Type type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
113 uint loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
114 this(Type type, MID id, uint loc) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
115 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
116 this.id = id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
117 this.type = type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
118 this.loc = loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
119 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
120 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
121 |
0 | 122 class Lexer |
123 { | |
4 | 124 Token token; |
125 char[] text; | |
126 char* p; | |
127 char* end; | |
128 | |
17 | 129 uint loc = 1; /// line of code |
130 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
131 Problem[] errors; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
132 |
28 | 133 Identifier[string] idtable; |
134 | |
4 | 135 this(char[] text) |
136 { | |
137 this.text = text; | |
138 this.text.length = this.text.length + 1; | |
139 this.text[$-1] = 0; | |
140 | |
141 this.p = this.text.ptr; | |
142 this.end = this.p + this.text.length; | |
28 | 143 |
144 loadKeywords(); | |
4 | 145 } |
146 | |
147 public void scan(out Token t) | |
3 | 148 { |
4 | 149 assert(p < end); |
0 | 150 |
10 | 151 uint c = *p; |
4 | 152 |
5 | 153 while(1) |
4 | 154 { |
5 | 155 t.start = p; |
16 | 156 |
5 | 157 if (c == 0) |
158 { | |
17 | 159 ++p; |
5 | 160 t.type = TOK.EOF; |
17 | 161 t.end = p; |
5 | 162 return; |
163 } | |
4 | 164 |
17 | 165 if (c == '\n') |
166 { | |
167 c = *++p; | |
168 ++loc; | |
169 continue; | |
170 } | |
171 else if (c == '\r') | |
172 { | |
173 c = *++p; | |
174 if (c != '\n') | |
175 ++loc; | |
176 continue; | |
177 } | |
178 | |
13 | 179 if (isidbeg(c)) |
5 | 180 { |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
181 Lidentifier: |
5 | 182 do |
183 { c = *++p; } | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
184 while (isident(c) || c & 128 && isUniAlpha(decodeUTF())) |
28 | 185 |
5 | 186 t.end = p; |
28 | 187 |
188 string str = t.span; | |
189 Identifier* id = str in idtable; | |
190 | |
191 if (!id) | |
192 { | |
193 idtable[str] = Identifier.Identifier(TOK.Identifier, str); | |
194 id = str in idtable; | |
195 } | |
196 assert(id); | |
197 t.type = id.type; | |
5 | 198 return; |
199 } | |
200 | |
15 | 201 if (isdigit(c)) |
202 return scanNumber(t); | |
203 | |
8 | 204 if (c == '/') |
5 | 205 { |
8 | 206 c = *++p; |
14 | 207 switch(c) |
5 | 208 { |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
209 case '=': |
14 | 210 ++p; |
211 t.type = TOK.DivisionAssign; | |
212 t.end = p; | |
213 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
214 case '+': |
8 | 215 uint level = 1; |
216 do | |
7 | 217 { |
8 | 218 c = *++p; |
219 if (c == 0) | |
220 throw new Error("unterminated /+ +/ comment."); | |
221 else if (c == '/' && p[1] == '+') | |
222 { | |
223 ++p; | |
224 ++level; | |
225 } | |
226 else if (c == '+' && p[1] == '/') | |
227 { | |
228 ++p; | |
229 if (--level == 0) | |
230 break; | |
231 } | |
232 } while (1) | |
233 p += 2; | |
234 t.type = TOK.Comment; | |
235 t.end = p; | |
236 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
237 case '*': |
8 | 238 do |
7 | 239 { |
8 | 240 c = *++p; |
241 if (c == 0) | |
242 throw new Error("unterminated /* */ comment."); | |
243 } while (c != '*' || p[1] != '/') | |
244 p += 2; | |
245 t.type = TOK.Comment; | |
246 t.end = p; | |
247 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
248 case '/': |
10 | 249 do |
250 { | |
251 c = *++p; | |
11 | 252 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
253 break; | |
10 | 254 } while (c != '\n' && c != 0) |
255 t.type = TOK.Comment; | |
256 t.end = p; | |
257 return; | |
258 } | |
5 | 259 } |
260 | |
9
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
261 if (c == '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
262 { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
263 do { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
264 c = *++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
265 if (c == 0) |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
266 throw new Error("unterminated string literal."); |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
267 if (c == '\\') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
268 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
269 } while (c != '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
270 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
271 t.type = TOK.String; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
272 t.end = p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
273 return; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
274 } |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
275 |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
276 if (c == '\'') |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
277 return scanCharacterLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
278 |
20 | 279 switch(c) |
280 { | |
22
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
281 case '.': |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
282 if (p[1] == '.') |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
283 { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
284 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
285 if (p[1] == '.') { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
286 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
287 t.type = TOK.Ellipses; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
288 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
289 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
290 t.type = TOK.Slice; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
291 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
292 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
293 t.type = TOK.Dot; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
294 goto Lcommon; |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
295 case '|': |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
296 c = *++p; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
297 if (c == '=') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
298 t.type = TOK.OrAssign; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
299 else if (c == '|') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
300 t.type = TOK.OrLogical; |
26 | 301 else { |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
302 t.type = TOK.OrBinary; |
26 | 303 goto Lcommon2; |
304 } | |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
305 goto Lcommon; |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
306 case '&': |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
307 c = *++p; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
308 if (c == '=') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
309 t.type = TOK.AndAssign; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
310 else if (c == '&') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
311 t.type = TOK.AndLogical; |
26 | 312 else { |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
313 t.type = TOK.AndBinary; |
26 | 314 goto Lcommon2; |
315 } | |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
316 goto Lcommon; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
317 case '+': |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
318 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
319 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
320 t.type = TOK.PlusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
321 else if (c == '+') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
322 t.type = TOK.PlusPlus; |
26 | 323 else { |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
324 t.type = TOK.Plus; |
26 | 325 goto Lcommon2; |
326 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
327 goto Lcommon; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
328 case '-': |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
329 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
330 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
331 t.type = TOK.MinusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
332 else if (c == '-') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
333 t.type = TOK.MinusMinus; |
26 | 334 else { |
335 t.type = TOK.Minus; | |
336 goto Lcommon2; | |
337 } | |
338 goto Lcommon; | |
339 case '=': | |
340 if (p[1] == '=') { | |
341 ++p; | |
342 t.type = TOK.Equal; | |
343 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
344 else |
26 | 345 t.type = TOK.Assign; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
346 goto Lcommon; |
27 | 347 case '~': |
348 if (p[1] == '=') { | |
349 ++p; | |
350 t.type = TOK.CatAssign; | |
351 } | |
352 else | |
353 t.type = TOK.Tilde; | |
354 goto Lcommon; | |
355 // Single character tokens | |
20 | 356 case '(': |
357 t.type = TOK.LParen; | |
358 goto Lcommon; | |
359 case ')': | |
360 t.type = TOK.RParen; | |
361 goto Lcommon; | |
362 case '[': | |
363 t.type = TOK.LBracket; | |
364 goto Lcommon; | |
365 case ']': | |
366 t.type = TOK.RBracket; | |
367 goto Lcommon; | |
368 case '{': | |
369 t.type = TOK.LBrace; | |
370 goto Lcommon; | |
371 case '}': | |
372 t.type = TOK.RBrace; | |
21
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
373 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
374 case ':': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
375 t.type = TOK.Colon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
376 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
377 case ';': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
378 t.type = TOK.Semicolon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
379 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
380 case '?': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
381 t.type = TOK.Question; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
382 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
383 case ',': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
384 t.type = TOK.Comma; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
385 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
386 case '$': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
387 t.type = TOK.Dollar; |
20 | 388 Lcommon: |
389 ++p; | |
26 | 390 Lcommon2: |
20 | 391 t.end = p; |
392 return; | |
393 default: | |
394 } | |
395 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
396 if (c & 128 && isUniAlpha(decodeUTF())) |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
397 goto Lidentifier; |
5 | 398 c = *++p; |
4 | 399 } |
3 | 400 } |
4 | 401 |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
402 void scanCharacterLiteral(ref Token t) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
403 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
404 assert(*p == '\''); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
405 MID id = MID.UnterminatedCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
406 uint c = *++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
407 switch(c) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
408 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
409 case '\\': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
410 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
411 break; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
412 case 0, 161, '\n', '\r': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
413 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
414 case '\'': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
415 id = MID.EmptyCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
416 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
417 default: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
418 if (c & 128) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
419 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
420 c = decodeUTF(); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
421 if (c == LSd || c == PSd) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
422 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
423 t.chr = c; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
424 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
425 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
426 |
19 | 427 ++p; |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
428 if (*p != '\'') |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
429 Lerr: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
430 error(id); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
431 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
432 t.type = TOK.Character; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
433 t.end = p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
434 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
435 |
15 | 436 void scanNumber(ref Token t) |
437 { | |
438 while (isdigit(*++p)) {} | |
439 t.type = TOK.Number; | |
440 t.end = p; | |
441 } | |
442 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
443 uint decodeUTF() |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
444 { |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
445 assert(*p & 128); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
446 size_t idx; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
447 uint d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
448 d = std.utf.decode(p[0 .. end-p], idx); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
449 p += idx -1; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
450 return d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
451 } |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
452 |
28 | 453 void loadKeywords() |
454 { | |
455 foreach(k; keywords) | |
456 idtable[k.str] = k; | |
457 } | |
458 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
459 void error(MID id) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
460 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
461 errors ~= new Problem(Problem.Type.Lexer, id, loc); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
462 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
463 |
3 | 464 public TOK nextToken() |
465 { | |
4 | 466 scan(this.token); |
467 return this.token.type; | |
468 } | |
469 | |
470 Token[] getTokens() | |
471 { | |
472 Token[] tokens; | |
473 while (nextToken() != TOK.EOF) | |
474 tokens ~= this.token; | |
475 tokens ~= this.token; | |
476 return tokens; | |
3 | 477 } |
5 | 478 } |