Mercurial > projects > dil
annotate trunk/src/Lexer.d @ 31:94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
- Added code for parsing hex string literals.
- Added stub for parsing raw strings literals.
author | aziz |
---|---|
date | Mon, 25 Jun 2007 13:34:03 +0000 |
parents | 426767b94635 |
children | d7011daa4740 |
rev | line source |
---|---|
0 | 1 /++ |
2 Author: Aziz Köksal | |
3 License: GPL2 | |
4 +/ | |
5 module Lexer; | |
3 | 6 import Token; |
28 | 7 import Keywords; |
8 import Identifier; | |
2 | 9 import std.stdio; |
4 | 10 import std.utf; |
11 import std.uni; | |
30 | 12 import std.conv; |
0 | 13 |
14 /// ASCII character properties table. | |
2 | 15 static const int ptable[256] = [ |
16 | 16 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0, |
13 | 17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | 18 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
13 | 19 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, |
20 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
21 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,16, | |
22 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
23 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, | |
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
2 | 32 ]; |
0 | 33 |
34 enum CProperty | |
35 { | |
1 | 36 Octal = 1, |
0 | 37 Digit = 1<<1, |
38 Hex = 1<<2, | |
39 Alpha = 1<<3, | |
13 | 40 Underscore = 1<<4, |
16 | 41 Whitespace = 1<<5 |
0 | 42 } |
43 | |
13 | 44 private alias CProperty CP; |
45 int isoctal(char c) { return ptable[c] & CP.Octal; } | |
46 int isdigit(char c) { return ptable[c] & CP.Digit; } | |
47 int ishexad(char c) { return ptable[c] & CP.Hex; } | |
48 int isalpha(char c) { return ptable[c] & CP.Alpha; } | |
49 int isalnum(char c) { return ptable[c] & (CP.Alpha | CP.Digit); } | |
50 int isidbeg(char c) { return ptable[c] & (CP.Alpha | CP.Underscore); } | |
51 int isident(char c) { return ptable[c] & (CP.Alpha | CP.Underscore | CP.Digit); } | |
16 | 52 int isspace(char c) { return ptable[c] & CP.Whitespace; } |
13 | 53 |
54 version(gen_ptable) | |
0 | 55 static this() |
56 { | |
2 | 57 // Initialize character properties table. |
0 | 58 for (int i; i < ptable.length; ++i) |
59 { | |
13 | 60 ptable[i] = 0; |
0 | 61 if ('0' <= i && i <= '7') |
13 | 62 ptable[i] |= CP.Octal; |
0 | 63 if ('0' <= i && i <= '9') |
13 | 64 ptable[i] |= CP.Digit; |
0 | 65 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F') |
13 | 66 ptable[i] |= CP.Hex; |
0 | 67 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z') |
13 | 68 ptable[i] |= CP.Alpha; |
69 if (i == '_') | |
70 ptable[i] |= CP.Underscore; | |
16 | 71 if (i == ' ' || i == '\t' || i == '\v'|| i == '\f') |
72 ptable[i] |= CP.Whitespace; | |
0 | 73 } |
2 | 74 // Print a formatted array literal. |
75 char[] array = "[\n"; | |
76 for (int i; i < ptable.length; ++i) | |
77 { | |
13 | 78 int c = ptable[i]; |
79 array ~= std.string.format("%2d,", c, ((i+1) % 16) ? "":"\n"); | |
2 | 80 } |
13 | 81 array[$-2..$] = "\n]"; |
2 | 82 writefln(array); |
0 | 83 } |
84 | |
11 | 85 const char[3] LS = \u2028; |
86 const char[3] PS = \u2029; | |
87 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
88 const dchar LSd = 0x2028; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
89 const dchar PSd = 0x2029; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
90 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
91 /// Index into table of error messages. |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
92 enum MID |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
93 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
94 UnterminatedCharacterLiteral, |
30 | 95 EmptyCharacterLiteral, |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
96 // #line |
30 | 97 ExpectedIdentifierLine, |
98 NewlineInSpecialToken, | |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
99 UnterminatedSpecialToken, |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
100 // x"" |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
101 NonHexCharInHexString, |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
102 OddNumberOfDigitsInHexString, |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
103 UnterminatedHexString |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
104 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
105 |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
106 string[] messages = [ |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
107 "unterminated character literal." |
30 | 108 "empty character literal.", |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
109 // #line |
30 | 110 "expected 'line' after '#'." |
111 "newline not allowed inside special token." | |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
112 "expected newline after special token.", |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
113 // x"" |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
114 "non-hex character '{1}' found in hex string.", |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
115 "odd number of hex digits in hex string.", |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
116 "unterminated hex string." |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
117 ]; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
118 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
119 class Problem |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
120 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
121 enum Type |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
122 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
123 Lexer, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
124 Parser, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
125 Semantic |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
126 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
127 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
128 MID id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
129 Type type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
130 uint loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
131 this(Type type, MID id, uint loc) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
132 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
133 this.id = id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
134 this.type = type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
135 this.loc = loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
136 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
137 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
138 |
0 | 139 class Lexer |
140 { | |
4 | 141 Token token; |
30 | 142 string text; |
4 | 143 char* p; |
144 char* end; | |
145 | |
17 | 146 uint loc = 1; /// line of code |
147 | |
30 | 148 char[] fileName; |
149 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
150 Problem[] errors; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
151 |
28 | 152 Identifier[string] idtable; |
153 | |
30 | 154 this(string text, string fileName) |
4 | 155 { |
30 | 156 this.fileName = fileName; |
157 | |
4 | 158 this.text = text; |
159 this.text.length = this.text.length + 1; | |
160 this.text[$-1] = 0; | |
161 | |
162 this.p = this.text.ptr; | |
163 this.end = this.p + this.text.length; | |
28 | 164 |
165 loadKeywords(); | |
4 | 166 } |
167 | |
168 public void scan(out Token t) | |
3 | 169 { |
4 | 170 assert(p < end); |
0 | 171 |
10 | 172 uint c = *p; |
4 | 173 |
5 | 174 while(1) |
4 | 175 { |
5 | 176 t.start = p; |
16 | 177 |
5 | 178 if (c == 0) |
179 { | |
17 | 180 ++p; |
5 | 181 t.type = TOK.EOF; |
17 | 182 t.end = p; |
5 | 183 return; |
184 } | |
4 | 185 |
17 | 186 if (c == '\n') |
187 { | |
188 c = *++p; | |
189 ++loc; | |
190 continue; | |
191 } | |
192 else if (c == '\r') | |
193 { | |
194 c = *++p; | |
195 if (c != '\n') | |
196 ++loc; | |
197 continue; | |
198 } | |
199 | |
13 | 200 if (isidbeg(c)) |
5 | 201 { |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
202 if (c == 'r' && p[1] == '"') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
203 return scanRawStringLiteral(t); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
204 if (c == 'x' && p[1] == '"') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
205 return scanHexStringLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
206 Lidentifier: |
5 | 207 do |
208 { c = *++p; } | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
209 while (isident(c) || c & 128 && isUniAlpha(decodeUTF())) |
28 | 210 |
5 | 211 t.end = p; |
28 | 212 |
213 string str = t.span; | |
214 Identifier* id = str in idtable; | |
215 | |
216 if (!id) | |
217 { | |
218 idtable[str] = Identifier.Identifier(TOK.Identifier, str); | |
219 id = str in idtable; | |
220 } | |
221 assert(id); | |
222 t.type = id.type; | |
5 | 223 return; |
224 } | |
225 | |
15 | 226 if (isdigit(c)) |
227 return scanNumber(t); | |
228 | |
8 | 229 if (c == '/') |
5 | 230 { |
8 | 231 c = *++p; |
14 | 232 switch(c) |
5 | 233 { |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
234 case '=': |
14 | 235 ++p; |
236 t.type = TOK.DivisionAssign; | |
237 t.end = p; | |
238 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
239 case '+': |
8 | 240 uint level = 1; |
241 do | |
7 | 242 { |
8 | 243 c = *++p; |
244 if (c == 0) | |
245 throw new Error("unterminated /+ +/ comment."); | |
246 else if (c == '/' && p[1] == '+') | |
247 { | |
248 ++p; | |
249 ++level; | |
250 } | |
251 else if (c == '+' && p[1] == '/') | |
252 { | |
253 ++p; | |
254 if (--level == 0) | |
255 break; | |
256 } | |
257 } while (1) | |
258 p += 2; | |
259 t.type = TOK.Comment; | |
260 t.end = p; | |
261 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
262 case '*': |
8 | 263 do |
7 | 264 { |
8 | 265 c = *++p; |
266 if (c == 0) | |
267 throw new Error("unterminated /* */ comment."); | |
268 } while (c != '*' || p[1] != '/') | |
269 p += 2; | |
270 t.type = TOK.Comment; | |
271 t.end = p; | |
272 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
273 case '/': |
10 | 274 do |
275 { | |
276 c = *++p; | |
11 | 277 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
278 break; | |
10 | 279 } while (c != '\n' && c != 0) |
280 t.type = TOK.Comment; | |
281 t.end = p; | |
282 return; | |
283 } | |
5 | 284 } |
285 | |
9
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
286 if (c == '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
287 { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
288 do { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
289 c = *++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
290 if (c == 0) |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
291 throw new Error("unterminated string literal."); |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
292 if (c == '\\') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
293 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
294 } while (c != '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
295 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
296 t.type = TOK.String; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
297 t.end = p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
298 return; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
299 } |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
300 |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
301 if (c == '\'') |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
302 return scanCharacterLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
303 |
20 | 304 switch(c) |
305 { | |
22
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
306 case '.': |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
307 if (p[1] == '.') |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
308 { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
309 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
310 if (p[1] == '.') { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
311 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
312 t.type = TOK.Ellipses; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
313 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
314 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
315 t.type = TOK.Slice; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
316 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
317 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
318 t.type = TOK.Dot; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
319 goto Lcommon; |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
320 case '|': |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
321 c = *++p; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
322 if (c == '=') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
323 t.type = TOK.OrAssign; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
324 else if (c == '|') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
325 t.type = TOK.OrLogical; |
26 | 326 else { |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
327 t.type = TOK.OrBinary; |
26 | 328 goto Lcommon2; |
329 } | |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
330 goto Lcommon; |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
331 case '&': |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
332 c = *++p; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
333 if (c == '=') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
334 t.type = TOK.AndAssign; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
335 else if (c == '&') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
336 t.type = TOK.AndLogical; |
26 | 337 else { |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
338 t.type = TOK.AndBinary; |
26 | 339 goto Lcommon2; |
340 } | |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
341 goto Lcommon; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
342 case '+': |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
343 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
344 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
345 t.type = TOK.PlusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
346 else if (c == '+') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
347 t.type = TOK.PlusPlus; |
26 | 348 else { |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
349 t.type = TOK.Plus; |
26 | 350 goto Lcommon2; |
351 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
352 goto Lcommon; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
353 case '-': |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
354 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
355 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
356 t.type = TOK.MinusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
357 else if (c == '-') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
358 t.type = TOK.MinusMinus; |
26 | 359 else { |
360 t.type = TOK.Minus; | |
361 goto Lcommon2; | |
362 } | |
363 goto Lcommon; | |
364 case '=': | |
365 if (p[1] == '=') { | |
366 ++p; | |
367 t.type = TOK.Equal; | |
368 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
369 else |
26 | 370 t.type = TOK.Assign; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
371 goto Lcommon; |
27 | 372 case '~': |
373 if (p[1] == '=') { | |
374 ++p; | |
375 t.type = TOK.CatAssign; | |
376 } | |
377 else | |
378 t.type = TOK.Tilde; | |
379 goto Lcommon; | |
29
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
380 case '*': |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
381 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
382 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
383 t.type = TOK.MulAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
384 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
385 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
386 t.type = TOK.Mul; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
387 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
388 case '^': |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
389 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
390 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
391 t.type = TOK.XorAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
392 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
393 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
394 t.type = TOK.Xor; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
395 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
396 case '%': |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
397 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
398 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
399 t.type = TOK.ModAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
400 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
401 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
402 t.type = TOK.Mod; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
403 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
404 // Single character tokens: |
20 | 405 case '(': |
406 t.type = TOK.LParen; | |
407 goto Lcommon; | |
408 case ')': | |
409 t.type = TOK.RParen; | |
410 goto Lcommon; | |
411 case '[': | |
412 t.type = TOK.LBracket; | |
413 goto Lcommon; | |
414 case ']': | |
415 t.type = TOK.RBracket; | |
416 goto Lcommon; | |
417 case '{': | |
418 t.type = TOK.LBrace; | |
419 goto Lcommon; | |
420 case '}': | |
421 t.type = TOK.RBrace; | |
21
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
422 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
423 case ':': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
424 t.type = TOK.Colon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
425 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
426 case ';': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
427 t.type = TOK.Semicolon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
428 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
429 case '?': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
430 t.type = TOK.Question; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
431 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
432 case ',': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
433 t.type = TOK.Comma; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
434 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
435 case '$': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
436 t.type = TOK.Dollar; |
20 | 437 Lcommon: |
438 ++p; | |
26 | 439 Lcommon2: |
20 | 440 t.end = p; |
441 return; | |
30 | 442 case '#': |
443 ++p; | |
444 scanSpecialToken(); | |
445 break; | |
20 | 446 default: |
447 } | |
448 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
449 if (c & 128 && isUniAlpha(decodeUTF())) |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
450 goto Lidentifier; |
5 | 451 c = *++p; |
4 | 452 } |
3 | 453 } |
4 | 454 |
30 | 455 void peek(ref Token t) |
456 { | |
457 char* tmp = p; | |
458 scan(t); | |
459 p = tmp; | |
460 } | |
461 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
462 void scanCharacterLiteral(ref Token t) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
463 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
464 assert(*p == '\''); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
465 MID id = MID.UnterminatedCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
466 uint c = *++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
467 switch(c) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
468 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
469 case '\\': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
470 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
471 break; |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
472 case 0, 26, '\n', '\r': |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
473 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
474 case '\'': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
475 id = MID.EmptyCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
476 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
477 default: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
478 if (c & 128) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
479 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
480 c = decodeUTF(); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
481 if (c == LSd || c == PSd) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
482 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
483 t.chr = c; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
484 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
485 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
486 |
19 | 487 ++p; |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
488 if (*p != '\'') |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
489 Lerr: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
490 error(id); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
491 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
492 t.type = TOK.Character; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
493 t.end = p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
494 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
495 |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
496 char scanPostFix() |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
497 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
498 switch (*p) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
499 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
500 case 'c': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
501 case 'w': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
502 case 'd': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
503 return *p++; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
504 default: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
505 return 0; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
506 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
507 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
508 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
509 void scanRawStringLiteral(ref Token t) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
510 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
511 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
512 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
513 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
514 void scanHexStringLiteral(ref Token t) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
515 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
516 assert(p[0] == 'x' && p[1] == '"'); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
517 p+=2; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
518 t.type = TOK.String; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
519 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
520 uint c; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
521 ubyte[] buffer; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
522 ubyte h; // hex number |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
523 uint n; // number of hex digits |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
524 MID mid; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
525 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
526 while (1) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
527 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
528 c = *p++; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
529 switch (c) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
530 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
531 case '"': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
532 if (n & 1) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
533 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
534 mid = MID.OddNumberOfDigitsInHexString; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
535 error(mid); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
536 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
537 t.str = cast(string) buffer; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
538 t.pf = scanPostFix(); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
539 t.end = p; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
540 return; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
541 case '\r': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
542 if (*p == '\n') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
543 ++p; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
544 case '\n': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
545 ++loc; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
546 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
547 case LS[0]: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
548 if (*p == LS[1] && (p[1] == LS[2] || p[1] == PS[2])) { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
549 p += 2; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
550 ++loc; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
551 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
552 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
553 case 0, 26: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
554 mid = MID.UnterminatedHexString; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
555 goto Lerr; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
556 default: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
557 if (ishexad(c)) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
558 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
559 if (c <= '9') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
560 c -= '0'; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
561 else if (c <= 'F') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
562 c -= 'A' - 10; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
563 else |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
564 c -= 'a' - 10; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
565 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
566 if (n & 1) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
567 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
568 h <<= 4; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
569 h |= c; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
570 buffer ~= h; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
571 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
572 else |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
573 h = c; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
574 ++n; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
575 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
576 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
577 else if (isspace(c)) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
578 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
579 mid = MID.NonHexCharInHexString; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
580 goto Lerr; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
581 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
582 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
583 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
584 return; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
585 Lerr: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
586 error(mid); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
587 t.pf = 0; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
588 t.end = p; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
589 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
590 |
15 | 591 void scanNumber(ref Token t) |
592 { | |
593 while (isdigit(*++p)) {} | |
594 t.type = TOK.Number; | |
595 t.end = p; | |
30 | 596 t._uint = toInt(t.span); |
597 } | |
598 | |
599 /// Scan special token: #line Integer [Filespec] EndOfLine | |
600 void scanSpecialToken() | |
601 { | |
602 MID mid; | |
603 Token t; | |
604 | |
605 scan(t); | |
606 if (!(t.type == TOK.Identifier && t.span == "line")) { | |
607 mid = MID.ExpectedIdentifierLine; | |
608 goto Lerr; | |
609 } | |
610 | |
611 scan(t); | |
612 if (t.type == TOK.Number) | |
613 loc = t._uint - 1; | |
614 | |
615 uint loc = this.loc; | |
616 | |
617 char* wsstart = t.end; | |
618 | |
619 bool hasNewline(char* end) | |
620 { | |
621 alias wsstart p; | |
622 uint c; | |
623 for(; p != end; c = *++p) | |
624 if (c == '\n' || c == '\r' || c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) { | |
625 mid = MID.NewlineInSpecialToken; | |
626 return true; | |
627 } | |
628 return false; | |
629 } | |
630 | |
631 peek(t); | |
632 | |
633 if (t.type == TOK.String) | |
634 { | |
635 // Check whole token with preceding whitespace for newline. | |
636 if (hasNewline(t.end)) | |
637 goto Lerr; | |
638 fileName = t.span[1..$-1]; // contents of "..." | |
639 p = t.end; | |
640 } | |
641 else if (t.type == TOK.Identifier && t.span == "__FILE__") | |
642 { | |
643 // Check preceding whitespace for newline. | |
644 if (hasNewline(t.start)) | |
645 goto Lerr; | |
646 p = t.end; | |
647 } | |
648 | |
649 uint c; | |
650 while (1) | |
651 { | |
652 c = *p++; | |
653 if (isspace(c)) | |
654 continue; | |
655 | |
656 if (c == '\n' || c == '\r' || c == 0 || | |
657 c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) | |
658 break; | |
659 else { | |
660 mid = MID.UnterminatedSpecialToken; | |
661 goto Lerr; | |
662 } | |
663 } | |
664 | |
665 this.loc = loc; | |
666 return; | |
667 Lerr: | |
668 error(mid); | |
15 | 669 } |
670 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
671 uint decodeUTF() |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
672 { |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
673 assert(*p & 128); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
674 size_t idx; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
675 uint d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
676 d = std.utf.decode(p[0 .. end-p], idx); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
677 p += idx -1; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
678 return d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
679 } |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
680 |
28 | 681 void loadKeywords() |
682 { | |
683 foreach(k; keywords) | |
684 idtable[k.str] = k; | |
685 } | |
686 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
687 void error(MID id) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
688 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
689 errors ~= new Problem(Problem.Type.Lexer, id, loc); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
690 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
691 |
3 | 692 public TOK nextToken() |
693 { | |
4 | 694 scan(this.token); |
695 return this.token.type; | |
696 } | |
697 | |
698 Token[] getTokens() | |
699 { | |
700 Token[] tokens; | |
701 while (nextToken() != TOK.EOF) | |
702 tokens ~= this.token; | |
703 tokens ~= this.token; | |
704 return tokens; | |
3 | 705 } |
5 | 706 } |