Mercurial > projects > dil
annotate trunk/src/Lexer.d @ 34:41dad4aef4b1
- Removed test string.
author | aziz |
---|---|
date | Mon, 25 Jun 2007 19:05:01 +0000 |
parents | cf3047cf3cd2 |
children | c470b9356e35 |
rev | line source |
---|---|
0 | 1 /++ |
2 Author: Aziz Köksal | |
3 License: GPL2 | |
4 +/ | |
5 module Lexer; | |
3 | 6 import Token; |
28 | 7 import Keywords; |
8 import Identifier; | |
2 | 9 import std.stdio; |
4 | 10 import std.utf; |
11 import std.uni; | |
30 | 12 import std.conv; |
0 | 13 |
14 /// ASCII character properties table. | |
2 | 15 static const int ptable[256] = [ |
16 | 16 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0, |
13 | 17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | 18 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
13 | 19 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, |
20 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
21 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,16, | |
22 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
23 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, | |
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
2 | 32 ]; |
0 | 33 |
34 enum CProperty | |
35 { | |
1 | 36 Octal = 1, |
0 | 37 Digit = 1<<1, |
38 Hex = 1<<2, | |
39 Alpha = 1<<3, | |
13 | 40 Underscore = 1<<4, |
16 | 41 Whitespace = 1<<5 |
0 | 42 } |
43 | |
13 | 44 private alias CProperty CP; |
45 int isoctal(char c) { return ptable[c] & CP.Octal; } | |
46 int isdigit(char c) { return ptable[c] & CP.Digit; } | |
47 int ishexad(char c) { return ptable[c] & CP.Hex; } | |
48 int isalpha(char c) { return ptable[c] & CP.Alpha; } | |
49 int isalnum(char c) { return ptable[c] & (CP.Alpha | CP.Digit); } | |
50 int isidbeg(char c) { return ptable[c] & (CP.Alpha | CP.Underscore); } | |
51 int isident(char c) { return ptable[c] & (CP.Alpha | CP.Underscore | CP.Digit); } | |
16 | 52 int isspace(char c) { return ptable[c] & CP.Whitespace; } |
13 | 53 |
54 version(gen_ptable) | |
0 | 55 static this() |
56 { | |
2 | 57 // Initialize character properties table. |
0 | 58 for (int i; i < ptable.length; ++i) |
59 { | |
13 | 60 ptable[i] = 0; |
0 | 61 if ('0' <= i && i <= '7') |
13 | 62 ptable[i] |= CP.Octal; |
0 | 63 if ('0' <= i && i <= '9') |
13 | 64 ptable[i] |= CP.Digit; |
0 | 65 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F') |
13 | 66 ptable[i] |= CP.Hex; |
0 | 67 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z') |
13 | 68 ptable[i] |= CP.Alpha; |
69 if (i == '_') | |
70 ptable[i] |= CP.Underscore; | |
16 | 71 if (i == ' ' || i == '\t' || i == '\v'|| i == '\f') |
72 ptable[i] |= CP.Whitespace; | |
0 | 73 } |
2 | 74 // Print a formatted array literal. |
75 char[] array = "[\n"; | |
76 for (int i; i < ptable.length; ++i) | |
77 { | |
13 | 78 int c = ptable[i]; |
79 array ~= std.string.format("%2d,", c, ((i+1) % 16) ? "":"\n"); | |
2 | 80 } |
13 | 81 array[$-2..$] = "\n]"; |
2 | 82 writefln(array); |
0 | 83 } |
84 | |
11 | 85 const char[3] LS = \u2028; |
86 const char[3] PS = \u2029; | |
87 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
88 const dchar LSd = 0x2028; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
89 const dchar PSd = 0x2029; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
90 |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
91 const uint _Z_ = 26; /// Control+Z |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
92 |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
93 /// Index into table of error messages. |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
94 enum MID |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
95 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
96 UnterminatedCharacterLiteral, |
30 | 97 EmptyCharacterLiteral, |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
98 // #line |
30 | 99 ExpectedIdentifierLine, |
100 NewlineInSpecialToken, | |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
101 UnterminatedSpecialToken, |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
102 // x"" |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
103 NonHexCharInHexString, |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
104 OddNumberOfDigitsInHexString, |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
105 UnterminatedHexString, |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
106 // /* */ /+ +/ |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
107 UnterminatedBlockComment, |
33 | 108 UnterminatedNestedComment, |
109 // `` r"" | |
110 UnterminatedRawString, | |
111 UnterminatedBackQuoteString, | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
112 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
113 |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
114 string[] messages = [ |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
115 "unterminated character literal.", |
30 | 116 "empty character literal.", |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
117 // #line |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
118 "expected 'line' after '#'.", |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
119 "newline not allowed inside special token.", |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
120 "expected newline after special token.", |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
121 // x"" |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
122 "non-hex character '{1}' found in hex string.", |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
123 "odd number of hex digits in hex string.", |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
124 "unterminated hex string.", |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
125 // /* */ /+ +/ |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
126 "unterminated block comment (/* */).", |
33 | 127 "unterminated nested comment (/+ +/).", |
128 // `` r"" | |
129 "unterminated raw string.", | |
130 "unterminated back quote string.", | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
131 ]; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
132 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
133 class Problem |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
134 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
135 enum Type |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
136 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
137 Lexer, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
138 Parser, |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
139 Semantic |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
140 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
141 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
142 MID id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
143 Type type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
144 uint loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
145 this(Type type, MID id, uint loc) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
146 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
147 this.id = id; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
148 this.type = type; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
149 this.loc = loc; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
150 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
151 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
152 |
0 | 153 class Lexer |
154 { | |
4 | 155 Token token; |
30 | 156 string text; |
4 | 157 char* p; |
158 char* end; | |
159 | |
17 | 160 uint loc = 1; /// line of code |
161 | |
30 | 162 char[] fileName; |
163 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
164 Problem[] errors; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
165 |
28 | 166 Identifier[string] idtable; |
167 | |
30 | 168 this(string text, string fileName) |
4 | 169 { |
30 | 170 this.fileName = fileName; |
171 | |
4 | 172 this.text = text; |
173 this.text.length = this.text.length + 1; | |
174 this.text[$-1] = 0; | |
175 | |
176 this.p = this.text.ptr; | |
177 this.end = this.p + this.text.length; | |
28 | 178 |
179 loadKeywords(); | |
4 | 180 } |
181 | |
182 public void scan(out Token t) | |
3 | 183 { |
4 | 184 assert(p < end); |
0 | 185 |
10 | 186 uint c = *p; |
4 | 187 |
5 | 188 while(1) |
4 | 189 { |
5 | 190 t.start = p; |
16 | 191 |
5 | 192 if (c == 0) |
193 { | |
17 | 194 ++p; |
5 | 195 t.type = TOK.EOF; |
17 | 196 t.end = p; |
5 | 197 return; |
198 } | |
4 | 199 |
17 | 200 if (c == '\n') |
201 { | |
202 c = *++p; | |
203 ++loc; | |
204 continue; | |
205 } | |
206 else if (c == '\r') | |
207 { | |
208 c = *++p; | |
209 if (c != '\n') | |
210 ++loc; | |
211 continue; | |
212 } | |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
213 else if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
214 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
215 p += 3; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
216 c = *p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
217 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
218 } |
17 | 219 |
13 | 220 if (isidbeg(c)) |
5 | 221 { |
33 | 222 if (c == 'r' && p[1] == '"' && ++p) |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
223 return scanRawStringLiteral(t); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
224 if (c == 'x' && p[1] == '"') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
225 return scanHexStringLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
226 Lidentifier: |
5 | 227 do |
228 { c = *++p; } | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
229 while (isident(c) || c & 128 && isUniAlpha(decodeUTF())) |
28 | 230 |
5 | 231 t.end = p; |
28 | 232 |
233 string str = t.span; | |
234 Identifier* id = str in idtable; | |
235 | |
236 if (!id) | |
237 { | |
238 idtable[str] = Identifier.Identifier(TOK.Identifier, str); | |
239 id = str in idtable; | |
240 } | |
241 assert(id); | |
242 t.type = id.type; | |
5 | 243 return; |
244 } | |
245 | |
15 | 246 if (isdigit(c)) |
247 return scanNumber(t); | |
248 | |
8 | 249 if (c == '/') |
5 | 250 { |
8 | 251 c = *++p; |
14 | 252 switch(c) |
5 | 253 { |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
254 case '=': |
14 | 255 ++p; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
256 t.type = TOK.DivAssign; |
14 | 257 t.end = p; |
258 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
259 case '+': |
8 | 260 uint level = 1; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
261 while (1) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
262 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
263 c = *++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
264 switch (c) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
265 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
266 case '\r': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
267 if (p[1] == '\n') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
268 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
269 case '\n': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
270 ++loc; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
271 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
272 case '/': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
273 if (p[1] == '+') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
274 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
275 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
276 ++level; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
277 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
278 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
279 case '+': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
280 if (p[1] == '/') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
281 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
282 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
283 if (--level == 0) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
284 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
285 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
286 LreturnNC: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
287 t.type = TOK.Comment; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
288 t.end = p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
289 return; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
290 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
291 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
292 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
293 case 0, _Z_: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
294 error(MID.UnterminatedNestedComment); |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
295 goto LreturnNC; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
296 case LS[0]: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
297 if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
298 p += 2; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
299 ++loc; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
300 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
301 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
302 default: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
303 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
304 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
305 case '*': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
306 while (1) |
7 | 307 { |
8 | 308 c = *++p; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
309 switch (c) |
8 | 310 { |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
311 case '\r': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
312 if (p[1] == '\n') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
313 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
314 case '\n': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
315 ++loc; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
316 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
317 case '*': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
318 if (p[1] == '/') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
319 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
320 p += 2; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
321 LreturnBC: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
322 t.type = TOK.Comment; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
323 t.end = p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
324 return; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
325 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
326 break; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
327 case LS[0]: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
328 if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
329 p += 2; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
330 ++loc; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
331 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
332 break; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
333 case 0, _Z_: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
334 error(MID.UnterminatedBlockComment); |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
335 goto LreturnBC; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
336 default: |
8 | 337 } |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
338 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
339 assert(0); |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
340 case '/': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
341 while (1) |
7 | 342 { |
8 | 343 c = *++p; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
344 switch (c) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
345 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
346 case '\r': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
347 if (p[1] == '\n') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
348 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
349 case '\n': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
350 case 0, _Z_: |
11 | 351 break; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
352 case LS[0]: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
353 if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
354 break; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
355 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
356 default: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
357 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
358 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
359 t.type = TOK.Comment; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
360 t.end = p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
361 return; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
362 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
363 default: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
364 t.type = TOK.Div; |
10 | 365 t.end = p; |
366 return; | |
367 } | |
5 | 368 } |
369 | |
9
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
370 if (c == '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
371 { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
372 do { |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
373 c = *++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
374 if (c == 0) |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
375 throw new Error("unterminated string literal."); |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
376 if (c == '\\') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
377 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
378 } while (c != '"') |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
379 ++p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
380 t.type = TOK.String; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
381 t.end = p; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
382 return; |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
383 } |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
384 |
5d6968cc751e
- Parsing string and character literals now (rudimentary implementation.)
aziz
parents:
8
diff
changeset
|
385 if (c == '\'') |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
386 return scanCharacterLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
387 |
33 | 388 if (c == '`') |
389 return scanRawStringLiteral(t); | |
390 | |
20 | 391 switch(c) |
392 { | |
22
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
393 case '.': |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
394 if (p[1] == '.') |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
395 { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
396 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
397 if (p[1] == '.') { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
398 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
399 t.type = TOK.Ellipses; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
400 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
401 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
402 t.type = TOK.Slice; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
403 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
404 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
405 t.type = TOK.Dot; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
406 goto Lcommon; |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
407 case '|': |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
408 c = *++p; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
409 if (c == '=') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
410 t.type = TOK.OrAssign; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
411 else if (c == '|') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
412 t.type = TOK.OrLogical; |
26 | 413 else { |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
414 t.type = TOK.OrBinary; |
26 | 415 goto Lcommon2; |
416 } | |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
417 goto Lcommon; |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
418 case '&': |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
419 c = *++p; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
420 if (c == '=') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
421 t.type = TOK.AndAssign; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
422 else if (c == '&') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
423 t.type = TOK.AndLogical; |
26 | 424 else { |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
425 t.type = TOK.AndBinary; |
26 | 426 goto Lcommon2; |
427 } | |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
428 goto Lcommon; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
429 case '+': |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
430 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
431 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
432 t.type = TOK.PlusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
433 else if (c == '+') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
434 t.type = TOK.PlusPlus; |
26 | 435 else { |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
436 t.type = TOK.Plus; |
26 | 437 goto Lcommon2; |
438 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
439 goto Lcommon; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
440 case '-': |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
441 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
442 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
443 t.type = TOK.MinusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
444 else if (c == '-') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
445 t.type = TOK.MinusMinus; |
26 | 446 else { |
447 t.type = TOK.Minus; | |
448 goto Lcommon2; | |
449 } | |
450 goto Lcommon; | |
451 case '=': | |
452 if (p[1] == '=') { | |
453 ++p; | |
454 t.type = TOK.Equal; | |
455 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
456 else |
26 | 457 t.type = TOK.Assign; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
458 goto Lcommon; |
27 | 459 case '~': |
460 if (p[1] == '=') { | |
461 ++p; | |
462 t.type = TOK.CatAssign; | |
463 } | |
464 else | |
465 t.type = TOK.Tilde; | |
466 goto Lcommon; | |
29
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
467 case '*': |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
468 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
469 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
470 t.type = TOK.MulAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
471 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
472 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
473 t.type = TOK.Mul; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
474 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
475 case '^': |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
476 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
477 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
478 t.type = TOK.XorAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
479 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
480 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
481 t.type = TOK.Xor; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
482 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
483 case '%': |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
484 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
485 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
486 t.type = TOK.ModAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
487 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
488 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
489 t.type = TOK.Mod; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
490 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
491 // Single character tokens: |
20 | 492 case '(': |
493 t.type = TOK.LParen; | |
494 goto Lcommon; | |
495 case ')': | |
496 t.type = TOK.RParen; | |
497 goto Lcommon; | |
498 case '[': | |
499 t.type = TOK.LBracket; | |
500 goto Lcommon; | |
501 case ']': | |
502 t.type = TOK.RBracket; | |
503 goto Lcommon; | |
504 case '{': | |
505 t.type = TOK.LBrace; | |
506 goto Lcommon; | |
507 case '}': | |
508 t.type = TOK.RBrace; | |
21
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
509 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
510 case ':': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
511 t.type = TOK.Colon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
512 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
513 case ';': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
514 t.type = TOK.Semicolon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
515 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
516 case '?': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
517 t.type = TOK.Question; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
518 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
519 case ',': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
520 t.type = TOK.Comma; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
521 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
522 case '$': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
523 t.type = TOK.Dollar; |
20 | 524 Lcommon: |
525 ++p; | |
26 | 526 Lcommon2: |
20 | 527 t.end = p; |
528 return; | |
30 | 529 case '#': |
530 ++p; | |
531 scanSpecialToken(); | |
532 break; | |
20 | 533 default: |
534 } | |
535 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
536 if (c & 128 && isUniAlpha(decodeUTF())) |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
537 goto Lidentifier; |
5 | 538 c = *++p; |
4 | 539 } |
3 | 540 } |
4 | 541 |
30 | 542 void peek(ref Token t) |
543 { | |
544 char* tmp = p; | |
545 scan(t); | |
546 p = tmp; | |
547 } | |
548 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
549 void scanCharacterLiteral(ref Token t) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
550 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
551 assert(*p == '\''); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
552 MID id = MID.UnterminatedCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
553 uint c = *++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
554 switch(c) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
555 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
556 case '\\': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
557 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
558 break; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
559 case 0, _Z_, '\n', '\r': |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
560 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
561 case '\'': |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
562 id = MID.EmptyCharacterLiteral; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
563 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
564 default: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
565 if (c & 128) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
566 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
567 c = decodeUTF(); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
568 if (c == LSd || c == PSd) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
569 goto Lerr; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
570 t.chr = c; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
571 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
572 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
573 |
19 | 574 ++p; |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
575 if (*p != '\'') |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
576 Lerr: |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
577 error(id); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
578 ++p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
579 t.type = TOK.Character; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
580 t.end = p; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
581 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
582 |
33 | 583 char scanPostfix() |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
584 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
585 switch (*p) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
586 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
587 case 'c': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
588 case 'w': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
589 case 'd': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
590 return *p++; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
591 default: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
592 return 0; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
593 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
594 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
595 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
596 void scanRawStringLiteral(ref Token t) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
597 { |
33 | 598 uint delim = *p; |
599 assert(delim == '`' || delim == '"' && p[-1] == 'r'); | |
600 t.type = TOK.String; | |
601 char[] buffer; | |
602 uint c; | |
603 while (1) | |
604 { | |
605 c = *++p; | |
606 switch (c) | |
607 { | |
608 case '\r': | |
609 if (p[1] == '\n') | |
610 ++p; | |
611 c = '\n'; // Convert '\r' and '\r\n' to '\n' | |
612 case '\n': | |
613 ++loc; | |
614 continue; | |
615 case '`': | |
616 case '"': | |
617 if (c == delim) | |
618 { | |
619 ++p; | |
620 t.pf = scanPostfix(); | |
621 Lreturn: | |
622 t.str = buffer ~ '\0'; | |
623 t.end = p; | |
624 return; | |
625 } | |
626 break; | |
627 case LS[0]: | |
628 if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) | |
629 { | |
630 // TODO: convert LS or PS to \n? | |
631 buffer ~= p[0..3]; | |
632 p += 2; | |
633 ++loc; | |
634 continue; | |
635 } | |
636 break; | |
637 case 0, _Z_: | |
638 if (delim == 'r') | |
639 error(MID.UnterminatedRawString); | |
640 else | |
641 error(MID.UnterminatedBackQuoteString); | |
642 goto Lreturn; | |
643 default: | |
644 } | |
645 buffer ~= c; // copy character to buffer | |
646 } | |
647 assert(0); | |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
648 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
649 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
650 void scanHexStringLiteral(ref Token t) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
651 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
652 assert(p[0] == 'x' && p[1] == '"'); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
653 p+=2; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
654 t.type = TOK.String; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
655 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
656 uint c; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
657 ubyte[] buffer; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
658 ubyte h; // hex number |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
659 uint n; // number of hex digits |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
660 MID mid; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
661 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
662 while (1) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
663 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
664 c = *p++; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
665 switch (c) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
666 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
667 case '"': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
668 if (n & 1) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
669 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
670 mid = MID.OddNumberOfDigitsInHexString; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
671 error(mid); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
672 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
673 t.str = cast(string) buffer; |
33 | 674 t.pf = scanPostfix(); |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
675 t.end = p; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
676 return; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
677 case '\r': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
678 if (*p == '\n') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
679 ++p; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
680 case '\n': |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
681 ++loc; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
682 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
683 case LS[0]: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
684 if (*p == LS[1] && (p[1] == LS[2] || p[1] == PS[2])) { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
685 p += 2; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
686 ++loc; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
687 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
688 continue; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
689 case 0, _Z_: |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
690 mid = MID.UnterminatedHexString; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
691 goto Lerr; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
692 default: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
693 if (ishexad(c)) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
694 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
695 if (c <= '9') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
696 c -= '0'; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
697 else if (c <= 'F') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
698 c -= 'A' - 10; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
699 else |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
700 c -= 'a' - 10; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
701 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
702 if (n & 1) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
703 { |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
704 h <<= 4; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
705 h |= c; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
706 buffer ~= h; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
707 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
708 else |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
709 h = c; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
710 ++n; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
711 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
712 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
713 else if (isspace(c)) |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
714 continue; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
715 mid = MID.NonHexCharInHexString; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
716 goto Lerr; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
717 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
718 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
719 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
720 return; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
721 Lerr: |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
722 error(mid); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
723 t.pf = 0; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
724 t.end = p; |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
725 } |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
726 |
15 | 727 void scanNumber(ref Token t) |
728 { | |
729 while (isdigit(*++p)) {} | |
730 t.type = TOK.Number; | |
731 t.end = p; | |
30 | 732 t._uint = toInt(t.span); |
733 } | |
734 | |
735 /// Scan special token: #line Integer [Filespec] EndOfLine | |
736 void scanSpecialToken() | |
737 { | |
738 MID mid; | |
739 Token t; | |
740 | |
741 scan(t); | |
742 if (!(t.type == TOK.Identifier && t.span == "line")) { | |
743 mid = MID.ExpectedIdentifierLine; | |
744 goto Lerr; | |
745 } | |
746 | |
747 scan(t); | |
748 if (t.type == TOK.Number) | |
749 loc = t._uint - 1; | |
750 | |
751 uint loc = this.loc; | |
752 | |
753 char* wsstart = t.end; | |
754 | |
755 bool hasNewline(char* end) | |
756 { | |
757 alias wsstart p; | |
758 uint c; | |
759 for(; p != end; c = *++p) | |
760 if (c == '\n' || c == '\r' || c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) { | |
761 mid = MID.NewlineInSpecialToken; | |
762 return true; | |
763 } | |
764 return false; | |
765 } | |
766 | |
767 peek(t); | |
768 | |
769 if (t.type == TOK.String) | |
770 { | |
771 // Check whole token with preceding whitespace for newline. | |
772 if (hasNewline(t.end)) | |
773 goto Lerr; | |
774 fileName = t.span[1..$-1]; // contents of "..." | |
775 p = t.end; | |
776 } | |
777 else if (t.type == TOK.Identifier && t.span == "__FILE__") | |
778 { | |
779 // Check preceding whitespace for newline. | |
780 if (hasNewline(t.start)) | |
781 goto Lerr; | |
782 p = t.end; | |
783 } | |
784 | |
785 uint c; | |
786 while (1) | |
787 { | |
788 c = *p++; | |
789 if (isspace(c)) | |
790 continue; | |
791 | |
792 if (c == '\n' || c == '\r' || c == 0 || | |
793 c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) | |
794 break; | |
795 else { | |
796 mid = MID.UnterminatedSpecialToken; | |
797 goto Lerr; | |
798 } | |
799 } | |
800 | |
801 this.loc = loc; | |
802 return; | |
803 Lerr: | |
804 error(mid); | |
15 | 805 } |
806 | |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
807 uint decodeUTF() |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
808 { |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
809 assert(*p & 128); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
810 size_t idx; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
811 uint d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
812 d = std.utf.decode(p[0 .. end-p], idx); |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
813 p += idx -1; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
814 return d; |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
815 } |
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
816 |
28 | 817 void loadKeywords() |
818 { | |
819 foreach(k; keywords) | |
820 idtable[k.str] = k; | |
821 } | |
822 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
823 void error(MID id) |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
824 { |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
825 errors ~= new Problem(Problem.Type.Lexer, id, loc); |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
826 } |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
827 |
3 | 828 public TOK nextToken() |
829 { | |
4 | 830 scan(this.token); |
831 return this.token.type; | |
832 } | |
833 | |
834 Token[] getTokens() | |
835 { | |
836 Token[] tokens; | |
837 while (nextToken() != TOK.EOF) | |
838 tokens ~= this.token; | |
839 tokens ~= this.token; | |
840 return tokens; | |
3 | 841 } |
5 | 842 } |