Mercurial > projects > dil
annotate trunk/src/Lexer.d @ 51:cadd2bfe686c
- Displaying error messages in XML.
- Made fixes to the special token scanner.
author | aziz |
---|---|
date | Wed, 27 Jun 2007 16:43:00 +0000 |
parents | 4a27b7840ea9 |
children | f65a83c27638 |
rev | line source |
---|---|
0 | 1 /++ |
2 Author: Aziz Köksal | |
3 License: GPL2 | |
4 +/ | |
5 module Lexer; | |
3 | 6 import Token; |
28 | 7 import Keywords; |
8 import Identifier; | |
2 | 9 import std.stdio; |
4 | 10 import std.utf; |
11 import std.uni; | |
30 | 12 import std.conv; |
0 | 13 |
/// Character properties table, indexed by byte value (0..255).
/// The low byte of each entry holds CProperty flags; the second byte holds
/// the value an escape sequence introduced by that character evaluates to
/// (see char2ev). Entries for bytes >= 0x80 are all zero.
static const int ptable[256] = [
  // 0x00-0x0F: NUL carries 0x5c in its escape byte; \t, \v and \f are whitespace
  0x5c00, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
  // 0x10-0x1F: byte 26 (Control+Z) carries 0x5c in its escape byte
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c00, 0, 0, 0, 0, 0,
  // 0x20-0x2F: space is whitespace; '"' and '\'' carry escape values
  32, 0, 0x2200, 0, 0, 0, 0, 0x2700, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x30-0x3F: '0'-'7' octal|digit|hex, '8'-'9' digit|hex, '?' escape value
  7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0x3f00,
  // 0x40-0x4F: 'A'-'F' hex|alpha, 'G'-'O' alpha
  0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  // 0x50-0x5F: 'P'-'Z' alpha, '\\' escape value, '_' underscore
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0x5c00, 0, 0,16,
  // 0x60-0x6F: 'a'-'f' hex|alpha, 'g'-'o' alpha; a, b, f, n also carry escape values
  0, 0x70c, 0x80c,12,12,12, 0xc0c, 8, 8, 8, 8, 8, 8, 8, 0xa08, 8,
  // 0x70-0x7F: 'p'-'z' alpha; r, t, v also carry escape values
  8, 8, 0xd08, 8, 0x908, 8, 0xb08, 8, 8, 8, 8, 0, 0, 0, 0, 0,
  // 0x80-0xFF: no properties
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
0 | 33 |
/// Bit flags stored in the low byte of every ptable entry.
enum CProperty
{
  Octal      = 1<<0, /// set by the table generator for '0'..'7'
  Digit      = 1<<1, /// set for '0'..'9'
  Hex        = 1<<2, /// set for '0'..'9', 'a'..'f' and 'A'..'F'
  Alpha      = 1<<3, /// set for 'a'..'z' and 'A'..'Z'
  Underscore = 1<<4, /// set for '_'
  Whitespace = 1<<5  /// set for space, '\t', '\v' and '\f'
}
43 | |
/// Bit mask selecting the escape value stored in the second byte of a ptable entry.
const uint EVMask = 0xFF00;

private alias CProperty CP;

// Character classification helpers.
// Each one returns the matching flag bits of ptable[c]: non-zero when the
// character has the property, zero otherwise. The result is not normalised to 0/1.

/// Non-zero if c is flagged as an octal digit.
int isoctal(char c)
{
  return ptable[c] & CP.Octal;
}

/// Non-zero if c is flagged as a decimal digit.
int isdigit(char c)
{
  return ptable[c] & CP.Digit;
}

/// Non-zero if c is flagged as a hexadecimal digit.
int ishexad(char c)
{
  return ptable[c] & CP.Hex;
}

/// Non-zero if c is flagged as a letter.
int isalpha(char c)
{
  return ptable[c] & CP.Alpha;
}

/// Non-zero if c is flagged as a letter or a decimal digit.
int isalnum(char c)
{
  return ptable[c] & (CP.Alpha | CP.Digit);
}

/// Non-zero if c is flagged as a letter or an underscore.
int isidbeg(char c)
{
  return ptable[c] & (CP.Alpha | CP.Underscore);
}

/// Non-zero if c is flagged as a letter, an underscore or a decimal digit.
int isident(char c)
{
  return ptable[c] & (CP.Alpha | CP.Underscore | CP.Digit);
}

/// Non-zero if c is flagged as whitespace.
int isspace(char c)
{
  return ptable[c] & CP.Whitespace;
}

/// Returns the escape value stored for c, i.e. everything above the flag byte
/// of its ptable entry; zero when no escape value is stored.
int char2ev(char c)
{
  return ptable[c] >> 8;
}
13 | 56 |
/// Table generator, compiled only with version=gen_ptable.
/// Recomputes every ptable entry and prints the table as a formatted array literal.
// NOTE(review): ptable is declared 'static const'; presumably that qualifier
// has to be lifted for this generator to build - confirm.
version(gen_ptable)
static this()
{
  alias ptable tbl;

  // Step 1: compute the property flags of every byte value.
  for (int ch; ch < tbl.length; ++ch)
  {
    int flags = 0;
    if ('0' <= ch && ch <= '7')
      flags |= CP.Octal;
    if ('0' <= ch && ch <= '9')
      flags |= CP.Digit;
    // Every decimal digit is also a hexadecimal digit.
    if ((flags & CP.Digit) || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F')
      flags |= CP.Hex;
    if ('a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z')
      flags |= CP.Alpha;
    if (ch == '_')
      flags |= CP.Underscore;
    if (ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f')
      flags |= CP.Whitespace;
    tbl[ch] = flags;
  }

  // Step 2: store escape sequence values in the second byte.
  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");
  tbl['\''] |= 39 << 8;
  tbl['"']  |= 34 << 8;
  tbl['?']  |= 63 << 8;
  // Bytes 0 and 26 are overwritten with the escape value of the backslash,
  // which is then merged into the backslash entry itself.
  tbl[26] = 92 << 8;
  tbl[0] = tbl[26];
  tbl['\\'] |= tbl[0];
  tbl['a'] |= 7 << 8;
  tbl['b'] |= 8 << 8;
  tbl['f'] |= 12 << 8;
  tbl['n'] |= 10 << 8;
  tbl['r'] |= 13 << 8;
  tbl['t'] |= 9 << 8;
  tbl['v'] |= 11 << 8;

  // Step 3: print a formatted array literal, sixteen entries per line.
  char[] literal = "[\n";
  for (int idx; idx < tbl.length; ++idx)
  {
    int entry = tbl[idx];
    // Entries with an escape value are printed in hex, plain flags in decimal.
    literal ~= std.string.format(entry>255?" 0x%x,":"%2d,", entry, ((idx+1) % 16) ? "":"\n");
  }
  // Replace the trailing ",\n" of the last entry with the closing bracket.
  literal[$-2..$] = "\n]";
  writefln(literal);
}
101 | |
/// UTF-8 encoding (three bytes) of U+2028, the Unicode line separator.
const char[3] LS = "\u2028";
/// UTF-8 encoding (three bytes) of U+2029, the Unicode paragraph separator.
const char[3] PS = "\u2029";

/// U+2028 as a single code point.
const dchar LSd = 0x2028;
/// U+2029 as a single code point.
const dchar PSd = 0x2029;

/// Control+Z
const uint _Z_ = 26;
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
109 |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
/// Message identifiers. Each member is an index into the table of error
/// messages, so the order of the members must match the order of that table.
enum MID
{
  InvalidUnicodeCharacter,
  InvalidUTF8Sequence,

  // Character literals: ''
  UnterminatedCharacterLiteral,
  EmptyCharacterLiteral,

  // Special token: #line
  ExpectedIdentifierSTLine,
  ExpectedNormalStringLiteral,
  ExpectedNumberAfterSTLine,
  NewlineInSpecialToken,
  UnterminatedSpecialToken,

  // Normal string literals: ""
  UnterminatedString,

  // Hex string literals: x""
  NonHexCharInHexString,
  OddNumberOfDigitsInHexString,
  UnterminatedHexString,

  // Comments: /* */ /+ +/
  UnterminatedBlockComment,
  UnterminatedNestedComment,

  // Raw string literals: `` r""
  UnterminatedRawString,
  UnterminatedBackQuoteString,

  // Escape sequences: \x \u \U
  UndefinedEscapeSequence,
  InsufficientHexDigits,

  // HTML entities: \&[a-zA-Z][a-zA-Z0-9]+;
  UnterminatedHTMLEntity,
  InvalidBeginHTMLEntity,
}
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
143 |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
/// Error message texts, indexed by MID. The entries must stay in the same
/// order as the members of that enum.
string[] messages = [
  "invalid Unicode character.",                            // MID.InvalidUnicodeCharacter
  "invalid UTF-8 sequence.",                               // MID.InvalidUTF8Sequence

  // Character literals: ''
  "unterminated character literal.",                       // MID.UnterminatedCharacterLiteral
  "empty character literal.",                              // MID.EmptyCharacterLiteral

  // Special token: #line
  "expected 'line' after '#'.",                            // MID.ExpectedIdentifierSTLine
  `the filespec must be defined in a double quote string literal (e.g. "filespec".)`, // MID.ExpectedNormalStringLiteral
  "positive integer expected after #line",                 // MID.ExpectedNumberAfterSTLine
  "newline not allowed inside special token.",             // MID.NewlineInSpecialToken
  "expected a terminating newline after special token.",   // MID.UnterminatedSpecialToken

  // Normal string literals: ""
  "unterminated string literal.",                          // MID.UnterminatedString

  // Hex string literals: x""
  "non-hex character '{1}' found in hex string.",          // MID.NonHexCharInHexString
  "odd number of hex digits in hex string.",               // MID.OddNumberOfDigitsInHexString
  "unterminated hex string.",                              // MID.UnterminatedHexString

  // Comments: /* */ /+ +/
  "unterminated block comment (/* */).",                   // MID.UnterminatedBlockComment
  "unterminated nested comment (/+ +/).",                  // MID.UnterminatedNestedComment

  // Raw string literals: `` r""
  "unterminated raw string.",                              // MID.UnterminatedRawString
  "unterminated back quote string.",                       // MID.UnterminatedBackQuoteString

  // Escape sequences: \x \u \U
  "found undefined escape sequence.",                      // MID.UndefinedEscapeSequence
  "insufficient number of hex digits in escape sequence.", // MID.InsufficientHexDigits

  // HTML entities: \&[a-zA-Z][a-zA-Z0-9]+;
  "unterminated html entity.",                             // MID.UnterminatedHTMLEntity
  "html entities must begin with a letter.",               // MID.InvalidBeginHTMLEntity
];
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
175 |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
/// A single reported problem: which message it refers to, what kind of
/// problem it is, and where it was found.
class Problem
{
  /// Kind of problem. The member names suggest the compiler stage that
  /// reported it.
  enum Type
  {
    Lexer,
    Parser,
    Semantic
  }

  MID id;    /// Identifier of the message describing the problem.
  Type type; /// Kind of problem.
  uint loc;  /// Location of the problem; presumably a line number - confirm against callers.

  /// Creates a problem record from its kind, message identifier and location.
  this(Type type, MID id, uint loc)
  {
    this.type = type;
    this.id = id;
    this.loc = loc;
  }
}
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
195 |
0 | 196 class Lexer |
197 { | |
4 | 198 Token token; |
30 | 199 string text; |
4 | 200 char* p; |
201 char* end; | |
202 | |
17 | 203 uint loc = 1; /// line of code |
204 | |
30 | 205 char[] fileName; |
206 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
207 Problem[] errors; |
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
208 |
28 | 209 Identifier[string] idtable; |
210 | |
/++
  Sets up the lexer for scanning the given source text.

  The scanner walks the text through a raw pointer and stops at a
  terminating zero character, so the stored text always ends in one:
  if the given text is empty or does not end in a zero, a zero-terminated
  copy is stored instead of the text itself.

  Params:
    text     = the source text to be scanned; may be empty.
    fileName = stored in the lexer's fileName member.
+/
this(string text, string fileName)
{
  this.fileName = fileName;

  // Check the length first: indexing the last element of an empty text is
  // an out-of-bounds access.
  if (text.length == 0 || text[$-1] != 0)
  {
    // Concatenation always allocates a new array, so the caller's buffer
    // is never resized or written to.
    this.text = text ~ '\0';
  }
  else
    this.text = text;

  this.p = this.text.ptr;
  this.end = this.p + this.text.length;

  loadKeywords();
}
227 | |
228 public void scan(out Token t) | |
3 | 229 { |
4 | 230 assert(p < end); |
0 | 231 |
10 | 232 uint c = *p; |
4 | 233 |
5 | 234 while(1) |
4 | 235 { |
5 | 236 t.start = p; |
16 | 237 |
5 | 238 if (c == 0) |
239 { | |
51 | 240 assert(*p == 0); |
17 | 241 ++p; |
51 | 242 assert(p == end); |
5 | 243 t.type = TOK.EOF; |
17 | 244 t.end = p; |
5 | 245 return; |
246 } | |
4 | 247 |
17 | 248 if (c == '\n') |
249 { | |
250 c = *++p; | |
251 ++loc; | |
252 continue; | |
253 } | |
254 else if (c == '\r') | |
255 { | |
256 c = *++p; | |
257 if (c != '\n') | |
258 ++loc; | |
259 continue; | |
260 } | |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
261 else if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
262 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
263 p += 3; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
264 c = *p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
265 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
266 } |
17 | 267 |
13 | 268 if (isidbeg(c)) |
5 | 269 { |
33 | 270 if (c == 'r' && p[1] == '"' && ++p) |
31
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
271 return scanRawStringLiteral(t); |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
272 if (c == 'x' && p[1] == '"') |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
273 return scanHexStringLiteral(t); |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
274 Lidentifier: |
5 | 275 do |
276 { c = *++p; } | |
49 | 277 while (isident(c) || c & 128 && isUniAlpha(decodeUTF8())) |
28 | 278 |
5 | 279 t.end = p; |
28 | 280 |
281 string str = t.span; | |
282 Identifier* id = str in idtable; | |
283 | |
284 if (!id) | |
285 { | |
286 idtable[str] = Identifier.Identifier(TOK.Identifier, str); | |
287 id = str in idtable; | |
288 } | |
289 assert(id); | |
290 t.type = id.type; | |
5 | 291 return; |
292 } | |
293 | |
15 | 294 if (isdigit(c)) |
295 return scanNumber(t); | |
296 | |
8 | 297 if (c == '/') |
5 | 298 { |
8 | 299 c = *++p; |
14 | 300 switch(c) |
5 | 301 { |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
302 case '=': |
14 | 303 ++p; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
304 t.type = TOK.DivAssign; |
14 | 305 t.end = p; |
306 return; | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
307 case '+': |
8 | 308 uint level = 1; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
309 while (1) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
310 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
311 c = *++p; |
42 | 312 LswitchNC: // only jumped to from default case of next switch(c) |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
313 switch (c) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
314 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
315 case '\r': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
316 if (p[1] == '\n') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
317 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
318 case '\n': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
319 ++loc; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
320 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
321 case 0, _Z_: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
322 error(MID.UnterminatedNestedComment); |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
323 goto LreturnNC; |
42 | 324 default: |
325 } | |
326 | |
327 c <<= 8; | |
328 c |= *++p; | |
329 switch (c) | |
330 { | |
331 case 0x2F2B: // /+ | |
332 ++level; | |
333 continue; | |
334 case 0x2B2F: // +/ | |
335 if (--level == 0) | |
336 { | |
337 ++p; | |
338 LreturnNC: | |
339 t.type = TOK.Comment; | |
340 t.end = p; | |
341 return; | |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
342 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
343 continue; |
42 | 344 case 0xE280: // LS[0..1] || PS[0..1] |
345 if (p[1] == LS[2] || p[1] == PS[2]) | |
346 { | |
347 ++loc; | |
348 ++p; | |
349 } | |
43
1845c23dd056
- Matched some parts of the scanner of block comments to the scanner of nested comments.
aziz
parents:
42
diff
changeset
|
350 continue; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
351 default: |
42 | 352 c &= char.max; |
353 goto LswitchNC; | |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
354 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
355 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
356 case '*': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
357 while (1) |
7 | 358 { |
43
1845c23dd056
- Matched some parts of the scanner of block comments to the scanner of nested comments.
aziz
parents:
42
diff
changeset
|
359 c = *++p; |
1845c23dd056
- Matched some parts of the scanner of block comments to the scanner of nested comments.
aziz
parents:
42
diff
changeset
|
360 LswitchBC: // only jumped to from default case of next switch(c) |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
361 switch (c) |
8 | 362 { |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
363 case '\r': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
364 if (p[1] == '\n') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
365 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
366 case '\n': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
367 ++loc; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
368 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
369 case 0, _Z_: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
370 error(MID.UnterminatedBlockComment); |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
371 goto LreturnBC; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
372 default: |
8 | 373 } |
41 | 374 |
375 c <<= 8; | |
376 c |= *++p; | |
377 switch (c) | |
378 { | |
379 case 0x2A2F: // */ | |
380 ++p; | |
381 LreturnBC: | |
382 t.type = TOK.Comment; | |
383 t.end = p; | |
384 return; | |
385 case 0xE280: // LS[0..1] || PS[0..1] | |
386 if (p[1] == LS[2] || p[1] == PS[2]) | |
387 { | |
388 ++loc; | |
43
1845c23dd056
- Matched some parts of the scanner of block comments to the scanner of nested comments.
aziz
parents:
42
diff
changeset
|
389 ++p; |
41 | 390 } |
43
1845c23dd056
- Matched some parts of the scanner of block comments to the scanner of nested comments.
aziz
parents:
42
diff
changeset
|
391 continue; |
41 | 392 default: |
393 c &= char.max; | |
43
1845c23dd056
- Matched some parts of the scanner of block comments to the scanner of nested comments.
aziz
parents:
42
diff
changeset
|
394 goto LswitchBC; |
41 | 395 } |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
396 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
397 assert(0); |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
398 case '/': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
399 while (1) |
7 | 400 { |
8 | 401 c = *++p; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
402 switch (c) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
403 { |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
404 case '\r': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
405 if (p[1] == '\n') |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
406 ++p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
407 case '\n': |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
408 case 0, _Z_: |
11 | 409 break; |
32
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
410 case LS[0]: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
411 if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
412 break; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
413 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
414 default: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
415 continue; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
416 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
417 t.type = TOK.Comment; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
418 t.end = p; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
419 return; |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
420 } |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
421 default: |
d7011daa4740
- Added missing commas after the items in the messages table.
aziz
parents:
31
diff
changeset
|
422 t.type = TOK.Div; |
10 | 423 t.end = p; |
424 return; | |
425 } | |
5 | 426 } |
427 | |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
428 switch (c) |
20 | 429 { |
48 | 430 case '\'': |
431 return scanCharacterLiteral(t); | |
432 case '`': | |
433 return scanRawStringLiteral(t); | |
434 case '"': | |
435 return scanNormalStringLiteral(t); | |
436 case '\\': | |
437 char[] buffer; | |
438 do | |
439 { | |
440 ++p; | |
441 c = scanEscapeSequence(); | |
50 | 442 if (c == 0xFFFF) |
443 break; | |
48 | 444 if (c < 128) |
445 buffer ~= c; | |
446 else | |
447 encodeUTF8(buffer, c); | |
448 } while (*p == '\\') | |
449 buffer ~= 0; | |
450 t.type = TOK.String; | |
451 t.str = buffer; | |
452 t.end = p; | |
453 return; | |
38
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
454 case '>': /* > >= >> >>= >>> >>>= */ |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
455 c = *++p; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
456 switch (c) |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
457 { |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
458 case '=': |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
459 t.type = TOK.GreaterEqual; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
460 goto Lcommon; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
461 case '>': |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
462 if (p[1] == '>') |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
463 { |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
464 ++p; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
465 if (p[1] == '=') |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
466 { ++p; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
467 t.type = TOK.URShiftAssign; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
468 } |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
469 else |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
470 t.type = TOK.URShift; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
471 } |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
472 else if (p[1] == '=') |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
473 { |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
474 ++p; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
475 t.type = TOK.RShiftAssign; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
476 } |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
477 else |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
478 t.type = TOK.RShift; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
479 goto Lcommon; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
480 default: |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
481 t.type = TOK.Greater; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
482 goto Lcommon2; |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
483 } |
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
484 assert(0); |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
485 case '<': /* < <= <> <>= << <<= */ |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
486 c = *++p; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
487 switch (c) |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
488 { |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
489 case '=': |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
490 t.type = TOK.LessEqual; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
491 goto Lcommon; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
492 case '<': |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
493 if (p[1] == '=') { |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
494 ++p; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
495 t.type = TOK.LShiftAssign; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
496 } |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
497 else |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
498 t.type = TOK.LShift; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
499 goto Lcommon; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
500 case '>': |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
501 if (p[1] == '=') { |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
502 ++p; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
503 t.type = TOK.LorEorG; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
504 } |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
505 else |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
506 t.type = TOK.LorG; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
507 goto Lcommon; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
508 default: |
38
640c45aaaaee
- Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents:
37
diff
changeset
|
509 t.type = TOK.Less; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
510 goto Lcommon2; |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
511 } |
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
512 assert(0); |
37
7f3bcb97d017
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
36
diff
changeset
|
513 case '!': /* ! !< !> !<= !>= !<> !<>= */ |
35
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
514 c = *++p; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
515 switch (c) |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
516 { |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
517 case '<': |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
518 c = *++p; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
519 if (c == '>') |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
520 { |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
521 if (p[1] == '=') { |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
522 ++p; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
523 t.type = TOK.Unordered; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
524 } |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
525 else |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
526 t.type = TOK.UorE; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
527 } |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
528 else if (c == '=') |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
529 { |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
530 t.type = TOK.UorG; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
531 } |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
532 else { |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
533 t.type = TOK.UorGorE; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
534 goto Lcommon2; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
535 } |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
536 goto Lcommon; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
537 case '>': |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
538 if (p[1] == '=') |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
539 { |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
540 ++p; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
541 t.type = TOK.UorL; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
542 } |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
543 else |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
544 t.type = TOK.UorLorE; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
545 goto Lcommon; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
546 case '=': |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
547 t.type = TOK.NotEqual; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
548 goto Lcommon; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
549 default: |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
550 t.type = TOK.Not; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
551 goto Lcommon2; |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
552 } |
c470b9356e35
- Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents:
34
diff
changeset
|
553 assert(0); |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
554 case '.': /* . .. ... */ |
22
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
555 if (p[1] == '.') |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
556 { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
557 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
558 if (p[1] == '.') { |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
559 ++p; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
560 t.type = TOK.Ellipses; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
561 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
562 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
563 t.type = TOK.Slice; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
564 } |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
565 else |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
566 t.type = TOK.Dot; |
b05fff8e2ce4
- Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents:
21
diff
changeset
|
567 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
568 case '|': /* | || |= */ |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
569 c = *++p; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
570 if (c == '=') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
571 t.type = TOK.OrAssign; |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
572 else if (c == '|') |
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
573 t.type = TOK.OrLogical; |
26 | 574 else { |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
575 t.type = TOK.OrBinary; |
26 | 576 goto Lcommon2; |
577 } | |
23
1a7903701a3d
- Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents:
22
diff
changeset
|
578 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
579 case '&': /* & && &= */ |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
580 c = *++p; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
581 if (c == '=') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
582 t.type = TOK.AndAssign; |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
583 else if (c == '&') |
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
584 t.type = TOK.AndLogical; |
26 | 585 else { |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
586 t.type = TOK.AndBinary; |
26 | 587 goto Lcommon2; |
588 } | |
24
903f91163f23
- Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents:
23
diff
changeset
|
589 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
590 case '+': /* + ++ += */ |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
591 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
592 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
593 t.type = TOK.PlusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
594 else if (c == '+') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
595 t.type = TOK.PlusPlus; |
26 | 596 else { |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
597 t.type = TOK.Plus; |
26 | 598 goto Lcommon2; |
599 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
600 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
601 case '-': /* - -- -= */ |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
602 c = *++p; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
603 if (c == '=') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
604 t.type = TOK.MinusAssign; |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
605 else if (c == '-') |
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
606 t.type = TOK.MinusMinus; |
26 | 607 else { |
608 t.type = TOK.Minus; | |
609 goto Lcommon2; | |
610 } | |
611 goto Lcommon; | |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
612 case '=': /* = == */ |
26 | 613 if (p[1] == '=') { |
614 ++p; | |
615 t.type = TOK.Equal; | |
616 } | |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
617 else |
26 | 618 t.type = TOK.Assign; |
25
9c866208b3f6
- Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents:
24
diff
changeset
|
619 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
620 case '~': /* ~ ~= */ |
27 | 621 if (p[1] == '=') { |
622 ++p; | |
623 t.type = TOK.CatAssign; | |
624 } | |
625 else | |
626 t.type = TOK.Tilde; | |
627 goto Lcommon; | |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
628 case '*': /* * *= */ |
29
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
629 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
630 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
631 t.type = TOK.MulAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
632 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
633 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
634 t.type = TOK.Mul; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
635 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
636 case '^': /* ^ ^= */ |
29
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
637 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
638 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
639 t.type = TOK.XorAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
640 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
641 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
642 t.type = TOK.Xor; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
643 goto Lcommon; |
36
3c7210a722f7
- Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents:
35
diff
changeset
|
644 case '%': /* % %= */ |
29
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
645 if (p[1] == '=') { |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
646 ++p; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
647 t.type = TOK.ModAssign; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
648 } |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
649 else |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
650 t.type = TOK.Mod; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
651 goto Lcommon; |
ef83eea26bbd
- Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents:
28
diff
changeset
|
652 // Single character tokens: |
20 | 653 case '(': |
654 t.type = TOK.LParen; | |
655 goto Lcommon; | |
656 case ')': | |
657 t.type = TOK.RParen; | |
658 goto Lcommon; | |
659 case '[': | |
660 t.type = TOK.LBracket; | |
661 goto Lcommon; | |
662 case ']': | |
663 t.type = TOK.RBracket; | |
664 goto Lcommon; | |
665 case '{': | |
666 t.type = TOK.LBrace; | |
667 goto Lcommon; | |
668 case '}': | |
669 t.type = TOK.RBrace; | |
21
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
670 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
671 case ':': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
672 t.type = TOK.Colon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
673 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
674 case ';': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
675 t.type = TOK.Semicolon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
676 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
677 case '?': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
678 t.type = TOK.Question; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
679 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
680 case ',': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
681 t.type = TOK.Comma; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
682 goto Lcommon; |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
683 case '$': |
c785c122e4e6
- Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents:
20
diff
changeset
|
684 t.type = TOK.Dollar; |
20 | 685 Lcommon: |
686 ++p; | |
26 | 687 Lcommon2: |
20 | 688 t.end = p; |
689 return; | |
30 | 690 case '#': |
691 scanSpecialToken(); | |
51 | 692 c = *p; |
693 continue; | |
20 | 694 default: |
695 } | |
696 | |
49 | 697 if (c & 128 && isUniAlpha(decodeUTF8())) |
12
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
698 goto Lidentifier; |
5 | 699 c = *++p; |
4 | 700 } |
3 | 701 } |
4 | 702 |
/++
  Scans the next token into t without consuming any input.

  The lexer state that a trial scan can modify is saved before the call to
  scan() and rolled back afterwards:
  - p, the current position in the source text;
  - loc, the line counter, which the scanning routines increment whenever they
    pass a line break (without the rollback those lines would be counted a
    second time when the token is scanned for real);
  - errors, which is cut back to its previous length so that a problem in the
    peeked token is not reported twice.

  Params:
    t = receives the token that the next call to scan() will produce.
+/
void peek(ref Token t)
{
  char* savedPos = p;
  auto savedLoc = loc;
  uint errorCount = errors.length;

  scan(t);

  // Undo every side effect of the trial scan.
  p = savedPos;
  loc = savedLoc;
  if (errors.length != errorCount)
    errors = errors[0..errorCount];
}
712 | |
47
8aa37a78937b
- Properly implemented scanner for normal string literals.
aziz
parents:
46
diff
changeset
|
/++
  Scans a double-quoted string literal starting at the opening quote.

  On return t.type is TOK.String, t.str holds the literal's contents followed
  by a terminating zero, t.pf holds the result of scanPostfix() and t.end
  points just past the scanned text.

  - Escape sequences are translated through scanEscapeSequence().
  - "\r", "\r\n" and "\n" are all stored as a single '\n' and advance the
    line counter loc; the same is done for the code points LSd and PSd
    (presumably the Unicode line and paragraph separators - confirm at their
    definition).
  - Reaching 0 or _Z_ before the closing quote reports
    MID.UnterminatedString and returns what was collected so far.

  Params:
    t = the token to be filled in.
+/
void scanNormalStringLiteral(ref Token t)
{
  assert(*p == '"');
  ++p; // Step over the opening quote.
  t.type = TOK.String;
  char[] text;
  for (;;)
  {
    switch (*p)
    {
    case '"':
      ++p; // Step over the closing quote.
    Lfinish:
      text ~= 0; // Zero-terminate the collected contents.
      t.str = text;
      t.pf = scanPostfix();
      t.end = p;
      return;
    case '\\':
      ++p;
      dchar escaped = scanEscapeSequence();
      // 0xFFFF is skipped without storing anything (presumably the
      // "invalid sequence" marker of scanEscapeSequence - confirm).
      if (escaped != 0xFFFF)
      {
        if (escaped >= 128)
          encodeUTF8(text, escaped);
        else
          text ~= escaped;
      }
      continue;
    case '\r':
      if (p[1] == '\n')
        ++p; // Treat "\r\n" as one line break.
      // Deliberate fall-through to the '\n' case.
    case '\n':
      ++p;
      ++loc;
      text ~= '\n';
      continue;
    case 0, _Z_:
      error(MID.UnterminatedString);
      goto Lfinish;
    default:
      if (!(*p & 128))
        text ~= *p++; // Plain ASCII character: copy it verbatim.
      else
      {
        // Lead byte of a multi-byte UTF-8 sequence.
        char* start = p;
        dchar decoded = decodeUTF8();
        if (decoded == LSd || decoded == PSd)
          goto case '\n';

        if (decoded != 0xFFFF)
        {
          // NOTE(review): assumes decodeUTF8() leaves p on the last byte of
          // the sequence, hence the extra increment - confirm.
          ++p;
          text ~= start[0 .. p - start];
        }
      }
    }
  }
}
8aa37a78937b
- Properly implemented scanner for normal string literals.
aziz
parents:
46
diff
changeset
|
771 |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
/++
  Scans a character literal.

  On entry p must point at the opening single quote (asserted).
  On return t.type is TOK.Character and t.end is the current position.
  t.dchar_ receives the value of an escape sequence, of an ASCII
  character, or of a decoded multi-byte character.

  Errors reported:
    MID.EmptyCharacterLiteral        - for ''
    MID.UnterminatedCharacterLiteral - when the closing quote is missing, or
                                       when a newline, LS, PS, 0 or _Z_
                                       follows the opening quote.
+/
void scanCharacterLiteral(ref Token t)
{
  assert(*p == '\'');
  MID id = MID.UnterminatedCharacterLiteral;
  ++p;
  switch (*p)
  {
  case '\\':
    ++p;
    t.dchar_ = scanEscapeSequence();
    break;
  case '\'':
    ++p;
    id = MID.EmptyCharacterLiteral;
    // Intentional fall-through: an empty literal is reported via Lerr.
  case '\n', '\r', 0, _Z_:
    goto Lerr;
  default:
    uint c = *p;
    if (c & 128)
    {
      c = decodeUTF8();
      if (c == LSd || c == PSd)
        goto Lerr;
      // decodeUTF8() leaves p on the last byte of a valid sequence, but
      // after a failure (0xFFFF) it has already moved p to where scanning
      // should resume. Advance only on success, exactly as the normal
      // string scanner does, so that the following byte is not skipped.
      if (c != 0xFFFF)
        ++p;
    }
    else
      ++p; // Plain ASCII character.
    t.dchar_ = c;
  }

  if (*p == '\'')
    ++p;
  else
  Lerr:
    error(id);
  t.type = TOK.Character;
  t.end = p;
}
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
808 |
/++
  Consumes an optional string postfix character.

  Returns: 'c', 'w' or 'd' when p points at one of them (p is advanced
  past it), otherwise 0 with p unchanged.
+/
char scanPostfix()
{
  char suffix = *p;
  if (suffix == 'c' || suffix == 'w' || suffix == 'd')
  {
    ++p;
    return suffix;
  }
  return 0;
}
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
821 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
/++
  Scans a raw string literal: `...` or r"...".

  On entry p must point at the opening delimiter (asserted): a backquote,
  or a double quote that is preceded by 'r'.
  The characters between the delimiters are copied verbatim into t.str
  (zero-terminated); '\r' and "\r\n" are stored as a single '\n'.
  t.pf receives the optional postfix and t.end the position after the
  literal.

  Errors reported at 0 or _Z_ before the closing delimiter:
    MID.UnterminatedRawString       - for r"..."
    MID.UnterminatedBackQuoteString - for `...`
+/
void scanRawStringLiteral(ref Token t)
{
  uint delim = *p;
  assert(delim == '`' || delim == '"' && p[-1] == 'r');
  t.type = TOK.String;
  char[] buffer;
  uint c;
  while (1)
  {
    c = *++p;
    switch (c)
    {
    case '\r':
      if (p[1] == '\n')
        ++p;
      c = '\n'; // Convert '\r' and '\r\n' to '\n'
      // Intentional fall-through.
    case '\n':
      ++loc;
      // Leave the switch so that the newline is appended below. The
      // previous 'continue' dropped every newline from the string value.
      break;
    case '`':
    case '"':
      if (c == delim)
      {
        ++p;
        t.pf = scanPostfix();
      Lreturn:
        t.str = buffer ~ '\0';
        t.end = p;
        return;
      }
      // The other quote character is ordinary content.
      break;
    case LS[0]:
      if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
      {
        // TODO: convert LS or PS to \n?
        buffer ~= p[0..3];
        p += 2; // The loop's ++p steps over the third byte.
        ++loc;
        continue;
      }
      break;
    case 0, _Z_:
      // delim holds the delimiter character itself (` or "), never 'r',
      // so the r"..." form is recognised by its double quote.
      if (delim == '"')
        error(MID.UnterminatedRawString);
      else
        error(MID.UnterminatedBackQuoteString);
      goto Lreturn;
    default:
    }
    buffer ~= c; // copy character to buffer
  }
  assert(0);
}
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
875 |
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
/++
  Scans a hex string literal: x"...".

  On entry p must point at the 'x' of the x" prefix (asserted).
  Pairs of hex digits are combined into bytes; whitespace and newlines
  ('\n', '\r', "\r\n", LS, PS) between digits are skipped, and newlines
  increment loc.
  On success t.str holds the bytes, t.pf the optional postfix and t.end the
  position after the literal.

  Errors reported:
    MID.OddNumberOfDigitsInHexString - closing quote after an odd number
                                       of digits (the token is still
                                       completed)
    MID.UnterminatedHexString        - 0 or _Z_ before the closing quote
    MID.NonHexCharInHexString        - any other character
  For the last two, scanning stops, t.pf is set to 0 and t.str is not
  assigned.
+/
void scanHexStringLiteral(ref Token t)
{
  assert(p[0] == 'x' && p[1] == '"');
  p+=2;
  t.type = TOK.String;

  uint c;
  ubyte[] buffer;
  ubyte h; // hex number
  uint n; // number of hex digits
  MID mid;

  while (1)
  {
    c = *p++;
    switch (c)
    {
    case '"':
      if (n & 1)
      {
        mid = MID.OddNumberOfDigitsInHexString;
        error(mid);
      }
      t.str = cast(string) buffer;
      t.pf = scanPostfix();
      t.end = p;
      return;
    case '\r':
      if (*p == '\n')
        ++p;
      // Intentional fall-through.
    case '\n':
      ++loc;
      continue;
    case LS[0]:
      if (*p == LS[1] && (p[1] == LS[2] || p[1] == PS[2])) {
        p += 2;
        ++loc;
      }
      continue;
    case 0, _Z_:
      // p was already advanced past the terminator by '*p++'. Step back so
      // that p and t.end stay on the terminator instead of pointing beyond
      // the end of the source text (the raw string scanner also stops on
      // the terminator).
      --p;
      mid = MID.UnterminatedHexString;
      goto Lerr;
    default:
      if (ishexad(c))
      {
        // Convert the digit character to its numeric value.
        if (c <= '9')
          c -= '0';
        else if (c <= 'F')
          c -= 'A' - 10;
        else
          c -= 'a' - 10;

        if (n & 1)
        {
          // Second digit of a pair: complete the byte and store it.
          h <<= 4;
          h |= c;
          buffer ~= h;
        }
        else
          h = c; // First digit of a pair.
        ++n;
        continue;
      }
      else if (isspace(c))
        continue;
      mid = MID.NonHexCharInHexString;
      goto Lerr;
    }
  }

Lerr:
  error(mid);
  t.pf = 0;
  t.end = p;
}
94f09f4e988e
- Added struct for strings to Token with 'pf' = postfix.
aziz
parents:
30
diff
changeset
|
952 |
/++
  Scans the part of an escape sequence that follows the backslash.

  p points at the character after the backslash on entry and is advanced
  past whatever was consumed.

  Returns: the escape value looked up by char2ev(), the value of a
  \x (2 digits), \u (4 digits) or \U (8 digits) hex escape, or the value
  of an octal escape of one to three digits. 0xFFFF is returned for an
  incomplete hex escape, for an undefined escape and for \&...; entities
  (which are scanned but not yet converted).

  Errors reported: MID.InsufficientHexDigits, MID.UnterminatedHTMLEntity,
  MID.InvalidBeginHTMLEntity, MID.UndefinedEscapeSequence.
+/
dchar scanEscapeSequence()
{
  uint value = char2ev(*p);
  if (value != 0)
  {
    ++p;
    return value;
  }

  value = 0xFFFF;
  uint remaining = 2; // number of hex digits still required

  switch (*p)
  {
  case 'U':
    remaining = 8;
    goto case 'x';
  case 'u':
    remaining = 4;
    goto case 'x';
  case 'x':
    value = 0;
    for (;;)
    {
      ++p;
      if (!ishexad(*p))
      {
        error(MID.InsufficientHexDigits);
        value = 0xFFFF;
        break;
      }

      uint nibble;
      if (*p <= '9')
        nibble = *p - '0';
      else if (*p <= 'F')
        nibble = *p - 'A' + 10;
      else
        nibble = *p - 'a' + 10;
      value = value * 16 + nibble;

      if (--remaining == 0)
      {
        ++p; // step past the last digit
        break;
      }
    }
    break;
  default:
    if (isoctal(*p))
    {
      // One, two or three octal digits.
      value = *p - '0';
      ++p;
      if (isoctal(*p))
      {
        value = value * 8 + (*p - '0');
        ++p;
        if (isoctal(*p))
        {
          value = value * 8 + (*p - '0');
          ++p;
        }
      }
    }
    else if (*p == '&')
    {
      ++p;
      if (!isalpha(*p))
        error(MID.InvalidBeginHTMLEntity);
      else
      {
        // Skip the first letter and all following alphanumerics.
        ++p;
        while (isalnum(*p))
          ++p;
        if (*p == ';')
        {
          // TODO: convert entity to unicode codepoint.
          ++p;
        }
        else
          error(MID.UnterminatedHTMLEntity);
      }
    }
    else
      error(MID.UndefinedEscapeSequence);
  }

  return value;
}
1043 | |
/++
  Scans a run of decimal digits.

  Advances p at least once and then past every following digit, marks the
  token as TOK.Number and stores the converted text in t._uint.
+/
void scanNumber(ref Token t)
{
  do
    ++p;
  while (isdigit(*p));

  t.end = p;
  t.type = TOK.Number;
  // NOTE(review): toInt() presumably throws on values that do not fit
  // into an int - confirm how callers are expected to handle that.
  t._uint = toInt(t.span);
}
1051 | |
/++
  Scan special token: #line Integer [Filespec] EndOfLine

  On entry p must point at '#' (asserted). The following tokens are
  inspected with peek() and consumed by moving p to their end.
  Filespec is either a "..." string literal, whose contents are stored in
  fileName, or the identifier __FILE__.
  After a well-formed directive the rest of the line, including the line
  terminator, is consumed and this.loc is set to Integer - 1.

  Errors reported (this.loc is not updated in these cases):
    MID.ExpectedIdentifierSTLine, MID.ExpectedNumberAfterSTLine,
    MID.NewlineInSpecialToken, MID.ExpectedNormalStringLiteral,
    MID.UnterminatedSpecialToken.
+/
// TODO: Handle case like: #line 0 #line 2
void scanSpecialToken()
{
  assert(*p == '#');

  ++p;
  MID mid;
  Token t;
  uint oldloc = this.loc, newloc;

  peek(t);
  if (!(this.loc == oldloc && p == t.start && t.type == TOK.Identifier && t.span == "line"))
  {
    this.loc = oldloc; // reset this.loc because we took a peek at the next token
    mid = MID.ExpectedIdentifierSTLine;
    goto Lerr;
  }
  p = t.end; // consume token

  peek(t);
  if (this.loc == oldloc && t.type == TOK.Number)
  {
    newloc = t._uint - 1;
    p = t.end;
  }
  else
  {
    this.loc = oldloc;
    mid = MID.ExpectedNumberAfterSTLine;
    goto Lerr;
  }

  // NOTE(review): if peek() advances this.loc when the next token lies on
  // a following line, a directive without Filespec is rejected here
  // although Filespec is optional - confirm against peek().
  peek(t);
  if (this.loc != oldloc)
  {
    this.loc = oldloc;
    mid = MID.NewlineInSpecialToken;
    goto Lerr;
  }
  if (t.type == TOK.String)
  {
    if (*t.start != '"')
    {
      mid = MID.ExpectedNormalStringLiteral;
      goto Lerr;
    }
    fileName = t.span[1..$-1]; // contents of "..."
    p = t.end;
  }
  else if (t.type == TOK.Identifier && t.span == "__FILE__")
  {
    p = t.end;
  }
/+
  peek(t);
  if (this.loc == oldloc && t.type != TOK.EOF)
  {
    mid = MID.UnterminatedSpecialToken;
    goto Lerr;
  }
+/
  // Only whitespace may follow up to the end of the line.
  while (1)
  {
    switch (*p)
    {
    case '\r':
      if (p[1] == '\n')
        ++p;
      // Intentional fall-through.
    case '\n':
      ++p;
      break;
    case LS[0]:
      if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
      {
        // Step past all three bytes of LS/PS, just as '\n' is stepped
        // past above; 'p += 2' left p on the last byte of the separator.
        p += 3;
        break;
      }
      // Some other character that merely starts with the same byte:
      // treat it like any other non-whitespace character instead of
      // falling through to the end-of-file case.
      mid = MID.UnterminatedSpecialToken;
      goto Lerr;
    case 0, _Z_:
      break;
    default:
      if (isspace(*p)) {
        ++p;
        continue;
      }
      mid = MID.UnterminatedSpecialToken;
      goto Lerr;
    }
    break;
  }

  this.loc = newloc;
  return;
Lerr:
  error(mid);
}
1148 | |
/++
  Decodes the multi-byte UTF-8 sequence starting at p.

  p must point at a byte with the high bit set (asserted).

  Returns: the decoded code point, with p left on the last byte of the
  sequence; or 0xFFFF after reporting MID.InvalidUTF8Sequence, with p
  moved forward to the next byte that can start a sequence.
+/
uint decodeUTF8()
{
  assert(*p & 128, "check for ASCII char before calling decodeUTF8().");
  size_t idx;
  uint d = 0xFFFF;
  try
  {
    d = std.utf.decode(p[0 .. end-p], idx);
    p += idx -1;
  }
  catch (UtfException e)
  {
    error(MID.InvalidUTF8Sequence);
    // Skip to next valid utf-8 sequence.
    // UTF8stride is 0xFF for bytes that cannot start a sequence, so keep
    // skipping while the stride IS 0xFF. The former '!=' test did the
    // opposite: it skipped over valid text and stopped only at the next
    // invalid byte, possibly beyond the end of the source.
    while (UTF8stride[*++p] == 0xFF) {}
  }
  return d;
}
0989206cf73c
- Added code to decode Unicode characters in identifiers.
aziz
parents:
11
diff
changeset
|
1167 |
/// Enters every item of keywords into idtable, keyed by the item's str.
void loadKeywords()
{
  foreach (keyword; keywords)
  {
    idtable[keyword.str] = keyword;
  }
}
1173 | |
18
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
/// Appends a new lexer Problem with message id and the current loc to errors.
void error(MID id)
{
  auto problem = new Problem(Problem.Type.Lexer, id, loc);
  errors ~= problem;
}
c48d2125f1e2
- Moved code for scanning character literals to separate function.
aziz
parents:
17
diff
changeset
|
1178 |
/// Scans the next token into this.token and returns its type.
public TOK nextToken()
{
  scan(this.token);
  TOK type = this.token.type;
  return type;
}
1184 | |
/// Collects all tokens up to and including the TOK.EOF token.
Token[] getTokens()
{
  Token[] result;
  TOK type;
  do
  {
    type = nextToken();
    result ~= this.token;
  } while (type != TOK.EOF);
  return result;
}
48 | 1193 |
/++
  Appends the UTF-8 encoding of d to str.

  d must be greater than 0x7F (asserted); ASCII characters are expected to
  be appended directly by the caller.
  Only Unicode scalar values are encoded. Surrogate code points
  (0xD800..0xDFFF) and values above 0x10FFFF have no valid UTF-8 form;
  for these MID.InvalidUnicodeCharacter is reported and str is left
  unchanged. (The 5 and 6 byte forms emitted previously are not valid
  UTF-8.)
+/
private void encodeUTF8(inout char[] str, dchar d)
{
  char[4] b;
  assert(d > 0x7F, "check for ASCII char before calling encodeUTF8().");
  if (d < 0x800)
  {
    b[0] = 0xC0 | (d >> 6);
    b[1] = 0x80 | (d & 0x3F);
    str ~= b[0..2];
  }
  else if (d < 0x10000)
  {
    if (d >= 0xD800 && d <= 0xDFFF)
    {
      // Surrogates are reserved for UTF-16 and must not be encoded.
      error(MID.InvalidUnicodeCharacter);
      return;
    }
    b[0] = 0xE0 | (d >> 12);
    b[1] = 0x80 | ((d >> 6) & 0x3F);
    b[2] = 0x80 | (d & 0x3F);
    str ~= b[0..3];
  }
  else if (d <= 0x10FFFF)
  {
    b[0] = 0xF0 | (d >> 18);
    b[1] = 0x80 | ((d >> 12) & 0x3F);
    b[2] = 0x80 | ((d >> 6) & 0x3F);
    b[3] = 0x80 | (d & 0x3F);
    str ~= b[0..4];
  }
  else
    error(MID.InvalidUnicodeCharacter);
}
5 | 1241 } |
39
69b940398d7b
- Added unittest to test correct parsing of operator tokens.
aziz
parents:
38
diff
changeset
|
1242 |
69b940398d7b
- Added unittest to test correct parsing of operator tokens.
aziz
parents:
38
diff
changeset
|
// Checks that every operator is lexed as exactly one token whose text
// equals the operator.
unittest
{
  string[] expected = [
    ">",    ">=", ">>",  ">>=", ">>>", ">>>=", "<",   "<=",  "<>",
    "<>=",  "<<", "<<=", "!",   "!<",  "!>",   "!<=", "!>=", "!<>",
    "!<>=", ".",  "..",  "...", "&",   "&&",   "&=",  "+",   "++",
    "+=",   "-",  "--",  "-=",  "=",   "==",   "~",   "~=",  "*",
    "*=",   "/",  "/=",  "^",   "^=",  "%",    "%=",  "(",   ")",
    "[",    "]",  "{",   "}",   ":",   ";",    "?",   ",",   "$"
  ];

  // Build the source text: each operator followed by one space.
  char[] text;
  foreach (op; expected)
  {
    text ~= op;
    text ~= " ";
  }

  auto lexer = new Lexer(text, "");
  auto lexed = lexer.getTokens();

  // The last token is TOK.EOF; drop it before comparing.
  lexed = lexed[0 .. $-1];

  assert(lexed.length == expected.length );

  foreach (idx, tok; lexed)
  {
    assert(tok.span == expected[idx], std.string.format("Lexed '%s' but expected '%s'", tok.span, expected[idx]));
  }
}