Mercurial > projects > dil
comparison trunk/src/dil/Lexer.d @ 386:392a0068fc61
Refactored code related to scanning escape sequences.
The backslash character is not skipped anymore before calling
scanEscapeSequence(). Added an assert() to check for this.
Added code that will pass the string of an undefined escape sequence to error().
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Tue, 11 Sep 2007 23:09:27 +0200 |
parents | f7ce725e79c3 |
children | ad0cbd1c8881 |
comparison
equal
deleted
inserted
replaced
385:c45233dc63db | 386:392a0068fc61 |
---|---|
388 return scanNormalStringLiteral(t); | 388 return scanNormalStringLiteral(t); |
389 case '\\': | 389 case '\\': |
390 char[] buffer; | 390 char[] buffer; |
391 do | 391 do |
392 { | 392 { |
393 ++p; | |
394 c = scanEscapeSequence(); | 393 c = scanEscapeSequence(); |
395 if (c < 128) | 394 if (c < 128) |
396 buffer ~= c; | 395 buffer ~= c; |
397 else | 396 else |
398 encodeUTF8(buffer, c); | 397 encodeUTF8(buffer, c); |
671 t.str = buffer; | 670 t.str = buffer; |
672 t.pf = scanPostfix(); | 671 t.pf = scanPostfix(); |
673 t.end = p; | 672 t.end = p; |
674 return; | 673 return; |
675 case '\\': | 674 case '\\': |
676 ++p; | |
677 c = scanEscapeSequence(); | 675 c = scanEscapeSequence(); |
678 --p; | 676 --p; |
679 if (c & 128) | 677 if (c & 128) |
680 encodeUTF8(buffer, c); | 678 encodeUTF8(buffer, c); |
681 else | 679 else |
720 ++p; | 718 ++p; |
721 TOK type = TOK.CharLiteral; | 719 TOK type = TOK.CharLiteral; |
722 switch (*p) | 720 switch (*p) |
723 { | 721 { |
724 case '\\': | 722 case '\\': |
725 ++p; | 723 switch (p[1]) |
726 switch (*p) | |
727 { | 724 { |
728 case 'u': | 725 case 'u': |
729 type = TOK.WCharLiteral; break; | 726 type = TOK.WCharLiteral; break; |
730 case 'U': | 727 case 'U': |
731 type = TOK.DCharLiteral; break; | 728 type = TOK.DCharLiteral; break; |
1158 } | 1155 } |
1159 } | 1156 } |
1160 | 1157 |
1161 dchar scanEscapeSequence() | 1158 dchar scanEscapeSequence() |
1162 { | 1159 { |
1160 assert(*p == '\\'); | |
1161 ++p; | |
1163 uint c = char2ev(*p); | 1162 uint c = char2ev(*p); |
1164 if (c) | 1163 if (c) |
1165 { | 1164 { |
1166 ++p; | 1165 ++p; |
1167 return c; | 1166 return c; |
1232 {} | 1231 {} |
1233 | 1232 |
1234 if (*p == ';') | 1233 if (*p == ';') |
1235 { | 1234 { |
1236 c = entity2Unicode(begin[0..p - begin]); | 1235 c = entity2Unicode(begin[0..p - begin]); |
1237 ++p; | 1236 ++p; // Skip ; |
1238 if (c == 0xFFFF) | 1237 if (c == 0xFFFF) |
1239 error(MID.UndefinedHTMLEntity, (begin-1)[0..p-(begin-1)]); | 1238 error(MID.UndefinedHTMLEntity, (begin-1)[0..p-(begin-1)]); |
1240 } | 1239 } |
1241 else | 1240 else |
1242 error(MID.UnterminatedHTMLEntity); | 1241 error(MID.UnterminatedHTMLEntity); |
1243 } | 1242 } |
1244 else | 1243 else |
1245 error(MID.InvalidBeginHTMLEntity); | 1244 error(MID.InvalidBeginHTMLEntity); |
1246 } | 1245 } |
1247 else | 1246 else |
1248 error(MID.UndefinedEscapeSequence); | 1247 { |
1248 // TODO: add parameter to localized strings | |
1249 dchar d = *p; | |
1250 char[] str = `\`; | |
1251 if (d & 128) | |
1252 { | |
1253 d = decodeUTF8(); | |
1254 encodeUTF8(str, d); | |
1255 ++p; | |
1256 } | |
1257 else | |
1258 str ~= d; | |
1259 error(MID.UndefinedEscapeSequence/+, str+/); | |
1260 } | |
1249 } | 1261 } |
1250 | 1262 |
1251 return c; | 1263 return c; |
1252 } | 1264 } |
1253 | 1265 |