comparison trunk/src/dil/Lexer.d @ 386:392a0068fc61

Refactored code related to scanning escape sequences. The backslash character is not skipped anymore before calling scanEscapeSequence(). Added an assert() to check for this. Added code that will pass the string of an undefined escape sequence to error().
author Aziz Köksal <aziz.koeksal@gmail.com>
date Tue, 11 Sep 2007 23:09:27 +0200
parents f7ce725e79c3
children ad0cbd1c8881
comparison
equal deleted inserted replaced
385:c45233dc63db 386:392a0068fc61
388 return scanNormalStringLiteral(t); 388 return scanNormalStringLiteral(t);
389 case '\\': 389 case '\\':
390 char[] buffer; 390 char[] buffer;
391 do 391 do
392 { 392 {
393 ++p;
394 c = scanEscapeSequence(); 393 c = scanEscapeSequence();
395 if (c < 128) 394 if (c < 128)
396 buffer ~= c; 395 buffer ~= c;
397 else 396 else
398 encodeUTF8(buffer, c); 397 encodeUTF8(buffer, c);
671 t.str = buffer; 670 t.str = buffer;
672 t.pf = scanPostfix(); 671 t.pf = scanPostfix();
673 t.end = p; 672 t.end = p;
674 return; 673 return;
675 case '\\': 674 case '\\':
676 ++p;
677 c = scanEscapeSequence(); 675 c = scanEscapeSequence();
678 --p; 676 --p;
679 if (c & 128) 677 if (c & 128)
680 encodeUTF8(buffer, c); 678 encodeUTF8(buffer, c);
681 else 679 else
720 ++p; 718 ++p;
721 TOK type = TOK.CharLiteral; 719 TOK type = TOK.CharLiteral;
722 switch (*p) 720 switch (*p)
723 { 721 {
724 case '\\': 722 case '\\':
725 ++p; 723 switch (p[1])
726 switch (*p)
727 { 724 {
728 case 'u': 725 case 'u':
729 type = TOK.WCharLiteral; break; 726 type = TOK.WCharLiteral; break;
730 case 'U': 727 case 'U':
731 type = TOK.DCharLiteral; break; 728 type = TOK.DCharLiteral; break;
1158 } 1155 }
1159 } 1156 }
1160 1157
1161 dchar scanEscapeSequence() 1158 dchar scanEscapeSequence()
1162 { 1159 {
1160 assert(*p == '\\');
1161 ++p;
1163 uint c = char2ev(*p); 1162 uint c = char2ev(*p);
1164 if (c) 1163 if (c)
1165 { 1164 {
1166 ++p; 1165 ++p;
1167 return c; 1166 return c;
1232 {} 1231 {}
1233 1232
1234 if (*p == ';') 1233 if (*p == ';')
1235 { 1234 {
1236 c = entity2Unicode(begin[0..p - begin]); 1235 c = entity2Unicode(begin[0..p - begin]);
1237 ++p; 1236 ++p; // Skip ;
1238 if (c == 0xFFFF) 1237 if (c == 0xFFFF)
1239 error(MID.UndefinedHTMLEntity, (begin-1)[0..p-(begin-1)]); 1238 error(MID.UndefinedHTMLEntity, (begin-1)[0..p-(begin-1)]);
1240 } 1239 }
1241 else 1240 else
1242 error(MID.UnterminatedHTMLEntity); 1241 error(MID.UnterminatedHTMLEntity);
1243 } 1242 }
1244 else 1243 else
1245 error(MID.InvalidBeginHTMLEntity); 1244 error(MID.InvalidBeginHTMLEntity);
1246 } 1245 }
1247 else 1246 else
1248 error(MID.UndefinedEscapeSequence); 1247 {
1248 // TODO: add parameter to localized strings
1249 dchar d = *p;
1250 char[] str = `\`;
1251 if (d & 128)
1252 {
1253 d = decodeUTF8();
1254 encodeUTF8(str, d);
1255 ++p;
1256 }
1257 else
1258 str ~= d;
1259 error(MID.UndefinedEscapeSequence/+, str+/);
1260 }
1249 } 1261 }
1250 1262
1251 return c; 1263 return c;
1252 } 1264 }
1253 1265