projects/dil: trunk/src/dil/parser/Parser.d comparison

comparison trunk/src/dil/parser/Parser.d @ 789:c1d5cfd7aa44

Implemented string literal conversion. Removed two MID messages. Added MSG.InvalidUTF8SequenceInString. Added toUTF16() and toUTF32(). Fixed escape sequences. Added formatBytes() and findInvalidUTF8Sequence().

author	Aziz Köksal <aziz.koeksal@gmail.com>
date	Mon, 25 Feb 2008 02:56:22 +0100
parents	846044180d22
children	c24be8d4f6ab

comparison

equal deleted inserted replaced

-:139c9a6a39a8
+:c1d5cfd7aa44
 import dil.Messages;
 import dil.Information;
 import dil.Enums;
 import dil.CompilerInfo;
 import dil.SourceText;
+import dil.Unicode;
 import common;
-/++
+/// The Parser produces a full parse tree by examining
-The Parser produces a full parse tree by examining
+/// the list of tokens provided by the Lexer.
-the list of tokens provided by the Lexer.
-+/
 class Parser
 {
 Lexer lexer; /// Used to lex the source code.
 Token* token; /// Current non-whitespace token.
 Token* prevToken; /// Previous non-whitespace token.
 char[] str = token.str;
 char postfix = token.pf;
 nT();
 while (token.kind == T.String)
 {
-if (postfix == '\0')
+/+if (postfix == 0)
 postfix = token.pf;
-else if (token.pf && token.pf != postfix)
+else+/
+if (token.pf && token.pf != postfix)
 error(token, MSG.StringPostfixMismatch);
-str.length = str.length - 1;
+str.length = str.length - 1; // Exclude '\0'.
 str ~= token.str;
 nT();
 }
 switch (postfix)
-{ // TODO: convert string
+{
-case 'w': e = new StringExpression(/+toUTF16+/(str)); break;
+case 'w':
-case 'd': e = new StringExpression(/+toUTF32+/(str)); break;
+if (checkString(begin, str))
+goto default;
+e = new StringExpression(dil.Unicode.toUTF16(str)); break;
+case 'd':
+if (checkString(begin, str))
+goto default;
+e = new StringExpression(dil.Unicode.toUTF32(str)); break;
 case 'c':
-default: e = new StringExpression(str); break;
+default:
+// No checking done to allow for binary data.
+e = new StringExpression(str); break;
 }
 break;
 case T.LBracket:
 Expression[] values;
 else
 error(token, errorMsg, token.srcText);
 return idtok;
 }
+/// Checks str for an invalid UTF-8 sequence and, if one is found,
+/// reports MSG.InvalidUTF8SequenceInString at the token begin.
+///
+/// Note the sense of the result: true signals a problem, not success.
+/// Returns: true if the string str has an invalid UTF-8 sequence.
+bool checkString(Token* begin, string str)
+{
+// A result with non-zero length is treated as "invalid sequence found".
+auto utf8Seq = Lexer.findInvalidUTF8Sequence(str);
+if (utf8Seq.length)
+error(begin, MSG.InvalidUTF8SequenceInString, utf8Seq);
+return utf8Seq.length != 0;
+}
 /// Reports an error that has no message ID yet.
 ///
 /// Thin variadic wrapper: passes token, formatMsg and the implicit
 /// variadic state (_arguments, _argptr) on to error_().
 void error(Token* token, char[] formatMsg, ...)
 {
 error_(token, formatMsg, _arguments, _argptr);
 }

Mercurial > projects > dil

comparison trunk/src/dil/parser/Parser.d @ 789:c1d5cfd7aa44