diff trunk/src/dil/parser/Parser.d @ 789:c1d5cfd7aa44

Implemented string literal conversion. Removed two MID messages. Added MSG.InvalidUTF8SequenceInString. Added toUTF16() and toUTF32(). Fixed escape sequences. Added formatBytes() and findInvalidUTF8Sequence().
author Aziz K?ksal <aziz.koeksal@gmail.com>
date Mon, 25 Feb 2008 02:56:22 +0100
parents 846044180d22
children c24be8d4f6ab
line wrap: on
line diff
--- a/trunk/src/dil/parser/Parser.d	Sun Feb 24 03:19:02 2008 +0100
+++ b/trunk/src/dil/parser/Parser.d	Mon Feb 25 02:56:22 2008 +0100
@@ -17,12 +17,11 @@
 import dil.Enums;
 import dil.CompilerInfo;
 import dil.SourceText;
+import dil.Unicode;
 import common;
 
-/++
-  The Parser produces a full parse tree by examining
-  the list of tokens provided by the Lexer.
-+/
+/// The Parser produces a full parse tree by examining
+/// the list of tokens provided by the Lexer.
 class Parser
 {
   Lexer lexer; /// Used to lex the source code.
@@ -3160,20 +3159,29 @@
       nT();
       while (token.kind == T.String)
       {
-        if (postfix == '\0')
+        /+if (postfix == 0)
             postfix = token.pf;
-        else if (token.pf && token.pf != postfix)
+        else+/
+        if (token.pf && token.pf != postfix)
           error(token, MSG.StringPostfixMismatch);
-        str.length = str.length - 1;
+        str.length = str.length - 1; // Exclude '\0'.
         str ~= token.str;
         nT();
       }
       switch (postfix)
-      { // TODO: convert string
-      case 'w': e = new StringExpression(/+toUTF16+/(str)); break;
-      case 'd': e = new StringExpression(/+toUTF32+/(str)); break;
+      {
+      case 'w':
+        if (checkString(begin, str))
+          goto default;
+        e = new StringExpression(dil.Unicode.toUTF16(str)); break;
+      case 'd':
+        if (checkString(begin, str))
+          goto default;
+        e = new StringExpression(dil.Unicode.toUTF32(str)); break;
       case 'c':
-      default: e = new StringExpression(str); break;
+      default:
+        // No checking done to allow for binary data.
+        e = new StringExpression(str); break;
       }
       break;
     case T.LBracket:
@@ -4056,6 +4064,15 @@
     return idtok;
   }
 
+  /// Returns true if the string str has an invalid UTF-8 sequence.
+  bool checkString(Token* begin, string str)
+  {
+    auto utf8Seq = Lexer.findInvalidUTF8Sequence(str);
+    if (utf8Seq.length)
+      error(begin, MSG.InvalidUTF8SequenceInString, utf8Seq);
+    return utf8Seq.length != 0;
+  }
+
   /// Reports an error that has no message ID yet.
   void error(Token* token, char[] formatMsg, ...)
   {