comparison trunk/src/dil/parser/Parser.d @ 789:c1d5cfd7aa44

Implemented string literal conversion. Removed two MID messages. Added MSG.InvalidUTF8SequenceInString. Added toUTF16() and toUTF32(). Fixed escape sequences. Added formatBytes() and findInvalidUTF8Sequence().
author Aziz Köksal <aziz.koeksal@gmail.com>
date Mon, 25 Feb 2008 02:56:22 +0100
parents 846044180d22
children c24be8d4f6ab
comparison
equal deleted inserted replaced
788:139c9a6a39a8 789:c1d5cfd7aa44
15 import dil.Messages; 15 import dil.Messages;
16 import dil.Information; 16 import dil.Information;
17 import dil.Enums; 17 import dil.Enums;
18 import dil.CompilerInfo; 18 import dil.CompilerInfo;
19 import dil.SourceText; 19 import dil.SourceText;
20 import dil.Unicode;
20 import common; 21 import common;
21 22
22 /++ 23 /// The Parser produces a full parse tree by examining
23 The Parser produces a full parse tree by examining 24 /// the list of tokens provided by the Lexer.
24 the list of tokens provided by the Lexer.
25 +/
26 class Parser 25 class Parser
27 { 26 {
28 Lexer lexer; /// Used to lex the source code. 27 Lexer lexer; /// Used to lex the source code.
29 Token* token; /// Current non-whitespace token. 28 Token* token; /// Current non-whitespace token.
30 Token* prevToken; /// Previous non-whitespace token. 29 Token* prevToken; /// Previous non-whitespace token.
3158 char[] str = token.str; 3157 char[] str = token.str;
3159 char postfix = token.pf; 3158 char postfix = token.pf;
3160 nT(); 3159 nT();
3161 while (token.kind == T.String) 3160 while (token.kind == T.String)
3162 { 3161 {
3163 if (postfix == '\0') 3162 /+if (postfix == 0)
3164 postfix = token.pf; 3163 postfix = token.pf;
3165 else if (token.pf && token.pf != postfix) 3164 else+/
3165 if (token.pf && token.pf != postfix)
3166 error(token, MSG.StringPostfixMismatch); 3166 error(token, MSG.StringPostfixMismatch);
3167 str.length = str.length - 1; 3167 str.length = str.length - 1; // Exclude '\0'.
3168 str ~= token.str; 3168 str ~= token.str;
3169 nT(); 3169 nT();
3170 } 3170 }
3171 switch (postfix) 3171 switch (postfix)
3172 { // TODO: convert string 3172 {
3173 case 'w': e = new StringExpression(/+toUTF16+/(str)); break; 3173 case 'w':
3174 case 'd': e = new StringExpression(/+toUTF32+/(str)); break; 3174 if (checkString(begin, str))
3175 goto default;
3176 e = new StringExpression(dil.Unicode.toUTF16(str)); break;
3177 case 'd':
3178 if (checkString(begin, str))
3179 goto default;
3180 e = new StringExpression(dil.Unicode.toUTF32(str)); break;
3175 case 'c': 3181 case 'c':
3176 default: e = new StringExpression(str); break; 3182 default:
3183 // No checking done to allow for binary data.
3184 e = new StringExpression(str); break;
3177 } 3185 }
3178 break; 3186 break;
3179 case T.LBracket: 3187 case T.LBracket:
3180 Expression[] values; 3188 Expression[] values;
3181 3189
4054 else 4062 else
4055 error(token, errorMsg, token.srcText); 4063 error(token, errorMsg, token.srcText);
4056 return idtok; 4064 return idtok;
4057 } 4065 }
4058 4066
4067 /// Returns true if the string str has an invalid UTF-8 sequence.
4068 bool checkString(Token* begin, string str)
4069 {
4070 auto utf8Seq = Lexer.findInvalidUTF8Sequence(str);
4071 if (utf8Seq.length)
4072 error(begin, MSG.InvalidUTF8SequenceInString, utf8Seq);
4073 return utf8Seq.length != 0;
4074 }
4075
4059 /// Reports an error that has no message ID yet. 4076 /// Reports an error that has no message ID yet.
4060 void error(Token* token, char[] formatMsg, ...) 4077 void error(Token* token, char[] formatMsg, ...)
4061 { 4078 {
4062 error_(token, formatMsg, _arguments, _argptr); 4079 error_(token, formatMsg, _arguments, _argptr);
4063 } 4080 }