Mercurial > projects > dil
comparison trunk/src/dil/parser/Parser.d @ 789:c1d5cfd7aa44
Implemented string literal conversion.
Removed two MID messages.
Added MSG.InvalidUTF8SequenceInString.
Added toUTF16() and toUTF32().
Fixed escape sequences.
Added formatBytes() and findInvalidUTF8Sequence().
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Mon, 25 Feb 2008 02:56:22 +0100 |
parents | 846044180d22 |
children | c24be8d4f6ab |
comparison
equal
deleted
inserted
replaced
788:139c9a6a39a8 | 789:c1d5cfd7aa44 |
---|---|
15 import dil.Messages; | 15 import dil.Messages; |
16 import dil.Information; | 16 import dil.Information; |
17 import dil.Enums; | 17 import dil.Enums; |
18 import dil.CompilerInfo; | 18 import dil.CompilerInfo; |
19 import dil.SourceText; | 19 import dil.SourceText; |
| | 20 import dil.Unicode; |
20 import common; | 21 import common; |
21 | 22 |
22 /++ | 23 /// The Parser produces a full parse tree by examining |
23 The Parser produces a full parse tree by examining | 24 /// the list of tokens provided by the Lexer. |
24 the list of tokens provided by the Lexer. | |
25 +/ | |
26 class Parser | 25 class Parser |
27 { | 26 { |
28 Lexer lexer; /// Used to lex the source code. | 27 Lexer lexer; /// Used to lex the source code. |
29 Token* token; /// Current non-whitespace token. | 28 Token* token; /// Current non-whitespace token. |
30 Token* prevToken; /// Previous non-whitespace token. | 29 Token* prevToken; /// Previous non-whitespace token. |
3158 char[] str = token.str; | 3157 char[] str = token.str; |
3159 char postfix = token.pf; | 3158 char postfix = token.pf; |
3160 nT(); | 3159 nT(); |
3161 while (token.kind == T.String) | 3160 while (token.kind == T.String) |
3162 { | 3161 { |
3163 if (postfix == '\0') | 3162 /+if (postfix == 0) |
3164 postfix = token.pf; | 3163 postfix = token.pf; |
3165 else if (token.pf && token.pf != postfix) | 3164 else+/ |
| | 3165 if (token.pf && token.pf != postfix) |
3166 error(token, MSG.StringPostfixMismatch); | 3166 error(token, MSG.StringPostfixMismatch); |
3167 str.length = str.length - 1; | 3167 str.length = str.length - 1; // Exclude '\0'. |
3168 str ~= token.str; | 3168 str ~= token.str; |
3169 nT(); | 3169 nT(); |
3170 } | 3170 } |
3171 switch (postfix) | 3171 switch (postfix) |
3172 { // TODO: convert string | 3172 { |
3173 case 'w': e = new StringExpression(/+toUTF16+/(str)); break; | 3173 case 'w': |
3174 case 'd': e = new StringExpression(/+toUTF32+/(str)); break; | 3174 if (checkString(begin, str)) |
| | 3175 goto default; |
| | 3176 e = new StringExpression(dil.Unicode.toUTF16(str)); break; |
| | 3177 case 'd': |
| | 3178 if (checkString(begin, str)) |
| | 3179 goto default; |
| | 3180 e = new StringExpression(dil.Unicode.toUTF32(str)); break; |
3175 case 'c': | 3181 case 'c': |
3176 default: e = new StringExpression(str); break; | 3182 default: |
| | 3183 // No checking done to allow for binary data. |
| | 3184 e = new StringExpression(str); break; |
3177 } | 3185 } |
3178 break; | 3186 break; |
3179 case T.LBracket: | 3187 case T.LBracket: |
3180 Expression[] values; | 3188 Expression[] values; |
3181 | 3189 |
4054 else | 4062 else |
4055 error(token, errorMsg, token.srcText); | 4063 error(token, errorMsg, token.srcText); |
4056 return idtok; | 4064 return idtok; |
4057 } | 4065 } |
4058 | 4066 |
| | 4067 /// Returns true if the string str has an invalid UTF-8 sequence. |
| | 4068 bool checkString(Token* begin, string str) |
| | 4069 { |
| | 4070 auto utf8Seq = Lexer.findInvalidUTF8Sequence(str); |
| | 4071 if (utf8Seq.length) |
| | 4072 error(begin, MSG.InvalidUTF8SequenceInString, utf8Seq); |
| | 4073 return utf8Seq.length != 0; |
| | 4074 } |
| | 4075 |
4059 /// Reports an error that has no message ID yet. | 4076 /// Reports an error that has no message ID yet. |
4060 void error(Token* token, char[] formatMsg, ...) | 4077 void error(Token* token, char[] formatMsg, ...) |
4061 { | 4078 { |
4062 error_(token, formatMsg, _arguments, _argptr); | 4079 error_(token, formatMsg, _arguments, _argptr); |
4063 } | 4080 } |