comparison trunk/src/dil/Converter.d @ 764:4579e8505d5e

Fixed unittests and removed dil.File. Fixed Converter.UTF16toUTF8(). Fixed an encode() function in dil.Unicode.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sat, 16 Feb 2008 03:28:39 +0100
parents 90668b83ae5e
children 3b34f6a95a27
comparison
equal deleted inserted replaced
763:f26f13b5a3a3 764:4579e8505d5e
120 wchar[] text = cast(wchar[]) data[0 .. $-($%2)]; // Trim to multiple of two. 120 wchar[] text = cast(wchar[]) data[0 .. $-($%2)]; // Trim to multiple of two.
121 wchar* p = text.ptr, 121 wchar* p = text.ptr,
122 end = text.ptr + text.length; 122 end = text.ptr + text.length;
123 char[] result; 123 char[] result;
124 uint lineNum = 1; 124 uint lineNum = 1;
125 dchar c = *p; 125
126 126 for (; p < end; p++)
127 do
128 { 127 {
128 dchar c = *p;
129 static if (isBigEndian) 129 static if (isBigEndian)
130 c = BEtoMachineWord(c); 130 c = BEtoMachineWord(c);
131 else 131 else
132 c = LEtoMachineWord(c); 132 c = LEtoMachineWord(c);
133 133
134 if (c < 0xD800 || 0xDFFF > c) 134 if (0xD800 > c || c > 0xDFFF)
135 {} 135 {}
136 else if (c <= 0xDBFF && p+1 < end) 136 else if (c <= 0xDBFF && p+1 < end)
137 { 137 { // Decode surrogate pairs.
138 wchar c2 = p[1]; 138 wchar c2 = p[1];
139 static if (isBigEndian) 139 static if (isBigEndian)
140 c2 = BEtoMachineWord(c2); 140 c2 = BEtoMachineWord(c2);
141 else 141 else
142 c2 = LEtoMachineWord(c2); 142 c2 = LEtoMachineWord(c2);
157 c = REPLACEMENT_CHAR; 157 c = REPLACEMENT_CHAR;
158 } 158 }
159 159
160 if (isNewline(c)) 160 if (isNewline(c))
161 ++lineNum; 161 ++lineNum;
162 ++p;
163 dil.Unicode.encode(result, c); 162 dil.Unicode.encode(result, c);
164 } while (p < end) 163 }
165 164
166 if (data.length % 2) 165 if (data.length % 2)
167 infoMan ~= new LexerError( 166 infoMan ~= new LexerError(
168 new Location(filePath, lineNum), 167 new Location(filePath, lineNum),
169 MSG.UTF16FileMustBeDivisibleBy2 168 MSG.UTF16FileMustBeDivisibleBy2
170 ); 169 );
171
172 return result; 170 return result;
173 } 171 }
174 172
175 alias UTF16toUTF8!(true) UTF16BEtoUTF8; 173 alias UTF16toUTF8!(true) UTF16BEtoUTF8;
176 alias UTF16toUTF8!(false) UTF16LEtoUTF8; 174 alias UTF16toUTF8!(false) UTF16LEtoUTF8;
293 assert(p == end); 291 assert(p == end);
294 text.length = text.length - (p - q); 292 text.length = text.length - (p - q);
295 //text = text.ptr[0 .. q - text.ptr]; // Another way. 293 //text = text.ptr[0 .. q - text.ptr]; // Another way.
296 return text; 294 return text;
297 } 295 }
296
297 unittest
298 {
299 Stdout("Testing function Converter.\n");
300 struct Data2Text
301 {
302 char[] text;
303 char[] expected = "source";
304 ubyte[] data()
305 { return cast(ubyte[])text; }
306 }
307 const Data2Text[] map = [
308 // Without BOM
309 {"source"},
310 {"s\0o\0u\0r\0c\0e\0"},
311 {"\0s\0o\0u\0r\0c\0e"},
312 {"s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e\0\0\0"},
313 {"\0\0\0s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e"},
314 // With BOM
315 {"\xEF\xBB\xBFsource"},
316 {"\xFE\xFF\0s\0o\0u\0r\0c\0e"},
317 {"\xFF\xFEs\0o\0u\0r\0c\0e\0"},
318 {"\x00\x00\xFE\xFF\0\0\0s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e"},
319 {"\xFF\xFE\x00\x00s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e\0\0\0"},
320 ];
321 auto converter = Converter("", new InfoManager);
322 foreach (i, pair; map)
323 assert(converter.data2UTF8(pair.data) == pair.expected, Format("failed at item {}", i));
324 }