Mercurial > projects > dil
comparison trunk/src/dil/Converter.d @ 764:4579e8505d5e
Fixed unittests and removed dil.File.
Fixed Converter.UTF16toUTF8().
Fixed an encode() function in dil.Unicode.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sat, 16 Feb 2008 03:28:39 +0100 |
parents | 90668b83ae5e |
children | 3b34f6a95a27 |
comparison
equal
deleted
inserted
replaced
763:f26f13b5a3a3 | 764:4579e8505d5e |
---|---|
120 wchar[] text = cast(wchar[]) data[0 .. $-($%2)]; // Trim to multiple of two. | 120 wchar[] text = cast(wchar[]) data[0 .. $-($%2)]; // Trim to multiple of two. |
121 wchar* p = text.ptr, | 121 wchar* p = text.ptr, |
122 end = text.ptr + text.length; | 122 end = text.ptr + text.length; |
123 char[] result; | 123 char[] result; |
124 uint lineNum = 1; | 124 uint lineNum = 1; |
125 dchar c = *p; | 125 |
126 | 126 for (; p < end; p++) |
127 do | |
128 { | 127 { |
128 dchar c = *p; | |
129 static if (isBigEndian) | 129 static if (isBigEndian) |
130 c = BEtoMachineWord(c); | 130 c = BEtoMachineWord(c); |
131 else | 131 else |
132 c = LEtoMachineWord(c); | 132 c = LEtoMachineWord(c); |
133 | 133 |
134 if (c < 0xD800 || 0xDFFF > c) | 134 if (0xD800 > c || c > 0xDFFF) |
135 {} | 135 {} |
136 else if (c <= 0xDBFF && p+1 < end) | 136 else if (c <= 0xDBFF && p+1 < end) |
137 { | 137 { // Decode surrogate pairs. |
138 wchar c2 = p[1]; | 138 wchar c2 = p[1]; |
139 static if (isBigEndian) | 139 static if (isBigEndian) |
140 c2 = BEtoMachineWord(c2); | 140 c2 = BEtoMachineWord(c2); |
141 else | 141 else |
142 c2 = LEtoMachineWord(c2); | 142 c2 = LEtoMachineWord(c2); |
157 c = REPLACEMENT_CHAR; | 157 c = REPLACEMENT_CHAR; |
158 } | 158 } |
159 | 159 |
160 if (isNewline(c)) | 160 if (isNewline(c)) |
161 ++lineNum; | 161 ++lineNum; |
162 ++p; | |
163 dil.Unicode.encode(result, c); | 162 dil.Unicode.encode(result, c); |
164 } while (p < end) | 163 } |
165 | 164 |
166 if (data.length % 2) | 165 if (data.length % 2) |
167 infoMan ~= new LexerError( | 166 infoMan ~= new LexerError( |
168 new Location(filePath, lineNum), | 167 new Location(filePath, lineNum), |
169 MSG.UTF16FileMustBeDivisibleBy2 | 168 MSG.UTF16FileMustBeDivisibleBy2 |
170 ); | 169 ); |
171 | |
172 return result; | 170 return result; |
173 } | 171 } |
174 | 172 |
175 alias UTF16toUTF8!(true) UTF16BEtoUTF8; | 173 alias UTF16toUTF8!(true) UTF16BEtoUTF8; |
176 alias UTF16toUTF8!(false) UTF16LEtoUTF8; | 174 alias UTF16toUTF8!(false) UTF16LEtoUTF8; |
293 assert(p == end); | 291 assert(p == end); |
294 text.length = text.length - (p - q); | 292 text.length = text.length - (p - q); |
295 //text = text.ptr[0 .. q - text.ptr]; // Another way. | 293 //text = text.ptr[0 .. q - text.ptr]; // Another way. |
296 return text; | 294 return text; |
297 } | 295 } |
296 | |
297 unittest | |
298 { | |
299 Stdout("Testing function Converter.\n"); | |
300 struct Data2Text | |
301 { | |
302 char[] text; | |
303 char[] expected = "source"; | |
304 ubyte[] data() | |
305 { return cast(ubyte[])text; } | |
306 } | |
307 const Data2Text[] map = [ | |
308 // Without BOM | |
309 {"source"}, | |
310 {"s\0o\0u\0r\0c\0e\0"}, | |
311 {"\0s\0o\0u\0r\0c\0e"}, | |
312 {"s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e\0\0\0"}, | |
313 {"\0\0\0s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e"}, | |
314 // With BOM | |
315 {"\xEF\xBB\xBFsource"}, | |
316 {"\xFE\xFF\0s\0o\0u\0r\0c\0e"}, | |
317 {"\xFF\xFEs\0o\0u\0r\0c\0e\0"}, | |
318 {"\x00\x00\xFE\xFF\0\0\0s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e"}, | |
319 {"\xFF\xFE\x00\x00s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e\0\0\0"}, | |
320 ]; | |
321 auto converter = Converter("", new InfoManager); | |
322 foreach (i, pair; map) | |
323 assert(converter.data2UTF8(pair.data) == pair.expected, Format("failed at item {}", i)); | |
324 } |