Mercurial > projects > dil
comparison trunk/src/dil/File.d @ 518:8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Moved code from dil.File to dil.FileBOM.
Added opCatAssign to class InformationManager.
Added encode() function to dil.Unicode.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sat, 15 Dec 2007 18:55:06 +0100 |
parents | 3aa00474b381 |
children | 50e64bab9c7a |
comparison
equal
deleted
inserted
replaced
517:b465c669d70c | 518:8f86bb9ef715 |
---|---|
1 /++ | 1 /++ |
2 Author: Aziz Köksal | 2 Author: Aziz Köksal |
3 License: GPL3 | 3 License: GPL3 |
4 +/ | 4 +/ |
5 module dil.File; | 5 module dil.File; |
6 | |
7 import dil.FileBOM; | |
8 import dil.Information; | |
9 import dil.Converter; | |
6 import tango.io.File; | 10 import tango.io.File; |
7 import std.utf; | 11 import std.utf; |
8 import common; | 12 import common; |
9 | 13 |
10 /// Loads a file in any valid Unicode format and converts it to UTF-8. | 14 /// Loads a file in any valid Unicode format and converts it to UTF-8. |
11 char[] loadFile(char[] filePath) | 15 char[] loadFile(char[] filePath) |
12 { | 16 { |
13 return data2Utf8(cast(ubyte[]) (new File(filePath)).read()); | 17 return data2UTF8(cast(ubyte[]) (new File(filePath)).read()); |
14 } | 18 } |
15 | 19 |
16 char[] data2Utf8(ubyte[] data) | 20 char[] loadFile(char[] filePath, InformationManager infoMan) |
21 { | |
22 auto converter = Converter(filePath, infoMan); | |
23 return converter.data2UTF8(cast(ubyte[]) (new File(filePath)).read()); | |
24 } | |
25 | |
26 char[] data2UTF8(ubyte[] data) | |
17 { | 27 { |
18 if (data.length == 0) | 28 if (data.length == 0) |
19 return null; | 29 return null; |
20 | 30 |
21 char[] text; | 31 char[] text; |
137 unittest | 147 unittest |
138 { | 148 { |
139 ubyte[] test = cast(ubyte[])x"1A 2B 3C 4D"; | 149 ubyte[] test = cast(ubyte[])x"1A 2B 3C 4D"; |
140 assert(utf32BEtoLE(test) == cast(ubyte[])x"4D 3C 2B 1A"); | 150 assert(utf32BEtoLE(test) == cast(ubyte[])x"4D 3C 2B 1A"); |
141 } | 151 } |
142 | |
143 /// Byte Order Mark | |
144 enum BOM | |
145 { | |
146 None, /// No BOM | |
147 UTF8, /// UTF-8: EF BB BF | |
148 UTF16BE, /// UTF-16 Big Endian: FE FF | |
149 UTF16LE, /// UTF-16 Little Endian: FF FE | |
150 UTF32BE, /// UTF-32 Big Endian: 00 00 FE FF | |
151 UTF32LE /// UTF-32 Little Endian: FF FE 00 00 | |
152 } | |
153 | |
154 BOM tellBOM(ubyte[] data) | |
155 { | |
156 BOM bom = BOM.None; | |
157 if (data.length < 2) | |
158 return bom; | |
159 | |
160 if (data[0..2] == cast(ubyte[2])x"FE FF") | |
161 { | |
162 bom = BOM.UTF16BE; // FE FF | |
163 } | |
164 else if (data[0..2] == cast(ubyte[2])x"FF FE") | |
165 { | |
166 if (data.length >= 4 && data[2..4] == cast(ubyte[2])x"00 00") | |
167 bom = BOM.UTF32LE; // FF FE 00 00 | |
168 else | |
169 bom = BOM.UTF16LE; // FF FE XX XX | |
170 } | |
171 else if (data[0..2] == cast(ubyte[2])x"00 00") | |
172 { | |
173 if (data.length >= 4 && data[2..4] == cast(ubyte[2])x"FE FF") | |
174 bom = BOM.UTF32BE; // 00 00 FE FF | |
175 } | |
176 else if (data[0..2] == cast(ubyte[2])x"EF BB") | |
177 { | |
178 if (data.length >= 3 && data[2] == '\xBF') | |
179 bom = BOM.UTF8; // EF BB BF | |
180 } | |
181 return bom; | |
182 } | |
183 | |
184 unittest | |
185 { | |
186 Stdout("Testing function tellBOM().\n"); | |
187 | |
188 struct Data2BOM | |
189 { | |
190 ubyte[] data; | |
191 BOM bom; | |
192 } | |
193 alias ubyte[] ub; | |
194 const Data2BOM[] map = [ | |
195 {cast(ub)x"12", BOM.None}, | |
196 {cast(ub)x"12 34", BOM.None}, | |
197 {cast(ub)x"00 00 FF FE", BOM.None}, | |
198 {cast(ub)x"EF BB FF", BOM.None}, | |
199 | |
200 {cast(ub)x"EF", BOM.None}, | |
201 {cast(ub)x"EF BB", BOM.None}, | |
202 {cast(ub)x"FE", BOM.None}, | |
203 {cast(ub)x"FF", BOM.None}, | |
204 {cast(ub)x"00", BOM.None}, | |
205 {cast(ub)x"00 00", BOM.None}, | |
206 {cast(ub)x"00 00 FE", BOM.None}, | |
207 | |
208 {cast(ub)x"FE FF 00", BOM.UTF16BE}, | |
209 {cast(ub)x"FE FF 00 FF", BOM.UTF16BE}, | |
210 | |
211 {cast(ub)x"EF BB BF", BOM.UTF8}, | |
212 {cast(ub)x"FE FF", BOM.UTF16BE}, | |
213 {cast(ub)x"FF FE", BOM.UTF16LE}, | |
214 {cast(ub)x"00 00 FE FF", BOM.UTF32BE}, | |
215 {cast(ub)x"FF FE 00 00", BOM.UTF32LE} | |
216 ]; | |
217 | |
218 foreach (pair; map) | |
219 assert(tellBOM(pair.data) == pair.bom, Format("Failed at {0}", pair.data)); | |
220 } |