comparison trunk/src/dil/FileBOM.d @ 518:8f86bb9ef715

Added module dil.Converter and dil.FileBOM. Moved code from dil.File to dil.FileBOM. Added opCatAssign to class InformationManager. Added encode() function to dil.Unicode.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sat, 15 Dec 2007 18:55:06 +0100
parents
children 164b4ecd9793
comparison
equal deleted inserted replaced
517:b465c669d70c 518:8f86bb9ef715
1 /++
2 Author: Aziz Köksal
3 License: GPL3
4 +/
5 module dil.FileBOM;
6
7 /// Byte Order Mark
8 enum BOM
9 {
10 None, /// No BOM
11 UTF8, /// UTF-8: EF BB BF
12 UTF16BE, /// UTF-16 Big Endian: FE FF
13 UTF16LE, /// UTF-16 Little Endian: FF FE
14 UTF32BE, /// UTF-32 Big Endian: 00 00 FE FF
15 UTF32LE /// UTF-32 Little Endian: FF FE 00 00
16 }
17
/++
  Determines which Byte Order Mark, if any, the given bytes start with.

  Params:
    data = the raw bytes to inspect; only the first two to four
           bytes are examined.

  Returns:
    The matching BOM member, or BOM.None when data is shorter than
    two bytes or does not begin with a complete, recognized mark.
    FF FE followed by 00 00 is classified as UTF-32 LE; FF FE with
    any other continuation (or fewer than four bytes) is UTF-16 LE.
+/
BOM tellBOM(ubyte[] data)
{
  // The shortest marks are two bytes long.
  if (data.length < 2)
    return BOM.None;

  ubyte[] head = data[0..2];

  if (head == cast(ubyte[2])x"FE FF")
    return BOM.UTF16BE; // FE FF

  if (head == cast(ubyte[2])x"FF FE")
  {
    // The UTF-16 LE mark is a prefix of the UTF-32 LE mark, so the
    // next two bytes decide between them.
    bool zeroTail = data.length >= 4 && data[2..4] == cast(ubyte[2])x"00 00";
    return zeroTail ? BOM.UTF32LE  // FF FE 00 00
                    : BOM.UTF16LE; // FF FE XX XX
  }

  if (head == cast(ubyte[2])x"00 00")
  {
    // Only the complete four-byte sequence counts as a mark.
    bool fullMark = data.length >= 4 && data[2..4] == cast(ubyte[2])x"FE FF";
    return fullMark ? BOM.UTF32BE // 00 00 FE FF
                    : BOM.None;
  }

  if (head == cast(ubyte[2])x"EF BB")
  {
    // The UTF-8 mark needs its third byte as well.
    bool fullMark = data.length >= 3 && data[2] == 0xBF;
    return fullMark ? BOM.UTF8 // EF BB BF
                    : BOM.None;
  }

  return BOM.None;
}
47
// Table-driven check of tellBOM() against hand-written byte sequences.
// NOTE(review): Stdout and Format are not imported in the visible part of
// this module -- confirm they are in scope when building with -unittest.
unittest
{
  Stdout("Testing function tellBOM().\n");

  // One test case: the input bytes and the BOM tellBOM() must report.
  struct Case
  {
    ubyte[] input;
    BOM expected;
  }
  alias ubyte[] Bytes;
  const Case[] cases = [
    // Sequences that are not marks at all.
    {cast(Bytes)x"12", BOM.None},
    {cast(Bytes)x"12 34", BOM.None},
    {cast(Bytes)x"00 00 FF FE", BOM.None},
    {cast(Bytes)x"EF BB FF", BOM.None},

    // Truncated marks.
    {cast(Bytes)x"EF", BOM.None},
    {cast(Bytes)x"EF BB", BOM.None},
    {cast(Bytes)x"FE", BOM.None},
    {cast(Bytes)x"FF", BOM.None},
    {cast(Bytes)x"00", BOM.None},
    {cast(Bytes)x"00 00", BOM.None},
    {cast(Bytes)x"00 00 FE", BOM.None},

    // A UTF-16 BE mark followed by further bytes.
    {cast(Bytes)x"FE FF 00", BOM.UTF16BE},
    {cast(Bytes)x"FE FF 00 FF", BOM.UTF16BE},

    // Exact marks.
    {cast(Bytes)x"EF BB BF", BOM.UTF8},
    {cast(Bytes)x"FE FF", BOM.UTF16BE},
    {cast(Bytes)x"FF FE", BOM.UTF16LE},
    {cast(Bytes)x"00 00 FE FF", BOM.UTF32BE},
    {cast(Bytes)x"FF FE 00 00", BOM.UTF32LE}
  ];

  foreach (c; cases)
    assert(tellBOM(c.input) == c.expected, Format("Failed at {0}", c.input));
}