Mercurial > projects > dil
annotate src/dil/FileBOM.d @ 806:bcb74c9b895c
Moved out files in the trunk folder to the root.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 09 Mar 2008 00:12:19 +0100 |
parents | trunk/src/dil/FileBOM.d@3b34f6a95a27 |
children |
rev | line source |
---|---|
518
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
1 /++ |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
2 Author: Aziz Köksal |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
3 License: GPL3 |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
4 +/ |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
5 module dil.FileBOM; |
553
164b4ecd9793
Unittests in dil.File and dil.FileBOM compile again.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
518
diff
changeset
|
6 import common; |
518
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
7 |
786
3b34f6a95a27
Added and revised documentation comments.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
553
diff
changeset
|
/// The byte order marks that can be recognised at the start of a file.
enum BOM
{
  /// No BOM.
  None,
  /// UTF-8: EF BB BF
  UTF8,
  /// UTF-16 Big Endian: FE FF
  UTF16BE,
  /// UTF-16 Little Endian: FF FE
  UTF16LE,
  /// UTF-32 Big Endian: 00 00 FE FF
  UTF32BE,
  /// UTF-32 Little Endian: FF FE 00 00
  UTF32LE
}
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
18 |
786
3b34f6a95a27
Added and revised documentation comments.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
553
diff
changeset
|
/// Inspects the leading bytes of data and reports which byte order mark,
/// if any, they form.
///
/// Params:
///   data = the raw bytes to examine; only the first two to four bytes
///          are looked at.
///
/// Returns: the matching BOM member, or BOM.None when data holds fewer
///          than two bytes or does not start with a recognised mark.
BOM tellBOM(ubyte[] data)
{
  // Nothing shorter than two bytes is ever classified as a BOM.
  if (data.length < 2)
    return BOM.None;

  ubyte first = data[0];
  ubyte second = data[1];

  if (first == 0xFE && second == 0xFF)
    return BOM.UTF16BE; // FE FF

  if (first == 0xFF && second == 0xFE)
  {
    // FF FE followed by 00 00 is UTF-32 LE; any other continuation
    // (or none at all) is treated as UTF-16 LE.
    bool zeroTail = data.length >= 4 && data[2] == 0x00 && data[3] == 0x00;
    return zeroTail ? BOM.UTF32LE  // FF FE 00 00
                    : BOM.UTF16LE; // FF FE XX XX
  }

  if (first == 0x00 && second == 0x00)
  {
    // Only the complete four-byte sequence counts as a BOM here.
    if (data.length >= 4 && data[2] == 0xFE && data[3] == 0xFF)
      return BOM.UTF32BE; // 00 00 FE FF
    return BOM.None;
  }

  if (first == 0xEF && second == 0xBB)
  {
    // The UTF-8 mark needs its third byte as well.
    if (data.length >= 3 && data[2] == 0xBF)
      return BOM.UTF8; // EF BB BF
    return BOM.None;
  }

  return BOM.None;
}
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
49 |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz Köksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
// Table-driven check of tellBOM(): each entry pairs an input byte sequence
// with the BOM value the function is expected to report for it.
unittest
{
  Stdout("Testing function tellBOM().\n");

  struct Expectation
  {
    ubyte[] data; // Input bytes handed to tellBOM().
    BOM bom;      // Result tellBOM() must return for them.
  }
  alias ubyte[] ub;
  const Expectation[] expectations = [
    // Inputs that do not start with any recognised mark.
    {cast(ub)x"12", BOM.None},
    {cast(ub)x"12 34", BOM.None},
    {cast(ub)x"00 00 FF FE", BOM.None},
    {cast(ub)x"EF BB FF", BOM.None},

    // Truncated marks.
    {cast(ub)x"EF", BOM.None},
    {cast(ub)x"EF BB", BOM.None},
    {cast(ub)x"FE", BOM.None},
    {cast(ub)x"FF", BOM.None},
    {cast(ub)x"00", BOM.None},
    {cast(ub)x"00 00", BOM.None},
    {cast(ub)x"00 00 FE", BOM.None},

    // UTF-16 BE mark followed by further bytes.
    {cast(ub)x"FE FF 00", BOM.UTF16BE},
    {cast(ub)x"FE FF 00 FF", BOM.UTF16BE},

    // Each mark on its own.
    {cast(ub)x"EF BB BF", BOM.UTF8},
    {cast(ub)x"FE FF", BOM.UTF16BE},
    {cast(ub)x"FF FE", BOM.UTF16LE},
    {cast(ub)x"00 00 FE FF", BOM.UTF32BE},
    {cast(ub)x"FF FE 00 00", BOM.UTF32LE}
  ];

  foreach (expected; expectations)
  {
    assert(tellBOM(expected.data) == expected.bom,
           Format("Failed at {0}", expected.data));
  }
}