Mercurial > projects > dil
annotate trunk/src/dil/Converter.d @ 532:50e64bab9c7a
Renamed InformationManager to InfoManager.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Mon, 17 Dec 2007 16:10:08 +0100 |
parents | 8f86bb9ef715 |
children | 2a8d0ed0d71e |
rev | line source |
---|---|
518
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
1 /++ |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
2 Author: Aziz Köksal |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
3 License: GPL3 |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
4 +/ |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
5 module dil.Converter; |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
6 |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
7 import dil.Information; |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
8 import dil.Location; |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
9 import dil.Unicode; |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
10 import dil.FileBOM; |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
11 import common; |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
12 |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
13 /// Converts various Unicode encoding formats to UTF-8. |
8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff
changeset
|
struct Converter
{
  char[] filePath; /// Path of the file being converted. For error messages.
  InfoManager infoMan; /// Collects the errors reported during conversion.

  /// Constructs a Converter.
  /// Params:
  ///   filePath = path of the source file; only used in error locations.
  ///   infoMan  = receives a LexerError for every problem that is found.
  static Converter opCall(char[] filePath, InfoManager infoMan)
  {
    Converter conv;
    conv.filePath = filePath;
    conv.infoMan = infoMan;
    return conv;
  }

  /// Appends a LexerError with the given message to infoMan.info.
  private void reportError(char[] msg)
  {
    // TODO: correct location. Only the file path is known at this point.
    auto loc = new Location(filePath, 0);
    infoMan.info ~= new LexerError(loc, msg);
  }

  /// Reverses the order of the four bytes of c.
  dchar swapBytes(dchar c)
  {
    return cast(dchar)(
      (c << 24) |
      ((c >> 8) & 0xFF00) |
      ((c << 8) & 0xFF0000) |
      (c >> 24)
    );
  }

  /// Swaps the two bytes of c.
  wchar swapBytes(wchar c)
  {
    // The operands are promoted to int; cast back to 16 bits explicitly.
    return cast(wchar)((c << 8) | (c >> 8));
  }

  /// Converts a big-endian 32-bit code unit to the machine's byte order.
  /// The result is a dchar; returning a wchar here would cut off the
  /// upper 16 bits of the value.
  dchar BEtoMachineDword(dchar c)
  {
    version(LittleEndian)
      return swapBytes(c);
    else
      return c;
  }

  /// Converts a little-endian 32-bit code unit to the machine's byte order.
  /// The result is a dchar; returning a wchar here would cut off the
  /// upper 16 bits of the value.
  dchar LEtoMachineDword(dchar c)
  {
    version(LittleEndian)
      return c;
    else
      return swapBytes(c);
  }

  /// Converts a big-endian 16-bit code unit to the machine's byte order.
  wchar BEtoMachineWord(wchar c)
  {
    version(LittleEndian)
      return swapBytes(c);
    else
      return c;
  }

  /// Converts a little-endian 16-bit code unit to the machine's byte order.
  wchar LEtoMachineWord(wchar c)
  {
    version(LittleEndian)
      return c;
    else
      return swapBytes(c);
  }

  /// Converts UTF-32 encoded data (without a BOM) to UTF-8.
  ///
  /// An error is reported if the byte length is not a multiple of 4; the
  /// trailing bytes are then ignored. Every code unit rejected by
  /// isValidChar() is reported and replaced with REPLACEMENT_CHAR.
  /// Params:
  ///   isBigEndian = byte order of the input.
  ///   data        = the raw bytes.
  /// Returns: the UTF-8 text, or null if there is nothing to convert.
  char[] UTF32toUTF8(bool isBigEndian)(ubyte[] data)
  {
    if (data.length % 4)
    {
      reportError(
        "the byte length of a UTF-32 source file must be divisible by 4."
      );
      data = data[0 .. $ - $ % 4]; // Trim to valid size.
    }
    if (data.length == 0)
      return null;

    char[] result;
    foreach (dchar c; cast(dchar[])data)
    {
      static if (isBigEndian)
        c = BEtoMachineDword(c);
      else
        c = LEtoMachineDword(c);

      if (!isValidChar(c))
      {
        reportError(Format("invalid UTF-32 character '{:X}'.", c));
        c = REPLACEMENT_CHAR;
      }

      dil.Unicode.encode(result, c);
    }
    return result;
  }

  alias UTF32toUTF8!(true) UTF32BEtoUTF8;
  alias UTF32toUTF8!(false) UTF32LEtoUTF8;

  /// Converts UTF-16 encoded data (without a BOM) to UTF-8.
  ///
  /// An error is reported if the byte length is odd; the last byte is then
  /// ignored. Surrogate pairs are combined into a single code point. A lone
  /// or mismatched surrogate is reported and replaced with REPLACEMENT_CHAR.
  /// Params:
  ///   isBigEndian = byte order of the input.
  ///   data        = the raw bytes.
  /// Returns: the UTF-8 text, or null if there is nothing to convert.
  char[] UTF16toUTF8(bool isBigEndian)(ubyte[] data)
  {
    if (data.length % 2)
    {
      reportError(
        "the byte length of a UTF-16 source file must be divisible by 2."
      );
      data = data[0 .. $-1]; // Trim to valid size.
    }

    if (data.length == 0)
      return null;

    wchar[] text = cast(wchar[])data;
    wchar* p = text.ptr,
         end = text.ptr + text.length;
    char[] result;

    while (p < end)
    {
      // Fetch the next code unit on every iteration.
      wchar unit = *p;
      ++p;
      static if (isBigEndian)
        unit = BEtoMachineWord(unit);
      else
        unit = LEtoMachineWord(unit);

      dchar c = unit;

      if (c < 0xD800 || 0xDFFF < c)
      {} // Not a surrogate: the code unit is the code point.
      else
      {
        bool isPair = false;
        // A valid pair is a high surrogate (D800..DBFF) that is
        // immediately followed by a low surrogate (DC00..DFFF).
        if (c <= 0xDBFF && p < end)
        {
          wchar c2 = *p;
          static if (isBigEndian)
            c2 = BEtoMachineWord(c2);
          else
            c2 = LEtoMachineWord(c2);

          if (0xDC00 <= c2 && c2 <= 0xDFFF)
          {
            // 0xD7C0 == 0xD800 - (0x10000 >> 10); this subtracts the
            // surrogate base and adds the 0x10000 offset in one step.
            c = cast(dchar)(((c - 0xD7C0) << 10) | (c2 & 0x3FF));
            ++p; // Consume the low surrogate.
            isPair = true;
          }
        }
        if (!isPair)
        {
          reportError(Format("invalid UTF-16 character '{:X}'.", c));
          c = REPLACEMENT_CHAR;
        }
      }

      dil.Unicode.encode(result, c);
    }
    return result;
  }

  alias UTF16toUTF8!(true) UTF16BEtoUTF8;
  alias UTF16toUTF8!(false) UTF16LEtoUTF8;

  /// Detects the encoding of data and converts it to UTF-8.
  ///
  /// The encoding is taken from the BOM reported by tellBOM(). Without a
  /// BOM it is guessed from the position of the zero bytes around the first
  /// character, which must be an ASCII character according to the specs.
  /// Params:
  ///   data = the raw bytes of the file.
  /// Returns: the UTF-8 text, or null if data is empty. For UTF-8 input the
  ///   result is a slice of data; no copy is made.
  char[] data2UTF8(ubyte[] data)
  {
    if (data.length == 0)
      return null;

    char[] text;
    BOM bom = tellBOM(data);

    switch (bom)
    {
    case BOM.None:
      // No BOM found. According to the specs the first character
      // must be an ASCII character.
      if (data.length >= 4)
      {
        if (data[0..3] == cast(ubyte[3])x"00 00 00")
        {
          text = UTF32BEtoUTF8(data); // UTF-32BE: 00 00 00 XX
          break;
        }
        else if (data[1..4] == cast(ubyte[3])x"00 00 00")
        {
          text = UTF32LEtoUTF8(data); // UTF-32LE: XX 00 00 00
          break;
        }
      }
      if (data.length >= 2)
      {
        if (data[0] == 0) // UTF-16BE: 00 XX
        {
          text = UTF16BEtoUTF8(data);
          break;
        }
        else if (data[1] == 0) // UTF-16LE: XX 00
        {
          text = UTF16LEtoUTF8(data);
          break;
        }
      }
      text = cast(char[])data; // UTF-8
      break;
    case BOM.UTF8:
      text = cast(char[])data[3..$]; // Skip the 3 byte BOM.
      break;
    case BOM.UTF16BE:
      text = UTF16BEtoUTF8(data[2..$]); // Skip the 2 byte BOM.
      break;
    case BOM.UTF16LE:
      text = UTF16LEtoUTF8(data[2..$]);
      break;
    case BOM.UTF32BE:
      text = UTF32BEtoUTF8(data[4..$]); // Skip the 4 byte BOM.
      break;
    case BOM.UTF32LE:
      text = UTF32LEtoUTF8(data[4..$]);
      break;
    default:
      assert(0);
    }
    return text;
  }
}