annotate trunk/src/dil/Converter.d @ 532:50e64bab9c7a

Renamed InformationManager to InfoManager.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Mon, 17 Dec 2007 16:10:08 +0100
parents 8f86bb9ef715
children 2a8d0ed0d71e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
518
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
1 /++
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
2 Author: Aziz Köksal
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
3 License: GPL3
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
4 +/
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
5 module dil.Converter;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
6
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
7 import dil.Information;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
8 import dil.Location;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
9 import dil.Unicode;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
10 import dil.FileBOM;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
11 import common;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
12
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
13 /// Converts various Unicode encoding formats to UTF-8.
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
14 struct Converter
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
15 {
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
16 char[] filePath; /// For error messages.
532
50e64bab9c7a Renamed InformationManager to InfoManager.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 518
diff changeset
17 InfoManager infoMan;
518
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
18
532
50e64bab9c7a Renamed InformationManager to InfoManager.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 518
diff changeset
/// Builds a Converter from its two pieces of state.
///
/// Params:
///   filePath = stored in the converter; the conversion routines use it when
///              constructing error locations.
///   infoMan  = stored in the converter; conversion errors are appended to it.
/// Returns: a Converter value with both fields set.
static Converter opCall(char[] filePath, InfoManager infoMan)
{
  Converter instance;
  instance.infoMan = infoMan;
  instance.filePath = filePath;
  return instance;
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
26
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Reverses the order of the four bytes of a 32-bit code unit.
///
/// Params:
///   c = the value whose bytes are to be reversed.
/// Returns: c with byte 0 and byte 3 exchanged and byte 1 and byte 2 exchanged.
dchar swapBytes(dchar c)
{
  uint bits = c;
  uint lowestToTop  = bits << 24;             // byte 0 -> byte 3
  uint topToLowest  = bits >> 24;             // byte 3 -> byte 0
  uint upperToLower = (bits >> 8) & 0xFF00;   // byte 2 -> byte 1
  uint lowerToUpper = (bits << 8) & 0xFF0000; // byte 1 -> byte 2
  return cast(dchar)(lowestToTop | upperToLower | lowerToUpper | topToLowest);
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
34
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Exchanges the two bytes of a 16-bit code unit.
///
/// Params:
///   c = the value whose bytes are to be exchanged.
/// Returns: c with its high and low byte swapped.
wchar swapBytes(wchar c)
{
  uint lowByte  = c & 0xFF;
  uint highByte = c >> 8;
  // The low byte moves up, the high byte moves down; the result fits in 16 bits.
  return cast(wchar)((lowByte << 8) | highByte);
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
39
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts a 32-bit code unit read from big-endian data to the byte order
/// of the machine the program was compiled for.
///
/// The return type is dchar (it used to be wchar): a 16-bit return type
/// narrows the result and corrupts every code point above U+FFFF.
///
/// Params:
///   c = the code unit as loaded from big-endian data.
/// Returns: c byte-swapped on little-endian builds, c unchanged otherwise.
dchar BEtoMachineDword(dchar c)
{
  version(LittleEndian)
    return swapBytes(c);
  else
    return c;
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
47
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts a 32-bit code unit read from little-endian data to the byte order
/// of the machine the program was compiled for.
///
/// The return type is dchar (it used to be wchar): a 16-bit return type
/// narrows the result and corrupts every code point above U+FFFF.
///
/// Params:
///   c = the code unit as loaded from little-endian data.
/// Returns: c unchanged on little-endian builds, c byte-swapped otherwise.
dchar LEtoMachineDword(dchar c)
{
  version(LittleEndian)
    return c;
  else
    return swapBytes(c);
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
55
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts a 16-bit code unit read from big-endian data to the byte order
/// of the machine the program was compiled for.
///
/// Params:
///   c = the code unit as loaded from big-endian data.
/// Returns: c byte-swapped on little-endian builds, c unchanged otherwise.
wchar BEtoMachineWord(wchar c)
{
  version(LittleEndian)
  {
    // Stored byte order differs from the machine's: swap.
    wchar swapped = swapBytes(c);
    return swapped;
  }
  else
  {
    return c;
  }
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
63
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts a 16-bit code unit read from little-endian data to the byte order
/// of the machine the program was compiled for.
///
/// Params:
///   c = the code unit as loaded from little-endian data.
/// Returns: c unchanged on little-endian builds, c byte-swapped otherwise.
wchar LEtoMachineWord(wchar c)
{
  version(LittleEndian)
  {
    return c;
  }
  else
  {
    // Stored byte order differs from the machine's: swap.
    wchar swapped = swapBytes(c);
    return swapped;
  }
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
71
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts UTF-32 encoded bytes to UTF-8.
///
/// Problems are reported by appending a LexerError to infoMan; conversion
/// then continues:
///   - a byte length that is not a multiple of 4 is reported and the
///     trailing partial unit is dropped;
///   - a code unit rejected by isValidChar is reported and replaced with
///     REPLACEMENT_CHAR.
///
/// Params:
///   isBigEndian = compile-time flag; true to read data as big-endian,
///                 false to read it as little-endian.
///   data        = the bytes to decode.
/// Returns: null if no complete 4-byte unit is available, otherwise the
///          UTF-8 encoding of the decoded characters.
char[] UTF32toUTF8(bool isBigEndian)(ubyte[] data)
{
  if (data.length % 4)
  {
    infoMan.info ~= new LexerError(new Location(filePath, 0),
      "the byte length of a UTF-32 source file must be divisible by 4."
    );
    data = data[0 .. $ - $ % 4]; // Trim to valid size.
  }
  if (data.length == 0)
    return null;

  char[] result;
  foreach (dchar c; cast(dchar[])data)
  {
    // Bring the code unit into the machine's byte order.
    static if (isBigEndian)
      c = BEtoMachineDword(c);
    else
      c = LEtoMachineDword(c);

    if (!isValidChar(c))
    {
      // TODO: correct location.
      auto loc = new Location(filePath, 0);
      // Pass the location that was just built; it used to be created and
      // then discarded in favour of null.
      infoMan.info ~= new LexerError(loc, Format("invalid UTF-32 character '{:X}'.", c));
      c = REPLACEMENT_CHAR;
    }

    dil.Unicode.encode(result, c);
  }
  return result;
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
104
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
105 alias UTF32toUTF8!(true) UTF32BEtoUTF8;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
106 alias UTF32toUTF8!(false) UTF32LEtoUTF8;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
107
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts UTF-16 encoded bytes to UTF-8.
///
/// Problems are reported by appending a LexerError to infoMan; conversion
/// then continues:
///   - an odd byte length is reported and the trailing byte is dropped;
///   - an unpaired surrogate (a low surrogate on its own, a high surrogate
///     at the end of the data, or a high surrogate not followed by a low
///     surrogate) is reported and replaced with REPLACEMENT_CHAR.
///
/// Params:
///   isBigEndian = compile-time flag; true to read data as big-endian,
///                 false to read it as little-endian.
///   data        = the bytes to decode.
/// Returns: null if no complete 2-byte unit is available, otherwise the
///          UTF-8 encoding of the decoded characters.
char[] UTF16toUTF8(bool isBigEndian)(ubyte[] data)
{
  if (data.length % 2)
  {
    infoMan ~= new LexerError(new Location(filePath, 0),
      "the byte length of a UTF-16 source file must be divisible by 2."
    );
    data = data[0 .. $-1]; // Trim to valid size.
  }

  if (data.length == 0)
    return null;

  wchar[] text = cast(wchar[])data;
  wchar* p = text.ptr,
         end = text.ptr + text.length;
  char[] result;

  for (; p < end; ++p)
  {
    // Load the current code unit on every iteration. Reading it only once
    // before the loop made every later iteration reprocess a stale value.
    wchar unit = *p;
    static if (isBigEndian)
      unit = BEtoMachineWord(unit);
    else
      unit = LEtoMachineWord(unit);

    dchar c = unit;

    // Only values in 0xD800..0xDFFF are surrogates and need extra work.
    // (The upper bound used to be written "0xDFFF > c", which is true for
    // almost every value and made the branches below unreachable.)
    if (c < 0xD800 || c > 0xDFFF)
    {}
    else
    {
      bool paired = false;
      if (c <= 0xDBFF && p+1 < end)
      { // High surrogate with a following unit: try to decode the pair.
        wchar c2 = p[1];
        static if (isBigEndian)
          c2 = BEtoMachineWord(c2);
        else
          c2 = LEtoMachineWord(c2);

        if (0xDC00 <= c2 && c2 <= 0xDFFF)
        {
          // 0xD7C0 == 0xD800 - (0x10000 >> 10): removes the surrogate
          // bias and adds the 0x10000 offset in one step.
          c = cast(dchar)((c - 0xD7C0) << 10);
          c |= (c2 & 0x3FF);
          ++p; // The low surrogate has been consumed as well.
          paired = true;
        }
      }
      if (!paired)
      {
        // TODO: correct location.
        auto loc = new Location(filePath, 0);
        infoMan ~= new LexerError(loc, Format("invalid UTF-16 character '{:X}'.", c));
        c = REPLACEMENT_CHAR;
      }
    }
    dil.Unicode.encode(result, c);
  }
  return result;
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
164
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
165 alias UTF16toUTF8!(true) UTF16BEtoUTF8;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
166 alias UTF16toUTF8!(false) UTF16LEtoUTF8;
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
167
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Converts the raw bytes of a file to UTF-8, choosing the decoder from the
/// byte order mark reported by tellBOM.
///
/// When a BOM is found it is stripped (3 bytes for UTF-8, 2 for UTF-16,
/// 4 for UTF-32) and the rest is handed to the matching converter.
/// When no BOM is found the encoding is guessed from the position of zero
/// bytes at the start of the data, relying on the first character being
/// ASCII; if nothing matches the data is taken to be UTF-8 as is.
///
/// Params:
///   data = the complete file contents.
/// Returns: null for empty input, otherwise the text as UTF-8. In the UTF-8
///          cases the result is a cast of (a slice of) data, not a copy.
char[] data2UTF8(ubyte[] data)
{
  if (data.length == 0)
    return null;

  char[] text;

  switch (tellBOM(data))
  {
  case BOM.UTF8:
    text = cast(char[])data[3..$];
    break;
  case BOM.UTF16BE:
    text = UTF16BEtoUTF8(data[2..$]);
    break;
  case BOM.UTF16LE:
    text = UTF16LEtoUTF8(data[2..$]);
    break;
  case BOM.UTF32BE:
    text = UTF32BEtoUTF8(data[4..$]);
    break;
  case BOM.UTF32LE:
    text = UTF32LEtoUTF8(data[4..$]);
    break;
  case BOM.None:
    // No BOM found. According to the specs the first character
    // must be an ASCII character.
    bool hasDword = data.length >= 4;
    bool hasWord  = data.length >= 2;

    if (hasDword && data[0..3] == cast(ubyte[3])x"00 00 00")
      text = UTF32BEtoUTF8(data); // UTF-32BE: 00 00 00 XX
    else if (hasDword && data[1..4] == cast(ubyte[3])x"00 00 00")
      text = UTF32LEtoUTF8(data); // UTF-32LE: XX 00 00 00
    else if (hasWord && data[0] == 0)
      text = UTF16BEtoUTF8(data); // UTF-16BE: 00 XX
    else if (hasWord && data[1] == 0)
      text = UTF16LEtoUTF8(data); // UTF-16LE: XX 00
    else
      text = cast(char[])data; // UTF-8
    break;
  default:
    assert(0);
  }
  return text;
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
229 }