annotate tango/tango/text/convert/UnicodeBom.d @ 264:a9dae3da4e87 trunk

[svn r285] Fixed D -> bool LLVM helper for floating point values. Changed the way D-style varargs are passed, now each param should be aligned to size_t.sizeof.
author lindquist
date Sat, 14 Jun 2008 17:28:13 +0200
parents 1700239cab2e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
132
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
1 /*******************************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
2
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
4
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
5 license: BSD style: $(LICENSE)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
6
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
7 version: Initial release: December 2005
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
8
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
9 author: Kris
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
10
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
11 *******************************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
12
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
13 module tango.text.convert.UnicodeBom;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
14
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
15 private import tango.core.ByteSwap;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
16
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
17 private import Utf = tango.text.convert.Utf;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
18
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
19
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
20 private extern (C) void onUnicodeError (char[] msg, size_t idx = 0);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
21
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
22 /*******************************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
23
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
24 see http://icu.sourceforge.net/docs/papers/forms_of_unicode/#t2
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
25
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
26 *******************************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
27
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
28 enum Encoding {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
29 Unknown,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
30 UTF_8,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
31 UTF_8N,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
32 UTF_16,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
33 UTF_16BE,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
34 UTF_16LE,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
35 UTF_32,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
36 UTF_32BE,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
37 UTF_32LE,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
38 };
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
39
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
40 /*******************************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
41
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
42 Convert unicode content
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
43
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
44 Unicode is an encoding of textual material. The purpose of this module
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
45 is to interface external-encoding with a programmer-defined internal-
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
46 encoding. This internal encoding is declared via the template argument
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
47 T, whilst the external encoding is either specified or derived.
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
48
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
49 Three internal encodings are supported: char, wchar, and dchar. The
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
50 methods herein operate upon arrays of this type. That is, decode()
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
51 returns an array of the type, while encode() expect an array of said
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
52 type.
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
53
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
54 Supported external encodings are as follow:
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
55
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
56 Encoding.Unknown
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
57 Encoding.UTF_8
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
58 Encoding.UTF_8N
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
59 Encoding.UTF_16
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
60 Encoding.UTF_16BE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
61 Encoding.UTF_16LE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
62 Encoding.UTF_32
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
63 Encoding.UTF_32BE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
64 Encoding.UTF_32LE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
65
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
66 These can be divided into non-explicit and explicit encodings:
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
67
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
68 Encoding.Unknown
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
69 Encoding.UTF_8
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
70 Encoding.UTF_16
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
71 Encoding.UTF_32
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
72
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
73
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
74 Encoding.UTF_8N
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
75 Encoding.UTF_16BE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
76 Encoding.UTF_16LE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
77 Encoding.UTF_32BE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
78 Encoding.UTF_32LE
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
79
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
80 The former group of non-explicit encodings may be used to 'discover'
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
81 an unknown encoding, by examining the first few bytes of the content
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
82 for a signature. This signature is optional, but is often written such
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
83 that the content is self-describing. When an encoding is unknown, using
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
84 one of the non-explicit encodings will cause the decode() method to look
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
85 for a signature and adjust itself accordingly. It is possible that a
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
86 ZWNBSP character might be confused with the signature; today's unicode
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
87 content is supposed to use the WORD-JOINER character instead.
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
88
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
89 The group of explicit encodings are for use when the content encoding
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
90 is known. These *must* be used when converting back to external encoding,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
91 since written content must be in a known format. It should be noted that,
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
92 during a decode() operation, the existence of a signature is in conflict
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
93 with these explicit varieties.
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
94
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
95
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
96 See
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
97 $(LINK http://www.utf-8.com/)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
98 $(LINK http://www.hackcraft.net/xmlUnicode/)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
99 $(LINK http://www.unicode.org/faq/utf_bom.html/)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
100 $(LINK http://www.azillionmonkeys.com/qed/unicode.html/)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
101 $(LINK http://icu.sourceforge.net/docs/papers/forms_of_unicode/)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
102
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
103 *******************************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
104
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
105 class UnicodeBom(T) : BomSniffer
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
106 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
107 static if (!is (T == char) && !is (T == wchar) && !is (T == dchar))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
108 pragma (msg, "Template type must be char, wchar, or dchar");
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
109
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
110 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
111
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
112 Construct a instance using the given external encoding ~ one
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
113 of the Encoding.xx types
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
114
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
115 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
116
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
117 this (Encoding encoding)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
118 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
119 setup (encoding);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
120 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
121
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
122 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
123
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
124 Convert the provided content. The content is inspected
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
125 for a BOM signature, which is stripped. An exception is
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
126 thrown if a signature is present when, according to the
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
127 encoding type, it should not be. Conversely, An exception
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
128 is thrown if there is no known signature where the current
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
129 encoding expects one to be present
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
130
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
131 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
132
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
133 final T[] decode (void[] content, T[] dst=null, uint* ate=null)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
134 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
135 // look for a BOM
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
136 auto info = test (content);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
137
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
138 // are we expecting a BOM?
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
139 if (lookup[encoding].test)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
140 if (info)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
141 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
142 // yep ~ and we got one
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
143 setup (info.encoding);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
144
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
145 // strip BOM from content
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
146 content = content [info.bom.length .. length];
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
147 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
148 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
149 // can this encoding be defaulted?
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
150 if (settings.fallback)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
151 setup (settings.fallback);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
152 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
153 onUnicodeError ("UnicodeBom.decode :: unknown or missing BOM");
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
154 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
155 if (info)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
156 // found a BOM when using an explicit encoding
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
157 onUnicodeError ("UnicodeBom.decode :: explicit encoding does not permit BOM");
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
158
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
159 // convert it to internal representation
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
160 return into (swapBytes(content), settings.type, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
161 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
162
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
163 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
164
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
165 Perform encoding of content. Note that the encoding must be
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
166 of the explicit variety by the time we get here
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
167
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
168 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
169
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
170 final void[] encode (T[] content, void[] dst=null, uint* ate=null)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
171 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
172 if (settings.test)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
173 onUnicodeError ("UnicodeBom.encode :: cannot write to a non-specific encoding");
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
174
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
175 // convert it to external representation, and write
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
176 return swapBytes (from (content, settings.type, dst, ate));
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
177 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
178
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
179 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
180
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
181 Swap bytes around, as required by the encoding
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
182
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
183 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
184
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
185 private final void[] swapBytes (void[] content)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
186 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
187 bool endian = settings.endian;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
188 bool swap = settings.bigEndian;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
189
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
190 version (BigEndian)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
191 swap = !swap;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
192
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
193 if (endian && swap)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
194 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
195 if (settings.type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
196 ByteSwap.swap16 (content.ptr, content.length);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
197 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
198 ByteSwap.swap32 (content.ptr, content.length);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
199 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
200 return content;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
201 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
202
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
203 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
204
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
205
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
206 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
207
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
208 static T[] into (void[] x, uint type, T[] dst=null, uint* ate=null)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
209 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
210 T[] ret;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
211
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
212 static if (is (T == char))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
213 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
214 if (type == Utf8)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
215 return cast(T[]) x;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
216
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
217 if (type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
218 ret = Utf.toString (cast(wchar[]) x, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
219 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
220 if (type == Utf32)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
221 ret = Utf.toString (cast(dchar[]) x, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
222 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
223
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
224 static if (is (T == wchar))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
225 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
226 if (type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
227 return cast(T[]) x;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
228
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
229 if (type == Utf8)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
230 ret = Utf.toString16 (cast(char[]) x, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
231 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
232 if (type == Utf32)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
233 ret = Utf.toString16 (cast(dchar[]) x, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
234 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
235
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
236 static if (is (T == dchar))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
237 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
238 if (type == Utf32)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
239 return cast(T[]) x;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
240
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
241 if (type == Utf8)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
242 ret = Utf.toString32 (cast(char[]) x, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
243 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
244 if (type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
245 ret = Utf.toString32 (cast(wchar[]) x, dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
246 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
247
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
248 return ret;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
249 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
250
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
251
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
252 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
253
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
254 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
255
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
256 static void[] from (T[] x, uint type, void[] dst=null, uint* ate=null)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
257 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
258 void[] ret;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
259
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
260 static if (is (T == char))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
261 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
262 if (type == Utf8)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
263 return x;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
264
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
265 if (type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
266 ret = Utf.toString16 (x, cast(wchar[]) dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
267 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
268 if (type == Utf32)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
269 ret = Utf.toString32 (x, cast(dchar[]) dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
270 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
271
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
272 static if (is (T == wchar))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
273 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
274 if (type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
275 return x;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
276
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
277 if (type == Utf8)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
278 ret = Utf.toString (x, cast(char[]) dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
279 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
280 if (type == Utf32)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
281 ret = Utf.toString32 (x, cast(dchar[]) dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
282 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
283
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
284 static if (is (T == dchar))
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
285 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
286 if (type == Utf32)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
287 return x;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
288
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
289 if (type == Utf8)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
290 ret = Utf.toString (x, cast(char[]) dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
291 else
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
292 if (type == Utf16)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
293 ret = Utf.toString16 (x, cast(wchar[]) dst, ate);
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
294 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
295
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
296 return ret;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
297 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
298 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
299
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
300
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
301
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
302 /*******************************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
303
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
304 *******************************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
305
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
306 class BomSniffer
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
307 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
308 private Encoding encoder; // the current encoding
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
309 private Info* settings; // pointer to encoding configuration
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
310
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
311 private struct Info
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
312 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
313 int type; // type of element (char/wchar/dchar)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
314 Encoding encoding; // Encoding.xx encoding
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
315 char[] bom; // pattern to match for signature
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
316 bool test, // should we test for this encoding?
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
317 endian, // this encoding have endian concerns?
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
318 bigEndian; // is this a big-endian encoding?
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
319 Encoding fallback; // can this encoding be defaulted?
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
320 };
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
321
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
322 private enum {Utf8, Utf16, Utf32};
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
323
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
324 private const Info[] lookup =
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
325 [
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
326 {Utf8, Encoding.Unknown, null, true, false, false, Encoding.UTF_8N},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
327 {Utf8, Encoding.UTF_8, null, true, false, false, Encoding.UTF_8N},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
328 {Utf8, Encoding.UTF_8N, x"efbbbf", false},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
329 {Utf16, Encoding.UTF_16, null, true, false, false, Encoding.UTF_16BE},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
330 {Utf16, Encoding.UTF_16BE, x"feff", false, true, true},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
331 {Utf16, Encoding.UTF_16LE, x"fffe", false, true},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
332 {Utf32, Encoding.UTF_32, null, true, false, false, Encoding.UTF_32BE},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
333 {Utf32, Encoding.UTF_32BE, x"0000feff", false, true, true},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
334 {Utf32, Encoding.UTF_32LE, x"fffe0000", false, true},
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
335 ];
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
336
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
337 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
338
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
339 Return the current encoding. This is either the originally
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
340 specified encoding, or a derived one obtained by inspecting
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
341 the content for a BOM. The latter is performed as part of
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
342 the decode() method
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
343
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
344 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
345
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
346 final Encoding encoding ()
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
347 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
348 return encoder;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
349 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
350
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
351 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
352
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
353 Return the signature (BOM) of the current encoding
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
354
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
355 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
356
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
357 final void[] signature ()
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
358 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
359 return settings.bom;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
360 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
361
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
362 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
363
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
364 Configure this instance with unicode converters
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
365
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
366 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
367
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
368 final void setup (Encoding encoding)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
369 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
370 this.settings = &lookup[encoding];
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
371 this.encoder = encoding;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
372 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
373
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
374 /***********************************************************************
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
375
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
376 Scan the BOM signatures looking for a match. We scan in
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
377 reverse order to get the longest match first
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
378
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
379 ***********************************************************************/
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
380
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
381 static final Info* test (void[] content)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
382 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
383 for (Info* info=lookup.ptr+lookup.length; --info >= lookup.ptr;)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
384 if (info.bom)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
385 {
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
386 int len = info.bom.length;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
387 if (len <= content.length)
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
388 if (content[0..len] == info.bom[0..len])
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
389 return info;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
390 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
391 return null;
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
392 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
393 }
1700239cab2e [svn r136] MAJOR UNSTABLE UPDATE!!!
lindquist
parents:
diff changeset
394