Mercurial > projects > dwt2
comparison base/src/java/nonstandard/UtfBase.d @ 120:536e43f63c81
Comprehensive update for Win32/Linux32 dmd-2.053/dmd-1.068+Tango-r5661
===D2===
* added [Try]Immutable/Const/Shared templates to handle the differences between D1 and D2 instead of version statements
used these templates to work with strict type storage rules of dmd-2.053
* com.ibm.icu now also compilable with D2, but not tested yet
* small fixes
Snippet288 - shared data is in TLS
===Phobos===
* fixed critical bugs in the Phobos implementation
completely incorrect segfault prone fromStringz (Linux's port ruthless killer)
terrible, incorrect StringBuffer implementation (StyledText killer)
* fixed small bugs as well
Snippet72 - misprint in the snippet
* implemented missing functionality for Phobos
ByteArrayOutputStream implemented (image loading available)
formatting correctly works for all DWT's cases
As a result, the following snippets now work with Phobos (Snippet### - what is fixed):
Snippet24, 42, 111, 115, 130, 235, 276 - bad string formatting
Snippet48, 282 - crash on image loading
Snippet163, 189, 211, 213, 217, 218, 222 - crash on copy/cut in StyledText
Snippet244 - hang-up
===Tango===
* a few changes for the latest Tango trunk-r5661
* a few small performance improvements
===General===
* implMissing-s for only one version changed to implMissingInTango/InPhobos
* incorrect calls to Format in toString-s fixed
* fixed loading \uXXXX characters in ResourceBundle
* added good UTF-8 support for StyledText, TextLayout (Win32) and friends
UTF functions revised and tested. It is now in java.nonstandard.*Utf modules
StyledText and TextLayout (Win32) modules revised for UTF-8 support
* removed small differences between mostly identical files in *.swt.* folders
*.swt.internal.image, *.swt.events and *.swt.custom are identical in Win32/Linux32
now 179 of 576 (~31%) files in *.swt.* folders are fully identical
* Win32: snippets now have right subsystem, pretty icons and native system style controls
* small fixes in snippets
Snippet44 - it's not Snippet44
Snippet212 - functions work with different images and offsets arrays
Win32: Snippet282 - crash on close if the button has an image
Snippet293 - setGrayed is commented
and others
Win32: As a result, the following snippets now work
Snippet68 - color doesn't change
Snippet163, 189, 211, 213, 217, 218, 222 - UTF-8 issues (see above)
Snippet193 - no table headers
author | Denis Shelomovskij <verylonglogin.reg@gmail.com> |
---|---|
date | Sat, 09 Jul 2011 15:50:20 +0300 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
119:d00e8db0a568 | 120:536e43f63c81 |
---|---|
1 /** | |
2 * Stuff for working with narrow strings. | |
3 * This module shouldn't be imported directly. | |
4 * Use SafeUtf/UnsafeUtf modules instead. | |
5 * | |
6 * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com> | |
7 */ | |
8 module java.nonstandard.UtfBase; | |
9 | |
10 package const UtfBaseText = ` | |
11 # line 11 "java\nonstandard\UtfBase.d" | |
12 import java.lang.util; | |
13 | |
14 version(Tango){ | |
15 static import tango.text.convert.Utf; | |
16 } else { // Phobos | |
17 static import std.utf; | |
18 static import std.conv; | |
19 } | |
20 | |
21 ///The Universal Character Set (UCS), defined by the International Standard ISO/IEC 10646 | |
22 /*typedef*/alias int UCSindex; | |
23 alias UCSindex UCSshift; | |
24 | |
25 static if(UTFTypeCheck) { | |
26 ///UTF-16 (16-bit Unicode Transformation Format) | |
27 /*struct UTF16index { | |
28 int internalValue; | |
29 alias internalValue val; | |
30 | |
31 private static UTF16index opCall(int _val) { | |
32 UTF16index t = { _val }; | |
33 return t; | |
34 } | |
35 | |
36 void opAddAssign(in UTF16shift di) { | |
37 val += di; | |
38 } | |
39 | |
40 void opSubAssign(in UTF16shift di) { | |
41 val -= di; | |
42 } | |
43 | |
44 mixin(constFuncs!(" | |
45 UTF16index opAdd(in UTF16shift di) { | |
46 return UTF16index(val + di); | |
47 } | |
48 | |
49 UTF16index opSub(in UTF16shift di) { | |
50 return UTF16index(val - di); | |
51 } | |
52 | |
53 version(Windows) { | |
54 UTF16index opAdd(in int di) { | |
55 return UTF16index(val + di); | |
56 } | |
57 | |
58 UTF16index opSub(in int di) { | |
59 return UTF16index(val - di); | |
60 } | |
61 } | |
62 | |
63 int opCmp(in UTF16index i2) { | |
64 return val - i2.val; | |
65 } | |
66 ")); | |
67 }*/ | |
68 typedef int UTF16index; | |
69 typedef int UTF16shift; | |
70 | |
71 ///UTF-8 (UCS Transformation Format — 8-bit) | |
72 //typedef int UTF8index; | |
73 //alias UTF8index UTF8shift; | |
74 struct UTF8index { | |
75 int internalValue; | |
76 alias internalValue val; | |
77 | |
78 private static UTF8index opCall(int _val) { | |
79 UTF8index t = { _val }; | |
80 return t; | |
81 } | |
82 | |
83 void opAddAssign(in UTF8shift di) { | |
84 val += di.val; | |
85 } | |
86 | |
87 void opSubAssign(in UTF8shift di) { | |
88 val -= di.val; | |
89 } | |
90 | |
91 mixin(constFuncs!(" | |
92 UTF8index opAdd(in UTF8shift di) { | |
93 return UTF8index(val + di.val); | |
94 } | |
95 | |
96 UTF8index opSub(in UTF8shift di) { | |
97 return UTF8index(val - di.val); | |
98 } | |
99 | |
100 UTF8shift opSub(in UTF8index di) { | |
101 return UTF8shift(val - di.val); | |
102 } | |
103 | |
104 int opCmp(in UTF8index i2) { | |
105 return val - i2.val; | |
106 } | |
107 ")); | |
108 } | |
109 | |
110 private UTF8index newUTF8index(int i) { | |
111 return UTF8index(i); | |
112 } | |
113 | |
114 private int val(T)(T i) { | |
115 static if(is(T : UTF16index)) | |
116 return cast(int) i; | |
117 else | |
118 return i.val; | |
119 } | |
120 | |
121 private void dec(ref UTF8index i) { | |
122 --i.val; | |
123 } | |
124 | |
125 struct UTF8shift { | |
126 int internalValue; | |
127 alias internalValue val; | |
128 | |
129 private static UTF8shift opCall(int _val) { | |
130 UTF8shift t = { _val }; | |
131 return t; | |
132 } | |
133 | |
134 void opAddAssign(in UTF8shift di) { | |
135 val += di.val; | |
136 } | |
137 | |
138 void opSubAssign(in UTF8shift di) { | |
139 val -= di.val; | |
140 } | |
141 | |
142 mixin(constFuncs!(" | |
143 UTF8shift opAdd(in UTF8shift di) { | |
144 return UTF8shift(val + di.val); | |
145 } | |
146 | |
147 UTF8shift opSub(in UTF8shift di) { | |
148 return UTF8shift(val - di.val); | |
149 } | |
150 | |
151 int opCmp(in UTF8shift di2) { | |
152 return val - di2.val; | |
153 } | |
154 ")); | |
155 } | |
156 | |
157 | |
158 UTF8index asUTF8index(int i) { | |
159 return UTF8index(i); | |
160 } | |
161 | |
162 UTF8shift asUTF8shift(int i) { | |
163 return UTF8shift(i); | |
164 } | |
165 } else { | |
166 alias int UTF16index; | |
167 alias int UTF16shift; | |
168 | |
169 alias int UTF8index; | |
170 alias int UTF8shift; | |
171 | |
172 private int val(int i) { | |
173 return i; | |
174 } | |
175 | |
176 private void dec(ref UTF8index i) { | |
177 --i; | |
178 } | |
179 } | |
180 | |
181 char charByteAt(in char[] s, in UTF8index i) { | |
182 return s[val(i)]; | |
183 } | |
184 | |
185 UTF8index preFirstIndex(in char[] s) { | |
186 return cast(UTF8index) -1; | |
187 } | |
188 | |
189 UTF8index firstIndex(in char[] s) { | |
190 return cast(UTF8index) 0; | |
191 } | |
192 | |
193 UTF8index endIndex(in char[] s) { | |
194 return cast(UTF8index) s.length; | |
195 } | |
196 | |
197 UTF8index beforeEndIndex(in char[] s) { | |
198 return s.offsetBefore(s.endIndex()); | |
199 } | |
200 | |
201 | |
202 //These variables aren't in TLS, so they can be used only for writing | |
203 mixin(gshared!(" | |
204 private UCSindex UCSdummyShift; | |
205 private UTF8shift UTF8dummyShift; | |
206 private UTF16shift UTF16dummyShift; | |
207 ")); | |
208 | |
209 private const ubyte[256] p_UTF8stride = | |
210 [ | |
211 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
212 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
213 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
214 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
215 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
216 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
217 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
218 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
219 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, | |
220 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, | |
221 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, | |
222 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, | |
223 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
224 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
225 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |
226 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF, | |
227 ]; | |
228 | |
229 private String toUTF8infoString(in char[] s, UTF8index i) { | |
230 return Format("i = {}, s[i] = {}, s = {}", val(i), cast(ubyte)s.charByteAt(i), cast(ubyte[])s); | |
231 } | |
232 | |
233 class UTF8Exception : Exception { | |
234 this( String msg, in char[] s, UTF8index i){ | |
235 super( Format("{}:\n{}", msg, toUTF8infoString(s, i))); | |
236 } | |
237 } | |
238 | |
239 bool isUTF8sequenceStart( in char[] s, in UTF8index i ) { | |
240 return p_UTF8stride[s.charByteAt(i)] != 0xFF; | |
241 } | |
242 | |
243 void validateUTF8index( in char[] s, in UTF8index i ) { | |
244 if(i != s.endIndex() && !s.isUTF8sequenceStart(i)) | |
245 throw new UTF8Exception("Not a start of an UTF-8 sequence", s, i); | |
246 } | |
247 | |
248 UTF8shift UTF8strideAt( in char[] s, in UTF8index i ) { | |
249 s.validateUTF8index(i); | |
250 version(Tango) { | |
251 return cast(UTF8shift)p_UTF8stride[s.charByteAt(i)]; | |
252 } else { // Phobos | |
253 return cast(UTF8shift)std.utf.stride( s, val(i) ); | |
254 } | |
255 } | |
256 | |
257 UTF16shift UTF16strideAt( in wchar[] s, in UTF16index i ) { | |
258 //s.validateUTF16index(i); | |
259 version(Tango) { | |
260 uint u = s[val(i)]; | |
261 return cast(UTF16shift)(1 + (u >= 0xD800 && u <= 0xDBFF)); | |
262 } else { // Phobos | |
263 return cast(UTF16shift)std.utf.stride( s, val(i) ); | |
264 } | |
265 } | |
266 | |
267 UCSindex UCScount( in char[] s ){ | |
268 version(Tango){ | |
269 scope dchar[] buf = new dchar[]( s.length ); | |
270 uint ate; | |
271 dchar[] res = tango.text.convert.Utf.toString32( s, buf, &ate ); | |
272 assert( ate is s.length ); | |
273 return res.length; | |
274 } else { // Phobos | |
275 return std.utf.count(s); | |
276 } | |
277 } | |
278 | |
279 UTF8shift toUTF8shift( in char[] s, in UTF8index i, in UCSshift dn ) { | |
280 s.validateUTF8index(i); | |
281 UTF8index j = i; | |
282 UCSshift tdn = dn; | |
283 if(tdn > 0) | |
284 do { | |
285 j += s.UTF8strideAt(j); | |
286 if(j > s.endIndex()) | |
287 throw new UTF8Exception(Format("toUTF8shift (dn = {}): No end of the UTF-8 sequence", dn), s, i); | |
288 } while(--tdn) | |
289 else if(tdn < 0) { | |
290 do { | |
291 if(!val(j)) | |
292 if(tdn == -1) { | |
293 j = s.preFirstIndex(); | |
294 break; | |
295 } else | |
296 throw new UTF8Exception(Format("toUTF8shift (dn = {}): Can only go down to -1, not {}", dn, tdn), s, i); | |
297 int l = 0; | |
298 do { | |
299 if(!val(j)) | |
300 throw new UTF8Exception(Format("toUTF8shift (dn = {}): No start of the UTF-8 sequence before", dn), s, i); | |
301 ++l; | |
302 dec(j); | |
303 } while(!s.isUTF8sequenceStart(j)) | |
304 l -= val(s.UTF8strideAt(j)); | |
305 if(l > 0) | |
306 throw new UTF8Exception(Format("toUTF8shift (dn = {}): Overlong UTF-8 sequence before", dn), s, i); | |
307 else if(l < 0) | |
308 throw new UTF8Exception(Format("toUTF8shift (dn = {}): Too short UTF-8 sequence before", dn), s, i); | |
309 } while(++tdn) | |
310 } | |
311 return j - i; | |
312 } | |
313 | |
314 UTF8index offsetBefore( in char[] s, in UTF8index i ) { | |
315 return i + s.toUTF8shift(i, -1); | |
316 } | |
317 | |
318 UTF8index offsetAfter( in char[] s, in UTF8index i ) { | |
319 return i + s.toUTF8shift(i, 1); | |
320 } | |
321 | |
322 /** | |
323 If the index is in the middle of a UTF-8 byte sequence, it is moved back (in place, through the ref parameter) | |
324 to the first byte of that sequence. Throws UTF8Exception if no sequence start precedes it or if the index lies past the end of the preceding sequence. | |
325 */ | |
326 void adjustUTF8index( in char[] s, ref UTF8index i ){ | |
327 if(i == s.endIndex() || s.isUTF8sequenceStart(i)) | |
328 return; | |
329 | |
330 int l = 0; //number of bytes stepped back from the original index | |
331 alias i res; //res is the caller's index itself, so every dec(res) below changes i | |
332 do { | |
333 if(!val(res)) | |
334 throw new UTF8Exception("adjustUTF8index: No start of the UTF-8 sequence", s, i); | |
335 ++l; | |
336 dec(res); | |
337 } while(!s.isUTF8sequenceStart(res)) | |
338 l -= val(s.UTF8strideAt(i)); //l is now (bytes stepped back) - (length of the sequence found) | |
339 if(l >= 0) //the sequence covers offsets 0 .. stride-1 only, so l == 0 is already one byte past its end | |
340 throw new UTF8Exception("adjustUTF8index: Overlong UTF-8 sequence", s, i); | |
341 } | |
342 | |
343 UTF8index takeIndexArg(String F = __FILE__, uint L = __LINE__)(String s, int i_arg, String location) { | |
344 UTF8index res = cast(UTF8index) i_arg; | |
345 if(i_arg > 0 && i_arg < s.length) { | |
346 auto t = res; | |
347 s.adjustUTF8index(res); | |
348 if(t != res) | |
349 getDwtLogger().warn(F, L, Format("Fixed invalid UTF-8 index at {}:\nnew i = {}, {}", location, val(res), toUTF8infoString(s, t))); | |
350 } | |
351 return res; | |
352 } | |
353 | |
354 dchar dcharAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) { | |
355 s.validateUTF8index(i); | |
356 auto str = s[val(i) .. $]; | |
357 version(Tango){ | |
358 dchar[1] buf; | |
359 uint ate; | |
360 dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate ); | |
361 assert( ate > 0 && res.length is 1 ); | |
362 stride = cast(UTF8shift)ate; | |
363 return res[0]; | |
364 } else { // Phobos | |
365 size_t ate = 0; | |
366 dchar res = std.utf.decode(str, ate); | |
367 stride = cast(UTF8shift)ate; | |
368 return res; | |
369 } | |
370 } | |
371 | |
372 dchar dcharAt( in wchar[] s, in UTF16index i, out UTF16shift stride = UTF16dummyShift ) { //decodes the code point starting at UTF-16 index i; stride receives the number of wchar units consumed | |
373 //s.validateUTF16index(i); | |
374 auto str = s[val(i) .. $]; | |
375 version(Tango){ | |
376 dchar[1] buf; | |
377 uint ate; | |
378 dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate ); | |
379 if( ate is 0 || res.length is 0 ){ //log the offending input first: the assert below would otherwise abort before the trace is written | |
380 getDwtLogger().trace( __FILE__, __LINE__, "str.length={} str={:X2}", str.length, cast(ubyte[])str ); | |
381 } | |
382 assert( ate > 0 && res.length is 1 ); | |
383 stride = cast(UTF16shift)ate; | |
384 return res[0]; | |
385 } else { // Phobos | |
386 size_t ate = 0; | |
387 dchar res = std.utf.decode(str, ate); | |
388 stride = cast(UTF16shift)ate; | |
389 return res; | |
390 } | |
391 } | |
392 | |
393 dchar dcharBefore( in char[] s, in UTF8index i ) { | |
394 return s.dcharAt(s.offsetBefore(i)); | |
395 } | |
396 | |
397 dchar dcharAfter( in char[] s, in UTF8index i ) { | |
398 return s.dcharAt(i + s.toUTF8shift(i, 1)); | |
399 } | |
400 | |
401 ///Get that String, that contains the next codepoint of a String. | |
402 String dcharAsStringAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) { | |
403 s.validateUTF8index(i); | |
404 auto str = s[val(i) .. $]; | |
405 uint ate; | |
406 version(Tango){ | |
407 dchar[1] buf; | |
408 dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate ); | |
409 } else { // Phobos | |
410 ate = std.utf.stride( str, 0 ); | |
411 } | |
412 stride = cast(UTF8shift)ate; | |
413 return str[ 0 .. ate ]._idup(); | |
414 } | |
415 | |
416 `; |