comparison base/src/java/nonstandard/UtfBase.d @ 120:536e43f63c81

Comprehensive update for Win32/Linux32 dmd-2.053/dmd-1.068+Tango-r5661 ===D2=== * added [Try]Immutable/Const/Shared templates to work with differences in D1/D2 instead of version statements used these templates to work with strict type storage rules of dmd-2.053 * com.ibm.icu now also compilable with D2, but not tested yet * small fixes Snippet288 - shared data is in TLS ===Phobos=== * fixed critical bugs in Phobos implementation completely incorrect segfault prone fromStringz (Linux's port ruthless killer) terrible, incorrect StringBuffer realization (StyledText killer) * fixed small bugs as well Snippet72 - misprint in the snippet * implemented missing functionality for Phobos ByteArrayOutputStream implemented (image loading available) formatting correctly works for all DWT's cases As a result, following snippets now work with Phobos (Snippet### - what is fixed): Snippet24, 42, 111, 115, 130, 235, 276 - bad string formatting Snippet48, 282 - crash on image loading Snippet163, 189, 211, 213, 217, 218, 222 - crash on copy/cut in StyledText Snippet244 - hang-up ===Tango=== * few changes for the latest Tango trunk-r5661 * few small performance improvements ===General=== * implMissing-s for only one version changed to implMissingInTango/InPhobos * incorrect calls to Format in toString-s fixed * fixed loading \uXXXX characters in ResourceBundle * added good UTF-8 support for StyledText, TextLayout (Win32) and friends UTF functions revised and tested.
They are now in java.nonstandard.*Utf modules StyledText and TextLayout (Win32) modules revised for UTF-8 support * removed small differences in most identical files in *.swt.* folders *.swt.internal.image, *.swt.events and *.swt.custom are identical in Win32/Linux32 now 179 of 576 (~31%) files in *.swt.* folders are fully identical * Win32: snippets now have right subsystem, pretty icons and native system style controls * small fixes in snippets Snippet44 - it's not Snippet44 Snippet212 - functions work with different images and offsets arrays Win32: Snippet282 - crash on close if the button has an image Snippet293 - setGrayed is commented and others Win32: As a result, following snippets now work Snippet68 - color doesn't change Snippet163, 189, 211, 213, 217, 218, 222 - UTF-8 issues (see above) Snippet193 - no table headers
author Denis Shelomovskij <verylonglogin.reg@gmail.com>
date Sat, 09 Jul 2011 15:50:20 +0300
parents
children
comparison
equal deleted inserted replaced
119:d00e8db0a568 120:536e43f63c81
1 /**
2 * Stuff for working with narrow strings.
3 * This module shouldn't be imported directly.
4 * Use SafeUtf/UnsafeUtf modules instead.
5 *
6 * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
7 */
8 module java.nonstandard.UtfBase;
9
10 package const UtfBaseText = `
11 # line 11 "java\nonstandard\UtfBase.d"
12 import java.lang.util;
13
14 version(Tango){
15 static import tango.text.convert.Utf;
16 } else { // Phobos
17 static import std.utf;
18 static import std.conv;
19 }
20
// UCSindex is currently a plain alias of int (the typedef variant is commented
// out), so it converts freely to and from int. In this module it is used for
// character counts (UCScount) and, as UCSshift, for the number of characters to
// step over (toUTF8shift).
///The Universal Character Set (UCS), defined by the International Standard ISO/IEC 10646
/*typedef*/alias int UCSindex;
alias UCSindex UCSshift;
24
25 static if(UTFTypeCheck) {
26 ///UTF-16 (16-bit Unicode Transformation Format)
27 /*struct UTF16index {
28 int internalValue;
29 alias internalValue val;
30
31 private static UTF16index opCall(int _val) {
32 UTF16index t = { _val };
33 return t;
34 }
35
36 void opAddAssign(in UTF16shift di) {
37 val += di;
38 }
39
40 void opSubAssign(in UTF16shift di) {
41 val -= di;
42 }
43
44 mixin(constFuncs!("
45 UTF16index opAdd(in UTF16shift di) {
46 return UTF16index(val + di);
47 }
48
49 UTF16index opSub(in UTF16shift di) {
50 return UTF16index(val - di);
51 }
52
53 version(Windows) {
54 UTF16index opAdd(in int di) {
55 return UTF16index(val + di);
56 }
57
58 UTF16index opSub(in int di) {
59 return UTF16index(val - di);
60 }
61 }
62
63 int opCmp(in UTF16index i2) {
64 return val - i2.val;
65 }
66 "));
67 }*/
// Type-checked build: UTF-16 positions and distances are declared with typedef
// rather than alias. The struct-based variant of UTF16index is kept commented
// out above.
typedef int UTF16index;
typedef int UTF16shift;
70
///UTF-8 (UCS Transformation Format — 8-bit)
//typedef int UTF8index;
//alias UTF8index UTF8shift;
/**
 * Type-checked position inside a UTF-8 string (used when UTFTypeCheck is on).
 *
 * Wraps a single int so that positions cannot be mixed with plain integers by
 * accident. Supported arithmetic:
 *   index +/- shift  gives an index,
 *   index - index    gives a shift,
 *   index compared with index via opCmp.
 * Construction through opCall is private to this module; outside code goes
 * through asUTF8index.
 */
struct UTF8index {
    int internalValue;
    alias internalValue val;

    // Private factory: only this module may turn a raw int into an index.
    private static UTF8index opCall(int _val) {
        UTF8index t = { _val };
        return t;
    }

    void opAddAssign(in UTF8shift di) {
        val += di.val;
    }

    void opSubAssign(in UTF8shift di) {
        val -= di.val;
    }

    // The non-mutating operators are generated through constFuncs.
    // opCmp compares explicitly instead of returning the difference of the two
    // values: a subtraction can overflow int and report the wrong ordering.
    mixin(constFuncs!("
        UTF8index opAdd(in UTF8shift di) {
            return UTF8index(val + di.val);
        }

        UTF8index opSub(in UTF8shift di) {
            return UTF8index(val - di.val);
        }

        UTF8shift opSub(in UTF8index di) {
            return UTF8shift(val - di.val);
        }

        int opCmp(in UTF8index i2) {
            return val < i2.val ? -1 : val > i2.val ? 1 : 0;
        }
    "));
}
109
/// Module-internal factory: wraps a raw int into a UTF8index.
private UTF8index newUTF8index(int i) {
    UTF8index wrapped = UTF8index(i);
    return wrapped;
}

/**
 * Returns the raw int behind an index or shift value.
 * Types implicitly convertible to UTF16index are cast to int; every other
 * type is expected to expose a val member.
 */
private int val(T)(T i) {
    static if(!is(T : UTF16index)) {
        return i.val;
    } else {
        return cast(int) i;
    }
}

/// Moves a UTF-8 index one position backwards, in place.
private void dec(ref UTF8index i) {
    i.val -= 1;
}
124
/**
 * Type-checked distance between two UTF8index values (used when UTFTypeCheck
 * is on).
 *
 * Wraps a single int. Shifts can be added to and subtracted from each other
 * and compared via opCmp. Construction through opCall is private to this
 * module; outside code goes through asUTF8shift.
 */
struct UTF8shift {
    int internalValue;
    alias internalValue val;

    // Private factory: only this module may turn a raw int into a shift.
    private static UTF8shift opCall(int _val) {
        UTF8shift t = { _val };
        return t;
    }

    void opAddAssign(in UTF8shift di) {
        val += di.val;
    }

    void opSubAssign(in UTF8shift di) {
        val -= di.val;
    }

    // The non-mutating operators are generated through constFuncs.
    // opCmp compares explicitly instead of returning the difference of the two
    // values: shifts may have opposite signs, so a subtraction can overflow int
    // and report the wrong ordering.
    mixin(constFuncs!("
        UTF8shift opAdd(in UTF8shift di) {
            return UTF8shift(val + di.val);
        }

        UTF8shift opSub(in UTF8shift di) {
            return UTF8shift(val - di.val);
        }

        int opCmp(in UTF8shift di2) {
            return val < di2.val ? -1 : val > di2.val ? 1 : 0;
        }
    "));
}
156
157
/// Public conversion of a raw int into a type-checked UTF8index.
UTF8index asUTF8index(int i) {
    UTF8index idx = UTF8index(i);
    return idx;
}

/// Public conversion of a raw int into a type-checked UTF8shift.
UTF8shift asUTF8shift(int i) {
    UTF8shift shift = UTF8shift(i);
    return shift;
}
165 } else {
// Type checking disabled: every index and shift type is just an int.
alias int UTF16index;
alias int UTF16shift;

alias int UTF8index;
alias int UTF8shift;

/// Identity helper so that code written against the type-checked variant
/// (which has to unwrap its values) compiles unchanged here.
private int val(int i) {
    int raw = i;
    return raw;
}

/// Moves a UTF-8 index one position backwards, in place.
private void dec(ref UTF8index i) {
    i -= 1;
}
179 }
180
/// Returns the single code unit stored at position i of s (no decoding).
char charByteAt(in char[] s, in UTF8index i) {
    int pos = val(i);
    return s[pos];
}
184
/// Position just before the first element: always -1.
UTF8index preFirstIndex(in char[] s) {
    UTF8index idx = cast(UTF8index) -1;
    return idx;
}

/// Position of the first element: always 0.
UTF8index firstIndex(in char[] s) {
    UTF8index idx = cast(UTF8index) 0;
    return idx;
}

/// Position one past the last element: s.length.
UTF8index endIndex(in char[] s) {
    UTF8index idx = cast(UTF8index) s.length;
    return idx;
}

/// Position of the character that precedes endIndex, as computed by offsetBefore.
UTF8index beforeEndIndex(in char[] s) {
    UTF8index end = endIndex(s);
    return offsetBefore(s, end);
}
200
201
// These variables are not in TLS, so they must only ever be written to and
// never read. In this module UTF8dummyShift and UTF16dummyShift serve as the
// default targets of the optional out stride parameters (see dcharAt and
// dcharAsStringAt).
mixin(gshared!("
private UCSindex UCSdummyShift;
private UTF8shift UTF8dummyShift;
private UTF16shift UTF16dummyShift;
"));
208
// Lookup table indexed by a byte of a UTF-8 string.
// For a byte that starts a sequence the entry is the length of that sequence in
// bytes; the entry 0xFF marks a byte that does not start a sequence (this is
// what isUTF8sequenceStart tests for).
//   0x00-0x7F -> 1
//   0x80-0xBF -> 0xFF
//   0xC0-0xDF -> 2
//   0xE0-0xEF -> 3
//   0xF0-0xF7 -> 4
//   0xF8-0xFB -> 5
//   0xFC-0xFD -> 6
//   0xFE-0xFF -> 0xFF
private const ubyte[256] p_UTF8stride =
[
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
];
228
/**
 * Builds the diagnostic text used in exception and log messages: the index,
 * the byte stored at that index and the whole string as raw bytes.
 *
 * The index may legitimately lie outside the string (endIndex is s.length and
 * preFirstIndex is -1, and both are passed to UTF8Exception by toUTF8shift).
 * In that case the byte is reported as out of range instead of being read,
 * so that building the message cannot itself fail with a range error.
 */
private String toUTF8infoString(in char[] s, UTF8index i) {
    int pos = val(i);
    if(pos >= 0 && pos < s.length)
        return Format("i = {}, s[i] = {}, s = {}", pos, cast(ubyte)s[pos], cast(ubyte[])s);
    return Format("i = {}, s[i] = <out of range>, s = {}", pos, cast(ubyte[])s);
}
232
/**
 * Thrown by the UTF-8 helpers of this module when an index does not fit the
 * byte structure of the string. The message consists of the caller's text
 * followed, on a new line, by the output of toUTF8infoString for (s, i).
 */
class UTF8Exception : Exception {
    this( String msg, in char[] s, UTF8index i){
        String details = toUTF8infoString(s, i);
        super( Format("{}:\n{}", msg, details) );
    }
}
238
/// True when the byte at position i can start a UTF-8 sequence, i.e. its
/// entry in p_UTF8stride is not the 0xFF marker.
bool isUTF8sequenceStart( in char[] s, in UTF8index i ) {
    ubyte entry = p_UTF8stride[charByteAt(s, i)];
    return !(entry == 0xFF);
}
242
/**
 * Checks that i is either the end index of s or points at a byte that starts
 * a UTF-8 sequence.
 * Throws: UTF8Exception otherwise.
 */
void validateUTF8index( in char[] s, in UTF8index i ) {
    // The end index is always acceptable and must not be dereferenced.
    if(i == s.endIndex())
        return;
    if(s.isUTF8sequenceStart(i))
        return;
    throw new UTF8Exception("Not a start of an UTF-8 sequence", s, i);
}
247
/**
 * Length, in bytes, of the UTF-8 sequence that starts at position i.
 * The index is validated first (see validateUTF8index). The Tango build reads
 * the length from p_UTF8stride, the Phobos build asks std.utf.stride.
 */
UTF8shift UTF8strideAt( in char[] s, in UTF8index i ) {
    validateUTF8index(s, i);
    version(Tango) {
        ubyte len = p_UTF8stride[charByteAt(s, i)];
        return cast(UTF8shift)len;
    } else { // Phobos
        auto len = std.utf.stride( s, val(i) );
        return cast(UTF8shift)len;
    }
}
256
/**
 * Length, in UTF-16 code units, of the character that starts at position i.
 * The Tango build returns 2 when the unit lies in 0xD800..0xDBFF and 1
 * otherwise; the Phobos build asks std.utf.stride. No index validation is
 * performed here.
 */
UTF16shift UTF16strideAt( in wchar[] s, in UTF16index i ) {
    //s.validateUTF16index(i);
    version(Tango) {
        uint unit = s[val(i)];
        bool inLeadRange = unit >= 0xD800 && unit <= 0xDBFF;
        return cast(UTF16shift)(inLeadRange ? 2 : 1);
    } else { // Phobos
        auto len = std.utf.stride( s, val(i) );
        return cast(UTF16shift)len;
    }
}
266
/**
 * Number of characters (code points) encoded in the UTF-8 string s.
 *
 * The Tango build decodes the whole string into a scratch dchar buffer and
 * returns the length of the decoded result; the Phobos build uses
 * std.utf.count. The size_t result is narrowed to UCSindex with an explicit
 * cast, as endIndex does, so the code does not depend on an implicit
 * size_t to int conversion.
 */
UCSindex UCScount( in char[] s ){
    version(Tango){
        // One dchar per input byte is always enough room for the result.
        scope dchar[] buf = new dchar[]( s.length );
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( s, buf, &ate );
        assert( ate is s.length );
        return cast(UCSindex) res.length;
    } else { // Phobos
        return cast(UCSindex) std.utf.count(s);
    }
}
278
/**
 * Converts a distance counted in characters into a distance counted in bytes.
 *
 * Starting at position i of s, walks dn characters forwards (dn > 0) or
 * backwards (dn < 0) and returns the byte distance between the position
 * reached and i. dn == 0 yields a zero shift.
 *
 * Forwards, the walk may end exactly at the end index but may not start a
 * step there: asking for more characters than remain throws UTF8Exception
 * instead of reading the byte behind the end of the string.
 *
 * Backwards, the walk may step from position 0 to preFirstIndex (-1), but
 * only as its very last step.
 *
 * Throws: UTF8Exception if i is not a sequence start, if the requested
 *         distance leaves the string, or if a malformed sequence is met.
 */
UTF8shift toUTF8shift( in char[] s, in UTF8index i, in UCSshift dn ) {
    s.validateUTF8index(i);
    UTF8index j = i;
    UCSshift tdn = dn;
    if(tdn > 0) {
        do {
            // UTF8strideAt accepts the end index but then has no byte to look at.
            if(j == s.endIndex())
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): No UTF-8 sequence at the end of the string", dn), s, i);
            j += s.UTF8strideAt(j);
            if(j > s.endIndex())
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): No end of the UTF-8 sequence", dn), s, i);
        } while(--tdn);
    } else if(tdn < 0) {
        do {
            if(!val(j)) {
                // At the very start: only a final single step to -1 is allowed.
                if(tdn == -1) {
                    j = s.preFirstIndex();
                    break;
                }
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Can only go down to -1, not {}", dn, tdn), s, i);
            }
            // Step back byte by byte until a sequence start is found,
            // counting the bytes passed in l.
            int l = 0;
            do {
                if(!val(j))
                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): No start of the UTF-8 sequence before", dn), s, i);
                ++l;
                dec(j);
            } while(!s.isUTF8sequenceStart(j));
            // The number of bytes passed must equal the declared sequence length.
            l -= val(s.UTF8strideAt(j));
            if(l > 0)
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Overlong UTF-8 sequence before", dn), s, i);
            else if(l < 0)
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Too short UTF-8 sequence before", dn), s, i);
        } while(++tdn);
    }
    return j - i;
}
313
/// Position of the character that precedes position i
/// (i moved by toUTF8shift(i, -1)).
UTF8index offsetBefore( in char[] s, in UTF8index i ) {
    UTF8shift back = toUTF8shift(s, i, -1);
    return i + back;
}
317
/// Position that follows the character starting at position i
/// (i moved by toUTF8shift(i, 1)).
UTF8index offsetAfter( in char[] s, in UTF8index i ) {
    UTF8shift forward = toUTF8shift(s, i, 1);
    return i + forward;
}
321
/**
    If the index is in the middle of a UTF-8 byte sequence, it is moved
    back, in place, to the position of the first byte of this sequence.
    An index that equals the end index or already points at a sequence
    start is left untouched.

    Throws: UTF8Exception if the start of the string is reached without
    finding a sequence start, or if the number of bytes stepped over is not
    smaller than the length declared by the sequence found. The index has
    already been moved when the exception is thrown.
*/
void adjustUTF8index( in char[] s, ref UTF8index i ){
    if(i == s.endIndex())
        return;
    if(s.isUTF8sequenceStart(i))
        return;

    // Walk backwards one byte at a time, counting the bytes stepped over.
    int stepped = 0;
    do {
        if(val(i) == 0)
            throw new UTF8Exception("adjustUTF8index: No start of the UTF-8 sequence", s, i);
        ++stepped;
        dec(i);
    } while(!s.isUTF8sequenceStart(i));

    int declared = val(s.UTF8strideAt(i));
    if(stepped - declared > 0)
        throw new UTF8Exception("adjustUTF8index: Overlong UTF-8 sequence", s, i);
}
342
/**
 * Converts an int argument received from a caller into a UTF8index.
 *
 * Values strictly between 0 and s.length are passed through adjustUTF8index;
 * if that moves the index, a warning naming location is logged with the
 * template arguments F and L as file and line. All other values are
 * converted as they are.
 */
UTF8index takeIndexArg(String F = __FILE__, uint L = __LINE__)(String s, int i_arg, String location) {
    UTF8index res = cast(UTF8index) i_arg;
    // Nothing to adjust at the very start, at the end or outside the string.
    if(i_arg <= 0 || i_arg >= s.length)
        return res;

    auto given = res;
    s.adjustUTF8index(res);
    if(given != res)
        getDwtLogger().warn(F, L, Format("Fixed invalid UTF-8 index at {}:\nnew i = {}, {}", location, val(res), toUTF8infoString(s, given)));
    return res;
}
353
/**
 * Decodes the character that starts at position i of the UTF-8 string s.
 *
 * Params:
 *   s      = the UTF-8 string
 *   i      = position of the first byte of the character (validated)
 *   stride = receives the number of bytes the character occupies
 * Returns: the decoded character.
 */
dchar dcharAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
    s.validateUTF8index(i);
    auto tail = s[val(i) .. $];
    version(Tango){
        // A one-element output buffer makes the decoder stop after one character.
        dchar[1] slot;
        uint consumed;
        dchar[] decoded = tango.text.convert.Utf.toString32( tail, slot, &consumed );
        assert( consumed > 0 && decoded.length is 1 );
        stride = cast(UTF8shift)consumed;
        return decoded[0];
    } else { // Phobos
        size_t consumed = 0;
        dchar decoded = std.utf.decode(tail, consumed);
        stride = cast(UTF8shift)consumed;
        return decoded;
    }
}
371
/**
 * Decodes the character that starts at position i of the UTF-16 string s.
 *
 * Params:
 *   s      = the UTF-16 string
 *   i      = position of the first code unit of the character (not validated)
 *   stride = receives the number of code units the character occupies
 * Returns: the decoded character.
 */
dchar dcharAt( in wchar[] s, in UTF16index i, out UTF16shift stride = UTF16dummyShift ) {
    //s.validateUTF16index(i);
    auto str = s[val(i) .. $];
    version(Tango){
        // A one-element output buffer makes the decoder stop after one character.
        dchar[1] buf;
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
        // Log the offending input before asserting: with the assert in front,
        // this diagnostic could never be reached while asserts are enabled.
        if( ate is 0 || res.length is 0 ){
            getDwtLogger().trace( __FILE__, __LINE__, "str.length={} str={:X2}", str.length, cast(ubyte[])str );
        }
        assert( ate > 0 && res.length is 1 );
        stride = cast(UTF16shift)ate;
        return res[0];
    } else { // Phobos
        size_t ate = 0;
        dchar res = std.utf.decode(str, ate);
        stride = cast(UTF16shift)ate;
        return res;
    }
}
392
/// Decodes the character that precedes position i.
dchar dcharBefore( in char[] s, in UTF8index i ) {
    UTF8index prev = offsetBefore(s, i);
    return dcharAt(s, prev);
}
396
/// Decodes the character that follows the one starting at position i.
dchar dcharAfter( in char[] s, in UTF8index i ) {
    UTF8index next = offsetAfter(s, i);
    return dcharAt(s, next);
}
400
/**
 * Returns, as a new String, the bytes of the character that starts at
 * position i of s.
 *
 * Params:
 *   s      = the UTF-8 string
 *   i      = position of the first byte of the character (validated)
 *   stride = receives the number of bytes the character occupies
 */
String dcharAsStringAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
    s.validateUTF8index(i);
    auto tail = s[val(i) .. $];
    uint consumed;
    version(Tango){
        // Decoding into a one-element buffer is done only to learn how many
        // bytes one character consumes; the decoded value itself is not used.
        dchar[1] slot;
        tango.text.convert.Utf.toString32( tail, slot, &consumed );
    } else { // Phobos
        consumed = std.utf.stride( tail, 0 );
    }
    stride = cast(UTF8shift)consumed;
    auto piece = tail[ 0 .. consumed ];
    return piece._idup();
}
415
416 `;