Mercurial > projects > dwt2
diff base/src/java/nonstandard/UtfBase.d @ 120:536e43f63c81
Comprehensive update for Win32/Linux32 dmd-2.053/dmd-1.068+Tango-r5661
===D2===
* added [Try]Immutable/Const/Shared templates to work with differences in D1/D2 instead of version statements
used these templates to work with strict type storage rules of dmd-2.053
* com.ibm.icu now also compilable with D2, but not tested yet
* small fixes
Snippet288 - shared data is in TLS
===Phobos===
* fixed critical bugs in Phobos implementation
completely incorrect segfault prone fromStringz (Linux's port ruthless killer)
terrible, incorrect StringBuffer realization (StyledText killer)
* fixed small bugs as well
Snippet72 - misprint in the snippet
* implemented missing functionality for Phobos
ByteArrayOutputStream implemented (image loading available)
formatting correctly works for all DWT's cases
As a result, the following snippets now work with Phobos (Snippet### - what is fixed):
Snippet24, 42, 111, 115, 130, 235, 276 - bad string formatting
Snippet48, 282 - crash on image loading
Snippet163, 189, 211, 213, 217, 218, 222 - crash on copy/cut in StyledText
Snippet244 - hang-up
===Tango===
* few changes for the latest Tango trunk-r5661
* few small performance improvements
===General===
* implMissing-s for only one version changed to implMissingInTango/InPhobos
* incorrect calls to Format in toString-s fixed
* fixed loading \uXXXX characters in ResourceBundle
* added good UTF-8 support for StyledText, TextLayout (Win32) and friends
UTF functions revised and tested. It is now in java.nonstandard.*Utf modules
StyledText and TextLayout (Win32) modules revised for UTF-8 support
* removed small differences in mostly identical files in *.swt.* folders
*.swt.internal.image, *.swt.events and *.swt.custom are identical in Win32/Linux32
now 179 of 576 (~31%) files in *.swt.* folders are fully identical
* Win32: snippets now have right subsystem, pretty icons and native system style controls
* small fixes in snippets
Snippet44 - it's not Snippet44
Snippet212 - functions work with different images and offsets arrays
Win32: Snippet282 - crash on close if the button has an image
Snippet293 - setGrayed is commented
and others
Win32: As a result, the following snippets now work
Snippet68 - color doesn't change
Snippet163, 189, 211, 213, 217, 218, 222 - UTF-8 issues (see above)
Snippet193 - no table headers
author | Denis Shelomovskij <verylonglogin.reg@gmail.com> |
---|---|
date | Sat, 09 Jul 2011 15:50:20 +0300 |
parents | |
children |
line wrap: on
line diff
/**
 * Stuff for working with narrow strings.
 * This module shouldn't be imported directly.
 * Use SafeUtf/UnsafeUtf modules instead.
 *
 * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
 */
module java.nonstandard.UtfBase;
// Source text of the UTF helpers; UTFTypeCheck is not defined here (presumably supplied by the module that mixes this text in - confirm).
package const UtfBaseText = `
# line 11 "java\nonstandard\UtfBase.d"
import java.lang.util;

version(Tango){
    static import tango.text.convert.Utf;
} else { // Phobos
    static import std.utf;
    static import std.conv;
}

///The Universal Character Set (UCS), defined by the International Standard ISO/IEC 10646
/*typedef*/alias int UCSindex;
alias UCSindex UCSshift;

// With UTFTypeCheck enabled the UTF-8/UTF-16 index and shift types are distinct
// types, so they cannot be mixed up silently; otherwise all of them are plain int.
static if(UTFTypeCheck) {
    ///UTF-16 (16-bit Unicode Transformation Format)
    /*struct UTF16index {
        int internalValue;
        alias internalValue val;

        private static UTF16index opCall(int _val) {
            UTF16index t = { _val };
            return t;
        }

        void opAddAssign(in UTF16shift di) {
            val += di;
        }

        void opSubAssign(in UTF16shift di) {
            val -= di;
        }

mixin(constFuncs!("
        UTF16index opAdd(in UTF16shift di) {
            return UTF16index(val + di);
        }

        UTF16index opSub(in UTF16shift di) {
            return UTF16index(val - di);
        }

        version(Windows) {
            UTF16index opAdd(in int di) {
                return UTF16index(val + di);
            }

            UTF16index opSub(in int di) {
                return UTF16index(val - di);
            }
        }

        int opCmp(in UTF16index i2) {
            return val - i2.val;
        }
"));
    }*/
    typedef int UTF16index;
    typedef int UTF16shift;

    ///UTF-8 (UCS Transformation Format — 8-bit)
    //typedef int UTF8index;
    //alias UTF8index UTF8shift;
    /// Position of a byte in a UTF-8 string, wrapped in a struct for type checking.
    struct UTF8index {
        int internalValue;
        alias internalValue val;

        // Private: outside code obtains values through asUTF8index or casts.
        private static UTF8index opCall(int _val) {
            UTF8index t = { _val };
            return t;
        }

        void opAddAssign(in UTF8shift di) {
            val += di.val;
        }

        void opSubAssign(in UTF8shift di) {
            val -= di.val;
        }

mixin(constFuncs!("
        UTF8index opAdd(in UTF8shift di) {
            return UTF8index(val + di.val);
        }

        UTF8index opSub(in UTF8shift di) {
            return UTF8index(val - di.val);
        }

        UTF8shift opSub(in UTF8index di) {
            return UTF8shift(val - di.val);
        }

        int opCmp(in UTF8index i2) {
            return val - i2.val;
        }
"));
    }

    private UTF8index newUTF8index(int i) {
        return UTF8index(i);
    }

    /// Raw int value of an index or shift of either wrapped kind.
    private int val(T)(T i) {
        static if(is(T : UTF16index))
            return cast(int) i;
        else
            return i.val;
    }

    /// Moves the index one byte back.
    private void dec(ref UTF8index i) {
        --i.val;
    }

    /// Signed distance, in bytes, between two UTF8index values.
    struct UTF8shift {
        int internalValue;
        alias internalValue val;

        private static UTF8shift opCall(int _val) {
            UTF8shift t = { _val };
            return t;
        }

        void opAddAssign(in UTF8shift di) {
            val += di.val;
        }

        void opSubAssign(in UTF8shift di) {
            val -= di.val;
        }

mixin(constFuncs!("
        UTF8shift opAdd(in UTF8shift di) {
            return UTF8shift(val + di.val);
        }

        UTF8shift opSub(in UTF8shift di) {
            return UTF8shift(val - di.val);
        }

        int opCmp(in UTF8shift di2) {
            return val - di2.val;
        }
"));
    }


    /// Wraps a raw int as a UTF8index without any validation.
    UTF8index asUTF8index(int i) {
        return UTF8index(i);
    }

    /// Wraps a raw int as a UTF8shift.
    UTF8shift asUTF8shift(int i) {
        return UTF8shift(i);
    }
} else {
    alias int UTF16index;
    alias int UTF16shift;

    alias int UTF8index;
    alias int UTF8shift;

    /// Identity: indices are plain ints in this configuration.
    private int val(int i) {
        return i;
    }

    /// Moves the index one byte back.
    private void dec(ref UTF8index i) {
        --i;
    }
}

/// Returns the raw byte of s at index i (no UTF-8 decoding).
char charByteAt(in char[] s, in UTF8index i) {
    return s[val(i)];
}

/// Index -1: the position one step before the first byte.
UTF8index preFirstIndex(in char[] s) {
    return cast(UTF8index) -1;
}

/// Index 0.
UTF8index firstIndex(in char[] s) {
    return cast(UTF8index) 0;
}

/// Index s.length, i.e. one past the last byte.
UTF8index endIndex(in char[] s) {
    return cast(UTF8index) s.length;
}

/// Index of the start of the last UTF-8 sequence (preFirstIndex for an empty string).
UTF8index beforeEndIndex(in char[] s) {
    return s.offsetBefore(s.endIndex());
}


//These variables aren't in TLS, so they can be used only for writing
//(they serve as throw-away default targets for the out stride parameters below)
mixin(gshared!("
private UCSindex UCSdummyShift;
private UTF8shift UTF8dummyShift;
private UTF16shift UTF16dummyShift;
"));

// Length of the UTF-8 sequence introduced by each possible byte value:
// 0x00-0x7F -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF7 -> 4,
// 0xF8-0xFB -> 5, 0xFC-0xFD -> 6. 0xFF marks bytes that cannot start a
// sequence (continuation bytes 0x80-0xBF and the bytes 0xFE, 0xFF).
private const ubyte[256] p_UTF8stride =
[
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
];

/**
Builds the diagnostic text used in exception and log messages.

The byte at i is printed only when i lies inside s: the callers report
errors with i equal to the end index as well, and an unconditional s[i]
would fail there and hide the original error.
*/
private String toUTF8infoString(in char[] s, UTF8index i) {
    int n = val(i);
    if(n >= 0 && n < s.length)
        return Format("i = {}, s[i] = {}, s = {}", n, cast(ubyte)s[n], cast(ubyte[])s);
    return Format("i = {} (out of bounds), s = {}", n, cast(ubyte[])s);
}

/// Exception raised by the UTF-8 index helpers; the message carries msg plus a dump of s and i.
class UTF8Exception : Exception {
    this( String msg, in char[] s, UTF8index i){
        super( Format("{}:\n{}", msg, toUTF8infoString(s, i)));
    }
}

/// True if the byte at i can start a UTF-8 sequence. i must be inside s.
bool isUTF8sequenceStart( in char[] s, in UTF8index i ) {
    return p_UTF8stride[s.charByteAt(i)] != 0xFF;
}

/**
Throws UTF8Exception unless i is the end index or points at a byte that
can start a UTF-8 sequence.
*/
void validateUTF8index( in char[] s, in UTF8index i ) {
    if(i != s.endIndex() && !s.isUTF8sequenceStart(i))
        throw new UTF8Exception("Not a start of an UTF-8 sequence", s, i);
}

/**
Returns the length in bytes of the UTF-8 sequence starting at i.

Throws: UTF8Exception if i does not point at the start of a sequence,
including the case where i is the end index (there is no byte to read).
*/
UTF8shift UTF8strideAt( in char[] s, in UTF8index i ) {
    s.validateUTF8index(i);
    // validateUTF8index accepts the end index, but there is no sequence there
    if(i == s.endIndex())
        throw new UTF8Exception("UTF8strideAt: No UTF-8 sequence at the end index", s, i);
    version(Tango) {
        return cast(UTF8shift)p_UTF8stride[s.charByteAt(i)];
    } else { // Phobos
        return cast(UTF8shift)std.utf.stride( s, val(i) );
    }
}

/**
Returns the number of UTF-16 code units of the sequence starting at i.
The index is not validated.
*/
UTF16shift UTF16strideAt( in wchar[] s, in UTF16index i ) {
    //s.validateUTF16index(i);
    version(Tango) {
        uint u = s[val(i)];
        // 2 for a code unit in 0xD800..0xDBFF, 1 otherwise
        return cast(UTF16shift)(1 + (u >= 0xD800 && u <= 0xDBFF));
    } else { // Phobos
        return cast(UTF16shift)std.utf.stride( s, val(i) );
    }
}

/// Returns the number of code points in s.
UCSindex UCScount( in char[] s ){
    version(Tango){
        // Decode the whole string; s.length dchars are always enough.
        scope dchar[] buf = new dchar[]( s.length );
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( s, buf, &ate );
        assert( ate is s.length );
        return res.length;
    } else { // Phobos
        return std.utf.count(s);
    }
}

/**
Converts a distance of dn code points, counted from index i, into a
distance in bytes.

Params:
    s  = the UTF-8 string
    i  = start index; must be the end index or the start of a sequence
    dn = number of code points to move, forward if positive, backward if negative

Returns: the byte distance between i and the reached index. Moving back
by exactly one code point from index 0 yields the distance to preFirstIndex.

Throws: UTF8Exception if i is invalid, if the move leaves the string or
if a malformed sequence is met on the way.
*/
UTF8shift toUTF8shift( in char[] s, in UTF8index i, in UCSshift dn ) {
    s.validateUTF8index(i);
    UTF8index j = i;
    UCSshift tdn = dn;
    if(tdn > 0)
        do {
            j += s.UTF8strideAt(j);
            if(j > s.endIndex())
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): No end of the UTF-8 sequence", dn), s, i);
        } while(--tdn)
    else if(tdn < 0) {
        do {
            if(!val(j))
                // Already at index 0: only the very last step may go to -1.
                if(tdn == -1) {
                    j = s.preFirstIndex();
                    break;
                } else
                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): Can only go down to -1, not {}", dn, tdn), s, i);
            // Walk back to the previous sequence start, counting the bytes passed.
            int l = 0;
            do {
                if(!val(j))
                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): No start of the UTF-8 sequence before", dn), s, i);
                ++l;
                dec(j);
            } while(!s.isUTF8sequenceStart(j))
            // The bytes passed must match the declared length of that sequence exactly.
            l -= val(s.UTF8strideAt(j));
            if(l > 0)
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Overlong UTF-8 sequence before", dn), s, i);
            else if(l < 0)
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Too short UTF-8 sequence before", dn), s, i);
        } while(++tdn)
    }
    return j - i;
}

/// Index of the code point preceding i (preFirstIndex if i is 0).
UTF8index offsetBefore( in char[] s, in UTF8index i ) {
    return i + s.toUTF8shift(i, -1);
}

/// Index just after the code point that starts at i.
UTF8index offsetAfter( in char[] s, in UTF8index i ) {
    return i + s.toUTF8shift(i, 1);
}

/**
If the index is in the middle of a UTF-8 byte sequence, it
will be moved to the position of the first byte of this sequence.
An index that is the end index or already a sequence start is left unchanged.

Throws: UTF8Exception (reporting the original index) if no sequence start
is found before the index or if the index lies beyond the found sequence.
Note that i may already have been moved when the exception is thrown.
*/
void adjustUTF8index( in char[] s, ref UTF8index i ){
    if(i == s.endIndex() || s.isUTF8sequenceStart(i))
        return;

    UTF8index start = i; // original position, kept for error reports
    int l = 0;
    do {
        if(!val(i))
            throw new UTF8Exception("adjustUTF8index: No start of the UTF-8 sequence", s, start);
        ++l;
        dec(i);
    } while(!s.isUTF8sequenceStart(i))
    l -= val(s.UTF8strideAt(i));
    // NOTE(review): a continuation byte directly after a complete sequence
    // gives l == 0 here and is accepted; (l >= 0) looks like the strict
    // check - confirm whether the lenient behaviour is intended.
    if(l > 0)
        throw new UTF8Exception("adjustUTF8index: Overlong UTF-8 sequence", s, start);
}

/**
Converts an int index argument to UTF8index. An index strictly inside s
that points into the middle of a sequence is moved to the sequence start
and a warning naming location is logged with the file and line given by
the template arguments (by default those of the instantiation).
*/
UTF8index takeIndexArg(String F = __FILE__, uint L = __LINE__)(String s, int i_arg, String location) {
    UTF8index res = cast(UTF8index) i_arg;
    if(i_arg > 0 && i_arg < s.length) {
        auto t = res;
        s.adjustUTF8index(res);
        if(t != res)
            getDwtLogger().warn(F, L, Format("Fixed invalid UTF-8 index at {}:\nnew i = {}, {}", location, val(res), toUTF8infoString(s, t)));
    }
    return res;
}

/**
Decodes the code point that starts at index i of the UTF-8 string s.

Params:
    stride = receives the number of bytes consumed
*/
dchar dcharAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
    s.validateUTF8index(i);
    auto str = s[val(i) .. $];
    version(Tango){
        dchar[1] buf;
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
        assert( ate > 0 && res.length is 1 );
        stride = cast(UTF8shift)ate;
        return res[0];
    } else { // Phobos
        size_t ate = 0;
        dchar res = std.utf.decode(str, ate);
        stride = cast(UTF8shift)ate;
        return res;
    }
}

/**
Decodes the code point that starts at index i of the UTF-16 string s.
The index is not validated.

Params:
    stride = receives the number of code units consumed
*/
dchar dcharAt( in wchar[] s, in UTF16index i, out UTF16shift stride = UTF16dummyShift ) {
    //s.validateUTF16index(i);
    auto str = s[val(i) .. $];
    version(Tango){
        dchar[1] buf;
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
        // Log the offending input before the assert, otherwise the trace can never be emitted.
        if( ate is 0 || res.length is 0 ){
            getDwtLogger().trace( __FILE__, __LINE__, "str.length={} str={:X2}", str.length, cast(ubyte[])str );
        }
        assert( ate > 0 && res.length is 1 );
        stride = cast(UTF16shift)ate;
        return res[0];
    } else { // Phobos
        size_t ate = 0;
        dchar res = std.utf.decode(str, ate);
        stride = cast(UTF16shift)ate;
        return res;
    }
}

/// Decodes the code point that precedes index i.
dchar dcharBefore( in char[] s, in UTF8index i ) {
    return s.dcharAt(s.offsetBefore(i));
}

/// Decodes the code point that follows the one starting at index i.
dchar dcharAfter( in char[] s, in UTF8index i ) {
    return s.dcharAt(s.offsetAfter(i));
}

///Get that String, that contains the next codepoint of a String.
///stride receives the length of that code point in bytes.
String dcharAsStringAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
    s.validateUTF8index(i);
    auto str = s[val(i) .. $];
    uint ate;
    version(Tango){
        // Decoded only to learn how many bytes the code point occupies.
        dchar[1] buf;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
    } else { // Phobos
        ate = std.utf.stride( str, 0 );
    }
    stride = cast(UTF8shift)ate;
    return str[ 0 .. ate ]._idup();
}

`;