diff base/src/java/nonstandard/UtfBase.d @ 120:536e43f63c81

Comprehensive update for Win32/Linux32 dmd-2.053/dmd-1.068+Tango-r5661 ===D2=== * added [Try]Immutable/Const/Shared templates to work with differences in D1/D2 instead of version statements used these templates to work with strict type storage rules of dmd-2.053 * com.ibm.icu now also compilable with D2, but not tested yet * small fixes Snippet288 - shared data is in TLS ===Phobos=== * fixed critical bugs in Phobos implementation completely incorrect segfault-prone fromStringz (Linux's port ruthless killer) terrible, incorrect StringBuffer implementation (StyledText killer) * fixed small bugs as well Snippet72 - misprint in the snippet * implemented missing functionality for Phobos ByteArrayOutputStream implemented (image loading available) formatting correctly works for all DWT's cases As a result, following snippets now work with Phobos (Snippet### - what is fixed): Snippet24, 42, 111, 115, 130, 235, 276 - bad string formatting Snippet48, 282 - crash on image loading Snippet163, 189, 211, 213, 217, 218, 222 - crash on copy/cut in StyledText Snippet244 - hang-up ===Tango=== * few changes for the latest Tango trunk-r5661 * few small performance improvements ===General=== * implMissing-s for only one version changed to implMissingInTango/InPhobos * incorrect calls to Format in toString-s fixed * fixed loading \uXXXX characters in ResourceBundle * added good UTF-8 support for StyledText, TextLayout (Win32) and friends UTF functions revised and tested. 
They are now in java.nonstandard.*Utf modules StyledText and TextLayout (Win32) modules revised for UTF-8 support * removed small differences in mostly identical files in *.swt.* folders *.swt.internal.image, *.swt.events and *.swt.custom are identical in Win32/Linux32 now 179 of 576 (~31%) files in *.swt.* folders are fully identical * Win32: snippets now have right subsystem, pretty icons and native system style controls * small fixes in snippets Snippet44 - it's not Snippet44 Snippet212 - functions work with different images and offsets arrays Win32: Snippet282 - crash on close if the button has an image Snippet293 - setGrayed is commented and others Win32: As a result, following snippets now work Snippet68 - color doesn't change Snippet163, 189, 211, 213, 217, 218, 222 - UTF-8 issues (see above) Snippet193 - no table headers
author Denis Shelomovskij <verylonglogin.reg@gmail.com>
date Sat, 09 Jul 2011 15:50:20 +0300
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/base/src/java/nonstandard/UtfBase.d	Sat Jul 09 15:50:20 2011 +0300
@@ -0,0 +1,416 @@
+/** 
+ * Stuff for working with narrow strings.
+ * This module shouldn't be imported directly.
+ * Use SafeUtf/UnsafeUtf modules instead.
+ * 
+ * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
+ */
+module java.nonstandard.UtfBase;
+
+package const UtfBaseText = `
+# line 11 "java\nonstandard\UtfBase.d"
+import java.lang.util;
+
+version(Tango){
+    static import tango.text.convert.Utf;
+} else { // Phobos
+    static import std.utf;
+    static import std.conv;
+}
+
+///The Universal Character Set (UCS), defined by the International Standard ISO/IEC 10646
+/*typedef*/alias int UCSindex;
+///Signed distance counted in UCS code points (may be negative, see toUTF8shift).
+///Plain alias, so it is the very same type as UCSindex.
+alias UCSindex UCSshift;
+
+static if(UTFTypeCheck) {
+    ///UTF-16 (16-bit Unicode Transformation Format)
+    /*struct UTF16index {
+        int internalValue;
+        alias internalValue val;
+        
+        private static UTF16index opCall(int _val) {
+            UTF16index t = { _val };
+            return t;
+        }
+        
+        void opAddAssign(in UTF16shift di) {
+            val += di;
+        }
+        
+        void opSubAssign(in UTF16shift di) {
+            val -= di;
+        }
+        
+mixin(constFuncs!("
+        UTF16index opAdd(in UTF16shift di) {
+            return UTF16index(val + di);
+        }
+        
+        UTF16index opSub(in UTF16shift di) {
+            return UTF16index(val - di);
+        }
+        
+        version(Windows) {
+            UTF16index opAdd(in int di) {
+                return UTF16index(val + di);
+            }
+            
+            UTF16index opSub(in int di) {
+                return UTF16index(val - di);
+            }
+        }
+        
+        int opCmp(in UTF16index i2) {
+            return val - i2.val;
+        }
+"));
+    }*/
+    // Int-based types for positions and distances inside wchar[] data.
+    // typedef (unlike alias) introduces a new type that is distinct from int.
+    typedef int UTF16index;
+    typedef int UTF16shift;
+
+    ///UTF-8 (UCS Transformation Format — 8-bit)
+    //typedef int UTF8index;
+    //alias UTF8index UTF8shift;
+    /// Byte position inside a char[] string, wrapped in its own struct type.
+    /// This variant is only compiled when UTFTypeCheck is true.
+    struct UTF8index {
+        int internalValue;
+        alias internalValue val;
+        
+        // Private constructor-like call; newUTF8index()/asUTF8index() wrap it.
+        private static UTF8index opCall(int _val) {
+            UTF8index t = { _val };
+            return t;
+        }
+        
+        /// index += shift
+        void opAddAssign(in UTF8shift di) {
+            val += di.val;
+        }
+        
+        /// index -= shift
+        void opSubAssign(in UTF8shift di) {
+            val -= di.val;
+        }
+        
+// The operators below are passed as text through the constFuncs template:
+//   index + shift -> index,  index - shift -> index,
+//   index - index -> shift,  opCmp orders by the difference of the raw values.
+mixin(constFuncs!("
+        UTF8index opAdd(in UTF8shift di) {
+            return UTF8index(val + di.val);
+        }
+        
+        UTF8index opSub(in UTF8shift di) {
+            return UTF8index(val - di.val);
+        }
+        
+        UTF8shift opSub(in UTF8index di) {
+            return UTF8shift(val - di.val);
+        }
+        
+        int opCmp(in UTF8index i2) {
+            return val - i2.val;
+        }
+"));
+    }
+    
+    /// Private helper: wraps a raw int into a UTF8index.
+    private UTF8index newUTF8index(int i) {
+        return UTF8index(i);
+    }
+    
+    /// Extracts the raw int from an index/shift value.
+    /// Types implicitly convertible to UTF16index are cast to int;
+    /// every other type is expected to expose a .val member.
+    private int val(T)(T i) {
+        static if(is(T : UTF16index))
+            return cast(int) i;
+        else
+            return i.val;
+    }
+    
+    /// Moves the index one byte back, in place.
+    private void dec(ref UTF8index i) {
+        --i.val;
+    }
+    
+    /// Signed byte distance between two UTF8index values, wrapped in its own
+    /// struct type. This variant is only compiled when UTFTypeCheck is true.
+    struct UTF8shift {
+        int internalValue;
+        alias internalValue val;
+        
+        // Private constructor-like call; asUTF8shift() wraps it.
+        private static UTF8shift opCall(int _val) {
+            UTF8shift t = { _val };
+            return t;
+        }
+        
+        /// shift += shift
+        void opAddAssign(in UTF8shift di) {
+            val += di.val;
+        }
+        
+        /// shift -= shift
+        void opSubAssign(in UTF8shift di) {
+            val -= di.val;
+        }
+        
+// The operators below are passed as text through the constFuncs template:
+//   shift + shift -> shift,  shift - shift -> shift,
+//   opCmp orders by the difference of the raw values.
+mixin(constFuncs!("
+        UTF8shift opAdd(in UTF8shift di) {
+            return UTF8shift(val + di.val);
+        }
+        
+        UTF8shift opSub(in UTF8shift di) {
+            return UTF8shift(val - di.val);
+        }
+        
+        int opCmp(in UTF8shift di2) {
+            return val - di2.val;
+        }
+"));
+    }
+    
+
+    /// Wraps a raw int into a UTF8index without any validation.
+    UTF8index asUTF8index(int i) {
+        return UTF8index(i);
+    }
+
+    /// Wraps a raw int into a UTF8shift without any validation.
+    UTF8shift asUTF8shift(int i) {
+        return UTF8shift(i);
+    }
+} else {
+    // UTFTypeCheck is false: every index/shift type is a plain int alias,
+    // so the helpers below degenerate to trivial int operations.
+    alias int UTF16index;
+    alias int UTF16shift;
+    
+    alias int UTF8index;
+    alias int UTF8shift;
+    
+    /// Identity: the index already is its raw int value.
+    private int val(int i) {
+        return i;
+    }
+    
+    /// Moves the index one byte back, in place.
+    private void dec(ref UTF8index i) {
+        --i;
+    }
+}
+
+/// Returns the single char (one UTF-8 code unit) stored at byte index i of s.
+char charByteAt(in char[] s, in UTF8index i) {
+    return s[val(i)];
+}
+
+/// Returns the index -1, i.e. the position just before the first byte.
+/// The string argument is not inspected.
+UTF8index preFirstIndex(in char[] s) {
+    return cast(UTF8index) -1;
+}
+
+/// Returns the index 0. The string argument is not inspected.
+UTF8index firstIndex(in char[] s) {
+    return cast(UTF8index) 0;
+}
+
+/// Returns the one-past-the-end index of s, i.e. s.length as a UTF8index.
+UTF8index endIndex(in char[] s) {
+    return cast(UTF8index) s.length;
+}
+
+/// Returns the index one code point before the end of s
+/// (offsetBefore applied to endIndex).
+UTF8index beforeEndIndex(in char[] s) {
+    return s.offsetBefore(s.endIndex());
+}
+
+
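+//These variables are not in TLS (they are declared through gshared), so they must
+//only ever be written to: they serve as throw-away targets for default out-parameters.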
+mixin(gshared!("
+private UCSindex UCSdummyShift;
+private UTF8shift UTF8dummyShift;
+private UTF16shift UTF16dummyShift;
+"));
+
+// Lookup table indexed by a byte value; the entry is the length in bytes of the
+// UTF-8 sequence introduced by that byte, or 0xFF if the byte cannot start one:
+//   0x00..0x7F -> 1
+//   0x80..0xBF -> 0xFF (continuation bytes)
+//   0xC0..0xDF -> 2
+//   0xE0..0xEF -> 3
+//   0xF0..0xF7 -> 4,  0xF8..0xFB -> 5,  0xFC..0xFD -> 6
+//   0xFE, 0xFF -> 0xFF
+private const ubyte[256] p_UTF8stride =
+[
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
+];
+
+/// Builds the diagnostic text used in UTF-8 error and warning messages:
+/// the raw index, the byte at that index and the whole string as bytes.
+/// The index may legitimately lie outside of s here (errors are reported
+/// for endIndex and for empty strings as well), so the byte is only read
+/// when the index is in range; otherwise a placeholder is printed instead
+/// of faulting while the real error is being reported.
+private String toUTF8infoString(in char[] s, UTF8index i) {
+    int n = val(i);
+    if(n < 0 || cast(size_t) n >= s.length)
+        return Format("i = {}, s[i] = <out of range>, s = {}", n, cast(ubyte[])s);
+    return Format("i = {}, s[i] = {}, s = {}", n, cast(ubyte)s.charByteAt(i), cast(ubyte[])s);
+}
+
+/// Exception thrown by the UTF-8 helpers in this file.
+/// The final message is msg followed, on a new line, by the index/string
+/// dump produced by toUTF8infoString(s, i).
+class UTF8Exception : Exception {
+    this( String msg, in char[] s, UTF8index i){
+        super( Format("{}:\n{}", msg, toUTF8infoString(s, i)));
+    }
+}
+
+/// True if the byte at index i can begin a UTF-8 sequence, i.e. its entry
+/// in the stride table is not the 0xFF marker.
+bool isUTF8sequenceStart( in char[] s, in UTF8index i ) {
+    ubyte tableEntry = p_UTF8stride[s.charByteAt(i)];
+    return tableEntry != 0xFF;
+}
+
+/// Accepts i if it is the end index of s or points at the first byte of a
+/// UTF-8 sequence; throws UTF8Exception otherwise.
+void validateUTF8index( in char[] s, in UTF8index i ) {
+    // The end index is always a valid position and must not be dereferenced.
+    if(i == s.endIndex())
+        return;
+    if(s.isUTF8sequenceStart(i))
+        return;
+    throw new UTF8Exception("Not a start of an UTF-8 sequence", s, i);
+}
+
+/// Returns the length in bytes of the UTF-8 sequence that starts at index i.
+/// The index is validated first (UTF8Exception if it is not a sequence start).
+UTF8shift UTF8strideAt( in char[] s, in UTF8index i ) {
+    s.validateUTF8index(i);
+    version(Tango) {
+        // Tango build: use the local lookup table.
+        ubyte width = p_UTF8stride[s.charByteAt(i)];
+        return cast(UTF8shift)width;
+    } else { // Phobos
+        auto width = std.utf.stride( s, val(i) );
+        return cast(UTF8shift)width;
+    }
+}
+
+/// Returns the number of wchar units (1 or 2 in the Tango branch) occupied by
+/// the character that starts at index i. No validation of i is performed.
+UTF16shift UTF16strideAt( in wchar[] s, in UTF16index i ) {
+    //s.validateUTF16index(i);
+    version(Tango) {
+        uint unit = s[val(i)];
+        // A unit in 0xD800..0xDBFF is followed by a second unit.
+        bool leadsPair = unit >= 0xD800 && unit <= 0xDBFF;
+        return cast(UTF16shift)(leadsPair ? 2 : 1);
+    } else { // Phobos
+        auto width = std.utf.stride( s, val(i) );
+        return cast(UTF16shift)width;
+    }
+}
+
+/// Returns the number of code points encoded in the UTF-8 string s.
+UCSindex UCScount( in char[] s ){
+    version(Tango){
+        // Decode into a scratch buffer that can hold one dchar per input byte
+        // and report how many dchars were produced.
+        scope dchar[] scratch = new dchar[]( s.length );
+        uint consumed;
+        dchar[] decoded = tango.text.convert.Utf.toString32( s, scratch, &consumed );
+        assert( consumed is s.length );
+        return decoded.length;
+    } else { // Phobos
+        auto total = std.utf.count(s);
+        return total;
+    }
+}
+
+/// Converts a distance of dn code points, measured from byte index i, into
+/// the corresponding distance in bytes.
+///
+/// dn > 0 walks forward, dn < 0 walks backward, dn == 0 yields a zero shift.
+/// Walking backward from index 0 is allowed for the very last step only and
+/// then ends at preFirstIndex (-1).
+///
+/// Throws: UTF8Exception if i is not a sequence start, if the walk runs past
+/// either end of s, or if a sequence met while walking backward does not have
+/// the length announced by its first byte.
+UTF8shift toUTF8shift( in char[] s, in UTF8index i, in UCSshift dn ) {
+    s.validateUTF8index(i);
+    UTF8index j = i;
+    UCSshift tdn = dn;
+    if(tdn > 0)
+        // Forward: skip one whole sequence per step.
+        // NOTE(review): if j equals endIndex while steps remain, UTF8strideAt
+        // reads s[s.length]; looks like an out-of-range access, please confirm.
+        do {
+            j += s.UTF8strideAt(j);
+            if(j > s.endIndex())
+                throw new UTF8Exception(Format("toUTF8shift (dn = {}): No end of the UTF-8 sequence", dn), s, i);
+        } while(--tdn)
+    else if(tdn < 0) {
+        do {
+            // Already at index 0: only the final step may go to -1.
+            // (The else below belongs to the inner if.)
+            if(!val(j))
+                if(tdn == -1) {
+                    j = s.preFirstIndex();
+                    break;
+                } else
+                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): Can only go down to -1, not {}", dn, tdn), s, i);
+            // Step back byte by byte to the previous sequence start,
+            // counting the bytes passed in l.
+            int l = 0;
+            do {
+                if(!val(j))
+                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): No start of the UTF-8 sequence before", dn), s, i);
+                ++l;
+                dec(j);
+            } while(!s.isUTF8sequenceStart(j))
+            // The number of bytes passed must equal the announced length.
+            l -= val(s.UTF8strideAt(j));
+            if(l > 0)
+                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Overlong UTF-8 sequence before", dn), s, i);
+            else if(l < 0)
+                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Too short UTF-8 sequence before", dn), s, i);
+        } while(++tdn)
+    }
+    return j - i;
+}
+
+/// Returns the byte index of the code point that precedes index i
+/// (preFirstIndex if i is 0).
+UTF8index offsetBefore( in char[] s, in UTF8index i ) {
+    UTF8shift oneBack = s.toUTF8shift(i, -1);
+    return i + oneBack;
+}
+
+/// Returns the byte index just past the code point that starts at index i.
+UTF8index offsetAfter( in char[] s, in UTF8index i ) {
+    UTF8shift oneForward = s.toUTF8shift(i, 1);
+    return i + oneForward;
+}
+
+/**
+If the index is in the middle of a UTF-8 byte sequence, it is moved back to
+the first byte of that sequence. An index that already is the end index or
+a sequence start is left untouched.
+
+Throws: UTF8Exception if no sequence start is found before the index, or if
+the index lies at or beyond the end of the sequence found before it (i.e. the
+byte is a stray continuation byte). On failure the index is left unchanged
+and the exception reports the original index.
+*/
+void adjustUTF8index( in char[] s, ref UTF8index i ){
+    if(i == s.endIndex() || s.isUTF8sequenceStart(i))
+        return;
+    
+    // Work on a copy so that the caller's index is only updated on success.
+    int l = 0;
+    UTF8index res = i;
+    do {
+        if(!val(res))
+            throw new UTF8Exception("adjustUTF8index: No start of the UTF-8 sequence", s, i);
+        ++l;
+        dec(res);
+    } while(!s.isUTF8sequenceStart(res))
+    // l is the offset of the original index inside the sequence starting at
+    // res; valid offsets are 1 .. stride-1, so l == stride is already outside.
+    if(l >= val(s.UTF8strideAt(res)))
+        throw new UTF8Exception("adjustUTF8index: Overlong UTF-8 sequence", s, i);
+    i = res;
+}
+
+/// Converts a raw int argument into a UTF8index. If the value lies strictly
+/// inside s and points into the middle of a UTF-8 sequence, it is moved to
+/// the start of that sequence and a warning naming location is logged.
+/// F and L default to the file and line where the template is instantiated.
+UTF8index takeIndexArg(String F = __FILE__, uint L = __LINE__)(String s, int i_arg, String location) {
+    UTF8index res = cast(UTF8index) i_arg;
+    // Values at or outside the string bounds are passed through as they are.
+    if(i_arg <= 0 || i_arg >= s.length)
+        return res;
+    
+    auto requested = res;
+    s.adjustUTF8index(res);
+    if(requested != res)
+        getDwtLogger().warn(F, L, Format("Fixed invalid UTF-8 index at {}:\nnew i = {}, {}", location, val(res), toUTF8infoString(s, requested)));
+    return res;
+}
+
+/// Decodes the code point that starts at byte index i of the UTF-8 string s.
+/// stride receives the number of bytes the code point occupies; when the
+/// caller omits it, a shared dummy variable is written instead.
+/// Throws: UTF8Exception if i is not a valid sequence start.
+dchar dcharAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
+    s.validateUTF8index(i);
+    auto tail = s[val(i) .. $];
+    version(Tango){
+        // A one-element output buffer makes the conversion stop after one dchar.
+        dchar[1] one;
+        uint consumed;
+        dchar[] decoded = tango.text.convert.Utf.toString32( tail, one, &consumed );
+        assert( consumed > 0 && decoded.length is 1 );
+        stride = cast(UTF8shift)consumed;
+        return decoded[0];
+    } else { // Phobos
+        size_t consumed = 0;
+        dchar codePoint = std.utf.decode(tail, consumed);
+        stride = cast(UTF8shift)consumed;
+        return codePoint;
+    }
+}
+
+/// Decodes the code point that starts at index i of the UTF-16 string s.
+/// stride receives the number of wchar units the code point occupies; when
+/// the caller omits it, a shared dummy variable is written instead.
+/// The index itself is not validated.
+dchar dcharAt( in wchar[] s, in UTF16index i, out UTF16shift stride = UTF16dummyShift ) {
+    //s.validateUTF16index(i);
+    auto str = s[val(i) .. $];
+    version(Tango){
+        dchar[1] buf;
+        uint ate;
+        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
+        // Log the offending input before the assert fires; placed after the
+        // assert this diagnostic could never be reached in checked builds.
+        if( ate is 0 || res.length is 0 ){
+            getDwtLogger().trace( __FILE__, __LINE__, "str.length={} str={:X2}", str.length, cast(ubyte[])str );
+        }
+        assert( ate > 0 && res.length is 1 );
+        stride = cast(UTF16shift)ate;
+        return res[0];
+    } else { // Phobos
+        size_t ate = 0;
+        dchar res = std.utf.decode(str, ate);
+        stride = cast(UTF16shift)ate;
+        return res;
+    }
+}
+
+/// Decodes the code point that precedes byte index i.
+dchar dcharBefore( in char[] s, in UTF8index i ) {
+    UTF8index previous = s.offsetBefore(i);
+    return s.dcharAt(previous);
+}
+
+/// Decodes the code point that follows the one starting at byte index i.
+dchar dcharAfter( in char[] s, in UTF8index i ) {
+    UTF8shift oneForward = s.toUTF8shift(i, 1);
+    UTF8index next = i + oneForward;
+    return s.dcharAt(next);
+}
+
+/// Returns a copy of the bytes that encode the code point starting at byte
+/// index i of s. stride receives the number of bytes of that code point; when
+/// the caller omits it, a shared dummy variable is written instead.
+/// Throws: UTF8Exception if i is not a valid sequence start.
+String dcharAsStringAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
+    s.validateUTF8index(i);
+    auto tail = s[val(i) .. $];
+    uint consumed;
+    version(Tango){
+        // Only the consumed byte count is needed; the decoded dchar is ignored.
+        dchar[1] one;
+        dchar[] decoded = tango.text.convert.Utf.toString32( tail, one, &consumed );
+    } else { // Phobos
+        consumed = std.utf.stride( tail, 0 );
+    }
+    stride = cast(UTF8shift)consumed;
+    return tail[ 0 .. consumed ]._idup();
+}
+
+`;