view base/src/java/nonstandard/SafeUtf.d @ 120:536e43f63c81

Comprehensive update for Win32/Linux32 dmd-2.053/dmd-1.068+Tango-r5661 ===D2=== * added [Try]Immutable/Const/Shared templates to work with differences in D1/D2 instead of version statements used these templates to work with strict type storage rules of dmd-2.053 * com.ibm.icu now also compilable with D2, but not tested yet * small fixes Snippet288 - shared data is in TLS ===Phobos=== * fixed critical bugs in Phobos implementation completely incorrect segfault prone fromStringz (Linux's port ruthless killer) terrible, incorrect StringBuffer realization (StyledText killer) * fixed small bugs as well Snippet72 - misprint in the snippet * implemented missing functionality for Phobos ByteArrayOutputStream implemented (image loading available) formatting correctly works for all DWT's cases As a result, following snippets now work with Phobos (Snippet### - what is fixed): Snippet24, 42, 111, 115, 130, 235, 276 - bad string formatting Snippet48, 282 - crash on image loading Snippet163, 189, 211, 213, 217, 218, 222 - crash on copy/cut in StyledText Snippet244 - hang-up ===Tango=== * few changes for the latest Tango trunk-r5661 * few small performance improvements ===General=== * implMissing-s for only one version changed to implMissingInTango/InPhobos * incorrect calls to Format in toString-s fixed * fixed loading \uXXXX characters in ResourceBundle * added good UTF-8 support for StyledText, TextLayout (Win32) and friends UTF functions revised and tested. 
It is now in java.nonstandard.*Utf modules StyledText and TextLayout (Win32) modules revised for UTF-8 support * removed small differences in most identical files in *.swt.* folders *.swt.internal.image, *.swt.events and *.swt.custom are identical in Win32/Linux32 now 179 of 576 (~31%) files in *.swt.* folders are fully identical * Win32: snippets now have right subsystem, pretty icons and native system style controls * small fixes in snippets Snippet44 - it's not Snippet44 Snippet212 - functions work with different images and offsets arrays Win32: Snippet282 - crash on close if the button has an image Snippet293 - setGrayed is commented and others Win32: As a result, following snippets now work Snippet68 - color doesn't change Snippet163, 189, 211, 213, 217, 218, 222 - UTF-8 issues (see above) Snippet193 - no table headers
author Denis Shelomovskij <verylonglogin.reg@gmail.com>
date Sat, 09 Jul 2011 15:50:20 +0300
parents
children
line wrap: on
line source

/** 
 * Stuff for working with narrow strings.
 * Safe because of strong type checking.
 * 
 * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
 */
module java.nonstandard.SafeUtf;

import java.nonstandard.UtfBase;

// Compile-time flag, set to true in this module. Presumably it is read by the
// code mixed in below to turn on the strongly typed index/shift types that the
// module comment refers to -- TODO confirm against java.nonstandard.UtfBase.
private const bool UTFTypeCheck = true;
// String mixin: the text held in UtfBaseText is compiled as part of this module.
// NOTE(review): UtfBaseText is not declared here; presumably it comes from the
// imported java.nonstandard.UtfBase -- verify.
mixin(UtfBaseText);

// Self-test for the UTF helpers made available in this module.
// One sample text is written as UTF-8, UTF-16 and UTF-32 literals; the tables
// below describe its expected layout and the loops compare every helper result
// against those tables.
unittest {
    // The same nine code points in three encodings, with their code-unit counts.
    auto utf8Text = "abаб回家\U00064321\U00064321d";
    assert(utf8Text.length == 1+1+2+2+3+3+4+4+1);
    auto utf16Text = "abаб回家\U00064321\U00064321d"w;
    assert(utf16Text.length == 1+1+1+1+1+1+2+2+1);
    auto codePoints = "abаб回家\U00064321\U00064321d"d;

    // Per code unit: 1 where a sequence begins, 0 for a trailing unit.
    auto  lead8   = [1, 1, 1,0, 1,0, 1,0,0, 1,0,0, 1,0,0,0, 1,0,0,0, 1];
    auto  lead16  = [1, 1, 1,   1,   1,     1,     1,0,     1,0    , 1];
    assert(utf8Text.length == lead8.length);
    assert(utf16Text.length == lead16.length);

    // Per code point: sequence length in UTF-8 / UTF-16 code units, and the
    // UTF-8 offset at which the code point starts.
    auto units8   = [1, 1, 2, 2, 3, 3, 4, 4, 1];
    auto units16  = [1, 1, 1, 1, 1, 1, 2, 2, 1];
    auto offsets8 = [0, 1, 1+1, 1+1+2, 1+1+2+2, 1+1+2+2+3, 1+1+2+2+3+3, 1+1+2+2+3+3+4, 1+1+2+2+3+3+4+4];
    assert(units8.length == codePoints.length);
    assert(units16.length == codePoints.length);
    assert(offsets8.length == codePoints.length);

    // UTF-8 pass: visit every code unit; run the full set of checks on units
    // that begin a sequence.
    UTF8index lastLead = 0;   // index of the most recent sequence start
    UCSindex cp = 0;          // number of code points seen so far
    foreach(UTF8index i, char unit; utf8Text) {
        assert(utf8Text.isUTF8sequenceStart(i) == lead8[i]);
        if(lead8[i]) {
            utf8Text.validateUTF8index(i);
            assert(utf8Text.UTF8strideAt(i) == units8[cp]);

            // Code-point count <-> code-unit shift, forwards and backwards.
            assert(utf8Text.toUTF8shift(0, cp) == offsets8[cp]);
            assert(utf8Text.toUTF8shift(offsets8[cp], -cp) == -offsets8[cp]);
            if(i) {
                assert(utf8Text.offsetBefore(i) == lastLead);
            }

            // Code points before and after the current position.
            assert(utf8Text[0 .. val(i)].UCScount == cp);
            assert(utf8Text[val(i) .. $].UCScount == units8.length - cp);

            // Decoding at i yields the expected code point and consumed length.
            UTF8shift consumed;
            assert(utf8Text.dcharAt(i, consumed) == codePoints[cp]);
            assert(consumed == units8[cp]);
            if(i) {
                // Neighbour lookups must agree with direct decoding.
                assert(utf8Text.dcharBefore(i) == utf8Text.dcharAt(lastLead));
                assert(utf8Text.dcharAfter(lastLead) == utf8Text.dcharAt(i));
            }

            // The encoded form of the code point, obtained three ways.
            auto encoded = utf8Text[val(i) .. val(i) + units8[cp]];
            assert(utf8Text.dcharAsStringAt(i, consumed) == encoded);
            assert(consumed == encoded.length);
            assert(dcharToString(utf8Text.dcharAt(i)) == encoded);

            lastLead = i;
            ++cp;
        }

        // From any code unit, adjusting the index must land on the start of
        // the sequence that contains it.
        UTF8index adjusted = i;
        utf8Text.adjustUTF8index(adjusted);
        assert(adjusted == lastLead);
    }

    // UTF-16 pass: decode at every sequence start.
    // NOTE(review): a validateUTF16index(i) call is commented out at this point
    // in the source; the reason is not visible here -- confirm.
    cp = 0;
    foreach(UTF16index i, wchar unit; utf16Text) {
        if(lead16[i]) {
            UTF16shift consumed;
            assert(utf16Text.dcharAt(i, consumed) == codePoints[cp]);
            assert(consumed == units16[cp]);
            ++cp;
        }
    }

    // The one-past-the-end index is passed to the validator as well.
    utf8Text.validateUTF8index(utf8Text.length);
}