projects/dwt2: base/src/java/nonstandard/UtfBase.d comparison

comparison base/src/java/nonstandard/UtfBase.d @ 120:536e43f63c81

Comprehensive update for Win32/Linux32 dmd-2.053/dmd-1.068+Tango-r5661 ===D2=== * added [Try]Immutable/Const/Shared templates to work with differences in D1/D2 instead of version statements used these templates to work with strict type storage rules of dmd-2.053 * com.ibm.icu now also compilable with D2, but not tested yet * small fixes Snippet288 - shared data is in TLS ===Phobos=== * fixed critical bugs in Phobos implementation completely incorrect segfault prone fromStringz (Linux's port ruthless killer) terrible, incorrect StringBuffer implementation (StyledText killer) * fixed small bugs as well Snippet72 - misprint in the snippet * implemented missing functionality for Phobos ByteArrayOutputStream implemented (image loading available) formatting correctly works for all DWT's cases As a result, following snippets now work with Phobos (Snippet### - what is fixed): Snippet24, 42, 111, 115, 130, 235, 276 - bad string formatting Snippet48, 282 - crash on image loading Snippet163, 189, 211, 213, 217, 218, 222 - crash on copy/cut in StyledText Snippet244 - hang-up ===Tango=== * few changes for the latest Tango trunk-r5661 * few small performance improvements ===General=== * implMissing-s for only one version changed to implMissingInTango/InPhobos * incorrect calls to Format in toString-s fixed * fixed loading \uXXXX characters in ResourceBundle * added good UTF-8 support for StyledText, TextLayout (Win32) and friends UTF functions revised and tested. 
They are now in java.nonstandard.*Utf modules StyledText and TextLayout (Win32) modules revised for UTF-8 support * removed small differences in most identical files in *.swt.* folders *.swt.internal.image, *.swt.events and *.swt.custom are identical in Win32/Linux32 now 179 of 576 (~31%) files in *.swt.* folders are fully identical * Win32: snippets now have right subsystem, pretty icons and native system style controls * small fixes in snippets Snippet44 - it's not Snippet44 Snippet212 - functions work with different images and offsets arrays Win32: Snippet282 - crash on close if the button has an image Snippet293 - setGrayed is commented and others Win32: As a result, following snippets now work Snippet68 - color doesn't change Snippet163, 189, 211, 213, 217, 218, 222 - UTF-8 issues (see above) Snippet193 - no table headers

author	Denis Shelomovskij <verylonglogin.reg@gmail.com>
date	Sat, 09 Jul 2011 15:50:20 +0300
parents
children

comparison

equal deleted inserted replaced

-:d00e8db0a568
+:536e43f63c81
+/**
+* Stuff for working with narrow strings.
+* This module shouldn't be imported directly.
+* Use SafeUtf/UnsafeUtf modules instead.
+*
+* Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
+*/
+module java.nonstandard.UtfBase;
+package const UtfBaseText = `
+# line 11 "java\nonstandard\UtfBase.d"
+import java.lang.util;
+version(Tango){
+static import tango.text.convert.Utf;
+} else { // Phobos
+static import std.utf;
+static import std.conv;
+}
+///The Universal Character Set (UCS), defined by the International Standard ISO/IEC 10646
+/*typedef*/alias int UCSindex; // plain int alias; the stricter typedef form is left commented out
+alias UCSindex UCSshift; // same underlying type as UCSindex; used for distances counted in code points (see toUTF8shift)
+static if(UTFTypeCheck) {
+///UTF-16 (16-bit Unicode Transformation Format)
+/*struct UTF16index {
+int internalValue;
+alias internalValue val;
+private static UTF16index opCall(int _val) {
+UTF16index t = { _val };
+return t;
+}
+void opAddAssign(in UTF16shift di) {
+val += di;
+}
+void opSubAssign(in UTF16shift di) {
+val -= di;
+}
+mixin(constFuncs!("
+UTF16index opAdd(in UTF16shift di) {
+return UTF16index(val + di);
+}
+UTF16index opSub(in UTF16shift di) {
+return UTF16index(val - di);
+}
+version(Windows) {
+UTF16index opAdd(in int di) {
+return UTF16index(val + di);
+}
+UTF16index opSub(in int di) {
+return UTF16index(val - di);
+}
+}
+int opCmp(in UTF16index i2) {
+return val - i2.val;
+}
+"));
+}*/
+typedef int UTF16index; // declared with typedef (not alias) in the UTFTypeCheck branch; used as an index into wchar[] (see UTF16strideAt)
+typedef int UTF16shift; // declared with typedef as well; used as the stride type returned by UTF16strideAt
+///UTF-8 (UCS Transformation Format — 8-bit)
+//typedef int UTF8index;
+//alias UTF8index UTF8shift;
+/**
+* Wrapper around an int byte position inside a char[] (UTF-8) string.
+* This struct is only compiled in the UTFTypeCheck branch; in the other
+* branch UTF8index is a plain alias of int.
+*/
+struct UTF8index {
+int internalValue; // the wrapped byte position
+alias internalValue val; // short name used by the operators below and by the module-level val() helper
+// Private factory: builds an index from a raw int through a struct literal.
+private static UTF8index opCall(int _val) {
+UTF8index t = { _val };
+return t;
+}
+// index += shift
+void opAddAssign(in UTF8shift di) {
+val += di.val;
+}
+// index -= shift
+void opSubAssign(in UTF8shift di) {
+val -= di.val;
+}
+// The operators in the string below are generated through the constFuncs template,
+// which is defined outside this section.
+// NOTE(review): presumably constFuncs adds const qualification where the compiler supports it - confirm.
+// They provide: index + shift -> index, index - shift -> index,
+// index - index -> shift, and ordering by comparing the wrapped ints.
+mixin(constFuncs!("
+UTF8index opAdd(in UTF8shift di) {
+return UTF8index(val + di.val);
+}
+UTF8index opSub(in UTF8shift di) {
+return UTF8index(val - di.val);
+}
+UTF8shift opSub(in UTF8index di) {
+return UTF8shift(val - di.val);
+}
+int opCmp(in UTF8index i2) {
+return val - i2.val;
+}
+"));
+}
+/// Module-private helper that wraps a raw int into a UTF8index.
+private UTF8index newUTF8index(int i) {
+    auto wrapped = UTF8index(i);
+    return wrapped;
+}
+/// Unwraps an index or shift value to the raw int it carries.
+private int val(T)(T i) {
+    static if(is(T : UTF16index)) {
+        // Types implicitly convertible to UTF16index are converted with a plain cast.
+        return cast(int) i;
+    } else {
+        // Everything else is expected to expose the number through a val member.
+        return i.val;
+    }
+}
+/// Moves a UTF-8 index back by exactly one byte, in place.
+private void dec(ref UTF8index i) {
+    i.val -= 1;
+}
+/**
+* Wrapper around an int byte distance between two UTF8index positions.
+* This struct is only compiled in the UTFTypeCheck branch; in the other
+* branch UTF8shift is a plain alias of int.
+*/
+struct UTF8shift {
+int internalValue; // the wrapped byte distance
+alias internalValue val; // short name used by the operators below and by the module-level val() helper
+// Private factory: builds a shift from a raw int through a struct literal.
+private static UTF8shift opCall(int _val) {
+UTF8shift t = { _val };
+return t;
+}
+// shift += shift
+void opAddAssign(in UTF8shift di) {
+val += di.val;
+}
+// shift -= shift
+void opSubAssign(in UTF8shift di) {
+val -= di.val;
+}
+// The operators in the string below are generated through the constFuncs template,
+// which is defined outside this section.
+// NOTE(review): presumably constFuncs adds const qualification where the compiler supports it - confirm.
+// They provide: shift + shift -> shift, shift - shift -> shift,
+// and ordering by comparing the wrapped ints.
+mixin(constFuncs!("
+UTF8shift opAdd(in UTF8shift di) {
+return UTF8shift(val + di.val);
+}
+UTF8shift opSub(in UTF8shift di) {
+return UTF8shift(val - di.val);
+}
+int opCmp(in UTF8shift di2) {
+return val - di2.val;
+}
+"));
+}
+/// Wraps a raw int as a UTF8index.
+UTF8index asUTF8index(int i) {
+    auto index = UTF8index(i);
+    return index;
+}
+/// Wraps a raw int as a UTF8shift.
+UTF8shift asUTF8shift(int i) {
+    auto shift = UTF8shift(i);
+    return shift;
+}
+} else {
+alias int UTF16index; // without UTFTypeCheck all index and shift types are plain int aliases
+alias int UTF16shift; // stride type returned by UTF16strideAt
+alias int UTF8index; // byte position inside a char[] string
+alias int UTF8shift; // byte distance; stride type returned by UTF8strideAt
+/// Identity unwrap: in this branch every index and shift type is an alias of int.
+private int val(int i) {
+    int raw = i;
+    return raw;
+}
+/// Moves a UTF-8 index back by exactly one byte, in place.
+private void dec(ref UTF8index i) {
+    i -= 1;
+}
+}
+/// Returns the single char (one UTF-8 code unit) stored at byte index i of s.
+char charByteAt(in char[] s, in UTF8index i) {
+    int pos = val(i);
+    return s[pos];
+}
+/// Returns the index value -1, i.e. the position just before the first byte; s itself is not inspected.
+UTF8index preFirstIndex(in char[] s) {
+    UTF8index sentinel = cast(UTF8index) -1;
+    return sentinel;
+}
+/// Returns the index value 0, i.e. the position of the first byte; s itself is not inspected.
+UTF8index firstIndex(in char[] s) {
+    UTF8index start = cast(UTF8index) 0;
+    return start;
+}
+/// Returns the index one past the last byte of s (equal to s.length).
+UTF8index endIndex(in char[] s) {
+    auto byteCount = s.length;
+    return cast(UTF8index) byteCount;
+}
+/// Returns the index one code point before the end index of s, as computed by offsetBefore.
+UTF8index beforeEndIndex(in char[] s) {
+    UTF8index end = endIndex(s);
+    return offsetBefore(s, end);
+}
+//These variables are not in TLS, so they must only ever be written to, never read.
+//They serve as throw-away default targets for optional out stride parameters (e.g. of dcharAt).
+mixin(gshared!("
+private UCSindex UCSdummyShift;
+private UTF8shift UTF8dummyShift;
+private UTF16shift UTF16dummyShift;
+"));
+private const ubyte[256] p_UTF8stride = // sequence length in bytes, indexed by the first byte; 0xFF marks a byte that is not a sequence start (see isUTF8sequenceStart)
+[
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x00-0x0F: stride 1
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x10-0x1F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x20-0x2F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x30-0x3F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40-0x4F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x50-0x5F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60-0x6F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x70-0x7F
+0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, // 0x80-0x8F: not a sequence start
+0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, // 0x90-0x9F
+0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, // 0xA0-0xAF
+0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, // 0xB0-0xBF
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0-0xCF: stride 2
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xD0-0xDF
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xE0-0xEF: stride 3
+4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF, // 0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFD: 6, 0xFE-0xFF: not a sequence start
+];
+/// Builds the diagnostic text used in exceptions and log messages:
+/// the raw index, the byte stored at that index and the whole string as bytes.
+private String toUTF8infoString(in char[] s, UTF8index i) {
+    int pos = val(i);
+    ubyte byteAtIndex = cast(ubyte) charByteAt(s, i);
+    return Format("i = {}, s[i] = {}, s = {}", pos, byteAtIndex, cast(ubyte[])s);
+}
+/// Exception thrown by the UTF-8 helpers of this module.
+/// Its message is the caller's text followed by the index/string dump of toUTF8infoString.
+class UTF8Exception : Exception {
+    /// msg is the description of the problem; s and i are the string and index that triggered it.
+    this( String msg, in char[] s, UTF8index i){
+        super(
+            Format("{}:\n{}", msg, toUTF8infoString(s, i))
+        );
+    }
+}
+/// Tells whether the byte at index i can begin a UTF-8 sequence,
+/// i.e. whether its entry in p_UTF8stride is not the 0xFF marker.
+bool isUTF8sequenceStart( in char[] s, in UTF8index i ) {
+    char lead = charByteAt(s, i);
+    return !(p_UTF8stride[lead] == 0xFF);
+}
+/// Throws UTF8Exception unless i is the end index of s or points at the first byte of a sequence.
+void validateUTF8index( in char[] s, in UTF8index i ) {
+    // The end index is always acceptable and must not be dereferenced.
+    if(i == endIndex(s))
+        return;
+    if(isUTF8sequenceStart(s, i))
+        return;
+    throw new UTF8Exception("Not a start of an UTF-8 sequence", s, i);
+}
+/// Returns the byte length of the UTF-8 sequence that starts at index i.
+/// The index is validated first, so a UTF8Exception is thrown for an index inside a sequence.
+UTF8shift UTF8strideAt( in char[] s, in UTF8index i ) {
+    validateUTF8index(s, i);
+    version(Tango) {
+        // Tango build: look the length up in the local table.
+        ubyte tableStride = p_UTF8stride[charByteAt(s, i)];
+        return cast(UTF8shift)tableStride;
+    } else { // Phobos
+        auto phobosStride = std.utf.stride( s, val(i) );
+        return cast(UTF8shift)phobosStride;
+    }
+}
+/// Returns the number of UTF-16 code units occupied by the code point that starts at index i.
+/// No validation of the index is performed.
+UTF16shift UTF16strideAt( in wchar[] s, in UTF16index i ) {
+    //s.validateUTF16index(i);
+    version(Tango) {
+        uint unit = s[val(i)];
+        // A unit in 0xD800..0xDBFF is followed by a second unit, giving a stride of 2.
+        bool startsPair = unit >= 0xD800 && unit <= 0xDBFF;
+        return cast(UTF16shift)(startsPair ? 2 : 1);
+    } else { // Phobos
+        auto phobosStride = std.utf.stride( s, val(i) );
+        return cast(UTF16shift)phobosStride;
+    }
+}
+/// Returns the number of code points encoded in the UTF-8 string s.
+UCSindex UCScount( in char[] s ){
+    version(Tango){
+        // Decode the whole string into a scratch buffer and count the result.
+        uint consumed;
+        scope dchar[] scratch = new dchar[]( s.length );
+        dchar[] decoded = tango.text.convert.Utf.toString32( s, scratch, &consumed );
+        assert( consumed is s.length );
+        return decoded.length;
+    } else { // Phobos
+        auto total = std.utf.count(s);
+        return total;
+    }
+}
+/**
+* Converts a distance measured in code points into a distance measured in bytes.
+*
+* Starting at byte index i, walks dn code points forward (dn > 0) or backward
+* (dn < 0) through s and returns the final index minus i.
+* For dn == 0 neither branch runs and the result is the difference of i with itself.
+*
+* Throws: UTF8Exception if i is neither a sequence start nor the end index,
+* if walking forward runs past the end of s, or if walking backward meets a
+* sequence whose byte count differs from the stride of its first byte.
+* Walking backward from index 0 is accepted only for the last remaining step,
+* which yields preFirstIndex(); any further step throws.
+*/
+UTF8shift toUTF8shift( in char[] s, in UTF8index i, in UCSshift dn ) {
+s.validateUTF8index(i);
+UTF8index j = i;
+UCSshift tdn = dn;
+if(tdn > 0)
+do {
+// Skip one whole sequence; UTF8strideAt validates j before reading the stride.
+j += s.UTF8strideAt(j);
+if(j > s.endIndex())
+throw new UTF8Exception(Format("toUTF8shift (dn = {}): No end of the UTF-8 sequence", dn), s, i);
+} while(--tdn)
+else if(tdn < 0) {
+do {
+// Already at byte 0: only the final step may move to the position before the start.
+if(!val(j))
+if(tdn == -1) {
+j = s.preFirstIndex();
+break;
+} else
+throw new UTF8Exception(Format("toUTF8shift (dn = {}): Can only go down to -1, not {}", dn, tdn), s, i);
+// l counts the bytes stepped back until the previous sequence start is reached.
+int l = 0;
+do {
+if(!val(j))
+throw new UTF8Exception(Format("toUTF8shift (dn = {}): No start of the UTF-8 sequence before", dn), s, i);
+++l;
+dec(j);
+} while(!s.isUTF8sequenceStart(j))
+// After subtracting the stride declared by the start byte, l must be exactly 0.
+l -= val(s.UTF8strideAt(j));
+if(l > 0)
+throw new UTF8Exception(Format("toUTF8shift (dn = {}): Overlong UTF-8 sequence before", dn), s, i);
+else if(l < 0)
+throw new UTF8Exception(Format("toUTF8shift (dn = {}): Too short UTF-8 sequence before", dn), s, i);
+} while(++tdn)
+}
+// Byte distance between the reached position and the starting index.
+return j - i;
+}
+/// Returns the index of the code point that precedes index i
+/// (or preFirstIndex() when i is 0, as produced by toUTF8shift).
+UTF8index offsetBefore( in char[] s, in UTF8index i ) {
+    UTF8shift back = toUTF8shift(s, i, -1);
+    return i + back;
+}
+/// Returns the index just past the code point that starts at index i.
+UTF8index offsetAfter( in char[] s, in UTF8index i ) {
+    UTF8shift forward = toUTF8shift(s, i, 1);
+    return i + forward;
+}
+/**
+* If the index is in the middle of a UTF-8 byte sequence, moves it back
+* to the position of the first byte of this sequence.
+*
+* An index that equals the end index or already points at a sequence
+* start is left unchanged.
+*
+* Throws: UTF8Exception if no sequence start precedes the index, or if the
+* nearest preceding sequence is too short to contain the index (the index
+* then points at a stray continuation byte). On failure i keeps its
+* original value and the exception reports that original index.
+*/
+void adjustUTF8index( in char[] s, ref UTF8index i ){
+    if(i == s.endIndex() || s.isUTF8sequenceStart(i))
+        return;
+    // Work on a copy so that the caller's index is only changed on success.
+    UTF8index res = i;
+    // l counts the bytes between the found sequence start and the original index.
+    int l = 0;
+    do {
+        if(!val(res))
+            throw new UTF8Exception("adjustUTF8index: No start of the UTF-8 sequence", s, i);
+        ++l;
+        dec(res);
+    } while(!s.isUTF8sequenceStart(res))
+    // A sequence of stride n covers offsets 0 .. n-1 from its start, so the
+    // original index belongs to it only when l < n; l == n is already outside.
+    if(l >= val(s.UTF8strideAt(res)))
+        throw new UTF8Exception("adjustUTF8index: Overlong UTF-8 sequence", s, i);
+    i = res;
+}
+/**
+* Converts a raw int argument into a UTF8index for the string s.
+* An argument strictly inside the string (0 < i_arg < s.length) is passed through
+* adjustUTF8index; when that changes the index, a warning naming location is
+* logged with the template arguments F and L as file and line.
+*/
+UTF8index takeIndexArg(String F = __FILE__, uint L = __LINE__)(String s, int i_arg, String location) {
+    UTF8index res = cast(UTF8index) i_arg;
+    // Indices at or outside the string bounds are returned untouched.
+    if(i_arg <= 0 || i_arg >= s.length)
+        return res;
+    UTF8index requested = res;
+    adjustUTF8index(s, res);
+    if(requested != res)
+        getDwtLogger().warn(F, L, Format("Fixed invalid UTF-8 index at {}:\nnew i = {}, {}", location, val(res), toUTF8infoString(s, requested)));
+    return res;
+}
+/**
+* Decodes the code point that starts at byte index i of the UTF-8 string s.
+* The number of bytes consumed is stored in stride; callers that do not need
+* it can omit the argument, in which case the dummy variable receives it.
+* The index is validated first (UTF8Exception for an index inside a sequence).
+*/
+dchar dcharAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
+    validateUTF8index(s, i);
+    auto tail = s[val(i) .. $];
+    version(Tango){
+        // Decode into a one-element buffer so that exactly one code point is produced.
+        uint consumed;
+        dchar[1] slot;
+        dchar[] decoded = tango.text.convert.Utf.toString32( tail, slot, &consumed );
+        assert( consumed > 0 && decoded.length is 1 );
+        stride = cast(UTF8shift)consumed;
+        return decoded[0];
+    } else { // Phobos
+        size_t consumed = 0;
+        dchar codePoint = std.utf.decode(tail, consumed);
+        stride = cast(UTF8shift)consumed;
+        return codePoint;
+    }
+}
+/**
+* Decodes the code point that starts at index i of the UTF-16 string s.
+* The number of UTF-16 code units consumed is stored in stride; callers that
+* do not need it can omit the argument, in which case the dummy variable
+* receives it. The index itself is not validated.
+*/
+dchar dcharAt( in wchar[] s, in UTF16index i, out UTF16shift stride = UTF16dummyShift ) {
+    //s.validateUTF16index(i);
+    auto str = s[val(i) .. $];
+    version(Tango){
+        dchar[1] buf;
+        uint ate;
+        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
+        // Log the offending input before asserting: placed after the assert this
+        // diagnostic could never be reached in builds with assertions enabled.
+        if( ate is 0 || res.length is 0 ){
+            getDwtLogger().trace( __FILE__, __LINE__, "str.length={} str={:X2}", str.length, cast(ubyte[])str );
+        }
+        assert( ate > 0 && res.length is 1 );
+        stride = cast(UTF16shift)ate;
+        return res[0];
+    } else { // Phobos
+        size_t ate = 0;
+        dchar res = std.utf.decode(str, ate);
+        stride = cast(UTF16shift)ate;
+        return res;
+    }
+}
+/// Decodes the code point that ends right before index i.
+dchar dcharBefore( in char[] s, in UTF8index i ) {
+    UTF8index previous = offsetBefore(s, i);
+    return dcharAt(s, previous);
+}
+/// Decodes the code point that follows the one starting at index i.
+dchar dcharAfter( in char[] s, in UTF8index i ) {
+    // offsetAfter computes i + toUTF8shift(s, i, 1), mirroring dcharBefore.
+    UTF8index next = offsetAfter(s, i);
+    return dcharAt(s, next);
+}
+/**
+* Returns a new String holding exactly the bytes of the code point that
+* starts at byte index i of s. The byte length of that code point is
+* stored in stride (or in the dummy variable when the argument is omitted).
+* The index is validated first (UTF8Exception for an index inside a sequence).
+*/
+String dcharAsStringAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
+    validateUTF8index(s, i);
+    auto tail = s[val(i) .. $];
+    uint byteLen;
+    version(Tango){
+        // Only the consumed byte count is needed; the decoded value is not used.
+        dchar[1] slot;
+        dchar[] decoded = tango.text.convert.Utf.toString32( tail, slot, &byteLen );
+    } else { // Phobos
+        byteLen = std.utf.stride( tail, 0 );
+    }
+    stride = cast(UTF8shift)byteLen;
+    return tail[ 0 .. byteLen ]._idup();
+}
+`;

Mercurial > projects > dwt2

comparison base/src/java/nonstandard/UtfBase.d @ 120:536e43f63c81