changeset 177:6bfa783ac0b4

Sync with dwt-linux
author Frank Benoit <benoit@tionex.de>
date Fri, 07 Mar 2008 02:24:13 +0100
parents e0ea5a76da07
children e186b01aa62e
files dwt/dwthelper/utils.d
diffstat 1 files changed, 184 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/dwt/dwthelper/utils.d	Fri Mar 07 02:24:03 2008 +0100
+++ b/dwt/dwthelper/utils.d	Fri Mar 07 02:24:13 2008 +0100
@@ -13,8 +13,11 @@
 import tango.text.Util;
 import tango.text.Unicode;
 import tango.text.convert.Utf;
+import tango.core.Exception;
 import tango.stdc.stdlib : exit;
 
+import tango.util.log.Trace;
+
 void implMissing( char[] file, uint line ){
     Stderr.formatln( "implementation missing in file {} line {}", file, line );
     Stderr.formatln( "exiting ..." );
@@ -48,11 +51,168 @@
 alias ArrayWrapperT!(char)    ArrayWrapperString;
 alias ArrayWrapperT!(char[])  ArrayWrapperString2;
 
-dchar getFirstCodepoint( char[] str ){
-    foreach( dchar d; str ){
-        return d;
+/**
+ * Converts a codepoint index into the matching UTF-8 code unit (byte) index.
+ *
+ * Walks str from the start, skipping cpIndex encoded codepoints. The length
+ * of each sequence is taken from its lead byte only:
+ *   0x00..0x7F -> 1 byte, up to 0xDF -> 2 bytes, up to 0xEF -> 3 bytes,
+ *   0xF0 and above -> 4 bytes.
+ *
+ * Params:
+ *   str     = UTF-8 text; assumed to be well formed and to contain at least
+ *             cpIndex codepoints (no validation is done here).
+ *   cpIndex = number of codepoints to skip; values <= 0 yield 0.
+ * Returns: the byte index in str at which codepoint number cpIndex starts.
+ */
+int codepointIndexToIndex( char[] str, int cpIndex ){
+    int res = 0;
+    for( int remaining = cpIndex; remaining > 0; remaining-- ){
+        char lead = str[res];
+        if( lead < 0x80 ){
+            res+=1;
+        }
+        else if( lead < 0xE0 ){
+            res+=2;
+        }
+        // Was "lead & 0xF0", which is non-zero for every byte >= 0xE0 and
+        // therefore made the 4-byte branch below unreachable.
+        else if( lead < 0xF0 ){
+            res+=3;
+        }
+        else{
+            res+=4;
+        }
+    }
+    return res;
+}
+/**
+ * Converts a UTF-8 code unit (byte) index into a codepoint index.
+ *
+ * Steps through str one encoded codepoint at a time until the byte position
+ * reaches or passes index, counting the codepoints stepped over. Sequence
+ * lengths are derived from the lead byte only:
+ *   0x00..0x7F -> 1 byte, up to 0xDF -> 2 bytes, up to 0xEF -> 3 bytes,
+ *   0xF0 and above -> 4 bytes.
+ *
+ * Params:
+ *   str   = UTF-8 text; assumed to be well formed (no validation is done here).
+ *   index = byte index into str; values <= 0 yield 0.
+ * Returns: the number of codepoints that start before byte position index.
+ */
+int indexToCodepointIndex( char[] str, int index ){
+    int res = 0;
+    for( int i = 0; i < index; res++ ){
+        char lead = str[i];
+        if( lead < 0x80 ){
+            i+=1;
+        }
+        else if( lead < 0xE0 ){
+            i+=2;
+        }
+        // Was "lead & 0xF0", which is non-zero for every byte >= 0xE0, so
+        // 4-byte sequences were stepped over as if they were 3 bytes long.
+        else if( lead < 0xF0 ){
+            i+=3;
+        }
+        else{
+            i+=4;
+        }
+    }
+    return res;
+}
+
+/**
+ * Returns the leading slice of str that was consumed while decoding into a
+ * one-element UTF-32 buffer, i.e. the encoded form of its first codepoint.
+ *
+ * Params:
+ *   str      = UTF-8 text to inspect.
+ *   consumed = receives the number of code units reported as consumed by
+ *              toString32.
+ * Returns: str[ 0 .. consumed ], a slice of the input (not a copy).
+ */
+char[] firstCodePointStr( char[] str, out int consumed ){
+    uint unitsEaten;
+    dchar[1] decoded;
+    // Only the consumed count matters here; the decoded value is discarded.
+    tango.text.convert.Utf.toString32( str, decoded, &unitsEaten );
+    consumed = unitsEaten;
+    return str[ 0 .. unitsEaten ];
+}
+
+/**
+ * Convenience overload: decodes the first codepoint of str and drops the
+ * consumed-length information.
+ */
+dchar firstCodePoint( char[] str ){
+    int unusedConsumed;
+    dchar first = firstCodePoint( str, unusedConsumed );
+    return first;
+}
+/**
+ * Decodes the first codepoint of str.
+ *
+ * Params:
+ *   str      = UTF-8 text; expected to start with a complete codepoint.
+ *   consumed = receives the number of code units toString32 reports as
+ *              consumed.
+ * Returns: the decoded codepoint.
+ *
+ * If nothing could be decoded, the raw bytes of str are traced in hex and
+ * the asserts below then fail.
+ */
+dchar firstCodePoint( char[] str, out int consumed ){
+    // One-element output buffer: only a single codepoint is wanted.
+    dchar[1] buf;
+    uint ate;
+    dchar[] res = str.toString32( buf, &ate );
+    consumed = ate;
+    // Diagnostic dump before the asserts fire, so the offending input is visible.
+    if( ate is 0 || res.length is 0 ){
+        Trace.formatln( "dwthelper.utils {}: str.length={} str={:X2}", __LINE__, str.length, cast(ubyte[])str );
     }
+    assert( ate > 0 );
+    assert( res.length is 1 );
+    return res[0];
 }
+
+/**
+ * Encodes a single codepoint as UTF-8 text.
+ *
+ * Params:
+ *   key = the codepoint to encode.
+ * Returns: the result of Tango's Utf.toString for the one-element input.
+ */
+char[] dcharToString( dchar key ){
+    dchar[1] single;
+    single[0] = key;
+    char[] encoded = tango.text.convert.Utf.toString( single );
+    return encoded;
+}
+
+/**
+ * Counts the codepoints in str by decoding it to UTF-32.
+ *
+ * The scratch buffer has one slot per input code unit, which is never fewer
+ * than the number of codepoints. The assert checks that the whole input was
+ * consumed by the decoder.
+ *
+ * Params:
+ *   str = UTF-8 text.
+ * Returns: the number of decoded codepoints.
+ */
+int codepointCount( char[] str ){
+    uint consumedUnits;
+    scope dchar[] scratch = new dchar[]( str.length );
+    dchar[] decoded = tango.text.convert.Utf.toString32( str, scratch, &consumedUnits );
+    assert( consumedUnits is str.length );
+    return decoded.length;
+}
+
+// Make Tango's UTF conversion functions available under this module's own
+// toString16 / toString names.
+alias tango.text.convert.Utf.toString16 toString16;
+alias tango.text.convert.Utf.toString toString;
+
+/**
+ * Computes the byte offset, relative to startIndex, of the codepoint that
+ * lies searchRelCp codepoints away from startIndex.
+ *
+ * Params:
+ *   str         = UTF-8 text.
+ *   startIndex  = byte index the search starts from.
+ *   searchRelCp = number of codepoints to move; positive moves forward,
+ *                 negative moves backward, zero returns 0.
+ * Returns: i - startIndex, i.e. a positive byte offset for forward moves
+ *          and a negative one for backward moves.
+ *
+ * Forward moves validate each sequence (lead byte pattern plus continuation
+ * bytes) and call onUnicodeError on malformed or truncated input. Backward
+ * moves only skip continuation bytes and call onUnicodeError when running
+ * past the start of str.
+ */
+int getRelativeCodePointOffset( char[] str, int startIndex, int searchRelCp ){
+    // NOTE(review): 'ignore' is never read in this function.
+    int ignore;
+    int i = startIndex;
+    if( searchRelCp > 0 ){
+        // Forward: step over one fully validated sequence per iteration.
+        while( searchRelCp !is 0 ){
+
+            // 0xxxxxxx: single-byte sequence.
+            if( ( i < str.length )
+                && ( str[i] & 0x80 ) is 0x00 )
+            {
+                i+=1;
+            }
+            // 110xxxxx 10xxxxxx: two-byte sequence.
+            else if( ( i+1 < str.length )
+                && (( str[i+1] & 0xC0 ) is 0x80 )
+                && (( str[i  ] & 0xE0 ) is 0xC0 ))
+            {
+                i+=2;
+            }
+            // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence.
+            else if( ( i+2 < str.length )
+                && (( str[i+2] & 0xC0 ) is 0x80 )
+                && (( str[i+1] & 0xC0 ) is 0x80 )
+                && (( str[i  ] & 0xF0 ) is 0xE0 ))
+            {
+                i+=3;
+            }
+            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence.
+            else if(( i+3 < str.length )
+                && (( str[i+3] & 0xC0 ) is 0x80 )
+                && (( str[i+2] & 0xC0 ) is 0x80 )
+                && (( str[i+1] & 0xC0 ) is 0x80 )
+                && (( str[i  ] & 0xF8 ) is 0xF0 ))
+            {
+                i+=4;
+            }
+            // Anything else: malformed lead byte, bad continuation byte, or
+            // the sequence would run past the end of str.
+            else{
+                tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
+            }
+            searchRelCp--;
+        }
+    }
+    else if( searchRelCp < 0 ){
+        // Backward: move left until a byte that is not a continuation byte
+        // (10xxxxxx) is found; that byte starts the previous codepoint.
+        while( searchRelCp !is 0 ){
+            do{
+                i--;
+                if( i < 0 ){
+                    Trace.formatln( "dwthelper.utils getRelativeCodePointOffset {}: str={}, startIndex={}, searchRelCp={}", __LINE__, str, startIndex, searchRelCp );
+                    // NOTE(review): presumably this throws; otherwise the loop
+                    // condition below would index str with a negative i - confirm.
+                    tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
+                }
+            } while(( str[i] & 0xC0 ) is 0x80 );
+            searchRelCp++;
+        }
+    }
+    return i - startIndex;
+}
+/**
+ * Decodes the codepoint located searchRelCp codepoints away from startIndex.
+ *
+ * Params:
+ *   str         = UTF-8 text.
+ *   startIndex  = byte index the search starts from.
+ *   searchRelCp = signed number of codepoints to move from startIndex.
+ *   relIndex    = receives the byte offset (relative to startIndex) computed
+ *                 by getRelativeCodePointOffset.
+ * Returns: the codepoint that starts at startIndex + relIndex.
+ */
+dchar getRelativeCodePoint( char[] str, int startIndex, int searchRelCp, out int relIndex ){
+    relIndex = getRelativeCodePointOffset( str, startIndex, searchRelCp );
+    int targetIndex = startIndex + relIndex;
+    int unusedConsumed;
+    return firstCodePoint( str[ targetIndex .. $ ], unusedConsumed );
+}
+
+/**
+ * Moves a byte offset backwards until it no longer points at a UTF-8
+ * continuation byte (10xxxxxx), so that it sits on the start of a sequence.
+ *
+ * Params:
+ *   str    = UTF-8 text.
+ *   offset = byte offset into str.
+ * Returns: offset unchanged when it is <= 0 or not inside str; otherwise the
+ *          nearest position at or before offset that does not hold a
+ *          continuation byte, but never less than 0.
+ */
+int utf8AdjustOffset( char[] str, int offset ){
+    // Test the sign first so the length comparison only sees positive values.
+    if( offset <= 0 || str.length <= offset ){
+        return offset;
+    }
+    // The "offset > 0" bound stops the scan at the first byte. Without it a
+    // string that starts with continuation bytes pushed offset to -1 and
+    // indexed str out of range.
+    while( offset > 0 && ( str[offset] & 0xC0 ) is 0x80 ){
+        offset--;
+    }
+    return offset;
+}
+
+/**
+ * Convenience overload: lower-cases the first codepoint of str and drops
+ * the consumed-length information.
+ */
+dchar CharacterFirstToLower( char[] str ){
+    int unusedConsumed;
+    dchar lowered = CharacterFirstToLower( str, unusedConsumed );
+    return lowered;
+}
+/**
+ * Decodes the first codepoint of str and returns its lower-case form.
+ *
+ * Params:
+ *   str      = UTF-8 text.
+ *   consumed = receives the code unit count reported by firstCodePoint.
+ * Returns: the first element of Tango's Unicode.toLower result for the
+ *          decoded codepoint.
+ */
+dchar CharacterFirstToLower( char[] str, out int consumed ){
+    dchar first = firstCodePoint( str, consumed );
+    dchar[1] single;
+    single[0] = first;
+    dchar[] lowered = tango.text.Unicode.toLower( single );
+    return lowered[0];
+}
+
 dchar CharacterToLower( dchar c ){
     dchar[] r = tango.text.Unicode.toLower( [c] );
     return r[0];
@@ -90,6 +250,15 @@
     return res;
 }
 
+/**
+ * Finds the last occurrence of ch in str, searching back from the end.
+ * Delegates to the three-argument overload with str.length as start.
+ */
+public int lastIndexOf(char[] str, char ch){
+    int searchFrom = str.length;
+    return lastIndexOf( str, ch, searchFrom );
+}
+/**
+ * Finds the last occurrence of ch in str, searching backwards from formIndex.
+ *
+ * Params:
+ *   str       = text to search.
+ *   ch        = code unit to look for.
+ *   formIndex = start position handed to Tango's locatePrior.
+ * Returns: the index reported by locatePrior, or -1 when locatePrior returns
+ *          str.length (which this function treats as "not found").
+ */
+public int lastIndexOf(char[] str, char ch, int formIndex){
+    int found = tango.text.Util.locatePrior( str, ch, formIndex );
+    return ( found is str.length ) ? -1 : found;
+}
+
+/**
+ * Returns the result of Tango's Util.replace( from -> to ) applied to a
+ * duplicate of str; the caller's array is not handed to the replace call.
+ */
 public char[] replace( char[] str, char from, char to ){
     return tango.text.Util.replace( str.dup, from, to );
 }
@@ -129,6 +298,14 @@
     return src[ $-pattern.length .. $ ] == pattern;
 }
 
+/**
+ * Compares two strings with the array == operator.
+ *
+ * Returns: true when src == other, false otherwise.
+ */
+public bool equals( char[] src, char[] other ){
+    bool sameContent = ( src == other );
+    return sameContent;
+}
+
+/**
+ * Case-insensitive comparison: both inputs are passed through Tango's
+ * Unicode.toFold and the folded results are compared with ==.
+ */
+public bool equalsIgnoreCase( char[] src, char[] other ){
+    char[] foldedSrc = tango.text.Unicode.toFold(src);
+    char[] foldedOther = tango.text.Unicode.toFold(other);
+    return foldedSrc == foldedOther;
+}
+
 public bool startsWith( char[] src, char[] pattern ){
     if( src.length < pattern.length ){
         return false;
@@ -139,6 +316,10 @@
     return tango.text.Unicode.toLower( src );
 }
 
+/**
+ * Returns the hash that the runtime TypeInfo for char[] computes for src.
+ */
+public hash_t toHash( char[] src ){
+    TypeInfo info = typeid(char[]);
+    return info.getHash(&src);
+}
+
 static char[] toHex(uint value, bool prefix = true, int radix = 8){
     return tango.text.convert.Integer.toString(
             value,