changeset 277:f18872e0f232

unicode utils
author Frank Benoit <benoit@tionex.de>
date Tue, 05 Aug 2008 00:54:11 +0200
parents 240db000bbcd
children 3f53ebb05b5b
files dwt/dwthelper/utils.d
diffstat 1 files changed, 34 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/dwt/dwthelper/utils.d	Tue Aug 05 00:52:09 2008 +0200
+++ b/dwt/dwthelper/utils.d	Tue Aug 05 00:54:11 2008 +0200
@@ -340,9 +340,13 @@
     return res;
 }
 int indexToCodepointIndex( String str, int index ){
+    if( index < 0 ) return index;
     int i = 0;
     int res = 0;
     while( i < index ){
+        if( i >= str.length ){
+            break;
+        }
         if( str[i] < 0x80 ){
             i+=1;
         }
@@ -435,7 +439,7 @@
                 i+=4;
             }
             else{
-                Trace.formatln( "invalid utf8 characters: {:X2}", cast(ubyte[]) str );
+                Trace.formatln( "invalid utf8 characters:  {:X2}", cast(ubyte[]) str );
                 tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
             }
             searchRelCp--;
@@ -446,8 +450,9 @@
             do{
                 i--;
                 if( i < 0 ){
-                    Trace.formatln( "dwthelper.utils getRelativeCodePointOffset {}: str={}, startIndex={}, searchRelCp={}", __LINE__, str, startIndex, searchRelCp );
-                    tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
+                    return -1;
+                    //Trace.formatln( "dwthelper.utils getRelativeCodePointOffset {}: str={}, startIndex={}, searchRelCp={}", __LINE__, str, startIndex, searchRelCp );
+                    //tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
                 }
             } while(( str[i] & 0xC0 ) is 0x80 );
             searchRelCp++;
@@ -470,6 +475,32 @@
     }
     return offset;
 }
+/// Returns offset+1 advanced past UTF-8 continuation bytes (10xxxxxx); returned as-is if <= 0 or >= str.length.
+int utf8OffsetIncr( String str, int offset ){
+    int res = offset +1;
+    if( str.length <= res || res <= 0 ) return res;
+    int tries = 4; // debug-build guard: more than 4 consecutive continuation bytes trips the assert below
+    while( res < str.length && ( str[res] & 0xC0 ) is 0x80 ){ // stop at the end: a truncated trailing sequence must not index past str
+        res++;
+        tries--; // decrement kept outside the assert so the assert expression has no side effect
+        assert( tries >= 0 );
+    }
+    return res;
+}
+/// Returns offset-1 moved back past UTF-8 continuation bytes (10xxxxxx); returned as-is if <= 0 or >= str.length.
+int utf8OffsetDecr( String str, int offset ){
+    int res = offset-1;
+    if( str.length <= res || res <= 0 ) return res;
+    int tries = 4; // debug-build guard: more than 4 consecutive continuation bytes trips the assert below
+    while( res > 0 && ( str[res] & 0xC0 ) is 0x80 ){ // stop at index 0: leading stray continuation bytes must not index before str
+        res--;
+        tries--; // decrement kept outside the assert so the assert expression has no side effect
+        assert( tries >= 0 );
+    }
+    Trace.formatln( "utf8OffsetDecr {}->{}", offset, res );
+    Trace.memory( str ); // NOTE(review): both Trace calls run on every invocation; looks like leftover debugging - confirm before keeping
+    return res;
+}
 
 bool CharacterIsDefined( dchar ch ){
     return (ch in tango.text.UnicodeData.unicodeData) !is null;