# HG changeset patch # User Frank Benoit # Date 1240141778 -7200 # Node ID ebefa5c2eab4d7bb2ebbe7b1634eeb49ab61ebf0 # Parent 2755ef2c8ef89b043035231a6fdae06a2588d1a1 moving ICU bindings to com.ibm.icu diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/ICU.d --- a/base/src/java/mangoicu/ICU.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,695 +0,0 @@ -/******************************************************************************* - - @file ICU.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. 
- - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version; October 2004 - Updated to ICU v3.2; March 2005 - - @author Kris - John Reimer - Anders F Bjorklund (Darwin patches) - - -*******************************************************************************/ - -module java.mangoicu.ICU; - -import java.lang.util; -/******************************************************************************* - - Library version identifiers - -*******************************************************************************/ - -version (ICU30) - { - private static const char[] ICULib = "30"; - private static const char[] ICUSig = "_3_0\0"; - } -version (ICU32) - { - private static const char[] ICULib = "32"; - private static const char[] ICUSig = "_3_2\0"; - } -version (ICU34) - { - private static const char[] ICULib = "34"; - private static const char[] ICUSig = "_3_4\0"; - } -version (ICU36) - { - private static const char[] ICULib = "36"; - private static const char[] ICUSig = "_3_6\0"; - } -else - { - private static const char[] ICULib = "38"; - private static const char[] ICUSig = "_3_8\0"; - } - -/******************************************************************************* - -*******************************************************************************/ - -private static extern (C) uint strlen (char *s); -private static extern (C) uint wcslen (wchar *s); - - -/******************************************************************************* - - Some low-level routines to help bind the ICU C-API to D. 
- -*******************************************************************************/ - -protected class ICU -{ - /*********************************************************************** - - The library names to load within the target environment - - ***********************************************************************/ - - version (Win32) - { - package static char[] icuuc = "icuuc"~ICULib~".dll"; - package static char[] icuin = "icuin"~ICULib~".dll"; - } - else - version (linux) - { - package static char[] icuuc = "libicuuc.so."~ICULib; - package static char[] icuin = "libicui18n.so."~ICULib; - } - else - version (darwin) - { - package static char[] icuuc = "libicuuc.dylib."~ICULib; - package static char[] icuin = "libicui18n.dylib."~ICULib; - } - else - { - static assert (false); - } - - /*********************************************************************** - - Use this for the primary argument-type to most ICU functions - - ***********************************************************************/ - - protected typedef void* Handle; - - /*********************************************************************** - - Parse-error filled in by several functions - - ***********************************************************************/ - - public struct ParseError - { - int line, - offset; - wchar[16] preContext, - postContext; - } - - /*********************************************************************** - - The binary form of a version on ICU APIs is an array of - four bytes - - ***********************************************************************/ - - public struct Version - { - ubyte[4] info; - } - - /*********************************************************************** - - ICU error codes (the ones which are referenced) - - ***********************************************************************/ - - package enum UErrorCode:int - { - OK, - BufferOverflow=15 - } - - /*********************************************************************** - - 
***********************************************************************/ - - package static final bool isError (UErrorCode e) - { - return e > 0; - } - - /*********************************************************************** - - ***********************************************************************/ - - package static final void exception (CString msg) - { - throw new ICUException ( cast(String)msg); - } - - /*********************************************************************** - - ***********************************************************************/ - - package static final void testError (UErrorCode e, CString msg) - { - if (e > 0) - exception (msg); - } - - /*********************************************************************** - - ***********************************************************************/ - - package static final char* toString (CString string) - { - static char[] empty; - - if (! string.length) - return (string.ptr) ? empty.ptr : null; - -// if (* (&string[0] + string.length)) - { - // Need to make a copy - char[] copy = new char [string.length + 1]; - copy [0..string.length] = string; - copy [string.length] = 0; - string = copy; - } - return cast(char*)string.ptr; - } - - /*********************************************************************** - - ***********************************************************************/ - - package static final wchar* toString (CString16 string) - { - static wchar[] empty; - - if (! string.length) - return (string.ptr) ? 
empty.ptr : null; - -// if (* (&string[0] + string.length)) - { - // Need to make a copy - wchar[] copy = new wchar [string.length + 1]; - copy [0..string.length] = string; - copy [string.length] = 0; - string = copy; - } - return cast(wchar*)string.ptr; - } - - /*********************************************************************** - - ***********************************************************************/ - - protected static final uint length (char* s) - { - return strlen (s); - } - - /*********************************************************************** - - ***********************************************************************/ - - protected static final uint length (wchar* s) - { - return wcslen (s); - } - - /*********************************************************************** - - ***********************************************************************/ - - protected static final char[] toArray (char* s) - { - if (s) - return s[0..strlen (s)]; - return null; - } - - /*********************************************************************** - - ***********************************************************************/ - - protected static final wchar[] toArray (wchar* s) - { - if (s) - return s[0..wcslen (s)]; - return null; - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -class ICUException : Exception -{ - /*********************************************************************** - - Construct exception with the provided text string - - ***********************************************************************/ - - this (String msg) - { - super (msg); - } -} - -/******************************************************************************* - -*******************************************************************************/ - -typedef void* UParseError; - - 
-/******************************************************************************* - - Function address loader for Win32 - -*******************************************************************************/ - -version (Win32) -{ - typedef void* HANDLE; - extern (Windows) HANDLE LoadLibraryA (char*); - extern (Windows) HANDLE GetProcAddress (HANDLE, char*); - extern (Windows) void FreeLibrary (HANDLE); - - /*********************************************************************** - - ***********************************************************************/ - - package static class FunctionLoader - { - /*************************************************************** - - ***************************************************************/ - - protected struct Bind - { - void** fnc; - CString name; - } - - /*************************************************************** - - ***************************************************************/ - - static final void* bind (char[] library, inout Bind[] targets) - { - HANDLE lib = LoadLibraryA (ICU.toString(library)); - - foreach (Bind b; targets) - { - auto name = b.name ~ ICUSig; - *b.fnc = GetProcAddress (lib, cast(char*)name.ptr); - if (*b.fnc) - {}// printf ("bound '%.*s'\n", name); - else - throw new Exception ( cast(String)("required " ~ name ~ " in library " ~ library)); - } - return lib; - } - - /*************************************************************** - - ***************************************************************/ - - static final void unbind (void* library) - { - version (CorrectedTeardown) - FreeLibrary (cast(HANDLE) library); - } - } -} - - -/******************************************************************************* - - 2004-11-26: Added Linux shared library support -- John Reimer - -*******************************************************************************/ - -else version (linux) -{ - //Tell build to link with dl library - version(build) { pragma(link, "dl"); } - - // from include/bits/dlfcn.h on 
Linux - const int RTLD_LAZY = 0x00001; // Lazy function call binding - const int RTLD_NOW = 0x00002; // Immediate function call binding - const int RTLD_NOLOAD = 0x00004; // no object load - const int RTLD_DEEPBIND = 0x00008; - const int RTLD_GLOBAL = 0x00100; // make object available to whole program - - extern(C) - { - void* dlopen(char* filename, int flag); - char* dlerror(); - void* dlsym(void* handle, char* symbol); - int dlclose(void* handle); - } - - class FunctionLoader - { - /*************************************************************** - - ***************************************************************/ - - protected struct Bind - { - void** fnc; - CString name; - } - - /*************************************************************** - - ***************************************************************/ - - static final void* bind (char[] library, inout Bind[] targets) - { - static char[] errorInfo; - // printf("the library is %s\n", ICU.toString(library)); - void* lib = dlopen(ICU.toString(library), RTLD_NOW); - - // clear the error buffer - dlerror(); - - foreach (Bind b; targets) - { - char[] name = cast(char[])(b.name ~ ICUSig); - - *b.fnc = dlsym (lib, name.ptr); - if (*b.fnc) - {}// printf ("bound '%.*s'\n", name); - else { - // errorInfo = ICU.toArray(dlerror()); - // printf("%s", dlerror()); - throw new Exception (cast(String)("required " ~ name ~ " in library " ~ library)); - } - } - return lib; - } - - /*************************************************************** - - ***************************************************************/ - - static final void unbind (void* library) - { - version (CorrectedTeardown) - { - if (! 
dlclose (library)) - throw new Exception ("close library failed\n"); - } - } - } -} - - -/******************************************************************************* - - 2004-12-20: Added Darwin shared library support -- afb - -*******************************************************************************/ - -else version (darwin) -{ - // #include - - struct mach_header - { - uint magic; /* mach magic number identifier */ - uint cputype; /* cpu specifier */ - uint cpusubtype; /* machine specifier */ - uint filetype; /* type of file */ - uint ncmds; /* number of load commands */ - uint sizeofcmds; /* the size of all the load commands */ - uint flags; /* flags */ - } - - /* Constant for the magic field of the mach_header */ - const uint MH_MAGIC = 0xfeedface; // the mach magic number - const uint MH_CIGAM = 0xcefaedfe; // x86 variant - - // #include - - typedef void *NSObjectFileImage; - - typedef void *NSModule; - - typedef void *NSSymbol; - - enum // DYLD_BOOL: uint - { - FALSE, - TRUE - } - alias uint DYLD_BOOL; - - enum // NSObjectFileImageReturnCode: uint - { - NSObjectFileImageFailure, /* for this a message is printed on stderr */ - NSObjectFileImageSuccess, - NSObjectFileImageInappropriateFile, - NSObjectFileImageArch, - NSObjectFileImageFormat, /* for this a message is printed on stderr */ - NSObjectFileImageAccess - } - alias uint NSObjectFileImageReturnCode; - - enum // NSLinkEditErrors: uint - { - NSLinkEditFileAccessError, - NSLinkEditFileFormatError, - NSLinkEditMachResourceError, - NSLinkEditUnixResourceError, - NSLinkEditOtherError, - NSLinkEditWarningError, - NSLinkEditMultiplyDefinedError, - NSLinkEditUndefinedError - } - alias uint NSLinkEditErrors; - - extern(C) - { - NSObjectFileImageReturnCode NSCreateObjectFileImageFromFile(char *pathName, NSObjectFileImage* objectFileImage); - DYLD_BOOL NSDestroyObjectFileImage(NSObjectFileImage objectFileImage); - - mach_header * NSAddImage(char *image_name, uint options); - const uint 
NSADDIMAGE_OPTION_NONE = 0x0; - const uint NSADDIMAGE_OPTION_RETURN_ON_ERROR = 0x1; - const uint NSADDIMAGE_OPTION_WITH_SEARCHING = 0x2; - const uint NSADDIMAGE_OPTION_RETURN_ONLY_IF_LOADED = 0x4; - const uint NSADDIMAGE_OPTION_MATCH_FILENAME_BY_INSTALLNAME = 0x8; - - NSModule NSLinkModule(NSObjectFileImage objectFileImage, char* moduleName, uint options); - const uint NSLINKMODULE_OPTION_NONE = 0x0; - const uint NSLINKMODULE_OPTION_BINDNOW = 0x01; - const uint NSLINKMODULE_OPTION_PRIVATE = 0x02; - const uint NSLINKMODULE_OPTION_RETURN_ON_ERROR = 0x04; - const uint NSLINKMODULE_OPTION_DONT_CALL_MOD_INIT_ROUTINES = 0x08; - const uint NSLINKMODULE_OPTION_TRAILING_PHYS_NAME = 0x10; - DYLD_BOOL NSUnLinkModule(NSModule module_, uint options); - - void NSLinkEditError(NSLinkEditErrors *c, int *errorNumber, char **fileName, char **errorString); - - DYLD_BOOL NSIsSymbolNameDefined(char *symbolName); - DYLD_BOOL NSIsSymbolNameDefinedInImage(mach_header *image, char *symbolName); - NSSymbol NSLookupAndBindSymbol(char *symbolName); - NSSymbol NSLookupSymbolInModule(NSModule module_, char* symbolName); - NSSymbol NSLookupSymbolInImage(mach_header *image, char *symbolName, uint options); - const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND = 0x0; - const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND_NOW = 0x1; - const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND_FULLY = 0x2; - const uint NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR = 0x4; - - void* NSAddressOfSymbol(NSSymbol symbol); - char* NSNameOfSymbol(NSSymbol symbol); - } - - - class FunctionLoader - { - /*************************************************************** - - ***************************************************************/ - - protected struct Bind - { - void** fnc; - CString name; - } - - /*************************************************************** - - ***************************************************************/ - - private static NSModule open(char* filename) - { - NSModule mod = null; - NSObjectFileImage fileImage = 
null; - debug printf("Trying to load: %s\n", filename); - - NSObjectFileImageReturnCode returnCode = - NSCreateObjectFileImageFromFile(filename, &fileImage); - if(returnCode == NSObjectFileImageSuccess) - { - mod = NSLinkModule(fileImage,filename, - NSLINKMODULE_OPTION_RETURN_ON_ERROR | - NSLINKMODULE_OPTION_PRIVATE | - NSLINKMODULE_OPTION_BINDNOW); - NSDestroyObjectFileImage(fileImage); - } - else if(returnCode == NSObjectFileImageInappropriateFile) - { - NSDestroyObjectFileImage(fileImage); - /* Could be a dynamic library rather than a bundle */ - mod = cast(NSModule) NSAddImage(filename, - NSADDIMAGE_OPTION_RETURN_ON_ERROR); - } - else - { - debug printf("FileImage Failed: %d\n", returnCode); - } - return mod; - } - - private static void* symbol(NSModule mod, char* name) - { - NSSymbol symbol = null; - uint magic = (* cast(mach_header *) mod).magic; - - if ( (mod == cast(NSModule) -1) && NSIsSymbolNameDefined(name)) - /* Global context, use NSLookupAndBindSymbol */ - symbol = NSLookupAndBindSymbol(name); - else if ( ( magic == MH_MAGIC || magic == MH_CIGAM ) && - NSIsSymbolNameDefinedInImage(cast(mach_header *) mod, name)) - symbol = NSLookupSymbolInImage(cast(mach_header *) mod, name, - NSLOOKUPSYMBOLINIMAGE_OPTION_BIND | - NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR); - else - symbol = NSLookupSymbolInModule(mod, name); - - return NSAddressOfSymbol(symbol); - } - - static final void* bind (char[] library, inout Bind[] targets) - { - static char[] errorInfo; - - debug printf("the library is %s\n", ICU.toString(library)); - - void* lib = null; - static char[][] usual_suspects = [ "", "/usr/local/lib/", "/usr/lib/", - /* Fink */ "/sw/lib/", /* DarwinPorts */ "/opt/local/lib/" ]; - foreach (char[] prefix; usual_suspects) - { - lib = cast(void*) open(ICU.toString(prefix ~ library)); - if (lib != null) break; - } - if (lib == null) - { - throw new Exception ("could not open library " ~ library); - } - - // clear the error buffer - // error(); - - foreach (Bind b; 
targets) - { - // Note: all C functions have a underscore prefix in Mach-O symbols - char[] name = "_" ~ b.name ~ ICUSig; - - *b.fnc = symbol(cast(NSModule) lib, name.ptr); - if (*b.fnc != null) - { - debug printf ("bound '%.*s'\n", name); - } - else - { - // errorInfo = ICU.toArray(error()); - throw new Exception ("required " ~ name ~ " in library " ~ library); - } - } - return lib; - } - - /*************************************************************** - - ***************************************************************/ - - private static bool close(NSModule mod) - { - uint magic = (* cast(mach_header *) mod).magic; - if ( magic == MH_MAGIC || magic == MH_CIGAM ) - { - // Can not unlink dynamic libraries on Darwin - return true; - } - - return (NSUnLinkModule(mod, 0) == TRUE); - } - - static final void unbind (void* library) - { - version (CorrectedTeardown) - { - if (! close(cast(NSModule) library)) - throw new Exception ("close library failed\n"); - } - } - } -} - -/******************************************************************************* - - unknown platform - -*******************************************************************************/ - -else static assert(0); // need an implementation of FunctionLoader for this OS - - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UBreakIterator.d --- a/base/src/java/mangoicu/UBreakIterator.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,621 +0,0 @@ -/******************************************************************************* - - @file UBreakIterator.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UBreakIterator; - -private import java.mangoicu.ICU; - -public import java.mangoicu.ULocale, - java.mangoicu.UText, - java.mangoicu.UString; - - - -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class UCharacterIterator : UBreakIterator -// { -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Character, locale, text); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class UWordIterator : UBreakIterator -// { -// public enum Break -// { -// None = 0, -// NoneLimit = 100, -// Number = 100, -// NumberLimit = 200, -// Letter = 200, -// LetterLimit = 300, -// Kana = 300, -// KanaLimit = 400, -// Ideo = 400, -// IdeoLimit = 500 -// } -// -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Word, locale, text); -// } -// -// /*********************************************************************** -// -// Return the status from the break rule that determined -// the most recently returned break position. 
-// -// ***********************************************************************/ -// -// void getStatus (inout Break b) -// { -// b = cast(Break) super.getStatus(); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class ULineIterator : UBreakIterator -// { -// public enum Break -// { -// Soft = 0, -// SoftLimit = 100, -// Hard = 100, -// HardLimit = 200 -// } -// -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Line, locale, text); -// } -// -// /*********************************************************************** -// -// Return the status from the break rule that determined -// the most recently returned break position. -// -// ***********************************************************************/ -// -// void getStatus (inout Break b) -// { -// b = cast(Break) super.getStatus(); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class USentenceIterator : UBreakIterator -// { -// public enum Break -// { -// Term = 0, -// TermLimit = 100, -// Sep = 100, -// Limit = 200 -// } -// -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Sentence, locale, text); -// } -// -// /*********************************************************************** -// -// Return the status from the break rule that determined -// the most recently returned break position. 
-// -// ***********************************************************************/ -// -// void getStatus (inout Break b) -// { -// b = cast(Break) super.getStatus(); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class UTitleIterator : UBreakIterator -// { -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Title, locale, text); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class URuleIterator : UBreakIterator -// { -// /*********************************************************************** -// -// Open a new UBreakIterator for locating text boundaries -// using specified breaking rules -// -// ***********************************************************************/ -// -// this (UStringView rules, UStringView text = null) -// { -// UErrorCode e; -// -// handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e); -// testError (e, "failed to open rule iterator"); -// } -// } - - -/******************************************************************************* - - BreakIterator defines methods for finding the location of boundaries - in text. Pointer to a UBreakIterator maintain a current position and - scan over text returning the index of characters where boundaries occur. - - Line boundary analysis determines where a text string can be broken - when line-wrapping. The mechanism correctly handles punctuation and - hyphenated words. 
- - Sentence boundary analysis allows selection with correct interpretation - of periods within numbers and abbreviations, and trailing punctuation - marks such as quotation marks and parentheses. - - Word boundary analysis is used by search and replace functions, as well - as within text editing applications that allow the user to select words - with a double click. Word selection provides correct interpretation of - punctuation marks within and following words. Characters that are not - part of a word, such as symbols or punctuation marks, have word-breaks - on both sides. - - Character boundary analysis allows users to interact with characters - as they expect to, for example, when moving the cursor through a text - string. Character boundary analysis provides correct navigation of - through character strings, regardless of how the character is stored. - For example, an accented character might be stored as a base character - and a diacritical mark. What users consider to be a character can differ - between languages. - - Title boundary analysis locates all positions, typically starts of - words, that should be set to Title Case when title casing the text. - - See - this page for full details. - -*******************************************************************************/ - -struct UBreakIterator -{ - typedef void _UBreakIterator; - alias _UBreakIterator* Handle; - Handle handle; - UText ut; - - // this is returned by next(), previous() etc ... 
- const uint Done = uint.max; - alias Done DONE; - - /*********************************************************************** - - internal types passed to C API - - ***********************************************************************/ - - private enum Type - { - Character, - Word, - Line, - Sentence, - Title - } - - - public enum WordBreak - { - None = 0, - NoneLimit = 100, - Number = 100, - NumberLimit = 200, - Letter = 200, - LetterLimit = 300, - Kana = 300, - KanaLimit = 400, - Ideo = 400, - IdeoLimit = 500 - } - public enum LineBreak - { - Soft = 0, - SoftLimit = 100, - Hard = 100, - HardLimit = 200 - } - public enum SentenceBreak - { - Term = 0, - TermLimit = 100, - Sep = 100, - Limit = 200 - } - - - /*********************************************************************** - - Open a new UBreakIterator for locating text boundaries for - a specified locale. A UBreakIterator may be used for detecting - character, line, word, and sentence breaks in text. - - ***********************************************************************/ - - static UBreakIterator openWordIterator( ULocale locale, char[] str = null ){ - UBreakIterator res; - auto e = ICU.UErrorCode.OK; - res.handle = ubrk_open( Type.Word, cast(char*)locale.name.ptr, null, 0, e); - ICU.testError (e, "failed to open word iterator"); - if( str ) { - res.ut.openUTF8(str); - ubrk_setUText( res.handle, & res.ut, e); - ICU.testError (e, "failed to set text in iterator"); - } - return res; - } - - static UBreakIterator openLineIterator( ULocale locale, char[] str = null ){ - UBreakIterator res; - auto e = ICU.UErrorCode.OK; - res.handle = ubrk_open( Type.Line, cast(char*)locale.name.ptr, null, 0, e); - ICU.testError (e, "failed to open line iterator"); - if( str ) { - res.ut.openUTF8(str); - ubrk_setUText( res.handle, & res.ut, e); - ICU.testError (e, "failed to set text in iterator"); - } - return res; - } - - /*********************************************************************** - - Close a UBreakIterator - 
- ***********************************************************************/ - - void close () - { - ut.close(); - ubrk_close (handle); - } - - /*********************************************************************** - - Sets an existing iterator to point to a new piece of text - - ***********************************************************************/ - - void setText (UStringView text) - { - ICU.UErrorCode e; - ubrk_setText (handle, text.get.ptr, text.length, e); - ICU.testError (e, "failed to set iterator text"); - } - - void setText (char[] text) - { - auto e = ICU.UErrorCode.OK; - ut.openUTF8(text); - ubrk_setUText( handle, & ut, e); - ICU.testError (e, "failed to set text in iterator"); - } - - /*********************************************************************** - - Determine the most recently-returned text boundary - - ***********************************************************************/ - - uint current () - { - return ubrk_current (handle); - } - - /*********************************************************************** - - Determine the text boundary following the current text - boundary, or UBRK_DONE if all text boundaries have been - returned. - - If offset is specified, determines the text boundary - following the current text boundary: The value returned - is always greater than offset, or Done - - ***********************************************************************/ - - uint next (uint offset = uint.max) - { - if (offset == uint.max) - return ubrk_next (handle); - return ubrk_following (handle, offset); - } - alias next following; - /*********************************************************************** - - Determine the text boundary preceding the current text - boundary, or Done if all text boundaries have been returned. - - If offset is specified, determines the text boundary preceding - the specified offset. The value returned is always smaller than - offset, or Done. 
- - ***********************************************************************/ - - uint previous (uint offset = uint.max) - { - if (offset == uint.max) - return ubrk_previous (handle); - return ubrk_preceding (handle, offset); - } - - /*********************************************************************** - - Determine the index of the first character in the text - being scanned. This is not always the same as index 0 - of the text. - - ***********************************************************************/ - - uint first () - { - return ubrk_first (handle); - } - - /*********************************************************************** - - Determine the index immediately beyond the last character - in the text being scanned. This is not the same as the last - character - - ***********************************************************************/ - - uint last () - { - return ubrk_last (handle); - } - - /*********************************************************************** - - Returns true if the specfied position is a boundary position. - As a side effect, leaves the iterator pointing to the first - boundary position at or after "offset". - - ***********************************************************************/ - - bool isBoundary (uint offset) - { - return ubrk_isBoundary (handle, offset) != 0; - } - - /*********************************************************************** - - Return the status from the break rule that determined - the most recently returned break position. - - ***********************************************************************/ - - void getStatus (inout uint s) - { - s = getStatus (); - } - - /*********************************************************************** - - Return the status from the break rule that determined - the most recently returned break position. - - The values appear in the rule source within brackets, - {123}, for example. For rules that do not specify a status, - a default value of 0 is returned. 
- - For word break iterators, the possible values are defined - in enum UWordBreak - - ***********************************************************************/ - - private uint getStatus () - { - return ubrk_getRuleStatus (handle); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open; - Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules; - void function (Handle) ubrk_close; - void function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText; - uint function (Handle) ubrk_current; - uint function (Handle) ubrk_next; - uint function (Handle) ubrk_previous; - uint function (Handle) ubrk_first; - uint function (Handle) ubrk_last; - uint function (Handle, uint) ubrk_preceding; - uint function (Handle, uint) ubrk_following; - byte function (Handle, uint) ubrk_isBoundary; - uint function (Handle) ubrk_getRuleStatus; - Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone; - void function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ubrk_open, "ubrk_open"}, - {cast(void**) &ubrk_close, "ubrk_close"}, - {cast(void**) &ubrk_openRules, "ubrk_openRules"}, - {cast(void**) &ubrk_setText, "ubrk_setText"}, - {cast(void**) &ubrk_current, "ubrk_current"}, - 
{cast(void**) &ubrk_next, "ubrk_next"}, - {cast(void**) &ubrk_previous, "ubrk_previous"}, - {cast(void**) &ubrk_first, "ubrk_first"}, - {cast(void**) &ubrk_last, "ubrk_last"}, - {cast(void**) &ubrk_preceding, "ubrk_preceding"}, - {cast(void**) &ubrk_following, "ubrk_following"}, - {cast(void**) &ubrk_isBoundary, "ubrk_isBoundary"}, - {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"}, - {cast(void**) &ubrk_setUText, "ubrk_setUText"}, - {cast(void**) &ubrk_safeClone, "ubrk_safeClone"}, - ]; - - /********************************************************************** - - **********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuuc, targets); - } - - /********************************************************************** - - **********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UCalendar.d --- a/base/src/java/mangoicu/UCalendar.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,666 +0,0 @@ -/******************************************************************************* - - @file UCalendar.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. 
Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UCalendar; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -public import java.mangoicu.ULocale, - java.mangoicu.UTimeZone; - -/******************************************************************************* - - UCalendar is used for converting between a UDate object and - a set of integer fields such as Year, Month, Day, - Hour, and so on. (A UDate object represents a specific instant - in time with millisecond precision. See UDate for information about - the UDate) - - Types of UCalendar interpret a UDate according to the rules of a - specific calendar system. UCalendar supports Traditional & Gregorian. - - A UCalendar object can produce all the time field values needed to - implement the date-time formatting for a particular language and - calendar style (for example, Japanese-Gregorian, Japanese-Traditional). 
- - When computing a UDate from time fields, two special circumstances - may arise: there may be insufficient information to compute the UDate - (such as only year and month but no day in the month), or there may be - inconsistent information (such as "Tuesday, July 15, 1996" -- July 15, - 1996 is actually a Monday). - - Insufficient information. The calendar will use default information - to specify the missing fields. This may vary by calendar; for the - Gregorian calendar, the default for a field is the same as that of - the start of the epoch: i.e., Year = 1970, Month = January, - Date = 1, etc. - - Inconsistent information. If fields conflict, the calendar will give - preference to fields set more recently. For example, when determining - the day, the calendar will look for one of the following combinations - of fields. The most recent combination, as determined by the most - recently set single field, will be used. - - See http://oss.software.ibm.com/icu/apiref/udat_8h.html for full - details. 
- -*******************************************************************************/ - -class UCalendar : ICU -{ - package Handle handle; - - typedef double UDate; - - //Possible types of UCalendars - public enum Type - { - Traditional, - Gregorian - } - - // Possible fields in a UCalendar - public enum DateFields - { - Era, - Year, - Month, - WeekOfYear, - WeekOfMonth, - Date, - DayOfYear, - DayOfWeek, - DayOfWeekInMonth, - AmPm, - Hour, - HourOfDay, - Minute, - Second, - Millisecond, - ZoneOffset, - DstOffset, - YearWoy, - DowLocal, - ExtendedYear, - JulianDay, - MillisecondsInDay, - FieldCount, - DayOfMonth = Date - } - - // Possible days of the week in a UCalendar - public enum DaysOfWeek - { - Sunday = 1, - Monday, - Tuesday, - Wednesday, - Thursday, - Friday, - Saturday - } - - // Possible months in a UCalendar - public enum Months - { - January, - February, - March, - April, - May, - June, - July, - August, - September, - October, - November, - December, - UnDecimber - } - - // Possible AM/PM values in a UCalendar - public enum AMPMs - { - AM, - PM - } - - // Possible formats for a UCalendar's display name - public enum DisplayNameType - { - Standard, - ShortStandard, - DST, - ShortDST - } - - // Possible limit values for a UCalendar - public enum Limit - { - Minimum, - Maximum, - GreatestMinimum, - LeastMaximum, - ActualMinimum, - ActualMaximum - } - - // Types of UCalendar attributes - private enum Attribute - { - Lenient, // unused: set from UDateFormat instead - FirstDayOfWeek, - MinimalDaysInFirstWeek - } - - /*********************************************************************** - - Open a UCalendar. 
A UCalendar may be used to convert a - millisecond value to a year, month, and day - - ***********************************************************************/ - - this (inout UTimeZone zone, inout ULocale locale, Type type = Type.Traditional) - { - UErrorCode e; - - handle = ucal_open (cast(wchar*)zone.name.ptr, zone.name.length, ICU.toString(locale.name), type, e); - testError (e, "failed to open calendar"); - } - - /*********************************************************************** - - Internal only: Open a UCalendar with the given handle - - ***********************************************************************/ - - package this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Close this UCalendar - - ***********************************************************************/ - - ~this () - { - ucal_close (handle); - } - - /*********************************************************************** - - Set the TimeZone used by a UCalendar - - ***********************************************************************/ - - void setTimeZone (inout UTimeZone zone) - { - UErrorCode e; - - ucal_setTimeZone (handle, cast(wchar*)zone.name.ptr, zone.name.length, e); - testError (e, "failed to set calendar time zone"); - } - - /*********************************************************************** - - Get display name of the TimeZone used by this UCalendar - - ***********************************************************************/ - - void getTimeZoneName (UString s, inout ULocale locale, DisplayNameType type=DisplayNameType.Standard) - { - uint format (wchar* dst, uint length, inout ICU.UErrorCode e) - { - return ucal_getTimeZoneDisplayName (handle, type, toString(locale.name), dst, length, e); - } - - s.format (&format, "failed to get time zone name"); - } - - /*********************************************************************** - - Determine if a UCalendar is currently in daylight savings 
- time - - ***********************************************************************/ - - bool inDaylightTime () - { - UErrorCode e; - - auto x = ucal_inDaylightTime (handle, e); - testError (e, "failed to test calendar daylight time"); - return x != 0; - } - - /*********************************************************************** - - Get the current date and time - - ***********************************************************************/ - - UDate getNow () - { - return ucal_getNow (); - } - - /*********************************************************************** - - Get a UCalendar's current time in millis. The time is - represented as milliseconds from the epoch - - ***********************************************************************/ - - UDate getMillis () - { - UErrorCode e; - - auto x = ucal_getMillis (handle, e); - testError (e, "failed to get time"); - return x; - } - - /*********************************************************************** - - Set a UCalendar's current time in millis. 
The time is - represented as milliseconds from the epoch - - ***********************************************************************/ - - void setMillis (UDate date) - { - UErrorCode e; - - ucal_setMillis (handle, date, e); - testError (e, "failed to set time"); - } - - /*********************************************************************** - - Set a UCalendar's current date - - ***********************************************************************/ - - void setDate (uint year, Months month, uint date) - { - UErrorCode e; - - ucal_setDate (handle, year, month, date, e); - testError (e, "failed to set date"); - } - - /*********************************************************************** - - Set a UCalendar's current date - - ***********************************************************************/ - - void setDateTime (uint year, Months month, uint date, uint hour, uint minute, uint second) - { - UErrorCode e; - - ucal_setDateTime (handle, year, month, date, hour, minute, second, e); - testError (e, "failed to set date/time"); - } - - /*********************************************************************** - - Returns TRUE if the given Calendar object is equivalent - to this one - - ***********************************************************************/ - - bool isEquivalent (UCalendar when) - { - return ucal_equivalentTo (handle, when.handle) != 0; - } - - /*********************************************************************** - - Compares the Calendar time - - ***********************************************************************/ - - bool isEqual (UCalendar when) - { - return (this is when || getMillis == when.getMillis); - } - - /*********************************************************************** - - Returns true if this Calendar's current time is before - "when"'s current time - - ***********************************************************************/ - - bool isBefore (UCalendar when) - { - return (this !is when || getMillis < when.getMillis); - } - 
- /*********************************************************************** - - Returns true if this Calendar's current time is after - "when"'s current time - - ***********************************************************************/ - - bool isAfter (UCalendar when) - { - return (this !is when || getMillis > when.getMillis); - } - - /*********************************************************************** - - Add a specified signed amount to a particular field in a - UCalendar - - ***********************************************************************/ - - void add (DateFields field, uint amount) - { - UErrorCode e; - - ucal_add (handle, field, amount, e); - testError (e, "failed to add to calendar"); - } - - /*********************************************************************** - - Add a specified signed amount to a particular field in a - UCalendar - - ***********************************************************************/ - - void roll (DateFields field, uint amount) - { - UErrorCode e; - - ucal_roll (handle, field, amount, e); - testError (e, "failed to roll calendar"); - } - - /*********************************************************************** - - Get the current value of a field from a UCalendar - - ***********************************************************************/ - - uint get (DateFields field) - { - UErrorCode e; - - auto x = ucal_get (handle, field, e); - testError (e, "failed to get calendar field"); - return x; - } - - /*********************************************************************** - - Set the value of a field in a UCalendar - - ***********************************************************************/ - - void set (DateFields field, uint value) - { - ucal_set (handle, field, value); - } - - /*********************************************************************** - - Determine if a field in a UCalendar is set - - ***********************************************************************/ - - bool isSet (DateFields field) - { - return 
ucal_isSet (handle, field) != 0; - } - - /*********************************************************************** - - Clear a field in a UCalendar - - ***********************************************************************/ - - void clearField (DateFields field) - { - ucal_clearField (handle, field); - } - - /*********************************************************************** - - Clear all fields in a UCalendar - - ***********************************************************************/ - - void clear () - { - ucal_clear (handle); - } - - /*********************************************************************** - - Determine a limit for a field in a UCalendar. A limit is a - maximum or minimum value for a field - - ***********************************************************************/ - - uint getLimit (DateFields field, Limit type) - { - UErrorCode e; - - auto x = ucal_getLimit (handle, field, type, e); - testError (e, "failed to get calendar limit"); - return x; - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getDaysInFirstWeek () - { - return ucal_getAttribute (handle, Attribute.MinimalDaysInFirstWeek); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getFirstDayOfWeek () - { - return ucal_getAttribute (handle, Attribute.FirstDayOfWeek); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setDaysInFirstWeek (uint value) - { - ucal_setAttribute (handle, Attribute.MinimalDaysInFirstWeek, value); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setFirstDayOfWeek (uint value) - { - ucal_setAttribute 
(handle, Attribute.FirstDayOfWeek, value); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, char*, Type, inout UErrorCode) ucal_open; - void function (Handle) ucal_close; - UDate function () ucal_getNow; - UDate function (Handle, inout UErrorCode) ucal_getMillis; - void function (Handle, UDate, inout UErrorCode) ucal_setMillis; - void function (Handle, uint, uint, uint, inout UErrorCode) ucal_setDate; - void function (Handle, uint, uint, uint, uint, uint, uint, inout UErrorCode) ucal_setDateTime; - byte function (Handle, Handle) ucal_equivalentTo; - void function (Handle, uint, uint, inout UErrorCode) ucal_add; - void function (Handle, uint, uint, inout UErrorCode) ucal_roll; - uint function (Handle, uint, inout UErrorCode) ucal_get; - void function (Handle, uint, uint) ucal_set; - byte function (Handle, uint) ucal_isSet; - void function (Handle, uint) ucal_clearField; - void function (Handle) ucal_clear; - uint function (Handle, uint, uint, inout UErrorCode) ucal_getLimit; - void function (Handle, wchar*, uint, inout UErrorCode) ucal_setTimeZone; - byte function (Handle, uint) ucal_inDaylightTime; - uint function (Handle, uint) ucal_getAttribute; - void function (Handle, uint, uint) ucal_setAttribute; - uint function (Handle, uint, char*, wchar*, uint, inout UErrorCode) ucal_getTimeZoneDisplayName; - } - - /*********************************************************************** - - ***********************************************************************/ - - static 
FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucal_open, "ucal_open"}, - {cast(void**) &ucal_close, "ucal_close"}, - {cast(void**) &ucal_getNow, "ucal_getNow"}, - {cast(void**) &ucal_getMillis, "ucal_getMillis"}, - {cast(void**) &ucal_setMillis, "ucal_setMillis"}, - {cast(void**) &ucal_setDate, "ucal_setDate"}, - {cast(void**) &ucal_setDateTime, "ucal_setDateTime"}, - {cast(void**) &ucal_equivalentTo, "ucal_equivalentTo"}, - {cast(void**) &ucal_add, "ucal_add"}, - {cast(void**) &ucal_roll, "ucal_roll"}, - {cast(void**) &ucal_get, "ucal_get"}, - {cast(void**) &ucal_set, "ucal_set"}, - {cast(void**) &ucal_clearField, "ucal_clearField"}, - {cast(void**) &ucal_clear, "ucal_clear"}, - {cast(void**) &ucal_getLimit, "ucal_getLimit"}, - {cast(void**) &ucal_setTimeZone, "ucal_setTimeZone"}, - {cast(void**) &ucal_inDaylightTime, "ucal_inDaylightTime"}, - {cast(void**) &ucal_getAttribute, "ucal_getAttribute"}, - {cast(void**) &ucal_setAttribute, "ucal_setAttribute"}, - {cast(void**) &ucal_isSet, "ucal_isSet"}, - {cast(void**) &ucal_getTimeZoneDisplayName, "ucal_getTimeZoneDisplayName"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UChar.d --- a/base/src/java/mangoicu/UChar.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1240 +0,0 @@ -/******************************************************************************* - - @file UChar.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. 
In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UChar; - -private import java.mangoicu.ICU; - -/******************************************************************************* - - This API provides low-level access to the Unicode Character - Database. 
In addition to raw property values, some convenience - functions calculate derived properties, for example for Java-style - programming. - - Unicode assigns each code point (not just assigned character) - values for many properties. Most of them are simple boolean - flags, or constants from a small enumerated list. For some - properties, values are strings or other relatively more complex - types. - - For more information see "About the Unicode Character Database" - (http://www.unicode.org/ucd/) and the ICU User Guide chapter on - Properties (http://oss.software.ibm.com/icu/userguide/properties.html). - - Many functions are designed to match java.lang.Character functions. - See the individual function documentation, and see the JDK 1.4.1 - java.lang.Character documentation at - http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html - - There are also functions that provide easy migration from C/POSIX - functions like isblank(). Their use is generally discouraged because - the C/POSIX standards do not define their semantics beyond the ASCII - range, which means that different implementations exhibit very different - behavior. Instead, Unicode properties should be used directly. - - There are also only a few, broad C/POSIX character classes, and they - tend to be used for conflicting purposes. For example, the "isalpha()" - class is sometimes used to determine word boundaries, while a more - sophisticated approach would at least distinguish initial letters from - continuation characters (the latter including combining marks). (In - ICU, BreakIterator is the most sophisticated API for word boundaries.) - Another example: There is no "istitle()" class for titlecase characters. - - A summary of the behavior of some C/POSIX character classification - implementations for Unicode is available at - http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html - - See - this page for full details. 
- -*******************************************************************************/ - -class UChar : ICU -{ - public enum Property - { - Alphabetic = 0, - BinaryStart = Alphabetic, - AsciiHexDigit, - BidiControl, - BidiMirrored, - Dash, - DefaultIgnorableCodePoint, - Deprecated, - Diacritic, - Extender, - FullCompositionExclusion, - GraphemeBase, - GraphemeExtend, - GraphemeLink, - HexDigit, - Hyphen, - IdContinue, - IdStart, - Ideographic, - IdsBinaryOperator, - IdsTrinaryOperator, - JoinControl, - LogicalOrderException, - Lowercase, - Math, - NoncharacterCodePoint, - QuotationMark, - Radical, - SoftDotted, - TerminalPunctuation, - UnifiedIdeograph, - Uppercase, - WhiteSpace, - XidContinue, - XidStart, - CaseSensitive, - STerm, - VariationSelector, - NfdInert, - NfkdInert, - NfcInert, - NfkcInert, - SegmentStarter, - BinaryLimit, - BidiClass = 0x1000, - IntStart = BidiClass, - Block, CanonicalCombiningClass, - DecompositionType, - EastAsianWidth, - GeneralCategory, - JoiningGroup, - JoiningType, - LineBreak, - NumericType, - Script, - HangulSyllableType, - NfdQuickCheck, - NfkdQuickCheck, - NfcQuickCheck, - NfkcQuickCheck, - LeadCanonicalCombiningClass, - TrailCanonicalCombiningClass, - IntLimit, - GeneralCategoryMask = 0x2000, - MaskStart = GeneralCategoryMask, - MaskLimit, - NumericValue = 0x3000, - DoubleStart = NumericValue, - DoubleLimit, - Age = 0x4000, - StringStart = Age, - BidiMirroringGlyph, - CaseFolding, - IsoComment, - LowercaseMapping, - Name, - SimpleCaseFolding, - SimpleLowercaseMapping, - SimpleTitlecaseMapping, - SimpleUppercaseMapping, - TitlecaseMapping, - Unicode1Name, - UppercaseMapping, - StringLimit, - InvalidCode = -1 - } - - public enum Category - { - Unassigned = 0, - GeneralOtherTypes = 0, - UppercaseLetter = 1, - LowercaseLetter = 2, - TitlecaseLetter = 3, - ModifierLetter = 4, - OtherLetter = 5, - NonSpacingMark = 6, - EnclosingMark = 7, - CombiningSpacingMark = 8, - DecimalDigitNumber = 9, - LetterNumber = 10, - OtherNumber = 11, - 
SpaceSeparator = 12, - LineSeparator = 13, - ParagraphSeparator = 14, - ControlChar = 15, - FormatChar = 16, - PrivateUseChar = 17, - Surrogate = 18, - DashPunctuation = 19, - StartPunctuation = 20, - EndPunctuation = 21, - ConnectorPunctuation = 22, - OtherPunctuation = 23, - MathSymbol = 24, - CurrencySymbol = 25, - ModifierSymbol = 26, - OtherSymbol = 27, - InitialPunctuation = 28, - FinalPunctuation = 29, - Count - } - - public enum Direction - { - LeftToRight = 0, - RightToLeft = 1, - EuropeanNumber = 2, - EuropeanNumberSeparator = 3, - EuropeanNumberTerminator = 4, - ArabicNumber = 5, - CommonNumberSeparator = 6, - BlockSeparator = 7, - SegmentSeparator = 8, - WhiteSpaceNeutral = 9, - OtherNeutral = 10, - LeftToRightEmbedding = 11, - LeftToRightOverride = 12, - RightToLeftArabic = 13, - RightToLeftEmbedding = 14, - RightToLeftOverride = 15, - PopDirectionalFormat = 16, - DirNonSpacingMark = 17, - BoundaryNeutral = 18, - Count - } - - public enum BlockCode - { - NoBlock = 0, - BasicLatin = 1, - Latin1Supplement = 2, - LatinExtendedA = 3, - LatinExtendedB = 4, - IpaExtensions = 5, - SpacingModifierLetters = 6, - CombiningDiacriticalMarks = 7, - Greek = 8, - Cyrillic = 9, - Armenian = 10, - Hebrew = 11, - Arabic = 12, - Syriac = 13, - Thaana = 14, - Devanagari = 15, - Bengali = 16, - Gurmukhi = 17, - Gujarati = 18, - Oriya = 19, - Tamil = 20, - Telugu = 21, - Kannada = 22, - Malayalam = 23, - Sinhala = 24, - Thai = 25, - Lao = 26, - Tibetan = 27, - Myanmar = 28, - Georgian = 29, - HangulJamo = 30, - Ethiopic = 31, - Cherokee = 32, - UnifiedCanadianAboriginalSyllabics = 33, - Ogham = 34, - Runic = 35, - Khmer = 36, - Mongolian = 37, - LatinExtendedAdditional = 38, - GreekExtended = 39, - GeneralPunctuation = 40, - SuperscriptsAndSubscripts = 41, - CurrencySymbols = 42, - CombiningMarksForSymbols = 43, - LetterlikeSymbols = 44, - NumberForms = 45, - Arrows = 46, - MathematicalOperators = 47, - MiscellaneousTechnical = 48, - ControlPictures = 49, - 
OpticalCharacterRecognition = 50, - EnclosedAlphanumerics = 51, - BoxDrawing = 52, - BlockElements = 53, - GeometricShapes = 54, - MiscellaneousSymbols = 55, - Dingbats = 56, - BraillePatterns = 57, - CjkRadicalsSupplement = 58, - KangxiRadicals = 59, - IdeographicDescriptionCharacters = 60, - CjkSymbolsAndPunctuation = 61, - Hiragana = 62, - Katakana = 63, - Bopomofo = 64, - HangulCompatibilityJamo = 65, - Kanbun = 66, - BopomofoExtended = 67, - EnclosedCjkLettersAndMonths = 68, - CjkCompatibility = 69, - CjkUnifiedIdeographsExtensionA = 70, - CjkUnifiedIdeographs = 71, - YiSyllables = 72, - YiRadicals = 73, - HangulSyllables = 74, - HighSurrogates = 75, - HighPrivateUseSurrogates = 76, - LowSurrogates = 77, - PrivateUse = 78, - PrivateUseArea = PrivateUse, - CjkCompatibilityIdeographs = 79, - AlphabeticPresentationForms = 80, - ArabicPresentationFormsA = 81, - CombiningHalfMarks = 82, - CjkCompatibilityForms = 83, - SmallFormVariants = 84, - ArabicPresentationFormsB = 85, - Specials = 86, - HalfwidthAndFullwidthForms = 87, - OldItalic = 88, - Gothic = 89, - Deseret = 90, - ByzantineMusicalSymbols = 91, - MusicalSymbols = 92, - MathematicalAlphanumericSymbols = 93, - CjkUnifiedIdeographsExtensionB = 94, - CjkCompatibilityIdeographsSupplement = 95, - Tags = 96, - CyrillicSupplementary = 97, - CyrillicSupplement = CyrillicSupplementary, - Tagalog = 98, - Hanunoo = 99, - Buhid = 100, - Tagbanwa = 101, - MiscellaneousMathematicalSymbolsA = 102, - SupplementalArrowsA = 103, - SupplementalArrowsB = 104, - MiscellaneousMathematicalSymbolsB = 105, - SupplementalMathematicalOperators = 106, - KatakanaPhoneticExtensions = 107, - VariationSelectors = 108, - SupplementaryPrivateUseAreaA = 109, - SupplementaryPrivateUseAreaB = 110, - Limbu = 111, - TaiLe = 112, - KhmerSymbols = 113, - PhoneticExtensions = 114, - MiscellaneousSymbolsAndArrows = 115, - YijingHexagramSymbols = 116, - LinearBSyllabary = 117, - LinearBIdeograms = 118, - AegeanNumbers = 119, - Ugaritic = 120, - 
Shavian = 121, - Osmanya = 122, - CypriotSyllabary = 123, - TaiXuanJingSymbols = 124, - VariationSelectorsSupplement = 125, - Count, - InvalidCode = -1 - } - - public enum EastAsianWidth - { - Neutral, - Ambiguous, - Halfwidth, - Fullwidth, - Narrow, - Wide, - Count - } - - public enum CharNameChoice - { - Unicode, - Unicode10, - Extended, - Count - } - - public enum NameChoice - { - Short, - Long, - Count - } - - public enum DecompositionType - { - None, - Canonical, - Compat, - Circle, - Final, - Font, - Fraction, - Initial, - Isolated, - Medial, - Narrow, - Nobreak, - Small, - Square, - Sub, - Super, - Vertical, - Wide, - Count - } - - public enum JoiningType - { - NonJoining, - JoinCausing, - DualJoining, - LeftJoining, - RightJoining, - Transparent, - Count - } - - public enum JoiningGroup - { - NoJoiningGroup, - Ain, - Alaph, - Alef, - Beh, - Beth, - Dal, - DalathRish, - E, - Feh, - FinalSemkath, - Gaf, - Gamal, - Hah, - HamzaOnHehGoal, - He, - Heh, - HehGoal, - Heth, - Kaf, - Kaph, - KnottedHeh, - Lam, - Lamadh, - Meem, - Mim, - Noon, - Nun, - Pe, - Qaf, - Qaph, - Reh, - Reversed_Pe, - Sad, - Sadhe, - Seen, - Semkath, - Shin, - Swash_Kaf, - Syriac_Waw, - Tah, - Taw, - Teh_Marbuta, - Teth, - Waw, - Yeh, - Yeh_Barree, - Yeh_With_Tail, - Yudh, - Yudh_He, - Zain, - Fe, - Khaph, - Zhain, - Count - } - - public enum LineBreak - { - Unknown, - Ambiguous, - Alphabetic, - BreakBoth, - BreakAfter, - BreakBefore, - MandatoryBreak, - ContingentBreak, - ClosePunctuation, - CombiningMark, - CarriageReturn, - Exclamation, - Glue, - Hyphen, - Ideographic, - Inseperable, - Inseparable = Inseperable, - InfixNumeric, - LineFeed, - Nonstarter, - Numeric, - OpenPunctuation, - PostfixNumeric, - PrefixNumeric, - Quotation, - ComplexContext, - Surrogate, - Space, - BreakSymbols, - Zwspace, - NextLine, - WordJoiner, - Count - } - - public enum NumericType - { - None, - Decimal, - Digit, - Numeric, - Count - } - - public enum HangulSyllableType - { - NotApplicable, - LeadingJamo, - 
VowelJamo, - TrailingJamo, - LvSyllable, - LvtSyllable, - Count - } - - /*********************************************************************** - - Get the property value for an enumerated or integer - Unicode property for a code point. Also returns binary - and mask property values. - - Unicode, especially in version 3.2, defines many more - properties than the original set in UnicodeData.txt. - - The properties APIs are intended to reflect Unicode - properties as defined in the Unicode Character Database - (UCD) and Unicode Technical Reports (UTR). For details - about the properties see http://www.unicode.org/ . For - names of Unicode properties see the file PropertyAliases.txt - - ***********************************************************************/ - - uint getProperty (dchar c, Property p) - { - return u_getIntPropertyValue (cast(uint) c, cast(uint) p); - } - - /*********************************************************************** - - Get the minimum value for an enumerated/integer/binary - Unicode property - - ***********************************************************************/ - - uint getPropertyMinimum (Property p) - { - return u_getIntPropertyMinValue (p); - } - - /*********************************************************************** - - Get the maximum value for an enumerated/integer/binary - Unicode property - - ***********************************************************************/ - - uint getPropertyMaximum (Property p) - { - return u_getIntPropertyMaxValue (p); - } - - /*********************************************************************** - - Returns the bidirectional category value for the code - point, which is used in the Unicode bidirectional algorithm - (UAX #9 http://www.unicode.org/reports/tr9/). 
- - ***********************************************************************/ - - Direction charDirection (dchar c) - { - return cast(Direction) u_charDirection (c); - } - - /*********************************************************************** - - Returns the Unicode allocation block that contains the - character, as a BlockCode value cast from the result of - ublock_getCode - - ***********************************************************************/ - - BlockCode getBlockCode (dchar c) - { - return cast(BlockCode) ublock_getCode (c); - } - - /*********************************************************************** - - Retrieve the name of a Unicode character. - - The name is written into the caller-supplied buffer dst; the - returned array is the slice dst[0..len], where len is the - length reported by u_charName. The ICU error code is passed - to testError along with a message hinting that dst may have - been too small. - - ***********************************************************************/ - - char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst) - { - UErrorCode e; - - uint len = u_charName (c, choice, dst.ptr, dst.length, e); - testError (e, "failed to extract char name (buffer too small?)"); - return dst [0..len]; - } - - /*********************************************************************** - - Get the ISO 10646 comment for a character. - - The comment is written into the caller-supplied buffer dst; - the returned array is the slice dst[0..len], where len is - the length reported by u_getISOComment. The ICU error code - is passed to testError along with a message hinting that dst - may have been too small. - - ***********************************************************************/ - - char[] getComment (dchar c, inout char[] dst) - { - UErrorCode e; - - uint len = u_getISOComment (c, dst.ptr, dst.length, e); - testError (e, "failed to extract comment (buffer too small?)"); - return dst [0..len]; - } - - /*********************************************************************** - - Find a Unicode character by its name and return its code - point value.
- - ***********************************************************************/ - - dchar charFromName (CharNameChoice choice, char[] name) - { - UErrorCode e; - - dchar c = u_charFromName (choice, toString(name), e); - testError (e, "failed to locate char name"); - return c; - } - - /*********************************************************************** - - Return the Unicode name for a given property, as given in the - Unicode database file PropertyAliases.txt. The C string - returned by u_getPropertyName is converted with toArray. - - ***********************************************************************/ - - char[] getPropertyName (Property p, NameChoice choice) - { - return toArray (u_getPropertyName (p, choice)); - } - - /*********************************************************************** - - Return the Unicode name for a given property value, as given - in the Unicode database file PropertyValueAliases.txt. - - Note that u_getPropertyValueName is called with its arguments - in (p, value, choice) order, which differs from the parameter - order of this method. The C string it returns is converted - with toArray. - - ***********************************************************************/ - - char[] getPropertyValueName (Property p, NameChoice choice, uint value) - { - return toArray (u_getPropertyValueName (p, value, choice)); - } - - /*********************************************************************** - - Gets the Unicode version information; v is passed by - reference to u_getUnicodeVersion - - ***********************************************************************/ - - void getUnicodeVersion (inout Version v) - { - u_getUnicodeVersion (v); - } - - /*********************************************************************** - - Get the "age" of the code point; v is passed by reference - to u_charAge together with the code point c - - ***********************************************************************/ - - void getCharAge (dchar c, inout Version v) - { - u_charAge (c, v); - } - - - /*********************************************************************** - - These are externalised directly to the client (sans wrapper), - but this may have to change for linux, depending upon the - ICU function-naming conventions within the Posix libraries.
- - ***********************************************************************/ - - static extern (C) - { - /*************************************************************** - - Check if a code point has the Alphabetic Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isUAlphabetic; - - /*************************************************************** - - Check if a code point has the Lowercase Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isULowercase; - - /*************************************************************** - - Check if a code point has the Uppercase Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isUUppercase; - - /*************************************************************** - - Check if a code point has the White_Space Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isUWhiteSpace; - - /*************************************************************** - - Determines whether the specified code point has the - general category "Ll" (lowercase letter). - - ***************************************************************/ - - bool function (dchar c) isLower; - - /*************************************************************** - - Determines whether the specified code point has the - general category "Lu" (uppercase letter). - - ***************************************************************/ - - bool function (dchar c) isUpper; - - /*************************************************************** - - Determines whether the specified code point is a - titlecase letter. 
- - ***************************************************************/ - - bool function (dchar c) isTitle; - - /*************************************************************** - - Determines whether the specified code point is a - digit character according to Java. - - ***************************************************************/ - - bool function (dchar c) isDigit; - - /*************************************************************** - - Determines whether the specified code point is a - letter character. - - ***************************************************************/ - - bool function (dchar c) isAlpha; - - /*************************************************************** - - Determines whether the specified code point is an - alphanumeric character (letter or digit) according - to Java. - - ***************************************************************/ - - bool function (dchar c) isAlphaNumeric; - - /*************************************************************** - - Determines whether the specified code point is a - hexadecimal digit. - - ***************************************************************/ - - bool function (dchar c) isHexDigit; - - /*************************************************************** - - Determines whether the specified code point is a - punctuation character. - - ***************************************************************/ - - bool function (dchar c) isPunct; - - /*************************************************************** - - Determines whether the specified code point is a - "graphic" character (printable, excluding spaces). - - ***************************************************************/ - - bool function (dchar c) isGraph; - - /*************************************************************** - - Determines whether the specified code point is a - "blank" or "horizontal space", a character that - visibly separates words on a line. 
- - ***************************************************************/ - - bool function (dchar c) isBlank; - - /*************************************************************** - - Determines whether the specified code point is - "defined", which usually means that it is assigned - a character. - - ***************************************************************/ - - bool function (dchar c) isDefined; - - /*************************************************************** - - Determines if the specified character is a space - character or not. - - ***************************************************************/ - - bool function (dchar c) isSpace; - - /*************************************************************** - - Determine if the specified code point is a space - character according to Java. - - ***************************************************************/ - - bool function (dchar c) isJavaSpaceChar; - - /*************************************************************** - - Determines if the specified code point is a whitespace - character according to Java/ICU. - - ***************************************************************/ - - bool function (dchar c) isWhiteSpace; - - /*************************************************************** - - Determines whether the specified code point is a - control character (as defined by this function). - - ***************************************************************/ - - bool function (dchar c) isCtrl; - - /*************************************************************** - - Determines whether the specified code point is an ISO - control code. - - ***************************************************************/ - - bool function (dchar c) isISOControl; - - /*************************************************************** - - Determines whether the specified code point is a - printable character. 
- - ***************************************************************/ - - bool function (dchar c) isPrint; - - /*************************************************************** - - Determines whether the specified code point is a - base character. - - ***************************************************************/ - - bool function (dchar c) isBase; - - /*************************************************************** - - Determines if the specified character is permissible - as the first character in an identifier according to - Unicode (The Unicode Standard, Version 3.0, chapter - 5.16 Identifiers). - - ***************************************************************/ - - bool function (dchar c) isIDStart; - - /*************************************************************** - - Determines if the specified character is permissible - in an identifier according to Java. - - ***************************************************************/ - - bool function (dchar c) isIDPart; - - /*************************************************************** - - Determines if the specified character should be - regarded as an ignorable character in an identifier, - according to Java. - - ***************************************************************/ - - bool function (dchar c) isIDIgnorable; - - /*************************************************************** - - Determines if the specified character is permissible - as the first character in a Java identifier. - - ***************************************************************/ - - bool function (dchar c) isJavaIDStart; - - /*************************************************************** - - Determines if the specified character is permissible - in a Java identifier. - - ***************************************************************/ - - bool function (dchar c) isJavaIDPart; - - /*************************************************************** - - Determines whether the code point has the - Bidi_Mirrored property. 
- - ***************************************************************/ - - bool function (dchar c) isMirrored; - - /*************************************************************** - - Returns the decimal digit value of a decimal digit - character. - - ***************************************************************/ - - ubyte function (dchar c) charDigitValue; - - /*************************************************************** - - Maps the specified character to a "mirror-image" - character. - - ***************************************************************/ - - dchar function (dchar c) charMirror; - - /*************************************************************** - - Returns the general category value for the code point. - - ***************************************************************/ - - ubyte function (dchar c) charType; - - /*************************************************************** - - Returns the combining class of the code point as - specified in UnicodeData.txt. - - ***************************************************************/ - - ubyte function (dchar c) getCombiningClass; - - /*************************************************************** - - The given character is mapped to its lowercase - equivalent according to UnicodeData.txt; if the - character has no lowercase equivalent, the - character itself is returned. - - ***************************************************************/ - - dchar function (dchar c) toLower; - - /*************************************************************** - - The given character is mapped to its uppercase equivalent - according to UnicodeData.txt; if the character has no - uppercase equivalent, the character itself is returned. 
- - ***************************************************************/ - - dchar function (dchar c) toUpper; - - /*************************************************************** - - The given character is mapped to its titlecase - equivalent according to UnicodeData.txt; if none - is defined, the character itself is returned. - - ***************************************************************/ - - dchar function (dchar c) toTitle; - - /*************************************************************** - - The given character is mapped to its case folding - equivalent according to UnicodeData.txt and - CaseFolding.txt; if the character has no case folding - equivalent, the character itself is returned. - - ***************************************************************/ - - dchar function (dchar c, uint options) foldCase; - - /*************************************************************** - - Returns the decimal digit value of the code point in - the specified radix. - - ***************************************************************/ - - uint function (dchar ch, ubyte radix) digit; - - /*************************************************************** - - Determines the character representation for a specific - digit in the specified radix. - - ***************************************************************/ - - dchar function (uint digit, ubyte radix) forDigit; - - /*************************************************************** - - Get the numeric value for a Unicode code point as - defined in the Unicode Character Database. - - ***************************************************************/ - - double function (dchar c) getNumericValue; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - uint function (uint, uint) u_getIntPropertyValue; - uint function (uint) u_getIntPropertyMinValue; - uint function (uint) u_getIntPropertyMaxValue; - uint function (dchar) u_charDirection; - uint function (dchar) ublock_getCode; - uint function (dchar, uint, char*, uint, inout UErrorCode) u_charName; - uint function (dchar, char*, uint, inout UErrorCode) u_getISOComment; - uint function (uint, char*, inout UErrorCode) u_charFromName; - char* function (uint, uint) u_getPropertyName; - char* function (uint, uint, uint) u_getPropertyValueName; - void function (inout Version) u_getUnicodeVersion; - void function (dchar, inout Version) u_charAge; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &forDigit, "u_forDigit"}, - {cast(void**) &digit, "u_digit"}, - {cast(void**) &foldCase, "u_foldCase"}, - {cast(void**) &toTitle, "u_totitle"}, - {cast(void**) &toUpper, "u_toupper"}, - {cast(void**) &toLower, "u_tolower"}, - {cast(void**) &charType, "u_charType"}, - {cast(void**) &charMirror, "u_charMirror"}, - {cast(void**) &charDigitValue, "u_charDigitValue"}, - {cast(void**) &isJavaIDPart, "u_isJavaIDPart"}, - {cast(void**) &isJavaIDStart, "u_isJavaIDStart"}, - {cast(void**) &isIDIgnorable, "u_isIDIgnorable"}, - {cast(void**) &isIDPart, "u_isIDPart"}, - {cast(void**) &isIDStart, "u_isIDStart"}, - {cast(void**) &isMirrored, "u_isMirrored"}, - {cast(void**) &isBase, "u_isbase"}, - {cast(void**) &isPrint, "u_isprint"}, - 
{cast(void**) &isISOControl, "u_isISOControl"}, - {cast(void**) &isCtrl, "u_iscntrl"}, - {cast(void**) &isWhiteSpace, "u_isWhitespace"}, - {cast(void**) &isJavaSpaceChar, "u_isJavaSpaceChar"}, - {cast(void**) &isSpace, "u_isspace"}, - {cast(void**) &isDefined, "u_isdefined"}, - {cast(void**) &isBlank, "u_isblank"}, - {cast(void**) &isGraph, "u_isgraph"}, - {cast(void**) &isPunct, "u_ispunct"}, - {cast(void**) &isHexDigit, "u_isxdigit"}, - {cast(void**) &isAlpha, "u_isalpha"}, - {cast(void**) &isAlphaNumeric, "u_isalnum"}, - {cast(void**) &isDigit, "u_isdigit"}, - {cast(void**) &isTitle, "u_istitle"}, - {cast(void**) &isUpper, "u_isupper"}, - {cast(void**) &isLower, "u_islower"}, - {cast(void**) &isUAlphabetic, "u_isUAlphabetic"}, - {cast(void**) &isUWhiteSpace, "u_isUWhiteSpace"}, - {cast(void**) &isUUppercase, "u_isUUppercase"}, - {cast(void**) &isULowercase, "u_isULowercase"}, - {cast(void**) &getNumericValue, "u_getNumericValue"}, - {cast(void**) &getCombiningClass, "u_getCombiningClass"}, - {cast(void**) &u_getIntPropertyValue, "u_getIntPropertyValue"}, - {cast(void**) &u_getIntPropertyMinValue,"u_getIntPropertyMinValue"}, - {cast(void**) &u_getIntPropertyMaxValue,"u_getIntPropertyMaxValue"}, - {cast(void**) &u_charDirection, "u_charDirection"}, - {cast(void**) &ublock_getCode, "ublock_getCode"}, - {cast(void**) &u_charName, "u_charName"}, - {cast(void**) &u_getISOComment, "u_getISOComment"}, - {cast(void**) &u_charFromName, "u_charFromName"}, - {cast(void**) &u_getPropertyName, "u_getPropertyName"}, - {cast(void**) &u_getPropertyValueName, "u_getPropertyValueName"}, - {cast(void**) &u_getUnicodeVersion, "u_getUnicodeVersion"}, - {cast(void**) &u_charAge, "u_charAge"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - 
/*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UCollator.d --- a/base/src/java/mangoicu/UCollator.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,732 +0,0 @@ -/******************************************************************************* - - @file UCollator.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UCollator; - -private import java.mangoicu.ICU, - java.mangoicu.USet, - java.mangoicu.ULocale, - java.mangoicu.UString; - -/******************************************************************************* - - The API for Collator performs locale-sensitive string comparison. - You use this service to build searching and sorting routines for - natural language text. Important: The ICU collation service has been - reimplemented in order to achieve better performance and UCA compliance. - For details, see the collation design document. - - For more information about the collation service see the user's guide. - - The collation service provides correct sorting orders for most locales - supported in ICU. If specific data for a locale is not available, - the order eventually falls back to the UCA sort order. - - Sort ordering may be customized by providing your own set of rules. - For more on this subject see the Collation customization section of - the user's guide. - - See - this page for full details.
- -*******************************************************************************/ - -class UCollator : ICU -{ - package Handle handle; - - enum Attribute - { - FrenchCollation, - AlternateHandling, - CaseFirst, - CaseLevel, - NormalizationMode, - DecompositionMode = NormalizationMode, - strength, - HiraganaQuaternaryMode, - NumericCollation, - AttributeCount - } - - enum AttributeValue - { - Default = -1, - Primary = 0, - Secondary = 1, - Tertiary = 2, - DefaultStrength = Tertiary, - CeStrengthLimit, - Quaternary = 3, - Identical = 15, - strengthLimit, - Off = 16, - On = 17, - Shifted = 20, - NonIgnorable = 21, - LowerFirst = 24, - UpperFirst = 25, - AttributeValueCount - } - - enum RuleOption - { - TailoringOnly, - FullRules - } - - enum BoundMode - { - BoundLower = 0, - BoundUpper = 1, - BoundUpperLong = 2, - BoundValueCount - } - - typedef AttributeValue Strength; - - /*********************************************************************** - - Open a UCollator for comparing strings. The locale specified - determines the required collation rules. Special values for - locales can be passed in - if ULocale.Default is passed for - the locale, the default locale collation rules will be used. - If ULocale.Root is passed, UCA rules will be used - - ***********************************************************************/ - - this (ULocale locale) - { - UErrorCode e; - - handle = ucol_open (toString(locale.name), e); - testError (e, "failed to open collator"); - } - - /*********************************************************************** - - Produce a UCollator instance according to the rules supplied. - - The rules are used to change the default ordering, defined in - the UCA in a process called tailoring. 
For the syntax of the - rules please see users guide - - ***********************************************************************/ - - this (UStringView rules, AttributeValue mode, Strength strength) - { - UErrorCode e; - - handle = ucol_openRules (rules.get.ptr, rules.len, mode, strength, null, e); - testError (e, "failed to open rules-based collator"); - } - - /*********************************************************************** - - Open a collator defined by a short form string. The - structure and the syntax of the string is defined in - the "Naming collators" section of the users guide: - http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators - Attributes are overriden by the subsequent attributes. - So, for "S2_S3", final strength will be 3. 3066bis - locale overrides individual locale parts. - - The call to this constructor is equivalent to a plain - constructor, followed by a series of calls to setAttribute - and setVariableTop - - ***********************************************************************/ - - this (char[] shortName, bool forceDefaults) - { - UErrorCode e; - - handle = ucol_openFromShortString (toString(shortName), forceDefaults, null, e); - testError (e, "failed to open short-name collator"); - } - - /*********************************************************************** - - Internal constructor invoked via USearch - - ***********************************************************************/ - - package this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Close a UCollator - - ***********************************************************************/ - - ~this () - { - ucol_close (handle); - } - - /*********************************************************************** - - Get a set containing the contractions defined by the - collator. - - The set includes both the UCA contractions and the - contractions defined by the collator. 
This set will - contain only strings. If a tailoring explicitly - suppresses contractions from the UCA (like Russian), - removed contractions will not be in the resulting set. - - ***********************************************************************/ - - void getContractions (USet set) - { - UErrorCode e; - - ucol_getContractions (handle, set.handle, e); - testError (e, "failed to get collator contractions"); - } - - /*********************************************************************** - - Compare two strings. Return value is -, 0, + - - ***********************************************************************/ - - int strcoll (UStringView source, UStringView target) - { - return ucol_strcoll (handle, source.get.ptr, source.len, target.get.ptr, target.len); - } - - /*********************************************************************** - - Determine if one string is greater than another. This - function is equivalent to strcoll() > 1 - - ***********************************************************************/ - - bool greater (UStringView source, UStringView target) - { - return ucol_greater (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; - } - - /*********************************************************************** - - Determine if one string is greater than or equal to - another. 
This function is equivalent to strcoll() >= 0 - - ***********************************************************************/ - - bool greaterOrEqual (UStringView source, UStringView target) - { - return ucol_greaterOrEqual (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; - } - - /*********************************************************************** - - This function is equivalent to strcoll() == 0 - - ***********************************************************************/ - - bool equal (UStringView source, UStringView target) - { - return ucol_equal (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; - } - - /*********************************************************************** - - Get the collation strength used in a UCollator. The - strength influences how strings are compared. - - ***********************************************************************/ - - Strength getStrength () - { - return ucol_getStrength (handle); - } - - /*********************************************************************** - - Set the collation strength used in this UCollator. The - strength influences how strings are compared. one of - Primary, Secondary, Tertiary, Quaternary, Dentical, or - Default - - ***********************************************************************/ - - void setStrength (Strength s) - { - ucol_setStrength (handle, s); - } - - /*********************************************************************** - - Get the display name for a UCollator. 
The display name is - suitable for presentation to a user - - ***********************************************************************/ - - void getDisplayName (ULocale obj, ULocale display, UString dst) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return ucol_getDisplayName (toString(obj.name), toString(display.name), dst.get.ptr, dst.len, e); - } - - dst.format (&fmt, "failed to get collator display name"); - } - - /*********************************************************************** - - Returns current rules. Options define whether full rules - are returned or just the tailoring. - - ***********************************************************************/ - - void getRules (UString dst, RuleOption o = RuleOption.FullRules) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - uint needed = ucol_getRulesEx (handle, o, dst.get.ptr, dst.len); - if (needed > len) - e = e.BufferOverflow; - return needed; - } - - dst.format (&fmt, "failed to get collator rules"); - } - - /*********************************************************************** - - Get the short definition string for a collator. - - This API harvests the collator's locale and the attribute - set and produces a string that can be used for opening a - collator with the same properties using the char[] style - constructor. This string will be normalized. 
- - The structure and the syntax of the string is defined in the - "Naming collators" section of the users guide: - http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators - - ***********************************************************************/ - - char[] getShortDefinitionString (ULocale locale = ULocale.Default) - { - UErrorCode e; - char[64] dst; - - uint len = ucol_getShortDefinitionString (handle, toString(locale.name), dst.ptr, dst.length, e); - testError (e, "failed to get collator short name"); - return dst[0..len].dup; - } - - /*********************************************************************** - - Verifies and normalizes short definition string. Normalized - short definition string has all the option sorted by the - argument name, so that equivalent definition strings are the - same - - ***********************************************************************/ - - char[] normalizeShortDefinitionString (char[] source) - { - UErrorCode e; - char[64] dst; - - uint len = ucol_normalizeShortDefinitionString (toString(source), dst.ptr, dst.length, null, e); - testError (e, "failed to normalize collator short name"); - return dst[0..len].dup; - } - - /*********************************************************************** - - Get a sort key for a string from a UCollator. Sort keys - may be compared using strcmp. - - ***********************************************************************/ - - ubyte[] getSortKey (UStringView t, ubyte[] result) - { - uint len = ucol_getSortKey (handle, t.get.ptr, t.len, result.ptr, result.length); - if (len < result.length) - return result [0..len]; - return null; - } - - /*********************************************************************** - - Merge two sort keys. The levels are merged with their - corresponding counterparts (primaries with primaries, - secondaries with secondaries etc.). Between the values - from the same level a separator is inserted. 
example - (uncompressed): 191B1D 01 050505 01 910505 00 and - 1F2123 01 050505 01 910505 00 will be merged as - 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00 - This allows for concatenating of first and last names for - sorting, among other things. If the destination buffer is - not big enough, the results are undefined. If any of source - lengths are zero or any of source pointers are null/undefined, - result is of size zero. - - ***********************************************************************/ - - ubyte[] mergeSortkeys (ubyte[] left, ubyte[] right, ubyte[] result) - { - uint len = ucol_mergeSortkeys (left.ptr, left.length, right.ptr, right.length, result.ptr, result.length); - if (len < result.length) - return result [0..len]; - return null; - } - - /*********************************************************************** - - Produce a bound for a given sortkey and a number of levels. - - Return value is always the number of bytes needed, regardless - of whether the result buffer was big enough or even valid. - - Resulting bounds can be used to produce a range of strings - that are between upper and lower bounds. For example, if - bounds are produced for a sortkey of string "smith", strings - between upper and lower bounds with one level would include - "Smith", "SMITH", "sMiTh". - - There are two upper bounds that can be produced. If BoundUpper - is produced, strings matched would be as above. However, if - bound produced using BoundUpperLong is used, the above example - will also match "Smithsonian" and similar. 
- - ***********************************************************************/ - - ubyte[] getBound (BoundMode mode, ubyte[] source, ubyte[] result, uint levels = 1) - { - UErrorCode e; - - uint len = ucol_getBound (source.ptr, source.length, mode, levels, result.ptr, result.length, e); - testError (e, "failed to get sortkey bound"); - if (len < result.length) - return result [0..len]; - return null; - } - - /*********************************************************************** - - Gets the version information for a Collator. - - Version is currently an opaque 32-bit number which depends, - among other things, on major versions of the collator - tailoring and UCA - - ***********************************************************************/ - - void getVersion (inout Version v) - { - ucol_getVersion (handle, v); - } - - /*********************************************************************** - - Gets the UCA version information for this Collator - - ***********************************************************************/ - - void getUCAVersion (inout Version v) - { - ucol_getUCAVersion (handle, v); - } - - /*********************************************************************** - - Universal attribute setter - - ***********************************************************************/ - - void setAttribute (Attribute attr, AttributeValue value) - { - UErrorCode e; - - ucol_setAttribute (handle, attr, value, e); - testError (e, "failed to set collator attribute"); - } - - /*********************************************************************** - - Universal attribute getter - - ***********************************************************************/ - - AttributeValue getAttribute (Attribute attr) - { - UErrorCode e; - - AttributeValue v = ucol_getAttribute (handle, attr, e); - testError (e, "failed to get collator attribute"); - return v; - } - - /*********************************************************************** - - Variable top is a two byte primary value which 
causes all - the codepoints with primary values that are less or equal - than the variable top to be shifted when alternate handling - is set to Shifted. - - ***********************************************************************/ - - void setVariableTop (UStringView t) - { - UErrorCode e; - - ucol_setVariableTop (handle, t.get.ptr, t.len, e); - testError (e, "failed to set variable-top"); - } - - /*********************************************************************** - - Sets the variable top to a collation element value - supplied.Variable top is set to the upper 16 bits. - Lower 16 bits are ignored. - - ***********************************************************************/ - - void setVariableTop (uint x) - { - UErrorCode e; - - ucol_restoreVariableTop (handle, x, e); - testError (e, "failed to restore variable-top"); - } - - /*********************************************************************** - - Gets the variable top value of this Collator. Lower 16 bits - are undefined and should be ignored. - - ***********************************************************************/ - - uint getVariableTop () - { - UErrorCode e; - - uint x = ucol_getVariableTop (handle, e); - testError (e, "failed to get variable-top"); - return x; - } - - /*********************************************************************** - - Gets the locale name of the collator. If the collator is - instantiated from the rules, then this function will throw - an exception - - ***********************************************************************/ - - void getLocale (ULocale locale, ULocale.Type type) - { - UErrorCode e; - - locale.name = toArray (ucol_getLocaleByType (handle, type, e)); - if (isError(e) || locale.name is null) - exception ("failed to get collator locale"); - } - - /*********************************************************************** - - Get the Unicode set that contains all the characters and - sequences tailored in this collator. 
- - ***********************************************************************/ - - USet getTailoredSet () - { - UErrorCode e; - - Handle h = ucol_getTailoredSet (handle, e); - testError (e, "failed to get tailored set"); - return new USet (h); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - void function (Handle) ucol_close; - Handle function (char *loc, inout UErrorCode e) ucol_open; - Handle function (wchar* rules, uint rulesLength, AttributeValue normalizationMode, Strength strength, UParseError *parseError, inout UErrorCode e) ucol_openRules; - Handle function (char *definition, byte forceDefaults, UParseError *parseError, inout UErrorCode e) ucol_openFromShortString; - uint function (Handle, Handle conts, inout UErrorCode e) ucol_getContractions; - int function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_strcoll; - byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greater; - byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greaterOrEqual; - byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_equal; - Strength function (Handle) ucol_getStrength; - void function (Handle, Strength strength) ucol_setStrength; - uint function (char *objLoc, char *dispLoc, wchar* result, uint resultLength, inout UErrorCode e) ucol_getDisplayName; - uint function (Handle, char *locale, char *buffer, uint capacity, inout UErrorCode e) 
ucol_getShortDefinitionString; - uint function (char *source, char *destination, uint capacity, UParseError *parseError, inout UErrorCode e) ucol_normalizeShortDefinitionString; - uint function (Handle, wchar* source, uint sourceLength, ubyte *result, uint resultLength) ucol_getSortKey; - uint function (ubyte *source, uint sourceLength, BoundMode boundType, uint noOfLevels, ubyte *result, uint resultLength, inout UErrorCode e) ucol_getBound; - void function (Handle, Version info) ucol_getVersion; - void function (Handle, Version info) ucol_getUCAVersion; - uint function (ubyte *src1, uint src1Length, ubyte *src2, uint src2Length, ubyte *dest, uint destCapacity) ucol_mergeSortkeys; - void function (Handle, Attribute attr, AttributeValue value, inout UErrorCode e) ucol_setAttribute; - AttributeValue function (Handle, Attribute attr, inout UErrorCode e) ucol_getAttribute; - uint function (Handle, wchar* varTop, uint len, inout UErrorCode e) ucol_setVariableTop; - uint function (Handle, inout UErrorCode e) ucol_getVariableTop; - void function (Handle, uint varTop, inout UErrorCode e) ucol_restoreVariableTop; - uint function (Handle, RuleOption delta, wchar* buffer, uint bufferLen) ucol_getRulesEx; - char* function (Handle, ULocale.Type type, inout UErrorCode e) ucol_getLocaleByType; - Handle function (Handle, inout UErrorCode e) ucol_getTailoredSet; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucol_open, "ucol_open"}, - {cast(void**) &ucol_close, "ucol_close"}, - {cast(void**) &ucol_openRules, "ucol_openRules"}, - {cast(void**) &ucol_openFromShortString, "ucol_openFromShortString"}, - {cast(void**) &ucol_getContractions, "ucol_getContractions"}, - {cast(void**) &ucol_strcoll, "ucol_strcoll"}, - {cast(void**) &ucol_greater, "ucol_greater"}, - {cast(void**) &ucol_greaterOrEqual, 
"ucol_greaterOrEqual"}, - {cast(void**) &ucol_equal, "ucol_equal"}, - {cast(void**) &ucol_getStrength, "ucol_getStrength"}, - {cast(void**) &ucol_setStrength, "ucol_setStrength"}, - {cast(void**) &ucol_getDisplayName, "ucol_getDisplayName"}, - {cast(void**) &ucol_getShortDefinitionString, "ucol_getShortDefinitionString"}, - {cast(void**) &ucol_normalizeShortDefinitionString, "ucol_normalizeShortDefinitionString"}, - {cast(void**) &ucol_getSortKey, "ucol_getSortKey"}, - {cast(void**) &ucol_getBound, "ucol_getBound"}, - {cast(void**) &ucol_getVersion, "ucol_getVersion"}, - {cast(void**) &ucol_getUCAVersion, "ucol_getUCAVersion"}, - {cast(void**) &ucol_mergeSortkeys, "ucol_mergeSortkeys"}, - {cast(void**) &ucol_setAttribute, "ucol_setAttribute"}, - {cast(void**) &ucol_getAttribute, "ucol_getAttribute"}, - {cast(void**) &ucol_setVariableTop, "ucol_setVariableTop"}, - {cast(void**) &ucol_getVariableTop, "ucol_getVariableTop"}, - {cast(void**) &ucol_restoreVariableTop, "ucol_restoreVariableTop"}, - {cast(void**) &ucol_getRulesEx, "ucol_getRulesEx"}, - {cast(void**) &ucol_getLocaleByType, "ucol_getLocaleByType"}, - {cast(void**) &ucol_getTailoredSet, "ucol_getTailoredSet"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UConverter.d --- a/base/src/java/mangoicu/UConverter.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,764 +0,0 @@ -/******************************************************************************* - - @file UConverter.d - - Copyright (c) 2004 Kris Bell - - This 
software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UConverter; - -private import java.mangoicu.ICU; - -/******************************************************************************* - -*******************************************************************************/ - -struct UAdjust // used with encode() & decode() methods -{ - uint input, // how much was read from the input - output; // how much was written to the output -} - -/******************************************************************************* - -*******************************************************************************/ - -interface ITranscoder -{ - void reset (); - - bool convert (void[] input, void[] output, inout UAdjust x, bool flush); -} - -/******************************************************************************* - - This API is used to convert codepage or character encoded data to - and from UTF-16. You can open a converter with ucnv_open(). With - that converter, you can get its properties, set options, convert - your data and close the converter. - - Since many software programs recogize different converter names - for different types of converters, there are other functions in - this API to iterate over the converter aliases. - - See - this page for full details. - -*******************************************************************************/ - -class UConverter : ICU -{ - private Handle handle; - - - - /*********************************************************************** - - Creates a UConverter object with the names specified as a - string. - - The actual name will be resolved with the alias file using - a case-insensitive string comparison that ignores delimiters - '-', '_', and ' ' (dash, underscore, and space). E.g., the - names "UTF8", "utf-8", and "Utf 8" are all equivalent. If null - is passed for the converter name, it will create one with the - getDefaultName() return value. 
- - A converter name may contain options like a locale specification - to control the specific behavior of the converter instantiated. - The meaning of the options depends on the particular converter: - if an option is not defined for or recognized, it is ignored. - - Options are appended to the converter name string, with an - OptionSepChar between the name and the first option and also - between adjacent options. - - The conversion behavior and names can vary between platforms, - and ICU may convert some characters differently from other - platforms. Details on this topic are in the User's Guide. - - ***********************************************************************/ - - this (char[] name) - { - UErrorCode e; - - handle = ucnv_open (toString (name), e); - if (isError (e)) - exception ("failed to create converter for '"~name~"'"); - } - - /*********************************************************************** - - Deletes the unicode converter and releases resources - associated with just this instance. Does not free up - shared converter tables. - - ***********************************************************************/ - - ~this () - { - ucnv_close (handle); - } - - /*********************************************************************** - - Do a fuzzy compare of two converter/alias names. The - comparison is case-insensitive. It also ignores the - characters '-', '_', and ' ' (dash, underscore, and space). - Thus the strings "UTF-8", "utf_8", and "Utf 8" are exactly - equivalent - - ***********************************************************************/ - - static final int compareNames (char[] a, char[] b) - { - return ucnv_compareNames (toString(a), toString(b)); - } - - /*********************************************************************** - - Resets the state of this converter to the default state. - - This is used in the case of an error, to restart a - conversion from a known default state. It will also - empty the internal output buffers. 
- - ***********************************************************************/ - - void reset () - { - ucnv_reset (handle); - } - - /*********************************************************************** - - Resets the from-Unicode part of this converter state to the - default state. - - This is used in the case of an error to restart a conversion - from Unicode to a known default state. It will also empty the - internal output buffers used for the conversion from Unicode - codepoints. - - ***********************************************************************/ - - void resetDecoder () - { - ucnv_resetToUnicode (handle); - } - - /*********************************************************************** - - Resets the from-Unicode part of this converter state to the - default state. - - This is used in the case of an error to restart a conversion - from Unicode to a known default state. It will also empty the - internal output buffers used for the conversion from Unicode - codepoints. - - ***********************************************************************/ - - void resetEncoder () - { - ucnv_resetFromUnicode (handle); - } - - /*********************************************************************** - - Returns the maximum number of bytes that are output per - UChar in conversion from Unicode using this converter. - - The returned number can be used to calculate the size of - a target buffer for conversion from Unicode. - - This number may not be the same as the maximum number of - bytes per "conversion unit". In other words, it may not - be the intuitively expected number of bytes per character - that would be published for a charset, and may not fulfill - any other purpose than the allocation of an output buffer - of guaranteed sufficient size for a given input length and - converter. - - Examples for special cases that are taken into account: - - * Supplementary code points may convert to more bytes than - BMP code points. 
This function returns bytes per UChar - (UTF-16 code unit), not per Unicode code point, for efficient - buffer allocation. - * State-shifting output (SI/SO, escapes, etc.) from stateful - converters. - * When m input UChars are converted to n output bytes, then - the maximum m/n is taken into account. - - The number returned here does not take into account: - - * callbacks which output more than one charset character - sequence per call, like escape callbacks - * initial and final non-character bytes that are output by - some converters (automatic BOMs, initial escape sequence, - final SI, etc.) - - Examples for returned values: - - * SBCS charsets: 1 - * Shift-JIS: 2 - * UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) - * UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) - * EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) - * ISO-2022: 3 (always outputs UTF-8) - * ISO-2022-JP: 6 (4-byte escape sequences + DBCS) - * ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 - + DBCS) - - ***********************************************************************/ - - ubyte getMaxCharSize () - { - return ucnv_getMaxCharSize (handle); - } - - /*********************************************************************** - - Returns the minimum byte length for characters in this - codepage. This is usually either 1 or 2. - - ***********************************************************************/ - - ubyte getMinCharSize () - { - return ucnv_getMinCharSize (handle); - } - - /*********************************************************************** - - Gets the internal, canonical name of the converter (zero- - terminated). 
- - ***********************************************************************/ - - char[] getName () - { - UErrorCode e; - - char[] name = toArray (ucnv_getName (handle, e)); - testError (e, "failed to get converter name"); - return name; - } - - /*********************************************************************** - - Determines if the converter contains ambiguous mappings of - the same character or not - - ***********************************************************************/ - - bool isAmbiguous () - { - return cast(bool) ucnv_isAmbiguous (handle); - } - - /*********************************************************************** - - Detects Unicode signature byte sequences at the start - of the byte stream and returns the charset name of the - indicated Unicode charset. A null is returned where no - Unicode signature is recognized. - - A caller can create a UConverter using the charset name. - The first code unit (wchar) from the start of the stream - will be U+FEFF (the Unicode BOM/signature character) and - can usually be ignored. - - ***********************************************************************/ - - static final char[] detectSignature (void[] input) - { - UErrorCode e; - uint len; - char* name; - - name = ucnv_detectUnicodeSignature (input.ptr, input.length, len, e); - if (name == null || isError (e)) - return null; - return toArray (name); - } - - /*********************************************************************** - - Converts an array of unicode characters to an array of - codepage characters. - - This function is optimized for converting a continuous - stream of data in buffer-sized chunks, where the entire - source and target does not fit in available buffers. - - The source pointer is an in/out parameter. It starts out - pointing where the conversion is to begin, and ends up - pointing after the last UChar consumed. 
- - Target similarly starts out pointer at the first available - byte in the output buffer, and ends up pointing after the - last byte written to the output. - - The converter always attempts to consume the entire source - buffer, unless (1.) the target buffer is full, or (2.) a - failing error is returned from the current callback function. - When a successful error status has been returned, it means - that all of the source buffer has been consumed. At that - point, the caller should reset the source and sourceLimit - pointers to point to the next chunk. - - At the end of the stream (flush==true), the input is completely - consumed when *source==sourceLimit and no error code is set. - The converter object is then automatically reset by this - function. (This means that a converter need not be reset - explicitly between data streams if it finishes the previous - stream without errors.) - - This is a stateful conversion. Additionally, even when all - source data has been consumed, some data may be in the - converters' internal state. Call this function repeatedly, - updating the target pointers with the next empty chunk of - target in case of a U_BUFFER_OVERFLOW_ERROR, and updating - the source pointers with the next chunk of source when a - successful error status is returned, until there are no more - chunks of source data. - - Parameters: - - converter the Unicode converter - target I/O parameter. Input : Points to the - beginning of the buffer to copy codepage - characters to. Output : points to after - the last codepage character copied to - target. - targetLimit the pointer just after last of the - target buffer - source I/O parameter, pointer to pointer to - the source Unicode character buffer. - sourceLimit the pointer just after the last of - the source buffer - offsets if NULL is passed, nothing will happen - to it, otherwise it needs to have the - same number of allocated cells as target. 
- Will fill in offsets from target to source - pointer e.g: offsets[3] is equal to 6, it - means that the target[3] was a result of - transcoding source[6] For output data - carried across calls, and other data - without a specific source character - (such as from escape sequences or - callbacks) -1 will be placed for offsets. - flush set to TRUE if the current source buffer - is the last available chunk of the source, - FALSE otherwise. Note that if a failing - status is returned, this function may - have to be called multiple times with - flush set to TRUE until the source buffer - is consumed. - - ***********************************************************************/ - - bool encode (wchar[] input, void[] output, inout UAdjust x, bool flush) - { - UErrorCode e; - wchar* src = input.ptr; - void* dst = output.ptr; - wchar* srcLimit = src + input.length; - void* dstLimit = dst + output.length; - - ucnv_fromUnicode (handle, &dst, dstLimit, &src, srcLimit, null, flush, e); - x.input = src - input.ptr; - x.output = dst - output.ptr; - - if (e == e.BufferOverflow) - return true; - - testError (e, "failed to encode"); - return false; - } - - /*********************************************************************** - - Encode the Unicode string into a codepage string. - - This function is a more convenient but less powerful version - of encode(). It is only useful for whole strings, not - for streaming conversion. The maximum output buffer capacity - required (barring output from callbacks) should be calculated - using getMaxCharSize(). 
- - ***********************************************************************/ - - uint encode (wchar[] input, void[] output) - { - UErrorCode e; - uint len; - - len = ucnv_fromUChars (handle, output.ptr, output.length, input.ptr, input.length, e); - testError (e, "failed to encode"); - return len; - } - - /*********************************************************************** - - Converts a buffer of codepage bytes into an array of unicode - UChars characters. - - This function is optimized for converting a continuous stream - of data in buffer-sized chunks, where the entire source and - target does not fit in available buffers. - - The source pointer is an in/out parameter. It starts out pointing - where the conversion is to begin, and ends up pointing after the - last byte of source consumed. - - Target similarly starts out pointer at the first available UChar - in the output buffer, and ends up pointing after the last UChar - written to the output. It does NOT necessarily keep UChar sequences - together. - - The converter always attempts to consume the entire source buffer, - unless (1.) the target buffer is full, or (2.) a failing error is - returned from the current callback function. When a successful - error status has been returned, it means that all of the source - buffer has been consumed. At that point, the caller should reset - the source and sourceLimit pointers to point to the next chunk. - - At the end of the stream (flush==true), the input is completely - consumed when *source==sourceLimit and no error code is set The - converter object is then automatically reset by this function. - (This means that a converter need not be reset explicitly between - data streams if it finishes the previous stream without errors.) - - This is a stateful conversion. Additionally, even when all source - data has been consumed, some data may be in the converters' internal - state. 
Call this function repeatedly, updating the target pointers - with the next empty chunk of target in case of a BufferOverflow, and - updating the source pointers with the next chunk of source when a - successful error status is returned, until there are no more chunks - of source data. - - Parameters: - converter the Unicode converter - target I/O parameter. Input : Points to the beginning - of the buffer to copy UChars into. Output : - points to after the last UChar copied. - targetLimit the pointer just after the end of the target - buffer - source I/O parameter, pointer to pointer to the source - codepage buffer. - sourceLimit the pointer to the byte after the end of the - source buffer - offsets if NULL is passed, nothing will happen to - it, otherwise it needs to have the same - number of allocated cells as target. Will - fill in offsets from target to source pointer - e.g: offsets[3] is equal to 6, it means that - the target[3] was a result of transcoding - source[6] For output data carried across - calls, and other data without a specific - source character (such as from escape - sequences or callbacks) -1 will be placed - for offsets. - flush set to true if the current source buffer - is the last available chunk of the source, - false otherwise. Note that if a failing - status is returned, this function may have - to be called multiple times with flush set - to true until the source buffer is consumed. 
- - ***********************************************************************/ - - bool decode (void[] input, wchar[] output, inout UAdjust x, bool flush) - { - UErrorCode e; - void* src = input.ptr; - wchar* dst = output.ptr; - void* srcLimit = src + input.length; - wchar* dstLimit = dst + output.length; - - ucnv_toUnicode (handle, &dst, dstLimit, &src, srcLimit, null, flush, e); - x.input = src - input.ptr; - x.output = dst - output.ptr; - - if (e == e.BufferOverflow) - return true; - - testError (e, "failed to decode"); - return false; - } - - /*********************************************************************** - - Decode the codepage string into a Unicode string. - - This function is a more convenient but less powerful version - of decode(). It is only useful for whole strings, not for - streaming conversion. The maximum output buffer capacity - required (barring output from callbacks) will be 2*src.length - (each char may be converted into a surrogate pair) - - ***********************************************************************/ - - uint decode (void[] input, wchar[] output) - { - UErrorCode e; - uint len; - - len = ucnv_toUChars (handle, output.ptr, output.length, input.ptr, input.length, e); - testError (e, "failed to decode"); - return len; - } - - /********************************************************************** - - Iterate over the available converter names - - **********************************************************************/ - - static int opApply (int delegate(inout char[] element) dg) - { - char[] name; - int result; - uint count = ucnv_countAvailable (); - - for (uint i=0; i < count; ++i) - { - name = toArray (ucnv_getAvailableName (i)); - result = dg (name); - if (result) - break; - } - return result; - } - - /*********************************************************************** - - ***********************************************************************/ - - ITranscoder createTranscoder (UConverter dst) - { - return new 
UTranscoder (this, dst); - } - - /********************************************************************** - - **********************************************************************/ - - private class UTranscoder : ITranscoder - { - private UConverter cSrc, - cDst; - private bool clear = true; - - /************************************************************** - - **************************************************************/ - - this (UConverter src, UConverter dst) - { - cSrc = src; - cDst = dst; - } - - /************************************************************** - - **************************************************************/ - - void reset () - { - clear = true; - } - - /************************************************************** - - **************************************************************/ - - bool convert (void[] input, void[] output, inout UAdjust x, bool flush) - { - UErrorCode e; - void* src = input.ptr; - void* dst = output.ptr; - void* srcLimit = src + input.length; - void* dstLimit = dst + output.length; - - ucnv_convertEx (cDst.handle, cSrc.handle, &dst, dstLimit, - &src, srcLimit, null, null, null, null, - clear, flush, e); - clear = false; - x.input = src - input.ptr; - x.output = dst - output.ptr; - - if (e == e.BufferOverflow) - return true; - - testError (e, "failed to decode"); - return false; - } - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - int function (char*, char*) ucnv_compareNames; - Handle function (char*, inout UErrorCode) ucnv_open; - char* function (void*, uint, inout uint, inout UErrorCode) ucnv_detectUnicodeSignature; - void function (Handle) ucnv_close; - void function (Handle) ucnv_reset; - int function (Handle) ucnv_resetToUnicode; - int function (Handle) ucnv_resetFromUnicode; - ubyte function (Handle) ucnv_getMaxCharSize; - ubyte function (Handle) ucnv_getMinCharSize; - char* function (Handle, inout UErrorCode) ucnv_getName; - uint function (Handle, wchar*, uint, void*, uint, inout UErrorCode) ucnv_toUChars; - uint function (Handle, void*, uint, wchar*, uint, inout UErrorCode) ucnv_fromUChars; - void function (Handle, void**, void*, wchar**, wchar*, int*, ubyte, inout UErrorCode) ucnv_fromUnicode; - void function (Handle, wchar**, wchar*, void**, void*, int*, ubyte, inout UErrorCode) ucnv_toUnicode; - void function (Handle, Handle, void**, void*, void**, void*, wchar*, wchar*, wchar*, wchar*, ubyte, ubyte, inout UErrorCode) ucnv_convertEx; - ubyte function (Handle) ucnv_isAmbiguous; - char* function (uint) ucnv_getAvailableName; - uint function () ucnv_countAvailable; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucnv_open, "ucnv_open"}, - {cast(void**) &ucnv_close, "ucnv_close"}, - {cast(void**) &ucnv_reset, "ucnv_reset"}, - {cast(void**) &ucnv_resetToUnicode, "ucnv_resetToUnicode"}, - {cast(void**) &ucnv_resetFromUnicode, 
"ucnv_resetFromUnicode"}, - {cast(void**) &ucnv_compareNames, "ucnv_compareNames"}, - {cast(void**) &ucnv_getMaxCharSize, "ucnv_getMaxCharSize"}, - {cast(void**) &ucnv_getMinCharSize, "ucnv_getMinCharSize"}, - {cast(void**) &ucnv_getName, "ucnv_getName"}, - {cast(void**) &ucnv_detectUnicodeSignature, "ucnv_detectUnicodeSignature"}, - {cast(void**) &ucnv_toUChars, "ucnv_toUChars"}, - {cast(void**) &ucnv_fromUChars, "ucnv_fromUChars"}, - {cast(void**) &ucnv_toUnicode, "ucnv_toUnicode"}, - {cast(void**) &ucnv_fromUnicode, "ucnv_fromUnicode"}, - {cast(void**) &ucnv_convertEx, "ucnv_convertEx"}, - {cast(void**) &ucnv_isAmbiguous, "ucnv_isAmbiguous"}, - {cast(void**) &ucnv_countAvailable, "ucnv_countAvailable"}, - {cast(void**) &ucnv_getAvailableName, "ucnv_getAvailableName"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); -/+ - foreach (char[] name; UConverter) - printf ("%.*s\n", name); -+/ - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UDateFormat.d --- a/base/src/java/mangoicu/UDateFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,426 +0,0 @@ -/******************************************************************************* - - @file UDateFormat.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UDateFormat; - -private import java.mangoicu.ICU, - java.mangoicu.UString, - java.mangoicu.UCalendar, - java.mangoicu.UNumberFormat; - -/******************************************************************************* - - UDateFormat consists of functions that convert dates and - times from their internal representations to textual form and back - again in a language-independent manner. Converting from the internal - representation (milliseconds since midnight, January 1, 1970) to text - is known as "formatting," and converting from text to millis is known - as "parsing." We currently define one concrete structure UDateFormat, - which can handle pretty much all normal date formatting and parsing - actions. - - UDateFormat helps you to format and parse dates for any locale. 
- Your code can be completely independent of the locale conventions - for months, days of the week, or even the calendar format: lunar - vs. solar. - - See - this page for full details. - -*******************************************************************************/ - -private class UDateFormat : ICU -{ - private Handle handle; - - alias UCalendar.UDate UDate; - - typedef void* UFieldPos; - - public enum Style - { - Full, - Long, - Medium, - Short, - Default = Medium, - None = -1, - Ignore = -2 - }; - - public enum Field - { - EraField = 0, - YearField = 1, - MonthField = 2, - DateField = 3, - HourOfDay1Field = 4, - HourOfDay0Field = 5, - MinuteField = 6, - SecondField = 7, - FractionalSecondField = 8, - DayOfWeekField = 9, - DayOfYearField = 10, - DayOfWeekInMonthField = 11, - WeekOfYearField = 12, - WeekOfMonthField = 13, - AmPmField = 14, - Hour1Field = 15, - Hour0Field = 16, - TimezoneField = 17, - YearWoyField = 18, - DowLocalField = 19, - ExtendedYearField = 20, - JulianDayField = 21, - MillisecondsInDayField = 22, - TimezoneRfcField = 23, - FieldCount = 24 - }; - - private enum Symbol - { - Eras, - Months, - ShortMonths, - Weekdays, - ShortWeekdays, - AmPms, - LocalizedChars - }; - - - /*********************************************************************** - - Open a new UDateFormat for formatting and parsing dates - and time. If a pattern is not specified, an appropriate - one for the given locale will be used. 
- - ***********************************************************************/ - - this (Style time, Style date, inout ULocale locale, inout UTimeZone tz, UStringView pattern=null) - { - UErrorCode e; - wchar* p; - uint c; - - if (pattern) - p = pattern.get.ptr, c = pattern.length; - handle = udat_open (time, date, ICU.toString(locale.name), cast(wchar*)tz.name.ptr, tz.name.length, p, c, e); - testError (e, "failed to create DateFormat"); - } - - /*********************************************************************** - - Close a UDateFormat - - ***********************************************************************/ - - ~this () - { - udat_close (handle); - } - - /*********************************************************************** - - Format a date using an UDateFormat - - ***********************************************************************/ - - void format (UString dst, UDate date, UFieldPos p = null) - { - uint fmat (wchar* result, uint len, inout UErrorCode e) - { - return udat_format (handle, date, result, len, p, e); - } - - dst.format (&fmat, "date format failed"); - } - - /*********************************************************************** - - Parse a string into an date/time using a UDateFormat - - ***********************************************************************/ - - UDate parse (UStringView src, uint* index=null) - { - UErrorCode e; - - UDate x = udat_parse (handle, src.content.ptr, src.len, index, e); - testError (e, "failed to parse date"); - return x; - } - - /*********************************************************************** - - Set the UCalendar associated with an UDateFormat. A - UDateFormat uses a UCalendar to convert a raw value - to, for example, the day of the week. 
- - ***********************************************************************/ - - void setCalendar (UCalendar c) - { - udat_setCalendar (handle, c.handle); - } - - /*********************************************************************** - - Get the UCalendar associated with this UDateFormat - - ***********************************************************************/ - - UCalendar getCalendar () - { - Handle h = udat_getCalendar (handle); - return new UCalendar (h); - } - - /*********************************************************************** - - Set the UNumberFormat associated with an UDateFormat.A - UDateFormat uses a UNumberFormat to format numbers within - a date, for example the day number. - - ***********************************************************************/ - - void setNumberFormat (UNumberFormat n) - { - udat_setCalendar (handle, n.handle); - } - - /*********************************************************************** - - Get the year relative to which all 2-digit years are - interpreted - - ***********************************************************************/ - - UDate getTwoDigitYearStart () - { - UErrorCode e; - - UDate x = udat_get2DigitYearStart (handle, e); - testError (e, "failed to get two digit year start"); - return x; - } - - /*********************************************************************** - - Set the year relative to which all 2-digit years are - interpreted - - ***********************************************************************/ - - void setTwoDigitYearStart (UDate start) - { - UErrorCode e; - - udat_set2DigitYearStart (handle, start, e); - testError (e, "failed to set two digit year start"); - } - - /*********************************************************************** - - Extract the pattern from a UDateFormat - - ***********************************************************************/ - - void getPattern (UString dst, bool localize) - { - uint fmat (wchar* result, uint len, inout UErrorCode e) - { - return 
udat_toPattern (handle, localize, result, len, e); - } - - dst.format (&fmat, "failed to retrieve date format pattern"); - } - - /*********************************************************************** - - Set the pattern for a UDateFormat - - ***********************************************************************/ - - void setPattern (UStringView pattern, bool localized) - { - udat_applyPattern (handle, localized, pattern.get.ptr, pattern.length); - } - - /*********************************************************************** - - Specify whether an UDateFormat will perform lenient parsing. - - ***********************************************************************/ - - void setLenient (bool yes) - { - udat_setLenient (handle, yes); - } - - /*********************************************************************** - - Determine if an UDateFormat will perform lenient parsing. - - ***********************************************************************/ - - bool isLenient () - { - return udat_isLenient (handle) != 0; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (uint, uint, char*, wchar*, uint, wchar*, uint, inout UErrorCode) udat_open; - void function (Handle) udat_close; - uint function (Handle, UDate, wchar*, uint, UFieldPos, inout UErrorCode) udat_format; - UDate function (Handle, wchar*, uint, uint*, inout UErrorCode) udat_parse; - void function (Handle, Handle) udat_setCalendar; - void function (Handle, Handle) udat_setNumberFormat; - UDate function (Handle, inout UErrorCode) udat_get2DigitYearStart; - void function (Handle, UDate, inout UErrorCode) udat_set2DigitYearStart; - uint function (Handle, byte, wchar*, uint, inout UErrorCode) udat_toPattern; - void function (Handle, byte, wchar*, uint) udat_applyPattern; - void function (Handle, byte) udat_setLenient; - byte function (Handle) udat_isLenient; - Handle function (Handle) udat_getCalendar; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &udat_open, "udat_open"}, - {cast(void**) &udat_close, "udat_close"}, - {cast(void**) &udat_format, "udat_format"}, - {cast(void**) &udat_parse, "udat_parse"}, - {cast(void**) &udat_setCalendar, "udat_setCalendar"}, - {cast(void**) &udat_setNumberFormat, "udat_setNumberFormat"}, - {cast(void**) &udat_get2DigitYearStart, "udat_get2DigitYearStart"}, - {cast(void**) &udat_set2DigitYearStart, "udat_set2DigitYearStart"}, - {cast(void**) &udat_toPattern, "udat_toPattern"}, - {cast(void**) &udat_applyPattern, "udat_applyPattern"}, - {cast(void**) &udat_setLenient, 
"udat_setLenient"}, - {cast(void**) &udat_isLenient, "udat_isLenient"}, - {cast(void**) &udat_getCalendar, "udat_getCalendar"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - - - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UDomainName.d --- a/base/src/java/mangoicu/UDomainName.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,322 +0,0 @@ -/******************************************************************************* - - @file UDomainName.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. 
- - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. 
- - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UDomainName; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -/******************************************************************************* - - UIDNA API implements the IDNA protocol as defined in the - IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). - - The RFC defines 2 operations: toAscii and toUnicode. Domain - labels containing non-ASCII code points are required to be - processed by toAscii operation before passing it to resolver - libraries. Domain names that are obtained from resolver - libraries are required to be processed by toUnicode operation - before displaying the domain name to the user. IDNA requires - that implementations process input strings with Nameprep - (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of - Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with - Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations - of IDNA MUST fully implement Nameprep and Punycode; neither - Nameprep nor Punycode are optional. - - The input and output of toAscii() and ToUnicode() operations are - Unicode and are designed to be chainable, i.e., applying toAscii() - or toUnicode() operations multiple times to an input string will - yield the same result as applying the operation once. - - See - this page for full details. 
- -*******************************************************************************/ - -class UDomainName : ICU -{ - private UStringView text; - private Handle handle; - - enum Options - { - Strict, - Lenient, - Std3 - } - - - /*********************************************************************** - - - ***********************************************************************/ - - this (UStringView text) - { - this.text = text; - } - - /*********************************************************************** - - This function implements the ToASCII operation as - defined in the IDNA RFC. - - This operation is done on single labels before sending - it to something that expects ASCII names. A label is an - individual part of a domain name. Labels are usually - separated by dots; e.g." "www.example.com" is composed - of 3 labels "www","example", and "com". - - ***********************************************************************/ - - void toAscii (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_toASCII (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to ASCII"); - } - - /*********************************************************************** - - This function implements the ToUnicode operation as - defined in the IDNA RFC. - - This operation is done on single labels before sending - it to something that expects Unicode names. A label is - an individual part of a domain name. Labels are usually - separated by dots; for e.g." "www.example.com" is composed - of 3 labels "www","example", and "com". 
- - ***********************************************************************/ - - void toUnicode (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_toUnicode (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to Unicode"); - } - - /*********************************************************************** - - Convenience function that implements the IDNToASCII - operation as defined in the IDNA RFC. - - This operation is done on complete domain names, e.g: - "www.example.com". It is important to note that this - operation can fail. If it fails, then the input domain - name cannot be used as an Internationalized Domain Name - and the application should have methods defined to deal - with the failure. - - Note: IDNA RFC specifies that a conformant application - should divide a domain name into separate labels, decide - whether to apply allowUnassigned and useSTD3ASCIIRules - on each, and then convert. This function does not offer - that level of granularity. The options once set will apply - to all labels in the domain name - - ***********************************************************************/ - - void IdnToAscii (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_IDNToASCII (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to ASCII"); - } - - /*********************************************************************** - - Convenience function that implements the IDNToUnicode - operation as defined in the IDNA RFC. - - This operation is done on complete domain names, e.g: - "www.example.com". - - Note: IDNA RFC specifies that a conformant application - should divide a domain name into separate labels, decide - whether to apply allowUnassigned and useSTD3ASCIIRules - on each, and then convert. This function does not offer - that level of granularity. 
The options once set will apply - to all labels in the domain name - - ***********************************************************************/ - - void IdnToUnicode (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_IDNToUnicode (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to Unicode"); - } - - /*********************************************************************** - - Compare two IDN strings for equivalence. - - This function splits the domain names into labels and - compares them. According to IDN RFC, whenever two labels - are compared, they are considered equal if and only if - their ASCII forms (obtained by applying toASCII) match - using an case-insensitive ASCII comparison. Two domain - names are considered a match if and only if all labels - match regardless of whether label separators match - - ***********************************************************************/ - - int compare (UString other, Options o = Options.Strict) - { - UErrorCode e; - int i = uidna_compare (text.get.ptr, text.len, other.get.ptr, other.len, o, e); - testError (e, "failed to compare IDN strings"); - return i; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toASCII; - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toUnicode; - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToASCII; - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToUnicode; - int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) uidna_compare; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uidna_toASCII, "uidna_toASCII"}, - {cast(void**) &uidna_toUnicode, "uidna_toUnicode"}, - {cast(void**) &uidna_IDNToASCII, "uidna_IDNToASCII"}, - {cast(void**) &uidna_IDNToUnicode, "uidna_IDNToUnicode"}, - {cast(void**) &uidna_compare, "uidna_compare"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UEnumeration.d --- a/base/src/java/mangoicu/UEnumeration.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,264 +0,0 @@ 
-/******************************************************************************* - - @file UEnumeration.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UEnumeration; - -private import java.mangoicu.ICU; - -/******************************************************************************* - - UEnumeration is returned by a number of ICU classes, for providing - access to such things as ULocale lists and so on, - -*******************************************************************************/ - -class UEnumeration : ICU -{ - package Handle handle; - - /*********************************************************************** - - ***********************************************************************/ - - this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Disposes of the storage used by a UEnumeration object - - ***********************************************************************/ - - ~this () - { - uenum_close (handle); - } - - /*********************************************************************** - - Returns the next element in the iterator's list. - - If there are no more elements, returns NULL. If the - iterator is out-of-sync with its service, status is - set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. - If the native service string is a UChar* string, it - is converted to char* with the invariant converter. - The result is terminated by (char)0. If the conversion - fails (because a character cannot be converted) then - status is set to U_INVARIANT_CONVERSION_ERROR and the - return value is undefined (but non-NULL). - - ***********************************************************************/ - - uint count () - { - UErrorCode e; - - uint x = uenum_count (handle, e); - testError (e, "enumeration out of sync"); - return x; - } - - /*********************************************************************** - - Resets the iterator to the current list of service IDs. 
- - This re-establishes sync with the service and rewinds - the iterator to start at the first element - - ***********************************************************************/ - - void reset () - { - ICU.UErrorCode e; - - uenum_reset (handle, e); - testError (e, "failed to reset enumeration"); - } - - /*********************************************************************** - - Returns the next element in the iterator's list. - - If there are no more elements, returns NULL. If the - iterator is out-of-sync with its service, status is - set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. - If the native service string is a char* string, it is - converted to UChar* with the invariant converter. - - ***********************************************************************/ - - bool next (out char[] dst) - { - ICU.UErrorCode e; - uint len; - - char* p = uenum_next (handle, &len, e); - testError (e, "failed to traverse enumeration"); - if (p) - return dst = p[0..len], true; - return false; - } - - /*********************************************************************** - - Returns the next element in the iterator's list. - - If there are no more elements, returns NULL. If the - iterator is out-of-sync with its service, status is - set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. - If the native service string is a char* string, it is - converted to UChar* with the invariant converter. - - ***********************************************************************/ - - bool next (inout wchar[] dst) - { - ICU.UErrorCode e; - uint len; - - wchar* p = uenum_unext (handle, &len, e); - testError (e, "failed to traverse enumeration"); - if (p) - return dst = p[0..len], true; - return false; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - void function (Handle) uenum_close; - uint function (Handle, inout UErrorCode) uenum_count; - void function (Handle, inout UErrorCode) uenum_reset; - char* function (Handle, uint*, inout UErrorCode) uenum_next; - wchar* function (Handle, uint*, inout UErrorCode) uenum_unext; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uenum_close, "uenum_close"}, - {cast(void**) &uenum_count, "uenum_count"}, - {cast(void**) &uenum_reset, "uenum_reset"}, - {cast(void**) &uenum_next, "uenum_next"}, - {cast(void**) &uenum_unext, "uenum_unext"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/ULocale.d --- a/base/src/java/mangoicu/ULocale.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,230 +0,0 @@ -/******************************************************************************* - - @file ULocale.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. 
In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.ULocale; - -private import java.mangoicu.ICU; -private import java.lang.util; - -/******************************************************************************* - - Note that this is a struct rather than a class. 
This is so - that one can easily construct these on the stack, plus the - 'convenience' instances can be created statically. - -*******************************************************************************/ - -struct ULocale -{ - public CString name; - - /*********************************************************************** - - ***********************************************************************/ - - public static ULocale Root = {""}; - public static ULocale Default = {null}; - public static ULocale English = {"en"}; - public static ULocale Chinese = {"zh"}; - public static ULocale French = {"fr"}; - public static ULocale German = {"de"}; - public static ULocale Italian = {"it"}; - public static ULocale Japanese = {"ja"}; - public static ULocale Korean = {"ko"}; - public static ULocale SimplifiedChinese = {"zh_CN"}; - public static ULocale TraditionalChinese = {"zh_TW"}; - public static ULocale Canada = {"en_CA"}; - public static ULocale CanadaFrench = {"fr_CA"}; - public static ULocale China = {"zh_CN"}; - public static ULocale PRC = {"zh_CN"}; - public static ULocale France = {"fr_FR"}; - public static ULocale Germany = {"de_DE"}; - public static ULocale Italy = {"it_IT"}; - public static ULocale Japan = {"jp_JP"}; - public static ULocale Korea = {"ko_KR"}; - public static ULocale Taiwan = {"zh_TW"}; - public static ULocale UK = {"en_GB"}; - public static ULocale US = {"en_US"}; - - /*********************************************************************** - - ***********************************************************************/ - - public enum Type - { - Actual = 0, - Valid = 1, - Requested = 2, - } - - /*********************************************************************** - - ***********************************************************************/ - - public const uint LanguageCapacity = 12; - public const uint CountryCapacity = 4; - public const uint FullNameCapacity = 56; - public const uint ScriptCapacity = 6; - public const uint KeywordsCapacity = 
50; - public const uint KeywordAndValuesCapacity = 100; - public const char KeywordItemSeparator = ':'; - public const char KeywordSeparator = '@'; - public const char KeywordAssign = '='; - - - /*********************************************************************** - - ***********************************************************************/ - - static void getDefault (inout ULocale locale) - { - locale.name = ICU.toArray (uloc_getDefault()); - if (! locale.name) - ICU.exception ("failed to get default locale"); - } - - /*********************************************************************** - - ***********************************************************************/ - - static void setDefault (inout ULocale locale) - { - ICU.UErrorCode e; - - uloc_setDefault (ICU.toString(locale.name), e); - - if (ICU.isError (e)) - ICU.exception ("invalid locale '"~locale.name~"'"); - } - - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - char* function () uloc_getDefault; - void function (char*, inout ICU.UErrorCode) uloc_setDefault; - } - - /********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uloc_getDefault, "uloc_getDefault"}, - {cast(void**) &uloc_setDefault, "uloc_setDefault"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UMessageFormat.d --- a/base/src/java/mangoicu/UMessageFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,420 +0,0 @@ -/******************************************************************************* - - @file UMessageFormat.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UMessageFormat; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -public import java.mangoicu.ULocale; - -/******************************************************************************* - - Provides means to produce concatenated messages in language-neutral - way. Use this for all concatenations that show up to end users. Takes - a set of objects, formats them, then inserts the formatted strings into - the pattern at the appropriate places. - - See - this page for full details. - -*******************************************************************************/ - -class UMessageFormat : ICU -{ - private Handle handle; - - /*********************************************************************** - - Open a message formatter with given wchar[] and for the - given locale. 
- - ***********************************************************************/ - - this (wchar[] pattern, inout ULocale locale = ULocale.Default) - { - UErrorCode e; - - handle = umsg_open (pattern.ptr, pattern.length, toString(locale.name), null, e); - testError (e, "failed to open message formatter"); - } - - /*********************************************************************** - - Open a message formatter with given pattern and for the - given locale. - - ***********************************************************************/ - - this (UStringView pattern, inout ULocale locale = ULocale.Default) - { - this (pattern.get, locale); - } - - /*********************************************************************** - - Release message formatter - - ***********************************************************************/ - - ~this () - { - umsg_close (handle); - } - - /*********************************************************************** - - This locale is used for fetching default number or date - format information - - ***********************************************************************/ - - UMessageFormat setLocale (inout ULocale locale) - { - umsg_setLocale (handle, toString(locale.name)); - return this; - } - - /*********************************************************************** - - This locale is used for fetching default number or date - format information - - ***********************************************************************/ - - UMessageFormat getLocale (inout ULocale locale) - { - locale.name = toArray (umsg_getLocale (handle)); - return this; - } - - /*********************************************************************** - - Sets the pattern - - ***********************************************************************/ - - UMessageFormat setPattern (UStringView pattern) - { - UErrorCode e; - - umsg_applyPattern (handle, pattern.get.ptr, pattern.len, null, e); - testError (e, "failed to set formatter pattern"); - return this; - } - - 
/*********************************************************************** - - Gets the pattern - - ***********************************************************************/ - - UMessageFormat getPattern (UString s) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return umsg_toPattern (handle, dst, length, e); - } - - s.format (&fmt, "failed to get formatter pattern"); - return this; - } - - /*********************************************************************** - - This function may perform re-ordering of the arguments - depending on the locale. For all numeric arguments, double - is assumed unless the type is explicitly integer. All choice - format arguments must be of type double. - - ***********************************************************************/ - - UMessageFormat format (UString s, Args* list) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return umsg_vformat (handle, dst, length, list.args.ptr, e); - } - - s.format (&fmt, "failed to format pattern"); - return this; - } - - - /*********************************************************************** - - A typesafe list of arguments for the UMessageFormat.format() - method. This should be used in the following manner: - - @code - wchar[] format = "{0} {1, number, currency} {2, number, integer}"; - UMessageFormat msg = new UMessageFormat (format); - - msg.Args args; - msg.format (output, args.add("abc").add(152.0).add(456)); - @endcode - - Note that the argument order must follow that of the format - string, although the format string may dictate the ultimate - position of each argument. - - See http://oss.software.ibm.com/icu/apiref/umsg_8h.html for - details on the format string. - - @todo this will likely fail on certain CPU architectures. 
- - ***********************************************************************/ - - struct Args - { - private uint[32] args; - private uint index; - - /*************************************************************** - - ***************************************************************/ - - version( D_Version2 ){ - mixin( "invariant() { invariant_(); }"); - } - else{ - mixin( "invariant { invariant_(); }"); - } - private void invariant_(){ - assert (index < args.length); - } - - /*************************************************************** - - ***************************************************************/ - - Args* reset () - { - index = 0; - version(D_Version2){ - return &this; - } else { - return this; - } - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (UStringView x) - { - args[index] = cast(uint) cast(wchar*) x.get(); - ++index; - version(D_Version2){ - return &this; - } else { - return this; - } - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (wchar[] x) - { - args[index] = cast(uint) cast(wchar*) x; - ++index; - version(D_Version2){ - return &this; - } else { - return this; - } - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (int x) - { - args[index] = x; - ++index; - version(D_Version2){ - return &this; - } else { - return this; - } - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (double x) - { - *(cast(double*) &args[index]) = x; - index += 2; - version(D_Version2){ - return &this; - } else { - return this; - } - } - } - - - /*********************************************************************** - - Bind the ICU functions from a 
shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, char*, void*, inout UErrorCode) umsg_open; - void function (Handle) umsg_close; - void function (Handle, char*) umsg_setLocale; - char* function (Handle) umsg_getLocale; - uint function (Handle, wchar*, uint, inout UErrorCode) umsg_toPattern; - void function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_applyPattern; - uint function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_vformat; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &umsg_open, "umsg_open"}, - {cast(void**) &umsg_close, "umsg_close"}, - {cast(void**) &umsg_setLocale, "umsg_setLocale"}, - {cast(void**) &umsg_getLocale, "umsg_getLocale"}, - {cast(void**) &umsg_toPattern, "umsg_toPattern"}, - {cast(void**) &umsg_applyPattern, "umsg_applyPattern"}, - {cast(void**) &umsg_vformat, "umsg_vformat"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - - /*********************************************************************** - - 
***********************************************************************/ - - //static void test() - //{ - // UString output = new UString(100); - // wchar[] format = "{0} {1, number, currency} {2, number, integer}"; - - // UMessageFormat msg = new UMessageFormat (format); - - // msg.Args args; - // msg.format (output, args.add("abc").add(152.0).add(456)); - //} -} - - - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UNormalize.d --- a/base/src/java/mangoicu/UNormalize.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,391 +0,0 @@ -/******************************************************************************* - - @file UNormalize.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UNormalize; - -private import java.mangoicu.ICU, - java.mangoicu.UString, - java.mangoicu.ULocale; - -/******************************************************************************* - - transforms Unicode text into an equivalent composed or - decomposed form, allowing for easier sorting and searching - of text. UNormalize supports the standard normalization forms - described in http://www.unicode.org/unicode/reports/tr15/ - - Characters with accents or other adornments can be encoded - in several different ways in Unicode. For example, take the - character A-acute. In Unicode, this can be encoded as a single - character (the "composed" form): - - 00C1 LATIN CAPITAL LETTER A WITH ACUTE - - or as two separate characters (the "decomposed" form): - - 0041 LATIN CAPITAL LETTER A 0301 COMBINING ACUTE ACCENT - - To a user of your program, however, both of these sequences - should be treated as the same "user-level" character "A with - acute accent". When you are searching or comparing text, you - must ensure that these two sequences are treated equivalently. - In addition, you must handle characters with more than one - accent. Sometimes the order of a character's combining accents - is significant, while in other cases accent sequences in different - orders are really equivalent. - - Similarly, the string "ffi" can be encoded as three separate - letters: - - 0066 LATIN SMALL LETTER F 0066 LATIN SMALL LETTER F - 0069 LATIN SMALL LETTER I - - or as the single character - - FB03 LATIN SMALL LIGATURE FFI - - The ffi ligature is not a distinct semantic character, and strictly - speaking it shouldn't be in Unicode at all, but it was included for - compatibility with existing character sets that already provided it. - The Unicode standard identifies such characters by giving them - "compatibility" decompositions into the corresponding semantic - characters. 
When sorting and searching, you will often want to use - these mappings. - - unorm_normalize helps solve these problems by transforming text into - the canonical composed and decomposed forms as shown in the first - example above. In addition, you can have it perform compatibility - decompositions so that you can treat compatibility characters the - same as their equivalents. Finally, UNormalize rearranges - accents into the proper canonical order, so that you do not have - to worry about accent rearrangement on your own. - - Form FCD, "Fast C or D", is also designed for collation. It allows - to work on strings that are not necessarily normalized with an - algorithm (like in collation) that works under "canonical closure", - i.e., it treats precomposed characters and their decomposed - equivalents the same. - - It is not a normalization form because it does not provide for - uniqueness of representation. Multiple strings may be canonically - equivalent (their NFDs are identical) and may all conform to FCD - without being identical themselves. - - The form is defined such that the "raw decomposition", the - recursive canonical decomposition of each character, results - in a string that is canonically ordered. This means that - precomposed characters are allowed for as long as their - decompositions do not need canonical reordering. - - Its advantage for a process like collation is that all NFD - and most NFC texts - and many unnormalized texts - already - conform to FCD and do not need to be normalized (NFD) for - such a process. The FCD quick check will return UNORM_YES - for most strings in practice. - - For more details on FCD see the collation design document: - http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm - - ICU collation performs either NFD or FCD normalization - automatically if normalization is turned on for the collator - object. 
Beyond collation and string search, normalized strings - may be useful for string equivalence comparisons, transliteration/ - transcription, unique representations, etc. - - The W3C generally recommends to exchange texts in NFC. Note also - that most legacy character encodings use only precomposed forms - and often do not encode any combining marks by themselves. For - conversion to such character encodings the Unicode text needs to - be normalized to NFC. For more usage examples, see the Unicode - Standard Annex. - - See - this page for full details. - - -*******************************************************************************/ - -class UNormalize : ICU -{ - enum Mode - { - None = 1, - NFD = 2, - NFKD = 3, - NFC = 4, - Default = NFC, - NFKC = 5, - FCD = 6, - Count - } - - enum Check - { - No, - Yes, - Maybe - } - - enum Options - { - None = 0x00, - Unicode32 = 0x20 - } - - /*********************************************************************** - - Normalize a string. The string will be normalized according - the specified normalization mode and options - - ***********************************************************************/ - - static void normalize (UStringView src, UString dst, Mode mode, Options o = Options.None) - { - uint fmt (wchar* dst, uint len, inout UErrorCode e) - { - return unorm_normalize (src.get.ptr, src.len, mode, o, dst, len, e); - } - - dst.format (&fmt, "failed to normalize"); - } - - /*********************************************************************** - - Performing quick check on a string, to quickly determine - if the string is in a particular normalization format. - - Three types of result can be returned: Yes, No or Maybe. - Result Yes indicates that the argument string is in the - desired normalized format, No determines that argument - string is not in the desired normalized format. 
A Maybe - result indicates that a more thorough check is required, - the user may have to put the string in its normalized - form and compare the results. - - ***********************************************************************/ - - static Check check (UStringView t, Mode mode, Options o = Options.None) - { - UErrorCode e; - - Check c = cast(Check) unorm_quickCheckWithOptions (t.get.ptr, t.len, mode, o, e); - testError (e, "failed to perform normalization check"); - return c; - } - - /*********************************************************************** - - Test if a string is in a given normalization form. - - Unlike check(), this function returns a definitive result, - never a "maybe". For NFD, NFKD, and FCD, both functions - work exactly the same. For NFC and NFKC where quickCheck - may return "maybe", this function will perform further - tests to arrive at a TRUE/FALSE result. - - ***********************************************************************/ - - static bool isNormalized (UStringView t, Mode mode, Options o = Options.None) - { - UErrorCode e; - - byte b = unorm_isNormalizedWithOptions (t.get.ptr, t.len, mode, o, e); - testError (e, "failed to perform normalization test"); - return b != 0; - } - - /*********************************************************************** - - Concatenate normalized strings, making sure that the result - is normalized as well. If both the left and the right strings - are in the normalization form according to "mode/options", - then the result will be - - dest=normalize(left+right, mode, options) - - With the input strings already being normalized, this function - will use unorm_next() and unorm_previous() to find the adjacent - end pieces of the input strings. Only the concatenation of these - end pieces will be normalized and then concatenated with the - remaining parts of the input strings. - - It is allowed to have dst==left to avoid copying the entire - left string. 
- - ***********************************************************************/ - - static void concatenate (UStringView left, UStringView right, UString dst, Mode mode, Options o = Options.None) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return unorm_concatenate (left.get.ptr, left.len, right.get.ptr, right.len, p, len, mode, o, e); - } - - dst.format (&fmt, "failed to concatenate"); - } - - /*********************************************************************** - - Compare two strings for canonical equivalence. Further - options include case-insensitive comparison and code - point order (as opposed to code unit order). - - Canonical equivalence between two strings is defined as - their normalized forms (NFD or NFC) being identical. - This function compares strings incrementally instead of - normalizing (and optionally case-folding) both strings - entirely, improving performance significantly. - - Bulk normalization is only necessary if the strings do - not fulfill the FCD conditions. Only in this case, and - only if the strings are relatively long, is memory - allocated temporarily. For FCD strings and short non-FCD - strings there is no memory allocation. - - ***********************************************************************/ - - static int compare (UStringView left, UStringView right, Options o = Options.None) - { - UErrorCode e; - - int i = unorm_compare (left.get.ptr, left.len, right.get.ptr, right.len, o, e); - testError (e, "failed to compare"); - return i; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - uint function (wchar*, uint, uint, uint, wchar*, uint, inout UErrorCode) unorm_normalize; - uint function (wchar*, uint, uint, uint, inout UErrorCode) unorm_quickCheckWithOptions; - byte function (wchar*, uint, uint, uint, inout UErrorCode) unorm_isNormalizedWithOptions; - uint function (wchar*, uint, wchar*, uint, wchar*, uint, uint, uint, inout UErrorCode) unorm_concatenate; - uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) unorm_compare; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &unorm_normalize, "unorm_normalize"}, - {cast(void**) &unorm_quickCheckWithOptions, "unorm_quickCheckWithOptions"}, - {cast(void**) &unorm_isNormalizedWithOptions, "unorm_isNormalizedWithOptions"}, - {cast(void**) &unorm_concatenate, "unorm_concatenate"}, - {cast(void**) &unorm_compare, "unorm_compare"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UNumberFormat.d --- a/base/src/java/mangoicu/UNumberFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null 
Thu Jan 01 00:00:00 1970 +0000 @@ -1,934 +0,0 @@ -/******************************************************************************* - - @file UNumberFormat.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UNumberFormat; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -public import java.mangoicu.ULocale; - -/******************************************************************************* - -*******************************************************************************/ - -class UDecimalFormat : UCommonFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.Decimal, null, locale); - } - - /*********************************************************************** - - Set the pattern for a UDecimalFormat - - ***********************************************************************/ - - void setPattern (UStringView pattern, bool localized) - { - UErrorCode e; - - unum_applyPattern (handle, localized, pattern.get.ptr, pattern.length, null, e); - testError (e, "failed to set numeric pattern"); - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -class UCurrencyFormat : UCommonFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.Currency, null, locale); - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -class UPercentFormat : UCommonFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.Percent, null, locale); - } -} - - 
-/******************************************************************************* - -*******************************************************************************/ - -class UScientificFormat : UCommonFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.Scientific, null, locale); - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -class USpelloutFormat : UCommonFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.Spellout, null, locale); - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -class UDurationFormat : UCommonFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.Duration, null, locale); - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -class URuleBasedFormat : UNumberFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (inout ULocale locale) - { - super (Style.RuleBased, null, locale); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setLenientParse (bool yes) - { - 
unum_setAttribute (handle, Attribute.LenientParse, yes); - } - - - /*********************************************************************** - - ***********************************************************************/ - - bool isLenientParse () - { - return unum_getAttribute (handle, Attribute.LenientParse) != 0; - } -} - - -/******************************************************************************* - -*******************************************************************************/ - -private class UCommonFormat : UNumberFormat -{ - /*********************************************************************** - - ***********************************************************************/ - - this (Style style, char[] pattern, inout ULocale locale) - { - super (style, pattern, locale); - } - - /*********************************************************************** - - Return true if this format will parse numbers as integers - only - - ***********************************************************************/ - - bool isParseIntegerOnly () - { - return unum_getAttribute (handle, Attribute.ParseIntOnly) != 0; - } - - /*********************************************************************** - - Returns true if grouping is used in this format. - - ***********************************************************************/ - - bool isGroupingUsed () - { - return unum_getAttribute (handle, Attribute.GroupingUsed) != 0; - } - - /*********************************************************************** - - Always show decimal point? 
- - ***********************************************************************/ - - bool isDecimalSeparatorAlwaysShown () - { - return unum_getAttribute (handle, Attribute.DecimalAlwaysShown) != 0; - } - - /*********************************************************************** - - Sets whether or not numbers should be parsed as integers - only - - ***********************************************************************/ - - void setParseIntegerOnly (bool yes) - { - unum_setAttribute (handle, Attribute.ParseIntOnly, yes); - } - - /*********************************************************************** - - Set whether or not grouping will be used in this format. - - ***********************************************************************/ - - void setGroupingUsed (bool yes) - { - unum_setAttribute (handle, Attribute.GroupingUsed, yes); - } - - /*********************************************************************** - - Always show decimal point. - - ***********************************************************************/ - - void setDecimalSeparatorAlwaysShown (bool yes) - { - unum_setAttribute (handle, Attribute.DecimalAlwaysShown, yes); - } - - /*********************************************************************** - - Sets the maximum number of digits allowed in the integer - portion of a number. - - ***********************************************************************/ - - void setMaxIntegerDigits (uint x) - { - unum_setAttribute (handle, Attribute.MaxIntegerDigits, x); - } - - /*********************************************************************** - - Sets the minimum number of digits allowed in the integer - portion of a number. 
- - ***********************************************************************/ - - void setMinIntegerDigits (uint x) - { - unum_setAttribute (handle, Attribute.MinIntegerDigits, x); - } - - /*********************************************************************** - - Integer digits displayed - - ***********************************************************************/ - - void setIntegerDigits (uint x) - { - unum_setAttribute (handle, Attribute.IntegerDigits, x); - } - - /*********************************************************************** - - Sets the maximum number of digits allowed in the fraction - portion of a number. - - ***********************************************************************/ - - void setMaxFractionDigits (uint x) - { - unum_setAttribute (handle, Attribute.MaxFractionDigits, x); - } - - /*********************************************************************** - - Sets the minimum number of digits allowed in the fraction - portion of a number. - - ***********************************************************************/ - - void setMinFractionDigits (uint x) - { - unum_setAttribute (handle, Attribute.MinFractionDigits, x); - } - - /*********************************************************************** - - Fraction digits. 
- - ***********************************************************************/ - - void setFractionDigits (uint x) - { - unum_setAttribute (handle, Attribute.FractionDigits, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setMultiplier (uint x) - { - unum_setAttribute (handle, Attribute.Multiplier, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setGroupingSize (uint x) - { - unum_setAttribute (handle, Attribute.GroupingSize, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setRoundingMode (Rounding x) - { - unum_setAttribute (handle, Attribute.RoundingMode, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setRoundingIncrement (uint x) - { - unum_setAttribute (handle, Attribute.RoundingIncrement, x); - } - - /*********************************************************************** - - The width to which the output of format() is padded - - ***********************************************************************/ - - void setFormatWidth (uint x) - { - unum_setAttribute (handle, Attribute.FormatWidth, x); - } - - /*********************************************************************** - - The position at which padding will take place. 
- - ***********************************************************************/ - - void setPaddingPosition (Pad x) - { - unum_setAttribute (handle, Attribute.PaddingPosition, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setSecondaryGroupingSize (uint x) - { - unum_setAttribute (handle, Attribute.SecondaryGroupingSize, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setSignificantDigitsUsed (uint x) - { - unum_setAttribute (handle, Attribute.SignificantDigitsUsed, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setMinSignificantDigits (uint x) - { - unum_setAttribute (handle, Attribute.MinSignificantDigits, x); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setMaxSignificantDigits (uint x) - { - unum_setAttribute (handle, Attribute.MaxSignificantDigits, x); - } - - - /*********************************************************************** - - Returns the maximum number of digits allowed in the integer - portion of a number. - - ***********************************************************************/ - - uint getMaxIntegerDigits () - { - return unum_getAttribute (handle, Attribute.MaxIntegerDigits); - } - - /*********************************************************************** - - Returns the minimum number of digits allowed in the integer - portion of a number. 
- - ***********************************************************************/ - - uint getMinIntegerDigits () - { - return unum_getAttribute (handle, Attribute.MinIntegerDigits); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getIntegerDigits () - { - return unum_getAttribute (handle, Attribute.IntegerDigits); - } - - /*********************************************************************** - - Returns the maximum number of digits allowed in the fraction - portion of a number. - - ***********************************************************************/ - - uint getMaxFractionDigits () - { - return unum_getAttribute (handle, Attribute.MaxFractionDigits); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getMinFractionDigits () - { - return unum_getAttribute (handle, Attribute.MinFractionDigits); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getFractionDigits () - { - return unum_getAttribute (handle, Attribute.FractionDigits); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getMultiplier () - { - return unum_getAttribute (handle, Attribute.Multiplier); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getGroupingSize () - { - return unum_getAttribute (handle, Attribute.GroupingSize); - } - - /*********************************************************************** - - ***********************************************************************/ - - Rounding getRoundingMode () - { - return 
cast(Rounding) unum_getAttribute (handle, Attribute.RoundingMode); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getRoundingIncrement () - { - return unum_getAttribute (handle, Attribute.RoundingIncrement); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getFormatWidth () - { - return unum_getAttribute (handle, Attribute.FormatWidth); - } - - /*********************************************************************** - - ***********************************************************************/ - - Pad getPaddingPosition () - { - return cast(Pad) unum_getAttribute (handle, Attribute.PaddingPosition); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getSecondaryGroupingSize () - { - return unum_getAttribute (handle, Attribute.SecondaryGroupingSize); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getSignificantDigitsUsed () - { - return unum_getAttribute (handle, Attribute.SignificantDigitsUsed); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getMinSignificantDigits () - { - return unum_getAttribute (handle, Attribute.MinSignificantDigits); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getMaxSignificantDigits () - { - return unum_getAttribute (handle, Attribute.MaxSignificantDigits); - } - - /*********************************************************************** - - 
***********************************************************************/ - - void getPattern (UString dst, bool localize) - { - uint fmat (wchar* result, uint len, inout UErrorCode e) - { - return unum_toPattern (handle, localize, result, len, e); - } - - dst.format (&fmat, "failed to retrieve numeric format pattern"); - } -} - - -/******************************************************************************* - - UNumberFormat provides functions for formatting and parsing - a number. Also provides methods for determining which locales have - number formats, and what their names are. - - UNumberFormat helps you to format and parse numbers for any locale. - Your code can be completely independent of the locale conventions - for decimal points, thousands-separators, or even the particular - decimal digits used, or whether the number format is even decimal. - There are different number format styles like decimal, currency, - percent and spellout - - See - this page for full details. - -*******************************************************************************/ - -class UNumberFormat : ICU -{ - package Handle handle; - - typedef void* UFieldPos; - typedef void* ParseError; - - - public enum Rounding - { - Ceiling, - Floor, - Down, - Up, - HalfEven, - HalfDown, - HalfUp - }; - - public enum Pad - { - BeforePrefix, - AfterPrefix, - BeforeSuffix, - AfterSuffix - }; - - public enum Style - { - PatternDecimal, - Decimal, - Currency, - Percent, - Scientific, - Spellout, - Ordinal, - Duration, - RuleBased, - Default = Decimal, - Ignore = PatternDecimal - }; - - private enum Attribute - { - ParseIntOnly, - GroupingUsed, - DecimalAlwaysShown, - MaxIntegerDigits, - MinIntegerDigits, - IntegerDigits, - MaxFractionDigits, - MinFractionDigits, - FractionDigits, - Multiplier, - GroupingSize, - RoundingMode, - RoundingIncrement, - FormatWidth, - PaddingPosition, - SecondaryGroupingSize, - SignificantDigitsUsed, - MinSignificantDigits, - MaxSignificantDigits, - LenientParse - }; 
- - private enum Symbol - { - DecimalSeparator, - GroupingSeparator, - PatternSeparator, - Percent, - ZeroDigit, - Digit, - MinusSign, - PlusSign, - Currency, - IntlCurrency, - MonetarySeparator, - Exponential, - Permill, - PadEscape, - Infinity, - Nan, - SignificantDigit, - FormatSymbolCount - }; - - /*********************************************************************** - - ***********************************************************************/ - - this (Style style, char[] pattern, inout ULocale locale) - { - UErrorCode e; - - handle = unum_open (style, pattern.ptr, pattern.length, toString(locale.name), null, e); - testError (e, "failed to create NumberFormat"); - } - - /*********************************************************************** - - ***********************************************************************/ - - ~this () - { - unum_close (handle); - } - - /*********************************************************************** - - ***********************************************************************/ - - void format (UString dst, int number, UFieldPos p = null) - { - uint fmat (wchar* result, uint len, inout UErrorCode e) - { - return unum_format (handle, number, result, len, p, e); - } - - dst.format (&fmat, "int format failed"); - } - - /*********************************************************************** - - ***********************************************************************/ - - void format (UString dst, long number, UFieldPos p = null) - { - uint fmat (wchar* result, uint len, inout UErrorCode e) - { - return unum_formatInt64 (handle, number, result, len, p, e); - } - - dst.format (&fmat, "int64 format failed"); - } - - /*********************************************************************** - - ***********************************************************************/ - - void format (UString dst, double number, UFieldPos p = null) - { - uint fmat (wchar* result, uint len, inout UErrorCode e) - { - return unum_formatDouble (handle, 
number, result, len, p, e); - } - - dst.format (&fmat, "double format failed"); - } - - /*********************************************************************** - - ***********************************************************************/ - - int parseInteger (UStringView src, uint* index=null) - { - UErrorCode e; - - return unum_parse (handle, src.content.ptr, src.len, index, e); - } - - /*********************************************************************** - - ***********************************************************************/ - - long parseLong (UStringView src, uint* index=null) - { - UErrorCode e; - - return unum_parseInt64 (handle, src.content.ptr, src.len, index, e); - } - - /*********************************************************************** - - ***********************************************************************/ - - double parseDouble (UStringView src, uint* index=null) - { - UErrorCode e; - - return unum_parseDouble (handle, src.content.ptr, src.len, index, e); - } - - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (uint, char*, uint, char*, ParseError, inout UErrorCode) unum_open; - void function (Handle) unum_close; - int function (Handle, int, wchar*, uint, UFieldPos, inout UErrorCode) unum_format; - int function (Handle, long, wchar*, uint, UFieldPos, inout UErrorCode) unum_formatInt64; - int function (Handle, double, wchar*, uint, UFieldPos, inout UErrorCode) unum_formatDouble; - int function (Handle, wchar*, uint, uint*, inout UErrorCode) unum_parse; - long function (Handle, wchar*, uint, uint*, inout UErrorCode) unum_parseInt64; - double function (Handle, wchar*, uint, uint*, inout UErrorCode) unum_parseDouble; - int function (Handle, uint) unum_getAttribute; - void function (Handle, uint, uint) unum_setAttribute; - uint function (Handle, byte, wchar*, uint, inout UErrorCode) unum_toPattern; - void function (Handle, byte, wchar*, uint, ParseError, inout UErrorCode) unum_applyPattern; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &unum_open, "unum_open"}, - {cast(void**) &unum_close, "unum_close"}, - {cast(void**) &unum_format, "unum_format"}, - {cast(void**) &unum_formatInt64 "unum_formatInt64"}, - {cast(void**) &unum_formatDouble "unum_formatDouble"}, - {cast(void**) &unum_parse, "unum_parse"}, - {cast(void**) &unum_parseInt64 "unum_parseInt64"}, - {cast(void**) &unum_parseDouble "unum_parseDouble"}, - {cast(void**) &unum_getAttribute "unum_getAttribute"}, - {cast(void**) &unum_setAttribute 
"unum_setAttribute"}, - {cast(void**) &unum_toPattern "unum_toPattern"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - - - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/URegex.d --- a/base/src/java/mangoicu/URegex.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,700 +0,0 @@ -/******************************************************************************* - - @file URegex.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. 
- - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. 
- - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.URegex; - -private import java.mangoicu.ICU; - -public import java.mangoicu.ULocale, - java.mangoicu.UString, - java.mangoicu.UCollator, - java.mangoicu.UBreakIterator; - - -/******************************************************************************* - - Set of slices to return for group matching. See URegex.groups() - -*******************************************************************************/ - -class Groups : ICU -{ - public wchar[] g0, - g1, - g2, - g3, - g4, - g5, - g6, - g7, - g8, - g9; -} - -/******************************************************************************* - - Apis for an engine that provides regular-expression searching of - UTF16 strings. - - See http://icu.sourceforge.net/apiref/icu4c/uregex_8h.html for full - details. - -*******************************************************************************/ - -class URegex : Groups -{ - private Handle handle; - private UStringView theText; - - // Regex modes - public enum Flag - { - None = 0, - - // Enable case insensitive matching - CaseInsensitive = 2, - - // Allow white space and comments within patterns - Comments = 4, - - // Control behavior of "$" and "^" If set, recognize - // line terminators within string, otherwise, match - // only at start and end of input string. - MultiLine = 8, - - // If set, '.' matches line terminators, otherwise '.' - // matching stops at line end - DotAll = 32, - - // Forces normalization of pattern and strings - CanonEq = 128, - - // If set, uses the Unicode TR 29 definition of word - // boundaries. Warning: Unicode word boundaries are - // quite different from traditional regular expression - // word boundaries. 
See http://unicode.org/reports/tr29/#Word_Boundaries - UWord = 256, - } - - /*********************************************************************** - - Compiles the regular expression in string form into an - internal representation using the specified match mode - flags. The resulting regular expression handle can then - be used to perform various matching operations. - - ***********************************************************************/ - - this (wchar[] pattern, Flag flags=Flag.None, ParseError* pe=null) - { - UErrorCode e; - - handle = uregex_open (pattern.ptr, pattern.length, flags, pe, e); - testError (e, "failed to open regex"); - uregex_setText (handle, null, 0, e); - } - - /*********************************************************************** - - Compiles the regular expression in string form into an - internal representation using the specified match mode - flags. The resulting regular expression handle can then - be used to perform various matching operations. - - ***********************************************************************/ - - this (UStringView pattern, Flag flags=Flag.None, ParseError* pe=null) - { - this (pattern.get, flags, pe); - } - - /*********************************************************************** - - Internal constructor; used for cloning - - ***********************************************************************/ - - private this (Handle handle) - { - UErrorCode e; - - this.handle = handle; - uregex_setText (handle, null, 0, e); - } - - /*********************************************************************** - - Close the regular expression, recovering all resources (memory) - it was holding - - ***********************************************************************/ - - ~this () - { - uregex_close (handle); - } - - /*********************************************************************** - - Cloning a regular expression is faster than opening a second - instance from the source form of the expression, and requires - 
less memory. - - Note that the current input string and the position of any - matched text within it are not cloned; only the pattern itself - and and the match mode flags are copied. - - Cloning can be particularly useful to threaded applications - that perform multiple match operations in parallel. Each - concurrent RE operation requires its own instance of a - URegularExpression. - - ***********************************************************************/ - - URegex clone () - { - UErrorCode e; - - Handle h = uregex_clone (handle, e); - testError (e, "failed to clone regex"); - return new URegex (h); - } - - /*********************************************************************** - - Return a copy of the source form of the pattern for this - regular expression - - ***********************************************************************/ - - UString getPattern () - { - UErrorCode e; - uint len; - - wchar* x = uregex_pattern (handle, len, e); - testError (e, "failed to extract regex pattern"); - return new UString (x[0..len]); - } - - /*********************************************************************** - - Get the match mode flags that were specified when compiling - this regular expression - - ***********************************************************************/ - - Flag getFlags () - { - UErrorCode e; - - Flag f = cast(Flag) uregex_flags (handle, e); - testError (e, "failed to get regex flags"); - return f; - } - - /*********************************************************************** - - Set the subject text string upon which the regular expression - will look for matches. - - This function may be called any number of times, allowing the - regular expression pattern to be applied to different strings. - - Regular expression matching operations work directly on the - application's string data. No copy is made. 
The subject string - data must not be altered after calling this function until after - all regular expression operations involving this string data are - completed. - - Zero length strings are permitted. In this case, no subsequent - match operation will dereference the text string pointer. - - ***********************************************************************/ - - void setText (UStringView t) - { - UErrorCode e; - - theText = t; - uregex_setText (handle, t.get.ptr, t.length, e); - testError (e, "failed to set regex text"); - } - - /*********************************************************************** - - Get the subject text that is currently associated with this - regular expression object. This simply returns whatever was - previously supplied via setText(). - - Note that this returns a read-only reference to the text. - - ***********************************************************************/ - - UStringView getText () - { - return theText; - } - - /*********************************************************************** - - Return a set of slices representing the parenthesised groups. - This can be used in the following manner: - - @code - wchar msg; - - if (regex.next()) - with (regex.groups()) - msg ~= g1 ~ ":" ~ g2 - @endcode - - Note that g0 represents the entire match, whereas g1 through - g9 represent the parenthesised expressions. - - ***********************************************************************/ - - Groups groups () - { - wchar[]* p = &g0; - uint count = groupCount(); - wchar[] content = theText.get(); - - if (count > 9) - count = 9; - for (uint i=0; i <= count; ++p, ++i) - *p = content [start(i)..end(i)]; - return this; - } - - /*********************************************************************** - - Extract the string for the specified matching expression or - subexpression. UString 's' is the destination for the match. - - Group #0 is the complete string of matched text. 
Group #1 is - the text matched by the first set of capturing parentheses. - - ***********************************************************************/ - - void group (UString s, uint index) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return uregex_group (handle, index, dst, length, e); - } - - s.format (&fmt, "failed to extract regex group text"); - } - - /*********************************************************************** - - Get the number of capturing groups in this regular - expression's pattern - - ***********************************************************************/ - - uint groupCount () - { - UErrorCode e; - - uint i = uregex_groupCount (handle, e); - testError (e, "failed to get regex group-count"); - return i; - } - - /*********************************************************************** - - Returns the index in the input string of the start of the - text matched by the specified capture group during the - previous match operation. - - Return -1 if the capture group was not part of the last - match. Group #0 refers to the complete range of matched - text. Group #1 refers to the text matched by the first - set of capturing parentheses - - ***********************************************************************/ - - uint start (uint index = 0) - { - UErrorCode e; - - uint i = uregex_start (handle, index, e); - testError (e, "failed to get regex start"); - return i; - } - - /*********************************************************************** - - Returns the index in the input string of the position - following the end of the text matched by the specified - capture group. - - Return -1 if the capture group was not part of the last - match. Group #0 refers to the complete range of matched - text. Group #1 refers to the text matched by the first - set of capturing parentheses. 
- - ***********************************************************************/ - - uint end (uint index = 0) - { - UErrorCode e; - - uint i = uregex_end (handle, index, e); - testError (e, "failed to get regex end"); - return i; - } - - /*********************************************************************** - - Reset any saved state from the previous match. - - Has the effect of causing uregex_findNext to begin at the - specified index, and causing uregex_start(), uregex_end() - and uregex_group() to return an error indicating that there - is no match information available. - - ***********************************************************************/ - - void reset (uint startIndex) - { - UErrorCode e; - - uregex_reset (handle, startIndex, e); - testError (e, "failed to set regex next-index"); - } - - /*********************************************************************** - - Attempts to match the input string, beginning at startIndex, - against the pattern. - - To succeed, the match must extend to the end of the input - string - - ***********************************************************************/ - - bool match (uint startIndex) - { - UErrorCode e; - - bool b = uregex_matches (handle, startIndex, e); - testError (e, "failed while matching regex"); - return b; - } - - /*********************************************************************** - - Attempts to match the input string, starting from the - specified index, against the pattern. - - The match may be of any length, and is not required to - extend to the end of the input string. 
Contrast with match() - - ***********************************************************************/ - - bool probe (uint startIndex) - { - UErrorCode e; - - bool b = uregex_lookingAt (handle, startIndex, e); - testError (e, "failed while looking at regex"); - return b; - } - - /*********************************************************************** - - Returns whether the text matches the search pattern, starting - from the current position. - - If startIndex is specified, the current position is moved to - the specified location before the seach is initiated. - - ***********************************************************************/ - - bool next (uint startIndex = uint.max) - { - UErrorCode e; - bool b; - - b = (startIndex == uint.max) ? uregex_findNext (handle, e) : - uregex_find (handle, startIndex, e); - - testError (e, "failed on next regex"); - return b; - } - - /*********************************************************************** - - Replaces every substring of the input that matches the pattern - with the given replacement string. - - This is a convenience function that provides a complete - find-and-replace-all operation. - - This method scans the input string looking for matches of - the pattern. Input that is not part of any match is copied - unchanged to the destination buffer. Matched regions are - replaced in the output buffer by the replacement string. - The replacement string may contain references to capture - groups; these take the form of $1, $2, etc. - - The provided 'result' will contain the results, and should - be set with a length sufficient to house the entire result. - Upon completion, the 'result' is shortened appropriately - and the total extent (length) of the operation is returned. - Set the initital length of 'result' using the UString method - truncate(). - - The returned extent should be checked to ensure it is not - longer than the length of 'result'. If it is longer, then - the result has been truncated. 
- - ***********************************************************************/ - - uint replaceAll (UStringView replace, UString result) - { - UErrorCode e; - - uint len = uregex_replaceAll (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); - testError (e, "failed during regex replace"); - result.truncate (len); - return len; - } - - /*********************************************************************** - - Replaces the first substring of the input that matches the - pattern with the given replacement string. - - This is a convenience function that provides a complete - find-and-replace operation. - - This method scans the input string looking for a match of - the pattern. All input that is not part of the match is - copied unchanged to the destination buffer. The matched - region is replaced in the output buffer by the replacement - string. The replacement string may contain references to - capture groups; these take the form of $1, $2, etc - - The provided 'result' will contain the results, and should - be set with a length sufficient to house the entire result. - Upon completion, the 'result' is shortened appropriately - and the total extent (length) of the operation is returned. - Set the initital length of 'result' using the UString method - truncate(). - - The returned extent should be checked to ensure it is not - longer than the length of 'result'. If it is longer, then - the result has been truncated. 
- - ***********************************************************************/ - - uint replaceFirst (UStringView replace, UString result) - { - UErrorCode e; - - uint len = uregex_replaceFirst (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); - testError (e, "failed during regex replace"); - result.truncate (len); - return len; - } - - /*********************************************************************** - - Split the text up into slices (fields), where each slice - represents the text situated between each pattern matched - within the text. The pattern is expected to represent one - or more slice delimiters. - - ***********************************************************************/ - - uint split (wchar[][] fields) - { - UErrorCode e; - uint pos, - count; - wchar[] content = theText.get; - - while (count < fields.length) - if (uregex_findNext (handle, e) && e == e.OK) - { - uint i = start(); - fields[count] = content[pos..i]; - pos = end (); - - // ignore leading delimiter - if (i) - ++count; - } - else - break; - - testError (e, "failed during split"); - return count; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, uint, ParseError*, inout UErrorCode) uregex_open; - void function (Handle) uregex_close; - Handle function (Handle, inout UErrorCode) uregex_clone; - wchar* function (Handle, inout uint, inout UErrorCode) uregex_pattern; - uint function (Handle, inout UErrorCode) uregex_flags; - void function (Handle, wchar*, uint, inout UErrorCode) uregex_setText; - wchar* function (Handle, inout uint, inout UErrorCode) uregex_getText; - uint function (Handle, uint, wchar*, uint, inout UErrorCode) uregex_group; - uint function (Handle, inout UErrorCode) uregex_groupCount; - uint function (Handle, uint, inout UErrorCode) uregex_start; - uint function (Handle, uint, inout UErrorCode) uregex_end; - void function (Handle, uint, inout UErrorCode) uregex_reset; - bool function (Handle, uint, inout UErrorCode) uregex_matches; - bool function (Handle, uint, inout UErrorCode) uregex_lookingAt; - bool function (Handle, uint, inout UErrorCode) uregex_find; - bool function (Handle, inout UErrorCode) uregex_findNext; - uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceAll; - uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceFirst; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uregex_open, "uregex_open"}, - {cast(void**) &uregex_close, "uregex_close"}, - {cast(void**) &uregex_clone, "uregex_clone"}, - {cast(void**) &uregex_pattern, "uregex_pattern"}, 
- {cast(void**) &uregex_flags, "uregex_flags"}, - {cast(void**) &uregex_setText, "uregex_setText"}, - {cast(void**) &uregex_getText, "uregex_getText"}, - {cast(void**) &uregex_group, "uregex_group"}, - {cast(void**) &uregex_groupCount, "uregex_groupCount"}, - {cast(void**) &uregex_start, "uregex_start"}, - {cast(void**) &uregex_end, "uregex_end"}, - {cast(void**) &uregex_reset, "uregex_reset"}, - {cast(void**) &uregex_matches, "uregex_matches"}, - {cast(void**) &uregex_lookingAt, "uregex_lookingAt"}, - {cast(void**) &uregex_find, "uregex_find"}, - {cast(void**) &uregex_findNext, "uregex_findNext"}, - {cast(void**) &uregex_replaceAll, "uregex_replaceAll"}, - {cast(void**) &uregex_replaceFirst, "uregex_replaceFirst"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UResourceBundle.d --- a/base/src/java/mangoicu/UResourceBundle.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,544 +0,0 @@ -/******************************************************************************* - - @file UResourceBundle.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. 
The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UResourceBundle; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -public import java.mangoicu.ULocale; - -/******************************************************************************* - - API representing a collection of resource information pertaining to - a given locale. A resource bundle provides a way of accessing locale- - specific information in a data file. You create a resource bundle that - manages the resources for a given locale and then ask it for individual - resources. - - Resource bundles in ICU4C are currently defined using text files which - conform to the following BNF definition. More on resource bundle concepts - and syntax can be found in the Users Guide. - - See - this page for full details. 
- -*******************************************************************************/ - -class UResourceBundle : ICU -{ - private Handle handle; - - /*********************************************************************** - - Internals opened up to the public - - ***********************************************************************/ - - // Numeric constants for types of resource items - public enum ResType - { - None = -1, - String = 0, - Binary = 1, - Table = 2, - Alias = 3, - Int = 7, - Array = 8, - IntVector = 14 - } - - /*********************************************************************** - - private constructor for internal use only - - ***********************************************************************/ - - private this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Constructs a resource bundle for the locale-specific bundle - in the specified path. - - locale This is the locale this resource bundle is for. To - get resources for the French locale, for example, you - would create a ResourceBundle passing ULocale::FRENCH - for the "locale" parameter, and all subsequent calls - to that resource bundle will return resources that - pertain to the French locale. If the caller passes a - Locale.Default parameter, the default locale for the - system (as returned by ULocale.getDefault()) will be - used. Passing Locale.Root will cause the root-locale - to be used. - - path This is a full pathname in the platform-specific - format for the directory containing the resource - data files we want to load resources from. We use - locale IDs to generate filenames, and the filenames - have this string prepended to them before being passed - to the C++ I/O functions. Therefore, this string must - always end with a directory delimiter (whatever that - is for the target OS) for this class to work correctly. 
- A null value will open the default ICU data-files - - ***********************************************************************/ - - this (inout ULocale locale, char[] path = null) - { - UErrorCode e; - - handle = ures_open (toString(path), toString(locale.name), e); - testError (e, "failed to open resource bundle"); - } - - /*********************************************************************** - - ***********************************************************************/ - - ~this () - { - ures_close (handle); - } - - /*********************************************************************** - - Returns the size of a resource. Size for scalar types is - always 1, and for vector/table types is the number of child - resources. - - ***********************************************************************/ - - uint getSize () - { - return ures_getSize (handle); - } - - /*********************************************************************** - - Returns a signed integer from a resource. This integer is - originally 28 bit and the sign gets propagated. 
- - ***********************************************************************/ - - int getInt () - { - UErrorCode e; - - int x = ures_getInt (handle, e); - testError (e, "failed to get resource integer"); - return x; - } - - /*********************************************************************** - - Returns a string from a string resource type - - ***********************************************************************/ - - UStringView getString () - { - UErrorCode e; - uint len; - - wchar* x = ures_getString (handle, len, e); - testError (e, "failed to get resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns the string in a given resource at the specified - index - - ***********************************************************************/ - - UStringView getString (uint index) - { - UErrorCode e; - uint len; - - wchar* x = ures_getStringByIndex (handle, index, len, e); - testError (e, "failed to get resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns a string in a resource that has a given key. This - procedure works only with table resources. 
- - ***********************************************************************/ - - UStringView getString (char[] key) - { - UErrorCode e; - uint len; - - wchar* x = ures_getStringByKey (handle, toString(key), len, e); - testError (e, "failed to get resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns the next string in a resource or NULL if there are - no more resources to iterate over - - ***********************************************************************/ - - UStringView getNextString () - { - UErrorCode e; - uint len; - char* key; - - wchar* x = ures_getNextString (handle, len, key, e); - testError (e, "failed to get next resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns a binary data from a resource. Can be used at most - primitive resource types (binaries, strings, ints) - - ***********************************************************************/ - - void[] getBinary () - { - UErrorCode e; - uint len; - - void* x = ures_getBinary (handle, len, e); - testError (e, "failed to get binary resource"); - return x[0..len]; - } - - /*********************************************************************** - - Returns an integer vector from a resource - - ***********************************************************************/ - - int[] getIntVector () - { - UErrorCode e; - uint len; - - int* x = ures_getIntVector (handle, len, e); - testError (e, "failed to get vector resource"); - return x[0..len]; - } - - /*********************************************************************** - - Checks whether the resource has another element to - iterate over - - ***********************************************************************/ - - bool hasNext () - { - return ures_hasNext (handle) != 0; - } - - /*********************************************************************** - - Resets 
the internal context of a resource so that - iteration starts from the first element - - ***********************************************************************/ - - void resetIterator () - { - ures_resetIterator (handle); - } - - /*********************************************************************** - - Returns the next resource in a given resource or NULL if - there are no more resources - - ***********************************************************************/ - - UResourceBundle getNextResource () - { - UErrorCode e; - - return get (ures_getNextResource (handle, null, e), e); - } - - /*********************************************************************** - - Returns a resource that has a given key. This procedure - works only with table resources. - - ***********************************************************************/ - - UResourceBundle getResource (char[] key) - { - UErrorCode e; - - return get (ures_getByKey (handle, toString(key), null, e), e); - } - - /*********************************************************************** - - Returns the resource at the specified index - - ***********************************************************************/ - - UResourceBundle getResource (uint index) - { - UErrorCode e; - - return get (ures_getByIndex (handle, index, null, e), e); - } - - /*********************************************************************** - - Return the version number associated with this ResourceBundle - as a UVersionInfo array - - ***********************************************************************/ - - void getVersion (inout Version info) - { - ures_getVersion (handle, info); - } - - /*********************************************************************** - - Return the ULocale associated with this ResourceBundle - - ***********************************************************************/ - - void getLocale (inout ULocale locale) - { - UErrorCode e; - - locale.name = toArray (ures_getLocale (handle, e)); - testError (e, "failed 
to get resource locale"); - } - - /*********************************************************************** - - Returns the key associated with this resource. Not all - the resources have a key - only those that are members - of a table. - - ***********************************************************************/ - - char[] getKey () - { - return toArray (ures_getKey (handle)); - } - - /*********************************************************************** - - Returns the type of a resource. Available types are - defined in enum UResType - - ***********************************************************************/ - - ResType getType () - { - return cast(ResType) ures_getType (handle); - } - - /*********************************************************************** - - Worker function for constructing internal ResourceBundle - instances. Returns null when the provided handle is null. - - ***********************************************************************/ - - private static final UResourceBundle get (Handle handle, inout UErrorCode e) - { - testError (e, "failed to create resource bundle"); - if (handle) - return new UResourceBundle (handle); - return null; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (char*, char*, inout UErrorCode) ures_open; - void function (Handle) ures_close; - char* function (Handle, inout UErrorCode) ures_getLocale; - void function (Handle, inout Version) ures_getVersion; - uint function (Handle) ures_getSize; - int function (Handle, inout UErrorCode) ures_getInt; - wchar* function (Handle, inout uint, inout UErrorCode) ures_getString; - wchar* function (Handle, uint, inout uint, inout UErrorCode) ures_getStringByIndex; - wchar* function (Handle, char*, inout uint, inout UErrorCode) ures_getStringByKey; - void* function (Handle, inout uint, inout UErrorCode) ures_getBinary; - int* function (Handle, inout uint, inout UErrorCode) ures_getIntVector; - byte function (Handle) ures_hasNext; - void function (Handle) ures_resetIterator; - wchar* function (Handle, inout uint, inout char*, inout UErrorCode) ures_getNextString; - char* function (Handle) ures_getKey; - int function (Handle) ures_getType; - Handle function (Handle, Handle, inout UErrorCode) ures_getNextResource; - Handle function (Handle, uint, Handle, inout UErrorCode) ures_getByIndex; - Handle function (Handle, char*, Handle, inout UErrorCode) ures_getByKey; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ures_open, "ures_open"}, - {cast(void**) &ures_close, "ures_close"}, - {cast(void**) &ures_getLocale, "ures_getLocale"}, - {cast(void**) &ures_getVersion, "ures_getVersion"}, - {cast(void**) &ures_getSize, 
"ures_getSize"}, - {cast(void**) &ures_getInt, "ures_getInt"}, - {cast(void**) &ures_getString, "ures_getString"}, - {cast(void**) &ures_getStringByIndex, "ures_getStringByIndex"}, - {cast(void**) &ures_getStringByKey, "ures_getStringByKey"}, - {cast(void**) &ures_getBinary, "ures_getBinary"}, - {cast(void**) &ures_hasNext, "ures_hasNext"}, - {cast(void**) &ures_resetIterator, "ures_resetIterator"}, - {cast(void**) &ures_getNextString, "ures_getNextString"}, - {cast(void**) &ures_getKey, "ures_getKey"}, - {cast(void**) &ures_getType, "ures_getType"}, - {cast(void**) &ures_getNextResource, "ures_getNextResource"}, - {cast(void**) &ures_getByIndex, "ures_getByIndex"}, - {cast(void**) &ures_getByKey, "ures_getByKey"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - - /*********************************************************************** - - ***********************************************************************/ - - static void test() - { - UResourceBundle b = new UResourceBundle (ULocale.Default); - UStringView t = b.getNextString(); - UResourceBundle b1 = b.getNextResource (); - } -} - - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/USearch.d --- a/base/src/java/mangoicu/USearch.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,607 +0,0 @@ -/******************************************************************************* - - @file USearch.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. 
In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.USearch; - -private import java.mangoicu.ICU; - -public import java.mangoicu.ULocale, - java.mangoicu.UString, - java.mangoicu.UCollator, - java.mangoicu.UBreakIterator; - -/******************************************************************************* - - Apis for an engine that provides language-sensitive text - searching based on the comparison rules defined in a UCollator - data struct. This ensures that language eccentricity can be handled, - e.g. for the German collator, characters ß and SS will be matched - if case is chosen to be ignored. See the "ICU Collation Design - Document" for more information. - - The algorithm implemented is a modified form of the Boyer Moore's - search. For more information see "Efficient Text Searching in Java", - published in Java Report in February, 1999, for further information - on the algorithm. - - There are 2 match options for selection: Let S' be the sub-string - of a text string S between the offsets start and end . A - pattern string P matches a text string S at the offsets if - - - option 1. Some canonical equivalent of P matches some canonical - equivalent of S' - - - option 2. P matches S' and if P starts or ends with a combining - mark, there exists no non-ignorable combining mark before - or after S' in S respectively. - - Option 2 will be the default - - This search has APIs similar to that of other text iteration - mechanisms such as the break iterators in ubrk.h. Using these - APIs, it is easy to scan through text looking for all occurances - of a given pattern. This search iterator allows changing of - direction by calling a reset followed by a next or previous. - Though a direction change can occur without calling reset first, - this operation comes with some speed penalty. 
Generally, match - results in the forward direction will match the result matches - in the backwards direction in the reverse order - - USearch provides APIs to specify the starting position within - the text string to be searched, e.g. setOffset(), previous(x) - and next(x). Since the starting position will be set as it - is specified, please take note that there are some dangerous - positions which the search may render incorrect results: - - - The midst of a substring that requires normalization. - - - If the following match is to be found, the position should - not be the second character which requires to be swapped - with the preceding character. Vice versa, if the preceding - match is to be found, position to search from should not be - the first character which requires to be swapped with the - next character. E.g certain Thai and Lao characters require - swapping. - - - If a following pattern match is to be found, any position - within a contracting sequence except the first will fail. - Vice versa if a preceding pattern match is to be found, - a invalid starting point would be any character within a - contracting sequence except the last. - - A breakiterator can be used if only matches at logical breaks are - desired. Using a breakiterator will only give you results that - exactly matches the boundaries given by the breakiterator. For - instance the pattern "e" will not be found in the string "\u00e9" - if a character break iterator is used. - - Options are provided to handle overlapping matches. E.g. In - English, overlapping matches produces the result 0 and 2 for - the pattern "abab" in the text "ababab", where else mutually - exclusive matches only produce the result of 0. - - Though collator attributes will be taken into consideration while - performing matches, there are no APIs here for setting and getting - the attributes. These attributes can be set by getting the collator - from getCollator() and using the APIs in UCollator. 
Lastly to update - String Search to the new collator attributes, reset() has to be called. - - See http://oss.software.ibm.com/icu/apiref/usearch_8h.html for full - details. - -*******************************************************************************/ - -class USearch : ICU -{ - private Handle handle; - private UBreakIterator* iterator; - - // DONE is returned by previous() and next() after all valid - // matches have been returned, and by first() and last() if - // there are no matches at all. - const uint Done = uint.max; - - //Possible types of searches - public enum Attribute - { - Overlap, - CanonicalMatch, - Count - } - - public enum AttributeValue - { - Default = -1, - Off, - On, - Count - } - - /*********************************************************************** - - Creating a search iterator data struct using the argument - locale language rule set - - ***********************************************************************/ - - this (UStringView pattern, UStringView text, inout ULocale locale, UBreakIterator* iterator = null) - { - UErrorCode e; - - this.iterator = iterator; - handle = usearch_open (pattern.get.ptr, pattern.length, text.get.ptr, text.length, toString(locale.name), ( iterator is null ) ? null : iterator.handle, e); - testError (e, "failed to open search"); - } - - /*********************************************************************** - - Creating a search iterator data struct using the argument - locale language rule set - - ***********************************************************************/ - - this (UStringView pattern, UStringView text, UCollator col, UBreakIterator* iterator = null) - { - UErrorCode e; - - this.iterator = iterator; - handle = usearch_openFromCollator (pattern.get.ptr, pattern.length, text.get.ptr, text.length, col.handle, ( iterator is null ) ? 
null : iterator.handle, e); - testError (e, "failed to open search from collator"); - } - - /*********************************************************************** - - Close this USearch - - ***********************************************************************/ - - ~this () - { - usearch_close (handle); - } - - /*********************************************************************** - - Sets the current position in the text string which the - next search will start from. - - ***********************************************************************/ - - void setOffset (uint position) - { - UErrorCode e; - - usearch_setOffset (handle, position, e); - testError (e, "failed to set search offset"); - } - - /*********************************************************************** - - Return the current index in the string text being searched - - ***********************************************************************/ - - uint getOffset () - { - return usearch_getOffset (handle); - } - - /*********************************************************************** - - Returns the index to the match in the text string that was - searched - - ***********************************************************************/ - - uint getMatchedStart () - { - return usearch_getMatchedStart (handle); - } - - /*********************************************************************** - - Returns the length of text in the string which matches the - search pattern - - ***********************************************************************/ - - uint getMatchedLength () - { - return usearch_getMatchedLength (handle); - } - - /*********************************************************************** - - Returns the text that was matched by the most recent call to - first(), next(), previous(), or last(). 
- - ***********************************************************************/ - - void getMatchedText (UString s) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return usearch_getMatchedText (handle, dst, length, e); - } - - s.format (&fmt, "failed to extract matched text"); - } - - /*********************************************************************** - - Set the string text to be searched. - - ***********************************************************************/ - - void setText (UStringView t) - { - UErrorCode e; - - usearch_setText (handle, t.get.ptr, t.length, e); - testError (e, "failed to set search text"); - } - - /*********************************************************************** - - Return the string text to be searched. Note that this - returns a read-only reference to the search text. - - ***********************************************************************/ - - UStringView getText () - { - uint len; - - wchar *x = usearch_getText (handle, &len); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Sets the pattern used for matching - - ***********************************************************************/ - - void setPattern (UStringView t) - { - UErrorCode e; - - usearch_setPattern (handle, t.get.ptr, t.length, e); - testError (e, "failed to set search pattern"); - } - - /*********************************************************************** - - Gets the search pattern. Note that this returns a - read-only reference to the pattern. - - ***********************************************************************/ - - UStringView getPattern () - { - uint len; - - wchar *x = usearch_getPattern (handle, &len); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Set the BreakIterator that will be used to restrict the - points at which matches are detected. 
- - ***********************************************************************/ - - void setIterator (UBreakIterator* iterator) - { - UErrorCode e; - - this.iterator = iterator; - usearch_setBreakIterator (handle, cast(Handle)iterator.handle, e); - testError (e, "failed to set search iterator"); - } - - /*********************************************************************** - - Get the BreakIterator that will be used to restrict the - points at which matches are detected. - - ***********************************************************************/ - - UBreakIterator* getIterator () - { - return iterator; - } - - /*********************************************************************** - - Returns the first index at which the string text matches - the search pattern - - ***********************************************************************/ - - uint first () - { - UErrorCode e; - - uint x = usearch_first (handle, e); - testError (e, "failed on first search"); - return x; - } - - /*********************************************************************** - - Returns the last index in the target text at which it - matches the search pattern - - ***********************************************************************/ - - uint last () - { - UErrorCode e; - - uint x = usearch_last (handle, e); - testError (e, "failed on last search"); - return x; - } - - /*********************************************************************** - - Returns the index of the next point at which the string - text matches the search pattern, starting from the current - position. - - If pos is specified, returns the first index greater than - pos at which the string text matches the search pattern - - ***********************************************************************/ - - uint next (uint pos = uint.max) - { - UErrorCode e; - uint x; - - x = (pos == uint.max) ? 
usearch_next (handle, e) : - usearch_following (handle, pos, e); - - testError (e, "failed on next search"); - return x; - } - - /*********************************************************************** - - Returns the index of the previous point at which the - string text matches the search pattern, starting at - the current position. - - If pos is specified, returns the first index less - than pos at which the string text matches the search - pattern. - - ***********************************************************************/ - - uint previous (uint pos = uint.max) - { - UErrorCode e; - uint x; - - x = (pos == uint.max) ? usearch_previous (handle, e) : - usearch_preceding (handle, pos, e); - - testError (e, "failed on next search"); - return x; - } - - /*********************************************************************** - - Search will begin at the start of the text string if a - forward iteration is initiated before a backwards iteration. - Otherwise if a backwards iteration is initiated before a - forwards iteration, the search will begin at the end of the - text string - - ***********************************************************************/ - - void reset () - { - usearch_reset (handle); - } - - /*********************************************************************** - - Gets the collator used for the language rules. - - ***********************************************************************/ - - UCollator getCollator () - { - return new UCollator (usearch_getCollator (handle)); - } - - /*********************************************************************** - - Sets the collator used for the language rules. 
This - method causes internal data such as Boyer-Moore shift - tables to be recalculated, but the iterator's position - is unchanged - - ***********************************************************************/ - - void setCollator (UCollator col) - { - UErrorCode e; - - usearch_setCollator (handle, col.handle, e); - testError (e, "failed to set search collator"); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, wchar*, uint, char*, void*, inout UErrorCode) usearch_open; - Handle function (wchar*, uint, wchar*, uint, Handle, void*, inout UErrorCode) usearch_openFromCollator; - void function (Handle) usearch_close; - void function (Handle, uint, inout UErrorCode) usearch_setOffset; - uint function (Handle) usearch_getOffset; - uint function (Handle) usearch_getMatchedStart; - uint function (Handle) usearch_getMatchedLength; - uint function (Handle, wchar*, uint, inout UErrorCode) usearch_getMatchedText; - void function (Handle, wchar*, uint, inout UErrorCode) usearch_setText; - wchar* function (Handle, uint*) usearch_getText; - void function (Handle, wchar*, uint, inout UErrorCode) usearch_setPattern; - wchar* function (Handle, uint*) usearch_getPattern; - uint function (Handle, inout UErrorCode) usearch_first; - uint function (Handle, inout UErrorCode) usearch_last; - uint function (Handle, inout UErrorCode) usearch_next; - uint function (Handle, inout UErrorCode) usearch_previous; - uint function (Handle, uint, inout UErrorCode) usearch_following; - uint function (Handle, 
uint, inout UErrorCode) usearch_preceding; - void function (Handle) usearch_reset; - void function (Handle, Handle, inout UErrorCode) usearch_setBreakIterator; - Handle function (Handle) usearch_getCollator; - void function (Handle, Handle, inout UErrorCode) usearch_setCollator; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &usearch_open, "usearch_open"}, - {cast(void**) &usearch_openFromCollator, "usearch_openFromCollator"}, - {cast(void**) &usearch_close, "usearch_close"}, - {cast(void**) &usearch_setOffset, "usearch_setOffset"}, - {cast(void**) &usearch_getOffset, "usearch_getOffset"}, - {cast(void**) &usearch_getMatchedStart, "usearch_getMatchedStart"}, - {cast(void**) &usearch_getMatchedLength, "usearch_getMatchedLength"}, - {cast(void**) &usearch_getMatchedText, "usearch_getMatchedText"}, - {cast(void**) &usearch_setText, "usearch_setText"}, - {cast(void**) &usearch_getText, "usearch_getText"}, - {cast(void**) &usearch_setPattern, "usearch_setPattern"}, - {cast(void**) &usearch_getPattern, "usearch_getPattern"}, - {cast(void**) &usearch_first, "usearch_first"}, - {cast(void**) &usearch_last, "usearch_last"}, - {cast(void**) &usearch_next, "usearch_next"}, - {cast(void**) &usearch_previous, "usearch_previous"}, - {cast(void**) &usearch_following, "usearch_following"}, - {cast(void**) &usearch_preceding, "usearch_preceding"}, - {cast(void**) &usearch_reset, "usearch_reset"}, - {cast(void**) &usearch_setBreakIterator, "usearch_setBreakIterator"}, - {cast(void**) &usearch_getCollator, "usearch_getCollator"}, - {cast(void**) &usearch_setCollator, "usearch_setCollator"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, 
targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/USet.d --- a/base/src/java/mangoicu/USet.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,472 +0,0 @@ -/******************************************************************************* - - @file USet.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.USet; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -/******************************************************************************* - - A mutable set of Unicode characters and multicharacter strings. - - Objects of this class represent character classes used in regular - expressions. A character specifies a subset of Unicode code points. - Legal code points are U+0000 to U+10FFFF, inclusive. - - UnicodeSet supports two APIs. The first is the operand API that - allows the caller to modify the value of a UnicodeSet object. It - conforms to Java 2's java.util.Set interface, although UnicodeSet - does not actually implement that interface. All methods of Set are - supported, with the modification that they take a character range - or single character instead of an Object, and they take a UnicodeSet - instead of a Collection. The operand API may be thought of in terms - of boolean logic: a boolean OR is implemented by add, a boolean AND - is implemented by retain, a boolean XOR is implemented by complement - taking an argument, and a boolean NOT is implemented by complement - with no argument. In terms of traditional set theory function names, - add is a union, retain is an intersection, remove is an asymmetric - difference, and complement with no argument is a set complement with - respect to the superset range MIN_VALUE-MAX_VALUE - - The second API is the applyPattern()/toPattern() API from the - java.text.Format-derived classes. Unlike the methods that add - characters, add categories, and control the logic of the set, - the method applyPattern() sets all attributes of a UnicodeSet - at once, based on a string pattern. - - See - this page for full details. 
- -*******************************************************************************/ - -class USet : ICU -{ - package Handle handle; - - enum Options - { - None = 0, - IgnoreSpace = 1, - CaseInsensitive = 2, - } - - - /*********************************************************************** - - Creates a USet object that contains the range of characters - start..end, inclusive - - ***********************************************************************/ - - this (wchar start, wchar end) - { - handle = uset_open (start, end); - } - - /*********************************************************************** - - Creates a set from the given pattern. See the UnicodeSet - class description for the syntax of the pattern language - - ***********************************************************************/ - - this (UStringView pattern, Options o = Options.None) - { - UErrorCode e; - - handle = uset_openPatternOptions (pattern.get.ptr, pattern.len, o, e); - testError (e, "failed to open pattern-based charset"); - } - - /*********************************************************************** - - Internal constructor invoked via UCollator - - ***********************************************************************/ - - package this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Disposes of the storage used by a USet object - - ***********************************************************************/ - - ~this () - { - uset_close (handle); - } - - /*********************************************************************** - - Modifies the set to represent the set specified by the - given pattern. See the UnicodeSet class description for - the syntax of the pattern language. See also the User - Guide chapter about UnicodeSet. Empties the set passed - before applying the pattern. 
- - ***********************************************************************/ - - void applyPattern (UStringView pattern, Options o = Options.None) - { - UErrorCode e; - - uset_applyPattern (handle, pattern.get.ptr, pattern.len, o, e); - testError (e, "failed to apply pattern"); - } - - /*********************************************************************** - - Returns a string representation of this set. If the result - of calling this function is passed to a uset_openPattern(), - it will produce another set that is equal to this one. - - ***********************************************************************/ - - void toPattern (UString dst, bool escape) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uset_toPattern (handle, p, len, escape, e); - } - - dst.format (&fmt, "failed to convert charset to a pattern"); - } - - /*********************************************************************** - - Adds the given character to the given USet. After this call, - contains (c) will return true. - - ***********************************************************************/ - - void add (wchar c) - { - uset_add (handle, c); - } - - /*********************************************************************** - - Adds all of the elements in the specified set to this set - if they're not already present. This operation effectively - modifies this set so that its value is the union of the two - sets. The behavior of this operation is unspecified if the - specified collection is modified while the operation is in - progress. - - ***********************************************************************/ - - void addSet (USet other) - { - uset_addAll (handle, other.handle); - } - - /*********************************************************************** - - Adds the given range of characters to the given USet. 
After - this call, contains(start, end) will return true - - ***********************************************************************/ - - void addRange (wchar start, wchar end) - { - uset_addRange (handle, start, end); - } - - /*********************************************************************** - - Adds the given string to the given USet. After this call, - containsString (str, strLen) will return true - - ***********************************************************************/ - - void addString (UStringView t) - { - uset_addString (handle, t.get.ptr, t.len); - } - - /*********************************************************************** - - Removes the given character from this USet. After the - call, contains(c) will return false - - ***********************************************************************/ - - void remove (wchar c) - { - uset_remove (handle, c); - } - - /*********************************************************************** - - Removes the given range of characters from this USet. - After the call, contains(start, end) will return false - - ***********************************************************************/ - - void removeRange (wchar start, wchar end) - { - uset_removeRange (handle, start, end); - } - - /*********************************************************************** - - Removes the given string from this USet. After the call, - containsString (str, strLen) will return false - - ***********************************************************************/ - - void removeString (UStringView t) - { - uset_removeString (handle, t.get.ptr, t.len); - } - - /*********************************************************************** - - Inverts this set. This operation modifies this set so - that its value is its complement. 
This operation does - not affect the multicharacter strings, if any - - ***********************************************************************/ - - void complement () - { - uset_complement (handle); - } - - /*********************************************************************** - - Removes all of the elements from this set. This set will - be empty after this call returns. - - ***********************************************************************/ - - void clear () - { - uset_clear (handle); - } - - /*********************************************************************** - - Returns true if this USet contains no characters and no - strings - - ***********************************************************************/ - - bool isEmpty () - { - return uset_isEmpty (handle) != 0; - } - - /*********************************************************************** - - Returns true if this USet contains the given character - - ***********************************************************************/ - - bool contains (wchar c) - { - return uset_contains (handle, c) != 0; - } - - /*********************************************************************** - - Returns true if this USet contains all characters c where - start <= c && c <= end - - ***********************************************************************/ - - bool containsRange (wchar start, wchar end) - { - return uset_containsRange (handle, start, end) != 0; - } - - /*********************************************************************** - - Returns true if this USet contains the given string - - ***********************************************************************/ - - bool containsString (UStringView t) - { - return uset_containsString (handle, t.get.ptr, t.len) != 0; - } - - /*********************************************************************** - - ***********************************************************************/ - - uint size () - { - return uset_size (handle); - } - - - 
/*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar start, wchar end) uset_open; - void function (Handle) uset_close; - Handle function (wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_openPatternOptions; - uint function (Handle, wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_applyPattern; - uint function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout UErrorCode e) uset_toPattern; - void function (Handle, wchar c) uset_add; - void function (Handle, Handle additionalSet) uset_addAll; - void function (Handle, wchar start, wchar end) uset_addRange; - void function (Handle, wchar* str, uint strLen) uset_addString; - void function (Handle, wchar c) uset_remove; - void function (Handle, wchar start, wchar end) uset_removeRange; - void function (Handle, wchar* str, uint strLen) uset_removeString; - void function (Handle) uset_complement; - void function (Handle) uset_clear; - byte function (Handle) uset_isEmpty; - byte function (Handle, wchar c) uset_contains; - byte function (Handle, wchar start, wchar end) uset_containsRange; - byte function (Handle, wchar* str, uint strLen) uset_containsString; - uint function (Handle) uset_size; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uset_open, "uset_open"}, - {cast(void**) &uset_close, "uset_close"}, - 
{cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"}, - {cast(void**) &uset_applyPattern, "uset_applyPattern"}, - {cast(void**) &uset_toPattern, "uset_toPattern"}, - {cast(void**) &uset_add, "uset_add"}, - {cast(void**) &uset_addAll, "uset_addAll"}, - {cast(void**) &uset_addRange, "uset_addRange"}, - {cast(void**) &uset_addString, "uset_addString"}, - {cast(void**) &uset_remove, "uset_remove"}, - {cast(void**) &uset_removeRange, "uset_removeRange"}, - {cast(void**) &uset_removeString, "uset_removeString"}, - {cast(void**) &uset_complement, "uset_complement"}, - {cast(void**) &uset_clear, "uset_clear"}, - {cast(void**) &uset_isEmpty, "uset_isEmpty"}, - {cast(void**) &uset_contains, "uset_contains"}, - {cast(void**) &uset_containsRange, "uset_containsRange"}, - {cast(void**) &uset_containsString, "uset_containsString"}, - {cast(void**) &uset_size, "uset_size"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UString.d --- a/base/src/java/mangoicu/UString.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1508 +0,0 @@ -/******************************************************************************* - - @file UString.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module java.mangoicu.UString; - -private import java.mangoicu.ICU, - java.mangoicu.UChar, - java.mangoicu.ULocale; -import java.lang.util; -/******************************************************************************* - -*******************************************************************************/ - -private extern (C) void memmove (void* dst, void* src, uint bytes); - -/******************************************************************************* - - Bind to the IReadable and IWritable interfaces if we're building - along with the mango.io package - -*******************************************************************************/ - -version=Isolated; -version (Isolated) - { - private interface ITextOther {} - private interface IStringOther {} - } - else - { - private import java.mangoicu.UMango; - - private import mango.io.model.IReader, - mango.io.model.IWriter; - - private interface ITextOther : IWritable {} - private interface IStringOther : IReadable {} - } - - -/******************************************************************************* - - UString is a string class that stores Unicode characters directly - and provides similar functionality as the Java String class. - - In ICU, a Unicode string consists of 16-bit Unicode code units. - A Unicode character may be stored with either one code unit — - which is the most common case — or with a matched pair of - special code units ("surrogates"). The data type for code units - is UChar. - - For single-character handling, a Unicode character code point is - a value in the range 0..0x10ffff. ICU uses the UChar32 type for - code points. - - Indexes and offsets into and lengths of strings always count code - units, not code points. This is the same as with multi-byte char* - strings in traditional string handling. Operations on partial - strings typically do not test for code point boundaries. 
If necessary, - the user needs to take care of such boundaries by testing for the code - unit values or by using functions like getChar32Start() - and getChar32Limit() - - UString methods are more lenient with regard to input parameter values - than other ICU APIs. In particular: - - - If indexes are out of bounds for a UString object (< 0 or > length) - then they are "pinned" to the nearest boundary. - - - If primitive string pointer values (e.g., const wchar* or char*) for - input strings are null, then those input string parameters are treated - as if they pointed to an empty string. However, this is not the case - for char* parameters for charset names or other IDs. - -*******************************************************************************/ - -class UString : UStringView, IStringOther -{ - alias opCat append; - alias opIndexAssign setCharAt; - - /*********************************************************************** - - Create an empty UString with the specified available space - - ***********************************************************************/ - - this (uint space = 0) - { - content.length = space; - mutable = true; - } - - /*********************************************************************** - - Create a UString upon the provided content. If said content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. Doing so will avoid allocating - heap-space for the content until it is modified. - - ***********************************************************************/ - - this (CString16 content, bool mutable = true) - { - setTo (content, mutable); - } - - /*********************************************************************** - - Create a UString via the content of a UStringView. Note that the - default is to assume the content is immutable (read-only). 
- - ***********************************************************************/ - - this (UStringView other, bool mutable = false) - { - this (other.get, mutable); - } - - /*********************************************************************** - - Create a UString via the content of a UString. If said content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. Doing so will avoid allocating - heap-space for the content until it is modified via UString - methods. - - ***********************************************************************/ - - this (UString other, bool mutable = true) - { - this (other.get, mutable); - } - - /*********************************************************************** - - Support for reading content via the IO system - - ***********************************************************************/ - - version (Isolated){} - else - { - /*************************************************************** - - Internal adapter to handle loading and conversion - of UString content. Once constructed, this may be - used as the target for an IReader. Alternatively, - invoke the load() method with an IBuffer of choice. - - ***************************************************************/ - - class UStringDecoder : StringDecoder16 - { - private UString s; - - // construct a decoder on the given UString - this (UConverter c, uint bytes, UString s) - { - super (c, bytes); - this.s = s; - } - - // IReadable adapter to perform the conversion - protected void read (IReader r) - { - load (r.buffer); - } - - // read from the provided buffer until we - // either have all the content, or an eof - // condition throws an exception. 
- package void load (IBuffer b) - { - uint produced = super.read (b, s.content); - while (toGo) - { - s.expand (toGo); - produced += super.read (b, s.content[produced..$]); - } - s.len = produced; - } - } - - /*************************************************************** - - Another constructor for loading known content length - into a UString. - - ***************************************************************/ - - this (IBuffer buffer, uint contentLength, UConverter cvt) - { - this (contentLength); - UStringDecoder sd = new UStringDecoder (cvt, contentLength, this); - sd.load (buffer); - } - - /*************************************************************** - - Read as many bytes from the input as is necessary - to produce the expected number of wchar elements. - This uses the default wchar handler, which can be - altered by binding a StringDecoder to the IReader - in use (see UMango for details). - - We're mutable, so ensure we don't mess with the - IO buffers. Interestingly, changing the length - of a D array will account for slice assignments - (it checks the pointer to see if it's a starting - point in the pool). Unfortunately, that doesn't - catch the case where a slice starts at offset 0, - which is where IBuffer slices may come from. - - To be safe, we ask the allocator in use whether - the content it provided can be mutated or not. - Note that this is not necessary for UStringView, since - that is a read-only construct. - - ***************************************************************/ - - void read (IReader r) - { - r.get (content); - len = content.length; - mutable = r.getAllocator.isMutable (content); - } - - /*************************************************************** - - Return a streaming decoder that can be used to - populate this UString with a specified number of - input bytes. 
- - This differs from the above read() method in the - way content is read: in the above case, exactly - the specified number of wchar elements will be - converter from the input, whereas in this case - a variable number of wchar elements are converted - until 'bytes' have been read from the input. This - is useful in those cases where the original number - of elements has been lost, and only the resultant - converted byte-count remains (a la HTTP). - - The returned StringDecoder is one-shot only. You may - reuse it (both the converter and the byte count) via - its reset() method. - - One applies the resultant converter directly with an - IReader like so: - - @code - UString s = ...; - IReader r = ...; - - // r >> s.createDecoder(cvt, bytes); - r.get (s.createDecoder(cvt, bytes)); - @endcode - - which will read the specified number of bytes from - the input and convert them to an appropriate number - of wchars within the UString. - - ***************************************************************/ - - StringDecoder createDecoder (UConverter c, uint bytes) - { - return new UStringDecoder (c, bytes, this); - } - } - - /*********************************************************************** - - Append text to this UString - - ***********************************************************************/ - - UString opCat (UStringView other) - { - return opCat (other.get); - } - - /*********************************************************************** - - Append partial text to this UString - - ***********************************************************************/ - - UString opCat (UStringView other, uint start, uint len=uint.max) - { - other.pinIndices (start, len); - return opCat (other.content [start..start+len]); - } - - /*********************************************************************** - - Append a single character to this UString - - ***********************************************************************/ - - UString opCat (wchar chr) - { - return opCat 
(&chr, 1); - } - - /*********************************************************************** - - Append text to this UString - - ***********************************************************************/ - - UString opCat (wchar[] chars) - { - return opCat (chars.ptr, chars.length); - } - - /*********************************************************************** - - Converts a sequence of UTF-8 bytes to UChars (UTF-16) - - ***********************************************************************/ - - UString opCat (char[] chars) - { - uint fmt (wchar* dst, uint len, inout UErrorCode e) - { - uint x; - - u_strFromUTF8 (dst, len, &x, chars.ptr, chars.length, e); - return x; - } - - expand (chars.length); - return format (&fmt, "failed to append UTF char[]"); - } - - /*********************************************************************** - - Set a section of this UString to the specified character - - ***********************************************************************/ - - UString setTo (wchar chr, uint start=0, uint len=uint.max) - { - pinIndices (start, len); - if (! mutable) - realloc (); - content [start..start+len] = chr; - return this; - } - - /*********************************************************************** - - Set the content to the provided array. Parameter 'mutable' - specifies whether the given array is likely to change. If - not, the array is aliased until such time this UString is - altered. - - ***********************************************************************/ - - UString setTo (CString16 chars, bool mutable = true) - { - len = chars.length; - if ((this.mutable = mutable) == true) - content = chars.dup; - else - content = cast(wchar[])chars; - return this; - } - - /*********************************************************************** - - Replace the content of this UString. If the new content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. 
Doing so will avoid allocating - heap-space for the content until it is modified via one of - these methods. - - ***********************************************************************/ - - UString setTo (UStringView other, bool mutable = true) - { - return setTo (other.get, mutable); - } - - /*********************************************************************** - - Replace the content of this UString. If the new content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. Doing so will avoid allocating - heap-space for the content until it is modified via one of - these methods. - - ***********************************************************************/ - - UString setTo (UStringView other, uint start, uint len, bool mutable = true) - { - other.pinIndices (start, len); - return setTo (other.content [start..start+len], mutable); - } - - /*********************************************************************** - - Replace the character at the specified location. - - ***********************************************************************/ - - final UString opIndexAssign (wchar chr, uint index) - in { - if (index >= len) - exception ("index of out bounds"); - } - body - { - if (! mutable) - realloc (); - content [index] = chr; - return this; - } - - /*********************************************************************** - - Remove a piece of this UString. - - ***********************************************************************/ - - UString remove (uint start, uint length=uint.max) - { - pinIndices (start, length); - if (length) - if (start >= len) - truncate (start); - else - { - if (! mutable) - realloc (); - - uint i = start + length; - memmove (&content[start], &content[i], (len-i) * wchar.sizeof); - len -= length; - } - return this; - } - - /*********************************************************************** - - Truncate the length of this UString. 
- - ***********************************************************************/ - - UString truncate (uint length=0) - { - if (length <= len) - len = length; - return this; - } - - /*********************************************************************** - - Insert leading spaces in this UString - - ***********************************************************************/ - - UString padLeading (uint count, wchar padChar = 0x0020) - { - expand (count); - memmove (&content[count], content.ptr, len * wchar.sizeof); - len += count; - return setTo (padChar, 0, count); - } - - /*********************************************************************** - - Append some trailing spaces to this UString. - - ***********************************************************************/ - - UString padTrailing (uint length, wchar padChar = 0x0020) - { - expand (length); - len += length; - return setTo (padChar, len-length, length); - } - - /*********************************************************************** - - Check for available space within the buffer, and expand - as necessary. - - ***********************************************************************/ - - package final void expand (uint count) - { - if ((len + count) > content.length) - realloc (count); - } - - /*********************************************************************** - - Allocate memory due to a change in the content. We handle - the distinction between mutable and immutable here. 
- - ***********************************************************************/ - - private final void realloc (uint count = 0) - { - uint size = (content.length + count + 63) & ~63; - - if (mutable) - content.length = size; - else - { - mutable = true; - wchar[] x = content; - content = new wchar [size]; - if (len) - content[0..len] = x; - } - } - - /*********************************************************************** - - Internal method to support UString appending - - ***********************************************************************/ - - private final UString opCat (wchar* chars, uint count) - { - expand (count); - content[len..len+count] = chars[0..count]; - len += count; - return this; - } - - /*********************************************************************** - - Internal method to support formatting into this UString. - This is used by many of the ICU wrappers to append content - into a UString. - - ***********************************************************************/ - - typedef uint delegate (wchar* dst, uint len, inout UErrorCode e) Formatter; - - package final UString format (Formatter format, CString msg) - { - UErrorCode e; - uint length; - - while (true) - { - e = e.OK; - length = format (&content[len], content.length - len, e); - if (e == e.BufferOverflow) - expand (length); - else - break; - } - - if (isError (e)) - exception (msg); - - len += length; - return this; - } -} - - -/******************************************************************************* - - Immutable (read-only) text -- use UString for mutable strings. - -*******************************************************************************/ - -class UStringView : ICU, ITextOther -{ - alias opIndex charAt; - - // the core of the UStringView and UString attributes. The name 'len' - // is used rather than the more obvious 'length' since there is - // a collision with the silly array[length] syntactic sugar ... 
- package uint len; - package wchar[] content; - - // this should probably be in UString only, but there seems to - // be a compiler bug where it doesn't get initialised correctly, - // and it's perhaps useful to have here for when a UString is - // passed as a UStringView argument. - private bool mutable; - - // toFolded() argument - public enum CaseOption - { - Default = 0, - SpecialI = 1 - } - - /*********************************************************************** - - Hidden constructor - - ***********************************************************************/ - - private this () - { - } - - /*********************************************************************** - - Construct read-only wrapper around the given content - - ***********************************************************************/ - - this (wchar[] content) - { - this.content = content; - this.len = content.length; - } - - /*********************************************************************** - - Support for writing via the Mango IO subsystem - - ***********************************************************************/ - - version (Isolated){} - else - { - void write (IWriter w) - { - w.put (get); - } - } - - /*********************************************************************** - - Return the valid content from this UStringView - - ***********************************************************************/ - - final package wchar[] get () - { - return content [0..len]; - } - - /*********************************************************************** - - Is this UStringView equal to another? - - ***********************************************************************/ - - final override equals_t opEquals (Object o) - { - UStringView other = cast(UStringView) o; - - if (other) - return (other is this || compare (other) == 0); - return 0; - } - - /*********************************************************************** - - Compare this UStringView to another. 
- - ***********************************************************************/ - - final override int opCmp (Object o) - { - UStringView other = cast(UStringView) o; - - if (other is this) - return 0; - else - if (other) - return compare (other); - return 1; - } - - /*********************************************************************** - - Hash this UStringView - - ***********************************************************************/ - - final override uint toHash () - { - return typeid(wchar[]).getHash (&content[0..len]); - } - - /*********************************************************************** - - Clone this UStringView into a UString - - ***********************************************************************/ - - final UString copy () - { - return new UString (content); - } - - /*********************************************************************** - - Clone a section of this UStringView into a UString - - ***********************************************************************/ - - final UString extract (uint start, uint len=uint.max) - { - pinIndices (start, len); - return new UString (content[start..start+len]); - } - - /*********************************************************************** - - Count unicode code points in the length UChar code units of - the string. A code point may occupy either one or two UChar - code units. Counting code points involves reading all code - units. - - ***********************************************************************/ - - final uint codePoints (uint start=0, uint length=uint.max) - { - pinIndices (start, length); - return u_countChar32 (&content[start], length); - } - - /*********************************************************************** - - Return an indication whether or not there are surrogate pairs - within the string. 
- - ***********************************************************************/ - - final bool hasSurrogates (uint start=0, uint length=uint.max) - { - pinIndices (start, length); - return codePoints (start, length) != length; - } - - /*********************************************************************** - - Return the character at the specified position. - - ***********************************************************************/ - - final wchar opIndex (uint index) - in { - if (index >= len) - exception ("index of out bounds"); - } - body - { - return content [index]; - } - - /*********************************************************************** - - Return the length of the valid content - - ***********************************************************************/ - - final uint length () - { - return len; - } - - /*********************************************************************** - - The comparison can be done in code unit order or in code - point order. They differ only in UTF-16 when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. - - ***********************************************************************/ - - final int compare (UStringView other, bool codePointOrder=false) - { - return compare (other.get, codePointOrder); - } - - /*********************************************************************** - - The comparison can be done in code unit order or in code - point order. They differ only in UTF-16 when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. 
- - ***********************************************************************/ - - final int compare (wchar[] other, bool codePointOrder=false) - { - return u_strCompare (content.ptr, len, other.ptr, other.length, codePointOrder); - } - - /*********************************************************************** - - The comparison can be done in UTF-16 code unit order or - in code point order. They differ only when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. - - ***********************************************************************/ - - final int compareFolded (UStringView other, CaseOption option = CaseOption.Default) - { - return compareFolded (other.content, option); - } - - /*********************************************************************** - - The comparison can be done in UTF-16 code unit order or - in code point order. They differ only when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. - - ***********************************************************************/ - - final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default) - { - return compareFolded (get, other, option); - } - - /*********************************************************************** - - Does this UStringView start with specified string? 
- - ***********************************************************************/ - - final bool startsWith (UStringView other) - { - return startsWith (other.get); - } - - /*********************************************************************** - - Does this UStringView start with specified string? - - ***********************************************************************/ - - final bool startsWith (wchar[] chars) - { - if (len >= chars.length) - return compareFolded (content[0..chars.length], chars) == 0; - return false; - } - - /*********************************************************************** - - Does this UStringView end with specified string? - - ***********************************************************************/ - - final bool endsWith (UStringView other) - { - return endsWith (other.get); - } - - /*********************************************************************** - - Does this UStringView end with specified string? - - ***********************************************************************/ - - final bool endsWith (wchar[] chars) - { - if (len >= chars.length) - return compareFolded (content[len-chars.length..len], chars) == 0; - return false; - } - - /*********************************************************************** - - Find the first occurrence of a BMP code point in a string. - A surrogate code point is found only if its match in the - text is not part of a surrogate pair. - - ***********************************************************************/ - - final uint indexOf (wchar c, uint start=0) - { - pinIndex (start); - wchar* s = u_memchr (&content[start], c, len-start); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Find the first occurrence of a substring in a string. - - The substring is found at code point boundaries. 
That means - that if the substring begins with a trail surrogate or ends - with a lead surrogate, then it is found only if these - surrogates stand alone in the text. Otherwise, the substring - edge units would be matched against halves of surrogate pairs. - - ***********************************************************************/ - - final uint indexOf (UStringView other, uint start=0) - { - return indexOf (other.get, start); - } - - /*********************************************************************** - - Find the first occurrence of a substring in a string. - - The substring is found at code point boundaries. That means - that if the substring begins with a trail surrogate or ends - with a lead surrogate, then it is found only if these - surrogates stand alone in the text. Otherwise, the substring - edge units would be matched against halves of surrogate pairs. - - ***********************************************************************/ - - final uint indexOf (wchar[] chars, uint start=0) - { - pinIndex (start); - wchar* s = u_strFindFirst (&content[start], len-start, chars.ptr, chars.length); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Find the last occurrence of a BMP code point in a string. - A surrogate code point is found only if its match in the - text is not part of a surrogate pair. - - ***********************************************************************/ - - final uint lastIndexOf (wchar c, uint start=uint.max) - { - pinIndex (start); - wchar* s = u_memrchr (content.ptr, c, start); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Find the last occurrence of a BMP code point in a string. - A surrogate code point is found only if its match in the - text is not part of a surrogate pair. 
- - ***********************************************************************/ - - final uint lastIndexOf (UStringView other, uint start=uint.max) - { - return lastIndexOf (other.get, start); - } - - /*********************************************************************** - - Find the last occurrence of a substring in a string. - - The substring is found at code point boundaries. That means - that if the substring begins with a trail surrogate or ends - with a lead surrogate, then it is found only if these - surrogates stand alone in the text. Otherwise, the substring - edge units would be matched against halves of surrogate pairs. - - ***********************************************************************/ - - final uint lastIndexOf (wchar[] chars, uint start=uint.max) - { - pinIndex (start); - wchar* s = u_strFindLast (content.ptr, start, chars.ptr, chars.length); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Lowercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toLower (UString dst) - { - return toLower (dst, ULocale.Default); - } - - /*********************************************************************** - - Lowercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. 
- - ***********************************************************************/ - - final UString toLower (UString dst, inout ULocale locale) - { - uint lower (wchar* dst, uint length, inout UErrorCode e) - { - return u_strToLower (dst, length, content.ptr, len, ICU.toString(locale.name), e); - } - - dst.expand (len + 32); - return dst.format (&lower, "toLower() failed"); - } - - /*********************************************************************** - - Uppercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toUpper (UString dst) - { - return toUpper (dst, ULocale.Default); - } - - /*********************************************************************** - - Uppercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toUpper (UString dst, inout ULocale locale) - { - uint upper (wchar* dst, uint length, inout UErrorCode e) - { - return u_strToUpper (dst, length, content.ptr, len, ICU.toString(locale.name), e); - } - - dst.expand (len + 32); - return dst.format (&upper, "toUpper() failed"); - } - - /*********************************************************************** - - Case-fold the characters into a seperate UString. - - Case-folding is locale-independent and not context-sensitive, - but there is an option for whether to include or exclude - mappings for dotted I and dotless i that are marked with 'I' - in CaseFolding.txt. The result may be longer or shorter than - the original. 
- - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toFolded (UString dst, CaseOption option = CaseOption.Default) - { - uint fold (wchar* dst, uint length, inout UErrorCode e) - { - return u_strFoldCase (dst, length, content.ptr, len, option, e); - } - - dst.expand (len + 32); - return dst.format (&fold, "toFolded() failed"); - } - - /*********************************************************************** - - Converts a sequence of wchar (UTF-16) to UTF-8 bytes. If - the output array is not provided, an array of appropriate - size will be allocated and returned. Where the output is - provided, it must be large enough to hold potentially four - bytes per character for surrogate-pairs or three bytes per - character for BMP only. Consider using UConverter where - streaming conversions are required. - - Returns an array slice representing the valid UTF8 content. - - ***********************************************************************/ - - final char[] toUtf8 (char[] dst = null) - { - uint x; - UErrorCode e; - - if (! cast(char*) dst) - dst = new char[len * 4]; - - u_strToUTF8 (dst.ptr, dst.length, &x, content.ptr, len, e); - testError (e, "failed to convert to UTF8"); - return dst [0..x]; - } - - /*********************************************************************** - - Remove leading and trailing whitespace from this UStringView. - Note that we slice the content to remove leading space. 
- - ***********************************************************************/ - - UStringView trim () - { - wchar c; - uint i = len; - - // cut off trailing white space - while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c))) - --i; - len = i; - - // now remove leading whitespace - for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {} - if (i) - { - len -= i; - content = content[i..$-i]; - } - - return this; - } - - /*********************************************************************** - - Unescape a string of characters and write the resulting - Unicode characters to the destination buffer. The following - escape sequences are recognized: - - uhhhh 4 hex digits; h in [0-9A-Fa-f] - Uhhhhhhhh 8 hex digits - xhh 1-2 hex digits - x{h...} 1-8 hex digits - ooo 1-3 octal digits; o in [0-7] - cX control-X; X is masked with 0x1F - - as well as the standard ANSI C escapes: - - a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, - v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - \\" =U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C - - Anything else following a backslash is generically escaped. - For example, "[a\\-z]" returns "[a-z]". - - If an escape sequence is ill-formed, this method returns an - empty string. An example of an ill-formed sequence is "\\u" - followed by fewer than 4 hex digits. - - ***********************************************************************/ - - final UString unEscape () - { - UString result = new UString (len); - for (uint i=0; i < len;) - { - dchar c = charAt(i++); - if (c == 0x005C) - { - // bump index ... - c = u_unescapeAt (&_charAt, &i, len, cast(void*) this); - - // error? - if (c == 0xFFFFFFFF) - { - result.truncate (); // return empty string - break; // invalid escape sequence - } - } - result.append (c); - } - return result; - } - - /*********************************************************************** - - Is this code point a surrogate (U+d800..U+dfff)? 
- - ***********************************************************************/ - - final static bool isSurrogate (wchar c) - { - return (c & 0xfffff800) == 0xd800; - } - - /*********************************************************************** - - Is this code unit a lead surrogate (U+d800..U+dbff)? - - ***********************************************************************/ - - final static bool isLeading (wchar c) - { - return (c & 0xfffffc00) == 0xd800; - } - - /*********************************************************************** - - Is this code unit a trail surrogate (U+dc00..U+dfff)? - - ***********************************************************************/ - - final static bool isTrailing (wchar c) - { - return (c & 0xfffffc00) == 0xdc00; - } - - /*********************************************************************** - - Adjust a random-access offset to a code point boundary - at the start of a code point. If the offset points to - the trail surrogate of a surrogate pair, then the offset - is decremented. Otherwise, it is not modified. - - ***********************************************************************/ - - final uint getCharStart (uint i) - in { - if (i >= len) - exception ("index of out bounds"); - } - body - { - if (isTrailing (content[i]) && i && isLeading (content[i-1])) - --i; - return i; - } - - /*********************************************************************** - - Adjust a random-access offset to a code point boundary - after a code point. If the offset is behind the lead - surrogate of a surrogate pair, then the offset is - incremented. Otherwise, it is not modified. 
- - ***********************************************************************/ - - final uint getCharLimit (uint i) - in { - if (i >= len) - exception ("index of out bounds"); - } - body - { - if (i && isLeading(content[i-1]) && isTrailing (content[i])) - ++i; - return i; - } - - /*********************************************************************** - - Callback for C unescapeAt() function - - ***********************************************************************/ - - extern (C) - { - typedef wchar function (uint offset, void* context) CharAt; - - private static wchar _charAt (uint offset, void* context) - { - return (cast(UString) context).charAt (offset); - } - } - - /*********************************************************************** - - Pin the given index to a valid position. - - ***********************************************************************/ - - final private void pinIndex (inout uint x) - { - if (x > len) - x = len; - } - - /*********************************************************************** - - Pin the given index and length to a valid position. - - ***********************************************************************/ - - final private void pinIndices (inout uint start, inout uint length) - { - if (start > len) - start = len; - - if (length > (len - start)) - length = len - start; - } - - /*********************************************************************** - - Helper for comparison methods - - ***********************************************************************/ - - final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default) - { - UErrorCode e; - - int x = u_strCaseCompare (s1.ptr, s1.length, s2.ptr, s2.length, option, e); - testError (e, "compareFolded failed"); - return x; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - wchar* function (wchar*, uint, wchar*, uint) u_strFindFirst; - wchar* function (wchar*, uint, wchar*, uint) u_strFindLast; - wchar* function (wchar*, wchar, uint) u_memchr; - wchar* function (wchar*, wchar, uint) u_memrchr; - int function (wchar*, uint, wchar*, uint, bool) u_strCompare; - int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strCaseCompare; - dchar function (CharAt, uint*, uint, void*) u_unescapeAt; - uint function (wchar*, uint) u_countChar32; - uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToUpper; - uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToLower; - uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strFoldCase; - wchar* function (wchar*, uint, uint*, char*, uint, inout UErrorCode) u_strFromUTF8; - char* function (char*, uint, uint*, wchar*, uint, inout UErrorCode) u_strToUTF8; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &u_strFindFirst, "u_strFindFirst"}, - {cast(void**) &u_strFindLast, "u_strFindLast"}, - {cast(void**) &u_memchr, "u_memchr"}, - {cast(void**) &u_memrchr, "u_memrchr"}, - {cast(void**) &u_strCompare, "u_strCompare"}, - {cast(void**) &u_strCaseCompare, "u_strCaseCompare"}, - {cast(void**) &u_unescapeAt, "u_unescapeAt"}, - {cast(void**) &u_countChar32, "u_countChar32"}, - {cast(void**) &u_strToUpper, "u_strToUpper"}, - {cast(void**) &u_strToLower, "u_strToLower"}, - {cast(void**) 
&u_strFoldCase, "u_strFoldCase"}, - {cast(void**) &u_strFromUTF8, "u_strFromUTF8"}, - {cast(void**) &u_strToUTF8, "u_strToUTF8"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - - /*********************************************************************** - - ***********************************************************************/ - - //private static void test() - //{ - // UString s = new UString (r"aaaqw \uabcd eaaa"); - // CString16 x = "dssfsdff"; - // s ~ x ~ x; - // wchar c = s[3]; - // s[3] = 'Q'; - // int y = s.indexOf ("qwe"); - // s.unEscape (); - // s.toUpper (new UString); - // s.padLeading(2).padTrailing(2).trim(); - //} -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UStringPrep.d --- a/base/src/java/mangoicu/UStringPrep.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,249 +0,0 @@ -/******************************************************************************* - - @file UStringPrep.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. 
If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UStringPrep; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -/******************************************************************************* - - StringPrep API implements the StingPrep framework as described - by RFC 3454. - - StringPrep prepares Unicode strings for use in network protocols. - Profiles of StingPrep are set of rules and data according to with - the Unicode Strings are prepared. Each profiles contains tables - which describe how a code point should be treated. The tables are - broadly classied into - - - Unassinged Table: Contains code points that are unassigned - in the Unicode Version supported by StringPrep. Currently - RFC 3454 supports Unicode 3.2. - - - Prohibited Table: Contains code points that are prohibted - from the output of the StringPrep processing function. - - - Mapping Table: Contains code ponts that are deleted from the - output or case mapped. - - The procedure for preparing Unicode strings: - - 1. Map: For each character in the input, check if it has a mapping - and, if so, replace it with its mapping. - - 2. 
Normalize: Possibly normalize the result of step 1 using Unicode - normalization. - - 3. Prohibit: Check for any characters that are not allowed in the - output. If any are found, return an error. - - 4. Check bidi: Possibly check for right-to-left characters, and if - any are found, make sure that the whole string satisfies the - requirements for bidirectional strings. If the string does not - satisfy the requirements for bidirectional strings, return an - error. - - See - this page for full details. - -*******************************************************************************/ - -class UStringPrep : ICU -{ - private Handle handle; - - enum Options - { - Strict, - Lenient - } - - - /*********************************************************************** - - Creates a StringPrep profile from the data file. - - path string containing the full path pointing - to the directory where the profile reside - followed by the package name e.g. - "/usr/resource/my_app/profiles/mydata" on - a Unix system. if NULL, ICU default data - files will be used. 
- - fileName name of the profile file to be opened - - ***********************************************************************/ - - this (char[] path, char[] filename) - { - UErrorCode e; - - handle = usprep_open (toString(path), toString(filename), e); - testError (e, "failed to open string-prep"); - } - - /*********************************************************************** - - Close this profile - - ***********************************************************************/ - - ~this () - { - usprep_close (handle); - } - - /*********************************************************************** - - Prepare the input buffer - - This operation maps, normalizes(NFKC), checks for prohited - and BiDi characters in the order defined by RFC 3454 depending - on the options specified in the profile - - ***********************************************************************/ - - void prepare (UStringView src, UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return usprep_prepare (handle, src.get.ptr, src.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to prepare text"); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (char*, char*, inout UErrorCode) usprep_open; - void function (Handle) usprep_close; - uint function (Handle, wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) usprep_prepare; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &usprep_open, "usprep_open"}, - {cast(void**) &usprep_close, "usprep_close"}, - {cast(void**) &usprep_prepare, "usprep_prepare"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UText.d --- a/base/src/java/mangoicu/UText.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,249 +0,0 @@ -/******************************************************************************* - - @file UString.d - - Copyright (c) 2008 Frank Benoit - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, July 2008 - @author Frank - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ -module java.mangoicu.UText; - -import java.mangoicu.ICU; - -struct UText { - // UText private fields -- start - private { - uint magic = UTEXT_MAGIC; - int flags = 0; - int providerProperties = 0; - int sizeOfStruct = UText.sizeof; - long chunkNativeLimit = 0; - int extraSize = 0; - int nativeIndexingLimit = 0; - long chunkNativeStart = 0; - int chunkOffset = 0; - int chunkLength = 0; - wchar* chunkContents = null; - void* pFuncs = null; - void* pExtra = null; - void* context = null; - void* p = null; - void* q = null; - void* r = null; - void* privP = null; - long a = 0; - int b = 0; - int c = 0; - long privA = 0; - int privB = 0; - int privC = 0; - } // UText private fields -- end - // do not add any non-static fields - - private enum { - UTEXT_MAGIC = 0x345ad82c - } - void close(){ - version(D_Version2){ - utext_close(&this); - } else { - utext_close(this); - } - } - private void ensureStatusOk( ICU.UErrorCode status ){ - if( status !is ICU.UErrorCode.OK ){ - throw new Exception( "ICU Exception" ); - } - } - void openUTF8( char[] str ){ - auto status = ICU.UErrorCode.OK; - version(D_Version2){ - utext_openUTF8(&this, str.ptr, str.length, status ); - } else { - utext_openUTF8(this, str.ptr, str.length, status ); - } - ensureStatusOk( status ); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - static extern(System){ - UText * function(UText* ut) utext_close; - UText * function(UText* ut, char *s, long length, inout ICU.UErrorCode status) utext_openUTF8; -// UText * function(UText* ut, UChar *s, int64_t length, inout UErrorCode status) utext_openUChars; -// UText * function(UText* ut, U_NAMESPACE_QUALIFIER UnicodeString *s, inout UErrorCode status) utext_openUnicodeString; -// UText * function(UText* ut, U_NAMESPACE_QUALIFIER UnicodeString *s, inout UErrorCode status) utext_openConstUnicodeString; -// UText * function(UText* ut, U_NAMESPACE_QUALIFIER Replaceable *rep, inout UErrorCode status) utext_openReplaceable; -// UText * function(UText* ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, inout UErrorCode status) utext_openCharacterIterator; -// UText * function(UText* ut, UText *src, UBool deep, UBool readOnly, inout UErrorCode status) utext_clone; -// UBool function(const UText *a, const UText *b) utext_equals; -// int64_t function(UText* ut) utext_nativeLength; -// UBool function(UText* ut) utext_isLengthExpensive; -// UChar32 function(UText* ut, int64_t nativeIndex) utext_char32At; -// UChar32 function(UText* ut) utext_current32; -// UChar32 function(UText* ut) utext_next32; -// UChar32 function(UText* ut) utext_previous32; -// UChar32 function(UText* ut, int64_t nativeIndex) utext_next32From; -// UChar32 function(UText* ut, int64_t nativeIndex) utext_previous32From; -// int64_t function(UText* ut) utext_getNativeIndex; -// void function(UText* ut, int64_t nativeIndex) utext_setNativeIndex; -// UBool function(UText* ut, int delta) utext_moveIndex32; -// int64_t function(UText* ut) 
utext_getPreviousNativeIndex; -// int function(UText* ut, int64_t nativeStart, int64_t nativeLimit, -// UChar *dest, int destCapacity, -// inout UErrorCode status) utext_extract; -// UBool function(UText* ut) utext_isWritable; -// UBool function(UText* ut) utext_hasMetaData; -// int function(UText* ut, -// int64_t nativeStart, int64_t nativeLimit, -// UChar *replacementText, int replacementLength, -// inout UErrorCode status) utext_replace; -// void function(UText* ut, -// int64_t nativeStart, int64_t nativeLimit, -// int64_t destIndex, -// UBool move, -// inout UErrorCode status) utext_copy; -// void function(UText* ut) utext_freeze; -// UText * function(UText* ut, int extraSpace, inout UErrorCode status) utext_setup; - } - - /*********************************************************************** - - ***********************************************************************/ - static FunctionLoader.Bind[] targets = [ - {cast(void**) &utext_close, "utext_close"}, - {cast(void**) &utext_openUTF8, "utext_openUTF8"}, -// {cast(void**) &utext_openUChars, "utext_openUChars"}, -// {cast(void**) &utext_openUnicodeString, "utext_openUnicodeString"}, -// {cast(void**) &utext_openConstUnicodeString, "utext_openConstUnicodeString"}, -// {cast(void**) &utext_openReplaceable, "utext_openReplaceable"}, -// {cast(void**) &utext_openCharacterIterator, "utext_openCharacterIterator"}, -// {cast(void**) &utext_clone, "utext_clone"}, -// {cast(void**) &utext_equals, "utext_equals"}, -// {cast(void**) &utext_nativeLength, "utext_nativeLength"}, -// {cast(void**) &utext_isLengthExpensive, "utext_isLengthExpensive"}, -// {cast(void**) &utext_char32At, "utext_char32At"}, -// {cast(void**) &utext_current32, "utext_current32"}, -// {cast(void**) &utext_next32, "utext_next32"}, -// {cast(void**) &utext_next32From, "utext_next32From"}, -// {cast(void**) &utext_previous32, "utext_previous32"}, -// {cast(void**) &utext_previous32From, "utext_previous32From"}, -// {cast(void**) 
&utext_setNativeIndex, "utext_setNativeIndex"}, -// {cast(void**) &utext_moveIndex32, "utext_moveIndex32"}, -// {cast(void**) &utext_getPreviousNativeIndex, "utext_getPreviousNativeIndex"}, -// {cast(void**) &utext_extract, "utext_extract"}, -// {cast(void**) &utext_isWritable, "utext_isWritable"}, -// {cast(void**) &utext_hasMetaData, "utext_hasMetaData"}, -// {cast(void**) &utext_replace, "utext_replace"}, -// {cast(void**) &utext_copy, "utext_copy"}, -// {cast(void**) &utext_freeze, "utext_freeze"}, -// {cast(void**) &utext_setup, "utext_setup"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuuc, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UTimeZone.d --- a/base/src/java/mangoicu/UTimeZone.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,264 +0,0 @@ -/******************************************************************************* - - @file UTimeZone.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. 
If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UTimeZone; - -private import java.mangoicu.ICU, - java.mangoicu.UString, - java.mangoicu.UEnumeration; -private import java.lang.util; - -/******************************************************************************* - - A representation of a TimeZone. Unfortunately, ICU does not expose - this as a seperate entity from the C-API, so we have to make do - with an approximation instead. 
- -*******************************************************************************/ - -struct UTimeZone -{ - public CString16 name; - - public static UTimeZone Default = {null}; - public static UTimeZone Gmt = {"Etc/GMT"}; - public static UTimeZone Greenwich = {"Etc/Greenwich"}; - public static UTimeZone Uct = {"Etc/UCT"}; - public static UTimeZone Utc = {"Etc/UTC"}; - public static UTimeZone Universal = {"Etc/Universal"}; - - public static UTimeZone GmtPlus0 = {"Etc/GMT+0"}; - public static UTimeZone GmtPlus1 = {"Etc/GMT+1"}; - public static UTimeZone GmtPlus2 = {"Etc/GMT+2"}; - public static UTimeZone GmtPlus3 = {"Etc/GMT+3"}; - public static UTimeZone GmtPlus4 = {"Etc/GMT+4"}; - public static UTimeZone GmtPlus5 = {"Etc/GMT+5"}; - public static UTimeZone GmtPlus6 = {"Etc/GMT+6"}; - public static UTimeZone GmtPlus7 = {"Etc/GMT+7"}; - public static UTimeZone GmtPlus8 = {"Etc/GMT+8"}; - public static UTimeZone GmtPlus9 = {"Etc/GMT+9"}; - public static UTimeZone GmtPlus10 = {"Etc/GMT+10"}; - public static UTimeZone GmtPlus11 = {"Etc/GMT+11"}; - public static UTimeZone GmtPlus12 = {"Etc/GMT+12"}; - - public static UTimeZone GmtMinus0 = {"Etc/GMT-0"}; - public static UTimeZone GmtMinus1 = {"Etc/GMT-1"}; - public static UTimeZone GmtMinus2 = {"Etc/GMT-2"}; - public static UTimeZone GmtMinus3 = {"Etc/GMT-3"}; - public static UTimeZone GmtMinus4 = {"Etc/GMT-4"}; - public static UTimeZone GmtMinus5 = {"Etc/GMT-5"}; - public static UTimeZone GmtMinus6 = {"Etc/GMT-6"}; - public static UTimeZone GmtMinus7 = {"Etc/GMT-7"}; - public static UTimeZone GmtMinus8 = {"Etc/GMT-8"}; - public static UTimeZone GmtMinus9 = {"Etc/GMT-9"}; - public static UTimeZone GmtMinus10 = {"Etc/GMT-10"}; - public static UTimeZone GmtMinus11 = {"Etc/GMT-11"}; - public static UTimeZone GmtMinus12 = {"Etc/GMT-12"}; - - /*********************************************************************** - - Get the default time zone. 
- - ***********************************************************************/ - - static void getDefault (inout UTimeZone zone) - { - uint format (wchar* dst, uint length, inout ICU.UErrorCode e) - { - return ucal_getDefaultTimeZone (dst, length, e); - } - - UString s = new UString(64); - s.format (&format, "failed to get default time zone"); - zone.name = s.get(); - } - - /*********************************************************************** - - Set the default time zone. - - ***********************************************************************/ - - static void setDefault (inout UTimeZone zone) - { - ICU.UErrorCode e; - - ucal_setDefaultTimeZone (ICU.toString (zone.name), e); - ICU.testError (e, "failed to set default time zone"); - } - - /*********************************************************************** - - Return the amount of time in milliseconds that the clock - is advanced during daylight savings time for the given - time zone, or zero if the time zone does not observe daylight - savings time - - ***********************************************************************/ - - static uint getDSTSavings (inout UTimeZone zone) - { - ICU.UErrorCode e; - - uint x = ucal_getDSTSavings (ICU.toString (zone.name), e); - ICU.testError (e, "failed to get DST savings"); - return x; - } - - - /********************************************************************** - - Iterate over the available timezone names - - **********************************************************************/ - - static int opApply (int delegate(inout wchar[] element) dg) - { - ICU.UErrorCode e; - wchar[] name; - int result; - - void* h = ucal_openTimeZones (e); - ICU.testError (e, "failed to open timeszone iterator"); - - UEnumeration zones = new UEnumeration (cast(UEnumeration.Handle) h); - while (zones.next(name) && (result = dg(name)) != 0) {} - delete zones; - return result; - } - - - - /*********************************************************************** - - Bind the ICU functions 
from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - void* function (inout ICU.UErrorCode) ucal_openTimeZones; - uint function (wchar*, uint, inout ICU.UErrorCode) ucal_getDefaultTimeZone; - void function (wchar*, inout ICU.UErrorCode) ucal_setDefaultTimeZone; - uint function (wchar*, inout ICU.UErrorCode) ucal_getDSTSavings; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucal_openTimeZones, "ucal_openTimeZones"}, - {cast(void**) &ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone"}, - {cast(void**) &ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone"}, - {cast(void**) &ucal_getDSTSavings, "ucal_getDSTSavings"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 base/src/java/mangoicu/UTransform.d --- a/base/src/java/mangoicu/UTransform.d Sun Apr 19 12:22:47 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,239 +0,0 @@ -/******************************************************************************* - - @file UTransform.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', 
without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module java.mangoicu.UTransform; - -private import java.mangoicu.ICU, - java.mangoicu.UString; - -/******************************************************************************* - - See - this page for full details. 
- -*******************************************************************************/ - -class UTransform : ICU -{ - private Handle handle; - - enum Direction - { - Forward, - Reverse - } - - - /*********************************************************************** - - ***********************************************************************/ - - this (UStringView id) - { - UErrorCode e; - - handle = utrans_openU (id.get.ptr, id.len, 0, null, 0, null, e); - testError (e, "failed to open ID transform"); - } - - /*********************************************************************** - - ***********************************************************************/ - - this (UStringView rule, Direction dir) - { - UErrorCode e; - - handle = utrans_openU (null, 0, dir, rule.get.ptr, rule.len, null, e); - testError (e, "failed to open rule-based transform"); - } - - /*********************************************************************** - - ***********************************************************************/ - - ~this () - { - utrans_close (handle); - } - - /*********************************************************************** - - ***********************************************************************/ - - UStringView getID () - { - uint len; - wchar *s = utrans_getUnicodeID (handle, len); - return new UStringView (s[0..len]); - } - - /*********************************************************************** - - ***********************************************************************/ - - UTransform setFilter (UStringView filter) - { - UErrorCode e; - - if (filter.length) - utrans_setFilter (handle, filter.get.ptr, filter.len, e); - else - utrans_setFilter (handle, null, 0, e); - - testError (e, "failed to set transform filter"); - return this; - } - - /*********************************************************************** - - ***********************************************************************/ - - UTransform execute (UString text) - { - UErrorCode e; - uint textLen = 
text.len; - - utrans_transUChars (handle, text.get.ptr, &textLen, text.content.length, 0, &text.len, e); - testError (e, "failed to execute transform"); - return this; - } - - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, uint, wchar*, uint, void*, inout UErrorCode) utrans_openU; - void function (Handle) utrans_close; - wchar* function (Handle, inout uint) utrans_getUnicodeID; - void function (Handle, wchar*, uint, inout UErrorCode) utrans_setFilter; - void function (Handle, wchar*, uint*, uint, uint, uint*, inout UErrorCode) utrans_transUChars; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &utrans_openU, "utrans_openU"}, - {cast(void**) &utrans_close, "utrans_close"}, - {cast(void**) &utrans_getUnicodeID, "utrans_getUnicodeID"}, - {cast(void**) &utrans_setFilter, "utrans_setFilter"}, - {cast(void**) &utrans_transUChars, "utrans_transUChars"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - diff -r 
2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/ICU.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/ICU.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,695 @@ +/******************************************************************************* + + @file ICU.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. 
+
+
+                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+        @version        Initial version; October 2004
+                        Updated to ICU v3.2; March 2005
+
+        @author         Kris
+                        John Reimer
+                        Anders F Bjorklund (Darwin patches)
+
+
+*******************************************************************************/
+
+module com.ibm.icu.mangoicu.ICU;
+
+import java.lang.util;
+/*******************************************************************************
+
+        Library version identifiers. The branches form one else-chain so that
+        exactly one ICULib/ICUSig pair is declared; ICU 3.8 is the fallback.
+*******************************************************************************/
+
+version (ICU30)
+        {
+        private static const char[] ICULib = "30";
+        private static const char[] ICUSig = "_3_0\0";
+        }
+else version (ICU32)
+        {
+        private static const char[] ICULib = "32";
+        private static const char[] ICUSig = "_3_2\0";
+        }
+else version (ICU34)
+        {
+        private static const char[] ICULib = "34";
+        private static const char[] ICUSig = "_3_4\0";
+        }
+else version (ICU36)
+        {
+        private static const char[] ICULib = "36";
+        private static const char[] ICUSig = "_3_6\0";
+        }
+else
+        {
+        private static const char[] ICULib = "38";
+        private static const char[] ICUSig = "_3_8\0";
+        }
+
+/*******************************************************************************
+
+*******************************************************************************/
+
+private static extern (C) uint strlen (char *s);
+private static extern (C) uint wcslen (wchar *s);
+
+
+/*******************************************************************************
+
+        Some low-level routines to help bind the ICU C-API to D.
+
+*******************************************************************************/
+
+protected class ICU
+{
+        /***********************************************************************
+
+                The library names to load within the target environment
+
+        ***********************************************************************/
+
+        version (Win32)
+                {
+                package static char[] icuuc = "icuuc"~ICULib~".dll";
+                package static char[] icuin = "icuin"~ICULib~".dll";
+                }
+        else
+        version (linux)
+                {
+                package static char[] icuuc = "libicuuc.so."~ICULib;
+                package static char[] icuin = "libicui18n.so."~ICULib;
+                }
+        else
+        version (darwin)
+                {
+                package static char[] icuuc = "libicuuc.dylib."~ICULib;
+                package static char[] icuin = "libicui18n.dylib."~ICULib;
+                }
+        else
+                {
+                static assert (false);
+                }
+
+        /***********************************************************************
+
+                Use this for the primary argument-type to most ICU functions
+
+        ***********************************************************************/
+
+        protected typedef void* Handle;
+
+        /***********************************************************************
+
+                Parse-error filled in by several functions
+
+        ***********************************************************************/
+
+        public struct ParseError
+                {
+                int             line,
+                                offset;
+                wchar[16]       preContext,
+                                postContext;
+                }
+
+        /***********************************************************************
+
+                The binary form of a version on ICU APIs is an array of
+                four bytes
+
+        ***********************************************************************/
+
+        public struct Version
+                {
+                ubyte[4] info;
+                }
+
+        /***********************************************************************
+
+                ICU error codes (the ones which are referenced)
+
+        ***********************************************************************/
+
+        package enum UErrorCode:int
+                {
+                OK,
+                BufferOverflow=15
+                }
+
+        /***********************************************************************
+                True when e is above zero, i.e. a failure code
+        ***********************************************************************/
+
+        package static final bool isError (UErrorCode e)
+        {
+                return e > 0;
+        }
+
+        /***********************************************************************
+                Throw an ICUException carrying msg
+        ***********************************************************************/
+
+        package static final void exception (CString msg)
+        {
+                throw new ICUException ( cast(String)msg);
+        }
+
+        /***********************************************************************
+                Throw an ICUException carrying msg when e is above zero
+        ***********************************************************************/
+
+        package static final void testError (UErrorCode e, CString msg)
+        {
+                if (e > 0)
+                    exception (msg);
+        }
+
+        /***********************************************************************
+                Zero-terminated copy of string; zero-length input is not copied
+        ***********************************************************************/
+
+        package static final char* toString (CString string)
+        {
+                static char[] empty;
+
+                if (! string.length)
+                      return (string.ptr) ? empty.ptr : null;
+
+//                if (* (&string[0] + string.length))
+                   {
+                   // Need to make a copy
+                   char[] copy = new char [string.length + 1];
+                   copy [0..string.length] = string;
+                   copy [string.length] = 0;
+                   string = copy;
+                   }
+                return cast(char*)string.ptr;
+        }
+
+        /***********************************************************************
+                Zero-terminated copy of string; zero-length input is not copied
+        ***********************************************************************/
+
+        package static final wchar* toString (CString16 string)
+        {
+                static wchar[] empty;
+
+                if (! string.length)
+                      return (string.ptr) ? empty.ptr : null;
+
+//                if (* (&string[0] + string.length))
+                   {
+                   // Need to make a copy
+                   wchar[] copy = new wchar [string.length + 1];
+                   copy [0..string.length] = string;
+                   copy [string.length] = 0;
+                   string = copy;
+                   }
+                return cast(wchar*)string.ptr;
+        }
+
+        /***********************************************************************
+                Length of a zero-terminated char string, as given by C strlen
+        ***********************************************************************/
+
+        protected static final uint length (char* s)
+        {
+                return strlen (s);
+        }
+
+        /***********************************************************************
+                Length of a zero-terminated UTF-16 string, in code units
+        ***********************************************************************/
+        // C wcslen is not used: wchar_t is 32 bits wide on Linux and Darwin
+        protected static final uint length (wchar* s)
+        {
+                uint n = 0; while (s[n] != 0) ++n; return n;
+        }
+
+        /***********************************************************************
+                View a zero-terminated char string as an array; null gives null
+        ***********************************************************************/
+
+        protected static final char[] toArray (char* s)
+        {
+                if (s)
+                    return s[0..strlen (s)];
+                return null;
+        }
+
+        /***********************************************************************
+                View a zero-terminated UTF-16 string as an array; null gives null
+        ***********************************************************************/
+        // counts 16-bit units itself for the same reason as length (wchar*)
+        protected static final wchar[] toArray (wchar* s)
+        {
+                if (s is null) return null;
+                uint n = 0; while (s[n] != 0) ++n;
+                return s[0..n];
+        }
+}
+
+
+/*******************************************************************************
+        Exception type thrown by ICU.exception and ICU.testError
+*******************************************************************************/
+
+class ICUException : Exception
+{
+        /***********************************************************************
+
+                Construct exception with the provided text string
+
+        ***********************************************************************/
+
+        this (String msg)
+        {
+                super (msg);
+        }
+}
+
+/*******************************************************************************
+
+*******************************************************************************/
+
+typedef void* UParseError;
+
+
+/*******************************************************************************
+
+        Function address loader for Win32
+
+*******************************************************************************/
+
+version (Win32)
+{
+        typedef void* HANDLE;
+        extern (Windows) HANDLE LoadLibraryA (char*);
+        extern (Windows) HANDLE GetProcAddress (HANDLE, char*);
+        extern (Windows) void FreeLibrary (HANDLE);
+
+        /***********************************************************************
+                Resolves function pointers from a DLL by name
+        ***********************************************************************/
+
+        package static class FunctionLoader
+        {
+                /***************************************************************
+                        One target: where to store the address, and its name
+                ***************************************************************/
+
+                protected struct Bind
+                {
+                        void**  fnc;
+                        CString name;
+                }
+
+                /***************************************************************
+                        Load library and fill every target; returns the handle
+                ***************************************************************/
+
+                static final void* bind (char[] library, inout Bind[] targets)
+                {
+                        HANDLE lib = LoadLibraryA (ICU.toString(library));
+                        if (lib is null) throw new Exception (cast(String)("could not open library " ~ library));
+                        foreach (Bind b; targets)
+                                {
+                                auto name = b.name ~ ICUSig;
+                                *b.fnc = GetProcAddress (lib, cast(char*)name.ptr);
+                                if (*b.fnc)
+                                   {}// printf ("bound '%.*s'\n", name);
+                                else
+                                   throw new Exception ( cast(String)("required " ~ name ~ " in library " ~ library));
+                                }
+                        return lib;
+                }
+
+                /***************************************************************
+                        Free the DLL; a no-op unless CorrectedTeardown is set
+                ***************************************************************/
+
+                static final void unbind (void* library)
+                {
+                        version (CorrectedTeardown)
+                                 FreeLibrary (cast(HANDLE) library);
+                }
+        }
+}
+
+
+/*******************************************************************************
+
+        2004-11-26:  Added Linux shared library support -- John Reimer
+
+*******************************************************************************/
+
+else version (linux)
+{
+        //Tell build to link with dl library
+        version(build) { pragma(link, "dl"); }
+
+        // from include/bits/dlfcn.h on Linux
+        const int RTLD_LAZY      = 0x00001;     // Lazy function call binding
+        const int RTLD_NOW       = 0x00002;     // Immediate function call binding
+        const int RTLD_NOLOAD    = 0x00004;     // no object load
+        const int RTLD_DEEPBIND  = 0x00008;
+        const int RTLD_GLOBAL    = 0x00100;     // make object available to whole program
+
+        extern(C)
+        {
+                void* dlopen(char* filename, int flag);
+                char* dlerror();
+                void* dlsym(void* handle, char* symbol);
+                int   dlclose(void* handle);
+        }
+
+        class FunctionLoader
+        {
+                /***************************************************************
+                        One target: where to store the address, and its name
+                ***************************************************************/
+
+                protected struct Bind
+                {
+                        void**  fnc;
+                        CString name;
+                }
+
+                /***************************************************************
+                        Load library and fill every target; returns the handle
+                ***************************************************************/
+
+                static final void* bind (char[] library, inout Bind[] targets)
+                {
+                        static char[] errorInfo;
+                        // printf("the library is %s\n", ICU.toString(library));
+                        void* lib = dlopen(ICU.toString(library), RTLD_NOW);
+                        if (lib is null) throw new Exception (cast(String)("could not open library " ~ library));
+                        // clear the error buffer
+                        dlerror();
+
+                        foreach (Bind b; targets)
+                                {
+                                char[] name = cast(char[])(b.name ~ ICUSig);
+
+                                *b.fnc = dlsym (lib, name.ptr);
+                                if (*b.fnc)
+                                   {}// printf ("bound '%.*s'\n", name);
+                                else {
+                                     // errorInfo = ICU.toArray(dlerror());
+                                     // printf("%s", dlerror());
+                                     throw new Exception (cast(String)("required " ~ name ~ " in library " ~ library));
+                                }
+                                }
+                        return lib;
+                }
+
+                /***************************************************************
+                        Close the library; a no-op unless CorrectedTeardown is set
+                ***************************************************************/
+
+                static final void unbind (void* library)
+                {
+                        version (CorrectedTeardown)
+                        {
+                                if (dlclose (library) != 0)     // dlclose returns 0 on success
+                                        throw new Exception ("close library failed\n");
+                        }
+                }
+        }
+}
+
+
+/*******************************************************************************
+
+        2004-12-20:  Added Darwin shared library support -- afb
+
+*******************************************************************************/
+
+else version (darwin)
+{
+        // #include <mach-o/loader.h>
+
+        struct mach_header
+        {
+                uint    magic;          /* mach magic number identifier */
+                uint    cputype;        /* cpu specifier */
+                uint    cpusubtype;     /* machine specifier */
+                uint    filetype;       /* type of file */
+                uint    ncmds;          /* number of load commands */
+                uint    sizeofcmds;     /* the size of all the load commands */
+                uint    flags;          /* flags */
+        }
+
+        /* Constant for the magic field of the mach_header */
+        const uint MH_MAGIC = 0xfeedface;       // the mach magic number
+        const uint MH_CIGAM = 0xcefaedfe;       // x86 variant
+
+        // #include <mach-o/dyld.h>
+
+        typedef void *NSObjectFileImage;
+
+        typedef void *NSModule;
+
+        typedef void *NSSymbol;
+
+        enum // DYLD_BOOL: uint
+        {
+                FALSE,
+                TRUE
+        }
+        alias uint DYLD_BOOL;
+
+        enum // NSObjectFileImageReturnCode: uint
+        {
+                NSObjectFileImageFailure, /* for this a message is printed on stderr */
+                NSObjectFileImageSuccess,
+                NSObjectFileImageInappropriateFile,
+                NSObjectFileImageArch,
+                NSObjectFileImageFormat, /* for this a message is printed on stderr */
+                NSObjectFileImageAccess
+        }
+        alias uint NSObjectFileImageReturnCode;
+
+        enum // NSLinkEditErrors: uint
+        {
+                NSLinkEditFileAccessError,
+                NSLinkEditFileFormatError,
+                NSLinkEditMachResourceError,
+                NSLinkEditUnixResourceError,
+                NSLinkEditOtherError,
+                NSLinkEditWarningError,
+                NSLinkEditMultiplyDefinedError,
+                NSLinkEditUndefinedError
+        }
+        alias uint NSLinkEditErrors;
+
+        extern(C)
+        {
+                NSObjectFileImageReturnCode NSCreateObjectFileImageFromFile(char *pathName, NSObjectFileImage* objectFileImage);
+                DYLD_BOOL NSDestroyObjectFileImage(NSObjectFileImage objectFileImage);
+
+                mach_header * NSAddImage(char *image_name, uint options);
+                const uint 
NSADDIMAGE_OPTION_NONE = 0x0; + const uint NSADDIMAGE_OPTION_RETURN_ON_ERROR = 0x1; + const uint NSADDIMAGE_OPTION_WITH_SEARCHING = 0x2; + const uint NSADDIMAGE_OPTION_RETURN_ONLY_IF_LOADED = 0x4; + const uint NSADDIMAGE_OPTION_MATCH_FILENAME_BY_INSTALLNAME = 0x8; + + NSModule NSLinkModule(NSObjectFileImage objectFileImage, char* moduleName, uint options); + const uint NSLINKMODULE_OPTION_NONE = 0x0; + const uint NSLINKMODULE_OPTION_BINDNOW = 0x01; + const uint NSLINKMODULE_OPTION_PRIVATE = 0x02; + const uint NSLINKMODULE_OPTION_RETURN_ON_ERROR = 0x04; + const uint NSLINKMODULE_OPTION_DONT_CALL_MOD_INIT_ROUTINES = 0x08; + const uint NSLINKMODULE_OPTION_TRAILING_PHYS_NAME = 0x10; + DYLD_BOOL NSUnLinkModule(NSModule module_, uint options); + + void NSLinkEditError(NSLinkEditErrors *c, int *errorNumber, char **fileName, char **errorString); + + DYLD_BOOL NSIsSymbolNameDefined(char *symbolName); + DYLD_BOOL NSIsSymbolNameDefinedInImage(mach_header *image, char *symbolName); + NSSymbol NSLookupAndBindSymbol(char *symbolName); + NSSymbol NSLookupSymbolInModule(NSModule module_, char* symbolName); + NSSymbol NSLookupSymbolInImage(mach_header *image, char *symbolName, uint options); + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND = 0x0; + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND_NOW = 0x1; + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND_FULLY = 0x2; + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR = 0x4; + + void* NSAddressOfSymbol(NSSymbol symbol); + char* NSNameOfSymbol(NSSymbol symbol); + } + + + class FunctionLoader + { + /*************************************************************** + + ***************************************************************/ + + protected struct Bind + { + void** fnc; + CString name; + } + + /*************************************************************** + + ***************************************************************/ + + private static NSModule open(char* filename) + { + NSModule mod = null; + NSObjectFileImage fileImage = 
null;
+                debug printf("Trying to load: %s\n", filename);
+
+                NSObjectFileImageReturnCode returnCode =
+                        NSCreateObjectFileImageFromFile(filename, &fileImage);
+                if(returnCode == NSObjectFileImageSuccess)
+                {
+                        mod = NSLinkModule(fileImage,filename,
+                                NSLINKMODULE_OPTION_RETURN_ON_ERROR |
+                                NSLINKMODULE_OPTION_PRIVATE |
+                                NSLINKMODULE_OPTION_BINDNOW);
+                        NSDestroyObjectFileImage(fileImage);
+                }
+                else if(returnCode == NSObjectFileImageInappropriateFile)
+                {
+                        NSDestroyObjectFileImage(fileImage);
+                        /* Could be a dynamic library rather than a bundle */
+                        mod = cast(NSModule) NSAddImage(filename,
+                                NSADDIMAGE_OPTION_RETURN_ON_ERROR);
+                }
+                else
+                {
+                        debug printf("FileImage Failed: %d\n", returnCode);
+                }
+                return mod;
+        }
+        // Resolve name in mod (image, module, or -1 for the global context); null when not found
+        private static void* symbol(NSModule mod, char* name)
+        {
+                NSSymbol symbol = null;
+                uint magic = (mod == cast(NSModule) -1) ? 0 : (* cast(mach_header *) mod).magic;  // -1 is a sentinel, never dereference it
+
+                if (mod == cast(NSModule) -1)
+                        /* Global context, use NSLookupAndBindSymbol */
+                        symbol = NSIsSymbolNameDefined(name) ? NSLookupAndBindSymbol(name) : null;
+                else if ( magic == MH_MAGIC || magic == MH_CIGAM )
+                        symbol = ! NSIsSymbolNameDefinedInImage(cast(mach_header *) mod, name) ? null :  // an image is never handed to the module lookup
+                                NSLookupSymbolInImage(cast(mach_header *) mod, name,
+                                        NSLOOKUPSYMBOLINIMAGE_OPTION_BIND |
+                                        NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR);
+                else
+                        symbol = NSLookupSymbolInModule(mod, name);
+
+                return (symbol is null) ? null : NSAddressOfSymbol(symbol);
+        }
+
+        static final void* bind (char[] library, inout Bind[] targets)
+        {
+                static char[] errorInfo;
+
+                debug printf("the library is %s\n", ICU.toString(library));
+
+                void* lib = null;
+                static char[][] usual_suspects = [ "", "/usr/local/lib/", "/usr/lib/",
+                        /* Fink */ "/sw/lib/", /* DarwinPorts */ "/opt/local/lib/" ];
+                foreach (char[] prefix; usual_suspects)
+                {
+                        lib = cast(void*) open(ICU.toString(prefix ~ library));
+                        if (lib != null) break;
+                }
+                if (lib == null)
+                {
+                        throw new Exception ("could not open library " ~ library);
+                }
+
+                // clear the error buffer
+                // error();
+
+                foreach (Bind b; 
targets) + { + // Note: all C functions have a underscore prefix in Mach-O symbols + char[] name = "_" ~ b.name ~ ICUSig; + + *b.fnc = symbol(cast(NSModule) lib, name.ptr); + if (*b.fnc != null) + { + debug printf ("bound '%.*s'\n", name); + } + else + { + // errorInfo = ICU.toArray(error()); + throw new Exception ("required " ~ name ~ " in library " ~ library); + } + } + return lib; + } + + /*************************************************************** + + ***************************************************************/ + + private static bool close(NSModule mod) + { + uint magic = (* cast(mach_header *) mod).magic; + if ( magic == MH_MAGIC || magic == MH_CIGAM ) + { + // Can not unlink dynamic libraries on Darwin + return true; + } + + return (NSUnLinkModule(mod, 0) == TRUE); + } + + static final void unbind (void* library) + { + version (CorrectedTeardown) + { + if (! close(cast(NSModule) library)) + throw new Exception ("close library failed\n"); + } + } + } +} + +/******************************************************************************* + + unknown platform + +*******************************************************************************/ + +else static assert(0); // need an implementation of FunctionLoader for this OS + + diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UBreakIterator.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UBreakIterator.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,621 @@ +/******************************************************************************* + + @file UBreakIterator.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UBreakIterator; + +private import com.ibm.icu.mangoicu.ICU; + +public import com.ibm.icu.mangoicu.ULocale, + com.ibm.icu.mangoicu.UText, + com.ibm.icu.mangoicu.UString; + + + +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class UCharacterIterator : UBreakIterator +// { +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Character, locale, text); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class UWordIterator : UBreakIterator +// { +// public enum Break +// { +// None = 0, +// NoneLimit = 100, +// Number = 100, +// NumberLimit = 200, +// Letter = 200, +// LetterLimit = 300, +// Kana = 300, +// KanaLimit = 400, +// Ideo = 400, +// IdeoLimit = 500 +// } +// +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Word, locale, text); +// } +// +// /*********************************************************************** +// +// Return the status from the break rule that determined +// the most recently returned break position. 
+// +// ***********************************************************************/ +// +// void getStatus (inout Break b) +// { +// b = cast(Break) super.getStatus(); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class ULineIterator : UBreakIterator +// { +// public enum Break +// { +// Soft = 0, +// SoftLimit = 100, +// Hard = 100, +// HardLimit = 200 +// } +// +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Line, locale, text); +// } +// +// /*********************************************************************** +// +// Return the status from the break rule that determined +// the most recently returned break position. +// +// ***********************************************************************/ +// +// void getStatus (inout Break b) +// { +// b = cast(Break) super.getStatus(); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class USentenceIterator : UBreakIterator +// { +// public enum Break +// { +// Term = 0, +// TermLimit = 100, +// Sep = 100, +// Limit = 200 +// } +// +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Sentence, locale, text); +// } +// +// /*********************************************************************** +// +// Return the status from the break rule that determined +// the most recently returned break position. 
+// +// ***********************************************************************/ +// +// void getStatus (inout Break b) +// { +// b = cast(Break) super.getStatus(); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class UTitleIterator : UBreakIterator +// { +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Title, locale, text); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class URuleIterator : UBreakIterator +// { +// /*********************************************************************** +// +// Open a new UBreakIterator for locating text boundaries +// using specified breaking rules +// +// ***********************************************************************/ +// +// this (UStringView rules, UStringView text = null) +// { +// UErrorCode e; +// +// handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e); +// testError (e, "failed to open rule iterator"); +// } +// } + + +/******************************************************************************* + + BreakIterator defines methods for finding the location of boundaries + in text. Pointer to a UBreakIterator maintain a current position and + scan over text returning the index of characters where boundaries occur. + + Line boundary analysis determines where a text string can be broken + when line-wrapping. The mechanism correctly handles punctuation and + hyphenated words. 
/*******************************************************************************

        UBreakIterator finds the location of boundaries in text through
        the ICU ubrk_* C API. An iterator maintains a current position
        and scans over the text, returning the index of the characters
        where boundaries occur.

        Line boundary analysis determines where a text string can be
        broken when line-wrapping. The mechanism correctly handles
        punctuation and hyphenated words.

        Sentence boundary analysis allows selection with correct
        interpretation of periods within numbers and abbreviations, and
        of trailing punctuation marks such as quotation marks and
        parentheses.

        Word boundary analysis is used by search and replace functions,
        as well as within text editing applications that allow the user
        to select words with a double click. Characters that are not
        part of a word, such as symbols or punctuation marks, have
        word-breaks on both sides.

        Character boundary analysis allows users to interact with
        characters as they expect to, for example when moving the cursor
        through a text string, regardless of how a character is stored
        (an accented character might be stored as a base character plus
        a diacritical mark).

        Title boundary analysis locates all positions, typically starts
        of words, that should be set to Title Case when title casing
        the text.

        See the ICU break iterator (ubrk.h) API reference for full
        details.

*******************************************************************************/

struct UBreakIterator
{
        typedef void _UBreakIterator;
        alias _UBreakIterator*  Handle;

        // native iterator, plus the UText used when iterating over UTF-8
        Handle                  handle;
        UText                   ut;

        // sentinel returned by next(), previous() etc. when exhausted
        const uint Done = uint.max;
        alias Done DONE;

        /***********************************************************************

                Iterator kinds, passed straight through to ubrk_open()

        ***********************************************************************/

        private enum    Type
                        {
                        Character,
                        Word,
                        Line,
                        Sentence,
                        Title
                        }

        /***********************************************************************

                Rule-status ranges reported by getStatus() for word
                iterators

        ***********************************************************************/

        public enum     WordBreak
                        {
                        None        = 0,
                        NoneLimit   = 100,
                        Number      = 100,
                        NumberLimit = 200,
                        Letter      = 200,
                        LetterLimit = 300,
                        Kana        = 300,
                        KanaLimit   = 400,
                        Ideo        = 400,
                        IdeoLimit   = 500
                        }

        /***********************************************************************

                Rule-status ranges reported by getStatus() for line
                iterators

        ***********************************************************************/

        public enum     LineBreak
                        {
                        Soft      = 0,
                        SoftLimit = 100,
                        Hard      = 100,
                        HardLimit = 200
                        }

        /***********************************************************************

                Rule-status ranges reported by getStatus() for sentence
                iterators

        ***********************************************************************/

        public enum     SentenceBreak
                        {
                        Term      = 0,
                        TermLimit = 100,
                        Sep       = 100,
                        Limit     = 200
                        }

        /***********************************************************************

                Shared implementation of the open*Iterator() factories:
                opens a native iterator of the given kind for the locale
                and, when str is provided, attaches it as UTF-8 text.

                'failure' is the message reported if ubrk_open() fails.

        ***********************************************************************/

        private static UBreakIterator openIterator (Type kind, char[] failure, ULocale locale, char[] str)
        {
                UBreakIterator it;
                auto status = ICU.UErrorCode.OK;

                it.handle = ubrk_open (kind, cast(char*) locale.name.ptr, null, 0, status);
                ICU.testError (status, failure);

                if (str)
                   {
                   it.ut.openUTF8 (str);
                   ubrk_setUText (it.handle, &it.ut, status);
                   ICU.testError (status, "failed to set text in iterator");
                   }

                return it;
        }

        /***********************************************************************

                Open a new UBreakIterator that locates word boundaries
                for the specified locale, optionally positioned over
                the UTF-8 text in str.

        ***********************************************************************/

        static UBreakIterator openWordIterator (ULocale locale, char[] str = null)
        {
                return openIterator (Type.Word, "failed to open word iterator", locale, str);
        }

        /***********************************************************************

                Open a new UBreakIterator that locates line-break
                opportunities for the specified locale, optionally
                positioned over the UTF-8 text in str.

        ***********************************************************************/

        static UBreakIterator openLineIterator (ULocale locale, char[] str = null)
        {
                return openIterator (Type.Line, "failed to open line iterator", locale, str);
        }

        /***********************************************************************

                Close this iterator: the UText first, then the native
                break iterator

        ***********************************************************************/

        void close ()
        {
                ut.close ();
                ubrk_close (handle);
        }

        /***********************************************************************

                Point an existing iterator at a new piece of UTF-16 text

        ***********************************************************************/

        void setText (UStringView text)
        {
                ICU.UErrorCode status;

                ubrk_setText (handle, text.get.ptr, text.length, status);
                ICU.testError (status, "failed to set iterator text");
        }

        /***********************************************************************

                Point an existing iterator at a new piece of UTF-8 text,
                going through the embedded UText

        ***********************************************************************/

        void setText (char[] text)
        {
                ut.openUTF8 (text);

                auto status = ICU.UErrorCode.OK;
                ubrk_setUText (handle, &ut, status);
                ICU.testError (status, "failed to set text in iterator");
        }

        /***********************************************************************

                Return the most recently returned text boundary

        ***********************************************************************/

        uint current ()
        {
                return ubrk_current (handle);
        }

        /***********************************************************************

                Without an argument: advance to the boundary following
                the current one, or return Done when all boundaries have
                been returned.

                With an offset: return the first boundary following that
                offset (always greater than offset), or Done.

        ***********************************************************************/

        uint next (uint offset = uint.max)
        {
                return (offset != uint.max) ? ubrk_following (handle, offset)
                                            : ubrk_next (handle);
        }

        alias next following;

        /***********************************************************************

                Without an argument: step back to the boundary preceding
                the current one, or return Done when all boundaries have
                been returned.

                With an offset: return the last boundary preceding that
                offset (always smaller than offset), or Done.

        ***********************************************************************/

        uint previous (uint offset = uint.max)
        {
                return (offset != uint.max) ? ubrk_preceding (handle, offset)
                                            : ubrk_previous (handle);
        }

        /***********************************************************************

                Return the index of the first character in the text
                being scanned. This is not always the same as index 0
                of the text.

        ***********************************************************************/

        uint first ()
        {
                return ubrk_first (handle);
        }

        /***********************************************************************

                Return the index immediately beyond the last character
                in the text being scanned. This is not the index of the
                last character itself.

        ***********************************************************************/

        uint last ()
        {
                return ubrk_last (handle);
        }

        /***********************************************************************

                Return true if the specified position is a boundary
                position. As a side effect, the iterator is left
                pointing to the first boundary position at or after
                "offset".

        ***********************************************************************/

        bool isBoundary (uint offset)
        {
                auto hit = ubrk_isBoundary (handle, offset);
                return hit != 0;
        }

        /***********************************************************************

                Store into s the status of the break rule that
                determined the most recently returned break position

        ***********************************************************************/

        void getStatus (inout uint s)
        {
                s = getStatus ();
        }

        /***********************************************************************

                Return the status of the break rule that determined the
                most recently returned break position.

                The values appear in the rule source within brackets,
                {123} for example. For rules that do not specify a
                status, a default value of 0 is returned.

                For word break iterators the possible values are those
                of enum WordBreak.

        ***********************************************************************/

        private uint getStatus ()
        {
                return ubrk_getRuleStatus (handle);
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        private static void* library;

        /***********************************************************************

                Function pointers filled in by FunctionLoader.bind()

        ***********************************************************************/

        private static extern (C)
        {
                Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open;
                Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules;
                void   function (Handle) ubrk_close;
                void   function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText;
                uint   function (Handle) ubrk_current;
                uint   function (Handle) ubrk_next;
                uint   function (Handle) ubrk_previous;
                uint   function (Handle) ubrk_first;
                uint   function (Handle) ubrk_last;
                uint   function (Handle, uint) ubrk_preceding;
                uint   function (Handle, uint) ubrk_following;
                byte   function (Handle, uint) ubrk_isBoundary;
                uint   function (Handle) ubrk_getRuleStatus;
                Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone;
                void   function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText;
        }

        /***********************************************************************

                Symbol table handed to FunctionLoader.bind(): each entry
                pairs a function pointer above with its exported name

        ***********************************************************************/

        static  FunctionLoader.Bind[] targets =
                [
                {cast(void**) &ubrk_open,          "ubrk_open"},
                {cast(void**) &ubrk_close,         "ubrk_close"},
                {cast(void**) &ubrk_openRules,     "ubrk_openRules"},
                {cast(void**) &ubrk_setText,       "ubrk_setText"},
                {cast(void**) &ubrk_current,       "ubrk_current"},
                {cast(void**) &ubrk_next,          "ubrk_next"},
                {cast(void**) &ubrk_previous,      "ubrk_previous"},
                {cast(void**) &ubrk_first,         "ubrk_first"},
                {cast(void**) &ubrk_last,          "ubrk_last"},
                {cast(void**) &ubrk_preceding,     "ubrk_preceding"},
                {cast(void**) &ubrk_following,     "ubrk_following"},
                {cast(void**) &ubrk_isBoundary,    "ubrk_isBoundary"},
                {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"},
                {cast(void**) &ubrk_setUText,      "ubrk_setUText"},
                {cast(void**) &ubrk_safeClone,     "ubrk_safeClone"},
                ];

        /**********************************************************************

                Load the library named by ICU.icuuc and bind the table

        **********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (ICU.icuuc, targets);
        }

        /**********************************************************************

                Release the library bound by the static constructor

        **********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}
Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UCalendar; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +public import com.ibm.icu.mangoicu.ULocale, + com.ibm.icu.mangoicu.UTimeZone; + +/******************************************************************************* + + UCalendar is used for converting between a UDate object and + a set of integer fields such as Year, Month, Day, + Hour, and so on. (A UDate object represents a specific instant + in time with millisecond precision. See UDate for information about + the UDate) + + Types of UCalendar interpret a UDate according to the rules of a + specific calendar system. UCalendar supports Traditional & Gregorian. + + A UCalendar object can produce all the time field values needed to + implement the date-time formatting for a particular language and + calendar style (for example, Japanese-Gregorian, Japanese-Traditional). 
+ + When computing a UDate from time fields, two special circumstances + may arise: there may be insufficient information to compute the UDate + (such as only year and month but no day in the month), or there may be + inconsistent information (such as "Tuesday, July 15, 1996" -- July 15, + 1996 is actually a Monday). + + Insufficient information. The calendar will use default information + to specify the missing fields. This may vary by calendar; for the + Gregorian calendar, the default for a field is the same as that of + the start of the epoch: i.e., Year = 1970, Month = January, + Date = 1, etc. + + Inconsistent information. If fields conflict, the calendar will give + preference to fields set more recently. For example, when determining + the day, the calendar will look for one of the following combinations + of fields. The most recent combination, as determined by the most + recently set single field, will be used. + + See http://oss.software.ibm.com/icu/apiref/udat_8h.html for full + details. 
+ +*******************************************************************************/ + +class UCalendar : ICU +{ + package Handle handle; + + typedef double UDate; + + //Possible types of UCalendars + public enum Type + { + Traditional, + Gregorian + } + + // Possible fields in a UCalendar + public enum DateFields + { + Era, + Year, + Month, + WeekOfYear, + WeekOfMonth, + Date, + DayOfYear, + DayOfWeek, + DayOfWeekInMonth, + AmPm, + Hour, + HourOfDay, + Minute, + Second, + Millisecond, + ZoneOffset, + DstOffset, + YearWoy, + DowLocal, + ExtendedYear, + JulianDay, + MillisecondsInDay, + FieldCount, + DayOfMonth = Date + } + + // Possible days of the week in a UCalendar + public enum DaysOfWeek + { + Sunday = 1, + Monday, + Tuesday, + Wednesday, + Thursday, + Friday, + Saturday + } + + // Possible months in a UCalendar + public enum Months + { + January, + February, + March, + April, + May, + June, + July, + August, + September, + October, + November, + December, + UnDecimber + } + + // Possible AM/PM values in a UCalendar + public enum AMPMs + { + AM, + PM + } + + // Possible formats for a UCalendar's display name + public enum DisplayNameType + { + Standard, + ShortStandard, + DST, + ShortDST + } + + // Possible limit values for a UCalendar + public enum Limit + { + Minimum, + Maximum, + GreatestMinimum, + LeastMaximum, + ActualMinimum, + ActualMaximum + } + + // Types of UCalendar attributes + private enum Attribute + { + Lenient, // unused: set from UDateFormat instead + FirstDayOfWeek, + MinimalDaysInFirstWeek + } + + /*********************************************************************** + + Open a UCalendar. 
A UCalendar may be used to convert a + millisecond value to a year, month, and day + + ***********************************************************************/ + + this (inout UTimeZone zone, inout ULocale locale, Type type = Type.Traditional) + { + UErrorCode e; + + handle = ucal_open (cast(wchar*)zone.name.ptr, zone.name.length, ICU.toString(locale.name), type, e); + testError (e, "failed to open calendar"); + } + + /*********************************************************************** + + Internal only: Open a UCalendar with the given handle + + ***********************************************************************/ + + package this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Close this UCalendar + + ***********************************************************************/ + + ~this () + { + ucal_close (handle); + } + + /*********************************************************************** + + Set the TimeZone used by a UCalendar + + ***********************************************************************/ + + void setTimeZone (inout UTimeZone zone) + { + UErrorCode e; + + ucal_setTimeZone (handle, cast(wchar*)zone.name.ptr, zone.name.length, e); + testError (e, "failed to set calendar time zone"); + } + + /*********************************************************************** + + Get display name of the TimeZone used by this UCalendar + + ***********************************************************************/ + + void getTimeZoneName (UString s, inout ULocale locale, DisplayNameType type=DisplayNameType.Standard) + { + uint format (wchar* dst, uint length, inout ICU.UErrorCode e) + { + return ucal_getTimeZoneDisplayName (handle, type, toString(locale.name), dst, length, e); + } + + s.format (&format, "failed to get time zone name"); + } + + /*********************************************************************** + + Determine if a UCalendar is currently in daylight savings 
+ time + + ***********************************************************************/ + + bool inDaylightTime () + { + UErrorCode e; + + auto x = ucal_inDaylightTime (handle, e); + testError (e, "failed to test calendar daylight time"); + return x != 0; + } + + /*********************************************************************** + + Get the current date and time + + ***********************************************************************/ + + UDate getNow () + { + return ucal_getNow (); + } + + /*********************************************************************** + + Get a UCalendar's current time in millis. The time is + represented as milliseconds from the epoch + + ***********************************************************************/ + + UDate getMillis () + { + UErrorCode e; + + auto x = ucal_getMillis (handle, e); + testError (e, "failed to get time"); + return x; + } + + /*********************************************************************** + + Set a UCalendar's current time in millis. 
The time is + represented as milliseconds from the epoch + + ***********************************************************************/ + + void setMillis (UDate date) + { + UErrorCode e; + + ucal_setMillis (handle, date, e); + testError (e, "failed to set time"); + } + + /*********************************************************************** + + Set a UCalendar's current date + + ***********************************************************************/ + + void setDate (uint year, Months month, uint date) + { + UErrorCode e; + + ucal_setDate (handle, year, month, date, e); + testError (e, "failed to set date"); + } + + /*********************************************************************** + + Set a UCalendar's current date + + ***********************************************************************/ + + void setDateTime (uint year, Months month, uint date, uint hour, uint minute, uint second) + { + UErrorCode e; + + ucal_setDateTime (handle, year, month, date, hour, minute, second, e); + testError (e, "failed to set date/time"); + } + + /*********************************************************************** + + Returns TRUE if the given Calendar object is equivalent + to this one + + ***********************************************************************/ + + bool isEquivalent (UCalendar when) + { + return ucal_equivalentTo (handle, when.handle) != 0; + } + + /*********************************************************************** + + Compares the Calendar time + + ***********************************************************************/ + + bool isEqual (UCalendar when) + { + return (this is when || getMillis == when.getMillis); + } + + /*********************************************************************** + + Returns true if this Calendar's current time is before + "when"'s current time + + ***********************************************************************/ + + bool isBefore (UCalendar when) + { + return (this !is when || getMillis < when.getMillis); + } + 
+ /*********************************************************************** + + Returns true if this Calendar's current time is after + "when"'s current time + + ***********************************************************************/ + + bool isAfter (UCalendar when) + { + return (this !is when || getMillis > when.getMillis); + } + + /*********************************************************************** + + Add a specified signed amount to a particular field in a + UCalendar + + ***********************************************************************/ + + void add (DateFields field, uint amount) + { + UErrorCode e; + + ucal_add (handle, field, amount, e); + testError (e, "failed to add to calendar"); + } + + /*********************************************************************** + + Add a specified signed amount to a particular field in a + UCalendar + + ***********************************************************************/ + + void roll (DateFields field, uint amount) + { + UErrorCode e; + + ucal_roll (handle, field, amount, e); + testError (e, "failed to roll calendar"); + } + + /*********************************************************************** + + Get the current value of a field from a UCalendar + + ***********************************************************************/ + + uint get (DateFields field) + { + UErrorCode e; + + auto x = ucal_get (handle, field, e); + testError (e, "failed to get calendar field"); + return x; + } + + /*********************************************************************** + + Set the value of a field in a UCalendar + + ***********************************************************************/ + + void set (DateFields field, uint value) + { + ucal_set (handle, field, value); + } + + /*********************************************************************** + + Determine if a field in a UCalendar is set + + ***********************************************************************/ + + bool isSet (DateFields field) + { + return 
ucal_isSet (handle, field) != 0; + } + + /*********************************************************************** + + Clear a field in a UCalendar + + ***********************************************************************/ + + void clearField (DateFields field) + { + ucal_clearField (handle, field); + } + + /*********************************************************************** + + Clear all fields in a UCalendar + + ***********************************************************************/ + + void clear () + { + ucal_clear (handle); + } + + /*********************************************************************** + + Determine a limit for a field in a UCalendar. A limit is a + maximum or minimum value for a field + + ***********************************************************************/ + + uint getLimit (DateFields field, Limit type) + { + UErrorCode e; + + auto x = ucal_getLimit (handle, field, type, e); + testError (e, "failed to get calendar limit"); + return x; + } + + /*********************************************************************** + + ***********************************************************************/ + + uint getDaysInFirstWeek () + { + return ucal_getAttribute (handle, Attribute.MinimalDaysInFirstWeek); + } + + /*********************************************************************** + + ***********************************************************************/ + + uint getFirstDayOfWeek () + { + return ucal_getAttribute (handle, Attribute.FirstDayOfWeek); + } + + /*********************************************************************** + + ***********************************************************************/ + + void setDaysInFirstWeek (uint value) + { + ucal_setAttribute (handle, Attribute.MinimalDaysInFirstWeek, value); + } + + /*********************************************************************** + + ***********************************************************************/ + + void setFirstDayOfWeek (uint value) + { + ucal_setAttribute 
(handle, Attribute.FirstDayOfWeek, value); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + C-linkage function pointers for the ICU calendar entry points; each one is listed in the targets table. NOTE(review): ICU's ucal.h declares ucal_inDaylightTime as taking the calendar and a UErrorCode pointer, so the uint second parameter declared here looks suspect - confirm against the caller. + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, char*, Type, inout UErrorCode) ucal_open; + void function (Handle) ucal_close; + UDate function () ucal_getNow; + UDate function (Handle, inout UErrorCode) ucal_getMillis; + void function (Handle, UDate, inout UErrorCode) ucal_setMillis; + void function (Handle, uint, uint, uint, inout UErrorCode) ucal_setDate; + void function (Handle, uint, uint, uint, uint, uint, uint, inout UErrorCode) ucal_setDateTime; + byte function (Handle, Handle) ucal_equivalentTo; + void function (Handle, uint, uint, inout UErrorCode) ucal_add; + void function (Handle, uint, uint, inout UErrorCode) ucal_roll; + uint function (Handle, uint, inout UErrorCode) ucal_get; + void function (Handle, uint, uint) ucal_set; + byte function (Handle, uint) ucal_isSet; + void function (Handle, uint) ucal_clearField; + void function (Handle) ucal_clear; + uint function (Handle, uint, uint, inout UErrorCode) ucal_getLimit; + void function (Handle, wchar*, uint, inout UErrorCode) ucal_setTimeZone; + byte function (Handle, uint) ucal_inDaylightTime; + uint function (Handle, uint) ucal_getAttribute; + void function (Handle, uint, uint) ucal_setAttribute; + uint function (Handle, uint, char*, wchar*, uint, inout UErrorCode) ucal_getTimeZoneDisplayName; + } + + /*********************************************************************** + Table pairing the address of each function pointer above with the ICU symbol name to bind to it + ***********************************************************************/ + + static
FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucal_open, "ucal_open"}, + {cast(void**) &ucal_close, "ucal_close"}, + {cast(void**) &ucal_getNow, "ucal_getNow"}, + {cast(void**) &ucal_getMillis, "ucal_getMillis"}, + {cast(void**) &ucal_setMillis, "ucal_setMillis"}, + {cast(void**) &ucal_setDate, "ucal_setDate"}, + {cast(void**) &ucal_setDateTime, "ucal_setDateTime"}, + {cast(void**) &ucal_equivalentTo, "ucal_equivalentTo"}, + {cast(void**) &ucal_add, "ucal_add"}, + {cast(void**) &ucal_roll, "ucal_roll"}, + {cast(void**) &ucal_get, "ucal_get"}, + {cast(void**) &ucal_set, "ucal_set"}, + {cast(void**) &ucal_clearField, "ucal_clearField"}, + {cast(void**) &ucal_clear, "ucal_clear"}, + {cast(void**) &ucal_getLimit, "ucal_getLimit"}, + {cast(void**) &ucal_setTimeZone, "ucal_setTimeZone"}, + {cast(void**) &ucal_inDaylightTime, "ucal_inDaylightTime"}, + {cast(void**) &ucal_getAttribute, "ucal_getAttribute"}, + {cast(void**) &ucal_setAttribute, "ucal_setAttribute"}, + {cast(void**) &ucal_isSet, "ucal_isSet"}, + {cast(void**) &ucal_getTimeZoneDisplayName, "ucal_getTimeZoneDisplayName"}, + ]; + + /*********************************************************************** + Static constructor: passes icuin and the targets table to FunctionLoader.bind and keeps the returned value in library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + Static destructor: hands the stored library value back to FunctionLoader.unbind + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UChar.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UChar.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,1240 @@ +/******************************************************************************* + + @file UChar.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied +
warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UChar; + +private import com.ibm.icu.mangoicu.ICU; + +/******************************************************************************* + + This API provides low-level access to the Unicode Character + Database. 
In addition to raw property values, some convenience + functions calculate derived properties, for example for Java-style + programming. + + Unicode assigns each code point (not just assigned character) + values for many properties. Most of them are simple boolean + flags, or constants from a small enumerated list. For some + properties, values are strings or other relatively more complex + types. + + For more information see "About the Unicode Character Database" + (http://www.unicode.org/ucd/) and the ICU User Guide chapter on + Properties (http://oss.software.ibm.com/icu/userguide/properties.html). + + Many functions are designed to match java.lang.Character functions. + See the individual function documentation, and see the JDK 1.4.1 + java.lang.Character documentation at + http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html + + There are also functions that provide easy migration from C/POSIX + functions like isblank(). Their use is generally discouraged because + the C/POSIX standards do not define their semantics beyond the ASCII + range, which means that different implementations exhibit very different + behavior. Instead, Unicode properties should be used directly. + + There are also only a few, broad C/POSIX character classes, and they + tend to be used for conflicting purposes. For example, the "isalpha()" + class is sometimes used to determine word boundaries, while a more + sophisticated approach would at least distinguish initial letters from + continuation characters (the latter including combining marks). (In + ICU, BreakIterator is the most sophisticated API for word boundaries.) + Another example: There is no "istitle()" class for titlecase characters. + + A summary of the behavior of some C/POSIX character classification + implementations for Unicode is available at + http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html + + See + this page for full details. 
+ +*******************************************************************************/ + +class UChar : ICU +{ + public enum Property /* selects the property for getProperty, getPropertyMinimum, getPropertyMaximum, getPropertyName and getPropertyValueName; BinaryStart, IntStart, MaskStart, DoubleStart and StringStart open the numbered ranges and the matching Limit members close them */ + { + Alphabetic = 0, + BinaryStart = Alphabetic, + AsciiHexDigit, + BidiControl, + BidiMirrored, + Dash, + DefaultIgnorableCodePoint, + Deprecated, + Diacritic, + Extender, + FullCompositionExclusion, + GraphemeBase, + GraphemeExtend, + GraphemeLink, + HexDigit, + Hyphen, + IdContinue, + IdStart, + Ideographic, + IdsBinaryOperator, + IdsTrinaryOperator, + JoinControl, + LogicalOrderException, + Lowercase, + Math, + NoncharacterCodePoint, + QuotationMark, + Radical, + SoftDotted, + TerminalPunctuation, + UnifiedIdeograph, + Uppercase, + WhiteSpace, + XidContinue, + XidStart, + CaseSensitive, + STerm, + VariationSelector, + NfdInert, + NfkdInert, + NfcInert, + NfkcInert, + SegmentStarter, + BinaryLimit, + BidiClass = 0x1000, + IntStart = BidiClass, + Block, CanonicalCombiningClass, + DecompositionType, + EastAsianWidth, + GeneralCategory, + JoiningGroup, + JoiningType, + LineBreak, + NumericType, + Script, + HangulSyllableType, + NfdQuickCheck, + NfkdQuickCheck, + NfcQuickCheck, + NfkcQuickCheck, + LeadCanonicalCombiningClass, + TrailCanonicalCombiningClass, + IntLimit, + GeneralCategoryMask = 0x2000, + MaskStart = GeneralCategoryMask, + MaskLimit, + NumericValue = 0x3000, + DoubleStart = NumericValue, + DoubleLimit, + Age = 0x4000, + StringStart = Age, + BidiMirroringGlyph, + CaseFolding, + IsoComment, + LowercaseMapping, + Name, + SimpleCaseFolding, + SimpleLowercaseMapping, + SimpleTitlecaseMapping, + SimpleUppercaseMapping, + TitlecaseMapping, + Unicode1Name, + UppercaseMapping, + StringLimit, + InvalidCode = -1 + } + + public enum Category /* general-category codes; presumably the values charType reports - TODO confirm */ + { + Unassigned = 0, + GeneralOtherTypes = 0, /* same value as Unassigned */ + UppercaseLetter = 1, + LowercaseLetter = 2, + TitlecaseLetter = 3, + ModifierLetter = 4, + OtherLetter = 5, + NonSpacingMark = 6, + EnclosingMark = 7, + CombiningSpacingMark = 8, + DecimalDigitNumber = 9, + LetterNumber = 10, + OtherNumber = 11, +
SpaceSeparator = 12, + LineSeparator = 13, + ParagraphSeparator = 14, + ControlChar = 15, + FormatChar = 16, + PrivateUseChar = 17, + Surrogate = 18, + DashPunctuation = 19, + StartPunctuation = 20, + EndPunctuation = 21, + ConnectorPunctuation = 22, + OtherPunctuation = 23, + MathSymbol = 24, + CurrencySymbol = 25, + ModifierSymbol = 26, + OtherSymbol = 27, + InitialPunctuation = 28, + FinalPunctuation = 29, + Count /* implicitly 30, one past FinalPunctuation */ + } + + public enum Direction /* result type of charDirection */ + { + LeftToRight = 0, + RightToLeft = 1, + EuropeanNumber = 2, + EuropeanNumberSeparator = 3, + EuropeanNumberTerminator = 4, + ArabicNumber = 5, + CommonNumberSeparator = 6, + BlockSeparator = 7, + SegmentSeparator = 8, + WhiteSpaceNeutral = 9, + OtherNeutral = 10, + LeftToRightEmbedding = 11, + LeftToRightOverride = 12, + RightToLeftArabic = 13, + RightToLeftEmbedding = 14, + RightToLeftOverride = 15, + PopDirectionalFormat = 16, + DirNonSpacingMark = 17, + BoundaryNeutral = 18, + Count /* implicitly 19, one past BoundaryNeutral */ + } + + public enum BlockCode /* result type of getBlockCode */ + { + NoBlock = 0, + BasicLatin = 1, + Latin1Supplement = 2, + LatinExtendedA = 3, + LatinExtendedB = 4, + IpaExtensions = 5, + SpacingModifierLetters = 6, + CombiningDiacriticalMarks = 7, + Greek = 8, + Cyrillic = 9, + Armenian = 10, + Hebrew = 11, + Arabic = 12, + Syriac = 13, + Thaana = 14, + Devanagari = 15, + Bengali = 16, + Gurmukhi = 17, + Gujarati = 18, + Oriya = 19, + Tamil = 20, + Telugu = 21, + Kannada = 22, + Malayalam = 23, + Sinhala = 24, + Thai = 25, + Lao = 26, + Tibetan = 27, + Myanmar = 28, + Georgian = 29, + HangulJamo = 30, + Ethiopic = 31, + Cherokee = 32, + UnifiedCanadianAboriginalSyllabics = 33, + Ogham = 34, + Runic = 35, + Khmer = 36, + Mongolian = 37, + LatinExtendedAdditional = 38, + GreekExtended = 39, + GeneralPunctuation = 40, + SuperscriptsAndSubscripts = 41, + CurrencySymbols = 42, + CombiningMarksForSymbols = 43, + LetterlikeSymbols = 44, + NumberForms = 45, + Arrows = 46, + MathematicalOperators = 47, + MiscellaneousTechnical = 48, + ControlPictures = 49, +
OpticalCharacterRecognition = 50, + EnclosedAlphanumerics = 51, + BoxDrawing = 52, + BlockElements = 53, + GeometricShapes = 54, + MiscellaneousSymbols = 55, + Dingbats = 56, + BraillePatterns = 57, + CjkRadicalsSupplement = 58, + KangxiRadicals = 59, + IdeographicDescriptionCharacters = 60, + CjkSymbolsAndPunctuation = 61, + Hiragana = 62, + Katakana = 63, + Bopomofo = 64, + HangulCompatibilityJamo = 65, + Kanbun = 66, + BopomofoExtended = 67, + EnclosedCjkLettersAndMonths = 68, + CjkCompatibility = 69, + CjkUnifiedIdeographsExtensionA = 70, + CjkUnifiedIdeographs = 71, + YiSyllables = 72, + YiRadicals = 73, + HangulSyllables = 74, + HighSurrogates = 75, + HighPrivateUseSurrogates = 76, + LowSurrogates = 77, + PrivateUse = 78, + PrivateUseArea = PrivateUse, /* second name for the same value, 78 */ + CjkCompatibilityIdeographs = 79, + AlphabeticPresentationForms = 80, + ArabicPresentationFormsA = 81, + CombiningHalfMarks = 82, + CjkCompatibilityForms = 83, + SmallFormVariants = 84, + ArabicPresentationFormsB = 85, + Specials = 86, + HalfwidthAndFullwidthForms = 87, + OldItalic = 88, + Gothic = 89, + Deseret = 90, + ByzantineMusicalSymbols = 91, + MusicalSymbols = 92, + MathematicalAlphanumericSymbols = 93, + CjkUnifiedIdeographsExtensionB = 94, + CjkCompatibilityIdeographsSupplement = 95, + Tags = 96, + CyrillicSupplementary = 97, + CyrillicSupplement = CyrillicSupplementary, /* second name for the same value, 97 */ + Tagalog = 98, + Hanunoo = 99, + Buhid = 100, + Tagbanwa = 101, + MiscellaneousMathematicalSymbolsA = 102, + SupplementalArrowsA = 103, + SupplementalArrowsB = 104, + MiscellaneousMathematicalSymbolsB = 105, + SupplementalMathematicalOperators = 106, + KatakanaPhoneticExtensions = 107, + VariationSelectors = 108, + SupplementaryPrivateUseAreaA = 109, + SupplementaryPrivateUseAreaB = 110, + Limbu = 111, + TaiLe = 112, + KhmerSymbols = 113, + PhoneticExtensions = 114, + MiscellaneousSymbolsAndArrows = 115, + YijingHexagramSymbols = 116, + LinearBSyllabary = 117, + LinearBIdeograms = 118, + AegeanNumbers = 119, + Ugaritic = 120, +
Shavian = 121, + Osmanya = 122, + CypriotSyllabary = 123, + TaiXuanJingSymbols = 124, + VariationSelectorsSupplement = 125, + Count, /* implicitly 126, one past VariationSelectorsSupplement */ + InvalidCode = -1 + } + + public enum EastAsianWidth + { + Neutral, + Ambiguous, + Halfwidth, + Fullwidth, + Narrow, + Wide, + Count + } + + public enum CharNameChoice /* name-set selector taken by getCharName and charFromName */ + { + Unicode, + Unicode10, + Extended, + Count + } + + public enum NameChoice /* short/long selector taken by getPropertyName and getPropertyValueName */ + { + Short, + Long, + Count + } + + public enum DecompositionType + { + None, + Canonical, + Compat, + Circle, + Final, + Font, + Fraction, + Initial, + Isolated, + Medial, + Narrow, + Nobreak, + Small, + Square, + Sub, + Super, + Vertical, + Wide, + Count + } + + public enum JoiningType + { + NonJoining, + JoinCausing, + DualJoining, + LeftJoining, + RightJoining, + Transparent, + Count + } + + public enum JoiningGroup /* values are implied by member order; presumably aligned with ICU's joining-group constants - confirm before reordering */ + { + NoJoiningGroup, + Ain, + Alaph, + Alef, + Beh, + Beth, + Dal, + DalathRish, + E, + Feh, + FinalSemkath, + Gaf, + Gamal, + Hah, + HamzaOnHehGoal, + He, + Heh, + HehGoal, + Heth, + Kaf, + Kaph, + KnottedHeh, + Lam, + Lamadh, + Meem, + Mim, + Noon, + Nun, + Pe, + Qaf, + Qaph, + Reh, + Reversed_Pe, + Sad, + Sadhe, + Seen, + Semkath, + Shin, + Swash_Kaf, + Syriac_Waw, + Tah, + Taw, + Teh_Marbuta, + Teth, + Waw, + Yeh, + Yeh_Barree, + Yeh_With_Tail, + Yudh, + Yudh_He, + Zain, + Fe, + Khaph, + Zhain, + Count + } + + public enum LineBreak /* values are implied by member order; presumably aligned with ICU's line-break constants - confirm before reordering */ + { + Unknown, + Ambiguous, + Alphabetic, + BreakBoth, + BreakAfter, + BreakBefore, + MandatoryBreak, + ContingentBreak, + ClosePunctuation, + CombiningMark, + CarriageReturn, + Exclamation, + Glue, + Hyphen, + Ideographic, + Inseperable, + Inseparable = Inseperable, /* correctly spelled name for the same value as Inseperable */ + InfixNumeric, + LineFeed, + Nonstarter, + Numeric, + OpenPunctuation, + PostfixNumeric, + PrefixNumeric, + Quotation, + ComplexContext, + Surrogate, + Space, + BreakSymbols, + Zwspace, + NextLine, + WordJoiner, + Count + } + + public enum NumericType + { + None, + Decimal, + Digit, + Numeric, + Count + } + + public enum HangulSyllableType + { + NotApplicable, + LeadingJamo, +
VowelJamo, + TrailingJamo, + LvSyllable, + LvtSyllable, + Count + } + + /*********************************************************************** + + Get the property value for an enumerated or integer + Unicode property for a code point. Also returns binary + and mask property values. + + Unicode, especially in version 3.2, defines many more + properties than the original set in UnicodeData.txt. + + The properties APIs are intended to reflect Unicode + properties as defined in the Unicode Character Database + (UCD) and Unicode Technical Reports (UTR). For details + about the properties see http://www.unicode.org/ . For + names of Unicode properties see the file PropertyAliases.txt. Forwards c and p, both cast to uint, to u_getIntPropertyValue. + + ***********************************************************************/ + + uint getProperty (dchar c, Property p) + { + return u_getIntPropertyValue (cast(uint) c, cast(uint) p); + } + + /*********************************************************************** + + Get the minimum value for an enumerated/integer/binary + Unicode property (forwards p to u_getIntPropertyMinValue) + + ***********************************************************************/ + + uint getPropertyMinimum (Property p) + { + return u_getIntPropertyMinValue (p); + } + + /*********************************************************************** + + Get the maximum value for an enumerated/integer/binary + Unicode property (forwards p to u_getIntPropertyMaxValue) + + ***********************************************************************/ + + uint getPropertyMaximum (Property p) + { + return u_getIntPropertyMaxValue (p); + } + + /*********************************************************************** + + Returns the bidirectional category value for the code + point, which is used in the Unicode bidirectional algorithm + (UAX #9 http://www.unicode.org/reports/tr9/). The result of u_charDirection is cast to Direction.
+ + ***********************************************************************/ + + Direction charDirection (dchar c) + { + return cast(Direction) u_charDirection (c); + } + + /*********************************************************************** + + Returns the Unicode allocation block that contains the + character (the result of ublock_getCode, cast to BlockCode) + + ***********************************************************************/ + + BlockCode getBlockCode (dchar c) + { + return cast(BlockCode) ublock_getCode (c); + } + + /*********************************************************************** + + Retrieve the name of a Unicode character. The caller supplies dst as the output buffer; the result is the slice dst [0..len], so it shares storage with dst. The ICU status is handed to testError. + + ***********************************************************************/ + + char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst) + { + UErrorCode e; + + uint len = u_charName (c, choice, dst.ptr, dst.length, e); + testError (e, "failed to extract char name (buffer too small?)"); + return dst [0..len]; + } + + /*********************************************************************** + + Get the ISO 10646 comment for a character. The caller supplies dst as the output buffer; the result is the slice dst [0..len], so it shares storage with dst. The ICU status is handed to testError. + + ***********************************************************************/ + + char[] getComment (dchar c, inout char[] dst) + { + UErrorCode e; + + uint len = u_getISOComment (c, dst.ptr, dst.length, e); + testError (e, "failed to extract comment (buffer too small?)"); + return dst [0..len]; + } + + /*********************************************************************** + + Find a Unicode character by its name and return its code + point value. The name goes through toString before it is given to u_charFromName; the ICU status is handed to testError.
+ + ***********************************************************************/ + + dchar charFromName (CharNameChoice choice, char[] name) + { + UErrorCode e; + + dchar c = u_charFromName (choice, toString(name), e); + testError (e, "failed to locate char name"); + return c; + } + + /*********************************************************************** + + Return the Unicode name for a given property, as given in the + Unicode database file PropertyAliases.txt (the char pointer from u_getPropertyName is passed through toArray) + + ***********************************************************************/ + + char[] getPropertyName (Property p, NameChoice choice) + { + return toArray (u_getPropertyName (p, choice)); + } + + /*********************************************************************** + + Return the Unicode name for a given property value, as given + in the Unicode database file PropertyValueAliases.txt. Note the wrapped call is u_getPropertyValueName (p, value, choice): value comes before choice there, unlike in this signature. + + ***********************************************************************/ + + char[] getPropertyValueName (Property p, NameChoice choice, uint value) + { + return toArray (u_getPropertyValueName (p, value, choice)); + } + + /*********************************************************************** + + Gets the Unicode version information; v is passed by reference to u_getUnicodeVersion + + ***********************************************************************/ + + void getUnicodeVersion (inout Version v) + { + u_getUnicodeVersion (v); + } + + /*********************************************************************** + + Get the "age" of the code point; v is passed by reference to u_charAge + + ***********************************************************************/ + + void getCharAge (dchar c, inout Version v) + { + u_charAge (c, v); + } + + + /*********************************************************************** + + These are externalised directly to the client (sans wrapper), + but this may have to change for linux, depending upon the + ICU function-naming conventions within the Posix libraries.
+ + ***********************************************************************/ + + static extern (C) + { + /*************************************************************** + + Check if a code point has the Alphabetic Unicode + property. Bound to ICU u_isUAlphabetic. + + ***************************************************************/ + + bool function (dchar c) isUAlphabetic; + + /*************************************************************** + + Check if a code point has the Lowercase Unicode + property. Bound to ICU u_isULowercase. + + ***************************************************************/ + + bool function (dchar c) isULowercase; + + /*************************************************************** + + Check if a code point has the Uppercase Unicode + property. Bound to ICU u_isUUppercase. + + ***************************************************************/ + + bool function (dchar c) isUUppercase; + + /*************************************************************** + + Check if a code point has the White_Space Unicode + property. Bound to ICU u_isUWhiteSpace. + + ***************************************************************/ + + bool function (dchar c) isUWhiteSpace; + + /*************************************************************** + + Determines whether the specified code point has the + general category "Ll" (lowercase letter). Bound to ICU u_islower. + + ***************************************************************/ + + bool function (dchar c) isLower; + + /*************************************************************** + + Determines whether the specified code point has the + general category "Lu" (uppercase letter). Bound to ICU u_isupper. + + ***************************************************************/ + + bool function (dchar c) isUpper; + + /*************************************************************** + + Determines whether the specified code point is a + titlecase letter. Bound to ICU u_istitle.
+ + ***************************************************************/ + + bool function (dchar c) isTitle; + + /*************************************************************** + + Determines whether the specified code point is a + digit character according to Java. Bound to ICU u_isdigit. + + ***************************************************************/ + + bool function (dchar c) isDigit; + + /*************************************************************** + + Determines whether the specified code point is a + letter character. Bound to ICU u_isalpha. + + ***************************************************************/ + + bool function (dchar c) isAlpha; + + /*************************************************************** + + Determines whether the specified code point is an + alphanumeric character (letter or digit) according + to Java. Bound to ICU u_isalnum. + + ***************************************************************/ + + bool function (dchar c) isAlphaNumeric; + + /*************************************************************** + + Determines whether the specified code point is a + hexadecimal digit. Bound to ICU u_isxdigit. + + ***************************************************************/ + + bool function (dchar c) isHexDigit; + + /*************************************************************** + + Determines whether the specified code point is a + punctuation character. Bound to ICU u_ispunct. + + ***************************************************************/ + + bool function (dchar c) isPunct; + + /*************************************************************** + + Determines whether the specified code point is a + "graphic" character (printable, excluding spaces). Bound to ICU u_isgraph. + + ***************************************************************/ + + bool function (dchar c) isGraph; + + /*************************************************************** + + Determines whether the specified code point is a + "blank" or "horizontal space", a character that + visibly separates words on a line. Bound to ICU u_isblank.
+ + ***************************************************************/ + + bool function (dchar c) isBlank; + + /*************************************************************** + + Determines whether the specified code point is + "defined", which usually means that it is assigned + a character. Bound to ICU u_isdefined. + + ***************************************************************/ + + bool function (dchar c) isDefined; + + /*************************************************************** + + Determines if the specified character is a space + character or not. Bound to ICU u_isspace. + + ***************************************************************/ + + bool function (dchar c) isSpace; + + /*************************************************************** + + Determine if the specified code point is a space + character according to Java. Bound to ICU u_isJavaSpaceChar. + + ***************************************************************/ + + bool function (dchar c) isJavaSpaceChar; + + /*************************************************************** + + Determines if the specified code point is a whitespace + character according to Java/ICU. Bound to ICU u_isWhitespace. + + ***************************************************************/ + + bool function (dchar c) isWhiteSpace; + + /*************************************************************** + + Determines whether the specified code point is a + control character (as defined by this function). Bound to ICU u_iscntrl. + + ***************************************************************/ + + bool function (dchar c) isCtrl; + + /*************************************************************** + + Determines whether the specified code point is an ISO + control code. Bound to ICU u_isISOControl. + + ***************************************************************/ + + bool function (dchar c) isISOControl; + + /*************************************************************** + + Determines whether the specified code point is a + printable character. Bound to ICU u_isprint.
+ + ***************************************************************/ + + bool function (dchar c) isPrint; + + /*************************************************************** + + Determines whether the specified code point is a + base character. Bound to ICU u_isbase. + + ***************************************************************/ + + bool function (dchar c) isBase; + + /*************************************************************** + + Determines if the specified character is permissible + as the first character in an identifier according to + Unicode (The Unicode Standard, Version 3.0, chapter + 5.16 Identifiers). Bound to ICU u_isIDStart. + + ***************************************************************/ + + bool function (dchar c) isIDStart; + + /*************************************************************** + + Determines if the specified character is permissible + in an identifier according to Java. Bound to ICU u_isIDPart. + + ***************************************************************/ + + bool function (dchar c) isIDPart; + + /*************************************************************** + + Determines if the specified character should be + regarded as an ignorable character in an identifier, + according to Java. Bound to ICU u_isIDIgnorable. + + ***************************************************************/ + + bool function (dchar c) isIDIgnorable; + + /*************************************************************** + + Determines if the specified character is permissible + as the first character in a Java identifier. Bound to ICU u_isJavaIDStart. + + ***************************************************************/ + + bool function (dchar c) isJavaIDStart; + + /*************************************************************** + + Determines if the specified character is permissible + in a Java identifier. Bound to ICU u_isJavaIDPart. + + ***************************************************************/ + + bool function (dchar c) isJavaIDPart; + + /*************************************************************** + + Determines whether the code point has the + Bidi_Mirrored property. Bound to ICU u_isMirrored.
+ + ***************************************************************/ + + bool function (dchar c) isMirrored; + + /*************************************************************** + + Returns the decimal digit value of a decimal digit + character, or -1 when the code point is not a decimal digit. Bound to ICU u_charDigitValue, whose C return type is a signed 32-bit integer; the pointer is declared int so that the -1 result is not truncated to 255. + + ***************************************************************/ + + int function (dchar c) charDigitValue; + + /*************************************************************** + + Maps the specified character to a "mirror-image" + character. Bound to ICU u_charMirror. + + ***************************************************************/ + + dchar function (dchar c) charMirror; + + /*************************************************************** + + Returns the general category value for the code point. Bound to ICU u_charType. + + ***************************************************************/ + + ubyte function (dchar c) charType; + + /*************************************************************** + + Returns the combining class of the code point as + specified in UnicodeData.txt. Bound to ICU u_getCombiningClass. + + ***************************************************************/ + + ubyte function (dchar c) getCombiningClass; + + /*************************************************************** + + The given character is mapped to its lowercase + equivalent according to UnicodeData.txt; if the + character has no lowercase equivalent, the + character itself is returned. Bound to ICU u_tolower. + + ***************************************************************/ + + dchar function (dchar c) toLower; + + /*************************************************************** + + The given character is mapped to its uppercase equivalent + according to UnicodeData.txt; if the character has no + uppercase equivalent, the character itself is returned. Bound to ICU u_toupper.
+ + ***************************************************************/ + + dchar function (dchar c) toUpper; + + /*************************************************************** + + The given character is mapped to its titlecase + equivalent according to UnicodeData.txt; if none + is defined, the character itself is returned. Bound to ICU u_totitle. + + ***************************************************************/ + + dchar function (dchar c) toTitle; + + /*************************************************************** + + The given character is mapped to its case folding + equivalent according to UnicodeData.txt and + CaseFolding.txt; if the character has no case folding + equivalent, the character itself is returned. Bound to ICU u_foldCase. + + ***************************************************************/ + + dchar function (dchar c, uint options) foldCase; + + /*************************************************************** + + Returns the decimal digit value of the code point in + the specified radix. Bound to ICU u_digit. NOTE(review): ICU documents a result of -1 when the code point has no digit value in that radix; through the uint return type declared here that would read as uint.max - confirm callers allow for it. + + ***************************************************************/ + + uint function (dchar ch, ubyte radix) digit; + + /*************************************************************** + + Determines the character representation for a specific + digit in the specified radix. Bound to ICU u_forDigit. + + ***************************************************************/ + + dchar function (uint digit, ubyte radix) forDigit; + + /*************************************************************** + + Get the numeric value for a Unicode code point as + defined in the Unicode Character Database. Bound to ICU u_getNumericValue. + + ***************************************************************/ + + double function (dchar c) getNumericValue; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + uint function (uint, uint) u_getIntPropertyValue; + uint function (uint) u_getIntPropertyMinValue; + uint function (uint) u_getIntPropertyMaxValue; + uint function (dchar) u_charDirection; + uint function (dchar) ublock_getCode; + uint function (dchar, uint, char*, uint, inout UErrorCode) u_charName; + uint function (dchar, char*, uint, inout UErrorCode) u_getISOComment; + uint function (uint, char*, inout UErrorCode) u_charFromName; + char* function (uint, uint) u_getPropertyName; + char* function (uint, uint, uint) u_getPropertyValueName; + void function (inout Version) u_getUnicodeVersion; + void function (dchar, inout Version) u_charAge; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &forDigit, "u_forDigit"}, + {cast(void**) &digit, "u_digit"}, + {cast(void**) &foldCase, "u_foldCase"}, + {cast(void**) &toTitle, "u_totitle"}, + {cast(void**) &toUpper, "u_toupper"}, + {cast(void**) &toLower, "u_tolower"}, + {cast(void**) &charType, "u_charType"}, + {cast(void**) &charMirror, "u_charMirror"}, + {cast(void**) &charDigitValue, "u_charDigitValue"}, + {cast(void**) &isJavaIDPart, "u_isJavaIDPart"}, + {cast(void**) &isJavaIDStart, "u_isJavaIDStart"}, + {cast(void**) &isIDIgnorable, "u_isIDIgnorable"}, + {cast(void**) &isIDPart, "u_isIDPart"}, + {cast(void**) &isIDStart, "u_isIDStart"}, + {cast(void**) &isMirrored, "u_isMirrored"}, + {cast(void**) &isBase, "u_isbase"}, + {cast(void**) &isPrint, "u_isprint"}, + 
{cast(void**) &isISOControl, "u_isISOControl"}, + {cast(void**) &isCtrl, "u_iscntrl"}, + {cast(void**) &isWhiteSpace, "u_isWhitespace"}, + {cast(void**) &isJavaSpaceChar, "u_isJavaSpaceChar"}, + {cast(void**) &isSpace, "u_isspace"}, + {cast(void**) &isDefined, "u_isdefined"}, + {cast(void**) &isBlank, "u_isblank"}, + {cast(void**) &isGraph, "u_isgraph"}, + {cast(void**) &isPunct, "u_ispunct"}, + {cast(void**) &isHexDigit, "u_isxdigit"}, + {cast(void**) &isAlpha, "u_isalpha"}, + {cast(void**) &isAlphaNumeric, "u_isalnum"}, + {cast(void**) &isDigit, "u_isdigit"}, + {cast(void**) &isTitle, "u_istitle"}, + {cast(void**) &isUpper, "u_isupper"}, + {cast(void**) &isLower, "u_islower"}, + {cast(void**) &isUAlphabetic, "u_isUAlphabetic"}, + {cast(void**) &isUWhiteSpace, "u_isUWhiteSpace"}, + {cast(void**) &isUUppercase, "u_isUUppercase"}, + {cast(void**) &isULowercase, "u_isULowercase"}, + {cast(void**) &getNumericValue, "u_getNumericValue"}, + {cast(void**) &getCombiningClass, "u_getCombiningClass"}, + {cast(void**) &u_getIntPropertyValue, "u_getIntPropertyValue"}, + {cast(void**) &u_getIntPropertyMinValue,"u_getIntPropertyMinValue"}, + {cast(void**) &u_getIntPropertyMaxValue,"u_getIntPropertyMaxValue"}, + {cast(void**) &u_charDirection, "u_charDirection"}, + {cast(void**) &ublock_getCode, "ublock_getCode"}, + {cast(void**) &u_charName, "u_charName"}, + {cast(void**) &u_getISOComment, "u_getISOComment"}, + {cast(void**) &u_charFromName, "u_charFromName"}, + {cast(void**) &u_getPropertyName, "u_getPropertyName"}, + {cast(void**) &u_getPropertyValueName, "u_getPropertyValueName"}, + {cast(void**) &u_getUnicodeVersion, "u_getUnicodeVersion"}, + {cast(void**) &u_charAge, "u_charAge"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + 
/*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UCollator.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UCollator.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,732 @@ +/******************************************************************************* + + @file UCollator.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). 
Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UCollator; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.USet, + com.ibm.icu.mangoicu.ULocale, + com.ibm.icu.mangoicu.UString; + +/******************************************************************************* + + The API for Collator performs locale-sensitive string comparison. + You use this service to build searching and sorting routines for + natural language text. Important: The ICU collation service has been + reimplemented in order to achieve better performance and UCA compliance. + For details, see the collation design document. + + For more information about the collation service see the user's guide. + + The collation service provides correct sorting orders for most locales + supported in ICU. If specific data for a locale is not available, + the order eventually falls back to the UCA sort order. + + Sort ordering may be customized by providing your own set of rules. + For more on this subject see the Collation customization section of + the user's guide. + + See + this page for full details.
+ +*******************************************************************************/ + +class UCollator : ICU +{ + package Handle handle; /* native collator handle: set by the constructors, released by the destructor */ + + enum Attribute /* attribute selectors accepted by setAttribute() and getAttribute() */ + { + FrenchCollation, + AlternateHandling, + CaseFirst, + CaseLevel, + NormalizationMode, + DecompositionMode = NormalizationMode, + strength, + HiraganaQuaternaryMode, + NumericCollation, + AttributeCount + } + + enum AttributeValue /* values used with setAttribute()/getAttribute(); also aliased as Strength below */ + { + Default = -1, + Primary = 0, + Secondary = 1, + Tertiary = 2, + DefaultStrength = Tertiary, + CeStrengthLimit, + Quaternary = 3, + Identical = 15, + strengthLimit, + Off = 16, + On = 17, + Shifted = 20, + NonIgnorable = 21, + LowerFirst = 24, + UpperFirst = 25, + AttributeValueCount + } + + enum RuleOption /* selects what getRules() returns */ + { + TailoringOnly, + FullRules + } + + enum BoundMode /* bound type accepted by getBound() */ + { + BoundLower = 0, + BoundUpper = 1, + BoundUpperLong = 2, + BoundValueCount + } + + typedef AttributeValue Strength; /* strength arguments are AttributeValue members */ + + /*********************************************************************** + + Open a UCollator for comparing strings. The locale specified + determines the required collation rules. Special values for + locales can be passed in - if ULocale.Default is passed for + the locale, the default locale collation rules will be used. + If ULocale.Root is passed, UCA rules will be used. The ICU + status code is checked via testError after the open call. + + ***********************************************************************/ + + this (ULocale locale) + { + UErrorCode e; + + handle = ucol_open (toString(locale.name), e); + testError (e, "failed to open collator"); + } + + /*********************************************************************** + + Produce a UCollator instance according to the rules supplied. + + The rules are used to change the default ordering, defined in + the UCA in a process called tailoring.
For the syntax of the + rules please see the user's guide + + ***********************************************************************/ + + this (UStringView rules, AttributeValue mode, Strength strength) + { + UErrorCode e; + + handle = ucol_openRules (rules.get.ptr, rules.len, mode, strength, null, e); + testError (e, "failed to open rules-based collator"); + } + + /*********************************************************************** + + Open a collator defined by a short form string. The + structure and the syntax of the string are defined in + the "Naming collators" section of the user's guide: + http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + Attributes are overridden by the subsequent attributes. + So, for "S2_S3", final strength will be 3. 3066bis + locale overrides individual locale parts. + + The call to this constructor is equivalent to a plain + constructor, followed by a series of calls to setAttribute + and setVariableTop + + ***********************************************************************/ + + this (char[] shortName, bool forceDefaults) + { + UErrorCode e; + + handle = ucol_openFromShortString (toString(shortName), forceDefaults, null, e); + testError (e, "failed to open short-name collator"); + } + + /*********************************************************************** + + Internal constructor invoked via USearch. Stores the supplied handle as-is; NOTE(review): the destructor below will pass it to ucol_close - confirm the caller expects that + + ***********************************************************************/ + + package this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Close a UCollator by handing its handle to ucol_close + + ***********************************************************************/ + + ~this () + { + ucol_close (handle); + } + + /*********************************************************************** + + Get a set containing the contractions defined by the + collator. + + The set includes both the UCA contractions and the + contractions defined by the collator.
This set will + contain only strings. If a tailoring explicitly + suppresses contractions from the UCA (like Russian), + removed contractions will not be in the resulting set. + The result is delivered through the supplied set. + ***********************************************************************/ + + void getContractions (USet set) + { + UErrorCode e; + + ucol_getContractions (handle, set.handle, e); + testError (e, "failed to get collator contractions"); + } + + /*********************************************************************** + + Compare two strings. Return value is negative, zero, or positive + + ***********************************************************************/ + + int strcoll (UStringView source, UStringView target) + { + return ucol_strcoll (handle, source.get.ptr, source.len, target.get.ptr, target.len); + } + + /*********************************************************************** + + Determine if one string is greater than another. This + function is equivalent to strcoll() > 0 + + ***********************************************************************/ + + bool greater (UStringView source, UStringView target) + { + return ucol_greater (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; + } + + /*********************************************************************** + + Determine if one string is greater than or equal to + another.
This function is equivalent to strcoll() >= 0 + + ***********************************************************************/ + + bool greaterOrEqual (UStringView source, UStringView target) + { + return ucol_greaterOrEqual (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; + } + + /*********************************************************************** + + Determine if two strings compare as equal. This function is equivalent to strcoll() == 0 + + ***********************************************************************/ + + bool equal (UStringView source, UStringView target) + { + return ucol_equal (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; + } + + /*********************************************************************** + + Get the collation strength used in a UCollator. The + strength influences how strings are compared. + + ***********************************************************************/ + + Strength getStrength () + { + return ucol_getStrength (handle); + } + + /*********************************************************************** + + Set the collation strength used in this UCollator. The + strength influences how strings are compared. s is one of + Primary, Secondary, Tertiary, Quaternary, Identical, or + Default + + ***********************************************************************/ + + void setStrength (Strength s) + { + ucol_setStrength (handle, s); + } + + /*********************************************************************** + + Get the display name for a UCollator.
The display name is + suitable for presentation to a user. The name for locale obj, expressed in locale display, is written into dst + + ***********************************************************************/ + + void getDisplayName (ULocale obj, ULocale display, UString dst) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return ucol_getDisplayName (toString(obj.name), toString(display.name), p, len, e); /* write into the buffer and size handed to this callback by dst.format */ + } + + dst.format (&fmt, "failed to get collator display name"); + } + + /*********************************************************************** + + Returns current rules. Options define whether full rules + are returned or just the tailoring. The rules are written into dst. + + ***********************************************************************/ + + void getRules (UString dst, RuleOption o = RuleOption.FullRules) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + uint needed = ucol_getRulesEx (handle, o, p, len); /* same buffer and size that the overflow check below tests against */ + if (needed > len) /* ucol_getRulesEx takes no status argument, so overflow is flagged here */ + e = e.BufferOverflow; + return needed; + } + + dst.format (&fmt, "failed to get collator rules"); + } + + /*********************************************************************** + + Get the short definition string for a collator. + + This API harvests the collator's locale and the attribute + set and produces a string that can be used for opening a + collator with the same properties using the char[] style + constructor. This string will be normalized.
+ + The structure and the syntax of the string are defined in the + "Naming collators" section of the user's guide: + http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + + ***********************************************************************/ + + char[] getShortDefinitionString (ULocale locale = ULocale.Default) + { + UErrorCode e; + char[64] dst; /* fixed 64-char scratch buffer; the result is copied out of it below */ + + uint len = ucol_getShortDefinitionString (handle, toString(locale.name), dst.ptr, dst.length, e); + testError (e, "failed to get collator short name"); + return dst[0..len].dup; + } + + /*********************************************************************** + + Verifies and normalizes short definition string. Normalized + short definition string has all the options sorted by the + argument name, so that equivalent definition strings are the + same. The result is returned as a copy of a 64-char scratch buffer. + + ***********************************************************************/ + + char[] normalizeShortDefinitionString (char[] source) + { + UErrorCode e; + char[64] dst; + + uint len = ucol_normalizeShortDefinitionString (toString(source), dst.ptr, dst.length, null, e); + testError (e, "failed to normalize collator short name"); + return dst[0..len].dup; + } + + /*********************************************************************** + + Get a sort key for a string from a UCollator. Sort keys + may be compared using strcmp. Returns the slice of result holding the key, or null when the length reported by ucol_getSortKey is not smaller than result.length. + + ***********************************************************************/ + + ubyte[] getSortKey (UStringView t, ubyte[] result) + { + uint len = ucol_getSortKey (handle, t.get.ptr, t.len, result.ptr, result.length); + if (len < result.length) + return result [0..len]; + return null; + } + + /*********************************************************************** + + Merge two sort keys. The levels are merged with their + corresponding counterparts (primaries with primaries, + secondaries with secondaries etc.). Between the values + from the same level a separator is inserted.
Example + (uncompressed): 191B1D 01 050505 01 910505 00 and + 1F2123 01 050505 01 910505 00 will be merged as + 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00 + This allows for concatenating of first and last names for + sorting, among other things. If the destination buffer is + not big enough, the results are undefined. If any of the source + lengths are zero or any of the source pointers are null/undefined, + the result is of size zero. The wrapper returns the merged slice of result, or null when the length reported by ucol_mergeSortkeys is not smaller than result.length. + + ***********************************************************************/ + + ubyte[] mergeSortkeys (ubyte[] left, ubyte[] right, ubyte[] result) + { + uint len = ucol_mergeSortkeys (left.ptr, left.length, right.ptr, right.length, result.ptr, result.length); + if (len < result.length) + return result [0..len]; + return null; + } + + /*********************************************************************** + + Produce a bound for a given sortkey and a number of levels. + + The value reported by ucol_getBound is always the number of bytes needed, regardless + of whether the result buffer was big enough or even valid. + + Resulting bounds can be used to produce a range of strings + that are between upper and lower bounds. For example, if + bounds are produced for a sortkey of string "smith", strings + between upper and lower bounds with one level would include + "Smith", "SMITH", "sMiTh". + + There are two upper bounds that can be produced. If BoundUpper + is produced, strings matched would be as above. However, if + bound produced using BoundUpperLong is used, the above example + will also match "Smithsonian" and similar.
+ + ***********************************************************************/ + + ubyte[] getBound (BoundMode mode, ubyte[] source, ubyte[] result, uint levels = 1) + { + UErrorCode e; + + uint len = ucol_getBound (source.ptr, source.length, mode, levels, result.ptr, result.length, e); + testError (e, "failed to get sortkey bound"); + if (len < result.length) /* only a length strictly smaller than the buffer yields a slice */ + return result [0..len]; + return null; + } + + /*********************************************************************** + + Gets the version information for a Collator; it is stored in v. + + Version is currently an opaque 32-bit number which depends, + among other things, on major versions of the collator + tailoring and UCA + + ***********************************************************************/ + + void getVersion (inout Version v) + { + ucol_getVersion (handle, v); + } + + /*********************************************************************** + + Gets the UCA version information for this Collator; it is stored in v + + ***********************************************************************/ + + void getUCAVersion (inout Version v) + { + ucol_getUCAVersion (handle, v); + } + + /*********************************************************************** + + Universal attribute setter: applies value to attribute attr of this collator + + ***********************************************************************/ + + void setAttribute (Attribute attr, AttributeValue value) + { + UErrorCode e; + + ucol_setAttribute (handle, attr, value, e); + testError (e, "failed to set collator attribute"); + } + + /*********************************************************************** + + Universal attribute getter: returns the current value of attribute attr + + ***********************************************************************/ + + AttributeValue getAttribute (Attribute attr) + { + UErrorCode e; + + AttributeValue v = ucol_getAttribute (handle, attr, e); + testError (e, "failed to get collator attribute"); + return v; + } + + /*********************************************************************** + + Variable top is a two byte primary value which
causes all + the codepoints with primary values that are less than or equal + to the variable top to be shifted when alternate handling + is set to Shifted. + + ***********************************************************************/ + + void setVariableTop (UStringView t) + { + UErrorCode e; + + ucol_setVariableTop (handle, t.get.ptr, t.len, e); + testError (e, "failed to set variable-top"); + } + + /*********************************************************************** + + Sets the variable top to a collation element value + supplied. Variable top is set to the upper 16 bits. + Lower 16 bits are ignored. + + ***********************************************************************/ + + void setVariableTop (uint x) + { + UErrorCode e; + + ucol_restoreVariableTop (handle, x, e); + testError (e, "failed to restore variable-top"); + } + + /*********************************************************************** + + Gets the variable top value of this Collator. Lower 16 bits + are undefined and should be ignored. + + ***********************************************************************/ + + uint getVariableTop () + { + UErrorCode e; + + uint x = ucol_getVariableTop (handle, e); + testError (e, "failed to get variable-top"); + return x; + } + + /*********************************************************************** + + Gets the locale name of the collator and stores it in locale.name. If the collator is + instantiated from the rules, then this function will throw + an exception; one is also raised when ICU reports an error or returns no name + + ***********************************************************************/ + + void getLocale (ULocale locale, ULocale.Type type) + { + UErrorCode e; + + locale.name = toArray (ucol_getLocaleByType (handle, type, e)); + if (isError(e) || locale.name is null) + exception ("failed to get collator locale"); + } + + /*********************************************************************** + + Get the Unicode set that contains all the characters and + sequences tailored in this collator.
+ + ***********************************************************************/ + + USet getTailoredSet () + { + UErrorCode e; + + Handle h = ucol_getTailoredSet (handle, e); + testError (e, "failed to get tailored set"); + return new USet (h); /* the returned handle is wrapped in a new USet */ + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + C function pointers for the ucol_* entry points; they are listed by name in the targets table that follows + ***********************************************************************/ + + private static extern (C) + { + void function (Handle) ucol_close; + Handle function (char *loc, inout UErrorCode e) ucol_open; + Handle function (wchar* rules, uint rulesLength, AttributeValue normalizationMode, Strength strength, UParseError *parseError, inout UErrorCode e) ucol_openRules; + Handle function (char *definition, byte forceDefaults, UParseError *parseError, inout UErrorCode e) ucol_openFromShortString; + uint function (Handle, Handle conts, inout UErrorCode e) ucol_getContractions; + int function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_strcoll; + byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greater; + byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greaterOrEqual; + byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_equal; + Strength function (Handle) ucol_getStrength; + void function (Handle, Strength strength) ucol_setStrength; + uint function (char *objLoc, char *dispLoc, wchar* result, uint resultLength, inout UErrorCode e) ucol_getDisplayName; + uint function (Handle, char *locale, char *buffer, uint capacity, inout UErrorCode e)
ucol_getShortDefinitionString; + uint function (char *source, char *destination, uint capacity, UParseError *parseError, inout UErrorCode e) ucol_normalizeShortDefinitionString; + uint function (Handle, wchar* source, uint sourceLength, ubyte *result, uint resultLength) ucol_getSortKey; + uint function (ubyte *source, uint sourceLength, BoundMode boundType, uint noOfLevels, ubyte *result, uint resultLength, inout UErrorCode e) ucol_getBound; + void function (Handle, inout Version info) ucol_getVersion; /* by reference, like u_getUnicodeVersion in UChar: the C side fills the caller's Version */ + void function (Handle, inout Version info) ucol_getUCAVersion; /* by reference, for the same reason */ + uint function (ubyte *src1, uint src1Length, ubyte *src2, uint src2Length, ubyte *dest, uint destCapacity) ucol_mergeSortkeys; + void function (Handle, Attribute attr, AttributeValue value, inout UErrorCode e) ucol_setAttribute; + AttributeValue function (Handle, Attribute attr, inout UErrorCode e) ucol_getAttribute; + uint function (Handle, wchar* varTop, uint len, inout UErrorCode e) ucol_setVariableTop; + uint function (Handle, inout UErrorCode e) ucol_getVariableTop; + void function (Handle, uint varTop, inout UErrorCode e) ucol_restoreVariableTop; + uint function (Handle, RuleOption delta, wchar* buffer, uint bufferLen) ucol_getRulesEx; + char* function (Handle, ULocale.Type type, inout UErrorCode e) ucol_getLocaleByType; + Handle function (Handle, inout UErrorCode e) ucol_getTailoredSet; + } + + /*********************************************************************** + Table pairing each function pointer above with the ICU symbol name it is bound to + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucol_open, "ucol_open"}, + {cast(void**) &ucol_close, "ucol_close"}, + {cast(void**) &ucol_openRules, "ucol_openRules"}, + {cast(void**) &ucol_openFromShortString, "ucol_openFromShortString"}, + {cast(void**) &ucol_getContractions, "ucol_getContractions"}, + {cast(void**) &ucol_strcoll, "ucol_strcoll"}, + {cast(void**) &ucol_greater, "ucol_greater"}, + {cast(void**) &ucol_greaterOrEqual, "ucol_greaterOrEqual"}, + {cast(void**) &ucol_equal, "ucol_equal"}, + {cast(void**) &ucol_getStrength, "ucol_getStrength"}, + {cast(void**) &ucol_setStrength, "ucol_setStrength"}, + {cast(void**) &ucol_getDisplayName, "ucol_getDisplayName"}, + {cast(void**) &ucol_getShortDefinitionString, "ucol_getShortDefinitionString"}, + {cast(void**) &ucol_normalizeShortDefinitionString, "ucol_normalizeShortDefinitionString"}, + {cast(void**) &ucol_getSortKey, "ucol_getSortKey"}, + {cast(void**) &ucol_getBound, "ucol_getBound"}, + {cast(void**) &ucol_getVersion, "ucol_getVersion"}, + {cast(void**) &ucol_getUCAVersion, "ucol_getUCAVersion"}, + {cast(void**) &ucol_mergeSortkeys, "ucol_mergeSortkeys"}, + {cast(void**) &ucol_setAttribute, "ucol_setAttribute"}, + {cast(void**) &ucol_getAttribute, "ucol_getAttribute"}, + {cast(void**) &ucol_setVariableTop, "ucol_setVariableTop"}, + {cast(void**) &ucol_getVariableTop, "ucol_getVariableTop"}, + {cast(void**) &ucol_restoreVariableTop, "ucol_restoreVariableTop"}, + {cast(void**) &ucol_getRulesEx, "ucol_getRulesEx"}, + {cast(void**) &ucol_getLocaleByType, "ucol_getLocaleByType"}, + {cast(void**) &ucol_getTailoredSet, "ucol_getTailoredSet"}, + ]; + + /*********************************************************************** + Module constructor: binds the targets table against the icuin library and keeps the returned library handle + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + Module destructor: releases the library handle obtained by the static constructor + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UConverter.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UConverter.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,764 @@ +/******************************************************************************* + + @file UConverter.d + + Copyright
(c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UConverter; + +private import com.ibm.icu.mangoicu.ICU; + +/******************************************************************************* + Progress counters filled in by the streaming encode(), decode() and convert() calls +*******************************************************************************/ + +struct UAdjust // used with encode() & decode() methods +{ + uint input, // how much was read from the input + output; // how much was written to the output +} + +/******************************************************************************* + Streaming conversion between two converters; instances come from UConverter.createTranscoder() +*******************************************************************************/ + +interface ITranscoder +{ + void reset (); + + bool convert (void[] input, void[] output, inout UAdjust x, bool flush); +} + +/******************************************************************************* + + This API is used to convert codepage or character encoded data to + and from UTF-16. You can open a converter with ucnv_open(). With + that converter, you can get its properties, set options, convert + your data and close the converter. + + Since many software programs recognize different converter names + for different types of converters, there are other functions in + this API to iterate over the converter aliases. + + See + this page for full details. + +*******************************************************************************/ + +class UConverter : ICU +{ + private Handle handle; + + + + /*********************************************************************** + + Creates a UConverter object with the name specified as a + string. + + The actual name will be resolved with the alias file using + a case-insensitive string comparison that ignores delimiters + '-', '_', and ' ' (dash, underscore, and space). E.g., the + names "UTF8", "utf-8", and "Utf 8" are all equivalent. If null + is passed for the converter name, it will create one with the + getDefaultName() return value.
+ + A converter name may contain options like a locale specification + to control the specific behavior of the converter instantiated. + The meaning of the options depends on the particular converter: + if an option is not defined for or recognized, it is ignored. + + Options are appended to the converter name string, with an + OptionSepChar between the name and the first option and also + between adjacent options. + + The conversion behavior and names can vary between platforms, + and ICU may convert some characters differently from other + platforms. Details on this topic are in the User's Guide. + + ***********************************************************************/ + + this (char[] name) + { + UErrorCode e; + + handle = ucnv_open (toString (name), e); + if (isError (e)) + exception ("failed to create converter for '"~name~"'"); + } + + /*********************************************************************** + + Deletes the unicode converter and releases resources + associated with just this instance. Does not free up + shared converter tables. + + ***********************************************************************/ + + ~this () + { + ucnv_close (handle); + } + + /*********************************************************************** + + Do a fuzzy compare of two converter/alias names. The + comparison is case-insensitive. It also ignores the + characters '-', '_', and ' ' (dash, underscore, and space). + Thus the strings "UTF-8", "utf_8", and "Utf 8" are exactly + equivalent + + ***********************************************************************/ + + static final int compareNames (char[] a, char[] b) + { + return ucnv_compareNames (toString(a), toString(b)); + } + + /*********************************************************************** + + Resets the state of this converter to the default state. + + This is used in the case of an error, to restart a + conversion from a known default state. It will also + empty the internal output buffers. 
+ + ***********************************************************************/ + + void reset () + { + ucnv_reset (handle); + } + + /*********************************************************************** + + Resets the to-Unicode part of this converter state to the + default state. + + This is used in the case of an error to restart a conversion + to Unicode to a known default state. It will also empty the + internal output buffers used for the conversion to Unicode + codepoints. + + ***********************************************************************/ + + void resetDecoder () + { + ucnv_resetToUnicode (handle); + } + + /*********************************************************************** + + Resets the from-Unicode part of this converter state to the + default state. + + This is used in the case of an error to restart a conversion + from Unicode to a known default state. It will also empty the + internal output buffers used for the conversion from Unicode + codepoints. + + ***********************************************************************/ + + void resetEncoder () + { + ucnv_resetFromUnicode (handle); + } + + /*********************************************************************** + + Returns the maximum number of bytes that are output per + UChar in conversion from Unicode using this converter. + + The returned number can be used to calculate the size of + a target buffer for conversion from Unicode. + + This number may not be the same as the maximum number of + bytes per "conversion unit". In other words, it may not + be the intuitively expected number of bytes per character + that would be published for a charset, and may not fulfill + any other purpose than the allocation of an output buffer + of guaranteed sufficient size for a given input length and + converter. + + Examples for special cases that are taken into account: + + * Supplementary code points may convert to more bytes than + BMP code points.
This function returns bytes per UChar + (UTF-16 code unit), not per Unicode code point, for efficient + buffer allocation. + * State-shifting output (SI/SO, escapes, etc.) from stateful + converters. + * When m input UChars are converted to n output bytes, then + the maximum m/n is taken into account. + + The number returned here does not take into account: + + * callbacks which output more than one charset character + sequence per call, like escape callbacks + * initial and final non-character bytes that are output by + some converters (automatic BOMs, initial escape sequence, + final SI, etc.) + + Examples for returned values: + + * SBCS charsets: 1 + * Shift-JIS: 2 + * UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) + * UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) + * EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) + * ISO-2022: 3 (always outputs UTF-8) + * ISO-2022-JP: 6 (4-byte escape sequences + DBCS) + * ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + + DBCS) + + ***********************************************************************/ + + ubyte getMaxCharSize () + { + return ucnv_getMaxCharSize (handle); + } + + /*********************************************************************** + + Returns the minimum byte length for characters in this + codepage. This is usually either 1 or 2. + + ***********************************************************************/ + + ubyte getMinCharSize () + { + return ucnv_getMinCharSize (handle); + } + + /*********************************************************************** + + Gets the internal, canonical name of the converter (zero- + terminated). 
+ + ***********************************************************************/ + + char[] getName () + { + UErrorCode e; + + char[] name = toArray (ucnv_getName (handle, e)); + testError (e, "failed to get converter name"); + return name; + } + + /*********************************************************************** + + Determines if the converter contains ambiguous mappings of + the same character or not + + ***********************************************************************/ + + bool isAmbiguous () + { + return cast(bool) ucnv_isAmbiguous (handle); + } + + /*********************************************************************** + + Detects Unicode signature byte sequences at the start + of the byte stream and returns the charset name of the + indicated Unicode charset. A null is returned where no + Unicode signature is recognized. + + A caller can create a UConverter using the charset name. + The first code unit (wchar) from the start of the stream + will be U+FEFF (the Unicode BOM/signature character) and + can usually be ignored. + + ***********************************************************************/ + + static final char[] detectSignature (void[] input) + { + UErrorCode e; + uint len; + char* name; + + name = ucnv_detectUnicodeSignature (input.ptr, input.length, len, e); + if (name == null || isError (e)) + return null; + return toArray (name); + } + + /*********************************************************************** + + Converts an array of unicode characters to an array of + codepage characters. + + This function is optimized for converting a continuous + stream of data in buffer-sized chunks, where the entire + source and target does not fit in available buffers. + + The source pointer is an in/out parameter. It starts out + pointing where the conversion is to begin, and ends up + pointing after the last UChar consumed. 
+ + Target similarly starts out pointing at the first available + byte in the output buffer, and ends up pointing after the + last byte written to the output. + + The converter always attempts to consume the entire source + buffer, unless (1.) the target buffer is full, or (2.) a + failing error is returned from the current callback function. + When a successful error status has been returned, it means + that all of the source buffer has been consumed. At that + point, the caller should reset the source and sourceLimit + pointers to point to the next chunk. + + At the end of the stream (flush==true), the input is completely + consumed when *source==sourceLimit and no error code is set. + The converter object is then automatically reset by this + function. (This means that a converter need not be reset + explicitly between data streams if it finishes the previous + stream without errors.) + + This is a stateful conversion. Additionally, even when all + source data has been consumed, some data may be in the + converter's internal state. Call this function repeatedly, + updating the target pointers with the next empty chunk of + target in case of a U_BUFFER_OVERFLOW_ERROR, and updating + the source pointers with the next chunk of source when a + successful error status is returned, until there are no more + chunks of source data. + + Parameters: + + converter the Unicode converter + target I/O parameter. Input : Points to the + beginning of the buffer to copy codepage + characters to. Output : points to after + the last codepage character copied to + target. + targetLimit the pointer just after last of the + target buffer + source I/O parameter, pointer to pointer to + the source Unicode character buffer. + sourceLimit the pointer just after the last of + the source buffer + offsets if NULL is passed, nothing will happen + to it, otherwise it needs to have the + same number of allocated cells as target.
+ Will fill in offsets from target to source + pointer, e.g. if offsets[3] is equal to 6, it + means that target[3] was a result of + transcoding source[6]. For output data + carried across calls, and other data + without a specific source character + (such as from escape sequences or + callbacks) -1 will be placed for offsets. + flush set to TRUE if the current source buffer + is the last available chunk of the source, + FALSE otherwise. Note that if a failing + status is returned, this function may + have to be called multiple times with + flush set to TRUE until the source buffer + is consumed. + + ***********************************************************************/ + + bool encode (wchar[] input, void[] output, inout UAdjust x, bool flush) + { + UErrorCode e; + wchar* src = input.ptr; + void* dst = output.ptr; + wchar* srcLimit = src + input.length; + void* dstLimit = dst + output.length; + + ucnv_fromUnicode (handle, &dst, dstLimit, &src, srcLimit, null, flush, e); + x.input = src - input.ptr; + x.output = dst - output.ptr; + + if (e == e.BufferOverflow) + return true; + + testError (e, "failed to encode"); + return false; + } + + /*********************************************************************** + + Encode the Unicode string into a codepage string. + + This function is a more convenient but less powerful version + of encode(). It is only useful for whole strings, not + for streaming conversion. The maximum output buffer capacity + required (barring output from callbacks) should be calculated + using getMaxCharSize().
+ + ***********************************************************************/ + + uint encode (wchar[] input, void[] output) + { + UErrorCode e; + uint len; + + len = ucnv_fromUChars (handle, output.ptr, output.length, input.ptr, input.length, e); + testError (e, "failed to encode"); + return len; + } + + /*********************************************************************** + + Converts a buffer of codepage bytes into an array of unicode + UChars characters. + + This function is optimized for converting a continuous stream + of data in buffer-sized chunks, where the entire source and + target does not fit in available buffers. + + The source pointer is an in/out parameter. It starts out pointing + where the conversion is to begin, and ends up pointing after the + last byte of source consumed. + + Target similarly starts out pointing at the first available UChar + in the output buffer, and ends up pointing after the last UChar + written to the output. It does NOT necessarily keep UChar sequences + together. + + The converter always attempts to consume the entire source buffer, + unless (1.) the target buffer is full, or (2.) a failing error is + returned from the current callback function. When a successful + error status has been returned, it means that all of the source + buffer has been consumed. At that point, the caller should reset + the source and sourceLimit pointers to point to the next chunk. + + At the end of the stream (flush==true), the input is completely + consumed when *source==sourceLimit and no error code is set. The + converter object is then automatically reset by this function. + (This means that a converter need not be reset explicitly between + data streams if it finishes the previous stream without errors.) + + This is a stateful conversion. Additionally, even when all source + data has been consumed, some data may be in the converter's internal + state.
Call this function repeatedly, updating the target pointers + with the next empty chunk of target in case of a BufferOverflow, and + updating the source pointers with the next chunk of source when a + successful error status is returned, until there are no more chunks + of source data. + + Parameters: + converter the Unicode converter + target I/O parameter. Input : Points to the beginning + of the buffer to copy UChars into. Output : + points to after the last UChar copied. + targetLimit the pointer just after the end of the target + buffer + source I/O parameter, pointer to pointer to the source + codepage buffer. + sourceLimit the pointer to the byte after the end of the + source buffer + offsets if NULL is passed, nothing will happen to + it, otherwise it needs to have the same + number of allocated cells as target. Will + fill in offsets from target to source pointer, + e.g. if offsets[3] is equal to 6, it means that + target[3] was a result of transcoding + source[6]. For output data carried across + calls, and other data without a specific + source character (such as from escape + sequences or callbacks) -1 will be placed + for offsets. + flush set to true if the current source buffer + is the last available chunk of the source, + false otherwise. Note that if a failing + status is returned, this function may have + to be called multiple times with flush set + to true until the source buffer is consumed.
+ + ***********************************************************************/ + + bool decode (void[] input, wchar[] output, inout UAdjust x, bool flush) + { + UErrorCode e; + void* src = input.ptr; + wchar* dst = output.ptr; + void* srcLimit = src + input.length; + wchar* dstLimit = dst + output.length; + + ucnv_toUnicode (handle, &dst, dstLimit, &src, srcLimit, null, flush, e); + x.input = src - input.ptr; + x.output = dst - output.ptr; + + if (e == e.BufferOverflow) + return true; + + testError (e, "failed to decode"); + return false; + } + + /*********************************************************************** + + Decode the codepage string into a Unicode string. + + This function is a more convenient but less powerful version + of decode(). It is only useful for whole strings, not for + streaming conversion. The maximum output buffer capacity + required (barring output from callbacks) will be 2*input.length + (each char may be converted into a surrogate pair). + + ***********************************************************************/ + + uint decode (void[] input, wchar[] output) + { + UErrorCode e; + uint len; + + len = ucnv_toUChars (handle, output.ptr, output.length, input.ptr, input.length, e); + testError (e, "failed to decode"); + return len; + } + + /********************************************************************** + + Iterate over the available converter names + + **********************************************************************/ + + static int opApply (int delegate(inout char[] element) dg) + { + char[] name; + int result; + uint count = ucnv_countAvailable (); + + for (uint i=0; i < count; ++i) + { + name = toArray (ucnv_getAvailableName (i)); + result = dg (name); + if (result) + break; + } + return result; + } + + /*********************************************************************** + Create a transcoder that uses this converter as the source and dst as the target + ***********************************************************************/ + + ITranscoder createTranscoder (UConverter dst) + { + return new
UTranscoder (this, dst); + } + + /********************************************************************** + ITranscoder implementation that hands both converters to ucnv_convertEx() + **********************************************************************/ + + private class UTranscoder : ITranscoder + { + private UConverter cSrc, + cDst; + private bool clear = true; + + /************************************************************** + Remember the source (src) and target (dst) converters + **************************************************************/ + + this (UConverter src, UConverter dst) + { + cSrc = src; + cDst = dst; + } + + /************************************************************** + Make the next convert() call pass its reset argument as true + **************************************************************/ + + void reset () + { + clear = true; + } + + /************************************************************** + Convert input to output; x gets the consumed/written counts. Returns true on BufferOverflow + **************************************************************/ + + bool convert (void[] input, void[] output, inout UAdjust x, bool flush) + { + UErrorCode e; + void* src = input.ptr; + void* dst = output.ptr; + void* srcLimit = src + input.length; + void* dstLimit = dst + output.length; + + ucnv_convertEx (cDst.handle, cSrc.handle, &dst, dstLimit, + &src, srcLimit, null, null, null, null, + clear, flush, e); + clear = false; + x.input = src - input.ptr; + x.output = dst - output.ptr; + + if (e == e.BufferOverflow) + return true; + + testError (e, "failed to convert"); + return false; + } + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + int function (char*, char*) ucnv_compareNames; + Handle function (char*, inout UErrorCode) ucnv_open; + char* function (void*, uint, inout uint, inout UErrorCode) ucnv_detectUnicodeSignature; + void function (Handle) ucnv_close; + void function (Handle) ucnv_reset; + int function (Handle) ucnv_resetToUnicode; + int function (Handle) ucnv_resetFromUnicode; + ubyte function (Handle) ucnv_getMaxCharSize; + ubyte function (Handle) ucnv_getMinCharSize; + char* function (Handle, inout UErrorCode) ucnv_getName; + uint function (Handle, wchar*, uint, void*, uint, inout UErrorCode) ucnv_toUChars; + uint function (Handle, void*, uint, wchar*, uint, inout UErrorCode) ucnv_fromUChars; + void function (Handle, void**, void*, wchar**, wchar*, int*, ubyte, inout UErrorCode) ucnv_fromUnicode; + void function (Handle, wchar**, wchar*, void**, void*, int*, ubyte, inout UErrorCode) ucnv_toUnicode; + void function (Handle, Handle, void**, void*, void**, void*, wchar*, wchar*, wchar*, wchar*, ubyte, ubyte, inout UErrorCode) ucnv_convertEx; + ubyte function (Handle) ucnv_isAmbiguous; + char* function (uint) ucnv_getAvailableName; + uint function () ucnv_countAvailable; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucnv_open, "ucnv_open"}, + {cast(void**) &ucnv_close, "ucnv_close"}, + {cast(void**) &ucnv_reset, "ucnv_reset"}, + {cast(void**) &ucnv_resetToUnicode, "ucnv_resetToUnicode"}, + {cast(void**) &ucnv_resetFromUnicode, 
"ucnv_resetFromUnicode"}, + {cast(void**) &ucnv_compareNames, "ucnv_compareNames"}, + {cast(void**) &ucnv_getMaxCharSize, "ucnv_getMaxCharSize"}, + {cast(void**) &ucnv_getMinCharSize, "ucnv_getMinCharSize"}, + {cast(void**) &ucnv_getName, "ucnv_getName"}, + {cast(void**) &ucnv_detectUnicodeSignature, "ucnv_detectUnicodeSignature"}, + {cast(void**) &ucnv_toUChars, "ucnv_toUChars"}, + {cast(void**) &ucnv_fromUChars, "ucnv_fromUChars"}, + {cast(void**) &ucnv_toUnicode, "ucnv_toUnicode"}, + {cast(void**) &ucnv_fromUnicode, "ucnv_fromUnicode"}, + {cast(void**) &ucnv_convertEx, "ucnv_convertEx"}, + {cast(void**) &ucnv_isAmbiguous, "ucnv_isAmbiguous"}, + {cast(void**) &ucnv_countAvailable, "ucnv_countAvailable"}, + {cast(void**) &ucnv_getAvailableName, "ucnv_getAvailableName"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); +/+ + foreach (char[] name; UConverter) + printf ("%.*s\n", name); ++/ + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UDateFormat.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UDateFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,426 @@ +/******************************************************************************* + + @file UDateFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UDateFormat; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString, + com.ibm.icu.mangoicu.UCalendar, + com.ibm.icu.mangoicu.UNumberFormat; + +/******************************************************************************* + + UDateFormat consists of functions that convert dates and + times from their internal representations to textual form and back + again in a language-independent manner. Converting from the internal + representation (milliseconds since midnight, January 1, 1970) to text + is known as "formatting," and converting from text to millis is known + as "parsing." We currently define one concrete structure UDateFormat, + which can handle pretty much all normal date formatting and parsing + actions. + + UDateFormat helps you to format and parse dates for any locale. 
+ Your code can be completely independent of the locale conventions + for months, days of the week, or even the calendar format: lunar + vs. solar. + + See + this page for full details. + +*******************************************************************************/ + +private class UDateFormat : ICU +{ + private Handle handle; + + alias UCalendar.UDate UDate; + + typedef void* UFieldPos; + + public enum Style + { + Full, + Long, + Medium, + Short, + Default = Medium, + None = -1, + Ignore = -2 + }; + + public enum Field + { + EraField = 0, + YearField = 1, + MonthField = 2, + DateField = 3, + HourOfDay1Field = 4, + HourOfDay0Field = 5, + MinuteField = 6, + SecondField = 7, + FractionalSecondField = 8, + DayOfWeekField = 9, + DayOfYearField = 10, + DayOfWeekInMonthField = 11, + WeekOfYearField = 12, + WeekOfMonthField = 13, + AmPmField = 14, + Hour1Field = 15, + Hour0Field = 16, + TimezoneField = 17, + YearWoyField = 18, + DowLocalField = 19, + ExtendedYearField = 20, + JulianDayField = 21, + MillisecondsInDayField = 22, + TimezoneRfcField = 23, + FieldCount = 24 + }; + + private enum Symbol + { + Eras, + Months, + ShortMonths, + Weekdays, + ShortWeekdays, + AmPms, + LocalizedChars + }; + + + /*********************************************************************** + + Open a new UDateFormat for formatting and parsing dates + and time. If a pattern is not specified, an appropriate + one for the given locale will be used. 
+ + ***********************************************************************/ + + this (Style time, Style date, inout ULocale locale, inout UTimeZone tz, UStringView pattern=null) + { + UErrorCode e; + wchar* p; + uint c; + + if (pattern) + p = pattern.get.ptr, c = pattern.length; + handle = udat_open (time, date, ICU.toString(locale.name), cast(wchar*)tz.name.ptr, tz.name.length, p, c, e); + testError (e, "failed to create DateFormat"); + } + + /*********************************************************************** + + Close a UDateFormat + + ***********************************************************************/ + + ~this () + { + udat_close (handle); + } + + /*********************************************************************** + + Format a date using a UDateFormat + + ***********************************************************************/ + + void format (UString dst, UDate date, UFieldPos p = null) + { + uint fmat (wchar* result, uint len, inout UErrorCode e) + { + return udat_format (handle, date, result, len, p, e); + } + + dst.format (&fmat, "date format failed"); + } + + /*********************************************************************** + + Parse a string into a date/time using a UDateFormat + + ***********************************************************************/ + + UDate parse (UStringView src, uint* index=null) + { + UErrorCode e; + + UDate x = udat_parse (handle, src.content.ptr, src.len, index, e); + testError (e, "failed to parse date"); + return x; + } + + /*********************************************************************** + + Set the UCalendar associated with a UDateFormat. A + UDateFormat uses a UCalendar to convert a raw value + to, for example, the day of the week.
+ + ***********************************************************************/ + + void setCalendar (UCalendar c) + { + udat_setCalendar (handle, c.handle); + } + + /*********************************************************************** + + Get the UCalendar associated with this UDateFormat + + ***********************************************************************/ + + UCalendar getCalendar () + { + Handle h = udat_getCalendar (handle); + return new UCalendar (h); + } + + /*********************************************************************** + + Set the UNumberFormat associated with a UDateFormat. A + UDateFormat uses a UNumberFormat to format numbers within + a date, for example the day number. + + ***********************************************************************/ + + void setNumberFormat (UNumberFormat n) + { + udat_setNumberFormat (handle, n.handle); + } + + /*********************************************************************** + + Get the year relative to which all 2-digit years are + interpreted + + ***********************************************************************/ + + UDate getTwoDigitYearStart () + { + UErrorCode e; + + UDate x = udat_get2DigitYearStart (handle, e); + testError (e, "failed to get two digit year start"); + return x; + } + + /*********************************************************************** + + Set the year relative to which all 2-digit years are + interpreted + + ***********************************************************************/ + + void setTwoDigitYearStart (UDate start) + { + UErrorCode e; + + udat_set2DigitYearStart (handle, start, e); + testError (e, "failed to set two digit year start"); + } + + /*********************************************************************** + + Extract the pattern from a UDateFormat + + ***********************************************************************/ + + void getPattern (UString dst, bool localize) + { + uint fmat (wchar* result, uint len, inout UErrorCode e) + { + return
udat_toPattern (handle, localize, result, len, e); + } + + dst.format (&fmat, "failed to retrieve date format pattern"); + } + + /*********************************************************************** + + Set the pattern for a UDateFormat. + + The localized flag and the pattern text are forwarded + to udat_applyPattern; no error status is returned. + + ***********************************************************************/ + + void setPattern (UStringView pattern, bool localized) + { + udat_applyPattern (handle, localized, pattern.get.ptr, pattern.length); + } + + /*********************************************************************** + + Specify whether a UDateFormat will perform lenient parsing. + + ***********************************************************************/ + + void setLenient (bool yes) + { + udat_setLenient (handle, yes); + } + + /*********************************************************************** + + Determine if a UDateFormat will perform lenient parsing. + + ***********************************************************************/ + + bool isLenient () + { + return udat_isLenient (handle) != 0; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Function pointers for the udat_* entry points used by + this class. They are filled in through the targets + table by the static constructor. + + ***********************************************************************/ + + private static extern (C) + { + Handle function (uint, uint, char*, wchar*, uint, wchar*, uint, inout UErrorCode) udat_open; + void function (Handle) udat_close; + uint function (Handle, UDate, wchar*, uint, UFieldPos, inout UErrorCode) udat_format; + UDate function (Handle, wchar*, uint, uint*, inout UErrorCode) udat_parse; + void function (Handle, Handle) udat_setCalendar; + void function (Handle, Handle) udat_setNumberFormat; + UDate function (Handle, inout UErrorCode) udat_get2DigitYearStart; + void function (Handle, UDate, inout UErrorCode) udat_set2DigitYearStart; + uint function (Handle, byte, wchar*, uint, inout UErrorCode) udat_toPattern; + void function (Handle, byte, wchar*, uint) udat_applyPattern; + void function (Handle, byte) udat_setLenient; + byte function (Handle) udat_isLenient; + Handle function (Handle) udat_getCalendar; + } + + /*********************************************************************** + + Pairs the address of each function pointer above with + the name of the symbol it is bound to. + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &udat_open, "udat_open"}, + {cast(void**) &udat_close, "udat_close"}, + {cast(void**) &udat_format, "udat_format"}, + {cast(void**) &udat_parse, "udat_parse"}, + {cast(void**) &udat_setCalendar, "udat_setCalendar"}, + {cast(void**) &udat_setNumberFormat, "udat_setNumberFormat"}, + {cast(void**) &udat_get2DigitYearStart, "udat_get2DigitYearStart"}, + {cast(void**) &udat_set2DigitYearStart, "udat_set2DigitYearStart"}, + {cast(void**) &udat_toPattern, "udat_toPattern"}, + {cast(void**) &udat_applyPattern, "udat_applyPattern"}, + {cast(void**) &udat_setLenient,
"udat_setLenient"}, + {cast(void**) &udat_isLenient, "udat_isLenient"}, + {cast(void**) &udat_getCalendar, "udat_getCalendar"}, + ]; + + /*********************************************************************** + + Bind every entry of targets against the icuin library + and remember the result in library. + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + Hand the value stored by the static constructor back + to FunctionLoader.unbind. + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + + + diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UDomainName.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UDomainName.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,322 @@ +/******************************************************************************* + + @file UDomainName.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source.
+ + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. 
+ + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UDomainName; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +/******************************************************************************* + + UIDNA API implements the IDNA protocol as defined in the + IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). + + The RFC defines 2 operations: toAscii and toUnicode. Domain + labels containing non-ASCII code points are required to be + processed by toAscii operation before passing it to resolver + libraries. Domain names that are obtained from resolver + libraries are required to be processed by toUnicode operation + before displaying the domain name to the user. IDNA requires + that implementations process input strings with Nameprep + (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of + Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with + Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations + of IDNA MUST fully implement Nameprep and Punycode; neither + Nameprep nor Punycode are optional. + + The input and output of toAscii() and ToUnicode() operations are + Unicode and are designed to be chainable, i.e., applying toAscii() + or toUnicode() operations multiple times to an input string will + yield the same result as applying the operation once. + + See + this page for full details. 
+ +*******************************************************************************/ + +class UDomainName : ICU +{ + private UStringView text; + // NOTE(review): handle is never assigned or read anywhere in this class + private Handle handle; + + // NOTE(review): passed as the options argument of the uidna_* calls; presumably mirrors + // ICU's UIDNA_DEFAULT / UIDNA_ALLOW_UNASSIGNED / UIDNA_USE_STD3_RULES - confirm + enum Options + { + Strict, + Lenient, + Std3 + } + + + /*********************************************************************** + + Remember the text that the conversion and comparison + methods of this class operate on. The reference is + stored as given. + + ***********************************************************************/ + + this (UStringView text) + { + this.text = text; + } + + /*********************************************************************** + + This function implements the ToASCII operation as + defined in the IDNA RFC. + + This operation is done on single labels before sending + it to something that expects ASCII names. A label is an + individual part of a domain name. Labels are usually + separated by dots; e.g. "www.example.com" is composed + of 3 labels "www", "example", and "com". + + ***********************************************************************/ + + void toAscii (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_toASCII (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to ASCII"); + } + + /*********************************************************************** + + This function implements the ToUnicode operation as + defined in the IDNA RFC. + + This operation is done on single labels before sending + it to something that expects Unicode names. A label is + an individual part of a domain name. Labels are usually + separated by dots; e.g. "www.example.com" is composed + of 3 labels "www", "example", and "com".
+ + ***********************************************************************/ + + void toUnicode (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_toUnicode (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to Unicode"); + } + + /*********************************************************************** + + Convenience function that implements the IDNToASCII + operation as defined in the IDNA RFC. + + This operation is done on complete domain names, e.g. + "www.example.com". It is important to note that this + operation can fail. If it fails, then the input domain + name cannot be used as an Internationalized Domain Name + and the application should have methods defined to deal + with the failure. + + Note: IDNA RFC specifies that a conformant application + should divide a domain name into separate labels, decide + whether to apply allowUnassigned and useSTD3ASCIIRules + on each, and then convert. This function does not offer + that level of granularity. The options once set will apply + to all labels in the domain name. + + ***********************************************************************/ + + void IdnToAscii (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_IDNToASCII (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to ASCII"); + } + + /*********************************************************************** + + Convenience function that implements the IDNToUnicode + operation as defined in the IDNA RFC. + + This operation is done on complete domain names, e.g. + "www.example.com". + + Note: IDNA RFC specifies that a conformant application + should divide a domain name into separate labels, decide + whether to apply allowUnassigned and useSTD3ASCIIRules + on each, and then convert. This function does not offer + that level of granularity.
The options once set will apply + to all labels in the domain name. + + ***********************************************************************/ + + void IdnToUnicode (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_IDNToUnicode (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to Unicode"); + } + + /*********************************************************************** + + Compare two IDN strings for equivalence. + + This function splits the domain names into labels and + compares them. According to IDN RFC, whenever two labels + are compared, they are considered equal if and only if + their ASCII forms (obtained by applying toASCII) match + using a case-insensitive ASCII comparison. Two domain + names are considered a match if and only if all labels + match regardless of whether label separators match. + + The value returned by uidna_compare is passed back + unchanged; an error status is reported via testError. + + ***********************************************************************/ + + int compare (UString other, Options o = Options.Strict) + { + UErrorCode e; + int i = uidna_compare (text.get.ptr, text.len, other.get.ptr, other.len, o, e); + testError (e, "failed to compare IDN strings"); + return i; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Function pointers for the uidna_* entry points used by + this class. They are filled in through the targets + table by the static constructor. + + ***********************************************************************/ + + private static extern (C) + { + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toASCII; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toUnicode; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToASCII; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToUnicode; + int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) uidna_compare; + } + + /*********************************************************************** + + Pairs the address of each function pointer above with + the name of the symbol it is bound to. + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uidna_toASCII, "uidna_toASCII"}, + {cast(void**) &uidna_toUnicode, "uidna_toUnicode"}, + {cast(void**) &uidna_IDNToASCII, "uidna_IDNToASCII"}, + {cast(void**) &uidna_IDNToUnicode, "uidna_IDNToUnicode"}, + {cast(void**) &uidna_compare, "uidna_compare"}, + ]; + + /*********************************************************************** + + Bind every entry of targets against the icuuc library + and remember the result in library. + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + Hand the value stored by the static constructor back + to FunctionLoader.unbind. + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UEnumeration.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UEnumeration.d Sun Apr 19 13:49:38 2009 +0200 @@
-0,0 +1,264 @@ +/******************************************************************************* + + @file UEnumeration.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UEnumeration; + +private import com.ibm.icu.mangoicu.ICU; + +/******************************************************************************* + + UEnumeration is returned by a number of ICU classes, for providing + access to such things as ULocale lists and so on. + +*******************************************************************************/ + +class UEnumeration : ICU +{ + package Handle handle; + + /*********************************************************************** + + Wrap an existing enumeration handle. The handle is + passed to uenum_close when this object is destroyed. + + ***********************************************************************/ + + this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Disposes of the storage used by a UEnumeration object + + ***********************************************************************/ + + ~this () + { + uenum_close (handle); + } + + /*********************************************************************** + + Returns the number of elements that the iterator + traverses, as reported by uenum_count. + + If the iterator is out-of-sync with its service, status + is set to U_ENUM_OUT_OF_SYNC_ERROR; any error status is + reported via testError. + + ***********************************************************************/ + + uint count () + { + UErrorCode e; + + uint x = uenum_count (handle, e); + testError (e, "enumeration out of sync"); + return x; + } + + /*********************************************************************** + + Resets the iterator to the current list of service IDs.
+ + This re-establishes sync with the service and rewinds + the iterator to start at the first element + + ***********************************************************************/ + + void reset () + { + ICU.UErrorCode e; + + uenum_reset (handle, e); + testError (e, "failed to reset enumeration"); + } + + /*********************************************************************** + + Fetches the next element in the iterator's list as a + char[] (via uenum_next). + + Returns false if there are no more elements; otherwise + dst is set to a slice over the returned string and true + is returned. If the iterator is out-of-sync with its + service, status is set to U_ENUM_OUT_OF_SYNC_ERROR. + If the native service string is a UChar* string, it + is converted to char* with the invariant converter. + If the conversion fails (because a character cannot be + converted) then status is set to + U_INVARIANT_CONVERSION_ERROR. Any error status is + reported via testError. + + ***********************************************************************/ + + bool next (out char[] dst) + { + ICU.UErrorCode e; + uint len; + + char* p = uenum_next (handle, &len, e); + testError (e, "failed to traverse enumeration"); + if (p) + return dst = p[0..len], true; + return false; + } + + /*********************************************************************** + + Fetches the next element in the iterator's list as a + wchar[] (via uenum_unext). + + Returns false if there are no more elements; otherwise + dst is set to a slice over the returned string and true + is returned. If the iterator is out-of-sync with its + service, status is set to U_ENUM_OUT_OF_SYNC_ERROR. + If the native service string is a char* string, it is + converted to UChar* with the invariant converter. + Any error status is reported via testError. + + ***********************************************************************/ + + bool next (inout wchar[] dst) + { + ICU.UErrorCode e; + uint len; + + wchar* p = uenum_unext (handle, &len, e); + testError (e, "failed to traverse enumeration"); + if (p) + return dst = p[0..len], true; + return false; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Function pointers for the uenum_* entry points used by + this class. They are filled in through the targets + table by the static constructor. + + ***********************************************************************/ + + private static extern (C) + { + void function (Handle) uenum_close; + uint function (Handle, inout UErrorCode) uenum_count; + void function (Handle, inout UErrorCode) uenum_reset; + char* function (Handle, uint*, inout UErrorCode) uenum_next; + wchar* function (Handle, uint*, inout UErrorCode) uenum_unext; + } + + /*********************************************************************** + + Pairs the address of each function pointer above with + the name of the symbol it is bound to. + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uenum_close, "uenum_close"}, + {cast(void**) &uenum_count, "uenum_count"}, + {cast(void**) &uenum_reset, "uenum_reset"}, + {cast(void**) &uenum_next, "uenum_next"}, + {cast(void**) &uenum_unext, "uenum_unext"}, + ]; + + /*********************************************************************** + + Bind every entry of targets against the icuuc library + and remember the result in library. + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + Hand the value stored by the static constructor back + to FunctionLoader.unbind. + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/ULocale.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/ULocale.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,230 @@ +/******************************************************************************* + + @file ULocale.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty.
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.ULocale; + +private import com.ibm.icu.mangoicu.ICU; +private import java.lang.util; + +/******************************************************************************* + + Note that this is a struct rather than a class. 
This is so + that one can easily construct these on the stack, plus the + 'convenience' instances can be created statically. + +*******************************************************************************/ + +struct ULocale +{ + public CString name; + + /*********************************************************************** + + Predefined 'convenience' locales. Default carries a + null name; the language-only entries use the bare + language code and the regional entries use the + language_COUNTRY form. + + ***********************************************************************/ + + public static ULocale Root = {""}; + public static ULocale Default = {null}; + public static ULocale English = {"en"}; + public static ULocale Chinese = {"zh"}; + public static ULocale French = {"fr"}; + public static ULocale German = {"de"}; + public static ULocale Italian = {"it"}; + public static ULocale Japanese = {"ja"}; + public static ULocale Korean = {"ko"}; + public static ULocale SimplifiedChinese = {"zh_CN"}; + public static ULocale TraditionalChinese = {"zh_TW"}; + public static ULocale Canada = {"en_CA"}; + public static ULocale CanadaFrench = {"fr_CA"}; + public static ULocale China = {"zh_CN"}; + public static ULocale PRC = {"zh_CN"}; + public static ULocale France = {"fr_FR"}; + public static ULocale Germany = {"de_DE"}; + public static ULocale Italy = {"it_IT"}; + // language code for Japanese is "ja" (see Japanese above); "jp" is only the country code + public static ULocale Japan = {"ja_JP"}; + public static ULocale Korea = {"ko_KR"}; + public static ULocale Taiwan = {"zh_TW"}; + public static ULocale UK = {"en_GB"}; + public static ULocale US = {"en_US"}; + + /*********************************************************************** + + NOTE(review): presumably mirrors ICU's ULocDataLocaleType + (actual / valid / requested locale) - confirm. + + ***********************************************************************/ + + public enum Type + { + Actual = 0, + Valid = 1, + Requested = 2, + } + + /*********************************************************************** + + Buffer capacities and keyword delimiter characters. + + NOTE(review): these presumably mirror the ULOC_* macros + of ICU's uloc.h - confirm the capacities against the + ICU version being bound. + + ***********************************************************************/ + + public const uint LanguageCapacity = 12; + public const uint CountryCapacity = 4; + public const uint FullNameCapacity = 56; + public const uint ScriptCapacity = 6; + public const uint KeywordsCapacity =
50; + public const uint KeywordAndValuesCapacity = 100; + // ICU separates keyword items with ';' (e.g. "de_DE@collation=phonebook;currency=EUR") + public const char KeywordItemSeparator = ';'; + public const char KeywordSeparator = '@'; + public const char KeywordAssign = '='; + + + /*********************************************************************** + + Store the name returned by uloc_getDefault in + locale.name. Raises via ICU.exception when the + resulting name is empty/null. + + ***********************************************************************/ + + static void getDefault (inout ULocale locale) + { + locale.name = ICU.toArray (uloc_getDefault()); + if (! locale.name) + ICU.exception ("failed to get default locale"); + } + + /*********************************************************************** + + Pass locale.name to uloc_setDefault. Raises via + ICU.exception when an error status is reported. + + ***********************************************************************/ + + static void setDefault (inout ULocale locale) + { + ICU.UErrorCode e; + + uloc_setDefault (ICU.toString(locale.name), e); + + if (ICU.isError (e)) + ICU.exception ("invalid locale '"~locale.name~"'"); + } + + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Function pointers for the uloc_* entry points used by + this struct. They are filled in through the targets + table by the static constructor. + + ***********************************************************************/ + + private static extern (C) + { + char* function () uloc_getDefault; + void function (char*, inout ICU.UErrorCode) uloc_setDefault; + } + + /********************************************************************** + + Pairs the address of each function pointer above with + the name of the symbol it is bound to. + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uloc_getDefault, "uloc_getDefault"}, + {cast(void**) &uloc_setDefault, "uloc_setDefault"}, + ]; + + /*********************************************************************** + + Bind every entry of targets against the ICU.icuuc + library and remember the result in library. + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuuc, targets); + } + + /*********************************************************************** + + Hand the value stored by the static constructor back + to FunctionLoader.unbind. + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UMessageFormat.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UMessageFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,420 @@ +/******************************************************************************* + + @file UMessageFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software.
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UMessageFormat; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +public import com.ibm.icu.mangoicu.ULocale; + +/******************************************************************************* + + Provides means to produce concatenated messages in a language-neutral + way. Use this for all concatenations that show up to end users. Takes + a set of objects, formats them, then inserts the formatted strings into + the pattern at the appropriate places. + + See + http://oss.software.ibm.com/icu/apiref/umsg_8h.html for full details. + +*******************************************************************************/ + +class UMessageFormat : ICU +{ + private Handle handle; + + /*********************************************************************** + + Open a message formatter with the given wchar[] pattern for the + given locale (ULocale.Default when none is supplied).
+ + ***********************************************************************/ + + this (wchar[] pattern, inout ULocale locale = ULocale.Default) + { + UErrorCode e; + + handle = umsg_open (pattern.ptr, pattern.length, toString(locale.name), null, e); + testError (e, "failed to open message formatter"); + } + + /*********************************************************************** + + Open a message formatter with the given UStringView pattern; + delegates to the wchar[] constructor using pattern.get. + + ***********************************************************************/ + + this (UStringView pattern, inout ULocale locale = ULocale.Default) + { + this (pattern.get, locale); + } + + /*********************************************************************** + + Release the message formatter by passing its handle to umsg_close + + ***********************************************************************/ + + ~this () + { + umsg_close (handle); + } + + /*********************************************************************** + + Set the locale used for fetching default number or date + format information. Returns this so calls can be chained. + + ***********************************************************************/ + + UMessageFormat setLocale (inout ULocale locale) + { + umsg_setLocale (handle, toString(locale.name)); + return this; + } + + /*********************************************************************** + + Store the name reported by umsg_getLocale into the supplied + ULocale. Returns this so calls can be chained. + + ***********************************************************************/ + + UMessageFormat getLocale (inout ULocale locale) + { + locale.name = toArray (umsg_getLocale (handle)); + return this; + } + + /*********************************************************************** + + Sets the pattern; the error code is handed to testError. Returns this. + + ***********************************************************************/ + + UMessageFormat setPattern (UStringView pattern) + { + UErrorCode e; + + umsg_applyPattern (handle, pattern.get.ptr, pattern.len, null, e); + testError (e, "failed to set formatter pattern"); + return this; + } + +
/*********************************************************************** + + Gets the pattern, written into s through UString.format. Returns this. + + ***********************************************************************/ + + UMessageFormat getPattern (UString s) + { + uint fmt (wchar* dst, uint length, inout UErrorCode e) + { + return umsg_toPattern (handle, dst, length, e); + } + + s.format (&fmt, "failed to get formatter pattern"); + return this; + } + + /*********************************************************************** + + Format the arguments in list into s. This function may re-order + the arguments depending on the locale. For all numeric arguments, + double is assumed unless the type is explicitly integer. All choice + format arguments must be of type double. + + ***********************************************************************/ + + UMessageFormat format (UString s, Args* list) + { + uint fmt (wchar* dst, uint length, inout UErrorCode e) + { + return umsg_vformat (handle, dst, length, list.args.ptr, e); + } + + s.format (&fmt, "failed to format pattern"); + return this; + } + + + /*********************************************************************** + + A typesafe list of arguments for the UMessageFormat.format() + method. This should be used in the following manner: + + @code + wchar[] format = "{0} {1, number, currency} {2, number, integer}"; + UMessageFormat msg = new UMessageFormat (format); + + msg.Args args; + msg.format (output, args.add("abc").add(152.0).add(456)); + @endcode + + Note that the argument order must follow that of the format + string, although the format string may dictate the ultimate + position of each argument. + + See http://oss.software.ibm.com/icu/apiref/umsg_8h.html for + details on the format string. + + @todo this will likely fail on certain CPU architectures: pointers are cast to uint and every argument is packed into uint slots.
+ + ***********************************************************************/ + + struct Args + { + private uint[32] args; + private uint index; + + /*************************************************************** + Struct invariant: index must remain below args.length + ***************************************************************/ + + version( D_Version2 ){ + mixin( "invariant() { invariant_(); }"); + } + else{ + mixin( "invariant { invariant_(); }"); + } + private void invariant_(){ + assert (index < args.length); + } + + /*************************************************************** + Set index back to zero; returns a pointer to this struct + ***************************************************************/ + + Args* reset () + { + index = 0; + version(D_Version2){ + return &this; + } else { + return this; + } + } + + /*************************************************************** + Store the pointer from x.get(), cast to uint, in the next slot + ***************************************************************/ + + Args* add (UStringView x) + { + args[index] = cast(uint) cast(wchar*) x.get(); + ++index; + version(D_Version2){ + return &this; + } else { + return this; + } + } + + /*************************************************************** + Store the array's pointer, cast to uint, in the next slot + ***************************************************************/ + + Args* add (wchar[] x) + { + args[index] = cast(uint) cast(wchar*) x; + ++index; + version(D_Version2){ + return &this; + } else { + return this; + } + } + + /*************************************************************** + Store an int in the next slot + ***************************************************************/ + + Args* add (int x) + { + args[index] = x; + ++index; + version(D_Version2){ + return &this; + } else { + return this; + } + } + + /*************************************************************** + Store a double across the next two slots. NOTE(review): nothing here guards that both slots lie inside args; confirm callers stay in range. + ***************************************************************/ + + Args* add (double x) + { + *(cast(double*) &args[index]) = x; + index += 2; + version(D_Version2){ + return &this; + } else { + return this; + } + } + } + + + /*********************************************************************** + + Bind the ICU functions from a
shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU umsg_ calls; their addresses are listed in targets + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, char*, void*, inout UErrorCode) umsg_open; + void function (Handle) umsg_close; + void function (Handle, char*) umsg_setLocale; + char* function (Handle) umsg_getLocale; + uint function (Handle, wchar*, uint, inout UErrorCode) umsg_toPattern; + void function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_applyPattern; + uint function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_vformat; + } + + /*********************************************************************** + Pairs each function pointer with the ICU symbol name to bind + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &umsg_open, "umsg_open"}, + {cast(void**) &umsg_close, "umsg_close"}, + {cast(void**) &umsg_setLocale, "umsg_setLocale"}, + {cast(void**) &umsg_getLocale, "umsg_getLocale"}, + {cast(void**) &umsg_toPattern, "umsg_toPattern"}, + {cast(void**) &umsg_applyPattern, "umsg_applyPattern"}, + {cast(void**) &umsg_vformat, "umsg_vformat"}, + ]; + + /*********************************************************************** + Static constructor: binds targets from the icuin library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + //test (); + } + + /*********************************************************************** + Static destructor: hands the library handle to FunctionLoader.unbind + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + + /*********************************************************************** + Disabled example exercising format() with an Args list + 
***********************************************************************/ + + //static void test() + //{ + // UString output = new UString(100); + // wchar[] format = "{0} {1, number, currency} {2, number, integer}"; + + // UMessageFormat msg = new UMessageFormat (format); + + // msg.Args args; + // msg.format (output, args.add("abc").add(152.0).add(456)); + //} +} + + + diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UNormalize.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UNormalize.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,391 @@ +/******************************************************************************* + + @file UNormalize.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/).
Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UNormalize; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString, + com.ibm.icu.mangoicu.ULocale; + +/******************************************************************************* + + transforms Unicode text into an equivalent composed or + decomposed form, allowing for easier sorting and searching + of text. UNormalize supports the standard normalization forms + described in http://www.unicode.org/unicode/reports/tr15/ + + Characters with accents or other adornments can be encoded + in several different ways in Unicode. For example, take the + character A-acute. In Unicode, this can be encoded as a single + character (the "composed" form): + + 00C1 LATIN CAPITAL LETTER A WITH ACUTE + + or as two separate characters (the "decomposed" form): + + 0041 LATIN CAPITAL LETTER A 0301 COMBINING ACUTE ACCENT + + To a user of your program, however, both of these sequences + should be treated as the same "user-level" character "A with + acute accent". When you are searching or comparing text, you + must ensure that these two sequences are treated equivalently. + In addition, you must handle characters with more than one + accent. Sometimes the order of a character's combining accents + is significant, while in other cases accent sequences in different + orders are really equivalent. + + Similarly, the string "ffi" can be encoded as three separate + letters: + + 0066 LATIN SMALL LETTER F 0066 LATIN SMALL LETTER F + 0069 LATIN SMALL LETTER I + + or as the single character + + FB03 LATIN SMALL LIGATURE FFI + + The ffi ligature is not a distinct semantic character, and strictly + speaking it shouldn't be in Unicode at all, but it was included for + compatibility with existing character sets that already provided it. 
+ The Unicode standard identifies such characters by giving them + "compatibility" decompositions into the corresponding semantic + characters. When sorting and searching, you will often want to use + these mappings. + + unorm_normalize helps solve these problems by transforming text into + the canonical composed and decomposed forms as shown in the first + example above. In addition, you can have it perform compatibility + decompositions so that you can treat compatibility characters the + same as their equivalents. Finally, UNormalize rearranges + accents into the proper canonical order, so that you do not have + to worry about accent rearrangement on your own. + + Form FCD, "Fast C or D", is also designed for collation. It allows + to work on strings that are not necessarily normalized with an + algorithm (like in collation) that works under "canonical closure", + i.e., it treats precomposed characters and their decomposed + equivalents the same. + + It is not a normalization form because it does not provide for + uniqueness of representation. Multiple strings may be canonically + equivalent (their NFDs are identical) and may all conform to FCD + without being identical themselves. + + The form is defined such that the "raw decomposition", the + recursive canonical decomposition of each character, results + in a string that is canonically ordered. This means that + precomposed characters are allowed for as long as their + decompositions do not need canonical reordering. + + Its advantage for a process like collation is that all NFD + and most NFC texts - and many unnormalized texts - already + conform to FCD and do not need to be normalized (NFD) for + such a process. The FCD quick check will return UNORM_YES + for most strings in practice. 
+ + For more details on FCD see the collation design document: + http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm + + ICU collation performs either NFD or FCD normalization + automatically if normalization is turned on for the collator + object. Beyond collation and string search, normalized strings + may be useful for string equivalence comparisons, transliteration/ + transcription, unique representations, etc. + + The W3C generally recommends exchanging texts in NFC. Note also + that most legacy character encodings use only precomposed forms + and often do not encode any combining marks by themselves. For + conversion to such character encodings the Unicode text needs to + be normalized to NFC. For more usage examples, see the Unicode + Standard Annex. + + See + the ICU normalization (unorm) API reference for full details. + + +*******************************************************************************/ + +class UNormalize : ICU +{ + enum Mode + { + None = 1, + NFD = 2, + NFKD = 3, + NFC = 4, + Default = NFC, + NFKC = 5, + FCD = 6, + Count + } + + enum Check + { + No, + Yes, + Maybe + } + + enum Options + { + None = 0x00, + Unicode32 = 0x20 + } + + /*********************************************************************** + + Normalize src into dst. The string will be normalized according + to the specified normalization mode and options + + ***********************************************************************/ + + static void normalize (UStringView src, UString dst, Mode mode, Options o = Options.None) + { + uint fmt (wchar* dst, uint len, inout UErrorCode e) + { + return unorm_normalize (src.get.ptr, src.len, mode, o, dst, len, e); + } + + dst.format (&fmt, "failed to normalize"); + } + + /*********************************************************************** + + Perform a quick check on a string, to quickly determine + if the string is in a particular normalization format. + + Three types of result can be returned: Yes, No or Maybe.
+ Result Yes indicates that the argument string is in the + desired normalized format, No indicates that the argument + string is not in the desired normalized format. A Maybe + result indicates that a more thorough check is required, + the user may have to put the string in its normalized + form and compare the results. + + ***********************************************************************/ + + static Check check (UStringView t, Mode mode, Options o = Options.None) + { + UErrorCode e; + + Check c = cast(Check) unorm_quickCheckWithOptions (t.get.ptr, t.len, mode, o, e); + testError (e, "failed to perform normalization check"); + return c; + } + + /*********************************************************************** + + Test if a string is in a given normalization form. + + Unlike check(), this function returns a definitive result, + never a "maybe". For NFD, NFKD, and FCD, both functions + work exactly the same. For NFC and NFKC where quickCheck + may return "maybe", this function will perform further + tests to arrive at a TRUE/FALSE result. + + ***********************************************************************/ + + static bool isNormalized (UStringView t, Mode mode, Options o = Options.None) + { + UErrorCode e; + + byte b = unorm_isNormalizedWithOptions (t.get.ptr, t.len, mode, o, e); + testError (e, "failed to perform normalization test"); + return b != 0; + } + + /*********************************************************************** + + Concatenate normalized strings, making sure that the result + is normalized as well. If both the left and the right strings + are in the normalization form according to "mode/options", + then the result will be + + dest=normalize(left+right, mode, options) + + With the input strings already being normalized, this function + will use unorm_next() and unorm_previous() to find the adjacent + end pieces of the input strings.
Only the concatenation of these + end pieces will be normalized and then concatenated with the + remaining parts of the input strings. + + It is allowed to have dst==left to avoid copying the entire + left string. + + ***********************************************************************/ + + static void concatenate (UStringView left, UStringView right, UString dst, Mode mode, Options o = Options.None) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return unorm_concatenate (left.get.ptr, left.len, right.get.ptr, right.len, p, len, mode, o, e); + } + + dst.format (&fmt, "failed to concatenate"); + } + + /*********************************************************************** + + Compare two strings for canonical equivalence. Further + options include case-insensitive comparison and code + point order (as opposed to code unit order). + + Canonical equivalence between two strings is defined as + their normalized forms (NFD or NFC) being identical. + This function compares strings incrementally instead of + normalizing (and optionally case-folding) both strings + entirely, improving performance significantly. + + Bulk normalization is only necessary if the strings do + not fulfill the FCD conditions. Only in this case, and + only if the strings are relatively long, is memory + allocated temporarily. For FCD strings and short non-FCD + strings there is no memory allocation. + + ***********************************************************************/ + + static int compare (UStringView left, UStringView right, Options o = Options.None) + { + UErrorCode e; + + int i = unorm_compare (left.get.ptr, left.len, right.get.ptr, right.len, o, e); + testError (e, "failed to compare"); + return i; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU unorm_ calls; their addresses are listed in targets + ***********************************************************************/ + + private static extern (C) + { + uint function (wchar*, uint, uint, uint, wchar*, uint, inout UErrorCode) unorm_normalize; + uint function (wchar*, uint, uint, uint, inout UErrorCode) unorm_quickCheckWithOptions; + byte function (wchar*, uint, uint, uint, inout UErrorCode) unorm_isNormalizedWithOptions; + uint function (wchar*, uint, wchar*, uint, wchar*, uint, uint, uint, inout UErrorCode) unorm_concatenate; + uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) unorm_compare; + } + + /*********************************************************************** + Pairs each function pointer with the ICU symbol name to bind + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &unorm_normalize, "unorm_normalize"}, + {cast(void**) &unorm_quickCheckWithOptions, "unorm_quickCheckWithOptions"}, + {cast(void**) &unorm_isNormalizedWithOptions, "unorm_isNormalizedWithOptions"}, + {cast(void**) &unorm_concatenate, "unorm_concatenate"}, + {cast(void**) &unorm_compare, "unorm_compare"}, + ]; + + /*********************************************************************** + Static constructor: binds targets from the icuuc library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + Static destructor: hands the library handle to FunctionLoader.unbind + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UNumberFormat.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/com.ibm.icu/src/com/ibm/icu/mangoicu/UNumberFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,934 @@ +/******************************************************************************* + + @file UNumberFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UNumberFormat; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +public import com.ibm.icu.mangoicu.ULocale; + +/******************************************************************************* + UCommonFormat subclass whose constructor selects Style.Decimal +*******************************************************************************/ + +class UDecimalFormat : UCommonFormat +{ + /*********************************************************************** + Construct for the given locale, forwarding Style.Decimal and a null second argument to the superclass constructor + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Decimal, null, locale); + } + + /*********************************************************************** + + Set the pattern for a UDecimalFormat; localized is passed through to unum_applyPattern + + ***********************************************************************/ + + void setPattern (UStringView pattern, bool localized) + { + UErrorCode e; + + unum_applyPattern (handle, localized, pattern.get.ptr, pattern.length, null, e); + testError (e, "failed to set numeric pattern"); + } +} + + +/******************************************************************************* + UCommonFormat subclass whose constructor selects Style.Currency +*******************************************************************************/ + +class UCurrencyFormat : UCommonFormat +{ + /*********************************************************************** + Construct for the given locale, forwarding Style.Currency and a null second argument to the superclass constructor + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Currency, null, locale); + } +} + + +/******************************************************************************* + UCommonFormat subclass whose constructor selects Style.Percent +*******************************************************************************/ + +class UPercentFormat : UCommonFormat +{ + /*********************************************************************** + Construct for the given locale, forwarding Style.Percent and a null second argument to the superclass constructor + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Percent, null, locale);
+ } +} + + +/******************************************************************************* + UCommonFormat subclass whose constructor selects Style.Scientific +*******************************************************************************/ + +class UScientificFormat : UCommonFormat +{ + /*********************************************************************** + Construct for the given locale, forwarding Style.Scientific and a null second argument to the superclass constructor + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Scientific, null, locale); + } +} + + +/******************************************************************************* + UCommonFormat subclass whose constructor selects Style.Spellout +*******************************************************************************/ + +class USpelloutFormat : UCommonFormat +{ + /*********************************************************************** + Construct for the given locale, forwarding Style.Spellout and a null second argument to the superclass constructor + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Spellout, null, locale); + } +} + + +/******************************************************************************* + UCommonFormat subclass whose constructor selects Style.Duration +*******************************************************************************/ + +class UDurationFormat : UCommonFormat +{ + /*********************************************************************** + Construct for the given locale, forwarding Style.Duration and a null second argument to the superclass constructor + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Duration, null, locale); + } +} + + +/******************************************************************************* + +*******************************************************************************/ + +class URuleBasedFormat : UNumberFormat +{ + /*********************************************************************** + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.RuleBased, null, locale); + } + + /*********************************************************************** + + ***********************************************************************/ + + void setLenientParse (bool yes) + { 
+ unum_setAttribute (handle, Attribute.LenientParse, yes); + } + + + /*********************************************************************** + + ***********************************************************************/ + + bool isLenientParse () + { + return unum_getAttribute (handle, Attribute.LenientParse) != 0; + } +} + + +/******************************************************************************* + +*******************************************************************************/ + +private class UCommonFormat : UNumberFormat +{ + /*********************************************************************** + + ***********************************************************************/ + + this (Style style, char[] pattern, inout ULocale locale) + { + super (style, pattern, locale); + } + + /*********************************************************************** + + Return true if this format will parse numbers as integers + only + + ***********************************************************************/ + + bool isParseIntegerOnly () + { + return unum_getAttribute (handle, Attribute.ParseIntOnly) != 0; + } + + /*********************************************************************** + + Returns true if grouping is used in this format. + + ***********************************************************************/ + + bool isGroupingUsed () + { + return unum_getAttribute (handle, Attribute.GroupingUsed) != 0; + } + + /*********************************************************************** + + Always show decimal point? 
+
+        ***********************************************************************/
+
+        bool isDecimalSeparatorAlwaysShown ()
+        {
+                return unum_getAttribute (handle, Attribute.DecimalAlwaysShown) != 0;
+        }
+
+        /***********************************************************************
+
+                Sets whether or not numbers should be parsed as integers
+                only (writes Attribute.ParseIntOnly).
+
+        ***********************************************************************/
+
+        void setParseIntegerOnly (bool yes)
+        {
+                unum_setAttribute (handle, Attribute.ParseIntOnly, yes);
+        }
+
+        /***********************************************************************
+
+                Set whether or not grouping will be used in this format
+                (writes Attribute.GroupingUsed).
+
+        ***********************************************************************/
+
+        void setGroupingUsed (bool yes)
+        {
+                unum_setAttribute (handle, Attribute.GroupingUsed, yes);
+        }
+
+        /***********************************************************************
+
+                Set whether the decimal point is always shown (writes
+                Attribute.DecimalAlwaysShown).
+
+        ***********************************************************************/
+
+        void setDecimalSeparatorAlwaysShown (bool yes)
+        {
+                unum_setAttribute (handle, Attribute.DecimalAlwaysShown, yes);
+        }
+
+        /***********************************************************************
+
+                Sets the maximum number of digits allowed in the integer
+                portion of a number (writes Attribute.MaxIntegerDigits).
+
+        ***********************************************************************/
+
+        void setMaxIntegerDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.MaxIntegerDigits, x);
+        }
+
+        /***********************************************************************
+
+                Sets the minimum number of digits allowed in the integer
+                portion of a number.
+
+        ***********************************************************************/
+
+        void setMinIntegerDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.MinIntegerDigits, x);
+        }
+
+        /***********************************************************************
+
+                Integer digits displayed (writes Attribute.IntegerDigits).
+
+        ***********************************************************************/
+
+        void setIntegerDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.IntegerDigits, x);
+        }
+
+        /***********************************************************************
+
+                Sets the maximum number of digits allowed in the fraction
+                portion of a number (writes Attribute.MaxFractionDigits).
+
+        ***********************************************************************/
+
+        void setMaxFractionDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.MaxFractionDigits, x);
+        }
+
+        /***********************************************************************
+
+                Sets the minimum number of digits allowed in the fraction
+                portion of a number (writes Attribute.MinFractionDigits).
+
+        ***********************************************************************/
+
+        void setMinFractionDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.MinFractionDigits, x);
+        }
+
+        /***********************************************************************
+
+                Fraction digits.
+
+        ***********************************************************************/
+
+        void setFractionDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.FractionDigits, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.Multiplier through unum_setAttribute.
+
+        ***********************************************************************/
+
+        void setMultiplier (uint x)
+        {
+                unum_setAttribute (handle, Attribute.Multiplier, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.GroupingSize through unum_setAttribute.
+
+        ***********************************************************************/
+
+        void setGroupingSize (uint x)
+        {
+                unum_setAttribute (handle, Attribute.GroupingSize, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.RoundingMode through unum_setAttribute;
+                the Rounding member is passed as its integer value.
+
+        ***********************************************************************/
+
+        void setRoundingMode (Rounding x)
+        {
+                unum_setAttribute (handle, Attribute.RoundingMode, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.RoundingIncrement through
+                unum_setAttribute.
+
+        ***********************************************************************/
+
+        void setRoundingIncrement (uint x)
+        {
+                unum_setAttribute (handle, Attribute.RoundingIncrement, x);
+        }
+
+        /***********************************************************************
+
+                The width to which the output of format() is padded
+                (writes Attribute.FormatWidth).
+
+        ***********************************************************************/
+
+        void setFormatWidth (uint x)
+        {
+                unum_setAttribute (handle, Attribute.FormatWidth, x);
+        }
+
+        /***********************************************************************
+
+                The position at which padding will take place.
+
+        ***********************************************************************/
+
+        void setPaddingPosition (Pad x)
+        {
+                unum_setAttribute (handle, Attribute.PaddingPosition, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.SecondaryGroupingSize through
+                unum_setAttribute.
+
+        ***********************************************************************/
+
+        void setSecondaryGroupingSize (uint x)
+        {
+                unum_setAttribute (handle, Attribute.SecondaryGroupingSize, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.SignificantDigitsUsed through
+                unum_setAttribute. NOTE(review): the name suggests an
+                on/off switch although the parameter is a uint --
+                confirm against the ICU attribute documentation.
+
+        ***********************************************************************/
+
+        void setSignificantDigitsUsed (uint x)
+        {
+                unum_setAttribute (handle, Attribute.SignificantDigitsUsed, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.MinSignificantDigits through
+                unum_setAttribute.
+
+        ***********************************************************************/
+
+        void setMinSignificantDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.MinSignificantDigits, x);
+        }
+
+        /***********************************************************************
+
+                Write Attribute.MaxSignificantDigits through
+                unum_setAttribute.
+
+        ***********************************************************************/
+
+        void setMaxSignificantDigits (uint x)
+        {
+                unum_setAttribute (handle, Attribute.MaxSignificantDigits, x);
+        }
+
+
+        /***********************************************************************
+
+                Returns the maximum number of digits allowed in the integer
+                portion of a number (reads Attribute.MaxIntegerDigits).
+
+        ***********************************************************************/
+
+        uint getMaxIntegerDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.MaxIntegerDigits);
+        }
+
+        /***********************************************************************
+
+                Returns the minimum number of digits allowed in the integer
+                portion of a number.
+
+        ***********************************************************************/
+
+        uint getMinIntegerDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.MinIntegerDigits);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.IntegerDigits through unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getIntegerDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.IntegerDigits);
+        }
+
+        /***********************************************************************
+
+                Returns the maximum number of digits allowed in the fraction
+                portion of a number (reads Attribute.MaxFractionDigits).
+
+        ***********************************************************************/
+
+        uint getMaxFractionDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.MaxFractionDigits);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.MinFractionDigits through
+                unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getMinFractionDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.MinFractionDigits);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.FractionDigits through unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getFractionDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.FractionDigits);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.Multiplier through unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getMultiplier ()
+        {
+                return unum_getAttribute (handle, Attribute.Multiplier);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.GroupingSize through unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getGroupingSize ()
+        {
+                return unum_getAttribute (handle, Attribute.GroupingSize);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.RoundingMode and cast the integer result
+                to the Rounding enum.
+
+        ***********************************************************************/
+
+        Rounding getRoundingMode ()
+        {
+                return cast(Rounding) unum_getAttribute (handle, Attribute.RoundingMode);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.RoundingIncrement through
+                unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getRoundingIncrement ()
+        {
+                return unum_getAttribute (handle, Attribute.RoundingIncrement);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.FormatWidth through unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getFormatWidth ()
+        {
+                return unum_getAttribute (handle, Attribute.FormatWidth);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.PaddingPosition and cast the integer
+                result to the Pad enum.
+
+        ***********************************************************************/
+
+        Pad getPaddingPosition ()
+        {
+                return cast(Pad) unum_getAttribute (handle, Attribute.PaddingPosition);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.SecondaryGroupingSize through
+                unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getSecondaryGroupingSize ()
+        {
+                return unum_getAttribute (handle, Attribute.SecondaryGroupingSize);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.SignificantDigitsUsed through
+                unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getSignificantDigitsUsed ()
+        {
+                return unum_getAttribute (handle, Attribute.SignificantDigitsUsed);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.MinSignificantDigits through
+                unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getMinSignificantDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.MinSignificantDigits);
+        }
+
+        /***********************************************************************
+
+                Read Attribute.MaxSignificantDigits through
+                unum_getAttribute.
+
+        ***********************************************************************/
+
+        uint getMaxSignificantDigits ()
+        {
+                return unum_getAttribute (handle, Attribute.MaxSignificantDigits);
+        }
+
+        /***********************************************************************
+
+                Copy the pattern currently in use into 'dst' by way of
+                unum_toPattern; 'localize' is forwarded unchanged.
+
+
***********************************************************************/
+
+        void getPattern (UString dst, bool localize)
+        {
+                uint fmat (wchar* result, uint len, inout UErrorCode e)         // callback handed to dst.format()
+                {
+                        return unum_toPattern (handle, localize, result, len, e);
+                }
+
+                dst.format (&fmat, "failed to retrieve numeric format pattern");
+        }
+}
+
+
+/*******************************************************************************
+
+        UNumberFormat provides functions for formatting and parsing
+        a number. Also provides methods for determining which locales have
+        number formats, and what their names are.
+
+        UNumberFormat helps you to format and parse numbers for any locale.
+        Your code can be completely independent of the locale conventions
+        for decimal points, thousands-separators, or even the particular
+        decimal digits used, or whether the number format is even decimal.
+        There are different number format styles like decimal, currency,
+        percent and spellout.
+
+        See the ICU C API reference for unum.h
+        for full details.
+
+*******************************************************************************/
+
+class UNumberFormat : ICU
+{
+        package Handle handle;          // set by the constructor from unum_open, released in the destructor
+
+        typedef void* UFieldPos;        // opaque; forwarded as-is to the unum_format* calls
+        typedef void* ParseError;       // opaque; this module only ever passes null for it
+
+
+        public enum Rounding            // value type of Attribute.RoundingMode (see get/setRoundingMode)
+        {
+                Ceiling,
+                Floor,
+                Down,
+                Up,
+                HalfEven,
+                HalfDown,
+                HalfUp
+        };
+
+        public enum Pad                 // value type of Attribute.PaddingPosition (see get/setPaddingPosition)
+        {
+                BeforePrefix,
+                AfterPrefix,
+                BeforeSuffix,
+                AfterSuffix
+        };
+
+        public enum Style               // first argument given to unum_open by the constructor
+        {
+                PatternDecimal,
+                Decimal,
+                Currency,
+                Percent,
+                Scientific,
+                Spellout,
+                Ordinal,
+                Duration,
+                RuleBased,
+                Default = Decimal,      // alias of Decimal
+                Ignore = PatternDecimal // alias of PatternDecimal
+        };
+
+        private enum Attribute          // selector for unum_getAttribute / unum_setAttribute; presumably mirrors ICU's ordering -- confirm before reordering
+        {
+                ParseIntOnly,
+                GroupingUsed,
+                DecimalAlwaysShown,
+                MaxIntegerDigits,
+                MinIntegerDigits,
+                IntegerDigits,
+                MaxFractionDigits,
+                MinFractionDigits,
+                FractionDigits,
+                Multiplier,
+                GroupingSize,
+                RoundingMode,
+                RoundingIncrement,
+                FormatWidth,
+                PaddingPosition,
+                SecondaryGroupingSize,
+                SignificantDigitsUsed,
+                MinSignificantDigits,
+                MaxSignificantDigits,
+                LenientParse
+        };
+
+        private enum Symbol
+        {
+                DecimalSeparator,
+                GroupingSeparator,
+                PatternSeparator,
+                Percent,
+                ZeroDigit,
+                Digit,
+                MinusSign,
+                PlusSign,
+                Currency,
+                IntlCurrency,
+                MonetarySeparator,
+                Exponential,
+                Permill,
+                PadEscape,
+                Infinity,
+                Nan,
+                SignificantDigit,
+                FormatSymbolCount
+        };
+
+        /***********************************************************************
+
+                Open an ICU number format of the requested style for
+                the named locale and keep its handle. The pattern is
+                passed as pointer plus length; no parse-error structure
+                is requested. A failure status goes through testError().
+
+                NOTE(review): the pattern is a char[] here whereas ICU
+                documents a UTF-16 pattern for unum_open -- confirm
+                before passing anything other than null.
+
+        ***********************************************************************/
+
+        this (Style style, char[] pattern, inout ULocale locale)
+        {
+                UErrorCode status;
+
+                handle = unum_open (style, pattern.ptr, pattern.length,
+                                    toString(locale.name), null, status);
+                testError (status, "failed to create NumberFormat");
+        }
+
+        /***********************************************************************
+
+                Hand the formatter handle back to ICU.
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                unum_close (handle);
+        }
+
+        /***********************************************************************
+
+                Format a 32-bit integer into 'dst'. The work is done by
+                a nested callback which dst.format() invokes with the
+                output buffer; 'p' is forwarded to ICU untouched.
+
+        ***********************************************************************/
+
+        void format (UString dst, int number, UFieldPos p = null)
+        {
+                uint emit (wchar* buffer, uint capacity, inout UErrorCode status)
+                {
+                        return unum_format (handle, number, buffer, capacity, p, status);
+                }
+
+                dst.format (&emit, "int format failed");
+        }
+
+        /***********************************************************************
+
+                Format a 64-bit integer into 'dst'; same mechanism as
+                the int overload, using unum_formatInt64.
+
+        ***********************************************************************/
+
+        void format (UString dst, long number, UFieldPos p = null)
+        {
+                uint emit (wchar* buffer, uint capacity, inout UErrorCode status)
+                {
+                        return unum_formatInt64 (handle, number, buffer, capacity, p, status);
+                }
+
+                dst.format (&emit, "int64 format failed");
+        }
+
+        /***********************************************************************
+
+                Format a double into 'dst'; same mechanism as the int
+                overload, using unum_formatDouble.
+
+        ***********************************************************************/
+
+        void format (UString dst, double number, UFieldPos p = null)
+        {
+                uint emit (wchar* buffer, uint capacity, inout UErrorCode status)
+                {
+                        return unum_formatDouble (handle, number, buffer, capacity, p, status);
+                }
+
+                dst.format (&emit, "double format failed");
+        }
+
+        /***********************************************************************
+
+                Parse 'src' as a 32-bit integer with unum_parse. The
+                optional 'index' pointer is forwarded to ICU.
+
+                The status written by ICU is not examined, so a failed
+                parse is not reported to the caller by this method.
+
+        ***********************************************************************/
+
+        int parseInteger (UStringView src, uint* index=null)
+        {
+                UErrorCode status;
+                int value = unum_parse (handle, src.content.ptr, src.len, index, status);
+
+                return value;
+        }
+
+        /***********************************************************************
+
+                Parse 'src' as a 64-bit integer with unum_parseInt64.
+                As with parseInteger, the ICU status is not examined.
+
+        ***********************************************************************/
+
+        long parseLong (UStringView src, uint* index=null)
+        {
+                UErrorCode status;
+                long value = unum_parseInt64 (handle, src.content.ptr, src.len, index, status);
+
+                return value;
+        }
+
+        /***********************************************************************
+
+                Parse 'src' as a double with unum_parseDouble. As with
+                parseInteger, the ICU status is not examined.
+
+        ***********************************************************************/
+
+        double parseDouble (UStringView src, uint* index=null)
+        {
+                UErrorCode status;
+                double value = unum_parseDouble (handle, src.content.ptr, src.len, index, status);
+
+                return value;
+        }
+
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library.
This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU C entry points used by
+                this module. They are filled in by the static
+                constructor from the 'targets' table below, so every
+                pointer declared here needs a matching row there.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (uint, char*, uint, char*, ParseError, inout UErrorCode) unum_open;
+                void   function (Handle) unum_close;
+                int    function (Handle, int,    wchar*, uint, UFieldPos, inout UErrorCode) unum_format;
+                int    function (Handle, long,   wchar*, uint, UFieldPos, inout UErrorCode) unum_formatInt64;
+                int    function (Handle, double, wchar*, uint, UFieldPos, inout UErrorCode) unum_formatDouble;
+                int    function (Handle, wchar*, uint, uint*, inout UErrorCode) unum_parse;
+                long   function (Handle, wchar*, uint, uint*, inout UErrorCode) unum_parseInt64;
+                double function (Handle, wchar*, uint, uint*, inout UErrorCode) unum_parseDouble;
+                int    function (Handle, uint) unum_getAttribute;
+                void   function (Handle, uint, uint) unum_setAttribute;
+                uint   function (Handle, byte, wchar*, uint, inout UErrorCode) unum_toPattern;
+                void   function (Handle, byte, wchar*, uint, ParseError, inout UErrorCode) unum_applyPattern;
+        }
+
+        /***********************************************************************
+
+                Binding table: the address of each function pointer
+                paired with the symbol name handed to FunctionLoader.
+
+                Each row is a two-field initialiser, so the fields must
+                be comma separated. unum_applyPattern is listed as well:
+                UDecimalFormat.setPattern calls through that pointer,
+                which would otherwise never be bound.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &unum_open,               "unum_open"},
+                {cast(void**) &unum_close,              "unum_close"},
+                {cast(void**) &unum_format,             "unum_format"},
+                {cast(void**) &unum_formatInt64,        "unum_formatInt64"},
+                {cast(void**) &unum_formatDouble,       "unum_formatDouble"},
+                {cast(void**) &unum_parse,              "unum_parse"},
+                {cast(void**) &unum_parseInt64,         "unum_parseInt64"},
+                {cast(void**) &unum_parseDouble,        "unum_parseDouble"},
+                {cast(void**) &unum_getAttribute,       "unum_getAttribute"},
+                {cast(void**) &unum_setAttribute,       "unum_setAttribute"},
+                {cast(void**) &unum_toPattern,          "unum_toPattern"},
+                {cast(void**) &unum_applyPattern,       "unum_applyPattern"},
+                ];
+
+        /***********************************************************************
+
+                Resolve every entry of 'targets' from the library named
+                by 'icuin' and remember the library handle.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuin, targets);
+        }
+
+        /***********************************************************************
+
+                Release the library handle obtained by the static
+                constructor.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
+
+
+
diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/URegex.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/URegex.d Sun Apr 19 13:49:38 2009 +0200
@@ -0,0 +1,700 @@
+/*******************************************************************************
+
+        @file URegex.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+
+        Permission is hereby granted to anyone to use this software for any
+        purpose, including commercial applications, and to alter it and/or
+        redistribute it freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented; you must
+           not claim that you wrote the original software. If you use this
+           software in a product, an acknowledgment within documentation of
+           said product would be appreciated but is not required.
+
+        2. Altered source versions must be plainly marked as such, and must
+           not be misrepresented as being the original software.
+
+        3. This notice may not be removed or altered from any distribution
+           of the source.
+
+        4. Derivative works are permitted, but they must carry this notice
+           in full and credit the original source.
+ + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. 
+
+        ----------------------------------------------------------------------
+
+        All trademarks and registered trademarks mentioned herein are the
+        property of their respective owners.
+
+*******************************************************************************/
+
+module com.ibm.icu.mangoicu.URegex;
+
+private import  com.ibm.icu.mangoicu.ICU;
+
+public  import  com.ibm.icu.mangoicu.ULocale,
+                com.ibm.icu.mangoicu.UString,
+                com.ibm.icu.mangoicu.UCollator,
+                com.ibm.icu.mangoicu.UBreakIterator;
+
+
+/*******************************************************************************
+
+        Set of slices to return for group matching. See URegex.groups()
+
+        URegex.groups() walks these fields through a pointer that
+        starts at g0, so they must stay declared together and in
+        this order.
+
+*******************************************************************************/
+
+class Groups : ICU
+{
+        public wchar[]  g0,
+                        g1,
+                        g2,
+                        g3,
+                        g4,
+                        g5,
+                        g6,
+                        g7,
+                        g8,
+                        g9;
+}
+
+/*******************************************************************************
+
+        Apis for an engine that provides regular-expression searching of
+        UTF16 strings.
+
+        See http://icu.sourceforge.net/apiref/icu4c/uregex_8h.html for full
+        details.
+
+*******************************************************************************/
+
+class URegex : Groups
+{
+        private Handle          handle;         // compiled expression, released in the destructor
+        private UStringView     theText;        // subject text last given to setText()
+
+        // Regex modes
+        public enum Flag
+        {
+                None = 0,
+
+                // Enable case insensitive matching
+                CaseInsensitive = 2,
+
+                // Allow white space and comments within patterns
+                Comments = 4,
+
+                // Control behavior of "$" and "^". If set, recognize
+                // line terminators within string, otherwise, match
+                // only at start and end of input string.
+                MultiLine = 8,
+
+                // If set, '.' matches line terminators, otherwise '.'
+                // matching stops at line end
+                DotAll = 32,
+
+                // Forces normalization of pattern and strings
+                CanonEq = 128,
+
+                // If set, uses the Unicode TR 29 definition of word
+                // boundaries. Warning: Unicode word boundaries are
+                // quite different from traditional regular expression
+                // word boundaries. See http://unicode.org/reports/tr29/#Word_Boundaries
+                UWord = 256,
+        }
+
+        /***********************************************************************
+
+                Compiles the regular expression in string form into an
+                internal representation using the specified match mode
+                flags. The resulting regular expression handle can then
+                be used to perform various matching operations.
+
+                A failure to compile is reported through testError().
+                The subject text is then initialised to (null, 0); the
+                status of that second call is not examined.
+
+        ***********************************************************************/
+
+        this (wchar[] pattern, Flag flags=Flag.None, ParseError* pe=null)
+        {
+                UErrorCode e;
+
+                handle = uregex_open (pattern.ptr, pattern.length, flags, pe, e);
+                testError (e, "failed to open regex");
+                uregex_setText (handle, null, 0, e);
+        }
+
+        /***********************************************************************
+
+                Compiles the regular expression in string form into an
+                internal representation using the specified match mode
+                flags. The resulting regular expression handle can then
+                be used to perform various matching operations.
+
+                Convenience overload: delegates to the wchar[]
+                constructor with pattern.get.
+
+        ***********************************************************************/
+
+        this (UStringView pattern, Flag flags=Flag.None, ParseError* pe=null)
+        {
+                this (pattern.get, flags, pe);
+        }
+
+        /***********************************************************************
+
+                Internal constructor; used for cloning
+
+                Adopts an already opened handle and initialises the
+                subject text to (null, 0); the status of that call is
+                not examined.
+
+        ***********************************************************************/
+
+        private this (Handle handle)
+        {
+                UErrorCode e;
+
+                this.handle = handle;
+                uregex_setText (handle, null, 0, e);
+        }
+
+        /***********************************************************************
+
+                Close the regular expression, recovering all resources (memory)
+                it was holding
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                uregex_close (handle);
+        }
+
+        /***********************************************************************
+
+                Cloning a regular expression is faster than opening a second
+                instance from the source form of the expression, and requires
+
less memory.
+
+                Note that the current input string and the position of any
+                matched text within it are not cloned; only the pattern itself
+                and the match mode flags are copied.
+
+                Cloning can be particularly useful to threaded applications
+                that perform multiple match operations in parallel. Each
+                concurrent RE operation requires its own instance of a
+                URegularExpression.
+
+        ***********************************************************************/
+
+        URegex clone ()
+        {
+                UErrorCode status;
+
+                Handle copy = uregex_clone (handle, status);
+                testError (status, "failed to clone regex");
+
+                // the private constructor adopts the cloned handle
+                URegex twin = new URegex (copy);
+                return twin;
+        }
+
+        /***********************************************************************
+
+                Return the source form of the pattern as a new UString.
+                ICU hands back a pointer and a length; the two are
+                combined into a slice which is passed to the UString
+                constructor.
+
+        ***********************************************************************/
+
+        UString getPattern ()
+        {
+                uint       length;
+                UErrorCode status;
+
+                wchar* source = uregex_pattern (handle, length, status);
+                testError (status, "failed to extract regex pattern");
+
+                wchar[] text = source [0 .. length];
+                return new UString (text);
+        }
+
+        /***********************************************************************
+
+                Return the match mode flags reported by uregex_flags,
+                cast to the Flag enum.
+
+        ***********************************************************************/
+
+        Flag getFlags ()
+        {
+                UErrorCode status;
+
+                Flag mode = cast(Flag) uregex_flags (handle, status);
+                testError (status, "failed to get regex flags");
+                return mode;
+        }
+
+        /***********************************************************************
+
+                Set the subject text string upon which the regular expression
+                will look for matches.
+
+                This function may be called any number of times, allowing the
+                regular expression pattern to be applied to different strings.
+
+                Regular expression matching operations work directly on the
+                application's string data. No copy is made.
The subject string
+                data must not be altered after calling this function until after
+                all regular expression operations involving this string data are
+                completed.
+
+                Zero length strings are permitted. In this case, no subsequent
+                match operation will dereference the text string pointer.
+
+        ***********************************************************************/
+
+        void setText (UStringView t)
+        {
+                UErrorCode e;
+
+                // keep a reference: groups() and getText() read it back
+                theText = t;
+                uregex_setText (handle, t.get.ptr, t.length, e);
+                testError (e, "failed to set regex text");
+        }
+
+        /***********************************************************************
+
+                Get the subject text that is currently associated with this
+                regular expression object. This simply returns whatever was
+                previously supplied via setText().
+
+                Note that this returns a read-only reference to the text.
+
+        ***********************************************************************/
+
+        UStringView getText ()
+        {
+                return theText;
+        }
+
+        /***********************************************************************
+
+                Return a set of slices representing the parenthesised groups.
+                This can be used in the following manner:
+
+                @code
+                wchar[] msg;
+
+                if (regex.next())
+                    with (regex.groups())
+                          msg ~= g1 ~ ":" ~ g2;
+                @endcode
+
+                Note that g0 represents the entire match, whereas g1 through
+                g9 represent the parenthesised expressions. At most nine
+                groups are captured; further groups are ignored.
+
+                A group which took no part in the last match is reported
+                by start() as -1 (seen here as uint.max); its slice is
+                set to null rather than being cut from the text with an
+                out-of-range index.
+
+        ***********************************************************************/
+
+        Groups groups ()
+        {
+                wchar[]* p = &g0;
+                uint     count = groupCount();
+                wchar[]  content = theText.get();
+
+                if (count > 9)
+                    count = 9;
+
+                // g0..g9 are declared consecutively, so stepping p visits each
+                for (uint i=0; i <= count; ++p, ++i)
+                    {
+                    uint from = start (i);
+
+                    if (from == uint.max)
+                        *p = null;
+                    else
+                       *p = content [from .. end(i)];
+                    }
+                return this;
+        }
+
+        /***********************************************************************
+
+                Extract the string for the specified matching expression or
+                subexpression. UString 's' is the destination for the match.
+
+                Group #0 is the complete string of matched text.
Group #1 is
+                the text matched by the first set of capturing parentheses.
+
+        ***********************************************************************/
+
+        void group (UString s, uint index)
+        {
+                // callback handed to s.format(); it forwards the destination
+                // buffer to ICU for the requested group
+                uint extract (wchar* buffer, uint capacity, inout UErrorCode status)
+                {
+                        return uregex_group (handle, index, buffer, capacity, status);
+                }
+
+                s.format (&extract, "failed to extract regex group text");
+        }
+
+        /***********************************************************************
+
+                Get the number of capturing groups in this regular
+                expression's pattern
+
+        ***********************************************************************/
+
+        uint groupCount ()
+        {
+                UErrorCode status;
+
+                uint total = uregex_groupCount (handle, status);
+                testError (status, "failed to get regex group-count");
+                return total;
+        }
+
+        /***********************************************************************
+
+                Returns the index in the input string of the start of the
+                text matched by the specified capture group during the
+                previous match operation.
+
+                Return -1 if the capture group was not part of the last
+                match (the return type is uint, so this value shows up
+                as uint.max). Group #0 refers to the complete range of
+                matched text. Group #1 refers to the text matched by the
+                first set of capturing parentheses
+
+        ***********************************************************************/
+
+        uint start (uint index = 0)
+        {
+                UErrorCode status;
+
+                uint offset = uregex_start (handle, index, status);
+                testError (status, "failed to get regex start");
+                return offset;
+        }
+
+        /***********************************************************************
+
+                Returns the index in the input string of the position
+                following the end of the text matched by the specified
+                capture group.
+
+                Return -1 if the capture group was not part of the last
+                match. Group #0 refers to the complete range of matched
+                text. Group #1 refers to the text matched by the first
+                set of capturing parentheses.
+
+        ***********************************************************************/
+
+        uint end (uint index = 0)
+        {
+                UErrorCode status;
+
+                uint offset = uregex_end (handle, index, status);
+                testError (status, "failed to get regex end");
+                return offset;
+        }
+
+        /***********************************************************************
+
+                Discard any state saved from the previous match.
+
+                Has the effect of causing uregex_findNext to begin at the
+                specified index, and causing uregex_start(), uregex_end()
+                and uregex_group() to return an error indicating that there
+                is no match information available.
+
+        ***********************************************************************/
+
+        void reset (uint startIndex)
+        {
+                UErrorCode status;
+
+                uregex_reset (handle, startIndex, status);
+                testError (status, "failed to set regex next-index");
+        }
+
+        /***********************************************************************
+
+                Attempts to match the input string, beginning at startIndex,
+                against the pattern.
+
+                To succeed, the match must extend to the end of the input
+                string
+
+        ***********************************************************************/
+
+        bool match (uint startIndex)
+        {
+                UErrorCode status;
+
+                bool matched = uregex_matches (handle, startIndex, status);
+                testError (status, "failed while matching regex");
+                return matched;
+        }
+
+        /***********************************************************************
+
+                Attempts to match the input string, starting from the
+                specified index, against the pattern.
+
+                The match may be of any length, and is not required to
+                extend to the end of the input string.
Contrast with match()
+
+        ***********************************************************************/
+
+        bool probe (uint startIndex)
+        {
+                UErrorCode status;
+
+                bool matched = uregex_lookingAt (handle, startIndex, status);
+                testError (status, "failed while looking at regex");
+                return matched;
+        }
+
+        /***********************************************************************
+
+                Returns whether the text matches the search pattern, starting
+                from the current position.
+
+                If startIndex is specified, the current position is moved to
+                the specified location before the search is initiated. The
+                default of uint.max means "continue from where the previous
+                find stopped".
+
+        ***********************************************************************/
+
+        bool next (uint startIndex = uint.max)
+        {
+                UErrorCode status;
+                bool       found;
+
+                if (startIndex != uint.max)
+                    found = uregex_find (handle, startIndex, status);
+                else
+                   found = uregex_findNext (handle, status);
+
+                testError (status, "failed on next regex");
+                return found;
+        }
+
+        /***********************************************************************
+
+                Replaces every substring of the input that matches the pattern
+                with the given replacement string.
+
+                This is a convenience function that provides a complete
+                find-and-replace-all operation.
+
+                This method scans the input string looking for matches of
+                the pattern. Input that is not part of any match is copied
+                unchanged to the destination buffer. Matched regions are
+                replaced in the output buffer by the replacement string.
+                The replacement string may contain references to capture
+                groups; these take the form of $1, $2, etc.
+
+                The provided 'result' will contain the results, and should
+                be set with a length sufficient to house the entire result.
+                Upon completion, the 'result' is shortened appropriately
+                and the total extent (length) of the operation is returned.
+                Set the initial length of 'result' using the UString method
+                truncate().
+
+                The returned extent should be checked to ensure it is not
+                longer than the length of 'result'. If it is longer, then
+                the result has been truncated.
+
+        ***********************************************************************/
+
+        uint replaceAll (UStringView replace, UString result)
+        {
+                UErrorCode status;
+
+                // 'result' doubles as the output buffer: its current length
+                // is the capacity offered to ICU
+                uint extent = uregex_replaceAll (handle,
+                                                 replace.get.ptr, replace.length,
+                                                 result.get.ptr, result.length,
+                                                 status);
+                testError (status, "failed during regex replace");
+
+                result.truncate (extent);
+                return extent;
+        }
+
+        /***********************************************************************
+
+                Replaces the first substring of the input that matches the
+                pattern with the given replacement string.
+
+                This is a convenience function that provides a complete
+                find-and-replace operation.
+
+                This method scans the input string looking for a match of
+                the pattern. All input that is not part of the match is
+                copied unchanged to the destination buffer. The matched
+                region is replaced in the output buffer by the replacement
+                string. The replacement string may contain references to
+                capture groups; these take the form of $1, $2, etc
+
+                The provided 'result' will contain the results, and should
+                be set with a length sufficient to house the entire result.
+                Upon completion, the 'result' is shortened appropriately
+                and the total extent (length) of the operation is returned.
+                Set the initial length of 'result' using the UString method
+                truncate().
+
+                The returned extent should be checked to ensure it is not
+                longer than the length of 'result'. If it is longer, then
+                the result has been truncated.
+
+        ***********************************************************************/
+
+        uint replaceFirst (UStringView replace, UString result)
+        {
+                UErrorCode e;
+
+                // 'result' doubles as the output buffer: its current length
+                // is the capacity offered to ICU
+                uint len = uregex_replaceFirst (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e);
+                testError (e, "failed during regex replace");
+                result.truncate (len);
+                return len;
+        }
+
+        /***********************************************************************
+
+                Split the text up into slices (fields), where each slice
+                represents the text situated between each pattern matched
+                within the text. The pattern is expected to represent one
+                or more slice delimiters.
+
+                Matching continues from the current find position via
+                uregex_findNext(). A delimiter found at index zero does
+                not produce an empty leading field. Text remaining after
+                the final delimiter is returned as the last field, as
+                long as 'fields' still has room for it.
+
+                Returns the number of entries of 'fields' that were
+                populated; at most fields.length slices are produced.
+
+        ***********************************************************************/
+
+        uint split (wchar[][] fields)
+        {
+                UErrorCode e;
+                uint       pos,
+                           count;
+                wchar[]    content = theText.get;
+
+                while (count < fields.length)
+                       if (uregex_findNext (handle, e) && e == e.OK)
+                          {
+                          uint i = start();
+                          fields[count] = content[pos..i];
+                          pos = end ();
+
+                          // ignore leading delimiter
+                          if (i)
+                              ++count;
+                          }
+                       else
+                          break;
+
+                testError (e, "failed during split");
+
+                // the text following the last delimiter is a field too;
+                // previously it was silently dropped
+                if (count < fields.length && pos < content.length)
+                    fields[count++] = content[pos..content.length];
+
+                return count;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library.
This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+                'library' holds whatever FunctionLoader.bind() returned in
+                the static constructor, and is handed back to
+                FunctionLoader.unbind() in the static destructor.
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the uregex_* entry points of the
+                ICU C API. Each pointer is listed by address in 'targets'
+                below, which the static constructor passes to
+                FunctionLoader.bind() (presumably the step that fills
+                them in; FunctionLoader is declared outside this section).
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (wchar*, uint, uint, ParseError*, inout UErrorCode) uregex_open;
+                void function (Handle) uregex_close;
+                Handle function (Handle, inout UErrorCode) uregex_clone;
+                wchar* function (Handle, inout uint, inout UErrorCode) uregex_pattern;
+                uint function (Handle, inout UErrorCode) uregex_flags;
+                void function (Handle, wchar*, uint, inout UErrorCode) uregex_setText;
+                wchar* function (Handle, inout uint, inout UErrorCode) uregex_getText;
+                uint function (Handle, uint, wchar*, uint, inout UErrorCode) uregex_group;
+                uint function (Handle, inout UErrorCode) uregex_groupCount;
+                uint function (Handle, uint, inout UErrorCode) uregex_start;
+                uint function (Handle, uint, inout UErrorCode) uregex_end;
+                void function (Handle, uint, inout UErrorCode) uregex_reset;
+                bool function (Handle, uint, inout UErrorCode) uregex_matches;
+                bool function (Handle, uint, inout UErrorCode) uregex_lookingAt;
+                bool function (Handle, uint, inout UErrorCode) uregex_find;
+                bool function (Handle, inout UErrorCode) uregex_findNext;
+                uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceAll;
+                uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceFirst;
+        }
+
+        /***********************************************************************
+
+                Binding table: pairs the address of each function pointer
+                declared above with the symbol name to bind it to. Every
+                pointer declared above must have an entry here.
+
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &uregex_open, "uregex_open"},
+                {cast(void**) &uregex_close, "uregex_close"},
+                {cast(void**) &uregex_clone, "uregex_clone"},
+                {cast(void**) &uregex_pattern, "uregex_pattern"},
+                {cast(void**) &uregex_flags, "uregex_flags"},
+                {cast(void**) &uregex_setText, "uregex_setText"},
+                {cast(void**) &uregex_getText, "uregex_getText"},
+                {cast(void**) &uregex_group, "uregex_group"},
+                {cast(void**) &uregex_groupCount, "uregex_groupCount"},
+                {cast(void**) &uregex_start, "uregex_start"},
+                {cast(void**) &uregex_end, "uregex_end"},
+                {cast(void**) &uregex_reset, "uregex_reset"},
+                {cast(void**) &uregex_matches, "uregex_matches"},
+                {cast(void**) &uregex_lookingAt, "uregex_lookingAt"},
+                {cast(void**) &uregex_find, "uregex_find"},
+                {cast(void**) &uregex_findNext, "uregex_findNext"},
+                {cast(void**) &uregex_replaceAll, "uregex_replaceAll"},
+                {cast(void**) &uregex_replaceFirst, "uregex_replaceFirst"},
+                ];
+
+        /***********************************************************************
+
+                Module-load hook: binds the table above against the
+                library identified by 'icuin' and remembers the result
+                for the static destructor.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuin, targets);
+        }
+
+        /***********************************************************************
+
+                Module-unload hook: hands the value saved by the static
+                constructor back to FunctionLoader.unbind().
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UResourceBundle.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UResourceBundle.d Sun Apr 19 13:49:38 2009 +0200
@@ -0,0 +1,544 @@
+/*******************************************************************************
+
+        @file UResourceBundle.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+
+        Permission is hereby granted to anyone to use this software for any
+        purpose, including commercial applications, and to alter it and/or
+        redistribute it freely, subject to the following restrictions:
+
+        1.
The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UResourceBundle; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +public import com.ibm.icu.mangoicu.ULocale; + +/******************************************************************************* + + API representing a collection of resource information pertaining to + a given locale. A resource bundle provides a way of accessing locale- + specific information in a data file. You create a resource bundle that + manages the resources for a given locale and then ask it for individual + resources. + + Resource bundles in ICU4C are currently defined using text files which + conform to the following BNF definition. More on resource bundle concepts + and syntax can be found in the Users Guide. + + See + this page for full details. 
+
+*******************************************************************************/
+
+class UResourceBundle : ICU
+{
+        // ICU resource-bundle handle wrapped by this instance; released
+        // through ures_close() in the destructor
+        private Handle handle;
+
+        /***********************************************************************
+
+                Internals opened up to the public
+
+        ***********************************************************************/
+
+        // Numeric constants for types of resource items
+        public enum     ResType
+                        {
+                        None      = -1,
+                        String    = 0,
+                        Binary    = 1,
+                        Table     = 2,
+                        Alias     = 3,
+                        Int       = 7,
+                        Array     = 8,
+                        IntVector = 14
+                        }
+
+        /***********************************************************************
+
+                private constructor for internal use only
+
+        ***********************************************************************/
+
+        private this (Handle handle)
+        {
+                this.handle = handle;
+        }
+
+        /***********************************************************************
+
+                Constructs a resource bundle for the locale-specific bundle
+                in the specified path.
+
+                locale  This is the locale this resource bundle is for. To
+                        get resources for the French locale, for example, you
+                        would create a ResourceBundle passing ULocale::FRENCH
+                        for the "locale" parameter, and all subsequent calls
+                        to that resource bundle will return resources that
+                        pertain to the French locale. If the caller passes a
+                        Locale.Default parameter, the default locale for the
+                        system (as returned by ULocale.getDefault()) will be
+                        used. Passing Locale.Root will cause the root-locale
+                        to be used.
+
+                path    This is a full pathname in the platform-specific
+                        format for the directory containing the resource
+                        data files we want to load resources from. We use
+                        locale IDs to generate filenames, and the filenames
+                        have this string prepended to them before being passed
+                        to the C++ I/O functions. Therefore, this string must
+                        always end with a directory delimiter (whatever that
+                        is for the target OS) for this class to work correctly.
+                        A null value will open the default ICU data-files
+
+        ***********************************************************************/
+
+        this (inout ULocale locale, char[] path = null)
+        {
+                UErrorCode e;
+
+                handle = ures_open (toString(path), toString(locale.name), e);
+                testError (e, "failed to open resource bundle");
+        }
+
+        /***********************************************************************
+
+                Release the underlying ICU resource-bundle handle
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                ures_close (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the size of a resource. Size for scalar types is
+                always 1, and for vector/table types is the number of child
+                resources.
+
+        ***********************************************************************/
+
+        uint getSize ()
+        {
+                return ures_getSize (handle);
+        }
+
+        /***********************************************************************
+
+                Returns a signed integer from a resource. This integer is
+                originally 28 bit and the sign gets propagated.
+
+        ***********************************************************************/
+
+        int getInt ()
+        {
+                UErrorCode e;
+
+                int x = ures_getInt (handle, e);
+                testError (e, "failed to get resource integer");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns a string from a string resource type
+
+        ***********************************************************************/
+
+        UStringView getString ()
+        {
+                UErrorCode e;
+                uint       len;
+
+                wchar* x = ures_getString (handle, len, e);
+                testError (e, "failed to get resource string");
+                return new UStringView (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns the string in a given resource at the specified
+                index
+
+        ***********************************************************************/
+
+        UStringView getString (uint index)
+        {
+                UErrorCode e;
+                uint       len;
+
+                wchar* x = ures_getStringByIndex (handle, index, len, e);
+                testError (e, "failed to get resource string");
+                return new UStringView (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns a string in a resource that has a given key. This
+                procedure works only with table resources.
+
+        ***********************************************************************/
+
+        UStringView getString (char[] key)
+        {
+                UErrorCode e;
+                uint       len;
+
+                wchar* x = ures_getStringByKey (handle, toString(key), len, e);
+                testError (e, "failed to get resource string");
+                return new UStringView (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns the next string in a resource. The key that ICU
+                reports alongside the string is not exposed.
+
+        ***********************************************************************/
+
+        UStringView getNextString ()
+        {
+                UErrorCode e;
+                uint       len;
+                char*      key;
+
+                wchar* x = ures_getNextString (handle, len, key, e);
+                testError (e, "failed to get next resource string");
+                return new UStringView (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns a binary data from a resource. Can be used at most
+                primitive resource types (binaries, strings, ints)
+
+        ***********************************************************************/
+
+        void[] getBinary ()
+        {
+                UErrorCode e;
+                uint       len;
+
+                void* x = ures_getBinary (handle, len, e);
+                testError (e, "failed to get binary resource");
+                return x[0..len];
+        }
+
+        /***********************************************************************
+
+                Returns an integer vector from a resource
+
+        ***********************************************************************/
+
+        int[] getIntVector ()
+        {
+                UErrorCode e;
+                uint       len;
+
+                int* x = ures_getIntVector (handle, len, e);
+                testError (e, "failed to get vector resource");
+                return x[0..len];
+        }
+
+        /***********************************************************************
+
+                Checks whether the resource has another element to
+                iterate over
+
+        ***********************************************************************/
+
+        bool hasNext ()
+        {
+                return ures_hasNext (handle) != 0;
+        }
+
+        /***********************************************************************
+
+                Resets the internal context of a resource so that
+                iteration starts from the first element
+
+        ***********************************************************************/
+
+        void resetIterator ()
+        {
+                ures_resetIterator (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the next resource in a given resource or NULL if
+                there are no more resources
+
+        ***********************************************************************/
+
+        UResourceBundle getNextResource ()
+        {
+                UErrorCode e;
+
+                return get (ures_getNextResource (handle, null, e), e);
+        }
+
+        /***********************************************************************
+
+                Returns a resource that has a given key. This procedure
+                works only with table resources.
+
+        ***********************************************************************/
+
+        UResourceBundle getResource (char[] key)
+        {
+                UErrorCode e;
+
+                return get (ures_getByKey (handle, toString(key), null, e), e);
+        }
+
+        /***********************************************************************
+
+                Returns the resource at the specified index
+
+        ***********************************************************************/
+
+        UResourceBundle getResource (uint index)
+        {
+                UErrorCode e;
+
+                return get (ures_getByIndex (handle, index, null, e), e);
+        }
+
+        /***********************************************************************
+
+                Return the version number associated with this ResourceBundle
+                as a UVersionInfo array
+
+        ***********************************************************************/
+
+        void getVersion (inout Version info)
+        {
+                ures_getVersion (handle, info);
+        }
+
+        /***********************************************************************
+
+                Return the ULocale associated with this ResourceBundle
+
+        ***********************************************************************/
+
+        void getLocale (inout ULocale locale)
+        {
+                UErrorCode e;
+
+                locale.name = toArray (ures_getLocale (handle, e));
+                testError (e, "failed to get resource locale");
+        }
+
+        /***********************************************************************
+
+                Returns the key associated with this resource. Not all
+                the resources have a key - only those that are members
+                of a table.
+
+        ***********************************************************************/
+
+        char[] getKey ()
+        {
+                return toArray (ures_getKey (handle));
+        }
+
+        /***********************************************************************
+
+                Returns the type of a resource. Available types are
+                defined in enum ResType
+
+        ***********************************************************************/
+
+        ResType getType ()
+        {
+                return cast(ResType) ures_getType (handle);
+        }
+
+        /***********************************************************************
+
+                Worker function for constructing internal ResourceBundle
+                instances. Returns null when the provided handle is null.
+
+        ***********************************************************************/
+
+        private static final UResourceBundle get (Handle handle, inout UErrorCode e)
+        {
+                testError (e, "failed to create resource bundle");
+                if (handle)
+                    return new UResourceBundle (handle);
+                return null;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ures_* entry points of the ICU
+                C API. Every pointer declared here needs a matching entry
+                in 'targets' below, otherwise it is never bound.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle  function (char*, char*, inout UErrorCode) ures_open;
+                void    function (Handle) ures_close;
+                char*   function (Handle, inout UErrorCode) ures_getLocale;
+                void    function (Handle, inout Version) ures_getVersion;
+                uint    function (Handle) ures_getSize;
+                int     function (Handle, inout UErrorCode) ures_getInt;
+                wchar*  function (Handle, inout uint, inout UErrorCode) ures_getString;
+                wchar*  function (Handle, uint, inout uint, inout UErrorCode) ures_getStringByIndex;
+                wchar*  function (Handle, char*, inout uint, inout UErrorCode) ures_getStringByKey;
+                void*   function (Handle, inout uint, inout UErrorCode) ures_getBinary;
+                int*    function (Handle, inout uint, inout UErrorCode) ures_getIntVector;
+                byte    function (Handle) ures_hasNext;
+                void    function (Handle) ures_resetIterator;
+                wchar*  function (Handle, inout uint, inout char*, inout UErrorCode) ures_getNextString;
+                char*   function (Handle) ures_getKey;
+                int     function (Handle) ures_getType;
+                Handle  function (Handle, Handle, inout UErrorCode) ures_getNextResource;
+                Handle  function (Handle, uint, Handle, inout UErrorCode) ures_getByIndex;
+                Handle  function (Handle, char*, Handle, inout UErrorCode) ures_getByKey;
+        }
+
+        /***********************************************************************
+
+                Binding table: pairs the address of each function pointer
+                declared above with the symbol name to bind it to.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &ures_open,               "ures_open"},
+                {cast(void**) &ures_close,              "ures_close"},
+                {cast(void**) &ures_getLocale,          "ures_getLocale"},
+                {cast(void**) &ures_getVersion,         "ures_getVersion"},
+                {cast(void**) &ures_getSize,            "ures_getSize"},
+                {cast(void**) &ures_getInt,             "ures_getInt"},
+                {cast(void**) &ures_getString,          "ures_getString"},
+                {cast(void**) &ures_getStringByIndex,   "ures_getStringByIndex"},
+                {cast(void**) &ures_getStringByKey,     "ures_getStringByKey"},
+                {cast(void**) &ures_getBinary,          "ures_getBinary"},
+                // was missing: without this entry getIntVector() would call
+                // through a function pointer that was never bound
+                {cast(void**) &ures_getIntVector,       "ures_getIntVector"},
+                {cast(void**) &ures_hasNext,            "ures_hasNext"},
+                {cast(void**) &ures_resetIterator,      "ures_resetIterator"},
+                {cast(void**) &ures_getNextString,      "ures_getNextString"},
+                {cast(void**) &ures_getKey,             "ures_getKey"},
+                {cast(void**) &ures_getType,            "ures_getType"},
+                {cast(void**) &ures_getNextResource,    "ures_getNextResource"},
+                {cast(void**) &ures_getByIndex,         "ures_getByIndex"},
+                {cast(void**) &ures_getByKey,           "ures_getByKey"},
+                ];
+
+        /***********************************************************************
+
+                Module-load hook: binds the table above against the
+                library identified by 'icuuc' and remembers the result
+                for the static destructor.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+                //test ();
+        }
+
+        /***********************************************************************
+
+                Module-unload hook: hands the value saved by the static
+                constructor back to FunctionLoader.unbind().
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+
+        /***********************************************************************
+
+                Manual smoke test; only runs if the call in the static
+                constructor is uncommented.
+
+        ***********************************************************************/
+
+        static void test()
+        {
+                UResourceBundle b = new UResourceBundle (ULocale.Default);
+                UStringView t = b.getNextString();
+                UResourceBundle b1 = b.getNextResource ();
+        }
+}
+
+
diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/USearch.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/USearch.d Sun Apr 19 13:49:38 2009 +0200
@@ -0,0 +1,607 @@
+/*******************************************************************************
+
+        @file USearch.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty.
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.USearch; + +private import com.ibm.icu.mangoicu.ICU; + +public import com.ibm.icu.mangoicu.ULocale, + com.ibm.icu.mangoicu.UString, + com.ibm.icu.mangoicu.UCollator, + com.ibm.icu.mangoicu.UBreakIterator; + +/******************************************************************************* + + APIs for an engine that provides language-sensitive text + searching based on the comparison rules defined in a UCollator + data struct. This ensures that language eccentricity can be handled, + e.g. for the German collator, characters ß and SS will be matched + if case is chosen to be ignored. See the "ICU Collation Design + Document" for more information. + + The algorithm implemented is a modified form of the Boyer-Moore + search. See "Efficient Text Searching in Java", published in + Java Report in February, 1999, for further information + on the algorithm. + + There are 2 match options for selection: Let S' be the sub-string + of a text string S between the offsets start and end [start, end]. A + pattern string P matches a text string S at the offsets [start, end] if + + - option 1. Some canonical equivalent of P matches some canonical + equivalent of S' + + - option 2. P matches S' and if P starts or ends with a combining + mark, there exists no non-ignorable combining mark before + or after S' in S respectively. + + Option 2 will be the default. + + This search has APIs similar to those of other text iteration + mechanisms such as the break iterators in ubrk.h. Using these + APIs, it is easy to scan through text looking for all occurrences + of a given pattern. This search iterator allows changing of + direction by calling a reset followed by a next or previous. + Though a direction change can occur without calling reset first, + this operation comes with some speed penalty. 
Generally, match + results in the forward direction will match the result matches + in the backwards direction in the reverse order. + + USearch provides APIs to specify the starting position within + the text string to be searched, e.g. setOffset(), previous(x) + and next(x). Since the starting position will be set as it + is specified, please take note that there are some dangerous + positions at which the search may render incorrect results: + + - The midst of a substring that requires normalization. + + - If the following match is to be found, the position should + not be the second character which requires to be swapped + with the preceding character. Vice versa, if the preceding + match is to be found, position to search from should not be + the first character which requires to be swapped with the + next character. E.g. certain Thai and Lao characters require + swapping. + + - If a following pattern match is to be found, any position + within a contracting sequence except the first will fail. + Vice versa if a preceding pattern match is to be found, + an invalid starting point would be any character within a + contracting sequence except the last. + + A breakiterator can be used if only matches at logical breaks are + desired. Using a breakiterator will only give you results that + exactly match the boundaries given by the breakiterator. For + instance the pattern "e" will not be found in the string "\u00e9" + if a character break iterator is used. + + Options are provided to handle overlapping matches. E.g. in + English, overlapping matches produce the results 0 and 2 for + the pattern "abab" in the text "ababab", whereas mutually + exclusive matches only produce the result 0. + + Though collator attributes will be taken into consideration while + performing matches, there are no APIs here for setting and getting + the attributes. These attributes can be set by getting the collator + from getCollator() and using the APIs in UCollator. 
Lastly to update
+        String Search to the new collator attributes, reset() has to be called.
+
+        See http://oss.software.ibm.com/icu/apiref/usearch_8h.html for full
+        details.
+
+*******************************************************************************/
+
+class USearch : ICU
+{
+        private Handle handle;
+        private UBreakIterator* iterator;
+
+        // Done is returned by previous() and next() after all valid
+        // matches have been returned, and by first() and last() if
+        // there are no matches at all.
+        const uint Done = uint.max;
+
+        // Possible types of searches (not referenced by any method of this class)
+        public enum Attribute
+        {
+                Overlap,
+                CanonicalMatch,
+                Count
+        }
+
+        public enum AttributeValue
+        {
+                Default = -1,
+                Off,
+                On,
+                Count
+        }
+
+        /***********************************************************************
+
+                Creating a search iterator data struct using the argument
+                locale language rule set. A null iterator is permitted.
+
+        ***********************************************************************/
+
+        this (UStringView pattern, UStringView text, inout ULocale locale, UBreakIterator* iterator = null)
+        {
+                UErrorCode e;
+
+                this.iterator = iterator;
+                handle = usearch_open (pattern.get.ptr, pattern.length, text.get.ptr, text.length, toString(locale.name), ( iterator is null ) ? null : iterator.handle, e);
+                testError (e, "failed to open search");
+        }
+
+        /***********************************************************************
+
+                Creating a search iterator data struct using the argument
+                collator language rule set. A null iterator is permitted.
+
+        ***********************************************************************/
+
+        this (UStringView pattern, UStringView text, UCollator col, UBreakIterator* iterator = null)
+        {
+                UErrorCode e;
+
+                this.iterator = iterator;
+                handle = usearch_openFromCollator (pattern.get.ptr, pattern.length, text.get.ptr, text.length, col.handle, ( iterator is null ) ? null : iterator.handle, e);
+                testError (e, "failed to open search from collator");
+        }
+
+        /***********************************************************************
+
+                Close this USearch
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                usearch_close (handle);
+        }
+
+        /***********************************************************************
+
+                Sets the current position in the text string which the
+                next search will start from.
+
+        ***********************************************************************/
+
+        void setOffset (uint position)
+        {
+                UErrorCode e;
+
+                usearch_setOffset (handle, position, e);
+                testError (e, "failed to set search offset");
+        }
+
+        /***********************************************************************
+
+                Return the current index in the string text being searched
+
+        ***********************************************************************/
+
+        uint getOffset ()
+        {
+                return usearch_getOffset (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the index to the match in the text string that was
+                searched
+
+        ***********************************************************************/
+
+        uint getMatchedStart ()
+        {
+                return usearch_getMatchedStart (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the length of text in the string which matches the
+                search pattern
+
+        ***********************************************************************/
+
+        uint getMatchedLength ()
+        {
+                return usearch_getMatchedLength (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the text that was matched by the most recent call to
+                first(), next(), previous(), or last().
+
+        ***********************************************************************/
+
+        void getMatchedText (UString s)
+        {
+                uint fmt (wchar* dst, uint length, inout UErrorCode e)
+                {
+                        return usearch_getMatchedText (handle, dst, length, e);
+                }
+
+                s.format (&fmt, "failed to extract matched text");
+        }
+
+        /***********************************************************************
+
+                Set the string text to be searched.
+
+        ***********************************************************************/
+
+        void setText (UStringView t)
+        {
+                UErrorCode e;
+
+                usearch_setText (handle, t.get.ptr, t.length, e);
+                testError (e, "failed to set search text");
+        }
+
+        /***********************************************************************
+
+                Return the string text to be searched. Note that this
+                returns a read-only reference to the search text.
+
+        ***********************************************************************/
+
+        UStringView getText ()
+        {
+                uint len;
+
+                wchar *x = usearch_getText (handle, &len);
+                return new UStringView (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Sets the pattern used for matching
+
+        ***********************************************************************/
+
+        void setPattern (UStringView t)
+        {
+                UErrorCode e;
+
+                usearch_setPattern (handle, t.get.ptr, t.length, e);
+                testError (e, "failed to set search pattern");
+        }
+
+        /***********************************************************************
+
+                Gets the search pattern. Note that this returns a
+                read-only reference to the pattern.
+
+        ***********************************************************************/
+
+        UStringView getPattern ()
+        {
+                uint len;
+
+                wchar *x = usearch_getPattern (handle, &len);
+                return new UStringView (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Set the BreakIterator that will be used to restrict the
+                points at which matches are detected (null: no restriction).
+
+        ***********************************************************************/
+
+        void setIterator (UBreakIterator* iterator)
+        {
+                UErrorCode e;
+
+                this.iterator = iterator;
+                usearch_setBreakIterator (handle, ( iterator is null ) ? cast(Handle) null : cast(Handle) iterator.handle, e);   // null-safe, like the constructors
+                testError (e, "failed to set search iterator");
+        }
+
+        /***********************************************************************
+
+                Get the BreakIterator that will be used to restrict the
+                points at which matches are detected.
+
+        ***********************************************************************/
+
+        UBreakIterator* getIterator ()
+        {
+                return iterator;
+        }
+
+        /***********************************************************************
+
+                Returns the first index at which the string text matches
+                the search pattern
+
+        ***********************************************************************/
+
+        uint first ()
+        {
+                UErrorCode e;
+
+                uint x = usearch_first (handle, e);
+                testError (e, "failed on first search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns the last index in the target text at which it
+                matches the search pattern
+
+        ***********************************************************************/
+
+        uint last ()
+        {
+                UErrorCode e;
+
+                uint x = usearch_last (handle, e);
+                testError (e, "failed on last search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns the index of the next point at which the string
+                text matches the search pattern, starting from the current
+                position.
+
+                If pos is specified, returns the first index greater than
+                pos at which the string text matches the search pattern
+
+        ***********************************************************************/
+
+        uint next (uint pos = uint.max)
+        {
+                UErrorCode e;
+                uint x;
+
+                x = (pos == uint.max) ? usearch_next (handle, e) :
+                                        usearch_following (handle, pos, e);
+
+                testError (e, "failed on next search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns the index of the previous point at which the
+                string text matches the search pattern, starting at
+                the current position.
+
+                If pos is specified, returns the first index less
+                than pos at which the string text matches the search
+                pattern.
+
+        ***********************************************************************/
+
+        uint previous (uint pos = uint.max)
+        {
+                UErrorCode e;
+                uint x;
+
+                x = (pos == uint.max) ? usearch_previous (handle, e) :
+                                        usearch_preceding (handle, pos, e);
+
+                testError (e, "failed on previous search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Search will begin at the start of the text string if a
+                forward iteration is initiated before a backwards iteration.
+                Otherwise if a backwards iteration is initiated before a
+                forwards iteration, the search will begin at the end of the
+                text string
+
+        ***********************************************************************/
+
+        void reset ()
+        {
+                usearch_reset (handle);
+        }
+
+        /***********************************************************************
+
+                Gets the collator used for the language rules.
+
+        ***********************************************************************/
+
+        UCollator getCollator ()
+        {
+                return new UCollator (usearch_getCollator (handle));   // NOTE(review): ICU keeps ownership of this collator; confirm the UCollator wrapper does not close it
+        }
+
+        /***********************************************************************
+
+                Sets the collator used for the language rules. This
+                method causes internal data such as Boyer-Moore shift
+                tables to be recalculated, but the iterator's position
+                is unchanged
+
+        ***********************************************************************/
+
+        void setCollator (UCollator col)
+        {
+                UErrorCode e;
+
+                usearch_setCollator (handle, col.handle, e);
+                testError (e, "failed to set search collator");
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+                C function pointers; filled in by the static constructor below
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (wchar*, uint, wchar*, uint, char*, void*, inout UErrorCode) usearch_open;
+                Handle function (wchar*, uint, wchar*, uint, Handle, void*, inout UErrorCode) usearch_openFromCollator;
+                void function (Handle) usearch_close;
+                void function (Handle, uint, inout UErrorCode) usearch_setOffset;
+                uint function (Handle) usearch_getOffset;
+                uint function (Handle) usearch_getMatchedStart;
+                uint function (Handle) usearch_getMatchedLength;
+                uint function (Handle, wchar*, uint, inout UErrorCode) usearch_getMatchedText;
+                void function (Handle, wchar*, uint, inout UErrorCode) usearch_setText;
+                wchar* function (Handle, uint*) usearch_getText;
+                void function (Handle, wchar*, uint, inout UErrorCode) usearch_setPattern;
+                wchar* function (Handle, uint*) usearch_getPattern;
+                uint function (Handle, inout UErrorCode) usearch_first;
+                uint function (Handle, inout UErrorCode) usearch_last;
+                uint function (Handle, inout UErrorCode) usearch_next;
+                uint function (Handle, inout UErrorCode) usearch_previous;
+                uint function (Handle, uint, inout UErrorCode) usearch_following;
+                uint function (Handle, uint, inout UErrorCode) usearch_preceding;
+                void function (Handle) usearch_reset;
+                void function (Handle, Handle, inout UErrorCode) usearch_setBreakIterator;
+                Handle function (Handle) usearch_getCollator;
+                void function (Handle, Handle, inout UErrorCode) usearch_setCollator;
+        }
+
+        /***********************************************************************
+                Pairs each function pointer above with the symbol name to bind
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &usearch_open, "usearch_open"},
+                {cast(void**) &usearch_openFromCollator, "usearch_openFromCollator"},
+                {cast(void**) &usearch_close, "usearch_close"},
+                {cast(void**) &usearch_setOffset, "usearch_setOffset"},
+                {cast(void**) &usearch_getOffset, "usearch_getOffset"},
+                {cast(void**) &usearch_getMatchedStart, "usearch_getMatchedStart"},
+                {cast(void**) &usearch_getMatchedLength, "usearch_getMatchedLength"},
+                {cast(void**) &usearch_getMatchedText, "usearch_getMatchedText"},
+                {cast(void**) &usearch_setText, "usearch_setText"},
+                {cast(void**) &usearch_getText, "usearch_getText"},
+                {cast(void**) &usearch_setPattern, "usearch_setPattern"},
+                {cast(void**) &usearch_getPattern, "usearch_getPattern"},
+                {cast(void**) &usearch_first, "usearch_first"},
+                {cast(void**) &usearch_last, "usearch_last"},
+                {cast(void**) &usearch_next, "usearch_next"},
+                {cast(void**) &usearch_previous, "usearch_previous"},
+                {cast(void**) &usearch_following, "usearch_following"},
+                {cast(void**) &usearch_preceding, "usearch_preceding"},
+                {cast(void**) &usearch_reset, "usearch_reset"},
+                {cast(void**) &usearch_setBreakIterator, "usearch_setBreakIterator"},
+                {cast(void**) &usearch_getCollator, "usearch_getCollator"},
+                {cast(void**) &usearch_setCollator, "usearch_setCollator"},
+                ];
+
+        /***********************************************************************
+                Binds every entry of targets from the icuin library
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuin, targets);
+        }
+
+        /***********************************************************************
+                Releases the library bound by the static constructor
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/USet.d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/USet.d	Sun Apr 19 13:49:38 2009 +0200
@@ -0,0 +1,472 @@
+/*******************************************************************************
+
+        @file USet.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+
+        Permission is hereby granted to anyone to use this software for any
+        purpose, including commercial applications, and to alter it and/or
+        redistribute it freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented; you must
+           not claim that you wrote the original software. If you use this
+           software in a product, an acknowledgment within documentation of
+           said product would be appreciated but is not required.
+
+        2. Altered source versions must be plainly marked as such, and must
+           not be misrepresented as being the original software.
+
+        3. This notice may not be removed or altered from any distribution
+           of the source.
+
+        4. Derivative works are permitted, but they must carry this notice
+           in full and credit the original source.
+
+
+                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+        @version        Initial version, November 2004
+        @author         Kris
+
+        Note that this package and documentation is built around the ICU
+        project (http://oss.software.ibm.com/icu/).
Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module com.ibm.icu.mangoicu.USet; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +/******************************************************************************* + + A mutable set of Unicode characters and multicharacter strings. + + Objects of this class represent character classes used in regular + expressions. A character class specifies a subset of Unicode code points. + Legal code points are U+0000 to U+10FFFF, inclusive. + + UnicodeSet supports two APIs. The first is the operand API that + allows the caller to modify the value of a UnicodeSet object. It + conforms to Java 2's java.util.Set interface, although UnicodeSet + does not actually implement that interface. All methods of Set are + supported, with the modification that they take a character range + or single character instead of an Object, and they take a UnicodeSet + instead of a Collection. The operand API may be thought of in terms + of boolean logic: a boolean OR is implemented by add, a boolean AND + is implemented by retain, a boolean XOR is implemented by complement + taking an argument, and a boolean NOT is implemented by complement + with no argument. In terms of traditional set theory function names, + add is a union, retain is an intersection, remove is an asymmetric + difference, and complement with no argument is a set complement with + respect to the superset range MIN_VALUE-MAX_VALUE. + + The second API is the applyPattern()/toPattern() API from the + java.text.Format-derived classes. Unlike the methods that add + characters, add categories, and control the logic of the set, + the method applyPattern() sets all attributes of a UnicodeSet + at once, based on a string pattern. + + See http://oss.software.ibm.com/icu/apiref/uset_8h.html + for full details. 
+
+*******************************************************************************/
+
+class USet : ICU
+{
+        package Handle handle;
+
+        enum Options
+        {
+                None = 0,
+                IgnoreSpace = 1,
+                CaseInsensitive = 2,
+        }
+
+
+        /***********************************************************************
+
+                Creates a USet object that contains the range of code points
+                start..end, inclusive (dchar, so U+0000..U+10FFFF is reachable)
+
+        ***********************************************************************/
+
+        this (dchar start, dchar end)
+        {
+                handle = uset_open (start, end);
+        }
+
+        /***********************************************************************
+
+                Creates a set from the given pattern. See the UnicodeSet
+                class description for the syntax of the pattern language
+
+        ***********************************************************************/
+
+        this (UStringView pattern, Options o = Options.None)
+        {
+                UErrorCode e;
+
+                handle = uset_openPatternOptions (pattern.get.ptr, pattern.len, o, e);
+                testError (e, "failed to open pattern-based charset");
+        }
+
+        /***********************************************************************
+
+                Internal constructor invoked via UCollator
+
+        ***********************************************************************/
+
+        package this (Handle handle)
+        {
+                this.handle = handle;
+        }
+
+        /***********************************************************************
+
+                Disposes of the storage used by a USet object
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                uset_close (handle);
+        }
+
+        /***********************************************************************
+
+                Modifies the set to represent the set specified by the
+                given pattern. See the UnicodeSet class description for
+                the syntax of the pattern language. See also the User
+                Guide chapter about UnicodeSet. Empties the set passed
+                before applying the pattern.
+
+        ***********************************************************************/
+
+        void applyPattern (UStringView pattern, Options o = Options.None)
+        {
+                UErrorCode e;
+
+                uset_applyPattern (handle, pattern.get.ptr, pattern.len, o, e);
+                testError (e, "failed to apply pattern");
+        }
+
+        /***********************************************************************
+
+                Returns a string representation of this set. If the result
+                of calling this function is passed to a uset_openPattern(),
+                it will produce another set that is equal to this one.
+
+        ***********************************************************************/
+
+        void toPattern (UString dst, bool escape)
+        {
+                uint fmt (wchar* p, uint len, inout UErrorCode e)
+                {
+                        return uset_toPattern (handle, p, len, escape, e);
+                }
+
+                dst.format (&fmt, "failed to convert charset to a pattern");
+        }
+
+        /***********************************************************************
+
+                Adds the given character to this USet. After this call,
+                contains (c) will return true.
+
+        ***********************************************************************/
+
+        void add (dchar c)
+        {
+                uset_add (handle, c);
+        }
+
+        /***********************************************************************
+
+                Adds all of the elements in the specified set to this set
+                if they're not already present. This operation effectively
+                modifies this set so that its value is the union of the two
+                sets. The behavior of this operation is unspecified if the
+                specified collection is modified while the operation is in
+                progress.
+
+        ***********************************************************************/
+
+        void addSet (USet other)
+        {
+                uset_addAll (handle, other.handle);
+        }
+
+        /***********************************************************************
+
+                Adds the given range of characters to this USet. After
+                this call, containsRange (start, end) will return true
+
+        ***********************************************************************/
+
+        void addRange (dchar start, dchar end)
+        {
+                uset_addRange (handle, start, end);
+        }
+
+        /***********************************************************************
+
+                Adds the given string to this USet. After this call,
+                containsString (t) will return true
+
+        ***********************************************************************/
+
+        void addString (UStringView t)
+        {
+                uset_addString (handle, t.get.ptr, t.len);
+        }
+
+        /***********************************************************************
+
+                Removes the given character from this USet. After the
+                call, contains (c) will return false
+
+        ***********************************************************************/
+
+        void remove (dchar c)
+        {
+                uset_remove (handle, c);
+        }
+
+        /***********************************************************************
+
+                Removes the given range of characters from this USet.
+                After the call, containsRange (start, end) will return false
+
+        ***********************************************************************/
+
+        void removeRange (dchar start, dchar end)
+        {
+                uset_removeRange (handle, start, end);
+        }
+
+        /***********************************************************************
+
+                Removes the given string from this USet. After the call,
+                containsString (t) will return false
+
+        ***********************************************************************/
+
+        void removeString (UStringView t)
+        {
+                uset_removeString (handle, t.get.ptr, t.len);
+        }
+
+        /***********************************************************************
+
+                Inverts this set. This operation modifies this set so
+                that its value is its complement. This operation does
+                not affect the multicharacter strings, if any
+
+        ***********************************************************************/
+
+        void complement ()
+        {
+                uset_complement (handle);
+        }
+
+        /***********************************************************************
+
+                Removes all of the elements from this set. This set will
+                be empty after this call returns.
+
+        ***********************************************************************/
+
+        void clear ()
+        {
+                uset_clear (handle);
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains no characters and no
+                strings
+
+        ***********************************************************************/
+
+        bool isEmpty ()
+        {
+                return uset_isEmpty (handle) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains the given character
+
+        ***********************************************************************/
+
+        bool contains (dchar c)
+        {
+                return uset_contains (handle, c) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains all characters c where
+                start <= c && c <= end
+
+        ***********************************************************************/
+
+        bool containsRange (dchar start, dchar end)
+        {
+                return uset_containsRange (handle, start, end) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains the given string
+
+        ***********************************************************************/
+
+        bool containsString (UStringView t)
+        {
+                return uset_containsString (handle, t.get.ptr, t.len) != 0;
+        }
+
+        /***********************************************************************
+                Returns the value reported by uset_size() for this set
+        ***********************************************************************/
+
+        uint size ()
+        {
+                return uset_size (handle);
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+                C function pointers; filled in by the static constructor below
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (dchar start, dchar end) uset_open;   // code points are 32-bit UChar32 in the C API, hence dchar
+                void function (Handle) uset_close;
+                Handle function (wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_openPatternOptions;
+                uint function (Handle, wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_applyPattern;
+                uint function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout UErrorCode e) uset_toPattern;
+                void function (Handle, dchar c) uset_add;
+                void function (Handle, Handle additionalSet) uset_addAll;
+                void function (Handle, dchar start, dchar end) uset_addRange;
+                void function (Handle, wchar* str, uint strLen) uset_addString;
+                void function (Handle, dchar c) uset_remove;
+                void function (Handle, dchar start, dchar end) uset_removeRange;
+                void function (Handle, wchar* str, uint strLen) uset_removeString;
+                void function (Handle) uset_complement;
+                void function (Handle) uset_clear;
+                byte function (Handle) uset_isEmpty;
+                byte function (Handle, dchar c) uset_contains;
+                byte function (Handle, dchar start, dchar end) uset_containsRange;
+                byte function (Handle, wchar* str, uint strLen) uset_containsString;
+                uint function (Handle) uset_size;
+        }
+
+        /***********************************************************************
+                Pairs each function pointer above with the symbol name to bind
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &uset_open, "uset_open"},
+                {cast(void**) &uset_close, "uset_close"},
+                {cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"},
+                {cast(void**) &uset_applyPattern, "uset_applyPattern"},
+                {cast(void**) &uset_toPattern, "uset_toPattern"},
+                {cast(void**) &uset_add, "uset_add"},
+                {cast(void**) &uset_addAll, "uset_addAll"},
+                {cast(void**) &uset_addRange, "uset_addRange"},
+                {cast(void**) &uset_addString, "uset_addString"},
+                {cast(void**) &uset_remove, "uset_remove"},
+                {cast(void**) &uset_removeRange, "uset_removeRange"},
+                {cast(void**) &uset_removeString, "uset_removeString"},
+                {cast(void**) &uset_complement, "uset_complement"},
+                {cast(void**) &uset_clear, "uset_clear"},
+                {cast(void**) &uset_isEmpty, "uset_isEmpty"},
+                {cast(void**) &uset_contains, "uset_contains"},
+                {cast(void**) &uset_containsRange, "uset_containsRange"},
+                {cast(void**) &uset_containsString, "uset_containsString"},
+                {cast(void**) &uset_size, "uset_size"},
+                ];
+
+        /***********************************************************************
+                Binds every entry of targets from the icuuc library
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+        }
+
+        /***********************************************************************
+                Releases the library bound by the static constructor
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
+
diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UString.d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UString.d	Sun Apr 19 13:49:38 2009 +0200
@@ -0,0 +1,1508 @@
+/*******************************************************************************
+
+        @file UString.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+
+*******************************************************************************/
+
+module com.ibm.icu.mangoicu.UString;
+
+private import  com.ibm.icu.mangoicu.ICU,
+                com.ibm.icu.mangoicu.UChar,
+                com.ibm.icu.mangoicu.ULocale;
+import java.lang.util;
+/*******************************************************************************
+
+        C runtime memmove; UString uses it to shift code units within its buffer
+
+*******************************************************************************/
+
+private extern (C) void memmove (void* dst, void* src, uint bytes);
+
+/*******************************************************************************
+
+        Bind to the IReadable and IWritable interfaces if we're building
+        along with the mango.io package
+
+        The Isolated version identifier is set unconditionally just below,
+        so the empty stand-in interfaces are the ones that get compiled and
+        the mango.io dependent code in this module is skipped.
+
+*******************************************************************************/
+
+version=Isolated;
+version (Isolated)
+        {
+        private interface ITextOther {}
+        private interface IStringOther {}
+        }
+     else
+        {
+        private import com.ibm.icu.mangoicu.UMango;
+
+        private import  mango.io.model.IReader,
+                        mango.io.model.IWriter;
+
+        private interface ITextOther : IWritable {}
+        private interface IStringOther : IReadable {}
+        }
+
+
+/*******************************************************************************
+
+        UString is a string class that stores Unicode characters directly
+        and provides similar functionality as the Java String class.
+
+        In ICU, a Unicode string consists of 16-bit Unicode code units.
+        A Unicode character may be stored with either one code unit —
+        which is the most common case — or with a matched pair of
+        special code units ("surrogates"). The data type for code units
+        is UChar.
+
+        For single-character handling, a Unicode character code point is
+        a value in the range 0..0x10ffff. ICU uses the UChar32 type for
+        code points.
+
+        Indexes and offsets into and lengths of strings always count code
+        units, not code points. This is the same as with multi-byte char*
+        strings in traditional string handling.
Operations on partial
+        strings typically do not test for code point boundaries. If necessary,
+        the user needs to take care of such boundaries by testing for the code
+        unit values or by using functions like getChar32Start()
+        and getChar32Limit()
+
+        UString methods are more lenient with regard to input parameter values
+        than other ICU APIs. In particular:
+
+        - If indexes are out of bounds for a UString object (< 0 or > length)
+          then they are "pinned" to the nearest boundary.
+
+        - If primitive string pointer values (e.g., const wchar* or char*) for
+          input strings are null, then those input string parameters are treated
+          as if they pointed to an empty string. However, this is not the case
+          for char* parameters for charset names or other IDs.
+
+*******************************************************************************/
+
+class UString : UStringView, IStringOther
+{
+        // append() and setCharAt() are alternative names for the operators
+        alias opCat             append;
+        alias opIndexAssign     setCharAt;
+
+        /***********************************************************************
+
+                Create an empty UString with the specified available space
+
+        ***********************************************************************/
+
+        this (uint space = 0)
+        {
+                // reserve capacity only; the valid length (len) is not touched here
+                content.length = space;
+                mutable = true;
+        }
+
+        /***********************************************************************
+
+                Create a UString upon the provided content. If said content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false. Doing so will avoid allocating
+                heap-space for the content until it is modified.
+
+        ***********************************************************************/
+
+        this (CString16 content, bool mutable = true)
+        {
+                setTo (content, mutable);
+        }
+
+        /***********************************************************************
+
+                Create a UString via the content of a UStringView. Note that the
+                default is to assume the content is immutable (read-only).
+
+        ***********************************************************************/
+
+        this (UStringView other, bool mutable = false)
+        {
+                // other.get is the valid part of the source (content[0..len])
+                this (other.get, mutable);
+        }
+
+        /***********************************************************************
+
+                Create a UString via the content of a UString. If said content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false. Doing so will avoid allocating
+                heap-space for the content until it is modified via UString
+                methods.
+
+        ***********************************************************************/
+
+        this (UString other, bool mutable = true)
+        {
+                this (other.get, mutable);
+        }
+
+        /***********************************************************************
+
+                Support for reading content via the IO system
+
+                Everything in the else branch is compiled only when the
+                Isolated version identifier is not set.
+
+        ***********************************************************************/
+
+        version (Isolated){}
+        else
+        {
+                /***************************************************************
+
+                        Internal adapter to handle loading and conversion
+                        of UString content. Once constructed, this may be
+                        used as the target for an IReader. Alternatively,
+                        invoke the load() method with an IBuffer of choice.
+
+                ***************************************************************/
+
+                class UStringDecoder : StringDecoder16
+                {
+                        // the UString that receives the decoded content
+                        private UString s;
+
+                        // construct a decoder on the given UString
+                        this (UConverter c, uint bytes, UString s)
+                        {
+                                super (c, bytes);
+                                this.s = s;
+                        }
+
+                        // IReadable adapter to perform the conversion
+                        protected void read (IReader r)
+                        {
+                                load (r.buffer);
+                        }
+
+                        // read from the provided buffer until we
+                        // either have all the content, or an eof
+                        // condition throws an exception.
+                        package void load (IBuffer b)
+                        {
+                                // first pass decodes into whatever capacity
+                                // the target already has
+                                uint produced = super.read (b, s.content);
+                                while (toGo)
+                                      {
+                                      // expand() measures free space from
+                                      // s.len, so bring it up to date first;
+                                      // with the stale pre-load length the
+                                      // buffer might never grow and the next
+                                      // read would get an empty slice.
+                                      // NOTE(review): assumes toGo is what is
+                                      // still outstanding - confirm in UMango
+                                      s.len = produced;
+                                      s.expand (toGo);
+                                      produced += super.read (b, s.content[produced..$]);
+                                      }
+                                s.len = produced;
+                        }
+                }
+
+                /***************************************************************
+
+                        Another constructor for loading known content length
+                        into a UString.
+
+                        Space for contentLength code units is reserved up
+                        front, then a UStringDecoder built on 'cvt' loads
+                        from 'buffer' into this instance.
+
+                ***************************************************************/
+
+                this (IBuffer buffer, uint contentLength, UConverter cvt)
+                {
+                        this (contentLength);
+                        UStringDecoder sd = new UStringDecoder (cvt, contentLength, this);
+                        sd.load (buffer);
+                }
+
+                /***************************************************************
+
+                        Read as many bytes from the input as is necessary
+                        to produce the expected number of wchar elements.
+                        This uses the default wchar handler, which can be
+                        altered by binding a StringDecoder to the IReader
+                        in use (see UMango for details).
+
+                        We're mutable, so ensure we don't mess with the
+                        IO buffers. Interestingly, changing the length
+                        of a D array will account for slice assignments
+                        (it checks the pointer to see if it's a starting
+                        point in the pool). Unfortunately, that doesn't
+                        catch the case where a slice starts at offset 0,
+                        which is where IBuffer slices may come from.
+
+                        To be safe, we ask the allocator in use whether
+                        the content it provided can be mutated or not.
+                        Note that this is not necessary for UStringView, since
+                        that is a read-only construct.
+
+                ***************************************************************/
+
+                void read (IReader r)
+                {
+                        r.get (content);
+                        len = content.length;
+                        mutable = r.getAllocator.isMutable (content);
+                }
+
+                /***************************************************************
+
+                        Return a streaming decoder that can be used to
+                        populate this UString with a specified number of
+                        input bytes.
+
+                        This differs from the above read() method in the
+                        way content is read: in the above case, exactly
+                        the specified number of wchar elements will be
+                        converted from the input, whereas in this case
+                        a variable number of wchar elements are converted
+                        until 'bytes' have been read from the input. This
+                        is useful in those cases where the original number
+                        of elements has been lost, and only the resultant
+                        converted byte-count remains (a la HTTP).
+
+                        The returned StringDecoder is one-shot only. You may
+                        reuse it (both the converter and the byte count) via
+                        its reset() method.
+
+                        One applies the resultant converter directly with an
+                        IReader like so:
+
+                        @code
+                        UString s = ...;
+                        IReader r = ...;
+
+                        // r >> s.createDecoder(cvt, bytes);
+                        r.get (s.createDecoder(cvt, bytes));
+                        @endcode
+
+                        which will read the specified number of bytes from
+                        the input and convert them to an appropriate number
+                        of wchars within the UString.
+
+                ***************************************************************/
+
+                StringDecoder createDecoder (UConverter c, uint bytes)
+                {
+                        return new UStringDecoder (c, bytes, this);
+                }
+        }
+
+        /***********************************************************************
+
+                Append text to this UString
+
+        ***********************************************************************/
+
+        UString opCat (UStringView other)
+        {
+                return opCat (other.get);
+        }
+
+        /***********************************************************************
+
+                Append partial text to this UString
+
+        ***********************************************************************/
+
+        UString opCat (UStringView other, uint start, uint len=uint.max)
+        {
+                // 'len' is the parameter here (span length), not the member;
+                // the span is pinned against 'other' before slicing
+                other.pinIndices (start, len);
+                return opCat (other.content [start..start+len]);
+        }
+
+        /***********************************************************************
+
+                Append a single character to this UString
+
+        ***********************************************************************/
+
+        UString opCat (wchar chr)
+        {
+                return opCat (&chr, 1);
+        }
+
+        /***********************************************************************
+
+                Append text to this UString
+
+        ***********************************************************************/
+
+        UString opCat (wchar[] chars)
+        {
+                return opCat (chars.ptr, chars.length);
+        }
+
+        /***********************************************************************
+
+                Converts a sequence of UTF-8 bytes to UChars (UTF-16)
+                and appends the result to this UString
+
+        ***********************************************************************/
+
+        UString opCat (char[] chars)
+        {
+                // formatter handed to format(): converts into the space it
+                // is given and reports the UTF-16 length via 'x'
+                uint fmt (wchar* dst, uint len, inout UErrorCode e)
+                {
+                        uint x;
+
+                        u_strFromUTF8 (dst, len, &x, chars.ptr, chars.length, e);
+                        return x;
+                }
+
+                // first guess at the room needed: one code unit per input byte
+                expand (chars.length);
+                return format (&fmt, "failed to append UTF char[]");
+        }
+
+        /***********************************************************************
+
+                Set a section of this UString to the specified character
+
+        ***********************************************************************/
+
+        UString setTo (wchar chr, uint start=0, uint len=uint.max)
+        {
+                // 'len' is the parameter here (span length), not the member
+                pinIndices (start, len);
+                if (! mutable)
+                      realloc ();
+                content [start..start+len] = chr;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Set the content to the provided array. Parameter 'mutable'
+                specifies whether the given array is likely to change. If
+                not, the array is aliased until such time this UString is
+                altered.
+
+        ***********************************************************************/
+
+        UString setTo (CString16 chars, bool mutable = true)
+        {
+                len = chars.length;
+                // mutable: take a private copy now; otherwise alias the
+                // caller's array
+                if ((this.mutable = mutable) == true)
+                     content = chars.dup;
+                else
+                   content = cast(wchar[])chars;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Replace the content of this UString. If the new content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false.
Doing so will avoid allocating
+                heap-space for the content until it is modified via one of
+                these methods.
+
+        ***********************************************************************/
+
+        UString setTo (UStringView other, bool mutable = true)
+        {
+                return setTo (other.get, mutable);
+        }
+
+        /***********************************************************************
+
+                Replace the content of this UString with a section of
+                another. 'start' and 'len' are pinned against 'other'
+                before the section is taken. If the new content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false. Doing so will avoid allocating
+                heap-space for the content until it is modified via one of
+                these methods.
+
+        ***********************************************************************/
+
+        UString setTo (UStringView other, uint start, uint len, bool mutable = true)
+        {
+                other.pinIndices (start, len);
+                return setTo (other.content [start..start+len], mutable);
+        }
+
+        /***********************************************************************
+
+                Replace the character at the specified location.
+
+                The index must be less than the current length; this
+                is checked by the in-contract.
+
+        ***********************************************************************/
+
+        final UString opIndexAssign (wchar chr, uint index)
+        in {
+                if (index >= len)
+                    exception ("index out of bounds");
+           }
+        body
+        {
+                // take a private copy before touching aliased content
+                if (! mutable)
+                      realloc ();
+                content [index] = chr;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Remove a piece of this UString.
+
+                'start' and 'length' are pinned to the current content
+                first, so the default length removes everything from
+                'start' onward.
+
+        ***********************************************************************/
+
+        UString remove (uint start, uint length=uint.max)
+        {
+                pinIndices (start, length);
+                if (length)
+                   {
+                   if (start >= len)
+                       truncate (start);
+                   else
+                      {
+                      if (! mutable)
+                            realloc ();
+
+                      // pointer arithmetic rather than &content[i]: when the
+                      // removed span reaches the end of a full buffer, i equals
+                      // content.length and indexing there is a bounds violation
+                      uint i = start + length;
+                      memmove (content.ptr + start, content.ptr + i, (len-i) * wchar.sizeof);
+                      len -= length;
+                      }
+                   }
+                return this;
+        }
+
+        /***********************************************************************
+
+                Truncate the length of this UString.
+
+                Lengths greater than the current length are ignored. The
+                buffer itself is left as it is; only the valid length
+                changes.
+
+        ***********************************************************************/
+
+        UString truncate (uint length=0)
+        {
+                if (length <= len)
+                    len = length;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Insert 'count' leading pad characters (spaces by default)
+                in this UString
+
+        ***********************************************************************/
+
+        UString padLeading (uint count, wchar padChar = 0x0020)
+        {
+                expand (count);
+                // shift the existing text right by 'count'. Pointer arithmetic
+                // is used because &content[count] is a bounds violation when
+                // count equals content.length (empty text, exact capacity)
+                memmove (content.ptr + count, content.ptr, len * wchar.sizeof);
+                len += count;
+                return setTo (padChar, 0, count);
+        }
+
+        /***********************************************************************
+
+                Append 'length' trailing pad characters (spaces by
+                default) to this UString.
+
+        ***********************************************************************/
+
+        UString padTrailing (uint length, wchar padChar = 0x0020)
+        {
+                expand (length);
+                len += length;
+                return setTo (padChar, len-length, length);
+        }
+
+        /***********************************************************************
+
+                Check for available space within the buffer, and expand
+                as necessary.
+
+                On return there is room for 'count' more code units
+                after the valid content, and the buffer is private to
+                this UString. Callers write into that room directly, so
+                aliased (immutable) content is always detached here,
+                even when it already has spare capacity, for example
+                after a truncate().
+
+        ***********************************************************************/
+
+        package final void expand (uint count)
+        {
+                if (! mutable)
+                   {
+                   // copy the valid part into a fresh buffer, rounded up to
+                   // a multiple of 64 code units
+                   wchar[] aliased = content;
+                   content = new wchar [(len + count + 63) & ~63];
+                   content [0..len] = aliased [0..len];
+                   mutable = true;
+                   }
+                else
+                   if ((len + count) > content.length)
+                        realloc (count);
+        }
+
+        /***********************************************************************
+
+                Allocate memory due to a change in the content. We handle
+                the distinction between mutable and immutable here.
+
+                The new capacity is the current capacity plus 'count',
+                rounded up to a multiple of 64 code units. Mutable content
+                is resized in place; aliased content is copied into a
+                fresh buffer and this UString becomes mutable.
+
+        ***********************************************************************/
+
+        private final void realloc (uint count = 0)
+        {
+                uint size = (content.length + count + 63) & ~63;
+
+                if (mutable)
+                    content.length = size;
+                else
+                   {
+                   mutable = true;
+                   wchar[] x = content;
+                   content = new wchar [size];
+                   // copy only the valid part: after a truncate() the
+                   // aliased array is longer than len, and array copies
+                   // require both sides to have the same length
+                   if (len)
+                       content[0..len] = x[0..len];
+                   }
+        }
+
+        /***********************************************************************
+
+                Internal method to support UString appending
+
+        ***********************************************************************/
+
+        private final UString opCat (wchar* chars, uint count)
+        {
+                // aliased content must be detached even when it has spare
+                // capacity, otherwise the append lands in the caller's array
+                if (! mutable)
+                      realloc (count);
+                else
+                   expand (count);
+                content[len..len+count] = chars[0..count];
+                len += count;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Internal method to support formatting into this UString.
+                This is used by many of the ICU wrappers to append content
+                into a UString.
+
+                The formatter is given the free space after the valid
+                content and returns the length of its output. If it
+                reports a buffer overflow, the buffer is expanded by the
+                returned length and the formatter is called again. Any
+                other error raises an exception with 'msg'.
+
+        ***********************************************************************/
+
+        typedef uint delegate (wchar* dst, uint len, inout UErrorCode e) Formatter;
+
+        package final UString format (Formatter format, CString msg)
+        {
+                UErrorCode e;
+                uint       length;
+
+                // the formatter writes through a raw pointer, so never hand
+                // it aliased content
+                if (! mutable)
+                      realloc ();
+
+                while (true)
+                      {
+                      e = e.OK;
+                      // content.ptr + len rather than &content[len]: len may
+                      // equal content.length (no free space yet), and indexing
+                      // there is a bounds violation
+                      length = format (content.ptr + len, content.length - len, e);
+                      if (e == e.BufferOverflow)
+                          expand (length);
+                      else
+                         break;
+                      }
+
+                if (isError (e))
+                    exception (msg);
+
+                len += length;
+                return this;
+        }
+}
+
+
+/*******************************************************************************
+
+        Immutable (read-only) text -- use UString for mutable strings.
+
+*******************************************************************************/
+
+class UStringView : ICU, ITextOther
+{
+        alias opIndex   charAt;
+
+        // the core of the UStringView and UString attributes. The name 'len'
+        // is used rather than the more obvious 'length' since there is
+        // a collision with the silly array[length] syntactic sugar ...
+        // 'len' counts the valid code units at the front of 'content';
+        // get() returns exactly content[0..len]
+        package uint     len;
+        package wchar[]  content;
+
+        // this should probably be in UString only, but there seems to
+        // be a compiler bug where it doesn't get initialised correctly,
+        // and it's perhaps useful to have here for when a UString is
+        // passed as a UStringView argument.
+        private bool     mutable;
+
+        // toFolded() argument
+        public enum     CaseOption
+                        {
+                        Default  = 0,
+                        SpecialI = 1
+                        }
+
+        /***********************************************************************
+
+                Hidden constructor
+
+        ***********************************************************************/
+
+        private this ()
+        {
+        }
+
+        /***********************************************************************
+
+                Construct read-only wrapper around the given content
+
+                The array is referenced, not copied.
+
+        ***********************************************************************/
+
+        this (wchar[] content)
+        {
+                this.content = content;
+                this.len = content.length;
+        }
+
+        /***********************************************************************
+
+                Support for writing via the Mango IO subsystem
+
+        ***********************************************************************/
+
+        version (Isolated){}
+        else
+        {
+                void write (IWriter w)
+                {
+                        w.put (get);
+                }
+        }
+
+        /***********************************************************************
+
+                Return the valid content from this UStringView
+
+        ***********************************************************************/
+
+        final package wchar[] get ()
+        {
+                return content [0..len];
+        }
+
+        /***********************************************************************
+
+                Is this UStringView equal to another?
+
+                Objects that are not UStringView instances never compare
+                equal.
+
+        ***********************************************************************/
+
+        final override equals_t opEquals (Object o)
+        {
+                UStringView other = cast(UStringView) o;
+
+                if (other)
+                    return (other is this || compare (other) == 0);
+                return 0;
+        }
+
+        /***********************************************************************
+
+                Compare this UStringView to another.
+
+                Returns 0 for the same instance, the result of compare()
+                for another UStringView, and 1 for anything else.
+
+        ***********************************************************************/
+
+        final override int opCmp (Object o)
+        {
+                UStringView other = cast(UStringView) o;
+
+                if (other is this)
+                    return 0;
+                else
+                   if (other)
+                       return compare (other);
+                return 1;
+        }
+
+        /***********************************************************************
+
+                Hash this UStringView
+
+                Only the valid content (content[0..len]) is hashed.
+
+        ***********************************************************************/
+
+        final override uint toHash ()
+        {
+                return typeid(wchar[]).getHash (&content[0..len]);
+        }
+
+        /***********************************************************************
+
+                Clone this UStringView into a UString
+
+        ***********************************************************************/
+
+        final UString copy ()
+        {
+                // clone the valid text only: for a UString the backing array
+                // can be longer than len (spare capacity), and copying
+                // 'content' wholesale would make that slack part of the clone
+                return new UString (get);
+        }
+
+        /***********************************************************************
+
+                Clone a section of this UStringView into a UString
+
+                'start' and 'len' are pinned to the valid content first.
+
+        ***********************************************************************/
+
+        final UString extract (uint start, uint len=uint.max)
+        {
+                // 'len' is the parameter here (span length), not the member
+                pinIndices (start, len);
+                return new UString (content[start..start+len]);
+        }
+
+        /***********************************************************************
+
+                Count unicode code points in the length UChar code units of
+                the string. A code point may occupy either one or two UChar
+                code units. Counting code points involves reading all code
+                units.
+
+        ***********************************************************************/
+
+        final uint codePoints (uint start=0, uint length=uint.max)
+        {
+                pinIndices (start, length);
+                // content.ptr + start rather than &content[start]: start may
+                // equal content.length (e.g. an empty string), and indexing
+                // there is a bounds violation
+                return u_countChar32 (content.ptr + start, length);
+        }
+
+        /***********************************************************************
+
+                Return an indication whether or not there are surrogate pairs
+                within the string.
+
+                The answer is true when the code point count of the
+                pinned span differs from its code unit count.
+
+        ***********************************************************************/
+
+        final bool hasSurrogates (uint start=0, uint length=uint.max)
+        {
+                // pin here as well, so 'length' is the real span length
+                // before it is compared with the code point count
+                pinIndices (start, length);
+                return codePoints (start, length) != length;
+        }
+
+        /***********************************************************************
+
+                Return the character at the specified position.
+
+                The index must be less than the current length; this
+                is checked by the in-contract.
+
+        ***********************************************************************/
+
+        final wchar opIndex (uint index)
+        in {
+                if (index >= len)
+                    exception ("index out of bounds");
+           }
+        body
+        {
+                return content [index];
+        }
+
+        /***********************************************************************
+
+                Return the length of the valid content
+
+        ***********************************************************************/
+
+        final uint length ()
+        {
+                return len;
+        }
+
+        /***********************************************************************
+
+                The comparison can be done in code unit order or in code
+                point order. They differ only in UTF-16 when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compare (UStringView other, bool codePointOrder=false)
+        {
+                return compare (other.get, codePointOrder);
+        }
+
+        /***********************************************************************
+
+                The comparison can be done in code unit order or in code
+                point order. They differ only in UTF-16 when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compare (wchar[] other, bool codePointOrder=false)
+        {
+                return u_strCompare (content.ptr, len, other.ptr, other.length, codePointOrder);
+        }
+
+        /***********************************************************************
+
+                Compare this text with another UStringView using the
+                case-folded comparison. 'option' is passed through
+                unchanged (see CaseOption).
+
+                Only the valid content of 'other' takes part in the
+                comparison.
+
+        ***********************************************************************/
+
+        final int compareFolded (UStringView other, CaseOption option = CaseOption.Default)
+        {
+                // other.get, not other.content: a UString's backing array can
+                // be longer than its valid length, and the slack must not be
+                // compared. This matches what compare (UStringView) does.
+                return compareFolded (other.get, option);
+        }
+
+        /***********************************************************************
+
+                Compare this text with a wchar array using the
+                case-folded comparison. 'option' is passed through
+                unchanged (see CaseOption).
+
+                The work is delegated to the three-argument compareFolded
+                overload, with this object's valid content as the first
+                operand.
+
+        ***********************************************************************/
+
+        final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default)
+        {
+                return compareFolded (get, other, option);
+        }
+
+        /***********************************************************************
+
+                Does this UStringView start with specified string?
+
+        ***********************************************************************/
+
+        final bool startsWith (UStringView other)
+        {
+                return startsWith (other.get);
+        }
+
+        /***********************************************************************
+
+                Does this UStringView start with specified string?
+
+                The prefix is checked with compareFolded.
+                NOTE(review): that makes the test case-insensitive,
+                presumably on purpose - confirm.
+
+        ***********************************************************************/
+
+        final bool startsWith (wchar[] chars)
+        {
+                if (len >= chars.length)
+                    return compareFolded (content[0..chars.length], chars) == 0;
+                return false;
+        }
+
+        /***********************************************************************
+
+                Does this UStringView end with specified string?
+
+        ***********************************************************************/
+
+        final bool endsWith (UStringView other)
+        {
+                return endsWith (other.get);
+        }
+
+        /***********************************************************************
+
+                Does this UStringView end with specified string?
+
+                The suffix is checked with compareFolded, in the same
+                way as startsWith.
+
+        ***********************************************************************/
+
+        final bool endsWith (wchar[] chars)
+        {
+                if (len >= chars.length)
+                    return compareFolded (content[len-chars.length..len], chars) == 0;
+                return false;
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a BMP code point in a string.
+                A surrogate code point is found only if its match in the
+                text is not part of a surrogate pair.
+
+                Returns the index of the match, or uint.max when there
+                is none.
+
+        ***********************************************************************/
+
+        final uint indexOf (wchar c, uint start=0)
+        {
+                pinIndex (start);
+                // content.ptr + start rather than &content[start]: start may
+                // equal content.length (e.g. searching an empty string), and
+                // indexing there is a bounds violation
+                wchar* s = u_memchr (content.ptr + start, c, len-start);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a substring in a string.
+
+                The substring is found at code point boundaries.
That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+        ***********************************************************************/
+
+        final uint indexOf (UStringView other, uint start=0)
+        {
+                return indexOf (other.get, start);
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a substring in a string.
+
+                The substring is found at code point boundaries. That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+                Returns the index of the match, or uint.max when there
+                is none.
+
+        ***********************************************************************/
+
+        final uint indexOf (wchar[] chars, uint start=0)
+        {
+                pinIndex (start);
+                // content.ptr + start rather than &content[start]: start may
+                // equal content.length (e.g. searching an empty string), and
+                // indexing there is a bounds violation
+                wchar* s = u_strFindFirst (content.ptr + start, len-start, chars.ptr, chars.length);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a BMP code point in a string.
+                A surrogate code point is found only if its match in the
+                text is not part of a surrogate pair.
+
+                Only the first 'start' code units are searched. Returns
+                the index of the match, or uint.max when there is none.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (wchar c, uint start=uint.max)
+        {
+                pinIndex (start);
+                wchar* s = u_memrchr (content.ptr, c, start);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a substring in a string.
+                This overload searches for the valid content of another
+                UStringView.
+ + ***********************************************************************/ + + final uint lastIndexOf (UStringView other, uint start=uint.max) + { + return lastIndexOf (other.get, start); + } + + /*********************************************************************** + + Find the last occurrence of a substring in a string. + + The substring is found at code point boundaries. That means + that if the substring begins with a trail surrogate or ends + with a lead surrogate, then it is found only if these + surrogates stand alone in the text. Otherwise, the substring + edge units would be matched against halves of surrogate pairs. + + ***********************************************************************/ + + final uint lastIndexOf (wchar[] chars, uint start=uint.max) + { + pinIndex (start); // the default uint.max is pinned to len, so the whole string is searched + wchar* s = u_strFindLast (content.ptr, start, chars.ptr, chars.length); + if (s) + return s - content.ptr; // convert the match pointer into an index + return uint.max; // no match + } + + /*********************************************************************** + + Lowercase the characters into a separate UString. + + Casing is locale-dependent and context-sensitive. The + result may be longer or shorter than the original. + + Note that the return value refers to the provided destination + UString. + + ***********************************************************************/ + + final UString toLower (UString dst) + { + return toLower (dst, ULocale.Default); + } + + /*********************************************************************** + + Lowercase the characters into a separate UString. + + Casing is locale-dependent and context-sensitive. The + result may be longer or shorter than the original. + + Note that the return value refers to the provided destination + UString.
+ + ***********************************************************************/ + + final UString toLower (UString dst, inout ULocale locale) + { + uint lower (wchar* dst, uint length, inout UErrorCode e) + { + return u_strToLower (dst, length, content.ptr, len, ICU.toString(locale.name), e); + } + + dst.expand (len + 32); // NOTE(review): presumably reserves room for a result longer than the source - confirm in UString.expand + return dst.format (&lower, "toLower() failed"); + } + + /*********************************************************************** + + Uppercase the characters into a separate UString. + + Casing is locale-dependent and context-sensitive. The + result may be longer or shorter than the original. + + Note that the return value refers to the provided destination + UString. + + ***********************************************************************/ + + final UString toUpper (UString dst) + { + return toUpper (dst, ULocale.Default); + } + + /*********************************************************************** + + Uppercase the characters into a separate UString. + + Casing is locale-dependent and context-sensitive. The + result may be longer or shorter than the original. + + Note that the return value refers to the provided destination + UString. + + ***********************************************************************/ + + final UString toUpper (UString dst, inout ULocale locale) + { + uint upper (wchar* dst, uint length, inout UErrorCode e) + { + return u_strToUpper (dst, length, content.ptr, len, ICU.toString(locale.name), e); + } + + dst.expand (len + 32); // NOTE(review): presumably reserves room for a result longer than the source - confirm in UString.expand + return dst.format (&upper, "toUpper() failed"); + } + + /*********************************************************************** + + Case-fold the characters into a separate UString. + + Case-folding is locale-independent and not context-sensitive, + but there is an option for whether to include or exclude + mappings for dotted I and dotless i that are marked with 'I' + in CaseFolding.txt. The result may be longer or shorter than + the original.
+ + Note that the return value refers to the provided destination + UString. + + ***********************************************************************/ + + final UString toFolded (UString dst, CaseOption option = CaseOption.Default) + { + uint fold (wchar* dst, uint length, inout UErrorCode e) + { + return u_strFoldCase (dst, length, content.ptr, len, option, e); + } + + dst.expand (len + 32); // NOTE(review): presumably reserves room for a result longer than the source - confirm in UString.expand + return dst.format (&fold, "toFolded() failed"); + } + + /*********************************************************************** + + Converts a sequence of wchar (UTF-16) to UTF-8 bytes. If + the output array is not provided, an array of appropriate + size will be allocated and returned. Where the output is + provided, it must be large enough to hold potentially four + bytes per character for surrogate-pairs or three bytes per + character for BMP only. Consider using UConverter where + streaming conversions are required. + + Returns an array slice representing the valid UTF8 content. + + ***********************************************************************/ + + final char[] toUtf8 (char[] dst = null) + { + uint x; // receives the output length from u_strToUTF8 + UErrorCode e; + + if (! cast(char*) dst) // no destination buffer supplied + dst = new char[len * 4]; // four bytes per code unit, the worst case named above + + u_strToUTF8 (dst.ptr, dst.length, &x, content.ptr, len, e); + testError (e, "failed to convert to UTF8"); + return dst [0..x]; // only the portion reported through x + } + + /*********************************************************************** + + Remove leading and trailing whitespace from this UStringView. + Note that we slice the content to remove leading space.
+ + ***********************************************************************/ + + UStringView trim () + { + wchar c; + uint i = len; + + // cut off trailing white space (only len shrinks, content is untouched) + while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c))) + --i; + len = i; + + // now remove leading whitespace + for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {} + if (i) + { + len -= i; + content = content[i..$]; // drop the i leading units only; slicing to $-i could leave content shorter than len + } + + return this; + } + + /*********************************************************************** + + Unescape a string of characters and write the resulting + Unicode characters to the destination buffer. The following + escape sequences are recognized: + + \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + \\Uhhhhhhhh 8 hex digits + \\xhh 1-2 hex digits + \\x{h...} 1-8 hex digits + \\ooo 1-3 octal digits; o in [0-7] + \\cX control-X; X is masked with 0x1F + + as well as the standard ANSI C escapes: + + \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + + Anything else following a backslash is generically escaped. + For example, "[a\\-z]" returns "[a-z]". + + If an escape sequence is ill-formed, this method returns an + empty string. An example of an ill-formed sequence is "\\u" + followed by fewer than 4 hex digits. + + ***********************************************************************/ + + final UString unEscape () + { + UString result = new UString (len); + for (uint i=0; i < len;) + { + dchar c = charAt(i++); + if (c == 0x005C) // backslash starts an escape sequence + { + // bump index ... + c = u_unescapeAt (&_charAt, &i, len, cast(void*) this); + + // error? + if (c == 0xFFFFFFFF) + { + result.truncate (); // return empty string + break; // invalid escape sequence + } + } + result.append (c); + } + return result; + } + + /*********************************************************************** + + Is this code point a surrogate (U+d800..U+dfff)?
+ + ***********************************************************************/ + + final static bool isSurrogate (wchar c) + { + return (c & 0xfffff800) == 0xd800; // top five bits are 11011 + } + + /*********************************************************************** + + Is this code unit a lead surrogate (U+d800..U+dbff)? + + ***********************************************************************/ + + final static bool isLeading (wchar c) + { + return (c & 0xfffffc00) == 0xd800; // top six bits are 110110 + } + + /*********************************************************************** + + Is this code unit a trail surrogate (U+dc00..U+dfff)? + + ***********************************************************************/ + + final static bool isTrailing (wchar c) + { + return (c & 0xfffffc00) == 0xdc00; // top six bits are 110111 + } + + /*********************************************************************** + + Adjust a random-access offset to a code point boundary + at the start of a code point. If the offset points to + the trail surrogate of a surrogate pair, then the offset + is decremented. Otherwise, it is not modified. + + ***********************************************************************/ + + final uint getCharStart (uint i) + in { + if (i >= len) + exception ("index out of bounds"); + } + body + { + if (isTrailing (content[i]) && i && isLeading (content[i-1])) // the i test guards content[i-1] + --i; + return i; + } + + /*********************************************************************** + + Adjust a random-access offset to a code point boundary + after a code point. If the offset is behind the lead + surrogate of a surrogate pair, then the offset is + incremented. Otherwise, it is not modified.
+ + ***********************************************************************/ + + final uint getCharLimit (uint i) + in { + if (i >= len) + exception ("index out of bounds"); + } + body + { + if (i && isLeading(content[i-1]) && isTrailing (content[i])) // the i test guards content[i-1] + ++i; + return i; + } + + /*********************************************************************** + + Callback for C unescapeAt() function + + ***********************************************************************/ + + extern (C) + { + typedef wchar function (uint offset, void* context) CharAt; + + private static wchar _charAt (uint offset, void* context) + { + return (cast(UStringView) context).charAt (offset); // unEscape() passes its UStringView 'this' as the context + } + } + + /*********************************************************************** + + Pin the given index to a valid position. + + ***********************************************************************/ + + final private void pinIndex (inout uint x) + { + if (x > len) + x = len; + } + + /*********************************************************************** + + Pin the given index and length to a valid position. + + ***********************************************************************/ + + final private void pinIndices (inout uint start, inout uint length) + { + if (start > len) + start = len; + + if (length > (len - start)) + length = len - start; + } + + /*********************************************************************** + + Helper for comparison methods + + ***********************************************************************/ + + final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default) + { + UErrorCode e; + + int x = u_strCaseCompare (s1.ptr, s1.length, s2.ptr, s2.length, option, e); + testError (e, "compareFolded failed"); + return x; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU C routines; filled in through 'targets' + ***********************************************************************/ + + private static extern (C) + { + wchar* function (wchar*, uint, wchar*, uint) u_strFindFirst; + wchar* function (wchar*, uint, wchar*, uint) u_strFindLast; + wchar* function (wchar*, wchar, uint) u_memchr; + wchar* function (wchar*, wchar, uint) u_memrchr; + int function (wchar*, uint, wchar*, uint, bool) u_strCompare; + int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strCaseCompare; + dchar function (CharAt, uint*, uint, void*) u_unescapeAt; + uint function (wchar*, uint) u_countChar32; + uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToUpper; + uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToLower; + uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strFoldCase; + wchar* function (wchar*, uint, uint*, char*, uint, inout UErrorCode) u_strFromUTF8; + char* function (char*, uint, uint*, wchar*, uint, inout UErrorCode) u_strToUTF8; + } + + /*********************************************************************** + Pairs each function pointer above with the symbol name to bind + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &u_strFindFirst, "u_strFindFirst"}, + {cast(void**) &u_strFindLast, "u_strFindLast"}, + {cast(void**) &u_memchr, "u_memchr"}, + {cast(void**) &u_memrchr, "u_memrchr"}, + {cast(void**) &u_strCompare, "u_strCompare"}, + {cast(void**) &u_strCaseCompare, "u_strCaseCompare"}, + {cast(void**) &u_unescapeAt, "u_unescapeAt"}, + {cast(void**) &u_countChar32, "u_countChar32"}, + {cast(void**) &u_strToUpper, "u_strToUpper"}, + {cast(void**) &u_strToLower, "u_strToLower"}, + {cast(void**)
&u_strFoldCase, "u_strFoldCase"}, + {cast(void**) &u_strFromUTF8, "u_strFromUTF8"}, + {cast(void**) &u_strToUTF8, "u_strToUTF8"}, + ]; + + /*********************************************************************** + Bind every entry of 'targets' from the icuuc library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + //test (); + } + + /*********************************************************************** + Release the library handle obtained by the static constructor + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + + /*********************************************************************** + Disabled smoke test, kept commented out + ***********************************************************************/ + + //private static void test() + //{ + // UString s = new UString (r"aaaqw \uabcd eaaa"); + // CString16 x = "dssfsdff"; + // s ~ x ~ x; + // wchar c = s[3]; + // s[3] = 'Q'; + // int y = s.indexOf ("qwe"); + // s.unEscape (); + // s.toUpper (new UString); + // s.padLeading(2).padTrailing(2).trim(); + //} +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UStringPrep.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UStringPrep.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,249 @@ +/******************************************************************************* + + @file UStringPrep.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software.
If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UStringPrep; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +/******************************************************************************* + + StringPrep API implements the StringPrep framework as described + by RFC 3454. + + StringPrep prepares Unicode strings for use in network protocols. + Profiles of StringPrep are sets of rules and data according to which + the Unicode Strings are prepared. Each profile contains tables + which describe how a code point should be treated. The tables are + broadly classified into + + - Unassigned Table: Contains code points that are unassigned + in the Unicode Version supported by StringPrep. Currently + RFC 3454 supports Unicode 3.2. + + - Prohibited Table: Contains code points that are prohibited + from the output of the StringPrep processing function. + + - Mapping Table: Contains code points that are deleted from the + output or case mapped. + + The procedure for preparing Unicode strings: + + 1.
Map: For each character in the input, check if it has a mapping + and, if so, replace it with its mapping. + + 2. Normalize: Possibly normalize the result of step 1 using Unicode + normalization. + + 3. Prohibit: Check for any characters that are not allowed in the + output. If any are found, return an error. + + 4. Check bidi: Possibly check for right-to-left characters, and if + any are found, make sure that the whole string satisfies the + requirements for bidirectional strings. If the string does not + satisfy the requirements for bidirectional strings, return an + error. + + See + this page for full details. + +*******************************************************************************/ + +class UStringPrep : ICU +{ + private Handle handle; // profile handle returned by usprep_open + + enum Options // forwarded to usprep_prepare as its options argument + { + Strict, + Lenient + } + + + /*********************************************************************** + + Creates a StringPrep profile from the data file. + + path string containing the full path pointing + to the directory where the profile resides + followed by the package name e.g. + "/usr/resource/my_app/profiles/mydata" on + a Unix system. if NULL, ICU default data + files will be used.
+ + filename name of the profile file to be opened + + ***********************************************************************/ + + this (char[] path, char[] filename) + { + UErrorCode e; + + handle = usprep_open (toString(path), toString(filename), e); + testError (e, "failed to open string-prep"); + } + + /*********************************************************************** + + Close this profile + + ***********************************************************************/ + + ~this () + { + usprep_close (handle); + } + + /*********************************************************************** + + Prepare the input buffer + + This operation maps, normalizes(NFKC), checks for prohibited + and BiDi characters in the order defined by RFC 3454 depending + on the options specified in the profile + + ***********************************************************************/ + + void prepare (UStringView src, UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return usprep_prepare (handle, src.get.ptr, src.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to prepare text"); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU C routines; filled in through 'targets' + ***********************************************************************/ + + private static extern (C) + { + Handle function (char*, char*, inout UErrorCode) usprep_open; + void function (Handle) usprep_close; + uint function (Handle, wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) usprep_prepare; + } + + /*********************************************************************** + Pairs each function pointer above with the symbol name to bind + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &usprep_open, "usprep_open"}, + {cast(void**) &usprep_close, "usprep_close"}, + {cast(void**) &usprep_prepare, "usprep_prepare"}, + ]; + + /*********************************************************************** + Bind every entry of 'targets' from the icuuc library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + Release the library handle obtained by the static constructor + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UText.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UText.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,249 @@ +/******************************************************************************* + + @file UText.d + + Copyright (c) 2008 Frank Benoit + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software.
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, July 2008 + @author Frank + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ +module com.ibm.icu.mangoicu.UText; + +import com.ibm.icu.mangoicu.ICU; + +struct UText { + // UText private fields -- start (NOTE(review): order and sizes presumably mirror ICU's C UText struct - confirm against utext.h) + private { + uint magic = UTEXT_MAGIC; + int flags = 0; + int providerProperties = 0; + int sizeOfStruct = UText.sizeof; + long chunkNativeLimit = 0; + int extraSize = 0; + int nativeIndexingLimit = 0; + long chunkNativeStart = 0; + int chunkOffset = 0; + int chunkLength = 0; + wchar* chunkContents = null; + void* pFuncs = null; + void* pExtra = null; + void* context = null; + void* p = null; + void* q = null; + void* r = null; + void* privP = null; + long a = 0; + int b = 0; + int c = 0; + long privA = 0; + int privB = 0; + int privC = 0; + } // UText private fields -- end + // do not add any non-static fields + + private enum { + UTEXT_MAGIC = 0x345ad82c + } + void close(){ + version(D_Version2){ + utext_close(&this); + } else { + utext_close(this); + } + } + private void ensureStatusOk( ICU.UErrorCode status ){ + if( status !is ICU.UErrorCode.OK ){ // any status other than OK is reported as an exception + throw new Exception( "ICU Exception" ); + } + } + void openUTF8( char[] str ){ + auto status = ICU.UErrorCode.OK; + version(D_Version2){ + utext_openUTF8(&this, str.ptr, str.length, status ); + } else { + utext_openUTF8(this, str.ptr, str.length, status ); + } + ensureStatusOk( status ); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU C routines; only the uncommented ones are bound + ***********************************************************************/ + static extern(System){ + UText * function(UText* ut) utext_close; + UText * function(UText* ut, char *s, long length, inout ICU.UErrorCode status) utext_openUTF8; +// UText * function(UText* ut, UChar *s, int64_t length, inout UErrorCode status) utext_openUChars; +// UText * function(UText* ut, U_NAMESPACE_QUALIFIER UnicodeString *s, inout UErrorCode status) utext_openUnicodeString; +// UText * function(UText* ut, U_NAMESPACE_QUALIFIER UnicodeString *s, inout UErrorCode status) utext_openConstUnicodeString; +// UText * function(UText* ut, U_NAMESPACE_QUALIFIER Replaceable *rep, inout UErrorCode status) utext_openReplaceable; +// UText * function(UText* ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, inout UErrorCode status) utext_openCharacterIterator; +// UText * function(UText* ut, UText *src, UBool deep, UBool readOnly, inout UErrorCode status) utext_clone; +// UBool function(const UText *a, const UText *b) utext_equals; +// int64_t function(UText* ut) utext_nativeLength; +// UBool function(UText* ut) utext_isLengthExpensive; +// UChar32 function(UText* ut, int64_t nativeIndex) utext_char32At; +// UChar32 function(UText* ut) utext_current32; +// UChar32 function(UText* ut) utext_next32; +// UChar32 function(UText* ut) utext_previous32; +// UChar32 function(UText* ut, int64_t nativeIndex) utext_next32From; +// UChar32 function(UText* ut, int64_t nativeIndex) utext_previous32From; +// int64_t function(UText* ut) utext_getNativeIndex; +// void function(UText* ut, int64_t nativeIndex) utext_setNativeIndex; +// UBool function(UText* ut, int delta) utext_moveIndex32; +// int64_t function(UText* ut)
utext_getPreviousNativeIndex; +// int function(UText* ut, int64_t nativeStart, int64_t nativeLimit, +// UChar *dest, int destCapacity, +// inout UErrorCode status) utext_extract; +// UBool function(UText* ut) utext_isWritable; +// UBool function(UText* ut) utext_hasMetaData; +// int function(UText* ut, +// int64_t nativeStart, int64_t nativeLimit, +// UChar *replacementText, int replacementLength, +// inout UErrorCode status) utext_replace; +// void function(UText* ut, +// int64_t nativeStart, int64_t nativeLimit, +// int64_t destIndex, +// UBool move, +// inout UErrorCode status) utext_copy; +// void function(UText* ut) utext_freeze; +// UText * function(UText* ut, int extraSpace, inout UErrorCode status) utext_setup; + } + + /*********************************************************************** + Pairs each bound function pointer with the symbol name to bind + ***********************************************************************/ + static FunctionLoader.Bind[] targets = [ + {cast(void**) &utext_close, "utext_close"}, + {cast(void**) &utext_openUTF8, "utext_openUTF8"}, +// {cast(void**) &utext_openUChars, "utext_openUChars"}, +// {cast(void**) &utext_openUnicodeString, "utext_openUnicodeString"}, +// {cast(void**) &utext_openConstUnicodeString, "utext_openConstUnicodeString"}, +// {cast(void**) &utext_openReplaceable, "utext_openReplaceable"}, +// {cast(void**) &utext_openCharacterIterator, "utext_openCharacterIterator"}, +// {cast(void**) &utext_clone, "utext_clone"}, +// {cast(void**) &utext_equals, "utext_equals"}, +// {cast(void**) &utext_nativeLength, "utext_nativeLength"}, +// {cast(void**) &utext_isLengthExpensive, "utext_isLengthExpensive"}, +// {cast(void**) &utext_char32At, "utext_char32At"}, +// {cast(void**) &utext_current32, "utext_current32"}, +// {cast(void**) &utext_next32, "utext_next32"}, +// {cast(void**) &utext_next32From, "utext_next32From"}, +// {cast(void**) &utext_previous32, "utext_previous32"}, +// {cast(void**) &utext_previous32From, "utext_previous32From"}, +// {cast(void**)
&utext_setNativeIndex, "utext_setNativeIndex"}, +// {cast(void**) &utext_moveIndex32, "utext_moveIndex32"}, +// {cast(void**) &utext_getPreviousNativeIndex, "utext_getPreviousNativeIndex"}, +// {cast(void**) &utext_extract, "utext_extract"}, +// {cast(void**) &utext_isWritable, "utext_isWritable"}, +// {cast(void**) &utext_hasMetaData, "utext_hasMetaData"}, +// {cast(void**) &utext_replace, "utext_replace"}, +// {cast(void**) &utext_copy, "utext_copy"}, +// {cast(void**) &utext_freeze, "utext_freeze"}, +// {cast(void**) &utext_setup, "utext_setup"}, + ]; + + /*********************************************************************** + Bind every entry of 'targets' from the ICU.icuuc library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuuc, targets); + //test (); + } + + /*********************************************************************** + Release the library handle obtained by the static constructor + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UTimeZone.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UTimeZone.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,264 @@ +/******************************************************************************* + + @file UTimeZone.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software.
If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UTimeZone; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString, + com.ibm.icu.mangoicu.UEnumeration; +private import java.lang.util; + +/******************************************************************************* + + A representation of a TimeZone. Unfortunately, ICU does not expose + this as a separate entity from the C-API, so we have to make do + with an approximation instead. 
+ +*******************************************************************************/ + +struct UTimeZone +{ + public CString16 name; + + public static UTimeZone Default = {null}; + public static UTimeZone Gmt = {"Etc/GMT"}; + public static UTimeZone Greenwich = {"Etc/Greenwich"}; + public static UTimeZone Uct = {"Etc/UCT"}; + public static UTimeZone Utc = {"Etc/UTC"}; + public static UTimeZone Universal = {"Etc/Universal"}; + + public static UTimeZone GmtPlus0 = {"Etc/GMT+0"}; + public static UTimeZone GmtPlus1 = {"Etc/GMT+1"}; + public static UTimeZone GmtPlus2 = {"Etc/GMT+2"}; + public static UTimeZone GmtPlus3 = {"Etc/GMT+3"}; + public static UTimeZone GmtPlus4 = {"Etc/GMT+4"}; + public static UTimeZone GmtPlus5 = {"Etc/GMT+5"}; + public static UTimeZone GmtPlus6 = {"Etc/GMT+6"}; + public static UTimeZone GmtPlus7 = {"Etc/GMT+7"}; + public static UTimeZone GmtPlus8 = {"Etc/GMT+8"}; + public static UTimeZone GmtPlus9 = {"Etc/GMT+9"}; + public static UTimeZone GmtPlus10 = {"Etc/GMT+10"}; + public static UTimeZone GmtPlus11 = {"Etc/GMT+11"}; + public static UTimeZone GmtPlus12 = {"Etc/GMT+12"}; + + public static UTimeZone GmtMinus0 = {"Etc/GMT-0"}; + public static UTimeZone GmtMinus1 = {"Etc/GMT-1"}; + public static UTimeZone GmtMinus2 = {"Etc/GMT-2"}; + public static UTimeZone GmtMinus3 = {"Etc/GMT-3"}; + public static UTimeZone GmtMinus4 = {"Etc/GMT-4"}; + public static UTimeZone GmtMinus5 = {"Etc/GMT-5"}; + public static UTimeZone GmtMinus6 = {"Etc/GMT-6"}; + public static UTimeZone GmtMinus7 = {"Etc/GMT-7"}; + public static UTimeZone GmtMinus8 = {"Etc/GMT-8"}; + public static UTimeZone GmtMinus9 = {"Etc/GMT-9"}; + public static UTimeZone GmtMinus10 = {"Etc/GMT-10"}; + public static UTimeZone GmtMinus11 = {"Etc/GMT-11"}; + public static UTimeZone GmtMinus12 = {"Etc/GMT-12"}; + + /*********************************************************************** + + Get the default time zone, storing its name in zone.name 
+ + ***********************************************************************/ + + static void getDefault (inout UTimeZone zone) + { + uint format (wchar* dst, uint length, inout ICU.UErrorCode e) + { + return ucal_getDefaultTimeZone (dst, length, e); + } + + UString s = new UString(64); + s.format (&format, "failed to get default time zone"); + zone.name = s.get(); + } + + /*********************************************************************** + + Set the default time zone to the one named by zone.name + + ***********************************************************************/ + + static void setDefault (inout UTimeZone zone) + { + ICU.UErrorCode e; + + ucal_setDefaultTimeZone (ICU.toString (zone.name), e); + ICU.testError (e, "failed to set default time zone"); + } + + /*********************************************************************** + + Return the amount of time in milliseconds that the clock + is advanced during daylight savings time for the given + time zone, or zero if the time zone does not observe daylight + savings time + + ***********************************************************************/ + + static uint getDSTSavings (inout UTimeZone zone) + { + ICU.UErrorCode e; + + uint x = ucal_getDSTSavings (ICU.toString (zone.name), e); + ICU.testError (e, "failed to get DST savings"); + return x; + } + + + /********************************************************************** + + Iterate over the available timezone names (foreach support). The + delegate returns non-zero to stop; that value is returned. 
+ **********************************************************************/ + + static int opApply (int delegate(inout wchar[] element) dg) + { + ICU.UErrorCode e; + wchar[] name; + int result; + + void* h = ucal_openTimeZones (e); + ICU.testError (e, "failed to open timeszone iterator"); + + UEnumeration zones = new UEnumeration (cast(UEnumeration.Handle) h); + scope (exit) delete zones; /* also runs when dg throws */ + while (zones.next(name) && (result = dg(name)) == 0) {} + return result; + } + + + + /*********************************************************************** + + Bind the ICU functions 
from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU entry points listed in targets + ***********************************************************************/ + + private static extern (C) + { + void* function (inout ICU.UErrorCode) ucal_openTimeZones; + uint function (wchar*, uint, inout ICU.UErrorCode) ucal_getDefaultTimeZone; + void function (wchar*, inout ICU.UErrorCode) ucal_setDefaultTimeZone; + uint function (wchar*, inout ICU.UErrorCode) ucal_getDSTSavings; + } + + /*********************************************************************** + Pairs each function pointer above with its ICU symbol name + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucal_openTimeZones, "ucal_openTimeZones"}, + {cast(void**) &ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone"}, + {cast(void**) &ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone"}, + {cast(void**) &ucal_getDSTSavings, "ucal_getDSTSavings"}, + ]; + + /*********************************************************************** + Static constructor: bind targets against the ICU.icuin library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuin, targets); + } + + /*********************************************************************** + Static destructor: hand the library handle back to FunctionLoader + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/mangoicu/UTransform.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/com.ibm.icu/src/com/ibm/icu/mangoicu/UTransform.d Sun Apr 19 13:49:38 2009 +0200 @@ -0,0 +1,239 @@ +/******************************************************************************* + + @file UTransform.d + + Copyright (c) 2004 Kris Bell + + This software 
is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module com.ibm.icu.mangoicu.UTransform; + +private import com.ibm.icu.mangoicu.ICU, + com.ibm.icu.mangoicu.UString; + +/******************************************************************************* + + See + this page for full details. 
+ +*******************************************************************************/ + +class UTransform : ICU +{ + private Handle handle; + + enum Direction + { + Forward, + Reverse + } + + + /*********************************************************************** + Open a transform by ID; direction argument is 0, no rules passed + ***********************************************************************/ + + this (UStringView id) + { + UErrorCode e; + + handle = utrans_openU (id.get.ptr, id.len, 0, null, 0, null, e); + testError (e, "failed to open ID transform"); + } + + /*********************************************************************** + Open a transform from rule text and a direction; no ID is passed + ***********************************************************************/ + + this (UStringView rule, Direction dir) + { + UErrorCode e; + + handle = utrans_openU (null, 0, dir, rule.get.ptr, rule.len, null, e); + testError (e, "failed to open rule-based transform"); + } + + /*********************************************************************** + Close the underlying ICU handle + ***********************************************************************/ + + ~this () + { + utrans_close (handle); + } + + /*********************************************************************** + Return the ID reported by ICU, wrapped in a new UStringView + ***********************************************************************/ + + UStringView getID () + { + uint len; + wchar *s = utrans_getUnicodeID (handle, len); + return new UStringView (s[0..len]); + } + + /*********************************************************************** + Set the filter pattern (null when it is empty); returns this + ***********************************************************************/ + + UTransform setFilter (UStringView filter) + { + UErrorCode e; + + if (filter.length) + utrans_setFilter (handle, filter.get.ptr, filter.len, e); + else + utrans_setFilter (handle, null, 0, e); + + testError (e, "failed 
to set transform filter"); + return this; + } + + /*********************************************************************** + Transform text in place, within its capacity; returns this + ***********************************************************************/ + + UTransform execute (UString text) + { + UErrorCode e; + uint textLen = 
text.len; + + utrans_transUChars (handle, text.get.ptr, &textLen, text.content.length, 0, &text.len, e); + testError (e, "failed to execute transform"); + return this; + } + + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU entry points listed in targets + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, uint, wchar*, uint, void*, inout UErrorCode) utrans_openU; + void function (Handle) utrans_close; + wchar* function (Handle, inout uint) utrans_getUnicodeID; + void function (Handle, wchar*, uint, inout UErrorCode) utrans_setFilter; + void function (Handle, wchar*, uint*, uint, uint, uint*, inout UErrorCode) utrans_transUChars; + } + + /*********************************************************************** + Pairs each function pointer above with its ICU symbol name + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &utrans_openU, "utrans_openU"}, + {cast(void**) &utrans_close, "utrans_close"}, + {cast(void**) &utrans_getUnicodeID, "utrans_getUnicodeID"}, + {cast(void**) &utrans_setFilter, "utrans_setFilter"}, + {cast(void**) &utrans_transUChars, "utrans_transUChars"}, + ]; + + /*********************************************************************** + Static constructor: bind targets against the icuin library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + 
/*********************************************************************** + Static destructor: hand the library handle back to FunctionLoader + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 
2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/BreakIterator.d --- a/com.ibm.icu/src/com/ibm/icu/text/BreakIterator.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/text/BreakIterator.d Sun Apr 19 13:49:38 2009 +0200 @@ -9,47 +9,47 @@ public static final int DONE = 0; public static BreakIterator getLineInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public void setText(String line) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); } public int following(int currOffset) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } public int next() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } public static BreakIterator getWordInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public int preceding(int position) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } public void setText(CharacterIterator docIter) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); } public bool isBoundary(int position) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return false; } public int first() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/CollationKey.d --- a/com.ibm.icu/src/com/ibm/icu/text/CollationKey.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/text/CollationKey.d Sun Apr 19 13:49:38 2009 +0200 @@ -6,7 +6,7 @@ public class CollationKey { public int compareTo(CollationKey descriptionKey) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/Collator.d --- a/com.ibm.icu/src/com/ibm/icu/text/Collator.d Sun Apr 19 12:22:47 2009 +0200 +++ 
b/com.ibm.icu/src/com/ibm/icu/text/Collator.d Sun Apr 19 13:49:38 2009 +0200 @@ -9,23 +9,23 @@ public class Collator : Comparator { public static Collator getInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public int compare(Object label, Object label2) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } //FIXME missing API //public static Collator getInstance(Locale default1) { - // // TODO Auto-generated method stub + // implMissing(__FILE__, __LINE__); // return null; //} public CollationKey getCollationKey(String attributeValue) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/DateFormat.d --- a/com.ibm.icu/src/com/ibm/icu/text/DateFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/text/DateFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -12,42 +12,42 @@ public static const int MEDIUM = 0; public static DateFormat getDateInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(Date date) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public static DateFormat getTimeInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public static DateFormat getDateTimeInstance(int l, int m) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public static DateFormat getTimeInstance(int s) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public static DateFormat getDateInstance(int dateFormat) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public Date parse(String str, ParsePosition pos) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(Long 
long2) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/MessageFormat.d --- a/com.ibm.icu/src/com/ibm/icu/text/MessageFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/text/MessageFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -5,16 +5,16 @@ public class MessageFormat { public this(String taskList_line) { - // TODO Auto-generated constructor stub + implMissing(__FILE__, __LINE__); } public static String format(String format, Object[] args) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(Object[] objects) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/NumberFormat.d --- a/com.ibm.icu/src/com/ibm/icu/text/NumberFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/text/NumberFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -9,42 +9,42 @@ public class NumberFormat { public static NumberFormat getIntegerInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public static NumberFormat getNumberInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(long longValue) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(double doubleValue) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(BigInteger number) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public Number parse(String source, ParsePosition position) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public String format(Number minValue) { - // TODO Auto-generated method stub + implMissing(__FILE__, 
__LINE__); return null; } public static DateFormat getInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/text/SimpleDateFormat.d --- a/com.ibm.icu/src/com/ibm/icu/text/SimpleDateFormat.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/text/SimpleDateFormat.d Sun Apr 19 13:49:38 2009 +0200 @@ -6,7 +6,7 @@ public class SimpleDateFormat : DateFormat { public this(String string) { - // TODO Auto-generated constructor stub + implMissing(__FILE__, __LINE__); } } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 com.ibm.icu/src/com/ibm/icu/util/Calendar.d --- a/com.ibm.icu/src/com/ibm/icu/util/Calendar.d Sun Apr 19 12:22:47 2009 +0200 +++ b/com.ibm.icu/src/com/ibm/icu/util/Calendar.d Sun Apr 19 13:49:38 2009 +0200 @@ -7,12 +7,12 @@ public static final int YEAR = 0; public static Calendar getInstance() { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return null; } public int get(int year2) { - // TODO Auto-generated method stub + implMissing(__FILE__, __LINE__); return 0; } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/internal/text/html/HTMLMessages.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/internal/text/html/HTMLMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.jface.text/src/org/eclipse/jface/internal/text/html/HTMLMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -24,9 +24,11 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; + /** diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/internal/text/link/contentassist/ContentAssistMessages.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/internal/text/link/contentassist/ContentAssistMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ 
b/org.eclipse.jface.text/src/org/eclipse/jface/internal/text/link/contentassist/ContentAssistMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -25,9 +25,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /** diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/internal/text/link/contentassist/LineBreakingReader.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/internal/text/link/contentassist/LineBreakingReader.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.jface.text/src/org/eclipse/jface/internal/text/link/contentassist/LineBreakingReader.d Sun Apr 19 13:49:38 2009 +0200 @@ -27,10 +27,11 @@ import java.io.Reader; import java.util.Set; import java.io.BufferedReader; -import java.mangoicu.UBreakIterator; import org.eclipse.swt.graphics.GC; +import com.ibm.icu.text.BreakIterator; + /* * Not a real reader. 
Could change if requested */ @@ -43,7 +44,7 @@ private String fLine; private int fOffset; - private UBreakIterator fLineBreakIterator; + private BreakIterator fLineBreakIterator; private bool fBreakWords; /** @@ -59,7 +60,7 @@ fMaxWidth= maxLineWidth; fOffset= 0; fLine= null; - fLineBreakIterator= UBreakIterator.openLineIterator( ULocale.Default); + fLineBreakIterator= BreakIterator.getLineInstance(); fBreakWords= true; } @@ -90,7 +91,7 @@ } int breakOffset= findNextBreakOffset(fOffset); String res; - if (breakOffset !is UBreakIterator.DONE) { + if (breakOffset !is BreakIterator.DONE) { res= fLine.substring(fOffset, breakOffset); fOffset= findWordBegin(breakOffset); if (fOffset is fLine.length()) { @@ -106,7 +107,7 @@ private int findNextBreakOffset(int currOffset) { int currWidth= 0; int nextOffset= fLineBreakIterator.following(currOffset); - while (nextOffset !is UBreakIterator.DONE) { + while (nextOffset !is BreakIterator.DONE) { String word= fLine.substring(currOffset, nextOffset); int wordWidth= fGC.textExtent(word).x; int nextWidth= wordWidth + currWidth; diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/text/DefaultTextDoubleClickStrategy.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/text/DefaultTextDoubleClickStrategy.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.jface.text/src/org/eclipse/jface/text/DefaultTextDoubleClickStrategy.d Sun Apr 19 13:49:38 2009 +0200 @@ -160,19 +160,19 @@ import java.text.CharacterIterator; -import java.mangoicu.UBreakIterator; +import com.ibm.icu.text.BreakIterator; /** * Standard implementation of * {@link org.eclipse.jface.text.ITextDoubleClickStrategy}. *

- * Selects words using java.text.UBreakIterator for the default + * Selects words using java.text.BreakIterator for the default * locale.

*

* This class is not intended to be subclassed. *

* - * @see java.text.UBreakIterator + * @see java.text.BreakIterator * @noextend This class is not intended to be subclassed by clients. */ public class DefaultTextDoubleClickStrategy : ITextDoubleClickStrategy { @@ -342,19 +342,17 @@ if (position is line.getOffset() + line.getLength()) return; - //mangoicu -// fDocIter.setDocument(document, line); - String strLine = document.get( line.getOffset(), line.getLength() ); - UBreakIterator breakIter= UBreakIterator.openWordIterator( ULocale.Default, strLine/+fDocIter+/ ); + fDocIter.setDocument(document, line); + BreakIterator breakIter= BreakIterator.getWordInstance(); + breakIter.setText(fDocIter); - //int start= breakIter.preceding(position); - int start= breakIter.previous(position); // mangoicu - if (start is UBreakIterator.DONE) + int start= breakIter.preceding(position); + if (start is BreakIterator.DONE) start= line.getOffset(); int end= breakIter.following(position); - if (end is UBreakIterator.DONE) + if (end is BreakIterator.DONE) end= line.getOffset() + line.getLength(); if (breakIter.isBoundary(position)) { diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/text/hyperlink/URLHyperlink.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/text/hyperlink/URLHyperlink.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.jface.text/src/org/eclipse/jface/text/hyperlink/URLHyperlink.d Sun Apr 19 13:49:38 2009 +0200 @@ -29,12 +29,11 @@ import java.lang.all; import java.text.MessageFormat; -import org.eclipse.swt.program.Program; import org.eclipse.core.runtime.Assert; import org.eclipse.jface.text.IRegion; - +import org.eclipse.swt.program.Program; - +import com.ibm.icu.text.MessageFormat; /** diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/text/source/JFaceTextMessages.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/text/source/JFaceTextMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ 
b/org.eclipse.jface.text/src/org/eclipse/jface/text/source/JFaceTextMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -16,9 +16,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /** * Accessor for the JFaceTextMessages.properties file in diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/text/templates/JFaceTextTemplateMessages.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/text/templates/JFaceTextTemplateMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.jface.text/src/org/eclipse/jface/text/templates/JFaceTextTemplateMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -14,9 +14,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /** * @since 3.0 diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.jface.text/src/org/eclipse/jface/text/templates/persistence/TemplatePersistenceMessages.d --- a/org.eclipse.jface.text/src/org/eclipse/jface/text/templates/persistence/TemplatePersistenceMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.jface.text/src/org/eclipse/jface/text/templates/persistence/TemplatePersistenceMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -19,9 +19,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /** * @since 3.0 diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.text/src/org/eclipse/jface/text/TextMessages.d --- a/org.eclipse.text/src/org/eclipse/jface/text/TextMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.text/src/org/eclipse/jface/text/TextMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ 
-14,9 +14,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /** diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.text/src/org/eclipse/jface/text/templates/GlobalTemplateVariables.d --- a/org.eclipse.text/src/org/eclipse/jface/text/templates/GlobalTemplateVariables.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.text/src/org/eclipse/jface/text/templates/GlobalTemplateVariables.d Sun Apr 19 13:49:38 2009 +0200 @@ -30,8 +30,8 @@ import java.lang.all; import java.util.Set; -// import com.ibm.icu.text.DateFormat; -// import com.ibm.icu.util.Calendar; +import com.ibm.icu.text.DateFormat; +import com.ibm.icu.util.Calendar; /** * Global variables which are available in any context. @@ -150,9 +150,7 @@ super("year", TextTemplateMessages.getString("GlobalVariables.variable.description.year")); //$NON-NLS-1$ //$NON-NLS-2$ } protected String resolve(TemplateContext context) { - implMissing(__FILE__,__LINE__); - return null; - //return Integer.toString(Calendar.getInstance().get(Calendar.YEAR)); + return Integer.toString(Calendar.getInstance().get(Calendar.YEAR)); } } @@ -171,9 +169,7 @@ * {@inheritDoc} */ protected String resolve(TemplateContext context) { - implMissing(__FILE__,__LINE__); - return null; - //return DateFormat.getTimeInstance().format(new java.util.Date()); + return DateFormat.getTimeInstance().format(new java.util.Date()); } } diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.text/src/org/eclipse/jface/text/templates/TextTemplateMessages.d --- a/org.eclipse.text/src/org/eclipse/jface/text/templates/TextTemplateMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.text/src/org/eclipse/jface/text/templates/TextTemplateMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -14,9 +14,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import 
java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /* * @since 3.0 diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.text/src/org/eclipse/text/edits/TextEditMessages.d --- a/org.eclipse.text/src/org/eclipse/text/edits/TextEditMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.text/src/org/eclipse/text/edits/TextEditMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -37,9 +37,11 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; + class TextEditMessages { diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.text/src/org/eclipse/text/undo/UndoMessages.d --- a/org.eclipse.text/src/org/eclipse/text/undo/UndoMessages.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.text/src/org/eclipse/text/undo/UndoMessages.d Sun Apr 19 13:49:38 2009 +0200 @@ -21,9 +21,10 @@ import java.lang.all; +import java.util.MissingResourceException; import java.util.ResourceBundle; -import java.util.MissingResourceException; -import java.text.MessageFormat; + +import com.ibm.icu.text.MessageFormat; /** * Helper class to get NLSed messages. 
diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.ui.forms/src/org/eclipse/ui/internal/forms/widgets/FormUtil.d --- a/org.eclipse.ui.forms/src/org/eclipse/ui/internal/forms/widgets/FormUtil.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.ui.forms/src/org/eclipse/ui/internal/forms/widgets/FormUtil.d Sun Apr 19 13:49:38 2009 +0200 @@ -13,8 +13,8 @@ *******************************************************************************/ module org.eclipse.ui.internal.forms.widgets.FormUtil; - -// import com.ibm.icu.text.BreakIterator; +import java.lang.all; +import java.util.Set; import org.eclipse.swt.SWT; import org.eclipse.swt.custom.ScrolledComposite; @@ -40,10 +40,7 @@ import org.eclipse.ui.forms.widgets.FormToolkit; import org.eclipse.ui.forms.widgets.ILayoutExtension; -import java.lang.all; -import java.util.Set; - -import java.mangoicu.UBreakIterator; +import com.ibm.icu.text.BreakIterator; public class FormUtil { @@ -109,12 +106,12 @@ } public static int computeMinimumWidth(GC gc, String text) { - auto wb = UBreakIterator.openWordIterator( ULocale.Default, text ); - scope(exit) wb.close(); + BreakIterator wb = BreakIterator.getWordInstance(); + wb.setText(text); int last = 0; int width = 0; - for (int loc = wb.first(); loc !is UBreakIterator.Done; loc = wb.next()) { + for (int loc = wb.first(); loc !is BreakIterator.DONE; loc = wb.next()) { String word = text.substring(last, loc); Point extent = gc.textExtent(word); width = Math.max(width, extent.x); @@ -127,8 +124,8 @@ } public static Point computeWrapSize(GC gc, String text, int wHint) { - auto wb = UBreakIterator.openWordIterator( ULocale.Default, text ); - scope(exit) wb.close(); + BreakIterator wb = BreakIterator.getWordInstance(); + wb.setText(text); FontMetrics fm = gc.getFontMetrics(); int lineHeight = fm.getHeight(); @@ -136,7 +133,7 @@ int last = 0; int height = lineHeight; int maxWidth = 0; - for (int loc = wb.first(); loc !is UBreakIterator.Done; loc = wb.next()) { + for (int loc = wb.first(); loc !is 
BreakIterator.DONE; loc = wb.next()) { String word = text.substring(saved, loc); Point extent = gc.textExtent(word); if (extent.x > wHint) { @@ -166,8 +163,8 @@ public static void paintWrapText(GC gc, String text, Rectangle bounds, bool underline) { - auto wb = UBreakIterator.openWordIterator( ULocale.Default, text ); - scope(exit) wb.close(); + BreakIterator wb = BreakIterator.getWordInstance(); + wb.setText(text); FontMetrics fm = gc.getFontMetrics(); int lineHeight = fm.getHeight(); int descent = fm.getDescent(); @@ -177,7 +174,7 @@ int y = bounds.y; int width = bounds.width; - for (int loc = wb.first(); loc !is UBreakIterator.Done; loc = wb.next()) { + for (int loc = wb.first(); loc !is BreakIterator.DONE; loc = wb.next()) { String line = text.substring(saved, loc); Point extent = gc.textExtent(line); diff -r 2755ef2c8ef8 -r ebefa5c2eab4 org.eclipse.ui.forms/src/org/eclipse/ui/internal/forms/widgets/TextSegment.d --- a/org.eclipse.ui.forms/src/org/eclipse/ui/internal/forms/widgets/TextSegment.d Sun Apr 19 12:22:47 2009 +0200 +++ b/org.eclipse.ui.forms/src/org/eclipse/ui/internal/forms/widgets/TextSegment.d Sun Apr 19 13:49:38 2009 +0200 @@ -12,12 +12,17 @@ *******************************************************************************/ module org.eclipse.ui.internal.forms.widgets.TextSegment; +import java.lang.all; + import org.eclipse.ui.internal.forms.widgets.ParagraphSegment; import org.eclipse.ui.internal.forms.widgets.Locator; import org.eclipse.ui.internal.forms.widgets.SelectionData; import org.eclipse.ui.internal.forms.widgets.FormTextModel; -// import com.ibm.icu.text.BreakIterator; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Vector; +import java.util.Set; import org.eclipse.swt.SWT; import org.eclipse.swt.graphics.Color; @@ -27,12 +32,7 @@ import org.eclipse.swt.graphics.Point; import org.eclipse.swt.graphics.Rectangle; -import java.lang.all; -import java.util.Vector; -import java.util.Hashtable; -import 
java.util.ArrayList; -import java.util.Set; -import java.mangoicu.UBreakIterator; +import com.ibm.icu.text.BreakIterator; /** * @version 1.0 @@ -720,10 +720,10 @@ if (textFragments !is null) return; ArrayList list = new ArrayList(); - auto wb = UBreakIterator.openLineIterator( ULocale.Default, getText() ); - scope(exit) wb.close(); + BreakIterator wb = BreakIterator.getLineInstance(); + wb.setText(getText()); int cursor = 0; - for (int loc = wb.first(); loc !is UBreakIterator.Done; loc = wb.next()) { + for (int loc = wb.first(); loc !is BreakIterator.DONE; loc = wb.next()) { if (loc is 0) continue; String word = text.substring(cursor, loc); diff -r 2755ef2c8ef8 -r ebefa5c2eab4 rakefile --- a/rakefile Sun Apr 19 12:22:47 2009 +0200 +++ b/rakefile Sun Apr 19 13:49:38 2009 +0200 @@ -76,10 +76,13 @@ LIBNAMES_CORE = [ "org.eclipse.core.runtime", "org.eclipse.core.commands", + "org.eclipse.core.databinding", "org.eclipse.core.jobs" ] LIBNAMES_JFACE = [ "org.eclipse.jface" ] +LIBNAMES_JFACEBIND = [ "org.eclipse.jface.databinding" ] + LIBNAMES_JFACETEXT = [ "org.eclipse.text", "org.eclipse.jface.text.projection", "org.eclipse.jface.text", ] @@ -313,25 +316,17 @@ desc "Build Eclipse Core" task :core do + buildTree( "com.ibm.icu", "src", "res" ) buildTree( "org.eclipse.core.runtime", "src", "res" ) buildTree( "org.eclipse.core.commands", "src", "res" ) + buildTree( "org.eclipse.core.databinding", "src", "res" ) buildTree( "org.eclipse.core.jobs", "src", "res" ) end -desc "Build Jface Databinding" -task :jfacebind do - buildTree( "org.eclipse.jface.databinding", "src", "res" ) -end -desc "Build Databinding" -task :bind do - buildTree( "com.ibm.icu", "src", "res" ) - buildTree( "org.eclipse.core.databinding", "src", "res" ) - buildTree( "org.eclipse.jface.databinding", "src", "res" ) -end - desc "Build JFace" task :jface do buildTree( "org.eclipse.jface", "src", "res" ) + buildTree( "org.eclipse.jface.databinding", "src", "res" ) end desc "Build Eclipse Tools" @@ -429,4 
+424,35 @@ end +desc "Build JFace Databinding Snippet Collection" +task :bindsnippets, :explicit_snp do | t, args | + PREFIX = "Bind" + SRCPATH = "src" + BASEPATH = "org.eclipse.jface.examples.databinding" + libnames = LIBNAMES_BASIC + LIBNAMES_SWT + LIBNAMES_EQUINOX + + LIBNAMES_CORE + LIBNAMES_JFACE + LIBNAMES_JFACEBIND + snps_exclude = [] + allsnippets = FileList[ File.join(BASEPATH, SRCPATH, "**/*.d" )] + if args.explicit_snp != nil + snpname = args.explicit_snp + puts "Building jfacesnippets[#{snpname}]" + buildApp( BASEPATH, SRCPATH, "res", "", PREFIX, args.explicit_snp, nil, libnames ) + else + allsnippets.each do | snp | + if snp =~ /.*[\\\/](\w+)\.d$/ + snpname = $1 + puts "Building jfacesnippets[#{snpname}]" + if !snps_exclude.include? snpname + buildApp( BASEPATH, SRCPATH, "res", "", PREFIX, snpname, nil, libnames ) + end + else + puts snp + raise "Name does not match #{snp}" + end + end + end +end + + +