# HG changeset patch # User Frank Benoit # Date 1214168251 -7200 # Node ID 040da1cb0d76d692acfaa90527e0680da3137fcf # Parent cd18fa3b71f112697a7c052095411178a2dae2a6 Add a local copy of the mango ICU binding to work out the utf8 usability. Will hopefully go back into mango. diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/ICU.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/ICU.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,688 @@ +/******************************************************************************* + + @file ICU.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. 
+
+
+                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+        @version        Initial version; October 2004
+                        Updated to ICU v3.2; March 2005
+
+        @author         Kris
+                        John Reimer
+                        Anders F Bjorklund (Darwin patches)
+
+
+*******************************************************************************/
+
+// NOTE(review): this file is added as dwtx/dwtxhelper/mangoicu/ICU.d while the
+// module path below spells the package "dwthelper" -- confirm which is intended.
+module dwtx.dwthelper.mangoicu.ICU;
+
+/*******************************************************************************
+
+        Library version identifiers
+
+        ICULib is the version suffix used when building the shared-library
+        file names; ICUSig is the NUL-terminated suffix appended to every
+        exported function name before it is looked up.
+
+        The branches form one else-chain so that exactly one ICULib/ICUSig
+        pair is declared: the pair for ICU30, ICU32, ICU34 or ICU36 when that
+        version identifier is set, otherwise the ICU 3.8 default. Without the
+        chain, setting ICU30/ICU32/ICU34 also compiled the default branch and
+        declared both constants twice.
+
+*******************************************************************************/
+
+version (ICU30)
+        {
+        private static const final char[] ICULib = "30";
+        private static const final char[] ICUSig = "_3_0\0";
+        }
+else version (ICU32)
+        {
+        private static const final char[] ICULib = "32";
+        private static const final char[] ICUSig = "_3_2\0";
+        }
+else version (ICU34)
+        {
+        private static const final char[] ICULib = "34";
+        private static const final char[] ICUSig = "_3_4\0";
+        }
+else version (ICU36)
+        {
+        private static const final char[] ICULib = "36";
+        private static const final char[] ICUSig = "_3_6\0";
+        }
+else
+        {
+        private static const final char[] ICULib = "38";
+        private static const final char[] ICUSig = "_3_8\0";
+        }
+
+/*******************************************************************************
+
+        C runtime string-length routines
+
+*******************************************************************************/
+
+private static extern (C) uint strlen (char *s);
+private static extern (C) uint wcslen (wchar *s);
+
+
+/*******************************************************************************
+
+        Some low-level routines to help bind the ICU C-API to D.
+ +*******************************************************************************/ + +protected class ICU +{ + /*********************************************************************** + + The library names to load within the target environment + + ***********************************************************************/ + + version (Win32) + { + protected static char[] icuuc = "icuuc"~ICULib~".dll"; + protected static char[] icuin = "icuin"~ICULib~".dll"; + } + else + version (linux) + { + protected static char[] icuuc = "libicuuc.so."~ICULib; + protected static char[] icuin = "libicui18n.so."~ICULib; + } + else + version (darwin) + { + protected static char[] icuuc = "libicuuc.dylib."~ICULib; + protected static char[] icuin = "libicui18n.dylib."~ICULib; + } + else + { + static assert (false); + } + + /*********************************************************************** + + Use this for the primary argument-type to most ICU functions + + ***********************************************************************/ + + protected typedef void* Handle; + + /*********************************************************************** + + Parse-error filled in by several functions + + ***********************************************************************/ + + public struct ParseError + { + int line, + offset; + wchar[16] preContext, + postContext; + } + + /*********************************************************************** + + The binary form of a version on ICU APIs is an array of + four bytes + + ***********************************************************************/ + + public struct Version + { + ubyte[4] info; + } + + /*********************************************************************** + + ICU error codes (the ones which are referenced) + + ***********************************************************************/ + + protected enum Error:int + { + OK, + BufferOverflow=15 + } + + /*********************************************************************** + + 
***********************************************************************/ + + protected static final bool isError (Error e) + { + return e > 0; + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final void exception (char[] msg) + { + throw new ICUException (msg); + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final void testError (Error e, char[] msg) + { + if (e > 0) + exception (msg); + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final char* toString (char[] string) + { + static char[] empty = ""; + + if (! string.length) + return (string.ptr) ? empty.ptr : null; + +// if (* (&string[0] + string.length)) + { + // Need to make a copy + char[] copy = new char [string.length + 1]; + copy [0..string.length] = string; + copy [string.length] = 0; + string = copy; + } + return string.ptr; + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final wchar* toString (wchar[] string) + { + static wchar[] empty = ""; + + if (! string.length) + return (string.ptr) ? 
empty.ptr : null; + +// if (* (&string[0] + string.length)) + { + // Need to make a copy + wchar[] copy = new wchar [string.length + 1]; + copy [0..string.length] = string; + copy [string.length] = 0; + string = copy; + } + return string.ptr; + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final uint length (char* s) + { + return strlen (s); + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final uint length (wchar* s) + { + return wcslen (s); + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final char[] toArray (char* s) + { + if (s) + return s[0..strlen (s)]; + return null; + } + + /*********************************************************************** + + ***********************************************************************/ + + protected static final wchar[] toArray (wchar* s) + { + if (s) + return s[0..wcslen (s)]; + return null; + } +} + + +/******************************************************************************* + +*******************************************************************************/ + +class ICUException : Exception +{ + /*********************************************************************** + + Construct exception with the provided text string + + ***********************************************************************/ + + this (char[] msg) + { + super (msg); + } +} + + +/******************************************************************************* + + Function address loader for Win32 + +*******************************************************************************/ + +version (Win32) +{ + typedef void* HANDLE; + extern (Windows) HANDLE 
LoadLibraryA (char*);
+        extern (Windows) HANDLE GetProcAddress (HANDLE, char*);
+        extern (Windows) void FreeLibrary (HANDLE);
+
+        /***********************************************************************
+
+                Binds function pointers to the entry points of a DLL
+
+        ***********************************************************************/
+
+        class FunctionLoader
+        {
+                /***************************************************************
+
+                        One binding request: 'fnc' is the address of the
+                        function pointer to fill in, 'name' is the function
+                        name without the ICU version suffix
+
+                ***************************************************************/
+
+                protected struct Bind
+                {
+                        void**  fnc;
+                        char[]  name;
+                }
+
+                /***************************************************************
+
+                        Load 'library' and resolve every entry of 'targets'
+                        (each name gets ICUSig appended). Returns the module
+                        handle, to be handed to unbind() later.
+
+                        Throws an Exception when the library cannot be
+                        loaded or when any of the functions is missing.
+
+                ***************************************************************/
+
+                static final void* bind (char[] library, inout Bind[] targets)
+                {
+                        HANDLE lib = LoadLibraryA (ICU.toString(library));
+
+                        // report a failed load as such, rather than as the
+                        // first function that cannot be resolved from it
+                        if (lib is null)
+                            throw new Exception ("could not open library " ~ library);
+
+                        foreach (Bind b; targets)
+                                {
+                                // ICUSig carries the trailing NUL that makes
+                                // name.ptr usable as a C string
+                                char[] name = b.name ~ ICUSig;
+
+                                *b.fnc = GetProcAddress (lib, name.ptr);
+                                if (*b.fnc is null)
+                                    // leave that NUL out of the message text
+                                    throw new Exception ("required " ~ name[0 .. name.length - 1] ~ " in library " ~ library);
+                                }
+                        return lib;
+                }
+
+                /***************************************************************
+
+                        Release a library handle returned by bind(). This
+                        does nothing unless version CorrectedTeardown is set.
+
+                ***************************************************************/
+
+                static final void unbind (void* library)
+                {
+                        version (CorrectedTeardown)
+                                 FreeLibrary (cast(HANDLE) library);
+                }
+        }
+}
+
+
+/*******************************************************************************
+
+        2004-11-26:  Added Linux shared library support -- John Reimer
+
+*******************************************************************************/
+
+else version (linux)
+{
+        //Tell build to link with dl library
+        version(build) { pragma(link, "dl"); }
+
+        // from include/bits/dlfcn.h on Linux
+        const int RTLD_LAZY     = 0x00001;      // Lazy function call binding
+        const int RTLD_NOW      = 0x00002;      // Immediate function call binding
+        const int RTLD_NOLOAD   = 0x00004;      // no object load
+        const int RTLD_DEEPBIND = 0x00008;
+        const int RTLD_GLOBAL   = 0x00100;      // make object available to whole program
+
+        extern(C)
+        {
+                void* dlopen(char* filename, int flag);
+                char* dlerror();
+                void* dlsym(void* handle, char* symbol);
+                int   dlclose(void* handle);
+        }
+
+        class FunctionLoader
+        {
+                /***************************************************************
+
+                        One binding request: 'fnc' is the address of the
+                        function pointer to fill in, 'name' is the function
+                        name without the ICU version suffix
+
+                ***************************************************************/
+
+                protected struct Bind
+                {
+                        void**  fnc;
+                        char[]  name;
+                }
+
+                /***************************************************************
+
+                        Open 'library' with dlopen (RTLD_NOW) and resolve
+                        every entry of 'targets' (each name gets ICUSig
+                        appended). Returns the library handle, to be handed
+                        to unbind() later.
+
+                        Throws an Exception when the library cannot be
+                        opened or when any of the functions is missing.
+
+                ***************************************************************/
+
+                static final void* bind (char[] library, inout Bind[] targets)
+                {
+                        // printf("the library is %s\n", ICU.toString(library));
+                        void* lib = dlopen(ICU.toString(library), RTLD_NOW);
+
+                        // dlopen yields null on failure; say so directly
+                        // instead of carrying on into the symbol lookups
+                        if (lib is null)
+                            throw new Exception ("could not open library " ~ library);
+
+                        // clear the error buffer
+                        dlerror();
+
+                        foreach (Bind b; targets)
+                                {
+                                // ICUSig carries the trailing NUL that makes
+                                // name.ptr usable as a C string
+                                char[] name = b.name ~ ICUSig;
+
+                                *b.fnc = dlsym (lib, name.ptr);
+                                if (*b.fnc is null)
+                                   {
+                                   // errorInfo = ICU.toArray(dlerror());
+                                   // printf("%s", dlerror());
+                                   // leave that NUL out of the message text
+                                   throw new Exception ("required " ~ name[0 .. name.length - 1] ~ " in library " ~ library);
+                                   }
+                                }
+                        return lib;
+                }
+
+                /***************************************************************
+
+                        Close a library handle returned by bind(). This does
+                        nothing unless version CorrectedTeardown is set.
+
+                        Throws an Exception when dlclose reports a failure.
+
+                ***************************************************************/
+
+                static final void unbind (void* library)
+                {
+                        version (CorrectedTeardown)
+                                {
+                                // dlclose returns 0 on success and non-zero
+                                // on failure (the old test was inverted and
+                                // threw on every successful close)
+                                if (dlclose (library) != 0)
+                                    throw new Exception ("close library failed\n");
+                                }
+                }
+        }
+}
+
+
+/*******************************************************************************
+
+        2004-12-20:  Added Darwin shared library support -- afb
+
+*******************************************************************************/
+
+else version (darwin)
+{
+        // #include <mach-o/loader.h>
+
+        struct mach_header
+        {
+            uint    magic;      /* mach magic number identifier */
+            uint    cputype;    /* cpu specifier */
+            uint    cpusubtype; /* machine specifier */
+            uint    filetype;   /* type of file */
+            uint    ncmds;      /* number of load commands */
+            uint    sizeofcmds; /* the size of all the load commands */
+            uint    flags;      /* flags */
+        }
+
+        /* Constant for the magic field of the mach_header */
+        const uint MH_MAGIC = 0xfeedface;   // the mach magic number
+        const uint MH_CIGAM = 0xcefaedfe;   // x86 variant
+
+        // #include <mach-o/dyld.h>
+
+        typedef void *NSObjectFileImage;
+
+        typedef void *NSModule;
+
+        typedef void *NSSymbol;
+
+        enum // DYLD_BOOL: uint
+        {
+            FALSE,
+            TRUE
+        }
+        alias uint DYLD_BOOL;
+
+        enum // NSObjectFileImageReturnCode: uint
+        {
+            NSObjectFileImageFailure, /* for this a message is printed on stderr */
+            NSObjectFileImageSuccess,
+            NSObjectFileImageInappropriateFile,
+            NSObjectFileImageArch,
+            NSObjectFileImageFormat, /* for this a message is printed on stderr */
+            NSObjectFileImageAccess
+        }
+        alias uint NSObjectFileImageReturnCode;
+
+        enum // NSLinkEditErrors: uint
+        {
+            NSLinkEditFileAccessError,
+            NSLinkEditFileFormatError,
+            NSLinkEditMachResourceError,
+            NSLinkEditUnixResourceError,
+            NSLinkEditOtherError,
+            NSLinkEditWarningError,
+            NSLinkEditMultiplyDefinedError,
+            NSLinkEditUndefinedError
+        }
+        alias uint NSLinkEditErrors;
+
+        extern(C)
+        {
+            NSObjectFileImageReturnCode NSCreateObjectFileImageFromFile(char *pathName, NSObjectFileImage* objectFileImage);
+            DYLD_BOOL NSDestroyObjectFileImage(NSObjectFileImage objectFileImage);
+
+            mach_header * NSAddImage(char *image_name, uint options);
+            const uint
NSADDIMAGE_OPTION_NONE = 0x0; + const uint NSADDIMAGE_OPTION_RETURN_ON_ERROR = 0x1; + const uint NSADDIMAGE_OPTION_WITH_SEARCHING = 0x2; + const uint NSADDIMAGE_OPTION_RETURN_ONLY_IF_LOADED = 0x4; + const uint NSADDIMAGE_OPTION_MATCH_FILENAME_BY_INSTALLNAME = 0x8; + + NSModule NSLinkModule(NSObjectFileImage objectFileImage, char* moduleName, uint options); + const uint NSLINKMODULE_OPTION_NONE = 0x0; + const uint NSLINKMODULE_OPTION_BINDNOW = 0x01; + const uint NSLINKMODULE_OPTION_PRIVATE = 0x02; + const uint NSLINKMODULE_OPTION_RETURN_ON_ERROR = 0x04; + const uint NSLINKMODULE_OPTION_DONT_CALL_MOD_INIT_ROUTINES = 0x08; + const uint NSLINKMODULE_OPTION_TRAILING_PHYS_NAME = 0x10; + DYLD_BOOL NSUnLinkModule(NSModule module_, uint options); + + void NSLinkEditError(NSLinkEditErrors *c, int *errorNumber, char **fileName, char **errorString); + + DYLD_BOOL NSIsSymbolNameDefined(char *symbolName); + DYLD_BOOL NSIsSymbolNameDefinedInImage(mach_header *image, char *symbolName); + NSSymbol NSLookupAndBindSymbol(char *symbolName); + NSSymbol NSLookupSymbolInModule(NSModule module_, char* symbolName); + NSSymbol NSLookupSymbolInImage(mach_header *image, char *symbolName, uint options); + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND = 0x0; + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND_NOW = 0x1; + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_BIND_FULLY = 0x2; + const uint NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR = 0x4; + + void* NSAddressOfSymbol(NSSymbol symbol); + char* NSNameOfSymbol(NSSymbol symbol); + } + + + class FunctionLoader + { + /*************************************************************** + + ***************************************************************/ + + protected struct Bind + { + void** fnc; + char[] name; + } + + /*************************************************************** + + ***************************************************************/ + + private static NSModule open(char* filename) + { + NSModule mod = null; + NSObjectFileImage fileImage = 
null; + debug printf("Trying to load: %s\n", filename); + + NSObjectFileImageReturnCode returnCode = + NSCreateObjectFileImageFromFile(filename, &fileImage); + if(returnCode == NSObjectFileImageSuccess) + { + mod = NSLinkModule(fileImage,filename, + NSLINKMODULE_OPTION_RETURN_ON_ERROR | + NSLINKMODULE_OPTION_PRIVATE | + NSLINKMODULE_OPTION_BINDNOW); + NSDestroyObjectFileImage(fileImage); + } + else if(returnCode == NSObjectFileImageInappropriateFile) + { + NSDestroyObjectFileImage(fileImage); + /* Could be a dynamic library rather than a bundle */ + mod = cast(NSModule) NSAddImage(filename, + NSADDIMAGE_OPTION_RETURN_ON_ERROR); + } + else + { + debug printf("FileImage Failed: %d\n", returnCode); + } + return mod; + } + + private static void* symbol(NSModule mod, char* name) + { + NSSymbol symbol = null; + uint magic = (* cast(mach_header *) mod).magic; + + if ( (mod == cast(NSModule) -1) && NSIsSymbolNameDefined(name)) + /* Global context, use NSLookupAndBindSymbol */ + symbol = NSLookupAndBindSymbol(name); + else if ( ( magic == MH_MAGIC || magic == MH_CIGAM ) && + NSIsSymbolNameDefinedInImage(cast(mach_header *) mod, name)) + symbol = NSLookupSymbolInImage(cast(mach_header *) mod, name, + NSLOOKUPSYMBOLINIMAGE_OPTION_BIND | + NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR); + else + symbol = NSLookupSymbolInModule(mod, name); + + return NSAddressOfSymbol(symbol); + } + + static final void* bind (char[] library, inout Bind[] targets) + { + static char[] errorInfo; + + debug printf("the library is %s\n", ICU.toString(library)); + + void* lib = null; + static char[][] usual_suspects = [ "", "/usr/local/lib/", "/usr/lib/", + /* Fink */ "/sw/lib/", /* DarwinPorts */ "/opt/local/lib/" ]; + foreach (char[] prefix; usual_suspects) + { + lib = cast(void*) open(ICU.toString(prefix ~ library)); + if (lib != null) break; + } + if (lib == null) + { + throw new Exception ("could not open library " ~ library); + } + + // clear the error buffer + // error(); + + foreach (Bind b; 
targets) + { + // Note: all C functions have a underscore prefix in Mach-O symbols + char[] name = "_" ~ b.name ~ ICUSig; + + *b.fnc = symbol(cast(NSModule) lib, name.ptr); + if (*b.fnc != null) + { + debug printf ("bound '%.*s'\n", name); + } + else + { + // errorInfo = ICU.toArray(error()); + throw new Exception ("required " ~ name ~ " in library " ~ library); + } + } + return lib; + } + + /*************************************************************** + + ***************************************************************/ + + private static bool close(NSModule mod) + { + uint magic = (* cast(mach_header *) mod).magic; + if ( magic == MH_MAGIC || magic == MH_CIGAM ) + { + // Can not unlink dynamic libraries on Darwin + return true; + } + + return (NSUnLinkModule(mod, 0) == TRUE); + } + + static final void unbind (void* library) + { + version (CorrectedTeardown) + { + if (! close(cast(NSModule) library)) + throw new Exception ("close library failed\n"); + } + } + } +} + +/******************************************************************************* + + unknown platform + +*******************************************************************************/ + +else static assert(0); // need an implementation of FunctionLoader for this OS + + diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UBreakIterator.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UBreakIterator.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,564 @@ +/******************************************************************************* + + @file UBreakIterator.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UBreakIterator; + +private import dwtx.dwthelper.mangoicu.ICU; + +public import dwtx.dwthelper.mangoicu.ULocale, + dwtx.dwthelper.mangoicu.UString; + +/******************************************************************************* + +*******************************************************************************/ + +class UCharacterIterator : UBreakIterator +{ + /*********************************************************************** + + ***********************************************************************/ + + this (inout ULocale locale, UText text = null) + { + super (Type.Character, locale, text); + } +} + + +/******************************************************************************* + +*******************************************************************************/ + +class UWordIterator : UBreakIterator +{ + public enum Break + { + None = 0, + NoneLimit = 100, + Number = 100, + NumberLimit = 200, + Letter = 200, + LetterLimit = 300, + Kana = 300, + KanaLimit = 400, + Ideo = 400, + IdeoLimit = 500 + } + + /*********************************************************************** + + ***********************************************************************/ + + this (inout ULocale locale, UText text = null) + { + super (Type.Word, locale, text); + } + + /*********************************************************************** + + Return the status from the break rule that determined + the most recently returned break position. 
+ + ***********************************************************************/ + + void getStatus (inout Break b) + { + b = cast(Break) super.getStatus(); + } +} + + +/******************************************************************************* + +*******************************************************************************/ + +class ULineIterator : UBreakIterator +{ + public enum Break + { + Soft = 0, + SoftLimit = 100, + Hard = 100, + HardLimit = 200 + } + + /*********************************************************************** + + ***********************************************************************/ + + this (inout ULocale locale, UText text = null) + { + super (Type.Line, locale, text); + } + + /*********************************************************************** + + Return the status from the break rule that determined + the most recently returned break position. + + ***********************************************************************/ + + void getStatus (inout Break b) + { + b = cast(Break) super.getStatus(); + } +} + + +/******************************************************************************* + +*******************************************************************************/ + +class USentenceIterator : UBreakIterator +{ + public enum Break + { + Term = 0, + TermLimit = 100, + Sep = 100, + Limit = 200 + } + + /*********************************************************************** + + ***********************************************************************/ + + this (inout ULocale locale, UText text = null) + { + super (Type.Sentence, locale, text); + } + + /*********************************************************************** + + Return the status from the break rule that determined + the most recently returned break position. 
+
+        ***********************************************************************/
+
+        void getStatus (inout Break b)
+        {
+                b = cast(Break) super.getStatus();
+        }
+}
+
+
+/*******************************************************************************
+
+        Break iterator of Type.Title
+
+*******************************************************************************/
+
+class UTitleIterator : UBreakIterator
+{
+        /***********************************************************************
+
+                Open a title-break iterator for 'locale', optionally
+                positioned over 'text'
+
+        ***********************************************************************/
+
+        this (inout ULocale locale, UText text = null)
+        {
+                super (Type.Title, locale, text);
+        }
+}
+
+
+/*******************************************************************************
+
+        Break iterator driven by caller-supplied rules
+
+*******************************************************************************/
+
+class URuleIterator : UBreakIterator
+{
+        /***********************************************************************
+
+                Open a new UBreakIterator for locating text boundaries
+                using specified breaking rules
+
+                'text' is optional: when it is null (the default) the
+                iterator is opened with a null text pointer and a zero
+                length rather than dereferencing the null reference;
+                the text can be supplied afterwards through setText().
+
+                Throws an ICUException if ICU reports an error.
+
+        ***********************************************************************/
+
+        this (UText rules, UText text = null)
+        {
+                Error  e;
+                wchar* content = null;
+                uint   extent  = 0;
+
+                // the default argument is null, so text.get must not be
+                // touched unconditionally
+                if (text !is null)
+                   {
+                   content = text.get.ptr;
+                   extent  = text.length;
+                   }
+
+                handle = ubrk_openRules (rules.get.ptr, rules.length, content, extent, null, e);
+                testError (e, "failed to open rule iterator");
+        }
+}
+
+
+/*******************************************************************************
+
+        BreakIterator defines methods for finding the location of boundaries
+        in text. A UBreakIterator maintains a current position and scans
+        over text, returning the index of characters where boundaries occur.
+
+        Line boundary analysis determines where a text string can be broken
+        when line-wrapping. The mechanism correctly handles punctuation and
+        hyphenated words.
+
+        Sentence boundary analysis allows selection with correct interpretation
+        of periods within numbers and abbreviations, and trailing punctuation
+        marks such as quotation marks and parentheses.
+ + Word boundary analysis is used by search and replace functions, as well + as within text editing applications that allow the user to select words + with a double click. Word selection provides correct interpretation of + punctuation marks within and following words. Characters that are not + part of a word, such as symbols or punctuation marks, have word-breaks + on both sides. + + Character boundary analysis allows users to interact with characters + as they expect to, for example, when moving the cursor through a text + string. Character boundary analysis provides correct navigation of + through character strings, regardless of how the character is stored. + For example, an accented character might be stored as a base character + and a diacritical mark. What users consider to be a character can differ + between languages. + + Title boundary analysis locates all positions, typically starts of + words, that should be set to Title Case when title casing the text. + + See + this page for full details. + +*******************************************************************************/ + +private class UBreakIterator : ICU +{ + package Handle handle; + + // this is returned by next(), previous() etc ... + const uint Done = uint.max; + + /*********************************************************************** + + internal types passed to C API + + ***********************************************************************/ + + private enum Type + { + Character, + Word, + Line, + Sentence, + Title + } + + + /*********************************************************************** + + Internal use only! + + ***********************************************************************/ + + private this () + { + } + + /*********************************************************************** + + Open a new UBreakIterator for locating text boundaries for + a specified locale. A UBreakIterator may be used for detecting + character, line, word, and sentence breaks in text. 
+
+        ***********************************************************************/
+
+        this (Type type, inout ULocale locale, UText text)
+        {
+                Error  e;
+                wchar* content = null;
+                uint   extent  = 0;
+
+                // every subclass constructor in this module defaults
+                // 'text' to null, so open without text in that case
+                // instead of dereferencing the null reference
+                if (text !is null)
+                   {
+                   content = text.get.ptr;
+                   extent  = text.length;
+                   }
+
+                handle = ubrk_open (type, toString(locale.name), content, extent, e);
+                testError (e, "failed to create break iterator");
+        }
+
+        /***********************************************************************
+
+                Close a UBreakIterator
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                ubrk_close (handle);
+        }
+
+        /***********************************************************************
+
+                Sets an existing iterator to point to a new piece of text
+
+                Throws an ICUException if ICU reports an error.
+
+        ***********************************************************************/
+
+        void setText (UText text)
+        {
+                Error e;
+                ubrk_setText (handle, text.get.ptr, text.length, e);
+                testError (e, "failed to set iterator text");
+        }
+
+        /***********************************************************************
+
+                Determine the most recently-returned text boundary
+
+        ***********************************************************************/
+
+        uint current ()
+        {
+                return ubrk_current (handle);
+        }
+
+        /***********************************************************************
+
+                Determine the text boundary following the current text
+                boundary, or Done if all text boundaries have been
+                returned.
+
+                If offset is specified, determines the text boundary
+                following the specified offset instead. The value
+                returned is always greater than offset, or Done.
+
+                Leaving offset at uint.max (the default) selects the
+                first form.
+
+        ***********************************************************************/
+
+        uint next (uint offset = uint.max)
+        {
+                if (offset == uint.max)
+                    return ubrk_next (handle);
+                return ubrk_following (handle, offset);
+        }
+
+        /***********************************************************************
+
+                Determine the text boundary preceding the current text
+                boundary, or Done if all text boundaries have been returned.
+
+                If offset is specified, determines the text boundary preceding
+                the specified offset. The value returned is always smaller than
+                offset, or Done.
+
+        ***********************************************************************/
+
+        uint previous (uint offset = uint.max)
+        {
+                if (offset == uint.max)
+                    return ubrk_previous (handle);
+                return ubrk_preceding (handle, offset);
+        }
+
+        /***********************************************************************
+
+                Determine the index of the first character in the text
+                being scanned. This is not always the same as index 0
+                of the text.
+
+                Returns the value reported by ubrk_first.
+
+        ***********************************************************************/
+
+        uint first ()
+        {
+                return ubrk_first (handle);
+        }
+
+        /***********************************************************************
+
+                Determine the index immediately beyond the last character
+                in the text being scanned. This is not the same as the last
+                character.
+
+                Returns the value reported by ubrk_last.
+
+        ***********************************************************************/
+
+        uint last ()
+        {
+                return ubrk_last (handle);
+        }
+
+        /***********************************************************************
+
+                Returns true if the specified position is a boundary position.
+                As a side effect, leaves the iterator pointing to the first
+                boundary position at or after "offset".
+
+                The result of ubrk_isBoundary is a byte; any non-zero
+                value is reported as true.
+
+        ***********************************************************************/
+
+        bool isBoundary (uint offset)
+        {
+                return ubrk_isBoundary (handle, offset) != 0;
+        }
+
+        /***********************************************************************
+
+                Return the status from the break rule that determined
+                the most recently returned break position.
+
+                The status is stored into 's'; this forwards to the
+                argument-less getStatus overload.
+
+        ***********************************************************************/
+
+        void getStatus (inout uint s)
+        {
+                s = getStatus ();
+        }
+
+        /***********************************************************************
+
+                Return the status from the break rule that determined
+                the most recently returned break position.
+ + The values appear in the rule source within brackets, + {123}, for example. For rules that do not specify a status, + a default value of 0 is returned. + + For word break iterators, the possible values are defined + in enum UWordBreak + + ***********************************************************************/ + + private uint getStatus () + { + return ubrk_getRuleStatus (handle); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Pointers to the ICU ubrk_* entry points used by this class. + They are declared with C linkage and are filled in through + the targets table below + + ***********************************************************************/ + + private static extern (C) + { + Handle function (uint, char*, wchar*, uint, inout Error) ubrk_open; + Handle function (wchar*, uint, wchar*, uint, void*, inout Error) ubrk_openRules; + void function (Handle) ubrk_close; + void function (Handle, wchar*, uint, inout Error) ubrk_setText; + uint function (Handle) ubrk_current; + uint function (Handle) ubrk_next; + uint function (Handle) ubrk_previous; + uint function (Handle) ubrk_first; + uint function (Handle) ubrk_last; + uint function (Handle, uint) ubrk_preceding; + uint function (Handle, uint) ubrk_following; + byte function (Handle, uint) ubrk_isBoundary; + uint function (Handle) ubrk_getRuleStatus; + } + + /*********************************************************************** + + Table pairing the address of each function pointer above + with the name of the ICU symbol it is bound to + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + 
{cast(void**) &ubrk_open, "ubrk_open"}, + {cast(void**) &ubrk_close, "ubrk_close"}, + {cast(void**) &ubrk_openRules, "ubrk_openRules"}, + {cast(void**) &ubrk_setText, "ubrk_setText"}, + {cast(void**) &ubrk_current, "ubrk_current"}, + {cast(void**)
&ubrk_next, "ubrk_next"}, + {cast(void**) &ubrk_previous, "ubrk_previous"}, + {cast(void**) &ubrk_first, "ubrk_first"}, + {cast(void**) &ubrk_last, "ubrk_last"}, + {cast(void**) &ubrk_preceding, "ubrk_preceding"}, + {cast(void**) &ubrk_following, "ubrk_following"}, + {cast(void**) &ubrk_isBoundary, "ubrk_isBoundary"}, + {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"}, + ]; + + /********************************************************************** + + **********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /********************************************************************** + + **********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UCalendar.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UCalendar.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,666 @@ +/******************************************************************************* + + @file UCalendar.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. 
This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UCalendar; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +public import dwtx.dwthelper.mangoicu.ULocale, + dwtx.dwthelper.mangoicu.UTimeZone; + +/******************************************************************************* + + UCalendar is used for converting between a UDate object and + a set of integer fields such as Year, Month, Day, + Hour, and so on. (A UDate object represents a specific instant + in time with millisecond precision. See UDate for information about + the UDate) + + Types of UCalendar interpret a UDate according to the rules of a + specific calendar system. UCalendar supports Traditional & Gregorian. + + A UCalendar object can produce all the time field values needed to + implement the date-time formatting for a particular language and + calendar style (for example, Japanese-Gregorian, Japanese-Traditional). + + When computing a UDate from time fields, two special circumstances + may arise: there may be insufficient information to compute the UDate + (such as only year and month but no day in the month), or there may be + inconsistent information (such as "Tuesday, July 15, 1996" -- July 15, + 1996 is actually a Monday). + + Insufficient information. The calendar will use default information + to specify the missing fields. 
This may vary by calendar; for the + Gregorian calendar, the default for a field is the same as that of + the start of the epoch: i.e., Year = 1970, Month = January, + Date = 1, etc. + + Inconsistent information. If fields conflict, the calendar will give + preference to fields set more recently. For example, when determining + the day, the calendar will look for one of the following combinations + of fields. The most recent combination, as determined by the most + recently set single field, will be used. + + See http://oss.software.ibm.com/icu/apiref/udat_8h.html for full + details. + +*******************************************************************************/ + +class UCalendar : ICU +{ + package Handle handle; + + typedef double UDate; + + //Possible types of UCalendars + public enum Type + { + Traditional, + Gregorian + } + + // Possible fields in a UCalendar + public enum DateFields + { + Era, + Year, + Month, + WeekOfYear, + WeekOfMonth, + Date, + DayOfYear, + DayOfWeek, + DayOfWeekInMonth, + AmPm, + Hour, + HourOfDay, + Minute, + Second, + Millisecond, + ZoneOffset, + DstOffset, + YearWoy, + DowLocal, + ExtendedYear, + JulianDay, + MillisecondsInDay, + FieldCount, + DayOfMonth = Date + } + + // Possible days of the week in a UCalendar + public enum DaysOfWeek + { + Sunday = 1, + Monday, + Tuesday, + Wednesday, + Thursday, + Friday, + Saturday + } + + // Possible months in a UCalendar + public enum Months + { + January, + February, + March, + April, + May, + June, + July, + August, + September, + October, + November, + December, + UnDecimber + } + + // Possible AM/PM values in a UCalendar + public enum AMPMs + { + AM, + PM + } + + // Possible formats for a UCalendar's display name + public enum DisplayNameType + { + Standard, + ShortStandard, + DST, + ShortDST + } + + // Possible limit values for a UCalendar + public enum Limit + { + Minimum, + Maximum, + GreatestMinimum, + LeastMaximum, + ActualMinimum, + ActualMaximum + } + + // Types of UCalendar 
attributes + private enum Attribute + { + Lenient, // unused: set from UDateFormat instead + FirstDayOfWeek, + MinimalDaysInFirstWeek + } + + /*********************************************************************** + + Open a UCalendar. A UCalendar may be used to convert a + millisecond value to a year, month, and day + + ***********************************************************************/ + + this (inout UTimeZone zone, inout ULocale locale, Type type = Type.Traditional) + { + Error e; + + handle = ucal_open (zone.name.ptr, zone.name.length, toString(locale.name), type, e); + testError (e, "failed to open calendar"); + } + + /*********************************************************************** + + Internal only: Open a UCalendar with the given handle + + ***********************************************************************/ + + package this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Close this UCalendar + + ***********************************************************************/ + + ~this () + { + ucal_close (handle); + } + + /*********************************************************************** + + Set the TimeZone used by a UCalendar + + ***********************************************************************/ + + void setTimeZone (inout UTimeZone zone) + { + Error e; + + ucal_setTimeZone (handle, zone.name.ptr, zone.name.length, e); + testError (e, "failed to set calendar time zone"); + } + + /*********************************************************************** + + Get display name of the TimeZone used by this UCalendar + + ***********************************************************************/ + + void getTimeZoneName (UString s, inout ULocale locale, DisplayNameType type=DisplayNameType.Standard) + { + uint format (wchar* dst, uint length, inout ICU.Error e) + { + return ucal_getTimeZoneDisplayName (handle, type, toString(locale.name), dst, length, e); + } + + 
s.format (&format, "failed to get time zone name"); + } + + /*********************************************************************** + + Determine if a UCalendar is currently in daylight savings + time + + ***********************************************************************/ + + bool inDaylightTime () + { + Error e; + + auto x = ucal_inDaylightTime (handle, e); + testError (e, "failed to test calendar daylight time"); + return x != 0; + } + + /*********************************************************************** + + Get the current date and time + + ***********************************************************************/ + + UDate getNow () + { + return ucal_getNow (); + } + + /*********************************************************************** + + Get a UCalendar's current time in millis. The time is + represented as milliseconds from the epoch + + ***********************************************************************/ + + UDate getMillis () + { + Error e; + + auto x = ucal_getMillis (handle, e); + testError (e, "failed to get time"); + return x; + } + + /*********************************************************************** + + Set a UCalendar's current time in millis. 
The time is + represented as milliseconds from the epoch + + ***********************************************************************/ + + void setMillis (UDate date) + { + Error e; + + ucal_setMillis (handle, date, e); + testError (e, "failed to set time"); + } + + /*********************************************************************** + + Set a UCalendar's current date + + ***********************************************************************/ + + void setDate (uint year, Months month, uint date) + { + Error e; + + ucal_setDate (handle, year, month, date, e); + testError (e, "failed to set date"); + } + + /*********************************************************************** + + Set a UCalendar's current date and time + + ***********************************************************************/ + + void setDateTime (uint year, Months month, uint date, uint hour, uint minute, uint second) + { + Error e; + + ucal_setDateTime (handle, year, month, date, hour, minute, second, e); + testError (e, "failed to set date/time"); + } + + /*********************************************************************** + + Returns TRUE if the given Calendar object is equivalent + to this one + + ***********************************************************************/ + + bool isEquivalent (UCalendar when) + { + return ucal_equivalentTo (handle, when.handle) != 0; + } + + /*********************************************************************** + + Compares the Calendar time. A calendar always equals + itself; otherwise the two millisecond values are compared + + ***********************************************************************/ + + bool isEqual (UCalendar when) + { + return (this is when || getMillis == when.getMillis); + } + + /*********************************************************************** + + Returns true if this Calendar's current time is before + "when"'s current time. 
A calendar is never before itself, + so the identity test short-circuits the time comparison + + ***********************************************************************/ + + bool isBefore (UCalendar when) + { + return (this !is when && getMillis < when.getMillis); + } + +
/*********************************************************************** + + Returns true if this Calendar's current time is after + "when"'s current time. A calendar is never after itself, + so the identity test short-circuits the time comparison + + ***********************************************************************/ + + bool isAfter (UCalendar when) + { + return (this !is when && getMillis > when.getMillis); + } + + /*********************************************************************** + + Add a specified signed amount to a particular field in a + UCalendar + + ***********************************************************************/ + + void add (DateFields field, uint amount) + { + Error e; + + ucal_add (handle, field, amount, e); + testError (e, "failed to add to calendar"); + } + + /*********************************************************************** + + Roll a particular field in a UCalendar by a specified + signed amount. Unlike add, rolling is documented by ICU + as leaving more significant fields unchanged + + ***********************************************************************/ + + void roll (DateFields field, uint amount) + { + Error e; + + ucal_roll (handle, field, amount, e); + testError (e, "failed to roll calendar"); + } + + /*********************************************************************** + + Get the current value of a field from a UCalendar + + ***********************************************************************/ + + uint get (DateFields field) + { + Error e; + + auto x = ucal_get (handle, field, e); + testError (e, "failed to get calendar field"); + return x; + } + + /*********************************************************************** + + Set the value of a field in a UCalendar + + ***********************************************************************/ + + void set (DateFields field, uint value) + { + ucal_set (handle, field, value); + } + + 
/*********************************************************************** + + Determine if a field in a UCalendar is set + + ***********************************************************************/ + + bool isSet (DateFields field) + { + return ucal_isSet
(handle, field) != 0; + } + + /*********************************************************************** + + Clear a field in a UCalendar + + ***********************************************************************/ + + void clearField (DateFields field) + { + ucal_clearField (handle, field); + } + + /*********************************************************************** + + Clear all fields in a UCalendar + + ***********************************************************************/ + + void clear () + { + ucal_clear (handle); + } + + /*********************************************************************** + + Determine a limit for a field in a UCalendar. A limit is a + maximum or minimum value for a field + + ***********************************************************************/ + + uint getLimit (DateFields field, Limit type) + { + Error e; + + auto x = ucal_getLimit (handle, field, type, e); + testError (e, "failed to get calendar limit"); + return x; + } + + /*********************************************************************** + + Get the MinimalDaysInFirstWeek attribute of this UCalendar + + ***********************************************************************/ + + uint getDaysInFirstWeek () + { + return ucal_getAttribute (handle, Attribute.MinimalDaysInFirstWeek); + } + + /*********************************************************************** + + Get the FirstDayOfWeek attribute of this UCalendar + (presumably a DaysOfWeek value; confirm against ICU) + + ***********************************************************************/ + + uint getFirstDayOfWeek () + { + return ucal_getAttribute (handle, Attribute.FirstDayOfWeek); + } + + /*********************************************************************** + + Set the MinimalDaysInFirstWeek attribute of this UCalendar + + ***********************************************************************/ + + void setDaysInFirstWeek (uint value) + { + ucal_setAttribute (handle, 
Attribute.MinimalDaysInFirstWeek, value); + } + + /*********************************************************************** + + Set the FirstDayOfWeek attribute of this UCalendar + (presumably a DaysOfWeek value; confirm against ICU) + + ***********************************************************************/ + + void setFirstDayOfWeek (uint value) + { + ucal_setAttribute (handle,
Attribute.FirstDayOfWeek, value); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Pointers to the ICU ucal_* entry points used by this class. + They are declared with C linkage and are filled in through + the targets table below. + + Every entry that reports a status takes it as inout Error, + so that ICU receives the address of the caller's variable + and testError can inspect the result afterwards + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, char*, Type, inout Error) ucal_open; + void function (Handle) ucal_close; + UDate function () ucal_getNow; + UDate function (Handle, inout Error) ucal_getMillis; + void function (Handle, UDate, inout Error) ucal_setMillis; + void function (Handle, uint, uint, uint, inout Error) ucal_setDate; + void function (Handle, uint, uint, uint, uint, uint, uint, inout Error) ucal_setDateTime; + byte function (Handle, Handle) ucal_equivalentTo; + void function (Handle, uint, uint, inout Error) ucal_add; + void function (Handle, uint, uint, inout Error) ucal_roll; + uint function (Handle, uint, inout Error) ucal_get; + void function (Handle, uint, uint) ucal_set; + byte function (Handle, uint) ucal_isSet; + void function (Handle, uint) ucal_clearField; + void function (Handle) ucal_clear; + uint function (Handle, uint, uint, inout Error) ucal_getLimit; + void function (Handle, wchar*, uint, inout Error) ucal_setTimeZone; + byte function (Handle, inout Error) ucal_inDaylightTime; + uint function (Handle, uint) ucal_getAttribute; + void function (Handle, uint, uint) ucal_setAttribute; + 
uint function (Handle, uint, char*, wchar*, uint, inout Error) ucal_getTimeZoneDisplayName; + } + + /*********************************************************************** + + Table pairing the address of each function pointer above + with the name of the ICU symbol it is bound to + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucal_open,
"ucal_open"}, + {cast(void**) &ucal_close, "ucal_close"}, + {cast(void**) &ucal_getNow, "ucal_getNow"}, + {cast(void**) &ucal_getMillis, "ucal_getMillis"}, + {cast(void**) &ucal_setMillis, "ucal_setMillis"}, + {cast(void**) &ucal_setDate, "ucal_setDate"}, + {cast(void**) &ucal_setDateTime, "ucal_setDateTime"}, + {cast(void**) &ucal_equivalentTo, "ucal_equivalentTo"}, + {cast(void**) &ucal_add, "ucal_add"}, + {cast(void**) &ucal_roll, "ucal_roll"}, + {cast(void**) &ucal_get, "ucal_get"}, + {cast(void**) &ucal_set, "ucal_set"}, + {cast(void**) &ucal_clearField, "ucal_clearField"}, + {cast(void**) &ucal_clear, "ucal_clear"}, + {cast(void**) &ucal_getLimit, "ucal_getLimit"}, + {cast(void**) &ucal_setTimeZone, "ucal_setTimeZone"}, + {cast(void**) &ucal_inDaylightTime, "ucal_inDaylightTime"}, + {cast(void**) &ucal_getAttribute, "ucal_getAttribute"}, + {cast(void**) &ucal_setAttribute, "ucal_setAttribute"}, + {cast(void**) &ucal_isSet, "ucal_isSet"}, + {cast(void**) &ucal_getTimeZoneDisplayName, "ucal_getTimeZoneDisplayName"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + +} diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UChar.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UChar.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,1240 @@ +/******************************************************************************* + + @file UChar.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UChar; + +private import dwtx.dwthelper.mangoicu.ICU; + +/******************************************************************************* + + This API provides low-level access to the Unicode Character + Database. 
In addition to raw property values, some convenience + functions calculate derived properties, for example for Java-style + programming. + + Unicode assigns each code point (not just assigned character) + values for many properties. Most of them are simple boolean + flags, or constants from a small enumerated list. For some + properties, values are strings or other relatively more complex + types. + + For more information see "About the Unicode Character Database" + (http://www.unicode.org/ucd/) and the ICU User Guide chapter on + Properties (http://oss.software.ibm.com/icu/userguide/properties.html). + + Many functions are designed to match java.lang.Character functions. + See the individual function documentation, and see the JDK 1.4.1 + java.lang.Character documentation at + http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html + + There are also functions that provide easy migration from C/POSIX + functions like isblank(). Their use is generally discouraged because + the C/POSIX standards do not define their semantics beyond the ASCII + range, which means that different implementations exhibit very different + behavior. Instead, Unicode properties should be used directly. + + There are also only a few, broad C/POSIX character classes, and they + tend to be used for conflicting purposes. For example, the "isalpha()" + class is sometimes used to determine word boundaries, while a more + sophisticated approach would at least distinguish initial letters from + continuation characters (the latter including combining marks). (In + ICU, BreakIterator is the most sophisticated API for word boundaries.) + Another example: There is no "istitle()" class for titlecase characters. + + A summary of the behavior of some C/POSIX character classification + implementations for Unicode is available at + http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html + + See + this page for full details. 
+
+*******************************************************************************/
+
+class UChar : ICU
+{
+        public enum Property
+        {
+                Alphabetic = 0,
+                BinaryStart = Alphabetic,
+                AsciiHexDigit,
+                BidiControl,
+                BidiMirrored,
+                Dash,
+                DefaultIgnorableCodePoint,
+                Deprecated,
+                Diacritic,
+                Extender,
+                FullCompositionExclusion,
+                GraphemeBase,
+                GraphemeExtend,
+                GraphemeLink,
+                HexDigit,
+                Hyphen,
+                IdContinue,
+                IdStart,
+                Ideographic,
+                IdsBinaryOperator,
+                IdsTrinaryOperator,
+                JoinControl,
+                LogicalOrderException,
+                Lowercase,
+                Math,
+                NoncharacterCodePoint,
+                QuotationMark,
+                Radical,
+                SoftDotted,
+                TerminalPunctuation,
+                UnifiedIdeograph,
+                Uppercase,
+                WhiteSpace,
+                XidContinue,
+                XidStart,
+                CaseSensitive,
+                STerm,
+                VariationSelector,
+                NfdInert,
+                NfkdInert,
+                NfcInert,
+                NfkcInert,
+                SegmentStarter,
+                BinaryLimit,
+                BidiClass = 0x1000,
+                IntStart = BidiClass,
+                Block, CanonicalCombiningClass,
+                DecompositionType,
+                EastAsianWidth,
+                GeneralCategory,
+                JoiningGroup,
+                JoiningType,
+                LineBreak,
+                NumericType,
+                Script,
+                HangulSyllableType,
+                NfdQuickCheck,
+                NfkdQuickCheck,
+                NfcQuickCheck,
+                NfkcQuickCheck,
+                LeadCanonicalCombiningClass,
+                TrailCanonicalCombiningClass,
+                IntLimit,
+                GeneralCategoryMask = 0x2000,
+                MaskStart = GeneralCategoryMask,
+                MaskLimit,
+                NumericValue = 0x3000,
+                DoubleStart = NumericValue,
+                DoubleLimit,
+                Age = 0x4000,
+                StringStart = Age,
+                BidiMirroringGlyph,
+                CaseFolding,
+                IsoComment,
+                LowercaseMapping,
+                Name,
+                SimpleCaseFolding,
+                SimpleLowercaseMapping,
+                SimpleTitlecaseMapping,
+                SimpleUppercaseMapping,
+                TitlecaseMapping,
+                Unicode1Name,
+                UppercaseMapping,
+                StringLimit,
+                InvalidCode = -1
+        }
+
+        public enum Category
+        {
+                Unassigned = 0,
+                GeneralOtherTypes = 0,
+                UppercaseLetter = 1,
+                LowercaseLetter = 2,
+                TitlecaseLetter = 3,
+                ModifierLetter = 4,
+                OtherLetter = 5,
+                NonSpacingMark = 6,
+                EnclosingMark = 7,
+                CombiningSpacingMark = 8,
+                DecimalDigitNumber = 9,
+                LetterNumber = 10,
+                OtherNumber = 11,
+                SpaceSeparator = 12,
+                LineSeparator = 13,
+                ParagraphSeparator = 14,
+                ControlChar = 15,
+                FormatChar = 16,
+                PrivateUseChar = 17,
+                Surrogate = 18,
+                DashPunctuation = 19,
+                StartPunctuation = 20,
+                EndPunctuation = 21,
+                ConnectorPunctuation = 22,
+                OtherPunctuation = 23,
+                MathSymbol = 24,
+                CurrencySymbol = 25,
+                ModifierSymbol = 26,
+                OtherSymbol = 27,
+                InitialPunctuation = 28,
+                FinalPunctuation = 29,
+                Count
+        }
+
+        public enum Direction
+        {
+                LeftToRight = 0,
+                RightToLeft = 1,
+                EuropeanNumber = 2,
+                EuropeanNumberSeparator = 3,
+                EuropeanNumberTerminator = 4,
+                ArabicNumber = 5,
+                CommonNumberSeparator = 6,
+                BlockSeparator = 7,
+                SegmentSeparator = 8,
+                WhiteSpaceNeutral = 9,
+                OtherNeutral = 10,
+                LeftToRightEmbedding = 11,
+                LeftToRightOverride = 12,
+                RightToLeftArabic = 13,
+                RightToLeftEmbedding = 14,
+                RightToLeftOverride = 15,
+                PopDirectionalFormat = 16,
+                DirNonSpacingMark = 17,
+                BoundaryNeutral = 18,
+                Count
+        }
+
+        public enum BlockCode
+        {
+                NoBlock = 0,
+                BasicLatin = 1,
+                Latin1Supplement = 2,
+                LatinExtendedA = 3,
+                LatinExtendedB = 4,
+                IpaExtensions = 5,
+                SpacingModifierLetters = 6,
+                CombiningDiacriticalMarks = 7,
+                Greek = 8,
+                Cyrillic = 9,
+                Armenian = 10,
+                Hebrew = 11,
+                Arabic = 12,
+                Syriac = 13,
+                Thaana = 14,
+                Devanagari = 15,
+                Bengali = 16,
+                Gurmukhi = 17,
+                Gujarati = 18,
+                Oriya = 19,
+                Tamil = 20,
+                Telugu = 21,
+                Kannada = 22,
+                Malayalam = 23,
+                Sinhala = 24,
+                Thai = 25,
+                Lao = 26,
+                Tibetan = 27,
+                Myanmar = 28,
+                Georgian = 29,
+                HangulJamo = 30,
+                Ethiopic = 31,
+                Cherokee = 32,
+                UnifiedCanadianAboriginalSyllabics = 33,
+                Ogham = 34,
+                Runic = 35,
+                Khmer = 36,
+                Mongolian = 37,
+                LatinExtendedAdditional = 38,
+                GreekExtended = 39,
+                GeneralPunctuation = 40,
+                SuperscriptsAndSubscripts = 41,
+                CurrencySymbols = 42,
+                CombiningMarksForSymbols = 43,
+                LetterlikeSymbols = 44,
+                NumberForms = 45,
+                Arrows = 46,
+                MathematicalOperators = 47,
+                MiscellaneousTechnical = 48,
+                ControlPictures = 49,
+                OpticalCharacterRecognition = 50,
+                EnclosedAlphanumerics = 51,
+                BoxDrawing = 52,
+                BlockElements = 53,
+                GeometricShapes = 54,
+                MiscellaneousSymbols = 55,
+                Dingbats = 56,
+                BraillePatterns = 57,
+                CjkRadicalsSupplement = 58,
+                KangxiRadicals = 59,
+                IdeographicDescriptionCharacters = 60,
+                CjkSymbolsAndPunctuation = 61,
+                Hiragana = 62,
+                Katakana = 63,
+                Bopomofo = 64,
+                HangulCompatibilityJamo = 65,
+                Kanbun = 66,
+                BopomofoExtended = 67,
+                EnclosedCjkLettersAndMonths = 68,
+                CjkCompatibility = 69,
+                CjkUnifiedIdeographsExtensionA = 70,
+                CjkUnifiedIdeographs = 71,
+                YiSyllables = 72,
+                YiRadicals = 73,
+                HangulSyllables = 74,
+                HighSurrogates = 75,
+                HighPrivateUseSurrogates = 76,
+                LowSurrogates = 77,
+                PrivateUse = 78,
+                PrivateUseArea = PrivateUse,
+                CjkCompatibilityIdeographs = 79,
+                AlphabeticPresentationForms = 80,
+                ArabicPresentationFormsA = 81,
+                CombiningHalfMarks = 82,
+                CjkCompatibilityForms = 83,
+                SmallFormVariants = 84,
+                ArabicPresentationFormsB = 85,
+                Specials = 86,
+                HalfwidthAndFullwidthForms = 87,
+                OldItalic = 88,
+                Gothic = 89,
+                Deseret = 90,
+                ByzantineMusicalSymbols = 91,
+                MusicalSymbols = 92,
+                MathematicalAlphanumericSymbols = 93,
+                CjkUnifiedIdeographsExtensionB = 94,
+                CjkCompatibilityIdeographsSupplement = 95,
+                Tags = 96,
+                CyrillicSupplementary = 97,
+                CyrillicSupplement = CyrillicSupplementary,
+                Tagalog = 98,
+                Hanunoo = 99,
+                Buhid = 100,
+                Tagbanwa = 101,
+                MiscellaneousMathematicalSymbolsA = 102,
+                SupplementalArrowsA = 103,
+                SupplementalArrowsB = 104,
+                MiscellaneousMathematicalSymbolsB = 105,
+                SupplementalMathematicalOperators = 106,
+                KatakanaPhoneticExtensions = 107,
+                VariationSelectors = 108,
+                SupplementaryPrivateUseAreaA = 109,
+                SupplementaryPrivateUseAreaB = 110,
+                Limbu = 111,
+                TaiLe = 112,
+                KhmerSymbols = 113,
+                PhoneticExtensions = 114,
+                MiscellaneousSymbolsAndArrows = 115,
+                YijingHexagramSymbols = 116,
+                LinearBSyllabary = 117,
+                LinearBIdeograms = 118,
+                AegeanNumbers = 119,
+                Ugaritic = 120,
+                Shavian = 121,
+                Osmanya = 122,
+                CypriotSyllabary = 123,
+                TaiXuanJingSymbols = 124,
+                VariationSelectorsSupplement = 125,
+                Count,
+                InvalidCode = -1
+        }
+
+        public enum EastAsianWidth
+        {
+                Neutral,
+                Ambiguous,
+                Halfwidth,
+                Fullwidth,
+                Narrow,
+                Wide,
+                Count
+        }
+
+        public enum CharNameChoice
+        {
+                Unicode,
+                Unicode10,
+                Extended,
+                Count
+        }
+
+        public enum NameChoice
+        {
+                Short,
+                Long,
+                Count
+        }
+
+        public enum DecompositionType
+        {
+                None,
+                Canonical,
+                Compat,
+                Circle,
+                Final,
+                Font,
+                Fraction,
+                Initial,
+                Isolated,
+                Medial,
+                Narrow,
+                Nobreak,
+                Small,
+                Square,
+                Sub,
+                Super,
+                Vertical,
+                Wide,
+                Count
+        }
+
+        public enum JoiningType
+        {
+                NonJoining,
+                JoinCausing,
+                DualJoining,
+                LeftJoining,
+                RightJoining,
+                Transparent,
+                Count
+        }
+
+        public enum JoiningGroup
+        {
+                NoJoiningGroup,
+                Ain,
+                Alaph,
+                Alef,
+                Beh,
+                Beth,
+                Dal,
+                DalathRish,
+                E,
+                Feh,
+                FinalSemkath,
+                Gaf,
+                Gamal,
+                Hah,
+                HamzaOnHehGoal,
+                He,
+                Heh,
+                HehGoal,
+                Heth,
+                Kaf,
+                Kaph,
+                KnottedHeh,
+                Lam,
+                Lamadh,
+                Meem,
+                Mim,
+                Noon,
+                Nun,
+                Pe,
+                Qaf,
+                Qaph,
+                Reh,
+                Reversed_Pe,
+                Sad,
+                Sadhe,
+                Seen,
+                Semkath,
+                Shin,
+                Swash_Kaf,
+                Syriac_Waw,
+                Tah,
+                Taw,
+                Teh_Marbuta,
+                Teth,
+                Waw,
+                Yeh,
+                Yeh_Barree,
+                Yeh_With_Tail,
+                Yudh,
+                Yudh_He,
+                Zain,
+                Fe,
+                Khaph,
+                Zhain,
+                Count
+        }
+
+        public enum LineBreak
+        {
+                Unknown,
+                Ambiguous,
+                Alphabetic,
+                BreakBoth,
+                BreakAfter,
+                BreakBefore,
+                MandatoryBreak,
+                ContingentBreak,
+                ClosePunctuation,
+                CombiningMark,
+                CarriageReturn,
+                Exclamation,
+                Glue,
+                Hyphen,
+                Ideographic,
+                Inseperable,
+                Inseparable = Inseperable,
+                InfixNumeric,
+                LineFeed,
+                Nonstarter,
+                Numeric,
+                OpenPunctuation,
+                PostfixNumeric,
+                PrefixNumeric,
+                Quotation,
+                ComplexContext,
+                Surrogate,
+                Space,
+                BreakSymbols,
+                Zwspace,
+                NextLine,
+                WordJoiner,
+                Count
+        }
+
+        public enum NumericType
+        {
+                None,
+                Decimal,
+                Digit,
+                Numeric,
+                Count
+        }
+
+        public enum HangulSyllableType
+        {
+                NotApplicable,
+                LeadingJamo,
+                VowelJamo,
+                TrailingJamo,
+                LvSyllable,
+                LvtSyllable,
+                Count
+        }
+
+        /***********************************************************************
+
+                Get the property value for an enumerated or integer
+                Unicode property for a code point. Also returns binary
+                and mask property values.
+
+                Unicode, especially in version 3.2, defines many more
+                properties than the original set in UnicodeData.txt.
+
+                The properties APIs are intended to reflect Unicode
+                properties as defined in the Unicode Character Database
+                (UCD) and Unicode Technical Reports (UTR). For details
+                about the properties see http://www.unicode.org/ . For
+                names of Unicode properties see the file PropertyAliases.txt
+
+                Both arguments are cast to uint and forwarded to the
+                bound u_getIntPropertyValue entry point.
+
+        ***********************************************************************/
+
+        uint getProperty (dchar c, Property p)
+        {
+                return u_getIntPropertyValue (cast(uint) c, cast(uint) p);
+        }
+
+        /***********************************************************************
+
+                Get the minimum value for an enumerated/integer/binary
+                Unicode property
+
+        ***********************************************************************/
+
+        uint getPropertyMinimum (Property p)
+        {
+                return u_getIntPropertyMinValue (p);
+        }
+
+        /***********************************************************************
+
+                Get the maximum value for an enumerated/integer/binary
+                Unicode property
+
+        ***********************************************************************/
+
+        uint getPropertyMaximum (Property p)
+        {
+                return u_getIntPropertyMaxValue (p);
+        }
+
+        /***********************************************************************
+
+                Returns the bidirectional category value for the code
+                point, which is used in the Unicode bidirectional algorithm
+                (UAX #9 http://www.unicode.org/reports/tr9/).
+
+                The uint returned by u_charDirection is cast to Direction.
+
+        ***********************************************************************/
+
+        Direction charDirection (dchar c)
+        {
+                return cast(Direction) u_charDirection (c);
+        }
+
+        /***********************************************************************
+
+                Returns the Unicode allocation block that contains the
+                character
+
+                The uint returned by ublock_getCode is cast to BlockCode.
+
+        ***********************************************************************/
+
+        BlockCode getBlockCode (dchar c)
+        {
+                return cast(BlockCode) ublock_getCode (c);
+        }
+
+        /***********************************************************************
+
+                Retrieve the name of a Unicode character.
+
+                The name is written into the caller-supplied dst buffer
+                (dst.ptr and dst.length are passed to u_charName) and the
+                slice dst [0..len] is returned, where len is the value
+                u_charName returned. The ICU status is handed to testError
+                together with a "buffer too small?" hint.
+
+        ***********************************************************************/
+
+        char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst)
+        {
+                Error e;
+
+                uint len = u_charName (c, choice, dst.ptr, dst.length, e);
+                testError (e, "failed to extract char name (buffer too small?)");
+                return dst [0..len];
+        }
+
+        /***********************************************************************
+
+                Get the ISO 10646 comment for a character.
+
+                As with getCharName, the text is written into the
+                caller-supplied dst buffer and the slice dst [0..len] is
+                returned; the ICU status is handed to testError.
+
+        ***********************************************************************/
+
+        char[] getComment (dchar c, inout char[] dst)
+        {
+                Error e;
+
+                uint len = u_getISOComment (c, dst.ptr, dst.length, e);
+                testError (e, "failed to extract comment (buffer too small?)");
+                return dst [0..len];
+        }
+
+        /***********************************************************************
+
+                Find a Unicode character by its name and return its code
+                point value.
+
+                name is passed through toString() before the ICU call,
+                and the resulting ICU status is handed to testError.
+
+        ***********************************************************************/
+
+        dchar charFromName (CharNameChoice choice, char[] name)
+        {
+                Error e;
+
+                dchar c = u_charFromName (choice, toString(name), e);
+                testError (e, "failed to locate char name");
+                return c;
+        }
+
+        /***********************************************************************
+
+                Return the Unicode name for a given property, as given in the
+                Unicode database file PropertyAliases.txt
+
+                The char* returned by ICU is converted with toArray().
+
+        ***********************************************************************/
+
+        char[] getPropertyName (Property p, NameChoice choice)
+        {
+                return toArray (u_getPropertyName (p, choice));
+        }
+
+        /***********************************************************************
+
+                Return the Unicode name for a given property value, as given
+                in the Unicode database file PropertyValueAliases.txt.
+
+                Note the argument order: this method takes (p, choice,
+                value) but calls u_getPropertyValueName with (p, value,
+                choice). The char* result is converted with toArray().
+
+        ***********************************************************************/
+
+        char[] getPropertyValueName (Property p, NameChoice choice, uint value)
+        {
+                return toArray (u_getPropertyValueName (p, value, choice));
+        }
+
+        /***********************************************************************
+
+                Gets the Unicode version information
+
+                The result is written into the caller-supplied v.
+
+        ***********************************************************************/
+
+        void getUnicodeVersion (inout Version v)
+        {
+                u_getUnicodeVersion (v);
+        }
+
+        /***********************************************************************
+
+                Get the "age" of the code point
+
+                The result is written into the caller-supplied v.
+
+        ***********************************************************************/
+
+        void getCharAge (dchar c, inout Version v)
+        {
+                u_charAge (c, v);
+        }
+
+
+        /***********************************************************************
+
+                These are externalised directly to the client (sans wrapper),
+                but this may have to change for linux, depending upon the
+                ICU function-naming conventions within the Posix libraries.
+
+                Each declaration in this block is an extern (C) function
+                pointer. The 'targets' table further down pairs every one
+                of them with the ICU symbol name it is bound to (for
+                example isUAlphabetic with "u_isUAlphabetic").
+
+        ***********************************************************************/
+
+        final static extern (C)
+        {
+                /***************************************************************
+
+                        Check if a code point has the Alphabetic Unicode
+                        property.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUAlphabetic;
+
+                /***************************************************************
+
+                        Check if a code point has the Lowercase Unicode
+                        property.
+
+                ***************************************************************/
+
+                bool function (dchar c) isULowercase;
+
+                /***************************************************************
+
+                        Check if a code point has the Uppercase Unicode
+                        property.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUUppercase;
+
+                /***************************************************************
+
+                        Check if a code point has the White_Space Unicode
+                        property.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUWhiteSpace;
+
+                /***************************************************************
+
+                        Determines whether the specified code point has the
+                        general category "Ll" (lowercase letter).
+
+                ***************************************************************/
+
+                bool function (dchar c) isLower;
+
+                /***************************************************************
+
+                        Determines whether the specified code point has the
+                        general category "Lu" (uppercase letter).
+
+                ***************************************************************/
+
+                bool function (dchar c) isUpper;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        titlecase letter.
+
+                ***************************************************************/
+
+                bool function (dchar c) isTitle;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        digit character according to Java.
+
+                ***************************************************************/
+
+                bool function (dchar c) isDigit;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        letter character.
+
+                ***************************************************************/
+
+                bool function (dchar c) isAlpha;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is an
+                        alphanumeric character (letter or digit) according
+                        to Java.
+
+                ***************************************************************/
+
+                bool function (dchar c) isAlphaNumeric;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        hexadecimal digit.
+
+                ***************************************************************/
+
+                bool function (dchar c) isHexDigit;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        punctuation character.
+
+                ***************************************************************/
+
+                bool function (dchar c) isPunct;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        "graphic" character (printable, excluding spaces).
+
+                ***************************************************************/
+
+                bool function (dchar c) isGraph;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        "blank" or "horizontal space", a character that
+                        visibly separates words on a line.
+
+                ***************************************************************/
+
+                bool function (dchar c) isBlank;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is
+                        "defined", which usually means that it is assigned
+                        a character.
+
+                ***************************************************************/
+
+                bool function (dchar c) isDefined;
+
+                /***************************************************************
+
+                        Determines if the specified character is a space
+                        character or not.
+
+                ***************************************************************/
+
+                bool function (dchar c) isSpace;
+
+                /***************************************************************
+
+                        Determine if the specified code point is a space
+                        character according to Java.
+
+                ***************************************************************/
+
+                bool function (dchar c) isJavaSpaceChar;
+
+                /***************************************************************
+
+                        Determines if the specified code point is a whitespace
+                        character according to Java/ICU.
+
+                ***************************************************************/
+
+                bool function (dchar c) isWhiteSpace;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        control character (as defined by this function).
+
+                ***************************************************************/
+
+                bool function (dchar c) isCtrl;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is an ISO
+                        control code.
+
+                ***************************************************************/
+
+                bool function (dchar c) isISOControl;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        printable character.
+
+                ***************************************************************/
+
+                bool function (dchar c) isPrint;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        base character.
+
+                ***************************************************************/
+
+                bool function (dchar c) isBase;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        as the first character in an identifier according to
+                        Unicode (The Unicode Standard, Version 3.0, chapter
+                        5.16 Identifiers).
+
+                ***************************************************************/
+
+                bool function (dchar c) isIDStart;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        in an identifier according to Java.
+
+                ***************************************************************/
+
+                bool function (dchar c) isIDPart;
+
+                /***************************************************************
+
+                        Determines if the specified character should be
+                        regarded as an ignorable character in an identifier,
+                        according to Java.
+
+                ***************************************************************/
+
+                bool function (dchar c) isIDIgnorable;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        as the first character in a Java identifier.
+
+                ***************************************************************/
+
+                bool function (dchar c) isJavaIDStart;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        in a Java identifier.
+
+                ***************************************************************/
+
+                bool function (dchar c) isJavaIDPart;
+
+                /***************************************************************
+
+                        Determines whether the code point has the
+                        Bidi_Mirrored property.
+
+                ***************************************************************/
+
+                bool function (dchar c) isMirrored;
+
+                /***************************************************************
+
+                        Returns the decimal digit value of a decimal digit
+                        character.
+
+                        NOTE(review): declared here as returning ubyte, but
+                        ICU documents u_charDigitValue as returning int32_t
+                        with -1 for a non-digit. Confirm the declared
+                        return type against the ICU headers.
+
+                ***************************************************************/
+
+                ubyte function (dchar c) charDigitValue;
+
+                /***************************************************************
+
+                        Maps the specified character to a "mirror-image"
+                        character.
+
+                ***************************************************************/
+
+                dchar function (dchar c) charMirror;
+
+                /***************************************************************
+
+                        Returns the general category value for the code point.
+
+                ***************************************************************/
+
+                ubyte function (dchar c) charType;
+
+                /***************************************************************
+
+                        Returns the combining class of the code point as
+                        specified in UnicodeData.txt.
+
+                ***************************************************************/
+
+                ubyte function (dchar c) getCombiningClass;
+
+                /***************************************************************
+
+                        The given character is mapped to its lowercase
+                        equivalent according to UnicodeData.txt; if the
+                        character has no lowercase equivalent, the
+                        character itself is returned.
+
+                ***************************************************************/
+
+                dchar function (dchar c) toLower;
+
+                /***************************************************************
+
+                        The given character is mapped to its uppercase equivalent
+                        according to UnicodeData.txt; if the character has no
+                        uppercase equivalent, the character itself is returned.
+
+                ***************************************************************/
+
+                dchar function (dchar c) toUpper;
+
+                /***************************************************************
+
+                        The given character is mapped to its titlecase
+                        equivalent according to UnicodeData.txt; if none
+                        is defined, the character itself is returned.
+
+                ***************************************************************/
+
+                dchar function (dchar c) toTitle;
+
+                /***************************************************************
+
+                        The given character is mapped to its case folding
+                        equivalent according to UnicodeData.txt and
+                        CaseFolding.txt; if the character has no case folding
+                        equivalent, the character itself is returned.
+
+                ***************************************************************/
+
+                dchar function (dchar c, uint options) foldCase;
+
+                /***************************************************************
+
+                        Returns the decimal digit value of the code point in
+                        the specified radix.
+
+                        NOTE(review): declared here as returning uint, but
+                        ICU documents u_digit as returning int32_t with -1
+                        when the code point has no value in the radix.
+                        Confirm how callers are expected to detect that.
+
+                ***************************************************************/
+
+                uint function (dchar ch, ubyte radix) digit;
+
+                /***************************************************************
+
+                        Determines the character representation for a specific
+                        digit in the specified radix.
+
+                ***************************************************************/
+
+                dchar function (uint digit, ubyte radix) forDigit;
+
+                /***************************************************************
+
+                        Get the numeric value for a Unicode code point as
+                        defined in the Unicode Character Database.
+
+                ***************************************************************/
+
+                double function (dchar c) getNumericValue;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+                'library' receives the value returned by
+                FunctionLoader.bind in the static constructor and is
+                passed to FunctionLoader.unbind in the static destructor.
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Private function pointers for the ICU entry points that
+                the wrapper methods of this class call. Like the public
+                pointers above, each one is listed in the 'targets' table.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                uint function (uint, uint) u_getIntPropertyValue;
+                uint function (uint) u_getIntPropertyMinValue;
+                uint function (uint) u_getIntPropertyMaxValue;
+                uint function (dchar) u_charDirection;
+                uint function (dchar) ublock_getCode;
+                uint function (dchar, uint, char*, uint, inout Error) u_charName;
+                uint function (dchar, char*, uint, inout Error) u_getISOComment;
+                uint function (uint, char*, inout Error) u_charFromName;
+                char* function (uint, uint) u_getPropertyName;
+                char* function (uint, uint, uint) u_getPropertyValueName;
+                void function (inout Version) u_getUnicodeVersion;
+                void function (dchar, inout Version) u_charAge;
+        }
+
+        /***********************************************************************
+
+                Binding table: each entry pairs the address of one of the
+                function pointers declared in this class with the name of
+                the ICU symbol it stands for. The table is handed to
+                FunctionLoader.bind by the static constructor.
+
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &forDigit, "u_forDigit"},
+                {cast(void**) &digit, "u_digit"},
+                {cast(void**) &foldCase, "u_foldCase"},
+                {cast(void**) &toTitle, "u_totitle"},
+                {cast(void**) &toUpper, "u_toupper"},
+                {cast(void**) &toLower, "u_tolower"},
+                {cast(void**) &charType, "u_charType"},
+                {cast(void**) &charMirror, "u_charMirror"},
+                {cast(void**) &charDigitValue, "u_charDigitValue"},
+                {cast(void**) &isJavaIDPart, "u_isJavaIDPart"},
+                {cast(void**) &isJavaIDStart, "u_isJavaIDStart"},
+                {cast(void**) &isIDIgnorable, "u_isIDIgnorable"},
+                {cast(void**) &isIDPart, "u_isIDPart"},
+                {cast(void**) &isIDStart, "u_isIDStart"},
+                {cast(void**) &isMirrored, "u_isMirrored"},
+                {cast(void**) &isBase, "u_isbase"},
+                {cast(void**) &isPrint, "u_isprint"},
+                {cast(void**) &isISOControl, "u_isISOControl"},
+                {cast(void**) &isCtrl, "u_iscntrl"},
+                {cast(void**) &isWhiteSpace, "u_isWhitespace"},
+                {cast(void**) &isJavaSpaceChar, "u_isJavaSpaceChar"},
+                {cast(void**) &isSpace, "u_isspace"},
+                {cast(void**) &isDefined, "u_isdefined"},
+                {cast(void**) &isBlank, "u_isblank"},
+                {cast(void**) &isGraph, "u_isgraph"},
+                {cast(void**) &isPunct, "u_ispunct"},
+                {cast(void**) &isHexDigit, "u_isxdigit"},
+                {cast(void**) &isAlpha, "u_isalpha"},
+                {cast(void**) &isAlphaNumeric, "u_isalnum"},
+                {cast(void**) &isDigit, "u_isdigit"},
+                {cast(void**) &isTitle, "u_istitle"},
+                {cast(void**) &isUpper, "u_isupper"},
+                {cast(void**) &isLower, "u_islower"},
+                {cast(void**) &isUAlphabetic, "u_isUAlphabetic"},
+                {cast(void**) &isUWhiteSpace, "u_isUWhiteSpace"},
+                {cast(void**) &isUUppercase, "u_isUUppercase"},
+                {cast(void**) &isULowercase, "u_isULowercase"},
+                {cast(void**) &getNumericValue, "u_getNumericValue"},
+                {cast(void**) &getCombiningClass, "u_getCombiningClass"},
+                {cast(void**) &u_getIntPropertyValue, "u_getIntPropertyValue"},
+                {cast(void**) &u_getIntPropertyMinValue,"u_getIntPropertyMinValue"},
+                {cast(void**) &u_getIntPropertyMaxValue,"u_getIntPropertyMaxValue"},
+                {cast(void**) &u_charDirection, "u_charDirection"},
+                {cast(void**) &ublock_getCode, "ublock_getCode"},
+                {cast(void**) &u_charName, "u_charName"},
+                {cast(void**) &u_getISOComment, "u_getISOComment"},
+                {cast(void**) &u_charFromName, "u_charFromName"},
+                {cast(void**) &u_getPropertyName, "u_getPropertyName"},
+                {cast(void**) &u_getPropertyValueName, "u_getPropertyValueName"},
+                {cast(void**) &u_getUnicodeVersion, "u_getUnicodeVersion"},
+                {cast(void**) &u_charAge, "u_charAge"},
+                ];
+
+        /***********************************************************************
+
+                Static constructor: passes icuuc (defined outside this
+                class) and the 'targets' table to FunctionLoader.bind and
+                stores the returned value in 'library'.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+        }
+
+        /***********************************************************************
+
+                Static destructor: passes the stored 'library' value to
+                FunctionLoader.unbind.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UCollator.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/UCollator.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,734 @@
+/*******************************************************************************
+
+        @file UCollator.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+
+        Permission is hereby granted to anyone to use this software for any
+        purpose, including commercial applications, and to alter it and/or
+        redistribute it freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented; you must
+           not claim that you wrote the original software. If you use this
+           software in a product, an acknowledgment within documentation of
+           said product would be appreciated but is not required.
+
+        2. Altered source versions must be plainly marked as such, and must
+           not be misrepresented as being the original software.
+
+        3. This notice may not be removed or altered from any distribution
+           of the source.
+
+        4. Derivative works are permitted, but they must carry this notice
+           in full and credit the original source.
+
+
+                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+        @version Initial version, November 2004
+        @author Kris
+
+        Note that this package and documentation is built around the ICU
+        project (http://oss.software.ibm.com/icu/).
Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UCollator; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.USet, + dwtx.dwthelper.mangoicu.ULocale, + dwtx.dwthelper.mangoicu.UString; + +/******************************************************************************* + + The API for Collator performs locale-sensitive string comparison. + You use this service to build searching and sorting routines for + natural language text. Important: The ICU collation service has been + reimplemented in order to achieve better performance and UCA compliance. + For details, see the collation design document. + + For more information about the collation service see the users guide. + + Collation service provides correct sorting orders for most locales + supported in ICU. If specific data for a locale is not available, + the order eventually falls back to the UCA sort order. + + Sort ordering may be customized by providing your own set of rules. + For more on this subject see the Collation customization section of + the users guide. + + See the ICU collation API reference (ucol.h) + for full details.
+ +*******************************************************************************/ + +class UCollator : ICU +{ + package Handle handle; + + typedef void* UParseError; + + enum Attribute + { + FrenchCollation, + AlternateHandling, + CaseFirst, + CaseLevel, + NormalizationMode, + DecompositionMode = NormalizationMode, + strength, + HiraganaQuaternaryMode, + NumericCollation, + AttributeCount + } + + enum AttributeValue + { + Default = -1, + Primary = 0, + Secondary = 1, + Tertiary = 2, + DefaultStrength = Tertiary, + CeStrengthLimit, + Quaternary = 3, + Identical = 15, + strengthLimit, + Off = 16, + On = 17, + Shifted = 20, + NonIgnorable = 21, + LowerFirst = 24, + UpperFirst = 25, + AttributeValueCount + } + + enum RuleOption + { + TailoringOnly, + FullRules + } + + enum BoundMode + { + BoundLower = 0, + BoundUpper = 1, + BoundUpperLong = 2, + BoundValueCount + } + + typedef AttributeValue Strength; + + /*********************************************************************** + + Open a UCollator for comparing strings. The locale specified + determines the required collation rules. Special values for + locales can be passed in - if ULocale.Default is passed for + the locale, the default locale collation rules will be used. + If ULocale.Root is passed, UCA rules will be used + + ***********************************************************************/ + + this (ULocale locale) + { + Error e; + + handle = ucol_open (toString(locale.name), e); + testError (e, "failed to open collator"); + } + + /*********************************************************************** + + Produce a UCollator instance according to the rules supplied. + + The rules are used to change the default ordering, defined in + the UCA in a process called tailoring. 
For the syntax of the + rules please see users guide + + ***********************************************************************/ + + this (UText rules, AttributeValue mode, Strength strength) + { + Error e; + + handle = ucol_openRules (rules.get.ptr, rules.len, mode, strength, null, e); + testError (e, "failed to open rules-based collator"); + } + + /*********************************************************************** + + Open a collator defined by a short form string. The + structure and the syntax of the string is defined in + the "Naming collators" section of the users guide: + http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + Attributes are overriden by the subsequent attributes. + So, for "S2_S3", final strength will be 3. 3066bis + locale overrides individual locale parts. + + The call to this constructor is equivalent to a plain + constructor, followed by a series of calls to setAttribute + and setVariableTop + + ***********************************************************************/ + + this (char[] shortName, bool forceDefaults) + { + Error e; + + handle = ucol_openFromShortString (toString(shortName), forceDefaults, null, e); + testError (e, "failed to open short-name collator"); + } + + /*********************************************************************** + + Internal constructor invoked via USearch + + ***********************************************************************/ + + package this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Close a UCollator + + ***********************************************************************/ + + ~this () + { + ucol_close (handle); + } + + /*********************************************************************** + + Get a set containing the contractions defined by the + collator. + + The set includes both the UCA contractions and the + contractions defined by the collator. 
This set will
+                contain only strings. If a tailoring explicitly
+                suppresses contractions from the UCA (like Russian),
+                removed contractions will not be in the resulting set.
+
+                The contractions are delivered into the USet passed in; its
+                native handle is given to ucol_getContractions and the
+                returned status is checked with testError.
+
+        ***********************************************************************/
+
+        void getContractions (USet set)
+        {
+                Error e;
+
+                ucol_getContractions (handle, set.handle, e);
+                testError (e, "failed to get collator contractions");
+        }
+
+        /***********************************************************************
+
+                Compare two strings. Return value is -, 0, +
+
+                The result of ucol_strcoll is returned unchanged.
+
+        ***********************************************************************/
+
+        int strcoll (UText source, UText target)
+        {
+                return ucol_strcoll (handle, source.get.ptr, source.len, target.get.ptr, target.len);
+        }
+
+        /***********************************************************************
+
+                Determine if one string is greater than another. This
+                function is equivalent to strcoll() > 0
+
+        ***********************************************************************/
+
+        bool greater (UText source, UText target)
+        {
+                // the C function yields a byte; any non-zero value means true
+                return ucol_greater (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0;
+        }
+
+        /***********************************************************************
+
+                Determine if one string is greater than or equal to
+                another. 
This function is equivalent to strcoll() >= 0
+
+        ***********************************************************************/
+
+        bool greaterOrEqual (UText source, UText target)
+        {
+                // the C function yields a byte; any non-zero value means true
+                return ucol_greaterOrEqual (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0;
+        }
+
+        /***********************************************************************
+
+                Determine if two strings are equal. This function is
+                equivalent to strcoll() == 0
+
+        ***********************************************************************/
+
+        bool equal (UText source, UText target)
+        {
+                // the C function yields a byte; any non-zero value means true
+                return ucol_equal (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0;
+        }
+
+        /***********************************************************************
+
+                Get the collation strength used in a UCollator. The
+                strength influences how strings are compared.
+
+        ***********************************************************************/
+
+        Strength getStrength ()
+        {
+                return ucol_getStrength (handle);
+        }
+
+        /***********************************************************************
+
+                Set the collation strength used in this UCollator. The
+                strength influences how strings are compared. The value
+                is one of Primary, Secondary, Tertiary, Quaternary,
+                Identical, or Default
+
+        ***********************************************************************/
+
+        void setStrength (Strength s)
+        {
+                ucol_setStrength (handle, s);
+        }
+
+        /***********************************************************************
+
+                Get the display name for a UCollator. 
The display name is
+                suitable for presentation to a user
+
+                obj and display are passed to ucol_getDisplayName as its
+                objLoc and dispLoc arguments. The text is produced through
+                dst.format, which supplies the output buffer and raises
+                "failed to get collator display name" on failure.
+
+        ***********************************************************************/
+
+        void getDisplayName (ULocale obj, ULocale display, UString dst)
+        {
+                uint fmt (wchar* p, uint len, inout Error e)
+                {
+                        // write into the buffer window handed to this callback,
+                        // not into the current content of dst
+                        return ucol_getDisplayName (toString(obj.name), toString(display.name), p, len, e);
+                }
+
+                dst.format (&fmt, "failed to get collator display name");
+        }
+
+        /***********************************************************************
+
+                Returns current rules. Options define whether full rules
+                are returned or just the tailoring.
+
+                The text is produced through dst.format, which supplies the
+                output buffer. ucol_getRulesEx takes no status argument, so
+                the callback itself reports BufferOverflow when the length
+                needed exceeds the buffer it was given.
+
+        ***********************************************************************/
+
+        void getRules (UString dst, RuleOption o = RuleOption.FullRules)
+        {
+                uint fmt (wchar* p, uint len, inout Error e)
+                {
+                        // use the same buffer and capacity that the overflow
+                        // test below is made against
+                        uint needed = ucol_getRulesEx (handle, o, p, len);
+                        if (needed > len)
+                            e = e.BufferOverflow;
+                        return needed;
+                }
+
+                dst.format (&fmt, "failed to get collator rules");
+        }
+
+        /***********************************************************************
+
+                Get the short definition string for a collator.
+
+                This API harvests the collator's locale and the attribute
+                set and produces a string that can be used for opening a
+                collator with the same properties using the char[] style
+                constructor. This string will be normalized. 
+
+                The structure and the syntax of the string is defined in the
+                "Naming collators" section of the users guide:
+                http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators
+
+                The string is built in a 64 byte stack buffer and a heap
+                copy of the used part is returned.
+
+        ***********************************************************************/
+
+        char[] getShortDefinitionString (ULocale locale = ULocale.Default)
+        {
+                Error e;
+                char[64] dst;
+
+                uint len = ucol_getShortDefinitionString (handle, toString(locale.name), dst.ptr, dst.length, e);
+                testError (e, "failed to get collator short name");
+                return dst[0..len].dup;
+        }
+
+        /***********************************************************************
+
+                Verifies and normalizes short definition string. Normalized
+                short definition string has all the option sorted by the
+                argument name, so that equivalent definition strings are the
+                same
+
+                The string is built in a 64 byte stack buffer and a heap
+                copy of the used part is returned. No UParseError is
+                supplied (null is passed).
+
+        ***********************************************************************/
+
+        char[] normalizeShortDefinitionString (char[] source)
+        {
+                Error e;
+                char[64] dst;
+
+                uint len = ucol_normalizeShortDefinitionString (toString(source), dst.ptr, dst.length, null, e);
+                testError (e, "failed to normalize collator short name");
+                return dst[0..len].dup;
+        }
+
+        /***********************************************************************
+
+                Get a sort key for a string from a UCollator. Sort keys
+                may be compared using strcmp.
+
+                The key is written into the caller supplied result buffer.
+                Returns the slice of result that holds the key, or null
+                when result is too small to hold it.
+
+        ***********************************************************************/
+
+        ubyte[] getSortKey (UText t, ubyte[] result)
+        {
+                uint len = ucol_getSortKey (handle, t.get.ptr, t.len, result.ptr, result.length);
+
+                // ucol_getSortKey reports the size needed to store the key, so
+                // a key that exactly fills the buffer (len == result.length)
+                // has been stored completely and must not be rejected
+                if (len <= result.length)
+                    return result [0..len];
+                return null;
+        }
+
+        /***********************************************************************
+
+                Merge two sort keys. The levels are merged with their
+                corresponding counterparts (primaries with primaries,
+                secondaries with secondaries etc.). Between the values
+                from the same level a separator is inserted. 
example
+                (uncompressed): 191B1D 01 050505 01 910505 00 and
+                1F2123 01 050505 01 910505 00 will be merged as
+                191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
+                This allows for concatenating of first and last names for
+                sorting, among other things. If the destination buffer is
+                not big enough, the results are undefined. If any of source
+                lengths are zero or any of source pointers are null/undefined,
+                result is of size zero.
+
+                The merged key is written into the caller supplied result
+                buffer. Returns the slice of result that holds it, or null
+                when the reported length exceeds result.length.
+
+        ***********************************************************************/
+
+        ubyte[] mergeSortkeys (ubyte[] left, ubyte[] right, ubyte[] result)
+        {
+                uint len = ucol_mergeSortkeys (left.ptr, left.length, right.ptr, right.length, result.ptr, result.length);
+
+                // a merged key that exactly fills the buffer
+                // (len == result.length) still fits and must not be rejected
+                if (len <= result.length)
+                    return result [0..len];
+                return null;
+        }
+
+        /***********************************************************************
+
+                Produce a bound for a given sortkey and a number of levels.
+
+                Return value is always the number of bytes needed, regardless
+                of whether the result buffer was big enough or even valid.
+                (That describes the underlying ucol_getBound call; this
+                wrapper returns a slice of result, or null when the bound
+                does not fit.)
+
+                Resulting bounds can be used to produce a range of strings
+                that are between upper and lower bounds. For example, if
+                bounds are produced for a sortkey of string "smith", strings
+                between upper and lower bounds with one level would include
+                "Smith", "SMITH", "sMiTh".
+
+                There are two upper bounds that can be produced. If BoundUpper
+                is produced, strings matched would be as above. However, if
+                bound produced using BoundUpperLong is used, the above example
+                will also match "Smithsonian" and similar. 
+
+        ***********************************************************************/
+
+        ubyte[] getBound (BoundMode mode, ubyte[] source, ubyte[] result, uint levels = 1)
+        {
+                Error e;
+
+                uint len = ucol_getBound (source.ptr, source.length, mode, levels, result.ptr, result.length, e);
+                testError (e, "failed to get sortkey bound");
+
+                // len is the number of bytes the bound needs, so a bound that
+                // exactly fills the buffer (len == result.length) still fits
+                // and must not be rejected
+                if (len <= result.length)
+                    return result [0..len];
+                return null;
+        }
+
+        /***********************************************************************
+
+                Gets the version information for a Collator.
+
+                Version is currently an opaque 32-bit number which depends,
+                among other things, on major versions of the collator
+                tailoring and UCA
+
+                NOTE(review): the bound ucol_getVersion signature takes its
+                Version argument by value. Whether v is actually filled in
+                depends on how Version is declared (not visible here) -
+                confirm.
+
+        ***********************************************************************/
+
+        void getVersion (inout Version v)
+        {
+                ucol_getVersion (handle, v);
+        }
+
+        /***********************************************************************
+
+                Gets the UCA version information for this Collator
+
+                NOTE(review): same by-value concern as getVersion - confirm.
+
+        ***********************************************************************/
+
+        void getUCAVersion (inout Version v)
+        {
+                ucol_getUCAVersion (handle, v);
+        }
+
+        /***********************************************************************
+
+                Universal attribute setter
+
+                The status returned by ucol_setAttribute is checked with
+                testError.
+
+        ***********************************************************************/
+
+        void setAttribute (Attribute attr, AttributeValue value)
+        {
+                Error e;
+
+                ucol_setAttribute (handle, attr, value, e);
+                testError (e, "failed to set collator attribute");
+        }
+
+        /***********************************************************************
+
+                Universal attribute getter
+
+                The status returned by ucol_getAttribute is checked with
+                testError before the value is handed back.
+
+        ***********************************************************************/
+
+        AttributeValue getAttribute (Attribute attr)
+        {
+                Error e;
+
+                AttributeValue v = ucol_getAttribute (handle, attr, e);
+                testError (e, "failed to get collator attribute");
+                return v;
+        }
+
+        /***********************************************************************
+
+                Variable top is a two byte primary value which causes all
+                
the codepoints with primary values that are less than or equal
+                to the variable top to be shifted when alternate handling
+                is set to Shifted.
+
+                This overload passes the text t to ucol_setVariableTop. The
+                value that call returns is not used.
+
+        ***********************************************************************/
+
+        void setVariableTop (UText t)
+        {
+                Error e;
+
+                ucol_setVariableTop (handle, t.get.ptr, t.len, e);
+                testError (e, "failed to set variable-top");
+        }
+
+        /***********************************************************************
+
+                Sets the variable top to a collation element value
+                supplied. Variable top is set to the upper 16 bits.
+                Lower 16 bits are ignored.
+
+                This overload forwards to ucol_restoreVariableTop.
+
+        ***********************************************************************/
+
+        void setVariableTop (uint x)
+        {
+                Error e;
+
+                ucol_restoreVariableTop (handle, x, e);
+                testError (e, "failed to restore variable-top");
+        }
+
+        /***********************************************************************
+
+                Gets the variable top value of this Collator. Lower 16 bits
+                are undefined and should be ignored.
+
+        ***********************************************************************/
+
+        uint getVariableTop ()
+        {
+                Error e;
+
+                uint x = ucol_getVariableTop (handle, e);
+                testError (e, "failed to get variable-top");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Gets the locale name of the collator. If the collator is
+                instantiated from the rules, then this function will throw
+                an exception
+
+                The name is stored into the ULocale passed in. Note that
+                locale.name is assigned before the status is examined, so
+                it has already been overwritten when the exception is raised.
+
+        ***********************************************************************/
+
+        void getLocale (ULocale locale, ULocale.Type type)
+        {
+                Error e;
+
+                locale.name = toArray (ucol_getLocaleByType (handle, type, e));
+                if (isError(e) || locale.name is null)
+                    exception ("failed to get collator locale");
+        }
+
+        /***********************************************************************
+
+                Get the Unicode set that contains all the characters and
+                sequences tailored in this collator. 
+
+                The handle returned by ucol_getTailoredSet is wrapped in a
+                new USet once the status has been checked with testError.
+
+        ***********************************************************************/
+
+        USet getTailoredSet ()
+        {
+                Error e;
+
+                Handle h = ucol_getTailoredSet (handle, e);
+                testError (e, "failed to get tailored set");
+                return new USet (h);
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+                library holds the value returned by FunctionLoader.bind in
+                the static constructor and is handed back to
+                FunctionLoader.unbind in the static destructor.
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                C function pointers for the ICU collation API. Each one is
+                listed in the targets table below, which is what
+                FunctionLoader.bind receives.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                void function (Handle) ucol_close;
+                Handle function (char *loc, inout Error e) ucol_open;
+                Handle function (wchar* rules, uint rulesLength, AttributeValue normalizationMode, Strength strength, UParseError *parseError, inout Error e) ucol_openRules;
+                Handle function (char *definition, byte forceDefaults, UParseError *parseError, inout Error e) ucol_openFromShortString;
+                uint function (Handle, Handle conts, inout Error e) ucol_getContractions;
+                int function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_strcoll;
+                byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greater;
+                byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greaterOrEqual;
+                byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_equal;
+                Strength function (Handle) ucol_getStrength;
+                void function (Handle, Strength strength) ucol_setStrength;
+                uint function (char *objLoc, char *dispLoc, wchar* result, uint resultLength, inout Error e) ucol_getDisplayName;
+                uint function (Handle, char *locale, char *buffer, uint capacity, inout Error e) ucol_getShortDefinitionString;
+                uint function (char 
*source, char *destination, uint capacity, UParseError *parseError, inout Error e) ucol_normalizeShortDefinitionString;
+                uint function (Handle, wchar* source, uint sourceLength, ubyte *result, uint resultLength) ucol_getSortKey;
+                uint function (ubyte *source, uint sourceLength, BoundMode boundType, uint noOfLevels, ubyte *result, uint resultLength, inout Error e) ucol_getBound;
+                // NOTE(review): Version is taken by value in the next two
+                // signatures although the wrappers treat it as an output;
+                // confirm against the declaration of Version.
+                void function (Handle, Version info) ucol_getVersion;
+                void function (Handle, Version info) ucol_getUCAVersion;
+                uint function (ubyte *src1, uint src1Length, ubyte *src2, uint src2Length, ubyte *dest, uint destCapacity) ucol_mergeSortkeys;
+                void function (Handle, Attribute attr, AttributeValue value, inout Error e) ucol_setAttribute;
+                AttributeValue function (Handle, Attribute attr, inout Error e) ucol_getAttribute;
+                uint function (Handle, wchar* varTop, uint len, inout Error e) ucol_setVariableTop;
+                uint function (Handle, inout Error e) ucol_getVariableTop;
+                void function (Handle, uint varTop, inout Error e) ucol_restoreVariableTop;
+                uint function (Handle, RuleOption delta, wchar* buffer, uint bufferLen) ucol_getRulesEx;
+                char* function (Handle, ULocale.Type type, inout Error e) ucol_getLocaleByType;
+                Handle function (Handle, inout Error e) ucol_getTailoredSet;
+        }
+
+        /***********************************************************************
+
+                Binding table handed to FunctionLoader.bind. Each entry
+                pairs the address of one of the function pointers above
+                with the name of the ICU function it stands for.
+
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &ucol_open, "ucol_open"},
+                {cast(void**) &ucol_close, "ucol_close"},
+                {cast(void**) &ucol_openRules, "ucol_openRules"},
+                {cast(void**) &ucol_openFromShortString, "ucol_openFromShortString"},
+                {cast(void**) &ucol_getContractions, "ucol_getContractions"},
+                {cast(void**) &ucol_strcoll, "ucol_strcoll"},
+                {cast(void**) &ucol_greater, "ucol_greater"},
+                {cast(void**) &ucol_greaterOrEqual, "ucol_greaterOrEqual"},
+                {cast(void**) &ucol_equal, "ucol_equal"},
+                {cast(void**) &ucol_getStrength, 
"ucol_getStrength"}, + {cast(void**) &ucol_setStrength, "ucol_setStrength"}, + {cast(void**) &ucol_getDisplayName, "ucol_getDisplayName"}, + {cast(void**) &ucol_getShortDefinitionString, "ucol_getShortDefinitionString"}, + {cast(void**) &ucol_normalizeShortDefinitionString, "ucol_normalizeShortDefinitionString"}, + {cast(void**) &ucol_getSortKey, "ucol_getSortKey"}, + {cast(void**) &ucol_getBound, "ucol_getBound"}, + {cast(void**) &ucol_getVersion, "ucol_getVersion"}, + {cast(void**) &ucol_getUCAVersion, "ucol_getUCAVersion"}, + {cast(void**) &ucol_mergeSortkeys, "ucol_mergeSortkeys"}, + {cast(void**) &ucol_setAttribute, "ucol_setAttribute"}, + {cast(void**) &ucol_getAttribute, "ucol_getAttribute"}, + {cast(void**) &ucol_setVariableTop, "ucol_setVariableTop"}, + {cast(void**) &ucol_getVariableTop, "ucol_getVariableTop"}, + {cast(void**) &ucol_restoreVariableTop, "ucol_restoreVariableTop"}, + {cast(void**) &ucol_getRulesEx, "ucol_getRulesEx"}, + {cast(void**) &ucol_getLocaleByType, "ucol_getLocaleByType"}, + {cast(void**) &ucol_getTailoredSet, "ucol_getTailoredSet"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UConverter.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UConverter.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,764 @@ +/******************************************************************************* + + @file UConverter.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+
+*******************************************************************************/
+
+// NOTE(review): the module path below says "dwthelper" while this patch
+// places the file under dwtx/dwtxhelper/mangoicu/ - confirm which spelling
+// is intended.
+module dwtx.dwthelper.mangoicu.UConverter;
+
+private import dwtx.dwthelper.mangoicu.ICU;
+
+/*******************************************************************************
+
+        Progress record filled in by the streaming conversion calls: how
+        much of the input was consumed and how much output was produced.
+
+*******************************************************************************/
+
+struct UAdjust                  // used with encode() & decode() methods
+{
+        uint    input,          // how much was read from the input
+                output;         // how much was written to the output
+}
+
+/*******************************************************************************
+
+        Minimal conversion interface: a reset() operation plus a
+        buffer-to-buffer convert() that reports its progress through a
+        UAdjust.
+
+*******************************************************************************/
+
+interface ITranscoder
+{
+        void reset ();
+
+        bool convert (void[] input, void[] output, inout UAdjust x, bool flush);
+}
+
+/*******************************************************************************
+
+        This API is used to convert codepage or character encoded data to
+        and from UTF-16. You can open a converter with ucnv_open(). With
+        that converter, you can get its properties, set options, convert
+        your data and close the converter.
+
+        Since many software programs recognize different converter names
+        for different types of converters, there are other functions in
+        this API to iterate over the converter aliases.
+
+        See the ICU ucnv.h API reference for full details.
+
+*******************************************************************************/
+
+class UConverter : ICU
+{
+        // native converter handle passed to the ucnv_* calls
+        private Handle handle;
+
+
+
+        /***********************************************************************
+
+                Creates a UConverter object with the names specified as a
+                string.
+
+                The actual name will be resolved with the alias file using
+                a case-insensitive string comparison that ignores delimiters
+                '-', '_', and ' ' (dash, underscore, and space). E.g., the
+                names "UTF8", "utf-8", and "Utf 8" are all equivalent. If null
+                is passed for the converter name, it will create one with the
+                getDefaultName() return value. 
+
+                A converter name may contain options like a locale specification
+                to control the specific behavior of the converter instantiated.
+                The meaning of the options depends on the particular converter:
+                if an option is not defined for or recognized, it is ignored.
+
+                Options are appended to the converter name string, with an
+                OptionSepChar between the name and the first option and also
+                between adjacent options.
+
+                The conversion behavior and names can vary between platforms,
+                and ICU may convert some characters differently from other
+                platforms. Details on this topic are in the User's Guide.
+
+                An exception naming the requested converter is raised when
+                ucnv_open reports an error.
+
+        ***********************************************************************/
+
+        this (char[] name)
+        {
+                Error e;
+
+                handle = ucnv_open (toString (name), e);
+                // checked by hand (rather than via testError) so that the
+                // message can include the requested name
+                if (isError (e))
+                    exception ("failed to create converter for '"~name~"'");
+        }
+
+        /***********************************************************************
+
+                Deletes the unicode converter and releases resources
+                associated with just this instance. Does not free up
+                shared converter tables.
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                ucnv_close (handle);
+        }
+
+        /***********************************************************************
+
+                Do a fuzzy compare of two converter/alias names. The
+                comparison is case-insensitive. It also ignores the
+                characters '-', '_', and ' ' (dash, underscore, and space).
+                Thus the strings "UTF-8", "utf_8", and "Utf 8" are exactly
+                equivalent
+
+                The result of ucnv_compareNames is returned unchanged.
+
+        ***********************************************************************/
+
+        static final int compareNames (char[] a, char[] b)
+        {
+                return ucnv_compareNames (toString(a), toString(b));
+        }
+
+        /***********************************************************************
+
+                Resets the state of this converter to the default state.
+
+                This is used in the case of an error, to restart a
+                conversion from a known default state. It will also
+                empty the internal output buffers. 
+
+        ***********************************************************************/
+
+        void reset ()
+        {
+                ucnv_reset (handle);
+        }
+
+        /***********************************************************************
+
+                Resets the to-Unicode part of this converter state to the
+                default state.
+
+                This is used in the case of an error to restart a conversion
+                to Unicode to a known default state. It will also empty the
+                internal output buffers used for the conversion to Unicode
+                codepoints.
+
+        ***********************************************************************/
+
+        void resetDecoder ()
+        {
+                ucnv_resetToUnicode (handle);
+        }
+
+        /***********************************************************************
+
+                Resets the from-Unicode part of this converter state to the
+                default state.
+
+                This is used in the case of an error to restart a conversion
+                from Unicode to a known default state. It will also empty the
+                internal output buffers used for the conversion from Unicode
+                codepoints.
+
+        ***********************************************************************/
+
+        void resetEncoder ()
+        {
+                ucnv_resetFromUnicode (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the maximum number of bytes that are output per
+                UChar in conversion from Unicode using this converter.
+
+                The returned number can be used to calculate the size of
+                a target buffer for conversion from Unicode.
+
+                This number may not be the same as the maximum number of
+                bytes per "conversion unit". In other words, it may not
+                be the intuitively expected number of bytes per character
+                that would be published for a charset, and may not fulfill
+                any other purpose than the allocation of an output buffer
+                of guaranteed sufficient size for a given input length and
+                converter.
+
+                Examples for special cases that are taken into account:
+
+                * Supplementary code points may convert to more bytes than
+                  BMP code points. 
This function returns bytes per UChar
+                  (UTF-16 code unit), not per Unicode code point, for efficient
+                  buffer allocation.
+                * State-shifting output (SI/SO, escapes, etc.) from stateful
+                  converters.
+                * When m input UChars are converted to n output bytes, then
+                  the maximum m/n is taken into account.
+
+                The number returned here does not take into account:
+
+                * callbacks which output more than one charset character
+                  sequence per call, like escape callbacks
+                * initial and final non-character bytes that are output by
+                  some converters (automatic BOMs, initial escape sequence,
+                  final SI, etc.)
+
+                Examples for returned values:
+
+                * SBCS charsets: 1
+                * Shift-JIS: 2
+                * UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+                * UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+                * EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+                * ISO-2022: 3 (always outputs UTF-8)
+                * ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+                * ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 +
+                  DBCS)
+
+                The value of ucnv_getMaxCharSize is returned as a ubyte.
+
+        ***********************************************************************/
+
+        ubyte getMaxCharSize ()
+        {
+                return ucnv_getMaxCharSize (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the minimum byte length for characters in this
+                codepage. This is usually either 1 or 2.
+
+                The value of ucnv_getMinCharSize is returned as a ubyte.
+
+        ***********************************************************************/
+
+        ubyte getMinCharSize ()
+        {
+                return ucnv_getMinCharSize (handle);
+        }
+
+        /***********************************************************************
+
+                Gets the internal, canonical name of the converter (zero-
+                terminated). 
+
+                The C string returned by ucnv_getName is converted with
+                toArray; the status is checked with testError afterwards.
+
+        ***********************************************************************/
+
+        char[] getName ()
+        {
+                Error e;
+
+                char[] name = toArray (ucnv_getName (handle, e));
+                testError (e, "failed to get converter name");
+                return name;
+        }
+
+        /***********************************************************************
+
+                Determines if the converter contains ambiguous mappings of
+                the same character or not
+
+        ***********************************************************************/
+
+        bool isAmbiguous ()
+        {
+                return cast(bool) ucnv_isAmbiguous (handle);
+        }
+
+        /***********************************************************************
+
+                Detects Unicode signature byte sequences at the start
+                of the byte stream and returns the charset name of the
+                indicated Unicode charset. A null is returned where no
+                Unicode signature is recognized.
+
+                A caller can create a UConverter using the charset name.
+                The first code unit (wchar) from the start of the stream
+                will be U+FEFF (the Unicode BOM/signature character) and
+                can usually be ignored.
+
+                An error status from ucnv_detectUnicodeSignature also
+                yields null here; no exception is raised.
+
+        ***********************************************************************/
+
+        static final char[] detectSignature (void[] input)
+        {
+                Error e;
+                uint len;       // passed to the C call as an extra argument; not read afterwards
+                                // (presumably receives the signature length - TODO confirm)
+                char* name;
+
+                name = ucnv_detectUnicodeSignature (input.ptr, input.length, len, e);
+                if (name == null || isError (e))
+                    return null;
+                return toArray (name);
+        }
+
+        /***********************************************************************
+
+                Converts an array of unicode characters to an array of
+                codepage characters.
+
+                This function is optimized for converting a continuous
+                stream of data in buffer-sized chunks, where the entire
+                source and target does not fit in available buffers.
+
+                The source pointer is an in/out parameter. It starts out
+                pointing where the conversion is to begin, and ends up
+                pointing after the last UChar consumed. 
+
+                Target similarly starts out pointing at the first available
+                byte in the output buffer, and ends up pointing after the
+                last byte written to the output.
+
+                The converter always attempts to consume the entire source
+                buffer, unless (1.) the target buffer is full, or (2.) a
+                failing error is returned from the current callback function.
+                When a successful error status has been returned, it means
+                that all of the source buffer has been consumed. At that
+                point, the caller should reset the source and sourceLimit
+                pointers to point to the next chunk.
+
+                At the end of the stream (flush==true), the input is completely
+                consumed when *source==sourceLimit and no error code is set.
+                The converter object is then automatically reset by this
+                function. (This means that a converter need not be reset
+                explicitly between data streams if it finishes the previous
+                stream without errors.)
+
+                This is a stateful conversion. Additionally, even when all
+                source data has been consumed, some data may be in the
+                converters' internal state. Call this function repeatedly,
+                updating the target pointers with the next empty chunk of
+                target in case of a U_BUFFER_OVERFLOW_ERROR, and updating
+                the source pointers with the next chunk of source when a
+                successful error status is returned, until there are no more
+                chunks of source data.
+
+                Parameters:
+
+                converter       the Unicode converter
+                target          I/O parameter. Input : Points to the
+                                beginning of the buffer to copy codepage
+                                characters to. Output : points to after
+                                the last codepage character copied to
+                                target.
+                targetLimit     the pointer just after the last byte of
+                                the target buffer
+                source          I/O parameter, pointer to pointer to
+                                the source Unicode character buffer.
+                sourceLimit     the pointer just after the last of
+                                the source buffer
+                offsets         if NULL is passed, nothing will happen
+                                to it, otherwise it needs to have the
+                                same number of allocated cells as target. 
+                                Will fill in offsets from target to source
+                                pointer e.g: offsets[3] is equal to 6, it
+                                means that the target[3] was a result of
+                                transcoding source[6] For output data
+                                carried across calls, and other data
+                                without a specific source character
+                                (such as from escape sequences or
+                                callbacks) -1 will be placed for offsets.
+                flush           set to TRUE if the current source buffer
+                                is the last available chunk of the source,
+                                FALSE otherwise. Note that if a failing
+                                status is returned, this function may
+                                have to be called multiple times with
+                                flush set to TRUE until the source buffer
+                                is consumed.
+
+                The parameter list above describes the underlying
+                ucnv_fromUnicode call. This wrapper derives the pointers
+                from the input and output arrays and always passes null
+                for offsets. On return x.input holds how far the source
+                pointer advanced from the start of input and x.output how
+                far the target pointer advanced from the start of output.
+                The result is true when the status is BufferOverflow (the
+                output buffer filled up) and false otherwise; any other
+                failing status is reported through testError.
+
+        ***********************************************************************/
+
+        bool encode (wchar[] input, void[] output, inout UAdjust x, bool flush)
+        {
+                Error e;
+                wchar* src = input.ptr;
+                void* dst = output.ptr;
+                wchar* srcLimit = src + input.length;
+                void* dstLimit = dst + output.length;
+
+                ucnv_fromUnicode (handle, &dst, dstLimit, &src, srcLimit, null, flush, e);
+                // record the progress before looking at the status, so the
+                // caller gets it in the overflow case as well
+                x.input = src - input.ptr;
+                x.output = dst - output.ptr;
+
+                // overflow is an expected outcome here, not a failure
+                if (e == e.BufferOverflow)
+                    return true;
+
+                testError (e, "failed to encode");
+                return false;
+        }
+
+        /***********************************************************************
+
+                Encode the Unicode string into a codepage string.
+
+                This function is a more convenient but less powerful version
+                of encode(). It is only useful for whole strings, not
+                for streaming conversion. The maximum output buffer capacity
+                required (barring output from callbacks) should be calculated
+                using getMaxCharSize(). 
+ + ***********************************************************************/ + + uint encode (wchar[] input, void[] output) + { + Error status; + + uint count = ucnv_fromUChars (handle, output.ptr, output.length, input.ptr, input.length, status); + testError (status, "failed to encode"); + return count; + } + + /*********************************************************************** + + Converts a buffer of codepage bytes into an array of unicode + UChars characters. + + This function is optimized for converting a continuous stream + of data in buffer-sized chunks, where the entire source and + target does not fit in available buffers. + + The source pointer is an in/out parameter. It starts out pointing + where the conversion is to begin, and ends up pointing after the + last byte of source consumed. + + Target similarly starts out pointing at the first available UChar + in the output buffer, and ends up pointing after the last UChar + written to the output. It does NOT necessarily keep UChar sequences + together. + + The converter always attempts to consume the entire source buffer, + unless (1.) the target buffer is full, or (2.) a failing error is + returned from the current callback function. When a successful + error status has been returned, it means that all of the source + buffer has been consumed. At that point, the caller should reset + the source and sourceLimit pointers to point to the next chunk. + + At the end of the stream (flush==true), the input is completely + consumed when *source==sourceLimit and no error code is set. The + converter object is then automatically reset by this function. + (This means that a converter need not be reset explicitly between + data streams if it finishes the previous stream without errors.) + + This is a stateful conversion. Additionally, even when all source + data has been consumed, some data may be in the converter's internal + state. 
Call this function repeatedly, updating the target pointers + with the next empty chunk of target in case of a BufferOverflow, and + updating the source pointers with the next chunk of source when a + successful error status is returned, until there are no more chunks + of source data. + + Parameters: + converter the Unicode converter + target I/O parameter. Input : Points to the beginning + of the buffer to copy UChars into. Output : + points to after the last UChar copied. + targetLimit the pointer just after the end of the target + buffer + source I/O parameter, pointer to pointer to the source + codepage buffer. + sourceLimit the pointer to the byte after the end of the + source buffer + offsets if NULL is passed, nothing will happen to + it, otherwise it needs to have the same + number of allocated cells as target. Will + fill in offsets from target to source pointer + e.g: offsets[3] is equal to 6, it means that + the target[3] was a result of transcoding + source[6] For output data carried across + calls, and other data without a specific + source character (such as from escape + sequences or callbacks) -1 will be placed + for offsets. + flush set to true if the current source buffer + is the last available chunk of the source, + false otherwise. Note that if a failing + status is returned, this function may have + to be called multiple times with flush set + to true until the source buffer is consumed. 
+ + ***********************************************************************/ + + bool decode (void[] input, wchar[] output, inout UAdjust x, bool flush) + { + Error status; + void* srcPos = input.ptr; + void* srcEnd = srcPos + input.length; + wchar* dstPos = output.ptr; + wchar* dstEnd = dstPos + output.length; + + ucnv_toUnicode (handle, &dstPos, dstEnd, &srcPos, srcEnd, null, flush, status); + + /* report how far both cursors moved from the start of their buffers */ + x.input = srcPos - input.ptr; + x.output = dstPos - output.ptr; + + /* a buffer overflow is signalled to the caller by returning true; every other status goes through testError */ + if (status != status.BufferOverflow) + { + testError (status, "failed to decode"); + return false; + } + return true; + } + + /*********************************************************************** + + Decode the codepage string into a Unicode string. + + This function is a more convenient but less powerful version + of decode(). It is only useful for whole strings, not for + streaming conversion. The maximum output buffer capacity + required (barring output from callbacks) will be 2*src.length + (each char may be converted into a surrogate pair) + + ***********************************************************************/ + + uint decode (void[] input, wchar[] output) + { + Error status; + + uint count = ucnv_toUChars (handle, output.ptr, output.length, input.ptr, input.length, status); + testError (status, "failed to decode"); + return count; + } + + /********************************************************************** + + Iterate over the available converter names. Iteration stops + as soon as the delegate returns a non-zero value, and that + value is returned; otherwise zero is returned. 
+ + **********************************************************************/ + + static int opApply (int delegate(inout char[] element) dg) + { + uint total = ucnv_countAvailable (); + + for (uint index=0; index < total; ++index) + { + char[] name = toArray (ucnv_getAvailableName (index)); + int result = dg (name); + if (result != 0) + return result; + } + return 0; + } + + /*********************************************************************** + + ***********************************************************************/ + + ITranscoder createTranscoder (UConverter dst) + { + return new UTranscoder (this, 
dst); + } + + /********************************************************************** + + Transcoder that converts codepage bytes of one converter + into codepage bytes of another through ucnv_convertEx. + + **********************************************************************/ + + private class UTranscoder : ITranscoder + { + private UConverter cSrc, + cDst; + private bool clear = true; + + /************************************************************** + + Remember the source and destination converters. + + **************************************************************/ + + this (UConverter src, UConverter dst) + { + cSrc = src; + cDst = dst; + } + + /************************************************************** + + Make the next convert() call pass the reset flag to + ucnv_convertEx again. + + **************************************************************/ + + void reset () + { + clear = true; + } + + /************************************************************** + + Transcode from input into output. On return, x.input + and x.output hold how far the source and target + cursors advanced (void* pointer differences). + + Returns true when ICU reported a buffer overflow; + any other status is handed to testError and false + is returned. + + **************************************************************/ + + bool convert (void[] input, void[] output, inout UAdjust x, bool flush) + { + Error e; + void* src = input.ptr; + void* dst = output.ptr; + void* srcLimit = src + input.length; + void* dstLimit = dst + output.length; + + /* NOTE(review): all four pivot-buffer arguments are null; confirm in the ICU ucnv_convertEx documentation that this is permitted when flush is false */ + ucnv_convertEx (cDst.handle, cSrc.handle, &dst, dstLimit, + &src, srcLimit, null, null, null, null, + clear, flush, e); + /* the reset flag is only sent on the first call and after reset() */ + clear = false; + x.input = src - input.ptr; + x.output = dst - output.ptr; + + if (e == e.BufferOverflow) + return true; + + /* this is a transcode, so do not report it as a decode failure */ + testError (e, "failed to convert"); + return false; + } + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Function pointers for the ICU converter C API. Each one + is listed by address and symbol name in the targets table + below, which the static constructor passes to + FunctionLoader.bind. + + NOTE(review): ucnv_resetToUnicode and ucnv_resetFromUnicode + are declared as returning int here; confirm the return + types against the ICU ucnv.h header. + + ***********************************************************************/ + + private static extern (C) + { + int function (char*, char*) ucnv_compareNames; + Handle function (char*, inout Error) ucnv_open; + char* function (void*, uint, inout uint, inout Error) ucnv_detectUnicodeSignature; + void function (Handle) ucnv_close; + void function (Handle) ucnv_reset; + int function (Handle) ucnv_resetToUnicode; + int function (Handle) ucnv_resetFromUnicode; + ubyte function (Handle) ucnv_getMaxCharSize; + ubyte function (Handle) ucnv_getMinCharSize; + char* function (Handle, inout Error) ucnv_getName; + uint function (Handle, wchar*, uint, void*, uint, inout Error) ucnv_toUChars; + uint function (Handle, void*, uint, wchar*, uint, inout Error) ucnv_fromUChars; + void function (Handle, void**, void*, wchar**, wchar*, int*, ubyte, inout Error) ucnv_fromUnicode; + void function (Handle, wchar**, wchar*, void**, void*, int*, ubyte, inout Error) ucnv_toUnicode; + void function (Handle, Handle, void**, void*, void**, void*, wchar*, wchar*, wchar*, wchar*, ubyte, ubyte, inout Error) ucnv_convertEx; + ubyte function (Handle) ucnv_isAmbiguous; + char* function (uint) ucnv_getAvailableName; + uint function () ucnv_countAvailable; + } + + /*********************************************************************** + + Table pairing the address of each function pointer + declared above with the name of the ICU symbol it is + meant to hold. + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucnv_open, "ucnv_open"}, + {cast(void**) &ucnv_close, "ucnv_close"}, + {cast(void**) &ucnv_reset, "ucnv_reset"}, + {cast(void**) &ucnv_resetToUnicode, "ucnv_resetToUnicode"}, + {cast(void**) &ucnv_resetFromUnicode, "ucnv_resetFromUnicode"}, + {cast(void**) 
&ucnv_compareNames, "ucnv_compareNames"}, + {cast(void**) &ucnv_getMaxCharSize, "ucnv_getMaxCharSize"}, + {cast(void**) &ucnv_getMinCharSize, "ucnv_getMinCharSize"}, + {cast(void**) &ucnv_getName, "ucnv_getName"}, + {cast(void**) &ucnv_detectUnicodeSignature, "ucnv_detectUnicodeSignature"}, + {cast(void**) &ucnv_toUChars, "ucnv_toUChars"}, + {cast(void**) &ucnv_fromUChars, "ucnv_fromUChars"}, + {cast(void**) &ucnv_toUnicode, "ucnv_toUnicode"}, + {cast(void**) &ucnv_fromUnicode, "ucnv_fromUnicode"}, + {cast(void**) &ucnv_convertEx, "ucnv_convertEx"}, + {cast(void**) &ucnv_isAmbiguous, "ucnv_isAmbiguous"}, + {cast(void**) &ucnv_countAvailable, "ucnv_countAvailable"}, + {cast(void**) &ucnv_getAvailableName, "ucnv_getAvailableName"}, + ]; + + /*********************************************************************** + + Module constructor: hand the targets table to + FunctionLoader.bind for the icuuc library and keep the + returned value for the static destructor. + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); +/+ + foreach (char[] name; UConverter) + printf ("%.*s\n", name); ++/ + } + + /*********************************************************************** + + Module destructor: pass the value saved by the static + constructor to FunctionLoader.unbind. + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UDateFormat.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UDateFormat.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,426 @@ +/******************************************************************************* + + @file UDateFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UDateFormat; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString, + dwtx.dwthelper.mangoicu.UCalendar, + dwtx.dwthelper.mangoicu.UNumberFormat; + +/******************************************************************************* + + UDateFormat consists of functions that convert dates and + times from their internal representations to textual form and back + again in a language-independent manner. Converting from the internal + representation (milliseconds since midnight, January 1, 1970) to text + is known as "formatting," and converting from text to millis is known + as "parsing." We currently define one concrete structure UDateFormat, + which can handle pretty much all normal date formatting and parsing + actions. + + UDateFormat helps you to format and parse dates for any locale. 
+ Your code can be completely independent of the locale conventions + for months, days of the week, or even the calendar format: lunar + vs. solar. + + See + this page for full details. + +*******************************************************************************/ + +private class UDateFormat : ICU +{ + private Handle handle; + + alias UCalendar.UDate UDate; + + typedef void* UFieldPos; + + /* styles accepted by the constructor for the time and date parts */ + public enum Style + { + Full, + Long, + Medium, + Short, + Default = Medium, + None = -1, + Ignore = -2 + }; + + /* NOTE(review): presumably mirrors ICU's UDateFormatField values - confirm against udat.h */ + public enum Field + { + EraField = 0, + YearField = 1, + MonthField = 2, + DateField = 3, + HourOfDay1Field = 4, + HourOfDay0Field = 5, + MinuteField = 6, + SecondField = 7, + FractionalSecondField = 8, + DayOfWeekField = 9, + DayOfYearField = 10, + DayOfWeekInMonthField = 11, + WeekOfYearField = 12, + WeekOfMonthField = 13, + AmPmField = 14, + Hour1Field = 15, + Hour0Field = 16, + TimezoneField = 17, + YearWoyField = 18, + DowLocalField = 19, + ExtendedYearField = 20, + JulianDayField = 21, + MillisecondsInDayField = 22, + TimezoneRfcField = 23, + FieldCount = 24 + }; + + private enum Symbol + { + Eras, + Months, + ShortMonths, + Weekdays, + ShortWeekdays, + AmPms, + LocalizedChars + }; + + + /*********************************************************************** + + Open a new UDateFormat for formatting and parsing dates + and time. If a pattern is not specified, an appropriate + one for the given locale will be used. 
+ + The time and date styles, the locale name, the time-zone + name and the optional pattern are passed on to udat_open; + the resulting status is checked with testError. + + ***********************************************************************/ + + this (Style time, Style date, inout ULocale locale, inout UTimeZone tz, UText pattern=null) + { + Error e; + wchar* p; + uint c; + + /* without a pattern, p and c keep their default initial values */ + if (pattern) + p = pattern.get.ptr, c = pattern.length; + handle = udat_open (time, date, toString(locale.name), tz.name.ptr, tz.name.length, p, c, e); + testError (e, "failed to create DateFormat"); + } + + /*********************************************************************** + + Close a UDateFormat + + ***********************************************************************/ + + ~this () + { + udat_close (handle); + } + + /*********************************************************************** + + Format a date using an UDateFormat. The result is + produced through dst.format, which is given a callback + that invokes udat_format on the supplied buffer. + + ***********************************************************************/ + + void format (UString dst, UDate date, UFieldPos p = null) + { + uint fmat (wchar* result, uint len, inout Error e) + { + return udat_format (handle, date, result, len, p, e); + } + + dst.format (&fmat, "date format failed"); + } + + /*********************************************************************** + + Parse a string into a date/time using a UDateFormat. + The optional index pointer is forwarded to udat_parse + unchanged. + + ***********************************************************************/ + + UDate parse (UText src, uint* index=null) + { + Error e; + + /* NOTE(review): other methods in this class reach the text through get and length; confirm content and len are the intended UText members here */ + UDate x = udat_parse (handle, src.content.ptr, src.len, index, e); + testError (e, "failed to parse date"); + return x; + } + + /*********************************************************************** + + Set the UCalendar associated with an UDateFormat. A + UDateFormat uses a UCalendar to convert a raw value + to, for example, the day of the week. 
+ + ***********************************************************************/ + + void setCalendar (UCalendar c) + { + udat_setCalendar (handle, c.handle); + } + + /*********************************************************************** + + Get the UCalendar associated with this UDateFormat + + ***********************************************************************/ + + UCalendar getCalendar () + { + /* NOTE(review): the handle returned by udat_getCalendar is wrapped in a new UCalendar; if UCalendar closes its handle on destruction, this could release a calendar the formatter still uses - confirm against UCalendar and the ICU udat_getCalendar documentation */ + Handle h = udat_getCalendar (handle); + return new UCalendar (h); + } + + /*********************************************************************** + + Set the UNumberFormat associated with an UDateFormat. A + UDateFormat uses a UNumberFormat to format numbers within + a date, for example the day number. + + ***********************************************************************/ + + void setNumberFormat (UNumberFormat n) + { + /* must go through the number-format setter: udat_setCalendar would treat n.handle as a calendar */ + udat_setNumberFormat (handle, n.handle); + } + + /*********************************************************************** + + Get the year relative to which all 2-digit years are + interpreted + + ***********************************************************************/ + + UDate getTwoDigitYearStart () + { + Error e; + + UDate x = udat_get2DigitYearStart (handle, e); + testError (e, "failed to get two digit year start"); + return x; + } + + /*********************************************************************** + + Set the year relative to which all 2-digit years are + interpreted + + ***********************************************************************/ + + void setTwoDigitYearStart (UDate start) + { + Error e; + + udat_set2DigitYearStart (handle, start, e); + testError (e, "failed to set two digit year start"); + } + + /*********************************************************************** + + Extract the pattern from a UDateFormat + + ***********************************************************************/ + + void getPattern (UString dst, bool localize) + { + uint fmat (wchar* result, uint len, inout Error e) + { + return udat_toPattern 
(handle, localize, result, len, e); + } + + dst.format (&fmat, "failed to retrieve date format pattern"); + } + + /*********************************************************************** + + Set the pattern for a UDateFormat + + ***********************************************************************/ + + void setPattern (UText pattern, bool localized) + { + udat_applyPattern (handle, localized, pattern.get.ptr, pattern.length); + } + + /*********************************************************************** + + Specify whether an UDateFormat will perform lenient parsing. + + ***********************************************************************/ + + void setLenient (bool yes) + { + udat_setLenient (handle, yes); + } + + /*********************************************************************** + + Determine if an UDateFormat will perform lenient parsing. + + ***********************************************************************/ + + bool isLenient () + { + return udat_isLenient (handle) != 0; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + Function pointers for the ICU date-format C API; each is + listed by address and symbol name in the targets table + below. + + ***********************************************************************/ + + private static extern (C) + { + Handle function (uint, uint, char*, wchar*, uint, wchar*, uint, inout Error) udat_open; + void function (Handle) udat_close; + uint function (Handle, UDate, wchar*, uint, UFieldPos, inout Error) udat_format; + UDate function (Handle, wchar*, uint, uint*, inout Error) udat_parse; + void function (Handle, Handle) udat_setCalendar; + void function (Handle, Handle) udat_setNumberFormat; + UDate function (Handle, inout Error) udat_get2DigitYearStart; + void function (Handle, UDate, inout Error) udat_set2DigitYearStart; + uint function (Handle, byte, wchar*, uint, inout Error) udat_toPattern; + void function (Handle, byte, wchar*, uint) udat_applyPattern; + void function (Handle, byte) udat_setLenient; + byte function (Handle) udat_isLenient; + Handle function (Handle) udat_getCalendar; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &udat_open, "udat_open"}, + {cast(void**) &udat_close, "udat_close"}, + {cast(void**) &udat_format, "udat_format"}, + {cast(void**) &udat_parse, "udat_parse"}, + {cast(void**) &udat_setCalendar, "udat_setCalendar"}, + {cast(void**) &udat_setNumberFormat, "udat_setNumberFormat"}, + {cast(void**) &udat_get2DigitYearStart, "udat_get2DigitYearStart"}, + {cast(void**) &udat_set2DigitYearStart, "udat_set2DigitYearStart"}, + {cast(void**) &udat_toPattern, "udat_toPattern"}, + {cast(void**) &udat_applyPattern, "udat_applyPattern"}, + {cast(void**) &udat_setLenient, "udat_setLenient"}, + {cast(void**) 
&udat_isLenient, "udat_isLenient"}, + {cast(void**) &udat_getCalendar, "udat_getCalendar"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + + + diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UDomainName.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UDomainName.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,322 @@ +/******************************************************************************* + + @file UDomainName.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. 
+ + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. 
+ + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UDomainName; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +/******************************************************************************* + + UIDNA API implements the IDNA protocol as defined in the + IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). + + The RFC defines 2 operations: toAscii and toUnicode. Domain + labels containing non-ASCII code points are required to be + processed by toAscii operation before passing it to resolver + libraries. Domain names that are obtained from resolver + libraries are required to be processed by toUnicode operation + before displaying the domain name to the user. IDNA requires + that implementations process input strings with Nameprep + (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of + Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with + Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations + of IDNA MUST fully implement Nameprep and Punycode; neither + Nameprep nor Punycode are optional. + + The input and output of toAscii() and ToUnicode() operations are + Unicode and are designed to be chainable, i.e., applying toAscii() + or toUnicode() operations multiple times to an input string will + yield the same result as applying the operation once. + + See + this page for full details. 
+ +*******************************************************************************/ + +class UDomainName : ICU +{ + private UText text; + private Handle handle; + + enum Options + { + Strict, + Lenient, + Std3 + } + + + /*********************************************************************** + + Remember the text that the conversion and comparison + methods operate on. + + ***********************************************************************/ + + this (UText text) + { + this.text = text; + } + + /*********************************************************************** + + This function implements the ToASCII operation as + defined in the IDNA RFC. + + This operation is done on single labels before sending + it to something that expects ASCII names. A label is an + individual part of a domain name. Labels are usually + separated by dots; e.g. "www.example.com" is composed + of 3 labels "www","example", and "com". + + ***********************************************************************/ + + void toAscii (UString dst, Options o = Options.Strict) + { + uint labelToAscii (wchar* buffer, uint capacity, inout Error status) + { + return uidna_toASCII (text.get.ptr, text.len, buffer, capacity, o, null, status); + } + + dst.format (&labelToAscii, "failed to convert IDN to ASCII"); + } + + /*********************************************************************** + + This function implements the ToUnicode operation as + defined in the IDNA RFC. + + This operation is done on single labels before sending + it to something that expects Unicode names. A label is + an individual part of a domain name. Labels are usually + separated by dots; e.g. "www.example.com" is composed + of 3 labels "www","example", and "com". 
+ + ***********************************************************************/ + + void toUnicode (UString dst, Options o = Options.Strict) + { + uint labelToUnicode (wchar* buffer, uint capacity, inout Error status) + { + return uidna_toUnicode (text.get.ptr, text.len, buffer, capacity, o, null, status); + } + + dst.format (&labelToUnicode, "failed to convert IDN to Unicode"); + } + + /*********************************************************************** + + Convenience function that implements the IDNToASCII + operation as defined in the IDNA RFC. + + This operation is done on complete domain names, e.g: + "www.example.com". It is important to note that this + operation can fail. If it fails, then the input domain + name cannot be used as an Internationalized Domain Name + and the application should have methods defined to deal + with the failure. + + Note: IDNA RFC specifies that a conformant application + should divide a domain name into separate labels, decide + whether to apply allowUnassigned and useSTD3ASCIIRules + on each, and then convert. This function does not offer + that level of granularity. The options once set will apply + to all labels in the domain name + + ***********************************************************************/ + + void IdnToAscii (UString dst, Options o = Options.Strict) + { + uint nameToAscii (wchar* buffer, uint capacity, inout Error status) + { + return uidna_IDNToASCII (text.get.ptr, text.len, buffer, capacity, o, null, status); + } + + dst.format (&nameToAscii, "failed to convert IDN to ASCII"); + } + + /*********************************************************************** + + Convenience function that implements the IDNToUnicode + operation as defined in the IDNA RFC. + + This operation is done on complete domain names, e.g: + "www.example.com". + + Note: IDNA RFC specifies that a conformant application + should divide a domain name into separate labels, decide + whether to apply allowUnassigned and useSTD3ASCIIRules + on each, and then convert. This function does not offer + that level of granularity. 
The options once set will apply + to all labels in the domain name + + ***********************************************************************/ + + void IdnToUnicode (UString dst, Options o = Options.Strict) + { + uint nameToUnicode (wchar* buffer, uint capacity, inout Error status) + { + return uidna_IDNToUnicode (text.get.ptr, text.len, buffer, capacity, o, null, status); + } + + dst.format (&nameToUnicode, "failed to convert IDN to Unicode"); + } + + /*********************************************************************** + + Compare two IDN strings for equivalence. + + This function splits the domain names into labels and + compares them. According to IDN RFC, whenever two labels + are compared, they are considered equal if and only if + their ASCII forms (obtained by applying toASCII) match + using a case-insensitive ASCII comparison. Two domain + names are considered a match if and only if all labels + match regardless of whether label separators match + + ***********************************************************************/ + + int compare (UString other, Options o = Options.Strict) + { + Error status; + int order = uidna_compare (text.get.ptr, text.len, other.get.ptr, other.len, o, status); + testError (status, "failed to compare IDN strings"); + return order; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_toASCII; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_toUnicode; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_IDNToASCII; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_IDNToUnicode; + int function (wchar*, uint, wchar*, uint, uint, inout Error) uidna_compare; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uidna_toASCII, "uidna_toASCII"}, + {cast(void**) &uidna_toUnicode, "uidna_toUnicode"}, + {cast(void**) &uidna_IDNToASCII, "uidna_IDNToASCII"}, + {cast(void**) &uidna_IDNToUnicode, "uidna_IDNToUnicode"}, + {cast(void**) &uidna_compare, "uidna_compare"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UEnumeration.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UEnumeration.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,264 @@ 
+/******************************************************************************* + + @file UEnumeration.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 

*******************************************************************************/

module dwtx.dwthelper.mangoicu.UEnumeration;

private import dwtx.dwthelper.mangoicu.ICU;

/*******************************************************************************

        A UEnumeration is handed out by a number of ICU classes to give
        access to such things as ULocale lists and so on.

*******************************************************************************/

class UEnumeration : ICU
{
        package Handle handle;

        /***********************************************************************

                Wrap an enumeration handle. The handle is passed to
                uenum_close() when this object is destroyed

        ***********************************************************************/

        this (Handle handle)
        {
                this.handle = handle;
        }

        /***********************************************************************

                Disposes of the storage used by a UEnumeration object

        ***********************************************************************/

        ~this ()
        {
                uenum_close (handle);
        }

        /***********************************************************************

                Returns the value reported by uenum_count() for this
                enumeration.

                A failure status from ICU is reported through
                testError() with the message "enumeration out of sync"

        ***********************************************************************/

        uint count ()
        {
                Error status;

                uint total = uenum_count (handle, status);
                testError (status, "enumeration out of sync");
                return total;
        }

        /***********************************************************************

                Resets the iterator to the current list of service IDs.

                This re-establishes sync with the service and rewinds
                the iterator to start at the first element

        ***********************************************************************/

        void reset ()
        {
                ICU.Error status;

                uenum_reset (handle, status);
                testError (status, "failed to reset enumeration");
        }

        /***********************************************************************

                Fetches the next element as char[] via uenum_next().

                Returns true and sets dst when ICU hands back a
                non-null pointer; returns false otherwise. dst is a
                slice over the memory ICU returned -- no copy is made.
                Being an 'out' parameter, dst is reset on entry.

                A failure status (e.g. the iterator being out of sync
                with its service) is reported through testError()

        ***********************************************************************/

        bool next (out char[] dst)
        {
                ICU.Error status;
                uint      length;

                char* element = uenum_next (handle, &length, status);
                testError (status, "failed to traverse enumeration");

                if (element is null)
                    return false;

                dst = element[0..length];
                return true;
        }

        /***********************************************************************

                Fetches the next element as wchar[] via uenum_unext().

                Returns true and sets dst when ICU hands back a
                non-null pointer; returns false otherwise, in which
                case dst (an 'inout' parameter) keeps its previous
                value. dst is a slice over the memory ICU returned --
                no copy is made.

                A failure status (e.g. the iterator being out of sync
                with its service) is reported through testError()

        ***********************************************************************/

        bool next (inout wchar[] dst)
        {
                ICU.Error status;
                uint      length;

                wchar* element = uenum_unext (handle, &length, status);
                testError (status, "failed to traverse enumeration");

                if (element is null)
                    return false;

                dst = element[0..length];
                return true;
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        // value returned by FunctionLoader.bind(); handed back to
        // FunctionLoader.unbind() in the static destructor
        private static void* library;

        /***********************************************************************

                Function pointers for the uenum_* entry points; they
                are filled in through the 'targets' table below

        ***********************************************************************/

        private static extern (C)
        {
                void   function (Handle)                      uenum_close;
                uint   function (Handle, inout Error)         uenum_count;
                void   function (Handle, inout Error)         uenum_reset;
                char*  function (Handle, uint*, inout Error)  uenum_next;
                wchar* function (Handle, uint*, inout Error)  uenum_unext;
        }

        /***********************************************************************

                Pairs each function pointer above with the name of the
                symbol it is bound to

        ***********************************************************************/

        static FunctionLoader.Bind[] targets =
                [
                {cast(void**) &uenum_close, "uenum_close"},
                {cast(void**) &uenum_count, "uenum_count"},
                {cast(void**) &uenum_reset, "uenum_reset"},
                {cast(void**) &uenum_next,  "uenum_next"},
                {cast(void**) &uenum_unext, "uenum_unext"},
                ];

        /***********************************************************************

                Bind the 'targets' table against the icuuc library

        ***********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (icuuc, targets);
        }

        /***********************************************************************

                Release the library obtained in the static constructor

        ***********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/ULocale.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/ULocale.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,229 @@
/*******************************************************************************

        @file ULocale.d

        Copyright (c) 2004 Kris Bell

        This software is provided 'as-is', without any express or implied
        warranty.
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.ULocale; + +private import dwtx.dwthelper.mangoicu.ICU; + +/******************************************************************************* + + Note that this is a struct rather than a class. 
This is so + that one can easily construct these on the stack, plus the + 'convenience' instances can be created statically. + +*******************************************************************************/ + +struct ULocale +{ + public char[] name; + + /*********************************************************************** + + ***********************************************************************/ + + public static ULocale Root = {""}; + public static ULocale Default = {null}; + public static ULocale English = {"en"}; + public static ULocale Chinese = {"zh"}; + public static ULocale French = {"fr"}; + public static ULocale German = {"de"}; + public static ULocale Italian = {"it"}; + public static ULocale Japanese = {"ja"}; + public static ULocale Korean = {"ko"}; + public static ULocale SimplifiedChinese = {"zh_CN"}; + public static ULocale TraditionalChinese = {"zh_TW"}; + public static ULocale Canada = {"en_CA"}; + public static ULocale CanadaFrench = {"fr_CA"}; + public static ULocale China = {"zh_CN"}; + public static ULocale PRC = {"zh_CN"}; + public static ULocale France = {"fr_FR"}; + public static ULocale Germany = {"de_DE"}; + public static ULocale Italy = {"it_IT"}; + public static ULocale Japan = {"jp_JP"}; + public static ULocale Korea = {"ko_KR"}; + public static ULocale Taiwan = {"zh_TW"}; + public static ULocale UK = {"en_GB"}; + public static ULocale US = {"en_US"}; + + /*********************************************************************** + + ***********************************************************************/ + + public enum Type + { + Actual = 0, + Valid = 1, + Requested = 2, + } + + /*********************************************************************** + + ***********************************************************************/ + + public const uint LanguageCapacity = 12; + public const uint CountryCapacity = 4; + public const uint FullNameCapacity = 56; + public const uint ScriptCapacity = 6; + public const uint KeywordsCapacity = 
50; + public const uint KeywordAndValuesCapacity = 100; + public const char KeywordItemSeparator = ':'; + public const char KeywordSeparator = '@'; + public const char KeywordAssign = '='; + + + /*********************************************************************** + + ***********************************************************************/ + + static void getDefault (inout ULocale locale) + { + locale.name = ICU.toArray (uloc_getDefault()); + if (! locale.name) + ICU.exception ("failed to get default locale"); + } + + /*********************************************************************** + + ***********************************************************************/ + + static void setDefault (inout ULocale locale) + { + ICU.Error e; + + uloc_setDefault (ICU.toString(locale.name), e); + + if (ICU.isError (e)) + ICU.exception ("invalid locale '"~locale.name~"'"); + } + + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + char* function () uloc_getDefault; + void function (char*, inout ICU.Error) uloc_setDefault; + } + + /********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uloc_getDefault, "uloc_getDefault"}, + {cast(void**) &uloc_setDefault, "uloc_setDefault"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuuc, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UMessageFormat.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UMessageFormat.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,395 @@ +/******************************************************************************* + + @file UMessageFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UMessageFormat; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +public import dwtx.dwthelper.mangoicu.ULocale; + +/******************************************************************************* + + Provides means to produce concatenated messages in language-neutral + way. Use this for all concatenations that show up to end users. Takes + a set of objects, formats them, then inserts the formatted strings into + the pattern at the appropriate places. + + See + this page for full details. + +*******************************************************************************/ + +class UMessageFormat : ICU +{ + private Handle handle; + + /*********************************************************************** + + Open a message formatter with given wchar[] and for the + given locale. 
+ + ***********************************************************************/ + + this (wchar[] pattern, inout ULocale locale = ULocale.Default) + { + Error e; + + handle = umsg_open (pattern.ptr, pattern.length, toString(locale.name), null, e); + testError (e, "failed to open message formatter"); + } + + /*********************************************************************** + + Open a message formatter with given pattern and for the + given locale. + + ***********************************************************************/ + + this (UText pattern, inout ULocale locale = ULocale.Default) + { + this (pattern.get, locale); + } + + /*********************************************************************** + + Release message formatter + + ***********************************************************************/ + + ~this () + { + umsg_close (handle); + } + + /*********************************************************************** + + This locale is used for fetching default number or date + format information + + ***********************************************************************/ + + UMessageFormat setLocale (inout ULocale locale) + { + umsg_setLocale (handle, toString(locale.name)); + return this; + } + + /*********************************************************************** + + This locale is used for fetching default number or date + format information + + ***********************************************************************/ + + UMessageFormat getLocale (inout ULocale locale) + { + locale.name = toArray (umsg_getLocale (handle)); + return this; + } + + /*********************************************************************** + + Sets the pattern + + ***********************************************************************/ + + UMessageFormat setPattern (UText pattern) + { + Error e; + + umsg_applyPattern (handle, pattern.get.ptr, pattern.len, null, e); + testError (e, "failed to set formatter pattern"); + return this; + } + + 
/*********************************************************************** + + Gets the pattern + + ***********************************************************************/ + + UMessageFormat getPattern (UString s) + { + uint fmt (wchar* dst, uint length, inout Error e) + { + return umsg_toPattern (handle, dst, length, e); + } + + s.format (&fmt, "failed to get formatter pattern"); + return this; + } + + /*********************************************************************** + + This function may perform re-ordering of the arguments + depending on the locale. For all numeric arguments, double + is assumed unless the type is explicitly integer. All choice + format arguments must be of type double. + + ***********************************************************************/ + + UMessageFormat format (UString s, Args* list) + { + uint fmt (wchar* dst, uint length, inout Error e) + { + return umsg_vformat (handle, dst, length, list.args.ptr, e); + } + + s.format (&fmt, "failed to format pattern"); + return this; + } + + + /*********************************************************************** + + A typesafe list of arguments for the UMessageFormat.format() + method. This should be used in the following manner: + + @code + wchar[] format = "{0} {1, number, currency} {2, number, integer}"; + UMessageFormat msg = new UMessageFormat (format); + + msg.Args args; + msg.format (output, args.add("abc").add(152.0).add(456)); + @endcode + + Note that the argument order must follow that of the format + string, although the format string may dictate the ultimate + position of each argument. + + See http://oss.software.ibm.com/icu/apiref/umsg_8h.html for + details on the format string. + + @todo this will likely fail on certain CPU architectures. 
+ + ***********************************************************************/ + + struct Args + { + private uint[32] args; + private uint index; + + /*************************************************************** + + ***************************************************************/ + + invariant + { + assert (index < args.length); + } + + /*************************************************************** + + ***************************************************************/ + + Args* reset () + { + index = 0; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (UText x) + { + args[index] = cast(uint) cast(wchar*) x.get(); + ++index; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (wchar[] x) + { + args[index] = cast(uint) cast(wchar*) x; + ++index; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (int x) + { + args[index] = x; + ++index; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (double x) + { + *(cast(double*) &args[index]) = x; + index += 2; + return this; + } + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, char*, void*, inout Error) umsg_open; + void function (Handle) umsg_close; + void function (Handle, char*) umsg_setLocale; + char* function (Handle) umsg_getLocale; + uint function (Handle, wchar*, uint, inout Error) umsg_toPattern; + void function (Handle, wchar*, uint, void*, inout Error) umsg_applyPattern; + uint function (Handle, wchar*, uint, void*, inout Error) umsg_vformat; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &umsg_open, "umsg_open"}, + {cast(void**) &umsg_close, "umsg_close"}, + {cast(void**) &umsg_setLocale, "umsg_setLocale"}, + {cast(void**) &umsg_getLocale, "umsg_getLocale"}, + {cast(void**) &umsg_toPattern, "umsg_toPattern"}, + {cast(void**) &umsg_applyPattern, "umsg_applyPattern"}, + {cast(void**) &umsg_vformat, "umsg_vformat"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + //test (); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + + /*********************************************************************** + + ***********************************************************************/ + + static void test() + { 
+ UString output = new UString(100); + wchar[] format = "{0} {1, number, currency} {2, number, integer}"; + + UMessageFormat msg = new UMessageFormat (format); + + msg.Args args; + msg.format (output, args.add("abc").add(152.0).add(456)); + } +} + + + diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UNormalize.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UNormalize.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,391 @@ +/******************************************************************************* + + @file UNormalize.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). 
Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UNormalize; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString, + dwtx.dwthelper.mangoicu.ULocale; + +/******************************************************************************* + + transforms Unicode text into an equivalent composed or + decomposed form, allowing for easier sorting and searching + of text. UNormalize supports the standard normalization forms + described in http://www.unicode.org/unicode/reports/tr15/ + + Characters with accents or other adornments can be encoded + in several different ways in Unicode. For example, take the + character A-acute. In Unicode, this can be encoded as a single + character (the "composed" form): + + 00C1 LATIN CAPITAL LETTER A WITH ACUTE + + or as two separate characters (the "decomposed" form): + + 0041 LATIN CAPITAL LETTER A 0301 COMBINING ACUTE ACCENT + + To a user of your program, however, both of these sequences + should be treated as the same "user-level" character "A with + acute accent". When you are searching or comparing text, you + must ensure that these two sequences are treated equivalently. + In addition, you must handle characters with more than one + accent. Sometimes the order of a character's combining accents + is significant, while in other cases accent sequences in different + orders are really equivalent. + + Similarly, the string "ffi" can be encoded as three separate + letters: + + 0066 LATIN SMALL LETTER F 0066 LATIN SMALL LETTER F + 0069 LATIN SMALL LETTER I + + or as the single character + + FB03 LATIN SMALL LIGATURE FFI + + The ffi ligature is not a distinct semantic character, and strictly + speaking it shouldn't be in Unicode at all, but it was included for + compatibility with existing character sets that already provided it. 
+ The Unicode standard identifies such characters by giving them + "compatibility" decompositions into the corresponding semantic + characters. When sorting and searching, you will often want to use + these mappings. + + unorm_normalize helps solve these problems by transforming text into + the canonical composed and decomposed forms as shown in the first + example above. In addition, you can have it perform compatibility + decompositions so that you can treat compatibility characters the + same as their equivalents. Finally, UNormalize rearranges + accents into the proper canonical order, so that you do not have + to worry about accent rearrangement on your own. + + Form FCD, "Fast C or D", is also designed for collation. It allows + to work on strings that are not necessarily normalized with an + algorithm (like in collation) that works under "canonical closure", + i.e., it treats precomposed characters and their decomposed + equivalents the same. + + It is not a normalization form because it does not provide for + uniqueness of representation. Multiple strings may be canonically + equivalent (their NFDs are identical) and may all conform to FCD + without being identical themselves. + + The form is defined such that the "raw decomposition", the + recursive canonical decomposition of each character, results + in a string that is canonically ordered. This means that + precomposed characters are allowed for as long as their + decompositions do not need canonical reordering. + + Its advantage for a process like collation is that all NFD + and most NFC texts - and many unnormalized texts - already + conform to FCD and do not need to be normalized (NFD) for + such a process. The FCD quick check will return UNORM_YES + for most strings in practice. 
+ + For more details on FCD see the collation design document: + http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm + + ICU collation performs either NFD or FCD normalization + automatically if normalization is turned on for the collator + object. Beyond collation and string search, normalized strings + may be useful for string equivalence comparisons, transliteration/ + transcription, unique representations, etc. + + The W3C generally recommends to exchange texts in NFC. Note also + that most legacy character encodings use only precomposed forms + and often do not encode any combining marks by themselves. For + conversion to such character encodings the Unicode text needs to + be normalized to NFC. For more usage examples, see the Unicode + Standard Annex. + + See + this page for full details. + + +*******************************************************************************/ + +class UNormalize : ICU +{ + /*********************************************************************** + + Normalization modes, passed as the 'mode' argument of the + functions below. Default is an alias for NFC, and Count takes + the value following FCD + + ***********************************************************************/ + + enum Mode + { + None = 1, + NFD = 2, + NFKD = 3, + NFC = 4, + Default = NFC, + NFKC = 5, + FCD = 6, + Count + } + + /*********************************************************************** + + Result type of check(), which casts the return value of + unorm_quickCheckWithOptions to this enum + + ***********************************************************************/ + + enum Check + { + No, + Yes, + Maybe + } + + /*********************************************************************** + + Option bits passed as the 'o' argument of the functions in + this class. NOTE(review): Unicode32 (0x20) presumably maps to + ICU's UNORM_UNICODE_3_2 option - confirm against unorm.h + + ***********************************************************************/ + + enum Options + { + None = 0x00, + Unicode32 = 0x20 + } + + /*********************************************************************** + + Normalize a string. The string will be normalized according + to the specified normalization mode and options. + + The result is produced through dst.format(), which is handed + a callback around unorm_normalize and the message + "failed to normalize" + + ***********************************************************************/ + + static void normalize (UText src, UString dst, Mode mode, Options o = Options.None) + { + uint fmt (wchar* dst, uint len, inout Error e) + { + return unorm_normalize (src.get.ptr, src.len, mode, o, dst, len, e); + } + + dst.format (&fmt, "failed to normalize"); + } + + /*********************************************************************** + + Performing quick check on a string, to quickly determine + if the string is in a particular normalization format. + + Three types of result can be returned: Yes, No or Maybe. 
+ Result Yes indicates that the argument string is in the + desired normalized format, No determines that argument + string is not in the desired normalized format. A Maybe + result indicates that a more thorough check is required, + the user may have to put the string in its normalized + form and compare the results. + + ***********************************************************************/ + + static Check check (UText t, Mode mode, Options o = Options.None) + { + Error e; + + Check c = cast(Check) unorm_quickCheckWithOptions (t.get.ptr, t.len, mode, o, e); + testError (e, "failed to perform normalization check"); + return c; + } + + /*********************************************************************** + + Test if a string is in a given normalization form. + + Unlike check(), this function returns a definitive result, + never a "maybe". For NFD, NFKD, and FCD, both functions + work exactly the same. For NFC and NFKC where quickCheck + may return "maybe", this function will perform further + tests to arrive at a TRUE/FALSE result. + + The ICU status is passed to testError(), and the byte returned + by unorm_isNormalizedWithOptions is converted to bool + (non-zero yields true). + + ***********************************************************************/ + + static bool isNormalized (UText t, Mode mode, Options o = Options.None) + { + Error e; + + byte b = unorm_isNormalizedWithOptions (t.get.ptr, t.len, mode, o, e); + testError (e, "failed to perform normalization test"); + return b != 0; + } + + /*********************************************************************** + + Concatenate normalized strings, making sure that the result + is normalized as well. If both the left and the right strings + are in the normalization form according to "mode/options", + then the result will be + + dest=normalize(left+right, mode, options) + + With the input strings already being normalized, this function + will use unorm_next() and unorm_previous() to find the adjacent + end pieces of the input strings. 
Only the concatenation of these + end pieces will be normalized and then concatenated with the + remaining parts of the input strings. + + It is allowed to have dst==left to avoid copying the entire + left string. + + ***********************************************************************/ + + static void concatenate (UText left, UText right, UString dst, Mode mode, Options o = Options.None) + { + uint fmt (wchar* p, uint len, inout Error e) + { + return unorm_concatenate (left.get.ptr, left.len, right.get.ptr, right.len, p, len, mode, o, e); + } + + dst.format (&fmt, "failed to concatenate"); + } + + /*********************************************************************** + + Compare two strings for canonical equivalence. Further + options include case-insensitive comparison and code + point order (as opposed to code unit order). + + Canonical equivalence between two strings is defined as + their normalized forms (NFD or NFC) being identical. + This function compares strings incrementally instead of + normalizing (and optionally case-folding) both strings + entirely, improving performance significantly. + + Bulk normalization is only necessary if the strings do + not fulfill the FCD conditions. Only in this case, and + only if the strings are relatively long, is memory + allocated temporarily. For FCD strings and short non-FCD + strings there is no memory allocation. + + The value returned by unorm_compare is handed back unchanged + after the ICU status has been passed to testError(). + + NOTE(review): the Options enum of this class declares only + None and Unicode32, so the case-insensitive and code point + order options mentioned above have no named constant here - + confirm which bits callers are expected to pass. + + ***********************************************************************/ + + static int compare (UText left, UText right, Options o = Options.None) + { + Error e; + + int i = unorm_compare (left.get.ptr, left.len, right.get.ptr, right.len, o, e); + testError (e, "failed to compare"); + return i; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + C function pointers for the ICU unorm_* entry points called + by the wrappers of this class. Each pointer is listed in + 'targets' below, which the static constructor hands to + FunctionLoader.bind(). + + NOTE(review): unorm_compare is declared as returning uint + while compare() stores the result in an int - confirm the + intended signedness + + ***********************************************************************/ + + private static extern (C) + { + uint function (wchar*, uint, uint, uint, wchar*, uint, inout Error) unorm_normalize; + uint function (wchar*, uint, uint, uint, inout Error) unorm_quickCheckWithOptions; + byte function (wchar*, uint, uint, uint, inout Error) unorm_isNormalizedWithOptions; + uint function (wchar*, uint, wchar*, uint, wchar*, uint, uint, uint, inout Error) unorm_concatenate; + uint function (wchar*, uint, wchar*, uint, uint, inout Error) unorm_compare; + } + + /*********************************************************************** + + Binding table: pairs the address of each function pointer + declared above with the symbol name given for it + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &unorm_normalize, "unorm_normalize"}, + {cast(void**) &unorm_quickCheckWithOptions, "unorm_quickCheckWithOptions"}, + {cast(void**) &unorm_isNormalizedWithOptions, "unorm_isNormalizedWithOptions"}, + {cast(void**) &unorm_concatenate, "unorm_concatenate"}, + {cast(void**) &unorm_compare, "unorm_compare"}, + ]; + + /*********************************************************************** + + Static constructor: passes 'icuuc' and 'targets' to + FunctionLoader.bind() and keeps the returned value in + 'library' + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + Static destructor: passes 'library' to FunctionLoader.unbind() + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UNumberFormat.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UNumberFormat.d Sun Jun 22 22:57:31 
2008 +0200 @@ -0,0 +1,934 @@ +/******************************************************************************* + + @file UNumberFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UNumberFormat; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +public import dwtx.dwthelper.mangoicu.ULocale; + +/******************************************************************************* + + Number format constructed with Style.Decimal. In addition to + what it inherits from UCommonFormat it allows the pattern to + be replaced via setPattern() + +*******************************************************************************/ + +class UDecimalFormat : UCommonFormat +{ + /*********************************************************************** + + Create a decimal format for the given locale. Style.Decimal + and a null pattern are passed to the base constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Decimal, null, locale); + } + + /*********************************************************************** + + Set the pattern for a UDecimalFormat + + The pattern text and the 'localized' flag are handed to + unum_applyPattern, and the resulting ICU status is passed + to testError() + + ***********************************************************************/ + + void setPattern (UText pattern, bool localized) + { + Error e; + + unum_applyPattern (handle, localized, pattern.get.ptr, pattern.length, null, e); + testError (e, "failed to set numeric pattern"); + } +} + + +/******************************************************************************* + + Number format constructed with Style.Currency + +*******************************************************************************/ + +class UCurrencyFormat : UCommonFormat +{ + /*********************************************************************** + + Create a currency format for the given locale. Style.Currency + and a null pattern are passed to the base constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Currency, null, locale); + } +} + + +/******************************************************************************* + + Number format constructed with Style.Percent + +*******************************************************************************/ + +class UPercentFormat : UCommonFormat +{ + /*********************************************************************** + + Create a percent format for the given locale. Style.Percent + and a null pattern are passed to the base constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Percent, null, locale); 
+ } +} + + +/******************************************************************************* + + Number format constructed with Style.Scientific + +*******************************************************************************/ + +class UScientificFormat : UCommonFormat +{ + /*********************************************************************** + + Create a scientific format for the given locale. + Style.Scientific and a null pattern are passed to the base + constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Scientific, null, locale); + } +} + + +/******************************************************************************* + + Number format constructed with Style.Spellout + +*******************************************************************************/ + +class USpelloutFormat : UCommonFormat +{ + /*********************************************************************** + + Create a spellout format for the given locale. Style.Spellout + and a null pattern are passed to the base constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Spellout, null, locale); + } +} + + +/******************************************************************************* + + Number format constructed with Style.Duration + +*******************************************************************************/ + +class UDurationFormat : UCommonFormat +{ + /*********************************************************************** + + Create a duration format for the given locale. Style.Duration + and a null pattern are passed to the base constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.Duration, null, locale); + } +} + + +/******************************************************************************* + + Number format constructed with Style.RuleBased. Unlike the + other format classes in this module it derives from + UNumberFormat directly rather than from UCommonFormat, and + exposes only the LenientParse attribute + +*******************************************************************************/ + +class URuleBasedFormat : UNumberFormat +{ + /*********************************************************************** + + Create a rule-based format for the given locale. + Style.RuleBased and a null pattern are passed to the base + constructor + + ***********************************************************************/ + + this (inout ULocale locale) + { + super (Style.RuleBased, null, locale); + } + + /*********************************************************************** + + Sets the LenientParse attribute from the given flag + + ***********************************************************************/ + + void setLenientParse (bool yes) + { 
+ unum_setAttribute (handle, Attribute.LenientParse, yes); + } + + + /*********************************************************************** + + Returns true if the LenientParse attribute is non-zero + + ***********************************************************************/ + + bool isLenientParse () + { + return unum_getAttribute (handle, Attribute.LenientParse) != 0; + } +} + + +/******************************************************************************* + + Shared base class of the style-specific formats in this + module. It wraps unum_getAttribute and unum_setAttribute + behind typed accessors, one pair per Attribute value + +*******************************************************************************/ + +private class UCommonFormat : UNumberFormat +{ + /*********************************************************************** + + Forwards style, pattern and locale unchanged to the + UNumberFormat constructor + + ***********************************************************************/ + + this (Style style, char[] pattern, inout ULocale locale) + { + super (style, pattern, locale); + } + + /*********************************************************************** + + Return true if this format will parse numbers as integers + only + + ***********************************************************************/ + + bool isParseIntegerOnly () + { + return unum_getAttribute (handle, Attribute.ParseIntOnly) != 0; + } + + /*********************************************************************** + + Returns true if grouping is used in this format. + + ***********************************************************************/ + + bool isGroupingUsed () + { + return unum_getAttribute (handle, Attribute.GroupingUsed) != 0; + } + + /*********************************************************************** + + Always show decimal point? 
+ + ***********************************************************************/ + + bool isDecimalSeparatorAlwaysShown () + { + return unum_getAttribute (handle, Attribute.DecimalAlwaysShown) != 0; + } + + /*********************************************************************** + + Sets whether or not numbers should be parsed as integers + only + + ***********************************************************************/ + + void setParseIntegerOnly (bool yes) + { + unum_setAttribute (handle, Attribute.ParseIntOnly, yes); + } + + /*********************************************************************** + + Set whether or not grouping will be used in this format. + + ***********************************************************************/ + + void setGroupingUsed (bool yes) + { + unum_setAttribute (handle, Attribute.GroupingUsed, yes); + } + + /*********************************************************************** + + Sets whether or not the decimal point is always shown + (the DecimalAlwaysShown attribute). + + ***********************************************************************/ + + void setDecimalSeparatorAlwaysShown (bool yes) + { + unum_setAttribute (handle, Attribute.DecimalAlwaysShown, yes); + } + + /*********************************************************************** + + Sets the maximum number of digits allowed in the integer + portion of a number. + + ***********************************************************************/ + + void setMaxIntegerDigits (uint x) + { + unum_setAttribute (handle, Attribute.MaxIntegerDigits, x); + } + + /*********************************************************************** + + Sets the minimum number of digits allowed in the integer + portion of a number. 
+ + ***********************************************************************/ + + void setMinIntegerDigits (uint x) + { + unum_setAttribute (handle, Attribute.MinIntegerDigits, x); + } + + /*********************************************************************** + + Sets the IntegerDigits attribute (integer digits displayed) + + ***********************************************************************/ + + void setIntegerDigits (uint x) + { + unum_setAttribute (handle, Attribute.IntegerDigits, x); + } + + /*********************************************************************** + + Sets the maximum number of digits allowed in the fraction + portion of a number. + + ***********************************************************************/ + + void setMaxFractionDigits (uint x) + { + unum_setAttribute (handle, Attribute.MaxFractionDigits, x); + } + + /*********************************************************************** + + Sets the minimum number of digits allowed in the fraction + portion of a number. + + ***********************************************************************/ + + void setMinFractionDigits (uint x) + { + unum_setAttribute (handle, Attribute.MinFractionDigits, x); + } + + /*********************************************************************** + + Fraction digits. 
+ + ***********************************************************************/ + + void setFractionDigits (uint x) + { + unum_setAttribute (handle, Attribute.FractionDigits, x); + } + + /*********************************************************************** + + Sets the Multiplier attribute + + ***********************************************************************/ + + void setMultiplier (uint x) + { + unum_setAttribute (handle, Attribute.Multiplier, x); + } + + /*********************************************************************** + + Sets the GroupingSize attribute + + ***********************************************************************/ + + void setGroupingSize (uint x) + { + unum_setAttribute (handle, Attribute.GroupingSize, x); + } + + /*********************************************************************** + + Sets the RoundingMode attribute from a Rounding value + + ***********************************************************************/ + + void setRoundingMode (Rounding x) + { + unum_setAttribute (handle, Attribute.RoundingMode, x); + } + + /*********************************************************************** + + Sets the RoundingIncrement attribute from an integer value. + + NOTE(review): ICU also exposes the rounding increment as a + double attribute (unum_setDoubleAttribute) - confirm that + this integer call has the intended effect + + ***********************************************************************/ + + void setRoundingIncrement (uint x) + { + unum_setAttribute (handle, Attribute.RoundingIncrement, x); + } + + /*********************************************************************** + + The width to which the output of format() is padded + + ***********************************************************************/ + + void setFormatWidth (uint x) + { + unum_setAttribute (handle, Attribute.FormatWidth, x); + } + + /*********************************************************************** + + The position at which padding will take place. 
+ + ***********************************************************************/ + + void setPaddingPosition (Pad x) + { + unum_setAttribute (handle, Attribute.PaddingPosition, x); + } + + /*********************************************************************** + + Sets the SecondaryGroupingSize attribute + + ***********************************************************************/ + + void setSecondaryGroupingSize (uint x) + { + unum_setAttribute (handle, Attribute.SecondaryGroupingSize, x); + } + + /*********************************************************************** + + Sets the SignificantDigitsUsed attribute. + + NOTE(review): the parameter is a uint although the attribute + name suggests an on/off flag - confirm the expected values + + ***********************************************************************/ + + void setSignificantDigitsUsed (uint x) + { + unum_setAttribute (handle, Attribute.SignificantDigitsUsed, x); + } + + /*********************************************************************** + + Sets the MinSignificantDigits attribute + + ***********************************************************************/ + + void setMinSignificantDigits (uint x) + { + unum_setAttribute (handle, Attribute.MinSignificantDigits, x); + } + + /*********************************************************************** + + Sets the MaxSignificantDigits attribute + + ***********************************************************************/ + + void setMaxSignificantDigits (uint x) + { + unum_setAttribute (handle, Attribute.MaxSignificantDigits, x); + } + + + /*********************************************************************** + + Returns the maximum number of digits allowed in the integer + portion of a number. + + ***********************************************************************/ + + uint getMaxIntegerDigits () + { + return unum_getAttribute (handle, Attribute.MaxIntegerDigits); + } + + /*********************************************************************** + + Returns the minimum number of digits allowed in the integer + portion of a number. 
+ + ***********************************************************************/ + + uint getMinIntegerDigits () + { + return unum_getAttribute (handle, Attribute.MinIntegerDigits); + } + + /*********************************************************************** + + Returns the IntegerDigits attribute + + ***********************************************************************/ + + uint getIntegerDigits () + { + return unum_getAttribute (handle, Attribute.IntegerDigits); + } + + /*********************************************************************** + + Returns the maximum number of digits allowed in the fraction + portion of a number. + + ***********************************************************************/ + + uint getMaxFractionDigits () + { + return unum_getAttribute (handle, Attribute.MaxFractionDigits); + } + + /*********************************************************************** + + Returns the MinFractionDigits attribute + + ***********************************************************************/ + + uint getMinFractionDigits () + { + return unum_getAttribute (handle, Attribute.MinFractionDigits); + } + + /*********************************************************************** + + Returns the FractionDigits attribute + + ***********************************************************************/ + + uint getFractionDigits () + { + return unum_getAttribute (handle, Attribute.FractionDigits); + } + + /*********************************************************************** + + Returns the Multiplier attribute + + ***********************************************************************/ + + uint getMultiplier () + { + return unum_getAttribute (handle, Attribute.Multiplier); + } + + /*********************************************************************** + + Returns the GroupingSize attribute + + ***********************************************************************/ + + uint getGroupingSize () + { + return unum_getAttribute (handle, Attribute.GroupingSize); + } + + /*********************************************************************** + + Returns the RoundingMode attribute, cast to Rounding + + ***********************************************************************/ + + Rounding getRoundingMode () + { + return 
cast(Rounding) unum_getAttribute (handle, Attribute.RoundingMode); + } + + /*********************************************************************** + + Returns the RoundingIncrement attribute as an integer + + ***********************************************************************/ + + uint getRoundingIncrement () + { + return unum_getAttribute (handle, Attribute.RoundingIncrement); + } + + /*********************************************************************** + + Returns the FormatWidth attribute + + ***********************************************************************/ + + uint getFormatWidth () + { + return unum_getAttribute (handle, Attribute.FormatWidth); + } + + /*********************************************************************** + + Returns the PaddingPosition attribute, cast to Pad + + ***********************************************************************/ + + Pad getPaddingPosition () + { + return cast(Pad) unum_getAttribute (handle, Attribute.PaddingPosition); + } + + /*********************************************************************** + + Returns the SecondaryGroupingSize attribute + + ***********************************************************************/ + + uint getSecondaryGroupingSize () + { + return unum_getAttribute (handle, Attribute.SecondaryGroupingSize); + } + + /*********************************************************************** + + Returns the SignificantDigitsUsed attribute + + ***********************************************************************/ + + uint getSignificantDigitsUsed () + { + return unum_getAttribute (handle, Attribute.SignificantDigitsUsed); + } + + /*********************************************************************** + + Returns the MinSignificantDigits attribute + + ***********************************************************************/ + + uint getMinSignificantDigits () + { + return unum_getAttribute (handle, Attribute.MinSignificantDigits); + } + + /*********************************************************************** + + Returns the MaxSignificantDigits attribute + + ***********************************************************************/ + + uint getMaxSignificantDigits () + { + return unum_getAttribute (handle, Attribute.MaxSignificantDigits); + } + + /*********************************************************************** + + 
***********************************************************************/ + + void getPattern (UString dst, bool localize) + { + /* Produces this format's pattern in dst through dst.format(); + the 'localize' flag is forwarded to unum_toPattern */ + uint fmat (wchar* result, uint len, inout Error e) + { + return unum_toPattern (handle, localize, result, len, e); + } + + dst.format (&fmat, "failed to retrieve numeric format pattern"); + } +} + + +/******************************************************************************* + + UNumberFormat provides functions for formatting and parsing + a number. Also provides methods for determining which locales have + number formats, and what their names are. + + UNumberFormat helps you to format and parse numbers for any locale. + Your code can be completely independent of the locale conventions + for decimal points, thousands-separators, or even the particular + decimal digits used, or whether the number format is even decimal. + There are different number format styles like decimal, currency, + percent and spellout + + See + this page for full details. + +*******************************************************************************/ + +class UNumberFormat : ICU +{ + /* ICU number-format handle: assigned from unum_open in the + constructor and passed to unum_close in the destructor */ + package Handle handle; + + /* Opaque pointer types; UFieldPos is the type of the optional + 'p' argument of the format() overloads */ + typedef void* UFieldPos; + typedef void* ParseError; + + + /*********************************************************************** + + Rounding modes accepted by setRoundingMode() and returned by + getRoundingMode() + + ***********************************************************************/ + + public enum Rounding + { + Ceiling, + Floor, + Down, + Up, + HalfEven, + HalfDown, + HalfUp + }; + + /*********************************************************************** + + Padding positions accepted by setPaddingPosition() and + returned by getPaddingPosition() + + ***********************************************************************/ + + public enum Pad + { + BeforePrefix, + AfterPrefix, + BeforeSuffix, + AfterSuffix + }; + + /*********************************************************************** + + Format styles handed to unum_open by the constructor. Default + is an alias for Decimal and Ignore an alias for PatternDecimal + + ***********************************************************************/ + + public enum Style + { + PatternDecimal, + Decimal, + Currency, + Percent, + Scientific, + Spellout, + Ordinal, + Duration, + RuleBased, + Default = Decimal, + Ignore = PatternDecimal + }; + + /*********************************************************************** + + Attribute selectors passed to unum_getAttribute and + unum_setAttribute by the accessor methods. The values are + implicit, so they depend on declaration order. + NOTE(review): the order presumably mirrors ICU's + UNumberFormatAttribute - confirm before reordering + + ***********************************************************************/ + + private enum Attribute + { + ParseIntOnly, + GroupingUsed, + DecimalAlwaysShown, + MaxIntegerDigits, + MinIntegerDigits, + IntegerDigits, + MaxFractionDigits, + MinFractionDigits, + FractionDigits, + Multiplier, + GroupingSize, + RoundingMode, + RoundingIncrement, + FormatWidth, + PaddingPosition, + SecondaryGroupingSize, + SignificantDigitsUsed, + MinSignificantDigits, + MaxSignificantDigits, + LenientParse + }; + + 
private enum Symbol + { + DecimalSeparator, + GroupingSeparator, + PatternSeparator, + Percent, + ZeroDigit, + Digit, + MinusSign, + PlusSign, + Currency, + IntlCurrency, + MonetarySeparator, + Exponential, + Permill, + PadEscape, + Infinity, + Nan, + SignificantDigit, + FormatSymbolCount + }; + + /*********************************************************************** + + Open an ICU number format for the given style, pattern and + locale. The resulting status is handed to testError. + + NOTE(review): ICU documents the pattern argument of + unum_open as UTF-16 (UChar*), whereas a char[] and its + length are passed here - confirm this is intended. + + ***********************************************************************/ + + this (Style style, char[] pattern, inout ULocale locale) + { + Error e; + + handle = unum_open (style, pattern.ptr, pattern.length, toString(locale.name), null, e); + testError (e, "failed to create NumberFormat"); + } + + /*********************************************************************** + + Close the underlying ICU number format handle + + ***********************************************************************/ + + ~this () + { + unum_close (handle); + } + + /*********************************************************************** + + Format an int into dst using unum_format. 'p' is an + optional field position handed through to ICU. + + ***********************************************************************/ + + void format (UString dst, int number, UFieldPos p = null) + { + uint fmat (wchar* result, uint len, inout Error e) + { + return unum_format (handle, number, result, len, p, e); + } + + dst.format (&fmat, "int format failed"); + } + + /*********************************************************************** + + Format a long into dst using unum_formatInt64. 'p' is an + optional field position handed through to ICU. + + ***********************************************************************/ + + void format (UString dst, long number, UFieldPos p = null) + { + uint fmat (wchar* result, uint len, inout Error e) + { + return unum_formatInt64 (handle, number, result, len, p, e); + } + + dst.format (&fmat, "int64 format failed"); + } + + /*********************************************************************** + + Format a double into dst using unum_formatDouble. 'p' is an + optional field position handed through to ICU. + + ***********************************************************************/ + + void format (UString dst, double number, UFieldPos p = null) + { + uint fmat (wchar* result, uint len, inout Error e) + { + return unum_formatDouble (handle, number, result, len, p, e); + 
} + + dst.format (&fmat, "double format failed"); + } + + /*********************************************************************** + + Parse an int from src using unum_parse. 'index', when + supplied, is handed to ICU as the parse position. The + resulting status is handed to testError rather than being + silently dropped. + + ***********************************************************************/ + + int parseInteger (UText src, uint* index=null) + { + Error e; + + int value = unum_parse (handle, src.content.ptr, src.len, index, e); + testError (e, "failed to parse integer"); + return value; + } + + /*********************************************************************** + + Parse a long from src using unum_parseInt64. 'index', when + supplied, is handed to ICU as the parse position. The + resulting status is handed to testError rather than being + silently dropped. + + ***********************************************************************/ + + long parseLong (UText src, uint* index=null) + { + Error e; + + long value = unum_parseInt64 (handle, src.content.ptr, src.len, index, e); + testError (e, "failed to parse long"); + return value; + } + + /*********************************************************************** + + Parse a double from src using unum_parseDouble. 'index', + when supplied, is handed to ICU as the parse position. The + resulting status is handed to testError rather than being + silently dropped. + + ***********************************************************************/ + + double parseDouble (UText src, uint* index=null) + { + Error e; + + double value = unum_parseDouble (handle, src.content.ptr, src.len, index, e); + testError (e, "failed to parse double"); + return value; + } + + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + C-linkage function pointers for the ICU unum_* entry + points; they are filled in by the static constructor + + ***********************************************************************/ + + private static extern (C) + { + Handle function (uint, char*, uint, char*, ParseError, inout Error) unum_open; + void function (Handle) unum_close; + int function (Handle, int, wchar*, uint, UFieldPos, inout Error) unum_format; + int function (Handle, long, wchar*, uint, UFieldPos, inout Error) unum_formatInt64; + int function (Handle, double, wchar*, uint, UFieldPos, inout Error) unum_formatDouble; + int function (Handle, wchar*, uint, uint*, inout Error) unum_parse; + long function (Handle, wchar*, uint, uint*, inout Error) unum_parseInt64; + double function (Handle, wchar*, uint, uint*, inout Error) unum_parseDouble; + int function (Handle, uint) unum_getAttribute; + void function (Handle, uint, uint) unum_setAttribute; + uint function (Handle, byte, wchar*, uint, inout Error) unum_toPattern; + void function (Handle, byte, wchar*, uint, ParseError, inout Error) unum_applyPattern; + } + + /*********************************************************************** + + Table pairing each function pointer above with the name of + the ICU symbol it is bound to. Every pointer declared above + has an entry here. + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &unum_open, "unum_open"}, + {cast(void**) &unum_close, "unum_close"}, + {cast(void**) &unum_format, "unum_format"}, + {cast(void**) &unum_formatInt64, "unum_formatInt64"}, + {cast(void**) &unum_formatDouble, "unum_formatDouble"}, + {cast(void**) &unum_parse, "unum_parse"}, + {cast(void**) &unum_parseInt64, "unum_parseInt64"}, + {cast(void**) &unum_parseDouble, "unum_parseDouble"}, + {cast(void**) &unum_getAttribute, "unum_getAttribute"}, + {cast(void**) &unum_setAttribute, "unum_setAttribute"}, + {cast(void**) &unum_toPattern, 
"unum_toPattern"}, + {cast(void**) &unum_applyPattern, "unum_applyPattern"}, + ]; + + /*********************************************************************** + + Resolve every entry of 'targets' through FunctionLoader.bind + and keep the returned library handle + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + Hand the library handle back to FunctionLoader.unbind + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + + + diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/URegex.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/URegex.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,700 @@ +/******************************************************************************* + + @file URegex.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. 
+ + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. 
+ + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +/* NOTE(review): the module path below reads "dwthelper" while this patch places the file under dwtx/dwtxhelper/ - confirm which spelling is intended */ +module dwtx.dwthelper.mangoicu.URegex; + +private import dwtx.dwthelper.mangoicu.ICU; + +public import dwtx.dwthelper.mangoicu.ULocale, + dwtx.dwthelper.mangoicu.UString, + dwtx.dwthelper.mangoicu.UCollator, + dwtx.dwthelper.mangoicu.UBreakIterator; + + +/******************************************************************************* + + Set of slices to return for group matching. See URegex.groups(), + which stores the entire match in g0 and up to nine capture + groups in g1 through g9. + +*******************************************************************************/ + +class Groups : ICU +{ + public wchar[] g0, + g1, + g2, + g3, + g4, + g5, + g6, + g7, + g8, + g9; +} + +/******************************************************************************* + + Apis for an engine that provides regular-expression searching of + UTF16 strings. + + See http://icu.sourceforge.net/apiref/icu4c/uregex_8h.html for full + details. + +*******************************************************************************/ + +class URegex : Groups +{ + private Handle handle; + private UText theText; + + // Regex modes + public enum Flag + { + None = 0, + + // Enable case insensitive matching + CaseInsensitive = 2, + + // Allow white space and comments within patterns + Comments = 4, + + // Control behavior of "$" and "^" If set, recognize + // line terminators within string, otherwise, match + // only at start and end of input string. + MultiLine = 8, + + // If set, '.' matches line terminators, otherwise '.' + // matching stops at line end + DotAll = 32, + + // Forces normalization of pattern and strings + CanonEq = 128, + + // If set, uses the Unicode TR 29 definition of word + // boundaries. Warning: Unicode word boundaries are + // quite different from traditional regular expression + // word boundaries. 
See http://unicode.org/reports/tr29/#Word_Boundaries + UWord = 256, + } + + /*********************************************************************** + + Compiles the regular expression in string form into an + internal representation using the specified match mode + flags. The resulting regular expression handle can then + be used to perform various matching operations. + + The subject text starts out as an empty string; the status + of that initial uregex_setText call is not checked, and + theText is left unset until setText() is called. + + ***********************************************************************/ + + this (wchar[] pattern, Flag flags=Flag.None, ParseError* pe=null) + { + Error e; + + handle = uregex_open (pattern.ptr, pattern.length, flags, pe, e); + testError (e, "failed to open regex"); + uregex_setText (handle, "", 0, e); + } + + /*********************************************************************** + + Compiles the regular expression in string form into an + internal representation using the specified match mode + flags. The resulting regular expression handle can then + be used to perform various matching operations. + + This overload forwards the content of a UText pattern to + the wchar[] constructor. + + ***********************************************************************/ + + this (UText pattern, Flag flags=Flag.None, ParseError* pe=null) + { + this (pattern.get, flags, pe); + } + + /*********************************************************************** + + Internal constructor; used for cloning. Adopts the given + handle and sets an empty subject text; the status of that + uregex_setText call is not checked. + + ***********************************************************************/ + + private this (Handle handle) + { + Error e; + + this.handle = handle; + uregex_setText (handle, "", 0, e); + } + + /*********************************************************************** + + Close the regular expression, recovering all resources (memory) + it was holding + + ***********************************************************************/ + + ~this () + { + uregex_close (handle); + } + + /*********************************************************************** + + Cloning a regular expression is faster than opening a second + instance from the source form of the expression, and requires + less memory. 
+ + Note that the current input string and the position of any + matched text within it are not cloned; only the pattern itself + and the match mode flags are copied. + + Cloning can be particularly useful to threaded applications + that perform multiple match operations in parallel. Each + concurrent RE operation requires its own instance of a + URegularExpression. + + ***********************************************************************/ + + URegex clone () + { + Error e; + + Handle h = uregex_clone (handle, e); + testError (e, "failed to clone regex"); + return new URegex (h); + } + + /*********************************************************************** + + Return a copy of the source form of the pattern for this + regular expression, wrapped in a new UString + + ***********************************************************************/ + + UString getPattern () + { + Error e; + uint len; + + wchar* x = uregex_pattern (handle, len, e); + testError (e, "failed to extract regex pattern"); + return new UString (x[0..len]); + } + + /*********************************************************************** + + Get the match mode flags that were specified when compiling + this regular expression + + ***********************************************************************/ + + Flag getFlags () + { + Error e; + + Flag f = cast(Flag) uregex_flags (handle, e); + testError (e, "failed to get regex flags"); + return f; + } + + /*********************************************************************** + + Set the subject text string upon which the regular expression + will look for matches. + + This function may be called any number of times, allowing the + regular expression pattern to be applied to different strings. + + Regular expression matching operations work directly on the + application's string data. No copy is made. The subject string + data must not be altered after calling this function until after + all regular expression operations involving this string data are + completed. 
+ + Zero length strings are permitted. In this case, no subsequent + match operation will dereference the text string pointer. + + ***********************************************************************/ + + void setText (UText t) + { + Error e; + + theText = t; + uregex_setText (handle, t.get.ptr, t.length, e); + testError (e, "failed to set regex text"); + } + + /*********************************************************************** + + Get the subject text that is currently associated with this + regular expression object. This simply returns whatever was + previously supplied via setText(). + + Note that this returns a read-only reference to the text. + + ***********************************************************************/ + + UText getText () + { + return theText; + } + + /*********************************************************************** + + Return a set of slices representing the parenthesised groups. + This can be used in the following manner: + + @code + wchar[] msg; + + if (regex.next()) + with (regex.groups()) + msg ~= g1 ~ ":" ~ g2; + @endcode + + Note that g0 represents the entire match, whereas g1 through + g9 represent the parenthesised expressions. At most nine + capture groups are reported. + + A group that took no part in the last match (start() gives + -1, seen here as uint.max) is set to null instead of being + sliced out of range. The subject text must have been + supplied via setText() beforehand. + + ***********************************************************************/ + + Groups groups () + { + wchar[]* p = &g0; + uint count = groupCount(); + wchar[] content = theText.get(); + + if (count > 9) + count = 9; + for (uint i=0; i <= count; ++p, ++i) + { + uint s = start (i); + + // -1 (uint.max) marks a group that did not participate + if (s == uint.max) + *p = null; + else + *p = content [s..end(i)]; + } + return this; + } + + /*********************************************************************** + + Extract the string for the specified matching expression or + subexpression. UString 's' is the destination for the match. + + Group #0 is the complete string of matched text. Group #1 is + the text matched by the first set of capturing parentheses. 
+ + ***********************************************************************/ + + void group (UString s, uint index) + { + uint fmt (wchar* dst, uint length, inout Error e) + { + return uregex_group (handle, index, dst, length, e); + } + + s.format (&fmt, "failed to extract regex group text"); + } + + /*********************************************************************** + + Get the number of capturing groups in this regular + expression's pattern + + ***********************************************************************/ + + uint groupCount () + { + Error e; + + uint i = uregex_groupCount (handle, e); + testError (e, "failed to get regex group-count"); + return i; + } + + /*********************************************************************** + + Returns the index in the input string of the start of the + text matched by the specified capture group during the + previous match operation. + + Return -1 if the capture group was not part of the last + match. Group #0 refers to the complete range of matched + text. Group #1 refers to the text matched by the first + set of capturing parentheses + + Since the return type is uint, that -1 reaches the caller + as uint.max. + + ***********************************************************************/ + + uint start (uint index = 0) + { + Error e; + + uint i = uregex_start (handle, index, e); + testError (e, "failed to get regex start"); + return i; + } + + /*********************************************************************** + + Returns the index in the input string of the position + following the end of the text matched by the specified + capture group. + + Return -1 if the capture group was not part of the last + match. Group #0 refers to the complete range of matched + text. Group #1 refers to the text matched by the first + set of capturing parentheses. 
+ + Since the return type is uint, that -1 reaches the caller + as uint.max. + + ***********************************************************************/ + + uint end (uint index = 0) + { + Error e; + + uint i = uregex_end (handle, index, e); + testError (e, "failed to get regex end"); + return i; + } + + /*********************************************************************** + + Reset any saved state from the previous match. + + Has the effect of causing uregex_findNext to begin at the + specified index, and causing uregex_start(), uregex_end() + and uregex_group() to return an error indicating that there + is no match information available. + + ***********************************************************************/ + + void reset (uint startIndex) + { + Error e; + + uregex_reset (handle, startIndex, e); + testError (e, "failed to set regex next-index"); + } + + /*********************************************************************** + + Attempts to match the input string, beginning at startIndex, + against the pattern. + + To succeed, the match must extend to the end of the input + string. Contrast with probe() + + ***********************************************************************/ + + bool match (uint startIndex) + { + Error e; + + bool b = uregex_matches (handle, startIndex, e); + testError (e, "failed while matching regex"); + return b; + } + + /*********************************************************************** + + Attempts to match the input string, starting from the + specified index, against the pattern. + + The match may be of any length, and is not required to + extend to the end of the input string. Contrast with match() + + ***********************************************************************/ + + bool probe (uint startIndex) + { + Error e; + + bool b = uregex_lookingAt (handle, startIndex, e); + testError (e, "failed while looking at regex"); + return b; + } + + /*********************************************************************** + + Returns whether the text matches the search pattern, starting + from the current position. 
+ + If startIndex is specified, the current position is moved to + the specified location before the search is initiated. The + default of uint.max means that no startIndex was given, in + which case uregex_findNext is used instead of uregex_find. + + ***********************************************************************/ + + bool next (uint startIndex = uint.max) + { + Error e; + bool b; + + b = (startIndex == uint.max) ? uregex_findNext (handle, e) : + uregex_find (handle, startIndex, e); + + testError (e, "failed on next regex"); + return b; + } + + /*********************************************************************** + + Replaces every substring of the input that matches the pattern + with the given replacement string. + + This is a convenience function that provides a complete + find-and-replace-all operation. + + This method scans the input string looking for matches of + the pattern. Input that is not part of any match is copied + unchanged to the destination buffer. Matched regions are + replaced in the output buffer by the replacement string. + The replacement string may contain references to capture + groups; these take the form of $1, $2, etc. + + The provided 'result' will contain the results, and should + be set with a length sufficient to house the entire result. + Upon completion, the 'result' is shortened appropriately + and the total extent (length) of the operation is returned. + Set the initial length of 'result' using the UString method + truncate(). + + The returned extent should be checked to ensure it is not + longer than the length of 'result'. If it is longer, then + the result has been truncated. 
+ + ***********************************************************************/ + + uint replaceAll (UText replace, UString result) + { + Error e; + + uint len = uregex_replaceAll (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); + testError (e, "failed during regex replace"); + result.truncate (len); + return len; + } + + /*********************************************************************** + + Replaces the first substring of the input that matches the + pattern with the given replacement string. + + This is a convenience function that provides a complete + find-and-replace operation. + + This method scans the input string looking for a match of + the pattern. All input that is not part of the match is + copied unchanged to the destination buffer. The matched + region is replaced in the output buffer by the replacement + string. The replacement string may contain references to + capture groups; these take the form of $1, $2, etc + + The provided 'result' will contain the results, and should + be set with a length sufficient to house the entire result. + Upon completion, the 'result' is shortened appropriately + and the total extent (length) of the operation is returned. + Set the initial length of 'result' using the UString method + truncate(). + + The returned extent should be checked to ensure it is not + longer than the length of 'result'. If it is longer, then + the result has been truncated. 
+ + ***********************************************************************/ + + uint replaceFirst (UText replace, UString result) + { + Error e; + + uint len = uregex_replaceFirst (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); + testError (e, "failed during regex replace"); + result.truncate (len); + return len; + } + + /*********************************************************************** + + Split the text up into slices (fields), where each slice + represents the text situated between each pattern matched + within the text. The pattern is expected to represent one + or more slice delimiters. + + At most fields.length slices are stored; the number of + slices stored is returned. A delimiter at the very start + of the text does not produce a field. Text that follows + the final delimiter is stored as the last field when room + remains. The subject text must have been supplied via + setText() beforehand. + + ***********************************************************************/ + + uint split (wchar[][] fields) + { + Error e; + uint pos, + count; + wchar[] content = theText.get; + + while (count < fields.length) + if (uregex_findNext (handle, e) && e == e.OK) + { + uint i = start(); + fields[count] = content[pos..i]; + pos = end (); + + // ignore leading delimiter + if (i) + ++count; + } + else + break; + + testError (e, "failed during split"); + + // keep the text that follows the final delimiter + if (count < fields.length && pos < content.length) + fields[count++] = content[pos..content.length]; + + return count; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + C-linkage function pointers for the ICU uregex_* entry + points; they are filled in by the static constructor + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, uint, ParseError*, inout Error) uregex_open; + void function (Handle) uregex_close; + Handle function (Handle, inout Error) uregex_clone; + wchar* function (Handle, inout uint, inout Error) uregex_pattern; + uint function (Handle, inout Error) uregex_flags; + void function (Handle, wchar*, uint, inout Error) uregex_setText; + wchar* function (Handle, inout uint, inout Error) uregex_getText; + uint function (Handle, uint, wchar*, uint, inout Error) uregex_group; + uint function (Handle, inout Error) uregex_groupCount; + uint function (Handle, uint, inout Error) uregex_start; + uint function (Handle, uint, inout Error) uregex_end; + void function (Handle, uint, inout Error) uregex_reset; + bool function (Handle, uint, inout Error) uregex_matches; + bool function (Handle, uint, inout Error) uregex_lookingAt; + bool function (Handle, uint, inout Error) uregex_find; + bool function (Handle, inout Error) uregex_findNext; + uint function (Handle, wchar*, uint, wchar*, uint, inout Error) uregex_replaceAll; + uint function (Handle, wchar*, uint, wchar*, uint, inout Error) uregex_replaceFirst; + } + + /*********************************************************************** + + Table pairing each function pointer above with the name of + the ICU symbol it is bound to + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uregex_open, "uregex_open"}, + {cast(void**) &uregex_close, "uregex_close"}, + {cast(void**) &uregex_clone, "uregex_clone"}, + {cast(void**) &uregex_pattern, "uregex_pattern"}, + {cast(void**) &uregex_flags, "uregex_flags"}, + {cast(void**) &uregex_setText, 
"uregex_setText"}, + {cast(void**) &uregex_getText, "uregex_getText"}, + {cast(void**) &uregex_group, "uregex_group"}, + {cast(void**) &uregex_groupCount, "uregex_groupCount"}, + {cast(void**) &uregex_start, "uregex_start"}, + {cast(void**) &uregex_end, "uregex_end"}, + {cast(void**) &uregex_reset, "uregex_reset"}, + {cast(void**) &uregex_matches, "uregex_matches"}, + {cast(void**) &uregex_lookingAt, "uregex_lookingAt"}, + {cast(void**) &uregex_find, "uregex_find"}, + {cast(void**) &uregex_findNext, "uregex_findNext"}, + {cast(void**) &uregex_replaceAll, "uregex_replaceAll"}, + {cast(void**) &uregex_replaceFirst, "uregex_replaceFirst"}, + ]; + + /*********************************************************************** + + Resolve every entry of 'targets' through FunctionLoader.bind + and keep the returned library handle + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + Hand the library handle back to FunctionLoader.unbind + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UResourceBundle.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dwtx/dwtxhelper/mangoicu/UResourceBundle.d Sun Jun 22 22:57:31 2008 +0200 @@ -0,0 +1,544 @@ +/******************************************************************************* + + @file UResourceBundle.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. 
If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UResourceBundle; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +public import dwtx.dwthelper.mangoicu.ULocale; + +/******************************************************************************* + + API representing a collection of resource information pertaining to + a given locale. A resource bundle provides a way of accessing locale- + specific information in a data file. You create a resource bundle that + manages the resources for a given locale and then ask it for individual + resources. + + Resource bundles in ICU4C are currently defined using text files which + conform to the following BNF definition. More on resource bundle concepts + and syntax can be found in the Users Guide. + + See + this page for full details. 
+
+*******************************************************************************/
+
+class UResourceBundle : ICU
+{
+        // native ICU resource-bundle handle wrapped by this instance
+        private Handle handle;
+
+        /***********************************************************************
+
+                Internals opened up to the public
+
+        ***********************************************************************/
+
+        // Numeric constants for types of resource items
+        public enum ResType
+        {
+                None = -1,
+                String = 0,
+                Binary = 1,
+                Table = 2,
+                Alias = 3,
+                Int = 7,
+                Array = 8,
+                IntVector = 14
+        }
+
+        /***********************************************************************
+
+                private constructor for internal use only; wraps a handle
+                that was already obtained from ICU (see get() below)
+
+        ***********************************************************************/
+
+        private this (Handle handle)
+        {
+                this.handle = handle;
+        }
+
+        /***********************************************************************
+
+                Constructs a resource bundle for the locale-specific bundle
+                in the specified path.
+
+                locale  This is the locale this resource bundle is for. To
+                        get resources for the French locale, for example, you
+                        would create a ResourceBundle passing ULocale::FRENCH
+                        for the "locale" parameter, and all subsequent calls
+                        to that resource bundle will return resources that
+                        pertain to the French locale. If the caller passes a
+                        Locale.Default parameter, the default locale for the
+                        system (as returned by ULocale.getDefault()) will be
+                        used. Passing Locale.Root will cause the root-locale
+                        to be used.
+
+                path    This is a full pathname in the platform-specific
+                        format for the directory containing the resource
+                        data files we want to load resources from. We use
+                        locale IDs to generate filenames, and the filenames
+                        have this string prepended to them before being passed
+                        to the C++ I/O functions. Therefore, this string must
+                        always end with a directory delimiter (whatever that
+                        is for the target OS) for this class to work correctly.
+                        A null value will open the default ICU data-files
+
+                The status returned by ures_open is passed to testError
+                with the message "failed to open resource bundle".
+
+        ***********************************************************************/
+
+        this (inout ULocale locale, char[] path = null)
+        {
+                Error e;
+
+                handle = ures_open (toString(path), toString(locale.name), e);
+                testError (e, "failed to open resource bundle");
+        }
+
+        /***********************************************************************
+
+                Closes the wrapped resource-bundle handle via ures_close
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                ures_close (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the size of a resource. Size for scalar types is
+                always 1, and for vector/table types is the number of child
+                resources.
+
+        ***********************************************************************/
+
+        uint getSize ()
+        {
+                return ures_getSize (handle);
+        }
+
+        /***********************************************************************
+
+                Returns a signed integer from a resource. This integer is
+                originally 28 bit and the sign gets propagated.
+
+        ***********************************************************************/
+
+        int getInt ()
+        {
+                Error e;
+
+                int x = ures_getInt (handle, e);
+                testError (e, "failed to get resource integer");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns a string from a string resource type. The returned
+                UText is built over the buffer handed back by ICU.
+
+        ***********************************************************************/
+
+        UText getString ()
+        {
+                Error e;
+                uint  len;
+
+                wchar* x = ures_getString (handle, len, e);
+                testError (e, "failed to get resource string");
+                return new UText (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns the string in a given resource at the specified
+                index
+
+        ***********************************************************************/
+
+        UText getString (uint index)
+        {
+                Error e;
+                uint  len;
+
+                wchar* x = ures_getStringByIndex (handle, index, len, e);
+                testError (e, "failed to get resource string");
+                return new UText (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns a string in a resource that has a given key. This
+                procedure works only with table resources.
+
+        ***********************************************************************/
+
+        UText getString (char[] key)
+        {
+                Error e;
+                uint  len;
+
+                wchar* x = ures_getStringByKey (handle, toString(key), len, e);
+                testError (e, "failed to get resource string");
+                return new UText (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns the next string in a resource. The key reported by
+                ICU for that string is fetched but not exposed.
+
+                NOTE(review): the underlying C call is documented to yield
+                NULL when there is nothing left to iterate over; this
+                wrapper always returns a UText built from the result, so
+                callers should consult hasNext() first - confirm.
+
+        ***********************************************************************/
+
+        UText getNextString ()
+        {
+                Error   e;
+                uint    len;
+                char*   key;
+
+                wchar* x = ures_getNextString (handle, len, key, e);
+                testError (e, "failed to get next resource string");
+                return new UText (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Returns binary data from a resource. Can be used on most
+                primitive resource types (binaries, strings, ints). The
+                result is a slice over the pointer returned by ICU; no
+                copy is made here.
+
+        ***********************************************************************/
+
+        void[] getBinary ()
+        {
+                Error e;
+                uint  len;
+
+                void* x = ures_getBinary (handle, len, e);
+                testError (e, "failed to get binary resource");
+                return x[0..len];
+        }
+
+        /***********************************************************************
+
+                Returns an integer vector from a resource. The result is a
+                slice over the pointer returned by ICU; no copy is made
+                here.
+
+        ***********************************************************************/
+
+        int[] getIntVector ()
+        {
+                Error e;
+                uint  len;
+
+                int* x = ures_getIntVector (handle, len, e);
+                testError (e, "failed to get vector resource");
+                return x[0..len];
+        }
+
+        /***********************************************************************
+
+                Checks whether the resource has another element to
+                iterate over
+
+        ***********************************************************************/
+
+        bool hasNext ()
+        {
+                return ures_hasNext (handle) != 0;
+        }
+
+        /***********************************************************************
+
+                Resets the internal context of a resource so that
+                iteration starts from the first element
+
+        ***********************************************************************/
+
+        void resetIterator ()
+        {
+                ures_resetIterator (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the next resource in a given resource or null if
+                there are no more resources
+
+        ***********************************************************************/
+
+        UResourceBundle getNextResource ()
+        {
+                Error e;
+
+                return get (ures_getNextResource (handle, null, e), e);
+        }
+
+        /***********************************************************************
+
+                Returns a resource that has a given key. This procedure
+                works only with table resources.
+
+        ***********************************************************************/
+
+        UResourceBundle getResource (char[] key)
+        {
+                Error e;
+
+                return get (ures_getByKey (handle, toString(key), null, e), e);
+        }
+
+        /***********************************************************************
+
+                Returns the resource at the specified index
+
+        ***********************************************************************/
+
+        UResourceBundle getResource (uint index)
+        {
+                Error e;
+
+                return get (ures_getByIndex (handle, index, null, e), e);
+        }
+
+        /***********************************************************************
+
+                Return the version number associated with this ResourceBundle
+                as a UVersionInfo array, written into the provided argument
+
+        ***********************************************************************/
+
+        void getVersion (inout Version info)
+        {
+                ures_getVersion (handle, info);
+        }
+
+        /***********************************************************************
+
+                Return the ULocale associated with this ResourceBundle by
+                storing its name into the provided argument
+
+        ***********************************************************************/
+
+        void getLocale (inout ULocale locale)
+        {
+                Error e;
+
+                locale.name = toArray (ures_getLocale (handle, e));
+                testError (e, "failed to get resource locale");
+        }
+
+        /***********************************************************************
+
+                Returns the key associated with this resource. Not all
+                the resources have a key - only those that are members
+                of a table.
+
+        ***********************************************************************/
+
+        char[] getKey ()
+        {
+                return toArray (ures_getKey (handle));
+        }
+
+        /***********************************************************************
+
+                Returns the type of a resource. Available types are
+                defined in enum ResType
+
+        ***********************************************************************/
+
+        ResType getType ()
+        {
+                return cast(ResType) ures_getType (handle);
+        }
+
+        /***********************************************************************
+
+                Worker function for constructing internal ResourceBundle
+                instances. Checks the status first, then returns null when
+                the provided handle is null.
+
+        ***********************************************************************/
+
+        private static final UResourceBundle get (Handle handle, inout Error e)
+        {
+                testError (e, "failed to create resource bundle");
+                if (handle)
+                    return new UResourceBundle (handle);
+                return null;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU ures_* entry points. Each of
+                them has to be listed in the targets table below.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle  function (char*, char*, inout Error) ures_open;
+                void    function (Handle) ures_close;
+                char*   function (Handle, inout Error) ures_getLocale;
+                void    function (Handle, inout Version) ures_getVersion;
+                uint    function (Handle) ures_getSize;
+                int     function (Handle, inout Error) ures_getInt;
+                wchar*  function (Handle, inout uint, inout Error) ures_getString;
+                wchar*  function (Handle, uint, inout uint, inout Error) ures_getStringByIndex;
+                wchar*  function (Handle, char*, inout uint, inout Error) ures_getStringByKey;
+                void*   function (Handle, inout uint, inout Error) ures_getBinary;
+                int*    function (Handle, inout uint, inout Error) ures_getIntVector;
+                byte    function (Handle) ures_hasNext;
+                void    function (Handle) ures_resetIterator;
+                wchar*  function (Handle, inout uint, inout char*, inout Error) ures_getNextString;
+                char*   function (Handle) ures_getKey;
+                int     function (Handle) ures_getType;
+                Handle  function (Handle, Handle, inout Error) ures_getNextResource;
+                Handle  function (Handle, uint, Handle, inout Error) ures_getByIndex;
+                Handle  function (Handle, char*, Handle, inout Error) ures_getByKey;
+        }
+
+        /***********************************************************************
+
+                Table pairing each function pointer above with the symbol
+                name handed to FunctionLoader.bind. A pointer that is not
+                listed here is never bound, so calling through it would
+                dereference null - ures_getIntVector used to be missing,
+                which broke getIntVector().
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &ures_open,               "ures_open"},
+                {cast(void**) &ures_close,              "ures_close"},
+                {cast(void**) &ures_getLocale,          "ures_getLocale"},
+                {cast(void**) &ures_getVersion,         "ures_getVersion"},
+                {cast(void**) &ures_getSize,            "ures_getSize"},
+                {cast(void**) &ures_getInt,             "ures_getInt"},
+                {cast(void**) &ures_getString,          "ures_getString"},
+                {cast(void**) &ures_getStringByIndex,   "ures_getStringByIndex"},
+                {cast(void**) &ures_getStringByKey,     "ures_getStringByKey"},
+                {cast(void**) &ures_getBinary,          "ures_getBinary"},
+                {cast(void**) &ures_getIntVector,       "ures_getIntVector"},
+                {cast(void**) &ures_hasNext,            "ures_hasNext"},
+                {cast(void**) &ures_resetIterator,      "ures_resetIterator"},
+                {cast(void**) &ures_getNextString,      "ures_getNextString"},
+                {cast(void**) &ures_getKey,             "ures_getKey"},
+                {cast(void**) &ures_getType,            "ures_getType"},
+                {cast(void**) &ures_getNextResource,    "ures_getNextResource"},
+                {cast(void**) &ures_getByIndex,         "ures_getByIndex"},
+                {cast(void**) &ures_getByKey,           "ures_getByKey"},
+                ];
+
+        /***********************************************************************
+
+                Binds every entry of targets against the icuuc library
+                when the module is initialised
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+                //test ();
+        }
+
+        /***********************************************************************
+
+                Releases the library obtained by the static constructor
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+
+        /***********************************************************************
+
+                Manual smoke test (its call in static this() is commented
+                out): opens the default-locale bundle and pulls one string
+                and one child resource from it
+
+        ***********************************************************************/
+
+        static void test()
+        {
+                UResourceBundle b = new UResourceBundle (ULocale.Default);
+                UText t = b.getNextString();
+                UResourceBundle b1 = b.getNextResource ();
+        }
+}
+
+
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/USearch.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/USearch.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,607 @@
+/*******************************************************************************
+
+        @file USearch.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.USearch; + +private import dwtx.dwthelper.mangoicu.ICU; + +public import dwtx.dwthelper.mangoicu.ULocale, + dwtx.dwthelper.mangoicu.UString, + dwtx.dwthelper.mangoicu.UCollator, + dwtx.dwthelper.mangoicu.UBreakIterator; + +/******************************************************************************* + + Apis for an engine that provides language-sensitive text + searching based on the comparison rules defined in a UCollator + data struct. This ensures that language eccentricity can be handled, + e.g. for the German collator, characters ß and SS will be matched + if case is chosen to be ignored. See the "ICU Collation Design + Document" for more information. + + The algorithm implemented is a modified form of the Boyer Moore's + search. 
For more information see "Efficient Text Searching in Java", + published in Java Report in February, 1999, for further information + on the algorithm. + + There are 2 match options for selection: Let S' be the sub-string + of a text string S between the offsets start and end <start, end>. A + pattern string P matches a text string S at the offsets <start, end> if + + - option 1. Some canonical equivalent of P matches some canonical + equivalent of S' + + - option 2. P matches S' and if P starts or ends with a combining + mark, there exists no non-ignorable combining mark before + or after S' in S respectively. + + Option 2 will be the default + + This search has APIs similar to that of other text iteration + mechanisms such as the break iterators in ubrk.h. Using these + APIs, it is easy to scan through text looking for all occurrences + of a given pattern. This search iterator allows changing of + direction by calling a reset followed by a next or previous. + Though a direction change can occur without calling reset first, + this operation comes with some speed penalty. Generally, match + results in the forward direction will match the result matches + in the backwards direction in the reverse order + + USearch provides APIs to specify the starting position within + the text string to be searched, e.g. setOffset(), previous(x) + and next(x). Since the starting position will be set as it + is specified, please take note that there are some dangerous + positions at which the search may render incorrect results: + + - The midst of a substring that requires normalization. + + - If the following match is to be found, the position should + not be the second character which requires to be swapped + with the preceding character. Vice versa, if the preceding + match is to be found, position to search from should not be + the first character which requires to be swapped with the + next character. E.g. certain Thai and Lao characters require + swapping.
+
+        - If a following pattern match is to be found, any position
+          within a contracting sequence except the first will fail.
+          Vice versa if a preceding pattern match is to be found,
+          an invalid starting point would be any character within a
+          contracting sequence except the last.
+
+        A breakiterator can be used if only matches at logical breaks are
+        desired. Using a breakiterator will only give you results that
+        exactly match the boundaries given by the breakiterator. For
+        instance the pattern "e" will not be found in the string "\u00e9"
+        if a character break iterator is used.
+
+        Options are provided to handle overlapping matches. E.g. in
+        English, overlapping matches produce the results 0 and 2 for
+        the pattern "abab" in the text "ababab", whereas mutually
+        exclusive matches only produce the result 0.
+
+        Though collator attributes will be taken into consideration while
+        performing matches, there are no APIs here for setting and getting
+        the attributes. These attributes can be set by getting the collator
+        from getCollator() and using the APIs in UCollator. Lastly to update
+        String Search to the new collator attributes, reset() has to be called.
+
+        See http://oss.software.ibm.com/icu/apiref/usearch_8h.html for full
+        details.
+
+*******************************************************************************/
+
+class USearch : ICU
+{
+        // native ICU search handle wrapped by this instance
+        private Handle          handle;
+        // break iterator last supplied by the caller (may be null);
+        // remembered so getIterator() can hand it back
+        private UBreakIterator  iterator;
+
+        // DONE is returned by previous() and next() after all valid
+        // matches have been returned, and by first() and last() if
+        // there are no matches at all.
+        const uint Done = uint.max;
+
+        //Possible types of searches
+        public enum     Attribute
+                        {
+                        Overlap,
+                        CanonicalMatch,
+                        Count
+                        }
+
+        public enum     AttributeValue
+                        {
+                        Default = -1,
+                        Off,
+                        On,
+                        Count
+                        }
+
+        /***********************************************************************
+
+                Maps an optional break iterator onto the pointer ICU
+                expects: the iterator's native handle, or null when no
+                iterator was supplied. Casting the D object reference
+                itself would hand ICU the address of the wrapper object
+                rather than of the ICU break iterator.
+
+        ***********************************************************************/
+
+        private static void* breakHandle (UBreakIterator iterator)
+        {
+                if (iterator)
+                    return cast(void*) iterator.handle;
+                return null;
+        }
+
+        /***********************************************************************
+
+                Creating a search iterator data struct using the argument
+                locale language rule set
+
+                pattern         text to look for
+                text            text to be searched
+                locale          locale whose name is passed to usearch_open
+                iterator        optional break iterator restricting where
+                                matches may be detected; null for none
+
+        ***********************************************************************/
+
+        this (UText pattern, UText text, inout ULocale locale, UBreakIterator iterator = null)
+        {
+                Error e;
+
+                this.iterator = iterator;
+                handle = usearch_open (pattern.get.ptr, pattern.length, text.get.ptr, text.length, toString(locale.name), breakHandle(iterator), e);
+                testError (e, "failed to open search");
+        }
+
+        /***********************************************************************
+
+                Creating a search iterator data struct using the argument
+                collator language rule set
+
+                pattern         text to look for
+                text            text to be searched
+                col             collator whose handle supplies the rules
+                iterator        optional break iterator restricting where
+                                matches may be detected; null for none
+
+        ***********************************************************************/
+
+        this (UText pattern, UText text, UCollator col, UBreakIterator iterator = null)
+        {
+                Error e;
+
+                this.iterator = iterator;
+                handle = usearch_openFromCollator (pattern.get.ptr, pattern.length, text.get.ptr, text.length, col.handle, breakHandle(iterator), e);
+                testError (e, "failed to open search from collator");
+        }
+
+        /***********************************************************************
+
+                Close this USearch
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                usearch_close (handle);
+        }
+
+        /***********************************************************************
+
+                Sets the current position in the text string which the
+                next search will start from.
+
+        ***********************************************************************/
+
+        void setOffset (uint position)
+        {
+                Error e;
+
+                usearch_setOffset (handle, position, e);
+                testError (e, "failed to set search offset");
+        }
+
+        /***********************************************************************
+
+                Return the current index in the string text being searched
+
+        ***********************************************************************/
+
+        uint getOffset ()
+        {
+                return usearch_getOffset (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the index to the match in the text string that was
+                searched
+
+        ***********************************************************************/
+
+        uint getMatchedStart ()
+        {
+                return usearch_getMatchedStart (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the length of text in the string which matches the
+                search pattern
+
+        ***********************************************************************/
+
+        uint getMatchedLength ()
+        {
+                return usearch_getMatchedLength (handle);
+        }
+
+        /***********************************************************************
+
+                Returns the text that was matched by the most recent call to
+                first(), next(), previous(), or last(). The text is written
+                into the provided UString through its format() callback
+                mechanism.
+
+        ***********************************************************************/
+
+        void getMatchedText (UString s)
+        {
+                // callback handed to UString.format; it receives the
+                // destination buffer to be filled by ICU
+                uint fmt (wchar* dst, uint length, inout Error e)
+                {
+                        return usearch_getMatchedText (handle, dst, length, e);
+                }
+
+                s.format (&fmt, "failed to extract matched text");
+        }
+
+        /***********************************************************************
+
+                Set the string text to be searched.
+
+        ***********************************************************************/
+
+        void setText (UText t)
+        {
+                Error e;
+
+                usearch_setText (handle, t.get.ptr, t.length, e);
+                testError (e, "failed to set search text");
+        }
+
+        /***********************************************************************
+
+                Return the string text to be searched. Note that this
+                returns a read-only reference to the search text.
+
+        ***********************************************************************/
+
+        UText getText ()
+        {
+                uint len;
+
+                wchar *x = usearch_getText (handle, &len);
+                return new UText (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Sets the pattern used for matching
+
+        ***********************************************************************/
+
+        void setPattern (UText t)
+        {
+                Error e;
+
+                usearch_setPattern (handle, t.get.ptr, t.length, e);
+                testError (e, "failed to set search pattern");
+        }
+
+        /***********************************************************************
+
+                Gets the search pattern. Note that this returns a
+                read-only reference to the pattern.
+
+        ***********************************************************************/
+
+        UText getPattern ()
+        {
+                uint len;
+
+                wchar *x = usearch_getPattern (handle, &len);
+                return new UText (x[0..len]);
+        }
+
+        /***********************************************************************
+
+                Set the BreakIterator that will be used to restrict the
+                points at which matches are detected. Passing null hands
+                a null break iterator to ICU instead of dereferencing the
+                null reference.
+
+        ***********************************************************************/
+
+        void setIterator (UBreakIterator iterator)
+        {
+                Error  e;
+                Handle breaker;         // stays null when no iterator is given
+
+                if (iterator)
+                    breaker = iterator.handle;
+
+                this.iterator = iterator;
+                usearch_setBreakIterator (handle, breaker, e);
+                testError (e, "failed to set search iterator");
+        }
+
+        /***********************************************************************
+
+                Get the BreakIterator that will be used to restrict the
+                points at which matches are detected. This is the instance
+                last passed to a constructor or to setIterator(), and may
+                be null.
+
+        ***********************************************************************/
+
+        UBreakIterator getIterator ()
+        {
+                return iterator;
+        }
+
+        /***********************************************************************
+
+                Returns the first index at which the string text matches
+                the search pattern
+
+        ***********************************************************************/
+
+        uint first ()
+        {
+                Error e;
+
+                uint x = usearch_first (handle, e);
+                testError (e, "failed on first search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns the last index in the target text at which it
+                matches the search pattern
+
+        ***********************************************************************/
+
+        uint last ()
+        {
+                Error e;
+
+                uint x = usearch_last (handle, e);
+                testError (e, "failed on last search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns the index of the next point at which the string
+                text matches the search pattern, starting from the current
+                position.
+
+                If pos is specified, returns the first index greater than
+                pos at which the string text matches the search pattern.
+                The default of uint.max means "no position given".
+
+        ***********************************************************************/
+
+        uint next (uint pos = uint.max)
+        {
+                Error e;
+                uint  x;
+
+                x = (pos == uint.max) ? usearch_next (handle, e) :
+                                        usearch_following (handle, pos, e);
+
+                testError (e, "failed on next search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Returns the index of the previous point at which the
+                string text matches the search pattern, starting at
+                the current position.
+
+                If pos is specified, returns the first index less
+                than pos at which the string text matches the search
+                pattern. The default of uint.max means "no position
+                given".
+
+        ***********************************************************************/
+
+        uint previous (uint pos = uint.max)
+        {
+                Error e;
+                uint  x;
+
+                x = (pos == uint.max) ? usearch_previous (handle, e) :
+                                        usearch_preceding (handle, pos, e);
+
+                // message used to read "failed on next search" (copy/paste)
+                testError (e, "failed on previous search");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Search will begin at the start of the text string if a
+                forward iteration is initiated before a backwards iteration.
+                Otherwise if a backwards iteration is initiated before a
+                forwards iteration, the search will begin at the end of the
+                text string
+
+        ***********************************************************************/
+
+        void reset ()
+        {
+                usearch_reset (handle);
+        }
+
+        /***********************************************************************
+
+                Gets the collator used for the language rules.
+
+                NOTE(review): every call wraps the handle returned by
+                usearch_getCollator in a new UCollator. Whether that
+                wrapper closes the handle when collected is not visible
+                from here - confirm ownership against UCollator.
+
+        ***********************************************************************/
+
+        UCollator getCollator ()
+        {
+                return new UCollator (usearch_getCollator (handle));
+        }
+
+        /***********************************************************************
+
+                Sets the collator used for the language rules. This
+                method causes internal data such as Boyer-Moore shift
+                tables to be recalculated, but the iterator's position
+                is unchanged
+
+        ***********************************************************************/
+
+        void setCollator (UCollator col)
+        {
+                Error e;
+
+                usearch_setCollator (handle, col.handle, e);
+                testError (e, "failed to set search collator");
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU usearch_* entry points. Each
+                of them has to be listed in the targets table below.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle  function (wchar*, uint, wchar*, uint, char*, void*, inout Error) usearch_open;
+                Handle  function (wchar*, uint, wchar*, uint, Handle, void*, inout Error) usearch_openFromCollator;
+                void    function (Handle) usearch_close;
+                void    function (Handle, uint, inout Error) usearch_setOffset;
+                uint    function (Handle) usearch_getOffset;
+                uint    function (Handle) usearch_getMatchedStart;
+                uint    function (Handle) usearch_getMatchedLength;
+                uint    function (Handle, wchar*, uint, inout Error) usearch_getMatchedText;
+                void    function (Handle, wchar*, uint, inout Error) usearch_setText;
+                wchar*  function (Handle, uint*) usearch_getText;
+                void    function (Handle, wchar*, uint, inout Error) usearch_setPattern;
+                wchar*  function (Handle, uint*) usearch_getPattern;
+                uint    function (Handle, inout Error) usearch_first;
+                uint    function (Handle, inout Error) usearch_last;
+                uint    function (Handle, inout Error) usearch_next;
+                uint    function (Handle, inout Error) usearch_previous;
+                uint    function (Handle, uint, inout Error) usearch_following;
+                uint    function (Handle, uint, inout Error) usearch_preceding;
+                void    function (Handle) usearch_reset;
+                void    function (Handle, Handle, inout Error) usearch_setBreakIterator;
+                Handle  function (Handle) usearch_getCollator;
+                void    function (Handle, Handle, inout Error) usearch_setCollator;
+        }
+
+        /***********************************************************************
+
+                Table pairing each function pointer above with the symbol
+                name handed to FunctionLoader.bind
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &usearch_open,             "usearch_open"},
+                {cast(void**) &usearch_openFromCollator, "usearch_openFromCollator"},
+                {cast(void**) &usearch_close,            "usearch_close"},
+                {cast(void**) &usearch_setOffset,        "usearch_setOffset"},
+                {cast(void**) &usearch_getOffset,        "usearch_getOffset"},
+                {cast(void**) &usearch_getMatchedStart,  "usearch_getMatchedStart"},
+                {cast(void**) &usearch_getMatchedLength, "usearch_getMatchedLength"},
+                {cast(void**) &usearch_getMatchedText,   "usearch_getMatchedText"},
+                {cast(void**) &usearch_setText,          "usearch_setText"},
+                {cast(void**) &usearch_getText,          "usearch_getText"},
+                {cast(void**) &usearch_setPattern,       "usearch_setPattern"},
+                {cast(void**) &usearch_getPattern,       "usearch_getPattern"},
+                {cast(void**) &usearch_first,            "usearch_first"},
+                {cast(void**) &usearch_last,             "usearch_last"},
+                {cast(void**) &usearch_next,             "usearch_next"},
+                {cast(void**) &usearch_previous,         "usearch_previous"},
+                {cast(void**) &usearch_following,        "usearch_following"},
+                {cast(void**) &usearch_preceding,        "usearch_preceding"},
+                {cast(void**) &usearch_reset,            "usearch_reset"},
+                {cast(void**) &usearch_setBreakIterator, "usearch_setBreakIterator"},
+                {cast(void**) &usearch_getCollator,      "usearch_getCollator"},
+                {cast(void**) &usearch_setCollator,      "usearch_setCollator"},
+                ];
+
+        /***********************************************************************
+
+                Binds every entry of targets against the icuin library
+                when the module is initialised
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuin, targets);
+        }
+
+        /***********************************************************************
+
+                Releases the library obtained by the static constructor
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/USet.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/USet.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,472 @@
+/*******************************************************************************
+
+        @file USet.d
+
+        Copyright (c)
2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.USet; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +/******************************************************************************* + + A mutable set of Unicode characters and multicharacter strings. + + Objects of this class represent character classes used in regular + expressions. A character specifies a subset of Unicode code points. + Legal code points are U+0000 to U+10FFFF, inclusive. + + UnicodeSet supports two APIs. The first is the operand API that + allows the caller to modify the value of a UnicodeSet object. It + conforms to Java 2's java.util.Set interface, although UnicodeSet + does not actually implement that interface. All methods of Set are + supported, with the modification that they take a character range + or single character instead of an Object, and they take a UnicodeSet + instead of a Collection. The operand API may be thought of in terms + of boolean logic: a boolean OR is implemented by add, a boolean AND + is implemented by retain, a boolean XOR is implemented by complement + taking an argument, and a boolean NOT is implemented by complement + with no argument. In terms of traditional set theory function names, + add is a union, retain is an intersection, remove is an asymmetric + difference, and complement with no argument is a set complement with + respect to the superset range MIN_VALUE-MAX_VALUE + + The second API is the applyPattern()/toPattern() API from the + java.text.Format-derived classes. Unlike the methods that add + characters, add categories, and control the logic of the set, + the method applyPattern() sets all attributes of a UnicodeSet + at once, based on a string pattern. + + See + this page for full details. 
+
+*******************************************************************************/
+
+class USet : ICU
+{
+        // opaque ICU set handle: assigned by every constructor below and
+        // handed to uset_close by the destructor
+        package Handle handle;
+
+        // values passed as the 'options' argument of uset_openPatternOptions
+        // and uset_applyPattern
+        // NOTE(review): presumably mirrors ICU's USET_IGNORE_SPACE (1) and
+        // USET_CASE_INSENSITIVE (2) -- confirm against uset.h
+        enum    Options
+                {
+                None            = 0,
+                IgnoreSpace     = 1,
+                CaseInsensitive = 2,
+                }
+
+
+        /***********************************************************************
+
+                Creates a USet object that contains the range of characters
+                start..end, inclusive
+
+                The handle returned by uset_open is retained in 'handle'.
+
+        ***********************************************************************/
+
+        this (wchar start, wchar end)
+        {
+                handle = uset_open (start, end);
+        }
+
+        /***********************************************************************
+
+                Creates a set from the given pattern. See the UnicodeSet
+                class description for the syntax of the pattern language
+
+                The status reported by ICU is handed to testError along
+                with a failure message.
+
+        ***********************************************************************/
+
+        this (UText pattern, Options o = Options.None)
+        {
+                Error e;
+
+                handle = uset_openPatternOptions (pattern.get.ptr, pattern.len, o, e);
+                testError (e, "failed to open pattern-based charset");
+        }
+
+        /***********************************************************************
+
+                Internal constructor invoked via UCollator
+
+                The handle is adopted as-is; the destructor of this
+                instance will pass it to uset_close.
+
+        ***********************************************************************/
+
+        package this (Handle handle)
+        {
+                this.handle = handle;
+        }
+
+        /***********************************************************************
+
+                Disposes of the storage used by a USet object
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                uset_close (handle);
+        }
+
+        /***********************************************************************
+
+                Modifies the set to represent the set specified by the
+                given pattern. See the UnicodeSet class description for
+                the syntax of the pattern language. See also the User
+                Guide chapter about UnicodeSet. Empties the set passed
+                before applying the pattern.
+
+        ***********************************************************************/
+
+        void applyPattern (UText pattern, Options o = Options.None)
+        {
+                Error e;
+
+                // the value returned by uset_applyPattern is not used; only
+                // the status written to 'e' is examined
+                uset_applyPattern (handle, pattern.get.ptr, pattern.len, o, e);
+                testError (e, "failed to apply pattern");
+        }
+
+        /***********************************************************************
+
+                Returns a string representation of this set. If the result
+                of calling this function is passed to a uset_openPattern(),
+                it will produce another set that is equal to this one.
+
+                The pattern is written into 'dst' through dst.format;
+                'escape' is forwarded as the escapeUnprintable argument
+                of uset_toPattern.
+
+        ***********************************************************************/
+
+        void toPattern (UString dst, bool escape)
+        {
+                // adapter handed to dst.format: forwards the destination
+                // pointer, capacity and status to ICU
+                uint fmt (wchar* p, uint len, inout Error e)
+                {
+                        return uset_toPattern (handle, p, len, escape, e);
+                }
+
+                dst.format (&fmt, "failed to convert charset to a pattern");
+        }
+
+        /***********************************************************************
+
+                Adds the given character to the given USet. After this call,
+                contains (c) will return true.
+
+        ***********************************************************************/
+
+        void add (wchar c)
+        {
+                uset_add (handle, c);
+        }
+
+        /***********************************************************************
+
+                Adds all of the elements in the specified set to this set
+                if they're not already present. This operation effectively
+                modifies this set so that its value is the union of the two
+                sets. The behavior of this operation is unspecified if the
+                specified collection is modified while the operation is in
+                progress.
+
+                'other' must not be null: its handle is read directly.
+
+        ***********************************************************************/
+
+        void addSet (USet other)
+        {
+                uset_addAll (handle, other.handle);
+        }
+
+        /***********************************************************************
+
+                Adds the given range of characters to the given USet.
After
+                this call, contains(start, end) will return true
+
+        ***********************************************************************/
+
+        void addRange (wchar start, wchar end)
+        {
+                uset_addRange (handle, start, end);
+        }
+
+        /***********************************************************************
+
+                Adds the given string to the given USet. After this call,
+                containsString (str, strLen) will return true
+
+                The text is passed to ICU as a pointer (t.get.ptr) plus
+                an explicit length (t.len).
+
+        ***********************************************************************/
+
+        void addString (UText t)
+        {
+                uset_addString (handle, t.get.ptr, t.len);
+        }
+
+        /***********************************************************************
+
+                Removes the given character from this USet. After the
+                call, contains(c) will return false
+
+        ***********************************************************************/
+
+        void remove (wchar c)
+        {
+                uset_remove (handle, c);
+        }
+
+        /***********************************************************************
+
+                Removes the given range of characters from this USet.
+                After the call, contains(start, end) will return false
+
+        ***********************************************************************/
+
+        void removeRange (wchar start, wchar end)
+        {
+                uset_removeRange (handle, start, end);
+        }
+
+        /***********************************************************************
+
+                Removes the given string from this USet. After the call,
+                containsString (str, strLen) will return false
+
+                The text is passed to ICU as a pointer (t.get.ptr) plus
+                an explicit length (t.len).
+
+        ***********************************************************************/
+
+        void removeString (UText t)
+        {
+                uset_removeString (handle, t.get.ptr, t.len);
+        }
+
+        /***********************************************************************
+
+                Inverts this set. This operation modifies this set so
+                that its value is its complement.
This operation does
+                not affect the multicharacter strings, if any
+
+        ***********************************************************************/
+
+        void complement ()
+        {
+                uset_complement (handle);
+        }
+
+        /***********************************************************************
+
+                Removes all of the elements from this set. This set will
+                be empty after this call returns.
+
+        ***********************************************************************/
+
+        void clear ()
+        {
+                uset_clear (handle);
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains no characters and no
+                strings
+
+                ICU reports the answer as a byte; any non-zero value is
+                treated as true.
+
+        ***********************************************************************/
+
+        bool isEmpty ()
+        {
+                return uset_isEmpty (handle) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains the given character
+
+        ***********************************************************************/
+
+        bool contains (wchar c)
+        {
+                return uset_contains (handle, c) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains all characters c where
+                start <= c && c <= end
+
+        ***********************************************************************/
+
+        bool containsRange (wchar start, wchar end)
+        {
+                return uset_containsRange (handle, start, end) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns true if this USet contains the given string
+
+        ***********************************************************************/
+
+        bool containsString (UText t)
+        {
+                return uset_containsString (handle, t.get.ptr, t.len) != 0;
+        }
+
+        /***********************************************************************
+
+                Returns the value reported by uset_size for this set.
+
+                NOTE(review): per the ICU documentation this is presumably
+                the number of characters and strings contained -- confirm
+                against uset.h
+
+        ***********************************************************************/
+
+        uint size ()
+        {
+                return uset_size (handle);
+        }
+
+
+
/***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+                'library' holds whatever FunctionLoader.bind returned in
+                the static constructor; it is handed back to
+                FunctionLoader.unbind in the static destructor.
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers through which the methods above reach
+                the ICU uset_* C API. Each one has a matching entry in
+                'targets' below.
+
+                NOTE(review): ICU declares the code point parameters
+                (start, end, c) as 32-bit UChar32; they are declared as
+                wchar here -- confirm this is intentional.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (wchar start, wchar end) uset_open;
+                void   function (Handle) uset_close;
+                Handle function (wchar* pattern, uint patternLength, uint options, inout Error e) uset_openPatternOptions;
+                uint   function (Handle, wchar* pattern, uint patternLength, uint options, inout Error e) uset_applyPattern;
+                uint   function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout Error e) uset_toPattern;
+                void   function (Handle, wchar c) uset_add;
+                void   function (Handle, Handle additionalSet) uset_addAll;
+                void   function (Handle, wchar start, wchar end) uset_addRange;
+                void   function (Handle, wchar* str, uint strLen) uset_addString;
+                void   function (Handle, wchar c) uset_remove;
+                void   function (Handle, wchar start, wchar end) uset_removeRange;
+                void   function (Handle, wchar* str, uint strLen) uset_removeString;
+                void   function (Handle) uset_complement;
+                void   function (Handle) uset_clear;
+                byte   function (Handle) uset_isEmpty;
+                byte   function (Handle, wchar c) uset_contains;
+                byte   function (Handle, wchar start, wchar end) uset_containsRange;
+                byte   function (Handle, wchar* str, uint strLen) uset_containsString;
+                uint   function (Handle) uset_size;
+        }
+
+        /***********************************************************************
+
+                Binding table: pairs the address of each function pointer
+                declared above with the name of the ICU symbol it refers
+                to. Passed to FunctionLoader.bind in the static constructor.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &uset_open, "uset_open"},
+                {cast(void**) &uset_close, "uset_close"},
+                {cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"},
+                {cast(void**) &uset_applyPattern, "uset_applyPattern"},
+                {cast(void**) &uset_toPattern, "uset_toPattern"},
+                {cast(void**) &uset_add, "uset_add"},
+                {cast(void**) &uset_addAll, "uset_addAll"},
+                {cast(void**) &uset_addRange, "uset_addRange"},
+                {cast(void**) &uset_addString, "uset_addString"},
+                {cast(void**) &uset_remove, "uset_remove"},
+                {cast(void**) &uset_removeRange, "uset_removeRange"},
+                {cast(void**) &uset_removeString, "uset_removeString"},
+                {cast(void**) &uset_complement, "uset_complement"},
+                {cast(void**) &uset_clear, "uset_clear"},
+                {cast(void**) &uset_isEmpty, "uset_isEmpty"},
+                {cast(void**) &uset_contains, "uset_contains"},
+                {cast(void**) &uset_containsRange, "uset_containsRange"},
+                {cast(void**) &uset_containsString, "uset_containsString"},
+                {cast(void**) &uset_size, "uset_size"},
+                ];
+
+        /***********************************************************************
+
+                Module constructor: binds every entry of 'targets'
+                against the library identified by 'icuuc'.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+        }
+
+        /***********************************************************************
+
+                Module destructor: releases the library bound above.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
+
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UString.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/UString.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,1508 @@
+/*******************************************************************************
+
+        @file UString.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UString; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UChar, + dwtx.dwthelper.mangoicu.ULocale; + +/******************************************************************************* + +*******************************************************************************/ + +private extern (C) void memmove (void* dst, void* src, uint bytes); + +/******************************************************************************* + + Bind to the IReadable and IWritable interfaces if we're building + along with the mango.io package + +*******************************************************************************/ + +version=Isolated; +version (Isolated) + { + private interface ITextOther {} + private interface IStringOther {} + } + else + { + private import dwtx.dwthelper.mangoicu.UMango; + + private import mango.io.model.IReader, + mango.io.model.IWriter; + + private interface ITextOther : IWritable {} + private interface IStringOther : IReadable {} + } + + +/******************************************************************************* + + UString is a string class that stores Unicode characters directly + and provides similar functionality as the Java String class. + + In ICU, a Unicode string consists of 16-bit Unicode code units. + A Unicode character may be stored with either one code unit — + which is the most common case — or with a matched pair of + special code units ("surrogates"). The data type for code units + is UChar. + + For single-character handling, a Unicode character code point is + a value in the range 0..0x10ffff. ICU uses the UChar32 type for + code points. + + Indexes and offsets into and lengths of strings always count code + units, not code points. This is the same as with multi-byte char* + strings in traditional string handling. Operations on partial + strings typically do not test for code point boundaries. 
If necessary,
+        the user needs to take care of such boundaries by testing for the code
+        unit values or by using functions like getChar32Start()
+        and getChar32Limit()
+
+        UString methods are more lenient with regard to input parameter values
+        than other ICU APIs. In particular:
+
+        - If indexes are out of bounds for a UString object (< 0 or > length)
+          then they are "pinned" to the nearest boundary.
+
+        - If primitive string pointer values (e.g., const wchar* or char*) for
+          input strings are null, then those input string parameters are treated
+          as if they pointed to an empty string. However, this is not the case
+          for char* parameters for charset names or other IDs.
+
+*******************************************************************************/
+
+class UString : UText, IStringOther
+{
+        // alternative names for the append and character-assignment operators
+        alias opCat             append;
+        alias opIndexAssign     setCharAt;
+
+        /***********************************************************************
+
+                Create an empty UString with the specified available space
+
+                Only the buffer ('content') is sized here; 'len' is not
+                assigned by this constructor. The instance is flagged
+                as mutable.
+
+        ***********************************************************************/
+
+        this (uint space = 0)
+        {
+                content.length = space;
+                mutable = true;
+        }
+
+        /***********************************************************************
+
+                Create a UString upon the provided content. If said content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false. Doing so will avoid allocating
+                heap-space for the content until it is modified.
+
+                Delegates to setTo (content, mutable).
+
+        ***********************************************************************/
+
+        this (wchar[] content, bool mutable = true)
+        {
+                setTo (content, mutable);
+        }
+
+        /***********************************************************************
+
+                Create a UString via the content of a UText. Note that the
+                default is to assume the content is immutable (read-only).
+
+        ***********************************************************************/
+
+        this (UText other, bool mutable = false)
+        {
+                // forwards to the wchar[] constructor with other's content
+                this (other.get, mutable);
+        }
+
+        /***********************************************************************
+
+                Create a UString via the content of a UString. If said content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false. Doing so will avoid allocating
+                heap-space for the content until it is modified via UString
+                methods.
+
+        ***********************************************************************/
+
+        this (UString other, bool mutable = true)
+        {
+                // forwards to the wchar[] constructor with other's content
+                this (other.get, mutable);
+        }
+
+        /***********************************************************************
+
+                Support for reading content via the IO system
+
+                Everything in the else-branch below is compiled only
+                when version Isolated is not set.
+
+        ***********************************************************************/
+
+        version (Isolated){}
+        else
+        {
+                /***************************************************************
+
+                        Internal adapter to handle loading and conversion
+                        of UString content. Once constructed, this may be
+                        used as the target for an IReader. Alternatively,
+                        invoke the load() method with an IBuffer of choice.
+
+                ***************************************************************/
+
+                class UStringDecoder : StringDecoder16
+                {
+                        // the UString that receives the decoded content
+                        private UString s;
+
+                        // construct a decoder on the given UString
+                        this (UConverter c, uint bytes, UString s)
+                        {
+                                super (c, bytes);
+                                this.s = s;
+                        }
+
+                        // IReadable adapter to perform the conversion
+                        protected void read (IReader r)
+                        {
+                                load (r.buffer);
+                        }
+
+                        // read from the provided buffer until we
+                        // either have all the content, or an eof
+                        // condition throws an exception.
+                        package void load (IBuffer b)
+                        {
+                                // NOTE(review): s.len is only updated after the
+                                // loop, so s.expand (toGo) compares against the
+                                // length s had on entry rather than 'produced';
+                                // confirm the buffer always grows when the first
+                                // read has already filled s.content
+                                uint produced = super.read (b, s.content);
+                                while (toGo)
+                                      {
+                                      s.expand (toGo);
+                                      produced += super.read (b, s.content[produced..$]);
+                                      }
+                                s.len = produced;
+                        }
+                }
+
+                /***************************************************************
+
+                        Another constructor for loading known content length
+                        into a UString.
+
+                        The buffer is pre-sized to 'contentLength' and then
+                        filled through a UStringDecoder bound to this
+                        instance.
+
+                ***************************************************************/
+
+                this (IBuffer buffer, uint contentLength, UConverter cvt)
+                {
+                        this (contentLength);
+                        UStringDecoder sd = new UStringDecoder (cvt, contentLength, this);
+                        sd.load (buffer);
+                }
+
+                /***************************************************************
+
+                        Read as many bytes from the input as is necessary
+                        to produce the expected number of wchar elements.
+                        This uses the default wchar handler, which can be
+                        altered by binding a StringDecoder to the IReader
+                        in use (see UMango for details).
+
+                        We're mutable, so ensure we don't mess with the
+                        IO buffers. Interestingly, changing the length
+                        of a D array will account for slice assignments
+                        (it checks the pointer to see if it's a starting
+                        point in the pool). Unfortunately, that doesn't
+                        catch the case where a slice starts at offset 0,
+                        which is where IBuffer slices may come from.
+
+                        To be safe, we ask the allocator in use whether
+                        the content it provided can be mutated or not.
+                        Note that this is not necessary for UText, since
+                        that is a read-only construct.
+
+                ***************************************************************/
+
+                void read (IReader r)
+                {
+                        r.get (content);
+                        len = content.length;
+                        // the allocator decides whether we own the array
+                        mutable = r.getAllocator.isMutable (content);
+                }
+
+                /***************************************************************
+
+                        Return a streaming decoder that can be used to
+                        populate this UString with a specified number of
+                        input bytes.
+
+                        This differs from the above read() method in the
+                        way content is read: in the above case, exactly
+                        the specified number of wchar elements will be
+                        converted from the input, whereas in this case
+                        a variable number of wchar elements are converted
+                        until 'bytes' have been read from the input. This
+                        is useful in those cases where the original number
+                        of elements has been lost, and only the resultant
+                        converted byte-count remains (a la HTTP).
+
+                        The returned StringDecoder is one-shot only. You may
+                        reuse it (both the converter and the byte count) via
+                        its reset() method.
+
+                        One applies the resultant converter directly with an
+                        IReader like so:
+
+                        @code
+                        UString s = ...;
+                        IReader r = ...;
+
+                        // r >> s.createDecoder(cvt, bytes);
+                        r.get (s.createDecoder(cvt, bytes));
+                        @endcode
+
+                        which will read the specified number of bytes from
+                        the input and convert them to an appropriate number
+                        of wchars within the UString.
+
+                ***************************************************************/
+
+                StringDecoder createDecoder (UConverter c, uint bytes)
+                {
+                        return new UStringDecoder (c, bytes, this);
+                }
+        }
+
+        /***********************************************************************
+
+                Append text to this UString
+
+                Returns this instance, so calls can be chained.
+
+        ***********************************************************************/
+
+        UString opCat (UText other)
+        {
+                return opCat (other.get);
+        }
+
+        /***********************************************************************
+
+                Append partial text to this UString
+
+                'start' and 'len' are first passed through
+                other.pinIndices; the slice other.content [start..start+len]
+                is then appended.
+
+        ***********************************************************************/
+
+        UString opCat (UText other, uint start, uint len=uint.max)
+        {
+                other.pinIndices (start, len);
+                return opCat (other.content [start..start+len]);
+        }
+
+        /***********************************************************************
+
+                Append a single character to this UString
+
+        ***********************************************************************/
+
+        UString opCat (wchar chr)
+        {
+                // treat the parameter as a one-element buffer
+                return opCat (&chr, 1);
+        }
+
+        /***********************************************************************
+
+                Append the given array of UTF-16 code units to this UString
+                and return this instance.
+
+        ***********************************************************************/
+
+        UString opCat (wchar[] chars)
+        {
+                wchar* source = chars.ptr;
+                uint   count  = chars.length;
+
+                return opCat (source, count);
+        }
+
+        /***********************************************************************
+
+                Append UTF-8 text, converting it to UTF-16 on the way in.
+                Room for chars.length code units is reserved up front; the
+                conversion itself runs through format().
+
+        ***********************************************************************/
+
+        UString opCat (char[] chars)
+        {
+                // conversion step invoked by format() with the destination
+                // pointer, its capacity and the ICU status
+                uint fromUtf8 (wchar* dst, uint len, inout Error e)
+                {
+                        uint produced;
+
+                        u_strFromUTF8 (dst, len, &produced, chars.ptr, chars.length, e);
+                        return produced;
+                }
+
+                expand (chars.length);
+
+                UString result = format (&fromUtf8, "failed to append UTF char[]");
+                return result;
+        }
+
+        /***********************************************************************
+
+                Fill the (pinned) range start..start+len of this UString
+                with one character. An aliased buffer is reallocated
+                before it is written to.
+
+        ***********************************************************************/
+
+        UString setTo (wchar chr, uint start=0, uint len=uint.max)
+        {
+                pinIndices (start, len);
+
+                if (mutable == false)
+                    realloc ();
+
+                uint end = start + len;
+                content [start..end] = chr;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Replace the content with the given array. When 'mutable'
+                is true a private copy is taken straight away; otherwise
+                the array is merely aliased until this UString is altered.
+
+        ***********************************************************************/
+
+        UString setTo (wchar[] chars, bool mutable = true)
+        {
+                this.mutable = mutable;
+
+                if (mutable)
+                    content = chars.dup;
+                else
+                    content = chars;
+
+                len = chars.length;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Replace the content of this UString. If the new content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false.
Doing so will avoid allocating
+                heap-space for the content until it is modified via one of
+                these methods.
+
+        ***********************************************************************/
+
+        UString setTo (UText other, bool mutable = true)
+        {
+                return setTo (other.get, mutable);
+        }
+
+        /***********************************************************************
+
+                Replace the content of this UString. If the new content
+                is immutable (read-only) then you might consider setting the
+                'mutable' parameter to false. Doing so will avoid allocating
+                heap-space for the content until it is modified via one of
+                these methods.
+
+                'start' and 'len' are pinned via other.pinIndices before
+                the slice other.content [start..start+len] is taken.
+
+        ***********************************************************************/
+
+        UString setTo (UText other, uint start, uint len, bool mutable = true)
+        {
+                other.pinIndices (start, len);
+                return setTo (other.content [start..start+len], mutable);
+        }
+
+        /***********************************************************************
+
+                Replace the character at the specified location.
+
+                The index is validated by the 'in' contract only, so the
+                check is present solely in builds that keep contracts.
+                An aliased buffer is reallocated before it is written to.
+
+        ***********************************************************************/
+
+        final UString opIndexAssign (wchar chr, uint index)
+        in {
+                if (index >= len)
+                    exception ("index out of bounds");
+           }
+        body
+        {
+                if (! mutable)
+                    realloc ();
+                content [index] = chr;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Remove a piece of this UString.
+
+                'start' and 'length' are pinned first; the text following
+                the removed range is then moved down over it. Returns
+                this instance.
+
+        ***********************************************************************/
+
+        UString remove (uint start, uint length=uint.max)
+        {
+                pinIndices (start, length);
+                if (length)
+                   {
+                   if (start >= len)
+                       truncate (start);
+                   else
+                      {
+                      if (! mutable)
+                          realloc ();
+
+                      uint i = start + length;
+
+                      // use pointer arithmetic rather than &content[i]:
+                      // when the tail of a completely full buffer is
+                      // removed, i equals content.length and indexing
+                      // would trip the array bounds check even though
+                      // zero bytes are moved
+                      memmove (content.ptr + start, content.ptr + i, (len-i) * wchar.sizeof);
+                      len -= length;
+                      }
+                   }
+                return this;
+        }
+
+        /***********************************************************************
+
+                Truncate the length of this UString.
+
+        ***********************************************************************/
+
+        UString truncate (uint length=0)
+        {
+                // a request longer than the current content is ignored
+                if (length <= len)
+                    len = length;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Insert 'count' leading pad characters (space by default)
+                in front of the existing content
+
+        ***********************************************************************/
+
+        UString padLeading (uint count, wchar padChar = 0x0020)
+        {
+                expand (count);
+
+                // shift the existing content up; pointer arithmetic keeps
+                // this legal when the buffer holds exactly 'count' units
+                memmove (content.ptr + count, content.ptr, len * wchar.sizeof);
+                len += count;
+                return setTo (padChar, 0, count);
+        }
+
+        /***********************************************************************
+
+                Append 'length' trailing pad characters (space by default)
+                to this UString.
+
+        ***********************************************************************/
+
+        UString padTrailing (uint length, wchar padChar = 0x0020)
+        {
+                expand (length);
+                len += length;
+                return setTo (padChar, len-length, length);
+        }
+
+        /***********************************************************************
+
+                Make sure 'count' further units can be written after the
+                valid content. The buffer is reallocated when it is too
+                small, and also when the content is not mutable: aliased
+                content with spare room must not be written through.
+
+        ***********************************************************************/
+
+        package final void expand (uint count)
+        {
+                if (! mutable || (len + count) > content.length)
+                      realloc (count);
+        }
+
+        /***********************************************************************
+
+                Allocate memory due to a change in the content. We handle
+                the distinction between mutable and immutable here: mutable
+                content is resized in place, whereas immutable content is
+                replaced by a fresh array holding a copy of the valid part,
+                after which this UString is flagged as mutable. The new
+                size is rounded up to a multiple of 64 units.
+
+        ***********************************************************************/
+
+        private final void realloc (uint count = 0)
+        {
+                uint size = (content.length + count + 63) & ~63;
+
+                if (mutable)
+                    content.length = size;
+                else
+                   {
+                   mutable = true;
+                   wchar[] x = content;
+                   content = new wchar [size];
+
+                   // copy the valid part only: the old array may be longer
+                   // than 'len' (e.g. after truncate), and array copies
+                   // require both sides to be of equal length
+                   if (len)
+                       content[0..len] = x[0..len];
+                   }
+        }
+
+        /***********************************************************************
+
+                Internal method to support UString appending: copies
+                'count' units from 'chars' behind the valid content
+
+        ***********************************************************************/
+
+        private final UString opCat (wchar* chars, uint count)
+        {
+                expand (count);
+                content[len..len+count] = chars[0..count];
+                len += count;
+                return this;
+        }
+
+        /***********************************************************************
+
+                Internal method to support formatting into this UString.
+                This is used by many of the ICU wrappers to append content
+                into a UString.
+
+                The formatter is handed the free space behind the valid
+                content and returns a length. When it flags BufferOverflow
+                the buffer is expanded by the returned length and the call
+                is repeated. Any other error raises an exception carrying
+                'msg'; otherwise the returned length is added to 'len'.
+
+        ***********************************************************************/
+
+        typedef uint delegate (wchar* dst, uint len, inout Error e) Formatter;
+
+        package final UString format (Formatter format, char[] msg)
+        {
+                Error   e;
+                uint    length;
+
+                // the formatter writes into our buffer, so stop aliasing
+                // read-only content before handing out a pointer
+                if (! mutable)
+                      realloc ();
+
+                while (true)
+                      {
+                      e = e.OK;
+
+                      // content.ptr + len rather than &content[len]: the
+                      // buffer may be completely full (or empty) here
+                      length = format (content.ptr + len, content.length - len, e);
+                      if (e == e.BufferOverflow)
+                          expand (length);
+                      else
+                         break;
+                      }
+
+                if (isError (e))
+                    exception (msg);
+
+                len += length;
+                return this;
+        }
+}
+
+
+/*******************************************************************************
+
+        Immutable (read-only) text -- use UString for mutable strings.
+
+*******************************************************************************/
+
+class UText : ICU, ITextOther
+{
+        alias opIndex   charAt;
+
+        // the core of the UText and UString attributes. The name 'len'
+        // is used rather than the more obvious 'length' since there is
+        // a collision with the silly array[length] syntactic sugar ...
+        package uint            len;            // number of valid units; get() yields content[0..len]
+        package wchar[]         content;        // backing array, of which the first 'len' units are valid
+
+        // this should probably be in UString only, but there seems to
+        // be a compiler bug where it doesn't get initialised correctly,
+        // and it's perhaps useful to have here for when a UString is
+        // passed as a UText argument.
+        private bool            mutable;
+
+        // toFolded() argument; also accepted by the compareFolded() methods
+        public enum     CaseOption
+                        {
+                        Default  = 0,
+                        SpecialI = 1
+                        }
+
+        /***********************************************************************
+
+                Hidden constructor: leaves all attributes at their
+                default values
+
+        ***********************************************************************/
+
+        private this ()
+        {
+        }
+
+        /***********************************************************************
+
+                Construct read-only wrapper around the given content.
+                The array is referenced rather than copied, and all of
+                it is considered valid.
+
+        ***********************************************************************/
+
+        this (wchar[] content)
+        {
+                this.content = content;
+                this.len = content.length;
+        }
+
+        /***********************************************************************
+
+                Support for writing via the Mango IO subsystem. Compiled
+                out when version 'Isolated' is set.
+
+        ***********************************************************************/
+
+        version (Isolated){}
+        else
+        {
+                void write (IWriter w)
+                {
+                        // emit the valid part only
+                        w.put (get);
+                }
+        }
+
+        /***********************************************************************
+
+                Return the valid content from this UText, as a slice of
+                the backing array (no copy is made)
+
+        ***********************************************************************/
+
+        final package wchar[] get ()
+        {
+                return content [0..len];
+        }
+
+        /***********************************************************************
+
+                Is this UText equal to another? Returns zero when 'o' is
+                not a UText; otherwise non-zero when 'o' is this very
+                object or compare() reports no difference.
+
+        ***********************************************************************/
+
+        final override int opEquals (Object o)
+        {
+                UText other = cast(UText) o;
+
+                if (other)
+                    return (other is this || compare (other) == 0);
+                return 0;
+        }
+
+        /***********************************************************************
+
+                Compare this UText to another.
+
+        ***********************************************************************/
+
+        final override int opCmp (Object o)
+        {
+                UText other = cast(UText) o;
+
+                if (other is this)
+                    return 0;
+                else
+                   if (other)
+                       return compare (other);
+
+                // not a UText (or null): order this object after it
+                return 1;
+        }
+
+        /***********************************************************************
+
+                Hash this UText. Only the valid content takes part.
+
+        ***********************************************************************/
+
+        final override uint toHash ()
+        {
+                return typeid(wchar[]).getHash (&content[0..len]);
+        }
+
+        /***********************************************************************
+
+                Clone this UText into a UString. Only the valid content
+                is handed over, so that unused buffer space behind 'len'
+                does not become part of the clone.
+
+        ***********************************************************************/
+
+        final UString copy ()
+        {
+                return new UString (get);
+        }
+
+        /***********************************************************************
+
+                Clone a section of this UText into a UString. 'start'
+                and 'len' are pinned to the valid content.
+
+        ***********************************************************************/
+
+        final UString extract (uint start, uint len=uint.max)
+        {
+                pinIndices (start, len);
+                return new UString (content[start..start+len]);
+        }
+
+        /***********************************************************************
+
+                Count unicode code points in the length UChar code units of
+                the string. A code point may occupy either one or two UChar
+                code units. Counting code points involves reading all code
+                units.
+
+                'start' and 'length' are pinned to the valid content.
+
+        ***********************************************************************/
+
+        final uint codePoints (uint start=0, uint length=uint.max)
+        {
+                pinIndices (start, length);
+
+                // pointer arithmetic rather than &content[start], since
+                // 'start' may equal the array length (e.g. empty text)
+                return u_countChar32 (content.ptr + start, length);
+        }
+
+        /***********************************************************************
+
+                Return an indication whether or not there are surrogate pairs
+                within the string.
+
+        ***********************************************************************/
+
+        final bool hasSurrogates (uint start=0, uint length=uint.max)
+        {
+                pinIndices (start, length);
+
+                // fewer code points than code units means that something
+                // in the section occupies two units
+                uint points = codePoints (start, length);
+                return points != length;
+        }
+
+        /***********************************************************************
+
+                Return the code unit at the specified position. The
+                in-contract raises an exception for an index outside
+                the valid content.
+
+        ***********************************************************************/
+
+        final wchar opIndex (uint index)
+        in {
+           if (! (index < len))
+                 exception ("index of out bounds");
+           }
+        body
+        {
+                wchar unit = content [index];
+                return unit;
+        }
+
+        /***********************************************************************
+
+                Return the number of valid code units
+
+        ***********************************************************************/
+
+        final uint length ()
+        {
+                return this.len;
+        }
+
+        /***********************************************************************
+
+                Compare against the valid content of another UText,
+                either in code unit order or in code point order. The
+                two differ only in UTF-16 when comparing supplementary
+                code points (U+10000..U+10ffff) to BMP code points near
+                the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compare (UText other, bool codePointOrder=false)
+        {
+                wchar[] rhs = other.get;
+                return compare (rhs, codePointOrder);
+        }
+
+        /***********************************************************************
+
+                The comparison can be done in code unit order or in code
+                point order. They differ only in UTF-16 when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compare (wchar[] other, bool codePointOrder=false)
+        {
+                return u_strCompare (content.ptr, len, other.ptr, other.length, codePointOrder);
+        }
+
+        /***********************************************************************
+
+                Compare the valid content of this UText with that of
+                another one, ignoring case (via u_strCaseCompare). The
+                'option' is handed through to ICU unchanged.
+
+                Returns zero when both fold to the same text; an
+                exception is raised when ICU reports an error.
+
+        ***********************************************************************/
+
+        final int compareFolded (UText other, CaseOption option = CaseOption.Default)
+        {
+                // other.get, not other.content: the backing array of a
+                // UString can be longer than its valid content
+                return compareFolded (other.get, option);
+        }
+
+        /***********************************************************************
+
+                Compare the valid content of this UText with the given
+                array, ignoring case (via u_strCaseCompare). The 'option'
+                is handed through to ICU unchanged.
+
+                Returns zero when both fold to the same text; an
+                exception is raised when ICU reports an error.
+
+        ***********************************************************************/
+
+        final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default)
+        {
+                return compareFolded (get, other, option);
+        }
+
+        /***********************************************************************
+
+                Does this UText start with specified string?
+
+        ***********************************************************************/
+
+        final bool startsWith (UText other)
+        {
+                return startsWith (other.get);
+        }
+
+        /***********************************************************************
+
+                Does this UText start with specified string?
+
+                Note that the test goes through compareFolded(), so it
+                ignores case. Always false when 'chars' is longer than
+                this text.
+
+        ***********************************************************************/
+
+        final bool startsWith (wchar[] chars)
+        {
+                if (len >= chars.length)
+                    return compareFolded (content[0..chars.length], chars) == 0;
+                return false;
+        }
+
+        /***********************************************************************
+
+                Does this UText end with specified string?
+
+        ***********************************************************************/
+
+        final bool endsWith (UText other)
+        {
+                return endsWith (other.get);
+        }
+
+        /***********************************************************************
+
+                Does this UText end with specified string?
+
+                Note that the test goes through compareFolded(), so it
+                ignores case. Always false when 'chars' is longer than
+                this text.
+
+        ***********************************************************************/
+
+        final bool endsWith (wchar[] chars)
+        {
+                if (len >= chars.length)
+                    return compareFolded (content[len-chars.length..len], chars) == 0;
+                return false;
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a BMP code point in a string.
+                A surrogate code point is found only if its match in the
+                text is not part of a surrogate pair.
+
+                The search begins at 'start', which is pinned to the
+                valid content. Returns the index of the match, or
+                uint.max where there is none.
+
+        ***********************************************************************/
+
+        final uint indexOf (wchar c, uint start=0)
+        {
+                pinIndex (start);
+
+                // pointer arithmetic rather than &content[start], since
+                // 'start' may have been pinned to the very end
+                wchar* s = u_memchr (content.ptr + start, c, len-start);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a substring in a string.
+
+                The substring is found at code point boundaries.
That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+        ***********************************************************************/
+
+        final uint indexOf (UText other, uint start=0)
+        {
+                return indexOf (other.get, start);
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a substring in a string.
+
+                The substring is found at code point boundaries. That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+                The search begins at 'start', which is pinned to the
+                valid content. Returns the index of the match, or
+                uint.max where there is none.
+
+        ***********************************************************************/
+
+        final uint indexOf (wchar[] chars, uint start=0)
+        {
+                pinIndex (start);
+
+                // pointer arithmetic rather than &content[start], since
+                // 'start' may have been pinned to the very end
+                wchar* s = u_strFindFirst (content.ptr + start, len-start, chars.ptr, chars.length);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a BMP code point in a string.
+                A surrogate code point is found only if its match in the
+                text is not part of a surrogate pair.
+
+                Only the first 'start' units are searched; 'start' is
+                pinned to the valid content. Returns the index of the
+                match, or uint.max where there is none.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (wchar c, uint start=uint.max)
+        {
+                pinIndex (start);
+                wchar* s = u_memrchr (content.ptr, c, start);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a substring in a string.
+                The substring is found at code point boundaries, so
+                its edge units are not matched against halves of
+                surrogate pairs.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (UText other, uint start=uint.max)
+        {
+                wchar[] needle = other.get;
+                return lastIndexOf (needle, start);
+        }
+
+        /***********************************************************************
+
+                Locate the last occurrence of a substring within the
+                first 'start' units of this text ('start' is pinned to
+                the valid content).
+
+                Matches are made at code point boundaries: a substring
+                beginning with a trail surrogate, or ending with a lead
+                surrogate, is found only where those surrogates stand
+                alone in the text. Otherwise, the substring edge units
+                would be matched against halves of surrogate pairs.
+
+                Returns the index of the match, or uint.max for none.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (wchar[] chars, uint start=uint.max)
+        {
+                pinIndex (start);
+
+                wchar* hit = u_strFindLast (content.ptr, start, chars.ptr, chars.length);
+                if (hit is null)
+                    return uint.max;
+                return hit - content.ptr;
+        }
+
+        /***********************************************************************
+
+                Lowercase the characters into a separate UString, using
+                ULocale.Default.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toLower (UString dst)
+        {
+                UString result = toLower (dst, ULocale.Default);
+                return result;
+        }
+
+        /***********************************************************************
+
+                Lowercase the characters into a seperate UString.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toLower (UString dst, inout ULocale locale)
+        {
+                // callback driven by dst.format(): lowercases our valid
+                // content into whatever space the destination offers
+                uint lowerInto (wchar* target, uint room, inout Error status)
+                {
+                        return u_strToLower (target, room, content.ptr, len, ICU.toString(locale.name), status);
+                }
+
+                uint reserve = len + 32;
+                dst.expand (reserve);
+                return dst.format (&lowerInto, "toLower() failed");
+        }
+
+        /***********************************************************************
+
+                Uppercase the characters into a separate UString, using
+                ULocale.Default.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toUpper (UString dst)
+        {
+                UString result = toUpper (dst, ULocale.Default);
+                return result;
+        }
+
+        /***********************************************************************
+
+                Uppercase the characters into a separate UString, using
+                the given locale.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toUpper (UString dst, inout ULocale locale)
+        {
+                // callback driven by dst.format(): uppercases our valid
+                // content into whatever space the destination offers
+                uint upperInto (wchar* target, uint room, inout Error status)
+                {
+                        return u_strToUpper (target, room, content.ptr, len, ICU.toString(locale.name), status);
+                }
+
+                uint reserve = len + 32;
+                dst.expand (reserve);
+                return dst.format (&upperInto, "toUpper() failed");
+        }
+
+        /***********************************************************************
+
+                Case-fold the characters into a seperate UString.
+
+                Case-folding is locale-independent and not context-sensitive,
+                but there is an option for whether to include or exclude
+                mappings for dotted I and dotless i that are marked with 'I'
+                in CaseFolding.txt. The result may be longer or shorter than
+                the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toFolded (UString dst, CaseOption option = CaseOption.Default)
+        {
+                // callback driven by dst.format(): case-folds our valid
+                // content into whatever space the destination offers
+                uint foldInto (wchar* target, uint room, inout Error status)
+                {
+                        return u_strFoldCase (target, room, content.ptr, len, option, status);
+                }
+
+                uint reserve = len + 32;
+                dst.expand (reserve);
+                return dst.format (&foldInto, "toFolded() failed");
+        }
+
+        /***********************************************************************
+
+                Converts a sequence of wchar (UTF-16) to UTF-8 bytes. If
+                the output array is not provided, an array of four bytes
+                per code unit is allocated and used. Where the output is
+                provided, it must be large enough to hold potentially four
+                bytes per character for surrogate-pairs or three bytes per
+                character for BMP only. Consider using UConverter where
+                streaming conversions are required.
+
+                Returns an array slice representing the valid UTF8 content.
+                An exception is raised when the conversion reports an error.
+
+        ***********************************************************************/
+
+        final char[] toUtf8 (char[] dst = null)
+        {
+                Error   status;
+                uint    written;
+
+                // no output buffer supplied: provide one ourselves
+                if (dst.ptr is null)
+                    dst = new char[len * 4];
+
+                u_strToUTF8 (dst.ptr, dst.length, &written, content.ptr, len, status);
+                testError (status, "failed to convert to UTF8");
+                return dst [0..written];
+        }
+
+        /***********************************************************************
+
+                Remove leading and trailing whitespace from this UText.
+                Note that we slice the content to remove leading space.
+
+        ***********************************************************************/
+
+        UText trim ()
+        {
+                wchar   c;
+                uint    i = len;
+
+                // cut off trailing white space
+                while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c)))
+                       --i;
+                len = i;
+
+                // now remove leading whitespace
+                for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {}
+                if (i)
+                   {
+                   len -= i;
+
+                   // drop the leading units only. Slicing to $-i would
+                   // cut the same amount off the end too, and could
+                   // leave the array shorter than 'len'
+                   content = content[i..$];
+                   }
+
+                return this;
+        }
+
+        /***********************************************************************
+
+                Unescape this text and return the resulting Unicode
+                characters in a new UString. The following escape
+                sequences are recognized:
+
+                  \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+                  \\Uhhhhhhhh   8 hex digits
+                  \\xhh         1-2 hex digits
+                  \\x{h...}     1-8 hex digits
+                  \\ooo         1-3 octal digits; o in [0-7]
+                  \\cX          control-X; X is masked with 0x1F
+
+                as well as the standard ANSI C escapes:
+
+                  \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+                  \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+                  \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+
+                Anything else following a backslash is generically escaped.
+                For example, "[a\\-z]" returns "[a-z]".
+
+                If an escape sequence is ill-formed, this method returns an
+                empty string. An example of an ill-formed sequence is "\\u"
+                followed by fewer than 4 hex digits.
+
+        ***********************************************************************/
+
+        final UString unEscape ()
+        {
+                UString result = new UString (len);
+                for (uint i=0; i < len;)
+                    {
+                    dchar c = charAt(i++);
+                    if (c == 0x005C)
+                       {
+                       // bump index ... the decoder reads further units
+                       // through _charAt and advances 'i' past the escape
+                       c = u_unescapeAt (&_charAt, &i, len, cast(void*) this);
+
+                       // error?
+                       if (c == 0xFFFFFFFF)
+                          {
+                          result.truncate ();     // return empty string
+                          break;                  // invalid escape sequence
+                          }
+                       }
+                    result.append (c);
+                    }
+                return result;
+        }
+
+        /***********************************************************************
+
+                Is this code point a surrogate (U+d800..U+dfff)?
+
+        ***********************************************************************/
+
+        final static bool isSurrogate (wchar c)
+        {
+                return c >= 0xd800 && c <= 0xdfff;
+        }
+
+        /***********************************************************************
+
+                Does this code unit lie in the lead surrogate range
+                (U+d800..U+dbff)?
+
+        ***********************************************************************/
+
+        final static bool isLeading (wchar c)
+        {
+                return c >= 0xd800 && c <= 0xdbff;
+        }
+
+        /***********************************************************************
+
+                Does this code unit lie in the trail surrogate range
+                (U+dc00..U+dfff)?
+
+        ***********************************************************************/
+
+        final static bool isTrailing (wchar c)
+        {
+                return c >= 0xdc00 && c <= 0xdfff;
+        }
+
+        /***********************************************************************
+
+                Move a random-access offset back to the start of the
+                code point it falls into. Where the offset addresses
+                the trail surrogate of a surrogate pair, the offset of
+                the lead surrogate is returned; any other offset is
+                returned unchanged. The in-contract raises an exception
+                for an offset outside the valid content.
+
+        ***********************************************************************/
+
+        final uint getCharStart (uint i)
+        in {
+           if (! (i < len))
+                 exception ("index of out bounds");
+           }
+        body
+        {
+                bool atTrail = isTrailing (content[i]);
+
+                // second half of a pair: step back onto the lead unit
+                if (atTrail && i > 0 && isLeading (content[i-1]))
+                    return i - 1;
+                return i;
+        }
+
+        /***********************************************************************
+
+                Adjust a random-access offset to a code point boundary
+                after a code point. If the offset is behind the lead
+                surrogate of a surrogate pair, then the offset is
+                incremented. Otherwise, it is not modified.
+
+        ***********************************************************************/
+
+        final uint getCharLimit (uint i)
+        in {
+           if (i >= len)
+               exception ("index of out bounds");
+           }
+        body
+        {
+                // sitting between the two halves of a pair: step past it
+                if (i && isLeading(content[i-1]) && isTrailing (content[i]))
+                    ++i;
+                return i;
+        }
+
+        /***********************************************************************
+
+                Callback for C unescapeAt() function. The 'context' is
+                the UText handed to u_unescapeAt() by unEscape(); the
+                callback returns the code unit at 'offset' of that text.
+
+        ***********************************************************************/
+
+        extern (C)
+        {
+                typedef wchar function (uint offset, void* context) CharAt;
+
+                private static wchar _charAt (uint offset, void* context)
+                {
+                        // unEscape() passes 'this', which is a UText and
+                        // not necessarily a UString: name the type that
+                        // is really behind the pointer
+                        return (cast(UText) context).charAt (offset);
+                }
+        }
+
+        /***********************************************************************
+
+                Pin the given index to a valid position: anything beyond
+                the valid content becomes 'len'.
+
+        ***********************************************************************/
+
+        final private void pinIndex (inout uint x)
+        {
+                if (x > len)
+                    x = len;
+        }
+
+        /***********************************************************************
+
+                Pin the given index and length to a valid position, such
+                that start <= len and start + length <= len afterwards.
+
+        ***********************************************************************/
+
+        final private void pinIndices (inout uint start, inout uint length)
+        {
+                if (start > len)
+                    start = len;
+
+                if (length > (len - start))
+                    length = len - start;
+        }
+
+        /***********************************************************************
+
+                Helper for comparison methods: hands both arrays to
+                u_strCaseCompare and raises an exception when that call
+                reports an error.
+
+        ***********************************************************************/
+
+        final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default)
+        {
+                Error e;
+
+                int x = u_strCaseCompare (s1.ptr, s1.length, s2.ptr, s2.length, option, e);
+                testError (e, "compareFolded failed");
+                return x;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library.
This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        // whatever FunctionLoader.bind() returned; handed back to
+        // FunctionLoader.unbind() by the static destructor
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU entry points used by this
+                class. They are filled in by the static constructor via
+                the 'targets' table below.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                wchar* function (wchar*, uint, wchar*, uint) u_strFindFirst;
+                wchar* function (wchar*, uint, wchar*, uint) u_strFindLast;
+                wchar* function (wchar*, wchar, uint) u_memchr;
+                wchar* function (wchar*, wchar, uint) u_memrchr;
+                int    function (wchar*, uint, wchar*, uint, bool) u_strCompare;
+                int    function (wchar*, uint, wchar*, uint, uint, inout Error) u_strCaseCompare;
+                dchar  function (CharAt, uint*, uint, void*) u_unescapeAt;
+                uint   function (wchar*, uint) u_countChar32;
+                uint   function (wchar*, uint, wchar*, uint, char*, inout Error) u_strToUpper;
+                uint   function (wchar*, uint, wchar*, uint, char*, inout Error) u_strToLower;
+                uint   function (wchar*, uint, wchar*, uint, uint, inout Error) u_strFoldCase;
+                wchar* function (wchar*, uint, uint*, char*, uint, inout Error) u_strFromUTF8;
+                char*  function (char*, uint, uint*, wchar*, uint, inout Error) u_strToUTF8;
+        }
+
+        /***********************************************************************
+
+                Binding table: pairs the address of each function
+                pointer above with the name of the symbol to look up.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &u_strFindFirst,      "u_strFindFirst"},
+                {cast(void**) &u_strFindLast,       "u_strFindLast"},
+                {cast(void**) &u_memchr,            "u_memchr"},
+                {cast(void**) &u_memrchr,           "u_memrchr"},
+                {cast(void**) &u_strCompare,        "u_strCompare"},
+                {cast(void**) &u_strCaseCompare,    "u_strCaseCompare"},
+                {cast(void**) &u_unescapeAt,        "u_unescapeAt"},
+                {cast(void**) &u_countChar32,       "u_countChar32"},
+                {cast(void**) &u_strToUpper,        "u_strToUpper"},
+                {cast(void**) &u_strToLower,        "u_strToLower"},
+                {cast(void**) &u_strFoldCase,       "u_strFoldCase"},
+                {cast(void**) &u_strFromUTF8,       "u_strFromUTF8"},
+                {cast(void**) &u_strToUTF8,         "u_strToUTF8"},
+                ];
+
+        /***********************************************************************
+
+                Bind the table against the 'icuuc' library when the
+                module is loaded
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+                //test ();
+        }
+
+        /***********************************************************************
+
+                Release the library binding again
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+
+        /***********************************************************************
+
+                Ad-hoc exercise of a few UString methods. Its only call
+                site, in the static constructor, is commented out.
+
+        ***********************************************************************/
+
+        private static void test()
+        {
+                UString s = new UString (r"aaaqw \uabcd eaaa");
+                char[] x = "dssfsdff";
+                s ~ x ~ x;
+                wchar c = s[3];
+                s[3] = 'Q';
+                int y = s.indexOf ("qwe");
+                s.unEscape ();
+                s.toUpper (new UString);
+                s.padLeading(2).padTrailing(2).trim();
+        }
+}
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UStringPrep.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/UStringPrep.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,249 @@
+/*******************************************************************************
+
+        @file UStringPrep.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+
+        Permission is hereby granted to anyone to use this software for any
+        purpose, including commercial applications, and to alter it and/or
+        redistribute it freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented; you must
+           not claim that you wrote the original software.
If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+        HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+        INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+        FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+        NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+        WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+        Except as contained in this notice, the name of a copyright holder
+        shall not be used in advertising or otherwise to promote the sale, use
+        or other dealings in this Software without prior written authorization
+        of the copyright holder.
+
+        ----------------------------------------------------------------------
+
+        All trademarks and registered trademarks mentioned herein are the
+        property of their respective owners.
+
+*******************************************************************************/
+
+module dwtx.dwthelper.mangoicu.UStringPrep;
+
+private import dwtx.dwthelper.mangoicu.ICU,
+               dwtx.dwthelper.mangoicu.UString;
+
+/*******************************************************************************
+
+        StringPrep API implements the StringPrep framework as described
+        by RFC 3454.
+
+        StringPrep prepares Unicode strings for use in network protocols.
+        Profiles of StringPrep are sets of rules and data according to which
+        the Unicode strings are prepared. Each profile contains tables
+        which describe how a code point should be treated. The tables are
+        broadly classified into
+
+        - Unassigned Table: Contains code points that are unassigned
+          in the Unicode Version supported by StringPrep. Currently
+          RFC 3454 supports Unicode 3.2.
+
+        - Prohibited Table: Contains code points that are prohibited
+          from the output of the StringPrep processing function.
+
+        - Mapping Table: Contains code points that are deleted from the
+          output or case mapped.
+
+        The procedure for preparing Unicode strings:
+
+        1.
Map: For each character in the input, check if it has a mapping
+           and, if so, replace it with its mapping.
+
+        2. Normalize: Possibly normalize the result of step 1 using Unicode
+           normalization.
+
+        3. Prohibit: Check for any characters that are not allowed in the
+           output. If any are found, return an error.
+
+        4. Check bidi: Possibly check for right-to-left characters, and if
+           any are found, make sure that the whole string satisfies the
+           requirements for bidirectional strings. If the string does not
+           satisfy the requirements for bidirectional strings, return an
+           error.
+
+        See
+        this page for full details.
+
+*******************************************************************************/
+
+class UStringPrep : ICU
+{
+        /***********************************************************************
+
+                Processing modes accepted by prepare(). The selected
+                value is handed to usprep_prepare unchanged.
+
+        ***********************************************************************/
+
+        enum Options
+        {
+                Strict,
+                Lenient
+        }
+
+        /***********************************************************************
+
+                Profile handle obtained from usprep_open in the
+                constructor and given to usprep_close in the destructor.
+
+        ***********************************************************************/
+
+        private Handle handle;
+
+
+        /***********************************************************************
+
+                Open a StringPrep profile from a data file.
+
+                path            full path of the directory holding the
+                                profile, followed by the package name,
+                                e.g. "/usr/resource/my_app/profiles/mydata"
+                                on a Unix system. If NULL, ICU default
+                                data files will be used.
+
+                filename        name of the profile file to be opened
+
+                Both arguments go through toString() before being given
+                to usprep_open. The resulting error status is passed to
+                testError along with "failed to open string-prep".
+
+        ***********************************************************************/
+
+        this (char[] path, char[] filename)
+        {
+                Error status;
+
+                handle = usprep_open (toString (path), toString (filename), status);
+                testError (status, "failed to open string-prep");
+        }
+
+        /***********************************************************************
+
+                Release the profile by handing its handle to
+                usprep_close.
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                usprep_close (handle);
+        }
+
+        /***********************************************************************
+
+                Prepare the input text.
+
+                This operation maps, normalizes (NFKC), and checks for
+                prohibited and BiDi characters in the order defined by
+                RFC 3454, depending on the options specified in the
+                profile.
+
+                src             text to be prepared
+
+                dst             string whose format() method drives the
+                                conversion and receives the output
+
+                o               processing mode, Options.Strict unless
+                                stated otherwise
+
+                The ICU call is wrapped in a nested function which is
+                passed to dst.format along with the message
+                "failed to prepare text".
+
+        ***********************************************************************/
+
+        void prepare (UText src, UString dst, Options o = Options.Strict)
+        {
+                // callback for dst.format: runs usprep_prepare against the
+                // buffer, capacity and error slot it is called with
+                uint fill (wchar* buffer, uint capacity, inout Error status)
+                {
+                        return usprep_prepare (handle, src.get.ptr, src.len,
+                                               buffer, capacity, o, null, status);
+                }
+
+                dst.format (&fill, "failed to prepare text");
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU entry points used by this
+                class. They are filled in by FunctionLoader.bind in the
+                static constructor.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle  function (char*, char*, inout Error) usprep_open;
+                void    function (Handle) usprep_close;
+                uint    function (Handle, wchar*, uint, wchar*, uint, uint, void*, inout Error) usprep_prepare;
+        }
+
+        /***********************************************************************
+
+                Table pairing each function pointer above with the name
+                of the symbol it is bound to.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &usprep_open,     "usprep_open"},
+                {cast(void**) &usprep_close,    "usprep_close"},
+                {cast(void**) &usprep_prepare,  "usprep_prepare"},
+                ];
+
+        /***********************************************************************
+
+                Bind the targets table against the icuuc library and
+                remember the value returned by FunctionLoader.bind.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+        }
+
+        /***********************************************************************
+
+                Hand the stored library value back to
+                FunctionLoader.unbind.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
+
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UTimeZone.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/UTimeZone.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,263 @@
+/*******************************************************************************
+
+        @file UTimeZone.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UTimeZone; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString, + dwtx.dwthelper.mangoicu.UEnumeration; + +/******************************************************************************* + + A representation of a TimeZone. Unfortunately, ICU does not expose + this as a separate entity from the C-API, so we have to make do + with an approximation instead. 
+
+*******************************************************************************/
+
+struct UTimeZone
+{
+        // zone identifier handed to the ICU calls below; the predefined
+        // instances carry names such as "Etc/GMT"
+        public wchar[]          name;
+
+        // NOTE(review): Default carries a null name, presumably meaning
+        // "whatever ICU treats as the default zone" -- confirm with callers
+        public static UTimeZone Default =       {null};
+        public static UTimeZone Gmt =           {"Etc/GMT"};
+        public static UTimeZone Greenwich =     {"Etc/Greenwich"};
+        public static UTimeZone Uct =           {"Etc/UCT"};
+        public static UTimeZone Utc =           {"Etc/UTC"};
+        public static UTimeZone Universal =     {"Etc/Universal"};
+
+        public static UTimeZone GmtPlus0 =      {"Etc/GMT+0"};
+        public static UTimeZone GmtPlus1 =      {"Etc/GMT+1"};
+        public static UTimeZone GmtPlus2 =      {"Etc/GMT+2"};
+        public static UTimeZone GmtPlus3 =      {"Etc/GMT+3"};
+        public static UTimeZone GmtPlus4 =      {"Etc/GMT+4"};
+        public static UTimeZone GmtPlus5 =      {"Etc/GMT+5"};
+        public static UTimeZone GmtPlus6 =      {"Etc/GMT+6"};
+        public static UTimeZone GmtPlus7 =      {"Etc/GMT+7"};
+        public static UTimeZone GmtPlus8 =      {"Etc/GMT+8"};
+        public static UTimeZone GmtPlus9 =      {"Etc/GMT+9"};
+        public static UTimeZone GmtPlus10 =     {"Etc/GMT+10"};
+        public static UTimeZone GmtPlus11 =     {"Etc/GMT+11"};
+        public static UTimeZone GmtPlus12 =     {"Etc/GMT+12"};
+
+        public static UTimeZone GmtMinus0 =     {"Etc/GMT-0"};
+        public static UTimeZone GmtMinus1 =     {"Etc/GMT-1"};
+        public static UTimeZone GmtMinus2 =     {"Etc/GMT-2"};
+        public static UTimeZone GmtMinus3 =     {"Etc/GMT-3"};
+        public static UTimeZone GmtMinus4 =     {"Etc/GMT-4"};
+        public static UTimeZone GmtMinus5 =     {"Etc/GMT-5"};
+        public static UTimeZone GmtMinus6 =     {"Etc/GMT-6"};
+        public static UTimeZone GmtMinus7 =     {"Etc/GMT-7"};
+        public static UTimeZone GmtMinus8 =     {"Etc/GMT-8"};
+        public static UTimeZone GmtMinus9 =     {"Etc/GMT-9"};
+        public static UTimeZone GmtMinus10 =    {"Etc/GMT-10"};
+        public static UTimeZone GmtMinus11 =    {"Etc/GMT-11"};
+        public static UTimeZone GmtMinus12 =    {"Etc/GMT-12"};
+
+        /***********************************************************************
+
+                Get the default time zone.
+
+                zone            receives the result: its name is set to
+                                the text produced by
+                                ucal_getDefaultTimeZone
+
+                The ICU call is wrapped in a nested function which is
+                passed to UString.format along with the message
+                "failed to get default time zone".
+
+        ***********************************************************************/
+
+        static void getDefault (inout UTimeZone zone)
+        {
+                uint format (wchar* dst, uint length, inout ICU.Error e)
+                {
+                        return ucal_getDefaultTimeZone (dst, length, e);
+                }
+
+                UString s = new UString(64);
+                s.format (&format, "failed to get default time zone");
+                zone.name = s.get();
+        }
+
+        /***********************************************************************
+
+                Set the default time zone.
+
+                zone            zone whose name is handed to
+                                ucal_setDefaultTimeZone
+
+                The resulting error status is passed to ICU.testError
+                along with "failed to set default time zone".
+
+        ***********************************************************************/
+
+        static void setDefault (inout UTimeZone zone)
+        {
+                ICU.Error e;
+
+                ucal_setDefaultTimeZone (ICU.toString (zone.name), e);
+                ICU.testError (e, "failed to set default time zone");
+        }
+
+        /***********************************************************************
+
+                Return the amount of time in milliseconds that the clock
+                is advanced during daylight savings time for the given
+                time zone, or zero if the time zone does not observe daylight
+                savings time
+
+                zone            zone whose name is handed to
+                                ucal_getDSTSavings
+
+                The resulting error status is passed to ICU.testError
+                along with "failed to get DST savings".
+
+        ***********************************************************************/
+
+        static uint getDSTSavings (inout UTimeZone zone)
+        {
+                ICU.Error e;
+
+                uint x = ucal_getDSTSavings (ICU.toString (zone.name), e);
+                ICU.testError (e, "failed to get DST savings");
+                return x;
+        }
+
+
+        /***********************************************************************
+
+                Iterate over the available timezone names
+
+                dg              delegate called once per name. A zero
+                                return continues the iteration; any
+                                other value stops it
+
+                Returns the last value produced by dg, or zero when dg
+                was never called or never asked to stop.
+
+                The handle from ucal_openTimeZones is wrapped in a
+                UEnumeration, which is deleted before returning.
+
+        ***********************************************************************/
+
+        static int opApply (int delegate(inout wchar[] element) dg)
+        {
+                ICU.Error       e;
+                wchar[]         name;
+                int             result;
+
+                void* h = ucal_openTimeZones (e);
+                ICU.testError (e, "failed to open timeszone iterator");
+
+                UEnumeration zones = new UEnumeration (cast(UEnumeration.Handle) h);
+
+                // foreach protocol: zero from the delegate means "keep going";
+                // the earlier "!= 0" test ended the loop after the first name
+                while (zones.next(name) && (result = dg(name)) == 0) {}
+                delete zones;
+                return result;
+        }
+
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU entry points used by this
+                struct. They are filled in by FunctionLoader.bind in the
+                static constructor.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                void* function (inout ICU.Error) ucal_openTimeZones;
+                uint  function (wchar*, uint, inout ICU.Error) ucal_getDefaultTimeZone;
+                void  function (wchar*, inout ICU.Error) ucal_setDefaultTimeZone;
+                uint  function (wchar*, inout ICU.Error) ucal_getDSTSavings;
+        }
+
+        /***********************************************************************
+
+                Table pairing each function pointer above with the name
+                of the symbol it is bound to.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &ucal_openTimeZones,      "ucal_openTimeZones"},
+                {cast(void**) &ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone"},
+                {cast(void**) &ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone"},
+                {cast(void**) &ucal_getDSTSavings,      "ucal_getDSTSavings"},
+                ];
+
+        /***********************************************************************
+
+                Bind the targets table against the ICU.icuin library and
+                remember the value returned by FunctionLoader.bind.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (ICU.icuin, targets);
+        }
+
+        /***********************************************************************
+
+                Hand the stored library value back to
+                FunctionLoader.unbind.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
diff -r cd18fa3b71f1 -r 040da1cb0d76 dwtx/dwtxhelper/mangoicu/UTransform.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/dwtxhelper/mangoicu/UTransform.d Sun Jun 22 22:57:31 2008 +0200
@@ -0,0 +1,239 @@
+/*******************************************************************************
+
+        @file UTransform.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty.
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwthelper.mangoicu.UTransform; + +private import dwtx.dwthelper.mangoicu.ICU, + dwtx.dwthelper.mangoicu.UString; + +/******************************************************************************* + + See + this page for full details. 
+
+*******************************************************************************/
+
+class UTransform : ICU
+{
+        /***********************************************************************
+
+                Direction handed to utrans_openU when a transform is
+                built from rules.
+
+        ***********************************************************************/
+
+        enum Direction
+        {
+                Forward,
+                Reverse
+        }
+
+        /***********************************************************************
+
+                Transform handle obtained from utrans_openU in the
+                constructors and given to utrans_close in the destructor.
+
+        ***********************************************************************/
+
+        private Handle handle;
+
+
+        /***********************************************************************
+
+                Open a transform by its ID.
+
+                id              text of the transform ID
+
+                No rules are supplied and the direction argument is
+                Direction.Forward (zero). The resulting error status is
+                passed to testError along with
+                "failed to open ID transform".
+
+        ***********************************************************************/
+
+        this (UText id)
+        {
+                Error status;
+
+                handle = utrans_openU (id.get.ptr, id.len, Direction.Forward, null, 0, null, status);
+                testError (status, "failed to open ID transform");
+        }
+
+        /***********************************************************************
+
+                Open a transform from a set of rules.
+
+                rule            text of the rules
+
+                dir             direction handed to utrans_openU
+
+                The resulting error status is passed to testError along
+                with "failed to open rule-based transform".
+
+                NOTE(review): the ID argument is null here. Check the
+                ICU documentation for utrans_openU to confirm that a
+                null ID is accepted when rules are supplied.
+
+        ***********************************************************************/
+
+        this (UText rule, Direction dir)
+        {
+                Error status;
+
+                handle = utrans_openU (null, 0, dir, rule.get.ptr, rule.len, null, status);
+                testError (status, "failed to open rule-based transform");
+        }
+
+        /***********************************************************************
+
+                Release the transform by handing its handle to
+                utrans_close.
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                utrans_close (handle);
+        }
+
+        /***********************************************************************
+
+                Return the ID of this transform as a new UText built
+                over the characters and length reported by
+                utrans_getUnicodeID.
+
+        ***********************************************************************/
+
+        UText getID ()
+        {
+                uint   idLength;
+                wchar* chars = utrans_getUnicodeID (handle, idLength);
+
+                return new UText (chars[0..idLength]);
+        }
+
+        /***********************************************************************
+
+                Set the filter of this transform.
+
+                filter          filter text. When its length is zero,
+                                a null pointer and a zero length are
+                                handed to utrans_setFilter instead
+
+                The resulting error status is passed to testError along
+                with "failed to set transform filter". Returns this
+                instance so that calls can be chained.
+
+        ***********************************************************************/
+
+        UTransform setFilter (UText filter)
+        {
+                Error  status;
+                wchar* pattern = null;
+                uint   patternLen = 0;
+
+                if (filter.length)
+                   {
+                   pattern = filter.get.ptr;
+                   patternLen = filter.len;
+                   }
+
+                utrans_setFilter (handle, pattern, patternLen, status);
+                testError (status, "failed to set transform filter");
+                return this;
+        }
+
+        /***********************************************************************
+
+                Run this transform over the given string.
+
+                text            string handed to utrans_transUChars
+
+                The call receives the string's character pointer, a
+                pointer to a copy of its current length, the size of
+                its content array, a zero, and a pointer to text.len
+                itself, so ICU may update text.len directly.
+                NOTE(review): the zero and text.len presumably act as
+                the start and limit of the range to transform; confirm
+                against the ICU documentation for utrans_transUChars.
+
+                The resulting error status is passed to testError along
+                with "failed to execute transform". Returns this
+                instance so that calls can be chained.
+
+        ***********************************************************************/
+
+        UTransform execute (UString text)
+        {
+                Error status;
+                uint  length = text.len;
+
+                utrans_transUChars (handle, text.get.ptr, &length, text.content.length, 0, &text.len, status);
+                testError (status, "failed to execute transform");
+                return this;
+        }
+
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers for the ICU entry points used by this
+                class. They are filled in by FunctionLoader.bind in the
+                static constructor.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (wchar*, uint, uint, wchar*, uint, void*, inout Error) utrans_openU;
+                void   function (Handle) utrans_close;
+                wchar* function (Handle, inout uint) utrans_getUnicodeID;
+                void   function (Handle, wchar*, uint, inout Error) utrans_setFilter;
+                void   function (Handle, wchar*, uint*, uint, uint, uint*, inout Error) utrans_transUChars;
+        }
+
+        /***********************************************************************
+
+                Table pairing each function pointer above with the name
+                of the symbol it is bound to.
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &utrans_openU,            "utrans_openU"},
+                {cast(void**) &utrans_close,            "utrans_close"},
+                {cast(void**) &utrans_getUnicodeID,     "utrans_getUnicodeID"},
+                {cast(void**) &utrans_setFilter,        "utrans_setFilter"},
+                {cast(void**) &utrans_transUChars,      "utrans_transUChars"},
+                ];
+
+        /***********************************************************************
+
+                Bind the targets table against the icuin library and
+                remember the value returned by FunctionLoader.bind.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuin, targets);
+        }
+
+        /***********************************************************************
+
+                Hand the stored library value back to
+                FunctionLoader.unbind.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
+