# HG changeset patch # User Frank Benoit # Date 1215438843 -7200 # Node ID f05207c07a989506cf50b5c8019f4262eeeb8396 # Parent 11e8159caf7a71bd0f0419b587e68a04536b84e7 changed filetype to unix diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UBreakIterator.d --- a/dwtx/dwtxhelper/mangoicu/UBreakIterator.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UBreakIterator.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,616 +1,616 @@ -/******************************************************************************* - - @file UBreakIterator.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UBreakIterator; - -private import dwtx.dwtxhelper.mangoicu.ICU; - -public import dwtx.dwtxhelper.mangoicu.ULocale, - dwtx.dwtxhelper.mangoicu.UText, - dwtx.dwtxhelper.mangoicu.UString; - - - -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class UCharacterIterator : UBreakIterator -// { -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Character, locale, text); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class UWordIterator : UBreakIterator -// { -// public enum Break -// { -// None = 0, -// NoneLimit = 100, -// Number = 100, -// NumberLimit = 200, -// Letter = 200, -// LetterLimit = 300, -// Kana = 300, -// KanaLimit = 400, -// Ideo = 400, -// IdeoLimit = 500 -// } -// -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Word, locale, text); -// } -// -// /*********************************************************************** -// -// Return the status from the break rule that determined -// the most recently returned break position. 
-// -// ***********************************************************************/ -// -// void getStatus (inout Break b) -// { -// b = cast(Break) super.getStatus(); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class ULineIterator : UBreakIterator -// { -// public enum Break -// { -// Soft = 0, -// SoftLimit = 100, -// Hard = 100, -// HardLimit = 200 -// } -// -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Line, locale, text); -// } -// -// /*********************************************************************** -// -// Return the status from the break rule that determined -// the most recently returned break position. -// -// ***********************************************************************/ -// -// void getStatus (inout Break b) -// { -// b = cast(Break) super.getStatus(); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class USentenceIterator : UBreakIterator -// { -// public enum Break -// { -// Term = 0, -// TermLimit = 100, -// Sep = 100, -// Limit = 200 -// } -// -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Sentence, locale, text); -// } -// -// /*********************************************************************** -// -// Return the status from the break rule that determined -// the most recently returned break position. 
-// -// ***********************************************************************/ -// -// void getStatus (inout Break b) -// { -// b = cast(Break) super.getStatus(); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class UTitleIterator : UBreakIterator -// { -// /*********************************************************************** -// -// ***********************************************************************/ -// -// this (inout ULocale locale, UStringView text = null) -// { -// super (Type.Title, locale, text); -// } -// } -// -// -// /******************************************************************************* -// -// *******************************************************************************/ -// -// class URuleIterator : UBreakIterator -// { -// /*********************************************************************** -// -// Open a new UBreakIterator for locating text boundaries -// using specified breaking rules -// -// ***********************************************************************/ -// -// this (UStringView rules, UStringView text = null) -// { -// UErrorCode e; -// -// handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e); -// testError (e, "failed to open rule iterator"); -// } -// } - - -/******************************************************************************* - - BreakIterator defines methods for finding the location of boundaries - in text. Pointer to a UBreakIterator maintain a current position and - scan over text returning the index of characters where boundaries occur. - - Line boundary analysis determines where a text string can be broken - when line-wrapping. The mechanism correctly handles punctuation and - hyphenated words. 
- - Sentence boundary analysis allows selection with correct interpretation - of periods within numbers and abbreviations, and trailing punctuation - marks such as quotation marks and parentheses. - - Word boundary analysis is used by search and replace functions, as well - as within text editing applications that allow the user to select words - with a double click. Word selection provides correct interpretation of - punctuation marks within and following words. Characters that are not - part of a word, such as symbols or punctuation marks, have word-breaks - on both sides. - - Character boundary analysis allows users to interact with characters - as they expect to, for example, when moving the cursor through a text - string. Character boundary analysis provides correct navigation of - through character strings, regardless of how the character is stored. - For example, an accented character might be stored as a base character - and a diacritical mark. What users consider to be a character can differ - between languages. - - Title boundary analysis locates all positions, typically starts of - words, that should be set to Title Case when title casing the text. - - See - this page for full details. - -*******************************************************************************/ - -struct UBreakIterator -{ - typedef void _UBreakIterator; - alias _UBreakIterator* Handle; - Handle handle; - UText ut; - - // this is returned by next(), previous() etc ... 
- const uint Done = uint.max; - - /*********************************************************************** - - internal types passed to C API - - ***********************************************************************/ - - private enum Type - { - Character, - Word, - Line, - Sentence, - Title - } - - - public enum WordBreak - { - None = 0, - NoneLimit = 100, - Number = 100, - NumberLimit = 200, - Letter = 200, - LetterLimit = 300, - Kana = 300, - KanaLimit = 400, - Ideo = 400, - IdeoLimit = 500 - } - public enum LineBreak - { - Soft = 0, - SoftLimit = 100, - Hard = 100, - HardLimit = 200 - } - public enum SentenceBreak - { - Term = 0, - TermLimit = 100, - Sep = 100, - Limit = 200 - } - - - /*********************************************************************** - - Open a new UBreakIterator for locating text boundaries for - a specified locale. A UBreakIterator may be used for detecting - character, line, word, and sentence breaks in text. - - ***********************************************************************/ - - static UBreakIterator openWordIterator( ULocale locale, char[] str ){ - UBreakIterator res; - res.ut.openUTF8(str); - auto e = ICU.UErrorCode.OK; - res.handle = ubrk_open( Type.Word, locale.name.ptr, null, 0, e); - ICU.testError (e, "failed to open word iterator"); - ubrk_setUText( res.handle, & res.ut, e); - ICU.testError (e, "failed to set text in iterator"); - return res; - } - - static UBreakIterator openLineIterator( ULocale locale, char[] str ){ - UBreakIterator res; - res.ut.openUTF8(str); - auto e = ICU.UErrorCode.OK; - res.handle = ubrk_open( Type.Line, locale.name.ptr, null, 0, e); - ICU.testError (e, "failed to open line iterator"); - ubrk_setUText( res.handle, & res.ut, e); - ICU.testError (e, "failed to set text in iterator"); - return res; - } - - /*********************************************************************** - - Close a UBreakIterator - - ***********************************************************************/ - - void close 
() - { - ut.close(); - ubrk_close (handle); - } - - /*********************************************************************** - - Sets an existing iterator to point to a new piece of text - - ***********************************************************************/ - - void setText (UStringView text) - { - ICU.UErrorCode e; - ubrk_setText (handle, text.get.ptr, text.length, e); - ICU.testError (e, "failed to set iterator text"); - } - - void setText (char[] text) - { - auto e = ICU.UErrorCode.OK; - ut.openUTF8(text); - ubrk_setUText( handle, & ut, e); - ICU.testError (e, "failed to set text in iterator"); - } - - /*********************************************************************** - - Determine the most recently-returned text boundary - - ***********************************************************************/ - - uint current () - { - return ubrk_current (handle); - } - - /*********************************************************************** - - Determine the text boundary following the current text - boundary, or UBRK_DONE if all text boundaries have been - returned. - - If offset is specified, determines the text boundary - following the current text boundary: The value returned - is always greater than offset, or Done - - ***********************************************************************/ - - uint next (uint offset = uint.max) - { - if (offset == uint.max) - return ubrk_next (handle); - return ubrk_following (handle, offset); - } - - /*********************************************************************** - - Determine the text boundary preceding the current text - boundary, or Done if all text boundaries have been returned. - - If offset is specified, determines the text boundary preceding - the specified offset. The value returned is always smaller than - offset, or Done. 
- - ***********************************************************************/ - - uint previous (uint offset = uint.max) - { - if (offset == uint.max) - return ubrk_previous (handle); - return ubrk_preceding (handle, offset); - } - - /*********************************************************************** - - Determine the index of the first character in the text - being scanned. This is not always the same as index 0 - of the text. - - ***********************************************************************/ - - uint first () - { - return ubrk_first (handle); - } - - /*********************************************************************** - - Determine the index immediately beyond the last character - in the text being scanned. This is not the same as the last - character - - ***********************************************************************/ - - uint last () - { - return ubrk_last (handle); - } - - /*********************************************************************** - - Returns true if the specfied position is a boundary position. - As a side effect, leaves the iterator pointing to the first - boundary position at or after "offset". - - ***********************************************************************/ - - bool isBoundary (uint offset) - { - return ubrk_isBoundary (handle, offset) != 0; - } - - /*********************************************************************** - - Return the status from the break rule that determined - the most recently returned break position. - - ***********************************************************************/ - - void getStatus (inout uint s) - { - s = getStatus (); - } - - /*********************************************************************** - - Return the status from the break rule that determined - the most recently returned break position. - - The values appear in the rule source within brackets, - {123}, for example. For rules that do not specify a status, - a default value of 0 is returned. 
- - For word break iterators, the possible values are defined - in enum UWordBreak - - ***********************************************************************/ - - private uint getStatus () - { - return ubrk_getRuleStatus (handle); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open; - Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules; - void function (Handle) ubrk_close; - void function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText; - uint function (Handle) ubrk_current; - uint function (Handle) ubrk_next; - uint function (Handle) ubrk_previous; - uint function (Handle) ubrk_first; - uint function (Handle) ubrk_last; - uint function (Handle, uint) ubrk_preceding; - uint function (Handle, uint) ubrk_following; - byte function (Handle, uint) ubrk_isBoundary; - uint function (Handle) ubrk_getRuleStatus; - Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone; - void function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ubrk_open, "ubrk_open"}, - {cast(void**) &ubrk_close, "ubrk_close"}, - {cast(void**) &ubrk_openRules, "ubrk_openRules"}, - {cast(void**) &ubrk_setText, "ubrk_setText"}, - {cast(void**) &ubrk_current, "ubrk_current"}, - 
{cast(void**) &ubrk_next, "ubrk_next"}, - {cast(void**) &ubrk_previous, "ubrk_previous"}, - {cast(void**) &ubrk_first, "ubrk_first"}, - {cast(void**) &ubrk_last, "ubrk_last"}, - {cast(void**) &ubrk_preceding, "ubrk_preceding"}, - {cast(void**) &ubrk_following, "ubrk_following"}, - {cast(void**) &ubrk_isBoundary, "ubrk_isBoundary"}, - {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"}, - {cast(void**) &ubrk_setUText, "ubrk_setUText"}, - {cast(void**) &ubrk_safeClone, "ubrk_safeClone"}, - ]; - - /********************************************************************** - - **********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuuc, targets); - } - - /********************************************************************** - - **********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file UBreakIterator.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. 
Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UBreakIterator; + +private import dwtx.dwtxhelper.mangoicu.ICU; + +public import dwtx.dwtxhelper.mangoicu.ULocale, + dwtx.dwtxhelper.mangoicu.UText, + dwtx.dwtxhelper.mangoicu.UString; + + + +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class UCharacterIterator : UBreakIterator +// { +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Character, locale, text); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class UWordIterator : UBreakIterator +// { +// public enum Break +// { +// None = 0, +// NoneLimit = 100, +// Number = 100, +// NumberLimit = 200, +// Letter = 200, +// LetterLimit = 300, +// Kana = 300, +// KanaLimit = 400, +// Ideo = 400, +// IdeoLimit = 500 +// } +// +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Word, locale, text); +// } +// +// 
/*********************************************************************** +// +// Return the status from the break rule that determined +// the most recently returned break position. +// +// ***********************************************************************/ +// +// void getStatus (inout Break b) +// { +// b = cast(Break) super.getStatus(); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class ULineIterator : UBreakIterator +// { +// public enum Break +// { +// Soft = 0, +// SoftLimit = 100, +// Hard = 100, +// HardLimit = 200 +// } +// +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Line, locale, text); +// } +// +// /*********************************************************************** +// +// Return the status from the break rule that determined +// the most recently returned break position. 
+// +// ***********************************************************************/ +// +// void getStatus (inout Break b) +// { +// b = cast(Break) super.getStatus(); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class USentenceIterator : UBreakIterator +// { +// public enum Break +// { +// Term = 0, +// TermLimit = 100, +// Sep = 100, +// Limit = 200 +// } +// +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Sentence, locale, text); +// } +// +// /*********************************************************************** +// +// Return the status from the break rule that determined +// the most recently returned break position. +// +// ***********************************************************************/ +// +// void getStatus (inout Break b) +// { +// b = cast(Break) super.getStatus(); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class UTitleIterator : UBreakIterator +// { +// /*********************************************************************** +// +// ***********************************************************************/ +// +// this (inout ULocale locale, UStringView text = null) +// { +// super (Type.Title, locale, text); +// } +// } +// +// +// /******************************************************************************* +// +// *******************************************************************************/ +// +// class URuleIterator : UBreakIterator +// { +// /*********************************************************************** +// +// Open a new 
UBreakIterator for locating text boundaries +// using specified breaking rules +// +// ***********************************************************************/ +// +// this (UStringView rules, UStringView text = null) +// { +// UErrorCode e; +// +// handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e); +// testError (e, "failed to open rule iterator"); +// } +// } + + +/******************************************************************************* + + BreakIterator defines methods for finding the location of boundaries + in text. Pointer to a UBreakIterator maintain a current position and + scan over text returning the index of characters where boundaries occur. + + Line boundary analysis determines where a text string can be broken + when line-wrapping. The mechanism correctly handles punctuation and + hyphenated words. + + Sentence boundary analysis allows selection with correct interpretation + of periods within numbers and abbreviations, and trailing punctuation + marks such as quotation marks and parentheses. + + Word boundary analysis is used by search and replace functions, as well + as within text editing applications that allow the user to select words + with a double click. Word selection provides correct interpretation of + punctuation marks within and following words. Characters that are not + part of a word, such as symbols or punctuation marks, have word-breaks + on both sides. + + Character boundary analysis allows users to interact with characters + as they expect to, for example, when moving the cursor through a text + string. Character boundary analysis provides correct navigation of + through character strings, regardless of how the character is stored. + For example, an accented character might be stored as a base character + and a diacritical mark. What users consider to be a character can differ + between languages. 
+ + Title boundary analysis locates all positions, typically starts of + words, that should be set to Title Case when title casing the text. + + See + this page for full details. + +*******************************************************************************/ + +struct UBreakIterator +{ + typedef void _UBreakIterator; + alias _UBreakIterator* Handle; + Handle handle; + UText ut; + + // this is returned by next(), previous() etc ... + const uint Done = uint.max; + + /*********************************************************************** + + internal types passed to C API + + ***********************************************************************/ + + private enum Type + { + Character, + Word, + Line, + Sentence, + Title + } + + + public enum WordBreak + { + None = 0, + NoneLimit = 100, + Number = 100, + NumberLimit = 200, + Letter = 200, + LetterLimit = 300, + Kana = 300, + KanaLimit = 400, + Ideo = 400, + IdeoLimit = 500 + } + public enum LineBreak + { + Soft = 0, + SoftLimit = 100, + Hard = 100, + HardLimit = 200 + } + public enum SentenceBreak + { + Term = 0, + TermLimit = 100, + Sep = 100, + Limit = 200 + } + + + /*********************************************************************** + + Open a new UBreakIterator for locating text boundaries for + a specified locale. A UBreakIterator may be used for detecting + character, line, word, and sentence breaks in text. 
+ + ***********************************************************************/ + + static UBreakIterator openWordIterator( ULocale locale, char[] str ){ + UBreakIterator res; + res.ut.openUTF8(str); + auto e = ICU.UErrorCode.OK; + res.handle = ubrk_open( Type.Word, locale.name.ptr, null, 0, e); + ICU.testError (e, "failed to open word iterator"); + ubrk_setUText( res.handle, & res.ut, e); + ICU.testError (e, "failed to set text in iterator"); + return res; + } + + static UBreakIterator openLineIterator( ULocale locale, char[] str ){ + UBreakIterator res; + res.ut.openUTF8(str); + auto e = ICU.UErrorCode.OK; + res.handle = ubrk_open( Type.Line, locale.name.ptr, null, 0, e); + ICU.testError (e, "failed to open line iterator"); + ubrk_setUText( res.handle, & res.ut, e); + ICU.testError (e, "failed to set text in iterator"); + return res; + } + + /*********************************************************************** + + Close a UBreakIterator + + ***********************************************************************/ + + void close () + { + ut.close(); + ubrk_close (handle); + } + + /*********************************************************************** + + Sets an existing iterator to point to a new piece of text + + ***********************************************************************/ + + void setText (UStringView text) + { + ICU.UErrorCode e; + ubrk_setText (handle, text.get.ptr, text.length, e); + ICU.testError (e, "failed to set iterator text"); + } + + void setText (char[] text) + { + auto e = ICU.UErrorCode.OK; + ut.openUTF8(text); + ubrk_setUText( handle, & ut, e); + ICU.testError (e, "failed to set text in iterator"); + } + + /*********************************************************************** + + Determine the most recently-returned text boundary + + ***********************************************************************/ + + uint current () + { + return ubrk_current (handle); + } + + 
/*********************************************************************** + + Determine the text boundary following the current text + boundary, or UBRK_DONE if all text boundaries have been + returned. + + If offset is specified, determines the text boundary + following the current text boundary: The value returned + is always greater than offset, or Done + + ***********************************************************************/ + + uint next (uint offset = uint.max) + { + if (offset == uint.max) + return ubrk_next (handle); + return ubrk_following (handle, offset); + } + + /*********************************************************************** + + Determine the text boundary preceding the current text + boundary, or Done if all text boundaries have been returned. + + If offset is specified, determines the text boundary preceding + the specified offset. The value returned is always smaller than + offset, or Done. + + ***********************************************************************/ + + uint previous (uint offset = uint.max) + { + if (offset == uint.max) + return ubrk_previous (handle); + return ubrk_preceding (handle, offset); + } + + /*********************************************************************** + + Determine the index of the first character in the text + being scanned. This is not always the same as index 0 + of the text. + + ***********************************************************************/ + + uint first () + { + return ubrk_first (handle); + } + + /*********************************************************************** + + Determine the index immediately beyond the last character + in the text being scanned. This is not the same as the last + character + + ***********************************************************************/ + + uint last () + { + return ubrk_last (handle); + } + + /*********************************************************************** + + Returns true if the specfied position is a boundary position. 
+ As a side effect, leaves the iterator pointing to the first + boundary position at or after "offset". + + ***********************************************************************/ + + bool isBoundary (uint offset) + { + return ubrk_isBoundary (handle, offset) != 0; + } + + /*********************************************************************** + + Return the status from the break rule that determined + the most recently returned break position. + + ***********************************************************************/ + + void getStatus (inout uint s) + { + s = getStatus (); + } + + /*********************************************************************** + + Return the status from the break rule that determined + the most recently returned break position. + + The values appear in the rule source within brackets, + {123}, for example. For rules that do not specify a status, + a default value of 0 is returned. + + For word break iterators, the possible values are defined + in enum UWordBreak + + ***********************************************************************/ + + private uint getStatus () + { + return ubrk_getRuleStatus (handle); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open; + Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules; + void function (Handle) ubrk_close; + void function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText; + uint function (Handle) ubrk_current; + uint function (Handle) ubrk_next; + uint function (Handle) ubrk_previous; + uint function (Handle) ubrk_first; + uint function (Handle) ubrk_last; + uint function (Handle, uint) ubrk_preceding; + uint function (Handle, uint) ubrk_following; + byte function (Handle, uint) ubrk_isBoundary; + uint function (Handle) ubrk_getRuleStatus; + Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone; + void function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ubrk_open, "ubrk_open"}, + {cast(void**) &ubrk_close, "ubrk_close"}, + {cast(void**) &ubrk_openRules, "ubrk_openRules"}, + {cast(void**) &ubrk_setText, "ubrk_setText"}, + {cast(void**) &ubrk_current, "ubrk_current"}, + {cast(void**) &ubrk_next, "ubrk_next"}, + {cast(void**) &ubrk_previous, "ubrk_previous"}, + {cast(void**) &ubrk_first, "ubrk_first"}, + {cast(void**) &ubrk_last, "ubrk_last"}, + {cast(void**) &ubrk_preceding, "ubrk_preceding"}, + {cast(void**) &ubrk_following, "ubrk_following"}, + {cast(void**) &ubrk_isBoundary, "ubrk_isBoundary"}, + {cast(void**) 
&ubrk_getRuleStatus, "ubrk_getRuleStatus"}, + {cast(void**) &ubrk_setUText, "ubrk_setUText"}, + {cast(void**) &ubrk_safeClone, "ubrk_safeClone"}, + ]; + + /********************************************************************** + + **********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuuc, targets); + } + + /********************************************************************** + + **********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UCalendar.d --- a/dwtx/dwtxhelper/mangoicu/UCalendar.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UCalendar.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,666 +1,666 @@ -/******************************************************************************* - - @file UCalendar.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. 
- - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. 
- - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UCalendar; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -public import dwtx.dwtxhelper.mangoicu.ULocale, - dwtx.dwtxhelper.mangoicu.UTimeZone; - -/******************************************************************************* - - UCalendar is used for converting between a UDate object and - a set of integer fields such as Year, Month, Day, - Hour, and so on. (A UDate object represents a specific instant - in time with millisecond precision. See UDate for information about - the UDate) - - Types of UCalendar interpret a UDate according to the rules of a - specific calendar system. UCalendar supports Traditional & Gregorian. - - A UCalendar object can produce all the time field values needed to - implement the date-time formatting for a particular language and - calendar style (for example, Japanese-Gregorian, Japanese-Traditional). - - When computing a UDate from time fields, two special circumstances - may arise: there may be insufficient information to compute the UDate - (such as only year and month but no day in the month), or there may be - inconsistent information (such as "Tuesday, July 15, 1996" -- July 15, - 1996 is actually a Monday). - - Insufficient information. The calendar will use default information - to specify the missing fields. This may vary by calendar; for the - Gregorian calendar, the default for a field is the same as that of - the start of the epoch: i.e., Year = 1970, Month = January, - Date = 1, etc. - - Inconsistent information. If fields conflict, the calendar will give - preference to fields set more recently. 
For example, when determining - the day, the calendar will look for one of the following combinations - of fields. The most recent combination, as determined by the most - recently set single field, will be used. - - See http://oss.software.ibm.com/icu/apiref/udat_8h.html for full - details. - -*******************************************************************************/ - -class UCalendar : ICU -{ - package Handle handle; - - typedef double UDate; - - //Possible types of UCalendars - public enum Type - { - Traditional, - Gregorian - } - - // Possible fields in a UCalendar - public enum DateFields - { - Era, - Year, - Month, - WeekOfYear, - WeekOfMonth, - Date, - DayOfYear, - DayOfWeek, - DayOfWeekInMonth, - AmPm, - Hour, - HourOfDay, - Minute, - Second, - Millisecond, - ZoneOffset, - DstOffset, - YearWoy, - DowLocal, - ExtendedYear, - JulianDay, - MillisecondsInDay, - FieldCount, - DayOfMonth = Date - } - - // Possible days of the week in a UCalendar - public enum DaysOfWeek - { - Sunday = 1, - Monday, - Tuesday, - Wednesday, - Thursday, - Friday, - Saturday - } - - // Possible months in a UCalendar - public enum Months - { - January, - February, - March, - April, - May, - June, - July, - August, - September, - October, - November, - December, - UnDecimber - } - - // Possible AM/PM values in a UCalendar - public enum AMPMs - { - AM, - PM - } - - // Possible formats for a UCalendar's display name - public enum DisplayNameType - { - Standard, - ShortStandard, - DST, - ShortDST - } - - // Possible limit values for a UCalendar - public enum Limit - { - Minimum, - Maximum, - GreatestMinimum, - LeastMaximum, - ActualMinimum, - ActualMaximum - } - - // Types of UCalendar attributes - private enum Attribute - { - Lenient, // unused: set from UDateFormat instead - FirstDayOfWeek, - MinimalDaysInFirstWeek - } - - /*********************************************************************** - - Open a UCalendar. 
A UCalendar may be used to convert a - millisecond value to a year, month, and day - - ***********************************************************************/ - - this (inout UTimeZone zone, inout ULocale locale, Type type = Type.Traditional) - { - UErrorCode e; - - handle = ucal_open (zone.name.ptr, zone.name.length, toString(locale.name), type, e); - testError (e, "failed to open calendar"); - } - - /*********************************************************************** - - Internal only: Open a UCalendar with the given handle - - ***********************************************************************/ - - package this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Close this UCalendar - - ***********************************************************************/ - - ~this () - { - ucal_close (handle); - } - - /*********************************************************************** - - Set the TimeZone used by a UCalendar - - ***********************************************************************/ - - void setTimeZone (inout UTimeZone zone) - { - UErrorCode e; - - ucal_setTimeZone (handle, zone.name.ptr, zone.name.length, e); - testError (e, "failed to set calendar time zone"); - } - - /*********************************************************************** - - Get display name of the TimeZone used by this UCalendar - - ***********************************************************************/ - - void getTimeZoneName (UString s, inout ULocale locale, DisplayNameType type=DisplayNameType.Standard) - { - uint format (wchar* dst, uint length, inout ICU.UErrorCode e) - { - return ucal_getTimeZoneDisplayName (handle, type, toString(locale.name), dst, length, e); - } - - s.format (&format, "failed to get time zone name"); - } - - /*********************************************************************** - - Determine if a UCalendar is currently in daylight savings - time - - 
***********************************************************************/ - - bool inDaylightTime () - { - UErrorCode e; - - auto x = ucal_inDaylightTime (handle, e); - testError (e, "failed to test calendar daylight time"); - return x != 0; - } - - /*********************************************************************** - - Get the current date and time - - ***********************************************************************/ - - UDate getNow () - { - return ucal_getNow (); - } - - /*********************************************************************** - - Get a UCalendar's current time in millis. The time is - represented as milliseconds from the epoch - - ***********************************************************************/ - - UDate getMillis () - { - UErrorCode e; - - auto x = ucal_getMillis (handle, e); - testError (e, "failed to get time"); - return x; - } - - /*********************************************************************** - - Set a UCalendar's current time in millis. 
The time is - represented as milliseconds from the epoch - - ***********************************************************************/ - - void setMillis (UDate date) - { - UErrorCode e; - - ucal_setMillis (handle, date, e); - testError (e, "failed to set time"); - } - - /*********************************************************************** - - Set a UCalendar's current date - - ***********************************************************************/ - - void setDate (uint year, Months month, uint date) - { - UErrorCode e; - - ucal_setDate (handle, year, month, date, e); - testError (e, "failed to set date"); - } - - /*********************************************************************** - - Set a UCalendar's current date - - ***********************************************************************/ - - void setDateTime (uint year, Months month, uint date, uint hour, uint minute, uint second) - { - UErrorCode e; - - ucal_setDateTime (handle, year, month, date, hour, minute, second, e); - testError (e, "failed to set date/time"); - } - - /*********************************************************************** - - Returns TRUE if the given Calendar object is equivalent - to this one - - ***********************************************************************/ - - bool isEquivalent (UCalendar when) - { - return ucal_equivalentTo (handle, when.handle) != 0; - } - - /*********************************************************************** - - Compares the Calendar time - - ***********************************************************************/ - - bool isEqual (UCalendar when) - { - return (this is when || getMillis == when.getMillis); - } - - /*********************************************************************** - - Returns true if this Calendar's current time is before - "when"'s current time - - ***********************************************************************/ - - bool isBefore (UCalendar when) - { - return (this !is when || getMillis < when.getMillis); - } - 
- /*********************************************************************** - - Returns true if this Calendar's current time is after - "when"'s current time - - ***********************************************************************/ - - bool isAfter (UCalendar when) - { - return (this !is when || getMillis > when.getMillis); - } - - /*********************************************************************** - - Add a specified signed amount to a particular field in a - UCalendar - - ***********************************************************************/ - - void add (DateFields field, uint amount) - { - UErrorCode e; - - ucal_add (handle, field, amount, e); - testError (e, "failed to add to calendar"); - } - - /*********************************************************************** - - Add a specified signed amount to a particular field in a - UCalendar - - ***********************************************************************/ - - void roll (DateFields field, uint amount) - { - UErrorCode e; - - ucal_roll (handle, field, amount, e); - testError (e, "failed to roll calendar"); - } - - /*********************************************************************** - - Get the current value of a field from a UCalendar - - ***********************************************************************/ - - uint get (DateFields field) - { - UErrorCode e; - - auto x = ucal_get (handle, field, e); - testError (e, "failed to get calendar field"); - return x; - } - - /*********************************************************************** - - Set the value of a field in a UCalendar - - ***********************************************************************/ - - void set (DateFields field, uint value) - { - ucal_set (handle, field, value); - } - - /*********************************************************************** - - Determine if a field in a UCalendar is set - - ***********************************************************************/ - - bool isSet (DateFields field) - { - return 
ucal_isSet (handle, field) != 0; - } - - /*********************************************************************** - - Clear a field in a UCalendar - - ***********************************************************************/ - - void clearField (DateFields field) - { - ucal_clearField (handle, field); - } - - /*********************************************************************** - - Clear all fields in a UCalendar - - ***********************************************************************/ - - void clear () - { - ucal_clear (handle); - } - - /*********************************************************************** - - Determine a limit for a field in a UCalendar. A limit is a - maximum or minimum value for a field - - ***********************************************************************/ - - uint getLimit (DateFields field, Limit type) - { - UErrorCode e; - - auto x = ucal_getLimit (handle, field, type, e); - testError (e, "failed to get calendar limit"); - return x; - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getDaysInFirstWeek () - { - return ucal_getAttribute (handle, Attribute.MinimalDaysInFirstWeek); - } - - /*********************************************************************** - - ***********************************************************************/ - - uint getFirstDayOfWeek () - { - return ucal_getAttribute (handle, Attribute.FirstDayOfWeek); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setDaysInFirstWeek (uint value) - { - ucal_setAttribute (handle, Attribute.MinimalDaysInFirstWeek, value); - } - - /*********************************************************************** - - ***********************************************************************/ - - void setFirstDayOfWeek (uint value) - { - ucal_setAttribute 
(handle, Attribute.FirstDayOfWeek, value); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, char*, Type, inout UErrorCode) ucal_open; - void function (Handle) ucal_close; - UDate function () ucal_getNow; - UDate function (Handle, inout UErrorCode) ucal_getMillis; - void function (Handle, UDate, inout UErrorCode) ucal_setMillis; - void function (Handle, uint, uint, uint, inout UErrorCode) ucal_setDate; - void function (Handle, uint, uint, uint, uint, uint, uint, inout UErrorCode) ucal_setDateTime; - byte function (Handle, Handle) ucal_equivalentTo; - void function (Handle, uint, uint, inout UErrorCode) ucal_add; - void function (Handle, uint, uint, inout UErrorCode) ucal_roll; - uint function (Handle, uint, inout UErrorCode) ucal_get; - void function (Handle, uint, uint) ucal_set; - byte function (Handle, uint) ucal_isSet; - void function (Handle, uint) ucal_clearField; - void function (Handle) ucal_clear; - uint function (Handle, uint, uint, inout UErrorCode) ucal_getLimit; - void function (Handle, wchar*, uint, inout UErrorCode) ucal_setTimeZone; - byte function (Handle, uint) ucal_inDaylightTime; - uint function (Handle, uint) ucal_getAttribute; - void function (Handle, uint, uint) ucal_setAttribute; - uint function (Handle, uint, char*, wchar*, uint, inout UErrorCode) ucal_getTimeZoneDisplayName; - } - - /*********************************************************************** - - ***********************************************************************/ - - static 
FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucal_open, "ucal_open"}, - {cast(void**) &ucal_close, "ucal_close"}, - {cast(void**) &ucal_getNow, "ucal_getNow"}, - {cast(void**) &ucal_getMillis, "ucal_getMillis"}, - {cast(void**) &ucal_setMillis, "ucal_setMillis"}, - {cast(void**) &ucal_setDate, "ucal_setDate"}, - {cast(void**) &ucal_setDateTime, "ucal_setDateTime"}, - {cast(void**) &ucal_equivalentTo, "ucal_equivalentTo"}, - {cast(void**) &ucal_add, "ucal_add"}, - {cast(void**) &ucal_roll, "ucal_roll"}, - {cast(void**) &ucal_get, "ucal_get"}, - {cast(void**) &ucal_set, "ucal_set"}, - {cast(void**) &ucal_clearField, "ucal_clearField"}, - {cast(void**) &ucal_clear, "ucal_clear"}, - {cast(void**) &ucal_getLimit, "ucal_getLimit"}, - {cast(void**) &ucal_setTimeZone, "ucal_setTimeZone"}, - {cast(void**) &ucal_inDaylightTime, "ucal_inDaylightTime"}, - {cast(void**) &ucal_getAttribute, "ucal_getAttribute"}, - {cast(void**) &ucal_setAttribute, "ucal_setAttribute"}, - {cast(void**) &ucal_isSet, "ucal_isSet"}, - {cast(void**) &ucal_getTimeZoneDisplayName, "ucal_getTimeZoneDisplayName"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - -} +/******************************************************************************* + + @file UCalendar.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UCalendar; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +public import dwtx.dwtxhelper.mangoicu.ULocale, + dwtx.dwtxhelper.mangoicu.UTimeZone; + +/******************************************************************************* + + UCalendar is used for converting between a UDate object and + a set of integer fields such as Year, Month, Day, + Hour, and so on. (A UDate object represents a specific instant + in time with millisecond precision. See UDate for information about + the UDate) + + Types of UCalendar interpret a UDate according to the rules of a + specific calendar system. UCalendar supports Traditional & Gregorian. 
+ + A UCalendar object can produce all the time field values needed to + implement the date-time formatting for a particular language and + calendar style (for example, Japanese-Gregorian, Japanese-Traditional). + + When computing a UDate from time fields, two special circumstances + may arise: there may be insufficient information to compute the UDate + (such as only year and month but no day in the month), or there may be + inconsistent information (such as "Tuesday, July 15, 1996" -- July 15, + 1996 is actually a Monday). + + Insufficient information. The calendar will use default information + to specify the missing fields. This may vary by calendar; for the + Gregorian calendar, the default for a field is the same as that of + the start of the epoch: i.e., Year = 1970, Month = January, + Date = 1, etc. + + Inconsistent information. If fields conflict, the calendar will give + preference to fields set more recently. For example, when determining + the day, the calendar will look for one of the following combinations + of fields. The most recent combination, as determined by the most + recently set single field, will be used. + + See http://oss.software.ibm.com/icu/apiref/udat_8h.html for full + details. 
+
+*******************************************************************************/
+
+class UCalendar : ICU
+{
+        // opaque ICU calendar handle; passed as first argument to the ucal_* calls
+        package Handle handle;
+
+        // time value exchanged with ucal_getNow / ucal_getMillis / ucal_setMillis
+        typedef double UDate;
+
+        //Possible types of UCalendars
+        public enum Type
+        {
+                Traditional,
+                Gregorian
+        }
+
+        // Possible fields in a UCalendar
+        public enum DateFields
+        {
+                Era,
+                Year,
+                Month,
+                WeekOfYear,
+                WeekOfMonth,
+                Date,
+                DayOfYear,
+                DayOfWeek,
+                DayOfWeekInMonth,
+                AmPm,
+                Hour,
+                HourOfDay,
+                Minute,
+                Second,
+                Millisecond,
+                ZoneOffset,
+                DstOffset,
+                YearWoy,
+                DowLocal,
+                ExtendedYear,
+                JulianDay,
+                MillisecondsInDay,
+                FieldCount,
+                DayOfMonth = Date
+        }
+
+        // Possible days of the week in a UCalendar
+        public enum DaysOfWeek
+        {
+                Sunday = 1,
+                Monday,
+                Tuesday,
+                Wednesday,
+                Thursday,
+                Friday,
+                Saturday
+        }
+
+        // Possible months in a UCalendar
+        public enum Months
+        {
+                January,
+                February,
+                March,
+                April,
+                May,
+                June,
+                July,
+                August,
+                September,
+                October,
+                November,
+                December,
+                UnDecimber
+        }
+
+        // Possible AM/PM values in a UCalendar
+        public enum AMPMs
+        {
+                AM,
+                PM
+        }
+
+        // Possible formats for a UCalendar's display name
+        public enum DisplayNameType
+        {
+                Standard,
+                ShortStandard,
+                DST,
+                ShortDST
+        }
+
+        // Possible limit values for a UCalendar
+        public enum Limit
+        {
+                Minimum,
+                Maximum,
+                GreatestMinimum,
+                LeastMaximum,
+                ActualMinimum,
+                ActualMaximum
+        }
+
+        // Types of UCalendar attributes
+        private enum Attribute
+        {
+                Lenient, // unused: set from UDateFormat instead
+                FirstDayOfWeek,
+                MinimalDaysInFirstWeek
+        }
+
+        /***********************************************************************
+
+                Open a UCalendar. A UCalendar may be used to convert a
+                millisecond value to a year, month, and day
+
+                The ICU status is handed to testError() afterwards
+
+        ***********************************************************************/
+
+        this (inout UTimeZone zone, inout ULocale locale, Type type = Type.Traditional)
+        {
+                UErrorCode e;
+
+                handle = ucal_open (zone.name.ptr, zone.name.length, toString(locale.name), type, e);
+                testError (e, "failed to open calendar");
+        }
+
+        /***********************************************************************
+
+                Internal only: Open a UCalendar with the given handle
+
+                Note that the destructor passes this handle to ucal_close
+
+        ***********************************************************************/
+
+        package this (Handle handle)
+        {
+                this.handle = handle;
+        }
+
+        /***********************************************************************
+
+                Close this UCalendar
+
+        ***********************************************************************/
+
+        ~this ()
+        {
+                ucal_close (handle);
+        }
+
+        /***********************************************************************
+
+                Set the TimeZone used by a UCalendar
+
+        ***********************************************************************/
+
+        void setTimeZone (inout UTimeZone zone)
+        {
+                UErrorCode e;
+
+                ucal_setTimeZone (handle, zone.name.ptr, zone.name.length, e);
+                testError (e, "failed to set calendar time zone");
+        }
+
+        /***********************************************************************
+
+                Get display name of the TimeZone used by this UCalendar
+
+                The name is written into 's' through its format() callback
+
+        ***********************************************************************/
+
+        void getTimeZoneName (UString s, inout ULocale locale, DisplayNameType type=DisplayNameType.Standard)
+        {
+                // callback invoked by UString.format with the destination buffer
+                uint format (wchar* dst, uint length, inout ICU.UErrorCode e)
+                {
+                        return ucal_getTimeZoneDisplayName (handle, type, toString(locale.name), dst, length, e);
+                }
+
+                s.format (&format, "failed to get time zone name");
+        }
+
+        /***********************************************************************
+
+                Determine if a UCalendar is currently in daylight savings
+                time
+
+        ***********************************************************************/
+
+        bool inDaylightTime ()
+        {
+                UErrorCode e;
+
+                auto x = ucal_inDaylightTime (handle, e);
+                testError (e, "failed to test calendar daylight time");
+                return x != 0;
+        }
+
+        /***********************************************************************
+
+                Get the current date and time
+
+        ***********************************************************************/
+
+        UDate getNow ()
+        {
+                return ucal_getNow ();
+        }
+
+        /***********************************************************************
+
+                Get a UCalendar's current time in millis. The time is
+                represented as milliseconds from the epoch
+
+        ***********************************************************************/
+
+        UDate getMillis ()
+        {
+                UErrorCode e;
+
+                auto x = ucal_getMillis (handle, e);
+                testError (e, "failed to get time");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Set a UCalendar's current time in millis. The time is
+                represented as milliseconds from the epoch
+
+        ***********************************************************************/
+
+        void setMillis (UDate date)
+        {
+                UErrorCode e;
+
+                ucal_setMillis (handle, date, e);
+                testError (e, "failed to set time");
+        }
+
+        /***********************************************************************
+
+                Set a UCalendar's current date
+
+        ***********************************************************************/
+
+        void setDate (uint year, Months month, uint date)
+        {
+                UErrorCode e;
+
+                ucal_setDate (handle, year, month, date, e);
+                testError (e, "failed to set date");
+        }
+
+        /***********************************************************************
+
+                Set a UCalendar's current date and time
+
+        ***********************************************************************/
+
+        void setDateTime (uint year, Months month, uint date, uint hour, uint minute, uint second)
+        {
+                UErrorCode e;
+
+                ucal_setDateTime (handle, year, month, date, hour, minute, second, e);
+                testError (e, "failed to set date/time");
+        }
+
+        /***********************************************************************
+
+                Returns TRUE if the given Calendar object is equivalent
+                to this one
+
+        ***********************************************************************/
+
+        bool isEquivalent (UCalendar when)
+        {
+                return ucal_equivalentTo (handle, when.handle) != 0;
+        }
+
+        /***********************************************************************
+
+                Compares the Calendar time: true if 'when' is this very
+                object or both calendars report the same getMillis value
+
+        ***********************************************************************/
+
+        bool isEqual (UCalendar when)
+        {
+                return (this is when || getMillis == when.getMillis);
+        }
+
+        /***********************************************************************
+
+                Returns true if this Calendar's current time is before
+                "when"'s current time
+
+        ***********************************************************************/
+
+        bool isBefore (UCalendar when)
+        {
+                // a calendar is never before itself; the former '||' made this
+                // return true for every distinct calendar, whatever its time
+                return (this !is when && getMillis < when.getMillis);
+        }
+
+        /***********************************************************************
+
+                Returns true if this Calendar's current time is after
+                "when"'s current time
+
+        ***********************************************************************/
+
+        bool isAfter (UCalendar when)
+        {
+                // a calendar is never after itself; the former '||' made this
+                // return true for every distinct calendar, whatever its time
+                return (this !is when && getMillis > when.getMillis);
+        }
+
+        /***********************************************************************
+
+                Add a specified signed amount to a particular field in a
+                UCalendar
+
+                NOTE(review): the amount is declared uint here while ICU
+                declares it as a signed 32-bit value -- confirm that
+                negative amounts are meant to be passed through unchanged
+
+        ***********************************************************************/
+
+        void add (DateFields field, uint amount)
+        {
+                UErrorCode e;
+
+                ucal_add (handle, field, amount, e);
+                testError (e, "failed to add to calendar");
+        }
+
+        /***********************************************************************
+
+                Roll a particular field in a UCalendar by a specified
+                signed amount, via ucal_roll (see the ICU documentation
+                for how rolling differs from add)
+
+        ***********************************************************************/
+
+        void roll (DateFields field, uint amount)
+        {
+                UErrorCode e;
+
+                ucal_roll (handle, field, amount, e);
+                testError (e, "failed to roll calendar");
+        }
+
+        /***********************************************************************
+
+                Get the current value of a field from a UCalendar
+
+        ***********************************************************************/
+
+        uint get (DateFields field)
+        {
+                UErrorCode e;
+
+                auto x = ucal_get (handle, field, e);
+                testError (e, "failed to get calendar field");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Set the value of a field in a UCalendar
+
+        ***********************************************************************/
+
+        void set (DateFields field, uint value)
+        {
+                ucal_set (handle, field, value);
+        }
+
+        /***********************************************************************
+
+                Determine if a field in a UCalendar is set
+
+        ***********************************************************************/
+
+        bool isSet (DateFields field)
+        {
+                return ucal_isSet (handle, field) != 0;
+        }
+
+        /***********************************************************************
+
+                Clear a field in a UCalendar
+
+        ***********************************************************************/
+
+        void clearField (DateFields field)
+        {
+                ucal_clearField (handle, field);
+        }
+
+        /***********************************************************************
+
+                Clear all fields in a UCalendar
+
+        ***********************************************************************/
+
+        void clear ()
+        {
+                ucal_clear (handle);
+        }
+
+        /***********************************************************************
+
+                Determine a limit for a field in a UCalendar. A limit is a
+                maximum or minimum value for a field
+
+        ***********************************************************************/
+
+        uint getLimit (DateFields field, Limit type)
+        {
+                UErrorCode e;
+
+                auto x = ucal_getLimit (handle, field, type, e);
+                testError (e, "failed to get calendar limit");
+                return x;
+        }
+
+        /***********************************************************************
+
+                Get the MinimalDaysInFirstWeek attribute of this UCalendar
+
+        ***********************************************************************/
+
+        uint getDaysInFirstWeek ()
+        {
+                return ucal_getAttribute (handle, Attribute.MinimalDaysInFirstWeek);
+        }
+
+        /***********************************************************************
+
+                Get the FirstDayOfWeek attribute of this UCalendar
+
+        ***********************************************************************/
+
+        uint getFirstDayOfWeek ()
+        {
+                return ucal_getAttribute (handle, Attribute.FirstDayOfWeek);
+        }
+
+        /***********************************************************************
+
+                Set the MinimalDaysInFirstWeek attribute of this UCalendar
+
+        ***********************************************************************/
+
+        void setDaysInFirstWeek (uint value)
+        {
+                ucal_setAttribute (handle, Attribute.MinimalDaysInFirstWeek, value);
+        }
+
+        /***********************************************************************
+
+                Set the FirstDayOfWeek attribute of this UCalendar
+
+        ***********************************************************************/
+
+        void setFirstDayOfWeek (uint value)
+        {
+                ucal_setAttribute (handle, Attribute.FirstDayOfWeek, value);
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                Function pointers filled in through the 'targets' table
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                Handle function (wchar*, uint, char*, Type, inout UErrorCode) ucal_open;
+                void   function (Handle) ucal_close;
+                UDate  function () ucal_getNow;
+                UDate  function (Handle, inout UErrorCode) ucal_getMillis;
+                void   function (Handle, UDate, inout UErrorCode) ucal_setMillis;
+                void   function (Handle, uint, uint, uint, inout UErrorCode) ucal_setDate;
+                void   function (Handle, uint, uint, uint, uint, uint, uint, inout UErrorCode) ucal_setDateTime;
+                byte   function (Handle, Handle) ucal_equivalentTo;
+                void   function (Handle, uint, uint, inout UErrorCode) ucal_add;
+                void   function (Handle, uint, uint, inout UErrorCode) ucal_roll;
+                uint   function (Handle, uint, inout UErrorCode) ucal_get;
+                void   function (Handle, uint, uint) ucal_set;
+                byte   function (Handle, uint) ucal_isSet;
+                void   function (Handle, uint) ucal_clearField;
+                void   function (Handle) ucal_clear;
+                uint   function (Handle, uint, uint, inout UErrorCode) ucal_getLimit;
+                void   function (Handle, wchar*, uint, inout UErrorCode) ucal_setTimeZone;
+                // second argument is the status out-parameter, as for
+                // ucal_getMillis; declared as uint it was passed by value
+                byte   function (Handle, inout UErrorCode) ucal_inDaylightTime;
+                uint   function (Handle, uint) ucal_getAttribute;
+                void   function (Handle, uint, uint) ucal_setAttribute;
+                uint   function (Handle, uint, char*, wchar*, uint, inout UErrorCode) ucal_getTimeZoneDisplayName;
+        }
+
+        /***********************************************************************
+
+                Pairs each function pointer above with its exported name
+
+        ***********************************************************************/
+
+        static  FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &ucal_open,                   "ucal_open"},
+                {cast(void**) &ucal_close,                  "ucal_close"},
+                {cast(void**) &ucal_getNow,                 "ucal_getNow"},
+                {cast(void**) &ucal_getMillis,              "ucal_getMillis"},
+                {cast(void**) &ucal_setMillis,              "ucal_setMillis"},
+                {cast(void**) &ucal_setDate,                "ucal_setDate"},
+                {cast(void**) &ucal_setDateTime,            "ucal_setDateTime"},
+                {cast(void**) &ucal_equivalentTo,           "ucal_equivalentTo"},
+                {cast(void**) &ucal_add,                    "ucal_add"},
+                {cast(void**) &ucal_roll,                   "ucal_roll"},
+                {cast(void**) &ucal_get,                    "ucal_get"},
+                {cast(void**) &ucal_set,                    "ucal_set"},
+                {cast(void**) &ucal_clearField,             "ucal_clearField"},
+                {cast(void**) &ucal_clear,                  "ucal_clear"},
+                {cast(void**) &ucal_getLimit,               "ucal_getLimit"},
+                {cast(void**) &ucal_setTimeZone,            "ucal_setTimeZone"},
+                {cast(void**) &ucal_inDaylightTime,         "ucal_inDaylightTime"},
+                {cast(void**) &ucal_getAttribute,           "ucal_getAttribute"},
+                {cast(void**) &ucal_setAttribute,           "ucal_setAttribute"},
+                {cast(void**) &ucal_isSet,                  "ucal_isSet"},
+                {cast(void**) &ucal_getTimeZoneDisplayName, "ucal_getTimeZoneDisplayName"},
+                ];
+
+        /***********************************************************************
+
+                Hand 'targets' to FunctionLoader.bind and keep its result
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuin, targets);
+        }
+
+        /***********************************************************************
+
+                Pass the value kept by the static constructor to unbind
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+
+}
diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UChar.d --- a/dwtx/dwtxhelper/mangoicu/UChar.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UChar.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,1240 +1,1240 @@ -/******************************************************************************* - - @file UChar.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - 
warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UChar; - -private import dwtx.dwtxhelper.mangoicu.ICU; - -/******************************************************************************* - - This API provides low-level access to the Unicode Character - Database. 
In addition to raw property values, some convenience - functions calculate derived properties, for example for Java-style - programming. - - Unicode assigns each code point (not just assigned character) - values for many properties. Most of them are simple boolean - flags, or constants from a small enumerated list. For some - properties, values are strings or other relatively more complex - types. - - For more information see "About the Unicode Character Database" - (http://www.unicode.org/ucd/) and the ICU User Guide chapter on - Properties (http://oss.software.ibm.com/icu/userguide/properties.html). - - Many functions are designed to match java.lang.Character functions. - See the individual function documentation, and see the JDK 1.4.1 - java.lang.Character documentation at - http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html - - There are also functions that provide easy migration from C/POSIX - functions like isblank(). Their use is generally discouraged because - the C/POSIX standards do not define their semantics beyond the ASCII - range, which means that different implementations exhibit very different - behavior. Instead, Unicode properties should be used directly. - - There are also only a few, broad C/POSIX character classes, and they - tend to be used for conflicting purposes. For example, the "isalpha()" - class is sometimes used to determine word boundaries, while a more - sophisticated approach would at least distinguish initial letters from - continuation characters (the latter including combining marks). (In - ICU, BreakIterator is the most sophisticated API for word boundaries.) - Another example: There is no "istitle()" class for titlecase characters. - - A summary of the behavior of some C/POSIX character classification - implementations for Unicode is available at - http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html - - See - this page for full details. 
- -*******************************************************************************/ - -class UChar : ICU -{ - public enum Property - { - Alphabetic = 0, - BinaryStart = Alphabetic, - AsciiHexDigit, - BidiControl, - BidiMirrored, - Dash, - DefaultIgnorableCodePoint, - Deprecated, - Diacritic, - Extender, - FullCompositionExclusion, - GraphemeBase, - GraphemeExtend, - GraphemeLink, - HexDigit, - Hyphen, - IdContinue, - IdStart, - Ideographic, - IdsBinaryOperator, - IdsTrinaryOperator, - JoinControl, - LogicalOrderException, - Lowercase, - Math, - NoncharacterCodePoint, - QuotationMark, - Radical, - SoftDotted, - TerminalPunctuation, - UnifiedIdeograph, - Uppercase, - WhiteSpace, - XidContinue, - XidStart, - CaseSensitive, - STerm, - VariationSelector, - NfdInert, - NfkdInert, - NfcInert, - NfkcInert, - SegmentStarter, - BinaryLimit, - BidiClass = 0x1000, - IntStart = BidiClass, - Block, CanonicalCombiningClass, - DecompositionType, - EastAsianWidth, - GeneralCategory, - JoiningGroup, - JoiningType, - LineBreak, - NumericType, - Script, - HangulSyllableType, - NfdQuickCheck, - NfkdQuickCheck, - NfcQuickCheck, - NfkcQuickCheck, - LeadCanonicalCombiningClass, - TrailCanonicalCombiningClass, - IntLimit, - GeneralCategoryMask = 0x2000, - MaskStart = GeneralCategoryMask, - MaskLimit, - NumericValue = 0x3000, - DoubleStart = NumericValue, - DoubleLimit, - Age = 0x4000, - StringStart = Age, - BidiMirroringGlyph, - CaseFolding, - IsoComment, - LowercaseMapping, - Name, - SimpleCaseFolding, - SimpleLowercaseMapping, - SimpleTitlecaseMapping, - SimpleUppercaseMapping, - TitlecaseMapping, - Unicode1Name, - UppercaseMapping, - StringLimit, - InvalidCode = -1 - } - - public enum Category - { - Unassigned = 0, - GeneralOtherTypes = 0, - UppercaseLetter = 1, - LowercaseLetter = 2, - TitlecaseLetter = 3, - ModifierLetter = 4, - OtherLetter = 5, - NonSpacingMark = 6, - EnclosingMark = 7, - CombiningSpacingMark = 8, - DecimalDigitNumber = 9, - LetterNumber = 10, - OtherNumber = 11, - 
SpaceSeparator = 12, - LineSeparator = 13, - ParagraphSeparator = 14, - ControlChar = 15, - FormatChar = 16, - PrivateUseChar = 17, - Surrogate = 18, - DashPunctuation = 19, - StartPunctuation = 20, - EndPunctuation = 21, - ConnectorPunctuation = 22, - OtherPunctuation = 23, - MathSymbol = 24, - CurrencySymbol = 25, - ModifierSymbol = 26, - OtherSymbol = 27, - InitialPunctuation = 28, - FinalPunctuation = 29, - Count - } - - public enum Direction - { - LeftToRight = 0, - RightToLeft = 1, - EuropeanNumber = 2, - EuropeanNumberSeparator = 3, - EuropeanNumberTerminator = 4, - ArabicNumber = 5, - CommonNumberSeparator = 6, - BlockSeparator = 7, - SegmentSeparator = 8, - WhiteSpaceNeutral = 9, - OtherNeutral = 10, - LeftToRightEmbedding = 11, - LeftToRightOverride = 12, - RightToLeftArabic = 13, - RightToLeftEmbedding = 14, - RightToLeftOverride = 15, - PopDirectionalFormat = 16, - DirNonSpacingMark = 17, - BoundaryNeutral = 18, - Count - } - - public enum BlockCode - { - NoBlock = 0, - BasicLatin = 1, - Latin1Supplement = 2, - LatinExtendedA = 3, - LatinExtendedB = 4, - IpaExtensions = 5, - SpacingModifierLetters = 6, - CombiningDiacriticalMarks = 7, - Greek = 8, - Cyrillic = 9, - Armenian = 10, - Hebrew = 11, - Arabic = 12, - Syriac = 13, - Thaana = 14, - Devanagari = 15, - Bengali = 16, - Gurmukhi = 17, - Gujarati = 18, - Oriya = 19, - Tamil = 20, - Telugu = 21, - Kannada = 22, - Malayalam = 23, - Sinhala = 24, - Thai = 25, - Lao = 26, - Tibetan = 27, - Myanmar = 28, - Georgian = 29, - HangulJamo = 30, - Ethiopic = 31, - Cherokee = 32, - UnifiedCanadianAboriginalSyllabics = 33, - Ogham = 34, - Runic = 35, - Khmer = 36, - Mongolian = 37, - LatinExtendedAdditional = 38, - GreekExtended = 39, - GeneralPunctuation = 40, - SuperscriptsAndSubscripts = 41, - CurrencySymbols = 42, - CombiningMarksForSymbols = 43, - LetterlikeSymbols = 44, - NumberForms = 45, - Arrows = 46, - MathematicalOperators = 47, - MiscellaneousTechnical = 48, - ControlPictures = 49, - 
OpticalCharacterRecognition = 50, - EnclosedAlphanumerics = 51, - BoxDrawing = 52, - BlockElements = 53, - GeometricShapes = 54, - MiscellaneousSymbols = 55, - Dingbats = 56, - BraillePatterns = 57, - CjkRadicalsSupplement = 58, - KangxiRadicals = 59, - IdeographicDescriptionCharacters = 60, - CjkSymbolsAndPunctuation = 61, - Hiragana = 62, - Katakana = 63, - Bopomofo = 64, - HangulCompatibilityJamo = 65, - Kanbun = 66, - BopomofoExtended = 67, - EnclosedCjkLettersAndMonths = 68, - CjkCompatibility = 69, - CjkUnifiedIdeographsExtensionA = 70, - CjkUnifiedIdeographs = 71, - YiSyllables = 72, - YiRadicals = 73, - HangulSyllables = 74, - HighSurrogates = 75, - HighPrivateUseSurrogates = 76, - LowSurrogates = 77, - PrivateUse = 78, - PrivateUseArea = PrivateUse, - CjkCompatibilityIdeographs = 79, - AlphabeticPresentationForms = 80, - ArabicPresentationFormsA = 81, - CombiningHalfMarks = 82, - CjkCompatibilityForms = 83, - SmallFormVariants = 84, - ArabicPresentationFormsB = 85, - Specials = 86, - HalfwidthAndFullwidthForms = 87, - OldItalic = 88, - Gothic = 89, - Deseret = 90, - ByzantineMusicalSymbols = 91, - MusicalSymbols = 92, - MathematicalAlphanumericSymbols = 93, - CjkUnifiedIdeographsExtensionB = 94, - CjkCompatibilityIdeographsSupplement = 95, - Tags = 96, - CyrillicSupplementary = 97, - CyrillicSupplement = CyrillicSupplementary, - Tagalog = 98, - Hanunoo = 99, - Buhid = 100, - Tagbanwa = 101, - MiscellaneousMathematicalSymbolsA = 102, - SupplementalArrowsA = 103, - SupplementalArrowsB = 104, - MiscellaneousMathematicalSymbolsB = 105, - SupplementalMathematicalOperators = 106, - KatakanaPhoneticExtensions = 107, - VariationSelectors = 108, - SupplementaryPrivateUseAreaA = 109, - SupplementaryPrivateUseAreaB = 110, - Limbu = 111, - TaiLe = 112, - KhmerSymbols = 113, - PhoneticExtensions = 114, - MiscellaneousSymbolsAndArrows = 115, - YijingHexagramSymbols = 116, - LinearBSyllabary = 117, - LinearBIdeograms = 118, - AegeanNumbers = 119, - Ugaritic = 120, - 
Shavian = 121, - Osmanya = 122, - CypriotSyllabary = 123, - TaiXuanJingSymbols = 124, - VariationSelectorsSupplement = 125, - Count, - InvalidCode = -1 - } - - public enum EastAsianWidth - { - Neutral, - Ambiguous, - Halfwidth, - Fullwidth, - Narrow, - Wide, - Count - } - - public enum CharNameChoice - { - Unicode, - Unicode10, - Extended, - Count - } - - public enum NameChoice - { - Short, - Long, - Count - } - - public enum DecompositionType - { - None, - Canonical, - Compat, - Circle, - Final, - Font, - Fraction, - Initial, - Isolated, - Medial, - Narrow, - Nobreak, - Small, - Square, - Sub, - Super, - Vertical, - Wide, - Count - } - - public enum JoiningType - { - NonJoining, - JoinCausing, - DualJoining, - LeftJoining, - RightJoining, - Transparent, - Count - } - - public enum JoiningGroup - { - NoJoiningGroup, - Ain, - Alaph, - Alef, - Beh, - Beth, - Dal, - DalathRish, - E, - Feh, - FinalSemkath, - Gaf, - Gamal, - Hah, - HamzaOnHehGoal, - He, - Heh, - HehGoal, - Heth, - Kaf, - Kaph, - KnottedHeh, - Lam, - Lamadh, - Meem, - Mim, - Noon, - Nun, - Pe, - Qaf, - Qaph, - Reh, - Reversed_Pe, - Sad, - Sadhe, - Seen, - Semkath, - Shin, - Swash_Kaf, - Syriac_Waw, - Tah, - Taw, - Teh_Marbuta, - Teth, - Waw, - Yeh, - Yeh_Barree, - Yeh_With_Tail, - Yudh, - Yudh_He, - Zain, - Fe, - Khaph, - Zhain, - Count - } - - public enum LineBreak - { - Unknown, - Ambiguous, - Alphabetic, - BreakBoth, - BreakAfter, - BreakBefore, - MandatoryBreak, - ContingentBreak, - ClosePunctuation, - CombiningMark, - CarriageReturn, - Exclamation, - Glue, - Hyphen, - Ideographic, - Inseperable, - Inseparable = Inseperable, - InfixNumeric, - LineFeed, - Nonstarter, - Numeric, - OpenPunctuation, - PostfixNumeric, - PrefixNumeric, - Quotation, - ComplexContext, - Surrogate, - Space, - BreakSymbols, - Zwspace, - NextLine, - WordJoiner, - Count - } - - public enum NumericType - { - None, - Decimal, - Digit, - Numeric, - Count - } - - public enum HangulSyllableType - { - NotApplicable, - LeadingJamo, - 
VowelJamo, - TrailingJamo, - LvSyllable, - LvtSyllable, - Count - } - - /*********************************************************************** - - Get the property value for an enumerated or integer - Unicode property for a code point. Also returns binary - and mask property values. - - Unicode, especially in version 3.2, defines many more - properties than the original set in UnicodeData.txt. - - The properties APIs are intended to reflect Unicode - properties as defined in the Unicode Character Database - (UCD) and Unicode Technical Reports (UTR). For details - about the properties see http://www.unicode.org/ . For - names of Unicode properties see the file PropertyAliases.txt - - ***********************************************************************/ - - uint getProperty (dchar c, Property p) - { - return u_getIntPropertyValue (cast(uint) c, cast(uint) p); - } - - /*********************************************************************** - - Get the minimum value for an enumerated/integer/binary - Unicode property - - ***********************************************************************/ - - uint getPropertyMinimum (Property p) - { - return u_getIntPropertyMinValue (p); - } - - /*********************************************************************** - - Get the maximum value for an enumerated/integer/binary - Unicode property - - ***********************************************************************/ - - uint getPropertyMaximum (Property p) - { - return u_getIntPropertyMaxValue (p); - } - - /*********************************************************************** - - Returns the bidirectional category value for the code - point, which is used in the Unicode bidirectional algorithm - (UAX #9 http://www.unicode.org/reports/tr9/). 
- - ***********************************************************************/ - - Direction charDirection (dchar c) - { - return cast(Direction) u_charDirection (c); - } - - /*********************************************************************** - - Returns the Unicode allocation block that contains the - character - - ***********************************************************************/ - - BlockCode getBlockCode (dchar c) - { - return cast(BlockCode) ublock_getCode (c); - } - - /*********************************************************************** - - Retrieve the name of a Unicode character. - - ***********************************************************************/ - - char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst) - { - UErrorCode e; - - uint len = u_charName (c, choice, dst.ptr, dst.length, e); - testError (e, "failed to extract char name (buffer too small?)"); - return dst [0..len]; - } - - /*********************************************************************** - - Get the ISO 10646 comment for a character. - - ***********************************************************************/ - - char[] getComment (dchar c, inout char[] dst) - { - UErrorCode e; - - uint len = u_getISOComment (c, dst.ptr, dst.length, e); - testError (e, "failed to extract comment (buffer too small?)"); - return dst [0..len]; - } - - /*********************************************************************** - - Find a Unicode character by its name and return its code - point value. 
- - ***********************************************************************/ - - dchar charFromName (CharNameChoice choice, char[] name) - { - UErrorCode e; - - dchar c = u_charFromName (choice, toString(name), e); - testError (e, "failed to locate char name"); - return c; - } - - /*********************************************************************** - - Return the Unicode name for a given property, as given in the - Unicode database file PropertyAliases.txt - - ***********************************************************************/ - - char[] getPropertyName (Property p, NameChoice choice) - { - return toArray (u_getPropertyName (p, choice)); - } - - /*********************************************************************** - - Return the Unicode name for a given property value, as given - in the Unicode database file PropertyValueAliases.txt. - - ***********************************************************************/ - - char[] getPropertyValueName (Property p, NameChoice choice, uint value) - { - return toArray (u_getPropertyValueName (p, value, choice)); - } - - /*********************************************************************** - - Gets the Unicode version information - - ***********************************************************************/ - - void getUnicodeVersion (inout Version v) - { - u_getUnicodeVersion (v); - } - - /*********************************************************************** - - Get the "age" of the code point - - ***********************************************************************/ - - void getCharAge (dchar c, inout Version v) - { - u_charAge (c, v); - } - - - /*********************************************************************** - - These are externalised directly to the client (sans wrapper), - but this may have to change for linux, depending upon the - ICU function-naming conventions within the Posix libraries. 
- - ***********************************************************************/ - - final static extern (C) - { - /*************************************************************** - - Check if a code point has the Alphabetic Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isUAlphabetic; - - /*************************************************************** - - Check if a code point has the Lowercase Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isULowercase; - - /*************************************************************** - - Check if a code point has the Uppercase Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isUUppercase; - - /*************************************************************** - - Check if a code point has the White_Space Unicode - property. - - ***************************************************************/ - - bool function (dchar c) isUWhiteSpace; - - /*************************************************************** - - Determines whether the specified code point has the - general category "Ll" (lowercase letter). - - ***************************************************************/ - - bool function (dchar c) isLower; - - /*************************************************************** - - Determines whether the specified code point has the - general category "Lu" (uppercase letter). - - ***************************************************************/ - - bool function (dchar c) isUpper; - - /*************************************************************** - - Determines whether the specified code point is a - titlecase letter. 
- - ***************************************************************/ - - bool function (dchar c) isTitle; - - /*************************************************************** - - Determines whether the specified code point is a - digit character according to Java. - - ***************************************************************/ - - bool function (dchar c) isDigit; - - /*************************************************************** - - Determines whether the specified code point is a - letter character. - - ***************************************************************/ - - bool function (dchar c) isAlpha; - - /*************************************************************** - - Determines whether the specified code point is an - alphanumeric character (letter or digit) according - to Java. - - ***************************************************************/ - - bool function (dchar c) isAlphaNumeric; - - /*************************************************************** - - Determines whether the specified code point is a - hexadecimal digit. - - ***************************************************************/ - - bool function (dchar c) isHexDigit; - - /*************************************************************** - - Determines whether the specified code point is a - punctuation character. - - ***************************************************************/ - - bool function (dchar c) isPunct; - - /*************************************************************** - - Determines whether the specified code point is a - "graphic" character (printable, excluding spaces). - - ***************************************************************/ - - bool function (dchar c) isGraph; - - /*************************************************************** - - Determines whether the specified code point is a - "blank" or "horizontal space", a character that - visibly separates words on a line. 
- - ***************************************************************/ - - bool function (dchar c) isBlank; - - /*************************************************************** - - Determines whether the specified code point is - "defined", which usually means that it is assigned - a character. - - ***************************************************************/ - - bool function (dchar c) isDefined; - - /*************************************************************** - - Determines if the specified character is a space - character or not. - - ***************************************************************/ - - bool function (dchar c) isSpace; - - /*************************************************************** - - Determine if the specified code point is a space - character according to Java. - - ***************************************************************/ - - bool function (dchar c) isJavaSpaceChar; - - /*************************************************************** - - Determines if the specified code point is a whitespace - character according to Java/ICU. - - ***************************************************************/ - - bool function (dchar c) isWhiteSpace; - - /*************************************************************** - - Determines whether the specified code point is a - control character (as defined by this function). - - ***************************************************************/ - - bool function (dchar c) isCtrl; - - /*************************************************************** - - Determines whether the specified code point is an ISO - control code. - - ***************************************************************/ - - bool function (dchar c) isISOControl; - - /*************************************************************** - - Determines whether the specified code point is a - printable character. 
- - ***************************************************************/ - - bool function (dchar c) isPrint; - - /*************************************************************** - - Determines whether the specified code point is a - base character. - - ***************************************************************/ - - bool function (dchar c) isBase; - - /*************************************************************** - - Determines if the specified character is permissible - as the first character in an identifier according to - Unicode (The Unicode Standard, Version 3.0, chapter - 5.16 Identifiers). - - ***************************************************************/ - - bool function (dchar c) isIDStart; - - /*************************************************************** - - Determines if the specified character is permissible - in an identifier according to Java. - - ***************************************************************/ - - bool function (dchar c) isIDPart; - - /*************************************************************** - - Determines if the specified character should be - regarded as an ignorable character in an identifier, - according to Java. - - ***************************************************************/ - - bool function (dchar c) isIDIgnorable; - - /*************************************************************** - - Determines if the specified character is permissible - as the first character in a Java identifier. - - ***************************************************************/ - - bool function (dchar c) isJavaIDStart; - - /*************************************************************** - - Determines if the specified character is permissible - in a Java identifier. - - ***************************************************************/ - - bool function (dchar c) isJavaIDPart; - - /*************************************************************** - - Determines whether the code point has the - Bidi_Mirrored property. 
- - ***************************************************************/ - - bool function (dchar c) isMirrored; - - /*************************************************************** - - Returns the decimal digit value of a decimal digit - character. - - ***************************************************************/ - - ubyte function (dchar c) charDigitValue; - - /*************************************************************** - - Maps the specified character to a "mirror-image" - character. - - ***************************************************************/ - - dchar function (dchar c) charMirror; - - /*************************************************************** - - Returns the general category value for the code point. - - ***************************************************************/ - - ubyte function (dchar c) charType; - - /*************************************************************** - - Returns the combining class of the code point as - specified in UnicodeData.txt. - - ***************************************************************/ - - ubyte function (dchar c) getCombiningClass; - - /*************************************************************** - - The given character is mapped to its lowercase - equivalent according to UnicodeData.txt; if the - character has no lowercase equivalent, the - character itself is returned. - - ***************************************************************/ - - dchar function (dchar c) toLower; - - /*************************************************************** - - The given character is mapped to its uppercase equivalent - according to UnicodeData.txt; if the character has no - uppercase equivalent, the character itself is returned. 
- - ***************************************************************/ - - dchar function (dchar c) toUpper; - - /*************************************************************** - - The given character is mapped to its titlecase - equivalent according to UnicodeData.txt; if none - is defined, the character itself is returned. - - ***************************************************************/ - - dchar function (dchar c) toTitle; - - /*************************************************************** - - The given character is mapped to its case folding - equivalent according to UnicodeData.txt and - CaseFolding.txt; if the character has no case folding - equivalent, the character itself is returned. - - ***************************************************************/ - - dchar function (dchar c, uint options) foldCase; - - /*************************************************************** - - Returns the decimal digit value of the code point in - the specified radix. - - ***************************************************************/ - - uint function (dchar ch, ubyte radix) digit; - - /*************************************************************** - - Determines the character representation for a specific - digit in the specified radix. - - ***************************************************************/ - - dchar function (uint digit, ubyte radix) forDigit; - - /*************************************************************** - - Get the numeric value for a Unicode code point as - defined in the Unicode Character Database. - - ***************************************************************/ - - double function (dchar c) getNumericValue; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - uint function (uint, uint) u_getIntPropertyValue; - uint function (uint) u_getIntPropertyMinValue; - uint function (uint) u_getIntPropertyMaxValue; - uint function (dchar) u_charDirection; - uint function (dchar) ublock_getCode; - uint function (dchar, uint, char*, uint, inout UErrorCode) u_charName; - uint function (dchar, char*, uint, inout UErrorCode) u_getISOComment; - uint function (uint, char*, inout UErrorCode) u_charFromName; - char* function (uint, uint) u_getPropertyName; - char* function (uint, uint, uint) u_getPropertyValueName; - void function (inout Version) u_getUnicodeVersion; - void function (dchar, inout Version) u_charAge; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &forDigit, "u_forDigit"}, - {cast(void**) &digit, "u_digit"}, - {cast(void**) &foldCase, "u_foldCase"}, - {cast(void**) &toTitle, "u_totitle"}, - {cast(void**) &toUpper, "u_toupper"}, - {cast(void**) &toLower, "u_tolower"}, - {cast(void**) &charType, "u_charType"}, - {cast(void**) &charMirror, "u_charMirror"}, - {cast(void**) &charDigitValue, "u_charDigitValue"}, - {cast(void**) &isJavaIDPart, "u_isJavaIDPart"}, - {cast(void**) &isJavaIDStart, "u_isJavaIDStart"}, - {cast(void**) &isIDIgnorable, "u_isIDIgnorable"}, - {cast(void**) &isIDPart, "u_isIDPart"}, - {cast(void**) &isIDStart, "u_isIDStart"}, - {cast(void**) &isMirrored, "u_isMirrored"}, - {cast(void**) &isBase, "u_isbase"}, - {cast(void**) &isPrint, "u_isprint"}, - 
{cast(void**) &isISOControl, "u_isISOControl"}, - {cast(void**) &isCtrl, "u_iscntrl"}, - {cast(void**) &isWhiteSpace, "u_isWhitespace"}, - {cast(void**) &isJavaSpaceChar, "u_isJavaSpaceChar"}, - {cast(void**) &isSpace, "u_isspace"}, - {cast(void**) &isDefined, "u_isdefined"}, - {cast(void**) &isBlank, "u_isblank"}, - {cast(void**) &isGraph, "u_isgraph"}, - {cast(void**) &isPunct, "u_ispunct"}, - {cast(void**) &isHexDigit, "u_isxdigit"}, - {cast(void**) &isAlpha, "u_isalpha"}, - {cast(void**) &isAlphaNumeric, "u_isalnum"}, - {cast(void**) &isDigit, "u_isdigit"}, - {cast(void**) &isTitle, "u_istitle"}, - {cast(void**) &isUpper, "u_isupper"}, - {cast(void**) &isLower, "u_islower"}, - {cast(void**) &isUAlphabetic, "u_isUAlphabetic"}, - {cast(void**) &isUWhiteSpace, "u_isUWhiteSpace"}, - {cast(void**) &isUUppercase, "u_isUUppercase"}, - {cast(void**) &isULowercase, "u_isULowercase"}, - {cast(void**) &getNumericValue, "u_getNumericValue"}, - {cast(void**) &getCombiningClass, "u_getCombiningClass"}, - {cast(void**) &u_getIntPropertyValue, "u_getIntPropertyValue"}, - {cast(void**) &u_getIntPropertyMinValue,"u_getIntPropertyMinValue"}, - {cast(void**) &u_getIntPropertyMaxValue,"u_getIntPropertyMaxValue"}, - {cast(void**) &u_charDirection, "u_charDirection"}, - {cast(void**) &ublock_getCode, "ublock_getCode"}, - {cast(void**) &u_charName, "u_charName"}, - {cast(void**) &u_getISOComment, "u_getISOComment"}, - {cast(void**) &u_charFromName, "u_charFromName"}, - {cast(void**) &u_getPropertyName, "u_getPropertyName"}, - {cast(void**) &u_getPropertyValueName, "u_getPropertyValueName"}, - {cast(void**) &u_getUnicodeVersion, "u_getUnicodeVersion"}, - {cast(void**) &u_charAge, "u_charAge"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - 
/*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file UChar.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UChar; + +private import dwtx.dwtxhelper.mangoicu.ICU; + +/******************************************************************************* + + This API provides low-level access to the Unicode Character + Database. 
In addition to raw property values, some convenience + functions calculate derived properties, for example for Java-style + programming. + + Unicode assigns each code point (not just assigned character) + values for many properties. Most of them are simple boolean + flags, or constants from a small enumerated list. For some + properties, values are strings or other relatively more complex + types. + + For more information see "About the Unicode Character Database" + (http://www.unicode.org/ucd/) and the ICU User Guide chapter on + Properties (http://oss.software.ibm.com/icu/userguide/properties.html). + + Many functions are designed to match java.lang.Character functions. + See the individual function documentation, and see the JDK 1.4.1 + java.lang.Character documentation at + http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html + + There are also functions that provide easy migration from C/POSIX + functions like isblank(). Their use is generally discouraged because + the C/POSIX standards do not define their semantics beyond the ASCII + range, which means that different implementations exhibit very different + behavior. Instead, Unicode properties should be used directly. + + There are also only a few, broad C/POSIX character classes, and they + tend to be used for conflicting purposes. For example, the "isalpha()" + class is sometimes used to determine word boundaries, while a more + sophisticated approach would at least distinguish initial letters from + continuation characters (the latter including combining marks). (In + ICU, BreakIterator is the most sophisticated API for word boundaries.) + Another example: There is no "istitle()" class for titlecase characters. + + A summary of the behavior of some C/POSIX character classification + implementations for Unicode is available at + http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html + + See + this page for full details. 
+ +*******************************************************************************/ + +class UChar : ICU +{ + public enum Property + { + Alphabetic = 0, + BinaryStart = Alphabetic, + AsciiHexDigit, + BidiControl, + BidiMirrored, + Dash, + DefaultIgnorableCodePoint, + Deprecated, + Diacritic, + Extender, + FullCompositionExclusion, + GraphemeBase, + GraphemeExtend, + GraphemeLink, + HexDigit, + Hyphen, + IdContinue, + IdStart, + Ideographic, + IdsBinaryOperator, + IdsTrinaryOperator, + JoinControl, + LogicalOrderException, + Lowercase, + Math, + NoncharacterCodePoint, + QuotationMark, + Radical, + SoftDotted, + TerminalPunctuation, + UnifiedIdeograph, + Uppercase, + WhiteSpace, + XidContinue, + XidStart, + CaseSensitive, + STerm, + VariationSelector, + NfdInert, + NfkdInert, + NfcInert, + NfkcInert, + SegmentStarter, + BinaryLimit, + BidiClass = 0x1000, + IntStart = BidiClass, + Block, CanonicalCombiningClass, + DecompositionType, + EastAsianWidth, + GeneralCategory, + JoiningGroup, + JoiningType, + LineBreak, + NumericType, + Script, + HangulSyllableType, + NfdQuickCheck, + NfkdQuickCheck, + NfcQuickCheck, + NfkcQuickCheck, + LeadCanonicalCombiningClass, + TrailCanonicalCombiningClass, + IntLimit, + GeneralCategoryMask = 0x2000, + MaskStart = GeneralCategoryMask, + MaskLimit, + NumericValue = 0x3000, + DoubleStart = NumericValue, + DoubleLimit, + Age = 0x4000, + StringStart = Age, + BidiMirroringGlyph, + CaseFolding, + IsoComment, + LowercaseMapping, + Name, + SimpleCaseFolding, + SimpleLowercaseMapping, + SimpleTitlecaseMapping, + SimpleUppercaseMapping, + TitlecaseMapping, + Unicode1Name, + UppercaseMapping, + StringLimit, + InvalidCode = -1 + } + + public enum Category + { + Unassigned = 0, + GeneralOtherTypes = 0, + UppercaseLetter = 1, + LowercaseLetter = 2, + TitlecaseLetter = 3, + ModifierLetter = 4, + OtherLetter = 5, + NonSpacingMark = 6, + EnclosingMark = 7, + CombiningSpacingMark = 8, + DecimalDigitNumber = 9, + LetterNumber = 10, + OtherNumber = 11, + 
SpaceSeparator = 12, + LineSeparator = 13, + ParagraphSeparator = 14, + ControlChar = 15, + FormatChar = 16, + PrivateUseChar = 17, + Surrogate = 18, + DashPunctuation = 19, + StartPunctuation = 20, + EndPunctuation = 21, + ConnectorPunctuation = 22, + OtherPunctuation = 23, + MathSymbol = 24, + CurrencySymbol = 25, + ModifierSymbol = 26, + OtherSymbol = 27, + InitialPunctuation = 28, + FinalPunctuation = 29, + Count + } + + public enum Direction + { + LeftToRight = 0, + RightToLeft = 1, + EuropeanNumber = 2, + EuropeanNumberSeparator = 3, + EuropeanNumberTerminator = 4, + ArabicNumber = 5, + CommonNumberSeparator = 6, + BlockSeparator = 7, + SegmentSeparator = 8, + WhiteSpaceNeutral = 9, + OtherNeutral = 10, + LeftToRightEmbedding = 11, + LeftToRightOverride = 12, + RightToLeftArabic = 13, + RightToLeftEmbedding = 14, + RightToLeftOverride = 15, + PopDirectionalFormat = 16, + DirNonSpacingMark = 17, + BoundaryNeutral = 18, + Count + } + + public enum BlockCode + { + NoBlock = 0, + BasicLatin = 1, + Latin1Supplement = 2, + LatinExtendedA = 3, + LatinExtendedB = 4, + IpaExtensions = 5, + SpacingModifierLetters = 6, + CombiningDiacriticalMarks = 7, + Greek = 8, + Cyrillic = 9, + Armenian = 10, + Hebrew = 11, + Arabic = 12, + Syriac = 13, + Thaana = 14, + Devanagari = 15, + Bengali = 16, + Gurmukhi = 17, + Gujarati = 18, + Oriya = 19, + Tamil = 20, + Telugu = 21, + Kannada = 22, + Malayalam = 23, + Sinhala = 24, + Thai = 25, + Lao = 26, + Tibetan = 27, + Myanmar = 28, + Georgian = 29, + HangulJamo = 30, + Ethiopic = 31, + Cherokee = 32, + UnifiedCanadianAboriginalSyllabics = 33, + Ogham = 34, + Runic = 35, + Khmer = 36, + Mongolian = 37, + LatinExtendedAdditional = 38, + GreekExtended = 39, + GeneralPunctuation = 40, + SuperscriptsAndSubscripts = 41, + CurrencySymbols = 42, + CombiningMarksForSymbols = 43, + LetterlikeSymbols = 44, + NumberForms = 45, + Arrows = 46, + MathematicalOperators = 47, + MiscellaneousTechnical = 48, + ControlPictures = 49, + 
OpticalCharacterRecognition = 50, + EnclosedAlphanumerics = 51, + BoxDrawing = 52, + BlockElements = 53, + GeometricShapes = 54, + MiscellaneousSymbols = 55, + Dingbats = 56, + BraillePatterns = 57, + CjkRadicalsSupplement = 58, + KangxiRadicals = 59, + IdeographicDescriptionCharacters = 60, + CjkSymbolsAndPunctuation = 61, + Hiragana = 62, + Katakana = 63, + Bopomofo = 64, + HangulCompatibilityJamo = 65, + Kanbun = 66, + BopomofoExtended = 67, + EnclosedCjkLettersAndMonths = 68, + CjkCompatibility = 69, + CjkUnifiedIdeographsExtensionA = 70, + CjkUnifiedIdeographs = 71, + YiSyllables = 72, + YiRadicals = 73, + HangulSyllables = 74, + HighSurrogates = 75, + HighPrivateUseSurrogates = 76, + LowSurrogates = 77, + PrivateUse = 78, + PrivateUseArea = PrivateUse, + CjkCompatibilityIdeographs = 79, + AlphabeticPresentationForms = 80, + ArabicPresentationFormsA = 81, + CombiningHalfMarks = 82, + CjkCompatibilityForms = 83, + SmallFormVariants = 84, + ArabicPresentationFormsB = 85, + Specials = 86, + HalfwidthAndFullwidthForms = 87, + OldItalic = 88, + Gothic = 89, + Deseret = 90, + ByzantineMusicalSymbols = 91, + MusicalSymbols = 92, + MathematicalAlphanumericSymbols = 93, + CjkUnifiedIdeographsExtensionB = 94, + CjkCompatibilityIdeographsSupplement = 95, + Tags = 96, + CyrillicSupplementary = 97, + CyrillicSupplement = CyrillicSupplementary, + Tagalog = 98, + Hanunoo = 99, + Buhid = 100, + Tagbanwa = 101, + MiscellaneousMathematicalSymbolsA = 102, + SupplementalArrowsA = 103, + SupplementalArrowsB = 104, + MiscellaneousMathematicalSymbolsB = 105, + SupplementalMathematicalOperators = 106, + KatakanaPhoneticExtensions = 107, + VariationSelectors = 108, + SupplementaryPrivateUseAreaA = 109, + SupplementaryPrivateUseAreaB = 110, + Limbu = 111, + TaiLe = 112, + KhmerSymbols = 113, + PhoneticExtensions = 114, + MiscellaneousSymbolsAndArrows = 115, + YijingHexagramSymbols = 116, + LinearBSyllabary = 117, + LinearBIdeograms = 118, + AegeanNumbers = 119, + Ugaritic = 120, + 
Shavian = 121, + Osmanya = 122, + CypriotSyllabary = 123, + TaiXuanJingSymbols = 124, + VariationSelectorsSupplement = 125, + Count, + InvalidCode = -1 + } + + public enum EastAsianWidth + { + Neutral, + Ambiguous, + Halfwidth, + Fullwidth, + Narrow, + Wide, + Count + } + + public enum CharNameChoice + { + Unicode, + Unicode10, + Extended, + Count + } + + public enum NameChoice + { + Short, + Long, + Count + } + + public enum DecompositionType + { + None, + Canonical, + Compat, + Circle, + Final, + Font, + Fraction, + Initial, + Isolated, + Medial, + Narrow, + Nobreak, + Small, + Square, + Sub, + Super, + Vertical, + Wide, + Count + } + + public enum JoiningType + { + NonJoining, + JoinCausing, + DualJoining, + LeftJoining, + RightJoining, + Transparent, + Count + } + + public enum JoiningGroup + { + NoJoiningGroup, + Ain, + Alaph, + Alef, + Beh, + Beth, + Dal, + DalathRish, + E, + Feh, + FinalSemkath, + Gaf, + Gamal, + Hah, + HamzaOnHehGoal, + He, + Heh, + HehGoal, + Heth, + Kaf, + Kaph, + KnottedHeh, + Lam, + Lamadh, + Meem, + Mim, + Noon, + Nun, + Pe, + Qaf, + Qaph, + Reh, + Reversed_Pe, + Sad, + Sadhe, + Seen, + Semkath, + Shin, + Swash_Kaf, + Syriac_Waw, + Tah, + Taw, + Teh_Marbuta, + Teth, + Waw, + Yeh, + Yeh_Barree, + Yeh_With_Tail, + Yudh, + Yudh_He, + Zain, + Fe, + Khaph, + Zhain, + Count + } + + public enum LineBreak + { + Unknown, + Ambiguous, + Alphabetic, + BreakBoth, + BreakAfter, + BreakBefore, + MandatoryBreak, + ContingentBreak, + ClosePunctuation, + CombiningMark, + CarriageReturn, + Exclamation, + Glue, + Hyphen, + Ideographic, + Inseperable, + Inseparable = Inseperable, + InfixNumeric, + LineFeed, + Nonstarter, + Numeric, + OpenPunctuation, + PostfixNumeric, + PrefixNumeric, + Quotation, + ComplexContext, + Surrogate, + Space, + BreakSymbols, + Zwspace, + NextLine, + WordJoiner, + Count + } + + public enum NumericType + { + None, + Decimal, + Digit, + Numeric, + Count + } + + public enum HangulSyllableType + { + NotApplicable, + LeadingJamo, + 
VowelJamo, + TrailingJamo, + LvSyllable, + LvtSyllable, + Count + } + + /*********************************************************************** + + Get the property value for an enumerated or integer + Unicode property for a code point. Also returns binary + and mask property values. + + Unicode, especially in version 3.2, defines many more + properties than the original set in UnicodeData.txt. + + The properties APIs are intended to reflect Unicode + properties as defined in the Unicode Character Database + (UCD) and Unicode Technical Reports (UTR). For details + about the properties see http://www.unicode.org/ . For + names of Unicode properties see the file PropertyAliases.txt + + ***********************************************************************/ + + uint getProperty (dchar c, Property p) + { + return u_getIntPropertyValue (cast(uint) c, cast(uint) p); + } + + /*********************************************************************** + + Get the minimum value for an enumerated/integer/binary + Unicode property + + ***********************************************************************/ + + uint getPropertyMinimum (Property p) + { + return u_getIntPropertyMinValue (p); + } + + /*********************************************************************** + + Get the maximum value for an enumerated/integer/binary + Unicode property + + ***********************************************************************/ + + uint getPropertyMaximum (Property p) + { + return u_getIntPropertyMaxValue (p); + } + + /*********************************************************************** + + Returns the bidirectional category value for the code + point, which is used in the Unicode bidirectional algorithm + (UAX #9 http://www.unicode.org/reports/tr9/). 
+ + ***********************************************************************/ + + Direction charDirection (dchar c) + { + return cast(Direction) u_charDirection (c); + } + + /*********************************************************************** + + Returns the Unicode allocation block that contains the + character + + ***********************************************************************/ + + BlockCode getBlockCode (dchar c) + { + return cast(BlockCode) ublock_getCode (c); + } + + /*********************************************************************** + + Retrieve the name of a Unicode character. + + ***********************************************************************/ + + char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst) + { + UErrorCode e; + + uint len = u_charName (c, choice, dst.ptr, dst.length, e); + testError (e, "failed to extract char name (buffer too small?)"); + return dst [0..len]; + } + + /*********************************************************************** + + Get the ISO 10646 comment for a character. + + ***********************************************************************/ + + char[] getComment (dchar c, inout char[] dst) + { + UErrorCode e; + + uint len = u_getISOComment (c, dst.ptr, dst.length, e); + testError (e, "failed to extract comment (buffer too small?)"); + return dst [0..len]; + } + + /*********************************************************************** + + Find a Unicode character by its name and return its code + point value. 
+ + ***********************************************************************/ + + dchar charFromName (CharNameChoice choice, char[] name) + { + UErrorCode e; + + dchar c = u_charFromName (choice, toString(name), e); + testError (e, "failed to locate char name"); + return c; + } + + /*********************************************************************** + + Return the Unicode name for a given property, as given in the + Unicode database file PropertyAliases.txt + + ***********************************************************************/ + + char[] getPropertyName (Property p, NameChoice choice) + { + return toArray (u_getPropertyName (p, choice)); + } + + /*********************************************************************** + + Return the Unicode name for a given property value, as given + in the Unicode database file PropertyValueAliases.txt. + + ***********************************************************************/ + + char[] getPropertyValueName (Property p, NameChoice choice, uint value) + { + return toArray (u_getPropertyValueName (p, value, choice)); + } + + /*********************************************************************** + + Gets the Unicode version information + + ***********************************************************************/ + + void getUnicodeVersion (inout Version v) + { + u_getUnicodeVersion (v); + } + + /*********************************************************************** + + Get the "age" of the code point + + ***********************************************************************/ + + void getCharAge (dchar c, inout Version v) + { + u_charAge (c, v); + } + + + /*********************************************************************** + + These are externalised directly to the client (sans wrapper), + but this may have to change for linux, depending upon the + ICU function-naming conventions within the Posix libraries. 
+
+        ***********************************************************************/
+
+        final static extern (C)
+        {
+                /***************************************************************
+
+                        Check if a code point has the Alphabetic Unicode
+                        property.
+                        Paired with ICU symbol "u_isUAlphabetic" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUAlphabetic;
+
+                /***************************************************************
+
+                        Check if a code point has the Lowercase Unicode
+                        property.
+                        Paired with ICU symbol "u_isULowercase" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isULowercase;
+
+                /***************************************************************
+
+                        Check if a code point has the Uppercase Unicode
+                        property.
+                        Paired with ICU symbol "u_isUUppercase" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUUppercase;
+
+                /***************************************************************
+
+                        Check if a code point has the White_Space Unicode
+                        property.
+                        Paired with ICU symbol "u_isUWhiteSpace" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUWhiteSpace;
+
+                /***************************************************************
+
+                        Determines whether the specified code point has the
+                        general category "Ll" (lowercase letter).
+                        Paired with ICU symbol "u_islower" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isLower;
+
+                /***************************************************************
+
+                        Determines whether the specified code point has the
+                        general category "Lu" (uppercase letter).
+                        Paired with ICU symbol "u_isupper" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isUpper;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        titlecase letter.
+                        Paired with ICU symbol "u_istitle" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isTitle;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        digit character according to Java.
+                        Paired with ICU symbol "u_isdigit" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isDigit;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        letter character.
+                        Paired with ICU symbol "u_isalpha" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isAlpha;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is an
+                        alphanumeric character (letter or digit) according
+                        to Java.
+                        Paired with ICU symbol "u_isalnum" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isAlphaNumeric;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        hexadecimal digit.
+                        Paired with ICU symbol "u_isxdigit" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isHexDigit;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        punctuation character.
+                        Paired with ICU symbol "u_ispunct" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isPunct;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        "graphic" character (printable, excluding spaces).
+                        Paired with ICU symbol "u_isgraph" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isGraph;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        "blank" or "horizontal space", a character that
+                        visibly separates words on a line.
+                        Paired with ICU symbol "u_isblank" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isBlank;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is
+                        "defined", which usually means that it is assigned
+                        a character.
+                        Paired with ICU symbol "u_isdefined" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isDefined;
+
+                /***************************************************************
+
+                        Determines if the specified character is a space
+                        character or not.
+                        Paired with ICU symbol "u_isspace" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isSpace;
+
+                /***************************************************************
+
+                        Determine if the specified code point is a space
+                        character according to Java.
+                        Paired with ICU symbol "u_isJavaSpaceChar" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isJavaSpaceChar;
+
+                /***************************************************************
+
+                        Determines if the specified code point is a whitespace
+                        character according to Java/ICU.
+                        Paired with ICU symbol "u_isWhitespace" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isWhiteSpace;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        control character (as defined by this function).
+                        Paired with ICU symbol "u_iscntrl" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isCtrl;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is an ISO
+                        control code.
+                        Paired with ICU symbol "u_isISOControl" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isISOControl;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        printable character.
+                        Paired with ICU symbol "u_isprint" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isPrint;
+
+                /***************************************************************
+
+                        Determines whether the specified code point is a
+                        base character.
+                        Paired with ICU symbol "u_isbase" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isBase;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        as the first character in an identifier according to
+                        Unicode (The Unicode Standard, Version 3.0, chapter
+                        5.16 Identifiers).
+                        Paired with ICU symbol "u_isIDStart" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isIDStart;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        in an identifier according to Java.
+                        Paired with ICU symbol "u_isIDPart" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isIDPart;
+
+                /***************************************************************
+
+                        Determines if the specified character should be
+                        regarded as an ignorable character in an identifier,
+                        according to Java.
+                        Paired with ICU symbol "u_isIDIgnorable" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isIDIgnorable;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        as the first character in a Java identifier.
+                        Paired with ICU symbol "u_isJavaIDStart" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isJavaIDStart;
+
+                /***************************************************************
+
+                        Determines if the specified character is permissible
+                        in a Java identifier.
+                        Paired with ICU symbol "u_isJavaIDPart" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isJavaIDPart;
+
+                /***************************************************************
+
+                        Determines whether the code point has the
+                        Bidi_Mirrored property.
+                        Paired with ICU symbol "u_isMirrored" in targets.
+
+                ***************************************************************/
+
+                bool function (dchar c) isMirrored;
+
+                /***************************************************************
+
+                        Returns the decimal digit value of a decimal digit
+                        character.
+                        Paired with ICU symbol "u_charDigitValue" in targets.
+
+                        NOTE(review): the ICU C API documents this function
+                        as returning a signed 32-bit value, with -1 for a
+                        non-digit; a ubyte result cannot carry -1. Confirm
+                        the intended binding type.
+
+                ***************************************************************/
+
+                ubyte function (dchar c) charDigitValue;
+
+                /***************************************************************
+
+                        Maps the specified character to a "mirror-image"
+                        character.
+                        Paired with ICU symbol "u_charMirror" in targets.
+
+                ***************************************************************/
+
+                dchar function (dchar c) charMirror;
+
+                /***************************************************************
+
+                        Returns the general category value for the code point.
+                        Paired with ICU symbol "u_charType" in targets.
+
+                ***************************************************************/
+
+                ubyte function (dchar c) charType;
+
+                /***************************************************************
+
+                        Returns the combining class of the code point as
+                        specified in UnicodeData.txt.
+                        Paired with ICU symbol "u_getCombiningClass" in targets.
+
+                ***************************************************************/
+
+                ubyte function (dchar c) getCombiningClass;
+
+                /***************************************************************
+
+                        The given character is mapped to its lowercase
+                        equivalent according to UnicodeData.txt; if the
+                        character has no lowercase equivalent, the
+                        character itself is returned.
+                        Paired with ICU symbol "u_tolower" in targets.
+
+                ***************************************************************/
+
+                dchar function (dchar c) toLower;
+
+                /***************************************************************
+
+                        The given character is mapped to its uppercase equivalent
+                        according to UnicodeData.txt; if the character has no
+                        uppercase equivalent, the character itself is returned.
+                        Paired with ICU symbol "u_toupper" in targets.
+
+                ***************************************************************/
+
+                dchar function (dchar c) toUpper;
+
+                /***************************************************************
+
+                        The given character is mapped to its titlecase
+                        equivalent according to UnicodeData.txt; if none
+                        is defined, the character itself is returned.
+                        Paired with ICU symbol "u_totitle" in targets.
+
+                ***************************************************************/
+
+                dchar function (dchar c) toTitle;
+
+                /***************************************************************
+
+                        The given character is mapped to its case folding
+                        equivalent according to UnicodeData.txt and
+                        CaseFolding.txt; if the character has no case folding
+                        equivalent, the character itself is returned.
+                        Paired with ICU symbol "u_foldCase" in targets.
+
+                ***************************************************************/
+
+                dchar function (dchar c, uint options) foldCase;
+
+                /***************************************************************
+
+                        Returns the decimal digit value of the code point in
+                        the specified radix.
+                        Paired with ICU symbol "u_digit" in targets.
+
+                        NOTE(review): the ICU C API documents this function
+                        as returning a signed 32-bit value, with -1 when
+                        there is no value in the radix; through this uint
+                        binding that presumably reads as uint.max. Confirm
+                        how callers are expected to detect it.
+
+                ***************************************************************/
+
+                uint function (dchar ch, ubyte radix) digit;
+
+                /***************************************************************
+
+                        Determines the character representation for a specific
+                        digit in the specified radix.
+                        Paired with ICU symbol "u_forDigit" in targets.
+
+                ***************************************************************/
+
+                dchar function (uint digit, ubyte radix) forDigit;
+
+                /***************************************************************
+
+                        Get the numeric value for a Unicode code point as
+                        defined in the Unicode Character Database.
+                        Paired with ICU symbol "u_getNumericValue" in targets.
+
+                ***************************************************************/
+
+                double function (dchar c) getNumericValue;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library.
This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;
+
+        /***********************************************************************
+
+                C-linkage function pointers for the ICU entry points
+                that the wrapper methods of this class call. They are
+                private to the class, and each one is paired with its
+                ICU symbol name in the targets table.
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {
+                uint function (uint, uint) u_getIntPropertyValue;
+                uint function (uint) u_getIntPropertyMinValue;
+                uint function (uint) u_getIntPropertyMaxValue;
+                uint function (dchar) u_charDirection;
+                uint function (dchar) ublock_getCode;
+                uint function (dchar, uint, char*, uint, inout UErrorCode) u_charName;
+                uint function (dchar, char*, uint, inout UErrorCode) u_getISOComment;
+                uint function (uint, char*, inout UErrorCode) u_charFromName;
+                char* function (uint, uint) u_getPropertyName;
+                char* function (uint, uint, uint) u_getPropertyValueName;
+                void function (inout Version) u_getUnicodeVersion;
+                void function (dchar, inout Version) u_charAge;
+        }
+
+        /***********************************************************************
+
+                Binding table: every entry pairs the address of one of
+                the function pointers declared in this class with the
+                name of the ICU symbol it stands for. The table is
+                passed to FunctionLoader.bind by the static constructor.
+
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [
+                {cast(void**) &forDigit, "u_forDigit"},
+                {cast(void**) &digit, "u_digit"},
+                {cast(void**) &foldCase, "u_foldCase"},
+                {cast(void**) &toTitle, "u_totitle"},
+                {cast(void**) &toUpper, "u_toupper"},
+                {cast(void**) &toLower, "u_tolower"},
+                {cast(void**) &charType, "u_charType"},
+                {cast(void**) &charMirror, "u_charMirror"},
+                {cast(void**) &charDigitValue, "u_charDigitValue"},
+                {cast(void**) &isJavaIDPart, "u_isJavaIDPart"},
+                {cast(void**) &isJavaIDStart, "u_isJavaIDStart"},
+                {cast(void**) &isIDIgnorable, "u_isIDIgnorable"},
+                {cast(void**) &isIDPart, "u_isIDPart"},
+                {cast(void**) &isIDStart, "u_isIDStart"},
+                {cast(void**) &isMirrored, "u_isMirrored"},
+                {cast(void**) &isBase, "u_isbase"},
+                {cast(void**) &isPrint, "u_isprint"},
+                {cast(void**) &isISOControl, "u_isISOControl"},
+                {cast(void**) &isCtrl, "u_iscntrl"},
+                {cast(void**) &isWhiteSpace, "u_isWhitespace"},
+                {cast(void**) &isJavaSpaceChar, "u_isJavaSpaceChar"},
+                {cast(void**) &isSpace, "u_isspace"},
+                {cast(void**) &isDefined, "u_isdefined"},
+                {cast(void**) &isBlank, "u_isblank"},
+                {cast(void**) &isGraph, "u_isgraph"},
+                {cast(void**) &isPunct, "u_ispunct"},
+                {cast(void**) &isHexDigit, "u_isxdigit"},
+                {cast(void**) &isAlpha, "u_isalpha"},
+                {cast(void**) &isAlphaNumeric, "u_isalnum"},
+                {cast(void**) &isDigit, "u_isdigit"},
+                {cast(void**) &isTitle, "u_istitle"},
+                {cast(void**) &isUpper, "u_isupper"},
+                {cast(void**) &isLower, "u_islower"},
+                {cast(void**) &isUAlphabetic, "u_isUAlphabetic"},
+                {cast(void**) &isUWhiteSpace, "u_isUWhiteSpace"},
+                {cast(void**) &isUUppercase, "u_isUUppercase"},
+                {cast(void**) &isULowercase, "u_isULowercase"},
+                {cast(void**) &getNumericValue, "u_getNumericValue"},
+                {cast(void**) &getCombiningClass, "u_getCombiningClass"},
+                {cast(void**) &u_getIntPropertyValue, "u_getIntPropertyValue"},
+                {cast(void**) &u_getIntPropertyMinValue,"u_getIntPropertyMinValue"},
+                {cast(void**) &u_getIntPropertyMaxValue,"u_getIntPropertyMaxValue"},
+                {cast(void**) &u_charDirection, "u_charDirection"},
+                {cast(void**) &ublock_getCode, "ublock_getCode"},
+                {cast(void**) &u_charName, "u_charName"},
+                {cast(void**) &u_getISOComment, "u_getISOComment"},
+                {cast(void**) &u_charFromName, "u_charFromName"},
+                {cast(void**) &u_getPropertyName, "u_getPropertyName"},
+                {cast(void**) &u_getPropertyValueName, "u_getPropertyValueName"},
+                {cast(void**) &u_getUnicodeVersion, "u_getUnicodeVersion"},
+                {cast(void**) &u_charAge, "u_charAge"},
+                ];
+
+        /***********************************************************************
+
+                Static constructor: hands the targets table to
+                FunctionLoader.bind for the icuuc library (icuuc is
+                declared outside this section) and keeps the returned
+                value in library so the static destructor can pass it
+                to FunctionLoader.unbind.
+
+        ***********************************************************************/
+
+        static this ()
+        {
+                library = FunctionLoader.bind (icuuc, targets);
+        }
+
+        
/***********************************************************************
+
+                Static destructor: passes the value stored in library
+                by the static constructor to FunctionLoader.unbind.
+
+        ***********************************************************************/
+
+        static ~this ()
+        {
+                FunctionLoader.unbind (library);
+        }
+}
diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UCollator.d
--- a/dwtx/dwtxhelper/mangoicu/UCollator.d Mon Jul 07 15:53:07 2008 +0200
+++ b/dwtx/dwtxhelper/mangoicu/UCollator.d Mon Jul 07 15:54:03 2008 +0200
@@ -1,732 +1,732 @@
-/*******************************************************************************
-
-        @file UCollator.d
-
-        Copyright (c) 2004 Kris Bell
-
-        This software is provided 'as-is', without any express or implied
-        warranty. In no event will the authors be held liable for damages
-        of any kind arising from the use of this software.
-
-        Permission is hereby granted to anyone to use this software for any
-        purpose, including commercial applications, and to alter it and/or
-        redistribute it freely, subject to the following restrictions:
-
-        1. The origin of this software must not be misrepresented; you must
-        not claim that you wrote the original software. If you use this
-        software in a product, an acknowledgment within documentation of
-        said product would be appreciated but is not required.
-
-        2. Altered source versions must be plainly marked as such, and must
-        not be misrepresented as being the original software.
-
-        3. This notice may not be removed or altered from any distribution
-        of the source.
-
-        4. Derivative works are permitted, but they must carry this notice
-        in full and credit the original source.
-
-
-        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-
-        @version Initial version, November 2004
-        @author Kris
-
-        Note that this package and documentation is built around the ICU
-        project (http://oss.software.ibm.com/icu/).
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UCollator; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.USet, - dwtx.dwtxhelper.mangoicu.ULocale, - dwtx.dwtxhelper.mangoicu.UString; - -/******************************************************************************* - - The API for Collator performs locale-sensitive string comparison. - You use this service to build searching and sorting routines for - natural language text. Important: The ICU collation service has been - reimplemented in order to achieve better performance and UCA compliance. - For details, see the collation design document. - - For more information about the collation service see the users guide. - - Collation service provides correct sorting orders for most locales - supported in ICU. If specific data for a locale is not available, - the orders eventually falls back to the UCA sort order. - - Sort ordering may be customized by providing your own set of rules. - For more on this subject see the Collation customization section of - the users guide. - - See - this page for full details. 
- -*******************************************************************************/ - -class UCollator : ICU -{ - package Handle handle; - - enum Attribute - { - FrenchCollation, - AlternateHandling, - CaseFirst, - CaseLevel, - NormalizationMode, - DecompositionMode = NormalizationMode, - strength, - HiraganaQuaternaryMode, - NumericCollation, - AttributeCount - } - - enum AttributeValue - { - Default = -1, - Primary = 0, - Secondary = 1, - Tertiary = 2, - DefaultStrength = Tertiary, - CeStrengthLimit, - Quaternary = 3, - Identical = 15, - strengthLimit, - Off = 16, - On = 17, - Shifted = 20, - NonIgnorable = 21, - LowerFirst = 24, - UpperFirst = 25, - AttributeValueCount - } - - enum RuleOption - { - TailoringOnly, - FullRules - } - - enum BoundMode - { - BoundLower = 0, - BoundUpper = 1, - BoundUpperLong = 2, - BoundValueCount - } - - typedef AttributeValue Strength; - - /*********************************************************************** - - Open a UCollator for comparing strings. The locale specified - determines the required collation rules. Special values for - locales can be passed in - if ULocale.Default is passed for - the locale, the default locale collation rules will be used. - If ULocale.Root is passed, UCA rules will be used - - ***********************************************************************/ - - this (ULocale locale) - { - UErrorCode e; - - handle = ucol_open (toString(locale.name), e); - testError (e, "failed to open collator"); - } - - /*********************************************************************** - - Produce a UCollator instance according to the rules supplied. - - The rules are used to change the default ordering, defined in - the UCA in a process called tailoring. 
For the syntax of the - rules please see users guide - - ***********************************************************************/ - - this (UStringView rules, AttributeValue mode, Strength strength) - { - UErrorCode e; - - handle = ucol_openRules (rules.get.ptr, rules.len, mode, strength, null, e); - testError (e, "failed to open rules-based collator"); - } - - /*********************************************************************** - - Open a collator defined by a short form string. The - structure and the syntax of the string is defined in - the "Naming collators" section of the users guide: - http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators - Attributes are overriden by the subsequent attributes. - So, for "S2_S3", final strength will be 3. 3066bis - locale overrides individual locale parts. - - The call to this constructor is equivalent to a plain - constructor, followed by a series of calls to setAttribute - and setVariableTop - - ***********************************************************************/ - - this (char[] shortName, bool forceDefaults) - { - UErrorCode e; - - handle = ucol_openFromShortString (toString(shortName), forceDefaults, null, e); - testError (e, "failed to open short-name collator"); - } - - /*********************************************************************** - - Internal constructor invoked via USearch - - ***********************************************************************/ - - package this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Close a UCollator - - ***********************************************************************/ - - ~this () - { - ucol_close (handle); - } - - /*********************************************************************** - - Get a set containing the contractions defined by the - collator. - - The set includes both the UCA contractions and the - contractions defined by the collator. 
This set will - contain only strings. If a tailoring explicitly - suppresses contractions from the UCA (like Russian), - removed contractions will not be in the resulting set. - - ***********************************************************************/ - - void getContractions (USet set) - { - UErrorCode e; - - ucol_getContractions (handle, set.handle, e); - testError (e, "failed to get collator contractions"); - } - - /*********************************************************************** - - Compare two strings. Return value is -, 0, + - - ***********************************************************************/ - - int strcoll (UStringView source, UStringView target) - { - return ucol_strcoll (handle, source.get.ptr, source.len, target.get.ptr, target.len); - } - - /*********************************************************************** - - Determine if one string is greater than another. This - function is equivalent to strcoll() > 1 - - ***********************************************************************/ - - bool greater (UStringView source, UStringView target) - { - return ucol_greater (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; - } - - /*********************************************************************** - - Determine if one string is greater than or equal to - another. 
This function is equivalent to strcoll() >= 0 - - ***********************************************************************/ - - bool greaterOrEqual (UStringView source, UStringView target) - { - return ucol_greaterOrEqual (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; - } - - /*********************************************************************** - - This function is equivalent to strcoll() == 0 - - ***********************************************************************/ - - bool equal (UStringView source, UStringView target) - { - return ucol_equal (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; - } - - /*********************************************************************** - - Get the collation strength used in a UCollator. The - strength influences how strings are compared. - - ***********************************************************************/ - - Strength getStrength () - { - return ucol_getStrength (handle); - } - - /*********************************************************************** - - Set the collation strength used in this UCollator. The - strength influences how strings are compared. one of - Primary, Secondary, Tertiary, Quaternary, Dentical, or - Default - - ***********************************************************************/ - - void setStrength (Strength s) - { - ucol_setStrength (handle, s); - } - - /*********************************************************************** - - Get the display name for a UCollator. 
The display name is - suitable for presentation to a user - - ***********************************************************************/ - - void getDisplayName (ULocale obj, ULocale display, UString dst) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return ucol_getDisplayName (toString(obj.name), toString(display.name), dst.get.ptr, dst.len, e); - } - - dst.format (&fmt, "failed to get collator display name"); - } - - /*********************************************************************** - - Returns current rules. Options define whether full rules - are returned or just the tailoring. - - ***********************************************************************/ - - void getRules (UString dst, RuleOption o = RuleOption.FullRules) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - uint needed = ucol_getRulesEx (handle, o, dst.get.ptr, dst.len); - if (needed > len) - e = e.BufferOverflow; - return needed; - } - - dst.format (&fmt, "failed to get collator rules"); - } - - /*********************************************************************** - - Get the short definition string for a collator. - - This API harvests the collator's locale and the attribute - set and produces a string that can be used for opening a - collator with the same properties using the char[] style - constructor. This string will be normalized. 
- - The structure and the syntax of the string is defined in the - "Naming collators" section of the users guide: - http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators - - ***********************************************************************/ - - char[] getShortDefinitionString (ULocale locale = ULocale.Default) - { - UErrorCode e; - char[64] dst; - - uint len = ucol_getShortDefinitionString (handle, toString(locale.name), dst.ptr, dst.length, e); - testError (e, "failed to get collator short name"); - return dst[0..len].dup; - } - - /*********************************************************************** - - Verifies and normalizes short definition string. Normalized - short definition string has all the option sorted by the - argument name, so that equivalent definition strings are the - same - - ***********************************************************************/ - - char[] normalizeShortDefinitionString (char[] source) - { - UErrorCode e; - char[64] dst; - - uint len = ucol_normalizeShortDefinitionString (toString(source), dst.ptr, dst.length, null, e); - testError (e, "failed to normalize collator short name"); - return dst[0..len].dup; - } - - /*********************************************************************** - - Get a sort key for a string from a UCollator. Sort keys - may be compared using strcmp. - - ***********************************************************************/ - - ubyte[] getSortKey (UStringView t, ubyte[] result) - { - uint len = ucol_getSortKey (handle, t.get.ptr, t.len, result.ptr, result.length); - if (len < result.length) - return result [0..len]; - return null; - } - - /*********************************************************************** - - Merge two sort keys. The levels are merged with their - corresponding counterparts (primaries with primaries, - secondaries with secondaries etc.). Between the values - from the same level a separator is inserted. 
example - (uncompressed): 191B1D 01 050505 01 910505 00 and - 1F2123 01 050505 01 910505 00 will be merged as - 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00 - This allows for concatenating of first and last names for - sorting, among other things. If the destination buffer is - not big enough, the results are undefined. If any of source - lengths are zero or any of source pointers are null/undefined, - result is of size zero. - - ***********************************************************************/ - - ubyte[] mergeSortkeys (ubyte[] left, ubyte[] right, ubyte[] result) - { - uint len = ucol_mergeSortkeys (left.ptr, left.length, right.ptr, right.length, result.ptr, result.length); - if (len < result.length) - return result [0..len]; - return null; - } - - /*********************************************************************** - - Produce a bound for a given sortkey and a number of levels. - - Return value is always the number of bytes needed, regardless - of whether the result buffer was big enough or even valid. - - Resulting bounds can be used to produce a range of strings - that are between upper and lower bounds. For example, if - bounds are produced for a sortkey of string "smith", strings - between upper and lower bounds with one level would include - "Smith", "SMITH", "sMiTh". - - There are two upper bounds that can be produced. If BoundUpper - is produced, strings matched would be as above. However, if - bound produced using BoundUpperLong is used, the above example - will also match "Smithsonian" and similar. 
- - ***********************************************************************/ - - ubyte[] getBound (BoundMode mode, ubyte[] source, ubyte[] result, uint levels = 1) - { - UErrorCode e; - - uint len = ucol_getBound (source.ptr, source.length, mode, levels, result.ptr, result.length, e); - testError (e, "failed to get sortkey bound"); - if (len < result.length) - return result [0..len]; - return null; - } - - /*********************************************************************** - - Gets the version information for a Collator. - - Version is currently an opaque 32-bit number which depends, - among other things, on major versions of the collator - tailoring and UCA - - ***********************************************************************/ - - void getVersion (inout Version v) - { - ucol_getVersion (handle, v); - } - - /*********************************************************************** - - Gets the UCA version information for this Collator - - ***********************************************************************/ - - void getUCAVersion (inout Version v) - { - ucol_getUCAVersion (handle, v); - } - - /*********************************************************************** - - Universal attribute setter - - ***********************************************************************/ - - void setAttribute (Attribute attr, AttributeValue value) - { - UErrorCode e; - - ucol_setAttribute (handle, attr, value, e); - testError (e, "failed to set collator attribute"); - } - - /*********************************************************************** - - Universal attribute getter - - ***********************************************************************/ - - AttributeValue getAttribute (Attribute attr) - { - UErrorCode e; - - AttributeValue v = ucol_getAttribute (handle, attr, e); - testError (e, "failed to get collator attribute"); - return v; - } - - /*********************************************************************** - - Variable top is a two byte primary value which 
causes all - the codepoints with primary values that are less or equal - than the variable top to be shifted when alternate handling - is set to Shifted. - - ***********************************************************************/ - - void setVariableTop (UStringView t) - { - UErrorCode e; - - ucol_setVariableTop (handle, t.get.ptr, t.len, e); - testError (e, "failed to set variable-top"); - } - - /*********************************************************************** - - Sets the variable top to a collation element value - supplied.Variable top is set to the upper 16 bits. - Lower 16 bits are ignored. - - ***********************************************************************/ - - void setVariableTop (uint x) - { - UErrorCode e; - - ucol_restoreVariableTop (handle, x, e); - testError (e, "failed to restore variable-top"); - } - - /*********************************************************************** - - Gets the variable top value of this Collator. Lower 16 bits - are undefined and should be ignored. - - ***********************************************************************/ - - uint getVariableTop () - { - UErrorCode e; - - uint x = ucol_getVariableTop (handle, e); - testError (e, "failed to get variable-top"); - return x; - } - - /*********************************************************************** - - Gets the locale name of the collator. If the collator is - instantiated from the rules, then this function will throw - an exception - - ***********************************************************************/ - - void getLocale (ULocale locale, ULocale.Type type) - { - UErrorCode e; - - locale.name = toArray (ucol_getLocaleByType (handle, type, e)); - if (isError(e) || locale.name is null) - exception ("failed to get collator locale"); - } - - /*********************************************************************** - - Get the Unicode set that contains all the characters and - sequences tailored in this collator. 
- - ***********************************************************************/ - - USet getTailoredSet () - { - UErrorCode e; - - Handle h = ucol_getTailoredSet (handle, e); - testError (e, "failed to get tailored set"); - return new USet (h); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - void function (Handle) ucol_close; - Handle function (char *loc, inout UErrorCode e) ucol_open; - Handle function (wchar* rules, uint rulesLength, AttributeValue normalizationMode, Strength strength, UParseError *parseError, inout UErrorCode e) ucol_openRules; - Handle function (char *definition, byte forceDefaults, UParseError *parseError, inout UErrorCode e) ucol_openFromShortString; - uint function (Handle, Handle conts, inout UErrorCode e) ucol_getContractions; - int function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_strcoll; - byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greater; - byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greaterOrEqual; - byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_equal; - Strength function (Handle) ucol_getStrength; - void function (Handle, Strength strength) ucol_setStrength; - uint function (char *objLoc, char *dispLoc, wchar* result, uint resultLength, inout UErrorCode e) ucol_getDisplayName; - uint function (Handle, char *locale, char *buffer, uint capacity, inout UErrorCode e) 
ucol_getShortDefinitionString; - uint function (char *source, char *destination, uint capacity, UParseError *parseError, inout UErrorCode e) ucol_normalizeShortDefinitionString; - uint function (Handle, wchar* source, uint sourceLength, ubyte *result, uint resultLength) ucol_getSortKey; - uint function (ubyte *source, uint sourceLength, BoundMode boundType, uint noOfLevels, ubyte *result, uint resultLength, inout UErrorCode e) ucol_getBound; - void function (Handle, Version info) ucol_getVersion; - void function (Handle, Version info) ucol_getUCAVersion; - uint function (ubyte *src1, uint src1Length, ubyte *src2, uint src2Length, ubyte *dest, uint destCapacity) ucol_mergeSortkeys; - void function (Handle, Attribute attr, AttributeValue value, inout UErrorCode e) ucol_setAttribute; - AttributeValue function (Handle, Attribute attr, inout UErrorCode e) ucol_getAttribute; - uint function (Handle, wchar* varTop, uint len, inout UErrorCode e) ucol_setVariableTop; - uint function (Handle, inout UErrorCode e) ucol_getVariableTop; - void function (Handle, uint varTop, inout UErrorCode e) ucol_restoreVariableTop; - uint function (Handle, RuleOption delta, wchar* buffer, uint bufferLen) ucol_getRulesEx; - char* function (Handle, ULocale.Type type, inout UErrorCode e) ucol_getLocaleByType; - Handle function (Handle, inout UErrorCode e) ucol_getTailoredSet; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucol_open, "ucol_open"}, - {cast(void**) &ucol_close, "ucol_close"}, - {cast(void**) &ucol_openRules, "ucol_openRules"}, - {cast(void**) &ucol_openFromShortString, "ucol_openFromShortString"}, - {cast(void**) &ucol_getContractions, "ucol_getContractions"}, - {cast(void**) &ucol_strcoll, "ucol_strcoll"}, - {cast(void**) &ucol_greater, "ucol_greater"}, - {cast(void**) &ucol_greaterOrEqual, 
"ucol_greaterOrEqual"}, - {cast(void**) &ucol_equal, "ucol_equal"}, - {cast(void**) &ucol_getStrength, "ucol_getStrength"}, - {cast(void**) &ucol_setStrength, "ucol_setStrength"}, - {cast(void**) &ucol_getDisplayName, "ucol_getDisplayName"}, - {cast(void**) &ucol_getShortDefinitionString, "ucol_getShortDefinitionString"}, - {cast(void**) &ucol_normalizeShortDefinitionString, "ucol_normalizeShortDefinitionString"}, - {cast(void**) &ucol_getSortKey, "ucol_getSortKey"}, - {cast(void**) &ucol_getBound, "ucol_getBound"}, - {cast(void**) &ucol_getVersion, "ucol_getVersion"}, - {cast(void**) &ucol_getUCAVersion, "ucol_getUCAVersion"}, - {cast(void**) &ucol_mergeSortkeys, "ucol_mergeSortkeys"}, - {cast(void**) &ucol_setAttribute, "ucol_setAttribute"}, - {cast(void**) &ucol_getAttribute, "ucol_getAttribute"}, - {cast(void**) &ucol_setVariableTop, "ucol_setVariableTop"}, - {cast(void**) &ucol_getVariableTop, "ucol_getVariableTop"}, - {cast(void**) &ucol_restoreVariableTop, "ucol_restoreVariableTop"}, - {cast(void**) &ucol_getRulesEx, "ucol_getRulesEx"}, - {cast(void**) &ucol_getLocaleByType, "ucol_getLocaleByType"}, - {cast(void**) &ucol_getTailoredSet, "ucol_getTailoredSet"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - +/******************************************************************************* + + @file UCollator.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UCollator; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.USet, + dwtx.dwtxhelper.mangoicu.ULocale, + dwtx.dwtxhelper.mangoicu.UString; + +/******************************************************************************* + + The API for Collator performs locale-sensitive string comparison. + You use this service to build searching and sorting routines for + natural language text. Important: The ICU collation service has been + reimplemented in order to achieve better performance and UCA compliance. + For details, see the collation design document. + + For more information about the collation service see the users guide. + + Collation service provides correct sorting orders for most locales + supported in ICU. 
If specific data for a locale is not available, + the orders eventually falls back to the UCA sort order. + + Sort ordering may be customized by providing your own set of rules. + For more on this subject see the Collation customization section of + the users guide. + + See + this page for full details. + +*******************************************************************************/ + +class UCollator : ICU +{ + package Handle handle; /* ICU collator handle: assigned by the constructors, passed to ucol_close by the destructor */ + + enum Attribute + { + FrenchCollation, + AlternateHandling, + CaseFirst, + CaseLevel, + NormalizationMode, + DecompositionMode = NormalizationMode, /* alias: same value as NormalizationMode */ + strength, + HiraganaQuaternaryMode, + NumericCollation, + AttributeCount + } + + enum AttributeValue + { + Default = -1, + Primary = 0, + Secondary = 1, + Tertiary = 2, + DefaultStrength = Tertiary, + CeStrengthLimit, /* implicit value 3 (DefaultStrength plus one), the same value as Quaternary */ + Quaternary = 3, + Identical = 15, + strengthLimit, /* implicit value 16 (Identical plus one), the same value as Off */ + Off = 16, + On = 17, + Shifted = 20, + NonIgnorable = 21, + LowerFirst = 24, + UpperFirst = 25, + AttributeValueCount /* implicit value 26 (UpperFirst plus one) */ + } + + enum RuleOption + { + TailoringOnly, + FullRules + } + + enum BoundMode + { + BoundLower = 0, + BoundUpper = 1, + BoundUpperLong = 2, + BoundValueCount + } + + typedef AttributeValue Strength; /* strengths are AttributeValue members under a distinct type name */ + + /*********************************************************************** + + Open a UCollator for comparing strings. The locale specified + determines the required collation rules. Special values for + locales can be passed in - if ULocale.Default is passed for + the locale, the default locale collation rules will be used. + If ULocale.Root is passed, UCA rules will be used + + ***********************************************************************/ + + this (ULocale locale) + { + UErrorCode e; + + handle = ucol_open (toString(locale.name), e); + testError (e, "failed to open collator"); + } + + /*********************************************************************** + + Produce a UCollator instance according to the rules supplied. 
+ + The rules are used to change the default ordering, defined in + the UCA in a process called tailoring. For the syntax of the + rules please see users guide + + ***********************************************************************/ + + this (UStringView rules, AttributeValue mode, Strength strength) + { + UErrorCode e; + + handle = ucol_openRules (rules.get.ptr, rules.len, mode, strength, null, e); /* null is the parseError argument */ + testError (e, "failed to open rules-based collator"); + } + + /*********************************************************************** + + Open a collator defined by a short form string. The + structure and the syntax of the string are defined in + the "Naming collators" section of the users guide: + http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + Attributes are overridden by the subsequent attributes. + So, for "S2_S3", final strength will be 3. 3066bis + locale overrides individual locale parts. + + The call to this constructor is equivalent to a plain + constructor, followed by a series of calls to setAttribute + and setVariableTop + + ***********************************************************************/ + + this (char[] shortName, bool forceDefaults) + { + UErrorCode e; + + handle = ucol_openFromShortString (toString(shortName), forceDefaults, null, e); /* null is the parseError argument */ + testError (e, "failed to open short-name collator"); + } + + /*********************************************************************** + + Internal constructor invoked via USearch: adopts the given handle as is, no ICU call is made + + ***********************************************************************/ + + package this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Close a UCollator. NOTE(review): a handle received through the package constructor is closed here as well; confirm the supplier (USearch) does not also close it + + ***********************************************************************/ + + ~this () + { + ucol_close (handle); + } + + /*********************************************************************** + + Get a set containing the contractions defined by the + 
collator. + + The set includes both the UCA contractions and the + contractions defined by the collator. This set will + contain only strings. If a tailoring explicitly + suppresses contractions from the UCA (like Russian), + removed contractions will not be in the resulting set. + + ***********************************************************************/ + + void getContractions (USet set) + { + UErrorCode e; + + ucol_getContractions (handle, set.handle, e); /* the uint returned by the call is not used */ + testError (e, "failed to get collator contractions"); + } + + /*********************************************************************** + + Compare two strings. The return value is negative, zero or positive + + ***********************************************************************/ + + int strcoll (UStringView source, UStringView target) + { + return ucol_strcoll (handle, source.get.ptr, source.len, target.get.ptr, target.len); + } + + /*********************************************************************** + + Determine if one string is greater than another. This + function is equivalent to strcoll() > 0 + + ***********************************************************************/ + + bool greater (UStringView source, UStringView target) + { + return ucol_greater (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; /* the byte result is turned into a bool */ + } + + /*********************************************************************** + + Determine if one string is greater than or equal to + another. 
This function is equivalent to strcoll() >= 0 + + ***********************************************************************/ + + bool greaterOrEqual (UStringView source, UStringView target) + { + return ucol_greaterOrEqual (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; /* the byte result is turned into a bool */ + } + + /*********************************************************************** + + Determine if two strings are equal. This function is equivalent to strcoll() == 0 + + ***********************************************************************/ + + bool equal (UStringView source, UStringView target) + { + return ucol_equal (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0; /* the byte result is turned into a bool */ + } + + /*********************************************************************** + + Get the collation strength used in a UCollator. The + strength influences how strings are compared. + + ***********************************************************************/ + + Strength getStrength () + { + return ucol_getStrength (handle); + } + + /*********************************************************************** + + Set the collation strength used in this UCollator. The + strength influences how strings are compared. Pass one of + Primary, Secondary, Tertiary, Quaternary, Identical, or + Default + + ***********************************************************************/ + + void setStrength (Strength s) + { + ucol_setStrength (handle, s); + } + + /*********************************************************************** + + Get the display name for a UCollator. 
The display name is + suitable for presentation to a user + + ***********************************************************************/ + + void getDisplayName (ULocale obj, ULocale display, UString dst) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) /* callback given to dst.format; p and len are the buffer and capacity it is called with */ + { + return ucol_getDisplayName (toString(obj.name), toString(display.name), p, len, e); /* write into the buffer handed to the callback, not into dst's current content */ + } + + dst.format (&fmt, "failed to get collator display name"); + } + + /*********************************************************************** + + Returns the current rules through dst. The option o selects whether the full + rules are returned or just the tailoring; FullRules is the default. + + ***********************************************************************/ + + void getRules (UString dst, RuleOption o = RuleOption.FullRules) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) /* callback given to dst.format; p and len are the buffer and capacity it is called with */ + { + uint needed = ucol_getRulesEx (handle, o, p, len); /* fill the same buffer whose capacity is checked on the next line */ + if (needed > len) /* ucol_getRulesEx has no error-code parameter, so overflow is flagged here */ + e = e.BufferOverflow; + return needed; + } + + dst.format (&fmt, "failed to get collator rules"); + } + + /*********************************************************************** + + Get the short definition string for a collator. + + This API harvests the collator's locale and the attribute + set and produces a string that can be used for opening a + collator with the same properties using the char[] style + constructor. This string will be normalized. 
+ + The structure and the syntax of the string is defined in the + "Naming collators" section of the users guide: + http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + + ***********************************************************************/ + + char[] getShortDefinitionString (ULocale locale = ULocale.Default) + { + UErrorCode e; + char[64] dst; /* fixed 64-char scratch buffer; only dst[0..len] is copied out below */ + + uint len = ucol_getShortDefinitionString (handle, toString(locale.name), dst.ptr, dst.length, e); + testError (e, "failed to get collator short name"); + return dst[0..len].dup; /* dup: the caller gets its own copy rather than a slice of the local buffer */ + } + + /*********************************************************************** + + Verifies and normalizes short definition string. Normalized + short definition string has all the options sorted by the + argument name, so that equivalent definition strings are the + same + + ***********************************************************************/ + + char[] normalizeShortDefinitionString (char[] source) + { + UErrorCode e; + char[64] dst; /* fixed 64-char scratch buffer; only dst[0..len] is copied out below */ + + uint len = ucol_normalizeShortDefinitionString (toString(source), dst.ptr, dst.length, null, e); /* null is the parseError argument */ + testError (e, "failed to normalize collator short name"); + return dst[0..len].dup; /* dup: the caller gets its own copy rather than a slice of the local buffer */ + } + + /*********************************************************************** + + Get a sort key for a string from a UCollator. Sort keys may be compared using strcmp. + Returns the leading slice of result that was filled, or null unless the reported length is below result.length. + + ***********************************************************************/ + + ubyte[] getSortKey (UStringView t, ubyte[] result) + { + uint len = ucol_getSortKey (handle, t.get.ptr, t.len, result.ptr, result.length); + if (len < result.length) /* a length equal to result.length is rejected as well */ + return result [0..len]; + return null; + } + + /*********************************************************************** + + Merge two sort keys. The levels are merged with their + corresponding counterparts (primaries with primaries, + secondaries with secondaries etc.). Between the values + from the same level a separator is inserted. 
example + (uncompressed): 191B1D 01 050505 01 910505 00 and + 1F2123 01 050505 01 910505 00 will be merged as + 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00 + This allows for concatenating of first and last names for + sorting, among other things. If the destination buffer is + not big enough, the results are undefined. If any of source + lengths are zero or any of source pointers are null/undefined, + result is of size zero. + + ***********************************************************************/ + + ubyte[] mergeSortkeys (ubyte[] left, ubyte[] right, ubyte[] result) + { + uint len = ucol_mergeSortkeys (left.ptr, left.length, right.ptr, right.length, result.ptr, result.length); /* this ICU entry point takes no error code */ + if (len < result.length) /* a length equal to result.length is rejected as well */ + return result [0..len]; /* the merged key is the leading part of the caller's buffer */ + return null; /* the caller gets null instead of a slice */ + } + + /*********************************************************************** + + Produce a bound for a given sortkey and a number of levels. + + Return value is always the number of bytes needed, regardless + of whether the result buffer was big enough or even valid. + + Resulting bounds can be used to produce a range of strings + that are between upper and lower bounds. For example, if + bounds are produced for a sortkey of string "smith", strings + between upper and lower bounds with one level would include + "Smith", "SMITH", "sMiTh". + + There are two upper bounds that can be produced. If BoundUpper + is produced, strings matched would be as above. However, if + bound produced using BoundUpperLong is used, the above example + will also match "Smithsonian" and similar. 
+ + ***********************************************************************/ + + ubyte[] getBound (BoundMode mode, ubyte[] source, ubyte[] result, uint levels = 1) + { + UErrorCode e; + + uint len = ucol_getBound (source.ptr, source.length, mode, levels, result.ptr, result.length, e); /* len is the count reported by ICU */ + testError (e, "failed to get sortkey bound"); + if (len < result.length) /* a length equal to result.length is rejected as well */ + return result [0..len]; /* this wrapper hands back the filled part of result, not the count */ + return null; /* the caller gets null instead of a slice */ + } + + /*********************************************************************** + + Gets the version information for a Collator. + + Version is currently an opaque 32-bit number which depends, + among other things, on major versions of the collator + tailoring and UCA + + ***********************************************************************/ + + void getVersion (inout Version v) + { + ucol_getVersion (handle, v); /* NOTE(review): the bound pointer declares a by-value Version parameter; confirm Version is a type ICU can fill in through this call */ + } + + /*********************************************************************** + + Gets the UCA version information for this Collator + + ***********************************************************************/ + + void getUCAVersion (inout Version v) + { + ucol_getUCAVersion (handle, v); /* NOTE(review): same by-value Version declaration as ucol_getVersion; confirm */ + } + + /*********************************************************************** + + Universal attribute setter: applies value to attr on this collator + + ***********************************************************************/ + + void setAttribute (Attribute attr, AttributeValue value) + { + UErrorCode e; + + ucol_setAttribute (handle, attr, value, e); + testError (e, "failed to set collator attribute"); + } + + /*********************************************************************** + + Universal attribute getter: returns the value ICU reports for attr + + ***********************************************************************/ + + AttributeValue getAttribute (Attribute attr) + { + UErrorCode e; + + AttributeValue v = ucol_getAttribute (handle, attr, e); + testError (e, "failed to get collator attribute"); /* checked before v is returned */ + return v; + } + + /*********************************************************************** + + Variable top is a two byte primary value which 
causes all + the codepoints with primary values that are less or equal + than the variable top to be shifted when alternate handling + is set to Shifted. + + ***********************************************************************/ + + void setVariableTop (UStringView t) + { + UErrorCode e; + + ucol_setVariableTop (handle, t.get.ptr, t.len, e); /* the uint returned by the call is not used */ + testError (e, "failed to set variable-top"); + } + + /*********************************************************************** + + Sets the variable top to a collation element value + supplied. Variable top is set to the upper 16 bits. + Lower 16 bits are ignored. + + ***********************************************************************/ + + void setVariableTop (uint x) + { + UErrorCode e; + + ucol_restoreVariableTop (handle, x, e); /* this overload maps to the ICU restore call, not to ucol_setVariableTop */ + testError (e, "failed to restore variable-top"); + } + + /*********************************************************************** + + Gets the variable top value of this Collator. Lower 16 bits + are undefined and should be ignored. + + ***********************************************************************/ + + uint getVariableTop () + { + UErrorCode e; + + uint x = ucol_getVariableTop (handle, e); + testError (e, "failed to get variable-top"); /* checked before x is returned */ + return x; + } + + /*********************************************************************** + + Gets the locale name of the collator and stores it in locale.name. If the + collator is instantiated from the rules, then this function will throw + an exception; one is raised whenever ICU reports an error or the name comes back null + + ***********************************************************************/ + + void getLocale (ULocale locale, ULocale.Type type) + { + UErrorCode e; + + locale.name = toArray (ucol_getLocaleByType (handle, type, e)); /* assigned before the check below, so locale.name is overwritten even on failure */ + if (isError(e) || locale.name is null) + exception ("failed to get collator locale"); + } + + /*********************************************************************** + + Get the Unicode set that contains all the characters and + sequences tailored in this collator. 
+ + ***********************************************************************/ + + USet getTailoredSet () + { + UErrorCode e; + + Handle h = ucol_getTailoredSet (handle, e); + testError (e, "failed to get tailored set"); /* checked before the handle is wrapped */ + return new USet (h); /* a new USet is built around the returned handle on every call */ + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; /* value returned by FunctionLoader.bind, handed back to FunctionLoader.unbind */ + + /*********************************************************************** + C-linkage function pointers for the ICU ucol_ entry points used by this class; each one is listed in targets and bound by the static constructor + ***********************************************************************/ + + private static extern (C) + { + void function (Handle) ucol_close; + Handle function (char *loc, inout UErrorCode e) ucol_open; + Handle function (wchar* rules, uint rulesLength, AttributeValue normalizationMode, Strength strength, UParseError *parseError, inout UErrorCode e) ucol_openRules; + Handle function (char *definition, byte forceDefaults, UParseError *parseError, inout UErrorCode e) ucol_openFromShortString; + uint function (Handle, Handle conts, inout UErrorCode e) ucol_getContractions; + int function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_strcoll; + byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greater; + byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greaterOrEqual; + byte function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_equal; + Strength function (Handle) ucol_getStrength; + void function (Handle, Strength strength) ucol_setStrength; + uint function (char *objLoc, char *dispLoc, wchar* result, uint resultLength, inout UErrorCode e) ucol_getDisplayName; + uint function (Handle, char *locale, char *buffer, uint capacity, inout UErrorCode e) 
ucol_getShortDefinitionString; + uint function (char *source, char *destination, uint capacity, UParseError *parseError, inout UErrorCode e) ucol_normalizeShortDefinitionString; + uint function (Handle, wchar* source, uint sourceLength, ubyte *result, uint resultLength) ucol_getSortKey; + uint function (ubyte *source, uint sourceLength, BoundMode boundType, uint noOfLevels, ubyte *result, uint resultLength, inout UErrorCode e) ucol_getBound; + void function (Handle, Version info) ucol_getVersion; + void function (Handle, Version info) ucol_getUCAVersion; + uint function (ubyte *src1, uint src1Length, ubyte *src2, uint src2Length, ubyte *dest, uint destCapacity) ucol_mergeSortkeys; + void function (Handle, Attribute attr, AttributeValue value, inout UErrorCode e) ucol_setAttribute; + AttributeValue function (Handle, Attribute attr, inout UErrorCode e) ucol_getAttribute; + uint function (Handle, wchar* varTop, uint len, inout UErrorCode e) ucol_setVariableTop; + uint function (Handle, inout UErrorCode e) ucol_getVariableTop; + void function (Handle, uint varTop, inout UErrorCode e) ucol_restoreVariableTop; + uint function (Handle, RuleOption delta, wchar* buffer, uint bufferLen) ucol_getRulesEx; + char* function (Handle, ULocale.Type type, inout UErrorCode e) ucol_getLocaleByType; + Handle function (Handle, inout UErrorCode e) ucol_getTailoredSet; + } + + /*********************************************************************** + Binding table: pairs the address of each function pointer declared above with the ICU symbol name it is bound to + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucol_open, "ucol_open"}, + {cast(void**) &ucol_close, "ucol_close"}, + {cast(void**) &ucol_openRules, "ucol_openRules"}, + {cast(void**) &ucol_openFromShortString, "ucol_openFromShortString"}, + {cast(void**) &ucol_getContractions, "ucol_getContractions"}, + {cast(void**) &ucol_strcoll, "ucol_strcoll"}, + {cast(void**) &ucol_greater, "ucol_greater"}, + {cast(void**) &ucol_greaterOrEqual, 
"ucol_greaterOrEqual"}, + {cast(void**) &ucol_equal, "ucol_equal"}, + {cast(void**) &ucol_getStrength, "ucol_getStrength"}, + {cast(void**) &ucol_setStrength, "ucol_setStrength"}, + {cast(void**) &ucol_getDisplayName, "ucol_getDisplayName"}, + {cast(void**) &ucol_getShortDefinitionString, "ucol_getShortDefinitionString"}, + {cast(void**) &ucol_normalizeShortDefinitionString, "ucol_normalizeShortDefinitionString"}, + {cast(void**) &ucol_getSortKey, "ucol_getSortKey"}, + {cast(void**) &ucol_getBound, "ucol_getBound"}, + {cast(void**) &ucol_getVersion, "ucol_getVersion"}, + {cast(void**) &ucol_getUCAVersion, "ucol_getUCAVersion"}, + {cast(void**) &ucol_mergeSortkeys, "ucol_mergeSortkeys"}, + {cast(void**) &ucol_setAttribute, "ucol_setAttribute"}, + {cast(void**) &ucol_getAttribute, "ucol_getAttribute"}, + {cast(void**) &ucol_setVariableTop, "ucol_setVariableTop"}, + {cast(void**) &ucol_getVariableTop, "ucol_getVariableTop"}, + {cast(void**) &ucol_restoreVariableTop, "ucol_restoreVariableTop"}, + {cast(void**) &ucol_getRulesEx, "ucol_getRulesEx"}, + {cast(void**) &ucol_getLocaleByType, "ucol_getLocaleByType"}, + {cast(void**) &ucol_getTailoredSet, "ucol_getTailoredSet"}, + ]; + + /*********************************************************************** + Static constructor: passes icuin and the targets table to FunctionLoader.bind and keeps the value it returns in library + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + Static destructor: hands the value kept in library back to FunctionLoader.unbind + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UDomainName.d --- a/dwtx/dwtxhelper/mangoicu/UDomainName.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UDomainName.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,322 +1,322 @@ -/******************************************************************************* - - @file UDomainName.d - 
- Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UDomainName; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -/******************************************************************************* - - UIDNA API implements the IDNA protocol as defined in the - IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). - - The RFC defines 2 operations: toAscii and toUnicode. Domain - labels containing non-ASCII code points are required to be - processed by toAscii operation before passing it to resolver - libraries. Domain names that are obtained from resolver - libraries are required to be processed by toUnicode operation - before displaying the domain name to the user. IDNA requires - that implementations process input strings with Nameprep - (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of - Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with - Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations - of IDNA MUST fully implement Nameprep and Punycode; neither - Nameprep nor Punycode are optional. - - The input and output of toAscii() and ToUnicode() operations are - Unicode and are designed to be chainable, i.e., applying toAscii() - or toUnicode() operations multiple times to an input string will - yield the same result as applying the operation once. - - See - this page for full details. 
- -*******************************************************************************/ - -class UDomainName : ICU -{ - private UStringView text; - private Handle handle; - - enum Options - { - Strict, - Lenient, - Std3 - } - - - /*********************************************************************** - - - ***********************************************************************/ - - this (UStringView text) - { - this.text = text; - } - - /*********************************************************************** - - This function implements the ToASCII operation as - defined in the IDNA RFC. - - This operation is done on single labels before sending - it to something that expects ASCII names. A label is an - individual part of a domain name. Labels are usually - separated by dots; e.g." "www.example.com" is composed - of 3 labels "www","example", and "com". - - ***********************************************************************/ - - void toAscii (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_toASCII (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to ASCII"); - } - - /*********************************************************************** - - This function implements the ToUnicode operation as - defined in the IDNA RFC. - - This operation is done on single labels before sending - it to something that expects Unicode names. A label is - an individual part of a domain name. Labels are usually - separated by dots; for e.g." "www.example.com" is composed - of 3 labels "www","example", and "com". 
- - ***********************************************************************/ - - void toUnicode (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_toUnicode (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to Unicode"); - } - - /*********************************************************************** - - Convenience function that implements the IDNToASCII - operation as defined in the IDNA RFC. - - This operation is done on complete domain names, e.g: - "www.example.com". It is important to note that this - operation can fail. If it fails, then the input domain - name cannot be used as an Internationalized Domain Name - and the application should have methods defined to deal - with the failure. - - Note: IDNA RFC specifies that a conformant application - should divide a domain name into separate labels, decide - whether to apply allowUnassigned and useSTD3ASCIIRules - on each, and then convert. This function does not offer - that level of granularity. The options once set will apply - to all labels in the domain name - - ***********************************************************************/ - - void IdnToAscii (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_IDNToASCII (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to ASCII"); - } - - /*********************************************************************** - - Convenience function that implements the IDNToUnicode - operation as defined in the IDNA RFC. - - This operation is done on complete domain names, e.g: - "www.example.com". - - Note: IDNA RFC specifies that a conformant application - should divide a domain name into separate labels, decide - whether to apply allowUnassigned and useSTD3ASCIIRules - on each, and then convert. This function does not offer - that level of granularity. 
The options once set will apply - to all labels in the domain name - - ***********************************************************************/ - - void IdnToUnicode (UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uidna_IDNToUnicode (text.get.ptr, text.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to convert IDN to Unicode"); - } - - /*********************************************************************** - - Compare two IDN strings for equivalence. - - This function splits the domain names into labels and - compares them. According to IDN RFC, whenever two labels - are compared, they are considered equal if and only if - their ASCII forms (obtained by applying toASCII) match - using an case-insensitive ASCII comparison. Two domain - names are considered a match if and only if all labels - match regardless of whether label separators match - - ***********************************************************************/ - - int compare (UString other, Options o = Options.Strict) - { - UErrorCode e; - int i = uidna_compare (text.get.ptr, text.len, other.get.ptr, other.len, o, e); - testError (e, "failed to compare IDN strings"); - return i; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toASCII; - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toUnicode; - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToASCII; - uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToUnicode; - int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) uidna_compare; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uidna_toASCII, "uidna_toASCII"}, - {cast(void**) &uidna_toUnicode, "uidna_toUnicode"}, - {cast(void**) &uidna_IDNToASCII, "uidna_IDNToASCII"}, - {cast(void**) &uidna_IDNToUnicode, "uidna_IDNToUnicode"}, - {cast(void**) &uidna_compare, "uidna_compare"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - +/******************************************************************************* + + @file UDomainName.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UDomainName; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +/******************************************************************************* + + UIDNA API implements the IDNA protocol as defined in the + IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). + + The RFC defines 2 operations: toAscii and toUnicode. Domain + labels containing non-ASCII code points are required to be + processed by toAscii operation before passing it to resolver + libraries. Domain names that are obtained from resolver + libraries are required to be processed by toUnicode operation + before displaying the domain name to the user. IDNA requires + that implementations process input strings with Nameprep + (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of + Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with + Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations + of IDNA MUST fully implement Nameprep and Punycode; neither + Nameprep nor Punycode are optional. + + The input and output of toAscii() and ToUnicode() operations are + Unicode and are designed to be chainable, i.e., applying toAscii() + or toUnicode() operations multiple times to an input string will + yield the same result as applying the operation once. + + See + this page for full details. 
+ +*******************************************************************************/ + +class UDomainName : ICU +{ + private UStringView text; + private Handle handle; + + enum Options + { + Strict, + Lenient, + Std3 + } + + + /*********************************************************************** + + + ***********************************************************************/ + + this (UStringView text) + { + this.text = text; + } + + /*********************************************************************** + + This function implements the ToASCII operation as + defined in the IDNA RFC. + + This operation is done on single labels before sending + it to something that expects ASCII names. A label is an + individual part of a domain name. Labels are usually + separated by dots; e.g." "www.example.com" is composed + of 3 labels "www","example", and "com". + + ***********************************************************************/ + + void toAscii (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_toASCII (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to ASCII"); + } + + /*********************************************************************** + + This function implements the ToUnicode operation as + defined in the IDNA RFC. + + This operation is done on single labels before sending + it to something that expects Unicode names. A label is + an individual part of a domain name. Labels are usually + separated by dots; for e.g." "www.example.com" is composed + of 3 labels "www","example", and "com". 
+ + ***********************************************************************/ + + void toUnicode (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_toUnicode (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to Unicode"); + } + + /*********************************************************************** + + Convenience function that implements the IDNToASCII + operation as defined in the IDNA RFC. + + This operation is done on complete domain names, e.g: + "www.example.com". It is important to note that this + operation can fail. If it fails, then the input domain + name cannot be used as an Internationalized Domain Name + and the application should have methods defined to deal + with the failure. + + Note: IDNA RFC specifies that a conformant application + should divide a domain name into separate labels, decide + whether to apply allowUnassigned and useSTD3ASCIIRules + on each, and then convert. This function does not offer + that level of granularity. The options once set will apply + to all labels in the domain name + + ***********************************************************************/ + + void IdnToAscii (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_IDNToASCII (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to ASCII"); + } + + /*********************************************************************** + + Convenience function that implements the IDNToUnicode + operation as defined in the IDNA RFC. + + This operation is done on complete domain names, e.g: + "www.example.com". + + Note: IDNA RFC specifies that a conformant application + should divide a domain name into separate labels, decide + whether to apply allowUnassigned and useSTD3ASCIIRules + on each, and then convert. This function does not offer + that level of granularity. 
The options once set will apply + to all labels in the domain name + + ***********************************************************************/ + + void IdnToUnicode (UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uidna_IDNToUnicode (text.get.ptr, text.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to convert IDN to Unicode"); + } + + /*********************************************************************** + + Compare two IDN strings for equivalence. + + This function splits the domain names into labels and + compares them. According to IDN RFC, whenever two labels + are compared, they are considered equal if and only if + their ASCII forms (obtained by applying toASCII) match + using an case-insensitive ASCII comparison. Two domain + names are considered a match if and only if all labels + match regardless of whether label separators match + + ***********************************************************************/ + + int compare (UString other, Options o = Options.Strict) + { + UErrorCode e; + int i = uidna_compare (text.get.ptr, text.len, other.get.ptr, other.len, o, e); + testError (e, "failed to compare IDN strings"); + return i; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toASCII; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_toUnicode; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToASCII; + uint function (wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) uidna_IDNToUnicode; + int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) uidna_compare; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uidna_toASCII, "uidna_toASCII"}, + {cast(void**) &uidna_toUnicode, "uidna_toUnicode"}, + {cast(void**) &uidna_IDNToASCII, "uidna_IDNToASCII"}, + {cast(void**) &uidna_IDNToUnicode, "uidna_IDNToUnicode"}, + {cast(void**) &uidna_compare, "uidna_compare"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UEnumeration.d --- a/dwtx/dwtxhelper/mangoicu/UEnumeration.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UEnumeration.d Mon Jul 07 15:54:03 2008 
+0200 @@ -1,264 +1,264 @@ -/******************************************************************************* - - @file UEnumeration.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UEnumeration; - -private import dwtx.dwtxhelper.mangoicu.ICU; - -/******************************************************************************* - - UEnumeration is returned by a number of ICU classes, for providing - access to such things as ULocale lists and so on, - -*******************************************************************************/ - -class UEnumeration : ICU -{ - package Handle handle; - - /*********************************************************************** - - ***********************************************************************/ - - this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Disposes of the storage used by a UEnumeration object - - ***********************************************************************/ - - ~this () - { - uenum_close (handle); - } - - /*********************************************************************** - - Returns the next element in the iterator's list. - - If there are no more elements, returns NULL. If the - iterator is out-of-sync with its service, status is - set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. - If the native service string is a UChar* string, it - is converted to char* with the invariant converter. - The result is terminated by (char)0. If the conversion - fails (because a character cannot be converted) then - status is set to U_INVARIANT_CONVERSION_ERROR and the - return value is undefined (but non-NULL). - - ***********************************************************************/ - - uint count () - { - UErrorCode e; - - uint x = uenum_count (handle, e); - testError (e, "enumeration out of sync"); - return x; - } - - /*********************************************************************** - - Resets the iterator to the current list of service IDs. 
- - This re-establishes sync with the service and rewinds - the iterator to start at the first element - - ***********************************************************************/ - - void reset () - { - ICU.UErrorCode e; - - uenum_reset (handle, e); - testError (e, "failed to reset enumeration"); - } - - /*********************************************************************** - - Returns the next element in the iterator's list. - - If there are no more elements, returns NULL. If the - iterator is out-of-sync with its service, status is - set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. - If the native service string is a char* string, it is - converted to UChar* with the invariant converter. - - ***********************************************************************/ - - bool next (out char[] dst) - { - ICU.UErrorCode e; - uint len; - - char* p = uenum_next (handle, &len, e); - testError (e, "failed to traverse enumeration"); - if (p) - return dst = p[0..len], true; - return false; - } - - /*********************************************************************** - - Returns the next element in the iterator's list. - - If there are no more elements, returns NULL. If the - iterator is out-of-sync with its service, status is - set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. - If the native service string is a char* string, it is - converted to UChar* with the invariant converter. - - ***********************************************************************/ - - bool next (inout wchar[] dst) - { - ICU.UErrorCode e; - uint len; - - wchar* p = uenum_unext (handle, &len, e); - testError (e, "failed to traverse enumeration"); - if (p) - return dst = p[0..len], true; - return false; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - void function (Handle) uenum_close; - uint function (Handle, inout UErrorCode) uenum_count; - void function (Handle, inout UErrorCode) uenum_reset; - char* function (Handle, uint*, inout UErrorCode) uenum_next; - wchar* function (Handle, uint*, inout UErrorCode) uenum_unext; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uenum_close, "uenum_close"}, - {cast(void**) &uenum_count, "uenum_count"}, - {cast(void**) &uenum_reset, "uenum_reset"}, - {cast(void**) &uenum_next, "uenum_next"}, - {cast(void**) &uenum_unext, "uenum_unext"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file UEnumeration.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UEnumeration; + +private import dwtx.dwtxhelper.mangoicu.ICU; + +/******************************************************************************* + + UEnumeration is returned by a number of ICU classes, for providing + access to such things as ULocale lists and so on, + +*******************************************************************************/ + +class UEnumeration : ICU +{ + package Handle handle; + + /*********************************************************************** + + ***********************************************************************/ + + this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Disposes of the storage used by a UEnumeration object + + ***********************************************************************/ + + ~this () + { + uenum_close (handle); + } + + /*********************************************************************** + + Returns the next element in the iterator's list. + + If there are no more elements, returns NULL. If the + iterator is out-of-sync with its service, status is + set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. + If the native service string is a UChar* string, it + is converted to char* with the invariant converter. + The result is terminated by (char)0. If the conversion + fails (because a character cannot be converted) then + status is set to U_INVARIANT_CONVERSION_ERROR and the + return value is undefined (but non-NULL). + + ***********************************************************************/ + + uint count () + { + UErrorCode e; + + uint x = uenum_count (handle, e); + testError (e, "enumeration out of sync"); + return x; + } + + /*********************************************************************** + + Resets the iterator to the current list of service IDs. 
+ + This re-establishes sync with the service and rewinds + the iterator to start at the first element + + ***********************************************************************/ + + void reset () + { + ICU.UErrorCode e; + + uenum_reset (handle, e); + testError (e, "failed to reset enumeration"); + } + + /*********************************************************************** + + Returns the next element in the iterator's list. + + If there are no more elements, returns NULL. If the + iterator is out-of-sync with its service, status is + set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. + If the native service string is a char* string, it is + converted to UChar* with the invariant converter. + + ***********************************************************************/ + + bool next (out char[] dst) + { + ICU.UErrorCode e; + uint len; + + char* p = uenum_next (handle, &len, e); + testError (e, "failed to traverse enumeration"); + if (p) + return dst = p[0..len], true; + return false; + } + + /*********************************************************************** + + Returns the next element in the iterator's list. + + If there are no more elements, returns NULL. If the + iterator is out-of-sync with its service, status is + set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. + If the native service string is a char* string, it is + converted to UChar* with the invariant converter. + + ***********************************************************************/ + + bool next (inout wchar[] dst) + { + ICU.UErrorCode e; + uint len; + + wchar* p = uenum_unext (handle, &len, e); + testError (e, "failed to traverse enumeration"); + if (p) + return dst = p[0..len], true; + return false; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + void function (Handle) uenum_close; + uint function (Handle, inout UErrorCode) uenum_count; + void function (Handle, inout UErrorCode) uenum_reset; + char* function (Handle, uint*, inout UErrorCode) uenum_next; + wchar* function (Handle, uint*, inout UErrorCode) uenum_unext; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uenum_close, "uenum_close"}, + {cast(void**) &uenum_count, "uenum_count"}, + {cast(void**) &uenum_reset, "uenum_reset"}, + {cast(void**) &uenum_next, "uenum_next"}, + {cast(void**) &uenum_unext, "uenum_unext"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/ULocale.d --- a/dwtx/dwtxhelper/mangoicu/ULocale.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/ULocale.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,229 +1,229 @@ -/******************************************************************************* - - @file ULocale.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - 
warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.ULocale; - -private import dwtx.dwtxhelper.mangoicu.ICU; - -/******************************************************************************* - - Note that this is a struct rather than a class. 
This is so - that one can easily construct these on the stack, plus the - 'convenience' instances can be created statically. - -*******************************************************************************/ - -struct ULocale -{ - public char[] name; - - /*********************************************************************** - - ***********************************************************************/ - - public static ULocale Root = {""}; - public static ULocale Default = {null}; - public static ULocale English = {"en"}; - public static ULocale Chinese = {"zh"}; - public static ULocale French = {"fr"}; - public static ULocale German = {"de"}; - public static ULocale Italian = {"it"}; - public static ULocale Japanese = {"ja"}; - public static ULocale Korean = {"ko"}; - public static ULocale SimplifiedChinese = {"zh_CN"}; - public static ULocale TraditionalChinese = {"zh_TW"}; - public static ULocale Canada = {"en_CA"}; - public static ULocale CanadaFrench = {"fr_CA"}; - public static ULocale China = {"zh_CN"}; - public static ULocale PRC = {"zh_CN"}; - public static ULocale France = {"fr_FR"}; - public static ULocale Germany = {"de_DE"}; - public static ULocale Italy = {"it_IT"}; - public static ULocale Japan = {"jp_JP"}; - public static ULocale Korea = {"ko_KR"}; - public static ULocale Taiwan = {"zh_TW"}; - public static ULocale UK = {"en_GB"}; - public static ULocale US = {"en_US"}; - - /*********************************************************************** - - ***********************************************************************/ - - public enum Type - { - Actual = 0, - Valid = 1, - Requested = 2, - } - - /*********************************************************************** - - ***********************************************************************/ - - public const uint LanguageCapacity = 12; - public const uint CountryCapacity = 4; - public const uint FullNameCapacity = 56; - public const uint ScriptCapacity = 6; - public const uint KeywordsCapacity = 
50; - public const uint KeywordAndValuesCapacity = 100; - public const char KeywordItemSeparator = ':'; - public const char KeywordSeparator = '@'; - public const char KeywordAssign = '='; - - - /*********************************************************************** - - ***********************************************************************/ - - static void getDefault (inout ULocale locale) - { - locale.name = ICU.toArray (uloc_getDefault()); - if (! locale.name) - ICU.exception ("failed to get default locale"); - } - - /*********************************************************************** - - ***********************************************************************/ - - static void setDefault (inout ULocale locale) - { - ICU.UErrorCode e; - - uloc_setDefault (ICU.toString(locale.name), e); - - if (ICU.isError (e)) - ICU.exception ("invalid locale '"~locale.name~"'"); - } - - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - char* function () uloc_getDefault; - void function (char*, inout ICU.UErrorCode) uloc_setDefault; - } - - /********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uloc_getDefault, "uloc_getDefault"}, - {cast(void**) &uloc_setDefault, "uloc_setDefault"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file ULocale.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. 
If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.ULocale; + +private import dwtx.dwtxhelper.mangoicu.ICU; + +/******************************************************************************* + + Note that this is a struct rather than a class. This is so + that one can easily construct these on the stack, plus the + 'convenience' instances can be created statically. 
+ +*******************************************************************************/ + +struct ULocale +{ + public char[] name; + + /*********************************************************************** + + ***********************************************************************/ + + public static ULocale Root = {""}; + public static ULocale Default = {null}; + public static ULocale English = {"en"}; + public static ULocale Chinese = {"zh"}; + public static ULocale French = {"fr"}; + public static ULocale German = {"de"}; + public static ULocale Italian = {"it"}; + public static ULocale Japanese = {"ja"}; + public static ULocale Korean = {"ko"}; + public static ULocale SimplifiedChinese = {"zh_CN"}; + public static ULocale TraditionalChinese = {"zh_TW"}; + public static ULocale Canada = {"en_CA"}; + public static ULocale CanadaFrench = {"fr_CA"}; + public static ULocale China = {"zh_CN"}; + public static ULocale PRC = {"zh_CN"}; + public static ULocale France = {"fr_FR"}; + public static ULocale Germany = {"de_DE"}; + public static ULocale Italy = {"it_IT"}; + public static ULocale Japan = {"jp_JP"}; + public static ULocale Korea = {"ko_KR"}; + public static ULocale Taiwan = {"zh_TW"}; + public static ULocale UK = {"en_GB"}; + public static ULocale US = {"en_US"}; + + /*********************************************************************** + + ***********************************************************************/ + + public enum Type + { + Actual = 0, + Valid = 1, + Requested = 2, + } + + /*********************************************************************** + + ***********************************************************************/ + + public const uint LanguageCapacity = 12; + public const uint CountryCapacity = 4; + public const uint FullNameCapacity = 56; + public const uint ScriptCapacity = 6; + public const uint KeywordsCapacity = 50; + public const uint KeywordAndValuesCapacity = 100; + public const char KeywordItemSeparator = ':'; + public const char 
KeywordSeparator = '@'; + public const char KeywordAssign = '='; + + + /*********************************************************************** + + ***********************************************************************/ + + static void getDefault (inout ULocale locale) + { + locale.name = ICU.toArray (uloc_getDefault()); + if (! locale.name) + ICU.exception ("failed to get default locale"); + } + + /*********************************************************************** + + ***********************************************************************/ + + static void setDefault (inout ULocale locale) + { + ICU.UErrorCode e; + + uloc_setDefault (ICU.toString(locale.name), e); + + if (ICU.isError (e)) + ICU.exception ("invalid locale '"~locale.name~"'"); + } + + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + char* function () uloc_getDefault; + void function (char*, inout ICU.UErrorCode) uloc_setDefault; + } + + /********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uloc_getDefault, "uloc_getDefault"}, + {cast(void**) &uloc_setDefault, "uloc_setDefault"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuuc, targets); + } + + 
/*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UMessageFormat.d --- a/dwtx/dwtxhelper/mangoicu/UMessageFormat.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UMessageFormat.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,395 +1,395 @@ -/******************************************************************************* - - @file UMessageFormat.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UMessageFormat; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -public import dwtx.dwtxhelper.mangoicu.ULocale; - -/******************************************************************************* - - Provides means to produce concatenated messages in language-neutral - way. Use this for all concatenations that show up to end users. Takes - a set of objects, formats them, then inserts the formatted strings into - the pattern at the appropriate places. - - See - this page for full details. - -*******************************************************************************/ - -class UMessageFormat : ICU -{ - private Handle handle; - - /*********************************************************************** - - Open a message formatter with given wchar[] and for the - given locale. - - ***********************************************************************/ - - this (wchar[] pattern, inout ULocale locale = ULocale.Default) - { - UErrorCode e; - - handle = umsg_open (pattern.ptr, pattern.length, toString(locale.name), null, e); - testError (e, "failed to open message formatter"); - } - - /*********************************************************************** - - Open a message formatter with given pattern and for the - given locale. 
- - ***********************************************************************/ - - this (UStringView pattern, inout ULocale locale = ULocale.Default) - { - this (pattern.get, locale); - } - - /*********************************************************************** - - Release message formatter - - ***********************************************************************/ - - ~this () - { - umsg_close (handle); - } - - /*********************************************************************** - - This locale is used for fetching default number or date - format information - - ***********************************************************************/ - - UMessageFormat setLocale (inout ULocale locale) - { - umsg_setLocale (handle, toString(locale.name)); - return this; - } - - /*********************************************************************** - - This locale is used for fetching default number or date - format information - - ***********************************************************************/ - - UMessageFormat getLocale (inout ULocale locale) - { - locale.name = toArray (umsg_getLocale (handle)); - return this; - } - - /*********************************************************************** - - Sets the pattern - - ***********************************************************************/ - - UMessageFormat setPattern (UStringView pattern) - { - UErrorCode e; - - umsg_applyPattern (handle, pattern.get.ptr, pattern.len, null, e); - testError (e, "failed to set formatter pattern"); - return this; - } - - /*********************************************************************** - - Gets the pattern - - ***********************************************************************/ - - UMessageFormat getPattern (UString s) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return umsg_toPattern (handle, dst, length, e); - } - - s.format (&fmt, "failed to get formatter pattern"); - return this; - } - - 
/*********************************************************************** - - This function may perform re-ordering of the arguments - depending on the locale. For all numeric arguments, double - is assumed unless the type is explicitly integer. All choice - format arguments must be of type double. - - ***********************************************************************/ - - UMessageFormat format (UString s, Args* list) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return umsg_vformat (handle, dst, length, list.args.ptr, e); - } - - s.format (&fmt, "failed to format pattern"); - return this; - } - - - /*********************************************************************** - - A typesafe list of arguments for the UMessageFormat.format() - method. This should be used in the following manner: - - @code - wchar[] format = "{0} {1, number, currency} {2, number, integer}"; - UMessageFormat msg = new UMessageFormat (format); - - msg.Args args; - msg.format (output, args.add("abc").add(152.0).add(456)); - @endcode - - Note that the argument order must follow that of the format - string, although the format string may dictate the ultimate - position of each argument. - - See http://oss.software.ibm.com/icu/apiref/umsg_8h.html for - details on the format string. - - @todo this will likely fail on certain CPU architectures. 
- - ***********************************************************************/ - - struct Args - { - private uint[32] args; - private uint index; - - /*************************************************************** - - ***************************************************************/ - - invariant - { - assert (index < args.length); - } - - /*************************************************************** - - ***************************************************************/ - - Args* reset () - { - index = 0; - return this; - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (UStringView x) - { - args[index] = cast(uint) cast(wchar*) x.get(); - ++index; - return this; - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (wchar[] x) - { - args[index] = cast(uint) cast(wchar*) x; - ++index; - return this; - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (int x) - { - args[index] = x; - ++index; - return this; - } - - /*************************************************************** - - ***************************************************************/ - - Args* add (double x) - { - *(cast(double*) &args[index]) = x; - index += 2; - return this; - } - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, char*, void*, inout UErrorCode) umsg_open; - void function (Handle) umsg_close; - void function (Handle, char*) umsg_setLocale; - char* function (Handle) umsg_getLocale; - uint function (Handle, wchar*, uint, inout UErrorCode) umsg_toPattern; - void function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_applyPattern; - uint function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_vformat; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &umsg_open, "umsg_open"}, - {cast(void**) &umsg_close, "umsg_close"}, - {cast(void**) &umsg_setLocale, "umsg_setLocale"}, - {cast(void**) &umsg_getLocale, "umsg_getLocale"}, - {cast(void**) &umsg_toPattern, "umsg_toPattern"}, - {cast(void**) &umsg_applyPattern, "umsg_applyPattern"}, - {cast(void**) &umsg_vformat, "umsg_vformat"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - - /*********************************************************************** - - ***********************************************************************/ - - 
static void test() - { - UString output = new UString(100); - wchar[] format = "{0} {1, number, currency} {2, number, integer}"; - - UMessageFormat msg = new UMessageFormat (format); - - msg.Args args; - msg.format (output, args.add("abc").add(152.0).add(456)); - } -} - - - +/******************************************************************************* + + @file UMessageFormat.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UMessageFormat; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +public import dwtx.dwtxhelper.mangoicu.ULocale; + +/******************************************************************************* + + Provides means to produce concatenated messages in language-neutral + way. Use this for all concatenations that show up to end users. Takes + a set of objects, formats them, then inserts the formatted strings into + the pattern at the appropriate places. + + See + this page for full details. + +*******************************************************************************/ + +class UMessageFormat : ICU +{ + private Handle handle; + + /*********************************************************************** + + Open a message formatter with given wchar[] and for the + given locale. + + ***********************************************************************/ + + this (wchar[] pattern, inout ULocale locale = ULocale.Default) + { + UErrorCode e; + + handle = umsg_open (pattern.ptr, pattern.length, toString(locale.name), null, e); + testError (e, "failed to open message formatter"); + } + + /*********************************************************************** + + Open a message formatter with given pattern and for the + given locale. 
+ + ***********************************************************************/ + + this (UStringView pattern, inout ULocale locale = ULocale.Default) + { + this (pattern.get, locale); + } + + /*********************************************************************** + + Release message formatter + + ***********************************************************************/ + + ~this () + { + umsg_close (handle); + } + + /*********************************************************************** + + This locale is used for fetching default number or date + format information + + ***********************************************************************/ + + UMessageFormat setLocale (inout ULocale locale) + { + umsg_setLocale (handle, toString(locale.name)); + return this; + } + + /*********************************************************************** + + This locale is used for fetching default number or date + format information + + ***********************************************************************/ + + UMessageFormat getLocale (inout ULocale locale) + { + locale.name = toArray (umsg_getLocale (handle)); + return this; + } + + /*********************************************************************** + + Sets the pattern + + ***********************************************************************/ + + UMessageFormat setPattern (UStringView pattern) + { + UErrorCode e; + + umsg_applyPattern (handle, pattern.get.ptr, pattern.len, null, e); + testError (e, "failed to set formatter pattern"); + return this; + } + + /*********************************************************************** + + Gets the pattern + + ***********************************************************************/ + + UMessageFormat getPattern (UString s) + { + uint fmt (wchar* dst, uint length, inout UErrorCode e) + { + return umsg_toPattern (handle, dst, length, e); + } + + s.format (&fmt, "failed to get formatter pattern"); + return this; + } + + 
/*********************************************************************** + + This function may perform re-ordering of the arguments + depending on the locale. For all numeric arguments, double + is assumed unless the type is explicitly integer. All choice + format arguments must be of type double. + + ***********************************************************************/ + + UMessageFormat format (UString s, Args* list) + { + uint fmt (wchar* dst, uint length, inout UErrorCode e) + { + return umsg_vformat (handle, dst, length, list.args.ptr, e); + } + + s.format (&fmt, "failed to format pattern"); + return this; + } + + + /*********************************************************************** + + A typesafe list of arguments for the UMessageFormat.format() + method. This should be used in the following manner: + + @code + wchar[] format = "{0} {1, number, currency} {2, number, integer}"; + UMessageFormat msg = new UMessageFormat (format); + + msg.Args args; + msg.format (output, args.add("abc").add(152.0).add(456)); + @endcode + + Note that the argument order must follow that of the format + string, although the format string may dictate the ultimate + position of each argument. + + See http://oss.software.ibm.com/icu/apiref/umsg_8h.html for + details on the format string. + + @todo this will likely fail on certain CPU architectures. 
+ + ***********************************************************************/ + + struct Args + { + private uint[32] args; + private uint index; + + /*************************************************************** + + ***************************************************************/ + + invariant + { + assert (index < args.length); + } + + /*************************************************************** + + ***************************************************************/ + + Args* reset () + { + index = 0; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (UStringView x) + { + args[index] = cast(uint) cast(wchar*) x.get(); + ++index; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (wchar[] x) + { + args[index] = cast(uint) cast(wchar*) x; + ++index; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (int x) + { + args[index] = x; + ++index; + return this; + } + + /*************************************************************** + + ***************************************************************/ + + Args* add (double x) + { + *(cast(double*) &args[index]) = x; + index += 2; + return this; + } + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, char*, void*, inout UErrorCode) umsg_open; + void function (Handle) umsg_close; + void function (Handle, char*) umsg_setLocale; + char* function (Handle) umsg_getLocale; + uint function (Handle, wchar*, uint, inout UErrorCode) umsg_toPattern; + void function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_applyPattern; + uint function (Handle, wchar*, uint, void*, inout UErrorCode) umsg_vformat; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &umsg_open, "umsg_open"}, + {cast(void**) &umsg_close, "umsg_close"}, + {cast(void**) &umsg_setLocale, "umsg_setLocale"}, + {cast(void**) &umsg_getLocale, "umsg_getLocale"}, + {cast(void**) &umsg_toPattern, "umsg_toPattern"}, + {cast(void**) &umsg_applyPattern, "umsg_applyPattern"}, + {cast(void**) &umsg_vformat, "umsg_vformat"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + //test (); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + + /*********************************************************************** + + ***********************************************************************/ + + 
static void test() + { + UString output = new UString(100); + wchar[] format = "{0} {1, number, currency} {2, number, integer}"; + + UMessageFormat msg = new UMessageFormat (format); + + msg.Args args; + msg.format (output, args.add("abc").add(152.0).add(456)); + } +} + + + diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UNormalize.d --- a/dwtx/dwtxhelper/mangoicu/UNormalize.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UNormalize.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,391 +1,391 @@ -/******************************************************************************* - - @file UNormalize.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UNormalize; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString, - dwtx.dwtxhelper.mangoicu.ULocale; - -/******************************************************************************* - - transforms Unicode text into an equivalent composed or - decomposed form, allowing for easier sorting and searching - of text. UNormalize supports the standard normalization forms - described in http://www.unicode.org/unicode/reports/tr15/ - - Characters with accents or other adornments can be encoded - in several different ways in Unicode. For example, take the - character A-acute. In Unicode, this can be encoded as a single - character (the "composed" form): - - 00C1 LATIN CAPITAL LETTER A WITH ACUTE - - or as two separate characters (the "decomposed" form): - - 0041 LATIN CAPITAL LETTER A 0301 COMBINING ACUTE ACCENT - - To a user of your program, however, both of these sequences - should be treated as the same "user-level" character "A with - acute accent". When you are searching or comparing text, you - must ensure that these two sequences are treated equivalently. - In addition, you must handle characters with more than one - accent. Sometimes the order of a character's combining accents - is significant, while in other cases accent sequences in different - orders are really equivalent. - - Similarly, the string "ffi" can be encoded as three separate - letters: - - 0066 LATIN SMALL LETTER F 0066 LATIN SMALL LETTER F - 0069 LATIN SMALL LETTER I - - or as the single character - - FB03 LATIN SMALL LIGATURE FFI - - The ffi ligature is not a distinct semantic character, and strictly - speaking it shouldn't be in Unicode at all, but it was included for - compatibility with existing character sets that already provided it. 
- The Unicode standard identifies such characters by giving them - "compatibility" decompositions into the corresponding semantic - characters. When sorting and searching, you will often want to use - these mappings. - - unorm_normalize helps solve these problems by transforming text into - the canonical composed and decomposed forms as shown in the first - example above. In addition, you can have it perform compatibility - decompositions so that you can treat compatibility characters the - same as their equivalents. Finally, UNormalize rearranges - accents into the proper canonical order, so that you do not have - to worry about accent rearrangement on your own. - - Form FCD, "Fast C or D", is also designed for collation. It allows - to work on strings that are not necessarily normalized with an - algorithm (like in collation) that works under "canonical closure", - i.e., it treats precomposed characters and their decomposed - equivalents the same. - - It is not a normalization form because it does not provide for - uniqueness of representation. Multiple strings may be canonically - equivalent (their NFDs are identical) and may all conform to FCD - without being identical themselves. - - The form is defined such that the "raw decomposition", the - recursive canonical decomposition of each character, results - in a string that is canonically ordered. This means that - precomposed characters are allowed for as long as their - decompositions do not need canonical reordering. - - Its advantage for a process like collation is that all NFD - and most NFC texts - and many unnormalized texts - already - conform to FCD and do not need to be normalized (NFD) for - such a process. The FCD quick check will return UNORM_YES - for most strings in practice. 
- - For more details on FCD see the collation design document: - http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm - - ICU collation performs either NFD or FCD normalization - automatically if normalization is turned on for the collator - object. Beyond collation and string search, normalized strings - may be useful for string equivalence comparisons, transliteration/ - transcription, unique representations, etc. - - The W3C generally recommends to exchange texts in NFC. Note also - that most legacy character encodings use only precomposed forms - and often do not encode any combining marks by themselves. For - conversion to such character encodings the Unicode text needs to - be normalized to NFC. For more usage examples, see the Unicode - Standard Annex. - - See - this page for full details. - - -*******************************************************************************/ - -class UNormalize : ICU -{ - enum Mode - { - None = 1, - NFD = 2, - NFKD = 3, - NFC = 4, - Default = NFC, - NFKC = 5, - FCD = 6, - Count - } - - enum Check - { - No, - Yes, - Maybe - } - - enum Options - { - None = 0x00, - Unicode32 = 0x20 - } - - /*********************************************************************** - - Normalize a string. The string will be normalized according - the specified normalization mode and options - - ***********************************************************************/ - - static void normalize (UStringView src, UString dst, Mode mode, Options o = Options.None) - { - uint fmt (wchar* dst, uint len, inout UErrorCode e) - { - return unorm_normalize (src.get.ptr, src.len, mode, o, dst, len, e); - } - - dst.format (&fmt, "failed to normalize"); - } - - /*********************************************************************** - - Performing quick check on a string, to quickly determine - if the string is in a particular normalization format. - - Three types of result can be returned: Yes, No or Maybe. 
- Result Yes indicates that the argument string is in the - desired normalized format, No determines that argument - string is not in the desired normalized format. A Maybe - result indicates that a more thorough check is required, - the user may have to put the string in its normalized - form and compare the results. - - ***********************************************************************/ - - static Check check (UStringView t, Mode mode, Options o = Options.None) - { - UErrorCode e; - - Check c = cast(Check) unorm_quickCheckWithOptions (t.get.ptr, t.len, mode, o, e); - testError (e, "failed to perform normalization check"); - return c; - } - - /*********************************************************************** - - Test if a string is in a given normalization form. - - Unlike check(), this function returns a definitive result, - never a "maybe". For NFD, NFKD, and FCD, both functions - work exactly the same. For NFC and NFKC where quickCheck - may return "maybe", this function will perform further - tests to arrive at a TRUE/FALSE result. - - ***********************************************************************/ - - static bool isNormalized (UStringView t, Mode mode, Options o = Options.None) - { - UErrorCode e; - - byte b = unorm_isNormalizedWithOptions (t.get.ptr, t.len, mode, o, e); - testError (e, "failed to perform normalization test"); - return b != 0; - } - - /*********************************************************************** - - Concatenate normalized strings, making sure that the result - is normalized as well. If both the left and the right strings - are in the normalization form according to "mode/options", - then the result will be - - dest=normalize(left+right, mode, options) - - With the input strings already being normalized, this function - will use unorm_next() and unorm_previous() to find the adjacent - end pieces of the input strings. 
Only the concatenation of these - end pieces will be normalized and then concatenated with the - remaining parts of the input strings. - - It is allowed to have dst==left to avoid copying the entire - left string. - - ***********************************************************************/ - - static void concatenate (UStringView left, UStringView right, UString dst, Mode mode, Options o = Options.None) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return unorm_concatenate (left.get.ptr, left.len, right.get.ptr, right.len, p, len, mode, o, e); - } - - dst.format (&fmt, "failed to concatenate"); - } - - /*********************************************************************** - - Compare two strings for canonical equivalence. Further - options include case-insensitive comparison and code - point order (as opposed to code unit order). - - Canonical equivalence between two strings is defined as - their normalized forms (NFD or NFC) being identical. - This function compares strings incrementally instead of - normalizing (and optionally case-folding) both strings - entirely, improving performance significantly. - - Bulk normalization is only necessary if the strings do - not fulfill the FCD conditions. Only in this case, and - only if the strings are relatively long, is memory - allocated temporarily. For FCD strings and short non-FCD - strings there is no memory allocation. - - ***********************************************************************/ - - static int compare (UStringView left, UStringView right, Options o = Options.None) - { - UErrorCode e; - - int i = unorm_compare (left.get.ptr, left.len, right.get.ptr, right.len, o, e); - testError (e, "failed to compare"); - return i; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - uint function (wchar*, uint, uint, uint, wchar*, uint, inout UErrorCode) unorm_normalize; - uint function (wchar*, uint, uint, uint, inout UErrorCode) unorm_quickCheckWithOptions; - byte function (wchar*, uint, uint, uint, inout UErrorCode) unorm_isNormalizedWithOptions; - uint function (wchar*, uint, wchar*, uint, wchar*, uint, uint, uint, inout UErrorCode) unorm_concatenate; - uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) unorm_compare; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &unorm_normalize, "unorm_normalize"}, - {cast(void**) &unorm_quickCheckWithOptions, "unorm_quickCheckWithOptions"}, - {cast(void**) &unorm_isNormalizedWithOptions, "unorm_isNormalizedWithOptions"}, - {cast(void**) &unorm_concatenate, "unorm_concatenate"}, - {cast(void**) &unorm_compare, "unorm_compare"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file UNormalize.d + + Copyright (c) 2004 Kris Bell + + This software is provided 
'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UNormalize; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString, + dwtx.dwtxhelper.mangoicu.ULocale; + +/******************************************************************************* + + transforms Unicode text into an equivalent composed or + decomposed form, allowing for easier sorting and searching + of text. UNormalize supports the standard normalization forms + described in http://www.unicode.org/unicode/reports/tr15/ + + Characters with accents or other adornments can be encoded + in several different ways in Unicode. For example, take the + character A-acute. In Unicode, this can be encoded as a single + character (the "composed" form): + + 00C1 LATIN CAPITAL LETTER A WITH ACUTE + + or as two separate characters (the "decomposed" form): + + 0041 LATIN CAPITAL LETTER A 0301 COMBINING ACUTE ACCENT + + To a user of your program, however, both of these sequences + should be treated as the same "user-level" character "A with + acute accent". When you are searching or comparing text, you + must ensure that these two sequences are treated equivalently. + In addition, you must handle characters with more than one + accent. Sometimes the order of a character's combining accents + is significant, while in other cases accent sequences in different + orders are really equivalent. + + Similarly, the string "ffi" can be encoded as three separate + letters: + + 0066 LATIN SMALL LETTER F 0066 LATIN SMALL LETTER F + 0069 LATIN SMALL LETTER I + + or as the single character + + FB03 LATIN SMALL LIGATURE FFI + + The ffi ligature is not a distinct semantic character, and strictly + speaking it shouldn't be in Unicode at all, but it was included for + compatibility with existing character sets that already provided it. 
+ The Unicode standard identifies such characters by giving them + "compatibility" decompositions into the corresponding semantic + characters. When sorting and searching, you will often want to use + these mappings. + + unorm_normalize helps solve these problems by transforming text into + the canonical composed and decomposed forms as shown in the first + example above. In addition, you can have it perform compatibility + decompositions so that you can treat compatibility characters the + same as their equivalents. Finally, UNormalize rearranges + accents into the proper canonical order, so that you do not have + to worry about accent rearrangement on your own. + + Form FCD, "Fast C or D", is also designed for collation. It allows + to work on strings that are not necessarily normalized with an + algorithm (like in collation) that works under "canonical closure", + i.e., it treats precomposed characters and their decomposed + equivalents the same. + + It is not a normalization form because it does not provide for + uniqueness of representation. Multiple strings may be canonically + equivalent (their NFDs are identical) and may all conform to FCD + without being identical themselves. + + The form is defined such that the "raw decomposition", the + recursive canonical decomposition of each character, results + in a string that is canonically ordered. This means that + precomposed characters are allowed for as long as their + decompositions do not need canonical reordering. + + Its advantage for a process like collation is that all NFD + and most NFC texts - and many unnormalized texts - already + conform to FCD and do not need to be normalized (NFD) for + such a process. The FCD quick check will return UNORM_YES + for most strings in practice. 
+ + For more details on FCD see the collation design document: + http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm + + ICU collation performs either NFD or FCD normalization + automatically if normalization is turned on for the collator + object. Beyond collation and string search, normalized strings + may be useful for string equivalence comparisons, transliteration/ + transcription, unique representations, etc. + + The W3C generally recommends to exchange texts in NFC. Note also + that most legacy character encodings use only precomposed forms + and often do not encode any combining marks by themselves. For + conversion to such character encodings the Unicode text needs to + be normalized to NFC. For more usage examples, see the Unicode + Standard Annex. + + See + this page for full details. + + +*******************************************************************************/ + +class UNormalize : ICU +{ + enum Mode + { + None = 1, + NFD = 2, + NFKD = 3, + NFC = 4, + Default = NFC, + NFKC = 5, + FCD = 6, + Count + } + + enum Check + { + No, + Yes, + Maybe + } + + enum Options + { + None = 0x00, + Unicode32 = 0x20 + } + + /*********************************************************************** + + Normalize a string. The string will be normalized according + the specified normalization mode and options + + ***********************************************************************/ + + static void normalize (UStringView src, UString dst, Mode mode, Options o = Options.None) + { + uint fmt (wchar* dst, uint len, inout UErrorCode e) + { + return unorm_normalize (src.get.ptr, src.len, mode, o, dst, len, e); + } + + dst.format (&fmt, "failed to normalize"); + } + + /*********************************************************************** + + Performing quick check on a string, to quickly determine + if the string is in a particular normalization format. + + Three types of result can be returned: Yes, No or Maybe. 
+ Result Yes indicates that the argument string is in the + desired normalized format, No determines that argument + string is not in the desired normalized format. A Maybe + result indicates that a more thorough check is required, + the user may have to put the string in its normalized + form and compare the results. + + ***********************************************************************/ + + static Check check (UStringView t, Mode mode, Options o = Options.None) + { + UErrorCode e; + + Check c = cast(Check) unorm_quickCheckWithOptions (t.get.ptr, t.len, mode, o, e); + testError (e, "failed to perform normalization check"); + return c; + } + + /*********************************************************************** + + Test if a string is in a given normalization form. + + Unlike check(), this function returns a definitive result, + never a "maybe". For NFD, NFKD, and FCD, both functions + work exactly the same. For NFC and NFKC where quickCheck + may return "maybe", this function will perform further + tests to arrive at a TRUE/FALSE result. + + ***********************************************************************/ + + static bool isNormalized (UStringView t, Mode mode, Options o = Options.None) + { + UErrorCode e; + + byte b = unorm_isNormalizedWithOptions (t.get.ptr, t.len, mode, o, e); + testError (e, "failed to perform normalization test"); + return b != 0; + } + + /*********************************************************************** + + Concatenate normalized strings, making sure that the result + is normalized as well. If both the left and the right strings + are in the normalization form according to "mode/options", + then the result will be + + dest=normalize(left+right, mode, options) + + With the input strings already being normalized, this function + will use unorm_next() and unorm_previous() to find the adjacent + end pieces of the input strings. 
Only the concatenation of these + end pieces will be normalized and then concatenated with the + remaining parts of the input strings. + + It is allowed to have dst==left to avoid copying the entire + left string. + + ***********************************************************************/ + + static void concatenate (UStringView left, UStringView right, UString dst, Mode mode, Options o = Options.None) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return unorm_concatenate (left.get.ptr, left.len, right.get.ptr, right.len, p, len, mode, o, e); + } + + dst.format (&fmt, "failed to concatenate"); + } + + /*********************************************************************** + + Compare two strings for canonical equivalence. Further + options include case-insensitive comparison and code + point order (as opposed to code unit order). + + Canonical equivalence between two strings is defined as + their normalized forms (NFD or NFC) being identical. + This function compares strings incrementally instead of + normalizing (and optionally case-folding) both strings + entirely, improving performance significantly. + + Bulk normalization is only necessary if the strings do + not fulfill the FCD conditions. Only in this case, and + only if the strings are relatively long, is memory + allocated temporarily. For FCD strings and short non-FCD + strings there is no memory allocation. + + ***********************************************************************/ + + static int compare (UStringView left, UStringView right, Options o = Options.None) + { + UErrorCode e; + + int i = unorm_compare (left.get.ptr, left.len, right.get.ptr, right.len, o, e); + testError (e, "failed to compare"); + return i; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + uint function (wchar*, uint, uint, uint, wchar*, uint, inout UErrorCode) unorm_normalize; + uint function (wchar*, uint, uint, uint, inout UErrorCode) unorm_quickCheckWithOptions; + byte function (wchar*, uint, uint, uint, inout UErrorCode) unorm_isNormalizedWithOptions; + uint function (wchar*, uint, wchar*, uint, wchar*, uint, uint, uint, inout UErrorCode) unorm_concatenate; + uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) unorm_compare; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &unorm_normalize, "unorm_normalize"}, + {cast(void**) &unorm_quickCheckWithOptions, "unorm_quickCheckWithOptions"}, + {cast(void**) &unorm_isNormalizedWithOptions, "unorm_isNormalizedWithOptions"}, + {cast(void**) &unorm_concatenate, "unorm_concatenate"}, + {cast(void**) &unorm_compare, "unorm_compare"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/URegex.d --- a/dwtx/dwtxhelper/mangoicu/URegex.d Mon Jul 07 15:53:07 2008 +0200 +++ 
b/dwtx/dwtxhelper/mangoicu/URegex.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,700 +1,700 @@ -/******************************************************************************* - - @file URegex.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.URegex; - -private import dwtx.dwtxhelper.mangoicu.ICU; - -public import dwtx.dwtxhelper.mangoicu.ULocale, - dwtx.dwtxhelper.mangoicu.UString, - dwtx.dwtxhelper.mangoicu.UCollator, - dwtx.dwtxhelper.mangoicu.UBreakIterator; - - -/******************************************************************************* - - Set of slices to return for group matching. See URegex.groups() - -*******************************************************************************/ - -class Groups : ICU -{ - public wchar[] g0, - g1, - g2, - g3, - g4, - g5, - g6, - g7, - g8, - g9; -} - -/******************************************************************************* - - Apis for an engine that provides regular-expression searching of - UTF16 strings. - - See http://icu.sourceforge.net/apiref/icu4c/uregex_8h.html for full - details. - -*******************************************************************************/ - -class URegex : Groups -{ - private Handle handle; - private UStringView theText; - - // Regex modes - public enum Flag - { - None = 0, - - // Enable case insensitive matching - CaseInsensitive = 2, - - // Allow white space and comments within patterns - Comments = 4, - - // Control behavior of "$" and "^" If set, recognize - // line terminators within string, otherwise, match - // only at start and end of input string. - MultiLine = 8, - - // If set, '.' matches line terminators, otherwise '.' - // matching stops at line end - DotAll = 32, - - // Forces normalization of pattern and strings - CanonEq = 128, - - // If set, uses the Unicode TR 29 definition of word - // boundaries. Warning: Unicode word boundaries are - // quite different from traditional regular expression - // word boundaries. 
See http://unicode.org/reports/tr29/#Word_Boundaries - UWord = 256, - } - - /*********************************************************************** - - Compiles the regular expression in string form into an - internal representation using the specified match mode - flags. The resulting regular expression handle can then - be used to perform various matching operations. - - ***********************************************************************/ - - this (wchar[] pattern, Flag flags=Flag.None, ParseError* pe=null) - { - UErrorCode e; - - handle = uregex_open (pattern.ptr, pattern.length, flags, pe, e); - testError (e, "failed to open regex"); - uregex_setText (handle, "", 0, e); - } - - /*********************************************************************** - - Compiles the regular expression in string form into an - internal representation using the specified match mode - flags. The resulting regular expression handle can then - be used to perform various matching operations. - - ***********************************************************************/ - - this (UStringView pattern, Flag flags=Flag.None, ParseError* pe=null) - { - this (pattern.get, flags, pe); - } - - /*********************************************************************** - - Internal constructor; used for cloning - - ***********************************************************************/ - - private this (Handle handle) - { - UErrorCode e; - - this.handle = handle; - uregex_setText (handle, "", 0, e); - } - - /*********************************************************************** - - Close the regular expression, recovering all resources (memory) - it was holding - - ***********************************************************************/ - - ~this () - { - uregex_close (handle); - } - - /*********************************************************************** - - Cloning a regular expression is faster than opening a second - instance from the source form of the expression, and requires - less 
memory. - - Note that the current input string and the position of any - matched text within it are not cloned; only the pattern itself - and and the match mode flags are copied. - - Cloning can be particularly useful to threaded applications - that perform multiple match operations in parallel. Each - concurrent RE operation requires its own instance of a - URegularExpression. - - ***********************************************************************/ - - URegex clone () - { - UErrorCode e; - - Handle h = uregex_clone (handle, e); - testError (e, "failed to clone regex"); - return new URegex (h); - } - - /*********************************************************************** - - Return a copy of the source form of the pattern for this - regular expression - - ***********************************************************************/ - - UString getPattern () - { - UErrorCode e; - uint len; - - wchar* x = uregex_pattern (handle, len, e); - testError (e, "failed to extract regex pattern"); - return new UString (x[0..len]); - } - - /*********************************************************************** - - Get the match mode flags that were specified when compiling - this regular expression - - ***********************************************************************/ - - Flag getFlags () - { - UErrorCode e; - - Flag f = cast(Flag) uregex_flags (handle, e); - testError (e, "failed to get regex flags"); - return f; - } - - /*********************************************************************** - - Set the subject text string upon which the regular expression - will look for matches. - - This function may be called any number of times, allowing the - regular expression pattern to be applied to different strings. - - Regular expression matching operations work directly on the - application's string data. No copy is made. 
The subject string - data must not be altered after calling this function until after - all regular expression operations involving this string data are - completed. - - Zero length strings are permitted. In this case, no subsequent - match operation will dereference the text string pointer. - - ***********************************************************************/ - - void setText (UStringView t) - { - UErrorCode e; - - theText = t; - uregex_setText (handle, t.get.ptr, t.length, e); - testError (e, "failed to set regex text"); - } - - /*********************************************************************** - - Get the subject text that is currently associated with this - regular expression object. This simply returns whatever was - previously supplied via setText(). - - Note that this returns a read-only reference to the text. - - ***********************************************************************/ - - UStringView getText () - { - return theText; - } - - /*********************************************************************** - - Return a set of slices representing the parenthesised groups. - This can be used in the following manner: - - @code - wchar msg; - - if (regex.next()) - with (regex.groups()) - msg ~= g1 ~ ":" ~ g2 - @endcode - - Note that g0 represents the entire match, whereas g1 through - g9 represent the parenthesised expressions. - - ***********************************************************************/ - - Groups groups () - { - wchar[]* p = &g0; - uint count = groupCount(); - wchar[] content = theText.get(); - - if (count > 9) - count = 9; - for (uint i=0; i <= count; ++p, ++i) - *p = content [start(i)..end(i)]; - return this; - } - - /*********************************************************************** - - Extract the string for the specified matching expression or - subexpression. UString 's' is the destination for the match. - - Group #0 is the complete string of matched text. 
Group #1 is - the text matched by the first set of capturing parentheses. - - ***********************************************************************/ - - void group (UString s, uint index) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return uregex_group (handle, index, dst, length, e); - } - - s.format (&fmt, "failed to extract regex group text"); - } - - /*********************************************************************** - - Get the number of capturing groups in this regular - expression's pattern - - ***********************************************************************/ - - uint groupCount () - { - UErrorCode e; - - uint i = uregex_groupCount (handle, e); - testError (e, "failed to get regex group-count"); - return i; - } - - /*********************************************************************** - - Returns the index in the input string of the start of the - text matched by the specified capture group during the - previous match operation. - - Return -1 if the capture group was not part of the last - match. Group #0 refers to the complete range of matched - text. Group #1 refers to the text matched by the first - set of capturing parentheses - - ***********************************************************************/ - - uint start (uint index = 0) - { - UErrorCode e; - - uint i = uregex_start (handle, index, e); - testError (e, "failed to get regex start"); - return i; - } - - /*********************************************************************** - - Returns the index in the input string of the position - following the end of the text matched by the specified - capture group. - - Return -1 if the capture group was not part of the last - match. Group #0 refers to the complete range of matched - text. Group #1 refers to the text matched by the first - set of capturing parentheses. 
- - ***********************************************************************/ - - uint end (uint index = 0) - { - UErrorCode e; - - uint i = uregex_end (handle, index, e); - testError (e, "failed to get regex end"); - return i; - } - - /*********************************************************************** - - Reset any saved state from the previous match. - - Has the effect of causing uregex_findNext to begin at the - specified index, and causing uregex_start(), uregex_end() - and uregex_group() to return an error indicating that there - is no match information available. - - ***********************************************************************/ - - void reset (uint startIndex) - { - UErrorCode e; - - uregex_reset (handle, startIndex, e); - testError (e, "failed to set regex next-index"); - } - - /*********************************************************************** - - Attempts to match the input string, beginning at startIndex, - against the pattern. - - To succeed, the match must extend to the end of the input - string - - ***********************************************************************/ - - bool match (uint startIndex) - { - UErrorCode e; - - bool b = uregex_matches (handle, startIndex, e); - testError (e, "failed while matching regex"); - return b; - } - - /*********************************************************************** - - Attempts to match the input string, starting from the - specified index, against the pattern. - - The match may be of any length, and is not required to - extend to the end of the input string. 
Contrast with match() - - ***********************************************************************/ - - bool probe (uint startIndex) - { - UErrorCode e; - - bool b = uregex_lookingAt (handle, startIndex, e); - testError (e, "failed while looking at regex"); - return b; - } - - /*********************************************************************** - - Returns whether the text matches the search pattern, starting - from the current position. - - If startIndex is specified, the current position is moved to - the specified location before the seach is initiated. - - ***********************************************************************/ - - bool next (uint startIndex = uint.max) - { - UErrorCode e; - bool b; - - b = (startIndex == uint.max) ? uregex_findNext (handle, e) : - uregex_find (handle, startIndex, e); - - testError (e, "failed on next regex"); - return b; - } - - /*********************************************************************** - - Replaces every substring of the input that matches the pattern - with the given replacement string. - - This is a convenience function that provides a complete - find-and-replace-all operation. - - This method scans the input string looking for matches of - the pattern. Input that is not part of any match is copied - unchanged to the destination buffer. Matched regions are - replaced in the output buffer by the replacement string. - The replacement string may contain references to capture - groups; these take the form of $1, $2, etc. - - The provided 'result' will contain the results, and should - be set with a length sufficient to house the entire result. - Upon completion, the 'result' is shortened appropriately - and the total extent (length) of the operation is returned. - Set the initital length of 'result' using the UString method - truncate(). - - The returned extent should be checked to ensure it is not - longer than the length of 'result'. If it is longer, then - the result has been truncated. 
- - ***********************************************************************/ - - uint replaceAll (UStringView replace, UString result) - { - UErrorCode e; - - uint len = uregex_replaceAll (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); - testError (e, "failed during regex replace"); - result.truncate (len); - return len; - } - - /*********************************************************************** - - Replaces the first substring of the input that matches the - pattern with the given replacement string. - - This is a convenience function that provides a complete - find-and-replace operation. - - This method scans the input string looking for a match of - the pattern. All input that is not part of the match is - copied unchanged to the destination buffer. The matched - region is replaced in the output buffer by the replacement - string. The replacement string may contain references to - capture groups; these take the form of $1, $2, etc - - The provided 'result' will contain the results, and should - be set with a length sufficient to house the entire result. - Upon completion, the 'result' is shortened appropriately - and the total extent (length) of the operation is returned. - Set the initital length of 'result' using the UString method - truncate(). - - The returned extent should be checked to ensure it is not - longer than the length of 'result'. If it is longer, then - the result has been truncated. 
- - ***********************************************************************/ - - uint replaceFirst (UStringView replace, UString result) - { - UErrorCode e; - - uint len = uregex_replaceFirst (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); - testError (e, "failed during regex replace"); - result.truncate (len); - return len; - } - - /*********************************************************************** - - Split the text up into slices (fields), where each slice - represents the text situated between each pattern matched - within the text. The pattern is expected to represent one - or more slice delimiters. - - ***********************************************************************/ - - uint split (wchar[][] fields) - { - UErrorCode e; - uint pos, - count; - wchar[] content = theText.get; - - while (count < fields.length) - if (uregex_findNext (handle, e) && e == e.OK) - { - uint i = start(); - fields[count] = content[pos..i]; - pos = end (); - - // ignore leading delimiter - if (i) - ++count; - } - else - break; - - testError (e, "failed during split"); - return count; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, uint, ParseError*, inout UErrorCode) uregex_open; - void function (Handle) uregex_close; - Handle function (Handle, inout UErrorCode) uregex_clone; - wchar* function (Handle, inout uint, inout UErrorCode) uregex_pattern; - uint function (Handle, inout UErrorCode) uregex_flags; - void function (Handle, wchar*, uint, inout UErrorCode) uregex_setText; - wchar* function (Handle, inout uint, inout UErrorCode) uregex_getText; - uint function (Handle, uint, wchar*, uint, inout UErrorCode) uregex_group; - uint function (Handle, inout UErrorCode) uregex_groupCount; - uint function (Handle, uint, inout UErrorCode) uregex_start; - uint function (Handle, uint, inout UErrorCode) uregex_end; - void function (Handle, uint, inout UErrorCode) uregex_reset; - bool function (Handle, uint, inout UErrorCode) uregex_matches; - bool function (Handle, uint, inout UErrorCode) uregex_lookingAt; - bool function (Handle, uint, inout UErrorCode) uregex_find; - bool function (Handle, inout UErrorCode) uregex_findNext; - uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceAll; - uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceFirst; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uregex_open, "uregex_open"}, - {cast(void**) &uregex_close, "uregex_close"}, - {cast(void**) &uregex_clone, "uregex_clone"}, - {cast(void**) &uregex_pattern, "uregex_pattern"}, 
- {cast(void**) &uregex_flags, "uregex_flags"}, - {cast(void**) &uregex_setText, "uregex_setText"}, - {cast(void**) &uregex_getText, "uregex_getText"}, - {cast(void**) &uregex_group, "uregex_group"}, - {cast(void**) &uregex_groupCount, "uregex_groupCount"}, - {cast(void**) &uregex_start, "uregex_start"}, - {cast(void**) &uregex_end, "uregex_end"}, - {cast(void**) &uregex_reset, "uregex_reset"}, - {cast(void**) &uregex_matches, "uregex_matches"}, - {cast(void**) &uregex_lookingAt, "uregex_lookingAt"}, - {cast(void**) &uregex_find, "uregex_find"}, - {cast(void**) &uregex_findNext, "uregex_findNext"}, - {cast(void**) &uregex_replaceAll, "uregex_replaceAll"}, - {cast(void**) &uregex_replaceFirst, "uregex_replaceFirst"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file URegex.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. 
Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.URegex; + +private import dwtx.dwtxhelper.mangoicu.ICU; + +public import dwtx.dwtxhelper.mangoicu.ULocale, + dwtx.dwtxhelper.mangoicu.UString, + dwtx.dwtxhelper.mangoicu.UCollator, + dwtx.dwtxhelper.mangoicu.UBreakIterator; + + +/******************************************************************************* + + Set of slices to return for group matching. See URegex.groups() + +*******************************************************************************/ + +class Groups : ICU +{ + public wchar[] g0, + g1, + g2, + g3, + g4, + g5, + g6, + g7, + g8, + g9; +} + +/******************************************************************************* + + Apis for an engine that provides regular-expression searching of + UTF16 strings. + + See http://icu.sourceforge.net/apiref/icu4c/uregex_8h.html for full + details. 
+ +*******************************************************************************/ + +class URegex : Groups +{ + private Handle handle; + private UStringView theText; + + // Regex modes + public enum Flag + { + None = 0, + + // Enable case insensitive matching + CaseInsensitive = 2, + + // Allow white space and comments within patterns + Comments = 4, + + // Control behavior of "$" and "^" If set, recognize + // line terminators within string, otherwise, match + // only at start and end of input string. + MultiLine = 8, + + // If set, '.' matches line terminators, otherwise '.' + // matching stops at line end + DotAll = 32, + + // Forces normalization of pattern and strings + CanonEq = 128, + + // If set, uses the Unicode TR 29 definition of word + // boundaries. Warning: Unicode word boundaries are + // quite different from traditional regular expression + // word boundaries. See http://unicode.org/reports/tr29/#Word_Boundaries + UWord = 256, + } + + /*********************************************************************** + + Compiles the regular expression in string form into an + internal representation using the specified match mode + flags. The resulting regular expression handle can then + be used to perform various matching operations. + + ***********************************************************************/ + + this (wchar[] pattern, Flag flags=Flag.None, ParseError* pe=null) + { + UErrorCode e; + + handle = uregex_open (pattern.ptr, pattern.length, flags, pe, e); + testError (e, "failed to open regex"); + uregex_setText (handle, "", 0, e); + } + + /*********************************************************************** + + Compiles the regular expression in string form into an + internal representation using the specified match mode + flags. The resulting regular expression handle can then + be used to perform various matching operations. 
+ + ***********************************************************************/ + + this (UStringView pattern, Flag flags=Flag.None, ParseError* pe=null) + { + this (pattern.get, flags, pe); + } + + /*********************************************************************** + + Internal constructor; used for cloning + + ***********************************************************************/ + + private this (Handle handle) + { + UErrorCode e; + + this.handle = handle; + uregex_setText (handle, "", 0, e); + } + + /*********************************************************************** + + Close the regular expression, recovering all resources (memory) + it was holding + + ***********************************************************************/ + + ~this () + { + uregex_close (handle); + } + + /*********************************************************************** + + Cloning a regular expression is faster than opening a second + instance from the source form of the expression, and requires + less memory. + + Note that the current input string and the position of any + matched text within it are not cloned; only the pattern itself + and and the match mode flags are copied. + + Cloning can be particularly useful to threaded applications + that perform multiple match operations in parallel. Each + concurrent RE operation requires its own instance of a + URegularExpression. 
+ + ***********************************************************************/ + + URegex clone () + { + UErrorCode e; + + Handle h = uregex_clone (handle, e); + testError (e, "failed to clone regex"); + return new URegex (h); + } + + /*********************************************************************** + + Return a copy of the source form of the pattern for this + regular expression + + ***********************************************************************/ + + UString getPattern () + { + UErrorCode e; + uint len; + + wchar* x = uregex_pattern (handle, len, e); + testError (e, "failed to extract regex pattern"); + return new UString (x[0..len]); + } + + /*********************************************************************** + + Get the match mode flags that were specified when compiling + this regular expression + + ***********************************************************************/ + + Flag getFlags () + { + UErrorCode e; + + Flag f = cast(Flag) uregex_flags (handle, e); + testError (e, "failed to get regex flags"); + return f; + } + + /*********************************************************************** + + Set the subject text string upon which the regular expression + will look for matches. + + This function may be called any number of times, allowing the + regular expression pattern to be applied to different strings. + + Regular expression matching operations work directly on the + application's string data. No copy is made. The subject string + data must not be altered after calling this function until after + all regular expression operations involving this string data are + completed. + + Zero length strings are permitted. In this case, no subsequent + match operation will dereference the text string pointer. 
+ + ***********************************************************************/ + + void setText (UStringView t) + { + UErrorCode e; + + theText = t; + uregex_setText (handle, t.get.ptr, t.length, e); + testError (e, "failed to set regex text"); + } + + /*********************************************************************** + + Get the subject text that is currently associated with this + regular expression object. This simply returns whatever was + previously supplied via setText(). + + Note that this returns a read-only reference to the text. + + ***********************************************************************/ + + UStringView getText () + { + return theText; + } + + /*********************************************************************** + + Return a set of slices representing the parenthesised groups. + This can be used in the following manner: + + @code + wchar msg; + + if (regex.next()) + with (regex.groups()) + msg ~= g1 ~ ":" ~ g2 + @endcode + + Note that g0 represents the entire match, whereas g1 through + g9 represent the parenthesised expressions. + + ***********************************************************************/ + + Groups groups () + { + wchar[]* p = &g0; + uint count = groupCount(); + wchar[] content = theText.get(); + + if (count > 9) + count = 9; + for (uint i=0; i <= count; ++p, ++i) + *p = content [start(i)..end(i)]; + return this; + } + + /*********************************************************************** + + Extract the string for the specified matching expression or + subexpression. UString 's' is the destination for the match. + + Group #0 is the complete string of matched text. Group #1 is + the text matched by the first set of capturing parentheses. 
+ + ***********************************************************************/ + + void group (UString s, uint index) + { + uint fmt (wchar* dst, uint length, inout UErrorCode e) + { + return uregex_group (handle, index, dst, length, e); + } + + s.format (&fmt, "failed to extract regex group text"); + } + + /*********************************************************************** + + Get the number of capturing groups in this regular + expression's pattern + + ***********************************************************************/ + + uint groupCount () + { + UErrorCode e; + + uint i = uregex_groupCount (handle, e); + testError (e, "failed to get regex group-count"); + return i; + } + + /*********************************************************************** + + Returns the index in the input string of the start of the + text matched by the specified capture group during the + previous match operation. + + Return -1 if the capture group was not part of the last + match. Group #0 refers to the complete range of matched + text. Group #1 refers to the text matched by the first + set of capturing parentheses + + ***********************************************************************/ + + uint start (uint index = 0) + { + UErrorCode e; + + uint i = uregex_start (handle, index, e); + testError (e, "failed to get regex start"); + return i; + } + + /*********************************************************************** + + Returns the index in the input string of the position + following the end of the text matched by the specified + capture group. + + Return -1 if the capture group was not part of the last + match. Group #0 refers to the complete range of matched + text. Group #1 refers to the text matched by the first + set of capturing parentheses. 
+ + ***********************************************************************/ + + uint end (uint index = 0) + { + UErrorCode e; + + uint i = uregex_end (handle, index, e); + testError (e, "failed to get regex end"); + return i; + } + + /*********************************************************************** + + Reset any saved state from the previous match. + + Has the effect of causing uregex_findNext to begin at the + specified index, and causing uregex_start(), uregex_end() + and uregex_group() to return an error indicating that there + is no match information available. + + ***********************************************************************/ + + void reset (uint startIndex) + { + UErrorCode e; + + uregex_reset (handle, startIndex, e); + testError (e, "failed to set regex next-index"); + } + + /*********************************************************************** + + Attempts to match the input string, beginning at startIndex, + against the pattern. + + To succeed, the match must extend to the end of the input + string + + ***********************************************************************/ + + bool match (uint startIndex) + { + UErrorCode e; + + bool b = uregex_matches (handle, startIndex, e); + testError (e, "failed while matching regex"); + return b; + } + + /*********************************************************************** + + Attempts to match the input string, starting from the + specified index, against the pattern. + + The match may be of any length, and is not required to + extend to the end of the input string. 
Contrast with match() + + ***********************************************************************/ + + bool probe (uint startIndex) + { + UErrorCode e; + + bool b = uregex_lookingAt (handle, startIndex, e); + testError (e, "failed while looking at regex"); + return b; + } + + /*********************************************************************** + + Returns whether the text matches the search pattern, starting + from the current position. + + If startIndex is specified, the current position is moved to + the specified location before the seach is initiated. + + ***********************************************************************/ + + bool next (uint startIndex = uint.max) + { + UErrorCode e; + bool b; + + b = (startIndex == uint.max) ? uregex_findNext (handle, e) : + uregex_find (handle, startIndex, e); + + testError (e, "failed on next regex"); + return b; + } + + /*********************************************************************** + + Replaces every substring of the input that matches the pattern + with the given replacement string. + + This is a convenience function that provides a complete + find-and-replace-all operation. + + This method scans the input string looking for matches of + the pattern. Input that is not part of any match is copied + unchanged to the destination buffer. Matched regions are + replaced in the output buffer by the replacement string. + The replacement string may contain references to capture + groups; these take the form of $1, $2, etc. + + The provided 'result' will contain the results, and should + be set with a length sufficient to house the entire result. + Upon completion, the 'result' is shortened appropriately + and the total extent (length) of the operation is returned. + Set the initital length of 'result' using the UString method + truncate(). + + The returned extent should be checked to ensure it is not + longer than the length of 'result'. If it is longer, then + the result has been truncated. 
+ + ***********************************************************************/ + + uint replaceAll (UStringView replace, UString result) + { + UErrorCode e; + + uint len = uregex_replaceAll (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); + testError (e, "failed during regex replace"); + result.truncate (len); + return len; + } + + /*********************************************************************** + + Replaces the first substring of the input that matches the + pattern with the given replacement string. + + This is a convenience function that provides a complete + find-and-replace operation. + + This method scans the input string looking for a match of + the pattern. All input that is not part of the match is + copied unchanged to the destination buffer. The matched + region is replaced in the output buffer by the replacement + string. The replacement string may contain references to + capture groups; these take the form of $1, $2, etc + + The provided 'result' will contain the results, and should + be set with a length sufficient to house the entire result. + Upon completion, the 'result' is shortened appropriately + and the total extent (length) of the operation is returned. + Set the initital length of 'result' using the UString method + truncate(). + + The returned extent should be checked to ensure it is not + longer than the length of 'result'. If it is longer, then + the result has been truncated. 
+ + ***********************************************************************/ + + uint replaceFirst (UStringView replace, UString result) + { + UErrorCode e; + + uint len = uregex_replaceFirst (handle, replace.get.ptr, replace.length, result.get.ptr, result.length, e); + testError (e, "failed during regex replace"); + result.truncate (len); + return len; + } + + /*********************************************************************** + + Split the text up into slices (fields), where each slice + represents the text situated between each pattern matched + within the text. The pattern is expected to represent one + or more slice delimiters. + + ***********************************************************************/ + + uint split (wchar[][] fields) + { + UErrorCode e; + uint pos, + count; + wchar[] content = theText.get; + + while (count < fields.length) + if (uregex_findNext (handle, e) && e == e.OK) + { + uint i = start(); + fields[count] = content[pos..i]; + pos = end (); + + // ignore leading delimiter + if (i) + ++count; + } + else + break; + + testError (e, "failed during split"); + return count; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, uint, ParseError*, inout UErrorCode) uregex_open; + void function (Handle) uregex_close; + Handle function (Handle, inout UErrorCode) uregex_clone; + wchar* function (Handle, inout uint, inout UErrorCode) uregex_pattern; + uint function (Handle, inout UErrorCode) uregex_flags; + void function (Handle, wchar*, uint, inout UErrorCode) uregex_setText; + wchar* function (Handle, inout uint, inout UErrorCode) uregex_getText; + uint function (Handle, uint, wchar*, uint, inout UErrorCode) uregex_group; + uint function (Handle, inout UErrorCode) uregex_groupCount; + uint function (Handle, uint, inout UErrorCode) uregex_start; + uint function (Handle, uint, inout UErrorCode) uregex_end; + void function (Handle, uint, inout UErrorCode) uregex_reset; + bool function (Handle, uint, inout UErrorCode) uregex_matches; + bool function (Handle, uint, inout UErrorCode) uregex_lookingAt; + bool function (Handle, uint, inout UErrorCode) uregex_find; + bool function (Handle, inout UErrorCode) uregex_findNext; + uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceAll; + uint function (Handle, wchar*, uint, wchar*, uint, inout UErrorCode) uregex_replaceFirst; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uregex_open, "uregex_open"}, + {cast(void**) &uregex_close, "uregex_close"}, + {cast(void**) &uregex_clone, "uregex_clone"}, + {cast(void**) &uregex_pattern, "uregex_pattern"}, 
+ {cast(void**) &uregex_flags, "uregex_flags"}, + {cast(void**) &uregex_setText, "uregex_setText"}, + {cast(void**) &uregex_getText, "uregex_getText"}, + {cast(void**) &uregex_group, "uregex_group"}, + {cast(void**) &uregex_groupCount, "uregex_groupCount"}, + {cast(void**) &uregex_start, "uregex_start"}, + {cast(void**) &uregex_end, "uregex_end"}, + {cast(void**) &uregex_reset, "uregex_reset"}, + {cast(void**) &uregex_matches, "uregex_matches"}, + {cast(void**) &uregex_lookingAt, "uregex_lookingAt"}, + {cast(void**) &uregex_find, "uregex_find"}, + {cast(void**) &uregex_findNext, "uregex_findNext"}, + {cast(void**) &uregex_replaceAll, "uregex_replaceAll"}, + {cast(void**) &uregex_replaceFirst, "uregex_replaceFirst"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UResourceBundle.d --- a/dwtx/dwtxhelper/mangoicu/UResourceBundle.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UResourceBundle.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,544 +1,544 @@ -/******************************************************************************* - - @file UResourceBundle.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. 
The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UResourceBundle; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -public import dwtx.dwtxhelper.mangoicu.ULocale; - -/******************************************************************************* - - API representing a collection of resource information pertaining to - a given locale. A resource bundle provides a way of accessing locale- - specific information in a data file. You create a resource bundle that - manages the resources for a given locale and then ask it for individual - resources. - - Resource bundles in ICU4C are currently defined using text files which - conform to the following BNF definition. More on resource bundle concepts - and syntax can be found in the Users Guide. - - See - this page for full details. 
- -*******************************************************************************/ - -class UResourceBundle : ICU -{ - private Handle handle; - - /*********************************************************************** - - Internals opened up to the public - - ***********************************************************************/ - - // Numeric constants for types of resource items - public enum ResType - { - None = -1, - String = 0, - Binary = 1, - Table = 2, - Alias = 3, - Int = 7, - Array = 8, - IntVector = 14 - } - - /*********************************************************************** - - private constructor for internal use only - - ***********************************************************************/ - - private this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Constructs a resource bundle for the locale-specific bundle - in the specified path. - - locale This is the locale this resource bundle is for. To - get resources for the French locale, for example, you - would create a ResourceBundle passing ULocale::FRENCH - for the "locale" parameter, and all subsequent calls - to that resource bundle will return resources that - pertain to the French locale. If the caller passes a - Locale.Default parameter, the default locale for the - system (as returned by ULocale.getDefault()) will be - used. Passing Locale.Root will cause the root-locale - to be used. - - path This is a full pathname in the platform-specific - format for the directory containing the resource - data files we want to load resources from. We use - locale IDs to generate filenames, and the filenames - have this string prepended to them before being passed - to the C++ I/O functions. Therefore, this string must - always end with a directory delimiter (whatever that - is for the target OS) for this class to work correctly. 
- A null value will open the default ICU data-files - - ***********************************************************************/ - - this (inout ULocale locale, char[] path = null) - { - UErrorCode e; - - handle = ures_open (toString(path), toString(locale.name), e); - testError (e, "failed to open resource bundle"); - } - - /*********************************************************************** - - ***********************************************************************/ - - ~this () - { - ures_close (handle); - } - - /*********************************************************************** - - Returns the size of a resource. Size for scalar types is - always 1, and for vector/table types is the number of child - resources. - - ***********************************************************************/ - - uint getSize () - { - return ures_getSize (handle); - } - - /*********************************************************************** - - Returns a signed integer from a resource. This integer is - originally 28 bit and the sign gets propagated. 
- - ***********************************************************************/ - - int getInt () - { - UErrorCode e; - - int x = ures_getInt (handle, e); - testError (e, "failed to get resource integer"); - return x; - } - - /*********************************************************************** - - Returns a string from a string resource type - - ***********************************************************************/ - - UStringView getString () - { - UErrorCode e; - uint len; - - wchar* x = ures_getString (handle, len, e); - testError (e, "failed to get resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns the string in a given resource at the specified - index - - ***********************************************************************/ - - UStringView getString (uint index) - { - UErrorCode e; - uint len; - - wchar* x = ures_getStringByIndex (handle, index, len, e); - testError (e, "failed to get resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns a string in a resource that has a given key. This - procedure works only with table resources. 
- - ***********************************************************************/ - - UStringView getString (char[] key) - { - UErrorCode e; - uint len; - - wchar* x = ures_getStringByKey (handle, toString(key), len, e); - testError (e, "failed to get resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns the next string in a resource or NULL if there are - no more resources to iterate over - - ***********************************************************************/ - - UStringView getNextString () - { - UErrorCode e; - uint len; - char* key; - - wchar* x = ures_getNextString (handle, len, key, e); - testError (e, "failed to get next resource string"); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Returns a binary data from a resource. Can be used at most - primitive resource types (binaries, strings, ints) - - ***********************************************************************/ - - void[] getBinary () - { - UErrorCode e; - uint len; - - void* x = ures_getBinary (handle, len, e); - testError (e, "failed to get binary resource"); - return x[0..len]; - } - - /*********************************************************************** - - Returns an integer vector from a resource - - ***********************************************************************/ - - int[] getIntVector () - { - UErrorCode e; - uint len; - - int* x = ures_getIntVector (handle, len, e); - testError (e, "failed to get vector resource"); - return x[0..len]; - } - - /*********************************************************************** - - Checks whether the resource has another element to - iterate over - - ***********************************************************************/ - - bool hasNext () - { - return ures_hasNext (handle) != 0; - } - - /*********************************************************************** - - Resets 
the internal context of a resource so that - iteration starts from the first element - - ***********************************************************************/ - - void resetIterator () - { - ures_resetIterator (handle); - } - - /*********************************************************************** - - Returns the next resource in a given resource or NULL if - there are no more resources - - ***********************************************************************/ - - UResourceBundle getNextResource () - { - UErrorCode e; - - return get (ures_getNextResource (handle, null, e), e); - } - - /*********************************************************************** - - Returns a resource that has a given key. This procedure - works only with table resources. - - ***********************************************************************/ - - UResourceBundle getResource (char[] key) - { - UErrorCode e; - - return get (ures_getByKey (handle, toString(key), null, e), e); - } - - /*********************************************************************** - - Returns the resource at the specified index - - ***********************************************************************/ - - UResourceBundle getResource (uint index) - { - UErrorCode e; - - return get (ures_getByIndex (handle, index, null, e), e); - } - - /*********************************************************************** - - Return the version number associated with this ResourceBundle - as a UVersionInfo array - - ***********************************************************************/ - - void getVersion (inout Version info) - { - ures_getVersion (handle, info); - } - - /*********************************************************************** - - Return the ULocale associated with this ResourceBundle - - ***********************************************************************/ - - void getLocale (inout ULocale locale) - { - UErrorCode e; - - locale.name = toArray (ures_getLocale (handle, e)); - testError (e, "failed 
to get resource locale"); - } - - /*********************************************************************** - - Returns the key associated with this resource. Not all - the resources have a key - only those that are members - of a table. - - ***********************************************************************/ - - char[] getKey () - { - return toArray (ures_getKey (handle)); - } - - /*********************************************************************** - - Returns the type of a resource. Available types are - defined in enum UResType - - ***********************************************************************/ - - ResType getType () - { - return cast(ResType) ures_getType (handle); - } - - /*********************************************************************** - - Worker function for constructing internal ResourceBundle - instances. Returns null when the provided handle is null. - - ***********************************************************************/ - - private static final UResourceBundle get (Handle handle, inout UErrorCode e) - { - testError (e, "failed to create resource bundle"); - if (handle) - return new UResourceBundle (handle); - return null; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (char*, char*, inout UErrorCode) ures_open; - void function (Handle) ures_close; - char* function (Handle, inout UErrorCode) ures_getLocale; - void function (Handle, inout Version) ures_getVersion; - uint function (Handle) ures_getSize; - int function (Handle, inout UErrorCode) ures_getInt; - wchar* function (Handle, inout uint, inout UErrorCode) ures_getString; - wchar* function (Handle, uint, inout uint, inout UErrorCode) ures_getStringByIndex; - wchar* function (Handle, char*, inout uint, inout UErrorCode) ures_getStringByKey; - void* function (Handle, inout uint, inout UErrorCode) ures_getBinary; - int* function (Handle, inout uint, inout UErrorCode) ures_getIntVector; - byte function (Handle) ures_hasNext; - void function (Handle) ures_resetIterator; - wchar* function (Handle, inout uint, inout char*, inout UErrorCode) ures_getNextString; - char* function (Handle) ures_getKey; - int function (Handle) ures_getType; - Handle function (Handle, Handle, inout UErrorCode) ures_getNextResource; - Handle function (Handle, uint, Handle, inout UErrorCode) ures_getByIndex; - Handle function (Handle, char*, Handle, inout UErrorCode) ures_getByKey; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ures_open, "ures_open"}, - {cast(void**) &ures_close, "ures_close"}, - {cast(void**) &ures_getLocale, "ures_getLocale"}, - {cast(void**) &ures_getVersion, "ures_getVersion"}, - {cast(void**) &ures_getSize, 
"ures_getSize"}, - {cast(void**) &ures_getInt, "ures_getInt"}, - {cast(void**) &ures_getString, "ures_getString"}, - {cast(void**) &ures_getStringByIndex, "ures_getStringByIndex"}, - {cast(void**) &ures_getStringByKey, "ures_getStringByKey"}, - {cast(void**) &ures_getBinary, "ures_getBinary"}, - {cast(void**) &ures_hasNext, "ures_hasNext"}, - {cast(void**) &ures_resetIterator, "ures_resetIterator"}, - {cast(void**) &ures_getNextString, "ures_getNextString"}, - {cast(void**) &ures_getKey, "ures_getKey"}, - {cast(void**) &ures_getType, "ures_getType"}, - {cast(void**) &ures_getNextResource, "ures_getNextResource"}, - {cast(void**) &ures_getByIndex, "ures_getByIndex"}, - {cast(void**) &ures_getByKey, "ures_getByKey"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - - /*********************************************************************** - - ***********************************************************************/ - - static void test() - { - UResourceBundle b = new UResourceBundle (ULocale.Default); - UStringView t = b.getNextString(); - UResourceBundle b1 = b.getNextResource (); - } -} - - +/******************************************************************************* + + @file UResourceBundle.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UResourceBundle; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +public import dwtx.dwtxhelper.mangoicu.ULocale; + +/******************************************************************************* + + API representing a collection of resource information pertaining to + a given locale. A resource bundle provides a way of accessing locale- + specific information in a data file. You create a resource bundle that + manages the resources for a given locale and then ask it for individual + resources. + + Resource bundles in ICU4C are currently defined using text files which + conform to the following BNF definition. More on resource bundle concepts + and syntax can be found in the Users Guide. + + See + this page for full details. 
+ +*******************************************************************************/ + +class UResourceBundle : ICU +{ + private Handle handle; + + /*********************************************************************** + + Internals opened up to the public + + ***********************************************************************/ + + // Numeric constants for types of resource items + public enum ResType + { + None = -1, + String = 0, + Binary = 1, + Table = 2, + Alias = 3, + Int = 7, + Array = 8, + IntVector = 14 + } + + /*********************************************************************** + + private constructor for internal use only + + ***********************************************************************/ + + private this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Constructs a resource bundle for the locale-specific bundle + in the specified path. + + locale This is the locale this resource bundle is for. To + get resources for the French locale, for example, you + would create a ResourceBundle passing ULocale::FRENCH + for the "locale" parameter, and all subsequent calls + to that resource bundle will return resources that + pertain to the French locale. If the caller passes a + Locale.Default parameter, the default locale for the + system (as returned by ULocale.getDefault()) will be + used. Passing Locale.Root will cause the root-locale + to be used. + + path This is a full pathname in the platform-specific + format for the directory containing the resource + data files we want to load resources from. We use + locale IDs to generate filenames, and the filenames + have this string prepended to them before being passed + to the C++ I/O functions. Therefore, this string must + always end with a directory delimiter (whatever that + is for the target OS) for this class to work correctly. 
+ A null value will open the default ICU data-files + + ***********************************************************************/ + + this (inout ULocale locale, char[] path = null) + { + UErrorCode e; + + handle = ures_open (toString(path), toString(locale.name), e); + testError (e, "failed to open resource bundle"); + } + + /*********************************************************************** + + ***********************************************************************/ + + ~this () + { + ures_close (handle); + } + + /*********************************************************************** + + Returns the size of a resource. Size for scalar types is + always 1, and for vector/table types is the number of child + resources. + + ***********************************************************************/ + + uint getSize () + { + return ures_getSize (handle); + } + + /*********************************************************************** + + Returns a signed integer from a resource. This integer is + originally 28 bit and the sign gets propagated. 
+ + ***********************************************************************/ + + int getInt () + { + UErrorCode e; + + int x = ures_getInt (handle, e); + testError (e, "failed to get resource integer"); + return x; + } + + /*********************************************************************** + + Returns a string from a string resource type + + ***********************************************************************/ + + UStringView getString () + { + UErrorCode e; + uint len; + + wchar* x = ures_getString (handle, len, e); + testError (e, "failed to get resource string"); + return new UStringView (x[0..len]); + } + + /*********************************************************************** + + Returns the string in a given resource at the specified + index + + ***********************************************************************/ + + UStringView getString (uint index) + { + UErrorCode e; + uint len; + + wchar* x = ures_getStringByIndex (handle, index, len, e); + testError (e, "failed to get resource string"); + return new UStringView (x[0..len]); + } + + /*********************************************************************** + + Returns a string in a resource that has a given key. This + procedure works only with table resources. 
+ + ***********************************************************************/ + + UStringView getString (char[] key) + { + UErrorCode e; + uint len; + + wchar* x = ures_getStringByKey (handle, toString(key), len, e); + testError (e, "failed to get resource string"); + return new UStringView (x[0..len]); + } + + /*********************************************************************** + + Returns the next string in a resource or NULL if there are + no more resources to iterate over + + ***********************************************************************/ + + UStringView getNextString () + { + UErrorCode e; + uint len; + char* key; + + wchar* x = ures_getNextString (handle, len, key, e); + testError (e, "failed to get next resource string"); + return new UStringView (x[0..len]); + } + + /*********************************************************************** + + Returns a binary data from a resource. Can be used at most + primitive resource types (binaries, strings, ints) + + ***********************************************************************/ + + void[] getBinary () + { + UErrorCode e; + uint len; + + void* x = ures_getBinary (handle, len, e); + testError (e, "failed to get binary resource"); + return x[0..len]; + } + + /*********************************************************************** + + Returns an integer vector from a resource + + ***********************************************************************/ + + int[] getIntVector () + { + UErrorCode e; + uint len; + + int* x = ures_getIntVector (handle, len, e); + testError (e, "failed to get vector resource"); + return x[0..len]; + } + + /*********************************************************************** + + Checks whether the resource has another element to + iterate over + + ***********************************************************************/ + + bool hasNext () + { + return ures_hasNext (handle) != 0; + } + + /*********************************************************************** + + Resets 
the internal context of a resource so that + iteration starts from the first element + + ***********************************************************************/ + + void resetIterator () + { + ures_resetIterator (handle); + } + + /*********************************************************************** + + Returns the next resource in a given resource or NULL if + there are no more resources + + ***********************************************************************/ + + UResourceBundle getNextResource () + { + UErrorCode e; + + return get (ures_getNextResource (handle, null, e), e); + } + + /*********************************************************************** + + Returns a resource that has a given key. This procedure + works only with table resources. + + ***********************************************************************/ + + UResourceBundle getResource (char[] key) + { + UErrorCode e; + + return get (ures_getByKey (handle, toString(key), null, e), e); + } + + /*********************************************************************** + + Returns the resource at the specified index + + ***********************************************************************/ + + UResourceBundle getResource (uint index) + { + UErrorCode e; + + return get (ures_getByIndex (handle, index, null, e), e); + } + + /*********************************************************************** + + Return the version number associated with this ResourceBundle + as a UVersionInfo array + + ***********************************************************************/ + + void getVersion (inout Version info) + { + ures_getVersion (handle, info); + } + + /*********************************************************************** + + Return the ULocale associated with this ResourceBundle + + ***********************************************************************/ + + void getLocale (inout ULocale locale) + { + UErrorCode e; + + locale.name = toArray (ures_getLocale (handle, e)); + testError (e, "failed 
to get resource locale"); + } + + /*********************************************************************** + + Returns the key associated with this resource. Not all + the resources have a key - only those that are members + of a table. + + ***********************************************************************/ + + char[] getKey () + { + return toArray (ures_getKey (handle)); + } + + /*********************************************************************** + + Returns the type of a resource. Available types are + defined in enum UResType + + ***********************************************************************/ + + ResType getType () + { + return cast(ResType) ures_getType (handle); + } + + /*********************************************************************** + + Worker function for constructing internal ResourceBundle + instances. Returns null when the provided handle is null. + + ***********************************************************************/ + + private static final UResourceBundle get (Handle handle, inout UErrorCode e) + { + testError (e, "failed to create resource bundle"); + if (handle) + return new UResourceBundle (handle); + return null; + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (char*, char*, inout UErrorCode) ures_open; + void function (Handle) ures_close; + char* function (Handle, inout UErrorCode) ures_getLocale; + void function (Handle, inout Version) ures_getVersion; + uint function (Handle) ures_getSize; + int function (Handle, inout UErrorCode) ures_getInt; + wchar* function (Handle, inout uint, inout UErrorCode) ures_getString; + wchar* function (Handle, uint, inout uint, inout UErrorCode) ures_getStringByIndex; + wchar* function (Handle, char*, inout uint, inout UErrorCode) ures_getStringByKey; + void* function (Handle, inout uint, inout UErrorCode) ures_getBinary; + int* function (Handle, inout uint, inout UErrorCode) ures_getIntVector; + byte function (Handle) ures_hasNext; + void function (Handle) ures_resetIterator; + wchar* function (Handle, inout uint, inout char*, inout UErrorCode) ures_getNextString; + char* function (Handle) ures_getKey; + int function (Handle) ures_getType; + Handle function (Handle, Handle, inout UErrorCode) ures_getNextResource; + Handle function (Handle, uint, Handle, inout UErrorCode) ures_getByIndex; + Handle function (Handle, char*, Handle, inout UErrorCode) ures_getByKey; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ures_open, "ures_open"}, + {cast(void**) &ures_close, "ures_close"}, + {cast(void**) &ures_getLocale, "ures_getLocale"}, + {cast(void**) &ures_getVersion, "ures_getVersion"}, + {cast(void**) &ures_getSize, 
"ures_getSize"}, + {cast(void**) &ures_getInt, "ures_getInt"}, + {cast(void**) &ures_getString, "ures_getString"}, + {cast(void**) &ures_getStringByIndex, "ures_getStringByIndex"}, + {cast(void**) &ures_getStringByKey, "ures_getStringByKey"}, + {cast(void**) &ures_getBinary, "ures_getBinary"}, + {cast(void**) &ures_hasNext, "ures_hasNext"}, + {cast(void**) &ures_resetIterator, "ures_resetIterator"}, + {cast(void**) &ures_getNextString, "ures_getNextString"}, + {cast(void**) &ures_getKey, "ures_getKey"}, + {cast(void**) &ures_getType, "ures_getType"}, + {cast(void**) &ures_getNextResource, "ures_getNextResource"}, + {cast(void**) &ures_getByIndex, "ures_getByIndex"}, + {cast(void**) &ures_getByKey, "ures_getByKey"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + //test (); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + + /*********************************************************************** + + ***********************************************************************/ + + static void test() + { + UResourceBundle b = new UResourceBundle (ULocale.Default); + UStringView t = b.getNextString(); + UResourceBundle b1 = b.getNextResource (); + } +} + + diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/USearch.d --- a/dwtx/dwtxhelper/mangoicu/USearch.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/USearch.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,607 +1,607 @@ -/******************************************************************************* - - @file USearch.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. 
In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.USearch; - -private import dwtx.dwtxhelper.mangoicu.ICU; - -public import dwtx.dwtxhelper.mangoicu.ULocale, - dwtx.dwtxhelper.mangoicu.UString, - dwtx.dwtxhelper.mangoicu.UCollator, - dwtx.dwtxhelper.mangoicu.UBreakIterator; - -/******************************************************************************* - - Apis for an engine that provides language-sensitive text - searching based on the comparison rules defined in a UCollator - data struct. This ensures that language eccentricity can be handled, - e.g. for the German collator, characters ß and SS will be matched - if case is chosen to be ignored. See the "ICU Collation Design - Document" for more information. - - The algorithm implemented is a modified form of the Boyer Moore's - search. For more information see "Efficient Text Searching in Java", - published in Java Report in February, 1999, for further information - on the algorithm. - - There are 2 match options for selection: Let S' be the sub-string - of a text string S between the offsets start and end . A - pattern string P matches a text string S at the offsets if - - - option 1. Some canonical equivalent of P matches some canonical - equivalent of S' - - - option 2. P matches S' and if P starts or ends with a combining - mark, there exists no non-ignorable combining mark before - or after S' in S respectively. - - Option 2 will be the default - - This search has APIs similar to that of other text iteration - mechanisms such as the break iterators in ubrk.h. Using these - APIs, it is easy to scan through text looking for all occurances - of a given pattern. This search iterator allows changing of - direction by calling a reset followed by a next or previous. - Though a direction change can occur without calling reset first, - this operation comes with some speed penalty. 
Generally, match - results in the forward direction will match the result matches - in the backwards direction in the reverse order - - USearch provides APIs to specify the starting position within - the text string to be searched, e.g. setOffset(), previous(x) - and next(x). Since the starting position will be set as it - is specified, please take note that there are some dangerous - positions which the search may render incorrect results: - - - The midst of a substring that requires normalization. - - - If the following match is to be found, the position should - not be the second character which requires to be swapped - with the preceding character. Vice versa, if the preceding - match is to be found, position to search from should not be - the first character which requires to be swapped with the - next character. E.g certain Thai and Lao characters require - swapping. - - - If a following pattern match is to be found, any position - within a contracting sequence except the first will fail. - Vice versa if a preceding pattern match is to be found, - a invalid starting point would be any character within a - contracting sequence except the last. - - A breakiterator can be used if only matches at logical breaks are - desired. Using a breakiterator will only give you results that - exactly matches the boundaries given by the breakiterator. For - instance the pattern "e" will not be found in the string "\u00e9" - if a character break iterator is used. - - Options are provided to handle overlapping matches. E.g. In - English, overlapping matches produces the result 0 and 2 for - the pattern "abab" in the text "ababab", where else mutually - exclusive matches only produce the result of 0. - - Though collator attributes will be taken into consideration while - performing matches, there are no APIs here for setting and getting - the attributes. These attributes can be set by getting the collator - from getCollator() and using the APIs in UCollator. 
Lastly to update - String Search to the new collator attributes, reset() has to be called. - - See http://oss.software.ibm.com/icu/apiref/usearch_8h.html for full - details. - -*******************************************************************************/ - -class USearch : ICU -{ - private Handle handle; - private UBreakIterator* iterator; - - // DONE is returned by previous() and next() after all valid - // matches have been returned, and by first() and last() if - // there are no matches at all. - const uint Done = uint.max; - - //Possible types of searches - public enum Attribute - { - Overlap, - CanonicalMatch, - Count - } - - public enum AttributeValue - { - Default = -1, - Off, - On, - Count - } - - /*********************************************************************** - - Creating a search iterator data struct using the argument - locale language rule set - - ***********************************************************************/ - - this (UStringView pattern, UStringView text, inout ULocale locale, UBreakIterator* iterator = null) - { - UErrorCode e; - - this.iterator = iterator; - handle = usearch_open (pattern.get.ptr, pattern.length, text.get.ptr, text.length, toString(locale.name), ( iterator is null ) ? null : iterator.handle, e); - testError (e, "failed to open search"); - } - - /*********************************************************************** - - Creating a search iterator data struct using the argument - locale language rule set - - ***********************************************************************/ - - this (UStringView pattern, UStringView text, UCollator col, UBreakIterator* iterator = null) - { - UErrorCode e; - - this.iterator = iterator; - handle = usearch_openFromCollator (pattern.get.ptr, pattern.length, text.get.ptr, text.length, col.handle, ( iterator is null ) ? 
null : iterator.handle, e); - testError (e, "failed to open search from collator"); - } - - /*********************************************************************** - - Close this USearch - - ***********************************************************************/ - - ~this () - { - usearch_close (handle); - } - - /*********************************************************************** - - Sets the current position in the text string which the - next search will start from. - - ***********************************************************************/ - - void setOffset (uint position) - { - UErrorCode e; - - usearch_setOffset (handle, position, e); - testError (e, "failed to set search offset"); - } - - /*********************************************************************** - - Return the current index in the string text being searched - - ***********************************************************************/ - - uint getOffset () - { - return usearch_getOffset (handle); - } - - /*********************************************************************** - - Returns the index to the match in the text string that was - searched - - ***********************************************************************/ - - uint getMatchedStart () - { - return usearch_getMatchedStart (handle); - } - - /*********************************************************************** - - Returns the length of text in the string which matches the - search pattern - - ***********************************************************************/ - - uint getMatchedLength () - { - return usearch_getMatchedLength (handle); - } - - /*********************************************************************** - - Returns the text that was matched by the most recent call to - first(), next(), previous(), or last(). 
- - ***********************************************************************/ - - void getMatchedText (UString s) - { - uint fmt (wchar* dst, uint length, inout UErrorCode e) - { - return usearch_getMatchedText (handle, dst, length, e); - } - - s.format (&fmt, "failed to extract matched text"); - } - - /*********************************************************************** - - Set the string text to be searched. - - ***********************************************************************/ - - void setText (UStringView t) - { - UErrorCode e; - - usearch_setText (handle, t.get.ptr, t.length, e); - testError (e, "failed to set search text"); - } - - /*********************************************************************** - - Return the string text to be searched. Note that this - returns a read-only reference to the search text. - - ***********************************************************************/ - - UStringView getText () - { - uint len; - - wchar *x = usearch_getText (handle, &len); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Sets the pattern used for matching - - ***********************************************************************/ - - void setPattern (UStringView t) - { - UErrorCode e; - - usearch_setPattern (handle, t.get.ptr, t.length, e); - testError (e, "failed to set search pattern"); - } - - /*********************************************************************** - - Gets the search pattern. Note that this returns a - read-only reference to the pattern. - - ***********************************************************************/ - - UStringView getPattern () - { - uint len; - - wchar *x = usearch_getPattern (handle, &len); - return new UStringView (x[0..len]); - } - - /*********************************************************************** - - Set the BreakIterator that will be used to restrict the - points at which matches are detected. 
- - ***********************************************************************/ - - void setIterator (UBreakIterator* iterator) - { - UErrorCode e; - - this.iterator = iterator; - usearch_setBreakIterator (handle, cast(Handle)iterator.handle, e); - testError (e, "failed to set search iterator"); - } - - /*********************************************************************** - - Get the BreakIterator that will be used to restrict the - points at which matches are detected. - - ***********************************************************************/ - - UBreakIterator* getIterator () - { - return iterator; - } - - /*********************************************************************** - - Returns the first index at which the string text matches - the search pattern - - ***********************************************************************/ - - uint first () - { - UErrorCode e; - - uint x = usearch_first (handle, e); - testError (e, "failed on first search"); - return x; - } - - /*********************************************************************** - - Returns the last index in the target text at which it - matches the search pattern - - ***********************************************************************/ - - uint last () - { - UErrorCode e; - - uint x = usearch_last (handle, e); - testError (e, "failed on last search"); - return x; - } - - /*********************************************************************** - - Returns the index of the next point at which the string - text matches the search pattern, starting from the current - position. - - If pos is specified, returns the first index greater than - pos at which the string text matches the search pattern - - ***********************************************************************/ - - uint next (uint pos = uint.max) - { - UErrorCode e; - uint x; - - x = (pos == uint.max) ? 
usearch_next (handle, e) : - usearch_following (handle, pos, e); - - testError (e, "failed on next search"); - return x; - } - - /*********************************************************************** - - Returns the index of the previous point at which the - string text matches the search pattern, starting at - the current position. - - If pos is specified, returns the first index less - than pos at which the string text matches the search - pattern. - - ***********************************************************************/ - - uint previous (uint pos = uint.max) - { - UErrorCode e; - uint x; - - x = (pos == uint.max) ? usearch_previous (handle, e) : - usearch_preceding (handle, pos, e); - - testError (e, "failed on next search"); - return x; - } - - /*********************************************************************** - - Search will begin at the start of the text string if a - forward iteration is initiated before a backwards iteration. - Otherwise if a backwards iteration is initiated before a - forwards iteration, the search will begin at the end of the - text string - - ***********************************************************************/ - - void reset () - { - usearch_reset (handle); - } - - /*********************************************************************** - - Gets the collator used for the language rules. - - ***********************************************************************/ - - UCollator getCollator () - { - return new UCollator (usearch_getCollator (handle)); - } - - /*********************************************************************** - - Sets the collator used for the language rules. 
This - method causes internal data such as Boyer-Moore shift - tables to be recalculated, but the iterator's position - is unchanged - - ***********************************************************************/ - - void setCollator (UCollator col) - { - UErrorCode e; - - usearch_setCollator (handle, col.handle, e); - testError (e, "failed to set search collator"); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, wchar*, uint, char*, void*, inout UErrorCode) usearch_open; - Handle function (wchar*, uint, wchar*, uint, Handle, void*, inout UErrorCode) usearch_openFromCollator; - void function (Handle) usearch_close; - void function (Handle, uint, inout UErrorCode) usearch_setOffset; - uint function (Handle) usearch_getOffset; - uint function (Handle) usearch_getMatchedStart; - uint function (Handle) usearch_getMatchedLength; - uint function (Handle, wchar*, uint, inout UErrorCode) usearch_getMatchedText; - void function (Handle, wchar*, uint, inout UErrorCode) usearch_setText; - wchar* function (Handle, uint*) usearch_getText; - void function (Handle, wchar*, uint, inout UErrorCode) usearch_setPattern; - wchar* function (Handle, uint*) usearch_getPattern; - uint function (Handle, inout UErrorCode) usearch_first; - uint function (Handle, inout UErrorCode) usearch_last; - uint function (Handle, inout UErrorCode) usearch_next; - uint function (Handle, inout UErrorCode) usearch_previous; - uint function (Handle, uint, inout UErrorCode) usearch_following; - uint function (Handle, 
uint, inout UErrorCode) usearch_preceding; - void function (Handle) usearch_reset; - void function (Handle, Handle, inout UErrorCode) usearch_setBreakIterator; - Handle function (Handle) usearch_getCollator; - void function (Handle, Handle, inout UErrorCode) usearch_setCollator; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &usearch_open, "usearch_open"}, - {cast(void**) &usearch_openFromCollator, "usearch_openFromCollator"}, - {cast(void**) &usearch_close, "usearch_close"}, - {cast(void**) &usearch_setOffset, "usearch_setOffset"}, - {cast(void**) &usearch_getOffset, "usearch_getOffset"}, - {cast(void**) &usearch_getMatchedStart, "usearch_getMatchedStart"}, - {cast(void**) &usearch_getMatchedLength, "usearch_getMatchedLength"}, - {cast(void**) &usearch_getMatchedText, "usearch_getMatchedText"}, - {cast(void**) &usearch_setText, "usearch_setText"}, - {cast(void**) &usearch_getText, "usearch_getText"}, - {cast(void**) &usearch_setPattern, "usearch_setPattern"}, - {cast(void**) &usearch_getPattern, "usearch_getPattern"}, - {cast(void**) &usearch_first, "usearch_first"}, - {cast(void**) &usearch_last, "usearch_last"}, - {cast(void**) &usearch_next, "usearch_next"}, - {cast(void**) &usearch_previous, "usearch_previous"}, - {cast(void**) &usearch_following, "usearch_following"}, - {cast(void**) &usearch_preceding, "usearch_preceding"}, - {cast(void**) &usearch_reset, "usearch_reset"}, - {cast(void**) &usearch_setBreakIterator, "usearch_setBreakIterator"}, - {cast(void**) &usearch_getCollator, "usearch_getCollator"}, - {cast(void**) &usearch_setCollator, "usearch_setCollator"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, 
targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file USearch.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. 
+ +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.USearch; + +private import dwtx.dwtxhelper.mangoicu.ICU; + +public import dwtx.dwtxhelper.mangoicu.ULocale, + dwtx.dwtxhelper.mangoicu.UString, + dwtx.dwtxhelper.mangoicu.UCollator, + dwtx.dwtxhelper.mangoicu.UBreakIterator; + +/******************************************************************************* + + Apis for an engine that provides language-sensitive text + searching based on the comparison rules defined in a UCollator + data struct. This ensures that language eccentricity can be handled, + e.g. for the German collator, characters ß and SS will be matched + if case is chosen to be ignored. See the "ICU Collation Design + Document" for more information. + + The algorithm implemented is a modified form of the Boyer Moore's + search. For more information see "Efficient Text Searching in Java", + published in Java Report in February, 1999, for further information + on the algorithm. + + There are 2 match options for selection: Let S' be the sub-string + of a text string S between the offsets start and end . A + pattern string P matches a text string S at the offsets if + + - option 1. Some canonical equivalent of P matches some canonical + equivalent of S' + + - option 2. P matches S' and if P starts or ends with a combining + mark, there exists no non-ignorable combining mark before + or after S' in S respectively. + + Option 2 will be the default + + This search has APIs similar to that of other text iteration + mechanisms such as the break iterators in ubrk.h. Using these + APIs, it is easy to scan through text looking for all occurances + of a given pattern. This search iterator allows changing of + direction by calling a reset followed by a next or previous. + Though a direction change can occur without calling reset first, + this operation comes with some speed penalty. 
Generally, match + results in the forward direction will match the result matches + in the backwards direction in the reverse order + + USearch provides APIs to specify the starting position within + the text string to be searched, e.g. setOffset(), previous(x) + and next(x). Since the starting position will be set as it + is specified, please take note that there are some dangerous + positions which the search may render incorrect results: + + - The midst of a substring that requires normalization. + + - If the following match is to be found, the position should + not be the second character which requires to be swapped + with the preceding character. Vice versa, if the preceding + match is to be found, position to search from should not be + the first character which requires to be swapped with the + next character. E.g certain Thai and Lao characters require + swapping. + + - If a following pattern match is to be found, any position + within a contracting sequence except the first will fail. + Vice versa if a preceding pattern match is to be found, + a invalid starting point would be any character within a + contracting sequence except the last. + + A breakiterator can be used if only matches at logical breaks are + desired. Using a breakiterator will only give you results that + exactly matches the boundaries given by the breakiterator. For + instance the pattern "e" will not be found in the string "\u00e9" + if a character break iterator is used. + + Options are provided to handle overlapping matches. E.g. In + English, overlapping matches produces the result 0 and 2 for + the pattern "abab" in the text "ababab", where else mutually + exclusive matches only produce the result of 0. + + Though collator attributes will be taken into consideration while + performing matches, there are no APIs here for setting and getting + the attributes. These attributes can be set by getting the collator + from getCollator() and using the APIs in UCollator. 
Lastly to update + String Search to the new collator attributes, reset() has to be called. + + See http://oss.software.ibm.com/icu/apiref/usearch_8h.html for full + details. + +*******************************************************************************/ + +class USearch : ICU +{ + private Handle handle; + private UBreakIterator* iterator; + + // DONE is returned by previous() and next() after all valid + // matches have been returned, and by first() and last() if + // there are no matches at all. + const uint Done = uint.max; + + //Possible types of searches + public enum Attribute + { + Overlap, + CanonicalMatch, + Count + } + + public enum AttributeValue + { + Default = -1, + Off, + On, + Count + } + + /*********************************************************************** + + Creating a search iterator data struct using the argument + locale language rule set + + ***********************************************************************/ + + this (UStringView pattern, UStringView text, inout ULocale locale, UBreakIterator* iterator = null) + { + UErrorCode e; + + this.iterator = iterator; + handle = usearch_open (pattern.get.ptr, pattern.length, text.get.ptr, text.length, toString(locale.name), ( iterator is null ) ? null : iterator.handle, e); + testError (e, "failed to open search"); + } + + /*********************************************************************** + + Creating a search iterator data struct using the argument + locale language rule set + + ***********************************************************************/ + + this (UStringView pattern, UStringView text, UCollator col, UBreakIterator* iterator = null) + { + UErrorCode e; + + this.iterator = iterator; + handle = usearch_openFromCollator (pattern.get.ptr, pattern.length, text.get.ptr, text.length, col.handle, ( iterator is null ) ? 
null : iterator.handle, e); + testError (e, "failed to open search from collator"); + } + + /*********************************************************************** + + Close this USearch + + ***********************************************************************/ + + ~this () + { + usearch_close (handle); + } + + /*********************************************************************** + + Sets the current position in the text string which the + next search will start from. + + ***********************************************************************/ + + void setOffset (uint position) + { + UErrorCode e; + + usearch_setOffset (handle, position, e); + testError (e, "failed to set search offset"); + } + + /*********************************************************************** + + Return the current index in the string text being searched + + ***********************************************************************/ + + uint getOffset () + { + return usearch_getOffset (handle); + } + + /*********************************************************************** + + Returns the index to the match in the text string that was + searched + + ***********************************************************************/ + + uint getMatchedStart () + { + return usearch_getMatchedStart (handle); + } + + /*********************************************************************** + + Returns the length of text in the string which matches the + search pattern + + ***********************************************************************/ + + uint getMatchedLength () + { + return usearch_getMatchedLength (handle); + } + + /*********************************************************************** + + Returns the text that was matched by the most recent call to + first(), next(), previous(), or last(). 
+ + ***********************************************************************/ + + void getMatchedText (UString s) + { + uint fmt (wchar* dst, uint length, inout UErrorCode e) + { + return usearch_getMatchedText (handle, dst, length, e); + } + + s.format (&fmt, "failed to extract matched text"); + } + + /*********************************************************************** + + Set the string text to be searched. + + ***********************************************************************/ + + void setText (UStringView t) + { + UErrorCode e; + + usearch_setText (handle, t.get.ptr, t.length, e); + testError (e, "failed to set search text"); + } + + /*********************************************************************** + + Return the string text to be searched. Note that this + returns a read-only reference to the search text. + + ***********************************************************************/ + + UStringView getText () + { + uint len; + + wchar *x = usearch_getText (handle, &len); + return new UStringView (x[0..len]); + } + + /*********************************************************************** + + Sets the pattern used for matching + + ***********************************************************************/ + + void setPattern (UStringView t) + { + UErrorCode e; + + usearch_setPattern (handle, t.get.ptr, t.length, e); + testError (e, "failed to set search pattern"); + } + + /*********************************************************************** + + Gets the search pattern. Note that this returns a + read-only reference to the pattern. + + ***********************************************************************/ + + UStringView getPattern () + { + uint len; + + wchar *x = usearch_getPattern (handle, &len); + return new UStringView (x[0..len]); + } + + /*********************************************************************** + + Set the BreakIterator that will be used to restrict the + points at which matches are detected. 
+ + ***********************************************************************/ + + void setIterator (UBreakIterator* iterator) + { + UErrorCode e; + + this.iterator = iterator; + usearch_setBreakIterator (handle, cast(Handle)iterator.handle, e); + testError (e, "failed to set search iterator"); + } + + /*********************************************************************** + + Get the BreakIterator that will be used to restrict the + points at which matches are detected. + + ***********************************************************************/ + + UBreakIterator* getIterator () + { + return iterator; + } + + /*********************************************************************** + + Returns the first index at which the string text matches + the search pattern + + ***********************************************************************/ + + uint first () + { + UErrorCode e; + + uint x = usearch_first (handle, e); + testError (e, "failed on first search"); + return x; + } + + /*********************************************************************** + + Returns the last index in the target text at which it + matches the search pattern + + ***********************************************************************/ + + uint last () + { + UErrorCode e; + + uint x = usearch_last (handle, e); + testError (e, "failed on last search"); + return x; + } + + /*********************************************************************** + + Returns the index of the next point at which the string + text matches the search pattern, starting from the current + position. + + If pos is specified, returns the first index greater than + pos at which the string text matches the search pattern + + ***********************************************************************/ + + uint next (uint pos = uint.max) + { + UErrorCode e; + uint x; + + x = (pos == uint.max) ? 
usearch_next (handle, e) : + usearch_following (handle, pos, e); + + testError (e, "failed on next search"); + return x; + } + + /*********************************************************************** + + Returns the index of the previous point at which the + string text matches the search pattern, starting at + the current position. + + If pos is specified, returns the first index less + than pos at which the string text matches the search + pattern. + + ***********************************************************************/ + + uint previous (uint pos = uint.max) + { + UErrorCode e; + uint x; + + x = (pos == uint.max) ? usearch_previous (handle, e) : + usearch_preceding (handle, pos, e); + + testError (e, "failed on next search"); + return x; + } + + /*********************************************************************** + + Search will begin at the start of the text string if a + forward iteration is initiated before a backwards iteration. + Otherwise if a backwards iteration is initiated before a + forwards iteration, the search will begin at the end of the + text string + + ***********************************************************************/ + + void reset () + { + usearch_reset (handle); + } + + /*********************************************************************** + + Gets the collator used for the language rules. + + ***********************************************************************/ + + UCollator getCollator () + { + return new UCollator (usearch_getCollator (handle)); + } + + /*********************************************************************** + + Sets the collator used for the language rules. 
This + method causes internal data such as Boyer-Moore shift + tables to be recalculated, but the iterator's position + is unchanged + + ***********************************************************************/ + + void setCollator (UCollator col) + { + UErrorCode e; + + usearch_setCollator (handle, col.handle, e); + testError (e, "failed to set search collator"); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, wchar*, uint, char*, void*, inout UErrorCode) usearch_open; + Handle function (wchar*, uint, wchar*, uint, Handle, void*, inout UErrorCode) usearch_openFromCollator; + void function (Handle) usearch_close; + void function (Handle, uint, inout UErrorCode) usearch_setOffset; + uint function (Handle) usearch_getOffset; + uint function (Handle) usearch_getMatchedStart; + uint function (Handle) usearch_getMatchedLength; + uint function (Handle, wchar*, uint, inout UErrorCode) usearch_getMatchedText; + void function (Handle, wchar*, uint, inout UErrorCode) usearch_setText; + wchar* function (Handle, uint*) usearch_getText; + void function (Handle, wchar*, uint, inout UErrorCode) usearch_setPattern; + wchar* function (Handle, uint*) usearch_getPattern; + uint function (Handle, inout UErrorCode) usearch_first; + uint function (Handle, inout UErrorCode) usearch_last; + uint function (Handle, inout UErrorCode) usearch_next; + uint function (Handle, inout UErrorCode) usearch_previous; + uint function (Handle, uint, inout UErrorCode) usearch_following; + uint function (Handle, 
uint, inout UErrorCode) usearch_preceding; + void function (Handle) usearch_reset; + void function (Handle, Handle, inout UErrorCode) usearch_setBreakIterator; + Handle function (Handle) usearch_getCollator; + void function (Handle, Handle, inout UErrorCode) usearch_setCollator; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &usearch_open, "usearch_open"}, + {cast(void**) &usearch_openFromCollator, "usearch_openFromCollator"}, + {cast(void**) &usearch_close, "usearch_close"}, + {cast(void**) &usearch_setOffset, "usearch_setOffset"}, + {cast(void**) &usearch_getOffset, "usearch_getOffset"}, + {cast(void**) &usearch_getMatchedStart, "usearch_getMatchedStart"}, + {cast(void**) &usearch_getMatchedLength, "usearch_getMatchedLength"}, + {cast(void**) &usearch_getMatchedText, "usearch_getMatchedText"}, + {cast(void**) &usearch_setText, "usearch_setText"}, + {cast(void**) &usearch_getText, "usearch_getText"}, + {cast(void**) &usearch_setPattern, "usearch_setPattern"}, + {cast(void**) &usearch_getPattern, "usearch_getPattern"}, + {cast(void**) &usearch_first, "usearch_first"}, + {cast(void**) &usearch_last, "usearch_last"}, + {cast(void**) &usearch_next, "usearch_next"}, + {cast(void**) &usearch_previous, "usearch_previous"}, + {cast(void**) &usearch_following, "usearch_following"}, + {cast(void**) &usearch_preceding, "usearch_preceding"}, + {cast(void**) &usearch_reset, "usearch_reset"}, + {cast(void**) &usearch_setBreakIterator, "usearch_setBreakIterator"}, + {cast(void**) &usearch_getCollator, "usearch_getCollator"}, + {cast(void**) &usearch_setCollator, "usearch_setCollator"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, 
targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/USet.d --- a/dwtx/dwtxhelper/mangoicu/USet.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/USet.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,472 +1,472 @@ -/******************************************************************************* - - @file USet.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). 
Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.USet; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -/******************************************************************************* - - A mutable set of Unicode characters and multicharacter strings. - - Objects of this class represent character classes used in regular - expressions. A character specifies a subset of Unicode code points. - Legal code points are U+0000 to U+10FFFF, inclusive. - - UnicodeSet supports two APIs. The first is the operand API that - allows the caller to modify the value of a UnicodeSet object. It - conforms to Java 2's java.util.Set interface, although UnicodeSet - does not actually implement that interface. All methods of Set are - supported, with the modification that they take a character range - or single character instead of an Object, and they take a UnicodeSet - instead of a Collection. The operand API may be thought of in terms - of boolean logic: a boolean OR is implemented by add, a boolean AND - is implemented by retain, a boolean XOR is implemented by complement - taking an argument, and a boolean NOT is implemented by complement - with no argument. In terms of traditional set theory function names, - add is a union, retain is an intersection, remove is an asymmetric - difference, and complement with no argument is a set complement with - respect to the superset range MIN_VALUE-MAX_VALUE - - The second API is the applyPattern()/toPattern() API from the - java.text.Format-derived classes. Unlike the methods that add - characters, add categories, and control the logic of the set, - the method applyPattern() sets all attributes of a UnicodeSet - at once, based on a string pattern. - - See - this page for full details. 
- -*******************************************************************************/ - -class USet : ICU -{ - package Handle handle; - - enum Options - { - None = 0, - IgnoreSpace = 1, - CaseInsensitive = 2, - } - - - /*********************************************************************** - - Creates a USet object that contains the range of characters - start..end, inclusive - - ***********************************************************************/ - - this (wchar start, wchar end) - { - handle = uset_open (start, end); - } - - /*********************************************************************** - - Creates a set from the given pattern. See the UnicodeSet - class description for the syntax of the pattern language - - ***********************************************************************/ - - this (UStringView pattern, Options o = Options.None) - { - UErrorCode e; - - handle = uset_openPatternOptions (pattern.get.ptr, pattern.len, o, e); - testError (e, "failed to open pattern-based charset"); - } - - /*********************************************************************** - - Internal constructor invoked via UCollator - - ***********************************************************************/ - - package this (Handle handle) - { - this.handle = handle; - } - - /*********************************************************************** - - Disposes of the storage used by a USet object - - ***********************************************************************/ - - ~this () - { - uset_close (handle); - } - - /*********************************************************************** - - Modifies the set to represent the set specified by the - given pattern. See the UnicodeSet class description for - the syntax of the pattern language. See also the User - Guide chapter about UnicodeSet. Empties the set passed - before applying the pattern. 
- - ***********************************************************************/ - - void applyPattern (UStringView pattern, Options o = Options.None) - { - UErrorCode e; - - uset_applyPattern (handle, pattern.get.ptr, pattern.len, o, e); - testError (e, "failed to apply pattern"); - } - - /*********************************************************************** - - Returns a string representation of this set. If the result - of calling this function is passed to a uset_openPattern(), - it will produce another set that is equal to this one. - - ***********************************************************************/ - - void toPattern (UString dst, bool escape) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return uset_toPattern (handle, p, len, escape, e); - } - - dst.format (&fmt, "failed to convert charset to a pattern"); - } - - /*********************************************************************** - - Adds the given character to the given USet. After this call, - contains (c) will return true. - - ***********************************************************************/ - - void add (wchar c) - { - uset_add (handle, c); - } - - /*********************************************************************** - - Adds all of the elements in the specified set to this set - if they're not already present. This operation effectively - modifies this set so that its value is the union of the two - sets. The behavior of this operation is unspecified if the - specified collection is modified while the operation is in - progress. - - ***********************************************************************/ - - void addSet (USet other) - { - uset_addAll (handle, other.handle); - } - - /*********************************************************************** - - Adds the given range of characters to the given USet. 
After - this call, contains(start, end) will return true - - ***********************************************************************/ - - void addRange (wchar start, wchar end) - { - uset_addRange (handle, start, end); - } - - /*********************************************************************** - - Adds the given string to the given USet. After this call, - containsString (str, strLen) will return true - - ***********************************************************************/ - - void addString (UStringView t) - { - uset_addString (handle, t.get.ptr, t.len); - } - - /*********************************************************************** - - Removes the given character from this USet. After the - call, contains(c) will return false - - ***********************************************************************/ - - void remove (wchar c) - { - uset_remove (handle, c); - } - - /*********************************************************************** - - Removes the given range of characters from this USet. - After the call, contains(start, end) will return false - - ***********************************************************************/ - - void removeRange (wchar start, wchar end) - { - uset_removeRange (handle, start, end); - } - - /*********************************************************************** - - Removes the given string from this USet. After the call, - containsString (str, strLen) will return false - - ***********************************************************************/ - - void removeString (UStringView t) - { - uset_removeString (handle, t.get.ptr, t.len); - } - - /*********************************************************************** - - Inverts this set. This operation modifies this set so - that its value is its complement. 
This operation does - not affect the multicharacter strings, if any - - ***********************************************************************/ - - void complement () - { - uset_complement (handle); - } - - /*********************************************************************** - - Removes all of the elements from this set. This set will - be empty after this call returns. - - ***********************************************************************/ - - void clear () - { - uset_clear (handle); - } - - /*********************************************************************** - - Returns true if this USet contains no characters and no - strings - - ***********************************************************************/ - - bool isEmpty () - { - return uset_isEmpty (handle) != 0; - } - - /*********************************************************************** - - Returns true if this USet contains the given character - - ***********************************************************************/ - - bool contains (wchar c) - { - return uset_contains (handle, c) != 0; - } - - /*********************************************************************** - - Returns true if this USet contains all characters c where - start <= c && c <= end - - ***********************************************************************/ - - bool containsRange (wchar start, wchar end) - { - return uset_containsRange (handle, start, end) != 0; - } - - /*********************************************************************** - - Returns true if this USet contains the given string - - ***********************************************************************/ - - bool containsString (UStringView t) - { - return uset_containsString (handle, t.get.ptr, t.len) != 0; - } - - /*********************************************************************** - - ***********************************************************************/ - - uint size () - { - return uset_size (handle); - } - - - 
/*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar start, wchar end) uset_open; - void function (Handle) uset_close; - Handle function (wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_openPatternOptions; - uint function (Handle, wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_applyPattern; - uint function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout UErrorCode e) uset_toPattern; - void function (Handle, wchar c) uset_add; - void function (Handle, Handle additionalSet) uset_addAll; - void function (Handle, wchar start, wchar end) uset_addRange; - void function (Handle, wchar* str, uint strLen) uset_addString; - void function (Handle, wchar c) uset_remove; - void function (Handle, wchar start, wchar end) uset_removeRange; - void function (Handle, wchar* str, uint strLen) uset_removeString; - void function (Handle) uset_complement; - void function (Handle) uset_clear; - byte function (Handle) uset_isEmpty; - byte function (Handle, wchar c) uset_contains; - byte function (Handle, wchar start, wchar end) uset_containsRange; - byte function (Handle, wchar* str, uint strLen) uset_containsString; - uint function (Handle) uset_size; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &uset_open, "uset_open"}, - {cast(void**) &uset_close, "uset_close"}, - 
{cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"}, - {cast(void**) &uset_applyPattern, "uset_applyPattern"}, - {cast(void**) &uset_toPattern, "uset_toPattern"}, - {cast(void**) &uset_add, "uset_add"}, - {cast(void**) &uset_addAll, "uset_addAll"}, - {cast(void**) &uset_addRange, "uset_addRange"}, - {cast(void**) &uset_addString, "uset_addString"}, - {cast(void**) &uset_remove, "uset_remove"}, - {cast(void**) &uset_removeRange, "uset_removeRange"}, - {cast(void**) &uset_removeString, "uset_removeString"}, - {cast(void**) &uset_complement, "uset_complement"}, - {cast(void**) &uset_clear, "uset_clear"}, - {cast(void**) &uset_isEmpty, "uset_isEmpty"}, - {cast(void**) &uset_contains, "uset_contains"}, - {cast(void**) &uset_containsRange, "uset_containsRange"}, - {cast(void**) &uset_containsString, "uset_containsString"}, - {cast(void**) &uset_size, "uset_size"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - +/******************************************************************************* + + @file USet.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. 
If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.USet; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +/******************************************************************************* + + A mutable set of Unicode characters and multicharacter strings. + + Objects of this class represent character classes used in regular + expressions. A character specifies a subset of Unicode code points. + Legal code points are U+0000 to U+10FFFF, inclusive. + + UnicodeSet supports two APIs. The first is the operand API that + allows the caller to modify the value of a UnicodeSet object. It + conforms to Java 2's java.util.Set interface, although UnicodeSet + does not actually implement that interface. All methods of Set are + supported, with the modification that they take a character range + or single character instead of an Object, and they take a UnicodeSet + instead of a Collection. 
The operand API may be thought of in terms + of boolean logic: a boolean OR is implemented by add, a boolean AND + is implemented by retain, a boolean XOR is implemented by complement + taking an argument, and a boolean NOT is implemented by complement + with no argument. In terms of traditional set theory function names, + add is a union, retain is an intersection, remove is an asymmetric + difference, and complement with no argument is a set complement with + respect to the superset range MIN_VALUE-MAX_VALUE + + The second API is the applyPattern()/toPattern() API from the + java.text.Format-derived classes. Unlike the methods that add + characters, add categories, and control the logic of the set, + the method applyPattern() sets all attributes of a UnicodeSet + at once, based on a string pattern. + + See + this page for full details. + +*******************************************************************************/ + +class USet : ICU +{ + package Handle handle; + + enum Options + { + None = 0, + IgnoreSpace = 1, + CaseInsensitive = 2, + } + + + /*********************************************************************** + + Creates a USet object that contains the range of characters + start..end, inclusive + + ***********************************************************************/ + + this (wchar start, wchar end) + { + handle = uset_open (start, end); + } + + /*********************************************************************** + + Creates a set from the given pattern. 
See the UnicodeSet + class description for the syntax of the pattern language + + ***********************************************************************/ + + this (UStringView pattern, Options o = Options.None) + { + UErrorCode e; + + handle = uset_openPatternOptions (pattern.get.ptr, pattern.len, o, e); + testError (e, "failed to open pattern-based charset"); + } + + /*********************************************************************** + + Internal constructor invoked via UCollator + + ***********************************************************************/ + + package this (Handle handle) + { + this.handle = handle; + } + + /*********************************************************************** + + Disposes of the storage used by a USet object + + ***********************************************************************/ + + ~this () + { + uset_close (handle); + } + + /*********************************************************************** + + Modifies the set to represent the set specified by the + given pattern. See the UnicodeSet class description for + the syntax of the pattern language. See also the User + Guide chapter about UnicodeSet. Empties the set passed + before applying the pattern. + + ***********************************************************************/ + + void applyPattern (UStringView pattern, Options o = Options.None) + { + UErrorCode e; + + uset_applyPattern (handle, pattern.get.ptr, pattern.len, o, e); + testError (e, "failed to apply pattern"); + } + + /*********************************************************************** + + Returns a string representation of this set. If the result + of calling this function is passed to a uset_openPattern(), + it will produce another set that is equal to this one. 
+ + ***********************************************************************/ + + void toPattern (UString dst, bool escape) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return uset_toPattern (handle, p, len, escape, e); + } + + dst.format (&fmt, "failed to convert charset to a pattern"); + } + + /*********************************************************************** + + Adds the given character to the given USet. After this call, + contains (c) will return true. + + ***********************************************************************/ + + void add (wchar c) + { + uset_add (handle, c); + } + + /*********************************************************************** + + Adds all of the elements in the specified set to this set + if they're not already present. This operation effectively + modifies this set so that its value is the union of the two + sets. The behavior of this operation is unspecified if the + specified collection is modified while the operation is in + progress. + + ***********************************************************************/ + + void addSet (USet other) + { + uset_addAll (handle, other.handle); + } + + /*********************************************************************** + + Adds the given range of characters to the given USet. After + this call, contains(start, end) will return true + + ***********************************************************************/ + + void addRange (wchar start, wchar end) + { + uset_addRange (handle, start, end); + } + + /*********************************************************************** + + Adds the given string to the given USet. After this call, + containsString (str, strLen) will return true + + ***********************************************************************/ + + void addString (UStringView t) + { + uset_addString (handle, t.get.ptr, t.len); + } + + /*********************************************************************** + + Removes the given character from this USet. 
After the + call, contains(c) will return false + + ***********************************************************************/ + + void remove (wchar c) + { + uset_remove (handle, c); + } + + /*********************************************************************** + + Removes the given range of characters from this USet. + After the call, contains(start, end) will return false + + ***********************************************************************/ + + void removeRange (wchar start, wchar end) + { + uset_removeRange (handle, start, end); + } + + /*********************************************************************** + + Removes the given string from this USet. After the call, + containsString (str, strLen) will return false + + ***********************************************************************/ + + void removeString (UStringView t) + { + uset_removeString (handle, t.get.ptr, t.len); + } + + /*********************************************************************** + + Inverts this set. This operation modifies this set so + that its value is its complement. This operation does + not affect the multicharacter strings, if any + + ***********************************************************************/ + + void complement () + { + uset_complement (handle); + } + + /*********************************************************************** + + Removes all of the elements from this set. This set will + be empty after this call returns. 
+ + ***********************************************************************/ + + void clear () + { + uset_clear (handle); + } + + /*********************************************************************** + + Returns true if this USet contains no characters and no + strings + + ***********************************************************************/ + + bool isEmpty () + { + return uset_isEmpty (handle) != 0; + } + + /*********************************************************************** + + Returns true if this USet contains the given character + + ***********************************************************************/ + + bool contains (wchar c) + { + return uset_contains (handle, c) != 0; + } + + /*********************************************************************** + + Returns true if this USet contains all characters c where + start <= c && c <= end + + ***********************************************************************/ + + bool containsRange (wchar start, wchar end) + { + return uset_containsRange (handle, start, end) != 0; + } + + /*********************************************************************** + + Returns true if this USet contains the given string + + ***********************************************************************/ + + bool containsString (UStringView t) + { + return uset_containsString (handle, t.get.ptr, t.len) != 0; + } + + /*********************************************************************** + + ***********************************************************************/ + + uint size () + { + return uset_size (handle); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar start, wchar end) uset_open; + void function (Handle) uset_close; + Handle function (wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_openPatternOptions; + uint function (Handle, wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_applyPattern; + uint function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout UErrorCode e) uset_toPattern; + void function (Handle, wchar c) uset_add; + void function (Handle, Handle additionalSet) uset_addAll; + void function (Handle, wchar start, wchar end) uset_addRange; + void function (Handle, wchar* str, uint strLen) uset_addString; + void function (Handle, wchar c) uset_remove; + void function (Handle, wchar start, wchar end) uset_removeRange; + void function (Handle, wchar* str, uint strLen) uset_removeString; + void function (Handle) uset_complement; + void function (Handle) uset_clear; + byte function (Handle) uset_isEmpty; + byte function (Handle, wchar c) uset_contains; + byte function (Handle, wchar start, wchar end) uset_containsRange; + byte function (Handle, wchar* str, uint strLen) uset_containsString; + uint function (Handle) uset_size; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &uset_open, "uset_open"}, + {cast(void**) &uset_close, "uset_close"}, + {cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"}, + {cast(void**) &uset_applyPattern, 
"uset_applyPattern"}, + {cast(void**) &uset_toPattern, "uset_toPattern"}, + {cast(void**) &uset_add, "uset_add"}, + {cast(void**) &uset_addAll, "uset_addAll"}, + {cast(void**) &uset_addRange, "uset_addRange"}, + {cast(void**) &uset_addString, "uset_addString"}, + {cast(void**) &uset_remove, "uset_remove"}, + {cast(void**) &uset_removeRange, "uset_removeRange"}, + {cast(void**) &uset_removeString, "uset_removeString"}, + {cast(void**) &uset_complement, "uset_complement"}, + {cast(void**) &uset_clear, "uset_clear"}, + {cast(void**) &uset_isEmpty, "uset_isEmpty"}, + {cast(void**) &uset_contains, "uset_contains"}, + {cast(void**) &uset_containsRange, "uset_containsRange"}, + {cast(void**) &uset_containsString, "uset_containsString"}, + {cast(void**) &uset_size, "uset_size"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UString.d --- a/dwtx/dwtxhelper/mangoicu/UString.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UString.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,1508 +1,1508 @@ -/******************************************************************************* - - @file UString.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, October 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. 
- -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UString; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UChar, - dwtx.dwtxhelper.mangoicu.ULocale; - -/******************************************************************************* - -*******************************************************************************/ - -private extern (C) void memmove (void* dst, void* src, uint bytes); - -/******************************************************************************* - - Bind to the IReadable and IWritable interfaces if we're building - along with the mango.io package - -*******************************************************************************/ - -version=Isolated; -version (Isolated) - { - private interface ITextOther {} - private interface IStringOther {} - } - else - { - private import dwtx.dwtxhelper.mangoicu.UMango; - - private import mango.io.model.IReader, - mango.io.model.IWriter; - - private interface ITextOther : IWritable {} - private interface IStringOther : IReadable {} - } - - -/******************************************************************************* - - UString is a string class that stores Unicode characters directly - and provides similar functionality as the Java String class. - - In ICU, a Unicode string consists of 16-bit Unicode code units. - A Unicode character may be stored with either one code unit — - which is the most common case — or with a matched pair of - special code units ("surrogates"). The data type for code units - is UChar. - - For single-character handling, a Unicode character code point is - a value in the range 0..0x10ffff. ICU uses the UChar32 type for - code points. - - Indexes and offsets into and lengths of strings always count code - units, not code points. This is the same as with multi-byte char* - strings in traditional string handling. 
Operations on partial - strings typically do not test for code point boundaries. If necessary, - the user needs to take care of such boundaries by testing for the code - unit values or by using functions like getChar32Start() - and getChar32Limit() - - UString methods are more lenient with regard to input parameter values - than other ICU APIs. In particular: - - - If indexes are out of bounds for a UString object (< 0 or > length) - then they are "pinned" to the nearest boundary. - - - If primitive string pointer values (e.g., const wchar* or char*) for - input strings are null, then those input string parameters are treated - as if they pointed to an empty string. However, this is not the case - for char* parameters for charset names or other IDs. - -*******************************************************************************/ - -class UString : UStringView, IStringOther -{ - alias opCat append; - alias opIndexAssign setCharAt; - - /*********************************************************************** - - Create an empty UString with the specified available space - - ***********************************************************************/ - - this (uint space = 0) - { - content.length = space; - mutable = true; - } - - /*********************************************************************** - - Create a UString upon the provided content. If said content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. Doing so will avoid allocating - heap-space for the content until it is modified. - - ***********************************************************************/ - - this (wchar[] content, bool mutable = true) - { - setTo (content, mutable); - } - - /*********************************************************************** - - Create a UString via the content of a UStringView. Note that the - default is to assume the content is immutable (read-only). 
- - ***********************************************************************/ - - this (UStringView other, bool mutable = false) - { - this (other.get, mutable); - } - - /*********************************************************************** - - Create a UString via the content of a UString. If said content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. Doing so will avoid allocating - heap-space for the content until it is modified via UString - methods. - - ***********************************************************************/ - - this (UString other, bool mutable = true) - { - this (other.get, mutable); - } - - /*********************************************************************** - - Support for reading content via the IO system - - ***********************************************************************/ - - version (Isolated){} - else - { - /*************************************************************** - - Internal adapter to handle loading and conversion - of UString content. Once constructed, this may be - used as the target for an IReader. Alternatively, - invoke the load() method with an IBuffer of choice. - - ***************************************************************/ - - class UStringDecoder : StringDecoder16 - { - private UString s; - - // construct a decoder on the given UString - this (UConverter c, uint bytes, UString s) - { - super (c, bytes); - this.s = s; - } - - // IReadable adapter to perform the conversion - protected void read (IReader r) - { - load (r.buffer); - } - - // read from the provided buffer until we - // either have all the content, or an eof - // condition throws an exception. 
- package void load (IBuffer b) - { - uint produced = super.read (b, s.content); - while (toGo) - { - s.expand (toGo); - produced += super.read (b, s.content[produced..$]); - } - s.len = produced; - } - } - - /*************************************************************** - - Another constructor for loading known content length - into a UString. - - ***************************************************************/ - - this (IBuffer buffer, uint contentLength, UConverter cvt) - { - this (contentLength); - UStringDecoder sd = new UStringDecoder (cvt, contentLength, this); - sd.load (buffer); - } - - /*************************************************************** - - Read as many bytes from the input as is necessary - to produce the expected number of wchar elements. - This uses the default wchar handler, which can be - altered by binding a StringDecoder to the IReader - in use (see UMango for details). - - We're mutable, so ensure we don't mess with the - IO buffers. Interestingly, changing the length - of a D array will account for slice assignments - (it checks the pointer to see if it's a starting - point in the pool). Unfortunately, that doesn't - catch the case where a slice starts at offset 0, - which is where IBuffer slices may come from. - - To be safe, we ask the allocator in use whether - the content it provided can be mutated or not. - Note that this is not necessary for UStringView, since - that is a read-only construct. - - ***************************************************************/ - - void read (IReader r) - { - r.get (content); - len = content.length; - mutable = r.getAllocator.isMutable (content); - } - - /*************************************************************** - - Return a streaming decoder that can be used to - populate this UString with a specified number of - input bytes. 
- - This differs from the above read() method in the - way content is read: in the above case, exactly - the specified number of wchar elements will be - converter from the input, whereas in this case - a variable number of wchar elements are converted - until 'bytes' have been read from the input. This - is useful in those cases where the original number - of elements has been lost, and only the resultant - converted byte-count remains (a la HTTP). - - The returned StringDecoder is one-shot only. You may - reuse it (both the converter and the byte count) via - its reset() method. - - One applies the resultant converter directly with an - IReader like so: - - @code - UString s = ...; - IReader r = ...; - - // r >> s.createDecoder(cvt, bytes); - r.get (s.createDecoder(cvt, bytes)); - @endcode - - which will read the specified number of bytes from - the input and convert them to an appropriate number - of wchars within the UString. - - ***************************************************************/ - - StringDecoder createDecoder (UConverter c, uint bytes) - { - return new UStringDecoder (c, bytes, this); - } - } - - /*********************************************************************** - - Append text to this UString - - ***********************************************************************/ - - UString opCat (UStringView other) - { - return opCat (other.get); - } - - /*********************************************************************** - - Append partial text to this UString - - ***********************************************************************/ - - UString opCat (UStringView other, uint start, uint len=uint.max) - { - other.pinIndices (start, len); - return opCat (other.content [start..start+len]); - } - - /*********************************************************************** - - Append a single character to this UString - - ***********************************************************************/ - - UString opCat (wchar chr) - { - return opCat 
(&chr, 1); - } - - /*********************************************************************** - - Append text to this UString - - ***********************************************************************/ - - UString opCat (wchar[] chars) - { - return opCat (chars.ptr, chars.length); - } - - /*********************************************************************** - - Converts a sequence of UTF-8 bytes to UChars (UTF-16) - - ***********************************************************************/ - - UString opCat (char[] chars) - { - uint fmt (wchar* dst, uint len, inout UErrorCode e) - { - uint x; - - u_strFromUTF8 (dst, len, &x, chars.ptr, chars.length, e); - return x; - } - - expand (chars.length); - return format (&fmt, "failed to append UTF char[]"); - } - - /*********************************************************************** - - Set a section of this UString to the specified character - - ***********************************************************************/ - - UString setTo (wchar chr, uint start=0, uint len=uint.max) - { - pinIndices (start, len); - if (! mutable) - realloc (); - content [start..start+len] = chr; - return this; - } - - /*********************************************************************** - - Set the content to the provided array. Parameter 'mutable' - specifies whether the given array is likely to change. If - not, the array is aliased until such time this UString is - altered. - - ***********************************************************************/ - - UString setTo (wchar[] chars, bool mutable = true) - { - len = chars.length; - if ((this.mutable = mutable) == true) - content = chars.dup; - else - content = chars; - return this; - } - - /*********************************************************************** - - Replace the content of this UString. If the new content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. 
Doing so will avoid allocating - heap-space for the content until it is modified via one of - these methods. - - ***********************************************************************/ - - UString setTo (UStringView other, bool mutable = true) - { - return setTo (other.get, mutable); - } - - /*********************************************************************** - - Replace the content of this UString. If the new content - is immutable (read-only) then you might consider setting the - 'mutable' parameter to false. Doing so will avoid allocating - heap-space for the content until it is modified via one of - these methods. - - ***********************************************************************/ - - UString setTo (UStringView other, uint start, uint len, bool mutable = true) - { - other.pinIndices (start, len); - return setTo (other.content [start..start+len], mutable); - } - - /*********************************************************************** - - Replace the character at the specified location. - - ***********************************************************************/ - - final UString opIndexAssign (wchar chr, uint index) - in { - if (index >= len) - exception ("index of out bounds"); - } - body - { - if (! mutable) - realloc (); - content [index] = chr; - return this; - } - - /*********************************************************************** - - Remove a piece of this UString. - - ***********************************************************************/ - - UString remove (uint start, uint length=uint.max) - { - pinIndices (start, length); - if (length) - if (start >= len) - truncate (start); - else - { - if (! mutable) - realloc (); - - uint i = start + length; - memmove (&content[start], &content[i], (len-i) * wchar.sizeof); - len -= length; - } - return this; - } - - /*********************************************************************** - - Truncate the length of this UString. 
- - ***********************************************************************/ - - UString truncate (uint length=0) - { - if (length <= len) - len = length; - return this; - } - - /*********************************************************************** - - Insert leading spaces in this UString - - ***********************************************************************/ - - UString padLeading (uint count, wchar padChar = 0x0020) - { - expand (count); - memmove (&content[count], content.ptr, len * wchar.sizeof); - len += count; - return setTo (padChar, 0, count); - } - - /*********************************************************************** - - Append some trailing spaces to this UString. - - ***********************************************************************/ - - UString padTrailing (uint length, wchar padChar = 0x0020) - { - expand (length); - len += length; - return setTo (padChar, len-length, length); - } - - /*********************************************************************** - - Check for available space within the buffer, and expand - as necessary. - - ***********************************************************************/ - - package final void expand (uint count) - { - if ((len + count) > content.length) - realloc (count); - } - - /*********************************************************************** - - Allocate memory due to a change in the content. We handle - the distinction between mutable and immutable here. 
- - ***********************************************************************/ - - private final void realloc (uint count = 0) - { - uint size = (content.length + count + 63) & ~63; - - if (mutable) - content.length = size; - else - { - mutable = true; - wchar[] x = content; - content = new wchar [size]; - if (len) - content[0..len] = x; - } - } - - /*********************************************************************** - - Internal method to support UString appending - - ***********************************************************************/ - - private final UString opCat (wchar* chars, uint count) - { - expand (count); - content[len..len+count] = chars[0..count]; - len += count; - return this; - } - - /*********************************************************************** - - Internal method to support formatting into this UString. - This is used by many of the ICU wrappers to append content - into a UString. - - ***********************************************************************/ - - typedef uint delegate (wchar* dst, uint len, inout UErrorCode e) Formatter; - - package final UString format (Formatter format, char[] msg) - { - UErrorCode e; - uint length; - - while (true) - { - e = e.OK; - length = format (&content[len], content.length - len, e); - if (e == e.BufferOverflow) - expand (length); - else - break; - } - - if (isError (e)) - exception (msg); - - len += length; - return this; - } -} - - -/******************************************************************************* - - Immutable (read-only) text -- use UString for mutable strings. - -*******************************************************************************/ - -class UStringView : ICU, ITextOther -{ - alias opIndex charAt; - - // the core of the UStringView and UString attributes. The name 'len' - // is used rather than the more obvious 'length' since there is - // a collision with the silly array[length] syntactic sugar ... 
- package uint len; - package wchar[] content; - - // this should probably be in UString only, but there seems to - // be a compiler bug where it doesn't get initialised correctly, - // and it's perhaps useful to have here for when a UString is - // passed as a UStringView argument. - private bool mutable; - - // toFolded() argument - public enum CaseOption - { - Default = 0, - SpecialI = 1 - } - - /*********************************************************************** - - Hidden constructor - - ***********************************************************************/ - - private this () - { - } - - /*********************************************************************** - - Construct read-only wrapper around the given content - - ***********************************************************************/ - - this (wchar[] content) - { - this.content = content; - this.len = content.length; - } - - /*********************************************************************** - - Support for writing via the Mango IO subsystem - - ***********************************************************************/ - - version (Isolated){} - else - { - void write (IWriter w) - { - w.put (get); - } - } - - /*********************************************************************** - - Return the valid content from this UStringView - - ***********************************************************************/ - - final package wchar[] get () - { - return content [0..len]; - } - - /*********************************************************************** - - Is this UStringView equal to another? - - ***********************************************************************/ - - final override int opEquals (Object o) - { - UStringView other = cast(UStringView) o; - - if (other) - return (other is this || compare (other) == 0); - return 0; - } - - /*********************************************************************** - - Compare this UStringView to another. 
- - ***********************************************************************/ - - final override int opCmp (Object o) - { - UStringView other = cast(UStringView) o; - - if (other is this) - return 0; - else - if (other) - return compare (other); - return 1; - } - - /*********************************************************************** - - Hash this UStringView - - ***********************************************************************/ - - final override uint toHash () - { - return typeid(wchar[]).getHash (&content[0..len]); - } - - /*********************************************************************** - - Clone this UStringView into a UString - - ***********************************************************************/ - - final UString copy () - { - return new UString (content); - } - - /*********************************************************************** - - Clone a section of this UStringView into a UString - - ***********************************************************************/ - - final UString extract (uint start, uint len=uint.max) - { - pinIndices (start, len); - return new UString (content[start..start+len]); - } - - /*********************************************************************** - - Count unicode code points in the length UChar code units of - the string. A code point may occupy either one or two UChar - code units. Counting code points involves reading all code - units. - - ***********************************************************************/ - - final uint codePoints (uint start=0, uint length=uint.max) - { - pinIndices (start, length); - return u_countChar32 (&content[start], length); - } - - /*********************************************************************** - - Return an indication whether or not there are surrogate pairs - within the string. 
- - ***********************************************************************/ - - final bool hasSurrogates (uint start=0, uint length=uint.max) - { - pinIndices (start, length); - return codePoints (start, length) != length; - } - - /*********************************************************************** - - Return the character at the specified position. - - ***********************************************************************/ - - final wchar opIndex (uint index) - in { - if (index >= len) - exception ("index of out bounds"); - } - body - { - return content [index]; - } - - /*********************************************************************** - - Return the length of the valid content - - ***********************************************************************/ - - final uint length () - { - return len; - } - - /*********************************************************************** - - The comparison can be done in code unit order or in code - point order. They differ only in UTF-16 when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. - - ***********************************************************************/ - - final int compare (UStringView other, bool codePointOrder=false) - { - return compare (other.get, codePointOrder); - } - - /*********************************************************************** - - The comparison can be done in code unit order or in code - point order. They differ only in UTF-16 when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. 
- - ***********************************************************************/ - - final int compare (wchar[] other, bool codePointOrder=false) - { - return u_strCompare (content.ptr, len, other.ptr, other.length, codePointOrder); - } - - /*********************************************************************** - - The comparison can be done in UTF-16 code unit order or - in code point order. They differ only when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. - - ***********************************************************************/ - - final int compareFolded (UStringView other, CaseOption option = CaseOption.Default) - { - return compareFolded (other.content, option); - } - - /*********************************************************************** - - The comparison can be done in UTF-16 code unit order or - in code point order. They differ only when comparing - supplementary code points (U+10000..U+10ffff) to BMP code - points near the end of the BMP (i.e., U+e000..U+ffff). - - In code unit order, high BMP code points sort after - supplementary code points because they are stored as - pairs of surrogates which are at U+d800..U+dfff. - - ***********************************************************************/ - - final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default) - { - return compareFolded (get, other, option); - } - - /*********************************************************************** - - Does this UStringView start with specified string? 
- - ***********************************************************************/ - - final bool startsWith (UStringView other) - { - return startsWith (other.get); - } - - /*********************************************************************** - - Does this UStringView start with specified string? - - ***********************************************************************/ - - final bool startsWith (wchar[] chars) - { - if (len >= chars.length) - return compareFolded (content[0..chars.length], chars) == 0; - return false; - } - - /*********************************************************************** - - Does this UStringView end with specified string? - - ***********************************************************************/ - - final bool endsWith (UStringView other) - { - return endsWith (other.get); - } - - /*********************************************************************** - - Does this UStringView end with specified string? - - ***********************************************************************/ - - final bool endsWith (wchar[] chars) - { - if (len >= chars.length) - return compareFolded (content[len-chars.length..len], chars) == 0; - return false; - } - - /*********************************************************************** - - Find the first occurrence of a BMP code point in a string. - A surrogate code point is found only if its match in the - text is not part of a surrogate pair. - - ***********************************************************************/ - - final uint indexOf (wchar c, uint start=0) - { - pinIndex (start); - wchar* s = u_memchr (&content[start], c, len-start); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Find the first occurrence of a substring in a string. - - The substring is found at code point boundaries. 
That means - that if the substring begins with a trail surrogate or ends - with a lead surrogate, then it is found only if these - surrogates stand alone in the text. Otherwise, the substring - edge units would be matched against halves of surrogate pairs. - - ***********************************************************************/ - - final uint indexOf (UStringView other, uint start=0) - { - return indexOf (other.get, start); - } - - /*********************************************************************** - - Find the first occurrence of a substring in a string. - - The substring is found at code point boundaries. That means - that if the substring begins with a trail surrogate or ends - with a lead surrogate, then it is found only if these - surrogates stand alone in the text. Otherwise, the substring - edge units would be matched against halves of surrogate pairs. - - ***********************************************************************/ - - final uint indexOf (wchar[] chars, uint start=0) - { - pinIndex (start); - wchar* s = u_strFindFirst (&content[start], len-start, chars.ptr, chars.length); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Find the last occurrence of a BMP code point in a string. - A surrogate code point is found only if its match in the - text is not part of a surrogate pair. - - ***********************************************************************/ - - final uint lastIndexOf (wchar c, uint start=uint.max) - { - pinIndex (start); - wchar* s = u_memrchr (content.ptr, c, start); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Find the last occurrence of a BMP code point in a string. - A surrogate code point is found only if its match in the - text is not part of a surrogate pair. 
- - ***********************************************************************/ - - final uint lastIndexOf (UStringView other, uint start=uint.max) - { - return lastIndexOf (other.get, start); - } - - /*********************************************************************** - - Find the last occurrence of a substring in a string. - - The substring is found at code point boundaries. That means - that if the substring begins with a trail surrogate or ends - with a lead surrogate, then it is found only if these - surrogates stand alone in the text. Otherwise, the substring - edge units would be matched against halves of surrogate pairs. - - ***********************************************************************/ - - final uint lastIndexOf (wchar[] chars, uint start=uint.max) - { - pinIndex (start); - wchar* s = u_strFindLast (content.ptr, start, chars.ptr, chars.length); - if (s) - return s - content.ptr; - return uint.max; - } - - /*********************************************************************** - - Lowercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toLower (UString dst) - { - return toLower (dst, ULocale.Default); - } - - /*********************************************************************** - - Lowercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. 
- - ***********************************************************************/ - - final UString toLower (UString dst, inout ULocale locale) - { - uint lower (wchar* dst, uint length, inout UErrorCode e) - { - return u_strToLower (dst, length, content.ptr, len, ICU.toString(locale.name), e); - } - - dst.expand (len + 32); - return dst.format (&lower, "toLower() failed"); - } - - /*********************************************************************** - - Uppercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toUpper (UString dst) - { - return toUpper (dst, ULocale.Default); - } - - /*********************************************************************** - - Uppercase the characters into a seperate UString. - - Casing is locale-dependent and context-sensitive. The - result may be longer or shorter than the original. - - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toUpper (UString dst, inout ULocale locale) - { - uint upper (wchar* dst, uint length, inout UErrorCode e) - { - return u_strToUpper (dst, length, content.ptr, len, ICU.toString(locale.name), e); - } - - dst.expand (len + 32); - return dst.format (&upper, "toUpper() failed"); - } - - /*********************************************************************** - - Case-fold the characters into a seperate UString. - - Case-folding is locale-independent and not context-sensitive, - but there is an option for whether to include or exclude - mappings for dotted I and dotless i that are marked with 'I' - in CaseFolding.txt. The result may be longer or shorter than - the original. 
- - Note that the return value refers to the provided destination - UString. - - ***********************************************************************/ - - final UString toFolded (UString dst, CaseOption option = CaseOption.Default) - { - uint fold (wchar* dst, uint length, inout UErrorCode e) - { - return u_strFoldCase (dst, length, content.ptr, len, option, e); - } - - dst.expand (len + 32); - return dst.format (&fold, "toFolded() failed"); - } - - /*********************************************************************** - - Converts a sequence of wchar (UTF-16) to UTF-8 bytes. If - the output array is not provided, an array of appropriate - size will be allocated and returned. Where the output is - provided, it must be large enough to hold potentially four - bytes per character for surrogate-pairs or three bytes per - character for BMP only. Consider using UConverter where - streaming conversions are required. - - Returns an array slice representing the valid UTF8 content. - - ***********************************************************************/ - - final char[] toUtf8 (char[] dst = null) - { - uint x; - UErrorCode e; - - if (! cast(char*) dst) - dst = new char[len * 4]; - - u_strToUTF8 (dst.ptr, dst.length, &x, content.ptr, len, e); - testError (e, "failed to convert to UTF8"); - return dst [0..x]; - } - - /*********************************************************************** - - Remove leading and trailing whitespace from this UStringView. - Note that we slice the content to remove leading space. 
- - ***********************************************************************/ - - UStringView trim () - { - wchar c; - uint i = len; - - // cut off trailing white space - while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c))) - --i; - len = i; - - // now remove leading whitespace - for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {} - if (i) - { - len -= i; - content = content[i..$-i]; - } - - return this; - } - - /*********************************************************************** - - Unescape a string of characters and write the resulting - Unicode characters to the destination buffer. The following - escape sequences are recognized: - - uhhhh 4 hex digits; h in [0-9A-Fa-f] - Uhhhhhhhh 8 hex digits - xhh 1-2 hex digits - x{h...} 1-8 hex digits - ooo 1-3 octal digits; o in [0-7] - cX control-X; X is masked with 0x1F - - as well as the standard ANSI C escapes: - - a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, - v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - \\" =U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C - - Anything else following a backslash is generically escaped. - For example, "[a\\-z]" returns "[a-z]". - - If an escape sequence is ill-formed, this method returns an - empty string. An example of an ill-formed sequence is "\\u" - followed by fewer than 4 hex digits. - - ***********************************************************************/ - - final UString unEscape () - { - UString result = new UString (len); - for (uint i=0; i < len;) - { - dchar c = charAt(i++); - if (c == 0x005C) - { - // bump index ... - c = u_unescapeAt (&_charAt, &i, len, cast(void*) this); - - // error? - if (c == 0xFFFFFFFF) - { - result.truncate (); // return empty string - break; // invalid escape sequence - } - } - result.append (c); - } - return result; - } - - /*********************************************************************** - - Is this code point a surrogate (U+d800..U+dfff)? 
- - ***********************************************************************/ - - final static bool isSurrogate (wchar c) - { - return (c & 0xfffff800) == 0xd800; - } - - /*********************************************************************** - - Is this code unit a lead surrogate (U+d800..U+dbff)? - - ***********************************************************************/ - - final static bool isLeading (wchar c) - { - return (c & 0xfffffc00) == 0xd800; - } - - /*********************************************************************** - - Is this code unit a trail surrogate (U+dc00..U+dfff)? - - ***********************************************************************/ - - final static bool isTrailing (wchar c) - { - return (c & 0xfffffc00) == 0xdc00; - } - - /*********************************************************************** - - Adjust a random-access offset to a code point boundary - at the start of a code point. If the offset points to - the trail surrogate of a surrogate pair, then the offset - is decremented. Otherwise, it is not modified. - - ***********************************************************************/ - - final uint getCharStart (uint i) - in { - if (i >= len) - exception ("index of out bounds"); - } - body - { - if (isTrailing (content[i]) && i && isLeading (content[i-1])) - --i; - return i; - } - - /*********************************************************************** - - Adjust a random-access offset to a code point boundary - after a code point. If the offset is behind the lead - surrogate of a surrogate pair, then the offset is - incremented. Otherwise, it is not modified. 
- - ***********************************************************************/ - - final uint getCharLimit (uint i) - in { - if (i >= len) - exception ("index of out bounds"); - } - body - { - if (i && isLeading(content[i-1]) && isTrailing (content[i])) - ++i; - return i; - } - - /*********************************************************************** - - Callback for C unescapeAt() function - - ***********************************************************************/ - - extern (C) - { - typedef wchar function (uint offset, void* context) CharAt; - - private static wchar _charAt (uint offset, void* context) - { - return (cast(UString) context).charAt (offset); - } - } - - /*********************************************************************** - - Pin the given index to a valid position. - - ***********************************************************************/ - - final private void pinIndex (inout uint x) - { - if (x > len) - x = len; - } - - /*********************************************************************** - - Pin the given index and length to a valid position. - - ***********************************************************************/ - - final private void pinIndices (inout uint start, inout uint length) - { - if (start > len) - start = len; - - if (length > (len - start)) - length = len - start; - } - - /*********************************************************************** - - Helper for comparison methods - - ***********************************************************************/ - - final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default) - { - UErrorCode e; - - int x = u_strCaseCompare (s1.ptr, s1.length, s2.ptr, s2.length, option, e); - testError (e, "compareFolded failed"); - return x; - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - wchar* function (wchar*, uint, wchar*, uint) u_strFindFirst; - wchar* function (wchar*, uint, wchar*, uint) u_strFindLast; - wchar* function (wchar*, wchar, uint) u_memchr; - wchar* function (wchar*, wchar, uint) u_memrchr; - int function (wchar*, uint, wchar*, uint, bool) u_strCompare; - int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strCaseCompare; - dchar function (CharAt, uint*, uint, void*) u_unescapeAt; - uint function (wchar*, uint) u_countChar32; - uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToUpper; - uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToLower; - uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strFoldCase; - wchar* function (wchar*, uint, uint*, char*, uint, inout UErrorCode) u_strFromUTF8; - char* function (char*, uint, uint*, wchar*, uint, inout UErrorCode) u_strToUTF8; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &u_strFindFirst, "u_strFindFirst"}, - {cast(void**) &u_strFindLast, "u_strFindLast"}, - {cast(void**) &u_memchr, "u_memchr"}, - {cast(void**) &u_memrchr, "u_memrchr"}, - {cast(void**) &u_strCompare, "u_strCompare"}, - {cast(void**) &u_strCaseCompare, "u_strCaseCompare"}, - {cast(void**) &u_unescapeAt, "u_unescapeAt"}, - {cast(void**) &u_countChar32, "u_countChar32"}, - {cast(void**) &u_strToUpper, "u_strToUpper"}, - {cast(void**) &u_strToLower, "u_strToLower"}, - {cast(void**) 
&u_strFoldCase, "u_strFoldCase"}, - {cast(void**) &u_strFromUTF8, "u_strFromUTF8"}, - {cast(void**) &u_strToUTF8, "u_strToUTF8"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - //test (); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } - - /*********************************************************************** - - ***********************************************************************/ - - private static void test() - { - UString s = new UString (r"aaaqw \uabcd eaaa"); - char[] x = "dssfsdff"; - s ~ x ~ x; - wchar c = s[3]; - s[3] = 'Q'; - int y = s.indexOf ("qwe"); - s.unEscape (); - s.toUpper (new UString); - s.padLeading(2).padTrailing(2).trim(); - } -} +/******************************************************************************* + + @file UString.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. 
This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, October 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UString; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UChar, + dwtx.dwtxhelper.mangoicu.ULocale; + +/******************************************************************************* + +*******************************************************************************/ + +private extern (C) void memmove (void* dst, void* src, uint bytes); + +/******************************************************************************* + + Bind to the IReadable and IWritable interfaces if we're building + along with the mango.io package + +*******************************************************************************/ + +version=Isolated; +version (Isolated) + { + private interface ITextOther {} + private interface IStringOther {} + } + else + { + private import dwtx.dwtxhelper.mangoicu.UMango; + + private import mango.io.model.IReader, + mango.io.model.IWriter; + + private interface ITextOther : IWritable {} + private interface IStringOther : IReadable {} + } + + +/******************************************************************************* + + UString is a string class that stores Unicode characters directly + and provides similar functionality as the Java String class. + + In ICU, a Unicode string consists of 16-bit Unicode code units. + A Unicode character may be stored with either one code unit — + which is the most common case — or with a matched pair of + special code units ("surrogates"). 
The data type for code units + is UChar. + + For single-character handling, a Unicode character code point is + a value in the range 0..0x10ffff. ICU uses the UChar32 type for + code points. + + Indexes and offsets into and lengths of strings always count code + units, not code points. This is the same as with multi-byte char* + strings in traditional string handling. Operations on partial + strings typically do not test for code point boundaries. If necessary, + the user needs to take care of such boundaries by testing for the code + unit values or by using functions like getChar32Start() + and getChar32Limit() + + UString methods are more lenient with regard to input parameter values + than other ICU APIs. In particular: + + - If indexes are out of bounds for a UString object (< 0 or > length) + then they are "pinned" to the nearest boundary. + + - If primitive string pointer values (e.g., const wchar* or char*) for + input strings are null, then those input string parameters are treated + as if they pointed to an empty string. However, this is not the case + for char* parameters for charset names or other IDs. + +*******************************************************************************/ + +class UString : UStringView, IStringOther +{ + alias opCat append; + alias opIndexAssign setCharAt; + + /*********************************************************************** + + Create an empty UString with the specified available space + + ***********************************************************************/ + + this (uint space = 0) + { + content.length = space; + mutable = true; + } + + /*********************************************************************** + + Create a UString upon the provided content. If said content + is immutable (read-only) then you might consider setting the + 'mutable' parameter to false. Doing so will avoid allocating + heap-space for the content until it is modified. 
+ + ***********************************************************************/ + + this (wchar[] content, bool mutable = true) + { + setTo (content, mutable); + } + + /*********************************************************************** + + Create a UString via the content of a UStringView. Note that the + default is to assume the content is immutable (read-only). + + ***********************************************************************/ + + this (UStringView other, bool mutable = false) + { + this (other.get, mutable); + } + + /*********************************************************************** + + Create a UString via the content of a UString. If said content + is immutable (read-only) then you might consider setting the + 'mutable' parameter to false. Doing so will avoid allocating + heap-space for the content until it is modified via UString + methods. + + ***********************************************************************/ + + this (UString other, bool mutable = true) + { + this (other.get, mutable); + } + + /*********************************************************************** + + Support for reading content via the IO system + + ***********************************************************************/ + + version (Isolated){} + else + { + /*************************************************************** + + Internal adapter to handle loading and conversion + of UString content. Once constructed, this may be + used as the target for an IReader. Alternatively, + invoke the load() method with an IBuffer of choice. 
+ + ***************************************************************/ + + class UStringDecoder : StringDecoder16 + { + private UString s; + + // construct a decoder on the given UString + this (UConverter c, uint bytes, UString s) + { + super (c, bytes); + this.s = s; + } + + // IReadable adapter to perform the conversion + protected void read (IReader r) + { + load (r.buffer); + } + + // read from the provided buffer until we + // either have all the content, or an eof + // condition throws an exception. + package void load (IBuffer b) + { + uint produced = super.read (b, s.content); + while (toGo) + { + s.expand (toGo); + produced += super.read (b, s.content[produced..$]); + } + s.len = produced; + } + } + + /*************************************************************** + + Another constructor for loading known content length + into a UString. + + ***************************************************************/ + + this (IBuffer buffer, uint contentLength, UConverter cvt) + { + this (contentLength); + UStringDecoder sd = new UStringDecoder (cvt, contentLength, this); + sd.load (buffer); + } + + /*************************************************************** + + Read as many bytes from the input as is necessary + to produce the expected number of wchar elements. + This uses the default wchar handler, which can be + altered by binding a StringDecoder to the IReader + in use (see UMango for details). + + We're mutable, so ensure we don't mess with the + IO buffers. Interestingly, changing the length + of a D array will account for slice assignments + (it checks the pointer to see if it's a starting + point in the pool). Unfortunately, that doesn't + catch the case where a slice starts at offset 0, + which is where IBuffer slices may come from. + + To be safe, we ask the allocator in use whether + the content it provided can be mutated or not. + Note that this is not necessary for UStringView, since + that is a read-only construct. 
+ + ***************************************************************/ + + void read (IReader r) + { + r.get (content); + len = content.length; + mutable = r.getAllocator.isMutable (content); + } + + /*************************************************************** + + Return a streaming decoder that can be used to + populate this UString with a specified number of + input bytes. + + This differs from the above read() method in the + way content is read: in the above case, exactly + the specified number of wchar elements will be + converter from the input, whereas in this case + a variable number of wchar elements are converted + until 'bytes' have been read from the input. This + is useful in those cases where the original number + of elements has been lost, and only the resultant + converted byte-count remains (a la HTTP). + + The returned StringDecoder is one-shot only. You may + reuse it (both the converter and the byte count) via + its reset() method. + + One applies the resultant converter directly with an + IReader like so: + + @code + UString s = ...; + IReader r = ...; + + // r >> s.createDecoder(cvt, bytes); + r.get (s.createDecoder(cvt, bytes)); + @endcode + + which will read the specified number of bytes from + the input and convert them to an appropriate number + of wchars within the UString. 
+ + ***************************************************************/ + + StringDecoder createDecoder (UConverter c, uint bytes) + { + return new UStringDecoder (c, bytes, this); + } + } + + /*********************************************************************** + + Append text to this UString + + ***********************************************************************/ + + UString opCat (UStringView other) + { + return opCat (other.get); + } + + /*********************************************************************** + + Append partial text to this UString + + ***********************************************************************/ + + UString opCat (UStringView other, uint start, uint len=uint.max) + { + other.pinIndices (start, len); + return opCat (other.content [start..start+len]); + } + + /*********************************************************************** + + Append a single character to this UString + + ***********************************************************************/ + + UString opCat (wchar chr) + { + return opCat (&chr, 1); + } + + /*********************************************************************** + + Append text to this UString + + ***********************************************************************/ + + UString opCat (wchar[] chars) + { + return opCat (chars.ptr, chars.length); + } + + /*********************************************************************** + + Converts a sequence of UTF-8 bytes to UChars (UTF-16) + + ***********************************************************************/ + + UString opCat (char[] chars) + { + uint fmt (wchar* dst, uint len, inout UErrorCode e) + { + uint x; + + u_strFromUTF8 (dst, len, &x, chars.ptr, chars.length, e); + return x; + } + + expand (chars.length); + return format (&fmt, "failed to append UTF char[]"); + } + + /*********************************************************************** + + Set a section of this UString to the specified character + + 
***********************************************************************/ + + UString setTo (wchar chr, uint start=0, uint len=uint.max) + { + pinIndices (start, len); + if (! mutable) + realloc (); + content [start..start+len] = chr; + return this; + } + + /*********************************************************************** + + Set the content to the provided array. Parameter 'mutable' + specifies whether the given array is likely to change. If + not, the array is aliased until such time this UString is + altered. + + ***********************************************************************/ + + UString setTo (wchar[] chars, bool mutable = true) + { + len = chars.length; + if ((this.mutable = mutable) == true) + content = chars.dup; + else + content = chars; + return this; + } + + /*********************************************************************** + + Replace the content of this UString. If the new content + is immutable (read-only) then you might consider setting the + 'mutable' parameter to false. Doing so will avoid allocating + heap-space for the content until it is modified via one of + these methods. + + ***********************************************************************/ + + UString setTo (UStringView other, bool mutable = true) + { + return setTo (other.get, mutable); + } + + /*********************************************************************** + + Replace the content of this UString. If the new content + is immutable (read-only) then you might consider setting the + 'mutable' parameter to false. Doing so will avoid allocating + heap-space for the content until it is modified via one of + these methods. 
+ + ***********************************************************************/ + + UString setTo (UStringView other, uint start, uint len, bool mutable = true) + { + other.pinIndices (start, len); + return setTo (other.content [start..start+len], mutable); + } + + /*********************************************************************** + + Replace the character at the specified location. + + ***********************************************************************/ + + final UString opIndexAssign (wchar chr, uint index) + in { + if (index >= len) + exception ("index of out bounds"); + } + body + { + if (! mutable) + realloc (); + content [index] = chr; + return this; + } + + /*********************************************************************** + + Remove a piece of this UString. + + ***********************************************************************/ + + UString remove (uint start, uint length=uint.max) + { + pinIndices (start, length); + if (length) + if (start >= len) + truncate (start); + else + { + if (! mutable) + realloc (); + + uint i = start + length; + memmove (&content[start], &content[i], (len-i) * wchar.sizeof); + len -= length; + } + return this; + } + + /*********************************************************************** + + Truncate the length of this UString. + + ***********************************************************************/ + + UString truncate (uint length=0) + { + if (length <= len) + len = length; + return this; + } + + /*********************************************************************** + + Insert leading spaces in this UString + + ***********************************************************************/ + + UString padLeading (uint count, wchar padChar = 0x0020) + { + expand (count); + memmove (&content[count], content.ptr, len * wchar.sizeof); + len += count; + return setTo (padChar, 0, count); + } + + /*********************************************************************** + + Append some trailing spaces to this UString. 
+ + ***********************************************************************/ + + UString padTrailing (uint length, wchar padChar = 0x0020) + { + expand (length); + len += length; + return setTo (padChar, len-length, length); + } + + /*********************************************************************** + + Check for available space within the buffer, and expand + as necessary. + + ***********************************************************************/ + + package final void expand (uint count) + { + if ((len + count) > content.length) + realloc (count); + } + + /*********************************************************************** + + Allocate memory due to a change in the content. We handle + the distinction between mutable and immutable here. + + ***********************************************************************/ + + private final void realloc (uint count = 0) + { + uint size = (content.length + count + 63) & ~63; + + if (mutable) + content.length = size; + else + { + mutable = true; + wchar[] x = content; + content = new wchar [size]; + if (len) + content[0..len] = x; + } + } + + /*********************************************************************** + + Internal method to support UString appending + + ***********************************************************************/ + + private final UString opCat (wchar* chars, uint count) + { + expand (count); + content[len..len+count] = chars[0..count]; + len += count; + return this; + } + + /*********************************************************************** + + Internal method to support formatting into this UString. + This is used by many of the ICU wrappers to append content + into a UString. 
+
+                The formatter is handed the free space behind the current
+                content and returns the number of code units it produced
+                (or needs). On a buffer overflow the buffer is grown and
+                the formatter is called again; any other error is
+                reported through exception (msg).
+
+        ***********************************************************************/
+
+        typedef uint delegate (wchar* dst, uint len, inout UErrorCode e) Formatter;
+
+        package final UString format (Formatter format, char[] msg)
+        {
+                UErrorCode e;
+                uint       length;
+
+                while (true)
+                      {
+                      e = e.OK;
+                      // content.ptr + len instead of &content[len]: when the
+                      // buffer is full (or still empty) len is not a valid
+                      // index, yet the formatter must still be called so
+                      // that it can report the space it needs
+                      length = format (content.ptr + len, content.length - len, e);
+                      if (e == e.BufferOverflow)
+                          expand (length);
+                      else
+                         break;
+                      }
+
+                if (isError (e))
+                    exception (msg);
+
+                len += length;
+                return this;
+        }
+}
+
+
+/*******************************************************************************
+
+        Immutable (read-only) text -- use UString for mutable strings.
+
+*******************************************************************************/
+
+class UStringView : ICU, ITextOther
+{
+        alias opIndex   charAt;
+
+        // the core of the UStringView and UString attributes. The name 'len'
+        // is used rather than the more obvious 'length' since there is
+        // a collision with the silly array[length] syntactic sugar ...
+        package uint    len;
+        package wchar[] content;
+
+        // this should probably be in UString only, but there seems to
+        // be a compiler bug where it doesn't get initialised correctly,
+        // and it's perhaps useful to have here for when a UString is
+        // passed as a UStringView argument.
+        private bool    mutable;
+
+        // toFolded() argument; also accepted by compareFolded(). The value
+        // is passed unchanged as the options argument of the ICU call.
+        public enum     CaseOption
+                        {
+                        Default  = 0,
+                        SpecialI = 1
+                        }
+
+        /***********************************************************************
+
+                Hidden constructor
+
+                Leaves content and len at their default values.
+
+        ***********************************************************************/
+
+        private this ()
+        {
+        }
+
+        /***********************************************************************
+
+                Construct read-only wrapper around the given content
+
+                The array is referenced, not copied; the whole array is
+                treated as valid content.
+
+        ***********************************************************************/
+
+        this (wchar[] content)
+        {
+                this.content = content;
+                this.len = content.length;
+        }
+
+        /***********************************************************************
+
+                Support for writing via the Mango IO subsystem
+
+                Compiled out when version Isolated is set.
+
+        ***********************************************************************/
+
+        version (Isolated){}
+        else
+        {
+                void write (IWriter w)
+                {
+                        w.put (get);
+                }
+        }
+
+        /***********************************************************************
+
+                Return the valid content from this UStringView
+
+                This is a slice of the internal buffer (the first len
+                code units), not a copy.
+
+        ***********************************************************************/
+
+        final package wchar[] get ()
+        {
+                return content [0..len];
+        }
+
+        /***********************************************************************
+
+                Is this UStringView equal to another?
+
+                Returns non-zero when o is a UStringView that is either
+                this very instance or compares equal via compare();
+                returns 0 for any other object.
+
+        ***********************************************************************/
+
+        final override int opEquals (Object o)
+        {
+                UStringView other = cast(UStringView) o;
+
+                if (other)
+                    return (other is this || compare (other) == 0);
+                return 0;
+        }
+
+        /***********************************************************************
+
+                Compare this UStringView to another.
+
+                Returns 0 for the same instance, the result of compare()
+                for another UStringView, and 1 for any other object.
+
+        ***********************************************************************/
+
+        final override int opCmp (Object o)
+        {
+                UStringView other = cast(UStringView) o;
+
+                if (other is this)
+                    return 0;
+                else
+                   if (other)
+                       return compare (other);
+                return 1;
+        }
+
+        /***********************************************************************
+
+                Hash this UStringView
+
+                Only the valid content [0..len] takes part in the hash.
+
+        ***********************************************************************/
+
+        final override uint toHash ()
+        {
+                return typeid(wchar[]).getHash (&content[0..len]);
+        }
+
+        /***********************************************************************
+
+                Clone this UStringView into a UString
+
+                Only the valid content is cloned; unused capacity behind
+                len is not part of the string.
+
+        ***********************************************************************/
+
+        final UString copy ()
+        {
+                // 'get' rather than 'content': the backing buffer may be
+                // longer than the valid content
+                return new UString (get);
+        }
+
+        /***********************************************************************
+
+                Clone a section of this UStringView into a UString
+
+                start and len are pinned to the valid content first.
+
+        ***********************************************************************/
+
+        final UString extract (uint start, uint len=uint.max)
+        {
+                pinIndices (start, len);
+                return new UString (content[start..start+len]);
+        }
+
+        /***********************************************************************
+
+                Count unicode code points in the length UChar code units of
+                the string. A code point may occupy either one or two UChar
+                code units. Counting code points involves reading all code
+                units.
+
+                start and length are pinned to the valid content first.
+
+        ***********************************************************************/
+
+        final uint codePoints (uint start=0, uint length=uint.max)
+        {
+                pinIndices (start, length);
+                // start may have been pinned to the very end of the buffer,
+                // where &content[start] would not be a valid index
+                return u_countChar32 (content.ptr + start, length);
+        }
+
+        /***********************************************************************
+
+                Return an indication whether or not there are surrogate pairs
+                within the string.
+
+                The check compares the number of code points in the
+                (pinned) range against its number of code units.
+
+        ***********************************************************************/
+
+        final bool hasSurrogates (uint start=0, uint length=uint.max)
+        {
+                pinIndices (start, length);
+                return codePoints (start, length) != length;
+        }
+
+        /***********************************************************************
+
+                Return the character at the specified position.
+
+                The in-contract calls exception() when index is not
+                below the current length. Also available as charAt().
+
+        ***********************************************************************/
+
+        final wchar opIndex (uint index)
+        in {
+                if (index >= len)
+                    exception ("index of out bounds");
+           }
+        body
+        {
+                return content [index];
+        }
+
+        /***********************************************************************
+
+                Return the length of the valid content
+
+                This is a count of UTF-16 code units, which may be less
+                than the size of the underlying buffer.
+
+        ***********************************************************************/
+
+        final uint length ()
+        {
+                return len;
+        }
+
+        /***********************************************************************
+
+                Compare the valid content of this UStringView with that
+                of another.
+
+                The comparison can be done in code unit order or in code
+                point order. They differ only in UTF-16 when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compare (UStringView other, bool codePointOrder=false)
+        {
+                return compare (other.get, codePointOrder);
+        }
+
+        /***********************************************************************
+
+                The comparison can be done in code unit order or in code
+                point order. They differ only in UTF-16 when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+                Returns the value reported by u_strCompare for the valid
+                content of this string against the given array.
+
+        ***********************************************************************/
+
+        final int compare (wchar[] other, bool codePointOrder=false)
+        {
+                return u_strCompare (content.ptr, len, other.ptr, other.length, codePointOrder);
+        }
+
+        /***********************************************************************
+
+                Compare the valid content of this UStringView with that
+                of another via u_strCaseCompare (case folded). option is
+                handed to ICU unchanged.
+
+                The comparison can be done in UTF-16 code unit order or
+                in code point order. They differ only when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compareFolded (UStringView other, CaseOption option = CaseOption.Default)
+        {
+                // other.get, not other.content: only the valid part of the
+                // other buffer may take part, as in compare (UStringView)
+                return compareFolded (other.get, option);
+        }
+
+        /***********************************************************************
+
+                Compare the valid content of this UStringView with the
+                given array via u_strCaseCompare (case folded). option
+                is handed to ICU unchanged.
+
+                The comparison can be done in UTF-16 code unit order or
+                in code point order. They differ only when comparing
+                supplementary code points (U+10000..U+10ffff) to BMP code
+                points near the end of the BMP (i.e., U+e000..U+ffff).
+
+                In code unit order, high BMP code points sort after
+                supplementary code points because they are stored as
+                pairs of surrogates which are at U+d800..U+dfff.
+
+        ***********************************************************************/
+
+        final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default)
+        {
+                return compareFolded (get, other, option);
+        }
+
+        /***********************************************************************
+
+                Does this UStringView start with specified string?
+
+        ***********************************************************************/
+
+        final bool startsWith (UStringView other)
+        {
+                return startsWith (other.get);
+        }
+
+        /***********************************************************************
+
+                Does this UStringView start with specified string?
+
+                Note that the prefix is matched with compareFolded(),
+                i.e. via u_strCaseCompare. A prefix longer than this
+                string never matches.
+
+        ***********************************************************************/
+
+        final bool startsWith (wchar[] chars)
+        {
+                if (len >= chars.length)
+                    return compareFolded (content[0..chars.length], chars) == 0;
+                return false;
+        }
+
+        /***********************************************************************
+
+                Does this UStringView end with specified string?
+
+        ***********************************************************************/
+
+        final bool endsWith (UStringView other)
+        {
+                return endsWith (other.get);
+        }
+
+        /***********************************************************************
+
+                Does this UStringView end with specified string?
+
+                Note that the suffix is matched with compareFolded(),
+                i.e. via u_strCaseCompare. A suffix longer than this
+                string never matches.
+
+        ***********************************************************************/
+
+        final bool endsWith (wchar[] chars)
+        {
+                if (len >= chars.length)
+                    return compareFolded (content[len-chars.length..len], chars) == 0;
+                return false;
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a BMP code point in a string.
+                A surrogate code point is found only if its match in the
+                text is not part of a surrogate pair.
+
+                The search begins at start (pinned to the content) and
+                yields the index of the match, or uint.max when there
+                is none.
+
+        ***********************************************************************/
+
+        final uint indexOf (wchar c, uint start=0)
+        {
+                pinIndex (start);
+                // start may have been pinned to the very end of the buffer,
+                // where &content[start] would not be a valid index
+                wchar* s = u_memchr (content.ptr + start, c, len-start);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a substring in a string.
+
+                The substring is found at code point boundaries.
That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+        ***********************************************************************/
+
+        final uint indexOf (UStringView other, uint start=0)
+        {
+                return indexOf (other.get, start);
+        }
+
+        /***********************************************************************
+
+                Find the first occurrence of a substring in a string.
+
+                The substring is found at code point boundaries. That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+                The search begins at start (pinned to the content) and
+                yields the index of the match, or uint.max when there
+                is none.
+
+        ***********************************************************************/
+
+        final uint indexOf (wchar[] chars, uint start=0)
+        {
+                pinIndex (start);
+                // start may have been pinned to the very end of the buffer,
+                // where &content[start] would not be a valid index
+                wchar* s = u_strFindFirst (content.ptr + start, len-start, chars.ptr, chars.length);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a BMP code point in a string.
+                A surrogate code point is found only if its match in the
+                text is not part of a surrogate pair.
+
+                Only the first start code units (pinned to the content)
+                are searched. Yields the index of the match, or uint.max
+                when there is none.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (wchar c, uint start=uint.max)
+        {
+                pinIndex (start);
+                wchar* s = u_memrchr (content.ptr, c, start);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a substring in a string.
+
+                The valid content of the given UStringView is used as
+                the substring.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (UStringView other, uint start=uint.max)
+        {
+                return lastIndexOf (other.get, start);
+        }
+
+        /***********************************************************************
+
+                Find the last occurrence of a substring in a string.
+
+                The substring is found at code point boundaries. That means
+                that if the substring begins with a trail surrogate or ends
+                with a lead surrogate, then it is found only if these
+                surrogates stand alone in the text. Otherwise, the substring
+                edge units would be matched against halves of surrogate pairs.
+
+                Only the first start code units (pinned to the content)
+                are searched. Yields the index of the match, or uint.max
+                when there is none.
+
+        ***********************************************************************/
+
+        final uint lastIndexOf (wchar[] chars, uint start=uint.max)
+        {
+                pinIndex (start);
+                wchar* s = u_strFindLast (content.ptr, start, chars.ptr, chars.length);
+                if (s)
+                    return s - content.ptr;
+                return uint.max;
+        }
+
+        /***********************************************************************
+
+                Lowercase the characters into a separate UString, using
+                ULocale.Default.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toLower (UString dst)
+        {
+                return toLower (dst, ULocale.Default);
+        }
+
+        /***********************************************************************
+
+                Lowercase the characters into a separate UString.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+                Failures are reported with the message "toLower() failed".
+
+        ***********************************************************************/
+
+        final UString toLower (UString dst, inout ULocale locale)
+        {
+                // adapter handed to dst.format(); it receives the free space
+                // of the destination buffer
+                uint lower (wchar* dst, uint length, inout UErrorCode e)
+                {
+                        return u_strToLower (dst, length, content.ptr, len, ICU.toString(locale.name), e);
+                }
+
+                // reserve some headroom up front, since the result may be
+                // longer than the source
+                dst.expand (len + 32);
+                return dst.format (&lower, "toLower() failed");
+        }
+
+        /***********************************************************************
+
+                Uppercase the characters into a separate UString, using
+                ULocale.Default.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString.
+
+        ***********************************************************************/
+
+        final UString toUpper (UString dst)
+        {
+                return toUpper (dst, ULocale.Default);
+        }
+
+        /***********************************************************************
+
+                Uppercase the characters into a separate UString.
+
+                Casing is locale-dependent and context-sensitive. The
+                result may be longer or shorter than the original.
+
+                Note that the return value refers to the provided destination
+                UString. Failures are reported with the message
+                "toUpper() failed".
+
+        ***********************************************************************/
+
+        final UString toUpper (UString dst, inout ULocale locale)
+        {
+                // adapter handed to dst.format(); it receives the free space
+                // of the destination buffer
+                uint upper (wchar* dst, uint length, inout UErrorCode e)
+                {
+                        return u_strToUpper (dst, length, content.ptr, len, ICU.toString(locale.name), e);
+                }
+
+                // reserve some headroom up front, since the result may be
+                // longer than the source
+                dst.expand (len + 32);
+                return dst.format (&upper, "toUpper() failed");
+        }
+
+        /***********************************************************************
+
+                Case-fold the characters into a separate UString.
+
+                Case-folding is locale-independent and not context-sensitive,
+                but there is an option for whether to include or exclude
+                mappings for dotted I and dotless i that are marked with 'I'
+                in CaseFolding.txt. The result may be longer or shorter than
+                the original.
+
+                Note that the return value refers to the provided destination
+                UString. Failures are reported with the message
+                "toFolded() failed".
+
+        ***********************************************************************/
+
+        final UString toFolded (UString dst, CaseOption option = CaseOption.Default)
+        {
+                // adapter handed to dst.format(); it receives the free space
+                // of the destination buffer
+                uint fold (wchar* dst, uint length, inout UErrorCode e)
+                {
+                        return u_strFoldCase (dst, length, content.ptr, len, option, e);
+                }
+
+                // reserve some headroom up front, since the result may be
+                // longer than the source
+                dst.expand (len + 32);
+                return dst.format (&fold, "toFolded() failed");
+        }
+
+        /***********************************************************************
+
+                Converts a sequence of wchar (UTF-16) to UTF-8 bytes. If
+                the output array is not provided, an array of appropriate
+                size will be allocated and returned. Where the output is
+                provided, it must be large enough to hold potentially four
+                bytes per character for surrogate-pairs or three bytes per
+                character for BMP only. Consider using UConverter where
+                streaming conversions are required.
+
+                Returns an array slice representing the valid UTF8 content.
+                A conversion error is reported through testError().
+
+        ***********************************************************************/
+
+        final char[] toUtf8 (char[] dst = null)
+        {
+                uint       x;
+                UErrorCode e;
+
+                // no buffer supplied: allocate four bytes per code unit
+                if (! cast(char*) dst)
+                      dst = new char[len * 4];
+
+                // x receives the number of bytes written
+                u_strToUTF8 (dst.ptr, dst.length, &x, content.ptr, len, e);
+                testError (e, "failed to convert to UTF8");
+                return dst [0..x];
+        }
+
+        /***********************************************************************
+
+                Remove leading and trailing whitespace from this UStringView.
+                Note that we slice the content to remove leading space.
+
+                The view is modified in place and returned; no content
+                is copied.
+
+        ***********************************************************************/
+
+        UStringView trim ()
+        {
+                wchar   c;
+                uint    i = len;
+
+                // cut off trailing white space
+                while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c)))
+                       --i;
+                len = i;
+
+                // now remove leading whitespace
+                for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {}
+                if (i)
+                   {
+                   len -= i;
+                   // drop the leading i units only: trailing space has
+                   // already been excluded via len, and slicing the end as
+                   // well could leave the buffer shorter than len
+                   content = content[i..$];
+                   }
+
+                return this;
+        }
+
+        /***********************************************************************
+
+                Unescape a string of characters and write the resulting
+                Unicode characters to the destination buffer.  The following
+                escape sequences are recognized:
+
+                  \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+                  \\Uhhhhhhhh   8 hex digits
+                  \\xhh         1-2 hex digits
+                  \\x{h...}     1-8 hex digits
+                  \\ooo         1-3 octal digits; o in [0-7]
+                  \\cX          control-X; X is masked with 0x1F
+
+                as well as the standard ANSI C escapes:
+
+                  \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+                  \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+                  \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+
+                Anything else following a backslash is generically escaped.
+                For example, "[a\\-z]" returns "[a-z]".
+
+                If an escape sequence is ill-formed, this method returns an
+                empty string.  An example of an ill-formed sequence is "\\u"
+                followed by fewer than 4 hex digits.
+
+        ***********************************************************************/
+
+        final UString unEscape ()
+        {
+                UString result = new UString (len);
+                for (uint i=0; i < len;)
+                    {
+                    dchar c = charAt(i++);
+                    if (c == 0x005C)
+                       {
+                       // bump index ...
+                       c = u_unescapeAt (&_charAt, &i, len, cast(void*) this);
+
+                       // error?
+                       if (c == 0xFFFFFFFF)
+                          {
+                          result.truncate ();   // return empty string
+                          break;                // invalid escape sequence
+                          }
+                       }
+                    result.append (c);
+                    }
+                return result;
+        }
+
+        /***********************************************************************
+
+                Is this code point a surrogate (U+d800..U+dfff)?
+
+        ***********************************************************************/
+
+        final static bool isSurrogate (wchar c)
+        {
+                // the mask keeps everything above the low 11 bits
+                return (c & 0xfffff800) == 0xd800;
+        }
+
+        /***********************************************************************
+
+                Is this code unit a lead surrogate (U+d800..U+dbff)?
+
+        ***********************************************************************/
+
+        final static bool isLeading (wchar c)
+        {
+                // the mask keeps everything above the low 10 bits
+                return (c & 0xfffffc00) == 0xd800;
+        }
+
+        /***********************************************************************
+
+                Is this code unit a trail surrogate (U+dc00..U+dfff)?
+
+        ***********************************************************************/
+
+        final static bool isTrailing (wchar c)
+        {
+                // the mask keeps everything above the low 10 bits
+                return (c & 0xfffffc00) == 0xdc00;
+        }
+
+        /***********************************************************************
+
+                Adjust a random-access offset to a code point boundary
+                at the start of a code point. If the offset points to
+                the trail surrogate of a surrogate pair, then the offset
+                is decremented. Otherwise, it is not modified.
+
+                The in-contract calls exception() when i is not below
+                the current length.
+
+        ***********************************************************************/
+
+        final uint getCharStart (uint i)
+        in {
+                if (i >= len)
+                    exception ("index of out bounds");
+           }
+        body
+        {
+                // 'i' is tested before content[i-1] is read, so offset 0
+                // never looks in front of the buffer
+                if (isTrailing (content[i]) && i && isLeading (content[i-1]))
+                    --i;
+                return i;
+        }
+
+        /***********************************************************************
+
+                Adjust a random-access offset to a code point boundary
+                after a code point. If the offset is behind the lead
+                surrogate of a surrogate pair, then the offset is
+                incremented. Otherwise, it is not modified.
+
+                The in-contract calls exception() when i is not below
+                the current length.
+
+        ***********************************************************************/
+
+        final uint getCharLimit (uint i)
+        in {
+                if (i >= len)
+                    exception ("index of out bounds");
+           }
+        body
+        {
+                // 'i' is tested before content[i-1] is read, so offset 0
+                // never looks in front of the buffer
+                if (i && isLeading(content[i-1]) && isTrailing (content[i]))
+                    ++i;
+                return i;
+        }
+
+        /***********************************************************************
+
+                Callback for C unescapeAt() function
+
+                context is the UStringView that unEscape() passed along;
+                the callback returns its code unit at offset.
+
+        ***********************************************************************/
+
+        extern (C)
+        {
+                typedef wchar function (uint offset, void* context) CharAt;
+
+                private static wchar _charAt (uint offset, void* context)
+                {
+                        // unEscape() hands over 'this', which is a UStringView
+                        // and not necessarily a UString
+                        return (cast(UStringView) context).charAt (offset);
+                }
+        }
+
+        /***********************************************************************
+
+                Pin the given index to a valid position.
+
+                An index beyond the content is set to len, so the
+                result may address the position just behind the last
+                code unit.
+
+        ***********************************************************************/
+
+        final private void pinIndex (inout uint x)
+        {
+                if (x > len)
+                    x = len;
+        }
+
+        /***********************************************************************
+
+                Pin the given index and length to a valid position.
+
+                start is limited to len, and length to what remains
+                behind start, so start + length never exceeds len.
+
+        ***********************************************************************/
+
+        final private void pinIndices (inout uint start, inout uint length)
+        {
+                if (start > len)
+                    start = len;
+
+                if (length > (len - start))
+                    length = len - start;
+        }
+
+        /***********************************************************************
+
+                Helper for comparison methods
+
+                Hands both arrays and the option to u_strCaseCompare
+                and returns its result; an ICU error is reported
+                through testError().
+
+        ***********************************************************************/
+
+        final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default)
+        {
+                UErrorCode e;
+
+                int x = u_strCaseCompare (s1.ptr, s1.length, s2.ptr, s2.length, option, e);
+                testError (e, "compareFolded failed");
+                return x;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library.
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + + ***********************************************************************/ + + private static extern (C) + { + wchar* function (wchar*, uint, wchar*, uint) u_strFindFirst; + wchar* function (wchar*, uint, wchar*, uint) u_strFindLast; + wchar* function (wchar*, wchar, uint) u_memchr; + wchar* function (wchar*, wchar, uint) u_memrchr; + int function (wchar*, uint, wchar*, uint, bool) u_strCompare; + int function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strCaseCompare; + dchar function (CharAt, uint*, uint, void*) u_unescapeAt; + uint function (wchar*, uint) u_countChar32; + uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToUpper; + uint function (wchar*, uint, wchar*, uint, char*, inout UErrorCode) u_strToLower; + uint function (wchar*, uint, wchar*, uint, uint, inout UErrorCode) u_strFoldCase; + wchar* function (wchar*, uint, uint*, char*, uint, inout UErrorCode) u_strFromUTF8; + char* function (char*, uint, uint*, wchar*, uint, inout UErrorCode) u_strToUTF8; + } + + /*********************************************************************** + + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &u_strFindFirst, "u_strFindFirst"}, + {cast(void**) &u_strFindLast, "u_strFindLast"}, + {cast(void**) &u_memchr, "u_memchr"}, + {cast(void**) &u_memrchr, "u_memrchr"}, + {cast(void**) &u_strCompare, "u_strCompare"}, + {cast(void**) &u_strCaseCompare, "u_strCaseCompare"}, + {cast(void**) &u_unescapeAt, "u_unescapeAt"}, + {cast(void**) &u_countChar32, "u_countChar32"}, + {cast(void**) &u_strToUpper, "u_strToUpper"}, + {cast(void**) &u_strToLower, "u_strToLower"}, + {cast(void**) 
&u_strFoldCase, "u_strFoldCase"}, + {cast(void**) &u_strFromUTF8, "u_strFromUTF8"}, + {cast(void**) &u_strToUTF8, "u_strToUTF8"}, + ]; + + /*********************************************************************** + + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + //test (); + } + + /*********************************************************************** + + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } + + /*********************************************************************** + + ***********************************************************************/ + + private static void test() + { + UString s = new UString (r"aaaqw \uabcd eaaa"); + char[] x = "dssfsdff"; + s ~ x ~ x; + wchar c = s[3]; + s[3] = 'Q'; + int y = s.indexOf ("qwe"); + s.unEscape (); + s.toUpper (new UString); + s.padLeading(2).padTrailing(2).trim(); + } +} diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UStringPrep.d --- a/dwtx/dwtxhelper/mangoicu/UStringPrep.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UStringPrep.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,249 +1,249 @@ -/******************************************************************************* - - @file UStringPrep.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. 
If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UStringPrep; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -/******************************************************************************* - - StringPrep API implements the StingPrep framework as described - by RFC 3454. - - StringPrep prepares Unicode strings for use in network protocols. - Profiles of StingPrep are set of rules and data according to with - the Unicode Strings are prepared. Each profiles contains tables - which describe how a code point should be treated. The tables are - broadly classied into - - - Unassinged Table: Contains code points that are unassigned - in the Unicode Version supported by StringPrep. Currently - RFC 3454 supports Unicode 3.2. - - - Prohibited Table: Contains code points that are prohibted - from the output of the StringPrep processing function. - - - Mapping Table: Contains code ponts that are deleted from the - output or case mapped. - - The procedure for preparing Unicode strings: - - 1. 
Map: For each character in the input, check if it has a mapping - and, if so, replace it with its mapping. - - 2. Normalize: Possibly normalize the result of step 1 using Unicode - normalization. - - 3. Prohibit: Check for any characters that are not allowed in the - output. If any are found, return an error. - - 4. Check bidi: Possibly check for right-to-left characters, and if - any are found, make sure that the whole string satisfies the - requirements for bidirectional strings. If the string does not - satisfy the requirements for bidirectional strings, return an - error. - - See - this page for full details. - -*******************************************************************************/ - -class UStringPrep : ICU -{ - private Handle handle; - - enum Options - { - Strict, - Lenient - } - - - /*********************************************************************** - - Creates a StringPrep profile from the data file. - - path string containing the full path pointing - to the directory where the profile reside - followed by the package name e.g. - "/usr/resource/my_app/profiles/mydata" on - a Unix system. if NULL, ICU default data - files will be used. 
- - fileName name of the profile file to be opened - - ***********************************************************************/ - - this (char[] path, char[] filename) - { - UErrorCode e; - - handle = usprep_open (toString(path), toString(filename), e); - testError (e, "failed to open string-prep"); - } - - /*********************************************************************** - - Close this profile - - ***********************************************************************/ - - ~this () - { - usprep_close (handle); - } - - /*********************************************************************** - - Prepare the input buffer - - This operation maps, normalizes(NFKC), checks for prohited - and BiDi characters in the order defined by RFC 3454 depending - on the options specified in the profile - - ***********************************************************************/ - - void prepare (UStringView src, UString dst, Options o = Options.Strict) - { - uint fmt (wchar* p, uint len, inout UErrorCode e) - { - return usprep_prepare (handle, src.get.ptr, src.len, p, len, o, null, e); - } - - dst.format (&fmt, "failed to prepare text"); - } - - - /*********************************************************************** - - Bind the ICU functions from a shared library. 
This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (char*, char*, inout UErrorCode) usprep_open; - void function (Handle) usprep_close; - uint function (Handle, wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) usprep_prepare; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &usprep_open, "usprep_open"}, - {cast(void**) &usprep_close, "usprep_close"}, - {cast(void**) &usprep_prepare, "usprep_prepare"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuuc, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - +/******************************************************************************* + + @file UStringPrep.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. 
The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UStringPrep; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +/******************************************************************************* + + StringPrep API implements the StringPrep framework as described + by RFC 3454. + + StringPrep prepares Unicode strings for use in network protocols. + Profiles of StringPrep are sets of rules and data according to which + the Unicode strings are prepared. Each profile contains tables + which describe how a code point should be treated. The tables are + broadly classified into + + - Unassigned Table: Contains code points that are unassigned + in the Unicode Version supported by StringPrep. Currently + RFC 3454 supports Unicode 3.2. + + - Prohibited Table: Contains code points that are prohibited + from the output of the StringPrep processing function. + + - Mapping Table: Contains code points that are deleted from the + output or case mapped. + + The procedure for preparing Unicode strings: + + 1. 
Map: For each character in the input, check if it has a mapping + and, if so, replace it with its mapping. + + 2. Normalize: Possibly normalize the result of step 1 using Unicode + normalization. + + 3. Prohibit: Check for any characters that are not allowed in the + output. If any are found, return an error. + + 4. Check bidi: Possibly check for right-to-left characters, and if + any are found, make sure that the whole string satisfies the + requirements for bidirectional strings. If the string does not + satisfy the requirements for bidirectional strings, return an + error. + + See + this page for full details. + +*******************************************************************************/ + +class UStringPrep : ICU +{ + private Handle handle; + /* Options values are passed unchanged as the options argument of usprep_prepare (see prepare) */ + enum Options + { + Strict, + Lenient + } + + + /*********************************************************************** + + Creates a StringPrep profile from the data file. + + path string containing the full path pointing + to the directory where the profile resides + followed by the package name e.g. + "/usr/resource/my_app/profiles/mydata" on + a Unix system. if NULL, ICU default data + files will be used. 
+ + filename name of the profile file to be opened + + ***********************************************************************/ + + this (char[] path, char[] filename) + { + UErrorCode e; + + handle = usprep_open (toString(path), toString(filename), e); + testError (e, "failed to open string-prep"); + } + + /*********************************************************************** + + Close this profile + + ***********************************************************************/ + + ~this () + { + usprep_close (handle); + } + + /*********************************************************************** + + Prepare the input buffer + + This operation maps, normalizes(NFKC), checks for prohibited + and BiDi characters in the order defined by RFC 3454 depending + on the options specified in the profile + + ***********************************************************************/ + + void prepare (UStringView src, UString dst, Options o = Options.Strict) + { + uint fmt (wchar* p, uint len, inout UErrorCode e) + { + return usprep_prepare (handle, src.get.ptr, src.len, p, len, o, null, e); + } + + dst.format (&fmt, "failed to prepare text"); + } + + + /*********************************************************************** + + Bind the ICU functions from a shared library. 
This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + ICU entry points held as C-linkage function pointers; their addresses are listed in targets below + ***********************************************************************/ + + private static extern (C) + { + Handle function (char*, char*, inout UErrorCode) usprep_open; + void function (Handle) usprep_close; + uint function (Handle, wchar*, uint, wchar*, uint, uint, void*, inout UErrorCode) usprep_prepare; + } + + /*********************************************************************** + Pairs the address of each function pointer above with the ICU symbol name given to FunctionLoader.bind + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &usprep_open, "usprep_open"}, + {cast(void**) &usprep_close, "usprep_close"}, + {cast(void**) &usprep_prepare, "usprep_prepare"}, + ]; + + /*********************************************************************** + Static constructor: passes icuuc and targets to FunctionLoader.bind and keeps the returned handle + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuuc, targets); + } + + /*********************************************************************** + Static destructor: hands the stored library handle back to FunctionLoader.unbind + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} + diff -r 11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UTimeZone.d --- a/dwtx/dwtxhelper/mangoicu/UTimeZone.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UTimeZone.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,263 +1,263 @@ 
-/******************************************************************************* - - @file UTimeZone.d - - Copyright (c) 2004 Kris Bell - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. 
- - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UTimeZone; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString, - dwtx.dwtxhelper.mangoicu.UEnumeration; - -/******************************************************************************* - - A representation of a TimeZone. Unfortunately, ICU does not expose - this as a seperate entity from the C-API, so we have to make do - with an approximation instead. 
- -*******************************************************************************/ - -struct UTimeZone -{ - public wchar[] name; - - public static UTimeZone Default = {null}; - public static UTimeZone Gmt = {"Etc/GMT"}; - public static UTimeZone Greenwich = {"Etc/Greenwich"}; - public static UTimeZone Uct = {"Etc/UCT"}; - public static UTimeZone Utc = {"Etc/UTC"}; - public static UTimeZone Universal = {"Etc/Universal"}; - - public static UTimeZone GmtPlus0 = {"Etc/GMT+0"}; - public static UTimeZone GmtPlus1 = {"Etc/GMT+1"}; - public static UTimeZone GmtPlus2 = {"Etc/GMT+2"}; - public static UTimeZone GmtPlus3 = {"Etc/GMT+3"}; - public static UTimeZone GmtPlus4 = {"Etc/GMT+4"}; - public static UTimeZone GmtPlus5 = {"Etc/GMT+5"}; - public static UTimeZone GmtPlus6 = {"Etc/GMT+6"}; - public static UTimeZone GmtPlus7 = {"Etc/GMT+7"}; - public static UTimeZone GmtPlus8 = {"Etc/GMT+8"}; - public static UTimeZone GmtPlus9 = {"Etc/GMT+9"}; - public static UTimeZone GmtPlus10 = {"Etc/GMT+10"}; - public static UTimeZone GmtPlus11 = {"Etc/GMT+11"}; - public static UTimeZone GmtPlus12 = {"Etc/GMT+12"}; - - public static UTimeZone GmtMinus0 = {"Etc/GMT-0"}; - public static UTimeZone GmtMinus1 = {"Etc/GMT-1"}; - public static UTimeZone GmtMinus2 = {"Etc/GMT-2"}; - public static UTimeZone GmtMinus3 = {"Etc/GMT-3"}; - public static UTimeZone GmtMinus4 = {"Etc/GMT-4"}; - public static UTimeZone GmtMinus5 = {"Etc/GMT-5"}; - public static UTimeZone GmtMinus6 = {"Etc/GMT-6"}; - public static UTimeZone GmtMinus7 = {"Etc/GMT-7"}; - public static UTimeZone GmtMinus8 = {"Etc/GMT-8"}; - public static UTimeZone GmtMinus9 = {"Etc/GMT-9"}; - public static UTimeZone GmtMinus10 = {"Etc/GMT-10"}; - public static UTimeZone GmtMinus11 = {"Etc/GMT-11"}; - public static UTimeZone GmtMinus12 = {"Etc/GMT-12"}; - - /*********************************************************************** - - Get the default time zone. 
- - ***********************************************************************/ - - static void getDefault (inout UTimeZone zone) - { - uint format (wchar* dst, uint length, inout ICU.UErrorCode e) - { - return ucal_getDefaultTimeZone (dst, length, e); - } - - UString s = new UString(64); - s.format (&format, "failed to get default time zone"); - zone.name = s.get(); - } - - /*********************************************************************** - - Set the default time zone. - - ***********************************************************************/ - - static void setDefault (inout UTimeZone zone) - { - ICU.UErrorCode e; - - ucal_setDefaultTimeZone (ICU.toString (zone.name), e); - ICU.testError (e, "failed to set default time zone"); - } - - /*********************************************************************** - - Return the amount of time in milliseconds that the clock - is advanced during daylight savings time for the given - time zone, or zero if the time zone does not observe daylight - savings time - - ***********************************************************************/ - - static uint getDSTSavings (inout UTimeZone zone) - { - ICU.UErrorCode e; - - uint x = ucal_getDSTSavings (ICU.toString (zone.name), e); - ICU.testError (e, "failed to get DST savings"); - return x; - } - - - /********************************************************************** - - Iterate over the available timezone names - - **********************************************************************/ - - static int opApply (int delegate(inout wchar[] element) dg) - { - ICU.UErrorCode e; - wchar[] name; - int result; - - void* h = ucal_openTimeZones (e); - ICU.testError (e, "failed to open timeszone iterator"); - - UEnumeration zones = new UEnumeration (cast(UEnumeration.Handle) h); - while (zones.next(name) && (result = dg(name)) != 0) {} - delete zones; - return result; - } - - - - /*********************************************************************** - - Bind the ICU functions 
from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - void* function (inout ICU.UErrorCode) ucal_openTimeZones; - uint function (wchar*, uint, inout ICU.UErrorCode) ucal_getDefaultTimeZone; - void function (wchar*, inout ICU.UErrorCode) ucal_setDefaultTimeZone; - uint function (wchar*, inout ICU.UErrorCode) ucal_getDSTSavings; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &ucal_openTimeZones, "ucal_openTimeZones"}, - {cast(void**) &ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone"}, - {cast(void**) &ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone"}, - {cast(void**) &ucal_getDSTSavings, "ucal_getDSTSavings"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (ICU.icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} +/******************************************************************************* + + @file UTimeZone.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. 
+ + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UTimeZone; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString, + dwtx.dwtxhelper.mangoicu.UEnumeration; + +/******************************************************************************* + + A representation of a TimeZone. Unfortunately, ICU does not expose + this as a separate entity from the C-API, so we have to make do + with an approximation instead. 
+ +*******************************************************************************/ + +struct UTimeZone +{ + public wchar[] name; + + public static UTimeZone Default = {null}; + public static UTimeZone Gmt = {"Etc/GMT"}; + public static UTimeZone Greenwich = {"Etc/Greenwich"}; + public static UTimeZone Uct = {"Etc/UCT"}; + public static UTimeZone Utc = {"Etc/UTC"}; + public static UTimeZone Universal = {"Etc/Universal"}; + /* NOTE(review): tz database "Etc/GMT+N" IDs use the POSIX sign convention, so GmtPlus1 is one hour behind UTC and GmtMinus1 one hour ahead - confirm callers expect this */ + public static UTimeZone GmtPlus0 = {"Etc/GMT+0"}; + public static UTimeZone GmtPlus1 = {"Etc/GMT+1"}; + public static UTimeZone GmtPlus2 = {"Etc/GMT+2"}; + public static UTimeZone GmtPlus3 = {"Etc/GMT+3"}; + public static UTimeZone GmtPlus4 = {"Etc/GMT+4"}; + public static UTimeZone GmtPlus5 = {"Etc/GMT+5"}; + public static UTimeZone GmtPlus6 = {"Etc/GMT+6"}; + public static UTimeZone GmtPlus7 = {"Etc/GMT+7"}; + public static UTimeZone GmtPlus8 = {"Etc/GMT+8"}; + public static UTimeZone GmtPlus9 = {"Etc/GMT+9"}; + public static UTimeZone GmtPlus10 = {"Etc/GMT+10"}; + public static UTimeZone GmtPlus11 = {"Etc/GMT+11"}; + public static UTimeZone GmtPlus12 = {"Etc/GMT+12"}; + + public static UTimeZone GmtMinus0 = {"Etc/GMT-0"}; + public static UTimeZone GmtMinus1 = {"Etc/GMT-1"}; + public static UTimeZone GmtMinus2 = {"Etc/GMT-2"}; + public static UTimeZone GmtMinus3 = {"Etc/GMT-3"}; + public static UTimeZone GmtMinus4 = {"Etc/GMT-4"}; + public static UTimeZone GmtMinus5 = {"Etc/GMT-5"}; + public static UTimeZone GmtMinus6 = {"Etc/GMT-6"}; + public static UTimeZone GmtMinus7 = {"Etc/GMT-7"}; + public static UTimeZone GmtMinus8 = {"Etc/GMT-8"}; + public static UTimeZone GmtMinus9 = {"Etc/GMT-9"}; + public static UTimeZone GmtMinus10 = {"Etc/GMT-10"}; + public static UTimeZone GmtMinus11 = {"Etc/GMT-11"}; + public static UTimeZone GmtMinus12 = {"Etc/GMT-12"}; + + 
/*********************************************************************** + + Get the default time zone. 
+ The zone ID reported by ucal_getDefaultTimeZone is stored into zone.name. + ***********************************************************************/ + + static void getDefault (inout UTimeZone zone) + { + uint format (wchar* dst, uint length, inout ICU.UErrorCode e) + { + return ucal_getDefaultTimeZone (dst, length, e); + } + + UString s = new UString(64); + s.format (&format, "failed to get default time zone"); + zone.name = s.get(); + } + + /*********************************************************************** + + Set the default time zone. + zone.name is handed to ucal_setDefaultTimeZone; the resulting error code goes through ICU.testError. + ***********************************************************************/ + + static void setDefault (inout UTimeZone zone) + { + ICU.UErrorCode e; + + ucal_setDefaultTimeZone (ICU.toString (zone.name), e); + ICU.testError (e, "failed to set default time zone"); + } + + /*********************************************************************** + + Return the amount of time in milliseconds that the clock + is advanced during daylight savings time for the given + time zone, or zero if the time zone does not observe daylight + savings time + + ***********************************************************************/ + + static uint getDSTSavings (inout UTimeZone zone) + { + ICU.UErrorCode e; + + uint x = ucal_getDSTSavings (ICU.toString (zone.name), e); + ICU.testError (e, "failed to get DST savings"); + return x; + } + + + /********************************************************************** + + Iterate over the available timezone names + Per the D opApply contract, iteration continues while dg returns 0; a non-zero result stops the loop and is returned. 
+ **********************************************************************/ + + static int opApply (int delegate(inout wchar[] element) dg) + { + ICU.UErrorCode e; + wchar[] name; + int result; + + void* h = ucal_openTimeZones (e); + ICU.testError (e, "failed to open timeszone iterator"); + + UEnumeration zones = new UEnumeration (cast(UEnumeration.Handle) h); + while (zones.next(name) && (result = dg(name)) == 0) {} + delete zones; + return result; + } + + + + /*********************************************************************** + + Bind the ICU functions 
from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + ICU calendar entry points held as C-linkage function pointers; their addresses are listed in targets below + ***********************************************************************/ + + private static extern (C) + { + void* function (inout ICU.UErrorCode) ucal_openTimeZones; + uint function (wchar*, uint, inout ICU.UErrorCode) ucal_getDefaultTimeZone; + void function (wchar*, inout ICU.UErrorCode) ucal_setDefaultTimeZone; + uint function (wchar*, inout ICU.UErrorCode) ucal_getDSTSavings; + } + + /*********************************************************************** + Pairs the address of each function pointer above with the ICU symbol name given to FunctionLoader.bind + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &ucal_openTimeZones, "ucal_openTimeZones"}, + {cast(void**) &ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone"}, + {cast(void**) &ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone"}, + {cast(void**) &ucal_getDSTSavings, "ucal_getDSTSavings"}, + ]; + + /*********************************************************************** + Static constructor: passes ICU.icuin and targets to FunctionLoader.bind and keeps the returned handle + ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (ICU.icuin, targets); + } + + /*********************************************************************** + Static destructor: hands the stored library handle back to FunctionLoader.unbind + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} diff -r 
11e8159caf7a -r f05207c07a98 dwtx/dwtxhelper/mangoicu/UTransform.d --- a/dwtx/dwtxhelper/mangoicu/UTransform.d Mon Jul 07 15:53:07 2008 +0200 +++ b/dwtx/dwtxhelper/mangoicu/UTransform.d Mon Jul 07 15:54:03 2008 +0200 @@ -1,239 +1,239 @@ -/******************************************************************************* - - @file UTransform.d - - Copyright (c) 2004 Kris Bell - - This 
software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for damages - of any kind arising from the use of this software. - - Permission is hereby granted to anyone to use this software for any - purpose, including commercial applications, and to alter it and/or - redistribute it freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment within documentation of - said product would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any distribution - of the source. - - 4. Derivative works are permitted, but they must carry this notice - in full and credit the original source. - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - @version Initial version, November 2004 - @author Kris - - Note that this package and documentation is built around the ICU - project (http://oss.software.ibm.com/icu/). Below is the license - statement as specified by that software: - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - ICU License - ICU 1.8.1 and later - - COPYRIGHT AND PERMISSION NOTICE - - Copyright (c) 1995-2003 International Business Machines Corporation and - others. - - All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, and/or sell copies of the Software, and to permit persons - to whom the Software is furnished to do so, provided that the above - copyright notice(s) and this permission notice appear in all copies of - the Software and that both the above copyright notice(s) and this - permission notice appear in supporting documentation. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT - OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL - INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING - FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, use - or other dealings in this Software without prior written authorization - of the copyright holder. - - ---------------------------------------------------------------------- - - All trademarks and registered trademarks mentioned herein are the - property of their respective owners. - -*******************************************************************************/ - -module dwtx.dwtxhelper.mangoicu.UTransform; - -private import dwtx.dwtxhelper.mangoicu.ICU, - dwtx.dwtxhelper.mangoicu.UString; - -/******************************************************************************* - - See - this page for full details. 
- -*******************************************************************************/ - -class UTransform : ICU -{ - private Handle handle; - - enum Direction - { - Forward, - Reverse - } - - - /*********************************************************************** - - ***********************************************************************/ - - this (UStringView id) - { - UErrorCode e; - - handle = utrans_openU (id.get.ptr, id.len, 0, null, 0, null, e); - testError (e, "failed to open ID transform"); - } - - /*********************************************************************** - - ***********************************************************************/ - - this (UStringView rule, Direction dir) - { - UErrorCode e; - - handle = utrans_openU (null, 0, dir, rule.get.ptr, rule.len, null, e); - testError (e, "failed to open rule-based transform"); - } - - /*********************************************************************** - - ***********************************************************************/ - - ~this () - { - utrans_close (handle); - } - - /*********************************************************************** - - ***********************************************************************/ - - UStringView getID () - { - uint len; - wchar *s = utrans_getUnicodeID (handle, len); - return new UStringView (s[0..len]); - } - - /*********************************************************************** - - ***********************************************************************/ - - UTransform setFilter (UStringView filter) - { - UErrorCode e; - - if (filter.length) - utrans_setFilter (handle, filter.get.ptr, filter.len, e); - else - utrans_setFilter (handle, null, 0, e); - - testError (e, "failed to set transform filter"); - return this; - } - - /*********************************************************************** - - ***********************************************************************/ - - UTransform execute (UString text) - { - UErrorCode e; - uint textLen = 
text.len; - - utrans_transUChars (handle, text.get.ptr, &textLen, text.content.length, 0, &text.len, e); - testError (e, "failed to execute transform"); - return this; - } - - - - /*********************************************************************** - - Bind the ICU functions from a shared library. This is - complicated by the issues regarding D and DLLs on the - Windows platform - - ***********************************************************************/ - - private static void* library; - - /*********************************************************************** - - ***********************************************************************/ - - private static extern (C) - { - Handle function (wchar*, uint, uint, wchar*, uint, void*, inout UErrorCode) utrans_openU; - void function (Handle) utrans_close; - wchar* function (Handle, inout uint) utrans_getUnicodeID; - void function (Handle, wchar*, uint, inout UErrorCode) utrans_setFilter; - void function (Handle, wchar*, uint*, uint, uint, uint*, inout UErrorCode) utrans_transUChars; - } - - /*********************************************************************** - - ***********************************************************************/ - - static FunctionLoader.Bind[] targets = - [ - {cast(void**) &utrans_openU, "utrans_openU"}, - {cast(void**) &utrans_close, "utrans_close"}, - {cast(void**) &utrans_getUnicodeID, "utrans_getUnicodeID"}, - {cast(void**) &utrans_setFilter, "utrans_setFilter"}, - {cast(void**) &utrans_transUChars, "utrans_transUChars"}, - ]; - - /*********************************************************************** - - ***********************************************************************/ - - static this () - { - library = FunctionLoader.bind (icuin, targets); - } - - /*********************************************************************** - - ***********************************************************************/ - - static ~this () - { - FunctionLoader.unbind (library); - } -} - 
+/******************************************************************************* + + @file UTransform.d + + Copyright (c) 2004 Kris Bell + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for damages + of any kind arising from the use of this software. + + Permission is hereby granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and/or + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment within documentation of + said product would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any distribution + of the source. + + 4. Derivative works are permitted, but they must carry this notice + in full and credit the original source. + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + @version Initial version, November 2004 + @author Kris + + Note that this package and documentation is built around the ICU + project (http://oss.software.ibm.com/icu/). Below is the license + statement as specified by that software: + + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + ICU License - ICU 1.8.1 and later + + COPYRIGHT AND PERMISSION NOTICE + + Copyright (c) 1995-2003 International Business Machines Corporation and + others. + + All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, and/or sell copies of the Software, and to permit persons + to whom the Software is furnished to do so, provided that the above + copyright notice(s) and this permission notice appear in all copies of + the Software and that both the above copyright notice(s) and this + permission notice appear in supporting documentation. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, use + or other dealings in this Software without prior written authorization + of the copyright holder. + + ---------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the + property of their respective owners. + +*******************************************************************************/ + +module dwtx.dwtxhelper.mangoicu.UTransform; + +private import dwtx.dwtxhelper.mangoicu.ICU, + dwtx.dwtxhelper.mangoicu.UString; + +/******************************************************************************* + + See + this page for full details. 
+ +*******************************************************************************/ + +class UTransform : ICU +{ + private Handle handle; + /* handle is set by utrans_openU in the constructors and passed to utrans_close in the destructor. */ + enum Direction + { + Forward, + Reverse + } + + + /*********************************************************************** + Open a transform by ID: id is passed to utrans_openU with direction 0 (the value of Direction.Forward) and no rule text. + ***********************************************************************/ + + this (UStringView id) + { + UErrorCode e; + + handle = utrans_openU (id.get.ptr, id.len, 0, null, 0, null, e); + testError (e, "failed to open ID transform"); + } + + /*********************************************************************** + Open a rule-based transform: no ID is supplied; the rule text and dir are passed to utrans_openU. + ***********************************************************************/ + + this (UStringView rule, Direction dir) + { + UErrorCode e; + + handle = utrans_openU (null, 0, dir, rule.get.ptr, rule.len, null, e); + testError (e, "failed to open rule-based transform"); + } + + /*********************************************************************** + Close the transform handle via utrans_close. + ***********************************************************************/ + + ~this () + { + utrans_close (handle); + } + + /*********************************************************************** + Return a UStringView built over the slice s[0..len] reported by utrans_getUnicodeID. 
+ ***********************************************************************/ + + UStringView getID () + { + uint len; + wchar *s = utrans_getUnicodeID (handle, len); + return new UStringView (s[0..len]); + } + + /*********************************************************************** + Set the transform filter; when filter.length is zero, null and 0 are passed instead. Returns this. + ***********************************************************************/ + + UTransform setFilter (UStringView filter) + { + UErrorCode e; + + if (filter.length) + utrans_setFilter (handle, filter.get.ptr, filter.len, e); + else + utrans_setFilter (handle, null, 0, e); + + testError (e, "failed to set transform filter"); + return this; + } + + /*********************************************************************** + Run utrans_transUChars on text's buffer from offset 0; text.content.length is the fourth argument (textCapacity in ICU's utrans.h) and text.len is passed by address so the call can update it. Returns this. + ***********************************************************************/ + + UTransform execute (UString text) + { + UErrorCode e; + uint textLen = 
text.len; + + utrans_transUChars (handle, text.get.ptr, &textLen, text.content.length, 0, &text.len, e); + testError (e, "failed to execute transform"); + return this; + } + + + + /*********************************************************************** + + Bind the ICU functions from a shared library. This is + complicated by the issues regarding D and DLLs on the + Windows platform + + ***********************************************************************/ + + private static void* library; + + /*********************************************************************** + Function pointers for the ICU utrans_* C entry points used above; their addresses are listed in targets. + ***********************************************************************/ + + private static extern (C) + { + Handle function (wchar*, uint, uint, wchar*, uint, void*, inout UErrorCode) utrans_openU; + void function (Handle) utrans_close; + wchar* function (Handle, inout uint) utrans_getUnicodeID; + void function (Handle, wchar*, uint, inout UErrorCode) utrans_setFilter; + void function (Handle, wchar*, uint*, uint, uint, uint*, inout UErrorCode) utrans_transUChars; + } + + /*********************************************************************** + Table pairing the address of each function pointer with the symbol name it is bound to. + ***********************************************************************/ + + static FunctionLoader.Bind[] targets = + [ + {cast(void**) &utrans_openU, "utrans_openU"}, + {cast(void**) &utrans_close, "utrans_close"}, + {cast(void**) &utrans_getUnicodeID, "utrans_getUnicodeID"}, + {cast(void**) &utrans_setFilter, "utrans_setFilter"}, + {cast(void**) &utrans_transUChars, "utrans_transUChars"}, + ]; + + /*********************************************************************** + Static constructor: passes icuin and targets to FunctionLoader.bind and keeps the returned handle. 
+ ***********************************************************************/ + + static this () + { + library = FunctionLoader.bind (icuin, targets); + } + + /*********************************************************************** + Static destructor: hands the stored library handle to FunctionLoader.unbind. + ***********************************************************************/ + + static ~this () + { + FunctionLoader.unbind (library); + } +} +