Mercurial > projects > dwt2

diff com.ibm.icu/src/com/ibm/icu/mangoicu/UBreakIterator.d @ 92:ebefa5c2eab4
moving ICU bindings to com.ibm.icu
author: Frank Benoit <benoit@tionex.de>
date: Sun, 19 Apr 2009 13:49:38 +0200
parents: base/src/java/mangoicu/UBreakIterator.d@1bf55a6eb092
children: 536e43f63c81
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/com.ibm.icu/src/com/ibm/icu/mangoicu/UBreakIterator.d	Sun Apr 19 13:49:38 2009 +0200
@@ -0,0 +1,621 @@
+/*******************************************************************************
+
+        @file UBreakIterator.d
+
+        Copyright (c) 2004 Kris Bell
+
+        This software is provided 'as-is', without any express or implied
+        warranty. In no event will the authors be held liable for damages
+        of any kind arising from the use of this software.
+
+        Permission is hereby granted to anyone to use this software for any
+        purpose, including commercial applications, and to alter it and/or
+        redistribute it freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented; you must
+           not claim that you wrote the original software. If you use this
+           software in a product, an acknowledgment within documentation of
+           said product would be appreciated but is not required.
+
+        2. Altered source versions must be plainly marked as such, and must
+           not be misrepresented as being the original software.
+
+        3. This notice may not be removed or altered from any distribution
+           of the source.
+
+        4. Derivative works are permitted, but they must carry this notice
+           in full and credit the original source.
+
+
+                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+        @version        Initial version, November 2004
+        @author         Kris
+
+        Note that this package and documentation is built around the ICU
+        project (http://oss.software.ibm.com/icu/). Below is the license
+        statement as specified by that software:
+
+
+                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+        ICU License - ICU 1.8.1 and later
+
+        COPYRIGHT AND PERMISSION NOTICE
+
+        Copyright (c) 1995-2003 International Business Machines Corporation and
+        others.
+
+        All rights reserved.
+
+        Permission is hereby granted, free of charge, to any person obtaining a
+        copy of this software and associated documentation files (the
+        "Software"), to deal in the Software without restriction, including
+        without limitation the rights to use, copy, modify, merge, publish,
+        distribute, and/or sell copies of the Software, and to permit persons
+        to whom the Software is furnished to do so, provided that the above
+        copyright notice(s) and this permission notice appear in all copies of
+        the Software and that both the above copyright notice(s) and this
+        permission notice appear in supporting documentation.
+
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+        OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+        OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+        HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+        INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+        FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+        NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+        WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+        Except as contained in this notice, the name of a copyright holder
+        shall not be used in advertising or otherwise to promote the sale, use
+        or other dealings in this Software without prior written authorization
+        of the copyright holder.
+
+        ----------------------------------------------------------------------
+
+        All trademarks and registered trademarks mentioned herein are the
+        property of their respective owners.
+
+*******************************************************************************/
+
+module com.ibm.icu.mangoicu.UBreakIterator;
+
+private import  com.ibm.icu.mangoicu.ICU;
+
+public  import  com.ibm.icu.mangoicu.ULocale,
+                com.ibm.icu.mangoicu.UText,
+                com.ibm.icu.mangoicu.UString;
+
+
+
+// /*******************************************************************************
+//
+// *******************************************************************************/
+//
+// class UCharacterIterator : UBreakIterator
+// {
+//         /***********************************************************************
+//
+//         ***********************************************************************/
+//
+//         this (inout ULocale locale, UStringView text = null)
+//         {
+//                 super (Type.Character, locale, text);
+//         }
+// }
+//
+//
+// /*******************************************************************************
+//
+// *******************************************************************************/
+//
+// class UWordIterator : UBreakIterator
+// {
+//         public enum     Break
+//                         {
+//                         None = 0,
+//                         NoneLimit = 100,
+//                         Number = 100,
+//                         NumberLimit = 200,
+//                         Letter = 200,
+//                         LetterLimit = 300,
+//                         Kana = 300,
+//                         KanaLimit = 400,
+//                         Ideo = 400,
+//                         IdeoLimit = 500
+//                         }
+//
+//         /***********************************************************************
+//
+//         ***********************************************************************/
+//
+//         this (inout ULocale locale, UStringView text = null)
+//         {
+//                 super (Type.Word, locale, text);
+//         }
+//
+//         /***********************************************************************
+//
+//                 Return the status from the break rule that determined
+//                 the most recently returned break position.
+//
+//         ***********************************************************************/
+//
+//         void getStatus (inout Break b)
+//         {
+//                 b = cast(Break) super.getStatus();
+//         }
+// }
+//
+//
+// /*******************************************************************************
+//
+// *******************************************************************************/
+//
+// class ULineIterator : UBreakIterator
+// {
+//         public enum     Break
+//                         {
+//                         Soft = 0,
+//                         SoftLimit = 100,
+//                         Hard = 100,
+//                         HardLimit = 200
+//                         }
+//
+//         /***********************************************************************
+//
+//         ***********************************************************************/
+//
+//         this (inout ULocale locale, UStringView text = null)
+//         {
+//                 super (Type.Line, locale, text);
+//         }
+//
+//         /***********************************************************************
+//
+//                 Return the status from the break rule that determined
+//                 the most recently returned break position.
+//
+//         ***********************************************************************/
+//
+//         void getStatus (inout Break b)
+//         {
+//                 b = cast(Break) super.getStatus();
+//         }
+// }
+//
+//
+// /*******************************************************************************
+//
+// *******************************************************************************/
+//
+// class USentenceIterator : UBreakIterator
+// {
+//         public enum     Break
+//                         {
+//                         Term = 0,
+//                         TermLimit = 100,
+//                         Sep = 100,
+//                         Limit = 200
+//                         }
+//
+//         /***********************************************************************
+//
+//         ***********************************************************************/
+//
+//         this (inout ULocale locale, UStringView text = null)
+//         {
+//                 super (Type.Sentence, locale, text);
+//         }
+//
+//         /***********************************************************************
+//
+//                 Return the status from the break rule that determined
+//                 the most recently returned break position.
+//
+//         ***********************************************************************/
+//
+//         void getStatus (inout Break b)
+//         {
+//                 b = cast(Break) super.getStatus();
+//         }
+// }
+//
+//
+// /*******************************************************************************
+//
+// *******************************************************************************/
+//
+// class UTitleIterator : UBreakIterator
+// {
+//         /***********************************************************************
+//
+//         ***********************************************************************/
+//
+//         this (inout ULocale locale, UStringView text = null)
+//         {
+//                 super (Type.Title, locale, text);
+//         }
+// }
+//
+//
+// /*******************************************************************************
+//
+// *******************************************************************************/
+//
+// class URuleIterator : UBreakIterator
+// {
+//         /***********************************************************************
+//
+//                 Open a new UBreakIterator for locating text boundaries
+//                 using specified breaking rules
+//
+//         ***********************************************************************/
+//
+//         this (UStringView rules, UStringView text = null)
+//         {
+//                 UErrorCode e;
+//
+//                 handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e);
+//                 testError (e, "failed to open rule iterator");
+//         }
+// }
+
+
+/*******************************************************************************
+
+        BreakIterator defines methods for finding the location of boundaries
+        in text. A UBreakIterator maintains a current position and scans
+        over text, returning the index of characters where boundaries occur.
+
+        Line boundary analysis determines where a text string can be broken
+        when line-wrapping. The mechanism correctly handles punctuation and
+        hyphenated words.
+
+        Sentence boundary analysis allows selection with correct interpretation
+        of periods within numbers and abbreviations, and trailing punctuation
+        marks such as quotation marks and parentheses.
+
+        Word boundary analysis is used by search and replace functions, as well
+        as within text editing applications that allow the user to select words
+        with a double click. Word selection provides correct interpretation of
+        punctuation marks within and following words. Characters that are not
+        part of a word, such as symbols or punctuation marks, have word-breaks
+        on both sides.
+
+        Character boundary analysis allows users to interact with characters
+        as they expect to, for example, when moving the cursor through a text
+        string. Character boundary analysis provides correct navigation
+        through character strings, regardless of how the character is stored.
+        For example, an accented character might be stored as a base character
+        and a diacritical mark. What users consider to be a character can differ
+        between languages.
+
+        Title boundary analysis locates all positions, typically starts of
+        words, that should be set to Title Case when title casing the text.
+
+        See <A HREF="http://oss.software.ibm.com/icu/apiref/ubrk_8h.html">
+        this page</A> for full details.
+
+*******************************************************************************/
+
+struct UBreakIterator
+{
+        typedef void _UBreakIterator;   // stand-in type for the C-side iterator object
+        alias _UBreakIterator* Handle;  // pointer type exchanged with the ubrk_* functions
+        Handle handle;                  // assigned from ubrk_open by the open*Iterator factories; released in close()
+        UText ut;                       // text wrapper handed to ubrk_setUText when char[] text is supplied
+
+        // this is returned by next(), previous() etc ...
+        const uint Done = uint.max;     // same value as the default "no offset" argument of next() and previous()
+        alias Done DONE;                // upper-case spelling of the same constant
+
+        /***********************************************************************
+
+                internal types passed to C API
+
+        ***********************************************************************/
+
+        private  enum   Type
+                        {
+                        Character = 0,  // ordinals spelled out: the value is passed to ubrk_open as its first argument
+                        Word      = 1,  // used by openWordIterator
+                        Line      = 2,  // used by openLineIterator
+                        Sentence  = 3,
+                        Title     = 4
+                        }
+
+
+        public enum     WordBreak
+                        {
+                        None        = 0,                // every range starts where the previous XxxLimit ends
+                        NoneLimit   = 100,
+                        Number      = NoneLimit,
+                        NumberLimit = 200,
+                        Letter      = NumberLimit,
+                        LetterLimit = 300,
+                        Kana        = LetterLimit,
+                        KanaLimit   = 400,
+                        Ideo        = KanaLimit,
+                        IdeoLimit   = 500
+                        }
+        public enum     LineBreak
+                        {
+                        Soft      = 0,
+                        SoftLimit = 100,
+                        Hard      = SoftLimit,  // the hard range begins where the soft range ends
+                        HardLimit = 200
+                        }
+        public enum     SentenceBreak
+                        {
+                        Term      = 0,
+                        TermLimit = 100,
+                        Sep       = TermLimit,  // the separator range begins where the terminator range ends
+                        Limit     = 200
+                        }
+
+
+        /***********************************************************************
+
+                Open a new UBreakIterator for locating text boundaries for
+                a specified locale. A UBreakIterator may be used for detecting
+                character, line, word, and sentence breaks in text.
+
+        ***********************************************************************/
+
+        static UBreakIterator openWordIterator( ULocale locale, char[] str = null ){
+            return openIterator( Type.Word, locale, str, "failed to open word iterator" );
+        }
+
+        static UBreakIterator openLineIterator( ULocale locale, char[] str = null ){
+            return openIterator( Type.Line, locale, str, "failed to open line iterator" );
+        }
+
+        // Shared body of the two factories above: opens an iterator of the requested
+        // type for the locale and, when str is non-null, attaches it as the text to
+        // scan. openMsg is the error text used when ubrk_open reports a failure.
+        private static UBreakIterator openIterator( Type type, ULocale locale, char[] str, char[] openMsg ){
+            UBreakIterator res;
+            auto e = ICU.UErrorCode.OK;
+            res.handle = ubrk_open( type, cast(char*)locale.name.ptr, null, 0, e);
+            ICU.testError (e, openMsg);
+            if( str ) {
+                // Release the freshly opened iterator if attaching the text fails, instead of leaking it.
+                // NOTE(review): presumably ICU.testError throws on a failing code - confirm in ICU.d.
+                scope(failure) res.close();
+                res.ut.openUTF8(str);
+                ubrk_setUText( res.handle, & res.ut, e);
+                ICU.testError (e, "failed to set text in iterator");
+            }
+            return res;
+        }
+
+        /***********************************************************************
+
+                Close a UBreakIterator
+
+        ***********************************************************************/
+
+        void close (){  // releases the text wrapper first, then the ICU iterator itself
+                ut.close();
+                if (handle !is null) ubrk_close (handle);
+                handle = null;  // never hand a released iterator back to ICU, e.g. on a second close()
+        }
+
+        /***********************************************************************
+
+                Sets an existing iterator to point to a new piece of text
+
+        ***********************************************************************/
+
+        void setText (UStringView text)
+        {       // passes the view's buffer pointer and length to ICU as the new text to scan
+                auto e = ICU.UErrorCode.OK;     // explicit success state on entry, as at every other call site here
+                ubrk_setText (handle, text.get.ptr, text.length, e);
+                ICU.testError (e, "failed to set iterator text");
+        }
+
+        void setText (char[] text)
+        {
+                ut.openUTF8(text);      // wrap the char[] text in this struct's own UText
+                ICU.UErrorCode status = ICU.UErrorCode.OK;
+                ubrk_setUText (handle, &ut, status);
+                ICU.testError (status, "failed to set text in iterator");
+        }
+
+        /***********************************************************************
+
+                Determine the most recently-returned text boundary
+
+        ***********************************************************************/
+
+        uint current (){        // forwards to ubrk_current for this iterator's handle
+                uint position = ubrk_current (handle);
+                return position;
+        }
+
+        /***********************************************************************
+
+                Determine the text boundary following the current text
+                boundary, or Done if all text boundaries have been
+                returned.
+
+                If offset is specified, determines the text boundary
+                following the specified offset instead. The value returned
+                is always greater than offset, or Done.
+
+        ***********************************************************************/
+
+        uint next (uint offset = uint.max)
+        {
+                // uint.max doubles as the "no offset given" marker
+                return (offset == uint.max) ? ubrk_next (handle)
+                                            : ubrk_following (handle, offset);
+        }
+        alias next following;
+        /***********************************************************************
+
+                Determine the text boundary preceding the current text
+                boundary, or Done if all text boundaries have been returned.
+
+                If offset is specified, determines the text boundary preceding
+                the specified offset. The value returned is always smaller than
+                offset, or Done.
+
+        ***********************************************************************/
+
+        uint previous (uint offset = uint.max)
+        {
+                // an explicit offset selects ubrk_preceding; the uint.max default steps back from the current position
+                if (offset != uint.max)
+                    return ubrk_preceding (handle, offset);
+                return ubrk_previous (handle);
+        }
+
+        /***********************************************************************
+
+                Determine the index of the first character in the text
+                being scanned. This is not always the same as index 0
+                of the text.
+
+        ***********************************************************************/
+
+        uint first (){  // forwards to ubrk_first for this iterator's handle
+                uint start = ubrk_first (handle);
+                return start;
+        }
+
+        /***********************************************************************
+
+                Determine the index immediately beyond the last character
+                in the text being scanned. This is not the same as the last
+                character
+
+        ***********************************************************************/
+
+        uint last (){   // forwards to ubrk_last for this iterator's handle
+                uint end = ubrk_last (handle);
+                return end;
+        }
+
+        /***********************************************************************
+
+                Returns true if the specified position is a boundary position.
+                As a side effect, leaves the iterator pointing to the first
+                boundary position at or after "offset".
+
+        ***********************************************************************/
+
+        bool isBoundary (uint offset){
+                byte flag = ubrk_isBoundary (handle, offset);   // the C binding returns a byte
+                return flag != 0;
+        }
+
+        /***********************************************************************
+
+                Return the status from the break rule that determined
+                the most recently returned break position.
+
+        ***********************************************************************/
+
+        void getStatus (inout uint s)
+        {
+                s = ubrk_getRuleStatus (handle);        // the same call the private getStatus() overload makes
+        }
+
+        /***********************************************************************
+
+                Return the status from the break rule that determined
+                the most recently returned break position.
+
+                The values appear in the rule source within brackets,
+                {123}, for example. For rules that do not specify a status,
+                a default value of 0 is returned.
+
+                For word break iterators, the possible values are defined
+                in enum WordBreak
+
+        ***********************************************************************/
+
+        private uint getStatus (){      // forwards to ubrk_getRuleStatus for this iterator's handle
+                uint ruleStatus = ubrk_getRuleStatus (handle);
+                return ruleStatus;
+        }
+
+
+        /***********************************************************************
+
+                Bind the ICU functions from a shared library. This is
+                complicated by the issues regarding D and DLLs on the
+                Windows platform
+
+        ***********************************************************************/
+
+        private static void* library;   // value returned by FunctionLoader.bind in the static constructor; passed to unbind in the static destructor
+
+        /***********************************************************************
+
+        ***********************************************************************/
+
+        private static extern (C)
+        {       // C function pointers; their addresses are listed in 'targets' and handed to FunctionLoader.bind
+                Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open;
+                Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules; // only referenced from the commented-out URuleIterator
+                void   function (Handle) ubrk_close;
+                void   function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText;
+                uint   function (Handle) ubrk_current;
+                uint   function (Handle) ubrk_next;
+                uint   function (Handle) ubrk_previous;
+                uint   function (Handle) ubrk_first;
+                uint   function (Handle) ubrk_last;
+                uint   function (Handle, uint) ubrk_preceding;
+                uint   function (Handle, uint) ubrk_following;
+                byte   function (Handle, uint) ubrk_isBoundary; // isBoundary() compares the result against 0
+                uint   function (Handle) ubrk_getRuleStatus;
+                Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone; // bound, but not called by any code visible in this struct
+                void   function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText;
+        }
+
+        /***********************************************************************
+
+        ***********************************************************************/
+
+        static FunctionLoader.Bind[] targets =
+                [       // each entry pairs the address of one ubrk_* function pointer with the symbol name to look up
+                {cast(void**) &ubrk_open,               "ubrk_open"},
+                {cast(void**) &ubrk_close,              "ubrk_close"},
+                {cast(void**) &ubrk_openRules,          "ubrk_openRules"},
+                {cast(void**) &ubrk_setText,            "ubrk_setText"},
+                {cast(void**) &ubrk_current,            "ubrk_current"},
+                {cast(void**) &ubrk_next,               "ubrk_next"},
+                {cast(void**) &ubrk_previous,           "ubrk_previous"},
+                {cast(void**) &ubrk_first,              "ubrk_first"},
+                {cast(void**) &ubrk_last,               "ubrk_last"},
+                {cast(void**) &ubrk_preceding,          "ubrk_preceding"},
+                {cast(void**) &ubrk_following,          "ubrk_following"},
+                {cast(void**) &ubrk_isBoundary,         "ubrk_isBoundary"},
+                {cast(void**) &ubrk_getRuleStatus,      "ubrk_getRuleStatus"},
+                {cast(void**) &ubrk_setUText,           "ubrk_setUText"},
+                {cast(void**) &ubrk_safeClone,          "ubrk_safeClone"},
+                ];
+
+         /**********************************************************************
+
+         **********************************************************************/
+
+         static this ()
+         {      // binds the 'targets' table against ICU.icuuc and keeps the returned value for the static destructor
+                library = FunctionLoader.bind (ICU.icuuc, targets);
+         }
+
+         /**********************************************************************
+
+         **********************************************************************/
+
+         static ~this ()
+         {      // hands the value stored by the static constructor back to FunctionLoader
+               FunctionLoader.unbind (library);
+         }
+}
author	Frank Benoit <benoit@tionex.de>
date	Sun, 19 Apr 2009 13:49:38 +0200
parents	base/src/java/mangoicu/UBreakIterator.d@1bf55a6eb092
children	536e43f63c81