comparison java/src/java/mangoicu/UBreakIterator.d @ 16:dbfb303e8fb0

first complete successful compile (win-only)
author Frank Benoit <benoit@tionex.de>
date Wed, 18 Mar 2009 08:56:47 +0100
parents
children 9b96950f2c3c
comparison
equal deleted inserted replaced
15:c4b1a29263fc 16:dbfb303e8fb0
1 /*******************************************************************************
2
3 @file UBreakIterator.d
4
5 Copyright (c) 2004 Kris Bell
6
7 This software is provided 'as-is', without any express or implied
8 warranty. In no event will the authors be held liable for damages
9 of any kind arising from the use of this software.
10
11 Permission is hereby granted to anyone to use this software for any
12 purpose, including commercial applications, and to alter it and/or
13 redistribute it freely, subject to the following restrictions:
14
15 1. The origin of this software must not be misrepresented; you must
16 not claim that you wrote the original software. If you use this
17 software in a product, an acknowledgment within documentation of
18 said product would be appreciated but is not required.
19
20 2. Altered source versions must be plainly marked as such, and must
21 not be misrepresented as being the original software.
22
23 3. This notice may not be removed or altered from any distribution
24 of the source.
25
26 4. Derivative works are permitted, but they must carry this notice
27 in full and credit the original source.
28
29
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31
32
33 @version Initial version, November 2004
34 @author Kris
35
36 Note that this package and documentation is built around the ICU
37 project (http://oss.software.ibm.com/icu/). Below is the license
38 statement as specified by that software:
39
40
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
42
43
44 ICU License - ICU 1.8.1 and later
45
46 COPYRIGHT AND PERMISSION NOTICE
47
48 Copyright (c) 1995-2003 International Business Machines Corporation and
49 others.
50
51 All rights reserved.
52
53 Permission is hereby granted, free of charge, to any person obtaining a
54 copy of this software and associated documentation files (the
55 "Software"), to deal in the Software without restriction, including
56 without limitation the rights to use, copy, modify, merge, publish,
57 distribute, and/or sell copies of the Software, and to permit persons
58 to whom the Software is furnished to do so, provided that the above
59 copyright notice(s) and this permission notice appear in all copies of
60 the Software and that both the above copyright notice(s) and this
61 permission notice appear in supporting documentation.
62
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
64 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
66 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
68 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
69 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
70 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
71 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
72
73 Except as contained in this notice, the name of a copyright holder
74 shall not be used in advertising or otherwise to promote the sale, use
75 or other dealings in this Software without prior written authorization
76 of the copyright holder.
77
78 ----------------------------------------------------------------------
79
80 All trademarks and registered trademarks mentioned herein are the
81 property of their respective owners.
82
83 *******************************************************************************/
84
85 module java.mangoicu.UBreakIterator;
86
87 private import java.mangoicu.ICU;
88
89 public import java.mangoicu.ULocale,
90 java.mangoicu.UText,
91 java.mangoicu.UString;
92
93
94
95 // /*******************************************************************************
96 //
97 // *******************************************************************************/
98 //
99 // class UCharacterIterator : UBreakIterator
100 // {
101 // /***********************************************************************
102 //
103 // ***********************************************************************/
104 //
105 // this (inout ULocale locale, UStringView text = null)
106 // {
107 // super (Type.Character, locale, text);
108 // }
109 // }
110 //
111 //
112 // /*******************************************************************************
113 //
114 // *******************************************************************************/
115 //
116 // class UWordIterator : UBreakIterator
117 // {
118 // public enum Break
119 // {
120 // None = 0,
121 // NoneLimit = 100,
122 // Number = 100,
123 // NumberLimit = 200,
124 // Letter = 200,
125 // LetterLimit = 300,
126 // Kana = 300,
127 // KanaLimit = 400,
128 // Ideo = 400,
129 // IdeoLimit = 500
130 // }
131 //
132 // /***********************************************************************
133 //
134 // ***********************************************************************/
135 //
136 // this (inout ULocale locale, UStringView text = null)
137 // {
138 // super (Type.Word, locale, text);
139 // }
140 //
141 // /***********************************************************************
142 //
143 // Return the status from the break rule that determined
144 // the most recently returned break position.
145 //
146 // ***********************************************************************/
147 //
148 // void getStatus (inout Break b)
149 // {
150 // b = cast(Break) super.getStatus();
151 // }
152 // }
153 //
154 //
155 // /*******************************************************************************
156 //
157 // *******************************************************************************/
158 //
159 // class ULineIterator : UBreakIterator
160 // {
161 // public enum Break
162 // {
163 // Soft = 0,
164 // SoftLimit = 100,
165 // Hard = 100,
166 // HardLimit = 200
167 // }
168 //
169 // /***********************************************************************
170 //
171 // ***********************************************************************/
172 //
173 // this (inout ULocale locale, UStringView text = null)
174 // {
175 // super (Type.Line, locale, text);
176 // }
177 //
178 // /***********************************************************************
179 //
180 // Return the status from the break rule that determined
181 // the most recently returned break position.
182 //
183 // ***********************************************************************/
184 //
185 // void getStatus (inout Break b)
186 // {
187 // b = cast(Break) super.getStatus();
188 // }
189 // }
190 //
191 //
192 // /*******************************************************************************
193 //
194 // *******************************************************************************/
195 //
196 // class USentenceIterator : UBreakIterator
197 // {
198 // public enum Break
199 // {
200 // Term = 0,
201 // TermLimit = 100,
202 // Sep = 100,
203 // Limit = 200
204 // }
205 //
206 // /***********************************************************************
207 //
208 // ***********************************************************************/
209 //
210 // this (inout ULocale locale, UStringView text = null)
211 // {
212 // super (Type.Sentence, locale, text);
213 // }
214 //
215 // /***********************************************************************
216 //
217 // Return the status from the break rule that determined
218 // the most recently returned break position.
219 //
220 // ***********************************************************************/
221 //
222 // void getStatus (inout Break b)
223 // {
224 // b = cast(Break) super.getStatus();
225 // }
226 // }
227 //
228 //
229 // /*******************************************************************************
230 //
231 // *******************************************************************************/
232 //
233 // class UTitleIterator : UBreakIterator
234 // {
235 // /***********************************************************************
236 //
237 // ***********************************************************************/
238 //
239 // this (inout ULocale locale, UStringView text = null)
240 // {
241 // super (Type.Title, locale, text);
242 // }
243 // }
244 //
245 //
246 // /*******************************************************************************
247 //
248 // *******************************************************************************/
249 //
250 // class URuleIterator : UBreakIterator
251 // {
252 // /***********************************************************************
253 //
254 // Open a new UBreakIterator for locating text boundaries
255 // using specified breaking rules
256 //
257 // ***********************************************************************/
258 //
259 // this (UStringView rules, UStringView text = null)
260 // {
261 // UErrorCode e;
262 //
263 // handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e);
264 // testError (e, "failed to open rule iterator");
265 // }
266 // }
267
268
269 /*******************************************************************************
270
271 BreakIterator defines methods for finding the location of boundaries
272 in text. A UBreakIterator maintains a current position and
273 scans over text, returning the index of characters where boundaries occur.
274
275 Line boundary analysis determines where a text string can be broken
276 when line-wrapping. The mechanism correctly handles punctuation and
277 hyphenated words.
278
279 Sentence boundary analysis allows selection with correct interpretation
280 of periods within numbers and abbreviations, and trailing punctuation
281 marks such as quotation marks and parentheses.
282
283 Word boundary analysis is used by search and replace functions, as well
284 as within text editing applications that allow the user to select words
285 with a double click. Word selection provides correct interpretation of
286 punctuation marks within and following words. Characters that are not
287 part of a word, such as symbols or punctuation marks, have word-breaks
288 on both sides.
289
290 Character boundary analysis allows users to interact with characters
291 as they expect to, for example, when moving the cursor through a text
292 string. Character boundary analysis provides correct navigation
293 through character strings, regardless of how the character is stored.
294 For example, an accented character might be stored as a base character
295 and a diacritical mark. What users consider to be a character can differ
296 between languages.
297
298 Title boundary analysis locates all positions, typically starts of
299 words, that should be set to Title Case when title casing the text.
300
301 See <A HREF="http://oss.software.ibm.com/icu/apiref/ubrk_8h.html">
302 this page</A> for full details.
303
304 *******************************************************************************/
305
struct UBreakIterator
{
        // opaque native type; it is only ever handled through a pointer
        typedef void _UBreakIterator;
        alias _UBreakIterator* Handle;

        // native iterator, assigned by the open*Iterator factories and
        // released (and reset to null) by close()
        Handle  handle;

        // text wrapper used whenever the iterator is fed a char[] string
        UText   ut;

        // this is returned by next(), previous() etc ...
        const uint Done = uint.max;
        alias Done DONE;

        /***********************************************************************

                internal types passed to C API (first argument of
                ubrk_open)

        ***********************************************************************/

        private enum Type
        {
                Character,
                Word,
                Line,
                Sentence,
                Title
        }

        /***********************************************************************

                Rule status ranges for getStatus(). Each category
                spans the half-open range [Xxx, XxxLimit).

        ***********************************************************************/

        public enum WordBreak
        {
                None            = 0,
                NoneLimit       = 100,
                Number          = 100,
                NumberLimit     = 200,
                Letter          = 200,
                LetterLimit     = 300,
                Kana            = 300,
                KanaLimit       = 400,
                Ideo            = 400,
                IdeoLimit       = 500
        }

        public enum LineBreak
        {
                Soft            = 0,
                SoftLimit       = 100,
                Hard            = 100,
                HardLimit       = 200
        }

        public enum SentenceBreak
        {
                Term            = 0,
                TermLimit       = 100,
                Sep             = 100,
                SepLimit        = 200,
                // original spelling of SepLimit, kept so existing
                // callers continue to compile
                Limit           = 200
        }

        /***********************************************************************

                Shared implementation of the open*Iterator factories.

                Opens a native iterator of the requested type for the
                given locale and, when str is non-null, attaches str
                as the text to scan.

                Params:
                type        = kind of boundary analysis to perform
                locale      = locale whose name is handed to ubrk_open
                str         = optional UTF-8 text; skipped when null
                openFailure = message given to ICU.testError for the
                              ubrk_open call

                NOTE(review): locale.name.ptr is passed where the C API
                takes a char*; presumably ULocale keeps the name
                zero-terminated -- confirm against ULocale.

        ***********************************************************************/

        private static UBreakIterator openIterator (Type type, ULocale locale, char[] str, char[] openFailure)
        {
                UBreakIterator res;
                auto e = ICU.UErrorCode.OK;

                res.handle = ubrk_open (type, locale.name.ptr, null, 0, e);
                ICU.testError (e, openFailure);

                // the native iterator exists from here on: if a later step
                // raises, release it rather than leaking the handle
                scope (failure) ubrk_close (res.handle);

                if (str)
                   {
                   res.ut.openUTF8 (str);

                   // same reasoning for the freshly opened text wrapper
                   scope (failure) res.ut.close ();

                   ubrk_setUText (res.handle, &res.ut, e);
                   ICU.testError (e, "failed to set text in iterator");
                   }

                return res;
        }

        /***********************************************************************

                Open a new UBreakIterator for locating word boundaries
                for a specified locale. When str is non-null it becomes
                the text to scan; otherwise use setText() before
                iterating.

                The returned iterator should be released with close().

        ***********************************************************************/

        static UBreakIterator openWordIterator (ULocale locale, char[] str = null)
        {
                return openIterator (Type.Word, locale, str, "failed to open word iterator");
        }

        /***********************************************************************

                Open a new UBreakIterator for locating line-break
                positions for a specified locale. When str is non-null
                it becomes the text to scan; otherwise use setText()
                before iterating.

                The returned iterator should be released with close().

        ***********************************************************************/

        static UBreakIterator openLineIterator (ULocale locale, char[] str = null)
        {
                return openIterator (Type.Line, locale, str, "failed to open line iterator");
        }

        /***********************************************************************

                Close a UBreakIterator

                The handle is reset afterwards, so calling close() a
                second time does not hand an already released pointer
                to ubrk_close.

        ***********************************************************************/

        void close ()
        {
                ut.close ();

                if (handle)
                   {
                   ubrk_close (handle);
                   handle = null;
                   }
        }

        /***********************************************************************

                Sets an existing iterator to point to a new piece of
                text, given as UTF-16 via a UStringView

        ***********************************************************************/

        void setText (UStringView text)
        {
                // start from an explicit OK, like every other call site here
                auto e = ICU.UErrorCode.OK;

                ubrk_setText (handle, text.get.ptr, text.length, e);
                ICU.testError (e, "failed to set iterator text");
        }

        /***********************************************************************

                Sets an existing iterator to point to a new piece of
                UTF-8 text. The text is wrapped by the ut member and
                attached through ubrk_setUText.

                NOTE(review): ut is opened again without being closed
                first; presumably UText.openUTF8 reuses the existing
                instance -- confirm against UText.

        ***********************************************************************/

        void setText (char[] text)
        {
                auto e = ICU.UErrorCode.OK;

                ut.openUTF8 (text);
                ubrk_setUText (handle, &ut, e);
                ICU.testError (e, "failed to set text in iterator");
        }

        /***********************************************************************

                Determine the most recently-returned text boundary

        ***********************************************************************/

        uint current ()
        {
                return ubrk_current (handle);
        }

        /***********************************************************************

                Determine the text boundary following the current text
                boundary, or Done if all text boundaries have been
                returned.

                If offset is specified, determines the text boundary
                following the specified offset: The value returned
                is always greater than offset, or Done

                uint.max doubles as the "no offset given" marker, so it
                cannot be used as an actual offset.

        ***********************************************************************/

        uint next (uint offset = uint.max)
        {
                if (offset == uint.max)
                    return ubrk_next (handle);
                return ubrk_following (handle, offset);
        }

        alias next following;

        /***********************************************************************

                Determine the text boundary preceding the current text
                boundary, or Done if all text boundaries have been
                returned.

                If offset is specified, determines the text boundary
                preceding the specified offset. The value returned is
                always smaller than offset, or Done.

                uint.max doubles as the "no offset given" marker, so it
                cannot be used as an actual offset.

        ***********************************************************************/

        uint previous (uint offset = uint.max)
        {
                if (offset == uint.max)
                    return ubrk_previous (handle);
                return ubrk_preceding (handle, offset);
        }

        /***********************************************************************

                Determine the index of the first character in the text
                being scanned. This is not always the same as index 0
                of the text.

        ***********************************************************************/

        uint first ()
        {
                return ubrk_first (handle);
        }

        /***********************************************************************

                Determine the index immediately beyond the last
                character in the text being scanned. This is not the
                same as the index of the last character.

        ***********************************************************************/

        uint last ()
        {
                return ubrk_last (handle);
        }

        /***********************************************************************

                Returns true if the specified position is a boundary
                position. As a side effect, leaves the iterator pointing
                to the first boundary position at or after "offset".

        ***********************************************************************/

        bool isBoundary (uint offset)
        {
                return ubrk_isBoundary (handle, offset) != 0;
        }

        /***********************************************************************

                Return the status from the break rule that determined
                the most recently returned break position. The value
                is stored into s.

        ***********************************************************************/

        void getStatus (inout uint s)
        {
                s = getStatus ();
        }

        /***********************************************************************

                Return the status from the break rule that determined
                the most recently returned break position.

                The values appear in the rule source within brackets,
                {123}, for example. For rules that do not specify a
                status, a default value of 0 is returned.

                For word break iterators, the possible values are
                defined in enum WordBreak; LineBreak and SentenceBreak
                list the values for the other iterator kinds.

        ***********************************************************************/

        private uint getStatus ()
        {
                return ubrk_getRuleStatus (handle);
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        // token returned by FunctionLoader.bind, handed back to unbind
        private static void* library;

        /***********************************************************************

                Function pointers filled in by the module constructor.
                The C API's signed 32-bit results are declared as uint
                here, which is why Done is uint.max.

        ***********************************************************************/

        private static extern (C)
        {
                Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode)               ubrk_open;
                Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode)       ubrk_openRules;
                void   function (Handle)                                                        ubrk_close;
                void   function (Handle, wchar*, uint, inout ICU.UErrorCode)                    ubrk_setText;
                uint   function (Handle)                                                        ubrk_current;
                uint   function (Handle)                                                        ubrk_next;
                uint   function (Handle)                                                        ubrk_previous;
                uint   function (Handle)                                                        ubrk_first;
                uint   function (Handle)                                                        ubrk_last;
                uint   function (Handle, uint)                                                  ubrk_preceding;
                uint   function (Handle, uint)                                                  ubrk_following;
                byte   function (Handle, uint)                                                  ubrk_isBoundary;
                uint   function (Handle)                                                        ubrk_getRuleStatus;
                Handle function (Handle, void *, int *, inout ICU.UErrorCode)                   ubrk_safeClone;
                void   function (Handle, UText*, inout ICU.UErrorCode)                          ubrk_setUText;
        }

        /***********************************************************************

                Table pairing each function pointer above with the
                symbol name to look up in the library

        ***********************************************************************/

        static FunctionLoader.Bind[] targets =
                [
                {cast(void**) &ubrk_open,          "ubrk_open"},
                {cast(void**) &ubrk_close,         "ubrk_close"},
                {cast(void**) &ubrk_openRules,     "ubrk_openRules"},
                {cast(void**) &ubrk_setText,       "ubrk_setText"},
                {cast(void**) &ubrk_current,       "ubrk_current"},
                {cast(void**) &ubrk_next,          "ubrk_next"},
                {cast(void**) &ubrk_previous,      "ubrk_previous"},
                {cast(void**) &ubrk_first,         "ubrk_first"},
                {cast(void**) &ubrk_last,          "ubrk_last"},
                {cast(void**) &ubrk_preceding,     "ubrk_preceding"},
                {cast(void**) &ubrk_following,     "ubrk_following"},
                {cast(void**) &ubrk_isBoundary,    "ubrk_isBoundary"},
                {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"},
                {cast(void**) &ubrk_setUText,      "ubrk_setUText"},
                {cast(void**) &ubrk_safeClone,     "ubrk_safeClone"},
                ];

        /**********************************************************************

                Bind every entry of targets against the ICU common
                library at program start

        **********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (ICU.icuuc, targets);
        }

        /**********************************************************************

                Release the library binding at program exit

        **********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}