Mercurial > projects > dwt-addons
annotate dwtx/dwtxhelper/mangoicu/UBreakIterator.d @ 162:1a5b8f8129df
...
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Mon, 08 Sep 2008 00:51:37 +0200 |
parents | 95307ad235d9 |
children |
rev | line source |
---|---|
92 | 1 /******************************************************************************* |
2 | |
3 @file UBreakIterator.d | |
4 | |
5 Copyright (c) 2004 Kris Bell | |
6 | |
7 This software is provided 'as-is', without any express or implied | |
8 warranty. In no event will the authors be held liable for damages | |
9 of any kind arising from the use of this software. | |
10 | |
11 Permission is hereby granted to anyone to use this software for any | |
12 purpose, including commercial applications, and to alter it and/or | |
13 redistribute it freely, subject to the following restrictions: | |
14 | |
15 1. The origin of this software must not be misrepresented; you must | |
16 not claim that you wrote the original software. If you use this | |
17 software in a product, an acknowledgment within documentation of | |
18 said product would be appreciated but is not required. | |
19 | |
20 2. Altered source versions must be plainly marked as such, and must | |
21 not be misrepresented as being the original software. | |
22 | |
23 3. This notice may not be removed or altered from any distribution | |
24 of the source. | |
25 | |
26 4. Derivative works are permitted, but they must carry this notice | |
27 in full and credit the original source. | |
28 | |
29 | |
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
31 | |
32 | |
33 @version Initial version, November 2004 | |
34 @author Kris | |
35 | |
36 Note that this package and documentation is built around the ICU | |
37 project (http://oss.software.ibm.com/icu/). Below is the license | |
38 statement as specified by that software: | |
39 | |
40 | |
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
42 | |
43 | |
44 ICU License - ICU 1.8.1 and later | |
45 | |
46 COPYRIGHT AND PERMISSION NOTICE | |
47 | |
48 Copyright (c) 1995-2003 International Business Machines Corporation and | |
49 others. | |
50 | |
51 All rights reserved. | |
52 | |
53 Permission is hereby granted, free of charge, to any person obtaining a | |
54 copy of this software and associated documentation files (the | |
55 "Software"), to deal in the Software without restriction, including | |
56 without limitation the rights to use, copy, modify, merge, publish, | |
57 distribute, and/or sell copies of the Software, and to permit persons | |
58 to whom the Software is furnished to do so, provided that the above | |
59 copyright notice(s) and this permission notice appear in all copies of | |
60 the Software and that both the above copyright notice(s) and this | |
61 permission notice appear in supporting documentation. | |
62 | |
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
64 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT | |
66 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | |
67 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL | |
68 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING | |
69 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, | |
70 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION | |
71 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
72 | |
73 Except as contained in this notice, the name of a copyright holder | |
74 shall not be used in advertising or otherwise to promote the sale, use | |
75 or other dealings in this Software without prior written authorization | |
76 of the copyright holder. | |
77 | |
78 ---------------------------------------------------------------------- | |
79 | |
80 All trademarks and registered trademarks mentioned herein are the | |
81 property of their respective owners. | |
82 | |
83 *******************************************************************************/ | |
84 | |
85 module dwtx.dwtxhelper.mangoicu.UBreakIterator; | |
86 | |
87 private import dwtx.dwtxhelper.mangoicu.ICU; | |
88 | |
89 public import dwtx.dwtxhelper.mangoicu.ULocale, | |
90 dwtx.dwtxhelper.mangoicu.UText, | |
91 dwtx.dwtxhelper.mangoicu.UString; | |
92 | |
93 | |
94 | |
95 // /******************************************************************************* | |
96 // | |
97 // *******************************************************************************/ | |
98 // | |
99 // class UCharacterIterator : UBreakIterator | |
100 // { | |
101 // /*********************************************************************** | |
102 // | |
103 // ***********************************************************************/ | |
104 // | |
105 // this (inout ULocale locale, UStringView text = null) | |
106 // { | |
107 // super (Type.Character, locale, text); | |
108 // } | |
109 // } | |
110 // | |
111 // | |
112 // /******************************************************************************* | |
113 // | |
114 // *******************************************************************************/ | |
115 // | |
116 // class UWordIterator : UBreakIterator | |
117 // { | |
118 // public enum Break | |
119 // { | |
120 // None = 0, | |
121 // NoneLimit = 100, | |
122 // Number = 100, | |
123 // NumberLimit = 200, | |
124 // Letter = 200, | |
125 // LetterLimit = 300, | |
126 // Kana = 300, | |
127 // KanaLimit = 400, | |
128 // Ideo = 400, | |
129 // IdeoLimit = 500 | |
130 // } | |
131 // | |
132 // /*********************************************************************** | |
133 // | |
134 // ***********************************************************************/ | |
135 // | |
136 // this (inout ULocale locale, UStringView text = null) | |
137 // { | |
138 // super (Type.Word, locale, text); | |
139 // } | |
140 // | |
141 // /*********************************************************************** | |
142 // | |
143 // Return the status from the break rule that determined | |
144 // the most recently returned break position. | |
145 // | |
146 // ***********************************************************************/ | |
147 // | |
148 // void getStatus (inout Break b) | |
149 // { | |
150 // b = cast(Break) super.getStatus(); | |
151 // } | |
152 // } | |
153 // | |
154 // | |
155 // /******************************************************************************* | |
156 // | |
157 // *******************************************************************************/ | |
158 // | |
159 // class ULineIterator : UBreakIterator | |
160 // { | |
161 // public enum Break | |
162 // { | |
163 // Soft = 0, | |
164 // SoftLimit = 100, | |
165 // Hard = 100, | |
166 // HardLimit = 200 | |
167 // } | |
168 // | |
169 // /*********************************************************************** | |
170 // | |
171 // ***********************************************************************/ | |
172 // | |
173 // this (inout ULocale locale, UStringView text = null) | |
174 // { | |
175 // super (Type.Line, locale, text); | |
176 // } | |
177 // | |
178 // /*********************************************************************** | |
179 // | |
180 // Return the status from the break rule that determined | |
181 // the most recently returned break position. | |
182 // | |
183 // ***********************************************************************/ | |
184 // | |
185 // void getStatus (inout Break b) | |
186 // { | |
187 // b = cast(Break) super.getStatus(); | |
188 // } | |
189 // } | |
190 // | |
191 // | |
192 // /******************************************************************************* | |
193 // | |
194 // *******************************************************************************/ | |
195 // | |
196 // class USentenceIterator : UBreakIterator | |
197 // { | |
198 // public enum Break | |
199 // { | |
200 // Term = 0, | |
201 // TermLimit = 100, | |
202 // Sep = 100, | |
203 // Limit = 200 | |
204 // } | |
205 // | |
206 // /*********************************************************************** | |
207 // | |
208 // ***********************************************************************/ | |
209 // | |
210 // this (inout ULocale locale, UStringView text = null) | |
211 // { | |
212 // super (Type.Sentence, locale, text); | |
213 // } | |
214 // | |
215 // /*********************************************************************** | |
216 // | |
217 // Return the status from the break rule that determined | |
218 // the most recently returned break position. | |
219 // | |
220 // ***********************************************************************/ | |
221 // | |
222 // void getStatus (inout Break b) | |
223 // { | |
224 // b = cast(Break) super.getStatus(); | |
225 // } | |
226 // } | |
227 // | |
228 // | |
229 // /******************************************************************************* | |
230 // | |
231 // *******************************************************************************/ | |
232 // | |
233 // class UTitleIterator : UBreakIterator | |
234 // { | |
235 // /*********************************************************************** | |
236 // | |
237 // ***********************************************************************/ | |
238 // | |
239 // this (inout ULocale locale, UStringView text = null) | |
240 // { | |
241 // super (Type.Title, locale, text); | |
242 // } | |
243 // } | |
244 // | |
245 // | |
246 // /******************************************************************************* | |
247 // | |
248 // *******************************************************************************/ | |
249 // | |
250 // class URuleIterator : UBreakIterator | |
251 // { | |
252 // /*********************************************************************** | |
253 // | |
254 // Open a new UBreakIterator for locating text boundaries | |
255 // using specified breaking rules | |
256 // | |
257 // ***********************************************************************/ | |
258 // | |
259 // this (UStringView rules, UStringView text = null) | |
260 // { | |
261 // UErrorCode e; | |
262 // | |
263 // handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e); | |
264 // testError (e, "failed to open rule iterator"); | |
265 // } | |
266 // } | |
267 | |
268 | |
269 /******************************************************************************* | |
270 | |
271 BreakIterator defines methods for finding the location of boundaries | |
272 in text. A UBreakIterator maintains a current position and | |
273 scans over text, returning the index of characters where boundaries occur. | |
274 | |
275 Line boundary analysis determines where a text string can be broken | |
276 when line-wrapping. The mechanism correctly handles punctuation and | |
277 hyphenated words. | |
278 | |
279 Sentence boundary analysis allows selection with correct interpretation | |
280 of periods within numbers and abbreviations, and trailing punctuation | |
281 marks such as quotation marks and parentheses. | |
282 | |
283 Word boundary analysis is used by search and replace functions, as well | |
284 as within text editing applications that allow the user to select words | |
285 with a double click. Word selection provides correct interpretation of | |
286 punctuation marks within and following words. Characters that are not | |
287 part of a word, such as symbols or punctuation marks, have word-breaks | |
288 on both sides. | |
289 | |
290 Character boundary analysis allows users to interact with characters | |
291 as they expect to, for example, when moving the cursor through a text | |
292 string. Character boundary analysis provides correct navigation | |
293 through character strings, regardless of how the character is stored. | |
294 For example, an accented character might be stored as a base character | |
295 and a diacritical mark. What users consider to be a character can differ | |
296 between languages. | |
297 | |
298 Title boundary analysis locates all positions, typically starts of | |
299 words, that should be set to Title Case when title casing the text. | |
300 | |
301 See <A HREF="http://oss.software.ibm.com/icu/apiref/ubrk_8h.html"> | |
302 this page</A> for full details. | |
303 | |
304 *******************************************************************************/ | |
305 | |
306 struct UBreakIterator | |
307 { | |
308 typedef void _UBreakIterator; | |
309 alias _UBreakIterator* Handle; | |
310 Handle handle; | |
311 UText ut; | |
312 | |
313 // this is returned by next(), previous() etc ... | |
314 const uint Done = uint.max; | |
162 | 315 alias Done DONE; |
92 | 316 |
317 /*********************************************************************** | |
318 | |
319 internal types passed to C API | |
320 | |
321 ***********************************************************************/ | |
322 | |
323 private enum Type | |
324 { | |
325 Character, | |
326 Word, | |
327 Line, | |
328 Sentence, | |
329 Title | |
330 } | |
331 | |
332 | |
333 public enum WordBreak | |
334 { | |
335 None = 0, | |
336 NoneLimit = 100, | |
337 Number = 100, | |
338 NumberLimit = 200, | |
339 Letter = 200, | |
340 LetterLimit = 300, | |
341 Kana = 300, | |
342 KanaLimit = 400, | |
343 Ideo = 400, | |
344 IdeoLimit = 500 | |
345 } | |
346 public enum LineBreak | |
347 { | |
348 Soft = 0, | |
349 SoftLimit = 100, | |
350 Hard = 100, | |
351 HardLimit = 200 | |
352 } | |
353 public enum SentenceBreak | |
354 { | |
355 Term = 0, | |
356 TermLimit = 100, | |
357 Sep = 100, | |
358 Limit = 200 | |
359 } | |
360 | |
361 | |
362 /*********************************************************************** | |
363 | |
364 Open a new UBreakIterator for locating text boundaries for | |
365 a specified locale. A UBreakIterator may be used for detecting | |
366 character, line, word, and sentence breaks in text. | |
367 | |
368 ***********************************************************************/ | |
369 | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
370 static UBreakIterator openWordIterator( ULocale locale, char[] str = null ){ |
92 | 371 UBreakIterator res; |
372 auto e = ICU.UErrorCode.OK; | |
373 res.handle = ubrk_open( Type.Word, locale.name.ptr, null, 0, e); | |
374 ICU.testError (e, "failed to open word iterator"); | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
375 if( str ) { |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
376 res.ut.openUTF8(str); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
377 ubrk_setUText( res.handle, & res.ut, e); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
378 ICU.testError (e, "failed to set text in iterator"); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
379 } |
92 | 380 return res; |
381 } | |
382 | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
383 static UBreakIterator openLineIterator( ULocale locale, char[] str = null ){ |
92 | 384 UBreakIterator res; |
385 auto e = ICU.UErrorCode.OK; | |
386 res.handle = ubrk_open( Type.Line, locale.name.ptr, null, 0, e); | |
387 ICU.testError (e, "failed to open line iterator"); | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
388 if( str ) { |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
389 res.ut.openUTF8(str); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
390 ubrk_setUText( res.handle, & res.ut, e); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
391 ICU.testError (e, "failed to set text in iterator"); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
392 } |
92 | 393 return res; |
394 } | |
395 | |
396 /*********************************************************************** | |
397 | |
398 Close a UBreakIterator | |
399 | |
400 ***********************************************************************/ | |
401 | |
402 void close () | |
403 { | |
404 ut.close(); // close the UText member first | |
405 ubrk_close (handle); // then close the iterator handle; NOTE(review): handle is not reset here, so a second close() would pass the same handle again | |
406 } | |
407 | |
408 /*********************************************************************** | |
409 | |
410 Sets an existing iterator to point to a new piece of text | |
411 | |
412 ***********************************************************************/ | |
413 | |
414 void setText (UStringView text) | |
415 { | |
416 auto e = ICU.UErrorCode.OK; // explicit success status on entry, matching the other methods in this struct | |
417 ubrk_setText (handle, text.get.ptr, text.length, e); // text goes to ICU as a wchar pointer plus length | |
418 ICU.testError (e, "failed to set iterator text"); // checks the status written by ICU; presumably raises on failure - confirm in ICU.testError | |
419 } | |
420 | |
421 void setText (char[] text) | |
422 { | |
423 auto e = ICU.UErrorCode.OK; | |
424 ut.openUTF8(text); // open the UText member over the UTF-8 input; NOTE(review): a previously opened ut is not closed first - confirm openUTF8 handles reuse | |
425 ubrk_setUText( handle, & ut, e); // point the iterator at that UText | |
426 ICU.testError (e, "failed to set text in iterator"); // checks the status written by ubrk_setUText | |
427 } | |
428 | |
429 /*********************************************************************** | |
430 | |
431 Determine the most recently-returned text boundary | |
432 | |
433 ***********************************************************************/ | |
434 | |
435 uint current () | |
436 { | |
437 return ubrk_current (handle); // forwarded unchanged to the bound ICU function | |
438 } | |
439 | |
440 /*********************************************************************** | |
441 | |
442 Determine the text boundary following the current text | |
443 boundary, or UBRK_DONE if all text boundaries have been | |
444 returned. | |
445 | |
446 If offset is specified, determines the text boundary | |
447 following the specified offset. The value returned | |
448 is always greater than offset, or Done. | |
449 | |
450 ***********************************************************************/ | |
451 | |
452 uint next (uint offset = uint.max) | |
453 { | |
454 if (offset == uint.max) // default argument: no offset supplied; NOTE(review): an explicit offset of uint.max cannot be told apart from this | |
455 return ubrk_next (handle); // advance from the iterator's current position | |
456 return ubrk_following (handle, offset); // otherwise search relative to the given offset | |
457 } | |
162 | 458 alias next following; |
92 | 459 /*********************************************************************** |
460 | |
461 Determine the text boundary preceding the current text | |
462 boundary, or Done if all text boundaries have been returned. | |
463 | |
464 If offset is specified, determines the text boundary preceding | |
465 the specified offset. The value returned is always smaller than | |
466 offset, or Done. | |
467 | |
468 ***********************************************************************/ | |
469 | |
470 uint previous (uint offset = uint.max) | |
471 { | |
472 if (offset == uint.max) // default argument: no offset supplied; NOTE(review): an explicit offset of uint.max cannot be told apart from this | |
473 return ubrk_previous (handle); // step back from the iterator's current position | |
474 return ubrk_preceding (handle, offset); // otherwise search relative to the given offset | |
475 } | |
476 | |
477 /*********************************************************************** | |
478 | |
479 Determine the index of the first character in the text | |
480 being scanned. This is not always the same as index 0 | |
481 of the text. | |
482 | |
483 ***********************************************************************/ | |
484 | |
485 uint first () | |
486 { | |
487 return ubrk_first (handle); // forwarded unchanged to the bound ICU function | |
488 } | |
489 | |
490 /*********************************************************************** | |
491 | |
492 Determine the index immediately beyond the last character | |
493 in the text being scanned. This is not the same as the index of | |
494 the last character. | |
495 | |
496 ***********************************************************************/ | |
497 | |
498 uint last () | |
499 { | |
500 return ubrk_last (handle); // forwarded unchanged to the bound ICU function | |
501 } | |
502 | |
503 /*********************************************************************** | |
504 | |
505 Returns true if the specified position is a boundary position. | |
506 As a side effect, leaves the iterator pointing to the first | |
507 boundary position at or after "offset". | |
508 | |
509 ***********************************************************************/ | |
510 | |
511 bool isBoundary (uint offset) | |
512 { | |
513 return ubrk_isBoundary (handle, offset) != 0; // the bound function returns a byte; any nonzero value is mapped to true | |
514 } | |
515 | |
516 /*********************************************************************** | |
517 | |
518 Return the status from the break rule that determined | |
519 the most recently returned break position. | |
520 | |
521 ***********************************************************************/ | |
522 | |
523 void getStatus (inout uint s) | |
524 { | |
525 s = getStatus (); // delegates to the private no-argument overload and writes the result through the inout parameter | |
526 } | |
527 | |
528 /*********************************************************************** | |
529 | |
530 Return the status from the break rule that determined | |
531 the most recently returned break position. | |
532 | |
533 The values appear in the rule source within brackets, | |
534 {123}, for example. For rules that do not specify a status, | |
535 a default value of 0 is returned. | |
536 | |
537 For word break iterators, the possible values are defined | |
538 in enum WordBreak (UWordBreak in the ICU C API) | |
539 | |
540 ***********************************************************************/ | |
541 | |
542 private uint getStatus () | |
543 { | |
544 return ubrk_getRuleStatus (handle); // raw rule status value as returned by the bound ICU function | |
545 } | |
546 | |
547 | |
548 /*********************************************************************** | |
549 | |
550 Bind the ICU functions from a shared library. This is | |
551 complicated by the issues regarding D and DLLs on the | |
552 Windows platform | |
553 | |
554 ***********************************************************************/ | |
555 | |
556 private static void* library; | |
557 | |
558 /*********************************************************************** | |
559 | |
560 ***********************************************************************/ | |
561 | |
562 private static extern (C) | |
563 { | |
564 Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open; | |
565 Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules; | |
566 void function (Handle) ubrk_close; | |
567 void function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText; | |
568 uint function (Handle) ubrk_current; | |
569 uint function (Handle) ubrk_next; | |
570 uint function (Handle) ubrk_previous; | |
571 uint function (Handle) ubrk_first; | |
572 uint function (Handle) ubrk_last; | |
573 uint function (Handle, uint) ubrk_preceding; | |
574 uint function (Handle, uint) ubrk_following; | |
575 byte function (Handle, uint) ubrk_isBoundary; | |
576 uint function (Handle) ubrk_getRuleStatus; | |
577 Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone; | |
578 void function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText; | |
579 } | |
580 | |
581 /*********************************************************************** | |
582 | |
583 ***********************************************************************/ | |
584 | |
585 static FunctionLoader.Bind[] targets = | |
586 [ | |
587 {cast(void**) &ubrk_open, "ubrk_open"}, | |
588 {cast(void**) &ubrk_close, "ubrk_close"}, | |
589 {cast(void**) &ubrk_openRules, "ubrk_openRules"}, | |
590 {cast(void**) &ubrk_setText, "ubrk_setText"}, | |
591 {cast(void**) &ubrk_current, "ubrk_current"}, | |
592 {cast(void**) &ubrk_next, "ubrk_next"}, | |
593 {cast(void**) &ubrk_previous, "ubrk_previous"}, | |
594 {cast(void**) &ubrk_first, "ubrk_first"}, | |
595 {cast(void**) &ubrk_last, "ubrk_last"}, | |
596 {cast(void**) &ubrk_preceding, "ubrk_preceding"}, | |
597 {cast(void**) &ubrk_following, "ubrk_following"}, | |
598 {cast(void**) &ubrk_isBoundary, "ubrk_isBoundary"}, | |
599 {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"}, | |
600 {cast(void**) &ubrk_setUText, "ubrk_setUText"}, | |
601 {cast(void**) &ubrk_safeClone, "ubrk_safeClone"}, | |
602 ]; | |
603 | |
604 /********************************************************************** | |
605 | |
606 **********************************************************************/ | |
607 | |
608 static this () | |
609 { | |
610 library = FunctionLoader.bind (ICU.icuuc, targets); // bind the entries of the targets table against ICU.icuuc and keep the returned library pointer for unbind | |
611 } | |
612 | |
613 /********************************************************************** | |
614 | |
615 **********************************************************************/ | |
616 | |
617 static ~this () | |
618 { | |
619 FunctionLoader.unbind (library); // release the library pointer obtained in the static constructor | |
620 } | |
621 } |