Mercurial > projects > dwt-addons
annotate dwtx/dwtxhelper/mangoicu/UBreakIterator.d @ 98:95307ad235d9
Added Draw2d code, still work in progress
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Sun, 03 Aug 2008 00:52:14 +0200 |
parents | f05207c07a98 |
children | 1a5b8f8129df |
rev | line source |
---|---|
92 | 1 /******************************************************************************* |
2 | |
3 @file UBreakIterator.d | |
4 | |
5 Copyright (c) 2004 Kris Bell | |
6 | |
7 This software is provided 'as-is', without any express or implied | |
8 warranty. In no event will the authors be held liable for damages | |
9 of any kind arising from the use of this software. | |
10 | |
11 Permission is hereby granted to anyone to use this software for any | |
12 purpose, including commercial applications, and to alter it and/or | |
13 redistribute it freely, subject to the following restrictions: | |
14 | |
15 1. The origin of this software must not be misrepresented; you must | |
16 not claim that you wrote the original software. If you use this | |
17 software in a product, an acknowledgment within documentation of | |
18 said product would be appreciated but is not required. | |
19 | |
20 2. Altered source versions must be plainly marked as such, and must | |
21 not be misrepresented as being the original software. | |
22 | |
23 3. This notice may not be removed or altered from any distribution | |
24 of the source. | |
25 | |
26 4. Derivative works are permitted, but they must carry this notice | |
27 in full and credit the original source. | |
28 | |
29 | |
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
31 | |
32 | |
33 @version Initial version, November 2004 | |
34 @author Kris | |
35 | |
36 Note that this package and documentation is built around the ICU | |
37 project (http://oss.software.ibm.com/icu/). Below is the license | |
38 statement as specified by that software: | |
39 | |
40 | |
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
42 | |
43 | |
44 ICU License - ICU 1.8.1 and later | |
45 | |
46 COPYRIGHT AND PERMISSION NOTICE | |
47 | |
48 Copyright (c) 1995-2003 International Business Machines Corporation and | |
49 others. | |
50 | |
51 All rights reserved. | |
52 | |
53 Permission is hereby granted, free of charge, to any person obtaining a | |
54 copy of this software and associated documentation files (the | |
55 "Software"), to deal in the Software without restriction, including | |
56 without limitation the rights to use, copy, modify, merge, publish, | |
57 distribute, and/or sell copies of the Software, and to permit persons | |
58 to whom the Software is furnished to do so, provided that the above | |
59 copyright notice(s) and this permission notice appear in all copies of | |
60 the Software and that both the above copyright notice(s) and this | |
61 permission notice appear in supporting documentation. | |
62 | |
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
64 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT | |
66 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | |
67 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL | |
68 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING | |
69 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, | |
70 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION | |
71 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
72 | |
73 Except as contained in this notice, the name of a copyright holder | |
74 shall not be used in advertising or otherwise to promote the sale, use | |
75 or other dealings in this Software without prior written authorization | |
76 of the copyright holder. | |
77 | |
78 ---------------------------------------------------------------------- | |
79 | |
80 All trademarks and registered trademarks mentioned herein are the | |
81 property of their respective owners. | |
82 | |
83 *******************************************************************************/ | |
84 | |
85 module dwtx.dwtxhelper.mangoicu.UBreakIterator; | |
86 | |
87 private import dwtx.dwtxhelper.mangoicu.ICU; | |
88 | |
89 public import dwtx.dwtxhelper.mangoicu.ULocale, | |
90 dwtx.dwtxhelper.mangoicu.UText, | |
91 dwtx.dwtxhelper.mangoicu.UString; | |
92 | |
93 | |
94 | |
95 // /******************************************************************************* | |
96 // | |
97 // *******************************************************************************/ | |
98 // | |
99 // class UCharacterIterator : UBreakIterator | |
100 // { | |
101 // /*********************************************************************** | |
102 // | |
103 // ***********************************************************************/ | |
104 // | |
105 // this (inout ULocale locale, UStringView text = null) | |
106 // { | |
107 // super (Type.Character, locale, text); | |
108 // } | |
109 // } | |
110 // | |
111 // | |
112 // /******************************************************************************* | |
113 // | |
114 // *******************************************************************************/ | |
115 // | |
116 // class UWordIterator : UBreakIterator | |
117 // { | |
118 // public enum Break | |
119 // { | |
120 // None = 0, | |
121 // NoneLimit = 100, | |
122 // Number = 100, | |
123 // NumberLimit = 200, | |
124 // Letter = 200, | |
125 // LetterLimit = 300, | |
126 // Kana = 300, | |
127 // KanaLimit = 400, | |
128 // Ideo = 400, | |
129 // IdeoLimit = 500 | |
130 // } | |
131 // | |
132 // /*********************************************************************** | |
133 // | |
134 // ***********************************************************************/ | |
135 // | |
136 // this (inout ULocale locale, UStringView text = null) | |
137 // { | |
138 // super (Type.Word, locale, text); | |
139 // } | |
140 // | |
141 // /*********************************************************************** | |
142 // | |
143 // Return the status from the break rule that determined | |
144 // the most recently returned break position. | |
145 // | |
146 // ***********************************************************************/ | |
147 // | |
148 // void getStatus (inout Break b) | |
149 // { | |
150 // b = cast(Break) super.getStatus(); | |
151 // } | |
152 // } | |
153 // | |
154 // | |
155 // /******************************************************************************* | |
156 // | |
157 // *******************************************************************************/ | |
158 // | |
159 // class ULineIterator : UBreakIterator | |
160 // { | |
161 // public enum Break | |
162 // { | |
163 // Soft = 0, | |
164 // SoftLimit = 100, | |
165 // Hard = 100, | |
166 // HardLimit = 200 | |
167 // } | |
168 // | |
169 // /*********************************************************************** | |
170 // | |
171 // ***********************************************************************/ | |
172 // | |
173 // this (inout ULocale locale, UStringView text = null) | |
174 // { | |
175 // super (Type.Line, locale, text); | |
176 // } | |
177 // | |
178 // /*********************************************************************** | |
179 // | |
180 // Return the status from the break rule that determined | |
181 // the most recently returned break position. | |
182 // | |
183 // ***********************************************************************/ | |
184 // | |
185 // void getStatus (inout Break b) | |
186 // { | |
187 // b = cast(Break) super.getStatus(); | |
188 // } | |
189 // } | |
190 // | |
191 // | |
192 // /******************************************************************************* | |
193 // | |
194 // *******************************************************************************/ | |
195 // | |
196 // class USentenceIterator : UBreakIterator | |
197 // { | |
198 // public enum Break | |
199 // { | |
200 // Term = 0, | |
201 // TermLimit = 100, | |
202 // Sep = 100, | |
203 // Limit = 200 | |
204 // } | |
205 // | |
206 // /*********************************************************************** | |
207 // | |
208 // ***********************************************************************/ | |
209 // | |
210 // this (inout ULocale locale, UStringView text = null) | |
211 // { | |
212 // super (Type.Sentence, locale, text); | |
213 // } | |
214 // | |
215 // /*********************************************************************** | |
216 // | |
217 // Return the status from the break rule that determined | |
218 // the most recently returned break position. | |
219 // | |
220 // ***********************************************************************/ | |
221 // | |
222 // void getStatus (inout Break b) | |
223 // { | |
224 // b = cast(Break) super.getStatus(); | |
225 // } | |
226 // } | |
227 // | |
228 // | |
229 // /******************************************************************************* | |
230 // | |
231 // *******************************************************************************/ | |
232 // | |
233 // class UTitleIterator : UBreakIterator | |
234 // { | |
235 // /*********************************************************************** | |
236 // | |
237 // ***********************************************************************/ | |
238 // | |
239 // this (inout ULocale locale, UStringView text = null) | |
240 // { | |
241 // super (Type.Title, locale, text); | |
242 // } | |
243 // } | |
244 // | |
245 // | |
246 // /******************************************************************************* | |
247 // | |
248 // *******************************************************************************/ | |
249 // | |
250 // class URuleIterator : UBreakIterator | |
251 // { | |
252 // /*********************************************************************** | |
253 // | |
254 // Open a new UBreakIterator for locating text boundaries | |
255 // using specified breaking rules | |
256 // | |
257 // ***********************************************************************/ | |
258 // | |
259 // this (UStringView rules, UStringView text = null) | |
260 // { | |
261 // UErrorCode e; | |
262 // | |
263 // handle = ubrk_openRules (rules.get.ptr, rules.length, text.get.ptr, text.length, null, e); | |
264 // testError (e, "failed to open rule iterator"); | |
265 // } | |
266 // } | |
267 | |
268 | |
269 /******************************************************************************* | |
270 | |
271 BreakIterator defines methods for finding the location of boundaries | |
272 in text. A UBreakIterator maintains a current position and | |
273 scans over text, returning the index of characters where boundaries occur. | |
274 | |
275 Line boundary analysis determines where a text string can be broken | |
276 when line-wrapping. The mechanism correctly handles punctuation and | |
277 hyphenated words. | |
278 | |
279 Sentence boundary analysis allows selection with correct interpretation | |
280 of periods within numbers and abbreviations, and trailing punctuation | |
281 marks such as quotation marks and parentheses. | |
282 | |
283 Word boundary analysis is used by search and replace functions, as well | |
284 as within text editing applications that allow the user to select words | |
285 with a double click. Word selection provides correct interpretation of | |
286 punctuation marks within and following words. Characters that are not | |
287 part of a word, such as symbols or punctuation marks, have word-breaks | |
288 on both sides. | |
289 | |
290 Character boundary analysis allows users to interact with characters | |
291 as they expect to, for example, when moving the cursor through a text | |
292 string. Character boundary analysis provides correct navigation | |
293 through character strings, regardless of how the character is stored. | |
294 For example, an accented character might be stored as a base character | |
295 and a diacritical mark. What users consider to be a character can differ | |
296 between languages. | |
297 | |
298 Title boundary analysis locates all positions, typically starts of | |
299 words, that should be set to Title Case when title casing the text. | |
300 | |
301 See <A HREF="http://oss.software.ibm.com/icu/apiref/ubrk_8h.html"> | |
302 this page</A> for full details. | |
303 | |
304 *******************************************************************************/ | |
305 | |
306 struct UBreakIterator | |
307 { | |
308 typedef void _UBreakIterator; /* opaque stand-in type; this struct only ever uses pointers to it */ | |
309 alias _UBreakIterator* Handle; /* pointer type exchanged with the ubrk_* functions */ | |
310 Handle handle; /* assigned by the open*Iterator factories, passed to ubrk_close in close() */ | |
311 UText ut; /* UTF-8 text wrapper whose address is handed to ubrk_setUText */ | |
312 | |
313 // value returned by next(), previous() etc. when no further boundary exists (presumably ICU's UBRK_DONE, -1, seen as uint; TODO confirm) | |
314 const uint Done = uint.max; | |
315 | |
316 /*********************************************************************** | |
317 Kind of break iterator to create; passed as the first argument of | |
318 ubrk_open. Members carry no explicit values, so they number 0 .. 4 in | |
319 declaration order (presumably mirroring ICU's UBreakIteratorType; TODO confirm). | |
320 ***********************************************************************/ | |
321 | |
322 private enum Type | |
323 { | |
324 Character, | |
325 Word, | |
326 Line, | |
327 Sentence, | |
328 Title | |
329 } | |
330 | |
331 | |
332 public enum WordBreak /* status values for word break iterators (see the getStatus documentation); each XLimit equals the start value of the next category */ | |
333 { | |
334 None = 0, | |
335 NoneLimit = 100, | |
336 Number = 100, | |
337 NumberLimit = 200, | |
338 Letter = 200, | |
339 LetterLimit = 300, | |
340 Kana = 300, | |
341 KanaLimit = 400, | |
342 Ideo = 400, | |
343 IdeoLimit = 500 | |
344 } | |
345 public enum LineBreak /* presumably the getStatus value ranges for line break iterators (TODO confirm); SoftLimit equals Hard */ | |
346 { | |
347 Soft = 0, | |
348 SoftLimit = 100, | |
349 Hard = 100, | |
350 HardLimit = 200 | |
351 } | |
352 public enum SentenceBreak /* presumably the getStatus value ranges for sentence break iterators (TODO confirm) */ | |
353 { | |
354 Term = 0, | |
355 TermLimit = 100, | |
356 Sep = 100, | |
357 Limit = 200 /* NOTE(review): named Limit rather than SepLimit, unlike the XLimit pattern of the sibling enums */ | |
358 } | |
359 | |
360 | |
361 /*********************************************************************** | |
362 | |
363 Open a new UBreakIterator for locating text boundaries for | |
364 a specified locale. A UBreakIterator may be used for detecting | |
365 character, line, word, and sentence breaks in text. | |
366 | |
367 ***********************************************************************/ | |
368 | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
369 static UBreakIterator openWordIterator( ULocale locale, char[] str = null ){ /* Factory: opens a word-break iterator (Type.Word) for locale.name; if str passes the if( str ) test it is attached as UTF-8 text. Returns the struct by value. */ |
92 | 370 UBreakIterator res; |
371 auto e = ICU.UErrorCode.OK; | |
372 res.handle = ubrk_open( Type.Word, locale.name.ptr, null, 0, e); /* opened without text: null pointer, length 0 */ | |
373 ICU.testError (e, "failed to open word iterator"); | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
374 if( str ) { /* optional initial text */ |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
375 res.ut.openUTF8(str); /* wrap str in the result's own UText member */ |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
376 ubrk_setUText( res.handle, & res.ut, e); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
377 ICU.testError (e, "failed to set text in iterator"); /* NOTE(review): presumably throws on error; if so, res.handle opened above is not closed on this path - confirm */ |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
378 } |
92 | 379 return res; |
380 } | |
381 | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
382 static UBreakIterator openLineIterator( ULocale locale, char[] str = null ){ /* Factory: opens a line-break iterator (Type.Line) for locale.name; if str passes the if( str ) test it is attached as UTF-8 text. Returns the struct by value. */ |
92 | 383 UBreakIterator res; |
384 auto e = ICU.UErrorCode.OK; | |
385 res.handle = ubrk_open( Type.Line, locale.name.ptr, null, 0, e); /* opened without text: null pointer, length 0 */ | |
386 ICU.testError (e, "failed to open line iterator"); | |
98
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
387 if( str ) { /* optional initial text */ |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
388 res.ut.openUTF8(str); /* wrap str in the result's own UText member */ |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
389 ubrk_setUText( res.handle, & res.ut, e); |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
390 ICU.testError (e, "failed to set text in iterator"); /* NOTE(review): presumably throws on error; if so, res.handle opened above is not closed on this path - confirm */ |
95307ad235d9
Added Draw2d code, still work in progress
Frank Benoit <benoit@tionex.de>
parents:
92
diff
changeset
|
391 } |
92 | 392 return res; |
393 } | |
394 | |
395 /*********************************************************************** | |
396 Close a UBreakIterator: first closes the UText member ut, then | |
397 passes handle to ubrk_close. Neither field is reset afterwards. | |
398 NOTE(review): ut.close() runs even if no UTF-8 text was ever set; confirm that is safe. | |
399 ***********************************************************************/ | |
400 | |
401 void close () | |
402 { | |
403 ut.close(); | |
404 ubrk_close (handle); | |
405 } | |
406 | |
407 /*********************************************************************** | |
408 Sets an existing iterator to point to a new piece of text, passing | |
409 the view's buffer (text.get.ptr) and text.length to ubrk_setText. | |
410 The resulting error code is checked through ICU.testError. | |
411 ***********************************************************************/ | |
412 | |
413 void setText (UStringView text) | |
414 { | |
415 ICU.UErrorCode e; /* no explicit initialiser here, unlike setText (char[]) which starts from UErrorCode.OK; presumably the default value is OK - TODO confirm */ | |
416 ubrk_setText (handle, text.get.ptr, text.length, e); | |
417 ICU.testError (e, "failed to set iterator text"); | |
418 } | |
419 | |
420 void setText (char[] text) /* UTF-8 overload: points an existing iterator at text by way of the UText member ut */ | |
421 { | |
422 auto e = ICU.UErrorCode.OK; | |
423 ut.openUTF8(text); /* NOTE(review): ut is not closed first; presumably openUTF8 may be called on an already opened UText - confirm */ | |
424 ubrk_setUText( handle, & ut, e); | |
425 ICU.testError (e, "failed to set text in iterator"); | |
426 } | |
427 | |
428 /*********************************************************************** | |
429 | |
430 Determine the most recently returned text boundary. | |
431 Returns the result of ubrk_current (handle) unchanged. | |
432 ***********************************************************************/ | |
433 | |
434 uint current () | |
435 { | |
436 return ubrk_current (handle); | |
437 } | |
438 | |
439 /*********************************************************************** | |
440 | |
441 Without an argument (offset left at uint.max): determine the text | |
442 boundary following the current text boundary via ubrk_next, or | |
443 Done if all text boundaries have been returned. | |
444 | |
445 With an explicit offset: determine the text boundary following | |
446 the specified offset via ubrk_following. The value returned | |
447 is always greater than offset, or Done. | |
448 uint.max itself cannot be used as an offset, because it selects the first form. | |
449 ***********************************************************************/ | |
450 | |
451 uint next (uint offset = uint.max) | |
452 { | |
453 if (offset == uint.max) | |
454 return ubrk_next (handle); | |
455 return ubrk_following (handle, offset); | |
456 } | |
457 | |
458 /*********************************************************************** | |
459 | |
460 Without an argument (offset left at uint.max): determine the text | |
461 boundary preceding the current text boundary via ubrk_previous, | |
462 or Done if all text boundaries have been returned. | |
463 With an explicit offset: determine the text boundary preceding | |
464 the specified offset via ubrk_preceding. The value returned is | |
465 always smaller than offset, or Done. | |
466 uint.max itself cannot be used as an offset, because it selects the first form. | |
467 ***********************************************************************/ | |
468 | |
469 uint previous (uint offset = uint.max) | |
470 { | |
471 if (offset == uint.max) | |
472 return ubrk_previous (handle); | |
473 return ubrk_preceding (handle, offset); | |
474 } | |
475 | |
476 /*********************************************************************** | |
477 | |
478 Determine the index of the first character in the text | |
479 being scanned. This is not always the same as index 0 | |
480 of the text. | |
481 Returns the result of ubrk_first (handle) unchanged. | |
482 ***********************************************************************/ | |
483 | |
484 uint first () | |
485 { | |
486 return ubrk_first (handle); | |
487 } | |
488 | |
489 /*********************************************************************** | |
490 | |
491 Determine the index immediately beyond the last character | |
492 in the text being scanned. This is not the same as the index | |
493 of the last character. | |
494 Returns the result of ubrk_last (handle) unchanged. | |
495 ***********************************************************************/ | |
496 | |
497 uint last () | |
498 { | |
499 return ubrk_last (handle); | |
500 } | |
501 | |
502 /*********************************************************************** | |
503 | |
504 Returns true if the specified position is a boundary position. | |
505 As a side effect, leaves the iterator pointing to the first | |
506 boundary position at or after "offset". | |
507 The byte returned by ubrk_isBoundary is converted to bool by comparing it with 0. | |
508 ***********************************************************************/ | |
509 | |
510 bool isBoundary (uint offset) | |
511 { | |
512 return ubrk_isBoundary (handle, offset) != 0; | |
513 } | |
514 | |
515 /*********************************************************************** | |
516 | |
517 Store into s the status from the break rule that determined | |
518 the most recently returned break position. | |
519 Out-parameter form of the private getStatus () overload; s is overwritten. | |
520 ***********************************************************************/ | |
521 | |
522 void getStatus (inout uint s) | |
523 { | |
524 s = getStatus (); | |
525 } | |
526 | |
527 /*********************************************************************** | |
528 | |
529 Return the status from the break rule that determined | |
530 the most recently returned break position. | |
531 | |
532 The values appear in the rule source within brackets, | |
533 {123}, for example. For rules that do not specify a status, | |
534 a default value of 0 is returned. | |
535 | |
536 For word break iterators, the possible values are defined | |
537 in enum WordBreak (named UWordBreak in ICU's C API). | |
538 Returns the result of ubrk_getRuleStatus (handle) unchanged. | |
539 ***********************************************************************/ | |
540 | |
541 private uint getStatus () | |
542 { | |
543 return ubrk_getRuleStatus (handle); | |
544 } | |
545 | |
546 | |
547 /*********************************************************************** | |
548 | |
549 Bind the ICU functions from a shared library. This is | |
550 complicated by the issues regarding D and DLLs on the | |
551 Windows platform. | |
552 library holds the value returned by FunctionLoader.bind and is later given to FunctionLoader.unbind. | |
553 ***********************************************************************/ | |
554 | |
555 private static void* library; | |
556 | |
557 /*********************************************************************** | |
558 C-linkage function pointers for the ICU ubrk_* entry points. Their addresses are listed in the targets table; presumably FunctionLoader.bind fills them in (TODO confirm). | |
559 ***********************************************************************/ | |
560 | |
561 private static extern (C) | |
562 { | |
563 Handle function (uint, char*, wchar*, uint, inout ICU.UErrorCode) ubrk_open; | |
564 Handle function (wchar*, uint, wchar*, uint, void*, inout ICU.UErrorCode) ubrk_openRules; /* only referenced from the commented-out URuleIterator in this file */ | |
565 void function (Handle) ubrk_close; | |
566 void function (Handle, wchar*, uint, inout ICU.UErrorCode) ubrk_setText; | |
567 uint function (Handle) ubrk_current; | |
568 uint function (Handle) ubrk_next; | |
569 uint function (Handle) ubrk_previous; | |
570 uint function (Handle) ubrk_first; | |
571 uint function (Handle) ubrk_last; | |
572 uint function (Handle, uint) ubrk_preceding; | |
573 uint function (Handle, uint) ubrk_following; | |
574 byte function (Handle, uint) ubrk_isBoundary; /* result is compared with 0 in isBoundary () */ | |
575 uint function (Handle) ubrk_getRuleStatus; | |
576 Handle function (Handle, void *, int *, inout ICU.UErrorCode) ubrk_safeClone; /* bound but not called by any method of this struct */ | |
577 void function (Handle, UText*, inout ICU.UErrorCode) ubrk_setUText; | |
578 } | |
579 | |
580 /*********************************************************************** | |
581 Binding table: pairs the address of each ubrk_* function pointer with the symbol name to look up. Passed to FunctionLoader.bind by the static constructor. | |
582 ***********************************************************************/ | |
583 | |
584 static FunctionLoader.Bind[] targets = | |
585 [ | |
586 {cast(void**) &ubrk_open, "ubrk_open"}, | |
587 {cast(void**) &ubrk_close, "ubrk_close"}, | |
588 {cast(void**) &ubrk_openRules, "ubrk_openRules"}, | |
589 {cast(void**) &ubrk_setText, "ubrk_setText"}, | |
590 {cast(void**) &ubrk_current, "ubrk_current"}, | |
591 {cast(void**) &ubrk_next, "ubrk_next"}, | |
592 {cast(void**) &ubrk_previous, "ubrk_previous"}, | |
593 {cast(void**) &ubrk_first, "ubrk_first"}, | |
594 {cast(void**) &ubrk_last, "ubrk_last"}, | |
595 {cast(void**) &ubrk_preceding, "ubrk_preceding"}, | |
596 {cast(void**) &ubrk_following, "ubrk_following"}, | |
597 {cast(void**) &ubrk_isBoundary, "ubrk_isBoundary"}, | |
598 {cast(void**) &ubrk_getRuleStatus, "ubrk_getRuleStatus"}, | |
599 {cast(void**) &ubrk_setUText, "ubrk_setUText"}, | |
600 {cast(void**) &ubrk_safeClone, "ubrk_safeClone"}, | |
601 ]; | |
602 | |
603 /********************************************************************** | |
604 Static constructor: binds the targets table against the library named by ICU.icuuc and keeps the returned value in library. | |
605 **********************************************************************/ | |
606 | |
607 static this () | |
608 { | |
609 library = FunctionLoader.bind (ICU.icuuc, targets); | |
610 } | |
611 | |
612 /********************************************************************** | |
613 Static destructor: hands the value obtained by the static constructor back to FunctionLoader.unbind. | |
614 **********************************************************************/ | |
615 | |
616 static ~this () | |
617 { | |
618 FunctionLoader.unbind (library); | |
619 } | |
620 } |