comparison com.ibm.icu/src/com/ibm/icu/mangoicu/UCollator.d @ 92:ebefa5c2eab4

moving ICU bindings to com.ibm.icu
author Frank Benoit <benoit@tionex.de>
date Sun, 19 Apr 2009 13:49:38 +0200
parents base/src/java/mangoicu/UCollator.d@1bf55a6eb092
children 536e43f63c81
comparison
equal deleted inserted replaced
91:2755ef2c8ef8 92:ebefa5c2eab4
1 /*******************************************************************************
2
3 @file UCollator.d
4
5 Copyright (c) 2004 Kris Bell
6
7 This software is provided 'as-is', without any express or implied
8 warranty. In no event will the authors be held liable for damages
9 of any kind arising from the use of this software.
10
11 Permission is hereby granted to anyone to use this software for any
12 purpose, including commercial applications, and to alter it and/or
13 redistribute it freely, subject to the following restrictions:
14
15 1. The origin of this software must not be misrepresented; you must
16 not claim that you wrote the original software. If you use this
17 software in a product, an acknowledgment within documentation of
18 said product would be appreciated but is not required.
19
20 2. Altered source versions must be plainly marked as such, and must
21 not be misrepresented as being the original software.
22
23 3. This notice may not be removed or altered from any distribution
24 of the source.
25
26 4. Derivative works are permitted, but they must carry this notice
27 in full and credit the original source.
28
29
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31
32
33 @version Initial version, November 2004
34 @author Kris
35
36 Note that this package and documentation is built around the ICU
37 project (http://oss.software.ibm.com/icu/). Below is the license
38 statement as specified by that software:
39
40
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
42
43
44 ICU License - ICU 1.8.1 and later
45
46 COPYRIGHT AND PERMISSION NOTICE
47
48 Copyright (c) 1995-2003 International Business Machines Corporation and
49 others.
50
51 All rights reserved.
52
53 Permission is hereby granted, free of charge, to any person obtaining a
54 copy of this software and associated documentation files (the
55 "Software"), to deal in the Software without restriction, including
56 without limitation the rights to use, copy, modify, merge, publish,
57 distribute, and/or sell copies of the Software, and to permit persons
58 to whom the Software is furnished to do so, provided that the above
59 copyright notice(s) and this permission notice appear in all copies of
60 the Software and that both the above copyright notice(s) and this
61 permission notice appear in supporting documentation.
62
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
64 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
66 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
68 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
69 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
70 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
71 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
72
73 Except as contained in this notice, the name of a copyright holder
74 shall not be used in advertising or otherwise to promote the sale, use
75 or other dealings in this Software without prior written authorization
76 of the copyright holder.
77
78 ----------------------------------------------------------------------
79
80 All trademarks and registered trademarks mentioned herein are the
81 property of their respective owners.
82
83 *******************************************************************************/
84
85 module com.ibm.icu.mangoicu.UCollator;
86
87 private import com.ibm.icu.mangoicu.ICU,
88 com.ibm.icu.mangoicu.USet,
89 com.ibm.icu.mangoicu.ULocale,
90 com.ibm.icu.mangoicu.UString;
91
/*******************************************************************************

        The API for Collator performs locale-sensitive string comparison.
        You use this service to build searching and sorting routines for
        natural language text. Important: The ICU collation service has been
        reimplemented in order to achieve better performance and UCA
        compliance. For details, see the collation design document.

        For more information about the collation service see the users guide.

        Collation service provides correct sorting orders for most locales
        supported in ICU. If specific data for a locale is not available,
        the order eventually falls back to the UCA sort order.

        Sort ordering may be customized by providing your own set of rules.
        For more on this subject see the Collation customization section of
        the users guide.

        See <A HREF="http://oss.software.ibm.com/icu/apiref/ucol_8h.html">
        this page</A> for full details.

*******************************************************************************/

class UCollator : ICU
{
        // Opaque ICU collator handle; released by the destructor.
        package Handle handle;

        /***********************************************************************

                Attribute selectors accepted by setAttribute() and
                getAttribute(). The numeric values are handed straight
                to the ICU C functions declared further below, so the
                declaration order must not be changed.

        ***********************************************************************/

        enum    Attribute
                {
                FrenchCollation,
                AlternateHandling,
                CaseFirst,
                CaseLevel,
                NormalizationMode,
                DecompositionMode = NormalizationMode,
                strength,
                HiraganaQuaternaryMode,
                NumericCollation,
                AttributeCount
                }

        /***********************************************************************

                Values for the attributes above, including the collation
                strengths (see the Strength typedef). The numeric values
                are handed straight to the ICU C functions.

        ***********************************************************************/

        enum    AttributeValue
                {
                Default         = -1,
                Primary         = 0,
                Secondary       = 1,
                Tertiary        = 2,
                DefaultStrength = Tertiary,
                CeStrengthLimit,                // implicit: Tertiary + 1
                Quaternary      = 3,
                Identical       = 15,
                strengthLimit,                  // implicit: Identical + 1
                Off             = 16,
                On              = 17,
                Shifted         = 20,
                NonIgnorable    = 21,
                LowerFirst      = 24,
                UpperFirst      = 25,
                AttributeValueCount             // implicit: UpperFirst + 1
                }

        /***********************************************************************

                Selects what getRules() returns: only the tailoring, or
                the full rule set.

        ***********************************************************************/

        enum    RuleOption
                {
                TailoringOnly,
                FullRules
                }

        /***********************************************************************

                Kind of bound produced by getBound()

        ***********************************************************************/

        enum    BoundMode
                {
                BoundLower      = 0,
                BoundUpper      = 1,
                BoundUpperLong  = 2,
                BoundValueCount
                }

        // Collation strengths are a subset of the AttributeValue members
        typedef AttributeValue Strength;

        /***********************************************************************

                Open a UCollator for comparing strings. The locale specified
                determines the required collation rules. Special values for
                locales can be passed in - if ULocale.Default is passed for
                the locale, the default locale collation rules will be used.
                If ULocale.Root is passed, UCA rules will be used

        ***********************************************************************/

        this (ULocale locale)
        {
                UErrorCode e;

                handle = ucol_open (toString(locale.name), e);
                testError (e, "failed to open collator");
        }

        /***********************************************************************

                Produce a UCollator instance according to the rules supplied.

                The rules are used to change the default ordering, defined in
                the UCA in a process called tailoring. For the syntax of the
                rules please see users guide

        ***********************************************************************/

        this (UStringView rules, AttributeValue mode, Strength strength)
        {
                UErrorCode e;

                // no parse-error structure is requested (null)
                handle = ucol_openRules (rules.get.ptr, rules.len, mode, strength, null, e);
                testError (e, "failed to open rules-based collator");
        }

        /***********************************************************************

                Open a collator defined by a short form string. The
                structure and the syntax of the string is defined in
                the "Naming collators" section of the users guide:
                http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators
                Attributes are overridden by the subsequent attributes.
                So, for "S2_S3", final strength will be 3. 3066bis
                locale overrides individual locale parts.

                The call to this constructor is equivalent to a plain
                constructor, followed by a series of calls to setAttribute
                and setVariableTop

        ***********************************************************************/

        this (char[] shortName, bool forceDefaults)
        {
                UErrorCode e;

                // no parse-error structure is requested (null)
                handle = ucol_openFromShortString (toString(shortName), forceDefaults, null, e);
                testError (e, "failed to open short-name collator");
        }

        /***********************************************************************

                Internal constructor invoked via USearch. Adopts the given
                handle as-is; note that the destructor will close it.

        ***********************************************************************/

        package this (Handle handle)
        {
                this.handle = handle;
        }

        /***********************************************************************

                Close a UCollator

        ***********************************************************************/

        ~this ()
        {
                ucol_close (handle);
        }

        /***********************************************************************

                Get a set containing the contractions defined by the
                collator.

                The set includes both the UCA contractions and the
                contractions defined by the collator. This set will
                contain only strings. If a tailoring explicitly
                suppresses contractions from the UCA (like Russian),
                removed contractions will not be in the resulting set.

        ***********************************************************************/

        void getContractions (USet set)
        {
                UErrorCode e;

                ucol_getContractions (handle, set.handle, e);
                testError (e, "failed to get collator contractions");
        }

        /***********************************************************************

                Compare two strings. The return value is negative, zero
                or positive

        ***********************************************************************/

        int strcoll (UStringView source, UStringView target)
        {
                return ucol_strcoll (handle, source.get.ptr, source.len, target.get.ptr, target.len);
        }

        /***********************************************************************

                Determine if one string is greater than another. This
                function is equivalent to strcoll() > 0

        ***********************************************************************/

        bool greater (UStringView source, UStringView target)
        {
                // the C function returns a byte; non-zero means true
                return ucol_greater (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0;
        }

        /***********************************************************************

                Determine if one string is greater than or equal to
                another. This function is equivalent to strcoll() >= 0

        ***********************************************************************/

        bool greaterOrEqual (UStringView source, UStringView target)
        {
                return ucol_greaterOrEqual (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0;
        }

        /***********************************************************************

                This function is equivalent to strcoll() == 0

        ***********************************************************************/

        bool equal (UStringView source, UStringView target)
        {
                return ucol_equal (handle, source.get.ptr, source.len, target.get.ptr, target.len) != 0;
        }

        /***********************************************************************

                Get the collation strength used in a UCollator. The
                strength influences how strings are compared.

        ***********************************************************************/

        Strength getStrength ()
        {
                return ucol_getStrength (handle);
        }

        /***********************************************************************

                Set the collation strength used in this UCollator. The
                strength influences how strings are compared. One of
                Primary, Secondary, Tertiary, Quaternary, Identical, or
                Default

        ***********************************************************************/

        void setStrength (Strength s)
        {
                ucol_setStrength (handle, s);
        }

        /***********************************************************************

                Get the display name for a UCollator. The display name is
                suitable for presentation to a user. The name is written
                into dst.

        ***********************************************************************/

        void getDisplayName (ULocale obj, ULocale display, UString dst)
        {
                // Invoked by dst.format with the buffer to fill and its
                // capacity; the result must be written there rather than
                // into dst's current content.
                uint fmt (wchar* p, uint len, inout UErrorCode e)
                {
                        return ucol_getDisplayName (toString(obj.name), toString(display.name), p, len, e);
                }

                dst.format (&fmt, "failed to get collator display name");
        }

        /***********************************************************************

                Returns current rules. Options define whether full rules
                are returned or just the tailoring. The rules are written
                into dst.

        ***********************************************************************/

        void getRules (UString dst, RuleOption o = RuleOption.FullRules)
        {
                // Invoked by dst.format with the buffer to fill and its
                // capacity. ucol_getRulesEx takes no error argument, so a
                // too-small buffer is reported by hand via BufferOverflow.
                uint fmt (wchar* p, uint len, inout UErrorCode e)
                {
                        uint needed = ucol_getRulesEx (handle, o, p, len);
                        if (needed > len)
                            e = e.BufferOverflow;
                        return needed;
                }

                dst.format (&fmt, "failed to get collator rules");
        }

        /***********************************************************************

                Get the short definition string for a collator.

                This API harvests the collator's locale and the attribute
                set and produces a string that can be used for opening a
                collator with the same properties using the char[] style
                constructor. This string will be normalized.

                The structure and the syntax of the string is defined in the
                "Naming collators" section of the users guide:
                http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators

                The result is assembled in a 64-char stack buffer and
                returned as a heap copy.

        ***********************************************************************/

        char[] getShortDefinitionString (ULocale locale = ULocale.Default)
        {
                UErrorCode e;
                char[64]   dst;

                uint len = ucol_getShortDefinitionString (handle, toString(locale.name), dst.ptr, dst.length, e);
                testError (e, "failed to get collator short name");

                // dup: dst lives on the stack and must not escape
                return dst[0..len].dup;
        }

        /***********************************************************************

                Verifies and normalizes short definition string. Normalized
                short definition string has all the options sorted by the
                argument name, so that equivalent definition strings are the
                same

                The result is assembled in a 64-char stack buffer and
                returned as a heap copy.

        ***********************************************************************/

        char[] normalizeShortDefinitionString (char[] source)
        {
                UErrorCode e;
                char[64]   dst;

                // no parse-error structure is requested (null)
                uint len = ucol_normalizeShortDefinitionString (toString(source), dst.ptr, dst.length, null, e);
                testError (e, "failed to normalize collator short name");

                // dup: dst lives on the stack and must not escape
                return dst[0..len].dup;
        }

        /***********************************************************************

                Get a sort key for a string from a UCollator. Sort keys
                may be compared using strcmp.

                Returns the slice of result holding the key, or null when
                the key does not fit into result.

        ***********************************************************************/

        ubyte[] getSortKey (UStringView t, ubyte[] result)
        {
                uint len = ucol_getSortKey (handle, t.get.ptr, t.len, result.ptr, result.length);

                // len is the number of bytes required; an exact fit is fine
                if (len <= result.length)
                    return result [0..len];
                return null;
        }

        /***********************************************************************

                Merge two sort keys. The levels are merged with their
                corresponding counterparts (primaries with primaries,
                secondaries with secondaries etc.). Between the values
                from the same level a separator is inserted. example
                (uncompressed): 191B1D 01 050505 01 910505 00 and
                1F2123 01 050505 01 910505 00 will be merged as
                191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
                This allows for concatenating of first and last names for
                sorting, among other things. If the destination buffer is
                not big enough, the results are undefined. If any of source
                lengths are zero or any of source pointers are null/undefined,
                result is of size zero.

                Returns the slice of result holding the merged key, or
                null when the merged key does not fit into result.

        ***********************************************************************/

        ubyte[] mergeSortkeys (ubyte[] left, ubyte[] right, ubyte[] result)
        {
                uint len = ucol_mergeSortkeys (left.ptr, left.length, right.ptr, right.length, result.ptr, result.length);

                // len is the number of bytes required; an exact fit is fine
                if (len <= result.length)
                    return result [0..len];
                return null;
        }

        /***********************************************************************

                Produce a bound for a given sortkey and a number of levels.

                The underlying ICU call always reports the number of bytes
                needed, regardless of whether the result buffer was big
                enough or even valid. This method returns the slice of
                result holding the bound, or null when it does not fit.

                Resulting bounds can be used to produce a range of strings
                that are between upper and lower bounds. For example, if
                bounds are produced for a sortkey of string "smith", strings
                between upper and lower bounds with one level would include
                "Smith", "SMITH", "sMiTh".

                There are two upper bounds that can be produced. If BoundUpper
                is produced, strings matched would be as above. However, if
                bound produced using BoundUpperLong is used, the above example
                will also match "Smithsonian" and similar.

        ***********************************************************************/

        ubyte[] getBound (BoundMode mode, ubyte[] source, ubyte[] result, uint levels = 1)
        {
                UErrorCode e;

                uint len = ucol_getBound (source.ptr, source.length, mode, levels, result.ptr, result.length, e);
                testError (e, "failed to get sortkey bound");

                // len is the number of bytes required; an exact fit is fine
                if (len <= result.length)
                    return result [0..len];
                return null;
        }

        /***********************************************************************

                Gets the version information for a Collator.

                Version is currently an opaque 32-bit number which depends,
                among other things, on major versions of the collator
                tailoring and UCA

        ***********************************************************************/

        void getVersion (inout Version v)
        {
                ucol_getVersion (handle, v);
        }

        /***********************************************************************

                Gets the UCA version information for this Collator

        ***********************************************************************/

        void getUCAVersion (inout Version v)
        {
                ucol_getUCAVersion (handle, v);
        }

        /***********************************************************************

                Universal attribute setter

        ***********************************************************************/

        void setAttribute (Attribute attr, AttributeValue value)
        {
                UErrorCode e;

                ucol_setAttribute (handle, attr, value, e);
                testError (e, "failed to set collator attribute");
        }

        /***********************************************************************

                Universal attribute getter

        ***********************************************************************/

        AttributeValue getAttribute (Attribute attr)
        {
                UErrorCode e;

                AttributeValue v = ucol_getAttribute (handle, attr, e);
                testError (e, "failed to get collator attribute");
                return v;
        }

        /***********************************************************************

                Variable top is a two byte primary value which causes all
                the codepoints with primary values that are less or equal
                than the variable top to be shifted when alternate handling
                is set to Shifted.

        ***********************************************************************/

        void setVariableTop (UStringView t)
        {
                UErrorCode e;

                // the value returned by the C function is not used here
                ucol_setVariableTop (handle, t.get.ptr, t.len, e);
                testError (e, "failed to set variable-top");
        }

        /***********************************************************************

                Sets the variable top to a collation element value
                supplied. Variable top is set to the upper 16 bits.
                Lower 16 bits are ignored.

        ***********************************************************************/

        void setVariableTop (uint x)
        {
                UErrorCode e;

                ucol_restoreVariableTop (handle, x, e);
                testError (e, "failed to restore variable-top");
        }

        /***********************************************************************

                Gets the variable top value of this Collator. Lower 16 bits
                are undefined and should be ignored.

        ***********************************************************************/

        uint getVariableTop ()
        {
                UErrorCode e;

                uint x = ucol_getVariableTop (handle, e);
                testError (e, "failed to get variable-top");
                return x;
        }

        /***********************************************************************

                Gets the locale name of the collator. If the collator is
                instantiated from the rules, then this function will throw
                an exception

                The name is stored into locale.name; locale is left
                untouched when the lookup fails.

        ***********************************************************************/

        void getLocale (ULocale locale, ULocale.Type type)
        {
                UErrorCode e;

                // validate first, so a failed lookup cannot clobber the
                // caller's locale
                auto name = toArray (ucol_getLocaleByType (handle, type, e));
                if (isError(e) || name is null)
                    exception ("failed to get collator locale");

                locale.name = name;
        }

        /***********************************************************************

                Get the Unicode set that contains all the characters and
                sequences tailored in this collator.

        ***********************************************************************/

        USet getTailoredSet ()
        {
                UErrorCode e;

                Handle h = ucol_getTailoredSet (handle, e);
                testError (e, "failed to get tailored set");
                return new USet (h);
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        // Library token returned by FunctionLoader.bind; passed back to
        // FunctionLoader.unbind in the static destructor.
        private static void* library;

        /***********************************************************************

                C function pointers, filled in by the static constructor
                from the names listed in the targets table below.

        ***********************************************************************/

        private static extern (C)
        {
                void            function (Handle) ucol_close;
                Handle          function (char *loc, inout UErrorCode e) ucol_open;
                Handle          function (wchar* rules, uint rulesLength, AttributeValue normalizationMode, Strength strength, UParseError *parseError, inout UErrorCode e) ucol_openRules;
                Handle          function (char *definition, byte forceDefaults, UParseError *parseError, inout UErrorCode e) ucol_openFromShortString;
                uint            function (Handle, Handle conts, inout UErrorCode e) ucol_getContractions;
                int             function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_strcoll;
                byte            function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greater;
                byte            function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_greaterOrEqual;
                byte            function (Handle, wchar* source, uint sourceLength, wchar* target, uint targetLength) ucol_equal;
                Strength        function (Handle) ucol_getStrength;
                void            function (Handle, Strength strength) ucol_setStrength;
                uint            function (char *objLoc, char *dispLoc, wchar* result, uint resultLength, inout UErrorCode e) ucol_getDisplayName;
                uint            function (Handle, char *locale, char *buffer, uint capacity, inout UErrorCode e) ucol_getShortDefinitionString;
                uint            function (char *source, char *destination, uint capacity, UParseError *parseError, inout UErrorCode e) ucol_normalizeShortDefinitionString;
                uint            function (Handle, wchar* source, uint sourceLength, ubyte *result, uint resultLength) ucol_getSortKey;
                uint            function (ubyte *source, uint sourceLength, BoundMode boundType, uint noOfLevels, ubyte *result, uint resultLength, inout UErrorCode e) ucol_getBound;
                void            function (Handle, Version info) ucol_getVersion;
                void            function (Handle, Version info) ucol_getUCAVersion;
                uint            function (ubyte *src1, uint src1Length, ubyte *src2, uint src2Length, ubyte *dest, uint destCapacity) ucol_mergeSortkeys;
                void            function (Handle, Attribute attr, AttributeValue value, inout UErrorCode e) ucol_setAttribute;
                AttributeValue  function (Handle, Attribute attr, inout UErrorCode e) ucol_getAttribute;
                uint            function (Handle, wchar* varTop, uint len, inout UErrorCode e) ucol_setVariableTop;
                uint            function (Handle, inout UErrorCode e) ucol_getVariableTop;
                void            function (Handle, uint varTop, inout UErrorCode e) ucol_restoreVariableTop;
                uint            function (Handle, RuleOption delta, wchar* buffer, uint bufferLen) ucol_getRulesEx;
                char*           function (Handle, ULocale.Type type, inout UErrorCode e) ucol_getLocaleByType;
                Handle          function (Handle, inout UErrorCode e) ucol_getTailoredSet;
        }

        /***********************************************************************

                Maps each function pointer above to the symbol name it is
                bound to in the shared library.

        ***********************************************************************/

        static  FunctionLoader.Bind[] targets =
                [
                {cast(void**) &ucol_open,                           "ucol_open"},
                {cast(void**) &ucol_close,                          "ucol_close"},
                {cast(void**) &ucol_openRules,                      "ucol_openRules"},
                {cast(void**) &ucol_openFromShortString,            "ucol_openFromShortString"},
                {cast(void**) &ucol_getContractions,                "ucol_getContractions"},
                {cast(void**) &ucol_strcoll,                        "ucol_strcoll"},
                {cast(void**) &ucol_greater,                        "ucol_greater"},
                {cast(void**) &ucol_greaterOrEqual,                 "ucol_greaterOrEqual"},
                {cast(void**) &ucol_equal,                          "ucol_equal"},
                {cast(void**) &ucol_getStrength,                    "ucol_getStrength"},
                {cast(void**) &ucol_setStrength,                    "ucol_setStrength"},
                {cast(void**) &ucol_getDisplayName,                 "ucol_getDisplayName"},
                {cast(void**) &ucol_getShortDefinitionString,       "ucol_getShortDefinitionString"},
                {cast(void**) &ucol_normalizeShortDefinitionString, "ucol_normalizeShortDefinitionString"},
                {cast(void**) &ucol_getSortKey,                     "ucol_getSortKey"},
                {cast(void**) &ucol_getBound,                       "ucol_getBound"},
                {cast(void**) &ucol_getVersion,                     "ucol_getVersion"},
                {cast(void**) &ucol_getUCAVersion,                  "ucol_getUCAVersion"},
                {cast(void**) &ucol_mergeSortkeys,                  "ucol_mergeSortkeys"},
                {cast(void**) &ucol_setAttribute,                   "ucol_setAttribute"},
                {cast(void**) &ucol_getAttribute,                   "ucol_getAttribute"},
                {cast(void**) &ucol_setVariableTop,                 "ucol_setVariableTop"},
                {cast(void**) &ucol_getVariableTop,                 "ucol_getVariableTop"},
                {cast(void**) &ucol_restoreVariableTop,             "ucol_restoreVariableTop"},
                {cast(void**) &ucol_getRulesEx,                     "ucol_getRulesEx"},
                {cast(void**) &ucol_getLocaleByType,                "ucol_getLocaleByType"},
                {cast(void**) &ucol_getTailoredSet,                 "ucol_getTailoredSet"},
                ];

        /***********************************************************************

                Bind all entries of the targets table from the icuin
                library when the module is initialized.

        ***********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (icuin, targets);
        }

        /***********************************************************************

                Release the library binding when the module is torn down.

        ***********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}
732