comparison tango/tango/text/Unicode.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
comparison
equal deleted inserted replaced
131:5825d48b27d1 132:1700239cab2e
1 /*******************************************************************************
2
3 copyright: Copyright (c) 2007 Peter Triller. All rights reserved
4
5 license: BSD style: $(LICENSE)
6
7 version: Initial release: Sept 2007
8
9 authors: Peter
10
11 Provides case mapping Functions for Unicode Strings. As of now it is
12 only 99 % complete, because it does not take into account Conditional
13 case mappings. This means the Greek Letter Sigma will not be correctly
14 case mapped at the end of a Word, and the Locales Lithuanian, Turkish
15 and Azeri are not taken into account during Case Mappings. This means
16 all in all around 12 Characters will not be mapped correctly under
17 some circumstances.
18
19 ICU4j also does not handle these cases at the moment.
20
21 Unittests are written against output from ICU4j
22
23 This Module tries to minimize Memory allocation and usage. You can
24 always pass the output buffer that should be used to the case mapping
25 function, which will be resized if necessary.
26
27 *******************************************************************************/
28
29 module tango.text.Unicode;
30
31 private import tango.text.UnicodeData;
32 private import tango.text.convert.Utf;
33
34
35
/**
 * Converts an Utf8 String to Upper case.
 *
 * The whole input is first case mapped into an Utf32 working buffer,
 * which is then converted to Utf8 in a single toString call.
 * This function is declared deprecated; toUpper(char[], char[]) in this
 * module offers the same conversion.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * working = optional Utf32 scratch buffer, it is grown if too small
 * Returns: the case mapped string
 */
deprecated char[] blockToUpper(char[] input, char[] output = null, dchar[] working = null) {

    // Worst case preallocation: one code point per input code unit
    if (working is null)
        working.length = input.length;

    uint produced = 0;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData **d = (ch in unicodeData);
        if(d !is null && ((*d).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **s = (ch in specialCaseData);
            debug {
                assert(s !is null);
            }
            auto mapping = (*s).upperCaseMapping;
            if(mapping !is null) {
                // Grow the working buffer so the whole mapping fits
                if(produced + mapping.length >= working.length)
                    working.length = working.length + working.length / 2 + mapping.length;
                uint start = produced;
                produced += mapping.length;
                working[start..produced] = mapping;
                continue;
            }
            // no special upper case mapping: use the simple mapping below
        }
        // The code point is stored in the working buffer, so it is the
        // working buffer (not the output buffer) that has to have room.
        if(produced >= working.length)
            working.length = working.length + working.length / 2 + 1;
        working[produced++] = d is null ? ch:(*d).simpleUpperCaseMapping;
    }
    return toString(working[0..produced],output);
}
82
83
84
/**
 * Upper cases an Utf8 String.
 *
 * Every code point of the input is looked up in the Unicode tables. If it
 * has a special (one to many) upper case mapping that mapping is emitted,
 * otherwise its simple upper case mapping. Code points without a table
 * entry are copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
char[] toUpper(char[] input, char[] output = null) {

    dchar[1] single;
    // Most common case: the result is as long as the input
    if (input.length > output.length)
        output.length = input.length;

    uint written = 0;
    uint consumed;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData **entry = (ch in unicodeData);
        if(entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **special = (ch in specialCaseData);
            debug {
                assert(special !is null);
            }
            auto mapping = (*special).upperCaseMapping;
            if(mapping !is null) {
                // Reserve 4 code units per mapped code point up front, so
                // that the conversion below can write in place (the debug
                // asserts check that no relocation happened).
                auto worstCase = mapping.length * 4;
                if(written + worstCase >= output.length)
                    output.length = output.length + output.length / 2 + worstCase;
                char[] encoded = toString(mapping, output[written..output.length], &consumed);
                debug {
                    assert(consumed == mapping.length);
                    assert(encoded.ptr == output.ptr + written);
                }
                written += encoded.length;
                continue;
            }
        }
        // Same reservation for a single code point
        if(written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        if(entry is null)
            single[0] = ch;
        else
            single[0] = (*entry).simpleUpperCaseMapping;
        char[] encoded = toString(single, output[written..output.length], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output.ptr + written);
        }
        written += encoded.length;
    }
    return output[0..written];
}
139
140
/**
 * Upper cases an Utf16 String.
 *
 * Every code point of the input is looked up in the Unicode tables. If it
 * has a special (one to many) upper case mapping that mapping is emitted,
 * otherwise its simple upper case mapping. Code points without a table
 * entry are copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
wchar[] toUpper(wchar[] input, wchar[] output = null) {

    dchar[1] single;
    // Most common case: the result is as long as the input
    if (input.length > output.length)
        output.length = input.length;

    uint written = 0;
    uint consumed;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData **entry = (ch in unicodeData);
        if(entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **special = (ch in specialCaseData);
            debug {
                assert(special !is null);
            }
            auto mapping = (*special).upperCaseMapping;
            if(mapping !is null) {
                // Reserve room up front, so that the conversion below can
                // write in place (the debug asserts check that no
                // relocation happened).
                if(written + mapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + mapping.length * 3;
                wchar[] encoded = toString16(mapping, output[written..output.length], &consumed);
                debug {
                    assert(consumed == mapping.length);
                    assert(encoded.ptr == output.ptr + written);
                }
                written += encoded.length;
                continue;
            }
        }
        // Same reservation for a single code point
        if(written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        if(entry is null)
            single[0] = ch;
        else
            single[0] = (*entry).simpleUpperCaseMapping;
        wchar[] encoded = toString16(single, output[written..output.length], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output.ptr + written);
        }
        written += encoded.length;
    }
    return output[0..written];
}
194
/**
 * Converts an Utf32 String to Upper case
 *
 * A code point with a special (one to many) upper case mapping is replaced
 * by that mapping. Every other code point is replaced by its simple upper
 * case mapping, or copied unchanged if it has no entry in the Unicode
 * table.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toUpper(dchar[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    uint produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData **d = (orig in unicodeData);
        if(d !is null && ((*d).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **s = (orig in specialCaseData);
            debug {
                assert(s !is null);
            }
            auto mapping = (*s).upperCaseMapping;
            if(mapping !is null) {
                // Better resize strategy ???
                if(produced + mapping.length > output.length)
                    output.length = output.length + output.length / 2 + mapping.length;
                foreach(ch; mapping) {
                    output[produced++] = ch;
                }
                continue;
            }
            // The character is special cased, but not for upper case.
            // Fall through to the simple mapping (as the Utf8 and Utf16
            // overloads do) instead of dropping the character.
        }
        // "+ 1" guarantees the buffer really grows, even when it is tiny
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig:(*d).simpleUpperCaseMapping;
    }
    return output[0..produced];
}
235
236
/**
 * Lower cases an Utf8 String.
 *
 * Every code point of the input is looked up in the Unicode tables. If it
 * has a special (one to many) lower case mapping that mapping is emitted,
 * otherwise its simple lower case mapping. Code points without a table
 * entry are copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
char[] toLower(char[] input, char[] output = null) {

    dchar[1] single;
    // Most common case: the result is as long as the input
    if (input.length > output.length)
        output.length = input.length;

    uint written = 0;
    uint consumed;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData **entry = (ch in unicodeData);
        if(entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **special = (ch in specialCaseData);
            debug {
                assert(special !is null);
            }
            auto mapping = (*special).lowerCaseMapping;
            if(mapping !is null) {
                // Reserve 4 code units per mapped code point up front, so
                // that the conversion below can write in place (the debug
                // asserts check that no relocation happened).
                auto worstCase = mapping.length * 4;
                if(written + worstCase >= output.length)
                    output.length = output.length + output.length / 2 + worstCase;
                char[] encoded = toString(mapping, output[written..output.length], &consumed);
                debug {
                    assert(consumed == mapping.length);
                    assert(encoded.ptr == output.ptr + written);
                }
                written += encoded.length;
                continue;
            }
        }
        // Same reservation for a single code point
        if(written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        if(entry is null)
            single[0] = ch;
        else
            single[0] = (*entry).simpleLowerCaseMapping;
        char[] encoded = toString(single, output[written..output.length], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output.ptr + written);
        }
        written += encoded.length;
    }
    return output[0..written];
}
291
292
/**
 * Lower cases an Utf16 String.
 *
 * Every code point of the input is looked up in the Unicode tables. If it
 * has a special (one to many) lower case mapping that mapping is emitted,
 * otherwise its simple lower case mapping. Code points without a table
 * entry are copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
wchar[] toLower(wchar[] input, wchar[] output = null) {

    dchar[1] single;
    // Most common case: the result is as long as the input
    if (input.length > output.length)
        output.length = input.length;

    uint written = 0;
    uint consumed;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData **entry = (ch in unicodeData);
        if(entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **special = (ch in specialCaseData);
            debug {
                assert(special !is null);
            }
            auto mapping = (*special).lowerCaseMapping;
            if(mapping !is null) {
                // Reserve room up front, so that the conversion below can
                // write in place (the debug asserts check that no
                // relocation happened).
                if(written + mapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + mapping.length * 3;
                wchar[] encoded = toString16(mapping, output[written..output.length], &consumed);
                debug {
                    assert(consumed == mapping.length);
                    assert(encoded.ptr == output.ptr + written);
                }
                written += encoded.length;
                continue;
            }
        }
        // Same reservation for a single code point
        if(written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        if(entry is null)
            single[0] = ch;
        else
            single[0] = (*entry).simpleLowerCaseMapping;
        wchar[] encoded = toString16(single, output[written..output.length], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output.ptr + written);
        }
        written += encoded.length;
    }
    return output[0..written];
}
346
347
/**
 * Converts an Utf32 String to Lower case
 *
 * A code point with a special (one to many) lower case mapping is replaced
 * by that mapping. Every other code point is replaced by its simple lower
 * case mapping, or copied unchanged if it has no entry in the Unicode
 * table.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toLower(dchar[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    uint produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData **d = (orig in unicodeData);
        if(d !is null && ((*d).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData **s = (orig in specialCaseData);
            debug {
                assert(s !is null);
            }
            auto mapping = (*s).lowerCaseMapping;
            if(mapping !is null) {
                // Better resize strategy ???
                if(produced + mapping.length > output.length)
                    output.length = output.length + output.length / 2 + mapping.length;
                foreach(ch; mapping) {
                    output[produced++] = ch;
                }
                continue;
            }
            // The character is special cased, but not for lower case.
            // Fall through to the simple mapping (as the Utf8 and Utf16
            // overloads do) instead of dropping the character.
        }
        // "+ 1" guarantees the buffer really grows, even when it is tiny
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig:(*d).simpleLowerCaseMapping;
    }
    return output[0..produced];
}
388
/**
 * Case folds an Utf8 String.
 * Folding case is used for case insensitive comparisons.
 *
 * A code point with an entry in the folding table is replaced by the
 * mapping stored there, every other code point is copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
char[] toFold(char[] input, char[] output = null) {

    dchar[1] single;
    // Most common case: the result is as long as the input
    if (input.length > output.length)
        output.length = input.length;

    uint written = 0;
    uint consumed;
    foreach(dchar ch; input) {
        FoldingCaseData **fold = (ch in foldingCaseData);
        if(fold is null) {
            // No folding entry: re-encode the code point as it is.
            // Room is reserved first, so that the conversion can write in
            // place (the debug asserts check that no relocation happened).
            if(written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 4;
            single[0] = ch;
            char[] copied = toString(single, output[written..output.length], &consumed);
            debug {
                assert(consumed == 1);
                assert(copied.ptr == output.ptr + written);
            }
            written += copied.length;
            continue;
        }
        // Folding entry found: reserve 4 code units per mapped code point
        auto mapping = (*fold).mapping;
        auto worstCase = mapping.length * 4;
        if(written + worstCase >= output.length)
            output.length = output.length + output.length / 2 + worstCase;
        char[] folded = toString(mapping, output[written..output.length], &consumed);
        debug {
            assert(consumed == mapping.length);
            assert(folded.ptr == output.ptr + written);
        }
        written += folded.length;
    }
    return output[0..written];
}
437
/**
 * Case folds an Utf16 String.
 * Folding case is used for case insensitive comparisons.
 *
 * A code point with an entry in the folding table is replaced by the
 * mapping stored there, every other code point is copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
wchar[] toFold(wchar[] input, wchar[] output = null) {

    dchar[1] single;
    // Most common case: the result is as long as the input
    if (input.length > output.length)
        output.length = input.length;

    uint written = 0;
    uint consumed;
    foreach(dchar ch; input) {
        FoldingCaseData **fold = (ch in foldingCaseData);
        if(fold is null) {
            // No folding entry: re-encode the code point as it is.
            // Room is reserved first, so that the conversion can write in
            // place (the debug asserts check that no relocation happened).
            if(written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 3;
            single[0] = ch;
            wchar[] copied = toString16(single, output[written..output.length], &consumed);
            debug {
                assert(consumed == 1);
                assert(copied.ptr == output.ptr + written);
            }
            written += copied.length;
            continue;
        }
        // Folding entry found: reserve room for the whole mapping
        auto mapping = (*fold).mapping;
        if(written + mapping.length * 2 >= output.length)
            output.length = output.length + output.length / 2 + mapping.length * 3;
        wchar[] folded = toString16(mapping, output[written..output.length], &consumed);
        debug {
            assert(consumed == mapping.length);
            assert(folded.ptr == output.ptr + written);
        }
        written += folded.length;
    }
    return output[0..written];
}
485
/**
 * Case folds an Utf32 String.
 * Folding case is used for case insensitive comparisons.
 *
 * A code point with an entry in the folding table is replaced by the
 * mapping stored there, every other code point is copied unchanged.
 *
 * Params:
 * input = String to be case mapped
 * output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toFold(dchar[] input, dchar[] output = null) {

    // Most common case: the result is as long as the input
    if (output.length < input.length)
        output.length = input.length;

    uint written = 0;
    // (iterating an empty input simply does nothing)
    foreach(dchar orig; input) {
        FoldingCaseData **fold = (orig in foldingCaseData);
        if(fold is null) {
            // No folding entry: copy the code point
            if(written >= output.length)
                output.length = output.length + output.length / 2;
            output[written++] = orig;
            continue;
        }
        // Better resize strategy ???
        auto mapping = (*fold).mapping;
        if(written + mapping.length > output.length)
            output.length = output.length + output.length / 2 + mapping.length;
        foreach(mapped; mapping) {
            output[written++] = mapped;
        }
    }
    return output[0..written];
}
520
521
/**
 * Tells whether a character is a decimal digit, i.e. whether its
 * General Category is Nd. Other kinds of digits yield false.
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for decimal digits, false otherwise (also for characters
 *          without an entry in the Unicode table)
 */
bool isDigit(dchar ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    return ((*entry).generalCategory & UnicodeData.GeneralCategory.Nd) != 0;
}
533
534
/**
 * Tells whether a character is a letter, i.e. whether its General
 * Category is one of Lu, Ll, Lt, Lm or Lo.
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for letters, false otherwise (also for characters
 *          without an entry in the Unicode table)
 */
bool isLetter(int ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    auto letterMask = UnicodeData.GeneralCategory.Lu
                    | UnicodeData.GeneralCategory.Ll
                    | UnicodeData.GeneralCategory.Lt
                    | UnicodeData.GeneralCategory.Lm
                    | UnicodeData.GeneralCategory.Lo;
    return ((*entry).generalCategory & letterMask) != 0;
}
550
/**
 * Tells whether a character is a letter or a decimal digit, i.e. whether
 * its General Category is one of Lu, Ll, Lt, Lm, Lo or Nd.
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for letters and decimal digits, false otherwise (also for
 *          characters without an entry in the Unicode table)
 */
bool isLetterOrDigit(int ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    auto letterOrDigitMask = UnicodeData.GeneralCategory.Lu
                           | UnicodeData.GeneralCategory.Ll
                           | UnicodeData.GeneralCategory.Lt
                           | UnicodeData.GeneralCategory.Lm
                           | UnicodeData.GeneralCategory.Lo
                           | UnicodeData.GeneralCategory.Nd;
    return ((*entry).generalCategory & letterOrDigitMask) != 0;
}
568
/**
 * Tells whether a character is a lower case letter (General Category Ll).
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for lower case letters, false otherwise (also for
 *          characters without an entry in the Unicode table)
 */
bool isLower(dchar ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    return ((*entry).generalCategory & UnicodeData.GeneralCategory.Ll) != 0;
}
578
/**
 * Tells whether a character is a title case letter (General Category Lt).
 * For combined letters, title case means that only the first part is
 * upper case while the second part is lower case. Some of these special
 * characters can be found in the Croatian and Greek languages.
 * See_Also: http://en.wikipedia.org/wiki/Capitalization
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for title case letters, false otherwise (also for
 *          characters without an entry in the Unicode table)
 */
bool isTitle(dchar ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    return ((*entry).generalCategory & UnicodeData.GeneralCategory.Lt) != 0;
}
591
/**
 * Tells whether a character is an upper case letter (General Category Lu).
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for upper case letters, false otherwise (also for
 *          characters without an entry in the Unicode table)
 */
bool isUpper(dchar ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    return ((*entry).generalCategory & UnicodeData.GeneralCategory.Lu) != 0;
}
601
/**
 * Tells whether a character is a Whitespace character.
 * Whitespace characters are the characters of the General Categories
 * Zs, Zl and Zp, except for the no-break spaces (U+00A0, U+202F, U+FEFF),
 * plus the ASCII control characters that are used as spaces:
 * TAB LF VT FF CR (U+0009 .. U+000D) and FS GS RS US (U+001C .. U+001F).
 *
 * WARNING: look at isSpace, maybe that function does
 * more what you expect.
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for Whitespace characters, false otherwise
 */
bool isWhitespace(dchar ch) {
    // ASCII control characters used as spaces
    bool controlSpace = (ch >= 0x0009 && ch <= 0x000D)
                     || (ch >= 0x001C && ch <= 0x001F);
    if (controlSpace)
        return true;

    // The no-break spaces never count as Whitespace
    if (ch == 0x00A0        // NBSP
        || ch == 0x202F     // NARROW NBSP
        || ch == 0xFEFF)    // ZERO WIDTH NBSP
        return false;

    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    auto separatorMask = UnicodeData.GeneralCategory.Zs
                       | UnicodeData.GeneralCategory.Zl
                       | UnicodeData.GeneralCategory.Zp;
    return ((*entry).generalCategory & separatorMask) != 0;
}
628
/**
 * Tells whether a character is a Space character as specified in the
 * Unicode Standard, i.e. whether its General Category is Zs, Zl or Zp.
 *
 * WARNING: look at isWhitespace, maybe that function does
 * more what you expect.
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for Space characters, false otherwise (also for
 *          characters without an entry in the Unicode table)
 */
bool isSpace(dchar ch) {
    UnicodeData **entry = (ch in unicodeData);
    if (entry is null)
        return false;
    auto separatorMask = UnicodeData.GeneralCategory.Zs
                       | UnicodeData.GeneralCategory.Zl
                       | UnicodeData.GeneralCategory.Zp;
    return ((*entry).generalCategory & separatorMask) != 0;
}
646
647
/**
 * Determines if a character is a printable character as
 * specified in the Unicode Standard.
 *
 * A character is treated as printable if it has an entry in the Unicode
 * table and its General Category is none of the "Other" categories:
 * Cn (not assigned), Cc (control), Cf (format), Co (private use) and
 * Cs (surrogate).
 *
 * Params:
 * ch = the character to be inspected
 * Returns: true for printable characters, false otherwise (also for
 *          characters without an entry in the Unicode table)
 */
bool isPrintable(dchar ch) {
    UnicodeData **d = (ch in unicodeData);
    // Note the negation: the listed categories are exactly the ones that
    // are NOT printable.
    return (d !is null) && !((*d).generalCategory &
            ( UnicodeData.GeneralCategory.Cn
            | UnicodeData.GeneralCategory.Cc
            | UnicodeData.GeneralCategory.Cf
            | UnicodeData.GeneralCategory.Co
            | UnicodeData.GeneralCategory.Cs));
}
668
// Everything from here to the end of the module is only compiled when the
// debug identifier UnicodeTest is set (the colon form of a debug
// condition applies to all declarations that follow it).
debug (UnicodeTest):

// Empty entry point, so that the module can be built as a stand alone
// program.
void main() {
}
671
672 debug (UnitTest) {
673
// toUpper for all three encodings: simple mappings and special
// (one to many) mappings, with and without a caller supplied buffer.
unittest {

    // Small letters a, o and u with diaeresis and their capital forms
    char[] lower8 = "\u00E4\u00F6\u00FC";
    wchar[] lower16 = "\u00E4\u00F6\u00FC";
    dchar[] lower32 = "\u00E4\u00F6\u00FC";
    char[] upper8 = "\u00C4\u00D6\u00DC";
    wchar[] upper16 = "\u00C4\u00D6\u00DC";
    dchar[] upper32 = "\u00C4\u00D6\u00DC";

    // 1) No Buffer passed, no resize, no SpecialCase
    char[] got8 = toUpper(lower8);
    assert(got8 == upper8);
    wchar[] got16 = toUpper(lower16);
    assert(got16 == upper16);
    dchar[] got32 = toUpper(lower32);
    assert(got32 == upper32);

    // 2) Buffer passed, no resize, no SpecialCase:
    //    the result has to live in the passed buffer
    char[60] roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    got8 = toUpper(lower8,roomy8);
    assert(got8.ptr == roomy8.ptr);
    assert(got8 == upper8);
    got16 = toUpper(lower16,roomy16);
    assert(got16.ptr == roomy16.ptr);
    assert(got16 == upper16);
    got32 = toUpper(lower32,roomy32);
    assert(got32.ptr == roomy32.ptr);
    assert(got32 == upper32);

    // 3) Buffer passed, resize necessary, no Special case:
    //    the result must not live in the (too small) passed buffer
    char[5] tight8;
    wchar[2] tight16;
    dchar[2] tight32;
    got8 = toUpper(lower8,tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == upper8);
    got16 = toUpper(lower16,tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == upper16);
    got32 = toUpper(lower32,tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == upper32);

    // 4) Buffer passed, resize necessary, extensive SpecialCase:
    //    the ligatures U+FB03, U+FB04 and U+FB05 expand to "FFI", "FFL"
    //    and "ST"
    char[] ligatures8 = "\uFB03\uFB04\uFB05";
    wchar[] ligatures16 = "\uFB03\uFB04\uFB05";
    dchar[] ligatures32 = "\uFB03\uFB04\uFB05";
    char[] expanded8 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    wchar[] expanded16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    dchar[] expanded32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    got8 = toUpper(ligatures8,tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == expanded8);
    got16 = toUpper(ligatures16,tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == expanded16);
    got32 = toUpper(ligatures32,tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == expanded32);

}
741
742
// toLower for all three encodings: simple mappings and special
// (one to many) mappings, with and without a caller supplied buffer.
unittest {

    // Capital letters A, O and U with diaeresis and their small forms
    char[] upper8 = "\u00C4\u00D6\u00DC";
    wchar[] upper16 = "\u00C4\u00D6\u00DC";
    dchar[] upper32 = "\u00C4\u00D6\u00DC";
    char[] lower8 = "\u00E4\u00F6\u00FC";
    wchar[] lower16 = "\u00E4\u00F6\u00FC";
    dchar[] lower32 = "\u00E4\u00F6\u00FC";

    // 1) No Buffer passed, no resize, no SpecialCase
    char[] got8 = toLower(upper8);
    assert(got8 == lower8);
    wchar[] got16 = toLower(upper16);
    assert(got16 == lower16);
    dchar[] got32 = toLower(upper32);
    assert(got32 == lower32);

    // 2) Buffer passed, no resize, no SpecialCase:
    //    the result has to live in the passed buffer
    char[60] roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    got8 = toLower(upper8,roomy8);
    assert(got8.ptr == roomy8.ptr);
    assert(got8 == lower8);
    got16 = toLower(upper16,roomy16);
    assert(got16.ptr == roomy16.ptr);
    assert(got16 == lower16);
    got32 = toLower(upper32,roomy32);
    assert(got32.ptr == roomy32.ptr);
    assert(got32 == lower32);

    // 3) Buffer passed, resize necessary, no Special case:
    //    the result must not live in the (too small) passed buffer
    char[5] tight8;
    wchar[2] tight16;
    dchar[2] tight32;
    got8 = toLower(upper8,tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == lower8);
    got16 = toLower(upper16,tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == lower16);
    got32 = toLower(upper32,tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == lower32);

    // 4) Buffer passed, resize necessary, extensive SpecialCase:
    //    U+0130 lower cases to the two code points U+0069 U+0307
    char[] dotted8 = "\u0130\u0130\u0130";
    wchar[] dotted16 = "\u0130\u0130\u0130";
    dchar[] dotted32 = "\u0130\u0130\u0130";
    char[] expanded8 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    wchar[] expanded16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    dchar[] expanded32 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    got8 = toLower(dotted8,tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == expanded8);
    got16 = toLower(dotted16,tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == expanded16);
    got32 = toLower(dotted32,tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == expanded32);
}
808
// toFold for all three encodings: two strings that only differ in case
// (and in the composed / decomposed spelling of U+0390) have to fold to
// the same string.
unittest {
    char[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
    char[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf8) == toFold(testString2utf8));
    wchar[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
    wchar[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf16) == toFold(testString2utf16));
    // These have to be dchar[], otherwise the Utf16 overload is tested a
    // second time and the Utf32 overload of toFold is never exercised.
    dchar[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
    dchar[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf32) == toFold(testString2utf32));
}
820
821 }