projects/ldc: tango/tango/text/Unicode.d comparison

comparison tango/tango/text/Unicode.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.

author	lindquist
date	Fri, 11 Jan 2008 17:57:40 +0100
parents
children

comparison

equal deleted inserted replaced

-:5825d48b27d1
+:1700239cab2e
+/*******************************************************************************
+copyright:      Copyright (c) 2007 Peter Triller. All rights reserved
+license:        BSD style: $(LICENSE)
+version:        Initial release: Sept 2007
+authors:        Peter
+		Provides case mapping Functions for Unicode Strings. As of now it is
+		only 99 % complete, because it does not take into account Conditional
+		case mappings. This means the Greek Letter Sigma will not be correctly
+		case mapped at the end of a Word, and the Locales Lithuanian, Turkish
+		and Azeri are not taken into account during Case Mappings. This means
+		all in all around 12 Characters will not be mapped correctly under
+		some circumstances.
+		ICU4j also does not handle these cases at the moment.
+		Unittests are written against output from ICU4j
+		This Module tries to minimize Memory allocation and usage. You can
+		always pass the output buffer that should be used to the case mapping
+		function, which will be resized if necessary.
+*******************************************************************************/
+module tango.text.Unicode;
+private import tango.text.UnicodeData;
+private import tango.text.convert.Utf;
+/**
+* Converts an Utf8 String to Upper case. The whole result is first built
+* as Utf32 in a working buffer and is then encoded to Utf8 in one step.
+*
+* Params:
+*     input = String to be case mapped
+*     output = this output buffer will be used unless too small
+*     working = Utf32 scratch buffer; allocated when null and grown
+*               whenever it turns out to be too small
+* Returns: the case mapped string
+*/
+deprecated char[] blockToUpper(char[] input, char[] output = null, dchar[] working = null) {
+	// ?? How much preallocation ?? This is worst case allocation:
+	// input.length counts code units, which is never less than the
+	// number of code points the loop below decodes.
+	if (working is null)
+		working.length = input.length;
+	uint produced = 0;
+	foreach(dchar ch; input) {
+		// TODO Conditional Case Mapping
+		UnicodeData **d = (ch in unicodeData);
+		if(d !is null && ((*d).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData **s = (ch in specialCaseData);
+			debug {
+				assert(s !is null);
+			}
+			if((*s).upperCaseMapping !is null) {
+				// one-to-many mapping: copy the whole replacement sequence
+				int len = (*s).upperCaseMapping.length;
+				if(produced + len >= working.length)
+					working.length = working.length + working.length / 2 +  len;
+				working[produced..produced + len] = (*s).upperCaseMapping;
+				produced += len;
+				continue;
+			}
+		}
+		// The write below goes into the working buffer, so that is the
+		// buffer whose capacity has to be checked (not the output buffer).
+		if(produced >= working.length)
+			working.length = working.length + working.length / 2 + 1;
+		working[produced++] =  d is null ? ch:(*d).simpleUpperCaseMapping;
+	}
+	return toString(working[0..produced],output);
+}
+/**
+* Upper-cases an Utf8 String.
+*
+* Params:
+*     input = the text to be case mapped
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the mapped text
+*/
+char[] toUpper(char[] input, char[] output = null) {
+	dchar[1] single;
+	uint written = 0;
+	uint consumed;
+
+	// most common case: the result is as long as the input
+	if (input.length > output.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		// TODO Conditional Case Mapping
+		dchar[] mapped = null;
+		UnicodeData** entry = (c in unicodeData);
+		if (entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData** special = (c in specialCaseData);
+			debug {
+				assert(special !is null);
+			}
+			mapped = (*special).upperCaseMapping;
+		}
+		if (mapped is null) {
+			// no one-to-many mapping available: use the simple mapping,
+			// or the character itself when it has no table entry
+			single[0] = (entry is null) ? c : (*entry).simpleUpperCaseMapping;
+			mapped = single;
+		}
+
+		// Reserve four code units per code point up front so that
+		// toString never has to relocate the slice it is handed.
+		if (written + mapped.length * 4 >= output.length)
+			output.length = output.length + output.length / 2 + mapped.length * 4;
+
+		char[] tail = output[written .. output.length];
+		char[] encoded = toString(mapped, tail, &consumed);
+		debug {
+			assert(consumed == mapped.length);
+			assert(encoded.ptr == tail.ptr);
+		}
+		written += encoded.length;
+	}
+	return output[0 .. written];
+}
+/**
+* Upper-cases an Utf16 String.
+*
+* Params:
+*     input = the text to be case mapped
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the mapped text
+*/
+wchar[] toUpper(wchar[] input, wchar[] output = null) {
+	dchar[1] single;
+	uint written = 0;
+	uint consumed;
+
+	// most common case: the result is as long as the input
+	if (input.length > output.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		// TODO Conditional Case Mapping
+		dchar[] mapped = null;
+		UnicodeData** entry = (c in unicodeData);
+		if (entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData** special = (c in specialCaseData);
+			debug {
+				assert(special !is null);
+			}
+			mapped = (*special).upperCaseMapping;
+		}
+
+		// headroom: free space demanded before encoding
+		// extra: additional growth applied when that space is missing
+		size_t headroom;
+		size_t extra;
+		if (mapped is null) {
+			single[0] = (entry is null) ? c : (*entry).simpleUpperCaseMapping;
+			mapped = single;
+			headroom = 4;
+			extra = 3;
+		} else {
+			headroom = mapped.length * 2;
+			extra = mapped.length * 3;
+		}
+
+		// Grow up front so that toString16 never has to relocate the
+		// slice it is handed.
+		if (written + headroom >= output.length)
+			output.length = output.length + output.length / 2 + extra;
+
+		wchar[] tail = output[written .. output.length];
+		wchar[] encoded = toString16(mapped, tail, &consumed);
+		debug {
+			assert(consumed == mapped.length);
+			assert(encoded.ptr == tail.ptr);
+		}
+		written += encoded.length;
+	}
+	return output[0 .. written];
+}
+/**
+* Converts an Utf32 String to Upper case
+*
+* Params:
+*     input = String to be case mapped
+*     output = this output buffer will be used unless too small
+* Returns: the case mapped string
+*/
+dchar[] toUpper(dchar[] input, dchar[] output = null) {
+	// assume most common case: String stays the same length
+	if (input.length > output.length)
+		output.length = input.length;
+	uint produced = 0;
+	foreach(dchar orig; input) {
+		// TODO Conditional Case Mapping
+		UnicodeData **d = (orig in unicodeData);
+		if(d !is null && ((*d).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData **s = (orig in specialCaseData);
+			debug {
+				assert(s !is null);
+			}
+			if((*s).upperCaseMapping !is null) {
+				// Better resize strategy ???
+				if(produced + (*s).upperCaseMapping.length  > output.length)
+					output.length = output.length + output.length / 2 + (*s).upperCaseMapping.length;
+				foreach(ch; (*s).upperCaseMapping) {
+					output[produced++] = ch;
+				}
+				// Skip the simple mapping only when a special mapping was
+				// actually written. A character that is flagged as special
+				// but has no special upper case mapping must fall through,
+				// otherwise it would vanish from the result.
+				continue;
+			}
+		}
+		// "+ 1" guarantees real growth even for a one element buffer
+		if(produced >= output.length)
+			output.length = output.length + output.length / 2 + 1;
+		output[produced++] = d is null ? orig:(*d).simpleUpperCaseMapping;
+	}
+	return output[0..produced];
+}
+/**
+* Lower-cases an Utf8 String.
+*
+* Params:
+*     input = the text to be case mapped
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the mapped text
+*/
+char[] toLower(char[] input, char[] output = null) {
+	dchar[1] single;
+	uint written = 0;
+	uint consumed;
+
+	// most common case: the result is as long as the input
+	if (input.length > output.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		// TODO Conditional Case Mapping
+		dchar[] mapped = null;
+		UnicodeData** entry = (c in unicodeData);
+		if (entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData** special = (c in specialCaseData);
+			debug {
+				assert(special !is null);
+			}
+			mapped = (*special).lowerCaseMapping;
+		}
+		if (mapped is null) {
+			// no one-to-many mapping available: use the simple mapping,
+			// or the character itself when it has no table entry
+			single[0] = (entry is null) ? c : (*entry).simpleLowerCaseMapping;
+			mapped = single;
+		}
+
+		// Reserve four code units per code point up front so that
+		// toString never has to relocate the slice it is handed.
+		if (written + mapped.length * 4 >= output.length)
+			output.length = output.length + output.length / 2 + mapped.length * 4;
+
+		char[] tail = output[written .. output.length];
+		char[] encoded = toString(mapped, tail, &consumed);
+		debug {
+			assert(consumed == mapped.length);
+			assert(encoded.ptr == tail.ptr);
+		}
+		written += encoded.length;
+	}
+	return output[0 .. written];
+}
+/**
+* Lower-cases an Utf16 String.
+*
+* Params:
+*     input = the text to be case mapped
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the mapped text
+*/
+wchar[] toLower(wchar[] input, wchar[] output = null) {
+	dchar[1] single;
+	uint written = 0;
+	uint consumed;
+
+	// most common case: the result is as long as the input
+	if (input.length > output.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		// TODO Conditional Case Mapping
+		dchar[] mapped = null;
+		UnicodeData** entry = (c in unicodeData);
+		if (entry !is null && ((*entry).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData** special = (c in specialCaseData);
+			debug {
+				assert(special !is null);
+			}
+			mapped = (*special).lowerCaseMapping;
+		}
+
+		// headroom: free space demanded before encoding
+		// extra: additional growth applied when that space is missing
+		size_t headroom;
+		size_t extra;
+		if (mapped is null) {
+			single[0] = (entry is null) ? c : (*entry).simpleLowerCaseMapping;
+			mapped = single;
+			headroom = 4;
+			extra = 3;
+		} else {
+			headroom = mapped.length * 2;
+			extra = mapped.length * 3;
+		}
+
+		// Grow up front so that toString16 never has to relocate the
+		// slice it is handed.
+		if (written + headroom >= output.length)
+			output.length = output.length + output.length / 2 + extra;
+
+		wchar[] tail = output[written .. output.length];
+		wchar[] encoded = toString16(mapped, tail, &consumed);
+		debug {
+			assert(consumed == mapped.length);
+			assert(encoded.ptr == tail.ptr);
+		}
+		written += encoded.length;
+	}
+	return output[0 .. written];
+}
+/**
+* Converts an Utf32 String to Lower case
+*
+* Params:
+*     input = String to be case mapped
+*     output = this output buffer will be used unless too small
+* Returns: the case mapped string
+*/
+dchar[] toLower(dchar[] input, dchar[] output = null) {
+	// assume most common case: String stays the same length
+	if (input.length > output.length)
+		output.length = input.length;
+	uint produced = 0;
+	foreach(dchar orig; input) {
+		// TODO Conditional Case Mapping
+		UnicodeData **d = (orig in unicodeData);
+		if(d !is null && ((*d).generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
+			SpecialCaseData **s = (orig in specialCaseData);
+			debug {
+				assert(s !is null);
+			}
+			if((*s).lowerCaseMapping !is null) {
+				// Better resize strategy ???
+				if(produced + (*s).lowerCaseMapping.length  > output.length)
+					output.length = output.length + output.length / 2 + (*s).lowerCaseMapping.length;
+				foreach(ch; (*s).lowerCaseMapping) {
+					output[produced++] = ch;
+				}
+				// Skip the simple mapping only when a special mapping was
+				// actually written. A character that is flagged as special
+				// but has no special lower case mapping must fall through,
+				// otherwise it would vanish from the result.
+				continue;
+			}
+		}
+		// "+ 1" guarantees real growth even for a one element buffer
+		if(produced >= output.length)
+			output.length = output.length + output.length / 2 + 1;
+		output[produced++] = d is null ? orig:(*d).simpleLowerCaseMapping;
+	}
+	return output[0..produced];
+}
+/**
+* Case-folds an Utf8 String.
+* Folded strings are meant for case insensitive comparisons.
+*
+* Params:
+*     input = the text to be folded
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the folded text
+*/
+char[] toFold(char[] input, char[] output = null) {
+	dchar[1] single;
+	uint written = 0;
+	uint consumed;
+
+	// most common case: the result is as long as the input
+	if (input.length > output.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		dchar[] mapped;
+		FoldingCaseData** fold = (c in foldingCaseData);
+		if (fold !is null) {
+			mapped = (*fold).mapping;
+		} else {
+			// characters without a folding entry are copied unchanged
+			single[0] = c;
+			mapped = single;
+		}
+
+		// Reserve four code units per code point up front so that
+		// toString never has to relocate the slice it is handed.
+		if (written + mapped.length * 4 >= output.length)
+			output.length = output.length + output.length / 2 + mapped.length * 4;
+
+		char[] tail = output[written .. output.length];
+		char[] encoded = toString(mapped, tail, &consumed);
+		debug {
+			assert(consumed == mapped.length);
+			assert(encoded.ptr == tail.ptr);
+		}
+		written += encoded.length;
+	}
+	return output[0 .. written];
+}
+/**
+* Case-folds an Utf16 String.
+* Folded strings are meant for case insensitive comparisons.
+*
+* Params:
+*     input = the text to be folded
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the folded text
+*/
+wchar[] toFold(wchar[] input, wchar[] output = null) {
+	dchar[1] single;
+	uint written = 0;
+	uint consumed;
+
+	// most common case: the result is as long as the input
+	if (input.length > output.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		dchar[] mapped;
+		// headroom: free space demanded before encoding
+		// extra: additional growth applied when that space is missing
+		size_t headroom;
+		size_t extra;
+
+		FoldingCaseData** fold = (c in foldingCaseData);
+		if (fold !is null) {
+			mapped = (*fold).mapping;
+			headroom = mapped.length * 2;
+			extra = mapped.length * 3;
+		} else {
+			// characters without a folding entry are copied unchanged
+			single[0] = c;
+			mapped = single;
+			headroom = 4;
+			extra = 3;
+		}
+
+		// Grow up front so that toString16 never has to relocate the
+		// slice it is handed.
+		if (written + headroom >= output.length)
+			output.length = output.length + output.length / 2 + extra;
+
+		wchar[] tail = output[written .. output.length];
+		wchar[] encoded = toString16(mapped, tail, &consumed);
+		debug {
+			assert(consumed == mapped.length);
+			assert(encoded.ptr == tail.ptr);
+		}
+		written += encoded.length;
+	}
+	return output[0 .. written];
+}
+/**
+* Case-folds an Utf32 String.
+* Folded strings are meant for case insensitive comparisons.
+*
+* Params:
+*     input = the text to be folded
+*     output = optional destination buffer; it is used as long as it is
+*              large enough and is enlarged otherwise
+* Returns: the part of the destination buffer holding the folded text
+*/
+dchar[] toFold(dchar[] input, dchar[] output = null) {
+	uint written = 0;
+
+	// most common case: the result is as long as the input
+	if (output.length < input.length)
+		output.length = input.length;
+
+	foreach (dchar c; input) {
+		FoldingCaseData** fold = (c in foldingCaseData);
+		if (fold is null) {
+			// no folding entry: the character is copied unchanged
+			if (written >= output.length)
+				output.length = output.length + output.length / 2;
+			output[written++] = c;
+		} else {
+			// Better resize strategy ???
+			auto mapping = (*fold).mapping;
+			if (written + mapping.length > output.length)
+				output.length = output.length + output.length / 2 + mapping.length;
+			foreach (folded; mapping)
+				output[written++] = folded;
+		}
+	}
+	return output[0 .. written];
+}
+/**
+* Tells whether a character is a decimal digit, i.e. whether its
+* general category is Nd.
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isDigit(dchar ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory & UnicodeData.GeneralCategory.Nd) != 0;
+}
+/**
+* Tells whether a character is a letter, i.e. whether its general
+* category is one of Lu, Ll, Lt, Lm or Lo.
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isLetter(int ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory &
+		( UnicodeData.GeneralCategory.Lu
+		| UnicodeData.GeneralCategory.Ll
+		| UnicodeData.GeneralCategory.Lt
+		| UnicodeData.GeneralCategory.Lm
+		| UnicodeData.GeneralCategory.Lo)) != 0;
+}
+/**
+* Tells whether a character is a letter or a decimal digit, i.e. whether
+* its general category is one of Lu, Ll, Lt, Lm, Lo or Nd.
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isLetterOrDigit(int ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory &
+		( UnicodeData.GeneralCategory.Lu
+		| UnicodeData.GeneralCategory.Ll
+		| UnicodeData.GeneralCategory.Lt
+		| UnicodeData.GeneralCategory.Lm
+		| UnicodeData.GeneralCategory.Lo
+		| UnicodeData.GeneralCategory.Nd)) != 0;
+}
+/**
+* Tells whether a character is a lower case letter, i.e. whether its
+* general category is Ll.
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isLower(dchar ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory & UnicodeData.GeneralCategory.Ll) != 0;
+}
+/**
+* Tells whether a character is a title case letter, i.e. whether its
+* general category is Lt.
+* Title case applies to combined letters of which only the first part is
+* upper case and the second part is lower case. Some of these special
+* characters can be found in the Croatian and Greek languages.
+* See_Also: http://en.wikipedia.org/wiki/Capitalization
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isTitle(dchar ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory & UnicodeData.GeneralCategory.Lt) != 0;
+}
+/**
+* Tells whether a character is an upper case letter, i.e. whether its
+* general category is Lu.
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isUpper(dchar ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory & UnicodeData.GeneralCategory.Lu) != 0;
+}
+/**
+* Tells whether a character is a Whitespace character.
+* Whitespace characters are:
+*   - the control characters U+0009 to U+000D (TAB LF VT FF CR) and
+*     U+001C to U+001F (FS GS RS US)
+*   - the characters of the General Categories Zs, Zl and Zp, except
+*     for U+00A0 (NBSP), U+202F (NARROW NBSP) and U+FEFF (ZERO WIDTH NBSP)
+*
+* NOTE(review): U+0085 (NEL) is not matched by either rule below; confirm
+*               whether that is intended.
+*
+* WARNING: look at isSpace, maybe that function does
+*          more what you expect.
+*
+* Params:
+*     ch = the character to be inspected
+*/
+bool isWhitespace(dchar ch) {
+	bool spacingControl = (ch >= 0x0009 && ch <= 0x000D)
+		|| (ch >= 0x001C && ch <= 0x001F);
+	if (spacingControl)
+		return true;
+
+	// the no-break characters never count as whitespace
+	if (ch == 0x00A0 || ch == 0x202F || ch == 0xFEFF)
+		return false;
+
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory &
+		( UnicodeData.GeneralCategory.Zs
+		| UnicodeData.GeneralCategory.Zl
+		| UnicodeData.GeneralCategory.Zp)) != 0;
+}
+/**
+* Tells whether a character is a Space character as specified in the
+* Unicode Standard, i.e. whether its general category is one of
+* Zs, Zl or Zp.
+*
+* WARNING: look at isWhitespace, maybe that function does
+*          more what you expect.
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: false for characters without an entry in unicodeData
+*/
+bool isSpace(dchar ch) {
+	UnicodeData** entry = (ch in unicodeData);
+	if (entry is null)
+		return false;
+	return ((*entry).generalCategory &
+		( UnicodeData.GeneralCategory.Zs
+		| UnicodeData.GeneralCategory.Zl
+		| UnicodeData.GeneralCategory.Zp)) != 0;
+}
+/**
+* Determines if a character is a printable character as
+* specified in the Unicode Standard.
+*
+* A character counts as printable when it has an entry in unicodeData
+* and its general category is none of the "Other" categories
+* Cn (unassigned), Cc (control), Cf (format), Co (private use) and
+* Cs (surrogate).
+*
+* Params:
+*     ch = the character to be inspected
+* Returns: true for printable characters, false for the categories
+*          listed above and for characters without a table entry
+*/
+bool isPrintable(dchar ch) {
+	UnicodeData **d = (ch in unicodeData);
+	// The listed categories are exactly the non-printable ones, so the
+	// category test has to be negated.
+	return (d !is null) && !((*d).generalCategory &
+		( UnicodeData.GeneralCategory.Cn
+		| UnicodeData.GeneralCategory.Cc
+		| UnicodeData.GeneralCategory.Cf
+		| UnicodeData.GeneralCategory.Co
+		| UnicodeData.GeneralCategory.Cs));
+}
+debug ( UnicodeTest ): // colon form: all declarations from here to the end of the module exist only when the UnicodeTest debug identifier is set
+void main() {} // empty entry point; presumably lets the module be built stand-alone to run its unit tests (confirm)
+debug (UnitTest) {
+unittest {
+	// Upper case mapping, checked separately for every encoding in four
+	// scenarios:
+	//   a) no buffer passed
+	//   b) buffer passed that is large enough (must be reused)
+	//   c) buffer passed that is too small (must be replaced)
+	//   d) too small buffer plus one-to-many special case mappings
+
+	// --- Utf8 ---
+	char[] plain8 = "\u00E4\u00F6\u00FC";
+	char[] plainUpper8 = "\u00C4\u00D6\u00DC";
+	char[] ligatures8 = "\uFB03\uFB04\uFB05";
+	char[] ligaturesUpper8 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
+	char[60] roomy8;
+	char[5] tight8;
+	char[] got8 = toUpper(plain8);
+	assert(got8 == plainUpper8);
+	got8 = toUpper(plain8, roomy8);
+	assert(got8.ptr == roomy8.ptr);
+	assert(got8 == plainUpper8);
+	got8 = toUpper(plain8, tight8);
+	assert(got8.ptr != tight8.ptr);
+	assert(got8 == plainUpper8);
+	got8 = toUpper(ligatures8, tight8);
+	assert(got8.ptr != tight8.ptr);
+	assert(got8 == ligaturesUpper8);
+
+	// --- Utf16 ---
+	wchar[] plain16 = "\u00E4\u00F6\u00FC";
+	wchar[] plainUpper16 = "\u00C4\u00D6\u00DC";
+	wchar[] ligatures16 = "\uFB03\uFB04\uFB05";
+	wchar[] ligaturesUpper16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
+	wchar[30] roomy16;
+	wchar[2] tight16;
+	wchar[] got16 = toUpper(plain16);
+	assert(got16 == plainUpper16);
+	got16 = toUpper(plain16, roomy16);
+	assert(got16.ptr == roomy16.ptr);
+	assert(got16 == plainUpper16);
+	got16 = toUpper(plain16, tight16);
+	assert(got16.ptr != tight16.ptr);
+	assert(got16 == plainUpper16);
+	got16 = toUpper(ligatures16, tight16);
+	assert(got16.ptr != tight16.ptr);
+	assert(got16 == ligaturesUpper16);
+
+	// --- Utf32 ---
+	dchar[] plain32 = "\u00E4\u00F6\u00FC";
+	dchar[] plainUpper32 = "\u00C4\u00D6\u00DC";
+	dchar[] ligatures32 = "\uFB03\uFB04\uFB05";
+	dchar[] ligaturesUpper32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
+	dchar[30] roomy32;
+	dchar[2] tight32;
+	dchar[] got32 = toUpper(plain32);
+	assert(got32 == plainUpper32);
+	got32 = toUpper(plain32, roomy32);
+	assert(got32.ptr == roomy32.ptr);
+	assert(got32 == plainUpper32);
+	got32 = toUpper(plain32, tight32);
+	assert(got32.ptr != tight32.ptr);
+	assert(got32 == plainUpper32);
+	got32 = toUpper(ligatures32, tight32);
+	assert(got32.ptr != tight32.ptr);
+	assert(got32 == ligaturesUpper32);
+}
+unittest {
+	// Lower case mapping, checked separately for every encoding in four
+	// scenarios:
+	//   a) no buffer passed
+	//   b) buffer passed that is large enough (must be reused)
+	//   c) buffer passed that is too small (must be replaced)
+	//   d) too small buffer plus one-to-many special case mappings
+
+	// --- Utf8 ---
+	char[] plain8 = "\u00C4\u00D6\u00DC";
+	char[] plainLower8 = "\u00E4\u00F6\u00FC";
+	char[] dotted8 = "\u0130\u0130\u0130";
+	char[] dottedLower8 = "\u0069\u0307\u0069\u0307\u0069\u0307";
+	char[60] roomy8;
+	char[5] tight8;
+	char[] got8 = toLower(plain8);
+	assert(got8 == plainLower8);
+	got8 = toLower(plain8, roomy8);
+	assert(got8.ptr == roomy8.ptr);
+	assert(got8 == plainLower8);
+	got8 = toLower(plain8, tight8);
+	assert(got8.ptr != tight8.ptr);
+	assert(got8 == plainLower8);
+	got8 = toLower(dotted8, tight8);
+	assert(got8.ptr != tight8.ptr);
+	assert(got8 == dottedLower8);
+
+	// --- Utf16 ---
+	wchar[] plain16 = "\u00C4\u00D6\u00DC";
+	wchar[] plainLower16 = "\u00E4\u00F6\u00FC";
+	wchar[] dotted16 = "\u0130\u0130\u0130";
+	wchar[] dottedLower16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
+	wchar[30] roomy16;
+	wchar[2] tight16;
+	wchar[] got16 = toLower(plain16);
+	assert(got16 == plainLower16);
+	got16 = toLower(plain16, roomy16);
+	assert(got16.ptr == roomy16.ptr);
+	assert(got16 == plainLower16);
+	got16 = toLower(plain16, tight16);
+	assert(got16.ptr != tight16.ptr);
+	assert(got16 == plainLower16);
+	got16 = toLower(dotted16, tight16);
+	assert(got16.ptr != tight16.ptr);
+	assert(got16 == dottedLower16);
+
+	// --- Utf32 ---
+	dchar[] plain32 = "\u00C4\u00D6\u00DC";
+	dchar[] plainLower32 = "\u00E4\u00F6\u00FC";
+	dchar[] dotted32 = "\u0130\u0130\u0130";
+	dchar[] dottedLower32 = "\u0069\u0307\u0069\u0307\u0069\u0307";
+	dchar[30] roomy32;
+	dchar[2] tight32;
+	dchar[] got32 = toLower(plain32);
+	assert(got32 == plainLower32);
+	got32 = toLower(plain32, roomy32);
+	assert(got32.ptr == roomy32.ptr);
+	assert(got32 == plainLower32);
+	got32 = toLower(plain32, tight32);
+	assert(got32.ptr != tight32.ptr);
+	assert(got32 == plainLower32);
+	got32 = toLower(dotted32, tight32);
+	assert(got32.ptr != tight32.ptr);
+	assert(got32 == dottedLower32);
+}
+unittest {
+	// Two spellings that differ only in case (and in the decomposed form
+	// of U+0390) must be equal after case folding, in every encoding.
+	char[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
+	char[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
+	assert(toFold(testString1utf8) == toFold(testString2utf8));
+	wchar[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
+	wchar[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
+	assert(toFold(testString1utf16) == toFold(testString2utf16));
+	// These have to be dchar[]: declared as wchar[] they would select the
+	// Utf16 overload again and leave the Utf32 overload untested.
+	dchar[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
+	dchar[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
+	assert(toFold(testString1utf32) == toFold(testString2utf32));
+}
+}

Mercurial > projects > ldc

comparison tango/tango/text/Unicode.d @ 132:1700239cab2e trunk