changeset 629:d050e211402b

Moved files in src/std/ to src/util/.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Fri, 11 Jan 2008 20:03:46 +0100
parents 08681b93c3b3
children 5197bd351e5f
files trunk/src/dil/CompilerInfo.d trunk/src/dil/File.d trunk/src/dil/Unicode.d trunk/src/std/metastrings.d trunk/src/std/uni.d trunk/src/std/utf.d trunk/src/util/metastrings.d trunk/src/util/uni.d trunk/src/util/utf.d
diffstat 9 files changed, 1833 insertions(+), 1833 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/dil/CompilerInfo.d	Fri Jan 11 15:23:38 2008 +0100
+++ b/trunk/src/dil/CompilerInfo.d	Fri Jan 11 20:03:46 2008 +0100
@@ -3,7 +3,7 @@
   License: GPL3
 +/
 module dil.CompilerInfo;
-import std.metastrings : FormatT = Format, ToString;
+import util.metastrings : FormatT = Format, ToString;
 
 template Pad(char[] str, uint amount)
 {
--- a/trunk/src/dil/File.d	Fri Jan 11 15:23:38 2008 +0100
+++ b/trunk/src/dil/File.d	Fri Jan 11 20:03:46 2008 +0100
@@ -8,7 +8,7 @@
 import dil.Information;
 import dil.Converter;
 import tango.io.File;
-import std.utf;
+import util.utf;
 import common;
 
 /// Loads a file in any valid Unicode format and converts it to UTF-8.
--- a/trunk/src/dil/Unicode.d	Fri Jan 11 15:23:38 2008 +0100
+++ b/trunk/src/dil/Unicode.d	Fri Jan 11 20:03:46 2008 +0100
@@ -3,7 +3,7 @@
   License: GPL3
 +/
 module dil.Unicode;
-public import std.uni : isUniAlpha;
+public import util.uni : isUniAlpha;
 
 /// U+FFFD = �. Used to replace invalid Unicode characters.
 const dchar REPLACEMENT_CHAR = '\uFFFD';
--- a/trunk/src/std/metastrings.d	Fri Jan 11 15:23:38 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,225 +0,0 @@
-
-// Written in the D programming language.
-
-/**
- * Templates with which to do compile time manipulation of strings.
- *
- * Macros:
- *	WIKI = Phobos/StdMetastrings
- * Copyright:
- *	Public Domain
- */
-
-/*
- * Authors:
- *	Walter Bright, Digital Mars, www.digitalmars.com
- *	Don Clugston
- */
-
-/*
-  Note: this is not the original file!
-  Modified by Aziz Köksal:
-    Only changed some types from string to char[]
-*/
-
-module std.metastrings;
-
-/**
- * Formats constants into a string at compile time.
- * Analogous to std.string.format().
- * Parameters:
- *	A =	tuple of constants, which can be strings,
- *		characters, or integral values.
- * Formats:
- *	The formats supported are %s for strings, and %%
- *	for the % character.
- * Example:
- * ---
-import std.metastrings;
-import std.stdio;
-
-void main()
-{
-  string s = Format!("Arg %s = %s", "foo", 27);
-  writefln(s); // "Arg foo = 27"
-}
- * ---
- */
-
-template Format(A...)
-{
-    static if (A.length == 0)
-	const char[] Format = "";
-    else static if (is(typeof(A[0]) : char[]))
-	const char[] Format = FormatString!(A[0], A[1..$]);
-	//const char[] Format = FormatString!(A[0]);
-    else
-	const char[] Format = ToString!(A[0]) ~ Format!(A[1..$]);
-}
-
-template FormatString(char[] F, A...)
-{
-    static if (F.length == 0)
-	const char[] FormatString = Format!(A);
-    else static if (F.length == 1)
-	const char[] FormatString = F[0] ~ Format!(A);
-    else static if (F[0..2] == "%s")
-	const char[] FormatString = ToString!(A[0]) ~ FormatString!(F[2..$],A[1..$]);
-    else static if (F[0..2] == "%%")
-	const char[] FormatString = "%" ~ FormatString!(F[2..$],A);
-    else static if (F[0] == '%')
-	static assert(0, "unrecognized format %" ~ F[1]);
-    else
-	const char[] FormatString = F[0] ~ FormatString!(F[1..$],A);
-}
-
-/**
- * Convert constant argument to a string.
- */
-
-template ToString(ulong U)
-{
-    static if (U < 10)
-	const char[] ToString = "" ~ cast(char)(U + '0');
-    else
-	const char[] ToString = ToString!(U / 10) ~ ToString!(U % 10);
-}
-
-/// ditto
-template ToString(long I)
-{
-    static if (I < 0)
-	const char[] ToString = "-" ~ ToString!(cast(ulong)(-I));
-    else
-	const char[] ToString = ToString!(cast(ulong)I);
-}
-
-static assert(ToString!(0x100000000) == "4294967296");
-
-/// ditto
-template ToString(uint U)
-{
-    const char[] ToString = ToString!(cast(ulong)U);
-}
-
-/// ditto
-template ToString(int I)
-{
-    const char[] ToString = ToString!(cast(long)I);
-}
-
-/// ditto
-template ToString(ushort U)
-{
-    const char[] ToString = ToString!(cast(ulong)U);
-}
-
-/// ditto
-template ToString(short I)
-{
-    const char[] ToString = ToString!(cast(long)I);
-}
-
-/// ditto
-template ToString(ubyte U)
-{
-    const char[] ToString = ToString!(cast(ulong)U);
-}
-
-/// ditto
-template ToString(byte I)
-{
-    const char[] ToString = ToString!(cast(long)I);
-}
-
-/// ditto
-template ToString(bool B)
-{
-    const char[] ToString = B ? "true" : "false";
-}
-
-/// ditto
-template ToString(char[] S)
-{
-    const char[] ToString = S;
-}
-
-/// ditto
-template ToString(char C)
-{
-    const char[] ToString = "" ~ C;
-}
-
-unittest
-{
-    char[] s = Format!("hel%slo", "world", -138, 'c', true);
-    assert(s == "helworldlo-138ctrue");
-}
-
-
-/********
- * Parse unsigned integer literal from the start of string s.
- * returns:
- *	.value = the integer literal as a string,
- *	.rest = the string following the integer literal
- * Otherwise:
- *	.value = null,
- *	.rest = s
- */
-
-template ParseUinteger(char[] s)
-{
-    static if (s.length == 0)
-    {	const char[] value = "";
-	const char[] rest = "";
-    }
-    else static if (s[0] >= '0' && s[0] <= '9')
-    {	const char[] value = s[0] ~ ParseUinteger!(s[1..$]).value;
-	const char[] rest = ParseUinteger!(s[1..$]).rest;
-    }
-    else
-    {	const char[] value = "";
-	const char[] rest = s;
-    }
-}
-
-/********
- * Parse integer literal optionally preceded by '-'
- * from the start of string s.
- * returns:
- *	.value = the integer literal as a string,
- *	.rest = the string following the integer literal
- * Otherwise:
- *	.value = null,
- *	.rest = s
- */
-
-template ParseInteger(char[] s)
-{
-    static if (s.length == 0)
-    {	const char[] value = "";
-	const char[] rest = "";
-    }
-    else static if (s[0] >= '0' && s[0] <= '9')
-    {	const char[] value = s[0] ~ ParseUinteger!(s[1..$]).value;
-	const char[] rest = ParseUinteger!(s[1..$]).rest;
-    }
-    else static if (s.length >= 2 &&
-		s[0] == '-' && s[1] >= '0' && s[1] <= '9')
-    {	const char[] value = s[0..2] ~ ParseUinteger!(s[2..$]).value;
-	const char[] rest = ParseUinteger!(s[2..$]).rest;
-    }
-    else
-    {	const char[] value = "";
-	const char[] rest = s;
-    }
-}
-
-unittest
-{
-    assert(ParseUinteger!("1234abc").value == "1234");
-    assert(ParseUinteger!("1234abc").rest == "abc");
-    assert(ParseInteger!("-1234abc").value == "-1234");
-    assert(ParseInteger!("-1234abc").rest == "abc");
-}
-
--- a/trunk/src/std/uni.d	Fri Jan 11 15:23:38 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,630 +0,0 @@
-
-// Written in the D programming language.
-
-/*
- * Placed into the Public Domain.
- * Digital Mars, www.digitalmars.com
- * Written by Walter Bright
- */
-
-/**
- * Simple Unicode character classification functions.
- * For ASCII classification, see $(LINK2 std_ctype.html, std.ctype).
- * Macros:
- *	WIKI=Phobos/StdUni
- * References:
- *	$(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
- *	$(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia),
- *	$(LINK2 http://www.unicode.org, The Unicode Consortium)
- * Trademarks:
- *	Unicode(tm) is a trademark of Unicode, Inc.
- */
-
-
-module std.uni;
-
-/**
- * Returns !=0 if c is a Unicode lower case character.
- */
-int isUniLower(dchar c)
-{
-    if (c <= 0x7F)
-	return (c >= 'a' && c <= 'z');
-
-    return isUniAlpha(c) && c == toUniLower(c);
-}
-
-/**
- * Returns !=0 if c is a Unicode upper case character.
- */
-int isUniUpper(dchar c)
-{
-    if (c <= 0x7F)
-	return (c >= 'A' && c <= 'Z');
-
-    return isUniAlpha(c) && c == toUniUpper(c);
-}
-
-/**
- * If c is a Unicode upper case character, return the lower case
- * equivalent, otherwise return c.
- */
-dchar toUniLower(dchar c)
-{
-    if (c >= 'A' && c <= 'Z')
-    {
-        c += 32;
-    }
-    else if (c >= 0x00C0)
-    {
-	if ((c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c<=0x00DE))
-	{
-	    c += 32;
-	}
-	else if ((c >= 0x0100 && c < 0x0138) || (c > 0x0149 && c < 0x0178))
-	{
-	    if (c == 0x0130)
-		c = 0x0069;
-	    else if ((c & 1) == 0)
-		c += 1;
-	}
-	else if (c == 0x0178)
-	{
-	    c = 0x00FF;
-	}
-	else if ((c >= 0x0139 && c < 0x0149) || (c > 0x0178 && c < 0x017F))
-	{
-	    if (c & 1)
-		c += 1;
-	}
-	else if (c >= 0x0200 && c <= 0x0217)
-	{
-	    if ((c & 1) == 0)
-		c += 1;
-	}
-	else if ((c >= 0x0401 && c <= 0x040C) || (c>= 0x040E && c <= 0x040F))
-	{
-	    c += 80;
-	}
-	else if (c >= 0x0410  && c <= 0x042F)
-	{
-	    c += 32;
-	}
-	else if (c >= 0x0460 && c <= 0x047F)
-	{
-	    if ((c & 1) == 0)
-		c += 1;
-	}
-	else if (c >= 0x0531 && c <= 0x0556)
-	{
-	    c += 48;
-	}
-	else if (c >= 0x10A0 && c <= 0x10C5)
-	{
-	    c += 48;
-	}
-	else if (c >= 0xFF21 && c <= 0xFF3A)
-	{
-	    c += 32;
-	}
-    }
-    return c;
-}
-
-/**
- * If c is a Unicode lower case character, return the upper case
- * equivalent, otherwise return c.
- */
-dchar toUniUpper(dchar c)
-{
-    if (c >= 'a' && c <= 'z')
-    {
-	c -= 32;
-    }
-    else if (c >= 0x00E0)
-    {
-	if ((c >= 0x00E0 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FE))
-	{
-	    c -= 32;
-	}
-	else if (c == 0x00FF)
-	{
-	    c = 0x0178;
-	}
-	else if ((c >= 0x0100 && c < 0x0138) || (c > 0x0149 && c < 0x0178))
-	{
-	    if (c == 0x0131)
-		c = 0x0049;
-	    else if (c & 1)
-		c -= 1;
-	}
-	else if ((c >= 0x0139 && c < 0x0149) || (c > 0x0178 && c < 0x017F))
-	{
-	    if ((c & 1) == 0)
-		c = c-1;
-	}
-	else if (c == 0x017F)
-	{
-	    c = 0x0053;
-	}
-	else if (c >= 0x0200 && c <= 0x0217)
-	{
-	    if (c & 1)
-		c = c-1;
-	}
-	else if (c >= 0x0430 && c<= 0x044F)
-	{
-	    c -= 32;
-	}
-	else if ((c >= 0x0451 && c <= 0x045C) || (c >=0x045E && c<= 0x045F))
-	{
-	    c -= 80;
-	}
-	else if (c >= 0x0460 && c <= 0x047F)
-	{
-	    if (c & 1)
-		c -= 1;
-	}
-	else if (c >= 0x0561 && c < 0x0587)
-	{
-	    c -= 48;
-	}
-	else if (c >= 0xFF41 && c <= 0xFF5A)
-	{
-	    c -= 32;
-	}
-    }
-    return c;
-}
-
-
-/*******************************
- * Return !=0 if u is a Unicode alpha character.
- * (general Unicode category: Lu, Ll, Lt, Lm and Lo)
- *
- * Standards: Unicode 5.0.0
- */
-
-int isUniAlpha(dchar u)
-{
-    static dchar table[][2] =
-    [
-	[ 'A', 'Z' ],
-	[ 'a', 'z' ],
-	[ 0x00AA, 0x00AA ],
-	[ 0x00B5, 0x00B5 ],
-	[ 0x00BA, 0x00BA ],
-	[ 0x00C0, 0x00D6 ],
-	[ 0x00D8, 0x00F6 ],
-	[ 0x00F8, 0x02C1 ],
-	[ 0x02C6, 0x02D1 ],
-	[ 0x02E0, 0x02E4 ],
-	[ 0x02EE, 0x02EE ],
-	[ 0x037A, 0x037D ],
-	[ 0x0386, 0x0386 ],
-	[ 0x0388, 0x038A ],
-	[ 0x038C, 0x038C ],
-	[ 0x038E, 0x03A1 ],
-	[ 0x03A3, 0x03CE ],
-	[ 0x03D0, 0x03F5 ],
-	[ 0x03F7, 0x0481 ],
-	[ 0x048A, 0x0513 ],
-	[ 0x0531, 0x0556 ],
-	[ 0x0559, 0x0559 ],
-	[ 0x0561, 0x0587 ],
-	[ 0x05D0, 0x05EA ],
-	[ 0x05F0, 0x05F2 ],
-	[ 0x0621, 0x063A ],
-	[ 0x0640, 0x064A ],
-	[ 0x066E, 0x066F ],
-	[ 0x0671, 0x06D3 ],
-	[ 0x06D5, 0x06D5 ],
-	[ 0x06E5, 0x06E6 ],
-	[ 0x06EE, 0x06EF ],
-	[ 0x06FA, 0x06FC ],
-	[ 0x06FF, 0x06FF ],
-	[ 0x0710, 0x0710 ],
-	[ 0x0712, 0x072F ],
-	[ 0x074D, 0x076D ],
-	[ 0x0780, 0x07A5 ],
-	[ 0x07B1, 0x07B1 ],
-	[ 0x07CA, 0x07EA ],
-	[ 0x07F4, 0x07F5 ],
-	[ 0x07FA, 0x07FA ],
-	[ 0x0904, 0x0939 ],
-	[ 0x093D, 0x093D ],
-	[ 0x0950, 0x0950 ],
-	[ 0x0958, 0x0961 ],
-	[ 0x097B, 0x097F ],
-	[ 0x0985, 0x098C ],
-	[ 0x098F, 0x0990 ],
-	[ 0x0993, 0x09A8 ],
-	[ 0x09AA, 0x09B0 ],
-	[ 0x09B2, 0x09B2 ],
-	[ 0x09B6, 0x09B9 ],
-	[ 0x09BD, 0x09BD ],
-	[ 0x09CE, 0x09CE ],
-	[ 0x09DC, 0x09DD ],
-	[ 0x09DF, 0x09E1 ],
-	[ 0x09F0, 0x09F1 ],
-	[ 0x0A05, 0x0A0A ],
-	[ 0x0A0F, 0x0A10 ],
-	[ 0x0A13, 0x0A28 ],
-	[ 0x0A2A, 0x0A30 ],
-	[ 0x0A32, 0x0A33 ],
-	[ 0x0A35, 0x0A36 ],
-	[ 0x0A38, 0x0A39 ],
-	[ 0x0A59, 0x0A5C ],
-	[ 0x0A5E, 0x0A5E ],
-	[ 0x0A72, 0x0A74 ],
-	[ 0x0A85, 0x0A8D ],
-	[ 0x0A8F, 0x0A91 ],
-	[ 0x0A93, 0x0AA8 ],
-	[ 0x0AAA, 0x0AB0 ],
-	[ 0x0AB2, 0x0AB3 ],
-	[ 0x0AB5, 0x0AB9 ],
-	[ 0x0ABD, 0x0ABD ],
-	[ 0x0AD0, 0x0AD0 ],
-	[ 0x0AE0, 0x0AE1 ],
-	[ 0x0B05, 0x0B0C ],
-	[ 0x0B0F, 0x0B10 ],
-	[ 0x0B13, 0x0B28 ],
-	[ 0x0B2A, 0x0B30 ],
-	[ 0x0B32, 0x0B33 ],
-	[ 0x0B35, 0x0B39 ],
-	[ 0x0B3D, 0x0B3D ],
-	[ 0x0B5C, 0x0B5D ],
-	[ 0x0B5F, 0x0B61 ],
-	[ 0x0B71, 0x0B71 ],
-	[ 0x0B83, 0x0B83 ],
-	[ 0x0B85, 0x0B8A ],
-	[ 0x0B8E, 0x0B90 ],
-	[ 0x0B92, 0x0B95 ],
-	[ 0x0B99, 0x0B9A ],
-	[ 0x0B9C, 0x0B9C ],
-	[ 0x0B9E, 0x0B9F ],
-	[ 0x0BA3, 0x0BA4 ],
-	[ 0x0BA8, 0x0BAA ],
-	[ 0x0BAE, 0x0BB9 ],
-	[ 0x0C05, 0x0C0C ],
-	[ 0x0C0E, 0x0C10 ],
-	[ 0x0C12, 0x0C28 ],
-	[ 0x0C2A, 0x0C33 ],
-	[ 0x0C35, 0x0C39 ],
-	[ 0x0C60, 0x0C61 ],
-	[ 0x0C85, 0x0C8C ],
-	[ 0x0C8E, 0x0C90 ],
-	[ 0x0C92, 0x0CA8 ],
-	[ 0x0CAA, 0x0CB3 ],
-	[ 0x0CB5, 0x0CB9 ],
-	[ 0x0CBD, 0x0CBD ],
-	[ 0x0CDE, 0x0CDE ],
-	[ 0x0CE0, 0x0CE1 ],
-	[ 0x0D05, 0x0D0C ],
-	[ 0x0D0E, 0x0D10 ],
-	[ 0x0D12, 0x0D28 ],
-	[ 0x0D2A, 0x0D39 ],
-	[ 0x0D60, 0x0D61 ],
-	[ 0x0D85, 0x0D96 ],
-	[ 0x0D9A, 0x0DB1 ],
-	[ 0x0DB3, 0x0DBB ],
-	[ 0x0DBD, 0x0DBD ],
-	[ 0x0DC0, 0x0DC6 ],
-	[ 0x0E01, 0x0E30 ],
-	[ 0x0E32, 0x0E33 ],
-	[ 0x0E40, 0x0E46 ],
-	[ 0x0E81, 0x0E82 ],
-	[ 0x0E84, 0x0E84 ],
-	[ 0x0E87, 0x0E88 ],
-	[ 0x0E8A, 0x0E8A ],
-	[ 0x0E8D, 0x0E8D ],
-	[ 0x0E94, 0x0E97 ],
-	[ 0x0E99, 0x0E9F ],
-	[ 0x0EA1, 0x0EA3 ],
-	[ 0x0EA5, 0x0EA5 ],
-	[ 0x0EA7, 0x0EA7 ],
-	[ 0x0EAA, 0x0EAB ],
-	[ 0x0EAD, 0x0EB0 ],
-	[ 0x0EB2, 0x0EB3 ],
-	[ 0x0EBD, 0x0EBD ],
-	[ 0x0EC0, 0x0EC4 ],
-	[ 0x0EC6, 0x0EC6 ],
-	[ 0x0EDC, 0x0EDD ],
-	[ 0x0F00, 0x0F00 ],
-	[ 0x0F40, 0x0F47 ],
-	[ 0x0F49, 0x0F6A ],
-	[ 0x0F88, 0x0F8B ],
-	[ 0x1000, 0x1021 ],
-	[ 0x1023, 0x1027 ],
-	[ 0x1029, 0x102A ],
-	[ 0x1050, 0x1055 ],
-	[ 0x10A0, 0x10C5 ],
-	[ 0x10D0, 0x10FA ],
-	[ 0x10FC, 0x10FC ],
-	[ 0x1100, 0x1159 ],
-	[ 0x115F, 0x11A2 ],
-	[ 0x11A8, 0x11F9 ],
-	[ 0x1200, 0x1248 ],
-	[ 0x124A, 0x124D ],
-	[ 0x1250, 0x1256 ],
-	[ 0x1258, 0x1258 ],
-	[ 0x125A, 0x125D ],
-	[ 0x1260, 0x1288 ],
-	[ 0x128A, 0x128D ],
-	[ 0x1290, 0x12B0 ],
-	[ 0x12B2, 0x12B5 ],
-	[ 0x12B8, 0x12BE ],
-	[ 0x12C0, 0x12C0 ],
-	[ 0x12C2, 0x12C5 ],
-	[ 0x12C8, 0x12D6 ],
-	[ 0x12D8, 0x1310 ],
-	[ 0x1312, 0x1315 ],
-	[ 0x1318, 0x135A ],
-	[ 0x1380, 0x138F ],
-	[ 0x13A0, 0x13F4 ],
-	[ 0x1401, 0x166C ],
-	[ 0x166F, 0x1676 ],
-	[ 0x1681, 0x169A ],
-	[ 0x16A0, 0x16EA ],
-	[ 0x1700, 0x170C ],
-	[ 0x170E, 0x1711 ],
-	[ 0x1720, 0x1731 ],
-	[ 0x1740, 0x1751 ],
-	[ 0x1760, 0x176C ],
-	[ 0x176E, 0x1770 ],
-	[ 0x1780, 0x17B3 ],
-	[ 0x17D7, 0x17D7 ],
-	[ 0x17DC, 0x17DC ],
-	[ 0x1820, 0x1877 ],
-	[ 0x1880, 0x18A8 ],
-	[ 0x1900, 0x191C ],
-	[ 0x1950, 0x196D ],
-	[ 0x1970, 0x1974 ],
-	[ 0x1980, 0x19A9 ],
-	[ 0x19C1, 0x19C7 ],
-	[ 0x1A00, 0x1A16 ],
-	[ 0x1B05, 0x1B33 ],
-	[ 0x1B45, 0x1B4B ],
-	[ 0x1D00, 0x1DBF ],
-	[ 0x1E00, 0x1E9B ],
-	[ 0x1EA0, 0x1EF9 ],
-	[ 0x1F00, 0x1F15 ],
-	[ 0x1F18, 0x1F1D ],
-	[ 0x1F20, 0x1F45 ],
-	[ 0x1F48, 0x1F4D ],
-	[ 0x1F50, 0x1F57 ],
-	[ 0x1F59, 0x1F59 ],
-	[ 0x1F5B, 0x1F5B ],
-	[ 0x1F5D, 0x1F5D ],
-	[ 0x1F5F, 0x1F7D ],
-	[ 0x1F80, 0x1FB4 ],
-	[ 0x1FB6, 0x1FBC ],
-	[ 0x1FBE, 0x1FBE ],
-	[ 0x1FC2, 0x1FC4 ],
-	[ 0x1FC6, 0x1FCC ],
-	[ 0x1FD0, 0x1FD3 ],
-	[ 0x1FD6, 0x1FDB ],
-	[ 0x1FE0, 0x1FEC ],
-	[ 0x1FF2, 0x1FF4 ],
-	[ 0x1FF6, 0x1FFC ],
-	[ 0x2071, 0x2071 ],
-	[ 0x207F, 0x207F ],
-	[ 0x2090, 0x2094 ],
-	[ 0x2102, 0x2102 ],
-	[ 0x2107, 0x2107 ],
-	[ 0x210A, 0x2113 ],
-	[ 0x2115, 0x2115 ],
-	[ 0x2119, 0x211D ],
-	[ 0x2124, 0x2124 ],
-	[ 0x2126, 0x2126 ],
-	[ 0x2128, 0x2128 ],
-	[ 0x212A, 0x212D ],
-	[ 0x212F, 0x2139 ],
-	[ 0x213C, 0x213F ],
-	[ 0x2145, 0x2149 ],
-	[ 0x214E, 0x214E ],
-	[ 0x2183, 0x2184 ],
-	[ 0x2C00, 0x2C2E ],
-	[ 0x2C30, 0x2C5E ],
-	[ 0x2C60, 0x2C6C ],
-	[ 0x2C74, 0x2C77 ],
-	[ 0x2C80, 0x2CE4 ],
-	[ 0x2D00, 0x2D25 ],
-	[ 0x2D30, 0x2D65 ],
-	[ 0x2D6F, 0x2D6F ],
-	[ 0x2D80, 0x2D96 ],
-	[ 0x2DA0, 0x2DA6 ],
-	[ 0x2DA8, 0x2DAE ],
-	[ 0x2DB0, 0x2DB6 ],
-	[ 0x2DB8, 0x2DBE ],
-	[ 0x2DC0, 0x2DC6 ],
-	[ 0x2DC8, 0x2DCE ],
-	[ 0x2DD0, 0x2DD6 ],
-	[ 0x2DD8, 0x2DDE ],
-	[ 0x3005, 0x3006 ],
-	[ 0x3031, 0x3035 ],
-	[ 0x303B, 0x303C ],
-	[ 0x3041, 0x3096 ],
-	[ 0x309D, 0x309F ],
-	[ 0x30A1, 0x30FA ],
-	[ 0x30FC, 0x30FF ],
-	[ 0x3105, 0x312C ],
-	[ 0x3131, 0x318E ],
-	[ 0x31A0, 0x31B7 ],
-	[ 0x31F0, 0x31FF ],
-	[ 0x3400, 0x4DB5 ],
-	[ 0x4E00, 0x9FBB ],
-	[ 0xA000, 0xA48C ],
-	[ 0xA717, 0xA71A ],
-	[ 0xA800, 0xA801 ],
-	[ 0xA803, 0xA805 ],
-	[ 0xA807, 0xA80A ],
-	[ 0xA80C, 0xA822 ],
-	[ 0xA840, 0xA873 ],
-	[ 0xAC00, 0xD7A3 ],
-	[ 0xF900, 0xFA2D ],
-	[ 0xFA30, 0xFA6A ],
-	[ 0xFA70, 0xFAD9 ],
-	[ 0xFB00, 0xFB06 ],
-	[ 0xFB13, 0xFB17 ],
-	[ 0xFB1D, 0xFB1D ],
-	[ 0xFB1F, 0xFB28 ],
-	[ 0xFB2A, 0xFB36 ],
-	[ 0xFB38, 0xFB3C ],
-	[ 0xFB3E, 0xFB3E ],
-	[ 0xFB40, 0xFB41 ],
-	[ 0xFB43, 0xFB44 ],
-	[ 0xFB46, 0xFBB1 ],
-	[ 0xFBD3, 0xFD3D ],
-	[ 0xFD50, 0xFD8F ],
-	[ 0xFD92, 0xFDC7 ],
-	[ 0xFDF0, 0xFDFB ],
-	[ 0xFE70, 0xFE74 ],
-	[ 0xFE76, 0xFEFC ],
-	[ 0xFF21, 0xFF3A ],
-	[ 0xFF41, 0xFF5A ],
-	[ 0xFF66, 0xFFBE ],
-	[ 0xFFC2, 0xFFC7 ],
-	[ 0xFFCA, 0xFFCF ],
-	[ 0xFFD2, 0xFFD7 ],
-	[ 0xFFDA, 0xFFDC ],
-	[ 0x10000, 0x1000B ],
-	[ 0x1000D, 0x10026 ],
-	[ 0x10028, 0x1003A ],
-	[ 0x1003C, 0x1003D ],
-	[ 0x1003F, 0x1004D ],
-	[ 0x10050, 0x1005D ],
-	[ 0x10080, 0x100FA ],
-	[ 0x10300, 0x1031E ],
-	[ 0x10330, 0x10340 ],
-	[ 0x10342, 0x10349 ],
-	[ 0x10380, 0x1039D ],
-	[ 0x103A0, 0x103C3 ],
-	[ 0x103C8, 0x103CF ],
-	[ 0x10400, 0x1049D ],
-	[ 0x10800, 0x10805 ],
-	[ 0x10808, 0x10808 ],
-	[ 0x1080A, 0x10835 ],
-	[ 0x10837, 0x10838 ],
-	[ 0x1083C, 0x1083C ],
-	[ 0x1083F, 0x1083F ],
-	[ 0x10900, 0x10915 ],
-	[ 0x10A00, 0x10A00 ],
-	[ 0x10A10, 0x10A13 ],
-	[ 0x10A15, 0x10A17 ],
-	[ 0x10A19, 0x10A33 ],
-	[ 0x12000, 0x1236E ],
-	[ 0x1D400, 0x1D454 ],
-	[ 0x1D456, 0x1D49C ],
-	[ 0x1D49E, 0x1D49F ],
-	[ 0x1D4A2, 0x1D4A2 ],
-	[ 0x1D4A5, 0x1D4A6 ],
-	[ 0x1D4A9, 0x1D4AC ],
-	[ 0x1D4AE, 0x1D4B9 ],
-	[ 0x1D4BB, 0x1D4BB ],
-	[ 0x1D4BD, 0x1D4C3 ],
-	[ 0x1D4C5, 0x1D505 ],
-	[ 0x1D507, 0x1D50A ],
-	[ 0x1D50D, 0x1D514 ],
-	[ 0x1D516, 0x1D51C ],
-	[ 0x1D51E, 0x1D539 ],
-	[ 0x1D53B, 0x1D53E ],
-	[ 0x1D540, 0x1D544 ],
-	[ 0x1D546, 0x1D546 ],
-	[ 0x1D54A, 0x1D550 ],
-	[ 0x1D552, 0x1D6A5 ],
-	[ 0x1D6A8, 0x1D6C0 ],
-	[ 0x1D6C2, 0x1D6DA ],
-	[ 0x1D6DC, 0x1D6FA ],
-	[ 0x1D6FC, 0x1D714 ],
-	[ 0x1D716, 0x1D734 ],
-	[ 0x1D736, 0x1D74E ],
-	[ 0x1D750, 0x1D76E ],
-	[ 0x1D770, 0x1D788 ],
-	[ 0x1D78A, 0x1D7A8 ],
-	[ 0x1D7AA, 0x1D7C2 ],
-	[ 0x1D7C4, 0x1D7CB ],
-	[ 0x20000, 0x2A6D6 ],
-	[ 0x2F800, 0x2FA1D ],
-    ];
-
-    debug
-    {
-	for (int i = 0; i < table.length; i++)
-	{
-	    assert(table[i][0] <= table[i][1]);
-	    if (i < table.length - 1)
-	    {
-//		if (table[i][1] >= table[i + 1][0])
-//		    printf("table[%d][1] = x%x, table[%d][0] = x%x\n", i, table[i][1], i + 1, table[i + 1][0]);
-		assert(table[i][1] < table[i + 1][0]);
-	    }
-	}
-    }
-
-    if (u < 0xAA)
-    {
-	if (u < 'A')
-	    goto Lisnot;
-	if (u <= 'Z')
-	    goto Lis;
-	if (u < 'a')
-	    goto Lisnot;
-	if (u <= 'z')
-	    goto Lis;
-	goto Lisnot;
-    }
-
-    // Binary search
-    uint mid;
-    uint low;
-    uint high;
-
-    low = 0;
-    high = table.length - 1;
-    while (cast(int)low <= cast(int)high)
-    {
-	mid = (low + high) >> 1;
-	if (u < table[mid][0])
-	    high = mid - 1;
-	else if (u > table[mid][1])
-	    low = mid + 1;
-	else
-	    goto Lis;
-    }
-
-Lisnot:
-    debug
-    {
-	for (int i = 0; i < table.length; i++)
-	{
-	    assert(u < table[i][0] || u > table[i][1]);
-	}
-    }
-    return 0;
-
-Lis:
-    debug
-    {
-	for (int i = 0; i < table.length; i++)
-	{
-	    if (u >= table[i][0] && u <= table[i][1])
-		return 1;
-	}
-	assert(0);		// should have been in table
-    }
-    return 1;
-}
-
-unittest
-{
-    for (uint i = 0; i < 0x80; i++)
-    {
-	if (i >= 'A' && i <= 'Z')
-	    assert(isUniAlpha(i));
-	else if (i >= 'a' && i <= 'z')
-	    assert(isUniAlpha(i));
-	else
-	    assert(!isUniAlpha(i));
-    }
-}
--- a/trunk/src/std/utf.d	Fri Jan 11 15:23:38 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,975 +0,0 @@
-// utf.d
-
-/*
- *  Copyright (C) 2003-2004 by Digital Mars, www.digitalmars.com
- *  Written by Walter Bright
- *
- *  This software is provided 'as-is', without any express or implied
- *  warranty. In no event will the authors be held liable for any damages
- *  arising from the use of this software.
- *
- *  Permission is granted to anyone to use this software for any purpose,
- *  including commercial applications, and to alter it and redistribute it
- *  freely, subject to the following restrictions:
- *
- *  o  The origin of this software must not be misrepresented; you must not
- *     claim that you wrote the original software. If you use this software
- *     in a product, an acknowledgment in the product documentation would be
- *     appreciated but is not required.
- *  o  Altered source versions must be plainly marked as such, and must not
- *     be misrepresented as being the original software.
- *  o  This notice may not be removed or altered from any source
- *     distribution.
- */
-
-/********************************************
- * Encode and decode UTF-8, UTF-16 and UTF-32 strings.
- *
- * For Win32 systems, the C wchar_t type is UTF-16 and corresponds to the D
- * wchar type.
- * For linux systems, the C wchar_t type is UTF-32 and corresponds to
- * the D utf.dchar type. 
- *
- * UTF character support is restricted to (\u0000 &lt;= character &lt;= \U0010FFFF).
- *
- * See_Also:
- *	$(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia)<br>
- *	$(LINK http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8)<br>
- *	$(LINK http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335)
- * Macros:
- *	WIKI = Phobos/StdUtf
- */
-
-/*
-  Note: this is not the original file!
-  Modified by Aziz Köksal:
-    Only commented out deprecated class UtfError.
-*/
- 
-module std.utf;
-
-// private import std.stdio;
-
-//debug=utf;		// uncomment to turn on debugging printf's
-/+
-deprecated class UtfError : Error
-{
-    size_t idx;	// index in string of where error occurred
-
-    this(char[] s, size_t i)
-    {
-	idx = i;
-	super(s);
-    }
-}
-+/
-/**********************************
- * Exception class that is thrown upon any errors.
- */
-
-class UtfException : Exception
-{
-    size_t idx;	/// index in string of where error occurred
-
-    this(char[] s, size_t i)
-    {
-	idx = i;
-	super(s);
-    }
-}
-
-/*******************************
- * Test if c is a valid UTF-32 character.
- *
- * \uFFFE and \uFFFF are considered valid by this function,
- * as they are permitted for internal use by an application,
- * but they are not allowed for interchange by the Unicode standard.
- *
- * Returns: true if it is, false if not.
- */
-
-bool isValidDchar(dchar c)
-{
-    /* Note: FFFE and FFFF are specifically permitted by the
-     * Unicode standard for application internal use, but are not
-     * allowed for interchange.
-     * (thanks to Arcane Jill)
-     */
-
-    return c < 0xD800 ||
-	(c > 0xDFFF && c <= 0x10FFFF /*&& c != 0xFFFE && c != 0xFFFF*/);
-}
-
-unittest
-{
-    debug(utf) printf("utf.isValidDchar.unittest\n");
-    assert(isValidDchar(cast(dchar)'a') == true);
-    assert(isValidDchar(cast(dchar)0x1FFFFF) == false);
-}
-
-
-ubyte[256] UTF8stride =
-[
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
-];
-
-/**
- * stride() returns the length of a UTF-8 sequence starting at index i
- * in string s.
- * Returns:
- *	The number of bytes in the UTF-8 sequence or
- *	0xFF meaning s[i] is not the start of of UTF-8 sequence.
- */
-
-uint stride(char[] s, size_t i)
-{
-    return UTF8stride[s[i]];
-}
-
-/**
- * stride() returns the length of a UTF-16 sequence starting at index i
- * in string s.
- */
-
-uint stride(wchar[] s, size_t i)
-{   uint u = s[i];
-    return 1 + (u >= 0xD800 && u <= 0xDBFF);
-}
-
-/**
- * stride() returns the length of a UTF-32 sequence starting at index i
- * in string s.
- * Returns: The return value will always be 1.
- */
-
-uint stride(dchar[] s, size_t i)
-{
-    return 1;
-}
-
-/*******************************************
- * Given an index i into an array of characters s[],
- * and assuming that index i is at the start of a UTF character,
- * determine the number of UCS characters up to that index i.
- */
-
-size_t toUCSindex(char[] s, size_t i)
-{
-    size_t n;
-    size_t j;
-    size_t stride;
-
-    for (j = 0; j < i; j += stride)
-    {
-	stride = UTF8stride[s[j]];
-	if (stride == 0xFF)
-	    goto Lerr;
-	n++;
-    }
-    if (j > i)
-    {
-      Lerr:
-	throw new UtfException("1invalid UTF-8 sequence", j);
-    }
-    return n;
-}
-
-/** ditto */
-
-size_t toUCSindex(wchar[] s, size_t i)
-{
-    size_t n;
-    size_t j;
-
-    for (j = 0; j < i; )
-    {	uint u = s[j];
-
-	j += 1 + (u >= 0xD800 && u <= 0xDBFF);
-	n++;
-    }
-    if (j > i)
-    {
-      Lerr:
-	throw new UtfException("2invalid UTF-16 sequence", j);
-    }
-    return n;
-}
-
-/** ditto */
-
-size_t toUCSindex(dchar[] s, size_t i)
-{
-    return i;
-}
-
-/******************************************
- * Given a UCS index n into an array of characters s[], return the UTF index.
- */
-
-size_t toUTFindex(char[] s, size_t n)
-{
-    size_t i;
-
-    while (n--)
-    {
-	uint j = UTF8stride[s[i]];
-	if (j == 0xFF)
-	    throw new UtfException("3invalid UTF-8 sequence", i);
-	i += j;
-    }
-    return i;
-}
-
-/** ditto */
-
-size_t toUTFindex(wchar[] s, size_t n)
-{
-    size_t i;
-
-    while (n--)
-    {	wchar u = s[i];
-
-	i += 1 + (u >= 0xD800 && u <= 0xDBFF);
-    }
-    return i;
-}
-
-/** ditto */
-
-size_t toUTFindex(dchar[] s, size_t n)
-{
-    return n;
-}
-
-/* =================== Decode ======================= */
-
-/***************
- * Decodes and returns character starting at s[idx]. idx is advanced past the
- * decoded character. If the character is not well formed, a UtfException is
- * thrown and idx remains unchanged.
- */
-
-dchar decode(char[] s, inout size_t idx)
-    in
-    {
-	assert(idx >= 0 && idx < s.length);
-    }
-    out (result)
-    {
-	assert(isValidDchar(result));
-    }
-    body
-    {
-	size_t len = s.length;
-	dchar V;
-	size_t i = idx;
-	char u = s[i];
-
-	if (u & 0x80)
-	{   uint n;
-	    char u2;
-
-	    /* The following encodings are valid, except for the 5 and 6 byte
-	     * combinations:
-	     *	0xxxxxxx
-	     *	110xxxxx 10xxxxxx
-	     *	1110xxxx 10xxxxxx 10xxxxxx
-	     *	11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-	     *	111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-	     *	1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-	     */
-	    for (n = 1; ; n++)
-	    {
-		if (n > 4)
-		    goto Lerr;		// only do the first 4 of 6 encodings
-		if (((u << n) & 0x80) == 0)
-		{
-		    if (n == 1)
-			goto Lerr;
-		    break;
-		}
-	    }
-
-	    // Pick off (7 - n) significant bits of B from first byte of octet
-	    V = cast(dchar)(u & ((1 << (7 - n)) - 1));
-
-	    if (i + (n - 1) >= len)
-		goto Lerr;			// off end of string
-
-	    /* The following combinations are overlong, and illegal:
-	     *	1100000x (10xxxxxx)
-	     *	11100000 100xxxxx (10xxxxxx)
-	     *	11110000 1000xxxx (10xxxxxx 10xxxxxx)
-	     *	11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
-	     *	11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
-	     */
-	    u2 = s[i + 1];
-	    if ((u & 0xFE) == 0xC0 ||
-		(u == 0xE0 && (u2 & 0xE0) == 0x80) ||
-		(u == 0xF0 && (u2 & 0xF0) == 0x80) ||
-		(u == 0xF8 && (u2 & 0xF8) == 0x80) ||
-		(u == 0xFC && (u2 & 0xFC) == 0x80))
-		goto Lerr;			// overlong combination
-
-	    for (uint j = 1; j != n; j++)
-	    {
-		u = s[i + j];
-		if ((u & 0xC0) != 0x80)
-		    goto Lerr;			// trailing bytes are 10xxxxxx
-		V = (V << 6) | (u & 0x3F);
-	    }
-	    if (!isValidDchar(V))
-		goto Lerr;
-	    i += n;
-	}
-	else
-	{
-	    V = cast(dchar) u;
-	    i++;
-	}
-
-	idx = i;
-	return V;
-
-      Lerr:
-	//printf("\ndecode: idx = %d, i = %d, length = %d s = \n'%.*s'\n%x\n'%.*s'\n", idx, i, s.length, s, s[i], s[i .. length]);
-	throw new UtfException("4invalid UTF-8 sequence", i);
-    }
-
-unittest
-{   size_t i;
-    dchar c;
-
-    debug(utf) printf("utf.decode.unittest\n");
-
-    static char[] s1 = "abcd";
-    i = 0;
-    c = decode(s1, i);
-    assert(c == cast(dchar)'a');
-    assert(i == 1);
-    c = decode(s1, i);
-    assert(c == cast(dchar)'b');
-    assert(i == 2);
-
-    static char[] s2 = "\xC2\xA9";
-    i = 0;
-    c = decode(s2, i);
-    assert(c == cast(dchar)'\u00A9');
-    assert(i == 2);
-
-    static char[] s3 = "\xE2\x89\xA0";
-    i = 0;
-    c = decode(s3, i);
-    assert(c == cast(dchar)'\u2260');
-    assert(i == 3);
-
-    static char[][] s4 =
-    [	"\xE2\x89",		// too short
-	"\xC0\x8A",
-	"\xE0\x80\x8A",
-	"\xF0\x80\x80\x8A",
-	"\xF8\x80\x80\x80\x8A",
-	"\xFC\x80\x80\x80\x80\x8A",
-    ];
-
-    for (int j = 0; j < s4.length; j++)
-    {
-	try
-	{
-	    i = 0;
-	    c = decode(s4[j], i);
-	    assert(0);
-	}
-	catch (UtfException u)
-	{
-	    i = 23;
-	    delete u;
-	}
-	assert(i == 23);
-    }
-}
-
-/** ditto */
-
-dchar decode(wchar[] s, inout size_t idx)
-    in
-    {
-	assert(idx >= 0 && idx < s.length);
-    }
-    out (result)
-    {
-	assert(isValidDchar(result));
-    }
-    body
-    {
-	char[] msg;
-	dchar V;
-	size_t i = idx;
-	uint u = s[i];
-
-	if (u & ~0x7F)
-	{   if (u >= 0xD800 && u <= 0xDBFF)
-	    {   uint u2;
-
-		if (i + 1 == s.length)
-		{   msg = "surrogate UTF-16 high value past end of string";
-		    goto Lerr;
-		}
-		u2 = s[i + 1];
-		if (u2 < 0xDC00 || u2 > 0xDFFF)
-		{   msg = "surrogate UTF-16 low value out of range";
-		    goto Lerr;
-		}
-		u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
-		i += 2;
-	    }
-	    else if (u >= 0xDC00 && u <= 0xDFFF)
-	    {   msg = "unpaired surrogate UTF-16 value";
-		goto Lerr;
-	    }
-	    else if (u == 0xFFFE || u == 0xFFFF)
-	    {   msg = "illegal UTF-16 value";
-		goto Lerr;
-	    }
-	    else
-		i++;
-	}
-	else
-	{
-	    i++;
-	}
-
-	idx = i;
-	return cast(dchar)u;
-
-      Lerr:
-	throw new UtfException(msg, i);
-    }
-
-/** ditto */
-
-dchar decode(dchar[] s, inout size_t idx)
-    in
-    {
-	assert(idx >= 0 && idx < s.length);
-    }
-    body
-    {
-	size_t i = idx;
-	dchar c = s[i];
-
-	if (!isValidDchar(c))
-	    goto Lerr;
-	idx = i + 1;
-	return c;
-
-      Lerr:
-	throw new UtfException("5invalid UTF-32 value", i);
-    }
-
-
-/* =================== Encode ======================= */
-
-/*******************************
- * Encodes character c and appends it to array s[].
- */
-
-void encode(inout char[] s, dchar c)
-    in
-    {
-	assert(isValidDchar(c));
-    }
-    body
-    {
-	char[] r = s;
-
-	if (c <= 0x7F)
-	{
-	    r ~= cast(char) c;
-	}
-	else
-	{
-	    char[4] buf;
-	    uint L;
-
-	    if (c <= 0x7FF)
-	    {
-		buf[0] = cast(char)(0xC0 | (c >> 6));
-		buf[1] = cast(char)(0x80 | (c & 0x3F));
-		L = 2;
-	    }
-	    else if (c <= 0xFFFF)
-	    {
-		buf[0] = cast(char)(0xE0 | (c >> 12));
-		buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
-		buf[2] = cast(char)(0x80 | (c & 0x3F));
-		L = 3;
-	    }
-	    else if (c <= 0x10FFFF)
-	    {
-		buf[0] = cast(char)(0xF0 | (c >> 18));
-		buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
-		buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
-		buf[3] = cast(char)(0x80 | (c & 0x3F));
-		L = 4;
-	    }
-	    else
-	    {
-		assert(0);
-	    }
-	    r ~= buf[0 .. L];
-	}
-	s = r;
-    }
-
-unittest
-{
-    debug(utf) printf("utf.encode.unittest\n");
-
-    char[] s = "abcd";
-    encode(s, cast(dchar)'a');
-    assert(s.length == 5);
-    assert(s == "abcda");
-
-    encode(s, cast(dchar)'\u00A9');
-    assert(s.length == 7);
-    assert(s == "abcda\xC2\xA9");
-    //assert(s == "abcda\u00A9");	// BUG: fix compiler
-
-    encode(s, cast(dchar)'\u2260');
-    assert(s.length == 10);
-    assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
-}
-
-/** ditto */
-
-void encode(inout wchar[] s, dchar c)
-    in
-    {
-	assert(isValidDchar(c));
-    }
-    body
-    {
-	wchar[] r = s;
-
-	if (c <= 0xFFFF)
-	{
-	    r ~= cast(wchar) c;
-	}
-	else
-	{
-	    wchar[2] buf;
-
-	    buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
-	    buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
-	    r ~= buf;
-	}
-	s = r;
-    }
-
-/** ditto */
-
-void encode(inout dchar[] s, dchar c)
-    in
-    {
-	assert(isValidDchar(c));
-    }
-    body
-    {
-	s ~= c;
-    }
-
-/* =================== Validation ======================= */
-
-/***********************************
- * Checks to see if string is well formed or not. Throws a UtfException if it is
- * not. Use to check all untrusted input for correctness.
- */
-
-void validate(char[] s)
-{
-    size_t len = s.length;
-    size_t i;
-
-    for (i = 0; i < len; )
-    {
-	decode(s, i);
-    }
-}
-
-/** ditto */
-
-void validate(wchar[] s)
-{
-    size_t len = s.length;
-    size_t i;
-
-    for (i = 0; i < len; )
-    {
-	decode(s, i);
-    }
-}
-
-/** ditto */
-
-void validate(dchar[] s)
-{
-    size_t len = s.length;
-    size_t i;
-
-    for (i = 0; i < len; )
-    {
-	decode(s, i);
-    }
-}
-
-/* =================== Conversion to UTF8 ======================= */
-
-char[] toUTF8(char[4] buf, dchar c)
-    in
-    {
-	assert(isValidDchar(c));
-    }
-    body
-    {
-	if (c <= 0x7F)
-	{
-	    buf[0] = cast(char) c;
-	    return buf[0 .. 1];
-	}
-	else if (c <= 0x7FF)
-	{
-	    buf[0] = cast(char)(0xC0 | (c >> 6));
-	    buf[1] = cast(char)(0x80 | (c & 0x3F));
-	    return buf[0 .. 2];
-	}
-	else if (c <= 0xFFFF)
-	{
-	    buf[0] = cast(char)(0xE0 | (c >> 12));
-	    buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
-	    buf[2] = cast(char)(0x80 | (c & 0x3F));
-	    return buf[0 .. 3];
-	}
-	else if (c <= 0x10FFFF)
-	{
-	    buf[0] = cast(char)(0xF0 | (c >> 18));
-	    buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
-	    buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
-	    buf[3] = cast(char)(0x80 | (c & 0x3F));
-	    return buf[0 .. 4];
-	}
-	assert(0);
-    }
-
-/*******************
- * Encodes string s into UTF-8 and returns the encoded string.
- */
-
-char[] toUTF8(char[] s)
-    in
-    {
-	validate(s);
-    }
-    body
-    {
-	return s;
-    }
-
-/** ditto */
-
-char[] toUTF8(wchar[] s)
-{
-    char[] r;
-    size_t i;
-    size_t slen = s.length;
-
-    r.length = slen;
-
-    for (i = 0; i < slen; i++)
-    {	wchar c = s[i];
-
-	if (c <= 0x7F)
-	    r[i] = cast(char)c;		// fast path for ascii
-	else
-	{
-	    r.length = i;
-	    foreach (dchar c; s[i .. slen])
-	    {
-		encode(r, c);
-	    }
-	    break;
-	}
-    }
-    return r;
-}
-
-/** ditto */
-
-char[] toUTF8(dchar[] s)
-{
-    char[] r;
-    size_t i;
-    size_t slen = s.length;
-
-    r.length = slen;
-
-    for (i = 0; i < slen; i++)
-    {	dchar c = s[i];
-
-	if (c <= 0x7F)
-	    r[i] = cast(char)c;		// fast path for ascii
-	else
-	{
-	    r.length = i;
-	    foreach (dchar d; s[i .. slen])
-	    {
-		encode(r, d);
-	    }
-	    break;
-	}
-    }
-    return r;
-}
-
-/* =================== Conversion to UTF16 ======================= */
-
-wchar[] toUTF16(wchar[2] buf, dchar c)
-    in
-    {
-	assert(isValidDchar(c));
-    }
-    body
-    {
-	if (c <= 0xFFFF)
-	{
-	    buf[0] = cast(wchar) c;
-	    return buf[0 .. 1];
-	}
-	else
-	{
-	    buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
-	    buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
-	    return buf[0 .. 2];
-	}
-    }
-
-/****************
- * Encodes string s into UTF-16 and returns the encoded string.
- * toUTF16z() is suitable for calling the 'W' functions in the Win32 API that take
- * an LPWSTR or LPCWSTR argument.
- */
-
-wchar[] toUTF16(char[] s)
-{
-    wchar[] r;
-    size_t slen = s.length;
-
-    r.length = slen;
-    r.length = 0;
-    for (size_t i = 0; i < slen; )
-    {
-	dchar c = s[i];
-	if (c <= 0x7F)
-	{
-	    i++;
-	    r ~= cast(wchar)c;
-	}
-	else
-	{
-	    c = decode(s, i);
-	    encode(r, c);
-	}
-    }
-    return r;
-}
-
-/** ditto */
-
-wchar* toUTF16z(char[] s)
-{
-    wchar[] r;
-    size_t slen = s.length;
-
-    r.length = slen + 1;
-    r.length = 0;
-    for (size_t i = 0; i < slen; )
-    {
-	dchar c = s[i];
-	if (c <= 0x7F)
-	{
-	    i++;
-	    r ~= cast(wchar)c;
-	}
-	else
-	{
-	    c = decode(s, i);
-	    encode(r, c);
-	}
-    }
-    r ~= "\000";
-    return r.ptr;
-}
-
-/** ditto */
-
-wchar[] toUTF16(wchar[] s)
-    in
-    {
-	validate(s);
-    }
-    body
-    {
-	return s;
-    }
-
-/** ditto */
-
-wchar[] toUTF16(dchar[] s)
-{
-    wchar[] r;
-    size_t slen = s.length;
-
-    r.length = slen;
-    r.length = 0;
-    for (size_t i = 0; i < slen; i++)
-    {
-	encode(r, s[i]);
-    }
-    return r;
-}
-
-/* =================== Conversion to UTF32 ======================= */
-
-/*****
- * Encodes string s into UTF-32 and returns the encoded string.
- */
-
-dchar[] toUTF32(char[] s)
-{
-    dchar[] r;
-    size_t slen = s.length;
-    size_t j = 0;
-
-    r.length = slen;		// r[] will never be longer than s[]
-    for (size_t i = 0; i < slen; )
-    {
-	dchar c = s[i];
-	if (c >= 0x80)
-	    c = decode(s, i);
-	else
-	    i++;		// c is ascii, no need for decode
-	r[j++] = c;
-    }
-    return r[0 .. j];
-}
-
-/** ditto */
-
-dchar[] toUTF32(wchar[] s)
-{
-    dchar[] r;
-    size_t slen = s.length;
-    size_t j = 0;
-
-    r.length = slen;		// r[] will never be longer than s[]
-    for (size_t i = 0; i < slen; )
-    {
-	dchar c = s[i];
-	if (c >= 0x80)
-	    c = decode(s, i);
-	else
-	    i++;		// c is ascii, no need for decode
-	r[j++] = c;
-    }
-    return r[0 .. j];
-}
-
-/** ditto */
-
-dchar[] toUTF32(dchar[] s)
-    in
-    {
-	validate(s);
-    }
-    body
-    {
-	return s;
-    }
-
-/* ================================ tests ================================== */
-
-unittest
-{
-    debug(utf) printf("utf.toUTF.unittest\n");
-
-    char[] c;
-    wchar[] w;
-    dchar[] d;
-
-    c = "hello";
-    w = toUTF16(c);
-    assert(w == "hello");
-    d = toUTF32(c);
-    assert(d == "hello");
-
-    c = toUTF8(w);
-    assert(c == "hello");
-    d = toUTF32(w);
-    assert(d == "hello");
-
-    c = toUTF8(d);
-    assert(c == "hello");
-    w = toUTF16(d);
-    assert(w == "hello");
-
-
-    c = "hel\u1234o";
-    w = toUTF16(c);
-    assert(w == "hel\u1234o");
-    d = toUTF32(c);
-    assert(d == "hel\u1234o");
-
-    c = toUTF8(w);
-    assert(c == "hel\u1234o");
-    d = toUTF32(w);
-    assert(d == "hel\u1234o");
-
-    c = toUTF8(d);
-    assert(c == "hel\u1234o");
-    w = toUTF16(d);
-    assert(w == "hel\u1234o");
-
-
-    c = "he\U0010AAAAllo";
-    w = toUTF16(c);
-    //foreach (wchar c; w) printf("c = x%x\n", c);
-    //foreach (wchar c; cast(wchar[])"he\U0010AAAAllo") printf("c = x%x\n", c);
-    assert(w == "he\U0010AAAAllo");
-    d = toUTF32(c);
-    assert(d == "he\U0010AAAAllo");
-
-    c = toUTF8(w);
-    assert(c == "he\U0010AAAAllo");
-    d = toUTF32(w);
-    assert(d == "he\U0010AAAAllo");
-
-    c = toUTF8(d);
-    assert(c == "he\U0010AAAAllo");
-    w = toUTF16(d);
-    assert(w == "he\U0010AAAAllo");
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/util/metastrings.d	Fri Jan 11 20:03:46 2008 +0100
@@ -0,0 +1,225 @@
+
+// Written in the D programming language.
+
+/**
+ * Templates with which to do compile time manipulation of strings.
+ *
+ * Macros:
+ *	WIKI = Phobos/StdMetastrings
+ * Copyright:
+ *	Public Domain
+ */
+
+/*
+ * Authors:
+ *	Walter Bright, Digital Mars, www.digitalmars.com
+ *	Don Clugston
+ */
+
+/*
+  Note: this is not the original file!
+  Modified by Aziz Köksal:
+    Only changed some types from string to char[]
+*/
+
+module util.metastrings;
+
+/**
+ * Formats constants into a string at compile time.
+ * Analogous to std.string.format().
+ * Parameters:
+ *	A =	tuple of constants, which can be strings,
+ *		characters, or integral values.
+ * Formats:
+ *	The formats supported are %s for strings, and %%
+ *	for the % character.
+ * Example:
+ * ---
+import std.metastrings;
+import std.stdio;
+
+void main()
+{
+  string s = Format!("Arg %s = %s", "foo", 27);
+  writefln(s); // "Arg foo = 27"
+}
+ * ---
+ */
+
+template Format(A...)
+{
+    static if (A.length == 0)
+	const char[] Format = "";
+    else static if (is(typeof(A[0]) : char[]))
+	const char[] Format = FormatString!(A[0], A[1..$]);
+	//const char[] Format = FormatString!(A[0]);
+    else
+	const char[] Format = ToString!(A[0]) ~ Format!(A[1..$]);
+}
+
+template FormatString(char[] F, A...)
+{
+    static if (F.length == 0)
+	const char[] FormatString = Format!(A);
+    else static if (F.length == 1)
+	const char[] FormatString = F[0] ~ Format!(A);
+    else static if (F[0..2] == "%s")
+	const char[] FormatString = ToString!(A[0]) ~ FormatString!(F[2..$],A[1..$]);
+    else static if (F[0..2] == "%%")
+	const char[] FormatString = "%" ~ FormatString!(F[2..$],A);
+    else static if (F[0] == '%')
+	static assert(0, "unrecognized format %" ~ F[1]);
+    else
+	const char[] FormatString = F[0] ~ FormatString!(F[1..$],A);
+}
+
+/**
+ * Convert constant argument to a string.
+ */
+
+template ToString(ulong U)
+{
+    static if (U < 10)
+	const char[] ToString = "" ~ cast(char)(U + '0');
+    else
+	const char[] ToString = ToString!(U / 10) ~ ToString!(U % 10);
+}
+
+/// ditto
+template ToString(long I)
+{
+    static if (I < 0)
+	const char[] ToString = "-" ~ ToString!(cast(ulong)(-I));
+    else
+	const char[] ToString = ToString!(cast(ulong)I);
+}
+
+static assert(ToString!(0x100000000) == "4294967296");
+
+/// ditto
+template ToString(uint U)
+{
+    const char[] ToString = ToString!(cast(ulong)U);
+}
+
+/// ditto
+template ToString(int I)
+{
+    const char[] ToString = ToString!(cast(long)I);
+}
+
+/// ditto
+template ToString(ushort U)
+{
+    const char[] ToString = ToString!(cast(ulong)U);
+}
+
+/// ditto
+template ToString(short I)
+{
+    const char[] ToString = ToString!(cast(long)I);
+}
+
+/// ditto
+template ToString(ubyte U)
+{
+    const char[] ToString = ToString!(cast(ulong)U);
+}
+
+/// ditto
+template ToString(byte I)
+{
+    const char[] ToString = ToString!(cast(long)I);
+}
+
+/// ditto
+template ToString(bool B)
+{
+    const char[] ToString = B ? "true" : "false";
+}
+
+/// ditto
+template ToString(char[] S)
+{
+    const char[] ToString = S;
+}
+
+/// ditto
+template ToString(char C)
+{
+    const char[] ToString = "" ~ C;
+}
+
+unittest
+{
+    char[] s = Format!("hel%slo", "world", -138, 'c', true);
+    assert(s == "helworldlo-138ctrue");
+}
+
+
+/********
+ * Parse unsigned integer literal from the start of string s.
+ * returns:
+ *	.value = the integer literal as a string,
+ *	.rest = the string following the integer literal
+ * Otherwise:
+ *	.value = null,
+ *	.rest = s
+ */
+
+template ParseUinteger(char[] s)
+{
+    static if (s.length == 0)
+    {	const char[] value = "";
+	const char[] rest = "";
+    }
+    else static if (s[0] >= '0' && s[0] <= '9')
+    {	const char[] value = s[0] ~ ParseUinteger!(s[1..$]).value;
+	const char[] rest = ParseUinteger!(s[1..$]).rest;
+    }
+    else
+    {	const char[] value = "";
+	const char[] rest = s;
+    }
+}
+
+/********
+ * Parse integer literal optionally preceded by '-'
+ * from the start of string s.
+ * returns:
+ *	.value = the integer literal as a string,
+ *	.rest = the string following the integer literal
+ * Otherwise:
+ *	.value = null,
+ *	.rest = s
+ */
+
+template ParseInteger(char[] s)
+{
+    static if (s.length == 0)
+    {	const char[] value = "";
+	const char[] rest = "";
+    }
+    else static if (s[0] >= '0' && s[0] <= '9')
+    {	const char[] value = s[0] ~ ParseUinteger!(s[1..$]).value;
+	const char[] rest = ParseUinteger!(s[1..$]).rest;
+    }
+    else static if (s.length >= 2 &&
+		s[0] == '-' && s[1] >= '0' && s[1] <= '9')
+    {	const char[] value = s[0..2] ~ ParseUinteger!(s[2..$]).value;
+	const char[] rest = ParseUinteger!(s[2..$]).rest;
+    }
+    else
+    {	const char[] value = "";
+	const char[] rest = s;
+    }
+}
+
+unittest
+{
+    assert(ParseUinteger!("1234abc").value == "1234");
+    assert(ParseUinteger!("1234abc").rest == "abc");
+    assert(ParseInteger!("-1234abc").value == "-1234");
+    assert(ParseInteger!("-1234abc").rest == "abc");
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/util/uni.d	Fri Jan 11 20:03:46 2008 +0100
@@ -0,0 +1,630 @@
+
+// Written in the D programming language.
+
+/*
+ * Placed into the Public Domain.
+ * Digital Mars, www.digitalmars.com
+ * Written by Walter Bright
+ */
+
+/**
+ * Simple Unicode character classification functions.
+ * For ASCII classification, see $(LINK2 std_ctype.html, std.ctype).
+ * Macros:
+ *	WIKI=Phobos/StdUni
+ * References:
+ *	$(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
+ *	$(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia),
+ *	$(LINK2 http://www.unicode.org, The Unicode Consortium)
+ * Trademarks:
+ *	Unicode(tm) is a trademark of Unicode, Inc.
+ */
+
+
+module util.uni;
+
+/**
+ * Returns !=0 if c is a Unicode lower case character.
+ */
+int isUniLower(dchar c)
+{
+    if (c <= 0x7F)
+	return (c >= 'a' && c <= 'z');
+
+    return isUniAlpha(c) && c == toUniLower(c);
+}
+
+/**
+ * Returns !=0 if c is a Unicode upper case character.
+ */
+int isUniUpper(dchar c)
+{
+    if (c <= 0x7F)
+	return (c >= 'A' && c <= 'Z');
+
+    return isUniAlpha(c) && c == toUniUpper(c);
+}
+
+/**
+ * If c is a Unicode upper case character, return the lower case
+ * equivalent, otherwise return c.
+ */
+dchar toUniLower(dchar c)
+{
+    if (c >= 'A' && c <= 'Z')
+    {
+        c += 32;
+    }
+    else if (c >= 0x00C0)
+    {
+	if ((c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c<=0x00DE))
+	{
+	    c += 32;
+	}
+	else if ((c >= 0x0100 && c < 0x0138) || (c > 0x0149 && c < 0x0178))
+	{
+	    if (c == 0x0130)
+		c = 0x0069;
+	    else if ((c & 1) == 0)
+		c += 1;
+	}
+	else if (c == 0x0178)
+	{
+	    c = 0x00FF;
+	}
+	else if ((c >= 0x0139 && c < 0x0149) || (c > 0x0178 && c < 0x017F))
+	{
+	    if (c & 1)
+		c += 1;
+	}
+	else if (c >= 0x0200 && c <= 0x0217)
+	{
+	    if ((c & 1) == 0)
+		c += 1;
+	}
+	else if ((c >= 0x0401 && c <= 0x040C) || (c>= 0x040E && c <= 0x040F))
+	{
+	    c += 80;
+	}
+	else if (c >= 0x0410  && c <= 0x042F)
+	{
+	    c += 32;
+	}
+	else if (c >= 0x0460 && c <= 0x047F)
+	{
+	    if ((c & 1) == 0)
+		c += 1;
+	}
+	else if (c >= 0x0531 && c <= 0x0556)
+	{
+	    c += 48;
+	}
+	else if (c >= 0x10A0 && c <= 0x10C5)
+	{
+	    c += 48;
+	}
+	else if (c >= 0xFF21 && c <= 0xFF3A)
+	{
+	    c += 32;
+	}
+    }
+    return c;
+}
+
+/**
+ * If c is a Unicode lower case character, return the upper case
+ * equivalent, otherwise return c.
+ */
+dchar toUniUpper(dchar c)
+{
+    if (c >= 'a' && c <= 'z')
+    {
+	c -= 32;
+    }
+    else if (c >= 0x00E0)
+    {
+	if ((c >= 0x00E0 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FE))
+	{
+	    c -= 32;
+	}
+	else if (c == 0x00FF)
+	{
+	    c = 0x0178;
+	}
+	else if ((c >= 0x0100 && c < 0x0138) || (c > 0x0149 && c < 0x0178))
+	{
+	    if (c == 0x0131)
+		c = 0x0049;
+	    else if (c & 1)
+		c -= 1;
+	}
+	else if ((c >= 0x0139 && c < 0x0149) || (c > 0x0178 && c < 0x017F))
+	{
+	    if ((c & 1) == 0)
+		c = c-1;
+	}
+	else if (c == 0x017F)
+	{
+	    c = 0x0053;
+	}
+	else if (c >= 0x0200 && c <= 0x0217)
+	{
+	    if (c & 1)
+		c = c-1;
+	}
+	else if (c >= 0x0430 && c<= 0x044F)
+	{
+	    c -= 32;
+	}
+	else if ((c >= 0x0451 && c <= 0x045C) || (c >=0x045E && c<= 0x045F))
+	{
+	    c -= 80;
+	}
+	else if (c >= 0x0460 && c <= 0x047F)
+	{
+	    if (c & 1)
+		c -= 1;
+	}
+	else if (c >= 0x0561 && c < 0x0587)
+	{
+	    c -= 48;
+	}
+	else if (c >= 0xFF41 && c <= 0xFF5A)
+	{
+	    c -= 32;
+	}
+    }
+    return c;
+}
+
+
+/*******************************
+ * Return !=0 if u is a Unicode alpha character.
+ * (general Unicode category: Lu, Ll, Lt, Lm and Lo)
+ *
+ * Standards: Unicode 5.0.0
+ */
+
+int isUniAlpha(dchar u)
+{
+    static dchar table[][2] =
+    [
+	[ 'A', 'Z' ],
+	[ 'a', 'z' ],
+	[ 0x00AA, 0x00AA ],
+	[ 0x00B5, 0x00B5 ],
+	[ 0x00BA, 0x00BA ],
+	[ 0x00C0, 0x00D6 ],
+	[ 0x00D8, 0x00F6 ],
+	[ 0x00F8, 0x02C1 ],
+	[ 0x02C6, 0x02D1 ],
+	[ 0x02E0, 0x02E4 ],
+	[ 0x02EE, 0x02EE ],
+	[ 0x037A, 0x037D ],
+	[ 0x0386, 0x0386 ],
+	[ 0x0388, 0x038A ],
+	[ 0x038C, 0x038C ],
+	[ 0x038E, 0x03A1 ],
+	[ 0x03A3, 0x03CE ],
+	[ 0x03D0, 0x03F5 ],
+	[ 0x03F7, 0x0481 ],
+	[ 0x048A, 0x0513 ],
+	[ 0x0531, 0x0556 ],
+	[ 0x0559, 0x0559 ],
+	[ 0x0561, 0x0587 ],
+	[ 0x05D0, 0x05EA ],
+	[ 0x05F0, 0x05F2 ],
+	[ 0x0621, 0x063A ],
+	[ 0x0640, 0x064A ],
+	[ 0x066E, 0x066F ],
+	[ 0x0671, 0x06D3 ],
+	[ 0x06D5, 0x06D5 ],
+	[ 0x06E5, 0x06E6 ],
+	[ 0x06EE, 0x06EF ],
+	[ 0x06FA, 0x06FC ],
+	[ 0x06FF, 0x06FF ],
+	[ 0x0710, 0x0710 ],
+	[ 0x0712, 0x072F ],
+	[ 0x074D, 0x076D ],
+	[ 0x0780, 0x07A5 ],
+	[ 0x07B1, 0x07B1 ],
+	[ 0x07CA, 0x07EA ],
+	[ 0x07F4, 0x07F5 ],
+	[ 0x07FA, 0x07FA ],
+	[ 0x0904, 0x0939 ],
+	[ 0x093D, 0x093D ],
+	[ 0x0950, 0x0950 ],
+	[ 0x0958, 0x0961 ],
+	[ 0x097B, 0x097F ],
+	[ 0x0985, 0x098C ],
+	[ 0x098F, 0x0990 ],
+	[ 0x0993, 0x09A8 ],
+	[ 0x09AA, 0x09B0 ],
+	[ 0x09B2, 0x09B2 ],
+	[ 0x09B6, 0x09B9 ],
+	[ 0x09BD, 0x09BD ],
+	[ 0x09CE, 0x09CE ],
+	[ 0x09DC, 0x09DD ],
+	[ 0x09DF, 0x09E1 ],
+	[ 0x09F0, 0x09F1 ],
+	[ 0x0A05, 0x0A0A ],
+	[ 0x0A0F, 0x0A10 ],
+	[ 0x0A13, 0x0A28 ],
+	[ 0x0A2A, 0x0A30 ],
+	[ 0x0A32, 0x0A33 ],
+	[ 0x0A35, 0x0A36 ],
+	[ 0x0A38, 0x0A39 ],
+	[ 0x0A59, 0x0A5C ],
+	[ 0x0A5E, 0x0A5E ],
+	[ 0x0A72, 0x0A74 ],
+	[ 0x0A85, 0x0A8D ],
+	[ 0x0A8F, 0x0A91 ],
+	[ 0x0A93, 0x0AA8 ],
+	[ 0x0AAA, 0x0AB0 ],
+	[ 0x0AB2, 0x0AB3 ],
+	[ 0x0AB5, 0x0AB9 ],
+	[ 0x0ABD, 0x0ABD ],
+	[ 0x0AD0, 0x0AD0 ],
+	[ 0x0AE0, 0x0AE1 ],
+	[ 0x0B05, 0x0B0C ],
+	[ 0x0B0F, 0x0B10 ],
+	[ 0x0B13, 0x0B28 ],
+	[ 0x0B2A, 0x0B30 ],
+	[ 0x0B32, 0x0B33 ],
+	[ 0x0B35, 0x0B39 ],
+	[ 0x0B3D, 0x0B3D ],
+	[ 0x0B5C, 0x0B5D ],
+	[ 0x0B5F, 0x0B61 ],
+	[ 0x0B71, 0x0B71 ],
+	[ 0x0B83, 0x0B83 ],
+	[ 0x0B85, 0x0B8A ],
+	[ 0x0B8E, 0x0B90 ],
+	[ 0x0B92, 0x0B95 ],
+	[ 0x0B99, 0x0B9A ],
+	[ 0x0B9C, 0x0B9C ],
+	[ 0x0B9E, 0x0B9F ],
+	[ 0x0BA3, 0x0BA4 ],
+	[ 0x0BA8, 0x0BAA ],
+	[ 0x0BAE, 0x0BB9 ],
+	[ 0x0C05, 0x0C0C ],
+	[ 0x0C0E, 0x0C10 ],
+	[ 0x0C12, 0x0C28 ],
+	[ 0x0C2A, 0x0C33 ],
+	[ 0x0C35, 0x0C39 ],
+	[ 0x0C60, 0x0C61 ],
+	[ 0x0C85, 0x0C8C ],
+	[ 0x0C8E, 0x0C90 ],
+	[ 0x0C92, 0x0CA8 ],
+	[ 0x0CAA, 0x0CB3 ],
+	[ 0x0CB5, 0x0CB9 ],
+	[ 0x0CBD, 0x0CBD ],
+	[ 0x0CDE, 0x0CDE ],
+	[ 0x0CE0, 0x0CE1 ],
+	[ 0x0D05, 0x0D0C ],
+	[ 0x0D0E, 0x0D10 ],
+	[ 0x0D12, 0x0D28 ],
+	[ 0x0D2A, 0x0D39 ],
+	[ 0x0D60, 0x0D61 ],
+	[ 0x0D85, 0x0D96 ],
+	[ 0x0D9A, 0x0DB1 ],
+	[ 0x0DB3, 0x0DBB ],
+	[ 0x0DBD, 0x0DBD ],
+	[ 0x0DC0, 0x0DC6 ],
+	[ 0x0E01, 0x0E30 ],
+	[ 0x0E32, 0x0E33 ],
+	[ 0x0E40, 0x0E46 ],
+	[ 0x0E81, 0x0E82 ],
+	[ 0x0E84, 0x0E84 ],
+	[ 0x0E87, 0x0E88 ],
+	[ 0x0E8A, 0x0E8A ],
+	[ 0x0E8D, 0x0E8D ],
+	[ 0x0E94, 0x0E97 ],
+	[ 0x0E99, 0x0E9F ],
+	[ 0x0EA1, 0x0EA3 ],
+	[ 0x0EA5, 0x0EA5 ],
+	[ 0x0EA7, 0x0EA7 ],
+	[ 0x0EAA, 0x0EAB ],
+	[ 0x0EAD, 0x0EB0 ],
+	[ 0x0EB2, 0x0EB3 ],
+	[ 0x0EBD, 0x0EBD ],
+	[ 0x0EC0, 0x0EC4 ],
+	[ 0x0EC6, 0x0EC6 ],
+	[ 0x0EDC, 0x0EDD ],
+	[ 0x0F00, 0x0F00 ],
+	[ 0x0F40, 0x0F47 ],
+	[ 0x0F49, 0x0F6A ],
+	[ 0x0F88, 0x0F8B ],
+	[ 0x1000, 0x1021 ],
+	[ 0x1023, 0x1027 ],
+	[ 0x1029, 0x102A ],
+	[ 0x1050, 0x1055 ],
+	[ 0x10A0, 0x10C5 ],
+	[ 0x10D0, 0x10FA ],
+	[ 0x10FC, 0x10FC ],
+	[ 0x1100, 0x1159 ],
+	[ 0x115F, 0x11A2 ],
+	[ 0x11A8, 0x11F9 ],
+	[ 0x1200, 0x1248 ],
+	[ 0x124A, 0x124D ],
+	[ 0x1250, 0x1256 ],
+	[ 0x1258, 0x1258 ],
+	[ 0x125A, 0x125D ],
+	[ 0x1260, 0x1288 ],
+	[ 0x128A, 0x128D ],
+	[ 0x1290, 0x12B0 ],
+	[ 0x12B2, 0x12B5 ],
+	[ 0x12B8, 0x12BE ],
+	[ 0x12C0, 0x12C0 ],
+	[ 0x12C2, 0x12C5 ],
+	[ 0x12C8, 0x12D6 ],
+	[ 0x12D8, 0x1310 ],
+	[ 0x1312, 0x1315 ],
+	[ 0x1318, 0x135A ],
+	[ 0x1380, 0x138F ],
+	[ 0x13A0, 0x13F4 ],
+	[ 0x1401, 0x166C ],
+	[ 0x166F, 0x1676 ],
+	[ 0x1681, 0x169A ],
+	[ 0x16A0, 0x16EA ],
+	[ 0x1700, 0x170C ],
+	[ 0x170E, 0x1711 ],
+	[ 0x1720, 0x1731 ],
+	[ 0x1740, 0x1751 ],
+	[ 0x1760, 0x176C ],
+	[ 0x176E, 0x1770 ],
+	[ 0x1780, 0x17B3 ],
+	[ 0x17D7, 0x17D7 ],
+	[ 0x17DC, 0x17DC ],
+	[ 0x1820, 0x1877 ],
+	[ 0x1880, 0x18A8 ],
+	[ 0x1900, 0x191C ],
+	[ 0x1950, 0x196D ],
+	[ 0x1970, 0x1974 ],
+	[ 0x1980, 0x19A9 ],
+	[ 0x19C1, 0x19C7 ],
+	[ 0x1A00, 0x1A16 ],
+	[ 0x1B05, 0x1B33 ],
+	[ 0x1B45, 0x1B4B ],
+	[ 0x1D00, 0x1DBF ],
+	[ 0x1E00, 0x1E9B ],
+	[ 0x1EA0, 0x1EF9 ],
+	[ 0x1F00, 0x1F15 ],
+	[ 0x1F18, 0x1F1D ],
+	[ 0x1F20, 0x1F45 ],
+	[ 0x1F48, 0x1F4D ],
+	[ 0x1F50, 0x1F57 ],
+	[ 0x1F59, 0x1F59 ],
+	[ 0x1F5B, 0x1F5B ],
+	[ 0x1F5D, 0x1F5D ],
+	[ 0x1F5F, 0x1F7D ],
+	[ 0x1F80, 0x1FB4 ],
+	[ 0x1FB6, 0x1FBC ],
+	[ 0x1FBE, 0x1FBE ],
+	[ 0x1FC2, 0x1FC4 ],
+	[ 0x1FC6, 0x1FCC ],
+	[ 0x1FD0, 0x1FD3 ],
+	[ 0x1FD6, 0x1FDB ],
+	[ 0x1FE0, 0x1FEC ],
+	[ 0x1FF2, 0x1FF4 ],
+	[ 0x1FF6, 0x1FFC ],
+	[ 0x2071, 0x2071 ],
+	[ 0x207F, 0x207F ],
+	[ 0x2090, 0x2094 ],
+	[ 0x2102, 0x2102 ],
+	[ 0x2107, 0x2107 ],
+	[ 0x210A, 0x2113 ],
+	[ 0x2115, 0x2115 ],
+	[ 0x2119, 0x211D ],
+	[ 0x2124, 0x2124 ],
+	[ 0x2126, 0x2126 ],
+	[ 0x2128, 0x2128 ],
+	[ 0x212A, 0x212D ],
+	[ 0x212F, 0x2139 ],
+	[ 0x213C, 0x213F ],
+	[ 0x2145, 0x2149 ],
+	[ 0x214E, 0x214E ],
+	[ 0x2183, 0x2184 ],
+	[ 0x2C00, 0x2C2E ],
+	[ 0x2C30, 0x2C5E ],
+	[ 0x2C60, 0x2C6C ],
+	[ 0x2C74, 0x2C77 ],
+	[ 0x2C80, 0x2CE4 ],
+	[ 0x2D00, 0x2D25 ],
+	[ 0x2D30, 0x2D65 ],
+	[ 0x2D6F, 0x2D6F ],
+	[ 0x2D80, 0x2D96 ],
+	[ 0x2DA0, 0x2DA6 ],
+	[ 0x2DA8, 0x2DAE ],
+	[ 0x2DB0, 0x2DB6 ],
+	[ 0x2DB8, 0x2DBE ],
+	[ 0x2DC0, 0x2DC6 ],
+	[ 0x2DC8, 0x2DCE ],
+	[ 0x2DD0, 0x2DD6 ],
+	[ 0x2DD8, 0x2DDE ],
+	[ 0x3005, 0x3006 ],
+	[ 0x3031, 0x3035 ],
+	[ 0x303B, 0x303C ],
+	[ 0x3041, 0x3096 ],
+	[ 0x309D, 0x309F ],
+	[ 0x30A1, 0x30FA ],
+	[ 0x30FC, 0x30FF ],
+	[ 0x3105, 0x312C ],
+	[ 0x3131, 0x318E ],
+	[ 0x31A0, 0x31B7 ],
+	[ 0x31F0, 0x31FF ],
+	[ 0x3400, 0x4DB5 ],
+	[ 0x4E00, 0x9FBB ],
+	[ 0xA000, 0xA48C ],
+	[ 0xA717, 0xA71A ],
+	[ 0xA800, 0xA801 ],
+	[ 0xA803, 0xA805 ],
+	[ 0xA807, 0xA80A ],
+	[ 0xA80C, 0xA822 ],
+	[ 0xA840, 0xA873 ],
+	[ 0xAC00, 0xD7A3 ],
+	[ 0xF900, 0xFA2D ],
+	[ 0xFA30, 0xFA6A ],
+	[ 0xFA70, 0xFAD9 ],
+	[ 0xFB00, 0xFB06 ],
+	[ 0xFB13, 0xFB17 ],
+	[ 0xFB1D, 0xFB1D ],
+	[ 0xFB1F, 0xFB28 ],
+	[ 0xFB2A, 0xFB36 ],
+	[ 0xFB38, 0xFB3C ],
+	[ 0xFB3E, 0xFB3E ],
+	[ 0xFB40, 0xFB41 ],
+	[ 0xFB43, 0xFB44 ],
+	[ 0xFB46, 0xFBB1 ],
+	[ 0xFBD3, 0xFD3D ],
+	[ 0xFD50, 0xFD8F ],
+	[ 0xFD92, 0xFDC7 ],
+	[ 0xFDF0, 0xFDFB ],
+	[ 0xFE70, 0xFE74 ],
+	[ 0xFE76, 0xFEFC ],
+	[ 0xFF21, 0xFF3A ],
+	[ 0xFF41, 0xFF5A ],
+	[ 0xFF66, 0xFFBE ],
+	[ 0xFFC2, 0xFFC7 ],
+	[ 0xFFCA, 0xFFCF ],
+	[ 0xFFD2, 0xFFD7 ],
+	[ 0xFFDA, 0xFFDC ],
+	[ 0x10000, 0x1000B ],
+	[ 0x1000D, 0x10026 ],
+	[ 0x10028, 0x1003A ],
+	[ 0x1003C, 0x1003D ],
+	[ 0x1003F, 0x1004D ],
+	[ 0x10050, 0x1005D ],
+	[ 0x10080, 0x100FA ],
+	[ 0x10300, 0x1031E ],
+	[ 0x10330, 0x10340 ],
+	[ 0x10342, 0x10349 ],
+	[ 0x10380, 0x1039D ],
+	[ 0x103A0, 0x103C3 ],
+	[ 0x103C8, 0x103CF ],
+	[ 0x10400, 0x1049D ],
+	[ 0x10800, 0x10805 ],
+	[ 0x10808, 0x10808 ],
+	[ 0x1080A, 0x10835 ],
+	[ 0x10837, 0x10838 ],
+	[ 0x1083C, 0x1083C ],
+	[ 0x1083F, 0x1083F ],
+	[ 0x10900, 0x10915 ],
+	[ 0x10A00, 0x10A00 ],
+	[ 0x10A10, 0x10A13 ],
+	[ 0x10A15, 0x10A17 ],
+	[ 0x10A19, 0x10A33 ],
+	[ 0x12000, 0x1236E ],
+	[ 0x1D400, 0x1D454 ],
+	[ 0x1D456, 0x1D49C ],
+	[ 0x1D49E, 0x1D49F ],
+	[ 0x1D4A2, 0x1D4A2 ],
+	[ 0x1D4A5, 0x1D4A6 ],
+	[ 0x1D4A9, 0x1D4AC ],
+	[ 0x1D4AE, 0x1D4B9 ],
+	[ 0x1D4BB, 0x1D4BB ],
+	[ 0x1D4BD, 0x1D4C3 ],
+	[ 0x1D4C5, 0x1D505 ],
+	[ 0x1D507, 0x1D50A ],
+	[ 0x1D50D, 0x1D514 ],
+	[ 0x1D516, 0x1D51C ],
+	[ 0x1D51E, 0x1D539 ],
+	[ 0x1D53B, 0x1D53E ],
+	[ 0x1D540, 0x1D544 ],
+	[ 0x1D546, 0x1D546 ],
+	[ 0x1D54A, 0x1D550 ],
+	[ 0x1D552, 0x1D6A5 ],
+	[ 0x1D6A8, 0x1D6C0 ],
+	[ 0x1D6C2, 0x1D6DA ],
+	[ 0x1D6DC, 0x1D6FA ],
+	[ 0x1D6FC, 0x1D714 ],
+	[ 0x1D716, 0x1D734 ],
+	[ 0x1D736, 0x1D74E ],
+	[ 0x1D750, 0x1D76E ],
+	[ 0x1D770, 0x1D788 ],
+	[ 0x1D78A, 0x1D7A8 ],
+	[ 0x1D7AA, 0x1D7C2 ],
+	[ 0x1D7C4, 0x1D7CB ],
+	[ 0x20000, 0x2A6D6 ],
+	[ 0x2F800, 0x2FA1D ],
+    ];
+
+    debug
+    {
+	for (int i = 0; i < table.length; i++)
+	{
+	    assert(table[i][0] <= table[i][1]);
+	    if (i < table.length - 1)
+	    {
+//		if (table[i][1] >= table[i + 1][0])
+//		    printf("table[%d][1] = x%x, table[%d][0] = x%x\n", i, table[i][1], i + 1, table[i + 1][0]);
+		assert(table[i][1] < table[i + 1][0]);
+	    }
+	}
+    }
+
+    if (u < 0xAA)
+    {
+	if (u < 'A')
+	    goto Lisnot;
+	if (u <= 'Z')
+	    goto Lis;
+	if (u < 'a')
+	    goto Lisnot;
+	if (u <= 'z')
+	    goto Lis;
+	goto Lisnot;
+    }
+
+    // Binary search
+    uint mid;
+    uint low;
+    uint high;
+
+    low = 0;
+    high = table.length - 1;
+    while (cast(int)low <= cast(int)high)
+    {
+	mid = (low + high) >> 1;
+	if (u < table[mid][0])
+	    high = mid - 1;
+	else if (u > table[mid][1])
+	    low = mid + 1;
+	else
+	    goto Lis;
+    }
+
+Lisnot:
+    debug
+    {
+	for (int i = 0; i < table.length; i++)
+	{
+	    assert(u < table[i][0] || u > table[i][1]);
+	}
+    }
+    return 0;
+
+Lis:
+    debug
+    {
+	for (int i = 0; i < table.length; i++)
+	{
+	    if (u >= table[i][0] && u <= table[i][1])
+		return 1;
+	}
+	assert(0);		// should have been in table
+    }
+    return 1;
+}
+
+unittest
+{
+    for (uint i = 0; i < 0x80; i++)
+    {
+	if (i >= 'A' && i <= 'Z')
+	    assert(isUniAlpha(i));
+	else if (i >= 'a' && i <= 'z')
+	    assert(isUniAlpha(i));
+	else
+	    assert(!isUniAlpha(i));
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/util/utf.d	Fri Jan 11 20:03:46 2008 +0100
@@ -0,0 +1,975 @@
+// utf.d
+
+/*
+ *  Copyright (C) 2003-2004 by Digital Mars, www.digitalmars.com
+ *  Written by Walter Bright
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty. In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  o  The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  o  Altered source versions must be plainly marked as such, and must not
+ *     be misrepresented as being the original software.
+ *  o  This notice may not be removed or altered from any source
+ *     distribution.
+ */
+
+/********************************************
+ * Encode and decode UTF-8, UTF-16 and UTF-32 strings.
+ *
+ * For Win32 systems, the C wchar_t type is UTF-16 and corresponds to the D
+ * wchar type.
+ * For linux systems, the C wchar_t type is UTF-32 and corresponds to
+ * the D dchar type.
+ *
+ * UTF character support is restricted to (\u0000 &lt;= character &lt;= \U0010FFFF).
+ *
+ * See_Also:
+ *	$(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia)<br>
+ *	$(LINK http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8)<br>
+ *	$(LINK http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335)
+ * Macros:
+ *	WIKI = Phobos/StdUtf
+ */
+
+/*
+  Note: this is not the original file!
+  Modified by Aziz Köksal:
+    Only commented out deprecated class UtfError.
+*/
+ 
+module util.utf;
+
+// private import std.stdio;
+
+//debug=utf;		// uncomment to turn on debugging printf's
+/+
+deprecated class UtfError : Error
+{
+    size_t idx;	// index in string of where error occurred
+
+    this(char[] s, size_t i)
+    {
+	idx = i;
+	super(s);
+    }
+}
++/
+/**********************************
+ * Exception class that is thrown upon any errors.
+ */
+
+class UtfException : Exception
+{
+    size_t idx;	/// offset into the string at which the error was detected
+
+    this(char[] s, size_t i)
+    {
+	super(s);	// s becomes the exception message
+	idx = i;
+    }
+}
+
+/*******************************
+ * Test if c is a valid UTF-32 character.
+ *
+ * \uFFFE and \uFFFF are considered valid by this function,
+ * as they are permitted for internal use by an application,
+ * but they are not allowed for interchange by the Unicode standard.
+ *
+ * Returns: true if it is, false if not.
+ */
+
+bool isValidDchar(dchar c)
+{
+    /* U+FFFE and U+FFFF are deliberately accepted: the Unicode standard
+     * allows them for application-internal use even though they must not
+     * be interchanged.
+     * (thanks to Arcane Jill)
+     */
+    if (c >= 0xD800 && c <= 0xDFFF)
+	return false;		// surrogate code points are never valid
+    return c <= 0x10FFFF;	// anything above U+10FFFF is invalid
+}
+
+unittest
+{
+    debug(utf) printf("utf.isValidDchar.unittest\n");
+    assert(isValidDchar('a'));			// plain ASCII is valid
+    assert(!isValidDchar(cast(dchar)0x1FFFFF));	// beyond U+10FFFF
+}
+
+
+ubyte[256] UTF8stride =	// sequence length indexed by lead byte; 0xFF = not a lead byte
+[
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	// 0x00-0x7F: single-byte (ASCII)
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,	// 0x80-0xBF: 10xxxxxx trailing bytes
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,	// 0xC0-0xDF: 110xxxxx
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,	// 0xE0-0xEF: 1110xxxx
+    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,	// 0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFD: 6, 0xFE-0xFF: invalid
+];
+
+/**
+ * stride() returns the length of a UTF-8 sequence starting at index i
+ * in string s.
+ * Returns:
+ *	The number of bytes in the UTF-8 sequence or
+ *	0xFF meaning s[i] is not the start of a UTF-8 sequence.
+ */
+
+uint stride(char[] s, size_t i)
+{   char lead = s[i];		// first code unit of the sequence
+    return UTF8stride[lead];
+}
+
+/**
+ * stride() returns the length of a UTF-16 sequence starting at index i
+ * in string s.
+ */
+
+uint stride(wchar[] s, size_t i)
+{   wchar unit = s[i];
+    return (unit >= 0xD800 && unit <= 0xDBFF) ? 2 : 1;	// high surrogate => pair
+}
+
+/**
+ * stride() returns the length of a UTF-32 sequence starting at index i
+ * in string s.
+ * Returns: The return value will always be 1.
+ */
+
+uint stride(dchar[] s, size_t i)
+{
+    return 1;	// constant result: neither s nor i is inspected
+}
+
+/*******************************************
+ * Given an index i into an array of characters s[],
+ * and assuming that index i is at the start of a UTF character,
+ * determine the number of UCS characters up to that index i.
+ */
+
+size_t toUCSindex(char[] s, size_t i)
+{
+    size_t count;		// characters seen so far
+    size_t pos;			// byte offset of the current sequence
+
+    while (pos < i)
+    {
+	size_t step = UTF8stride[s[pos]];
+	if (step == 0xFF)	// s[pos] cannot start a sequence
+	    throw new UtfException("1invalid UTF-8 sequence", pos);
+	pos += step;
+	count++;
+    }
+
+    // Overshooting means i pointed into the middle of a sequence.
+    if (pos > i)
+	throw new UtfException("1invalid UTF-8 sequence", pos);
+
+    return count;
+}
+
+/** ditto */
+
+size_t toUCSindex(wchar[] s, size_t i)
+{
+    size_t count;		// characters seen so far
+    size_t pos;			// index of the current code unit
+
+    while (pos < i)
+    {
+	wchar unit = s[pos];
+	// A high surrogate opens a two-unit pair; everything else is one unit.
+	pos += (unit >= 0xD800 && unit <= 0xDBFF) ? 2 : 1;
+	count++;
+    }
+
+    // Overshooting means i pointed at the unit following a high surrogate.
+    if (pos > i)
+	throw new UtfException("2invalid UTF-16 sequence", pos);
+    return count;
+}
+
+/** ditto */
+
+size_t toUCSindex(dchar[] s, size_t i)
+{
+    return i;	// index returned unchanged; s is not inspected
+}
+
+/******************************************
+ * Given a UCS index n into an array of characters s[], return the UTF index.
+ */
+
+size_t toUTFindex(char[] s, size_t n)
+{
+    size_t pos;			// byte offset reached so far
+
+    for (; n != 0; n--)
+    {
+	uint step = UTF8stride[s[pos]];
+	if (step == 0xFF)	// s[pos] cannot start a sequence
+	    throw new UtfException("3invalid UTF-8 sequence", pos);
+	pos += step;
+    }
+    return pos;
+}
+
+/** ditto */
+
+size_t toUTFindex(wchar[] s, size_t n)
+{
+    size_t pos;			// code-unit offset reached so far
+
+    for (; n != 0; n--)
+    {	wchar unit = s[pos];
+
+	pos += (unit >= 0xD800 && unit <= 0xDBFF) ? 2 : 1;	// pair or single
+    }
+    return pos;
+}
+
+/** ditto */
+
+size_t toUTFindex(dchar[] s, size_t n)
+{
+    return n;	// index returned unchanged; s is not inspected
+}
+
+/* =================== Decode ======================= */
+
+/***************
+ * Decodes and returns character starting at s[idx]. idx is advanced past the
+ * decoded character. If the character is not well formed, a UtfException is
+ * thrown and idx remains unchanged.
+ */
+
+dchar decode(char[] s, inout size_t idx)
+    in
+    {
+	assert(idx >= 0 && idx < s.length);	// idx must address an existing byte
+    }
+    out (result)
+    {
+	assert(isValidDchar(result));
+    }
+    body
+    {
+	size_t len = s.length;
+	dchar V;			// code point being assembled
+	size_t i = idx;			// working copy; idx is only written on success
+	char u = s[i];			// lead byte
+
+	if (u & 0x80)			// high bit set: not plain ASCII
+	{   uint n;			// number of leading 1 bits == sequence length
+	    char u2;
+
+	    /* The following encodings are valid, except for the 5 and 6 byte
+	     * combinations:
+	     *	0xxxxxxx
+	     *	110xxxxx 10xxxxxx
+	     *	1110xxxx 10xxxxxx 10xxxxxx
+	     *	11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+	     *	111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+	     *	1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+	     */
+	    for (n = 1; ; n++)
+	    {
+		if (n > 4)
+		    goto Lerr;		// only do the first 4 of 6 encodings
+		if (((u << n) & 0x80) == 0)	// found the first 0 bit after the leading 1s
+		{
+		    if (n == 1)
+			goto Lerr;	// 10xxxxxx is a trailing byte, not a lead byte
+		    break;
+		}
+	    }
+
+	    // Pick off (7 - n) significant bits of B from first byte of octet
+	    V = cast(dchar)(u & ((1 << (7 - n)) - 1));
+
+	    if (i + (n - 1) >= len)
+		goto Lerr;			// off end of string
+
+	    /* The following combinations are overlong, and illegal:
+	     *	1100000x (10xxxxxx)
+	     *	11100000 100xxxxx (10xxxxxx)
+	     *	11110000 1000xxxx (10xxxxxx 10xxxxxx)
+	     *	11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
+	     *	11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+	     */
+	    u2 = s[i + 1];		// in range: n >= 2 and the length check above passed
+	    if ((u & 0xFE) == 0xC0 ||
+		(u == 0xE0 && (u2 & 0xE0) == 0x80) ||
+		(u == 0xF0 && (u2 & 0xF0) == 0x80) ||
+		(u == 0xF8 && (u2 & 0xF8) == 0x80) ||	// 0xF8/0xFC leads were already rejected by n > 4
+		(u == 0xFC && (u2 & 0xFC) == 0x80))
+		goto Lerr;			// overlong combination
+
+	    for (uint j = 1; j != n; j++)
+	    {
+		u = s[i + j];
+		if ((u & 0xC0) != 0x80)
+		    goto Lerr;			// trailing bytes are 10xxxxxx
+		V = (V << 6) | (u & 0x3F);	// append the 6 payload bits
+	    }
+	    if (!isValidDchar(V))
+		goto Lerr;		// surrogate or above U+10FFFF
+	    i += n;
+	}
+	else
+	{
+	    V = cast(dchar) u;		// ASCII: the byte is the code point
+	    i++;
+	}
+
+	idx = i;			// commit the advanced position
+	return V;
+
+      Lerr:
+	//printf("\ndecode: idx = %d, i = %d, length = %d s = \n'%.*s'\n%x\n'%.*s'\n", idx, i, s.length, s, s[i], s[i .. length]);
+	throw new UtfException("4invalid UTF-8 sequence", i);	// i still equals the start of the bad sequence
+    }
+
+unittest
+{   size_t i;
+    dchar c;
+
+    debug(utf) printf("utf.decode.unittest\n");
+
+    static char[] s1 = "abcd";		// ASCII: one byte per character
+    i = 0;
+    c = decode(s1, i);
+    assert(c == cast(dchar)'a');
+    assert(i == 1);
+    c = decode(s1, i);
+    assert(c == cast(dchar)'b');
+    assert(i == 2);
+
+    static char[] s2 = "\xC2\xA9";	// two-byte sequence for U+00A9
+    i = 0;
+    c = decode(s2, i);
+    assert(c == cast(dchar)'\u00A9');
+    assert(i == 2);
+
+    static char[] s3 = "\xE2\x89\xA0";	// three-byte sequence for U+2260
+    i = 0;
+    c = decode(s3, i);
+    assert(c == cast(dchar)'\u2260');
+    assert(i == 3);
+
+    static char[][] s4 =		// inputs that decode() must reject
+    [	"\xE2\x89",		// too short
+	"\xC0\x8A",		// overlong 2-byte form
+	"\xE0\x80\x8A",		// overlong 3-byte form
+	"\xF0\x80\x80\x8A",	// overlong 4-byte form
+	"\xF8\x80\x80\x80\x8A",	// 5-byte form, not accepted
+	"\xFC\x80\x80\x80\x80\x8A",	// 6-byte form, not accepted
+    ];
+
+    for (int j = 0; j < s4.length; j++)
+    {
+	try
+	{
+	    i = 0;
+	    c = decode(s4[j], i);
+	    assert(0);		// reaching this line means decode() failed to throw
+	}
+	catch (UtfException u)
+	{
+	    i = 23;		// sentinel proving the catch block ran
+	    delete u;
+	}
+	assert(i == 23);
+    }
+}
+
+/** ditto */
+
+dchar decode(wchar[] s, inout size_t idx)
+    in
+    {
+	assert(idx >= 0 && idx < s.length);	// idx must address an existing code unit
+    }
+    out (result)
+    {
+	assert(isValidDchar(result));
+    }
+    body
+    {
+	char[] msg;			// error text, set just before jumping to Lerr
+	dchar V;
+	size_t i = idx;			// working copy; idx is only written on success
+	uint u = s[i];			// first code unit, later the decoded code point
+
+	if (u & ~0x7F)			// anything outside ASCII needs checking
+	{   if (u >= 0xD800 && u <= 0xDBFF)	// high surrogate: a pair is required
+	    {   uint u2;
+
+		if (i + 1 == s.length)
+		{   msg = "surrogate UTF-16 high value past end of string";
+		    goto Lerr;
+		}
+		u2 = s[i + 1];
+		if (u2 < 0xDC00 || u2 > 0xDFFF)
+		{   msg = "surrogate UTF-16 low value out of range";
+		    goto Lerr;
+		}
+		u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);	// 0xD7C0 = 0xD800 - 0x40 folds in the 0x10000 offset
+		i += 2;
+	    }
+	    else if (u >= 0xDC00 && u <= 0xDFFF)	// low surrogate without a preceding high one
+	    {   msg = "unpaired surrogate UTF-16 value";
+		goto Lerr;
+	    }
+	    else if (u == 0xFFFE || u == 0xFFFF)	// NOTE(review): isValidDchar() accepts these two values
+	    {   msg = "illegal UTF-16 value";
+		goto Lerr;
+	    }
+	    else
+		i++;			// any other BMP value is a single unit
+	}
+	else
+	{
+	    i++;			// ASCII
+	}
+
+	idx = i;			// commit the advanced position
+	return cast(dchar)u;
+
+      Lerr:
+	throw new UtfException(msg, i);	// i still equals the start of the bad sequence
+    }
+
+/** ditto */
+
+dchar decode(dchar[] s, inout size_t idx)
+    in
+    {
+	assert(idx >= 0 && idx < s.length);
+    }
+    body
+    {
+	// Work on a copy so idx stays untouched when we throw.
+	size_t pos = idx;
+	dchar ch = s[pos];
+
+	// UTF-32 needs no decoding, only a range check of the code point.
+	if (!isValidDchar(ch))
+	    throw new UtfException("5invalid UTF-32 value", pos);
+
+	idx = pos + 1;
+	return ch;
+    }
+
+
+/* =================== Encode ======================= */
+
+/*******************************
+ * Encodes character c and appends it to array s[].
+ */
+
+void encode(inout char[] s, dchar c)
+    in
+    {
+	assert(isValidDchar(c));
+    }
+    body
+    {
+	if (c <= 0x7F)
+	{
+	    // ASCII maps to a single byte.
+	    s ~= cast(char) c;
+	    return;
+	}
+
+	char[4] seq;		// scratch buffer for the multi-byte sequence
+	uint used;		// how many bytes of seq are filled in
+
+	if (c <= 0x7FF)
+	{
+	    // 110xxxxx 10xxxxxx
+	    seq[0] = cast(char)(0xC0 | (c >> 6));
+	    seq[1] = cast(char)(0x80 | (c & 0x3F));
+	    used = 2;
+	}
+	else if (c <= 0xFFFF)
+	{
+	    // 1110xxxx 10xxxxxx 10xxxxxx
+	    seq[0] = cast(char)(0xE0 | (c >> 12));
+	    seq[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+	    seq[2] = cast(char)(0x80 | (c & 0x3F));
+	    used = 3;
+	}
+	else if (c <= 0x10FFFF)
+	{
+	    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+	    seq[0] = cast(char)(0xF0 | (c >> 18));
+	    seq[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
+	    seq[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+	    seq[3] = cast(char)(0x80 | (c & 0x3F));
+	    used = 4;
+	}
+	else
+	{
+	    assert(0);	// excluded by the in contract
+	}
+	s ~= seq[0 .. used];
+    }
+
+unittest
+{
+    debug(utf) printf("utf.encode.unittest\n");
+
+    char[] s = "abcd";
+    encode(s, cast(dchar)'a');		// ASCII appends one byte
+    assert(s.length == 5);
+    assert(s == "abcda");
+
+    encode(s, cast(dchar)'\u00A9');	// U+00A9 appends two bytes
+    assert(s.length == 7);
+    assert(s == "abcda\xC2\xA9");
+    //assert(s == "abcda\u00A9");	// BUG: fix compiler
+
+    encode(s, cast(dchar)'\u2260');	// U+2260 appends three bytes
+    assert(s.length == 10);
+    assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
+}
+
+/** ditto */
+
+void encode(inout wchar[] s, dchar c)
+    in
+    {
+	assert(isValidDchar(c));
+    }
+    body
+    {
+	if (c <= 0xFFFF)
+	{
+	    // Basic Multilingual Plane: a single code unit.
+	    s ~= cast(wchar) c;
+	    return;
+	}
+
+	// Supplementary plane: split the offset from U+10000 into a
+	// high (upper 10 bits) and a low (lower 10 bits) surrogate.
+	uint offset = c - 0x10000;
+	wchar[2] pair;
+
+	pair[0] = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);
+	pair[1] = cast(wchar) ((offset & 0x3FF) + 0xDC00);
+	s ~= pair;
+    }
+
+/** ditto */
+
+void encode(inout dchar[] s, dchar c)
+    in
+    {
+	assert(isValidDchar(c));	// c must be a valid code point
+    }
+    body
+    {
+	s ~= c;		// the code point is appended unchanged
+    }
+
+/* =================== Validation ======================= */
+
+/***********************************
+ * Checks to see if string is well formed or not. Throws a UtfException if it is
+ * not. Use to check all untrusted input for correctness.
+ */
+
+void validate(char[] s)
+{
+    size_t pos = 0;
+
+    // decode() advances pos, or throws UtfException on a malformed sequence.
+    while (pos < s.length)
+    {
+	decode(s, pos);
+    }
+}
+
+/** ditto */
+
+void validate(wchar[] s)
+{
+    size_t pos = 0;
+
+    // decode() advances pos, or throws UtfException on a malformed sequence.
+    while (pos < s.length)
+    {
+	decode(s, pos);
+    }
+}
+
+/** ditto */
+
+void validate(dchar[] s)
+{
+    size_t pos = 0;
+
+    // decode() advances pos, or throws UtfException on an invalid value.
+    while (pos < s.length)
+    {
+	decode(s, pos);
+    }
+}
+
+/* =================== Conversion to UTF8 ======================= */
+
+char[] toUTF8(char[4] buf, dchar c)
+    in
+    {
+	assert(isValidDchar(c));
+    }
+    body
+    {
+	size_t used;	// number of leading bytes of buf holding the encoding
+
+	if (c <= 0x7F)
+	{   buf[0] = cast(char) c;
+	    used = 1;
+	}
+	else if (c <= 0x7FF)
+	{   buf[0] = cast(char)(0xC0 | (c >> 6));
+	    buf[1] = cast(char)(0x80 | (c & 0x3F));
+	    used = 2;
+	}
+	else if (c <= 0xFFFF)
+	{   buf[0] = cast(char)(0xE0 | (c >> 12));
+	    buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+	    buf[2] = cast(char)(0x80 | (c & 0x3F));
+	    used = 3;
+	}
+	else if (c <= 0x10FFFF)
+	{   buf[0] = cast(char)(0xF0 | (c >> 18));
+	    buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
+	    buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+	    buf[3] = cast(char)(0x80 | (c & 0x3F));
+	    used = 4;
+	}
+	else
+	    assert(0);		// excluded by the in contract
+	return buf[0 .. used];
+    }
+
+/*******************
+ * Encodes string s into UTF-8 and returns the encoded string.
+ */
+
+char[] toUTF8(char[] s)
+    in
+    {
+	validate(s);	// walks s with decode(), which throws UtfException on malformed input
+    }
+    body
+    {
+	return s;	// already UTF-8: the same slice is returned, not a copy
+    }
+
+/** ditto */
+
+char[] toUTF8(wchar[] s)
+{
+    char[] r;
+    size_t i;
+    size_t slen = s.length;
+
+    r.length = slen;
+
+    for (i = 0; i < slen; i++)
+    {	wchar c = s[i];
+
+	if (c <= 0x7F)
+	    r[i] = cast(char)c;		// fast path for ascii
+	else
+	{
+	    // First non-ASCII unit: keep the i bytes copied so far and let
+	    // foreach decode the remaining UTF-16 into code points.
+	    r.length = i;
+	    foreach (dchar d; s[i .. slen])	// named d so it does not shadow c
+		encode(r, d);
+	    break;
+	}
+    }
+    return r;
+}
+
+/** ditto */
+
+char[] toUTF8(dchar[] s)
+{
+    size_t slen = s.length;
+    char[] r;
+    size_t i;
+
+    r.length = slen;
+
+    // Fast path: copy the leading run of ASCII characters one-to-one.
+    while (i < slen && s[i] <= 0x7F)
+    {
+	r[i] = cast(char)s[i];
+	i++;
+    }
+
+    if (i < slen)
+    {
+	// First non-ASCII character found: keep the i bytes copied so far
+	// and encode the remainder of s one character at a time.
+	r.length = i;
+	foreach (dchar d; s[i .. slen])
+	    encode(r, d);
+    }
+    return r;
+}
+
+/* =================== Conversion to UTF16 ======================= */
+
+wchar[] toUTF16(wchar[2] buf, dchar c)
+    in
+    {
+	assert(isValidDchar(c));
+    }
+    body
+    {
+	if (c <= 0xFFFF)
+	{
+	    // Basic Multilingual Plane: a single code unit.
+	    buf[0] = cast(wchar) c;
+	    return buf[0 .. 1];
+	}
+
+	uint offset = c - 0x10000;	// offset into the supplementary planes
+	buf[0] = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);	// high surrogate
+	buf[1] = cast(wchar) ((offset & 0x3FF) + 0xDC00);		// low surrogate
+	return buf[0 .. 2];
+    }
+
+/****************
+ * Encodes string s into UTF-16 and returns the encoded string.
+ * toUTF16z() is suitable for calling the 'W' functions in the Win32 API that take
+ * an LPWSTR or LPCWSTR argument.
+ */
+
+wchar[] toUTF16(char[] s)
+{
+    size_t slen = s.length;
+    wchar[] r;
+    size_t i = 0;
+
+    // The length is set to slen (presumably to size the allocation) and
+    // then reset so that ~= appends from the start.
+    r.length = slen;
+    r.length = 0;
+    while (i < slen)
+    {
+	char lead = s[i];
+	if (lead <= 0x7F)
+	{   // ASCII maps to a single identical code unit.
+	    r ~= cast(wchar)lead;
+	    i++;
+	}
+	else
+	    encode(r, decode(s, i));	// decode() advances i
+    }
+    return r;
+}
+
+/** ditto */
+
+wchar* toUTF16z(char[] s)
+{
+    size_t slen = s.length;
+    wchar[] r;
+    size_t i = 0;
+
+    // The length is set to slen + 1 (presumably to size the allocation,
+    // terminator included) and then reset so that ~= appends from the start.
+    r.length = slen + 1;
+    r.length = 0;
+    while (i < slen)
+    {
+	char lead = s[i];
+	if (lead <= 0x7F)
+	{   // ASCII maps to a single identical code unit.
+	    r ~= cast(wchar)lead;
+	    i++;
+	}
+	else
+	    encode(r, decode(s, i));	// decode() advances i
+    }
+    r ~= "\000";		// terminating zero code unit
+    return r.ptr;
+}
+
+/** ditto */
+
+wchar[] toUTF16(wchar[] s)
+    in
+    {
+	validate(s);	// walks s with decode(), which throws UtfException on malformed input
+    }
+    body
+    {
+	return s;	// already UTF-16: the same slice is returned, not a copy
+    }
+
+/** ditto */
+
+wchar[] toUTF16(dchar[] s)
+{
+    wchar[] r;
+
+    // The length is set to s.length (presumably to size the allocation)
+    // and then reset so that encode() appends from the start.
+    r.length = s.length;
+    r.length = 0;
+
+    foreach (dchar c; s)
+	encode(r, c);
+    return r;
+}
+
+/* =================== Conversion to UTF32 ======================= */
+
+/*****
+ * Encodes string s into UTF-32 and returns the encoded string.
+ */
+
+dchar[] toUTF32(char[] s)
+{
+    size_t slen = s.length;
+    dchar[] r;
+    size_t count = 0;		// characters written to r so far
+    size_t i = 0;		// read position in s
+
+    r.length = slen;		// r[] will never be longer than s[]
+    while (i < slen)
+    {
+	if (s[i] < 0x80)
+	    r[count] = s[i++];		// ASCII: copy through, no decode needed
+	else
+	    r[count] = decode(s, i);	// decode() advances i
+	count++;
+    }
+    return r[0 .. count];
+}
+
+/** ditto */
+
+dchar[] toUTF32(wchar[] s)
+{
+    size_t slen = s.length;
+    dchar[] r;
+    size_t count = 0;		// characters written to r so far
+    size_t i = 0;		// read position in s
+
+    r.length = slen;		// r[] will never be longer than s[]
+    while (i < slen)
+    {
+	if (s[i] < 0x80)
+	    r[count] = s[i++];		// ASCII: copy through, no decode needed
+	else
+	    r[count] = decode(s, i);	// decode() advances i
+	count++;
+    }
+    return r[0 .. count];
+}
+
+/** ditto */
+
+dchar[] toUTF32(dchar[] s)
+    in
+    {
+	validate(s);	// walks s with decode(), which throws UtfException on invalid values
+    }
+    body
+    {
+	return s;	// already UTF-32: the same slice is returned, not a copy
+    }
+
+/* ================================ tests ================================== */
+
+unittest
+{
+    debug(utf) printf("utf.toUTF.unittest\n");
+
+    char[] c;
+    wchar[] w;
+    dchar[] d;
+    // Case 1: pure ASCII text, converted between all three encodings.
+    c = "hello";
+    w = toUTF16(c);
+    assert(w == "hello");
+    d = toUTF32(c);
+    assert(d == "hello");
+
+    c = toUTF8(w);
+    assert(c == "hello");
+    d = toUTF32(w);
+    assert(d == "hello");
+
+    c = toUTF8(d);
+    assert(c == "hello");
+    w = toUTF16(d);
+    assert(w == "hello");
+
+    // Case 2: text containing U+1234, a character above the ASCII range.
+    c = "hel\u1234o";
+    w = toUTF16(c);
+    assert(w == "hel\u1234o");
+    d = toUTF32(c);
+    assert(d == "hel\u1234o");
+
+    c = toUTF8(w);
+    assert(c == "hel\u1234o");
+    d = toUTF32(w);
+    assert(d == "hel\u1234o");
+
+    c = toUTF8(d);
+    assert(c == "hel\u1234o");
+    w = toUTF16(d);
+    assert(w == "hel\u1234o");
+
+    // Case 3: text containing U+10AAAA, which is above U+FFFF.
+    c = "he\U0010AAAAllo";
+    w = toUTF16(c);
+    //foreach (wchar c; w) printf("c = x%x\n", c);
+    //foreach (wchar c; cast(wchar[])"he\U0010AAAAllo") printf("c = x%x\n", c);
+    assert(w == "he\U0010AAAAllo");
+    d = toUTF32(c);
+    assert(d == "he\U0010AAAAllo");
+
+    c = toUTF8(w);
+    assert(c == "he\U0010AAAAllo");
+    d = toUTF32(w);
+    assert(d == "he\U0010AAAAllo");
+
+    c = toUTF8(d);
+    assert(c == "he\U0010AAAAllo");
+    w = toUTF16(d);
+    assert(w == "he\U0010AAAAllo");
+}