view lphobos/std/uni.d @ 323:0d52412d5b1a trunk

[svn r344] Fixed some very minor issues with the usage listing when calling llvmdc with no arguments. Changed the way moduleinfo is registered to use the same approach as DMD, this eliminates the need for correct linking order and should make the way for using a natively compiled runtime library. This should speed up linking tremendously and should now be possible. Fixed the llvm.used array to only be emitted if really necessary.
author lindquist
date Wed, 09 Jul 2008 23:43:51 +0200
parents fd32135dca3e
children
line wrap: on
line source


// Written in the D programming language.

/*
 * Placed into the Public Domain.
 * Digital Mars, www.digitalmars.com
 * Written by Walter Bright
 */

/**
 * Simple Unicode character classification functions.
 * For ASCII classification, see $(LINK2 std_ctype.html, std.ctype).
 * Macros:
 *	WIKI=Phobos/StdUni
 * References:
 *	$(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
 *	$(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia),
 *	$(LINK2 http://www.unicode.org, The Unicode Consortium)
 * Trademarks:
 *	Unicode(tm) is a trademark of Unicode, Inc.
 */


module std.uni;

/**
 * Tests whether c is a lower case character.
 *
 * ASCII input is classified directly as 'a'..'z'.  Any other value counts
 * as lower case when it is alphabetic according to isUniAlpha() and
 * toUniLower() leaves it unchanged; note that this also holds for letters
 * for which toUniLower() has no mapping at all.
 *
 * Params:
 *	c = the character to test
 * Returns:
 *	!=0 if c is considered lower case, 0 otherwise
 */
int isUniLower(dchar c)
{
    // Everything above the ASCII range is decided by the Unicode helpers.
    if (c > 0x7F)
        return isUniAlpha(c) && toUniLower(c) == c;

    return 'a' <= c && c <= 'z';
}

/**
 * Tests whether c is an upper case character.
 *
 * ASCII input is classified directly as 'A'..'Z'.  Any other value counts
 * as upper case when it is alphabetic according to isUniAlpha() and
 * toUniUpper() leaves it unchanged; note that this also holds for letters
 * for which toUniUpper() has no mapping at all.
 *
 * Params:
 *	c = the character to test
 * Returns:
 *	!=0 if c is considered upper case, 0 otherwise
 */
int isUniUpper(dchar c)
{
    // Everything above the ASCII range is decided by the Unicode helpers.
    if (c > 0x7F)
        return isUniAlpha(c) && toUniUpper(c) == c;

    return 'A' <= c && c <= 'Z';
}

/**
 * Converts an upper case character to its lower case equivalent.
 *
 * Only the ranges hard-coded below are converted: ASCII, Latin-1,
 * U+0100..U+017E, U+0200..U+0217, the Cyrillic and Armenian capitals,
 * U+10A0..U+10C5 and the fullwidth Latin capitals.  Every other value,
 * including characters that are already lower case, is returned as is.
 *
 * NOTE(review): the U+10A0..U+10C5 branch yields U+10D0..U+10F5; later
 * Unicode data appears to map these capitals to U+2D00..U+2D25 instead.
 * Confirm which mapping is intended before relying on it.
 *
 * Params:
 *	c = the character to convert
 * Returns:
 *	the lower case equivalent of c, or c itself if no mapping applies
 */
dchar toUniLower(dchar c)
{
    // ASCII capitals
    if (c >= 'A' && c <= 'Z')
    {
        c += 32;
        return c;
    }

    // Nothing else below U+00C0 has a mapping.
    if (c < 0x00C0)
        return c;

    if ((c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00DE))
    {
        // Latin-1 capitals; U+00D7 (multiplication sign) is skipped
        c += 32;
    }
    else if ((c >= 0x0100 && c <= 0x0137) || (c >= 0x014A && c <= 0x0177))
    {
        // Pairs laid out as (even capital, odd small letter)
        if (c == 0x0130)
            c = 0x0069;		// dotted capital I maps to ASCII 'i'
        else if (c % 2 == 0)
            c += 1;
    }
    else if (c == 0x0178)
    {
        // The small form lives in Latin-1
        c = 0x00FF;
    }
    else if ((c >= 0x0139 && c <= 0x0148) || (c >= 0x0179 && c <= 0x017E))
    {
        // Pairs laid out as (odd capital, even small letter)
        if (c % 2 != 0)
            c += 1;
    }
    else if (c >= 0x0200 && c <= 0x0217)
    {
        if (c % 2 == 0)
            c += 1;
    }
    else if ((c >= 0x0401 && c <= 0x040C) || (c >= 0x040E && c <= 0x040F))
    {
        c += 80;
    }
    else if (c >= 0x0410 && c <= 0x042F)
    {
        c += 32;
    }
    else if (c >= 0x0460 && c <= 0x047F)
    {
        if (c % 2 == 0)
            c += 1;
    }
    else if (c >= 0x0531 && c <= 0x0556)
    {
        c += 48;
    }
    else if (c >= 0x10A0 && c <= 0x10C5)
    {
        c += 48;
    }
    else if (c >= 0xFF21 && c <= 0xFF3A)
    {
        // Fullwidth Latin capitals
        c += 32;
    }

    return c;
}

/**
 * Converts a lower case character to its upper case equivalent.
 *
 * Only the ranges hard-coded below are converted: ASCII, Latin-1,
 * U+0100..U+017F, U+0200..U+0217, the Cyrillic and Armenian small
 * letters and the fullwidth Latin small letters.  Every other value,
 * including characters that are already upper case, is returned as is.
 *
 * Params:
 *	c = the character to convert
 * Returns:
 *	the upper case equivalent of c, or c itself if no mapping applies
 */
dchar toUniUpper(dchar c)
{
    // ASCII small letters
    if (c >= 'a' && c <= 'z')
    {
        c -= 32;
        return c;
    }

    // Nothing else below U+00E0 has a mapping.
    if (c < 0x00E0)
        return c;

    if ((c >= 0x00E0 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FE))
    {
        // Latin-1 small letters; U+00F7 (division sign) is skipped
        c -= 32;
    }
    else if (c == 0x00FF)
    {
        // The capital form lives outside Latin-1
        c = 0x0178;
    }
    else if ((c >= 0x0100 && c <= 0x0137) || (c >= 0x014A && c <= 0x0177))
    {
        // Pairs laid out as (even capital, odd small letter)
        if (c == 0x0131)
            c = 0x0049;		// dotless small i maps to ASCII 'I'
        else if (c % 2 != 0)
            c -= 1;
    }
    else if ((c >= 0x0139 && c <= 0x0148) || (c >= 0x0179 && c <= 0x017E))
    {
        // Pairs laid out as (odd capital, even small letter)
        if (c % 2 == 0)
            c -= 1;
    }
    else if (c == 0x017F)
    {
        // Long s maps to ASCII 'S'
        c = 0x0053;
    }
    else if (c >= 0x0200 && c <= 0x0217)
    {
        if (c % 2 != 0)
            c -= 1;
    }
    else if (c >= 0x0430 && c <= 0x044F)
    {
        c -= 32;
    }
    else if ((c >= 0x0451 && c <= 0x045C) || (c >= 0x045E && c <= 0x045F))
    {
        c -= 80;
    }
    else if (c >= 0x0460 && c <= 0x047F)
    {
        if (c % 2 != 0)
            c -= 1;
    }
    else if (c >= 0x0561 && c <= 0x0586)
    {
        c -= 48;
    }
    else if (c >= 0xFF41 && c <= 0xFF5A)
    {
        // Fullwidth Latin small letters
        c -= 32;
    }

    return c;
}


/*******************************
 * Tests whether u is a Unicode alpha character
 * (general Unicode category: Lu, Ll, Lt, Lm and Lo).
 *
 * Values below U+00AA are classified directly as ASCII letters or not;
 * all other values are looked up by binary search in a sorted table of
 * inclusive [first, last] code point ranges.  In debug builds the table
 * is checked for ordering on every call and the result is cross-checked
 * against a linear scan of the table.
 *
 * Params:
 *	u = the character to test
 * Returns:
 *	1 if u is an alpha character, 0 otherwise
 *
 * Standards: Unicode 5.0.0
 */
int isUniAlpha(dchar u)
{
    // Inclusive [first, last] ranges, sorted ascending and non-overlapping.
    static dchar[2][] table =
    [
        [ 'A', 'Z' ],
        [ 'a', 'z' ],
        [ 0x00AA, 0x00AA ],
        [ 0x00B5, 0x00B5 ],
        [ 0x00BA, 0x00BA ],
        [ 0x00C0, 0x00D6 ],
        [ 0x00D8, 0x00F6 ],
        [ 0x00F8, 0x02C1 ],
        [ 0x02C6, 0x02D1 ],
        [ 0x02E0, 0x02E4 ],
        [ 0x02EE, 0x02EE ],
        [ 0x037A, 0x037D ],
        [ 0x0386, 0x0386 ],
        [ 0x0388, 0x038A ],
        [ 0x038C, 0x038C ],
        [ 0x038E, 0x03A1 ],
        [ 0x03A3, 0x03CE ],
        [ 0x03D0, 0x03F5 ],
        [ 0x03F7, 0x0481 ],
        [ 0x048A, 0x0513 ],
        [ 0x0531, 0x0556 ],
        [ 0x0559, 0x0559 ],
        [ 0x0561, 0x0587 ],
        [ 0x05D0, 0x05EA ],
        [ 0x05F0, 0x05F2 ],
        [ 0x0621, 0x063A ],
        [ 0x0640, 0x064A ],
        [ 0x066E, 0x066F ],
        [ 0x0671, 0x06D3 ],
        [ 0x06D5, 0x06D5 ],
        [ 0x06E5, 0x06E6 ],
        [ 0x06EE, 0x06EF ],
        [ 0x06FA, 0x06FC ],
        [ 0x06FF, 0x06FF ],
        [ 0x0710, 0x0710 ],
        [ 0x0712, 0x072F ],
        [ 0x074D, 0x076D ],
        [ 0x0780, 0x07A5 ],
        [ 0x07B1, 0x07B1 ],
        [ 0x07CA, 0x07EA ],
        [ 0x07F4, 0x07F5 ],
        [ 0x07FA, 0x07FA ],
        [ 0x0904, 0x0939 ],
        [ 0x093D, 0x093D ],
        [ 0x0950, 0x0950 ],
        [ 0x0958, 0x0961 ],
        [ 0x097B, 0x097F ],
        [ 0x0985, 0x098C ],
        [ 0x098F, 0x0990 ],
        [ 0x0993, 0x09A8 ],
        [ 0x09AA, 0x09B0 ],
        [ 0x09B2, 0x09B2 ],
        [ 0x09B6, 0x09B9 ],
        [ 0x09BD, 0x09BD ],
        [ 0x09CE, 0x09CE ],
        [ 0x09DC, 0x09DD ],
        [ 0x09DF, 0x09E1 ],
        [ 0x09F0, 0x09F1 ],
        [ 0x0A05, 0x0A0A ],
        [ 0x0A0F, 0x0A10 ],
        [ 0x0A13, 0x0A28 ],
        [ 0x0A2A, 0x0A30 ],
        [ 0x0A32, 0x0A33 ],
        [ 0x0A35, 0x0A36 ],
        [ 0x0A38, 0x0A39 ],
        [ 0x0A59, 0x0A5C ],
        [ 0x0A5E, 0x0A5E ],
        [ 0x0A72, 0x0A74 ],
        [ 0x0A85, 0x0A8D ],
        [ 0x0A8F, 0x0A91 ],
        [ 0x0A93, 0x0AA8 ],
        [ 0x0AAA, 0x0AB0 ],
        [ 0x0AB2, 0x0AB3 ],
        [ 0x0AB5, 0x0AB9 ],
        [ 0x0ABD, 0x0ABD ],
        [ 0x0AD0, 0x0AD0 ],
        [ 0x0AE0, 0x0AE1 ],
        [ 0x0B05, 0x0B0C ],
        [ 0x0B0F, 0x0B10 ],
        [ 0x0B13, 0x0B28 ],
        [ 0x0B2A, 0x0B30 ],
        [ 0x0B32, 0x0B33 ],
        [ 0x0B35, 0x0B39 ],
        [ 0x0B3D, 0x0B3D ],
        [ 0x0B5C, 0x0B5D ],
        [ 0x0B5F, 0x0B61 ],
        [ 0x0B71, 0x0B71 ],
        [ 0x0B83, 0x0B83 ],
        [ 0x0B85, 0x0B8A ],
        [ 0x0B8E, 0x0B90 ],
        [ 0x0B92, 0x0B95 ],
        [ 0x0B99, 0x0B9A ],
        [ 0x0B9C, 0x0B9C ],
        [ 0x0B9E, 0x0B9F ],
        [ 0x0BA3, 0x0BA4 ],
        [ 0x0BA8, 0x0BAA ],
        [ 0x0BAE, 0x0BB9 ],
        [ 0x0C05, 0x0C0C ],
        [ 0x0C0E, 0x0C10 ],
        [ 0x0C12, 0x0C28 ],
        [ 0x0C2A, 0x0C33 ],
        [ 0x0C35, 0x0C39 ],
        [ 0x0C60, 0x0C61 ],
        [ 0x0C85, 0x0C8C ],
        [ 0x0C8E, 0x0C90 ],
        [ 0x0C92, 0x0CA8 ],
        [ 0x0CAA, 0x0CB3 ],
        [ 0x0CB5, 0x0CB9 ],
        [ 0x0CBD, 0x0CBD ],
        [ 0x0CDE, 0x0CDE ],
        [ 0x0CE0, 0x0CE1 ],
        [ 0x0D05, 0x0D0C ],
        [ 0x0D0E, 0x0D10 ],
        [ 0x0D12, 0x0D28 ],
        [ 0x0D2A, 0x0D39 ],
        [ 0x0D60, 0x0D61 ],
        [ 0x0D85, 0x0D96 ],
        [ 0x0D9A, 0x0DB1 ],
        [ 0x0DB3, 0x0DBB ],
        [ 0x0DBD, 0x0DBD ],
        [ 0x0DC0, 0x0DC6 ],
        [ 0x0E01, 0x0E30 ],
        [ 0x0E32, 0x0E33 ],
        [ 0x0E40, 0x0E46 ],
        [ 0x0E81, 0x0E82 ],
        [ 0x0E84, 0x0E84 ],
        [ 0x0E87, 0x0E88 ],
        [ 0x0E8A, 0x0E8A ],
        [ 0x0E8D, 0x0E8D ],
        [ 0x0E94, 0x0E97 ],
        [ 0x0E99, 0x0E9F ],
        [ 0x0EA1, 0x0EA3 ],
        [ 0x0EA5, 0x0EA5 ],
        [ 0x0EA7, 0x0EA7 ],
        [ 0x0EAA, 0x0EAB ],
        [ 0x0EAD, 0x0EB0 ],
        [ 0x0EB2, 0x0EB3 ],
        [ 0x0EBD, 0x0EBD ],
        [ 0x0EC0, 0x0EC4 ],
        [ 0x0EC6, 0x0EC6 ],
        [ 0x0EDC, 0x0EDD ],
        [ 0x0F00, 0x0F00 ],
        [ 0x0F40, 0x0F47 ],
        [ 0x0F49, 0x0F6A ],
        [ 0x0F88, 0x0F8B ],
        [ 0x1000, 0x1021 ],
        [ 0x1023, 0x1027 ],
        [ 0x1029, 0x102A ],
        [ 0x1050, 0x1055 ],
        [ 0x10A0, 0x10C5 ],
        [ 0x10D0, 0x10FA ],
        [ 0x10FC, 0x10FC ],
        [ 0x1100, 0x1159 ],
        [ 0x115F, 0x11A2 ],
        [ 0x11A8, 0x11F9 ],
        [ 0x1200, 0x1248 ],
        [ 0x124A, 0x124D ],
        [ 0x1250, 0x1256 ],
        [ 0x1258, 0x1258 ],
        [ 0x125A, 0x125D ],
        [ 0x1260, 0x1288 ],
        [ 0x128A, 0x128D ],
        [ 0x1290, 0x12B0 ],
        [ 0x12B2, 0x12B5 ],
        [ 0x12B8, 0x12BE ],
        [ 0x12C0, 0x12C0 ],
        [ 0x12C2, 0x12C5 ],
        [ 0x12C8, 0x12D6 ],
        [ 0x12D8, 0x1310 ],
        [ 0x1312, 0x1315 ],
        [ 0x1318, 0x135A ],
        [ 0x1380, 0x138F ],
        [ 0x13A0, 0x13F4 ],
        [ 0x1401, 0x166C ],
        [ 0x166F, 0x1676 ],
        [ 0x1681, 0x169A ],
        [ 0x16A0, 0x16EA ],
        [ 0x1700, 0x170C ],
        [ 0x170E, 0x1711 ],
        [ 0x1720, 0x1731 ],
        [ 0x1740, 0x1751 ],
        [ 0x1760, 0x176C ],
        [ 0x176E, 0x1770 ],
        [ 0x1780, 0x17B3 ],
        [ 0x17D7, 0x17D7 ],
        [ 0x17DC, 0x17DC ],
        [ 0x1820, 0x1877 ],
        [ 0x1880, 0x18A8 ],
        [ 0x1900, 0x191C ],
        [ 0x1950, 0x196D ],
        [ 0x1970, 0x1974 ],
        [ 0x1980, 0x19A9 ],
        [ 0x19C1, 0x19C7 ],
        [ 0x1A00, 0x1A16 ],
        [ 0x1B05, 0x1B33 ],
        [ 0x1B45, 0x1B4B ],
        [ 0x1D00, 0x1DBF ],
        [ 0x1E00, 0x1E9B ],
        [ 0x1EA0, 0x1EF9 ],
        [ 0x1F00, 0x1F15 ],
        [ 0x1F18, 0x1F1D ],
        [ 0x1F20, 0x1F45 ],
        [ 0x1F48, 0x1F4D ],
        [ 0x1F50, 0x1F57 ],
        [ 0x1F59, 0x1F59 ],
        [ 0x1F5B, 0x1F5B ],
        [ 0x1F5D, 0x1F5D ],
        [ 0x1F5F, 0x1F7D ],
        [ 0x1F80, 0x1FB4 ],
        [ 0x1FB6, 0x1FBC ],
        [ 0x1FBE, 0x1FBE ],
        [ 0x1FC2, 0x1FC4 ],
        [ 0x1FC6, 0x1FCC ],
        [ 0x1FD0, 0x1FD3 ],
        [ 0x1FD6, 0x1FDB ],
        [ 0x1FE0, 0x1FEC ],
        [ 0x1FF2, 0x1FF4 ],
        [ 0x1FF6, 0x1FFC ],
        [ 0x2071, 0x2071 ],
        [ 0x207F, 0x207F ],
        [ 0x2090, 0x2094 ],
        [ 0x2102, 0x2102 ],
        [ 0x2107, 0x2107 ],
        [ 0x210A, 0x2113 ],
        [ 0x2115, 0x2115 ],
        [ 0x2119, 0x211D ],
        [ 0x2124, 0x2124 ],
        [ 0x2126, 0x2126 ],
        [ 0x2128, 0x2128 ],
        [ 0x212A, 0x212D ],
        [ 0x212F, 0x2139 ],
        [ 0x213C, 0x213F ],
        [ 0x2145, 0x2149 ],
        [ 0x214E, 0x214E ],
        [ 0x2183, 0x2184 ],
        [ 0x2C00, 0x2C2E ],
        [ 0x2C30, 0x2C5E ],
        [ 0x2C60, 0x2C6C ],
        [ 0x2C74, 0x2C77 ],
        [ 0x2C80, 0x2CE4 ],
        [ 0x2D00, 0x2D25 ],
        [ 0x2D30, 0x2D65 ],
        [ 0x2D6F, 0x2D6F ],
        [ 0x2D80, 0x2D96 ],
        [ 0x2DA0, 0x2DA6 ],
        [ 0x2DA8, 0x2DAE ],
        [ 0x2DB0, 0x2DB6 ],
        [ 0x2DB8, 0x2DBE ],
        [ 0x2DC0, 0x2DC6 ],
        [ 0x2DC8, 0x2DCE ],
        [ 0x2DD0, 0x2DD6 ],
        [ 0x2DD8, 0x2DDE ],
        [ 0x3005, 0x3006 ],
        [ 0x3031, 0x3035 ],
        [ 0x303B, 0x303C ],
        [ 0x3041, 0x3096 ],
        [ 0x309D, 0x309F ],
        [ 0x30A1, 0x30FA ],
        [ 0x30FC, 0x30FF ],
        [ 0x3105, 0x312C ],
        [ 0x3131, 0x318E ],
        [ 0x31A0, 0x31B7 ],
        [ 0x31F0, 0x31FF ],
        [ 0x3400, 0x4DB5 ],
        [ 0x4E00, 0x9FBB ],
        [ 0xA000, 0xA48C ],
        [ 0xA717, 0xA71A ],
        [ 0xA800, 0xA801 ],
        [ 0xA803, 0xA805 ],
        [ 0xA807, 0xA80A ],
        [ 0xA80C, 0xA822 ],
        [ 0xA840, 0xA873 ],
        [ 0xAC00, 0xD7A3 ],
        [ 0xF900, 0xFA2D ],
        [ 0xFA30, 0xFA6A ],
        [ 0xFA70, 0xFAD9 ],
        [ 0xFB00, 0xFB06 ],
        [ 0xFB13, 0xFB17 ],
        [ 0xFB1D, 0xFB1D ],
        [ 0xFB1F, 0xFB28 ],
        [ 0xFB2A, 0xFB36 ],
        [ 0xFB38, 0xFB3C ],
        [ 0xFB3E, 0xFB3E ],
        [ 0xFB40, 0xFB41 ],
        [ 0xFB43, 0xFB44 ],
        [ 0xFB46, 0xFBB1 ],
        [ 0xFBD3, 0xFD3D ],
        [ 0xFD50, 0xFD8F ],
        [ 0xFD92, 0xFDC7 ],
        [ 0xFDF0, 0xFDFB ],
        [ 0xFE70, 0xFE74 ],
        [ 0xFE76, 0xFEFC ],
        [ 0xFF21, 0xFF3A ],
        [ 0xFF41, 0xFF5A ],
        [ 0xFF66, 0xFFBE ],
        [ 0xFFC2, 0xFFC7 ],
        [ 0xFFCA, 0xFFCF ],
        [ 0xFFD2, 0xFFD7 ],
        [ 0xFFDA, 0xFFDC ],
        [ 0x10000, 0x1000B ],
        [ 0x1000D, 0x10026 ],
        [ 0x10028, 0x1003A ],
        [ 0x1003C, 0x1003D ],
        [ 0x1003F, 0x1004D ],
        [ 0x10050, 0x1005D ],
        [ 0x10080, 0x100FA ],
        [ 0x10300, 0x1031E ],
        [ 0x10330, 0x10340 ],
        [ 0x10342, 0x10349 ],
        [ 0x10380, 0x1039D ],
        [ 0x103A0, 0x103C3 ],
        [ 0x103C8, 0x103CF ],
        [ 0x10400, 0x1049D ],
        [ 0x10800, 0x10805 ],
        [ 0x10808, 0x10808 ],
        [ 0x1080A, 0x10835 ],
        [ 0x10837, 0x10838 ],
        [ 0x1083C, 0x1083C ],
        [ 0x1083F, 0x1083F ],
        [ 0x10900, 0x10915 ],
        [ 0x10A00, 0x10A00 ],
        [ 0x10A10, 0x10A13 ],
        [ 0x10A15, 0x10A17 ],
        [ 0x10A19, 0x10A33 ],
        [ 0x12000, 0x1236E ],
        [ 0x1D400, 0x1D454 ],
        [ 0x1D456, 0x1D49C ],
        [ 0x1D49E, 0x1D49F ],
        [ 0x1D4A2, 0x1D4A2 ],
        [ 0x1D4A5, 0x1D4A6 ],
        [ 0x1D4A9, 0x1D4AC ],
        [ 0x1D4AE, 0x1D4B9 ],
        [ 0x1D4BB, 0x1D4BB ],
        [ 0x1D4BD, 0x1D4C3 ],
        [ 0x1D4C5, 0x1D505 ],
        [ 0x1D507, 0x1D50A ],
        [ 0x1D50D, 0x1D514 ],
        [ 0x1D516, 0x1D51C ],
        [ 0x1D51E, 0x1D539 ],
        [ 0x1D53B, 0x1D53E ],
        [ 0x1D540, 0x1D544 ],
        [ 0x1D546, 0x1D546 ],
        [ 0x1D54A, 0x1D550 ],
        [ 0x1D552, 0x1D6A5 ],
        [ 0x1D6A8, 0x1D6C0 ],
        [ 0x1D6C2, 0x1D6DA ],
        [ 0x1D6DC, 0x1D6FA ],
        [ 0x1D6FC, 0x1D714 ],
        [ 0x1D716, 0x1D734 ],
        [ 0x1D736, 0x1D74E ],
        [ 0x1D750, 0x1D76E ],
        [ 0x1D770, 0x1D788 ],
        [ 0x1D78A, 0x1D7A8 ],
        [ 0x1D7AA, 0x1D7C2 ],
        [ 0x1D7C4, 0x1D7CB ],
        [ 0x20000, 0x2A6D6 ],
        [ 0x2F800, 0x2FA1D ],
    ];

    debug
    {
        // Sanity-check the table: each range is well formed and lies
        // strictly before the range that follows it.
        for (int i = 0; i < table.length; i++)
        {
            assert(table[i][0] <= table[i][1]);
            if (i + 1 < table.length)
            {
                if (table[i][1] >= table[i + 1][0])
                    printf("table[%d][1] = x%x, table[%d][0] = x%x\n", i, table[i][1], i + 1, table[i + 1][0]);
                assert(table[i][1] < table[i + 1][0]);
            }
        }
    }

    int found = 0;

    if (u < 0xAA)
    {
        // Below the first non-ASCII table entry only A-Z and a-z qualify.
        if ((u >= 'A' && u <= 'Z') || (u >= 'a' && u <= 'z'))
            found = 1;
    }
    else
    {
        // Binary search for the range containing u.  The indices are
        // signed so the upper bound can drop below zero and end the loop.
        int lo = 0;
        int hi = cast(int)table.length - 1;

        while (lo <= hi)
        {
            int mid = (lo + hi) >> 1;

            if (u < table[mid][0])
                hi = mid - 1;
            else if (u > table[mid][1])
                lo = mid + 1;
            else
            {
                found = 1;
                break;
            }
        }
    }

    debug
    {
        // Cross-check the answer against a plain linear scan.
        int linear = 0;
        for (int i = 0; i < table.length; i++)
        {
            if (u >= table[i][0] && u <= table[i][1])
            {
                linear = 1;
                break;
            }
        }
        assert(linear == found);
    }

    return found;
}

unittest
{
    // Across the ASCII range, isUniAlpha() must accept exactly the
    // letters A-Z and a-z and reject every other value.
    for (uint ch = 0; ch < 0x80; ch++)
    {
        int expectAlpha = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');

        if (expectAlpha)
            assert(isUniAlpha(ch));
        else
            assert(!isUniAlpha(ch));
    }
}