/*
 * view dmd/entity.c @ 1578:1dee66f6ec0b
 *
 * Removed a chunk of code in favour of a shorter and more portable method
 * author Robert Clipsham <robert@octarineparrot.com>
 * date Tue, 08 Sep 2009 11:21:30 +0100
 * parents b30fe7e1dbb9
 * children
 * line wrap: on
 * line source
 */


// Copyright (c) 1999-2009 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// License for redistribution is by either the Artistic License
// in artistic.txt, or the GNU General Public License in gnu.txt.
// See the included readme.txt for details.


#include <string.h>

/*********************************************
 * Convert from named entity to its encoding.
 * For reference:
 *	http://www.htmlhelp.com/reference/html40/entities/
 *	http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
 */

// One row of an entity table: an entity name paired with the value that
// HtmlNamedEntity returns for it.  The tables below initialise rows
// positionally, so the member order must stay (name, value).
struct NameId {
    const char *name;		// entity name without '&' and ';'; NULL marks the end of a sentinel-terminated table
    unsigned short value;	// encoding returned for that name
};

#if IN_GCC
// Entities whose names start with 'a' or 'A' (slot 0 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesA[]={
	"Aacgr", 	0x0386,
	"aacgr", 	0x03AC,
	"Aacute",	0x00C1,
	"aacute",	0x00E1,
	"Abreve",	0x0102,
	"abreve",	0x0103,
	"Acirc", 	0x00C2,
	"acirc", 	0x00E2,
	"acute", 	0x00B4,
	"Acy",   	0x0410,
	"acy",   	0x0430,
	"AElig", 	0x00C6,
	"aelig", 	0x00E6,
	"Agr",   	0x0391,
	"agr",   	0x03B1,
	"Agrave",	0x00C0,
	"agrave",	0x00E0,
	"aleph", 	0x2135,
	"alpha", 	0x03B1,
	"Amacr", 	0x0100,
	"amacr", 	0x0101,
	"amalg", 	0x2210,
	"amp",   	0x0026,
	"and",   	0x2227,
	"ang",   	0x2220,
	"ang90", 	0x221F,
	"angmsd",	0x2221,
	"angsph",	0x2222,
	"angst", 	0x212B,
	"Aogon", 	0x0104,
	"aogon", 	0x0105,
	"ap",    	0x2248,
	"ape",   	0x224A,
	"apos",  	0x0027,
	"Aring", 	0x00C5,
	"aring", 	0x00E5,
	"ast",   	0x002A,
	"asymp", 	0x224D,
	"Atilde",	0x00C3,
	"atilde",	0x00E3,
	"Auml",  	0x00C4,
	"auml",  	0x00E4,
	NULL,		0
};

// Entities whose names start with 'b' or 'B' (slot 1 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesB[]={
	"barwed",	0x22BC,
	"Barwed",	0x2306,
	"bcong", 	0x224C,
	"Bcy",   	0x0411,
	"bcy",   	0x0431,
	"becaus",	0x2235,
	"bepsi", 	0x220D,
	"bernou",	0x212C,
	"beta",  	0x03B2,
	"beth",  	0x2136,
	"Bgr",   	0x0392,
	"bgr",   	0x03B2,
	"blank", 	0x2423,
	"blk12", 	0x2592,
	"blk14", 	0x2591,
	"blk34", 	0x2593,
	"block", 	0x2588,
	"bottom",	0x22A5,
	"bowtie",	0x22C8,
	"boxdl", 	0x2510,
	"boxDL", 	0x2555,
	"boxdL", 	0x2556,
	"boxDl", 	0x2557,
	"boxdr", 	0x250C,
	"boxDR", 	0x2552,
	"boxDr", 	0x2553,
	"boxdR", 	0x2554,
	"boxh",  	0x2500,
	"boxH",  	0x2550,
	"boxhd", 	0x252C,
	"boxhD", 	0x2564,
	"boxHD", 	0x2565,
	"boxHd", 	0x2566,
	"boxhu", 	0x2534,
	"boxhU", 	0x2567,
	"boxHU", 	0x2568,
	"boxHu", 	0x2569,
	"boxul", 	0x2518,
	"boxUL", 	0x255B,
	"boxUl", 	0x255C,
	"boxuL", 	0x255D,
	"boxur", 	0x2514,
	"boxUR", 	0x2558,
	"boxuR", 	0x2559,
	"boxUr", 	0x255A,
	"boxv",  	0x2502,
	"boxV",  	0x2551,
	"boxvh", 	0x253C,
	"boxvH", 	0x256A,
	"boxVH", 	0x256B,
	"boxVh", 	0x256C,
	"boxvl", 	0x2524,
	"boxvL", 	0x2561,
	"boxVL", 	0x2562,
	"boxVl", 	0x2563,
	"boxvr", 	0x251C,
	"boxvR", 	0x255E,
	"boxVR", 	0x255F,
	"boxVr", 	0x2560,
	"bprime",	0x2035,
	"breve", 	0x02D8,
	"brvbar",	0x00A6,
	"bsim",  	0x223D,
	"bsime", 	0x22CD,
	"bsol",  	0x005C,
	"bull",  	0x2022,
	"bump",  	0x224E,
	"bumpe", 	0x224F,
	NULL,		0
};

// Entities whose names start with 'c' or 'C' (slot 2 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesC[]={
	"Cacute",	0x0106,
	"cacute",	0x0107,
	"cap",   	0x2229,
	"Cap",   	0x22D2,
	"caret", 	0x2041,
	"caron", 	0x02C7,
	"Ccaron",	0x010C,
	"ccaron",	0x010D,
	"Ccedil",	0x00C7,
	"ccedil",	0x00E7,
	"Ccirc", 	0x0108,
	"ccirc", 	0x0109,
	"Cdot",  	0x010A,
	"cdot",  	0x010B,
	"cedil", 	0x00B8,
	"cent",  	0x00A2,
	"CHcy",  	0x0427,
	"chcy",  	0x0447,
	"check", 	0x2713,
	"chi",   	0x03C7,
	"cir",   	0x25CB,
	"circ",  	0x005E,
	"cire",  	0x2257,
	"clubs", 	0x2663,
	"colon", 	0x003A,
	"colone",	0x2254,
	"comma", 	0x002C,
	"commat",	0x0040,
	"comp",  	0x2201,
	"compfn",	0x2218,
	"cong",  	0x2245,
	"conint",	0x222E,
	"coprod",	0x2210,
	"copy",  	0x00A9,
	"copysr",	0x2117,
	"cross", 	0x2717,
	"cuepr", 	0x22DE,
	"cuesc", 	0x22DF,
	"cularr",	0x21B6,
	"cup",   	0x222A,
	"Cup",   	0x22D3,
	"cupre", 	0x227C,
	"curarr",	0x21B7,
	"curren",	0x00A4,
	"cuvee", 	0x22CE,
	"cuwed", 	0x22CF,
	NULL,		0
};

// Entities whose names start with 'd' or 'D' (slot 3 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesD[]={
	"dagger",	0x2020,
	"Dagger",	0x2021,
	"daleth",	0x2138,
	"darr",  	0x2193,
	"dArr",  	0x21D3,
	"darr2", 	0x21CA,
	"dash",  	0x2010,
	"dashv", 	0x22A3,
	"dblac", 	0x02DD,
	"Dcaron",	0x010E,
	"dcaron",	0x010F,
	"Dcy",   	0x0414,
	"dcy",   	0x0434,
	"deg",   	0x00B0,
	"Delta", 	0x0394,
	"delta", 	0x03B4,
	"Dgr",   	0x0394,
	"dgr",   	0x03B4,
	"dharl", 	0x21C3,
	"dharr", 	0x21C2,
	"diam",  	0x22C4,
	"diams", 	0x2666,
	"die",   	0x00A8,
	"divide",	0x00F7,
	"divonx",	0x22C7,
	"DJcy",  	0x0402,
	"djcy",  	0x0452,
	"dlarr", 	0x2199,
	"dlcorn",	0x231E,
	"dlcrop",	0x230D,
	"dollar",	0x0024,
	"Dot",   	0x00A8,
	"dot",   	0x02D9,
	"DotDot",	0x20DC,
	"drarr", 	0x2198,
	"drcorn",	0x231F,
	"drcrop",	0x230C,
	"DScy",  	0x0405,
	"dscy",  	0x0455,
	"Dstrok",	0x0110,
	"dstrok",	0x0111,
	"dtri",  	0x25BF,
	"dtrif", 	0x25BE,
	"DZcy",  	0x040F,
	"dzcy",  	0x045F,
	NULL,		0
};

// Entities whose names start with 'e' or 'E' (slot 4 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesE[]={
	"Eacgr", 	0x0388,
	"eacgr", 	0x03AD,
	"Eacute",	0x00C9,
	"eacute",	0x00E9,
	"Ecaron",	0x011A,
	"ecaron",	0x011B,
	"ecir",  	0x2256,
	"Ecirc", 	0x00CA,
	"ecirc", 	0x00EA,
	"ecolon",	0x2255,
	"Ecy",   	0x042D,
	"ecy",   	0x044D,
	"Edot",  	0x0116,
	"edot",  	0x0117,
	"eDot",  	0x2251,
	"EEacgr",	0x0389,
	"eeacgr",	0x03AE,
	"EEgr",  	0x0397,
	"eegr",  	0x03B7,
	"efDot", 	0x2252,
	"Egr",   	0x0395,
	"egr",   	0x03B5,
	"Egrave",	0x00C8,
	"egrave",	0x00E8,
	"egs",   	0x22DD,
	"ell",   	0x2113,
	"els",   	0x22DC,
	"Emacr", 	0x0112,
	"emacr", 	0x0113,
	"empty", 	0x2205,
	"emsp",  	0x2003,
	"emsp13",	0x2004,
	"emsp14",	0x2005,
	"ENG",   	0x014A,
	"eng",   	0x014B,
	"ensp",  	0x2002,
	"Eogon", 	0x0118,
	"eogon", 	0x0119,
	"epsi",  	0x220A,
	"epsis", 	0x220A,
	"epsiv", 	0x03B5,
	"equals",	0x003D,
	"equiv", 	0x2261,
	"erDot", 	0x2253,
	"esdot", 	0x2250,
	"eta",   	0x03B7,
	"ETH",   	0x00D0,
	"eth",   	0x00F0,
	"Euml",  	0x00CB,
	"euml",  	0x00EB,
	"excl",  	0x0021,
	"exist", 	0x2203,
	NULL,		0
};

// Entities whose names start with 'f' or 'F' (slot 5 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesF[]={
	"Fcy",   	0x0424,
	"fcy",   	0x0444,
	"female",	0x2640,
	"ffilig",	0xFB03,
	"fflig", 	0xFB00,
	"ffllig",	0xFB04,
	"filig", 	0xFB01,
	"flat",  	0x266D,
	"fllig", 	0xFB02,
	"fnof",  	0x0192,
	"forall",	0x2200,
	"fork",  	0x22D4,
	"frac12",	0x00BD,
	"frac13",	0x2153,
	"frac14",	0x00BC,
	"frac15",	0x2155,
	"frac16",	0x2159,
	"frac18",	0x215B,
	"frac23",	0x2154,
	"frac25",	0x2156,
	"frac34",	0x00BE,
	"frac35",	0x2157,
	"frac38",	0x215C,
	"frac45",	0x2158,
	"frac56",	0x215A,
	"frac58",	0x215D,
	"frac78",	0x215E,
	"frown", 	0x2322,
	NULL,		0
};

// Entities whose names start with 'g' or 'G' (slot 6 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesG[]={
	"gacute",	0x01F5,
	"Gamma", 	0x0393,
	"gamma", 	0x03B3,
	"gammad",	0x03DC,
	"gap",   	0x2273,
	"Gbreve",	0x011E,
	"gbreve",	0x011F,
	"Gcedil",	0x0122,
	"Gcirc", 	0x011C,
	"gcirc", 	0x011D,
	"Gcy",   	0x0413,
	"gcy",   	0x0433,
	"Gdot",  	0x0120,
	"gdot",  	0x0121,
	"ge",    	0x2265,
	"gE",    	0x2267,
	"gel",   	0x22DB,
	"gEl",   	0x22DB,
	"ges",   	0x2265,
	"Gg",    	0x22D9,
	"Ggr",   	0x0393,
	"ggr",   	0x03B3,
	"gimel", 	0x2137,
	"GJcy",  	0x0403,
	"gjcy",  	0x0453,
	"gl",    	0x2277,
	"gnap",  	0xE411,
	"gne",   	0x2269,
	"gnE",   	0x2269,
	"gnsim", 	0x22E7,
	"grave", 	0x0060,
	"gsdot", 	0x22D7,
	"gsim",  	0x2273,
	"gt",    	0x003E,
	"Gt",    	0x226B,
	"gvnE",  	0x2269,
	NULL,		0
};

// Entities whose names start with 'h' or 'H' (slot 7 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesH[]={
	"hairsp",	0x200A,
	"half",  	0x00BD,
	"hamilt",	0x210B,
	"HARDcy",	0x042A,
	"hardcy",	0x044A,
	"harr",  	0x2194,
	"hArr",  	0x21D4,
	"harrw", 	0x21AD,
	"Hcirc", 	0x0124,
	"hcirc", 	0x0125,
	"hearts",	0x2665,
	"hellip",	0x2026,
	"horbar",	0x2015,
	"Hstrok",	0x0126,
	"hstrok",	0x0127,
	"hybull",	0x2043,
	"hyphen",	0x002D,
	NULL,		0
};

// Entities whose names start with 'i' or 'I' (slot 8 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesI[]={
	"Iacgr", 	0x038A,
	"iacgr", 	0x03AF,
	"Iacute",	0x00CD,
	"iacute",	0x00ED,
	"Icirc", 	0x00CE,
	"icirc", 	0x00EE,
	"Icy",   	0x0418,
	"icy",   	0x0438,
	"idiagr",	0x0390,
	"Idigr", 	0x03AA,
	"idigr", 	0x03CA,
	"Idot",  	0x0130,
	"IEcy",  	0x0415,
	"iecy",  	0x0435,
	"iexcl", 	0x00A1,
	"iff",   	0x21D4,
	"Igr",   	0x0399,
	"igr",   	0x03B9,
	"Igrave",	0x00CC,
	"igrave",	0x00EC,
	"IJlig", 	0x0132,
	"ijlig", 	0x0133,
	"Imacr", 	0x012A,
	"imacr", 	0x012B,
	"image", 	0x2111,
	"incare",	0x2105,
	"infin", 	0x221E,
	"inodot",	0x0131,
	"int",   	0x222B,
	"intcal",	0x22BA,
	"IOcy",  	0x0401,
	"iocy",  	0x0451,
	"Iogon", 	0x012E,
	"iogon", 	0x012F,
	"iota",  	0x03B9,
	"iquest",	0x00BF,
	"isin",  	0x220A,
	"Itilde",	0x0128,
	"itilde",	0x0129,
	"Iukcy", 	0x0406,
	"iukcy", 	0x0456,
	"Iuml",  	0x00CF,
	"iuml",  	0x00EF,
	NULL,		0
};

// Entities whose names start with 'j' or 'J' (slot 9 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesJ[]={
	"Jcirc", 	0x0134,
	"jcirc", 	0x0135,
	"Jcy",   	0x0419,
	"jcy",   	0x0439,
	"Jsercy",	0x0408,
	"jsercy",	0x0458,
	"Jukcy", 	0x0404,
	"jukcy", 	0x0454,
	NULL,		0
};

// Entities whose names start with 'k' or 'K' (slot 10 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesK[]={
	"kappa", 	0x03BA,
	"kappav",	0x03F0,
	"Kcedil",	0x0136,
	"kcedil",	0x0137,
	"Kcy",   	0x041A,
	"kcy",   	0x043A,
	"Kgr",   	0x039A,
	"kgr",   	0x03BA,
	"kgreen",	0x0138,
	"KHcy",  	0x0425,
	"khcy",  	0x0445,
	"KHgr",  	0x03A7,
	"khgr",  	0x03C7,
	"KJcy",  	0x040C,
	"kjcy",  	0x045C,
	NULL,		0
};

// Entities whose names start with 'l' or 'L' (slot 11 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesL[]={
	"lAarr", 	0x21DA,
	"Lacute",	0x0139,
	"lacute",	0x013A,
	"lagran",	0x2112,
	"Lambda",	0x039B,
	"lambda",	0x03BB,
	"lang",  	0x3008,
	"lap",   	0x2272,
	"laquo", 	0x00AB,
	"larr",  	0x2190,
	"Larr",  	0x219E,
	"lArr",  	0x21D0,
	"larr2", 	0x21C7,
	"larrhk",	0x21A9,
	"larrlp",	0x21AB,
	"larrtl",	0x21A2,
	"Lcaron",	0x013D,
	"lcaron",	0x013E,
	"Lcedil",	0x013B,
	"lcedil",	0x013C,
	"lceil", 	0x2308,
	"lcub",  	0x007B,
	"Lcy",   	0x041B,
	"lcy",   	0x043B,
	"ldot",  	0x22D6,
	"ldquo", 	0x201C,
	"ldquor",	0x201E,
	"le",    	0x2264,
	"lE",    	0x2266,
	"leg",   	0x22DA,
	"lEg",   	0x22DA,
	"les",   	0x2264,
	"lfloor",	0x230A,
	"lg",    	0x2276,
	"Lgr",   	0x039B,
	"lgr",   	0x03BB,
	"lhard", 	0x21BD,
	"lharu", 	0x21BC,
	"lhblk", 	0x2584,
	"LJcy",  	0x0409,
	"ljcy",  	0x0459,
	"Ll",    	0x22D8,
	"Lmidot",	0x013F,
	"lmidot",	0x0140,
	"lnap",  	0xE2A2,
	"lne",   	0x2268,
	"lnE",   	0x2268,
	"lnsim", 	0x22E6,
	"lowast",	0x2217,
	"lowbar",	0x005F,
	"loz",   	0x25CA,
	"lozf",  	0x2726,
	"lpar",  	0x0028,
	"lrarr2",	0x21C6,
	"lrhar2",	0x21CB,
	"lsh",   	0x21B0,
	"lsim",  	0x2272,
	"lsqb",  	0x005B,
	"lsquo", 	0x2018,
	"lsquor",	0x201A,
	"Lstrok",	0x0141,
	"lstrok",	0x0142,
	"lt",    	0x003C,
	"Lt",    	0x226A,
	"lthree",	0x22CB,
	"ltimes",	0x22C9,
	"ltri",  	0x25C3,
	"ltrie", 	0x22B4,
	"ltrif", 	0x25C2,
	"lvnE",  	0x2268,
	NULL,		0
};

// Entities whose names start with 'm' or 'M' (slot 12 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesM[]={
	"macr",  	0x00AF,
	"male",  	0x2642,
	"malt",  	0x2720,
	"map",   	0x21A6,
	"marker",	0x25AE,
	"Mcy",   	0x041C,
	"mcy",   	0x043C,
	"mdash", 	0x2014,
	"Mgr",   	0x039C,
	"mgr",   	0x03BC,
	"micro", 	0x00B5,
	"mid",   	0x2223,
	"middot",	0x00B7,
	"minus", 	0x2212,
	"minusb",	0x229F,
	"mldr",  	0x2026,
	"mnplus",	0x2213,
	"models",	0x22A7,
	"mu",    	0x03BC,
	"mumap", 	0x22B8,
	NULL,		0
};

// Entities whose names start with 'n' or 'N' (slot 13 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
// Note that "nbsp" is deliberately mapped to an ordinary space (32) here
// rather than to 0x00A0.
static NameId namesN[]={
	"nabla", 	0x2207,
	"Nacute",	0x0143,
	"nacute",	0x0144,
	"nap",   	0x2249,
	"napos", 	0x0149,
	"natur", 	0x266E,
//	"nbsp",  	0x00A0,
	"nbsp",  	32,    // make non-breaking space appear as space
	"Ncaron",	0x0147,
	"ncaron",	0x0148,
	"Ncedil",	0x0145,
	"ncedil",	0x0146,
	"ncong", 	0x2247,
	"Ncy",   	0x041D,
	"ncy",   	0x043D,
	"ndash", 	0x2013,
	"ne",    	0x2260,
	"nearr", 	0x2197,
	"nequiv",	0x2262,
	"nexist",	0x2204,
	"nge",   	0x2271,
	"ngE",   	0x2271,
	"nges",  	0x2271,
	"Ngr",   	0x039D,
	"ngr",   	0x03BD,
	"ngt",   	0x226F,
	"nharr", 	0x21AE,
	"nhArr", 	0x21CE,
	"ni",    	0x220D,
	"NJcy",  	0x040A,
	"njcy",  	0x045A,
	"nlarr", 	0x219A,
	"nlArr", 	0x21CD,
	"nldr",  	0x2025,
	"nle",   	0x2270,
	"nlE",   	0x2270,
	"nles",  	0x2270,
	"nlt",   	0x226E,
	"nltri", 	0x22EA,
	"nltrie",	0x22EC,
	"nmid",  	0x2224,
	"not",   	0x00AC,
	"notin", 	0x2209,
	"npar",  	0x2226,
	"npr",   	0x2280,
	"npre",  	0x22E0,
	"nrarr", 	0x219B,
	"nrArr", 	0x21CF,
	"nrtri", 	0x22EB,
	"nrtrie",	0x22ED,
	"nsc",   	0x2281,
	"nsce",  	0x22E1,
	"nsim",  	0x2241,
	"nsime", 	0x2244,
	"nsmid", 	0xE2AA,
	"nspar", 	0x2226,
	"nsub",  	0x2284,
	"nsube", 	0x2288,
	"nsubE", 	0x2288,
	"nsup",  	0x2285,
	"nsupe", 	0x2289,
	"nsupE", 	0x2289,
	"Ntilde",	0x00D1,
	"ntilde",	0x00F1,
	"nu",    	0x03BD,
	"num",   	0x0023,
	"numero",	0x2116,
	"numsp", 	0x2007,
	"nvdash",	0x22AC,
	"nvDash",	0x22AD,
	"nVdash",	0x22AE,
	"nVDash",	0x22AF,
	"nwarr", 	0x2196,
	NULL,		0
};

// Entities whose names start with 'o' or 'O' (slot 14 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesO[]={
	"Oacgr", 	0x038C,
	"oacgr", 	0x03CC,
	"Oacute",	0x00D3,
	"oacute",	0x00F3,
	"oast",  	0x229B,
	"ocir",  	0x229A,
	"Ocirc", 	0x00D4,
	"ocirc", 	0x00F4,
	"Ocy",   	0x041E,
	"ocy",   	0x043E,
	"odash", 	0x229D,
	"Odblac",	0x0150,
	"odblac",	0x0151,
	"odot",  	0x2299,
	"OElig", 	0x0152,
	"oelig", 	0x0153,
	"ogon",  	0x02DB,
	"Ogr",   	0x039F,
	"ogr",   	0x03BF,
	"Ograve",	0x00D2,
	"ograve",	0x00F2,
	"OHacgr",	0x038F,
	"ohacgr",	0x03CE,
	"OHgr",  	0x03A9,
	"ohgr",  	0x03C9,
	"ohm",   	0x2126,
	"olarr", 	0x21BA,
	"Omacr", 	0x014C,
	"omacr", 	0x014D,
	"Omega", 	0x03A9,
	"omega", 	0x03C9,
	"ominus",	0x2296,
	"oplus", 	0x2295,
	"or",    	0x2228,
	"orarr", 	0x21BB,
	"order", 	0x2134,
	"ordf",  	0x00AA,
	"ordm",  	0x00BA,
	"oS",    	0x24C8,
	"Oslash",	0x00D8,
	"oslash",	0x00F8,
	"osol",  	0x2298,
	"Otilde",	0x00D5,
	"otilde",	0x00F5,
	"otimes",	0x2297,
	"Ouml",  	0x00D6,
	"ouml",  	0x00F6,
	NULL,		0
};

// Entities whose names start with 'p' or 'P' (slot 15 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesP[]={
	"par",   	0x2225,
	"para",  	0x00B6,
	"part",  	0x2202,
	"Pcy",   	0x041F,
	"pcy",   	0x043F,
	"percnt",	0x0025,
	"period",	0x002E,
	"permil",	0x2030,
	"perp",  	0x22A5,
	"Pgr",   	0x03A0,
	"pgr",   	0x03C0,
	"PHgr",  	0x03A6,
	"phgr",  	0x03C6,
	"Phi",   	0x03A6,
	"phis",  	0x03C6,
	"phiv",  	0x03D5,
	"phmmat",	0x2133,
	"phone", 	0x260E,
	"Pi",    	0x03A0,
	"pi",    	0x03C0,
	"piv",   	0x03D6,
	"planck",	0x210F,
	"plus",  	0x002B,
	"plusb", 	0x229E,
	"plusdo",	0x2214,
	"plusmn",	0x00B1,
	"pound", 	0x00A3,
	"pr",    	0x227A,
	"prap",  	0x227E,
	"pre",   	0x227C,
	"prime", 	0x2032,
	"Prime", 	0x2033,
	"prnap", 	0x22E8,
	"prnE",  	0xE2B3,
	"prnsim",	0x22E8,
	"prod",  	0x220F,
	"prop",  	0x221D,
	"prsim", 	0x227E,
	"PSgr",  	0x03A8,
	"psgr",  	0x03C8,
	"Psi",   	0x03A8,
	"psi",   	0x03C8,
	"puncsp",	0x2008,
	NULL,		0
};

// Entities whose names start with 'q' or 'Q' (slot 16 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesQ[]={
	"quest", 	0x003F,
	"quot",  	0x0022,
	NULL,		0
};

// Entities whose names start with 'r' or 'R' (slot 17 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesR[]={
	"rAarr", 	0x21DB,
	"Racute",	0x0154,
	"racute",	0x0155,
	"radic", 	0x221A,
	"rang",  	0x3009,
	"raquo", 	0x00BB,
	"rarr",  	0x2192,
	"Rarr",  	0x21A0,
	"rArr",  	0x21D2,
	"rarr2", 	0x21C9,
	"rarrhk",	0x21AA,
	"rarrlp",	0x21AC,
	"rarrtl",	0x21A3,
	"rarrw", 	0x219D,
	"Rcaron",	0x0158,
	"rcaron",	0x0159,
	"Rcedil",	0x0156,
	"rcedil",	0x0157,
	"rceil", 	0x2309,
	"rcub",  	0x007D,
	"Rcy",   	0x0420,
	"rcy",   	0x0440,
	"rdquo", 	0x201D,
	"rdquor",	0x201C,
	"real",  	0x211C,
	"rect",  	0x25AD,
	"reg",   	0x00AE,
	"rfloor",	0x230B,
	"Rgr",   	0x03A1,
	"rgr",   	0x03C1,
	"rhard", 	0x21C1,
	"rharu", 	0x21C0,
	"rho",   	0x03C1,
	"rhov",  	0x03F1,
	"ring",  	0x02DA,
	"rlarr2",	0x21C4,
	"rlhar2",	0x21CC,
	"rpar",  	0x0029,
	"rpargt",	0xE291,
	"rsh",   	0x21B1,
	"rsqb",  	0x005D,
	"rsquo", 	0x2019,
	"rsquor",	0x2018,
	"rthree",	0x22CC,
	"rtimes",	0x22CA,
	"rtri",  	0x25B9,
	"rtrie", 	0x22B5,
	"rtrif", 	0x25B8,
	"rx",    	0x211E,
	NULL,		0
};

// Entities whose names start with 's' or 'S' (slot 18 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesS[]={
	"Sacute",	0x015A,
	"sacute",	0x015B,
	"samalg",	0x2210,
	"sbsol", 	0xFE68,
	"sc",    	0x227B,
	"scap",  	0x227F,
	"Scaron",	0x0160,
	"scaron",	0x0161,
	"sccue", 	0x227D,
	"sce",   	0x227D,
	"Scedil",	0x015E,
	"scedil",	0x015F,
	"Scirc", 	0x015C,
	"scirc", 	0x015D,
	"scnap", 	0x22E9,
	"scnE",  	0xE2B5,
	"scnsim",	0x22E9,
	"scsim", 	0x227F,
	"Scy",   	0x0421,
	"scy",   	0x0441,
	"sdot",  	0x22C5,
	"sdotb", 	0x22A1,
	"sect",  	0x00A7,
	"semi",  	0x003B,
	"setmn", 	0x2216,
	"sext",  	0x2736,
	"sfgr",  	0x03C2,
	"sfrown",	0x2322,
	"Sgr",   	0x03A3,
	"sgr",   	0x03C3,
	"sharp", 	0x266F,
	"SHCHcy",	0x0429,
	"shchcy",	0x0449,
	"SHcy",  	0x0428,
	"shcy",  	0x0448,
	"shy",   	0x00AD,
	"Sigma", 	0x03A3,
	"sigma", 	0x03C3,
	"sigmav",	0x03C2,
	"sim",   	0x223C,
	"sime",  	0x2243,
	"smid",  	0xE301,
	"smile", 	0x2323,
	"SOFTcy",	0x042C,
	"softcy",	0x044C,
	"sol",   	0x002F,
	"spades",	0x2660,
	"spar",  	0x2225,
	"sqcap", 	0x2293,
	"sqcup", 	0x2294,
	"sqsub", 	0x228F,
	"sqsube",	0x2291,
	"sqsup", 	0x2290,
	"sqsupe",	0x2292,
	"squ",   	0x25A1,
	"square",	0x25A1,
	"squf",  	0x25AA,
	"ssetmn",	0x2216,
	"ssmile",	0x2323,
	"sstarf",	0x22C6,
	"star",  	0x22C6,
	"starf", 	0x2605,
	"sub",   	0x2282,
	"Sub",   	0x22D0,
	"sube",  	0x2286,
	"subE",  	0x2286,
	"subne", 	0x228A,
	"subnE", 	0x228A,
	"sum",   	0x2211,
	"sung",  	0x2669,
	"sup",   	0x2283,
	"Sup",   	0x22D1,
	"sup1",  	0x00B9,
	"sup2",  	0x00B2,
	"sup3",  	0x00B3,
	"supe",  	0x2287,
	"supE",  	0x2287,
	"supne", 	0x228B,
	"supnE", 	0x228B,
	"szlig", 	0x00DF,
	NULL,		0
};

// Entities whose names start with 't' or 'T' (slot 19 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesT[]={
	"target",	0x2316,
	"tau",   	0x03C4,
	"Tcaron",	0x0164,
	"tcaron",	0x0165,
	"Tcedil",	0x0162,
	"tcedil",	0x0163,
	"Tcy",   	0x0422,
	"tcy",   	0x0442,
	"tdot",  	0x20DB,
	"telrec",	0x2315,
	"Tgr",   	0x03A4,
	"tgr",   	0x03C4,
	"there4",	0x2234,
	"Theta", 	0x0398,
	"thetas",	0x03B8,
	"thetav",	0x03D1,
	"THgr",  	0x0398,
	"thgr",  	0x03B8,
	"thinsp",	0x2009,
	"thkap", 	0x2248,
	"thksim",	0x223C,
	"THORN", 	0x00DE,
	"thorn", 	0x00FE,
	"tilde", 	0x02DC,
	"times", 	0x00D7,
	"timesb",	0x22A0,
	"top",   	0x22A4,
	"tprime",	0x2034,
	"trade", 	0x2122,
	"trie",  	0x225C,
	"TScy",  	0x0426,
	"tscy",  	0x0446,
	"TSHcy", 	0x040B,
	"tshcy", 	0x045B,
	"Tstrok",	0x0166,
	"tstrok",	0x0167,
	"twixt", 	0x226C,
	NULL,		0
};

// Entities whose names start with 'u' or 'U' (slot 20 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesU[]={
	"Uacgr", 	0x038E,
	"uacgr", 	0x03CD,
	"Uacute",	0x00DA,
	"uacute",	0x00FA,
	"uarr",  	0x2191,
	"uArr",  	0x21D1,
	"uarr2", 	0x21C8,
	"Ubrcy", 	0x040E,
	"ubrcy", 	0x045E,
	"Ubreve",	0x016C,
	"ubreve",	0x016D,
	"Ucirc", 	0x00DB,
	"ucirc", 	0x00FB,
	"Ucy",   	0x0423,
	"ucy",   	0x0443,
	"Udblac",	0x0170,
	"udblac",	0x0171,
	"udiagr",	0x03B0,
	"Udigr", 	0x03AB,
	"udigr", 	0x03CB,
	"Ugr",   	0x03A5,
	"ugr",   	0x03C5,
	"Ugrave",	0x00D9,
	"ugrave",	0x00F9,
	"uharl", 	0x21BF,
	"uharr", 	0x21BE,
	"uhblk", 	0x2580,
	"ulcorn",	0x231C,
	"ulcrop",	0x230F,
	"Umacr", 	0x016A,
	"umacr", 	0x016B,
	"uml",   	0x00A8,
	"Uogon", 	0x0172,
	"uogon", 	0x0173,
	"uplus", 	0x228E,
	"upsi",  	0x03C5,
	"Upsi",  	0x03D2,
	"urcorn",	0x231D,
	"urcrop",	0x230E,
	"Uring", 	0x016E,
	"uring", 	0x016F,
	"Utilde",	0x0168,
	"utilde",	0x0169,
	"utri",  	0x25B5,
	"utrif", 	0x25B4,
	"Uuml",  	0x00DC,
	"uuml",  	0x00FC,
	NULL,		0
};

// Entities whose names start with 'v' or 'V' (slot 21 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesV[]={
	"varr",  	0x2195,
	"vArr",  	0x21D5,
	"Vcy",   	0x0412,
	"vcy",   	0x0432,
	"vdash", 	0x22A2,
	"vDash", 	0x22A8,
	"Vdash", 	0x22A9,
	"veebar",	0x22BB,
	"vellip",	0x22EE,
	"verbar",	0x007C,
	"Verbar",	0x2016,
	"vltri", 	0x22B2,
	"vprime",	0x2032,
	"vprop", 	0x221D,
	"vrtri", 	0x22B3,
	"vsubne",	0x228A,
	"vsubnE",	0xE2B8,
	"vsupne",	0x228B,
	"vsupnE",	0x228B,
	"Vvdash",	0x22AA,
	NULL,		0
};

// Entities whose names start with 'w' or 'W' (slot 22 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesW[]={
	"Wcirc", 	0x0174,
	"wcirc", 	0x0175,
	"wedgeq",	0x2259,
	"weierp",	0x2118,
	"wreath",	0x2240,
	NULL,		0
};

// Entities whose names start with 'x' or 'X' (slot 23 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesX[]={
	"xcirc", 	0x25CB,
	"xdtri", 	0x25BD,
	"Xgr",   	0x039E,
	"xgr",   	0x03BE,
	"xharr", 	0x2194,
	"xhArr", 	0x2194,
	"Xi",    	0x039E,
	"xi",    	0x03BE,
	"xlArr", 	0x21D0,
	"xrArr", 	0x21D2,
	"xutri", 	0x25B3,
	NULL,		0
};

// Entities whose names start with 'y' or 'Y' (slot 24 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesY[]={
	"Yacute",	0x00DD,
	"yacute",	0x00FD,
	"YAcy",  	0x042F,
	"yacy",  	0x044F,
	"Ycirc", 	0x0176,
	"ycirc", 	0x0177,
	"Ycy",   	0x042B,
	"ycy",   	0x044B,
	"yen",   	0x00A5,
	"YIcy",  	0x0407,
	"yicy",  	0x0457,
	"YUcy",  	0x042E,
	"yucy",  	0x044E,
	"yuml",  	0x00FF,
	"Yuml",  	0x0178,
	NULL,		0
};

// Entities whose names start with 'z' or 'Z' (slot 25 of namesTable).
// The final { NULL, 0 } row is the sentinel that stops HtmlNamedEntity's scan.
static NameId namesZ[]={
	"Zacute",	0x0179,
	"zacute",	0x017A,
	"Zcaron",	0x017D,
	"zcaron",	0x017E,
	"Zcy",   	0x0417,
	"zcy",   	0x0437,
	"Zdot",  	0x017B,
	"zdot",  	0x017C,
	"zeta",  	0x03B6,
	"Zgr",   	0x0396,
	"zgr",   	0x03B6,
	"ZHcy",  	0x0416,
	"zhcy",  	0x0436,
	NULL, 0
};

// @todo@ order namesTable and names? by frequency
// Per-letter dispatch table.  HtmlNamedEntity picks a slot from the
// case-folded first character of the entity name minus 'a', so slot 0 is
// the a/A list and slot 25 the z/Z list.  A NULL entry closes the array.
static NameId* namesTable[] = {
	namesA,
	namesB,
	namesC,
	namesD,
	namesE,
	namesF,
	namesG,
	namesH,
	namesI,
	namesJ,
	namesK,
	namesL,
	namesM,
	namesN,
	namesO,
	namesP,
	namesQ,
	namesR,
	namesS,
	namesT,
	namesU,
	namesV,
	namesW,
	namesX,
	namesY,
	namesZ,
	NULL
};

int HtmlNamedEntity(unsigned char *p, int length)
{
    int tableIndex = tolower(*p) - 'a';
    if (tableIndex >= 0 && tableIndex < 26) {
	NameId* names = namesTable[tableIndex];
	int i;

	for (i = 0; names[i].name; i++){
		if (strncmp(names[i].name, (char *)p, length) == 0){
			return names[i].value;
		}
	}
    }
    error("unrecognized character entity \"%.*s\"", length, p);
    return -1;
}

#else //TODO: Merge Walter's list with Thomas'

// Entity table used when IN_GCC is not set: each row pairs an entity name
// with its value written in decimal.  There is no sentinel row; the lookup
// loop in HtmlNamedEntity derives the row count from sizeof(names).
static NameId names[] =
{
    // Entities
    "quot",	34,
    "amp",	38,
    "lt",	60,
    "gt",	62,

    "OElig",	338,
    "oelig",	339,
    "Scaron",	352,
    "scaron",	353,
    "Yuml",	376,
    "circ",	710,
    "tilde",	732,
    "ensp",	8194,
    "emsp",	8195,
    "thinsp",	8201,
    "zwnj",	8204,
    "zwj",	8205,
    "lrm",	8206,
    "rlm",	8207,
    "ndash",	8211,
    "mdash",	8212,
    "lsquo",	8216,
    "rsquo",	8217,
    "sbquo",	8218,
    "ldquo",	8220,
    "rdquo",	8221,
    "bdquo",	8222,
    "dagger",	8224,
    "Dagger",	8225,
    "permil",	8240,
    "lsaquo",	8249,
    "rsaquo",	8250,
    "euro",	8364,

    // Latin-1 (ISO-8859-1) Entities
    "nbsp",	160,
    "iexcl",	161,
    "cent",	162,
    "pound",	163,
    "curren",	164,
    "yen",	165,
    "brvbar",	166,
    "sect",	167,
    "uml",	168,
    "copy",	169,
    "ordf",	170,
    "laquo",	171,
    "not",	172,
    "shy",	173,
    "reg",	174,
    "macr",	175,
    "deg",	176,
    "plusmn",	177,
    "sup2",	178,
    "sup3",	179,
    "acute",	180,
    "micro",	181,
    "para",	182,
    "middot",	183,
    "cedil",	184,
    "sup1",	185,
    "ordm",	186,
    "raquo",	187,
    "frac14",	188,
    "frac12",	189,
    "frac34",	190,
    "iquest",	191,
    "Agrave",	192,
    "Aacute",	193,
    "Acirc",	194,
    "Atilde",	195,
    "Auml",	196,
    "Aring",	197,
    "AElig",	198,
    "Ccedil",	199,
    "Egrave",	200,
    "Eacute",	201,
    "Ecirc",	202,
    "Euml",	203,
    "Igrave",	204,
    "Iacute",	205,
    "Icirc",	206,
    "Iuml",	207,
    "ETH",	208,
    "Ntilde",	209,
    "Ograve",	210,
    "Oacute",	211,
    "Ocirc",	212,
    "Otilde",	213,
    "Ouml",	214,
    "times",	215,
    "Oslash",	216,
    "Ugrave",	217,
    "Uacute",	218,
    "Ucirc",	219,
    "Uuml",	220,
    "Yacute",	221,
    "THORN",	222,
    "szlig",	223,
    "agrave",	224,
    "aacute",	225,
    "acirc",	226,
    "atilde",	227,
    "auml",	228,
    "aring",	229,
    "aelig",	230,
    "ccedil",	231,
    "egrave",	232,
    "eacute",	233,
    "ecirc",	234,
    "euml",	235,
    "igrave",	236,
    "iacute",	237,
    "icirc",	238,
    "iuml",	239,
    "eth",	240,
    "ntilde",	241,
    "ograve",	242,
    "oacute",	243,
    "ocirc",	244,
    "otilde",	245,
    "ouml",	246,
    "divide",	247,
    "oslash",	248,
    "ugrave",	249,
    "uacute",	250,
    "ucirc",	251,
    "uuml",	252,
    "yacute",	253,
    "thorn",	254,
    "yuml",	255,

	// Symbols and Greek letter entities
    "fnof",	402,
    "Alpha",	913,
    "Beta",	914,
    "Gamma",	915,
    "Delta",	916,
    "Epsilon",	917,
    "Zeta",	918,
    "Eta",	919,
    "Theta",	920,
    "Iota",	921,
    "Kappa",	922,
    "Lambda",	923,
    "Mu",	924,
    "Nu",	925,
    "Xi",	926,
    "Omicron",	927,
    "Pi",	928,
    "Rho",	929,
    "Sigma",	931,
    "Tau",	932,
    "Upsilon",	933,
    "Phi",	934,
    "Chi",	935,
    "Psi",	936,
    "Omega",	937,
    "alpha",	945,
    "beta",	946,
    "gamma",	947,
    "delta",	948,
    "epsilon",	949,
    "zeta",	950,
    "eta",	951,
    "theta",	952,
    "iota",	953,
    "kappa",	954,
    "lambda",	955,
    "mu",	956,
    "nu",	957,
    "xi",	958,
    "omicron",	959,
    "pi",	960,
    "rho",	961,
    "sigmaf",	962,
    "sigma",	963,
    "tau",	964,
    "upsilon",	965,
    "phi",	966,
    "chi",	967,
    "psi",	968,
    "omega",	969,
    "thetasym",	977,
    "upsih",	978,
    "piv",	982,
    "bull",	8226,
    "hellip",	8230,
    "prime",	8242,
    "Prime",	8243,
    "oline",	8254,
    "frasl",	8260,
    "weierp",	8472,
    "image",	8465,
    "real",	8476,
    "trade",	8482,
    "alefsym",	8501,
    "larr",	8592,
    "uarr",	8593,
    "rarr",	8594,
    "darr",	8595,
    "harr",	8596,
    "crarr",	8629,
    "lArr",	8656,
    "uArr",	8657,
    "rArr",	8658,
    "dArr",	8659,
    "hArr",	8660,
    "forall",	8704,
    "part",	8706,
    "exist",	8707,
    "empty",	8709,
    "nabla",	8711,
    "isin",	8712,
    "notin",	8713,
    "ni",	8715,
    "prod",	8719,
    "sum",	8721,
    "minus",	8722,
    "lowast",	8727,
    "radic",	8730,
    "prop",	8733,
    "infin",	8734,
    "ang",	8736,
    "and",	8743,
    "or",	8744,
    "cap",	8745,
    "cup",	8746,
    "int",	8747,
    "there4",	8756,
    "sim",	8764,
    "cong",	8773,
    "asymp",	8776,
    "ne",	8800,
    "equiv",	8801,
    "le",	8804,
    "ge",	8805,
    "sub",	8834,
    "sup",	8835,
    "nsub",	8836,
    "sube",	8838,
    "supe",	8839,
    "oplus",	8853,
    "otimes",	8855,
    "perp",	8869,
    "sdot",	8901,
    "lceil",	8968,
    "rceil",	8969,
    "lfloor",	8970,
    "rfloor",	8971,
    "lang",	9001,
    "rang",	9002,
    "loz",	9674,
    "spades",	9824,
    "clubs",	9827,
    "hearts",	9829,
    "diams",	9830,
};

/*********************************************
 * Look up a named character entity (build without IN_GCC).
 * Params:
 *	p      = first character of the entity name (no '&' / ';');
 *	         it does not have to be NUL-terminated
 *	length = number of characters in the name
 * Returns:
 *	the value stored for the name in names[], or -1 if the name is not
 *	listed (no diagnostic is issued here).
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    // An empty name can never match, and a negative length would turn into
    // a huge size_t if it reached the comparison below.
    if (length <= 0)
	return -1;

    const size_t len = (size_t)length;
    const size_t count = sizeof(names) / sizeof(names[0]);

    // BUG: this is a dumb, slow linear search
    for (size_t i = 0; i < count; i++)
    {
	const char *name = names[i].name;

	// Entries are case sensitive.  Check the length before memcmp so
	// the comparison never reads beyond the end of a table string that
	// is shorter than the input.
	if (strlen(name) == len && memcmp(name, p, len) == 0)
	    return names[i].value;
    }
    return -1;
}

#endif