Mercurial > projects > ldc
diff dmd2/entity.c @ 758:f04dde6e882c
Added initial D2 support, D2 frontend and changes to codegen to make things compile.
author | Tomas Lindquist Olsen <tomas.l.olsen@gmail.com> |
---|---|
date | Tue, 11 Nov 2008 01:38:48 +0100 |
parents | |
children | 638d16625da2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dmd2/entity.c Tue Nov 11 01:38:48 2008 +0100 @@ -0,0 +1,1366 @@ + +// Copyright (c) 1999-2008 by Digital Mars +// All Rights Reserved +// written by Walter Bright +// http://www.digitalmars.com +// License for redistribution is by either the Artistic License +// in artistic.txt, or the GNU General Public License in gnu.txt. +// See the included readme.txt for details. + + +#include <string.h> + +/********************************************* + * Convert from named entity to its encoding. + * For reference: + * http://www.htmlhelp.com/reference/html40/entities/ + * http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html + */ + +struct NameId +{ + const char *name; + unsigned short value; +}; + +#if IN_GCC +static NameId namesA[]={ + "Aacgr", 0x0386, + "aacgr", 0x03AC, + "Aacute", 0x00C1, + "aacute", 0x00E1, + "Abreve", 0x0102, + "abreve", 0x0103, + "Acirc", 0x00C2, + "acirc", 0x00E2, + "acute", 0x00B4, + "Acy", 0x0410, + "acy", 0x0430, + "AElig", 0x00C6, + "aelig", 0x00E6, + "Agr", 0x0391, + "agr", 0x03B1, + "Agrave", 0x00C0, + "agrave", 0x00E0, + "aleph", 0x2135, + "alpha", 0x03B1, + "Amacr", 0x0100, + "amacr", 0x0101, + "amalg", 0x2210, + "amp", 0x0026, + "and", 0x2227, + "ang", 0x2220, + "ang90", 0x221F, + "angmsd", 0x2221, + "angsph", 0x2222, + "angst", 0x212B, + "Aogon", 0x0104, + "aogon", 0x0105, + "ap", 0x2248, + "ape", 0x224A, + "apos", 0x0027, + "Aring", 0x00C5, + "aring", 0x00E5, + "ast", 0x002A, + "asymp", 0x224D, + "Atilde", 0x00C3, + "atilde", 0x00E3, + "Auml", 0x00C4, + "auml", 0x00E4, + NULL, 0 +}; + +static NameId namesB[]={ + "barwed", 0x22BC, + "Barwed", 0x2306, + "bcong", 0x224C, + "Bcy", 0x0411, + "bcy", 0x0431, + "becaus", 0x2235, + "bepsi", 0x220D, + "bernou", 0x212C, + "beta", 0x03B2, + "beth", 0x2136, + "Bgr", 0x0392, + "bgr", 0x03B2, + "blank", 0x2423, + "blk12", 0x2592, + "blk14", 0x2591, + "blk34", 0x2593, + "block", 0x2588, + "bottom", 0x22A5, + "bowtie", 0x22C8, + "boxdl", 0x2510, + "boxDL", 0x2555, + "boxdL", 0x2556, + "boxDl", 0x2557, + "boxdr", 0x250C, + "boxDR", 0x2552, + "boxDr", 0x2553, + "boxdR", 0x2554, + "boxh", 0x2500, + "boxH", 0x2550, + "boxhd", 0x252C, + "boxhD", 0x2564, + "boxHD", 0x2565, + "boxHd", 0x2566, + "boxhu", 0x2534, + "boxhU", 0x2567, + "boxHU", 0x2568, + "boxHu", 0x2569, + "boxul", 0x2518, + "boxUL", 0x255B, + "boxUl", 0x255C, + "boxuL", 0x255D, + "boxur", 0x2514, + "boxUR", 0x2558, + "boxuR", 0x2559, + "boxUr", 0x255A, + "boxv", 0x2502, + "boxV", 0x2551, + "boxvh", 0x253C, + "boxvH", 0x256A, + "boxVH", 0x256B, + "boxVh", 0x256C, + "boxvl", 0x2524, + "boxvL", 0x2561, + "boxVL", 0x2562, + "boxVl", 0x2563, + "boxvr", 0x251C, + "boxvR", 0x255E, + "boxVR", 0x255F, + "boxVr", 0x2560, + "bprime", 0x2035, + "breve", 0x02D8, + "brvbar", 0x00A6, + "bsim", 0x223D, + "bsime", 0x22CD, + "bsol", 0x005C, + "bull", 0x2022, + "bump", 0x224E, + "bumpe", 0x224F, + NULL, 0 +}; + +static NameId namesC[]={ + "Cacute", 0x0106, + "cacute", 0x0107, + "cap", 0x2229, + "Cap", 0x22D2, + "caret", 0x2041, + "caron", 0x02C7, + "Ccaron", 0x010C, + "ccaron", 0x010D, + "Ccedil", 0x00C7, + "ccedil", 0x00E7, + "Ccirc", 0x0108, + "ccirc", 0x0109, + "Cdot", 0x010A, + "cdot", 0x010B, + "cedil", 0x00B8, + "cent", 0x00A2, + "CHcy", 0x0427, + "chcy", 0x0447, + "check", 0x2713, + "chi", 0x03C7, + "cir", 0x25CB, + "circ", 0x005E, + "cire", 0x2257, + "clubs", 0x2663, + "colon", 0x003A, + "colone", 0x2254, + "comma", 0x002C, + "commat", 0x0040, + "comp", 0x2201, + "compfn", 0x2218, + "cong", 0x2245, + "conint", 0x222E, + "coprod", 0x2210, + "copy", 0x00A9, + "copysr", 0x2117, + "cross", 0x2717, + "cuepr", 0x22DE, + "cuesc", 0x22DF, + "cularr", 0x21B6, + "cup", 0x222A, + "Cup", 0x22D3, + "cupre", 0x227C, + "curarr", 0x21B7, + "curren", 0x00A4, + "cuvee", 0x22CE, + "cuwed", 0x22CF, + NULL, 0 +}; + +static NameId namesD[]={ + "dagger", 0x2020, + "Dagger", 0x2021, + "daleth", 0x2138, + "darr", 0x2193, + "dArr", 0x21D3, + "darr2", 0x21CA, + "dash", 0x2010, + "dashv", 0x22A3, + "dblac", 0x02DD, + "Dcaron", 0x010E, + "dcaron", 0x010F, + "Dcy", 0x0414, + "dcy", 0x0434, + "deg", 0x00B0, + "Delta", 0x0394, + "delta", 0x03B4, + "Dgr", 0x0394, + "dgr", 0x03B4, + "dharl", 0x21C3, + "dharr", 0x21C2, + "diam", 0x22C4, + "diams", 0x2666, + "die", 0x00A8, + "divide", 0x00F7, + "divonx", 0x22C7, + "DJcy", 0x0402, + "djcy", 0x0452, + "dlarr", 0x2199, + "dlcorn", 0x231E, + "dlcrop", 0x230D, + "dollar", 0x0024, + "Dot", 0x00A8, + "dot", 0x02D9, + "DotDot", 0x20DC, + "drarr", 0x2198, + "drcorn", 0x231F, + "drcrop", 0x230C, + "DScy", 0x0405, + "dscy", 0x0455, + "Dstrok", 0x0110, + "dstrok", 0x0111, + "dtri", 0x25BF, + "dtrif", 0x25BE, + "DZcy", 0x040F, + "dzcy", 0x045F, + NULL, 0 +}; + +static NameId namesE[]={ + "Eacgr", 0x0388, + "eacgr", 0x03AD, + "Eacute", 0x00C9, + "eacute", 0x00E9, + "Ecaron", 0x011A, + "ecaron", 0x011B, + "ecir", 0x2256, + "Ecirc", 0x00CA, + "ecirc", 0x00EA, + "ecolon", 0x2255, + "Ecy", 0x042D, + "ecy", 0x044D, + "Edot", 0x0116, + "edot", 0x0117, + "eDot", 0x2251, + "EEacgr", 0x0389, + "eeacgr", 0x03AE, + "EEgr", 0x0397, + "eegr", 0x03B7, + "efDot", 0x2252, + "Egr", 0x0395, + "egr", 0x03B5, + "Egrave", 0x00C8, + "egrave", 0x00E8, + "egs", 0x22DD, + "ell", 0x2113, + "els", 0x22DC, + "Emacr", 0x0112, + "emacr", 0x0113, + "empty", 0x2205, + "emsp", 0x2003, + "emsp13", 0x2004, + "emsp14", 0x2005, + "ENG", 0x014A, + "eng", 0x014B, + "ensp", 0x2002, + "Eogon", 0x0118, + "eogon", 0x0119, + "epsi", 0x220A, + "epsis", 0x220A, + "epsiv", 0x03B5, + "equals", 0x003D, + "equiv", 0x2261, + "erDot", 0x2253, + "esdot", 0x2250, + "eta", 0x03B7, + "ETH", 0x00D0, + "eth", 0x00F0, + "Euml", 0x00CB, + "euml", 0x00EB, + "excl", 0x0021, + "exist", 0x2203, + NULL, 0 +}; + +static NameId namesF[]={ + "Fcy", 0x0424, + "fcy", 0x0444, + "female", 0x2640, + "ffilig", 0xFB03, + "fflig", 0xFB00, + "ffllig", 0xFB04, + "filig", 0xFB01, + "flat", 0x266D, + "fllig", 0xFB02, + "fnof", 0x0192, + "forall", 0x2200, + "fork", 0x22D4, + "frac12", 0x00BD, + "frac13", 0x2153, + "frac14", 0x00BC, + "frac15", 0x2155, + "frac16", 0x2159, + "frac18", 0x215B, + "frac23", 0x2154, + "frac25", 0x2156, + "frac34", 0x00BE, + "frac35", 0x2157, + "frac38", 0x215C, + "frac45", 0x2158, + "frac56", 0x215A, + "frac58", 0x215D, + "frac78", 0x215E, + "frown", 0x2322, + NULL, 0 +}; + +static NameId namesG[]={ + "gacute", 0x01F5, + "Gamma", 0x0393, + "gamma", 0x03B3, + "gammad", 0x03DC, + "gap", 0x2273, + "Gbreve", 0x011E, + "gbreve", 0x011F, + "Gcedil", 0x0122, + "Gcirc", 0x011C, + "gcirc", 0x011D, + "Gcy", 0x0413, + "gcy", 0x0433, + "Gdot", 0x0120, + "gdot", 0x0121, + "ge", 0x2265, + "gE", 0x2267, + "gel", 0x22DB, + "gEl", 0x22DB, + "ges", 0x2265, + "Gg", 0x22D9, + "Ggr", 0x0393, + "ggr", 0x03B3, + "gimel", 0x2137, + "GJcy", 0x0403, + "gjcy", 0x0453, + "gl", 0x2277, + "gnap", 0xE411, + "gne", 0x2269, + "gnE", 0x2269, + "gnsim", 0x22E7, + "grave", 0x0060, + "gsdot", 0x22D7, + "gsim", 0x2273, + "gt", 0x003E, + "Gt", 0x226B, + "gvnE", 0x2269, + NULL, 0 +}; + +static NameId namesH[]={ + "hairsp", 0x200A, + "half", 0x00BD, + "hamilt", 0x210B, + "HARDcy", 0x042A, + "hardcy", 0x044A, + "harr", 0x2194, + "hArr", 0x21D4, + "harrw", 0x21AD, + "Hcirc", 0x0124, + "hcirc", 0x0125, + "hearts", 0x2665, + "hellip", 0x2026, + "horbar", 0x2015, + "Hstrok", 0x0126, + "hstrok", 0x0127, + "hybull", 0x2043, + "hyphen", 0x002D, + NULL, 0 +}; + +static NameId namesI[]={ + "Iacgr", 0x038A, + "iacgr", 0x03AF, + "Iacute", 0x00CD, + "iacute", 0x00ED, + "Icirc", 0x00CE, + "icirc", 0x00EE, + "Icy", 0x0418, + "icy", 0x0438, + "idiagr", 0x0390, + "Idigr", 0x03AA, + "idigr", 0x03CA, + "Idot", 0x0130, + "IEcy", 0x0415, + "iecy", 0x0435, + "iexcl", 0x00A1, + "iff", 0x21D4, + "Igr", 0x0399, + "igr", 0x03B9, + "Igrave", 0x00CC, + "igrave", 0x00EC, + "IJlig", 0x0132, + "ijlig", 0x0133, + "Imacr", 0x012A, + "imacr", 0x012B, + "image", 0x2111, + "incare", 0x2105, + "infin", 0x221E, + "inodot", 0x0131, + "int", 0x222B, + "intcal", 0x22BA, + "IOcy", 0x0401, + "iocy", 0x0451, + "Iogon", 0x012E, + "iogon", 0x012F, + "iota", 0x03B9, + "iquest", 0x00BF, + "isin", 0x220A, + "Itilde", 0x0128, + "itilde", 0x0129, + "Iukcy", 0x0406, + "iukcy", 0x0456, + "Iuml", 0x00CF, + "iuml", 0x00EF, + NULL, 0 +}; + +static NameId namesJ[]={ + "Jcirc", 0x0134, + "jcirc", 0x0135, + "Jcy", 0x0419, + "jcy", 0x0439, + "Jsercy", 0x0408, + "jsercy", 0x0458, + "Jukcy", 0x0404, + "jukcy", 0x0454, + NULL, 0 +}; + +static NameId namesK[]={ + "kappa", 0x03BA, + "kappav", 0x03F0, + "Kcedil", 0x0136, + "kcedil", 0x0137, + "Kcy", 0x041A, + "kcy", 0x043A, + "Kgr", 0x039A, + "kgr", 0x03BA, + "kgreen", 0x0138, + "KHcy", 0x0425, + "khcy", 0x0445, + "KHgr", 0x03A7, + "khgr", 0x03C7, + "KJcy", 0x040C, + "kjcy", 0x045C, + NULL, 0 +}; + +static NameId namesL[]={ + "lAarr", 0x21DA, + "Lacute", 0x0139, + "lacute", 0x013A, + "lagran", 0x2112, + "Lambda", 0x039B, + "lambda", 0x03BB, + "lang", 0x3008, + "lap", 0x2272, + "laquo", 0x00AB, + "larr", 0x2190, + "Larr", 0x219E, + "lArr", 0x21D0, + "larr2", 0x21C7, + "larrhk", 0x21A9, + "larrlp", 0x21AB, + "larrtl", 0x21A2, + "Lcaron", 0x013D, + "lcaron", 0x013E, + "Lcedil", 0x013B, + "lcedil", 0x013C, + "lceil", 0x2308, + "lcub", 0x007B, + "Lcy", 0x041B, + "lcy", 0x043B, + "ldot", 0x22D6, + "ldquo", 0x201C, + "ldquor", 0x201E, + "le", 0x2264, + "lE", 0x2266, + "leg", 0x22DA, + "lEg", 0x22DA, + "les", 0x2264, + "lfloor", 0x230A, + "lg", 0x2276, + "Lgr", 0x039B, + "lgr", 0x03BB, + "lhard", 0x21BD, + "lharu", 0x21BC, + "lhblk", 0x2584, + "LJcy", 0x0409, + "ljcy", 0x0459, + "Ll", 0x22D8, + "Lmidot", 0x013F, + "lmidot", 0x0140, + "lnap", 0xE2A2, + "lne", 0x2268, + "lnE", 0x2268, + "lnsim", 0x22E6, + "lowast", 0x2217, + "lowbar", 0x005F, + "loz", 0x25CA, + "lozf", 0x2726, + "lpar", 0x0028, + "lrarr2", 0x21C6, + "lrhar2", 0x21CB, + "lsh", 0x21B0, + "lsim", 0x2272, + "lsqb", 0x005B, + "lsquo", 0x2018, + "lsquor", 0x201A, + "Lstrok", 0x0141, + "lstrok", 0x0142, + "lt", 0x003C, + "Lt", 0x226A, + "lthree", 0x22CB, + "ltimes", 0x22C9, + "ltri", 0x25C3, + "ltrie", 0x22B4, + "ltrif", 0x25C2, + "lvnE", 0x2268, + NULL, 0 +}; + +static NameId namesM[]={ + "macr", 0x00AF, + "male", 0x2642, + "malt", 0x2720, + "map", 0x21A6, + "marker", 0x25AE, + "Mcy", 0x041C, + "mcy", 0x043C, + "mdash", 0x2014, + "Mgr", 0x039C, + "mgr", 0x03BC, + "micro", 0x00B5, + "mid", 0x2223, + "middot", 0x00B7, + "minus", 0x2212, + "minusb", 0x229F, + "mldr", 0x2026, + "mnplus", 0x2213, + "models", 0x22A7, + "mu", 0x03BC, + "mumap", 0x22B8, + NULL, 0 +}; + +static NameId namesN[]={ + "nabla", 0x2207, + "Nacute", 0x0143, + "nacute", 0x0144, + "nap", 0x2249, + "napos", 0x0149, + "natur", 0x266E, +// "nbsp", 0x00A0, + "nbsp", 32, // make non-breaking space appear as space + "Ncaron", 0x0147, + "ncaron", 0x0148, + "Ncedil", 0x0145, + "ncedil", 0x0146, + "ncong", 0x2247, + "Ncy", 0x041D, + "ncy", 0x043D, + "ndash", 0x2013, + "ne", 0x2260, + "nearr", 0x2197, + "nequiv", 0x2262, + "nexist", 0x2204, + "nge", 0x2271, + "ngE", 0x2271, + "nges", 0x2271, + "Ngr", 0x039D, + "ngr", 0x03BD, + "ngt", 0x226F, + "nharr", 0x21AE, + "nhArr", 0x21CE, + "ni", 0x220D, + "NJcy", 0x040A, + "njcy", 0x045A, + "nlarr", 0x219A, + "nlArr", 0x21CD, + "nldr", 0x2025, + "nle", 0x2270, + "nlE", 0x2270, + "nles", 0x2270, + "nlt", 0x226E, + "nltri", 0x22EA, + "nltrie", 0x22EC, + "nmid", 0x2224, + "not", 0x00AC, + "notin", 0x2209, + "npar", 0x2226, + "npr", 0x2280, + "npre", 0x22E0, + "nrarr", 0x219B, + "nrArr", 0x21CF, + "nrtri", 0x22EB, + "nrtrie", 0x22ED, + "nsc", 0x2281, + "nsce", 0x22E1, + "nsim", 0x2241, + "nsime", 0x2244, + "nsmid", 0xE2AA, + "nspar", 0x2226, + "nsub", 0x2284, + "nsube", 0x2288, + "nsubE", 0x2288, + "nsup", 0x2285, + "nsupe", 0x2289, + "nsupE", 0x2289, + "Ntilde", 0x00D1, + "ntilde", 0x00F1, + "nu", 0x03BD, + "num", 0x0023, + "numero", 0x2116, + "numsp", 0x2007, + "nvdash", 0x22AC, + "nvDash", 0x22AD, + "nVdash", 0x22AE, + "nVDash", 0x22AF, + "nwarr", 0x2196, + NULL, 0 +}; + +static NameId namesO[]={ + "Oacgr", 0x038C, + "oacgr", 0x03CC, + "Oacute", 0x00D3, + "oacute", 0x00F3, + "oast", 0x229B, + "ocir", 0x229A, + "Ocirc", 0x00D4, + "ocirc", 0x00F4, + "Ocy", 0x041E, + "ocy", 0x043E, + "odash", 0x229D, + "Odblac", 0x0150, + "odblac", 0x0151, + "odot", 0x2299, + "OElig", 0x0152, + "oelig", 0x0153, + "ogon", 0x02DB, + "Ogr", 0x039F, + "ogr", 0x03BF, + "Ograve", 0x00D2, + "ograve", 0x00F2, + "OHacgr", 0x038F, + "ohacgr", 0x03CE, + "OHgr", 0x03A9, + "ohgr", 0x03C9, + "ohm", 0x2126, + "olarr", 0x21BA, + "Omacr", 0x014C, + "omacr", 0x014D, + "Omega", 0x03A9, + "omega", 0x03C9, + "ominus", 0x2296, + "oplus", 0x2295, + "or", 0x2228, + "orarr", 0x21BB, + "order", 0x2134, + "ordf", 0x00AA, + "ordm", 0x00BA, + "oS", 0x24C8, + "Oslash", 0x00D8, + "oslash", 0x00F8, + "osol", 0x2298, + "Otilde", 0x00D5, + "otilde", 0x00F5, + "otimes", 0x2297, + "Ouml", 0x00D6, + "ouml", 0x00F6, + NULL, 0 +}; + +static NameId namesP[]={ + "par", 0x2225, + "para", 0x00B6, + "part", 0x2202, + "Pcy", 0x041F, + "pcy", 0x043F, + "percnt", 0x0025, + "period", 0x002E, + "permil", 0x2030, + "perp", 0x22A5, + "Pgr", 0x03A0, + "pgr", 0x03C0, + "PHgr", 0x03A6, + "phgr", 0x03C6, + "Phi", 0x03A6, + "phis", 0x03C6, + "phiv", 0x03D5, + "phmmat", 0x2133, + "phone", 0x260E, + "Pi", 0x03A0, + "pi", 0x03C0, + "piv", 0x03D6, + "planck", 0x210F, + "plus", 0x002B, + "plusb", 0x229E, + "plusdo", 0x2214, + "plusmn", 0x00B1, + "pound", 0x00A3, + "pr", 0x227A, + "prap", 0x227E, + "pre", 0x227C, + "prime", 0x2032, + "Prime", 0x2033, + "prnap", 0x22E8, + "prnE", 0xE2B3, + "prnsim", 0x22E8, + "prod", 0x220F, + "prop", 0x221D, + "prsim", 0x227E, + "PSgr", 0x03A8, + "psgr", 0x03C8, + "Psi", 0x03A8, + "psi", 0x03C8, + "puncsp", 0x2008, + NULL, 0 +}; + +static NameId namesQ[]={ + "quest", 0x003F, + "quot", 0x0022, + NULL, 0 +}; + +static NameId namesR[]={ + "rAarr", 0x21DB, + "Racute", 0x0154, + "racute", 0x0155, + "radic", 0x221A, + "rang", 0x3009, + "raquo", 0x00BB, + "rarr", 0x2192, + "Rarr", 0x21A0, + "rArr", 0x21D2, + "rarr2", 0x21C9, + "rarrhk", 0x21AA, + "rarrlp", 0x21AC, + "rarrtl", 0x21A3, + "rarrw", 0x219D, + "Rcaron", 0x0158, + "rcaron", 0x0159, + "Rcedil", 0x0156, + "rcedil", 0x0157, + "rceil", 0x2309, + "rcub", 0x007D, + "Rcy", 0x0420, + "rcy", 0x0440, + "rdquo", 0x201D, + "rdquor", 0x201C, + "real", 0x211C, + "rect", 0x25AD, + "reg", 0x00AE, + "rfloor", 0x230B, + "Rgr", 0x03A1, + "rgr", 0x03C1, + "rhard", 0x21C1, + "rharu", 0x21C0, + "rho", 0x03C1, + "rhov", 0x03F1, + "ring", 0x02DA, + "rlarr2", 0x21C4, + "rlhar2", 0x21CC, + "rpar", 0x0029, + "rpargt", 0xE291, + "rsh", 0x21B1, + "rsqb", 0x005D, + "rsquo", 0x2019, + "rsquor", 0x2018, + "rthree", 0x22CC, + "rtimes", 0x22CA, + "rtri", 0x25B9, + "rtrie", 0x22B5, + "rtrif", 0x25B8, + "rx", 0x211E, + NULL, 0 +}; + +static NameId namesS[]={ + "Sacute", 0x015A, + "sacute", 0x015B, + "samalg", 0x2210, + "sbsol", 0xFE68, + "sc", 0x227B, + "scap", 0x227F, + "Scaron", 0x0160, + "scaron", 0x0161, + "sccue", 0x227D, + "sce", 0x227D, + "Scedil", 0x015E, + "scedil", 0x015F, + "Scirc", 0x015C, + "scirc", 0x015D, + "scnap", 0x22E9, + "scnE", 0xE2B5, + "scnsim", 0x22E9, + "scsim", 0x227F, + "Scy", 0x0421, + "scy", 0x0441, + "sdot", 0x22C5, + "sdotb", 0x22A1, + "sect", 0x00A7, + "semi", 0x003B, + "setmn", 0x2216, + "sext", 0x2736, + "sfgr", 0x03C2, + "sfrown", 0x2322, + "Sgr", 0x03A3, + "sgr", 0x03C3, + "sharp", 0x266F, + "SHCHcy", 0x0429, + "shchcy", 0x0449, + "SHcy", 0x0428, + "shcy", 0x0448, + "shy", 0x00AD, + "Sigma", 0x03A3, + "sigma", 0x03C3, + "sigmav", 0x03C2, + "sim", 0x223C, + "sime", 0x2243, + "smid", 0xE301, + "smile", 0x2323, + "SOFTcy", 0x042C, + "softcy", 0x044C, + "sol", 0x002F, + "spades", 0x2660, + "spar", 0x2225, + "sqcap", 0x2293, + "sqcup", 0x2294, + "sqsub", 0x228F, + "sqsube", 0x2291, + "sqsup", 0x2290, + "sqsupe", 0x2292, + "squ", 0x25A1, + "square", 0x25A1, + "squf", 0x25AA, + "ssetmn", 0x2216, + "ssmile", 0x2323, + "sstarf", 0x22C6, + "star", 0x22C6, + "starf", 0x2605, + "sub", 0x2282, + "Sub", 0x22D0, + "sube", 0x2286, + "subE", 0x2286, + "subne", 0x228A, + "subnE", 0x228A, + "sum", 0x2211, + "sung", 0x2669, + "sup", 0x2283, + "Sup", 0x22D1, + "sup1", 0x00B9, + "sup2", 0x00B2, + "sup3", 0x00B3, + "supe", 0x2287, + "supE", 0x2287, + "supne", 0x228B, + "supnE", 0x228B, + "szlig", 0x00DF, + NULL, 0 +}; + +static NameId namesT[]={ + "target", 0x2316, + "tau", 0x03C4, + "Tcaron", 0x0164, + "tcaron", 0x0165, + "Tcedil", 0x0162, + "tcedil", 0x0163, + "Tcy", 0x0422, + "tcy", 0x0442, + "tdot", 0x20DB, + "telrec", 0x2315, + "Tgr", 0x03A4, + "tgr", 0x03C4, + "there4", 0x2234, + "Theta", 0x0398, + "thetas", 0x03B8, + "thetav", 0x03D1, + "THgr", 0x0398, + "thgr", 0x03B8, + "thinsp", 0x2009, + "thkap", 0x2248, + "thksim", 0x223C, + "THORN", 0x00DE, + "thorn", 0x00FE, + "tilde", 0x02DC, + "times", 0x00D7, + "timesb", 0x22A0, + "top", 0x22A4, + "tprime", 0x2034, + "trade", 0x2122, + "trie", 0x225C, + "TScy", 0x0426, + "tscy", 0x0446, + "TSHcy", 0x040B, + "tshcy", 0x045B, + "Tstrok", 0x0166, + "tstrok", 0x0167, + "twixt", 0x226C, + NULL, 0 +}; + +static NameId namesU[]={ + "Uacgr", 0x038E, + "uacgr", 0x03CD, + "Uacute", 0x00DA, + "uacute", 0x00FA, + "uarr", 0x2191, + "uArr", 0x21D1, + "uarr2", 0x21C8, + "Ubrcy", 0x040E, + "ubrcy", 0x045E, + "Ubreve", 0x016C, + "ubreve", 0x016D, + "Ucirc", 0x00DB, + "ucirc", 0x00FB, + "Ucy", 0x0423, + "ucy", 0x0443, + "Udblac", 0x0170, + "udblac", 0x0171, + "udiagr", 0x03B0, + "Udigr", 0x03AB, + "udigr", 0x03CB, + "Ugr", 0x03A5, + "ugr", 0x03C5, + "Ugrave", 0x00D9, + "ugrave", 0x00F9, + "uharl", 0x21BF, + "uharr", 0x21BE, + "uhblk", 0x2580, + "ulcorn", 0x231C, + "ulcrop", 0x230F, + "Umacr", 0x016A, + "umacr", 0x016B, + "uml", 0x00A8, + "Uogon", 0x0172, + "uogon", 0x0173, + "uplus", 0x228E, + "upsi", 0x03C5, + "Upsi", 0x03D2, + "urcorn", 0x231D, + "urcrop", 0x230E, + "Uring", 0x016E, + "uring", 0x016F, + "Utilde", 0x0168, + "utilde", 0x0169, + "utri", 0x25B5, + "utrif", 0x25B4, + "Uuml", 0x00DC, + "uuml", 0x00FC, + NULL, 0 +}; + +static NameId namesV[]={ + "varr", 0x2195, + "vArr", 0x21D5, + "Vcy", 0x0412, + "vcy", 0x0432, + "vdash", 0x22A2, + "vDash", 0x22A8, + "Vdash", 0x22A9, + "veebar", 0x22BB, + "vellip", 0x22EE, + "verbar", 0x007C, + "Verbar", 0x2016, + "vltri", 0x22B2, + "vprime", 0x2032, + "vprop", 0x221D, + "vrtri", 0x22B3, + "vsubne", 0x228A, + "vsubnE", 0xE2B8, + "vsupne", 0x228B, + "vsupnE", 0x228B, + "Vvdash", 0x22AA, + NULL, 0 +}; + +static NameId namesW[]={ + "Wcirc", 0x0174, + "wcirc", 0x0175, + "wedgeq", 0x2259, + "weierp", 0x2118, + "wreath", 0x2240, + NULL, 0 +}; + +static NameId namesX[]={ + "xcirc", 0x25CB, + "xdtri", 0x25BD, + "Xgr", 0x039E, + "xgr", 0x03BE, + "xharr", 0x2194, + "xhArr", 0x2194, + "Xi", 0x039E, + "xi", 0x03BE, + "xlArr", 0x21D0, + "xrArr", 0x21D2, + "xutri", 0x25B3, + NULL, 0 +}; + +static NameId namesY[]={ + "Yacute", 0x00DD, + "yacute", 0x00FD, + "YAcy", 0x042F, + "yacy", 0x044F, + "Ycirc", 0x0176, + "ycirc", 0x0177, + "Ycy", 0x042B, + "ycy", 0x044B, + "yen", 0x00A5, + "YIcy", 0x0407, + "yicy", 0x0457, + "YUcy", 0x042E, + "yucy", 0x044E, + "yuml", 0x00FF, + "Yuml", 0x0178, + NULL, 0 +}; + +static NameId namesZ[]={ + "Zacute", 0x0179, + "zacute", 0x017A, + "Zcaron", 0x017D, + "zcaron", 0x017E, + "Zcy", 0x0417, + "zcy", 0x0437, + "Zdot", 0x017B, + "zdot", 0x017C, + "zeta", 0x03B6, + "Zgr", 0x0396, + "zgr", 0x03B6, + "ZHcy", 0x0416, + "zhcy", 0x0436, + NULL, 0 +}; + +// @todo@ order namesTable and names? by frequency +static NameId* namesTable[] = { + namesA, namesB, namesC, namesD, namesE, namesF, namesG, namesH, namesI, + namesJ, namesK, namesL, namesM, namesN, namesO, namesP, namesQ, namesR, + namesS, namesT, namesU, namesV, namesW, namesX, namesY, namesZ, NULL +}; + +int HtmlNamedEntity(unsigned char *p, int length) +{ + int tableIndex = tolower(*p) - 'a'; + if (tableIndex >= 0 && tableIndex < 26) { + NameId* names = namesTable[tableIndex]; + int i; + + for (i = 0; names[i].name; i++){ + if (strncmp(names[i].name, (char *)p, length) == 0){ + return names[i].value; + } + } + } + error("unrecognized character entity \"%.*s\"", length, p); + return -1; +} + +#else //TODO: Merge Walter's list with Thomas' + +static NameId names[] = +{ + // Entities + "quot", 34, + "amp", 38, + "lt", 60, + "gt", 62, + + "OElig", 338, + "oelig", 339, + "Scaron", 352, + "scaron", 353, + "Yuml", 376, + "circ", 710, + "tilde", 732, + "ensp", 8194, + "emsp", 8195, + "thinsp", 8201, + "zwnj", 8204, + "zwj", 8205, + "lrm", 8206, + "rlm", 8207, + "ndash", 8211, + "mdash", 8212, + "lsquo", 8216, + "rsquo", 8217, + "sbquo", 8218, + "ldquo", 8220, + "rdquo", 8221, + "bdquo", 8222, + "dagger", 8224, + "Dagger", 8225, + "permil", 8240, + "lsaquo", 8249, + "rsaquo", 8250, + "euro", 8364, + + // Latin-1 (ISO-8859-1) Entities + "nbsp", 160, + "iexcl", 161, + "cent", 162, + "pound", 163, + "curren", 164, + "yen", 165, + "brvbar", 166, + "sect", 167, + "uml", 168, + "copy", 169, + "ordf", 170, + "laquo", 171, + "not", 172, + "shy", 173, + "reg", 174, + "macr", 175, + "deg", 176, + "plusmn", 177, + "sup2", 178, + "sup3", 179, + "acute", 180, + "micro", 181, + "para", 182, + "middot", 183, + "cedil", 184, + "sup1", 185, + "ordm", 186, + "raquo", 187, + "frac14", 188, + "frac12", 189, + "frac34", 190, + "iquest", 191, + "Agrave", 192, + "Aacute", 193, + "Acirc", 194, + "Atilde", 195, + "Auml", 196, + "Aring", 197, + "AElig", 198, + "Ccedil", 199, + "Egrave", 200, + "Eacute", 201, + "Ecirc", 202, + "Euml", 203, + "Igrave", 204, + "Iacute", 205, + "Icirc", 206, + "Iuml", 207, + "ETH", 208, + "Ntilde", 209, + "Ograve", 210, + "Oacute", 211, + "Ocirc", 212, + "Otilde", 213, + "Ouml", 214, + "times", 215, + "Oslash", 216, + "Ugrave", 217, + "Uacute", 218, + "Ucirc", 219, + "Uuml", 220, + "Yacute", 221, + "THORN", 222, + "szlig", 223, + "agrave", 224, + "aacute", 225, + "acirc", 226, + "atilde", 227, + "auml", 228, + "aring", 229, + "aelig", 230, + "ccedil", 231, + "egrave", 232, + "eacute", 233, + "ecirc", 234, + "euml", 235, + "igrave", 236, + "iacute", 237, + "icirc", 238, + "iuml", 239, + "eth", 240, + "ntilde", 241, + "ograve", 242, + "oacute", 243, + "ocirc", 244, + "otilde", 245, + "ouml", 246, + "divide", 247, + "oslash", 248, + "ugrave", 249, + "uacute", 250, + "ucirc", 251, + "uuml", 252, + "yacute", 253, + "thorn", 254, + "yuml", 255, + + // Symbols and Greek letter entities + "fnof", 402, + "Alpha", 913, + "Beta", 914, + "Gamma", 915, + "Delta", 916, + "Epsilon", 917, + "Zeta", 918, + "Eta", 919, + "Theta", 920, + "Iota", 921, + "Kappa", 922, + "Lambda", 923, + "Mu", 924, + "Nu", 925, + "Xi", 926, + "Omicron", 927, + "Pi", 928, + "Rho", 929, + "Sigma", 931, + "Tau", 932, + "Upsilon", 933, + "Phi", 934, + "Chi", 935, + "Psi", 936, + "Omega", 937, + "alpha", 945, + "beta", 946, + "gamma", 947, + "delta", 948, + "epsilon", 949, + "zeta", 950, + "eta", 951, + "theta", 952, + "iota", 953, + "kappa", 954, + "lambda", 955, + "mu", 956, + "nu", 957, + "xi", 958, + "omicron", 959, + "pi", 960, + "rho", 961, + "sigmaf", 962, + "sigma", 963, + "tau", 964, + "upsilon", 965, + "phi", 966, + "chi", 967, + "psi", 968, + "omega", 969, + "thetasym", 977, + "upsih", 978, + "piv", 982, + "bull", 8226, + "hellip", 8230, + "prime", 8242, + "Prime", 8243, + "oline", 8254, + "frasl", 8260, + "weierp", 8472, + "image", 8465, + "real", 8476, + "trade", 8482, + "alefsym", 8501, + "larr", 8592, + "uarr", 8593, + "rarr", 8594, + "darr", 8595, + "harr", 8596, + "crarr", 8629, + "lArr", 8656, + "uArr", 8657, + "rArr", 8658, + "dArr", 8659, + "hArr", 8660, + "forall", 8704, + "part", 8706, + "exist", 8707, + "empty", 8709, + "nabla", 8711, + "isin", 8712, + "notin", 8713, + "ni", 8715, + "prod", 8719, + "sum", 8721, + "minus", 8722, + "lowast", 8727, + "radic", 8730, + "prop", 8733, + "infin", 8734, + "ang", 8736, + "and", 8743, + "or", 8744, + "cap", 8745, + "cup", 8746, + "int", 8747, + "there4", 8756, + "sim", 8764, + "cong", 8773, + "asymp", 8776, + "ne", 8800, + "equiv", 8801, + "le", 8804, + "ge", 8805, + "sub", 8834, + "sup", 8835, + "nsub", 8836, + "sube", 8838, + "supe", 8839, + "oplus", 8853, + "otimes", 8855, + "perp", 8869, + "sdot", 8901, + "lceil", 8968, + "rceil", 8969, + "lfloor", 8970, + "rfloor", 8971, + "lang", 9001, + "rang", 9002, + "loz", 9674, + "spades", 9824, + "clubs", 9827, + "hearts", 9829, + "diams", 9830, +}; + +int HtmlNamedEntity(unsigned char *p, int length) +{ + int i; + + // BUG: this is a dumb, slow linear search + for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) + { + // Entries are case sensitive + if (memcmp(names[i].name, (char *)p, length) == 0 && + !names[i].name[length]) + return names[i].value; + } + return -1; +} + +#endif