Mercurial > projects > ldc
view dmd/entity.c @ 986:a8cb25d478c4
Use LLVM-style command line (instead of DMD-style)
Note: For a backward compatible interface, use the new bin/ldmd script. It
supports all old options while passing on anything it doesn't recognize.
Some changes caused by this:
* -debug and -version are now -d-debug and -d-version due to a conflict with
standard LLVM options.
* All "flag" options now allow an optional =true/=1/=false/=0 suffix.
* Some "hidden debug switches" starting with "--" were renamed because LLVM
doesn't care about the number of dashes, so they were conflicting with other
options (such as -c).
The new versions start with "-hidden-debug-" instead of "--"
* --help works, but has a non-zero exit code. This breaks some Tango scripts
which use it to test for compiler existence. See tango.patch.
Some changes not (directly) caused by this:
* (-enable/-disable)-FOO options are now available for pre- and postconditions.
* -march is used instead of -m (like other LLVM programs), but -m is an alias
for it.
* -defaultlib, -debuglib, -d-debug and -d-version allow comma-separated values.
The effect should be identical to specifying the same option multiple times.
I decided against allowing these for some other options because paths might
contain commas on some systems.
* -fPIC is removed in favor of the standard LLVM option -relocation-model=pic
Bug:
* If -run is specified as the last argument in DFLAGS, no error is generated.
(Not very serious IMHO)
author | Frits van Bommel <fvbommel wxs.nl> |
---|---|
date | Wed, 25 Feb 2009 17:34:51 +0100 |
parents | c53b6e3fe49a |
children | b30fe7e1dbb9 |
line wrap: on
line source
// Copyright (c) 1999-2006 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// License for redistribution is by either the Artistic License
// in artistic.txt, or the GNU General Public License in gnu.txt.
// See the included readme.txt for details.

#include <string.h>

/*********************************************
 * Convert from named entity to its encoding.
 * For reference:
 *      http://www.htmlhelp.com/reference/html40/entities/
 *      http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
 */

/* One table row: an entity name (without '&' and ';') and the numeric
 * value it maps to.  The typedef keeps the bare name `NameId` usable
 * whether this file is compiled as C or as C++. */
typedef struct NameId
{
    const char *name;           // points at a string literal, hence const
    unsigned short value;
} NameId;

/* Returns nonzero when the NUL-terminated table entry `name` is exactly
 * the `length` bytes starting at `p`.
 *
 * The length of `name` is checked first so that memcmp never reads past
 * the end of a table entry that is shorter than the query, and so that a
 * query which is only a prefix of an entry is rejected. */
static int entityNameEquals(const char *name, const unsigned char *p, size_t length)
{
    return strlen(name) == length && memcmp(name, p, length) == 0;
}

#if IN_GCC

static NameId namesA[]={
    "Aacgr", 0x0386, "aacgr", 0x03AC, "Aacute", 0x00C1, "aacute", 0x00E1,
    "Abreve", 0x0102, "abreve", 0x0103, "Acirc", 0x00C2, "acirc", 0x00E2,
    "acute", 0x00B4, "Acy", 0x0410, "acy", 0x0430, "AElig", 0x00C6,
    "aelig", 0x00E6, "Agr", 0x0391, "agr", 0x03B1, "Agrave", 0x00C0,
    "agrave", 0x00E0, "aleph", 0x2135, "alpha", 0x03B1, "Amacr", 0x0100,
    "amacr", 0x0101, "amalg", 0x2210, "amp", 0x0026, "and", 0x2227,
    "ang", 0x2220, "ang90", 0x221F, "angmsd", 0x2221, "angsph", 0x2222,
    "angst", 0x212B, "Aogon", 0x0104, "aogon", 0x0105, "ap", 0x2248,
    "ape", 0x224A, "apos", 0x0027, "Aring", 0x00C5, "aring", 0x00E5,
    "ast", 0x002A, "asymp", 0x224D, "Atilde", 0x00C3, "atilde", 0x00E3,
    "Auml", 0x00C4, "auml", 0x00E4,
    NULL, 0
};

static NameId namesB[]={
    "barwed", 0x22BC, "Barwed", 0x2306, "bcong", 0x224C, "Bcy", 0x0411,
    "bcy", 0x0431, "becaus", 0x2235, "bepsi", 0x220D, "bernou", 0x212C,
    "beta", 0x03B2, "beth", 0x2136, "Bgr", 0x0392, "bgr", 0x03B2,
    "blank", 0x2423, "blk12", 0x2592, "blk14", 0x2591, "blk34", 0x2593,
    "block", 0x2588, "bottom", 0x22A5, "bowtie", 0x22C8, "boxdl", 0x2510,
    "boxDL", 0x2555, "boxdL", 0x2556, "boxDl", 0x2557, "boxdr", 0x250C,
    "boxDR", 0x2552, "boxDr", 0x2553, "boxdR", 0x2554, "boxh", 0x2500,
    "boxH", 0x2550, "boxhd", 0x252C, "boxhD", 0x2564, "boxHD", 0x2565,
    "boxHd", 0x2566, "boxhu", 0x2534, "boxhU", 0x2567, "boxHU", 0x2568,
    "boxHu", 0x2569, "boxul", 0x2518, "boxUL", 0x255B, "boxUl", 0x255C,
    "boxuL", 0x255D, "boxur", 0x2514, "boxUR", 0x2558, "boxuR", 0x2559,
    "boxUr", 0x255A, "boxv", 0x2502, "boxV", 0x2551, "boxvh", 0x253C,
    "boxvH", 0x256A, "boxVH", 0x256B, "boxVh", 0x256C, "boxvl", 0x2524,
    "boxvL", 0x2561, "boxVL", 0x2562, "boxVl", 0x2563, "boxvr", 0x251C,
    "boxvR", 0x255E, "boxVR", 0x255F, "boxVr", 0x2560, "bprime", 0x2035,
    "breve", 0x02D8, "brvbar", 0x00A6, "bsim", 0x223D, "bsime", 0x22CD,
    "bsol", 0x005C, "bull", 0x2022, "bump", 0x224E, "bumpe", 0x224F,
    NULL, 0
};

static NameId namesC[]={
    "Cacute", 0x0106, "cacute", 0x0107, "cap", 0x2229, "Cap", 0x22D2,
    "caret", 0x2041, "caron", 0x02C7, "Ccaron", 0x010C, "ccaron", 0x010D,
    "Ccedil", 0x00C7, "ccedil", 0x00E7, "Ccirc", 0x0108, "ccirc", 0x0109,
    "Cdot", 0x010A, "cdot", 0x010B, "cedil", 0x00B8, "cent", 0x00A2,
    "CHcy", 0x0427, "chcy", 0x0447, "check", 0x2713, "chi", 0x03C7,
    "cir", 0x25CB, "circ", 0x005E, "cire", 0x2257, "clubs", 0x2663,
    "colon", 0x003A, "colone", 0x2254, "comma", 0x002C, "commat", 0x0040,
    "comp", 0x2201, "compfn", 0x2218, "cong", 0x2245, "conint", 0x222E,
    "coprod", 0x2210, "copy", 0x00A9, "copysr", 0x2117, "cross", 0x2717,
    "cuepr", 0x22DE, "cuesc", 0x22DF, "cularr", 0x21B6, "cup", 0x222A,
    "Cup", 0x22D3, "cupre", 0x227C, "curarr", 0x21B7, "curren", 0x00A4,
    "cuvee", 0x22CE, "cuwed", 0x22CF,
    NULL, 0
};

static NameId namesD[]={
    "dagger", 0x2020, "Dagger", 0x2021, "daleth", 0x2138, "darr", 0x2193,
    "dArr", 0x21D3, "darr2", 0x21CA, "dash", 0x2010, "dashv", 0x22A3,
    "dblac", 0x02DD, "Dcaron", 0x010E, "dcaron", 0x010F, "Dcy", 0x0414,
    "dcy", 0x0434, "deg", 0x00B0, "Delta", 0x0394, "delta", 0x03B4,
    "Dgr", 0x0394, "dgr", 0x03B4, "dharl", 0x21C3, "dharr", 0x21C2,
    "diam", 0x22C4, "diams", 0x2666, "die", 0x00A8, "divide", 0x00F7,
    "divonx", 0x22C7, "DJcy", 0x0402, "djcy", 0x0452, "dlarr", 0x2199,
    "dlcorn", 0x231E, "dlcrop", 0x230D, "dollar", 0x0024, "Dot", 0x00A8,
    "dot", 0x02D9, "DotDot", 0x20DC, "drarr", 0x2198, "drcorn", 0x231F,
    "drcrop", 0x230C, "DScy", 0x0405, "dscy", 0x0455, "Dstrok", 0x0110,
    "dstrok", 0x0111, "dtri", 0x25BF, "dtrif", 0x25BE, "DZcy", 0x040F,
    "dzcy", 0x045F,
    NULL, 0
};

static NameId namesE[]={
    "Eacgr", 0x0388, "eacgr", 0x03AD, "Eacute", 0x00C9, "eacute", 0x00E9,
    "Ecaron", 0x011A, "ecaron", 0x011B, "ecir", 0x2256, "Ecirc", 0x00CA,
    "ecirc", 0x00EA, "ecolon", 0x2255, "Ecy", 0x042D, "ecy", 0x044D,
    "Edot", 0x0116, "edot", 0x0117, "eDot", 0x2251, "EEacgr", 0x0389,
    "eeacgr", 0x03AE, "EEgr", 0x0397, "eegr", 0x03B7, "efDot", 0x2252,
    "Egr", 0x0395, "egr", 0x03B5, "Egrave", 0x00C8, "egrave", 0x00E8,
    "egs", 0x22DD, "ell", 0x2113, "els", 0x22DC, "Emacr", 0x0112,
    "emacr", 0x0113, "empty", 0x2205, "emsp", 0x2003, "emsp13", 0x2004,
    "emsp14", 0x2005, "ENG", 0x014A, "eng", 0x014B, "ensp", 0x2002,
    "Eogon", 0x0118, "eogon", 0x0119, "epsi", 0x220A, "epsis", 0x220A,
    "epsiv", 0x03B5, "equals", 0x003D, "equiv", 0x2261, "erDot", 0x2253,
    "esdot", 0x2250, "eta", 0x03B7, "ETH", 0x00D0, "eth", 0x00F0,
    "Euml", 0x00CB, "euml", 0x00EB, "excl", 0x0021, "exist", 0x2203,
    NULL, 0
};

static NameId namesF[]={
    "Fcy", 0x0424, "fcy", 0x0444, "female", 0x2640, "ffilig", 0xFB03,
    "fflig", 0xFB00, "ffllig", 0xFB04, "filig", 0xFB01, "flat", 0x266D,
    "fllig", 0xFB02, "fnof", 0x0192, "forall", 0x2200, "fork", 0x22D4,
    "frac12", 0x00BD, "frac13", 0x2153, "frac14", 0x00BC, "frac15", 0x2155,
    "frac16", 0x2159, "frac18", 0x215B, "frac23", 0x2154, "frac25", 0x2156,
    "frac34", 0x00BE, "frac35", 0x2157, "frac38", 0x215C, "frac45", 0x2158,
    "frac56", 0x215A, "frac58", 0x215D, "frac78", 0x215E, "frown", 0x2322,
    NULL, 0
};

static NameId namesG[]={
    "gacute", 0x01F5, "Gamma", 0x0393, "gamma", 0x03B3, "gammad", 0x03DC,
    "gap", 0x2273, "Gbreve", 0x011E, "gbreve", 0x011F, "Gcedil", 0x0122,
    "Gcirc", 0x011C, "gcirc", 0x011D, "Gcy", 0x0413, "gcy", 0x0433,
    "Gdot", 0x0120, "gdot", 0x0121, "ge", 0x2265, "gE", 0x2267,
    "gel", 0x22DB, "gEl", 0x22DB, "ges", 0x2265, "Gg", 0x22D9,
    "Ggr", 0x0393, "ggr", 0x03B3, "gimel", 0x2137, "GJcy", 0x0403,
    "gjcy", 0x0453, "gl", 0x2277, "gnap", 0xE411, "gne", 0x2269,
    "gnE", 0x2269, "gnsim", 0x22E7, "grave", 0x0060, "gsdot", 0x22D7,
    "gsim", 0x2273, "gt", 0x003E, "Gt", 0x226B, "gvnE", 0x2269,
    NULL, 0
};

static NameId namesH[]={
    "hairsp", 0x200A, "half", 0x00BD, "hamilt", 0x210B, "HARDcy", 0x042A,
    "hardcy", 0x044A, "harr", 0x2194, "hArr", 0x21D4, "harrw", 0x21AD,
    "Hcirc", 0x0124, "hcirc", 0x0125, "hearts", 0x2665, "hellip", 0x2026,
    "horbar", 0x2015, "Hstrok", 0x0126, "hstrok", 0x0127, "hybull", 0x2043,
    "hyphen", 0x002D,
    NULL, 0
};

static NameId namesI[]={
    "Iacgr", 0x038A, "iacgr", 0x03AF, "Iacute", 0x00CD, "iacute", 0x00ED,
    "Icirc", 0x00CE, "icirc", 0x00EE, "Icy", 0x0418, "icy", 0x0438,
    "idiagr", 0x0390, "Idigr", 0x03AA, "idigr", 0x03CA, "Idot", 0x0130,
    "IEcy", 0x0415, "iecy", 0x0435, "iexcl", 0x00A1, "iff", 0x21D4,
    "Igr", 0x0399, "igr", 0x03B9, "Igrave", 0x00CC, "igrave", 0x00EC,
    "IJlig", 0x0132, "ijlig", 0x0133, "Imacr", 0x012A, "imacr", 0x012B,
    "image", 0x2111, "incare", 0x2105, "infin", 0x221E, "inodot", 0x0131,
    "int", 0x222B, "intcal", 0x22BA, "IOcy", 0x0401, "iocy", 0x0451,
    "Iogon", 0x012E, "iogon", 0x012F, "iota", 0x03B9, "iquest", 0x00BF,
    "isin", 0x220A, "Itilde", 0x0128, "itilde", 0x0129, "Iukcy", 0x0406,
    "iukcy", 0x0456, "Iuml", 0x00CF, "iuml", 0x00EF,
    NULL, 0
};

static NameId namesJ[]={
    "Jcirc", 0x0134, "jcirc", 0x0135, "Jcy", 0x0419, "jcy", 0x0439,
    "Jsercy", 0x0408, "jsercy", 0x0458, "Jukcy", 0x0404, "jukcy", 0x0454,
    NULL, 0
};

static NameId namesK[]={
    "kappa", 0x03BA, "kappav", 0x03F0, "Kcedil", 0x0136, "kcedil", 0x0137,
    "Kcy", 0x041A, "kcy", 0x043A, "Kgr", 0x039A, "kgr", 0x03BA,
    "kgreen", 0x0138, "KHcy", 0x0425, "khcy", 0x0445, "KHgr", 0x03A7,
    "khgr", 0x03C7, "KJcy", 0x040C, "kjcy", 0x045C,
    NULL, 0
};

static NameId namesL[]={
    "lAarr", 0x21DA, "Lacute", 0x0139, "lacute", 0x013A, "lagran", 0x2112,
    "Lambda", 0x039B, "lambda", 0x03BB, "lang", 0x3008, "lap", 0x2272,
    "laquo", 0x00AB, "larr", 0x2190, "Larr", 0x219E, "lArr", 0x21D0,
    "larr2", 0x21C7, "larrhk", 0x21A9, "larrlp", 0x21AB, "larrtl", 0x21A2,
    "Lcaron", 0x013D, "lcaron", 0x013E, "Lcedil", 0x013B, "lcedil", 0x013C,
    "lceil", 0x2308, "lcub", 0x007B, "Lcy", 0x041B, "lcy", 0x043B,
    "ldot", 0x22D6, "ldquo", 0x201C, "ldquor", 0x201E, "le", 0x2264,
    "lE", 0x2266, "leg", 0x22DA, "lEg", 0x22DA, "les", 0x2264,
    "lfloor", 0x230A, "lg", 0x2276, "Lgr", 0x039B, "lgr", 0x03BB,
    "lhard", 0x21BD, "lharu", 0x21BC, "lhblk", 0x2584, "LJcy", 0x0409,
    "ljcy", 0x0459, "Ll", 0x22D8, "Lmidot", 0x013F, "lmidot", 0x0140,
    "lnap", 0xE2A2, "lne", 0x2268, "lnE", 0x2268, "lnsim", 0x22E6,
    "lowast", 0x2217, "lowbar", 0x005F, "loz", 0x25CA, "lozf", 0x2726,
    "lpar", 0x0028, "lrarr2", 0x21C6, "lrhar2", 0x21CB, "lsh", 0x21B0,
    "lsim", 0x2272, "lsqb", 0x005B, "lsquo", 0x2018, "lsquor", 0x201A,
    "Lstrok", 0x0141, "lstrok", 0x0142, "lt", 0x003C, "Lt", 0x226A,
    "lthree", 0x22CB, "ltimes", 0x22C9, "ltri", 0x25C3, "ltrie", 0x22B4,
    "ltrif", 0x25C2, "lvnE", 0x2268,
    NULL, 0
};

static NameId namesM[]={
    "macr", 0x00AF, "male", 0x2642, "malt", 0x2720, "map", 0x21A6,
    "marker", 0x25AE, "Mcy", 0x041C, "mcy", 0x043C, "mdash", 0x2014,
    "Mgr", 0x039C, "mgr", 0x03BC, "micro", 0x00B5, "mid", 0x2223,
    "middot", 0x00B7, "minus", 0x2212, "minusb", 0x229F, "mldr", 0x2026,
    "mnplus", 0x2213, "models", 0x22A7, "mu", 0x03BC, "mumap", 0x22B8,
    NULL, 0
};

static NameId namesN[]={
    "nabla", 0x2207, "Nacute", 0x0143, "nacute", 0x0144, "nap", 0x2249,
    "napos", 0x0149, "natur", 0x266E,
//  "nbsp", 0x00A0,
    "nbsp", 32,     // make non-breaking space appear as space
    "Ncaron", 0x0147, "ncaron", 0x0148, "Ncedil", 0x0145, "ncedil", 0x0146,
    "ncong", 0x2247, "Ncy", 0x041D, "ncy", 0x043D, "ndash", 0x2013,
    "ne", 0x2260, "nearr", 0x2197, "nequiv", 0x2262, "nexist", 0x2204,
    "nge", 0x2271, "ngE", 0x2271, "nges", 0x2271, "Ngr", 0x039D,
    "ngr", 0x03BD, "ngt", 0x226F, "nharr", 0x21AE, "nhArr", 0x21CE,
    "ni", 0x220D, "NJcy", 0x040A, "njcy", 0x045A, "nlarr", 0x219A,
    "nlArr", 0x21CD, "nldr", 0x2025, "nle", 0x2270, "nlE", 0x2270,
    "nles", 0x2270, "nlt", 0x226E, "nltri", 0x22EA, "nltrie", 0x22EC,
    "nmid", 0x2224, "not", 0x00AC, "notin", 0x2209, "npar", 0x2226,
    "npr", 0x2280, "npre", 0x22E0, "nrarr", 0x219B, "nrArr", 0x21CF,
    "nrtri", 0x22EB, "nrtrie", 0x22ED, "nsc", 0x2281, "nsce", 0x22E1,
    "nsim", 0x2241, "nsime", 0x2244, "nsmid", 0xE2AA, "nspar", 0x2226,
    "nsub", 0x2284, "nsube", 0x2288, "nsubE", 0x2288, "nsup", 0x2285,
    "nsupe", 0x2289, "nsupE", 0x2289, "Ntilde", 0x00D1, "ntilde", 0x00F1,
    "nu", 0x03BD, "num", 0x0023, "numero", 0x2116, "numsp", 0x2007,
    "nvdash", 0x22AC, "nvDash", 0x22AD, "nVdash", 0x22AE, "nVDash", 0x22AF,
    "nwarr", 0x2196,
    NULL, 0
};

static NameId namesO[]={
    "Oacgr", 0x038C, "oacgr", 0x03CC, "Oacute", 0x00D3, "oacute", 0x00F3,
    "oast", 0x229B, "ocir", 0x229A, "Ocirc", 0x00D4, "ocirc", 0x00F4,
    "Ocy", 0x041E, "ocy", 0x043E, "odash", 0x229D, "Odblac", 0x0150,
    "odblac", 0x0151, "odot", 0x2299, "OElig", 0x0152, "oelig", 0x0153,
    "ogon", 0x02DB, "Ogr", 0x039F, "ogr", 0x03BF, "Ograve", 0x00D2,
    "ograve", 0x00F2, "OHacgr", 0x038F, "ohacgr", 0x03CE, "OHgr", 0x03A9,
    "ohgr", 0x03C9, "ohm", 0x2126, "olarr", 0x21BA, "Omacr", 0x014C,
    "omacr", 0x014D, "Omega", 0x03A9, "omega", 0x03C9, "ominus", 0x2296,
    "oplus", 0x2295, "or", 0x2228, "orarr", 0x21BB, "order", 0x2134,
    "ordf", 0x00AA, "ordm", 0x00BA, "oS", 0x24C8, "Oslash", 0x00D8,
    "oslash", 0x00F8, "osol", 0x2298, "Otilde", 0x00D5, "otilde", 0x00F5,
    "otimes", 0x2297, "Ouml", 0x00D6, "ouml", 0x00F6,
    NULL, 0
};

static NameId namesP[]={
    "par", 0x2225, "para", 0x00B6, "part", 0x2202, "Pcy", 0x041F,
    "pcy", 0x043F, "percnt", 0x0025, "period", 0x002E, "permil", 0x2030,
    "perp", 0x22A5, "Pgr", 0x03A0, "pgr", 0x03C0, "PHgr", 0x03A6,
    "phgr", 0x03C6, "Phi", 0x03A6, "phis", 0x03C6, "phiv", 0x03D5,
    "phmmat", 0x2133, "phone", 0x260E, "Pi", 0x03A0, "pi", 0x03C0,
    "piv", 0x03D6, "planck", 0x210F, "plus", 0x002B, "plusb", 0x229E,
    "plusdo", 0x2214, "plusmn", 0x00B1, "pound", 0x00A3, "pr", 0x227A,
    "prap", 0x227E, "pre", 0x227C, "prime", 0x2032, "Prime", 0x2033,
    "prnap", 0x22E8, "prnE", 0xE2B3, "prnsim", 0x22E8, "prod", 0x220F,
    "prop", 0x221D, "prsim", 0x227E, "PSgr", 0x03A8, "psgr", 0x03C8,
    "Psi", 0x03A8, "psi", 0x03C8, "puncsp", 0x2008,
    NULL, 0
};

static NameId namesQ[]={
    "quest", 0x003F, "quot", 0x0022,
    NULL, 0
};

static NameId namesR[]={
    "rAarr", 0x21DB, "Racute", 0x0154, "racute", 0x0155, "radic", 0x221A,
    "rang", 0x3009, "raquo", 0x00BB, "rarr", 0x2192, "Rarr", 0x21A0,
    "rArr", 0x21D2, "rarr2", 0x21C9, "rarrhk", 0x21AA, "rarrlp", 0x21AC,
    "rarrtl", 0x21A3, "rarrw", 0x219D, "Rcaron", 0x0158, "rcaron", 0x0159,
    "Rcedil", 0x0156, "rcedil", 0x0157, "rceil", 0x2309, "rcub", 0x007D,
    "Rcy", 0x0420, "rcy", 0x0440, "rdquo", 0x201D, "rdquor", 0x201C,
    "real", 0x211C, "rect", 0x25AD, "reg", 0x00AE, "rfloor", 0x230B,
    "Rgr", 0x03A1, "rgr", 0x03C1, "rhard", 0x21C1, "rharu", 0x21C0,
    "rho", 0x03C1, "rhov", 0x03F1, "ring", 0x02DA, "rlarr2", 0x21C4,
    "rlhar2", 0x21CC, "rpar", 0x0029, "rpargt", 0xE291, "rsh", 0x21B1,
    "rsqb", 0x005D, "rsquo", 0x2019, "rsquor", 0x2018, "rthree", 0x22CC,
    "rtimes", 0x22CA, "rtri", 0x25B9, "rtrie", 0x22B5, "rtrif", 0x25B8,
    "rx", 0x211E,
    NULL, 0
};

static NameId namesS[]={
    "Sacute", 0x015A, "sacute", 0x015B, "samalg", 0x2210, "sbsol", 0xFE68,
    "sc", 0x227B, "scap", 0x227F, "Scaron", 0x0160, "scaron", 0x0161,
    "sccue", 0x227D, "sce", 0x227D, "Scedil", 0x015E, "scedil", 0x015F,
    "Scirc", 0x015C, "scirc", 0x015D, "scnap", 0x22E9, "scnE", 0xE2B5,
    "scnsim", 0x22E9, "scsim", 0x227F, "Scy", 0x0421, "scy", 0x0441,
    "sdot", 0x22C5, "sdotb", 0x22A1, "sect", 0x00A7, "semi", 0x003B,
    "setmn", 0x2216, "sext", 0x2736, "sfgr", 0x03C2, "sfrown", 0x2322,
    "Sgr", 0x03A3, "sgr", 0x03C3, "sharp", 0x266F, "SHCHcy", 0x0429,
    "shchcy", 0x0449, "SHcy", 0x0428, "shcy", 0x0448, "shy", 0x00AD,
    "Sigma", 0x03A3, "sigma", 0x03C3, "sigmav", 0x03C2, "sim", 0x223C,
    "sime", 0x2243, "smid", 0xE301, "smile", 0x2323, "SOFTcy", 0x042C,
    "softcy", 0x044C, "sol", 0x002F, "spades", 0x2660, "spar", 0x2225,
    "sqcap", 0x2293, "sqcup", 0x2294, "sqsub", 0x228F, "sqsube", 0x2291,
    "sqsup", 0x2290, "sqsupe", 0x2292, "squ", 0x25A1, "square", 0x25A1,
    "squf", 0x25AA, "ssetmn", 0x2216, "ssmile", 0x2323, "sstarf", 0x22C6,
    "star", 0x22C6, "starf", 0x2605, "sub", 0x2282, "Sub", 0x22D0,
    "sube", 0x2286, "subE", 0x2286, "subne", 0x228A, "subnE", 0x228A,
    "sum", 0x2211, "sung", 0x2669, "sup", 0x2283, "Sup", 0x22D1,
    "sup1", 0x00B9, "sup2", 0x00B2, "sup3", 0x00B3, "supe", 0x2287,
    "supE", 0x2287, "supne", 0x228B, "supnE", 0x228B, "szlig", 0x00DF,
    NULL, 0
};

static NameId namesT[]={
    "target", 0x2316, "tau", 0x03C4, "Tcaron", 0x0164, "tcaron", 0x0165,
    "Tcedil", 0x0162, "tcedil", 0x0163, "Tcy", 0x0422, "tcy", 0x0442,
    "tdot", 0x20DB, "telrec", 0x2315, "Tgr", 0x03A4, "tgr", 0x03C4,
    "there4", 0x2234, "Theta", 0x0398, "thetas", 0x03B8, "thetav", 0x03D1,
    "THgr", 0x0398, "thgr", 0x03B8, "thinsp", 0x2009, "thkap", 0x2248,
    "thksim", 0x223C, "THORN", 0x00DE, "thorn", 0x00FE, "tilde", 0x02DC,
    "times", 0x00D7, "timesb", 0x22A0, "top", 0x22A4, "tprime", 0x2034,
    "trade", 0x2122, "trie", 0x225C, "TScy", 0x0426, "tscy", 0x0446,
    "TSHcy", 0x040B, "tshcy", 0x045B, "Tstrok", 0x0166, "tstrok", 0x0167,
    "twixt", 0x226C,
    NULL, 0
};

static NameId namesU[]={
    "Uacgr", 0x038E, "uacgr", 0x03CD, "Uacute", 0x00DA, "uacute", 0x00FA,
    "uarr", 0x2191, "uArr", 0x21D1, "uarr2", 0x21C8, "Ubrcy", 0x040E,
    "ubrcy", 0x045E, "Ubreve", 0x016C, "ubreve", 0x016D, "Ucirc", 0x00DB,
    "ucirc", 0x00FB, "Ucy", 0x0423, "ucy", 0x0443, "Udblac", 0x0170,
    "udblac", 0x0171, "udiagr", 0x03B0, "Udigr", 0x03AB, "udigr", 0x03CB,
    "Ugr", 0x03A5, "ugr", 0x03C5, "Ugrave", 0x00D9, "ugrave", 0x00F9,
    "uharl", 0x21BF, "uharr", 0x21BE, "uhblk", 0x2580, "ulcorn", 0x231C,
    "ulcrop", 0x230F, "Umacr", 0x016A, "umacr", 0x016B, "uml", 0x00A8,
    "Uogon", 0x0172, "uogon", 0x0173, "uplus", 0x228E, "upsi", 0x03C5,
    "Upsi", 0x03D2, "urcorn", 0x231D, "urcrop", 0x230E, "Uring", 0x016E,
    "uring", 0x016F, "Utilde", 0x0168, "utilde", 0x0169, "utri", 0x25B5,
    "utrif", 0x25B4, "Uuml", 0x00DC, "uuml", 0x00FC,
    NULL, 0
};

static NameId namesV[]={
    "varr", 0x2195, "vArr", 0x21D5, "Vcy", 0x0412, "vcy", 0x0432,
    "vdash", 0x22A2, "vDash", 0x22A8, "Vdash", 0x22A9, "veebar", 0x22BB,
    "vellip", 0x22EE, "verbar", 0x007C, "Verbar", 0x2016, "vltri", 0x22B2,
    "vprime", 0x2032, "vprop", 0x221D, "vrtri", 0x22B3, "vsubne", 0x228A,
    "vsubnE", 0xE2B8, "vsupne", 0x228B, "vsupnE", 0x228B, "Vvdash", 0x22AA,
    NULL, 0
};

static NameId namesW[]={
    "Wcirc", 0x0174, "wcirc", 0x0175, "wedgeq", 0x2259, "weierp", 0x2118,
    "wreath", 0x2240,
    NULL, 0
};

static NameId namesX[]={
    "xcirc", 0x25CB, "xdtri", 0x25BD, "Xgr", 0x039E, "xgr", 0x03BE,
    "xharr", 0x2194, "xhArr", 0x2194, "Xi", 0x039E, "xi", 0x03BE,
    "xlArr", 0x21D0, "xrArr", 0x21D2, "xutri", 0x25B3,
    NULL, 0
};

static NameId namesY[]={
    "Yacute", 0x00DD, "yacute", 0x00FD, "YAcy", 0x042F, "yacy", 0x044F,
    "Ycirc", 0x0176, "ycirc", 0x0177, "Ycy", 0x042B, "ycy", 0x044B,
    "yen", 0x00A5, "YIcy", 0x0407, "yicy", 0x0457, "YUcy", 0x042E,
    "yucy", 0x044E, "yuml", 0x00FF, "Yuml", 0x0178,
    NULL, 0
};

static NameId namesZ[]={
    "Zacute", 0x0179, "zacute", 0x017A, "Zcaron", 0x017D, "zcaron", 0x017E,
    "Zcy", 0x0417, "zcy", 0x0437, "Zdot", 0x017B, "zdot", 0x017C,
    "zeta", 0x03B6, "Zgr", 0x0396, "zgr", 0x03B6, "ZHcy", 0x0416,
    "zhcy", 0x0436,
    NULL, 0
};

// @todo@ order namesTable and names? by frequency
// One table per initial letter; index 0 is 'a'/'A', index 25 is 'z'/'Z'.
static NameId* namesTable[] = {
    namesA, namesB, namesC, namesD, namesE, namesF, namesG, namesH, namesI,
    namesJ, namesK, namesL, namesM, namesN, namesO, namesP, namesQ, namesR,
    namesS, namesT, namesU, namesV, namesW, namesX, namesY, namesZ, NULL
};

/*********************************************
 * Look up a named character entity (IN_GCC variant).
 * Params:
 *      p      = start of the entity name, without the leading '&' or
 *               trailing ';'; need not be NUL-terminated, must not be NULL
 *      length = number of bytes of p that make up the name
 * Returns:
 *      the table value for the entity, or -1 after reporting
 *      "unrecognized character entity" through error() when no entry
 *      matches exactly.
 * Matching is case sensitive and requires the whole name to match; the
 * first letter (either case) only selects which per-letter table to scan.
 * error() is not declared in this file -- presumably it comes from a
 * project header supplied by the IN_GCC build; confirm when touching this.
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    // A non-positive length cannot name an entity; skipping the lookup
    // also keeps a negative value from being converted to a huge size_t.
    if (length > 0)
    {
        int tableIndex = -1;

        // ASCII case folding of the first letter, done by hand so the
        // file does not depend on <ctype.h>.
        if (*p >= 'a' && *p <= 'z')
        {
            tableIndex = *p - 'a';
        }
        else if (*p >= 'A' && *p <= 'Z')
        {
            tableIndex = *p - 'A';
        }

        if (tableIndex >= 0)
        {
            const NameId *names = namesTable[tableIndex];
            int i;

            // Each per-letter table ends with a { NULL, 0 } sentinel.
            for (i = 0; names[i].name; i++)
            {
                if (entityNameEquals(names[i].name, p, (size_t)length))
                {
                    return names[i].value;
                }
            }
        }
    }
    error("unrecognized character entity \"%.*s\"", length, p);
    return -1;
}

#else

//TODO: Merge Walter's list with Thomas'

static NameId names[] =
{
    // Entities
    "quot", 34, "amp", 38, "lt", 60, "gt", 62,
    "OElig", 338, "oelig", 339, "Scaron", 352, "scaron", 353,
    "Yuml", 376, "circ", 710, "tilde", 732, "ensp", 8194,
    "emsp", 8195, "thinsp", 8201, "zwnj", 8204, "zwj", 8205,
    "lrm", 8206, "rlm", 8207, "ndash", 8211, "mdash", 8212,
    "lsquo", 8216, "rsquo", 8217, "sbquo", 8218, "ldquo", 8220,
    "rdquo", 8221, "bdquo", 8222, "dagger", 8224, "Dagger", 8225,
    "permil", 8240, "lsaquo", 8249, "rsaquo", 8250, "euro", 8364,

    // Latin-1 (ISO-8859-1) Entities
    "nbsp", 160, "iexcl", 161, "cent", 162, "pound", 163,
    "curren", 164, "yen", 165, "brvbar", 166, "sect", 167,
    "uml", 168, "copy", 169, "ordf", 170, "laquo", 171,
    "not", 172, "shy", 173, "reg", 174, "macr", 175,
    "deg", 176, "plusmn", 177, "sup2", 178, "sup3", 179,
    "acute", 180, "micro", 181, "para", 182, "middot", 183,
    "cedil", 184, "sup1", 185, "ordm", 186, "raquo", 187,
    "frac14", 188, "frac12", 189, "frac34", 190, "iquest", 191,
    "Agrave", 192, "Aacute", 193, "Acirc", 194, "Atilde", 195,
    "Auml", 196, "Aring", 197, "AElig", 198, "Ccedil", 199,
    "Egrave", 200, "Eacute", 201, "Ecirc", 202, "Euml", 203,
    "Igrave", 204, "Iacute", 205, "Icirc", 206, "Iuml", 207,
    "ETH", 208, "Ntilde", 209, "Ograve", 210, "Oacute", 211,
    "Ocirc", 212, "Otilde", 213, "Ouml", 214, "times", 215,
    "Oslash", 216, "Ugrave", 217, "Uacute", 218, "Ucirc", 219,
    "Uuml", 220, "Yacute", 221, "THORN", 222, "szlig", 223,
    "agrave", 224, "aacute", 225, "acirc", 226, "atilde", 227,
    "auml", 228, "aring", 229, "aelig", 230, "ccedil", 231,
    "egrave", 232, "eacute", 233, "ecirc", 234, "euml", 235,
    "igrave", 236, "iacute", 237, "icirc", 238, "iuml", 239,
    "eth", 240, "ntilde", 241, "ograve", 242, "oacute", 243,
    "ocirc", 244, "otilde", 245, "ouml", 246, "divide", 247,
    "oslash", 248, "ugrave", 249, "uacute", 250, "ucirc", 251,
    "uuml", 252, "yacute", 253, "thorn", 254, "yuml", 255,

    // Symbols and Greek letter entities
    "fnof", 402, "Alpha", 913, "Beta", 914, "Gamma", 915,
    "Delta", 916, "Epsilon", 917, "Zeta", 918, "Eta", 919,
    "Theta", 920, "Iota", 921, "Kappa", 922, "Lambda", 923,
    "Mu", 924, "Nu", 925, "Xi", 926, "Omicron", 927,
    "Pi", 928, "Rho", 929, "Sigma", 931, "Tau", 932,
    "Upsilon", 933, "Phi", 934, "Chi", 935, "Psi", 936,
    "Omega", 937, "alpha", 945, "beta", 946, "gamma", 947,
    "delta", 948, "epsilon", 949, "zeta", 950, "eta", 951,
    "theta", 952, "iota", 953, "kappa", 954, "lambda", 955,
    "mu", 956, "nu", 957, "xi", 958, "omicron", 959,
    "pi", 960, "rho", 961, "sigmaf", 962, "sigma", 963,
    "tau", 964, "upsilon", 965, "phi", 966, "chi", 967,
    "psi", 968, "omega", 969, "thetasym", 977, "upsih", 978,
    "piv", 982, "bull", 8226, "hellip", 8230, "prime", 8242,
    "Prime", 8243, "oline", 8254, "frasl", 8260, "weierp", 8472,
    "image", 8465, "real", 8476, "trade", 8482, "alefsym", 8501,
    "larr", 8592, "uarr", 8593, "rarr", 8594, "darr", 8595,
    "harr", 8596, "crarr", 8629, "lArr", 8656, "uArr", 8657,
    "rArr", 8658, "dArr", 8659, "hArr", 8660, "forall", 8704,
    "part", 8706, "exist", 8707, "empty", 8709, "nabla", 8711,
    "isin", 8712, "notin", 8713, "ni", 8715, "prod", 8719,
    "sum", 8721, "minus", 8722, "lowast", 8727, "radic", 8730,
    "prop", 8733, "infin", 8734, "ang", 8736, "and", 8743,
    "or", 8744, "cap", 8745, "cup", 8746, "int", 8747,
    "there4", 8756, "sim", 8764, "cong", 8773, "asymp", 8776,
    "ne", 8800, "equiv", 8801, "le", 8804, "ge", 8805,
    "sub", 8834, "sup", 8835, "nsub", 8836, "sube", 8838,
    "supe", 8839, "oplus", 8853, "otimes", 8855, "perp", 8869,
    "sdot", 8901, "lceil", 8968, "rceil", 8969, "lfloor", 8970,
    "rfloor", 8971, "lang", 9001, "rang", 9002, "loz", 9674,
    "spades", 9824, "clubs", 9827, "hearts", 9829, "diams", 9830,
};

/*********************************************
 * Look up a named character entity (default, non-IN_GCC variant).
 * Params:
 *      p      = start of the entity name, without the leading '&' or
 *               trailing ';'; need not be NUL-terminated, must not be NULL
 *      length = number of bytes of p that make up the name
 * Returns:
 *      the table value for the entity, or -1 if the name is not in the
 *      table or length is not positive.  No diagnostic is issued here.
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    size_t i;

    // Nothing can match an empty name, and a negative length must not be
    // converted to size_t for the comparison below.
    if (length <= 0)
    {
        return -1;
    }

    // BUG: this is a dumb, slow linear search
    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    {
        // Entries are case sensitive
        if (entityNameEquals(names[i].name, p, (size_t)length))
        {
            return names[i].value;
        }
    }
    return -1;
}

#endif