Mercurial > projects > ldc
view dmd/entity.c @ 1117:4c20fcc4252b
Fun with parameter attributes: For several of the "synthetic" parameters added
to D functions, we can apply noalias and nocapture. They are sret parameters,
'nest' pointers passed to nested functions, and _argptr:
Nocapture:
- Sret and nest are nocapture because they don't represent D-level variables,
and thus the callee can't (validly) obtain a pointer to them, let alone keep
it around after it returns.
- _argptr is nocapture because although the callee has access to it as a
pointer, that pointer is invalidated when it returns.
All three are noalias because they're function-local variables:
- Sret and _argptr are noalias because they're freshly alloca'd memory only
used for a single function call that's not allowed to keep an aliasing
pointer to it around (since the parameter is nocapture).
- 'Nest' is noalias because the callee only ever has access to one such pointer
per parent function, and every parent function has a different one.
This commit also ensures attributes set on sret, _arguments and _argptr are
propagated to calls to such functions.
It also adds one exception to the general rule that attributes on function types
should propagate to calls: the type of a delegate's function pointer has a
'nest' parameter, but this can either be a true 'nest' (for delegates to nested
functions) or a 'this' (for delegates to member functions). Since 'this' is
neither noalias nor nocapture, and there's generally no way to tell which one it
is, we remove these attributes at the call site if the callee is a delegate.
author | Frits van Bommel <fvbommel wxs.nl> |
---|---|
date | Sat, 14 Mar 2009 22:15:31 +0100 |
parents | b30fe7e1dbb9 |
children |
line wrap: on
line source
// Copyright (c) 1999-2009 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// License for redistribution is by either the Artistic License
// in artistic.txt, or the GNU General Public License in gnu.txt.
// See the included readme.txt for details.

#include <ctype.h>
#include <string.h>

/*********************************************
 * Convert from named entity to its encoding.
 * For reference:
 *      http://www.htmlhelp.com/reference/html40/entities/
 *      http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
 */

/* One table row: an entity name (without '&' and ';') and its code point. */
struct NameId
{
    const char *name;
    unsigned short value;
};

/* Lets the tables below name the type as plain `NameId` when this file is
 * compiled as C as well as C++ (where the typedef is a harmless no-op). */
typedef struct NameId NameId;

/* Return nonzero when the NUL-terminated table entry `name` is exactly the
 * `length` bytes starting at `p` (which need not be NUL-terminated).
 * The comparison is case sensitive.  The length check comes first so that
 * memcmp never reads beyond the end of `name`. */
static int entityNameEquals(const char *name, const unsigned char *p, int length)
{
    if (length <= 0)
        return 0;
    return strlen(name) == (size_t)length
        && memcmp(name, p, (size_t)length) == 0;
}

#if IN_GCC

/* Per-initial-letter tables; each one ends with a { NULL, 0 } sentinel. */

static const NameId namesA[]={
    "Aacgr", 0x0386, "aacgr", 0x03AC, "Aacute", 0x00C1, "aacute", 0x00E1,
    "Abreve", 0x0102, "abreve", 0x0103, "Acirc", 0x00C2, "acirc", 0x00E2,
    "acute", 0x00B4, "Acy", 0x0410, "acy", 0x0430, "AElig", 0x00C6,
    "aelig", 0x00E6, "Agr", 0x0391, "agr", 0x03B1, "Agrave", 0x00C0,
    "agrave", 0x00E0, "aleph", 0x2135, "alpha", 0x03B1, "Amacr", 0x0100,
    "amacr", 0x0101, "amalg", 0x2210, "amp", 0x0026, "and", 0x2227,
    "ang", 0x2220, "ang90", 0x221F, "angmsd", 0x2221, "angsph", 0x2222,
    "angst", 0x212B, "Aogon", 0x0104, "aogon", 0x0105, "ap", 0x2248,
    "ape", 0x224A, "apos", 0x0027, "Aring", 0x00C5, "aring", 0x00E5,
    "ast", 0x002A, "asymp", 0x224D, "Atilde", 0x00C3, "atilde", 0x00E3,
    "Auml", 0x00C4, "auml", 0x00E4,
    NULL, 0
};

static const NameId namesB[]={
    "barwed", 0x22BC, "Barwed", 0x2306, "bcong", 0x224C, "Bcy", 0x0411,
    "bcy", 0x0431, "becaus", 0x2235, "bepsi", 0x220D, "bernou", 0x212C,
    "beta", 0x03B2, "beth", 0x2136, "Bgr", 0x0392, "bgr", 0x03B2,
    "blank", 0x2423, "blk12", 0x2592, "blk14", 0x2591, "blk34", 0x2593,
    "block", 0x2588, "bottom", 0x22A5, "bowtie", 0x22C8, "boxdl", 0x2510,
    "boxDL", 0x2555, "boxdL", 0x2556, "boxDl", 0x2557, "boxdr", 0x250C,
    "boxDR", 0x2552, "boxDr", 0x2553, "boxdR", 0x2554, "boxh", 0x2500,
    "boxH", 0x2550, "boxhd", 0x252C, "boxhD", 0x2564, "boxHD", 0x2565,
    "boxHd", 0x2566, "boxhu", 0x2534, "boxhU", 0x2567, "boxHU", 0x2568,
    "boxHu", 0x2569, "boxul", 0x2518, "boxUL", 0x255B, "boxUl", 0x255C,
    "boxuL", 0x255D, "boxur", 0x2514, "boxUR", 0x2558, "boxuR", 0x2559,
    "boxUr", 0x255A, "boxv", 0x2502, "boxV", 0x2551, "boxvh", 0x253C,
    "boxvH", 0x256A, "boxVH", 0x256B, "boxVh", 0x256C, "boxvl", 0x2524,
    "boxvL", 0x2561, "boxVL", 0x2562, "boxVl", 0x2563, "boxvr", 0x251C,
    "boxvR", 0x255E, "boxVR", 0x255F, "boxVr", 0x2560, "bprime", 0x2035,
    "breve", 0x02D8, "brvbar", 0x00A6, "bsim", 0x223D, "bsime", 0x22CD,
    "bsol", 0x005C, "bull", 0x2022, "bump", 0x224E, "bumpe", 0x224F,
    NULL, 0
};

static const NameId namesC[]={
    "Cacute", 0x0106, "cacute", 0x0107, "cap", 0x2229, "Cap", 0x22D2,
    "caret", 0x2041, "caron", 0x02C7, "Ccaron", 0x010C, "ccaron", 0x010D,
    "Ccedil", 0x00C7, "ccedil", 0x00E7, "Ccirc", 0x0108, "ccirc", 0x0109,
    "Cdot", 0x010A, "cdot", 0x010B, "cedil", 0x00B8, "cent", 0x00A2,
    "CHcy", 0x0427, "chcy", 0x0447, "check", 0x2713, "chi", 0x03C7,
    "cir", 0x25CB, "circ", 0x005E, "cire", 0x2257, "clubs", 0x2663,
    "colon", 0x003A, "colone", 0x2254, "comma", 0x002C, "commat", 0x0040,
    "comp", 0x2201, "compfn", 0x2218, "cong", 0x2245, "conint", 0x222E,
    "coprod", 0x2210, "copy", 0x00A9, "copysr", 0x2117, "cross", 0x2717,
    "cuepr", 0x22DE, "cuesc", 0x22DF, "cularr", 0x21B6, "cup", 0x222A,
    "Cup", 0x22D3, "cupre", 0x227C, "curarr", 0x21B7, "curren", 0x00A4,
    "cuvee", 0x22CE, "cuwed", 0x22CF,
    NULL, 0
};

static const NameId namesD[]={
    "dagger", 0x2020, "Dagger", 0x2021, "daleth", 0x2138, "darr", 0x2193,
    "dArr", 0x21D3, "darr2", 0x21CA, "dash", 0x2010, "dashv", 0x22A3,
    "dblac", 0x02DD, "Dcaron", 0x010E, "dcaron", 0x010F, "Dcy", 0x0414,
    "dcy", 0x0434, "deg", 0x00B0, "Delta", 0x0394, "delta", 0x03B4,
    "Dgr", 0x0394, "dgr", 0x03B4, "dharl", 0x21C3, "dharr", 0x21C2,
    "diam", 0x22C4, "diams", 0x2666, "die", 0x00A8, "divide", 0x00F7,
    "divonx", 0x22C7, "DJcy", 0x0402, "djcy", 0x0452, "dlarr", 0x2199,
    "dlcorn", 0x231E, "dlcrop", 0x230D, "dollar", 0x0024, "Dot", 0x00A8,
    "dot", 0x02D9, "DotDot", 0x20DC, "drarr", 0x2198, "drcorn", 0x231F,
    "drcrop", 0x230C, "DScy", 0x0405, "dscy", 0x0455, "Dstrok", 0x0110,
    "dstrok", 0x0111, "dtri", 0x25BF, "dtrif", 0x25BE, "DZcy", 0x040F,
    "dzcy", 0x045F,
    NULL, 0
};

static const NameId namesE[]={
    "Eacgr", 0x0388, "eacgr", 0x03AD, "Eacute", 0x00C9, "eacute", 0x00E9,
    "Ecaron", 0x011A, "ecaron", 0x011B, "ecir", 0x2256, "Ecirc", 0x00CA,
    "ecirc", 0x00EA, "ecolon", 0x2255, "Ecy", 0x042D, "ecy", 0x044D,
    "Edot", 0x0116, "edot", 0x0117, "eDot", 0x2251, "EEacgr", 0x0389,
    "eeacgr", 0x03AE, "EEgr", 0x0397, "eegr", 0x03B7, "efDot", 0x2252,
    "Egr", 0x0395, "egr", 0x03B5, "Egrave", 0x00C8, "egrave", 0x00E8,
    "egs", 0x22DD, "ell", 0x2113, "els", 0x22DC, "Emacr", 0x0112,
    "emacr", 0x0113, "empty", 0x2205, "emsp", 0x2003, "emsp13", 0x2004,
    "emsp14", 0x2005, "ENG", 0x014A, "eng", 0x014B, "ensp", 0x2002,
    "Eogon", 0x0118, "eogon", 0x0119, "epsi", 0x220A, "epsis", 0x220A,
    "epsiv", 0x03B5, "equals", 0x003D, "equiv", 0x2261, "erDot", 0x2253,
    "esdot", 0x2250, "eta", 0x03B7, "ETH", 0x00D0, "eth", 0x00F0,
    "Euml", 0x00CB, "euml", 0x00EB, "excl", 0x0021, "exist", 0x2203,
    NULL, 0
};

static const NameId namesF[]={
    "Fcy", 0x0424, "fcy", 0x0444, "female", 0x2640, "ffilig", 0xFB03,
    "fflig", 0xFB00, "ffllig", 0xFB04, "filig", 0xFB01, "flat", 0x266D,
    "fllig", 0xFB02, "fnof", 0x0192, "forall", 0x2200, "fork", 0x22D4,
    "frac12", 0x00BD, "frac13", 0x2153, "frac14", 0x00BC, "frac15", 0x2155,
    "frac16", 0x2159, "frac18", 0x215B, "frac23", 0x2154, "frac25", 0x2156,
    "frac34", 0x00BE, "frac35", 0x2157, "frac38", 0x215C, "frac45", 0x2158,
    "frac56", 0x215A, "frac58", 0x215D, "frac78", 0x215E, "frown", 0x2322,
    NULL, 0
};

static const NameId namesG[]={
    "gacute", 0x01F5, "Gamma", 0x0393, "gamma", 0x03B3, "gammad", 0x03DC,
    "gap", 0x2273, "Gbreve", 0x011E, "gbreve", 0x011F, "Gcedil", 0x0122,
    "Gcirc", 0x011C, "gcirc", 0x011D, "Gcy", 0x0413, "gcy", 0x0433,
    "Gdot", 0x0120, "gdot", 0x0121, "ge", 0x2265, "gE", 0x2267,
    "gel", 0x22DB, "gEl", 0x22DB, "ges", 0x2265, "Gg", 0x22D9,
    "Ggr", 0x0393, "ggr", 0x03B3, "gimel", 0x2137, "GJcy", 0x0403,
    "gjcy", 0x0453, "gl", 0x2277, "gnap", 0xE411, "gne", 0x2269,
    "gnE", 0x2269, "gnsim", 0x22E7, "grave", 0x0060, "gsdot", 0x22D7,
    "gsim", 0x2273, "gt", 0x003E, "Gt", 0x226B, "gvnE", 0x2269,
    NULL, 0
};

static const NameId namesH[]={
    "hairsp", 0x200A, "half", 0x00BD, "hamilt", 0x210B, "HARDcy", 0x042A,
    "hardcy", 0x044A, "harr", 0x2194, "hArr", 0x21D4, "harrw", 0x21AD,
    "Hcirc", 0x0124, "hcirc", 0x0125, "hearts", 0x2665, "hellip", 0x2026,
    "horbar", 0x2015, "Hstrok", 0x0126, "hstrok", 0x0127, "hybull", 0x2043,
    "hyphen", 0x002D,
    NULL, 0
};

static const NameId namesI[]={
    "Iacgr", 0x038A, "iacgr", 0x03AF, "Iacute", 0x00CD, "iacute", 0x00ED,
    "Icirc", 0x00CE, "icirc", 0x00EE, "Icy", 0x0418, "icy", 0x0438,
    "idiagr", 0x0390, "Idigr", 0x03AA, "idigr", 0x03CA, "Idot", 0x0130,
    "IEcy", 0x0415, "iecy", 0x0435, "iexcl", 0x00A1, "iff", 0x21D4,
    "Igr", 0x0399, "igr", 0x03B9, "Igrave", 0x00CC, "igrave", 0x00EC,
    "IJlig", 0x0132, "ijlig", 0x0133, "Imacr", 0x012A, "imacr", 0x012B,
    "image", 0x2111, "incare", 0x2105, "infin", 0x221E, "inodot", 0x0131,
    "int", 0x222B, "intcal", 0x22BA, "IOcy", 0x0401, "iocy", 0x0451,
    "Iogon", 0x012E, "iogon", 0x012F, "iota", 0x03B9, "iquest", 0x00BF,
    "isin", 0x220A, "Itilde", 0x0128, "itilde", 0x0129, "Iukcy", 0x0406,
    "iukcy", 0x0456, "Iuml", 0x00CF, "iuml", 0x00EF,
    NULL, 0
};

static const NameId namesJ[]={
    "Jcirc", 0x0134, "jcirc", 0x0135, "Jcy", 0x0419, "jcy", 0x0439,
    "Jsercy", 0x0408, "jsercy", 0x0458, "Jukcy", 0x0404, "jukcy", 0x0454,
    NULL, 0
};

static const NameId namesK[]={
    "kappa", 0x03BA, "kappav", 0x03F0, "Kcedil", 0x0136, "kcedil", 0x0137,
    "Kcy", 0x041A, "kcy", 0x043A, "Kgr", 0x039A, "kgr", 0x03BA,
    "kgreen", 0x0138, "KHcy", 0x0425, "khcy", 0x0445, "KHgr", 0x03A7,
    "khgr", 0x03C7, "KJcy", 0x040C, "kjcy", 0x045C,
    NULL, 0
};

static const NameId namesL[]={
    "lAarr", 0x21DA, "Lacute", 0x0139, "lacute", 0x013A, "lagran", 0x2112,
    "Lambda", 0x039B, "lambda", 0x03BB, "lang", 0x3008, "lap", 0x2272,
    "laquo", 0x00AB, "larr", 0x2190, "Larr", 0x219E, "lArr", 0x21D0,
    "larr2", 0x21C7, "larrhk", 0x21A9, "larrlp", 0x21AB, "larrtl", 0x21A2,
    "Lcaron", 0x013D, "lcaron", 0x013E, "Lcedil", 0x013B, "lcedil", 0x013C,
    "lceil", 0x2308, "lcub", 0x007B, "Lcy", 0x041B, "lcy", 0x043B,
    "ldot", 0x22D6, "ldquo", 0x201C, "ldquor", 0x201E, "le", 0x2264,
    "lE", 0x2266, "leg", 0x22DA, "lEg", 0x22DA, "les", 0x2264,
    "lfloor", 0x230A, "lg", 0x2276, "Lgr", 0x039B, "lgr", 0x03BB,
    "lhard", 0x21BD, "lharu", 0x21BC, "lhblk", 0x2584, "LJcy", 0x0409,
    "ljcy", 0x0459, "Ll", 0x22D8, "Lmidot", 0x013F, "lmidot", 0x0140,
    "lnap", 0xE2A2, "lne", 0x2268, "lnE", 0x2268, "lnsim", 0x22E6,
    "lowast", 0x2217, "lowbar", 0x005F, "loz", 0x25CA, "lozf", 0x2726,
    "lpar", 0x0028, "lrarr2", 0x21C6, "lrhar2", 0x21CB, "lsh", 0x21B0,
    "lsim", 0x2272, "lsqb", 0x005B, "lsquo", 0x2018, "lsquor", 0x201A,
    "Lstrok", 0x0141, "lstrok", 0x0142, "lt", 0x003C, "Lt", 0x226A,
    "lthree", 0x22CB, "ltimes", 0x22C9, "ltri", 0x25C3, "ltrie", 0x22B4,
    "ltrif", 0x25C2, "lvnE", 0x2268,
    NULL, 0
};

static const NameId namesM[]={
    "macr", 0x00AF, "male", 0x2642, "malt", 0x2720, "map", 0x21A6,
    "marker", 0x25AE, "Mcy", 0x041C, "mcy", 0x043C, "mdash", 0x2014,
    "Mgr", 0x039C, "mgr", 0x03BC, "micro", 0x00B5, "mid", 0x2223,
    "middot", 0x00B7, "minus", 0x2212, "minusb", 0x229F, "mldr", 0x2026,
    "mnplus", 0x2213, "models", 0x22A7, "mu", 0x03BC, "mumap", 0x22B8,
    NULL, 0
};

static const NameId namesN[]={
    "nabla", 0x2207, "Nacute", 0x0143, "nacute", 0x0144, "nap", 0x2249,
    "napos", 0x0149, "natur", 0x266E,
    // "nbsp", 0x00A0,
    "nbsp", 32, // make non-breaking space appear as space
    "Ncaron", 0x0147, "ncaron", 0x0148, "Ncedil", 0x0145, "ncedil", 0x0146,
    "ncong", 0x2247, "Ncy", 0x041D, "ncy", 0x043D, "ndash", 0x2013,
    "ne", 0x2260, "nearr", 0x2197, "nequiv", 0x2262, "nexist", 0x2204,
    "nge", 0x2271, "ngE", 0x2271, "nges", 0x2271, "Ngr", 0x039D,
    "ngr", 0x03BD, "ngt", 0x226F, "nharr", 0x21AE, "nhArr", 0x21CE,
    "ni", 0x220D, "NJcy", 0x040A, "njcy", 0x045A, "nlarr", 0x219A,
    "nlArr", 0x21CD, "nldr", 0x2025, "nle", 0x2270, "nlE", 0x2270,
    "nles", 0x2270, "nlt", 0x226E, "nltri", 0x22EA, "nltrie", 0x22EC,
    "nmid", 0x2224, "not", 0x00AC, "notin", 0x2209, "npar", 0x2226,
    "npr", 0x2280, "npre", 0x22E0, "nrarr", 0x219B, "nrArr", 0x21CF,
    "nrtri", 0x22EB, "nrtrie", 0x22ED, "nsc", 0x2281, "nsce", 0x22E1,
    "nsim", 0x2241, "nsime", 0x2244, "nsmid", 0xE2AA, "nspar", 0x2226,
    "nsub", 0x2284, "nsube", 0x2288, "nsubE", 0x2288, "nsup", 0x2285,
    "nsupe", 0x2289, "nsupE", 0x2289, "Ntilde", 0x00D1, "ntilde", 0x00F1,
    "nu", 0x03BD, "num", 0x0023, "numero", 0x2116, "numsp", 0x2007,
    "nvdash", 0x22AC, "nvDash", 0x22AD, "nVdash", 0x22AE, "nVDash", 0x22AF,
    "nwarr", 0x2196,
    NULL, 0
};

static const NameId namesO[]={
    "Oacgr", 0x038C, "oacgr", 0x03CC, "Oacute", 0x00D3, "oacute", 0x00F3,
    "oast", 0x229B, "ocir", 0x229A, "Ocirc", 0x00D4, "ocirc", 0x00F4,
    "Ocy", 0x041E, "ocy", 0x043E, "odash", 0x229D, "Odblac", 0x0150,
    "odblac", 0x0151, "odot", 0x2299, "OElig", 0x0152, "oelig", 0x0153,
    "ogon", 0x02DB, "Ogr", 0x039F, "ogr", 0x03BF, "Ograve", 0x00D2,
    "ograve", 0x00F2, "OHacgr", 0x038F, "ohacgr", 0x03CE, "OHgr", 0x03A9,
    "ohgr", 0x03C9, "ohm", 0x2126, "olarr", 0x21BA, "Omacr", 0x014C,
    "omacr", 0x014D, "Omega", 0x03A9, "omega", 0x03C9, "ominus", 0x2296,
    "oplus", 0x2295, "or", 0x2228, "orarr", 0x21BB, "order", 0x2134,
    "ordf", 0x00AA, "ordm", 0x00BA, "oS", 0x24C8, "Oslash", 0x00D8,
    "oslash", 0x00F8, "osol", 0x2298, "Otilde", 0x00D5, "otilde", 0x00F5,
    "otimes", 0x2297, "Ouml", 0x00D6, "ouml", 0x00F6,
    NULL, 0
};

static const NameId namesP[]={
    "par", 0x2225, "para", 0x00B6, "part", 0x2202, "Pcy", 0x041F,
    "pcy", 0x043F, "percnt", 0x0025, "period", 0x002E, "permil", 0x2030,
    "perp", 0x22A5, "Pgr", 0x03A0, "pgr", 0x03C0, "PHgr", 0x03A6,
    "phgr", 0x03C6, "Phi", 0x03A6, "phis", 0x03C6, "phiv", 0x03D5,
    "phmmat", 0x2133, "phone", 0x260E, "Pi", 0x03A0, "pi", 0x03C0,
    "piv", 0x03D6, "planck", 0x210F, "plus", 0x002B, "plusb", 0x229E,
    "plusdo", 0x2214, "plusmn", 0x00B1, "pound", 0x00A3, "pr", 0x227A,
    "prap", 0x227E, "pre", 0x227C, "prime", 0x2032, "Prime", 0x2033,
    "prnap", 0x22E8, "prnE", 0xE2B3, "prnsim", 0x22E8, "prod", 0x220F,
    "prop", 0x221D, "prsim", 0x227E, "PSgr", 0x03A8, "psgr", 0x03C8,
    "Psi", 0x03A8, "psi", 0x03C8, "puncsp", 0x2008,
    NULL, 0
};

static const NameId namesQ[]={
    "quest", 0x003F, "quot", 0x0022,
    NULL, 0
};

static const NameId namesR[]={
    "rAarr", 0x21DB, "Racute", 0x0154, "racute", 0x0155, "radic", 0x221A,
    "rang", 0x3009, "raquo", 0x00BB, "rarr", 0x2192, "Rarr", 0x21A0,
    "rArr", 0x21D2, "rarr2", 0x21C9, "rarrhk", 0x21AA, "rarrlp", 0x21AC,
    "rarrtl", 0x21A3, "rarrw", 0x219D, "Rcaron", 0x0158, "rcaron", 0x0159,
    "Rcedil", 0x0156, "rcedil", 0x0157, "rceil", 0x2309, "rcub", 0x007D,
    "Rcy", 0x0420, "rcy", 0x0440, "rdquo", 0x201D, "rdquor", 0x201C,
    "real", 0x211C, "rect", 0x25AD, "reg", 0x00AE, "rfloor", 0x230B,
    "Rgr", 0x03A1, "rgr", 0x03C1, "rhard", 0x21C1, "rharu", 0x21C0,
    "rho", 0x03C1, "rhov", 0x03F1, "ring", 0x02DA, "rlarr2", 0x21C4,
    "rlhar2", 0x21CC, "rpar", 0x0029, "rpargt", 0xE291, "rsh", 0x21B1,
    "rsqb", 0x005D, "rsquo", 0x2019, "rsquor", 0x2018, "rthree", 0x22CC,
    "rtimes", 0x22CA, "rtri", 0x25B9, "rtrie", 0x22B5, "rtrif", 0x25B8,
    "rx", 0x211E,
    NULL, 0
};

static const NameId namesS[]={
    "Sacute", 0x015A, "sacute", 0x015B, "samalg", 0x2210, "sbsol", 0xFE68,
    "sc", 0x227B, "scap", 0x227F, "Scaron", 0x0160, "scaron", 0x0161,
    "sccue", 0x227D, "sce", 0x227D, "Scedil", 0x015E, "scedil", 0x015F,
    "Scirc", 0x015C, "scirc", 0x015D, "scnap", 0x22E9, "scnE", 0xE2B5,
    "scnsim", 0x22E9, "scsim", 0x227F, "Scy", 0x0421, "scy", 0x0441,
    "sdot", 0x22C5, "sdotb", 0x22A1, "sect", 0x00A7, "semi", 0x003B,
    "setmn", 0x2216, "sext", 0x2736, "sfgr", 0x03C2, "sfrown", 0x2322,
    "Sgr", 0x03A3, "sgr", 0x03C3, "sharp", 0x266F, "SHCHcy", 0x0429,
    "shchcy", 0x0449, "SHcy", 0x0428, "shcy", 0x0448, "shy", 0x00AD,
    "Sigma", 0x03A3, "sigma", 0x03C3, "sigmav", 0x03C2, "sim", 0x223C,
    "sime", 0x2243, "smid", 0xE301, "smile", 0x2323, "SOFTcy", 0x042C,
    "softcy", 0x044C, "sol", 0x002F, "spades", 0x2660, "spar", 0x2225,
    "sqcap", 0x2293, "sqcup", 0x2294, "sqsub", 0x228F, "sqsube", 0x2291,
    "sqsup", 0x2290, "sqsupe", 0x2292, "squ", 0x25A1, "square", 0x25A1,
    "squf", 0x25AA, "ssetmn", 0x2216, "ssmile", 0x2323, "sstarf", 0x22C6,
    "star", 0x22C6, "starf", 0x2605, "sub", 0x2282, "Sub", 0x22D0,
    "sube", 0x2286, "subE", 0x2286, "subne", 0x228A, "subnE", 0x228A,
    "sum", 0x2211, "sung", 0x2669, "sup", 0x2283, "Sup", 0x22D1,
    "sup1", 0x00B9, "sup2", 0x00B2, "sup3", 0x00B3, "supe", 0x2287,
    "supE", 0x2287, "supne", 0x228B, "supnE", 0x228B, "szlig", 0x00DF,
    NULL, 0
};

static const NameId namesT[]={
    "target", 0x2316, "tau", 0x03C4, "Tcaron", 0x0164, "tcaron", 0x0165,
    "Tcedil", 0x0162, "tcedil", 0x0163, "Tcy", 0x0422, "tcy", 0x0442,
    "tdot", 0x20DB, "telrec", 0x2315, "Tgr", 0x03A4, "tgr", 0x03C4,
    "there4", 0x2234, "Theta", 0x0398, "thetas", 0x03B8, "thetav", 0x03D1,
    "THgr", 0x0398, "thgr", 0x03B8, "thinsp", 0x2009, "thkap", 0x2248,
    "thksim", 0x223C, "THORN", 0x00DE, "thorn", 0x00FE, "tilde", 0x02DC,
    "times", 0x00D7, "timesb", 0x22A0, "top", 0x22A4, "tprime", 0x2034,
    "trade", 0x2122, "trie", 0x225C, "TScy", 0x0426, "tscy", 0x0446,
    "TSHcy", 0x040B, "tshcy", 0x045B, "Tstrok", 0x0166, "tstrok", 0x0167,
    "twixt", 0x226C,
    NULL, 0
};

static const NameId namesU[]={
    "Uacgr", 0x038E, "uacgr", 0x03CD, "Uacute", 0x00DA, "uacute", 0x00FA,
    "uarr", 0x2191, "uArr", 0x21D1, "uarr2", 0x21C8, "Ubrcy", 0x040E,
    "ubrcy", 0x045E, "Ubreve", 0x016C, "ubreve", 0x016D, "Ucirc", 0x00DB,
    "ucirc", 0x00FB, "Ucy", 0x0423, "ucy", 0x0443, "Udblac", 0x0170,
    "udblac", 0x0171, "udiagr", 0x03B0, "Udigr", 0x03AB, "udigr", 0x03CB,
    "Ugr", 0x03A5, "ugr", 0x03C5, "Ugrave", 0x00D9, "ugrave", 0x00F9,
    "uharl", 0x21BF, "uharr", 0x21BE, "uhblk", 0x2580, "ulcorn", 0x231C,
    "ulcrop", 0x230F, "Umacr", 0x016A, "umacr", 0x016B, "uml", 0x00A8,
    "Uogon", 0x0172, "uogon", 0x0173, "uplus", 0x228E, "upsi", 0x03C5,
    "Upsi", 0x03D2, "urcorn", 0x231D, "urcrop", 0x230E, "Uring", 0x016E,
    "uring", 0x016F, "Utilde", 0x0168, "utilde", 0x0169, "utri", 0x25B5,
    "utrif", 0x25B4, "Uuml", 0x00DC, "uuml", 0x00FC,
    NULL, 0
};

static const NameId namesV[]={
    "varr", 0x2195, "vArr", 0x21D5, "Vcy", 0x0412, "vcy", 0x0432,
    "vdash", 0x22A2, "vDash", 0x22A8, "Vdash", 0x22A9, "veebar", 0x22BB,
    "vellip", 0x22EE, "verbar", 0x007C, "Verbar", 0x2016, "vltri", 0x22B2,
    "vprime", 0x2032, "vprop", 0x221D, "vrtri", 0x22B3, "vsubne", 0x228A,
    "vsubnE", 0xE2B8, "vsupne", 0x228B, "vsupnE", 0x228B, "Vvdash", 0x22AA,
    NULL, 0
};

static const NameId namesW[]={
    "Wcirc", 0x0174, "wcirc", 0x0175, "wedgeq", 0x2259, "weierp", 0x2118,
    "wreath", 0x2240,
    NULL, 0
};

static const NameId namesX[]={
    "xcirc", 0x25CB, "xdtri", 0x25BD, "Xgr", 0x039E, "xgr", 0x03BE,
    "xharr", 0x2194, "xhArr", 0x2194, "Xi", 0x039E, "xi", 0x03BE,
    "xlArr", 0x21D0, "xrArr", 0x21D2, "xutri", 0x25B3,
    NULL, 0
};

static const NameId namesY[]={
    "Yacute", 0x00DD, "yacute", 0x00FD, "YAcy", 0x042F, "yacy", 0x044F,
    "Ycirc", 0x0176, "ycirc", 0x0177, "Ycy", 0x042B, "ycy", 0x044B,
    "yen", 0x00A5, "YIcy", 0x0407, "yicy", 0x0457, "YUcy", 0x042E,
    "yucy", 0x044E, "yuml", 0x00FF, "Yuml", 0x0178,
    NULL, 0
};

static const NameId namesZ[]={
    "Zacute", 0x0179, "zacute", 0x017A, "Zcaron", 0x017D, "zcaron", 0x017E,
    "Zcy", 0x0417, "zcy", 0x0437, "Zdot", 0x017B, "zdot", 0x017C,
    "zeta", 0x03B6, "Zgr", 0x0396, "zgr", 0x03B6, "ZHcy", 0x0416,
    "zhcy", 0x0436,
    NULL, 0
};

// @todo@ order namesTable and names? by frequency
/* Indexed by (lower-cased first letter - 'a'). */
static const NameId *const namesTable[] = {
    namesA, namesB, namesC, namesD, namesE, namesF, namesG, namesH, namesI,
    namesJ, namesK, namesL, namesM, namesN, namesO, namesP, namesQ, namesR,
    namesS, namesT, namesU, namesV, namesW, namesX, namesY, namesZ, NULL
};

/*********************************************
 * Look up the entity name p[0 .. length] (no '&', no ';').
 * Matching is case sensitive and must cover the whole table name.
 * Returns:
 *      the value stored for the entity, or -1 (after calling error())
 *      when the name is not in the tables.
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    // Do not touch *p for an empty name; it cannot match anything anyway.
    if (length > 0)
    {
        // Pick the per-letter table from the first character, ignoring case.
        int tableIndex = tolower(*p) - 'a';
        if (tableIndex >= 0 && tableIndex < 26)
        {
            const NameId *names = namesTable[tableIndex];

            for (int i = 0; names[i].name; i++)
            {
                if (entityNameEquals(names[i].name, p, length))
                    return names[i].value;
            }
        }
    }
    // NOTE(review): error() is not declared in this file; it is presumably
    // provided by the surrounding front end - confirm against the build.
    error("unrecognized character entity \"%.*s\"", length, p);
    return -1;
}

#else

//TODO: Merge Walter's list with Thomas'

static const NameId names[] =
{
    // Entities
    "quot", 34, "amp", 38, "lt", 60, "gt", 62, "OElig", 338,
    "oelig", 339, "Scaron", 352, "scaron", 353, "Yuml", 376, "circ", 710,
    "tilde", 732, "ensp", 8194, "emsp", 8195, "thinsp", 8201, "zwnj", 8204,
    "zwj", 8205, "lrm", 8206, "rlm", 8207, "ndash", 8211, "mdash", 8212,
    "lsquo", 8216, "rsquo", 8217, "sbquo", 8218, "ldquo", 8220, "rdquo", 8221,
    "bdquo", 8222, "dagger", 8224, "Dagger", 8225, "permil", 8240, "lsaquo", 8249,
    "rsaquo", 8250, "euro", 8364,

    // Latin-1 (ISO-8859-1) Entities
    "nbsp", 160, "iexcl", 161, "cent", 162, "pound", 163, "curren", 164,
    "yen", 165, "brvbar", 166, "sect", 167, "uml", 168, "copy", 169,
    "ordf", 170, "laquo", 171, "not", 172, "shy", 173, "reg", 174,
    "macr", 175, "deg", 176, "plusmn", 177, "sup2", 178, "sup3", 179,
    "acute", 180, "micro", 181, "para", 182, "middot", 183, "cedil", 184,
    "sup1", 185, "ordm", 186, "raquo", 187, "frac14", 188, "frac12", 189,
    "frac34", 190, "iquest", 191, "Agrave", 192, "Aacute", 193, "Acirc", 194,
    "Atilde", 195, "Auml", 196, "Aring", 197, "AElig", 198, "Ccedil", 199,
    "Egrave", 200, "Eacute", 201, "Ecirc", 202, "Euml", 203, "Igrave", 204,
    "Iacute", 205, "Icirc", 206, "Iuml", 207, "ETH", 208, "Ntilde", 209,
    "Ograve", 210, "Oacute", 211, "Ocirc", 212, "Otilde", 213, "Ouml", 214,
    "times", 215, "Oslash", 216, "Ugrave", 217, "Uacute", 218, "Ucirc", 219,
    "Uuml", 220, "Yacute", 221, "THORN", 222, "szlig", 223, "agrave", 224,
    "aacute", 225, "acirc", 226, "atilde", 227, "auml", 228, "aring", 229,
    "aelig", 230, "ccedil", 231, "egrave", 232, "eacute", 233, "ecirc", 234,
    "euml", 235, "igrave", 236, "iacute", 237, "icirc", 238, "iuml", 239,
    "eth", 240, "ntilde", 241, "ograve", 242, "oacute", 243, "ocirc", 244,
    "otilde", 245, "ouml", 246, "divide", 247, "oslash", 248, "ugrave", 249,
    "uacute", 250, "ucirc", 251, "uuml", 252, "yacute", 253, "thorn", 254,
    "yuml", 255,

    // Symbols and Greek letter entities
    "fnof", 402, "Alpha", 913, "Beta", 914, "Gamma", 915, "Delta", 916,
    "Epsilon", 917, "Zeta", 918, "Eta", 919, "Theta", 920, "Iota", 921,
    "Kappa", 922, "Lambda", 923, "Mu", 924, "Nu", 925, "Xi", 926,
    "Omicron", 927, "Pi", 928, "Rho", 929, "Sigma", 931, "Tau", 932,
    "Upsilon", 933, "Phi", 934, "Chi", 935, "Psi", 936, "Omega", 937,
    "alpha", 945, "beta", 946, "gamma", 947, "delta", 948, "epsilon", 949,
    "zeta", 950, "eta", 951, "theta", 952, "iota", 953, "kappa", 954,
    "lambda", 955, "mu", 956, "nu", 957, "xi", 958, "omicron", 959,
    "pi", 960, "rho", 961, "sigmaf", 962, "sigma", 963, "tau", 964,
    "upsilon", 965, "phi", 966, "chi", 967, "psi", 968, "omega", 969,
    "thetasym", 977, "upsih", 978, "piv", 982, "bull", 8226, "hellip", 8230,
    "prime", 8242, "Prime", 8243, "oline", 8254, "frasl", 8260, "weierp", 8472,
    "image", 8465, "real", 8476, "trade", 8482, "alefsym", 8501, "larr", 8592,
    "uarr", 8593, "rarr", 8594, "darr", 8595, "harr", 8596, "crarr", 8629,
    "lArr", 8656, "uArr", 8657, "rArr", 8658, "dArr", 8659, "hArr", 8660,
    "forall", 8704, "part", 8706, "exist", 8707, "empty", 8709, "nabla", 8711,
    "isin", 8712, "notin", 8713, "ni", 8715, "prod", 8719, "sum", 8721,
    "minus", 8722, "lowast", 8727, "radic", 8730, "prop", 8733, "infin", 8734,
    "ang", 8736, "and", 8743, "or", 8744, "cap", 8745, "cup", 8746,
    "int", 8747, "there4", 8756, "sim", 8764, "cong", 8773, "asymp", 8776,
    "ne", 8800, "equiv", 8801, "le", 8804, "ge", 8805, "sub", 8834,
    "sup", 8835, "nsub", 8836, "sube", 8838, "supe", 8839, "oplus", 8853,
    "otimes", 8855, "perp", 8869, "sdot", 8901, "lceil", 8968, "rceil", 8969,
    "lfloor", 8970, "rfloor", 8971, "lang", 9001, "rang", 9002, "loz", 9674,
    "spades", 9824, "clubs", 9827, "hearts", 9829, "diams", 9830,
};

/*********************************************
 * Look up the entity name p[0 .. length] (no '&', no ';').
 * Entries are case sensitive and must match the whole table name.
 * Returns:
 *      the value stored for the entity, or -1 when it is not in names[].
 */
int HtmlNamedEntity(unsigned char *p, int length)
{
    // BUG: this is a dumb, slow linear search
    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    {
        if (entityNameEquals(names[i].name, p, length))
            return names[i].value;
    }
    return -1;
}

#endif