comparison dmd/entity.c @ 1:c53b6e3fe49a trunk

[svn r5] Initial commit. Most things are very rough.
author lindquist
date Sat, 01 Sep 2007 21:43:27 +0200
parents
children b30fe7e1dbb9
comparison
equal deleted inserted replaced
0:a9e71648e74d 1:c53b6e3fe49a
1
2 // Copyright (c) 1999-2006 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // License for redistribution is by either the Artistic License
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
8 // See the included readme.txt for details.
9
10
11 #include <string.h>
12
13 /*********************************************
14 * Convert from named entity to its encoding.
15 * For reference:
16 * http://www.htmlhelp.com/reference/html40/entities/
17 * http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
18 */
19
// One row of an entity table: an entity name and the value it maps to.
// The tables in this file fill `name` from string literals, so the pointer
// is const-qualified: the text must never be written through it.
struct NameId
{
    const char *name;       // entity name as it appears in the tables (no '&' or ';')
    unsigned short value;   // 16-bit value returned for that name
};
25
26 #if IN_GCC
27 static NameId namesA[]={
28 "Aacgr", 0x0386,
29 "aacgr", 0x03AC,
30 "Aacute", 0x00C1,
31 "aacute", 0x00E1,
32 "Abreve", 0x0102,
33 "abreve", 0x0103,
34 "Acirc", 0x00C2,
35 "acirc", 0x00E2,
36 "acute", 0x00B4,
37 "Acy", 0x0410,
38 "acy", 0x0430,
39 "AElig", 0x00C6,
40 "aelig", 0x00E6,
41 "Agr", 0x0391,
42 "agr", 0x03B1,
43 "Agrave", 0x00C0,
44 "agrave", 0x00E0,
45 "aleph", 0x2135,
46 "alpha", 0x03B1,
47 "Amacr", 0x0100,
48 "amacr", 0x0101,
49 "amalg", 0x2210,
50 "amp", 0x0026,
51 "and", 0x2227,
52 "ang", 0x2220,
53 "ang90", 0x221F,
54 "angmsd", 0x2221,
55 "angsph", 0x2222,
56 "angst", 0x212B,
57 "Aogon", 0x0104,
58 "aogon", 0x0105,
59 "ap", 0x2248,
60 "ape", 0x224A,
61 "apos", 0x0027,
62 "Aring", 0x00C5,
63 "aring", 0x00E5,
64 "ast", 0x002A,
65 "asymp", 0x224D,
66 "Atilde", 0x00C3,
67 "atilde", 0x00E3,
68 "Auml", 0x00C4,
69 "auml", 0x00E4,
70 NULL, 0
71 };
72
73 static NameId namesB[]={
74 "barwed", 0x22BC,
75 "Barwed", 0x2306,
76 "bcong", 0x224C,
77 "Bcy", 0x0411,
78 "bcy", 0x0431,
79 "becaus", 0x2235,
80 "bepsi", 0x220D,
81 "bernou", 0x212C,
82 "beta", 0x03B2,
83 "beth", 0x2136,
84 "Bgr", 0x0392,
85 "bgr", 0x03B2,
86 "blank", 0x2423,
87 "blk12", 0x2592,
88 "blk14", 0x2591,
89 "blk34", 0x2593,
90 "block", 0x2588,
91 "bottom", 0x22A5,
92 "bowtie", 0x22C8,
93 "boxdl", 0x2510,
94 "boxDL", 0x2555,
95 "boxdL", 0x2556,
96 "boxDl", 0x2557,
97 "boxdr", 0x250C,
98 "boxDR", 0x2552,
99 "boxDr", 0x2553,
100 "boxdR", 0x2554,
101 "boxh", 0x2500,
102 "boxH", 0x2550,
103 "boxhd", 0x252C,
104 "boxhD", 0x2564,
105 "boxHD", 0x2565,
106 "boxHd", 0x2566,
107 "boxhu", 0x2534,
108 "boxhU", 0x2567,
109 "boxHU", 0x2568,
110 "boxHu", 0x2569,
111 "boxul", 0x2518,
112 "boxUL", 0x255B,
113 "boxUl", 0x255C,
114 "boxuL", 0x255D,
115 "boxur", 0x2514,
116 "boxUR", 0x2558,
117 "boxuR", 0x2559,
118 "boxUr", 0x255A,
119 "boxv", 0x2502,
120 "boxV", 0x2551,
121 "boxvh", 0x253C,
122 "boxvH", 0x256A,
123 "boxVH", 0x256B,
124 "boxVh", 0x256C,
125 "boxvl", 0x2524,
126 "boxvL", 0x2561,
127 "boxVL", 0x2562,
128 "boxVl", 0x2563,
129 "boxvr", 0x251C,
130 "boxvR", 0x255E,
131 "boxVR", 0x255F,
132 "boxVr", 0x2560,
133 "bprime", 0x2035,
134 "breve", 0x02D8,
135 "brvbar", 0x00A6,
136 "bsim", 0x223D,
137 "bsime", 0x22CD,
138 "bsol", 0x005C,
139 "bull", 0x2022,
140 "bump", 0x224E,
141 "bumpe", 0x224F,
142 NULL, 0
143 };
144
145 static NameId namesC[]={
146 "Cacute", 0x0106,
147 "cacute", 0x0107,
148 "cap", 0x2229,
149 "Cap", 0x22D2,
150 "caret", 0x2041,
151 "caron", 0x02C7,
152 "Ccaron", 0x010C,
153 "ccaron", 0x010D,
154 "Ccedil", 0x00C7,
155 "ccedil", 0x00E7,
156 "Ccirc", 0x0108,
157 "ccirc", 0x0109,
158 "Cdot", 0x010A,
159 "cdot", 0x010B,
160 "cedil", 0x00B8,
161 "cent", 0x00A2,
162 "CHcy", 0x0427,
163 "chcy", 0x0447,
164 "check", 0x2713,
165 "chi", 0x03C7,
166 "cir", 0x25CB,
167 "circ", 0x005E,
168 "cire", 0x2257,
169 "clubs", 0x2663,
170 "colon", 0x003A,
171 "colone", 0x2254,
172 "comma", 0x002C,
173 "commat", 0x0040,
174 "comp", 0x2201,
175 "compfn", 0x2218,
176 "cong", 0x2245,
177 "conint", 0x222E,
178 "coprod", 0x2210,
179 "copy", 0x00A9,
180 "copysr", 0x2117,
181 "cross", 0x2717,
182 "cuepr", 0x22DE,
183 "cuesc", 0x22DF,
184 "cularr", 0x21B6,
185 "cup", 0x222A,
186 "Cup", 0x22D3,
187 "cupre", 0x227C,
188 "curarr", 0x21B7,
189 "curren", 0x00A4,
190 "cuvee", 0x22CE,
191 "cuwed", 0x22CF,
192 NULL, 0
193 };
194
195 static NameId namesD[]={
196 "dagger", 0x2020,
197 "Dagger", 0x2021,
198 "daleth", 0x2138,
199 "darr", 0x2193,
200 "dArr", 0x21D3,
201 "darr2", 0x21CA,
202 "dash", 0x2010,
203 "dashv", 0x22A3,
204 "dblac", 0x02DD,
205 "Dcaron", 0x010E,
206 "dcaron", 0x010F,
207 "Dcy", 0x0414,
208 "dcy", 0x0434,
209 "deg", 0x00B0,
210 "Delta", 0x0394,
211 "delta", 0x03B4,
212 "Dgr", 0x0394,
213 "dgr", 0x03B4,
214 "dharl", 0x21C3,
215 "dharr", 0x21C2,
216 "diam", 0x22C4,
217 "diams", 0x2666,
218 "die", 0x00A8,
219 "divide", 0x00F7,
220 "divonx", 0x22C7,
221 "DJcy", 0x0402,
222 "djcy", 0x0452,
223 "dlarr", 0x2199,
224 "dlcorn", 0x231E,
225 "dlcrop", 0x230D,
226 "dollar", 0x0024,
227 "Dot", 0x00A8,
228 "dot", 0x02D9,
229 "DotDot", 0x20DC,
230 "drarr", 0x2198,
231 "drcorn", 0x231F,
232 "drcrop", 0x230C,
233 "DScy", 0x0405,
234 "dscy", 0x0455,
235 "Dstrok", 0x0110,
236 "dstrok", 0x0111,
237 "dtri", 0x25BF,
238 "dtrif", 0x25BE,
239 "DZcy", 0x040F,
240 "dzcy", 0x045F,
241 NULL, 0
242 };
243
244 static NameId namesE[]={
245 "Eacgr", 0x0388,
246 "eacgr", 0x03AD,
247 "Eacute", 0x00C9,
248 "eacute", 0x00E9,
249 "Ecaron", 0x011A,
250 "ecaron", 0x011B,
251 "ecir", 0x2256,
252 "Ecirc", 0x00CA,
253 "ecirc", 0x00EA,
254 "ecolon", 0x2255,
255 "Ecy", 0x042D,
256 "ecy", 0x044D,
257 "Edot", 0x0116,
258 "edot", 0x0117,
259 "eDot", 0x2251,
260 "EEacgr", 0x0389,
261 "eeacgr", 0x03AE,
262 "EEgr", 0x0397,
263 "eegr", 0x03B7,
264 "efDot", 0x2252,
265 "Egr", 0x0395,
266 "egr", 0x03B5,
267 "Egrave", 0x00C8,
268 "egrave", 0x00E8,
269 "egs", 0x22DD,
270 "ell", 0x2113,
271 "els", 0x22DC,
272 "Emacr", 0x0112,
273 "emacr", 0x0113,
274 "empty", 0x2205,
275 "emsp", 0x2003,
276 "emsp13", 0x2004,
277 "emsp14", 0x2005,
278 "ENG", 0x014A,
279 "eng", 0x014B,
280 "ensp", 0x2002,
281 "Eogon", 0x0118,
282 "eogon", 0x0119,
283 "epsi", 0x220A,
284 "epsis", 0x220A,
285 "epsiv", 0x03B5,
286 "equals", 0x003D,
287 "equiv", 0x2261,
288 "erDot", 0x2253,
289 "esdot", 0x2250,
290 "eta", 0x03B7,
291 "ETH", 0x00D0,
292 "eth", 0x00F0,
293 "Euml", 0x00CB,
294 "euml", 0x00EB,
295 "excl", 0x0021,
296 "exist", 0x2203,
297 NULL, 0
298 };
299
300 static NameId namesF[]={
301 "Fcy", 0x0424,
302 "fcy", 0x0444,
303 "female", 0x2640,
304 "ffilig", 0xFB03,
305 "fflig", 0xFB00,
306 "ffllig", 0xFB04,
307 "filig", 0xFB01,
308 "flat", 0x266D,
309 "fllig", 0xFB02,
310 "fnof", 0x0192,
311 "forall", 0x2200,
312 "fork", 0x22D4,
313 "frac12", 0x00BD,
314 "frac13", 0x2153,
315 "frac14", 0x00BC,
316 "frac15", 0x2155,
317 "frac16", 0x2159,
318 "frac18", 0x215B,
319 "frac23", 0x2154,
320 "frac25", 0x2156,
321 "frac34", 0x00BE,
322 "frac35", 0x2157,
323 "frac38", 0x215C,
324 "frac45", 0x2158,
325 "frac56", 0x215A,
326 "frac58", 0x215D,
327 "frac78", 0x215E,
328 "frown", 0x2322,
329 NULL, 0
330 };
331
332 static NameId namesG[]={
333 "gacute", 0x01F5,
334 "Gamma", 0x0393,
335 "gamma", 0x03B3,
336 "gammad", 0x03DC,
337 "gap", 0x2273,
338 "Gbreve", 0x011E,
339 "gbreve", 0x011F,
340 "Gcedil", 0x0122,
341 "Gcirc", 0x011C,
342 "gcirc", 0x011D,
343 "Gcy", 0x0413,
344 "gcy", 0x0433,
345 "Gdot", 0x0120,
346 "gdot", 0x0121,
347 "ge", 0x2265,
348 "gE", 0x2267,
349 "gel", 0x22DB,
350 "gEl", 0x22DB,
351 "ges", 0x2265,
352 "Gg", 0x22D9,
353 "Ggr", 0x0393,
354 "ggr", 0x03B3,
355 "gimel", 0x2137,
356 "GJcy", 0x0403,
357 "gjcy", 0x0453,
358 "gl", 0x2277,
359 "gnap", 0xE411,
360 "gne", 0x2269,
361 "gnE", 0x2269,
362 "gnsim", 0x22E7,
363 "grave", 0x0060,
364 "gsdot", 0x22D7,
365 "gsim", 0x2273,
366 "gt", 0x003E,
367 "Gt", 0x226B,
368 "gvnE", 0x2269,
369 NULL, 0
370 };
371
372 static NameId namesH[]={
373 "hairsp", 0x200A,
374 "half", 0x00BD,
375 "hamilt", 0x210B,
376 "HARDcy", 0x042A,
377 "hardcy", 0x044A,
378 "harr", 0x2194,
379 "hArr", 0x21D4,
380 "harrw", 0x21AD,
381 "Hcirc", 0x0124,
382 "hcirc", 0x0125,
383 "hearts", 0x2665,
384 "hellip", 0x2026,
385 "horbar", 0x2015,
386 "Hstrok", 0x0126,
387 "hstrok", 0x0127,
388 "hybull", 0x2043,
389 "hyphen", 0x002D,
390 NULL, 0
391 };
392
393 static NameId namesI[]={
394 "Iacgr", 0x038A,
395 "iacgr", 0x03AF,
396 "Iacute", 0x00CD,
397 "iacute", 0x00ED,
398 "Icirc", 0x00CE,
399 "icirc", 0x00EE,
400 "Icy", 0x0418,
401 "icy", 0x0438,
402 "idiagr", 0x0390,
403 "Idigr", 0x03AA,
404 "idigr", 0x03CA,
405 "Idot", 0x0130,
406 "IEcy", 0x0415,
407 "iecy", 0x0435,
408 "iexcl", 0x00A1,
409 "iff", 0x21D4,
410 "Igr", 0x0399,
411 "igr", 0x03B9,
412 "Igrave", 0x00CC,
413 "igrave", 0x00EC,
414 "IJlig", 0x0132,
415 "ijlig", 0x0133,
416 "Imacr", 0x012A,
417 "imacr", 0x012B,
418 "image", 0x2111,
419 "incare", 0x2105,
420 "infin", 0x221E,
421 "inodot", 0x0131,
422 "int", 0x222B,
423 "intcal", 0x22BA,
424 "IOcy", 0x0401,
425 "iocy", 0x0451,
426 "Iogon", 0x012E,
427 "iogon", 0x012F,
428 "iota", 0x03B9,
429 "iquest", 0x00BF,
430 "isin", 0x220A,
431 "Itilde", 0x0128,
432 "itilde", 0x0129,
433 "Iukcy", 0x0406,
434 "iukcy", 0x0456,
435 "Iuml", 0x00CF,
436 "iuml", 0x00EF,
437 NULL, 0
438 };
439
440 static NameId namesJ[]={
441 "Jcirc", 0x0134,
442 "jcirc", 0x0135,
443 "Jcy", 0x0419,
444 "jcy", 0x0439,
445 "Jsercy", 0x0408,
446 "jsercy", 0x0458,
447 "Jukcy", 0x0404,
448 "jukcy", 0x0454,
449 NULL, 0
450 };
451
452 static NameId namesK[]={
453 "kappa", 0x03BA,
454 "kappav", 0x03F0,
455 "Kcedil", 0x0136,
456 "kcedil", 0x0137,
457 "Kcy", 0x041A,
458 "kcy", 0x043A,
459 "Kgr", 0x039A,
460 "kgr", 0x03BA,
461 "kgreen", 0x0138,
462 "KHcy", 0x0425,
463 "khcy", 0x0445,
464 "KHgr", 0x03A7,
465 "khgr", 0x03C7,
466 "KJcy", 0x040C,
467 "kjcy", 0x045C,
468 NULL, 0
469 };
470
471 static NameId namesL[]={
472 "lAarr", 0x21DA,
473 "Lacute", 0x0139,
474 "lacute", 0x013A,
475 "lagran", 0x2112,
476 "Lambda", 0x039B,
477 "lambda", 0x03BB,
478 "lang", 0x3008,
479 "lap", 0x2272,
480 "laquo", 0x00AB,
481 "larr", 0x2190,
482 "Larr", 0x219E,
483 "lArr", 0x21D0,
484 "larr2", 0x21C7,
485 "larrhk", 0x21A9,
486 "larrlp", 0x21AB,
487 "larrtl", 0x21A2,
488 "Lcaron", 0x013D,
489 "lcaron", 0x013E,
490 "Lcedil", 0x013B,
491 "lcedil", 0x013C,
492 "lceil", 0x2308,
493 "lcub", 0x007B,
494 "Lcy", 0x041B,
495 "lcy", 0x043B,
496 "ldot", 0x22D6,
497 "ldquo", 0x201C,
498 "ldquor", 0x201E,
499 "le", 0x2264,
500 "lE", 0x2266,
501 "leg", 0x22DA,
502 "lEg", 0x22DA,
503 "les", 0x2264,
504 "lfloor", 0x230A,
505 "lg", 0x2276,
506 "Lgr", 0x039B,
507 "lgr", 0x03BB,
508 "lhard", 0x21BD,
509 "lharu", 0x21BC,
510 "lhblk", 0x2584,
511 "LJcy", 0x0409,
512 "ljcy", 0x0459,
513 "Ll", 0x22D8,
514 "Lmidot", 0x013F,
515 "lmidot", 0x0140,
516 "lnap", 0xE2A2,
517 "lne", 0x2268,
518 "lnE", 0x2268,
519 "lnsim", 0x22E6,
520 "lowast", 0x2217,
521 "lowbar", 0x005F,
522 "loz", 0x25CA,
523 "lozf", 0x2726,
524 "lpar", 0x0028,
525 "lrarr2", 0x21C6,
526 "lrhar2", 0x21CB,
527 "lsh", 0x21B0,
528 "lsim", 0x2272,
529 "lsqb", 0x005B,
530 "lsquo", 0x2018,
531 "lsquor", 0x201A,
532 "Lstrok", 0x0141,
533 "lstrok", 0x0142,
534 "lt", 0x003C,
535 "Lt", 0x226A,
536 "lthree", 0x22CB,
537 "ltimes", 0x22C9,
538 "ltri", 0x25C3,
539 "ltrie", 0x22B4,
540 "ltrif", 0x25C2,
541 "lvnE", 0x2268,
542 NULL, 0
543 };
544
545 static NameId namesM[]={
546 "macr", 0x00AF,
547 "male", 0x2642,
548 "malt", 0x2720,
549 "map", 0x21A6,
550 "marker", 0x25AE,
551 "Mcy", 0x041C,
552 "mcy", 0x043C,
553 "mdash", 0x2014,
554 "Mgr", 0x039C,
555 "mgr", 0x03BC,
556 "micro", 0x00B5,
557 "mid", 0x2223,
558 "middot", 0x00B7,
559 "minus", 0x2212,
560 "minusb", 0x229F,
561 "mldr", 0x2026,
562 "mnplus", 0x2213,
563 "models", 0x22A7,
564 "mu", 0x03BC,
565 "mumap", 0x22B8,
566 NULL, 0
567 };
568
569 static NameId namesN[]={
570 "nabla", 0x2207,
571 "Nacute", 0x0143,
572 "nacute", 0x0144,
573 "nap", 0x2249,
574 "napos", 0x0149,
575 "natur", 0x266E,
576 // "nbsp", 0x00A0,
577 "nbsp", 32, // make non-breaking space appear as space
578 "Ncaron", 0x0147,
579 "ncaron", 0x0148,
580 "Ncedil", 0x0145,
581 "ncedil", 0x0146,
582 "ncong", 0x2247,
583 "Ncy", 0x041D,
584 "ncy", 0x043D,
585 "ndash", 0x2013,
586 "ne", 0x2260,
587 "nearr", 0x2197,
588 "nequiv", 0x2262,
589 "nexist", 0x2204,
590 "nge", 0x2271,
591 "ngE", 0x2271,
592 "nges", 0x2271,
593 "Ngr", 0x039D,
594 "ngr", 0x03BD,
595 "ngt", 0x226F,
596 "nharr", 0x21AE,
597 "nhArr", 0x21CE,
598 "ni", 0x220D,
599 "NJcy", 0x040A,
600 "njcy", 0x045A,
601 "nlarr", 0x219A,
602 "nlArr", 0x21CD,
603 "nldr", 0x2025,
604 "nle", 0x2270,
605 "nlE", 0x2270,
606 "nles", 0x2270,
607 "nlt", 0x226E,
608 "nltri", 0x22EA,
609 "nltrie", 0x22EC,
610 "nmid", 0x2224,
611 "not", 0x00AC,
612 "notin", 0x2209,
613 "npar", 0x2226,
614 "npr", 0x2280,
615 "npre", 0x22E0,
616 "nrarr", 0x219B,
617 "nrArr", 0x21CF,
618 "nrtri", 0x22EB,
619 "nrtrie", 0x22ED,
620 "nsc", 0x2281,
621 "nsce", 0x22E1,
622 "nsim", 0x2241,
623 "nsime", 0x2244,
624 "nsmid", 0xE2AA,
625 "nspar", 0x2226,
626 "nsub", 0x2284,
627 "nsube", 0x2288,
628 "nsubE", 0x2288,
629 "nsup", 0x2285,
630 "nsupe", 0x2289,
631 "nsupE", 0x2289,
632 "Ntilde", 0x00D1,
633 "ntilde", 0x00F1,
634 "nu", 0x03BD,
635 "num", 0x0023,
636 "numero", 0x2116,
637 "numsp", 0x2007,
638 "nvdash", 0x22AC,
639 "nvDash", 0x22AD,
640 "nVdash", 0x22AE,
641 "nVDash", 0x22AF,
642 "nwarr", 0x2196,
643 NULL, 0
644 };
645
646 static NameId namesO[]={
647 "Oacgr", 0x038C,
648 "oacgr", 0x03CC,
649 "Oacute", 0x00D3,
650 "oacute", 0x00F3,
651 "oast", 0x229B,
652 "ocir", 0x229A,
653 "Ocirc", 0x00D4,
654 "ocirc", 0x00F4,
655 "Ocy", 0x041E,
656 "ocy", 0x043E,
657 "odash", 0x229D,
658 "Odblac", 0x0150,
659 "odblac", 0x0151,
660 "odot", 0x2299,
661 "OElig", 0x0152,
662 "oelig", 0x0153,
663 "ogon", 0x02DB,
664 "Ogr", 0x039F,
665 "ogr", 0x03BF,
666 "Ograve", 0x00D2,
667 "ograve", 0x00F2,
668 "OHacgr", 0x038F,
669 "ohacgr", 0x03CE,
670 "OHgr", 0x03A9,
671 "ohgr", 0x03C9,
672 "ohm", 0x2126,
673 "olarr", 0x21BA,
674 "Omacr", 0x014C,
675 "omacr", 0x014D,
676 "Omega", 0x03A9,
677 "omega", 0x03C9,
678 "ominus", 0x2296,
679 "oplus", 0x2295,
680 "or", 0x2228,
681 "orarr", 0x21BB,
682 "order", 0x2134,
683 "ordf", 0x00AA,
684 "ordm", 0x00BA,
685 "oS", 0x24C8,
686 "Oslash", 0x00D8,
687 "oslash", 0x00F8,
688 "osol", 0x2298,
689 "Otilde", 0x00D5,
690 "otilde", 0x00F5,
691 "otimes", 0x2297,
692 "Ouml", 0x00D6,
693 "ouml", 0x00F6,
694 NULL, 0
695 };
696
697 static NameId namesP[]={
698 "par", 0x2225,
699 "para", 0x00B6,
700 "part", 0x2202,
701 "Pcy", 0x041F,
702 "pcy", 0x043F,
703 "percnt", 0x0025,
704 "period", 0x002E,
705 "permil", 0x2030,
706 "perp", 0x22A5,
707 "Pgr", 0x03A0,
708 "pgr", 0x03C0,
709 "PHgr", 0x03A6,
710 "phgr", 0x03C6,
711 "Phi", 0x03A6,
712 "phis", 0x03C6,
713 "phiv", 0x03D5,
714 "phmmat", 0x2133,
715 "phone", 0x260E,
716 "Pi", 0x03A0,
717 "pi", 0x03C0,
718 "piv", 0x03D6,
719 "planck", 0x210F,
720 "plus", 0x002B,
721 "plusb", 0x229E,
722 "plusdo", 0x2214,
723 "plusmn", 0x00B1,
724 "pound", 0x00A3,
725 "pr", 0x227A,
726 "prap", 0x227E,
727 "pre", 0x227C,
728 "prime", 0x2032,
729 "Prime", 0x2033,
730 "prnap", 0x22E8,
731 "prnE", 0xE2B3,
732 "prnsim", 0x22E8,
733 "prod", 0x220F,
734 "prop", 0x221D,
735 "prsim", 0x227E,
736 "PSgr", 0x03A8,
737 "psgr", 0x03C8,
738 "Psi", 0x03A8,
739 "psi", 0x03C8,
740 "puncsp", 0x2008,
741 NULL, 0
742 };
743
744 static NameId namesQ[]={
745 "quest", 0x003F,
746 "quot", 0x0022,
747 NULL, 0
748 };
749
750 static NameId namesR[]={
751 "rAarr", 0x21DB,
752 "Racute", 0x0154,
753 "racute", 0x0155,
754 "radic", 0x221A,
755 "rang", 0x3009,
756 "raquo", 0x00BB,
757 "rarr", 0x2192,
758 "Rarr", 0x21A0,
759 "rArr", 0x21D2,
760 "rarr2", 0x21C9,
761 "rarrhk", 0x21AA,
762 "rarrlp", 0x21AC,
763 "rarrtl", 0x21A3,
764 "rarrw", 0x219D,
765 "Rcaron", 0x0158,
766 "rcaron", 0x0159,
767 "Rcedil", 0x0156,
768 "rcedil", 0x0157,
769 "rceil", 0x2309,
770 "rcub", 0x007D,
771 "Rcy", 0x0420,
772 "rcy", 0x0440,
773 "rdquo", 0x201D,
774 "rdquor", 0x201C,
775 "real", 0x211C,
776 "rect", 0x25AD,
777 "reg", 0x00AE,
778 "rfloor", 0x230B,
779 "Rgr", 0x03A1,
780 "rgr", 0x03C1,
781 "rhard", 0x21C1,
782 "rharu", 0x21C0,
783 "rho", 0x03C1,
784 "rhov", 0x03F1,
785 "ring", 0x02DA,
786 "rlarr2", 0x21C4,
787 "rlhar2", 0x21CC,
788 "rpar", 0x0029,
789 "rpargt", 0xE291,
790 "rsh", 0x21B1,
791 "rsqb", 0x005D,
792 "rsquo", 0x2019,
793 "rsquor", 0x2018,
794 "rthree", 0x22CC,
795 "rtimes", 0x22CA,
796 "rtri", 0x25B9,
797 "rtrie", 0x22B5,
798 "rtrif", 0x25B8,
799 "rx", 0x211E,
800 NULL, 0
801 };
802
803 static NameId namesS[]={
804 "Sacute", 0x015A,
805 "sacute", 0x015B,
806 "samalg", 0x2210,
807 "sbsol", 0xFE68,
808 "sc", 0x227B,
809 "scap", 0x227F,
810 "Scaron", 0x0160,
811 "scaron", 0x0161,
812 "sccue", 0x227D,
813 "sce", 0x227D,
814 "Scedil", 0x015E,
815 "scedil", 0x015F,
816 "Scirc", 0x015C,
817 "scirc", 0x015D,
818 "scnap", 0x22E9,
819 "scnE", 0xE2B5,
820 "scnsim", 0x22E9,
821 "scsim", 0x227F,
822 "Scy", 0x0421,
823 "scy", 0x0441,
824 "sdot", 0x22C5,
825 "sdotb", 0x22A1,
826 "sect", 0x00A7,
827 "semi", 0x003B,
828 "setmn", 0x2216,
829 "sext", 0x2736,
830 "sfgr", 0x03C2,
831 "sfrown", 0x2322,
832 "Sgr", 0x03A3,
833 "sgr", 0x03C3,
834 "sharp", 0x266F,
835 "SHCHcy", 0x0429,
836 "shchcy", 0x0449,
837 "SHcy", 0x0428,
838 "shcy", 0x0448,
839 "shy", 0x00AD,
840 "Sigma", 0x03A3,
841 "sigma", 0x03C3,
842 "sigmav", 0x03C2,
843 "sim", 0x223C,
844 "sime", 0x2243,
845 "smid", 0xE301,
846 "smile", 0x2323,
847 "SOFTcy", 0x042C,
848 "softcy", 0x044C,
849 "sol", 0x002F,
850 "spades", 0x2660,
851 "spar", 0x2225,
852 "sqcap", 0x2293,
853 "sqcup", 0x2294,
854 "sqsub", 0x228F,
855 "sqsube", 0x2291,
856 "sqsup", 0x2290,
857 "sqsupe", 0x2292,
858 "squ", 0x25A1,
859 "square", 0x25A1,
860 "squf", 0x25AA,
861 "ssetmn", 0x2216,
862 "ssmile", 0x2323,
863 "sstarf", 0x22C6,
864 "star", 0x22C6,
865 "starf", 0x2605,
866 "sub", 0x2282,
867 "Sub", 0x22D0,
868 "sube", 0x2286,
869 "subE", 0x2286,
870 "subne", 0x228A,
871 "subnE", 0x228A,
872 "sum", 0x2211,
873 "sung", 0x2669,
874 "sup", 0x2283,
875 "Sup", 0x22D1,
876 "sup1", 0x00B9,
877 "sup2", 0x00B2,
878 "sup3", 0x00B3,
879 "supe", 0x2287,
880 "supE", 0x2287,
881 "supne", 0x228B,
882 "supnE", 0x228B,
883 "szlig", 0x00DF,
884 NULL, 0
885 };
886
887 static NameId namesT[]={
888 "target", 0x2316,
889 "tau", 0x03C4,
890 "Tcaron", 0x0164,
891 "tcaron", 0x0165,
892 "Tcedil", 0x0162,
893 "tcedil", 0x0163,
894 "Tcy", 0x0422,
895 "tcy", 0x0442,
896 "tdot", 0x20DB,
897 "telrec", 0x2315,
898 "Tgr", 0x03A4,
899 "tgr", 0x03C4,
900 "there4", 0x2234,
901 "Theta", 0x0398,
902 "thetas", 0x03B8,
903 "thetav", 0x03D1,
904 "THgr", 0x0398,
905 "thgr", 0x03B8,
906 "thinsp", 0x2009,
907 "thkap", 0x2248,
908 "thksim", 0x223C,
909 "THORN", 0x00DE,
910 "thorn", 0x00FE,
911 "tilde", 0x02DC,
912 "times", 0x00D7,
913 "timesb", 0x22A0,
914 "top", 0x22A4,
915 "tprime", 0x2034,
916 "trade", 0x2122,
917 "trie", 0x225C,
918 "TScy", 0x0426,
919 "tscy", 0x0446,
920 "TSHcy", 0x040B,
921 "tshcy", 0x045B,
922 "Tstrok", 0x0166,
923 "tstrok", 0x0167,
924 "twixt", 0x226C,
925 NULL, 0
926 };
927
928 static NameId namesU[]={
929 "Uacgr", 0x038E,
930 "uacgr", 0x03CD,
931 "Uacute", 0x00DA,
932 "uacute", 0x00FA,
933 "uarr", 0x2191,
934 "uArr", 0x21D1,
935 "uarr2", 0x21C8,
936 "Ubrcy", 0x040E,
937 "ubrcy", 0x045E,
938 "Ubreve", 0x016C,
939 "ubreve", 0x016D,
940 "Ucirc", 0x00DB,
941 "ucirc", 0x00FB,
942 "Ucy", 0x0423,
943 "ucy", 0x0443,
944 "Udblac", 0x0170,
945 "udblac", 0x0171,
946 "udiagr", 0x03B0,
947 "Udigr", 0x03AB,
948 "udigr", 0x03CB,
949 "Ugr", 0x03A5,
950 "ugr", 0x03C5,
951 "Ugrave", 0x00D9,
952 "ugrave", 0x00F9,
953 "uharl", 0x21BF,
954 "uharr", 0x21BE,
955 "uhblk", 0x2580,
956 "ulcorn", 0x231C,
957 "ulcrop", 0x230F,
958 "Umacr", 0x016A,
959 "umacr", 0x016B,
960 "uml", 0x00A8,
961 "Uogon", 0x0172,
962 "uogon", 0x0173,
963 "uplus", 0x228E,
964 "upsi", 0x03C5,
965 "Upsi", 0x03D2,
966 "urcorn", 0x231D,
967 "urcrop", 0x230E,
968 "Uring", 0x016E,
969 "uring", 0x016F,
970 "Utilde", 0x0168,
971 "utilde", 0x0169,
972 "utri", 0x25B5,
973 "utrif", 0x25B4,
974 "Uuml", 0x00DC,
975 "uuml", 0x00FC,
976 NULL, 0
977 };
978
979 static NameId namesV[]={
980 "varr", 0x2195,
981 "vArr", 0x21D5,
982 "Vcy", 0x0412,
983 "vcy", 0x0432,
984 "vdash", 0x22A2,
985 "vDash", 0x22A8,
986 "Vdash", 0x22A9,
987 "veebar", 0x22BB,
988 "vellip", 0x22EE,
989 "verbar", 0x007C,
990 "Verbar", 0x2016,
991 "vltri", 0x22B2,
992 "vprime", 0x2032,
993 "vprop", 0x221D,
994 "vrtri", 0x22B3,
995 "vsubne", 0x228A,
996 "vsubnE", 0xE2B8,
997 "vsupne", 0x228B,
998 "vsupnE", 0x228B,
999 "Vvdash", 0x22AA,
1000 NULL, 0
1001 };
1002
1003 static NameId namesW[]={
1004 "Wcirc", 0x0174,
1005 "wcirc", 0x0175,
1006 "wedgeq", 0x2259,
1007 "weierp", 0x2118,
1008 "wreath", 0x2240,
1009 NULL, 0
1010 };
1011
1012 static NameId namesX[]={
1013 "xcirc", 0x25CB,
1014 "xdtri", 0x25BD,
1015 "Xgr", 0x039E,
1016 "xgr", 0x03BE,
1017 "xharr", 0x2194,
1018 "xhArr", 0x2194,
1019 "Xi", 0x039E,
1020 "xi", 0x03BE,
1021 "xlArr", 0x21D0,
1022 "xrArr", 0x21D2,
1023 "xutri", 0x25B3,
1024 NULL, 0
1025 };
1026
1027 static NameId namesY[]={
1028 "Yacute", 0x00DD,
1029 "yacute", 0x00FD,
1030 "YAcy", 0x042F,
1031 "yacy", 0x044F,
1032 "Ycirc", 0x0176,
1033 "ycirc", 0x0177,
1034 "Ycy", 0x042B,
1035 "ycy", 0x044B,
1036 "yen", 0x00A5,
1037 "YIcy", 0x0407,
1038 "yicy", 0x0457,
1039 "YUcy", 0x042E,
1040 "yucy", 0x044E,
1041 "yuml", 0x00FF,
1042 "Yuml", 0x0178,
1043 NULL, 0
1044 };
1045
1046 static NameId namesZ[]={
1047 "Zacute", 0x0179,
1048 "zacute", 0x017A,
1049 "Zcaron", 0x017D,
1050 "zcaron", 0x017E,
1051 "Zcy", 0x0417,
1052 "zcy", 0x0437,
1053 "Zdot", 0x017B,
1054 "zdot", 0x017C,
1055 "zeta", 0x03B6,
1056 "Zgr", 0x0396,
1057 "zgr", 0x03B6,
1058 "ZHcy", 0x0416,
1059 "zhcy", 0x0436,
1060 NULL, 0
1061 };
1062
1063 // @todo@ order namesTable and names? by frequency
1064 static NameId* namesTable[] = {
1065 namesA, namesB, namesC, namesD, namesE, namesF, namesG, namesH, namesI,
1066 namesJ, namesK, namesL, namesM, namesN, namesO, namesP, namesQ, namesR,
1067 namesS, namesT, namesU, namesV, namesW, namesX, namesY, namesZ, NULL
1068 };
1069
1070 int HtmlNamedEntity(unsigned char *p, int length)
1071 {
1072 int tableIndex = tolower(*p) - 'a';
1073 if (tableIndex >= 0 && tableIndex < 26) {
1074 NameId* names = namesTable[tableIndex];
1075 int i;
1076
1077 for (i = 0; names[i].name; i++){
1078 if (strncmp(names[i].name, (char *)p, length) == 0){
1079 return names[i].value;
1080 }
1081 }
1082 }
1083 error("unrecognized character entity \"%.*s\"", length, p);
1084 return -1;
1085 }
1086
1087 #else //TODO: Merge Walter's list with Thomas'
1088
1089 static NameId names[] =
1090 {
1091 // Entities
1092 "quot", 34,
1093 "amp", 38,
1094 "lt", 60,
1095 "gt", 62,
1096
1097 "OElig", 338,
1098 "oelig", 339,
1099 "Scaron", 352,
1100 "scaron", 353,
1101 "Yuml", 376,
1102 "circ", 710,
1103 "tilde", 732,
1104 "ensp", 8194,
1105 "emsp", 8195,
1106 "thinsp", 8201,
1107 "zwnj", 8204,
1108 "zwj", 8205,
1109 "lrm", 8206,
1110 "rlm", 8207,
1111 "ndash", 8211,
1112 "mdash", 8212,
1113 "lsquo", 8216,
1114 "rsquo", 8217,
1115 "sbquo", 8218,
1116 "ldquo", 8220,
1117 "rdquo", 8221,
1118 "bdquo", 8222,
1119 "dagger", 8224,
1120 "Dagger", 8225,
1121 "permil", 8240,
1122 "lsaquo", 8249,
1123 "rsaquo", 8250,
1124 "euro", 8364,
1125
1126 // Latin-1 (ISO-8859-1) Entities
1127 "nbsp", 160,
1128 "iexcl", 161,
1129 "cent", 162,
1130 "pound", 163,
1131 "curren", 164,
1132 "yen", 165,
1133 "brvbar", 166,
1134 "sect", 167,
1135 "uml", 168,
1136 "copy", 169,
1137 "ordf", 170,
1138 "laquo", 171,
1139 "not", 172,
1140 "shy", 173,
1141 "reg", 174,
1142 "macr", 175,
1143 "deg", 176,
1144 "plusmn", 177,
1145 "sup2", 178,
1146 "sup3", 179,
1147 "acute", 180,
1148 "micro", 181,
1149 "para", 182,
1150 "middot", 183,
1151 "cedil", 184,
1152 "sup1", 185,
1153 "ordm", 186,
1154 "raquo", 187,
1155 "frac14", 188,
1156 "frac12", 189,
1157 "frac34", 190,
1158 "iquest", 191,
1159 "Agrave", 192,
1160 "Aacute", 193,
1161 "Acirc", 194,
1162 "Atilde", 195,
1163 "Auml", 196,
1164 "Aring", 197,
1165 "AElig", 198,
1166 "Ccedil", 199,
1167 "Egrave", 200,
1168 "Eacute", 201,
1169 "Ecirc", 202,
1170 "Euml", 203,
1171 "Igrave", 204,
1172 "Iacute", 205,
1173 "Icirc", 206,
1174 "Iuml", 207,
1175 "ETH", 208,
1176 "Ntilde", 209,
1177 "Ograve", 210,
1178 "Oacute", 211,
1179 "Ocirc", 212,
1180 "Otilde", 213,
1181 "Ouml", 214,
1182 "times", 215,
1183 "Oslash", 216,
1184 "Ugrave", 217,
1185 "Uacute", 218,
1186 "Ucirc", 219,
1187 "Uuml", 220,
1188 "Yacute", 221,
1189 "THORN", 222,
1190 "szlig", 223,
1191 "agrave", 224,
1192 "aacute", 225,
1193 "acirc", 226,
1194 "atilde", 227,
1195 "auml", 228,
1196 "aring", 229,
1197 "aelig", 230,
1198 "ccedil", 231,
1199 "egrave", 232,
1200 "eacute", 233,
1201 "ecirc", 234,
1202 "euml", 235,
1203 "igrave", 236,
1204 "iacute", 237,
1205 "icirc", 238,
1206 "iuml", 239,
1207 "eth", 240,
1208 "ntilde", 241,
1209 "ograve", 242,
1210 "oacute", 243,
1211 "ocirc", 244,
1212 "otilde", 245,
1213 "ouml", 246,
1214 "divide", 247,
1215 "oslash", 248,
1216 "ugrave", 249,
1217 "uacute", 250,
1218 "ucirc", 251,
1219 "uuml", 252,
1220 "yacute", 253,
1221 "thorn", 254,
1222 "yuml", 255,
1223
1224 // Symbols and Greek letter entities
1225 "fnof", 402,
1226 "Alpha", 913,
1227 "Beta", 914,
1228 "Gamma", 915,
1229 "Delta", 916,
1230 "Epsilon", 917,
1231 "Zeta", 918,
1232 "Eta", 919,
1233 "Theta", 920,
1234 "Iota", 921,
1235 "Kappa", 922,
1236 "Lambda", 923,
1237 "Mu", 924,
1238 "Nu", 925,
1239 "Xi", 926,
1240 "Omicron", 927,
1241 "Pi", 928,
1242 "Rho", 929,
1243 "Sigma", 931,
1244 "Tau", 932,
1245 "Upsilon", 933,
1246 "Phi", 934,
1247 "Chi", 935,
1248 "Psi", 936,
1249 "Omega", 937,
1250 "alpha", 945,
1251 "beta", 946,
1252 "gamma", 947,
1253 "delta", 948,
1254 "epsilon", 949,
1255 "zeta", 950,
1256 "eta", 951,
1257 "theta", 952,
1258 "iota", 953,
1259 "kappa", 954,
1260 "lambda", 955,
1261 "mu", 956,
1262 "nu", 957,
1263 "xi", 958,
1264 "omicron", 959,
1265 "pi", 960,
1266 "rho", 961,
1267 "sigmaf", 962,
1268 "sigma", 963,
1269 "tau", 964,
1270 "upsilon", 965,
1271 "phi", 966,
1272 "chi", 967,
1273 "psi", 968,
1274 "omega", 969,
1275 "thetasym", 977,
1276 "upsih", 978,
1277 "piv", 982,
1278 "bull", 8226,
1279 "hellip", 8230,
1280 "prime", 8242,
1281 "Prime", 8243,
1282 "oline", 8254,
1283 "frasl", 8260,
1284 "weierp", 8472,
1285 "image", 8465,
1286 "real", 8476,
1287 "trade", 8482,
1288 "alefsym", 8501,
1289 "larr", 8592,
1290 "uarr", 8593,
1291 "rarr", 8594,
1292 "darr", 8595,
1293 "harr", 8596,
1294 "crarr", 8629,
1295 "lArr", 8656,
1296 "uArr", 8657,
1297 "rArr", 8658,
1298 "dArr", 8659,
1299 "hArr", 8660,
1300 "forall", 8704,
1301 "part", 8706,
1302 "exist", 8707,
1303 "empty", 8709,
1304 "nabla", 8711,
1305 "isin", 8712,
1306 "notin", 8713,
1307 "ni", 8715,
1308 "prod", 8719,
1309 "sum", 8721,
1310 "minus", 8722,
1311 "lowast", 8727,
1312 "radic", 8730,
1313 "prop", 8733,
1314 "infin", 8734,
1315 "ang", 8736,
1316 "and", 8743,
1317 "or", 8744,
1318 "cap", 8745,
1319 "cup", 8746,
1320 "int", 8747,
1321 "there4", 8756,
1322 "sim", 8764,
1323 "cong", 8773,
1324 "asymp", 8776,
1325 "ne", 8800,
1326 "equiv", 8801,
1327 "le", 8804,
1328 "ge", 8805,
1329 "sub", 8834,
1330 "sup", 8835,
1331 "nsub", 8836,
1332 "sube", 8838,
1333 "supe", 8839,
1334 "oplus", 8853,
1335 "otimes", 8855,
1336 "perp", 8869,
1337 "sdot", 8901,
1338 "lceil", 8968,
1339 "rceil", 8969,
1340 "lfloor", 8970,
1341 "rfloor", 8971,
1342 "lang", 9001,
1343 "rang", 9002,
1344 "loz", 9674,
1345 "spades", 9824,
1346 "clubs", 9827,
1347 "hearts", 9829,
1348 "diams", 9830,
1349 };
1350
1351 int HtmlNamedEntity(unsigned char *p, int length)
1352 {
1353 int i;
1354
1355 // BUG: this is a dumb, slow linear search
1356 for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
1357 {
1358 // Entries are case sensitive
1359 if (memcmp(names[i].name, (char *)p, length) == 0 &&
1360 !names[i].name[length])
1361 return names[i].value;
1362 }
1363 return -1;
1364 }
1365
1366 #endif