Mercurial > projects > dil
comparison trunk/src/dil/HtmlEntities.d @ 609:0c10255d8009
Wrote custom look-up tables for HTML entities.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 06 Jan 2008 21:06:20 +0100 |
parents | f203c5248d0b |
children | 3b34f6a95a27 |
comparison
equal
deleted
inserted
replaced
608:fac9e8b258fc | 609:0c10255d8009 |
---|---|
2 Author: Aziz Köksal | 2 Author: Aziz Köksal |
3 License: GPL3 | 3 License: GPL3 |
4 +/ | 4 +/ |
5 module dil.HtmlEntities; | 5 module dil.HtmlEntities; |
6 | 6 |
7 private const dchar[char[]] entities_table; | 7 import common; |
8 | 8 |
9 static this() | 9 struct Entity |
10 { | 10 { |
11 entities_table = [ | 11 char[] name; |
12 "Aacute"[] : '\u00C1', | 12 uint value; |
13 "aacute" : '\u00E1', | 13 } |
14 "Acirc" : '\u00C2', | 14 |
15 "acirc" : '\u00E2', | 15 static const Entity[] namedEntities = [ |
16 "acute" : '\u00B4', | 16 {"Aacute", '\u00C1'}, |
17 "AElig" : '\u00C6', | 17 {"aacute", '\u00E1'}, |
18 "aelig" : '\u00E6', | 18 {"Acirc", '\u00C2'}, |
19 "Agrave" : '\u00C0', | 19 {"acirc", '\u00E2'}, |
20 "agrave" : '\u00E0', | 20 {"acute", '\u00B4'}, |
21 "alefsym" : '\u2135', | 21 {"AElig", '\u00C6'}, |
22 "Alpha" : '\u0391', | 22 {"aelig", '\u00E6'}, |
23 "alpha" : '\u03B1', | 23 {"Agrave", '\u00C0'}, |
24 "amp" : '\u0026', | 24 {"agrave", '\u00E0'}, |
25 "and" : '\u2227', | 25 {"alefsym", '\u2135'}, |
26 "ang" : '\u2220', | 26 {"Alpha", '\u0391'}, |
27 "Aring" : '\u00C5', | 27 {"alpha", '\u03B1'}, |
28 "aring" : '\u00E5', | 28 {"amp", '\u0026'}, |
29 "asymp" : '\u2248', | 29 {"and", '\u2227'}, |
30 "Atilde" : '\u00C3', | 30 {"ang", '\u2220'}, |
31 "atilde" : '\u00E3', | 31 {"Aring", '\u00C5'}, |
32 "Auml" : '\u00C4', | 32 {"aring", '\u00E5'}, |
33 "auml" : '\u00E4', | 33 {"asymp", '\u2248'}, |
34 "bdquo" : '\u201E', | 34 {"Atilde", '\u00C3'}, |
35 "Beta" : '\u0392', | 35 {"atilde", '\u00E3'}, |
36 "beta" : '\u03B2', | 36 {"Auml", '\u00C4'}, |
37 "brvbar" : '\u00A6', | 37 {"auml", '\u00E4'}, |
38 "bull" : '\u2022', | 38 {"bdquo", '\u201E'}, |
39 "cap" : '\u2229', | 39 {"Beta", '\u0392'}, |
40 "Ccedil" : '\u00C7', | 40 {"beta", '\u03B2'}, |
41 "ccedil" : '\u00E7', | 41 {"brvbar", '\u00A6'}, |
42 "cedil" : '\u00B8', | 42 {"bull", '\u2022'}, |
43 "cent" : '\u00A2', | 43 {"cap", '\u2229'}, |
44 "Chi" : '\u03A7', | 44 {"Ccedil", '\u00C7'}, |
45 "chi" : '\u03C7', | 45 {"ccedil", '\u00E7'}, |
46 "circ" : '\u02C6', | 46 {"cedil", '\u00B8'}, |
47 "clubs" : '\u2663', | 47 {"cent", '\u00A2'}, |
48 "cong" : '\u2245', | 48 {"Chi", '\u03A7'}, |
49 "copy" : '\u00A9', | 49 {"chi", '\u03C7'}, |
50 "crarr" : '\u21B5', | 50 {"circ", '\u02C6'}, |
51 "cup" : '\u222A', | 51 {"clubs", '\u2663'}, |
52 "curren" : '\u00A4', | 52 {"cong", '\u2245'}, |
53 "Dagger" : '\u2021', | 53 {"copy", '\u00A9'}, |
54 "dagger" : '\u2020', | 54 {"crarr", '\u21B5'}, |
55 "dArr" : '\u21D3', | 55 {"cup", '\u222A'}, |
56 "darr" : '\u2193', | 56 {"curren", '\u00A4'}, |
57 "deg" : '\u00B0', | 57 {"Dagger", '\u2021'}, |
58 "Delta" : '\u0394', | 58 {"dagger", '\u2020'}, |
59 "delta" : '\u03B4', | 59 {"dArr", '\u21D3'}, |
60 "diams" : '\u2666', | 60 {"darr", '\u2193'}, |
61 "divide" : '\u00F7', | 61 {"deg", '\u00B0'}, |
62 "Eacute" : '\u00C9', | 62 {"Delta", '\u0394'}, |
63 "eacute" : '\u00E9', | 63 {"delta", '\u03B4'}, |
64 "Ecirc" : '\u00CA', | 64 {"diams", '\u2666'}, |
65 "ecirc" : '\u00EA', | 65 {"divide", '\u00F7'}, |
66 "Egrave" : '\u00C8', | 66 {"Eacute", '\u00C9'}, |
67 "egrave" : '\u00E8', | 67 {"eacute", '\u00E9'}, |
68 "empty" : '\u2205', | 68 {"Ecirc", '\u00CA'}, |
69 "emsp" : '\u2003', | 69 {"ecirc", '\u00EA'}, |
70 "ensp" : '\u2002', | 70 {"Egrave", '\u00C8'}, |
71 "Epsilon" : '\u0395', | 71 {"egrave", '\u00E8'}, |
72 "epsilon" : '\u03B5', | 72 {"empty", '\u2205'}, |
73 "equiv" : '\u2261', | 73 {"emsp", '\u2003'}, |
74 "Eta" : '\u0397', | 74 {"ensp", '\u2002'}, |
75 "eta" : '\u03B7', | 75 {"Epsilon", '\u0395'}, |
76 "ETH" : '\u00D0', | 76 {"epsilon", '\u03B5'}, |
77 "eth" : '\u00F0', | 77 {"equiv", '\u2261'}, |
78 "Euml" : '\u00CB', | 78 {"Eta", '\u0397'}, |
79 "euml" : '\u00EB', | 79 {"eta", '\u03B7'}, |
80 "euro" : '\u20AC', | 80 {"ETH", '\u00D0'}, |
81 "exist" : '\u2203', | 81 {"eth", '\u00F0'}, |
82 "fnof" : '\u0192', | 82 {"Euml", '\u00CB'}, |
83 "forall" : '\u2200', | 83 {"euml", '\u00EB'}, |
84 "frac12" : '\u00BD', | 84 {"euro", '\u20AC'}, |
85 "frac14" : '\u00BC', | 85 {"exist", '\u2203'}, |
86 "frac34" : '\u00BE', | 86 {"fnof", '\u0192'}, |
87 "frasl" : '\u2044', | 87 {"forall", '\u2200'}, |
88 "Gamma" : '\u0393', | 88 {"frac12", '\u00BD'}, |
89 "gamma" : '\u03B3', | 89 {"frac14", '\u00BC'}, |
90 "ge" : '\u2265', | 90 {"frac34", '\u00BE'}, |
91 "gt" : '\u003E', | 91 {"frasl", '\u2044'}, |
92 "hArr" : '\u21D4', | 92 {"Gamma", '\u0393'}, |
93 "harr" : '\u2194', | 93 {"gamma", '\u03B3'}, |
94 "hearts" : '\u2665', | 94 {"ge", '\u2265'}, |
95 "hellip" : '\u2026', | 95 {"gt", '\u003E'}, |
96 "Iacute" : '\u00CD', | 96 {"hArr", '\u21D4'}, |
97 "iacute" : '\u00ED', | 97 {"harr", '\u2194'}, |
98 "Icirc" : '\u00CE', | 98 {"hearts", '\u2665'}, |
99 "icirc" : '\u00EE', | 99 {"hellip", '\u2026'}, |
100 "iexcl" : '\u00A1', | 100 {"Iacute", '\u00CD'}, |
101 "Igrave" : '\u00CC', | 101 {"iacute", '\u00ED'}, |
102 "igrave" : '\u00EC', | 102 {"Icirc", '\u00CE'}, |
103 "image" : '\u2111', | 103 {"icirc", '\u00EE'}, |
104 "infin" : '\u221E', | 104 {"iexcl", '\u00A1'}, |
105 "int" : '\u222B', | 105 {"Igrave", '\u00CC'}, |
106 "Iota" : '\u0399', | 106 {"igrave", '\u00EC'}, |
107 "iota" : '\u03B9', | 107 {"image", '\u2111'}, |
108 "iquest" : '\u00BF', | 108 {"infin", '\u221E'}, |
109 "isin" : '\u2208', | 109 {"int", '\u222B'}, |
110 "Iuml" : '\u00CF', | 110 {"Iota", '\u0399'}, |
111 "iuml" : '\u00EF', | 111 {"iota", '\u03B9'}, |
112 "Kappa" : '\u039A', | 112 {"iquest", '\u00BF'}, |
113 "kappa" : '\u03BA', | 113 {"isin", '\u2208'}, |
114 "Lambda" : '\u039B', | 114 {"Iuml", '\u00CF'}, |
115 "lambda" : '\u03BB', | 115 {"iuml", '\u00EF'}, |
116 "lang" : '\u2329', | 116 {"Kappa", '\u039A'}, |
117 "laquo" : '\u00AB', | 117 {"kappa", '\u03BA'}, |
118 "lArr" : '\u21D0', | 118 {"Lambda", '\u039B'}, |
119 "larr" : '\u2190', | 119 {"lambda", '\u03BB'}, |
120 "lceil" : '\u2308', | 120 {"lang", '\u2329'}, |
121 "ldquo" : '\u201C', | 121 {"laquo", '\u00AB'}, |
122 "le" : '\u2264', | 122 {"lArr", '\u21D0'}, |
123 "lfloor" : '\u230A', | 123 {"larr", '\u2190'}, |
124 "lowast" : '\u2217', | 124 {"lceil", '\u2308'}, |
125 "loz" : '\u25CA', | 125 {"ldquo", '\u201C'}, |
126 "lrm" : '\u200E', | 126 {"le", '\u2264'}, |
127 "lsaquo" : '\u2039', | 127 {"lfloor", '\u230A'}, |
128 "lsquo" : '\u2018', | 128 {"lowast", '\u2217'}, |
129 "lt" : '\u003C', | 129 {"loz", '\u25CA'}, |
130 "macr" : '\u00AF', | 130 {"lrm", '\u200E'}, |
131 "mdash" : '\u2014', | 131 {"lsaquo", '\u2039'}, |
132 "micro" : '\u00B5', | 132 {"lsquo", '\u2018'}, |
133 "middot" : '\u00B7', | 133 {"lt", '\u003C'}, |
134 "minus" : '\u2212', | 134 {"macr", '\u00AF'}, |
135 "Mu" : '\u039C', | 135 {"mdash", '\u2014'}, |
136 "mu" : '\u03BC', | 136 {"micro", '\u00B5'}, |
137 "nabla" : '\u2207', | 137 {"middot", '\u00B7'}, |
138 "nbsp" : '\u00A0', | 138 {"minus", '\u2212'}, |
139 "ndash" : '\u2013', | 139 {"Mu", '\u039C'}, |
140 "ne" : '\u2260', | 140 {"mu", '\u03BC'}, |
141 "ni" : '\u220B', | 141 {"nabla", '\u2207'}, |
142 "not" : '\u00AC', | 142 {"nbsp", '\u00A0'}, |
143 "notin" : '\u2209', | 143 {"ndash", '\u2013'}, |
144 "nsub" : '\u2284', | 144 {"ne", '\u2260'}, |
145 "Ntilde" : '\u00D1', | 145 {"ni", '\u220B'}, |
146 "ntilde" : '\u00F1', | 146 {"not", '\u00AC'}, |
147 "Nu" : '\u039D', | 147 {"notin", '\u2209'}, |
148 "nu" : '\u03BD', | 148 {"nsub", '\u2284'}, |
149 "Oacute" : '\u00D3', | 149 {"Ntilde", '\u00D1'}, |
150 "oacute" : '\u00F3', | 150 {"ntilde", '\u00F1'}, |
151 "Ocirc" : '\u00D4', | 151 {"Nu", '\u039D'}, |
152 "ocirc" : '\u00F4', | 152 {"nu", '\u03BD'}, |
153 "OElig" : '\u0152', | 153 {"Oacute", '\u00D3'}, |
154 "oelig" : '\u0153', | 154 {"oacute", '\u00F3'}, |
155 "Ograve" : '\u00D2', | 155 {"Ocirc", '\u00D4'}, |
156 "ograve" : '\u00F2', | 156 {"ocirc", '\u00F4'}, |
157 "oline" : '\u203E', | 157 {"OElig", '\u0152'}, |
158 "Omega" : '\u03A9', | 158 {"oelig", '\u0153'}, |
159 "omega" : '\u03C9', | 159 {"Ograve", '\u00D2'}, |
160 "Omicron" : '\u039F', | 160 {"ograve", '\u00F2'}, |
161 "omicron" : '\u03BF', | 161 {"oline", '\u203E'}, |
162 "oplus" : '\u2295', | 162 {"Omega", '\u03A9'}, |
163 "or" : '\u2228', | 163 {"omega", '\u03C9'}, |
164 "ordf" : '\u00AA', | 164 {"Omicron", '\u039F'}, |
165 "ordm" : '\u00BA', | 165 {"omicron", '\u03BF'}, |
166 "Oslash" : '\u00D8', | 166 {"oplus", '\u2295'}, |
167 "oslash" : '\u00F8', | 167 {"or", '\u2228'}, |
168 "Otilde" : '\u00D5', | 168 {"ordf", '\u00AA'}, |
169 "otilde" : '\u00F5', | 169 {"ordm", '\u00BA'}, |
170 "otimes" : '\u2297', | 170 {"Oslash", '\u00D8'}, |
171 "Ouml" : '\u00D6', | 171 {"oslash", '\u00F8'}, |
172 "ouml" : '\u00F6', | 172 {"Otilde", '\u00D5'}, |
173 "para" : '\u00B6', | 173 {"otilde", '\u00F5'}, |
174 "part" : '\u2202', | 174 {"otimes", '\u2297'}, |
175 "permil" : '\u2030', | 175 {"Ouml", '\u00D6'}, |
176 "perp" : '\u22A5', | 176 {"ouml", '\u00F6'}, |
177 "Phi" : '\u03A6', | 177 {"para", '\u00B6'}, |
178 "phi" : '\u03C6', | 178 {"part", '\u2202'}, |
179 "Pi" : '\u03A0', | 179 {"permil", '\u2030'}, |
180 "pi" : '\u03C0', | 180 {"perp", '\u22A5'}, |
181 "piv" : '\u03D6', | 181 {"Phi", '\u03A6'}, |
182 "plusmn" : '\u00B1', | 182 {"phi", '\u03C6'}, |
183 "pound" : '\u00A3', | 183 {"Pi", '\u03A0'}, |
184 "Prime" : '\u2033', | 184 {"pi", '\u03C0'}, |
185 "prime" : '\u2032', | 185 {"piv", '\u03D6'}, |
186 "prod" : '\u220F', | 186 {"plusmn", '\u00B1'}, |
187 "prop" : '\u221D', | 187 {"pound", '\u00A3'}, |
188 "Psi" : '\u03A8', | 188 {"Prime", '\u2033'}, |
189 "psi" : '\u03C8', | 189 {"prime", '\u2032'}, |
190 "quot" : '\u0022', | 190 {"prod", '\u220F'}, |
191 "radic" : '\u221A', | 191 {"prop", '\u221D'}, |
192 "rang" : '\u232A', | 192 {"Psi", '\u03A8'}, |
193 "raquo" : '\u00BB', | 193 {"psi", '\u03C8'}, |
194 "rArr" : '\u21D2', | 194 {"quot", '\u0022'}, |
195 "rarr" : '\u2192', | 195 {"radic", '\u221A'}, |
196 "rceil" : '\u2309', | 196 {"rang", '\u232A'}, |
197 "rdquo" : '\u201D', | 197 {"raquo", '\u00BB'}, |
198 "real" : '\u211C', | 198 {"rArr", '\u21D2'}, |
199 "reg" : '\u00AE', | 199 {"rarr", '\u2192'}, |
200 "rfloor" : '\u230B', | 200 {"rceil", '\u2309'}, |
201 "Rho" : '\u03A1', | 201 {"rdquo", '\u201D'}, |
202 "rho" : '\u03C1', | 202 {"real", '\u211C'}, |
203 "rlm" : '\u200F', | 203 {"reg", '\u00AE'}, |
204 "rsaquo" : '\u203A', | 204 {"rfloor", '\u230B'}, |
205 "rsquo" : '\u2019', | 205 {"Rho", '\u03A1'}, |
206 "sbquo" : '\u201A', | 206 {"rho", '\u03C1'}, |
207 "Scaron" : '\u0160', | 207 {"rlm", '\u200F'}, |
208 "scaron" : '\u0161', | 208 {"rsaquo", '\u203A'}, |
209 "sdot" : '\u22C5', | 209 {"rsquo", '\u2019'}, |
210 "sect" : '\u00A7', | 210 {"sbquo", '\u201A'}, |
211 "shy" : '\u00AD', | 211 {"Scaron", '\u0160'}, |
212 "Sigma" : '\u03A3', | 212 {"scaron", '\u0161'}, |
213 "sigma" : '\u03C3', | 213 {"sdot", '\u22C5'}, |
214 "sigmaf" : '\u03C2', | 214 {"sect", '\u00A7'}, |
215 "sim" : '\u223C', | 215 {"shy", '\u00AD'}, |
216 "spades" : '\u2660', | 216 {"Sigma", '\u03A3'}, |
217 "sub" : '\u2282', | 217 {"sigma", '\u03C3'}, |
218 "sube" : '\u2286', | 218 {"sigmaf", '\u03C2'}, |
219 "sum" : '\u2211', | 219 {"sim", '\u223C'}, |
220 "sup" : '\u2283', | 220 {"spades", '\u2660'}, |
221 "sup1" : '\u00B9', | 221 {"sub", '\u2282'}, |
222 "sup2" : '\u00B2', | 222 {"sube", '\u2286'}, |
223 "sup3" : '\u00B3', | 223 {"sum", '\u2211'}, |
224 "supe" : '\u2287', | 224 {"sup", '\u2283'}, |
225 "szlig" : '\u00DF', | 225 {"sup1", '\u00B9'}, |
226 "Tau" : '\u03A4', | 226 {"sup2", '\u00B2'}, |
227 "tau" : '\u03C4', | 227 {"sup3", '\u00B3'}, |
228 "there4" : '\u2234', | 228 {"supe", '\u2287'}, |
229 "Theta" : '\u0398', | 229 {"szlig", '\u00DF'}, |
230 "theta" : '\u03B8', | 230 {"Tau", '\u03A4'}, |
231 "thetasym" : '\u03D1', | 231 {"tau", '\u03C4'}, |
232 "thinsp" : '\u2009', | 232 {"there4", '\u2234'}, |
233 "THORN" : '\u00DE', | 233 {"Theta", '\u0398'}, |
234 "thorn" : '\u00FE', | 234 {"theta", '\u03B8'}, |
235 "tilde" : '\u02DC', | 235 {"thetasym", '\u03D1'}, |
236 "times" : '\u00D7', | 236 {"thinsp", '\u2009'}, |
237 "trade" : '\u2122', | 237 {"THORN", '\u00DE'}, |
238 "Uacute" : '\u00DA', | 238 {"thorn", '\u00FE'}, |
239 "uacute" : '\u00FA', | 239 {"tilde", '\u02DC'}, |
240 "uArr" : '\u21D1', | 240 {"times", '\u00D7'}, |
241 "uarr" : '\u2191', | 241 {"trade", '\u2122'}, |
242 "Ucirc" : '\u00DB', | 242 {"Uacute", '\u00DA'}, |
243 "ucirc" : '\u00FB', | 243 {"uacute", '\u00FA'}, |
244 "Ugrave" : '\u00D9', | 244 {"uArr", '\u21D1'}, |
245 "ugrave" : '\u00F9', | 245 {"uarr", '\u2191'}, |
246 "uml" : '\u00A8', | 246 {"Ucirc", '\u00DB'}, |
247 "upsih" : '\u03D2', | 247 {"ucirc", '\u00FB'}, |
248 "Upsilon" : '\u03A5', | 248 {"Ugrave", '\u00D9'}, |
249 "upsilon" : '\u03C5', | 249 {"ugrave", '\u00F9'}, |
250 "Uuml" : '\u00DC', | 250 {"uml", '\u00A8'}, |
251 "uuml" : '\u00FC', | 251 {"upsih", '\u03D2'}, |
252 "weierp" : '\u2118', | 252 {"Upsilon", '\u03A5'}, |
253 "Xi" : '\u039E', | 253 {"upsilon", '\u03C5'}, |
254 "xi" : '\u03BE', | 254 {"Uuml", '\u00DC'}, |
255 "Yacute" : '\u00DD', | 255 {"uuml", '\u00FC'}, |
256 "yacute" : '\u00FD', | 256 {"weierp", '\u2118'}, |
257 "yen" : '\u00A5', | 257 {"Xi", '\u039E'}, |
258 "Yuml" : '\u0178', | 258 {"xi", '\u03BE'}, |
259 "yuml" : '\u00FF', | 259 {"Yacute", '\u00DD'}, |
260 "Zeta" : '\u0396', | 260 {"yacute", '\u00FD'}, |
261 "zeta" : '\u03B6', | 261 {"yen", '\u00A5'}, |
262 "zwj" : '\u200D', | 262 {"Yuml", '\u0178'}, |
263 "zwnj" : '\u200C' | 263 {"yuml", '\u00FF'}, |
264 ]; | 264 {"Zeta", '\u0396'}, |
265 } | 265 {"zeta", '\u03B6'}, |
266 {"zwj", '\u200D'}, | |
267 {"zwnj", '\u200C'} | |
268 ]; | |
269 | |
270 uint stringToHash(char[] str) | |
271 { | |
272 uint hash; | |
273 foreach(c; str) { | |
274 hash *= 11; | |
275 hash += c; | |
276 } | |
277 return hash; | |
278 } | |
279 | |
280 char[] toString(uint x) | |
281 { | |
282 char[] str; | |
283 do | |
284 str = cast(char)('0' + (x % 10)) ~ str; | |
285 while (x /= 10) | |
286 return str; | |
287 } | |
288 | |
289 char[] generateHashAndValueArrays() | |
290 { | |
291 uint[] hashes; // String hashes. | |
292 uint[] values; // Unicode codepoints. | |
293 // Build arrays: | |
294 foreach (entity; namedEntities) | |
295 { | |
296 auto hash = stringToHash(entity.name); | |
297 auto value = entity.value; | |
298 assert(hash != 0); | |
299 // Find insertion place. | |
300 uint i; | |
301 for (; i < hashes.length; ++i) | |
302 { | |
303 assert(hash != hashes[i], "bad hash function: conflicting hashes"); | |
304 if (hash < hashes[i]) | |
305 break; | |
306 } | |
307 // Insert hash and value into tables. | |
308 if (i == hashes.length) | |
309 { | |
310 hashes ~= hash; | |
311 values ~= value; | |
312 } | |
313 else | |
314 { | |
315 hashes = hashes[0..i] ~ hash ~ hashes[i..$]; // Insert before index. | |
316 values = values[0..i] ~ value ~ values[i..$]; // Insert before index. | |
317 } | |
318 assert(hashes[i] == hash && values[i] == value); | |
319 } | |
320 // Build source text: | |
321 char[] hashesText = "private static const uint[] hashes = [", | |
322 valuesText = "private static const dchar[] values = ["; | |
323 foreach (i, hash; hashes) | |
324 { | |
325 hashesText ~= toString(hash) ~ ","; | |
326 valuesText ~= toString(values[i]) ~ ","; | |
327 } | |
328 hashesText ~= "];"; | |
329 valuesText ~= "];"; | |
330 return hashesText ~"\n"~ valuesText; | |
331 } | |
332 | |
333 // Mixin: | |
334 // private static const uint[] hashes; | |
335 // private static const dchar[] values; | |
336 mixin(generateHashAndValueArrays); | |
337 // pragma(msg, generateHashAndValueArrays()); | |
266 | 338 |
267 /++ | 339 /++ |
268 Converts a named HTML entity into its equivalent Unicode codepoint. | 340 Converts a named HTML entity into its equivalent Unicode codepoint. |
269 Returns 0xFFFF if entity doesn't exist. | 341 Returns 0xFFFF if entity doesn't exist. |
270 +/ | 342 +/ |
271 dchar entity2Unicode(char[] entity) | 343 dchar entity2Unicode(char[] entity) |
272 { | 344 { |
273 auto d = entity in entities_table; | 345 auto hash = stringToHash(entity); |
274 if (d) | 346 // Binary search: |
275 return *d; | 347 size_t lower = void, index = void, upper = void; |
276 return 0xFFFF; | 348 lower = 0; |
277 } | 349 upper = hashes.length -1; |
350 while (lower <= upper) | |
351 { | |
352 index = (lower + upper) / 2; | |
353 if (hash < hashes[index]) | |
354 upper = index - 1; | |
355 else if (hash > hashes[index]) | |
356 lower = index + 1; | |
357 else | |
358 return values[index]; // Return the Unicode codepoint. | |
359 } | |
360 return 0xFFFF; // Return error value. | |
361 } | |
362 | |
363 unittest | |
364 { | |
365 Stdout("Testing entity2Unicode().").newline; | |
366 alias entity2Unicode f; | |
367 foreach (entity; namedEntities) | |
368 assert(f(entity.name) == entity.value, | |
369 Format("'&{};' == \\u{:X4}, not \\u{:X4}", entity.name, entity.value, cast(uint)f(entity.name)) | |
370 ); | |
371 } |