Mercurial > projects > ldc
annotate dmd/dchar.c @ 1138:4c8bb03e4fbc
Update DtoConstFP() to be correct after LLVM r67562, which changed the way the
APFloat constructor expects its i80 APInts to be formatted. (They're now
actually consistent with the x87 format)
author | Frits van Bommel <fvbommel wxs.nl> |
---|---|
date | Tue, 24 Mar 2009 15:24:59 +0100 |
parents | b30fe7e1dbb9 |
children |
rev | line source |
---|---|
1 | 1 |
2 // Copyright (c) 1999-2006 by Digital Mars | |
3 // All Rights Reserved | |
4 // written by Walter Bright | |
5 // www.digitalmars.com | |
6 // License for redistribution is by either the Artistic License | |
7 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
8 // See the included readme.txt for details. | |
9 | |
10 | |
11 #include <stdio.h> | |
12 #include <stdlib.h> | |
13 #include <stdint.h> | |
14 #include <assert.h> | |
15 | |
16 #include "dchar.h" | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1
diff
changeset
|
17 #include "rmem.h" |
1 | 18 |
19 #if M_UNICODE | |
20 | |
21 // Converts a char string to Unicode | |
22 | |
23 dchar *Dchar::dup(char *p) | |
24 { | |
25 dchar *s; | |
26 size_t len; | |
27 | |
28 if (!p) | |
29 return NULL; | |
30 len = strlen(p); | |
31 s = (dchar *)mem.malloc((len + 1) * sizeof(dchar)); | |
32 for (unsigned i = 0; i < len; i++) | |
33 { | |
34 s[i] = (dchar)(p[i] & 0xFF); | |
35 } | |
36 s[len] = 0; | |
37 return s; | |
38 } | |
39 | |
40 dchar *Dchar::memchr(dchar *p, int c, int count) | |
41 { | |
42 int u; | |
43 | |
44 for (u = 0; u < count; u++) | |
45 { | |
46 if (p[u] == c) | |
47 return p + u; | |
48 } | |
49 return NULL; | |
50 } | |
51 | |
52 #if _WIN32 && __DMC__ | |
53 __declspec(naked) | |
54 unsigned Dchar::calcHash(const dchar *str, unsigned len) | |
55 { | |
56 __asm | |
57 { | |
58 mov ECX,4[ESP] | |
59 mov EDX,8[ESP] | |
60 xor EAX,EAX | |
61 test EDX,EDX | |
62 je L92 | |
63 | |
64 LC8: cmp EDX,1 | |
65 je L98 | |
66 cmp EDX,2 | |
67 je LAE | |
68 | |
69 add EAX,[ECX] | |
70 // imul EAX,EAX,025h | |
71 lea EAX,[EAX][EAX*8] | |
72 add ECX,4 | |
73 sub EDX,2 | |
74 jmp LC8 | |
75 | |
76 L98: mov DX,[ECX] | |
77 and EDX,0FFFFh | |
78 add EAX,EDX | |
79 ret | |
80 | |
81 LAE: add EAX,[ECX] | |
82 L92: ret | |
83 } | |
84 } | |
85 #else | |
86 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
87 { | |
88 unsigned hash = 0; | |
89 | |
90 for (;;) | |
91 { | |
92 switch (len) | |
93 { | |
94 case 0: | |
95 return hash; | |
96 | |
97 case 1: | |
98 hash += *(const uint16_t *)str; | |
99 return hash; | |
100 | |
101 case 2: | |
102 hash += *(const uint32_t *)str; | |
103 return hash; | |
104 | |
105 default: | |
106 hash += *(const uint32_t *)str; | |
107 hash *= 37; | |
108 str += 2; | |
109 len -= 2; | |
110 break; | |
111 } | |
112 } | |
113 } | |
114 #endif | |
115 | |
116 hash_t Dchar::icalcHash(const dchar *str, size_t len) | |
117 { | |
118 hash_t hash = 0; | |
119 | |
120 for (;;) | |
121 { | |
122 switch (len) | |
123 { | |
124 case 0: | |
125 return hash; | |
126 | |
127 case 1: | |
128 hash += *(const uint16_t *)str | 0x20; | |
129 return hash; | |
130 | |
131 case 2: | |
132 hash += *(const uint32_t *)str | 0x200020; | |
133 return hash; | |
134 | |
135 default: | |
136 hash += *(const uint32_t *)str | 0x200020; | |
137 hash *= 37; | |
138 str += 2; | |
139 len -= 2; | |
140 break; | |
141 } | |
142 } | |
143 } | |
144 | |
145 #elif MCBS | |
146 | |
147 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
148 { | |
149 hash_t hash = 0; | |
150 | |
151 while (1) | |
152 { | |
153 switch (len) | |
154 { | |
155 case 0: | |
156 return hash; | |
157 | |
158 case 1: | |
159 hash *= 37; | |
160 hash += *(const uint8_t *)str; | |
161 return hash; | |
162 | |
163 case 2: | |
164 hash *= 37; | |
165 hash += *(const uint16_t *)str; | |
166 return hash; | |
167 | |
168 case 3: | |
169 hash *= 37; | |
170 hash += (*(const uint16_t *)str << 8) + | |
171 ((const uint8_t *)str)[2]; | |
172 return hash; | |
173 | |
174 default: | |
175 hash *= 37; | |
176 hash += *(const uint32_t *)str; | |
177 str += 4; | |
178 len -= 4; | |
179 break; | |
180 } | |
181 } | |
182 } | |
183 | |
184 #elif UTF8 | |
185 | |
186 // Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335 | |
187 | |
188 char Dchar::mblen[256] = | |
189 { | |
190 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
191 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
192 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
193 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
194 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
195 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
196 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
197 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
198 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
199 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
200 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
201 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
202 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
203 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
204 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |
205 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, | |
206 }; | |
207 | |
208 dchar *Dchar::dec(dchar *pstart, dchar *p) | |
209 { | |
210 while ((p[-1] & 0xC0) == 0x80) | |
211 p--; | |
212 return p; | |
213 } | |
214 | |
215 int Dchar::get(dchar *p) | |
216 { | |
217 unsigned c; | |
218 unsigned char *q = (unsigned char *)p; | |
219 | |
220 c = q[0]; | |
221 switch (mblen[c]) | |
222 { | |
223 case 2: | |
224 c = ((c - 0xC0) << 6) | | |
225 (q[1] - 0x80); | |
226 break; | |
227 | |
228 case 3: | |
229 c = ((c - 0xE0) << 12) | | |
230 ((q[1] - 0x80) << 6) | | |
231 (q[2] - 0x80); | |
232 break; | |
233 | |
234 case 4: | |
235 c = ((c - 0xF0) << 18) | | |
236 ((q[1] - 0x80) << 12) | | |
237 ((q[2] - 0x80) << 6) | | |
238 (q[3] - 0x80); | |
239 break; | |
240 | |
241 case 5: | |
242 c = ((c - 0xF8) << 24) | | |
243 ((q[1] - 0x80) << 18) | | |
244 ((q[2] - 0x80) << 12) | | |
245 ((q[3] - 0x80) << 6) | | |
246 (q[4] - 0x80); | |
247 break; | |
248 | |
249 case 6: | |
250 c = ((c - 0xFC) << 30) | | |
251 ((q[1] - 0x80) << 24) | | |
252 ((q[2] - 0x80) << 18) | | |
253 ((q[3] - 0x80) << 12) | | |
254 ((q[4] - 0x80) << 6) | | |
255 (q[5] - 0x80); | |
256 break; | |
257 } | |
258 return c; | |
259 } | |
260 | |
261 dchar *Dchar::put(dchar *p, unsigned c) | |
262 { | |
263 if (c <= 0x7F) | |
264 { | |
265 *p++ = c; | |
266 } | |
267 else if (c <= 0x7FF) | |
268 { | |
269 p[0] = 0xC0 + (c >> 6); | |
270 p[1] = 0x80 + (c & 0x3F); | |
271 p += 2; | |
272 } | |
273 else if (c <= 0xFFFF) | |
274 { | |
275 p[0] = 0xE0 + (c >> 12); | |
276 p[1] = 0x80 + ((c >> 6) & 0x3F); | |
277 p[2] = 0x80 + (c & 0x3F); | |
278 p += 3; | |
279 } | |
280 else if (c <= 0x1FFFFF) | |
281 { | |
282 p[0] = 0xF0 + (c >> 18); | |
283 p[1] = 0x80 + ((c >> 12) & 0x3F); | |
284 p[2] = 0x80 + ((c >> 6) & 0x3F); | |
285 p[3] = 0x80 + (c & 0x3F); | |
286 p += 4; | |
287 } | |
288 else if (c <= 0x3FFFFFF) | |
289 { | |
290 p[0] = 0xF8 + (c >> 24); | |
291 p[1] = 0x80 + ((c >> 18) & 0x3F); | |
292 p[2] = 0x80 + ((c >> 12) & 0x3F); | |
293 p[3] = 0x80 + ((c >> 6) & 0x3F); | |
294 p[4] = 0x80 + (c & 0x3F); | |
295 p += 5; | |
296 } | |
297 else if (c <= 0x7FFFFFFF) | |
298 { | |
299 p[0] = 0xFC + (c >> 30); | |
300 p[1] = 0x80 + ((c >> 24) & 0x3F); | |
301 p[2] = 0x80 + ((c >> 18) & 0x3F); | |
302 p[3] = 0x80 + ((c >> 12) & 0x3F); | |
303 p[4] = 0x80 + ((c >> 6) & 0x3F); | |
304 p[5] = 0x80 + (c & 0x3F); | |
305 p += 6; | |
306 } | |
307 else | |
308 assert(0); // not a UCS-4 character | |
309 return p; | |
310 } | |
311 | |
312 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
313 { | |
314 hash_t hash = 0; | |
315 | |
316 while (1) | |
317 { | |
318 switch (len) | |
319 { | |
320 case 0: | |
321 return hash; | |
322 | |
323 case 1: | |
324 hash *= 37; | |
325 hash += *(const uint8_t *)str; | |
326 return hash; | |
327 | |
328 case 2: | |
329 hash *= 37; | |
330 #if __I86__ | |
331 hash += *(const uint16_t *)str; | |
332 #else | |
333 hash += str[0] * 256 + str[1]; | |
334 #endif | |
335 return hash; | |
336 | |
337 case 3: | |
338 hash *= 37; | |
339 #if __I86__ | |
340 hash += (*(const uint16_t *)str << 8) + | |
341 ((const uint8_t *)str)[2]; | |
342 #else | |
343 hash += (str[0] * 256 + str[1]) * 256 + str[2]; | |
344 #endif | |
345 return hash; | |
346 | |
347 default: | |
348 hash *= 37; | |
349 #if __I86__ | |
350 hash += *(const uint32_t *)str; | |
351 #else | |
352 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3]; | |
353 #endif | |
354 | |
355 str += 4; | |
356 len -= 4; | |
357 break; | |
358 } | |
359 } | |
360 } | |
361 | |
362 #else // ascii | |
363 | |
364 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
365 { | |
366 hash_t hash = 0; | |
367 | |
368 while (1) | |
369 { | |
370 switch (len) | |
371 { | |
372 case 0: | |
373 return hash; | |
374 | |
375 case 1: | |
376 hash *= 37; | |
377 hash += *(const uint8_t *)str; | |
378 return hash; | |
379 | |
380 case 2: | |
381 hash *= 37; | |
382 #if __I86__ | |
383 hash += *(const uint16_t *)str; | |
384 #else | |
385 hash += str[0] * 256 + str[1]; | |
386 #endif | |
387 return hash; | |
388 | |
389 case 3: | |
390 hash *= 37; | |
391 #if __I86__ | |
392 hash += (*(const uint16_t *)str << 8) + | |
393 ((const uint8_t *)str)[2]; | |
394 #else | |
395 hash += (str[0] * 256 + str[1]) * 256 + str[2]; | |
396 #endif | |
397 return hash; | |
398 | |
399 default: | |
400 hash *= 37; | |
401 #if __I86__ | |
402 hash += *(const uint32_t *)str; | |
403 #else | |
404 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3]; | |
405 #endif | |
406 str += 4; | |
407 len -= 4; | |
408 break; | |
409 } | |
410 } | |
411 } | |
412 | |
413 hash_t Dchar::icalcHash(const dchar *str, size_t len) | |
414 { | |
415 hash_t hash = 0; | |
416 | |
417 while (1) | |
418 { | |
419 switch (len) | |
420 { | |
421 case 0: | |
422 return hash; | |
423 | |
424 case 1: | |
425 hash *= 37; | |
426 hash += *(const uint8_t *)str | 0x20; | |
427 return hash; | |
428 | |
429 case 2: | |
430 hash *= 37; | |
431 hash += *(const uint16_t *)str | 0x2020; | |
432 return hash; | |
433 | |
434 case 3: | |
435 hash *= 37; | |
436 hash += ((*(const uint16_t *)str << 8) + | |
437 ((const uint8_t *)str)[2]) | 0x202020; | |
438 return hash; | |
439 | |
440 default: | |
441 hash *= 37; | |
442 hash += *(const uint32_t *)str | 0x20202020; | |
443 str += 4; | |
444 len -= 4; | |
445 break; | |
446 } | |
447 } | |
448 } | |
449 | |
450 #endif | |
451 | |
452 #if 0 | |
453 #include <stdio.h> | |
454 | |
// Stand-alone generator for the Dchar::mblen[] initializer.
//
// For each byte value 0..255 it prints the sequence length selected by the
// range checks below, followed by a comma, with a line break after every
// 16th value.
int main()
{
    for (int c = 0; c < 256; c++)
    {
        int s = 1;                      // default for every other byte value

        // The ranges are disjoint, so at most one branch applies.
        if (c >= 0xC0 && c <= 0xDF)
            s = 2;
        else if (c >= 0xE0 && c <= 0xEF)
            s = 3;
        else if (c >= 0xF0 && c <= 0xF7)
            s = 4;
        else if (c >= 0xF8 && c <= 0xFB)
            s = 5;
        else if (c >= 0xFC && c <= 0xFD)
            s = 6;

        printf("%d", s);
        if ((c & 15) == 15)
            printf(",\n");              // end of a 16-entry row
        else
            printf(",");
    }
    return 0;
}
482 #endif |