Mercurial > projects > ldc
annotate dmd/dchar.c @ 1150:2a687353c84d
Added missing new files.
author | Tomas Lindquist Olsen <tomas.l.olsen gmail.com> |
---|---|
date | Fri, 27 Mar 2009 23:24:47 +0100 |
parents | b30fe7e1dbb9 |
children |
rev | line source |
---|---|
1 | 1 |
2 // Copyright (c) 1999-2006 by Digital Mars | |
3 // All Rights Reserved | |
4 // written by Walter Bright | |
5 // www.digitalmars.com | |
6 // License for redistribution is by either the Artistic License | |
7 // in artistic.txt, or the GNU General Public License in gnu.txt. | |
8 // See the included readme.txt for details. | |
9 | |
10 | |
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
15 | |
16 #include "dchar.h" | |
1103
b30fe7e1dbb9
- Updated to DMD frontend 1.041.
Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
parents:
1
diff
changeset
|
17 #include "rmem.h" |
1 | 18 |
19 #if M_UNICODE | |
20 | |
21 // Converts a char string to Unicode | |
22 | |
23 dchar *Dchar::dup(char *p) | |
24 { | |
25 dchar *s; | |
26 size_t len; | |
27 | |
28 if (!p) | |
29 return NULL; | |
30 len = strlen(p); | |
31 s = (dchar *)mem.malloc((len + 1) * sizeof(dchar)); | |
32 for (unsigned i = 0; i < len; i++) | |
33 { | |
34 s[i] = (dchar)(p[i] & 0xFF); | |
35 } | |
36 s[len] = 0; | |
37 return s; | |
38 } | |
39 | |
40 dchar *Dchar::memchr(dchar *p, int c, int count) | |
41 { | |
42 int u; | |
43 | |
44 for (u = 0; u < count; u++) | |
45 { | |
46 if (p[u] == c) | |
47 return p + u; | |
48 } | |
49 return NULL; | |
50 } | |
51 | |
52 #if _WIN32 && __DMC__ | |
// Hand-written hash for Win32 builds with the Digital Mars compiler.
//
// The function is `naked`, so there is no prologue: the arguments are read
// straight off the stack relative to ESP and the result is built up in EAX.
// ECX is used as the data pointer (str) and EDX as the remaining count (len).
//
// Two elements are consumed per step through one 32-bit load; a single
// trailing element is read as 16 bits.
//
// NOTE(review): the active `lea` scales the running hash by 9, whereas the
// commented-out `imul` (025h == 37) and the portable C++ branch of this
// #if both use 37, so the two branches presumably produce different hash
// values for the same input -- confirm that this is acceptable.
__declspec(naked)
unsigned Dchar::calcHash(const dchar *str, unsigned len)
{
    __asm
    {
        // ECX = first stack argument, EDX = second stack argument
        mov ECX,4[ESP]
        mov EDX,8[ESP]
        // hash = 0; an empty input returns immediately
        xor EAX,EAX
        test EDX,EDX
        je L92

        // Main loop: branch out when only 1 or 2 elements remain
        LC8: cmp EDX,1
        je L98
        cmp EDX,2
        je LAE

        // hash += 32-bit load; hash = hash + hash*8; advance by 4 bytes / 2 elements
        add EAX,[ECX]
        // imul EAX,EAX,025h
        lea EAX,[EAX][EAX*8]
        add ECX,4
        sub EDX,2
        jmp LC8

        // One element left: add it zero-extended from 16 bits
        L98: mov DX,[ECX]
        and EDX,0FFFFh
        add EAX,EDX
        ret

        // Two elements left: add the final 32-bit load without scaling
        LAE: add EAX,[ECX]
        L92: ret
    }
}
85 #else | |
86 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
87 { | |
88 unsigned hash = 0; | |
89 | |
90 for (;;) | |
91 { | |
92 switch (len) | |
93 { | |
94 case 0: | |
95 return hash; | |
96 | |
97 case 1: | |
98 hash += *(const uint16_t *)str; | |
99 return hash; | |
100 | |
101 case 2: | |
102 hash += *(const uint32_t *)str; | |
103 return hash; | |
104 | |
105 default: | |
106 hash += *(const uint32_t *)str; | |
107 hash *= 37; | |
108 str += 2; | |
109 len -= 2; | |
110 break; | |
111 } | |
112 } | |
113 } | |
114 #endif | |
115 | |
116 hash_t Dchar::icalcHash(const dchar *str, size_t len) | |
117 { | |
118 hash_t hash = 0; | |
119 | |
120 for (;;) | |
121 { | |
122 switch (len) | |
123 { | |
124 case 0: | |
125 return hash; | |
126 | |
127 case 1: | |
128 hash += *(const uint16_t *)str | 0x20; | |
129 return hash; | |
130 | |
131 case 2: | |
132 hash += *(const uint32_t *)str | 0x200020; | |
133 return hash; | |
134 | |
135 default: | |
136 hash += *(const uint32_t *)str | 0x200020; | |
137 hash *= 37; | |
138 str += 2; | |
139 len -= 2; | |
140 break; | |
141 } | |
142 } | |
143 } | |
144 | |
145 #elif MCBS | |
146 | |
147 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
148 { | |
149 hash_t hash = 0; | |
150 | |
151 while (1) | |
152 { | |
153 switch (len) | |
154 { | |
155 case 0: | |
156 return hash; | |
157 | |
158 case 1: | |
159 hash *= 37; | |
160 hash += *(const uint8_t *)str; | |
161 return hash; | |
162 | |
163 case 2: | |
164 hash *= 37; | |
165 hash += *(const uint16_t *)str; | |
166 return hash; | |
167 | |
168 case 3: | |
169 hash *= 37; | |
170 hash += (*(const uint16_t *)str << 8) + | |
171 ((const uint8_t *)str)[2]; | |
172 return hash; | |
173 | |
174 default: | |
175 hash *= 37; | |
176 hash += *(const uint32_t *)str; | |
177 str += 4; | |
178 len -= 4; | |
179 break; | |
180 } | |
181 } | |
182 } | |
183 | |
184 #elif UTF8 | |
185 | |
186 // Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335 | |
187 | |
// Sequence length in bytes, indexed by the value of the first byte of a
// sequence (0..255). Dchar::get() uses it to decide how many bytes to decode.
//
//   0x00..0xBF -> 1   (this includes 0x80..0xBF, the 10xxxxxx byte values)
//   0xC0..0xDF -> 2
//   0xE0..0xEF -> 3
//   0xF0..0xF7 -> 4
//   0xF8..0xFB -> 5
//   0xFC..0xFD -> 6
//   0xFE..0xFF -> 1
char Dchar::mblen[256] =
{
    // 0x00..0xBF
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    // 0xC0..0xDF
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    // 0xE0..0xEF
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
    // 0xF0..0xFF
    4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
};
207 | |
208 dchar *Dchar::dec(dchar *pstart, dchar *p) | |
209 { | |
210 while ((p[-1] & 0xC0) == 0x80) | |
211 p--; | |
212 return p; | |
213 } | |
214 | |
215 int Dchar::get(dchar *p) | |
216 { | |
217 unsigned c; | |
218 unsigned char *q = (unsigned char *)p; | |
219 | |
220 c = q[0]; | |
221 switch (mblen[c]) | |
222 { | |
223 case 2: | |
224 c = ((c - 0xC0) << 6) | | |
225 (q[1] - 0x80); | |
226 break; | |
227 | |
228 case 3: | |
229 c = ((c - 0xE0) << 12) | | |
230 ((q[1] - 0x80) << 6) | | |
231 (q[2] - 0x80); | |
232 break; | |
233 | |
234 case 4: | |
235 c = ((c - 0xF0) << 18) | | |
236 ((q[1] - 0x80) << 12) | | |
237 ((q[2] - 0x80) << 6) | | |
238 (q[3] - 0x80); | |
239 break; | |
240 | |
241 case 5: | |
242 c = ((c - 0xF8) << 24) | | |
243 ((q[1] - 0x80) << 18) | | |
244 ((q[2] - 0x80) << 12) | | |
245 ((q[3] - 0x80) << 6) | | |
246 (q[4] - 0x80); | |
247 break; | |
248 | |
249 case 6: | |
250 c = ((c - 0xFC) << 30) | | |
251 ((q[1] - 0x80) << 24) | | |
252 ((q[2] - 0x80) << 18) | | |
253 ((q[3] - 0x80) << 12) | | |
254 ((q[4] - 0x80) << 6) | | |
255 (q[5] - 0x80); | |
256 break; | |
257 } | |
258 return c; | |
259 } | |
260 | |
261 dchar *Dchar::put(dchar *p, unsigned c) | |
262 { | |
263 if (c <= 0x7F) | |
264 { | |
265 *p++ = c; | |
266 } | |
267 else if (c <= 0x7FF) | |
268 { | |
269 p[0] = 0xC0 + (c >> 6); | |
270 p[1] = 0x80 + (c & 0x3F); | |
271 p += 2; | |
272 } | |
273 else if (c <= 0xFFFF) | |
274 { | |
275 p[0] = 0xE0 + (c >> 12); | |
276 p[1] = 0x80 + ((c >> 6) & 0x3F); | |
277 p[2] = 0x80 + (c & 0x3F); | |
278 p += 3; | |
279 } | |
280 else if (c <= 0x1FFFFF) | |
281 { | |
282 p[0] = 0xF0 + (c >> 18); | |
283 p[1] = 0x80 + ((c >> 12) & 0x3F); | |
284 p[2] = 0x80 + ((c >> 6) & 0x3F); | |
285 p[3] = 0x80 + (c & 0x3F); | |
286 p += 4; | |
287 } | |
288 else if (c <= 0x3FFFFFF) | |
289 { | |
290 p[0] = 0xF8 + (c >> 24); | |
291 p[1] = 0x80 + ((c >> 18) & 0x3F); | |
292 p[2] = 0x80 + ((c >> 12) & 0x3F); | |
293 p[3] = 0x80 + ((c >> 6) & 0x3F); | |
294 p[4] = 0x80 + (c & 0x3F); | |
295 p += 5; | |
296 } | |
297 else if (c <= 0x7FFFFFFF) | |
298 { | |
299 p[0] = 0xFC + (c >> 30); | |
300 p[1] = 0x80 + ((c >> 24) & 0x3F); | |
301 p[2] = 0x80 + ((c >> 18) & 0x3F); | |
302 p[3] = 0x80 + ((c >> 12) & 0x3F); | |
303 p[4] = 0x80 + ((c >> 6) & 0x3F); | |
304 p[5] = 0x80 + (c & 0x3F); | |
305 p += 6; | |
306 } | |
307 else | |
308 assert(0); // not a UCS-4 character | |
309 return p; | |
310 } | |
311 | |
312 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
313 { | |
314 hash_t hash = 0; | |
315 | |
316 while (1) | |
317 { | |
318 switch (len) | |
319 { | |
320 case 0: | |
321 return hash; | |
322 | |
323 case 1: | |
324 hash *= 37; | |
325 hash += *(const uint8_t *)str; | |
326 return hash; | |
327 | |
328 case 2: | |
329 hash *= 37; | |
330 #if __I86__ | |
331 hash += *(const uint16_t *)str; | |
332 #else | |
333 hash += str[0] * 256 + str[1]; | |
334 #endif | |
335 return hash; | |
336 | |
337 case 3: | |
338 hash *= 37; | |
339 #if __I86__ | |
340 hash += (*(const uint16_t *)str << 8) + | |
341 ((const uint8_t *)str)[2]; | |
342 #else | |
343 hash += (str[0] * 256 + str[1]) * 256 + str[2]; | |
344 #endif | |
345 return hash; | |
346 | |
347 default: | |
348 hash *= 37; | |
349 #if __I86__ | |
350 hash += *(const uint32_t *)str; | |
351 #else | |
352 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3]; | |
353 #endif | |
354 | |
355 str += 4; | |
356 len -= 4; | |
357 break; | |
358 } | |
359 } | |
360 } | |
361 | |
362 #else // ascii | |
363 | |
364 hash_t Dchar::calcHash(const dchar *str, size_t len) | |
365 { | |
366 hash_t hash = 0; | |
367 | |
368 while (1) | |
369 { | |
370 switch (len) | |
371 { | |
372 case 0: | |
373 return hash; | |
374 | |
375 case 1: | |
376 hash *= 37; | |
377 hash += *(const uint8_t *)str; | |
378 return hash; | |
379 | |
380 case 2: | |
381 hash *= 37; | |
382 #if __I86__ | |
383 hash += *(const uint16_t *)str; | |
384 #else | |
385 hash += str[0] * 256 + str[1]; | |
386 #endif | |
387 return hash; | |
388 | |
389 case 3: | |
390 hash *= 37; | |
391 #if __I86__ | |
392 hash += (*(const uint16_t *)str << 8) + | |
393 ((const uint8_t *)str)[2]; | |
394 #else | |
395 hash += (str[0] * 256 + str[1]) * 256 + str[2]; | |
396 #endif | |
397 return hash; | |
398 | |
399 default: | |
400 hash *= 37; | |
401 #if __I86__ | |
402 hash += *(const uint32_t *)str; | |
403 #else | |
404 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3]; | |
405 #endif | |
406 str += 4; | |
407 len -= 4; | |
408 break; | |
409 } | |
410 } | |
411 } | |
412 | |
413 hash_t Dchar::icalcHash(const dchar *str, size_t len) | |
414 { | |
415 hash_t hash = 0; | |
416 | |
417 while (1) | |
418 { | |
419 switch (len) | |
420 { | |
421 case 0: | |
422 return hash; | |
423 | |
424 case 1: | |
425 hash *= 37; | |
426 hash += *(const uint8_t *)str | 0x20; | |
427 return hash; | |
428 | |
429 case 2: | |
430 hash *= 37; | |
431 hash += *(const uint16_t *)str | 0x2020; | |
432 return hash; | |
433 | |
434 case 3: | |
435 hash *= 37; | |
436 hash += ((*(const uint16_t *)str << 8) + | |
437 ((const uint8_t *)str)[2]) | 0x202020; | |
438 return hash; | |
439 | |
440 default: | |
441 hash *= 37; | |
442 hash += *(const uint32_t *)str | 0x20202020; | |
443 str += 4; | |
444 len -= 4; | |
445 break; | |
446 } | |
447 } | |
448 } | |
449 | |
450 #endif | |
451 | |
452 #if 0 | |
453 #include <stdio.h> | |
454 | |
// Stand-alone generator (normally compiled out) that prints the 256 entries
// of Dchar::mblen[], sixteen per line, ready to paste into the initializer.
//
// Declared as `int main()` because `void main()` is not valid C++.
int main()
{
    // Print out values to hardcode into Dchar::mblen[]
    for (int c = 0; c < 256; c++)
    {
        int s = 1;

        if (c >= 0xC0 && c <= 0xDF)
            s = 2;
        else if (c >= 0xE0 && c <= 0xEF)
            s = 3;
        else if (c >= 0xF0 && c <= 0xF7)
            s = 4;
        else if (c >= 0xF8 && c <= 0xFB)
            s = 5;
        else if (c >= 0xFC && c <= 0xFD)
            s = 6;

        printf("%d", s);
        // End the row after every sixteenth value.
        if ((c & 15) == 15)
            printf(",\n");
        else
            printf(",");
    }
    return 0;
}
482 #endif |