comparison dmd/root/dchar.c @ 1194:1853dcd9b944

Moved some DMDFE files into a separate dmd/root subdir to more closely match the DMD file structure since 1.041.
author Tomas Lindquist Olsen <tomas.l.olsen gmail.com>
date Fri, 03 Apr 2009 17:02:52 +0200
parents dmd/dchar.c@b30fe7e1dbb9
children
comparison
equal deleted inserted replaced
1193:c271eca933fb 1194:1853dcd9b944
1
2 // Copyright (c) 1999-2006 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // www.digitalmars.com
6 // License for redistribution is by either the Artistic License
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
8 // See the included readme.txt for details.
9
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <stdint.h>
14 #include <assert.h>
15
16 #include "dchar.h"
17 #include "rmem.h"
18
19 #if M_UNICODE
20
21 // Converts a char string to Unicode
22
23 dchar *Dchar::dup(char *p)
24 {
25 dchar *s;
26 size_t len;
27
28 if (!p)
29 return NULL;
30 len = strlen(p);
31 s = (dchar *)mem.malloc((len + 1) * sizeof(dchar));
32 for (unsigned i = 0; i < len; i++)
33 {
34 s[i] = (dchar)(p[i] & 0xFF);
35 }
36 s[len] = 0;
37 return s;
38 }
39
40 dchar *Dchar::memchr(dchar *p, int c, int count)
41 {
42 int u;
43
44 for (u = 0; u < count; u++)
45 {
46 if (p[u] == c)
47 return p + u;
48 }
49 return NULL;
50 }
51
52 #if _WIN32 && __DMC__
// Hand-written x86 version of calcHash, built only when _WIN32 && __DMC__.
// The function is 'naked', so there is no prologue/epilogue: the two
// arguments are read directly from the stack (4[ESP] = str, 8[ESP] = len)
// and the result is left in EAX.
//
// Elements are consumed two at a time as one 32-bit load; a trailing single
// element is read as a 16-bit value.
//
// NOTE(review): the mixing step here is 'lea EAX,[EAX][EAX*8]' (hash * 9),
// while the commented-out imul and the portable C fallback multiply by
// 37 (025h). The two builds therefore produce different hash values --
// confirm whether that is intentional.
53 __declspec(naked)
54 unsigned Dchar::calcHash(const dchar *str, unsigned len)
55 {
56 __asm
57 {
58 mov ECX,4[ESP] // ECX = str
59 mov EDX,8[ESP] // EDX = len
60 xor EAX,EAX // hash = 0
61 test EDX,EDX
62 je L92 // len == 0: return 0
63
64 LC8: cmp EDX,1 // loop head: dispatch on the remaining length
65 je L98 // one element left
66 cmp EDX,2
67 je LAE // two elements left
68
69 add EAX,[ECX] // hash += 32-bit word at str
70 // imul EAX,EAX,025h
71 lea EAX,[EAX][EAX*8] // hash = hash + hash*8
72 add ECX,4 // advance str by 4 bytes
73 sub EDX,2 // len -= 2
74 jmp LC8
75
76 L98: mov DX,[ECX] // load the final 16-bit element
77 and EDX,0FFFFh // clear the upper half of EDX
78 add EAX,EDX
79 ret
80
81 LAE: add EAX,[ECX] // add the final 32-bit word (two elements)
82 L92: ret
83 }
84 }
85 #else
86 hash_t Dchar::calcHash(const dchar *str, size_t len)
87 {
88 unsigned hash = 0;
89
90 for (;;)
91 {
92 switch (len)
93 {
94 case 0:
95 return hash;
96
97 case 1:
98 hash += *(const uint16_t *)str;
99 return hash;
100
101 case 2:
102 hash += *(const uint32_t *)str;
103 return hash;
104
105 default:
106 hash += *(const uint32_t *)str;
107 hash *= 37;
108 str += 2;
109 len -= 2;
110 break;
111 }
112 }
113 }
114 #endif
115
116 hash_t Dchar::icalcHash(const dchar *str, size_t len)
117 {
118 hash_t hash = 0;
119
120 for (;;)
121 {
122 switch (len)
123 {
124 case 0:
125 return hash;
126
127 case 1:
128 hash += *(const uint16_t *)str | 0x20;
129 return hash;
130
131 case 2:
132 hash += *(const uint32_t *)str | 0x200020;
133 return hash;
134
135 default:
136 hash += *(const uint32_t *)str | 0x200020;
137 hash *= 37;
138 str += 2;
139 len -= 2;
140 break;
141 }
142 }
143 }
144
145 #elif MCBS
146
147 hash_t Dchar::calcHash(const dchar *str, size_t len)
148 {
149 hash_t hash = 0;
150
151 while (1)
152 {
153 switch (len)
154 {
155 case 0:
156 return hash;
157
158 case 1:
159 hash *= 37;
160 hash += *(const uint8_t *)str;
161 return hash;
162
163 case 2:
164 hash *= 37;
165 hash += *(const uint16_t *)str;
166 return hash;
167
168 case 3:
169 hash *= 37;
170 hash += (*(const uint16_t *)str << 8) +
171 ((const uint8_t *)str)[2];
172 return hash;
173
174 default:
175 hash *= 37;
176 hash += *(const uint32_t *)str;
177 str += 4;
178 len -= 4;
179 break;
180 }
181 }
182 }
183
184 #elif UTF8
185
186 // Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335
187
// Sequence length, in bytes, indexed by the value of a sequence's first
// byte. Dchar::get() switches on this value to pick its decoding case.
// The values match the disabled generator program at the end of this file:
// 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF7 -> 4, 0xF8-0xFB -> 5,
// 0xFC-0xFD -> 6, every other byte -> 1.
188 char Dchar::mblen[256] =
189 {
190 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x00-0x0F
191 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x10-0x1F
192 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x20-0x2F
193 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x30-0x3F
194 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40-0x4F
195 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x50-0x5F
196 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60-0x6F
197 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x70-0x7F
198 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x80-0x8F
199 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x90-0x9F
200 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xA0-0xAF
201 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xB0-0xBF
202 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0-0xCF
203 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xD0-0xDF
204 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xE0-0xEF
205 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, // 0xF0-0xFF
206 };
207
208 dchar *Dchar::dec(dchar *pstart, dchar *p)
209 {
210 while ((p[-1] & 0xC0) == 0x80)
211 p--;
212 return p;
213 }
214
215 int Dchar::get(dchar *p)
216 {
217 unsigned c;
218 unsigned char *q = (unsigned char *)p;
219
220 c = q[0];
221 switch (mblen[c])
222 {
223 case 2:
224 c = ((c - 0xC0) << 6) |
225 (q[1] - 0x80);
226 break;
227
228 case 3:
229 c = ((c - 0xE0) << 12) |
230 ((q[1] - 0x80) << 6) |
231 (q[2] - 0x80);
232 break;
233
234 case 4:
235 c = ((c - 0xF0) << 18) |
236 ((q[1] - 0x80) << 12) |
237 ((q[2] - 0x80) << 6) |
238 (q[3] - 0x80);
239 break;
240
241 case 5:
242 c = ((c - 0xF8) << 24) |
243 ((q[1] - 0x80) << 18) |
244 ((q[2] - 0x80) << 12) |
245 ((q[3] - 0x80) << 6) |
246 (q[4] - 0x80);
247 break;
248
249 case 6:
250 c = ((c - 0xFC) << 30) |
251 ((q[1] - 0x80) << 24) |
252 ((q[2] - 0x80) << 18) |
253 ((q[3] - 0x80) << 12) |
254 ((q[4] - 0x80) << 6) |
255 (q[5] - 0x80);
256 break;
257 }
258 return c;
259 }
260
261 dchar *Dchar::put(dchar *p, unsigned c)
262 {
263 if (c <= 0x7F)
264 {
265 *p++ = c;
266 }
267 else if (c <= 0x7FF)
268 {
269 p[0] = 0xC0 + (c >> 6);
270 p[1] = 0x80 + (c & 0x3F);
271 p += 2;
272 }
273 else if (c <= 0xFFFF)
274 {
275 p[0] = 0xE0 + (c >> 12);
276 p[1] = 0x80 + ((c >> 6) & 0x3F);
277 p[2] = 0x80 + (c & 0x3F);
278 p += 3;
279 }
280 else if (c <= 0x1FFFFF)
281 {
282 p[0] = 0xF0 + (c >> 18);
283 p[1] = 0x80 + ((c >> 12) & 0x3F);
284 p[2] = 0x80 + ((c >> 6) & 0x3F);
285 p[3] = 0x80 + (c & 0x3F);
286 p += 4;
287 }
288 else if (c <= 0x3FFFFFF)
289 {
290 p[0] = 0xF8 + (c >> 24);
291 p[1] = 0x80 + ((c >> 18) & 0x3F);
292 p[2] = 0x80 + ((c >> 12) & 0x3F);
293 p[3] = 0x80 + ((c >> 6) & 0x3F);
294 p[4] = 0x80 + (c & 0x3F);
295 p += 5;
296 }
297 else if (c <= 0x7FFFFFFF)
298 {
299 p[0] = 0xFC + (c >> 30);
300 p[1] = 0x80 + ((c >> 24) & 0x3F);
301 p[2] = 0x80 + ((c >> 18) & 0x3F);
302 p[3] = 0x80 + ((c >> 12) & 0x3F);
303 p[4] = 0x80 + ((c >> 6) & 0x3F);
304 p[5] = 0x80 + (c & 0x3F);
305 p += 6;
306 }
307 else
308 assert(0); // not a UCS-4 character
309 return p;
310 }
311
312 hash_t Dchar::calcHash(const dchar *str, size_t len)
313 {
314 hash_t hash = 0;
315
316 while (1)
317 {
318 switch (len)
319 {
320 case 0:
321 return hash;
322
323 case 1:
324 hash *= 37;
325 hash += *(const uint8_t *)str;
326 return hash;
327
328 case 2:
329 hash *= 37;
330 #if __I86__
331 hash += *(const uint16_t *)str;
332 #else
333 hash += str[0] * 256 + str[1];
334 #endif
335 return hash;
336
337 case 3:
338 hash *= 37;
339 #if __I86__
340 hash += (*(const uint16_t *)str << 8) +
341 ((const uint8_t *)str)[2];
342 #else
343 hash += (str[0] * 256 + str[1]) * 256 + str[2];
344 #endif
345 return hash;
346
347 default:
348 hash *= 37;
349 #if __I86__
350 hash += *(const uint32_t *)str;
351 #else
352 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3];
353 #endif
354
355 str += 4;
356 len -= 4;
357 break;
358 }
359 }
360 }
361
362 #else // ascii
363
364 hash_t Dchar::calcHash(const dchar *str, size_t len)
365 {
366 hash_t hash = 0;
367
368 while (1)
369 {
370 switch (len)
371 {
372 case 0:
373 return hash;
374
375 case 1:
376 hash *= 37;
377 hash += *(const uint8_t *)str;
378 return hash;
379
380 case 2:
381 hash *= 37;
382 #if __I86__
383 hash += *(const uint16_t *)str;
384 #else
385 hash += str[0] * 256 + str[1];
386 #endif
387 return hash;
388
389 case 3:
390 hash *= 37;
391 #if __I86__
392 hash += (*(const uint16_t *)str << 8) +
393 ((const uint8_t *)str)[2];
394 #else
395 hash += (str[0] * 256 + str[1]) * 256 + str[2];
396 #endif
397 return hash;
398
399 default:
400 hash *= 37;
401 #if __I86__
402 hash += *(const uint32_t *)str;
403 #else
404 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3];
405 #endif
406 str += 4;
407 len -= 4;
408 break;
409 }
410 }
411 }
412
413 hash_t Dchar::icalcHash(const dchar *str, size_t len)
414 {
415 hash_t hash = 0;
416
417 while (1)
418 {
419 switch (len)
420 {
421 case 0:
422 return hash;
423
424 case 1:
425 hash *= 37;
426 hash += *(const uint8_t *)str | 0x20;
427 return hash;
428
429 case 2:
430 hash *= 37;
431 hash += *(const uint16_t *)str | 0x2020;
432 return hash;
433
434 case 3:
435 hash *= 37;
436 hash += ((*(const uint16_t *)str << 8) +
437 ((const uint8_t *)str)[2]) | 0x202020;
438 return hash;
439
440 default:
441 hash *= 37;
442 hash += *(const uint32_t *)str | 0x20202020;
443 str += 4;
444 len -= 4;
445 break;
446 }
447 }
448 }
449
450 #endif
451
452 #if 0
453 #include <stdio.h>
454
// Generator for the Dchar::mblen[] table (kept under #if 0).
// Prints, for every byte value 0..255, the sequence length implied by
// that byte as a lead byte, 16 values per output line.
// main() returns int, as standard C++ requires; the previous 'void main()'
// is rejected by conforming compilers.
int main()
{
    // Print out values to hardcode into Dchar::mblen[]
    for (int c = 0; c < 256; c++)
    {
        int s;

        if (c >= 0xFC && c <= 0xFD)
            s = 6;
        else if (c >= 0xF8 && c <= 0xFB)
            s = 5;
        else if (c >= 0xF0 && c <= 0xF7)
            s = 4;
        else if (c >= 0xE0 && c <= 0xEF)
            s = 3;
        else if (c >= 0xC0 && c <= 0xDF)
            s = 2;
        else
            s = 1;

        printf("%d", s);
        if ((c & 15) == 15)
            printf(",\n");      // end of a 16-entry row
        else
            printf(",");
    }
    return 0;
}
482 #endif