1
|
1
|
|
2 // Copyright (c) 1999-2006 by Digital Mars
|
|
3 // All Rights Reserved
|
|
4 // written by Walter Bright
|
|
5 // www.digitalmars.com
|
|
6 // License for redistribution is by either the Artistic License
|
|
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
|
|
8 // See the included readme.txt for details.
|
|
9
|
|
10
|
|
11 #include <stdio.h>
|
|
12 #include <stdlib.h>
|
|
13 #include <stdint.h>
|
|
14 #include <assert.h>
|
|
15
|
|
16 #include "dchar.h"
|
|
17 #include "mem.h"
|
|
18
|
|
19 #if M_UNICODE
|
|
20
|
|
21 // Converts a char string to Unicode
|
|
22
|
|
23 dchar *Dchar::dup(char *p)
|
|
24 {
|
|
25 dchar *s;
|
|
26 size_t len;
|
|
27
|
|
28 if (!p)
|
|
29 return NULL;
|
|
30 len = strlen(p);
|
|
31 s = (dchar *)mem.malloc((len + 1) * sizeof(dchar));
|
|
32 for (unsigned i = 0; i < len; i++)
|
|
33 {
|
|
34 s[i] = (dchar)(p[i] & 0xFF);
|
|
35 }
|
|
36 s[len] = 0;
|
|
37 return s;
|
|
38 }
|
|
39
|
|
40 dchar *Dchar::memchr(dchar *p, int c, int count)
|
|
41 {
|
|
42 int u;
|
|
43
|
|
44 for (u = 0; u < count; u++)
|
|
45 {
|
|
46 if (p[u] == c)
|
|
47 return p + u;
|
|
48 }
|
|
49 return NULL;
|
|
50 }
|
|
51
|
|
52 #if _WIN32 && __DMC__
|
|
53 __declspec(naked)
|
|
54 unsigned Dchar::calcHash(const dchar *str, unsigned len)
|
|
55 {
|
|
56 __asm
|
|
57 {
|
|
58 mov ECX,4[ESP]
|
|
59 mov EDX,8[ESP]
|
|
60 xor EAX,EAX
|
|
61 test EDX,EDX
|
|
62 je L92
|
|
63
|
|
64 LC8: cmp EDX,1
|
|
65 je L98
|
|
66 cmp EDX,2
|
|
67 je LAE
|
|
68
|
|
69 add EAX,[ECX]
|
|
70 // imul EAX,EAX,025h
|
|
71 lea EAX,[EAX][EAX*8]
|
|
72 add ECX,4
|
|
73 sub EDX,2
|
|
74 jmp LC8
|
|
75
|
|
76 L98: mov DX,[ECX]
|
|
77 and EDX,0FFFFh
|
|
78 add EAX,EDX
|
|
79 ret
|
|
80
|
|
81 LAE: add EAX,[ECX]
|
|
82 L92: ret
|
|
83 }
|
|
84 }
|
|
85 #else
|
|
86 hash_t Dchar::calcHash(const dchar *str, size_t len)
|
|
87 {
|
|
88 unsigned hash = 0;
|
|
89
|
|
90 for (;;)
|
|
91 {
|
|
92 switch (len)
|
|
93 {
|
|
94 case 0:
|
|
95 return hash;
|
|
96
|
|
97 case 1:
|
|
98 hash += *(const uint16_t *)str;
|
|
99 return hash;
|
|
100
|
|
101 case 2:
|
|
102 hash += *(const uint32_t *)str;
|
|
103 return hash;
|
|
104
|
|
105 default:
|
|
106 hash += *(const uint32_t *)str;
|
|
107 hash *= 37;
|
|
108 str += 2;
|
|
109 len -= 2;
|
|
110 break;
|
|
111 }
|
|
112 }
|
|
113 }
|
|
114 #endif
|
|
115
|
|
116 hash_t Dchar::icalcHash(const dchar *str, size_t len)
|
|
117 {
|
|
118 hash_t hash = 0;
|
|
119
|
|
120 for (;;)
|
|
121 {
|
|
122 switch (len)
|
|
123 {
|
|
124 case 0:
|
|
125 return hash;
|
|
126
|
|
127 case 1:
|
|
128 hash += *(const uint16_t *)str | 0x20;
|
|
129 return hash;
|
|
130
|
|
131 case 2:
|
|
132 hash += *(const uint32_t *)str | 0x200020;
|
|
133 return hash;
|
|
134
|
|
135 default:
|
|
136 hash += *(const uint32_t *)str | 0x200020;
|
|
137 hash *= 37;
|
|
138 str += 2;
|
|
139 len -= 2;
|
|
140 break;
|
|
141 }
|
|
142 }
|
|
143 }
|
|
144
|
|
145 #elif MCBS
|
|
146
|
|
147 hash_t Dchar::calcHash(const dchar *str, size_t len)
|
|
148 {
|
|
149 hash_t hash = 0;
|
|
150
|
|
151 while (1)
|
|
152 {
|
|
153 switch (len)
|
|
154 {
|
|
155 case 0:
|
|
156 return hash;
|
|
157
|
|
158 case 1:
|
|
159 hash *= 37;
|
|
160 hash += *(const uint8_t *)str;
|
|
161 return hash;
|
|
162
|
|
163 case 2:
|
|
164 hash *= 37;
|
|
165 hash += *(const uint16_t *)str;
|
|
166 return hash;
|
|
167
|
|
168 case 3:
|
|
169 hash *= 37;
|
|
170 hash += (*(const uint16_t *)str << 8) +
|
|
171 ((const uint8_t *)str)[2];
|
|
172 return hash;
|
|
173
|
|
174 default:
|
|
175 hash *= 37;
|
|
176 hash += *(const uint32_t *)str;
|
|
177 str += 4;
|
|
178 len -= 4;
|
|
179 break;
|
|
180 }
|
|
181 }
|
|
182 }
|
|
183
|
|
184 #elif UTF8
|
|
185
|
|
186 // Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335
|
|
187
|
|
188 char Dchar::mblen[256] =
|
|
189 {
|
|
190 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
191 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
192 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
193 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
194 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
195 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
196 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
197 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
198 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
199 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
200 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
201 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
202 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
203 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
204 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
|
205 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
|
|
206 };
|
|
207
|
|
208 dchar *Dchar::dec(dchar *pstart, dchar *p)
|
|
209 {
|
|
210 while ((p[-1] & 0xC0) == 0x80)
|
|
211 p--;
|
|
212 return p;
|
|
213 }
|
|
214
|
|
215 int Dchar::get(dchar *p)
|
|
216 {
|
|
217 unsigned c;
|
|
218 unsigned char *q = (unsigned char *)p;
|
|
219
|
|
220 c = q[0];
|
|
221 switch (mblen[c])
|
|
222 {
|
|
223 case 2:
|
|
224 c = ((c - 0xC0) << 6) |
|
|
225 (q[1] - 0x80);
|
|
226 break;
|
|
227
|
|
228 case 3:
|
|
229 c = ((c - 0xE0) << 12) |
|
|
230 ((q[1] - 0x80) << 6) |
|
|
231 (q[2] - 0x80);
|
|
232 break;
|
|
233
|
|
234 case 4:
|
|
235 c = ((c - 0xF0) << 18) |
|
|
236 ((q[1] - 0x80) << 12) |
|
|
237 ((q[2] - 0x80) << 6) |
|
|
238 (q[3] - 0x80);
|
|
239 break;
|
|
240
|
|
241 case 5:
|
|
242 c = ((c - 0xF8) << 24) |
|
|
243 ((q[1] - 0x80) << 18) |
|
|
244 ((q[2] - 0x80) << 12) |
|
|
245 ((q[3] - 0x80) << 6) |
|
|
246 (q[4] - 0x80);
|
|
247 break;
|
|
248
|
|
249 case 6:
|
|
250 c = ((c - 0xFC) << 30) |
|
|
251 ((q[1] - 0x80) << 24) |
|
|
252 ((q[2] - 0x80) << 18) |
|
|
253 ((q[3] - 0x80) << 12) |
|
|
254 ((q[4] - 0x80) << 6) |
|
|
255 (q[5] - 0x80);
|
|
256 break;
|
|
257 }
|
|
258 return c;
|
|
259 }
|
|
260
|
|
261 dchar *Dchar::put(dchar *p, unsigned c)
|
|
262 {
|
|
263 if (c <= 0x7F)
|
|
264 {
|
|
265 *p++ = c;
|
|
266 }
|
|
267 else if (c <= 0x7FF)
|
|
268 {
|
|
269 p[0] = 0xC0 + (c >> 6);
|
|
270 p[1] = 0x80 + (c & 0x3F);
|
|
271 p += 2;
|
|
272 }
|
|
273 else if (c <= 0xFFFF)
|
|
274 {
|
|
275 p[0] = 0xE0 + (c >> 12);
|
|
276 p[1] = 0x80 + ((c >> 6) & 0x3F);
|
|
277 p[2] = 0x80 + (c & 0x3F);
|
|
278 p += 3;
|
|
279 }
|
|
280 else if (c <= 0x1FFFFF)
|
|
281 {
|
|
282 p[0] = 0xF0 + (c >> 18);
|
|
283 p[1] = 0x80 + ((c >> 12) & 0x3F);
|
|
284 p[2] = 0x80 + ((c >> 6) & 0x3F);
|
|
285 p[3] = 0x80 + (c & 0x3F);
|
|
286 p += 4;
|
|
287 }
|
|
288 else if (c <= 0x3FFFFFF)
|
|
289 {
|
|
290 p[0] = 0xF8 + (c >> 24);
|
|
291 p[1] = 0x80 + ((c >> 18) & 0x3F);
|
|
292 p[2] = 0x80 + ((c >> 12) & 0x3F);
|
|
293 p[3] = 0x80 + ((c >> 6) & 0x3F);
|
|
294 p[4] = 0x80 + (c & 0x3F);
|
|
295 p += 5;
|
|
296 }
|
|
297 else if (c <= 0x7FFFFFFF)
|
|
298 {
|
|
299 p[0] = 0xFC + (c >> 30);
|
|
300 p[1] = 0x80 + ((c >> 24) & 0x3F);
|
|
301 p[2] = 0x80 + ((c >> 18) & 0x3F);
|
|
302 p[3] = 0x80 + ((c >> 12) & 0x3F);
|
|
303 p[4] = 0x80 + ((c >> 6) & 0x3F);
|
|
304 p[5] = 0x80 + (c & 0x3F);
|
|
305 p += 6;
|
|
306 }
|
|
307 else
|
|
308 assert(0); // not a UCS-4 character
|
|
309 return p;
|
|
310 }
|
|
311
|
|
312 hash_t Dchar::calcHash(const dchar *str, size_t len)
|
|
313 {
|
|
314 hash_t hash = 0;
|
|
315
|
|
316 while (1)
|
|
317 {
|
|
318 switch (len)
|
|
319 {
|
|
320 case 0:
|
|
321 return hash;
|
|
322
|
|
323 case 1:
|
|
324 hash *= 37;
|
|
325 hash += *(const uint8_t *)str;
|
|
326 return hash;
|
|
327
|
|
328 case 2:
|
|
329 hash *= 37;
|
|
330 #if __I86__
|
|
331 hash += *(const uint16_t *)str;
|
|
332 #else
|
|
333 hash += str[0] * 256 + str[1];
|
|
334 #endif
|
|
335 return hash;
|
|
336
|
|
337 case 3:
|
|
338 hash *= 37;
|
|
339 #if __I86__
|
|
340 hash += (*(const uint16_t *)str << 8) +
|
|
341 ((const uint8_t *)str)[2];
|
|
342 #else
|
|
343 hash += (str[0] * 256 + str[1]) * 256 + str[2];
|
|
344 #endif
|
|
345 return hash;
|
|
346
|
|
347 default:
|
|
348 hash *= 37;
|
|
349 #if __I86__
|
|
350 hash += *(const uint32_t *)str;
|
|
351 #else
|
|
352 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3];
|
|
353 #endif
|
|
354
|
|
355 str += 4;
|
|
356 len -= 4;
|
|
357 break;
|
|
358 }
|
|
359 }
|
|
360 }
|
|
361
|
|
362 #else // ascii
|
|
363
|
|
364 hash_t Dchar::calcHash(const dchar *str, size_t len)
|
|
365 {
|
|
366 hash_t hash = 0;
|
|
367
|
|
368 while (1)
|
|
369 {
|
|
370 switch (len)
|
|
371 {
|
|
372 case 0:
|
|
373 return hash;
|
|
374
|
|
375 case 1:
|
|
376 hash *= 37;
|
|
377 hash += *(const uint8_t *)str;
|
|
378 return hash;
|
|
379
|
|
380 case 2:
|
|
381 hash *= 37;
|
|
382 #if __I86__
|
|
383 hash += *(const uint16_t *)str;
|
|
384 #else
|
|
385 hash += str[0] * 256 + str[1];
|
|
386 #endif
|
|
387 return hash;
|
|
388
|
|
389 case 3:
|
|
390 hash *= 37;
|
|
391 #if __I86__
|
|
392 hash += (*(const uint16_t *)str << 8) +
|
|
393 ((const uint8_t *)str)[2];
|
|
394 #else
|
|
395 hash += (str[0] * 256 + str[1]) * 256 + str[2];
|
|
396 #endif
|
|
397 return hash;
|
|
398
|
|
399 default:
|
|
400 hash *= 37;
|
|
401 #if __I86__
|
|
402 hash += *(const uint32_t *)str;
|
|
403 #else
|
|
404 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3];
|
|
405 #endif
|
|
406 str += 4;
|
|
407 len -= 4;
|
|
408 break;
|
|
409 }
|
|
410 }
|
|
411 }
|
|
412
|
|
413 hash_t Dchar::icalcHash(const dchar *str, size_t len)
|
|
414 {
|
|
415 hash_t hash = 0;
|
|
416
|
|
417 while (1)
|
|
418 {
|
|
419 switch (len)
|
|
420 {
|
|
421 case 0:
|
|
422 return hash;
|
|
423
|
|
424 case 1:
|
|
425 hash *= 37;
|
|
426 hash += *(const uint8_t *)str | 0x20;
|
|
427 return hash;
|
|
428
|
|
429 case 2:
|
|
430 hash *= 37;
|
|
431 hash += *(const uint16_t *)str | 0x2020;
|
|
432 return hash;
|
|
433
|
|
434 case 3:
|
|
435 hash *= 37;
|
|
436 hash += ((*(const uint16_t *)str << 8) +
|
|
437 ((const uint8_t *)str)[2]) | 0x202020;
|
|
438 return hash;
|
|
439
|
|
440 default:
|
|
441 hash *= 37;
|
|
442 hash += *(const uint32_t *)str | 0x20202020;
|
|
443 str += 4;
|
|
444 len -= 4;
|
|
445 break;
|
|
446 }
|
|
447 }
|
|
448 }
|
|
449
|
|
450 #endif
|
|
451
|
|
452 #if 0
|
|
453 #include <stdio.h>
|
|
454
|
|
// Generator for the initializer of Dchar::mblen[]: prints, for every byte
// value 0..255, the sequence length selected by that lead byte, sixteen
// comma-separated entries per line.
//
// Declared as int main() because a main returning void is not valid C++.
int main()
{
    for (int c = 0; c < 256; c++)
    {
        // Anything outside the lead-byte ranges below counts as length 1.
        int s = 1;
        if (c >= 0xC0 && c <= 0xDF)
            s = 2;
        else if (c >= 0xE0 && c <= 0xEF)
            s = 3;
        else if (c >= 0xF0 && c <= 0xF7)
            s = 4;
        else if (c >= 0xF8 && c <= 0xFB)
            s = 5;
        else if (c >= 0xFC && c <= 0xFD)
            s = 6;

        printf("%d", s);
        // Break the line after every sixteenth entry.
        if ((c & 15) == 15)
            printf(",\n");
        else
            printf(",");
    }
    return 0;
}
|
|
482 #endif
|