Mercurial > projects > ldc
diff dmd2/dchar.c @ 758:f04dde6e882c
Added initial D2 support, D2 frontend and changes to codegen to make things compile.
author | Tomas Lindquist Olsen <tomas.l.olsen@gmail.com> |
---|---|
date | Tue, 11 Nov 2008 01:38:48 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dmd2/dchar.c Tue Nov 11 01:38:48 2008 +0100 @@ -0,0 +1,482 @@ + +// Copyright (c) 1999-2006 by Digital Mars +// All Rights Reserved +// written by Walter Bright +// www.digitalmars.com +// License for redistribution is by either the Artistic License +// in artistic.txt, or the GNU General Public License in gnu.txt. +// See the included readme.txt for details. + + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <assert.h> + +#include "dchar.h" +#include "mem.h" + +#if M_UNICODE + +// Converts a char string to Unicode + +dchar *Dchar::dup(char *p) +{ + dchar *s; + size_t len; + + if (!p) + return NULL; + len = strlen(p); + s = (dchar *)mem.malloc((len + 1) * sizeof(dchar)); + for (unsigned i = 0; i < len; i++) + { + s[i] = (dchar)(p[i] & 0xFF); + } + s[len] = 0; + return s; +} + +dchar *Dchar::memchr(dchar *p, int c, int count) +{ + int u; + + for (u = 0; u < count; u++) + { + if (p[u] == c) + return p + u; + } + return NULL; +} + +#if _WIN32 && __DMC__ +__declspec(naked) +unsigned Dchar::calcHash(const dchar *str, unsigned len) +{ + __asm + { + mov ECX,4[ESP] + mov EDX,8[ESP] + xor EAX,EAX + test EDX,EDX + je L92 + +LC8: cmp EDX,1 + je L98 + cmp EDX,2 + je LAE + + add EAX,[ECX] +// imul EAX,EAX,025h + lea EAX,[EAX][EAX*8] + add ECX,4 + sub EDX,2 + jmp LC8 + +L98: mov DX,[ECX] + and EDX,0FFFFh + add EAX,EDX + ret + +LAE: add EAX,[ECX] +L92: ret + } +} +#else +hash_t Dchar::calcHash(const dchar *str, size_t len) +{ + unsigned hash = 0; + + for (;;) + { + switch (len) + { + case 0: + return hash; + + case 1: + hash += *(const uint16_t *)str; + return hash; + + case 2: + hash += *(const uint32_t *)str; + return hash; + + default: + hash += *(const uint32_t *)str; + hash *= 37; + str += 2; + len -= 2; + break; + } + } +} +#endif + +hash_t Dchar::icalcHash(const dchar *str, size_t len) +{ + hash_t hash = 0; + + for (;;) + { + switch (len) + { + case 0: + return hash; + + case 1: + hash += *(const uint16_t *)str | 0x20; + return hash; + + case 2: + hash += *(const uint32_t *)str | 0x200020; + return hash; + + default: + hash += *(const uint32_t *)str | 0x200020; + hash *= 37; + str += 2; + len -= 2; + break; + } + } +} + +#elif MCBS + +hash_t Dchar::calcHash(const dchar *str, size_t len) +{ + hash_t hash = 0; + + while (1) + { + switch (len) + { + case 0: + return hash; + + case 1: + hash *= 37; + hash += *(const uint8_t *)str; + return hash; + + case 2: + hash *= 37; + hash += *(const uint16_t *)str; + return hash; + + case 3: + hash *= 37; + hash += (*(const uint16_t *)str << 8) + + ((const uint8_t *)str)[2]; + return hash; + + default: + hash *= 37; + hash += *(const uint32_t *)str; + str += 4; + len -= 4; + break; + } + } +} + +#elif UTF8 + +// Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335 + +char Dchar::mblen[256] = +{ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, +}; + +dchar *Dchar::dec(dchar *pstart, dchar *p) +{ + while ((p[-1] & 0xC0) == 0x80) + p--; + return p; +} + +int Dchar::get(dchar *p) +{ + unsigned c; + unsigned char *q = (unsigned char *)p; + + c = q[0]; + switch (mblen[c]) + { + case 2: + c = ((c - 0xC0) << 6) | + (q[1] - 0x80); + break; + + case 3: + c = ((c - 0xE0) << 12) | + ((q[1] - 0x80) << 6) | + (q[2] - 0x80); + break; + + case 4: + c = ((c - 0xF0) << 18) | + ((q[1] - 0x80) << 12) | + ((q[2] - 0x80) << 6) | + (q[3] - 0x80); + break; + + case 5: + c = ((c - 0xF8) << 24) | + ((q[1] - 0x80) << 18) | + ((q[2] - 0x80) << 12) | + ((q[3] - 0x80) << 6) | + (q[4] - 0x80); + break; + + case 6: + c = ((c - 0xFC) << 30) | + ((q[1] - 0x80) << 24) | + ((q[2] - 0x80) << 18) | + ((q[3] - 0x80) << 12) | + ((q[4] - 0x80) << 6) | + (q[5] - 0x80); + break; + } + return c; +} + +dchar *Dchar::put(dchar *p, unsigned c) +{ + if (c <= 0x7F) + { + *p++ = c; + } + else if (c <= 0x7FF) + { + p[0] = 0xC0 + (c >> 6); + p[1] = 0x80 + (c & 0x3F); + p += 2; + } + else if (c <= 0xFFFF) + { + p[0] = 0xE0 + (c >> 12); + p[1] = 0x80 + ((c >> 6) & 0x3F); + p[2] = 0x80 + (c & 0x3F); + p += 3; + } + else if (c <= 0x1FFFFF) + { + p[0] = 0xF0 + (c >> 18); + p[1] = 0x80 + ((c >> 12) & 0x3F); + p[2] = 0x80 + ((c >> 6) & 0x3F); + p[3] = 0x80 + (c & 0x3F); + p += 4; + } + else if (c <= 0x3FFFFFF) + { + p[0] = 0xF8 + (c >> 24); + p[1] = 0x80 + ((c >> 18) & 0x3F); + p[2] = 0x80 + ((c >> 12) & 0x3F); + p[3] = 0x80 + ((c >> 6) & 0x3F); + p[4] = 0x80 + (c & 0x3F); + p += 5; + } + else if (c <= 0x7FFFFFFF) + { + p[0] = 0xFC + (c >> 30); + p[1] = 0x80 + ((c >> 24) & 0x3F); + p[2] = 0x80 + ((c >> 18) & 0x3F); + p[3] = 0x80 + ((c >> 12) & 0x3F); + p[4] = 0x80 + ((c >> 6) & 0x3F); + p[5] = 0x80 + (c & 0x3F); + p += 6; + } + else + assert(0); // not a UCS-4 character + return p; +} + +hash_t Dchar::calcHash(const dchar *str, size_t len) +{ + hash_t hash = 0; + + while (1) + { + switch (len) + { + case 0: + return hash; + + case 1: + hash *= 37; + hash += *(const uint8_t *)str; + return hash; + + case 2: + hash *= 37; +#if __I86__ + hash += *(const uint16_t *)str; +#else + hash += str[0] * 256 + str[1]; +#endif + return hash; + + case 3: + hash *= 37; +#if __I86__ + hash += (*(const uint16_t *)str << 8) + + ((const uint8_t *)str)[2]; +#else + hash += (str[0] * 256 + str[1]) * 256 + str[2]; +#endif + return hash; + + default: + hash *= 37; +#if __I86__ + hash += *(const uint32_t *)str; +#else + hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3]; +#endif + + str += 4; + len -= 4; + break; + } + } +} + +#else // ascii + +hash_t Dchar::calcHash(const dchar *str, size_t len) +{ + hash_t hash = 0; + + while (1) + { + switch (len) + { + case 0: + return hash; + + case 1: + hash *= 37; + hash += *(const uint8_t *)str; + return hash; + + case 2: + hash *= 37; +#if __I86__ + hash += *(const uint16_t *)str; +#else + hash += str[0] * 256 + str[1]; +#endif + return hash; + + case 3: + hash *= 37; +#if __I86__ + hash += (*(const uint16_t *)str << 8) + + ((const uint8_t *)str)[2]; +#else + hash += (str[0] * 256 + str[1]) * 256 + str[2]; +#endif + return hash; + + default: + hash *= 37; +#if __I86__ + hash += *(const uint32_t *)str; +#else + hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3]; +#endif + str += 4; + len -= 4; + break; + } + } +} + +hash_t Dchar::icalcHash(const dchar *str, size_t len) +{ + hash_t hash = 0; + + while (1) + { + switch (len) + { + case 0: + return hash; + + case 1: + hash *= 37; + hash += *(const uint8_t *)str | 0x20; + return hash; + + case 2: + hash *= 37; + hash += *(const uint16_t *)str | 0x2020; + return hash; + + case 3: + hash *= 37; + hash += ((*(const uint16_t *)str << 8) + + ((const uint8_t *)str)[2]) | 0x202020; + return hash; + + default: + hash *= 37; + hash += *(const uint32_t *)str | 0x20202020; + str += 4; + len -= 4; + break; + } + } +} + +#endif + +#if 0 +#include <stdio.h> + +void main() +{ + // Print out values to hardcode into Dchar::mblen[] + int c; + int s; + + for (c = 0; c < 256; c++) + { + s = 1; + if (c >= 0xC0 && c <= 0xDF) + s = 2; + if (c >= 0xE0 && c <= 0xEF) + s = 3; + if (c >= 0xF0 && c <= 0xF7) + s = 4; + if (c >= 0xF8 && c <= 0xFB) + s = 5; + if (c >= 0xFC && c <= 0xFD) + s = 6; + + printf("%d", s); + if ((c & 15) == 15) + printf(",\n"); + else + printf(","); + } +} +#endif