Mercurial > projects > ldc
diff druntime/src/compiler/dmd/aApplyR.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/druntime/src/compiler/dmd/aApplyR.d Tue Jun 02 17:43:06 2009 +0100 @@ -0,0 +1,957 @@ +/** + * This code handles decoding UTF strings for foreach_reverse loops. There are + * 6 combinations of conversions between char, wchar, and dchar, and 2 of each + * of those. + * + * Copyright: Copyright Digital Mars 2004 - 2009. + * License: <a href="http://www.boost.org/LICENSE_1_0.txt>Boost License 1.0</a>. + * Authors: Walter Bright, Sean Kelly + * + * Copyright Digital Mars 2004 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module rt.aApplyR; + +/* This code handles decoding UTF strings for foreach_reverse loops. + * There are 6 combinations of conversions between char, wchar, + * and dchar, and 2 of each of those. + */ + +private import rt.util.utf; + +/**********************************************/ +/* 1 argument versions */ + +// dg is D, but _aApplyRcd() is C +extern (D) typedef int delegate(void *) dg_t; + +extern (C) int _aApplyRcd1(in char[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d & 0x80) + { char c = cast(char)d; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + } + result = dg(cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcd1.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(dchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(dchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == '\U00100456'); break; + case 2: assert(d == '\u1234'); break; + case 3: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + result = dg(cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwd1.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(dchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(dchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == '\U00100456'); break; + case 2: assert(d == '\u1234'); break; + case 3: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRcw1(in char[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + wchar w; + + i--; + w = aa[i]; + if (w & 0x80) + { char c = cast(char)w; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + } + result = dg(cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcw1.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(wchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(wchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xDBC1); break; + case 2: assert(d == 0xDC56); break; + case 3: assert(d == 0x1234); break; + case 4: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} + +/*****************************/ + +extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + char c; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(cast(void *)&c2); + if (result) + return result; + } + continue; + } + c = cast(char)d; + result = dg(cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwc1.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(char d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(char d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xF4); break; + case 2: assert(d == 0x80); break; + case 3: assert(d == 0x91); break; + case 4: assert(d == 0x96); break; + case 5: assert(d == 0xE1); break; + case 6: assert(d == 0x88); break; + case 7: assert(d == 0xB4); break; + case 8: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0;) + { dchar d = aa[--i]; + char c; + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { + c = cast(char)d; + } + result = dg(cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdc1.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(char d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(char d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xF4); break; + case 2: assert(d == 0x80); break; + case 3: assert(d == 0x91); break; + case 4: assert(d == 0x96); break; + case 5: assert(d == 0xE1); break; + case 6: assert(d == 0x88); break; + case 7: assert(d == 0xB4); break; + case 8: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d = aa[--i]; + wchar w; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + result = dg(cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdw1.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(wchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(wchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xDBC1); break; + case 2: assert(d == 0xDC56); break; + case 3: assert(d == 0x1234); break; + case 4: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} + + +/****************************************************************************/ +/* 2 argument versions */ + +// dg is D, but _aApplyRcd2() is C +extern (D) typedef int delegate(void *, void *) dg2_t; + +extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg) +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplyRcd2(), len = %d\n", len); + for (i = len; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d & 0x80) + { char c = cast(char)d; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + } + result = dg(&i, cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcd2.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(k, dchar d; s) + { + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, dchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(d == 'b'); assert(k == 8); break; + case 1: assert(d == '\U00100456'); assert(k == 4); break; + case 2: assert(d == '\u1234'); assert(k == 1); break; + case 3: assert(d == 'a'); assert(k == 0); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + result = dg(&i, cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwd2.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(k, dchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, dchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 4); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == '\U00100456'); break; + case 2: assert(k == 1); assert(d == '\u1234'); break; + case 3: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + wchar w; + + i--; + w = aa[i]; + if (w & 0x80) + { char c = cast(char)w; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(&i, cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + } + result = dg(&i, cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcw2.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 8); assert(d == 'b'); break; + case 1: assert(k == 4); assert(d == 0xDBC1); break; + case 2: assert(k == 4); assert(d == 0xDC56); break; + case 3: assert(k == 1); assert(d == 0x1234); break; + case 4: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} + +/*****************************/ + +extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + char c; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(&i, cast(void *)&c2); + if (result) + return result; + } + continue; + } + c = cast(char)d; + result = dg(&i, cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwc2.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 4); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == 0xF4); break; + case 2: assert(k == 2); assert(d == 0x80); break; + case 3: assert(k == 2); assert(d == 0x91); break; + case 4: assert(k == 2); assert(d == 0x96); break; + case 5: assert(k == 1); assert(d == 0xE1); break; + case 6: assert(k == 1); assert(d == 0x88); break; + case 7: assert(k == 1); assert(d == 0xB4); break; + case 8: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d = aa[--i]; + char c; + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(&i, cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { c = cast(char)d; + } + result = dg(&i, cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdc2.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 3); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == 0xF4); break; + case 2: assert(k == 2); assert(d == 0x80); break; + case 3: assert(k == 2); assert(d == 0x91); break; + case 4: assert(k == 2); assert(d == 0x96); break; + case 5: assert(k == 1); assert(d == 0xE1); break; + case 6: assert(k == 1); assert(d == 0x88); break; + case 7: assert(k == 1); assert(d == 0xB4); break; + case 8: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d = aa[--i]; + wchar w; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(&i, cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + result = dg(&i, cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdw2.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 3); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == 0xDBC1); break; + case 2: assert(k == 2); assert(d == 0xDC56); break; + case 3: assert(k == 1); assert(d == 0x1234); break; + case 4: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +}