changeset 759:d3eb054172f9

Added copy of druntime from DMD 2.020 modified for LDC.
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Tue, 11 Nov 2008 01:52:37 +0100
parents f04dde6e882c
children 6f33b427bfd1
files bin/ldc2.conf druntime/import/ldc/cstdarg.di druntime/import/ldc/intrinsics.di druntime/license.txt druntime/readme.txt druntime/src/build-dmd.bat druntime/src/build-dmd.sh druntime/src/build-ldc.sh druntime/src/common/core/bitmanip.d druntime/src/common/core/exception.d druntime/src/common/core/memory.d druntime/src/common/core/runtime.d druntime/src/common/core/thread.d druntime/src/common/ldc.mak druntime/src/common/posix.mak druntime/src/common/stdc/errno.c druntime/src/common/win32.mak druntime/src/compiler/dmd/aApply.d druntime/src/compiler/dmd/aApplyR.d druntime/src/compiler/dmd/aaA.d druntime/src/compiler/dmd/adi.d druntime/src/compiler/dmd/alloca.d druntime/src/compiler/dmd/arrayassign.d druntime/src/compiler/dmd/arraybyte.d druntime/src/compiler/dmd/arraycast.d druntime/src/compiler/dmd/arraycat.d druntime/src/compiler/dmd/arraydouble.d druntime/src/compiler/dmd/arrayfloat.d druntime/src/compiler/dmd/arrayint.d druntime/src/compiler/dmd/arrayreal.d druntime/src/compiler/dmd/arrayshort.d druntime/src/compiler/dmd/cast_.d druntime/src/compiler/dmd/cmath2.d druntime/src/compiler/dmd/compiler.d druntime/src/compiler/dmd/complex.c druntime/src/compiler/dmd/cover.d druntime/src/compiler/dmd/critical.c druntime/src/compiler/dmd/deh.c druntime/src/compiler/dmd/deh2.d druntime/src/compiler/dmd/dmain2.d druntime/src/compiler/dmd/invariant.d druntime/src/compiler/dmd/invariant_.d druntime/src/compiler/dmd/lifetime.d druntime/src/compiler/dmd/llmath.d druntime/src/compiler/dmd/mars.h druntime/src/compiler/dmd/memory.d druntime/src/compiler/dmd/memset.d druntime/src/compiler/dmd/minit.asm druntime/src/compiler/dmd/minit.obj druntime/src/compiler/dmd/monitor.c druntime/src/compiler/dmd/obj.d druntime/src/compiler/dmd/object_.d druntime/src/compiler/dmd/posix.mak druntime/src/compiler/dmd/qsort.d druntime/src/compiler/dmd/qsort2.d druntime/src/compiler/dmd/switch_.d druntime/src/compiler/dmd/trace.d druntime/src/compiler/dmd/typeinfo/ti_AC.d 
druntime/src/compiler/dmd/typeinfo/ti_Acdouble.d druntime/src/compiler/dmd/typeinfo/ti_Acfloat.d druntime/src/compiler/dmd/typeinfo/ti_Acreal.d druntime/src/compiler/dmd/typeinfo/ti_Adouble.d druntime/src/compiler/dmd/typeinfo/ti_Afloat.d druntime/src/compiler/dmd/typeinfo/ti_Ag.d druntime/src/compiler/dmd/typeinfo/ti_Aint.d druntime/src/compiler/dmd/typeinfo/ti_Along.d druntime/src/compiler/dmd/typeinfo/ti_Areal.d druntime/src/compiler/dmd/typeinfo/ti_Ashort.d druntime/src/compiler/dmd/typeinfo/ti_C.d druntime/src/compiler/dmd/typeinfo/ti_byte.d druntime/src/compiler/dmd/typeinfo/ti_cdouble.d druntime/src/compiler/dmd/typeinfo/ti_cfloat.d druntime/src/compiler/dmd/typeinfo/ti_char.d druntime/src/compiler/dmd/typeinfo/ti_creal.d druntime/src/compiler/dmd/typeinfo/ti_dchar.d druntime/src/compiler/dmd/typeinfo/ti_delegate.d druntime/src/compiler/dmd/typeinfo/ti_double.d druntime/src/compiler/dmd/typeinfo/ti_float.d druntime/src/compiler/dmd/typeinfo/ti_idouble.d druntime/src/compiler/dmd/typeinfo/ti_ifloat.d druntime/src/compiler/dmd/typeinfo/ti_int.d druntime/src/compiler/dmd/typeinfo/ti_ireal.d druntime/src/compiler/dmd/typeinfo/ti_long.d druntime/src/compiler/dmd/typeinfo/ti_ptr.d druntime/src/compiler/dmd/typeinfo/ti_real.d druntime/src/compiler/dmd/typeinfo/ti_short.d druntime/src/compiler/dmd/typeinfo/ti_ubyte.d druntime/src/compiler/dmd/typeinfo/ti_uint.d druntime/src/compiler/dmd/typeinfo/ti_ulong.d druntime/src/compiler/dmd/typeinfo/ti_ushort.d druntime/src/compiler/dmd/typeinfo/ti_void.d druntime/src/compiler/dmd/typeinfo/ti_wchar.d druntime/src/compiler/dmd/util/console.d druntime/src/compiler/dmd/util/cpuid.d druntime/src/compiler/dmd/util/ctype.d druntime/src/compiler/dmd/util/string.d druntime/src/compiler/dmd/util/utf.d druntime/src/compiler/dmd/win32.mak druntime/src/compiler/ldc/aApply.d druntime/src/compiler/ldc/aApplyR.d druntime/src/compiler/ldc/aaA.d druntime/src/compiler/ldc/adi.d druntime/src/compiler/ldc/arrayInit.d 
druntime/src/compiler/ldc/cast.d druntime/src/compiler/ldc/critical.c druntime/src/compiler/ldc/dmain2.d druntime/src/compiler/ldc/eh.d druntime/src/compiler/ldc/genobj.d druntime/src/compiler/ldc/invariant.d druntime/src/compiler/ldc/ldc.mak druntime/src/compiler/ldc/ldc/bitmanip.d druntime/src/compiler/ldc/ldc/vararg.d druntime/src/compiler/ldc/lifetime.d druntime/src/compiler/ldc/mars.h druntime/src/compiler/ldc/memory.d druntime/src/compiler/ldc/monitor.c druntime/src/compiler/ldc/qsort2.d druntime/src/compiler/ldc/switch.d druntime/src/compiler/ldc/typeinfo/ti_AC.d druntime/src/compiler/ldc/typeinfo/ti_Acdouble.d druntime/src/compiler/ldc/typeinfo/ti_Acfloat.d druntime/src/compiler/ldc/typeinfo/ti_Acreal.d druntime/src/compiler/ldc/typeinfo/ti_Adouble.d druntime/src/compiler/ldc/typeinfo/ti_Afloat.d druntime/src/compiler/ldc/typeinfo/ti_Ag.d druntime/src/compiler/ldc/typeinfo/ti_Aint.d druntime/src/compiler/ldc/typeinfo/ti_Along.d druntime/src/compiler/ldc/typeinfo/ti_Areal.d druntime/src/compiler/ldc/typeinfo/ti_Ashort.d druntime/src/compiler/ldc/typeinfo/ti_C.d druntime/src/compiler/ldc/typeinfo/ti_byte.d druntime/src/compiler/ldc/typeinfo/ti_cdouble.d druntime/src/compiler/ldc/typeinfo/ti_cfloat.d druntime/src/compiler/ldc/typeinfo/ti_char.d druntime/src/compiler/ldc/typeinfo/ti_creal.d druntime/src/compiler/ldc/typeinfo/ti_dchar.d druntime/src/compiler/ldc/typeinfo/ti_delegate.d druntime/src/compiler/ldc/typeinfo/ti_double.d druntime/src/compiler/ldc/typeinfo/ti_float.d druntime/src/compiler/ldc/typeinfo/ti_idouble.d druntime/src/compiler/ldc/typeinfo/ti_ifloat.d druntime/src/compiler/ldc/typeinfo/ti_int.d druntime/src/compiler/ldc/typeinfo/ti_ireal.d druntime/src/compiler/ldc/typeinfo/ti_long.d druntime/src/compiler/ldc/typeinfo/ti_ptr.d druntime/src/compiler/ldc/typeinfo/ti_real.d druntime/src/compiler/ldc/typeinfo/ti_short.d druntime/src/compiler/ldc/typeinfo/ti_ubyte.d druntime/src/compiler/ldc/typeinfo/ti_uint.d 
druntime/src/compiler/ldc/typeinfo/ti_ulong.d druntime/src/compiler/ldc/typeinfo/ti_ushort.d druntime/src/compiler/ldc/typeinfo/ti_void.d druntime/src/compiler/ldc/typeinfo/ti_wchar.d druntime/src/compiler/ldc/util/console.d druntime/src/compiler/ldc/util/ctype.d druntime/src/compiler/ldc/util/string.d druntime/src/compiler/ldc/util/utf.d druntime/src/dmd-posix.mak druntime/src/dmd-win32.mak druntime/src/dmd.conf druntime/src/gc/basic/gc.d druntime/src/gc/basic/gcalloc.d druntime/src/gc/basic/gcbits.d druntime/src/gc/basic/gcstats.d druntime/src/gc/basic/gcx.d druntime/src/gc/basic/ldc.mak druntime/src/gc/basic/posix.mak druntime/src/gc/basic/win32.mak druntime/src/gc/stub/gc.d druntime/src/gc/stub/ldc.mak druntime/src/gc/stub/posix.mak druntime/src/gc/stub/win32.mak druntime/src/ldc-gcc.mak druntime/src/ldc2.conf druntime/src/sc.ini
diffstat 175 files changed, 48069 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/ldc2.conf	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,4 @@
+
+[Environment]
+
+DFLAGS=-I%@P%/../druntime/import -L-L%@P%/../druntime/lib
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/import/ldc/cstdarg.di	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,29 @@
+/*
+ * vararg support for extern(C) functions
+ */
+
+module ldc.cstdarg;
+
+// Check for the right compiler
+version(LDC)
+{
+    // OK
+}
+else
+{
+    static assert(false, "This module is only valid for LDC");
+}
+
+alias void* va_list; // argument cursor, represented here as an untyped pointer
+// The declarations below have no bodies; each is tagged with the LDC pragma of the same name.
+pragma(va_start)
+    void va_start(T)(va_list ap, ref T); // NOTE(review): presumably the ref argument is the last named parameter, as with C va_start -- confirm
+// NOTE(review): presumably yields the next variadic argument as a T -- confirm against the LDC pragma documentation
+pragma(va_arg)
+    T va_arg(T)(va_list ap);
+// va_end / va_copy: semantics assumed to mirror the C macros of the same names -- TODO confirm
+pragma(va_end)
+    void va_end(va_list args);
+
+pragma(va_copy)
+    void va_copy(va_list dst, va_list src);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/import/ldc/intrinsics.di	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,343 @@
+/*
+ * This module holds declarations to LLVM intrinsics.
+ *
+ * See the LLVM language reference for more information:
+ *
+ * - http://llvm.org/docs/LangRef.html#intrinsics
+ *
+ */
+
+module ldc.intrinsics;
+
+// Check for the right compiler
+version(LDC)
+{
+    // OK
+}
+else
+{
+    static assert(false, "This module is only valid for LDC");
+}
+
+//
+// CODE GENERATOR INTRINSICS
+//
+
+
+// The 'llvm.returnaddress' intrinsic attempts to compute a target-specific value indicating the return address of the current function or one of its callers. 
+
+pragma(intrinsic, "llvm.returnaddress")
+    void* llvm_returnaddress(uint level);
+
+
+// The 'llvm.frameaddress' intrinsic attempts to return the target-specific frame pointer value for the specified stack frame. 
+
+pragma(intrinsic, "llvm.frameaddress")
+    void* llvm_frameaddress(uint level);
+
+
+// The 'llvm.stacksave' intrinsic is used to remember the current state of the function stack, for use with llvm.stackrestore. This is useful for implementing language features like scoped automatic variable sized arrays in C99. 
+
+pragma(intrinsic, "llvm.stacksave")
+    void* llvm_stacksave();
+
+
+// The 'llvm.stackrestore' intrinsic is used to restore the state of the function stack to the state it was in when the corresponding llvm.stacksave intrinsic executed. This is useful for implementing language features like scoped automatic variable sized arrays in C99. 
+
+pragma(intrinsic, "llvm.stackrestore")
+    void llvm_stackrestore(void* ptr);
+
+
+// The 'llvm.prefetch' intrinsic is a hint to the code generator to insert a prefetch instruction if supported; otherwise, it is a noop. Prefetches have no effect on the behavior of the program but can change its performance characteristics.
+
+pragma(intrinsic, "llvm.prefetch")
+    void llvm_prefetch(void* ptr, uint rw, uint locality);
+
+
+// The 'llvm.pcmarker' intrinsic is a method to export a Program Counter (PC) in a region of code to simulators and other tools. The method is target specific, but it is expected that the marker will use exported symbols to transmit the PC of the marker. The marker makes no guarantees that it will remain with any specific instruction after optimizations. It is possible that the presence of a marker will inhibit optimizations. The intended use is to be inserted after optimizations to allow correlations of simulation runs. 
+
+pragma(intrinsic, "llvm.pcmarker")
+    void llvm_pcmarker(uint id);
+
+
+// The 'llvm.readcyclecounter' intrinsic provides access to the cycle counter register (or similar low latency, high accuracy clocks) on those targets that support it. On X86, it should map to RDTSC. On Alpha, it should map to RPCC. As the backing counters overflow quickly (on the order of 9 seconds on alpha), this should only be used for small timings. 
+
+pragma(intrinsic, "llvm.readcyclecounter")
+    ulong readcyclecounter();
+
+
+
+
+//
+// STANDARD C LIBRARY INTRINSICS
+//
+
+
+// The 'llvm.memcpy.*' intrinsics copy a block of memory from the source location to the destination location.
+// Note that, unlike the standard libc function, the llvm.memcpy.* intrinsics do not return a value, and take an extra alignment argument.
+
+pragma(intrinsic, "llvm.memcpy.i32")
+    void llvm_memcpy_i32(void* dst, void* src, uint len, uint alignment);
+pragma(intrinsic, "llvm.memcpy.i64")
+    void llvm_memcpy_i64(void* dst, void* src, ulong len, uint alignment);
+
+
+// The 'llvm.memmove.*' intrinsics move a block of memory from the source location to the destination location. They are similar to the 'llvm.memcpy' intrinsics but allow the two memory locations to overlap.
+// Note that, unlike the standard libc function, the llvm.memmove.* intrinsics do not return a value, and take an extra alignment argument.
+// The alignment parameter is declared uint in both variants, matching the memcpy and memset declarations in this module.
+pragma(intrinsic, "llvm.memmove.i32")
+    void llvm_memmove_i32(void* dst, void* src, uint len, uint alignment);
+pragma(intrinsic, "llvm.memmove.i64")
+    void llvm_memmove_i64(void* dst, void* src, ulong len, uint alignment);
+
+
+// The 'llvm.memset.*' intrinsics fill a block of memory with a particular byte value.
+// Note that, unlike the standard libc function, the llvm.memset intrinsic does not return a value, and takes an extra alignment argument.
+
+pragma(intrinsic, "llvm.memset.i32")
+    void llvm_memset_i32(void* dst, ubyte val, uint len, uint alignment);
+pragma(intrinsic, "llvm.memset.i64")
+    void llvm_memset_i64(void* dst, ubyte val, ulong len, uint alignment);
+
+
+// The 'llvm.sqrt' intrinsics return the sqrt of the specified operand, returning the same value as the libm 'sqrt' functions would. Unlike sqrt in libm, however, llvm.sqrt has undefined behavior for negative numbers other than -0.0 (which allows for better optimization, because there is no need to worry about errno being set). llvm.sqrt(-0.0) is defined to return -0.0 like IEEE sqrt. 
+
+pragma(intrinsic, "llvm.sqrt.f32")
+    float llvm_sqrt_f32(float val);
+pragma(intrinsic, "llvm.sqrt.f64")
+    double llvm_sqrt_f64(double val);
+version(X86)
+{
+pragma(intrinsic, "llvm.sqrt.f80")
+    real llvm_sqrt_f80(real val);
+}
+
+version(X86_64)
+{
+pragma(intrinsic, "llvm.sqrt.f80")
+    real llvm_sqrt_f80(real val);
+}
+
+
+// The 'llvm.sin.*' intrinsics return the sine of the operand. 
+
+pragma(intrinsic, "llvm.sin.f32")
+    float llvm_sin_f32(float val);
+pragma(intrinsic, "llvm.sin.f64")
+    double llvm_sin_f64(double val);
+version(X86)
+{
+pragma(intrinsic, "llvm.sin.f80")
+    real llvm_sin_f80(real val);
+}
+
+version(X86_64)
+{
+pragma(intrinsic, "llvm.sin.f80")
+    real llvm_sin_f80(real val);
+}
+
+
+// The 'llvm.cos.*' intrinsics return the cosine of the operand. 
+
+pragma(intrinsic, "llvm.cos.f32")
+    float llvm_cos_f32(float val);
+pragma(intrinsic, "llvm.cos.f64")
+    double llvm_cos_f64(double val);
+version(X86)
+{
+pragma(intrinsic, "llvm.cos.f80")
+    real llvm_cos_f80(real val);
+}
+
+version(X86_64)
+{
+pragma(intrinsic, "llvm.cos.f80")
+    real llvm_cos_f80(real val);
+}
+
+
+// The 'llvm.powi.*' intrinsics return the first operand raised to the specified (positive or negative) power. The order of evaluation of multiplications is not defined. When a vector of floating point type is used, the second argument remains a scalar integer value. 
+
+pragma(intrinsic, "llvm.powi.f32")
+    float llvm_powi_f32(float val, int power);
+
+pragma(intrinsic, "llvm.powi.f64")
+    double llvm_powi_f64(double val, int power);
+version(X86)
+{
+pragma(intrinsic, "llvm.powi.f80")
+    real llvm_powi_f80(real val, int power);
+}
+
+version(X86_64)
+{
+pragma(intrinsic, "llvm.powi.f80")
+    real llvm_powi_f80(real val, int power);
+}
+
+// The 'llvm.pow.*' intrinsics return the first operand raised to the specified (positive or negative) power. 
+
+pragma(intrinsic, "llvm.pow.f32")
+    float llvm_pow_f32(float val, float power);
+
+pragma(intrinsic, "llvm.pow.f64")
+    double llvm_pow_f64(double val, double power);
+version(X86)
+{
+pragma(intrinsic, "llvm.pow.f80")
+    real llvm_pow_f80(real val, real power);
+}
+
+version(X86_64)
+{
+pragma(intrinsic, "llvm.pow.f80")
+    real llvm_pow_f80(real val, real power);
+}
+
+
+//
+// BIT MANIPULATION INTRINSICS
+//
+
+// The 'llvm.bswap' family of intrinsics is used to byte swap integer values with an even number of bytes (positive multiple of 16 bits). These are useful for performing operations on data that is not in the target's native byte order. 
+
+pragma(intrinsic, "llvm.bswap.i16.i16")
+    ushort llvm_bswap_i16(ushort val);
+
+pragma(intrinsic, "llvm.bswap.i32.i32")
+    uint llvm_bswap_i32(uint val);
+
+pragma(intrinsic, "llvm.bswap.i64.i64")
+    ulong llvm_bswap_i64(ulong val);
+
+
+// The 'llvm.ctpop' family of intrinsics counts the number of bits set in a value. 
+
+pragma(intrinsic, "llvm.ctpop.i8")
+    ubyte llvm_ctpop_i8(ubyte src);
+
+pragma(intrinsic, "llvm.ctpop.i16")
+    ushort llvm_ctpop_i16(ushort src);
+
+pragma(intrinsic, "llvm.ctpop.i32")
+    uint llvm_ctpop_i32(uint src);
+
+pragma(intrinsic, "llvm.ctpop.i64")
+    ulong llvm_ctpop_i64(ulong src);
+
+
+// The 'llvm.ctlz' family of intrinsic functions counts the number of leading zeros in a variable. 
+
+pragma(intrinsic, "llvm.ctlz.i8")
+    ubyte llvm_ctlz_i8(ubyte src);
+
+pragma(intrinsic, "llvm.ctlz.i16")
+    ushort llvm_ctlz_i16(ushort src);
+
+pragma(intrinsic, "llvm.ctlz.i32")
+    uint llvm_ctlz_i32(uint src);
+
+pragma(intrinsic, "llvm.ctlz.i64")
+    ulong llvm_ctlz_i64(ulong src);
+
+
+// The 'llvm.cttz' family of intrinsic functions counts the number of trailing zeros. 
+
+pragma(intrinsic, "llvm.cttz.i8")
+    ubyte llvm_cttz_i8(ubyte src);
+
+pragma(intrinsic, "llvm.cttz.i16")
+    ushort llvm_cttz_i16(ushort src);
+
+pragma(intrinsic, "llvm.cttz.i32")
+    uint llvm_cttz_i32(uint src);
+
+pragma(intrinsic, "llvm.cttz.i64")
+    ulong llvm_cttz_i64(ulong src);
+
+
+// The 'llvm.part.select' family of intrinsic functions selects a range of bits from an integer value and returns them in the same bit width as the original value.
+
+pragma(intrinsic, "llvm.part.select.i8")
+    ubyte llvm_part_select_i(ubyte val, uint loBit, uint hiBit);
+
+pragma(intrinsic, "llvm.part.select.i16")
+    ushort llvm_part_select_i(ushort val, uint loBit, uint hiBit);
+
+pragma(intrinsic, "llvm.part.select.i32")
+    uint llvm_part_select_i(uint val, uint loBit, uint hiBit);
+
+pragma(intrinsic, "llvm.part.select.i64")
+    ulong llvm_part_select_i(ulong val, uint loBit, uint hiBit);
+
+
+// The 'llvm.part.set' family of intrinsic functions replaces a range of bits in an integer value with another integer value. It returns the integer with the replaced bits.
+
+// TODO
+// declare i17 @llvm.part.set.i17.i9 (i17 %val, i9 %repl, i32 %lo, i32 %hi)
+// declare i29 @llvm.part.set.i29.i9 (i29 %val, i9 %repl, i32 %lo, i32 %hi)
+
+
+
+
+//
+// ATOMIC OPERATIONS AND SYNCHRONIZATION INTRINSICS
+//
+
+// The llvm.memory.barrier intrinsic guarantees ordering between specific pairs of memory access types.
+
+pragma(intrinsic, "llvm.memory.barrier")
+    void llvm_memory_barrier(bool ll, bool ls, bool sl, bool ss, bool device);
+
+// This loads a value in memory and compares it to a given value. If they are equal, it stores a new value into the memory.
+
+pragma(intrinsic, "llvm.atomic.cmp.swap.i#.p0i#")
+    T llvm_atomic_cmp_swap(T)(T* ptr, T cmp, T val);
+
+// This intrinsic loads the value stored in memory at ptr and yields the value from memory. It then stores the value in val in the memory at ptr.
+
+pragma(intrinsic, "llvm.atomic.swap.i#.p0i#")
+    T llvm_atomic_swap(T)(T* ptr, T val);
+
+// This intrinsic adds delta to the value stored in memory at ptr. It yields the original value at ptr.
+
+pragma(intrinsic, "llvm.atomic.load.add.i#.p0i#")
+    T llvm_atomic_load_add(T)(T* ptr, T val);
+
+// This intrinsic subtracts delta from the value stored in memory at ptr. It yields the original value at ptr.
+
+pragma(intrinsic, "llvm.atomic.load.sub.i#.p0i#")
+    T llvm_atomic_load_sub(T)(T* ptr, T val);
+
+// These intrinsics apply the named bitwise operation (and, nand, or, xor) with delta to the value stored in memory at ptr. They yield the original value at ptr.
+
+pragma(intrinsic, "llvm.atomic.load.and.i#.p0i#")
+    T llvm_atomic_load_and(T)(T* ptr, T val);
+pragma(intrinsic, "llvm.atomic.load.nand.i#.p0i#")
+    T llvm_atomic_load_nand(T)(T* ptr, T val);
+pragma(intrinsic, "llvm.atomic.load.or.i#.p0i#")
+    T llvm_atomic_load_or(T)(T* ptr, T val);
+pragma(intrinsic, "llvm.atomic.load.xor.i#.p0i#")
+    T llvm_atomic_load_xor(T)(T* ptr, T val);
+
+// These intrinsics take the signed or unsigned minimum or maximum of delta and the value stored in memory at ptr. They yield the original value at ptr.
+
+pragma(intrinsic, "llvm.atomic.load.max.i#.p0i#")
+    T llvm_atomic_load_max(T)(T* ptr, T val);
+pragma(intrinsic, "llvm.atomic.load.min.i#.p0i#")
+    T llvm_atomic_load_min(T)(T* ptr, T val);
+pragma(intrinsic, "llvm.atomic.load.umax.i#.p0i#")
+    T llvm_atomic_load_umax(T)(T* ptr, T val);
+pragma(intrinsic, "llvm.atomic.load.umin.i#.p0i#")
+    T llvm_atomic_load_umin(T)(T* ptr, T val);
+
+//
+// GENERAL INTRINSICS
+//
+
+
+// This intrinsic is lowered to the target dependent trap instruction. If the target does not have a trap instruction, this intrinsic will be lowered to a call of the abort() function.
+
+pragma(intrinsic, "llvm.trap")
+    void llvm_trap();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/license.txt	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,10 @@
+Copyright (c) 2008, The D Runtime Project
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/readme.txt	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,10 @@
+	Druntime
+
+The source code repository for druntime is: http://dsource.org/projects/druntime
+
+Druntime is the minimum library required to support the D programming
+language. It includes the system code required to support the garbage collector,
+associative arrays, exception handling, array vector operations,
+startup/shutdown, etc.
+
+Druntime forms a common layer underlying the Phobos and Tango user libraries.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/build-dmd.bat	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,7 @@
+@echo off
+set OLDHOME=%HOME%
+set HOME=%CD%
+make clean -fdmd-win32.mak
+make lib install -fdmd-win32.mak
+make clean -fdmd-win32.mak
+set HOME=%OLDHOME%
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/build-dmd.sh	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Remember the caller's HOME and point HOME at the current directory for the build.
+OLDHOME="$HOME"
+export HOME="$(pwd)"
+
+# goerror: put HOME back, print the error banner, and exit with status 1.
+goerror() {
+    export HOME="$OLDHOME"
+    printf '%s\n' "=" "= *** Error ***" "="
+    exit 1
+}
+
+# clean, then build + install, then clean again; any failing step aborts via goerror.
+for targets in "clean" "lib doc install" "clean"; do
+    make $targets -fdmd-posix.mak || goerror
+done
+chmod 644 ../import/*.di || goerror
+export HOME="$OLDHOME"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/build-ldc.sh	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Remember the caller's HOME and point HOME at the current directory for the build.
+OLDHOME="$HOME"
+export HOME="$(pwd)"
+
+# goerror: put HOME back, print the error banner, and exit with status 1.
+goerror() {
+    export HOME="$OLDHOME"
+    printf '%s\n' "=" "= *** Error ***" "="
+    exit 1
+}
+
+# clean, then build + install, then clean again; any failing step aborts via goerror.
+for targets in "clean" "lib doc install" "clean"; do
+    make $targets -fldc-gcc.mak || goerror
+done
+chmod 644 ../import/*.di || goerror
+export HOME="$OLDHOME"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/core/bitmanip.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,290 @@
+/**
+ * This module contains a collection of bit-level operations.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License:   BSD Style, see LICENSE
+ * Authors:   Walter Bright, Don Clugston, Sean Kelly
+ */
+module core.bitmanip;
+
+
+version( DDoc )
+{
+    /**
+     * Scans the bits in v starting with bit 0, looking
+     * for the first set bit.
+     * Returns:
+     *  The bit number of the first bit set.
+     *  The return value is undefined if v is zero.
+     */
+    int bsf( uint v );
+
+
+    /**
+     * Scans the bits in v from the most significant bit
+     * to the least significant bit, looking
+     * for the first set bit.
+     * Returns:
+     *  The bit number of the first bit set.
+     *  The return value is undefined if v is zero.
+     * Example:
+     * ---
+     * import core.bitmanip;
+     *
+     * int main()
+     * {
+     *     uint v;
+     *     int x;
+     *
+     *     v = 0x21;
+     *     x = bsf(v);
+     *     printf("bsf(x%x) = %d\n", v, x);
+     *     x = bsr(v);
+     *     printf("bsr(x%x) = %d\n", v, x);
+     *     return 0;
+     * }
+     * ---
+     * Output:
+     *  bsf(x21) = 0<br>
+     *  bsr(x21) = 5
+     */
+    int bsr( uint v );
+
+
+    /**
+     * Tests the bit.
+     */
+    int bt( uint* p, uint bitnum );
+
+
+    /**
+     * Tests and complements the bit.
+     */
+    int btc( uint* p, uint bitnum );
+
+
+    /**
+     * Tests and resets (sets to 0) the bit.
+     */
+    int btr( uint* p, uint bitnum );
+
+
+    /**
+     * Tests and sets the bit.
+     * Params:
+     * p = a non-NULL pointer to an array of uints.
+     * bitnum = a bit number, starting with bit 0 of p[0],
+     * and progressing. It addresses bits like the expression:
+    ---
+    p[bitnum / (uint.sizeof*8)] & (1 << (bitnum & ((uint.sizeof*8) - 1)))
+    ---
+     * Returns:
+     *  A non-zero value if the bit was set, and a zero
+     *  if it was clear.
+     *
+     * Example:
+     * ---
+    import core.bitmanip;
+
+    int main()
+    {
+        uint array[2];
+
+        array[0] = 2;
+        array[1] = 0x100;
+
+        printf("btc(array, 35) = %d\n", <b>btc</b>(array, 35));
+        printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+        printf("btc(array, 35) = %d\n", <b>btc</b>(array, 35));
+        printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+        printf("bts(array, 35) = %d\n", <b>bts</b>(array, 35));
+        printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+        printf("btr(array, 35) = %d\n", <b>btr</b>(array, 35));
+        printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+        printf("bt(array, 1) = %d\n", <b>bt</b>(array, 1));
+        printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+        return 0;
+    }
+     * ---
+     * Output:
+    <pre>
+    btc(array, 35) = 0
+    array = [0]:x2, [1]:x108
+    btc(array, 35) = -1
+    array = [0]:x2, [1]:x100
+    bts(array, 35) = 0
+    array = [0]:x2, [1]:x108
+    btr(array, 35) = -1
+    array = [0]:x2, [1]:x100
+    bt(array, 1) = -1
+    array = [0]:x2, [1]:x100
+    </pre>
+     */
+    int bts( uint* p, uint bitnum );
+
+
+    /**
+     * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes
+     * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3
+     * becomes byte 0.
+     */
+    uint bswap( uint v );
+
+
+    /**
+     * Reads I/O port at port_address.
+     */
+    ubyte inp( uint port_address );
+
+
+    /**
+     * ditto
+     */
+    ushort inpw( uint port_address );
+
+
+    /**
+     * ditto
+     */
+    uint inpl( uint port_address );
+
+
+    /**
+     * Writes and returns value to I/O port at port_address.
+     */
+    ubyte outp( uint port_address, ubyte value );
+
+
+    /**
+     * ditto
+     */
+    ushort outpw( uint port_address, ushort value );
+
+
+    /**
+     * ditto
+     */
+    uint outpl( uint port_address, uint value );
+}
+else version( LDC )
+{
+    public import ldc.bitmanip;
+}
+else
+{
+    public import std.intrinsic;
+}
+
+
+/**
+ *  Returns the number of bits that are set in the 32-bit value x.
+ */
+int popcnt( uint x )
+{
+    // Branch-free and table-free on purpose: no branches, and none of the
+    // cache misses a lookup table could incur.
+
+    // Pairs: subtracting the masked upper bit of every 2-bit field from
+    // the field leaves its bit count (00, 01 or 10); the mask keeps the
+    // neighbouring field's bit out of the subtraction.
+    x -= (x >> 1) & 0x5555_5555;
+    // Nibbles: add the low and the high pair of every 4-bit field,
+    // giving 0000-0100 per nibble.
+    x = (x & 0x3333_3333) + ((x >> 2) & 0x3333_3333);
+    // A nibble has room for the sum of two such counts (at most 8), so
+    // from here on we can add first and mask afterwards.
+
+    // Fold nibbles into bytes, bytes into 16-bit halves, and the two
+    // halves into the total. After each shifted add, the mask throws
+    // away the fields that hold stale partial sums, so no bit is
+    // counted twice.
+    // Note: on some CPUs a multiply may be a faster way to do this
+    // fold.
+
+    x = (x + (x >> 4)) & 0x0F0F_0F0F;
+    // every byte now holds 0..8
+    x = (x + (x >> 8)) & 0x00FF_00FF;
+    // every 16-bit half now holds 0..16
+    x = (x + (x >> 16)) & 0xFFFF;
+    // x is now the total, 0..32
+    return x;
+}
+
+
+debug( UnitTest )
+{
+    unittest
+    {
+        assert( popcnt( 0 )           ==  0 ); // no bits set
+        assert( popcnt( 0xFFFF_FFFF ) == 32 ); // all bits set
+        assert( popcnt( 7 )           ==  3 );
+        assert( popcnt( 0xAA )        ==  4 );
+        assert( popcnt( 0x7777_7777 ) == 24 );
+        assert( popcnt( 0xCCCC_CCCC ) == 16 );
+        assert( popcnt( 0x8421_1248 ) ==  8 );
+    }
+}
+
+
+/**
+ * Reverses the order of bits in a 32-bit integer (bit 0 becomes bit 31, bit 1 becomes bit 30, and so on).
+ */
+uint bitswap( uint x )
+{
+    // Two implementations of the same swap sequence: x86 inline asm, or portable D.
+    version( D_InlineAsm_X86 )
+    {
+        asm
+        {
+            // Author: Tiago Gasiba.
+            mov EDX, EAX;           // NOTE(review): x is never loaded explicitly; this assumes it is already in EAX on entry -- confirm for the compiler/ABI in use
+            shr EAX, 1;
+            and EDX, 0x5555_5555;
+            and EAX, 0x5555_5555;
+            shl EDX, 1;
+            or  EAX, EDX;           // odd and even bits swapped
+            mov EDX, EAX;
+            shr EAX, 2;
+            and EDX, 0x3333_3333;
+            and EAX, 0x3333_3333;
+            shl EDX, 2;
+            or  EAX, EDX;           // consecutive bit pairs swapped
+            mov EDX, EAX;
+            shr EAX, 4;
+            and EDX, 0x0f0f_0f0f;
+            and EAX, 0x0f0f_0f0f;
+            shl EDX, 4;
+            or  EAX, EDX;           // nibbles swapped within each byte
+            bswap EAX;              // reverse the byte order; there is no return statement in this branch -- NOTE(review): the result is presumably taken from EAX, confirm
+        }
+    }
+    else
+    {
+        // swap odd and even bits
+        x = ((x >> 1) & 0x5555_5555) | ((x & 0x5555_5555) << 1);
+        // swap consecutive pairs
+        x = ((x >> 2) & 0x3333_3333) | ((x & 0x3333_3333) << 2);
+        // swap nibbles
+        x = ((x >> 4) & 0x0F0F_0F0F) | ((x & 0x0F0F_0F0F) << 4);
+        // swap bytes
+        x = ((x >> 8) & 0x00FF_00FF) | ((x & 0x00FF_00FF) << 8);
+        // swap the two 16-bit halves
+        x = ( x >> 16              ) | ( x               << 16);
+        return x;
+
+    }
+}
+
+
+debug( UnitTest )
+{
+    unittest
+    {
+        assert( bitswap( 0x8000_0100 ) == 0x0080_0001 ); // bit 31 -> bit 0, bit 8 -> bit 23
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/core/exception.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,276 @@
+/**
+ * The exception module defines all system-level exceptions and provides a
+ * mechanism to alter system-level error handling.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License:   BSD Style, see LICENSE
+ * Authors:   Sean Kelly
+ */
+module core.exception;
+
+
+private
+{
+    // Signature of a user-installable assert handler: it receives the file
+    // and line of the failed assertion plus an optional message.
+    alias void  function( string file, size_t line, string msg = null ) assertHandlerType;
+
+    // Currently installed handler.  While this is null the assert callbacks
+    // in this module throw an AssertException instead.
+    assertHandlerType   assertHandler   = null;
+}
+
+
+/**
+ * Thrown on an array bounds error.
+ */
+class ArrayBoundsException : Exception
+{
+    /**
+     * Params:
+     *  file = The name of the file that signaled this error.
+     *  line = The line number on which this error occurred.
+     */
+    this( string file, size_t line )
+    {
+        super( "Array index out of bounds", file, line );
+    }
+}
+
+
+/**
+ * Thrown on an assert error.
+ */
+class AssertException : Exception
+{
+    /**
+     * Creates the exception with the stock "Assertion failure" text.
+     *
+     * Params:
+     *  file = The name of the file that signaled this error.
+     *  line = The line number on which this error occurred.
+     */
+    this( string file, size_t line )
+    {
+        this( "Assertion failure", file, line );
+    }
+
+    /**
+     * Creates the exception with a caller-supplied message.
+     *
+     * Params:
+     *  msg  = The text describing the failed assertion.
+     *  file = The name of the file that signaled this error.
+     *  line = The line number on which this error occurred.
+     */
+    this( string msg, string file, size_t line )
+    {
+        super( msg, file, line );
+    }
+}
+
+
+/**
+ * Thrown on finalize error.
+ */
+class FinalizeException : Exception
+{
+    /// Class information passed to the constructor; named by toString.
+    ClassInfo   info;
+
+    /**
+     * Params:
+     *  c = Class information for the instance that was being finalized.
+     *  e = The exception forwarded to the base Exception constructor, or
+     *      null if there is none.
+     */
+    this( ClassInfo c, Exception e = null )
+    {
+        super( "Finalization error", e );
+        info = c;
+    }
+
+    /**
+     * Returns:
+     *  A fixed description followed by the name of the class held in info.
+     */
+    override string toString()
+    {
+        return "An exception was thrown while finalizing an instance of class " ~ info.name;
+    }
+}
+
+
+/**
+ * Thrown on hidden function error.
+ */
+class HiddenFuncException : Exception
+{
+    /**
+     * Params:
+     *  ci = Class information whose name is appended to the message.
+     */
+    this( ClassInfo ci )
+    {
+        super( "Hidden method called for " ~ ci.name );
+    }
+}
+
+
+/**
+ * Thrown on an out of memory error.
+ */
+class OutOfMemoryException : Exception
+{
+    /**
+     * Params:
+     *  file = The name of the file that signaled this error.
+     *  line = The line number on which this error occurred.
+     */
+    this( string file, size_t line )
+    {
+        super( "Memory allocation failed", file, line );
+    }
+
+    /**
+     * Returns:
+     *  The inherited description when a message is present, otherwise the
+     *  fixed text "Memory allocation failed".
+     */
+    override string toString()
+    {
+        // onOutOfMemoryError throws the raw class initializer, for which the
+        // constructor above never ran, so msg may be null here.
+        if( msg )
+            return super.toString();
+        return "Memory allocation failed";
+    }
+}
+
+
+/**
+ * Thrown on a switch error.
+ */
+class SwitchException : Exception
+{
+    /**
+     * Params:
+     *  file = The name of the file that signaled this error.
+     *  line = The line number on which this error occurred.
+     */
+    this( string file, size_t line )
+    {
+        super( "No appropriate switch clause found", file, line );
+    }
+}
+
+
+/**
+ * Thrown on a unicode conversion error.
+ */
+class UnicodeException : Exception
+{
+    /// Index value supplied to the constructor.
+    size_t idx;
+
+    /**
+     * Params:
+     *  msg = Information about the error.
+     *  idx = String index where this error was detected.
+     */
+    this( string msg, size_t idx )
+    {
+        super( msg );
+        this.idx = idx;
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Overrides
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * Overrides the default assert handler with a user-supplied version.
+ *
+ * Params:
+ *  h = The new assert handler.  Set to null to use the default handler.
+ */
+void setAssertHandler( assertHandlerType h )
+{
+    assertHandler = h;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Overridable Callbacks
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * A callback for assert errors in D that carry no message.  If a user
+ * assert handler has been installed it is invoked with the location of the
+ * failure; otherwise an AssertException is thrown.
+ *
+ * Params:
+ *  file = The name of the file that signaled this error.
+ *  line = The line number on which this error occurred.
+ */
+extern (C) void onAssertError( string file, size_t line )
+{
+    // A user-installed handler takes precedence over the default throw.
+    if( assertHandler !is null )
+    {
+        assertHandler( file, line );
+        return;
+    }
+    throw new AssertException( file, line );
+}
+
+
+/**
+ * A callback for assert errors in D that carry a message.  If a user assert
+ * handler has been installed it is invoked with the location of the failure
+ * and the message; otherwise an AssertException holding the message is
+ * thrown.
+ *
+ * Params:
+ *  file = The name of the file that signaled this error.
+ *  line = The line number on which this error occurred.
+ *  msg  = An error message supplied by the user.
+ */
+extern (C) void onAssertErrorMsg( string file, size_t line, string msg )
+{
+    // A user-installed handler takes precedence over the default throw.
+    if( assertHandler !is null )
+    {
+        assertHandler( file, line, msg );
+        return;
+    }
+    throw new AssertException( msg, file, line );
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Internal Error Callbacks
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * A callback for array bounds errors in D.  An ArrayBoundsException will be
+ * thrown unconditionally; this function does not return.
+ *
+ * Params:
+ *  file = The name of the file that signaled this error.
+ *  line = The line number on which this error occurred.
+ *
+ * Throws:
+ *  ArrayBoundsException.
+ */
+extern (C) void onArrayBoundsError( string file, size_t line )
+{
+    throw new ArrayBoundsException( file, line );
+}
+
+
+/**
+ * A callback for finalize errors in D.  A FinalizeException will be thrown.
+ *
+ * Params:
+ *  info = Class information for the instance that was being finalized.
+ *  ex   = The exception thrown during finalization.
+ *
+ * Throws:
+ *  FinalizeException.
+ */
+extern (C) void onFinalizeError( ClassInfo info, Exception ex )
+{
+    throw new FinalizeException( info, ex );
+}
+
+
+/**
+ * A callback for hidden function errors in D.  A HiddenFuncException will be
+ * thrown.
+ *
+ * Params:
+ *  o = The object involved; its class information is passed on to the
+ *      exception.
+ *
+ * Throws:
+ *  HiddenFuncException.
+ */
+extern (C) void onHiddenFuncError( Object o )
+{
+    throw new HiddenFuncException( o.classinfo );
+}
+
+
+/**
+ * A callback for out of memory errors in D.  An OutOfMemoryException will be
+ * thrown.
+ *
+ * Throws:
+ *  OutOfMemoryException.
+ */
+extern (C) void onOutOfMemoryError()
+{
+    // NOTE: Since an out of memory condition exists, no allocation must occur
+    //       while generating this object.  The class initializer data is
+    //       therefore thrown directly instead of a freshly constructed
+    //       instance, which means the OutOfMemoryException constructor is
+    //       never run for the thrown object.
+    throw cast(OutOfMemoryException) cast(void*) OutOfMemoryException.classinfo.init;
+}
+
+
+/**
+ * A callback for switch errors in D.  A SwitchException will be thrown
+ * unconditionally; this function does not return.
+ *
+ * Params:
+ *  file = The name of the file that signaled this error.
+ *  line = The line number on which this error occurred.
+ *
+ * Throws:
+ *  SwitchException.
+ */
+extern (C) void onSwitchError( string file, size_t line )
+{
+    throw new SwitchException( file, line );
+}
+
+
+/**
+ * A callback for unicode errors in D.  A UnicodeException will be thrown
+ * unconditionally; this function does not return.
+ *
+ * Params:
+ *  msg = Information about the error.
+ *  idx = String index where this error was detected.
+ *
+ * Throws:
+ *  UnicodeException.
+ */
+extern (C) void onUnicodeError( string msg, size_t idx )
+{
+    throw new UnicodeException( msg, idx );
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/core/memory.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,484 @@
+/**
+ * The memory module provides an interface to the garbage collector and to
+ * any other OS or API-level memory management facilities.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License:   BSD Style, see LICENSE
+ * Authors:   Sean Kelly
+ */
+module core.memory;
+
+
+// C-linkage garbage collector entry points.  They are only declared here;
+// the GC struct below forwards most of its static methods to them.
+private
+{
+    extern (C) void gc_init();
+    extern (C) void gc_term();
+
+    extern (C) void gc_enable();
+    extern (C) void gc_disable();
+    extern (C) void gc_collect();
+    extern (C) void gc_minimize();
+
+    extern (C) uint gc_getAttr( void* p );
+    extern (C) uint gc_setAttr( void* p, uint a );
+    extern (C) uint gc_clrAttr( void* p, uint a );
+
+    extern (C) void*  gc_malloc( size_t sz, uint ba = 0 );
+    extern (C) void*  gc_calloc( size_t sz, uint ba = 0 );
+    extern (C) void*  gc_realloc( void* p, size_t sz, uint ba = 0 );
+    extern (C) size_t gc_extend( void* p, size_t mx, size_t sz );
+    extern (C) size_t gc_reserve( size_t sz );
+    extern (C) void   gc_free( void* p );
+
+    extern (C) void*   gc_addrOf( void* p );
+    extern (C) size_t  gc_sizeOf( void* p );
+
+    // Result type of gc_query; exposed publicly as GC.BlkInfo.
+    struct BlkInfo_
+    {
+        void*  base;
+        size_t size;
+        uint   attr;
+    }
+
+    extern (C) BlkInfo_ gc_query( void* p );
+
+    extern (C) void gc_addRoot( void* p );
+    extern (C) void gc_addRange( void* p, size_t sz );
+
+    extern (C) void gc_removeRoot( void* p );
+    extern (C) void gc_removeRange( void* p );
+
+    extern (C) void* gc_getHandle();
+    extern (C) void gc_setHandle( void* p );
+    extern (C) void gc_endHandle();
+}
+
+
+/**
+ * This struct encapsulates all garbage collection functionality for the D
+ * programming language.
+ */
+struct GC
+{
+    /**
+     * Enables the garbage collector if collections have previously been
+     * suspended by a call to disable.  This function is reentrant, and
+     * must be called once for every call to disable before the garbage
+     * collector is enabled.
+     */
+    static void enable()
+    {
+        gc_enable();
+    }
+
+
+    /**
+     * Disables the garbage collector.  This function is reentrant, but
+     * enable must be called once for each call to disable.
+     */
+    static void disable()
+    {
+        gc_disable();
+    }
+
+
+    /**
+     * Begins a full collection.  While the meaning of this may change based
+     * on the garbage collector implementation, typical behavior is to scan
+     * all stack segments for roots, mark accessible memory blocks as alive,
+     * and then to reclaim free space.  This action may need to suspend all
+     * running threads for at least part of the collection process.
+     */
+    static void collect()
+    {
+        gc_collect();
+    }
+
+    /**
+     * Indicates that the managed memory space be minimized by returning free
+     * physical memory to the operating system.  The amount of free memory
+     * returned depends on the allocator design and on program behavior.
+     */
+    static void minimize()
+    {
+        gc_minimize();
+    }
+
+
+    /**
+     * Elements for a bit field representing memory block attributes.  These
+     * are manipulated via the getAttr, setAttr, clrAttr functions.
+     */
+    enum BlkAttr : uint
+    {
+        FINALIZE = 0b0000_0001, /// Finalize the data in this block on collect.
+        NO_SCAN  = 0b0000_0010, /// Do not scan through this block on collect.
+        NO_MOVE  = 0b0000_0100  /// Do not move this memory block on collect.
+    }
+
+
+    /**
+     * Contains aggregate information about a block of managed memory.  The
+     * purpose of this struct is to support a more efficient query style in
+     * instances where detailed information is needed.
+     *
+     * base = A pointer to the base of the block in question.
+     * size = The size of the block, calculated from base.
+     * attr = Attribute bits set on the memory block.
+     */
+    alias BlkInfo_ BlkInfo;
+
+
+    /**
+     * Returns a bit field representing all block attributes set for the memory
+     * referenced by p.  If p references memory not originally allocated by
+     * this garbage collector, points to the interior of a memory block, or if
+     * p is null, zero will be returned.
+     *
+     * Params:
+     *  p = A pointer to the root of a valid memory block or to null.
+     *
+     * Returns:
+     *  A bit field containing any bits set for the memory block referenced by
+     *  p or zero on error.
+     */
+    static uint getAttr( void* p )
+    {
+        return gc_getAttr( p );
+    }
+
+
+    /**
+     * Sets the specified bits for the memory referenced by p.  If p references
+     * memory not originally allocated by this garbage collector, points to the
+     * interior of a memory block, or if p is null, no action will be
+     * performed.
+     *
+     * Params:
+     *  p = A pointer to the root of a valid memory block or to null.
+     *  a = A bit field containing any bits to set for this memory block.
+     *
+     * Returns:
+     *  The result of a call to getAttr after the specified bits have been
+     *  set.
+     */
+    static uint setAttr( void* p, uint a )
+    {
+        return gc_setAttr( p, a );
+    }
+
+
+    /**
+     * Clears the specified bits for the memory referenced by p.  If p
+     * references memory not originally allocated by this garbage collector,
+     * points to the interior of a memory block, or if p is null, no action
+     * will be performed.
+     *
+     * Params:
+     *  p = A pointer to the root of a valid memory block or to null.
+     *  a = A bit field containing any bits to clear for this memory block.
+     *
+     * Returns:
+     *  The result of a call to getAttr after the specified bits have been
+     *  cleared.
+     */
+    static uint clrAttr( void* p, uint a )
+    {
+        return gc_clrAttr( p, a );
+    }
+
+
+    /**
+     * Requests an aligned block of managed memory from the garbage collector.
+     * This memory may be deleted at will with a call to free, or it may be
+     * discarded and cleaned up automatically during a collection run.  If
+     * allocation fails, this function will call onOutOfMemoryError which is
+     * expected to throw an OutOfMemoryException.
+     *
+     * Params:
+     *  sz = The desired allocation size in bytes.
+     *  ba = A bitmask of the attributes to set on this block.
+     *
+     * Returns:
+     *  A reference to the allocated memory or null if insufficient memory
+     *  is available.
+     *
+     * Throws:
+     *  OutOfMemoryException on allocation failure.
+     */
+    static void* malloc( size_t sz, uint ba = 0 )
+    {
+        return gc_malloc( sz, ba );
+    }
+
+
+    /**
+     * Requests an aligned block of managed memory from the garbage collector,
+     * which is initialized with all bits set to zero.  This memory may be
+     * deleted at will with a call to free, or it may be discarded and cleaned
+     * up automatically during a collection run.  If allocation fails, this
+     * function will call onOutOfMemoryError which is expected to throw an
+     * OutOfMemoryException.
+     *
+     * Params:
+     *  sz = The desired allocation size in bytes.
+     *  ba = A bitmask of the attributes to set on this block.
+     *
+     * Returns:
+     *  A reference to the allocated memory or null if insufficient memory
+     *  is available.
+     *
+     * Throws:
+     *  OutOfMemoryException on allocation failure.
+     */
+    static void* calloc( size_t sz, uint ba = 0 )
+    {
+        return gc_calloc( sz, ba );
+    }
+
+
+    /**
+     * If sz is zero, the memory referenced by p will be deallocated as if
+     * by a call to free.  A new memory block of size sz will then be
+     * allocated as if by a call to malloc, or the implementation may instead
+     * resize the memory block in place.  The contents of the new memory block
+     * will be the same as the contents of the old memory block, up to the
+     * lesser of the new and old sizes.  Note that existing memory will only
+     * be freed by realloc if sz is equal to zero.  The garbage collector is
+     * otherwise expected to later reclaim the memory block if it is unused.
+     * If allocation fails, this function will call onOutOfMemoryError which
+     * is expected to throw an OutOfMemoryException.  If p references memory
+     * not originally allocated by this garbage collector, or if it points to
+     * the interior of a memory block, no action will be taken.  If ba is zero
+     * (the default) and p references the head of a valid, known memory block
+     * then any bits set on the current block will be set on the new block if a
+     * reallocation is required.  If ba is not zero and p references the head
+     * of a valid, known memory block then the bits in ba will replace those on
+     * the current memory block and will also be set on the new block if a
+     * reallocation is required.
+     *
+     * Params:
+     *  p  = A pointer to the root of a valid memory block or to null.
+     *  sz = The desired allocation size in bytes.
+     *  ba = A bitmask of the attributes to set on this block.
+     *
+     * Returns:
+     *  A reference to the allocated memory on success or null if sz is
+     *  zero.  On failure, the original value of p is returned.
+     *
+     * Throws:
+     *  OutOfMemoryException on allocation failure.
+     */
+    static void* realloc( void* p, size_t sz, uint ba = 0 )
+    {
+        return gc_realloc( p, sz, ba );
+    }
+
+
+    /**
+     * Requests that the managed memory block referenced by p be extended in
+     * place by at least mx bytes, with a desired extension of sz bytes.  If an
+     * extension of the required size is not possible, if p references memory
+     * not originally allocated by this garbage collector, or if p points to
+     * the interior of a memory block, no action will be taken.
+     *
+     * Params:
+     *  p  = A pointer to the managed memory block to extend.
+     *  mx = The minimum extension size in bytes.
+     *  sz = The desired extension size in bytes.
+     *
+     * Returns:
+     *  The size in bytes of the extended memory block referenced by p or zero
+     *  if no extension occurred.
+     */
+    static size_t extend( void* p, size_t mx, size_t sz )
+    {
+        return gc_extend( p, mx, sz );
+    }
+
+
+    /**
+     * Requests that at least sz bytes of memory be obtained from the operating
+     * system and marked as free.
+     *
+     * Params:
+     *  sz = The desired size in bytes.
+     *
+     * Returns:
+     *  The actual number of bytes reserved or zero on error.
+     */
+    static size_t reserve( size_t sz )
+    {
+        return gc_reserve( sz );
+    }
+
+
+    /**
+     * Deallocates the memory referenced by p.  If p is null, no action
+     * occurs.  If p references memory not originally allocated by this
+     * garbage collector, or if it points to the interior of a memory block,
+     * no action will be taken.  The block will not be finalized regardless
+     * of whether the FINALIZE attribute is set.  If finalization is desired,
+     * use delete instead.
+     *
+     * Params:
+     *  p = A pointer to the root of a valid memory block or to null.
+     */
+    static void free( void* p )
+    {
+        gc_free( p );
+    }
+
+
+    /**
+     * Returns the base address of the memory block containing p.  This value
+     * is useful to determine whether p is an interior pointer, and the result
+     * may be passed to routines such as sizeOf which may otherwise fail.  If p
+     * references memory not originally allocated by this garbage collector, if
+     * p is null, or if the garbage collector does not support this operation,
+     * null will be returned.
+     *
+     * Params:
+     *  p = A pointer to the root or the interior of a valid memory block or to
+     *      null.
+     *
+     * Returns:
+     *  The base address of the memory block referenced by p or null on error.
+     */
+    static void* addrOf( void* p )
+    {
+        return gc_addrOf( p );
+    }
+
+
+    /**
+     * Returns the true size of the memory block referenced by p.  This value
+     * represents the maximum number of bytes for which a call to realloc may
+     * resize the existing block in place.  If p references memory not
+     * originally allocated by this garbage collector, points to the interior
+     * of a memory block, or if p is null, zero will be returned.
+     *
+     * Params:
+     *  p = A pointer to the root of a valid memory block or to null.
+     *
+     * Returns:
+     *  The size in bytes of the memory block referenced by p or zero on error.
+     */
+    static size_t sizeOf( void* p )
+    {
+        return gc_sizeOf( p );
+    }
+
+
+    /**
+     * Returns aggregate information about the memory block containing p.  If p
+     * references memory not originally allocated by this garbage collector, if
+     * p is null, or if the garbage collector does not support this operation,
+     * BlkInfo.init will be returned.  Typically, support for this operation
+     * is dependent on support for addrOf.
+     *
+     * Params:
+     *  p = A pointer to the root or the interior of a valid memory block or to
+     *      null.
+     *
+     * Returns:
+     *  Information regarding the memory block referenced by p or BlkInfo.init
+     *  on error.
+     */
+    static BlkInfo query( void* p )
+    {
+        return gc_query( p );
+    }
+
+
+    /**
+     * Adds the memory address referenced by p to an internal list of roots to
+     * be scanned during a collection.  If p is null, no operation is
+     * performed.
+     *
+     * Params:
+     *  p = A pointer to a valid memory address or to null.
+     */
+    static void addRoot( void* p )
+    {
+        gc_addRoot( p );
+    }
+
+
+    /**
+     * Adds the memory block referenced by p and of size sz to an internal list
+     * of ranges to be scanned during a collection.  If p is null, no operation
+     * is performed.
+     *
+     * Params:
+     *  p  = A pointer to a valid memory address or to null.
+     *  sz = The size in bytes of the block to add.  If sz is zero then no
+     *       operation will occur.  If p is null then sz must be zero.
+     */
+    static void addRange( void* p, size_t sz )
+    {
+        gc_addRange( p, sz );
+    }
+
+
+    /**
+     * Removes the memory block referenced by p from an internal list of roots
+     * to be scanned during a collection.  If p is null or does not represent
+     * a value previously passed to addRoot then no operation is performed.
+     *
+     * Params:
+     *  p  = A pointer to a valid memory address or to null.
+     */
+    static void removeRoot( void* p )
+    {
+        gc_removeRoot( p );
+    }
+
+
+    /**
+     * Removes the memory block referenced by p from an internal list of ranges
+     * to be scanned during a collection.  If p is null or does not represent
+     * a value previously passed to addRange then no operation is
+     * performed.
+     *
+     * Params:
+     *  p  = A pointer to a valid memory address or to null.
+     */
+    static void removeRange( void* p )
+    {
+        gc_removeRange( p );
+    }
+
+    /**
+     * Get handle to the collector.
+     * The only thing that can be done with this handle is pass it to
+     * setHandle(). getHandle/setHandle/endHandle work together so that
+     * if there are multiple instances of the gc running, only one instance
+     * can be set to run.
+     * The most common case of this is under Windows where a D application
+     * calls functions in a DLL that is implemented in D.
+     */
+
+    static void* getHandle()
+    {
+	return gc_getHandle();
+    }
+
+    /**
+     * Set handle to the collector.
+     * The handle p is an opaque handle, acquired by a call to
+     * getHandle().
+     */
+
+    static void setHandle(void* p)
+    {
+	gc_setHandle(p);
+    }
+
+    /**
+     * Call when done using the collector specified by the
+     * call to setHandle().
+     */
+
+    static void endHandle()
+    {
+	gc_endHandle();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/core/runtime.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,171 @@
+/**
+ * The runtime module exposes information specific to the D runtime code.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License:   BSD Style, see LICENSE
+ * Authors:   Sean Kelly
+ */
+module core.runtime;
+
+
+// C-linkage runtime entry points and the callback types they accept.  They
+// are only declared here; the Runtime struct below forwards to them.
+private
+{
+    extern (C) bool rt_isHalting();
+
+    // Callback signatures accepted by the Runtime setters.
+    alias bool function() ModuleUnitTester;
+    alias bool function(Object) CollectHandler;
+    alias Exception.TraceInfo function( void* ptr = null ) TraceHandler;
+
+    extern (C) void rt_setCollectHandler( CollectHandler h );
+    extern (C) void rt_setTraceHandler( TraceHandler h );
+
+    // Receives exceptions raised while rt_init or rt_term is running.
+    alias void delegate( Exception ) ExceptionHandler;
+    extern (C) bool rt_init( ExceptionHandler dg = null );
+    extern (C) bool rt_term( ExceptionHandler dg = null );
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Runtime
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * This struct encapsulates all functionality related to the underlying runtime
+ * module for the calling context.
+ */
+struct Runtime
+{
+    /**
+     * Initializes the runtime.  This call is to be used in instances where the
+     * standard program initialization process is not executed.  This is most
+     * often in shared libraries or in libraries linked to a C program.
+     *
+     * Params:
+     *  dg = A delegate which will receive any exception thrown during the
+     *       initialization process or null if such exceptions should be
+     *       discarded.
+     *
+     * Returns:
+     *  true if initialization succeeds and false if initialization fails.
+     */
+    static bool initialize( void delegate( Exception ) dg = null )
+    {
+        return rt_init( dg );
+    }
+
+
+    /**
+     * Terminates the runtime.  This call is to be used in instances where the
+     * standard program termination process will not be executed.  This is
+     * most often in shared libraries or in libraries linked to a C program.
+     *
+     * Params:
+     *  dg = A delegate which will receive any exception thrown during the
+     *       termination process or null if such exceptions should be
+     *       discarded.
+     *
+     * Returns:
+     *  true if termination succeeds and false if termination fails.
+     */
+    static bool terminate( void delegate( Exception ) dg = null )
+    {
+        return rt_term( dg );
+    }
+
+
+    /**
+     * Returns true if the runtime is halting.  Under normal circumstances,
+     * this will be set between the time that normal application code has
+     * exited and before module dtors are called.
+     *
+     * Returns:
+     *  true if the runtime is halting.
+     */
+    static bool isHalting()
+    {
+        return rt_isHalting();
+    }
+
+
+    /**
+     * Overrides the default trace mechanism with a user-supplied version.  A
+     * trace represents the context from which an exception was thrown, and the
+     * trace handler will be called when this occurs.  The pointer supplied to
+     * this routine indicates the base address from which tracing should occur.
+     * If the supplied pointer is null then the trace routine should determine
+     * an appropriate calling context from which to begin the trace.
+     *
+     * Params:
+     *  h = The new trace handler.  Set to null to use the default handler.
+     */
+    static void traceHandler( TraceHandler h )
+    {
+        rt_setTraceHandler( h );
+    }
+
+
+    /**
+     * Overrides the default collect handler with a user-supplied version.  This
+     * routine will be called for each resource object that is finalized in a
+     * non-deterministic manner--typically during a garbage collection cycle.
+     * If the supplied routine returns true then the object's dtor will be
+     * called as normal, but if the routine returns false then the dtor will
+     * not be called.  The default behavior is for all object dtors to be
+     * called.
+     *
+     * Params:
+     *  h = The new collect handler.  Set to null to use the default handler.
+     */
+    static void collectHandler( CollectHandler h )
+    {
+        rt_setCollectHandler( h );
+    }
+
+
+    /**
+     * Overrides the default module unit tester with a user-supplied version.
+     * This routine will be called once on program initialization.  The return
+     * value of this routine indicates to the runtime whether the body of the
+     * program will be executed.
+     *
+     * Params:
+     *  h = The new unit tester.  Set to null to use the default unit tester.
+     */
+    static void moduleUnitTester( ModuleUnitTester h )
+    {
+        sm_moduleUnitTester = h;
+    }
+
+
+private:
+    // User-supplied unit tester consulted by runModuleUnitTests; null selects
+    // the default behavior of running every module's unit tests.
+    static ModuleUnitTester sm_moduleUnitTester = null;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Overridable Callbacks
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * Entry point used by the runtime to execute module unit tests on startup.
+ * When a unit tester has been installed through Runtime.moduleUnitTester it
+ * is invoked and its result is returned.  Otherwise the unit test of every
+ * module that has one is run in sequence.
+ *
+ * Returns:
+ *  true if execution should continue after testing is complete and false if
+ *  not.  Default behavior is to return true.
+ */
+extern (C) bool runModuleUnitTests()
+{
+    // A user-supplied tester replaces the default behavior entirely.
+    if( Runtime.sm_moduleUnitTester !is null )
+        return Runtime.sm_moduleUnitTester();
+
+    foreach( m; ModuleInfo )
+    {
+        // Not every module carries a unit test.
+        if( m.unitTest )
+            m.unitTest();
+    }
+    return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/core/thread.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,3322 @@
+/**
+ * The thread module provides support for thread creation and management.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License:   BSD Style, see LICENSE
+ * Authors:   Sean Kelly
+ */
+module core.thread;
+
+
+// this should be true for most architectures
+version = StackGrowsDown;
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Thread and Fiber Exceptions
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * Base class for thread exceptions.
+ */
+class ThreadException : Exception
+{
+    this( string msg )
+    {
+        super( msg );
+    }
+}
+
+
+/**
+ * Base class for fiber exceptions.
+ */
+class FiberException : Exception
+{
+    this( string msg )
+    {
+        super( msg );
+    }
+}
+
+
+private
+{
+    //
+    // exposed by compiler runtime
+    //
+    extern (C) void* rt_stackBottom();
+    extern (C) void* rt_stackTop();
+
+
+    void* getStackBottom()
+    {
+        return rt_stackBottom();
+    }
+
+
+    void* getStackTop()     // yields the stack pointer as seen on entry to this call
+    {
+        version( D_InlineAsm_X86 )
+        {
+            asm
+            {
+                naked;          // no compiler prologue/epilogue, so ESP is untouched
+                mov EAX, ESP;   // EAX carries the pointer result back to the caller
+                ret;
+            }
+        }
+        else
+        {
+            return rt_stackTop();   // no x86 inline asm: ask the compiler runtime
+        }
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Thread Entry Point and Signal Handlers
+///////////////////////////////////////////////////////////////////////////////
+
+
+version( Windows )
+{
+    private
+    {
+        import stdc.stdint : uintptr_t; // for _beginthreadex decl below
+        import sys.windows.windows;
+
+        const DWORD TLS_OUT_OF_INDEXES  = 0xFFFFFFFF;
+
+        extern (Windows) alias uint function(void*) btex_fptr;
+        extern (C) uintptr_t _beginthreadex(void*, uint, btex_fptr, void*, uint, uint*);
+
+
+        //
+        // entry point for Windows threads
+        //
+        extern (Windows) uint thread_entryPoint( void* arg )
+        {
+            Thread  obj = cast(Thread) arg;
+            assert( obj );
+            scope( exit ) Thread.remove( obj );
+
+            assert( obj.m_curr is &obj.m_main );
+            obj.m_main.bstack = getStackBottom();
+            obj.m_main.tstack = obj.m_main.bstack;
+            Thread.add( &obj.m_main );
+            Thread.setThis( obj );
+
+            // NOTE: No GC allocations may occur until the stack pointers have
+            //       been set and Thread.getThis returns a valid reference to
+            //       this thread object (this latter condition is not strictly
+            //       necessary on Win32 but it should be followed for the sake
+            //       of consistency).
+
+            // TODO: Consider putting an auto exception object here (using
+            //       alloca) for OutOfMemoryError plus something to track
+            //       whether an exception is in-flight?
+
+            try
+            {
+                obj.run();
+            }
+            catch( Object o )
+            {
+                obj.m_unhandled = o;
+            }
+            return 0;
+        }
+
+
+        //
+        // copy of the same-named function in phobos.std.thread--it uses the
+        // Windows naming convention to be consistent with GetCurrentThreadId
+        //
+        HANDLE GetCurrentThreadHandle()     // returns a duplicated handle for the calling thread
+        {
+            const uint DUPLICATE_SAME_ACCESS = 0x00000002;
+
+            HANDLE curr = GetCurrentThread(),
+                   proc = GetCurrentProcess(),
+                   hndl;    // left at its default value if the duplication below fails
+
+            DuplicateHandle( proc, curr, proc, &hndl, 0, TRUE, DUPLICATE_SAME_ACCESS ); // NOTE(review): result is not checked
+            return hndl;
+        }
+    }
+}
+else version( Posix )
+{
+    private
+    {
+        import stdc.posix.semaphore;
+        import stdc.posix.pthread;
+        import stdc.posix.signal;
+        import stdc.posix.time;
+        import stdc.errno;
+
+        extern (C) int getErrno();
+        version( GNU )
+        {
+            import gcc.builtins;
+        }
+
+
+        //
+        // entry point for POSIX threads
+        //
+        extern (C) void* thread_entryPoint( void* arg )
+        {
+            Thread  obj = cast(Thread) arg;
+            assert( obj );
+            scope( exit )
+            {
+                // NOTE: isRunning should be set to false after the thread is
+                //       removed or a double-removal could occur between this
+                //       function and thread_suspendAll.
+                Thread.remove( obj );
+                obj.m_isRunning = false;
+            }
+
+            static extern (C) void thread_cleanupHandler( void* arg )
+            {
+                Thread  obj = cast(Thread) arg;
+                assert( obj );
+
+                // NOTE: If the thread terminated abnormally, just set it as
+                //       not running and let thread_suspendAll remove it from
+                //       the thread list.  This is safer and is consistent
+                //       with the Windows thread code.
+                obj.m_isRunning = false;
+            }
+
+            // NOTE: Using void to skip the initialization here relies on
+            //       knowledge of how pthread_cleanup is implemented.  It may
+            //       not be appropriate for all platforms.  However, it does
+            //       avoid the need to link the pthread module.  If any
+            //       implementation actually requires default initialization
+            //       then pthread_cleanup should be restructured to maintain
+            //       the current lack of a link dependency.
+            pthread_cleanup cleanup = void;
+            cleanup.push( &thread_cleanupHandler, cast(void*) obj );
+
+            // NOTE: For some reason this does not always work for threads.
+            //obj.m_main.bstack = getStackBottom();
+            version( D_InlineAsm_X86 )
+            {
+                static void* getBasePtr()
+                {
+                    asm
+                    {
+                        naked;
+                        mov EAX, EBP;
+                        ret;
+                    }
+                }
+
+                obj.m_main.bstack = getBasePtr();
+            }
+            else version( StackGrowsDown )
+                obj.m_main.bstack = &obj + 1;
+            else
+                obj.m_main.bstack = &obj;
+            obj.m_main.tstack = obj.m_main.bstack;
+            assert( obj.m_curr == &obj.m_main );
+            Thread.add( &obj.m_main );
+            Thread.setThis( obj );
+
+            // NOTE: No GC allocations may occur until the stack pointers have
+            //       been set and Thread.getThis returns a valid reference to
+            //       this thread object (this latter condition is not strictly
+            //       necessary on Win32 but it should be followed for the sake
+            //       of consistency).
+
+            // TODO: Consider putting an auto exception object here (using
+            //       alloca) for OutOfMemoryError plus something to track
+            //       whether an exception is in-flight?
+
+            try
+            {
+                obj.run();
+            }
+            catch( Object o )
+            {
+                obj.m_unhandled = o;
+            }
+            return null;
+        }
+
+
+        //
+        // used to track the number of suspended threads
+        //
+        sem_t   suspendCount;
+
+
+        extern (C) void thread_suspendHandler( int sig )    // SIGUSR1 handler: parks the calling thread
+        in
+        {
+            assert( sig == SIGUSR1 );
+        }
+        body
+        {
+            version( LDC )  // copy the registers into stack locals (presumably so a stack scan sees them)
+            {
+                version(X86)
+                {
+                    uint eax,ecx,edx,ebx,ebp,esi,edi;
+                    asm
+                    {
+                        mov eax[EBP], EAX ;
+                        mov ecx[EBP], ECX ;
+                        mov edx[EBP], EDX ;
+                        mov ebx[EBP], EBX ;
+                        mov ebp[EBP], EBP ;
+                        mov esi[EBP], ESI ;
+                        mov edi[EBP], EDI ;
+                    }
+                }
+                else version (X86_64)
+                {
+                    ulong rax,rbx,rcx,rdx,rbp,rsi,rdi,rsp,r8,r9,r10,r11,r12,r13,r14,r15;
+                    asm
+                    {
+                        movq rax[RBP], RAX ;
+                        movq rbx[RBP], RBX ;
+                        movq rcx[RBP], RCX ;
+                        movq rdx[RBP], RDX ;
+                        movq rbp[RBP], RBP ;
+                        movq rsi[RBP], RSI ;
+                        movq rdi[RBP], RDI ;
+                        movq rsp[RBP], RSP ;
+                        movq r8[RBP], R8 ;      // R8 and R9 are general-purpose registers
+                        movq r9[RBP], R9 ;      // too and must be stored with the rest
+                        movq r10[RBP], R10 ;
+                        movq r11[RBP], R11 ;
+                        movq r12[RBP], R12 ;
+                        movq r13[RBP], R13 ;
+                        movq r14[RBP], R14 ;
+                        movq r15[RBP], R15 ;
+                    }
+                }
+                else
+                {
+                    static assert( false, "Architecture not supported." );
+                }
+            }
+            else version( D_InlineAsm_X86 )
+            {
+                asm
+                {
+                    pushad;
+                }
+            }
+            else version( GNU )
+            {
+                __builtin_unwind_init();
+            }
+            else
+            {
+                static assert( false, "Architecture not supported." );
+            }
+
+            // NOTE: Since registers are being pushed and popped from the
+            //       stack, any other stack data used by this function should
+            //       be gone before the stack cleanup code is called below.
+            {
+                Thread  obj = Thread.getThis();
+
+                // NOTE: The thread reference returned by getThis is set within
+                //       the thread startup code, so it is possible that this
+                //       handler may be called before the reference is set.  In
+                //       this case it is safe to simply suspend and not worry
+                //       about the stack pointers as the thread will not have
+                //       any references to GC-managed data.
+                if( obj && !obj.m_lock )
+                {
+                    obj.m_curr.tstack = getStackTop();
+                }
+
+                sigset_t    sigres = void;
+                int         status = sigfillset( &sigres );     // block every signal...
+                assert( status == 0 );
+                status = sigdelset( &sigres, SIGUSR2 );         // ...except SIGUSR2
+                assert( status == 0 );
+
+                // count this thread as suspended, then wait for a signal the mask lets through
+                status = sem_post( &suspendCount );
+                assert( status == 0 );
+
+                sigsuspend( &sigres );
+
+                if( obj && !obj.m_lock )
+                {
+                    obj.m_curr.tstack = obj.m_curr.bstack;
+                }
+            }
+
+            version( LDC )
+            {
+                // nothing to pop: the registers were stored into locals, not pushed
+            }
+            else version( D_InlineAsm_X86 )
+            {
+                asm
+                {
+                    popad;
+                }
+            }
+            else version( GNU )
+            {
+                // registers will be popped automatically
+            }
+            else
+            {
+                static assert( false, "Architecture not supported." );
+            }
+        }
+
+
+        extern (C) void thread_resumeHandler( int sig )
+        in
+        {
+            assert( sig == SIGUSR2 );
+        }
+        body
+        {
+
+        }
+    }
+}
+else
+{
+    // NOTE: This is the only place threading versions are checked.  If a new
+    //       version is added, the module code will need to be searched for
+    //       places where version-specific code may be required.  This can be
+    //       easily accomplished by searching for 'Windows' or 'Posix'.
+    static assert( false, "Unknown threading implementation." );
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Thread
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * This class encapsulates all threading functionality for the D
+ * programming language.  As thread manipulation is a required facility
+ * for garbage collection, all user threads should derive from this
+ * class, and instances of this class should never be explicitly deleted.
+ * A new thread may be created using either derivation or composition, as
+ * in the following example.
+ *
+ * Example:
+ * ----------------------------------------------------------------------------
+ *
+ * class DerivedThread : Thread
+ * {
+ *     this()
+ *     {
+ *         super( &run );
+ *     }
+ *
+ * private :
+ *     void run()
+ *     {
+ *         printf( "Derived thread running.\n" );
+ *     }
+ * }
+ *
+ * void threadFunc()
+ * {
+ *     printf( "Composed thread running.\n" );
+ * }
+ *
+ * // create instances of each type
+ * Thread derived = new DerivedThread();
+ * Thread composed = new Thread( &threadFunc );
+ *
+ * // start both threads
+ * derived.start();
+ * composed.start();
+ *
+ * ----------------------------------------------------------------------------
+ */
+class Thread
+{
+    ///////////////////////////////////////////////////////////////////////////
+    // Initialization
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Initializes a thread object which is associated with a static
+     * D function.
+     *
+     * Params:
+     *  fn = The thread function.
+     *  sz = The stack size for this thread.
+     *
+     * In:
+     *  fn must not be null.
+     */
+    this( void function() fn, size_t sz = 0 )
+    in
+    {
+        assert( fn );
+    }
+    body
+    {
+        m_fn   = fn;
+        m_sz   = sz;
+        m_call = Call.FN;
+        m_curr = &m_main;
+    }
+
+
+    /**
+     * Initializes a thread object which is associated with a dynamic
+     * D function.
+     *
+     * Params:
+     *  dg = The thread function.
+     *  sz = The stack size for this thread.
+     *
+     * In:
+     *  dg must not be null.
+     */
+    this( void delegate() dg, size_t sz = 0 )
+    in
+    {
+        assert( dg );
+    }
+    body
+    {
+        m_dg   = dg;
+        m_sz   = sz;
+        m_call = Call.DG;
+        m_curr = &m_main;
+    }
+
+
+    /**
+     * Cleans up any remaining resources used by this object.
+     */
+    ~this()
+    {
+        if( m_addr == m_addr.init )
+        {
+            return;
+        }
+
+        version( Win32 )
+        {
+            m_addr = m_addr.init;
+            CloseHandle( m_hndl );
+            m_hndl = m_hndl.init;
+        }
+        else version( Posix )
+        {
+            pthread_detach( m_addr );
+            m_addr = m_addr.init;
+        }
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // General Actions
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Starts the thread and invokes the function or delegate passed upon
+     * construction.
+     *
+     * In:
+     *  This routine may only be called once per thread instance.
+     *
+     * Throws:
+     *  ThreadException if the thread fails to start.
+     */
+    final void start()
+    in
+    {
+        assert( !next && !prev );
+    }
+    body
+    {
+        version( Win32 ) {} else
+        version( Posix )
+        {
+            pthread_attr_t  attr;
+            if( pthread_attr_init( &attr ) )
+                throw new ThreadException( "Error initializing thread attributes" );
+            scope( exit ) pthread_attr_destroy( &attr ); // version blocks add no scope: runs when start() exits
+            if( m_sz && pthread_attr_setstacksize( &attr, m_sz ) )
+                throw new ThreadException( "Error initializing thread stack size" );
+            if( pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ) )
+                throw new ThreadException( "Error setting thread joinable" );
+        }
+
+        // NOTE: This operation needs to be synchronized to avoid a race
+        //       condition with the GC.  Without this lock, the thread
+        //       could start and allocate memory before being added to
+        //       the global thread list, preventing it from being scanned
+        //       and causing memory to be collected that is still in use.
+        synchronized( slock )
+        {
+            version( Win32 )
+            {
+                m_hndl = cast(HANDLE) _beginthreadex( null, m_sz, &thread_entryPoint, cast(void*) this, 0, &m_addr );
+                if( cast(size_t) m_hndl == 0 )
+                    throw new ThreadException( "Error creating thread" );
+            }
+            else version( Posix )
+            {
+                m_isRunning = true;
+                scope( failure ) m_isRunning = false;   // undo the flag if creation throws
+
+                if( pthread_create( &m_addr, &attr, &thread_entryPoint, cast(void*) this ) != 0 )
+                    throw new ThreadException( "Error creating thread" );
+            }
+            multiThreadedFlag = true;
+            add( this );    // register the new thread in the global thread list
+        }
+    }
+
+
+    /**
+     * Waits for this thread to complete.  If the thread terminated as the
+     * result of an unhandled exception, this exception will be rethrown.
+     *
+     * Params:
+     *  rethrow = Rethrow any unhandled exception which may have caused this
+     *            thread to terminate.
+     *
+     * Throws:
+     *  ThreadException if the operation fails.
+     *  Any exception not handled by the joined thread.
+     *
+     * Returns:
+     *  Any exception not handled by this thread if rethrow = false, null
+     *  otherwise.
+     */
+    final Object join( bool rethrow = true )
+    {
+        version( Win32 )
+        {
+            if( WaitForSingleObject( m_hndl, INFINITE ) != WAIT_OBJECT_0 )
+                throw new ThreadException( "Unable to join thread" );
+            // NOTE: m_addr must be cleared before m_hndl is closed to avoid
+            //       a race condition with isRunning.  The operation is labeled
+            //       volatile to prevent compiler reordering.
+            volatile m_addr = m_addr.init;
+            CloseHandle( m_hndl );
+            m_hndl = m_hndl.init;
+        }
+        else version( Posix )
+        {
+            if( pthread_join( m_addr, null ) != 0 )
+                throw new ThreadException( "Unable to join thread" );
+            // NOTE: pthread_join acts as a substitute for pthread_detach,
+            //       which is normally called by the dtor.  Setting m_addr
+            //       to zero ensures that pthread_detach will not be called
+            //       on object destruction.
+            volatile m_addr = m_addr.init;
+        }
+        if( m_unhandled )
+        {
+            if( rethrow )
+                throw m_unhandled;
+            return m_unhandled;
+        }
+        return null;
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // General Properties
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Gets the user-readable label for this thread.
+     *
+     * Returns:
+     *  The name of this thread.
+     */
+    final char[] name()
+    {
+        synchronized( this )
+        {
+            return m_name;
+        }
+    }
+
+
+    /**
+     * Sets the user-readable label for this thread.
+     *
+     * Params:
+     *  val = The new name of this thread.
+     */
+    final void name( char[] val )
+    {
+        synchronized( this )
+        {
+            m_name = val.dup;
+        }
+    }
+
+
+    /**
+     * Gets the daemon status for this thread.  While the runtime will wait for
+     * all normal threads to complete before tearing down the process, daemon
+     * threads are effectively ignored and thus will not prevent the process
+     * from terminating.  In effect, daemon threads will be terminated
+     * automatically by the OS when the process exits.
+     *
+     * Returns:
+     *  true if this is a daemon thread.
+     */
+    final bool isDaemon()
+    {
+        synchronized( this )
+        {
+            return m_isDaemon;
+        }
+    }
+
+
+    /**
+     * Sets the daemon status for this thread.  While the runtime will wait for
+     * all normal threads to complete before tearing down the process, daemon
+     * threads are effectively ignored and thus will not prevent the process
+     * from terminating.  In effect, daemon threads will be terminated
+     * automatically by the OS when the process exits.
+     *
+     * Params:
+     *  val = The new daemon status for this thread.
+     */
+    final void isDaemon( bool val )
+    {
+        synchronized( this )
+        {
+            m_isDaemon = val;
+        }
+    }
+
+
+    /**
+     * Tests whether this thread is running.
+     *
+     * Returns:
+     *  true if the thread is running, false if not.
+     */
+    final bool isRunning()
+    {
+        if( m_addr == m_addr.init )
+        {
+            return false;
+        }
+
+        version( Win32 )
+        {
+            uint ecode = 0;
+            GetExitCodeThread( m_hndl, &ecode );
+            return ecode == STILL_ACTIVE;
+        }
+        else version( Posix )
+        {
+            // NOTE: It should be safe to access this value without
+            //       memory barriers because word-tearing and such
+            //       really isn't an issue for boolean values.
+            return m_isRunning;
+        }
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Thread Priority Actions
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * The minimum scheduling priority that may be set for a thread.  On
+     * systems where multiple scheduling policies are defined, this value
+     * represents the minimum valid priority for the scheduling policy of
+     * the process.
+     */
+    static const int PRIORITY_MIN;
+
+
+    /**
+     * The maximum scheduling priority that may be set for a thread.  On
+     * systems where multiple scheduling policies are defined, this value
+     * represents the maximum valid priority for the scheduling policy of
+     * the process.
+     */
+    static const int PRIORITY_MAX;
+
+
+    /**
+     * Gets the scheduling priority for the associated thread.
+     *
+     * Returns:
+     *  The scheduling priority of this thread.
+     */
+    final int priority()
+    {
+        version( Win32 )
+        {
+            return GetThreadPriority( m_hndl );
+        }
+        else version( Posix )
+        {
+            int         policy;
+            sched_param param;
+
+            if( pthread_getschedparam( m_addr, &policy, &param ) )
+                throw new ThreadException( "Unable to get thread priority" );
+            return param.sched_priority;
+        }
+    }
+
+
+    /**
+     * Sets the scheduling priority for the associated thread.
+     *
+     * Params:
+     *  val = The new scheduling priority of this thread.
+     */
+    final void priority( int val )
+    {
+        version( Win32 )
+        {
+            if( !SetThreadPriority( m_hndl, val ) )
+                throw new ThreadException( "Unable to set thread priority" );
+        }
+        else version( Posix )
+        {
+            // NOTE: pthread_setschedprio is not implemented on linux, so use
+            //       the more complicated get/set sequence below.
+            //if( pthread_setschedprio( m_addr, val ) )
+            //    throw new ThreadException( "Unable to set thread priority" );
+
+            int         policy;
+            sched_param param;
+
+            if( pthread_getschedparam( m_addr, &policy, &param ) )
+                throw new ThreadException( "Unable to set thread priority" );
+            param.sched_priority = val;
+            if( pthread_setschedparam( m_addr, policy, &param ) )
+                throw new ThreadException( "Unable to set thread priority" );
+        }
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Actions on Calling Thread
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Suspends the calling thread for at least the supplied period.  This may
+     * result in multiple OS calls if period is greater than the maximum sleep
+     * duration supported by the operating system.
+     *
+     * Params:
+     *  period = The minimum duration the calling thread should be suspended,
+     *           in 100 nanosecond intervals.
+     *
+     * In:
+     *  period must be non-negative.
+     *
+     * Example:
+     * ------------------------------------------------------------------------
+     *
+     * Thread.sleep( 500_000 );    // sleep for 50 milliseconds
+     * Thread.sleep( 50_000_000 ); // sleep for 5 seconds
+     *
+     * ------------------------------------------------------------------------
+     */
+    static void sleep( long period )
+    in
+    {
+        assert( period >= 0 );
+    }
+    body
+    {
+        version( Win32 )
+        {
+            enum : uint
+            {
+                TICKS_PER_MILLI  = 10_000,
+                MAX_SLEEP_MILLIS = uint.max - 1     // uint.max itself means INFINITE to Sleep()
+            }
+
+            period = period < TICKS_PER_MILLI ?     // sub-millisecond requests become 1 ms
+                        1 :
+                        period / TICKS_PER_MILLI;
+            while( period > MAX_SLEEP_MILLIS )
+            {
+                Sleep( MAX_SLEEP_MILLIS );
+                period -= MAX_SLEEP_MILLIS;
+            }
+            Sleep( cast(uint) period );
+        }
+        else version( Posix )
+        {
+            timespec tin  = void;
+            timespec tout = void;
+
+            enum : uint
+            {
+                NANOS_PER_TICK   = 100,
+                TICKS_PER_SECOND = 10_000_000,
+            }
+
+            // NOTE: Each pass sleeps for as much of the remaining period as one
+            //       timespec can express.  The clamp is applied to whole seconds
+            //       because tin.tv_sec.max * TICKS_PER_SECOND overflows a long
+            //       when tv_sec is 64 bits wide, which would make a tick-based
+            //       limit negative and force every request to the maximum.
+            do
+            {
+                long secs  = period / TICKS_PER_SECOND;
+                long ticks = period % TICKS_PER_SECOND;
+                if( secs > tin.tv_sec.max )
+                {
+                    secs  = tin.tv_sec.max;
+                    ticks = 0;
+                }
+
+                tin.tv_sec  = cast(typeof(tin.tv_sec)) secs;
+                tin.tv_nsec = cast(typeof(tin.tv_nsec)) ticks * NANOS_PER_TICK;
+                period     -= secs * TICKS_PER_SECOND + ticks;
+
+                // an interrupted call reports the unslept time in tout: retry with it
+                while( nanosleep( &tin, &tout ) )
+                {
+                    if( getErrno() != EINTR )
+                        throw new ThreadException( "Unable to sleep for the specified duration" );
+                    tin = tout;
+                }
+            } while( period > 0 );
+        }
+    }
+
+
+    /**
+     * Forces a context switch to occur away from the calling thread.
+     */
+    static void yield()
+    {
+        version( Win32 )
+        {
+            // NOTE: Sleep(1) is necessary because Sleep(0) does not give
+            //       lower priority threads any timeslice, so looping on
+            //       Sleep(0) could be resource-intensive in some cases.
+            Sleep( 1 );
+        }
+        else version( Posix )
+        {
+            sched_yield();
+        }
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Thread Accessors
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Provides a reference to the calling thread.
+     *
+     * Returns:
+     *  The thread object representing the calling thread.  The result of
+     *  deleting this object is undefined.
+     */
+    static Thread getThis()
+    {
+        // NOTE: This function may not be called until thread_init has
+        //       completed.  See thread_suspendAll for more information
+        //       on why this might occur.
+        version( Win32 )
+        {
+            return cast(Thread) TlsGetValue( sm_this );
+        }
+        else version( Posix )
+        {
+            return cast(Thread) pthread_getspecific( sm_this );
+        }
+    }
+
+
+    /**
+     * Provides a list of all threads currently being tracked by the system.
+     *
+     * Returns:
+     *  An array containing references to all threads currently being
+     *  tracked by the system.  The result of deleting any contained
+     *  objects is undefined.
+     */
+    static Thread[] getAll()
+    {
+        synchronized( slock )
+        {
+            // snapshot taken under the list lock; one entry per tracked thread
+            auto   all = new Thread[sm_tlen];
+            size_t idx = 0;
+            foreach( Thread t; Thread )
+            {
+                all[idx] = t;
+                ++idx;
+            }
+            return all;
+        }
+    }
+
+
+    /**
+     * Operates on all threads currently being tracked by the system.  The
+     * result of deleting any Thread object is undefined.
+     *
+     * Params:
+     *  dg = The supplied code as a delegate.
+     *
+     * Returns:
+     *  Zero if all elements are visited, nonzero if not.
+     */
+    static int opApply( int delegate( inout Thread ) dg )
+    {
+        synchronized( slock )
+        {
+            Thread cur = sm_tbeg;
+            while( cur )    // a nonzero result from dg ends the walk early
+            {
+                int res = dg( cur );
+                if( res != 0 )
+                    return res;
+                cur = cur.next;
+            }
+            return 0;
+        }
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Local Storage Actions
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Indicates the number of local storage pointers available at program
+     * startup.  It is recommended that this number be at least 64.
+     */
+    static const uint LOCAL_MAX = 64;
+
+
+    /**
+     * Reserves a local storage pointer for use and initializes this location
+     * to null for all running threads.
+     *
+     * Returns:
+     *  A key representing the array offset of this memory location.
+     */
+    static uint createLocal()
+    {
+        synchronized( slock )
+        {
+            foreach( uint slot, inout bool taken; sm_local )
+            {
+                if( taken )
+                    continue;
+
+                // NOTE: The first free slot is cleared in every tracked thread
+                //       before it is handed out.  An explicit loop is used
+                //       because of a foreach bug in GDC 0.24 SVN (r139).
+                for( Thread t = sm_tbeg; t !is null; t = t.next )
+                    t.m_local[slot] = null;
+                taken = true;
+                return slot;
+            }
+            throw new ThreadException( "No more local storage slots available" );
+        }
+    }
+
+
+    /**
+     * Marks the supplied key as available and sets the associated location
+     * to null for all running threads.  It is assumed that any key passed
+     * to this function is valid.  The result of calling this function for
+     * a key which is still in use is undefined.
+     *
+     * Params:
+     *  key = The key to delete.
+     */
+    static void deleteLocal( uint key )
+    {
+        synchronized( slock )
+        {
+            // Release the slot, then wipe whatever each listed thread had
+            // stored in it.  The list is walked by hand because a foreach
+            // over it hit a bug in GDC 0.24 SVN (r139).
+            sm_local[key] = false;
+
+            Thread cur = sm_tbeg;
+            while( cur )
+            {
+                cur.m_local[key] = null;
+                cur = cur.next;
+            }
+        }
+    }
+
+
+    /**
+     * Loads the value stored at key within a thread-local static array.  It is
+     * assumed that any key passed to this function is valid.
+     *
+     * Params:
+     *  key = The location which holds the desired data.
+     *
+     * Returns:
+     *  The data associated with the supplied key.
+     */
+    static void* getLocal( uint key )
+    {
+        // Look the slot up in the calling thread's own table
+        Thread self = getThis();
+        return self.m_local[key];
+    }
+
+
+    /**
+     * Stores the supplied value at key within a thread-local static array.  It
+     * is assumed that any key passed to this function is valid.
+     *
+     * Params:
+     *  key = The location to store the supplied data.
+     *  val = The data to store.
+     *
+     * Returns:
+     *  A copy of the data which has just been stored.
+     */
+    static void* setLocal( uint key, void* val )
+    {
+        // Store into the calling thread's own table and hand the value back
+        Thread self = getThis();
+        self.m_local[key] = val;
+        return val;
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Static Initializer
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * This initializer is used to set thread constants.  All functional
+     * initialization occurs within thread_init().
+     */
+    static this()
+    {
+        version( Win32 )
+        {
+            PRIORITY_MAX =  15;
+            PRIORITY_MIN = -15;
+        }
+        else version( Posix )
+        {
+            // The valid priority range depends on the scheduling policy, so
+            // ask for the policy of the thread running this initializer.
+            int         schedPolicy;
+            sched_param schedParam;
+
+            int rc = pthread_getschedparam( pthread_self(), &schedPolicy, &schedParam );
+            assert( rc == 0 );
+
+            PRIORITY_MIN = sched_get_priority_min( schedPolicy );
+            assert( PRIORITY_MIN != -1 );
+
+            PRIORITY_MAX = sched_get_priority_max( schedPolicy );
+            assert( PRIORITY_MAX != -1 );
+        }
+    }
+
+
+private:
+    //
+    // Initializes a thread object which has no associated executable function.
+    // This is used for the main thread initialized in thread_init().
+    //
+    this()
+    {
+        // No routine to run; the current context is the thread's own stack
+        m_curr = &m_main;
+        m_call = Call.NO;
+    }
+
+
+    //
+    // Thread entry point.  Invokes the function or delegate passed on
+    // construction (if any).
+    //
+    final void run()
+    {
+        if( m_call == Call.FN )
+        {
+            m_fn();
+        }
+        else if( m_call == Call.DG )
+        {
+            m_dg();
+        }
+        // Any other value (Call.NO): there is nothing to invoke
+    }
+
+
+private:
+    //
+    // The type of routine passed on thread construction.
+    //
+    enum Call
+    {
+        NO, // no routine; run() does nothing (set by the private this())
+        FN, // run() invokes the function pointer m_fn
+        DG  // run() invokes the delegate m_dg
+    }
+
+
+    //
+    // Standard types
+    //
+    version( Win32 )
+    {
+        alias uint TLSKey;      // type of sm_this
+        alias uint ThreadAddr;  // type of m_addr
+    }
+    else version( Posix )
+    {
+        alias pthread_key_t TLSKey;     // type of sm_this
+        alias pthread_t     ThreadAddr; // type of m_addr
+    }
+
+
+    //
+    // Local storage
+    //
+    // sm_local[i] is true while slot i is reserved (set by createLocal,
+    // cleared by deleteLocal).
+    static bool[LOCAL_MAX]  sm_local;
+    // Native TLS key under which setThis stores the current Thread object.
+    static TLSKey           sm_this;
+
+    // This thread's value for each slot, indexed by the key from createLocal.
+    void*[LOCAL_MAX]        m_local;
+
+
+    //
+    // Standard thread data
+    //
+    version( Win32 )
+    {
+        HANDLE          m_hndl;     // native handle passed to SuspendThread/ResumeThread
+    }
+    ThreadAddr          m_addr;     // native thread id (GetCurrentThreadId / pthread_self)
+    Call                m_call;     // selects which member of the union below run() uses
+    char[]              m_name;
+    union
+    {
+        void function() m_fn;       // invoked by run() when m_call == Call.FN
+        void delegate() m_dg;       // invoked by run() when m_call == Call.DG
+    }
+    size_t              m_sz;       // NOTE(review): presumably the requested stack size -- confirm
+    version( Posix )
+    {
+        bool            m_isRunning; // set to true by thread_attachThis
+    }
+    bool                m_isDaemon;  // set to true by thread_attachThis
+    Object              m_unhandled; // NOTE(review): presumably an exception that escaped run() -- confirm
+
+
+private:
+    ///////////////////////////////////////////////////////////////////////////
+    // Storage of Active Thread
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Sets a thread-local reference to the current thread object.
+    //
+    static void setThis( Thread t )
+    {
+        // The native TLS APIs store an untyped pointer
+        void* self = cast(void*) t;
+
+        version( Win32 )
+        {
+            TlsSetValue( sm_this, self );
+        }
+        else version( Posix )
+        {
+            pthread_setspecific( sm_this, self );
+        }
+    }
+
+
+private:
+    ///////////////////////////////////////////////////////////////////////////
+    // Thread Context and GC Scanning Support
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Makes c the current context of this thread and records the previous
+    // current context in c.within.  c must not already be nested in another
+    // context.
+    //
+    final void pushContext( Context* c )
+    in
+    {
+        assert( !c.within );
+    }
+    body
+    {
+        // Link c to the old context before publishing it as m_curr
+        c.within = m_curr;
+        m_curr = c;
+    }
+
+
+    //
+    // Undoes the most recent pushContext: the context that the current one
+    // is nested within becomes current again, and the popped context's
+    // within link is cleared so it can be pushed again later.
+    //
+    final void popContext()
+    in
+    {
+        assert( m_curr && m_curr.within );
+    }
+    body
+    {
+        Context* c = m_curr;
+        m_curr = c.within;
+        c.within = null;
+    }
+
+
+    //
+    // Returns the context this thread is currently using (m_curr).
+    //
+    final Context* topContext()
+    in
+    {
+        assert( m_curr !is null );
+    }
+    body
+    {
+        Context* top = m_curr;
+        return top;
+    }
+
+
+    //
+    // Describes one stack range to be scanned by thread_scanAll.
+    //
+    static struct Context
+    {
+        void*           bstack;     // stack bottom (from getStackBottom for a thread's main stack)
+        void*           tstack;     // stack top; equal to bstack when there is nothing to scan
+        Context*        within;     // context that was current when this one was pushed
+        Context*        next;       // links in the global context list
+        Context*        prev;
+    }
+
+
+    Context             m_main;     // this thread's own stack; taken off the global list by remove( Thread )
+    Context*            m_curr;     // context currently in use; the private this() points it at m_main
+    bool                m_lock;     // while set, suspend/resume/scanAll leave m_curr.tstack untouched
+
+    version( Win32 )
+    {
+        // Register snapshot filled in by thread_suspendAll and handed to
+        // the scanner by thread_scanAll.
+        uint[8]         m_reg; // edi,esi,ebp,esp,ebx,edx,ecx,eax
+    }
+
+
+private:
+    ///////////////////////////////////////////////////////////////////////////
+    // GC Scanning Support
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    // NOTE: The GC scanning process works like so:
+    //
+    //          1. Suspend all threads.
+    //          2. Scan the stacks of all suspended threads for roots.
+    //          3. Resume all threads.
+    //
+    //       Step 1 and 3 require a list of all threads in the system, while
+    //       step 2 requires a list of all thread stacks (each represented by
+    //       a Context struct).  Traditionally, there was one stack per thread
+    //       and the Context structs were not necessary.  However, Fibers have
+    //       changed things so that each thread has its own 'main' stack plus
+    //       an arbitrary number of nested stacks (normally referenced via
+    //       m_curr).  Also, there may be 'free-floating' stacks in the system,
+    //       which are Fibers that are not currently executing on any specific
+    //       thread but are still being processed and still contain valid
+    //       roots.
+    //
+    //       To support all of this, the Context struct has been created to
+    //       represent a stack range, and a global list of Context structs has
+    //       been added to enable scanning of these stack ranges.  The lifetime
+    //       (and presence in the Context list) of a thread's 'main' stack will
+    //       be equivalent to the thread's lifetime.  So the Context will be
+    //       added to the list on thread entry, and removed from the list on
+    //       thread exit (which is essentially the same as the presence of a
+    //       Thread object in its own global list).  The lifetime of a Fiber's
+    //       context, however, will be tied to the lifetime of the Fiber object
+    //       itself, and Fibers are expected to add/remove their Context struct
+    //       on construction/deletion.
+
+
+    //
+    // All use of the global lists should synchronize on this lock.
+    //
+    static Object slock()
+    {
+        // The ClassInfo object for Thread doubles as the monitor
+        Object monitor = Thread.classinfo;
+        return monitor;
+    }
+
+
+    static Context*     sm_cbeg;    // head of the global context list
+    static size_t       sm_clen;    // number of contexts on that list
+
+    static Thread       sm_tbeg;    // head of the global thread list
+    static size_t       sm_tlen;    // number of threads on that list
+
+    //
+    // Used for ordering threads in the global thread list.
+    //
+    Thread              prev;
+    Thread              next;
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Global Context List Operations
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Add a context to the global context list.
+    //
+    static void add( Context* c )
+    in
+    {
+        assert( c !is null );
+        assert( c.next is null && c.prev is null );
+    }
+    body
+    {
+        synchronized( slock )
+        {
+            // Push c onto the front of the list
+            Context* oldHead = sm_cbeg;
+
+            if( oldHead !is null )
+            {
+                oldHead.prev = c;
+                c.next = oldHead;
+            }
+            sm_cbeg = c;
+            sm_clen += 1;
+        }
+    }
+
+
+    //
+    // Remove a context from the global context list.
+    //
+    static void remove( Context* c )
+    in
+    {
+        assert( c !is null );
+        assert( c.prev !is null || c.next !is null );
+    }
+    body
+    {
+        synchronized( slock )
+        {
+            Context* before = c.prev;
+            Context* after  = c.next;
+
+            // Splice the neighbours together and fix up the head if needed
+            if( before !is null )
+                before.next = after;
+            if( after !is null )
+                after.prev = before;
+            if( sm_cbeg == c )
+                sm_cbeg = after;
+            sm_clen -= 1;
+        }
+        // NOTE: c.next and c.prev are deliberately left as they were:
+        //       opApply currently follows c.next after removing a node.
+        //       Returning the next node from this function would remove
+        //       that dependency, but a context should never be re-added to
+        //       the list anyway, and stale non-null links are a good way to
+        //       catch an attempt to do so.
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Global Thread List Operations
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Add a thread to the global thread list.
+    //
+    static void add( Thread t )
+    in
+    {
+        assert( t );
+        assert( t.next is null && t.prev is null );
+        assert( t.isRunning );
+    }
+    body
+    {
+        synchronized( slock )
+        {
+            // Push t onto the front of the list
+            Thread oldHead = sm_tbeg;
+
+            if( oldHead !is null )
+            {
+                oldHead.prev = t;
+                t.next = oldHead;
+            }
+            sm_tbeg = t;
+            sm_tlen += 1;
+        }
+    }
+
+
+    //
+    // Remove a thread from the global thread list.
+    //
+    static void remove( Thread t )
+    in
+    {
+        assert( t );
+        assert( t.prev !is null || t.next !is null );
+        version( Win32 )
+        {
+            // NOTE: Not checked on Posix, where m_isRunning must be set to
+            //       false after the thread is removed during normal
+            //       execution.
+            assert( !t.isRunning );
+        }
+    }
+    body
+    {
+        synchronized( slock )
+        {
+            // NOTE: A thread's main context becomes invalid once the thread
+            //       leaves the global thread list, so it is removed here as
+            //       well.  t.m_curr may reference more than the main
+            //       context if the thread exited abnormally (if it was
+            //       terminated), but we must assume the user still holds
+            //       references to those contexts and may re-use them
+            //       elsewhere.  Whatever object creates contexts is
+            //       therefore responsible for cleaning them up when it is
+            //       done with them.
+            remove( &t.m_main );
+
+            Thread before = t.prev;
+            Thread after  = t.next;
+
+            if( before !is null )
+                before.next = after;
+            if( after !is null )
+                after.prev = before;
+            if( sm_tbeg == t )
+                sm_tbeg = after;
+            sm_tlen -= 1;
+        }
+        // NOTE: t.next and t.prev are deliberately left as they were:
+        //       opApply currently follows t.next after removing a node.
+        //       Returning the next node from this function would remove
+        //       that dependency, but a thread should never be re-added to
+        //       the list anyway, and stale non-null links are a good way to
+        //       catch an attempt to do so.
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GC Support Routines
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * Initializes the thread module.  This function must be called by the
+ * garbage collector on startup and before any other thread routines
+ * are called.
+ */
+extern (C) void thread_init()
+{
+    // NOTE: If thread_init itself performs any allocations then the thread
+    //       routines reserved for garbage collector use may be called while
+    //       thread_init is being processed.  However, since no memory should
+    //       exist to be scanned at this point, it is sufficient for these
+    //       functions to detect the condition and return immediately.
+
+    version( Win32 )
+    {
+        // Allocate the TLS slot that setThis uses for the current Thread
+        Thread.sm_this = TlsAlloc();
+        assert( Thread.sm_this != TLS_OUT_OF_INDEXES );
+    }
+    else version( Posix )
+    {
+        int         status;
+        sigaction_t sigusr1 = void;
+        sigaction_t sigusr2 = void;
+
+        // This is a quick way to zero-initialize the structs without using
+        // memset or creating a link dependency on their static initializer.
+        (cast(byte*) &sigusr1)[0 .. sigaction_t.sizeof] = 0;
+        (cast(byte*) &sigusr2)[0 .. sigaction_t.sizeof] = 0;
+
+        // SIGUSR1 is the suspend request sent by thread_suspendAll.
+        // NOTE: SA_RESTART indicates that system calls should restart if they
+        //       are interrupted by a signal, but this is not available on all
+        //       Posix systems, even those that support multithreading.
+        static if( is( typeof( SA_RESTART ) ) )
+            sigusr1.sa_flags = SA_RESTART;
+        else
+            sigusr1.sa_flags   = 0;
+        sigusr1.sa_handler = &thread_suspendHandler;
+        // NOTE: We want to ignore all signals while in this handler, so fill
+        //       sa_mask to indicate this.
+        status = sigfillset( &sigusr1.sa_mask );
+        assert( status == 0 );
+
+        // SIGUSR2 is the resume request sent by thread_resumeAll.
+        // NOTE: Since SIGUSR2 should only be issued for threads within the
+        //       suspend handler, we don't want this signal to trigger a
+        //       restart.
+        sigusr2.sa_flags   = 0;
+        sigusr2.sa_handler = &thread_resumeHandler;
+        // NOTE: We want to ignore all signals while in this handler, so fill
+        //       sa_mask to indicate this.
+        status = sigfillset( &sigusr2.sa_mask );
+        assert( status == 0 );
+
+        // Install both handlers
+        status = sigaction( SIGUSR1, &sigusr1, null );
+        assert( status == 0 );
+
+        status = sigaction( SIGUSR2, &sigusr2, null );
+        assert( status == 0 );
+
+        // Semaphore that thread_suspendAll waits on after signalling a
+        // thread; it starts at zero.
+        status = sem_init( &suspendCount, 0, 0 );
+        assert( status == 0 );
+
+        // Key that setThis uses for the current Thread (no destructor)
+        status = pthread_key_create( &Thread.sm_this, null );
+        assert( status == 0 );
+    }
+
+    // Register the thread running this function
+    thread_attachThis();
+}
+
+
+/**
+ * Registers the calling thread for use with the D Runtime.  If this routine
+ * is called for a thread which is already registered, the result is undefined.
+ */
+extern (C) void thread_attachThis()
+{
+    // The Thread object and its main context are set up the same way on
+    // every platform; only the native identifiers differ.
+    Thread          thisThread  = new Thread();
+    Thread.Context* thisContext = &thisThread.m_main;
+    assert( thisThread.m_curr == thisContext );
+
+    version( Win32 )
+    {
+        thisThread.m_addr = GetCurrentThreadId();
+        thisThread.m_hndl = GetCurrentThreadHandle();
+    }
+    else version( Posix )
+    {
+        thisThread.m_addr = pthread_self();
+    }
+
+    // Start with an empty stack range (top == bottom)
+    thisContext.bstack = getStackBottom();
+    thisContext.tstack = thisContext.bstack;
+
+    version( Posix )
+    {
+        thisThread.m_isRunning = true;
+    }
+    thisThread.m_isDaemon = true;
+
+    Thread.setThis( thisThread );
+
+    // Publish the thread, then its main context, on the global lists
+    Thread.add( thisThread );
+    Thread.add( thisContext );
+}
+
+
+/**
+ * Deregisters the calling thread from use with the runtime.  If this routine
+ * is called for a thread which is not registered, the result is undefined.
+ */
+extern (C) void thread_detachThis()
+{
+    // Take the calling thread (and with it its main context) off the
+    // global lists
+    Thread self = Thread.getThis();
+    Thread.remove( self );
+}
+
+
+/**
+ * Joins all non-daemon threads that are currently running.  This is done by
+ * performing successive scans through the thread list until a scan consists
+ * of only daemon threads.
+ */
+extern (C) void thread_joinAll()
+{
+    for( ;; )
+    {
+        // Find the first thread in the list that is not a daemon
+        Thread pending = null;
+
+        foreach( t; Thread )
+        {
+            if( t.isDaemon )
+                continue;
+            pending = t;
+            break;
+        }
+
+        // A full pass that found only daemon threads ends the wait
+        if( pending is null )
+            break;
+        pending.join();
+    }
+}
+
+
+/**
+ * Performs intermediate shutdown of the thread module.
+ */
+static ~this()
+{
+    // NOTE: Garbage collection support must remain minimally operable once
+    //       this dtor has run, so only minimal cleanup is performed here.
+
+    Thread cur = Thread.sm_tbeg;
+
+    while( cur !is null )
+    {
+        // Drop threads that are no longer running.  Thread.remove leaves
+        // cur.next intact, so the walk can continue from a removed node.
+        if( !cur.isRunning )
+            Thread.remove( cur );
+        cur = cur.next;
+    }
+}
+
+
+// Used for needLock below
+// Returned unchanged by thread_needLock; also selects the single-threaded
+// fast path in thread_suspendAll and thread_resumeAll.  Starts out false.
+private bool multiThreadedFlag = false;
+
+
+/**
+ * This function is used to determine whether the process is
+ * multi-threaded.  Optimizations may only be performed on this
+ * value if the programmer can guarantee that no path from the
+ * enclosed code will start a thread.
+ *
+ * Returns:
+ *  True if Thread.start() has been called in this process.
+ */
+extern (C) bool thread_needLock()
+{
+    // Simply report the module-level flag
+    bool isMultiThreaded = multiThreadedFlag;
+    return isMultiThreaded;
+}
+
+
+// Used for suspendAll/resumeAll below
+// Nesting count: incremented by thread_suspendAll and decremented by
+// thread_resumeAll.  Threads are only suspended on the 0 -> 1 transition
+// and only resumed when the count returns to 0.
+private uint suspendDepth = 0;
+
+
+/**
+ * Suspend all threads but the calling thread for "stop the world" garbage
+ * collection runs.  This function may be called multiple times, and must
+ * be followed by a matching number of calls to thread_resumeAll before
+ * processing is resumed.
+ *
+ * Throws:
+ *  ThreadException if the suspend operation fails for a running thread.
+ */
+extern (C) void thread_suspendAll()
+{
+    /**
+     * Suspend the specified thread and load stack and register information for
+     * use by thread_scanAll.  If the supplied thread is the calling thread,
+     * stack and register information will be loaded but the thread will not
+     * be suspended.  If the suspend operation fails and the thread is not
+     * running then it will be removed from the global thread list, otherwise
+     * an exception will be thrown.
+     *
+     * Params:
+     *  t = The thread to suspend.
+     *
+     * Throws:
+     *  ThreadException if the suspend operation fails for a running thread.
+     */
+    void suspend( Thread t )
+    {
+        version( Win32 )
+        {
+            // The calling thread is never suspended; the short-circuit
+            // skips SuspendThread for it.
+            if( t.m_addr != GetCurrentThreadId() && SuspendThread( t.m_hndl ) == 0xFFFFFFFF )
+            {
+                if( !t.isRunning )
+                {
+                    Thread.remove( t );
+                    return;
+                }
+                throw new ThreadException( "Unable to suspend thread" );
+            }
+
+            CONTEXT context = void;
+            context.ContextFlags = CONTEXT_INTEGER | CONTEXT_CONTROL;
+
+            if( !GetThreadContext( t.m_hndl, &context ) )
+                throw new ThreadException( "Unable to load thread context" );
+            // Record the stack top unless the thread has locked its context
+            if( !t.m_lock )
+                t.m_curr.tstack = cast(void*) context.Esp;
+            // edi,esi,ebp,esp,ebx,edx,ecx,eax
+            t.m_reg[0] = context.Edi;
+            t.m_reg[1] = context.Esi;
+            t.m_reg[2] = context.Ebp;
+            t.m_reg[3] = context.Esp;
+            t.m_reg[4] = context.Ebx;
+            t.m_reg[5] = context.Edx;
+            t.m_reg[6] = context.Ecx;
+            t.m_reg[7] = context.Eax;
+        }
+        else version( Posix )
+        {
+            if( t.m_addr != pthread_self() )
+            {
+                if( pthread_kill( t.m_addr, SIGUSR1 ) != 0 )
+                {
+                    if( !t.isRunning )
+                    {
+                        Thread.remove( t );
+                        return;
+                    }
+                    throw new ThreadException( "Unable to suspend thread" );
+                }
+                // NOTE: It's really not ideal to wait for each thread to
+                //       signal individually -- rather, it would be better to
+                //       suspend them all and wait once at the end.  However,
+                //       semaphores don't really work this way, and the obvious
+                //       alternative (looping on an atomic suspend count)
+                //       requires either the atomic module (which only works on
+                //       x86) or other specialized functionality.  It would
+                //       also be possible to simply loop on sem_wait at the
+                //       end, but I'm not convinced that this would be much
+                //       faster than the current approach.
+                // NOTE(review): the result of sem_wait is ignored here.
+                //       POSIX allows sem_wait to fail with EINTR; confirm
+                //       whether a retry loop is needed.
+                sem_wait( &suspendCount );
+            }
+            else if( !t.m_lock )
+            {
+                // Calling thread: just record its current stack top
+                t.m_curr.tstack = getStackTop();
+            }
+        }
+    }
+
+
+    // NOTE: We've got an odd chicken & egg problem here, because while the GC
+    //       is required to call thread_init before calling any other thread
+    //       routines, thread_init may allocate memory which could in turn
+    //       trigger a collection.  Thus, thread_suspendAll, thread_scanAll,
+    //       and thread_resumeAll must be callable before thread_init
+    //       completes, with the assumption that no other GC memory has yet
+    //       been allocated by the system, and thus there is no risk of losing
+    //       data if the global thread list is empty.  The check of
+    //       Thread.sm_tbeg below is done to ensure thread_init has completed,
+    //       and therefore that calling Thread.getThis will not result in an
+    //       error.  For the short time when Thread.sm_tbeg is null, there is
+    //       no reason not to simply call the multithreaded code below, with
+    //       the expectation that the foreach loop will never be entered.
+    if( !multiThreadedFlag && Thread.sm_tbeg )
+    {
+        // Single-threaded fast path: no lock, only the caller is processed
+        if( ++suspendDepth == 1 )
+            suspend( Thread.getThis() );
+        return;
+    }
+    synchronized( Thread.slock )
+    {
+        // Nested call: the world is already stopped
+        if( ++suspendDepth > 1 )
+            return;
+
+        // NOTE: I'd really prefer not to check isRunning within this loop but
+        //       not doing so could be problematic if threads are terminated
+        //       abnormally and a new thread is created with the same thread
+        //       address before the next GC run.  This situation might cause
+        //       the same thread to be suspended twice, which would likely
+        //       cause the second suspend to fail, the garbage collection to
+        //       abort, and Bad Things to occur.
+        for( Thread t = Thread.sm_tbeg; t; t = t.next )
+        {
+            if( t.isRunning )
+                suspend( t );
+            else
+                Thread.remove( t );
+        }
+
+        version( Posix )
+        {
+            // wait on semaphore -- see note in suspend for
+            // why this is currently not implemented
+        }
+    }
+}
+
+
+/**
+ * Resume all threads but the calling thread for "stop the world" garbage
+ * collection runs.  This function must be called once for each preceding
+ * call to thread_suspendAll before the threads are actually resumed.
+ *
+ * In:
+ *  This routine must be preceded by a call to thread_suspendAll.
+ *
+ * Throws:
+ *  ThreadException if the resume operation fails for a running thread.
+ */
+extern (C) void thread_resumeAll()
+in
+{
+    assert( suspendDepth > 0 );
+}
+body
+{
+    /**
+     * Resume the specified thread and unload stack and register information.
+     * If the supplied thread is the calling thread, stack and register
+     * information will be unloaded but the thread will not be resumed.  If
+     * the resume operation fails and the thread is not running then it will
+     * be removed from the global thread list, otherwise an exception will be
+     * thrown.
+     *
+     * Params:
+     *  t = The thread to resume.
+     *
+     * Throws:
+     *  ThreadException if the resume fails for a running thread.
+     */
+    void resume( Thread t )
+    {
+        version( Win32 )
+        {
+            // The calling thread was never suspended; the short-circuit
+            // skips ResumeThread for it.
+            if( t.m_addr != GetCurrentThreadId() && ResumeThread( t.m_hndl ) == 0xFFFFFFFF )
+            {
+                if( !t.isRunning )
+                {
+                    Thread.remove( t );
+                    return;
+                }
+                throw new ThreadException( "Unable to resume thread" );
+            }
+
+            // Collapse the stack range back to empty and clear the
+            // register snapshot taken by thread_suspendAll
+            if( !t.m_lock )
+                t.m_curr.tstack = t.m_curr.bstack;
+            t.m_reg[0 .. $] = 0;
+        }
+        else version( Posix )
+        {
+            if( t.m_addr != pthread_self() )
+            {
+                // SIGUSR2 is the resume request (handler installed by
+                // thread_init)
+                if( pthread_kill( t.m_addr, SIGUSR2 ) != 0 )
+                {
+                    if( !t.isRunning )
+                    {
+                        Thread.remove( t );
+                        return;
+                    }
+                    throw new ThreadException( "Unable to resume thread" );
+                }
+            }
+            else if( !t.m_lock )
+            {
+                // Calling thread: collapse its stack range back to empty
+                t.m_curr.tstack = t.m_curr.bstack;
+            }
+        }
+    }
+
+
+    // NOTE: See thread_suspendAll for the logic behind this.
+    if( !multiThreadedFlag && Thread.sm_tbeg )
+    {
+        // Single-threaded fast path: no lock, only the caller is processed
+        if( --suspendDepth == 0 )
+            resume( Thread.getThis() );
+        return;
+    }
+    synchronized( Thread.slock )
+    {
+        // Still inside an outer suspendAll/resumeAll pair
+        if( --suspendDepth > 0 )
+            return;
+
+        for( Thread t = Thread.sm_tbeg; t; t = t.next )
+        {
+            resume( t );
+        }
+    }
+}
+
+
+// Callback type taken by thread_scanAll; it is called with the two ends of
+// each stack (and, on Win32, register) range to scan.
+private alias void delegate( void*, void* ) scanAllThreadsFn;
+
+
+/**
+ * The main entry point for garbage collection.  The supplied delegate
+ * will be passed ranges representing both stack and register values.
+ *
+ * Params:
+ *  scan        = The scanner function.  It should scan from p1 through p2 - 1.
+ *  curStackTop = An optional pointer to the top of the calling thread's stack.
+ *
+ * In:
+ *  This routine must be preceded by a call to thread_suspendAll.
+ */
+extern (C) void thread_scanAll( scanAllThreadsFn scan, void* curStackTop = null )
+in
+{
+    assert( suspendDepth > 0 );
+}
+body
+{
+    // If the caller supplied its own stack top, temporarily substitute it
+    // for the recorded top of the calling thread's current context.  The
+    // context that was modified is remembered so that the exit handler
+    // restores exactly that context, and only if a substitution actually
+    // happened, instead of re-deriving the condition from state that may
+    // have changed in the meantime.
+    Thread.Context* swapped     = null;
+    void*           oldStackTop = null;
+
+    if( curStackTop && Thread.sm_tbeg )
+    {
+        Thread thisThread = Thread.getThis();
+
+        // A thread that has locked its context manages tstack itself
+        if( !thisThread.m_lock )
+        {
+            swapped        = thisThread.m_curr;
+            oldStackTop    = swapped.tstack;
+            swapped.tstack = curStackTop;
+        }
+    }
+
+    scope( exit )
+    {
+        if( swapped )
+            swapped.tstack = oldStackTop;
+    }
+
+    // NOTE: Synchronizing on Thread.slock is not needed because this
+    //       function may only be called after all other threads have
+    //       been suspended from within the same lock.
+    for( Thread.Context* c = Thread.sm_cbeg; c; c = c.next )
+    {
+        version( StackGrowsDown )
+        {
+            // NOTE: We can't index past the bottom of the stack
+            //       so don't do the "+1" for StackGrowsDown.
+            if( c.tstack && c.tstack < c.bstack )
+                scan( c.tstack, c.bstack );
+        }
+        else
+        {
+            if( c.bstack && c.bstack < c.tstack )
+                scan( c.bstack, c.tstack + 1 );
+        }
+    }
+    version( Win32 )
+    {
+        // Also scan the register snapshot taken by thread_suspendAll
+        for( Thread t = Thread.sm_tbeg; t; t = t.next )
+        {
+            scan( &t.m_reg[0], &t.m_reg[0] + t.m_reg.length );
+        }
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Thread Local
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * This class encapsulates the operations required to initialize, access, and
+ * destroy thread local data.
+ */
+class ThreadLocal( T )
+{
+    ///////////////////////////////////////////////////////////////////////////
+    // Initialization
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Reserves a local storage slot for this value and remembers the default
+     * that is reported to threads which have not stored anything yet.
+     *
+     * Params:
+     *  def = The value returned by val() until the calling thread sets one.
+     */
+    this( T def = T.init )
+    {
+        m_def = def;
+        m_key = Thread.createLocal();
+    }
+
+
+    /**
+     * Hands the local storage slot back to Thread.
+     */
+    ~this()
+    {
+        Thread.deleteLocal( m_key );
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Accessors
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Reads the calling thread's copy of the value.
+     *
+     * Returns:
+     *  The value last stored by the calling thread, or the default passed
+     *  to the constructor if this thread has never stored one.
+     */
+    T val()
+    {
+        Wrap* stored = cast(Wrap*) Thread.getLocal( m_key );
+
+        if( stored is null )
+            return m_def;
+        return stored.val;
+    }
+
+
+    /**
+     * Stores newval for the calling thread only.
+     *
+     * Params:
+     *  newval = The value to store.
+     *
+     * Returns:
+     *  newval.
+     */
+    T val( T newval )
+    {
+        Wrap* stored = cast(Wrap*) Thread.getLocal( m_key );
+
+        // The first store on this thread allocates the wrapper and
+        // registers it in the thread's slot.
+        if( stored is null )
+        {
+            stored = new Wrap;
+            Thread.setLocal( m_key, stored );
+        }
+        stored.val = newval;
+        return newval;
+    }
+
+
+private:
+    //
+    // Box around the stored data.  A null slot means the thread has never
+    // called the setter (so the default applies), and boxing hides whether
+    // T is smaller or larger than (void*).sizeof.  The cost is one extra
+    // per-thread allocation for each ThreadLocal value compared with calling
+    // the Thread routines directly.
+    //
+    struct Wrap
+    {
+        T   val;
+    }
+
+
+    T       m_def;  // default reported by val() before the first store
+    uint    m_key;  // slot key obtained from Thread.createLocal
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Thread Group
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * This class is intended to simplify certain common programming techniques.
+ */
+class ThreadGroup
+{
+    /**
+     * Creates and starts a new Thread object that executes fn and adds it to
+     * the list of tracked threads.
+     *
+     * Params:
+     *  fn = The thread function.
+     *
+     * Returns:
+     *  A reference to the newly created thread.
+     */
+    final Thread create( void function() fn )
+    {
+        Thread spawned = new Thread( fn );
+        spawned.start();
+
+        // Track the thread only after it has been started
+        synchronized( this )
+            m_all[spawned] = spawned;
+
+        return spawned;
+    }
+
+
+    /**
+     * Creates and starts a new Thread object that executes dg and adds it to
+     * the list of tracked threads.
+     *
+     * Params:
+     *  dg = The thread function.
+     *
+     * Returns:
+     *  A reference to the newly created thread.
+     */
+    final Thread create( void delegate() dg )
+    {
+        Thread spawned = new Thread( dg );
+        spawned.start();
+
+        // Track the thread only after it has been started
+        synchronized( this )
+            m_all[spawned] = spawned;
+
+        return spawned;
+    }
+
+
+    /**
+     * Starts tracking t.  Adding a thread that is already tracked leaves the
+     * group unchanged.
+     *
+     * Params:
+     *  t = The thread to track.
+     *
+     * In:
+     *  t must not be null.
+     */
+    final void add( Thread t )
+    in
+    {
+        assert( t );
+    }
+    body
+    {
+        // The thread serves as both key and value in m_all
+        synchronized( this )
+            m_all[t] = t;
+    }
+
+
+    /**
+     * Removes t from the list of tracked threads.  No operation will be
+     * performed if t is not currently being tracked by this object.
+     *
+     * Params:
+     *  t = The thread to remove.
+     *
+     * In:
+     *  t must not be null.
+     */
+    final void remove( Thread t )
+    in
+    {
+        assert( t );
+    }
+    body
+    {
+        synchronized( this )
+        {
+            m_all.remove( t );
+        }
+    }
+
+
+    /**
+     * Operates on all threads currently tracked by this object.
+     */
+    final int opApply( int delegate( inout Thread ) dg )
+    {
+        synchronized( this )
+        {
+            int ret = 0;
+
+            // NOTE: This loop relies on the knowledge that m_all uses the
+            //       Thread object for both the key and the mapped value.
+            foreach( Thread t; m_all.keys )
+            {
+                ret = dg( t );
+                if( ret )
+                    break;
+            }
+            return ret;
+        }
+    }
+
+
+    /**
+     * Iteratively joins all tracked threads.  This function will block add,
+     * remove, opApply, and the tracking step of create until it completes.
+     *
+     * Params:
+     *  rethrow = Rethrow any unhandled exception which may have caused the
+     *            current thread to terminate.
+     *
+     * Throws:
+     *  Any exception not handled by the joined threads.
+     */
+    final void joinAll( bool rethrow = true )
+    {
+        synchronized( this )
+        {
+            // NOTE: This loop relies on the knowledge that m_all uses the
+            //       Thread object for both the key and the mapped value.
+            foreach( Thread t; m_all.keys )
+            {
+                t.join( rethrow );
+            }
+        }
+    }
+
+
+private:
+    Thread[Thread]  m_all;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Fiber Platform Detection and Memory Allocation
+///////////////////////////////////////////////////////////////////////////////
+
+
+private
+{
+    version( D_InlineAsm_X86 )
+    {
+        version( X86_64 )
+        {
+
+        }
+        else
+        {
+            version( Win32 )
+                version = AsmX86_Win32;
+            else version( Posix )
+                version = AsmX86_Posix;
+        }
+    }
+    else version( PPC )
+    {
+        version( Posix )
+            version = AsmPPC_Posix;
+    }
+
+    version( LLVM_InlineAsm_X86 )
+    {
+        version( Win32 )
+            version = LLVM_AsmX86_Win32;
+        else version( Posix )
+            version = LLVM_AsmX86_Posix;
+    }
+    else version( LLVM_InlineAsm_X86_64 )
+    {
+        version( Posix )
+            version = LLVM_AsmX86_64_Posix;
+    }
+
+    version( Posix )
+    {
+        import stdc.posix.unistd;   // for sysconf
+        import stdc.posix.sys.mman; // for mmap
+        import stdc.posix.stdlib;   // for malloc, valloc, free
+
+        version( AsmX86_Win32 ) {} else
+        version( AsmX86_Posix ) {} else
+        version( AsmPPC_Posix ) {} else
+        version( LLVM_AsmX86_Win32 ) {} else
+        version( LLVM_AsmX86_Posix ) {} else
+        //TODO: Enable when x86-64 Posix supports fibers
+        //version( LLVM_AsmX86_64_Posix ) {} else
+        {
+            // NOTE: The ucontext implementation requires architecture specific
+            //       data definitions to operate so testing for it must be done
+            //       by checking for the existence of ucontext_t rather than by
+            //       a version identifier.  Please note that this is considered
+            //       an obsolescent feature according to the POSIX spec, so a
+            //       custom solution is still preferred.
+            import stdc.posix.ucontext;
+        }
+    }
+
+    const size_t PAGESIZE;
+}
+
+
+static this() // module constructor: assigns the PAGESIZE constant declared above
+{
+    static if( is( typeof( GetSystemInfo ) ) ) // taken when GetSystemInfo is declared in scope
+    {
+        SYSTEM_INFO info;
+        GetSystemInfo( &info );
+
+        PAGESIZE = info.dwPageSize;
+        assert( PAGESIZE < int.max );
+    }
+    else static if( is( typeof( sysconf ) ) &&
+                    is( typeof( _SC_PAGESIZE ) ) ) // taken when both sysconf and _SC_PAGESIZE are declared
+    {
+        PAGESIZE = cast(size_t) sysconf( _SC_PAGESIZE ); // NOTE(review): a negative sysconf result becomes a huge size_t here and is only caught by the assert below -- confirm this is acceptable
+        assert( PAGESIZE < int.max );
+    }
+    else
+    {
+        version( PPC ) // no query routine available: fall back to fixed values
+            PAGESIZE = 8192;
+        else
+            PAGESIZE = 4096;
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Fiber Entry Point and Context Switch
+///////////////////////////////////////////////////////////////////////////////
+
+
+private
+{
+    extern (C) void fiber_entryPoint() // its address is seeded into every new fiber stack by Fiber.initStack
+    {
+        Fiber   obj = Fiber.getThis(); // Fiber.call stores the fiber via setThis before switching in
+        assert( obj );
+
+        assert( Thread.getThis().m_curr is obj.m_ctxt );
+        volatile Thread.getThis().m_lock = false; // m_lock was set by switchIn just before the context switch
+        obj.m_ctxt.tstack = obj.m_ctxt.bstack;
+        obj.m_state = Fiber.State.EXEC;
+
+        try
+        {
+            obj.run(); // invokes the function or delegate given at construction, if any
+        }
+        catch( Object o )
+        {
+            obj.m_unhandled = o; // kept for Fiber.call, which rethrows or returns it
+        }
+
+        static if( is( ucontext_t ) )
+          obj.m_ucur = &obj.m_utxt;
+
+        obj.m_state = Fiber.State.TERM;
+        obj.switchOut(); // hand control back to the enclosing stack
+    }
+
+
+  // NOTE: If AsmPPC_Posix is defined then the context switch routine will
+  //       be defined externally until GDC supports inline PPC ASM.
+  version( AsmPPC_Posix )
+    extern (C) void fiber_switchContext( void** oldp, void* newp ); // declaration only; the body is supplied externally
+  else
+    extern (C) void fiber_switchContext( void** oldp, void* newp )
+    {
+        // NOTE: The data pushed and popped in this routine must match the
+        //       default stack created by Fiber.initStack or the initial
+        //       switch into a new context will fail.
+
+        version( AsmX86_Win32 )
+        {
+            asm
+            {
+                naked;
+
+                // save current stack state
+                push EBP;
+                mov  EBP, ESP;
+                push EAX;
+                push dword ptr FS:[0];
+                push dword ptr FS:[4]; // Fiber.initStack seeds this slot with m_ctxt.bstack
+                push dword ptr FS:[8]; // Fiber.initStack seeds this slot with the other end of the stack
+                push EBX;
+                push ESI;
+                push EDI;
+
+                // store oldp again with more accurate address
+                mov EAX, dword ptr 8[EBP];  // EAX = oldp
+                mov [EAX], ESP;             // *oldp = ESP
+                // load newp to begin context switch
+                mov ESP, dword ptr 12[EBP]; // ESP = newp
+
+                // load saved state from new stack
+                pop EDI;
+                pop ESI;
+                pop EBX;
+                pop dword ptr FS:[8];
+                pop dword ptr FS:[4];
+                pop dword ptr FS:[0];
+                pop EAX;
+                pop EBP;
+
+                // 'return' to complete switch
+                ret;
+            }
+        }
+        else version( AsmX86_Posix )
+        {
+            asm
+            {
+                naked;
+
+                // save current stack state
+                push EBP;
+                mov  EBP, ESP;
+                push EAX;
+                push EBX;
+                push ESI;
+                push EDI;
+
+                // store oldp again with more accurate address
+                mov EAX, dword ptr 8[EBP];  // EAX = oldp
+                mov [EAX], ESP;             // *oldp = ESP
+                // load newp to begin context switch
+                mov ESP, dword ptr 12[EBP]; // ESP = newp
+
+                // load saved state from new stack
+                pop EDI;
+                pop ESI;
+                pop EBX;
+                pop EAX;
+                pop EBP;
+
+                // 'return' to complete switch
+                ret;
+            }
+        }
+        else version( LLVM_AsmX86_Posix )
+        {
+            asm
+            {
+                // clobber registers to save (NOTE(review): presumably so the compiler-emitted frame saves/restores them -- confirm)
+                inc EBX;
+                inc ESI;
+                inc EDI;
+
+                // store oldp again with more accurate address
+                mov EAX, oldp;
+                mov [EAX], ESP;
+                // load newp to begin context switch
+                mov ESP, newp;
+            }
+        }
+/+
+        version( LLVM_AsmX86_64_Posix )
+        {
+            //TODO: Fiber implementation here
+        }
++/
+        else static if( is( ucontext_t ) )
+        {
+            Fiber   cfib = Fiber.getThis();
+            void*   ucur = cfib.m_ucur;
+
+            *oldp = &ucur; // the saved "stack top" becomes the address of the local holding cfib.m_ucur
+            swapcontext( **(cast(ucontext_t***) oldp), // save into the context ucur points at
+                          *(cast(ucontext_t**)  newp) ); // resume the context whose pointer is stored at newp
+        }
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Fiber
+///////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * This class provides a cooperative concurrency mechanism integrated with the
+ * threading and garbage collection functionality.  Calling a fiber may be
+ * considered a blocking operation that returns when the fiber yields (via
+ * Fiber.yield()).  Execution occurs within the context of the calling thread
+ * so synchronization is not necessary to guarantee memory visibility so long
+ * as the same thread calls the fiber each time.  Please note that there is no
+ * requirement that a fiber be bound to one specific thread.  Rather, fibers
+ * may be freely passed between threads so long as they are not currently
+ * executing.  Like threads, a new fiber may be created using either
+ * derivation or composition, as in the following example.
+ *
+ * Example:
+ * ----------------------------------------------------------------------
+ *
+ * class DerivedFiber : Fiber
+ * {
+ *     this()
+ *     {
+ *         super( &run );
+ *     }
+ *
+ * private :
+ *     void run()
+ *     {
+ *         printf( "Derived fiber running.\n" );
+ *     }
+ * }
+ *
+ * void fiberFunc()
+ * {
+ *     printf( "Composed fiber running.\n" );
+ *     Fiber.yield();
+ *     printf( "Composed fiber running.\n" );
+ * }
+ *
+ * // create instances of each type
+ * Fiber derived = new DerivedFiber();
+ * Fiber composed = new Fiber( &fiberFunc );
+ *
+ * // call both fibers once
+ * derived.call();
+ * composed.call();
+ * printf( "Execution returned to calling context.\n" );
+ * composed.call();
+ *
+ * // since each fiber has run to completion, each should have state TERM
+ * assert( derived.state == Fiber.State.TERM );
+ * assert( composed.state == Fiber.State.TERM );
+ *
+ * ----------------------------------------------------------------------
+ *
+ * Authors: Based on a design by Mikola Lysenko.
+ */
+class Fiber
+{
+    ///////////////////////////////////////////////////////////////////////////
+    // Initialization
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Initializes a fiber object which is associated with a static
+     * D function.
+     *
+     * Params:
+     *  fn = The thread function.
+     *  sz = The stack size for this fiber.
+     *
+     * In:
+     *  fn must not be null.
+     */
+    this( void function() fn, size_t sz = PAGESIZE )
+    in
+    {
+        assert( fn );
+    }
+    body
+    {
+        m_fn    = fn;
+        m_call  = Call.FN;
+        m_state = State.HOLD;
+        allocStack( sz );
+        initStack();
+    }
+
+
+    /**
+     * Initializes a fiber object which is associated with a dynamic
+     * D function.
+     *
+     * Params:
+     *  dg = The thread function.
+     *  sz = The stack size for this fiber.
+     *
+     * In:
+     *  dg must not be null.
+     */
+    this( void delegate() dg, size_t sz = PAGESIZE )
+    in
+    {
+        assert( dg );
+    }
+    body
+    {
+        m_dg    = dg;
+        m_call  = Call.DG;
+        m_state = State.HOLD;
+        allocStack( sz );
+        initStack();
+    }
+
+
+    /**
+     * Cleans up any remaining resources used by this object.
+     */
+    ~this()
+    {
+        // NOTE: A live reference to this object will exist on its associated
+        //       stack from the first time its call() method has been called
+        //       until its execution completes with State.TERM.  Thus, the only
+        //       times this dtor should be called are either if the fiber has
+        //       terminated (and therefore has no active stack) or if the user
+        //       explicitly deletes this object.  The latter case is an error
+        //       but is not easily tested for, since State.HOLD may imply that
+        //       the fiber was just created but has never been run.  There is
+        //       not a compelling case to create a State.INIT just to offer a
+        //       means of ensuring the user isn't violating this object's
+        //       contract, so for now this requirement will be enforced by
+        //       documentation only.
+        freeStack();
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // General Actions
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Transfers execution to this fiber object.  The calling context will be
+     * suspended until the fiber calls Fiber.yield() or Fiber.yieldAndThrow(),
+     * or until it terminates (normally or via an unhandled exception).
+     *
+     * Params:
+     *  rethrow = Rethrow any unhandled exception which may have caused this
+     *            fiber to terminate.
+     *
+     * In:
+     *  This fiber must be in state HOLD.
+     *
+     * Throws:
+     *  Any exception not handled by this fiber, if rethrow is true.
+     *
+     * Returns:
+     *  Any exception not handled by this fiber if rethrow = false, null
+     *  otherwise.
+     */
+    final Object call( bool rethrow = true )
+    in
+    {
+        assert( m_state == State.HOLD );
+    }
+    body
+    {
+        Fiber   cur = getThis(); // the fiber making this call, or null when no fiber is active
+
+        static if( is( ucontext_t ) )
+          m_ucur = cur ? &cur.m_utxt : &Fiber.sm_utxt;
+
+        setThis( this );
+        this.switchIn(); // returns once this fiber switches out again
+        setThis( cur ); // restore the caller as the active fiber
+
+        static if( is( ucontext_t ) )
+          m_ucur = null;
+
+        // NOTE: If the fiber has terminated then the stack pointers must be
+        //       reset.  This ensures that the stack for this fiber is not
+        //       scanned if the fiber has terminated.  This is necessary to
+        //       prevent any references lingering on the stack from delaying
+        //       the collection of otherwise dead objects.  The most notable
+        //       being the current object, which is referenced at the top of
+        //       fiber_entryPoint.
+        if( m_state == State.TERM )
+        {
+            m_ctxt.tstack = m_ctxt.bstack;
+        }
+        if( m_unhandled ) // set by fiber_entryPoint's catch block or by yieldAndThrow
+        {
+            Object obj  = m_unhandled;
+            m_unhandled = null; // cleared so the same object is not reported twice
+            if( rethrow )
+                throw obj;
+            return obj;
+        }
+        return null;
+    }
+
+
+    /**
+     * Resets this fiber so that it may be re-used.  This routine may only be
+     * called for fibers that have terminated, as doing otherwise could result
+     * in scope-dependent functionality that is not executed.  Stack-based
+     * classes, for example, may not be cleaned up properly if a fiber is reset
+     * before it has terminated.
+     *
+     * In:
+     *  This fiber must be in state TERM.
+     */
+    final void reset()
+    in
+    {
+        assert( m_state == State.TERM );
+        assert( m_ctxt.tstack == m_ctxt.bstack );
+    }
+    body
+    {
+        m_state = State.HOLD;
+        initStack();
+        m_unhandled = null;
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // General Properties
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * A fiber may occupy one of three states: HOLD, EXEC, and TERM.  The HOLD
+     * state applies to any fiber that is suspended and ready to be called.
+     * The EXEC state will be set for any fiber that is currently executing.
+     * And the TERM state is set when a fiber terminates.  Once a fiber
+     * terminates, it must be reset before it may be called again.
+     */
+    enum State
+    {
+        HOLD,   ///
+        EXEC,   ///
+        TERM    ///
+    }
+
+
+    /**
+     * Gets the current state of this fiber.
+     *
+     * Returns:
+     *  The state of this fiber as an enumerated value.
+     */
+    final State state()
+    {
+        return m_state;
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Actions on Calling Fiber
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Forces a context switch to occur away from the calling fiber.
+     */
+    static void yield()
+    {
+        Fiber   cur = getThis();
+        assert( cur, "Fiber.yield() called with no active fiber" );
+        assert( cur.m_state == State.EXEC );
+
+        static if( is( ucontext_t ) )
+          cur.m_ucur = &cur.m_utxt;
+
+        cur.m_state = State.HOLD;
+        cur.switchOut();
+        cur.m_state = State.EXEC;
+    }
+
+
+    /**
+     * Forces a context switch away from the calling fiber and makes the pending
+     * call() on it throw obj (or return obj when rethrow is false).
+     *
+     * Params:
+     *  obj = The object to throw.
+     *
+     * In:
+     *  obj must not be null.
+     */
+    static void yieldAndThrow( Object obj )
+    in
+    {
+        assert( obj );
+    }
+    body
+    {
+        Fiber   cur = getThis();
+        assert( cur, "Fiber.yieldAndThrow() called with no active fiber" );
+        assert( cur.m_state == State.EXEC );
+
+        static if( is( ucontext_t ) )
+          cur.m_ucur = &cur.m_utxt;
+
+        cur.m_unhandled = obj; // picked up by call() after the switch below
+        cur.m_state = State.HOLD;
+        cur.switchOut();
+        cur.m_state = State.EXEC; // reached when the fiber is called again
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Fiber Accessors
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Provides a reference to the calling fiber or null if no fiber is
+     * currently active.
+     *
+     * Returns:
+     *  The fiber object representing the calling fiber or null if no fiber
+     *  is currently active.  The result of deleting this object is undefined.
+     */
+    static Fiber getThis()
+    {
+        version( Win32 )
+        {
+            return cast(Fiber) TlsGetValue( sm_this );
+        }
+        else version( Posix )
+        {
+            return cast(Fiber) pthread_getspecific( sm_this );
+        }
+    }
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Static Initialization
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    static this()
+    {
+        version( Win32 )
+        {
+            sm_this = TlsAlloc();
+            assert( sm_this != TLS_OUT_OF_INDEXES );
+        }
+        else version( Posix )
+        {
+            int status;
+
+            status = pthread_key_create( &sm_this, null );
+            assert( status == 0 );
+
+          static if( is( ucontext_t ) )
+          {
+            status = getcontext( &sm_utxt );
+            assert( status == 0 );
+          }
+        }
+    }
+
+
+private:
+    //
+    // Initializes a fiber object which has no associated executable function.
+    //
+    this()
+    {
+        m_call = Call.NO;
+    }
+
+
+    //
+    // Fiber entry point.  Invokes the function or delegate passed on
+    // construction (if any).
+    //
+    final void run()
+    {
+        switch( m_call )
+        {
+        case Call.FN:
+            m_fn();
+            break;
+        case Call.DG:
+            m_dg();
+            break;
+        default:
+            break;
+        }
+    }
+
+
+private:
+    //
+    // The type of routine passed on fiber construction.
+    //
+    enum Call
+    {
+        NO,
+        FN,
+        DG
+    }
+
+
+    //
+    // Standard fiber data
+    //
+    Call                m_call;
+    union
+    {
+        void function() m_fn;
+        void delegate() m_dg;
+    }
+    bool                m_isRunning;
+    Object              m_unhandled;
+    State               m_state;
+
+
+private:
+    ///////////////////////////////////////////////////////////////////////////
+    // Stack Management
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Allocate a new stack for this fiber.
+    //
+    final void allocStack( size_t sz )
+    in
+    {
+        assert( !m_pmem && !m_ctxt );
+    }
+    body
+    {
+        // adjust alloc size to a multiple of PAGESIZE
+        sz += PAGESIZE - 1;
+        sz -= sz % PAGESIZE;
+
+        // NOTE: This instance of Thread.Context is dynamic so Fiber objects
+        //       can be collected by the GC so long as no user level references
+        //       to the object exist.  If m_ctxt were not dynamic then its
+        //       presence in the global context list would be enough to keep
+        //       this object alive indefinitely.  An alternative to allocating
+        //       room for this struct explicitly would be to mash it into the
+        //       base of the stack being allocated below.  However, doing so
+        //       requires too much special logic to be worthwhile.
+        m_ctxt = new Thread.Context;
+
+        static if( is( typeof( VirtualAlloc ) ) )
+        {
+            // reserve memory for stack
+            m_pmem = VirtualAlloc( null,
+                                   sz + PAGESIZE,
+                                   MEM_RESERVE,
+                                   PAGE_NOACCESS );
+            if( !m_pmem )
+            {
+                throw new FiberException( "Unable to reserve memory for stack" );
+            }
+
+            version( StackGrowsDown )
+            {
+                void* stack = m_pmem + PAGESIZE;
+                void* guard = m_pmem;
+                void* pbase = stack + sz;
+            }
+            else
+            {
+                void* stack = m_pmem;
+                void* guard = m_pmem + sz;
+                void* pbase = stack;
+            }
+
+            // allocate reserved stack segment
+            stack = VirtualAlloc( stack,
+                                  sz,
+                                  MEM_COMMIT,
+                                  PAGE_READWRITE );
+            if( !stack )
+            {
+                throw new FiberException( "Unable to allocate memory for stack" );
+            }
+
+            // allocate reserved guard page
+            guard = VirtualAlloc( guard,
+                                  PAGESIZE,
+                                  MEM_COMMIT,
+                                  PAGE_READWRITE | PAGE_GUARD );
+            if( !guard )
+            {
+                throw new FiberException( "Unable to create guard page for stack" );
+            }
+
+            m_ctxt.bstack = pbase;
+            m_ctxt.tstack = pbase;
+            m_size = sz;
+        }
+        else
+        {   static if( is( typeof( mmap ) ) )
+            {
+                m_pmem = mmap( null,
+                               sz,
+                               PROT_READ | PROT_WRITE,
+                               MAP_PRIVATE | MAP_ANON,
+                               -1,
+                               0 );
+                if( m_pmem == MAP_FAILED )
+                    m_pmem = null;
+            }
+            else static if( is( typeof( valloc ) ) )
+            {
+                m_pmem = valloc( sz );
+            }
+            else static if( is( typeof( malloc ) ) )
+            {
+                m_pmem = malloc( sz );
+            }
+            else
+            {
+                m_pmem = null;
+            }
+
+            if( !m_pmem )
+            {
+                throw new FiberException( "Unable to allocate memory for stack" );
+            }
+
+            version( StackGrowsDown )
+            {
+                m_ctxt.bstack = m_pmem + sz;
+                m_ctxt.tstack = m_pmem + sz;
+            }
+            else
+            {
+                m_ctxt.bstack = m_pmem;
+                m_ctxt.tstack = m_pmem;
+            }
+            m_size = sz;
+        }
+
+        Thread.add( m_ctxt );
+    }
+
+
+    //
+    // Free this fiber's stack.
+    //
+    final void freeStack()
+    in
+    {
+        assert( m_pmem && m_ctxt );
+    }
+    body
+    {
+        // NOTE: Since this routine is only ever expected to be called from
+        //       the dtor, pointers to freed data are not set to null.
+
+        // NOTE: m_ctxt is guaranteed to be alive because it is held in the
+        //       global context list.
+        Thread.remove( m_ctxt );
+
+        static if( is( typeof( VirtualAlloc ) ) )
+        {
+            VirtualFree( m_pmem, 0, MEM_RELEASE );
+        }
+        else static if( is( typeof( mmap ) ) )
+        {
+            munmap( m_pmem, m_size );
+        }
+        else static if( is( typeof( valloc ) ) )
+        {
+            free( m_pmem );
+        }
+        else static if( is( typeof( malloc ) ) )
+        {
+            free( m_pmem );
+        }
+        delete m_ctxt;
+    }
+
+
+    // Initialize the allocated stack: seed it with the frame that the matching
+    // fiber_switchContext variant pops on the first switch into this fiber,
+    // and leave m_ctxt.tstack at the top of that frame.
+    final void initStack()
+    in
+    {
+        assert( m_ctxt.tstack && m_ctxt.tstack == m_ctxt.bstack );
+        assert( cast(size_t) m_ctxt.bstack % (void*).sizeof == 0 );
+    }
+    body
+    {
+        void* pstack = m_ctxt.tstack;
+        scope( exit )  m_ctxt.tstack = pstack; // publish the final stack top on every exit path
+
+        void push( size_t val ) // store one word in the direction of stack growth
+        {
+            version( StackGrowsDown )
+            {
+                pstack -= size_t.sizeof;
+                *(cast(size_t*) pstack) = val;
+            }
+            else
+            {
+                pstack += size_t.sizeof;
+                *(cast(size_t*) pstack) = val;
+            }
+        }
+
+        // NOTE: On OS X the stack must be 16-byte aligned according to the
+        // IA-32 call spec.  The address is handled as size_t so no bits are lost.
+        version( darwin )
+        {
+             pstack = cast(void*)(cast(size_t)(pstack) - (cast(size_t)(pstack) & 0x0F));
+        }
+
+        version( AsmX86_Win32 )
+        {
+            push( cast(size_t) &fiber_entryPoint );                 // EIP
+            push( 0xFFFFFFFF );                                     // EBP
+            push( 0x00000000 );                                     // EAX
+            push( 0xFFFFFFFF );                                     // FS:[0]
+            version( StackGrowsDown )
+            {
+                push( cast(size_t) m_ctxt.bstack );                 // FS:[4]
+                push( cast(size_t) m_ctxt.bstack - m_size );        // FS:[8]
+            }
+            else
+            {
+                push( cast(size_t) m_ctxt.bstack );                 // FS:[4]
+                push( cast(size_t) m_ctxt.bstack + m_size );        // FS:[8]
+            }
+            push( 0x00000000 );                                     // EBX
+            push( 0x00000000 );                                     // ESI
+            push( 0x00000000 );                                     // EDI
+        }
+        else version( AsmX86_Posix )
+        {
+            push( cast(size_t) &fiber_entryPoint );                 // EIP
+            push( 0x00000000 );                                     // EBP
+            push( 0x00000000 );                                     // EAX
+            push( 0x00000000 );                                     // EBX
+            push( 0x00000000 );                                     // ESI
+            push( 0x00000000 );                                     // EDI
+        }
+        else version( LLVM_AsmX86_Posix )
+        {
+            push( cast(size_t) &fiber_entryPoint );                 // EIP
+            push( 0x00000000 );                                     // newp
+            push( 0x00000000 );                                     // oldp
+            push( 0x00000000 );                                     // EBP
+            push( 0x00000000 );                                     // EBX
+            push( 0x00000000 );                                     // ESI
+            push( 0x00000000 );                                     // EDI
+        }
+//TODO: Implement x86-64 fibers
+/+
+        else version( LLVM_AsmX86_64_Posix )
+        {
+        }
++/
+        else version( AsmPPC_Posix )
+        {
+            version( StackGrowsDown )
+            {
+                pstack -= int.sizeof * 5;
+            }
+            else
+            {
+                pstack += int.sizeof * 5;
+            }
+
+            push( cast(size_t) &fiber_entryPoint );     // link register
+            push( 0x00000000 );                         // control register
+            push( 0x00000000 );                         // old stack pointer
+
+            // GPR values
+            version( StackGrowsDown )
+            {
+                pstack -= int.sizeof * 20;
+            }
+            else
+            {
+                pstack += int.sizeof * 20;
+            }
+
+            assert( (cast(size_t) pstack & 0x0f) == 0 ); // parenthesized: == binds tighter than &
+        }
+        else static if( is( ucontext_t ) )
+        {
+            getcontext( &m_utxt );
+            m_utxt.uc_stack.ss_sp   = m_pmem; // base of the allocation, whichever way the stack grows
+            m_utxt.uc_stack.ss_size = m_size;
+            makecontext( &m_utxt, &fiber_entryPoint, 0 );
+            // NOTE: If ucontext is being used then the top of the stack will
+            //       be a pointer to the ucontext_t struct for that fiber.
+            push( cast(size_t) &m_utxt );
+        }
+    }
+
+
+    Thread.Context* m_ctxt;
+    size_t          m_size;
+    void*           m_pmem;
+
+    static if( is( ucontext_t ) )
+    {
+        // NOTE: The static ucontext instance is used to represent the context
+        //       of the main application thread.
+        static ucontext_t   sm_utxt = void;
+        ucontext_t          m_utxt  = void;
+        ucontext_t*         m_ucur  = null;
+    }
+
+
+private:
+    ///////////////////////////////////////////////////////////////////////////
+    // Storage of Active Fiber
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Sets a thread-local reference to the current fiber object.
+    //
+    static void setThis( Fiber f )
+    {
+        version( Win32 )
+        {
+            TlsSetValue( sm_this, cast(void*) f );
+        }
+        else version( Posix )
+        {
+            pthread_setspecific( sm_this, cast(void*) f );
+        }
+    }
+
+
+    static Thread.TLSKey    sm_this;
+
+
+private:
+    ///////////////////////////////////////////////////////////////////////////
+    // Context Switching
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    //
+    // Switches into the stack held by this fiber.
+    //
+    final void switchIn()
+    {
+        Thread  tobj = Thread.getThis();
+        void**  oldp = &tobj.m_curr.tstack;
+        void*   newp = m_ctxt.tstack;
+
+        // NOTE: The order of operations here is very important.  The current
+        //       stack top must be stored before m_lock is set, and pushContext
+        //       must not be called until after m_lock is set.  This process
+        //       is intended to prevent a race condition with the suspend
+        //       mechanism used for garbage collection.  If it is not followed,
+        //       a badly timed collection could cause the GC to scan from the
+        //       bottom of one stack to the top of another, or to miss scanning
+        //       a stack that still contains valid data.  The old stack pointer
+        //       oldp will be set again before the context switch to guarantee
+        //       that it points to exactly the correct stack location so the
+        //       successive pop operations will succeed.
+        *oldp = getStackTop();
+        volatile tobj.m_lock = true;
+        tobj.pushContext( m_ctxt );
+
+        fiber_switchContext( oldp, newp );
+
+        // NOTE: As above, these operations must be performed in a strict order
+        //       to prevent Bad Things from happening.
+        tobj.popContext();
+        volatile tobj.m_lock = false;
+        tobj.m_curr.tstack = tobj.m_curr.bstack;
+    }
+
+
+    //
+    // Switches out of the current stack and into the enclosing stack.
+    //
+    final void switchOut()
+    {
+        Thread  tobj = Thread.getThis();
+        void**  oldp = &m_ctxt.tstack; // slot that receives this fiber's stack top
+        void*   newp = tobj.m_curr.within.tstack; // stack top to resume (presumably the enclosing context -- confirm against Thread.Context)
+
+        // NOTE: The order of operations here is very important.  The current
+        //       stack top must be stored before m_lock is set.  (Unlike
+        //       switchIn, no pushContext call is made here.)  This process
+        //       is intended to prevent a race condition with the suspend
+        //       mechanism used for garbage collection.  If it is not followed,
+        //       a badly timed collection could cause the GC to scan from the
+        //       bottom of one stack to the top of another, or to miss scanning
+        //       a stack that still contains valid data.  The old stack pointer
+        //       oldp will be set again before the context switch to guarantee
+        //       that it points to exactly the correct stack location so the
+        //       successive pop operations will succeed.
+        *oldp = getStackTop();
+        volatile tobj.m_lock = true;
+
+        fiber_switchContext( oldp, newp ); // execution continues below once this fiber is switched back in
+
+        // NOTE: As above, these operations must be performed in a strict order
+        //       to prevent Bad Things from happening.
+        volatile tobj.m_lock = false;
+        tobj.m_curr.tstack = tobj.m_curr.bstack;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/ldc.mak	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,139 @@
+# Makefile to build the D runtime library core components with LDC on Posix
+# Designed to work with GNU make
+# Targets:
+#	make
+#		Same as make all
+#	make lib
+#		Build the common library
+#   make doc
+#       Generate documentation
+#	make clean
+#		Delete unneeded files created by build process
+
+# Name of the static library produced by this makefile, and the glob used by
+# the clean and install targets to find it (and any suffixed variants).
+LIB_TARGET=libdruntime-core.a
+LIB_MASK=libdruntime-core*.a
+
+# Shell helpers.  NOTE: CP is defined but the install target below spells out
+# "cp -f" directly instead of using it.
+CP=cp -f
+RM=rm -f
+MD=mkdir -p
+
+# Hooks for callers: extra C / D compiler flags can be injected from the
+# command line, e.g. make ADD_DFLAGS=-g
+ADD_CFLAGS=
+ADD_DFLAGS=
+
+# Optimised flags are active; the commented-out line under each variable is
+# the debug alternative.
+CFLAGS=-O $(ADD_CFLAGS)
+#CFLAGS=-g $(ADD_CFLAGS)
+
+DFLAGS=-release -O -inline -w $(ADD_DFLAGS)
+#DFLAGS=-g -w $(ADD_DFLAGS)
+
+# TFLAGS is DFLAGS without -release.  No rule in this makefile references it.
+TFLAGS=-O -inline -w $(ADD_DFLAGS)
+#TFLAGS=-g -w $(ADD_DFLAGS)
+
+DOCFLAGS=-version=DDoc
+
+# Tools: C compiler, library archiver (make's built-in $(AR); ar flags
+# q = quick append, s = write symbol index, v = verbose) and D compiler.
+CC=gcc
+LC=$(AR) -qsv
+DC=ldc2
+
+# Install destinations, relative to the directory holding this makefile.
+INC_DEST=../../import
+LIB_DEST=../../lib
+DOC_DEST=../../doc
+
+# Old-style suffix rules.  $< is the source file, $@ the target and $* the
+# target path without its suffix.
+.SUFFIXES: .s .S .c .cpp .d .html .o
+
+.s.o:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+.S.o:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+.c.o:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+# NOTE: the C++ compiler is hard-coded here rather than taken from a variable.
+.cpp.o:
+	g++ -c $(CFLAGS) $< -o$@
+
+# Default D rule: compile to an object file and, via -Hf, also write a .di
+# interface file next to the source (these are what clean removes and install
+# copies).  The commented-out recipe is the variant without .di generation.
+.d.o:
+	$(DC) -c $(DFLAGS) -Hf$*.di $< -of$@
+#	$(DC) -c $(DFLAGS) $< -of$@
+
+# Documentation rule: -o- suppresses the object file, -Df names the HTML file.
+.d.html:
+	$(DC) -c -o- $(DOCFLAGS) -Df$*.html $<
+
+# Alias targets.  "targets" is the first ordinary target, so a bare "make"
+# builds both the library and the (currently empty) documentation set.
+targets : lib doc
+all     : lib doc
+core    : lib
+lib     : core.lib
+doc     : core.doc
+
+######################################################
+
+# Objects built from the D sources under core/.  The object for
+# core/memory.d is named memory_.o; it has its own explicit rule further down.
+OBJ_CORE= \
+    core/bitmanip.o \
+    core/exception.o \
+    core/memory_.o \
+    core/runtime.o \
+    core/thread.o
+
+# Objects built from the C sources under stdc/.
+OBJ_STDC= \
+    stdc/errno.o
+
+# Everything that goes into the library archive.
+ALL_OBJS= \
+    $(OBJ_CORE) \
+    $(OBJ_STDC)
+
+######################################################
+
+# HTML pages that the .d.html rule could generate.
+# NOTE(review): DOC_CORE is never referenced; ALL_DOCS below is empty, so the
+# doc target currently builds nothing and only prints its message.  Presumably
+# ALL_DOCS was meant to include $(DOC_CORE) - confirm before enabling.
+DOC_CORE= \
+    core/bitmanip.html \
+    core/exception.html \
+    core/memory.html \
+    core/runtime.html \
+    core/thread.html
+
+
+ALL_DOCS=
+
+######################################################
+
+core.lib : $(LIB_TARGET)
+
+# The old archive is deleted first because $(LC) appends (ar q) rather than
+# replacing members.
+$(LIB_TARGET) : $(ALL_OBJS)
+	$(RM) $@
+	$(LC) $@ $(ALL_OBJS)
+
+core.doc : $(ALL_DOCS)
+	echo Documentation generated.
+
+######################################################
+
+### bitmanip
+
+# Compiled without -Hf, so no .di interface file is generated for bitmanip.
+core/bitmanip.o : core/bitmanip.d
+	$(DC) -c $(DFLAGS) core/bitmanip.d -of$@
+
+### memory
+
+# The object is deliberately named memory_.o, but the interface file must
+# still be core/memory.di so that it matches the source it is generated from.
+# In an explicit rule GNU make sets $* to the target name minus its suffix
+# (core/memory_), so -Hf$*.di would write core/memory_.di; the header name is
+# therefore spelled out explicitly.
+core/memory_.o : core/memory.d
+	$(DC) -c $(DFLAGS) -Hfcore/memory.di $< -of$@
+
+### thread
+
+# Same as the default .d.o rule plus -d (DMD-style switch that permits
+# deprecated language features).
+core/thread.o : core/thread.d
+	$(DC) -c $(DFLAGS) -d -Hf$*.di core/thread.d -of$@
+
+######################################################
+
+# Removes generated interface files, objects, documentation and libraries.
+clean :
+	find . -name "*.di" | xargs $(RM)
+	$(RM) $(ALL_OBJS)
+	$(RM) $(ALL_DOCS)
+	find . -name "$(LIB_MASK)" | xargs $(RM)
+
+# Copies generated .di, .html and library files into the destination trees,
+# keeping each file's path relative to this directory.
+# NOTE(review): only the top-level destination directories are created here;
+# the cp calls assume matching sub-directories (e.g. $(INC_DEST)/core) already
+# exist, and rely on find substituting {} inside a longer argument - confirm
+# both hold on the target platforms.
+install :
+	$(MD) $(INC_DEST)
+	find . -name "*.di" -exec cp -f {} $(INC_DEST)/{} \;
+	$(MD) $(DOC_DEST)
+	find . -name "*.html" -exec cp -f {} $(DOC_DEST)/{} \;
+	$(MD) $(LIB_DEST)
+	find . -name "$(LIB_MASK)" -exec cp -f {} $(LIB_DEST)/{} \;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/posix.mak	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,139 @@
+# Makefile to build the D runtime library core components for Posix
+# Designed to work with GNU make
+# Targets:
+#	make
+#		Same as make all
+#	make lib
+#		Build the common library
+#   make doc
+#       Generate documentation
+#	make clean
+#		Delete unneeded files created by build process
+
+# Name of the static library produced by this makefile, and the glob used by
+# the clean and install targets to find it (and any suffixed variants).
+LIB_TARGET=libdruntime-core.a
+LIB_MASK=libdruntime-core*.a
+
+# Shell helpers.  NOTE: CP is defined but the install target below spells out
+# "cp -f" directly instead of using it.
+CP=cp -f
+RM=rm -f
+MD=mkdir -p
+
+# Hooks for callers: extra C / D compiler flags can be injected from the
+# command line, e.g. make ADD_DFLAGS=-g
+ADD_CFLAGS=
+ADD_DFLAGS=
+
+# Optimised flags are active; the commented-out line under each variable is
+# the debug alternative.
+CFLAGS=-O $(ADD_CFLAGS)
+#CFLAGS=-g $(ADD_CFLAGS)
+
+DFLAGS=-release -O -inline -w -nofloat $(ADD_DFLAGS)
+#DFLAGS=-g -w -nofloat $(ADD_DFLAGS)
+
+# TFLAGS is DFLAGS without -release.  No rule in this makefile references it.
+TFLAGS=-O -inline -w -nofloat $(ADD_DFLAGS)
+#TFLAGS=-g -w -nofloat $(ADD_DFLAGS)
+
+DOCFLAGS=-version=DDoc
+
+# Tools: C compiler, library archiver (make's built-in $(AR); ar flags
+# q = quick append, s = write symbol index, v = verbose) and D compiler.
+CC=gcc
+LC=$(AR) -qsv
+DC=dmd
+
+# Install destinations, relative to the directory holding this makefile.
+INC_DEST=../../import
+LIB_DEST=../../lib
+DOC_DEST=../../doc
+
+# Old-style suffix rules.  $< is the source file, $@ the target and $* the
+# target path without its suffix.
+.SUFFIXES: .s .S .c .cpp .d .html .o
+
+.s.o:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+.S.o:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+.c.o:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+# NOTE: the C++ compiler is hard-coded here rather than taken from a variable.
+.cpp.o:
+	g++ -c $(CFLAGS) $< -o$@
+
+# Default D rule: compile to an object file and, via -Hf, also write a .di
+# interface file next to the source (these are what clean removes and install
+# copies).  The commented-out recipe is the variant without .di generation.
+.d.o:
+	$(DC) -c $(DFLAGS) -Hf$*.di $< -of$@
+#	$(DC) -c $(DFLAGS) $< -of$@
+
+# Documentation rule: -o- suppresses the object file, -Df names the HTML file.
+.d.html:
+	$(DC) -c -o- $(DOCFLAGS) -Df$*.html $<
+
+# Alias targets.  "targets" is the first ordinary target, so a bare "make"
+# builds both the library and the (currently empty) documentation set.
+targets : lib doc
+all     : lib doc
+core    : lib
+lib     : core.lib
+doc     : core.doc
+
+######################################################
+
+# Objects built from the D sources under core/.  The object for
+# core/memory.d is named memory_.o; it has its own explicit rule further down.
+OBJ_CORE= \
+    core/bitmanip.o \
+    core/exception.o \
+    core/memory_.o \
+    core/runtime.o \
+    core/thread.o
+
+# Objects built from the C sources under stdc/.
+OBJ_STDC= \
+    stdc/errno.o
+
+# Everything that goes into the library archive.
+ALL_OBJS= \
+    $(OBJ_CORE) \
+    $(OBJ_STDC)
+
+######################################################
+
+# HTML pages that the .d.html rule could generate.
+# NOTE(review): DOC_CORE is never referenced; ALL_DOCS below is empty, so the
+# doc target currently builds nothing and only prints its message.  Presumably
+# ALL_DOCS was meant to include $(DOC_CORE) - confirm before enabling.
+DOC_CORE= \
+    core/bitmanip.html \
+    core/exception.html \
+    core/memory.html \
+    core/runtime.html \
+    core/thread.html
+
+
+ALL_DOCS=
+
+######################################################
+
+core.lib : $(LIB_TARGET)
+
+# The old archive is deleted first because $(LC) appends (ar q) rather than
+# replacing members.
+$(LIB_TARGET) : $(ALL_OBJS)
+	$(RM) $@
+	$(LC) $@ $(ALL_OBJS)
+
+core.doc : $(ALL_DOCS)
+	echo Documentation generated.
+
+######################################################
+
+### bitmanip
+
+# Compiled without -Hf, so no .di interface file is generated for bitmanip.
+core/bitmanip.o : core/bitmanip.d
+	$(DC) -c $(DFLAGS) core/bitmanip.d -of$@
+
+### memory
+
+# The object is deliberately named memory_.o, but the interface file must
+# still be core/memory.di so that it matches the source it is generated from.
+# In an explicit rule GNU make sets $* to the target name minus its suffix
+# (core/memory_), so -Hf$*.di would write core/memory_.di; the header name is
+# therefore spelled out explicitly.
+core/memory_.o : core/memory.d
+	$(DC) -c $(DFLAGS) -Hfcore/memory.di $< -of$@
+
+### thread
+
+# Same as the default .d.o rule plus -d (DMD switch that permits deprecated
+# language features).
+core/thread.o : core/thread.d
+	$(DC) -c $(DFLAGS) -d -Hf$*.di core/thread.d -of$@
+
+######################################################
+
+# Removes generated interface files, objects, documentation and libraries.
+clean :
+	find . -name "*.di" | xargs $(RM)
+	$(RM) $(ALL_OBJS)
+	$(RM) $(ALL_DOCS)
+	find . -name "$(LIB_MASK)" | xargs $(RM)
+
+# Copies generated .di, .html and library files into the destination trees,
+# keeping each file's path relative to this directory.
+# NOTE(review): only the top-level destination directories are created here;
+# the cp calls assume matching sub-directories (e.g. $(INC_DEST)/core) already
+# exist, and rely on find substituting {} inside a longer argument - confirm
+# both hold on the target platforms.
+install :
+	$(MD) $(INC_DEST)
+	find . -name "*.di" -exec cp -f {} $(INC_DEST)/{} \;
+	$(MD) $(DOC_DEST)
+	find . -name "*.html" -exec cp -f {} $(DOC_DEST)/{} \;
+	$(MD) $(LIB_DEST)
+	find . -name "$(LIB_MASK)" -exec cp -f {} $(LIB_DEST)/{} \;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/stdc/errno.c	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,21 @@
+/**
+ * This file contains wrapper functions for macro-defined C routines.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License:   BSD Style, see LICENSE
+ * Authors:   Sean Kelly
+ */
+#include <errno.h>
+
+
+/* Returns the current value of errno.  errno may be a macro, so it is exposed
+ * through a real function that non-C callers can link against. */
+int getErrno(void)
+{
+    return errno;
+}
+
+
+/* Stores val in errno and hands the same value back to the caller. */
+int setErrno( int val )
+{
+    return errno = val;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/common/win32.mak	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,130 @@
+# Makefile to build the D runtime library core components for Win32
+# Designed to work with DigitalMars make
+# Targets:
+#	make
+#		Same as make all
+#	make lib
+#		Build the common library
+#   make doc
+#       Generate documentation
+#	make clean
+#		Delete unneeded files created by build process
+
+# Name of the library produced by this makefile, and the wildcard used by the
+# clean and install targets to find it (and any suffixed variants).
+LIB_TARGET=druntime-core.lib
+LIB_MASK=druntime-core*.lib
+
+# Windows shell helpers used by the clean and install targets.
+CP=xcopy /y
+RM=del /f
+MD=mkdir
+
+# Hooks for callers: extra C / D compiler flags can be injected from the
+# command line.
+ADD_CFLAGS=
+ADD_DFLAGS=
+
+# Release flags are active; the commented-out line under each variable is the
+# debug alternative.
+CFLAGS=-mn -6 -r $(ADD_CFLAGS)
+#CFLAGS=-g -mn -6 -r $(ADD_CFLAGS)
+
+DFLAGS=-release -O -inline -w -nofloat $(ADD_DFLAGS)
+#DFLAGS=-g -w -nofloat $(ADD_DFLAGS)
+
+# TFLAGS is DFLAGS without -release.  No rule in this makefile references it.
+TFLAGS=-O -inline -w  -nofloat $(ADD_DFLAGS)
+#TFLAGS=-g -w -nofloat $(ADD_DFLAGS)
+
+DOCFLAGS=-version=DDoc
+
+# Tools: DigitalMars C compiler, librarian and D compiler.
+CC=dmc
+LC=lib
+DC=dmd
+
+# Install destinations, relative to the directory holding this makefile.
+INC_DEST=..\..\import
+LIB_DEST=..\..\lib
+DOC_DEST=..\..\doc
+
+# NOTE(review): presumably plays the role that .SUFFIXES has in the Posix
+# makefiles (declaring the suffixes used by the rules below) - confirm against
+# the DigitalMars make documentation.
+.DEFAULT: .asm .c .cpp .d .html .obj
+
+# Assembly sources go through the C compiler driver without $(CFLAGS).
+.asm.obj:
+	$(CC) -c $<
+
+.c.obj:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+.cpp.obj:
+	$(CC) -c $(CFLAGS) $< -o$@
+
+# Default D rule: compile to an object file and, via -Hf, also write a .di
+# interface file next to the source (these are what clean removes and install
+# copies).  The commented-out recipe is the variant without .di generation.
+.d.obj:
+	$(DC) -c $(DFLAGS) -Hf$*.di $< -of$@
+#	$(DC) -c $(DFLAGS) $< -of$@
+
+# Documentation rule: -o- suppresses the object file, -Df names the HTML file.
+.d.html:
+	$(DC) -c -o- $(DOCFLAGS) -Df$*.html $<
+
+# Alias targets; the file header states that a bare "make" equals "make all".
+targets : lib doc
+all     : lib doc
+core    : lib
+lib     : core.lib
+doc     : core.doc
+
+######################################################
+
+# Objects built from the D sources under core.  Unlike the Posix makefiles,
+# the memory module keeps its plain object name here.
+OBJ_CORE= \
+    core\bitmanip.obj \
+    core\exception.obj \
+    core\memory.obj \
+    core\runtime.obj \
+    core\thread.obj
+
+# Objects built from the C sources under stdc.
+OBJ_STDC= \
+    stdc\errno.obj
+
+# Everything that goes into the library.
+ALL_OBJS= \
+    $(OBJ_CORE) \
+    $(OBJ_STDC)
+
+######################################################
+
+# HTML pages that the .d.html rule could generate.
+# NOTE(review): DOC_CORE is never referenced; ALL_DOCS below is empty, so the
+# doc target currently builds nothing and only prints its message.  Presumably
+# ALL_DOCS was meant to include $(DOC_CORE) - confirm before enabling.
+DOC_CORE= \
+    core\bitmanip.html \
+    core\exception.html \
+    core\memory.html \
+    core\runtime.html \
+    core\thread.html
+
+ALL_DOCS=
+
+######################################################
+
+core.lib : $(LIB_TARGET)
+
+# Any previous library is deleted before the librarian creates a new one.
+$(LIB_TARGET) : $(ALL_OBJS)
+	$(RM) $@
+	$(LC) -c -n $@ $(ALL_OBJS)
+
+core.doc : $(ALL_DOCS)
+	@echo Documentation generated.
+
+######################################################
+
+### bitmanip
+
+# Compiled without -Hf, so no .di interface file is generated for bitmanip.
+core\bitmanip.obj : core\bitmanip.d
+	$(DC) -c $(DFLAGS) core\bitmanip.d -of$@
+
+### thread
+
+# Same as the default .d.obj rule plus -d (DMD switch that permits deprecated
+# language features).
+core\thread.obj : core\thread.d
+	$(DC) -c $(DFLAGS) -d -Hf$*.di core\thread.d -of$@
+
+######################################################
+
+# Removes generated interface files (recursively, /s), objects, documentation
+# and libraries.
+clean :
+	$(RM) /s .\*.di
+	$(RM) $(ALL_OBJS)
+	$(RM) $(ALL_DOCS)
+	$(RM) $(LIB_MASK)
+
+# Copies generated .di, .html and library files into the destination trees;
+# xcopy /s also descends into sub-directories.
+# NOTE(review): mkdir is invoked unconditionally; whether it fails when the
+# directory already exists depends on the shell and make in use - confirm.
+install :
+	$(MD) $(INC_DEST)\.
+	$(CP) /s *.di $(INC_DEST)\.
+	$(MD) $(DOC_DEST)
+	$(CP) /s *.html $(DOC_DEST)\.
+	$(MD) $(LIB_DEST)
+	$(CP) $(LIB_MASK) $(LIB_DEST)\.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/aApply.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,408 @@
+/**
+ * Part of the D programming language runtime library.
+ */
+
+/*
+ *  Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
+ *  Written by Walter Bright
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty. In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, in both source and binary form, subject to the following
+ *  restrictions:
+ *
+ *  o  The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  o  Altered source versions must be plainly marked as such, and must not
+ *     be misrepresented as being the original software.
+ *  o  This notice may not be removed or altered from any source
+ *     distribution.
+ */
+
+/*
+ *  Modified by Sean Kelly for use with the D Runtime Project
+ */
+
+module rt.aApply;
+
+/* This code handles decoding UTF strings for foreach loops.
+ * There are 6 combinations of conversions between char, wchar,
+ * and dchar, and 2 of each of those.
+ */
+
+private import util.utf;
+
+/**********************************************
+ */
+
+// The loop-body delegate has D linkage even though the _aApply* entry points
+// themselves use C linkage.  Its argument is an untyped pointer to the
+// current element.
+extern (D) typedef int delegate(void *) dg_t;
+
+/**
+ * foreach over a char[] that yields dchar values.
+ *
+ * Every UTF-8 sequence in aa is decoded and dg is called with a pointer to
+ * the resulting dchar.  Iteration stops at the first non-zero value returned
+ * by dg, which is then returned; 0 is returned when the whole array was
+ * visited.
+ */
+extern (C) int _aApplycd1(char[] aa, dg_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplycd1(), len = %d\n", len);
+    while (pos < len)
+    {
+        dchar cp = aa[pos];
+
+        if (cp >= 0x80)
+            cp = decode(aa, pos);   // decode moves pos past the sequence
+        else
+            pos++;                  // plain ASCII: one code unit
+
+        int rc = dg(cast(void *)&cp);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+/**
+ * foreach over a wchar[] that yields dchar values.
+ *
+ * Code units below 0x80 are passed through directly; everything else is
+ * handed to decode.  dg receives a pointer to each dchar.  The first
+ * non-zero result of dg ends the loop and is returned, otherwise 0.
+ */
+extern (C) int _aApplywd1(wchar[] aa, dg_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplywd1(), len = %d\n", len);
+    while (pos < len)
+    {
+        dchar cp = aa[pos];
+
+        if (cp >= 0x80)
+            cp = decode(aa, pos);   // decode moves pos past the sequence
+        else
+            pos++;
+
+        int rc = dg(cast(void *)&cp);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+/**
+ * foreach over a char[] that yields wchar values.
+ *
+ * Each UTF-8 sequence is decoded and re-encoded as UTF-16: one call to dg
+ * for code points up to 0xFFFF, two calls (high surrogate, then low
+ * surrogate) for anything above.  The first non-zero result of dg ends the
+ * loop and is returned, otherwise 0.
+ */
+extern (C) int _aApplycw1(char[] aa, dg_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplycw1(), len = %d\n", len);
+    while (pos < len)
+    {
+        wchar unit = aa[pos];
+        int   rc;
+
+        if (unit < 0x80)
+        {
+            pos++;                  // plain ASCII: one code unit
+        }
+        else
+        {
+            dchar cp = decode(aa, pos);
+
+            if (cp > 0xFFFF)
+            {
+                // Needs a surrogate pair; deliver the high half first.
+                unit = cast(wchar)((((cp - 0x10000) >> 10) & 0x3FF) + 0xD800);
+                rc = dg(cast(void *)&unit);
+                if (rc)
+                    return rc;
+                unit = cast(wchar)(((cp - 0x10000) & 0x3FF) + 0xDC00);
+            }
+            else
+                unit = cast(wchar) cp;
+        }
+
+        rc = dg(cast(void *)&unit);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+/**
+ * foreach over a wchar[] that yields char values.
+ *
+ * Code units below 0x80 are delivered as a single char.  Anything else is
+ * decoded to a dchar, converted with toUTF8 and delivered one byte at a
+ * time.  The first non-zero result of dg ends the iteration and is
+ * returned, otherwise 0.
+ */
+extern (C) int _aApplywc1(wchar[] aa, dg_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplywc1(), len = %d\n", len);
+    while (pos < len)
+    {
+        wchar unit = aa[pos];
+
+        if (unit >= 0x80)
+        {
+            char[4] utf8;
+            dchar   cp    = decode(aa, pos);    // decode moves pos forward
+            auto    bytes = toUTF8(utf8, cp);
+
+            foreach (char b; bytes)
+            {
+                int rc = dg(cast(void *)&b);
+                if (rc)
+                    return rc;
+            }
+        }
+        else
+        {
+            char ascii = cast(char) unit;
+
+            pos++;
+            int rc = dg(cast(void *)&ascii);
+            if (rc)
+                return rc;
+        }
+    }
+    return 0;
+}
+
+/**
+ * foreach over a dchar[] that yields char values.
+ *
+ * Code points below 0x80 are delivered as a single char; all others are
+ * converted with toUTF8 and delivered byte by byte.  The first non-zero
+ * result of dg ends the iteration and is returned, otherwise 0.
+ */
+extern (C) int _aApplydc1(dchar[] aa, dg_t dg)
+{
+    debug(apply) printf("_aApplydc1(), len = %d\n", aa.length);
+    foreach (dchar cp; aa)
+    {
+        if (cp < 0x80)
+        {
+            char ascii = cast(char) cp;
+
+            int rc = dg(cast(void *)&ascii);
+            if (rc)
+                return rc;
+        }
+        else
+        {
+            char[4] utf8;
+            auto    bytes = toUTF8(utf8, cp);
+
+            foreach (char b; bytes)
+            {
+                int rc = dg(cast(void *)&b);
+                if (rc)
+                    return rc;
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * foreach over a dchar[] that yields wchar values.
+ *
+ * Code points up to 0xFFFF are delivered as one wchar; larger ones are
+ * delivered as a surrogate pair, high half first.  The first non-zero
+ * result of dg ends the iteration and is returned, otherwise 0.
+ */
+extern (C) int _aApplydw1(dchar[] aa, dg_t dg)
+{
+    debug(apply) printf("_aApplydw1(), len = %d\n", aa.length);
+    foreach (dchar cp; aa)
+    {
+        wchar unit;
+        int   rc;
+
+        if (cp > 0xFFFF)
+        {
+            unit = cast(wchar)((((cp - 0x10000) >> 10) & 0x3FF) + 0xD800);
+            rc = dg(cast(void *)&unit);
+            if (rc)
+                return rc;
+            unit = cast(wchar)(((cp - 0x10000) & 0x3FF) + 0xDC00);
+        }
+        else
+            unit = cast(wchar) cp;
+
+        rc = dg(cast(void *)&unit);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+
+/****************************************************************************/
+
+// Two-argument loop-body delegate: D linkage, while the _aApply*2 entry
+// points use C linkage.  The first pointer refers to the index, the second
+// to the current element.
+extern (D) typedef int delegate(void *, void *) dg2_t;
+
+/**
+ * foreach (index, dchar) over a char[].
+ *
+ * dg is called with a pointer to the index of the first code unit of each
+ * UTF-8 sequence and a pointer to the decoded dchar.  Because dg gets the
+ * address of the live index, a value it writes there is what the sequence
+ * length is added to afterwards.  The first non-zero result of dg ends the
+ * loop and is returned, otherwise 0.
+ */
+extern (C) int _aApplycd2(char[] aa, dg2_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplycd2(), len = %d\n", len);
+    while (pos < len)
+    {
+        size_t step = 1;
+        dchar  cp   = aa[pos];
+
+        if (cp >= 0x80)
+        {
+            // Decode on a copy of the index so pos still names the start.
+            size_t next = pos;
+
+            cp   = decode(aa, next);
+            step = next - pos;
+        }
+
+        int rc = dg(&pos, cast(void *)&cp);
+        if (rc)
+            return rc;
+        pos += step;
+    }
+    return 0;
+}
+
+/**
+ * foreach (index, dchar) over a wchar[].
+ *
+ * dg is called with a pointer to the index of the first code unit of each
+ * element and a pointer to the decoded dchar; the index then advances by
+ * the number of code units consumed.  The first non-zero result of dg ends
+ * the loop and is returned, otherwise 0.
+ */
+extern (C) int _aApplywd2(wchar[] aa, dg2_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplywd2(), len = %d\n", len);
+    while (pos < len)
+    {
+        size_t step = 1;
+        dchar  cp   = aa[pos];
+
+        if (cp >= 0x80)
+        {
+            // Decode on a copy of the index so pos still names the start.
+            size_t next = pos;
+
+            cp   = decode(aa, next);
+            step = next - pos;
+        }
+
+        int rc = dg(&pos, cast(void *)&cp);
+        if (rc)
+            return rc;
+        pos += step;
+    }
+    return 0;
+}
+
+/**
+ * foreach (index, wchar) over a char[].
+ *
+ * Each UTF-8 sequence is decoded and re-encoded as UTF-16.  dg is called
+ * once (code point up to 0xFFFF) or twice (surrogate pair, high half first)
+ * per sequence, each time with a pointer to the index of the sequence's
+ * first code unit.  The first non-zero result of dg ends the loop and is
+ * returned, otherwise 0.
+ */
+extern (C) int _aApplycw2(char[] aa, dg2_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplycw2(), len = %d\n", len);
+    while (pos < len)
+    {
+        size_t step = 1;
+        wchar  unit = aa[pos];
+        int    rc;
+
+        if (unit >= 0x80)
+        {
+            size_t next = pos;
+            dchar  cp   = decode(aa, next);
+
+            step = next - pos;      // fixed before dg can touch the index
+            if (cp > 0xFFFF)
+            {
+                unit = cast(wchar) ((((cp - 0x10000) >> 10) & 0x3FF) + 0xD800);
+                rc = dg(&pos, cast(void *)&unit);
+                if (rc)
+                    return rc;
+                unit = cast(wchar) (((cp - 0x10000) & 0x3FF) + 0xDC00);
+            }
+            else
+                unit = cast(wchar) cp;
+        }
+
+        rc = dg(&pos, cast(void *)&unit);
+        if (rc)
+            return rc;
+        pos += step;
+    }
+    return 0;
+}
+
+/**
+ * foreach (index, char) over a wchar[].
+ *
+ * Code units below 0x80 are delivered as one char.  Anything else is
+ * decoded, converted with toUTF8 and delivered byte by byte, every byte
+ * with a pointer to the same index (that of the element's first code unit).
+ * The first non-zero result of dg ends the iteration and is returned,
+ * otherwise 0.
+ */
+extern (C) int _aApplywc2(wchar[] aa, dg2_t dg)
+{
+    size_t pos = 0;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplywc2(), len = %d\n", len);
+    while (pos < len)
+    {
+        size_t step = 1;
+        wchar  unit = aa[pos];
+
+        if (unit >= 0x80)
+        {
+            char[4] utf8;
+            size_t  next = pos;
+            dchar   cp   = decode(aa, next);
+
+            step = next - pos;
+            auto bytes = toUTF8(utf8, cp);
+            foreach (char b; bytes)
+            {
+                int rc = dg(&pos, cast(void *)&b);
+                if (rc)
+                    return rc;
+            }
+        }
+        else
+        {
+            char ascii = cast(char) unit;
+
+            int rc = dg(&pos, cast(void *)&ascii);
+            if (rc)
+                return rc;
+        }
+        pos += step;
+    }
+    return 0;
+}
+
+/**
+ * foreach (index, char) over a dchar[].
+ *
+ * Code points below 0x80 are delivered as one char; all others are
+ * converted with toUTF8 and delivered byte by byte.  dg always receives a
+ * pointer to the live array index.  The first non-zero result of dg ends
+ * the iteration and is returned, otherwise 0.
+ */
+extern (C) int _aApplydc2(dchar[] aa, dg2_t dg)
+{
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplydc2(), len = %d\n", len);
+    for (size_t pos = 0; pos < len; pos++)
+    {
+        dchar cp = aa[pos];
+
+        if (cp < 0x80)
+        {
+            char ascii = cast(char) cp;
+
+            int rc = dg(&pos, cast(void *)&ascii);
+            if (rc)
+                return rc;
+        }
+        else
+        {
+            char[4] utf8;
+            auto    bytes = toUTF8(utf8, cp);
+
+            foreach (char b; bytes)
+            {
+                int rc = dg(&pos, cast(void *)&b);
+                if (rc)
+                    return rc;
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * foreach (index, wchar) over a dchar[].
+ *
+ * Code points up to 0xFFFF are delivered as one wchar; larger ones as a
+ * surrogate pair, high half first.  dg receives a pointer to a copy of the
+ * index, so nothing it writes there affects the iteration.  The first
+ * non-zero result of dg ends the iteration and is returned, otherwise 0.
+ */
+extern (C) int _aApplydw2(dchar[] aa, dg2_t dg)
+{
+    debug(apply) printf("_aApplydw2(), len = %d\n", aa.length);
+    foreach (size_t idx, dchar cp; aa)
+    {
+        size_t key = idx;   // per-element copy shared by both dg calls
+        wchar  unit;
+        int    rc;
+
+        if (cp > 0xFFFF)
+        {
+            unit = cast(wchar) ((((cp - 0x10000) >> 10) & 0x3FF) + 0xD800);
+            rc = dg(&key, cast(void *)&unit);
+            if (rc)
+                return rc;
+            unit = cast(wchar) (((cp - 0x10000) & 0x3FF) + 0xDC00);
+        }
+        else
+            unit = cast(wchar) cp;
+
+        rc = dg(&key, cast(void *)&unit);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/aApplyR.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,974 @@
+/**
+ * Part of the D programming language runtime library.
+ */
+
+/*
+ *  Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
+ *  Written by Walter Bright
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty. In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, in both source and binary form, subject to the following
+ *  restrictions:
+ *
+ *  o  The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  o  Altered source versions must be plainly marked as such, and must not
+ *     be misrepresented as being the original software.
+ *  o  This notice may not be removed or altered from any source
+ *     distribution.
+ */
+
+/*
+ *  Modified by Sean Kelly for use with the D Runtime Project
+ */
+
+module rt.aApplyR;
+
+/* This code handles decoding UTF strings for foreach_reverse loops.
+ * There are 6 combinations of conversions between char, wchar,
+ * and dchar, and 2 of each of those.
+ */
+
+private import util.utf;
+
+/**********************************************/
+/* 1 argument versions */
+
+// dg is D, but _aApplyRcd() is C
+extern (D) typedef int delegate(void *) dg_t;
+
+/**
+ * foreach_reverse over a char[] that yields dchar values.
+ *
+ * The array is walked from the end.  A byte with the top bit set starts a
+ * backwards scan: every continuation byte (10xxxxxx) contributes six bits,
+ * least significant group first, until the lead byte (11xxxxxx) is reached.
+ * dg is called with a pointer to each decoded dchar; the first non-zero
+ * result ends the iteration and is returned, otherwise 0.
+ *
+ * Malformed input is reported through onUnicodeError when the start of the
+ * array is reached without finding a lead byte, when a byte inside the tail
+ * is not a continuation byte, or when more than three continuation bytes
+ * precede the lead byte.
+ */
+extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
+{   int result;
+
+    debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
+    for (size_t i = aa.length; i != 0; )
+    {   dchar d;
+
+        i--;
+        d = aa[i];
+        if (d & 0x80)
+        {   char c = cast(char)d;
+            uint j;             // bit position of the next 6-bit group
+            uint m = 0x3F;      // payload mask for the lead byte
+            d = 0;
+            while ((c & 0xC0) != 0xC0)
+            {   // Bounding j at 18 keeps every shift below the width of d;
+                // an unbounded run of continuation bytes would otherwise
+                // shift by 32 or more, which is undefined.
+                if (i == 0 || (c & 0xC0) != 0x80 || j >= 18)
+                    onUnicodeError("Invalid UTF-8 sequence", i);
+                i--;
+                d |= (c & 0x3F) << j;
+                j += 6;
+                m >>= 1;        // each continuation byte removes one lead payload bit
+                c = aa[i];
+            }
+            // NOTE(review): the lead byte's length marker is not compared
+            // with the number of continuation bytes actually seen, and a
+            // lead byte with no continuation bytes is still accepted.
+            d |= (c & m) << j;
+        }
+        result = dg(cast(void *)&d);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+// Exercises foreach_reverse (dchar) over a UTF-8 string: first pure ASCII,
+// then a string mixing 1-, 3- and 4-byte sequences.
+unittest
+{
+    debug(apply) printf("_aApplyRcd1.unittest\n");
+
+    auto s = "hello"c;
+    int i;
+
+    // i counts callbacks; the characters must arrive last-to-first.
+    foreach_reverse(dchar d; s)
+    {
+        switch (i)
+        {
+            case 0:     assert(d == 'o'); break;
+            case 1:     assert(d == 'l'); break;
+            case 2:     assert(d == 'l'); break;
+            case 3:     assert(d == 'e'); break;
+            case 4:     assert(d == 'h'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+
+    // Multi-byte sequences must come back as whole code points.
+    s = "a\u1234\U00100456b";
+    i = 0;
+    foreach_reverse(dchar d; s)
+    {
+        //printf("i = %d, d = %x\n", i, d);
+        switch (i)
+        {
+            case 0:     assert(d == 'b'); break;
+            case 1:     assert(d == '\U00100456'); break;
+            case 2:     assert(d == '\u1234'); break;
+            case 3:     assert(d == 'a'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 4);
+}
+
+/*****************************/
+
+/**
+ * foreach_reverse over a wchar[] that yields dchar values.
+ *
+ * The array is walked from the end.  A low surrogate (0xDC00..0xDFFF) is
+ * combined with the code unit in front of it, which must be a high
+ * surrogate (0xD800..0xDBFF).  dg is called with a pointer to each dchar;
+ * the first non-zero result ends the iteration and is returned, otherwise 0.
+ *
+ * A low surrogate that is the first element of the array, or that is not
+ * preceded by a high surrogate, is reported through onUnicodeError.
+ */
+extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
+{   int result;
+
+    debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
+    for (size_t i = aa.length; i != 0; )
+    {   dchar d;
+
+        i--;
+        d = aa[i];
+        if (d >= 0xDC00 && d <= 0xDFFF)
+        {   if (i == 0)
+                onUnicodeError("Invalid UTF-16 sequence", 0);
+            i--;
+            wchar hi = aa[i];
+            // Without this check an arbitrary preceding unit would be
+            // folded into a meaningless code point.
+            if (hi < 0xD800 || hi > 0xDBFF)
+                onUnicodeError("Invalid UTF-16 sequence", i);
+            // 0xD7C0 == 0xD800 - (0x10000 >> 10): removes the surrogate
+            // bias and adds the 0x10000 offset in one step.
+            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
+        }
+        // NOTE(review): a high surrogate that is not followed by a low
+        // surrogate is still passed through unchanged.
+        result = dg(cast(void *)&d);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+// Exercises foreach_reverse (dchar) over a UTF-16 string: first pure ASCII,
+// then a string containing a BMP character and a surrogate pair.
+unittest
+{
+    debug(apply) printf("_aApplyRwd1.unittest\n");
+
+    auto s = "hello"w;
+    int i;
+
+    // i counts callbacks; the characters must arrive last-to-first.
+    foreach_reverse(dchar d; s)
+    {
+        switch (i)
+        {
+            case 0:     assert(d == 'o'); break;
+            case 1:     assert(d == 'l'); break;
+            case 2:     assert(d == 'l'); break;
+            case 3:     assert(d == 'e'); break;
+            case 4:     assert(d == 'h'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+
+    // The surrogate pair for U+100456 must come back as one code point.
+    s = "a\u1234\U00100456b";
+    i = 0;
+    foreach_reverse(dchar d; s)
+    {
+        //printf("i = %d, d = %x\n", i, d);
+        switch (i)
+        {
+            case 0:     assert(d == 'b'); break;
+            case 1:     assert(d == '\U00100456'); break;
+            case 2:     assert(d == '\u1234'); break;
+            case 3:     assert(d == 'a'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 4);
+}
+
+/*****************************/
+
+/**
+ * foreach_reverse over a char[] that yields wchar values.
+ *
+ * The array is walked from the end and each UTF-8 sequence is decoded
+ * backwards (continuation bytes first, then the lead byte).  Code points up
+ * to 0xFFFF are delivered as one wchar; larger ones as a surrogate pair with
+ * the high half delivered first.  The first non-zero result of dg ends the
+ * iteration and is returned, otherwise 0.
+ *
+ * Malformed input is reported through onUnicodeError when the start of the
+ * array is reached without finding a lead byte, when a byte inside the tail
+ * is not a continuation byte, or when more than three continuation bytes
+ * precede the lead byte.
+ */
+extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
+{   int result;
+
+    debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
+    for (size_t i = aa.length; i != 0; )
+    {   dchar d;
+        wchar w;
+
+        i--;
+        w = aa[i];
+        if (w & 0x80)
+        {   char c = cast(char)w;
+            uint j;             // bit position of the next 6-bit group
+            uint m = 0x3F;      // payload mask for the lead byte
+            d = 0;
+            while ((c & 0xC0) != 0xC0)
+            {   // Bounding j at 18 keeps every shift below the width of d;
+                // an unbounded run of continuation bytes would otherwise
+                // shift by 32 or more, which is undefined.
+                if (i == 0 || (c & 0xC0) != 0x80 || j >= 18)
+                    onUnicodeError("Invalid UTF-8 sequence", i);
+                i--;
+                d |= (c & 0x3F) << j;
+                j += 6;
+                m >>= 1;        // each continuation byte removes one lead payload bit
+                c = aa[i];
+            }
+            // NOTE(review): the lead byte's length marker is not compared
+            // with the number of continuation bytes actually seen, and a
+            // lead byte with no continuation bytes is still accepted.
+            d |= (c & m) << j;
+
+            if (d <= 0xFFFF)
+                w = cast(wchar) d;
+            else
+            {
+                // Surrogate pair: the high half goes out first even though
+                // the string itself is being traversed backwards.
+                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
+                result = dg(cast(void *)&w);
+                if (result)
+                    break;
+                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
+            }
+        }
+        result = dg(cast(void *)&w);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+// Exercises foreach_reverse (wchar) over a UTF-8 string: first pure ASCII,
+// then a string whose 4-byte sequence must expand to a surrogate pair.
+unittest
+{
+    debug(apply) printf("_aApplyRcw1.unittest\n");
+
+    auto s = "hello"c;
+    int i;
+
+    // i counts callbacks; the characters must arrive last-to-first.
+    foreach_reverse(wchar d; s)
+    {
+        switch (i)
+        {
+            case 0:     assert(d == 'o'); break;
+            case 1:     assert(d == 'l'); break;
+            case 2:     assert(d == 'l'); break;
+            case 3:     assert(d == 'e'); break;
+            case 4:     assert(d == 'h'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+
+    // U+100456 arrives as 0xDBC1 then 0xDC56: code points are visited in
+    // reverse, but the two halves of a pair keep their high-then-low order.
+    s = "a\u1234\U00100456b";
+    i = 0;
+    foreach_reverse(wchar d; s)
+    {
+        //printf("i = %d, d = %x\n", i, d);
+        switch (i)
+        {
+            case 0:     assert(d == 'b'); break;
+            case 1:     assert(d == 0xDBC1); break;
+            case 2:     assert(d == 0xDC56); break;
+            case 3:     assert(d == 0x1234); break;
+            case 4:     assert(d == 'a'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+}
+
+/*****************************/
+
+/**
+ * foreach_reverse over a wchar[] that yields char values.
+ *
+ * The array is walked from the end.  A low surrogate is combined with the
+ * high surrogate in front of it.  Code points below 0x80 are delivered as
+ * one char; all others are converted with toUTF8 and delivered byte by
+ * byte in the order toUTF8 produced them.  The first non-zero result of dg
+ * ends the iteration and is returned, otherwise 0.
+ *
+ * A low surrogate that is the first element of the array, or that is not
+ * preceded by a high surrogate (0xD800..0xDBFF), is reported through
+ * onUnicodeError.
+ */
+extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
+{   int result;
+
+    debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
+    for (size_t i = aa.length; i != 0; )
+    {   dchar d;
+        char c;
+
+        i--;
+        d = aa[i];
+        if (d >= 0xDC00 && d <= 0xDFFF)
+        {   if (i == 0)
+                onUnicodeError("Invalid UTF-16 sequence", 0);
+            i--;
+            wchar hi = aa[i];
+            // Without this check an arbitrary preceding unit would be
+            // folded into a meaningless code point.
+            if (hi < 0xD800 || hi > 0xDBFF)
+                onUnicodeError("Invalid UTF-16 sequence", i);
+            // 0xD7C0 == 0xD800 - (0x10000 >> 10): removes the surrogate
+            // bias and adds the 0x10000 offset in one step.
+            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
+        }
+
+        if (d & ~0x7F)
+        {
+            char[4] buf;
+
+            auto b = toUTF8(buf, d);
+            foreach (char c2; b)
+            {
+                result = dg(cast(void *)&c2);
+                if (result)
+                    return result;
+            }
+            continue;
+        }
+        c = cast(char)d;
+        result = dg(cast(void *)&c);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+// Exercises foreach_reverse (char) over a UTF-16 string: first pure ASCII,
+// then a string whose non-ASCII characters expand to several UTF-8 bytes.
+unittest
+{
+    debug(apply) printf("_aApplyRwc1.unittest\n");
+
+    auto s = "hello"w;
+    int i;
+
+    // i counts callbacks; the characters must arrive last-to-first.
+    foreach_reverse(char d; s)
+    {
+        switch (i)
+        {
+            case 0:     assert(d == 'o'); break;
+            case 1:     assert(d == 'l'); break;
+            case 2:     assert(d == 'l'); break;
+            case 3:     assert(d == 'e'); break;
+            case 4:     assert(d == 'h'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+
+    // Code points are visited in reverse, but the bytes of each one keep
+    // their encoding order: F4 80 91 96 is U+100456, E1 88 B4 is U+1234.
+    s = "a\u1234\U00100456b";
+    i = 0;
+    foreach_reverse(char d; s)
+    {
+        //printf("i = %d, d = %x\n", i, d);
+        switch (i)
+        {
+            case 0:     assert(d == 'b'); break;
+            case 1:     assert(d == 0xF4); break;
+            case 2:     assert(d == 0x80); break;
+            case 3:     assert(d == 0x91); break;
+            case 4:     assert(d == 0x96); break;
+            case 5:     assert(d == 0xE1); break;
+            case 6:     assert(d == 0x88); break;
+            case 7:     assert(d == 0xB4); break;
+            case 8:     assert(d == 'a'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 9);
+}
+
+/*****************************/
+
+/**
+ * foreach_reverse over a dchar[] that yields char values.
+ *
+ * Elements are visited last to first.  Code points below 0x80 are delivered
+ * as one char; all others are converted with toUTF8 and delivered byte by
+ * byte in the order toUTF8 produced them.  The first non-zero result of dg
+ * ends the iteration and is returned, otherwise 0.
+ */
+extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
+{
+    debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);
+
+    size_t remaining = aa.length;
+    while (remaining != 0)
+    {
+        dchar cp = aa[--remaining];
+
+        if (cp < 0x80)
+        {
+            char ascii = cast(char) cp;
+
+            int rc = dg(cast(void *)&ascii);
+            if (rc)
+                return rc;
+        }
+        else
+        {
+            char[4] utf8;
+            auto    bytes = toUTF8(utf8, cp);
+
+            foreach (char b; bytes)
+            {
+                int rc = dg(cast(void *)&b);
+                if (rc)
+                    return rc;
+            }
+        }
+    }
+    return 0;
+}
+
+// Exercises foreach_reverse (char) over a UTF-32 string: first pure ASCII,
+// then a string whose non-ASCII characters expand to several UTF-8 bytes.
+unittest
+{
+    debug(apply) printf("_aApplyRdc1.unittest\n");
+
+    auto s = "hello"d;
+    int i;
+
+    // i counts callbacks; the characters must arrive last-to-first.
+    foreach_reverse(char d; s)
+    {
+        switch (i)
+        {
+            case 0:     assert(d == 'o'); break;
+            case 1:     assert(d == 'l'); break;
+            case 2:     assert(d == 'l'); break;
+            case 3:     assert(d == 'e'); break;
+            case 4:     assert(d == 'h'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+
+    // Code points are visited in reverse, but the bytes of each one keep
+    // their encoding order: F4 80 91 96 is U+100456, E1 88 B4 is U+1234.
+    s = "a\u1234\U00100456b";
+    i = 0;
+    foreach_reverse(char d; s)
+    {
+        //printf("i = %d, d = %x\n", i, d);
+        switch (i)
+        {
+            case 0:     assert(d == 'b'); break;
+            case 1:     assert(d == 0xF4); break;
+            case 2:     assert(d == 0x80); break;
+            case 3:     assert(d == 0x91); break;
+            case 4:     assert(d == 0x96); break;
+            case 5:     assert(d == 0xE1); break;
+            case 6:     assert(d == 0x88); break;
+            case 7:     assert(d == 0xB4); break;
+            case 8:     assert(d == 'a'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 9);
+}
+
+/*****************************/
+
+/**
+ * foreach_reverse over a dchar[] that yields wchar values.
+ *
+ * Elements are visited last to first.  Code points up to 0xFFFF are
+ * delivered as one wchar; larger ones as a surrogate pair, high half first.
+ * The first non-zero result of dg ends the iteration and is returned,
+ * otherwise 0.
+ */
+extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
+{
+    debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);
+
+    size_t remaining = aa.length;
+    while (remaining != 0)
+    {
+        dchar cp = aa[--remaining];
+        wchar unit;
+        int   rc;
+
+        if (cp > 0xFFFF)
+        {
+            unit = cast(wchar) ((((cp - 0x10000) >> 10) & 0x3FF) + 0xD800);
+            rc = dg(cast(void *)&unit);
+            if (rc)
+                return rc;
+            unit = cast(wchar) (((cp - 0x10000) & 0x3FF) + 0xDC00);
+        }
+        else
+            unit = cast(wchar) cp;
+
+        rc = dg(cast(void *)&unit);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+// Exercises foreach_reverse (wchar) over a UTF-32 string: first pure ASCII,
+// then a string whose supplementary character expands to a surrogate pair.
+unittest
+{
+    debug(apply) printf("_aApplyRdw1.unittest\n");
+
+    auto s = "hello"d;
+    int i;
+
+    // i counts callbacks; the characters must arrive last-to-first.
+    foreach_reverse(wchar d; s)
+    {
+        switch (i)
+        {
+            case 0:     assert(d == 'o'); break;
+            case 1:     assert(d == 'l'); break;
+            case 2:     assert(d == 'l'); break;
+            case 3:     assert(d == 'e'); break;
+            case 4:     assert(d == 'h'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+
+    // U+100456 arrives as 0xDBC1 then 0xDC56: code points are visited in
+    // reverse, but the two halves of a pair keep their high-then-low order.
+    s = "a\u1234\U00100456b";
+    i = 0;
+    foreach_reverse(wchar d; s)
+    {
+        //printf("i = %d, d = %x\n", i, d);
+        switch (i)
+        {
+            case 0:     assert(d == 'b'); break;
+            case 1:     assert(d == 0xDBC1); break;
+            case 2:     assert(d == 0xDC56); break;
+            case 3:     assert(d == 0x1234); break;
+            case 4:     assert(d == 'a'); break;
+            default:    assert(0);
+        }
+        i++;
+    }
+    assert(i == 5);
+}
+
+
+/****************************************************************************/
+/* 2 argument versions */
+
+// dg is D, but _aApplyRcd2() is C
+extern (D) typedef int delegate(void *, void *) dg2_t;
+
+/**
+ * foreach_reverse (index, dchar) over a char[].
+ *
+ * The array is walked from the end and each UTF-8 sequence is decoded
+ * backwards (continuation bytes first, then the lead byte).  dg is called
+ * with a pointer to the live index, which at that point names the first
+ * byte of the sequence, and a pointer to the decoded dchar.  The first
+ * non-zero result of dg ends the iteration and is returned, otherwise 0.
+ *
+ * Malformed input is reported through onUnicodeError when the start of the
+ * array is reached without finding a lead byte, when a byte inside the tail
+ * is not a continuation byte, or when more than three continuation bytes
+ * precede the lead byte.
+ */
+extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
+{   int result;
+    size_t i;
+    size_t len = aa.length;
+
+    debug(apply) printf("_aApplyRcd2(), len = %d\n", len);
+    for (i = len; i != 0; )
+    {   dchar d;
+
+        i--;
+        d = aa[i];
+        if (d & 0x80)
+        {   char c = cast(char)d;
+            uint j;             // bit position of the next 6-bit group
+            uint m = 0x3F;      // payload mask for the lead byte
+            d = 0;
+            while ((c & 0xC0) != 0xC0)
+            {   // Bounding j at 18 keeps every shift below the width of d;
+                // an unbounded run of continuation bytes would otherwise
+                // shift by 32 or more, which is undefined.
+                if (i == 0 || (c & 0xC0) != 0x80 || j >= 18)
+                    onUnicodeError("Invalid UTF-8 sequence", i);
+                i--;
+                d |= (c & 0x3F) << j;
+                j += 6;
+                m >>= 1;        // each continuation byte removes one lead payload bit
+                c = aa[i];
+            }
+            // NOTE(review): the lead byte's length marker is not compared
+            // with the number of continuation bytes actually seen, and a
+            // lead byte with no continuation bytes is still accepted.
+            d |= (c & m) << j;
+        }
+        result = dg(&i, cast(void *)&d);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+unittest
+{
+    debug(apply) printf("_aApplyRcd2.unittest\n");
+
+    // ASCII only: one byte per code point, so the index counts down 4..0.
+    auto s = "hello"c;
+    static uint[5] asciiChars = [ 'o', 'l', 'l', 'e', 'h' ];
+    int i;
+
+    foreach_reverse(k, dchar d; s)
+    {
+        assert(k == 4 - i);
+        assert(i < 5);
+        assert(d == asciiChars[i]);
+        i++;
+    }
+    assert(i == 5);
+
+    // Mixed widths: the reported index is that of each sequence's first byte.
+    s = "a\u1234\U00100456b";
+    static uint[4]   mixedChars = [ 'b', 0x100456, 0x1234, 'a' ];
+    static size_t[4] mixedIndex = [ 8, 4, 1, 0 ];
+    i = 0;
+    foreach_reverse(k, dchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(i < 4);
+        assert(d == mixedChars[i]);
+        assert(k == mixedIndex[i]);
+        i++;
+    }
+    assert(i == 4);
+}
+
+/*****************************/
+
+// Visit a UTF-16 array back to front, handing dg the index of each code
+// point's first code unit together with the decoded dchar.  Iteration stops
+// at, and returns, the first non-zero delegate result; otherwise 0.
+extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
+{
+    debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
+
+    size_t i = aa.length;
+    while (i != 0)
+    {
+        --i;
+        dchar d = aa[i];
+
+        // A low surrogate must be combined with the unit in front of it.
+        if (d >= 0xDC00 && d <= 0xDFFF)
+        {
+            if (i == 0)
+                onUnicodeError("Invalid UTF-16 sequence", 0);
+            --i;
+            d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
+        }
+
+        int rc = dg(&i, cast(void *)&d);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+unittest
+{
+    debug(apply) printf("_aApplyRwd2.unittest\n");
+
+    // ASCII only: one code unit per code point, index counts down 4..0.
+    auto s = "hello"w;
+    static uint[5] asciiChars = [ 'o', 'l', 'l', 'e', 'h' ];
+    int i;
+
+    foreach_reverse(k, dchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(k == 4 - i);
+        assert(i < 5);
+        assert(d == asciiChars[i]);
+        i++;
+    }
+    assert(i == 5);
+
+    // The surrogate pair occupies units 2..3 and is reported at index 2.
+    s = "a\u1234\U00100456b";
+    static size_t[4] mixedIndex = [ 4, 2, 1, 0 ];
+    static uint[4]   mixedChars = [ 'b', 0x100456, 0x1234, 'a' ];
+    i = 0;
+    foreach_reverse(k, dchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(i < 4);
+        assert(k == mixedIndex[i]);
+        assert(d == mixedChars[i]);
+        i++;
+    }
+    assert(i == 4);
+}
+
+/*****************************/
+
+// Walk a UTF-8 array from its end towards its start and deliver the text as
+// UTF-16 code units via dg(&index, &wchar).  The index is the position of
+// the lead byte of the UTF-8 sequence; code points above 0xFFFF produce two
+// delegate calls (high surrogate, then low surrogate) with the same index.
+// Returns the first non-zero delegate result, or 0.
+extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
+{   int result;
+
+    debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
+    for (size_t i = aa.length; i != 0; )
+    {   dchar d;            // only used on the multi-byte path
+        wchar w;
+
+        i--;
+        w = aa[i];
+        if (w & 0x80)
+        {   // Non-ASCII: aa[i] is the last byte of a multi-byte sequence.
+            char c = cast(char)w;
+            uint j;             // bit offset of the next 6-bit group (default-initialised to 0)
+            uint m = 0x3F;      // payload mask for the lead byte, narrowed per trailing byte
+            d = 0;
+            // Step backwards until a byte with both top bits set is reached.
+            while ((c & 0xC0) != 0xC0)
+            {   if (i == 0)
+                    onUnicodeError("Invalid UTF-8 sequence", 0);
+                i--;
+                d |= (c & 0x3F) << j;
+                j += 6;
+                m >>= 1;
+                c = aa[i];
+            }
+            d |= (c & m) << j;
+
+            if (d <= 0xFFFF)
+                w = cast(wchar) d;
+            else
+            {
+                // Emit the high surrogate now; the low surrogate is sent by
+                // the common delegate call after this block.
+                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
+                result = dg(&i, cast(void *)&w);
+                if (result)
+                    break;
+                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
+            }
+        }
+        result = dg(&i, cast(void *)&w);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+unittest
+{
+    debug(apply) printf("_aApplyRcw2.unittest\n");
+
+    // ASCII only: every byte becomes one wchar, index counts down 4..0.
+    auto s = "hello"c;
+    static uint[5] asciiUnits = [ 'o', 'l', 'l', 'e', 'h' ];
+    int i;
+
+    foreach_reverse(k, wchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(k == 4 - i);
+        assert(i < 5);
+        assert(d == asciiUnits[i]);
+        i++;
+    }
+    assert(i == 5);
+
+    // Both surrogate halves of U+100456 are reported at byte index 4.
+    s = "a\u1234\U00100456b";
+    static size_t[5] mixedIndex = [ 8, 4, 4, 1, 0 ];
+    static uint[5]   mixedUnits = [ 'b', 0xDBC1, 0xDC56, 0x1234, 'a' ];
+    i = 0;
+    foreach_reverse(k, wchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(i < 5);
+        assert(k == mixedIndex[i]);
+        assert(d == mixedUnits[i]);
+        i++;
+    }
+    assert(i == 5);
+}
+
+/*****************************/
+
+// Walk a UTF-16 array from its end towards its start and deliver the text as
+// UTF-8 bytes via dg(&index, &char).  The index is the position of the first
+// code unit of the code point; a non-ASCII code point produces one delegate
+// call per encoded byte, all with the same index.  Returns the first
+// non-zero delegate result, or 0.
+extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
+{   int result;
+
+    debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
+    for (size_t i = aa.length; i != 0; )
+    {   dchar d;
+        char c;
+
+        i--;
+        d = aa[i];
+        // A low surrogate is combined with the code unit in front of it.
+        if (d >= 0xDC00 && d <= 0xDFFF)
+        {   if (i == 0)
+                onUnicodeError("Invalid UTF-16 sequence", 0);
+            i--;
+            d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
+        }
+
+        if (d & ~0x7F)
+        {
+            // Non-ASCII: encode into a scratch buffer and hand out the
+            // bytes in the order toUTF8 produced them.
+            char[4] buf;
+
+            auto b = toUTF8(buf, d);
+            foreach (char c2; b)
+            {
+                result = dg(&i, cast(void *)&c2);
+                if (result)
+                    return result;
+            }
+            continue;
+        }
+        c = cast(char)d;
+        result = dg(&i, cast(void *)&c);
+        if (result)
+            break;
+    }
+    return result;
+}
+
+unittest
+{
+    debug(apply) printf("_aApplyRwc2.unittest\n");
+
+    // ASCII only: one byte per code unit, index counts down 4..0.
+    auto s = "hello"w;
+    static uint[5] asciiBytes = [ 'o', 'l', 'l', 'e', 'h' ];
+    int i;
+
+    foreach_reverse(k, char d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(k == 4 - i);
+        assert(i < 5);
+        assert(d == asciiBytes[i]);
+        i++;
+    }
+    assert(i == 5);
+
+    // Code points are visited last to first, but the bytes of each one are
+    // delivered in encoding order and share the code point's index.
+    s = "a\u1234\U00100456b";
+    static size_t[9] mixedIndex = [ 4, 2, 2, 2, 2, 1, 1, 1, 0 ];
+    static uint[9]   mixedBytes = [ 'b', 0xF4, 0x80, 0x91, 0x96,
+                                    0xE1, 0x88, 0xB4, 'a' ];
+    i = 0;
+    foreach_reverse(k, char d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(i < 9);
+        assert(k == mixedIndex[i]);
+        assert(d == mixedBytes[i]);
+        i++;
+    }
+    assert(i == 9);
+}
+
+/*****************************/
+
+// Visit a UTF-32 array back to front and deliver it as UTF-8 bytes through
+// dg(&index, &char).  ASCII code points yield one call; anything else is
+// encoded with toUTF8 and yields one call per byte, all carrying the index
+// of the source dchar.  Stops at, and returns, the first non-zero delegate
+// result; returns 0 otherwise.
+extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
+{
+    debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);
+
+    size_t i = aa.length;
+    while (i != 0)
+    {
+        dchar d = aa[--i];
+
+        if ((d & ~0x7F) == 0)
+        {
+            // Plain ASCII: pass it through as a single byte.
+            char c = cast(char)d;
+            int rc = dg(&i, cast(void *)&c);
+            if (rc)
+                return rc;
+        }
+        else
+        {
+            char[4] buf;
+
+            auto b = toUTF8(buf, d);
+            foreach (char c2; b)
+            {
+                int rc = dg(&i, cast(void *)&c2);
+                if (rc)
+                    return rc;
+            }
+        }
+    }
+    return 0;
+}
+
+unittest
+{
+    debug(apply) printf("_aApplyRdc2.unittest\n");
+
+    // ASCII only: one byte per dchar, index counts down 4..0.
+    auto s = "hello"d;
+    static uint[5] asciiBytes = [ 'o', 'l', 'l', 'e', 'h' ];
+    int i;
+
+    foreach_reverse(k, char d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(k == 4 - i);
+        assert(i < 5);
+        assert(d == asciiBytes[i]);
+        i++;
+    }
+    assert(i == 5);
+
+    // Multi-byte code points: bytes come out in encoding order, each tagged
+    // with the index of the dchar they belong to.
+    s = "a\u1234\U00100456b";
+    static size_t[9] mixedIndex = [ 3, 2, 2, 2, 2, 1, 1, 1, 0 ];
+    static uint[9]   mixedBytes = [ 'b', 0xF4, 0x80, 0x91, 0x96,
+                                    0xE1, 0x88, 0xB4, 'a' ];
+    i = 0;
+    foreach_reverse(k, char d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(i < 9);
+        assert(k == mixedIndex[i]);
+        assert(d == mixedBytes[i]);
+        i++;
+    }
+    assert(i == 9);
+}
+
+/*****************************/
+
+// Visit a UTF-32 array back to front and deliver it as UTF-16 code units
+// through dg(&index, &wchar).  Code points above 0xFFFF are split into a
+// high and a low surrogate, sent in that order with the same index.  Stops
+// at, and returns, the first non-zero delegate result; returns 0 otherwise.
+extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
+{
+    debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);
+
+    size_t i = aa.length;
+    while (i != 0)
+    {
+        dchar d = aa[--i];
+        wchar w;
+
+        if (d > 0xFFFF)
+        {
+            // Send the high surrogate first; the low one goes out below.
+            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
+            int hi = dg(&i, cast(void *)&w);
+            if (hi)
+                return hi;
+            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
+        }
+        else
+            w = cast(wchar) d;
+
+        int rc = dg(&i, cast(void *)&w);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+unittest
+{
+    debug(apply) printf("_aApplyRdw2.unittest\n");
+
+    // ASCII only: one wchar per dchar, index counts down 4..0.
+    auto s = "hello"d;
+    static uint[5] asciiUnits = [ 'o', 'l', 'l', 'e', 'h' ];
+    int i;
+
+    foreach_reverse(k, wchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(k == 4 - i);
+        assert(i < 5);
+        assert(d == asciiUnits[i]);
+        i++;
+    }
+    assert(i == 5);
+
+    // Both surrogate halves of U+100456 are reported at dchar index 2.
+    s = "a\u1234\U00100456b";
+    static size_t[5] mixedIndex = [ 3, 2, 2, 1, 0 ];
+    static uint[5]   mixedUnits = [ 'b', 0xDBC1, 0xDC56, 0x1234, 'a' ];
+    i = 0;
+    foreach_reverse(k, wchar d; s)
+    {
+        //printf("i = %d, k = %d, d = %x\n", i, k, d);
+        assert(i < 5);
+        assert(k == mixedIndex[i]);
+        assert(d == mixedUnits[i]);
+        i++;
+    }
+    assert(i == 5);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/aaA.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,886 @@
+//_ aaA.d
+
+/**
+ * Part of the D programming language runtime library.
+ * Implementation of associative arrays.
+ */
+
+/*
+ *  Copyright (C) 2000-2008 by Digital Mars, http://www.digitalmars.com
+ *  Written by Walter Bright
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty. In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  o  The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  o  Altered source versions must be plainly marked as such, and must not
+ *     be misrepresented as being the original software.
+ *  o  This notice may not be removed or altered from any source
+ *     distribution.
+ */
+
+/*
+ *  Modified by Sean Kelly for use with the D Runtime Project
+ */
+
+module rt.aaA;
+
+private
+{
+    import stdc.stdarg;
+    import stdc.string;
+
+    enum BlkAttr : uint
+    {
+        FINALIZE = 0b0000_0001,
+        NO_SCAN  = 0b0000_0010,
+        NO_MOVE  = 0b0000_0100,
+        ALL_BITS = 0b1111_1111
+    }
+
+    extern (C) void* gc_malloc( size_t sz, uint ba = 0 );
+    extern (C) void* gc_calloc( size_t sz, uint ba = 0 );
+    extern (C) void  gc_free( void* p );
+}
+
+// Auto-rehash and pre-allocate - Dave Fladebo
+
+// Candidate bucket counts in ascending order.  _aaGet() starts a new table
+// with prime_list[0] buckets; _aaRehash() and _d_assocarrayliteralT() take
+// the first entry that is >= the element count, falling back to the last
+// entry when none is large enough.
+static size_t[] prime_list = [
+              97UL,            389UL,
+           1_543UL,          6_151UL,
+          24_593UL,         98_317UL,
+          393_241UL,      1_572_869UL,
+        6_291_469UL,     25_165_843UL,
+      100_663_319UL,    402_653_189UL,
+    1_610_612_741UL,  4_294_967_291UL,
+//  8_589_934_513UL, 17_179_869_143UL
+];
+
+/* This is the type of the return value for dynamic arrays.
+ * It should be a type that is returned in registers.
+ * Although DMD will return types of Array in registers,
+ * gcc will not, so we instead use a 'long'.
+ */
+alias long ArrayRet_t;
+
+// Length/pointer pair built by _aaKeys() and _aaValues(), which return it
+// to the caller reinterpreted as an ArrayRet_t.
+struct Array
+{
+    size_t length;      // number of elements
+    void* ptr;          // start of the element storage
+}
+
+// Header of one entry.  Each bucket is a binary tree of these nodes, ordered
+// by hash and, for equal hashes, by the key TypeInfo's compare().  The key
+// is stored directly behind the header (at `e + 1`) and the value follows
+// the key at an offset of aligntsize(key size).
+struct aaA
+{
+    aaA *left;          // subtree of entries ordering before this one
+    aaA *right;         // subtree of entries ordering after this one
+    hash_t hash;        // cached hash of the key
+    /* key   */
+    /* value */
+}
+
+// The hash table proper: bucket array plus bookkeeping.
+struct BB
+{
+    aaA*[] b;           // buckets; each element is the root of a tree (or null)
+    size_t nodes;       // total number of aaA nodes
+    TypeInfo keyti;     // TODO: replace this with TypeInfo_AssociativeArray when available in _aaGet()
+}
+
+/* This is the type actually seen by the programmer, although
+ * it is completely opaque.
+ */
+
+struct AA
+{
+    BB* a;              // null until the first insertion (_aaGet allocates it)
+}
+
+/**********************************
+ * Align to next pointer boundary, so that
+ * GC won't be faced with misaligned pointers
+ * in value.
+ */
+
+size_t aligntsize(size_t tsize)
+{
+    // Round up to a multiple of size_t.sizeof.
+    // Open question from the original author: is pointer alignment on
+    // the x64 4 bytes or 8?
+    const size_t mask = size_t.sizeof - 1;
+
+    return (tsize + mask) & ~mask;
+}
+
+extern (C):
+
+/*************************************************
+ * Invariant for aa.
+ */
+
+/+
+void _aaInvAh(aaA*[] aa)
+{
+    for (size_t i = 0; i < aa.length; i++)
+    {
+        if (aa[i])
+            _aaInvAh_x(aa[i]);
+    }
+}
+
+private int _aaCmpAh_x(aaA *e1, aaA *e2)
+{   int c;
+
+    c = e1.hash - e2.hash;
+    if (c == 0)
+    {
+        c = e1.key.length - e2.key.length;
+        if (c == 0)
+            c = memcmp((char *)e1.key, (char *)e2.key, e1.key.length);
+    }
+    return c;
+}
+
+private void _aaInvAh_x(aaA *e)
+{
+    hash_t key_hash;
+    aaA *e1;
+    aaA *e2;
+
+    key_hash = getHash(e.key);
+    assert(key_hash == e.hash);
+
+    while (1)
+    {   int c;
+
+        e1 = e.left;
+        if (e1)
+        {
+            _aaInvAh_x(e1);             // ordinary recursion
+            do
+            {
+                c = _aaCmpAh_x(e1, e);
+                assert(c < 0);
+                e1 = e1.right;
+            } while (e1 != null);
+        }
+
+        e2 = e.right;
+        if (e2)
+        {
+            do
+            {
+                c = _aaCmpAh_x(e, e2);
+                assert(c < 0);
+                e2 = e2.left;
+            } while (e2 != null);
+            e = e.right;                // tail recursion
+        }
+        else
+            break;
+    }
+}
++/
+
+/****************************************************
+ * Determine number of entries in associative array.
+ */
+
+size_t _aaLen(AA aa)
+in
+{
+    //printf("_aaLen()+\n");
+    //_aaInv(aa);
+}
+out (result)
+{
+    // Verify the cached node counter by actually walking every bucket tree.
+    size_t countNodes(aaA* node)
+    {
+        size_t n = 0;
+
+        // Iterate down the left spine, recursing only into right children.
+        for (; node; node = node.left)
+        {
+            n++;
+            if (node.right)
+                n += countNodes(node.right);
+        }
+        return n;
+    }
+
+    size_t walked = 0;
+
+    if (aa.a)
+    {
+        foreach (root; aa.a.b)
+        {
+            if (root)
+                walked += countNodes(root);
+        }
+    }
+    assert(walked == result);
+
+    //printf("_aaLen()-\n");
+}
+body
+{
+    // An AA that was never written to has no table and therefore no entries.
+    if (!aa.a)
+        return 0;
+    return aa.a.nodes;
+}
+
+
+/*************************************************
+ * Get pointer to value in associative array indexed by key.
+ * Add entry for key if it is not already there.
+ */
+
+void* _aaGet(AA* aa, TypeInfo keyti, size_t valuesize, ...)
+in
+{
+    assert(aa);
+}
+out (result)
+{
+    assert(result);
+    assert(aa.a);
+    assert(aa.a.b.length);
+    //assert(_aaInAh(*aa.a, key));
+}
+body
+{
+    // The key is read from the memory directly behind the last named
+    // parameter.  NOTE(review): this assumes the variadic key argument is
+    // laid out immediately after `valuesize` - confirm for each target ABI.
+    auto pkey = cast(void *)(&valuesize + 1);
+    size_t i;
+    aaA *e;
+    auto keysize = aligntsize(keyti.tsize());
+
+    // Create the table lazily on first use.
+    if (!aa.a)
+        aa.a = new BB();
+    aa.a.keyti = keyti;
+
+    if (!aa.a.b.length)
+    {
+        alias aaA *pa;
+        auto len = prime_list[0];
+
+        aa.a.b = new pa[len];
+    }
+
+    auto key_hash = keyti.getHash(pkey);
+    //printf("hash = %d\n", key_hash);
+    i = key_hash % aa.a.b.length;
+    // pe always points at the link that would have to be updated to insert
+    // the key at the current position of the search.
+    auto pe = &aa.a.b[i];
+    while ((e = *pe) !is null)
+    {
+        if (key_hash == e.hash)
+        {
+            auto c = keyti.compare(pkey, e + 1);
+            if (c == 0)
+                goto Lret;
+            pe = (c < 0) ? &e.left : &e.right;
+        }
+        else
+            pe = (key_hash < e.hash) ? &e.left : &e.right;
+    }
+
+    // Not found, create new elem
+    //printf("create new one\n");
+    // Node layout: header, key padded to keysize, then the value.  The value
+    // area is left as gc_calloc returned it.
+    size_t size = aaA.sizeof + keysize + valuesize;
+    e = cast(aaA *) gc_calloc(size);
+    memcpy(e + 1, pkey, keysize);
+    e.hash = key_hash;
+    *pe = e;
+
+    auto nodes = ++aa.a.nodes;
+    //printf("length = %d, nodes = %d\n", (*aa.a).length, nodes);
+    // Grow the table once there are more than four nodes per bucket on
+    // average.  _aaRehash relinks the existing nodes, so `e` is still the
+    // node to return afterwards.
+    if (nodes > aa.a.b.length * 4)
+    {
+        _aaRehash(aa,keyti);
+    }
+
+Lret:
+    // The value lives behind the (aligned) key.
+    return cast(void *)(e + 1) + keysize;
+}
+
+
+/*************************************************
+ * Get pointer to value in associative array indexed by key.
+ * Returns null if it is not already there.
+ */
+
+void* _aaGetRvalue(AA aa, TypeInfo keyti, size_t valuesize, ...)
+{
+    // Read-only lookup: unlike _aaGet() nothing is allocated or inserted.
+    // Returns a pointer to the value stored for the key, or null when the
+    // table is missing, empty, or does not contain the key.
+    //printf("_aaGetRvalue(valuesize = %u)\n", valuesize);
+    if (!aa.a)
+        return null;
+
+    // The key is read from the memory directly behind the last named
+    // parameter.  NOTE(review): this assumes the variadic key argument is
+    // laid out immediately after `valuesize` - confirm for each target ABI.
+    auto pkey = cast(void *)(&valuesize + 1);
+    auto keysize = aligntsize(keyti.tsize());
+    auto len = aa.a.b.length;
+
+    if (len)
+    {
+        auto key_hash = keyti.getHash(pkey);
+        //printf("hash = %d\n", key_hash);
+        size_t i = key_hash % len;
+        auto e = aa.a.b[i];
+        // Descend the bucket's tree: ordered by hash first, by key second.
+        while (e !is null)
+        {
+            if (key_hash == e.hash)
+            {
+                auto c = keyti.compare(pkey, e + 1);
+                if (c == 0)
+                    return cast(void *)(e + 1) + keysize;   // value follows the aligned key
+                e = (c < 0) ? e.left : e.right;
+            }
+            else
+                e = (key_hash < e.hash) ? e.left : e.right;
+        }
+    }
+    return null;    // not found, caller will throw exception
+}
+
+
+/*************************************************
+ * Determine if key is in aa.
+ * Returns:
+ *      null    not in aa
+ *      !=null  in aa, return pointer to value
+ */
+
+void* _aaIn(AA aa, TypeInfo keyti, ...)
+in
+{
+}
+out (result)
+{
+    //assert(result == 0 || result == 1);
+}
+body
+{
+    // No table allocated yet: nothing can be in it.
+    if (!aa.a)
+        return null;
+
+    // The key is taken from the memory right behind the last named parameter.
+    auto pkey = cast(void *)(&keyti + 1);
+
+    //printf("_aaIn(), .length = %d, .ptr = %x\n", aa.a.length, cast(uint)aa.a.ptr);
+    auto len = aa.a.b.length;
+    if (!len)
+        return null;
+
+    auto key_hash = keyti.getHash(pkey);
+    //printf("hash = %d\n", key_hash);
+
+    // Descend the bucket's tree, which is ordered by hash and then by key.
+    for (auto e = aa.a.b[key_hash % len]; e !is null; )
+    {
+        if (key_hash != e.hash)
+        {
+            e = (key_hash < e.hash) ? e.left : e.right;
+        }
+        else
+        {
+            auto c = keyti.compare(pkey, e + 1);
+            if (c == 0)
+                return cast(void *)(e + 1) + aligntsize(keyti.tsize());
+            e = (c < 0) ? e.left : e.right;
+        }
+    }
+
+    // Not found
+    return null;
+}
+
+/*************************************************
+ * Delete key entry in aa[].
+ * If key is not in aa[], do nothing.
+ */
+
+void _aaDel(AA aa, TypeInfo keyti, ...)
+{
+    // The key is taken from the memory right behind the last named parameter.
+    auto pkey = cast(void *)(&keyti + 1);
+    aaA *e;
+
+    if (aa.a && aa.a.b.length)
+    {
+        auto key_hash = keyti.getHash(pkey);
+        //printf("hash = %d\n", key_hash);
+        size_t i = key_hash % aa.a.b.length;
+        // pe points at the link (bucket slot or parent's left/right field)
+        // that currently refers to the node under inspection.
+        auto pe = &aa.a.b[i];
+        while ((e = *pe) !is null) // null means not found
+        {
+            if (key_hash == e.hash)
+            {
+                auto c = keyti.compare(pkey, e + 1);
+                if (c == 0)
+                {
+                    // Unlink e, distinguishing by how many children it has.
+                    if (!e.left && !e.right)
+                    {
+                        *pe = null;
+                    }
+                    else if (e.left && !e.right)
+                    {
+                        *pe = e.left;
+                         e.left = null;
+                    }
+                    else if (!e.left && e.right)
+                    {
+                        *pe = e.right;
+                         e.right = null;
+                    }
+                    else
+                    {
+                        // Two children: the left subtree takes e's place and
+                        // the right subtree is appended at the end of the
+                        // left subtree's chain of right links.
+                        *pe = e.left;
+                        e.left = null;
+                        do
+                            pe = &(*pe).right;
+                        while (*pe);
+                        *pe = e.right;
+                        e.right = null;
+                    }
+
+                    aa.a.nodes--;
+                    gc_free(e);
+                    break;
+                }
+                pe = (c < 0) ? &e.left : &e.right;
+            }
+            else
+                pe = (key_hash < e.hash) ? &e.left : &e.right;
+        }
+    }
+}
+
+
+/********************************************
+ * Produce array of values from aa.
+ */
+
+ArrayRet_t _aaValues(AA aa, size_t keysize, size_t valuesize)
+in
+{
+    // Callers must pass the already aligned key size.
+    assert(keysize == aligntsize(keysize));
+}
+body
+{
+    size_t resi;        // index of the next free slot in the result
+    Array a;            // stays zero-length/null when the AA has no table
+
+    // Copy the value of every node in the tree rooted at e into the result.
+    // Single-child chains are followed iteratively; recursion is only used
+    // for the left subtree of nodes that have both children.
+    void _aaValues_x(aaA* e)
+    {
+        do
+        {
+            memcpy(a.ptr + resi * valuesize,
+                   cast(byte*)e + aaA.sizeof + keysize,
+                   valuesize);
+            resi++;
+            if (e.left)
+            {   if (!e.right)
+                {   e = e.left;
+                    continue;
+                }
+                _aaValues_x(e.left);
+            }
+            e = e.right;
+        } while (e !is null);
+    }
+
+    if (aa.a)
+    {
+        a.length = _aaLen(aa);
+        // Values smaller than a pointer are allocated with NO_SCAN.
+        a.ptr = cast(byte*) gc_malloc(a.length * valuesize,
+                                      valuesize < (void*).sizeof ? BlkAttr.NO_SCAN : 0);
+        resi = 0;
+        foreach (e; aa.a.b)
+        {
+            if (e)
+                _aaValues_x(e);
+        }
+        assert(resi == a.length);
+    }
+    // Hand the length/pointer pair back reinterpreted as ArrayRet_t.
+    return *cast(ArrayRet_t*)(&a);
+}
+
+
+/********************************************
+ * Rehash an array.
+ */
+
+void* _aaRehash(AA* paa, TypeInfo keyti)
+in
+{
+    //_aaInvAh(paa);
+}
+out (result)
+{
+    //_aaInvAh(result);
+}
+body
+{
+    BB newb;        // replacement table, filled in below
+
+    // Move every node of the tree rooted at olde into newb.  Nodes are
+    // relinked in place (no allocation, no copying); the children are saved
+    // before the node's links are cleared.
+    void _aaRehash_x(aaA* olde)
+    {
+        while (1)
+        {
+            auto left = olde.left;
+            auto right = olde.right;
+            olde.left = null;
+            olde.right = null;
+
+            aaA *e;
+
+            //printf("rehash %p\n", olde);
+            auto key_hash = olde.hash;
+            size_t i = key_hash % newb.b.length;
+            // Find the empty link in the new bucket where olde belongs.
+            auto pe = &newb.b[i];
+            while ((e = *pe) !is null)
+            {
+                //printf("\te = %p, e.left = %p, e.right = %p\n", e, e.left, e.right);
+                assert(e.left != e);
+                assert(e.right != e);
+                if (key_hash == e.hash)
+                {
+                    auto c = keyti.compare(olde + 1, e + 1);
+                    assert(c != 0);     // keys are unique within a table
+                    pe = (c < 0) ? &e.left : &e.right;
+                }
+                else
+                    pe = (key_hash < e.hash) ? &e.left : &e.right;
+            }
+            *pe = olde;
+
+            // Continue with the saved children: iterate where there is only
+            // one of them, recurse into the right one when there are two.
+            if (right)
+            {
+                if (!left)
+                {   olde = right;
+                    continue;
+                }
+                _aaRehash_x(right);
+            }
+            if (!left)
+                break;
+            olde = left;
+        }
+    }
+
+    //printf("Rehash\n");
+    if (paa.a)
+    {
+        auto aa = paa.a;
+        auto len = _aaLen(*paa);
+        if (len)
+        {   size_t i;
+
+            // Choose the first prime >= len, or the largest one available.
+            for (i = 0; i < prime_list.length - 1; i++)
+            {
+                if (len <= prime_list[i])
+                    break;
+            }
+            len = prime_list[i];
+            newb.b = new aaA*[len];
+
+            foreach (e; aa.b)
+            {
+                if (e)
+                    _aaRehash_x(e);
+            }
+
+            newb.nodes = aa.nodes;
+            newb.keyti = aa.keyti;
+        }
+
+        // NOTE(review): for an empty table newb is still default-initialised
+        // here, so this assignment also resets b, nodes and keyti - confirm
+        // that losing keyti is intended.
+        *paa.a = newb;
+        _aaBalance(paa);
+    }
+    return (*paa).a;
+}
+
+/********************************************
+ * Balance an array.
+ */
+
+void _aaBalance(AA* paa)
+{
+    //printf("_aaBalance()\n");
+    if (paa.a)
+    {
+        // Scratch list of node pointers; starts out as a slice of the
+        // fixed-size buffer and is doubled in length whenever it fills up.
+        // It is reused for every bucket.
+        aaA*[16] tmp;
+        aaA*[] array = tmp;
+        auto aa = paa.a;
+        foreach (j, e; aa.b)
+        {
+            /* Temporarily store contents of bucket in array[]
+             */
+            size_t k = 0;       // number of nodes collected for this bucket
+            // In-order walk: left subtree, node itself, then right subtree.
+            void addToArray(aaA* e)
+            {
+                while (e)
+                {   addToArray(e.left);
+                    if (k == array.length)
+                        array.length = array.length * 2;
+                    array[k++] = e;
+                    e = e.right;
+                }
+            }
+            addToArray(e);
+            /* The contents of the bucket are now sorted into array[].
+             * Rebuild the tree.
+             */
+            // Make the middle element of array[x1 .. x2] the root stored in
+            // *p, then build its subtrees from the two halves.
+            void buildTree(aaA** p, size_t x1, size_t x2)
+            {
+                if (x1 >= x2)
+                    *p = null;
+                else
+                {   auto mid = (x1 + x2) >> 1;
+                    *p = array[mid];
+                    buildTree(&(*p).left, x1, mid);
+                    buildTree(&(*p).right, mid + 1, x2);
+                }
+            }
+            auto p = &aa.b[j];
+            buildTree(p, 0, k);
+        }
+    }
+}
+/********************************************
+ * Produce array of N byte keys from aa.
+ */
+
+ArrayRet_t _aaKeys(AA aa, size_t keysize)
+{
+    byte[] res;         // raw result storage, len * keysize bytes
+    size_t resi;        // index of the next free key slot in res
+
+    // Copy the key of every node in the tree rooted at e into res.
+    // Single-child chains are followed iteratively; recursion is only used
+    // for the left subtree of nodes that have both children.
+    void _aaKeys_x(aaA* e)
+    {
+        do
+        {
+            memcpy(&res[resi * keysize], cast(byte*)(e + 1), keysize);
+            resi++;
+            if (e.left)
+            {   if (!e.right)
+                {   e = e.left;
+                    continue;
+                }
+                _aaKeys_x(e.left);
+            }
+            e = e.right;
+        } while (e !is null);
+    }
+
+    auto len = _aaLen(aa);
+    if (!len)
+        return 0;
+    // NO_SCAN is requested when bit 0 of the key TypeInfo's flags() is clear
+    // (presumably meaning the key type holds no pointers - TODO confirm).
+    res = (cast(byte*) gc_malloc(len * keysize,
+                                 !(aa.a.keyti.flags() & 1) ? BlkAttr.NO_SCAN : 0))[0 .. len * keysize];
+    resi = 0;
+    foreach (e; aa.a.b)
+    {
+        if (e)
+            _aaKeys_x(e);
+    }
+    assert(resi == len);
+
+    // Hand the result back as a length/pointer pair counted in keys.
+    Array a;
+    a.length = len;
+    a.ptr = res.ptr;
+    return *cast(ArrayRet_t*)(&a);
+}
+
+
+/**********************************************
+ * 'apply' for associative arrays - to support foreach
+ */
+
+// dg is D, but _aaApply() is C
+extern (D) typedef int delegate(void *) dg_t;
+
+int _aaApply(AA aa, size_t keysize, dg_t dg)
+in
+{
+    assert(aligntsize(keysize) == keysize);
+}
+body
+{
+    //printf("_aaApply(aa = x%llx, keysize = %d, dg = x%llx)\n", aa.a, keysize, dg);
+
+    // Call dg with a pointer to the value of every node below `node`.
+    // Order: the node itself, its right subtree, then its left subtree.
+    // Returns the first non-zero delegate result, or 0.
+    int visit(aaA* node)
+    {
+        while (node)
+        {
+            //printf("treewalker(e = %p, dg = x%llx)\n", node, dg);
+            int rc = dg(cast(void *)(node + 1) + keysize);
+            if (rc)
+                return rc;
+
+            if (node.left && node.right)
+            {
+                // Two children: recurse to the right, then loop to the left.
+                rc = visit(node.right);
+                if (rc)
+                    return rc;
+                node = node.left;
+            }
+            else
+                node = node.right ? node.right : node.left;
+        }
+        return 0;
+    }
+
+    if (!aa.a)
+        return 0;
+
+    foreach (root; aa.a.b)
+    {
+        if (!root)
+            continue;
+
+        int rc = visit(root);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+// dg is D, but _aaApply2() is C
+extern (D) typedef int delegate(void *, void *) dg2_t;
+
+int _aaApply2(AA aa, size_t keysize, dg2_t dg)
+in
+{
+    assert(aligntsize(keysize) == keysize);
+}
+body
+{
+    //printf("_aaApply(aa = x%llx, keysize = %d, dg = x%llx)\n", aa.a, keysize, dg);
+
+    // Call dg with pointers to the key and the value of every node below
+    // `node`.  Order: the node itself, its right subtree, then its left
+    // subtree.  Returns the first non-zero delegate result, or 0.
+    int visit(aaA* node)
+    {
+        while (node)
+        {
+            //printf("treewalker(e = %p, dg = x%llx)\n", node, dg);
+            int rc = dg(cast(void *)(node + 1), cast(void *)(node + 1) + keysize);
+            if (rc)
+                return rc;
+
+            if (node.left && node.right)
+            {
+                // Two children: recurse to the right, then loop to the left.
+                rc = visit(node.right);
+                if (rc)
+                    return rc;
+                node = node.left;
+            }
+            else
+                node = node.right ? node.right : node.left;
+        }
+        return 0;
+    }
+
+    if (!aa.a)
+        return 0;
+
+    foreach (root; aa.a.b)
+    {
+        if (!root)
+            continue;
+
+        int rc = visit(root);
+        if (rc)
+            return rc;
+    }
+    return 0;
+}
+
+
+/***********************************
+ * Construct an associative array of type ti from
+ * length pairs of key/value pairs.
+ */
+
+extern (C)
+BB* _d_assocarrayliteralT(TypeInfo_AssociativeArray ti, size_t length, ...)
+{
+    auto valuesize = ti.next.tsize();           // value size
+    auto keyti = ti.key;
+    auto keysize = keyti.tsize();               // key size
+    BB* result;                                 // stays null for an empty literal
+
+    //printf("_d_assocarrayliteralT(keysize = %d, valuesize = %d, length = %d)\n", keysize, valuesize, length);
+    //printf("tivalue = %.*s\n", ti.next.classinfo.name);
+    if (length == 0 || valuesize == 0 || keysize == 0)
+    {
+        ;
+    }
+    else
+    {
+        va_list q;
+        va_start!(size_t)(q, length);
+
+        result = new BB();
+        result.keyti = keyti;
+        size_t i;
+
+        // Choose the first prime >= length, or the largest one available.
+        for (i = 0; i < prime_list.length - 1; i++)
+        {
+            if (length <= prime_list[i])
+                break;
+        }
+        auto len = prime_list[i];
+        result.b = new aaA*[len];
+
+        // Space each argument is taken to occupy in the variadic area:
+        // its size rounded up to a multiple of int.sizeof.
+        size_t keystacksize   = (keysize   + int.sizeof - 1) & ~(int.sizeof - 1);
+        size_t valuestacksize = (valuesize + int.sizeof - 1) & ~(int.sizeof - 1);
+
+        // Offset of the value inside a node (key padded to pointer size).
+        size_t keytsize = aligntsize(keysize);
+
+        for (size_t j = 0; j < length; j++)
+        {   // Arguments are read as alternating key, value.
+            void* pkey = q;
+            q += keystacksize;
+            void* pvalue = q;
+            q += valuestacksize;
+            aaA* e;
+
+            auto key_hash = keyti.getHash(pkey);
+            //printf("hash = %d\n", key_hash);
+            i = key_hash % len;
+            auto pe = &result.b[i];
+            while (1)
+            {
+                e = *pe;
+                if (!e)
+                {
+                    // Not found, create new elem
+                    //printf("create new one\n");
+                    e = cast(aaA *) cast(void*) new void[aaA.sizeof + keytsize + valuesize];
+                    memcpy(e + 1, pkey, keysize);
+                    e.hash = key_hash;
+                    *pe = e;
+                    result.nodes++;
+                    break;
+                }
+                if (key_hash == e.hash)
+                {
+                    auto c = keyti.compare(pkey, e + 1);
+                    if (c == 0)
+                        break;      // key already present: its value is overwritten below
+                    pe = (c < 0) ? &e.left : &e.right;
+                }
+                else
+                    pe = (key_hash < e.hash) ? &e.left : &e.right;
+            }
+            memcpy(cast(void *)(e + 1) + keytsize, pvalue, valuesize);
+        }
+
+        va_end(q);
+    }
+    return result;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/adi.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,625 @@
+//_ adi.d
+
+/**
+ * Part of the D programming language runtime library.
+ * Dynamic array property support routines
+ */
+
+/*
+ *  Copyright (C) 2000-2006 by Digital Mars, www.digitalmars.com
+ *  Written by Walter Bright
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty. In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, in both source and binary form, subject to the following
+ *  restrictions:
+ *
+ *  o  The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  o  Altered source versions must be plainly marked as such, and must not
+ *     be misrepresented as being the original software.
+ *  o  This notice may not be removed or altered from any source
+ *     distribution.
+ */
+
+/*
+ *  Modified by Sean Kelly for use with the D Runtime Project
+ */
+
+module rt.adi;
+
+//debug=adi;            // uncomment to turn on debugging printf's
+
+private
+{
+    debug(adi) import stdc.stdio;
+    import stdc.string;
+    import stdc.stdlib;
+    import util.utf;
+
+    enum BlkAttr : uint
+    {
+        FINALIZE = 0b0000_0001,
+        NO_SCAN  = 0b0000_0010,
+        NO_MOVE  = 0b0000_0100,
+        ALL_BITS = 0b1111_1111
+    }
+
+    extern (C) void* gc_malloc( size_t sz, uint ba = 0 );
+    extern (C) void* gc_calloc( size_t sz, uint ba = 0 );
+    extern (C) void  gc_free( void* p );
+}
+
+
+struct Array        // untyped (length, pointer) pair; element size or TypeInfo is passed separately to the routines below
+{
+    size_t  length; // element count (multiplied by the element size when addressing, see _adReverse)
+    void*   ptr;    // address of the first element
+}
+
+/**********************************************
+ * Reverse array of chars.
+ * Handled separately because embedded multibyte encodings should not be
+ * reversed.
+ */
+
+extern (C) long _adReverseChar(char[] a)
+{   // Reverses a in place one UTF-8 sequence at a time, so the bytes inside a multibyte sequence keep their order.
+    if (a.length > 1)
+    {
+        char[6] tmp;                    // scratch for the sequence at the hi end; NOTE(review): strides are not range-checked against 6
+        char[6] tmplo;                  // scratch for the sequence at the lo end
+        char* lo = a.ptr;
+        char* hi = &a[length - 1];      // last byte of the array
+
+        while (lo < hi)
+        {   auto clo = *lo;
+            auto chi = *hi;
+
+            debug(adi) printf("lo = %d, hi = %d\n", lo, hi);
+            if (clo <= 0x7F && chi <= 0x7F)
+            {   // fast path: both ends are single-byte characters, swap them directly
+                debug(adi) printf("\tascii\n");
+                *lo = chi;
+                *hi = clo;
+                lo++;
+                hi--;
+                continue;
+            }
+
+            uint stridelo = UTF8stride[clo];    // presumably the byte length of the sequence starting at lo (table comes from util.utf)
+
+            uint stridehi = 1;
+            while ((chi & 0xC0) == 0x80)        // 10xxxxxx is a UTF-8 continuation byte: walk back to the sequence start
+            {
+                chi = *--hi;
+                stridehi++;
+                assert(hi >= lo);
+            }
+            if (lo == hi)
+                break;                          // both ends are the same sequence: nothing left to swap
+
+            debug(adi) printf("\tstridelo = %d, stridehi = %d\n", stridelo, stridehi);
+            if (stridelo == stridehi)
+            {
+
+                memcpy(tmp.ptr, lo, stridelo);  // equal widths: exchange the two sequences through tmp
+                memcpy(lo, hi, stridelo);
+                memcpy(hi, tmp.ptr, stridelo);
+                lo += stridelo;
+                hi--;                           // hi was on the first byte of its sequence; move to the byte before it
+                continue;
+            }
+
+            /* Shift the whole array. This is woefully inefficient
+             */
+            memcpy(tmp.ptr, hi, stridehi);      // save both end sequences before anything is overwritten
+            memcpy(tmplo.ptr, lo, stridelo);
+            memmove(lo + stridehi, lo + stridelo , (hi - lo) - stridelo);  // slide the middle to match the new widths
+            memcpy(lo, tmp.ptr, stridehi);
+            memcpy(hi + stridehi - stridelo, tmplo.ptr, stridelo);
+
+            lo += stridehi;
+            hi = hi - 1 + (stridehi - stridelo);    // byte just before the relocated lo sequence
+        }
+    }
+    return *cast(long*)(&a);    // reinterprets the first 8 bytes of the slice variable a as the long result
+}
+
+unittest    // char[] .reverse: pure ASCII, then strings mixing ASCII with multibyte \u escapes
+{
+    auto a = "abcd"c;
+
+    auto r = a.dup.reverse;
+    //writefln(r);
+    assert(r == "dcba");
+
+    a = "a\u1235\u1234c";
+    //writefln(a);
+    r = a.dup.reverse;
+    //writefln(r);
+    assert(r == "c\u1234\u1235a");
+
+    a = "ab\u1234c";
+    //writefln(a);
+    r = a.dup.reverse;
+    //writefln(r);
+    assert(r == "c\u1234ba");
+
+    a = "\u3026\u2021\u3061\n";
+    r = a.dup.reverse;
+    assert(r == "\n\u3061\u2021\u3026");
+}
+
+
+/**********************************************
+ * Reverse array of wchars.
+ * Handled separately because embedded multiword encodings should not be
+ * reversed.
+ */
+
+extern (C) long _adReverseWchar(wchar[] a)
+{   // Reverses a in place one UTF-16 code point at a time; the two units of a surrogate pair keep their order.
+    if (a.length > 1)
+    {
+        wchar[2] tmp;                   // scratch for the code point at the hi end
+        wchar* lo = a.ptr;
+        wchar* hi = &a[length - 1];     // last unit of the array
+
+        while (lo < hi)
+        {   auto clo = *lo;
+            auto chi = *hi;
+
+            if ((clo < 0xD800 || clo > 0xDFFF) &&
+                (chi < 0xD800 || chi > 0xDFFF))
+            {   // fast path: neither end is a surrogate, swap single units
+                *lo = chi;
+                *hi = clo;
+                lo++;
+                hi--;
+                continue;
+            }
+
+            int stridelo = 1 + (clo >= 0xD800 && clo <= 0xDBFF);   // 2 when lo holds a high surrogate
+
+            int stridehi = 1;
+            if (chi >= 0xDC00 && chi <= 0xDFFF)     // low surrogate: step back one unit to the start of the pair
+            {
+                chi = *--hi;
+                stridehi++;
+                assert(hi >= lo);
+            }
+            if (lo == hi)
+                break;                              // both ends are the same code point
+
+            if (stridelo == stridehi)
+            {   int stmp;
+
+                assert(stridelo == 2);              // equal strides of 1 only arise here from unpaired surrogates
+                assert(stmp.sizeof == 2 * (*lo).sizeof);
+                stmp = *cast(int*)lo;               // swap the two pairs as single 32-bit values
+                *cast(int*)lo = *cast(int*)hi;
+                *cast(int*)hi = stmp;
+                lo += stridelo;
+                hi--;
+                continue;
+            }
+
+            /* Shift the whole array. This is woefully inefficient
+             */
+            memcpy(tmp.ptr, hi, stridehi * wchar.sizeof);   // save the hi code point, then move lo's into its final slot
+            memcpy(hi + stridehi - stridelo, lo, stridelo * wchar.sizeof);
+            memmove(lo + stridehi, lo + stridelo , (hi - (lo + stridelo)) * wchar.sizeof);
+            memcpy(lo, tmp.ptr, stridehi * wchar.sizeof);
+
+            lo += stridehi;
+            hi = hi - 1 + (stridehi - stridelo);            // unit just before the relocated lo code point
+        }
+    }
+    return *cast(long*)(&a);    // reinterprets the first 8 bytes of the slice variable a as the long result
+}
+
+unittest    // wchar[] .reverse: BMP-only text, then text containing \U escapes above U+FFFF (surrogate pairs in UTF-16)
+{
+    wstring a = "abcd";
+
+    auto r = a.dup.reverse;
+    assert(r == "dcba");
+
+    a = "a\U00012356\U00012346c";
+    r = a.dup.reverse;
+    assert(r == "c\U00012346\U00012356a");
+
+    a = "ab\U00012345c";
+    r = a.dup.reverse;
+    assert(r == "c\U00012345ba");
+}
+
+
+/**********************************************
+ * Support for array.reverse property.
+ */
+
+extern (C) long _adReverse(Array a, size_t szelem)
+    out (result)
+    {
+        assert(result is *cast(long*)(&a));
+    }
+    body
+    {   // Reverses, in place, a.length elements of szelem bytes each by swapping from both ends inward.
+        if (a.length >= 2)
+        {
+            byte*    tmp;           // one-element swap buffer
+            byte[16] buffer;        // backs tmp whenever an element fits in 16 bytes
+
+            void* lo = a.ptr;
+            void* hi = a.ptr + (a.length - 1) * szelem;     // address of the last element
+
+            tmp = buffer.ptr;
+            if (szelem > 16)
+            {
+                //version (Windows)
+                    tmp = cast(byte*) alloca(szelem);       // larger elements: take the swap buffer from the stack
+                //else
+                    //tmp = gc_malloc(szelem);
+            }
+
+            for (; lo < hi; lo += szelem, hi -= szelem)
+            {
+                memcpy(tmp, lo,  szelem);
+                memcpy(lo,  hi,  szelem);
+                memcpy(hi,  tmp, szelem);
+            }
+
+            version (Windows)
+            {
+            }
+            else
+            {
+                //if (szelem > 16)
+                    // BUG: bad code is generated for delete pointer, tries
+                    // to call delclass.
+                    //gc_free(tmp);
+            }
+        }
+        return *cast(long*)(&a);    // reinterprets the first 8 bytes of a as the long result (checked by the out contract)
+    }
+
+unittest    // .reverse on int[] (small elements) and on a 20-byte struct (elements larger than the 16-byte inline buffer)
+{
+    debug(adi) printf("array.reverse.unittest\n");
+
+    int[] a = new int[5];
+    int[] b;
+    size_t i;
+
+    for (i = 0; i < 5; i++)
+        a[i] = i;
+    b = a.reverse;
+    assert(b is a);     // reversal happens in place: same slice comes back
+    for (i = 0; i < 5; i++)
+        assert(a[i] == 4 - i);
+
+    struct X20
+    {   // More than 16 bytes in size
+        int a;
+        int b, c, d, e;
+    }
+
+    X20[] c = new X20[5];
+    X20[] d;
+
+    for (i = 0; i < 5; i++)
+    {   c[i].a = i;
+        c[i].e = 10;
+    }
+    d = c.reverse;
+    assert(d is c);
+    for (i = 0; i < 5; i++)
+    {
+        assert(c[i].a == 4 - i);
+        assert(c[i].e == 10);   // the last field must travel with its element
+    }
+}
+
+/**********************************************
+ * Sort array of chars.
+ */
+
+extern (C) long _adSortChar(char[] a)
+{   // Sorts a via a UTF-32 copy: convert, sort the dchars, then write each one back into a as UTF-8.
+    if (a.length > 1)
+    {
+        dstring points = toUTF32(a);
+        points.sort;
+        size_t pos = 0;                 // next write offset in a
+        foreach (dchar cp; points)
+        {   char[4] scratch;
+            auto enc = toUTF8(scratch, cp);
+            a[pos .. pos + enc.length] = enc[];
+            pos += enc.length;
+        }
+        delete points;
+    }
+    return *cast(long*)(&a);
+}
+
+/**********************************************
+ * Sort array of wchars.
+ */
+
+extern (C) long _adSortWchar(wchar[] a)
+{   // Sorts a via a UTF-32 copy: convert, sort the dchars, then write each one back into a as UTF-16.
+    if (a.length > 1)
+    {
+        dstring points = toUTF32(a);
+        points.sort;
+        size_t pos = 0;                 // next write offset in a
+        foreach (dchar cp; points)
+        {   wchar[2] scratch;
+            auto enc = toUTF16(scratch, cp);
+            a[pos .. pos + enc.length] = enc[];
+            pos += enc.length;
+        }
+        delete points;
+    }
+    return *cast(long*)(&a);
+}
+
+/***************************************
+ * Support for array equality test.
+ * Returns:
+ *      1       equal
+ *      0       not equal
+ */
+
+extern (C) int _adEq(Array a1, Array a2, TypeInfo ti)
+{   // 1 when both arrays have the same length and every element pair satisfies ti.equals, else 0.
+    debug(adi) printf("_adEq(a1.length = %d, a2.length = %d)\n", a1.length, a2.length);
+    auto count = a1.length;
+    if (count != a2.length)
+        return 0; // not equal
+    auto stride = ti.tsize();
+    auto lhs = a1.ptr;
+    auto rhs = a2.ptr;
+
+    if (stride == 1)
+        // We should really have a ti.isPOD() check for this
+        return (memcmp(lhs, rhs, count) == 0);
+    for (size_t k = 0; k != count; k++, lhs += stride, rhs += stride)
+    {
+        if (!ti.equals(lhs, rhs))
+            return 0; // not equal
+    }
+    return 1; // equal
+}
+
+extern (C) int _adEq2(Array a1, Array a2, TypeInfo ti)
+{
+    debug(adi) printf("_adEq2(a1.length = %d, a2.length = %d)\n", a1.length, a2.length);
+    // Equal only when the lengths agree and ti accepts the pair; the length
+    // test runs first, so ti.equals is never reached for mismatched lengths.
+    if (a1.length == a2.length && ti.equals(&a1, &a2))
+        return 1;
+    return 0;
+}
+unittest    // string ==/!= against shorter, longer, same-length-different and identical literals
+{
+    debug(adi) printf("array.Eq unittest\n");
+
+    auto a = "hello"c;
+
+    assert(a != "hel");
+    assert(a != "helloo");
+    assert(a != "betty");
+    assert(a == "hello");
+    assert(a != "hxxxx");
+}
+
+/***************************************
+ * Support for array compare test.
+ */
+
+extern (C) int _adCmp(Array a1, Array a2, TypeInfo ti)
+{
+    debug(adi) printf("adCmp()\n");
+    // Compare the common prefix element by element; ties are broken by length.
+    auto common = (a2.length < a1.length) ? a2.length : a1.length;
+    auto stride = ti.tsize();
+    void *lhs = a1.ptr;
+    void *rhs = a2.ptr;
+
+    if (stride == 1)
+    {   // We should really have a ti.isPOD() check for this
+        auto order = memcmp(lhs, rhs, common);
+        if (order)
+            return order;
+    }
+    else
+    {
+        for (size_t k = 0; k < common; k++)
+        {
+            auto order = ti.compare(lhs + k * stride, rhs + k * stride);
+            if (order)
+                return order;
+        }
+    }
+    // Common prefix is equal: the longer array orders last.
+    if (a1.length > a2.length)
+        return 1;
+    return (a1.length < a2.length) ? -1 : 0;
+}
+
+extern (C) int _adCmp2(Array a1, Array a2, TypeInfo ti)
+{   // Whole-array compare: hands both (length, ptr) pairs to ti.compare and returns its result unchanged.
+    debug(adi) printf("_adCmp2(a1.length = %d, a2.length = %d)\n", a1.length, a2.length);
+    return ti.compare(&a1, &a2);
+}
+unittest    // string ordering against a proper prefix, an extension, an unrelated word and an identical literal
+{
+    debug(adi) printf("array.Cmp unittest\n");
+
+    auto a = "hello"c;
+
+    assert(a >  "hel");
+    assert(a >= "hel");
+    assert(a <  "helloo");
+    assert(a <= "helloo");
+    assert(a >  "betty");
+    assert(a >= "betty");
+    assert(a == "hello");
+    assert(a <= "hello");
+    assert(a >= "hello");
+}
+
+/***************************************
+ * Support for array compare test, specialized for arrays of char.
+ */
+
+extern (C) int _adCmpChar(Array a1, Array a2)
+{   // Byte-wise three-way compare of two char arrays: <0, 0 or >0; equal common prefixes are ordered by length.
+  version (X86)
+  {
+    asm
+    {   naked                   ;
+
+        push    EDI             ;
+        push    ESI             ;
+
+        mov    ESI,a1+4[4+ESP]  ; // ESI = a1.ptr
+        mov    EDI,a2+4[4+ESP]  ; // EDI = a2.ptr
+
+        mov    ECX,a1[4+ESP]    ; // ECX = a1.length
+        mov    EDX,a2[4+ESP]    ; // EDX = a2.length
+
+        cmp     ECX,EDX         ;
+        jb      GotLength       ;
+
+        mov     ECX,EDX         ; // ECX = min(a1.length, a2.length)
+
+GotLength:
+        cmp    ECX,4            ;
+        jb    DoBytes           ;
+
+        // Do alignment if neither is dword aligned
+        test    ESI,3           ;
+        jz    Aligned           ;
+
+        test    EDI,3           ;
+        jz    Aligned           ;
+DoAlign:
+        mov    AL,[ESI]         ; //align ESI to dword bounds
+        mov    DL,[EDI]         ;
+
+        cmp    AL,DL            ;
+        jnz    Unequal          ;
+
+        inc    ESI              ;
+        inc    EDI              ;
+
+        test    ESI,3           ;
+
+        lea    ECX,[ECX-1]      ; // lea instead of dec: keeps the flags from the test above for jnz
+        jnz    DoAlign          ;
+Aligned:
+        mov    EAX,ECX          ; // remember the remaining byte count for the tail
+
+        // do multiple of 4 bytes at a time
+
+        shr    ECX,2            ;
+        jz    TryOdd            ;
+
+        repe                    ;
+        cmpsd                   ;
+
+        jnz    UnequalQuad      ;
+
+TryOdd:
+        mov    ECX,EAX          ;
+DoBytes:
+        // if still equal and not end of string, do up to 3 bytes slightly
+        // slower.
+
+        and    ECX,3            ;
+        jz    Equal             ;
+
+        repe                    ;
+        cmpsb                   ;
+
+        jnz    Unequal          ;
+Equal:
+        mov    EAX,a1[4+ESP]    ;
+        mov    EDX,a2[4+ESP]    ;
+
+        sub    EAX,EDX          ; // common prefix equal: result is a1.length - a2.length
+        pop    ESI              ;
+
+        pop    EDI              ;
+        ret                     ;
+
+UnequalQuad:
+        mov    EDX,[EDI-4]      ; // reload the dwords that differed and find the first differing byte
+        mov    EAX,[ESI-4]      ;
+
+        cmp    AL,DL            ;
+        jnz    Unequal          ;
+
+        cmp    AH,DH            ;
+        jnz    Unequal          ;
+
+        shr    EAX,16           ;
+
+        shr    EDX,16           ;
+
+        cmp    AL,DL            ;
+        jnz    Unequal          ;
+
+        cmp    AH,DH            ;
+Unequal:
+        sbb    EAX,EAX          ; // EAX = -1 if the last compare borrowed (a1 byte below a2 byte), else 0
+        pop    ESI              ;
+
+        or     EAX,1            ; // -1 stays -1, 0 becomes +1
+        pop    EDI              ;
+
+        ret                     ;
+    }
+  }
+  else
+  {
+    size_t len;     // size_t, not int: array lengths do not fit an int on 64-bit targets
+    int c;
+
+    debug(adi) printf("adCmpChar()\n");
+    len = a1.length;
+    if (a2.length < len)
+        len = a2.length;
+    c = memcmp(cast(char *)a1.ptr, cast(char *)a2.ptr, len);
+    if (!c && a1.length != a2.length)
+        c = (a1.length > a2.length) ? 1 : -1;   // compare lengths instead of subtracting truncated ints
+    return c;
+  }
+}
+
+unittest    // char-array ordering against a proper prefix, an extension, an unrelated word and an identical literal
+{
+    debug(adi) printf("array.CmpChar unittest\n");
+
+    auto a = "hello"c;
+
+    assert(a >  "hel");
+    assert(a >= "hel");
+    assert(a <  "helloo");
+    assert(a <= "helloo");
+    assert(a >  "betty");
+    assert(a >= "betty");
+    assert(a == "hello");
+    assert(a <= "hello");
+    assert(a >= "hello");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/alloca.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,110 @@
+/*_ _alloca.d
+ * Copyright (C) 1990-2003 by Digital Mars, www.digitalmars.com
+ * All Rights Reserved
+ * Written by Walter Bright
+ */
+
+module rt.alloca;
+
+/+
+#if DOS386
+extern size_t _x386_break;
+#else
+extern size_t _pastdata;
+#endif
++/
+
+/*******************************************
+ * Allocate data from the caller's stack frame.
+ * This is a 'magic' function that needs help from the compiler to
+ * work right, do not change its name, do not call it from other compilers.
+ * Input:
+ *      nbytes  number of bytes to allocate
+ *      ECX     address of variable with # of bytes in locals
+ *              This is adjusted upon return to reflect the additional
+ *              size of the stack frame.
+ * Returns:
+ *      EAX     allocated data, null if stack overflows
+ */
+
+extern (C) void* __alloca(int nbytes)
+{
+    asm
+    {
+        naked                   ;
+        mov     EDX,ECX         ; // keep the address of the caller's locals-size variable; ECX is reused below
+        mov     EAX,4[ESP]      ; // get nbytes
+        push    EBX             ;
+        push    EDI             ;
+        push    ESI             ;
+        add     EAX,3           ;
+        and     EAX,0xFFFFFFFC  ; // round up to dword
+        jnz     Abegin          ;
+        mov     EAX,4           ; // allow zero bytes allocation, 0 rounded to dword is 4..
+    Abegin:
+        mov     ESI,EAX         ; // ESI = nbytes
+        neg     EAX             ;
+        add     EAX,ESP         ; // EAX is now what the new ESP will be.
+        jae     Aoverflow       ; // no carry out of ESP + (-nbytes): the result wrapped below zero
+    }
+    version (Windows)
+    {
+    asm
+    {
+        // We need to be careful about the guard page
+        // Thus, for every 4k page, touch it to cause the OS to load it in.
+        mov     ECX,EAX         ; // ECX is new location for stack
+        mov     EBX,ESI         ; // EBX is size to "grow" stack
+    L1:
+        test    [ECX+EBX],EBX   ; // bring in page
+        sub     EBX,0x1000      ; // next 4K page down
+        jae     L1              ; // if more pages
+        test    [ECX],EBX       ; // bring in last page
+    }
+    }
+    version (DOS386)
+    {
+    asm
+    {
+        // is ESP off bottom?
+        cmp     EAX,_x386_break ;
+        jbe     Aoverflow       ;
+    }
+    }
+    version (Unix)
+    {
+    asm
+    {
+        cmp     EAX,_pastdata   ; // new ESP at or below _pastdata is treated as overflow
+        jbe     Aoverflow       ; // Unlikely - ~2 Gbytes under UNIX
+    }
+    }
+    asm
+    {
+        // Copy down to [ESP] the temps on the stack.
+        // The number of temps is (EBP - ESP - locals).
+        mov     ECX,EBP         ;
+        sub     ECX,ESP         ; // ECX = bytes between the current ESP and EBP
+        sub     ECX,[EDX]       ; // ECX = number of temps (bytes) to move.
+        add     [EDX],ESI       ; // adjust locals by nbytes for next call to alloca()
+        mov     ESP,EAX         ; // Set up new stack pointer.
+        add     EAX,ECX         ; // Return value = ESP + temps.
+        mov     EDI,ESP         ; // Destination of copy of temps.
+        add     ESI,ESP         ; // Source of copy.
+        shr     ECX,2           ; // ECX to count of dwords in temps
+                                  // Always at least 4 (nbytes, EIP, ESI,and EDI).
+        rep                     ;
+        movsd                   ;
+        jmp     done            ;
+
+    Aoverflow:
+        // Overflowed the stack.  Return null
+        xor     EAX,EAX         ;
+
+    done:
+        pop     ESI             ; // pops mirror the three pushes at entry, in reverse order
+        pop     EDI             ;
+        pop     EBX             ;
+        ret                     ;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/arrayassign.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,180 @@
+/**
+ * Part of the D programming language runtime library.
+ *  http://www.digitalmars.com
+ *  Written by Walter Bright
+ *  Placed in the Public Domain
+ */
+module rt.arrayassign;
+
+private
+{
+    import util.string;
+    import stdc.string;
+    import stdc.stdlib;
+    debug(PRINTF) import stdc.stdio;
+}
+
+/**
+ * Does array assignment (not construction) from another
+ * array of the same element type.
+ * ti is the element type.
+ * Handles overlapping copies.
+ */
+extern (C) void[] _d_arrayassign(TypeInfo ti, void[] from, void[] to)
+{   // Assigns from[] to to[] per element: save the old value, blit the new one, postblit it, destroy the old one.
+    debug(PRINTF) printf("_d_arrayassign(from = %p,%d, to = %p,%d) size = %d\n", from.ptr, from.length, to.ptr, to.length, ti.tsize());
+
+    if (to.length != from.length)
+    {
+        char[10] tmp = void, tmp2 = void;   // one scratch buffer per number; intToString presumably returns a slice of its buffer
+        string msg = "lengths don't match for array copy,"c;
+        msg ~= tmp.intToString(to.length) ~ " = " ~ tmp2.intToString(from.length);
+        throw new Exception(msg);
+    }
+
+    auto element_size = ti.tsize();
+
+    /* Need a temporary buffer tmp[] big enough to hold one element
+     */
+    void[16] buf = void;
+    void[] tmp;
+    if (element_size > buf.sizeof)
+        tmp = alloca(element_size)[0 .. element_size];
+    else
+        tmp = buf;
+
+
+    if (to.ptr <= from.ptr)     // destination at or below source: walk upward so overlapping source elements are read first
+    {
+        foreach (i; 0 .. to.length)
+        {
+            void* pto   = to.ptr   + i * element_size;
+            void* pfrom = from.ptr + i * element_size;
+            memcpy(tmp.ptr, pto, element_size);
+            memcpy(pto, pfrom, element_size);
+            ti.postblit(pto);
+            ti.destroy(tmp.ptr);
+        }
+    }
+    else                        // destination above source: walk downward for the same reason
+    {
+        for (size_t i = to.length; i--; )   // size_t index: an int cannot hold every array length
+        {
+            void* pto   = to.ptr   + i * element_size;
+            void* pfrom = from.ptr + i * element_size;
+            memcpy(tmp.ptr, pto, element_size);
+            memcpy(pto, pfrom, element_size);
+            ti.postblit(pto);
+            ti.destroy(tmp.ptr);
+        }
+    }
+    return to;
+}
+
+/**
+ * Does array initialization (not assignment) from another
+ * array of the same element type.
+ * ti is the element type.
+ */
+extern (C) void[] _d_arrayctor(TypeInfo ti, void[] from, void[] to)
+{   // Copy-constructs to[] from from[]; if a postblit throws, the elements completed so far are destroyed again.
+    debug(PRINTF) printf("_d_arrayctor(from = %p,%d, to = %p,%d) size = %d\n", from.ptr, from.length, to.ptr, to.length, ti.tsize());
+
+    if (to.length != from.length)
+    {
+        char[10] tmp = void, tmp2 = void;   // one scratch buffer per number; intToString presumably returns a slice of its buffer
+        string msg = "lengths don't match for array initialization,"c;
+        msg ~= tmp.intToString(to.length) ~ " = " ~ tmp2.intToString(from.length);
+        throw new Exception(msg);
+    }
+
+    auto element_size = ti.tsize();
+
+    size_t i;   // declared outside the try so the handler knows how many elements were completed; size_t matches to.length
+    try
+    {
+        for (i = 0; i < to.length; i++)
+        {
+            // Copy construction is defined as bit copy followed by postblit.
+            memcpy(to.ptr + i * element_size, from.ptr + i * element_size, element_size);
+            ti.postblit(to.ptr + i * element_size);
+        }
+    }
+    catch (Object o)
+    {
+        /* Destroy, in reverse order, what we've constructed so far
+         */
+        while (i--)
+        {
+            ti.destroy(to.ptr + i * element_size);
+        }
+
+        throw o;
+    }
+    return to;
+}
+
+
+/**
+ * Do assignment to an array.
+ *      p[0 .. count] = value;
+ */
+extern (C) void* _d_arraysetassign(void* p, void* value, int count, TypeInfo ti)
+{   // Assigns *value to each of count elements starting at p; returns the original p.
+    void* pstart = p;
+
+    auto element_size = ti.tsize();
+
+    //Need a temporary buffer tmp[] big enough to hold one element
+    void[16] buf = void;
+    void[] tmp;
+    if (element_size > buf.sizeof)
+    {
+        tmp = alloca(element_size)[0 .. element_size];  // element does not fit buf: take the space from the stack
+    }
+    else
+        tmp = buf;
+
+    foreach (i; 0 .. count)
+    {
+        memcpy(tmp.ptr, p, element_size);       // keep the old element so it can be destroyed after being replaced
+        memcpy(p, value, element_size);
+        ti.postblit(p);
+        ti.destroy(tmp.ptr);
+        p += element_size;
+    }
+    return pstart;
+}
+
+/**
+ * Do construction of an array.
+ *      ti[count] p = value;
+ */
+extern (C) void* _d_arraysetctor(void* p, void* value, int count, TypeInfo ti)
+{   // Copy-constructs count elements from *value starting at p; returns the original p.
+    void* first = p;
+    auto stride = ti.tsize();
+
+    try
+    {
+        for (int built = 0; built < count; built++)
+        {
+            // Copy construction is defined as bit copy followed by postblit.
+            memcpy(p, value, stride);
+            ti.postblit(p);
+            p += stride;
+        }
+    }
+    catch (Object o)
+    {
+        // Unwind: destroy the fully constructed elements, newest first.
+        while (p > first)
+        {
+            p -= stride;
+            ti.destroy(p);
+        }
+
+        throw o;
+    }
+    return first;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/druntime/src/compiler/dmd/arraybyte.d	Tue Nov 11 01:52:37 2008 +0100
@@ -0,0 +1,1890 @@
+/***************************
+ * D programming language http://www.digitalmars.com/d/
+ * Runtime support for byte array operations.
+ * Based on code originally written by Burton Radons.
+ * Placed in public domain.
+ */
+
+/* Contains SSE2 and MMX versions of certain operations for char, byte,
+ * and ubyte ('a', 'g' and 'h' suffixes).
+ */
+
+module rt.arraybyte;
+
+import util.cpuid;
+
+version (Unittest)  // NOTE(review): capital-U `Unittest` is not compiler-predefined; presumably needs -version=Unittest — confirm
+{
+    /* This is so unit tests will test every CPU variant: each wrapper below
+     * reports its feature only while cpuid holds that feature's number. */
+    int cpuid;
+    const int CPUID_MAX = 4;
+    bool mmx()      { return cpuid == 1 && util.cpuid.mmx(); }
+    bool sse()      { return cpuid == 2 && util.cpuid.sse(); }
+    bool sse2()     { return cpuid == 3 && util.cpuid.sse2(); }
+    bool amd3dnow() { return cpuid == 4 && util.cpuid.amd3dnow(); }
+}
+else
+{
+    alias util.cpuid.mmx mmx;               // normal builds: use the util.cpuid feature tests directly
+    alias util.cpuid.sse sse;
+    alias util.cpuid.sse2 sse2;
+    alias util.cpuid.amd3dnow amd3dnow;
+}
+
+//version = log;
+
+bool disjoint(T)(T[] a, T[] b)
+{   // True when the slices do not overlap, i.e. it is not the case that each one ends after the other begins.
+    return !(a.ptr + a.length > b.ptr && b.ptr + b.length > a.ptr);
+}
+
+alias byte T;
+
+extern (C):
+
+/* ======================================================================== */
+
+
+/***********************
+ * Computes:
+ *      a[] = b[] + value
+ */
+
+T[] _arraySliceExpAddSliceAssign_a(T[] a, T value, T[] b)
+{   // 'a' suffix entry point (see the suffix list at the top of this module): forwards to the 'g' implementation
+    return _arraySliceExpAddSliceAssign_g(a, value, b);
+}
+
+T[] _arraySliceExpAddSliceAssign_h(T[] a, T value, T[] b)
+{   // 'h' suffix entry point (see the suffix list at the top of this module): forwards to the 'g' implementation
+    return _arraySliceExpAddSliceAssign_g(a, value, b);
+}
+
+T[] _arraySliceExpAddSliceAssign_g(T[] a, T value, T[] b)
+in
+{
+    assert(a.length == b.length);
+    assert(disjoint(a, b));
+}
+body
+{   // Stores b[i] + value into a[i] for every i and returns a; asm block loops run first, a scalar loop finishes.
+    //printf("_arraySliceExpAddSliceAssign_g()\n");
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+    auto bptr = b.ptr;
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 1088% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part that is a whole number of 64-byte blocks
+
+            uint l = cast(ubyte) value;
+            l |= (l << 8);
+            l |= (l << 16);                     // l now holds value in each of its four bytes
+
+            if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0)
+            {
+                asm // unaligned case
+                {
+                    mov ESI, aptr;
+                    mov EDI, n;
+                    mov EAX, bptr;
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;       // broadcast the low dword: XMM4 = 16 copies of value
+
+                    align 8;
+                startaddsse2u:
+                    add ESI, 64;                // advance first; the stores below reach back with -64
+                    movdqu XMM0, [EAX];
+                    movdqu XMM1, [EAX+16];
+                    movdqu XMM2, [EAX+32];
+                    movdqu XMM3, [EAX+48];
+                    add EAX, 64;
+                    paddb XMM0, XMM4;
+                    paddb XMM1, XMM4;
+                    paddb XMM2, XMM4;
+                    paddb XMM3, XMM4;
+                    movdqu [ESI   -64], XMM0;
+                    movdqu [ESI+16-64], XMM1;
+                    movdqu [ESI+32-64], XMM2;
+                    movdqu [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddsse2u;
+
+                    mov aptr, ESI;              // hand the advanced pointers back for the scalar tail
+                    mov bptr, EAX;
+                }
+            }
+            else
+            {
+                asm // aligned case
+                {
+                    mov ESI, aptr;
+                    mov EDI, n;
+                    mov EAX, bptr;
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;       // broadcast the low dword: XMM4 = 16 copies of value
+
+                    align 8;
+                startaddsse2a:
+                    add ESI, 64;                // advance first; the stores below reach back with -64
+                    movdqa XMM0, [EAX];
+                    movdqa XMM1, [EAX+16];
+                    movdqa XMM2, [EAX+32];
+                    movdqa XMM3, [EAX+48];
+                    add EAX, 64;
+                    paddb XMM0, XMM4;
+                    paddb XMM1, XMM4;
+                    paddb XMM2, XMM4;
+                    paddb XMM3, XMM4;
+                    movdqa [ESI   -64], XMM0;
+                    movdqa [ESI+16-64], XMM1;
+                    movdqa [ESI+32-64], XMM2;
+                    movdqa [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddsse2a;
+
+                    mov aptr, ESI;              // hand the advanced pointers back for the scalar tail
+                    mov bptr, EAX;
+                }
+            }
+        }
+        else
+        // MMX version is 1000% faster
+        if (mmx() && a.length >= 32)
+        {
+            auto n = aptr + (a.length & ~31);   // end of the part that is a whole number of 32-byte blocks
+
+            uint l = cast(ubyte) value;
+            l |= (l << 8);                      // value in both bytes of the low word
+
+            asm
+            {
+                mov ESI, aptr;
+                mov EDI, n;
+                mov EAX, bptr;
+                movd MM4, l;
+                pshufw MM4, MM4, 0;             // broadcast the low word; NOTE(review): pshufw came with SSE, not base MMX — confirm mmx() suffices
+
+                align 4;
+            startaddmmx:
+                add ESI, 32;
+                movq MM0, [EAX];
+                movq MM1, [EAX+8];
+                movq MM2, [EAX+16];
+                movq MM3, [EAX+24];
+                add EAX, 32;
+                paddb MM0, MM4;
+                paddb MM1, MM4;
+                paddb MM2, MM4;
+                paddb MM3, MM4;
+                movq [ESI   -32], MM0;
+                movq [ESI+8 -32], MM1;
+                movq [ESI+16-32], MM2;
+                movq [ESI+24-32], MM3;
+                cmp ESI, EDI;
+                jb startaddmmx;
+
+                emms;                           // leave MMX state so x87 code can run afterwards
+                mov aptr, ESI;
+                mov bptr, EAX;
+            }
+        }
+        /* trying to be fair and treat normal 32-bit cpu the same way as we do
+         * the SIMD units, with unrolled asm.  There's not enough registers,
+         * really.
+         */
+        else
+        if (a.length >= 4)
+        {
+
+            auto n = aptr + (a.length & ~3);    // end of the part that is a whole number of 4-byte blocks
+            asm
+            {
+                mov ESI, aptr;
+                mov EDI, n;
+                mov EAX, bptr;
+                mov CL, value;
+
+                align 4;
+            startadd386:
+                add ESI, 4;
+                mov DX, [EAX];                  // four source bytes as two 16-bit loads
+                mov BX, [EAX+2];
+                add EAX, 4;
+                add BL, CL;                     // each byte is added separately, so no carry crosses bytes
+                add BH, CL;
+                add DL, CL;
+                add DH, CL;
+                mov [ESI   -4], DX;
+                mov [ESI+2 -4], BX;
+                cmp ESI, EDI;
+                jb startadd386;
+
+                mov aptr, ESI;
+                mov bptr, EAX;
+            }
+
+        }
+    }
+
+    while (aptr < aend)                         // scalar tail: whatever the block loops left, or everything if none ran
+        *aptr++ = cast(T)(*bptr++ + value);
+
+    return a;
+}
+
+unittest    // runs c[] = a[] + 6 for every cpuid setting, once on fresh allocations and once on slices offset by one
+{
+    printf("_arraySliceExpAddSliceAssign_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;         // not a multiple of 64, 32 or 4, so the scalar tail also runs
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            c[] = a[] + 6;
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] + 6))
+                {
+                    printf("[%d]: %d != %d + 6\n", i, c[i], a[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] = b[] + c[]
+ */
+
+T[] _arraySliceSliceAddSliceAssign_a(T[] a, T[] c, T[] b)
+{   // 'a' suffix entry point (see the suffix list at the top of this module): forwards to the 'g' implementation
+    return _arraySliceSliceAddSliceAssign_g(a, c, b);
+}
+
+T[] _arraySliceSliceAddSliceAssign_h(T[] a, T[] c, T[] b)
+{   // 'h' suffix entry point (see the suffix list at the top of this module): forwards to the 'g' implementation
+    return _arraySliceSliceAddSliceAssign_g(a, c, b);
+}
+
+T[] _arraySliceSliceAddSliceAssign_g(T[] a, T[] c, T[] b)   // a[] = b[] + c[]; returns a (parameter order is a, c, b)
+in
+{
+        assert(a.length == b.length && b.length == c.length);   // all three slices share one length
+        assert(disjoint(a, b));     // disjoint() is defined outside this view; presumably "slices do not overlap"
+        assert(disjoint(a, c));
+        assert(disjoint(b, c));
+}
+body
+{
+    //printf("_arraySliceSliceAddSliceAssign_g()\n");
+    auto aptr = a.ptr;                  // write cursor into the destination
+    auto aend = aptr + a.length;        // one past the last destination element
+    auto bptr = b.ptr;                  // read cursor for the first operand
+    auto cptr = c.ptr;                  // read cursor for the second operand
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 5739% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part made of whole 64-element chunks
+
+            if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0)   // any pointer not 16-byte aligned?
+            {
+                version (log) printf("\tsse2 unaligned\n");
+                asm // unaligned case
+                {
+                    mov ESI, aptr;              // ESI = destination cursor
+                    mov EDI, n;                 // EDI = loop end
+                    mov EAX, bptr;              // EAX = b cursor
+                    mov ECX, cptr;              // ECX = c cursor
+
+                    align 8;
+                startaddlsse2u:
+                    add ESI, 64;                // advance first; the stores below address [ESI-64 ..]
+                    movdqu XMM0, [EAX];         // load 64 bytes of b
+                    movdqu XMM1, [EAX+16];
+                    movdqu XMM2, [EAX+32];
+                    movdqu XMM3, [EAX+48];
+                    add EAX, 64;
+                    movdqu XMM4, [ECX];         // load 64 bytes of c
+                    movdqu XMM5, [ECX+16];
+                    movdqu XMM6, [ECX+32];
+                    movdqu XMM7, [ECX+48];
+                    add ECX, 64;
+                    paddb XMM0, XMM4;           // packed byte-wise b + c
+                    paddb XMM1, XMM5;
+                    paddb XMM2, XMM6;
+                    paddb XMM3, XMM7;
+                    movdqu [ESI   -64], XMM0;   // store the 64 results into a
+                    movdqu [ESI+16-64], XMM1;
+                    movdqu [ESI+32-64], XMM2;
+                    movdqu [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddlsse2u;          // unsigned compare: repeat while ESI < n
+
+                    mov aptr, ESI;              // write the cursors back so the scalar tail resumes here
+                    mov bptr, EAX;
+                    mov cptr, ECX;
+                }
+            }
+            else
+            {
+                version (log) printf("\tsse2 aligned\n");
+                asm // aligned case
+                {
+                    mov ESI, aptr;              // same register roles as the unaligned loop
+                    mov EDI, n;
+                    mov EAX, bptr;
+                    mov ECX, cptr;
+
+                    align 8;
+                startaddlsse2a:
+                    add ESI, 64;
+                    movdqa XMM0, [EAX];         // movdqa: all three pointers passed the 16-byte check above
+                    movdqa XMM1, [EAX+16];
+                    movdqa XMM2, [EAX+32];
+                    movdqa XMM3, [EAX+48];
+                    add EAX, 64;
+                    movdqa XMM4, [ECX];
+                    movdqa XMM5, [ECX+16];
+                    movdqa XMM6, [ECX+32];
+                    movdqa XMM7, [ECX+48];
+                    add ECX, 64;
+                    paddb XMM0, XMM4;
+                    paddb XMM1, XMM5;
+                    paddb XMM2, XMM6;
+                    paddb XMM3, XMM7;
+                    movdqa [ESI   -64], XMM0;
+                    movdqa [ESI+16-64], XMM1;
+                    movdqa [ESI+32-64], XMM2;
+                    movdqa [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddlsse2a;
+
+                    mov aptr, ESI;
+                    mov bptr, EAX;
+                    mov cptr, ECX;
+                }
+            }
+        }
+        else
+        // MMX version is 4428% faster
+        if (mmx() && a.length >= 32)
+        {
+            version (log) printf("\tmmx\n");
+            auto n = aptr + (a.length & ~31);   // end of the part made of whole 32-element chunks
+
+            asm
+            {
+                mov ESI, aptr;                  // ESI = destination cursor
+                mov EDI, n;                     // EDI = loop end
+                mov EAX, bptr;                  // EAX = b cursor
+                mov ECX, cptr;                  // ECX = c cursor
+
+                align 4;
+            startaddlmmx:
+                add ESI, 32;                    // advance first; the stores below address [ESI-32 ..]
+                movq MM0, [EAX];                // load 32 bytes of b
+                movq MM1, [EAX+8];
+                movq MM2, [EAX+16];
+                movq MM3, [EAX+24];
+                add EAX, 32;
+                movq MM4, [ECX];                // load 32 bytes of c
+                movq MM5, [ECX+8];
+                movq MM6, [ECX+16];
+                movq MM7, [ECX+24];
+                add ECX, 32;
+                paddb MM0, MM4;                 // packed byte-wise b + c
+                paddb MM1, MM5;
+                paddb MM2, MM6;
+                paddb MM3, MM7;
+                movq [ESI   -32], MM0;          // store the 32 results into a
+                movq [ESI+8 -32], MM1;
+                movq [ESI+16-32], MM2;
+                movq [ESI+24-32], MM3;
+                cmp ESI, EDI;
+                jb startaddlmmx;                // repeat while ESI < n
+
+                emms;                           // clear MMX state before any later x87 floating-point use
+                mov aptr, ESI;                  // write the cursors back for the scalar tail
+                mov bptr, EAX;
+                mov cptr, ECX;
+            }
+        }
+    }
+
+    version (log) if (aptr < aend) printf("\tbase\n");
+    while (aptr < aend)                         // scalar loop: leftover elements, or everything if no SIMD path ran
+        *aptr++ = cast(T)(*bptr++ + *cptr++);
+
+    return a;
+}
+
+unittest
+{
+    printf("_arraySliceSliceAddSliceAssign_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int skew = 0; skew < 2; skew++)
+        {
+            const int dim = 67;
+            // skew == 1 drops the first element so the data starts one element later
+            T[] a = (new T[dim + skew])[skew .. dim + skew];
+            T[] b = (new T[dim + skew])[skew .. dim + skew];
+            T[] c = (new T[dim + skew])[skew .. dim + skew];
+
+            for (int k = 0; k < dim; k++)
+            {
+                a[k] = cast(T)k;
+                b[k] = cast(T)(k + 7);
+                c[k] = cast(T)(k * 2);
+            }
+
+            c[] = a[] + b[];
+
+            for (int k = 0; k < dim; k++)
+            {
+                T expected = cast(T)(a[k] + b[k]);
+                if (c[k] != expected)
+                {
+                    printf("[%d]: %d != %d + %d\n", k, c[k], a[k], b[k]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] += value
+ */
+
+T[] _arrayExpSliceAddass_a(T[] a, T value)
+{   // Alternate entry point: passes (a, value) unchanged to the _g version and returns its result.
+    return _arrayExpSliceAddass_g(a, value);
+}
+
+T[] _arrayExpSliceAddass_h(T[] a, T value)
+{   // Alternate entry point: passes (a, value) unchanged to the _g version and returns its result.
+    return _arrayExpSliceAddass_g(a, value);
+}
+
+T[] _arrayExpSliceAddass_g(T[] a, T value)     // a[] += value, in place; returns a
+{
+    //printf("_arrayExpSliceAddass_g(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
+    auto aptr = a.ptr;                  // read/write cursor
+    auto aend = aptr + a.length;        // one past the last element
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 1578% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part made of whole 64-element chunks
+
+            uint l = cast(ubyte) value;         // replicate the low byte of value into all 4 bytes of l
+            l |= (l << 8);
+            l |= (l << 16);
+
+            if (((cast(uint) aptr) & 15) != 0)  // destination not 16-byte aligned?
+            {
+                asm // unaligned case
+                {
+                    mov ESI, aptr;              // ESI = cursor
+                    mov EDI, n;                 // EDI = loop end
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;       // broadcast the dword: XMM4 = 16 copies of the value byte
+
+                    align 8;
+                startaddasssse2u:
+                    movdqu XMM0, [ESI];         // load 64 bytes of a
+                    movdqu XMM1, [ESI+16];
+                    movdqu XMM2, [ESI+32];
+                    movdqu XMM3, [ESI+48];
+                    add ESI, 64;                // advance; the stores below address [ESI-64 ..]
+                    paddb XMM0, XMM4;           // packed byte-wise a + value
+                    paddb XMM1, XMM4;
+                    paddb XMM2, XMM4;
+                    paddb XMM3, XMM4;
+                    movdqu [ESI   -64], XMM0;   // store back over the same 64 bytes
+                    movdqu [ESI+16-64], XMM1;
+                    movdqu [ESI+32-64], XMM2;
+                    movdqu [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddasssse2u;        // unsigned compare: repeat while ESI < n
+
+                    mov aptr, ESI;              // scalar tail resumes here
+                }
+            }
+            else
+            {
+                asm // aligned case
+                {
+                    mov ESI, aptr;              // same loop as above, using aligned movdqa
+                    mov EDI, n;
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;
+
+                    align 8;
+                startaddasssse2a:
+                    movdqa XMM0, [ESI];
+                    movdqa XMM1, [ESI+16];
+                    movdqa XMM2, [ESI+32];
+                    movdqa XMM3, [ESI+48];
+                    add ESI, 64;
+                    paddb XMM0, XMM4;
+                    paddb XMM1, XMM4;
+                    paddb XMM2, XMM4;
+                    paddb XMM3, XMM4;
+                    movdqa [ESI   -64], XMM0;
+                    movdqa [ESI+16-64], XMM1;
+                    movdqa [ESI+32-64], XMM2;
+                    movdqa [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddasssse2a;
+
+                    mov aptr, ESI;
+                }
+            }
+        }
+        else
+        // MMX version is 1721% faster
+        if (mmx() && a.length >= 32)
+        {
+            auto n = aptr + (a.length & ~31);   // end of the part made of whole 32-element chunks
+
+            uint l = cast(ubyte) value;         // replicate the low byte of value into all 4 bytes of l
+            l |= (l << 8);
+            l |= (l << 16);
+
+            asm
+            {
+                mov ESI, aptr;                  // ESI = cursor
+                mov EDI, n;                     // EDI = loop end
+                movd MM4, l;
+                punpckldq MM4, MM4;             // copy low dword to high dword; plain MMX (pshufw would need SSE)
+
+                align 8;
+            startaddassmmx:
+                movq MM0, [ESI];                // load 32 bytes of a
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                add ESI, 32;                    // advance; the stores below address [ESI-32 ..]
+                paddb MM0, MM4;                 // packed byte-wise a + value
+                paddb MM1, MM4;
+                paddb MM2, MM4;
+                paddb MM3, MM4;
+                movq [ESI   -32], MM0;          // store back over the same 32 bytes
+                movq [ESI+8 -32], MM1;
+                movq [ESI+16-32], MM2;
+                movq [ESI+24-32], MM3;
+                cmp ESI, EDI;
+                jb startaddassmmx;              // repeat while ESI < n
+
+                emms;                           // clear MMX state before any later x87 floating-point use
+                mov aptr, ESI;                  // scalar tail resumes here
+            }
+        }
+    }
+
+    while (aptr < aend)                         // scalar loop: leftover elements, or everything if no SIMD path ran
+        *aptr++ += value;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arrayExpSliceAddass_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int skew = 0; skew < 2; skew++)
+        {
+            const int dim = 67;
+            // skew == 1 drops the first element so the data starts one element later
+            T[] a = (new T[dim + skew])[skew .. dim + skew];
+            T[] b = (new T[dim + skew])[skew .. dim + skew];
+            T[] c = (new T[dim + skew])[skew .. dim + skew];
+
+            for (int k = 0; k < dim; k++)
+            {
+                a[k] = cast(T)k;
+                b[k] = cast(T)(k + 7);
+                c[k] = cast(T)(k * 2);
+            }
+
+            a[] = c[];      // keep the pre-operation values of c for the check below
+            c[] += 6;
+
+            for (int k = 0; k < dim; k++)
+            {
+                T expected = cast(T)(a[k] + 6);
+                if (c[k] != expected)
+                {
+                    printf("[%d]: %d != %d + 6\n", k, c[k], a[k]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] += b[]
+ */
+
+T[] _arraySliceSliceAddass_a(T[] a, T[] b)
+{   // Alternate entry point: passes (a, b) unchanged to the _g version and returns its result.
+    return _arraySliceSliceAddass_g(a, b);
+}
+
+T[] _arraySliceSliceAddass_h(T[] a, T[] b)
+{   // Alternate entry point: passes (a, b) unchanged to the _g version and returns its result.
+    return _arraySliceSliceAddass_g(a, b);
+}
+
+T[] _arraySliceSliceAddass_g(T[] a, T[] b)     // a[] += b[], in place; returns a
+in
+{
+    assert (a.length == b.length);      // both slices share one length
+    assert (disjoint(a, b));            // disjoint() is defined outside this view; presumably "slices do not overlap"
+}
+body
+{
+    //printf("_arraySliceSliceAddass_g()\n");
+    auto aptr = a.ptr;                  // read/write cursor into a
+    auto aend = aptr + a.length;        // one past the last element of a
+    auto bptr = b.ptr;                  // read cursor into b
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 4727% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part made of whole 64-element chunks
+
+            if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0)   // either pointer not 16-byte aligned?
+            {
+                asm // unaligned case
+                {
+                    mov ESI, aptr;              // ESI = a cursor
+                    mov EDI, n;                 // EDI = loop end
+                    mov ECX, bptr;              // ECX = b cursor
+
+                    align 8;
+                startaddasslsse2u:
+                    movdqu XMM0, [ESI];         // load 64 bytes of a
+                    movdqu XMM1, [ESI+16];
+                    movdqu XMM2, [ESI+32];
+                    movdqu XMM3, [ESI+48];
+                    add ESI, 64;                // advance; the stores below address [ESI-64 ..]
+                    movdqu XMM4, [ECX];         // load 64 bytes of b
+                    movdqu XMM5, [ECX+16];
+                    movdqu XMM6, [ECX+32];
+                    movdqu XMM7, [ECX+48];
+                    add ECX, 64;
+                    paddb XMM0, XMM4;           // packed byte-wise a + b
+                    paddb XMM1, XMM5;
+                    paddb XMM2, XMM6;
+                    paddb XMM3, XMM7;
+                    movdqu [ESI   -64], XMM0;   // store back over the same 64 bytes of a
+                    movdqu [ESI+16-64], XMM1;
+                    movdqu [ESI+32-64], XMM2;
+                    movdqu [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddasslsse2u;       // unsigned compare: repeat while ESI < n
+
+                    mov aptr, ESI;              // write the cursors back so the scalar tail resumes here
+                    mov bptr, ECX;
+                }
+            }
+            else
+            {
+                asm // aligned case
+                {
+                    mov ESI, aptr;              // same loop as above, using aligned movdqa
+                    mov EDI, n;
+                    mov ECX, bptr;
+
+                    align 8;
+                startaddasslsse2a:
+                    movdqa XMM0, [ESI];
+                    movdqa XMM1, [ESI+16];
+                    movdqa XMM2, [ESI+32];
+                    movdqa XMM3, [ESI+48];
+                    add ESI, 64;
+                    movdqa XMM4, [ECX];
+                    movdqa XMM5, [ECX+16];
+                    movdqa XMM6, [ECX+32];
+                    movdqa XMM7, [ECX+48];
+                    add ECX, 64;
+                    paddb XMM0, XMM4;
+                    paddb XMM1, XMM5;
+                    paddb XMM2, XMM6;
+                    paddb XMM3, XMM7;
+                    movdqa [ESI   -64], XMM0;
+                    movdqa [ESI+16-64], XMM1;
+                    movdqa [ESI+32-64], XMM2;
+                    movdqa [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startaddasslsse2a;
+
+                    mov aptr, ESI;
+                    mov bptr, ECX;
+                }
+            }
+        }
+        else
+        // MMX version is 3059% faster
+        if (mmx() && a.length >= 32)
+        {
+
+            auto n = aptr + (a.length & ~31);   // end of the part made of whole 32-element chunks
+
+            asm
+            {
+                mov ESI, aptr;                  // ESI = a cursor
+                mov EDI, n;                     // EDI = loop end
+                mov ECX, bptr;                  // ECX = b cursor
+
+                align 8;
+            startaddasslmmx:
+                movq MM0, [ESI];                // load 32 bytes of a
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                add ESI, 32;                    // advance; the stores below address [ESI-32 ..]
+                movq MM4, [ECX];                // load 32 bytes of b
+                movq MM5, [ECX+8];
+                movq MM6, [ECX+16];
+                movq MM7, [ECX+24];
+                add ECX, 32;
+                paddb MM0, MM4;                 // packed byte-wise a + b
+                paddb MM1, MM5;
+                paddb MM2, MM6;
+                paddb MM3, MM7;
+                movq [ESI   -32], MM0;          // store back over the same 32 bytes of a
+                movq [ESI+8 -32], MM1;
+                movq [ESI+16-32], MM2;
+                movq [ESI+24-32], MM3;
+                cmp ESI, EDI;
+                jb startaddasslmmx;             // repeat while ESI < n
+
+                emms;                           // clear MMX state before any later x87 floating-point use
+                mov aptr, ESI;                  // write the cursors back for the scalar tail
+                mov bptr, ECX;
+            }
+        }
+    }
+
+    while (aptr < aend)                         // scalar loop: leftover elements, or everything if no SIMD path ran
+        *aptr++ += *bptr++;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arraySliceSliceAddass_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int skew = 0; skew < 2; skew++)
+        {
+            const int dim = 67;
+            // skew == 1 drops the first element so the data starts one element later
+            T[] a = (new T[dim + skew])[skew .. dim + skew];
+            T[] b = (new T[dim + skew])[skew .. dim + skew];
+            T[] c = (new T[dim + skew])[skew .. dim + skew];
+
+            for (int k = 0; k < dim; k++)
+            {
+                a[k] = cast(T)k;
+                b[k] = cast(T)(k + 7);
+                c[k] = cast(T)(k * 2);
+            }
+
+            a[] = c[];      // keep the pre-operation values of c for the check below
+            c[] += b[];
+
+            for (int k = 0; k < dim; k++)
+            {
+                T expected = cast(T)(a[k] + b[k]);
+                if (c[k] != expected)
+                {
+                    printf("[%d]: %d != %d + %d\n", k, c[k], a[k], b[k]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+
+/***********************
+ * Computes:
+ *      a[] = b[] - value
+ */
+
+T[] _arraySliceExpMinSliceAssign_a(T[] a, T value, T[] b)
+{   // Alternate entry point: passes (a, value, b) unchanged to the _g version and returns its result.
+    return _arraySliceExpMinSliceAssign_g(a, value, b);
+}
+
+T[] _arraySliceExpMinSliceAssign_h(T[] a, T value, T[] b)
+{   // Alternate entry point: passes (a, value, b) unchanged to the _g version and returns its result.
+    return _arraySliceExpMinSliceAssign_g(a, value, b);
+}
+
+T[] _arraySliceExpMinSliceAssign_g(T[] a, T value, T[] b)      // a[] = b[] - value; returns a
+in
+{
+    assert(a.length == b.length);       // both slices share one length
+    assert(disjoint(a, b));             // disjoint() is defined outside this view; presumably "slices do not overlap"
+}
+body
+{
+    //printf("_arraySliceExpMinSliceAssign_g()\n");
+    auto aptr = a.ptr;                  // write cursor into the destination
+    auto aend = aptr + a.length;        // one past the last destination element
+    auto bptr = b.ptr;                  // read cursor into the source
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 1189% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part made of whole 64-element chunks
+
+            uint l = cast(ubyte) value;         // replicate the low byte of value into all 4 bytes of l
+            l |= (l << 8);
+            l |= (l << 16);
+
+            if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0)   // either pointer not 16-byte aligned?
+            {
+                asm // unaligned case
+                {
+                    mov ESI, aptr;              // ESI = destination cursor
+                    mov EDI, n;                 // EDI = loop end
+                    mov EAX, bptr;              // EAX = source cursor
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;       // broadcast the dword: XMM4 = 16 copies of the value byte
+
+                    align 8;
+                startsubsse2u:
+                    add ESI, 64;                // advance first; the stores below address [ESI-64 ..]
+                    movdqu XMM0, [EAX];         // load 64 bytes of b
+                    movdqu XMM1, [EAX+16];
+                    movdqu XMM2, [EAX+32];
+                    movdqu XMM3, [EAX+48];
+                    add EAX, 64;
+                    psubb XMM0, XMM4;           // packed byte-wise b - value
+                    psubb XMM1, XMM4;
+                    psubb XMM2, XMM4;
+                    psubb XMM3, XMM4;
+                    movdqu [ESI   -64], XMM0;   // store the 64 results into a
+                    movdqu [ESI+16-64], XMM1;
+                    movdqu [ESI+32-64], XMM2;
+                    movdqu [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startsubsse2u;           // unsigned compare: repeat while ESI < n
+
+                    mov aptr, ESI;              // write the cursors back so the scalar tail resumes here
+                    mov bptr, EAX;
+                }
+            }
+            else
+            {
+                asm // aligned case
+                {
+                    mov ESI, aptr;              // same loop as above, using aligned movdqa
+                    mov EDI, n;
+                    mov EAX, bptr;
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;
+
+                    align 8;
+                startsubsse2a:
+                    add ESI, 64;
+                    movdqa XMM0, [EAX];
+                    movdqa XMM1, [EAX+16];
+                    movdqa XMM2, [EAX+32];
+                    movdqa XMM3, [EAX+48];
+                    add EAX, 64;
+                    psubb XMM0, XMM4;
+                    psubb XMM1, XMM4;
+                    psubb XMM2, XMM4;
+                    psubb XMM3, XMM4;
+                    movdqa [ESI   -64], XMM0;
+                    movdqa [ESI+16-64], XMM1;
+                    movdqa [ESI+32-64], XMM2;
+                    movdqa [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startsubsse2a;
+
+                    mov aptr, ESI;
+                    mov bptr, EAX;
+                }
+            }
+        }
+        else
+        // MMX version is 1079% faster
+        if (mmx() && a.length >= 32)
+        {
+            auto n = aptr + (a.length & ~31);   // end of the part made of whole 32-element chunks
+            uint l = cast(ubyte) value;         // replicate the low byte of value into all 4 bytes of l
+            l |= (l << 8);
+            l |= (l << 16);
+
+            asm
+            {
+                mov ESI, aptr;                  // ESI = destination cursor
+                mov EDI, n;                     // EDI = loop end
+                mov EAX, bptr;                  // EAX = source cursor
+                movd MM4, l;
+                punpckldq MM4, MM4;             // copy low dword to high dword; plain MMX (pshufw would need SSE)
+
+                align 4;
+            startsubmmx:
+                add ESI, 32;                    // advance first; the stores below address [ESI-32 ..]
+                movq MM0, [EAX];                // load 32 bytes of b
+                movq MM1, [EAX+8];
+                movq MM2, [EAX+16];
+                movq MM3, [EAX+24];
+                add EAX, 32;
+                psubb MM0, MM4;                 // packed byte-wise b - value
+                psubb MM1, MM4;
+                psubb MM2, MM4;
+                psubb MM3, MM4;
+                movq [ESI   -32], MM0;          // store the 32 results into a
+                movq [ESI+8 -32], MM1;
+                movq [ESI+16-32], MM2;
+                movq [ESI+24-32], MM3;
+                cmp ESI, EDI;
+                jb startsubmmx;                 // repeat while ESI < n
+
+                emms;                           // clear MMX state before any later x87 floating-point use
+                mov aptr, ESI;                  // write the cursors back for the scalar tail
+                mov bptr, EAX;
+            }
+        }
+        // To be fair to plain 32-bit CPUs, give them an unrolled asm loop like the SIMD paths get; there are not really enough registers for it.
+        else
+        if (a.length >= 4)
+        {
+            auto n = aptr + (a.length & ~3);    // end of the part made of whole 4-element chunks
+            asm
+            {
+                mov ESI, aptr;                  // ESI = destination cursor
+                mov EDI, n;                     // EDI = loop end
+                mov EAX, bptr;                  // EAX = source cursor
+                mov CL, value;                  // CL = the byte to subtract
+
+                align 4;
+            startsub386:
+                add ESI, 4;                     // advance first; the stores below address [ESI-4 ..]
+                mov DX, [EAX];                  // DL, DH = elements 0 and 1
+                mov BX, [EAX+2];                // BL, BH = elements 2 and 3
+                add EAX, 4;
+                sub BL, CL;                     // subtract from each byte half separately
+                sub BH, CL;
+                sub DL, CL;
+                sub DH, CL;
+                mov [ESI   -4], DX;             // store the 4 results into a
+                mov [ESI+2 -4], BX;
+                cmp ESI, EDI;
+                jb startsub386;                 // repeat while ESI < n
+
+                mov aptr, ESI;                  // write the cursors back for the scalar tail
+                mov bptr, EAX;
+            }
+        }
+    }
+
+    while (aptr < aend)                         // scalar loop: leftover elements, or everything if no asm path ran
+        *aptr++ = cast(T)(*bptr++ - value);
+
+    return a;
+}
+
+unittest
+{
+    printf("_arraySliceExpMinSliceAssign_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int skew = 0; skew < 2; skew++)
+        {
+            const int dim = 67;
+            // skew == 1 drops the first element so the data starts one element later
+            T[] a = (new T[dim + skew])[skew .. dim + skew];
+            T[] b = (new T[dim + skew])[skew .. dim + skew];
+            T[] c = (new T[dim + skew])[skew .. dim + skew];
+
+            for (int k = 0; k < dim; k++)
+            {
+                a[k] = cast(T)k;
+                b[k] = cast(T)(k + 7);
+                c[k] = cast(T)(k * 2);
+            }
+
+            a[] = c[];
+            c[] = b[] - 6;
+
+            for (int k = 0; k < dim; k++)
+            {
+                T expected = cast(T)(b[k] - 6);
+                if (c[k] != expected)
+                {
+                    printf("[%d]: %d != %d - 6\n", k, c[k], b[k]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] = value - b[]
+ */
+
+T[] _arrayExpSliceMinSliceAssign_a(T[] a, T[] b, T value)
+{   // Alternate entry point: passes (a, b, value) unchanged to the _g version and returns its result.
+    return _arrayExpSliceMinSliceAssign_g(a, b, value);
+}
+
+T[] _arrayExpSliceMinSliceAssign_h(T[] a, T[] b, T value)
+{   // Alternate entry point: passes (a, b, value) unchanged to the _g version and returns its result.
+    return _arrayExpSliceMinSliceAssign_g(a, b, value);
+}
+
+T[] _arrayExpSliceMinSliceAssign_g(T[] a, T[] b, T value)      // a[] = value - b[]; returns a
+in
+{
+    assert(a.length == b.length);       // both slices share one length
+    assert(disjoint(a, b));             // disjoint() is defined outside this view; presumably "slices do not overlap"
+}
+body
+{
+    //printf("_arrayExpSliceMinSliceAssign_g()\n");
+    auto aptr = a.ptr;                  // write cursor into the destination
+    auto aend = aptr + a.length;        // one past the last destination element
+    auto bptr = b.ptr;                  // read cursor into the source
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 8748% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part made of whole 64-element chunks
+
+            uint l = cast(ubyte) value;         // replicate the low byte of value into all 4 bytes of l
+            l |= (l << 8);
+            l |= (l << 16);
+
+            if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0)   // either pointer not 16-byte aligned?
+            {
+                asm // unaligned case
+                {
+                    mov ESI, aptr;              // ESI = destination cursor
+                    mov EDI, n;                 // EDI = loop end
+                    mov EAX, bptr;              // EAX = source cursor
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;       // broadcast the dword: XMM4 = 16 copies of the value byte
+
+                    align 8;
+                startsubrsse2u:
+                    add ESI, 64;                // advance first; the stores below address [ESI-64 ..]
+                    movdqa XMM5, XMM4;          // fresh copies of the constant: psubb overwrites its destination
+                    movdqa XMM6, XMM4;
+                    movdqu XMM0, [EAX];         // first 32 bytes of b
+                    movdqu XMM1, [EAX+16];
+                    psubb XMM5, XMM0;           // packed byte-wise value - b
+                    psubb XMM6, XMM1;
+                    movdqu [ESI   -64], XMM5;
+                    movdqu [ESI+16-64], XMM6;
+                    movdqa XMM5, XMM4;          // reload the constant for the second 32 bytes
+                    movdqa XMM6, XMM4;
+                    movdqu XMM2, [EAX+32];
+                    movdqu XMM3, [EAX+48];
+                    add EAX, 64;
+                    psubb XMM5, XMM2;
+                    psubb XMM6, XMM3;
+                    movdqu [ESI+32-64], XMM5;
+                    movdqu [ESI+48-64], XMM6;
+                    cmp ESI, EDI;
+                    jb startsubrsse2u;          // unsigned compare: repeat while ESI < n
+
+                    mov aptr, ESI;              // write the cursors back so the scalar tail resumes here
+                    mov bptr, EAX;
+                }
+            }
+            else
+            {
+                asm // aligned case
+                {
+                    mov ESI, aptr;              // same loop as above, using aligned movdqa for memory
+                    mov EDI, n;
+                    mov EAX, bptr;
+                    movd XMM4, l;
+                    pshufd XMM4, XMM4, 0;
+
+                    align 8;
+                startsubrsse2a:
+                    add ESI, 64;
+                    movdqa XMM5, XMM4;
+                    movdqa XMM6, XMM4;
+                    movdqa XMM0, [EAX];
+                    movdqa XMM1, [EAX+16];
+                    psubb XMM5, XMM0;
+                    psubb XMM6, XMM1;
+                    movdqa [ESI   -64], XMM5;
+                    movdqa [ESI+16-64], XMM6;
+                    movdqa XMM5, XMM4;
+                    movdqa XMM6, XMM4;
+                    movdqa XMM2, [EAX+32];
+                    movdqa XMM3, [EAX+48];
+                    add EAX, 64;
+                    psubb XMM5, XMM2;
+                    psubb XMM6, XMM3;
+                    movdqa [ESI+32-64], XMM5;
+                    movdqa [ESI+48-64], XMM6;
+                    cmp ESI, EDI;
+                    jb startsubrsse2a;
+
+                    mov aptr, ESI;
+                    mov bptr, EAX;
+                }
+            }
+        }
+        else
+        // MMX version is 7397% faster
+        if (mmx() && a.length >= 32)
+        {
+            auto n = aptr + (a.length & ~31);   // end of the part made of whole 32-element chunks
+
+            uint l = cast(ubyte) value;         // replicate the low byte of value into all 4 bytes of l
+            l |= (l << 8);
+            l |= (l << 16);
+
+            asm
+            {
+                mov ESI, aptr;                  // ESI = destination cursor
+                mov EDI, n;                     // EDI = loop end
+                mov EAX, bptr;                  // EAX = source cursor
+                movd MM4, l;
+                punpckldq MM4, MM4;             // copy low dword to high dword; plain MMX (pshufw would need SSE)
+
+                align 4;
+            startsubrmmx:
+                add ESI, 32;                    // advance first; the stores below address [ESI-32 ..]
+                movq MM5, MM4;                  // fresh copies of the constant: psubb overwrites its destination
+                movq MM6, MM4;
+                movq MM0, [EAX];                // first 16 bytes of b
+                movq MM1, [EAX+8];
+                psubb MM5, MM0;                 // packed byte-wise value - b
+                psubb MM6, MM1;
+                movq [ESI   -32], MM5;
+                movq [ESI+8 -32], MM6;
+                movq MM5, MM4;                  // reload the constant for the second 16 bytes
+                movq MM6, MM4;
+                movq MM2, [EAX+16];
+                movq MM3, [EAX+24];
+                add EAX, 32;
+                psubb MM5, MM2;
+                psubb MM6, MM3;
+                movq [ESI+16-32], MM5;
+                movq [ESI+24-32], MM6;
+                cmp ESI, EDI;
+                jb startsubrmmx;                // repeat while ESI < n
+
+                emms;                           // clear MMX state before any later x87 floating-point use
+                mov aptr, ESI;                  // write the cursors back for the scalar tail
+                mov bptr, EAX;
+            }
+        }
+    }
+
+    while (aptr < aend)                         // scalar loop: leftover elements, or everything if no SIMD path ran
+        *aptr++ = cast(T)(value - *bptr++);
+
+    return a;
+}
+
+unittest
+{
+    printf("_arrayExpSliceMinSliceAssign_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int skew = 0; skew < 2; skew++)
+        {
+            const int dim = 67;
+            // skew == 1 drops the first element so the data starts one element later
+            T[] a = (new T[dim + skew])[skew .. dim + skew];
+            T[] b = (new T[dim + skew])[skew .. dim + skew];
+            T[] c = (new T[dim + skew])[skew .. dim + skew];
+
+            for (int k = 0; k < dim; k++)
+            {
+                a[k] = cast(T)k;
+                b[k] = cast(T)(k + 7);
+                c[k] = cast(T)(k * 2);
+            }
+
+            a[] = c[];
+            c[] = 6 - b[];
+
+            for (int k = 0; k < dim; k++)
+            {
+                T expected = cast(T)(6 - b[k]);
+                if (c[k] != expected)
+                {
+                    printf("[%d]: %d != 6 - %d\n", k, c[k], b[k]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] = b[] - c[]
+ */
+
+T[] _arraySliceSliceMinSliceAssign_a(T[] a, T[] c, T[] b)
+{   // Alternate entry point: passes (a, c, b) unchanged to the _g version and returns its result.
+    return _arraySliceSliceMinSliceAssign_g(a, c, b);
+}
+
+T[] _arraySliceSliceMinSliceAssign_h(T[] a, T[] c, T[] b)
+{   // Alternate entry point: passes (a, c, b) unchanged to the _g version and returns its result.
+    return _arraySliceSliceMinSliceAssign_g(a, c, b);
+}
+
+T[] _arraySliceSliceMinSliceAssign_g(T[] a, T[] c, T[] b)   // a[] = b[] - c[]; returns a (parameter order is a, c, b)
+in
+{
+        assert(a.length == b.length && b.length == c.length);   // all three slices share one length
+        assert(disjoint(a, b));     // disjoint() is defined outside this view; presumably "slices do not overlap"
+        assert(disjoint(a, c));
+        assert(disjoint(b, c));
+}
+body
+{
+    auto aptr = a.ptr;                  // write cursor into the destination
+    auto aend = aptr + a.length;        // one past the last destination element
+    auto bptr = b.ptr;                  // read cursor for the minuend
+    auto cptr = c.ptr;                  // read cursor for the subtrahend
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE2 aligned version is 5756% faster
+        if (sse2() && a.length >= 64)
+        {
+            auto n = aptr + (a.length & ~63);   // end of the part made of whole 64-element chunks
+
+            if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0)   // any pointer not 16-byte aligned?
+            {
+                asm // unaligned case
+                {
+                    mov ESI, aptr;              // ESI = destination cursor
+                    mov EDI, n;                 // EDI = loop end
+                    mov EAX, bptr;              // EAX = b cursor
+                    mov ECX, cptr;              // ECX = c cursor
+
+                    align 8;
+                startsublsse2u:
+                    add ESI, 64;                // advance first; the stores below address [ESI-64 ..]
+                    movdqu XMM0, [EAX];         // load 64 bytes of b
+                    movdqu XMM1, [EAX+16];
+                    movdqu XMM2, [EAX+32];
+                    movdqu XMM3, [EAX+48];
+                    add EAX, 64;
+                    movdqu XMM4, [ECX];         // load 64 bytes of c
+                    movdqu XMM5, [ECX+16];
+                    movdqu XMM6, [ECX+32];
+                    movdqu XMM7, [ECX+48];
+                    add ECX, 64;
+                    psubb XMM0, XMM4;           // packed byte-wise b - c
+                    psubb XMM1, XMM5;
+                    psubb XMM2, XMM6;
+                    psubb XMM3, XMM7;
+                    movdqu [ESI   -64], XMM0;   // store the 64 results into a
+                    movdqu [ESI+16-64], XMM1;
+                    movdqu [ESI+32-64], XMM2;
+                    movdqu [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startsublsse2u;          // unsigned compare: repeat while ESI < n
+
+                    mov aptr, ESI;              // write the cursors back so the scalar tail resumes here
+                    mov bptr, EAX;
+                    mov cptr, ECX;
+                }
+            }
+            else
+            {
+                asm // aligned case
+                {
+                    mov ESI, aptr;              // same loop as above, using aligned movdqa
+                    mov EDI, n;
+                    mov EAX, bptr;
+                    mov ECX, cptr;
+
+                    align 8;
+                startsublsse2a:
+                    add ESI, 64;
+                    movdqa XMM0, [EAX];
+                    movdqa XMM1, [EAX+16];
+                    movdqa XMM2, [EAX+32];
+                    movdqa XMM3, [EAX+48];
+                    add EAX, 64;
+                    movdqa XMM4, [ECX];
+                    movdqa XMM5, [ECX+16];
+                    movdqa XMM6, [ECX+32];
+                    movdqa XMM7, [ECX+48];
+                    add ECX, 64;
+                    psubb XMM0, XMM4;
+                    psubb XMM1, XMM5;
+                    psubb XMM2, XMM6;
+                    psubb XMM3, XMM7;
+                    movdqa [ESI   -64], XMM0;
+                    movdqa [ESI+16-64], XMM1;
+                    movdqa [ESI+32-64], XMM2;
+                    movdqa [ESI+48-64], XMM3;
+                    cmp ESI, EDI;
+                    jb startsublsse2a;
+
+                    mov aptr, ESI;
+                    mov bptr, EAX;
+                    mov cptr, ECX;
+                }
+            }
+        }
+        else
+        // MMX version is 4428% faster
+        if (mmx() && a.length >= 32)
+        {
+            auto n = aptr + (a.length & ~31);   // end of the part made of whole 32-element chunks
+
+            asm
+            {
+                mov ESI, aptr;                  // ESI = destination cursor
+                mov EDI, n;                     // EDI = loop end
+                mov EAX, bptr;                  // EAX = b cursor
+                mov ECX, cptr;                  // ECX = c cursor
+
+                align 8;
+            startsublmmx:
+                add ESI, 32;                    // advance first; the stores below address [ESI-32 ..]
+                movq MM0, [EAX];                // load 32 bytes of b
+                movq MM1, [EAX+8];
+                movq MM2, [EAX+16];
+                movq MM3, [EAX+24];
+                add EAX, 32;
+                movq MM4, [ECX];                // load 32 bytes of c
+                movq MM5, [ECX+8];
+                movq MM6, [ECX+16];
+                movq MM7, [ECX+24];
+                add ECX, 32;
+                psubb MM0, MM4;                 // packed byte-wise b - c
+                psubb MM1, MM5;
+                psubb MM2, MM6;
+                psubb MM3, MM7;
+                movq [ESI   -32], MM0;          // store the 32 results into a
+                movq [ESI+8 -32], MM1;
+                movq [ESI+16-32], MM2;
+                movq [ESI+24-32], MM3;
+                cmp ESI, EDI;
+                jb startsublmmx;                // repeat while ESI < n
+
+                emms;                           // clear MMX state before any later x87 floating-point use
+                mov aptr, ESI;                  // write the cursors back for the scalar tail
+                mov bptr, EAX;
+                mov cptr, ECX;
+            }
+        }
+    }
+
+    while (aptr < aend)                         // scalar loop: leftover elements, or everything if no SIMD path ran
+        *aptr++ = cast(T)(*bptr++ - *cptr++);
+
+    return a;
+}
+
+unittest
+{
+    printf("_arraySliceSliceMinSliceAssign_g unittest\n");
+
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf("    cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. dim + j];
+