Mercurial > projects > ldc
diff tango/tango/math/IEEE.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tango/tango/math/IEEE.d Fri Jan 11 17:57:40 2008 +0100 @@ -0,0 +1,1704 @@ +/** + * Low-level Mathematical Functions which take advantage of the IEEE754 ABI. + * + * Copyright: Portions Copyright (C) 2001-2005 Digital Mars. + * License: BSD style: $(LICENSE), Digital Mars. + * Authors: Don Clugston, Walter Bright, Sean Kelly + */ +/* Portions of this code were taken from Phobos std.math, which has the following + * copyright notice: + * + * Author: + * Walter Bright + * Copyright: + * Copyright (c) 2001-2005 by Digital Mars, + * All Rights Reserved, + * www.digitalmars.com + * License: + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * <ul> + * <li> The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * </li> + * <li> Altered source versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * </li> + * <li> This notice may not be removed or altered from any source + * distribution. 
* </li>
 * </ul>
 */
/**
 * Macros:
 *
 *  TABLE_SV = <table border=1 cellpadding=4 cellspacing=0>
 *      <caption>Special Values</caption>
 *      $0</table>
 *  SVH = $(TR $(TH $1) $(TH $2))
 *  SV = $(TR $(TD $1) $(TD $2))
 *  SVH3 = $(TR $(TH $1) $(TH $2) $(TH $3))
 *  SV3 = $(TR $(TD $1) $(TD $2) $(TD $3))
 *  NAN = $(RED NAN)
 */
module tango.math.IEEE;

// DigitalMars_D_InlineAsm_X86 is set only when BOTH DigitalMars and
// D_InlineAsm_X86 are set.
version(DigitalMars)
{
    version(D_InlineAsm_X86)
    {
        version = DigitalMars_D_InlineAsm_X86;
    }
}

// X86_Any is set for either X86 or X86_64.
version (X86){
    version = X86_Any;
}

version (X86_64){
    version = X86_Any;
}

version (DigitalMars_D_InlineAsm_X86) {
    // Don't include this extra dependency unless we need to.
    debug(UnitTest) {
        static import tango.stdc.math;
    }
} else {
    // Needed for cos(), sin(), tan() on GNU.
    static import tango.stdc.math;
}

// Standard Tango NaN payloads.
// NOTE: These values may change in future Tango releases
// The lowest three bits indicate the cause of the NaN:
// 0 = error other than those listed below:
// 1 = domain error
// 2 = singularity
// 3 = range
// 4-7 = reserved.
enum TANGO_NAN {
    // General errors
    DOMAIN_ERROR = 0x0101,
    SINGULARITY = 0x0102,
    RANGE_ERROR = 0x0103,
    // NaNs created by functions in the basic library
    TAN_DOMAIN = 0x1001,
    POW_DOMAIN = 0x1021,
    GAMMA_DOMAIN = 0x1101,
    GAMMA_POLE = 0x1102,
    SGNGAMMA = 0x1112,
    BETA_DOMAIN = 0x1131,
    // NaNs from statistical functions
    NORMALDISTRIBUTION_INV_DOMAIN = 0x2001,
    STUDENTSDDISTRIBUTION_DOMAIN = 0x2011
}

/* Most of the functions depend on the format of the largest IEEE floating-point type.
 * This code will differ depending on whether 'real' is 64, 80, or 128 bits,
 * and whether it is a big-endian or little-endian architecture.
 * Only three 'real' ABIs are currently supported:
 * 64 bit Big-endian (eg PowerPC)
 * 64 bit Little-endian
 * 80 bit Little-endian, with implied bit (eg x87, Itanium).
* There is also an unsupported ABI which does not follow IEEE; several of its functions
 * will generate run-time errors if used.
 * 128 bit Big-endian (double-double, as used by GDC <= 0.23)
 */

// Compile-time guard: reject any 'real' layout other than the ones named in
// the messages below.
version(LittleEndian) {
    static assert(real.mant_dig == 53 || real.mant_dig==64,
        "Only 64-bit and 80-bit reals are supported for LittleEndian CPUs");
} else {
    static assert(real.mant_dig == 53 || real.mant_dig==106,
        "Only 64-bit reals are supported for BigEndian CPUs. 106-bit reals have partial support");
}

/** IEEE exception status flags

 These flags indicate that an exceptional floating-point condition has occurred.
 They indicate that a NaN or an infinity has been generated, that a result
 is inexact, or that a signalling NaN has been encountered.
 The return values of the properties should be treated as booleans, although
 each is returned as an int, for speed.

 Example:
 ----
    real a=3.5;
    // Set all the flags to zero
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
    // Perform a division by zero.
    a/=0.0L;
    assert(a==real.infinity);
    assert(ieeeFlags.divByZero);
    // Create a NaN
    a*=0.0L;
    assert(ieeeFlags.invalid);
    assert(isNaN(a));

    // Check that calling func() has no effect on the
    // status flags.
    IeeeFlags f = ieeeFlags;
    func();
    assert(ieeeFlags == f);

 ----
 */
struct IeeeFlags
{
private:
    // The x87 FPU status register is 16 bits.
    // The Pentium SSE2 status register is 32 bits.
    int m_flags;
    // NOTE(review): masks are declared only for X86_Any and PPC; on any other
    // architecture the accessors below presumably fail to compile -- confirm.
    version (X86_Any) {
        // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
        enum : int {
            INEXACT_MASK = 0x20,
            UNDERFLOW_MASK = 0x10,
            OVERFLOW_MASK = 0x08,
            DIVBYZERO_MASK = 0x04,
            INVALID_MASK = 0x01
        }
        // Don't bother about denormals, they are not supported on all CPUs.
        //const int DENORMAL_MASK = 0x02;
    } else version (PPC) {
        // PowerPC FPSCR is a 32-bit register.
        enum : int {
            INEXACT_MASK = 0x600,
            UNDERFLOW_MASK = 0x010,
            OVERFLOW_MASK = 0x008,
            DIVBYZERO_MASK = 0x020,
            INVALID_MASK = 0xF80
        }
    }
private:
    // Reads the x87 status word. Has no D return statement: the value is
    // left in EAX by the asm block (see the comment inside).
    static IeeeFlags getIeeeFlags()
    {
        // This is a highly time-critical operation, and
        // should really be an intrinsic. In this case, we
        // take advantage of the fact that for DMD
        // a struct containing only a int is returned in EAX.
        version(D_InlineAsm_X86) {
            asm {
                fstsw AX;
                // NOTE: If compiler supports SSE2, need to OR the result with
                // the SSE2 status register.
                // Clear all irrelevant bits
                // (0x03D is the OR of the five X86 *_MASK values above).
                and EAX, 0x03D;
            }
        } else {
            assert(0, "Not yet supported");
        }
    }
    // Clears the x87 exception flags (fnclex); asserts on other targets.
    static void resetIeeeFlags()
    {
        version(D_InlineAsm_X86) {
            asm {
                fnclex;
            }
        } else {
            assert(0, "Not yet supported");
        }
    }
public:
    // Each accessor returns the masked bits of m_flags: non-zero means "set".

    /// The result cannot be represented exactly, so rounding occurred.
    /// (example: x = sin(0.1); )
    int inexact() { return m_flags & INEXACT_MASK; }
    /// A zero was generated by underflow (example: x = real.min*real.epsilon/2;)
    int underflow() { return m_flags & UNDERFLOW_MASK; }
    /// An infinity was generated by overflow (example: x = real.max*2;)
    int overflow() { return m_flags & OVERFLOW_MASK; }
    /// An infinity was generated by division by zero (example: x = 3/0.0; )
    int divByZero() { return m_flags & DIVBYZERO_MASK; }
    /// A machine NaN was generated. (example: x = real.infinity * 0.0; )
    int invalid() { return m_flags & INVALID_MASK; }
}

/// Return a snapshot of the current state of the floating-point status flags.
IeeeFlags ieeeFlags() { return IeeeFlags.getIeeeFlags(); }

/// Set all of the floating-point status flags to false.
void resetIeeeFlags() { IeeeFlags.resetIeeeFlags; }

/** IEEE rounding modes.
 * The default mode is ROUNDTONEAREST.
 *
 * Every value lies within the 0x0C00 bit field that setIeeeRounding and
 * getIeeeRounding mask in the x87 control word.
 */
enum RoundingMode : short {
    ROUNDTONEAREST = 0x0000,
    ROUNDDOWN = 0x0400,
    ROUNDUP = 0x0800,
    ROUNDTOZERO = 0x0C00
};

/** Change the rounding mode used for all floating-point operations.
*
 * Returns the old rounding mode.
 *
 * When changing the rounding mode, it is almost always necessary to restore it
 * at the end of the function. Typical usage:
---
    auto oldrounding = setIeeeRounding(RoundingMode.ROUNDDOWN);
    scope (exit) setIeeeRounding(oldrounding);
---
 */
RoundingMode setIeeeRounding(RoundingMode roundingmode) {
    version(D_InlineAsm_X86) {
        // TODO: For SSE/SSE2, do we also need to set the SSE rounding mode?
        // No D return statement: the old mode is left in EAX by the asm block.
        // NOTE(review): presumably relies on the compiler returning the value
        // found in EAX -- confirm for compilers other than DMD.
        short cont;
        asm {
            fstcw cont;          // store the current control word
            mov CX, cont;
            mov AX, cont;
            and EAX, 0x0C00; // Form the return value
            and CX, 0xF3FF;      // clear the 0x0C00 field ...
            or CX, roundingmode; // ... and insert the requested mode
            mov cont, CX;
            fldcw cont;          // load the modified control word
        }
    } else {
        assert(0, "Not yet supported");
    }
}

/** Get the IEEE rounding mode which is in use.
 *
 * Only implemented for D_InlineAsm_X86; asserts on other targets.
 */
RoundingMode getIeeeRounding() {
    version(D_InlineAsm_X86) {
        // TODO: For SSE/SSE2, do we also need to check the SSE rounding mode?
        // As in setIeeeRounding, the result is left in EAX by the asm block.
        short cont;
        asm {
            mov EAX, 0x0C00;
            fstcw cont;
            and AX, cont;    // keep only the 0x0C00 field of the control word
        }
    } else {
        assert(0, "Not yet supported");
    }
}

debug(UnitTest) {
    version(D_InlineAsm_X86) { // Won't work for anything else yet
unittest {
    real a = 3.5;
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
    a /= 0.0L;
    assert(ieeeFlags.divByZero);
    assert(a == real.infinity);
    a *= 0.0L;
    assert(ieeeFlags.invalid);
    assert(isNaN(a));
    a = real.max;
    a *= 2;
    assert(ieeeFlags.overflow);
    a = real.min * real.epsilon;
    a /= 99;
    assert(ieeeFlags.underflow);
    assert(ieeeFlags.inexact);

    int r = getIeeeRounding;
    assert(r == RoundingMode.ROUNDTONEAREST);
}
}
}

// Note: Itanium supports more precision options than this. SSE/SSE2 does not support any.
// Every value lies within the 0x0300 bit field that reduceRealPrecision masks.
enum PrecisionControl : short {
    PRECISION80 = 0x300,
    PRECISION64 = 0x200,
    PRECISION32 = 0x000
};

/** Set the number of bits of precision used by 'real'.
 *
 * Returns: the old precision.
 * This is not supported on all platforms.
*/
PrecisionControl reduceRealPrecision(PrecisionControl prec) {
    version(D_InlineAsm_X86) {
        // No D return statement: the old precision field is left in EAX.
        short cont;
        asm {
            fstcw cont;
            mov CX, cont;
            mov AX, cont;
            and EAX, 0x0300; // Form the return value
            and CX, 0xFCFF;  // clear the 0x0300 field
            or CX, prec;     // insert the requested precision
            mov cont, CX;
            fldcw cont;
        }
    } else {
        assert(0, "Not yet supported");
    }
}

/**
 * Separate floating point value into significand and exponent.
 *
 * Params:
 *  value = the number to decompose
 *  exp   = receives the binary exponent
 *
 * Returns:
 *  Calculate and return <i>x</i> and exp such that
 *  value =<i>x</i>*2$(SUP exp) and
 *  .5 <= |<i>x</i>| < 1.0<br>
 *  <i>x</i> has same sign as value.
 *
 *  $(TABLE_SV
 *  <tr> <th> value <th> returns <th> exp
 *  <tr> <td> ±0.0 <td> ±0.0 <td> 0
 *  <tr> <td> +∞ <td> +∞ <td> int.max
 *  <tr> <td> -∞ <td> -∞ <td> int.min
 *  <tr> <td> ±$(NAN) <td> ±$(NAN) <td> int.min
 *  )
 */
real frexp(real value, out int exp)
{
    ushort* vu = cast(ushort*)&value;
    long* vl = cast(long*)&value;
    uint ex;

    static if (real.mant_dig==64) const ushort EXPMASK = 0x7FFF;
    else const ushort EXPMASK = 0x7FF0;

    version(LittleEndian) {
        static if (real.mant_dig==64) const int EXPONENTPOS = 4;
        else const int EXPONENTPOS = 3;
    } else { // BigEndian
        const int EXPONENTPOS = 0;
    }

    ex = vu[EXPONENTPOS] & EXPMASK;
    static if (real.mant_dig == 64) {
        // 80-bit reals
        if (ex) { // If exponent is non-zero
            if (ex == EXPMASK) { // infinity or NaN
                // 80-bit reals
                if (*vl & 0x7FFFFFFFFFFFFFFF) { // NaN
                    *vl |= 0xC000000000000000; // convert $(NAN)S to $(NAN)Q
                    exp = int.min;
                } else if (vu[EXPONENTPOS] & 0x8000) { // negative infinity
                    exp = int.min;
                } else { // positive infinity
                    exp = int.max;
                }
            } else {
                exp = ex - 0x3FFE;
                vu[EXPONENTPOS] = cast(ushort)((0x8000 & vu[EXPONENTPOS]) | 0x3FFE);
            }
        } else if (!*vl) {
            // value is +-0.0
            exp = 0;
        } else {
            // denormal: shift the significand up until its top bit is set,
            // counting the shifts in the exponent.
            int i = -0x3FFD;
            do {
                i--;
                *vl <<= 1;
            } while (*vl > 0);
            exp = i;
            vu[EXPONENTPOS] = cast(ushort)((0x8000 & vu[EXPONENTPOS]) | 0x3FFE);
        }
    } else static if(real.mant_dig==106) {
        // 128-bit reals
        assert(0, "Unsupported");
    } else {
        // 64-bit reals. The top 16-bit word holds sign (0x8000), the 11-bit
        // exponent (0x7FF0) and the four highest mantissa bits (0x000F).
        if (ex) { // If exponent is non-zero
            if (ex == EXPMASK) { // infinity or NaN
                if (*vl==0x7FF0_0000_0000_0000) { // positive infinity
                    exp = int.max;
                } else if (*vl==0xFFF0_0000_0000_0000) { // negative infinity
                    exp = int.min;
                } else { // NaN
                    *vl |= 0x0008_0000_0000_0000; // convert $(NAN)S to $(NAN)Q
                    exp = int.min;
                }
            } else {
                // Signed arithmetic: values below 0.5 have ex < 0x3FE0.
                exp = (cast(int)ex - 0x3FE0) >> 4;
                // Keep sign and the four mantissa bits; force the exponent
                // field to that of [0.5, 1).
                vu[EXPONENTPOS] = cast(ushort)((0x800F & vu[EXPONENTPOS]) | 0x3FE0);
            }
        } else if (!(*vl & 0x7FFF_FFFF_FFFF_FFFF)) {
            // value is +-0.0
            exp = 0;
        } else {
            // denormal: scale by 2^52 (exact) to make it normal, decompose
            // as above, then compensate the exponent for the scaling.
            value *= 0x1p+52;
            ex = vu[EXPONENTPOS] & EXPMASK;
            exp = ((cast(int)ex - 0x3FE0) >> 4) - 52;
            vu[EXPONENTPOS] = cast(ushort)((0x800F & vu[EXPONENTPOS]) | 0x3FE0);
        }
    }
    return value;
}

debug(UnitTest) {

unittest
{
    static real vals[][3] = // x,frexp,exp
    [
        [0.0, 0.0, 0],
        [-0.0, -0.0, 0],
        [1.0, .5, 1],
        [-1.0, -.5, 1],
        [2.0, .5, 2],
        [double.min/2.0, .5, -1022],
        [real.infinity,real.infinity,int.max],
        [-real.infinity,-real.infinity,int.min],
        [real.nan,real.nan,int.min],
        [-real.nan,-real.nan,int.min],
    ];

    int i;

    for (i = 0; i < vals.length; i++) {
        real x = vals[i][0];
        real e = vals[i][1];
        int exp = cast(int)vals[i][2];
        int eptr;
        real v = frexp(x, eptr);
//        printf("frexp(%La) = %La, should be %La, eptr = %d, should be %d\n", x, v, e, eptr, exp);
        assert(isIdentical(e, v));
        assert(exp == eptr);

    }
    static if (real.mant_dig == 64) {
        static real extendedvals[][3] = [ // x,frexp,exp
            [0x1.a5f1c2eb3fe4efp+73, 0x1.A5F1C2EB3FE4EFp-1, 74], // normal
            [0x1.fa01712e8f0471ap-1064, 0x1.fa01712e8f0471ap-1, -1063],
            [real.min, .5, -16381],
            [real.min/2.0L, .5, -16382] // denormal
        ];

        for (i = 0; i < extendedvals.length; i++) {
            real x = extendedvals[i][0];
            real e = extendedvals[i][1];
            int exp = cast(int)extendedvals[i][2];
            int eptr;
            real v = frexp(x, eptr);
            assert(isIdentical(e, v));
            assert(exp == eptr);

        }
    }
}
}

/**
 * Compute n * 2$(SUP exp)
 * References: frexp
 */
real ldexp(real n, int exp) /* intrinsic */
{
    version(DigitalMars_D_InlineAsm_X86)
    {
        // The result is left on the x87 stack by the asm block.
        asm
        {
            fild exp;
            fld n;
            fscale;
            fstp st(1), st(0);
        }
    }
    else
    {
        return tango.stdc.math.ldexpl(n, exp);
    }
}

/**
 * Extracts the exponent of x as a signed integral value.
 *
 * If x is not a special value, the result is the same as
 * <tt>cast(int)logb(x)</tt>.
 *
 * Remarks: This function is consistent with IEEE754R, but it
 * differs from the C function of the same name
 * in the return value of infinity. (in C, ilogb(real.infinity)== int.max).
 * Note that the special return values may all be equal.
 *
 * $(TABLE_SV
 * <tr> <th> x <th>ilogb(x) <th>invalid?
 * <tr> <td> 0 <td> FP_ILOGB0 <th> yes
 * <tr> <td> ±∞ <td> FP_ILOGBINFINITY <th> yes
 * <tr> <td> $(NAN) <td> FP_ILOGBNAN <th> yes
 * )
 */
int ilogb(real x)
{
    version(DigitalMars_D_InlineAsm_X86)
    {
        int y;
        asm {
            fld x;
            fxtract;
            fstp ST(0), ST; // drop significand
            fistp y, ST(0); // and return the exponent
        }
        return y;
    } else static if (real.mant_dig==64) { // 80-bit reals
        short e = (cast(short *)&x)[4] & 0x7FFF;
        if (e == 0x7FFF) {
            // BUG: should also set the invalid exception
            ulong s = *cast(ulong *)&x;
            if (s == 0x8000_0000_0000_0000) {
                return FP_ILOGBINFINITY;
            }
            else return FP_ILOGBNAN;
        }
        if (e==0) {
            ulong s = *cast(ulong *)&x;
            if (s == 0x0000_0000_0000_0000) {
                // BUG: should also set the invalid exception
                return FP_ILOGB0;
            }
            // Denormals: scale by 2^63 so the value becomes normal, then
            // undo the scaling in the exponent.
            x *= 0x1p+63;
            // Mask off the sign bit so negative denormals give the same
            // exponent as positive ones.
            short f = (cast(short *)&x)[4] & 0x7FFF;
            return -0x3FFF - (63-f);

        }
        return e - 0x3FFF;
    } else {
        return tango.stdc.math.ilogbl(x);
    }
}

version (X86)
{
    const int FP_ILOGB0 = -int.max-1;
    const int FP_ILOGBNAN = -int.max-1;
    const int FP_ILOGBINFINITY = -int.max-1;
} else {
    alias tango.stdc.math.FP_ILOGB0 FP_ILOGB0;
    alias tango.stdc.math.FP_ILOGBNAN FP_ILOGBNAN;
    const int FP_ILOGBINFINITY = int.max;
}

debug(UnitTest) {
unittest {
    assert(ilogb(1.0) == 0);
    assert(ilogb(65536) == 16);
    assert(ilogb(-65536) == 16);
    assert(ilogb(1.0 / 65536) == -16);
    assert(ilogb(real.nan) == FP_ILOGBNAN);
    assert(ilogb(0.0) == FP_ILOGB0);
    assert(ilogb(-0.0) == FP_ILOGB0);
    // denormal
    assert(ilogb(0.125 * real.min) == real.min_exp - 4);
    assert(ilogb(real.infinity) == FP_ILOGBINFINITY);
}
}

/**
 * Extracts the exponent of x as a signed integral value.
 *
 * If x is subnormal, it is treated as if it were normalized.
 * For a positive, finite x:
 *
 * -----
 * 1 <= $(I x) * FLT_RADIX$(SUP -logb(x)) < FLT_RADIX
 * -----
 *
 * $(TABLE_SV
 * <tr> <th> x <th> logb(x) <th> Divide by 0?
 * <tr> <td> ±∞ <td> +∞ <td> no
 * <tr> <td> ±0.0 <td> -∞ <td> yes
 * )
 */
real logb(real x)
{
    version(DigitalMars_D_InlineAsm_X86)
    {
        // The exponent is left on the x87 stack by the asm block.
        asm {
            fld x;
            fxtract;
            fstp ST(0), ST; // drop significand
        }
    } else {
        return tango.stdc.math.logbl(x);
    }
}

debug(UnitTest) {
unittest {
    assert(logb(real.infinity)== real.infinity);
    assert(isIdentical(logb(NaN(0xFCD)), NaN(0xFCD)));
    assert(logb(1.0)== 0.0);
    assert(logb(-65536) == 16);
    assert(logb(0.0)== -real.infinity);
    assert(ilogb(0.125*real.min) == real.min_exp-4);
}
}

/**
 * Efficiently calculates x * 2$(SUP n).
 *
 * scalbn handles underflow and overflow in
 * the same fashion as the basic arithmetic operators.
*
 * $(TABLE_SV
 * <tr> <th> x <th> scalbn(x, n)
 * <tr> <td> ±∞ <td> ±∞
 * <tr> <td> ±0.0 <td> ±0.0
 * )
 */
real scalbn(real x, int n)
{
    version(DigitalMars_D_InlineAsm_X86)
    {
        // The result is left on the x87 stack by the asm block.
        asm {
            fild n;
            fld x;
            fscale;
            fstp st(1), st;
        }
    } else {
        // BUG: Not implemented in DMD
        return tango.stdc.math.scalbnl(x, n);
    }
}

debug(UnitTest) {
unittest {
    assert(scalbn(-real.infinity, 5) == -real.infinity);
    assert(isIdentical(scalbn(NaN(0xABC),7), NaN(0xABC)));
}
}

/**
 * Returns the positive difference between x and y.
 *
 * If either of x or y is $(NAN), it will be returned.
 * Returns:
 * $(TABLE_SV
 * $(SVH Arguments, fdim(x, y))
 * $(SV x > y, x - y)
 * $(SV x <= y, +0.0)
 * )
 */
real fdim(real x, real y)
{
    // '!<=' is also true when the operands are unordered, so a NaN operand
    // takes the subtraction branch and propagates.
    return (x !<= y) ? x - y : +0.0;
}

debug(UnitTest) {
unittest {
    assert(isIdentical(fdim(NaN(0xABC), 58.2), NaN(0xABC)));
}
}

/**
 * Returns |x|
 *
 * $(TABLE_SV
 * <tr> <th> x <th> fabs(x)
 * <tr> <td> ±0.0 <td> +0.0
 * <tr> <td> ±∞ <td> +∞
 * )
 */
real fabs(real x) /* intrinsic */
{
    version(D_InlineAsm_X86)
    {
        // The result is left on the x87 stack by the asm block.
        asm
        {
            fld x;
            fabs;
        }
    }
    else
    {
        return tango.stdc.math.fabsl(x);
    }
}

// Guarded like every other unittest in this module.
debug(UnitTest) {
unittest {
    assert(isIdentical(fabs(NaN(0xABC)), NaN(0xABC)));
}
}

/**
 * Returns (x * y) + z, rounding only once according to the
 * current rounding mode.
 *
 * The parameters are real (they used to be float), so that real and double
 * arguments are no longer narrowed to float before the multiply-add;
 * float arguments still convert implicitly.
 *
 * BUGS: Not currently implemented - rounds twice.
 */
real fma(real x, real y, real z)
{
    return (x * y) + z;
}

/**
 * Calculate cos(y) + i sin(y).
 *
 * On x86 CPUs, this is a very efficient operation;
 * almost twice as fast as calculating sin(y) and cos(y)
 * separately, and is the preferred method when both are required.
*/
creal expi(real y)
{
    version(DigitalMars_D_InlineAsm_X86)
    {
        // The result is left on the x87 stack by the asm block.
        asm
        {
            fld y;
            fsincos;
            fxch st(1), st(0);
        }
    }
    else
    {
        return tango.stdc.math.cosl(y) + tango.stdc.math.sinl(y)*1i;
    }
}

debug(UnitTest) {
unittest
{
    assert(expi(1.3e5L) == tango.stdc.math.cosl(1.3e5L) + tango.stdc.math.sinl(1.3e5L) * 1i);
    assert(expi(0.0L) == 1L + 0.0Li);
}
}

/*********************************
 * Returns !=0 if x is a NaN.
 *
 * A NaN has an all-ones exponent field and a non-zero mantissa.
 */

int isNaN(real x)
{
    static if (real.mant_dig==double.mant_dig) {
        // 64-bit real: 11-bit exponent in bits 52..62, mantissa in bits 0..51.
        // Each mask is parenthesised because '==' binds tighter than '&'.
        ulong* p = cast(ulong *)&x;
        return (*p & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000
            && (*p & 0x000F_FFFF_FFFF_FFFF) != 0;
    } else {
        // 80-bit real: exponent in the fifth 16-bit word; the significand
        // (ignoring its explicit top bit) must be non-zero.
        ushort* pe = cast(ushort *)&x;
        ulong* ps = cast(ulong *)&x;

        return (pe[4] & 0x7FFF) == 0x7FFF &&
            (*ps & 0x7FFFFFFFFFFFFFFF) != 0;
    }
}


debug(UnitTest) {
unittest
{
    assert(isNaN(float.nan));
    assert(isNaN(-double.nan));
    assert(isNaN(real.nan));

    assert(!isNaN(53.6));
    assert(!isNaN(float.infinity));
}
}

/**
 * Returns !=0 if x is normalized.
 *
 * (Need one for each format because subnormal
 * floats might be converted to normal reals)
 */
int isNormal(float x)
{
    uint *p = cast(uint *)&x;
    uint e;

    // Normal means the exponent field is neither all zeros nor all ones.
    e = *p & 0x7F800000;
    return e && e != 0x7F800000;
}

/** ditto */
int isNormal(double d)
{
    // Read the whole 64-bit pattern instead of indexing 32-bit halves, so
    // the test does not depend on byte order.
    ulong bits = *cast(ulong *)&d;
    ulong e;

    e = bits & 0x7FF0_0000_0000_0000;
    return e && e != 0x7FF0_0000_0000_0000;
}

/** ditto */
int isNormal(real x)
{
    static if (real.mant_dig == double.mant_dig) {
        return isNormal(cast(double)x);
    } else {
        ushort* pe = cast(ushort *)&x;
        long* ps = cast(long *)&x;

        // '*ps < 0' tests the explicit top bit of the 64-bit significand.
        return (pe[4] & 0x7FFF) != 0x7FFF && *ps < 0;
    }
}

debug(UnitTest) {
unittest
{
    float f = 3;
    double d = 500;
    real e = 10e+48;

    assert(isNormal(f));
    assert(isNormal(d));
    assert(isNormal(e));
}
}

/*********************************
 * Is the binary representation of x identical to y?
*
 * Unlike ==, this distinguishes +0.0 from -0.0, and treats two $(NAN)s
 * as identical when they carry the same 'payload'.
 */

bool isIdentical(real x, real y)
{
    static if (real.mant_dig == double.mant_dig) {
        // 64-bit real: one 64-bit word holds the whole representation.
        return *cast(long *)&x == *cast(long *)&y;
    } else {
        // 80-bit real: compare the sign/exponent word first, then the
        // 64-bit significand.
        ushort* expX = cast(ushort *)&x;
        ushort* expY = cast(ushort *)&y;
        if (expX[4] != expY[4])
            return false;

        long* sigX = cast(long *)&x;
        long* sigY = cast(long *)&y;
        return *sigX == *sigY;
    }
}

/** ditto */
bool isIdentical(ireal x, ireal y) {
    real imX = x.im;
    real imY = y.im;
    return isIdentical(imX, imY);
}

/** ditto */
bool isIdentical(creal x, creal y) {
    if (!isIdentical(x.re, y.re))
        return false;
    return isIdentical(x.im, y.im);
}


debug(UnitTest) {
unittest {
    assert(isIdentical(0.0, 0.0));
    assert(!isIdentical(0.0, -0.0));
    assert(isIdentical(NaN(0xABC), NaN(0xABC)));
    assert(!isIdentical(NaN(0xABC), NaN(218)));
    assert(isIdentical(1.234e56, 1.234e56));
    assert(isNaN(NaN(0x12345)));
    assert(isIdentical(3.1 + NaN(0xDEF) * 1i, 3.1 + NaN(0xDEF)*1i));
    assert(!isIdentical(3.1+0.0i, 3.1-0i));
    assert(!isIdentical(0.0i, 2.5e58i));
}
}

/*********************************
 * Is number subnormal? (Also called "denormal".)
 * Subnormals have a 0 exponent and a 0 most significant significand bit.
 */

/* Need one for each format because subnormal floats might
 * be converted to normal reals.
*/

int isSubnormal(float f)
{
    uint *p = cast(uint *)&f;

    // Zero exponent field with a non-zero mantissa.
    return (*p & 0x7F800000) == 0 && *p & 0x007FFFFF;
}

debug(UnitTest) {
unittest
{
    float f = 3.0;

    for (f = 1.0; !isSubnormal(f); f /= 2)
        assert(f != 0);
}
}

/// ditto

int isSubnormal(double d)
{
    // Read the whole 64-bit pattern instead of indexing 32-bit halves, so
    // the test does not depend on byte order.
    ulong bits = *cast(ulong *)&d;

    return (bits & 0x7FF0_0000_0000_0000) == 0
        && (bits & 0x000F_FFFF_FFFF_FFFF) != 0;
}

debug(UnitTest) {
unittest
{
    double f;

    for (f = 1; !isSubnormal(f); f /= 2)
        assert(f != 0);
}
}

/// ditto

int isSubnormal(real e)
{
    static if (real.mant_dig == double.mant_dig) {
        return isSubnormal(cast(double)e);
    } else {
        ushort* pe = cast(ushort *)&e;
        long* ps = cast(long *)&e;

        // Zero exponent; '*ps > 0' means the significand is non-zero and
        // its explicit top bit is clear.
        return (pe[4] & 0x7FFF) == 0 && *ps > 0;
    }
}

debug(UnitTest) {
unittest
{
    real f;

    for (f = 1; !isSubnormal(f); f /= 2)
        assert(f != 0);
}
}

/*********************************
 * Return !=0 if x is ±0.
 */
int isZero(real x)
{
    static if (real.mant_dig == double.mant_dig) {
        // Everything except the sign bit must be zero.
        return ((*cast(ulong *)&x) & 0x7FFF_FFFF_FFFF_FFFF) == 0;
    } else {
        ushort* pe = cast(ushort *)&x;
        ulong* ps = cast(ulong *)&x;
        return (pe[4] & 0x7FFF) == 0 && *ps == 0;
    }
}

debug(UnitTest) {
unittest
{
    assert(isZero(0.0));
    assert(isZero(-0.0));
    assert(!isZero(2.5));
    assert(!isZero(real.min / 1000));
}
}

/*********************************
 * Return !=0 if e is ±∞.
*/

int isInfinity(real e)
{
    static if (real.mant_dig == double.mant_dig) {
        // 64-bit real: after dropping the sign, infinity is an all-ones
        // exponent with a zero mantissa (0x7FF8... would be a quiet NaN).
        return ((*cast(ulong *)&e) & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FF0_0000_0000_0000;
    } else {
        // 80-bit real: all-ones exponent, significand with only the
        // explicit top bit set.
        ushort* pe = cast(ushort *)&e;
        ulong* ps = cast(ulong *)&e;

        return (pe[4] & 0x7FFF) == 0x7FFF &&
            *ps == 0x8000_0000_0000_0000;
    }
}

debug(UnitTest) {
unittest
{
    assert(isInfinity(float.infinity));
    assert(!isInfinity(float.nan));
    assert(isInfinity(double.infinity));
    assert(isInfinity(-real.infinity));

    assert(isInfinity(-1.0 / 0.0));
}
}

/**
 * Calculate the next largest floating point value after x.
 *
 * Return the least number greater than x that is representable as a real;
 * thus, it gives the next point on the IEEE number line.
 * This function is included in the forthcoming IEEE 754R standard.
 *
 * $(TABLE_SV
 * $(SVH x, nextup(x) )
 * $(SV -∞, -real.max )
 * $(SV ±0.0, real.min*real.epsilon )
 * $(SV real.max, real.infinity )
 * $(SV real.infinity, real.infinity )
 * $(SV $(NAN), $(NAN) )
 * )
 *
 * nextDoubleUp and nextFloatUp are the corresponding functions for
 * the IEEE double and IEEE float number lines.
 */
real nextUp(real x)
{
    static if (real.mant_dig == double.mant_dig) {
        return nextDoubleUp(x);
    } else {
        // For 80-bit reals, the "implied bit" is a nuisance...
        ushort *pe = cast(ushort *)&x;
        ulong *ps = cast(ulong *)&x;

        if ((pe[4] & 0x7FFF) == 0x7FFF) {
            // First, deal with NANs and infinity
            if (x == -real.infinity) return -real.max;
            return x; // +INF and NAN are unchanged.
        }
        if (pe[4] & 0x8000) { // Negative number -- need to decrease the significand
            --*ps;
            // Need to mask with 0x7FFF... so denormals are treated correctly.
            if ((*ps & 0x7FFFFFFFFFFFFFFF) == 0x7FFFFFFFFFFFFFFF) {
                if (pe[4] == 0x8000) { // it was negative zero
                    *ps = 1; pe[4] = 0; // smallest subnormal.
                    return x;
                }
                --pe[4];
                if (pe[4] == 0x8000) {
                    return x; // it's become a denormal, implied bit stays low.
                }
                *ps = 0xFFFFFFFFFFFFFFFF; // set the implied bit
                return x;
            }
            return x;
        } else {
            // Positive number -- need to increase the significand.
            // Works automatically for positive zero.
            ++*ps;
            if ((*ps & 0x7FFFFFFFFFFFFFFF) == 0) {
                // change in exponent
                ++pe[4];
                *ps = 0x8000000000000000; // set the high bit
            }
        }
        return x;
    }
}

/** ditto */
double nextDoubleUp(double x)
{
    ulong *ps = cast(ulong *)&x;

    if ((*ps & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000) {
        // First, deal with NANs and infinity
        if (x == -x.infinity) return -x.max;
        return x; // +INF and NAN are unchanged.
    }
    if (*ps & 0x8000_0000_0000_0000) { // Negative number
        if (*ps == 0x8000_0000_0000_0000) { // it was negative zero
            *ps = 0x0000_0000_0000_0001; // change to smallest subnormal
            return x;
        }
        --*ps; // smaller bit pattern = smaller magnitude = larger value
    } else { // Positive number
        ++*ps;
    }
    return x;
}

/** ditto */
float nextFloatUp(float x)
{
    uint *ps = cast(uint *)&x;

    if ((*ps & 0x7F80_0000) == 0x7F80_0000) {
        // First, deal with NANs and infinity
        if (x == -x.infinity) return -x.max;
        return x; // +INF and NAN are unchanged.
    }
    if (*ps & 0x8000_0000) { // Negative number
        if (*ps == 0x8000_0000) { // it was negative zero
            *ps = 0x0000_0001; // change to smallest subnormal
            return x;
        }
        --*ps;
    } else { // Positive number
        ++*ps;
    }
    return x;
}

debug(UnitTest) {
unittest {
 static if (real.mant_dig == 64) {

    // Tests for 80-bit reals

    assert(isIdentical(nextUp(NaN(0xABC)), NaN(0xABC)));
    // negative numbers
    assert( nextUp(-real.infinity) == -real.max );
    assert( nextUp(-1-real.epsilon) == -1.0 );
    assert( nextUp(-2) == -2.0 + real.epsilon);
    // denormals and zero
    assert( nextUp(-real.min) == -real.min*(1-real.epsilon) );
    // The comparison belongs outside the nextUp() call; with it inside,
    // the assert only checked that nextUp(bool) was non-zero.
    assert( nextUp(-real.min*(1-real.epsilon)) == -real.min*(1-2*real.epsilon) );
    assert( isIdentical(-0.0L, nextUp(-real.min*real.epsilon)) );
    assert( nextUp(-0.0) == real.min*real.epsilon );
    assert( nextUp(0.0) == real.min*real.epsilon );
    assert( nextUp(real.min*(1-real.epsilon)) == real.min );
    assert( nextUp(real.min) == real.min*(1+real.epsilon) );
    // positive numbers
    assert( nextUp(1) == 1.0 + real.epsilon );
    assert( nextUp(2.0-real.epsilon) == 2.0 );
    assert( nextUp(real.max) == real.infinity );
    assert( nextUp(real.infinity)==real.infinity );
 }

    assert(isIdentical(nextDoubleUp(NaN(0xABC)), NaN(0xABC)));
    // negative numbers
    assert( nextDoubleUp(-double.infinity) == -double.max );
    assert( nextDoubleUp(-1-double.epsilon) == -1.0 );
    assert( nextDoubleUp(-2) == -2.0 + double.epsilon);
    // denormals and zero

    assert( nextDoubleUp(-double.min) == -double.min*(1-double.epsilon) );
    // Same parenthesis fix as in the 80-bit test above.
    assert( nextDoubleUp(-double.min*(1-double.epsilon)) == -double.min*(1-2*double.epsilon) );
    assert( isIdentical(-0.0, nextDoubleUp(-double.min*double.epsilon)) );
    assert( nextDoubleUp(0.0) == double.min*double.epsilon );
    assert( nextDoubleUp(-0.0) == double.min*double.epsilon );
    assert( nextDoubleUp(double.min*(1-double.epsilon)) == double.min );
    assert( nextDoubleUp(double.min) == double.min*(1+double.epsilon) );
    // positive numbers
    assert( nextDoubleUp(1) == 1.0 + double.epsilon );
    assert( nextDoubleUp(2.0-double.epsilon) == 2.0 );
    assert( nextDoubleUp(double.max) == double.infinity );

    assert(isIdentical(nextFloatUp(NaN(0xABC)), NaN(0xABC)));
    assert( nextFloatUp(-float.min) == -float.min*(1-float.epsilon) );
    assert( nextFloatUp(1.0) == 1.0+float.epsilon );
    assert( nextFloatUp(-0.0) == float.min*float.epsilon);
    assert( nextFloatUp(float.infinity)==float.infinity );

    assert(nextDown(1.0+real.epsilon)==1.0);
    assert(nextDoubleDown(1.0+double.epsilon)==1.0);
    assert(nextFloatDown(1.0+float.epsilon)==1.0);
    assert(nextafter(1.0+real.epsilon, -real.infinity)==1.0);
}
}

package {
/** Reduces the magnitude of x, so the bits in the lower half of its significand
 * are all zero. Returns the amount which needs to be added to x to restore its
 * initial value; this amount will also have zeros in all bits in the lower half
 * of its significand.
 *
 * NaN and infinity are left unchanged and 0 is returned for them.
 */
X splitSignificand(X)(inout X x)
{
    if (fabs(x) !< X.infinity) return 0; // don't change NaN or infinity
    X y = x; // copy the original value
    static if (X.mant_dig == float.mant_dig) {
        uint *ps = cast(uint *)&x;
        (*ps) &= 0xFFFF_FC00;
    } else static if (X.mant_dig == double.mant_dig) {
        ulong *ps = cast(ulong *)&x;
        (*ps) &= 0xFFFF_FFFF_FC00_0000;
    } else static if (X.mant_dig == 64){ // 80-bit real
        // An x87 real80 has 63 bits, because the 'implied' bit is stored explicitly.
        // This is annoying, because it means the significand cannot be
        // precisely halved. Instead, we split it into 31+32 bits.
        ulong *ps = cast(ulong *)&x;
        (*ps) &= 0xFFFF_FFFF_0000_0000;
    } //else static assert(0, "Unsupported size");

    return y - x;
}


//import tango.stdc.stdio;
unittest {
    double x = -0x1.234_567A_AAAA_AAp+250;
    double y = splitSignificand(x);
    assert(x == -0x1.234_5678p+250);
    assert(y == -0x0.000_000A_AAAA_A8p+248);
    assert(x + y == -0x1.234_567A_AAAA_AAp+250);
}
}

/**
 * Calculate the next smallest floating point value after x.
 *
 * Return the greatest number less than x that is representable as a real;
 * thus, it gives the previous point on the IEEE number line.
 * Note: This function is included in the forthcoming IEEE 754R standard.
 *
 * Special values:
 * real.infinity real.max
 * real.min*real.epsilon 0.0
 * 0.0 -real.min*real.epsilon
 * -0.0 -real.min*real.epsilon
 * -real.max -real.infinity
 * -real.infinity -real.infinity
 * NAN NAN
 *
 * nextDoubleDown and nextFloatDown are the corresponding functions for
 * the IEEE double and IEEE float number lines.
 */
real nextDown(real x)
{
    return -nextUp(-x);
}

/** ditto */
double nextDoubleDown(double x)
{
    return -nextDoubleUp(-x);
}

/** ditto */
float nextFloatDown(float x)
{
    return -nextFloatUp(-x);
}

debug(UnitTest) {
unittest {
    assert( nextDown(1.0 + real.epsilon) == 1.0);
}
}


/**
 * Calculates the next representable value after x in the direction of y.
 *
 * If y > x, the result will be the next largest floating-point value;
 * if y < x, the result will be the next smallest value.
 * If x == y, the result is y.
 *
 * Remarks:
 * This function is not generally very useful; it's almost always better to use
 * the faster functions nextup() or nextdown() instead.
 *
 * IEEE 754 requirements not implemented:
 * The FE_INEXACT and FE_OVERFLOW exceptions will be raised if x is finite and
 * the function result is infinite.
The FE_INEXACT and FE_UNDERFLOW + * exceptions will be raised if the function value is subnormal, and x is + * not equal to y. + */ +real nextafter(real x, real y) +{ + if (x==y) return y; + return (y>x) ? nextUp(x) : nextDown(x); +} + +/************************************** + * To what precision is x equal to y? + * + * Returns: the number of significand bits which are equal in x and y. + * eg, 0x1.F8p+60 and 0x1.F1p+60 are equal to 5 bits of precision. + * + * $(TABLE_SV + * $(SVH3 x, y, feqrel(x, y) ) + * $(SV3 x, x, real.mant_dig ) + * $(SV3 x, >= 2*x, 0 ) + * $(SV3 x, <= x/2, 0 ) + * $(SV3 $(NAN), any, 0 ) + * $(SV3 any, $(NAN), 0 ) + * ) + * + * Remarks: + * This is a very fast operation, suitable for use in speed-critical code. + * + */ + +int feqrel(real x, real y) +{ + /* Public Domain. Author: Don Clugston, 18 Aug 2005. + */ + + if (x == y) return real.mant_dig; // ensure diff!=0, cope with INF. + + real diff = fabs(x - y); + + ushort *pa = cast(ushort *)(&x); + ushort *pb = cast(ushort *)(&y); + ushort *pd = cast(ushort *)(&diff); + + // The difference in abs(exponent) between x or y and abs(x-y) + // is equal to the number of significand bits of x which are + // equal to y. If negative, x and y have different exponents. + // If positive, x and y are equal to 'bitsdiff' bits. + // AND with 0x7FFF to form the absolute value. + // To avoid out-by-1 errors, we subtract 1 so it rounds down + // if the exponents were different. This means 'bitsdiff' is + // always 1 lower than we want, except that if bitsdiff==0, + // they could have 0 or 1 bits in common. + + static if (real.mant_dig==64) + { + + int bitsdiff = ( ((pa[4]&0x7FFF) + (pb[4]&0x7FFF)-1)>>1) - pd[4]; + + if (pd[4] == 0) + { // Difference is denormal + // For denormals, we need to add the number of zeros that + // lie at the start of diff's significand. 
+ // We do this by multiplying by 2^real.mant_dig + diff *= 0x1p+63; + return bitsdiff + real.mant_dig - pd[4]; + } + + if (bitsdiff > 0) + return bitsdiff + 1; // add the 1 we subtracted before + + // Avoid out-by-1 errors when factor is almost 2. + return (bitsdiff == 0) ? (pa[4] == pb[4]) : 0; + } else { + // 64-bit reals + version(LittleEndian) + const int EXPONENTPOS = 3; + else const int EXPONENTPOS = 0; + + int bitsdiff = ( ((pa[EXPONENTPOS]&0x7FF0) + (pb[EXPONENTPOS]&0x7FF0)-0x10)>>5) - (pd[EXPONENTPOS]&0x7FF0>>4); + + if (pd[EXPONENTPOS] == 0) + { // Difference is denormal + // For denormals, we need to add the number of zeros that + // lie at the start of diff's significand. + // We do this by multiplying by 2^real.mant_dig + diff *= 0x1p+53; + return bitsdiff + real.mant_dig - pd[EXPONENTPOS]; + } + + if (bitsdiff > 0) + return bitsdiff + 1; // add the 1 we subtracted before + + // Avoid out-by-1 errors when factor is almost 2. + if (bitsdiff == 0 && (pa[EXPONENTPOS] ^ pb[EXPONENTPOS])&0x7FF0) return 1; + else return 0; + + } + +} + +debug(UnitTest) { +unittest +{ + // Exact equality + assert(feqrel(real.max,real.max)==real.mant_dig); + assert(feqrel(0,0)==real.mant_dig); + assert(feqrel(7.1824,7.1824)==real.mant_dig); + assert(feqrel(real.infinity,real.infinity)==real.mant_dig); + + // a few bits away from exact equality + real w=1; + for (int i=1; i<real.mant_dig-1; ++i) { + assert(feqrel(1+w*real.epsilon,1)==real.mant_dig-i); + assert(feqrel(1-w*real.epsilon,1)==real.mant_dig-i); + assert(feqrel(1,1+(w-1)*real.epsilon)==real.mant_dig-i+1); + w*=2; + } + assert(feqrel(1.5+real.epsilon,1.5)==real.mant_dig-1); + assert(feqrel(1.5-real.epsilon,1.5)==real.mant_dig-1); + assert(feqrel(1.5-real.epsilon,1.5+real.epsilon)==real.mant_dig-2); + + assert(feqrel(real.min/8,real.min/17)==3);; + + // Numbers that are close + assert(feqrel(0x1.Bp+84, 0x1.B8p+84)==5); + assert(feqrel(0x1.8p+10, 0x1.Cp+10)==2); + assert(feqrel(1.5*(1-real.epsilon), 1)==2); + 
assert(feqrel(1.5, 1)==1); + assert(feqrel(2*(1-real.epsilon), 1)==1); + + // Factors of 2 + assert(feqrel(real.max,real.infinity)==0); + assert(feqrel(2*(1-real.epsilon), 1)==1); + assert(feqrel(1, 2)==0); + assert(feqrel(4, 1)==0); + + // Extreme inequality + assert(feqrel(real.nan,real.nan)==0); + assert(feqrel(0,-real.nan)==0); + assert(feqrel(real.nan,real.infinity)==0); + assert(feqrel(real.infinity,-real.infinity)==0); + assert(feqrel(-real.max,real.infinity)==0); + assert(feqrel(real.max,-real.max)==0); +} +} + +/********************************* + * Return 1 if sign bit of e is set, 0 if not. + */ + +int signbit(real x) +{ + static if (real.mant_dig == double.mant_dig) { + return ((*cast(ulong *)&x) & 0x8000_0000_0000_0000) != 0; + } else { + ubyte* pe = cast(ubyte *)&x; + return (pe[9] & 0x80) != 0; + } +} + +debug(UnitTest) { +unittest +{ + assert(!signbit(float.nan)); + assert(signbit(-float.nan)); + assert(!signbit(168.1234)); + assert(signbit(-168.1234)); + assert(!signbit(0.0)); + assert(signbit(-0.0)); +} +} + + +/********************************* + * Return a value composed of to with from's sign bit. + */ + +real copysign(real to, real from) +{ + static if (real.mant_dig == double.mant_dig) { + ulong* pto = cast(ulong *)&to; + ulong* pfrom = cast(ulong *)&from; + *pto &= 0x7FFF_FFFF_FFFF_FFFF; + *pto |= (*pfrom) & 0x8000_0000_0000_0000; + return to; + } else { + ubyte* pto = cast(ubyte *)&to; + ubyte* pfrom = cast(ubyte *)&from; + + pto[9] &= 0x7F; + pto[9] |= pfrom[9] & 0x80; + + return to; + } +} + +debug(UnitTest) { +unittest +{ + real e; + + e = copysign(21, 23.8); + assert(e == 21); + + e = copysign(-21, 23.8); + assert(e == 21); + + e = copysign(21, -23.8); + assert(e == -21); + + e = copysign(-21, -23.8); + assert(e == -21); + + e = copysign(real.nan, -23.8); + assert(isNaN(e) && signbit(e)); +} +} + +/** Return the value that lies halfway between x and y on the IEEE number line. 
+ * + * Formally, the result is the arithmetic mean of the binary significands of x + * and y, multiplied by the geometric mean of the binary exponents of x and y. + * x and y must have the same sign, and must not be NaN. + * Note: this function is useful for ensuring O(log n) behaviour in algorithms + * involving a 'binary chop'. + * + * Special cases: + * If x and y are within a factor of 2, (ie, feqrel(x, y) > 0), the return value + * is the arithmetic mean (x + y) / 2. + * If x and y are even powers of 2, the return value is the geometric mean, + * ieeeMean(x, y) = sqrt(x * y). + * + */ +T ieeeMean(T)(T x, T y) +in { + // both x and y must have the same sign, and must not be NaN. + assert(signbit(x) == signbit(y) && x<>=0 && y<>=0); +} +body { + // Runtime behaviour for contract violation: + // If signs are opposite, or one is a NaN, return 0. + if (!((x>=0 && y>=0) || (x<=0 && y<=0))) return 0.0; + + // The implementation is simple: cast x and y to integers, + // average them (avoiding overflow), and cast the result back to a floating-point number. + + T u; + static if (T.mant_dig==64) { // x87, 80-bit reals + // There's slight additional complexity because they are actually + // 79-bit reals... + ushort *ue = cast(ushort *)&u; + ulong *ul = cast(ulong *)&u; + ushort *xe = cast(ushort *)&x; + ulong *xl = cast(ulong *)&x; + ushort *ye = cast(ushort *)&y; + ulong *yl = cast(ulong *)&y; + // Ignore the useless implicit bit. + ulong m = ((*xl) & 0x7FFF_FFFF_FFFF_FFFF) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFF); + + ushort e = cast(ushort)((xe[4] & 0x7FFF) + (ye[4] & 0x7FFF)); + if (m & 0x8000_0000_0000_0000) { + ++e; + m &= 0x7FFF_FFFF_FFFF_FFFF; + } + // Now do a multi-byte right shift + uint c = e & 1; // carry + e >>= 1; + m >>>= 1; + if (c) m |= 0x4000_0000_0000_0000; // shift carry into significand + if (e) *ul = m | 0x8000_0000_0000_0000; // set implicit bit... + else *ul = m; // ... unless exponent is 0 (denormal or zero). 
+ // Prevent a ridiculous warning (why does (ushort | ushort) get promoted to int???) + ue[4]= cast(ushort)( e | (xe[4]& 0x8000)); // restore sign bit + } else static if (T.mant_dig == double.mant_dig) { + ulong *ul = cast(ulong *)&u; + ulong *xl = cast(ulong *)&x; + ulong *yl = cast(ulong *)&y; + ulong m = (((*xl) & 0x7FFF_FFFF_FFFF_FFFF) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFF)) >>> 1; + m |= ((*xl) & 0x8000_0000_0000_0000); + *ul = m; + }else static if (T.mant_dig == float.mant_dig) { + uint *ul = cast(uint *)&u; + uint *xl = cast(uint *)&x; + uint *yl = cast(uint *)&y; + uint m = (((*xl) & 0x7FFF_FFFF) + ((*yl) & 0x7FFF_FFFF)) >>> 1; + m |= ((*xl) & 0x8000_0000); + *ul = m; + } + return u; +} + +debug(UnitTest) { +unittest { + assert(ieeeMean(-0.0,-1e-20)<0); + assert(ieeeMean(0.0,1e-20)>0); + + assert(ieeeMean(1.0L,4.0L)==2L); + assert(ieeeMean(2.0*1.013,8.0*1.013)==4*1.013); + assert(ieeeMean(-1.0L,-4.0L)==-2L); + assert(ieeeMean(-1.0,-4.0)==-2); + assert(ieeeMean(-1.0f,-4.0f)==-2f); + assert(ieeeMean(-1.0,-2.0)==-1.5); + assert(ieeeMean(-1*(1+8*real.epsilon),-2*(1+8*real.epsilon))==-1.5*(1+5*real.epsilon)); + assert(ieeeMean(0x1p60,0x1p-10)==0x1p25); + static if (real.mant_dig==64) { // x87, 80-bit reals + assert(ieeeMean(1.0L,real.infinity)==0x1p8192L); + assert(ieeeMean(0.0L,real.infinity)==1.5); + } + assert(ieeeMean(0.5*real.min*(1-4*real.epsilon),0.5*real.min)==0.5*real.min*(1-2*real.epsilon)); +} +} + +// Functions for NaN payloads +/* + * A 'payload' can be stored in the significand of a $(NAN). One bit is required + * to distinguish between a quiet and a signalling $(NAN). This leaves 22 bits + * of payload for a float; 51 bits for a double; 62 bits for an 80-bit real; + * and 111 bits for a 128-bit quad. +*/ +/** + * Create a $(NAN), storing an integer inside the payload. + * + * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF. + * For doubles, it is 0x3_FFFF_FFFF_FFFF. + * For floats, it is 0x3F_FFFF. 
+ */ +real NaN(ulong payload) +{ + static if (real.mant_dig == double.mant_dig) { + ulong v = 2; // no implied bit. quiet bit = 1 + } else { + ulong v = 3; // implied bit = 1, quiet bit = 1 + } + + ulong a = payload; + + // 22 Float bits + ulong w = a & 0x3F_FFFF; + a -= w; + + v <<=22; + v |= w; + a >>=22; + + // 29 Double bits + v <<=29; + w = a & 0xFFF_FFFF; + v |= w; + a -= w; + a >>=29; + + static if (real.mant_dig == double.mant_dig) { + v |=0x7FF0_0000_0000_0000; + real x; + * cast(ulong *)(&x) = v; + return x; + } else { + // Extended real bits + v <<=11; + a &= 0x7FF; + v |= a; + + real x = real.nan; + * cast(ulong *)(&x) = v; + return x; + } +} + +/** + * Extract an integral payload from a $(NAN). + * + * Returns: + * the integer payload as a ulong. + * + * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF. + * For doubles, it is 0x3_FFFF_FFFF_FFFF. + * For floats, it is 0x3F_FFFF. + */ +ulong getNaNPayload(real x) +{ + assert(isNaN(x)); + ulong m = *cast(ulong *)(&x); + static if (real.mant_dig == double.mant_dig) { + // Make it look like an 80-bit significand. + // Skip exponent, and quiet bit + m &= 0x0007_FFFF_FFFF_FFFF; + m <<= 10; + } + // ignore implicit bit and quiet bit + ulong f = m & 0x3FFF_FF00_0000_0000L; + ulong w = f >>> 40; + w |= (m & 0x00FF_FFFF_F800L) << (22 - 11); + w |= (m & 0x7FF) << 51; + return w; +} + +debug(UnitTest) { +unittest { + real nan4 = NaN(0x789_ABCD_EF12_3456); + static if (real.mant_dig == 64) { + assert (getNaNPayload(nan4) == 0x789_ABCD_EF12_3456); + } else { + assert (getNaNPayload(nan4) == 0x1_ABCD_EF12_3456); + } + double nan5 = nan4; + assert (getNaNPayload(nan5) == 0x1_ABCD_EF12_3456); + float nan6 = nan4; + assert (getNaNPayload(nan6) == 0x12_3456); + nan4 = NaN(0xFABCD); + assert (getNaNPayload(nan4) == 0xFABCD); + nan6 = nan4; + assert (getNaNPayload(nan6) == 0xFABCD); + nan5 = NaN(0x100_0000_0000_3456); + assert(getNaNPayload(nan5) == 0x0000_0000_3456); +} +} +