Mercurial > projects > ldc
comparison tango/tango/math/IEEE.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:5825d48b27d1 | 132:1700239cab2e |
---|---|
1 /** | |
2 * Low-level Mathematical Functions which take advantage of the IEEE754 ABI. | |
3 * | |
4 * Copyright: Portions Copyright (C) 2001-2005 Digital Mars. | |
5 * License: BSD style: $(LICENSE), Digital Mars. | |
6 * Authors: Don Clugston, Walter Bright, Sean Kelly | |
7 */ | |
8 /* Portions of this code were taken from Phobos std.math, which has the following | |
9 * copyright notice: | |
10 * | |
11 * Author: | |
12 * Walter Bright | |
13 * Copyright: | |
14 * Copyright (c) 2001-2005 by Digital Mars, | |
15 * All Rights Reserved, | |
16 * www.digitalmars.com | |
17 * License: | |
18 * This software is provided 'as-is', without any express or implied | |
19 * warranty. In no event will the authors be held liable for any damages | |
20 * arising from the use of this software. | |
21 * | |
22 * Permission is granted to anyone to use this software for any purpose, | |
23 * including commercial applications, and to alter it and redistribute it | |
24 * freely, subject to the following restrictions: | |
25 * | |
26 * <ul> | |
27 * <li> The origin of this software must not be misrepresented; you must not | |
28 * claim that you wrote the original software. If you use this software | |
29 * in a product, an acknowledgment in the product documentation would be | |
30 * appreciated but is not required. | |
31 * </li> | |
32 * <li> Altered source versions must be plainly marked as such, and must not | |
33 * be misrepresented as being the original software. | |
34 * </li> | |
35 * <li> This notice may not be removed or altered from any source | |
36 * distribution. | |
37 * </li> | |
38 * </ul> | |
39 */ | |
40 /** | |
41 * Macros: | |
42 * | |
43 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0> | |
44 * <caption>Special Values</caption> | |
45 * $0</table> | |
46 * SVH = $(TR $(TH $1) $(TH $2)) | |
47 * SV = $(TR $(TD $1) $(TD $2)) | |
48 * SVH3 = $(TR $(TH $1) $(TH $2) $(TH $3)) | |
49 * SV3 = $(TR $(TD $1) $(TD $2) $(TD $3)) | |
50 * NAN = $(RED NAN) | |
51 */ | |
52 module tango.math.IEEE; | |
53 | |
54 version(DigitalMars) | |
55 { | |
56 version(D_InlineAsm_X86) | |
57 { | |
58 version = DigitalMars_D_InlineAsm_X86; | |
59 } | |
60 } | |
61 | |
62 version (X86){ | |
63 version = X86_Any; | |
64 } | |
65 | |
66 version (X86_64){ | |
67 version = X86_Any; | |
68 } | |
69 | |
70 version (DigitalMars_D_InlineAsm_X86) { | |
71 // Don't include this extra dependency unless we need to. | |
72 debug(UnitTest) { | |
73 static import tango.stdc.math; | |
74 } | |
75 } else { | |
76 // Needed for cos(), sin(), tan() on GNU. | |
77 static import tango.stdc.math; | |
78 } | |
79 | |
80 // Standard Tango NaN payloads. | |
81 // NOTE: These values may change in future Tango releases | |
82 // The lowest three bits indicate the cause of the NaN: | |
83 // 0 = error other than those listed below: | |
84 // 1 = domain error | |
85 // 2 = singularity | |
86 // 3 = range | |
87 // 4-7 = reserved. | |
88 enum TANGO_NAN { | |
89 // General errors | |
90 DOMAIN_ERROR = 0x0101, | |
91 SINGULARITY = 0x0102, | |
92 RANGE_ERROR = 0x0103, | |
93 // NaNs created by functions in the basic library | |
94 TAN_DOMAIN = 0x1001, | |
95 POW_DOMAIN = 0x1021, | |
96 GAMMA_DOMAIN = 0x1101, | |
97 GAMMA_POLE = 0x1102, | |
98 SGNGAMMA = 0x1112, | |
99 BETA_DOMAIN = 0x1131, | |
100 // NaNs from statistical functions | |
101 NORMALDISTRIBUTION_INV_DOMAIN = 0x2001, | |
102 STUDENTSDDISTRIBUTION_DOMAIN = 0x2011 | |
103 } | |
104 | |
105 /* Most of the functions depend on the format of the largest IEEE floating-point type. | |
106 * These code will differ depending on whether 'real' is 64, 80, or 128 bits, | |
107 * and whether it is a big-endian or little-endian architecture. | |
108 * Only three 'real' ABIs are currently supported: | |
109 * 64 bit Big-endian (eg PowerPC) | |
110 * 64 bit Little-endian | |
111 * 80 bit Little-endian, with implied bit (eg x87, Itanium). | |
112 * There is also an unsupported ABI which does not follow IEEE; several of its functions | |
113 * will generate run-time errors if used. | |
114 * 128 bit Big-endian (double-double, as used by GDC <= 0.23) | |
115 */ | |
116 | |
117 version(LittleEndian) { | |
118 static assert(real.mant_dig == 53 || real.mant_dig==64, | |
119 "Only 64-bit and 80-bit reals are supported for LittleEndian CPUs"); | |
120 } else { | |
121 static assert(real.mant_dig == 53 || real.mant_dig==106, | |
122 "Only 64-bit reals are supported for BigEndian CPUs. 106-bit reals have partial support"); | |
123 } | |
124 | |
125 /** IEEE exception status flags | |
126 | |
127 These flags indicate that an exceptional floating-point condition has occured. | |
128 They indicate that a NaN or an infinity has been generated, that a result | |
129 is inexact, or that a signalling NaN has been encountered. | |
130 The return values of the properties should be treated as booleans, although | |
131 each is returned as an int, for speed. | |
132 | |
133 Example: | |
134 ---- | |
135 real a=3.5; | |
136 // Set all the flags to zero | |
137 resetIeeeFlags(); | |
138 assert(!ieeeFlags.divByZero); | |
139 // Perform a division by zero. | |
140 a/=0.0L; | |
141 assert(a==real.infinity); | |
142 assert(ieeeFlags.divByZero); | |
143 // Create a NaN | |
144 a*=0.0L; | |
145 assert(ieeeFlags.invalid); | |
146 assert(isNaN(a)); | |
147 | |
148 // Check that calling func() has no effect on the | |
149 // status flags. | |
150 IeeeFlags f = ieeeFlags; | |
151 func(); | |
152 assert(ieeeFlags == f); | |
153 | |
154 ---- | |
155 */ | |
156 struct IeeeFlags | |
157 { | |
158 private: | |
159 // The x87 FPU status register is 16 bits. | |
160 // The Pentium SSE2 status register is 32 bits. | |
161 int m_flags; | |
162 version (X86_Any) { | |
163 // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits). | |
164 enum : int { | |
165 INEXACT_MASK = 0x20, | |
166 UNDERFLOW_MASK = 0x10, | |
167 OVERFLOW_MASK = 0x08, | |
168 DIVBYZERO_MASK = 0x04, | |
169 INVALID_MASK = 0x01 | |
170 } | |
171 // Don't bother about denormals, they are not supported on all CPUs. | |
172 //const int DENORMAL_MASK = 0x02; | |
173 } else version (PPC) { | |
174 // PowerPC FPSCR is a 32-bit register. | |
175 enum : int { | |
176 INEXACT_MASK = 0x600, | |
177 UNDERFLOW_MASK = 0x010, | |
178 OVERFLOW_MASK = 0x008, | |
179 DIVBYZERO_MASK = 0x020, | |
180 INVALID_MASK = 0xF80 | |
181 } | |
182 } | |
183 private: | |
184 static IeeeFlags getIeeeFlags() | |
185 { | |
186 // This is a highly time-critical operation, and | |
187 // should really be an intrinsic. In this case, we | |
188 // take advantage of the fact that for DMD | |
189 // a struct containing only a int is returned in EAX. | |
190 version(D_InlineAsm_X86) { | |
191 asm { | |
192 fstsw AX; | |
193 // NOTE: If compiler supports SSE2, need to OR the result with | |
194 // the SSE2 status register. | |
195 // Clear all irrelevant bits | |
196 and EAX, 0x03D; | |
197 } | |
198 } else { | |
199 assert(0, "Not yet supported"); | |
200 } | |
201 } | |
202 static void resetIeeeFlags() | |
203 { | |
204 version(D_InlineAsm_X86) { | |
205 asm { | |
206 fnclex; | |
207 } | |
208 } else { | |
209 assert(0, "Not yet supported"); | |
210 } | |
211 } | |
212 public: | |
213 /// The result cannot be represented exactly, so rounding occured. | |
214 /// (example: x = sin(0.1); } | |
215 int inexact() { return m_flags & INEXACT_MASK; } | |
216 /// A zero was generated by underflow (example: x = real.min*real.epsilon/2;) | |
217 int underflow() { return m_flags & UNDERFLOW_MASK; } | |
218 /// An infinity was generated by overflow (example: x = real.max*2;) | |
219 int overflow() { return m_flags & OVERFLOW_MASK; } | |
220 /// An infinity was generated by division by zero (example: x = 3/0.0; ) | |
221 int divByZero() { return m_flags & DIVBYZERO_MASK; } | |
222 /// A machine NaN was generated. (example: x = real.infinity * 0.0; ) | |
223 int invalid() { return m_flags & INVALID_MASK; } | |
224 } | |
225 | |
226 /// Return a snapshot of the current state of the floating-point status flags. | |
227 IeeeFlags ieeeFlags() { return IeeeFlags.getIeeeFlags(); } | |
228 | |
229 /// Set all of the floating-point status flags to false. | |
230 void resetIeeeFlags() { IeeeFlags.resetIeeeFlags; } | |
231 | |
232 /** IEEE rounding modes. | |
233 * The default mode is ROUNDTONEAREST. | |
234 */ | |
235 enum RoundingMode : short { | |
236 ROUNDTONEAREST = 0x0000, | |
237 ROUNDDOWN = 0x0400, | |
238 ROUNDUP = 0x0800, | |
239 ROUNDTOZERO = 0x0C00 | |
240 }; | |
241 | |
242 /** Change the rounding mode used for all floating-point operations. | |
243 * | |
244 * Returns the old rounding mode. | |
245 * | |
246 * When changing the rounding mode, it is almost always necessary to restore it | |
247 * at the end of the function. Typical usage: | |
248 --- | |
249 auto oldrounding = setIeeeRounding(RoundingMode.ROUNDDOWN); | |
250 scope (exit) setIeeeRounding(oldrounding); | |
251 --- | |
252 */ | |
253 RoundingMode setIeeeRounding(RoundingMode roundingmode) { | |
254 version(D_InlineAsm_X86) { | |
255 // TODO: For SSE/SSE2, do we also need to set the SSE rounding mode? | |
256 short cont; | |
257 asm { | |
258 fstcw cont; | |
259 mov CX, cont; | |
260 mov AX, cont; | |
261 and EAX, 0x0C00; // Form the return value | |
262 and CX, 0xF3FF; | |
263 or CX, roundingmode; | |
264 mov cont, CX; | |
265 fldcw cont; | |
266 } | |
267 } else { | |
268 assert(0, "Not yet supported"); | |
269 } | |
270 } | |
271 | |
272 /** Get the IEEE rounding mode which is in use. | |
273 * | |
274 */ | |
275 RoundingMode getIeeeRounding() { | |
276 version(D_InlineAsm_X86) { | |
277 // TODO: For SSE/SSE2, do we also need to check the SSE rounding mode? | |
278 short cont; | |
279 asm { | |
280 mov EAX, 0x0C00; | |
281 fstcw cont; | |
282 and AX, cont; | |
283 } | |
284 } else { | |
285 assert(0, "Not yet supported"); | |
286 } | |
287 } | |
288 | |
289 debug(UnitTest) { | |
290 version(D_InlineAsm_X86) { // Won't work for anything else yet | |
291 unittest { | |
292 real a = 3.5; | |
293 resetIeeeFlags(); | |
294 assert(!ieeeFlags.divByZero); | |
295 a /= 0.0L; | |
296 assert(ieeeFlags.divByZero); | |
297 assert(a == real.infinity); | |
298 a *= 0.0L; | |
299 assert(ieeeFlags.invalid); | |
300 assert(isNaN(a)); | |
301 a = real.max; | |
302 a *= 2; | |
303 assert(ieeeFlags.overflow); | |
304 a = real.min * real.epsilon; | |
305 a /= 99; | |
306 assert(ieeeFlags.underflow); | |
307 assert(ieeeFlags.inexact); | |
308 | |
309 int r = getIeeeRounding; | |
310 assert(r == RoundingMode.ROUNDTONEAREST); | |
311 } | |
312 } | |
313 } | |
314 | |
315 // Note: Itanium supports more precision options than this. SSE/SSE2 does not support any. | |
316 enum PrecisionControl : short { | |
317 PRECISION80 = 0x300, | |
318 PRECISION64 = 0x200, | |
319 PRECISION32 = 0x000 | |
320 }; | |
321 | |
322 /** Set the number of bits of precision used by 'real'. | |
323 * | |
324 * Returns: the old precision. | |
325 * This is not supported on all platforms. | |
326 */ | |
327 PrecisionControl reduceRealPrecision(PrecisionControl prec) { | |
328 version(D_InlineAsm_X86) { | |
329 short cont; | |
330 asm { | |
331 fstcw cont; | |
332 mov CX, cont; | |
333 mov AX, cont; | |
334 and EAX, 0x0300; // Form the return value | |
335 and CX, 0xFCFF; | |
336 or CX, prec; | |
337 mov cont, CX; | |
338 fldcw cont; | |
339 } | |
340 } else { | |
341 assert(0, "Not yet supported"); | |
342 } | |
343 } | |
344 | |
345 /** | |
346 * Separate floating point value into significand and exponent. | |
347 * | |
348 * Returns: | |
349 * Calculate and return <i>x</i> and exp such that | |
350 * value =<i>x</i>*2$(SUP exp) and | |
351 * .5 <= |<i>x</i>| < 1.0<br> | |
352 * <i>x</i> has same sign as value. | |
353 * | |
354 * $(TABLE_SV | |
355 * <tr> <th> value <th> returns <th> exp | |
356 * <tr> <td> ±0.0 <td> ±0.0 <td> 0 | |
357 * <tr> <td> +∞ <td> +∞ <td> int.max | |
358 * <tr> <td> -∞ <td> -∞ <td> int.min | |
359 * <tr> <td> ±$(NAN) <td> ±$(NAN) <td> int.min | |
360 * ) | |
361 */ | |
362 real frexp(real value, out int exp) | |
363 { | |
364 ushort* vu = cast(ushort*)&value; | |
365 long* vl = cast(long*)&value; | |
366 uint ex; | |
367 | |
368 static if (real.mant_dig==64) const ushort EXPMASK = 0x7FFF; | |
369 else const ushort EXPMASK = 0x7FF0; | |
370 | |
371 version(LittleEndian) { | |
372 static if (real.mant_dig==64) const int EXPONENTPOS = 4; | |
373 else const int EXPONENTPOS = 3; | |
374 } else { // BigEndian | |
375 const int EXPONENTPOS = 0; | |
376 } | |
377 | |
378 ex = vu[EXPONENTPOS] & EXPMASK; | |
379 static if (real.mant_dig == 64) { | |
380 // 80-bit reals | |
381 if (ex) { // If exponent is non-zero | |
382 if (ex == EXPMASK) { // infinity or NaN | |
383 // 80-bit reals | |
384 if (*vl & 0x7FFFFFFFFFFFFFFF) { // NaN | |
385 *vl |= 0xC000000000000000; // convert $(NAN)S to $(NAN)Q | |
386 exp = int.min; | |
387 } else if (vu[EXPONENTPOS] & 0x8000) { // negative infinity | |
388 exp = int.min; | |
389 } else { // positive infinity | |
390 exp = int.max; | |
391 } | |
392 } else { | |
393 exp = ex - 0x3FFE; | |
394 vu[EXPONENTPOS] = cast(ushort)((0x8000 & vu[EXPONENTPOS]) | 0x3FFE); | |
395 } | |
396 } else if (!*vl) { | |
397 // value is +-0.0 | |
398 exp = 0; | |
399 } else { | |
400 // denormal | |
401 int i = -0x3FFD; | |
402 do { | |
403 i--; | |
404 *vl <<= 1; | |
405 } while (*vl > 0); | |
406 exp = i; | |
407 vu[EXPONENTPOS] = cast(ushort)((0x8000 & vu[EXPONENTPOS]) | 0x3FFE); | |
408 } | |
409 } else static if(real.mant_dig==106) { | |
410 // 128-bit reals | |
411 assert(0, "Unsupported"); | |
412 } else { | |
413 // 64-bit reals | |
414 if (ex) { // If exponent is non-zero | |
415 if (ex == EXPMASK) { // infinity or NaN | |
416 if (*vl==0x7FF0_0000_0000_0000) { // positive infinity | |
417 exp = int.max; | |
418 } else if (*vl==0xFFF0_0000_0000_0000) { // negative infinity | |
419 exp = int.min; | |
420 } else { // NaN | |
421 *vl |= 0x0008_0000_0000_0000; // convert $(NAN)S to $(NAN)Q | |
422 exp = int.min; | |
423 } | |
424 } else { | |
425 exp = (ex - 0x3FE0) >>> 4; | |
426 ve[EXPONENTPOS] = (0x8000 & ve[EXPONENTPOS]) | 0x3FE0; | |
427 } | |
428 } else if (!(*vl & 0x7FFF_FFFF_FFFF_FFFF)) { | |
429 // value is +-0.0 | |
430 exp = 0; | |
431 } else { | |
432 // denormal | |
433 ushort sgn; | |
434 sgn = (0x8000 & ve[EXPONENTPOS])| 0x3FE0; | |
435 *vl &= 0x7FFF_FFFF_FFFF_FFFF; | |
436 | |
437 int i = -0x3FD+11; | |
438 do { | |
439 i--; | |
440 *vl <<= 1; | |
441 } while (*vl > 0); | |
442 exp = i; | |
443 ve[EXPONENTPOS] = sgn; | |
444 } | |
445 } | |
446 return value; | |
447 } | |
448 | |
449 debug(UnitTest) { | |
450 | |
451 unittest | |
452 { | |
453 static real vals[][3] = // x,frexp,exp | |
454 [ | |
455 [0.0, 0.0, 0], | |
456 [-0.0, -0.0, 0], | |
457 [1.0, .5, 1], | |
458 [-1.0, -.5, 1], | |
459 [2.0, .5, 2], | |
460 [double.min/2.0, .5, -1022], | |
461 [real.infinity,real.infinity,int.max], | |
462 [-real.infinity,-real.infinity,int.min], | |
463 [real.nan,real.nan,int.min], | |
464 [-real.nan,-real.nan,int.min], | |
465 ]; | |
466 | |
467 int i; | |
468 | |
469 for (i = 0; i < vals.length; i++) { | |
470 real x = vals[i][0]; | |
471 real e = vals[i][1]; | |
472 int exp = cast(int)vals[i][2]; | |
473 int eptr; | |
474 real v = frexp(x, eptr); | |
475 // printf("frexp(%La) = %La, should be %La, eptr = %d, should be %d\n", x, v, e, eptr, exp); | |
476 assert(isIdentical(e, v)); | |
477 assert(exp == eptr); | |
478 | |
479 } | |
480 static if (real.mant_dig == 64) { | |
481 static real extendedvals[][3] = [ // x,frexp,exp | |
482 [0x1.a5f1c2eb3fe4efp+73, 0x1.A5F1C2EB3FE4EFp-1, 74], // normal | |
483 [0x1.fa01712e8f0471ap-1064, 0x1.fa01712e8f0471ap-1, -1063], | |
484 [real.min, .5, -16381], | |
485 [real.min/2.0L, .5, -16382] // denormal | |
486 ]; | |
487 | |
488 for (i = 0; i < extendedvals.length; i++) { | |
489 real x = extendedvals[i][0]; | |
490 real e = extendedvals[i][1]; | |
491 int exp = cast(int)extendedvals[i][2]; | |
492 int eptr; | |
493 real v = frexp(x, eptr); | |
494 assert(isIdentical(e, v)); | |
495 assert(exp == eptr); | |
496 | |
497 } | |
498 } | |
499 } | |
500 } | |
501 | |
502 /** | |
503 * Compute n * 2$(SUP exp) | |
504 * References: frexp | |
505 */ | |
506 real ldexp(real n, int exp) /* intrinsic */ | |
507 { | |
508 version(DigitalMars_D_InlineAsm_X86) | |
509 { | |
510 asm | |
511 { | |
512 fild exp; | |
513 fld n; | |
514 fscale; | |
515 fstp st(1), st(0); | |
516 } | |
517 } | |
518 else | |
519 { | |
520 return tango.stdc.math.ldexpl(n, exp); | |
521 } | |
522 } | |
523 | |
524 /** | |
525 * Extracts the exponent of x as a signed integral value. | |
526 * | |
527 * If x is not a special value, the result is the same as | |
528 * <tt>cast(int)logb(x)</tt>. | |
529 * | |
530 * Remarks: This function is consistent with IEEE754R, but it | |
531 * differs from the C function of the same name | |
532 * in the return value of infinity. (in C, ilogb(real.infinity)== int.max). | |
533 * Note that the special return values may all be equal. | |
534 * | |
535 * $(TABLE_SV | |
536 * <tr> <th> x <th>ilogb(x) <th>invalid? | |
537 * <tr> <td> 0 <td> FP_ILOGB0 <th> yes | |
538 * <tr> <td> ±∞ <td> FP_ILOGBINFINITY <th> yes | |
539 * <tr> <td> $(NAN) <td> FP_ILOGBNAN <th> yes | |
540 * ) | |
541 */ | |
542 int ilogb(real x) | |
543 { | |
544 version(DigitalMars_D_InlineAsm_X86) | |
545 { | |
546 int y; | |
547 asm { | |
548 fld x; | |
549 fxtract; | |
550 fstp ST(0), ST; // drop significand | |
551 fistp y, ST(0); // and return the exponent | |
552 } | |
553 return y; | |
554 } else static if (real.mant_dig==64) { // 80-bit reals | |
555 short e = (cast(short *)&x)[4] & 0x7FFF; | |
556 if (e == 0x7FFF) { | |
557 // BUG: should also set the invalid exception | |
558 ulong s = *cast(ulong *)&x; | |
559 if (s == 0x8000_0000_0000_0000) { | |
560 return FP_ILOGBINFINITY; | |
561 } | |
562 else return FP_ILOGBNAN; | |
563 } | |
564 if (e==0) { | |
565 ulong s = *cast(ulong *)&x; | |
566 if (s == 0x0000_0000_0000_0000) { | |
567 // BUG: should also set the invalid exception | |
568 return FP_ILOGB0; | |
569 } | |
570 // Denormals | |
571 x *= 0x1p+63; | |
572 short f = (cast(short *)&x)[4]; | |
573 return -0x3FFF - (63-f); | |
574 | |
575 } | |
576 return e - 0x3FFF; | |
577 } else { | |
578 return tango.stdc.math.ilogbl(x); | |
579 } | |
580 } | |
581 | |
582 version (X86) | |
583 { | |
584 const int FP_ILOGB0 = -int.max-1; | |
585 const int FP_ILOGBNAN = -int.max-1; | |
586 const int FP_ILOGBINFINITY = -int.max-1; | |
587 } else { | |
588 alias tango.stdc.math.FP_ILOGB0 FP_ILOGB0; | |
589 alias tango.stdc.math.FP_ILOGBNAN FP_ILOGBNAN; | |
590 const int FP_ILOGBINFINITY = int.max; | |
591 } | |
592 | |
593 debug(UnitTest) { | |
594 unittest { | |
595 assert(ilogb(1.0) == 0); | |
596 assert(ilogb(65536) == 16); | |
597 assert(ilogb(-65536) == 16); | |
598 assert(ilogb(1.0 / 65536) == -16); | |
599 assert(ilogb(real.nan) == FP_ILOGBNAN); | |
600 assert(ilogb(0.0) == FP_ILOGB0); | |
601 assert(ilogb(-0.0) == FP_ILOGB0); | |
602 // denormal | |
603 assert(ilogb(0.125 * real.min) == real.min_exp - 4); | |
604 assert(ilogb(real.infinity) == FP_ILOGBINFINITY); | |
605 } | |
606 } | |
607 | |
608 /** | |
609 * Extracts the exponent of x as a signed integral value. | |
610 * | |
611 * If x is subnormal, it is treated as if it were normalized. | |
612 * For a positive, finite x: | |
613 * | |
614 * ----- | |
615 * 1 <= $(I x) * FLT_RADIX$(SUP -logb(x)) < FLT_RADIX | |
616 * ----- | |
617 * | |
618 * $(TABLE_SV | |
619 * <tr> <th> x <th> logb(x) <th> Divide by 0? | |
620 * <tr> <td> ±∞ <td> +∞ <td> no | |
621 * <tr> <td> ±0.0 <td> -∞ <td> yes | |
622 * ) | |
623 */ | |
624 real logb(real x) | |
625 { | |
626 version(DigitalMars_D_InlineAsm_X86) | |
627 { | |
628 asm { | |
629 fld x; | |
630 fxtract; | |
631 fstp ST(0), ST; // drop significand | |
632 } | |
633 } else { | |
634 return tango.stdc.math.logbl(x); | |
635 } | |
636 } | |
637 | |
638 debug(UnitTest) { | |
639 unittest { | |
640 assert(logb(real.infinity)== real.infinity); | |
641 assert(isIdentical(logb(NaN(0xFCD)), NaN(0xFCD))); | |
642 assert(logb(1.0)== 0.0); | |
643 assert(logb(-65536) == 16); | |
644 assert(logb(0.0)== -real.infinity); | |
645 assert(ilogb(0.125*real.min) == real.min_exp-4); | |
646 } | |
647 } | |
648 | |
649 /** | |
650 * Efficiently calculates x * 2$(SUP n). | |
651 * | |
652 * scalbn handles underflow and overflow in | |
653 * the same fashion as the basic arithmetic operators. | |
654 * | |
655 * $(TABLE_SV | |
656 * <tr> <th> x <th> scalb(x) | |
657 * <tr> <td> ±∞ <td> ±∞ | |
658 * <tr> <td> ±0.0 <td> ±0.0 | |
659 * ) | |
660 */ | |
661 real scalbn(real x, int n) | |
662 { | |
663 version(DigitalMars_D_InlineAsm_X86) | |
664 { | |
665 asm { | |
666 fild n; | |
667 fld x; | |
668 fscale; | |
669 fstp st(1), st; | |
670 } | |
671 } else { | |
672 // BUG: Not implemented in DMD | |
673 return tango.stdc.math.scalbnl(x, n); | |
674 } | |
675 } | |
676 | |
677 debug(UnitTest) { | |
678 unittest { | |
679 assert(scalbn(-real.infinity, 5) == -real.infinity); | |
680 assert(isIdentical(scalbn(NaN(0xABC),7), NaN(0xABC))); | |
681 } | |
682 } | |
683 | |
684 /** | |
685 * Returns the positive difference between x and y. | |
686 * | |
687 * If either of x or y is $(NAN), it will be returned. | |
688 * Returns: | |
689 * $(TABLE_SV | |
690 * $(SVH Arguments, fdim(x, y)) | |
691 * $(SV x > y, x - y) | |
692 * $(SV x <= y, +0.0) | |
693 * ) | |
694 */ | |
695 real fdim(real x, real y) | |
696 { | |
697 return (x !<= y) ? x - y : +0.0; | |
698 } | |
699 | |
700 debug(UnitTest) { | |
701 unittest { | |
702 assert(isIdentical(fdim(NaN(0xABC), 58.2), NaN(0xABC))); | |
703 } | |
704 } | |
705 | |
706 /** | |
707 * Returns |x| | |
708 * | |
709 * $(TABLE_SV | |
710 * <tr> <th> x <th> fabs(x) | |
711 * <tr> <td> ±0.0 <td> +0.0 | |
712 * <tr> <td> ±∞ <td> +∞ | |
713 * ) | |
714 */ | |
715 real fabs(real x) /* intrinsic */ | |
716 { | |
717 version(D_InlineAsm_X86) | |
718 { | |
719 asm | |
720 { | |
721 fld x; | |
722 fabs; | |
723 } | |
724 } | |
725 else | |
726 { | |
727 return tango.stdc.math.fabsl(x); | |
728 } | |
729 } | |
730 | |
731 unittest { | |
732 assert(isIdentical(fabs(NaN(0xABC)), NaN(0xABC))); | |
733 } | |
734 | |
735 /** | |
736 * Returns (x * y) + z, rounding only once according to the | |
737 * current rounding mode. | |
738 * | |
739 * BUGS: Not currently implemented - rounds twice. | |
740 */ | |
741 real fma(float x, float y, float z) | |
742 { | |
743 return (x * y) + z; | |
744 } | |
745 | |
746 /** | |
747 * Calculate cos(y) + i sin(y). | |
748 * | |
749 * On x86 CPUs, this is a very efficient operation; | |
750 * almost twice as fast as calculating sin(y) and cos(y) | |
751 * seperately, and is the preferred method when both are required. | |
752 */ | |
753 creal expi(real y) | |
754 { | |
755 version(DigitalMars_D_InlineAsm_X86) | |
756 { | |
757 asm | |
758 { | |
759 fld y; | |
760 fsincos; | |
761 fxch st(1), st(0); | |
762 } | |
763 } | |
764 else | |
765 { | |
766 return tango.stdc.math.cosl(y) + tango.stdc.math.sinl(y)*1i; | |
767 } | |
768 } | |
769 | |
770 debug(UnitTest) { | |
771 unittest | |
772 { | |
773 assert(expi(1.3e5L) == tango.stdc.math.cosl(1.3e5L) + tango.stdc.math.sinl(1.3e5L) * 1i); | |
774 assert(expi(0.0L) == 1L + 0.0Li); | |
775 } | |
776 } | |
777 | |
778 /********************************* | |
779 * Returns !=0 if e is a NaN. | |
780 */ | |
781 | |
782 int isNaN(real x) | |
783 { | |
784 static if (real.mant_dig==double.mant_dig) { | |
785 // 64-bit real | |
786 ulong* p = cast(ulong *)&x; | |
787 return (*p & 0x7FF0_0000 == 0x7FF0_0000) && *p & 0x000F_FFFF; | |
788 } else { | |
789 // 80-bit real | |
790 ushort* pe = cast(ushort *)&x; | |
791 ulong* ps = cast(ulong *)&x; | |
792 | |
793 return (pe[4] & 0x7FFF) == 0x7FFF && | |
794 *ps & 0x7FFFFFFFFFFFFFFF; | |
795 } | |
796 } | |
797 | |
798 | |
799 debug(UnitTest) { | |
800 unittest | |
801 { | |
802 assert(isNaN(float.nan)); | |
803 assert(isNaN(-double.nan)); | |
804 assert(isNaN(real.nan)); | |
805 | |
806 assert(!isNaN(53.6)); | |
807 assert(!isNaN(float.infinity)); | |
808 } | |
809 } | |
810 | |
811 /** | |
812 * Returns !=0 if x is normalized. | |
813 * | |
814 * (Need one for each format because subnormal | |
815 * floats might be converted to normal reals) | |
816 */ | |
817 int isNormal(float x) | |
818 { | |
819 uint *p = cast(uint *)&x; | |
820 uint e; | |
821 | |
822 e = *p & 0x7F800000; | |
823 return e && e != 0x7F800000; | |
824 } | |
825 | |
826 /** ditto */ | |
827 int isNormal(double d) | |
828 { | |
829 uint *p = cast(uint *)&d; | |
830 uint e; | |
831 | |
832 e = p[1] & 0x7FF00000; | |
833 return e && e != 0x7FF00000; | |
834 } | |
835 | |
836 /** ditto */ | |
837 int isNormal(real x) | |
838 { | |
839 static if (real.mant_dig == double.mant_dig) { | |
840 return isNormal(cast(double)x); | |
841 } else { | |
842 ushort* pe = cast(ushort *)&x; | |
843 long* ps = cast(long *)&x; | |
844 | |
845 return (pe[4] & 0x7FFF) != 0x7FFF && *ps < 0; | |
846 } | |
847 } | |
848 | |
849 debug(UnitTest) { | |
850 unittest | |
851 { | |
852 float f = 3; | |
853 double d = 500; | |
854 real e = 10e+48; | |
855 | |
856 assert(isNormal(f)); | |
857 assert(isNormal(d)); | |
858 assert(isNormal(e)); | |
859 } | |
860 } | |
861 | |
862 /********************************* | |
863 * Is the binary representation of x identical to y? | |
864 * | |
865 * Same as ==, except that positive and negative zero are not identical, | |
866 * and two $(NAN)s are identical if they have the same 'payload'. | |
867 */ | |
868 | |
869 bool isIdentical(real x, real y) | |
870 { | |
871 long* pxs = cast(long *)&x; | |
872 long* pys = cast(long *)&y; | |
873 static if (real.mant_dig == double.mant_dig){ | |
874 return pxs[0] == pys[0]; | |
875 } else { | |
876 ushort* pxe = cast(ushort *)&x; | |
877 ushort* pye = cast(ushort *)&y; | |
878 return pxe[4] == pye[4] && pxs[0] == pys[0]; | |
879 } | |
880 } | |
881 | |
882 /** ditto */ | |
883 bool isIdentical(ireal x, ireal y) { | |
884 return isIdentical(x.im, y.im); | |
885 } | |
886 | |
887 /** ditto */ | |
888 bool isIdentical(creal x, creal y) { | |
889 return isIdentical(x.re, y.re) && isIdentical(x.im, y.im); | |
890 } | |
891 | |
892 | |
893 debug(UnitTest) { | |
894 unittest { | |
895 assert(isIdentical(0.0, 0.0)); | |
896 assert(!isIdentical(0.0, -0.0)); | |
897 assert(isIdentical(NaN(0xABC), NaN(0xABC))); | |
898 assert(!isIdentical(NaN(0xABC), NaN(218))); | |
899 assert(isIdentical(1.234e56, 1.234e56)); | |
900 assert(isNaN(NaN(0x12345))); | |
901 assert(isIdentical(3.1 + NaN(0xDEF) * 1i, 3.1 + NaN(0xDEF)*1i)); | |
902 assert(!isIdentical(3.1+0.0i, 3.1-0i)); | |
903 assert(!isIdentical(0.0i, 2.5e58i)); | |
904 } | |
905 } | |
906 | |
907 /********************************* | |
908 * Is number subnormal? (Also called "denormal".) | |
909 * Subnormals have a 0 exponent and a 0 most significant significand bit. | |
910 */ | |
911 | |
912 /* Need one for each format because subnormal floats might | |
913 * be converted to normal reals. | |
914 */ | |
915 | |
916 int isSubnormal(float f) | |
917 { | |
918 uint *p = cast(uint *)&f; | |
919 | |
920 return (*p & 0x7F800000) == 0 && *p & 0x007FFFFF; | |
921 } | |
922 | |
923 debug(UnitTest) { | |
924 unittest | |
925 { | |
926 float f = 3.0; | |
927 | |
928 for (f = 1.0; !isSubnormal(f); f /= 2) | |
929 assert(f != 0); | |
930 } | |
931 } | |
932 | |
933 /// ditto | |
934 | |
935 int isSubnormal(double d) | |
936 { | |
937 uint *p = cast(uint *)&d; | |
938 | |
939 return (p[1] & 0x7FF00000) == 0 && (p[0] || p[1] & 0x000FFFFF); | |
940 } | |
941 | |
942 debug(UnitTest) { | |
943 unittest | |
944 { | |
945 double f; | |
946 | |
947 for (f = 1; !isSubnormal(f); f /= 2) | |
948 assert(f != 0); | |
949 } | |
950 } | |
951 | |
952 /// ditto | |
953 | |
954 int isSubnormal(real e) | |
955 { | |
956 static if (real.mant_dig == double.mant_dig) { | |
957 return isSubnormal(cast(double)e); | |
958 } else { | |
959 ushort* pe = cast(ushort *)&e; | |
960 long* ps = cast(long *)&e; | |
961 | |
962 return (pe[4] & 0x7FFF) == 0 && *ps > 0; | |
963 } | |
964 } | |
965 | |
966 debug(UnitTest) { | |
967 unittest | |
968 { | |
969 real f; | |
970 | |
971 for (f = 1; !isSubnormal(f); f /= 2) | |
972 assert(f != 0); | |
973 } | |
974 } | |
975 | |
976 /********************************* | |
977 * Return !=0 if x is ±0. | |
978 */ | |
979 int isZero(real x) | |
980 { | |
981 static if (real.mant_dig == double.mant_dig) { | |
982 return ((*cast(ulong *)&x) & 0x7FFF_FFFF_FFFF_FFFF) == 0; | |
983 } else { | |
984 ushort* pe = cast(ushort *)&x; | |
985 ulong* ps = cast(ulong *)&x; | |
986 return (pe[4] & 0x7FFF) == 0 && *ps == 0; | |
987 } | |
988 } | |
989 | |
990 debug(UnitTest) { | |
991 unittest | |
992 { | |
993 assert(isZero(0.0)); | |
994 assert(isZero(-0.0)); | |
995 assert(!isZero(2.5)); | |
996 assert(!isZero(real.min / 1000)); | |
997 } | |
998 } | |
999 | |
1000 /********************************* | |
1001 * Return !=0 if e is ±∞. | |
1002 */ | |
1003 | |
1004 int isInfinity(real e) | |
1005 { | |
1006 static if (real.mant_dig == double.mant_dig) { | |
1007 return ((*cast(ulong *)&x)&0x7FFF_FFFF_FFFF_FFFF) == 0x7FF8_0000_0000_0000; | |
1008 } else { | |
1009 ushort* pe = cast(ushort *)&e; | |
1010 ulong* ps = cast(ulong *)&e; | |
1011 | |
1012 return (pe[4] & 0x7FFF) == 0x7FFF && | |
1013 *ps == 0x8000_0000_0000_0000; | |
1014 } | |
1015 } | |
1016 | |
1017 debug(UnitTest) { | |
1018 unittest | |
1019 { | |
1020 assert(isInfinity(float.infinity)); | |
1021 assert(!isInfinity(float.nan)); | |
1022 assert(isInfinity(double.infinity)); | |
1023 assert(isInfinity(-real.infinity)); | |
1024 | |
1025 assert(isInfinity(-1.0 / 0.0)); | |
1026 } | |
1027 } | |
1028 | |
1029 /** | |
1030 * Calculate the next largest floating point value after x. | |
1031 * | |
1032 * Return the least number greater than x that is representable as a real; | |
1033 * thus, it gives the next point on the IEEE number line. | |
1034 * This function is included in the forthcoming IEEE 754R standard. | |
1035 * | |
1036 * $(TABLE_SV | |
1037 * $(SVH x, nextup(x) ) | |
1038 * $(SV -∞, -real.max ) | |
1039 * $(SV ±0.0, real.min*real.epsilon ) | |
1040 * $(SV real.max, real.infinity ) | |
1041 * $(SV real.infinity, real.infinity ) | |
1042 * $(SV $(NAN), $(NAN) ) | |
1043 * ) | |
1044 * | |
1045 * nextDoubleUp and nextFloatUp are the corresponding functions for | |
1046 * the IEEE double and IEEE float number lines. | |
1047 */ | |
1048 real nextUp(real x) | |
1049 { | |
1050 static if (real.mant_dig == double.mant_dig) { | |
1051 return nextDoubleUp(x); | |
1052 } else { | |
1053 // For 80-bit reals, the "implied bit" is a nuisance... | |
1054 ushort *pe = cast(ushort *)&x; | |
1055 ulong *ps = cast(ulong *)&x; | |
1056 | |
1057 if ((pe[4] & 0x7FFF) == 0x7FFF) { | |
1058 // First, deal with NANs and infinity | |
1059 if (x == -real.infinity) return -real.max; | |
1060 return x; // +INF and NAN are unchanged. | |
1061 } | |
1062 if (pe[4] & 0x8000) { // Negative number -- need to decrease the significand | |
1063 --*ps; | |
1064 // Need to mask with 0x7FFF... so denormals are treated correctly. | |
1065 if ((*ps & 0x7FFFFFFFFFFFFFFF) == 0x7FFFFFFFFFFFFFFF) { | |
1066 if (pe[4] == 0x8000) { // it was negative zero | |
1067 *ps = 1; pe[4] = 0; // smallest subnormal. | |
1068 return x; | |
1069 } | |
1070 --pe[4]; | |
1071 if (pe[4] == 0x8000) { | |
1072 return x; // it's become a denormal, implied bit stays low. | |
1073 } | |
1074 *ps = 0xFFFFFFFFFFFFFFFF; // set the implied bit | |
1075 return x; | |
1076 } | |
1077 return x; | |
1078 } else { | |
1079 // Positive number -- need to increase the significand. | |
1080 // Works automatically for positive zero. | |
1081 ++*ps; | |
1082 if ((*ps & 0x7FFFFFFFFFFFFFFF) == 0) { | |
1083 // change in exponent | |
1084 ++pe[4]; | |
1085 *ps = 0x8000000000000000; // set the high bit | |
1086 } | |
1087 } | |
1088 return x; | |
1089 } | |
1090 } | |
1091 | |
1092 /** ditto */ | |
1093 double nextDoubleUp(double x) | |
1094 { | |
1095 ulong *ps = cast(ulong *)&x; | |
1096 | |
1097 if ((*ps & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000) { | |
1098 // First, deal with NANs and infinity | |
1099 if (x == -x.infinity) return -x.max; | |
1100 return x; // +INF and NAN are unchanged. | |
1101 } | |
1102 if (*ps & 0x8000_0000_0000_0000) { // Negative number | |
1103 if (*ps == 0x8000_0000_0000_0000) { // it was negative zero | |
1104 *ps = 0x0000_0000_0000_0001; // change to smallest subnormal | |
1105 return x; | |
1106 } | |
1107 --*ps; | |
1108 } else { // Positive number | |
1109 ++*ps; | |
1110 } | |
1111 return x; | |
1112 } | |
1113 | |
1114 /** ditto */ | |
1115 float nextFloatUp(float x) | |
1116 { | |
1117 uint *ps = cast(uint *)&x; | |
1118 | |
1119 if ((*ps & 0x7F80_0000) == 0x7F80_0000) { | |
1120 // First, deal with NANs and infinity | |
1121 if (x == -x.infinity) return -x.max; | |
1122 return x; // +INF and NAN are unchanged. | |
1123 } | |
1124 if (*ps & 0x8000_0000) { // Negative number | |
1125 if (*ps == 0x8000_0000) { // it was negative zero | |
1126 *ps = 0x0000_0001; // change to smallest subnormal | |
1127 return x; | |
1128 } | |
1129 --*ps; | |
1130 } else { // Positive number | |
1131 ++*ps; | |
1132 } | |
1133 return x; | |
1134 } | |
1135 | |
1136 debug(UnitTest) { | |
1137 unittest { | |
1138 static if (real.mant_dig == 64) { | |
1139 | |
1140 // Tests for 80-bit reals | |
1141 | |
1142 assert(isIdentical(nextUp(NaN(0xABC)), NaN(0xABC))); | |
1143 // negative numbers | |
1144 assert( nextUp(-real.infinity) == -real.max ); | |
1145 assert( nextUp(-1-real.epsilon) == -1.0 ); | |
1146 assert( nextUp(-2) == -2.0 + real.epsilon); | |
1147 // denormals and zero | |
1148 assert( nextUp(-real.min) == -real.min*(1-real.epsilon) ); | |
1149 assert( nextUp(-real.min*(1-real.epsilon) == -real.min*(1-2*real.epsilon)) ); | |
1150 assert( isIdentical(-0.0L, nextUp(-real.min*real.epsilon)) ); | |
1151 assert( nextUp(-0.0) == real.min*real.epsilon ); | |
1152 assert( nextUp(0.0) == real.min*real.epsilon ); | |
1153 assert( nextUp(real.min*(1-real.epsilon)) == real.min ); | |
1154 assert( nextUp(real.min) == real.min*(1+real.epsilon) ); | |
1155 // positive numbers | |
1156 assert( nextUp(1) == 1.0 + real.epsilon ); | |
1157 assert( nextUp(2.0-real.epsilon) == 2.0 ); | |
1158 assert( nextUp(real.max) == real.infinity ); | |
1159 assert( nextUp(real.infinity)==real.infinity ); | |
1160 } | |
1161 | |
1162 assert(isIdentical(nextDoubleUp(NaN(0xABC)), NaN(0xABC))); | |
1163 // negative numbers | |
1164 assert( nextDoubleUp(-double.infinity) == -double.max ); | |
1165 assert( nextDoubleUp(-1-double.epsilon) == -1.0 ); | |
1166 assert( nextDoubleUp(-2) == -2.0 + double.epsilon); | |
1167 // denormals and zero | |
1168 | |
1169 assert( nextDoubleUp(-double.min) == -double.min*(1-double.epsilon) ); | |
1170 assert( nextDoubleUp(-double.min*(1-double.epsilon) == -double.min*(1-2*double.epsilon)) ); | |
1171 assert( isIdentical(-0.0, nextDoubleUp(-double.min*double.epsilon)) ); | |
1172 assert( nextDoubleUp(0.0) == double.min*double.epsilon ); | |
1173 assert( nextDoubleUp(-0.0) == double.min*double.epsilon ); | |
1174 assert( nextDoubleUp(double.min*(1-double.epsilon)) == double.min ); | |
1175 assert( nextDoubleUp(double.min) == double.min*(1+double.epsilon) ); | |
1176 // positive numbers | |
1177 assert( nextDoubleUp(1) == 1.0 + double.epsilon ); | |
1178 assert( nextDoubleUp(2.0-double.epsilon) == 2.0 ); | |
1179 assert( nextDoubleUp(double.max) == double.infinity ); | |
1180 | |
1181 assert(isIdentical(nextFloatUp(NaN(0xABC)), NaN(0xABC))); | |
1182 assert( nextFloatUp(-float.min) == -float.min*(1-float.epsilon) ); | |
1183 assert( nextFloatUp(1.0) == 1.0+float.epsilon ); | |
1184 assert( nextFloatUp(-0.0) == float.min*float.epsilon); | |
1185 assert( nextFloatUp(float.infinity)==float.infinity ); | |
1186 | |
1187 assert(nextDown(1.0+real.epsilon)==1.0); | |
1188 assert(nextDoubleDown(1.0+double.epsilon)==1.0); | |
1189 assert(nextFloatDown(1.0+float.epsilon)==1.0); | |
1190 assert(nextafter(1.0+real.epsilon, -real.infinity)==1.0); | |
1191 } | |
1192 } | |
1193 | |
1194 package { | |
1195 /** Reduces the magnitude of x, so the bits in the lower half of its significand | |
1196 * are all zero. Returns the amount which needs to be added to x to restore its | |
1197 * initial value; this amount will also have zeros in all bits in the lower half | |
1198 * of its significand. | |
1199 */ | |
1200 X splitSignificand(X)(inout X x) | |
1201 { | |
1202 if (fabs(x) !< X.infinity) return 0; // don't change NaN or infinity | |
1203 X y = x; // copy the original value | |
1204 static if (X.mant_dig == float.mant_dig) { | |
1205 uint *ps = cast(uint *)&x; | |
1206 (*ps) &= 0xFFFF_FC00; | |
1207 } else static if (X.mant_dig == double.mant_dig) { | |
1208 ulong *ps = cast(ulong *)&x; | |
1209 (*ps) &= 0xFFFF_FFFF_FC00_0000; | |
1210 } else static if (X.mant_dig == 64){ // 80-bit real | |
1211 // An x87 real80 has 63 bits, because the 'implied' bit is stored explicitly. | |
1212 // This is annoying, because it means the significand cannot be | |
1213 // precisely halved. Instead, we split it into 31+32 bits. | |
1214 ulong *ps = cast(ulong *)&x; | |
1215 (*ps) &= 0xFFFF_FFFF_0000_0000; | |
1216 } //else static assert(0, "Unsupported size"); | |
1217 | |
1218 return y - x; | |
1219 } | |
1220 | |
1221 | |
1222 //import tango.stdc.stdio; | |
1223 unittest { | |
1224 double x = -0x1.234_567A_AAAA_AAp+250; | |
1225 double y = splitSignificand(x); | |
1226 assert(x == -0x1.234_5678p+250); | |
1227 assert(y == -0x0.000_000A_AAAA_A8p+248); | |
1228 assert(x + y == -0x1.234_567A_AAAA_AAp+250); | |
1229 } | |
1230 } | |
1231 | |
1232 /** | |
1233 * Calculate the next smallest floating point value after x. | |
1234 * | |
1235 * Return the greatest number less than x that is representable as a real; | |
1236 * thus, it gives the previous point on the IEEE number line. | |
1237 * Note: This function is included in the forthcoming IEEE 754R standard. | |
1238 * | |
1239 * Special values: | |
1240 * real.infinity real.max | |
1241 * real.min*real.epsilon 0.0 | |
1242 * 0.0 -real.min*real.epsilon | |
1243 * -0.0 -real.min*real.epsilon | |
1244 * -real.max -real.infinity | |
1245 * -real.infinity -real.infinity | |
1246 * NAN NAN | |
1247 * | |
1248 * nextDoubleDown and nextFloatDown are the corresponding functions for | |
1249 * the IEEE double and IEEE float number lines. | |
1250 */ | |
1251 real nextDown(real x) | |
1252 { | |
1253 return -nextUp(-x); | |
1254 } | |
1255 | |
1256 /** ditto */ | |
1257 double nextDoubleDown(double x) | |
1258 { | |
1259 return -nextDoubleUp(-x); | |
1260 } | |
1261 | |
1262 /** ditto */ | |
1263 float nextFloatDown(float x) | |
1264 { | |
1265 return -nextFloatUp(-x); | |
1266 } | |
1267 | |
1268 debug(UnitTest) { | |
1269 unittest { | |
1270 assert( nextDown(1.0 + real.epsilon) == 1.0); | |
1271 } | |
1272 } | |
1273 | |
1274 | |
1275 /** | |
1276 * Calculates the next representable value after x in the direction of y. | |
1277 * | |
1278 * If y > x, the result will be the next largest floating-point value; | |
1279 * if y < x, the result will be the next smallest value. | |
1280 * If x == y, the result is y. | |
1281 * | |
1282 * Remarks: | |
1283 * This function is not generally very useful; it's almost always better to use | |
1284 * the faster functions nextup() or nextdown() instead. | |
1285 * | |
1286 * IEEE 754 requirements not implemented: | |
1287 * The FE_INEXACT and FE_OVERFLOW exceptions will be raised if x is finite and | |
1288 * the function result is infinite. The FE_INEXACT and FE_UNDERFLOW | |
1289 * exceptions will be raised if the function value is subnormal, and x is | |
1290 * not equal to y. | |
1291 */ | |
1292 real nextafter(real x, real y) | |
1293 { | |
1294 if (x==y) return y; | |
1295 return (y>x) ? nextUp(x) : nextDown(x); | |
1296 } | |
1297 | |
1298 /************************************** | |
1299 * To what precision is x equal to y? | |
1300 * | |
1301 * Returns: the number of significand bits which are equal in x and y. | |
1302 * eg, 0x1.F8p+60 and 0x1.F1p+60 are equal to 5 bits of precision. | |
1303 * | |
1304 * $(TABLE_SV | |
1305 * $(SVH3 x, y, feqrel(x, y) ) | |
1306 * $(SV3 x, x, real.mant_dig ) | |
1307 * $(SV3 x, >= 2*x, 0 ) | |
1308 * $(SV3 x, <= x/2, 0 ) | |
1309 * $(SV3 $(NAN), any, 0 ) | |
1310 * $(SV3 any, $(NAN), 0 ) | |
1311 * ) | |
1312 * | |
1313 * Remarks: | |
1314 * This is a very fast operation, suitable for use in speed-critical code. | |
1315 * | |
1316 */ | |
1317 | |
1318 int feqrel(real x, real y) | |
1319 { | |
1320 /* Public Domain. Author: Don Clugston, 18 Aug 2005. | |
1321 */ | |
1322 | |
1323 if (x == y) return real.mant_dig; // ensure diff!=0, cope with INF. | |
1324 | |
1325 real diff = fabs(x - y); | |
1326 | |
1327 ushort *pa = cast(ushort *)(&x); | |
1328 ushort *pb = cast(ushort *)(&y); | |
1329 ushort *pd = cast(ushort *)(&diff); | |
1330 | |
1331 // The difference in abs(exponent) between x or y and abs(x-y) | |
1332 // is equal to the number of significand bits of x which are | |
1333 // equal to y. If negative, x and y have different exponents. | |
1334 // If positive, x and y are equal to 'bitsdiff' bits. | |
1335 // AND with 0x7FFF to form the absolute value. | |
1336 // To avoid out-by-1 errors, we subtract 1 so it rounds down | |
1337 // if the exponents were different. This means 'bitsdiff' is | |
1338 // always 1 lower than we want, except that if bitsdiff==0, | |
1339 // they could have 0 or 1 bits in common. | |
1340 | |
1341 static if (real.mant_dig==64) | |
1342 { | |
1343 | |
1344 int bitsdiff = ( ((pa[4]&0x7FFF) + (pb[4]&0x7FFF)-1)>>1) - pd[4]; | |
1345 | |
1346 if (pd[4] == 0) | |
1347 { // Difference is denormal | |
1348 // For denormals, we need to add the number of zeros that | |
1349 // lie at the start of diff's significand. | |
1350 // We do this by multiplying by 2^real.mant_dig | |
1351 diff *= 0x1p+63; | |
1352 return bitsdiff + real.mant_dig - pd[4]; | |
1353 } | |
1354 | |
1355 if (bitsdiff > 0) | |
1356 return bitsdiff + 1; // add the 1 we subtracted before | |
1357 | |
1358 // Avoid out-by-1 errors when factor is almost 2. | |
1359 return (bitsdiff == 0) ? (pa[4] == pb[4]) : 0; | |
1360 } else { | |
1361 // 64-bit reals | |
1362 version(LittleEndian) | |
1363 const int EXPONENTPOS = 3; | |
1364 else const int EXPONENTPOS = 0; | |
1365 | |
1366 int bitsdiff = ( ((pa[EXPONENTPOS]&0x7FF0) + (pb[EXPONENTPOS]&0x7FF0)-0x10)>>5) - (pd[EXPONENTPOS]&0x7FF0>>4); | |
1367 | |
1368 if (pd[EXPONENTPOS] == 0) | |
1369 { // Difference is denormal | |
1370 // For denormals, we need to add the number of zeros that | |
1371 // lie at the start of diff's significand. | |
1372 // We do this by multiplying by 2^real.mant_dig | |
1373 diff *= 0x1p+53; | |
1374 return bitsdiff + real.mant_dig - pd[EXPONENTPOS]; | |
1375 } | |
1376 | |
1377 if (bitsdiff > 0) | |
1378 return bitsdiff + 1; // add the 1 we subtracted before | |
1379 | |
1380 // Avoid out-by-1 errors when factor is almost 2. | |
1381 if (bitsdiff == 0 && (pa[EXPONENTPOS] ^ pb[EXPONENTPOS])&0x7FF0) return 1; | |
1382 else return 0; | |
1383 | |
1384 } | |
1385 | |
1386 } | |
1387 | |
1388 debug(UnitTest) { | |
1389 unittest | |
1390 { | |
1391 // Exact equality | |
1392 assert(feqrel(real.max,real.max)==real.mant_dig); | |
1393 assert(feqrel(0,0)==real.mant_dig); | |
1394 assert(feqrel(7.1824,7.1824)==real.mant_dig); | |
1395 assert(feqrel(real.infinity,real.infinity)==real.mant_dig); | |
1396 | |
1397 // a few bits away from exact equality | |
1398 real w=1; | |
1399 for (int i=1; i<real.mant_dig-1; ++i) { | |
1400 assert(feqrel(1+w*real.epsilon,1)==real.mant_dig-i); | |
1401 assert(feqrel(1-w*real.epsilon,1)==real.mant_dig-i); | |
1402 assert(feqrel(1,1+(w-1)*real.epsilon)==real.mant_dig-i+1); | |
1403 w*=2; | |
1404 } | |
1405 assert(feqrel(1.5+real.epsilon,1.5)==real.mant_dig-1); | |
1406 assert(feqrel(1.5-real.epsilon,1.5)==real.mant_dig-1); | |
1407 assert(feqrel(1.5-real.epsilon,1.5+real.epsilon)==real.mant_dig-2); | |
1408 | |
1409 assert(feqrel(real.min/8,real.min/17)==3);; | |
1410 | |
1411 // Numbers that are close | |
1412 assert(feqrel(0x1.Bp+84, 0x1.B8p+84)==5); | |
1413 assert(feqrel(0x1.8p+10, 0x1.Cp+10)==2); | |
1414 assert(feqrel(1.5*(1-real.epsilon), 1)==2); | |
1415 assert(feqrel(1.5, 1)==1); | |
1416 assert(feqrel(2*(1-real.epsilon), 1)==1); | |
1417 | |
1418 // Factors of 2 | |
1419 assert(feqrel(real.max,real.infinity)==0); | |
1420 assert(feqrel(2*(1-real.epsilon), 1)==1); | |
1421 assert(feqrel(1, 2)==0); | |
1422 assert(feqrel(4, 1)==0); | |
1423 | |
1424 // Extreme inequality | |
1425 assert(feqrel(real.nan,real.nan)==0); | |
1426 assert(feqrel(0,-real.nan)==0); | |
1427 assert(feqrel(real.nan,real.infinity)==0); | |
1428 assert(feqrel(real.infinity,-real.infinity)==0); | |
1429 assert(feqrel(-real.max,real.infinity)==0); | |
1430 assert(feqrel(real.max,-real.max)==0); | |
1431 } | |
1432 } | |
1433 | |
1434 /********************************* | |
1435 * Return 1 if sign bit of e is set, 0 if not. | |
1436 */ | |
1437 | |
1438 int signbit(real x) | |
1439 { | |
1440 static if (real.mant_dig == double.mant_dig) { | |
1441 return ((*cast(ulong *)&x) & 0x8000_0000_0000_0000) != 0; | |
1442 } else { | |
1443 ubyte* pe = cast(ubyte *)&x; | |
1444 return (pe[9] & 0x80) != 0; | |
1445 } | |
1446 } | |
1447 | |
1448 debug(UnitTest) { | |
1449 unittest | |
1450 { | |
1451 assert(!signbit(float.nan)); | |
1452 assert(signbit(-float.nan)); | |
1453 assert(!signbit(168.1234)); | |
1454 assert(signbit(-168.1234)); | |
1455 assert(!signbit(0.0)); | |
1456 assert(signbit(-0.0)); | |
1457 } | |
1458 } | |
1459 | |
1460 | |
1461 /********************************* | |
1462 * Return a value composed of to with from's sign bit. | |
1463 */ | |
1464 | |
1465 real copysign(real to, real from) | |
1466 { | |
1467 static if (real.mant_dig == double.mant_dig) { | |
1468 ulong* pto = cast(ulong *)&to; | |
1469 ulong* pfrom = cast(ulong *)&from; | |
1470 *pto &= 0x7FFF_FFFF_FFFF_FFFF; | |
1471 *pto |= (*pfrom) & 0x8000_0000_0000_0000; | |
1472 return to; | |
1473 } else { | |
1474 ubyte* pto = cast(ubyte *)&to; | |
1475 ubyte* pfrom = cast(ubyte *)&from; | |
1476 | |
1477 pto[9] &= 0x7F; | |
1478 pto[9] |= pfrom[9] & 0x80; | |
1479 | |
1480 return to; | |
1481 } | |
1482 } | |
1483 | |
1484 debug(UnitTest) { | |
1485 unittest | |
1486 { | |
1487 real e; | |
1488 | |
1489 e = copysign(21, 23.8); | |
1490 assert(e == 21); | |
1491 | |
1492 e = copysign(-21, 23.8); | |
1493 assert(e == 21); | |
1494 | |
1495 e = copysign(21, -23.8); | |
1496 assert(e == -21); | |
1497 | |
1498 e = copysign(-21, -23.8); | |
1499 assert(e == -21); | |
1500 | |
1501 e = copysign(real.nan, -23.8); | |
1502 assert(isNaN(e) && signbit(e)); | |
1503 } | |
1504 } | |
1505 | |
1506 /** Return the value that lies halfway between x and y on the IEEE number line. | |
1507 * | |
1508 * Formally, the result is the arithmetic mean of the binary significands of x | |
1509 * and y, multiplied by the geometric mean of the binary exponents of x and y. | |
1510 * x and y must have the same sign, and must not be NaN. | |
1511 * Note: this function is useful for ensuring O(log n) behaviour in algorithms | |
1512 * involving a 'binary chop'. | |
1513 * | |
1514 * Special cases: | |
1515 * If x and y are within a factor of 2, (ie, feqrel(x, y) > 0), the return value | |
1516 * is the arithmetic mean (x + y) / 2. | |
1517 * If x and y are even powers of 2, the return value is the geometric mean, | |
1518 * ieeeMean(x, y) = sqrt(x * y). | |
1519 * | |
1520 */ | |
1521 T ieeeMean(T)(T x, T y) | |
1522 in { | |
1523 // both x and y must have the same sign, and must not be NaN. | |
1524 assert(signbit(x) == signbit(y) && x<>=0 && y<>=0); | |
1525 } | |
1526 body { | |
1527 // Runtime behaviour for contract violation: | |
1528 // If signs are opposite, or one is a NaN, return 0. | |
1529 if (!((x>=0 && y>=0) || (x<=0 && y<=0))) return 0.0; | |
1530 | |
1531 // The implementation is simple: cast x and y to integers, | |
1532 // average them (avoiding overflow), and cast the result back to a floating-point number. | |
1533 | |
1534 T u; | |
1535 static if (T.mant_dig==64) { // x87, 80-bit reals | |
1536 // There's slight additional complexity because they are actually | |
1537 // 79-bit reals... | |
1538 ushort *ue = cast(ushort *)&u; | |
1539 ulong *ul = cast(ulong *)&u; | |
1540 ushort *xe = cast(ushort *)&x; | |
1541 ulong *xl = cast(ulong *)&x; | |
1542 ushort *ye = cast(ushort *)&y; | |
1543 ulong *yl = cast(ulong *)&y; | |
1544 // Ignore the useless implicit bit. | |
1545 ulong m = ((*xl) & 0x7FFF_FFFF_FFFF_FFFF) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFF); | |
1546 | |
1547 ushort e = cast(ushort)((xe[4] & 0x7FFF) + (ye[4] & 0x7FFF)); | |
1548 if (m & 0x8000_0000_0000_0000) { | |
1549 ++e; | |
1550 m &= 0x7FFF_FFFF_FFFF_FFFF; | |
1551 } | |
1552 // Now do a multi-byte right shift | |
1553 uint c = e & 1; // carry | |
1554 e >>= 1; | |
1555 m >>>= 1; | |
1556 if (c) m |= 0x4000_0000_0000_0000; // shift carry into significand | |
1557 if (e) *ul = m | 0x8000_0000_0000_0000; // set implicit bit... | |
1558 else *ul = m; // ... unless exponent is 0 (denormal or zero). | |
1559 // Prevent a ridiculous warning (why does (ushort | ushort) get promoted to int???) | |
1560 ue[4]= cast(ushort)( e | (xe[4]& 0x8000)); // restore sign bit | |
1561 } else static if (T.mant_dig == double.mant_dig) { | |
1562 ulong *ul = cast(ulong *)&u; | |
1563 ulong *xl = cast(ulong *)&x; | |
1564 ulong *yl = cast(ulong *)&y; | |
1565 ulong m = (((*xl) & 0x7FFF_FFFF_FFFF_FFFF) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFF)) >>> 1; | |
1566 m |= ((*xl) & 0x8000_0000_0000_0000); | |
1567 *ul = m; | |
1568 }else static if (T.mant_dig == float.mant_dig) { | |
1569 uint *ul = cast(uint *)&u; | |
1570 uint *xl = cast(uint *)&x; | |
1571 uint *yl = cast(uint *)&y; | |
1572 uint m = (((*xl) & 0x7FFF_FFFF) + ((*yl) & 0x7FFF_FFFF)) >>> 1; | |
1573 m |= ((*xl) & 0x8000_0000); | |
1574 *ul = m; | |
1575 } | |
1576 return u; | |
1577 } | |
1578 | |
1579 debug(UnitTest) { | |
1580 unittest { | |
1581 assert(ieeeMean(-0.0,-1e-20)<0); | |
1582 assert(ieeeMean(0.0,1e-20)>0); | |
1583 | |
1584 assert(ieeeMean(1.0L,4.0L)==2L); | |
1585 assert(ieeeMean(2.0*1.013,8.0*1.013)==4*1.013); | |
1586 assert(ieeeMean(-1.0L,-4.0L)==-2L); | |
1587 assert(ieeeMean(-1.0,-4.0)==-2); | |
1588 assert(ieeeMean(-1.0f,-4.0f)==-2f); | |
1589 assert(ieeeMean(-1.0,-2.0)==-1.5); | |
1590 assert(ieeeMean(-1*(1+8*real.epsilon),-2*(1+8*real.epsilon))==-1.5*(1+5*real.epsilon)); | |
1591 assert(ieeeMean(0x1p60,0x1p-10)==0x1p25); | |
1592 static if (real.mant_dig==64) { // x87, 80-bit reals | |
1593 assert(ieeeMean(1.0L,real.infinity)==0x1p8192L); | |
1594 assert(ieeeMean(0.0L,real.infinity)==1.5); | |
1595 } | |
1596 assert(ieeeMean(0.5*real.min*(1-4*real.epsilon),0.5*real.min)==0.5*real.min*(1-2*real.epsilon)); | |
1597 } | |
1598 } | |
1599 | |
1600 // Functions for NaN payloads | |
1601 /* | |
1602 * A 'payload' can be stored in the significand of a $(NAN). One bit is required | |
1603 * to distinguish between a quiet and a signalling $(NAN). This leaves 22 bits | |
1604 * of payload for a float; 51 bits for a double; 62 bits for an 80-bit real; | |
1605 * and 111 bits for a 128-bit quad. | |
1606 */ | |
1607 /** | |
1608 * Create a $(NAN), storing an integer inside the payload. | |
1609 * | |
1610 * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF. | |
1611 * For doubles, it is 0x3_FFFF_FFFF_FFFF. | |
1612 * For floats, it is 0x3F_FFFF. | |
1613 */ | |
1614 real NaN(ulong payload) | |
1615 { | |
1616 static if (real.mant_dig == double.mant_dig) { | |
1617 ulong v = 2; // no implied bit. quiet bit = 1 | |
1618 } else { | |
1619 ulong v = 3; // implied bit = 1, quiet bit = 1 | |
1620 } | |
1621 | |
1622 ulong a = payload; | |
1623 | |
1624 // 22 Float bits | |
1625 ulong w = a & 0x3F_FFFF; | |
1626 a -= w; | |
1627 | |
1628 v <<=22; | |
1629 v |= w; | |
1630 a >>=22; | |
1631 | |
1632 // 29 Double bits | |
1633 v <<=29; | |
1634 w = a & 0xFFF_FFFF; | |
1635 v |= w; | |
1636 a -= w; | |
1637 a >>=29; | |
1638 | |
1639 static if (real.mant_dig == double.mant_dig) { | |
1640 v |=0x7FF0_0000_0000_0000; | |
1641 real x; | |
1642 * cast(ulong *)(&x) = v; | |
1643 return x; | |
1644 } else { | |
1645 // Extended real bits | |
1646 v <<=11; | |
1647 a &= 0x7FF; | |
1648 v |= a; | |
1649 | |
1650 real x = real.nan; | |
1651 * cast(ulong *)(&x) = v; | |
1652 return x; | |
1653 } | |
1654 } | |
1655 | |
1656 /** | |
1657 * Extract an integral payload from a $(NAN). | |
1658 * | |
1659 * Returns: | |
1660 * the integer payload as a ulong. | |
1661 * | |
1662 * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF. | |
1663 * For doubles, it is 0x3_FFFF_FFFF_FFFF. | |
1664 * For floats, it is 0x3F_FFFF. | |
1665 */ | |
1666 ulong getNaNPayload(real x) | |
1667 { | |
1668 assert(isNaN(x)); | |
1669 ulong m = *cast(ulong *)(&x); | |
1670 static if (real.mant_dig == double.mant_dig) { | |
1671 // Make it look like an 80-bit significand. | |
1672 // Skip exponent, and quiet bit | |
1673 m &= 0x0007_FFFF_FFFF_FFFF; | |
1674 m <<= 10; | |
1675 } | |
1676 // ignore implicit bit and quiet bit | |
1677 ulong f = m & 0x3FFF_FF00_0000_0000L; | |
1678 ulong w = f >>> 40; | |
1679 w |= (m & 0x00FF_FFFF_F800L) << (22 - 11); | |
1680 w |= (m & 0x7FF) << 51; | |
1681 return w; | |
1682 } | |
1683 | |
1684 debug(UnitTest) { | |
1685 unittest { | |
1686 real nan4 = NaN(0x789_ABCD_EF12_3456); | |
1687 static if (real.mant_dig == 64) { | |
1688 assert (getNaNPayload(nan4) == 0x789_ABCD_EF12_3456); | |
1689 } else { | |
1690 assert (getNaNPayload(nan4) == 0x1_ABCD_EF12_3456); | |
1691 } | |
1692 double nan5 = nan4; | |
1693 assert (getNaNPayload(nan5) == 0x1_ABCD_EF12_3456); | |
1694 float nan6 = nan4; | |
1695 assert (getNaNPayload(nan6) == 0x12_3456); | |
1696 nan4 = NaN(0xFABCD); | |
1697 assert (getNaNPayload(nan4) == 0xFABCD); | |
1698 nan6 = nan4; | |
1699 assert (getNaNPayload(nan6) == 0xFABCD); | |
1700 nan5 = NaN(0x100_0000_0000_3456); | |
1701 assert(getNaNPayload(nan5) == 0x0000_0000_3456); | |
1702 } | |
1703 } | |
1704 |