comparison tango/tango/math/Probability.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
comparison
equal deleted inserted replaced
131:5825d48b27d1 132:1700239cab2e
1 /**
2 * Cumulative Probability Distribution Functions
3 *
4 * Copyright: Based on the CEPHES math library, which is
5 * Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com).
6 * License: BSD style: $(LICENSE)
7 * Authors: Stephen L. Moshier (original C code), Don Clugston
8 */
9
10 /**
11 * Macros:
12 * NAN = $(RED NAN)
13 * SUP = <span style="vertical-align:super;font-size:smaller">$0</span>
14 * GAMMA = &#915;
15 * INTEGRAL = &#8747;
16 * INTEGRATE = $(BIG &#8747;<sub>$(SMALL $1)</sub><sup>$2</sup>)
17 * POWER = $1<sup>$2</sup>
18 * BIGSUM = $(BIG &Sigma; <sup>$2</sup><sub>$(SMALL $1)</sub>)
19 * CHOOSE = $(BIG &#40;) <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG &#41;)
20 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0>
21 * <caption>Special Values</caption>
22 * $0</table>
23 * SVH = $(TR $(TH $1) $(TH $2))
24 * SV = $(TR $(TD $1) $(TD $2))
25 */
26
27 module tango.math.Probability;
28 static import tango.math.ErrorFunction;
29 private import tango.math.GammaFunction;
30 private import tango.math.Math;
31 private import tango.math.IEEE;
32
33
34 /***
35 Cumulative distribution function for the Normal distribution, and its complement.
36
37 The normal (or Gaussian, or bell-shaped) distribution is
38 defined as:
39
40 normalDist(x) = 1/$(SQRT) &pi; $(INTEGRAL -$(INFINITY), x) exp( - $(POWER t, 2)/2) dt
41 = 0.5 + 0.5 * erf(x/sqrt(2))
42 = 0.5 * erfc(- x/sqrt(2))
43
44 Note that
45 normalDistribution(x) = 1 - normalDistribution(-x).
46
47 Accuracy:
48 Within a few bits of machine resolution over the entire
49 range.
50
51 References:
52 $(LINK http://www.netlib.org/cephes/ldoubdoc.html),
53 G. Marsaglia, "Evaluating the Normal Distribution",
54 Journal of Statistical Software <b>11</b>, (July 2004).
55 */
56 real normalDistribution(real a)
57 {
58 return tango.math.ErrorFunction.normalDistributionImpl(a);
59 }
60
61 /** ditto */
62 real normalDistributionCompl(real a)
63 {
64 return -tango.math.ErrorFunction.normalDistributionImpl(-a);
65 }
66
67 /******************************
68 * Inverse of Normal distribution function
69 *
70 * Returns the argument, x, for which the area under the
71 * Normal probability density function (integrated from
72 * minus infinity to x) is equal to p.
73 *
74 * For small arguments 0 < p < exp(-2), the program computes
75 * z = sqrt( -2 log(p) ); then the approximation is
76 * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z) .
77 * For larger arguments, x/sqrt(2 pi) = w + w^3 R(w^2)/S(w^2)) ,
78 * where w = p - 0.5 .
79 */
80 real normalDistributionInv(real p)
81 {
82 return tango.math.ErrorFunction.normalDistributionInvImpl(p);
83 }
84
85 /** ditto */
86 real normalDistributionComplInv(real p)
87 {
88 return -tango.math.ErrorFunction.normalDistributionInvImpl(-p);
89 }
90
91 debug(UnitTest) {
92 unittest {
93 assert(feqrel(normalDistributionInv(normalDistribution(0.1)),0.1)>=real.mant_dig-4);
94 assert(feqrel(normalDistributionComplInv(normalDistributionCompl(0.1)),0.1)>=real.mant_dig-4);
95 }
96 }
97
98 /** Student's t cumulative distribution function
99 *
100 * Computes the integral from minus infinity to t of the Student
101 * t distribution with integer nu > 0 degrees of freedom:
102 *
103 * $(GAMMA)( (nu+1)/2) / ( sqrt(nu &pi;) $(GAMMA)(nu/2) ) *
104 * $(INTEGRATE -&infin;, t) $(POWER (1+$(POWER x, 2)/nu), -(nu+1)/2) dx
105 *
106 * Can be used to test whether the means of two normally distributed populations
107 * are equal.
108 *
109 * It is related to the incomplete beta integral:
110 * 1 - studentsDistribution(nu,t) = 0.5 * betaDistribution( nu/2, 1/2, z )
111 * where
112 * z = nu/(nu + t<sup>2</sup>).
113 *
114 * For t < -1.6, this is the method of computation. For higher t,
115 * a direct method is derived from integration by parts.
116 * Since the function is symmetric about t=0, the area under the
117 * right tail of the density is found by calling the function
118 * with -t instead of t.
119 */
120 real studentsTDistribution(int nu, real t)
121 in{
122 assert(nu>0);
123 }
124 body{
125 /* Based on code from Cephes Math Library Release 2.3: January, 1995
126 Copyright 1984, 1995 by Stephen L. Moshier
127 */
128
129 if ( nu <= 0 ) return NaN(TANGO_NAN.STUDENTSDDISTRIBUTION_DOMAIN); // domain error -- or should it return 0?
130 if ( t == 0.0 ) return 0.5;
131
132 real rk, z, p;
133
134 if ( t < -1.6 ) {
135 rk = nu;
136 z = rk / (rk + t * t);
137 return 0.5L * betaIncomplete( 0.5L*rk, 0.5L, z );
138 }
139
140 /* compute integral from -t to + t */
141
142 rk = nu; /* degrees of freedom */
143
144 real x;
145 if (t < 0) x = -t; else x = t;
146 z = 1.0L + ( x * x )/rk;
147
148 real f, tz;
149 int j;
150
151 if ( nu & 1) {
152 /* computation for odd nu */
153 real xsqk = x/sqrt(rk);
154 p = atan( xsqk );
155 if ( nu > 1 ) {
156 f = 1.0L;
157 tz = 1.0L;
158 j = 3;
159 while( (j<=(nu-2)) && ( (tz/f) > real.epsilon ) ) {
160 tz *= (j-1)/( z * j );
161 f += tz;
162 j += 2;
163 }
164 p += f * xsqk/z;
165 }
166 p *= 2.0L/PI;
167 } else {
168 /* computation for even nu */
169 f = 1.0L;
170 tz = 1.0L;
171 j = 2;
172
173 while ( ( j <= (nu-2) ) && ( (tz/f) > real.epsilon ) ) {
174 tz *= (j - 1)/( z * j );
175 f += tz;
176 j += 2;
177 }
178 p = f * x/sqrt(z*rk);
179 }
180 if ( t < 0.0L )
181 p = -p; /* note destruction of relative accuracy */
182
183 p = 0.5L + 0.5L * p;
184 return p;
185 }
186
187 /** Inverse of Student's t distribution
188 *
189 * Given probability p and degrees of freedom nu,
190 * finds the argument t such that the one-sided
191 * studentsDistribution(nu,t) is equal to p.
192 *
193 * Params:
194 * nu = degrees of freedom. Must be >1
195 * p = probability. 0 < p < 1
196 */
197 real studentsTDistributionInv(int nu, real p )
198 in {
199 assert(nu>0);
200 assert(p>=0.0L && p<=1.0L);
201 }
202 body
203 {
204 if (p==0) return -real.infinity;
205 if (p==1) return real.infinity;
206
207 real rk, z;
208 rk = nu;
209
210 if ( p > 0.25L && p < 0.75L ) {
211 if ( p == 0.5L ) return 0;
212 z = 1.0L - 2.0L * p;
213 z = betaIncompleteInv( 0.5L, 0.5L*rk, fabs(z) );
214 real t = sqrt( rk*z/(1.0L-z) );
215 if( p < 0.5L )
216 t = -t;
217 return t;
218 }
219 int rflg = -1; // sign of the result
220 if (p >= 0.5L) {
221 p = 1.0L - p;
222 rflg = 1;
223 }
224 z = betaIncompleteInv( 0.5L*rk, 0.5L, 2.0L*p );
225
226 if (z<0) return rflg * real.infinity;
227 return rflg * sqrt( rk/z - rk );
228 }
229
230 debug(UnitTest) {
231 unittest {
232
233 // There are simple forms for nu = 1 and nu = 2.
234
235 // if (nu == 1), tDistribution(x) = 0.5 + atan(x)/PI
236 // so tDistributionInv(p) = tan( PI * (p-0.5) );
237 // nu==2: tDistribution(x) = 0.5 * (1 + x/ sqrt(2+x*x) )
238
239 assert(studentsTDistribution(1, -0.4)== 0.5 + atan(-0.4)/PI);
240 assert(studentsTDistribution(2, 0.9) == 0.5L * (1 + 0.9L/sqrt(2.0L + 0.9*0.9)) );
241 assert(studentsTDistribution(2, -5.4) == 0.5L * (1 - 5.4L/sqrt(2.0L + 5.4*5.4)) );
242
243 // return true if a==b to given number of places.
244 bool isfeqabs(real a, real b, real diff)
245 {
246 return fabs(a-b) < diff;
247 }
248
249 // Check a few spot values with statsoft.com (Mathworld values are wrong!!)
250 // According to statsoft.com, studentsDistributionInv(10, 0.995)= 3.16927.
251
252 // The remaining values listed here are from Excel, and are unlikely to be accurate
253 // in the last decimal places. However, they are helpful as a sanity check.
254
255 // Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917
256 assert(isfeqabs(studentsTDistributionInv(10, 0.995), 3.169_272_67L, 0.000_000_005L));
257
258
259 assert(isfeqabs(studentsTDistributionInv(8, 0.6), 0.261_921_096_769_043L,0.000_000_000_05L));
260 // -TINV(2*0.4, 18) == -0.257123042655869
261
262 assert(isfeqabs(studentsTDistributionInv(18, 0.4), -0.257_123_042_655_869L, 0.000_000_000_05L));
263 assert( feqrel(studentsTDistribution(18, studentsTDistributionInv(18, 0.4L)),0.4L)
264 > real.mant_dig-2 );
265 assert( feqrel(studentsTDistribution(11, studentsTDistributionInv(11, 0.9L)),0.9L)
266 > real.mant_dig-2);
267 }
268 }
269
270 /** The F distribution, its complement, and inverse.
271 *
272 * The F density function (also known as Snedcor's density or the
273 * variance ratio density) is the density
274 * of x = (u1/df1)/(u2/df2), where u1 and u2 are random
275 * variables having $(POWER &chi;,2) distributions with df1
276 * and df2 degrees of freedom, respectively.
277 *
278 * fDistribution returns the area from zero to x under the F density
279 * function. The complementary function,
280 * fDistributionCompl, returns the area from x to &infin; under the F density function.
281 *
282 * The inverse of the complemented F distribution,
283 * fDistributionComplInv, finds the argument x such that the integral
284 * from x to infinity of the F density is equal to the given probability y.
285 *
286 * Can be used to test whether the means of multiple normally distributed
287 * populations, all with the same standard deviation, are equal;
288 * or to test that the standard deviations of two normally distributed
289 * populations are equal.
290 *
291 * Params:
292 * df1 = Degrees of freedom of the first variable. Must be >= 1
293 * df2 = Degrees of freedom of the second variable. Must be >= 1
294 * x = Must be >= 0
295 */
296 real fDistribution(int df1, int df2, real x)
297 in {
298 assert(df1>=1 && df2>=1);
299 assert(x>=0);
300 }
301 body{
302 real a = cast(real)(df1);
303 real b = cast(real)(df2);
304 real w = a * x;
305 w = w/(b + w);
306 return betaIncomplete(0.5L*a, 0.5L*b, w);
307 }
308
309 /** ditto */
310 real fDistributionCompl(int df1, int df2, real x)
311 in {
312 assert(df1>=1 && df2>=1);
313 assert(x>=0);
314 }
315 body{
316 real a = cast(real)(df1);
317 real b = cast(real)(df2);
318 real w = b / (b + a * x);
319 return betaIncomplete( 0.5L*b, 0.5L*a, w );
320 }
321
322 /*
323 * Inverse of complemented F distribution
324 *
325 * Finds the F density argument x such that the integral
326 * from x to infinity of the F density is equal to the
327 * given probability p.
328 *
329 * This is accomplished using the inverse beta integral
330 * function and the relations
331 *
332 * z = betaIncompleteInv( df2/2, df1/2, p ),
333 * x = df2 (1-z) / (df1 z).
334 *
335 * Note that the following relations hold for the inverse of
336 * the uncomplemented F distribution:
337 *
338 * z = betaIncompleteInv( df1/2, df2/2, p ),
339 * x = df2 z / (df1 (1-z)).
340 */
341
342 /** ditto */
343 real fDistributionComplInv(int df1, int df2, real p )
344 in {
345 assert(df1>=1 && df2>=1);
346 assert(p>=0 && p<=1.0);
347 }
348 body{
349 real a = df1;
350 real b = df2;
351 /* Compute probability for x = 0.5. */
352 real w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L );
353 /* If that is greater than p, then the solution w < .5.
354 Otherwise, solve at 1-p to remove cancellation in (b - b*w). */
355 if ( w > p || p < 0.001L) {
356 w = betaIncompleteInv( 0.5L*b, 0.5L*a, p );
357 return (b - b*w)/(a*w);
358 } else {
359 w = betaIncompleteInv( 0.5L*a, 0.5L*b, 1.0L - p );
360 return b*w/(a*(1.0L-w));
361 }
362 }
363
364 debug(UnitTest) {
365 unittest {
366 // fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2)
367 assert(fabs(fDistributionCompl(6, 4, 16.5) - 0.00858719177897249L)< 0.0000000000005L);
368 assert(fabs((1-fDistribution(12, 23, 0.1)) - 0.99990562845505L)< 0.0000000000005L);
369 assert(fabs(fDistributionComplInv(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L);
370 assert(fabs(fDistributionComplInv(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L);
371 // Regression test: This one used to fail because of a bug in the definition of MINLOG.
372 assert(feqrel(fDistributionCompl(4, 16, fDistributionComplInv(4,16, 0.008)), 0.008)>=real.mant_dig-3);
373 }
374 }
375
376 /** $(POWER &chi;,2) cumulative distribution function and its complement.
377 *
378 * Returns the area under the left hand tail (from 0 to x)
379 * of the Chi square probability density function with
380 * v degrees of freedom. The complement returns the area under
381 * the right hand tail (from x to &infin;).
382 *
383 * chiSqrDistribution(x | v) = ($(INTEGRATE 0, x)
384 * $(POWER t, v/2-1) $(POWER e, -t/2) dt )
385 * / $(POWER 2, v/2) $(GAMMA)(v/2)
386 *
387 * chiSqrDistributionCompl(x | v) = ($(INTEGRATE x, &infin;)
388 * $(POWER t, v/2-1) $(POWER e, -t/2) dt )
389 * / $(POWER 2, v/2) $(GAMMA)(v/2)
390 *
391 * Params:
392 * v = degrees of freedom. Must be positive.
393 * x = the $(POWER &chi;,2) variable. Must be positive.
394 *
395 */
396 real chiSqrDistribution(real v, real x)
397 in {
398 assert(x>=0);
399 assert(v>=1.0);
400 }
401 body{
402 return gammaIncomplete( 0.5*v, 0.5*x);
403 }
404
405 /** ditto */
406 real chiSqrDistributionCompl(real v, real x)
407 in {
408 assert(x>=0);
409 assert(v>=1.0);
410 }
411 body{
412 return gammaIncompleteCompl( 0.5L*v, 0.5L*x );
413 }
414
415 /**
416 * Inverse of complemented $(POWER &chi;, 2) distribution
417 *
418 * Finds the $(POWER &chi;, 2) argument x such that the integral
419 * from x to &infin; of the $(POWER &chi;, 2) density is equal
420 * to the given cumulative probability p.
421 *
422 * Params:
423 * p = Cumulative probability. 0<= p <=1.
424 * v = Degrees of freedom. Must be positive.
425 *
426 */
427 real chiSqrDistributionComplInv(real v, real p)
428 in {
429 assert(p>=0 && p<=1.0L);
430 assert(v>=1.0L);
431 }
432 body
433 {
434 return 2.0 * gammaIncompleteComplInv( 0.5*v, p);
435 }
436
437 debug(UnitTest) {
438 unittest {
439 assert(feqrel(chiSqrDistributionCompl(3.5L, chiSqrDistributionComplInv(3.5L, 0.1L)), 0.1L)>=real.mant_dig-3);
440 assert(chiSqrDistribution(19.02L, 0.4L) + chiSqrDistributionCompl(19.02L, 0.4L) ==1.0L);
441 }
442 }
443
444 /**
445 * The &Gamma; distribution and its complement
446 *
447 * The &Gamma; distribution is defined as the integral from 0 to x of the
448 * gamma probability density function. The complementary function returns the
449 * integral from x to &infin;
450 *
451 * gammaDistribution = ($(INTEGRATE 0, x) $(POWER t, b-1)$(POWER e, -at) dt) $(POWER a, b)/&Gamma;(b)
452 *
453 * x must be greater than 0.
454 */
455 real gammaDistribution(real a, real b, real x)
456 in {
457 assert(x>=0);
458 }
459 body {
460 return gammaIncomplete(b, a*x);
461 }
462
463 /** ditto */
464 real gammaDistributionCompl(real a, real b, real x )
465 in {
466 assert(x>=0);
467 }
468 body {
469 return gammaIncompleteCompl( b, a * x );
470 }
471
472 debug(UnitTest) {
473 unittest {
474 assert(gammaDistribution(7,3,0.18)+gammaDistributionCompl(7,3,0.18)==1);
475 }
476 }
477
478 /**********************
479 * Beta distribution and its inverse
480 *
481 * Returns the incomplete beta integral of the arguments, evaluated
482 * from zero to x. The function is defined as
483 *
484 * betaDistribution = &Gamma;(a+b)/(&Gamma;(a) &Gamma;(b)) *
485 * $(INTEGRATE 0, x) $(POWER t, a-1)$(POWER (1-t),b-1) dt
486 *
487 * The domain of definition is 0 <= x <= 1. In this
488 * implementation a and b are restricted to positive values.
489 * The integral from x to 1 may be obtained by the symmetry
490 * relation
491 *
492 * betaDistributionCompl(a, b, x ) = betaDistribution( b, a, 1-x )
493 *
494 * The integral is evaluated by a continued fraction expansion
495 * or, when b*x is small, by a power series.
496 *
497 * The inverse finds the value of x for which betaDistribution(a,b,x) - y = 0
498 */
499 real betaDistribution(real a, real b, real x )
500 {
501 return betaIncomplete(a, b, x );
502 }
503
504 /** ditto */
505 real betaDistributionCompl(real a, real b, real x)
506 {
507 return betaIncomplete(b, a, 1-x);
508 }
509
510 /** ditto */
511 real betaDistributionInv(real a, real b, real y)
512 {
513 return betaIncompleteInv(a, b, y);
514 }
515
516 /** ditto */
517 real betaDistributionComplInv(real a, real b, real y)
518 {
519 return 1-betaIncompleteInv(b, a, y);
520 }
521
522 debug(UnitTest) {
523 unittest {
524 assert(feqrel(betaDistributionInv(2, 6, betaDistribution(2,6, 0.7L)),0.7L)>=real.mant_dig-3);
525 assert(feqrel(betaDistributionComplInv(1.3, 8, betaDistributionCompl(1.3,8, 0.01L)),0.01L)>=real.mant_dig-4);
526 }
527 }
528
529 /**
530 * The Poisson distribution, its complement, and inverse
531 *
532 * k is the number of events. m is the mean.
533 * The Poisson distribution is defined as the sum of the first k terms of
534 * the Poisson density function.
535 * The complement returns the sum of the terms k+1 to &infin;.
536 *
537 * poissonDistribution = $(BIGSUM j=0, k) $(POWER e, -m) $(POWER m, j)/j!
538 *
539 * poissonDistributionCompl = $(BIGSUM j=k+1, &infin;) $(POWER e, -m) $(POWER m, j)/j!
540 *
541 * The terms are not summed directly; instead the incomplete
542 * gamma integral is employed, according to the relation
543 *
544 * y = poissonDistribution( k, m ) = gammaIncompleteCompl( k+1, m ).
545 *
546 * The arguments must both be positive.
547 */
548 real poissonDistribution(int k, real m )
549 in {
550 assert(k>=0);
551 assert(m>0);
552 }
553 body {
554 return gammaIncompleteCompl( k+1.0, m );
555 }
556
557 /** ditto */
558 real poissonDistributionCompl(int k, real m )
559 in {
560 assert(k>=0);
561 assert(m>0);
562 }
563 body {
564 return gammaIncomplete( k+1.0, m );
565 }
566
567 /** ditto */
568 real poissonDistributionInv( int k, real p )
569 in {
570 assert(k>=0);
571 assert(p>=0.0 && p<=1.0);
572 }
573 body {
574 return gammaIncompleteComplInv(k+1, p);
575 }
576
577 debug(UnitTest) {
578 unittest {
579 // = Excel's POISSON(k, m, TRUE)
580 assert( fabs(poissonDistribution(5, 6.3)
581 - 0.398771730072867L) < 0.000000000000005L);
582 assert( feqrel(poissonDistributionInv(8, poissonDistribution(8, 2.7e3L)), 2.7e3L)>=real.mant_dig-2);
583 assert( poissonDistribution(2, 8.4e-5) + poissonDistributionCompl(2, 8.4e-5) == 1.0L);
584 }
585 }
586
587 /***********************************
588 * Binomial distribution and complemented binomial distribution
589 *
590 * The binomial distribution is defined as the sum of the terms 0 through k
591 * of the Binomial probability density.
592 * The complement returns the sum of the terms k+1 through n.
593 *
594 binomialDistribution = $(BIGSUM j=0, k) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j)
595
596 binomialDistributionCompl = $(BIGSUM j=k+1, n) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j)
597 *
598 * The terms are not summed directly; instead the incomplete
599 * beta integral is employed, according to the formula
600 *
601 * y = binomialDistribution( k, n, p ) = betaDistribution( n-k, k+1, 1-p ).
602 *
603 * The arguments must be positive, with p ranging from 0 to 1, and k<=n.
604 */
605 real binomialDistribution(int k, int n, real p )
606 in {
607 assert(p>=0 && p<=1.0); // domain error
608 assert(k>=0 && k<=n);
609 }
610 body{
611 real dk, dn, q;
612 if( k == n )
613 return 1.0L;
614
615 q = 1.0L - p;
616 dn = n - k;
617 if ( k == 0 ) {
618 return pow( q, dn );
619 } else {
620 return betaIncomplete( dn, k + 1, q );
621 }
622 }
623
624 debug(UnitTest) {
625 unittest {
626 // = Excel's BINOMDIST(k, n, p, TRUE)
627 assert( fabs(binomialDistribution(8, 12, 0.5)
628 - 0.927001953125L) < 0.0000000000005L);
629 assert( fabs(binomialDistribution(0, 3, 0.008L)
630 - 0.976191488L) < 0.00000000005L);
631 assert(binomialDistribution(7,7, 0.3)==1.0);
632 }
633 }
634
635 /** ditto */
636 real binomialDistributionCompl(int k, int n, real p )
637 in {
638 assert(p>=0 && p<=1.0); // domain error
639 assert(k>=0 && k<=n);
640 }
641 body{
642 if ( k == n ) {
643 return 0;
644 }
645 real dn = n - k;
646 if ( k == 0 ) {
647 if ( p < .01L )
648 return -expm1( dn * log1p(-p) );
649 else
650 return 1.0L - pow( 1.0L-p, dn );
651 } else {
652 return betaIncomplete( k+1, dn, p );
653 }
654 }
655
656 debug(UnitTest){
657 unittest {
658 // = Excel's (1 - BINOMDIST(k, n, p, TRUE))
659 assert( fabs(1.0L-binomialDistributionCompl(0, 15, 0.003)
660 - 0.955932824838906L) < 0.0000000000000005L);
661 assert( fabs(1.0L-binomialDistributionCompl(0, 25, 0.2)
662 - 0.00377789318629572L) < 0.000000000000000005L);
663 assert( fabs(1.0L-binomialDistributionCompl(8, 12, 0.5)
664 - 0.927001953125L) < 0.00000000000005L);
665 assert(binomialDistributionCompl(7,7, 0.3)==0.0);
666 }
667 }
668
669 /** Inverse binomial distribution
670 *
671 * Finds the event probability p such that the sum of the
672 * terms 0 through k of the Binomial probability density
673 * is equal to the given cumulative probability y.
674 *
675 * This is accomplished using the inverse beta integral
676 * function and the relation
677 *
678 * 1 - p = betaDistributionInv( n-k, k+1, y ).
679 *
680 * The arguments must be positive, with 0 <= y <= 1, and k <= n.
681 */
682 real binomialDistributionInv( int k, int n, real y )
683 in {
684 assert(y>=0 && y<=1.0); // domain error
685 assert(k>=0 && k<=n);
686 }
687 body{
688 real dk, p;
689 real dn = n - k;
690 if ( k == 0 ) {
691 if( y > 0.8L )
692 p = -expm1( log1p(y-1.0L) / dn );
693 else
694 p = 1.0L - pow( y, 1.0L/dn );
695 } else {
696 dk = k + 1;
697 p = betaIncomplete( dn, dk, y );
698 if( p > 0.5 )
699 p = betaIncompleteInv( dk, dn, 1.0L-y );
700 else
701 p = 1.0 - betaIncompleteInv( dn, dk, y );
702 }
703 return p;
704 }
705
706 debug(UnitTest){
707 unittest {
708 real w = binomialDistribution(9, 15, 0.318L);
709 assert(feqrel(binomialDistributionInv(9, 15, w), 0.318L)>=real.mant_dig-3);
710 w = binomialDistribution(5, 35, 0.718L);
711 assert(feqrel(binomialDistributionInv(5, 35, w), 0.718L)>=real.mant_dig-3);
712 w = binomialDistribution(0, 24, 0.637L);
713 assert(feqrel(binomialDistributionInv(0, 24, w), 0.637L)>=real.mant_dig-3);
714 w = binomialDistributionInv(0, 59, 0.962L);
715 assert(feqrel(binomialDistribution(0, 59, w), 0.962L)>=real.mant_dig-5);
716 }
717 }
718
719 /** Negative binomial distribution and its inverse
720 *
721 * Returns the sum of the terms 0 through k of the negative
722 * binomial distribution:
723 *
724 * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, n) $(POWER (1-p), j)
725 *
726 * In a sequence of Bernoulli trials, this is the probability
727 * that k or fewer failures precede the n-th success.
728 *
729 * The arguments must be positive, with 0 < p < 1 and r>0.
730 *
731 * The inverse finds the argument y such
732 * that negativeBinomialDistribution(k,n,y) is equal to p.
733 *
734 * The Geometric Distribution is a special case of the negative binomial
735 * distribution.
736 * -----------------------
737 * geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p);
738 * -----------------------
739 * References:
740 * $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html)
741 */
742
743 real negativeBinomialDistribution(int k, int n, real p )
744 in {
745 assert(p>=0 && p<=1.0); // domain error
746 assert(k>=0);
747 }
748 body{
749 if ( k == 0 ) return pow( p, n );
750 return betaIncomplete( n, k + 1, p );
751 }
752
753 /** ditto */
754 real negativeBinomialDistributionInv(int k, int n, real p )
755 in {
756 assert(p>=0 && p<=1.0); // domain error
757 assert(k>=0);
758 }
759 body{
760 return betaIncompleteInv(n, k + 1, p);
761 }
762
763 debug(UnitTest) {
764 unittest {
765 // Value obtained by sum of terms of MS Excel 2003's NEGBINOMDIST.
766 assert( fabs(negativeBinomialDistribution(10, 20, 0.2) - 3.830_52E-08)< 0.000_005e-08);
767 assert(feqrel(negativeBinomialDistributionInv(14, 208, negativeBinomialDistribution(14, 208, 1e-4L)), 1e-4L)>=real.mant_dig-3);
768 }
769 }