Mercurial > projects > ldc
comparison tango/tango/math/Probability.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:5825d48b27d1 | 132:1700239cab2e |
---|---|
1 /** | |
2 * Cumulative Probability Distribution Functions | |
3 * | |
4 * Copyright: Based on the CEPHES math library, which is | |
5 * Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com). | |
6 * License: BSD style: $(LICENSE) | |
7 * Authors: Stephen L. Moshier (original C code), Don Clugston | |
8 */ | |
9 | |
10 /** | |
11 * Macros: | |
12 * NAN = $(RED NAN) | |
13 * SUP = <span style="vertical-align:super;font-size:smaller">$0</span> | |
14 * GAMMA = Γ | |
15 * INTEGRAL = ∫ | |
16 * INTEGRATE = $(BIG ∫<sub>$(SMALL $1)</sub><sup>$2</sup>) | |
17 * POWER = $1<sup>$2</sup> | |
18 * BIGSUM = $(BIG Σ <sup>$2</sup><sub>$(SMALL $1)</sub>) | |
19 * CHOOSE = $(BIG () <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG )) | |
20 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0> | |
21 * <caption>Special Values</caption> | |
22 * $0</table> | |
23 * SVH = $(TR $(TH $1) $(TH $2)) | |
24 * SV = $(TR $(TD $1) $(TD $2)) | |
25 */ | |
26 | |
27 module tango.math.Probability; | |
28 static import tango.math.ErrorFunction; | |
29 private import tango.math.GammaFunction; | |
30 private import tango.math.Math; | |
31 private import tango.math.IEEE; | |
32 | |
33 | |
34 /*** | |
35 Cumulative distribution function for the Normal distribution, and its complement. | |
36 | |
37 The normal (or Gaussian, or bell-shaped) distribution is | |
38 defined as: | |
39 | |
40 normalDist(x) = 1/$(SQRT) π $(INTEGRAL -$(INFINITY), x) exp( - $(POWER t, 2)/2) dt | |
41 = 0.5 + 0.5 * erf(x/sqrt(2)) | |
42 = 0.5 * erfc(- x/sqrt(2)) | |
43 | |
44 Note that | |
45 normalDistribution(x) = 1 - normalDistribution(-x). | |
46 | |
47 Accuracy: | |
48 Within a few bits of machine resolution over the entire | |
49 range. | |
50 | |
51 References: | |
52 $(LINK http://www.netlib.org/cephes/ldoubdoc.html), | |
53 G. Marsaglia, "Evaluating the Normal Distribution", | |
54 Journal of Statistical Software <b>11</b>, (July 2004). | |
55 */ | |
56 real normalDistribution(real a) | |
57 { | |
58 return tango.math.ErrorFunction.normalDistributionImpl(a); | |
59 } | |
60 | |
61 /** ditto */ | |
62 real normalDistributionCompl(real a) | |
63 { | |
64 return -tango.math.ErrorFunction.normalDistributionImpl(-a); | |
65 } | |
66 | |
67 /****************************** | |
68 * Inverse of Normal distribution function | |
69 * | |
70 * Returns the argument, x, for which the area under the | |
71 * Normal probability density function (integrated from | |
72 * minus infinity to x) is equal to p. | |
73 * | |
74 * For small arguments 0 < p < exp(-2), the program computes | |
75 * z = sqrt( -2 log(p) ); then the approximation is | |
76 * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z) . | |
77 * For larger arguments, x/sqrt(2 pi) = w + w^3 R(w^2)/S(w^2)) , | |
78 * where w = p - 0.5 . | |
79 */ | |
80 real normalDistributionInv(real p) | |
81 { | |
82 return tango.math.ErrorFunction.normalDistributionInvImpl(p); | |
83 } | |
84 | |
85 /** ditto */ | |
86 real normalDistributionComplInv(real p) | |
87 { | |
88 return -tango.math.ErrorFunction.normalDistributionInvImpl(-p); | |
89 } | |
90 | |
91 debug(UnitTest) { | |
92 unittest { | |
93 assert(feqrel(normalDistributionInv(normalDistribution(0.1)),0.1)>=real.mant_dig-4); | |
94 assert(feqrel(normalDistributionComplInv(normalDistributionCompl(0.1)),0.1)>=real.mant_dig-4); | |
95 } | |
96 } | |
97 | |
98 /** Student's t cumulative distribution function | |
99 * | |
100 * Computes the integral from minus infinity to t of the Student | |
101 * t distribution with integer nu > 0 degrees of freedom: | |
102 * | |
103 * $(GAMMA)( (nu+1)/2) / ( sqrt(nu π) $(GAMMA)(nu/2) ) * | |
104 * $(INTEGRATE -∞, t) $(POWER (1+$(POWER x, 2)/nu), -(nu+1)/2) dx | |
105 * | |
106 * Can be used to test whether the means of two normally distributed populations | |
107 * are equal. | |
108 * | |
109 * It is related to the incomplete beta integral: | |
110 * 1 - studentsDistribution(nu,t) = 0.5 * betaDistribution( nu/2, 1/2, z ) | |
111 * where | |
112 * z = nu/(nu + t<sup>2</sup>). | |
113 * | |
114 * For t < -1.6, this is the method of computation. For higher t, | |
115 * a direct method is derived from integration by parts. | |
116 * Since the function is symmetric about t=0, the area under the | |
117 * right tail of the density is found by calling the function | |
118 * with -t instead of t. | |
119 */ | |
120 real studentsTDistribution(int nu, real t) | |
121 in{ | |
122 assert(nu>0); | |
123 } | |
124 body{ | |
125 /* Based on code from Cephes Math Library Release 2.3: January, 1995 | |
126 Copyright 1984, 1995 by Stephen L. Moshier | |
127 */ | |
128 | |
129 if ( nu <= 0 ) return NaN(TANGO_NAN.STUDENTSDDISTRIBUTION_DOMAIN); // domain error -- or should it return 0? | |
130 if ( t == 0.0 ) return 0.5; | |
131 | |
132 real rk, z, p; | |
133 | |
134 if ( t < -1.6 ) { | |
135 rk = nu; | |
136 z = rk / (rk + t * t); | |
137 return 0.5L * betaIncomplete( 0.5L*rk, 0.5L, z ); | |
138 } | |
139 | |
140 /* compute integral from -t to + t */ | |
141 | |
142 rk = nu; /* degrees of freedom */ | |
143 | |
144 real x; | |
145 if (t < 0) x = -t; else x = t; | |
146 z = 1.0L + ( x * x )/rk; | |
147 | |
148 real f, tz; | |
149 int j; | |
150 | |
151 if ( nu & 1) { | |
152 /* computation for odd nu */ | |
153 real xsqk = x/sqrt(rk); | |
154 p = atan( xsqk ); | |
155 if ( nu > 1 ) { | |
156 f = 1.0L; | |
157 tz = 1.0L; | |
158 j = 3; | |
159 while( (j<=(nu-2)) && ( (tz/f) > real.epsilon ) ) { | |
160 tz *= (j-1)/( z * j ); | |
161 f += tz; | |
162 j += 2; | |
163 } | |
164 p += f * xsqk/z; | |
165 } | |
166 p *= 2.0L/PI; | |
167 } else { | |
168 /* computation for even nu */ | |
169 f = 1.0L; | |
170 tz = 1.0L; | |
171 j = 2; | |
172 | |
173 while ( ( j <= (nu-2) ) && ( (tz/f) > real.epsilon ) ) { | |
174 tz *= (j - 1)/( z * j ); | |
175 f += tz; | |
176 j += 2; | |
177 } | |
178 p = f * x/sqrt(z*rk); | |
179 } | |
180 if ( t < 0.0L ) | |
181 p = -p; /* note destruction of relative accuracy */ | |
182 | |
183 p = 0.5L + 0.5L * p; | |
184 return p; | |
185 } | |
186 | |
187 /** Inverse of Student's t distribution | |
188 * | |
189 * Given probability p and degrees of freedom nu, | |
190 * finds the argument t such that the one-sided | |
191 * studentsDistribution(nu,t) is equal to p. | |
192 * | |
193 * Params: | |
194 * nu = degrees of freedom. Must be >1 | |
195 * p = probability. 0 < p < 1 | |
196 */ | |
197 real studentsTDistributionInv(int nu, real p ) | |
198 in { | |
199 assert(nu>0); | |
200 assert(p>=0.0L && p<=1.0L); | |
201 } | |
202 body | |
203 { | |
204 if (p==0) return -real.infinity; | |
205 if (p==1) return real.infinity; | |
206 | |
207 real rk, z; | |
208 rk = nu; | |
209 | |
210 if ( p > 0.25L && p < 0.75L ) { | |
211 if ( p == 0.5L ) return 0; | |
212 z = 1.0L - 2.0L * p; | |
213 z = betaIncompleteInv( 0.5L, 0.5L*rk, fabs(z) ); | |
214 real t = sqrt( rk*z/(1.0L-z) ); | |
215 if( p < 0.5L ) | |
216 t = -t; | |
217 return t; | |
218 } | |
219 int rflg = -1; // sign of the result | |
220 if (p >= 0.5L) { | |
221 p = 1.0L - p; | |
222 rflg = 1; | |
223 } | |
224 z = betaIncompleteInv( 0.5L*rk, 0.5L, 2.0L*p ); | |
225 | |
226 if (z<0) return rflg * real.infinity; | |
227 return rflg * sqrt( rk/z - rk ); | |
228 } | |
229 | |
230 debug(UnitTest) { | |
231 unittest { | |
232 | |
233 // There are simple forms for nu = 1 and nu = 2. | |
234 | |
235 // if (nu == 1), tDistribution(x) = 0.5 + atan(x)/PI | |
236 // so tDistributionInv(p) = tan( PI * (p-0.5) ); | |
237 // nu==2: tDistribution(x) = 0.5 * (1 + x/ sqrt(2+x*x) ) | |
238 | |
239 assert(studentsTDistribution(1, -0.4)== 0.5 + atan(-0.4)/PI); | |
240 assert(studentsTDistribution(2, 0.9) == 0.5L * (1 + 0.9L/sqrt(2.0L + 0.9*0.9)) ); | |
241 assert(studentsTDistribution(2, -5.4) == 0.5L * (1 - 5.4L/sqrt(2.0L + 5.4*5.4)) ); | |
242 | |
243 // return true if a==b to given number of places. | |
244 bool isfeqabs(real a, real b, real diff) | |
245 { | |
246 return fabs(a-b) < diff; | |
247 } | |
248 | |
249 // Check a few spot values with statsoft.com (Mathworld values are wrong!!) | |
250 // According to statsoft.com, studentsDistributionInv(10, 0.995)= 3.16927. | |
251 | |
252 // The remaining values listed here are from Excel, and are unlikely to be accurate | |
253 // in the last decimal places. However, they are helpful as a sanity check. | |
254 | |
255 // Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917 | |
256 assert(isfeqabs(studentsTDistributionInv(10, 0.995), 3.169_272_67L, 0.000_000_005L)); | |
257 | |
258 | |
259 assert(isfeqabs(studentsTDistributionInv(8, 0.6), 0.261_921_096_769_043L,0.000_000_000_05L)); | |
260 // -TINV(2*0.4, 18) == -0.257123042655869 | |
261 | |
262 assert(isfeqabs(studentsTDistributionInv(18, 0.4), -0.257_123_042_655_869L, 0.000_000_000_05L)); | |
263 assert( feqrel(studentsTDistribution(18, studentsTDistributionInv(18, 0.4L)),0.4L) | |
264 > real.mant_dig-2 ); | |
265 assert( feqrel(studentsTDistribution(11, studentsTDistributionInv(11, 0.9L)),0.9L) | |
266 > real.mant_dig-2); | |
267 } | |
268 } | |
269 | |
270 /** The F distribution, its complement, and inverse. | |
271 * | |
272 * The F density function (also known as Snedcor's density or the | |
273 * variance ratio density) is the density | |
274 * of x = (u1/df1)/(u2/df2), where u1 and u2 are random | |
275 * variables having $(POWER χ,2) distributions with df1 | |
276 * and df2 degrees of freedom, respectively. | |
277 * | |
278 * fDistribution returns the area from zero to x under the F density | |
279 * function. The complementary function, | |
280 * fDistributionCompl, returns the area from x to ∞ under the F density function. | |
281 * | |
282 * The inverse of the complemented F distribution, | |
283 * fDistributionComplInv, finds the argument x such that the integral | |
284 * from x to infinity of the F density is equal to the given probability y. | |
285 * | |
286 * Can be used to test whether the means of multiple normally distributed | |
287 * populations, all with the same standard deviation, are equal; | |
288 * or to test that the standard deviations of two normally distributed | |
289 * populations are equal. | |
290 * | |
291 * Params: | |
292 * df1 = Degrees of freedom of the first variable. Must be >= 1 | |
293 * df2 = Degrees of freedom of the second variable. Must be >= 1 | |
294 * x = Must be >= 0 | |
295 */ | |
296 real fDistribution(int df1, int df2, real x) | |
297 in { | |
298 assert(df1>=1 && df2>=1); | |
299 assert(x>=0); | |
300 } | |
301 body{ | |
302 real a = cast(real)(df1); | |
303 real b = cast(real)(df2); | |
304 real w = a * x; | |
305 w = w/(b + w); | |
306 return betaIncomplete(0.5L*a, 0.5L*b, w); | |
307 } | |
308 | |
309 /** ditto */ | |
310 real fDistributionCompl(int df1, int df2, real x) | |
311 in { | |
312 assert(df1>=1 && df2>=1); | |
313 assert(x>=0); | |
314 } | |
315 body{ | |
316 real a = cast(real)(df1); | |
317 real b = cast(real)(df2); | |
318 real w = b / (b + a * x); | |
319 return betaIncomplete( 0.5L*b, 0.5L*a, w ); | |
320 } | |
321 | |
322 /* | |
323 * Inverse of complemented F distribution | |
324 * | |
325 * Finds the F density argument x such that the integral | |
326 * from x to infinity of the F density is equal to the | |
327 * given probability p. | |
328 * | |
329 * This is accomplished using the inverse beta integral | |
330 * function and the relations | |
331 * | |
332 * z = betaIncompleteInv( df2/2, df1/2, p ), | |
333 * x = df2 (1-z) / (df1 z). | |
334 * | |
335 * Note that the following relations hold for the inverse of | |
336 * the uncomplemented F distribution: | |
337 * | |
338 * z = betaIncompleteInv( df1/2, df2/2, p ), | |
339 * x = df2 z / (df1 (1-z)). | |
340 */ | |
341 | |
342 /** ditto */ | |
343 real fDistributionComplInv(int df1, int df2, real p ) | |
344 in { | |
345 assert(df1>=1 && df2>=1); | |
346 assert(p>=0 && p<=1.0); | |
347 } | |
348 body{ | |
349 real a = df1; | |
350 real b = df2; | |
351 /* Compute probability for x = 0.5. */ | |
352 real w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L ); | |
353 /* If that is greater than p, then the solution w < .5. | |
354 Otherwise, solve at 1-p to remove cancellation in (b - b*w). */ | |
355 if ( w > p || p < 0.001L) { | |
356 w = betaIncompleteInv( 0.5L*b, 0.5L*a, p ); | |
357 return (b - b*w)/(a*w); | |
358 } else { | |
359 w = betaIncompleteInv( 0.5L*a, 0.5L*b, 1.0L - p ); | |
360 return b*w/(a*(1.0L-w)); | |
361 } | |
362 } | |
363 | |
364 debug(UnitTest) { | |
365 unittest { | |
366 // fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2) | |
367 assert(fabs(fDistributionCompl(6, 4, 16.5) - 0.00858719177897249L)< 0.0000000000005L); | |
368 assert(fabs((1-fDistribution(12, 23, 0.1)) - 0.99990562845505L)< 0.0000000000005L); | |
369 assert(fabs(fDistributionComplInv(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L); | |
370 assert(fabs(fDistributionComplInv(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L); | |
371 // Regression test: This one used to fail because of a bug in the definition of MINLOG. | |
372 assert(feqrel(fDistributionCompl(4, 16, fDistributionComplInv(4,16, 0.008)), 0.008)>=real.mant_dig-3); | |
373 } | |
374 } | |
375 | |
376 /** $(POWER χ,2) cumulative distribution function and its complement. | |
377 * | |
378 * Returns the area under the left hand tail (from 0 to x) | |
379 * of the Chi square probability density function with | |
380 * v degrees of freedom. The complement returns the area under | |
381 * the right hand tail (from x to ∞). | |
382 * | |
383 * chiSqrDistribution(x | v) = ($(INTEGRATE 0, x) | |
384 * $(POWER t, v/2-1) $(POWER e, -t/2) dt ) | |
385 * / $(POWER 2, v/2) $(GAMMA)(v/2) | |
386 * | |
387 * chiSqrDistributionCompl(x | v) = ($(INTEGRATE x, ∞) | |
388 * $(POWER t, v/2-1) $(POWER e, -t/2) dt ) | |
389 * / $(POWER 2, v/2) $(GAMMA)(v/2) | |
390 * | |
391 * Params: | |
392 * v = degrees of freedom. Must be positive. | |
393 * x = the $(POWER χ,2) variable. Must be positive. | |
394 * | |
395 */ | |
396 real chiSqrDistribution(real v, real x) | |
397 in { | |
398 assert(x>=0); | |
399 assert(v>=1.0); | |
400 } | |
401 body{ | |
402 return gammaIncomplete( 0.5*v, 0.5*x); | |
403 } | |
404 | |
405 /** ditto */ | |
406 real chiSqrDistributionCompl(real v, real x) | |
407 in { | |
408 assert(x>=0); | |
409 assert(v>=1.0); | |
410 } | |
411 body{ | |
412 return gammaIncompleteCompl( 0.5L*v, 0.5L*x ); | |
413 } | |
414 | |
415 /** | |
416 * Inverse of complemented $(POWER χ, 2) distribution | |
417 * | |
418 * Finds the $(POWER χ, 2) argument x such that the integral | |
419 * from x to ∞ of the $(POWER χ, 2) density is equal | |
420 * to the given cumulative probability p. | |
421 * | |
422 * Params: | |
423 * p = Cumulative probability. 0<= p <=1. | |
424 * v = Degrees of freedom. Must be positive. | |
425 * | |
426 */ | |
427 real chiSqrDistributionComplInv(real v, real p) | |
428 in { | |
429 assert(p>=0 && p<=1.0L); | |
430 assert(v>=1.0L); | |
431 } | |
432 body | |
433 { | |
434 return 2.0 * gammaIncompleteComplInv( 0.5*v, p); | |
435 } | |
436 | |
437 debug(UnitTest) { | |
438 unittest { | |
439 assert(feqrel(chiSqrDistributionCompl(3.5L, chiSqrDistributionComplInv(3.5L, 0.1L)), 0.1L)>=real.mant_dig-3); | |
440 assert(chiSqrDistribution(19.02L, 0.4L) + chiSqrDistributionCompl(19.02L, 0.4L) ==1.0L); | |
441 } | |
442 } | |
443 | |
444 /** | |
445 * The Γ distribution and its complement | |
446 * | |
447 * The Γ distribution is defined as the integral from 0 to x of the | |
448 * gamma probability density function. The complementary function returns the | |
449 * integral from x to ∞ | |
450 * | |
451 * gammaDistribution = ($(INTEGRATE 0, x) $(POWER t, b-1)$(POWER e, -at) dt) $(POWER a, b)/Γ(b) | |
452 * | |
453 * x must be greater than 0. | |
454 */ | |
455 real gammaDistribution(real a, real b, real x) | |
456 in { | |
457 assert(x>=0); | |
458 } | |
459 body { | |
460 return gammaIncomplete(b, a*x); | |
461 } | |
462 | |
463 /** ditto */ | |
464 real gammaDistributionCompl(real a, real b, real x ) | |
465 in { | |
466 assert(x>=0); | |
467 } | |
468 body { | |
469 return gammaIncompleteCompl( b, a * x ); | |
470 } | |
471 | |
472 debug(UnitTest) { | |
473 unittest { | |
474 assert(gammaDistribution(7,3,0.18)+gammaDistributionCompl(7,3,0.18)==1); | |
475 } | |
476 } | |
477 | |
478 /********************** | |
479 * Beta distribution and its inverse | |
480 * | |
481 * Returns the incomplete beta integral of the arguments, evaluated | |
482 * from zero to x. The function is defined as | |
483 * | |
484 * betaDistribution = Γ(a+b)/(Γ(a) Γ(b)) * | |
485 * $(INTEGRATE 0, x) $(POWER t, a-1)$(POWER (1-t),b-1) dt | |
486 * | |
487 * The domain of definition is 0 <= x <= 1. In this | |
488 * implementation a and b are restricted to positive values. | |
489 * The integral from x to 1 may be obtained by the symmetry | |
490 * relation | |
491 * | |
492 * betaDistributionCompl(a, b, x ) = betaDistribution( b, a, 1-x ) | |
493 * | |
494 * The integral is evaluated by a continued fraction expansion | |
495 * or, when b*x is small, by a power series. | |
496 * | |
497 * The inverse finds the value of x for which betaDistribution(a,b,x) - y = 0 | |
498 */ | |
499 real betaDistribution(real a, real b, real x ) | |
500 { | |
501 return betaIncomplete(a, b, x ); | |
502 } | |
503 | |
504 /** ditto */ | |
505 real betaDistributionCompl(real a, real b, real x) | |
506 { | |
507 return betaIncomplete(b, a, 1-x); | |
508 } | |
509 | |
510 /** ditto */ | |
511 real betaDistributionInv(real a, real b, real y) | |
512 { | |
513 return betaIncompleteInv(a, b, y); | |
514 } | |
515 | |
516 /** ditto */ | |
517 real betaDistributionComplInv(real a, real b, real y) | |
518 { | |
519 return 1-betaIncompleteInv(b, a, y); | |
520 } | |
521 | |
522 debug(UnitTest) { | |
523 unittest { | |
524 assert(feqrel(betaDistributionInv(2, 6, betaDistribution(2,6, 0.7L)),0.7L)>=real.mant_dig-3); | |
525 assert(feqrel(betaDistributionComplInv(1.3, 8, betaDistributionCompl(1.3,8, 0.01L)),0.01L)>=real.mant_dig-4); | |
526 } | |
527 } | |
528 | |
529 /** | |
530 * The Poisson distribution, its complement, and inverse | |
531 * | |
532 * k is the number of events. m is the mean. | |
533 * The Poisson distribution is defined as the sum of the first k terms of | |
534 * the Poisson density function. | |
535 * The complement returns the sum of the terms k+1 to ∞. | |
536 * | |
537 * poissonDistribution = $(BIGSUM j=0, k) $(POWER e, -m) $(POWER m, j)/j! | |
538 * | |
539 * poissonDistributionCompl = $(BIGSUM j=k+1, ∞) $(POWER e, -m) $(POWER m, j)/j! | |
540 * | |
541 * The terms are not summed directly; instead the incomplete | |
542 * gamma integral is employed, according to the relation | |
543 * | |
544 * y = poissonDistribution( k, m ) = gammaIncompleteCompl( k+1, m ). | |
545 * | |
546 * The arguments must both be positive. | |
547 */ | |
548 real poissonDistribution(int k, real m ) | |
549 in { | |
550 assert(k>=0); | |
551 assert(m>0); | |
552 } | |
553 body { | |
554 return gammaIncompleteCompl( k+1.0, m ); | |
555 } | |
556 | |
557 /** ditto */ | |
558 real poissonDistributionCompl(int k, real m ) | |
559 in { | |
560 assert(k>=0); | |
561 assert(m>0); | |
562 } | |
563 body { | |
564 return gammaIncomplete( k+1.0, m ); | |
565 } | |
566 | |
567 /** ditto */ | |
568 real poissonDistributionInv( int k, real p ) | |
569 in { | |
570 assert(k>=0); | |
571 assert(p>=0.0 && p<=1.0); | |
572 } | |
573 body { | |
574 return gammaIncompleteComplInv(k+1, p); | |
575 } | |
576 | |
577 debug(UnitTest) { | |
578 unittest { | |
579 // = Excel's POISSON(k, m, TRUE) | |
580 assert( fabs(poissonDistribution(5, 6.3) | |
581 - 0.398771730072867L) < 0.000000000000005L); | |
582 assert( feqrel(poissonDistributionInv(8, poissonDistribution(8, 2.7e3L)), 2.7e3L)>=real.mant_dig-2); | |
583 assert( poissonDistribution(2, 8.4e-5) + poissonDistributionCompl(2, 8.4e-5) == 1.0L); | |
584 } | |
585 } | |
586 | |
587 /*********************************** | |
588 * Binomial distribution and complemented binomial distribution | |
589 * | |
590 * The binomial distribution is defined as the sum of the terms 0 through k | |
591 * of the Binomial probability density. | |
592 * The complement returns the sum of the terms k+1 through n. | |
593 * | |
594 binomialDistribution = $(BIGSUM j=0, k) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) | |
595 | |
596 binomialDistributionCompl = $(BIGSUM j=k+1, n) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) | |
597 * | |
598 * The terms are not summed directly; instead the incomplete | |
599 * beta integral is employed, according to the formula | |
600 * | |
601 * y = binomialDistribution( k, n, p ) = betaDistribution( n-k, k+1, 1-p ). | |
602 * | |
603 * The arguments must be positive, with p ranging from 0 to 1, and k<=n. | |
604 */ | |
605 real binomialDistribution(int k, int n, real p ) | |
606 in { | |
607 assert(p>=0 && p<=1.0); // domain error | |
608 assert(k>=0 && k<=n); | |
609 } | |
610 body{ | |
611 real dk, dn, q; | |
612 if( k == n ) | |
613 return 1.0L; | |
614 | |
615 q = 1.0L - p; | |
616 dn = n - k; | |
617 if ( k == 0 ) { | |
618 return pow( q, dn ); | |
619 } else { | |
620 return betaIncomplete( dn, k + 1, q ); | |
621 } | |
622 } | |
623 | |
624 debug(UnitTest) { | |
625 unittest { | |
626 // = Excel's BINOMDIST(k, n, p, TRUE) | |
627 assert( fabs(binomialDistribution(8, 12, 0.5) | |
628 - 0.927001953125L) < 0.0000000000005L); | |
629 assert( fabs(binomialDistribution(0, 3, 0.008L) | |
630 - 0.976191488L) < 0.00000000005L); | |
631 assert(binomialDistribution(7,7, 0.3)==1.0); | |
632 } | |
633 } | |
634 | |
635 /** ditto */ | |
636 real binomialDistributionCompl(int k, int n, real p ) | |
637 in { | |
638 assert(p>=0 && p<=1.0); // domain error | |
639 assert(k>=0 && k<=n); | |
640 } | |
641 body{ | |
642 if ( k == n ) { | |
643 return 0; | |
644 } | |
645 real dn = n - k; | |
646 if ( k == 0 ) { | |
647 if ( p < .01L ) | |
648 return -expm1( dn * log1p(-p) ); | |
649 else | |
650 return 1.0L - pow( 1.0L-p, dn ); | |
651 } else { | |
652 return betaIncomplete( k+1, dn, p ); | |
653 } | |
654 } | |
655 | |
656 debug(UnitTest){ | |
657 unittest { | |
658 // = Excel's (1 - BINOMDIST(k, n, p, TRUE)) | |
659 assert( fabs(1.0L-binomialDistributionCompl(0, 15, 0.003) | |
660 - 0.955932824838906L) < 0.0000000000000005L); | |
661 assert( fabs(1.0L-binomialDistributionCompl(0, 25, 0.2) | |
662 - 0.00377789318629572L) < 0.000000000000000005L); | |
663 assert( fabs(1.0L-binomialDistributionCompl(8, 12, 0.5) | |
664 - 0.927001953125L) < 0.00000000000005L); | |
665 assert(binomialDistributionCompl(7,7, 0.3)==0.0); | |
666 } | |
667 } | |
668 | |
669 /** Inverse binomial distribution | |
670 * | |
671 * Finds the event probability p such that the sum of the | |
672 * terms 0 through k of the Binomial probability density | |
673 * is equal to the given cumulative probability y. | |
674 * | |
675 * This is accomplished using the inverse beta integral | |
676 * function and the relation | |
677 * | |
678 * 1 - p = betaDistributionInv( n-k, k+1, y ). | |
679 * | |
680 * The arguments must be positive, with 0 <= y <= 1, and k <= n. | |
681 */ | |
682 real binomialDistributionInv( int k, int n, real y ) | |
683 in { | |
684 assert(y>=0 && y<=1.0); // domain error | |
685 assert(k>=0 && k<=n); | |
686 } | |
687 body{ | |
688 real dk, p; | |
689 real dn = n - k; | |
690 if ( k == 0 ) { | |
691 if( y > 0.8L ) | |
692 p = -expm1( log1p(y-1.0L) / dn ); | |
693 else | |
694 p = 1.0L - pow( y, 1.0L/dn ); | |
695 } else { | |
696 dk = k + 1; | |
697 p = betaIncomplete( dn, dk, y ); | |
698 if( p > 0.5 ) | |
699 p = betaIncompleteInv( dk, dn, 1.0L-y ); | |
700 else | |
701 p = 1.0 - betaIncompleteInv( dn, dk, y ); | |
702 } | |
703 return p; | |
704 } | |
705 | |
706 debug(UnitTest){ | |
707 unittest { | |
708 real w = binomialDistribution(9, 15, 0.318L); | |
709 assert(feqrel(binomialDistributionInv(9, 15, w), 0.318L)>=real.mant_dig-3); | |
710 w = binomialDistribution(5, 35, 0.718L); | |
711 assert(feqrel(binomialDistributionInv(5, 35, w), 0.718L)>=real.mant_dig-3); | |
712 w = binomialDistribution(0, 24, 0.637L); | |
713 assert(feqrel(binomialDistributionInv(0, 24, w), 0.637L)>=real.mant_dig-3); | |
714 w = binomialDistributionInv(0, 59, 0.962L); | |
715 assert(feqrel(binomialDistribution(0, 59, w), 0.962L)>=real.mant_dig-5); | |
716 } | |
717 } | |
718 | |
719 /** Negative binomial distribution and its inverse | |
720 * | |
721 * Returns the sum of the terms 0 through k of the negative | |
722 * binomial distribution: | |
723 * | |
724 * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, n) $(POWER (1-p), j) | |
725 * | |
726 * In a sequence of Bernoulli trials, this is the probability | |
727 * that k or fewer failures precede the n-th success. | |
728 * | |
729 * The arguments must be positive, with 0 < p < 1 and r>0. | |
730 * | |
731 * The inverse finds the argument y such | |
732 * that negativeBinomialDistribution(k,n,y) is equal to p. | |
733 * | |
734 * The Geometric Distribution is a special case of the negative binomial | |
735 * distribution. | |
736 * ----------------------- | |
737 * geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p); | |
738 * ----------------------- | |
739 * References: | |
740 * $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html) | |
741 */ | |
742 | |
743 real negativeBinomialDistribution(int k, int n, real p ) | |
744 in { | |
745 assert(p>=0 && p<=1.0); // domain error | |
746 assert(k>=0); | |
747 } | |
748 body{ | |
749 if ( k == 0 ) return pow( p, n ); | |
750 return betaIncomplete( n, k + 1, p ); | |
751 } | |
752 | |
753 /** ditto */ | |
754 real negativeBinomialDistributionInv(int k, int n, real p ) | |
755 in { | |
756 assert(p>=0 && p<=1.0); // domain error | |
757 assert(k>=0); | |
758 } | |
759 body{ | |
760 return betaIncompleteInv(n, k + 1, p); | |
761 } | |
762 | |
763 debug(UnitTest) { | |
764 unittest { | |
765 // Value obtained by sum of terms of MS Excel 2003's NEGBINOMDIST. | |
766 assert( fabs(negativeBinomialDistribution(10, 20, 0.2) - 3.830_52E-08)< 0.000_005e-08); | |
767 assert(feqrel(negativeBinomialDistributionInv(14, 208, negativeBinomialDistribution(14, 208, 1e-4L)), 1e-4L)>=real.mant_dig-3); | |
768 } | |
769 } |