132
|
1 /**
|
|
2 * Cumulative Probability Distribution Functions
|
|
3 *
|
|
4 * Copyright: Based on the CEPHES math library, which is
|
|
5 * Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com).
|
|
6 * License: BSD style: $(LICENSE)
|
|
7 * Authors: Stephen L. Moshier (original C code), Don Clugston
|
|
8 */
|
|
9
|
|
10 /**
|
|
11 * Macros:
|
|
12 * NAN = $(RED NAN)
|
|
13 * SUP = <span style="vertical-align:super;font-size:smaller">$0</span>
|
|
14 * GAMMA = Γ
|
|
15 * INTEGRAL = ∫
|
|
16 * INTEGRATE = $(BIG ∫<sub>$(SMALL $1)</sub><sup>$2</sup>)
|
|
17 * POWER = $1<sup>$2</sup>
|
|
18 * BIGSUM = $(BIG Σ <sup>$2</sup><sub>$(SMALL $1)</sub>)
|
|
19 * CHOOSE = $(BIG () <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG ))
|
|
20 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0>
|
|
21 * <caption>Special Values</caption>
|
|
22 * $0</table>
|
|
23 * SVH = $(TR $(TH $1) $(TH $2))
|
|
24 * SV = $(TR $(TD $1) $(TD $2))
|
|
25 */
|
|
26
|
|
27 module tango.math.Probability;
|
|
28 static import tango.math.ErrorFunction;
|
|
29 private import tango.math.GammaFunction;
|
|
30 private import tango.math.Math;
|
|
31 private import tango.math.IEEE;
|
|
32
|
|
33
|
|
34 /***
|
|
35 Cumulative distribution function for the Normal distribution, and its complement.
|
|
36
|
|
37 The normal (or Gaussian, or bell-shaped) distribution is
|
|
38 defined as:
|
|
39
|
|
40 normalDist(x) = 1/$(SQRT) π $(INTEGRAL -$(INFINITY), x) exp( - $(POWER t, 2)/2) dt
|
|
41 = 0.5 + 0.5 * erf(x/sqrt(2))
|
|
42 = 0.5 * erfc(- x/sqrt(2))
|
|
43
|
|
44 Note that
|
|
45 normalDistribution(x) = 1 - normalDistribution(-x).
|
|
46
|
|
47 Accuracy:
|
|
48 Within a few bits of machine resolution over the entire
|
|
49 range.
|
|
50
|
|
51 References:
|
|
52 $(LINK http://www.netlib.org/cephes/ldoubdoc.html),
|
|
53 G. Marsaglia, "Evaluating the Normal Distribution",
|
|
54 Journal of Statistical Software <b>11</b>, (July 2004).
|
|
55 */
|
|
56 real normalDistribution(real a)
|
|
57 {
|
|
58 return tango.math.ErrorFunction.normalDistributionImpl(a);
|
|
59 }
|
|
60
|
|
61 /** ditto */
|
|
62 real normalDistributionCompl(real a)
|
|
63 {
|
|
64 return -tango.math.ErrorFunction.normalDistributionImpl(-a);
|
|
65 }
|
|
66
|
|
67 /******************************
|
|
68 * Inverse of Normal distribution function
|
|
69 *
|
|
70 * Returns the argument, x, for which the area under the
|
|
71 * Normal probability density function (integrated from
|
|
72 * minus infinity to x) is equal to p.
|
|
73 *
|
|
74 * For small arguments 0 < p < exp(-2), the program computes
|
|
75 * z = sqrt( -2 log(p) ); then the approximation is
|
|
76 * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z) .
|
|
77 * For larger arguments, x/sqrt(2 pi) = w + w^3 R(w^2)/S(w^2)) ,
|
|
78 * where w = p - 0.5 .
|
|
79 */
|
|
80 real normalDistributionInv(real p)
|
|
81 {
|
|
82 return tango.math.ErrorFunction.normalDistributionInvImpl(p);
|
|
83 }
|
|
84
|
|
85 /** ditto */
|
|
86 real normalDistributionComplInv(real p)
|
|
87 {
|
|
88 return -tango.math.ErrorFunction.normalDistributionInvImpl(-p);
|
|
89 }
|
|
90
|
|
91 debug(UnitTest) {
|
|
92 unittest {
|
|
93 assert(feqrel(normalDistributionInv(normalDistribution(0.1)),0.1)>=real.mant_dig-4);
|
|
94 assert(feqrel(normalDistributionComplInv(normalDistributionCompl(0.1)),0.1)>=real.mant_dig-4);
|
|
95 }
|
|
96 }
|
|
97
|
|
98 /** Student's t cumulative distribution function
|
|
99 *
|
|
100 * Computes the integral from minus infinity to t of the Student
|
|
101 * t distribution with integer nu > 0 degrees of freedom:
|
|
102 *
|
|
103 * $(GAMMA)( (nu+1)/2) / ( sqrt(nu π) $(GAMMA)(nu/2) ) *
|
|
104 * $(INTEGRATE -∞, t) $(POWER (1+$(POWER x, 2)/nu), -(nu+1)/2) dx
|
|
105 *
|
|
106 * Can be used to test whether the means of two normally distributed populations
|
|
107 * are equal.
|
|
108 *
|
|
109 * It is related to the incomplete beta integral:
|
|
110 * 1 - studentsDistribution(nu,t) = 0.5 * betaDistribution( nu/2, 1/2, z )
|
|
111 * where
|
|
112 * z = nu/(nu + t<sup>2</sup>).
|
|
113 *
|
|
114 * For t < -1.6, this is the method of computation. For higher t,
|
|
115 * a direct method is derived from integration by parts.
|
|
116 * Since the function is symmetric about t=0, the area under the
|
|
117 * right tail of the density is found by calling the function
|
|
118 * with -t instead of t.
|
|
119 */
|
|
120 real studentsTDistribution(int nu, real t)
|
|
121 in{
|
|
122 assert(nu>0);
|
|
123 }
|
|
124 body{
|
|
125 /* Based on code from Cephes Math Library Release 2.3: January, 1995
|
|
126 Copyright 1984, 1995 by Stephen L. Moshier
|
|
127 */
|
|
128
|
|
129 if ( nu <= 0 ) return NaN(TANGO_NAN.STUDENTSDDISTRIBUTION_DOMAIN); // domain error -- or should it return 0?
|
|
130 if ( t == 0.0 ) return 0.5;
|
|
131
|
|
132 real rk, z, p;
|
|
133
|
|
134 if ( t < -1.6 ) {
|
|
135 rk = nu;
|
|
136 z = rk / (rk + t * t);
|
|
137 return 0.5L * betaIncomplete( 0.5L*rk, 0.5L, z );
|
|
138 }
|
|
139
|
|
140 /* compute integral from -t to + t */
|
|
141
|
|
142 rk = nu; /* degrees of freedom */
|
|
143
|
|
144 real x;
|
|
145 if (t < 0) x = -t; else x = t;
|
|
146 z = 1.0L + ( x * x )/rk;
|
|
147
|
|
148 real f, tz;
|
|
149 int j;
|
|
150
|
|
151 if ( nu & 1) {
|
|
152 /* computation for odd nu */
|
|
153 real xsqk = x/sqrt(rk);
|
|
154 p = atan( xsqk );
|
|
155 if ( nu > 1 ) {
|
|
156 f = 1.0L;
|
|
157 tz = 1.0L;
|
|
158 j = 3;
|
|
159 while( (j<=(nu-2)) && ( (tz/f) > real.epsilon ) ) {
|
|
160 tz *= (j-1)/( z * j );
|
|
161 f += tz;
|
|
162 j += 2;
|
|
163 }
|
|
164 p += f * xsqk/z;
|
|
165 }
|
|
166 p *= 2.0L/PI;
|
|
167 } else {
|
|
168 /* computation for even nu */
|
|
169 f = 1.0L;
|
|
170 tz = 1.0L;
|
|
171 j = 2;
|
|
172
|
|
173 while ( ( j <= (nu-2) ) && ( (tz/f) > real.epsilon ) ) {
|
|
174 tz *= (j - 1)/( z * j );
|
|
175 f += tz;
|
|
176 j += 2;
|
|
177 }
|
|
178 p = f * x/sqrt(z*rk);
|
|
179 }
|
|
180 if ( t < 0.0L )
|
|
181 p = -p; /* note destruction of relative accuracy */
|
|
182
|
|
183 p = 0.5L + 0.5L * p;
|
|
184 return p;
|
|
185 }
|
|
186
|
|
187 /** Inverse of Student's t distribution
|
|
188 *
|
|
189 * Given probability p and degrees of freedom nu,
|
|
190 * finds the argument t such that the one-sided
|
|
191 * studentsDistribution(nu,t) is equal to p.
|
|
192 *
|
|
193 * Params:
|
|
194 * nu = degrees of freedom. Must be >1
|
|
195 * p = probability. 0 < p < 1
|
|
196 */
|
|
197 real studentsTDistributionInv(int nu, real p )
|
|
198 in {
|
|
199 assert(nu>0);
|
|
200 assert(p>=0.0L && p<=1.0L);
|
|
201 }
|
|
202 body
|
|
203 {
|
|
204 if (p==0) return -real.infinity;
|
|
205 if (p==1) return real.infinity;
|
|
206
|
|
207 real rk, z;
|
|
208 rk = nu;
|
|
209
|
|
210 if ( p > 0.25L && p < 0.75L ) {
|
|
211 if ( p == 0.5L ) return 0;
|
|
212 z = 1.0L - 2.0L * p;
|
|
213 z = betaIncompleteInv( 0.5L, 0.5L*rk, fabs(z) );
|
|
214 real t = sqrt( rk*z/(1.0L-z) );
|
|
215 if( p < 0.5L )
|
|
216 t = -t;
|
|
217 return t;
|
|
218 }
|
|
219 int rflg = -1; // sign of the result
|
|
220 if (p >= 0.5L) {
|
|
221 p = 1.0L - p;
|
|
222 rflg = 1;
|
|
223 }
|
|
224 z = betaIncompleteInv( 0.5L*rk, 0.5L, 2.0L*p );
|
|
225
|
|
226 if (z<0) return rflg * real.infinity;
|
|
227 return rflg * sqrt( rk/z - rk );
|
|
228 }
|
|
229
|
|
230 debug(UnitTest) {
|
|
231 unittest {
|
|
232
|
|
233 // There are simple forms for nu = 1 and nu = 2.
|
|
234
|
|
235 // if (nu == 1), tDistribution(x) = 0.5 + atan(x)/PI
|
|
236 // so tDistributionInv(p) = tan( PI * (p-0.5) );
|
|
237 // nu==2: tDistribution(x) = 0.5 * (1 + x/ sqrt(2+x*x) )
|
|
238
|
|
239 assert(studentsTDistribution(1, -0.4)== 0.5 + atan(-0.4)/PI);
|
|
240 assert(studentsTDistribution(2, 0.9) == 0.5L * (1 + 0.9L/sqrt(2.0L + 0.9*0.9)) );
|
|
241 assert(studentsTDistribution(2, -5.4) == 0.5L * (1 - 5.4L/sqrt(2.0L + 5.4*5.4)) );
|
|
242
|
|
243 // return true if a==b to given number of places.
|
|
244 bool isfeqabs(real a, real b, real diff)
|
|
245 {
|
|
246 return fabs(a-b) < diff;
|
|
247 }
|
|
248
|
|
249 // Check a few spot values with statsoft.com (Mathworld values are wrong!!)
|
|
250 // According to statsoft.com, studentsDistributionInv(10, 0.995)= 3.16927.
|
|
251
|
|
252 // The remaining values listed here are from Excel, and are unlikely to be accurate
|
|
253 // in the last decimal places. However, they are helpful as a sanity check.
|
|
254
|
|
255 // Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917
|
|
256 assert(isfeqabs(studentsTDistributionInv(10, 0.995), 3.169_272_67L, 0.000_000_005L));
|
|
257
|
|
258
|
|
259 assert(isfeqabs(studentsTDistributionInv(8, 0.6), 0.261_921_096_769_043L,0.000_000_000_05L));
|
|
260 // -TINV(2*0.4, 18) == -0.257123042655869
|
|
261
|
|
262 assert(isfeqabs(studentsTDistributionInv(18, 0.4), -0.257_123_042_655_869L, 0.000_000_000_05L));
|
|
263 assert( feqrel(studentsTDistribution(18, studentsTDistributionInv(18, 0.4L)),0.4L)
|
|
264 > real.mant_dig-2 );
|
|
265 assert( feqrel(studentsTDistribution(11, studentsTDistributionInv(11, 0.9L)),0.9L)
|
|
266 > real.mant_dig-2);
|
|
267 }
|
|
268 }
|
|
269
|
|
270 /** The F distribution, its complement, and inverse.
|
|
271 *
|
|
272 * The F density function (also known as Snedcor's density or the
|
|
273 * variance ratio density) is the density
|
|
274 * of x = (u1/df1)/(u2/df2), where u1 and u2 are random
|
|
275 * variables having $(POWER χ,2) distributions with df1
|
|
276 * and df2 degrees of freedom, respectively.
|
|
277 *
|
|
278 * fDistribution returns the area from zero to x under the F density
|
|
279 * function. The complementary function,
|
|
280 * fDistributionCompl, returns the area from x to ∞ under the F density function.
|
|
281 *
|
|
282 * The inverse of the complemented F distribution,
|
|
283 * fDistributionComplInv, finds the argument x such that the integral
|
|
284 * from x to infinity of the F density is equal to the given probability y.
|
|
285 *
|
|
286 * Can be used to test whether the means of multiple normally distributed
|
|
287 * populations, all with the same standard deviation, are equal;
|
|
288 * or to test that the standard deviations of two normally distributed
|
|
289 * populations are equal.
|
|
290 *
|
|
291 * Params:
|
|
292 * df1 = Degrees of freedom of the first variable. Must be >= 1
|
|
293 * df2 = Degrees of freedom of the second variable. Must be >= 1
|
|
294 * x = Must be >= 0
|
|
295 */
|
|
296 real fDistribution(int df1, int df2, real x)
|
|
297 in {
|
|
298 assert(df1>=1 && df2>=1);
|
|
299 assert(x>=0);
|
|
300 }
|
|
301 body{
|
|
302 real a = cast(real)(df1);
|
|
303 real b = cast(real)(df2);
|
|
304 real w = a * x;
|
|
305 w = w/(b + w);
|
|
306 return betaIncomplete(0.5L*a, 0.5L*b, w);
|
|
307 }
|
|
308
|
|
309 /** ditto */
|
|
310 real fDistributionCompl(int df1, int df2, real x)
|
|
311 in {
|
|
312 assert(df1>=1 && df2>=1);
|
|
313 assert(x>=0);
|
|
314 }
|
|
315 body{
|
|
316 real a = cast(real)(df1);
|
|
317 real b = cast(real)(df2);
|
|
318 real w = b / (b + a * x);
|
|
319 return betaIncomplete( 0.5L*b, 0.5L*a, w );
|
|
320 }
|
|
321
|
|
322 /*
|
|
323 * Inverse of complemented F distribution
|
|
324 *
|
|
325 * Finds the F density argument x such that the integral
|
|
326 * from x to infinity of the F density is equal to the
|
|
327 * given probability p.
|
|
328 *
|
|
329 * This is accomplished using the inverse beta integral
|
|
330 * function and the relations
|
|
331 *
|
|
332 * z = betaIncompleteInv( df2/2, df1/2, p ),
|
|
333 * x = df2 (1-z) / (df1 z).
|
|
334 *
|
|
335 * Note that the following relations hold for the inverse of
|
|
336 * the uncomplemented F distribution:
|
|
337 *
|
|
338 * z = betaIncompleteInv( df1/2, df2/2, p ),
|
|
339 * x = df2 z / (df1 (1-z)).
|
|
340 */
|
|
341
|
|
342 /** ditto */
|
|
343 real fDistributionComplInv(int df1, int df2, real p )
|
|
344 in {
|
|
345 assert(df1>=1 && df2>=1);
|
|
346 assert(p>=0 && p<=1.0);
|
|
347 }
|
|
348 body{
|
|
349 real a = df1;
|
|
350 real b = df2;
|
|
351 /* Compute probability for x = 0.5. */
|
|
352 real w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L );
|
|
353 /* If that is greater than p, then the solution w < .5.
|
|
354 Otherwise, solve at 1-p to remove cancellation in (b - b*w). */
|
|
355 if ( w > p || p < 0.001L) {
|
|
356 w = betaIncompleteInv( 0.5L*b, 0.5L*a, p );
|
|
357 return (b - b*w)/(a*w);
|
|
358 } else {
|
|
359 w = betaIncompleteInv( 0.5L*a, 0.5L*b, 1.0L - p );
|
|
360 return b*w/(a*(1.0L-w));
|
|
361 }
|
|
362 }
|
|
363
|
|
364 debug(UnitTest) {
|
|
365 unittest {
|
|
366 // fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2)
|
|
367 assert(fabs(fDistributionCompl(6, 4, 16.5) - 0.00858719177897249L)< 0.0000000000005L);
|
|
368 assert(fabs((1-fDistribution(12, 23, 0.1)) - 0.99990562845505L)< 0.0000000000005L);
|
|
369 assert(fabs(fDistributionComplInv(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L);
|
|
370 assert(fabs(fDistributionComplInv(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L);
|
|
371 // Regression test: This one used to fail because of a bug in the definition of MINLOG.
|
|
372 assert(feqrel(fDistributionCompl(4, 16, fDistributionComplInv(4,16, 0.008)), 0.008)>=real.mant_dig-3);
|
|
373 }
|
|
374 }
|
|
375
|
|
376 /** $(POWER χ,2) cumulative distribution function and its complement.
|
|
377 *
|
|
378 * Returns the area under the left hand tail (from 0 to x)
|
|
379 * of the Chi square probability density function with
|
|
380 * v degrees of freedom. The complement returns the area under
|
|
381 * the right hand tail (from x to ∞).
|
|
382 *
|
|
383 * chiSqrDistribution(x | v) = ($(INTEGRATE 0, x)
|
|
384 * $(POWER t, v/2-1) $(POWER e, -t/2) dt )
|
|
385 * / $(POWER 2, v/2) $(GAMMA)(v/2)
|
|
386 *
|
|
387 * chiSqrDistributionCompl(x | v) = ($(INTEGRATE x, ∞)
|
|
388 * $(POWER t, v/2-1) $(POWER e, -t/2) dt )
|
|
389 * / $(POWER 2, v/2) $(GAMMA)(v/2)
|
|
390 *
|
|
391 * Params:
|
|
392 * v = degrees of freedom. Must be positive.
|
|
393 * x = the $(POWER χ,2) variable. Must be positive.
|
|
394 *
|
|
395 */
|
|
396 real chiSqrDistribution(real v, real x)
|
|
397 in {
|
|
398 assert(x>=0);
|
|
399 assert(v>=1.0);
|
|
400 }
|
|
401 body{
|
|
402 return gammaIncomplete( 0.5*v, 0.5*x);
|
|
403 }
|
|
404
|
|
405 /** ditto */
|
|
406 real chiSqrDistributionCompl(real v, real x)
|
|
407 in {
|
|
408 assert(x>=0);
|
|
409 assert(v>=1.0);
|
|
410 }
|
|
411 body{
|
|
412 return gammaIncompleteCompl( 0.5L*v, 0.5L*x );
|
|
413 }
|
|
414
|
|
415 /**
|
|
416 * Inverse of complemented $(POWER χ, 2) distribution
|
|
417 *
|
|
418 * Finds the $(POWER χ, 2) argument x such that the integral
|
|
419 * from x to ∞ of the $(POWER χ, 2) density is equal
|
|
420 * to the given cumulative probability p.
|
|
421 *
|
|
422 * Params:
|
|
423 * p = Cumulative probability. 0<= p <=1.
|
|
424 * v = Degrees of freedom. Must be positive.
|
|
425 *
|
|
426 */
|
|
427 real chiSqrDistributionComplInv(real v, real p)
|
|
428 in {
|
|
429 assert(p>=0 && p<=1.0L);
|
|
430 assert(v>=1.0L);
|
|
431 }
|
|
432 body
|
|
433 {
|
|
434 return 2.0 * gammaIncompleteComplInv( 0.5*v, p);
|
|
435 }
|
|
436
|
|
437 debug(UnitTest) {
|
|
438 unittest {
|
|
439 assert(feqrel(chiSqrDistributionCompl(3.5L, chiSqrDistributionComplInv(3.5L, 0.1L)), 0.1L)>=real.mant_dig-3);
|
|
440 assert(chiSqrDistribution(19.02L, 0.4L) + chiSqrDistributionCompl(19.02L, 0.4L) ==1.0L);
|
|
441 }
|
|
442 }
|
|
443
|
|
444 /**
|
|
445 * The Γ distribution and its complement
|
|
446 *
|
|
447 * The Γ distribution is defined as the integral from 0 to x of the
|
|
448 * gamma probability density function. The complementary function returns the
|
|
449 * integral from x to ∞
|
|
450 *
|
|
451 * gammaDistribution = ($(INTEGRATE 0, x) $(POWER t, b-1)$(POWER e, -at) dt) $(POWER a, b)/Γ(b)
|
|
452 *
|
|
453 * x must be greater than 0.
|
|
454 */
|
|
455 real gammaDistribution(real a, real b, real x)
|
|
456 in {
|
|
457 assert(x>=0);
|
|
458 }
|
|
459 body {
|
|
460 return gammaIncomplete(b, a*x);
|
|
461 }
|
|
462
|
|
463 /** ditto */
|
|
464 real gammaDistributionCompl(real a, real b, real x )
|
|
465 in {
|
|
466 assert(x>=0);
|
|
467 }
|
|
468 body {
|
|
469 return gammaIncompleteCompl( b, a * x );
|
|
470 }
|
|
471
|
|
472 debug(UnitTest) {
|
|
473 unittest {
|
|
474 assert(gammaDistribution(7,3,0.18)+gammaDistributionCompl(7,3,0.18)==1);
|
|
475 }
|
|
476 }
|
|
477
|
|
478 /**********************
|
|
479 * Beta distribution and its inverse
|
|
480 *
|
|
481 * Returns the incomplete beta integral of the arguments, evaluated
|
|
482 * from zero to x. The function is defined as
|
|
483 *
|
|
484 * betaDistribution = Γ(a+b)/(Γ(a) Γ(b)) *
|
|
485 * $(INTEGRATE 0, x) $(POWER t, a-1)$(POWER (1-t),b-1) dt
|
|
486 *
|
|
487 * The domain of definition is 0 <= x <= 1. In this
|
|
488 * implementation a and b are restricted to positive values.
|
|
489 * The integral from x to 1 may be obtained by the symmetry
|
|
490 * relation
|
|
491 *
|
|
492 * betaDistributionCompl(a, b, x ) = betaDistribution( b, a, 1-x )
|
|
493 *
|
|
494 * The integral is evaluated by a continued fraction expansion
|
|
495 * or, when b*x is small, by a power series.
|
|
496 *
|
|
497 * The inverse finds the value of x for which betaDistribution(a,b,x) - y = 0
|
|
498 */
|
|
499 real betaDistribution(real a, real b, real x )
|
|
500 {
|
|
501 return betaIncomplete(a, b, x );
|
|
502 }
|
|
503
|
|
504 /** ditto */
|
|
505 real betaDistributionCompl(real a, real b, real x)
|
|
506 {
|
|
507 return betaIncomplete(b, a, 1-x);
|
|
508 }
|
|
509
|
|
510 /** ditto */
|
|
511 real betaDistributionInv(real a, real b, real y)
|
|
512 {
|
|
513 return betaIncompleteInv(a, b, y);
|
|
514 }
|
|
515
|
|
516 /** ditto */
|
|
517 real betaDistributionComplInv(real a, real b, real y)
|
|
518 {
|
|
519 return 1-betaIncompleteInv(b, a, y);
|
|
520 }
|
|
521
|
|
522 debug(UnitTest) {
|
|
523 unittest {
|
|
524 assert(feqrel(betaDistributionInv(2, 6, betaDistribution(2,6, 0.7L)),0.7L)>=real.mant_dig-3);
|
|
525 assert(feqrel(betaDistributionComplInv(1.3, 8, betaDistributionCompl(1.3,8, 0.01L)),0.01L)>=real.mant_dig-4);
|
|
526 }
|
|
527 }
|
|
528
|
|
529 /**
|
|
530 * The Poisson distribution, its complement, and inverse
|
|
531 *
|
|
532 * k is the number of events. m is the mean.
|
|
533 * The Poisson distribution is defined as the sum of the first k terms of
|
|
534 * the Poisson density function.
|
|
535 * The complement returns the sum of the terms k+1 to ∞.
|
|
536 *
|
|
537 * poissonDistribution = $(BIGSUM j=0, k) $(POWER e, -m) $(POWER m, j)/j!
|
|
538 *
|
|
539 * poissonDistributionCompl = $(BIGSUM j=k+1, ∞) $(POWER e, -m) $(POWER m, j)/j!
|
|
540 *
|
|
541 * The terms are not summed directly; instead the incomplete
|
|
542 * gamma integral is employed, according to the relation
|
|
543 *
|
|
544 * y = poissonDistribution( k, m ) = gammaIncompleteCompl( k+1, m ).
|
|
545 *
|
|
546 * The arguments must both be positive.
|
|
547 */
|
|
548 real poissonDistribution(int k, real m )
|
|
549 in {
|
|
550 assert(k>=0);
|
|
551 assert(m>0);
|
|
552 }
|
|
553 body {
|
|
554 return gammaIncompleteCompl( k+1.0, m );
|
|
555 }
|
|
556
|
|
557 /** ditto */
|
|
558 real poissonDistributionCompl(int k, real m )
|
|
559 in {
|
|
560 assert(k>=0);
|
|
561 assert(m>0);
|
|
562 }
|
|
563 body {
|
|
564 return gammaIncomplete( k+1.0, m );
|
|
565 }
|
|
566
|
|
567 /** ditto */
|
|
568 real poissonDistributionInv( int k, real p )
|
|
569 in {
|
|
570 assert(k>=0);
|
|
571 assert(p>=0.0 && p<=1.0);
|
|
572 }
|
|
573 body {
|
|
574 return gammaIncompleteComplInv(k+1, p);
|
|
575 }
|
|
576
|
|
577 debug(UnitTest) {
|
|
578 unittest {
|
|
579 // = Excel's POISSON(k, m, TRUE)
|
|
580 assert( fabs(poissonDistribution(5, 6.3)
|
|
581 - 0.398771730072867L) < 0.000000000000005L);
|
|
582 assert( feqrel(poissonDistributionInv(8, poissonDistribution(8, 2.7e3L)), 2.7e3L)>=real.mant_dig-2);
|
|
583 assert( poissonDistribution(2, 8.4e-5) + poissonDistributionCompl(2, 8.4e-5) == 1.0L);
|
|
584 }
|
|
585 }
|
|
586
|
|
587 /***********************************
|
|
588 * Binomial distribution and complemented binomial distribution
|
|
589 *
|
|
590 * The binomial distribution is defined as the sum of the terms 0 through k
|
|
591 * of the Binomial probability density.
|
|
592 * The complement returns the sum of the terms k+1 through n.
|
|
593 *
|
|
594 binomialDistribution = $(BIGSUM j=0, k) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j)
|
|
595
|
|
596 binomialDistributionCompl = $(BIGSUM j=k+1, n) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j)
|
|
597 *
|
|
598 * The terms are not summed directly; instead the incomplete
|
|
599 * beta integral is employed, according to the formula
|
|
600 *
|
|
601 * y = binomialDistribution( k, n, p ) = betaDistribution( n-k, k+1, 1-p ).
|
|
602 *
|
|
603 * The arguments must be positive, with p ranging from 0 to 1, and k<=n.
|
|
604 */
|
|
605 real binomialDistribution(int k, int n, real p )
|
|
606 in {
|
|
607 assert(p>=0 && p<=1.0); // domain error
|
|
608 assert(k>=0 && k<=n);
|
|
609 }
|
|
610 body{
|
|
611 real dk, dn, q;
|
|
612 if( k == n )
|
|
613 return 1.0L;
|
|
614
|
|
615 q = 1.0L - p;
|
|
616 dn = n - k;
|
|
617 if ( k == 0 ) {
|
|
618 return pow( q, dn );
|
|
619 } else {
|
|
620 return betaIncomplete( dn, k + 1, q );
|
|
621 }
|
|
622 }
|
|
623
|
|
624 debug(UnitTest) {
|
|
625 unittest {
|
|
626 // = Excel's BINOMDIST(k, n, p, TRUE)
|
|
627 assert( fabs(binomialDistribution(8, 12, 0.5)
|
|
628 - 0.927001953125L) < 0.0000000000005L);
|
|
629 assert( fabs(binomialDistribution(0, 3, 0.008L)
|
|
630 - 0.976191488L) < 0.00000000005L);
|
|
631 assert(binomialDistribution(7,7, 0.3)==1.0);
|
|
632 }
|
|
633 }
|
|
634
|
|
635 /** ditto */
|
|
636 real binomialDistributionCompl(int k, int n, real p )
|
|
637 in {
|
|
638 assert(p>=0 && p<=1.0); // domain error
|
|
639 assert(k>=0 && k<=n);
|
|
640 }
|
|
641 body{
|
|
642 if ( k == n ) {
|
|
643 return 0;
|
|
644 }
|
|
645 real dn = n - k;
|
|
646 if ( k == 0 ) {
|
|
647 if ( p < .01L )
|
|
648 return -expm1( dn * log1p(-p) );
|
|
649 else
|
|
650 return 1.0L - pow( 1.0L-p, dn );
|
|
651 } else {
|
|
652 return betaIncomplete( k+1, dn, p );
|
|
653 }
|
|
654 }
|
|
655
|
|
656 debug(UnitTest){
|
|
657 unittest {
|
|
658 // = Excel's (1 - BINOMDIST(k, n, p, TRUE))
|
|
659 assert( fabs(1.0L-binomialDistributionCompl(0, 15, 0.003)
|
|
660 - 0.955932824838906L) < 0.0000000000000005L);
|
|
661 assert( fabs(1.0L-binomialDistributionCompl(0, 25, 0.2)
|
|
662 - 0.00377789318629572L) < 0.000000000000000005L);
|
|
663 assert( fabs(1.0L-binomialDistributionCompl(8, 12, 0.5)
|
|
664 - 0.927001953125L) < 0.00000000000005L);
|
|
665 assert(binomialDistributionCompl(7,7, 0.3)==0.0);
|
|
666 }
|
|
667 }
|
|
668
|
|
669 /** Inverse binomial distribution
|
|
670 *
|
|
671 * Finds the event probability p such that the sum of the
|
|
672 * terms 0 through k of the Binomial probability density
|
|
673 * is equal to the given cumulative probability y.
|
|
674 *
|
|
675 * This is accomplished using the inverse beta integral
|
|
676 * function and the relation
|
|
677 *
|
|
678 * 1 - p = betaDistributionInv( n-k, k+1, y ).
|
|
679 *
|
|
680 * The arguments must be positive, with 0 <= y <= 1, and k <= n.
|
|
681 */
|
|
682 real binomialDistributionInv( int k, int n, real y )
|
|
683 in {
|
|
684 assert(y>=0 && y<=1.0); // domain error
|
|
685 assert(k>=0 && k<=n);
|
|
686 }
|
|
687 body{
|
|
688 real dk, p;
|
|
689 real dn = n - k;
|
|
690 if ( k == 0 ) {
|
|
691 if( y > 0.8L )
|
|
692 p = -expm1( log1p(y-1.0L) / dn );
|
|
693 else
|
|
694 p = 1.0L - pow( y, 1.0L/dn );
|
|
695 } else {
|
|
696 dk = k + 1;
|
|
697 p = betaIncomplete( dn, dk, y );
|
|
698 if( p > 0.5 )
|
|
699 p = betaIncompleteInv( dk, dn, 1.0L-y );
|
|
700 else
|
|
701 p = 1.0 - betaIncompleteInv( dn, dk, y );
|
|
702 }
|
|
703 return p;
|
|
704 }
|
|
705
|
|
706 debug(UnitTest){
|
|
707 unittest {
|
|
708 real w = binomialDistribution(9, 15, 0.318L);
|
|
709 assert(feqrel(binomialDistributionInv(9, 15, w), 0.318L)>=real.mant_dig-3);
|
|
710 w = binomialDistribution(5, 35, 0.718L);
|
|
711 assert(feqrel(binomialDistributionInv(5, 35, w), 0.718L)>=real.mant_dig-3);
|
|
712 w = binomialDistribution(0, 24, 0.637L);
|
|
713 assert(feqrel(binomialDistributionInv(0, 24, w), 0.637L)>=real.mant_dig-3);
|
|
714 w = binomialDistributionInv(0, 59, 0.962L);
|
|
715 assert(feqrel(binomialDistribution(0, 59, w), 0.962L)>=real.mant_dig-5);
|
|
716 }
|
|
717 }
|
|
718
|
|
719 /** Negative binomial distribution and its inverse
|
|
720 *
|
|
721 * Returns the sum of the terms 0 through k of the negative
|
|
722 * binomial distribution:
|
|
723 *
|
|
724 * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, n) $(POWER (1-p), j)
|
|
725 *
|
|
726 * In a sequence of Bernoulli trials, this is the probability
|
|
727 * that k or fewer failures precede the n-th success.
|
|
728 *
|
|
729 * The arguments must be positive, with 0 < p < 1 and r>0.
|
|
730 *
|
|
731 * The inverse finds the argument y such
|
|
732 * that negativeBinomialDistribution(k,n,y) is equal to p.
|
|
733 *
|
|
734 * The Geometric Distribution is a special case of the negative binomial
|
|
735 * distribution.
|
|
736 * -----------------------
|
|
737 * geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p);
|
|
738 * -----------------------
|
|
739 * References:
|
|
740 * $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html)
|
|
741 */
|
|
742
|
|
743 real negativeBinomialDistribution(int k, int n, real p )
|
|
744 in {
|
|
745 assert(p>=0 && p<=1.0); // domain error
|
|
746 assert(k>=0);
|
|
747 }
|
|
748 body{
|
|
749 if ( k == 0 ) return pow( p, n );
|
|
750 return betaIncomplete( n, k + 1, p );
|
|
751 }
|
|
752
|
|
753 /** ditto */
|
|
754 real negativeBinomialDistributionInv(int k, int n, real p )
|
|
755 in {
|
|
756 assert(p>=0 && p<=1.0); // domain error
|
|
757 assert(k>=0);
|
|
758 }
|
|
759 body{
|
|
760 return betaIncompleteInv(n, k + 1, p);
|
|
761 }
|
|
762
|
|
763 debug(UnitTest) {
|
|
764 unittest {
|
|
765 // Value obtained by sum of terms of MS Excel 2003's NEGBINOMDIST.
|
|
766 assert( fabs(negativeBinomialDistribution(10, 20, 0.2) - 3.830_52E-08)< 0.000_005e-08);
|
|
767 assert(feqrel(negativeBinomialDistributionInv(14, 208, negativeBinomialDistribution(14, 208, 1e-4L)), 1e-4L)>=real.mant_dig-3);
|
|
768 }
|
|
769 } |