[section:binom_eg Binomial Distribution Examples]

See also the reference documentation for the __binomial_distrib.

[section:binomial_coinflip_example Binomial Coin-Flipping Example]

[import ../../example/binomial_coinflip_example.cpp]
[binomial_coinflip_example1]

See [@../../example/binomial_coinflip_example.cpp binomial_coinflip_example.cpp]
for the full source code; the program output looks like this:

[binomial_coinflip_example_output]
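
If you just want a feel for the kind of calculation involved before opening the
full example, the following minimal sketch (illustrative only, and not the
imported example itself) uses the `binomial` convenience typedef to answer a
few coin-flipping questions for 10 flips of a fair coin:

   #include <iostream>
   #include <boost/math/distributions/binomial.hpp>

   int main()
   {
      using boost::math::binomial;
      // 10 flips of a fair coin: 10 trials, success_fraction 0.5.
      binomial flips(10, 0.5);
      // Probability of exactly 5 heads:
      std::cout << "P(exactly 5 heads)   = " << pdf(flips, 5) << std::endl;
      // Probability of at most 2 heads:
      std::cout << "P(at most 2 heads)   = " << cdf(flips, 2) << std::endl;
      // Probability of more than 8 heads (upper tail, via the complement):
      std::cout << "P(more than 8 heads) = " << cdf(complement(flips, 8)) << std::endl;
   }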

[endsect] [/section:binomial_coinflip_example Binomial coinflip example]

[section:binomial_quiz_example Binomial Quiz Example]

[import ../../example/binomial_quiz_example.cpp]
[binomial_quiz_example1]
[binomial_quiz_example2]
[discrete_quantile_real]

See [@../../example/binomial_quiz_example.cpp binomial_quiz_example.cpp]
for the full source code and output.
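
The `discrete_quantile_real` snippet imported above concerns how quantiles of a
discrete distribution are rounded.  As a rough sketch of the mechanism (the 16
questions with a 1 in 4 chance of a correct guess are assumed figures for this
illustration, not necessarily those used in the quiz example): with the default
policy the quantile is rounded to an integer number of questions, while a
`discrete_quantile<real>` policy returns the unrounded real value:

   #include <iostream>
   #include <boost/math/distributions/binomial.hpp>

   int main()
   {
      using namespace boost::math;
      using namespace boost::math::policies;

      // A quiz of 16 questions, each guessed correctly with probability 0.25:
      binomial quiz(16, 0.25);
      // Default policy: the quantile is rounded to an integer result.
      std::cout << quantile(quiz, 0.05) << std::endl;

      // The same distribution, but with quantiles returned as real values:
      typedef binomial_distribution<
            double, policy<discrete_quantile<real> > >
         binomial_real_quantile;
      binomial_real_quantile quiz_real(16, 0.25);
      std::cout << quantile(quiz_real, 0.05) << std::endl;
   }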

[endsect] [/section:binomial_quiz_example Binomial Quiz Example]

[section:binom_conf Calculating Confidence Limits on the Frequency of Occurrence for a Binomial Distribution]

Imagine you have a process that follows a binomial distribution: for each
trial conducted, an event either occurs or it does not; the two outcomes are
referred to as "successes" and "failures".  If, by experiment, you want to
measure the frequency with which successes occur, the best estimate is given
simply by /k/ \/ /N/, for /k/ successes out of /N/ trials.  However, our
confidence in that estimate will be shaped by how many trials were conducted,
and how many successes were observed.  The static member functions
`binomial_distribution<>::find_lower_bound_on_p` and
`binomial_distribution<>::find_upper_bound_on_p` allow you to calculate
the confidence intervals for your estimate of the occurrence frequency.
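
Before walking through the full sample program, a single call may help to fix
ideas.  The fragment below is an illustrative sketch (not part of the sample
program) that computes two-sided 95% limits for 4 successes observed in 20
trials, using the default Clopper-Pearson method; the results should agree with
the 95% row of the first table of output shown further below:

   #include <iostream>
   #include <boost/math/distributions/binomial.hpp>

   int main()
   {
      using boost::math::binomial_distribution;
      double trials = 20;
      double successes = 4;
      double alpha = 0.05;   // risk level for a two-sided 95% interval.
      // Note the alpha/2: see the discussion of two-sided intervals below.
      double lower = binomial_distribution<>::find_lower_bound_on_p(
            trials, successes, alpha / 2);
      double upper = binomial_distribution<>::find_upper_bound_on_p(
            trials, successes, alpha / 2);
      std::cout << "95% interval: [" << lower << ", " << upper << "]" << std::endl;
   }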

The sample program [@../../example/binomial_confidence_limits.cpp
binomial_confidence_limits.cpp] illustrates their use.  It begins by defining
a procedure that will print a table of confidence limits for various degrees
of certainty:

   #include <iostream>
   #include <iomanip>
   #include <boost/math/distributions/binomial.hpp>

   void confidence_limits_on_frequency(unsigned trials, unsigned successes)
   {
      //
      // trials = Total number of trials.
      // successes = Total number of observed successes.
      //
      // Calculate confidence limits for an observed
      // frequency of occurrence that follows a binomial
      // distribution.
      //
      using namespace std;
      using namespace boost::math;

      // Print out general info:
      cout <<
         "___________________________________________\n"
         "2-Sided Confidence Limits For Success Ratio\n"
         "___________________________________________\n\n";
      cout << setprecision(7);
      cout << setw(40) << left << "Number of Observations" << "=  " << trials << "\n";
      cout << setw(40) << left << "Number of successes" << "=  " << successes << "\n";
      cout << setw(40) << left << "Sample frequency of occurrence" << "=  " << double(successes) / trials << "\n";

The procedure now defines a table of significance levels: these are the
probabilities that the true occurrence frequency lies outside the calculated
interval:

      double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };

Some pretty-printing of the table header follows:

      cout << "\n\n"
              "_______________________________________________________________________\n"
              "Confidence        Lower CP       Upper CP       Lower JP       Upper JP\n"
              " Value (%)        Limit          Limit          Limit          Limit\n"
              "_______________________________________________________________________\n";


And now for the important part - the intervals themselves - for each
value of /alpha/, we call `find_lower_bound_on_p` and
`find_upper_bound_on_p` to obtain lower and upper bounds
respectively.  Note that since we are calculating a two-sided interval,
we must divide the value of alpha in two.

Please note that calculating two separate /single-sided bounds/, each with risk
level [alpha][space]is not the same thing as calculating a two-sided interval.
Had we calculated two single-sided intervals, each with a risk [alpha]
that the true value lies outside the interval, then:

* The risk that it is less than the lower bound is [alpha].

and

* The risk that it is greater than the upper bound is also [alpha].

So the risk that it is outside the *upper or lower bound* is *twice* [alpha], and the
probability that it is inside the bounds is therefore not nearly as high as
one might have thought.  This is why [alpha]/2 must be used in
the calculations below.

In contrast, had we been calculating a
single-sided interval, for example: ['"Calculate a lower bound so that we are P%
sure that the true occurrence frequency is greater than some value"]
then we would *not* have divided by two.
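
To make the distinction concrete, here is a small illustrative sketch (using
the same 20 trials and 4 successes as the first table of output below) that
computes both a genuine two-sided 95% interval, with [alpha]/2 allocated to
each tail, and a pair of one-sided 95% bounds, each carrying the full [alpha];
the latter pair taken together amounts only to a 90% two-sided interval:

   #include <iostream>
   #include <boost/math/distributions/binomial.hpp>

   int main()
   {
      using boost::math::binomial_distribution;
      double trials = 20, successes = 4, alpha = 0.05;

      // Two-sided 95% interval: each tail carries alpha/2 risk.
      double lo2 = binomial_distribution<>::find_lower_bound_on_p(
            trials, successes, alpha / 2);
      double hi2 = binomial_distribution<>::find_upper_bound_on_p(
            trials, successes, alpha / 2);

      // Two separate one-sided 95% bounds: each tail carries the full alpha
      // risk, so together they form only a 90% two-sided interval.
      double lo1 = binomial_distribution<>::find_lower_bound_on_p(
            trials, successes, alpha);
      double hi1 = binomial_distribution<>::find_upper_bound_on_p(
            trials, successes, alpha);

      std::cout << "Two-sided 95% interval:       [" << lo2 << ", " << hi2 << "]\n";
      std::cout << "Pair of one-sided 95% bounds: [" << lo1 << ", " << hi1 << "]\n";
   }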

Finally, note that `binomial_distribution` provides a choice of two
methods for the calculation; we print out the results from both
methods in this example:

      for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
      {
         // Confidence value:
         cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
         // Calculate Clopper Pearson bounds:
         double l = binomial_distribution<>::find_lower_bound_on_p(
                        trials, successes, alpha[i]/2);
         double u = binomial_distribution<>::find_upper_bound_on_p(
                        trials, successes, alpha[i]/2);
         // Print Clopper Pearson Limits:
         cout << fixed << setprecision(5) << setw(15) << right << l;
         cout << fixed << setprecision(5) << setw(15) << right << u;
         // Calculate Jeffreys Prior Bounds:
         l = binomial_distribution<>::find_lower_bound_on_p(
               trials, successes, alpha[i]/2,
               binomial_distribution<>::jeffreys_prior_interval);
         u = binomial_distribution<>::find_upper_bound_on_p(
               trials, successes, alpha[i]/2,
               binomial_distribution<>::jeffreys_prior_interval);
         // Print Jeffreys Prior Limits:
         cout << fixed << setprecision(5) << setw(15) << right << l;
         cout << fixed << setprecision(5) << setw(15) << right << u << std::endl;
      }
      cout << endl;
   }

And that's all there is to it.  Let's see some sample output for a 2 in 10
success ratio, first for 20 trials:

[pre'''___________________________________________
2-Sided Confidence Limits For Success Ratio
___________________________________________

Number of Observations                  =  20
Number of successes                     =  4
Sample frequency of occurrence          =  0.2


_______________________________________________________________________
Confidence        Lower CP       Upper CP       Lower JP       Upper JP
 Value (%)        Limit          Limit          Limit          Limit
_______________________________________________________________________
    50.000        0.12840        0.29588        0.14974        0.26916
    75.000        0.09775        0.34633        0.11653        0.31861
    90.000        0.07135        0.40103        0.08734        0.37274
    95.000        0.05733        0.43661        0.07152        0.40823
    99.000        0.03576        0.50661        0.04655        0.47859
    99.900        0.01905        0.58632        0.02634        0.55960
    99.990        0.01042        0.64997        0.01530        0.62495
    99.999        0.00577        0.70216        0.00901        0.67897
''']

As you can see, even at the 95% confidence level the bounds are
really quite wide (this example is chosen to be easily compared to the one
in the __handbook
[@http://www.itl.nist.gov/div898/handbook/prc/section2/prc241.htm
here]).  Note also that the Clopper-Pearson calculation method (CP above)
produces noticeably more pessimistic (wider) estimates than the Jeffreys Prior
method (JP above).


Compare that with the program output for
2000 trials:

[pre'''___________________________________________
2-Sided Confidence Limits For Success Ratio
___________________________________________

Number of Observations                  =  2000
Number of successes                     =  400
Sample frequency of occurrence          =  0.2000000


_______________________________________________________________________
Confidence        Lower CP       Upper CP       Lower JP       Upper JP
 Value (%)        Limit          Limit          Limit          Limit
_______________________________________________________________________
    50.000        0.19382        0.20638        0.19406        0.20613
    75.000        0.18965        0.21072        0.18990        0.21047
    90.000        0.18537        0.21528        0.18561        0.21503
    95.000        0.18267        0.21821        0.18291        0.21796
    99.000        0.17745        0.22400        0.17769        0.22374
    99.900        0.17150        0.23079        0.17173        0.23053
    99.990        0.16658        0.23657        0.16681        0.23631
    99.999        0.16233        0.24169        0.16256        0.24143
''']

Now, even when the confidence level is very high, the limits are really
quite close to the experimentally calculated value of 0.2.  Furthermore,
the difference between the two calculation methods is now really quite small.

[endsect]

[section:binom_size_eg Estimating Sample Sizes for a Binomial Distribution.]

Imagine you have a critical component that you know will fail in 1 in
N "uses" (for some suitable definition of "use").  You may want to schedule
routine replacement of the component so that its chance of failure between
routine replacements is less than P%.  If the failures follow a binomial
distribution (each time the component is "used" it either fails or does not)
then the static member function `binomial_distribution<>::find_maximum_number_of_trials`
can be used to estimate the maximum number of "uses" of that component for some
acceptable risk level /alpha/.
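
For instance, the following illustrative fragment (not part of the example
program) asks how many times a component with a 1 in 1000 failure chance can be
"used" while remaining 95% confident of seeing no failures at all; after taking
the floor, the result should reproduce the 51 shown in the 95% row of the first
table of output below:

   #include <cmath>
   #include <iostream>
   #include <boost/math/distributions/binomial.hpp>

   int main()
   {
      using boost::math::binomial;
      double max_trials = binomial::find_maximum_number_of_trials(
            0,       // maximum number of failures ("successes") permitted
            0.001,   // chance of failure on each use
            0.05);   // acceptable risk level alpha
      // Take the floor to err on the safe side:
      std::cout << std::floor(max_trials) << std::endl;
   }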

The example program
[@../../example/binomial_sample_sizes.cpp binomial_sample_sizes.cpp]
demonstrates its usage.  It centres on a routine that prints out
a table of maximum sample sizes for various probability thresholds:

   void find_max_sample_size(
      double p,              // success ratio.
      unsigned successes)    // Total number of observed successes permitted.
   {

The routine then declares a table of probability thresholds: these are the
maximum acceptable probabilities that more than /successes/ events will be
observed.  In our example, /successes/ will always be zero, since we want
no component failures, but in other situations non-zero values may well
make sense.

   double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };

Much of the rest of the program is pretty-printing; the important part
is the calculation of the maximum number of permitted trials for each
value of alpha:

   for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
   {
      // Confidence value:
      cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
      // Calculate trials:
      double t = binomial::find_maximum_number_of_trials(
                     successes, p, alpha[i]);
      t = floor(t);
      // Print Trials:
      cout << fixed << setprecision(5) << setw(15) << right << t << endl;
   }

Note that since we're
calculating the maximum number of trials permitted, we'll err on the safe
side and take the floor of the result.  Had we been calculating the
/minimum/ number of trials required to observe a certain number of /successes/
using `find_minimum_number_of_trials`, we would have taken the ceiling instead.
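
For completeness, here is a small illustrative sketch of that mirror-image
calculation (the figures 10, 0.5 and 0.05 are arbitrary choices for this
illustration, not taken from the example program):

   #include <cmath>
   #include <iostream>
   #include <boost/math/distributions/binomial.hpp>

   int main()
   {
      using boost::math::binomial;
      // Estimate the minimum number of trials required to observe a certain
      // number of successes (here 10, each occurring with probability 0.5),
      // at risk level alpha = 0.05.
      double min_trials = binomial::find_minimum_number_of_trials(
            10,      // number of events we wish to observe
            0.5,     // chance of the event occurring on each trial
            0.05);   // acceptable risk level alpha
      // Take the ceiling to err on the safe side:
      std::cout << std::ceil(min_trials) << std::endl;
   }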

We'll finish off by looking at some sample output, firstly for
a 1 in 1000 chance of component failure with each use:

[pre
'''________________________
Maximum Number of Trials
________________________

Success ratio                           =  0.001
Maximum Number of "successes" permitted =  0


____________________________
Confidence        Max Number
 Value (%)        Of Trials
____________________________
    50.000            692
    75.000            287
    90.000            105
    95.000             51
    99.000             10
    99.900              0
    99.990              0
    99.999              0'''
]

So 51 "uses" of the component would yield a 95% chance that no
component failures would be observed.

Compare that with a 1 in 1 million chance of component failure:

[pre'''
________________________
Maximum Number of Trials
________________________

Success ratio                           =  0.0000010
Maximum Number of "successes" permitted =  0


____________________________
Confidence        Max Number
 Value (%)        Of Trials
____________________________
    50.000         693146
    75.000         287681
    90.000         105360
    95.000          51293
    99.000          10050
    99.900           1000
    99.990            100
    99.999             10'''
]

In this case, even 1000 uses of the component would still yield a
less than 1 in 1000 chance of observing a component failure
(i.e. a 99.9% chance of no failure).

[endsect] [/section:binom_size_eg Estimating Sample Sizes for a Binomial Distribution.]

[endsect] [/section:binom_eg Binomial Distribution Examples]

[/
  Copyright 2006 John Maddock and Paul A. Bristow.
  Distributed under the Boost Software License, Version 1.0.
  (See accompanying file LICENSE_1_0.txt or copy at
  http://www.boost.org/LICENSE_1_0.txt).
]