mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-11-17 01:22:15 -05:00
447 lines
16 KiB
Plaintext
447 lines
16 KiB
Plaintext
|
[section:test_data Graphing, Profiling, and Generating Test Data for Special Functions]
|
||
|
|
||
|
The class `test_data` and associated helper functions are designed so that in just
|
||
|
a few lines of code you should be able to:
|
||
|
|
||
|
* Profile a continued fraction, or infinite series for convergence and accuracy.
|
||
|
* Generate csv data from a special function that can be imported into your favorite
|
||
|
graphing program (or spreadsheet) for further analysis.
|
||
|
* Generate high precision test data.
|
||
|
|
||
|
[h4 Synopsis]
|
||
|
|
||
|
#include <boost/math/tools/test_data.hpp>
|
||
|
|
||
|
[important
|
||
|
This is a non-core Boost.Math header that is predominantly used for internal
|
||
|
maintenance of the library: as a result the library is located under
|
||
|
`libs/math/include_private` and you will need to add that directory to
|
||
|
your include path in order to use this feature.
|
||
|
]
|
||
|
|
||
|
namespace boost{ namespace math{ namespace tools{
|
||
|
|
||
|
enum parameter_type
|
||
|
{
|
||
|
random_in_range = 0,
|
||
|
periodic_in_range = 1,
|
||
|
power_series = 2,
|
||
|
dummy_param = 0x80,
|
||
|
};
|
||
|
|
||
|
template <class T>
|
||
|
struct parameter_info;
|
||
|
|
||
|
template <class T>
|
||
|
parameter_info<T> make_random_param(T start_range, T end_range, int n_points);
|
||
|
|
||
|
template <class T>
|
||
|
parameter_info<T> make_periodic_param(T start_range, T end_range, int n_points);
|
||
|
|
||
|
template <class T>
|
||
|
parameter_info<T> make_power_param(T basis, int start_exponent, int end_exponent);
|
||
|
|
||
|
template <class T>
|
||
|
bool get_user_parameter_info(parameter_info<T>& info, const char* param_name);
|
||
|
|
||
|
template <class T>
|
||
|
class test_data
|
||
|
{
|
||
|
public:
|
||
|
typedef std::vector<T> row_type;
|
||
|
typedef row_type value_type;
|
||
|
private:
|
||
|
typedef std::set<row_type> container_type;
|
||
|
public:
|
||
|
typedef typename container_type::reference reference;
|
||
|
typedef typename container_type::const_reference const_reference;
|
||
|
typedef typename container_type::iterator iterator;
|
||
|
typedef typename container_type::const_iterator const_iterator;
|
||
|
typedef typename container_type::difference_type difference_type;
|
||
|
typedef typename container_type::size_type size_type;
|
||
|
|
||
|
// creation:
|
||
|
test_data(){}
|
||
|
template <class F>
|
||
|
test_data(F func, const parameter_info<T>& arg1);
|
||
|
|
||
|
// insertion:
|
||
|
template <class F>
|
||
|
test_data& insert(F func, const parameter_info<T>& arg1);
|
||
|
|
||
|
template <class F>
|
||
|
test_data& insert(F func, const parameter_info<T>& arg1,
|
||
|
const parameter_info<T>& arg2);
|
||
|
|
||
|
template <class F>
|
||
|
test_data& insert(F func, const parameter_info<T>& arg1,
|
||
|
const parameter_info<T>& arg2,
|
||
|
const parameter_info<T>& arg3);
|
||
|
|
||
|
void clear();
|
||
|
|
||
|
// access:
|
||
|
iterator begin();
|
||
|
iterator end();
|
||
|
const_iterator begin()const;
|
||
|
const_iterator end()const;
|
||
|
bool operator==(const test_data& d)const;
|
||
|
bool operator!=(const test_data& d)const;
|
||
|
void swap(test_data& other);
|
||
|
size_type size()const;
|
||
|
size_type max_size()const;
|
||
|
bool empty()const;
|
||
|
|
||
|
bool operator < (const test_data& dat)const;
|
||
|
bool operator <= (const test_data& dat)const;
|
||
|
bool operator > (const test_data& dat)const;
|
||
|
bool operator >= (const test_data& dat)const;
|
||
|
};
|
||
|
|
||
|
template <class charT, class traits, class T>
|
||
|
std::basic_ostream<charT, traits>& write_csv(
|
||
|
std::basic_ostream<charT, traits>& os,
|
||
|
const test_data<T>& data);
|
||
|
|
||
|
template <class charT, class traits, class T>
|
||
|
std::basic_ostream<charT, traits>& write_csv(
|
||
|
std::basic_ostream<charT, traits>& os,
|
||
|
const test_data<T>& data,
|
||
|
const charT* separator);
|
||
|
|
||
|
template <class T>
|
||
|
std::ostream& write_code(std::ostream& os,
|
||
|
const test_data<T>& data,
|
||
|
const char* name);
|
||
|
|
||
|
}}} // namespaces
|
||
|
|
||
|
[h4 Description]
|
||
|
|
||
|
This tool is best illustrated with the following series of examples.
|
||
|
|
||
|
The functionality of test_data is split into the following parts:
|
||
|
|
||
|
* A functor that implements the function for which data is being generated:
|
||
|
this is the bit you have to write.
|
||
|
* One of more parameters that are to be passed to the functor, these are
|
||
|
described in fairly abstract terms: give me N points distributed like /this/ etc.
|
||
|
* The class test_data, that takes the functor and descriptions of the parameters
|
||
|
and computes how ever many output points have been requested, these are stored
|
||
|
in a sorted container.
|
||
|
* Routines to iterate over the test_data container and output the data in either
|
||
|
csv format, or as C++ source code (as a table using Boost.Array).
|
||
|
|
||
|
[h5 Example 1: Output Data for Graph Plotting]
|
||
|
|
||
|
For example, lets say we want to graph the lgamma function between -3 and 100,
|
||
|
one could do this like so:
|
||
|
|
||
|
#include <boost/math/tools/test_data.hpp>
|
||
|
#include <boost/math/special_functions/gamma.hpp>
|
||
|
|
||
|
int main()
|
||
|
{
|
||
|
using namespace boost::math::tools;
|
||
|
|
||
|
// create an object to hold the data:
|
||
|
test_data<double> data;
|
||
|
|
||
|
// insert 500 points at uniform intervals between just after -3 and 100:
|
||
|
double (*pf)(double) = boost::math::lgamma;
|
||
|
data.insert(pf, make_periodic_param(-3.0 + 0.00001, 100.0, 500));
|
||
|
|
||
|
// print out in csv format:
|
||
|
write_csv(std::cout, data, ", ");
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
Which, when plotted, results in:
|
||
|
|
||
|
[graph lgamma]
|
||
|
|
||
|
[h5 Example 2: Creating Test Data]
|
||
|
|
||
|
As a second example, let's suppose we want to create highly accurate test
|
||
|
data for a special function. Since many special functions have two or
|
||
|
more independent parameters, it's very hard to effectively cover all of
|
||
|
the possible parameter space without generating gigabytes of data at
|
||
|
great computational expense. A second best approach is to provide the tools
|
||
|
by which a user (or the library maintainer) can quickly generate more data
|
||
|
on demand to probe the function over a particular domain of interest.
|
||
|
|
||
|
In this example we'll generate test data for the beta function using
|
||
|
[@http://shoup.net/ntl/doc/RR.txt NTL::RR] at 1000 bit precision.
|
||
|
Rather than call our generic
|
||
|
version of the beta function, we'll implement a deliberately naive version
|
||
|
of the beta function using lgamma, and rely on the high precision of the
|
||
|
data type used to get results accurate to at least 128-bit precision. In this
|
||
|
way our test data is independent of whatever clever tricks we may wish to
|
||
|
use inside the our beta function.
|
||
|
|
||
|
To start with then, here's the function object that creates the test data:
|
||
|
|
||
|
#include <boost/math/tools/ntl.hpp>
|
||
|
#include <boost/math/special_functions/gamma.hpp>
|
||
|
#include <boost/math/tools/test_data.hpp>
|
||
|
#include <fstream>
|
||
|
|
||
|
#include <boost/math/tools/test_data.hpp>
|
||
|
|
||
|
using namespace boost::math::tools;
|
||
|
|
||
|
struct beta_data_generator
|
||
|
{
|
||
|
NTL::RR operator()(NTL::RR a, NTL::RR b)
|
||
|
{
|
||
|
//
|
||
|
// If we throw a domain error then test_data will
|
||
|
// ignore this input point. We'll use this to filter
|
||
|
// out all cases where a < b since the beta function
|
||
|
// is symmetrical in a and b:
|
||
|
//
|
||
|
if(a < b)
|
||
|
throw std::domain_error("");
|
||
|
|
||
|
// very naively calculate spots with lgamma:
|
||
|
NTL::RR g1, g2, g3;
|
||
|
int s1, s2, s3;
|
||
|
g1 = boost::math::lgamma(a, &s1);
|
||
|
g2 = boost::math::lgamma(b, &s2);
|
||
|
g3 = boost::math::lgamma(a+b, &s3);
|
||
|
g1 += g2 - g3;
|
||
|
g1 = exp(g1);
|
||
|
g1 *= s1 * s2 * s3;
|
||
|
return g1;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
To create the data, we'll need to input the domains for a and b
|
||
|
for which the function will be tested: the function `get_user_parameter_info`
|
||
|
is designed for just that purpose. The start of main will look something like:
|
||
|
|
||
|
// Set the precision on RR:
|
||
|
NTL::RR::SetPrecision(1000); // bits.
|
||
|
NTL::RR::SetOutputPrecision(40); // decimal digits.
|
||
|
|
||
|
parameter_info<NTL::RR> arg1, arg2;
|
||
|
test_data<NTL::RR> data;
|
||
|
|
||
|
std::cout << "Welcome.\n"
|
||
|
"This program will generate spot tests for the beta function:\n"
|
||
|
" beta(a, b)\n\n";
|
||
|
|
||
|
bool cont;
|
||
|
std::string line;
|
||
|
|
||
|
do{
|
||
|
// prompt the user for the domain of a and b to test:
|
||
|
get_user_parameter_info(arg1, "a");
|
||
|
get_user_parameter_info(arg2, "b");
|
||
|
|
||
|
// create the data:
|
||
|
data.insert(beta_data_generator(), arg1, arg2);
|
||
|
|
||
|
// see if the user want's any more domains tested:
|
||
|
std::cout << "Any more data [y/n]?";
|
||
|
std::getline(std::cin, line);
|
||
|
boost::algorithm::trim(line);
|
||
|
cont = (line == "y");
|
||
|
}while(cont);
|
||
|
|
||
|
[caution At this point one potential stumbling block should be mentioned:
|
||
|
test_data<>::insert will create a matrix of test data when there are two
|
||
|
or more parameters, so if we have two parameters and we're asked for
|
||
|
a thousand points on each, that's a ['million test points in total].
|
||
|
Don't say you weren't warned!]
|
||
|
|
||
|
There's just one final step now, and that's to write the test data to file:
|
||
|
|
||
|
std::cout << "Enter name of test data file [default=beta_data.ipp]";
|
||
|
std::getline(std::cin, line);
|
||
|
boost::algorithm::trim(line);
|
||
|
if(line == "")
|
||
|
line = "beta_data.ipp";
|
||
|
std::ofstream ofs(line.c_str());
|
||
|
write_code(ofs, data, "beta_data");
|
||
|
|
||
|
The format of the test data looks something like:
|
||
|
|
||
|
#define SC_(x) static_cast<T>(BOOST_JOIN(x, L))
|
||
|
static const boost::array<boost::array<T, 3>, 1830>
|
||
|
beta_med_data = {
|
||
|
SC_(0.4883005917072296142578125),
|
||
|
SC_(0.4883005917072296142578125),
|
||
|
SC_(3.245912809500479157065104747353807392371),
|
||
|
SC_(3.5808107852935791015625),
|
||
|
SC_(0.4883005917072296142578125),
|
||
|
SC_(1.007653173802923954909901438393379243537),
|
||
|
/* ... lots of rows skipped */
|
||
|
};
|
||
|
|
||
|
The first two values in each row are the input parameters that were passed
|
||
|
to our functor and the last value is the return value from the functor.
|
||
|
Had our functor returned a __tuple rather than a value, then we would have had
|
||
|
one entry for each element in the tuple in addition to the input parameters.
|
||
|
|
||
|
The first #define serves two purposes:
|
||
|
|
||
|
* It reduces the file sizes considerably: all those `static_cast`'s add up to a lot
|
||
|
of bytes otherwise (they are needed to suppress compiler warnings when `T` is
|
||
|
narrower than a `long double`).
|
||
|
* It provides a useful customisation point: for example if we were testing
|
||
|
a user-defined type that has more precision than a `long double` we could change
|
||
|
it to:
|
||
|
|
||
|
[^#define SC_(x) lexical_cast<T>(BOOST_STRINGIZE(x))]
|
||
|
|
||
|
in order to ensure that no truncation of the values occurs prior to conversion
|
||
|
to `T`. Note that this isn't used by default as it's rather hard on the compiler
|
||
|
when the table is large.
|
||
|
|
||
|
[h5 Example 3: Profiling a Continued Fraction for Convergence and Accuracy]
|
||
|
|
||
|
Alternatively, lets say we want to profile a continued fraction for
|
||
|
convergence and error. As an example, we'll use the continued fraction
|
||
|
for the upper incomplete gamma function, the following function object
|
||
|
returns the next a[sub N ] and b[sub N ] of the continued fraction
|
||
|
each time it's invoked:
|
||
|
|
||
|
template <class T>
|
||
|
struct upper_incomplete_gamma_fract
|
||
|
{
|
||
|
private:
|
||
|
T z, a;
|
||
|
int k;
|
||
|
public:
|
||
|
typedef std::pair<T,T> result_type;
|
||
|
|
||
|
upper_incomplete_gamma_fract(T a1, T z1)
|
||
|
: z(z1-a1+1), a(a1), k(0)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
result_type operator()()
|
||
|
{
|
||
|
++k;
|
||
|
z += 2;
|
||
|
return result_type(k * (a - k), z);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
We want to measure both the relative error, and the rate of convergence
|
||
|
of this fraction, so we'll write a functor that returns both as a __tuple:
|
||
|
class test_data will unpack the tuple for us, and create one column of data
|
||
|
for each element in the tuple (in addition to the input parameters):
|
||
|
|
||
|
#include <boost/math/tools/test_data.hpp>
|
||
|
#include <boost/math/tools/test.hpp>
|
||
|
#include <boost/math/special_functions/gamma.hpp>
|
||
|
#include <boost/math/tools/ntl.hpp>
|
||
|
#include <boost/math/tools/tuple.hpp>
|
||
|
|
||
|
template <class T>
|
||
|
struct profile_gamma_fraction
|
||
|
{
|
||
|
typedef ``__tuple``<T, T> result_type;
|
||
|
|
||
|
result_type operator()(T val)
|
||
|
{
|
||
|
using namespace boost::math::tools;
|
||
|
// estimate the true value, using arbitary precision
|
||
|
// arithmetic and NTL::RR:
|
||
|
NTL::RR rval(val);
|
||
|
upper_incomplete_gamma_fract<NTL::RR> f1(rval, rval);
|
||
|
NTL::RR true_val = continued_fraction_a(f1, 1000);
|
||
|
//
|
||
|
// Now get the aproximation at double precision, along with the number of
|
||
|
// iterations required:
|
||
|
boost::uintmax_t iters = std::numeric_limits<boost::uintmax_t>::max();
|
||
|
upper_incomplete_gamma_fract<T> f2(val, val);
|
||
|
T found_val = continued_fraction_a(f2, std::numeric_limits<T>::digits, iters);
|
||
|
//
|
||
|
// Work out the relative error, as measured in units of epsilon:
|
||
|
T err = real_cast<T>(relative_error(true_val, NTL::RR(found_val)) / std::numeric_limits<T>::epsilon());
|
||
|
//
|
||
|
// now just return the results as a tuple:
|
||
|
return boost::math::make_tuple(err, iters);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
Feeding that functor into test_data allows rapid output of csv data,
|
||
|
for whatever type `T` we may be interested in:
|
||
|
|
||
|
int main()
|
||
|
{
|
||
|
using namespace boost::math::tools;
|
||
|
// create an object to hold the data:
|
||
|
test_data<double> data;
|
||
|
// insert 500 points at uniform intervals between just after 0 and 100:
|
||
|
data.insert(profile_gamma_fraction<double>(), make_periodic_param(0.01, 100.0, 100));
|
||
|
// print out in csv format:
|
||
|
write_csv(std::cout, data, ", ");
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
This time there's no need to plot a graph, the first few rows are:
|
||
|
|
||
|
a and z, Error/epsilon, Iterations required
|
||
|
|
||
|
0.01, 9723.14, 4726
|
||
|
1.0099, 9.54818, 87
|
||
|
2.0098, 3.84777, 40
|
||
|
3.0097, 0.728358, 25
|
||
|
4.0096, 2.39712, 21
|
||
|
5.0095, 0.233263, 16
|
||
|
|
||
|
So it's pretty clear that this fraction shouldn't be used for small values
|
||
|
of a and z.
|
||
|
|
||
|
[h4 reference]
|
||
|
|
||
|
Most of this tool has been described already in the examples above, we'll
|
||
|
just add the following notes on the non-member functions:
|
||
|
|
||
|
template <class T>
|
||
|
parameter_info<T> make_random_param(T start_range, T end_range, int n_points);
|
||
|
|
||
|
Tells class test_data to test /n_points/ random values in the range
|
||
|
[start_range,end_range].
|
||
|
|
||
|
template <class T>
|
||
|
parameter_info<T> make_periodic_param(T start_range, T end_range, int n_points);
|
||
|
|
||
|
Tells class test_data to test /n_points/ evenly spaced values in the range
|
||
|
[start_range,end_range].
|
||
|
|
||
|
template <class T>
|
||
|
parameter_info<T> make_power_param(T basis, int start_exponent, int end_exponent);
|
||
|
|
||
|
Tells class test_data to test points of the form ['basis + R * 2[super expon]] for each
|
||
|
/expon/ in the range [start_exponent, end_exponent], and /R/ a random number in \[0.5, 1\].
|
||
|
|
||
|
template <class T>
|
||
|
bool get_user_parameter_info(parameter_info<T>& info, const char* param_name);
|
||
|
|
||
|
Prompts the user for the parameter range and form to use.
|
||
|
|
||
|
Finally, if we don't want the parameter to be included in the output, we can tell
|
||
|
test_data by setting it a "dummy parameter":
|
||
|
|
||
|
parameter_info<double> p = make_random_param(2.0, 5.0, 10);
|
||
|
p.type |= dummy_param;
|
||
|
|
||
|
This is useful when the functor used transforms the parameter in some way
|
||
|
before passing it to the function under test, usually the functor will then
|
||
|
return both the transformed input and the result in a tuple, so there's no
|
||
|
need for the original pseudo-parameter to be included in program output.
|
||
|
|
||
|
[endsect][/section:test_data Graphing, Profiling, and Generating Test Data for Special Functions]
|
||
|
|
||
|
[/
|
||
|
Copyright 2006 John Maddock and Paul A. Bristow.
|
||
|
Distributed under the Boost Software License, Version 1.0.
|
||
|
(See accompanying file LICENSE_1_0.txt or copy at
|
||
|
http://www.boost.org/LICENSE_1_0.txt).
|
||
|
]
|