11#include <boost/accumulators/accumulators.hpp>
12#include <boost/accumulators/statistics/max.hpp>
13#include <boost/accumulators/statistics/min.hpp>
14#include <boost/accumulators/statistics/stats.hpp>
15#include <boost/accumulators/statistics/variance.hpp>
25Logger logger(
"Statistics");
27void assertMomentIsValid(
const int maxMoment) {
29 std::stringstream msg;
30 msg <<
"moment = " << maxMoment <<
" is statistically meaningless";
31 throw std::runtime_error(msg.str());
40 constexpr double nan = std::numeric_limits<double>::quiet_NaN();
52template <
typename TYPE>
double getMedian(
const vector<TYPE> &data) {
53 const size_t size = data.size();
55 return static_cast<double>(data[0]);
57 const bool isSorted = std::is_sorted(data.cbegin(), data.cend());
58 const bool is_even = (size % 2 == 0);
61 return (
static_cast<double>(data[size / 2 - 1]) +
static_cast<double>(data[size / 2])) / 2;
63 return static_cast<double>(data[size / 2]);
65 std::vector<TYPE> tmpSortedData(data);
66 std::sort(tmpSortedData.begin(), tmpSortedData.end());
68 return (
static_cast<double>(tmpSortedData[size / 2 - 1]) +
static_cast<double>(tmpSortedData[size / 2])) / 2;
70 return static_cast<double>(tmpSortedData[size / 2]);
78template <
typename TYPE> std::vector<double>
getZscore(
const vector<TYPE> &data) {
79 std::vector<double> Zscore;
80 if (data.size() < 3) {
81 Zscore.resize(data.size(), 0.);
86 Zscore.resize(data.size(), 0.);
89 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
90 auto tmp =
static_cast<double>(*it);
100template <
typename TYPE> std::vector<double>
getWeightedZscore(
const vector<TYPE> &data,
const vector<TYPE> &weights) {
101 std::vector<double> Zscore;
102 if (data.size() < 3) {
103 Zscore.resize(data.size(), 0.);
108 Zscore.resize(data.size(), 0.);
111 double sumWeights = 0.0;
112 double sumWeightedData = 0.0;
113 double weightedVariance = 0.0;
114 for (
size_t it = 0; it != data.size(); ++it) {
115 sumWeights +=
static_cast<double>(weights[it]);
116 sumWeightedData +=
static_cast<double>(weights[it] * data[it]);
118 double weightedMean = sumWeightedData / sumWeights;
119 for (
size_t it = 0; it != data.size(); ++it) {
120 weightedVariance += std::pow(
static_cast<double>(data[it]) - weightedMean, 2) *
121 std::pow(
static_cast<double>(weights[it]) / sumWeights, 2);
123 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
124 Zscore.emplace_back(
fabs((
static_cast<double>(*it) - weightedMean) / std::sqrt(weightedVariance)));
134 if (data.size() < 3) {
135 std::vector<double> Zscore(data.size(), 0.);
138 std::vector<double> MADvec;
141 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
142 tmp =
static_cast<double>(*it);
143 MADvec.emplace_back(
fabs(
tmp - median));
147 std::vector<double> Zscore(data.size(), 0.);
151 std::vector<double> Zscore;
152 for (
auto it = data.begin(); it != data.end(); ++it) {
153 tmp =
static_cast<double>(*it);
154 Zscore.emplace_back(0.6745 *
fabs((
tmp - median) / MAD));
173 using namespace boost::accumulators;
174 accumulator_set<double, stats<tag::min, tag::max, tag::variance>> acc;
175 for (
auto &
value : data) {
176 acc(
static_cast<double>(
value));
180 statistics.
mean = mean(acc);
181 double var = variance(acc);
184 auto ndofs =
static_cast<double>(data.size());
185 var *= ndofs / (ndofs - 1.0);
190 using namespace boost::accumulators;
191 accumulator_set<double, stats<tag::mean>> acc;
192 for (
auto &
value : data) {
193 acc(
static_cast<double>(
value));
195 statistics.
mean = mean(acc);
227Rfactor getRFactor(
const std::vector<double> &obsI,
const std::vector<double> &calI,
const std::vector<double> &obsE) {
229 if (obsI.size() != calI.size() || obsI.size() != obsE.size()) {
230 std::stringstream errss;
231 errss <<
"GetRFactor() Input Error! Observed Intensity (" << obsI.size() <<
"), Calculated Intensity ("
232 << calI.size() <<
") and Observed Error (" << obsE.size() <<
") have different number of elements.";
233 throw std::runtime_error(errss.str());
236 throw std::runtime_error(
"getRFactor(): the input arrays are empty.");
242 double sumrpdenom = 0;
244 size_t numpts = obsI.size();
245 for (
size_t i = 0; i < numpts; ++i) {
246 double cal_i = calI[i];
247 double obs_i = obsI[i];
248 double sigma = obsE[i];
250 double diff = obs_i - cal_i;
252 if (weight == weight && weight <= DBL_MAX) {
254 sumrpnom +=
fabs(diff);
255 sumrpdenom +=
fabs(obs_i);
257 double tempnom = weight * diff * diff;
258 double tempden = weight * obs_i * obs_i;
263 if (tempnom != tempnom || tempden != tempden) {
264 logger.error() <<
"***** Error! ****** Data indexed " << i <<
" is NaN. "
265 <<
"i = " << i <<
": cal = " << calI[i] <<
", obs = " << obs_i <<
", weight = " << weight
272 rfactor.
Rp = (sumrpnom / sumrpdenom);
273 rfactor.
Rwp = std::sqrt(sumnom / sumdenom);
275 if (rfactor.
Rwp != rfactor.
Rwp)
276 logger.debug() <<
"Rwp is NaN. Denominator = " << sumnom <<
"; Nominator = " << sumdenom <<
". \n";
292template <
typename TYPE>
294 assertMomentIsValid(maxMoment);
297 bool isDensity(
x.size() ==
y.size());
300 if ((!isDensity) && (
x.size() !=
y.size() + 1)) {
301 std::stringstream msg;
302 msg <<
"length of x (" <<
x.size() <<
") and y (" <<
y.size() <<
")do not match";
303 throw std::out_of_range(msg.str());
307 std::vector<double> result(std::size_t(maxMoment + 1), 0.);
310 size_t numPoints =
y.size();
312 numPoints =
x.size() - 1;
318 for (
size_t j = 0; j < numPoints; ++j) {
320 const double xVal = .5 *
static_cast<double>(
x[j] +
x[j + 1]);
322 auto temp =
static_cast<double>(
y[j]);
324 const auto xDelta =
static_cast<double>(
x[j + 1] -
x[j]);
325 temp = .5 * (temp +
static_cast<double>(
y[j + 1])) * xDelta;
330 for (
size_t i = 1; i < result.size(); ++i) {
350template <
typename TYPE>
351std::vector<double>
getMomentsAboutMean(
const std::vector<TYPE> &
x,
const std::vector<TYPE> &
y,
const int maxMoment) {
352 assertMomentIsValid(maxMoment);
356 const double mean = momentsAboutOrigin[1];
359 std::vector<double> result(std::size_t(maxMoment + 1), 0.);
360 result[0] = momentsAboutOrigin[0];
367 bool isDensity(
x.size() ==
y.size());
370 size_t numPoints =
y.size();
372 numPoints =
x.size() - 1;
378 for (
size_t j = 0; j < numPoints; ++j) {
381 const double xVal = .5 *
static_cast<double>(
x[j] +
x[j + 1]) - mean;
386 const auto xDelta =
static_cast<double>(
x[j + 1] -
x[j]);
387 temp = xVal * .5 *
static_cast<double>(
y[j] +
y[j + 1]) * xDelta;
389 temp = xVal *
static_cast<double>(
y[j]);
394 for (
size_t i = 2; i < result.size(); ++i) {
405#define INSTANTIATE(TYPE) \
406 template MANTID_KERNEL_DLL Statistics getStatistics<TYPE>(const vector<TYPE> &, const unsigned int); \
407 template MANTID_KERNEL_DLL std::vector<double> getZscore<TYPE>(const vector<TYPE> &); \
408 template MANTID_KERNEL_DLL std::vector<double> getWeightedZscore<TYPE>(const vector<TYPE> &, const vector<TYPE> &); \
409 template MANTID_KERNEL_DLL std::vector<double> getModifiedZscore<TYPE>(const vector<TYPE> &); \
410 template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutOrigin<TYPE>( \
411 const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment); \
412 template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutMean<TYPE>( \
413 const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment);
double value
The value of the point.
#define INSTANTIATE(TYPE)
#define DLLExport
Definitions of the DLLImport compiler directives for MSVC.
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
std::vector< double > getModifiedZscore(const std::vector< TYPE > &data)
Return the modified Z score values for a dataset.
DLLExport Statistics getStatistics< string >(const vector< string > &data, const unsigned int flags)
Getting statistics of a string array should just give a bunch of NaNs.
Statistics getStatistics(const std::vector< TYPE > &data, const unsigned int flags=StatOptions::AllStats)
Return a statistics object for the given data set.
std::vector< double > getZscore(const std::vector< TYPE > &data)
Return the Z score values for a dataset.
std::vector< double > getMomentsAboutMean(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
std::vector< double > getWeightedZscore(const std::vector< TYPE > &data, const std::vector< TYPE > &weights)
There are enough special cases in determining the Z score where it useful to put it in a single funct...
double getMedian(const vector< TYPE > &data)
There are enough special cases in determining the median where it useful to put it in a single functi...
DLLExport Statistics getStatistics< bool >(const vector< bool > &data, const unsigned int flags)
Getting statistics of a boolean array should just give a bunch of NaNs.
std::vector< double > getMomentsAboutOrigin(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
Rfactor MANTID_KERNEL_DLL getRFactor(const std::vector< double > &obsI, const std::vector< double > &calI, const std::vector< double > &obsE)
Return the R-factors (Rwp) of a diffraction pattern data.
R factor for powder data analysis.
Simple struct to store statistics.
double median
Median value.
double minimum
Minimum value.
double maximum
Maximum value.
double standard_deviation
standard_deviation of the values
Statistics()
Default value for everything is nan.