11#include <boost/accumulators/accumulators.hpp>
12#include <boost/accumulators/statistics/max.hpp>
13#include <boost/accumulators/statistics/min.hpp>
14#include <boost/accumulators/statistics/stats.hpp>
15#include <boost/accumulators/statistics/variance.hpp>
25Logger logger(
"Statistics");
27void assertMomentIsValid(
const int maxMoment) {
29 std::stringstream msg;
30 msg <<
"moment = " << maxMoment <<
" is statistically meaningless";
31 throw std::runtime_error(msg.str());
40 constexpr double nan = std::numeric_limits<double>::quiet_NaN();
52template <
typename TYPE>
double getMedian(
const vector<TYPE> &data) {
53 const size_t size = data.size();
55 return static_cast<double>(data[0]);
57 const bool isSorted = std::is_sorted(data.cbegin(), data.cend());
58 std::vector<TYPE> tmpSortedData;
59 auto sortedDataRef = std::ref(data);
62 std::sort(tmpSortedData.begin(), tmpSortedData.end());
63 sortedDataRef = std::ref(std::as_const(tmpSortedData));
66 const bool is_even = (size % 2 == 0);
69 const auto left =
static_cast<double>(sortedDataRef.get()[size / 2 - 1]);
70 const auto right =
static_cast<double>(sortedDataRef.get()[size / 2]);
73 retVal =
static_cast<double>(sortedDataRef.get()[size / 2]);
82template <
typename TYPE> std::vector<double>
getZscore(
const vector<TYPE> &data) {
83 std::vector<double> Zscore;
84 if (data.size() < 3) {
85 Zscore.resize(data.size(), 0.);
90 Zscore.resize(data.size(), 0.);
93 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
94 auto tmp =
static_cast<double>(*it);
104template <
typename TYPE> std::vector<double>
getWeightedZscore(
const vector<TYPE> &data,
const vector<TYPE> &weights) {
105 std::vector<double> Zscore;
106 if (data.size() < 3) {
107 Zscore.resize(data.size(), 0.);
112 Zscore.resize(data.size(), 0.);
115 double sumWeights = 0.0;
116 double sumWeightedData = 0.0;
117 double weightedVariance = 0.0;
118 for (
size_t it = 0; it != data.size(); ++it) {
119 sumWeights +=
static_cast<double>(weights[it]);
120 sumWeightedData +=
static_cast<double>(weights[it] * data[it]);
122 double weightedMean = sumWeightedData / sumWeights;
123 for (
size_t it = 0; it != data.size(); ++it) {
124 weightedVariance += std::pow(
static_cast<double>(data[it]) - weightedMean, 2) *
125 std::pow(
static_cast<double>(weights[it]) / sumWeights, 2);
127 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
128 Zscore.emplace_back(
fabs((
static_cast<double>(*it) - weightedMean) / std::sqrt(weightedVariance)));
138 if (data.size() < 3) {
139 std::vector<double> Zscore(data.size(), 0.);
142 std::vector<double> MADvec;
145 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
146 tmp =
static_cast<double>(*it);
147 MADvec.emplace_back(
fabs(
tmp - median));
151 std::vector<double> Zscore(data.size(), 0.);
155 std::vector<double> Zscore;
156 for (
auto it = data.begin(); it != data.end(); ++it) {
157 tmp =
static_cast<double>(*it);
158 Zscore.emplace_back(0.6745 *
fabs((
tmp - median) / MAD));
177 using namespace boost::accumulators;
178 accumulator_set<double, stats<tag::min, tag::max, tag::variance>> acc;
179 for (
auto &
value : data) {
180 acc(
static_cast<double>(
value));
184 statistics.
mean = mean(acc);
185 double var = variance(acc);
188 auto ndofs =
static_cast<double>(data.size());
189 var *= ndofs / (ndofs - 1.0);
194 using namespace boost::accumulators;
195 accumulator_set<double, stats<tag::mean>> acc;
196 for (
auto &
value : data) {
197 acc(
static_cast<double>(
value));
199 statistics.
mean = mean(acc);
231Rfactor getRFactor(
const std::vector<double> &obsI,
const std::vector<double> &calI,
const std::vector<double> &obsE) {
233 if (obsI.size() != calI.size() || obsI.size() != obsE.size()) {
234 std::stringstream errss;
235 errss <<
"GetRFactor() Input Error! Observed Intensity (" << obsI.size() <<
"), Calculated Intensity ("
236 << calI.size() <<
") and Observed Error (" << obsE.size() <<
") have different number of elements.";
237 throw std::runtime_error(errss.str());
240 throw std::runtime_error(
"getRFactor(): the input arrays are empty.");
246 double sumrpdenom = 0;
248 size_t numpts = obsI.size();
249 for (
size_t i = 0; i < numpts; ++i) {
250 double cal_i = calI[i];
251 double obs_i = obsI[i];
252 double sigma = obsE[i];
254 double diff = obs_i - cal_i;
256 if (weight == weight && weight <= DBL_MAX) {
258 sumrpnom +=
fabs(diff);
259 sumrpdenom +=
fabs(obs_i);
261 double tempnom = weight * diff * diff;
262 double tempden = weight * obs_i * obs_i;
267 if (tempnom != tempnom || tempden != tempden) {
268 logger.error() <<
"***** Error! ****** Data indexed " << i <<
" is NaN. "
269 <<
"i = " << i <<
": cal = " << calI[i] <<
", obs = " << obs_i <<
", weight = " << weight
276 rfactor.
Rp = (sumrpnom / sumrpdenom);
277 rfactor.
Rwp = std::sqrt(sumnom / sumdenom);
279 if (rfactor.
Rwp != rfactor.
Rwp)
280 logger.debug() <<
"Rwp is NaN. Denominator = " << sumnom <<
"; Nominator = " << sumdenom <<
". \n";
296template <
typename TYPE>
298 assertMomentIsValid(maxMoment);
301 bool isDensity(
x.size() ==
y.size());
304 if ((!isDensity) && (
x.size() !=
y.size() + 1)) {
305 std::stringstream msg;
306 msg <<
"length of x (" <<
x.size() <<
") and y (" <<
y.size() <<
")do not match";
307 throw std::out_of_range(msg.str());
311 std::vector<double> result(std::size_t(maxMoment + 1), 0.);
314 size_t numPoints =
y.size();
316 numPoints =
x.size() - 1;
322 for (
size_t j = 0; j < numPoints; ++j) {
324 const double xVal = .5 *
static_cast<double>(
x[j] +
x[j + 1]);
326 auto temp =
static_cast<double>(
y[j]);
328 const auto xDelta =
static_cast<double>(
x[j + 1] -
x[j]);
329 temp = .5 * (temp +
static_cast<double>(
y[j + 1])) * xDelta;
334 for (
size_t i = 1; i < result.size(); ++i) {
354template <
typename TYPE>
355std::vector<double>
getMomentsAboutMean(
const std::vector<TYPE> &
x,
const std::vector<TYPE> &
y,
const int maxMoment) {
356 assertMomentIsValid(maxMoment);
360 const double mean = momentsAboutOrigin[1];
363 std::vector<double> result(std::size_t(maxMoment + 1), 0.);
364 result[0] = momentsAboutOrigin[0];
371 bool isDensity(
x.size() ==
y.size());
374 size_t numPoints =
y.size();
376 numPoints =
x.size() - 1;
382 for (
size_t j = 0; j < numPoints; ++j) {
385 const double xVal = .5 *
static_cast<double>(
x[j] +
x[j + 1]) - mean;
390 const auto xDelta =
static_cast<double>(
x[j + 1] -
x[j]);
391 temp = xVal * .5 *
static_cast<double>(
y[j] +
y[j + 1]) * xDelta;
393 temp = xVal *
static_cast<double>(
y[j]);
398 for (
size_t i = 2; i < result.size(); ++i) {
409#define INSTANTIATE(TYPE) \
410 template MANTID_KERNEL_DLL Statistics getStatistics<TYPE>(const vector<TYPE> &, const unsigned int); \
411 template MANTID_KERNEL_DLL std::vector<double> getZscore<TYPE>(const vector<TYPE> &); \
412 template MANTID_KERNEL_DLL std::vector<double> getWeightedZscore<TYPE>(const vector<TYPE> &, const vector<TYPE> &); \
413 template MANTID_KERNEL_DLL std::vector<double> getModifiedZscore<TYPE>(const vector<TYPE> &); \
414 template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutOrigin<TYPE>( \
415 const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment); \
416 template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutMean<TYPE>( \
417 const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment);
double value
The value of the point.
#define INSTANTIATE(TYPE)
#define DLLExport
Definitions of the DLLImport compiler directives for MSVC.
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
std::vector< double > getModifiedZscore(const std::vector< TYPE > &data)
Return the modified Z score values for a dataset.
DLLExport Statistics getStatistics< string >(const vector< string > &data, const unsigned int flags)
Getting statistics of a string array should just give a bunch of NaNs.
Statistics getStatistics(const std::vector< TYPE > &data, const unsigned int flags=StatOptions::AllStats)
Return a statistics object for the given data set.
std::vector< double > getZscore(const std::vector< TYPE > &data)
Return the Z score values for a dataset.
std::vector< double > getMomentsAboutMean(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
std::vector< double > getWeightedZscore(const std::vector< TYPE > &data, const std::vector< TYPE > &weights)
There are enough special cases in determining the Z score where it useful to put it in a single funct...
double getMedian(const vector< TYPE > &data)
There are enough special cases in determining the median where it useful to put it in a single functi...
DLLExport Statistics getStatistics< bool >(const vector< bool > &data, const unsigned int flags)
Getting statistics of a boolean array should just give a bunch of NaNs.
std::vector< double > getMomentsAboutOrigin(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
Rfactor MANTID_KERNEL_DLL getRFactor(const std::vector< double > &obsI, const std::vector< double > &calI, const std::vector< double > &obsE)
Return the R-factors (Rwp) of a diffraction pattern data.
R factor for powder data analysis.
Simple struct to store statistics.
double median
Median value.
double minimum
Minimum value.
double maximum
Maximum value.
double standard_deviation
standard_deviation of the values
Statistics()
Default value for everything is nan.