11#include <boost/accumulators/accumulators.hpp>
12#include <boost/accumulators/statistics/max.hpp>
13#include <boost/accumulators/statistics/min.hpp>
14#include <boost/accumulators/statistics/stats.hpp>
15#include <boost/accumulators/statistics/variance.hpp>
24Logger logger(
"Statistics");
35 double nan = std::numeric_limits<double>::quiet_NaN();
51template <
typename TYPE>
double getMedian(
const vector<TYPE> &data) {
52 const size_t size = data.size();
54 return static_cast<double>(data[0]);
56 const bool isSorted = std::is_sorted(data.cbegin(), data.cend());
57 std::vector<TYPE> tmpSortedData;
58 auto sortedDataRef = std::ref(data);
61 std::sort(tmpSortedData.begin(), tmpSortedData.end());
62 sortedDataRef = std::ref(std::as_const(tmpSortedData));
65 const bool is_even = (size % 2 == 0);
68 const auto left =
static_cast<double>(sortedDataRef.get()[size / 2 - 1]);
69 const auto right =
static_cast<double>(sortedDataRef.get()[size / 2]);
72 retVal =
static_cast<double>(sortedDataRef.get()[size / 2]);
81template <
typename TYPE> std::vector<double>
getZscore(
const vector<TYPE> &data) {
82 std::vector<double> Zscore;
83 if (data.size() < 3) {
84 Zscore.resize(data.size(), 0.);
89 Zscore.resize(data.size(), 0.);
92 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
93 auto tmp =
static_cast<double>(*it);
102template <
typename TYPE> std::vector<double>
getWeightedZscore(
const vector<TYPE> &data,
const vector<TYPE> &weights) {
103 std::vector<double> Zscore;
104 if (data.size() < 3) {
105 Zscore.resize(data.size(), 0.);
110 Zscore.resize(data.size(), 0.);
113 double sumWeights = 0.0;
114 double sumWeightedData = 0.0;
115 double weightedVariance = 0.0;
116 for (
size_t it = 0; it != data.size(); ++it) {
117 sumWeights +=
static_cast<double>(weights[it]);
118 sumWeightedData +=
static_cast<double>(weights[it] * data[it]);
120 double weightedMean = sumWeightedData / sumWeights;
121 for (
size_t it = 0; it != data.size(); ++it) {
122 weightedVariance += std::pow(
static_cast<double>(data[it]) - weightedMean, 2) *
123 std::pow(
static_cast<double>(weights[it]) / sumWeights, 2);
125 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
126 Zscore.emplace_back(
fabs((
static_cast<double>(*it) - weightedMean) / std::sqrt(weightedVariance)));
136 if (data.size() < 3) {
137 std::vector<double> Zscore(data.size(), 0.);
140 std::vector<double> MADvec;
143 for (
auto it = data.cbegin(); it != data.cend(); ++it) {
144 tmp =
static_cast<double>(*it);
145 MADvec.emplace_back(
fabs(
tmp - median));
149 std::vector<double> Zscore(data.size(), 0.);
153 std::vector<double> Zscore;
154 for (
auto it = data.begin(); it != data.end(); ++it) {
155 tmp =
static_cast<double>(*it);
156 Zscore.emplace_back(0.6745 *
fabs((
tmp - median) / MAD));
175 using namespace boost::accumulators;
176 accumulator_set<double, stats<tag::min, tag::max, tag::variance>> acc;
177 for (
auto &
value : data) {
178 acc(
static_cast<double>(
value));
182 statistics.
mean = mean(acc);
183 double var = variance(acc);
186 auto ndofs =
static_cast<double>(data.size());
187 var *= ndofs / (ndofs - 1.0);
192 using namespace boost::accumulators;
193 accumulator_set<double, stats<tag::mean>> acc;
194 for (
auto &
value : data) {
195 acc(
static_cast<double>(
value));
197 statistics.
mean = mean(acc);
229Rfactor getRFactor(
const std::vector<double> &obsI,
const std::vector<double> &calI,
const std::vector<double> &obsE) {
231 if (obsI.size() != calI.size() || obsI.size() != obsE.size()) {
232 std::stringstream errss;
233 errss <<
"GetRFactor() Input Error! Observed Intensity (" << obsI.size() <<
"), Calculated Intensity ("
234 << calI.size() <<
") and Observed Error (" << obsE.size() <<
") have different number of elements.";
235 throw std::runtime_error(errss.str());
238 throw std::runtime_error(
"getRFactor(): the input arrays are empty.");
244 double sumrpdenom = 0;
246 size_t numpts = obsI.size();
247 for (
size_t i = 0; i < numpts; ++i) {
248 double cal_i = calI[i];
249 double obs_i = obsI[i];
250 double sigma = obsE[i];
252 double diff = obs_i - cal_i;
254 if (weight == weight && weight <= DBL_MAX) {
256 sumrpnom +=
fabs(diff);
257 sumrpdenom +=
fabs(obs_i);
259 double tempnom = weight * diff * diff;
260 double tempden = weight * obs_i * obs_i;
265 if (tempnom != tempnom || tempden != tempden) {
266 logger.error() <<
"***** Error! ****** Data indexed " << i <<
" is NaN. "
267 <<
"i = " << i <<
": cal = " << calI[i] <<
", obs = " << obs_i <<
", weight = " << weight
274 rfactor.
Rp = (sumrpnom / sumrpdenom);
275 rfactor.
Rwp = std::sqrt(sumnom / sumdenom);
277 if (rfactor.
Rwp != rfactor.
Rwp)
278 logger.debug() <<
"Rwp is NaN. Denominator = " << sumnom <<
"; Nominator = " << sumdenom <<
". \n";
294template <
typename TYPE>
297 bool isDensity(
x.size() ==
y.size());
300 if ((!isDensity) && (
x.size() !=
y.size() + 1)) {
301 std::stringstream msg;
302 msg <<
"length of x (" <<
x.size() <<
") and y (" <<
y.size() <<
")do not match";
303 throw std::out_of_range(msg.str());
307 std::vector<double> result(maxMoment + 1, 0.);
310 size_t numPoints =
y.size();
312 numPoints =
x.size() - 1;
318 for (
size_t j = 0; j < numPoints; ++j) {
320 const double xVal = .5 *
static_cast<double>(
x[j] +
x[j + 1]);
322 auto temp =
static_cast<double>(
y[j]);
324 const auto xDelta =
static_cast<double>(
x[j + 1] -
x[j]);
325 temp = .5 * (temp +
static_cast<double>(
y[j + 1])) * xDelta;
330 for (
size_t i = 1; i < result.size(); ++i) {
350template <
typename TYPE>
351std::vector<double>
getMomentsAboutMean(
const std::vector<TYPE> &
x,
const std::vector<TYPE> &
y,
const int maxMoment) {
354 const double mean = momentsAboutOrigin[1];
357 std::vector<double> result(maxMoment + 1, 0.);
358 result[0] = momentsAboutOrigin[0];
365 bool isDensity(
x.size() ==
y.size());
368 size_t numPoints =
y.size();
370 numPoints =
x.size() - 1;
376 for (
size_t j = 0; j < numPoints; ++j) {
379 const double xVal = .5 *
static_cast<double>(
x[j] +
x[j + 1]) - mean;
384 const auto xDelta =
static_cast<double>(
x[j + 1] -
x[j]);
385 temp = xVal * .5 *
static_cast<double>(
y[j] +
y[j + 1]) * xDelta;
387 temp = xVal *
static_cast<double>(
y[j]);
392 for (
size_t i = 2; i < result.size(); ++i) {
403#define INSTANTIATE(TYPE) \
404 template MANTID_KERNEL_DLL Statistics getStatistics<TYPE>(const vector<TYPE> &, const unsigned int); \
405 template MANTID_KERNEL_DLL std::vector<double> getZscore<TYPE>(const vector<TYPE> &); \
406 template MANTID_KERNEL_DLL std::vector<double> getWeightedZscore<TYPE>(const vector<TYPE> &, const vector<TYPE> &); \
407 template MANTID_KERNEL_DLL std::vector<double> getModifiedZscore<TYPE>(const vector<TYPE> &); \
408 template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutOrigin<TYPE>( \
409 const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment); \
410 template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutMean<TYPE>( \
411 const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment);
double value
The value of the point.
#define INSTANTIATE(TYPE)
#define DLLExport
Definitions of the DLLImport compiler directives for MSVC.
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
std::vector< double > getModifiedZscore(const std::vector< TYPE > &data)
Return the modified Z score values for a dataset.
DLLExport Statistics getStatistics< string >(const vector< string > &data, const unsigned int flags)
Getting statistics of a string array should just give a bunch of NaNs.
Statistics getStatistics(const std::vector< TYPE > &data, const unsigned int flags=StatOptions::AllStats)
Return a statistics object for the given data set.
std::vector< double > getZscore(const std::vector< TYPE > &data)
Return the Z score values for a dataset.
std::vector< double > getMomentsAboutMean(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
std::vector< double > getWeightedZscore(const std::vector< TYPE > &data, const std::vector< TYPE > &weights)
There are enough special cases in determining the Z score where it useful to put it in a single funct...
double getMedian(const vector< TYPE > &data)
There are enough special cases in determining the median where it useful to put it in a single functi...
DLLExport Statistics getStatistics< bool >(const vector< bool > &data, const unsigned int flags)
Getting statistics of a boolean array should just give a bunch of NaNs.
std::vector< double > getMomentsAboutOrigin(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
Statistics getNanStatistics()
Generate a Statistics object where all of the values are NaN.
Rfactor MANTID_KERNEL_DLL getRFactor(const std::vector< double > &obsI, const std::vector< double > &calI, const std::vector< double > &obsE)
Return the R-factors (Rwp) of a diffraction pattern data.
R factor for powder data analysis.
Simple struct to store statistics.
double median
Median value.
double minimum
Minimum value.
double maximum
Maximum value.
double standard_deviation
standard_deviation of the values