Mantid
Loading...
Searching...
No Matches
Statistics.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
12
#include <boost/python/class.hpp>
#include <boost/python/def.hpp>
#include <boost/python/errors.hpp>
#include <boost/python/overloads.hpp>
#include <boost/python/return_value_policy.hpp>
#include <boost/python/scope.hpp>
18
19// See
20// http://docs.scipy.org/doc/numpy/reference/c-api.array.html#PY_ARRAY_UNIQUE_SYMBOL
21#define PY_ARRAY_UNIQUE_SYMBOL KERNEL_ARRAY_API
22#define NO_IMPORT_ARRAY
23#include <numpy/arrayobject.h>
24
26using namespace Mantid::PythonInterface;
27using namespace boost::python;
28
29namespace {
31
// Empty tag type. It carries no state; it exists only so the export code in
// this file can register a Python class named "Stats" for the statistics
// functions to hang off as static methods.
struct Stats {};
34
35// For all methods below we have to extract specific types from Python to C++.
36// We choose to support only Python float arrays (C++ double)
37
44bool isFloatArray(PyObject *obj) {
45#if NPY_API_VERSION >= 0x00000007 // 1.7
46 return PyArray_ISFLOAT((const PyArrayObject *)obj);
47#else
48 return PyArray_ISFLOAT((PyArrayObject *)obj);
49#endif
50}
51
60bool typesEqual(PyObject *first, PyObject *second) {
61#if NPY_API_VERSION >= 0x00000007 // 1.7
62 const auto *firstArray = reinterpret_cast<const PyArrayObject *>(first);
63 const auto *secondArray = reinterpret_cast<const PyArrayObject *>(second);
64#else
65 PyArrayObject *firstArray = (PyArrayObject *)first;
66 PyArrayObject *secondArray = (PyArrayObject *)second;
67#endif
68 return PyArray_TYPE(firstArray) != PyArray_TYPE(secondArray);
69}
70
72class UnknownDataType : public std::invalid_argument {
73public:
74 UnknownDataType()
75 : std::invalid_argument("Unknown datatype. Currently only arrays of "
76 "Python floats are supported ") {}
77};
78
79//============================ getStatistics
80//============================================
81
88Statistics getStatisticsNumpy(const NDArray &data, const bool sorted = false) {
92
93 if (isFloatArray(data.ptr())) {
94 unsigned int flags = StatOptions::AllStats;
95 if (sorted)
97 return getStatistics(NDArrayToVector<double>(data)(), flags);
98 } else {
99 throw UnknownDataType();
100 }
101}
102
103GNU_DIAG_OFF("unused-local-typedef")
104// Ignore -Wconversion warnings coming from boost::python
105// Seen with GCC 7.1.1 and Boost 1.63.0
106GNU_DIAG_OFF("conversion")
107
108// Define an overload to handle the default argument
109// cppcheck-suppress unknownMacro
110BOOST_PYTHON_FUNCTION_OVERLOADS(getStatisticsOverloads, getStatisticsNumpy, 1, 2)
111GNU_DIAG_ON("conversion")
112GNU_DIAG_ON("unused-local-typedef")
113//============================ Z score
114//============================================
115
116
121std::vector<double> getZscoreNumpy(const NDArray &data) {
124
125 if (isFloatArray(data.ptr())) {
126 return getZscore(NDArrayToVector<double>(data)());
127 } else {
128 throw UnknownDataType();
129 }
130}
131
137std::vector<double> getZscoreNumpyDeprecated(const NDArray &data, const bool sorted) {
138 UNUSED_ARG(sorted);
139 PyErr_Warn(PyExc_DeprecationWarning, "getZScore no longer requires the second sorted argument.");
140 return getZscoreNumpy(data);
141}
142
147std::vector<double> getModifiedZscoreNumpy(const NDArray &data, const bool sorted = false) {
148 UNUSED_ARG(sorted) // We explicitly check in the kernel now
151
152 if (isFloatArray(data.ptr())) {
153 return getModifiedZscore(NDArrayToVector<double>(data)());
154 } else {
155 throw UnknownDataType();
156 }
157}
158
159GNU_DIAG_OFF("unused-local-typedef")
160// Ignore -Wconversion warnings coming from boost::python
161// Seen with GCC 7.1.1 and Boost 1.63.0
162GNU_DIAG_OFF("conversion")
163
164// Define an overload to handle the default argument
165BOOST_PYTHON_FUNCTION_OVERLOADS(getModifiedZscoreOverloads, getModifiedZscoreNumpy, 1, 2)
166GNU_DIAG_ON("conversion")
167GNU_DIAG_ON("unused-local-typedef")
168
169//============================ getMoments
170//============================================
171
172// Function pointer to real implementation of getMoments
173using MomentsFunction = std::vector<double> (*)(const std::vector<double> &, const std::vector<double> &, const int);
174
186std::vector<double> getMomentsNumpyImpl(MomentsFunction momentsFunc, const NDArray &indep, const NDArray &depend,
187 const int maxMoment) {
189
190 // Both input arrays must have the same typed data
191 if (typesEqual(indep.ptr(), depend.ptr())) {
192 throw std::invalid_argument("Datatypes of input arrays must match.");
193 }
194
195 if (isFloatArray(indep.ptr()) && isFloatArray(indep.ptr())) {
196 return momentsFunc(NDArrayToVector<double>(indep)(), NDArrayToVector<double>(depend)(), maxMoment);
197 } else {
198 throw UnknownDataType();
199 }
200}
201
206std::vector<double> getMomentsAboutOriginNumpy(const NDArray &indep, const NDArray &depend, const int maxMoment = 3) {
208 return getMomentsNumpyImpl(&getMomentsAboutOrigin, indep, depend, maxMoment);
209}
210
211GNU_DIAG_OFF("unused-local-typedef")
212// Ignore -Wconversion warnings coming from boost::python
213// Seen with GCC 7.1.1 and Boost 1.63.0
214GNU_DIAG_OFF("conversion")
215// Define an overload to handle the default argument
216BOOST_PYTHON_FUNCTION_OVERLOADS(getMomentsAboutOriginOverloads, getMomentsAboutOriginNumpy, 2, 3)
217GNU_DIAG_ON("conversion")
218GNU_DIAG_ON("unused-local-typedef")
223std::vector<double> getMomentsAboutMeanNumpy(const NDArray &indep, NDArray &depend, const int maxMoment = 3) {
225 return getMomentsNumpyImpl(&getMomentsAboutMean, indep, depend, maxMoment);
226}
227
228GNU_DIAG_OFF("unused-local-typedef")
229// Ignore -Wconversion warnings coming from boost::python
230// Seen with GCC 7.1.1 and Boost 1.63.0
231GNU_DIAG_OFF("conversion")
232// Define an overload to handle the default argument
233BOOST_PYTHON_FUNCTION_OVERLOADS(getMomentsAboutMeanOverloads, getMomentsAboutMeanNumpy, 2, 3)
234GNU_DIAG_ON("conversion")
235GNU_DIAG_ON("unused-local-typedef")
236
237
238} // namespace
239
240// -------------------------------------- Exports start here
241// --------------------------------------
242
244 // typedef std::vector --> numpy array result converter
245 using ReturnNumpyArray = return_value_policy<Policies::VectorToNumpy>;
246
247 // define a new "Statistics" scope so that everything is called as
248 // Statistics.getXXX
249 // this affects everything defined within the lifetime of the scope object
250 scope stats =
251 class_<Stats>("Stats", no_init)
252 .def("getStatistics", &getStatisticsNumpy,
253 getStatisticsOverloads((arg("data"), arg("sorted")), "Determine the statistics for an array of data"))
254 .staticmethod("getStatistics")
255
256 .def("getZscore", &getZscoreNumpy, arg("data"), "Determine the Z score for an array of data")
257 .def("getZscore", &getZscoreNumpyDeprecated, (arg("data"), arg("sorted")),
258 "Determine the Z score for an array of "
259 "data (deprecated + ignored sorted argument)")
260 .staticmethod("getZscore")
261
262 .def("getModifiedZscore", &getModifiedZscoreNumpy,
263 getModifiedZscoreOverloads((arg("data"), arg("sorted")),
264 "Determine the modified Z score for an array of data"))
265 .staticmethod("getModifiedZscore")
266
267 .def("getMomentsAboutOrigin", &getMomentsAboutOriginNumpy,
268 getMomentsAboutOriginOverloads(
269 (arg("indep"), arg("depend"), arg("maxMoment")),
270 "Calculate the first n-moments (inclusive) about the origin")[ReturnNumpyArray()])
271 .staticmethod("getMomentsAboutOrigin")
272
273 .def("getMomentsAboutMean", &getMomentsAboutMeanNumpy,
274 getMomentsAboutMeanOverloads(
275 (arg("indep"), arg("depend"), arg("maxMoment")),
276 "Calculate the first n-moments (inclusive) about the mean")[ReturnNumpyArray()])
277 .staticmethod("getMomentsAboutMean");
278
279 // Want this in the same scope as above so must be here
280 class_<Statistics>("Statistics")
281 .add_property("minimum", &Statistics::minimum, "Minimum value of the data set")
282 .add_property("maximum", &Statistics::maximum, "Maximum value of the data set")
283 .add_property("mean", &Statistics::mean, "Simple mean, sum(data)/nvalues, of the data set")
284 .add_property("median", &Statistics::median, "Middle value of the data set")
285 .add_property("standard_deviation", &Statistics::standard_deviation, "Standard width of distribution");
286}
tagPyArrayObject PyArrayObject
void export_Statistics()
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implementations but are required for documentation ...
Definition System.h:48
double obj
the value of the quadratic function
#define GNU_DIAG_ON(x)
#define GNU_DIAG_OFF(x)
This is a collection of macros for turning compiler warnings off in a controlled manner.
Thin object wrapper around a numpy array.
Definition NDArray.h:31
std::vector< double > getModifiedZscore(const std::vector< TYPE > &data)
Return the modified Z score values for a dataset.
Statistics getStatistics(const std::vector< TYPE > &data, const unsigned int flags=StatOptions::AllStats)
Return a statistics object for the given data set.
std::vector< double > getZscore(const std::vector< TYPE > &data)
Return the Z score values for a dataset.
std::vector< double > getMomentsAboutMean(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
std::vector< double > getMomentsAboutOrigin(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
Controls the computation of statistical data.
Definition Statistics.h:52
Simple struct to store statistics.
Definition Statistics.h:35
double mean
Mean value.
Definition Statistics.h:41
double median
Median value.
Definition Statistics.h:43
double minimum
Minimum value.
Definition Statistics.h:37
double maximum
Maximum value.
Definition Statistics.h:39
double standard_deviation
standard_deviation of the values
Definition Statistics.h:45
Converter taking an input numpy array and converting it to a std::vector.