Mantid
Loading...
Searching...
No Matches
Statistics.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
12
13#include <boost/python/class.hpp>
14#include <boost/python/def.hpp>
15#include <boost/python/overloads.hpp>
16#include <boost/python/return_value_policy.hpp>
17#include <boost/python/scope.hpp>
18
19// See
20// http://docs.scipy.org/doc/numpy/reference/c-api.array.html#PY_ARRAY_UNIQUE_SYMBOL
21#define PY_ARRAY_UNIQUE_SYMBOL KERNEL_ARRAY_API
22#define NO_IMPORT_ARRAY
23#include <numpy/arrayobject.h>
24
26using namespace Mantid::PythonInterface;
27using namespace boost::python;
28
29namespace {
31
// Empty tag type: it carries no state and exists only so that the exported
// functions can be grouped under a "Stats" class on the Python side.
class Stats {};
34
35// For all methods below we have to extract specific types from Python to C++.
36// We choose to support only Python float arrays (C++ double)
37
44bool isFloatArray(PyObject *obj) {
45#if NPY_API_VERSION >= 0x00000007 // 1.7
46 return PyArray_ISFLOAT((const PyArrayObject *)obj);
47#else
48 return PyArray_ISFLOAT((PyArrayObject *)obj);
49#endif
50}
51
60bool typesEqual(PyObject *first, PyObject *second) {
61#if NPY_API_VERSION >= 0x00000007 // 1.7
62 const auto *firstArray = reinterpret_cast<const PyArrayObject *>(first);
63 const auto *secondArray = reinterpret_cast<const PyArrayObject *>(second);
64#else
65 PyArrayObject *firstArray = (PyArrayObject *)first;
66 PyArrayObject *secondArray = (PyArrayObject *)second;
67#endif
68 return PyArray_TYPE(firstArray) != PyArray_TYPE(secondArray);
69}
70
/// Exception raised when a numpy array does not hold the supported element
/// type (only Python float arrays are accepted by the wrappers in this file).
class UnknownDataType : public std::invalid_argument {
public:
  UnknownDataType()
      : std::invalid_argument("Unknown datatype. Currently only arrays of Python floats are supported ") {}
};
78
79//============================ getStatistics
80//============================================
81
88Statistics getStatisticsNumpy(const NDArray &data, const bool sorted = false) {
92
93 if (isFloatArray(data.ptr())) {
94 unsigned int flags = StatOptions::AllStats;
95 if (sorted)
97 return getStatistics(NDArrayToVector<double>(data)(), flags);
98 } else {
99 throw UnknownDataType();
100 }
101}
102
103GNU_DIAG_OFF("unused-local-typedef")
104// Ignore -Wconversion warnings coming from boost::python
105// Seen with GCC 7.1.1 and Boost 1.63.0
106GNU_DIAG_OFF("conversion")
107
108// Define an overload to handle the default argument
109BOOST_PYTHON_FUNCTION_OVERLOADS(getStatisticsOverloads, getStatisticsNumpy, 1, 2)
110GNU_DIAG_ON("conversion")
111GNU_DIAG_ON("unused-local-typedef")
112//============================ Z score
113//============================================
114
115
120std::vector<double> getZscoreNumpy(const NDArray &data) {
123
124 if (isFloatArray(data.ptr())) {
125 return getZscore(NDArrayToVector<double>(data)());
126 } else {
127 throw UnknownDataType();
128 }
129}
130
136std::vector<double> getZscoreNumpyDeprecated(const NDArray &data, const bool sorted) {
137 UNUSED_ARG(sorted);
138 PyErr_Warn(PyExc_DeprecationWarning, "getZScore no longer requires the second sorted argument.");
139 return getZscoreNumpy(data);
140}
141
146std::vector<double> getModifiedZscoreNumpy(const NDArray &data, const bool sorted = false) {
147 UNUSED_ARG(sorted) // We explicitly check in the kernel now
150
151 if (isFloatArray(data.ptr())) {
152 return getModifiedZscore(NDArrayToVector<double>(data)());
153 } else {
154 throw UnknownDataType();
155 }
156}
157
158GNU_DIAG_OFF("unused-local-typedef")
159// Ignore -Wconversion warnings coming from boost::python
160// Seen with GCC 7.1.1 and Boost 1.63.0
161GNU_DIAG_OFF("conversion")
162
163// Define an overload to handle the default argument
164BOOST_PYTHON_FUNCTION_OVERLOADS(getModifiedZscoreOverloads, getModifiedZscoreNumpy, 1, 2)
165GNU_DIAG_ON("conversion")
166GNU_DIAG_ON("unused-local-typedef")
167
168//============================ getMoments
169//============================================
170
// Signature shared by the kernel moment routines that the wrappers dispatch
// to: two vectors of doubles plus the highest moment to compute, returning
// the moments as a vector of doubles.
using MomentsFunction = std::vector<double> (*)(const std::vector<double> &,
                                                const std::vector<double> &,
                                                const int);
173
185std::vector<double> getMomentsNumpyImpl(MomentsFunction momentsFunc, const NDArray &indep, const NDArray &depend,
186 const int maxMoment) {
188
189 // Both input arrays must have the same typed data
190 if (typesEqual(indep.ptr(), depend.ptr())) {
191 throw std::invalid_argument("Datatypes of input arrays must match.");
192 }
193
194 if (isFloatArray(indep.ptr()) && isFloatArray(indep.ptr())) {
195 return momentsFunc(NDArrayToVector<double>(indep)(), NDArrayToVector<double>(depend)(), maxMoment);
196 } else {
197 throw UnknownDataType();
198 }
199}
200
205std::vector<double> getMomentsAboutOriginNumpy(const NDArray &indep, const NDArray &depend, const int maxMoment = 3) {
207 return getMomentsNumpyImpl(&getMomentsAboutOrigin, indep, depend, maxMoment);
208}
209
210GNU_DIAG_OFF("unused-local-typedef")
211// Ignore -Wconversion warnings coming from boost::python
212// Seen with GCC 7.1.1 and Boost 1.63.0
213GNU_DIAG_OFF("conversion")
214// Define an overload to handle the default argument
215BOOST_PYTHON_FUNCTION_OVERLOADS(getMomentsAboutOriginOverloads, getMomentsAboutOriginNumpy, 2, 3)
216GNU_DIAG_ON("conversion")
217GNU_DIAG_ON("unused-local-typedef")
222std::vector<double> getMomentsAboutMeanNumpy(const NDArray &indep, NDArray &depend, const int maxMoment = 3) {
224 return getMomentsNumpyImpl(&getMomentsAboutMean, indep, depend, maxMoment);
225}
226
227GNU_DIAG_OFF("unused-local-typedef")
228// Ignore -Wconversion warnings coming from boost::python
229// Seen with GCC 7.1.1 and Boost 1.63.0
230GNU_DIAG_OFF("conversion")
231// Define an overload to handle the default argument
232BOOST_PYTHON_FUNCTION_OVERLOADS(getMomentsAboutMeanOverloads, getMomentsAboutMeanNumpy, 2, 3)
233GNU_DIAG_ON("conversion")
234GNU_DIAG_ON("unused-local-typedef")
235
236
237} // namespace
238
239// -------------------------------------- Exports start here
240// --------------------------------------
241
243 // typedef std::vector --> numpy array result converter
244 using ReturnNumpyArray = return_value_policy<Policies::VectorToNumpy>;
245
246 // define a new "Statistics" scope so that everything is called as
247 // Statistics.getXXX
248 // this affects everything defined within the lifetime of the scope object
249 scope stats =
250 class_<Stats>("Stats", no_init)
251 .def("getStatistics", &getStatisticsNumpy,
252 getStatisticsOverloads((arg("data"), arg("sorted")), "Determine the statistics for an array of data"))
253 .staticmethod("getStatistics")
254
255 .def("getZscore", &getZscoreNumpy, arg("data"), "Determine the Z score for an array of data")
256 .def("getZscore", &getZscoreNumpyDeprecated, (arg("data"), arg("sorted")),
257 "Determine the Z score for an array of "
258 "data (deprecated + ignored sorted argument)")
259 .staticmethod("getZscore")
260
261 .def("getModifiedZscore", &getModifiedZscoreNumpy,
262 getModifiedZscoreOverloads((arg("data"), arg("sorted")),
263 "Determine the modified Z score for an array of data"))
264 .staticmethod("getModifiedZscore")
265
266 .def("getMomentsAboutOrigin", &getMomentsAboutOriginNumpy,
267 getMomentsAboutOriginOverloads(
268 (arg("indep"), arg("depend"), arg("maxMoment")),
269 "Calculate the first n-moments (inclusive) about the origin")[ReturnNumpyArray()])
270 .staticmethod("getMomentsAboutOrigin")
271
272 .def("getMomentsAboutMean", &getMomentsAboutMeanNumpy,
273 getMomentsAboutMeanOverloads(
274 (arg("indep"), arg("depend"), arg("maxMoment")),
275 "Calculate the first n-moments (inclusive) about the mean")[ReturnNumpyArray()])
276 .staticmethod("getMomentsAboutMean");
277
278 // Want this in the same scope as above so must be here
279 class_<Statistics>("Statistics")
280 .add_property("minimum", &Statistics::minimum, "Minimum value of the data set")
281 .add_property("maximum", &Statistics::maximum, "Maximum value of the data set")
282 .add_property("mean", &Statistics::mean, "Simple mean, sum(data)/nvalues, of the data set")
283 .add_property("median", &Statistics::median, "Middle value of the data set")
284 .add_property("standard_deviation", &Statistics::standard_deviation, "Standard width of distribution");
285}
tagPyArrayObject PyArrayObject
void export_Statistics()
Definition: Statistics.cpp:242
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
Definition: System.h:64
double obj
the value of the quadratic function
#define GNU_DIAG_ON(x)
#define GNU_DIAG_OFF(x)
This is a collection of macros for turning compiler warnings off in a controlled manner.
Thin object wrapper around a numpy array.
Definition: NDArray.h:31
std::vector< double > getModifiedZscore(const std::vector< TYPE > &data)
Return the modified Z score values for a dataset.
Definition: Statistics.cpp:135
Statistics getStatistics(const std::vector< TYPE > &data, const unsigned int flags=StatOptions::AllStats)
Return a statistics object for the given data set.
Definition: Statistics.cpp:167
std::vector< double > getZscore(const std::vector< TYPE > &data)
Return the Z score values for a dataset.
Definition: Statistics.cpp:81
std::vector< double > getMomentsAboutMean(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
Definition: Statistics.cpp:351
std::vector< double > getMomentsAboutOrigin(const std::vector< TYPE > &x, const std::vector< TYPE > &y, const int maxMoment=3)
Return the first n-moments of the supplied data.
Definition: Statistics.cpp:295
Controls the computation of statisical data.
Definition: Statistics.h:39
Simple struct to store statistics.
Definition: Statistics.h:25
double mean
Mean value.
Definition: Statistics.h:31
double median
Median value.
Definition: Statistics.h:33
double minimum
Minimum value.
Definition: Statistics.h:27
double maximum
Maximum value.
Definition: Statistics.h:29
double standard_deviation
standard_deviation of the values
Definition: Statistics.h:35
Converter taking an input numpy array and converting it to a std::vector.