Mantid
Loading...
Searching...
No Matches
LoadRKH.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7//---------------------------------------------------
8// Includes
9//---------------------------------------------------
22
23#include <boost/algorithm/string.hpp>
24#include <boost/date_time/date_parsing.hpp>
25#include <boost/date_time/gregorian/gregorian.hpp>
26#include <boost/lexical_cast.hpp>
27
28#include <boost/regex.hpp>
29#include <istream>
30#include <numeric>
31
32namespace {
33// Check if we are dealing with a unit line
34bool isUnit(const Mantid::Kernel::StringTokenizer &codes) {
35 // The unit line needs to have the format of
36 // 1. Either 0 or 6 [06]
37 // 2. Then several characters [\w]
38 // 3. Open bracket
39 // 4. Several characters
40 // 5. Close bracket
41 std::string input = std::accumulate(codes.begin(), codes.end(), std::string(""));
42 std::string reg(R"(^[06][\w]+\‍([/ \w\^-]+\)$)");
43 boost::regex baseRegex(reg);
44 return boost::regex_match(input, baseRegex);
45}
46} // namespace
47
48namespace Mantid::DataHandling {
49using namespace Mantid::API;
50using namespace Mantid::Kernel;
51
53
54
67void LoadRKH::readLinesForRKH1D(std::istream &stream, int readStart, int readEnd, HistogramData::Points &x,
68 HistogramData::Counts &y, HistogramData::CountStandardDeviations &ye,
69 HistogramData::PointStandardDeviations &xe, Progress &prog, bool readXError) {
70
71 std::vector<double> xData;
72 std::vector<double> yData;
73 std::vector<double> xError;
74 std::vector<double> yError;
75
76 xData.reserve(readEnd);
77 yData.reserve(readEnd);
78 xError.reserve(readEnd);
79 yError.reserve(readEnd);
80
81 std::string fileline;
82 for (int index = 1; index <= readEnd; ++index) {
83 getline(stream, fileline);
84 if (index < readStart)
85 continue;
86
87 double xValue(0.), yValue(0.), yErrorValue(0.);
88 std::istringstream datastr(fileline);
89 datastr >> xValue >> yValue >> yErrorValue;
90
91 xData.emplace_back(xValue);
92 yData.emplace_back(yValue);
93 yError.emplace_back(yErrorValue);
94
95 // check if we need to read in x error values
96 if (readXError) {
97 double xErrorValue(0.);
98 datastr >> xErrorValue;
99 xError.emplace_back(xErrorValue);
100 }
101
102 prog.report();
103 }
104
105 x = xData;
106 y = yData;
107 ye = yError;
108 xe = xError;
109}
110
118 if (!descriptor.isAscii())
119 return 0;
120
121 auto &file = descriptor.data();
122 std::string fileline;
123
124 // Header looks something like this where the text inside [] could be anything
125 // LOQ Thu 28-OCT-2004 12:23 [W 26 INST_DIRECT_BEAM]
126
127 // -- First line --
128 std::getline(file, fileline);
129 // LOQ or SANS2D (case insensitive)
130 if (boost::ifind_first(fileline, "loq").empty() && boost::ifind_first(fileline, "sans2d").empty())
131 return 0;
132
133 // Next should be date time string
134 static const char *MONTHS[12] = {"-JAN-", "-FEB-", "-MAR-", "-APR-", "-MAY-", "-JUN-",
135 "-JUL-", "-AUG-", "-SEP-", "-OCT-", "-NOV-", "-DEC-"};
136
137 bool foundMonth(false);
138 for (auto &month : MONTHS) {
139 if (!boost::ifind_first(fileline, month).empty()) {
140 foundMonth = true;
141 break;
142 }
143 }
144 if (!foundMonth)
145 return 0;
146
147 // there are no constraints on the second line
148 std::getline(file, fileline);
149
150 // read 3rd line - should contain sequence "0 0 0 1"
151 std::getline(file, fileline);
152 if (fileline.find("0 0 0 1") == std::string::npos)
153 return 0;
154
155 // read 4th line - should contain sequence ""0 0 0 0"
156 std::getline(file, fileline);
157 if (fileline.find("0 0 0 0") == std::string::npos)
158 return 0;
159
160 // read 5th line - should contain sequence "3 (F12.5,2E16.6)"
161 std::getline(file, fileline);
162 if (fileline.find("3 (F12.5,2E16.6)") == std::string::npos)
163 return 0;
164
165 return 20; // Better than LoadAscii
166}
167
172 const std::vector<std::string> exts{".txt", ".q", ".dat"};
173 declareProperty(std::make_unique<API::FileProperty>("Filename", "", API::FileProperty::Load, exts),
174 "Name of the RKH file to load");
175 declareProperty(std::make_unique<API::WorkspaceProperty<>>("OutputWorkspace", "", Kernel::Direction::Output),
176 "The name to use for the output workspace");
177 // Get the units registered with the UnitFactory
178 std::vector<std::string> propOptions = Kernel::UnitFactory::Instance().getKeys();
179 m_unitKeys.insert(propOptions.begin(), propOptions.end());
180
181 // m_RKHKeys will be taken as axis(1) units, the first axis will have only one
182 // value
183 // and so selection of one of these units will result in a workspace
184 // orientated differently
185 // from selection of the above
186 m_RKHKeys.insert("SpectrumNumber");
187 propOptions.insert(propOptions.end(), m_RKHKeys.begin(), m_RKHKeys.end());
188
189 declareProperty("FirstColumnValue", "Wavelength", std::make_shared<Kernel::StringListValidator>(propOptions),
190 "Only used for 1D files, the units of the first column in the RKH "
191 "file (default Wavelength)");
192}
193
198 using namespace Mantid::Kernel;
199 using namespace Mantid::API;
200
201 // Retrieve filename and try to open the file
202 std::string filename = getPropertyValue("Filename");
203
204 m_fileIn.open(filename.c_str());
205 if (!m_fileIn) {
206 g_log.error("Unable to open file " + filename);
207 throw Exception::FileError("Unable to open File: ", filename);
208 }
209 g_log.information() << "Opened file \"" << filename << "\" for reading\n";
210
211 std::string line;
212 // The first line contains human readable information about the original
213 // workspace that we don't need
214 getline(m_fileIn, line);
215 getline(m_fileIn, line);
216
217 // Use one line of the file to diagnose if it is 1D or 2D, this line contains
218 // some data required by the 2D data reader
219 MatrixWorkspace_sptr result = is2D(line) ? read2D(line) : read1D();
220
221 // all RKH files contain distribution data
222 result->setDistribution(true);
223 // Set the output workspace
224 setProperty("OutputWorkspace", result);
225}
226
232bool LoadRKH::is2D(const std::string &testLine) {
233 // split the line into words
236 return isUnit(codes);
237}
238
243 g_log.information() << "file appears to contain 1D information, reading in 1D data mode\n";
244
245 // The 3rd line contains information regarding the number of points in the
246 // file and
247 // start and end reading points
248 int totalPoints(0), readStart(0), readEnd(0), buried(0);
249 std::string fileline;
250
251 getline(m_fileIn, fileline);
252 std::istringstream is(fileline);
253 // Get data information
254 for (int counter = 1; counter < 8; ++counter) {
255 switch (counter) {
256 case 1:
257 is >> totalPoints;
258 break;
259 case 5:
260 is >> readStart;
261 break;
262 case 6:
263 is >> readEnd;
264 break;
265 default:
266 is >> buried;
267 break;
268 }
269 }
270
271 g_log.information() << "Total number of data points declared to be in the data file: " << totalPoints << "\n";
272
273 // What are we reading?
274 std::string firstColVal = getProperty("FirstColumnValue");
275 bool colIsUnit(true);
276 if (m_RKHKeys.find(firstColVal) != m_RKHKeys.end()) {
277 colIsUnit = false;
278 readStart = 1;
279 readEnd = totalPoints;
280 }
281
282 if (readStart < 1 || readEnd < 1 || readEnd < readStart || readStart > totalPoints || readEnd > totalPoints) {
283 g_log.error("Invalid data range specfied.");
284 m_fileIn.close();
285 throw std::invalid_argument("Invalid data range specfied.");
286 }
287
288 g_log.information() << "Reading started on data line: " << readStart << "\n";
289 g_log.information() << "Reading finished on data line: " << readEnd << "\n";
290
291 // The 4th and 5th line do not contain useful information either
293
294 int pointsToRead = readEnd - readStart + 1;
295 // Now stream sits at the first line of data
296 HistogramData::Points columnOne;
297 HistogramData::Counts ydata;
298 HistogramData::PointStandardDeviations xError;
299 HistogramData::CountStandardDeviations errdata;
300
301 auto hasXError = hasXerror(m_fileIn);
302
303 Progress prog(this, 0.0, 1.0, readEnd);
304
305 readLinesForRKH1D(m_fileIn, readStart, readEnd, columnOne, ydata, errdata, xError, prog, hasXError);
306
307 m_fileIn.close();
308
309 assert(pointsToRead == static_cast<int>(columnOne.size()));
310 assert(pointsToRead == static_cast<int>(ydata.size()));
311 assert(pointsToRead == static_cast<int>(errdata.size()));
312
313 if (hasXError) {
314 assert(pointsToRead == static_cast<int>(xError.size()));
315 }
316
317 if (colIsUnit) {
318 MatrixWorkspace_sptr localworkspace =
319 WorkspaceFactory::Instance().create("Workspace2D", 1, pointsToRead, pointsToRead);
320 localworkspace->getSpectrum(0).setDetectorID(static_cast<detid_t>(1));
321 localworkspace->getAxis(0)->unit() = UnitFactory::Instance().create(firstColVal);
322 localworkspace->setPoints(0, columnOne);
323 localworkspace->setCounts(0, ydata);
324 localworkspace->setCountStandardDeviations(0, errdata);
325 if (hasXError) {
326 localworkspace->setPointStandardDeviations(0, xError);
327 }
328 return localworkspace;
329 } else {
330 MatrixWorkspace_sptr localworkspace = WorkspaceFactory::Instance().create("Workspace2D", pointsToRead, 1, 1);
331 // Set the appropriate values
332 for (int index = 0; index < pointsToRead; ++index) {
333 localworkspace->getSpectrum(index).setSpectrumNo(static_cast<int>(columnOne[index]));
334 localworkspace->getSpectrum(index).setDetectorID(static_cast<detid_t>(index + 1));
335 localworkspace->dataY(index)[0] = ydata[index];
336 localworkspace->dataE(index)[0] = errdata[index];
337 }
338
339 if (hasXError) {
340 for (int index = 0; index < pointsToRead; ++index) {
341 localworkspace->setPointStandardDeviations(0, 1, xError[index]);
342 }
343 }
344 return localworkspace;
345 }
346}
355const MatrixWorkspace_sptr LoadRKH::read2D(const std::string &firstLine) {
356 g_log.information() << "file appears to contain 2D information, reading in 2D data mode\n";
357
358 MatrixWorkspace_sptr outWrksp;
359 MantidVec axis0Data;
360 Progress prog(read2DHeader(firstLine, outWrksp, axis0Data));
361 const size_t nAxis1Values = outWrksp->getNumberHistograms();
362
363 // set the X-values to the common bin values we read above
364 auto toPass = Kernel::make_cow<HistogramData::HistogramX>(axis0Data);
365 for (size_t i = 0; i < nAxis1Values; ++i) {
366 outWrksp->setX(i, toPass);
367
368 // now read in the Y values
369 MantidVec &YOut = outWrksp->dataY(i);
370 for (double &value : YOut) {
371 m_fileIn >> value;
372 }
373 prog.report("Loading Y data");
374 } // loop on to the next spectrum
375
376 // the error values form one big block after the Y-values
377 for (size_t i = 0; i < nAxis1Values; ++i) {
378 MantidVec &EOut = outWrksp->dataE(i);
379 for (double &value : EOut) {
380 m_fileIn >> value;
381 }
382 prog.report("Loading error estimates");
383 } // loop on to the next spectrum
384
385 return outWrksp;
386}
396Progress LoadRKH::read2DHeader(const std::string &initalLine, MatrixWorkspace_sptr &outWrksp, MantidVec &axis0Data) {
397 const std::string XUnit(readUnit(initalLine));
398
399 std::string fileLine;
400 std::getline(m_fileIn, fileLine);
401 const std::string YUnit(readUnit(fileLine));
402
403 std::getline(m_fileIn, fileLine);
404 const std::string intensityUnit(readUnit(fileLine));
405
406 // the next line should contain just "1", but I'm not enforcing that
407 std::getline(m_fileIn, fileLine);
408 std::string title;
409 std::getline(m_fileIn, title);
410
411 std::getline(m_fileIn, fileLine);
412 boost::trim(fileLine);
413 const auto nAxis0Boundaries = boost::lexical_cast<int>(fileLine);
414 axis0Data.resize(nAxis0Boundaries);
415 readNumEntrys(nAxis0Boundaries, axis0Data);
416
417 std::getline(m_fileIn, fileLine);
418 boost::trim(fileLine);
419 int nAxis1Boundaries;
420 try {
421 nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
422 } catch (boost::bad_lexical_cast &) {
423 // using readNumEntrys() above broke the sequence of getline()s and so try
424 // again in case we just read the end of a line
425 std::getline(m_fileIn, fileLine);
426 boost::trim(fileLine);
427 nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
428 }
429 MantidVec axis1Data(nAxis1Boundaries);
430 readNumEntrys(nAxis1Boundaries, axis1Data);
431
432 std::getline(m_fileIn, fileLine);
433 // check for the file pointer being left at the end of a line
434 if (fileLine.size() < 5) {
435 std::getline(m_fileIn, fileLine);
436 }
439 if (wsDimensions.count() < 2) {
440 throw Exception::NotFoundError("Input file", "dimensions");
441 }
442 const auto nAxis0Values = boost::lexical_cast<int>(wsDimensions[0]);
443 const auto nAxis1Values = boost::lexical_cast<int>(wsDimensions[1]);
444
445 Progress prog(this, 0.05, 1.0, 2 * nAxis1Values);
446
447 // we now have all the data we need to create the output workspace
448 outWrksp = WorkspaceFactory::Instance().create("Workspace2D", nAxis1Values, nAxis0Boundaries, nAxis0Values);
449 for (int i = 0; i < nAxis1Values; ++i) {
450 outWrksp->getSpectrum(i).setDetectorID(static_cast<detid_t>(i + 1));
451 }
452 outWrksp->getAxis(0)->unit() = UnitFactory::Instance().create(XUnit);
453 outWrksp->setYUnitLabel(intensityUnit);
454
455 auto axis1 = std::make_unique<Mantid::API::NumericAxis>(nAxis1Boundaries);
456 auto axis1Raw = axis1.get();
457 axis1->unit() = Mantid::Kernel::UnitFactory::Instance().create(YUnit);
458 outWrksp->replaceAxis(1, std::move(axis1));
459 for (int i = 0; i < nAxis1Boundaries; ++i) {
460 axis1Raw->setValue(i, axis1Data[i]);
461 }
462
463 outWrksp->setTitle(title);
464 // move over the next line which is there to help with loading from Fortran
465 // routines
466 std::getline(m_fileIn, fileLine);
467
468 return prog;
469}
476void LoadRKH::readNumEntrys(const int nEntries, MantidVec &output) {
477 output.resize(nEntries);
478 for (int i = 0; i < nEntries; ++i) {
479 m_fileIn >> output[i];
480 }
481}
487const std::string LoadRKH::readUnit(const std::string &line) {
488 // split the line into words
491
492 if (!isUnit(codes)) {
493 return "C++ no unit found";
494 }
495
496 if (codes.count() < 1) {
497 return "C++ no unit found";
498 }
499
500 // the symbol for the quantity q = MomentumTransfer, etc.
501 const std::string symbol(codes[0]);
502 // this is units used to measure the quantity e.g. angstroms, counts, ...
503 auto itUnitsToken = codes.cend() - 1;
504 const std::string unit(*itUnitsToken);
505
506 // theQuantity will contain the name of the unit, which can be many words long
507 std::string theQuantity;
508 for (auto current = codes.cbegin() + 1; current != itUnitsToken; ++current) {
509 theQuantity += *current;
510 }
511
512 // this is a syntax check the line before returning its data
513 if (codes.count() >= 3) {
514 // For the next line it is possible to use str.compare instead of str.find, this would be more efficient if the line
515 // was very long however to use is safely other checks would be required that would impair readability, therefore in
516 // this case the unlikely performance hit is accepted.
517 if ((!unit.starts_with('(')) || (unit.find(')') != unit.size())) {
518 std::string qCode = std::to_string(SaveRKH::Q_CODE);
519 if (symbol == qCode && theQuantity == "q" &&
520 (unit == "(1/Angstrom)" || unit == "(Angstrom^-1)")) { // 6 q (1/Angstrom) is the synatx for
521 // MomentumTransfer
522 return "MomentumTransfer";
523 }
524
525 if (symbol == "0" && theQuantity != "q") { // zero means the unit is not q
526 // but something else, which
527 // I'm assuming is legal
528 return theQuantity + " " + unit;
529 }
530 }
531 }
532 // the line doesn't contain a valid 2D data file unit line
533 return "C++ no unit found";
534}
540void LoadRKH::skipLines(std::istream &strm, int nlines) {
541 std::string buried;
542 for (int i = 0; i < nlines; ++i) {
543 getline(strm, buried);
544 }
545}
551void LoadRKH::binCenter(const MantidVec &oldBoundaries, MantidVec &toCenter) const {
552 VectorHelper::convertToBinCentre(oldBoundaries, toCenter);
553}
554
559bool LoadRKH::hasXerror(std::ifstream &stream) {
560 auto containsXerror = false;
561 auto currentPutLocation = stream.tellg();
562 std::string line;
563 getline(stream, line);
564
565 std::string x, y, yerr, xerr;
566 std::istringstream datastr(line);
567 datastr >> x >> y >> yerr >> xerr;
568 if (!xerr.empty()) {
569 containsXerror = true;
570 }
571 // Reset the original location of the stream
572 stream.seekg(currentPutLocation, stream.beg);
573 return containsXerror;
574}
575} // namespace Mantid::DataHandling
double value
The value of the point.
Definition FitMW.cpp:51
std::map< DeltaEMode::Type, std::string > index
#define DECLARE_FILELOADER_ALGORITHM(classname)
DECLARE_FILELOADER_ALGORITHM should be used in place of the standard DECLARE_ALGORITHM macro when wri...
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Kernel::Logger & g_log
Definition Algorithm.h:422
@ Load
allowed here which will be passed to the algorithm
Helper class for reporting progress from algorithms.
Definition Progress.h:25
A property class for workspaces.
Loads an RKH file into a Mantid 1D workspace.
Definition LoadRKH.h:35
int confidence(Kernel::FileDescriptor &descriptor) const override
Returns a confidence value that this algorithm can load a file.
Definition LoadRKH.cpp:117
const API::MatrixWorkspace_sptr read1D()
Read a data file that contains only one spectrum into a workspace.
Definition LoadRKH.cpp:242
std::unordered_set< std::string > m_RKHKeys
Store the units added as options for this algorithm.
Definition LoadRKH.h:55
const std::string readUnit(const std::string &line)
Convert the units specification line from the RKH file into a Mantid unit name.
Definition LoadRKH.cpp:487
void init() override
Initialise the algorithm.
Definition LoadRKH.cpp:171
void readNumEntrys(const int nEntries, MantidVec &output)
Read the specified number of entries from input file into the the array that is passed.
Definition LoadRKH.cpp:476
void binCenter(const MantidVec &oldBoundaries, MantidVec &toCenter) const
PAss a vector of bin boundaries and get a vector of bin centers.
Definition LoadRKH.cpp:551
void exec() override
Execute the algorithm.
Definition LoadRKH.cpp:197
bool hasXerror(std::ifstream &stream)
Check if we the data set stores an X-Error values.
Definition LoadRKH.cpp:559
std::unordered_set< std::string > m_unitKeys
Store the units known to the UnitFactory.
Definition LoadRKH.h:53
const API::MatrixWorkspace_sptr read2D(const std::string &firstLine)
Reads from the third line of the input file to the end assuming it contains 2D data.
Definition LoadRKH.cpp:355
void readLinesForRKH1D(std::istream &stream, int readStart, int readEnd, HistogramData::Points &x, HistogramData::Counts &y, HistogramData::CountStandardDeviations &ye, HistogramData::PointStandardDeviations &xe, API::Progress &prog, bool readXError=false)
Read data from the RKH file.
Definition LoadRKH.cpp:67
API::Progress read2DHeader(const std::string &initalLine, API::MatrixWorkspace_sptr &outWrksp, MantidVec &axis0Data)
Reads the header information from a file containing 2D data.
Definition LoadRKH.cpp:396
std::ifstream m_fileIn
the input stream for the file being loaded
Definition LoadRKH.h:57
bool is2D(const std::string &testLine)
Determines if the file is 1D or 2D based on the first after the workspace's title.
Definition LoadRKH.cpp:232
void skipLines(std::istream &strm, int nlines)
Remove lines from an input stream.
Definition LoadRKH.cpp:540
@ Q_CODE
this is the integer code the RKH file format associates
Definition SaveRKH.h:48
Records the filename and the description of failure.
Definition Exception.h:98
Exception for when an item is not found in a collection.
Definition Exception.h:145
Defines a wrapper around an open file.
static bool isAscii(const std::string &filename, const size_t nbytes=256)
Returns true if the file is considered ascii.
std::istream & data()
Access the open file stream.
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
void error(const std::string &msg)
Logs at error level.
Definition Logger.cpp:108
void information(const std::string &msg)
Logs at information level.
Definition Logger.cpp:136
void report()
Increments the loop counter by 1, then sends the progress notification on behalf of its algorithm.
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
Iterator begin()
Iterator referring to first element in the container.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
Iterator end()
Iterator referring to the past-the-end element in the container.
ConstIterator cend() const
Const iterator referring to the past-the-end element in the container.
ConstIterator cbegin() const
Const iterator referring to first element in the container.
std::size_t count() const
Get the total number of tokens.
std::shared_ptr< MatrixWorkspace > MatrixWorkspace_sptr
shared pointer to the matrix workspace base class
void MANTID_KERNEL_DLL convertToBinCentre(const std::vector< double > &bin_edges, std::vector< double > &bin_centres)
Convert an array of bin boundaries to bin center values.
int32_t detid_t
Typedef for a detector ID.
std::vector< double > MantidVec
typedef for the data storage used in Mantid matrix workspaces
Definition cow_ptr.h:172
STL namespace.
std::string to_string(const wide_integer< Bits, Signed > &n)
@ Output
An output workspace.
Definition Property.h:54