Mantid
Loading...
Searching...
No Matches
LoadRKH.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7//---------------------------------------------------
8// Includes
9//---------------------------------------------------
21
22// clang-format off
23#include <boost/date_time/gregorian/gregorian.hpp>
24#include <boost/date_time/date_parsing.hpp>
25// clang-format on
26#include <boost/lexical_cast.hpp>
27#include <boost/algorithm/string.hpp>
29
30#include <istream>
31#include <numeric>
32#include <boost/regex.hpp>
33
34namespace {
35// Check if we are dealing with a unit line
36bool isUnit(const Mantid::Kernel::StringTokenizer &codes) {
37 // The unit line needs to have the format of
38 // 1. Either 0 or 6 [06]
39 // 2. Then several characters [\w]
40 // 3. Open bracket
41 // 4. Several characters
42 // 5. Close bracket
43 std::string input = std::accumulate(codes.begin(), codes.end(), std::string(""));
44 std::string reg(R"(^[06][\w]+\‍([/ \w\^-]+\)$)");
45 boost::regex baseRegex(reg);
46 return boost::regex_match(input, baseRegex);
47}
48} // namespace
49
50namespace Mantid::DataHandling {
51using namespace Mantid::API;
52using namespace Mantid::Kernel;
53
55
56
69void LoadRKH::readLinesForRKH1D(std::istream &stream, int readStart, int readEnd, HistogramData::Points &x,
70 HistogramData::Counts &y, HistogramData::CountStandardDeviations &ye,
71 HistogramData::PointStandardDeviations &xe, Progress &prog, bool readXError) {
72
73 std::vector<double> xData;
74 std::vector<double> yData;
75 std::vector<double> xError;
76 std::vector<double> yError;
77
78 xData.reserve(readEnd);
79 yData.reserve(readEnd);
80 xError.reserve(readEnd);
81 yError.reserve(readEnd);
82
83 std::string fileline;
84 for (int index = 1; index <= readEnd; ++index) {
85 getline(stream, fileline);
86 if (index < readStart)
87 continue;
88
89 double xValue(0.), yValue(0.), yErrorValue(0.);
90 std::istringstream datastr(fileline);
91 datastr >> xValue >> yValue >> yErrorValue;
92
93 xData.emplace_back(xValue);
94 yData.emplace_back(yValue);
95 yError.emplace_back(yErrorValue);
96
97 // check if we need to read in x error values
98 if (readXError) {
99 double xErrorValue(0.);
100 datastr >> xErrorValue;
101 xError.emplace_back(xErrorValue);
102 }
103
104 prog.report();
105 }
106
107 x = xData;
108 y = yData;
109 ye = yError;
110 xe = xError;
111}
112
120 if (!descriptor.isAscii())
121 return 0;
122
123 auto &file = descriptor.data();
124 std::string fileline;
125
126 // Header looks something like this where the text inside [] could be anything
127 // LOQ Thu 28-OCT-2004 12:23 [W 26 INST_DIRECT_BEAM]
128
129 // -- First line --
130 std::getline(file, fileline);
131 // LOQ or SANS2D (case insensitive)
132 if (boost::ifind_first(fileline, "loq").empty() && boost::ifind_first(fileline, "sans2d").empty())
133 return 0;
134
135 // Next should be date time string
136 static const char *MONTHS[12] = {"-JAN-", "-FEB-", "-MAR-", "-APR-", "-MAY-", "-JUN-",
137 "-JUL-", "-AUG-", "-SEP-", "-OCT-", "-NOV-", "-DEC-"};
138
139 bool foundMonth(false);
140 for (auto &month : MONTHS) {
141 if (!boost::ifind_first(fileline, month).empty()) {
142 foundMonth = true;
143 break;
144 }
145 }
146 if (!foundMonth)
147 return 0;
148
149 // there are no constraints on the second line
150 std::getline(file, fileline);
151
152 // read 3rd line - should contain sequence "0 0 0 1"
153 std::getline(file, fileline);
154 if (fileline.find("0 0 0 1") == std::string::npos)
155 return 0;
156
157 // read 4th line - should contain sequence ""0 0 0 0"
158 std::getline(file, fileline);
159 if (fileline.find("0 0 0 0") == std::string::npos)
160 return 0;
161
162 // read 5th line - should contain sequence "3 (F12.5,2E16.6)"
163 std::getline(file, fileline);
164 if (fileline.find("3 (F12.5,2E16.6)") == std::string::npos)
165 return 0;
166
167 return 20; // Better than LoadAscii
168}
169
174 const std::vector<std::string> exts{".txt", ".q", ".dat"};
175 declareProperty(std::make_unique<API::FileProperty>("Filename", "", API::FileProperty::Load, exts),
176 "Name of the RKH file to load");
177 declareProperty(std::make_unique<API::WorkspaceProperty<>>("OutputWorkspace", "", Kernel::Direction::Output),
178 "The name to use for the output workspace");
179 // Get the units registered with the UnitFactory
180 std::vector<std::string> propOptions = Kernel::UnitFactory::Instance().getKeys();
181 m_unitKeys.insert(propOptions.begin(), propOptions.end());
182
183 // m_RKHKeys will be taken as axis(1) units, the first axis will have only one
184 // value
185 // and so selection of one of these units will result in a workspace
186 // orientated differently
187 // from selection of the above
188 m_RKHKeys.insert("SpectrumNumber");
189 propOptions.insert(propOptions.end(), m_RKHKeys.begin(), m_RKHKeys.end());
190
191 declareProperty("FirstColumnValue", "Wavelength", std::make_shared<Kernel::StringListValidator>(propOptions),
192 "Only used for 1D files, the units of the first column in the RKH "
193 "file (default Wavelength)");
194}
195
200 using namespace Mantid::Kernel;
201 using namespace Mantid::API;
202
203 // Retrieve filename and try to open the file
204 std::string filename = getPropertyValue("Filename");
205
206 m_fileIn.open(filename.c_str());
207 if (!m_fileIn) {
208 g_log.error("Unable to open file " + filename);
209 throw Exception::FileError("Unable to open File: ", filename);
210 }
211 g_log.information() << "Opened file \"" << filename << "\" for reading\n";
212
213 std::string line;
214 // The first line contains human readable information about the original
215 // workspace that we don't need
216 getline(m_fileIn, line);
217 getline(m_fileIn, line);
218
219 // Use one line of the file to diagnose if it is 1D or 2D, this line contains
220 // some data required by the 2D data reader
221 MatrixWorkspace_sptr result = is2D(line) ? read2D(line) : read1D();
222
223 // all RKH files contain distribution data
224 result->setDistribution(true);
225 // Set the output workspace
226 setProperty("OutputWorkspace", result);
227}
228
234bool LoadRKH::is2D(const std::string &testLine) {
235 // split the line into words
238 return isUnit(codes);
239}
240
245 g_log.information() << "file appears to contain 1D information, reading in 1D data mode\n";
246
247 // The 3rd line contains information regarding the number of points in the
248 // file and
249 // start and end reading points
250 int totalPoints(0), readStart(0), readEnd(0), buried(0);
251 std::string fileline;
252
253 getline(m_fileIn, fileline);
254 std::istringstream is(fileline);
255 // Get data information
256 for (int counter = 1; counter < 8; ++counter) {
257 switch (counter) {
258 case 1:
259 is >> totalPoints;
260 break;
261 case 5:
262 is >> readStart;
263 break;
264 case 6:
265 is >> readEnd;
266 break;
267 default:
268 is >> buried;
269 break;
270 }
271 }
272
273 g_log.information() << "Total number of data points declared to be in the data file: " << totalPoints << "\n";
274
275 // What are we reading?
276 std::string firstColVal = getProperty("FirstColumnValue");
277 bool colIsUnit(true);
278 if (m_RKHKeys.find(firstColVal) != m_RKHKeys.end()) {
279 colIsUnit = false;
280 readStart = 1;
281 readEnd = totalPoints;
282 }
283
284 if (readStart < 1 || readEnd < 1 || readEnd < readStart || readStart > totalPoints || readEnd > totalPoints) {
285 g_log.error("Invalid data range specfied.");
286 m_fileIn.close();
287 throw std::invalid_argument("Invalid data range specfied.");
288 }
289
290 g_log.information() << "Reading started on data line: " << readStart << "\n";
291 g_log.information() << "Reading finished on data line: " << readEnd << "\n";
292
293 // The 4th and 5th line do not contain useful information either
295
296 int pointsToRead = readEnd - readStart + 1;
297 // Now stream sits at the first line of data
298 HistogramData::Points columnOne;
299 HistogramData::Counts ydata;
300 HistogramData::PointStandardDeviations xError;
301 HistogramData::CountStandardDeviations errdata;
302
303 auto hasXError = hasXerror(m_fileIn);
304
305 Progress prog(this, 0.0, 1.0, readEnd);
306
307 readLinesForRKH1D(m_fileIn, readStart, readEnd, columnOne, ydata, errdata, xError, prog, hasXError);
308
309 m_fileIn.close();
310
311 assert(pointsToRead == static_cast<int>(columnOne.size()));
312 assert(pointsToRead == static_cast<int>(ydata.size()));
313 assert(pointsToRead == static_cast<int>(errdata.size()));
314
315 if (hasXError) {
316 assert(pointsToRead == static_cast<int>(xError.size()));
317 }
318
319 if (colIsUnit) {
320 MatrixWorkspace_sptr localworkspace =
321 WorkspaceFactory::Instance().create("Workspace2D", 1, pointsToRead, pointsToRead);
322 localworkspace->getSpectrum(0).setDetectorID(static_cast<detid_t>(1));
323 localworkspace->getAxis(0)->unit() = UnitFactory::Instance().create(firstColVal);
324 localworkspace->setPoints(0, columnOne);
325 localworkspace->setCounts(0, ydata);
326 localworkspace->setCountStandardDeviations(0, errdata);
327 if (hasXError) {
328 localworkspace->setPointStandardDeviations(0, xError);
329 }
330 return localworkspace;
331 } else {
332 MatrixWorkspace_sptr localworkspace = WorkspaceFactory::Instance().create("Workspace2D", pointsToRead, 1, 1);
333 // Set the appropriate values
334 for (int index = 0; index < pointsToRead; ++index) {
335 localworkspace->getSpectrum(index).setSpectrumNo(static_cast<int>(columnOne[index]));
336 localworkspace->getSpectrum(index).setDetectorID(static_cast<detid_t>(index + 1));
337 localworkspace->dataY(index)[0] = ydata[index];
338 localworkspace->dataE(index)[0] = errdata[index];
339 }
340
341 if (hasXError) {
342 for (int index = 0; index < pointsToRead; ++index) {
343 localworkspace->setPointStandardDeviations(0, 1, xError[index]);
344 }
345 }
346 return localworkspace;
347 }
348}
357const MatrixWorkspace_sptr LoadRKH::read2D(const std::string &firstLine) {
358 g_log.information() << "file appears to contain 2D information, reading in 2D data mode\n";
359
360 MatrixWorkspace_sptr outWrksp;
361 MantidVec axis0Data;
362 Progress prog(read2DHeader(firstLine, outWrksp, axis0Data));
363 const size_t nAxis1Values = outWrksp->getNumberHistograms();
364
365 // set the X-values to the common bin values we read above
366 auto toPass = Kernel::make_cow<HistogramData::HistogramX>(axis0Data);
367 for (size_t i = 0; i < nAxis1Values; ++i) {
368 outWrksp->setX(i, toPass);
369
370 // now read in the Y values
371 MantidVec &YOut = outWrksp->dataY(i);
372 for (double &value : YOut) {
373 m_fileIn >> value;
374 }
375 prog.report("Loading Y data");
376 } // loop on to the next spectrum
377
378 // the error values form one big block after the Y-values
379 for (size_t i = 0; i < nAxis1Values; ++i) {
380 MantidVec &EOut = outWrksp->dataE(i);
381 for (double &value : EOut) {
382 m_fileIn >> value;
383 }
384 prog.report("Loading error estimates");
385 } // loop on to the next spectrum
386
387 return outWrksp;
388}
398Progress LoadRKH::read2DHeader(const std::string &initalLine, MatrixWorkspace_sptr &outWrksp, MantidVec &axis0Data) {
399 const std::string XUnit(readUnit(initalLine));
400
401 std::string fileLine;
402 std::getline(m_fileIn, fileLine);
403 const std::string YUnit(readUnit(fileLine));
404
405 std::getline(m_fileIn, fileLine);
406 const std::string intensityUnit(readUnit(fileLine));
407
408 // the next line should contain just "1", but I'm not enforcing that
409 std::getline(m_fileIn, fileLine);
410 std::string title;
411 std::getline(m_fileIn, title);
412
413 std::getline(m_fileIn, fileLine);
414 boost::trim(fileLine);
415 const auto nAxis0Boundaries = boost::lexical_cast<int>(fileLine);
416 axis0Data.resize(nAxis0Boundaries);
417 readNumEntrys(nAxis0Boundaries, axis0Data);
418
419 std::getline(m_fileIn, fileLine);
420 boost::trim(fileLine);
421 int nAxis1Boundaries;
422 try {
423 nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
424 } catch (boost::bad_lexical_cast &) {
425 // using readNumEntrys() above broke the sequence of getline()s and so try
426 // again in case we just read the end of a line
427 std::getline(m_fileIn, fileLine);
428 boost::trim(fileLine);
429 nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
430 }
431 MantidVec axis1Data(nAxis1Boundaries);
432 readNumEntrys(nAxis1Boundaries, axis1Data);
433
434 std::getline(m_fileIn, fileLine);
435 // check for the file pointer being left at the end of a line
436 if (fileLine.size() < 5) {
437 std::getline(m_fileIn, fileLine);
438 }
441 if (wsDimensions.count() < 2) {
442 throw Exception::NotFoundError("Input file", "dimensions");
443 }
444 const auto nAxis0Values = boost::lexical_cast<int>(wsDimensions[0]);
445 const auto nAxis1Values = boost::lexical_cast<int>(wsDimensions[1]);
446
447 Progress prog(this, 0.05, 1.0, 2 * nAxis1Values);
448
449 // we now have all the data we need to create the output workspace
450 outWrksp = WorkspaceFactory::Instance().create("Workspace2D", nAxis1Values, nAxis0Boundaries, nAxis0Values);
451 for (int i = 0; i < nAxis1Values; ++i) {
452 outWrksp->getSpectrum(i).setDetectorID(static_cast<detid_t>(i + 1));
453 }
454 outWrksp->getAxis(0)->unit() = UnitFactory::Instance().create(XUnit);
455 outWrksp->setYUnitLabel(intensityUnit);
456
457 auto axis1 = std::make_unique<Mantid::API::NumericAxis>(nAxis1Boundaries);
458 auto axis1Raw = axis1.get();
459 axis1->unit() = Mantid::Kernel::UnitFactory::Instance().create(YUnit);
460 outWrksp->replaceAxis(1, std::move(axis1));
461 for (int i = 0; i < nAxis1Boundaries; ++i) {
462 axis1Raw->setValue(i, axis1Data[i]);
463 }
464
465 outWrksp->setTitle(title);
466 // move over the next line which is there to help with loading from Fortran
467 // routines
468 std::getline(m_fileIn, fileLine);
469
470 return prog;
471}
478void LoadRKH::readNumEntrys(const int nEntries, MantidVec &output) {
479 output.resize(nEntries);
480 for (int i = 0; i < nEntries; ++i) {
481 m_fileIn >> output[i];
482 }
483}
489const std::string LoadRKH::readUnit(const std::string &line) {
490 // split the line into words
493
494 if (!isUnit(codes)) {
495 return "C++ no unit found";
496 }
497
498 if (codes.count() < 1) {
499 return "C++ no unit found";
500 }
501
502 // the symbol for the quantity q = MomentumTransfer, etc.
503 const std::string symbol(codes[0]);
504 // this is units used to measure the quantity e.g. angstroms, counts, ...
505 auto itUnitsToken = codes.cend() - 1;
506 const std::string unit(*itUnitsToken);
507
508 // theQuantity will contain the name of the unit, which can be many words long
509 std::string theQuantity;
510 for (auto current = codes.cbegin() + 1; current != itUnitsToken; ++current) {
511 theQuantity += *current;
512 }
513
514 // this is a syntax check the line before returning its data
515 if (codes.count() >= 3) {
516 // For the next line it is possible to use str.compare instead of str.find,
517 // this would be more efficient if the line was very long
518 // however to use is safely other checks would be required that would impair
519 // readability, therefore in this case the unlikely performance hit is
520 // accepted.
521 if (unit.find('(') != 0 || unit.find(')') != unit.size()) {
522 std::string qCode = std::to_string(SaveRKH::Q_CODE);
523 if (symbol == qCode && theQuantity == "q" &&
524 (unit == "(1/Angstrom)" || unit == "(Angstrom^-1)")) { // 6 q (1/Angstrom) is the synatx for
525 // MomentumTransfer
526 return "MomentumTransfer";
527 }
528
529 if (symbol == "0" && theQuantity != "q") { // zero means the unit is not q
530 // but something else, which
531 // I'm assuming is legal
532 return theQuantity + " " + unit;
533 }
534 }
535 }
536 // the line doesn't contain a valid 2D data file unit line
537 return "C++ no unit found";
538}
544void LoadRKH::skipLines(std::istream &strm, int nlines) {
545 std::string buried;
546 for (int i = 0; i < nlines; ++i) {
547 getline(strm, buried);
548 }
549}
555void LoadRKH::binCenter(const MantidVec &oldBoundaries, MantidVec &toCenter) const {
556 VectorHelper::convertToBinCentre(oldBoundaries, toCenter);
557}
558
563bool LoadRKH::hasXerror(std::ifstream &stream) {
564 auto containsXerror = false;
565 auto currentPutLocation = stream.tellg();
566 std::string line;
567 getline(stream, line);
568
569 std::string x, y, yerr, xerr;
570 std::istringstream datastr(line);
571 datastr >> x >> y >> yerr >> xerr;
572 if (!xerr.empty()) {
573 containsXerror = true;
574 }
575 // Reset the original location of the stream
576 stream.seekg(currentPutLocation, stream.beg);
577 return containsXerror;
578}
579} // namespace Mantid::DataHandling
double value
The value of the point.
Definition: FitMW.cpp:51
std::map< DeltaEMode::Type, std::string > index
Definition: DeltaEMode.cpp:19
#define DECLARE_FILELOADER_ALGORITHM(classname)
DECLARE_FILELOADER_ALGORITHM should be used in place of the standard DECLARE_ALGORITHM macro when wri...
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
Definition: Algorithm.cpp:1913
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
Definition: Algorithm.cpp:2026
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Definition: Algorithm.cpp:2076
@ Load
allowed here which will be passed to the algorithm
Definition: FileProperty.h:52
Helper class for reporting progress from algorithms.
Definition: Progress.h:25
A property class for workspaces.
Loads an RKH file into a Mantid 1D workspace.
Definition: LoadRKH.h:35
int confidence(Kernel::FileDescriptor &descriptor) const override
Returns a confidence value that this algorithm can load a file.
Definition: LoadRKH.cpp:119
const API::MatrixWorkspace_sptr read1D()
Read a data file that contains only one spectrum into a workspace.
Definition: LoadRKH.cpp:244
std::unordered_set< std::string > m_RKHKeys
Store the units added as options for this algorithm.
Definition: LoadRKH.h:55
const std::string readUnit(const std::string &line)
Convert the units specification line from the RKH file into a Mantid unit name.
Definition: LoadRKH.cpp:489
void init() override
Initialise the algorithm.
Definition: LoadRKH.cpp:173
void readNumEntrys(const int nEntries, MantidVec &output)
Read the specified number of entries from input file into the the array that is passed.
Definition: LoadRKH.cpp:478
void binCenter(const MantidVec &oldBoundaries, MantidVec &toCenter) const
PAss a vector of bin boundaries and get a vector of bin centers.
Definition: LoadRKH.cpp:555
void exec() override
Execute the algorithm.
Definition: LoadRKH.cpp:199
bool hasXerror(std::ifstream &stream)
Check if we the data set stores an X-Error values.
Definition: LoadRKH.cpp:563
std::unordered_set< std::string > m_unitKeys
Store the units known to the UnitFactory.
Definition: LoadRKH.h:53
const API::MatrixWorkspace_sptr read2D(const std::string &firstLine)
Reads from the third line of the input file to the end assuming it contains 2D data.
Definition: LoadRKH.cpp:357
void readLinesForRKH1D(std::istream &stream, int readStart, int readEnd, HistogramData::Points &x, HistogramData::Counts &y, HistogramData::CountStandardDeviations &ye, HistogramData::PointStandardDeviations &xe, API::Progress &prog, bool readXError=false)
Read data from the RKH file.
Definition: LoadRKH.cpp:69
API::Progress read2DHeader(const std::string &initalLine, API::MatrixWorkspace_sptr &outWrksp, MantidVec &axis0Data)
Reads the header information from a file containing 2D data.
Definition: LoadRKH.cpp:398
std::ifstream m_fileIn
the input stream for the file being loaded
Definition: LoadRKH.h:57
bool is2D(const std::string &testLine)
Determines if the file is 1D or 2D based on the first after the workspace's title.
Definition: LoadRKH.cpp:234
void skipLines(std::istream &strm, int nlines)
Remove lines from an input stream.
Definition: LoadRKH.cpp:544
@ Q_CODE
this is the integer code the RKH file format associates
Definition: SaveRKH.h:46
Records the filename and the description of failure.
Definition: Exception.h:98
Exception for when an item is not found in a collection.
Definition: Exception.h:145
Defines a wrapper around an open file.
static bool isAscii(const std::string &filename, const size_t nbytes=256)
Returns true if the file is considered ascii.
std::istream & data()
Access the open file stream.
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
void error(const std::string &msg)
Logs at error level.
Definition: Logger.cpp:77
void information(const std::string &msg)
Logs at information level.
Definition: Logger.cpp:105
void report()
Increments the loop counter by 1, then sends the progress notification on behalf of its algorithm.
Definition: ProgressBase.h:51
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
Iterator begin()
Iterator referring to first element in the container.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
Iterator end()
Iterator referring to the past-the-end element in the container.
ConstIterator cend() const
Const iterator referring to the past-the-end element in the container.
ConstIterator cbegin() const
Const iterator referring to first element in the container.
std::size_t count() const
Get the total number of tokens.
Kernel::Logger g_log("ExperimentInfo")
static logger object
std::shared_ptr< MatrixWorkspace > MatrixWorkspace_sptr
shared pointer to the matrix workspace base class
void MANTID_KERNEL_DLL convertToBinCentre(const std::vector< double > &bin_edges, std::vector< double > &bin_centres)
Convert an array of bin boundaries to bin center values.
int32_t detid_t
Typedef for a detector ID.
Definition: SpectrumInfo.h:21
std::vector< double > MantidVec
typedef for the data storage used in Mantid matrix workspaces
Definition: cow_ptr.h:172
STL namespace.
std::string to_string(const wide_integer< Bits, Signed > &n)
@ Output
An output workspace.
Definition: Property.h:54