Mantid
Loading...
Searching...
No Matches
LoadAscii.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
8#include "MantidAPI/Axis.h"
11#include "MantidAPI/Run.h"
19// String utilities
20#include <boost/algorithm/string.hpp>
21
22#include <fstream>
23
24namespace Mantid::DataHandling {
26
27using namespace Kernel;
28using namespace API;
29
31LoadAscii::LoadAscii() : m_columnSep(), m_separatorIndex() { this->useAlgorithm("LoadAscii", 2); }
32
40 const std::string &filePath = descriptor.filename();
41 const size_t filenameLength = filePath.size();
42
43 // Avoid some known file types that have different loaders
44 int confidence(0);
45 if (filenameLength > 12 ? (filePath.compare(filenameLength - 12, 12, "_runinfo.xml") == 0)
46 : false || filenameLength > 6 ? (filePath.compare(filenameLength - 6, 6, ".peaks") == 0)
47 : false || filenameLength > 10 ? (filePath.compare(filenameLength - 10, 10, ".integrate") == 0)
48 : false) {
49 confidence = 0;
50 } else if (descriptor.isAscii()) {
51 confidence = 9; // Low so that others may try but not stopping version 2
52 }
53 return confidence;
54}
55
60void LoadAscii::processHeader(std::ifstream &file) const {
61
62 // Most files will have some sort of header. If we've haven't been told how
63 // many lines to
64 // skip then try and guess
65 int numToSkip = getProperty("SkipNumLines");
66 if (numToSkip == EMPTY_INT()) {
67 const int rowsToMatch(5);
68 // Have a guess where the data starts. Basically say, when we have say
69 // "rowsToMatch" lines of pure numbers
70 // in a row then the line that started block is the top of the data
71 int numCols(-1), matchingRows(0), row(0);
72 std::string line;
73 std::vector<double> values;
74 while (getline(file, line)) {
75 ++row;
76 // int nchars = (int)line.length(); TODO dead code?
77 boost::trim(line);
78 if (this->skipLine(line)) {
79 continue;
80 }
81
82 std::list<std::string> columns;
83 int lineCols = this->splitIntoColumns(columns, line);
84 try {
85 fillInputValues(values, columns);
86 } catch (boost::bad_lexical_cast &) {
87 continue;
88 }
89 if (numCols < 0)
90 numCols = lineCols;
91 if (lineCols == numCols) {
92 ++matchingRows;
93 if (matchingRows == rowsToMatch)
94 break;
95 } else {
96 numCols = lineCols;
97 matchingRows = 1;
98 }
99 }
100 // if the file does not have more than rowsToMatch + skipped lines, it will
101 // stop
102 // and raise the EndOfFile, this may cause problems for small workspaces.
103 // In this case clear the flag
104 if (file.eof()) {
105 file.clear(file.eofbit);
106 }
107 // Seek the file pointer back to the start.
108 // NOTE: Originally had this as finding the stream position of the data and
109 // then moving the file pointer
110 // back to the start of the data. This worked when a file was read on the
111 // same platform it was written
112 // but failed when read on a different one due to underlying differences in
113 // the stream translation.
114 file.seekg(0, std::ios::beg);
115 // We've read the header plus the number of rowsToMatch
116 numToSkip = row - rowsToMatch;
117 }
118 int i(0);
119 std::string line;
120 while (i < numToSkip && getline(file, line)) {
121 ++i;
122 }
123 g_log.information() << "Skipped " << numToSkip << " line(s) of header information()\n";
124}
125
133API::Workspace_sptr LoadAscii::readData(std::ifstream &file) const {
134 // Get the first line and find the number of spectra from the number of
135 // columns
136 std::string line;
137 getline(file, line);
138 boost::trim(line);
139
140 std::list<std::string> columns;
141 const int numCols = splitIntoColumns(columns, line);
142 if (numCols < 2) {
143 g_log.error() << "Invalid data format found in file \"" << getPropertyValue("Filename") << "\"\n";
144 throw std::runtime_error("Invalid data format. Fewer than 2 columns found.");
145 }
146 size_t numSpectra(0);
147 bool haveErrors(false);
148 bool haveXErrors(false);
149 // Assume single data set with no errors
150 if (numCols == 2) {
151 numSpectra = numCols / 2;
152 }
153 // Data with errors
154 else if ((numCols - 1) % 2 == 0) {
155 numSpectra = (numCols - 1) / 2;
156 haveErrors = true;
157 }
158 // Data with errors on both X and Y (4-column file)
159 else if (numCols == 4) {
160 numSpectra = 1;
161 haveErrors = true;
162 haveXErrors = true;
163 } else {
164 g_log.error() << "Invalid data format found in file \"" << getPropertyValue("Filename") << "\"\n";
165 g_log.error() << "LoadAscii requires the number of columns to be an even "
166 "multiple of either 2 or 3.";
167 throw std::runtime_error("Invalid data format.");
168 }
169
170 // A quick check at the number of lines won't be accurate enough as
171 // potentially there
172 // could be blank lines and comment lines
173 int numBins(0), lineNo(0);
174 std::vector<DataObjects::Histogram1D> spectra(
175 numSpectra,
176 DataObjects::Histogram1D(HistogramData::Histogram::XMode::Points, HistogramData::Histogram::YMode::Counts));
177 std::vector<double> dx;
178 std::vector<double> values(numCols, 0.);
179 do {
180 ++lineNo;
181 boost::trim(line);
182 if (this->skipLine(line))
183 continue;
184 columns.clear();
185 int lineCols = this->splitIntoColumns(columns, line);
186 if (lineCols != numCols) {
187 std::ostringstream ostr;
188 ostr << "Number of columns changed at line " << lineNo;
189 throw std::runtime_error(ostr.str());
190 }
191
192 try {
193 fillInputValues(values, columns); // ignores nans and replaces them with 0
194 } catch (boost::bad_lexical_cast &) {
195 g_log.error() << "Invalid value on line " << lineNo << " of \"" << getPropertyValue("Filename") << "\"\n";
196 throw std::runtime_error("Invalid value encountered.");
197 }
198
199 for (size_t i = 0; i < numSpectra; ++i) {
200 auto hist = spectra[i].histogram();
201 hist.resize(hist.size() + 1);
202 hist.mutableX().back() = values[0];
203 hist.mutableY().back() = values[i * 2 + 1];
204 if (haveErrors) {
205 hist.mutableE().back() = values[i * 2 + 2];
206 }
207 spectra[i].setHistogram(hist);
208 }
209 if (haveXErrors) {
210 // Note: we only have X errors with 4-column files.
211 // We are only here when i=0.
212 dx.emplace_back(values[3]);
213 }
214 ++numBins;
215 } while (getline(file, line));
216 auto sharedDx = Kernel::make_cow<HistogramData::HistogramDx>(dx);
217 for (size_t i = 0; i < numSpectra; ++i) {
218 if (haveXErrors) {
219 spectra[i].setSharedDx(sharedDx);
220 }
221 }
222
223 MatrixWorkspace_sptr localWorkspace = std::dynamic_pointer_cast<MatrixWorkspace>(
224 WorkspaceFactory::Instance().create("Workspace2D", numSpectra, numBins, numBins));
225 try {
226 localWorkspace->getAxis(0)->unit() = UnitFactory::Instance().create(getProperty("Unit"));
227 } catch (Exception::NotFoundError &) {
228 // Asked for dimensionless workspace (obviously not in unit factory)
229 }
230
231 for (size_t i = 0; i < numSpectra; ++i) {
232 localWorkspace->setHistogram(i, spectra[i].histogram());
233
234 // Just have spectrum number start at 1 and count up
235 localWorkspace->getSpectrum(i).setSpectrumNo(static_cast<specnum_t>(i) + 1);
236 }
237 return localWorkspace;
238}
239
243void LoadAscii::peekLine(std::ifstream &is, std::string &str) const { str = Kernel::Strings::peekLine(is); }
244
250bool LoadAscii::skipLine(const std::string &line) const { return Kernel::Strings::skipLine(line); }
251
258int LoadAscii::splitIntoColumns(std::list<std::string> &columns, const std::string &str) const {
259 boost::split(columns, str, boost::is_any_of(m_columnSep), boost::token_compress_on);
260 return static_cast<int>(columns.size());
261}
262
268void LoadAscii::fillInputValues(std::vector<double> &values, const std::list<std::string> &columns) const {
269 values.resize(columns.size());
270 int i = 0;
271 for (auto value : columns) {
272 boost::trim(value);
273 boost::to_lower(value);
274 if (value == "nan" || value == "1.#qnan") // ignores nans (not a number) and
275 // replaces them with a nan
276 {
277 double nan = std::numeric_limits<double>::quiet_NaN(); //(0.0/0.0);
278 values[i] = nan;
279 } else {
280 values[i] = boost::lexical_cast<double>(value);
281 }
282 ++i;
283 }
284}
285
286//--------------------------------------------------------------------------
287// Private methods
288//--------------------------------------------------------------------------
291 const std::vector<std::string> extensions{".dat", ".txt", ".csv", ""};
292 declareProperty(std::make_unique<FileProperty>("Filename", "", FileProperty::Load, extensions),
293 "The name of the text file to read, including its full or "
294 "relative path. The file extension must be .txt, .dat, or "
295 ".csv");
296 declareProperty(std::make_unique<WorkspaceProperty<Workspace>>("OutputWorkspace", "", Direction::Output),
297 "The name of the workspace that will be created, filled with "
298 "the read-in data and stored in the [[Analysis Data "
299 "Service]].");
300
301 std::string spacers[6][6] = {{"Automatic", ",\t:; "}, {"CSV", ","}, {"Tab", "\t"},
302 {"Space", " "}, {"Colon", ":"}, {"SemiColon", ";"}};
303 // For the ListValidator
304 std::array<std::string, 5> sepOptions;
305 for (size_t i = 0; i < 5; ++i) {
306 const auto &option = spacers[i][0];
307 m_separatorIndex.insert(std::pair<std::string, std::string>(option, spacers[i][1]));
308 sepOptions[i] = option;
309 }
310
311 declareProperty("Separator", "Automatic", std::make_shared<StringListValidator>(sepOptions),
312 "The separator between data columns in the data file. The possible "
313 "values are \"CSV\", \"Tab\", "
314 "\"Space\", \"SemiColon\", or \"Colon\" (default: Automatic selection).");
315
316 std::vector<std::string> units = UnitFactory::Instance().getKeys();
317 units.insert(units.begin(), "Dimensionless");
318 declareProperty("Unit", "Energy", std::make_shared<StringListValidator>(units),
319 "The unit to assign to the X axis (anything known to the "
320 "[[Unit Factory]] or \"Dimensionless\")");
321
322 auto mustBePosInt = std::make_shared<BoundedValidator<int>>();
323 mustBePosInt->setLower(0);
324 declareProperty("SkipNumLines", EMPTY_INT(), mustBePosInt,
325 "If given, skip this number of lines at the start of the file.");
326}
327
332 std::string filename = getProperty("Filename");
333 std::ifstream file(filename.c_str());
334 if (!file) {
335 g_log.error("Unable to open file: " + filename);
336 throw Exception::FileError("Unable to open file: ", filename);
337 }
338
339 std::string sepOption = getProperty("Separator");
340 m_columnSep = m_separatorIndex[sepOption];
341 // Process the header information.
342 processHeader(file);
343 // Read the data
344 MatrixWorkspace_sptr outputWS = std::dynamic_pointer_cast<MatrixWorkspace>(readData(file));
345 outputWS->mutableRun().addProperty("Filename", filename);
346 setProperty("OutputWorkspace", outputWS);
347}
348
349} // namespace Mantid::DataHandling
double value
The value of the point.
Definition: FitMW.cpp:51
#define DECLARE_FILELOADER_ALGORITHM(classname)
DECLARE_FILELOADER_ALGORITHM should be used in place of the standard DECLARE_ALGORITHM macro when wri...
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
Definition: Algorithm.cpp:1913
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
Definition: Algorithm.cpp:2026
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Definition: Algorithm.cpp:2076
void useAlgorithm(const std::string &, const int version=-1)
The algorithm to use instead of this one.
@ Load
allowed here which will be passed to the algorithm
Definition: FileProperty.h:52
A property class for workspaces.
int splitIntoColumns(std::list< std::string > &columns, const std::string &str) const
Split the data into columns.
Definition: LoadAscii.cpp:258
void peekLine(std::ifstream &is, std::string &str) const
Peek at a line without extracting it from the stream.
Definition: LoadAscii.cpp:243
int confidence(Kernel::FileDescriptor &descriptor) const override
Returns a confidence value that this algorithm can load a file.
Definition: LoadAscii.cpp:39
void fillInputValues(std::vector< double > &values, const std::list< std::string > &columns) const
Fill the given vector with the data values.
Definition: LoadAscii.cpp:268
std::string m_columnSep
The column separator.
Definition: LoadAscii.h:70
void exec() override
Execute the algorithm.
Definition: LoadAscii.cpp:331
LoadAscii()
Default constructor.
Definition: LoadAscii.cpp:31
bool skipLine(const std::string &line) const
Return true if the line is to be skipped.
Definition: LoadAscii.cpp:250
std::map< std::string, std::string > m_separatorIndex
Map the separator options to their string equivalents.
Definition: LoadAscii.h:79
virtual void processHeader(std::ifstream &file) const
Process the header information within the file.
Definition: LoadAscii.cpp:60
virtual API::Workspace_sptr readData(std::ifstream &file) const
Read the data from the file.
Definition: LoadAscii.cpp:133
void init() override
Declare properties.
Definition: LoadAscii.cpp:290
1D histogram implementation.
Definition: Histogram1D.h:18
Records the filename and the description of failure.
Definition: Exception.h:98
Exception for when an item is not found in a collection.
Definition: Exception.h:145
Defines a wrapper around an open file.
const std::string & filename() const
Access the filename.
static bool isAscii(const std::string &filename, const size_t nbytes=256)
Returns true if the file is considered ascii.
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
void error(const std::string &msg)
Logs at error level.
Definition: Logger.cpp:77
void information(const std::string &msg)
Logs at information level.
Definition: Logger.cpp:105
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
std::shared_ptr< Workspace > Workspace_sptr
shared pointer to Mantid::API::Workspace
Definition: Workspace_fwd.h:20
Kernel::Logger g_log("ExperimentInfo")
static logger object
std::shared_ptr< MatrixWorkspace > MatrixWorkspace_sptr
shared pointer to the matrix workspace base class
std::unique_ptr< T > create(const P &parent, const IndexArg &indexArg, const HistArg &histArg)
This is the create() method that all the other create() methods call.
MANTID_KERNEL_DLL std::string peekLine(std::istream &fh)
Peek at a line without extracting it from the stream.
Definition: Strings.cpp:344
MANTID_KERNEL_DLL bool skipLine(const std::string &line)
Determines if a string starts with a #.
Definition: Strings.cpp:408
constexpr int EMPTY_INT() noexcept
Returns what we consider an "empty" integer within a property.
Definition: EmptyValues.h:25
int32_t specnum_t
Typedef for a spectrum Number.
Definition: IDTypes.h:16
@ Output
An output workspace.
Definition: Property.h:54