Mantid
Loading...
Searching...
No Matches
LoadTBL.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7//----------------------------------------------------------------------
8// Includes
9//----------------------------------------------------------------------
13#include "MantidAPI/TableRow.h"
16#include <fstream>
17
19#include <boost/tokenizer.hpp>
20// String utilities
21#include <boost/algorithm/string.hpp>
22
23namespace Mantid::DataHandling {
25
26using namespace Kernel;
27using namespace API;
28
30LoadTBL::LoadTBL() = default;
31
39 const std::string &filePath = descriptor.filename();
40 const size_t filenameLength = filePath.size();
41
42 // Avoid some known file types that have different loaders
43 int confidence(0);
44 if (filenameLength > 12 ? (filePath.compare(filenameLength - 12, 12, "_runinfo.xml") == 0)
45 : false || filenameLength > 6 ? (filePath.compare(filenameLength - 6, 6, ".peaks") == 0)
46 : false || filenameLength > 10 ? (filePath.compare(filenameLength - 10, 10, ".integrate") == 0)
47 : false) {
48 confidence = 0;
49 } else if (descriptor.isAscii()) {
50 std::istream &stream = descriptor.data();
51 std::string firstLine;
52 Kernel::Strings::extractToEOL(stream, firstLine);
53 std::vector<std::string> columns;
54 try {
55 if (getCells(firstLine, columns, 16, true) == 17) // right ammount of columns
56 {
57 if (filePath.compare(filenameLength - 4, 4, ".tbl") == 0) {
58 confidence = 40;
59 } else {
60 confidence = 20;
61 }
62 } else // incorrect amount of columns
63 {
64 confidence = 0;
65 }
66 } catch (const std::length_error &) {
67 confidence = 0;
68 }
69 }
70 return confidence;
71}
72
78size_t LoadTBL::countCommas(const std::string &line) const {
79 size_t found = 0;
80 size_t pos = line.find(',', 0);
81 if (pos != std::string::npos) {
82 ++found;
83 }
84 while (pos != std::string::npos) {
85 pos = line.find(',', pos + 1);
86 if (pos != std::string::npos) {
87 ++found;
88 }
89 }
90 return found;
91}
92
100size_t LoadTBL::findQuotePairs(const std::string &line, std::vector<std::vector<size_t>> &quoteBounds) const {
101 size_t quoteOne = 0;
102 size_t quoteTwo = 0;
103 while (quoteOne != std::string::npos && quoteTwo != std::string::npos) {
104 if (quoteTwo == 0) {
105 quoteOne = line.find('"');
106 } else {
107 quoteOne = line.find('"', quoteTwo + 1);
108 }
109 if (quoteOne != std::string::npos) {
110 quoteTwo = line.find('"', quoteOne + 1);
111 if (quoteTwo != std::string::npos) {
112 std::vector<size_t> quotepair;
113 quotepair.emplace_back(quoteOne);
114 quotepair.emplace_back(quoteTwo);
115 quoteBounds.emplace_back(quotepair);
116 }
117 }
118 }
119 return quoteBounds.size();
120}
121
132void LoadTBL::csvParse(const std::string &line, std::vector<std::string> &cols,
133 std::vector<std::vector<size_t>> &quoteBounds, size_t expectedCommas) const {
134 size_t pairID = 0;
135 size_t lastComma = 0;
136 size_t pos = 0;
137 bool firstCheck = true;
138 bool firstCell = true;
139 cols.clear();
140 while (pos != std::string::npos) {
141 if (firstCheck) {
142 pos = line.find(',');
143 firstCheck = false;
144 // lastpos = pos;
145 } else {
146 pos = line.find(',', pos + 1);
147 // lastpos = pos;
148 }
149 if (pos != std::string::npos) {
150 if (pairID < quoteBounds.size() && pos > quoteBounds.at(pairID).at(0)) {
151 if (pos > quoteBounds.at(pairID).at(1)) {
152 // use the quote indexes to get the substring
153 cols.emplace_back(line.substr(quoteBounds.at(pairID).at(0) + 1,
154 quoteBounds.at(pairID).at(1) - (quoteBounds.at(pairID).at(0) + 1)));
155 ++pairID;
156 }
157 } else {
158 if (firstCell) {
159 cols.emplace_back(line.substr(0, pos));
160 firstCell = false;
161 } else {
162 auto colVal = line.substr(lastComma + 1, pos - (lastComma + 1));
163 cols.emplace_back(line.substr(lastComma + 1, pos - (lastComma + 1)));
164 }
165 }
166 lastComma = pos;
167 } else {
168 if (lastComma + 1 < line.length()) {
169 cols.emplace_back(line.substr(lastComma + 1));
170 } else {
171 cols.emplace_back("");
172 }
173 }
174 }
175 if (cols.size() != expectedCommas + 1) {
176 std::string message = "A line must contain " + std::to_string(expectedCommas) + " cell-delimiting commas. Found " +
177 std::to_string(cols.size() - 1) + ".";
178 throw std::length_error(message);
179 }
180}
181
194size_t LoadTBL::getCells(std::string line, std::vector<std::string> &cols, size_t expectedCommas, bool isOldTBL) const {
195 // first check the number of commas in the line.
196 size_t found = countCommas(line);
197 if (isOldTBL) {
198 if (found == expectedCommas) {
199 // If there are 16 that simplifies things and i can get boost to do the
200 // hard
201 // work
202 boost::split(cols, line, boost::is_any_of(","), boost::token_compress_off);
203 } else if (found < expectedCommas) {
204 // less than 16 means the line isn't properly formatted. So Throw
205 std::string message = "A line must contain " + std::to_string(expectedCommas) +
206 " cell-delimiting commas. Found " + std::to_string(found) + ".";
207 throw std::length_error(message);
208 } else {
209 // More than 16 will need further checks as more is only ok when pairs of
210 // quotes surround a comma, meaning it isn't a delimiter
211 std::vector<std::vector<size_t>> quoteBounds;
212 findQuotePairs(line, quoteBounds);
213 // if we didn't find any quotes, then there are too many commas and we
214 // definitely have too many delimiters
215 if (quoteBounds.empty()) {
216 std::string message = "A line must contain " + std::to_string(expectedCommas) +
217 " cell-delimiting commas. Found " + std::to_string(found) + ".";
218 throw std::length_error(message);
219 }
220 // now go through and split it up manually. Throw if we find ourselves in
221 // a
222 // positon where we'd add a 18th value to the vector
223 csvParse(line, cols, quoteBounds, expectedCommas);
224 }
225 } else {
226 std::vector<std::vector<size_t>> quoteBounds;
227 findQuotePairs(line, quoteBounds);
228 csvParse(line, cols, quoteBounds, expectedCommas);
229 if (cols.size() > expectedCommas) {
230 for (size_t i = expectedCommas + 1; i < cols.size(); i++) {
231 cols[expectedCommas].append(boost::lexical_cast<std::string>("," + cols[i]));
232 }
233 } else if (cols.size() < expectedCommas) {
234 std::string message = "A line must contain " + std::to_string(expectedCommas) +
235 " cell-delimiting commas. Found " + std::to_string(found) + ".";
236 throw std::length_error(message);
237 }
238 }
239 return cols.size();
240}
241bool LoadTBL::getColumnHeadings(std::string line, std::vector<std::string> &cols) {
242 boost::split(cols, line, boost::is_any_of(","), boost::token_compress_off);
243 std::string firstEntry = cols[0];
244 if (std::all_of(firstEntry.begin(), firstEntry.end(), ::isdigit)) {
245 // TBL file contains column headings
246 cols.clear();
247 return true;
248 } else {
249 return false;
250 }
251}
252//--------------------------------------------------------------------------
253// Private methods
254//--------------------------------------------------------------------------
257 declareProperty(std::make_unique<FileProperty>("Filename", "", FileProperty::Load, ".tbl"),
258 "The name of the table file to read, including its full or "
259 "relative path. The file extension must be .tbl");
260 declareProperty(std::make_unique<WorkspaceProperty<ITableWorkspace>>("OutputWorkspace", "", Direction::Output),
261 "The name of the workspace that will be created.");
262}
263
268 std::string filename = getProperty("Filename");
269 std::ifstream file(filename.c_str());
270 if (!file) {
271 throw Exception::FileError("Unable to open file: ", filename);
272 }
273 std::string line;
274
276
277 std::vector<std::string> columnHeadings;
278
280 // We want to check if the first line contains an empty string or series of
281 // ",,,,,"
282 // to see if we are loading a TBL file that actually contains data or not.
283 boost::split(columnHeadings, line, boost::is_any_of(","), boost::token_compress_off);
284 for (auto entry = columnHeadings.begin(); entry != columnHeadings.end();) {
285 if (entry->empty()) {
286 // erase the empty values
287 entry = columnHeadings.erase(entry);
288 } else {
289 // keep any non-empty values
290 ++entry;
291 }
292 }
293 if (columnHeadings.empty()) {
294 // we have an empty string or series of ",,,,,"
295 throw std::runtime_error("The file you are trying to load is Empty. \n "
296 "Please load a non-empty TBL file");
297 } else {
298 // set columns back to empty ready to populated with columnHeadings.
299 columnHeadings.clear();
300 }
301 // this will tell us if we need to just fill in the cell values
302 // or whether we will have to create the column headings as well.
303 bool isOld = getColumnHeadings(line, columnHeadings);
304
305 std::vector<std::string> rowVec;
306 if (isOld) {
308 // create the column headings
309 ws->addColumn("str", "StitchGroup");
310 ws->addColumn("str", "Run(s)");
311 ws->addColumn("str", "ThetaIn");
312 ws->addColumn("str", "TransRun(s)");
313 ws->addColumn("str", "Qmin");
314 ws->addColumn("str", "Qmax");
315 ws->addColumn("str", "dq/q");
316 ws->addColumn("str", "Scale");
317 ws->addColumn("str", "Options");
318 ws->addColumn("str", "HiddenOptions");
319
320 for (size_t i = 0; i < ws->columnCount(); i++) {
321 auto col = ws->getColumn(i);
322 col->setPlotType(0);
323 }
324
325 // we are using the old ReflTBL format
326 // where all of the entries are on one line
327 // so we must reset the stream to reread the first line.
328 std::ifstream fileReopened(filename.c_str());
329 if (!fileReopened) {
330 throw Exception::FileError("Unable to open file: ", filename);
331 }
332 std::string lineRevisited;
333 int stitchID = 1;
334 while (Kernel::Strings::extractToEOL(fileReopened, lineRevisited)) {
335 if (lineRevisited.empty() || lineRevisited == ",,,,,,,,,,,,,,,,") {
336 continue;
337 }
338 getCells(lineRevisited, rowVec, 16, isOld);
339 const std::string scaleStr = rowVec.at(16);
340 const std::string stitchStr = boost::lexical_cast<std::string>(stitchID);
341
342 // check if the first run in the row has any data associated with it
343 // 0 = runs, 1 = theta, 2 = trans, 3 = qmin, 4 = qmax
344 if (!rowVec[0].empty() || !rowVec[1].empty() || !rowVec[2].empty() || !rowVec[3].empty() || !rowVec[4].empty()) {
345 TableRow row = ws->appendRow();
346 row << stitchStr;
347 for (int i = 0; i < 5; ++i) {
348 row << rowVec.at(i);
349 }
350 row << rowVec.at(15);
351 row << scaleStr;
352 }
353
354 // check if the second run in the row has any data associated with it
355 // 5 = runs, 6 = theta, 7 = trans, 8 = qmin, 9 = qmax
356 if (!rowVec[5].empty() || !rowVec[6].empty() || !rowVec[7].empty() || !rowVec[8].empty() || !rowVec[9].empty()) {
357 TableRow row = ws->appendRow();
358 row << stitchStr;
359 for (int i = 5; i < 10; ++i) {
360 row << rowVec.at(i);
361 }
362 row << rowVec.at(15);
363 row << scaleStr;
364 }
365
366 // check if the third run in the row has any data associated with it
367 // 10 = runs, 11 = theta, 12 = trans, 13 = qmin, 14 = qmax
368 if (!rowVec[10].empty() || !rowVec[11].empty() || !rowVec[12].empty() || !rowVec[13].empty() ||
369 !rowVec[14].empty()) {
370 TableRow row = ws->appendRow();
371 row << stitchStr;
372 for (int i = 10; i < 17; ++i) {
373 if (i == 16)
374 row << scaleStr;
375 else
376 row << rowVec.at(i);
377 }
378 }
379 ++stitchID;
380 setProperty("OutputWorkspace", ws);
381 }
382
383 } else {
384 // we have a TBL format that contains column headings
385 // on the first row. These are now entries in the columns vector
386 if (!columnHeadings.empty()) {
387 // now we need to add the custom column headings from
388 // the columns vector to the TableWorkspace
389 for (auto heading = columnHeadings.begin(); heading != columnHeadings.end();) {
390 if (heading->empty()) {
391 // there is no need to have empty column headings.
392 heading = columnHeadings.erase(heading);
393 } else {
395 col = ws->addColumn("str", *heading);
396 col->setPlotType(0);
397 heading++;
398 }
399 }
400 }
401 size_t expectedCommas = columnHeadings.size() - 1;
402 while (Kernel::Strings::extractToEOL(file, line)) {
403 if (line.empty() || line == ",,,,,,,,,,,,,,,,") {
404 // skip over any empty lines
405 continue;
406 }
407 getCells(line, rowVec, columnHeadings.size() - 1, isOld);
408 // populate the columns with their values for this row.
409 TableRow row = ws->appendRow();
410 for (size_t i = 0; i < expectedCommas + 1; ++i) {
411 row << rowVec.at(i);
412 }
413 }
414 setProperty("OutputWorkspace", ws);
415 }
416}
417
418} // namespace Mantid::DataHandling
#define DECLARE_FILELOADER_ALGORITHM(classname)
DECLARE_FILELOADER_ALGORITHM should be used in place of the standard DECLARE_ALGORITHM macro when wri...
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
Definition: Algorithm.cpp:1913
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Definition: Algorithm.cpp:2076
@ Load
allowed here which will be passed to the algorithm
Definition: FileProperty.h:52
TableRow represents a row in a TableWorkspace.
Definition: TableRow.h:39
A property class for workspaces.
void init() override
Declare properties.
Definition: LoadTBL.cpp:256
bool getColumnHeadings(std::string line, std::vector< std::string > &cols)
Split into Column headings with respect to comma delimiters.
Definition: LoadTBL.cpp:241
void exec() override
Execute the algorithm.
Definition: LoadTBL.cpp:267
int confidence(Kernel::FileDescriptor &descriptor) const override
Returns a confidence value that this algorithm can load a file.
Definition: LoadTBL.cpp:38
size_t countCommas(const std::string &line) const
count the number of commas in the line
Definition: LoadTBL.cpp:78
size_t findQuotePairs(const std::string &line, std::vector< std::vector< size_t > > &quoteBounds) const
find all pairs of quotes in the line
Definition: LoadTBL.cpp:100
void csvParse(const std::string &line, std::vector< std::string > &cols, std::vector< std::vector< size_t > > &quoteBounds, size_t expectedCommas) const
Parse more complex CSV, used when the data involves commas in the data and quoted values.
Definition: LoadTBL.cpp:132
size_t getCells(std::string line, std::vector< std::string > &cols, size_t expectedCommas, bool isOldTBL) const
Split into columns with respect to the comma delimiters.
Definition: LoadTBL.cpp:194
LoadTBL()
Default constructor.
Records the filename and the description of failure.
Definition: Exception.h:98
Defines a wrapper around an open file.
const std::string & filename() const
Access the filename.
static bool isAscii(const std::string &filename, const size_t nbytes=256)
Returns true if the file is considered ascii.
std::istream & data()
Access the open file stream.
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
std::shared_ptr< Column > Column_sptr
Definition: Column.h:228
std::shared_ptr< ITableWorkspace > ITableWorkspace_sptr
shared pointer to Mantid::API::ITableWorkspace
MANTID_KERNEL_DLL std::istream & extractToEOL(std::istream &is, std::string &str)
Extract a line from input stream, discarding any EOL characters encountered.
Definition: Strings.cpp:1137
std::string to_string(const wide_integer< Bits, Signed > &n)
@ Output
An output workspace.
Definition: Property.h:54