Mantid
Loading...
Searching...
No Matches
LoadLog.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7//----------------------------------------------------------------------
8// Includes
9//----------------------------------------------------------------------
11#include "LoadRaw/isisraw2.h"
15#include "MantidKernel/Glob.h"
20
21#include "MantidTypes/Core/DateAndTimeHelpers.h"
22
23#include <Poco/DateTimeFormat.h>
24#include <Poco/DateTimeParser.h>
25#include <Poco/DirectoryIterator.h>
26#include <boost/algorithm/string.hpp>
27#include <filesystem>
28#include <fstream> // used to get ifstream
29#include <regex>
30#include <sstream>
31#include <utility>
32
33using Mantid::Types::Core::DateAndTime;
34
35namespace Mantid::DataHandling {
36// Register the algorithm into the algorithm factory
37DECLARE_ALGORITHM(LoadLog)
38
39using namespace Kernel;
40using API::FileProperty;
41using API::MatrixWorkspace;
43using API::WorkspaceProperty;
45using Types::Core::DateAndTime;
46
47namespace {
48
49template <class MapClass, class LoggerType>
50void addLogDataToRun(Mantid::API::Run &run, MapClass &aMap, LoggerType &logger) {
51 for (auto &itr : aMap) {
52 try {
53 run.addLogData(itr.second.release());
54 } catch (std::invalid_argument &e) {
55 logger.warning() << e.what() << '\n';
56 } catch (Exception::ExistsError &e) {
57 logger.warning() << e.what() << '\n';
58 }
59 }
60}
61
62} // namespace
63
/// Default constructor; the compiler-generated implementation is used.
65LoadLog::LoadLog() = default;
66
69 // When used as a Child Algorithm the workspace name is not used - hence the
70 // "Anonymous" to satisfy the validator
71 declareProperty(std::make_unique<WorkspaceProperty<MatrixWorkspace>>("Workspace", "Anonymous", Direction::InOut),
72 "The name of the workspace to which the log data will be added.");
73
74 const std::vector<std::string> exts{".txt", ".log"};
75 declareProperty(std::make_unique<FileProperty>("Filename", "", FileProperty::Load, exts),
76 "The filename (including its full or relative path) of a SNS "
77 "text log file (not cvinfo), "
78 "an ISIS log file, or an ISIS raw file. "
79 "If a raw file is specified all log files associated with "
80 "that raw file are loaded into the specified workspace. The "
81 "file extension must "
82 "either be .raw or .s when specifying a raw file");
83
84 declareProperty(std::make_unique<ArrayProperty<std::string>>("Names"),
85 "For SNS-style log files only: the names of each column's log, separated "
86 "by commas. "
87 "This must be one fewer than the number of columns in the file.");
88
89 declareProperty(std::make_unique<ArrayProperty<std::string>>("Units"),
90 "For SNS-style log files only: the units of each column's log, separated "
91 "by commas. "
92 "This must be one fewer than the number of columns in the file. "
93 "Optional: leave blank for no units in any log.");
94
95 declareProperty("NumberOfColumns", Mantid::EMPTY_INT(),
96 "Number of columns in the file. If not set Mantid will "
97 "attempt to guess.");
98}
99
107 // Retrieve the filename from the properties and perform some initial checks
108 // on the filename
109 m_filename = getPropertyValue("Filename");
110 // Get the log file names if provided.
111 std::vector<std::string> names = getProperty("Names");
112 // Open file, in order to pass it once to all functions that will load it.
113 std::ifstream logFileStream(m_filename.c_str());
114
115 // File property checks whether the given path exists, just check that is
116 // actually a file
117 std::filesystem::path l_path(m_filename);
118 if (std::filesystem::is_directory(l_path)) {
119 throw Exception::FileError("Filename is a directory:", m_filename);
120 }
121
122 // Get the input workspace and retrieve run from workspace.
123 // the log file(s) will be loaded into the run object of the workspace
124 const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");
125
126 if (isAscii(m_filename)) {
127 // Is it a SNS style file? If so, we load it and abort.
128 if (LoadSNSText()) {
129 return;
130 } // Otherwise we continue.
131 }
132
133 // If there's more than one log name provided, then it's an invalid ISIS file.
134 if (names.size() > 1) {
135 throw std::invalid_argument("More than one log name provided. Invalid ISIS log file.");
136 }
137
138 // If it's an old log file (pre-2007), then it is not currently supported.
139 if (isOldDateTimeFormat(logFileStream)) {
140 throw std::invalid_argument("File " + m_filename + " cannot be read because it has an old unsupported format.");
141 }
142
143 int colNum = static_cast<int>(getProperty("NumberOfColumns"));
144
145 if (colNum == Mantid::EMPTY_INT()) {
146 colNum = countNumberColumns(logFileStream, m_filename);
147 }
148
149 switch (colNum) {
150 case 2:
151 loadTwoColumnLogFile(logFileStream, extractLogName(names), localWorkspace->mutableRun());
152 break;
153 case 3:
154 loadThreeColumnLogFile(logFileStream, m_filename, localWorkspace->mutableRun());
155 break;
156 default:
157 throw std::invalid_argument("The log file provided is invalid as it has "
158 "less than 2 or more than three columns.");
159 break;
160 }
161}
162
169void LoadLog::loadTwoColumnLogFile(std::ifstream &logFileStream, std::string logFileName, API::Run &run) {
170 if (!logFileStream) {
171 throw std::invalid_argument("Unable to open file " + m_filename);
172 }
173
174 // figure out if second column is a number or a string
175 std::string aLine;
176 if (Mantid::Kernel::Strings::extractToEOL(logFileStream, aLine)) {
177 if (!isDateTimeString(aLine)) {
178 throw std::invalid_argument("File " + m_filename +
179 " is not a standard ISIS log file. Expected "
180 "to be a two column file.");
181 }
182
183 std::string DateAndTime;
184 std::stringstream ins(aLine);
185 ins >> DateAndTime;
186
187 // read in what follows the date-time string in the log file and figure out
188 // what type it is
189 std::string whatType;
190 ins >> whatType;
191 kind l_kind = classify(whatType);
192
193 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
194 throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + m_filename);
195 }
196
197 try {
198 Property *log = LogParser::createLogProperty(m_filename, stringToLower(std::move(logFileName)));
199 if (log) {
200 run.addLogData(log);
201 }
202 } catch (std::exception &) {
203 }
204 }
205}
206
214void LoadLog::loadThreeColumnLogFile(std::ifstream &logFileStream, const std::string &logFileName, API::Run &run) {
215 std::string str;
216 std::string propname;
217 std::map<std::string, std::unique_ptr<Kernel::TimeSeriesProperty<double>>> dMap;
218 std::map<std::string, std::unique_ptr<Kernel::TimeSeriesProperty<std::string>>> sMap;
219 kind l_kind(LoadLog::empty);
220 bool isNumeric(false);
221
222 if (!logFileStream) {
223 throw std::invalid_argument("Unable to open file " + m_filename);
224 }
225
226 while (Mantid::Kernel::Strings::extractToEOL(logFileStream, str)) {
227 if (!isDateTimeString(str) && !str.empty()) {
228 throw std::invalid_argument("File " + logFileName +
229 " is not a standard ISIS log file. Expected "
230 "to be a file starting with DateTime String "
231 "format.");
232 }
233
234 if (!Kernel::TimeSeriesProperty<double>::isTimeString(str) || (str.empty() || str[0] == '#')) {
235 // if the line doesn't start with a time read the next line
236 continue;
237 }
238
239 std::stringstream line(str);
240 std::string timecolumn;
241 line >> timecolumn;
242
243 std::string blockcolumn;
244 line >> blockcolumn;
245 l_kind = classify(blockcolumn);
246
247 if (LoadLog::empty == l_kind) {
248 g_log.warning() << "Failed to parse line in log file: " << timecolumn << "\t" << blockcolumn;
249 continue;
250 }
251
252 if (LoadLog::string != l_kind) {
253 throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + logFileName);
254 }
255
256 std::string valuecolumn;
257 line >> valuecolumn;
258 l_kind = classify(valuecolumn);
259
260 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
261 continue; // no value defined, just skip this entry
262 }
263
264 // column two in .log file is called block column
265 propname = stringToLower(blockcolumn);
266 // check if the data is numeric
267 std::istringstream istr(valuecolumn);
268 double dvalue;
269 istr >> dvalue;
270 isNumeric = !istr.fail();
271
272 if (isNumeric) {
273 auto ditr = dMap.find(propname);
274 if (ditr != dMap.end()) {
275 auto prop = ditr->second.get();
276 if (prop)
277 prop->addValue(timecolumn, dvalue);
278 } else {
279 auto logd = std::make_unique<Kernel::TimeSeriesProperty<double>>(propname);
280 logd->addValue(timecolumn, dvalue);
281 dMap.emplace(propname, std::move(logd));
282 }
283 } else {
284 auto sitr = sMap.find(propname);
285 if (sitr != sMap.end()) {
286 auto prop = sitr->second.get();
287 if (prop)
288 prop->addValue(timecolumn, valuecolumn);
289 } else {
290 auto logs = std::make_unique<Kernel::TimeSeriesProperty<std::string>>(propname);
291 logs->addValue(timecolumn, valuecolumn);
292 sMap.emplace(propname, std::move(logs));
293 }
294 }
295 }
296 addLogDataToRun(run, dMap, g_log);
297 addLogDataToRun(run, sMap, g_log);
298}
299
307std::string LoadLog::extractLogName(const std::vector<std::string> &logName) {
308 if (logName.empty()) {
309 return std::filesystem::path(m_filename).stem().string();
310 } else {
311 return logName.front();
312 }
313}
314
320
321 // Get the SNS-specific parameter
322 std::vector<std::string> names = getProperty("Names");
323 std::vector<std::string> units = getProperty("Units");
324
325 // Get the input workspace and retrieve run from workspace.
326 // the log file(s) will be loaded into the run object of the workspace
327 const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");
328
329 // open log file
330 std::ifstream inLogFile(m_filename.c_str());
331
332 // Get the first line
333 std::string aLine;
334 if (!Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine))
335 return false;
336
337 std::vector<double> cols;
338 bool ret = SNSTextFormatColumns(aLine, cols);
339 // Any error?
340 if (!ret || cols.size() < 2)
341 return false;
342
343 auto numCols = static_cast<size_t>(cols.size() - 1);
344 if (names.size() != numCols)
345 throw std::invalid_argument("The Names parameter should have one fewer "
346 "entry as the number of columns in a SNS-style "
347 "text log file.");
348 if ((!units.empty()) && (units.size() != numCols))
349 throw std::invalid_argument("The Units parameter should have either 0 "
350 "entries or one fewer entry as the number of "
351 "columns in a SNS-style text log file.");
352
353 // Ok, create all the logs
354 std::vector<TimeSeriesProperty<double> *> props;
355 for (size_t i = 0; i < numCols; i++) {
356 auto p = new TimeSeriesProperty<double>(names[i]);
357 if (units.size() == numCols)
358 p->setUnits(units[i]);
359 props.emplace_back(p);
360 }
361 // Go back to start
362 inLogFile.seekg(0);
363 while (Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine)) {
364 if (aLine.empty())
365 break;
366
367 if (SNSTextFormatColumns(aLine, cols)) {
368 if (cols.size() == numCols + 1) {
369 DateAndTime time(cols[0], 0.0);
370 for (size_t i = 0; i < numCols; i++)
371 props[i]->addValue(time, cols[i + 1]);
372 } else
373 throw std::runtime_error("Inconsistent number of columns while reading "
374 "SNS-style text file.");
375 } else
376 throw std::runtime_error("Error while reading columns in SNS-style text file.");
377 }
378 // Now add all the full logs to the workspace
379 for (size_t i = 0; i < numCols; i++) {
380 std::string propName = props[i]->name();
381 if (localWorkspace->mutableRun().hasProperty(propName)) {
382 localWorkspace->mutableRun().removeLogData(propName);
383 g_log.information() << "Log data named " << propName << " already existed and was overwritten.\n";
384 }
385 localWorkspace->mutableRun().addLogData(props[i]);
386 }
387
388 return true;
389}
390
396LoadLog::kind LoadLog::classify(const std::string &s) const {
397 if (s.empty()) {
398 return LoadLog::empty;
399 }
400
401 using std::string;
402 const string lower("abcdefghijklmnopqrstuvwxyz");
403 const string upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
404 const string letters = lower + upper + '_';
405
406 if (letters.find_first_of(s) != string::npos) {
407 return LoadLog::string;
408 }
409
410 const auto isNumber = [](const std::string &str) {
411 // try and get stold to parse a number out of the string
412 // if this throws then we don't have a number
413 try {
414 (void)std::stold(str);
415 return true;
416 } catch (const std::invalid_argument &) {
417 return false;
418 } catch (const std::out_of_range &) {
419 return false;
420 }
421 };
422
423 return (isNumber(s)) ? LoadLog::number : LoadLog::empty;
424}
425
431std::string LoadLog::stringToLower(std::string strToConvert) {
432 std::transform(strToConvert.begin(), strToConvert.end(), strToConvert.begin(), tolower);
433 return strToConvert;
434}
435
441bool LoadLog::isAscii(const std::string &filename) {
442 FILE *file = fopen(filename.c_str(), "rb");
443 if (file) {
444 char data[256];
445 size_t file_size = fread(data, 1, sizeof(data), file);
446 fclose(file);
447 char const *pend = &data[file_size];
448 /*
449 * Call it a binary file if we find a non-ascii character in the
450 * first 256 bytes of the file.
451 */
452 for (char *char_pos = data; char_pos < pend; ++char_pos) {
453 auto char_value = static_cast<unsigned long>(*char_pos);
454 if (char_value > 0x7F) {
455 return false;
456 }
457 }
458 return true;
459 } else {
460 return false; // failed to open the file
461 }
462}
463
469bool LoadLog::isDateTimeString(const std::string &str) const {
470 return Types::Core::DateAndTimeHelpers::stringIsISO8601(str.substr(0, 19));
471}
472
481bool LoadLog::isOldDateTimeFormat(std::ifstream &logFileStream) const {
482 // extract first line of file
483 std::string firstLine;
484 Mantid::Kernel::Strings::extractToEOL(logFileStream, firstLine);
485 // reset file back to the beginning
486 logFileStream.seekg(0);
487
488 std::regex oldDateFormat(R"([A-Z][a-z]{2} [ 1-3]\d-[A-Z]{3}-\d{4} \d{2}:\d{2}:\d{2})");
489
490 return std::regex_match(firstLine.substr(0, 24), oldDateFormat);
491}
492
499bool LoadLog::SNSTextFormatColumns(const std::string &input, std::vector<double> &out) const {
500 std::vector<std::string> strs;
501 out.clear();
502 boost::split(strs, input, boost::is_any_of("\t "));
503 double val;
504 // Every column must evaluate to a double
505 for (auto &str : strs) {
506 if (!Strings::convert<double>(str, val))
507 return false;
508 else
509 out.emplace_back(val);
510 }
511 // Nothing failed = it is that format.
512 return true;
513}
514
520int LoadLog::countNumberColumns(std::ifstream &logFileStream, const std::string &logFileName) {
521 if (!logFileStream) {
522 throw std::invalid_argument("Unable to open file " + m_filename);
523 }
524
525 std::string str;
526 kind l_kind(LoadLog::empty);
527
528 // extract first line of file
529 Mantid::Kernel::Strings::extractToEOL(logFileStream, str);
530
531 if (!isDateTimeString(str)) {
532 throw std::invalid_argument("File " + logFileName +
533 " is not a standard ISIS log file. Expected to "
534 "be a file starting with DateTime String "
535 "format.");
536 }
537
538 std::stringstream line(str);
539 std::string timecolumn;
540 line >> timecolumn;
541
542 std::string blockcolumn;
543 line >> blockcolumn;
544 l_kind = classify(blockcolumn);
545
546 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
547 throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + logFileName);
548 }
549
550 std::string valuecolumn;
551 line >> valuecolumn;
552 l_kind = classify(valuecolumn);
553
554 // reset file back to the beginning
555 logFileStream.seekg(0);
556
557 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
558 return 2; // looks like a two column file
559 } else {
560 return 3; // looks like a three column file
561 }
562}
563
564} // namespace Mantid::DataHandling
#define DECLARE_ALGORITHM(classname)
Definition Algorithm.h:538
double lower
lower and upper bounds on the multiplier, if known
double upper
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Kernel::Logger & g_log
Definition Algorithm.h:422
@ Load
allowed here which will be passed to the algorithm
void addLogData(Kernel::Property *p)
Add a log entry.
Definition LogManager.h:127
This class stores information regarding an experimental run as a series of log entries.
Definition Run.h:35
A property class for workspaces.
bool LoadSNSText()
SNS text.
Definition LoadLog.cpp:319
void init() override
Overwrites Algorithm method.
Definition LoadLog.cpp:68
std::string m_filename
The name and path of an input file.
Definition LoadLog.h:86
kind classify(const std::string &s) const
Takes as input a string and try to determine what type it is.
Definition LoadLog.cpp:396
bool isAscii(const std::string &filename)
Checks if the file is an ASCII file.
Definition LoadLog.cpp:441
LoadLog()
Default constructor.
bool isDateTimeString(const std::string &str) const
Check if first 19 characters of a string is date-time string according to yyyy-mm-ddThh:mm:ss.
Definition LoadLog.cpp:469
std::string extractLogName(const std::vector< std::string > &logName)
Checks if a log file name was provided (e.g.
Definition LoadLog.cpp:307
void exec() override
Overwrites Algorithm method.
Definition LoadLog.cpp:106
bool SNSTextFormatColumns(const std::string &input, std::vector< double > &out) const
Check for SNS-style text file.
Definition LoadLog.cpp:499
std::string stringToLower(std::string strToConvert)
Convert string to lower case.
Definition LoadLog.cpp:431
int countNumberColumns(std::ifstream &logFileStream, const std::string &logFileName)
Returns the number of columns in the log file.
Definition LoadLog.cpp:520
bool isOldDateTimeFormat(std::ifstream &logFileStream) const
Check whether the first 24 characters of a string are consistent with the date-time format used in ol...
Definition LoadLog.cpp:481
void loadThreeColumnLogFile(std::ifstream &logFileStream, const std::string &logFileName, API::Run &run)
Create timeseries property from .log file and adds that to sample object.
Definition LoadLog.cpp:214
void loadTwoColumnLogFile(std::ifstream &logFileStream, std::string logFileName, API::Run &run)
Loads two column log file data into local workspace.
Definition LoadLog.cpp:169
kind
type returned by classify
Definition LoadLog.h:89
Support for a property that holds an array of values.
Records the filename and the description of failure.
Definition Exception.h:98
static Kernel::Property * createLogProperty(const std::string &logFName, const std::string &name)
Creates a TimeSeriesProperty of either double or string type depending on the log data Returns a poin...
Definition LogParser.cpp:39
void warning(const std::string &msg)
Logs at warning level.
Definition Logger.cpp:117
void information(const std::string &msg)
Logs at information level.
Definition Logger.cpp:136
Base class for properties.
Definition Property.h:94
A specialised Property class for holding a series of time-value pairs.
std::shared_ptr< MatrixWorkspace > MatrixWorkspace_sptr
shared pointer to the matrix workspace base class
std::shared_ptr< Workspace2D > Workspace2D_sptr
shared pointer to Mantid::DataObjects::Workspace2D
MANTID_KERNEL_DLL std::istream & extractToEOL(std::istream &is, std::string &str)
Extract a line from input stream, discarding any EOL characters encountered.
Definition Strings.cpp:1167
constexpr int EMPTY_INT() noexcept
Returns what we consider an "empty" integer within a property.
Definition EmptyValues.h:24
@ InOut
Both an input & output workspace.
Definition Property.h:55