Mantid
Loading...
Searching...
No Matches
LoadLog.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7//----------------------------------------------------------------------
8// Includes
9//----------------------------------------------------------------------
11#include "LoadRaw/isisraw2.h"
15#include "MantidKernel/Glob.h"
20
21#include "MantidTypes/Core/DateAndTimeHelpers.h"
22
23#include <Poco/DateTimeFormat.h>
24#include <Poco/DateTimeParser.h>
25#include <Poco/DirectoryIterator.h>
26#include <Poco/File.h>
27#include <Poco/Path.h>
28#include <boost/algorithm/string.hpp>
29#include <fstream> // used to get ifstream
30#include <regex>
31#include <sstream>
32#include <utility>
33
34using Mantid::Types::Core::DateAndTime;
35
36namespace Mantid::DataHandling {
37// Register the algorithm into the algorithm factory
38DECLARE_ALGORITHM(LoadLog)
39
40using namespace Kernel;
41using API::FileProperty;
42using API::MatrixWorkspace;
44using API::WorkspaceProperty;
46using Types::Core::DateAndTime;
47
48namespace {
49
50template <class MapClass, class LoggerType>
51void addLogDataToRun(Mantid::API::Run &run, MapClass &aMap, LoggerType &logger) {
52 for (auto &itr : aMap) {
53 try {
54 run.addLogData(itr.second.release());
55 } catch (std::invalid_argument &e) {
56 logger.warning() << e.what() << '\n';
57 } catch (Exception::ExistsError &e) {
58 logger.warning() << e.what() << '\n';
59 }
60 }
61}
62
63} // namespace
64
/// Default constructor (compiler-generated).
66LoadLog::LoadLog() = default;
67
// Declares the algorithm's properties: "Workspace" (InOut), "Filename",
// "Names", "Units" and "NumberOfColumns".
// NOTE(review): the enclosing function header is not present in this listing;
// the Doxygen index on this page gives it as `void init() override`
// (LoadLog.cpp:69).
70 // When used as a Child Algorithm the workspace name is not used - hence the
71 // "Anonymous" to satisfy the validator
72 declareProperty(std::make_unique<WorkspaceProperty<MatrixWorkspace>>("Workspace", "Anonymous", Direction::InOut),
73 "The name of the workspace to which the log data will be added.");
74
// NOTE(review): the extension list handed to the FileProperty only contains
// .txt and .log, while the description below talks about .raw / .s raw
// files - confirm which of the two is out of date.
75 const std::vector<std::string> exts{".txt", ".log"};
76 declareProperty(std::make_unique<FileProperty>("Filename", "", FileProperty::Load, exts),
77 "The filename (including its full or relative path) of a SNS "
78 "text log file (not cvinfo), "
79 "an ISIS log file, or an ISIS raw file. "
80 "If a raw file is specified all log files associated with "
81 "that raw file are loaded into the specified workspace. The "
82 "file extension must "
83 "either be .raw or .s when specifying a raw file");
84
// Names and Units are string arrays that are only read by the SNS text loader.
85 declareProperty(std::make_unique<ArrayProperty<std::string>>("Names"),
86 "For SNS-style log files only: the names of each column's log, separated "
87 "by commas. "
88 "This must be one fewer than the number of columns in the file.");
89
90 declareProperty(std::make_unique<ArrayProperty<std::string>>("Units"),
91 "For SNS-style log files only: the units of each column's log, separated "
92 "by commas. "
93 "This must be one fewer than the number of columns in the file. "
94 "Optional: leave blank for no units in any log.");
95
// Defaults to EMPTY_INT(), which the execution step treats as "not set" and
// replaces with a count detected from the file.
96 declareProperty("NumberOfColumns", Mantid::EMPTY_INT(),
97 "Number of columns in the file. If not set Mantid will "
98 "attempt to guess.");
99}
100
// Execution step: loads the file named by the "Filename" property into the
// run of the workspace given by the "Workspace" property. An ASCII file is
// first offered to the SNS text loader; otherwise the file is treated as a
// two- or three-column ISIS log file.
// Throws Exception::FileError if the path is a directory and
// std::invalid_argument if more than one name is given for an ISIS file, the
// file uses the old date-time format, or the column count is not 2 or 3.
// NOTE(review): the enclosing function header is not present in this listing;
// the Doxygen index on this page gives it as `void exec() override`
// (LoadLog.cpp:107).
108 // Retrieve the filename from the properties and perform some initial checks
109 // on the filename
110 m_filename = getPropertyValue("Filename");
111 // Get the log file names if provided.
112 std::vector<std::string> names = getProperty("Names");
113 // Open file, in order to pass it once to all functions that will load it.
// The stream state is not tested here; each loader that receives the stream
// checks it and throws if it is not usable.
114 std::ifstream logFileStream(m_filename.c_str());
115
116 // File property checks whether the given path exists, just check that is
117 // actually a file
118 Poco::File l_path(m_filename);
119 if (l_path.isDirectory()) {
120 throw Exception::FileError("Filename is a directory:", m_filename);
121 }
122
123 // Get the input workspace and retrieve run from workspace.
124 // the log file(s) will be loaded into the run object of the workspace
125 const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");
126
127 if (isAscii(m_filename)) {
128 // Is it a SNS style file? If so, we load it and abort.
129 if (LoadSNSText()) {
130 return;
131 } // Otherwise we continue.
132 }
133
134 // If there's more than one log name provided, then it's an invalid ISIS file.
135 if (names.size() > 1) {
136 throw std::invalid_argument("More than one log name provided. Invalid ISIS log file.");
137 }
138
139 // If it's an old log file (pre-2007), then it is not currently supported.
140 if (isOldDateTimeFormat(logFileStream)) {
141 throw std::invalid_argument("File " + m_filename + " cannot be read because it has an old unsupported format.");
142 }
143
144 int colNum = static_cast<int>(getProperty("NumberOfColumns"));
145
// EMPTY_INT() is the property's default, i.e. the user did not set a value.
146 if (colNum == Mantid::EMPTY_INT()) {
147 colNum = countNumberColumns(logFileStream, m_filename);
148 }
149
// Dispatch on the supplied or detected column count; only 2 and 3 are accepted.
150 switch (colNum) {
151 case 2:
152 loadTwoColumnLogFile(logFileStream, extractLogName(names), localWorkspace->mutableRun());
153 break;
154 case 3:
155 loadThreeColumnLogFile(logFileStream, m_filename, localWorkspace->mutableRun());
156 break;
157 default:
158 throw std::invalid_argument("The log file provided is invalid as it has "
159 "less than 2 or more than three columns.");
160 break;
161 }
162}
163
170void LoadLog::loadTwoColumnLogFile(std::ifstream &logFileStream, std::string logFileName, API::Run &run) {
171 if (!logFileStream) {
172 throw std::invalid_argument("Unable to open file " + m_filename);
173 }
174
175 // figure out if second column is a number or a string
176 std::string aLine;
177 if (Mantid::Kernel::Strings::extractToEOL(logFileStream, aLine)) {
178 if (!isDateTimeString(aLine)) {
179 throw std::invalid_argument("File " + m_filename +
180 " is not a standard ISIS log file. Expected "
181 "to be a two column file.");
182 }
183
184 std::string DateAndTime;
185 std::stringstream ins(aLine);
186 ins >> DateAndTime;
187
188 // read in what follows the date-time string in the log file and figure out
189 // what type it is
190 std::string whatType;
191 ins >> whatType;
192 kind l_kind = classify(whatType);
193
194 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
195 throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + m_filename);
196 }
197
198 try {
199 Property *log = LogParser::createLogProperty(m_filename, stringToLower(std::move(logFileName)));
200 if (log) {
201 run.addLogData(log);
202 }
203 } catch (std::exception &) {
204 }
205 }
206}
207
215void LoadLog::loadThreeColumnLogFile(std::ifstream &logFileStream, const std::string &logFileName, API::Run &run) {
216 std::string str;
217 std::string propname;
218 std::map<std::string, std::unique_ptr<Kernel::TimeSeriesProperty<double>>> dMap;
219 std::map<std::string, std::unique_ptr<Kernel::TimeSeriesProperty<std::string>>> sMap;
220 kind l_kind(LoadLog::empty);
221 bool isNumeric(false);
222
223 if (!logFileStream) {
224 throw std::invalid_argument("Unable to open file " + m_filename);
225 }
226
227 while (Mantid::Kernel::Strings::extractToEOL(logFileStream, str)) {
228 if (!isDateTimeString(str) && !str.empty()) {
229 throw std::invalid_argument("File " + logFileName +
230 " is not a standard ISIS log file. Expected "
231 "to be a file starting with DateTime String "
232 "format.");
233 }
234
235 if (!Kernel::TimeSeriesProperty<double>::isTimeString(str) || (str.empty() || str[0] == '#')) {
236 // if the line doesn't start with a time read the next line
237 continue;
238 }
239
240 std::stringstream line(str);
241 std::string timecolumn;
242 line >> timecolumn;
243
244 std::string blockcolumn;
245 line >> blockcolumn;
246 l_kind = classify(blockcolumn);
247
248 if (LoadLog::empty == l_kind) {
249 g_log.warning() << "Failed to parse line in log file: " << timecolumn << "\t" << blockcolumn;
250 continue;
251 }
252
253 if (LoadLog::string != l_kind) {
254 throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + logFileName);
255 }
256
257 std::string valuecolumn;
258 line >> valuecolumn;
259 l_kind = classify(valuecolumn);
260
261 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
262 continue; // no value defined, just skip this entry
263 }
264
265 // column two in .log file is called block column
266 propname = stringToLower(blockcolumn);
267 // check if the data is numeric
268 std::istringstream istr(valuecolumn);
269 double dvalue;
270 istr >> dvalue;
271 isNumeric = !istr.fail();
272
273 if (isNumeric) {
274 auto ditr = dMap.find(propname);
275 if (ditr != dMap.end()) {
276 auto prop = ditr->second.get();
277 if (prop)
278 prop->addValue(timecolumn, dvalue);
279 } else {
280 auto logd = std::make_unique<Kernel::TimeSeriesProperty<double>>(propname);
281 logd->addValue(timecolumn, dvalue);
282 dMap.emplace(propname, std::move(logd));
283 }
284 } else {
285 auto sitr = sMap.find(propname);
286 if (sitr != sMap.end()) {
287 auto prop = sitr->second.get();
288 if (prop)
289 prop->addValue(timecolumn, valuecolumn);
290 } else {
291 auto logs = std::make_unique<Kernel::TimeSeriesProperty<std::string>>(propname);
292 logs->addValue(timecolumn, valuecolumn);
293 sMap.emplace(propname, std::move(logs));
294 }
295 }
296 }
297 addLogDataToRun(run, dMap, g_log);
298 addLogDataToRun(run, sMap, g_log);
299}
300
308std::string LoadLog::extractLogName(const std::vector<std::string> &logName) {
309 if (logName.empty()) {
310 return (Poco::Path(Poco::Path(m_filename).getFileName()).getBaseName());
311 } else {
312 return (logName.front());
313 }
314}
315
321
// Tries to read m_filename as an SNS-style text log, i.e. a file in which
// every column of every line converts to a double. The first column is used
// as the time and each remaining column becomes one TimeSeriesProperty<double>
// named after the matching entry of the "Names" property.
// Returns false if the first line cannot be read, does not parse as numbers,
// or has fewer than two columns; returns true once the logs have been added
// to the workspace's run.
// Throws std::invalid_argument when the Names/Units counts do not match the
// number of value columns, and std::runtime_error when a later line fails to
// parse or has a different number of columns.
// NOTE(review): the enclosing function header is not present in this listing;
// the Doxygen index on this page gives it as `bool LoadSNSText()`
// (LoadLog.cpp:320).
322 // Get the SNS-specific parameter
323 std::vector<std::string> names = getProperty("Names");
324 std::vector<std::string> units = getProperty("Units");
325
326 // Get the input workspace and retrieve run from workspace.
327 // the log file(s) will be loaded into the run object of the workspace
328 const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");
329
330 // open log file
331 std::ifstream inLogFile(m_filename.c_str());
332
333 // Get the first line
334 std::string aLine;
335 if (!Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine))
336 return false;
337
338 std::vector<double> cols;
339 bool ret = SNSTextFormatColumns(aLine, cols);
340 // Any error?
341 if (!ret || cols.size() < 2)
342 return false;
343
// One log per column after the first (time) column.
344 auto numCols = static_cast<size_t>(cols.size() - 1);
345 if (names.size() != numCols)
346 throw std::invalid_argument("The Names parameter should have one fewer "
347 "entry as the number of columns in a SNS-style "
348 "text log file.");
349 if ((!units.empty()) && (units.size() != numCols))
350 throw std::invalid_argument("The Units parameter should have either 0 "
351 "entries or one fewer entry as the number of "
352 "columns in a SNS-style text log file.");
353
354 // Ok, create all the logs
// NOTE(review): these are raw `new` pointers; if either std::runtime_error
// below is thrown before they reach addLogData, nothing in this block deletes
// them.
355 std::vector<TimeSeriesProperty<double> *> props;
356 for (size_t i = 0; i < numCols; i++) {
357 auto p = new TimeSeriesProperty<double>(names[i]);
358 if (units.size() == numCols)
359 p->setUnits(units[i]);
360 props.emplace_back(p);
361 }
362 // Go back to start
363 inLogFile.seekg(0);
364 while (Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine)) {
// Reading stops at the first empty line.
365 if (aLine.empty())
366 break;
367
368 if (SNSTextFormatColumns(aLine, cols)) {
369 if (cols.size() == numCols + 1) {
// presumably (seconds, nanoseconds) - TODO confirm against DateAndTime
370 DateAndTime time(cols[0], 0.0);
371 for (size_t i = 0; i < numCols; i++)
372 props[i]->addValue(time, cols[i + 1]);
373 } else
374 throw std::runtime_error("Inconsistent number of columns while reading "
375 "SNS-style text file.");
376 } else
377 throw std::runtime_error("Error while reading columns in SNS-style text file.");
378 }
379 // Now add all the full logs to the workspace
380 for (size_t i = 0; i < numCols; i++) {
381 std::string name = props[i]->name();
// An existing log of the same name is removed first, so the new one replaces it.
382 if (localWorkspace->mutableRun().hasProperty(name)) {
383 localWorkspace->mutableRun().removeLogData(name);
384 g_log.information() << "Log data named " << name << " already existed and was overwritten.\n";
385 }
386 localWorkspace->mutableRun().addLogData(props[i]);
387 }
388
389 return true;
390}
391
398LoadLog::kind LoadLog::classify(const std::string &s) const {
399 if (s.empty()) {
400 return LoadLog::empty;
401 }
402
403 using std::string;
404 const string lower("abcdefghijklmnopqrstuvwxyz");
405 const string upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
406 const string letters = lower + upper + '_';
407
408 if (letters.find_first_of(s) != string::npos) {
409 return LoadLog::string;
410 }
411
412 const auto isNumber = [](const std::string &str) {
413 // try and get stold to parse a number out of the string
414 // if this throws then we don't have a number
415 try {
416 // cppcheck-suppress ignoredReturnValue
417 std::stold(str);
418 return true;
419 } catch (const std::invalid_argument &) {
420 return false;
421 } catch (const std::out_of_range &) {
422 return false;
423 }
424 };
425
426 return (isNumber(s)) ? LoadLog::number : LoadLog::empty;
427}
428
434std::string LoadLog::stringToLower(std::string strToConvert) {
435 std::transform(strToConvert.begin(), strToConvert.end(), strToConvert.begin(), tolower);
436 return strToConvert;
437}
438
444bool LoadLog::isAscii(const std::string &filename) {
445 FILE *file = fopen(filename.c_str(), "rb");
446 char data[256];
447 size_t n = fread(data, 1, sizeof(data), file);
448 fclose(file);
449 char *pend = &data[n];
450 /*
451 * Call it a binary file if we find a non-ascii character in the
452 * first 256 bytes of the file.
453 */
454 for (char *p = data; p < pend; ++p) {
455 auto ch = static_cast<unsigned long>(*p);
456 if (!(ch <= 0x7F)) {
457 return false;
458 }
459 }
460 return true;
461}
462
468bool LoadLog::isDateTimeString(const std::string &str) const {
469 return Types::Core::DateAndTimeHelpers::stringIsISO8601(str.substr(0, 19));
470}
471
480bool LoadLog::isOldDateTimeFormat(std::ifstream &logFileStream) const {
481 // extract first line of file
482 std::string firstLine;
483 Mantid::Kernel::Strings::extractToEOL(logFileStream, firstLine);
484 // reset file back to the beginning
485 logFileStream.seekg(0);
486
487 std::regex oldDateFormat(R"([A-Z][a-z]{2} [ 1-3]\d-[A-Z]{3}-\d{4} \d{2}:\d{2}:\d{2})");
488
489 return std::regex_match(firstLine.substr(0, 24), oldDateFormat);
490}
491
498bool LoadLog::SNSTextFormatColumns(const std::string &input, std::vector<double> &out) const {
499 std::vector<std::string> strs;
500 out.clear();
501 boost::split(strs, input, boost::is_any_of("\t "));
502 double val;
503 // Every column must evaluate to a double
504 for (auto &str : strs) {
505 if (!Strings::convert<double>(str, val))
506 return false;
507 else
508 out.emplace_back(val);
509 }
510 // Nothing failed = it is that format.
511 return true;
512}
513
519int LoadLog::countNumberColumns(std::ifstream &logFileStream, const std::string &logFileName) {
520 if (!logFileStream) {
521 throw std::invalid_argument("Unable to open file " + m_filename);
522 }
523
524 std::string str;
525 kind l_kind(LoadLog::empty);
526
527 // extract first line of file
528 Mantid::Kernel::Strings::extractToEOL(logFileStream, str);
529
530 if (!isDateTimeString(str)) {
531 throw std::invalid_argument("File " + logFileName +
532 " is not a standard ISIS log file. Expected to "
533 "be a file starting with DateTime String "
534 "format.");
535 }
536
537 std::stringstream line(str);
538 std::string timecolumn;
539 line >> timecolumn;
540
541 std::string blockcolumn;
542 line >> blockcolumn;
543 l_kind = classify(blockcolumn);
544
545 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
546 throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + logFileName);
547 }
548
549 std::string valuecolumn;
550 line >> valuecolumn;
551 l_kind = classify(valuecolumn);
552
553 // reset file back to the beginning
554 logFileStream.seekg(0);
555
556 if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
557 return 2; // looks like a two column file
558 } else {
559 return 3; // looks like a three column file
560 }
561}
562
563} // namespace Mantid::DataHandling
#define DECLARE_ALGORITHM(classname)
Definition: Algorithm.h:576
double lower
lower and upper bounds on the multiplier, if known
double upper
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
Definition: Algorithm.cpp:1913
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
Definition: Algorithm.cpp:2026
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Definition: Algorithm.cpp:2076
Kernel::Logger & g_log
Definition: Algorithm.h:451
@ Load
allowed here which will be passed to the algorithm
Definition: FileProperty.h:52
void addLogData(Kernel::Property *p)
Add a log entry.
Definition: LogManager.h:115
This class stores information regarding an experimental run as a series of log entries.
Definition: Run.h:38
A property class for workspaces.
const std::string name() const override
Algorithm's name for identification overriding a virtual method.
Definition: LoadLog.h:64
bool LoadSNSText()
SNS text.
Definition: LoadLog.cpp:320
void init() override
Overwrites Algorithm method.
Definition: LoadLog.cpp:69
std::string m_filename
The name and path of an input file.
Definition: LoadLog.h:86
kind classify(const std::string &s) const
Takes as input a string and try to determine what type it is.
Definition: LoadLog.cpp:398
bool isAscii(const std::string &filename)
Checks if the file is an ASCII file.
Definition: LoadLog.cpp:444
LoadLog()
Default constructor.
bool isDateTimeString(const std::string &str) const
Check if first 19 characters of a string is date-time string according to yyyy-mm-ddThh:mm:ss.
Definition: LoadLog.cpp:468
std::string extractLogName(const std::vector< std::string > &logName)
Checks if a log file name was provided (e.g. through the Names property); if not, the base name of the input file is used as the log name.
Definition: LoadLog.cpp:308
void exec() override
Overwrites Algorithm method.
Definition: LoadLog.cpp:107
bool SNSTextFormatColumns(const std::string &input, std::vector< double > &out) const
Check for SNS-style text file.
Definition: LoadLog.cpp:498
std::string stringToLower(std::string strToConvert)
Convert string to lower case.
Definition: LoadLog.cpp:434
int countNumberColumns(std::ifstream &logFileStream, const std::string &logFileName)
Returns the number of columns in the log file.
Definition: LoadLog.cpp:519
bool isOldDateTimeFormat(std::ifstream &logFileStream) const
Check whether the first 24 characters of a string are consistent with the date-time format used in old (pre-2007) log files, which are not supported.
Definition: LoadLog.cpp:480
void loadThreeColumnLogFile(std::ifstream &logFileStream, const std::string &logFileName, API::Run &run)
Create timeseries property from .log file and adds that to sample object.
Definition: LoadLog.cpp:215
void loadTwoColumnLogFile(std::ifstream &logFileStream, std::string logFileName, API::Run &run)
Loads two column log file data into local workspace.
Definition: LoadLog.cpp:170
kind
type returned by classify
Definition: LoadLog.h:89
Support for a property that holds an array of values.
Definition: ArrayProperty.h:28
Records the filename and the description of failure.
Definition: Exception.h:98
static Kernel::Property * createLogProperty(const std::string &logFName, const std::string &name)
Creates a TimeSeriesProperty of either double or string type depending on the log data. Returns a pointer to the created property.
Definition: LogParser.cpp:38
void warning(const std::string &msg)
Logs at warning level.
Definition: Logger.cpp:86
void information(const std::string &msg)
Logs at information level.
Definition: Logger.cpp:105
Base class for properties.
Definition: Property.h:94
A specialised Property class for holding a series of time-value pairs.
std::shared_ptr< MatrixWorkspace > MatrixWorkspace_sptr
shared pointer to the matrix workspace base class
std::shared_ptr< Workspace2D > Workspace2D_sptr
shared pointer to Mantid::DataObjects::Workspace2D
MANTID_KERNEL_DLL std::istream & extractToEOL(std::istream &is, std::string &str)
Extract a line from input stream, discarding any EOL characters encountered.
Definition: Strings.cpp:1137
constexpr int EMPTY_INT() noexcept
Returns what we consider an "empty" integer within a property.
Definition: EmptyValues.h:25
@ InOut
Both an input & output workspace.
Definition: Property.h:55