Mantid
Loading...
Searching...
No Matches
MultipleFileProperty.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
10
15#include "MantidKernel/System.h"
17
18#include <Poco/Path.h>
19#include <boost/algorithm/string.hpp>
20#include <boost/regex.hpp>
21
22#include <algorithm>
23#include <cctype>
24#include <functional>
25#include <numeric>
26
27using namespace Mantid::Kernel;
28using namespace Mantid::API;
29
30namespace // anonymous
31{
// File-local logger; it is constructed with the name "MultipleFileProperty".
Mantid::Kernel::Logger g_log("MultipleFileProperty");
34
/// Predicate used to filter extensions: true when @p ext has no '*' character in it.
bool doesNotContainWildCard(const std::string &ext) {
  return std::none_of(ext.cbegin(), ext.cend(), [](const char c) { return c == '*'; });
}
40
// Empty string returned by the setters in this file to signal "no error".
static const std::string SUCCESS("");

// Regular expression for any adjacent + or , operators, i.e. "++", ",,", "+,"
// or ",+". A match means there is an empty token between two operators.
const std::string INVALID = R"(\+\+|,,|\+,|,\+)";
static const boost::regex REGEX_INVALID(INVALID);

// Regular expressions that represent the allowed instances of , operators.
// Both alternatives match a comma with optional surrounding whitespace and use
// a lookahead that requires a non-digit character after the match; they differ
// only in the lookbehind (a digit vs. a non-digit before the match).
const std::string NUM_COMMA_ALPHA(R"((?<=\d)\s*,\s*(?=\D))");
const std::string ALPHA_COMMA_ALPHA(R"((?<=\D)\s*,\s*(?=\D))");
const std::string COMMA_OPERATORS = NUM_COMMA_ALPHA + "|" + ALPHA_COMMA_ALPHA;
static const boost::regex REGEX_COMMA_OPERATORS(COMMA_OPERATORS);

// Regular expressions that represent the allowed instances of + operators.
// Same structure as the comma expressions above, with '+' as the separator.
const std::string NUM_PLUS_ALPHA(R"((?<=\d)\s*\+\s*(?=\D))");
const std::string ALPHA_PLUS_ALPHA(R"((?<=\D)\s*\+\s*(?=\D))");
const std::string PLUS_OPERATORS = NUM_PLUS_ALPHA + "|" + ALPHA_PLUS_ALPHA;
// NOTE(review): this is the only regex here that passes the perl flag
// explicitly; the others rely on the constructor's default flags.
static const boost::regex REGEX_PLUS_OPERATORS(PLUS_OPERATORS, boost::regex_constants::perl);
58
/// True when every byte of @p str, read as an unsigned char, is at most 127.
bool isASCII(const std::string &str) {
  for (const char ch : str) {
    if (static_cast<unsigned char>(ch) > 127)
      return false;
  }
  return true;
}
62
63} // anonymous namespace
64
65namespace Mantid::API {
74MultipleFileProperty::MultipleFileProperty(const std::string &name, unsigned int action,
75 const std::vector<std::string> &exts, bool allowEmptyTokens)
76 : PropertyWithValue<std::vector<std::vector<std::string>>>(
77 name, std::vector<std::vector<std::string>>(),
78 std::make_shared<MultiFileValidator>(exts, (action == FileProperty::Load)), Direction::Input),
79 m_allowEmptyTokens(allowEmptyTokens) {
80 if (action != FileProperty::Load && action != FileProperty::OptionalLoad) {
82 throw std::runtime_error("Specified action is not supported for MultipleFileProperty");
83 } else {
84 m_action = action;
85 }
86
87 m_multiFileLoadingEnabled = Kernel::ConfigService::Instance().getValue<bool>("loading.multifile").get_value_or(false);
88 std::copy_if(exts.cbegin(), exts.cend(), std::back_inserter(m_exts), doesNotContainWildCard);
89}
90
/**
 * Convenience constructor: delegates to the constructor that takes an action,
 * passing FileProperty::Load and leaving allowEmptyTokens at its default.
 *
 * @param name :: The name of the property
 * @param exts :: File extensions forwarded to the delegated constructor
 */
MultipleFileProperty::MultipleFileProperty(const std::string &name, const std::vector<std::string> &exts)
    : MultipleFileProperty(name, FileProperty::Load, exts) {}
99
105
110 if (isOptional()) {
111 return SUCCESS;
112 } else {
113 return "No file specified.";
114 }
115}
116
129std::string MultipleFileProperty::setValue(const std::string &propValue) {
130 // No empty value is allowed, unless optional.
131 // This is yet aditional check that is beyond the underlying
132 // MultiFileValidator, so isOptional needs to be inspected here as well
133 if (propValue.empty() && !isOptional())
134 return "No file(s) specified.";
135
136 // If multiple file loading is disabled, then set value assuming it is a
137 // single file.
139 g_log.debug("MultiFile loading is not enabled, acting as standard FileProperty.");
140 return setValueAsSingleFile(propValue);
141 }
142
143 try {
144 // Else try and set the value, assuming it could be one or more files.
145 return setValueAsMultipleFiles(propValue);
146 } catch (const std::range_error &re) {
147 // it was a valid multi file string but for too many files.
148 return std::string(re.what());
149 } catch (const std::runtime_error &re) {
150 g_log.debug("MultiFile loading has failed. Trying as standard FileProperty.");
151
152 const std::string error = setValueAsSingleFile(propValue);
153
154 if (error.empty())
155 return SUCCESS;
156
157 // If we failed return the error message from the multiple file load attempt
158 // as the single file was a guess and probably not what the user will expect
159 // to see
160 return re.what();
161 }
162}
163
164std::string MultipleFileProperty::value() const {
166 return toString(m_value, "", "");
167
168 return toString(m_value);
169}
170
177 return toString(m_initialValue, "", "");
178
179 return toString(m_initialValue);
180}
181
191std::string MultipleFileProperty::setValueAsSingleFile(const std::string &propValue) {
192 // if value is unchanged use the cached version
193 if ((propValue == m_oldPropValue) && (!m_oldFoundValue.empty())) {
195 return SUCCESS;
196 }
197
198 // Use a slave FileProperty to do the job for us.
199 FileProperty slaveFileProp("Slave", "", FileProperty::Load, m_exts, Direction::Input);
200
201 std::string error = slaveFileProp.setValue(propValue);
202
203 if (!error.empty())
204 return error;
205
206 // Store.
207 std::vector<std::vector<std::string>> foundFiles;
208 try {
209 toValue(slaveFileProp(), foundFiles, "", "");
211 } catch (std::invalid_argument &except) {
212 g_log.debug() << "Could not set property " << name() << ": " << except.what();
213 return except.what();
214 }
215
216 // cache the new version of things
217 m_oldPropValue = propValue;
218 m_oldFoundValue = std::move(foundFiles);
219
220 return SUCCESS;
221}
222
235std::string MultipleFileProperty::setValueAsMultipleFiles(const std::string &propValue) {
236 // if value is unchanged use the cached version
237 if ((propValue == m_oldPropValue) && (!m_oldFoundValue.empty())) {
239 return SUCCESS;
240 }
241
242 // Return error if there are any adjacent + or , operators.
243 boost::smatch invalid_substring;
244 if (!m_allowEmptyTokens && boost::regex_search(propValue.begin(), propValue.end(), invalid_substring, REGEX_INVALID))
245 return "Unable to parse filename due to an empty token.";
246 if (!isASCII(propValue))
247 return "Unable to parse filename due to an unsupported non-ASCII character being found.";
248
249 std::vector<std::vector<std::string>> fileNames;
250
251 // Tokenise on allowed comma operators, and iterate over each token.
252 boost::sregex_token_iterator end;
253 boost::sregex_token_iterator commaToken(propValue.begin(), propValue.end(), REGEX_COMMA_OPERATORS, -1);
254
255 for (; commaToken != end; ++commaToken) {
256 const std::string commaTokenString = commaToken->str();
257
258 // Tokenise on allowed plus operators.
259 boost::sregex_token_iterator plusToken(commaTokenString.begin(), commaTokenString.end(), REGEX_PLUS_OPERATORS, -1);
260
261 std::vector<std::vector<std::string>> temp;
262
263 // Put the tokens into a vector before iterating over it this time,
264 // so we can see how many we have.
265 std::vector<std::string> plusTokenStrings;
266 for (; plusToken != end; ++plusToken)
267 plusTokenStrings.emplace_back(plusToken->str());
268
269 m_parser.setTrimWhiteSpaces(autoTrim()); // keep trimming whitespaces in parser consistent with this property
270 for (auto &plusTokenString : plusTokenStrings) {
271 try {
272 m_parser.parse(plusTokenString);
273 } catch (const std::range_error &re) {
274 g_log.error(re.what());
275 throw;
276 } catch (const std::runtime_error &) {
277 // We should be able to safely ignore runtime_errors from parse(),
278 // see below.
279 }
280
281 std::vector<std::vector<std::string>> f = m_parser.fileNames();
282
283 // If there are no files, then we should keep this token as it was passed
284 // to the property, in its untampered form. This will enable us to deal
285 // with the case where a user is trying to load a single (and possibly
286 // existing) file within a token, but which has unexpected zero padding,
287 // or some other anomaly.
288 if (VectorHelper::flattenVector(f).empty())
289 f.emplace_back(1, plusTokenString);
290
291 if (plusTokenStrings.size() > 1) {
292 // See [3] in header documentation. Basically, for reasons of
293 // ambiguity, we cant add together plusTokens if they contain a range
294 // of files. So throw on any instances of this when there is more than
295 // plusToken.
296 if (f.size() > 1)
297 return "Adding a range of files to another file(s) is not currently "
298 "supported.";
299
300 if (temp.empty())
301 temp.emplace_back(f[0]);
302 else {
303 for (auto &parsedFile : f[0])
304 temp[0].emplace_back(parsedFile);
305 }
306 } else {
307 temp.insert(temp.end(), f.begin(), f.end());
308 }
309 }
310
311 fileNames.insert(fileNames.end(), std::make_move_iterator(temp.begin()), std::make_move_iterator(temp.end()));
312 }
313
314 std::vector<std::vector<std::string>> allUnresolvedFileNames = fileNames;
315 std::vector<std::vector<std::string>> allFullFileNames;
316
317 // First, find the default extension. Flatten all the unresolved filenames
318 // first, to make this easier.
319 std::vector<std::string> flattenedAllUnresolvedFileNames = VectorHelper::flattenVector(allUnresolvedFileNames);
320 std::string defaultExt;
321 for (const auto &unresolvedFileName : flattenedAllUnresolvedFileNames) {
322 try {
323 // Check for an extension.
324 Poco::Path path(unresolvedFileName);
325 if (!path.getExtension().empty()) {
326 defaultExt = "." + path.getExtension();
327 break;
328 }
329
330 } catch (Poco::Exception &) {
331 // Safe to ignore? Need a better understanding of the circumstances under
332 // which this throws.
333 }
334 }
335
336 // Cycle through each vector of unresolvedFileNames in allUnresolvedFileNames.
337 // Remember, each vector contains files that are to be added together.
338 for (const auto &unresolvedFileNames : allUnresolvedFileNames) {
339 // Check for the existance of wild cards. (Instead of iterating over all the
340 // filenames just join them together and search for "*" in the result.)
341 if (std::string::npos != boost::algorithm::join(unresolvedFileNames, "").find("*"))
342 return "Searching for files by wildcards is not currently supported.";
343
344 std::vector<std::string> fullFileNames;
345
346 for (const auto &unresolvedFileName : unresolvedFileNames) {
347 bool useDefaultExt;
348
349 try {
350 // Check for an extension.
351 Poco::Path path(unresolvedFileName);
352
353 useDefaultExt = path.getExtension().empty();
354 } catch (Poco::Exception &) {
355 // Just shove the problematic filename straight into FileProperty and
356 // see if we have any luck.
357 useDefaultExt = false;
358 }
359
360 std::string fullyResolvedFile;
361
362 if (!useDefaultExt) {
363 FileProperty slaveFileProp("Slave", "", FileProperty::Load, m_exts, Direction::Input);
364 std::string error = slaveFileProp.setValue(unresolvedFileName);
365
366 // If an error was returned then pass it along.
367 if (!error.empty()) {
368 throw std::runtime_error(error);
369 }
370
371 fullyResolvedFile = slaveFileProp();
372 } else {
373 // If a default ext has been specified/found, then use it.
374 if (!defaultExt.empty()) {
375 fullyResolvedFile =
376 FileFinder::Instance().findRun(unresolvedFileName, std::vector<std::string>(1, defaultExt));
377 } else {
378 fullyResolvedFile = FileFinder::Instance().findRun(unresolvedFileName, m_exts);
379 }
380 if (fullyResolvedFile.empty()) {
381 bool doThrow = false;
382 if (m_allowEmptyTokens) {
383 try {
384 const int unresolvedInt = std::stoi(unresolvedFileName);
385 if (unresolvedInt != 0) {
386 doThrow = true;
387 }
388 } catch (std::invalid_argument &) {
389 doThrow = true;
390 }
391 } else {
392 doThrow = true;
393 }
394 if (doThrow) {
395 throw std::runtime_error("Unable to find file matching the string \"" + unresolvedFileName +
396 "\", please check the data search directories.");
397 } else {
398 // if the fullyResolvedFile is empty, it means it failed to find the
399 // file so keep the unresolvedFileName as a hint to be displayed
400 // later on in the error message
401 fullyResolvedFile = unresolvedFileName;
402 }
403 }
404 }
405
406 // Append the file name to result.
407 fullFileNames.emplace_back(std::move(fullyResolvedFile));
408 }
409 allFullFileNames.emplace_back(std::move(fullFileNames));
410 }
411
412 // Now re-set the value using the full paths found.
413 PropertyWithValue<std::vector<std::vector<std::string>>>::operator=(allFullFileNames);
414
415 // cache the new version of things
416 m_oldPropValue = propValue;
417 m_oldFoundValue = std::move(allFullFileNames);
418
419 return SUCCESS;
420}
421
422} // namespace Mantid::API
double error
Definition: IndexPeaks.cpp:133
#define SUCCESS
A specialized class for dealing with file properties.
Definition: FileProperty.h:42
std::string setValue(const std::string &propValue) override
Overridden setValue method.
@ OptionalLoad
to specify a file to read but the file doesn't have to exist
Definition: FileProperty.h:53
@ Load
allowed here which will be passed to the algorithm
Definition: FileProperty.h:52
A property to allow a user to specify multiple files to load.
unsigned int m_action
The action type of this property; Load (default) and OptionalLoad are supported.
std::string setValueAsSingleFile(const std::string &propValue)
Called by setValue in the case where a user has disabled multiple file loading.
std::string getDefault() const override
Get the value the property was initialised with -its default value.
bool isOptional() const
Check if this property is optional.
std::string value() const override
Returns the value of the property as a string.
std::string isEmptyValueValid() const
Returns a string depending on whether an empty value is valid.
std::vector< std::string > m_exts
Suggested extensions.
std::string setValueAsMultipleFiles(const std::string &propValue)
Called by setValue in the case where multiple file loading is enabled.
std::vector< std::vector< std::string > > m_oldFoundValue
Last value of the found files used in MultipleFileProperty::setValueAsMultipleFiles and MultipleFileP...
std::string setValue(const std::string &propValue) override
Convert the given propValue into a comma and plus separated list of full filenames,...
MultipleFileProperty(const std::string &name, unsigned int action, const std::vector< std::string > &exts=std::vector< std::string >(), bool allowEmptyTokens=false)
Alternative constructor with action.
std::string m_oldPropValue
Last value of propValue used in MultipleFileProperty::setValueAsMultipleFiles and MultipleFilePropert...
bool m_multiFileLoadingEnabled
Whether or not the user has turned on multifile loading.
bool m_allowEmptyTokens
Whether to allow for empty tokens.
Kernel::MultiFileNameParsing::Parser m_parser
Parser used to parse multi-file strings.
Loads a workspace from a data file.
Definition: Load.h:23
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition: Logger.h:52
void debug(const std::string &msg)
Logs at debug level.
Definition: Logger.cpp:114
void error(const std::string &msg)
Logs at error level.
Definition: Logger.cpp:77
void setTrimWhiteSpaces(const bool &setting)
Set the flag for trimming whitespaces in run string.
std::vector< std::vector< std::string > > fileNames() const
Return the vector of vectors of parsed file names.
void parse(const std::string &multiFileName)
Parse the given multiFileNameString.
The MultiFileValidator validates a MultiFileProperty, which contains a vector of vectors* of filename...
The concrete, templated class for properties.
std::vector< std::vector< std::string > > m_initialValue
the property's default value which is also its initial value
std::vector< std::vector< std::string > > m_value
The value of the property.
bool autoTrim() const
Returns whether the property is set to automatically trim string input values of whitespace.
Definition: Property.cpp:364
const std::string & name() const
Get the property's name.
Definition: Property.cpp:60
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
Kernel::Logger g_log("ExperimentInfo")
static logger object
std::vector< T > flattenVector(const std::vector< std::vector< T > > &v)
A convenience function to "flatten" the given vector of vectors into a single vector.
Definition: VectorHelper.h:69
STL namespace.
Describes the direction (within an algorithm) of a Property.
Definition: Property.h:50
@ Input
An input workspace.
Definition: Property.h:53