Mantid
Loading...
Searching...
No Matches
MultipleFileProperty.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
10
17
18#include <algorithm>
19#include <cctype>
20#include <filesystem>
21#include <functional>
22#include <numeric>
23#include <regex>
24
25using namespace Mantid::Kernel;
26using namespace Mantid::API;
27
28namespace {
29Mantid::Kernel::Logger g_log("MultipleFileProperty");
30
/// Predicate used when filtering extensions: true when @p ext holds no '*' character.
bool doesNotContainWildCard(const std::string &ext) {
  const auto wildCardPos = ext.find('*');
  return wildCardPos == std::string::npos;
}
32
/// Value returned by the setters in this file to signal "no error".
const std::string SUCCESS("");

/// Matches two operators placed directly next to each other: "++", ",,", "+," or ",+".
const std::regex REGEX_INVALID(R"(\+\+|,,|\+,|,\+)");

// Separator patterns used to tokenise the property string. Each one matches
// optional whitespace, the operator character, optional whitespace, and then
// requires (without consuming it) that the next character is a non-digit.
// An operator that is directly followed by a digit is therefore NOT treated
// as a separator here and stays inside the token, where the run-number list
// parser deals with it. No condition is placed on the character to the left
// of the operator.
const std::regex REGEX_COMMA_OPERATORS(R"(\s*,\s*(?=\D))");
const std::regex REGEX_PLUS_OPERATORS(R"(\s*\+\s*(?=\D))");
46
/// Returns true when every byte of @p str, read as an unsigned char, is at most 127.
/// An empty string is reported as ASCII.
bool isASCII(const std::string &str) {
  for (const char ch : str) {
    // Cast first: plain char may be signed, which would make high bytes negative.
    if (static_cast<unsigned char>(ch) > 127)
      return false;
  }
  return true;
}
50
51} // anonymous namespace
52
53namespace Mantid::API {
62MultipleFileProperty::MultipleFileProperty(const std::string &name, unsigned int action,
63 const std::vector<std::string> &exts, bool allowEmptyTokens)
64 : PropertyWithValue<std::vector<std::vector<std::string>>>(
65 name, std::vector<std::vector<std::string>>(),
66 std::make_shared<MultiFileValidator>(exts, (action == FileProperty::Load)), Direction::Input),
67 m_allowEmptyTokens(allowEmptyTokens) {
68 if (action != FileProperty::Load && action != FileProperty::OptionalLoad) {
70 throw std::runtime_error("Specified action is not supported for MultipleFileProperty");
71 } else {
72 m_action = action;
73 }
74
75 m_multiFileLoadingEnabled = Kernel::ConfigService::Instance().getValue<bool>("loading.multifile").value_or(false);
76 std::copy_if(exts.cbegin(), exts.cend(), std::back_inserter(m_exts), doesNotContainWildCard);
77}
78
85MultipleFileProperty::MultipleFileProperty(const std::string &name, const std::vector<std::string> &exts)
87
93
98 if (isOptional()) {
99 return SUCCESS;
100 } else {
101 return "No file specified.";
102 }
103}
104
117std::string MultipleFileProperty::setValue(const std::string &propValue) {
118 // No empty value is allowed, unless optional.
119 // This is yet aditional check that is beyond the underlying
120 // MultiFileValidator, so isOptional needs to be inspected here as well
121 if (propValue.empty() && !isOptional())
122 return "No file(s) specified.";
123
124 // If multiple file loading is disabled, then set value assuming it is a
125 // single file.
127 g_log.debug("MultiFile loading is not enabled, acting as standard FileProperty.");
128 return setValueAsSingleFile(propValue);
129 }
130
131 try {
132 // Else try and set the value, assuming it could be one or more files.
133 return setValueAsMultipleFiles(propValue);
134 } catch (const std::range_error &re) {
135 // it was a valid multi file string but for too many files.
136 return std::string(re.what());
137 } catch (const std::runtime_error &re) {
138 g_log.debug("MultiFile loading has failed. Trying as standard FileProperty.");
139
140 const std::string error = setValueAsSingleFile(propValue);
141
142 if (error.empty())
143 return SUCCESS;
144
145 // If we failed return the error message from the multiple file load attempt
146 // as the single file was a guess and probably not what the user will expect
147 // to see
148 return re.what();
149 }
150}
151
152std::string MultipleFileProperty::value() const {
154 return toString(m_value, "", "");
155
156 return toString(m_value);
157}
158
165 return toString(m_initialValue, "", "");
166
167 return toString(m_initialValue);
168}
169
179std::string MultipleFileProperty::setValueAsSingleFile(const std::string &propValue) {
180 // if value is unchanged use the cached version
181 if ((propValue == m_oldPropValue) && (!m_oldFoundValue.empty())) {
183 return SUCCESS;
184 }
185
186 // Use a temporary single FileProperty to do the job for us using this name
187 FileProperty singleFileProperty(this->name(), "", FileProperty::Load, m_exts, Direction::Input);
188
189 std::string error = singleFileProperty.setValue(propValue);
190
191 if (!error.empty())
192 return error;
193
194 // Store.
195 std::vector<std::vector<std::string>> foundFiles;
196 try {
197 toValue(singleFileProperty(), foundFiles, "", "");
199 } catch (std::invalid_argument &except) {
200 g_log.debug() << "Could not set property " << name() << ": " << except.what();
201 return except.what();
202 }
203
204 // cache the new version of things
205 m_oldPropValue = propValue;
206 m_oldFoundValue = std::move(foundFiles);
207
208 return SUCCESS;
209}
210
223std::string MultipleFileProperty::setValueAsMultipleFiles(const std::string &propValue) {
224 // Empty input (for optional properties — required ones are rejected upstream
225 // in setValue) means "no files selected". Short-circuit so we don't generate
226 // a spurious empty hint that would later fail file resolution. Boost's
227 // sregex_token_iterator used to silently yield zero tokens here; std::regex
228 // yields one empty token, hence the explicit guard.
229 if (propValue.empty()) {
230 PropertyWithValue<std::vector<std::vector<std::string>>>::operator=(std::vector<std::vector<std::string>>{});
231 m_oldPropValue = propValue;
232 m_oldFoundValue.clear();
233 return SUCCESS;
234 }
235
236 // if value is unchanged use the cached version
237 if ((propValue == m_oldPropValue) && (!m_oldFoundValue.empty())) {
239 return SUCCESS;
240 }
241
242 // Return error if there are any adjacent + or , operators.
243 std::smatch invalid_substring;
244 if (!m_allowEmptyTokens && std::regex_search(propValue, invalid_substring, REGEX_INVALID))
245 return "Unable to parse filename due to an empty token.";
246 if (!isASCII(propValue))
247 return "Unable to parse filename due to an unsupported non-ASCII character being found.";
248
249 std::vector<std::vector<std::string>> fileNames;
250
251 // Tokenise on allowed comma operators, and iterate over each token.
252 std::sregex_token_iterator end;
253 std::sregex_token_iterator commaToken(propValue.begin(), propValue.end(), REGEX_COMMA_OPERATORS, -1);
254
255 for (; commaToken != end; ++commaToken) {
256 const std::string commaTokenString = commaToken->str();
257
258 // Tokenise on allowed plus operators.
259 std::sregex_token_iterator plusToken(commaTokenString.begin(), commaTokenString.end(), REGEX_PLUS_OPERATORS, -1);
260
261 std::vector<std::vector<std::string>> temp;
262
263 // Put the tokens into a vector before iterating over it this time,
264 // so we can see how many we have.
265 std::vector<std::string> plusTokenStrings;
266 for (; plusToken != end; ++plusToken)
267 plusTokenStrings.emplace_back(plusToken->str());
268
269 m_parser.setTrimWhiteSpaces(autoTrim()); // keep trimming whitespaces in parser consistent with this property
270 for (auto &plusTokenString : plusTokenStrings) {
271 try {
272 m_parser.parse(plusTokenString);
273 } catch (const std::range_error &re) {
274 g_log.error(re.what());
275 throw;
276 } catch (const std::runtime_error &) {
277 // We should be able to safely ignore runtime_errors from parse(),
278 // see below.
279 }
280
281 std::vector<std::vector<std::string>> f = m_parser.fileNames();
282
283 // If there are no files, then we should keep this token as it was passed
284 // to the property, in its untampered form. This will enable us to deal
285 // with the case where a user is trying to load a single (and possibly
286 // existing) file within a token, but which has unexpected zero padding,
287 // or some other anomaly.
288 if (VectorHelper::flattenVector(f).empty())
289 f.emplace_back(1, plusTokenString);
290
291 if (plusTokenStrings.size() > 1) {
292 // See [3] in header documentation. Basically, for reasons of
293 // ambiguity, we cant add together plusTokens if they contain a range
294 // of files. So throw on any instances of this when there is more than
295 // plusToken.
296 if (f.size() > 1)
297 return "Adding a range of files to another file(s) is not currently "
298 "supported.";
299
300 if (temp.empty())
301 temp.emplace_back(f[0]);
302 else {
303 for (auto &parsedFile : f[0])
304 temp[0].emplace_back(parsedFile);
305 }
306 } else {
307 temp.insert(temp.end(), f.begin(), f.end());
308 }
309 }
310
311 fileNames.insert(fileNames.end(), std::make_move_iterator(temp.begin()), std::make_move_iterator(temp.end()));
312 }
313
314 std::vector<std::vector<std::string>> allUnresolvedFileNames = fileNames;
315 std::vector<std::vector<std::string>> allFullFileNames;
316
317 // First, find the default extension. Flatten all the unresolved filenames
318 // first, to make this easier.
319 std::vector<std::string> flattenedAllUnresolvedFileNames = VectorHelper::flattenVector(allUnresolvedFileNames);
320 std::string defaultExt;
321 for (const auto &unresolvedFileName : flattenedAllUnresolvedFileNames) {
322 try {
323 // Check for an extension.
324 std::filesystem::path path(unresolvedFileName);
325 if (path.has_extension()) {
326 defaultExt = path.extension().string();
327 break;
328 }
329
330 } catch (const std::exception &) {
331 // Safe to ignore? Need a better understanding of the circumstances under
332 // which this throws.
333 }
334 }
335
336 // Cycle through each vector of unresolvedFileNames in allUnresolvedFileNames.
337 // Remember, each vector contains files that are to be added together.
338 for (const auto &unresolvedFileNames : allUnresolvedFileNames) {
339 const auto hasWildCard = [](const std::string &name) { return name.find('*') != std::string::npos; };
340 if (std::any_of(unresolvedFileNames.cbegin(), unresolvedFileNames.cend(), hasWildCard))
341 return "Searching for files by wildcards is not currently supported.";
342
343 // Separate files into two groups: those with explicit extensions and those
344 // that need default extension resolution. resolvedFiles is sized up-front
345 // and indexed by the original position so the input order survives the
346 // batched resolution path below.
347 std::vector<std::string> filesToResolveWithExtension;
348 std::vector<size_t> resolutionIndices;
349 std::vector<std::string> resolvedFiles(unresolvedFileNames.size());
350
351 for (size_t i = 0; i < unresolvedFileNames.size(); ++i) {
352 const auto &unresolvedFileName = unresolvedFileNames[i];
353 bool useDefaultExt;
354
355 try {
356 // Check for an extension.
357 std::filesystem::path path(unresolvedFileName);
358
359 useDefaultExt = !path.has_extension();
360 } catch (const std::exception &) {
361 // Just shove the problematic filename straight into FileProperty and
362 // see if we have any luck.
363 useDefaultExt = false;
364 }
365
366 if (!useDefaultExt) {
367 FileProperty slaveFileProp("Slave", "", FileProperty::Load, m_exts, Direction::Input);
368 std::string error = slaveFileProp.setValue(unresolvedFileName);
369
370 // If an error was returned then pass it along.
371 if (!error.empty()) {
372 throw std::runtime_error(error);
373 }
374
375 resolvedFiles[i] = slaveFileProp();
376 } else {
377 // Collect files that need extension resolution for batch processing
378 filesToResolveWithExtension.emplace_back(unresolvedFileName);
379 resolutionIndices.emplace_back(i);
380 }
381 }
382
383 // Batch resolve files with extension using findRuns for better performance.
384 // When the batch succeeds we get a single call into the archive search,
385 // which lets back-ends like ONCat resolve all runs in one network round
386 // trip. If the batch throws (any one file is missing) we fall back to
387 // per-file findRun so the error names the actually missing file rather
388 // than whichever hint findRuns happened to report first.
389 if (!filesToResolveWithExtension.empty()) {
390 const auto extsToUse = !defaultExt.empty() ? std::vector<std::string>(1, defaultExt) : m_exts;
391 bool batchSucceeded = false;
392 try {
393 auto resolvedPaths = FileFinder::Instance().findRuns(filesToResolveWithExtension, extsToUse);
394 for (size_t i = 0; i < resolvedPaths.size(); ++i) {
395 resolvedFiles[resolutionIndices[i]] = resolvedPaths[i].string();
396 }
397 batchSucceeded = true;
398 } catch (const Exception::NotFoundError &) {
399 // Fall through to per-file resolution below.
400 }
401
402 if (!batchSucceeded) {
403 for (size_t i = 0; i < filesToResolveWithExtension.size(); ++i) {
404 const auto &unresolvedFileName = filesToResolveWithExtension[i];
405 auto run = FileFinder::Instance().findRun(unresolvedFileName, extsToUse);
406 if (run) {
407 resolvedFiles[resolutionIndices[i]] = run.result().string();
408 continue;
409 }
410
411 bool doThrow = !m_allowEmptyTokens;
412 if (m_allowEmptyTokens) {
413 try {
414 if (std::stoi(unresolvedFileName) != 0)
415 doThrow = true;
416 } catch (std::invalid_argument &) {
417 doThrow = true;
418 }
419 }
420 if (doThrow)
421 throw Exception::NotFoundError("Unable to find file:", unresolvedFileName);
422
423 // Empty token allowed: keep the hint as the resolved value so it
424 // surfaces in any downstream error message.
425 resolvedFiles[resolutionIndices[i]] = unresolvedFileName;
426 }
427 }
428 }
429
430 allFullFileNames.emplace_back(std::move(resolvedFiles));
431 }
432
433 PropertyWithValue<std::vector<std::vector<std::string>>>::operator=(allFullFileNames);
434 m_oldPropValue = propValue;
435 m_oldFoundValue = std::move(allFullFileNames);
436 return SUCCESS;
437}
438
439} // namespace Mantid::API
std::string name
Definition Run.cpp:60
double error
#define SUCCESS
A specialized class for dealing with file properties.
std::string setValue(const std::string &propValue) override
Overridden setValue method.
@ OptionalLoad
to specify a file to read but the file doesn't have to exist
@ Load
allowed here which will be passed to the algorithm
A property to allow a user to specify multiple files to load.
unsigned int m_action
The action type of this property: Load (default) or OptionalLoad are supported.
std::string setValueAsSingleFile(const std::string &propValue)
Called by setValue in the case where a user has disabled multiple file loading.
std::string getDefault() const override
Get the value the property was initialised with - its default value.
bool isOptional() const
Check if this property is optional.
std::string value() const override
Returns the value of the property as a string.
std::string isEmptyValueValid() const
Returns a string depending on whether an empty value is valid.
std::vector< std::string > m_exts
Suggested extensions.
std::string setValueAsMultipleFiles(const std::string &propValue)
Called by setValue in the case where multiple file loading is enabled.
std::vector< std::vector< std::string > > m_oldFoundValue
Last value of the found files used in MultipleFileProperty::setValueAsMultipleFiles and MultipleFileP...
std::string setValue(const std::string &propValue) override
Convert the given propValue into a comma and plus separated list of full filenames,...
MultipleFileProperty(const std::string &name, unsigned int action, const std::vector< std::string > &exts=std::vector< std::string >(), bool allowEmptyTokens=false)
Alternative constructor with action.
std::string m_oldPropValue
Last value of propValue used in MultipleFileProperty::setValueAsMultipleFiles and MultipleFilePropert...
bool m_multiFileLoadingEnabled
Whether or not the user has turned on multifile loading.
bool m_allowEmptyTokens
Whether to allow for empty tokens.
Kernel::MultiFileNameParsing::Parser m_parser
Parser used to parse multi-file strings.
Loads a workspace from a data file.
Definition Load.h:23
Exception for when an item is not found in a collection.
Definition Exception.h:145
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition Logger.h:51
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
void error(const std::string &msg)
Logs at error level.
Definition Logger.cpp:108
void setTrimWhiteSpaces(const bool &setting)
Set the flag for trimming whitespaces in run string.
const std::vector< std::vector< std::string > > & fileNames() const
Return the vector of vectors of parsed file names.
void parse(const std::string &multiFileName)
Parse the given multiFileNameString.
The MultiFileValidator validates a MultiFileProperty, which contains a vector of vectors* of filename...
The concrete, templated class for properties.
std::vector< std::vector< std::string > > m_initialValue
the property's default value which is also its initial value
std::vector< std::vector< std::string > > m_value
The value of the property.
bool autoTrim() const
Returns whether the property is set to automatically trim string input values of whitespace.
Definition Property.cpp:357
const std::string & name() const
Get the property's name.
Definition Property.cpp:63
Kernel::Logger g_log("ExperimentInfo")
static logger object
std::vector< T > flattenVector(const std::vector< std::vector< T > > &v)
A convenience function to "flatten" the given vector of vectors into a single vector.
void toValue(const std::string &strvalue, T &value)
std::string toString(const T &value)
Convert values to strings.
STL namespace.
Describes the direction (within an algorithm) of a Property.
Definition Property.h:50
@ Input
An input workspace.
Definition Property.h:53