21#include <boost/lexical_cast.hpp>
22#include <json/value.h>
/**
 * Unary predicate reporting whether a file-extension string contains the
 * '*' wildcard character. Suitable for use with std::remove_if.
 *
 * @param ext :: the extension string to inspect
 * @return true if '*' occurs anywhere in @p ext, false otherwise
 */
bool containsWildCard(const std::string &ext) { return ext.find('*') != std::string::npos; }
34std::string
toUpper(
const std::string &src) {
36 std::transform(out.begin(), out.end(), out.begin(), ::toupper);
40std::string
toLower(
const std::string &src) {
42 std::transform(out.begin(), out.end(), out.begin(), ::tolower);
/// Matches optional whitespace, a comma, then optional whitespace, at a position
/// where the next character is not a digit (the lookahead does not consume it).
/// Written as a raw string literal so the regex backslashes need no escaping;
/// the R prefix must be directly adjacent to the opening quote.
const std::regex COMMA_OPERATORS(R"(\s*,\s*(?=\D))");
/**
 * Check that a string holds only 7-bit ASCII characters.
 *
 * @param str :: the text to inspect
 * @return true if no character of @p str has a value above 127
 *         (an empty string therefore yields true), false otherwise
 */
bool isASCII(const std::string &str) {
  // Compare as unsigned char so that bytes >= 0x80 read as 128..255 even on
  // platforms where plain char is signed.
  return std::none_of(str.cbegin(), str.cend(), [](char c) { return static_cast<unsigned char>(c) > 127; });
}
59std::vector<std::string> splitOnCommaOperators(
const std::string &input) {
60 std::vector<std::string> tokens;
61 std::sregex_token_iterator end;
62 std::sregex_token_iterator it(input.begin(), input.end(), COMMA_OPERATORS, -1);
63 for (; it != end; ++it)
64 tokens.emplace_back(it->str());
87 std::vector<std::string> expanded;
88 for (
const auto &
group : parser.fileNames())
89 expanded.insert(expanded.end(),
group.cbegin(),
group.cend());
99 if (expanded.size() == 1 && std::filesystem::path(token).has_extension())
115 FrameworkManager::Instance().loadPlugins();
121 setCaseSensitive(Kernel::ConfigService::Instance().getValue<bool>(
"filefinder.casesensitive").value_or(
false));
154 return Kernel::ConfigService::Instance().getFullPath(filename, ignoreDirs,
m_globOption);
169 std::filesystem::path entry(userString);
170 std::string noExt(entry.stem().string());
173 userString.replace(userString.size() - repNumChars, repNumChars,
"");
188 const bool returnDefaultIfNotFound,
189 const std::string &defaultInstrument)
const {
190 if ((!hintstr.empty()) && (!isdigit(hintstr[0]))) {
192 std::string filename = toUpper(std::filesystem::path(hintstr).filename().
string());
195 std::string instrName = Kernel::ConfigService::Instance().findLongestInstrumentPrefix(filename);
198 if (instrName.empty()) {
202 return Kernel::ConfigService::Instance().getInstrument(instrName);
205 if (!returnDefaultIfNotFound)
209 return Kernel::ConfigService::Instance().getInstrument(defaultInstrument);
220 const std::string &defaultInstrument)
const {
233 g_log.
debug() <<
"toInstrumentAndNumber(" << hintstr <<
")\n";
236 if (hintstr.empty()) {
237 throw std::invalid_argument(
"Malformed hint: empty hint");
240 if (isdigit(hintstr[0])) {
243 const auto hintUpper = toUpper(hintstr);
244 std::string instrPart = instr.
name();
245 if (!hintUpper.starts_with(instrPart)) {
247 if (!hintUpper.starts_with(instrPart)) {
248 throw std::invalid_argument(
"Malformed hint: does not start with instrument name or short name");
253 size_t nChars = instrPart.length();
254 while (nChars < hintstr.size() && !std::isdigit(
static_cast<unsigned char>(hintstr[nChars])))
256 if (nChars == hintstr.size())
257 throw std::invalid_argument(
"Malformed hint: no run number found");
258 runPart = hintstr.substr(nChars);
261 unsigned int irunPart(0);
263 irunPart = boost::lexical_cast<unsigned int>(runPart);
264 }
catch (boost::bad_lexical_cast &) {
265 std::ostringstream os;
266 os <<
"Cannot convert '" << runPart <<
"' to run number.";
267 throw std::invalid_argument(os.str());
271 std::string::size_type i = runPart.find_first_not_of(
'0');
273 while (runPart.size() < nZero)
274 runPart.insert(0,
"0");
275 if (runPart.size() > nZero && nZero != 0) {
276 throw std::invalid_argument(
"Run number does not match instrument's zero padding");
279 return std::make_pair(instr.
filePrefix(irunPart), runPart);
299 if (!isdigit(hintstr[0])) {
301 std::string hintUpper = toUpper(hintstr);
302 std::string shortName = toUpper(instrument.
shortName());
303 std::string
name = toUpper(instrument.
name());
304 if (hintUpper.rfind(shortName, 0) != 0 && hintUpper.rfind(
name, 0) != 0) {
307 }
catch (
const std::exception &ex) {
308 g_log.
debug() <<
"Failed to resolve instrument from hint '" << hintstr <<
"' in makeFileName: " << ex.what();
312 std::string filename(hintstr);
314 const std::string shortName = instrToUse.
shortName();
315 std::string delimiter = instrToUse.
delimiter();
318 if (filename.substr(0, shortName.size()) == shortName) {
319 filename = filename.substr(shortName.size());
320 if ((!delimiter.empty()) && (filename.substr(0, delimiter.size()) == delimiter))
321 filename = filename.substr(delimiter.size());
323 filename = shortName + filename;
329 filename = instrumentName + delimiter + runNumber + suffix;
343 g_log.
debug() <<
"getExtension(" << filename <<
", exts[" << exts.size() <<
"])\n";
346 for (
const auto &ext : exts) {
347 std::string extension = toUpper(ext);
348 if (extension.rfind(
'*') == extension.size() - 1)
350 extension.resize(extension.rfind(
'*'));
353 std::size_t found = toUpper(filename).rfind(extension);
354 if (found != std::string::npos) {
355 g_log.
debug() <<
"matched extension \"" << extension <<
"\" based on \"" << ext <<
"\"\n";
356 return filename.substr(found);
360 g_log.
debug() <<
"Failed to find extension. Just using last \'.\'\n";
361 std::size_t pos = filename.find_last_of(
'.');
362 if (pos != std::string::npos) {
363 return filename.substr(pos);
376bool shouldUseArchiveForFacility(
const std::string &archiveOpt,
const std::string &facilityName) {
377 if (archiveOpt ==
"all")
379 if (archiveOpt ==
"on")
380 return facilityName == toLower(Mantid::Kernel::ConfigService::Instance().getString(
"default.facility"));
381 return archiveOpt.find(facilityName) != std::string::npos;
386 const auto archiveOpt = toLower(Kernel::ConfigService::Instance().getString(
"datasearch.searcharchive"));
387 if (archiveOpt.empty() || archiveOpt ==
"off" || facility.
archiveSearch().empty())
389 if (!shouldUseArchiveForFacility(archiveOpt, toLower(facility.
name())))
392 std::vector<IArchiveSearch_sptr> archs;
395 g_log.
debug() <<
"get archive search for the facility..." <<
name <<
"\n";
396 archs.emplace_back(ArchiveSearchFactory::Instance().create(
name));
413 const std::vector<std::string> &extensionsProvided,
414 const bool useOnlyExtensionsProvided)
const {
423 .found = !filePath.empty(),
425 .instr = std::make_shared<Kernel::InstrumentInfo>(this->
getInstrument(hintsStr,
true))};
427 std::vector<FileInfo> fileInfos{fileInfo};
429 const auto &resolvedFileInfo = fileInfos[0];
431 if (resolvedFileInfo.found)
434 g_log.
debug() <<
"Failed to find file for hint: " << hintstr <<
"\n";
435 if (resolvedFileInfo.error)
436 g_log.
debug() <<
"Error message: " << resolvedFileInfo.errorMsg <<
"\n";
438 std::filesystem::path(), resolvedFileInfo.errorMsg.empty() ?
"Not found." : resolvedFileInfo.errorMsg);
450 std::vector<std::string> &uniqueExts)
const {
452 for (
const auto &ext : extensionsToAdd) {
453 const auto normalized = isCaseSensitive ? ext : toLower(ext);
454 if (std::find(uniqueExts.begin(), uniqueExts.end(), normalized) == uniqueExts.end())
455 uniqueExts.emplace_back(normalized);
466 if (!isASCII(searchText))
467 return "An unsupported non-ASCII character was found in the search text.";
489 if (token.find(
'/') != std::string::npos || token.find(
'\\') != std::string::npos ||
490 token.find(
ALLOWED_SUFFIX) != std::string::npos || std::filesystem::path(token).has_extension())
495 return parts.
count() >= 2;
524 const std::vector<std::string> &extensionsProvided,
525 const bool useOnlyExtensionsProvided)
const {
528 throw std::invalid_argument(
error);
538 std::vector<std::string> hints;
539 for (
const auto &token : tokens) {
541 const auto expanded = expandHint(token, parser);
542 hints.insert(hints.end(), expanded.cbegin(), expanded.cend());
543 }
catch (
const std::range_error &re) {
546 throw std::invalid_argument(re.what());
547 }
catch (
const std::exception &) {
553 throw std::invalid_argument(
"Malformed range of runs: " + token);
554 hints.push_back(token);
558 return findRuns(hints, extensionsProvided, useOnlyExtensionsProvided);
562 bool useOnlyExtensionsProvided)
const {
570 const std::vector<std::string> facilityExtensions = facility.
extensions();
576 std::filesystem::path filePath(fileInfo.
hint);
577 const auto extension = filePath.extension();
578 std::string filename = filePath.replace_extension().string();
580 if (filePath.parent_path().empty()) {
585 }
catch (std::invalid_argument &) {
586 if (filename.length() >= fileInfo.
hint.length()) {
587 g_log.
information() <<
"Could not form filename from standard rules '" << filename <<
"'\n";
592 if (filename.empty()) {
593 g_log.
warning() <<
"Unable to determine filename for hint '" << fileInfo.
hint <<
"\n";
594 fileInfo.
error =
true;
595 fileInfo.
errorMsg =
"Unable to determine filename from hint.";
599 g_log.
debug() <<
"filename to search for: " << filename <<
" with extension: " << extension <<
"\n";
605 fileInfo.
filenames.insert(toUpper(filename));
606 fileInfo.
filenames.insert(toLower(filename));
611 fileInfo.
extensionsToSearch.reserve(1 + extensionsProvided.size() + facilityExtensions.size());
613 if (useOnlyExtensionsProvided) {
618 if (!extension.empty())
629 const std::vector<std::string> &extensionsProvided,
630 bool useOnlyExtensionsProvided)
const {
631 for (
auto &fileInfo : fileInfos)
632 prepareFileInfo(fileInfo, extensionsProvided, useOnlyExtensionsProvided);
640 const std::vector<std::string> &extensionsProvided,
641 const bool useOnlyExtensionsProvided)
const {
645 for (
const auto &hint : hints) {
648 throw std::invalid_argument(
error);
651 std::vector<FileInfo> fileInfos;
652 fileInfos.reserve(hints.size());
653 std::shared_ptr<Kernel::InstrumentInfo> cachedInstr;
654 for (
const auto &hint : hints) {
656 if (!filePath.empty()) {
657 fileInfos.emplace_back(
FileInfo{.
hint = hint, .found =
true, .path = filePath});
660 cachedInstr = std::make_shared<Kernel::InstrumentInfo>(
661 this->
getInstrument(hint,
true, cachedInstr ? cachedInstr->shortName() : std::string()));
662 fileInfos.emplace_back(
FileInfo{.
hint = hint, .instr = cachedInstr});
667 std::vector<std::filesystem::path> res;
668 res.reserve(fileInfos.size());
669 for (
const auto &fileInfo : fileInfos) {
670 if (!fileInfo.found) {
672 g_log.
warning() <<
"Error while searching for '" << fileInfo.hint <<
"': " << fileInfo.errorMsg <<
"\n";
674 g_log.
warning() <<
"Failed to find file for hint '" << fileInfo.hint <<
"'\n";
677 if (fileInfo.error) {
678 g_log.
debug() <<
"Non-fatal error during search for '" << fileInfo.hint <<
"': " << fileInfo.errorMsg <<
"\n";
680 res.emplace_back(fileInfo.path);
693 const std::vector<std::string> &searchPaths = Kernel::ConfigService::Instance().getDataSearchDirs();
695 for (
auto &fileInfo : fileInfos) {
696 if (fileInfo.found || fileInfo.error)
699 std::vector<std::string> extensions;
700 extensions.assign(fileInfo.extensionsToSearch.begin(), fileInfo.extensionsToSearch.end());
703 extensions.erase(std::remove_if(extensions.begin(), extensions.end(), containsWildCard), extensions.end());
709 for (
const auto &extension : extensions) {
710 for (
const auto &filename : fileInfo.filenames) {
711 for (
const auto &searchPath : searchPaths) {
712 const auto filePath = std::filesystem::path(searchPath) / (filename + extension);
713 if (std::filesystem::exists(filePath, ec)) {
714 fileInfo.found =
true;
715 fileInfo.path = filePath;
727 for (
const auto &extension : extensions) {
728 for (
const auto &filename : fileInfo.filenames) {
729 const auto filepath =
getFullPath(filename + extension);
730 if (!filepath.empty() && std::filesystem::exists(filepath, ec)) {
731 g_log.
debug() <<
"path returned from getFullPath() = " << filepath <<
'\n';
732 fileInfo.found =
true;
733 fileInfo.path = filepath;
745 std::filesystem::path cachePathToSearch(Kernel::ConfigService::Instance().getString(
"datacachesearch.directory"));
747 if (std::filesystem::exists(cachePathToSearch)) {
748 for (
auto &fileInfo : fileInfos) {
749 if (fileInfo.found || fileInfo.error)
756 g_log.
debug() <<
"Found file in data cache: " << cacheFilePath.result() <<
"\n";
757 fileInfo.found =
true;
758 fileInfo.path = cacheFilePath.result();
760 fileInfo.errorMsg = cacheFilePath.errors();
764 g_log.
debug() <<
"Data cache directory not found, proceeding with the search."
775 const auto isUnfound = [](
const auto &fi) {
return !fi.found && !fi.error; };
776 const auto first = std::find_if(fileInfos.cbegin(), fileInfos.cend(), isUnfound);
777 if (first == fileInfos.cend() || first->archs.size() != 1)
779 const auto &arch = first->archs[0];
780 if (!arch || !arch->supportsMultipleHints())
783 for (
auto it = std::next(first); it != fileInfos.cend(); ++it) {
786 if (it->archs.size() != 1 || *it->instr != refInstr)
793 if (fileInfos.empty())
803 const auto stillUnfound =
804 std::any_of(fileInfos.cbegin(), fileInfos.cend(), [](
const auto &fi) { return !fi.found && !fi.error; });
814 g_log.
debug() <<
"performArchiveSearch: batching unfound hints through a single archive call\n";
818 std::vector<std::string> hints;
819 for (
const auto &fileInfo : fileInfos) {
820 if (fileInfo.found || fileInfo.error)
822 hints.push_back(*fileInfo.filenames.cbegin());
825 const auto archivePaths = sharedArch->getArchivePaths(hints);
827 g_log.
error() <<
"Archive search failed: " << archivePaths.errors() <<
"\n";
830 if (archivePaths.result().size() != hints.size()) {
831 g_log.
error() <<
"Archive search returned a different number of paths than hints. Expected " << hints.size()
832 <<
" but got " << archivePaths.result().size() <<
".\n";
838 const auto &paths = archivePaths.result();
840 for (
auto &fileInfo : fileInfos) {
841 if (fileInfo.found || fileInfo.error)
843 const auto &archivePath = paths[
index++];
845 if (std::filesystem::exists(archivePath)) {
846 fileInfo.found =
true;
847 fileInfo.path = archivePath;
849 }
catch (std::exception &e) {
850 g_log.
error() <<
"Cannot open file " << archivePath <<
": " << e.what() <<
'\n';
851 fileInfo.error =
true;
852 fileInfo.errorMsg =
"Cannot open file from archive: " + std::string(e.what());
861 std::filesystem::path lastFoundDir;
862 std::string lastFoundExt;
864 const auto tryLastFoundShortcut = [&](
FileInfo &fileInfo) {
865 if (lastFoundDir.empty() || lastFoundExt.empty())
868 for (
const auto &filename : fileInfo.filenames) {
869 const auto candidate = lastFoundDir / (filename + lastFoundExt);
870 if (std::filesystem::exists(candidate)) {
871 fileInfo.found =
true;
872 fileInfo.path = candidate;
877 lastFoundDir.clear();
878 lastFoundExt.clear();
883 for (
auto &fileInfo : fileInfos) {
884 if (fileInfo.found || fileInfo.error)
887 if (tryLastFoundShortcut(fileInfo)) {
892 if (!fileInfo.archs.empty()) {
893 g_log.
debug() <<
"Search the archives for file: " << fileInfo.hint <<
"\n";
894 const auto archivePath =
getArchivePath(fileInfo.archs, fileInfo.filenames, fileInfo.extensionsToSearch);
897 if (std::filesystem::exists(archivePath.result())) {
898 fileInfo.found =
true;
899 fileInfo.path = archivePath.result();
901 }
catch (std::exception &e) {
902 g_log.
error() <<
"Cannot open file " << archivePath <<
": " << e.what() <<
'\n';
903 fileInfo.error =
true;
904 fileInfo.errorMsg =
"Cannot open file from archive: " + std::string(e.what());
909 if (fileInfo.found) {
910 lastFoundDir = fileInfo.path.parent_path();
911 lastFoundExt = fileInfo.path.extension().string();
918 const std::set<std::string> &hintstrs,
919 const std::vector<std::string> &exts)
const {
923 for (
const auto &hint : hintstrs) {
924 std::filesystem::path parentDir;
927 parentDir = dataCache.getFileParentDirectoryPath(hint);
928 }
catch (
const std::invalid_argument &e) {
929 errors +=
"Data cache: " + std::string(e.what());
931 }
catch (
const Json::Exception &e) {
932 errors +=
"Data cache: Failed parsing to JSON: " + std::string(e.what()) +
933 "Error likely due to accessing instrument index file while it was being updated on IDAaaS.";
937 if (!std::filesystem::exists(parentDir)) {
938 errors +=
"Data cache: Directory not found: " + parentDir.string();
942 for (
const auto &ext : exts) {
943 const auto filePath = parentDir / (hint + ext);
945 if (std::filesystem::exists(filePath)) {
948 }
catch (
const std::filesystem::filesystem_error &e) {
949 errors +=
"Data cache: " + std::string(e.what());
953 errors +=
"Data cache: " + hint +
" not found in " + parentDir.string();
969 const std::set<std::string> &hintstrs,
970 const std::vector<std::string> &exts)
const {
971 g_log.
debug() <<
"getArchivePath([IArchiveSearch_sptr], [ ";
972 for (
const auto &iter : hintstrs)
975 for (
const auto &iter : exts)
980 for (
const auto &arch : archs) {
982 g_log.
debug() <<
"Getting archive path for requested files\n";
983 auto path = arch->getArchivePath(hintstrs, exts);
987 errors += path.errors();
995 if (!std::filesystem::path(hint).has_extension())
1000 g_log.
debug() <<
"found path = " << path <<
'\n';
std::map< DeltaEMode::Type, std::string > index
std::pair< std::string, std::string > toInstrumentAndNumber(const std::string &hintstr, const std::string &defaultInstrument="") const
Extracts the instrument name and run number from a hint.
std::string validateRuns(const std::string &searchText) const
A method that returns error messages if the provided runs are invalid.
static std::vector< IArchiveSearch_sptr > getArchiveSearch(const Kernel::FacilityInfo &facility)
void performBatchedArchiveSearch(std::vector< FileInfo > &fileInfos, const IArchiveSearch_sptr &sharedArch) const
void prepareFileInfo(FileInfo &fileInfo, const std::vector< std::string > &extensionsProvided, bool useOnlyExtensionsProvided) const
bool getCaseSensitive() const
Option to get if file finder should be case sensitive.
std::string getExtension(const std::string &filename, const std::vector< std::string > &exts) const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
void performCacheSearch(std::vector< FileInfo > &fileInfos) const
int m_globOption
glob option - set to case sensitive or insensitive
void performFileSearch(std::vector< FileInfo > &fileInfos) const
std::string extractAllowedSuffix(std::string &userString) const
Run numbers can be followed by an allowed string.
const Kernel::InstrumentInfo getInstrument(const std::string &hintstr, const bool returnDefaultIfNotFound=true, const std::string &defaultInstrument="") const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
const API::Result< std::filesystem::path > findRun(const std::string &hintstr, const std::vector< std::string > &exts={}, const bool useExtsOnly=false) const
Find a path to a single file from a hint.
static const std::string ALLOWED_SUFFIX
a string that is allowed at the end of any run number
std::filesystem::path getFullPath(const std::string &filename, const bool ignoreDirs=false) const
Return the full path to the file given its name.
FileFinderImpl()
Default constructor.
void getUniqueExtensions(const std::vector< std::string > &extensionsToAdd, std::vector< std::string > &uniqueExts) const
Given a set of already determined extensions and new extensions, create a set of all extensions.
const API::Result< std::filesystem::path > getArchivePath(const std::vector< IArchiveSearch_sptr > &archs, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
Return the path to the file found in archive.
void performPerFileArchiveSearch(std::vector< FileInfo > &fileInfos) const
static IArchiveSearch_sptr batchableArchive(const std::vector< FileInfo > &fileInfos)
If every unfound FileInfo shares a single archive (and instrument) that supports batched multi-hint l...
void performArchiveSearch(std::vector< FileInfo > &fileInfos) const
void setCaseSensitive(const bool cs)
Option to set if file finder should be case sensitive.
std::filesystem::path tryResolvePathWithExtension(const std::string &filename) const
If the hint already carries an extension, return its resolved full path in the data search dirs (or e...
const API::Result< std::filesystem::path > getISISInstrumentDataCachePath(const std::filesystem::path &cacheDir, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
bool isMalformedRange(const std::string &token) const
Decide whether a token the parser could not expand is a malformed run range rather than a literal fil...
void processFileInfos(std::vector< FileInfo > &fileInfos, const std::vector< std::string > &extensionsProvided, bool useOnlyExtensionsProvided) const
std::vector< std::filesystem::path > findRuns(const std::string &hintstr, const std::vector< std::string > &exts={}, const bool useExtsOnly=false) const
Find a list of files from a comma- and range-separated hint string.
std::string makeFileName(const std::string &hintstr, const Kernel::InstrumentInfo &instrument) const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
Exception for when an item is not found in a collection.
const char * what() const noexcept override
Writes out the range and limits.
A class that holds information about a facility.
const std::vector< std::string > & archiveSearch() const
Return the archive search interface names.
const std::vector< std::string > & extensions() const
Returns a list of file extensions.
bool noFilePrefix() const
Returns a bool indicating whether prefix is required in file names.
const std::string & name() const
Return the name of the facility.
static constexpr int GLOB_DEFAULT
Glob option constants (compatible with Poco::Glob)
static constexpr int GLOB_CASELESS
A class that holds information about an instrument.
const std::string & shortName() const
Return the short name of the instrument.
std::string filePrefix(unsigned int runNumber) const
Returns file prefix for this instrument and a run number.
const std::string & delimiter() const
Returns the default delimiter between instrument name and run number.
const std::string & name() const
Return the name of the instrument.
int zeroPadding(unsigned int runNumber) const
Returns zero padding for this instrument and a run number.
The Logger class is in charge of the publishing messages from the framework through various channels.
void debug(const std::string &msg)
Logs at debug level.
void error(const std::string &msg)
Logs at error level.
void warning(const std::string &msg)
Logs at warning level.
void information(const std::string &msg)
Logs at information level.
This class takes a string representing multiple files and parses it into a vector of vectors of file ...
void setTrimWhiteSpaces(const bool &setting)
Set the flag for trimming whitespaces in run string.
void parse(const std::string &multiFileName)
Parse the given multiFileNameString.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
std::size_t count() const
Get the total number of tokens.
std::shared_ptr< IArchiveSearch > IArchiveSearch_sptr
Typedef for a shared pointer to an IArchiveSearch.
Kernel::Logger g_log("ExperimentInfo")
static logger object
Kernel::Logger g_log("DetermineSpinStateOrder")
MANTID_KERNEL_DLL std::string toLower(const std::string &input)
Converts string to all lowercase.
MANTID_KERNEL_DLL std::string strip(const std::string &A)
strip pre/post spaces
MANTID_KERNEL_DLL std::string toUpper(const std::string &input)
Converts string to all uppercase.
Per-hint state threaded through the file-search pipeline.
std::vector< Mantid::API::IArchiveSearch_sptr > archs
std::shared_ptr< Mantid::Kernel::InstrumentInfo > instr
std::vector< std::string > extensionsToSearch
std::set< std::string > filenames