Mantid
Loading...
Searching...
No Matches
FileFinder.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
15#include "MantidKernel/Glob.h"
20
21#include <boost/lexical_cast.hpp>
22#include <json/value.h>
23
24#include <algorithm>
25#include <cctype>
26#include <filesystem>
27#include <regex>
28
29namespace {
30Mantid::Kernel::Logger g_log("FileFinder");
31
/// True when the extension pattern contains at least one '*' wildcard character.
bool containsWildCard(const std::string &ext) {
  const auto wildcardPos = ext.find('*');
  return wildcardPos != std::string::npos;
}
33
/// Return a copy of src with every character passed through std::toupper.
std::string toUpper(const std::string &src) {
  std::string out(src);
  // std::toupper has undefined behaviour for negative arguments other than
  // EOF, so each char is received as unsigned char before the call.
  std::transform(out.begin(), out.end(), out.begin(),
                 [](const unsigned char c) { return static_cast<char>(std::toupper(c)); });
  return out;
}
39
/// Return a copy of src with every character passed through std::tolower.
std::string toLower(const std::string &src) {
  std::string out(src);
  // std::tolower has undefined behaviour for negative arguments other than
  // EOF, so each char is received as unsigned char before the call.
  std::transform(out.begin(), out.end(), out.begin(),
                 [](const unsigned char c) { return static_cast<char>(std::tolower(c)); });
  return out;
}
45
// Commas that act as separators between tokens: any character followed by a
// comma followed by a non-digit. Digit→digit commas (e.g. "15196,15197") are
// left for the MultiFileNameParsing::Parser to handle as run-number lists.
// Mirrors MultipleFileProperty's REGEX_COMMA_OPERATORS without the redundant
// left-side lookbehind (which std::regex does not support).
//
// Pattern breakdown: optional whitespace, a comma, optional whitespace, then a
// zero-width lookahead requiring the next character to be a non-digit. The
// lookahead consumes nothing, so that character stays in the following token.
// NOTE(review): the trailing \s* can backtrack to empty, in which case the
// lookahead sees the whitespace itself (a non-digit). A comma followed by
// whitespace and then a digit (e.g. "15196, 15197") therefore still matches
// and is split here — confirm this is intended.
const std::regex COMMA_OPERATORS(R"(\s*,\s*(?=\D))");
52
/// True when every byte of str lies in the 7-bit ASCII range (0-127).
bool isASCII(const std::string &str) {
  for (const char ch : str) {
    // Compare as unsigned so bytes above 127 are not seen as negative values.
    if (static_cast<unsigned char>(ch) > 127)
      return false;
  }
  return true;
}
56
59std::vector<std::string> splitOnCommaOperators(const std::string &input) {
60 std::vector<std::string> tokens;
61 std::sregex_token_iterator end;
62 std::sregex_token_iterator it(input.begin(), input.end(), COMMA_OPERATORS, -1);
63 for (; it != end; ++it)
64 tokens.emplace_back(it->str());
65 return tokens;
66}
67
81std::vector<std::string> expandHint(const std::string &token, Mantid::Kernel::MultiFileNameParsing::Parser &parser) {
82 if (token.empty())
83 return {};
84
85 parser.parse(token);
86
87 std::vector<std::string> expanded;
88 for (const auto &group : parser.fileNames())
89 expanded.insert(expanded.end(), group.cbegin(), group.cend());
90
91 // Parser produced nothing — fall back to the literal token.
92 if (expanded.empty())
93 return {token};
94
95 // Single-file token with an explicit extension: prefer the user's exact
96 // filename, as the parser may have applied zero-padding that won't match
97 // the on-disk filename. Ranges/lists (size > 1) keep the parser's output
98 // since expansion was the whole point of the token.
99 if (expanded.size() == 1 && std::filesystem::path(token).has_extension())
100 return {token};
101
102 return expanded;
103}
104
105} // namespace
106
107namespace Mantid::API {
108
// This single allowed suffix could be generalised to an array of allowed
// suffixes; it is currently only used by the ISIS SANS group.
111const std::string FileFinderImpl::ALLOWED_SUFFIX = "-add";
112
114 // Make sure plugins are loaded
115 FrameworkManager::Instance().loadPlugins();
116
117// determine from Mantid property how sensitive Mantid should be
118#ifdef _WIN32
120#else
121 setCaseSensitive(Kernel::ConfigService::Instance().getValue<bool>("filefinder.casesensitive").value_or(false));
122#endif
123}
124
135
142
153std::filesystem::path FileFinderImpl::getFullPath(const std::string &filename, const bool ignoreDirs) const {
154 return Kernel::ConfigService::Instance().getFullPath(filename, ignoreDirs, m_globOption);
155}
156
162std::string FileFinderImpl::extractAllowedSuffix(std::string &userString) const {
163 if (userString.find(ALLOWED_SUFFIX) == std::string::npos) {
164 // short cut processing as normally there is no suffix
165 return "";
166 }
167
168 // ignore any file extension in checking if a suffix is present
169 std::filesystem::path entry(userString);
170 std::string noExt(entry.stem().string());
171 const size_t repNumChars = ALLOWED_SUFFIX.size();
172 if (noExt.find(ALLOWED_SUFFIX) == noExt.size() - repNumChars) {
173 userString.replace(userString.size() - repNumChars, repNumChars, "");
174 return ALLOWED_SUFFIX;
175 }
176 return "";
177}
178
188 const bool returnDefaultIfNotFound,
189 const std::string &defaultInstrument) const {
190 if ((!hintstr.empty()) && (!isdigit(hintstr[0]))) {
191 // If hint contains path components, use only the filename part for instrument detection
192 std::string filename = toUpper(std::filesystem::path(hintstr).filename().string());
193
194 try {
195 std::string instrName = Kernel::ConfigService::Instance().findLongestInstrumentPrefix(filename);
196
197 // if still empty, throw not found
198 if (instrName.empty()) {
199 throw Kernel::Exception::NotFoundError("Instrument not found", hintstr);
200 }
201
202 return Kernel::ConfigService::Instance().getInstrument(instrName);
204 g_log.debug() << e.what() << "\n";
205 if (!returnDefaultIfNotFound)
206 throw;
207 }
208 }
209 return Kernel::ConfigService::Instance().getInstrument(defaultInstrument);
210}
211
219std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string &hintstr,
220 const std::string &defaultInstrument) const {
221 Kernel::InstrumentInfo instr = this->getInstrument(hintstr, true, defaultInstrument);
222 return toInstrumentAndNumber(hintstr, instr);
223}
224
231std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string &hintstr,
232 const Kernel::InstrumentInfo &instr) const {
233 g_log.debug() << "toInstrumentAndNumber(" << hintstr << ")\n";
234 std::string runPart;
235
236 if (hintstr.empty()) {
237 throw std::invalid_argument("Malformed hint: empty hint");
238 }
239
240 if (isdigit(hintstr[0])) {
241 runPart = hintstr;
242 } else {
243 const auto hintUpper = toUpper(hintstr);
244 std::string instrPart = instr.name();
245 if (!hintUpper.starts_with(instrPart)) {
246 instrPart = instr.shortName();
247 if (!hintUpper.starts_with(instrPart)) {
248 throw std::invalid_argument("Malformed hint: does not start with instrument name or short name");
249 }
250 }
251
252 // need to advance to the first digit after the instrument name to handle underscores, etc.
253 size_t nChars = instrPart.length();
254 while (nChars < hintstr.size() && !std::isdigit(static_cast<unsigned char>(hintstr[nChars])))
255 ++nChars;
256 if (nChars == hintstr.size())
257 throw std::invalid_argument("Malformed hint: no run number found");
258 runPart = hintstr.substr(nChars);
259 }
260
261 unsigned int irunPart(0);
262 try {
263 irunPart = boost::lexical_cast<unsigned int>(runPart);
264 } catch (boost::bad_lexical_cast &) {
265 std::ostringstream os;
266 os << "Cannot convert '" << runPart << "' to run number.";
267 throw std::invalid_argument(os.str());
268 }
269 size_t nZero = instr.zeroPadding(irunPart);
270 // remove any leading zeros in case there are too many of them
271 std::string::size_type i = runPart.find_first_not_of('0');
272 runPart.erase(0, i);
273 while (runPart.size() < nZero)
274 runPart.insert(0, "0");
275 if (runPart.size() > nZero && nZero != 0) {
276 throw std::invalid_argument("Run number does not match instrument's zero padding");
277 }
278
279 return std::make_pair(instr.filePrefix(irunPart), runPart);
280}
281
294std::string FileFinderImpl::makeFileName(const std::string &hintstr, const Kernel::InstrumentInfo &instrument) const {
295 if (hintstr.empty())
296 return "";
297
298 Kernel::InstrumentInfo instrToUse = instrument;
299 if (!isdigit(hintstr[0])) {
300 try {
301 std::string hintUpper = toUpper(hintstr);
302 std::string shortName = toUpper(instrument.shortName());
303 std::string name = toUpper(instrument.name());
304 if (hintUpper.rfind(shortName, 0) != 0 && hintUpper.rfind(name, 0) != 0) {
305 instrToUse = getInstrument(hintstr, false);
306 }
307 } catch (const std::exception &ex) {
308 g_log.debug() << "Failed to resolve instrument from hint '" << hintstr << "' in makeFileName: " << ex.what();
309 }
310 }
311
312 std::string filename(hintstr);
313 const std::string suffix = extractAllowedSuffix(filename);
314 const std::string shortName = instrToUse.shortName();
315 std::string delimiter = instrToUse.delimiter();
316
317 // see if starts with the provided instrument name
318 if (filename.substr(0, shortName.size()) == shortName) {
319 filename = filename.substr(shortName.size());
320 if ((!delimiter.empty()) && (filename.substr(0, delimiter.size()) == delimiter))
321 filename = filename.substr(delimiter.size());
322
323 filename = shortName + filename;
324 }
325
326 auto [instrumentName, runNumber] = toInstrumentAndNumber(filename, instrToUse);
327
328 // delimiter and suffix might be empty strings
329 filename = instrumentName + delimiter + runNumber + suffix;
330 return filename;
331}
332
342std::string FileFinderImpl::getExtension(const std::string &filename, const std::vector<std::string> &exts) const {
343 g_log.debug() << "getExtension(" << filename << ", exts[" << exts.size() << "])\n";
344
345 // go through the list of supplied extensions
346 for (const auto &ext : exts) {
347 std::string extension = toUpper(ext);
348 if (extension.rfind('*') == extension.size() - 1) // there is a wildcard at play
349 {
350 extension.resize(extension.rfind('*'));
351 }
352
353 std::size_t found = toUpper(filename).rfind(extension);
354 if (found != std::string::npos) {
355 g_log.debug() << "matched extension \"" << extension << "\" based on \"" << ext << "\"\n";
356 return filename.substr(found); // grab the actual extensions found
357 }
358 }
359
360 g_log.debug() << "Failed to find extension. Just using last \'.\'\n";
361 std::size_t pos = filename.find_last_of('.');
362 if (pos != std::string::npos) {
363 return filename.substr(pos);
364 }
365
366 // couldn't find an extension
367 return "";
368}
369
370namespace {
376bool shouldUseArchiveForFacility(const std::string &archiveOpt, const std::string &facilityName) {
377 if (archiveOpt == "all")
378 return true;
379 if (archiveOpt == "on")
380 return facilityName == toLower(Mantid::Kernel::ConfigService::Instance().getString("default.facility"));
381 return archiveOpt.find(facilityName) != std::string::npos;
382}
383} // namespace
384
385std::vector<IArchiveSearch_sptr> FileFinderImpl::getArchiveSearch(const Kernel::FacilityInfo &facility) {
386 const auto archiveOpt = toLower(Kernel::ConfigService::Instance().getString("datasearch.searcharchive"));
387 if (archiveOpt.empty() || archiveOpt == "off" || facility.archiveSearch().empty())
388 return {};
389 if (!shouldUseArchiveForFacility(archiveOpt, toLower(facility.name())))
390 return {};
391
392 std::vector<IArchiveSearch_sptr> archs;
393 archs.reserve(facility.archiveSearch().size());
394 for (const auto &name : facility.archiveSearch()) {
395 g_log.debug() << "get archive search for the facility..." << name << "\n";
396 archs.emplace_back(ArchiveSearchFactory::Instance().create(name));
397 }
398 return archs;
399}
400
413 const std::vector<std::string> &extensionsProvided,
414 const bool useOnlyExtensionsProvided) const {
415
416 auto const error = validateRuns(hintstr);
417 if (!error.empty())
418 return API::Result<std::filesystem::path>(std::filesystem::path(), error);
419
420 std::string hintsStr = Kernel::Strings::strip(hintstr);
421 auto filePath = tryResolvePathWithExtension(hintsStr);
422 auto fileInfo = FileInfo{.hint = hintsStr,
423 .found = !filePath.empty(),
424 .path = filePath,
425 .instr = std::make_shared<Kernel::InstrumentInfo>(this->getInstrument(hintsStr, true))};
426
427 std::vector<FileInfo> fileInfos{fileInfo};
428 processFileInfos(fileInfos, extensionsProvided, useOnlyExtensionsProvided);
429 const auto &resolvedFileInfo = fileInfos[0];
430
431 if (resolvedFileInfo.found)
432 return API::Result<std::filesystem::path>(resolvedFileInfo.path);
433
434 g_log.debug() << "Failed to find file for hint: " << hintstr << "\n";
435 if (resolvedFileInfo.error)
436 g_log.debug() << "Error message: " << resolvedFileInfo.errorMsg << "\n";
438 std::filesystem::path(), resolvedFileInfo.errorMsg.empty() ? "Not found." : resolvedFileInfo.errorMsg);
439}
440
449void FileFinderImpl::getUniqueExtensions(const std::vector<std::string> &extensionsToAdd,
450 std::vector<std::string> &uniqueExts) const {
451 const bool isCaseSensitive = getCaseSensitive();
452 for (const auto &ext : extensionsToAdd) {
453 const auto normalized = isCaseSensitive ? ext : toLower(ext);
454 if (std::find(uniqueExts.begin(), uniqueExts.end(), normalized) == uniqueExts.end())
455 uniqueExts.emplace_back(normalized);
456 }
457}
458
465std::string FileFinderImpl::validateRuns(const std::string &searchText) const {
466 if (!isASCII(searchText))
467 return "An unsupported non-ASCII character was found in the search text.";
468 return "";
469}
470
488bool FileFinderImpl::isMalformedRange(const std::string &token) const {
489 if (token.find('/') != std::string::npos || token.find('\\') != std::string::npos ||
490 token.find(ALLOWED_SUFFIX) != std::string::npos || std::filesystem::path(token).has_extension())
491 return false;
492
493 const Kernel::StringTokenizer parts(token, "-",
495 return parts.count() >= 2;
496}
497
523std::vector<std::filesystem::path> FileFinderImpl::findRuns(const std::string &hintstr,
524 const std::vector<std::string> &extensionsProvided,
525 const bool useOnlyExtensionsProvided) const {
526 auto const error = validateRuns(hintstr);
527 if (!error.empty())
528 throw std::invalid_argument(error);
529
530 // Pre-split on comma operators (any-char→non-digit commas), mirroring
531 // MultipleFileProperty. Digit→digit commas (e.g. "INST15196,15197") stay
532 // intact so the parser can expand them as run-number lists.
533 const auto tokens = splitOnCommaOperators(Kernel::Strings::strip(hintstr));
534
536 parser.setTrimWhiteSpaces(true);
537
538 std::vector<std::string> hints;
539 for (const auto &token : tokens) {
540 try {
541 const auto expanded = expandHint(token, parser);
542 hints.insert(hints.end(), expanded.cbegin(), expanded.cend());
543 } catch (const std::range_error &re) {
544 // The parser refused this range as too large. Surface as invalid_argument
545 // so callers see a validation error rather than a NotFoundError.
546 throw std::invalid_argument(re.what());
547 } catch (const std::exception &) {
548 // The parser could not interpret the token. If it looks like a malformed
549 // run range, report that explicitly (so the user can rule out a genuine
550 // file-not-found); otherwise pass it through as a literal file hint to be
551 // resolved, or surfaced as a NotFoundError, downstream.
552 if (isMalformedRange(token))
553 throw std::invalid_argument("Malformed range of runs: " + token);
554 hints.push_back(token);
555 }
556 }
557
558 return findRuns(hints, extensionsProvided, useOnlyExtensionsProvided);
559}
560
561void FileFinderImpl::prepareFileInfo(FileInfo &fileInfo, const std::vector<std::string> &extensionsProvided,
562 bool useOnlyExtensionsProvided) const {
563 if (fileInfo.found || fileInfo.error)
564 return;
565
566 g_log.debug() << " " << fileInfo.hint << " instrument: " << (fileInfo.instr ? fileInfo.instr->name() : "null")
567 << "\n";
568
569 const Kernel::FacilityInfo &facility = fileInfo.instr->facility();
570 const std::vector<std::string> facilityExtensions = facility.extensions();
571
572 // NB: std::filesystem::path::extension() only returns the *final* extension,
573 // so a hint like "INST_123.nxs.h5" is split into filename "INST_123.nxs" and
574 // extension ".h5". This matches how such double-extension files are searched
575 // for on disk (the leading ".nxs" stays part of the stem).
576 std::filesystem::path filePath(fileInfo.hint);
577 const auto extension = filePath.extension();
578 std::string filename = filePath.replace_extension().string();
579
580 if (filePath.parent_path().empty()) {
581 try {
582 if (!facility.noFilePrefix()) {
583 filename = makeFileName(filename, *fileInfo.instr);
584 }
585 } catch (std::invalid_argument &) {
586 if (filename.length() >= fileInfo.hint.length()) {
587 g_log.information() << "Could not form filename from standard rules '" << filename << "'\n";
588 }
589 }
590 }
591
592 if (filename.empty()) {
593 g_log.warning() << "Unable to determine filename for hint '" << fileInfo.hint << "\n";
594 fileInfo.error = true;
595 fileInfo.errorMsg = "Unable to determine filename from hint.";
596 return;
597 }
598
599 g_log.debug() << "filename to search for: " << filename << " with extension: " << extension << "\n";
600
601 // Look first at the original filename then for case variations. This is important
602 // on platforms where file names ARE case sensitive.
603 fileInfo.filenames.insert(filename);
604 if (!getCaseSensitive()) {
605 fileInfo.filenames.insert(toUpper(filename));
606 fileInfo.filenames.insert(toLower(filename));
607 }
608
609 // Merge the extensions & throw out duplicates
610 // On Windows throw out ones that only vary in case
611 fileInfo.extensionsToSearch.reserve(1 + extensionsProvided.size() + facilityExtensions.size());
612
613 if (useOnlyExtensionsProvided) {
614 getUniqueExtensions(extensionsProvided, fileInfo.extensionsToSearch);
615 } else {
616 // Search the hint's own extension first (highest priority), then the
617 // provided and facility extensions.
618 if (!extension.empty())
619 fileInfo.extensionsToSearch.emplace_back(extension.string());
620
621 getUniqueExtensions(extensionsProvided, fileInfo.extensionsToSearch);
622 getUniqueExtensions(facilityExtensions, fileInfo.extensionsToSearch);
623 }
624
625 fileInfo.archs = getArchiveSearch(facility);
626}
627
628void FileFinderImpl::processFileInfos(std::vector<FileInfo> &fileInfos,
629 const std::vector<std::string> &extensionsProvided,
630 bool useOnlyExtensionsProvided) const {
631 for (auto &fileInfo : fileInfos)
632 prepareFileInfo(fileInfo, extensionsProvided, useOnlyExtensionsProvided);
633
634 performFileSearch(fileInfos);
635 performCacheSearch(fileInfos);
636 performArchiveSearch(fileInfos);
637}
638
639std::vector<std::filesystem::path> FileFinderImpl::findRuns(const std::vector<std::string> &hints,
640 const std::vector<std::string> &extensionsProvided,
641 const bool useOnlyExtensionsProvided) const {
642 if (hints.empty())
643 return {};
644
645 for (const auto &hint : hints) {
646 auto const error = validateRuns(hint);
647 if (!error.empty())
648 throw std::invalid_argument(error);
649 }
650
651 std::vector<FileInfo> fileInfos;
652 fileInfos.reserve(hints.size());
653 std::shared_ptr<Kernel::InstrumentInfo> cachedInstr;
654 for (const auto &hint : hints) {
655 auto filePath = tryResolvePathWithExtension(hint);
656 if (!filePath.empty()) {
657 fileInfos.emplace_back(FileInfo{.hint = hint, .found = true, .path = filePath});
658 continue;
659 }
660 cachedInstr = std::make_shared<Kernel::InstrumentInfo>(
661 this->getInstrument(hint, true, cachedInstr ? cachedInstr->shortName() : std::string()));
662 fileInfos.emplace_back(FileInfo{.hint = hint, .instr = cachedInstr});
663 }
664
665 processFileInfos(fileInfos, extensionsProvided, useOnlyExtensionsProvided);
666
667 std::vector<std::filesystem::path> res;
668 res.reserve(fileInfos.size());
669 for (const auto &fileInfo : fileInfos) {
670 if (!fileInfo.found) {
671 if (fileInfo.error)
672 g_log.warning() << "Error while searching for '" << fileInfo.hint << "': " << fileInfo.errorMsg << "\n";
673 else
674 g_log.warning() << "Failed to find file for hint '" << fileInfo.hint << "'\n";
675 throw Kernel::Exception::NotFoundError("Unable to find file:", fileInfo.hint);
676 }
677 if (fileInfo.error) {
678 g_log.debug() << "Non-fatal error during search for '" << fileInfo.hint << "': " << fileInfo.errorMsg << "\n";
679 }
680 res.emplace_back(fileInfo.path);
681 }
682 return res;
683}
684
685void FileFinderImpl::performFileSearch(std::vector<FileInfo> &fileInfos) const {
686 // Before we try any globbing, make sure we exhaust all reasonable attempts at
687 // constructing the possible filename.
688 // Avoiding the globbing of getFullPath() for as long as possible will help
689 // performance when calling findRuns()
690 // with a large range of files, especially when searchPaths consists of
691 // folders containing a large number of runs.
692
693 const std::vector<std::string> &searchPaths = Kernel::ConfigService::Instance().getDataSearchDirs();
694
695 for (auto &fileInfo : fileInfos) {
696 if (fileInfo.found || fileInfo.error)
697 continue;
698
699 std::vector<std::string> extensions;
700 extensions.assign(fileInfo.extensionsToSearch.begin(), fileInfo.extensionsToSearch.end());
701
702 // Remove wild cards.
703 extensions.erase(std::remove_if(extensions.begin(), extensions.end(), containsWildCard), extensions.end());
704
705 // Use the std::error_code overloads of exists() so a single unreadable
706 // search path (permission denied, dangling symlink, dead network mount)
707 // doesn't abort the search for every other path.
708 std::error_code ec;
709 for (const auto &extension : extensions) {
710 for (const auto &filename : fileInfo.filenames) {
711 for (const auto &searchPath : searchPaths) {
712 const auto filePath = std::filesystem::path(searchPath) / (filename + extension);
713 if (std::filesystem::exists(filePath, ec)) {
714 fileInfo.found = true;
715 fileInfo.path = filePath;
716 break;
717 }
718 }
719 if (fileInfo.found)
720 break;
721 }
722 if (fileInfo.found)
723 break;
724 }
725
726 if (!fileInfo.found)
727 for (const auto &extension : extensions) {
728 for (const auto &filename : fileInfo.filenames) {
729 const auto filepath = getFullPath(filename + extension);
730 if (!filepath.empty() && std::filesystem::exists(filepath, ec)) {
731 g_log.debug() << "path returned from getFullPath() = " << filepath << '\n';
732 fileInfo.found = true;
733 fileInfo.path = filepath;
734 break;
735 }
736 }
737 if (fileInfo.found)
738 break;
739 }
740 }
741}
742
743void FileFinderImpl::performCacheSearch(std::vector<FileInfo> &fileInfos) const {
744 // Search data cache
745 std::filesystem::path cachePathToSearch(Kernel::ConfigService::Instance().getString("datacachesearch.directory"));
746 // Only expect to find path to data cache on IDAaaS
747 if (std::filesystem::exists(cachePathToSearch)) {
748 for (auto &fileInfo : fileInfos) {
749 if (fileInfo.found || fileInfo.error)
750 continue;
751
752 auto cacheFilePath =
753 getISISInstrumentDataCachePath(cachePathToSearch, fileInfo.filenames, fileInfo.extensionsToSearch);
754
755 if (cacheFilePath) {
756 g_log.debug() << "Found file in data cache: " << cacheFilePath.result() << "\n";
757 fileInfo.found = true;
758 fileInfo.path = cacheFilePath.result();
759 } else {
760 fileInfo.errorMsg = cacheFilePath.errors();
761 }
762 }
763 } else {
764 g_log.debug() << "Data cache directory not found, proceeding with the search."
765 << "\n";
766 }
767}
768
774IArchiveSearch_sptr FileFinderImpl::batchableArchive(const std::vector<FileInfo> &fileInfos) {
775 const auto isUnfound = [](const auto &fi) { return !fi.found && !fi.error; };
776 const auto first = std::find_if(fileInfos.cbegin(), fileInfos.cend(), isUnfound);
777 if (first == fileInfos.cend() || first->archs.size() != 1)
778 return nullptr;
779 const auto &arch = first->archs[0];
780 if (!arch || !arch->supportsMultipleHints())
781 return nullptr;
782 const Kernel::InstrumentInfo &refInstr = *first->instr;
783 for (auto it = std::next(first); it != fileInfos.cend(); ++it) {
784 if (!isUnfound(*it))
785 continue;
786 if (it->archs.size() != 1 || *it->instr != refInstr)
787 return nullptr;
788 }
789 return arch;
790}
791
792void FileFinderImpl::performArchiveSearch(std::vector<FileInfo> &fileInfos) const {
793 if (fileInfos.empty())
794 return;
795
796 if (const auto sharedArch = batchableArchive(fileInfos)) {
797 performBatchedArchiveSearch(fileInfos, sharedArch);
798 // The batched call may have failed outright, returned a mismatched number
799 // of paths, or only resolved some of the hints. Fall back to a per-file
800 // search for anything still unfound so a partial batch result doesn't doom
801 // files that an individual lookup could locate. Entries already found (or
802 // flagged with an error) are skipped by the per-file search.
803 const auto stillUnfound =
804 std::any_of(fileInfos.cbegin(), fileInfos.cend(), [](const auto &fi) { return !fi.found && !fi.error; });
805 if (stillUnfound)
807 } else {
809 }
810}
811
812void FileFinderImpl::performBatchedArchiveSearch(std::vector<FileInfo> &fileInfos,
813 const IArchiveSearch_sptr &sharedArch) const {
814 g_log.debug() << "performArchiveSearch: batching unfound hints through a single archive call\n";
815
816 // One hint per unfound file (archive search is case-insensitive so the
817 // first filename in the set is sufficient).
818 std::vector<std::string> hints;
819 for (const auto &fileInfo : fileInfos) {
820 if (fileInfo.found || fileInfo.error)
821 continue;
822 hints.push_back(*fileInfo.filenames.cbegin());
823 }
824
825 const auto archivePaths = sharedArch->getArchivePaths(hints);
826 if (!archivePaths) {
827 g_log.error() << "Archive search failed: " << archivePaths.errors() << "\n";
828 return;
829 }
830 if (archivePaths.result().size() != hints.size()) {
831 g_log.error() << "Archive search returned a different number of paths than hints. Expected " << hints.size()
832 << " but got " << archivePaths.result().size() << ".\n";
833 return;
834 }
835
836 // Walk the unfound entries in the same order the hints were collected and
837 // assign each archive result back.
838 const auto &paths = archivePaths.result();
839 size_t index = 0;
840 for (auto &fileInfo : fileInfos) {
841 if (fileInfo.found || fileInfo.error)
842 continue;
843 const auto &archivePath = paths[index++];
844 try {
845 if (std::filesystem::exists(archivePath)) {
846 fileInfo.found = true;
847 fileInfo.path = archivePath;
848 }
849 } catch (std::exception &e) {
850 g_log.error() << "Cannot open file " << archivePath << ": " << e.what() << '\n';
851 fileInfo.error = true;
852 fileInfo.errorMsg = "Cannot open file from archive: " + std::string(e.what());
853 }
854 }
855}
856
857void FileFinderImpl::performPerFileArchiveSearch(std::vector<FileInfo> &fileInfos) const {
858 // Cache the directory and extension of the last archive hit so consecutive
859 // runs in a range can be resolved with a local existence check rather than
860 // another network call.
861 std::filesystem::path lastFoundDir;
862 std::string lastFoundExt;
863
864 const auto tryLastFoundShortcut = [&](FileInfo &fileInfo) {
865 if (lastFoundDir.empty() || lastFoundExt.empty())
866 return false;
867 try {
868 for (const auto &filename : fileInfo.filenames) {
869 const auto candidate = lastFoundDir / (filename + lastFoundExt);
870 if (std::filesystem::exists(candidate)) {
871 fileInfo.found = true;
872 fileInfo.path = candidate;
873 return true;
874 }
875 }
876 } catch (...) {
877 lastFoundDir.clear();
878 lastFoundExt.clear();
879 }
880 return false;
881 };
882
883 for (auto &fileInfo : fileInfos) {
884 if (fileInfo.found || fileInfo.error)
885 continue;
886
887 if (tryLastFoundShortcut(fileInfo)) {
888 // Shortcut hit: lastFoundDir/Ext already point at this fileInfo's parent.
889 continue;
890 }
891
892 if (!fileInfo.archs.empty()) {
893 g_log.debug() << "Search the archives for file: " << fileInfo.hint << "\n";
894 const auto archivePath = getArchivePath(fileInfo.archs, fileInfo.filenames, fileInfo.extensionsToSearch);
895 if (archivePath) {
896 try {
897 if (std::filesystem::exists(archivePath.result())) {
898 fileInfo.found = true;
899 fileInfo.path = archivePath.result();
900 }
901 } catch (std::exception &e) {
902 g_log.error() << "Cannot open file " << archivePath << ": " << e.what() << '\n';
903 fileInfo.error = true;
904 fileInfo.errorMsg = "Cannot open file from archive: " + std::string(e.what());
905 }
906 }
907 }
908
909 if (fileInfo.found) {
910 lastFoundDir = fileInfo.path.parent_path();
911 lastFoundExt = fileInfo.path.extension().string();
912 }
913 }
914}
915
917FileFinderImpl::getISISInstrumentDataCachePath(const std::filesystem::path &cacheDir,
918 const std::set<std::string> &hintstrs,
919 const std::vector<std::string> &exts) const {
920 std::string errors;
921 auto dataCache = API::ISISInstrumentDataCache(cacheDir.string());
922
923 for (const auto &hint : hintstrs) {
924 std::filesystem::path parentDir;
925
926 try {
927 parentDir = dataCache.getFileParentDirectoryPath(hint);
928 } catch (const std::invalid_argument &e) {
929 errors += "Data cache: " + std::string(e.what());
930 return API::Result<std::filesystem::path>("", errors);
931 } catch (const Json::Exception &e) {
932 errors += "Data cache: Failed parsing to JSON: " + std::string(e.what()) +
933 "Error likely due to accessing instrument index file while it was being updated on IDAaaS.";
934 return API::Result<std::filesystem::path>("", errors);
935 }
936
937 if (!std::filesystem::exists(parentDir)) {
938 errors += "Data cache: Directory not found: " + parentDir.string();
939 return API::Result<std::filesystem::path>("", errors);
940 }
941
942 for (const auto &ext : exts) {
943 const auto filePath = parentDir / (hint + ext);
944 try { // Catches error for permission denied
945 if (std::filesystem::exists(filePath)) {
946 return API::Result<std::filesystem::path>(filePath);
947 }
948 } catch (const std::filesystem::filesystem_error &e) {
949 errors += "Data cache: " + std::string(e.what());
950 return API::Result<std::filesystem::path>("", errors);
951 }
952 }
953 errors += "Data cache: " + hint + " not found in " + parentDir.string();
954 }
955 return API::Result<std::filesystem::path>("", errors);
956}
957
968const API::Result<std::filesystem::path> FileFinderImpl::getArchivePath(const std::vector<IArchiveSearch_sptr> &archs,
969 const std::set<std::string> &hintstrs,
970 const std::vector<std::string> &exts) const {
971 g_log.debug() << "getArchivePath([IArchiveSearch_sptr], [ ";
972 for (const auto &iter : hintstrs)
973 g_log.debug() << iter << " ";
974 g_log.debug() << "], [ ";
975 for (const auto &iter : exts)
976 g_log.debug() << iter << " ";
977 g_log.debug() << "])\n";
978
979 std::string errors;
980 for (const auto &arch : archs) {
981 try {
982 g_log.debug() << "Getting archive path for requested files\n";
983 auto path = arch->getArchivePath(hintstrs, exts);
984 if (path)
985 return path;
986 else
987 errors += path.errors();
988 } catch (...) {
989 }
990 }
991 return API::Result<std::filesystem::path>("", errors);
992}
993
994std::filesystem::path FileFinderImpl::tryResolvePathWithExtension(const std::string &hint) const {
995 if (!std::filesystem::path(hint).has_extension())
996 return {};
997 // getFullPath already verifies existence and returns empty on miss.
998 auto path = getFullPath(hint);
999 if (!path.empty())
1000 g_log.debug() << "found path = " << path << '\n';
1001 return path;
1002}
1003
1004} // namespace Mantid::API
std::string name
Definition Run.cpp:60
double error
std::map< DeltaEMode::Type, std::string > index
std::pair< std::string, std::string > toInstrumentAndNumber(const std::string &hintstr, const std::string &defaultInstrument="") const
Extracts the instrument name and run number from a hint.
std::string validateRuns(const std::string &searchText) const
A method that returns error messages if the provided runs are invalid.
static std::vector< IArchiveSearch_sptr > getArchiveSearch(const Kernel::FacilityInfo &facility)
void performBatchedArchiveSearch(std::vector< FileInfo > &fileInfos, const IArchiveSearch_sptr &sharedArch) const
void prepareFileInfo(FileInfo &fileInfo, const std::vector< std::string > &extensionsProvided, bool useOnlyExtensionsProvided) const
bool getCaseSensitive() const
Option to get if file finder should be case sensitive.
std::string getExtension(const std::string &filename, const std::vector< std::string > &exts) const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
void performCacheSearch(std::vector< FileInfo > &fileInfos) const
int m_globOption
glob option - set to case sensitive or insensitive
Definition FileFinder.h:125
void performFileSearch(std::vector< FileInfo > &fileInfos) const
std::string extractAllowedSuffix(std::string &userString) const
Run numbers can be followed by an allowed string.
const Kernel::InstrumentInfo getInstrument(const std::string &hintstr, const bool returnDefaultIfNotFound=true, const std::string &defaultInstrument="") const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
const API::Result< std::filesystem::path > findRun(const std::string &hintstr, const std::vector< std::string > &exts={}, const bool useExtsOnly=false) const
Find a path to a single file from a hint.
static const std::string ALLOWED_SUFFIX
a string that is allowed at the end of any run number
Definition FileFinder.h:94
std::filesystem::path getFullPath(const std::string &filename, const bool ignoreDirs=false) const
Return the full path to the file given its name.
FileFinderImpl()
Default constructor.
void getUniqueExtensions(const std::vector< std::string > &extensionsToAdd, std::vector< std::string > &uniqueExts) const
Given a set of already determined extensions and new extensions, create a set of all extensions.
const API::Result< std::filesystem::path > getArchivePath(const std::vector< IArchiveSearch_sptr > &archs, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
Return the path to the file found in archive.
void performPerFileArchiveSearch(std::vector< FileInfo > &fileInfos) const
static IArchiveSearch_sptr batchableArchive(const std::vector< FileInfo > &fileInfos)
If every unfound FileInfo shares a single archive (and instrument) that supports batched multi-hint l...
void performArchiveSearch(std::vector< FileInfo > &fileInfos) const
void setCaseSensitive(const bool cs)
Option to set if file finder should be case sensitive.
std::filesystem::path tryResolvePathWithExtension(const std::string &filename) const
If the hint already carries an extension, return its resolved full path in the data search dirs (or e...
const API::Result< std::filesystem::path > getISISInstrumentDataCachePath(const std::filesystem::path &cacheDir, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
bool isMalformedRange(const std::string &token) const
Decide whether a token the parser could not expand is a malformed run range rather than a literal fil...
void processFileInfos(std::vector< FileInfo > &fileInfos, const std::vector< std::string > &extensionsProvided, bool useOnlyExtensionsProvided) const
std::vector< std::filesystem::path > findRuns(const std::string &hintstr, const std::vector< std::string > &exts={}, const bool useExtsOnly=false) const
Find a list of files from a comma- and range-separated hint string.
std::string makeFileName(const std::string &hintstr, const Kernel::InstrumentInfo &instrument) const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
Exception for when an item is not found in a collection.
Definition Exception.h:145
const char * what() const noexcept override
Writes out the range and limits.
A class that holds information about a facility.
const std::vector< std::string > & archiveSearch() const
Return the archive search interface names.
const std::vector< std::string > & extensions() const
Returns a list of file extensions.
bool noFilePrefix() const
Returns a bool indicating whether prefix is required in file names.
const std::string & name() const
Return the name of the facility.
static constexpr int GLOB_DEFAULT
Glob option constants (compatible with Poco::Glob)
Definition Glob.h:29
static constexpr int GLOB_CASELESS
Definition Glob.h:30
A class that holds information about an instrument.
const std::string & shortName() const
Return the short name of the instrument.
std::string filePrefix(unsigned int runNumber) const
Returns file prefix for this instrument and a run number.
const std::string & delimiter() const
Returns the default delimiter between instrument name and run number.
const std::string & name() const
Return the name of the instrument.
int zeroPadding(unsigned int runNumber) const
Returns zero padding for this instrument and a run number.
The Logger class is in charge of publishing messages from the framework through various channels.
Definition Logger.h:51
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
void error(const std::string &msg)
Logs at error level.
Definition Logger.cpp:108
void warning(const std::string &msg)
Logs at warning level.
Definition Logger.cpp:117
void information(const std::string &msg)
Logs at information level.
Definition Logger.cpp:136
This class takes a string representing multiple files and parses it into a vector of vectors of file ...
void setTrimWhiteSpaces(const bool &setting)
Set the flag for trimming whitespaces in run string.
void parse(const std::string &multiFileName)
Parse the given multiFileNameString.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
std::size_t count() const
Get the total number of tokens.
std::shared_ptr< IArchiveSearch > IArchiveSearch_sptr
Typedef for a shared pointer to an IArchiveSearch.
Kernel::Logger g_log("ExperimentInfo")
static logger object
Kernel::Logger g_log("DetermineSpinStateOrder")
MANTID_KERNEL_DLL std::string toLower(const std::string &input)
Converts string to all lowercase.
Definition Strings.cpp:129
MANTID_KERNEL_DLL std::string strip(const std::string &A)
strip pre/post spaces
Definition Strings.cpp:419
MANTID_KERNEL_DLL std::string toUpper(const std::string &input)
Converts string to all uppercase.
Definition Strings.cpp:137
Per-hint state threaded through the file-search pipeline.
Definition FileFinder.h:79
std::vector< Mantid::API::IArchiveSearch_sptr > archs
Definition FileFinder.h:88
std::shared_ptr< Mantid::Kernel::InstrumentInfo > instr
Definition FileFinder.h:83
std::vector< std::string > extensionsToSearch
Definition FileFinder.h:87
std::set< std::string > filenames
Definition FileFinder.h:86