23#include <boost/lexical_cast.hpp>
24#include <boost/regex.hpp>
29#include <boost/algorithm/string.hpp>
32#include <json/value.h>
/**
 * Check whether an extension pattern contains a wildcard.
 * @param ext :: the extension string to inspect
 * @return true if ext contains at least one '*' character, false otherwise
 */
bool containsWildCard(const std::string &ext) { return ext.find('*') != std::string::npos; }
/**
 * Check that a string consists solely of 7-bit ASCII characters.
 * @param str :: the string to inspect
 * @return true if no character of str has a value above 127 (an empty string yields true)
 */
bool isASCII(const std::string &str) {
  // Compare as unsigned char so that bytes >= 0x80 are seen as 128..255 even where plain char is signed.
  return !std::any_of(str.cbegin(), str.cend(), [](char c) { return static_cast<unsigned char>(c) > 127; });
}
68 FrameworkManager::Instance().loadPlugins();
74 setCaseSensitive(Kernel::ConfigService::Instance().getValue<bool>(
"filefinder.casesensitive").value_or(
false));
107 return Kernel::ConfigService::Instance().getFullPath(filename, ignoreDirs,
m_globOption);
122 std::filesystem::path entry(userString);
123 std::string noExt(entry.stem().string());
126 userString.replace(userString.size() - repNumChars, repNumChars,
"");
141 const bool returnDefaultIfNotFound,
142 const std::string &defaultInstrument)
const {
143 if ((!hintstr.empty()) && (!isdigit(hintstr[0]))) {
145 std::string filename =
toUpper(std::filesystem::path(hintstr).filename().
string());
148 std::string instrName = Kernel::ConfigService::Instance().findLongestInstrumentPrefix(filename);
151 if (instrName.empty()) {
155 return Kernel::ConfigService::Instance().getInstrument(instrName);
158 if (!returnDefaultIfNotFound) {
163 return Kernel::ConfigService::Instance().getInstrument(defaultInstrument);
174 const std::string &defaultInstrument)
const {
187 g_log.
debug() <<
"toInstrumentAndNumber(" << hintstr <<
")\n";
190 if (hintstr.empty()) {
191 throw std::invalid_argument(
"Malformed hint: empty hint");
194 if (isdigit(hintstr[0])) {
197 const auto hintUpper =
toUpper(hintstr);
198 std::string instrPart = instr.
name();
199 if (!hintUpper.starts_with(instrPart)) {
201 if (!hintUpper.starts_with(instrPart)) {
202 throw std::invalid_argument(
"Malformed hint: does not start with instrument name or short name");
207 size_t nChars = instrPart.length();
208 while (nChars < hintstr.size() && !std::isdigit(
static_cast<unsigned char>(hintstr[nChars])))
210 if (nChars == hintstr.size())
211 throw std::invalid_argument(
"Malformed hint: no run number found");
212 runPart = hintstr.substr(nChars);
215 unsigned int irunPart(0);
217 irunPart = boost::lexical_cast<unsigned int>(runPart);
218 }
catch (boost::bad_lexical_cast &) {
219 std::ostringstream os;
220 os <<
"Cannot convert '" << runPart <<
"' to run number.";
221 throw std::invalid_argument(os.str());
225 std::string::size_type i = runPart.find_first_not_of(
'0');
227 while (runPart.size() < nZero)
228 runPart.insert(0,
"0");
229 if (runPart.size() > nZero && nZero != 0) {
230 throw std::invalid_argument(
"Run number does not match instrument's zero padding");
233 return std::make_pair(instr.
filePrefix(irunPart), runPart);
253 if (!isdigit(hintstr[0])) {
255 std::string hintUpper =
toUpper(hintstr);
258 if (hintUpper.rfind(shortName, 0) != 0 && hintUpper.rfind(
name, 0) != 0) {
261 }
catch (
const std::exception &ex) {
262 g_log.
debug() <<
"Failed to resolve instrument from hint '" << hintstr <<
"' in makeFileName: " << ex.what();
264 g_log.
debug() <<
"Failed to resolve instrument from hint '" << hintstr
265 <<
"' in makeFileName due to an unknown exception.";
269 std::string filename(hintstr);
271 const std::string shortName = instrToUse.
shortName();
272 std::string delimiter = instrToUse.
delimiter();
275 if (filename.substr(0, shortName.size()) == shortName) {
276 filename = filename.substr(shortName.size());
277 if ((!delimiter.empty()) && (filename.substr(0, delimiter.size()) == delimiter))
278 filename = filename.substr(delimiter.size());
280 filename = shortName + filename;
286 filename = instrumentName + delimiter + runNumber + suffix;
300 g_log.
debug() <<
"getExtension(" << filename <<
", exts[" << exts.size() <<
"])\n";
303 for (
const auto &ext : exts) {
304 std::string extension =
toUpper(ext);
305 if (extension.rfind(
'*') == extension.size() - 1)
307 extension.resize(extension.rfind(
'*'));
310 std::size_t found =
toUpper(filename).rfind(extension);
311 if (found != std::string::npos) {
312 g_log.
debug() <<
"matched extension \"" << extension <<
"\" based on \"" << ext <<
"\"\n";
313 return filename.substr(found);
317 g_log.
debug() <<
"Failed to find extension. Just using last \'.\'\n";
318 std::size_t pos = filename.find_last_of(
'.');
319 if (pos != std::string::npos) {
320 return filename.substr(pos);
328 std::vector<IArchiveSearch_sptr> archs;
331 std::string archiveOpt = Kernel::ConfigService::Instance().getString(
"datasearch.searcharchive");
332 std::transform(archiveOpt.begin(), archiveOpt.end(), archiveOpt.begin(), tolower);
336 if (archiveOpt.empty() || archiveOpt ==
"off" || facility.
archiveSearch().empty())
340 auto createArchiveSearch = bool(archiveOpt ==
"all");
344 if (!createArchiveSearch) {
345 std::string faciltyName = facility.
name();
346 std::transform(faciltyName.begin(), faciltyName.end(), faciltyName.begin(), tolower);
347 if (archiveOpt ==
"on") {
348 std::string defaultFacility = Kernel::ConfigService::Instance().getString(
"default.facility");
349 std::transform(defaultFacility.begin(), defaultFacility.end(), defaultFacility.begin(), tolower);
350 createArchiveSearch = bool(faciltyName == defaultFacility);
352 createArchiveSearch = bool(archiveOpt.find(faciltyName) != std::string::npos);
357 if (createArchiveSearch) {
359 g_log.
debug() <<
"get archive search for the facility..." << facilityname <<
"\n";
360 archs.emplace_back(ArchiveSearchFactory::Instance().create(facilityname));
379 const std::vector<std::string> &extensionsProvided,
380 const bool useOnlyExtensionsProvided,
381 const std::string &defaultInstrument)
const {
388 return findRun(hint, instrument, extensionsProvided, useOnlyExtensionsProvided);
405 const std::vector<std::string> &extensionsProvided,
406 const bool useOnlyExtensionsProvided)
const {
408 g_log.
debug() <<
"vector findRun(\'" << hint <<
"\', exts[" << extensionsProvided.size() <<
"])\n";
415 std::filesystem::path hintPath(hint);
416 if (hintPath.has_extension()) {
418 g_log.
debug() <<
"hintPath is not empty, check in normal search locations"
423 if (std::filesystem::exists(path)) {
427 }
catch (
const std::exception &) {
430 g_log.
debug() <<
"Unable to find files via directory search with the "
431 "filename that looks like a full filename"
438 const std::vector<std::string> facilityExtensions = facility.
extensions();
441 std::string filename(hint);
442 std::string extension =
getExtension(hint, facilityExtensions);
443 if (!facilityExtensions.empty())
444 filename = hint.substr(0, hint.rfind(extension));
445 if (hintPath.parent_path().empty()) {
450 }
catch (std::invalid_argument &) {
451 if (filename.length() >= hint.length()) {
452 g_log.
information() <<
"Could not form filename from standard rules '" << filename <<
"'\n";
457 if (filename.empty())
467 std::set<std::string> filenames;
468 filenames.insert(filename);
470 std::string transformed(filename);
471 std::transform(filename.begin(), filename.end(), transformed.begin(), toupper);
472 filenames.insert(transformed);
473 std::transform(filename.begin(), filename.end(), transformed.begin(), tolower);
474 filenames.insert(transformed);
479 std::vector<std::string> extensionsToSearch;
480 extensionsToSearch.reserve(1 + extensionsProvided.size() + facilityExtensions.size());
482 if (useOnlyExtensionsProvided) {
486 if (!extension.empty()) {
487 extensionsToSearch.emplace_back(extension);
498 auto path =
getPath(archs, filenames, extensionsToSearch);
506 if (!useOnlyExtensionsProvided && extensionsToSearch.size() == 1) {
508 extensionsToSearch.pop_back();
512 g_log.
warning() <<
"Extension ['" << extension <<
"'] not found.\n";
513 g_log.
warning() <<
"Searching for other facility extensions." << std::endl;
515 path =
getPath(archs, filenames, extensionsToSearch);
534 std::vector<std::string> &uniqueExts)
const {
536 for (
const auto &cit : extensionsToAdd) {
537 std::string transformed(cit);
538 if (!isCaseSensitive) {
539 std::transform(cit.begin(), cit.end(), transformed.begin(), tolower);
541 const auto searchItr = std::find(uniqueExts.begin(), uniqueExts.end(), transformed);
542 if (searchItr == uniqueExts.end()) {
543 uniqueExts.emplace_back(transformed);
555 if (!isASCII(searchText))
556 return "An unsupported non-ASCII character was found in the search text.";
576 const std::vector<std::string> &extensionsProvided,
577 const bool useOnlyExtensionsProvided)
const {
580 throw std::invalid_argument(
error);
583 g_log.
debug() <<
"findRuns hint = " << hint <<
"\n";
584 std::vector<std::filesystem::path> res;
587 static const boost::regex digits(
"[0-9]+");
588 auto h = hints.
begin();
590 std::string instrSName;
591 for (; h != hints.
end(); ++h) {
593 bool fileSuspected =
false;
595 if ((*h).find(
"\\") != std::string::npos) {
596 fileSuspected =
true;
598 if ((*h).find(
"/") != std::string::npos) {
599 fileSuspected =
true;
602 fileSuspected =
true;
607 if ((range.
count() > 2) && (!fileSuspected)) {
608 throw std::invalid_argument(
"Malformed range of runs: " + *h);
609 }
else if ((range.
count() == 2) && (!fileSuspected)) {
613 instrSName = p1.first;
616 std::string run = p1.second;
617 size_t nZero = run.size();
618 if (range[1].size() > nZero) {
619 throw std::invalid_argument(
"Malformed range of runs: " + *h +
620 ". The end of string value is longer than "
621 "the instrument's zero padding");
623 auto runNumber = boost::lexical_cast<int>(run);
624 std::string runEnd = run;
626 runEnd.replace(runEnd.end() - range[1].
size(), runEnd.end(), range[1]);
629 if (!boost::regex_match(runEnd, digits))
630 throw std::invalid_argument(
"Malformed range of runs: Part of the run "
631 "has a non-digit character in it.");
633 auto runEndNumber = boost::lexical_cast<int>(runEnd);
634 if (runEndNumber < runNumber) {
635 throw std::invalid_argument(
"Malformed range of runs: " + *h);
637 std::string previousPath, previousExt;
638 for (
int irun = runNumber; irun <= runEndNumber; ++irun) {
640 while (run.size() < nZero)
645 if (!previousPath.empty() && !previousExt.empty()) {
647 const std::filesystem::path file(previousPath + p1.first + run + previousExt);
648 if (std::filesystem::exists(file)) {
649 res.emplace_back(file.string());
654 previousPath = previousExt =
"";
658 auto path =
findRun(p1.first + run, cachedInstr, extensionsProvided, useOnlyExtensionsProvided).result();
662 previousExt = path.extension().string();
664 previousPath = path.parent_path().string() + std::string(1, std::filesystem::path::preferred_separator);
665 res.emplace_back(path);
675 auto path =
findRun(*h, instr, extensionsProvided, useOnlyExtensionsProvided).result();
678 res.emplace_back(path);
690 const std::set<std::string> &hintstrs,
691 const std::vector<std::string> &exts)
const {
695 for (
const auto &hint : hintstrs) {
697 std::string parentDirPath;
700 parentDirPath = dataCache.getFileParentDirectoryPath(hint);
702 }
catch (
const std::invalid_argument &e) {
703 errors +=
"Data cache: " + std::string(e.what());
706 }
catch (
const Json::Exception &e) {
707 errors +=
"Data cache: Failed parsing to JSON: " + std::string(e.what()) +
708 "Error likely due to accessing instrument index file while it was being updated on IDAaaS.";
712 if (!std::filesystem::exists(parentDirPath)) {
713 errors +=
"Data cache: Directory not found: " + parentDirPath;
717 for (
const auto &ext : exts) {
718 std::filesystem::path filePath(parentDirPath +
'/' + hint + ext);
721 if (std::filesystem::exists(filePath)) {
724 }
catch (
const std::filesystem::filesystem_error &e) {
725 errors +=
"Data cache: " + std::string(e.what());
729 errors +=
"Data cache: " + hint +
" not found in " + parentDirPath;
745 const std::set<std::string> &hintstrs,
746 const std::vector<std::string> &exts)
const {
747 g_log.
debug() <<
"getArchivePath([IArchiveSearch_sptr], [ ";
748 for (
const auto &iter : hintstrs)
751 for (
const auto &iter : exts)
756 for (
const auto &arch : archs) {
758 g_log.
debug() <<
"Getting archive path for requested files\n";
759 auto path = arch->getArchivePath(hintstrs, exts);
763 errors += path.errors();
782 const std::set<std::string> &hintstrs,
783 const std::vector<std::string> &exts)
const {
784 std::filesystem::path path;
786 std::vector<std::string> extensions;
787 extensions.assign(exts.begin(), exts.end());
790 extensions.erase(std::remove_if(extensions.begin(), extensions.end(), containsWildCard), extensions.end());
792 const std::vector<std::string> &searchPaths = Kernel::ConfigService::Instance().getDataSearchDirs();
800 for (
const auto &extension : extensions) {
801 for (
const auto &hint : hintstrs) {
802 for (
const auto &searchPath : searchPaths) {
804 const auto filePath = std::filesystem::path(searchPath) / (hint + extension);
805 if (std::filesystem::exists(filePath))
808 }
catch (
const std::exception &) {
814 for (
const auto &extension : extensions) {
815 for (
const auto &hint : hintstrs) {
818 if (!path.empty() && std::filesystem::exists(path)) {
819 g_log.
debug() <<
"path returned from getFullPath() = " << path <<
'\n';
822 }
catch (std::exception &e) {
823 g_log.
error() <<
"Cannot open file " << path <<
": " << e.what() <<
'\n';
831 std::filesystem::path cachePathToSearch(Kernel::ConfigService::Instance().getString(
"datacachesearch.directory"));
833 if (std::filesystem::exists(cachePathToSearch)) {
838 return cacheFilePath;
840 errors += cacheFilePath.errors();
843 g_log.
debug() <<
"Data cache directory not found, proceeding with the search." << std::endl;
844 errors +=
"Could not find data cache directory: " + cachePathToSearch.string() +
'\n';
848 if (!archs.empty()) {
853 if (std::filesystem::exists(archivePath.result()))
855 }
catch (std::exception &e) {
856 g_log.
error() <<
"Cannot open file " << archivePath <<
": " << e.what() <<
'\n';
860 errors += archivePath.errors();
867 std::string result = src;
868 std::transform(result.begin(), result.end(), result.begin(), toupper);
std::pair< std::string, std::string > toInstrumentAndNumber(const std::string &hintstr, const std::string &defaultInstrument="") const
Extracts the instrument name and run number from a hint.
std::string validateRuns(const std::string &searchText) const
A method that returns error messages if the provided runs are invalid.
const API::Result< std::filesystem::path > getPath(const std::vector< IArchiveSearch_sptr > &archs, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
Return the full path to the file given its name, checking local directories first.
static std::vector< IArchiveSearch_sptr > getArchiveSearch(const Kernel::FacilityInfo &facility)
bool getCaseSensitive() const
Option to get if file finder should be case sensitive.
std::string getExtension(const std::string &filename, const std::vector< std::string > &exts) const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
int m_globOption
glob option - set to case sensitive or insensitive
std::string extractAllowedSuffix(std::string &userString) const
Run numbers can be followed by an allowed string.
const Kernel::InstrumentInfo getInstrument(const std::string &hintstr, const bool returnDefaultIfNotFound=true, const std::string &defaultInstrument="") const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
const API::Result< std::filesystem::path > getISISInstrumentDataCachePath(const std::string &cachePathToSearch, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
const API::Result< std::filesystem::path > findRun(const std::string &hintstr, const std::vector< std::string > &exts={}, const bool useExtsOnly=false, const std::string &defaultInstrument="") const
Find a path to a single file from a hint.
static const std::string ALLOWED_SUFFIX
a string that is allowed at the end of any run number
std::filesystem::path getFullPath(const std::string &filename, const bool ignoreDirs=false) const
Return the full path to the file given its name.
FileFinderImpl()
Default constructor.
void getUniqueExtensions(const std::vector< std::string > &extensionsToAdd, std::vector< std::string > &uniqueExts) const
Given a set of already determined extensions and new extensions, create a set of all extensions.
std::string toUpper(const std::string &src) const
const API::Result< std::filesystem::path > getArchivePath(const std::vector< IArchiveSearch_sptr > &archs, const std::set< std::string > &hintstrs, const std::vector< std::string > &exts) const
Return the path to the file found in archive.
void setCaseSensitive(const bool cs)
Option to set if file finder should be case sensitive.
std::vector< std::filesystem::path > findRuns(const std::string &hintstr, const std::vector< std::string > &exts={}, const bool useExtsOnly=false) const
Find a list of files given a hint.
std::string makeFileName(const std::string &hintstr, const Kernel::InstrumentInfo &instrument) const
DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
Exception for when an item is not found in a collection.
const char * what() const noexcept override
Writes out the range and limits.
A class that holds information about a facility.
const std::vector< std::string > & archiveSearch() const
Return the archive search interface names.
const std::vector< std::string > & extensions() const
Returns a list of file extensions.
bool noFilePrefix() const
Returns a bool indicating whether prefix is required in file names.
const std::string & name() const
Return the name of the facility.
static constexpr int GLOB_DEFAULT
Glob option constants (compatible with Poco::Glob)
static constexpr int GLOB_CASELESS
A class that holds information about an instrument.
const std::string & shortName() const
Return the short name of the instrument.
std::string filePrefix(unsigned int runNumber) const
Returns file prefix for this instrument and a run number.
const FacilityInfo & facility() const
The facility to which this instrument belongs.
const std::string & delimiter() const
Returns the default delimiter between instrument name and run number.
const std::string & name() const
Return the name of the instrument.
int zeroPadding(unsigned int runNumber) const
Returns zero padding for this instrument and a run number.
The Logger class is in charge of publishing messages from the framework through various channels.
void debug(const std::string &msg)
Logs at debug level.
void error(const std::string &msg)
Logs at error level.
void warning(const std::string &msg)
Logs at warning level.
void information(const std::string &msg)
Logs at information level.
Iterator begin()
Iterator referring to first element in the container.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
std::size_t size() const noexcept
Get the total number of tokens.
Iterator end()
Iterator referring to the past-the-end element in the container.
std::size_t count() const
Get the total number of tokens.
Kernel::Logger g_log("ExperimentInfo")
static logger object
Kernel::Logger g_log("DetermineSpinStateOrder")
MANTID_KERNEL_DLL std::string strip(const std::string &A)
strip pre/post spaces
std::string to_string(const wide_integer< Bits, Signed > &n)