19#include <boost/algorithm/string.hpp>
20#include <boost/regex.hpp>
// Regular-expression fragment: a capture group matching one or more ASCII
// letters, or the literal "PG3" / "pg3".
// NOTE(review): the name suggests this is the instrument-name pattern — confirm against its users.
29const std::string
INST =
"([A-Za-z]+|PG3|pg3)";
// Regular-expression fragment: a capture group matching zero or more whitespace characters.
32const std::string
SPACE =
"(\\s*)";
47const std::string
ANY =
// Forward declaration. Parses a single comma-separated token and appends the
// run groups it describes to parsedRuns.
58void parseToken(std::vector<std::vector<unsigned int>> &parsedRuns,
const std::string &token);
// Forward declaration. Generates the runs between `from` and `to`, stepping by
// stepSize. The definition throws for a step size of zero and when the
// configured multi-file limit is exceeded.
59std::vector<std::vector<unsigned int>> generateRange(
const unsigned int from,
const unsigned int to,
60 const unsigned int stepSize,
const bool addRuns);
// Forward declaration. Checks the form of one comma-separated token and throws
// std::runtime_error when it is empty or malformed.
61void validateToken(
const std::string &token);
/**
 * Forward declaration: test whether a string is matched in its entirety by a
 * regular expression.
 *
 * @param stringToMatch the string to test
 * @param regexString the regular expression, given as a string
 * @param caseless selects the case-insensitive form of the regular expression
 *                 (defaults to false)
 * @return true if the whole of stringToMatch matches the expression
 */
bool matchesFully(const std::string &stringToMatch, const std::string &regexString, const bool caseless = false);
/**
 * Forward declaration: search a string with a regular expression and return
 * the matching text.
 *
 * @param regexString the regular expression, given as a string
 * @param toParse the string to search
 * @param caseless selects the case-insensitive form of the regular expression
 *                 (defaults to false)
 * @return the matching string
 */
std::string getMatchingString(const std::string &regexString, const std::string &toParse, const bool caseless = false);
// Forward declaration. Builds the run string for `run` using the file prefix,
// delimiter and zero padding of the instrument named by instString.
64std::string pad(
const unsigned int run,
const std::string &instString);
// Forward declaration. Adds `range` to `ranges`, joining it onto the last
// stored range when that range ends immediately before `range` starts.
66std::set<std::pair<unsigned int, unsigned int>>
67mergeAdjacentRanges(std::set<std::pair<unsigned int, unsigned int>> ranges,
68 const std::pair<unsigned int, unsigned int> &range);
// Comparison functor relating a run range, held as a (first, second) pair, to
// a single run number. Both argument orders are declared.
71struct RangeContainsRun {
// (range, run) argument order.
72 bool operator()(
const std::pair<unsigned int, unsigned int> &range,
const unsigned int run);
// (run, range) argument order.
73 bool operator()(
const unsigned int run,
const std::pair<unsigned int, unsigned int> &range);
// Forward declaration. Produces a string from the ranges held by a RunRangeList.
76std::string
toString(
const RunRangeList &runRangeList);
// Forward declaration. Accumulation step used by toString: takes the string built so far and one run range.
77std::string accumulateString(std::string output, std::pair<unsigned int, unsigned int> runRange);
102 for (
const auto &fileName : fileNames) {
103 parser.
parse(fileName);
123 lowerA.resize(a.size());
125 lowerB.resize(b.size());
127 std::transform(a.cbegin(), a.cend(), lowerA.begin(), tolower);
128 std::transform(b.cbegin(), b.cend(), lowerB.begin(), tolower);
130 return lowerA > lowerB;
139 : m_runs(), m_fileNames(), m_multiFileName(), m_dirString(), m_instString(), m_underscoreString(), m_runString(),
140 m_extString(), m_validInstNames(), m_trimWhiteSpaces(true) {
144 for (
const auto facility : facilities) {
145 const std::vector<InstrumentInfo> instruments = facility->instruments();
147 for (
const auto &instrument : instruments) {
195 return std::vector<std::vector<unsigned int>>();
205 if (!matchesFully(
runString,
"([0-9]|\\+|\\-|,|:)+")) {
206 throw std::runtime_error(
"Non-numeric or otherwise unaccetable character(s) detected.");
210 std::vector<std::string> tokens;
211 tokens = boost::split(tokens,
runString, boost::is_any_of(
","));
214 std::for_each(tokens.begin(), tokens.end(), validateToken);
217 std::vector<std::vector<unsigned int>> runGroups;
218 for (
auto const &token : tokens) {
219 parseToken(runGroups, token);
267 throw std::runtime_error(
"No file name to parse.");
277 if (lastDot != std::string::npos)
282 if (lastSeparator != std::string::npos)
288 throw std::runtime_error(
"Unable to parse.");
295 throw std::runtime_error(
"There does not appear to be any runs present.");
298 return matchesFully(base, name +
".*", true);
303 m_instString = getMatchingString(
"^" + *instrumentNameIt, base,
true);
310 throw std::runtime_error(
"There does not appear to be any runs present.");
312 if (isdigit(base[0]))
313 m_instString = ConfigService::Instance().getString(
"default.instrument");
315 throw std::runtime_error(
"There does not appear to be a valid instrument name present.");
322 throw std::runtime_error(
"There does not appear to be any runs present.");
324 const auto &instInfo = ConfigService::Instance().getInstrument(
m_instString);
329 if (base.starts_with(instInfo.delimiter())) {
338 throw std::runtime_error(
"There is an unparsable token present.");
354 : m_prefix(
std::move(prefix)), m_suffix(
std::move(suffix)), m_instString(
std::move(instString)) {}
365 std::vector<std::string> fileNames;
367 std::transform(runs.begin(), runs.end(), std::back_inserter(fileNames),
382 std::stringstream fileName;
386 return fileName.str();
413 std::set<std::pair<unsigned int, unsigned int>>(), mergeAdjacentRanges);
423 for (; from <= to; ++from)
// Parses one comma-separated token into run groups and appends them to parsedRuns.
//
// The token is split on '+' into sub-tokens. Each sub-token either produces
// separate run groups (stored in `runs`) or contributes runs to a single group
// that is to be added together (`runsToAdd`).
//
// Throws std::runtime_error ("Unable to handle a mixture of add ranges and step ranges").
// Note that std::stoi and generateRange may also throw.
//
// NOTE(review): the conditions that select each branch, and the closing braces,
// are on lines not present in this listing; the comments below describe only
// the statements that are shown.
449void parseToken(std::vector<std::vector<unsigned int>> &parsedRuns,
const std::string &token) {
450 std::vector<std::vector<unsigned int>> runs;
// Break the token into its '+'-separated parts.
452 std::vector<std::string> subTokens;
453 boost::split(subTokens, token, boost::is_any_of(
"+"));
454 std::vector<unsigned int> runsToAdd;
455 for (
auto const &subToken : subTokens) {
// Sub-token used as a single run number: goes into the group of runs to add.
458 runsToAdd.emplace_back(std::stoi(subToken));
// "a:b" form: range with step 1, generated with addRuns = false.
463 constexpr bool addRuns{
false};
464 std::vector<std::string> rangeDetails;
465 rangeDetails.reserve(2);
466 boost::split(rangeDetails, subToken, boost::is_any_of(
":"));
467 runs = generateRange(std::stoi(rangeDetails.front()), std::stoi(rangeDetails.back()), 1, addRuns);
// "a:b:c" form: range with explicit step (third field), generated with addRuns = false.
472 constexpr bool addRuns{
false};
473 std::vector<std::string> rangeDetails;
474 rangeDetails.reserve(3);
475 boost::split(rangeDetails, subToken, boost::is_any_of(
":"));
476 runs = generateRange(std::stoi(rangeDetails[0]), std::stoi(rangeDetails[1]), std::stoi(rangeDetails[2]), addRuns);
// "a-b" form: range with step 1, generated with addRuns = true; the first
// generated group is appended to runsToAdd.
480 constexpr bool addRuns{
true};
481 std::vector<std::string> rangeDetails;
482 rangeDetails.reserve(2);
483 boost::split(rangeDetails, subToken, boost::is_any_of(
"-"));
484 const auto generated = generateRange(std::stoi(rangeDetails.front()), std::stoi(rangeDetails.back()), 1, addRuns);
485 std::copy(generated.front().cbegin(), generated.front().cend(), back_inserter(runsToAdd));
// Split on both '-' and ':' into three fields: range with explicit step,
// generated with addRuns = true; the first generated group is appended to runsToAdd.
489 constexpr bool addRuns{
true};
490 std::vector<std::string> rangeDetails;
491 rangeDetails.reserve(3);
492 boost::split(rangeDetails, subToken, boost::is_any_of(
"-:"));
493 const auto generated =
494 generateRange(std::stoi(rangeDetails[0]), std::stoi(rangeDetails[1]), std::stoi(rangeDetails[2]), addRuns);
495 std::copy(generated.front().cbegin(), generated.front().cend(), back_inserter(runsToAdd));
// When runs to add were collected they become one extra group in `runs`.
// The mixture error below is raised under a condition on a line not shown.
502 if (!runsToAdd.empty()) {
505 throw std::runtime_error(
"Unable to handle a mixture of add ranges and step ranges");
507 runs.emplace_back(runsToAdd);
// Hand every group produced for this token to the caller's output.
510 std::copy(runs.begin(), runs.end(), std::back_inserter(parsedRuns));
// Generates the runs between `from` and `to`, stepping by stepSize, as a
// vector of run groups.
//
// Throws std::runtime_error for a step size of zero, and std::range_error when
// the computed number of files is greater than the multi-file limit.
//
// NOTE(review): several conditions and closing braces are on lines not present
// in this listing; the comments below describe only the statements shown.
528std::vector<std::vector<unsigned int>> generateRange(
unsigned int const from,
unsigned int const to,
529 unsigned int const stepSize,
bool const addRuns) {
// Error raised for a zero step (the guarding condition is on a line not shown).
531 throw std::runtime_error(
"Unable to generate a range with a step size of zero.");
// The limit is read from the "loading.multifilelimit" configuration value ...
534 auto limitStr = ConfigService::Instance().getValue<std::string>(
"loading.multifilelimit");
// ... or taken from the current facility (how the two are chosen between is not shown).
536 limit = ConfigService::Instance().getFacility().multiFileLimit();
// Order the bounds so the size check below also works when from > to.
539 unsigned int const orderedTo = from > to ? from : to;
540 unsigned int const orderedFrom = from > to ? to : from;
// NOTE(review): this is the number of steps between the bounds; the inclusive
// loops below visit one more run than that — confirm the comparison with
// `limit` is meant to be on steps rather than runs.
541 unsigned int const numberOfFiles = (orderedTo - orderedFrom) / stepSize;
542 if (numberOfFiles > limit) {
543 std::stringstream sstream;
544 sstream <<
"The range from " << orderedFrom <<
" to " << orderedTo <<
" with step " << stepSize
545 <<
" would generate " << numberOfFiles <<
" files. "
546 <<
"This is greater than the current limit of " << limit <<
". "
547 <<
"This limit can be configured in the Mantid.user.properties "
548 "file using the key loading.multifilelimit=200.";
549 throw std::range_error(sstream.str());
552 unsigned int currentRun = from;
553 std::vector<std::vector<unsigned int>> runs;
// Ascending walk, inclusive of `to`.
557 while (currentRun <= to) {
// emplace_back(1, currentRun) starts a new single-run group, while
// runs.front().emplace_back(currentRun) extends the first group. The conditions
// choosing between them are not shown (presumably they depend on addRuns).
560 runs.emplace_back(1, currentRun);
562 runs.front().emplace_back(currentRun);
564 runs.emplace_back(1, currentRun);
567 currentRun += stepSize;
// Descending walk, inclusive of `to`.
572 while (currentRun >= to) {
575 runs.emplace_back(1, currentRun);
577 runs.front().emplace_back(currentRun);
579 runs.emplace_back(1, currentRun);
// Detects that the unsigned subtraction below would go below zero; the action
// taken when this is true is on a line not shown.
584 if (
static_cast<int>(currentRun) -
static_cast<int>(stepSize) < 0)
587 currentRun -= stepSize;
// Checks the form of one comma-separated token, throwing std::runtime_error
// when it is not acceptable.
//
// NOTE(review): the conditions guarding the first and last throw are on lines
// not present in this listing.
601void validateToken(
const std::string &token) {
// Raised for an empty token (guarding condition not shown).
604 throw std::runtime_error(
"A comma-separated token is empty.");
// The token must be a single digit, or start and end with a digit with at
// least one character in between.
607 if (!matchesFully(token,
"[0-9].+[0-9]|[0-9]"))
608 throw std::runtime_error(
"The token \"" + token +
609 "\" is of an incorrect form. Does it begin or "
610 "end with a plus, minus or colon?");
// Final, more general form check (guarding condition not shown).
615 throw std::runtime_error(
"The token \"" + token +
"\" is of an incorrect form.");
// Returns whether stringToMatch is matched by the given regular expression,
// using boost::regex_match on the expression wrapped as "^(" ... "$)".
//
// Two forms of the regex are built, one with boost::regex::icase and one
// without; the lines choosing between them are not present in this listing
// (presumably the choice follows `caseless`).
//
// NOTE(review): the second parameter below reads `®exString`, which looks like
// a mis-encoded `&regexString` — the body refers to `regexString`.
627bool matchesFully(
const std::string &stringToMatch,
const std::string ®exString,
const bool caseless) {
// Case-insensitive form.
631 regex = boost::regex(
"^(" + regexString +
"$)", boost::regex::icase);
// Case-sensitive form.
633 regex = boost::regex(
"^(" + regexString +
"$)");
635 return boost::regex_match(stringToMatch, regex);
// Searches toParse with the given regular expression using a
// boost::sregex_iterator.
//
// Two forms of the regex are built, one with boost::regex::icase and one
// without; the lines choosing between them, and the return statements, are not
// present in this listing.
//
// NOTE(review): the first parameter below reads `®exString`, which looks like
// a mis-encoded `&regexString` — the body refers to `regexString`.
647std::string getMatchingString(
const std::string ®exString,
const std::string &toParse,
const bool caseless) {
// Case-insensitive form.
650 regex = boost::regex(regexString, boost::regex::icase);
// Case-sensitive form.
652 regex = boost::regex(regexString);
655 boost::sregex_iterator it(toParse.begin(), toParse.end(), regex);
// An iterator equal to the default-constructed (end) iterator means no match was found.
657 if (it == boost::sregex_iterator())
// Builds the run string for `run` according to the instrument named by
// instString: zero-padded to the instrument's pad length and preceded by the
// instrument's file prefix and delimiter.
//
// Throws std::runtime_error when the run string is longer than a non-zero pad length.
//
// NOTE(review): the declarations of `prefix` and `runStr`, and the return
// statement, are on lines not present in this listing.
672std::string
pad(
const unsigned int run,
const std::string &instString) {
673 InstrumentInfo
const instInfo = ConfigService::Instance().getInstrument(instString);
// The prefix is only set when the facility does not report noFilePrefix().
675 if (!instInfo.facility().noFilePrefix())
676 prefix = instInfo.filePrefix(run) + instInfo.delimiter();
677 unsigned int padLength = instInfo.zeroPadding(run);
// Left-pad with '0' up to padLength.
679 if (runStr.size() < padLength)
680 runStr.insert(0, padLength - runStr.size(),
'0');
// A pad length of zero disables the length check.
681 else if (padLength > 0 && runStr.size() > padLength)
682 throw std::runtime_error(
"Could not parse run number \"" + runStr +
683 "\" since the instrument run number length required is " +
std::to_string(padLength));
684 runStr.insert(0, prefix);
// (range, run) ordering: true when the range ends before `run`,
// i.e. range.second is less than run.
695bool RangeContainsRun::operator()(
const std::pair<unsigned int, unsigned int> &range,
const unsigned int run) {
696 return range.second < run;
// (run, range) ordering: true when `run` lies before the start of the range,
// i.e. run is less than range.first.
698bool RangeContainsRun::operator()(
const unsigned int run,
const std::pair<unsigned int, unsigned int> &range) {
699 return run < range.first;
// Adds `range` to the set `ranges` (taken by value), joining it onto the last
// stored range when the two are adjacent.
//
// NOTE(review): the return statements, the insertion of the merged pair and
// the closing braces are on lines not present in this listing.
711std::set<std::pair<unsigned int, unsigned int>>
712mergeAdjacentRanges(std::set<std::pair<unsigned int, unsigned int>> ranges,
713 const std::pair<unsigned int, unsigned int> &range) {
// Nothing to merge with yet: store the range as it is.
715 if (ranges.empty()) {
716 ranges.insert(range);
// Adjacent means the last stored range ends exactly one before `range` starts.
721 if (ranges.rbegin()->second + 1 == range.first) {
// Merged range: start of the last stored range up to the end of `range`.
722 unsigned int from = ranges.rbegin()->first;
723 unsigned int to = range.second;
724 std::pair<unsigned int, unsigned int> temp(from, to);
// Remove the last stored range (the one being extended).
726 ranges.erase(--ranges.end(), ranges.end());
// Not adjacent: store the range as a separate entry.
731 ranges.insert(range);
// Accumulation step taking the string built so far and one run range.
// NOTE(review): only the first condition of the body is visible in this
// listing; how `output` is extended is on lines not shown.
747std::string accumulateString(std::string output, std::pair<unsigned int, unsigned int> runRange) {
// A range whose two ends are equal is a single run.
751 if (runRange.first == runRange.second)
// Produces a string from a RunRangeList by folding accumulateString over its
// ranges, starting from an empty string.
766std::string
toString(
const RunRangeList &runRangeList) {
// Work on a copy of the list's set of (first, second) ranges.
767 std::set<std::pair<unsigned int, unsigned int>> runRanges = runRangeList.rangeList();
771 return std::accumulate(runRanges.begin(), runRanges.end(), std::string(), accumulateString);
The ConfigService class provides a simple facade to access the Configuration functionality of the Mantid framework.
const std::vector< FacilityInfo * > getFacilities() const
Get the list of facilities.
A functor that generates a vector of file names from the given vector of runs, and other state passed to it when constructed.
std::string m_instString
String that identifies the instrument.
std::string m_prefix
String that prefixes any generated file names.
GenerateFileName(std::string prefix, std::string suffix, std::string instString)
Constructor.
std::vector< std::string > operator()(const std::vector< unsigned int > &runs)
Overloaded function operator that generates a vector of file names from a vector of runs.
std::string m_suffix
String that suffixes any generated file names.
This class takes a string representing multiple files and parses it into a vector of vectors of file names.
const std::string & runString() const
Return the parsed run string.
std::string m_multiFileName
The given string to parse.
std::vector< std::vector< std::string > > m_fileNames
A vector of vectors of the parsed file names.
std::vector< std::vector< unsigned int > > parseMultiRunString(std::string runString)
Parses a string consisting of only run number info, into a vector of vector of run numbers.
std::vector< std::vector< unsigned int > > m_runs
A vector of vectors of the parsed runs.
std::string m_underscoreString
void split()
Split the string to parse into its component parts.
std::set< std::string, ReverseCaselessCompare > m_validInstNames
All the valid instrument names.
const std::string & underscoreString() const
Return the parsed underscore string.
const std::vector< std::vector< unsigned int > > & runs() const
Return the vector of vectors of parsed runs.
bool trimWhiteSpaces() const
Return the setting for trimming whitespaces in run string.
void clear()
Clear all member variables.
bool m_trimWhiteSpaces
Flag to determine if string input should be trimmed of whitespace.
void setTrimWhiteSpaces(const bool &setting)
Set the flag for trimming whitespaces in run string.
std::string m_dirString
The various sections of the given string to parse.
const std::string & instString() const
Return the parsed instrument string.
void parse(const std::string &multiFileName)
Parse the given multiFileNameString.
bool operator()(const std::string &a, const std::string &b) const
Comparator for the set that holds instrument names in Parser.
A class that holds a list of ranges of runs.
RunRangeList()
Constructor.
std::set< std::pair< unsigned int, unsigned int > > m_rangeList
A set of pairs of unsigned ints, where each pair represents a range of runs.
void addRunRange(const unsigned int from, const unsigned int to)
Add a range of runs.
void addRun(const unsigned int run)
Add a run to the list of run ranges.
const std::string ADD_STEP_RANGE
const std::string SINGLE_OR_STEP_OR_ADD_RANGE
const std::string ADD_RANGE
const std::string ADD_LIST
const std::string UNDERSCORE
const std::string STEP_RANGE
MANTID_KERNEL_DLL std::string suggestWorkspaceName(const std::vector< std::string > &fileNames)
Suggests a workspace name, given a vector of file names.
int convert(const std::string &A, T &out)
Convert a string into a number.
std::string toString(const T &value)
Convert values to strings.
MortonT pad(IntT)
Pad an integer with a given number of padding bits.
std::string to_string(const wide_integer< Bits, Signed > &n)