16#include <boost/algorithm/string.hpp>
/// Regex fragment matching either a run of ASCII letters or the literal
/// "PG3"/"pg3" (the only alternatives that contain a digit). Judging by the
/// name this is the instrument-name part of a file name -- TODO confirm.
const std::string INST = "([A-Za-z]+|PG3|pg3)";
/// Regex fragment matching zero or more whitespace characters.
const std::string SPACE = "(\\s*)";
47const std::string
ANY =
// Forward declarations of the free helper functions defined later in this file.

/// Parses a single token of a run string and appends the run groups it
/// produces to parsedRuns.
void parseToken(std::vector<std::vector<unsigned int>> &parsedRuns, const std::string &token);

/// Generates the run groups for the range [from, to] walked in steps of
/// stepSize. The definition throws std::runtime_error for a step size of zero
/// and std::range_error when the range exceeds the configured file limit.
std::vector<std::vector<unsigned int>> generateRange(const unsigned int from, const unsigned int to,
                                                     const unsigned int stepSize, const bool addRuns);

/// Checks the form of a single token; throws std::runtime_error if it is
/// not acceptable.
void validateToken(const std::string &token);

/// Returns true if the whole of stringToMatch matches regexString,
/// optionally ignoring case.
bool matchesFully(const std::string &stringToMatch, const std::string &regexString, const bool caseless = false);

/// Searches toParse for regexString, optionally ignoring case.
/// NOTE(review): the value returned when nothing matches is not visible in
/// this view of the file -- confirm against the definition.
std::string getMatchingString(const std::string &regexString, const std::string &toParse,
                              const bool caseless = false);

/// Formats a run number for the given instrument, applying that instrument's
/// zero padding and (unless the facility disables it) its file prefix.
std::string pad(const unsigned int run, const std::string &instString);

/// Folding helper that inserts range into ranges; when range starts directly
/// after the last stored range the two appear to be merged into one.
std::set<std::pair<unsigned int, unsigned int>>
mergeAdjacentRanges(std::set<std::pair<unsigned int, unsigned int>> ranges,
                    const std::pair<unsigned int, unsigned int> &range);
/// Comparison functor relating a single run number to a (first, second) run
/// range. With the definitions in this file a range orders before a run when
/// range.second < run, and a run orders before a range when run < range.first,
/// so a run with first <= run <= second is equivalent to the range.
struct RangeContainsRun {
  /// True when the range lies entirely below the run.
  bool operator()(const std::pair<unsigned int, unsigned int> &range, const unsigned int run);
  /// True when the run lies entirely below the range.
  bool operator()(const unsigned int run, const std::pair<unsigned int, unsigned int> &range);
};
76std::string
toString(
const RunRangeList &runRangeList);
77std::string accumulateString(std::string output, std::pair<unsigned int, unsigned int> runRange);
102 for (
const auto &fileName : fileNames) {
103 parser.
parse(fileName);
123 lowerA.resize(a.size());
125 lowerB.resize(b.size());
127 std::transform(a.cbegin(), a.cend(), lowerA.begin(), tolower);
128 std::transform(b.cbegin(), b.cend(), lowerB.begin(), tolower);
130 return lowerA > lowerB;
139 : m_runs(), m_fileNames(), m_multiFileName(), m_dirString(), m_instString(), m_underscoreString(), m_runString(),
140 m_extString(), m_validInstNames(), m_compoundLeadExtensions(), m_trimWhiteSpaces(true) {
144 for (
const auto facility : facilities) {
145 for (
const auto &instrument : facility->instruments()) {
155 for (
const auto &ext : facility->extensions()) {
156 if (std::count(ext.cbegin(), ext.cend(),
'.') != 2)
158 auto lead = ext.substr(0, ext.find_last_of(
'.'));
159 if (!lead.empty() && lead.front() ==
'.')
206 return std::vector<std::vector<unsigned int>>();
216 if (!matchesFully(
runString,
"([0-9]|\\+|\\-|,|:)+")) {
217 throw std::runtime_error(
"Non-numeric or otherwise unaccetable character(s) detected.");
221 std::vector<std::string> tokens;
222 tokens = boost::split(tokens,
runString, boost::is_any_of(
","));
225 std::for_each(tokens.begin(), tokens.end(), validateToken);
228 std::vector<std::vector<unsigned int>> runGroups;
229 for (
auto const &token : tokens) {
230 parseToken(runGroups, token);
278 throw std::runtime_error(
"No file name to parse.");
288 if (lastSeparator != std::string::npos)
297 if (lastDot != std::string::npos && lastDot >
m_dirString.size()) {
299 if (prevDot != std::string::npos && prevDot >=
m_dirString.size()) {
300 const auto midExt =
m_multiFileName.substr(prevDot, lastDot - prevDot);
311 throw std::runtime_error(
"Unable to parse.");
318 throw std::runtime_error(
"There does not appear to be any runs present.");
321 return matchesFully(base, name +
".*", true);
326 m_instString = getMatchingString(
"^" + *instrumentNameIt, base,
true);
333 throw std::runtime_error(
"There does not appear to be any runs present.");
335 if (isdigit(base[0]))
336 m_instString = ConfigService::Instance().getString(
"default.instrument");
338 throw std::runtime_error(
"There does not appear to be a valid instrument name present.");
345 throw std::runtime_error(
"There does not appear to be any runs present.");
347 const auto &instInfo = ConfigService::Instance().getInstrument(
m_instString);
352 if (base.starts_with(instInfo.delimiter())) {
360 if (base.find_first_of(
"0123456789") == std::string::npos)
361 throw std::runtime_error(
"There do not appear to be any runs present.");
366 throw std::runtime_error(
"There is an unparsable token present.");
382 : m_prefix(
std::move(prefix)), m_suffix(
std::move(suffix)), m_instString(
std::move(instString)) {}
393 std::vector<std::string> fileNames;
395 std::transform(runs.begin(), runs.end(), std::back_inserter(fileNames),
410 std::stringstream fileName;
414 return fileName.str();
441 std::set<std::pair<unsigned int, unsigned int>>(), mergeAdjacentRanges);
451 for (; from <= to; ++from)
477void parseToken(std::vector<std::vector<unsigned int>> &parsedRuns,
const std::string &token) {
478 std::vector<std::vector<unsigned int>> runs;
480 std::vector<std::string> subTokens;
481 boost::split(subTokens, token, boost::is_any_of(
"+"));
482 std::vector<unsigned int> runsToAdd;
483 for (
auto const &subToken : subTokens) {
486 runsToAdd.emplace_back(std::stoi(subToken));
491 constexpr bool addRuns{
false};
492 std::vector<std::string> rangeDetails;
493 rangeDetails.reserve(2);
494 boost::split(rangeDetails, subToken, boost::is_any_of(
":"));
495 runs = generateRange(std::stoi(rangeDetails.front()), std::stoi(rangeDetails.back()), 1, addRuns);
500 constexpr bool addRuns{
false};
501 std::vector<std::string> rangeDetails;
502 rangeDetails.reserve(3);
503 boost::split(rangeDetails, subToken, boost::is_any_of(
":"));
504 runs = generateRange(std::stoi(rangeDetails[0]), std::stoi(rangeDetails[1]), std::stoi(rangeDetails[2]), addRuns);
508 constexpr bool addRuns{
true};
509 std::vector<std::string> rangeDetails;
510 rangeDetails.reserve(2);
511 boost::split(rangeDetails, subToken, boost::is_any_of(
"-"));
512 const auto generated = generateRange(std::stoi(rangeDetails.front()), std::stoi(rangeDetails.back()), 1, addRuns);
513 std::copy(generated.front().cbegin(), generated.front().cend(), back_inserter(runsToAdd));
517 constexpr bool addRuns{
true};
518 std::vector<std::string> rangeDetails;
519 rangeDetails.reserve(3);
520 boost::split(rangeDetails, subToken, boost::is_any_of(
"-:"));
521 const auto generated =
522 generateRange(std::stoi(rangeDetails[0]), std::stoi(rangeDetails[1]), std::stoi(rangeDetails[2]), addRuns);
523 std::copy(generated.front().cbegin(), generated.front().cend(), back_inserter(runsToAdd));
530 if (!runsToAdd.empty()) {
533 throw std::runtime_error(
"Unable to handle a mixture of add ranges and step ranges");
535 runs.emplace_back(runsToAdd);
538 std::copy(runs.begin(), runs.end(), std::back_inserter(parsedRuns));
556std::vector<std::vector<unsigned int>> generateRange(
unsigned int const from,
unsigned int const to,
557 unsigned int const stepSize,
bool const addRuns) {
559 throw std::runtime_error(
"Unable to generate a range with a step size of zero.");
562 auto limitStr = ConfigService::Instance().getValue<std::string>(
"loading.multifilelimit");
564 limit = ConfigService::Instance().getFacility().multiFileLimit();
567 unsigned int const orderedTo = from > to ? from : to;
568 unsigned int const orderedFrom = from > to ? to : from;
569 unsigned int const numberOfFiles = (orderedTo - orderedFrom) / stepSize;
570 if (numberOfFiles > limit) {
571 std::stringstream sstream;
572 sstream <<
"The range from " << orderedFrom <<
" to " << orderedTo <<
" with step " << stepSize
573 <<
" would generate " << numberOfFiles <<
" files. "
574 <<
"This is greater than the current limit of " << limit <<
". "
575 <<
"This limit can be configured in the Mantid.user.properties "
576 "file using the key loading.multifilelimit=200.";
577 throw std::range_error(sstream.str());
580 unsigned int currentRun = from;
581 std::vector<std::vector<unsigned int>> runs;
585 while (currentRun <= to) {
588 runs.emplace_back(1, currentRun);
590 runs.front().emplace_back(currentRun);
592 runs.emplace_back(1, currentRun);
595 currentRun += stepSize;
600 while (currentRun >= to) {
603 runs.emplace_back(1, currentRun);
605 runs.front().emplace_back(currentRun);
607 runs.emplace_back(1, currentRun);
612 if (
static_cast<int>(currentRun) -
static_cast<int>(stepSize) < 0)
615 currentRun -= stepSize;
629void validateToken(
const std::string &token) {
632 throw std::runtime_error(
"A comma-separated token is empty.");
635 if (!matchesFully(token,
"[0-9].+[0-9]|[0-9]"))
636 throw std::runtime_error(
"The token \"" + token +
637 "\" is of an incorrect form. Does it begin or "
638 "end with a plus, minus or colon?");
643 throw std::runtime_error(
"The token \"" + token +
"\" is of an incorrect form.");
/**
 * Tests whether an entire string matches a regular expression.
 *
 * @param stringToMatch the string to test
 * @param regexString   ECMAScript regex source; it is wrapped as
 *                      "^(" + regexString + "$)" before being compiled
 * @param caseless      when true the match is case-insensitive
 * @returns true if the whole of stringToMatch matches, false otherwise
 * @throws std::regex_error if the assembled pattern is not a valid regex
 */
bool matchesFully(const std::string &stringToMatch, const std::string &regexString, const bool caseless) {
  const auto flags = caseless ? std::regex::ECMAScript | std::regex::icase : std::regex::ECMAScript;
  const std::regex regex("^(" + regexString + "$)", flags);
  // regex_match only succeeds when the complete input is consumed.
  return std::regex_match(stringToMatch, regex);
}
670std::string getMatchingString(
const std::string ®exString,
const std::string &toParse,
const bool caseless) {
671 const auto flags = caseless ? std::regex::ECMAScript | std::regex::icase : std::regex::ECMAScript;
672 const std::regex regex(regexString, flags);
674 std::sregex_iterator it(toParse.begin(), toParse.end(), regex);
675 if (it == std::sregex_iterator())
690std::string
pad(
const unsigned int run,
const std::string &instString) {
691 InstrumentInfo
const instInfo = ConfigService::Instance().getInstrument(instString);
693 if (!instInfo.facility().noFilePrefix())
694 prefix = instInfo.filePrefix(run) + instInfo.delimiter();
695 unsigned int padLength = instInfo.zeroPadding(run);
697 if (runStr.size() < padLength)
698 runStr.insert(0, padLength - runStr.size(),
'0');
699 else if (padLength > 0 && runStr.size() > padLength)
700 throw std::runtime_error(
"Could not parse run number \"" + runStr +
701 "\" since the instrument run number length required is " +
std::to_string(padLength));
702 runStr.insert(0, prefix);
713bool RangeContainsRun::operator()(
const std::pair<unsigned int, unsigned int> &range,
const unsigned int run) {
714 return range.second < run;
716bool RangeContainsRun::operator()(
const unsigned int run,
const std::pair<unsigned int, unsigned int> &range) {
717 return run < range.first;
729std::set<std::pair<unsigned int, unsigned int>>
730mergeAdjacentRanges(std::set<std::pair<unsigned int, unsigned int>> ranges,
731 const std::pair<unsigned int, unsigned int> &range) {
733 if (ranges.empty()) {
734 ranges.insert(range);
739 if (ranges.rbegin()->second + 1 == range.first) {
740 unsigned int from = ranges.rbegin()->first;
741 unsigned int to = range.second;
742 std::pair<unsigned int, unsigned int> temp(from, to);
744 ranges.erase(--ranges.end(), ranges.end());
749 ranges.insert(range);
765std::string accumulateString(std::string output, std::pair<unsigned int, unsigned int> runRange) {
769 if (runRange.first == runRange.second)
784std::string
toString(
const RunRangeList &runRangeList) {
785 std::set<std::pair<unsigned int, unsigned int>> runRanges = runRangeList.rangeList();
789 return std::accumulate(runRanges.begin(), runRanges.end(), std::string(), accumulateString);
The ConfigService class provides a simple facade to access the Configuration functionality of the Man...
const std::vector< FacilityInfo * > getFacilities() const
Get the list of facilities.
A functor that generates a vector of file names from the given vector of runs, and other state passed...
std::string m_instString
String that identifies the instrument.
std::string m_prefix
String that prefixes any generated file names.
GenerateFileName(std::string prefix, std::string suffix, std::string instString)
Constructor.
std::vector< std::string > operator()(const std::vector< unsigned int > &runs)
Overloaded function operator that generates a vector of file names from a vector of runs.
std::string m_suffix
String that suffixes any generated file names.
This class takes a string representing multiple files and parses it into a vector of vectors of file ...
const std::string & runString() const
Return the parsed run string.
std::string m_multiFileName
The given string to parse.
std::vector< std::vector< std::string > > m_fileNames
A vector of vectors of the parsed file names.
std::vector< std::vector< unsigned int > > parseMultiRunString(std::string runString)
Parses a string consisting of only run number info, into a vector of vector of run numbers.
std::vector< std::vector< unsigned int > > m_runs
A vector of vectors of the parsed runs.
std::set< std::string > m_compoundLeadExtensions
Compound-extension leads derived from Facilities.xml.
std::string m_underscoreString
void split()
Split the string to parse into its component parts.
std::set< std::string, ReverseCaselessCompare > m_validInstNames
All the valid instrument names.
const std::string & underscoreString() const
Return the parsed underscore string.
const std::vector< std::vector< unsigned int > > & runs() const
Return the vector of vectors of parsed runs.
bool trimWhiteSpaces() const
Return the setting for trimming whitespaces in run string.
void clear()
Clear all member variables.
bool m_trimWhiteSpaces
Flag to determine if string input should be trimmed of whitespace.
void setTrimWhiteSpaces(const bool &setting)
Set the flag for trimming whitespaces in run string.
std::string m_dirString
The various sections of the given string to parse.
const std::string & instString() const
Return the parsed instrument string.
void parse(const std::string &multiFileName)
Parse the given multiFileNameString.
bool operator()(const std::string &a, const std::string &b) const
Comparator for the set that holds instrument names in Parser.
A class that holds a list of ranges of runs.
RunRangeList()
Constructor.
std::set< std::pair< unsigned int, unsigned int > > m_rangeList
A set of pairs of unsigned ints, where each pair represents a range of runs.
void addRunRange(const unsigned int from, const unsigned int to)
Add a range of runs.
void addRun(const unsigned int run)
Add a run to the list of run ranges.
const std::string ADD_STEP_RANGE
const std::string SINGLE_OR_STEP_OR_ADD_RANGE
const std::string ADD_RANGE
const std::string ADD_LIST
const std::string UNDERSCORE
const std::string STEP_RANGE
MANTID_KERNEL_DLL std::string suggestWorkspaceName(const std::vector< std::string > &fileNames)
Suggests a workspace name, given a vector of file names.
int convert(const std::string &A, T &out)
Convert a string into a number.
std::string toString(const T &value)
Convert values to strings.
MortonT pad(IntT)
Pad an integer with a given number of padding bits.
std::string to_string(const wide_integer< Bits, Signed > &n)