Mantid
Loading...
Searching...
No Matches
InstrumentFileFinder.cpp
Go to the documentation of this file.
2
8
9#include <Poco/SAX/Attributes.h>
10#include <Poco/SAX/ContentHandler.h>
11#include <Poco/SAX/SAXParser.h>
12#include <boost/algorithm/string/find.hpp>
13#include <boost/regex.hpp>
14#include <filesystem>
15
16#include <string>
17#include <utility>
18#include <vector>
19
20using namespace Mantid::Kernel;
21using namespace Mantid::Types::Core;
22using namespace Poco::XML;
23
24namespace {
26Mantid::Kernel::Logger g_log("InstrumentFileFinder");
27
/// Lightweight carrier thrown by the SAX content handler to stop parsing
/// early. It transports the "valid-from" / "valid-to" attribute values of the
/// element that triggered the stop back to whoever started the parse.
class DummyException {
public:
  /// Takes both strings by value and moves them into the members.
  DummyException(std::string validFrom, std::string validTo)
      : m_validFrom(std::move(validFrom)), m_validTo(std::move(validTo)) {}

  std::string m_validFrom; ///< text of the "valid-from" attribute
  std::string m_validTo;   ///< text of the "valid-to" attribute
};
36// SAX content handler for grapping stuff quickly from IDF
37class myContentHandler : public Poco::XML::ContentHandler {
38 void startElement(const XMLString & /*uri*/, const XMLString &localName, const XMLString & /*qname*/,
39 const Attributes &attrList) override {
40 if (localName == "instrument" || localName == "parameter-file") {
41 throw DummyException(static_cast<std::string>(attrList.getValue("", "valid-from")),
42 static_cast<std::string>(attrList.getValue("", "valid-to")));
43 }
44 }
45 void endElement(const XMLString & /*uri*/, const XMLString & /*localName*/, const XMLString & /*qname*/) override {}
46 void startDocument() override {}
47 void endDocument() override {}
48 void characters(const XMLChar /*ch*/[], int /*start*/, int /*length*/) override {}
49 void endPrefixMapping(const XMLString & /*prefix*/) override {}
50 void ignorableWhitespace(const XMLChar /*ch*/[], int /*start*/, int /*length*/) override {}
51 void processingInstruction(const XMLString & /*target*/, const XMLString & /*data*/) override {}
52 void setDocumentLocator(const Locator * /*loc*/) override {}
53 void skippedEntity(const XMLString & /*name*/) override {}
54 void startPrefixMapping(const XMLString & /*prefix*/, const XMLString & /*uri*/) override {}
55};
56} // namespace
57
58namespace Mantid::API {
59
65const std::string InstrumentFileFinder::getNormalisedDate(const std::string &date) {
66 static const boost::regex dateOnlyRegex("\\d{4}-\\d{2}-\\d{2}");
67 return boost::regex_match(date, dateOnlyRegex) ? date + "T00:00:00" : date;
68};
69
88std::string InstrumentFileFinder::getFilenameByInstrumentDateAndSearchTerm(const std::string &instrumentName,
89 const std::string &date,
90 const std::string &searchTerm,
91 const std::vector<std::string> &fileFormats,
92 const std::string &dirHint) {
93 std::string fileType;
94 if (searchTerm == "_Definition")
95 fileType = "instrument file";
96 else if (searchTerm == "_Parameters")
97 fileType = "parameter file";
98 else
99 fileType = searchTerm + " file";
100
101 g_log.debug() << "Looking for " << fileType << " for " << instrumentName << " that is valid on '" << date << "'\n";
102 // Lookup the instrument (long) name, falling back to the provided name if not found in any facility
103 std::string instrument;
104 try {
105 instrument = Kernel::ConfigService::Instance().getInstrument(instrumentName).name();
106 } catch (const Kernel::Exception::NotFoundError &) {
107 instrument = instrumentName;
108 }
109
110 // Build the directory search list: dirHint (if any) is checked first so that
111 // parameter files co-located with the IDF in a non-standard directory (e.g.
112 // unit_testing/) are preferred over files in the standard instrument dirs.
113 const std::vector<std::string> &configDirs = Kernel::ConfigService::Instance().getInstrumentDirectories();
114 std::vector<std::string> directoryNames;
115 if (!dirHint.empty()) {
116 directoryNames.push_back(dirHint);
117 directoryNames.insert(directoryNames.end(), configDirs.begin(), configDirs.end());
118 } else {
119 directoryNames = configDirs;
120 }
121
122 // matching files sorted with newest files coming first
123 const std::vector<std::string> matchingFiles =
124 getResourceFilenames(instrument + searchTerm, fileFormats, directoryNames, date);
125 std::string foundFile;
126 if (!matchingFiles.empty()) {
127 foundFile = matchingFiles[0];
128 g_log.debug() << "The " << fileType << " selected is " << foundFile << '\n';
129 } else {
130 g_log.debug() << "No " << fileType << " found\n";
131 }
132 return foundFile;
133}
134
153std::string InstrumentFileFinder::getInstrumentFilename(const std::string &instrumentName, const std::string &date) {
154 return getFilenameByInstrumentDateAndSearchTerm(instrumentName, date, "_Definition", {"xml", "nxs", "hdf5"});
155}
156
176std::string InstrumentFileFinder::getParameterFilename(const std::string &instrumentName, const std::string &date,
177 const std::string &dirHint) {
178 return getFilenameByInstrumentDateAndSearchTerm(instrumentName, date, "_Parameters", {"xml"}, dirHint);
179}
180
183// directoryName must include a final '/'.
184std::string InstrumentFileFinder::getParameterPath(const std::string &instName, const std::string &dirHint) {
185 // Remove the path from the filename, some legacy callers will pass in
186 // a full path rather than a filename
187 std::filesystem::path filePath(instName);
188 const std::string filename = filePath.filename().string();
189
190 // Try the hinted dir first
191 if (!dirHint.empty()) {
192 const std::string result = lookupIPF(dirHint, filename);
193 if (!result.empty()) {
194 return result;
195 }
196 }
197
198 const Kernel::ConfigServiceImpl &configService = Kernel::ConfigService::Instance();
199 const std::vector<std::string> directoryNames = configService.getInstrumentDirectories();
200
201 for (const auto &dirName : directoryNames) {
202 // This will iterate around the directories from user ->etc ->install, and
203 // find the first beat file
204 const std::string result = lookupIPF(dirName, filename);
205 if (!result.empty()) {
206 g_log.debug() << "Found: " << result << '\n';
207 return result;
208 }
209 }
210
211 g_log.debug() << "Found Nothing \n";
212 return "";
213}
214
215std::string InstrumentFileFinder::lookupIPF(const std::string &dir, std::string filename) {
216 const std::string ext = ".xml";
217 // Remove .xml for example if abc.xml was passed
218 boost::algorithm::ierase_all(filename, ext);
219
220 const std::string suffixSeperator("_Definition");
221
222 std::string prefix;
223 std::string suffix;
224
225 if (auto sepPos = boost::algorithm::ifind_first(filename, suffixSeperator)) {
226 prefix = std::string(filename.begin(), sepPos.begin());
227 suffix = std::string(sepPos.end(), filename.end());
228 } else {
229 prefix = filename;
230 }
231
232 std::filesystem::path directoryPath(dir);
233
234 // Assemble parameter file name
235 std::string fullPathParamIDF = (directoryPath / (prefix + "_Parameters" + suffix + ext)).string();
236
237 if (std::filesystem::exists(fullPathParamIDF)) {
238 return fullPathParamIDF;
239 }
240
241 fullPathParamIDF = (directoryPath / (prefix + "_Parameters" + ext)).string();
242 if (std::filesystem::exists(fullPathParamIDF)) {
243 return fullPathParamIDF;
244 }
245
246 return "";
247}
248
264std::vector<std::string> InstrumentFileFinder::getResourceFilenames(const std::string &prefix,
265 const std::vector<std::string> &fileFormats,
266 const std::vector<std::string> &directoryNames,
267 const std::string &date) {
268
269 if (date.empty()) {
270 // Just use the current date
271 g_log.debug() << "No date specified, using current date and time.\n";
272 const std::string now = Types::Core::DateAndTime::getCurrentTime().toISO8601String();
273 // Recursively call this method, but with all parameters.
274 return InstrumentFileFinder::getResourceFilenames(prefix, fileFormats, directoryNames, now);
275 }
276
277 // Join all the file formats into a single string
278 std::stringstream ss;
279 ss << "(";
280 for (size_t i = 0; i < fileFormats.size(); ++i) {
281 if (i != 0)
282 ss << "|";
283 ss << fileFormats[i];
284 }
285 ss << ")";
286 const std::string allFileFormats = ss.str();
287
288 const boost::regex regex(prefix + ".*\\." + allFileFormats, boost::regex_constants::icase);
289
290 // Normalise date: if only YYYY-MM-DD was provided (no time component), append midnight so
291 // DateAndTime can parse it. Parameter files commonly store date-only valid-from attributes.
292 const std::string normalisedDate = getNormalisedDate(date);
293
294 DateAndTime d;
295 try {
296 d = DateAndTime(normalisedDate);
297 } catch (const std::invalid_argument &) {
298 // Some legacy data files store dates in non-ISO8601 formats.
299 // In this case fall back to the current time so we select the most recent matching file.
300 g_log.warning() << "Could not parse date '" << date
301 << "' as ISO8601; using current time for instrument file lookup.\n";
302 d = DateAndTime::getCurrentTime();
303 }
304
305 DateAndTime refDate("1899-01-01 23:59:00"); // used to help determine the most
306 // recently starting file, if none match
307 DateAndTime refDateGoodFile("1899-01-01 23:59:00"); // used to help determine the most recently
308
309 // Two files could have the same `from` date so multimap is required.
310 // Sort with newer dates placed at the beginning
311 std::multimap<DateAndTime, std::string, std::greater<DateAndTime>> matchingFiles;
312 bool foundFile = false;
313 std::string mostRecentFile; // path to the file with most recent "valid-from"
314 for (const auto &directoryName : directoryNames) {
315 // Iterate over the directories from user ->etc ->install, and find the
316 // first beat file
317 for (const auto &dir_entry : std::filesystem::directory_iterator(directoryName)) {
318
319 const auto &filePath = dir_entry.path();
320 if (!std::filesystem::is_regular_file(filePath))
321 continue;
322
323 const std::string l_filenamePart = filePath.filename().string();
324 if (regex_match(l_filenamePart, regex)) {
325 const std::string pathName = filePath.string();
326 g_log.debug() << "Found file: '" << pathName << "'\n";
327
328 std::string validFrom, validTo;
329 getValidFromTo(pathName, validFrom, validTo);
330 g_log.debug() << "File '" << pathName << " valid dates: from '" << validFrom << "' to '" << validTo << "'\n";
331 // Use default valid "from" and "to" dates if none were found.
332 // Normalise date-only strings (YYYY-MM-DD) to full datetimes before parsing.
333 // Some legacy instrument files store dates in non-ISO8601 formats; treat them as lowest-priority
334 // catch-alls (valid for all time) so they are still considered but ranked last.
335 DateAndTime to, from;
336 try {
337 if (validFrom.length() > 0) {
338 const std::string normFrom = getNormalisedDate(validFrom);
339 from.setFromISO8601(normFrom);
340 } else {
341 from = refDate;
342 }
343 } catch (const std::invalid_argument &) {
344 g_log.debug() << "Could not parse valid-from='" << validFrom << "' in '" << pathName
345 << "'; treating as lowest priority.\n";
346 from = refDate;
347 }
348 try {
349 if (validTo.length() > 0) {
350 const std::string normTo = getNormalisedDate(validTo);
351 to.setFromISO8601(normTo);
352 } else {
353 to.setFromISO8601("2100-01-01T00:00:00");
354 }
355 } catch (const std::invalid_argument &) {
356 g_log.debug() << "Could not parse valid-to='" << validTo << "' in '" << pathName
357 << "'; treating as lowest priority.\n";
358 to.setFromISO8601("2100-01-01T00:00:00");
359 }
360
361 if (from <= d && d <= to) {
362 foundFile = true;
363 matchingFiles.insert(std::pair<DateAndTime, std::string>(from, pathName));
364 }
365 // Consider the most recent file in the absence of matching files
366 if (!foundFile && (from >= refDate)) {
367 refDate = from;
368 mostRecentFile = pathName;
369 }
370 }
371 }
372 }
373
374 // Retrieve the file names only
375 std::vector<std::string> pathNames;
376 if (!matchingFiles.empty()) {
377 pathNames.reserve(matchingFiles.size());
378
379 std::transform(matchingFiles.begin(), matchingFiles.end(), std::back_inserter(pathNames),
380 [](const auto &elem) { return elem.second; });
381 } else {
382 pathNames.emplace_back(std::move(mostRecentFile));
383 }
384
385 return pathNames;
386}
387
394void InstrumentFileFinder::getValidFromTo(const std::string &IDFfilename, std::string &outValidFrom,
395 std::string &outValidTo) {
396 SAXParser pParser;
397 // Create on stack to ensure deletion. Relies on pParser also being local
398 // variable.
399 myContentHandler conHand;
400 pParser.setContentHandler(&conHand);
401
402 try {
403 pParser.parse(IDFfilename);
404 } catch (const DummyException &e) {
405 outValidFrom = e.m_validFrom;
406 outValidTo = e.m_validTo;
407 } catch (...) {
408 // should throw some sensible here
409 }
410}
411
412} // Namespace Mantid::API
static void getValidFromTo(const std::string &IDFfilename, std::string &outValidFrom, std::string &outValidTo)
Utility to retrieve the validity dates for the given IDF.
static std::string getFilenameByInstrumentDateAndSearchTerm(const std::string &instrumentName, const std::string &date, const std::string &searchTerm, const std::vector< std::string > &fileFormats, const std::string &dirHint="")
This method returns a file name which finds a file which contains the given instrument name + search ...
static std::vector< std::string > getResourceFilenames(const std::string &prefix, const std::vector< std::string > &fileFormats, const std::vector< std::string > &directoryNames, const std::string &date)
Utility to retrieve a resource file (IDF, Parameters, ..)
static std::string getParameterPath(const std::string &instName, const std::string &dirHint="")
Search instrument directories for Parameter file, return full path name if found, else "".
static std::string getParameterFilename(const std::string &instrumentName, const std::string &date="", const std::string &dirHint="")
Get the Parameter file using the instrument name and date, optionally searching dirHint first before ...
static std::string lookupIPF(const std::string &dir, std::string filename)
static const std::string getNormalisedDate(const std::string &date)
If a date containing only YYYY-MM-DD was provided (no time component), append midnight so DateAndTime can...
static std::string getInstrumentFilename(const std::string &instrumentName, const std::string &date="")
Get the IDF using the instrument name and date.
The ConfigService class provides a simple facade to access the Configuration functionality of the Man...
const std::vector< std::string > & getInstrumentDirectories() const
Get instrument search directories.
Exception for when an item is not found in a collection.
Definition Exception.h:145
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition Logger.h:51
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
void warning(const std::string &msg)
Logs at warning level.
Definition Logger.cpp:117
Kernel::Logger g_log("ExperimentInfo")
static logger object
STL namespace.