Mantid
Loading...
Searching...
No Matches
ORNLDataArchive.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
13#include "MantidKernel/Logger.h"
14
15#include <boost/algorithm/string/join.hpp>
16
17#include <map>
18#include <regex>
19#include <sstream>
20
21using Mantid::Catalog::Exception::CatalogError;
22using Mantid::Catalog::ONCat::ONCat;
23using Mantid::Catalog::ONCat::ONCat_uptr;
24using Mantid::Catalog::ONCat::ONCatEntity;
25using Mantid::Catalog::ONCat::QueryParameter;
26using Mantid::Catalog::ONCat::QueryParameters;
27
28namespace {
// File-local logger, constructed with the name "ORNLDataArchive".
29Mantid::Kernel::Logger g_log("ORNLDataArchive");
30
// Pattern applied to a whole hint/filename with std::regex_match:
//   group 1 - lazily matched text before an underscore that is followed by digits
//   group 2 - that run of digits
// Anything after the digits is accepted and ignored.
31const static std::regex FILE_REGEX("^(.*?)_(\\d+).*$");
// Empty string passed as the value of every "Not found." result in getArchivePath().
32const static std::string NOT_FOUND("");
33
/**
 * Return a copy of @p s with every character passed through toupper().
 *
 * Each character is converted to unsigned char before the call: handing
 * toupper() a negative char (e.g. a byte of a UTF-8 sequence) is undefined
 * behaviour.
 *
 * @param s The text to convert; it is not modified.
 * @return The converted copy, always the same length as @p s.
 */
std::string toUpperCase(const std::string &s) {
  std::string result(s);
  std::transform(result.begin(), result.end(), result.begin(),
                 [](unsigned char c) { return static_cast<char>(toupper(c)); });
  return result;
}
39
42std::pair<std::string, std::string> toInstrumentAndRunNumber(const std::string &filename) {
43 std::smatch result;
44 if (!std::regex_match(filename, result, FILE_REGEX))
45 return {"", ""};
46 assert(result.size() == 3);
47 return {toUpperCase(result[1]), result[2]};
48}
49} // namespace
50
51namespace Mantid::DataHandling {
52
// Invoke DECLARE_ARCHIVESEARCH(classname, facility) twice for ORNLDataArchive,
// once with "ORNLDataSearch" and once with "SNSDataSearch".
// NOTE(review): the macro body is not visible here; presumably it registers
// the class with the archive-search factory under each name - confirm.
53DECLARE_ARCHIVESEARCH(ORNLDataArchive, ORNLDataSearch)
54DECLARE_ARCHIVESEARCH(ORNLDataArchive, SNSDataSearch)
55
56
103const API::Result<std::filesystem::path>
104ORNLDataArchive::getArchivePath(const std::set<std::string> &basenames,
105 const std::vector<std::string> &suffixes) const {
106 if (basenames.size() == 0) {
107 return API::Result<std::filesystem::path>(NOT_FOUND, "Not found.");
108 }
109
110 // Mimic previous functionality by only using the first basename.
111 const auto basename = *basenames.cbegin();
112
113 // Validate and parse the basename.
114 const auto [instrument, run] = toInstrumentAndRunNumber(basename);
115 if (instrument.empty() || run.empty()) {
116 g_log.debug() << "Unexpected input passed to getArchivePath():" << std::endl << basename << std::endl;
117 return API::Result<std::filesystem::path>(NOT_FOUND, "Not found.");
118 }
119
120 const auto &config = Mantid::Kernel::ConfigService::Instance();
121 std::string facility;
122 try {
123 facility = config.getInstrument(instrument).facility().name();
124
125 if (facility != "HFIR" && facility != "SNS") {
126 return API::Result<std::filesystem::path>(NOT_FOUND, "Not found.");
127 }
129 g_log.debug() << "\"" << instrument << "\" is not an instrument known to Mantid." << std::endl;
130 return API::Result<std::filesystem::path>(NOT_FOUND, "Not found.");
131 }
132
133 // Note that we will only be asking for raw files with the given instrument
134 // and run number, and *not* filtering by suffix at this point. (ONCat has
135 // a strict definition of what a file "extension" is, and has no way of
136 // filtering by, for example, "_event.nxs".)
137 const QueryParameters params{{"facility", facility},
138 {"instrument", instrument},
139 {"projection", "location"},
140 {"tags", "type/raw"},
141 {"sort_by", "ingested"},
142 {"sort_direction", "DESCENDING"},
143 {"ranges_q", "indexed.run_number:" + run}};
144
145 // If we've not manually set up an ONCat instance (presumably for testing
146 // purposes) then we must instead create one using the settings in the
147 // currently-running instance of Mantid, making sure to run it in an
148 // "unauthenticated" mode. If we were to authenticate we'd be able to see
149 // more information, but that would require users logging in and publically
150 // available information is more than enough for our purposes here, anyway.
151 auto defaultOncat = ONCat::fromMantidSettings();
152 auto *oncat = m_oncat ? m_oncat.get() : defaultOncat.get();
153
154 const auto datafiles = [&]() {
155 try {
156 return oncat->list("api", "datafiles", params);
157 } catch (CatalogError &ce) {
158 g_log.debug() << "Error while calling ONCat:" << std::endl << ce.what() << std::endl;
159 return std::vector<ONCatEntity>();
160 }
161 }();
162
163 if (datafiles.size() == 0) {
164 g_log.debug() << "ONCat does not know the location of run \"" << run << "\" for \"" << instrument << "\"."
165 << std::endl;
166 return API::Result<std::filesystem::path>(NOT_FOUND, "Not found.");
167 }
168
169 g_log.debug() << "All datafiles returned from ONCat:" << std::endl;
170 for (const auto &datafile : datafiles) {
171 g_log.debug() << datafile.toString() << std::endl;
172 }
173
174 // It's technically possible to have been given multiple locations for a
175 // single run, since runs are occasionally written out to the wrong IPTS and
176 // therefore need to be "re-translated", leaving us with duplicates in the
177 // catalog. Duplicates require manual intervention to be removed, and so in
178 // the meantime, since we have asked for locations to be returned to us in
179 // descending order of the time at which they were ingested, we can take the
180 // first one and be (quite) sure we end up with the correct run location.
181 const auto location = *datafiles.cbegin()->get<std::string>("location");
182
183 // Mimic the previous ICAT-calling functionality by taking "full"
184 // suffixes into account.
185 for (const auto &suffix : suffixes) {
186 const std::string fullSuffix = basename + suffix;
187 if (toUpperCase(location).ends_with(toUpperCase(fullSuffix))) {
188 return API::Result<std::filesystem::path>(location);
189 }
190 }
191
192 if (toUpperCase(location).ends_with(toUpperCase(basename))) {
193 return API::Result<std::filesystem::path>(location);
194 }
195
196 return API::Result<std::filesystem::path>(NOT_FOUND, "Not found.");
197}
198
// Look up the archive locations of several runs of one instrument with a
// single ONCat query.
//
// hintstrs: each entry is parsed with toInstrumentAndRunNumber(); every entry
//           must parse and all entries must name the same instrument.
// Result:   wraps a vector with one slot per hint, in hint order. Slots whose
//           run was not matched keep their default-constructed (empty) path.
//
// NOTE(review): this listing omits several lines of the function (its embedded
// numbering skips 199, 206, 234, 274 and 302). Line 199 should be the return
// type; the declaration shown at the end of the listing gives it as
// const API::Result<std::vector<std::filesystem::path>>.
200ORNLDataArchive::getArchivePaths(const std::vector<std::string> &hintstrs) const {
201
// One default-constructed path per hint.
202 std::vector<std::filesystem::path> results(hintstrs.size());
203 if (hintstrs.empty()) {
204 // No hints to look up — return an empty success rather than a failure so
205 // callers don't need to special-case empty input.
// NOTE(review): embedded line 206 is missing here; presumably the return of
// the (empty) results described by the comment above - confirm.
207 }
208
// Instrument shared by all hints, and the run number of each hint in order.
209 std::string instrumentName;
210 std::vector<std::string> runNumbers;
211
212 for (const auto &hintstr : hintstrs) {
213 const auto [instrument, run] = toInstrumentAndRunNumber(hintstr);
// A hint that does not parse fails the whole request.
214 if (instrument.empty() || run.empty()) {
215 g_log.debug() << "Unexpected input passed to getArchivePaths():" << std::endl << hintstr << std::endl;
216 return API::Result<std::vector<std::filesystem::path>>(results, "Not found.");
217 }
// The first hint fixes the instrument; any later mismatch fails the request.
218 if (instrumentName.empty()) {
219 instrumentName = instrument;
220 } else if (instrumentName != instrument) {
221 g_log.debug() << "Multiple different instruments found in hints passed to getArchivePaths():" << std::endl;
222 return API::Result<std::vector<std::filesystem::path>>(results, "Not found.");
223 }
224 runNumbers.push_back(run);
225 }
226
// Only instruments whose facility is named "HFIR" or "SNS" are handled.
227 const auto &config = Mantid::Kernel::ConfigService::Instance();
228 std::string facility;
229 try {
230 facility = config.getInstrument(instrumentName).facility().name();
231 if (facility != "HFIR" && facility != "SNS") {
232 return API::Result<std::vector<std::filesystem::path>>(results, "Not found.");
233 }
// NOTE(review): embedded line 234 is missing here; it must close the try and
// open the catch clause that the two statements below belong to.
235 g_log.debug() << "\"" << instrumentName << "\" is not an instrument known to Mantid." << std::endl;
236 return API::Result<std::vector<std::filesystem::path>>(results, "Not found.");
237 }
238
// All requested runs are sent as one comma-separated list.
239 std::string runNumbersStr = boost::algorithm::join(runNumbers, ",");
240
241 // Note that we will only be asking for raw files with the given instrument
242 // and run number, and *not* filtering by suffix at this point. (ONCat has
243 // a strict definition of what a file "extension" is, and has no way of
244 // filtering by, for example, "_event.nxs".)
// Unlike getArchivePath(), results are sorted by "run_number" ascending
// rather than by "ingested" descending.
245 const QueryParameters params{{"facility", facility},
246 {"instrument", instrumentName},
247 {"projection", "location"},
248 {"tags", "type/raw"},
249 {"sort_by", "run_number"},
250 {"sort_direction", "ASCENDING"},
251 {"ranges_q", "indexed.run_number:" + runNumbersStr}};
252
253 // If we've not manually set up an ONCat instance (presumably for testing
254 // purposes) then we must instead create one using the settings in the
255 // currently-running instance of Mantid, making sure to run it in an
256 // "unauthenticated" mode. If we were to authenticate we'd be able to see
257 // more information, but that would require users logging in and publically
258 // available information is more than enough for our purposes here, anyway.
259 auto defaultOncat = ONCat::fromMantidSettings();
260 auto *oncat = m_oncat ? m_oncat.get() : defaultOncat.get();
261
// A CatalogError is logged and replaced by an empty result list.
262 const auto datafiles = [&]() {
263 try {
264 return oncat->list("api", "datafiles", params);
265 } catch (CatalogError &ce) {
266 g_log.debug() << "Error while calling ONCat:" << std::endl << ce.what() << std::endl;
267 return std::vector<ONCatEntity>();
268 }
269 }();
270
271 if (datafiles.size() == 0) {
272 g_log.debug() << "ONCat does not know the location of runs \"" << runNumbersStr << "\" for \"" << instrumentName
273 << "\"." << std::endl;
// NOTE(review): embedded line 274 is missing here; presumably a return for
// the "nothing found" case - confirm whether it carries an error message.
275 }
276
// Build a run-number -> location map from the filenames ONCat returned.
277 g_log.debug() << "All datafiles returned from ONCat:" << std::endl;
278 std::map<std::string, std::filesystem::path> runToLocation;
279 for (const auto &datafile : datafiles) {
280 g_log.debug() << datafile.toString() << std::endl;
// NOTE(review): the value returned by get<std::string>("location") is
// dereferenced without checking that it is present.
281 const auto location = *datafile.get<std::string>("location");
282 const auto filename = std::filesystem::path(location).filename().string();
283 const auto [_, run] = toInstrumentAndRunNumber(filename);
// Skip locations whose filename does not parse.
284 if (run.empty()) {
285 continue;
286 }
287
288 // Keep the first location returned for each run.
289 // If multiple files exist for the same run, ONCat ordering determines which one we take.
290 if (!runToLocation.contains(run)) {
291 runToLocation.emplace(run, std::filesystem::path(location));
292 }
293 }
294
// Fill each hint's slot by exact string match on the run number.
// NOTE(review): a hint run written with leading zeros would not match a
// filename run written without them (or vice versa) - confirm formats agree.
295 for (size_t i = 0; i < runNumbers.size(); ++i) {
296 const auto it = runToLocation.find(runNumbers[i]);
297 if (it != runToLocation.end()) {
298 results[i] = it->second;
299 }
300 }
301
// NOTE(review): embedded line 302 is missing here; presumably the final
// return of results - confirm.
303}
304
305void ORNLDataArchive::setONCat(ONCat_uptr oncat) { m_oncat = std::move(oncat); }
306
307} // namespace Mantid::DataHandling
#define DECLARE_ARCHIVESEARCH(classname, facility)
const API::Result< std::vector< std::filesystem::path > > getArchivePaths(const std::vector< std::string > &hintstrs) const override
const API::Result< std::filesystem::path > getArchivePath(const std::set< std::string > &hintstrs, const std::vector< std::string > &suffixes) const override
Exception for when an item is not found in a collection.
Definition Exception.h:145
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition Logger.h:51
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
Kernel::Logger g_log("ExperimentInfo")
static logger object
Kernel::Logger g_log("DetermineSpinStateOrder")