Mantid
Loading...
Searching...
No Matches
DownloadInstrument.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
11
12// Poco
13#include <Poco/DateTimeFormat.h>
14#include <Poco/DateTimeFormatter.h>
15// Visual Studio complains with the inclusion of Poco/NullStream
16// disabling this warning.
17#if defined(_WIN32) || defined(_WIN64)
18#pragma warning(push)
19#pragma warning(disable : 4250)
20#include <Poco/NullStream.h>
21#include <Winhttp.h>
22#pragma warning(pop)
23#else
24#include <Poco/NullStream.h>
25#include <cstdlib>
26#endif
27
28// jsoncpp
29#include <json/json.h>
30
31// std
32#include <filesystem>
33#include <fstream>
34
35namespace Mantid::DataHandling {
36using namespace Kernel;
37using namespace Poco::Net;
38
39// Register the algorithm into the AlgorithmFactory
40DECLARE_ALGORITHM(DownloadInstrument)
41
42//----------------------------------------------------------------------------------------------
46
47//----------------------------------------------------------------------------------------------
48
50const std::string DownloadInstrument::name() const { return "DownloadInstrument"; }
51
53int DownloadInstrument::version() const { return 1; }
54
56const std::string DownloadInstrument::category() const { return "DataHandling\\Instrument"; }
57
59const std::string DownloadInstrument::summary() const {
60 return "Checks the Mantid instrument repository against the local "
61 "instrument files, and downloads updates as appropriate.";
62}
63
64//----------------------------------------------------------------------------------------------
69
70 declareProperty("ForceUpdate", false, "Ignore cache information");
71 declareProperty("FileDownloadCount", 0, "The number of files downloaded by this algorithm", Direction::Output);
72}
73
74//----------------------------------------------------------------------------------------------
78 setProperty("FileDownloadCount", 0);
79
80 // to aid in general debugging, always ask github for what the rate limit
81 // status is. This doesn't count against rate limit.
82 try {
83 GitHubApiHelper inetHelper;
86 g_log.debug() << "Unable to get the rate limit from GitHub: " << ex.what() << '\n';
87 }
88
89 StringToStringMap fileMap;
90 try {
91 fileMap = processRepository();
93 std::string errorText(ex.what());
94 if (errorText.find("rate limit") != std::string::npos) {
95 g_log.information() << "Instrument Definition Update: " << errorText << '\n';
96 } else {
97 // log the failure at Notice Level
98 g_log.notice("Internet Connection Failed - cannot update instrument "
99 "definitions. Please check your connection. If you are behind a "
100 "proxy server, consider setting proxy.host and proxy.port in "
101 "the Mantid properties file or using the config object.");
102 // log this error at information level
103 g_log.information() << errorText << '\n';
104 }
105 return;
106 }
107
108 if (fileMap.empty()) {
109 g_log.notice("All instrument definitions up to date");
110 } else {
111 std::string s = (fileMap.size() > 1) ? "s" : "";
112 g_log.notice() << "Downloading " << fileMap.size() << " file" << s << " from the instrument repository\n";
113 }
114
115 for (auto &itMap : fileMap) {
116 // download a file
117 if (itMap.second.ends_with("Facilities.xml")) {
118 g_log.notice("A new Facilities.xml file has been downloaded, this will "
119 "take effect next time Mantid is started.");
120 } else {
121 g_log.information() << "Downloading \"" << itMap.second << "\" from \"" << itMap.first << "\"\n";
122 }
123 doDownloadFile(itMap.first, itMap.second);
125 }
126
127 setProperty("FileDownloadCount", static_cast<int>(fileMap.size()));
128}
129
130namespace {
131// Converts a json chunk to a url for the raw file contents.
132std::string getDownloadUrl(Json::Value &contents) {
133 std::string url = contents.get("download_url", "").asString();
134 if (url.empty()) { // guess it from html url
135 url = contents.get("html_url", "").asString();
136 if (url.empty())
137 throw std::runtime_error("Failed to find download link");
138 url = url + "?raw=1";
139 }
140
141 return url;
142}
143} // namespace
144
146 // get the instrument directories
147 auto instrumentDirs = Mantid::Kernel::ConfigService::Instance().getInstrumentDirectories();
148 std::filesystem::path installPath(instrumentDirs.back());
149 std::filesystem::create_directories(installPath);
150 std::filesystem::path localPath(instrumentDirs[0]);
151 std::filesystem::create_directories(localPath);
152
153 // get the date of the local github.json file if it exists
154 std::filesystem::path gitHubJsonFile = localPath / "github.json";
155 Poco::DateTime gitHubJsonDate(1900, 1, 1);
156 bool forceUpdate = this->getProperty("ForceUpdate");
157 if ((!forceUpdate) && std::filesystem::exists(gitHubJsonFile) && std::filesystem::is_regular_file(gitHubJsonFile)) {
158 auto ftime = std::filesystem::last_write_time(gitHubJsonFile);
159 auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(
160 ftime - std::filesystem::file_time_type::clock::now() + std::chrono::system_clock::now());
161 std::time_t cftime = std::chrono::system_clock::to_time_t(sctp);
162 gitHubJsonDate = Poco::Timestamp::fromEpochTime(cftime);
163 }
164
165 // get the file list from github
166 StringToStringMap headers;
167 headers.emplace("if-modified-since",
168 Poco::DateTimeFormatter::format(gitHubJsonDate, Poco::DateTimeFormat::HTTP_FORMAT));
169 std::string gitHubInstrumentRepoUrl = ConfigService::Instance().getString("UpdateInstrumentDefinitions.URL");
170 if (gitHubInstrumentRepoUrl.empty()) {
171 throw std::runtime_error("Property UpdateInstrumentDefinitions.URL is not defined, "
172 "this should point to the location of the instrument "
173 "directory in the github API "
174 "e.g. "
175 "https://api.github.com/repos/mantidproject/mantid/contents/"
176 "instrument.");
177 }
178 StringToStringMap fileMap;
179 try {
180 doDownloadFile(gitHubInstrumentRepoUrl, gitHubJsonFile.string(), headers);
181 } catch (Exception::InternetError &ex) {
182 if (ex.errorCode() == static_cast<int>(InternetHelper::HTTPStatus::NOT_MODIFIED)) {
183 // No changes since last time
184 return fileMap;
185 } else {
186 throw;
187 }
188 }
189
190 // update local repo files
191 std::filesystem::path installRepoFile = localPath / "install.json";
192 StringToStringMap installShas = getFileShas(installPath);
193 std::filesystem::path localRepoFile = localPath / "local.json";
194 StringToStringMap localShas = getFileShas(localPath);
195
196 // verify repo info was downloaded correctly
197 if (std::filesystem::file_size(gitHubJsonFile) == 0) {
198 std::stringstream msg;
199 msg << "Encountered empty file \"" << gitHubJsonFile.string() << "\" while determining what to download";
200 throw std::runtime_error(msg.str());
201 }
202
203 // Parse the server JSON response
204 ::Json::CharReaderBuilder readerBuilder;
205 Json::Value serverContents;
206 std::ifstream fileStream(gitHubJsonFile.string(), std::ios::in);
207
208 std::string errors;
209 Json::parseFromStream(readerBuilder, fileStream, &serverContents, &errors);
210 if (errors.size() != 0) {
211 throw std::runtime_error("Unable to parse server JSON file \"" + gitHubJsonFile.string() + "\"");
212 }
213 fileStream.close();
214
215 std::unordered_set<std::string> repoFilenames;
216
217 for (auto &serverElement : serverContents) {
218 std::string elementName = serverElement.get("name", "").asString();
219 repoFilenames.insert(elementName);
220 std::filesystem::path filePath = localPath / elementName;
221 if (filePath.extension().string() != ".xml")
222 continue;
223 std::string sha = serverElement.get("sha", "").asString();
224 std::string downloadUrl = getDownloadUrl(serverElement);
225
226 // Find shas
227 std::string localSha = getValueOrDefault(localShas, elementName, "");
228 std::string installSha = getValueOrDefault(installShas, elementName, "");
229 // Different sha1 on github cf local and global
230 // this will also catch when file is only present on github (as local sha
231 // will be "")
232 if ((sha != installSha) && (sha != localSha)) {
233 fileMap.emplace(downloadUrl,
234 filePath.string()); // ACTION - DOWNLOAD to localPath
235 } else if ((!localSha.empty()) && (sha == installSha) && (sha != localSha)) // matches install, but different local
236 {
237 fileMap.emplace(downloadUrl, filePath.string()); // ACTION - DOWNLOAD to
238 // localPath and
239 // overwrite
240 }
241 }
242
243 // remove any .xml files from the local appdata directory that are not present
244 // in the remote instrument repo
245 removeOrphanedFiles(localPath.string(), repoFilenames);
246
247 return fileMap;
248}
249
258 const std::string &key, const std::string &defaultValue) const {
259 auto element = mapping.find(key);
260 return (element != mapping.end()) ? element->second : defaultValue;
261}
262
268 StringToStringMap filesToSha;
269 try {
270 for (auto const &it : std::filesystem::directory_iterator{directoryPath}) {
271 auto const &entryPath = it.path();
272 if (entryPath.extension().string() != ".xml")
273 continue;
274 std::string sha1 = ChecksumHelper::gitSha1FromFile(entryPath.string());
275 // Track sha1
276 filesToSha.emplace(entryPath.filename().string(), sha1);
277 }
278 } catch (Poco::Exception &ex) {
279 g_log.error() << "DownloadInstrument: failed to parse the directory: " << directoryPath << " : " << ex.className()
280 << " : " << ex.displayText() << '\n';
281 // silently ignore this exception.
282 } catch (std::exception &ex) {
283 std::stringstream ss;
284 ss << "unknown exception while checking local file system. " << ex.what() << ". Input = " << directoryPath;
285 throw std::runtime_error(ss.str());
286 }
287
288 return filesToSha;
289}
290
296size_t DownloadInstrument::removeOrphanedFiles(const std::filesystem::path &directoryPath,
297 const std::unordered_set<std::string> &filenamesToKeep) const {
298 // hold files to delete in a set so we don't remove files while iterating over
299 // the directory.
300 std::vector<std::filesystem::path> filesToDelete;
301
302 try {
303 for (auto const &it : std::filesystem::directory_iterator{directoryPath}) {
304 auto const entryPath = it.path();
305 if (entryPath.extension().string() != ".xml")
306 continue;
307 if (filenamesToKeep.find(entryPath.filename().string()) == filenamesToKeep.end()) {
308 g_log.debug() << "File not found in remote instrument repository, will "
309 "be deleted: "
310 << entryPath.filename().string() << '\n';
311 filesToDelete.emplace_back(entryPath);
312 }
313 }
314 } catch (Poco::Exception &ex) {
315 g_log.error() << "DownloadInstrument: failed to list the directory: " << directoryPath << " : " << ex.className()
316 << " : " << ex.displayText() << '\n';
317 // silently ignore this exception.
318 } catch (std::exception &ex) {
319 std::stringstream ss;
320 ss << "unknown exception while checking local file system. " << ex.what() << ". Input = " << directoryPath;
321 throw std::runtime_error(ss.str());
322 }
323
324 // delete any identified files
325 try {
326 for (const auto &filepath : filesToDelete) {
327 std::filesystem::remove(filepath);
328 }
329 } catch (Poco::Exception &ex) {
330 g_log.error() << "DownloadInstrument: failed to delete file: " << ex.className() << " : " << ex.displayText()
331 << '\n';
332 // silently ignore this exception.
333 } catch (std::exception &ex) {
334 std::stringstream ss;
335 ss << "unknown exception while deleting file: " << ex.what();
336 throw std::runtime_error(ss.str());
337 }
338
339 g_log.debug() << filesToDelete.size() << " Files deleted.\n";
340
341 return filesToDelete.size();
342}
343
358 const std::string &localFilePath,
359 const StringToStringMap &headers) {
360 std::filesystem::path localFile(localFilePath);
361 if (std::filesystem::exists(localFile)) {
362 auto perms = std::filesystem::status(localFile).permissions();
363 if ((perms & std::filesystem::perms::owner_write) == std::filesystem::perms::none) {
364 std::stringstream msg;
365 msg << "Cannot write file \"" << localFilePath << "\"";
366 throw std::runtime_error(msg.str());
367 }
368 } else {
369 localFile = std::filesystem::path(localFilePath).parent_path();
370 auto perms = std::filesystem::status(localFile).permissions();
371 if ((perms & std::filesystem::perms::owner_write) == std::filesystem::perms::none) {
372 std::stringstream msg;
373 msg << "Cannot write file \"" << localFilePath << "\"";
374 throw std::runtime_error(msg.str());
375 }
376 }
377
378 GitHubApiHelper inetHelper;
379 inetHelper.headers().insert(headers.begin(), headers.end());
380 const auto retStatus = inetHelper.downloadFile(urlFile, localFilePath);
381 return retStatus;
382}
383
384} // namespace Mantid::DataHandling
#define DECLARE_ALGORITHM(classname)
Definition Algorithm.h:538
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Kernel::Logger & g_log
Definition Algorithm.h:422
void interruption_point()
This is called during long-running operations, and check if the algorithm has requested that it be ca...
DownloadInstrument : Downloads one or more instrument files to the local instrument cache from the in...
void exec() override
Execute the algorithm.
StringToStringMap getFileShas(const std::filesystem::path &directoryPath)
Creates or updates the json file of a directory's contents.
int version() const override
Algorithm's version for identification.
virtual Kernel::InternetHelper::HTTPStatus doDownloadFile(const std::string &urlFile, const std::string &localFilePath="", const StringToStringMap &headers=StringToStringMap())
Download a url and fetch it inside the local path given.
std::map< std::string, std::string > StringToStringMap
void init() override
Initialize the algorithm's properties.
const std::string name() const override
Algorithm's name for identification.
std::string getValueOrDefault(const StringToStringMap &mapping, const std::string &key, const std::string &defaultValue) const
size_t removeOrphanedFiles(const std::filesystem::path &directoryPath, const std::unordered_set< std::string > &filenamesToKeep) const
removes any .xml files in a directory that are not in filenamesToKeep
const std::string summary() const override
Algorithm's summary for use in the GUI and help.
const std::string category() const override
Algorithm's category for identification.
Exception thrown when error occurs accessing an internet resource.
Definition Exception.h:321
const char * what() const noexcept override
Overloaded reporting method.
const int & errorCode() const
Writes out the range and limits.
GitHubApiHelper : A helper class for supporting access to the github api through HTTP and HTTPS,...
std::string getRateLimitDescription()
String describing the rate limit status.
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
StringToStringMap & headers()
Returns a reference to the headers map.
virtual HTTPStatus downloadFile(const std::string &urlFile, const std::string &localFilePath="")
Download a url and fetch it inside the local path given.
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
void notice(const std::string &msg)
Logs at notice level.
Definition Logger.cpp:126
void error(const std::string &msg)
Logs at error level.
Definition Logger.cpp:108
void information(const std::string &msg)
Logs at information level.
Definition Logger.cpp:136
MANTID_KERNEL_DLL std::string gitSha1FromFile(const std::string &filepath)
create a git checksum from a file (these match the git hash-object command)
Describes the direction (within an algorithm) of a Property.
Definition Property.h:50
@ Output
An output workspace.
Definition Property.h:54