Mantid
Loading...
Searching...
No Matches
MergeMDFiles.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
17#include "MantidNexus/NexusFile.h"
18
19#include <Poco/File.h>
20#include <boost/scoped_ptr.hpp>
21
22using namespace Mantid::Kernel;
23using namespace Mantid::API;
24using namespace Mantid::DataObjects;
25
26namespace Mantid::MDAlgorithms {
27
28// Register the algorithm into the AlgorithmFactory
29DECLARE_ALGORITHM(MergeMDFiles)
30
31//----------------------------------------------------------------------------------------------
35 : m_nDims(0), m_MDEventType(), m_fileBasedTargetWS(false), m_Filenames(), m_EventLoader(), m_OutIWS(),
36 m_totalEvents(0), m_totalLoaded(0), m_fileMutex(), m_statsMutex() {}
37
38//----------------------------------------------------------------------------------------------
42
43//----------------------------------------------------------------------------------------------
44
45//----------------------------------------------------------------------------------------------
49 std::vector<std::string> exts(1, ".nxs");
50 declareProperty(std::make_unique<MultipleFileProperty>("Filenames", exts),
51 "Select several MDEventWorkspace NXS files to merge "
52 "together. Files must have common box structure.");
53
54 declareProperty(std::make_unique<FileProperty>("OutputFilename", "", FileProperty::OptionalSave, exts),
55 "Choose a file to which to save the output workspace. \n"
56 "Optional: if specified, the workspace created will be file-backed. \n"
57 "If not, it will be created in memory.");
58
59 declareProperty("Parallel", false,
60 "Run the loading tasks in parallel.\n"
61 "This can be faster but might use more memory.");
62
63 declareProperty(std::make_unique<WorkspaceProperty<IMDEventWorkspace>>("OutputWorkspace", "", Direction::Output),
64 "An output MDEventWorkspace.");
65}
66
67//----------------------------------------------------------------------------------------------
72 this->progress(0.05, "Loading File Info");
73 // Get plain box structure and box tree
74 std::vector<API::IMDNode *> &Boxes = m_BoxStruct.getBoxes();
75 std::vector<uint64_t> &targetEventIndexes = m_BoxStruct.getEventIndex();
76 // clear the averages for target event indexes;
77 targetEventIndexes.assign(targetEventIndexes.size(), 0);
78
79 // Total number of events in ALL files.
80 m_totalEvents = 0;
81
83 m_EventLoader.assign(m_Filenames.size(), nullptr);
84
85 try {
86 for (size_t i = 0; i < m_Filenames.size(); i++) {
87 // load box structure and the experimental info from each target
88 // workspace.
89 m_fileComponentsStructure[i].loadBoxStructure(m_Filenames[i], m_nDims, m_MDEventType, true, true);
90 // export just loaded experiment info to the target workspace
91 m_fileComponentsStructure[i].exportExperiment(m_OutIWS);
92
93 // Check for consistency
94 if (i > 0) {
95 if (m_fileComponentsStructure[i].getEventIndex().size() != targetEventIndexes.size())
96 throw std::runtime_error("Inconsistent number of boxes found in file " + m_Filenames[i] +
97 ". Cannot merge these files. Did you generate them all with "
98 "exactly the same box structure?");
99 }
100
101 // calculate total number of events per target cell, which will be
102 size_t nBoxes = Boxes.size();
103 for (size_t j = 0; j < nBoxes; j++) {
104 size_t ID = Boxes[j]->getID();
105 targetEventIndexes[2 * ID + 1] += m_fileComponentsStructure[i].getEventIndex()[2 * ID + 1];
106 m_totalEvents += m_fileComponentsStructure[i].getEventIndex()[2 * ID + 1];
107 }
108
109 // Open the event data, track the total number of events
110 auto bc = std::shared_ptr<API::BoxController>(new API::BoxController(static_cast<size_t>(m_nDims)));
111 bc->fromXMLString(m_fileComponentsStructure[i].getBCXMLdescr());
112
113 m_EventLoader[i] = new BoxControllerNeXusIO(bc.get());
114 m_EventLoader[i]->setDataType(sizeof(coord_t), m_MDEventType);
115 m_EventLoader[i]->openFile(m_Filenames[i], "r");
116 }
117 } catch (...) {
118 // Close all open files in case of error
120 throw;
121 }
122
123 const std::vector<int> &boxType = m_BoxStruct.getBoxType();
124 // calculate event positions in the target file.
125 uint64_t eventsStart = 0;
126 for (auto mdBox : Boxes) {
127 mdBox->clear();
128 size_t ID = mdBox->getID();
129
130 // avoid grid boxes;
131 if (boxType[ID] == 2)
132 continue;
133
134 uint64_t nEvents = targetEventIndexes[2 * ID + 1];
135 targetEventIndexes[ID * 2] = eventsStart;
137 mdBox->setFileBacked(eventsStart, nEvents, false);
138
139 eventsStart += nEvents;
140 }
141
142 g_log.notice() << m_totalEvents << " events in " << m_Filenames.size() << " files.\n";
143}
144
152 TargetBox->clear();
153
154 uint64_t nBoxEvents(0);
155 std::vector<size_t> numFileEvents(m_EventLoader.size());
156
157 for (size_t iw = 0; iw < this->m_EventLoader.size(); iw++) {
158 size_t ID = TargetBox->getID();
159 numFileEvents[iw] = static_cast<size_t>(m_fileComponentsStructure[iw].getEventIndex()[2 * ID + 1]);
160 nBoxEvents += numFileEvents[iw];
161 }
162
163 // At this point memory required is known, so it is reserved all in one go
164 TargetBox->reserveMemoryForLoad(nBoxEvents);
165
166 for (size_t iw = 0; iw < this->m_EventLoader.size(); iw++) {
167 size_t ID = TargetBox->getID();
168 uint64_t fileLocation = m_fileComponentsStructure[iw].getEventIndex()[2 * ID + 0];
169 if (numFileEvents[iw] == 0)
170 continue;
171 TargetBox->loadAndAddFrom(m_EventLoader[iw], fileLocation, numFileEvents[iw]);
172 }
173
174 return nBoxEvents;
175}
176
177//----------------------------------------------------------------------------------------------
/** Merge the input files into an output workspace that re-uses the box
 * structure of the supplied workspace.
 *
 * Visible steps: adopt @p ws as the output workspace, adjust its box
 * controller, build the flat box structure, call loadBoxData(), then walk
 * every box, pull the contributing events in with loadEventsFromSubBoxes(),
 * write them out when a disk buffer is in use, and finally call
 * finalizeOutput().
 *
 * NOTE(review): this listing carries an embedded line number at the start of
 * every line and some numbers are absent (204, 237, 283, 288), so a few
 * statements of the function are not visible here — confirm against the
 * repository copy of the file before changing any logic.
 *
 * @param ws :: workspace stored as m_OutIWS; its box controller and event
 *              type name are used to configure the merge
 * @param outputFile :: file name handed to setFileBacked(),
 *              initFlatStructure() and finalizeOutput()
 */
186void MergeMDFiles::doExecByCloning(const Mantid::API::IMDEventWorkspace_sptr &ws, const std::string &outputFile) {
187 m_OutIWS = ws;
188 m_MDEventType = ws->getEventTypeName();
189
190 // Run the tasks in parallel? TODO: enable
191 // bool Parallel = this->getProperty("Parallel");
192
193 // Fix the box controller settings in the output workspace so that it splits
194 // normally
195 BoxController_sptr bc = ws->getBoxController();
196 // set up internal variables characterizing the workspace.
197 m_nDims = static_cast<int>(bc->getNDims());
198
199 // Fix the max depth to something bigger.
200 bc->setMaxDepth(20);
201 bc->setSplitThreshold(5000);
// The saver wraps the raw box-controller pointer and is told the coordinate
// size and event type before it is attached to the controller.
202 auto saver = std::shared_ptr<API::IBoxControllerIO>(new DataObjects::BoxControllerNeXusIO(bc.get()));
203 saver->setDataType(sizeof(coord_t), m_MDEventType);
// NOTE(review): the embedded numbering jumps from 203 to 205 and the closing
// brace a few lines below has no visible opener, so the line that opens this
// scope appears to have been lost from the listing — confirm.
205 bc->setFileBacked(saver, outputFile);
206 // Complete the file-back-end creation.
207 g_log.notice() << "Setting cache to 400 MB write.\n";
// Write-buffer size is expressed in events: 400000000 divided by the size of
// one event of the output workspace.
208 bc->getFileIO()->setWriteBufferSize(400000000 / m_OutIWS->sizeofEvent());
209 }
210
211 /* else
212 {
213 saver->openFile(outputFile,"w");
214 }*/
215 // Init box structure used for memory/file space calculations
216 m_BoxStruct.initFlatStructure(ws, outputFile);
217
218 // First, load all the box data and experiment info and calculate file
219 // positions of the target workspace
220 this->loadBoxData();
221
222 size_t numBoxes = m_BoxStruct.getNBoxes();
223 // Progress report based on events processed.
// The reporter covers the 0.1 - 0.9 range with one step per box.
224 m_progress = std::make_unique<Progress>(this, 0.1, 0.9, size_t(numBoxes));
225 m_progress->setNotifyStep(0.1);
226
227 // For tracking progress
228 // uint64_t m_totalEventsInTasks = 0;
229
230 // Prepare thread pool
231 CPUTimer overallTime;
232
// The scheduler and pool are constructed here, but nothing in the visible
// code pushes a task to them and the joinAll() call further down is
// commented out.
233 auto ts = new ThreadSchedulerFIFO();
234 ThreadPool tp(ts);
235
// DiskBuf stays null unless it is assigned below; the loop uses it as the
// switch for saving each box and releasing its memory.
236 Kernel::DiskBuffer *DiskBuf(nullptr);
// NOTE(review): number 237 is absent and the brace after the assignment has
// no visible opener — the guarding line appears to be missing from the
// listing; confirm.
238 DiskBuf = bc->getFileIO();
239 }
240
241 this->m_totalLoaded = 0;
242 const std::vector<API::IMDNode *> &boxes = m_BoxStruct.getBoxes();
243
244 for (size_t ib = 0; ib < numBoxes; ib++) {
245 auto box = boxes[ib];
// Nodes for which isBox() is false are skipped; note that they also skip the
// progress report at the bottom of the loop.
246 if (!box->isBox())
247 continue;
248 // load all contributed events into current box;
249 this->loadEventsFromSubBoxes(box);
250
251 if (DiskBuf) {
252 if (box->getDataInMemorySize() > 0) { // data position has been already pre-calculated
// Save the merged events immediately, then drop them from memory.
253 box->getISaveable()->save();
254 box->clearDataFromMemory();
255 // Kernel::ISaveable *Saver = box->getISaveable();
256 // DiskBuf->toWrite(Saver);
257 }
258 }
259 // else
260 //{ size_t ID = box->getID();
261 // uint64_t filePosition = targetEventIndexes[2*ID];
262 // box->saveAt(saver.get(), filePosition);
263 //}
264 //
265 // if (!Parallel)
266 //{
267 // // Run the task serially only
268 // task->run();
269 // delete task;
270 //}
271 // else
272 //{
273 // // Enqueue to run in parallel (at the joinAll() call below).
274 // ts->push(task);
275 //}
276
// NOTE(review): the loop index ib is passed as the increment, so the amount
// added grows every iteration — confirm whether a fixed step or an absolute
// position was intended (check the Progress API).
277 m_progress->reportIncrement(ib, "Loading and merging box data");
278 }
279 if (DiskBuf) {
280 DiskBuf->flushCache();
281 bc->getFileIO()->flushData();
282 }
284 // tp.joinAll();
285 g_log.information() << overallTime << " to do all the adding.\n";
286
287 // Close any open file handle
// NOTE(review): number 288 is absent, so the statement this comment refers to
// is not visible in the listing — confirm.
289
290 // Finish things up
291 this->finalizeOutput(outputFile);
292}
293
294//----------------------------------------------------------------------------------------------
/** Refresh the output workspace cache and, when an output file name is
 * given, write the workspace group information and the flat box structure
 * to that file.
 *
 * NOTE(review): this listing carries an embedded line number at the start of
 * every line and numbers 312 and 315 are absent, so the statements that
 * belong to two of the progress messages below are not visible here —
 * confirm against the repository copy of the file.
 *
 * @param outputFile :: name of the file to update; when empty only the cache
 *                      refresh and its timing log are performed
 */
296void MergeMDFiles::finalizeOutput(const std::string &outputFile) {
297 CPUTimer overallTime;
298
299 this->progress(0.90, "Refreshing Cache");
300 m_OutIWS->refreshCache();
301
302 g_log.information() << overallTime << " to run refreshCache().\n";
303
304 if (!outputFile.empty()) {
305 g_log.notice() << "Starting SaveMD to update the file back-end.\n";
306 // create or open WS group and put there additional information about WS and
307 // its dimensions
// old_data_there is filled in by createOrOpenMDWSgroup() through its last
// (reference) argument; it is not read again in the visible code.
308 bool old_data_there;
// The returned raw file pointer is held by a scoped_ptr for the rest of this
// scope.
309 boost::scoped_ptr<Nexus::File> file(
310 MDBoxFlatTree::createOrOpenMDWSgroup(outputFile, m_nDims, m_MDEventType, false, old_data_there));
311 this->progress(0.94, "Saving ws history and dimensions");
// NOTE(review): number 312 is absent — the statement matching the progress
// message above is missing from the listing; confirm.
313 // Save each ExperimentInfo to a spot in the file
314 this->progress(0.98, "Saving experiment infos");
// NOTE(review): number 315 is absent — the statement matching the progress
// message above is missing from the listing; confirm.
316
317 file->closeGroup();
318 file->close();
319 // -------------- Save Box Structure -------------------------------------
320 // OK, we've filled these big arrays of data representing flat box
321 // structure. Save them.
// This reports 0.91 after 0.94 and 0.98 were already reported above, so the
// algorithm-level progress value goes down at this point.
322 progress(0.91, "Writing Box Data");
323 m_progress->resetNumSteps(8, 0.92, 1.00);
324
325 // Save box structure;
326 m_BoxStruct.saveBoxStructure(outputFile);
327
328 g_log.information() << overallTime << " to run SaveMD structure\n";
329 }
330}
331
332//----------------------------------------------------------------------------------------------
336 // clear disk buffer which can remain from previous runs
337 // the existence/ usage of the buffer indicates if the algorithm works with
338 // file based or memory based target workspaces;
339 // pDiskBuffer = NULL;
340 MultipleFileProperty *multiFileProp = dynamic_cast<MultipleFileProperty *>(getPointerToProperty("Filenames"));
341 if (!multiFileProp) {
342 throw std::logic_error("Filenames property must have MultipleFileProperty type.");
343 }
344 m_Filenames = VectorHelper::flattenVector(multiFileProp->operator()());
345 if (m_Filenames.empty())
346 throw std::invalid_argument("Must specify at least one filename.");
347 std::string firstFile = m_Filenames[0];
348
349 std::string outputFile = getProperty("OutputFilename");
350 m_fileBasedTargetWS = false;
351 if (!outputFile.empty()) {
352 m_fileBasedTargetWS = true;
353 if (Poco::File(outputFile).exists())
354 throw std::invalid_argument(" File " + outputFile +
355 " already exists. Can not use existing file "
356 "as the target to MergeMD files.\n" +
357 " Use it as one of source files if you want to add MD data to it");
358 }
359
360 // Start by loading the first file but just the box structure, no events, and
361 // not file-backed
362 // m_BoxStruct.loadBoxStructure(firstFile,
363 auto loader = createChildAlgorithm("LoadMD", 0.0, 0.05, false);
364 loader->setPropertyValue("Filename", firstFile);
365 loader->setProperty("MetadataOnly", false);
366 loader->setProperty("BoxStructureOnly", true);
367 loader->setProperty("FileBackEnd", false);
368 loader->executeAsChildAlg();
369 IMDWorkspace_sptr result = (loader->getProperty("OutputWorkspace"));
370
371 auto firstWS = std::dynamic_pointer_cast<API::IMDEventWorkspace>(result);
372 if (!firstWS)
373 throw std::runtime_error("Can not load MDEventWorkspace from initial file " + firstFile);
374
375 // do the job
376 this->doExecByCloning(firstWS, outputFile);
377
378 m_OutIWS->setFileNeedsUpdating(false);
379
380 setProperty("OutputWorkspace", m_OutIWS);
381}
384 for (auto &loader : m_EventLoader) {
385 delete loader;
386 loader = nullptr;
387 }
388}
389
390} // namespace Mantid::MDAlgorithms
#define DECLARE_ALGORITHM(classname)
Definition Algorithm.h:538
static std::unique_ptr< QThreadPool > tp
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
Kernel::Property * getPointerToProperty(const std::string &name) const override
Get a property by name.
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
virtual std::shared_ptr< Algorithm > createChildAlgorithm(const std::string &name, const double startProgress=-1., const double endProgress=-1., const bool enableLogging=true, const int &version=-1)
Create a Child Algorithm.
Kernel::Logger & g_log
Definition Algorithm.h:422
void progress(double p, const std::string &msg="", double estimatedTime=0.0, int progressPrecision=0)
Sends ProgressNotification.
This class is used by MDBox and MDGridBox in order to intelligently determine optimal behavior.
@ OptionalSave
to specify a file to write to but an empty string is
virtual void loadAndAddFrom(API::IBoxControllerIO *const, uint64_t, size_t, std::vector< coord_t > &)=0
Load the additional box data of specified size from the disk location provided using the class,...
virtual void clear()=0
Clear all contained data including precalculated averages.
virtual size_t getID() const =0
virtual void reserveMemoryForLoad(uint64_t)=0
A property to allow a user to specify multiple files to load.
A property class for workspaces.
The class responsible for saving events into nexus file using generic box controller interface Expect...
std::vector< API::IMDNode * > & getBoxes()
void initFlatStructure(const API::IMDEventWorkspace_sptr &pws, const std::string &fileName)
convert MDWS box structure into flat structure used for saving/loading on hdd
std::vector< uint64_t > & getEventIndex()
const std::vector< int > & getBoxType() const
static void saveWSGenericInfo(Mantid::Nexus::File *const file, const API::IMDWorkspace_const_sptr &ws)
Save workspace generic info like dimension structure, history, title dimensions etc.
void saveBoxStructure(const std::string &fileName)
Save flat box structure into a file, defined by the file name.
static Mantid::Nexus::File * createOrOpenMDWSgroup(const std::string &fileName, int &nDims, const std::string &WSEventType, bool readOnly, bool &alreadyExists)
The function to create a NeXus MD workspace group with specified events type and number of dimensions...
static void saveExperimentInfos(Mantid::Nexus::File *const file, const API::IMDEventWorkspace_const_sptr &ws)
Save each NEW ExperimentInfo to a spot in the file.
CPUTimer : Timer that uses the CPU time, rather than wall-clock time to measure execution time.
Definition CPUTimer.h:24
Buffer objects that need to be written out to disk so as to optimize writing operations.
Definition DiskBuffer.h:42
void flushCache()
Flush out all the data in the memory; and writes out everything in the to-write cache.
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
void notice(const std::string &msg)
Logs at notice level.
Definition Logger.cpp:126
void information(const std::string &msg)
Logs at information level.
Definition Logger.cpp:136
A Thread Pool implementation that keeps a certain number of threads running (normally,...
Definition ThreadPool.h:36
A First-In-First-Out Thread Scheduler.
Algorithm to merge multiple MDEventWorkspaces from files that obey a common box format.
std::string m_MDEventType
string describes type of the event, stored in the workspaces.
void doExecByCloning(const Mantid::API::IMDEventWorkspace_sptr &ws, const std::string &outputFile)
Perform the merging, but clone the initial workspace and use the same splitting as its structure is e...
std::vector< std::string > m_Filenames
Files to load.
std::vector< API::IBoxControllerIO * > m_EventLoader
Vector of file handles to each input file //TODO unique?
uint64_t loadEventsFromSubBoxes(API::IMDNode *TargetBox)
Task that loads all of the events from the corresponding boxes of all files that are being merged into a pa...
void finalizeOutput(const std::string &outputFile)
Now re-save the MDEventWorkspace to update the file back end.
std::unique_ptr< Mantid::API::Progress > m_progress
Progress reporter.
int m_nDims
number of workspace dimensions
~MergeMDFiles() override
Destructor.
bool m_fileBasedTargetWS
if the workspace is indeed file-based
DataObjects::MDBoxFlatTree m_BoxStruct
std::vector< DataObjects::MDBoxFlatTree > m_fileComponentsStructure
void exec() override
Run the algorithm.
Mantid::API::IMDEventWorkspace_sptr m_OutIWS
Output IMDEventWorkspace.
void clearEventLoaders()
Set to true if the output is a clone of the first one.
void init() override
Initialise the properties.
void loadBoxData()
Loads all of the box data required (no events) for later use.
std::shared_ptr< IMDEventWorkspace > IMDEventWorkspace_sptr
Shared pointer to Mantid::API::IMDEventWorkspace.
std::shared_ptr< IMDWorkspace > IMDWorkspace_sptr
Shared pointer to the IMDWorkspace base class.
std::shared_ptr< BoxController > BoxController_sptr
Shared ptr to BoxController.
bool exists(Nexus::File &file, const std::string &name)
std::vector< T > flattenVector(const std::vector< std::vector< T > > &v)
A convenience function to "flatten" the given vector of vectors into a single vector.
float coord_t
Typedef for the data type to use for coordinate axes in MD objects such as MDBox, MDEventWorkspace,...
Definition MDTypes.h:27
@ Output
An output workspace.
Definition Property.h:54