Mantid
Loading...
Searching...
No Matches
DetermineChunking.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
8#include "LoadRaw/isisraw.h"
11#include "MantidAPI/TableRow.h"
19#include "MantidKernel/System.h"
21
22// clang-format off
23#include <nexus/NeXusFile.hpp>
24#include <nexus/NeXusException.hpp>
25// clang-format on
26
27#ifdef MPI_BUILD
28#include <boost/mpi.hpp>
29namespace mpi = boost::mpi;
30#endif
31
32#include <Poco/File.h>
33#include <exception>
34#include <fstream>
35#include <set>
36
37using namespace ::NeXus;
38using namespace Mantid::Kernel;
39using namespace Mantid::API;
40using std::map;
41using std::size_t;
42using std::string;
43using std::vector;
44
45namespace Mantid::DataHandling {
/// Number of prenexus extensions.
const int NUM_EXT_PRENEXUS = 1;
/// Valid extensions for prenexus files.
const std::string PRENEXUS_EXT[NUM_EXT_PRENEXUS] = {
    "_runinfo.xml",
};

/// Number of event nexus extensions.
const int NUM_EXT_EVENT_NEXUS = 3;
/// Valid extensions for event nexus files.
const std::string EVENT_NEXUS_EXT[NUM_EXT_EVENT_NEXUS] = {
    "_event.nxs",
    ".nxs",
    ".nxs.h5",
};

/// Number of histogram nexus extensions.
const int NUM_EXT_HISTO_NEXUS = 1;
/// Valid extensions for histogram nexus files.
const std::string HISTO_NEXUS_EXT[NUM_EXT_HISTO_NEXUS] = {
    "_histo.nxs",
};

/// Number of raw file extensions.
const int NUM_EXT_RAW = 1;
/// Valid extensions for ISIS raw files.
const std::string RAW_EXT[NUM_EXT_RAW] = {
    ".raw",
};
58
59// Register the algorithm into the AlgorithmFactory
61
namespace {
/// Multiplicative factor converting a size in bytes into GiB (2^30 bytes).
constexpr double BYTES_TO_GiB = 1.0 / (1024.0 * 1024.0 * 1024.0);
} // namespace
65
66//----------------------------------------------------------------------------------------------
68const std::string DetermineChunking::name() const { return "DetermineChunking"; }
69
71int DetermineChunking::version() const { return 1; }
72
74const std::string DetermineChunking::category() const { return "DataHandling\\PreNexus;Workflow\\DataHandling"; }
75
76//----------------------------------------------------------------------------------------------
79 // runfile to read in
80 std::set<std::string> exts_set;
81 exts_set.insert(PRENEXUS_EXT, PRENEXUS_EXT + NUM_EXT_PRENEXUS);
84 exts_set.insert(RAW_EXT, RAW_EXT + NUM_EXT_RAW);
85 std::vector<std::string> exts(exts_set.begin(), exts_set.end());
86 this->declareProperty(std::make_unique<FileProperty>("Filename", "", FileProperty::Load, exts),
87 "The name of the event nexus, runinfo.xml, raw, or histo nexus file to "
88 "read, including its full or relative path. The Event NeXus file name is "
89 "typically of the form INST_####_event.nxs (N.B. case sensitive if "
90 "running on Linux).");
91
92 auto mustBePositive = std::make_shared<BoundedValidator<double>>();
93 mustBePositive->setLower(0.0);
94 declareProperty("MaxChunkSize", EMPTY_DBL(), mustBePositive,
95 "Get chunking strategy for chunks with this number of "
96 "Gbytes. File will not be loaded if this option is set.");
97
98 declareProperty(std::make_unique<WorkspaceProperty<API::ITableWorkspace>>("OutputWorkspace", "", Direction::Output),
99 "An output workspace.");
100}
101
102//----------------------------------------------------------------------------------------------
105 // get the chunking parameter and fix it up
106 double maxChunk = this->getProperty("MaxChunkSize");
107 if (maxChunk == 0) {
108 g_log.debug() << "Converting maxChunk=0 to maxChunk=EMPTY_DBL\n";
109 maxChunk = EMPTY_DBL();
110 }
111
112 // get the filename and determine the file type
113 int m_numberOfSpectra = 0;
114 string filename = this->getPropertyValue("Filename");
115 FileType fileType = getFileType(filename);
116
117 // setup the chunking table with the correct column headings
118 Mantid::API::ITableWorkspace_sptr strategy = Mantid::API::WorkspaceFactory::Instance().createTable("TableWorkspace");
119 if (fileType == PRENEXUS_FILE || fileType == EVENT_NEXUS_FILE) {
120 strategy->addColumn("int", "ChunkNumber");
121 strategy->addColumn("int", "TotalChunks");
122 } else if (fileType == RAW_FILE || fileType == HISTO_NEXUS_FILE) {
123 strategy->addColumn("int", "SpectrumMin");
124 strategy->addColumn("int", "SpectrumMax");
125 }
126 this->setProperty("OutputWorkspace", strategy);
127
128#ifndef MPI_BUILD
129 // mpi needs work for every core, so don't do this
130 if (maxChunk == 0 || isEmpty(maxChunk)) {
131 return;
132 }
133#endif
134
135 Poco::File fileinfo(filename);
136 const double fileSizeGiB = static_cast<double>(fileinfo.getSize()) * BYTES_TO_GiB;
137
138#ifndef MPI_BUILD
139 // don't bother opening the file if its size is "small"
140 // note that prenexus "_runinfo.xml" files don't represent what
141 // is actually loaded
142 if (fileType != PRENEXUS_FILE && 6. * fileSizeGiB < maxChunk)
143 return;
144#endif
145
146 // --------------------- DETERMINE NUMBER OF CHUNKS
147 double wkspSizeGiB = 0;
148 // PreNexus
149 if (fileType == PRENEXUS_FILE) {
150 vector<string> eventFilenames;
151 string dataDir;
152 LoadPreNexus lp;
153 lp.parseRuninfo(filename, dataDir, eventFilenames);
154 for (auto &eventFilename : eventFilenames) {
155 BinaryFile<DasEvent> eventfile(dataDir + eventFilename);
156 // Factor of 2 for compression
157 wkspSizeGiB += static_cast<double>(eventfile.getNumElements()) * 48.0 * BYTES_TO_GiB;
158 }
159 }
160 // Event Nexus
161 else if (fileType == EVENT_NEXUS_FILE) {
162
163 // top level file information
164 ::NeXus::File file(filename);
165 std::string m_top_entry_name = setTopEntryName(filename);
166
167 // Start with the base entry
168 file.openGroup(m_top_entry_name, "NXentry");
169
170 // Now we want to go through all the bankN_event entries
171 map<string, string> entries = file.getEntries();
172 map<string, string>::const_iterator it = entries.begin();
173 std::string classType = "NXevent_data";
174 size_t total_events = 0;
175 for (; it != entries.end(); ++it) {
176 std::string entry_name(it->first);
177 std::string entry_class(it->second);
178 if (entry_class == classType) {
179 if (!isEmpty(maxChunk)) {
180 try {
181 // Get total number of events for each bank
182 file.openGroup(entry_name, entry_class);
183 file.openData("total_counts");
184 if (file.getInfo().type == NX_UINT64) {
185 std::vector<uint64_t> bank_events;
186 file.getData(bank_events);
187 total_events += bank_events[0];
188 } else {
189 std::vector<int> bank_events;
190 file.getDataCoerce(bank_events);
191 total_events += bank_events[0];
192 }
193 file.closeData();
194 file.closeGroup();
195 } catch (::NeXus::Exception &) {
196 g_log.error() << "Unable to find total counts to determine "
197 "chunking strategy.\n";
198 }
199 }
200 }
201 }
202
203 // Close up the file
204 file.closeGroup();
205 file.close();
206 // Factor of 2 for compression
207 wkspSizeGiB = static_cast<double>(total_events) * 48.0 * BYTES_TO_GiB;
208 } else if (fileType == RAW_FILE) {
209 // Check the size of the file loaded
210 wkspSizeGiB = fileSizeGiB * 24.0;
211 g_log.notice() << "Wksp size is " << wkspSizeGiB << " GB\n";
212
213 LoadRawHelper helper;
214 FILE *file = helper.openRawFile(filename);
215 ISISRAW iraw;
216 iraw.ioRAW(file, true);
217
218 // Read in the number of spectra in the RAW file
219 m_numberOfSpectra = iraw.t_nsp1;
220 g_log.notice() << "Spectra size is " << m_numberOfSpectra << " spectra\n";
221 fclose(file);
222 }
223 // Histo Nexus
224 else if (fileType == HISTO_NEXUS_FILE) {
225 // Check the size of the file loaded
226 wkspSizeGiB = fileSizeGiB * 144.0;
227 g_log.notice() << "Wksp size is " << wkspSizeGiB << " GB\n";
229 lp.m_signalNo = 1;
230 // Find the entry name we want.
231 std::string entry_name = LoadTOFRawNexus::getEntryName(filename);
232 std::vector<std::string> bankNames;
233 lp.countPixels(filename, entry_name, bankNames);
234 m_numberOfSpectra = static_cast<int>(lp.m_numPixels);
235 g_log.notice() << "Spectra size is " << m_numberOfSpectra << " spectra\n";
236 } else {
237 throw(std::invalid_argument("unsupported file type"));
238 }
239
240 int numChunks = 0;
241 if (maxChunk != 0.0) // protect from divide by zero
242 {
243 numChunks = static_cast<int>(wkspSizeGiB / maxChunk);
244 }
245
246 numChunks++; // So maxChunkSize is not exceeded
247 if (numChunks <= 1 || isEmpty(maxChunk)) {
248#ifdef MPI_BUILD
249 numChunks = 1;
250#else
251 g_log.information() << "Everything can be done in a single chunk returning empty table\n";
252 return;
253#endif
254 }
255
256// --------------------- FILL IN THE CHUNKING TABLE
257#ifdef MPI_BUILD
258 // use all cores so number of chunks should be a multiple of cores
259 if (mpi::communicator().size() > 1) {
260 int imult = numChunks / mpi::communicator().size() + 1;
261 numChunks = imult * mpi::communicator().size();
262 }
263#endif
264
265 for (int i = 1; i <= numChunks; i++) {
266#ifdef MPI_BUILD
267 if (mpi::communicator().size() > 1) {
268 // chunk 1 should go to rank=0, chunk 2 to rank=1, etc.
269 if ((i - 1) % mpi::communicator().size() != mpi::communicator().rank())
270 continue;
271 }
272#endif
273 Mantid::API::TableRow row = strategy->appendRow();
274 if (fileType == PRENEXUS_FILE || fileType == EVENT_NEXUS_FILE) {
275 row << i << numChunks;
276 } else if (fileType == RAW_FILE || fileType == HISTO_NEXUS_FILE) {
277 int spectraPerChunk = m_numberOfSpectra / numChunks;
278 int first = (i - 1) * spectraPerChunk + 1;
279 int last = first + spectraPerChunk - 1;
280 if (i == numChunks)
281 last = m_numberOfSpectra;
282 row << first << last;
283 }
284 }
285}
286
288std::string DetermineChunking::setTopEntryName(const std::string &filename) {
289 std::string top_entry_name;
290 using string_map_t = std::map<std::string, std::string>;
291 try {
292 string_map_t::const_iterator it;
293 ::NeXus::File file = ::NeXus::File(filename);
294 string_map_t entries = file.getEntries();
295
296 // Choose the first entry as the default
297 top_entry_name = entries.begin()->first;
298
299 for (it = entries.begin(); it != entries.end(); ++it) {
300 if (((it->first == "entry") || (it->first == "raw_data_1")) && (it->second == "NXentry")) {
301 top_entry_name = it->first;
302 break;
303 }
304 }
305 } catch (const std::exception &) {
306 g_log.error() << "Unable to determine name of top level NXentry - assuming "
307 "\"entry\".\n";
308 top_entry_name = "entry";
309 }
310 return top_entry_name;
311}
312
321 // check for prenexus
322 for (const auto &extension : PRENEXUS_EXT) {
323 if (filename.find(extension) != std::string::npos) {
324 g_log.information() << "Determined \'" << filename << "\' is a prenexus file\n";
325 return PRENEXUS_FILE;
326 }
327 }
328
329 // check for histogram nexus
330 for (const auto &extension : HISTO_NEXUS_EXT) {
331 if (filename.find(extension) != std::string::npos) {
332 g_log.information() << "Determined \'" << filename << "\' is a histogram nexus file\n";
333 return HISTO_NEXUS_FILE;
334 }
335 }
336
337 // check for event nexus - must be last because a valid extension is ".nxs"
338 for (const auto &extension : EVENT_NEXUS_EXT) {
339 if (filename.find(extension) != std::string::npos) {
340 g_log.information() << "Determined \'" << filename << "\' is an event nexus file\n";
341 return EVENT_NEXUS_FILE;
342 }
343 }
344
345 // check for isis raw files
346 for (const auto &extension : RAW_EXT) {
347 if (filename.find(extension) != std::string::npos) {
348 g_log.information() << "Determined \'" << filename << "\' is an ISIS raw file\n";
349 return RAW_FILE;
350 }
351 }
352
353 throw std::invalid_argument("Unsupported file type");
354}
355} // namespace Mantid::DataHandling
#define DECLARE_ALGORITHM(classname)
Definition: Algorithm.h:576
isis raw file.
Definition: isisraw.h:272
int t_nsp1
number of spectra in time regime 1
Definition: isisraw.h:329
virtual int ioRAW(FILE *file, bool from_file, bool read_data=true)
stuff
Definition: isisraw.cpp:401
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
Definition: Algorithm.cpp:1913
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
Definition: Algorithm.cpp:2026
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Definition: Algorithm.cpp:2076
Kernel::Logger & g_log
Definition: Algorithm.h:451
static bool isEmpty(const NumT toCheck)
checks that the value was not set by users, uses the value in empty double/int.
@ Load
allowed here which will be passed to the algorithm
Definition: FileProperty.h:52
TableRow represents a row in a TableWorkspace.
Definition: TableRow.h:39
A property class for workspaces.
void init() override
Virtual method - must be overridden by concrete algorithm.
int version() const override
function to return a version of the algorithm, must be overridden in all algorithms
void exec() override
Virtual method - must be overridden by concrete algorithm.
const std::string name() const override
function to return a name of the algorithm, must be overridden in all algorithms
FileType getFileType(const std::string &filename)
Determine the file type using the filename.
const std::string category() const override
function to return a category of the algorithm.
std::string setTopEntryName(const std::string &filename)
set the name of the top level NXentry m_top_entry_name
LoadPreNexus : Workflow algorithm to load a collection of preNeXus files.
Definition: LoadPreNexus.h:23
void parseRuninfo(const std::string &runinfo, std::string &dataDir, std::vector< std::string > &eventFilenames)
Parse the runinfo file to find the names of the neutron event files.
Helper class for LoadRaw algorithms.
Definition: LoadRawHelper.h:40
FILE * openRawFile(const std::string &fileName)
Opens Raw File.
Loads a NeXus file that conforms to the TOFRaw instrument definition format and stores it in a 2D wor...
int m_signalNo
Signal # to load. Default 1.
static std::string getEntryName(const std::string &filename)
void countPixels(const std::string &nexusfilename, const std::string &entry_name, std::vector< std::string > &bankNames)
Goes through a histogram NXS file and counts the number of pixels.
size_t m_numPixels
Number of pixels.
The BinaryFile template is a helper function for loading simple binary files.
Definition: BinaryFile.h:44
size_t getNumElements() const
Returns the # of elements in the file (cached result of getFileSize)
Definition: BinaryFile.h:86
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
void debug(const std::string &msg)
Logs at debug level.
Definition: Logger.cpp:114
void notice(const std::string &msg)
Logs at notice level.
Definition: Logger.cpp:95
void error(const std::string &msg)
Logs at error level.
Definition: Logger.cpp:77
void information(const std::string &msg)
Logs at information level.
Definition: Logger.cpp:105
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
ISIS VMS raw file definitions.
std::shared_ptr< ITableWorkspace > ITableWorkspace_sptr
shared pointer to Mantid::API::ITableWorkspace
const int NUM_EXT_HISTO_NEXUS(1)
Number of histogram nexus extensions.
const int NUM_EXT_RAW(1)
Number of raw file extensions.
const std::string HISTO_NEXUS_EXT[NUM_EXT_HISTO_NEXUS]
Valid extensions for histogram nexus files.
const std::string PRENEXUS_EXT[NUM_EXT_PRENEXUS]
Valid extensions for prenexus files.
const int NUM_EXT_EVENT_NEXUS(3)
Number of event nexus extensions.
const std::string RAW_EXT[NUM_EXT_RAW]
Valid extensions for ISIS raw files.
FileType
Allowed file types.
@ RAW_FILE
ISIS raw files.
@ PRENEXUS_FILE
PreNeXus files.
@ HISTO_NEXUS_FILE
Histogram NeXus files.
@ EVENT_NEXUS_FILE
Event NeXus files.
const std::string EVENT_NEXUS_EXT[NUM_EXT_EVENT_NEXUS]
Valid extensions for event nexus files.
const int NUM_EXT_PRENEXUS(1)
Number of prenexus extensions.
constexpr double EMPTY_DBL() noexcept
Returns what we consider an "empty" double within a property.
Definition: EmptyValues.h:43
@ Output
An output workspace.
Definition: Property.h:54