Mantid
Loading...
Searching...
No Matches
DetermineChunking.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
8#include "LoadRaw/isisraw.h"
11#include "MantidAPI/TableRow.h"
19#include "MantidNexus/NexusFile.h"
20
21#include <exception>
22#include <filesystem>
23#include <set>
24#include <vector>
25
26using namespace Mantid::Kernel;
27using namespace Mantid::API;
28using std::map;
29using std::size_t;
30using std::string;
31using std::vector;
32
33namespace Mantid::DataHandling {
35const std::vector<std::string> PRENEXUS_EXT = {"_runinfo.xml"};
37const std::vector<std::string> EVENT_NEXUS_EXT = {"_event.nxs", ".nxs", ".nxs.h5"};
39const std::vector<std::string> HISTO_NEXUS_EXT = {"_histo.nxs"};
41const std::vector<std::string> RAW_EXT = {".raw"};
42
43// Register the algorithm into the AlgorithmFactory
45
46namespace {
47constexpr double BYTES_TO_GiB = 1. / 1024. / 1024. / 1024.;
48}
49
50//----------------------------------------------------------------------------------------------
52const std::string DetermineChunking::name() const { return "DetermineChunking"; }
53
55int DetermineChunking::version() const { return 1; }
56
58const std::string DetermineChunking::category() const { return "DataHandling\\PreNexus;Workflow\\DataHandling"; }
59
60//----------------------------------------------------------------------------------------------
63 // runfile to read in
64 std::set<std::string> exts_set;
65 exts_set.insert(PRENEXUS_EXT.cbegin(), PRENEXUS_EXT.cend());
66 exts_set.insert(EVENT_NEXUS_EXT.cbegin(), EVENT_NEXUS_EXT.cend());
67 exts_set.insert(HISTO_NEXUS_EXT.cbegin(), HISTO_NEXUS_EXT.cend());
68 exts_set.insert(RAW_EXT.cbegin(), RAW_EXT.end());
69 std::vector<std::string> exts(exts_set.begin(), exts_set.end());
70 this->declareProperty(std::make_unique<FileProperty>("Filename", "", FileProperty::Load, exts),
71 "The name of the event nexus, runinfo.xml, raw, or histo nexus file to "
72 "read, including its full or relative path. The Event NeXus file name is "
73 "typically of the form INST_####_event.nxs (N.B. case sensitive if "
74 "running on Linux).");
75
76 auto mustBePositive = std::make_shared<BoundedValidator<double>>();
77 mustBePositive->setLower(0.0);
78 declareProperty("MaxChunkSize", EMPTY_DBL(), mustBePositive,
79 "Get chunking strategy for chunks with this number of "
80 "Gbytes. File will not be loaded if this option is set.");
81
82 declareProperty(std::make_unique<WorkspaceProperty<API::ITableWorkspace>>("OutputWorkspace", "", Direction::Output),
83 "An output workspace.");
84}
85
86//----------------------------------------------------------------------------------------------
89 // get the chunking parameter and fix it up
90 double maxChunk = this->getProperty("MaxChunkSize");
91 if (maxChunk == 0) {
92 g_log.debug() << "Converting maxChunk=0 to maxChunk=EMPTY_DBL\n";
93 maxChunk = EMPTY_DBL();
94 }
95
96 // get the filename and determine the file type
97 int m_numberOfSpectra = 0;
98 string filename = this->getPropertyValue("Filename");
99 FileType fileType = getFileType(filename);
100
101 // setup the chunking table with the correct column headings
102 Mantid::API::ITableWorkspace_sptr strategy = Mantid::API::WorkspaceFactory::Instance().createTable("TableWorkspace");
103 if (fileType == PRENEXUS_FILE || fileType == EVENT_NEXUS_FILE) {
104 strategy->addColumn("int", "ChunkNumber");
105 strategy->addColumn("int", "TotalChunks");
106 } else if (fileType == RAW_FILE || fileType == HISTO_NEXUS_FILE) {
107 strategy->addColumn("int", "SpectrumMin");
108 strategy->addColumn("int", "SpectrumMax");
109 }
110 this->setProperty("OutputWorkspace", strategy);
111
112 if (maxChunk == 0 || isEmpty(maxChunk)) {
113 return;
114 }
115
116 std::filesystem::path fileinfo(filename);
117 const double fileSizeGiB = static_cast<double>(std::filesystem::file_size(fileinfo)) * BYTES_TO_GiB;
118
119 // don't bother opening the file if its size is "small"
120 // note that prenexus "_runinfo.xml" files don't represent what
121 // is actually loaded
122 if (fileType != PRENEXUS_FILE && 6. * fileSizeGiB < maxChunk)
123 return;
124
125 // --------------------- DETERMINE NUMBER OF CHUNKS
126 double wkspSizeGiB = 0;
127 // PreNexus
128 if (fileType == PRENEXUS_FILE) {
129 vector<string> eventFilenames;
130 string dataDir;
131 LoadPreNexus lp;
132 lp.parseRuninfo(filename, dataDir, eventFilenames);
133 std::filesystem::path dataPath(dataDir);
134 for (const auto &eventFilename : eventFilenames) {
135 BinaryFile<DasEvent> eventfile(dataPath / eventFilename);
136 // Factor of 2 for compression
137 wkspSizeGiB += static_cast<double>(eventfile.getNumElements()) * 48.0 * BYTES_TO_GiB;
138 }
139 }
140 // Event Nexus
141 else if (fileType == EVENT_NEXUS_FILE) {
142
143 // top level file information
144 Nexus::File file(filename);
145 std::string m_top_entry_name = setTopEntryName(filename);
146
147 // Start with the base entry
148 file.openGroup(m_top_entry_name, "NXentry");
149
150 // Now we want to go through all the bankN_event entries
151 map<string, string> entries = file.getEntries();
152 map<string, string>::const_iterator it = entries.begin();
153 std::string classType = "NXevent_data";
154 size_t total_events = 0;
155 for (; it != entries.end(); ++it) {
156 const std::string entry_class(it->second);
157 if (entry_class == classType) {
158 if (!isEmpty(maxChunk)) {
159 try {
160 const std::string entry_name(it->first);
161 // Get total number of events for each bank
162 file.openGroup(entry_name, entry_class);
163 file.openData("total_counts");
164 if (file.getInfo().type == NXnumtype::UINT64) {
165 std::vector<uint64_t> bank_events;
166 file.getData(bank_events);
167 total_events += bank_events[0];
168 } else {
169 std::vector<int> bank_events;
170 file.getDataCoerce(bank_events);
171 total_events += bank_events[0];
172 }
173 file.closeData();
174 file.closeGroup();
175 } catch (Nexus::Exception const &) {
176 g_log.error() << "Unable to find total counts to determine "
177 "chunking strategy.\n";
178 }
179 }
180 }
181 }
182
183 // Close up the file
184 file.closeGroup();
185 file.close();
186 // Factor of 2 for compression
187 wkspSizeGiB = static_cast<double>(total_events) * 48.0 * BYTES_TO_GiB;
188 } else if (fileType == RAW_FILE) {
189 // Check the size of the file loaded
190 wkspSizeGiB = fileSizeGiB * 24.0;
191 g_log.notice() << "Wksp size is " << wkspSizeGiB << " GB\n";
192
193 LoadRawHelper helper;
194 FILE *file = helper.openRawFile(filename);
195 ISISRAW iraw;
196 iraw.ioRAW(file, true);
197
198 // Read in the number of spectra in the RAW file
199 m_numberOfSpectra = iraw.t_nsp1;
200 g_log.notice() << "Spectra size is " << m_numberOfSpectra << " spectra\n";
201 fclose(file);
202 }
203 // Histo Nexus
204 else if (fileType == HISTO_NEXUS_FILE) {
205 // Check the size of the file loaded
206 wkspSizeGiB = fileSizeGiB * 144.0;
207 g_log.notice() << "Wksp size is " << wkspSizeGiB << " GB\n";
209 lp.m_signalNo = 1;
210 // Find the entry name we want.
211 std::string entry_name = LoadTOFRawNexus::getEntryName(filename);
212 std::vector<std::string> bankNames;
213 lp.countPixels(filename, entry_name, bankNames);
214 m_numberOfSpectra = static_cast<int>(lp.m_numPixels);
215 g_log.notice() << "Spectra size is " << m_numberOfSpectra << " spectra\n";
216 } else {
217 throw(std::invalid_argument("unsupported file type"));
218 }
219
220 int numChunks = 0;
221 if (maxChunk != 0.0) // protect from divide by zero
222 {
223 numChunks = static_cast<int>(wkspSizeGiB / maxChunk);
224 }
225
226 numChunks++; // So maxChunkSize is not exceeded
227 if (numChunks <= 1 || isEmpty(maxChunk)) {
228 g_log.information() << "Everything can be done in a single chunk returning empty table\n";
229 return;
230 }
231
232 // --------------------- FILL IN THE CHUNKING TABLE
233 for (int i = 1; i <= numChunks; i++) {
234 Mantid::API::TableRow row = strategy->appendRow();
235 if (fileType == PRENEXUS_FILE || fileType == EVENT_NEXUS_FILE) {
236 row << i << numChunks;
237 } else if (fileType == RAW_FILE || fileType == HISTO_NEXUS_FILE) {
238 int spectraPerChunk = m_numberOfSpectra / numChunks;
239 int first = (i - 1) * spectraPerChunk + 1;
240 int last = first + spectraPerChunk - 1;
241 if (i == numChunks)
242 last = m_numberOfSpectra;
243 row << first << last;
244 }
245 }
246}
247
249std::string DetermineChunking::setTopEntryName(const std::string &filename) {
250 std::string top_entry_name;
251 using string_map_t = std::map<std::string, std::string>;
252 try {
253 string_map_t::const_iterator it;
254 Nexus::File file = Nexus::File(filename);
255 string_map_t entries = file.getEntries();
256
257 // Choose the first entry as the default
258 top_entry_name = entries.begin()->first;
259
260 for (it = entries.begin(); it != entries.end(); ++it) {
261 if (((it->first == "entry") || (it->first == "raw_data_1")) && (it->second == "NXentry")) {
262 top_entry_name = it->first;
263 break;
264 }
265 }
266 } catch (const std::exception &) {
267 g_log.error() << "Unable to determine name of top level NXentry - assuming "
268 "\"entry\".\n";
269 top_entry_name = "entry";
270 }
271 return top_entry_name;
272}
273
282 // check for prenexus
283 if (filenameHasExtension(filename, PRENEXUS_EXT)) {
284 g_log.information() << "Determined \'" << filename << "\' is a prenexus file\n";
285 return PRENEXUS_FILE;
286 }
287
288 // check for histogram nexus
289 if (filenameHasExtension(filename, HISTO_NEXUS_EXT)) {
290 g_log.information() << "Determined \'" << filename << "\' is a histogram nexus file\n";
291 return HISTO_NEXUS_FILE;
292 }
293
294 // check for event nexus - must be last because a valid extension is ".nxs"
295 if (filenameHasExtension(filename, EVENT_NEXUS_EXT)) {
296 g_log.information() << "Determined \'" << filename << "\' is an event nexus file\n";
297 return EVENT_NEXUS_FILE;
298 }
299
300 // check for isis raw files
301 if (filenameHasExtension(filename, RAW_EXT)) {
302 g_log.information() << "Determined \'" << filename << "\' is an ISIS raw file\n";
303 return RAW_FILE;
304 }
305
306 throw std::invalid_argument("Unsupported file type");
307}
308
309bool DetermineChunking::filenameHasExtension(const std::string &filename,
310 const std::vector<std::string> &fileExtensions) {
311 return std::any_of(fileExtensions.cbegin(), fileExtensions.cend(),
312 [&filename](const auto &extension) { return filename.find(extension) != std::string::npos; });
313}
314
315} // namespace Mantid::DataHandling
#define DECLARE_ALGORITHM(classname)
Definition Algorithm.h:538
isis raw file.
Definition isisraw.h:267
int t_nsp1
number of spectra in time regime 1
Definition isisraw.h:324
virtual int ioRAW(FILE *file, bool from_file, bool read_data=true)
stuff
Definition isisraw.cpp:401
void declareProperty(std::unique_ptr< Kernel::Property > p, const std::string &doc="") override
Add a property to the list of managed properties.
std::string getPropertyValue(const std::string &name) const override
Get the value of a property as a string.
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
Kernel::Logger & g_log
Definition Algorithm.h:422
static bool isEmpty(const NumT toCheck)
checks that the value was not set by users, uses the value in empty double/int.
@ Load
allowed here which will be passed to the algorithm
TableRow represents a row in a TableWorkspace.
Definition TableRow.h:39
A property class for workspaces.
void init() override
Virtual method - must be overridden by concrete algorithm.
int version() const override
function to return a version of the algorithm, must be overridden in all algorithms
void exec() override
Virtual method - must be overridden by concrete algorithm.
const std::string name() const override
function to return a name of the algorithm, must be overridden in all algorithms
FileType getFileType(const std::string &filename)
Determine the file type using the filename.
const std::string category() const override
function to return a category of the algorithm.
std::string setTopEntryName(const std::string &filename)
set the name of the top level NXentry m_top_entry_name
bool filenameHasExtension(const std::string &filename, const std::vector< std::string > &fileExtensions)
LoadPreNexus : Workflow algorithm to load a collection of preNeXus files.
void parseRuninfo(const std::string &runinfo, std::string &dataDir, std::vector< std::string > &eventFilenames)
Parse the runinfo file to find the names of the neutron event files.
Helper class for LoadRaw algorithms.
FILE * openRawFile(const std::string &fileName)
Opens Raw File.
Loads a NeXus file that conforms to the TOFRaw instrument definition format and stores it in a 2D wor...
int m_signalNo
Signal # to load. Default 1.
static std::string getEntryName(const std::string &filename)
void countPixels(const std::string &nexusfilename, const std::string &entry_name, std::vector< std::string > &bankNames)
Goes through a histogram NXS file and counts the number of pixels.
The BinaryFile template is a helper function for loading simple binary files.
Definition BinaryFile.h:43
size_t getNumElements() const
Returns the # of elements in the file (cached result of getFileSize)
Definition BinaryFile.h:83
IPropertyManager * setProperty(const std::string &name, const T &value)
Templated method to set the value of a PropertyWithValue.
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
void notice(const std::string &msg)
Logs at notice level.
Definition Logger.cpp:126
void error(const std::string &msg)
Logs at error level.
Definition Logger.cpp:108
void information(const std::string &msg)
Logs at information level.
Definition Logger.cpp:136
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
Class that provides for a standard Nexus exception.
static unsigned short constexpr UINT64
ISIS VMS raw file definitions.
std::shared_ptr< ITableWorkspace > ITableWorkspace_sptr
shared pointer to Mantid::API::ITableWorkspace
const std::vector< std::string > RAW_EXT
Valid extensions for ISIS raw files.
FileType
Allowed file types.
@ RAW_FILE
ISIS raw files.
@ PRENEXUS_FILE
PreNeXus files.
@ HISTO_NEXUS_FILE
Histogram NeXus files.
@ EVENT_NEXUS_FILE
Event NeXus files.
const std::vector< std::string > PRENEXUS_EXT
Valid extensions for prenexus files.
const std::vector< std::string > HISTO_NEXUS_EXT
Valid extensions for histogram nexus files.
const std::vector< std::string > EVENT_NEXUS_EXT
Valid extensions for event nexus files.
constexpr double EMPTY_DBL() noexcept
Returns what we consider an "empty" double within a property.
Definition EmptyValues.h:42
@ Output
An output workspace.
Definition Property.h:54