Mantid
Loading...
Searching...
No Matches
NexusDescriptorLazy.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2007 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7
12
14#include <H5Cpp.h>
15#include <hdf5.h>
16
17#include <algorithm>
18#include <cstdlib> // malloc, calloc
19#include <cstring> // strcpy
20#include <filesystem>
21#include <map>
22#include <stdexcept> // std::invalid_argument
23#include <unordered_set>
24#include <utility>
25
// Maximum recursion depths handed to loadGroups() by initAllEntries().
static constexpr unsigned int INIT_DEPTH{1};  // initial scan starting at "/"
static constexpr unsigned int ENTRY_DEPTH{2}; // scan of the addresses listed in SPECIAL_ADDRESS
static constexpr unsigned int INSTR_DEPTH{5}; // scan of "<special address>/instrument"
// Top-level addresses that initAllEntries() scans deeper than the initial pass.
static const std::unordered_set<std::string> SPECIAL_ADDRESS = {"/entry", "/entry0", "/entry1", "/raw_data_1"};
// Class name used when no NX_class value is available for an object.
static const std::string UNKNOWN_CLASS("UNKNOWN_CLASS");
31
32namespace {
33template <herr_t (*H5Xclose)(hid_t)> std::string readNXClass(Mantid::Nexus::UniqueID<H5Xclose> const &oid) {
34 std::string nxClass = UNKNOWN_CLASS;
35 if (H5Aexists(oid, Mantid::Nexus::GROUP_CLASS_SPEC.c_str()) > 0) {
36 Mantid::Nexus::UniqueID<&H5Aclose> attrID = H5Aopen(oid, Mantid::Nexus::GROUP_CLASS_SPEC.c_str(), H5P_DEFAULT);
37 if (attrID.isValid()) {
38 Mantid::Nexus::UniqueID<&H5Tclose> atype(H5Aget_type(attrID));
39 if (H5Tis_variable_str(atype)) {
40 // variable length string
41 char *rdata = nullptr;
42 if (H5Aread(attrID, atype, &rdata) >= 0) {
43 nxClass = std::string(rdata);
44 }
45 // reclaim memory allocated for rdata by HDF5
46 H5free_memory(rdata);
47 } else {
48 // fixed length string
49 std::size_t size = H5Tget_size(atype);
50 nxClass.resize(size);
51 H5Aread(attrID, atype, nxClass.data());
52 }
53 }
54 }
55 return nxClass;
56}
57} // namespace
58
59namespace Mantid::Nexus {
60
61// PUBLIC
62
63NexusDescriptorLazy::NexusDescriptorLazy(std::string const &filename)
64 : m_filename(filename), m_extension(std::filesystem::path(m_filename).extension().string()), m_firstEntryNameType(),
65 m_allEntries(initAllEntries()), m_allMisses() {}
66
67// open the object to determine its type
68bool NexusDescriptorLazy::isEntry(std::string const &entryName) const {
69 bool known_miss = false, known_hit = false;
70 {
71 // wait for any writes to m_allMisses to end
72 std::shared_lock<std::shared_mutex> lock(m_readNexusMutex);
73 known_miss = m_allMisses.contains(entryName);
74 known_hit = m_allEntries.contains(entryName);
75 }
76 if (known_miss) {
77 // if we know this doesn't exist, return early
78 return false;
79 } else if (known_hit) {
80 // if we know it does exist, return
81 return true;
82 } else {
83 if (H5Oexists_by_name(m_fileID, entryName.c_str(), H5P_DEFAULT) > 0) {
84 // if it is there, save the correct class type for it
85 std::string nxclass;
86 H5O_info_t oinfo;
87 // otherwise, try to open this group and see if it is there
88 UniqueID<&H5Oclose> entryID(H5Oopen(m_fileID, entryName.c_str(), H5P_DEFAULT));
89 H5Oget_info(entryID, &oinfo, H5O_INFO_BASIC);
90 if (oinfo.type == H5O_TYPE_DATASET) {
91 nxclass = SCIENTIFIC_DATA_SET;
92 } else {
93 // read NX_class attribute
94 nxclass = readNXClass(entryID);
95 }
96 // modifying m_allEntries, need write lock
97 std::lock_guard<std::shared_mutex> lock(m_readNexusMutex);
98 m_allEntries[entryName] = std::move(nxclass);
99 return true;
100 } else {
101 // otherwise register failure, need write lock
102 std::lock_guard<std::shared_mutex> lock(m_readNexusMutex);
103 m_allMisses.insert(entryName);
104 return false;
105 }
106 }
107}
108
112bool NexusDescriptorLazy::classTypeExists(std::string const &classType) const {
113 // wait for writes to end
114 std::shared_lock<std::shared_mutex> lock(m_readNexusMutex);
115 return std::any_of(m_allEntries.begin(), m_allEntries.end(),
116 [&classType](auto const &entry) { return entry.second == classType; });
117}
118
119bool NexusDescriptorLazy::classTypeExistsChild(const std::string &parentPath, const std::string &classType) const {
120 // if the parent doesn't exist, the child doesn't either
121 if (!this->isEntry(parentPath))
122 return false;
123
124 // wait for writes to end
125 std::shared_lock<std::shared_mutex> lock(m_readNexusMutex);
126
127 // linear search through all entries - stop at first match
128 const auto delimitedEntryName = parentPath + '/';
129 for (auto const &[name, cls] : m_allEntries) {
130 // match the class first since that limits the list more
131 if (cls == classType && name.starts_with(delimitedEntryName)) {
132 return true;
133 }
134 }
135 return false;
136}
137
138bool NexusDescriptorLazy::hasRootAttr(std::string const &name) const {
139 bool known_hit = false;
140 { // wait for writes to end
141 std::shared_lock<std::shared_mutex> lock(m_readNexusMutex);
142 known_hit = m_rootAttrs.contains(name);
143 }
144 if (known_hit) {
145 return true;
146 } else {
147 // check the file since it wasn't in the cache
148 if (H5Aexists(m_fileID, name.c_str()) > 0) {
149 // mutex has the wrong name, but it's what we have
150 std::lock_guard<std::shared_mutex> lock(m_readNexusMutex);
151 m_rootAttrs.emplace(name);
152 return true;
153 } else {
154 return false;
155 }
156 }
157}
158
160std::string NexusDescriptorLazy::getStrData(std::string const &address) {
161 std::string strData;
162 if (isEntry(address, SCIENTIFIC_DATA_SET)) {
163 // open the data set and get its string data
164 // using H5Cpp interface because trying to read string data is an absolute nightmare with the C API
165 UniqueID<&H5Dclose> did(H5Dopen(m_fileID, address.c_str(), H5P_DEFAULT));
166 H5::DataSet dataset(did);
167 H5::DataType dtype = dataset.getDataType();
168 if (dtype.isVariableStr() || dtype.getClass() == H5T_STRING) {
169 dataset.read(strData, dtype, dataset.getSpace());
170 }
171 }
172 return strData;
173}
174
175// PRIVATE
176
177void NexusDescriptorLazy::loadGroups(std::map<std::string, std::string> &allEntries, std::string const &address,
178 unsigned int depth, const unsigned int maxDepth) {
179 UniqueID<&H5Gclose> groupID(H5Gopen(m_fileID, address.c_str(), H5P_DEFAULT));
180 if (!groupID.isValid()) {
181 return;
182 }
183
184 // get NX_class attribute
185 allEntries[address] = readNXClass(groupID);
186
187 if (depth >= maxDepth)
188 return;
189
190 // iterate over members
191 hsize_t numObjs = 0;
192 H5Gget_num_objs(groupID.get(), &numObjs);
193 for (hsize_t i = 0; i < numObjs; i++) {
194 H5G_obj_t type = H5Gget_objtype_by_idx(groupID, i);
195 ssize_t name_len = H5Gget_objname_by_idx(groupID, i, nullptr, 0);
196 if (name_len <= 0)
197 continue;
198 std::string memberName(name_len, 'X'); // fill with X for obvious errors
199 H5Gget_objname_by_idx(groupID, i, memberName.data(), name_len + 1); // +1 for null terminator,
200 std::string memberAddress = address;
201 if (!memberAddress.ends_with("/"))
202 memberAddress += "/";
203 memberAddress += memberName;
204
205 if (type == H5G_GROUP) {
206 loadGroups(allEntries, memberAddress, depth + 1, maxDepth);
207 } else if (type == H5G_DATASET) {
208 allEntries[memberAddress] = SCIENTIFIC_DATA_SET;
209 }
210 }
211}
212
213std::map<std::string, std::string> NexusDescriptorLazy::initAllEntries() {
214
215 H5Eset_auto(H5E_DEFAULT, nullptr, nullptr);
216
217 std::map<std::string, std::string> allEntries;
218
219 // if the file exists read it
220 if (std::filesystem::exists(m_filename)) {
221 // if the file exists but cannot be opened, throw invalid
222 // NOTE must be std::invalid_argument for expected errors to be raised in python API
223 if (!H5::H5File::isAccessible(m_filename, Mantid::Nexus::H5Util::defaultFileAcc())) {
224 throw std::invalid_argument("ERROR: NexusDescriptorLazy couldn't open hdf5 file " + m_filename + "\n");
225 } else {
226 m_fileID = H5Fopen(m_filename.c_str(), H5F_ACC_RDONLY, Mantid::Nexus::H5Util::defaultFileAcc().getId());
227 }
228 if (!m_fileID.isValid()) {
229 throw std::invalid_argument("ERROR: NexusDescriptorLazy couldn't open hdf5 file " + m_filename + "\n");
230 }
231
232 // get all top-level entries
233 unsigned int depth = 0;
234 loadGroups(allEntries, "/", depth, INIT_DEPTH);
235 // set the first entry name/type
236 if (allEntries.size() > 1) {
237 m_firstEntryNameType = *(++allEntries.begin());
238 m_firstEntryNameType.first = m_firstEntryNameType.first.substr(1); // remove leading /
239 } else {
240 m_firstEntryNameType = std::make_pair("", UNKNOWN_CLASS);
241 }
242
243 // for levels beyond 2, only load special entries
244 depth = INIT_DEPTH;
245 for (std::string const &specialAddress : SPECIAL_ADDRESS) {
246 if (allEntries.contains(specialAddress))
247 loadGroups(allEntries, specialAddress, depth, ENTRY_DEPTH);
248 }
249
250 // get instrument up to a depth of 5
251 depth = ENTRY_DEPTH;
252 for (std::string const &specialAddress : SPECIAL_ADDRESS) {
253 if (allEntries.contains(specialAddress)) {
254 std::string instrumentAddress = specialAddress + "/instrument";
255 if (allEntries.contains(instrumentAddress)) {
256 loadGroups(allEntries, instrumentAddress, depth, INSTR_DEPTH);
257 }
258 }
259 }
260 } else {
261 // if the file does not exist, then leave allEntries empty
262 }
263 // rely on move semantics for single return
264 return allEntries;
265}
266
267} // namespace Mantid::Nexus
std::string name
Definition Run.cpp:60
static unsigned int const INSTR_DEPTH
static unsigned int const ENTRY_DEPTH
static std::unordered_set< std::string > const SPECIAL_ADDRESS
static unsigned int const INIT_DEPTH
static std::string const UNKNOWN_CLASS
uint64_t hsize_t
bool isValid() const
Return whether the UniqueId corresponds to a valid HDF5 object.
Definition UniqueID.h:53
hid_t get() const
Return the managed HDF5 handle.
Definition UniqueID.h:49
bool classTypeExistsChild(const std::string &parentPath, const std::string &classType) const
Query if a given type exists as a descendant of the supplied parentPath.
std::string const m_filename
Nexus HDF5 file name.
std::string getStrData(std::string const &address)
Get string data from a dataset at address.
std::unordered_set< std::string > m_allMisses
the set of non-existent entries that have been checked
std::pair< std::string, std::string > m_firstEntryNameType
std::shared_mutex m_readNexusMutex
mutex to protect reading from file after initialization in const methods
std::unordered_set< std::string > m_rootAttrs
Root attributes cache.
void loadGroups(std::map< std::string, std::string > &allEntries, std::string const &address, unsigned int depth, const unsigned int maxDepth)
bool hasRootAttr(std::string const &name) const
Query if the given attribute exists on the root node.
bool isEntry(std::string const &entryName, std::string const &groupClass) const
Checks if a full-address entry exists for a particular groupClass in a Nexus dataset.
bool classTypeExists(std::string const &classType) const
Query if a given type exists somewhere in the file.
std::map< std::string, std::string > m_allEntries
All entries metadata.
std::map< std::string, std::string > initAllEntries()
Sets m_allEntries, called in HDF5 constructor.
UniqueID<&H5Fclose > m_fileID
HDF5 File Handle.
A wrapper class for managing HDF5 object handles (hid_t).
Definition UniqueID.h:92
MANTID_NEXUS_DLL H5::FileAccPropList defaultFileAcc()
Default file access is H5F_CLOSE_STRONG.
Definition H5Util.cpp:119
Header for a base Nexus::Exception.
std::string const SCIENTIFIC_DATA_SET("SDS")
std::string const GROUP_CLASS_SPEC("NX_class")
STL namespace.