Mantid
Loading...
Searching...
No Matches
ConnectedComponentLabeling.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7#include <utility>
8
10
18#include "MantidKernel/Memory.h"
19
20using namespace Mantid::API;
21using namespace Mantid::Kernel;
23
24namespace Mantid::Crystal {
25namespace {
33size_t calculateMaxNeighbours(IMDHistoWorkspace const *const ws) {
34 const size_t ndims = ws->getNumDims();
35 size_t maxNeighbours = 3;
36 for (size_t i = 1; i < ndims; ++i) {
37 maxNeighbours *= 3;
38 }
39 maxNeighbours -= 1;
40 return maxNeighbours;
41}
42
51size_t calculateMaxClusters(IMDHistoWorkspace const *const ws, size_t nIterators) {
52 size_t maxClusters = 1;
53 for (size_t i = 0; i < ws->getNumDims(); ++i) {
54 maxClusters *= ws->getDimension(i)->getNBins() / 2;
55 }
56 maxClusters /= nIterators;
57 if (maxClusters == 0) {
58 maxClusters = ws->getNPoints();
59 }
60 return maxClusters;
61}
62
68std::shared_ptr<Mantid::API::IMDHistoWorkspace> cloneInputWorkspace(IMDHistoWorkspace_sptr &inWS) {
69 IMDHistoWorkspace_sptr outWS(inWS->clone());
70
71 // Initialize to zero.
73 for (int i = 0; i < static_cast<int>(outWS->getNPoints()); ++i) {
74 outWS->setSignalAt(i, 0);
75 outWS->setErrorSquaredAt(i, 0);
76 }
77
78 return outWS;
79}
80
87template <typename T> T reportEvery(const T &maxReports, const T &maxIterations) {
88 T frequency = maxReports;
89 if (maxIterations >= maxReports) {
90 frequency = maxIterations / maxReports;
91 }
92 return frequency;
93}
94
95// Helper function for determining if a set contains a specific value.
96template <typename Container>
97bool does_contain_key(const Container &container, const typename Container::key_type &value) {
98 return container.find(value) != container.end();
99}
100
101using EdgeIndexPair = boost::tuple<size_t, size_t>;
102using VecEdgeIndexPair = std::vector<EdgeIndexPair>;
103
120size_t doConnectedComponentLabeling(IMDIterator *iterator, BackgroundStrategy *const strategy,
121 VecElements &neighbourElements, Progress &progress, size_t maxNeighbours,
122 size_t startLabelId, VecEdgeIndexPair &edgeIndexVec) {
123 size_t currentLabelCount = startLabelId;
124 strategy->configureIterator(iterator); // Set up such things as desired Normalization.
125 do {
126 if (!strategy->isBackground(iterator)) {
127
128 size_t currentIndex = iterator->getLinearIndex();
129 progress.report();
130 // Linear indexes of neighbours
131 VecIndexes neighbourIndexes = iterator->findNeighbourIndexes();
132 VecIndexes nonEmptyNeighbourIndexes;
133 nonEmptyNeighbourIndexes.reserve(maxNeighbours);
134 SetIds neighbourIds;
135 // Discover non-empty neighbours
136 for (auto neighIndex : neighbourIndexes) {
137 if (!iterator->isWithinBounds(neighIndex)) {
138 /* Record labels which appear to belong to the same cluster, but
139 cannot be combined in this
140 pass and will later need to be conjoined and resolved. As Labels
141 cannot be guarnteed to be
142 correcly provided for all neighbours until the end. We must store
143 indexes instead.
144 */
145 edgeIndexVec.emplace_back(currentIndex, neighIndex);
146 continue;
147 }
148
149 const DisjointElement &neighbourElement = neighbourElements[neighIndex];
150
151 if (!neighbourElement.isEmpty()) {
152 nonEmptyNeighbourIndexes.emplace_back(neighIndex);
153 neighbourIds.insert(neighbourElement.getId());
154 }
155 }
156
157 if (nonEmptyNeighbourIndexes.empty()) {
158 DisjointElement &element = neighbourElements[currentIndex];
159 element.setId(static_cast<int>(currentLabelCount));
160 ++currentLabelCount;
161 } else if (neighbourIds.size() == 1) // Do we have a single unique id amongst all neighbours.
162 {
163 neighbourElements[currentIndex] = neighbourElements[nonEmptyNeighbourIndexes.front()]; // Copy
164 // non-empty
165 // neighbour
166 } else {
167 // Choose the lowest neighbour index as the parent.
168 size_t candidateSourceParentIndex = nonEmptyNeighbourIndexes[0];
169 for (size_t i = 1; i < nonEmptyNeighbourIndexes.size(); ++i) {
170 size_t neighIndex = nonEmptyNeighbourIndexes[i];
171 if (neighbourElements[neighIndex].getRoot() < neighbourElements[candidateSourceParentIndex].getRoot()) {
172 candidateSourceParentIndex = neighIndex;
173 }
174 }
175 // Get the chosen parent
176 DisjointElement &parentElement = neighbourElements[candidateSourceParentIndex];
177 // Union remainder parents with the chosen parent
178 for (auto neighIndex : nonEmptyNeighbourIndexes) {
179 if (neighIndex != candidateSourceParentIndex) {
180 neighbourElements[neighIndex].unionWith(&parentElement);
181 }
182 }
183
184 neighbourElements[currentIndex].unionWith(&parentElement);
185 }
186 }
187 } while (iterator->next());
188
189 return currentLabelCount;
190}
191
192Logger g_log("ConnectedComponentLabeling");
193
194void memoryCheck(size_t nPoints) {
195 size_t sizeOfElement = (3 * sizeof(signal_t)) + sizeof(bool);
196
197 MemoryStats memoryStats;
198 const size_t freeMemory = memoryStats.availMem(); // in kB
199 const size_t memoryCost = sizeOfElement * nPoints / 1000; // in kB
200 if (memoryCost > freeMemory) {
201 std::string basicMessage = "CCL requires more free memory than you have available.";
202 std::stringstream sstream;
203 sstream << basicMessage << " Requires " << memoryCost << " KB of contiguous memory.";
204 g_log.notice(sstream.str());
205 throw std::runtime_error(basicMessage);
206 }
207}
208} // namespace
209
215ConnectedComponentLabeling::ConnectedComponentLabeling(const size_t &startId, const std::optional<int> &nThreads)
216 : m_startId(startId) {
217 if (nThreads.has_value()) {
218 if (nThreads.value() < 0) {
219 throw std::invalid_argument("Cannot request that CCL runs with less than one thread!");
220 } else {
221 m_nThreadsToUse = nThreads.value(); // Follow explicit instructions if provided.
222 }
223 } else {
224 m_nThreadsToUse = API::FrameworkManager::Instance().getNumOMPThreads(); // Figure it out.
225 }
226}
227
235
240
241//----------------------------------------------------------------------------------------------
245
257 BackgroundStrategy *const baseStrategy,
258 Progress &progress) const {
259 std::map<size_t, std::shared_ptr<ICluster>> clusterMap;
260 VecElements neighbourElements(ws->getNPoints());
261
262 const size_t maxNeighbours = calculateMaxNeighbours(ws.get());
263
264 progress.doReport("Identifying clusters");
265 auto frequency = reportEvery<size_t>(10000, ws->getNPoints());
266 progress.resetNumSteps(frequency, 0.0, 0.8);
267
268 // For each process maintains pair of index from within process bounds to
269 // index outside process bounds
270 if (m_nThreadsToUse > 1) {
271 auto iterators = ws->createIterators(m_nThreadsToUse);
272 const size_t maxClustersPossible = calculateMaxClusters(ws.get(), m_nThreadsToUse);
273
274 std::vector<VecEdgeIndexPair> parallelEdgeVec(m_nThreadsToUse);
275
276 std::vector<std::map<size_t, std::shared_ptr<Cluster>>> parallelClusterMapVec(m_nThreadsToUse);
277
278 // ------------- Stage One. Local CCL in parallel.
279 g_log.debug("Parallel solve local CCL");
280 // PARALLEL_FOR_NO_WSP_CHECK()
281 for (int i = 0; i < m_nThreadsToUse; ++i) {
282 API::IMDIterator *iterator = iterators[i].get();
283 boost::scoped_ptr<BackgroundStrategy> strategy(baseStrategy->clone()); // local strategy
284 VecEdgeIndexPair &edgeVec = parallelEdgeVec[i]; // local edge indexes
285
286 const size_t startLabel = m_startId + (i * maxClustersPossible); // Ensure that label ids are
287 // totally unique within each
288 // parallel unit.
289 const size_t endLabel = doConnectedComponentLabeling(iterator, strategy.get(), neighbourElements, progress,
290 maxNeighbours, startLabel, edgeVec);
291
292 // Create clusters from labels.
293 std::map<size_t, std::shared_ptr<Cluster>> &localClusterMap = parallelClusterMapVec[i]; // local cluster map.
294 for (size_t labelId = startLabel; labelId != endLabel; ++labelId) {
295 auto cluster = std::make_shared<Cluster>(labelId); // Create a cluster for the label and key it by the label.
296 localClusterMap[labelId] = cluster;
297 }
298
299 // Associate the member DisjointElements with a cluster. Involves looping
300 // back over iterator.
301 iterator->jumpTo(0); // Reset
302 do {
303 if (!baseStrategy->isBackground(iterator)) {
304 // Second pass smoothing step
305 const size_t currentIndex = iterator->getLinearIndex();
306
307 const size_t &labelAtIndex = neighbourElements[currentIndex].getRoot();
308 localClusterMap[labelAtIndex]->addIndex(currentIndex);
309 }
310 } while (iterator->next());
311 }
312
313 // -------------------- Stage 2 --- Preparation stage for combining
314 // equivalent clusters. Must be done in sequence.
315 // Combine cluster maps processed by each thread.
316 ClusterRegister clusterRegister;
317 for (const auto &parallelClusterMap : parallelClusterMapVec) {
318 for (const auto &cluster : parallelClusterMap) {
319 clusterRegister.add(cluster.first, cluster.second);
320 }
321 }
322
323 // Percolate minimum label across boundaries for indexes where there is
324 // ambiguity.
325 g_log.debug("Percolate minimum label across boundaries");
326
327 for (auto &indexPairVec : parallelEdgeVec) {
328 for (auto &iit : indexPairVec) {
329 const DisjointElement &a = neighbourElements[iit.get<0>()];
330 const DisjointElement &b = neighbourElements[iit.get<1>()];
331 clusterRegister.merge(a, b);
332 }
333 }
334 clusterMap = clusterRegister.clusters(neighbourElements);
335
336 } else {
337 auto iterator = ws->createIterator(nullptr);
338 VecEdgeIndexPair edgeIndexPair; // This should never get filled in a single
339 // threaded situation.
340 size_t endLabelId = doConnectedComponentLabeling(iterator.get(), baseStrategy, neighbourElements, progress,
341 maxNeighbours, m_startId, edgeIndexPair);
342
343 // Create clusters from labels.
344 for (size_t labelId = m_startId; labelId != endLabelId; ++labelId) {
345 auto cluster = std::make_shared<Cluster>(labelId); // Create a cluster for the label and key it by the label.
346 clusterMap[labelId] = cluster;
347 }
348
349 // Associate the member DisjointElements with a cluster. Involves looping
350 // back over iterator.
351 iterator->jumpTo(0); // Reset
352 do {
353 const size_t currentIndex = iterator->getLinearIndex();
354 if (!baseStrategy->isBackground(iterator.get())) {
355 const int labelAtIndex = neighbourElements[currentIndex].getRoot();
356 clusterMap[labelAtIndex]->addIndex(currentIndex);
357 }
358 } while (iterator->next());
359 }
360 return clusterMap;
361}
362
370std::shared_ptr<Mantid::API::IMDHistoWorkspace> ConnectedComponentLabeling::execute(IMDHistoWorkspace_sptr ws,
371 BackgroundStrategy *const strategy,
372 Progress &progress) const {
373 ClusterTuple result = executeAndFetchClusters(std::move(ws), strategy, progress);
374 IMDHistoWorkspace_sptr outWS = result.get<0>(); // Get the workspace, but discard cluster objects.
375 return outWS;
376}
377
387 BackgroundStrategy *const strategy,
388 Progress &progress) const {
389 // Can we run the analysis
390 memoryCheck(ws->getNPoints());
391
392 // Perform the bulk of the connected component analysis, but don't collapse
393 // the elements yet.
394 ClusterMap clusters = calculateDisjointTree(ws, strategy, progress);
395
396 // Create the output workspace from the input workspace
397 g_log.debug("Start cloning input workspace");
398 IMDHistoWorkspace_sptr outWS = cloneInputWorkspace(ws);
399 g_log.debug("Finish cloning input workspace");
400
401 // Get the keys (label ids) first in order to do the next stage in parallel.
402 VecIndexes keys;
403 keys.reserve(clusters.size());
404 std::transform(clusters.cbegin(), clusters.cend(), std::back_inserter(keys),
405 [](const auto &cluster) { return cluster.first; });
406 // Write each cluster out to the output workspace
408 for (int i = 0; i < static_cast<int>(keys.size()); ++i) { // NOLINT
409 clusters[keys[i]]->writeTo(outWS);
410 }
411
412 return ClusterTuple(outWS, clusters);
413}
414
415} // namespace Mantid::Crystal
double value
The value of the point.
Definition FitMW.cpp:51
#define PARALLEL_FOR_NO_WSP_CHECK()
Abstract interface to MDHistoWorkspace, for use in exposing to Python.
This is an interface to an iterator of an IMDWorkspace.
Definition IMDIterator.h:39
virtual std::vector< size_t > findNeighbourIndexes() const =0
Find neighbouring indexes vertex touching.
virtual bool isWithinBounds(size_t index) const =0
Is index reachable by the iterator.
virtual bool next()=0
Advance to the next cell.
virtual size_t getLinearIndex() const =0
Get the linear index.
virtual void jumpTo(size_t index)=0
Jump to the index^th cell.
virtual uint64_t getNPoints() const =0
Get the number of points associated with the workspace.
virtual std::shared_ptr< const Mantid::Geometry::IMDDimension > getDimension(size_t index) const
Get a dimension.
virtual size_t getNumDims() const
Helper class for reporting progress from algorithms.
Definition Progress.h:25
void doReport(const std::string &msg="") override
Actually do the reporting, without changing the loop counter.
Definition Progress.cpp:70
BackgroundStrategy : Abstract class used for identifying elements of a IMDWorkspace that are not cons...
virtual void configureIterator(Mantid::API::IMDIterator *const iterator) const =0
virtual BackgroundStrategy * clone() const =0
virtual bool isBackground(Mantid::API::IMDIterator *const iterator) const =0
ClusterRegister : A fly-weight ICluster regeister.
void add(const size_t &label, const std::shared_ptr< ICluster > &cluster)
Add clusters.
MapCluster clusters(std::vector< DisjointElement > &elements) const
Get all combined clusters.
void merge(const DisjointElement &a, const DisjointElement &b) const
Merge clusters on the basis of known pairs of disjoint elements.
ConnectedComponentLabeling(const size_t &startId=1, const std::optional< int > &nThreads=std::nullopt)
Constructor.
void startLabelingId(const size_t &id)
Setter for the label id.
size_t getStartLabelId() const
Getter for the start label id.
ConnectedComponentMappingTypes::ClusterTuple executeAndFetchClusters(Mantid::API::IMDHistoWorkspace_sptr ws, BackgroundStrategy *const strategy, Mantid::API::Progress &progress) const
Execute and return clusters, as well as maps to integratable clusters.
std::shared_ptr< Mantid::API::IMDHistoWorkspace > execute(Mantid::API::IMDHistoWorkspace_sptr ws, BackgroundStrategy *const strategy, Mantid::API::Progress &progress) const
Execute and return clusters.
ConnectedComponentMappingTypes::ClusterMap calculateDisjointTree(const Mantid::API::IMDHistoWorkspace_sptr &ws, BackgroundStrategy *const baseStrategy, Mantid::API::Progress &progress) const
Calculate the disjoint element tree across the image.
virtual ~ConnectedComponentLabeling()
Destructor.
DisjointElement : Cluster item used in a disjoint-set data structure.
void unionWith(DisjointElement *other)
Union with other.
Models a Container is used to hold a sample in the beam.
Definition Container.h:24
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition Logger.h:51
void debug(const std::string &msg)
Logs at debug level.
Definition Logger.cpp:145
void notice(const std::string &msg)
Logs at notice level.
Definition Logger.cpp:126
This class is responsible for memory statistics.
Definition Memory.h:28
std::size_t availMem() const
Returns the available memory of the system in kiB.
Definition Memory.cpp:402
void resetNumSteps(int64_t nsteps, double start, double end)
Change the number of steps between start/end.
void report()
Increments the loop counter by 1, then sends the progress notification on behalf of its algorithm.
Kernel::Logger g_log("ExperimentInfo")
static logger object
std::shared_ptr< IMDHistoWorkspace > IMDHistoWorkspace_sptr
shared pointer to Mantid::API::IMDHistoWorkspace
std::map< size_t, std::shared_ptr< Mantid::Crystal::ICluster > > ClusterMap
boost::tuple< Mantid::API::IMDHistoWorkspace_sptr, ClusterMap > ClusterTuple
double signal_t
Typedef for the signal recorded in a MDBox, etc.
Definition MDTypes.h:36