Mantid
Loading...
Searching...
No Matches
ConnectedComponentLabeling.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7#include <utility>
8
10
18#include "MantidKernel/Memory.h"
19
20using namespace Mantid::API;
21using namespace Mantid::Kernel;
23
24namespace Mantid::Crystal {
25namespace {
33size_t calculateMaxNeighbours(IMDHistoWorkspace const *const ws) {
34 const size_t ndims = ws->getNumDims();
35 size_t maxNeighbours = 3;
36 for (size_t i = 1; i < ndims; ++i) {
37 maxNeighbours *= 3;
38 }
39 maxNeighbours -= 1;
40 return maxNeighbours;
41}
42
51size_t calculateMaxClusters(IMDHistoWorkspace const *const ws, size_t nIterators) {
52 size_t maxClusters = 1;
53 for (size_t i = 0; i < ws->getNumDims(); ++i) {
54 maxClusters *= ws->getDimension(i)->getNBins() / 2;
55 }
56 maxClusters /= nIterators;
57 if (maxClusters == 0) {
58 maxClusters = ws->getNPoints();
59 }
60 return maxClusters;
61}
62
68std::shared_ptr<Mantid::API::IMDHistoWorkspace> cloneInputWorkspace(IMDHistoWorkspace_sptr &inWS) {
69 IMDHistoWorkspace_sptr outWS(inWS->clone());
70
71 // Initialize to zero.
73 for (int i = 0; i < static_cast<int>(outWS->getNPoints()); ++i) {
74 outWS->setSignalAt(i, 0);
75 outWS->setErrorSquaredAt(i, 0);
76 }
77
78 return outWS;
79}
80
87template <typename T> T reportEvery(const T &maxReports, const T &maxIterations) {
88 T frequency = maxReports;
89 if (maxIterations >= maxReports) {
90 frequency = maxIterations / maxReports;
91 }
92 return frequency;
93}
94
95// Helper function for determining if a set contains a specific value.
96template <typename Container>
97bool does_contain_key(const Container &container, const typename Container::key_type &value) {
98 return container.find(value) != container.end();
99}
100
101using EdgeIndexPair = boost::tuple<size_t, size_t>;
102using VecEdgeIndexPair = std::vector<EdgeIndexPair>;
103
120size_t doConnectedComponentLabeling(IMDIterator *iterator, BackgroundStrategy *const strategy,
121 VecElements &neighbourElements, Progress &progress, size_t maxNeighbours,
122 size_t startLabelId, VecEdgeIndexPair &edgeIndexVec) {
123 size_t currentLabelCount = startLabelId;
124 strategy->configureIterator(iterator); // Set up such things as desired Normalization.
125 do {
126 if (!strategy->isBackground(iterator)) {
127
128 size_t currentIndex = iterator->getLinearIndex();
129 progress.report();
130 // Linear indexes of neighbours
131 VecIndexes neighbourIndexes = iterator->findNeighbourIndexes();
132 VecIndexes nonEmptyNeighbourIndexes;
133 nonEmptyNeighbourIndexes.reserve(maxNeighbours);
134 SetIds neighbourIds;
135 // Discover non-empty neighbours
136 for (auto neighIndex : neighbourIndexes) {
137 if (!iterator->isWithinBounds(neighIndex)) {
138 /* Record labels which appear to belong to the same cluster, but
139 cannot be combined in this
140 pass and will later need to be conjoined and resolved. As Labels
141 cannot be guarnteed to be
142 correcly provided for all neighbours until the end. We must store
143 indexes instead.
144 */
145 edgeIndexVec.emplace_back(currentIndex, neighIndex);
146 continue;
147 }
148
149 const DisjointElement &neighbourElement = neighbourElements[neighIndex];
150
151 if (!neighbourElement.isEmpty()) {
152 nonEmptyNeighbourIndexes.emplace_back(neighIndex);
153 neighbourIds.insert(neighbourElement.getId());
154 }
155 }
156
157 if (nonEmptyNeighbourIndexes.empty()) {
158 DisjointElement &element = neighbourElements[currentIndex];
159 element.setId(static_cast<int>(currentLabelCount));
160 ++currentLabelCount;
161 } else if (neighbourIds.size() == 1) // Do we have a single unique id amongst all neighbours.
162 {
163 neighbourElements[currentIndex] = neighbourElements[nonEmptyNeighbourIndexes.front()]; // Copy
164 // non-empty
165 // neighbour
166 } else {
167 // Choose the lowest neighbour index as the parent.
168 size_t candidateSourceParentIndex = nonEmptyNeighbourIndexes[0];
169 for (size_t i = 1; i < nonEmptyNeighbourIndexes.size(); ++i) {
170 size_t neighIndex = nonEmptyNeighbourIndexes[i];
171 if (neighbourElements[neighIndex].getRoot() < neighbourElements[candidateSourceParentIndex].getRoot()) {
172 candidateSourceParentIndex = neighIndex;
173 }
174 }
175 // Get the chosen parent
176 DisjointElement &parentElement = neighbourElements[candidateSourceParentIndex];
177 // Union remainder parents with the chosen parent
178 for (auto neighIndex : nonEmptyNeighbourIndexes) {
179 if (neighIndex != candidateSourceParentIndex) {
180 neighbourElements[neighIndex].unionWith(&parentElement);
181 }
182 }
183
184 neighbourElements[currentIndex].unionWith(&parentElement);
185 }
186 }
187 } while (iterator->next());
188
189 return currentLabelCount;
190}
191
192Logger g_log("ConnectedComponentLabeling");
193
194void memoryCheck(size_t nPoints) {
195 size_t sizeOfElement = (3 * sizeof(signal_t)) + sizeof(bool);
196
197 MemoryStats memoryStats;
198 const size_t freeMemory = memoryStats.availMem(); // in kB
199 const size_t memoryCost = sizeOfElement * nPoints / 1000; // in kB
200 if (memoryCost > freeMemory) {
201 std::string basicMessage = "CCL requires more free memory than you have available.";
202 std::stringstream sstream;
203 sstream << basicMessage << " Requires " << memoryCost << " KB of contiguous memory.";
204 g_log.notice(sstream.str());
205 throw std::runtime_error(basicMessage);
206 }
207}
208} // namespace
209
215ConnectedComponentLabeling::ConnectedComponentLabeling(const size_t &startId, boost::optional<int> nThreads)
216 : m_startId(startId), m_nThreads(std::move(nThreads)) {
217 if (m_nThreads.is_initialized() && m_nThreads.get() < 0) {
218 throw std::invalid_argument("Cannot request that CCL runs with less than one thread!");
219 }
220}
221
229
234
235//----------------------------------------------------------------------------------------------
239
245 if (m_nThreads.is_initialized()) {
246 return m_nThreads.get(); // Follow explicit instructions if provided.
247 } else {
248 return API::FrameworkManager::Instance().getNumOMPThreads(); // Figure it out.
249 }
250}
251
263 BackgroundStrategy *const baseStrategy,
264 Progress &progress) const {
265 std::map<size_t, std::shared_ptr<ICluster>> clusterMap;
266 VecElements neighbourElements(ws->getNPoints());
267
268 const size_t maxNeighbours = calculateMaxNeighbours(ws.get());
269
270 progress.doReport("Identifying clusters");
271 auto frequency = reportEvery<size_t>(10000, ws->getNPoints());
272 progress.resetNumSteps(frequency, 0.0, 0.8);
273
274 // For each process maintains pair of index from within process bounds to
275 // index outside process bounds
276 const int nThreadsToUse = getNThreads();
277
278 if (nThreadsToUse > 1) {
279 auto iterators = ws->createIterators(nThreadsToUse);
280 const size_t maxClustersPossible = calculateMaxClusters(ws.get(), nThreadsToUse);
281
282 std::vector<VecEdgeIndexPair> parallelEdgeVec(nThreadsToUse);
283
284 std::vector<std::map<size_t, std::shared_ptr<Cluster>>> parallelClusterMapVec(nThreadsToUse);
285
286 // ------------- Stage One. Local CCL in parallel.
287 g_log.debug("Parallel solve local CCL");
288 // PARALLEL_FOR_NO_WSP_CHECK()
289 for (int i = 0; i < nThreadsToUse; ++i) {
290 API::IMDIterator *iterator = iterators[i].get();
291 boost::scoped_ptr<BackgroundStrategy> strategy(baseStrategy->clone()); // local strategy
292 VecEdgeIndexPair &edgeVec = parallelEdgeVec[i]; // local edge indexes
293
294 const size_t startLabel = m_startId + (i * maxClustersPossible); // Ensure that label ids are
295 // totally unique within each
296 // parallel unit.
297 const size_t endLabel = doConnectedComponentLabeling(iterator, strategy.get(), neighbourElements, progress,
298 maxNeighbours, startLabel, edgeVec);
299
300 // Create clusters from labels.
301 std::map<size_t, std::shared_ptr<Cluster>> &localClusterMap = parallelClusterMapVec[i]; // local cluster map.
302 for (size_t labelId = startLabel; labelId != endLabel; ++labelId) {
303 auto cluster = std::make_shared<Cluster>(labelId); // Create a cluster for the label and key it by the label.
304 localClusterMap[labelId] = cluster;
305 }
306
307 // Associate the member DisjointElements with a cluster. Involves looping
308 // back over iterator.
309 iterator->jumpTo(0); // Reset
310 do {
311 if (!baseStrategy->isBackground(iterator)) {
312 // Second pass smoothing step
313 const size_t currentIndex = iterator->getLinearIndex();
314
315 const size_t &labelAtIndex = neighbourElements[currentIndex].getRoot();
316 localClusterMap[labelAtIndex]->addIndex(currentIndex);
317 }
318 } while (iterator->next());
319 }
320
321 // -------------------- Stage 2 --- Preparation stage for combining
322 // equivalent clusters. Must be done in sequence.
323 // Combine cluster maps processed by each thread.
324 ClusterRegister clusterRegister;
325 for (const auto &parallelClusterMap : parallelClusterMapVec) {
326 for (const auto &cluster : parallelClusterMap) {
327 clusterRegister.add(cluster.first, cluster.second);
328 }
329 }
330
331 // Percolate minimum label across boundaries for indexes where there is
332 // ambiguity.
333 g_log.debug("Percolate minimum label across boundaries");
334
335 for (auto &indexPairVec : parallelEdgeVec) {
336 for (auto &iit : indexPairVec) {
337 DisjointElement &a = neighbourElements[iit.get<0>()];
338 DisjointElement &b = neighbourElements[iit.get<1>()];
339 clusterRegister.merge(a, b);
340 }
341 }
342 clusterMap = clusterRegister.clusters(neighbourElements);
343
344 } else {
345 auto iterator = ws->createIterator(nullptr);
346 VecEdgeIndexPair edgeIndexPair; // This should never get filled in a single
347 // threaded situation.
348 size_t endLabelId = doConnectedComponentLabeling(iterator.get(), baseStrategy, neighbourElements, progress,
349 maxNeighbours, m_startId, edgeIndexPair);
350
351 // Create clusters from labels.
352 for (size_t labelId = m_startId; labelId != endLabelId; ++labelId) {
353 auto cluster = std::make_shared<Cluster>(labelId); // Create a cluster for the label and key it by the label.
354 clusterMap[labelId] = cluster;
355 }
356
357 // Associate the member DisjointElements with a cluster. Involves looping
358 // back over iterator.
359 iterator->jumpTo(0); // Reset
360 do {
361 const size_t currentIndex = iterator->getLinearIndex();
362 if (!baseStrategy->isBackground(iterator.get())) {
363 const int labelAtIndex = neighbourElements[currentIndex].getRoot();
364 clusterMap[labelAtIndex]->addIndex(currentIndex);
365 }
366 } while (iterator->next());
367 }
368 return clusterMap;
369}
370
378std::shared_ptr<Mantid::API::IMDHistoWorkspace> ConnectedComponentLabeling::execute(IMDHistoWorkspace_sptr ws,
379 BackgroundStrategy *const strategy,
380 Progress &progress) const {
381 ClusterTuple result = executeAndFetchClusters(std::move(ws), strategy, progress);
382 IMDHistoWorkspace_sptr outWS = result.get<0>(); // Get the workspace, but discard cluster objects.
383 return outWS;
384}
385
395 BackgroundStrategy *const strategy,
396 Progress &progress) const {
397 // Can we run the analysis
398 memoryCheck(ws->getNPoints());
399
400 // Perform the bulk of the connected component analysis, but don't collapse
401 // the elements yet.
402 ClusterMap clusters = calculateDisjointTree(ws, strategy, progress);
403
404 // Create the output workspace from the input workspace
405 g_log.debug("Start cloning input workspace");
406 IMDHistoWorkspace_sptr outWS = cloneInputWorkspace(ws);
407 g_log.debug("Finish cloning input workspace");
408
409 // Get the keys (label ids) first in order to do the next stage in parallel.
410 VecIndexes keys;
411 keys.reserve(clusters.size());
412 std::transform(clusters.cbegin(), clusters.cend(), std::back_inserter(keys),
413 [](const auto &cluster) { return cluster.first; });
414 // Write each cluster out to the output workspace
416 for (int i = 0; i < static_cast<int>(keys.size()); ++i) { // NOLINT
417 clusters[keys[i]]->writeTo(outWS);
418 }
419
420 return ClusterTuple(outWS, clusters);
421}
422
423} // namespace Mantid::Crystal
double value
The value of the point.
Definition: FitMW.cpp:51
#define PARALLEL_FOR_NO_WSP_CHECK()
Abstract interface to MDHistoWorkspace, for use in exposing to Python.
This is an interface to an iterator of an IMDWorkspace.
Definition: IMDIterator.h:39
virtual std::vector< size_t > findNeighbourIndexes() const =0
Find neighbouring indexes vertex touching.
virtual bool isWithinBounds(size_t index) const =0
Is index reachable by the iterator.
virtual bool next()=0
Advance to the next cell.
virtual size_t getLinearIndex() const =0
Get the linear index.
virtual void jumpTo(size_t index)=0
Jump to the index^th cell.
virtual uint64_t getNPoints() const =0
Get the number of points associated with the workspace.
virtual std::shared_ptr< const Mantid::Geometry::IMDDimension > getDimension(size_t index) const
Get a dimension.
Definition: MDGeometry.cpp:161
virtual size_t getNumDims() const
Definition: MDGeometry.cpp:148
Helper class for reporting progress from algorithms.
Definition: Progress.h:25
void doReport(const std::string &msg="") override
Actually do the reporting, without changing the loop counter.
Definition: Progress.cpp:70
BackgroundStrategy : Abstract class used for identifying elements of a IMDWorkspace that are not cons...
virtual void configureIterator(Mantid::API::IMDIterator *const iterator) const =0
virtual BackgroundStrategy * clone() const =0
virtual bool isBackground(Mantid::API::IMDIterator *const iterator) const =0
ClusterRegister : A fly-weight ICluster regeister.
void add(const size_t &label, const std::shared_ptr< ICluster > &cluster)
Add clusters.
MapCluster clusters(std::vector< DisjointElement > &elements) const
Get all combined clusters.
void merge(const DisjointElement &a, const DisjointElement &b) const
Merge clusters on the basis of known pairs of disjoint elements.
const boost::optional< int > m_nThreads
Run multithreaded.
void startLabelingId(const size_t &id)
Setter for the label id.
size_t getStartLabelId() const
Getter for the start label id.
ConnectedComponentLabeling(const size_t &startId=1, boost::optional< int > nThreads=boost::none)
Constructor.
ConnectedComponentMappingTypes::ClusterTuple executeAndFetchClusters(Mantid::API::IMDHistoWorkspace_sptr ws, BackgroundStrategy *const strategy, Mantid::API::Progress &progress) const
Execute and return clusters, as well as maps to integratable clusters.
std::shared_ptr< Mantid::API::IMDHistoWorkspace > execute(Mantid::API::IMDHistoWorkspace_sptr ws, BackgroundStrategy *const strategy, Mantid::API::Progress &progress) const
Execute and return clusters.
ConnectedComponentMappingTypes::ClusterMap calculateDisjointTree(const Mantid::API::IMDHistoWorkspace_sptr &ws, BackgroundStrategy *const baseStrategy, Mantid::API::Progress &progress) const
Calculate the disjoint element tree across the image.
virtual ~ConnectedComponentLabeling()
Destructor.
int getNThreads() const
Get the number of threads to use.
DisjointElement : Cluster item used in a disjoint-set data structure.
void unionWith(DisjointElement *other)
Union with other.
Models a Container is used to hold a sample in the beam.
Definition: Container.h:24
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition: Logger.h:52
void debug(const std::string &msg)
Logs at debug level.
Definition: Logger.cpp:114
void notice(const std::string &msg)
Logs at notice level.
Definition: Logger.cpp:95
This class is responsible for memory statistics.
Definition: Memory.h:28
std::size_t availMem() const
Returns the available memory of the system in kiB.
Definition: Memory.cpp:398
void resetNumSteps(int64_t nsteps, double start, double end)
Change the number of steps between start/end.
void report()
Increments the loop counter by 1, then sends the progress notification on behalf of its algorithm.
Definition: ProgressBase.h:51
static T & Instance()
Return a reference to the Singleton instance, creating it if it does not already exist Creation is do...
Kernel::Logger g_log("ExperimentInfo")
static logger object
std::shared_ptr< IMDHistoWorkspace > IMDHistoWorkspace_sptr
shared pointer to Mantid::API::IMDHistoWorkspace
std::map< size_t, std::shared_ptr< Mantid::Crystal::ICluster > > ClusterMap
boost::tuple< Mantid::API::IMDHistoWorkspace_sptr, ClusterMap > ClusterTuple
double signal_t
Typedef for the signal recorded in a MDBox, etc.
Definition: MDTypes.h:36
STL namespace.