49 #ifndef OPENMS_COMPARISON_CLUSTERING_GRIDBASEDCLUSTERING_H
50 #define OPENMS_COMPARISON_CLUSTERING_GRIDBASEDCLUSTERING_H
65 MinimumDistance(
const int& cluster_index,
const int& nearest_neighbour_index,
const double& distance);
70 int getClusterIndex()
const;
75 int getNearestNeighbourIndex()
const;
122 template <
typename Metric>
145 const std::vector<double>& data_y,
const std::vector<int>& properties_A,
146 const std::vector<int>& properties_B, std::vector<double> grid_spacing_x,
147 std::vector<double> grid_spacing_y) :
149 grid_(grid_spacing_x, grid_spacing_y)
151 init_(data_x, data_y, properties_A, properties_B);
163 const std::vector<double>& data_y, std::vector<double> grid_spacing_x,
164 std::vector<double> grid_spacing_y) :
166 grid_(grid_spacing_x, grid_spacing_y)
169 std::vector<int> properties_A(data_x.size(), -1);
170 std::vector<int> properties_B(data_x.size(), -1);
171 init_(data_x, data_y, properties_A, properties_B);
185 typedef std::multiset<MinimumDistance>::iterator MultisetIterator;
192 MultisetIterator smallest_distance_it =
distances_.lower_bound(zero_distance);
201 std::vector<int> points1 = cluster1.
getPoints();
202 std::vector<int> points2 = cluster2.
getPoints();
203 std::vector<int> new_points;
204 new_points.reserve(points1.size() + points2.size());
205 new_points.insert(new_points.end(), points1.begin(), points1.end());
206 new_points.insert(new_points.end(), points2.begin(), points2.end());
208 double new_x = (cluster1.
getCentre().
getX() * points1.size() + cluster2.
getCentre().
getX() * points2.size()) / (points1.size() + points2.size());
209 double new_y = (cluster1.
getCentre().
getY() * points1.size() + cluster2.
getCentre().
getY() * points2.size()) / (points1.size() + points2.size());
213 Rectangle new_box(box1);
221 throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__,
"Property A of both clusters not the same. ",
"A");
227 std::vector<int> new_B;
228 new_B.reserve(B1.size() + B2.size());
229 new_B.insert(new_B.end(), B1.begin(), B1.end());
230 new_B.insert(new_B.end(), B2.begin(), B2.end());
236 clusters_.insert(std::make_pair(cluster_index1, new_cluster));
248 std::vector<int> clusters_to_be_updated;
249 clusters_to_be_updated.push_back(cluster_index1);
272 for (std::vector<int>::const_iterator cluster_index = clusters_to_be_updated.begin(); cluster_index != clusters_to_be_updated.end(); ++cluster_index)
297 std::vector<double> grid_spacing_y_new;
298 grid_spacing_y_new.push_back(grid_spacing_y.front());
299 grid_spacing_y_new.push_back(grid_spacing_y.back());
305 int cluster_index = it->first;
312 for (
unsigned cell = 0; cell < grid_spacing_x.size(); ++cell)
314 CellIndex grid_index(cell, 1);
317 std::list<int> cluster_indices = grid_x_only.
getClusters(grid_index);
318 if (cluster_indices.size() > 1)
321 std::list<GridBasedCluster> cluster_list;
322 std::map<GridBasedCluster, int> index_list;
323 for (std::list<int>::iterator it = cluster_indices.begin(); it != cluster_indices.end(); ++it)
326 index_list.insert(std::make_pair(
clusters_final_.find(*it)->second, *it));
331 std::list<GridBasedCluster>::iterator c1 = cluster_list.begin();
332 std::list<GridBasedCluster>::iterator c2 = cluster_list.begin();
334 while (c1 != cluster_list.end())
336 double centre1x = (*c1).getCentre().getX();
337 double centre1y = (*c1).getCentre().getY();
338 double centre2x = (*c2).getCentre().getX();
339 double centre2y = (*c2).getCentre().getY();
341 double box1x_min = (*c1).getBoundingBox().minX();
342 double box1x_max = (*c1).getBoundingBox().maxX();
343 double box1y_min = (*c1).getBoundingBox().minY();
344 double box1y_max = (*c1).getBoundingBox().maxY();
345 double box2x_min = (*c2).getBoundingBox().minX();
346 double box2x_max = (*c2).getBoundingBox().maxX();
347 double box2y_min = (*c2).getBoundingBox().minY();
348 double box2y_max = (*c2).getBoundingBox().maxY();
355 bool overlap = (box1x_min <= box2x_max && box1x_min >= box2x_min) || (box1x_max >= box2x_min && box1x_max <= box2x_max);
369 std::vector<int> points1 = (*c1).getPoints();
370 std::vector<int> points2 = (*c2).getPoints();
371 std::vector<int> new_points;
372 new_points.reserve(points1.size() + points2.size());
373 new_points.insert(new_points.end(), points1.begin(), points1.end());
374 new_points.insert(new_points.end(), points2.begin(), points2.end());
376 double new_x = (centre1x * points1.size() + centre2x * points2.size()) / (points1.size() + points2.size());
377 double new_y = (centre1y * points1.size() + centre2y * points2.size()) / (points1.size() + points2.size());
379 double min_x = std::min(box1x_min, box2x_min);
380 double min_y = std::min(box1y_min, box2y_min);
381 double max_x = std::max(box1x_max, box2x_max);
382 double max_y = std::max(box1y_max, box2y_max);
384 Point new_centre(new_x, new_y);
385 Point position_min(min_x, min_y);
386 Point position_max(max_x, max_y);
387 Rectangle new_bounding_box(position_min, position_max);
394 clusters_final_.insert(std::make_pair(index_list.find(*c1)->second, new_cluster));
397 CellIndex cell_for_cluster1 = grid_x_only.
getIndex((*c1).getCentre());
398 CellIndex cell_for_cluster2 = grid_x_only.
getIndex((*c2).getCentre());
399 CellIndex cell_for_new_cluster = grid_x_only.
getIndex(new_centre);
401 grid_x_only.
removeCluster(cell_for_cluster1, index_list.find(*c1)->second);
402 grid_x_only.
removeCluster(cell_for_cluster2, index_list.find(*c2)->second);
403 grid_x_only.
addCluster(cell_for_new_cluster, index_list.find(*c1)->second);
420 std::map<int, GridBasedCluster>::iterator it =
clusters_final_.begin();
423 Rectangle box = it->second.getBoundingBox();
424 if (box.
maxY() - box.
minY() < threshold_y)
482 void init_(
const std::vector<double>& data_x,
const std::vector<double>& data_y,
483 const std::vector<int>& properties_A,
const std::vector<int>& properties_B)
486 for (
unsigned i = 0; i < data_x.size(); ++i)
488 Point position(data_x[i], data_y[i]);
489 Rectangle box(position, position);
494 pb.push_back(properties_B[i]);
498 clusters_.insert(std::make_pair(i, cluster));
505 std::map<int, GridBasedCluster>::iterator iterator = clusters_.begin();
506 while (iterator != clusters_.end())
508 int cluster_index = iterator->first;
516 clusters_.erase(iterator++);
546 if (A1 == -1 || A2 == -1 || std::find(B1.begin(), B1.end(), -1) != B1.end() || std::find(B2.begin(), B2.end(), -1) != B2.end())
552 bool vetoA = !(A1 == A2);
556 std::vector<int> B_intersection;
557 sort(B1.begin(), B1.end());
558 sort(B2.begin(), B2.end());
559 set_intersection(B1.begin(), B1.end(), B2.begin(), B2.end(), back_inserter(B_intersection));
560 bool vetoB = !B_intersection.empty();
562 return vetoA || vetoB;
581 CellIndex cell_index = grid_.
getIndex(centre);
584 int nearest_neighbour = -1;
587 for (
int i = -1; i <= 1; ++i)
589 for (
int j = -1; j <= 1; ++j)
591 CellIndex cell_index2(cell_index);
592 cell_index2.first += i;
593 cell_index2.second += j;
596 std::list<int> cluster_indices = grid_.
getClusters(cell_index2);
597 for (std::list<int>::const_iterator cluster_index2 = cluster_indices.begin(); cluster_index2 != cluster_indices.end(); ++cluster_index2)
599 if (*cluster_index2 != cluster_index)
603 double distance =
metric_(centre, centre2);
605 if (!veto && (distance < min_dist || nearest_neighbour == -1))
608 nearest_neighbour = *cluster_index2;
616 if (nearest_neighbour == -1)
619 clusters_final_.insert(std::make_pair(cluster_index, clusters_.find(cluster_index)->second));
624 distances_.insert(
MinimumDistance(cluster_index, nearest_neighbour, min_dist));
CoordinateType maxY() const
Accessor for max_ coordinate maximum.
Definition: DIntervalBase.h:259
void removeCluster(const CellIndex &cell_index, const int &cluster_index)
removes a cluster from this grid cell and removes the cell if no other cluster left ...
std::pair< int, int > CellIndex
Definition: ClusteringGrid.h:57
double distance_
distance between cluster and its nearest neighbour
Definition: GridBasedClustering.h:103
std::vector< double > getGridSpacingX() const
returns grid spacing in x direction
std::map< int, GridBasedCluster > clusters_final_
list of final clusters i.e. clusters that are no longer merged
Definition: GridBasedClustering.h:466
void enlarge(const PositionType &p)
Enlarges the bounding box such that it contains a position.
Definition: DBoundingBox.h:122
std::multiset< MinimumDistance > distances_
list of minimum distances stores the smallest of the distances in the head
Definition: GridBasedClustering.h:472
GridBasedCluster::Point Point
cluster centre, cluster bounding box, grid index
Definition: GridBasedClustering.h:130
std::list< int > getClusters(const CellIndex &cell_index) const
returns clusters in this grid cell
void removeSmallClustersY(double threshold_y)
removes clusters with bounding box dimension in y-direction below certain threshold ...
Definition: GridBasedClustering.h:418
bool findNearestNeighbour_(GridBasedCluster cluster, int cluster_index)
determines the nearest neighbour for each cluster
Definition: GridBasedClustering.h:578
std::vector< double > getGridSpacingY() const
returns grid spacing in y direction
void cluster()
performs the hierarchical clustering (merges clusters until their dimension exceeds that of cell) ...
Definition: GridBasedClustering.h:178
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
ClusteringGrid grid_
grid on which the position of the clusters are registered used in cluster method
Definition: GridBasedClustering.h:454
GridBasedCluster::Rectangle Rectangle
Definition: GridBasedClustering.h:131
std::map< int, GridBasedCluster > clusters_
list of clusters maps cluster indices to clusters
Definition: GridBasedClustering.h:460
Point getCentre() const
returns cluster centre
void extendClustersY()
extends clusters in y-direction if possible (merges clusters further in y-direction, i.e. clusters can now span multiple cells)
Definition: GridBasedClustering.h:291
GridBasedClustering(Metric metric, const std::vector< double > &data_x, const std::vector< double > &data_y, std::vector< double > grid_spacing_x, std::vector< double > grid_spacing_y)
initialises all data structures
Definition: GridBasedClustering.h:162
bool isNonEmptyCell(const CellIndex &cell_index) const
checks if there are clusters at this cell index
2D hierarchical clustering implementation optimized for large data sets containing many small cluster...
Definition: GridBasedClustering.h:123
int getClusterIndex() const
returns cluster index
CoordinateType minY() const
Accessor for max_ coordinate minimum.
Definition: DIntervalBase.h:247
void addCluster(const CellIndex &cell_index, const int &cluster_index)
adds a cluster to this grid cell
void endProgress() const
Ends the progress display.
data structure to store 2D data to be clustered e.g. (m/z, retention time) coordinates from multiplex...
Definition: ClusteringGrid.h:51
int cluster_index_
index in the cluster list
Definition: GridBasedClustering.h:93
int getPropertyA() const
returns property A
void init_(const std::vector< double > &data_x, const std::vector< double > &data_y, const std::vector< int > &properties_A, const std::vector< int > &properties_B)
initialises all data structures
Definition: GridBasedClustering.h:482
GridBasedClustering(Metric metric, const std::vector< double > &data_x, const std::vector< double > &data_y, const std::vector< int > &properties_A, const std::vector< int > &properties_B, std::vector< double > grid_spacing_x, std::vector< double > grid_spacing_y)
initialises all data structures
Definition: GridBasedClustering.h:144
Metric metric_
metric for measuring the distance between points in the 2D plane
Definition: GridBasedClustering.h:448
int getNearestNeighbourIndex() const
returns index of nearest cluster
std::vector< int > getPropertiesB() const
returns properties B of all points
std::map< int, GridBasedCluster > getResults() const
returns final results (mapping of cluster indices to clusters)
Definition: GridBasedClustering.h:438
Invalid value exception.
Definition: Exception.h:336
PositionType const & minPosition() const
Accessor to minimum position.
Definition: DIntervalBase.h:122
CellIndex getIndex(const Point &position) const
returns grid cell index (i,j) for the positions (x,y)
bool mergeVeto_(GridBasedCluster c1, GridBasedCluster c2) const
checks if two clusters can be merged Each point in a cluster can (optionally) have two properties A a...
Definition: GridBasedClustering.h:538
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void setProgress(SignedSize value) const
Sets the current progress.
ClusteringGrid::CellIndex CellIndex
Definition: GridBasedClustering.h:132
Rectangle getBoundingBox() const
returns bounding box
PositionType const & maxPosition() const
Accessor to maximum position.
Definition: DIntervalBase.h:128
CoordinateType getY() const
Name accessor for the second dimension. Only for DPosition<2>, for visualization. ...
Definition: DPosition.h:158
basic data structure for distances between clusters
Definition: GridBasedClustering.h:58
basic data structure for clustering
Definition: GridBasedCluster.h:55
int nearest_neighbour_index_
index of the nearest neighbour of the above cluster
Definition: GridBasedClustering.h:98
CoordinateType getX() const
Name accessor for the first dimension. Only for DPosition<2>, for visualization.
Definition: DPosition.h:151
std::vector< int > getPoints() const
returns indices of points in cluster