diff --git a/include/clustering.hpp b/include/clustering.hpp index 922b4a4..6b4b9fb 100644 --- a/include/clustering.hpp +++ b/include/clustering.hpp @@ -21,29 +21,40 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include "simplification.hpp" namespace Clustering { - -struct Distance_Matrix : public std::vector> { - Distance_Matrix() {} - Distance_Matrix(const curve_number_t n, const curve_number_t m) : std::vector>(n, std::vector(m, -1.0)) {} - - void print() const; -}; + +struct Distance_Matrix; extern Distance_Matrix distances; extern Curves simplifications; -class Cluster_Assignment : public std::vector> { +struct Distance_Matrix : public std::vector { + Distance_Matrix() = default; + Distance_Matrix(const curve_number_t n, const curve_number_t m) : std::vector(n, Distances(m, -1.0)) {} + void print() const; +}; + +class Cluster_Assignment : public std::vector { public: - inline Cluster_Assignment(const curve_number_t k = 0) : std::vector>(k, std::vector()) {} - - inline curve_number_t count(const curve_number_t i) const { - return operator[](i).size(); - } - - inline curve_number_t get(const curve_number_t i, const curve_number_t j) const { - return operator[](i)[j]; - } + explicit Cluster_Assignment(const curve_number_t k = 0) : std::vector(k, Curve_Numbers()) {} + curve_number_t count(const curve_number_t) const; + curve_number_t get(const curve_number_t, const curve_number_t) const; +}; + +struct Clustering_Result { + Curves centers; + distance_t value; + double running_time; + Cluster_Assignment assignment; + Curve& get(const curve_number_t); + curve_number_t size() const; + Curves::const_iterator cbegin() const; + Curves::const_iterator cend() const; + void compute_assignment(const Curves&, const bool = false); + void set_center_indices(const Curve_Numbers&); + +private: + Curve_Numbers center_indices; }; inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, const Curves &in, const Curves &simplified_in, Distance_Matrix &distances) { @@ -54,7 +65,7 @@ inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, co return distances[i][j]; } -inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, const Curves &simplified_in, const std::vector ¢ers, Distance_Matrix &distances) { +inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, const Curves &simplified_in, const Curve_Numbers ¢ers, Distance_Matrix &distances) { const auto infty = std::numeric_limits::infinity(); // cost for curve is infinity auto min_cost = infty; @@ -70,11 +81,11 @@ inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, return nearest; } -inline distance_t _curve_cost(const curve_number_t i, const Curves &in, const Curves &simplified_in, const std::vector ¢ers, Distance_Matrix &distances) { +inline distance_t _curve_cost(const curve_number_t i, const Curves &in, const Curves &simplified_in, const Curve_Numbers ¢ers, Distance_Matrix &distances) { return _cheap_dist(i, centers[_nearest_center(i, in, simplified_in, centers, distances)], in, simplified_in, distances); } -inline distance_t _center_cost_sum(const Curves &in, const Curves &simplified_in, const std::vector ¢ers, Distance_Matrix &distances) { +inline distance_t _center_cost_sum(const Curves &in, const Curves &simplified_in, const Curve_Numbers ¢ers, Distance_Matrix &distances) { distance_t cost = 0; // for all curves @@ -85,47 +96,6 @@ inline distance_t _center_cost_sum(const Curves &in, const Curves &simplified_in return cost; } -struct Clustering_Result { - Curves centers; - distance_t value; - double running_time; - Cluster_Assignment assignment; - - inline Curve& get(const curve_number_t i) { - return centers[i]; - } - inline curve_number_t size() const { - return centers.size(); - } - - inline Curves::const_iterator cbegin() const { - return centers.cbegin(); - } - - inline Curves::const_iterator cend() const { - return centers.cend(); - } - - inline void compute_assignment(const Curves &in, const bool consecutive_call = false) { - assignment = Cluster_Assignment(centers.size()); - if (consecutive_call and in.size() == distances.size()) { - for (curve_number_t i = 0; i < in.size(); ++i) assignment[_nearest_center(i, in, simplifications, center_indices, distances)].push_back(i); - } else { - auto ndistances = Distance_Matrix(in.size(), centers.size()); - auto ncenter_indices = std::vector(centers.size()); - std::iota(ncenter_indices.begin(), ncenter_indices.end(), 0); - for (curve_number_t i = 0; i < in.size(); ++i) assignment[_nearest_center(i, in, centers, ncenter_indices, ndistances)].push_back(i); - } - } - - inline void set_center_indices(const std::vector &pcenter_indices) { - center_indices = pcenter_indices; - } - -private: - std::vector center_indices; -}; - Clustering_Result kl_cluster(const curve_number_t, const curve_size_t, const Curves &, const bool, const bool, const bool, const bool); Clustering_Result kl_center(const curve_number_t, const curve_size_t, const Curves &, const bool = false, const bool = true, const bool = false); diff --git a/include/coreset.hpp b/include/coreset.hpp index 49e4933..cb004da 100644 --- a/include/coreset.hpp +++ b/include/coreset.hpp @@ -23,11 +23,11 @@ class Median_Coreset { parameter_t epsilon; distance_t constant; Clustering::Clustering_Result c_approx; - std::vector cluster_costs; - std::vector cluster_sizes; - std::vector coreset; - std::vector lambda; - std::vector probabilities; + Distances cluster_costs; + Curve_Numbers cluster_sizes; + Curve_Numbers coreset; + Distances lambda; + Parameters probabilities; public: inline Median_Coreset(const curve_number_t k, curve_size_t ell, const Curves &in, const parameter_t epsilon, const distance_t constant = 1) : in{in}, k{k}, ell{ell}, epsilon{epsilon}, constant{constant}, cluster_costs(k, 0), cluster_sizes(k, 0), lambda(in.size()), Lambda{2*k + 12*std::sqrt(k) + 18}, probabilities(in.size()), c_approx{Clustering::kl_median(k, ell, in)} { diff --git a/include/frechet.hpp b/include/frechet.hpp index 348c48c..a4d5b9c 100644 --- a/include/frechet.hpp +++ b/include/frechet.hpp @@ -35,8 +35,8 @@ namespace Continuous { Distance _distance(const Curve&, const Curve&, distance_t, distance_t); bool _less_than_or_equal(const distance_t, const Curve&, const Curve&, - std::vector>&, std::vector>&, - std::vector>&, std::vector>&); + std::vector&, std::vector&, + std::vector&, std::vector&); distance_t _greedy_upper_bound(const Curve&, const Curve&); distance_t _projective_lower_bound(const Curve&, const Curve&); @@ -52,7 +52,7 @@ namespace Discrete { Distance distance(const Curve&, const Curve&); - distance_t _dp(std::vector> &a, const curve_size_t i, const curve_size_t j, + distance_t _dp(std::vector &a, const curve_size_t i, const curve_size_t j, const Curve &curve1, const Curve &curve2); } } diff --git a/include/simplification.hpp b/include/simplification.hpp index 07bca63..629c3b6 100644 --- a/include/simplification.hpp +++ b/include/simplification.hpp @@ -27,11 +27,11 @@ namespace Simplification { class Subcurve_Shortcut_Graph { Curve& curve; - std::vector> edges; + std::vector edges; public: - Subcurve_Shortcut_Graph(Curve &curve) : curve{curve}, edges{std::vector>(curve.complexity(), std::vector(curve.complexity(), std::numeric_limits::infinity()))} { + Subcurve_Shortcut_Graph(Curve &curve) : curve{curve}, edges{std::vector(curve.complexity(), Distances(curve.complexity(), std::numeric_limits::infinity()))} { if (Config::verbosity > 1) std::cout << "SIMPL: computing shortcut graph" << std::endl; const curve_size_t complexity = curve.complexity(); Curve segment(2, curve.front().dimensions()); @@ -70,10 +70,10 @@ class Subcurve_Shortcut_Graph { return result; } - std::vector> distances(curve.complexity(), std::vector(l, std::numeric_limits::infinity())); + std::vector distances(curve.complexity(), Distances(l, std::numeric_limits::infinity())); std::vector> predecessors(curve.complexity(), std::vector(l)); - std::vector others; + Distances others; curve_size_t best = 0; for (curve_size_t i = 0; i < l; ++i) { diff --git a/include/types.hpp b/include/types.hpp index aa1e348..95e2ecd 100644 --- a/include/types.hpp +++ b/include/types.hpp @@ -14,24 +14,26 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include #include -typedef double distance_t; -typedef double coordinate_t; -typedef long double parameter_t; +typedef double distance_t; // Distances +typedef double coordinate_t; // Coordinates +typedef long double parameter_t; // Parameters, i.e., values in [0,1] -typedef unsigned long dimensions_t; -typedef unsigned long curve_size_t; -typedef unsigned long curve_number_t; +typedef unsigned long dimensions_t; // Dimensions +typedef unsigned long curve_size_t; // Curve complexities +typedef unsigned long curve_number_t; // Number of curves class Point; class Curve; class Interval; using Vector = Point; - using Intervals = std::vector; using Coordinates = std::vector; +using Distances = std::vector; +using Curve_Numbers = std::vector; +using Parameters = std::vector; -template +template::value, bool> = true> inline bool near_eq(T x, T y) { return std::abs(x - y) <= std::min(std::abs(x), std::abs(y)) * std::numeric_limits::epsilon(); } diff --git a/src/clustering.cpp b/src/clustering.cpp index dbda182..550c108 100644 --- a/src/clustering.cpp +++ b/src/clustering.cpp @@ -8,15 +8,15 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "clustering.hpp" - #include +#include "clustering.hpp" + namespace Clustering { Distance_Matrix distances; Curves simplifications; - + void Distance_Matrix::print() const { for (const auto &row : *this) { for (const auto elem : row) { @@ -26,6 +26,46 @@ void Distance_Matrix::print() const { } } +Curve& Clustering_Result::get(const curve_number_t i) { + return centers[i]; +} + +curve_number_t Clustering_Result::size() const { + return centers.size(); +} + +Curves::const_iterator Clustering_Result::cbegin() const { + return centers.cbegin(); +} + +Curves::const_iterator Clustering_Result::cend() const { + return centers.cend(); +} + +void Clustering_Result::compute_assignment(const Curves &in, const bool consecutive_call) { + assignment = Cluster_Assignment(centers.size()); + if (consecutive_call and in.size() == distances.size()) { + for (curve_number_t i = 0; i < in.size(); ++i) assignment[_nearest_center(i, in, simplifications, center_indices, distances)].push_back(i); + } else { + auto ndistances = Distance_Matrix(in.size(), centers.size()); + auto ncenter_indices = Curve_Numbers(centers.size()); + std::iota(ncenter_indices.begin(), ncenter_indices.end(), 0); + for (curve_number_t i = 0; i < in.size(); ++i) assignment[_nearest_center(i, in, centers, ncenter_indices, ndistances)].push_back(i); + } +} + +void Clustering_Result::set_center_indices(const Curve_Numbers &pcenter_indices) { + center_indices = pcenter_indices; +} + +curve_number_t Cluster_Assignment::count(const curve_number_t i) const { + return operator[](i).size(); +} + +curve_number_t Cluster_Assignment::get(const curve_number_t i, const curve_number_t j) const { + return operator[](i)[j]; +} + Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, const bool local_search = false, const bool consecutive_call = false, const bool random_start_center = true, const bool fast_simplification = false) { @@ -49,9 +89,9 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ } } - std::vector centers; + Curve_Numbers centers; - auto simplify = [&](const curve_number_t i) { + const auto simplify = [&](const curve_number_t i) { if (fast_simplification) { if (Config::verbosity > 0) std::cout << "KL_CLUST: computing approximate vertex restricted minimum error simplification" << std::endl; auto simplified_curve = Simplification::approximate_minimum_error_simplification(const_cast(in[i]), ell); @@ -172,7 +212,7 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ Curves simpl_centers; for (const auto center: centers) simpl_centers.push_back(simplifications[center]); - auto end = std::clock(); + const auto end = std::clock(); result.centers = simpl_centers; result.set_center_indices(centers); result.value = curr_maxdist; diff --git a/src/frechet.cpp b/src/frechet.cpp index 41c51a6..b8f7787 100644 --- a/src/frechet.cpp +++ b/src/frechet.cpp @@ -66,11 +66,11 @@ Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, dist if (Config::verbosity > 2) std::cout << "CFD: binary search using FSD" << std::endl; const auto infty = std::numeric_limits::infinity(); - std::vector> reachable1(curve1.complexity() - 1, std::vector(curve2.complexity(), infty)); - std::vector> reachable2(curve1.complexity(), std::vector(curve2.complexity() - 1, infty)); + std::vector reachable1(curve1.complexity() - 1, Parameters(curve2.complexity(), infty)); + std::vector reachable2(curve1.complexity(), Parameters(curve2.complexity() - 1, infty)); - std::vector> free_intervals1(curve2.complexity(), std::vector(curve1.complexity(), Interval())); - std::vector> free_intervals2(curve1.complexity(), std::vector(curve2.complexity(), Interval())); + std::vector free_intervals1(curve2.complexity(), Intervals(curve1.complexity(), Interval())); + std::vector free_intervals2(curve1.complexity(), Intervals(curve2.complexity(), Interval())); if (std::isnan(lb) or std::isnan(ub)) { result.value = std::numeric_limits::signaling_NaN(); @@ -101,8 +101,8 @@ Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, dist } bool _less_than_or_equal(const distance_t distance, Curve const& curve1, Curve const& curve2, - std::vector> &reachable1, std::vector> &reachable2, - std::vector> &free_intervals1, std::vector> &free_intervals2) { + std::vector &reachable1, std::vector &reachable2, + std::vector &free_intervals1, std::vector &free_intervals2) { if (Config::verbosity > 2) std::cout << "CFD: constructing FSD" << std::endl; const distance_t dist_sqr = distance * distance;