diff --git a/CMakeLists.txt b/CMakeLists.txt index da1e809..c233563 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,10 +13,7 @@ endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive") #supress error in older gcc set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-trapping-math") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-vec") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-loop") include_directories(${CMAKE_SOURCE_DIR}/include) diff --git a/Fred/__init__.py b/Fred/__init__.py index 2525212..2ce2e58 100644 --- a/Fred/__init__.py +++ b/Fred/__init__.py @@ -18,8 +18,8 @@ config = Config() config.available_memory = available_memory -def _optimize_centers(self, curves, consecutive_call=False, distance_func = 0): - all_balls = self.compute_center_enclosing_balls(curves, consecutive_call, distance_func) +def _optimize_centers(self, curves, consecutive_call=False): + all_balls = self.compute_center_enclosing_balls(curves, consecutive_call) for i, center_balls in enumerate(all_balls): path, _ = _stabbing_path(center_balls) self[i] = Curve(path, "{} (optimized)".format(self[i].name)) diff --git a/README.md b/README.md index 79cc67d..4d4188e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Fred ![alt text](https://raw.githubusercontent.com/derohde/Fred/master/logo/logo.png "Fred logo") -A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves. +A fast, scalable and light-weight C++ Fréchet and DTW distance library, exposed to python and focused on clustering of polygonal curves. ### DISTANCE FUNCTION FOR CLUSTERING CAN BE CHOSEN (heuristic in case of DTW) ### CURVE AND CURVES CAN NOW BE PICKLED @@ -66,18 +66,21 @@ All simplifications are vertex-restricted! #### Underlying distance function -The variable `distance_func` controls which distance function to use. Possible values: +The parameter `distance_func` controls which distance function to use. Possible values: - `0`: continuous Fréchet distance - `1`: discrete Fréchet distance - `2`: discrete dynamic time warping distance (algorithms are then of heuristic nature) +#### Consecutive call option + +The parameter `consecutive_call` controls whether distances and simplifications already computed in a previous clustering call are resused (set to `true` then); defaults to `false`. + #### discrete (k,l)-center clustering (continuous Fréchet) - from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616) - signature: `fred.discrete_klcenter(int k, int l, fred.Curves, bool local_search, bool consecutive_call, bool random_first_center, bool fast_simplification, int distance_func)` with parameters - `k`: number of centers - `l`: maximum complexity of the centers - `local_search`: number of iterations of local search to improve solution, defaults to `0` - - `consecutive_call`: reuses distances and simplifications already computed in a previous call if `true`, defaults to `false` - `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true - `fast_simplification`: determines whether to use the minimum error simplification or the faster approximate minimum error simplification, defaults to `false` - returns: `fred.Clustering_Result` with mebers @@ -90,7 +93,6 @@ The variable `distance_func` controls which distance function to use. Possible v - signature: `fred.discrete_klmedian(int k, int l, fred.Curves, bool consecutive_call, bool fast_simplification, int distance_func)` with parameters - `k`: number of centers - `l`: maximum complexity of the centers - - `consecutive_call`: reuses distances and simplifications already computed in a previous call if `true`, defaults to `false` - `fast_simplification`: determines whether to use the minimum error simplification or the faster approximate minimum error simplification, defaults to `false` - returns: `fred.Clustering_Result` with mebers - `value`: objective value @@ -102,8 +104,8 @@ The variable `distance_func` controls which distance function to use. Possible v - methods: - `len(fred.Clustering_Result)`: number of centers - `fred.Clustering_Result[i]`: get ith center - - `fred.Clustering_Result.compute_assignment(fred.Curves, bool consecutive_call, int distance_func)`: assigns every curve to its nearest center with parameter `consecutive_call`, which defaults to `false`; set to true, if you want to assign the curves used for clustering - - `fred.Clustering_Result.optimize(fred.Curves, bool consecutive_call, int distance_func)`: (heuristically) optimizes cluster centers using a [stabbing algorithm](https://arxiv.org/abs/2212.01458) + - `fred.Clustering_Result.compute_assignment(fred.Curves, bool consecutive_call)`: assigns every curve to its nearest center with parameter `consecutive_call`, which defaults to `false`; set to true, if you want to assign the curves used for clustering + - `fred.Clustering_Result.optimize(fred.Curves, bool consecutive_call)`: (heuristically) optimizes cluster centers using a [stabbing algorithm](https://arxiv.org/abs/2212.01458) - members: - `value`: objective value - `time`: running-time @@ -115,7 +117,7 @@ The variable `distance_func` controls which distance function to use. Possible v - `len(fred.Cluster_Assignment)`: number of centers - `fred.Cluster_Assignment.count(i)`: number of curves assigned to center `i` - `fred.Cluster_Assignment.get(i,j)`: get index of `j`th curve assigned to center `i` - - `fred.Cluster_Assignment.distance(i,j)`: get distance of `j`th curve assigned to center `i` to center `i` (only available when distance matrix is used) + - `fred.Cluster_Assignment.distance(i,j)`: get distance of `j`th curve assigned to center `i` to center `i` ### Dimension Reduction via Gaussian Random Projection - [Section 2 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves) @@ -257,7 +259,7 @@ curves.add(fred.Curve([[-10.92366833, -37.04470333],[-10.92388667, -37.04459 ] curves.add(fred.Curve([[-10.89688234, -37.05339968],[-10.89689515, -37.05332973],[-10.89688042, -37.05333247],[-10.89688029, -37.05333484],[-10.89688058, -37.05333512],[-10.89688073, -37.05333528],[-10.8968805 , -37.05333564],[-10.89688045, -37.0533357 ],[-10.89688034, -37.05333584],[-10.89688022, -37.05333665],[-10.89687998, -37.05333665],[-10.8968798 , -37.05333672],[-10.8968771 , -37.05333475],[-10.89686571, -37.05335033],[-10.89686453, -37.05338245],[-10.89690625, -37.053412 ],[-10.89714799, -37.05345149],[-10.89731552, -37.05353814],[-10.89722363, -37.0537264 ],[-10.89712402, -37.05392281],[-10.89700272, -37.05391454],[-10.89669729, -37.05377559],[-10.89637283, -37.05373788],[-10.89628634, -37.05376364],[-10.89625468, -37.05380163],[-10.89626958, -37.05404301],[-10.89630975, -37.05439376],[-10.8963886 , -37.05509832],[-10.89644765, -37.05538955],[-10.8965611 , -37.05575682],[-10.89665346, -37.05613108],[-10.89677345, -37.05672517],[-10.89689319, -37.05728734],[-10.89692781, -37.05739169],[-10.89695023, -37.05753143],[-10.89702285, -37.05769891],[-10.8971346 , -37.057792 ],[-10.89734674, -37.05797843],[-10.89746565, -37.05809327],[-10.89745873, -37.05809087],[-10.89746601, -37.05809045],[-10.89746603, -37.05809043],[-10.89746598, -37.0580906 ],[-10.89746595, -37.05809065],[-10.89750503, -37.05812029],[-10.89776568, -37.05832533],[-10.89815091, -37.05824039],[-10.89867417, -37.05785587],[-10.89905788, -37.05755751],[-10.89965258, -37.05710931],[-10.90031391, -37.05666742],[-10.90072693, -37.05639989],[-10.90117312, -37.0560867 ],[-10.90167986, -37.05570044],[-10.90223155, -37.05532095],[-10.90267435, -37.05501594],[-10.90281205, -37.05492839],[-10.90281977, -37.05492466],[-10.9028111 , -37.05493056],[-10.90281184, -37.05493098],[-10.90281179, -37.05493125],[-10.90281184, -37.05493146],[-10.9028119 , -37.05493155],[-10.90281195, -37.05493173],[-10.9028117 , -37.05493193],[-10.90281944, -37.05492637],[-10.90318115, -37.05466438],[-10.90361849, -37.05433795],[-10.90395364, -37.05420787],[-10.90395573, -37.05453654],[-10.90394111, -37.05505263],[-10.90394185, -37.05587736],[-10.90392791, -37.05664783],[-10.90389758, -37.05750308],[-10.90388737, -37.05786664],[-10.90403 , -37.05799884],[-10.90431907, -37.05799646],[-10.90463524, -37.05800058],[-10.90492265, -37.0580025 ],[-10.90509223, -37.0580149 ],[-10.90513417, -37.05805725],[-10.90519558, -37.05826165],[-10.90527994, -37.05877507],[-10.90535515, -37.05906472],[-10.90619649, -37.05905899],[-10.90641173, -37.06018562],[-10.90644041, -37.06067098],[-10.90642897, -37.06086107],[-10.90641925, -37.06087587],[-10.90642255, -37.06088143],[-10.90642329, -37.06088235],[-10.90642264, -37.06088388],[-10.90642211, -37.06090121],[-10.90642028, -37.06112732],[-10.90649051, -37.06164138],[-10.90728268, -37.06183192],[-10.90753491, -37.06201037],[-10.90761894, -37.06208487],[-10.90763191, -37.06209038],[-10.90764293, -37.06208956],[-10.90764576, -37.06209575],[-10.90764926, -37.06209673],[-10.90764498, -37.06210186],[-10.90764444, -37.0621018 ],[-10.9076441 , -37.06210176],[-10.90764407, -37.06210173],[-10.90791968, -37.06228879],[-10.90840258, -37.06261833],[-10.90885022, -37.06293506],[-10.90941396, -37.06335066],[-10.91002845, -37.06379331],[-10.91021656, -37.06385412],[-10.91021107, -37.06382905],[-10.91020892, -37.0638263 ],[-10.91020903, -37.06382629],[-10.91020921, -37.06382624],[-10.91020941, -37.06382628],[-10.91020941, -37.06382636],[-10.91020942, -37.0638264 ],[-10.91020943, -37.06382641],[-10.91021067, -37.06382699],[-10.91051354, -37.06396407],[-10.91086507, -37.06407459],[-10.91112765, -37.06418145],[-10.91112402, -37.06418525],[-10.91112739, -37.064179 ],[-10.91113069, -37.06417743],[-10.91113068, -37.06417746],[-10.91113069, -37.06417745],[-10.91113068, -37.06417741],[-10.91113062, -37.06417743],[-10.91113027, -37.06417782],[-10.91113022, -37.06417796],[-10.91113014, -37.06417815],[-10.91113018, -37.06417815],[-10.9111301 , -37.06417819],[-10.91113003, -37.06417818],[-10.91113018, -37.06417821],[-10.91121402, -37.06421624],[-10.91152921, -37.06426605],[-10.91195342, -37.06430617],[-10.91243864, -37.06431631],[-10.91301115, -37.06431624],[-10.91373203, -37.06430726],[-10.9143199 , -37.06424356],[-10.91526178, -37.06407409],[-10.91568718, -37.06396253],[-10.91601655, -37.06391468],[-10.91614016, -37.06400304],[-10.91613907, -37.06426894],[-10.91611837, -37.0645705 ],[-10.91620218, -37.06472138],[-10.91657107, -37.0648288 ],[-10.9170171 , -37.06493571],[-10.91728016, -37.06502682],[-10.91734995, -37.06509569],[-10.91735968, -37.06534346],[-10.91731253, -37.06557038],[-10.91730704, -37.06556575],[-10.91730599, -37.06555167]])) clustering = fred.discrete_klcenter(6, 8, curves, local_search = 10, fast_simplification=True) -clustering.optimize_centers(curves) +clustering.optimize_centers(curves, consecutive_call = True) -fred.plot_clustering(clustering, curves) +fred.plot_clustering(clustering, curves, vertex_markings=False) ``` diff --git a/include/clustering.hpp b/include/clustering.hpp index 7b28c0a..0620750 100644 --- a/include/clustering.hpp +++ b/include/clustering.hpp @@ -32,59 +32,70 @@ namespace py = pybind11; namespace Clustering { struct Distance_Matrix; +struct Cluster_Assignment; extern Distance_Matrix distances; extern Curves simplifications; -extern bool use_distance_matrix; struct Distance_Matrix : public std::vector { Distance_Matrix() = default; Distance_Matrix(const curve_number_t n, const curve_number_t m) : std::vector(n) { - for (curve_number_t i = 0; i < m; ++i) { + for (curve_number_t i = 0; i < n; ++i) { operator[](i).reserve(m); - std::generate_n(std::back_inserter(operator[](i)), m, [] { return std::make_unique(); }); + std::generate_n(std::back_inserter(operator[](i)), m, [] { return std::make_unique(); }); } } void print() const; }; -struct Cluster_Assignment : public std::vector { - explicit Cluster_Assignment(const curve_number_t k = 0) : std::vector(k, Curve_Numbers()) {} - curve_number_t count(const curve_number_t) const; - curve_number_t get(const curve_number_t, const curve_number_t) const; - distance_t distance(const curve_number_t, const curve_number_t) const; -}; - struct Clustering_Result { Curves centers; distance_t value; double running_time; - Cluster_Assignment assignment; - Curve& get(const curve_number_t); + explicit Clustering_Result(const unsigned int distance_func = 0) : distance_func{distance_func} {} + const Curve& get(const curve_number_t) const; + Cluster_Assignment& get_assignment() const; void set(const curve_number_t, const Curve&); curve_number_t size() const; Curves::const_iterator cbegin() const; Curves::const_iterator cend() const; - void compute_assignment(const Curves&, const bool = false, const unsigned int distance_func = 0); + void compute_assignment(const Curves&, const bool = false); void set_center_indices(const Curve_Numbers&); - py::list compute_center_enclosing_balls(const Curves&, const bool, const unsigned int); + py::list compute_center_enclosing_balls(const Curves&, const bool); + private: Curve_Numbers center_indices; + unsigned int distance_func; + std::unique_ptr assignment; +}; + +struct Cluster_Assignment : public std::vector { + explicit Cluster_Assignment(const Clustering_Result &cr, const Curves &ac, const unsigned int distance_func) : + std::vector(cr.size(), Curve_Numbers()), clustering_result{cr}, assignment_curves{ac}, distance_func{distance_func} {} + + curve_number_t count(const curve_number_t) const; + curve_number_t get(const curve_number_t, const curve_number_t) const; + distance_t distance(const curve_number_t, const curve_number_t) const; + +private: + const Clustering_Result &clustering_result; + const Curves &assignment_curves; + const unsigned int distance_func; }; inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, const Curves &in, const Curves &simplified_in, Distance_Matrix &distances, const unsigned int distance_func) { - if (use_distance_matrix) { - if (not distances[i][j]) { + if (Config::use_distance_matrix) { + if (not *distances[i][j]) { switch (distance_func) { case 0: - distances[i][j] = std::make_unique(Frechet::Continuous::distance(in[i], simplified_in[j])); + distances[i][j] = std::make_unique(Frechet::Continuous::distance(in[i], simplified_in[j])); break; case 1: - distances[i][j] = std::make_unique(Frechet::Discrete::distance(in[i], simplified_in[j])); + distances[i][j] = std::make_unique(Frechet::Discrete::distance(in[i], simplified_in[j])); break; case 2: - distances[i][j] = std::make_unique(Dynamic_Time_Warping::Discrete::distance(in[i], simplified_in[j])); + distances[i][j] = std::make_unique(Dynamic_Time_Warping::Discrete::distance(in[i], simplified_in[j])); break; } } @@ -106,13 +117,14 @@ inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, co inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, const Curves &simplified_in, const Curve_Numbers ¢ers, Distance_Matrix &distances, const unsigned int distance_func) { const distance_t infty = std::numeric_limits::infinity(); // cost for curve is infinity - distance_t min_cost = infty; + distance_t min_cost = infty, curr_cost; curve_number_t nearest = 0; // except there is a center with smaller cost, then choose the one with smallest cost for (curve_number_t j = 0; j < centers.size(); ++j) { - if (_cheap_dist(i, centers[j], in, simplified_in, distances, distance_func) < min_cost) { - min_cost = _cheap_dist(i, centers[j], in, simplified_in, distances, distance_func); + curr_cost = _cheap_dist(i, centers[j], in, simplified_in, distances, distance_func); + if (curr_cost < min_cost) { + min_cost = curr_cost; nearest = j; } } @@ -128,7 +140,7 @@ inline distance_t _center_cost_sum(const Curves &in, const Curves &simplified_in // for all curves for (curve_number_t i = 0; i < in.size(); ++i) { - const auto min_cost_elem = _curve_cost(i, in, simplified_in, centers, distances, distance_func); + const distance_t min_cost_elem = _curve_cost(i, in, simplified_in, centers, distances, distance_func); cost += min_cost_elem; } return cost; diff --git a/include/coreset.hpp b/include/coreset.hpp index b885cd7..f3df7b1 100644 --- a/include/coreset.hpp +++ b/include/coreset.hpp @@ -33,16 +33,16 @@ class Median_Coreset { inline Median_Coreset(const curve_number_t k, curve_size_t ell, const Curves &in, const parameter_t epsilon, const distance_t constant = 1) : in{in}, k{k}, ell{ell}, epsilon{epsilon}, constant{constant}, cluster_costs(k, 0), cluster_sizes(k, 0), lambda(in.size()), Lambda{2*k + 12*std::sqrt(k) + 18}, probabilities(in.size()), c_approx{Clustering::kl_median(k, ell, in)} { c_approx.compute_assignment(in); for (curve_number_t i = 0; i < k; ++i) { - for (curve_number_t j = 0; j < c_approx.assignment.count(i); ++j) { - cluster_costs[i] += Frechet::Continuous::distance(in[c_approx.assignment.get(i, j)], c_approx.centers[i]).value; + for (curve_number_t j = 0; j < c_approx.get_assignment().count(i); ++j) { + cluster_costs[i] += Frechet::Continuous::distance(in[c_approx.get_assignment().get(i, j)], c_approx.centers[i]).value; cluster_sizes[i]++; } } for (curve_number_t i = 0; i < k; ++i) { - for (curve_number_t j = 0; j < c_approx.assignment.count(i); ++j) { - lambda[c_approx.assignment.get(i, j)] = (1+ std::sqrt(2*k/18)) * (6 * Frechet::Continuous::distance(in[c_approx.assignment.get(i, j)], c_approx.centers[i]).value / c_approx.value + 6 * cluster_costs[i] / (c_approx.value * cluster_sizes[i])) + (1 + std::sqrt(18/(2*k))) * 2 / cluster_sizes[i]; - probabilities[c_approx.assignment.get(i, j)] = (lambda[c_approx.assignment.get(i, j)]) / Lambda; + for (curve_number_t j = 0; j < c_approx.get_assignment().count(i); ++j) { + lambda[c_approx.get_assignment().get(i, j)] = (1+ std::sqrt(2*k/18)) * (6 * Frechet::Continuous::distance(in[c_approx.get_assignment().get(i, j)], c_approx.centers[i]).value / c_approx.value + 6 * cluster_costs[i] / (c_approx.value * cluster_sizes[i])) + (1 + std::sqrt(18/(2*k))) * 2 / cluster_sizes[i]; + probabilities[c_approx.get_assignment().get(i, j)] = (lambda[c_approx.get_assignment().get(i, j)]) / Lambda; } } diff --git a/setup.py b/setup.py index fffc389..e2577a2 100644 --- a/setup.py +++ b/setup.py @@ -80,10 +80,10 @@ def build_extension(self, ext): setup( name='Fred-Frechet', - version='1.13.1', + version='1.14', author='Dennis Rohde', author_email='dennis.rohde@tu-dortmund.de', - description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.', + description='A fast, scalable and light-weight C++ Fréchet and DTW distance library, exposed to python and focused on clustering of polygonal curves.', long_description=open('README.md').read(), long_description_content_type='text/markdown', url="http://fred.dennisrohde.work", diff --git a/src/clustering.cpp b/src/clustering.cpp index dae4ec9..f05613f 100644 --- a/src/clustering.cpp +++ b/src/clustering.cpp @@ -16,7 +16,6 @@ namespace Clustering { Distance_Matrix distances; Curves simplifications; -bool use_distance_matrix; void Distance_Matrix::print() const { for (const auto &row : *this) { @@ -27,10 +26,14 @@ void Distance_Matrix::print() const { } } -Curve& Clustering_Result::get(const curve_number_t i) { +const Curve& Clustering_Result::get(const curve_number_t i) const { return centers[i]; } +Cluster_Assignment& Clustering_Result::get_assignment() const { + return *assignment; +} + void Clustering_Result::set(const curve_number_t i, const Curve &curve) { centers[i] = curve; } @@ -47,17 +50,22 @@ Curves::const_iterator Clustering_Result::cend() const { return centers.cend(); } -void Clustering_Result::compute_assignment(const Curves &in, const bool consecutive_call, const unsigned int distance_func) { +void Clustering_Result::compute_assignment(const Curves &in, const bool consecutive_call) { if (Config::verbosity > 0) py::print("Clustering Result: computing assignment"); - assignment = Cluster_Assignment(centers.size()); + assignment = std::make_unique(*this, in, distance_func); if (consecutive_call and in.size() == distances.size()) { - for (curve_number_t i = 0; i < in.size(); ++i) assignment[_nearest_center(i, in, simplifications, center_indices, distances, distance_func)].push_back(i); + for (curve_number_t i = 0; i < in.size(); ++i) assignment->operator[](_nearest_center(i, in, simplifications, center_indices, distances, distance_func)).push_back(i); } else { - if (use_distance_matrix) distances = Distance_Matrix(in.size(), centers.size()); - auto ncenter_indices = Curve_Numbers(centers.size()); - std::iota(ncenter_indices.begin(), ncenter_indices.end(), 0); - for (curve_number_t i = 0; i < in.size(); ++i) - assignment[_nearest_center(i, in, centers, ncenter_indices, distances, distance_func)].push_back(i); + if (Config::use_distance_matrix) distances = Distance_Matrix(in.size(), centers.size()); + + Curve_Numbers ncenter_indices = Curve_Numbers(centers.size()); + + for (curve_number_t i = 0; i < centers.size(); ++i) + ncenter_indices[i] = i; + + for (curve_number_t i = 0; i < in.size(); ++i) { + assignment->operator[](_nearest_center(i, in, centers, ncenter_indices, distances, distance_func)).push_back(i); + } } } @@ -65,12 +73,12 @@ void Clustering_Result::set_center_indices(const Curve_Numbers &pcenter_indices) center_indices = pcenter_indices; } -py::list Clustering_Result::compute_center_enclosing_balls(const Curves &in, const bool consecutive_call, const unsigned int distance_func) { +py::list Clustering_Result::compute_center_enclosing_balls(const Curves &in, const bool consecutive_call) { if (Config::verbosity > 1) py::print("Clustering Result: computing enclosing balls"); py::list result; - compute_assignment(in, consecutive_call, distance_func); + compute_assignment(in, consecutive_call); std::vector> center_matching_points; @@ -81,23 +89,23 @@ py::list Clustering_Result::compute_center_enclosing_balls(const Curves &in, con for (curve_number_t i = 0; i < size(); ++i) { if (Config::verbosity > 2) py::print("Clustering Result: computing points for center ", i); py::list center_list; - for (curve_number_t j = 0; j < assignment[i].size(); ++j) { + for (curve_number_t j = 0; j < (*assignment)[i].size(); ++j) { Points tpoints(get(i).dimensions()); switch (distance_func) { case 0: - if (use_distance_matrix) tpoints = Frechet::Continuous::vertices_matching_points(get(i), in[assignment[i][j]], dynamic_cast(*distances[assignment[i][j]][i])); + if (Config::use_distance_matrix) tpoints = Frechet::Continuous::vertices_matching_points(get(i), in[(*assignment)[i][j]], dynamic_cast(*distances[(*assignment)[i][j]][i])); else { - Frechet::Continuous::Distance curr_dist = Frechet::Continuous::distance(get(i), in[assignment[i][j]]); - tpoints = Frechet::Continuous::vertices_matching_points(get(i), in[assignment[i][j]], curr_dist); + Frechet::Continuous::Distance curr_dist = Frechet::Continuous::distance(get(i), in[(*assignment)[i][j]]); + tpoints = Frechet::Continuous::vertices_matching_points(get(i), in[(*assignment)[i][j]], curr_dist); } break; case 1: break; case 2: - if (use_distance_matrix) tpoints = Dynamic_Time_Warping::Discrete::vertices_matching_points(get(i), in[assignment[i][j]], dynamic_cast(*distances[assignment[i][j]][i])); + if (Config::use_distance_matrix) tpoints = Dynamic_Time_Warping::Discrete::vertices_matching_points(get(i), in[(*assignment)[i][j]], dynamic_cast(*distances[(*assignment)[i][j]][i])); else { - Dynamic_Time_Warping::Discrete::Distance curr_dist = Dynamic_Time_Warping::Discrete::distance(get(i), in[assignment[i][j]]); - tpoints = Dynamic_Time_Warping::Discrete::vertices_matching_points(get(i), in[assignment[i][j]], curr_dist); + Dynamic_Time_Warping::Discrete::Distance curr_dist = Dynamic_Time_Warping::Discrete::distance(get(i), in[(*assignment)[i][j]]); + tpoints = Dynamic_Time_Warping::Discrete::vertices_matching_points(get(i), in[(*assignment)[i][j]], curr_dist); } break; default: @@ -149,36 +157,45 @@ curve_number_t Cluster_Assignment::get(const curve_number_t i, const curve_numbe } distance_t Cluster_Assignment::distance(const curve_number_t i, const curve_number_t j) const { - if (use_distance_matrix) return distances[i][j]->value; - else return std::numeric_limits::signaling_NaN(); + if (Config::use_distance_matrix) return distances[i][j]->value; + else { + switch (distance_func) { + case 0: + return Frechet::Continuous::distance(clustering_result.get(i), assignment_curves[(*this)[i][j]]).value; + case 1: + return Frechet::Discrete::distance(clustering_result.get(i), assignment_curves[(*this)[i][j]]).value; + case 2: + return Dynamic_Time_Warping::Discrete::distance(clustering_result.get(i), assignment_curves[(*this)[i][j]]).value; + default: + return std::numeric_limits::signaling_NaN(); + } + } } - Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, unsigned int local_search = 0, const bool median = false, const bool consecutive_call = false, const bool random_start_center = true, const bool fast_simplification = false, const unsigned int distance_func = 0) { const auto start = std::clock(); - Clustering_Result result; + Clustering_Result result(distance_func); if (in.empty()) return result; std::size_t memory_distance_matrix = std::pow(in.size(), 2) * (sizeof(distance_t) + 3 * sizeof(double) + ell * in.get_m() * 2 * sizeof(curve_number_t) + sizeof(std::size_t)), memory_available = .666 * Config::available_memory; - bool use_distance_matrix = Config::use_distance_matrix; - - if (memory_distance_matrix > memory_available and use_distance_matrix == true) { + + if (memory_distance_matrix > memory_available and Config::use_distance_matrix == true) { py::print("KL_CLUST: WARNING distance preprocessing requires more memory (", memory_distance_matrix * 1e-9, "GB) than available (", memory_available * 1e-9, "GB), consecutive_call will NOT be available"); - use_distance_matrix = Config::use_distance_matrix; + Config::use_distance_matrix = false; } if (not consecutive_call) { - if (use_distance_matrix) { + if (Config::use_distance_matrix) { if (Config::verbosity > 0) py::print("KL_CLUST: allocating ", in.size(), " x ", in.size(), " distance_matrix"); distances = Distance_Matrix(in.size(), in.size()); } if (Config::verbosity > 0) py::print("KL_CLUST: allocating space for ", in.size(), " simplifications, each of complexity ", ell); simplifications = Curves(in.size(), ell, in.dimensions()); - } else if (use_distance_matrix) { + } else if (Config::use_distance_matrix) { if (distances.empty()) { py::print("WARNING: consecutive_call is used wrongly"); if (Config::verbosity > 0) py::print("KL_CLUST: allocating ", in.size(), " x ", in.size(), " distance_matrix"); @@ -236,9 +253,8 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ } if (Config::verbosity > 0) py::print("KL_CLUST: first center is ", centers[0]); - distance_t curr_maxdist = 0; + distance_t curr_curve_cost, curr_maxdist = 0; curve_number_t curr_maxcurve = 0; - distance_t curr_curve_cost; if (Config::verbosity > 0) py::print("KL_CLUST: computing remaining centers"); { @@ -262,6 +278,7 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ } if (Config::verbosity > 0) py::print("KL_CLUST: center ", i + 1, " is curve ", curr_maxcurve); + if (Config::verbosity > 0) py::print("KL_CLUST: current cost is ", curr_maxdist); if (simplifications[curr_maxcurve].empty()) { if (Config::verbosity > 0) py::print("KL_CLUST: computing simplification of ", curr_maxcurve); @@ -276,8 +293,8 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ if (local_search > 0) { - auto curr_centers = centers; - auto cost = curr_maxdist, curr_cost = cost; + Curve_Numbers curr_centers = centers; + distance_t cost = curr_maxdist, curr_cost = cost; if (Config::verbosity > 0) py::print("KL_CLUST: starting local search for k-center objective for ", local_search, " iterations"); @@ -317,11 +334,11 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ if (median) { if (Config::verbosity > 0) py::print("KL_CLUST: computing k-median cost"); - auto cost = _center_cost_sum(in, simplifications, centers, distances, distance_func), approxcost = cost, curr_cost = cost; + distance_t cost = _center_cost_sum(in, simplifications, centers, distances, distance_func), approxcost = cost, curr_cost = cost; if (Config::verbosity > 0) py::print("KL_CLUST: k-median cost is ", cost); distance_t gamma = 1/(10 * num_centers); - auto found = true; - auto curr_centers = centers; + bool found = true; + Curve_Numbers curr_centers = centers; if (Config::verbosity > 0) py::print("KL_CLUST: starting k-median local search"); // try to improve current solution @@ -362,8 +379,9 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ curr_maxdist = cost; } - Curves simpl_centers; - for (const auto center: centers) simpl_centers.push_back(simplifications[center]); + Curves simpl_centers(centers.size(), ell, in.dimensions()); + + for (curve_number_t i = 0; i < centers.size(); ++i) simpl_centers[i] = simplifications[centers[i]]; const auto end = std::clock(); result.centers = simpl_centers; diff --git a/src/fred_python_wrapper.cpp b/src/fred_python_wrapper.cpp index 6b2431e..ba8fe72 100644 --- a/src/fred_python_wrapper.cpp +++ b/src/fred_python_wrapper.cpp @@ -177,12 +177,12 @@ PYBIND11_MODULE(backend, m) { py::class_(m, "Clustering_Result") .def_readwrite("value", &Clustering::Clustering_Result::value) .def_readwrite("time", &Clustering::Clustering_Result::running_time) - .def_readwrite("assignment", &Clustering::Clustering_Result::assignment) + .def_property_readonly("assignment", &Clustering::Clustering_Result::get_assignment, py::return_value_policy::reference) .def("__getitem__", &Clustering::Clustering_Result::get, py::return_value_policy::reference) .def("__setitem__", &Clustering::Clustering_Result::set) .def("__len__", &Clustering::Clustering_Result::size) .def("__iter__", [](Clustering::Clustering_Result &v) { return py::make_iterator(v.cbegin(), v.cend()); }, py::keep_alive<0, 1>()) - .def("compute_assignment", &Clustering::Clustering_Result::compute_assignment, py::arg("curves"), py::arg("consecutive_call") = false, py::arg("distance_func") = 0) + .def("compute_assignment", &Clustering::Clustering_Result::compute_assignment, py::arg("curves"), py::arg("consecutive_call") = false) .def("compute_center_enclosing_balls", &Clustering::Clustering_Result::compute_center_enclosing_balls) ;