Skip to content

Commit

Permalink
DTW DBA
Browse files Browse the repository at this point in the history
  • Loading branch information
Dennis Rohde committed Aug 3, 2023
1 parent c5cafc4 commit 6ce4cab
Show file tree
Hide file tree
Showing 14 changed files with 208 additions and 131 deletions.
14 changes: 8 additions & 6 deletions Fred/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@
config = Config()
config.available_memory = available_memory

def _optimize_centers(self, curves, consecutive_call=False, distance_func=0):
    """Replace every center of this clustering result by an optimized curve.

    For each center, the enclosing balls returned by
    ``self.compute_center_enclosing_balls`` are passed to ``_stabbing_path``;
    the resulting path becomes the new center, named after the old one with
    the suffix " (optimized)".

    The optimization runs at most once per result: once ``optimize_called``
    is set, further calls return immediately without changing anything.

    Parameters:
        curves: forwarded to ``compute_center_enclosing_balls``.
        consecutive_call: forwarded to ``compute_center_enclosing_balls``;
            defaults to ``False``.
        distance_func: forwarded to ``compute_center_enclosing_balls``;
            defaults to ``0``.

    Returns:
        None. The centers are modified in place.
    """
    # Guard: the centers were already replaced by an earlier call.
    if self.optimize_called:
        return
    all_balls = self.compute_center_enclosing_balls(curves, consecutive_call, distance_func)
    for i, center_balls in enumerate(all_balls):
        # Only the path is used; the second component of the result is ignored.
        path, _ = _stabbing_path(center_balls)
        self[i] = Curve(path, "{} (optimized)".format(self[i].name))
    self.optimize_called = True

# Expose the optimizer as a method of the clustering result class.
Clustering_Result.optimize_centers = _optimize_centers
# Class-level default for the "already optimized" flag read by _optimize_centers;
# _optimize_centers sets it to True on the instance after its first run.
Clustering_Result.optimize_called = False

def plot_curve(*curves, vertex_markings=True, savename=None, saveextension=None, return_fig=False, legend=True):
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -100,8 +104,7 @@ def plot_curve(*curves, vertex_markings=True, savename=None, saveextension=None,
elif return_fig:
return fig
else:
plt.show()
plt.close()
plt.show(block=False)

def plot_clustering(clustering_result, curves, vertex_markings=True, savename=None, saveextension=None, return_fig=False, legend=True):
if not (isinstance(clustering_result, backend.Clustering_Result) and isinstance(curves, backend.Curves)):
Expand Down Expand Up @@ -166,5 +169,4 @@ def plot_clustering(clustering_result, curves, vertex_markings=True, savename=No
elif return_fig:
return fig
else:
plt.show()
plt.close()
plt.show(block=False)
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ The variable `distance_func` controls which distance function to use. Possible v

#### discrete (k,l)-center clustering (continuous Fréchet)
- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
- signature: `fred.discrete_klcenter(k, l, curves, local_search, consecutive_call, random_first_center, fast_simplification, distance_func)` with parameters
- signature: `fred.discrete_klcenter(int k, int l, fred.Curves, int local_search, bool consecutive_call, bool random_first_center, bool fast_simplification, int distance_func)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers
- `local_search`: number of iterations of local search to improve solution, defaults to `0`
Expand All @@ -87,7 +87,7 @@ The variable `distance_func` controls which distance function to use. Possible v

#### discrete (k,l)-median clustering (continuous Fréchet)
- Algorithm from section 4.3 in [**Geometric Approximation Algorithms**](http://www.ams.org/books/surv/173/) + simplification
- signature: `fred.discrete_klmedian(k, l, curves, consecutive_call, fast_simplification, distance_func)` with parameters
- signature: `fred.discrete_klmedian(int k, int l, fred.Curves, bool consecutive_call, bool fast_simplification, int distance_func)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers
- `consecutive_call`: reuses distances and simplifications already computed in a previous call if `true`, defaults to `false`
Expand All @@ -102,8 +102,8 @@ The variable `distance_func` controls which distance function to use. Possible v
- methods:
- `len(fred.Clustering_Result)`: number of centers
- `fred.Clustering_Result[i]`: get ith center
- `fred.Clustering_Result.compute_assignment(fred.Curves, bool consecutive_call, distance_func)`: assigns every curve to its nearest center with parameter `consecutive_call`, which defaults to `false`; set to true, if you want to assign the curves used for clustering
- `fred.Clustering_Result.optimize(fred.Curves, bool consecutive_call)`: (heuristically) optimizes cluster centers using a [stabbing algorithm](https://arxiv.org/abs/2212.01458)
- `fred.Clustering_Result.compute_assignment(fred.Curves, bool consecutive_call, int distance_func)`: assigns every curve to its nearest center; `consecutive_call` defaults to `false` and should be set to `true` when assigning the same curves that were used for clustering
- `fred.Clustering_Result.optimize_centers(fred.Curves, bool consecutive_call, int distance_func)`: (heuristically) optimizes cluster centers using a [stabbing algorithm](https://arxiv.org/abs/2212.01458)
- members:
- `value`: objective value
- `time`: running-time
Expand All @@ -115,7 +115,7 @@ The variable `distance_func` controls which distance function to use. Possible v
- `len(fred.Cluster_Assignment)`: number of centers
- `fred.Cluster_Assignment.count(i)`: number of curves assigned to center `i`
- `fred.Cluster_Assignment.get(i,j)`: get index of `j`th curve assigned to center `i`
- `fred.Cluster_Assignment.distance(i,j)`: get distance of `j`th curve assigned to center `i` to center `i`
- `fred.Cluster_Assignment.distance(i,j)`: get the distance between center `i` and the `j`th curve assigned to it (only available when the distance matrix is used)

### Dimension Reduction via Gaussian Random Projection
- [Section 2 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves)
Expand Down
24 changes: 15 additions & 9 deletions include/clustering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI

#include <unordered_map>
#include <cmath>
#include <memory>

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
Expand All @@ -38,7 +39,12 @@ extern bool use_distance_matrix;

/**
 * Two-dimensional table of distance objects, stored as a vector of rows
 * (each row is a `Distances`, i.e. a vector of owning PDistance pointers).
 */
struct Distance_Matrix : public std::vector<Distances> {
    Distance_Matrix() = default;

    /**
     * Builds an n x m matrix in which every cell owns a default-constructed
     * PDistance placeholder.
     *
     * NOTE(review): cells are therefore never null. Code that wants to know
     * whether a cell has been computed must test the pointee
     * (PDistance::operator bool), not the unique_ptr itself - confirm that
     * all call sites do so.
     *
     * @param n number of rows
     * @param m number of cells per row
     */
    Distance_Matrix(const curve_number_t n, const curve_number_t m) : std::vector<Distances>(n) {
        // Walk the n rows that were actually allocated; indexing rows by a
        // counter bounded by m would run out of range whenever m != n.
        for (auto &row : *this) {
            row.reserve(m);
            std::generate_n(std::back_inserter(row), m, [] { return std::make_unique<PDistance>(); });
        }
    }

    void print() const;
};

Expand All @@ -62,27 +68,27 @@ struct Clustering_Result {
Curves::const_iterator cend() const;
void compute_assignment(const Curves&, const bool = false, const unsigned int distance_func = 0);
void set_center_indices(const Curve_Numbers&);
py::list compute_center_enclosing_balls(const Curves&, const bool);
py::list compute_center_enclosing_balls(const Curves&, const bool, const unsigned int);
private:
Curve_Numbers center_indices;
};

inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, const Curves &in, const Curves &simplified_in, Distance_Matrix &distances, const unsigned int distance_func) {
if (use_distance_matrix) {
if (distances[i][j] < 0) {
if (not distances[i][j]) {
switch (distance_func) {
case 0:
distances[i][j] = Frechet::Continuous::distance(in[i], simplified_in[j]).value;
distances[i][j] = std::make_unique<Frechet::Continuous::Distance>(Frechet::Continuous::distance(in[i], simplified_in[j]));
break;
case 1:
distances[i][j] = Frechet::Discrete::distance(in[i], simplified_in[j]).value;
distances[i][j] = std::make_unique<Frechet::Discrete::Distance>(Frechet::Discrete::distance(in[i], simplified_in[j]));
break;
case 2:
distances[i][j] = Dynamic_Time_Warping::Discrete::distance(in[i], simplified_in[j]).value;
distances[i][j] = std::make_unique<Dynamic_Time_Warping::Discrete::Distance>(Dynamic_Time_Warping::Discrete::distance(in[i], simplified_in[j]));
break;
}
}
return distances[i][j];
return distances[i][j]->value;
} else {
switch (distance_func) {
case 0:
Expand All @@ -98,9 +104,9 @@ inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, co
}

inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, const Curves &simplified_in, const Curve_Numbers &centers, Distance_Matrix &distances, const unsigned int distance_func) {
const auto infty = std::numeric_limits<distance_t>::infinity();
const distance_t infty = std::numeric_limits<distance_t>::infinity();
// cost for curve is infinity
auto min_cost = infty;
distance_t min_cost = infty;
curve_number_t nearest = 0;

// except there is a center with smaller cost, then choose the one with smallest cost
Expand Down
4 changes: 2 additions & 2 deletions include/coreset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ class Median_Coreset {
parameter_t epsilon;
distance_t constant;
Clustering::Clustering_Result c_approx;
Distances cluster_costs;
std::vector<distance_t> cluster_costs;
Curve_Numbers cluster_sizes;
Curve_Numbers coreset;
Distances lambda;
std::vector<distance_t> lambda;
Parameters probabilities;

public:
Expand Down
13 changes: 9 additions & 4 deletions include/dynamic_time_warping.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,25 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
#include "interval.hpp"
#include "curve.hpp"
#include "random.hpp"
#include "config.hpp"

namespace Dynamic_Time_Warping {

namespace Discrete {

struct Distance {
distance_t value;
double time;
struct Distance : public PDistance {
explicit operator bool() const {
return true;
}

std::string repr() const;

std::vector<std::pair<curve_number_t, curve_number_t>> matching;
};

Points vertices_matching_points(const Curve&, const Curve&, Distance&);

Distance distance(const Curve&, const Curve&);
Distance distance_randomized(const Curve&, const Curve&);
}

}
20 changes: 12 additions & 8 deletions include/frechet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,21 @@ namespace Continuous {

extern distance_t error;

struct Distance {
distance_t value;
struct Distance : public PDistance {
explicit operator bool() const {
return true;
}

std::string repr() const;

double time_searches;
double time_bounds;
std::size_t number_searches;

std::string repr() const;
};

Distance distance(const Curve&, const Curve&);

Points vertices_matching_points(const Curve&, const Curve&, const distance_t);
Points vertices_matching_points(const Curve&, const Curve&, Distance&);

Distance _distance(const Curve&, const Curve&, distance_t, distance_t);

Expand All @@ -47,9 +50,10 @@ namespace Continuous {
}
namespace Discrete {

struct Distance {
distance_t value;
double time;
struct Distance : public PDistance {
explicit operator bool() const {
return true;
}

std::string repr() const;
};
Expand Down
2 changes: 1 addition & 1 deletion include/simplification.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace Simplification {
class Subcurve_Shortcut_Graph {

Curve& curve;
std::vector<Distances> edges;
std::vector<std::vector<distance_t>> edges;

public:

Expand Down
33 changes: 32 additions & 1 deletion include/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
#include <vector>
#include <cmath>
#include <limits>
#include <string>
#include <sstream>
#include <memory>

typedef double distance_t; // Distances
typedef double coordinate_t; // Coordinates
Expand All @@ -26,10 +29,38 @@ class Point;
class Curve;
class Interval;

/**
 * Polymorphic base ("prototype") of all distance result types.
 *
 * A plain PDistance acts as a placeholder: its `value` is NaN and its
 * boolean conversion yields false. Derived result types override the
 * conversion and `repr()`.
 */
struct PDistance {

    PDistance() = default;

    /// Intentionally implicit so a bare distance_t converts to a PDistance.
    PDistance(distance_t value) : value{value} {}

    // Objects are owned and destroyed through std::unique_ptr<PDistance>
    // (see the Distances alias), so the destructor must be virtual;
    // otherwise deleting a derived result through the base pointer is
    // undefined behavior. The remaining special members are defaulted
    // explicitly because declaring the destructor would otherwise suppress
    // the implicit move operations.
    virtual ~PDistance() = default;
    PDistance(const PDistance&) = default;
    PDistance& operator=(const PDistance&) = default;
    PDistance(PDistance&&) noexcept = default;
    PDistance& operator=(PDistance&&) noexcept = default;

    /// False for the placeholder base; derived types override this.
    explicit virtual operator bool() const {
        return false;
    }

    /// Implicit conversion to the stored numeric value.
    operator distance_t() const {
        return value;
    }

    /// Orders distance objects by their numeric value.
    bool operator<(const PDistance &other) const {
        return value < other.value;
    }

    /// Textual representation; derived types override this.
    virtual std::string repr() const {
        return "Prototype of distance";
    }

    distance_t value = std::numeric_limits<distance_t>::signaling_NaN();
    double time = 0;

};

using Vector = Point;
using Intervals = std::vector<Interval>;
using Coordinates = std::vector<coordinate_t>;
using Distances = std::vector<distance_t>;
using Distances = std::vector<std::unique_ptr<PDistance>>;
using Curve_Numbers = std::vector<curve_number_t>;
using Parameters = std::vector<parameter_t>;

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def build_extension(self, ext):

setup(
name='Fred-Frechet',
version='1.12.4',
version='1.13',
author='Dennis Rohde',
author_email='[email protected]',
description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.',
Expand Down
Loading

0 comments on commit 6ce4cab

Please sign in to comment.