Skip to content

Commit

Permalink
do not use distance_matrix if not enough memory available
Browse files Browse the repository at this point in the history
  • Loading branch information
Dennis Rohde committed May 8, 2023
1 parent 3da00c9 commit 375784d
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 13 deletions.
6 changes: 6 additions & 0 deletions Fred/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from .backend import *
from .stabbing import stabbing_path as _stabbing_path

import psutil

# Snapshot of the system memory statistics, taken once when the package is
# imported; it is not refreshed afterwards.
virtual_memory = psutil.virtual_memory()
# The `available` field of that snapshot (psutil reports it in bytes).
available_memory = virtual_memory.available

# Package-wide configuration object. Config is expected to be the class
# brought in by `from .backend import *` -- TODO confirm nothing shadows it.
config = Config()
# Pass the snapshot to the backend; the clustering code compares it against the
# size of the distance matrix to decide whether the matrix may be allocated.
config.available_memory = available_memory

def _optimize_centers(self, curves, consecutive_call=False):
all_balls = self.compute_center_enclosing_balls(curves, False)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ All simplifications are vertex-restricted!

#### discrete (k,l)-center clustering (continuous Fréchet)
- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
- signature: `fred.discrete_klcenter(k, l, curves, distances, random_first_center, fast_simplification)` with parameters
- signature: `fred.discrete_klcenter(k, l, curves, local_search, consecutive_call, random_first_center, fast_simplification)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers
- `local_search`: number of iterations of local search to improve solution, defaults to `0`
Expand All @@ -74,7 +74,7 @@ All simplifications are vertex-restricted!

#### discrete (k,l)-median clustering (continuous Fréchet)
- Algorithm from section 4.3 in [**Geometric Approximation Algorithms**](http://www.ams.org/books/surv/173/) + simplification
- signature: `fred.discrete_klmedian(k, l, curves, distances, fast_simplification)` with parameters
- signature: `fred.discrete_klmedian(k, l, curves, consecutive_call, fast_simplification)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers
- `consecutive_call`: reuses distances and simplifications already computed in a previous call if `true`, defaults to `false`
Expand Down
14 changes: 9 additions & 5 deletions include/clustering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
#pragma once

#include <unordered_map>
#include <cmath>

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
Expand All @@ -33,6 +34,7 @@ struct Distance_Matrix;

extern Distance_Matrix distances;
extern Curves simplifications;
extern bool use_distance_matrix;

struct Distance_Matrix : public std::vector<Distances> {
Distance_Matrix() = default;
Expand Down Expand Up @@ -66,11 +68,13 @@ struct Clustering_Result {
};

// Returns the continuous Frechet distance between input curve `i` and
// simplified curve `j`.
//
// If the global `use_distance_matrix` flag is set, `distances` acts as a cache:
// a negative entry at [i][j] triggers the computation and the result is stored
// there, so later calls with the same pair only read the matrix.
// (Presumably fresh entries start out negative; the Distance_Matrix
// constructor is not visible here -- TODO confirm.)
//
// If the flag is cleared, `distances` is never indexed, because the caller may
// not have allocated it. The distance is then recomputed on every call.
inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, const Curves &in, const Curves &simplified_in, Distance_Matrix &distances) {
    // No matrix available: compute directly and do not touch `distances`.
    if (!use_distance_matrix) {
        return Frechet::Continuous::distance(in[i], simplified_in[j]).value;
    }

    // Fill the cache entry the first time this pair is requested.
    if (distances[i][j] < 0) {
        distances[i][j] = Frechet::Continuous::distance(in[i], simplified_in[j]).value;
    }
    return distances[i][j];
}

inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, const Curves &simplified_in, const Curve_Numbers &centers, Distance_Matrix &distances) {
Expand Down
5 changes: 4 additions & 1 deletion include/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI

#pragma once

#include <cstddef>

namespace Config {

class Config{};
struct Config{};

extern std::size_t available_memory;
extern unsigned int verbosity;
extern bool mp_dynamic;
extern int number_threads;
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def build_extension(self, ext):

setup(
name='Fred-Frechet',
version='1.10.10',
version='1.11',
author='Dennis Rohde',
author_email='[email protected]',
description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.',
Expand All @@ -90,7 +90,7 @@ def build_extension(self, ext):
packages=setuptools.find_packages(),
ext_package="Fred",
ext_modules=[CMakeExtension('backend')],
install_requires=['cvxopt', 'matplotlib', 'scipy'],
install_requires=['cvxopt', 'matplotlib', 'scipy', 'psutil'],
cmdclass=dict(build_ext=CMakeBuild),
zip_safe=False,
)
13 changes: 11 additions & 2 deletions src/clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace Clustering {

// Distance matrix kept between clustering calls; kl_cluster re-allocates it
// unless it is invoked with consecutive_call.
Distance_Matrix distances;
// Simplified curves kept between clustering calls; kl_cluster re-allocates them
// unless it is invoked with consecutive_call.
Curves simplifications;
// Assigned by kl_cluster on every call with non-empty input: true by default,
// false when the estimated matrix size exceeds the configured share of
// available memory. Read by _cheap_dist to decide whether `distances` may be indexed.
bool use_distance_matrix;

void Distance_Matrix::print() const {
for (const auto &row : *this) {
Expand Down Expand Up @@ -117,13 +118,21 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_
Clustering_Result result;

if (in.empty()) return result;

std::size_t memory_distance_matrix = std::pow(in.size(), 2) * sizeof(distance_t);
use_distance_matrix = true;

if (memory_distance_matrix > static_cast<std::size_t>(0.666 * Config::available_memory)) {
py::print("KL_CLUST: WARNING distance preprocessing requires more memory (", memory_distance_matrix, ") than available (", Config::available_memory, "), consecutive_call will NOT be available");
use_distance_matrix = false;
}

if (not consecutive_call) {
if (Config::verbosity > 0) py::print("KL_CLUST: allocating ", in.size(), " x ", in.size(), " distance_matrix");
distances = Distance_Matrix(in.size(), in.size());
if (use_distance_matrix) distances = Distance_Matrix(in.size(), in.size());
if (Config::verbosity > 0) py::print("KL_CLUST: allocating space for ", in.size(), " simplifications, each of complexity ", ell);
simplifications = Curves(in.size(), ell, in.dimensions());
} else {
} else if (use_distance_matrix) {
if (distances.empty()) {
py::print("WARNING: consecutive_call is used wrongly");
if (Config::verbosity > 0) py::print("KL_CLUST: allocating ", in.size(), " x ", in.size(), " distance_matrix");
Expand Down
3 changes: 3 additions & 0 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ The above copyright notice and this permission notice shall be included in all c
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "config.hpp"

namespace Config {

std::size_t available_memory = 0;
unsigned int verbosity = 0;
bool mp_dynamic = true;
int number_threads = -1;
Expand Down
3 changes: 2 additions & 1 deletion src/fred_python_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ PYBIND11_MODULE(backend, m) {

py::class_<Config::Config>(m, "Config")
.def(py::init<>())
.def_property("available_memory", [&](Config::Config&) { return Config::available_memory; }, [&](Config::Config&, const std::size_t available_memory) { Config::available_memory = available_memory; })
.def_property("continuous_frechet_error", [&](Config::Config&) { return fc::error; }, [&](Config::Config&, const distance_t error) { fc::error = error; })
.def_property("verbosity", [&](Config::Config&) { return &Config::verbosity; }, [&](Config::Config&, const unsigned int verbosity) { Config::verbosity = verbosity; })
.def_property("number_threads", [&](Config::Config&){ return &Config::number_threads; }, [&](Config::Config&, const int number_threads) {
Expand Down Expand Up @@ -176,7 +177,7 @@ PYBIND11_MODULE(backend, m) {

m.def("dimension_reduction", &JLTransform::transform_naive, py::arg("curves"), py::arg("epsilon") = 0.5, py::arg("empirical_constant") = true);

m.def("discrete_klcenter", &Clustering::kl_center, py::arg("k") = 2, py::arg("l") = 2, py::arg("curves"), py::arg("local_search") = 0, py::arg("consecutive_call") = false, py::arg("random_start_center") = true, py::arg("fast_simplification") = false);
m.def("discrete_klcenter", &Clustering::kl_center, py::arg("k") = 2, py::arg("l") = 2, py::arg("curves"), py::arg("local_search") = 0, py::arg("consecutive_call") = false, py::arg("random_first_center") = true, py::arg("fast_simplification") = false);
m.def("discrete_klmedian", &Clustering::kl_median, py::arg("k") = 2, py::arg("l") = 2, py::arg("curves"), py::arg("consecutive_call") = false, py::arg("fast_simplification") = false);

}

0 comments on commit 375784d

Please sign in to comment.