diff --git a/Fred/__init__.py b/Fred/__init__.py index ab5011a..2db81b3 100644 --- a/Fred/__init__.py +++ b/Fred/__init__.py @@ -10,7 +10,13 @@ from .backend import * from .stabbing import stabbing_path as _stabbing_path +import psutil + +virtual_memory = psutil.virtual_memory() +available_memory = virtual_memory.available + config = Config() +config.available_memory = available_memory def _optimize_centers(self, curves, consecutive_call=False): all_balls = self.compute_center_enclosing_balls(curves, False) diff --git a/README.md b/README.md index c8005ba..ae4a14d 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ All simplifications are vertex-restricted! #### discrete (k,l)-center clustering (continuous Fréchet) - from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616) -- signature: `fred.discrete_klcenter(k, l, curves, distances, random_first_center, fast_simplification)` with parameters +- signature: `fred.discrete_klcenter(k, l, curves, local_search, consecutive_call, random_first_center, fast_simplification)` with parameters - `k`: number of centers - `l`: maximum complexity of the centers - `local_search`: number of iterations of local search to improve solution, defaults to `0` @@ -74,7 +74,7 @@ All simplifications are vertex-restricted! 
#### discrete (k,l)-median clustering (continuous Fréchet) - Algorithm from section 4.3 in [**Geometric Approximation Algorithms**](http://www.ams.org/books/surv/173/) + simplification -- signature: `fred.discrete_klmedian(k, l, curves, distances, fast_simplification)` with parameters +- signature: `fred.discrete_klmedian(k, l, curves, consecutive_call, fast_simplification)` with parameters - `k`: number of centers - `l`: maximum complexity of the centers - `consecutive_call`: reuses distances and simplifications already computed in a previous call if `true`, defaults to `false` diff --git a/include/clustering.hpp b/include/clustering.hpp index 8ca4463..250ad87 100644 --- a/include/clustering.hpp +++ b/include/clustering.hpp @@ -11,6 +11,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once #include +#include #include #include @@ -33,6 +34,7 @@ struct Distance_Matrix; extern Distance_Matrix distances; extern Curves simplifications; +extern bool use_distance_matrix; struct Distance_Matrix : public std::vector { Distance_Matrix() = default; @@ -66,11 +68,13 @@ struct Clustering_Result { }; inline distance_t _cheap_dist(const curve_number_t i, const curve_number_t j, const Curves &in, const Curves &simplified_in, Distance_Matrix &distances) { - if (distances[i][j] < 0) { - const auto dist = Frechet::Continuous::distance(in[i], simplified_in[j]); - distances[i][j] = dist.value; - } - return distances[i][j]; + if (use_distance_matrix) { + if (distances[i][j] < 0) { + const auto dist = Frechet::Continuous::distance(in[i], simplified_in[j]); + distances[i][j] = dist.value; + } + return distances[i][j]; + } else return Frechet::Continuous::distance(in[i], simplified_in[j]).value; } inline curve_number_t _nearest_center(const curve_number_t i, const Curves &in, const Curves &simplified_in, const Curve_Numbers &centers, Distance_Matrix &distances) { diff --git a/include/config.hpp b/include/config.hpp index b0a580a..42d3557 100644 --- 
a/include/config.hpp +++ b/include/config.hpp @@ -10,10 +10,13 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once +#include + namespace Config { - class Config{}; + struct Config{}; + extern std::size_t available_memory; extern unsigned int verbosity; extern bool mp_dynamic; extern int number_threads; diff --git a/setup.py b/setup.py index 02bd647..dad6063 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,7 @@ def build_extension(self, ext): setup( name='Fred-Frechet', - version='1.10.10', + version='1.11', author='Dennis Rohde', author_email='dennis.rohde@tu-dortmund.de', description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.', @@ -90,7 +90,7 @@ def build_extension(self, ext): packages=setuptools.find_packages(), ext_package="Fred", ext_modules=[CMakeExtension('backend')], - install_requires=['cvxopt', 'matplotlib', 'scipy'], + install_requires=['cvxopt', 'matplotlib', 'scipy', 'psutil'], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, ) diff --git a/src/clustering.cpp b/src/clustering.cpp index b647caf..cffa511 100644 --- a/src/clustering.cpp +++ b/src/clustering.cpp @@ -16,6 +16,7 @@ namespace Clustering { Distance_Matrix distances; Curves simplifications; +bool use_distance_matrix; void Distance_Matrix::print() const { for (const auto &row : *this) { @@ -117,13 +118,21 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_ Clustering_Result result; if (in.empty()) return result; + + std::size_t memory_distance_matrix = std::pow(in.size(), 2) * sizeof(distance_t); + use_distance_matrix = true; + + if (memory_distance_matrix > static_cast(0.666 * Config::available_memory)) { + py::print("KL_CLUST: WARNING distance preprocessing requires more memory (", memory_distance_matrix, ") than available (", Config::available_memory, "), consecutive_call will NOT be available"); + use_distance_matrix = 
false; + } if (not consecutive_call) { if (Config::verbosity > 0) py::print("KL_CLUST: allocating ", in.size(), " x ", in.size(), " distance_matrix"); - distances = Distance_Matrix(in.size(), in.size()); + if (use_distance_matrix) distances = Distance_Matrix(in.size(), in.size()); if (Config::verbosity > 0) py::print("KL_CLUST: allocating space for ", in.size(), " simplifications, each of complexity ", ell); simplifications = Curves(in.size(), ell, in.dimensions()); - } else { + } else if (use_distance_matrix) { if (distances.empty()) { py::print("WARNING: consecutive_call is used wrongly"); if (Config::verbosity > 0) py::print("KL_CLUST: allocating ", in.size(), " x ", in.size(), " distance_matrix"); diff --git a/src/config.cpp b/src/config.cpp index 6bde7cc..2932bcb 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -8,8 +8,11 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "config.hpp" + namespace Config { + std::size_t available_memory = 0; unsigned int verbosity = 0; bool mp_dynamic = true; int number_threads = -1; diff --git a/src/fred_python_wrapper.cpp b/src/fred_python_wrapper.cpp index bb3e877..a9abba6 100644 --- a/src/fred_python_wrapper.cpp +++ b/src/fred_python_wrapper.cpp @@ -51,6 +51,7 @@ PYBIND11_MODULE(backend, m) { py::class_(m, "Config") .def(py::init<>()) + .def_property("available_memory", [&](Config::Config&) { return Config::available_memory; }, [&](Config::Config&, const std::size_t available_memory) { Config::available_memory = available_memory; }) .def_property("continuous_frechet_error", [&](Config::Config&) { return fc::error; }, [&](Config::Config&, const distance_t error) { fc::error = error; }) .def_property("verbosity", [&](Config::Config&) { return &Config::verbosity; }, [&](Config::Config&, const unsigned int verbosity) { Config::verbosity = verbosity; }) .def_property("number_threads", [&](Config::Config&){ return &Config::number_threads; }, [&](Config::Config&, const int number_threads) { @@ -176,7 +177,7 @@ PYBIND11_MODULE(backend, m) { m.def("dimension_reduction", &JLTransform::transform_naive, py::arg("curves"), py::arg("epsilon") = 0.5, py::arg("empirical_constant") = true); - m.def("discrete_klcenter", &Clustering::kl_center, py::arg("k") = 2, py::arg("l") = 2, py::arg("curves"), py::arg("local_search") = 0, py::arg("consecutive_call") = false, py::arg("random_start_center") = true, py::arg("fast_simplification") = false); + m.def("discrete_klcenter", &Clustering::kl_center, py::arg("k") = 2, py::arg("l") = 2, py::arg("curves"), py::arg("local_search") = 0, py::arg("consecutive_call") = false, py::arg("random_first_center") = true, py::arg("fast_simplification") = false); m.def("discrete_klmedian", &Clustering::kl_median, py::arg("k") = 2, py::arg("l") = 2, py::arg("curves"), py::arg("consecutive_call") = false, py::arg("fast_simplification") = false); }