From a4576e584e1973627b6e56bb9116c404a4c3d1f1 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:37:56 +0200
Subject: [PATCH 01/14] Instead of filtering then concatenating, do in inverse
 order.

---
 src/python/gudhi/representations/vector_methods.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 36f445884c..348dd3bd17 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -809,18 +809,19 @@ def fit(self, X, y=None, sample_weight=None):
         if not hasattr(self.quantiser, 'fit'):
             raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
 
-        # In fitting we remove infinite death time points so that every center is finite
-        X = [dgm[~np.isinf(dgm).any(axis=1), :] for dgm in X]
-
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]
         measures_concat = np.concatenate(X)
         weights_concat = np.concatenate(sample_weight)
+        # In fitting we remove infinite birth/death time points so that every center is finite
+        filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
+        filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
 
-        self.quantiser.fit(X=measures_concat, sample_weight=weights_concat)
+        self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
         self.centers = self.quantiser.cluster_centers_
+
         # Hack, but some people are unhappy if the order depends on the version of sklearn
         self.centers = self.centers[np.lexsort(self.centers.T)]
         if self.quantiser.n_clusters == 1:

From c389eeafba6c704bc044317b6d7cf393634b638a Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:43:20 +0200
Subject: [PATCH 02/14] add random measures for cases when there are not
 enough points to fit

[0, 1)^2 is arbitrary, questionable choice.
---
 src/python/gudhi/representations/vector_methods.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 348dd3bd17..298337b28a 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -818,6 +818,13 @@ def fit(self, X, y=None, sample_weight=None):
         filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
 
+        n_clusters = self.quantiser.n_clusters
+        n_points = len(filtered_measures_concat)
+        if n_points < n_clusters:
+            # If not enough points to fit (including 0), let's arbitrarily put centers in [0, 1)^2
+            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding random points in [0, 1)^2.")
+            filtered_weights_concat = np.concatenate((filtered_weights_concat, np.ones(shape=(n_clusters - n_points))))
+            filtered_measures_concat = np.concatenate((filtered_measures_concat, np.random.random((n_clusters - n_points, 2))))
 
         self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
         self.centers = self.quantiser.cluster_centers_

From c72d4eeb4e18adc628584d4b4cc07cd94e4a2d71 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:43:37 +0200
Subject: [PATCH 03/14] modern print

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 298337b28a..5a154e2883 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -807,7 +807,7 @@ def fit(self, X, y=None, sample_weight=None):
             self
         """
         if not hasattr(self.quantiser, 'fit'):
-            raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
+            raise TypeError(f"quantiser {self.quantiser} has no `fit` attribute.")
 
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]

From 4fabf81e0f886d2f29b3c14e61dd1ca1e67ce2ff Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:45:29 +0200
Subject: [PATCH 04/14] testing interface for vectorizers:

- fit
- fit empty diagrams
- transform
- transform empty diagrams
- sklearn set_output
- sklearn compose with ColumnTransformer (not sure how this could fail with
  all the other tests but who knows)
---
 .../test/test_representations_interface.py | 90 +++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 src/python/test/test_representations_interface.py

diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
new file mode 100644
index 0000000000..ca4d189de4
--- /dev/null
+++ b/src/python/test/test_representations_interface.py
@@ -0,0 +1,90 @@
+from copy import deepcopy
+import numpy as np
+
+from sklearn.cluster import KMeans
+
+from gudhi.representations import (Atol, Landscape, Silhouette, BettiCurve, ComplexPolynomial, \
+    TopologicalVector, PersistenceImage, Entropy)
+
+vectorizers = {
+    "atol": Atol(quantiser=KMeans(n_clusters=2, random_state=202312, n_init="auto")),
+    # "betti": BettiCurve(),
+}
+
+diag1 = [np.array([[0., np.inf],
+                   [0., 8.94427191],
+                   [0., 7.28010989],
+                   [0., 6.08276253],
+                   [0., 5.83095189],
+                   [0., 5.38516481],
+                   [0., 5.]]),
+         np.array([[11., np.inf],
+                   [6.32455532, 6.70820393]]),
+         np.empty(shape=[0, 2])]
+
+diag2 = [np.array([[0., np.inf],
+                   [0., 8.94427191],
+                   [0., 7.28010989],
+                   [0., 6.08276253],
+                   [0., 5.83095189],
+                   [0., 5.38516481],
+                   [0., 5.]]),
+         np.array([[11., np.inf],
+                   [6.32455532, 6.70820393]]),
+         np.array([[0., np.inf],
+                   [0., 1]])]
+
+diag3 = [np.empty(shape=[0, 2])]
+
+
+def test_fit():
+    print(f" > Testing `fit`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+
+
+def test_fit_empty():
+    print(f" > Testing `fit_empty`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        deepcopy(vectorizer).fit(X=[diag3[0], diag3[0]])
+
+
+def test_transform():
+    print(f" > Testing `transform`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        deepcopy(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])
+
+
+def test_transform_empty():
+    print(f" > Testing `transform_empty`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        copy_vec = deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+        copy_vec.transform(X=[diag3[0], diag3[0]])
+
+
+def test_set_output():
+    print(f" > Testing `set_output`.")
+    try:
+        import pandas
+        for name, vectorizer in vectorizers.items():
+            print(f" >> Testing {name}")
+            deepcopy(vectorizer).set_output(transform="pandas")
+    except ImportError:
+        print("Missing pandas, skipping set_output test")
+
+
+def test_compose():
+    print(f" > Testing composition with `sklearn.compose.ColumnTransformer`.")
+    from sklearn.compose import ColumnTransformer
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        ct = ColumnTransformer([
+            (f"{name}-0", deepcopy(vectorizer), 0),
+            (f"{name}-1", deepcopy(vectorizer), 1),
+            (f"{name}-2", deepcopy(vectorizer), 2)]
+        )
+        ct.fit_transform(X=[diag1, diag2])

From 632a55b914c4d5af0b246e9f6a7f61661fce4317 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 18:02:37 +0200
Subject: [PATCH 05/14] sklearn.base.clone instead of deepcopy

---
 .../test/test_representations_interface.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
index ca4d189de4..2d2c7f796a 100644
--- a/src/python/test/test_representations_interface.py
+++ b/src/python/test/test_representations_interface.py
@@ -1,6 +1,6 @@
-from copy import deepcopy
 import numpy as np
 
+from sklearn.base import clone
 from sklearn.cluster import KMeans
 
 from gudhi.representations import (Atol, Landscape, Silhouette, BettiCurve, ComplexPolynomial, \
@@ -41,28 +41,28 @@ def test_fit():
     print(f" > Testing `fit`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+        clone(vectorizer).fit(X=[diag1[0], diag2[0]])
 
 
 def test_fit_empty():
     print(f" > Testing `fit_empty`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        deepcopy(vectorizer).fit(X=[diag3[0], diag3[0]])
+        clone(vectorizer).fit(X=[diag3[0], diag3[0]])
 
 
 def test_transform():
     print(f" > Testing `transform`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        deepcopy(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])
+        clone(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])
 
 
 def test_transform_empty():
     print(f" > Testing `transform_empty`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        copy_vec = deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+        copy_vec = clone(vectorizer).fit(X=[diag1[0], diag2[0]])
         copy_vec.transform(X=[diag3[0], diag3[0]])
 
 
@@ -72,7 +72,7 @@ def test_set_output():
         import pandas
         for name, vectorizer in vectorizers.items():
             print(f" >> Testing {name}")
-            deepcopy(vectorizer).set_output(transform="pandas")
+            clone(vectorizer).set_output(transform="pandas")
     except ImportError:
         print("Missing pandas, skipping set_output test")
 
@@ -83,8 +83,8 @@ def test_compose():
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
         ct = ColumnTransformer([
-            (f"{name}-0", deepcopy(vectorizer), 0),
-            (f"{name}-1", deepcopy(vectorizer), 1),
-            (f"{name}-2", deepcopy(vectorizer), 2)]
+            (f"{name}-0", clone(vectorizer), 0),
+            (f"{name}-1", clone(vectorizer), 1),
+            (f"{name}-2", clone(vectorizer), 2)]
         )
         ct.fit_transform(X=[diag1, diag2])

From 03c61bb2febd6e9ff5d2d9cf51117c641717c98a Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 18:03:02 +0200
Subject: [PATCH 06/14] "interface tests" top file comment

---
 src/python/test/test_representations_interface.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
index 2d2c7f796a..26aa92ece0 100644
--- a/src/python/test/test_representations_interface.py
+++ b/src/python/test/test_representations_interface.py
@@ -1,3 +1,5 @@
+# The following tests only check that the program runs, not what it outputs
+
 import numpy as np
 
 from sklearn.base import clone

From 442f40184f920ad84112eaa6f9ba55823e80e596 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 15:49:42 +0200
Subject: [PATCH 07/14] instead of adding centers in [0, 1]^2, add infinitely
 far away centers with null inertia

---
 .../gudhi/representations/vector_methods.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 5a154e2883..da71d9c357 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -821,10 +821,9 @@ def fit(self, X, y=None, sample_weight=None):
         n_clusters = self.quantiser.n_clusters
         n_points = len(filtered_measures_concat)
         if n_points < n_clusters:
-            # If not enough points to fit (including 0), let's arbitrarily put centers in [0, 1)^2
-            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding random points in [0, 1)^2.")
-            filtered_weights_concat = np.concatenate((filtered_weights_concat, np.ones(shape=(n_clusters - n_points))))
-            filtered_measures_concat = np.concatenate((filtered_measures_concat, np.random.random((n_clusters - n_points, 2))))
+            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end
+            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding meaningless cluster centers.")
+            self.quantiser.n_clusters = n_points
 
         self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
         self.centers = self.quantiser.cluster_centers_
@@ -840,6 +839,14 @@ def fit(self, X, y=None, sample_weight=None):
             dist_centers = pairwise.pairwise_distances(self.centers)
             dist_centers[dist_centers == 0] = np.inf
             self.inertias = np.min(dist_centers, axis=0)/2
+
+        if n_points < n_clusters:
+            # Where we arbitrarily put centers as [-np.inf]^measure_dim at the end
+            fill_center = np.array([[-np.inf, -np.inf]])
+            fill_inertia = 0
+            self.centers = np.concatenate([self.centers, np.repeat(fill_center, repeats=n_clusters-n_points, axis=0)])
+            self.inertias = np.concatenate([self.inertias, np.repeat(fill_inertia, repeats=n_clusters-n_points)])
+            self.quantiser.n_clusters = n_clusters
         return self
 
     def __call__(self, measure, sample_weight=None):

From bd99872117c1adc1210cd8ed6a58826eee134da0 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 16:23:35 +0200
Subject: [PATCH 08/14] infer atol measure space dimension at fit

---
 .../gudhi/representations/vector_methods.py | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index da71d9c357..ec8a3dd88e 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -756,7 +756,7 @@ def __init__(
         self,
         quantiser=KMeans(n_clusters=2, n_init="auto"),
         weighting_method="cloud",
-        contrast="gaussian"
+        contrast="gaussian",
     ):
         """
         Constructor for the Atol measure vectorisation class.
@@ -794,7 +794,8 @@ def get_weighting_method(self):
 
     def fit(self, X, y=None, sample_weight=None):
         """
-        Calibration step: fit centers to the sample measures and derive inertias between centers.
+        Calibration step: fit centers to the target sample measures and derive inertias between centers. If the target
+        does not contain enough points for creating the intended number of centers, we fill in with bogus centers.
 
         Parameters:
             X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
@@ -806,22 +807,24 @@ def fit(self, X, y=None, sample_weight=None):
         Returns:
             self
         """
-        if not hasattr(self.quantiser, 'fit'):
-            raise TypeError(f"quantiser {self.quantiser} has no `fit` attribute.")
+        n_clusters = self.quantiser.n_clusters
 
+        if not len(X):
+            raise Exception("Cannot fit Atol on empty target.")
+        measures_concat = np.concatenate(X)
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]
-        measures_concat = np.concatenate(X)
         weights_concat = np.concatenate(sample_weight)
-        # In fitting we remove infinite birth/death time points so that every center is finite
+
+        # In fitting we remove infinite birth/death time points so that every center is finite. We do not care about duplicates.
         filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
-
-        n_clusters = self.quantiser.n_clusters
         n_points = len(filtered_measures_concat)
+        if not n_points:
+            raise Exception("Cannot fit Atol on empty target.")
+
         if n_points < n_clusters:
-            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end
+            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.
             print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding meaningless cluster centers.")
             self.quantiser.n_clusters = n_points
@@ -841,10 +844,10 @@ def fit(self, X, y=None, sample_weight=None):
             self.inertias = np.min(dist_centers, axis=0)/2
 
         if n_points < n_clusters:
-            # Where we arbitrarily put centers as [-np.inf]^measure_dim at the end
-            fill_center = np.array([[-np.inf, -np.inf]])
+            # Where we arbitrarily put centers as [-np.inf]^measure_dim.
+            fill_center = np.repeat(np.inf, repeats=X[0].shape[1])
             fill_inertia = 0
-            self.centers = np.concatenate([self.centers, np.repeat(fill_center, repeats=n_clusters-n_points, axis=0)])
+            self.centers = np.concatenate([self.centers, np.repeat([fill_center], repeats=n_clusters-n_points, axis=0)])
             self.inertias = np.concatenate([self.inertias, np.repeat(fill_inertia, repeats=n_clusters-n_points)])
             self.quantiser.n_clusters = n_clusters
         return self

From 7338eaf02c94cc09ef892dcaab6b0a16e5b19da2 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 17:26:21 +0200
Subject: [PATCH 09/14] bug fix

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index ec8a3dd88e..1fa1a12c06 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -834,7 +834,7 @@ def fit(self, X, y=None, sample_weight=None):
         self.centers = self.centers[np.lexsort(self.centers.T)]
         if self.quantiser.n_clusters == 1:
-            dist_centers = pairwise.pairwise_distances(measures_concat)
+            dist_centers = pairwise.pairwise_distances(filtered_measures_concat)
             np.fill_diagonal(dist_centers, 0)
             best_inertia = np.max(dist_centers)/2 if np.max(dist_centers)/2 > 0 else 1
             self.inertias = np.array([best_inertia])

From 9a94b4e52ec1e142546504b540525801b1233eab Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 17:54:37 +0200
Subject: [PATCH 10/14] remove test_fit_empty, probably shouldn't be one
 interface behaviour in this instance.

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 src/python/test/test_representations_interface.py  | 7 -------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 1fa1a12c06..93946d1db3 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -821,7 +821,7 @@ def fit(self, X, y=None, sample_weight=None):
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
         n_points = len(filtered_measures_concat)
         if not n_points:
-            raise Exception("Cannot fit Atol on empty target.")
+            raise Exception("Cannot fit Atol on measure with infinite components only.")
 
         if n_points < n_clusters:
             # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.
diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
index 26aa92ece0..d22f412939 100644
--- a/src/python/test/test_representations_interface.py
+++ b/src/python/test/test_representations_interface.py
@@ -46,13 +46,6 @@ def test_fit():
         clone(vectorizer).fit(X=[diag1[0], diag2[0]])
 
 
-def test_fit_empty():
-    print(f" > Testing `fit_empty`.")
-    for name, vectorizer in vectorizers.items():
-        print(f" >> Testing {name}")
-        clone(vectorizer).fit(X=[diag3[0], diag3[0]])
-
-
 def test_transform():
     print(f" > Testing `transform`.")
     for name, vectorizer in vectorizers.items():

From 88a7a16d76799508f5c90da11eaa4f2f64c790cb Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Tue, 2 Jul 2024 13:50:31 +0200
Subject: [PATCH 11/14] use ValueError instead of generic Exceptions

---
 src/python/gudhi/representations/vector_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 93946d1db3..d1a6f4c323 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -810,7 +810,7 @@ def fit(self, X, y=None, sample_weight=None):
         n_clusters = self.quantiser.n_clusters
 
         if not len(X):
-            raise Exception("Cannot fit Atol on empty target.")
+            raise ValueError("Cannot fit Atol on empty target.")
         measures_concat = np.concatenate(X)
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]
@@ -821,7 +821,7 @@ def fit(self, X, y=None, sample_weight=None):
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
         n_points = len(filtered_measures_concat)
         if not n_points:
-            raise Exception("Cannot fit Atol on measure with infinite components only.")
+            raise ValueError("Cannot fit Atol on measure with infinite components only.")
 
         if n_points < n_clusters:
             # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.

From 5e1732368ff971fac4cf38dc122a7190da9cad7f Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Tue, 2 Jul 2024 13:55:19 +0200
Subject: [PATCH 12/14] Replace and relocate print with warning

---
 src/python/gudhi/representations/vector_methods.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index d1a6f4c323..b5644cfbfc 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -9,6 +9,8 @@
 # - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves.
 # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
 
+import warnings
+
 import numpy as np
 from scipy.spatial.distance import cdist
 from sklearn.base import BaseEstimator, TransformerMixin
@@ -819,13 +821,11 @@ def fit(self, X, y=None, sample_weight=None):
         # In fitting we remove infinite birth/death time points so that every center is finite. We do not care about duplicates.
         filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
+
         n_points = len(filtered_measures_concat)
         if not n_points:
             raise ValueError("Cannot fit Atol on measure with infinite components only.")
-
         if n_points < n_clusters:
-            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.
-            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding meaningless cluster centers.")
             self.quantiser.n_clusters = n_points
 
         self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
@@ -844,7 +844,9 @@ def fit(self, X, y=None, sample_weight=None):
             self.inertias = np.min(dist_centers, axis=0)/2
 
         if n_points < n_clusters:
-            # Where we arbitrarily put centers as [-np.inf]^measure_dim.
+            # There weren't enough points to fit n_clusters, so we arbitrarily put centers as [-np.inf]^measure_dim.
+            warnings.warn(f"[Atol] after flitering had only {n_points} points to fit {n_clusters} clusters,"
+                          f"adding meaningless cluster centers.", RuntimeWarning)
             fill_center = np.repeat(np.inf, repeats=X[0].shape[1])
             fill_inertia = 0
             self.centers = np.concatenate([self.centers, np.repeat([fill_center], repeats=n_clusters-n_points, axis=0)])
             self.inertias = np.concatenate([self.inertias, np.repeat(fill_inertia, repeats=n_clusters-n_points)])

From f7b14d8cef464cedd37c49b13444771b5a39e862 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Wed, 3 Jul 2024 10:11:36 +0200
Subject: [PATCH 13/14] Rollback on warning (vs print) bogus center addition

---
 src/python/gudhi/representations/vector_methods.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index b5644cfbfc..ac2c1ee462 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -845,8 +845,7 @@ def fit(self, X, y=None, sample_weight=None):
 
         if n_points < n_clusters:
             # There weren't enough points to fit n_clusters, so we arbitrarily put centers as [-np.inf]^measure_dim.
-            warnings.warn(f"[Atol] after flitering had only {n_points} points to fit {n_clusters} clusters,"
-                          f"adding meaningless cluster centers.", RuntimeWarning)
+            print(f"[Atol] after filtering had only {n_points=} to fit {n_clusters=}, adding meaningless centers.")
             fill_center = np.repeat(np.inf, repeats=X[0].shape[1])
             fill_inertia = 0
             self.centers = np.concatenate([self.centers, np.repeat([fill_center], repeats=n_clusters-n_points, axis=0)])

From 040fb547f1ac317445d87f2b2fc518e45651b53f Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 26 Jul 2024 10:04:22 +0200
Subject: [PATCH 14/14] remove unused imports

---
 src/python/gudhi/representations/vector_methods.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index ac2c1ee462..14d2802e71 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -9,8 +9,6 @@
 # - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves.
 # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
 
-import warnings
-
 import numpy as np
 from scipy.spatial.distance import cdist
 from sklearn.base import BaseEstimator, TransformerMixin
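
A minimal sketch of the fit behaviour this series converges on, for context (not part of the patches): the toy diagram, the cluster count and the random_state below are illustrative assumptions; only the Atol/KMeans constructor, fit, and the centers/inertias attributes shown in the diffs above are taken from the source.

    # Sketch only: exercise Atol.fit on a diagram with fewer finite points than clusters.
    import numpy as np
    from sklearn.cluster import KMeans
    from gudhi.representations import Atol

    # One toy persistence diagram: a single finite point plus one point at infinity.
    diagram = np.array([[0., 2.], [0., np.inf]])

    atol = Atol(quantiser=KMeans(n_clusters=2, n_init="auto", random_state=0))
    atol.fit(X=[diagram])

    # The infinite point is filtered out, leaving one point to fit two clusters:
    # fit prints a message, fits one real center, and pads the remaining slot with a
    # meaningless center at [inf, inf] whose inertia is 0.
    print(atol.centers)   # expected: [[0., 2.], [inf, inf]]
    print(atol.inertias)  # expected: [1., 0.]

With an empty input list, or with diagrams whose points are all at infinity, fit raises a ValueError instead (patches 08-11), which is why test_fit_empty was dropped from the interface tests in patch 10.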