From a4576e584e1973627b6e56bb9116c404a4c3d1f1 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:37:56 +0200
Subject: [PATCH 01/14] Instead of filtering then concatenating, do in inverse
 order.

---
 src/python/gudhi/representations/vector_methods.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 36f445884c..348dd3bd17 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -809,18 +809,19 @@ def fit(self, X, y=None, sample_weight=None):
         if not hasattr(self.quantiser, 'fit'):
             raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
 
-        # In fitting we remove infinite death time points so that every center is finite
-        X = [dgm[~np.isinf(dgm).any(axis=1), :] for dgm in X]
-
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]
         measures_concat = np.concatenate(X)
         weights_concat = np.concatenate(sample_weight)
+        # In fitting we remove infinite birth/death time points so that every center is finite
+        filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
+        filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
 
-        self.quantiser.fit(X=measures_concat, sample_weight=weights_concat)
+        self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
         self.centers = self.quantiser.cluster_centers_
+
         # Hack, but some people are unhappy if the order depends on the version of sklearn
         self.centers = self.centers[np.lexsort(self.centers.T)]
         if self.quantiser.n_clusters == 1:

From c389eeafba6c704bc044317b6d7cf393634b638a Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:43:20 +0200
Subject: [PATCH 02/14] add random measures for cases when there are not
 enough points to fit

[0, 1)^2 is arbitrary, questionable choice.
---
 src/python/gudhi/representations/vector_methods.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 348dd3bd17..298337b28a 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -818,6 +818,13 @@ def fit(self, X, y=None, sample_weight=None):
         filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
 
+        n_clusters = self.quantiser.n_clusters
+        n_points = len(filtered_measures_concat)
+        if n_points < n_clusters:
+            # If not enough points to fit (including 0), let's arbitrarily put centers in [0, 1)^2
+            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding random points in [0, 1)^2.")
+            filtered_weights_concat = np.concatenate((filtered_weights_concat, np.ones(shape=(n_clusters - n_points))))
+            filtered_measures_concat = np.concatenate((filtered_measures_concat, np.random.random((n_clusters - n_points, 2))))
 
         self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
         self.centers = self.quantiser.cluster_centers_

From c72d4eeb4e18adc628584d4b4cc07cd94e4a2d71 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:43:37 +0200
Subject: [PATCH 03/14] modern print

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 298337b28a..5a154e2883 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -807,7 +807,7 @@ def fit(self, X, y=None, sample_weight=None):
             self
         """
         if not hasattr(self.quantiser, 'fit'):
-            raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
+            raise TypeError(f"quantiser {self.quantiser} has no `fit` attribute.")
 
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]

From 4fabf81e0f886d2f29b3c14e61dd1ca1e67ce2ff Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 10:45:29 +0200
Subject: [PATCH 04/14] testing interface for vectorizers:

- fit
- fit empty diagrams
- transform
- transform empty diagrams
- sklearn set_output
- sklearn compose with ColumnTransformer (not sure how this could fail with
  all the other tests but who knows)
---
 .../test/test_representations_interface.py | 90 +++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 src/python/test/test_representations_interface.py

diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
new file mode 100644
index 0000000000..ca4d189de4
--- /dev/null
+++ b/src/python/test/test_representations_interface.py
@@ -0,0 +1,90 @@
+from copy import deepcopy
+import numpy as np
+
+from sklearn.cluster import KMeans
+
+from gudhi.representations import (Atol, Landscape, Silhouette, BettiCurve, ComplexPolynomial, \
+    TopologicalVector, PersistenceImage, Entropy)
+
+vectorizers = {
+    "atol": Atol(quantiser=KMeans(n_clusters=2, random_state=202312, n_init="auto")),
+    # "betti": BettiCurve(),
+}
+
+diag1 = [np.array([[0., np.inf],
+                   [0., 8.94427191],
+                   [0., 7.28010989],
+                   [0., 6.08276253],
+                   [0., 5.83095189],
+                   [0., 5.38516481],
+                   [0., 5.]]),
+         np.array([[11., np.inf],
+                   [6.32455532, 6.70820393]]),
+         np.empty(shape=[0, 2])]
+
+diag2 = [np.array([[0., np.inf],
+                   [0., 8.94427191],
+                   [0., 7.28010989],
+                   [0., 6.08276253],
+                   [0., 5.83095189],
+                   [0., 5.38516481],
+                   [0., 5.]]),
+         np.array([[11., np.inf],
+                   [6.32455532, 6.70820393]]),
+         np.array([[0., np.inf],
+                   [0., 1]])]
+
+diag3 = [np.empty(shape=[0, 2])]
+
+
+def test_fit():
+    print(f" > Testing `fit`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+
+
+def test_fit_empty():
+    print(f" > Testing `fit_empty`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        deepcopy(vectorizer).fit(X=[diag3[0], diag3[0]])
+
+
+def test_transform():
+    print(f" > Testing `transform`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        deepcopy(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])
+
+
+def test_transform_empty():
+    print(f" > Testing `transform_empty`.")
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        copy_vec = deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+        copy_vec.transform(X=[diag3[0], diag3[0]])
+
+
+def test_set_output():
+    print(f" > Testing `set_output`.")
+    try:
+        import pandas
+        for name, vectorizer in vectorizers.items():
+            print(f" >> Testing {name}")
+            deepcopy(vectorizer).set_output(transform="pandas")
+    except ImportError:
+        print("Missing pandas, skipping set_output test")
+
+
+def test_compose():
+    print(f" > Testing composition with `sklearn.compose.ColumnTransformer`.")
+    from sklearn.compose import ColumnTransformer
+    for name, vectorizer in vectorizers.items():
+        print(f" >> Testing {name}")
+        ct = ColumnTransformer([
+            (f"{name}-0", deepcopy(vectorizer), 0),
+            (f"{name}-1", deepcopy(vectorizer), 1),
+            (f"{name}-2", deepcopy(vectorizer), 2)]
+        )
+        ct.fit_transform(X=[diag1, diag2])

From 632a55b914c4d5af0b246e9f6a7f61661fce4317 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 18:02:37 +0200
Subject: [PATCH 05/14] sklearn.base.clone instead of deepcopy

---
 .../test/test_representations_interface.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
index ca4d189de4..2d2c7f796a 100644
--- a/src/python/test/test_representations_interface.py
+++ b/src/python/test/test_representations_interface.py
@@ -1,6 +1,6 @@
-from copy import deepcopy
 import numpy as np
 
+from sklearn.base import clone
 from sklearn.cluster import KMeans
 
 from gudhi.representations import (Atol, Landscape, Silhouette, BettiCurve, ComplexPolynomial, \
@@ -41,28 +41,28 @@ def test_fit():
     print(f" > Testing `fit`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+        clone(vectorizer).fit(X=[diag1[0], diag2[0]])
 
 
 def test_fit_empty():
     print(f" > Testing `fit_empty`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        deepcopy(vectorizer).fit(X=[diag3[0], diag3[0]])
+        clone(vectorizer).fit(X=[diag3[0], diag3[0]])
 
 
 def test_transform():
     print(f" > Testing `transform`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        deepcopy(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])
+        clone(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])
 
 
 def test_transform_empty():
     print(f" > Testing `transform_empty`.")
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
-        copy_vec = deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
+        copy_vec = clone(vectorizer).fit(X=[diag1[0], diag2[0]])
         copy_vec.transform(X=[diag3[0], diag3[0]])
 
 
@@ -72,7 +72,7 @@ def test_set_output():
         import pandas
         for name, vectorizer in vectorizers.items():
             print(f" >> Testing {name}")
-            deepcopy(vectorizer).set_output(transform="pandas")
+            clone(vectorizer).set_output(transform="pandas")
     except ImportError:
         print("Missing pandas, skipping set_output test")
 
@@ -83,8 +83,8 @@ def test_compose():
     for name, vectorizer in vectorizers.items():
         print(f" >> Testing {name}")
         ct = ColumnTransformer([
-            (f"{name}-0", deepcopy(vectorizer), 0),
-            (f"{name}-1", deepcopy(vectorizer), 1),
-            (f"{name}-2", deepcopy(vectorizer), 2)]
+            (f"{name}-0", clone(vectorizer), 0),
+            (f"{name}-1", clone(vectorizer), 1),
+            (f"{name}-2", clone(vectorizer), 2)]
         )
         ct.fit_transform(X=[diag1, diag2])

From 03c61bb2febd6e9ff5d2d9cf51117c641717c98a Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 28 Jun 2024 18:03:02 +0200
Subject: [PATCH 06/14] "interface tests" top file comment

---
 src/python/test/test_representations_interface.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
index 2d2c7f796a..26aa92ece0 100644
--- a/src/python/test/test_representations_interface.py
+++ b/src/python/test/test_representations_interface.py
@@ -1,3 +1,5 @@
+# The following tests only check that the program runs, not what it outputs
+
 import numpy as np
 
 from sklearn.base import clone

From 442f40184f920ad84112eaa6f9ba55823e80e596 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 15:49:42 +0200
Subject: [PATCH 07/14] instead of adding centers in [0, 1]^2, add infinitely
 far away centers with null inertia

---
 .../gudhi/representations/vector_methods.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 5a154e2883..da71d9c357 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -821,10 +821,9 @@ def fit(self, X, y=None, sample_weight=None):
         n_clusters = self.quantiser.n_clusters
         n_points = len(filtered_measures_concat)
         if n_points < n_clusters:
-            # If not enough points to fit (including 0), let's arbitrarily put centers in [0, 1)^2
-            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding random points in [0, 1)^2.")
-            filtered_weights_concat = np.concatenate((filtered_weights_concat, np.ones(shape=(n_clusters - n_points))))
-            filtered_measures_concat = np.concatenate((filtered_measures_concat, np.random.random((n_clusters - n_points, 2))))
+            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end
+            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding meaningless cluster centers.")
+            self.quantiser.n_clusters = n_points
 
         self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
         self.centers = self.quantiser.cluster_centers_
@@ -840,6 +839,14 @@ def fit(self, X, y=None, sample_weight=None):
             dist_centers = pairwise.pairwise_distances(self.centers)
             dist_centers[dist_centers == 0] = np.inf
             self.inertias = np.min(dist_centers, axis=0)/2
+
+        if n_points < n_clusters:
+            # Where we arbitrarily put centers as [-np.inf]^measure_dim at the end
+            fill_center = np.array([[-np.inf, -np.inf]])
+            fill_inertia = 0
+            self.centers = np.concatenate([self.centers, np.repeat(fill_center, repeats=n_clusters-n_points, axis=0)])
+            self.inertias = np.concatenate([self.inertias, np.repeat(fill_inertia, repeats=n_clusters-n_points)])
+            self.quantiser.n_clusters = n_clusters
         return self
 
     def __call__(self, measure, sample_weight=None):

From bd99872117c1adc1210cd8ed6a58826eee134da0 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 16:23:35 +0200
Subject: [PATCH 08/14] infer atol measure space dimension at fit

---
 .../gudhi/representations/vector_methods.py | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index da71d9c357..ec8a3dd88e 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -756,7 +756,7 @@ def __init__(
         self,
         quantiser=KMeans(n_clusters=2, n_init="auto"),
         weighting_method="cloud",
-        contrast="gaussian"
+        contrast="gaussian",
     ):
         """
         Constructor for the Atol measure vectorisation class.
@@ -794,7 +794,8 @@ def get_weighting_method(self):
 
     def fit(self, X, y=None, sample_weight=None):
         """
-        Calibration step: fit centers to the sample measures and derive inertias between centers.
+        Calibration step: fit centers to the target sample measures and derive inertias between centers. If the target
+        does not contain enough points for creating the intended number of centers, we fill in with bogus centers.
 
         Parameters:
             X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
@@ -806,22 +807,24 @@ def fit(self, X, y=None, sample_weight=None):
         Returns:
             self
         """
-        if not hasattr(self.quantiser, 'fit'):
-            raise TypeError(f"quantiser {self.quantiser} has no `fit` attribute.")
+        n_clusters = self.quantiser.n_clusters
 
+        if not len(X):
+            raise Exception("Cannot fit Atol on empty target.")
+        measures_concat = np.concatenate(X)
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]
-        measures_concat = np.concatenate(X)
         weights_concat = np.concatenate(sample_weight)
-        # In fitting we remove infinite birth/death time points so that every center is finite
+
+        # In fitting we remove infinite birth/death time points so that every center is finite. We do not care about duplicates.
         filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
-
-        n_clusters = self.quantiser.n_clusters
         n_points = len(filtered_measures_concat)
+        if not n_points:
+            raise Exception("Cannot fit Atol on empty target.")
+
         if n_points < n_clusters:
-            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end
+            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.
             print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding meaningless cluster centers.")
             self.quantiser.n_clusters = n_points
@@ -841,10 +844,10 @@ def fit(self, X, y=None, sample_weight=None):
             self.inertias = np.min(dist_centers, axis=0)/2
 
         if n_points < n_clusters:
-            # Where we arbitrarily put centers as [-np.inf]^measure_dim at the end
-            fill_center = np.array([[-np.inf, -np.inf]])
+            # Where we arbitrarily put centers as [-np.inf]^measure_dim.
+            fill_center = np.repeat(np.inf, repeats=X[0].shape[1])
             fill_inertia = 0
-            self.centers = np.concatenate([self.centers, np.repeat(fill_center, repeats=n_clusters-n_points, axis=0)])
+            self.centers = np.concatenate([self.centers, np.repeat([fill_center], repeats=n_clusters-n_points, axis=0)])
             self.inertias = np.concatenate([self.inertias, np.repeat(fill_inertia, repeats=n_clusters-n_points)])
             self.quantiser.n_clusters = n_clusters
         return self

From 7338eaf02c94cc09ef892dcaab6b0a16e5b19da2 Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 17:26:21 +0200
Subject: [PATCH 09/14] bug fix

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index ec8a3dd88e..1fa1a12c06 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -834,7 +834,7 @@ def fit(self, X, y=None, sample_weight=None):
         self.centers = self.centers[np.lexsort(self.centers.T)]
         if self.quantiser.n_clusters == 1:
-            dist_centers = pairwise.pairwise_distances(measures_concat)
+            dist_centers = pairwise.pairwise_distances(filtered_measures_concat)
             np.fill_diagonal(dist_centers, 0)
             best_inertia = np.max(dist_centers)/2 if np.max(dist_centers)/2 > 0 else 1
             self.inertias = np.array([best_inertia])

From 9a94b4e52ec1e142546504b540525801b1233eab Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Mon, 1 Jul 2024 17:54:37 +0200
Subject: [PATCH 10/14] remove test_fit_empty, probably shouldn't be one
 interface behaviour in this instance.

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 src/python/test/test_representations_interface.py  | 7 -------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 1fa1a12c06..93946d1db3 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -821,7 +821,7 @@ def fit(self, X, y=None, sample_weight=None):
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
         n_points = len(filtered_measures_concat)
         if not n_points:
-            raise Exception("Cannot fit Atol on empty target.")
+            raise Exception("Cannot fit Atol on measure with infinite components only.")
 
         if n_points < n_clusters:
             # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.
diff --git a/src/python/test/test_representations_interface.py b/src/python/test/test_representations_interface.py
index 26aa92ece0..d22f412939 100644
--- a/src/python/test/test_representations_interface.py
+++ b/src/python/test/test_representations_interface.py
@@ -46,13 +46,6 @@ def test_fit():
         clone(vectorizer).fit(X=[diag1[0], diag2[0]])
 
 
-def test_fit_empty():
-    print(f" > Testing `fit_empty`.")
-    for name, vectorizer in vectorizers.items():
-        print(f" >> Testing {name}")
-        clone(vectorizer).fit(X=[diag3[0], diag3[0]])
-
-
 def test_transform():
     print(f" > Testing `transform`.")
     for name, vectorizer in vectorizers.items():

From 88a7a16d76799508f5c90da11eaa4f2f64c790cb Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Tue, 2 Jul 2024 13:50:31 +0200
Subject: [PATCH 11/14] use ValueError instead of generic Exceptions

---
 src/python/gudhi/representations/vector_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 93946d1db3..d1a6f4c323 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -810,7 +810,7 @@ def fit(self, X, y=None, sample_weight=None):
         n_clusters = self.quantiser.n_clusters
 
         if not len(X):
-            raise Exception("Cannot fit Atol on empty target.")
+            raise ValueError("Cannot fit Atol on empty target.")
         measures_concat = np.concatenate(X)
         if sample_weight is None:
             sample_weight = [self.get_weighting_method()(measure) for measure in X]
@@ -821,7 +821,7 @@ def fit(self, X, y=None, sample_weight=None):
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
         n_points = len(filtered_measures_concat)
         if not n_points:
-            raise Exception("Cannot fit Atol on measure with infinite components only.")
+            raise ValueError("Cannot fit Atol on measure with infinite components only.")
 
         if n_points < n_clusters:
             # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.

From 5e1732368ff971fac4cf38dc122a7190da9cad7f Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Tue, 2 Jul 2024 13:55:19 +0200
Subject: [PATCH 12/14] Replace and relocate print with warning

---
 src/python/gudhi/representations/vector_methods.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index d1a6f4c323..b5644cfbfc 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -9,6 +9,8 @@
 # - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves.
 # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
 
+import warnings
+
 import numpy as np
 from scipy.spatial.distance import cdist
 from sklearn.base import BaseEstimator, TransformerMixin
@@ -819,13 +821,11 @@ def fit(self, X, y=None, sample_weight=None):
         # In fitting we remove infinite birth/death time points so that every center is finite. We do not care about duplicates.
         filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
         filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat
+
         n_points = len(filtered_measures_concat)
         if not n_points:
             raise ValueError("Cannot fit Atol on measure with infinite components only.")
-
         if n_points < n_clusters:
-            # If not enough points to fit (including 0), we will arbitrarily put centers as [-np.inf]^measure_dim at the end.
-            print(f"[Atol] had {n_points} points to fit {n_clusters} clusters, adding meaningless cluster centers.")
             self.quantiser.n_clusters = n_points
 
         self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
@@ -844,7 +844,9 @@ def fit(self, X, y=None, sample_weight=None):
             self.inertias = np.min(dist_centers, axis=0)/2
 
         if n_points < n_clusters:
-            # Where we arbitrarily put centers as [-np.inf]^measure_dim.
+            # There weren't enough points to fit n_clusters, so we arbitrarily put centers as [-np.inf]^measure_dim.
+            warnings.warn(f"[Atol] after flitering had only {n_points} points to fit {n_clusters} clusters,"
+                          f"adding meaningless cluster centers.", RuntimeWarning)
             fill_center = np.repeat(np.inf, repeats=X[0].shape[1])
             fill_inertia = 0
             self.centers = np.concatenate([self.centers, np.repeat([fill_center], repeats=n_clusters-n_points, axis=0)])
             self.inertias = np.concatenate([self.inertias, np.repeat(fill_inertia, repeats=n_clusters-n_points)])

From f7b14d8cef464cedd37c49b13444771b5a39e862 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Wed, 3 Jul 2024 10:11:36 +0200
Subject: [PATCH 13/14] Rollback on warning (vs print) bogus center addition

---
 src/python/gudhi/representations/vector_methods.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index b5644cfbfc..ac2c1ee462 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -845,8 +845,7 @@ def fit(self, X, y=None, sample_weight=None):
 
         if n_points < n_clusters:
             # There weren't enough points to fit n_clusters, so we arbitrarily put centers as [-np.inf]^measure_dim.
-            warnings.warn(f"[Atol] after flitering had only {n_points} points to fit {n_clusters} clusters,"
-                          f"adding meaningless cluster centers.", RuntimeWarning)
+            print(f"[Atol] after filtering had only {n_points=} to fit {n_clusters=}, adding meaningless centers.")
             fill_center = np.repeat(np.inf, repeats=X[0].shape[1])
             fill_inertia = 0
             self.centers = np.concatenate([self.centers, np.repeat([fill_center], repeats=n_clusters-n_points, axis=0)])

From 040fb547f1ac317445d87f2b2fc518e45651b53f Mon Sep 17 00:00:00 2001
From: Martin ROYER
Date: Fri, 26 Jul 2024 10:04:22 +0200
Subject: [PATCH 14/14] remove unused imports

---
 src/python/gudhi/representations/vector_methods.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index ac2c1ee462..14d2802e71 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -9,8 +9,6 @@
 # - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves.
 # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
 
-import warnings
-
 import numpy as np
 from scipy.spatial.distance import cdist
 from sklearn.base import BaseEstimator, TransformerMixin
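
A minimal sketch of the fit behaviour this series converges on, for context (not part of the patches): the toy diagram, the cluster count and the random_state below are illustrative assumptions; only the Atol/KMeans constructor, fit, and the centers/inertias attributes shown in the diffs above are taken from the source.

    # Sketch only: exercise Atol.fit on a diagram with fewer finite points than clusters.
    import numpy as np
    from sklearn.cluster import KMeans
    from gudhi.representations import Atol

    # One toy persistence diagram: a single finite point plus one point at infinity.
    diagram = np.array([[0., 2.], [0., np.inf]])

    atol = Atol(quantiser=KMeans(n_clusters=2, n_init="auto", random_state=0))
    atol.fit(X=[diagram])

    # The infinite point is filtered out, leaving one point to fit two clusters:
    # fit prints a message, fits one real center, and pads the remaining slot with a
    # meaningless center at [inf, inf] whose inertia is 0.
    print(atol.centers)   # expected: [[0., 2.], [inf, inf]]
    print(atol.inertias)  # expected: [1., 0.]

With an empty input list, or with diagrams whose points are all at infinity, fit raises a ValueError instead (patches 08-11), which is why test_fit_empty was dropped from the interface tests in patch 10.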