From 0e75d35fc3e1c0a479f40c4d093353eecff52949 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89mile=20Royer?= <emile.royer@inria.fr>
Date: Fri, 19 Jul 2024 17:44:08 +0200
Subject: [PATCH 1/4] Replace btdtri with betaincinv

The function scipy.special.btdtri was deprecated in SciPy 1.12, and will
be removed in SciPy 1.14.

scipy.special.betaincinv should be a drop-in replacement.
---
 river/bandit/bayes_ucb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/river/bandit/bayes_ucb.py b/river/bandit/bayes_ucb.py
index c60b6353ea..b7868fe38d 100644
--- a/river/bandit/bayes_ucb.py
+++ b/river/bandit/bayes_ucb.py
@@ -79,7 +79,7 @@ def compute_index(self, arm_id):
         """the p-th quantile of the beta distribution for the arm"""
         p = 1 - 1 / (self._n + 1)
         posterior = self._posteriors[arm_id]
-        return scipy.special.btdtri(posterior.alpha, posterior.beta, p)
+        return scipy.special.betaincinv(posterior.alpha, posterior.beta, p)
 
     def update(self, arm_id, *reward_args, **reward_kwargs):
         """Rewrite update function"""

From 9b2c89054fcdebeba84d0ed2914c92886b1c2576 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89mile=20Royer?= <emile.royer@inria.fr>
Date: Fri, 19 Jul 2024 17:53:48 +0200
Subject: [PATCH 2/4] Use plain datetime.now() in test

datetime.datetime.utcnow() is deprecated since Python 3.12 because the
naive datetime it returns is too easy to misuse.

Since we don't need timezone-aware objects, now() behaves the same way
(apart from the offset between local time and UTC).

The exact time used (or its timezone) has no impact on this test; the
only requirement is that both calls use the same timestamp.
---
 river/utils/test_rolling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/river/utils/test_rolling.py b/river/utils/test_rolling.py
index 5d484ac7de..7e77ba3d10 100644
--- a/river/utils/test_rolling.py
+++ b/river/utils/test_rolling.py
@@ -47,6 +47,6 @@ def test_issue_1343():
 
     """
     rmean = utils.TimeRolling(proba.MultivariateGaussian(), period=dt.timedelta(microseconds=1))
-    t = dt.datetime.utcnow()
+    t = dt.datetime.now()
     rmean.update({"a": 0}, t=t)
     rmean.update({"a": 1}, t=t)

From 8dec117c138c88420883046a601f569722ebb7dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89mile=20Royer?= <emile.royer@inria.fr>
Date: Fri, 16 Aug 2024 10:36:18 +0200
Subject: [PATCH 3/4] Remove concatenation with an empty dataframe

Concatenating with an empty dataframe raises a FutureWarning in pandas.
None values in a concatenation are silently dropped, as long as not all
of the objects to concatenate are None.
---
 river/covariance/test_emp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/river/covariance/test_emp.py b/river/covariance/test_emp.py
index 53167d1cf1..56e7edf74d 100644
--- a/river/covariance/test_emp.py
+++ b/river/covariance/test_emp.py
@@ -95,7 +95,7 @@ def test_covariance_update_sampled():
 def test_covariance_update_many(ddof):
     cov = covariance.EmpiricalCovariance(ddof=ddof)
     p = 5
-    X_all = pd.DataFrame(columns=range(p))
+    X_all = None
 
     for _ in range(p):
         n = np.random.randint(1, 31)
@@ -123,7 +123,7 @@ def test_covariance_update_many(ddof):
 def test_covariance_update_many_shuffled(ddof):
     cov = covariance.EmpiricalCovariance(ddof=ddof)
     p = 5
-    X_all = pd.DataFrame(columns=range(p))
+    X_all = None
 
     for _ in range(p):
         n = np.random.randint(5, 31)
@@ -143,7 +143,7 @@ def test_covariance_update_many_sampled():
     ddof = 1
     cov = covariance.EmpiricalCovariance(ddof=ddof)
     p = 5
-    X_all = pd.DataFrame(columns=range(p))
+    X_all = None
 
     for _ in range(p):
         n = np.random.randint(5, 31)

From 422aafb75ad09c098d07de6e0c8170a8a808bcd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89mile=20Royer?= <emile.royer@inria.fr>
Date: Wed, 13 Nov 2024 15:19:26 +0100
Subject: [PATCH 4/4] Preserve the sparse nature of dataframes

In a sparse dataframe, all elements must have a sparse Dtype.
Values added after the fact need to be converted.
---
 river/naive_bayes/bernoulli.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/river/naive_bayes/bernoulli.py b/river/naive_bayes/bernoulli.py
index 111dbabf89..0967cb47cd 100644
--- a/river/naive_bayes/bernoulli.py
+++ b/river/naive_bayes/bernoulli.py
@@ -252,18 +252,24 @@ def joint_log_likelihood_many(self, X: pd.DataFrame) -> pd.DataFrame:
         unknown = [x for x in X.columns if x not in self.feature_counts]
         missing = [x for x in self.feature_counts if x not in X.columns]
 
+        is_sparse = hasattr(X, "sparse")
+
         if unknown:
             X = X.drop(unknown, axis="columns")
 
         if missing:
-            X[missing] = False
+            X[missing] = 0
+            if is_sparse:
+                # The new values need to be converted to preserve the sparseness of the dataframe.
+                # Input values can be integers or floats; converting all to float preserves the behaviour without the need for complex conversion logic.
+                X = X.astype(pd.SparseDtype(float, 0.0))
 
         index, columns = X.index, X.columns
 
         if not self.class_counts or not self.feature_counts:
             return pd.DataFrame(index=index)
 
-        if hasattr(X, "sparse"):
+        if is_sparse:
             X = sparse.csr_matrix(X.sparse.to_coo())
             X.data = X.data > self.true_threshold
         else: