From 648ea2a93f401721e38035c91583a0706b259015 Mon Sep 17 00:00:00 2001
From: srzeszut
Date: Wed, 30 Oct 2024 11:51:57 +0100
Subject: [PATCH] refactor and move binarize to defn

---
 lib/scholar/naive_bayes/bernoulli.ex | 37 +++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/lib/scholar/naive_bayes/bernoulli.ex b/lib/scholar/naive_bayes/bernoulli.ex
index f6da0e44..da49979e 100644
--- a/lib/scholar/naive_bayes/bernoulli.ex
+++ b/lib/scholar/naive_bayes/bernoulli.ex
@@ -80,19 +80,29 @@ defmodule Scholar.NaiveBayes.Bernoulli do
   Fits a naive Bayes model. The function assumes that the targets `y` are integers
   between 0 and `num_classes` - 1 (inclusive). Otherwise, those samples will not
   contribute to `class_count`.
+
   ## Options
+
   #{NimbleOptions.docs(@opts_schema)}
+
   ## Return Values
+
   The function returns a struct with the following parameters:
+
   * `:class_count` - Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided.
+
   * `:class_log_priors` - Smoothed empirical log probability for each class.
+
   * `:feature_count` - Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided.
+
   * `:feature_log_probability` - Empirical log probability of features given a class, ``P(x_i|y)``.
+
   ## Examples
+
       iex> x = Nx.iota({4, 3})
       iex> y = Nx.tensor([1, 2, 0, 2])
       iex> Scholar.NaiveBayes.Bernoulli.fit(x, y, num_classes: 3, binarize: 1.0)
@@ -118,6 +128,7 @@ defmodule Scholar.NaiveBayes.Bernoulli do
           ]
         )
       }
+
       iex> x = Nx.iota({4, 3})
       iex> y = Nx.tensor([1, 2, 0, 2])
       iex> Scholar.NaiveBayes.Bernoulli.fit(x, y, num_classes: 3, force_alpha: false, alpha: 0.0)
@@ -144,6 +155,7 @@ defmodule Scholar.NaiveBayes.Bernoulli do
         )
       }
   """
+
   deftransform fit(x, y, opts \\ []) do
     if Nx.rank(x) != 2 do
       raise ArgumentError,
@@ -174,11 +186,6 @@ defmodule Scholar.NaiveBayes.Bernoulli do
     opts = NimbleOptions.validate!(opts, @opts_schema)
     type = to_float_type(x)

-    x_binarize =
-      if opts[:binarize] != nil,
-        do: Scholar.Preprocessing.Binarizer.fit_transform(x, threshold: opts[:binarize]),
-        else: x
-
     {alpha, opts} = Keyword.pop!(opts, :alpha)
     alpha = Nx.tensor(alpha, type: type)
@@ -226,7 +233,7 @@ defmodule Scholar.NaiveBayes.Bernoulli do
         sample_weights_flag: sample_weights_flag
       ]

-    fit_n(x_binarize, y, class_priors, sample_weights, alpha, opts)
+    fit_n(x, y, class_priors, sample_weights, alpha, opts)
   end

   defnp fit_n(x, y, class_priors, sample_weights, alpha, opts) do
@@ -235,6 +242,12 @@ defmodule Scholar.NaiveBayes.Bernoulli do
     num_classes = opts[:num_classes]

+    x =
+      case opts[:binarize] do
+        nil -> x
+        binarize -> Scholar.Preprocessing.Binarizer.fit_transform(x, threshold: binarize)
+      end
+
     y_one_hot = Scholar.Preprocessing.OneHotEncoder.fit_transform(y, num_categories: num_classes)
     y_one_hot = Nx.select(y_one_hot, Nx.tensor(1, type: type), Nx.tensor(0, type: type))
@@ -281,7 +294,9 @@ defmodule Scholar.NaiveBayes.Bernoulli do
   @doc """
   Perform classification on an array of test vectors `x` using `model`.
   You need to add sorted classes from the training data as the second argument.
+
   ## Examples
+
       iex> x = Nx.iota({4, 3})
       iex> y = Nx.tensor([1, 2, 0, 2])
       iex> model = Scholar.NaiveBayes.Bernoulli.fit(x, y, num_classes: 3)
@@ -291,6 +306,7 @@ defmodule Scholar.NaiveBayes.Bernoulli do
         [2, 2]
       >
   """
+
   defn predict(%__MODULE__{} = model, x, classes) do
     check_dim(x, Nx.axis_size(model.feature_count, 1))
@@ -316,7 +332,9 @@ defmodule Scholar.NaiveBayes.Bernoulli do

   @doc """
   Return log-probability estimates for the test vector `x` using `model`.
+
   ## Examples
+
       iex> x = Nx.iota({4, 3})
       iex> y = Nx.tensor([1, 2, 0, 2])
       iex> model = Scholar.NaiveBayes.Bernoulli.fit(x, y, num_classes: 3)
@@ -329,6 +347,7 @@ defmodule Scholar.NaiveBayes.Bernoulli do
       ]
     >
   """
+
   defn predict_log_probability(%__MODULE__{} = model, x) do
     check_dim(x, Nx.axis_size(model.feature_count, 1))
     jll = joint_log_likelihood(model, x)
@@ -344,7 +363,9 @@ defmodule Scholar.NaiveBayes.Bernoulli do

   @doc """
   Return probability estimates for the test vector `x` using `model`.
+
   ## Examples
+
       iex> x = Nx.iota({4, 3})
       iex> y = Nx.tensor([1, 2, 0, 2])
       iex> model = Scholar.NaiveBayes.Bernoulli.fit(x, y, num_classes: 3)
@@ -357,13 +378,16 @@ defmodule Scholar.NaiveBayes.Bernoulli do
       ]
     >
   """
+
   defn predict_probability(%__MODULE__{} = model, x) do
     Nx.exp(predict_log_probability(model, x))
   end

   @doc """
   Return joint log probability estimates for the test vector `x` using `model`.
+
   ## Examples
+
       iex> x = Nx.iota({4, 3})
       iex> y = Nx.tensor([1, 2, 0, 2])
       iex> model = Scholar.NaiveBayes.Bernoulli.fit(x, y, num_classes: 3)
@@ -376,6 +400,7 @@ defmodule Scholar.NaiveBayes.Bernoulli do
       ]
     >
   """
+
   defn predict_joint_log_probability(%__MODULE__{} = model, x) do
     check_dim(x, Nx.axis_size(model.feature_count, 1))
     joint_log_likelihood(model, x)
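
Note for reviewers: the hunk at -235,6 relies on keyword options being expanded at `defn` compilation time, so the `case` on `opts[:binarize]` inside `fit_n/6` picks a branch before the numerical graph is built. A minimal, self-contained sketch of that pattern follows; the module name `BinarizeSketch`, the `maybe_binarize/2` function, and the use of `Nx.select/3` with `Nx.greater/2` in place of `Scholar.Preprocessing.Binarizer.fit_transform/2` are illustrative assumptions, not code from this patch.

defmodule BinarizeSketch do
  import Nx.Defn

  # Branch on a compile-time keyword option inside defn, mirroring the
  # pattern fit_n/6 uses above: the `case` is resolved while the defn is
  # compiled, so only the selected branch ends up in the computation.
  defn maybe_binarize(x, opts \\ []) do
    opts = keyword!(opts, binarize: nil)

    case opts[:binarize] do
      # No threshold given: leave the input tensor untouched.
      nil ->
        x

      # Threshold given: values strictly above it become 1, the rest 0
      # (a stand-in for Scholar.Preprocessing.Binarizer.fit_transform/2).
      threshold ->
        Nx.select(Nx.greater(x, threshold), 1, 0)
    end
  end
end

# Example (hypothetical):
#   BinarizeSketch.maybe_binarize(Nx.iota({2, 3}), binarize: 2.0)
#   produces values [[0, 0, 0], [1, 1, 1]]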