Fix Multi_Class

Parietal-INRIA · achamma723 · Jun 2, 2024 · Jun 2, 2024 · Jun 2, 2024 · 9e20546896e21cc3b4bd8e417c952711a03f161e
commit 9e20546896e21cc3b4bd8e417c952711a03f161e
diff --git a/hidimstat/BBI.py b/hidimstat/BBI.py
@@ -134,6 +134,7 @@ def __init__(
         self.random_state = random_state
         self.X_test = [None] * max(self.k_fold, 1)
         self.y_test = [None] * max(self.k_fold, 1)
+        self.y_train = [None] * max(self.k_fold, 1)
         self.org_pred = [None] * max(self.k_fold, 1)
         self.pred_scores = [None] * max(self.k_fold, 1)
         self.X_nominal = [None] * max(self.k_fold, 1)
@@ -449,6 +450,7 @@ def fit(self, X, y=None):
 
                 self.X_test[ind_fold] = X_test.copy()
                 self.y_test[ind_fold] = y_test.copy()
+                self.y_train[ind_fold] = y_train.copy()
 
                 # Find the list of optimal sub-models to be used in the
                 # following steps (Default estimator)
@@ -459,6 +461,7 @@ def fit(self, X, y=None):
                 self.list_estimators[ind_fold] = copy(self.estimator)
 
         else:
+            self.y_train = y.copy()
             if not self.apply_ridge:
                 if self.coffeine_transformer is not None:
                     X = self.coffeine_transformers[0].fit_transform(
@@ -549,7 +552,12 @@ def func(x):
 
                         self.estimator.fit(X_train_scaled, y_train_curr)
 
-                        list_loss.append(self.loss(y_valid_curr, func(X_valid_scaled)))
+                        if self.prob_type == "classification":
+                            list_loss.append(self.loss(y_valid_curr,
+                                                       func(X_valid_scaled)[:, np.unique(y_valid_curr)]))
+                        else:
+                            list_loss.append(self.loss(y_valid_curr,
+                                                       func(X_valid_scaled)))
 
             ind_min = np.argmin(list_loss)
             best_hyper = list_hyper[ind_min]
@@ -761,11 +769,10 @@ def compute_importance(self, X=None, y=None):
                     )[y_col]
             else:
                 if self.prob_type in ("classification", "binary"):
-                    y[ind_fold] = (
-                        OneHotEncoder(handle_unknown="ignore")
-                        .fit_transform(y[ind_fold].reshape(-1, 1))
-                        .toarray()
-                    )
+                    one_hot = (OneHotEncoder(handle_unknown="ignore")
+                                .fit(self.y_train[ind_fold].reshape(-1, 1)))
+                    y[ind_fold] = (one_hot.transform(y[ind_fold]
+                                                    .reshape(-1, 1)).toarray())
             if self.com_imp:
                 if not self.conditional:
                     self.pred_scores[ind_fold], score_cur = list(

diff --git a/hidimstat/compute_importance.py b/hidimstat/compute_importance.py
@@ -407,7 +407,8 @@ def joblib_compute_conditional(
                     )
 
     if prob_type in ("classification", "binary"):
-        score = roc_auc_score(y_test, org_pred)
+        nonzero_cols = np.where(y_test.any(axis=0))[0]
+        score = roc_auc_score(y_test[:, nonzero_cols], org_pred[:, nonzero_cols])
     else:
         score = (
             mean_absolute_error(y_test, org_pred),
@@ -710,7 +711,9 @@ def joblib_compute_permutation(
 
         res = (y_test - pred_i) ** 2 - (y_test - org_pred) ** 2
     else:
-        score = roc_auc_score(y_test, org_pred)
+        nonzero_cols = np.where(y_test.any(axis=0))[0]
+        score = roc_auc_score(y_test[:, nonzero_cols],
+                              org_pred[:, nonzero_cols])
         if type_predictor == "DNN":
             pred_i = estimator.predict_proba(current_X_test_list, scale=False)
         else: