From 1fc49015567655d4c01b7307f88fcf109979547b Mon Sep 17 00:00:00 2001
From: Haifeng Wu
Date: Fri, 19 Jul 2024 18:59:22 +0800
Subject: [PATCH] add normalization to enhance numerical stability

---
 hypernets/tabular/sklearn_ex.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/hypernets/tabular/sklearn_ex.py b/hypernets/tabular/sklearn_ex.py
index d0a1d84..9efd759 100644
--- a/hypernets/tabular/sklearn_ex.py
+++ b/hypernets/tabular/sklearn_ex.py
@@ -14,7 +14,8 @@
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import log_loss, mean_squared_error
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder, KBinsDiscretizer, OrdinalEncoder, StandardScaler, OneHotEncoder
+from sklearn.preprocessing import LabelEncoder, KBinsDiscretizer, OrdinalEncoder, StandardScaler, OneHotEncoder, \
+    MinMaxScaler
 from sklearn.utils import column_or_1d
 from sklearn.utils.validation import check_is_fitted
 
@@ -374,6 +375,38 @@ def transform(self, X):
         return X
 
 
+@tb_transformer(pd.DataFrame)
+class MinMaxScalerTransformer(BaseEstimator):
+    """Rescale selected DataFrame columns with scikit-learn's ``MinMaxScaler``.
+
+    :param columns: labels of the columns to rescale
+    :param copy: forwarded to ``MinMaxScaler``; when true, ``transform``
+        also works on a copy of ``X`` and leaves the caller's frame untouched
+    """
+
+    def __init__(self, columns, copy=True):
+        super(MinMaxScalerTransformer, self).__init__()
+
+        self.scaler = MinMaxScaler(copy=copy)
+        self.copy = copy
+        self.columns = columns
+
+    def fit(self, X, y=None):
+        """Fit the scaler on ``X[self.columns]``; ``y`` is ignored."""
+        self.scaler.fit(X[self.columns].values)
+        return self
+
+    def transform(self, X):
+        """Return ``X`` with ``self.columns`` replaced by their scaled values."""
+        scaled = self.scaler.transform(X[self.columns].values)
+        if self.copy:
+            # copy=True must not leak the rescaling into the caller's frame.
+            X = X.copy()
+        for i, col in enumerate(self.columns):
+            X[col] = scaled[:, i]
+        return X
+
+
 @tb_transformer(pd.DataFrame)
 class SkewnessKurtosisTransformer(BaseEstimator):
     def __init__(self, transform_fn=None, skew_threshold=0.5, kurtosis_threshold=0.5):