Skip to content

Commit

Permalink
Remove n_pairs parameter from KhiopsRegressor
Browse files Browse the repository at this point in the history
It was never supported.
  • Loading branch information
folmos-at-orange committed Dec 20, 2024
1 parent e947fea commit 860e5ce
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 79 deletions.
111 changes: 60 additions & 51 deletions khiops/sklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,7 +1388,6 @@ class KhiopsSupervisedEstimator(KhiopsEstimator):
def __init__(
self,
n_features=100,
n_pairs=0,
n_trees=10,
specific_pairs=None,
all_possible_pairs=True,
Expand All @@ -1407,7 +1406,6 @@ def __init__(
internal_sort=internal_sort,
)
self.n_features = n_features
self.n_pairs = n_pairs
self.n_trees = n_trees
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
Expand Down Expand Up @@ -1497,25 +1495,6 @@ def _fit_check_params(self, ds, **kwargs):
raise TypeError(type_error_message("n_trees", self.n_trees, int))
if self.n_trees < 0:
raise ValueError("'n_trees' must be positive")
if not isinstance(self.n_pairs, int):
raise TypeError(type_error_message("n_pairs", self.n_pairs, int))
if self.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
if self.specific_pairs is not None:
if not is_list_like(self.specific_pairs):
raise TypeError(
type_error_message(
"specific_pairs", self.specific_pairs, "list-like"
)
)
else:
for pair in self.specific_pairs:
if not isinstance(pair, tuple):
raise TypeError(type_error_message(pair, pair, tuple))
if not isinstance(self.all_possible_pairs, bool):
raise TypeError(
type_error_message("all_possible_pairs", self.all_possible_pairs, bool)
)
if self.construction_rules is not None:
if not is_list_like(self.construction_rules):
raise TypeError(
Expand Down Expand Up @@ -1602,7 +1581,6 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):

# Rename parameters to be compatible with khiops.core
kwargs["max_constructed_variables"] = kwargs.pop("n_features")
kwargs["max_pairs"] = kwargs.pop("n_pairs")
kwargs["max_trees"] = kwargs.pop("n_trees")

# Add the additional_data_tables parameter
Expand Down Expand Up @@ -1766,7 +1744,6 @@ class KhiopsPredictor(KhiopsSupervisedEstimator):
def __init__(
self,
n_features=100,
n_pairs=0,
n_trees=10,
n_selected_features=0,
n_evaluated_features=0,
Expand All @@ -1781,7 +1758,6 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
Expand Down Expand Up @@ -2061,19 +2037,19 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
key=key,
internal_sort=internal_sort,
)
self.n_pairs = n_pairs
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
self.group_target_value = group_target_value
self._khiops_model_prefix = "SNB_"
self._predicted_target_meta_data_tag = "Prediction"
Expand Down Expand Up @@ -2119,12 +2095,44 @@ def _fit_check_params(self, ds, **kwargs):
# Call parent method
super()._fit_check_params(ds, **kwargs)

# Check the pair related parameters
if not isinstance(self.n_pairs, int):
raise TypeError(type_error_message("n_pairs", self.n_pairs, int))
if self.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
if self.specific_pairs is not None:
if not is_list_like(self.specific_pairs):
raise TypeError(
type_error_message(
"specific_pairs", self.specific_pairs, "list-like"
)
)
else:
for pair in self.specific_pairs:
if not isinstance(pair, tuple):
raise TypeError(type_error_message(pair, pair, tuple))
if not isinstance(self.all_possible_pairs, bool):
raise TypeError(
type_error_message("all_possible_pairs", self.all_possible_pairs, bool)
)

# Check 'group_target_value' parameter
if not isinstance(self.group_target_value, bool):
raise TypeError(
type_error_message("group_target_value", self.group_target_value, bool)
)

def _fit_prepare_training_function_inputs(self, ds, computation_dir):
# Call the parent method
args, kwargs = super()._fit_prepare_training_function_inputs(
ds, computation_dir
)

# Rename parameters to be compatible with khiops.core
kwargs["max_pairs"] = kwargs.pop("n_pairs")

return args, kwargs

def fit(self, X, y, **kwargs):
"""Fits a Selective Naive Bayes classifier according to X, y
Expand Down Expand Up @@ -2406,27 +2414,12 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
n_features : int, default 100
*Multi-table only* : Maximum number of multi-table aggregate features to
construct. See :doc:`/multi_table_primer` for more details.
n_pairs : int, default 0
Maximum number of pair features to construct. These features are 2D grid
partitions of univariate feature pairs. The grid is optimized such that in each
cell the target distribution is well approximated by a constant histogram. Only
pairs that are jointly more informative than their marginals may be taken into
account in the regressor.
n_selected_features : int, default 0
Maximum number of features to be selected in the SNB predictor. If equal to
0 it selects all the features kept in the training.
n_evaluated_features : int, default 0
Maximum number of features to be evaluated in the SNB predictor training. If
equal to 0 it evaluates all informative features.
specific_pairs : list of tuple, optional
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
contains only one non-empty feature name, then it generates all the pairs
containing it (within the maximum limit ``n_pairs``). These pairs have top
priority: they are constructed first.
all_possible_pairs : bool, default ``True``
If ``True`` tries to create all possible pairs within the limit ``n_pairs``.
Pairs specified with ``specific_pairs`` have top priority: they are constructed
first.
construction_rules : list of str, optional
Allowed rules for the automatic feature construction. If not set, it uses all
possible rules.
Expand Down Expand Up @@ -2508,12 +2501,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
def __init__(
self,
n_features=100,
n_pairs=0,
n_trees=0,
n_selected_features=0,
n_evaluated_features=0,
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
verbose=False,
output_dir=None,
Expand All @@ -2523,12 +2513,9 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
verbose=verbose,
output_dir=output_dir,
Expand Down Expand Up @@ -2820,17 +2807,17 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
key=key,
internal_sort=internal_sort,
)
self.n_pairs = n_pairs
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
self.categorical_target = categorical_target
self.group_target_value = group_target_value
self.transform_type_categorical = transform_type_categorical
Expand Down Expand Up @@ -2903,6 +2890,27 @@ def _fit_check_params(self, ds, **kwargs):
# Call parent method
super()._fit_check_params(ds, **kwargs)

# Check the pair related parameters
if not isinstance(self.n_pairs, int):
raise TypeError(type_error_message("n_pairs", self.n_pairs, int))
if self.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
if self.specific_pairs is not None:
if not is_list_like(self.specific_pairs):
raise TypeError(
type_error_message(
"specific_pairs", self.specific_pairs, "list-like"
)
)
else:
for pair in self.specific_pairs:
if not isinstance(pair, tuple):
raise TypeError(type_error_message(pair, pair, tuple))
if not isinstance(self.all_possible_pairs, bool):
raise TypeError(
type_error_message("all_possible_pairs", self.all_possible_pairs, bool)
)

# Check 'transform_type_categorical' parameter
if not isinstance(self.transform_type_categorical, str):
raise TypeError(
Expand Down Expand Up @@ -3025,6 +3033,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):
)
# Rename encoder parameters, delete unused ones
# to be compatible with khiops.core
kwargs["max_pairs"] = kwargs.pop("n_pairs")
kwargs["keep_initial_categorical_variables"] = kwargs["keep_initial_variables"]
kwargs["keep_initial_numerical_variables"] = kwargs.pop(
"keep_initial_variables"
Expand Down
2 changes: 1 addition & 1 deletion tests/test_estimator_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def test_regressor_attributes_monotable(self):
adult_df = pd.read_csv(adult_dataset_path, sep="\t").sample(750)
X = adult_df.drop("age", axis=1)
y = adult_df["age"]
khr_adult = KhiopsRegressor(n_trees=0, n_pairs=5)
khr_adult = KhiopsRegressor(n_trees=0)
with warnings.catch_warnings():
warnings.filterwarnings(
action="ignore",
Expand Down
27 changes: 0 additions & 27 deletions tests/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,12 +1112,9 @@ def setUpClass(cls):
"field_separator": "\t",
"detect_format": False,
"header_line": True,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"additional_data_tables": {},
}
Expand Down Expand Up @@ -1204,12 +1201,9 @@ def setUpClass(cls):
"field_separator": "\t",
"detect_format": False,
"header_line": True,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"additional_data_tables": {},
}
Expand Down Expand Up @@ -1304,12 +1298,9 @@ def setUpClass(cls):
"detect_format": False,
"header_line": True,
"max_constructed_variables": 10,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"additional_data_tables": {
"SpliceJunction`SpliceJunctionDNA"
Expand Down Expand Up @@ -1414,12 +1405,9 @@ def setUpClass(cls):
"detect_format": False,
"header_line": True,
"max_constructed_variables": 10,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"log_file_path": os.path.join(
cls.output_dir, "khiops.log"
Expand Down Expand Up @@ -2405,11 +2393,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
schema_type="monotable",
source_type="dataframe",
extra_estimator_kwargs={
"n_pairs": 1,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2424,11 +2409,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
schema_type="monotable",
source_type="dataframe_xy",
extra_estimator_kwargs={
"n_pairs": 1,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2441,11 +2423,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_file_dataset(self):
schema_type="monotable",
source_type="file_dataset",
extra_estimator_kwargs={
"n_pairs": 1,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2459,12 +2438,9 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
source_type="dataframe",
extra_estimator_kwargs={
"n_features": 10,
"n_pairs": 1,
"n_trees": 0,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2478,12 +2454,9 @@ def test_parameter_transfer_regressor_fit_from_multitable_file_dataset(self):
source_type="file_dataset",
extra_estimator_kwargs={
"n_features": 10,
"n_pairs": 1,
"n_trees": 0,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand Down

0 comments on commit 860e5ce

Please sign in to comment.