From 88e31c5abf5c6636f46170ffcccdcc1bc9fc0d1b Mon Sep 17 00:00:00 2001 From: Flaminia Date: Tue, 22 Oct 2024 21:39:40 +0200 Subject: [PATCH] refactor: add normalization as a method, failing tests --- mcda/configuration/configuration_validator.py | 11 +- mcda/mcda_functions/normalization.py | 2 +- mcda/models/ProMCDA.py | 106 ++++++++++-------- mcda/models/mcda_with_robustness.py | 2 - mcda/models/mcda_without_robustness.py | 1 - tests/unit_tests/test_promcda.py | 47 +++++++- 6 files changed, 110 insertions(+), 59 deletions(-) diff --git a/mcda/configuration/configuration_validator.py b/mcda/configuration/configuration_validator.py index 67c85a1..e4de691 100644 --- a/mcda/configuration/configuration_validator.py +++ b/mcda/configuration/configuration_validator.py @@ -160,12 +160,13 @@ def check_configuration_values(extracted_values: dict) -> Tuple[int, int, List[s marginal_distribution = extracted_values["marginal_distribution_for_each_indicator"] # Check for sensitivity-related configuration errors + valid_norm_methods = ['minmax', 'target', 'standardized', 'rank'] + valid_agg_methods = ['weighted_sum', 'geometric', 'harmonic', 'minimum'] if sensitivity_on == "no": - check_config_error(normalization not in ['minmax', 'target', 'standardized', 'rank'], - 'The available normalization functions are: minmax, target, standardized, rank.') - check_config_error(aggregation not in ['weighted_sum', 'geometric', 'harmonic', 'minimum'], - 'The available aggregation functions are: weighted_sum, geometric, harmonic, minimum.' - '\nWatch the correct spelling in the configuration.') + check_config_error(normalization not in valid_norm_methods, + f'Invalid normalization method: {normalization}. Available methods: {valid_norm_methods}') + check_config_error(aggregation not in valid_agg_methods, + f'Invalid aggregation method: {aggregation}. 
Available methods: {valid_agg_methods}') logger.info("ProMCDA will only use one pair of norm/agg functions: " + normalization + '/' + aggregation) else: logger.info("ProMCDA will use a set of different pairs of norm/agg functions") diff --git a/mcda/mcda_functions/normalization.py b/mcda/mcda_functions/normalization.py index 1a3a0c6..ad1b9ad 100644 --- a/mcda/mcda_functions/normalization.py +++ b/mcda/mcda_functions/normalization.py @@ -16,7 +16,7 @@ class Normalization(object): Ratio: target. """ - def __init__(self, input_matrix: pd.DataFrame, polarities: list): + def __init__(self, input_matrix: pd.DataFrame, polarities: tuple): self._input_matrix = copy.deepcopy(input_matrix) self.polarities = polarities diff --git a/mcda/models/ProMCDA.py b/mcda/models/ProMCDA.py index c95bd24..bcb6bd1 100644 --- a/mcda/models/ProMCDA.py +++ b/mcda/models/ProMCDA.py @@ -6,6 +6,7 @@ from mcda.configuration.configuration_validator import extract_configuration_values, check_configuration_values, \ check_configuration_keys +from mcda.mcda_functions.normalization import Normalization from mcda.utils.utils_for_main import run_mcda_without_indicator_uncertainty, run_mcda_with_indicator_uncertainty log = logging.getLogger(__name__) @@ -29,12 +30,10 @@ def __init__(self, input_matrix: pd.DataFrame, polarity: Tuple[str, ...], sensit :param output_path: path for saving output files. 
# Example of instantiating the class and using it - promcda = ProMCDA(input_matrix, polarity, sensitivity, robustness, monte_carlo) - sensitivity = sensitivity_class(input1, input2) - aggregate = aggregate_class(input1, input2) - promcda.run_mcda() - df_normalized = promcda.normalize() - df_aggregated = promcda.aggregate() + promcda = ProMCDA(input_matrix, polarity, sensitivity, robustness, monte_carlo) + promcda.run_mcda() + df_normalized = promcda.normalize() + df_aggregated = promcda.aggregate() """ self.logger = logging.getLogger("ProMCDA") self.input_matrix = input_matrix @@ -72,51 +71,62 @@ def validate_inputs(self) -> Tuple[int, int, list, Union[list, List[list], dict] return is_robustness_indicators, is_robustness_weights, polar, weights, configuration_values + def normalize(self, feature_range=(0, 1)) -> Union[pd.DataFrame, dict]: + """ + Normalize the decision matrix based on the configuration `f_norm`. + If `f_norm` is a string representing a single normalization method, + it applies that method to the decision matrix. - # self.validate_normalization(self.sensitivity['normalization']) - # self.validate_aggregation(self.sensitivity['aggregation']) - # self.validate_robustness(self.robustness) + If `f_norm` is a list of functions, each normalization function will be + applied to the input matrix sequentially, and the results will be stored + in a dictionary where the keys are function names. + + Args: + feature_range (tuple): Range for normalization methods that require it, like MinMax normalization. + The range (0.1, 1) is not needed when no aggregation will follow. + + Returns: + A single normalized DataFrame or a dictionary of DataFrames if multiple + normalization methods are applied. 
+ """ + normalization = Normalization(self.input_matrix, self.polarity) + + sensitivity_on = self.sensitivity['sensitivity_on'] + f_norm = self.sensitivity['normalization'] + f_norm_list = ['minmax', 'target', 'standardized', 'rank'] + + if sensitivity_on == "yes": + self.normalized_matrix = {} + for norm_function in f_norm_list: + self.logger.info("Applying normalization method: %s", norm_function) + norm_method = getattr(normalization, norm_function, None) + if norm_function in ['minmax', 'target', 'standardized']: + result = norm_method(feature_range) + if result is None: + raise ValueError(f"{norm_function} method returned None") + self.normalized_matrix[norm_function] = result + else: + result = normalization.rank() + if result is None: + raise ValueError(f"{norm_function} method returned None") + self.normalized_matrix[norm_function] = result + else: + self.logger.info("Normalizing matrix with method(s): %s", f_norm) + norm_method = getattr(normalization, f_norm, None) + if f_norm in ['minmax', 'target', 'standardized']: + result = norm_method(feature_range) + if result is None: + raise ValueError(f"{f_norm} method returned None") + self.normalized_matrix = result + else: + result = norm_method() + if result is None: + raise ValueError(f"{f_norm} method returned None") + self.normalized_matrix = result + + return self.normalized_matrix - # def validate_normalization(self, f_norm): - # """ - # Validate the normalization method. - # """ - # valid_norm_methods = ['minmax', 'target', 'standardized', 'rank'] - # if f_norm not in valid_norm_methods: - # raise ValueError(f"Invalid normalization method: {f_norm}. Available methods: {valid_norm_methods}") - # - # def validate_aggregation(self, f_agg): - # """ - # Validate the aggregation method. - # """ - # valid_agg_methods = ['weighted_sum', 'geometric', 'harmonic', 'minimum'] - # if f_agg not in valid_agg_methods: - # raise ValueError(f"Invalid aggregation method: {f_agg}. 
Available methods: {valid_agg_methods}") - # - # def validate_robustness(self, robustness): - # """ - # Validate robustness analysis settings. - # """ - # if not isinstance(robustness, dict): - # raise ValueError("Robustness settings must be a dictionary.") - # - # # Add more specific checks based on robustness config structure - # if robustness['on_single_weights'] == 'yes' and robustness['on_all_weights'] == 'yes': - # raise ValueError("Conflicting settings for robustness analysis on weights.") - # - # def normalize(self): - # """ - # Normalize the decision matrix based on the configuration. - # """ - # f_norm = self.sensitivity['normalization'] - # self.logger.info("Normalizing matrix with method: %s", f_norm) - # - # # Perform normalization (replace this with actual logic) - # self.normalized_matrix = normalize_matrix(self.input_matrix, f_norm) - # - # return self.normalized_matrix - # # def aggregate(self): # """ # Aggregate the decision matrix based on the configuration. diff --git a/mcda/models/mcda_with_robustness.py b/mcda/models/mcda_with_robustness.py index 366a332..df8e677 100644 --- a/mcda/models/mcda_with_robustness.py +++ b/mcda/models/mcda_with_robustness.py @@ -6,8 +6,6 @@ import pandas as pd import numpy as np -from mcda.configuration.config import Config - log = logging.getLogger(__name__) formatter = '%(levelname)s: %(asctime)s - %(name)s - %(message)s' diff --git a/mcda/models/mcda_without_robustness.py b/mcda/models/mcda_without_robustness.py index 8f7fba8..ab9da71 100644 --- a/mcda/models/mcda_without_robustness.py +++ b/mcda/models/mcda_without_robustness.py @@ -3,7 +3,6 @@ import logging import pandas as pd -from mcda.configuration.config import Config from mcda.mcda_functions.normalization import Normalization from mcda.mcda_functions.aggregation import Aggregation diff --git a/tests/unit_tests/test_promcda.py b/tests/unit_tests/test_promcda.py index e0b935c..c3785d8 100644 --- a/tests/unit_tests/test_promcda.py +++ 
b/tests/unit_tests/test_promcda.py @@ -45,8 +45,10 @@ def test_init(self): """ Test if ProMCDA initializes correctly. """ + # Given promcda = ProMCDA(self.input_matrix, self.polarity, self.sensitivity, self.robustness, self.monte_carlo, self.output_path) + # Then self.assertEqual(promcda.input_matrix.shape, (3, 2)) self.assertEqual(promcda.polarity, self.polarity) self.assertEqual(promcda.sensitivity, self.sensitivity) @@ -57,11 +59,13 @@ def test_validate_inputs(self): """ Test if input validation works and returns the expected values. """ + # Given promcda = ProMCDA(self.input_matrix, self.polarity, self.sensitivity, self.robustness, self.monte_carlo, self.output_path) + # When (is_robustness_indicators, is_robustness_weights, polar, weights, config) = promcda.validate_inputs() - # Validate the result + # Then self.assertIsInstance(is_robustness_indicators, int) self.assertIsInstance(is_robustness_weights, int) self.assertIsInstance(polar, tuple) @@ -70,6 +74,46 @@ def test_validate_inputs(self): self.assertEqual(is_robustness_indicators, 0) self.assertEqual(is_robustness_weights, 0) + def test_normalize_single_method(self): + """ + Test normalization with a single method. + The correctness of the output values is tested in unit_tests/test_normalization.py + """ + # Given + self.sensitivity['sensitivity_on'] = 'no' + + # When + promcda = ProMCDA(self.input_matrix, self.polarity, self.sensitivity, self.robustness, self.monte_carlo, + self.output_path) + normalized_matrix = promcda.normalize() + + # Then + self.assertIsInstance(normalized_matrix, pd.DataFrame) + + def test_normalize_multiple_methods(self): + """ + Test normalization with multiple methods. 
+ The correctness of the output values is tested in unit_tests/test_normalization.py + """ + self.sensitivity['sensitivity_on'] = 'yes' + self.sensitivity['normalization'] = ['minmax', 'standardized', 'rank', 'target'] + + promcda = ProMCDA(self.input_matrix, self.polarity, self.sensitivity, self.robustness, self.monte_carlo, + self.output_path) + normalized_matrices = promcda.normalize() + + self.assertIsInstance(normalized_matrices, dict) + self.assertIn('minmax', normalized_matrices) + self.assertIn('standardized', normalized_matrices) + self.assertIn('rank', normalized_matrices) + self.assertIn('target', normalized_matrices) + + self.assertIsInstance(normalized_matrices['minmax'], pd.DataFrame) + self.assertIsInstance(normalized_matrices['standardized'], pd.DataFrame) + self.assertIsInstance(normalized_matrices['rank'], pd.DataFrame) + self.assertIsInstance(normalized_matrices['target'], pd.DataFrame) + + def tearDown(self): """ Clean up temporary directories and files after each test. @@ -80,5 +124,4 @@ def tearDown(self): if __name__ == '__main__': unittest.main() - # TODO: write additional tests for normalization, aggregation, etc.