diff --git a/.gitignore b/.gitignore index a2eb120..cfdde19 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ .DS_Store .idea .ipynb_checkpoints/ +.png +.CSV +demo_in_notebook/output_files/mock_output/ + diff --git a/demo_in_notebook/test_import.py b/demo_in_notebook/test_import.py new file mode 100644 index 0000000..62b2558 --- /dev/null +++ b/demo_in_notebook/test_import.py @@ -0,0 +1,10 @@ +import os +import sys + +# Set the package path to the parent directory +package_path = '/Users/flaminia/Documents/work/ProMCDA' +sys.path.append(package_path) + +# Now import ProMCDA from models +from mcda.models.ProMCDA import ProMCDA +print("Import successful!") diff --git a/demo_in_notebook/use_promcda_library.ipynb b/demo_in_notebook/use_promcda_library.ipynb new file mode 100644 index 0000000..a4bdad3 --- /dev/null +++ b/demo_in_notebook/use_promcda_library.ipynb @@ -0,0 +1,1215 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "96df2c84-1509-4e93-952a-9beba8d0ec45", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Import successful!\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "import pandas as pd\n", + "\n", + "package_path = '/Users/flaminia/Documents/work/ProMCDA'\n", + "\n", + "if package_path not in sys.path:\n", + " sys.path.append(package_path)\n", + "\n", + "try:\n", + " from mcda.models.ProMCDA import ProMCDA\n", + " print(\"Import successful!\")\n", + "except ModuleNotFoundError as e:\n", + " print(f\"ModuleNotFoundError: {e}\")\n", + "\n", + "from mcda.configuration.enums import NormalizationFunctions, AggregationFunctions, OutputColumnNames4Sensitivity, NormalizationNames4Sensitivity, PDFType" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9dbbd731-e201-47cc-8bfb-159a52559b25", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_matrix': Criteria 1 Criteria 2\n", + " Alternatives \n", + " A 0.5 0.3\n", + 
" B 0.2 0.6\n", + " C 0.8 0.1,\n", + " 'polarity': ('+', '-'),\n", + " 'robustness_weights': False,\n", + " 'robustness_indicators': False,\n", + " 'marginal_distributions': [,\n", + " ],\n", + " 'num_runs': 5,\n", + " 'num_cores': 1,\n", + " 'output_path': 'mock_output/'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def setUpNoRobustnessIndicators():\n", + " \n", + " # Mock input data for testing\n", + " input_matrix_without_uncertainty = pd.DataFrame({\n", + " 'Alternatives': ['A', 'B', 'C'],\n", + " 'Criteria 1': [0.5, 0.2, 0.8],\n", + " 'Criteria 2': [0.3, 0.6, 0.1]\n", + " }, index=['A', 'B', 'C'])\n", + " \n", + " input_matrix_without_uncertainty.set_index('Alternatives', inplace=True)\n", + "\n", + " polarity = ('+', '-')\n", + "\n", + " robustness_weights = False\n", + " robustness_indicators = False\n", + "\n", + " marginal_distributions = [PDFType.NORMAL, PDFType.NORMAL]\n", + "\n", + " num_runs = 5\n", + " num_cores = 1\n", + "\n", + " output_path = 'mock_output/'\n", + "\n", + " # Return the setup parameters as a dictionary\n", + " return {\n", + " 'input_matrix': input_matrix_without_uncertainty, # Decide what type of input matrix\n", + " 'polarity': polarity,\n", + " 'robustness_weights': robustness_weights,\n", + " 'robustness_indicators': robustness_indicators,\n", + " 'marginal_distributions': marginal_distributions,\n", + " 'num_runs': num_runs,\n", + " 'num_cores': num_cores,\n", + " 'output_path': output_path\n", + " }\n", + "\n", + "# Run the setup and store parameters in a variable\n", + "setup_no_robustness_indicators = setUpNoRobustnessIndicators()\n", + "\n", + "# Check the setup parameters\n", + "setup_no_robustness_indicators" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cef40536-5942-44a4-9a2c-2a9e9b02b7a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_matrix': Criterion1_mean Criterion1_std Criterion2_mean 
Criterion2_std\n", + " Alternatives \n", + " A 0.5 0.10 0.3 0.03\n", + " B 0.2 0.02 0.6 0.06\n", + " C 0.8 0.07 0.1 0.01,\n", + " 'polarity': ('+', '-'),\n", + " 'robustness_weights': False,\n", + " 'robustness_indicators': True,\n", + " 'marginal_distributions': [,\n", + " ],\n", + " 'num_runs': 5,\n", + " 'num_cores': 1,\n", + " 'output_path': 'mock_output/'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def setUpRobustnessIndicators():\n", + " \n", + " # Mock input data for testing\n", + " input_matrix_with_uncertainty = pd.DataFrame({\n", + " 'Alternatives': ['A', 'B', 'C'],\n", + " 'Criterion1_mean': [0.5, 0.2, 0.8],\n", + " 'Criterion1_std': [0.1, 0.02, 0.07],\n", + " 'Criterion2_mean': [0.3, 0.6, 0.1],\n", + " 'Criterion2_std': [0.03, 0.06, 0.01]\n", + " })\n", + " \n", + " input_matrix_with_uncertainty.set_index('Alternatives', inplace=True)\n", + "\n", + " polarity = ('+', '-')\n", + "\n", + " robustness_weights = False\n", + " robustness_indicators = True\n", + "\n", + " marginal_distributions = [PDFType.NORMAL, PDFType.NORMAL]\n", + "\n", + " num_runs = 5\n", + " num_cores = 1\n", + "\n", + " output_path = 'mock_output/'\n", + "\n", + " # Return the setup parameters as a dictionary\n", + " return {\n", + " 'input_matrix': input_matrix_with_uncertainty, # Decide what type of input matrix\n", + " 'polarity': polarity,\n", + " 'robustness_weights': robustness_weights,\n", + " 'robustness_indicators': robustness_indicators,\n", + " 'marginal_distributions': marginal_distributions,\n", + " 'num_runs': num_runs,\n", + " 'num_cores': num_cores,\n", + " 'output_path': output_path\n", + " }\n", + "\n", + "# Run the setup and store parameters in a variable\n", + "setup_robustness_indicators = setUpRobustnessIndicators()\n", + "\n", + "# Check the setup parameters\n", + "setup_robustness_indicators" + ] + }, + { + "cell_type": "markdown", + "id": "07775901-e7d2-447c-a96e-b882748e9f4f", + "metadata": 
{}, + "source": [ + "## TEST NO ROBUSTNESS" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c9ffcdc6-0243-4303-8afd-33c7f3de5c21", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 16:48:33,048 - ProMCDA - Alternatives are ['A', 'B', 'C']\n" + ] + } + ], + "source": [ + "promcda = ProMCDA(\n", + " input_matrix=setup_no_robustness_indicators['input_matrix'],\n", + " polarity=setup_no_robustness_indicators['polarity'],\n", + " robustness_weights=setup_no_robustness_indicators['robustness_weights'],\n", + " robustness_indicators=setup_no_robustness_indicators['robustness_indicators'],\n", + " marginal_distributions=setup_no_robustness_indicators['marginal_distributions'],\n", + " num_runs=setup_no_robustness_indicators['num_runs'],\n", + " num_cores=setup_no_robustness_indicators['num_cores'],\n", + " #output_path=setup_parameters['output_path']\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "94796bf7-fd25-4f0d-bdd6-f828f3e0b5f1", + "metadata": {}, + "source": [ + "### Test normalize with sensitivity¶" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "dfd69479-446e-4919-8b3e-c888d9e675f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Criteria 1_minmax_01Criteria 2_minmax_01Criteria 1_minmax_without_zeroCriteria 2_minmax_without_zeroCriteria 1_target_01Criteria 2_target_01Criteria 1_target_without_zeroCriteria 2_target_without_zeroCriteria 1_standardized_anyCriteria 2_standardized_anyCriteria 1_standardized_without_zeroCriteria 2_standardized_without_zeroCriteria 1_rankCriteria 2_rank
00.50.60.550.640.6250.50.66250.550.00.1324531.11.2920792.02.0
10.00.00.10.10.250.00.3250.1-1.0-1.0596260.10.11.01.0
21.01.01.01.01.00.8333331.00.851.00.9271732.12.0867993.03.0
\n", + "
" + ], + "text/plain": [ + " Criteria 1_minmax_01 Criteria 2_minmax_01 Criteria 1_minmax_without_zero \\\n", + "0 0.5 0.6 0.55 \n", + "1 0.0 0.0 0.1 \n", + "2 1.0 1.0 1.0 \n", + "\n", + " Criteria 2_minmax_without_zero Criteria 1_target_01 Criteria 2_target_01 \\\n", + "0 0.64 0.625 0.5 \n", + "1 0.1 0.25 0.0 \n", + "2 1.0 1.0 0.833333 \n", + "\n", + " Criteria 1_target_without_zero Criteria 2_target_without_zero \\\n", + "0 0.6625 0.55 \n", + "1 0.325 0.1 \n", + "2 1.0 0.85 \n", + "\n", + " Criteria 1_standardized_any Criteria 2_standardized_any \\\n", + "0 0.0 0.132453 \n", + "1 -1.0 -1.059626 \n", + "2 1.0 0.927173 \n", + "\n", + " Criteria 1_standardized_without_zero Criteria 2_standardized_without_zero \\\n", + "0 1.1 1.292079 \n", + "1 0.1 0.1 \n", + "2 2.1 2.086799 \n", + "\n", + " Criteria 1_rank Criteria 2_rank \n", + "0 2.0 2.0 \n", + "1 1.0 1.0 \n", + "2 3.0 3.0 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize()" + ] + }, + { + "cell_type": "markdown", + "id": "267aae1e-3b4d-4ad0-8337-fa46a68081f8", + "metadata": {}, + "source": [ + "### Test normalize with specific method" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2ba273b0-18ac-43a1-8afc-f3280f6e2d9f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Criteria 1_minmax_01Criteria 2_minmax_01Criteria 1_minmax_without_zeroCriteria 2_minmax_without_zero
00.50.60.550.64
10.00.00.10.1
21.01.01.01.0
\n", + "
" + ], + "text/plain": [ + " Criteria 1_minmax_01 Criteria 2_minmax_01 Criteria 1_minmax_without_zero \\\n", + "0 0.5 0.6 0.55 \n", + "1 0.0 0.0 0.1 \n", + "2 1.0 1.0 1.0 \n", + "\n", + " Criteria 2_minmax_without_zero \n", + "0 0.64 \n", + "1 0.1 \n", + "2 1.0 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize(NormalizationFunctions.MINMAX)" + ] + }, + { + "cell_type": "markdown", + "id": "eb96c3cb-6a6c-493a-9e30-089fc3676052", + "metadata": {}, + "source": [ + "### Test aggregate with full sensitivity" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "dccaf3ad-79e2-4110-a286-8bfef5abbd6a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 17:20:58,500 - ProMCDA - Number of alternatives: 3\n", + "INFO: 2024-12-12 17:20:58,501 - ProMCDA - Number of indicators: 2\n", + "INFO: 2024-12-12 17:20:58,502 - ProMCDA - Polarities: ('+', '-')\n", + "INFO: 2024-12-12 17:20:58,502 - ProMCDA - Weights: [0.5, 0.5]\n", + "INFO: 2024-12-12 17:20:58,503 - ProMCDA - Normalized weights: [0.5, 0.5]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
minmax_weighted_sumtarget_weighted_sumstandardized_weighted_sumrank_weighted_summinmax_geometrictarget_geometricstandardized_geometricrank_geometricminmax_harmonictarget_harmonicstandardized_harmonicrank_harmonicstandardized_minimum
00.550.56250.0662272.00.5932960.6036351.1921772.00.5915970.6010311.1883282.00.0
10.00.125-1.0298131.00.1000000.1802780.1000001.00.10.1529410.11.0-1.059626
21.00.9166670.9635863.01.0000000.9219542.0933893.01.00.9189192.0933783.00.927173
\n", + "
" + ], + "text/plain": [ + " minmax_weighted_sum target_weighted_sum standardized_weighted_sum \\\n", + "0 0.55 0.5625 0.066227 \n", + "1 0.0 0.125 -1.029813 \n", + "2 1.0 0.916667 0.963586 \n", + "\n", + " rank_weighted_sum minmax_geometric target_geometric \\\n", + "0 2.0 0.593296 0.603635 \n", + "1 1.0 0.100000 0.180278 \n", + "2 3.0 1.000000 0.921954 \n", + "\n", + " standardized_geometric rank_geometric minmax_harmonic target_harmonic \\\n", + "0 1.192177 2.0 0.591597 0.601031 \n", + "1 0.100000 1.0 0.1 0.152941 \n", + "2 2.093389 3.0 1.0 0.918919 \n", + "\n", + " standardized_harmonic rank_harmonic standardized_minimum \n", + "0 1.188328 2.0 0.0 \n", + "1 0.1 1.0 -1.059626 \n", + "2 2.093378 3.0 0.927173 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize()\n", + "promcda.aggregate()" + ] + }, + { + "cell_type": "markdown", + "id": "3d221807-cd40-41fc-b295-a9e0e1c4dc08", + "metadata": {}, + "source": [ + "### Test aggregate with sensitivity on aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "587b6905-e9bc-4cbc-a923-501f35e225ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 17:21:45,563 - ProMCDA - Number of alternatives: 3\n", + "INFO: 2024-12-12 17:21:45,564 - ProMCDA - Number of indicators: 2\n", + "INFO: 2024-12-12 17:21:45,567 - ProMCDA - Polarities: ('+', '-')\n", + "INFO: 2024-12-12 17:21:45,567 - ProMCDA - Weights: [0.5, 0.5]\n", + "INFO: 2024-12-12 17:21:45,568 - ProMCDA - Normalized weights: [0.5, 0.5]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
minmax_weighted_summinmax_geometricminmax_harmonic
00.550.5932960.591597
10.00.1000000.1
21.01.0000001.0
\n", + "
" + ], + "text/plain": [ + " minmax_weighted_sum minmax_geometric minmax_harmonic\n", + "0 0.55 0.593296 0.591597\n", + "1 0.0 0.100000 0.1\n", + "2 1.0 1.000000 1.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize(NormalizationFunctions.MINMAX)\n", + "promcda.aggregate()" + ] + }, + { + "cell_type": "markdown", + "id": "29e6c4e2-e347-4e9c-8897-2113770441e0", + "metadata": {}, + "source": [ + "### Test aggregate with sensitivity on normalization" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "89425df3-407c-4d13-945a-4355182d2900", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 17:23:04,104 - ProMCDA - Number of alternatives: 3\n", + "INFO: 2024-12-12 17:23:04,105 - ProMCDA - Number of indicators: 2\n", + "INFO: 2024-12-12 17:23:04,105 - ProMCDA - Polarities: ('+', '-')\n", + "INFO: 2024-12-12 17:23:04,106 - ProMCDA - Weights: [0.5, 0.5]\n", + "INFO: 2024-12-12 17:23:04,106 - ProMCDA - Normalized weights: [0.5, 0.5]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
minmax_weighted_sumtarget_weighted_sumstandardized_weighted_sumrank_weighted_sum
00.550.56250.0662272.0
10.00.125-1.0298131.0
21.00.9166670.9635863.0
\n", + "
" + ], + "text/plain": [ + " minmax_weighted_sum target_weighted_sum standardized_weighted_sum \\\n", + "0 0.55 0.5625 0.066227 \n", + "1 0.0 0.125 -1.029813 \n", + "2 1.0 0.916667 0.963586 \n", + "\n", + " rank_weighted_sum \n", + "0 2.0 \n", + "1 1.0 \n", + "2 3.0 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize()\n", + "promcda.aggregate(aggregation_method=AggregationFunctions.WEIGHTED_SUM)" + ] + }, + { + "cell_type": "markdown", + "id": "cd50f9eb-2987-451b-bf1d-c67a0b2a80e8", + "metadata": {}, + "source": [ + "### Test aggregate with robustness on weights" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3e8bfc0c-9b78-4058-bdc2-90d00bc8ae7e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 17:25:09,956 - ProMCDA - Alternatives are ['A', 'B', 'C']\n" + ] + } + ], + "source": [ + "promcda = ProMCDA(\n", + " input_matrix=setup_no_robustness_indicators['input_matrix'],\n", + " polarity=setup_no_robustness_indicators['polarity'],\n", + " robustness_weights=True,\n", + " robustness_indicators=setup_no_robustness_indicators['robustness_indicators'],\n", + " marginal_distributions=setup_no_robustness_indicators['marginal_distributions'],\n", + " num_runs=setup_no_robustness_indicators['num_runs'],\n", + " num_cores=setup_no_robustness_indicators['num_cores'],\n", + " #output_path=setup_parameters['output_path']\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "8ab04ddd-69a0-4e41-808b-bf57506eda95", + "metadata": {}, + "outputs": [], + "source": [ + "#promcda.normalize(NormalizationFunctions.MINMAX)\n", + "#promcda.aggregate(aggregation_method=AggregationFunctions.WEIGHTED_SUM)" + ] + }, + { + "cell_type": "markdown", + "id": "04da27ae-0303-447a-b454-5324361fbdf3", + "metadata": {}, + "source": [ + "## TEST ROBUSTNESS INDICATORS" + ] + }, + { + "cell_type": "code", + 
"execution_count": 4, + "id": "cd0e175f-9d59-4c96-bcb5-a53d8555988c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 16:45:43,773 - ProMCDA - Alternatives are ['A', 'B', 'C']\n" + ] + } + ], + "source": [ + "promcda = ProMCDA(\n", + " input_matrix=setup_robustness_indicators['input_matrix'],\n", + " polarity=setup_robustness_indicators['polarity'],\n", + " robustness_weights=setup_robustness_indicators['robustness_weights'],\n", + " robustness_indicators=setup_robustness_indicators['robustness_indicators'],\n", + " marginal_distributions=setup_robustness_indicators['marginal_distributions'],\n", + " num_runs=setup_robustness_indicators['num_runs'],\n", + " num_cores=setup_robustness_indicators['num_cores'],\n", + " #output_path=setup_parameters['output_path']\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "448f86cd-6136-4636-9c5e-b803de97ca74", + "metadata": {}, + "source": [ + "### Test normalize with sensitivity" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1518d4a0-9351-4a5e-91db-806f21d32e96", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'standardized_any': 0 1\n", + " 0 0.107857 0.228287\n", + " 1 -1.049557 -1.094406\n", + " 2 0.941699 0.866119,\n", + " 'standardized_without_zero': 0 1\n", + " 0 1.257414 1.422692\n", + " 1 0.1 0.1\n", + " 2 2.091256 2.060524,\n", + " 'minmax_01': 0 1\n", + " 0 0.581248 0.674663\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.623123 0.707196\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0,\n", + " 'target_01': 0 1\n", + " 0 0.679686 0.586277\n", + " 1 0.235073 0.0\n", + " 2 1.0 0.868992,\n", + " 'target_without_zero': 0 1\n", + " 0 0.711717 0.627649\n", + " 1 0.311566 0.1\n", + " 2 1.0 0.882093,\n", + " 'rank': 0 1\n", + " 0 2.0 2.0\n", + " 1 1.0 1.0\n", + " 2 3.0 3.0},\n", + " {'standardized_any': 0 1\n", + " 0 -0.114056 0.161992\n", + " 1 -0.938082 
-1.071107\n", + " 2 1.052138 0.909114,\n", + " 'standardized_without_zero': 0 1\n", + " 0 0.924026 1.333099\n", + " 1 0.1 0.1\n", + " 2 2.09022 2.080221,\n", + " 'minmax_01': 0 1\n", + " 0 0.414038 0.622708\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.472634 0.660437\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0,\n", + " 'target_01': 0 1\n", + " 0 0.579062 0.508629\n", + " 1 0.281629 0.0\n", + " 2 1.0 0.816802,\n", + " 'target_without_zero': 0 1\n", + " 0 0.621156 0.557766\n", + " 1 0.353467 0.1\n", + " 2 1.0 0.835122,\n", + " 'rank': 0 1\n", + " 0 2.0 2.0\n", + " 1 1.0 1.0\n", + " 2 3.0 3.0},\n", + " {'standardized_any': 0 1\n", + " 0 -0.029782 0.244632\n", + " 1 -0.984776 -1.099617\n", + " 2 1.014558 0.854985,\n", + " 'standardized_without_zero': 0 1\n", + " 0 1.054994 1.444249\n", + " 1 0.1 0.1\n", + " 2 2.099335 2.054601,\n", + " 'minmax_01': 0 1\n", + " 0 0.477656 0.687736\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.52989 0.718962\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0,\n", + " 'target_01': 0 1\n", + " 0 0.618886 0.597756\n", + " 1 0.270378 0.0\n", + " 2 1.0 0.869166,\n", + " 'target_without_zero': 0 1\n", + " 0 0.656998 0.637981\n", + " 1 0.34334 0.1\n", + " 2 1.0 0.882249,\n", + " 'rank': 0 1\n", + " 0 2.0 2.0\n", + " 1 1.0 1.0\n", + " 2 3.0 3.0},\n", + " {'standardized_any': 0 1\n", + " 0 -0.1171 0.176795\n", + " 1 -0.936295 -1.076607\n", + " 2 1.053394 0.899812,\n", + " 'standardized_without_zero': 0 1\n", + " 0 0.919195 1.353401\n", + " 1 0.1 0.1\n", + " 2 2.089689 2.076419,\n", + " 'minmax_01': 0 1\n", + " 0 0.41172 0.634178\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.470548 0.67076\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0,\n", + " 'target_01': 0 1\n", + " 0 0.548255 0.538117\n", + " 1 0.232092 0.0\n", + " 2 1.0 0.848526,\n", + " 'target_without_zero': 0 1\n", + " 0 0.59343 0.584305\n", + " 1 0.308883 0.1\n", + " 2 1.0 0.863673,\n", + " 'rank': 0 1\n", + 
" 0 2.0 2.0\n", + " 1 1.0 1.0\n", + " 2 3.0 3.0},\n", + " {'standardized_any': 0 1\n", + " 0 -0.34284 0.126952\n", + " 1 -0.783486 -1.057414\n", + " 2 1.126326 0.930462,\n", + " 'standardized_without_zero': 0 1\n", + " 0 0.540646 1.284365\n", + " 1 0.1 0.1\n", + " 2 2.009812 2.087876,\n", + " 'minmax_01': 0 1\n", + " 0 0.230727 0.595794\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.307655 0.636215\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0,\n", + " 'target_01': 0 1\n", + " 0 0.3992 0.49227\n", + " 1 0.219002 0.0\n", + " 2 1.0 0.826242,\n", + " 'target_without_zero': 0 1\n", + " 0 0.45928 0.543043\n", + " 1 0.297102 0.1\n", + " 2 1.0 0.843618,\n", + " 'rank': 0 1\n", + " 0 2.0 2.0\n", + " 1 1.0 1.0\n", + " 2 3.0 3.0}]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize()\n", + "promcda.get_normalized_values_with_robustness() # If robustness_indicators" + ] + }, + { + "cell_type": "markdown", + "id": "e3d4c24e-6203-4ab6-bfb4-72defe219ac3", + "metadata": {}, + "source": [ + "### Test normalize with specific method" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1aec0eab-5c5a-4279-91b1-891a3fc9a868", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'minmax_01': 0 1\n", + " 0 0.581248 0.674663\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.623123 0.707196\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0},\n", + " {'minmax_01': 0 1\n", + " 0 0.414038 0.622708\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.472634 0.660437\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0},\n", + " {'minmax_01': 0 1\n", + " 0 0.477656 0.687736\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.52989 0.718962\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0},\n", + " {'minmax_01': 0 1\n", + " 0 0.41172 0.634178\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 
'minmax_without_zero': 0 1\n", + " 0 0.470548 0.67076\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0},\n", + " {'minmax_01': 0 1\n", + " 0 0.230727 0.595794\n", + " 1 0.0 0.0\n", + " 2 1.0 1.0,\n", + " 'minmax_without_zero': 0 1\n", + " 0 0.307655 0.636215\n", + " 1 0.1 0.1\n", + " 2 1.0 1.0}]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize(NormalizationFunctions.MINMAX)\n", + "promcda.get_normalized_values_with_robustness() # If robustness_indicators" + ] + }, + { + "cell_type": "markdown", + "id": "2c6ad5ee-a10e-4c89-8bb3-c60190bb587d", + "metadata": {}, + "source": [ + "### Test aggregate with robustness - need setUpRobustnessIndicators" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "15bb84ff-e194-4326-a368-c467c9b6e3a7", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: 2024-12-12 16:46:06,406 - ProMCDA - Number of alternatives: 3\n", + "INFO: 2024-12-12 16:46:06,406 - ProMCDA - Number of indicators: 2\n", + "INFO: 2024-12-12 16:46:06,407 - ProMCDA - Polarities: ('+', '-')\n", + "INFO: 2024-12-12 16:46:06,407 - ProMCDA - Weights: [0.5, 0.5]\n", + "INFO: 2024-12-12 16:46:06,407 - ProMCDA - Normalized weights: [0.5, 0.5]\n", + "INFO: 2024-12-12 16:46:06,407 - ProMCDA - Start ProMCDA with uncertainty on the indicators\n", + "INFO: 2024-12-12 16:46:06,407 - ProMCDA - The number of Monte-Carlo runs is only 5\n", + "INFO: 2024-12-12 16:46:06,408 - ProMCDA - A meaningful number of Monte-Carlo runs is equal or larger than 1000\n" + ] + }, + { + "data": { + "text/plain": [ + "'Aggregation considered uncertainty on indicators, resulsts are not explicitly shown.'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.normalize(normalization_method=NormalizationFunctions.MINMAX)\n", + 
"promcda.aggregate(aggregation_method=AggregationFunctions.WEIGHTED_SUM, weights=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "128cc88d-928e-439e-896a-be71faa60075", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "( ws-minmax_01\n", + " 0 0.533047\n", + " 1 0.000000\n", + " 2 1.000000,\n", + " ws-minmax_01\n", + " 0 0.080837\n", + " 1 0.000000\n", + " 2 0.000000)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promcda.get_aggregated_values_with_robustness()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cf166df-6244-4364-8006-bacf98570225", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ProMCDA (Python)", + "language": "python", + "name": "promcda" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/mcda/configuration/config.py b/mcda/configuration/config.py deleted file mode 100644 index 7b69db4..0000000 --- a/mcda/configuration/config.py +++ /dev/null @@ -1,143 +0,0 @@ -""" -This module serves as a configuration object for ProMCDA. -It is designed to store and manage configuration settings in a structured way. -""" - -import copy -from typing import List, Dict, Any - - -# noinspection PyMethodMayBeStatic -class Config: - """ - Class representing configuration settings. - - This class encapsulates the configuration settings. - It expects the following keys in the input dictionary: - - input_matrix_path: path to the input matrix file. - - polarity_for_each_indicator: list of polarities, one for each indicator. - - sensitivity: sensitivity configuration. - - robustness: robustness configuration. 
- - monte_carlo_sampling: Monte Carlo sampling configuration. - - output_directory_path: path to the output file. - - Attributes: - _valid_keys (List[str]): list of valid keys expected in the input dictionary. - _list_values (List[str]): list of keys corresponding to list values. - _str_values (List[str]): list of keys corresponding to string values. - _int_values (List[str]): list of keys corresponding to integer values. - _dict_values (List[str]): list of keys corresponding to dictionary values. - _keys_of_dict_values (Dict[str, List[str]]): dictionary containing keys and their corresponding sub-keys. - - Methods: - __init__(input_config: dict): instantiate a configuration object. - _validate(input_config, valid_keys, str_values, int_values, list_values, dict_values): validate the input - configuration. - get_property(property_name: str): retrieve a property from the configuration. - check_dict_keys(dic: Dict[str, Any], keys: List[str]): check if a specific key is in a dictionary. - check_key(dic: dict, key: str): check if a key is in a dictionary. 
- """ - - _valid_keys: List[str] = ['input_matrix_path', - 'polarity_for_each_indicator', - 'sensitivity', - 'robustness', - 'monte_carlo_sampling', - 'output_directory_path'] - - _list_values: List[str] = [ - 'marginal_distribution_for_each_indicator', 'polarity_for_each_indicator'] - - _str_values: List[str] = ['input_matrix_path', 'output_directory_path', 'sensitivity_on', 'normalization', 'aggregation', - 'robustness_on', 'on_single_weights', 'on_all_weights', 'given_weights', 'on_indicators'] - - _int_values: List[str] = ['monte_carlo_runs', 'num_cores', 'random_seed'] - - _dict_values: List[str] = ['sensitivity', 'robustness', 'monte_carlo_sampling'] - - _keys_of_dict_values = {'sensitivity': ['sensitivity_on', 'normalization', 'aggregation'], - 'robustness': ['robustness_on', 'on_single_weights', 'on_all_weights', - 'given_weights', 'on_indicators'], - 'monte_carlo_sampling': ['monte_carlo_runs', 'num_cores', 'random_seed', - 'marginal_distribution_for_each_indicator']} - - def __init__(self, input_config: dict): - - valid_keys = self._valid_keys - str_values = self._str_values - int_values = self._int_values - list_values = self._list_values - dict_values = self._dict_values - # keys_of_dict_values = self._keys_of_dict_values - - self._validate(input_config, valid_keys, str_values, - int_values, list_values, dict_values) - self._config = copy.deepcopy(input_config) - - def _validate(self, input_config, valid_keys, str_values, int_values, list_values, dict_values): - if not isinstance(input_config, dict): - raise TypeError("input configuration file is not a dictionary") - - for key in valid_keys: - if key not in input_config: - raise KeyError("key {} is not in the input config".format(key)) - - if key in str_values: - if not isinstance(input_config[key], str): - raise TypeError( - "value of {} in the input config is not a string".format(key)) - - if key in int_values: - if not isinstance(input_config[key], int): - raise TypeError( - "value of {} in the input 
config is not an integer".format(key)) - - if key in list_values: - if not isinstance(input_config[key], list): - raise TypeError( - "value of {} in the input config is not a list".format(key)) - - if key in dict_values: - if not isinstance(input_config[key], dict): - raise TypeError( - "value of {} in the input config is not a dictionary".format(key)) - Config.check_dict_keys( - input_config[key], Config._keys_of_dict_values[key]) - - def get_property(self, property_name: str): - return self._config[property_name] - - @property - def input_matrix_path(self): - return self.get_property('input_matrix_path') - - @property - def polarity_for_each_indicator(self): - return self.get_property('polarity_for_each_indicator') - - @property - def sensitivity(self): - return self.get_property('sensitivity') - - @property - def robustness(self): - return self.get_property('robustness') - - @property - def monte_carlo_sampling(self): - return self.get_property('monte_carlo_sampling') - - @property - def output_file_path(self): - return self.get_property('output_directory_path') - - @staticmethod - def check_dict_keys(dic: Dict[str, Any], keys: List[str]): - for key in keys: - Config.check_key(dic, key) - - @staticmethod - def check_key(dic: dict, key: str): - if key not in dic.keys(): - raise KeyError( - "The key = {} is not present in dictionary: {}".format(key, dic)) diff --git a/mcda/configuration/configuration_validator.py b/mcda/configuration/configuration_validator.py new file mode 100644 index 0000000..098ec97 --- /dev/null +++ b/mcda/configuration/configuration_validator.py @@ -0,0 +1,493 @@ +import sys +import logging + +import numpy as np +import pandas as pd +from typing import Tuple, Union + +from pandas.core import series + +from mcda.configuration.enums import NormalizationFunctions, AggregationFunctions +from mcda.utils.utils_for_main import pop_indexed_elements, check_norm_sum_weights, randomly_sample_all_weights, \ + randomly_sample_ix_weight, 
check_input_matrix + +log = logging.getLogger(__name__) +logging.getLogger('PIL').setLevel(logging.WARNING) +FORMATTER: str = '%(levelname)s: %(asctime)s - %(name)s - %(message)s' +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=FORMATTER) +logger = logging.getLogger("ProMCDA") + +from typing import Dict, List, Any + + +def check_configuration_keys(robustness: dict, monte_carlo: dict) -> bool: + """ + Checks for required keys in sensitivity, robustness, and monte_carlo dictionaries. + TODO: revisit this logic when substitute classes to handle configuration settings. + + :param robustness : dict + :param monte_carlo: dict + :rtype: bool + """ + + keys_of_dict_values = { + 'robustness': ['robustness_on', 'on_single_weights', 'on_all_weights', 'given_weights', 'on_indicators'], + 'monte_carlo': ['monte_carlo_runs', 'num_cores', 'random_seed', 'marginal_distribution_for_each_indicator'] + } + _check_dict_keys(robustness, keys_of_dict_values['robustness']) + _check_dict_keys(monte_carlo, keys_of_dict_values['monte_carlo']) + + return True + + +def _check_dict_keys(dic: Dict[str, Any], expected_keys: List[str]) -> None: + """ + Helper function to check if the dictionary contains the required keys. + """ + for key in expected_keys: + if key not in dic: + raise KeyError(f"The key '{key}' is missing in the provided dictionary") + + +def extract_configuration_values(input_matrix: pd.DataFrame, polarity: Tuple[str], robustness: dict, + monte_carlo: dict) -> dict: + """ + Extracts relevant configuration values required for running the ProMCDA process. + + This function takes input parameters related to the decision matrix, polarity, sensitivity analysis, + robustness analysis, and Monte Carlo simulations, and returns a dictionary containing the necessary + configuration values for further processing. + + Parameters: + ----------- + The decision matrix containing the alternatives and indicators. 
+ + A tuple indicating the polarity (positive/negative) of each indicator. + + A dictionary specifying the sensitivity analysis configuration, including whether sensitivity is enabled. + + A dictionary specifying the robustness analysis configuration, including which robustness options are + enabled (e.g., on single weights, on all weights, and on indicators). + + A dictionary containing Monte Carlo simulation parameters such as the number of runs and the random seed. + + :param input_matrix : pd.DataFrame + :param polarity : Tuple[str] + :param: output_path: str + :return: extracted_values: dict + """ + + extracted_values = { + "input_matrix": input_matrix, + "polarity": polarity, + # Robustness settings + "robustness_on": robustness["robustness_on"], + "robustness_on_single_weights": robustness["on_single_weights"], + "robustness_on_all_weights": robustness["on_all_weights"], + "given_weights": robustness["given_weights"], + "robustness_on_indicators": robustness["on_indicators"], + # Monte Carlo settings + "monte_carlo_runs": monte_carlo["monte_carlo_runs"], + "num_cores": monte_carlo["num_cores"], + "random_seed": monte_carlo["random_seed"], + "marginal_distribution_for_each_indicator": monte_carlo["marginal_distribution_for_each_indicator"] + } + + return extracted_values + + +def check_configuration_values(extracted_values: dict) -> Tuple[int, int, List[str], Union[list, List[list], dict]]: + """ + Validates the configuration settings for the ProMCDA process based on the input parameters. + + This function checks the validity of the input parameters related to sensitivity analysis, robustness analysis, + and Monte Carlo simulations. It ensures that the configuration is coherent and alerts the user to any inconsistencies. + + Parameters: + ----------- + A dictionary containing configuration values extracted from the input parameters. It includes: + - input_matrix (pd.DataFrame): The decision matrix for alternatives and indicators. 
+ - polarity (Tuple[str]): A tuple indicating the polarity of each indicator. + - normalization (str): The normalization method to be used if sensitivity analysis is enabled. + - aggregation (str): The aggregation method to be used if sensitivity analysis is enabled. + - robustness_on (str): Indicates whether robustness analysis is enabled ("yes" or "no"). + - robustness_on_single_weights (str): Indicates if robustness is applied on individual weights. + - robustness_on_all_weights (str): Indicates if robustness is applied on all weights. + - robustness_on_indicators (str): Indicates if robustness is applied on indicators. + - monte_carlo_runs (int): The number of Monte Carlo simulation runs. + - random_seed (int): The seed for random number generation. + - marginal_distribution_for_each_indicator (List[str]): The distribution types for each indicator. + + Raises: + ------- + ValueError + If any configuration settings are found to be inconsistent or contradictory. + + Returns: + -------- + int + A flag indicating whether robustness analysis will be performed on indicators (1) or not (0). 
+ + :param: extracted_values : dict + :return: is_robustness_indicators: int + """ + + is_robustness_indicators = 0 + is_robustness_weights = 0 + + # Access the values from the dictionary + input_matrix = extracted_values["input_matrix"] + polarity = extracted_values["polarity"] + aggregation = extracted_values["aggregation"] + robustness_on = extracted_values["robustness_on"] + robustness_on_single_weights = extracted_values["robustness_on_single_weights"] + robustness_on_all_weights = extracted_values["robustness_on_all_weights"] + robustness_on_indicators = extracted_values["robustness_on_indicators"] + monte_carlo_runs = extracted_values["monte_carlo_runs"] + random_seed = extracted_values["random_seed"] + marginal_distribution = extracted_values["marginal_distribution_for_each_indicator"] + + # Check for sensitivity-related configuration errors + valid_norm_methods = [method.value for method in NormalizationFunctions] + valid_agg_methods = [method.value for method in AggregationFunctions] + if isinstance(aggregation, AggregationFunctions): + aggregation = aggregation.value + + # Check for robustness-related configuration errors + if robustness_on == "no": + logger.info("ProMCDA will run without uncertainty on the indicators or weights") + else: + check_config_error((robustness_on_single_weights == "no" and + robustness_on_all_weights == "no" and + robustness_on_indicators == "no"), + 'Robustness analysis has been requested, but it’s unclear whether it should be applied to ' + 'weights or indicators. Please clarify it.') + + check_config_error((robustness_on_single_weights == "yes" and + robustness_on_all_weights == "yes" and + robustness_on_indicators == "no"), + 'Robustness analysis has been requested for the weights, but it’s unclear whether it should ' + 'be applied to all weights or just one at a time? 
Please clarify.') + + check_config_error(((robustness_on_single_weights == "yes" and + robustness_on_all_weights == "yes" and + robustness_on_indicators == "yes") or + (robustness_on_single_weights == "yes" and + robustness_on_all_weights == "no" and + robustness_on_indicators == "yes") or + (robustness_on_single_weights == "no" and + robustness_on_all_weights == "yes" and + robustness_on_indicators == "yes")), + 'Robustness analysis has been requested, but it’s unclear whether it should be applied to ' + 'weights or indicators. Please clarify.') + + # Check seetings for robustness analysis on weights or indicators + condition_robustness_on_weights = ( + (robustness_on_single_weights == 'yes' and + robustness_on_all_weights == 'no' and + robustness_on_indicators == 'no') or + (robustness_on_single_weights == 'no' and + robustness_on_all_weights == 'yes' and + robustness_on_indicators == 'no')) + + condition_robustness_on_indicators = ( + (robustness_on_single_weights == 'no' and + robustness_on_all_weights == 'no' and + robustness_on_indicators == 'yes')) + + is_robustness_weights, is_robustness_indicators = check_config_setting(condition_robustness_on_weights, + condition_robustness_on_indicators, + monte_carlo_runs, random_seed) + + # Check the input matrix for duplicated rows in the alternatives, + # rescale negative indicator values and drop the column containing the alternatives + input_matrix_no_alternatives = check_input_matrix(input_matrix) + + if is_robustness_indicators == 0: + num_indicators = input_matrix_no_alternatives.shape[1] + else: + num_non_exact_and_non_poisson = \ + len(marginal_distribution) - marginal_distribution.count('exact') - marginal_distribution.count('poisson') + num_indicators = (input_matrix_no_alternatives.shape[1] - num_non_exact_and_non_poisson) + + # Process indicators and weights based on input parameters in the configuration + polar, weights = process_indicators_and_weights(extracted_values, input_matrix_no_alternatives, + 
is_robustness_indicators, + is_robustness_weights, polarity, monte_carlo_runs, num_indicators) + + # Check the number of indicators, weights, and polarities + try: + check_indicator_weights_polarities(num_indicators, polar, extracted_values) + except ValueError as e: + logging.error(str(e), stack_info=True) + raise + + return is_robustness_indicators, is_robustness_weights, polar, weights + + +def check_config_error(condition: bool, error_message: str): + """ + Check a condition and raise a ValueError with a specified error message if the condition is True. + + Parameters: + - condition (bool): The condition to check. + - error_message (str): The error message to raise if the condition is True. + + Raises: + - ValueError: If the condition is True, with the specified error message. + + :param error_message: str + :param condition: bool + :return: None + """ + + if condition: + logger.error('Error Message', stack_info=True) + raise ValueError(error_message) + + +def check_config_setting(condition_robustness_on_weights: bool, condition_robustness_on_indicators: bool, mc_runs: int, + random_seed: int) -> (int, int): + """ + Checks configuration settings and logs information messages. + + Returns: + - is_robustness_weights, is_robustness_indicators, booleans indicating if robustness is considered + on weights or indicators. 
+ + Example: + ```python + is_robustness_weights, is_robustness_indicators = check_config_setting(True, False, 1000, 42) + ``` + + :param condition_robustness_on_weights: bool + :param condition_robustness_on_indicators: bool + :param mc_runs: int + :param random_seed: int + :return: (is_robustness_weights, is_robustness_indicators) + :rtype: Tuple[int, int] + """ + is_robustness_weights = 0 + is_robustness_indicators = 0 + + if condition_robustness_on_weights: + logger.info("ProMCDA will consider uncertainty on the weights.") + logger.info("Number of Monte Carlo runs: {}".format(mc_runs)) + logger.info("The random seed used is: {}".format(random_seed)) + is_robustness_weights = 1 + + elif condition_robustness_on_indicators: + logger.info("ProMCDA will consider uncertainty on the indicators.") + logger.info("Number of Monte Carlo runs: {}".format(mc_runs)) + logger.info("The random seed used is: {}".format(random_seed)) + is_robustness_indicators = 1 + + return is_robustness_weights, is_robustness_indicators + + +def process_indicators_and_weights(input_matrix: pd.DataFrame, + robustness_indicators: bool, + robustness_weights: bool, + robustness_single_weights: bool, + polarity: Tuple[str, ...], + mc_runs: int, + num_indicators: int, + weights: List[str]) \ + -> Tuple[List[str], Union[list, List[list], dict]]: + """ + Process indicators and weights based on input parameters in the configuration. + + Parameters: + - input_matrix: the input matrix without alternatives. + - robustness_indicators: a flag indicating whether the matrix should include indicator uncertainties + (True or False). + - robustness_weights: a flag indicating whether robustness analysis is considered for the weights (True or False). + - robustness_single_weights: a flag indicating whether robustness analysis is considered for a single weight + at time (True or False). + - polarity: a tuple containing the original polarity associated to each indicator. 
+ - mc_runs: number of Monte Carlo runs for robustness analysis. + - num_indicators: the number of indicators in the input matrix. + - weights: a list containing the assigned weights. + + Raises: + - ValueError: If there are duplicated rows in the input matrix or if there is an issue with the configuration. + + Returns: + - a shorter list of polarities if one has been dropped together with the relative indicator, + which brings no information. Otherwise, the same list. + - the normalised weights (either fixed or random sampled weights, depending on the settings) + + Notes: + - For robustness_indicators == False: + - Identifies and removes columns with constant values. + - Logs the number of alternatives and indicators. + + - For robustness_indicators == True: + - Handles uncertainty in indicators. + - Logs the number of alternatives and indicators. + + - For robustness_weights == False: + - Processes fixed weights if given. + - Logs weights and normalised weights. + + - For robustness_weights == True: + - Performs robustness analysis on weights. + - Logs randomly sampled weights. + + :param input_matrix: pd.DataFrame + :param robustness_weights: bool + :param robustness_single_weights: + :param robustness_indicators: bool + :param polarity: List[str] + :param mc_runs: int + :param num_indicators: int + :param weights: List[str] + :rtype: Tuple[List[str], Union[List[list], dict]] + """ + num_unique = input_matrix.nunique() + cols_to_drop = num_unique[num_unique == 1].index + col_to_drop_indexes = input_matrix.columns.get_indexer(cols_to_drop) + + if robustness_indicators is False: + _handle_no_robustness_indicators(input_matrix) + else: # matrix with uncertainty on indicators + logger.info("Number of alternatives: {}".format(input_matrix.shape[0])) + logger.info("Number of indicators: {}".format(num_indicators)) + # TODO: eliminate indicators with constant values (i.e. 
same mean and 0 std) - optional + + polarities_and_weights = _handle_polarities_and_weights(robustness_indicators, robustness_weights, + robustness_single_weights, num_unique, + col_to_drop_indexes, polarity, mc_runs, num_indicators, + weights) + + polar, norm_weights = tuple(item for item in polarities_and_weights if item is not None) + + return polar, norm_weights + + +def _handle_polarities_and_weights(robustness_indicators: bool, + robustness_weights: bool, + robustness_single_weights: bool, + num_unique: series, + col_to_drop_indexes: np.ndarray, + polarity: Tuple[str, ...], + mc_runs: int, + num_indicators: int, + weights: List[str]) \ + -> Union[Tuple[List[str], list, None, None], Tuple[List[str], None, List[List], None], + Tuple[List[str], None, None, dict]]: + """ + Manage polarities and weights based on the specified robustness settings, ensuring that the appropriate adjustments + and normalizations are applied before returning the necessary data structures. + """ + norm_random_weights = [] + rand_weight_per_indicator = {} + + # Managing polarities + if robustness_indicators is False: + if any(value == 1 for value in num_unique): + polarity = pop_indexed_elements(col_to_drop_indexes, polarity) + logger.info("Polarities: {}".format(polarity)) + + # Managing weights + if robustness_weights is False: + fixed_weights = weights + if any(value == 1 for value in num_unique): + fixed_weights = pop_indexed_elements(col_to_drop_indexes, fixed_weights) + norm_fixed_weights = check_norm_sum_weights(fixed_weights) + logger.info("Weights: {}".format(fixed_weights)) + logger.info("Normalized weights: {}".format(norm_fixed_weights)) + return polarity, norm_fixed_weights, None, None + # Return None for norm_random_weights and rand_weight_per_indicator + else: + output_weights = _handle_robustness_weights(mc_runs, num_indicators, robustness_indicators, robustness_weights, + robustness_single_weights) + if output_weights is not None: + norm_random_weights, 
rand_weight_per_indicator = output_weights + if norm_random_weights: + return polarity, None, norm_random_weights, None + else: + return polarity, None, None, rand_weight_per_indicator + # Return None for norm_fixed_weights and one of the other two cases of randomness + + +def _handle_robustness_weights(mc_runs: int, num_indicators: int, robustness_indicators: bool, robustness_weights: bool, + robustness_single_weight: bool) -> Tuple[Union[List[list], None], Union[dict, None]]: + """ + Handle the generation and normalization of random weights based on the specified settings + when a robustness analysis is requested on all the weights. + """ + norm_random_weights = [] + rand_weight_per_indicator = {} + + if mc_runs == 0: + logger.error('Error Message', stack_info=True) + raise ValueError('The number of MC runs should be larger than 0 for robustness analysis') + + if robustness_single_weight is False and robustness_weights is True: + random_weights = randomly_sample_all_weights(num_indicators, mc_runs) + for weights in random_weights: + weights = check_norm_sum_weights(weights) + norm_random_weights.append(weights) + return norm_random_weights, None # Return norm_random_weights, and None for rand_weight_per_indicator + elif robustness_single_weight is True and robustness_weights is False: + i = 0 + while i < num_indicators: + random_weights = randomly_sample_ix_weight(num_indicators, i, mc_runs) + norm_random_weight = [] + for weights in random_weights: + weights = check_norm_sum_weights(weights) + norm_random_weight.append(weights) + rand_weight_per_indicator["indicator_{}".format(i + 1)] = norm_random_weight + i += 1 + return None, rand_weight_per_indicator # Return None for norm_random_weights, and rand_weight_per_indicator + + +def _handle_no_robustness_indicators(input_matrix: pd.DataFrame): + """ + Handle the indicators in case of no robustness analysis required. 
+ (The input matrix is without the alternative column) + """ + num_unique = input_matrix.nunique() + cols_to_drop = num_unique[num_unique == 1].index + + if any(value == 1 for value in num_unique): + logger.info("Indicators {} have been dropped because they carry no information".format(cols_to_drop)) + input_matrix = input_matrix.drop(cols_to_drop, axis=1) + + num_indicators = input_matrix.shape[1] + logger.info("Number of alternatives: {}".format(input_matrix.shape[0])) + logger.info("Number of indicators: {}".format(num_indicators)) + + +def check_indicator_weights_polarities(num_indicators: int, polar: List[str], robustness_weights: bool, + weights: List[int]): + """ + Check the consistency of indicators, polarities, and fixed weights in a configuration. + + Parameters: + - num_indicators: the number of indicators in the input matrix. + - polar: a list containing the polarity associated to each indicator. + - config: the configuration dictionary. + + This function raises a ValueError if the following conditions are not met: + 1. The number of indicators does not match the number of polarities. + 2. "robustness_on_all_weights" is set to "no," and the number of fixed weights + does not correspond to the number of indicators. + + Raises: + - ValueError: if the conditions for indicator-polarity and fixed weights consistency are not met. + + :param weights: List[int] + :param robustness_weights: bool + :param num_indicators: int + :param polar: List[str] + :param config: dict + :return: None + """ + if num_indicators != len(polar): + raise ValueError('The number of polarities does not correspond to the no. of indicators') + + # Check the number of fixed weights if "robustness_on_all_weights" is set to "no" + if (robustness_weights is False) and (num_indicators != len(weights)): + raise ValueError('The no. of fixed weights does not correspond to the no. 
of indicators') diff --git a/mcda/configuration/enums.py b/mcda/configuration/enums.py new file mode 100644 index 0000000..ede2d89 --- /dev/null +++ b/mcda/configuration/enums.py @@ -0,0 +1,73 @@ +from enum import Enum + +""" +This module defines enumerations for use throughout the package to enhance maintainability. + +Enumerations (enums) provide a way to define a set of named values, which can be used to represent options or +categories in a more manageable manner, avoiding string literals or hard-coded values. +""" + +class NormalizationFunctions(Enum): + """ + Implemented normalization functions + """ + MINMAX = 'minmax' + STANDARDIZED = 'standardized' + TARGET = 'target' + RANK = 'rank' + + +class AggregationFunctions(Enum): + """ + Implemented aggregation functions + """ + WEIGHTED_SUM = 'weighted_sum' + GEOMETRIC = 'geometric' + HARMONIC = 'harmonic' + MINIMUM = 'minimum' + + +class NormalizationNames4Sensitivity(Enum): + """ + Names of normalization functions in case of sensitivity analysis + """ + MINMAX_WITHOUT_ZERO = 'minmax_without_zero' + MINMAX_01 = 'minmax_01' + TARGET_WITHOUT_ZERO = 'target_without_zero' + TARGET_01 = 'target_01' + STANDARDIZED_ANY = 'standardized_any' + STANDARDIZED_WITHOUT_ZERO = 'standardized_without_zero' + RANK = 'rank' + + +class OutputColumnNames4Sensitivity(Enum): + """ + Names of output columns in case of sensitivity analysis + """ + WS_MINMAX_01 = 'ws-minmax_01' + WS_TARGET_01 = 'ws-target_01' + WS_STANDARDIZED_ANY = 'ws-standardized_any' + WS_RANK = 'ws-rank' + GEOM_MINMAX_WITHOUT_ZERO = 'geom-minmax_without_zero' + GEOM_TARGET_WITHOUT_ZERO = 'geom-target_without_zero' + GEOM_STANDARDIZED_WITHOUT_ZERO = 'geom-standardized_without_zero' + GEOM_RANK = 'geom-rank' + HARM_MINMAX_WITHOUT_ZERO = 'harm-minmax_without_zero' + HARM_TARGET_WITHOUT_ZERO = 'harm-target_without_zero' + HARM_STANDARDIZED_WITHOUT_ZERO = 'harm-standardized_without_zero' + HARM_RANK = 'harm-rank' + MIN_STANDARDIZED_ANY = 'min-standardized_any' + + +class 
PDFType(Enum): + """ + Names of probability density functions, which describe the indicators in case of robustness analysis + """ + EXACT = "exact" + UNIFORM = "uniform" + NORMAL = "normal" + LOGNORMAL = "lognormal" + POISSON = "poisson" + + + diff --git a/mcda/mcda_functions/aggregation.py b/mcda/mcda_functions/aggregation.py index 77071ef..9b363db 100644 --- a/mcda/mcda_functions/aggregation.py +++ b/mcda/mcda_functions/aggregation.py @@ -21,9 +21,12 @@ class Aggregation(object): """ def __init__(self, weights: list): - self.weights = weights - if sum(self.weights) != 1: + if isinstance(self.weights, list) and all(isinstance(i, list) for i in self.weights): + for i in range(len(self.weights)): + if sum(self.weights[i]) != 1: + self.weights[i] = [val / sum(self.weights[i]) for val in self.weights[i]] + elif sum(self.weights) != 1: self.weights = [val / sum(self.weights) for val in self.weights] def weighted_sum(self, norm_indicators: pd.DataFrame) -> pd.Series(dtype='object'): diff --git a/mcda/mcda_functions/normalization.py b/mcda/mcda_functions/normalization.py index 1a3a0c6..ad1b9ad 100644 --- a/mcda/mcda_functions/normalization.py +++ b/mcda/mcda_functions/normalization.py @@ -16,7 +16,7 @@ class Normalization(object): Ratio: target. """ - def __init__(self, input_matrix: pd.DataFrame, polarities: list): + def __init__(self, input_matrix: pd.DataFrame, polarities: tuple): self._input_matrix = copy.deepcopy(input_matrix) self.polarities = polarities diff --git a/mcda/mcda_run.py b/mcda/mcda_run.py deleted file mode 100644 index 6337882..0000000 --- a/mcda/mcda_run.py +++ /dev/null @@ -1,170 +0,0 @@ -#! /usr/bin/env python3 - -""" -This script serves as the main entry point for running all pieces of functionality in a consequential way by -following the settings given in the configuration file 'configuration.json'. 
- -Usage (from root directory): - $ python3 -m mcda.mcda_run -c configuration.json -""" - -import time -import logging - -from mcda.configuration.config import Config -from mcda.utils.utils_for_main import * -from mcda.utils.utils_for_plotting import * -from mcda.utils.utils_for_parallelization import * - -log = logging.getLogger(__name__) - -FORMATTER: str = '%(levelname)s: %(asctime)s - %(name)s - %(message)s' -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=FORMATTER) -logger = logging.getLogger("ProMCDA") - -# noinspection PyTypeChecker -def main(input_config: dict): - """ - Execute the ProMCDA (Probabilistic Multi-Criteria Decision Analysis) process. - - Parameters: - - input_config : Configuration parameters for the ProMCDA process. - - Raises: - - ValueError: If there are issues with the input matrix, weights, or indicators. - - This function performs the ProMCDA process based on the provided configuration. - It handles various aspects such as the sensitivity analysis and the robustness analysis. - The results are saved in output files, and plots are generated to visualize the scores and rankings. - - Note: Ensure that the input matrix, weights, polarities and indicators (with or without uncertainty) - are correctly specified in the input configuration. 
- - :param input_config: dict - :return: None - """ - is_sensitivity = None - is_robustness = None - is_robustness_indicators = 0 - is_robustness_weights = 0 - f_norm = None - f_agg = None - marginal_pdf = [] - num_unique = [] - - t = time.time() - - # Extracting relevant configuration values - config = Config(input_config) - input_matrix = read_matrix(config.input_matrix_path) - index_column_name = input_matrix.index.name - index_column_values = input_matrix.index.tolist() - polar = config.polarity_for_each_indicator - is_sensitivity = config.sensitivity['sensitivity_on'] - is_robustness = config.robustness['robustness_on'] - mc_runs = config.monte_carlo_sampling["monte_carlo_runs"] - random_seed = config.monte_carlo_sampling["random_seed"] - - # Check for sensitivity-related configuration errors - if is_sensitivity == "no": - f_norm = config.sensitivity['normalization'] - f_agg = config.sensitivity['aggregation'] - check_config_error(f_norm not in ['minmax', 'target', 'standardized', 'rank'], - 'The available normalization functions are: minmax, target, standardized, rank.') - check_config_error(f_agg not in ['weighted_sum', 'geometric', 'harmonic', 'minimum'], - 'The available aggregation functions are: weighted_sum, geometric, harmonic, minimum.' 
- '\nWatch the correct spelling in the configuration file.') - logger.info("ProMCDA will only use one pair of norm/agg functions: " + f_norm + '/' + f_agg) - else: - logger.info("ProMCDA will use a set of different pairs of norm/agg functions") - - # Check for robustness-related configuration errors - if is_robustness == "no": - logger.info("ProMCDA will run without uncertainty on the indicators or weights") - logger.info("Read input matrix without uncertainties at {}".format(config.input_matrix_path)) - else: - check_config_error((config.robustness["on_single_weights"] == "no" and - config.robustness["on_all_weights"] == "no" and - config.robustness["on_indicators"] == "no"), - 'Robustness analysis is requested but where is not defined: weights or indicators? Please clarify.') - - check_config_error((config.robustness["on_single_weights"] == "yes" and - config.robustness["on_all_weights"] == "yes" and - config.robustness["on_indicators"] == "no"), - 'Robustness analysis is requested on the weights: but on all or one at a time? Please clarify.') - - check_config_error(((config.robustness["on_single_weights"] == "yes" and - config.robustness["on_all_weights"] == "yes" and - config.robustness["on_indicators"] == "yes") or - (config.robustness["on_single_weights"] == "yes" and - config.robustness["on_all_weights"] == "no" and - config.robustness["on_indicators"] == "yes") or - (config.robustness["on_single_weights"] == "no" and - config.robustness["on_all_weights"] == "yes" and - config.robustness["on_indicators"] == "yes")), - 'Robustness analysis is requested: but on weights or indicators? 
Please clarify.') - - # Check seetings for robustness analysis on weights or indicators - condition_robustness_on_weights = ( - (config.robustness['on_single_weights'] == 'yes' and - config.robustness['on_all_weights'] == 'no' and - config.robustness['on_indicators'] == 'no') or - (config.robustness['on_single_weights'] == 'no' and - config.robustness['on_all_weights'] == 'yes' and - config.robustness['on_indicators'] == 'no') - ) - condition_robustness_on_indicators = ( - (config.robustness['on_single_weights'] == 'no' and - config.robustness['on_all_weights'] == 'no' and - config.robustness['on_indicators'] == 'yes') - ) - - - is_robustness_weights, is_robustness_indicators = \ - check_config_setting(condition_robustness_on_weights, - condition_robustness_on_indicators, - mc_runs, random_seed) - - marginal_pdf = config.monte_carlo_sampling["marginal_distribution_for_each_indicator"] - logger.info("Read input matrix with uncertainty of the indicators at {}".format( - config.input_matrix_path)) - - # Check the input matrix for duplicated rows in the alternatives, rescale negative indicator values and - # drop the column containing the alternatives - input_matrix_no_alternatives = check_input_matrix(input_matrix) - if is_robustness_indicators == 0: - num_indicators = input_matrix_no_alternatives.shape[1] - else: - num_non_exact_and_non_poisson = len(marginal_pdf) - marginal_pdf.count('exact') - marginal_pdf.count('poisson') - num_indicators = (input_matrix_no_alternatives.shape[1] - num_non_exact_and_non_poisson) - - # Process indicators and weights based on input parameters in the configuration - polar, weights = process_indicators_and_weights(config, input_matrix_no_alternatives, is_robustness_indicators, - is_robustness_weights, polar, mc_runs, num_indicators) - - # Check the number of indicators, weights, and polarities - try: - check_indicator_weights_polarities(num_indicators, polar, config) - except ValueError as e: - logging.error(str(e), stack_info=True) - 
raise - - # If there is no uncertainty of the indicators: - if is_robustness_indicators == 0: - run_mcda_without_indicator_uncertainty(input_config, index_column_name, index_column_values, - input_matrix_no_alternatives, weights, f_norm, f_agg, - is_robustness_weights) - # else (i.e. there is uncertainty): - else: - run_mcda_with_indicator_uncertainty(input_config, input_matrix_no_alternatives, index_column_name, - index_column_values, mc_runs, random_seed, is_sensitivity, f_agg, f_norm, - weights, polar, marginal_pdf) - - logger.info("ProMCDA finished calculations: check the output files") - elapsed = time.time() - t - logger.info("All calculations finished in seconds {}".format(elapsed)) - -if __name__ == '__main__': - config_path = parse_args() - input_config = get_config(config_path) - main(input_config=input_config) diff --git a/mcda/mcda_without_robustness.py b/mcda/mcda_without_robustness.py deleted file mode 100644 index 2eb52a4..0000000 --- a/mcda/mcda_without_robustness.py +++ /dev/null @@ -1,157 +0,0 @@ -import sys -import copy -import logging -import pandas as pd - -from mcda.configuration.config import Config -from mcda.mcda_functions.normalization import Normalization -from mcda.mcda_functions.aggregation import Aggregation - -log = logging.getLogger(__name__) - -formatter = '%(levelname)s: %(asctime)s - %(name)s - %(message)s' -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=formatter) -logger = logging.getLogger("ProMCDA aggregation") - - -class MCDAWithoutRobustness: - """ - Class MCDA without indicators' uncertainty - - This class allows one to run MCDA without considering the uncertainties related to the indicators. - All indicators are described by the exact marginal distribution. - However, it's possible to have randomly sampled weights. 
- """ - - def __init__(self, config: Config, input_matrix: pd.DataFrame): - self.normalized_indicators = None - self.weights = None - self._config = copy.deepcopy(config) - self._input_matrix = copy.deepcopy(input_matrix) - - def normalize_indicators(self, method=None) -> dict: - """ - Normalize the input matrix using the specified normalization method. - - Parameters: - - method (optional): the normalization method to use. If None, all available methods will be applied. - Supported methods: 'minmax', 'target', 'standardized', 'rank'. - - Returns: - - a dictionary containing the normalized values of each indicator per normalization method. - - Notes: - Some aggregation methods do not work with indicator values equal or smaller than zero. For that reason: - - for the 'minmax' method, two sets of normalized indicators are returned: one with the range (0, 1) and - another with the range (0.1, 1). - - for the 'target' method, two sets of normalized indicators are returned: one with the range (0, 1) and - another with the range (0.1, 1). - - for the 'standardized' method, two sets of normalized indicators are returned: one with the range (-inf, +inf) - and another with the range (0.1, +inf). 
- """ - norm = Normalization(self._input_matrix, - self._config.polarity_for_each_indicator) - - normalized_indicators = {} - - if method is None or method == 'minmax': - indicators_scaled_minmax_01 = norm.minmax(feature_range=(0, 1)) - # for aggregation "geometric" and "harmonic" that do not accept 0 - indicators_scaled_minmax_without_zero = norm.minmax(feature_range=(0.1, 1)) - normalized_indicators["minmax_without_zero"] = indicators_scaled_minmax_without_zero - normalized_indicators["minmax_01"] = indicators_scaled_minmax_01 - if method is None or method == 'target': - indicators_scaled_target_01 = norm.target(feature_range=(0, 1)) - indicators_scaled_target_without_zero = norm.target( - feature_range=(0.1, 1)) # for aggregation "geometric" and "harmonic" that do not accept 0 - normalized_indicators["target_without_zero"] = indicators_scaled_target_without_zero - normalized_indicators["target_01"] = indicators_scaled_target_01 - if method is None or method == 'standardized': - indicators_scaled_standardized_any = norm.standardized( - feature_range=('-inf', '+inf')) - indicators_scaled_standardized_without_zero = norm.standardized( - feature_range=(0.1, '+inf')) - normalized_indicators["standardized_any"] = indicators_scaled_standardized_any - normalized_indicators["standardized_without_zero"] = indicators_scaled_standardized_without_zero - if method is None or method == 'rank': - indicators_scaled_rank = norm.rank() - normalized_indicators["rank"] = indicators_scaled_rank - if method is not None and method not in ['minmax', 'target', 'standardized', 'rank']: - logger.error('Error Message', stack_info=True) - raise ValueError( - 'The selected normalization method is not supported') - - return normalized_indicators - - def aggregate_indicators(self, normalized_indicators: dict, weights: list, method=None) -> pd.DataFrame: - """ - Aggregate the normalized indicators using the specified aggregation method. 
- - Parameters: - - normalized_indicators: a dictionary containing the normalized values of each indicator per normalization - method. - - weights: the weights to be applied during aggregation. - - method (optional): The aggregation method to use. If None, all available methods will be applied. - Supported methods: 'weighted_sum', 'geometric', 'harmonic', 'minimum'. - - Returns: - - a DataFrame containing the aggregated scores per each alternative, and per each normalization method. - - :param normalized_indicators: dict - :param weights: list - :param method: str - :return scores: pd.DataFrame - """ - self.normalized_indicators = normalized_indicators - self.weights = weights - - agg = Aggregation(self.weights) - - scores_weighted_sum = {} - scores_geometric = {} - scores_harmonic = {} - scores_minimum = {} - - scores = pd.DataFrame() - col_names_method = [] - col_names = ['ws-minmax_01', 'ws-target_01', 'ws-standardized_any', 'ws-rank', - 'geom-minmax_without_zero', 'geom-target_without_zero', 'geom-standardized_without_zero', - 'geom-rank', 'harm-minmax_without_zero', 'harm-target_without_zero', - 'harm-standardized_without_zero', 'harm-rank', 'min-standardized_any'] - # column names has the same order as in the following loop - - for key, values in self.normalized_indicators.items(): - if method is None or method == 'weighted_sum': - if key in ["standardized_any", "minmax_01", "target_01", - "rank"]: # ws goes only with some specific normalizations - scores_weighted_sum[key] = agg.weighted_sum(values) - col_names_method.append("ws-" + key) - if method is None or method == 'geometric': - if key in ["standardized_without_zero", "minmax_without_zero", "target_without_zero", - "rank"]: # geom goes only with some specific normalizations - scores_geometric[key] = pd.Series(agg.geometric(values)) - col_names_method.append("geom-" + key) - if method is None or method == 'harmonic': - if key in ["standardized_without_zero", "minmax_without_zero", "target_without_zero", 
class ProMCDA:
    """
    Entry point of the ProMCDA library.

    An instance stores the input matrix together with the robustness configuration and exposes the
    normalization and aggregation steps as separate methods. Results that are not returned directly
    (i.e. those produced when robustness analysis is enabled) are stored on the instance and can be
    retrieved through the ``get_*`` methods.
    """

    # NOTE: project enum types are quoted in the annotations below so that they are treated as forward
    # references and are not evaluated when the class is defined.
    def __init__(self, input_matrix: pd.DataFrame, polarity: Tuple[str, ...],
                 robustness_weights: Optional[bool] = False,
                 robustness_single_weights: Optional[bool] = False, robustness_indicators: Optional[bool] = False,
                 marginal_distributions: Optional[Tuple["PDFType", ...]] = None,
                 num_runs: Optional[int] = 10000, num_cores: Optional[int] = 1, random_seed: Optional[int] = 43):
        """
        Initialize the ProMCDA class with configuration parameters.

        # Required parameters
        :param input_matrix: DataFrame containing the alternatives and criteria.
        :param polarity: Tuple of polarities for each indicator ("+" or "-").

        # Optional parameters
        :param robustness_weights: Boolean flag indicating whether to perform robustness analysis on weights
                                   (True or False).
        :param robustness_single_weights: Boolean flag indicating whether to perform robustness analysis on one single
                                          weight at time (True or False).
        :param robustness_indicators: Boolean flag indicating whether to perform robustness analysis on indicators
                                      (True or False).
        :param marginal_distributions: Tuple of marginal distributions, which describe the indicators
                                       (the distribution types are defined in the enums class).
        :param num_runs: Number of Monte Carlo sampling runs (default: 10000).
        :param num_cores: Number of cores used for the calculations (default: 1).
        :param random_seed: The random seed used for the sampling (default: 43).

        # Example of instantiating the class and using its methods:
        data = {'Criterion1': [3, 4, 5], 'Criterion2': [7, 2, 8], 'Criterion3': [1, 6, 4]}
        input_matrix = pd.DataFrame(data, index=['Alternative1', 'Alternative2', 'Alternative3'])

        # Define polarities for each criterion
        polarity = ("+", "-", "+")

        promcda = ProMCDA(input_matrix=input_matrix,
                          polarity=polarity,
                          robustness_weights=False,
                          robustness_indicators=False,
                          num_runs=5000,
                          num_cores=2,
                          random_seed=123)

        # Normalization must be run before aggregation
        df_normalized = promcda.normalize()
        df_aggregated = promcda.aggregate()
        """
        self.input_matrix = input_matrix
        self.polarity = polarity
        self.robustness_weights = robustness_weights
        self.robustness_single_weights = robustness_single_weights
        self.robustness_indicators = robustness_indicators
        self.num_runs = num_runs
        self.marginal_distributions = marginal_distributions
        self.num_cores = num_cores
        self.random_seed = random_seed

        # Results of the normalization step
        self.normalized_values_without_robustness = None
        self.normalized_values_with_robustness = None

        # Results of the aggregation step. Every attribute starts as a plain None (not a one-element
        # tuple): the getters rely on "is not None" to tell whether a result has been computed.
        self.aggregated_scores = None
        self.all_indicators_scores_means = None
        self.all_indicators_scores_stds = None
        self.all_indicators_means_scores_normalized = None
        self.all_indicators_scores_stds_normalized = None
        self.all_weights_score_means = None
        self.all_weights_score_stds = None
        self.all_weights_score_means_normalized = None
        self.all_weights_score_stds_normalized = None
        self.iterative_random_w_score_means = None
        self.iterative_random_w_score_stds = None
        self.iterative_random_w_score_means_normalized = None

        self.input_matrix_no_alternatives = check_input_matrix(self.input_matrix)

    def normalize(self, normalization_method: Optional["NormalizationFunctions"] = None) -> Union[pd.DataFrame, str]:
        """
        Normalize the input data using the specified method.

        Notes:
        The normalizations methods are defined in the NormalizationFunctions enum class.

        Parameters:
        - normalization_method (optional): The normalization method to use. If None, all available methods will be
          applied for a Sensitivity Analysis.

        Returns:
        - A pd.DataFrame containing the normalized values of each indicator per normalization method,
          if no robustness on indicators is performed.
        - A short message otherwise; the normalized sampled matrices are stored on the instance and are available
          through get_normalized_values_with_robustness().

        Raises:
        - ValueError if robustness is enabled on both the weights and the indicators.

        :param normalization_method: NormalizationFunctions
        :return normalized_df: pd.DataFrame or string
        """
        if self.robustness_weights and self.robustness_indicators:
            raise ValueError(
                "Inconsistent configuration: 'robustness_weights' and 'robustness_indicators' are both enabled.")

        if not self.robustness_indicators:
            mcda_without_robustness = MCDAWithoutRobustness(self.polarity, self.input_matrix_no_alternatives)
            self.normalized_values_without_robustness = mcda_without_robustness.normalize_indicators(
                normalization_method)
            return self.normalized_values_without_robustness

        # Robustness on the indicators only: sample the input matrix and normalize every sample
        check_parameters_pdf(self.input_matrix_no_alternatives, self.marginal_distributions, for_testing=False)
        is_exact_pdf_mask = check_if_pdf_is_exact(self.marginal_distributions)
        is_poisson_pdf_mask = check_if_pdf_is_poisson(self.marginal_distributions)

        # NOTE(review): confirm that these positional arguments match the constructor of the
        # MCDAWithRobustness class that is actually imported by this module.
        mcda_with_robustness = MCDAWithRobustness(self.input_matrix_no_alternatives, self.marginal_distributions,
                                                  self.num_runs, is_exact_pdf_mask, is_poisson_pdf_mask,
                                                  self.random_seed)
        n_random_input_matrices = mcda_with_robustness.create_n_randomly_sampled_matrices()

        if not normalization_method:
            n_normalized_input_matrices = utils_for_parallelization.parallelize_normalization(
                n_random_input_matrices, self.polarity)
        else:
            n_normalized_input_matrices = utils_for_parallelization.parallelize_normalization(
                n_random_input_matrices, self.polarity, normalization_method)

        self.normalized_values_with_robustness = n_normalized_input_matrices

        return f"{self.num_runs} randomly sampled matrices have been normalized."

    def get_normalized_values_with_robustness(self) -> Optional[pd.DataFrame]:
        """
        Getter method to access normalized values when robustness on indicators is performed.

        Returns:
            Whatever normalize() stored for the randomly sampled matrices, or None if normalize() has not been
            run with robustness on the indicators.
        """
        return getattr(self, 'normalized_values_with_robustness', None)

    @staticmethod
    def _require_normalized_values(normalized_values):
        """Raise a ValueError if the normalization step has not produced any values yet."""
        if normalized_values is None:
            raise ValueError("Normalization must be performed before aggregation.")
        return normalized_values

    def aggregate(self, aggregation_method: Optional["AggregationFunctions"] = None,
                  weights: Optional[List[str]] = None) \
            -> Union[pd.DataFrame, str]:
        """
        Aggregate normalized indicators using the specified aggregation method.

        Notes:
        The aggregation methods are defined in the AggregationFunctions enum class.
        This method should follow the normalization. It acquires the normalized
        values from the normalization step.

        Parameters (optional):
        - aggregation_method: The aggregation method to use. If None, all available methods will be applied.
        - weights: The weights to be used for aggregation. If None, they are set all the same. Or, if robustness on
          weights is enabled, then the weights are sampled from the Monte Carlo simulation.

        Returns:
        - A pd.DataFrame containing the aggregated scores per normalization and aggregation methods,
          if no robustness analysis is performed.
        - A short message otherwise; the results are stored on the instance and are available through the
          get_aggregated_values_with_robustness_* methods.

        Raises:
        - ValueError if normalize() has not been run first, if the weights/polarities are inconsistent with the
          number of indicators, if num_runs is not positive, or if the robustness flags are inconsistent.

        :param aggregation_method: AggregationFunctions
        :param weights : list or None
        :return scores_df: pd.DataFrame or string
        """
        num_indicators = self.input_matrix_no_alternatives.shape[1]
        index_column_name = self.input_matrix.index.name
        index_column_values = self.input_matrix.index.tolist()

        # Assign equal weights when none are given and no weights are going to be sampled
        if weights is None and not self.robustness_weights and not self.robustness_single_weights:
            if self.robustness_indicators:
                # NOTE(review): presumably non-exact, non-Poisson indicators take two columns of the input
                # matrix; confirm that count('exact') / count('poisson') match the PDFType members.
                num_non_indicators = (
                        len(self.marginal_distributions) - self.marginal_distributions.count('exact')
                        - self.marginal_distributions.count('poisson'))
                num_indicators = self.input_matrix_no_alternatives.shape[1] - num_non_indicators
            weights = [0.5] * num_indicators

        # Process indicators and weights based on input parameters in the configuration
        polar, weights = process_indicators_and_weights(self.input_matrix_no_alternatives,
                                                        self.robustness_indicators,
                                                        self.robustness_weights, self.robustness_single_weights,
                                                        self.polarity, self.num_runs, num_indicators, weights)

        # Check the number of indicators, weights, and polarities
        try:
            check_indicator_weights_polarities(num_indicators, polar, robustness_weights=self.robustness_weights,
                                               weights=weights)
        except ValueError as e:
            logger.error(str(e), stack_info=True)
            raise

        on_indicators = bool(self.robustness_indicators)
        on_all_weights = bool(self.robustness_weights)
        on_single_weight = bool(self.robustness_single_weights)

        # NO UNCERTAINTY ON INDICATORS AND WEIGHTS
        if not on_indicators and not on_all_weights and not on_single_weight:
            normalized_indicators = self._require_normalized_values(self.normalized_values_without_robustness)
            mcda_without_robustness = MCDAWithoutRobustness(self.polarity, self.input_matrix_no_alternatives)
            if aggregation_method is None:
                results = [mcda_without_robustness.aggregate_indicators(
                    normalized_indicators=normalized_indicators,
                    weights=weights,
                    agg_method=agg_method) for agg_method in AggregationFunctions]
                aggregated_scores = pd.concat(results, axis=1)
            else:
                aggregated_scores = mcda_without_robustness.aggregate_indicators(
                    normalized_indicators=normalized_indicators,
                    weights=weights,
                    agg_method=aggregation_method)
            self.aggregated_scores = aggregated_scores
            return self.aggregated_scores

        # NO UNCERTAINTY ON INDICATORS, ALL RANDOMLY SAMPLED WEIGHTS (MCDA runs num_samples times)
        if on_all_weights and not on_single_weight and not on_indicators:
            normalized_indicators = self._require_normalized_values(self.normalized_values_without_robustness)
            logger.info("Start ProMCDA with uncertainty on the weights")
            (self.all_weights_score_means, self.all_weights_score_stds,
             self.all_weights_score_means_normalized, self.all_weights_score_stds_normalized) = \
                compute_scores_for_all_random_weights(normalized_indicators, weights, aggregation_method)
            return "Aggregation considered uncertainty on all weights, results are not explicitly shown."

        # NO UNCERTAINTY ON INDICATORS, ONE SINGLE RANDOM WEIGHT AT TIME
        if on_single_weight and not on_all_weights and not on_indicators:
            normalized_indicators = self._require_normalized_values(self.normalized_values_without_robustness)
            logger.info("Start ProMCDA with uncertainty on one weight at time")
            iterative_random_weights_statistics: dict = compute_scores_for_single_random_weight(
                normalized_indicators, weights, index_column_name, index_column_values,
                self.input_matrix, aggregation_method)
            self.iterative_random_w_score_means = iterative_random_weights_statistics['score_means']
            self.iterative_random_w_score_stds = iterative_random_weights_statistics['score_stds']
            self.iterative_random_w_score_means_normalized = \
                iterative_random_weights_statistics['score_means_normalized']
            return "Aggregation considered uncertainty on one weight at time, results are not explicitly shown."

        # UNCERTAINTY ON INDICATORS, NO UNCERTAINTY ON WEIGHTS
        if on_indicators and not on_all_weights and not on_single_weight:
            logger.info("Start ProMCDA with uncertainty on the indicators")
            if self.num_runs <= 0:
                logger.error('Error Message', stack_info=True)
                raise ValueError('The number of MC runs should be larger than 0 for a robustness analysis')
            if self.num_runs < 1000:
                logger.info("The number of Monte-Carlo runs is only {}".format(self.num_runs))
                logger.info("A meaningful number of Monte-Carlo runs is equal or larger than 1000")
            n_normalized_input_matrices = self._require_normalized_values(self.normalized_values_with_robustness)

            args_for_parallel_agg = [(weights, normalized_indicators)
                                     for normalized_indicators in n_normalized_input_matrices]
            if aggregation_method is None:
                all_indicators_scores = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg)
            else:
                all_indicators_scores = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg,
                                                                                          aggregation_method)
            all_indicators_scores_normalized = [rescale_minmax(matrix) for matrix in all_indicators_scores]

            self.all_indicators_scores_means, self.all_indicators_scores_stds = \
                utils_for_parallelization.estimate_runs_mean_std(all_indicators_scores)
            self.all_indicators_means_scores_normalized, self.all_indicators_scores_stds_normalized = \
                utils_for_parallelization.estimate_runs_mean_std(all_indicators_scores_normalized)
            self.aggregated_scores = all_indicators_scores_normalized
            return "Aggregation considered uncertainty on indicators, results are not explicitly shown."

        # Any other combination of the robustness flags is not supported
        logger.error('Error Message', stack_info=True)
        raise ValueError('Inconsistent configuration: robustness_weights and robustness_indicators are both enabled.')

    def _collect_statistics(self, means_name: str, normalized_means_name: str, stds_name: str) \
            -> Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]]:
        """Return (means, normalized means, stds) read from the given attributes, or None if any is unset."""
        means = getattr(self, means_name, None)
        normalized_means = getattr(self, normalized_means_name, None)
        stds = getattr(self, stds_name, None)

        if means is not None and normalized_means is not None and stds is not None:
            return means, normalized_means, stds
        return None

    def get_aggregated_values_with_robustness_indicators(self) \
            -> Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]]:
        """
        Getter method to access aggregated scores when robustness on indicators is performed.

        Returns:
            A tuple with three elements, in this order:
            - The mean scores.
            - The normalized mean scores.
            - The standard deviations of the scores.
            Returns None if any of them has not been computed.
        """
        return self._collect_statistics('all_indicators_scores_means',
                                        'all_indicators_means_scores_normalized',
                                        'all_indicators_scores_stds')

    def get_aggregated_values_with_robustness_weights(self) \
            -> Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]]:
        """
        Getter method to access aggregated scores when robustness on weights is performed.

        Returns:
            A tuple with three elements, in this order:
            - The mean scores.
            - The normalized mean scores.
            - The standard deviations of the scores.
            Returns None if any of them has not been computed.
        """
        return self._collect_statistics('all_weights_score_means',
                                        'all_weights_score_means_normalized',
                                        'all_weights_score_stds')

    def get_aggregated_values_with_robustness_one_weight(self) \
            -> Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]]:
        """
        Getter method to access aggregated scores when robustness on one weight at time is performed.

        Returns:
            A tuple with three elements, in this order:
            - The mean scores.
            - The normalized mean scores.
            - The standard deviations of the scores.
            Returns None if any of them has not been computed.
        """
        return self._collect_statistics('iterative_random_w_score_means',
                                        'iterative_random_w_score_means_normalized',
                                        'iterative_random_w_score_stds')

    def run_mcda(self, is_robustness_indicators: int, is_robustness_weights: int,
                 weights: Union[list, List[list], dict]):
        """
        Execute the full ProMCDA process, either with or without uncertainties on the indicators.

        NOTE(review): this method reads `self.configuration_settings`, which is not set by __init__;
        it has to be assigned on the instance before the call.

        :param is_robustness_indicators: 0 to run without uncertainty on the indicators, anything else to run with it
        :param is_robustness_weights: flag forwarded to the run without uncertainty on the indicators
        :param weights: weights forwarded to the run without uncertainty on the indicators
        :raises AttributeError: if `configuration_settings` has not been set on the instance
        """
        if not hasattr(self, 'configuration_settings'):
            raise AttributeError("'configuration_settings' must be set on the ProMCDA instance before run_mcda()")

        start_time = time.time()

        if is_robustness_indicators == 0:
            # no uncertainty on the indicators
            run_mcda_without_indicator_uncertainty(self.configuration_settings, is_robustness_weights, weights)
        else:
            # uncertainty on the indicators
            run_mcda_with_indicator_uncertainty(self.configuration_settings)

        elapsed_time = time.time() - start_time
        logger.info("All calculations finished in seconds {}".format(elapsed_time))
class MCDAWithoutRobustness:
    """
    Class MCDA without indicators' uncertainty

    This class allows one to run MCDA without considering the uncertainties related to the indicators.
    All indicators are described by the exact marginal distribution.
    However, it's possible to have randomly sampled weights.
    """

    def __init__(self, polarity: Tuple[str, ...], input_matrix: pd.DataFrame):
        """
        :param polarity: polarity of each indicator, forwarded to Normalization
        :param input_matrix: matrix of indicators; a deep copy is stored so the caller's DataFrame is not modified
        """
        self.normalized_indicators = None
        self.weights = None
        self.polarity = polarity
        self._input_matrix = copy.deepcopy(input_matrix)

    def normalize_indicators(self, method=None) -> pd.DataFrame:
        """
        Normalize the input matrix using the specified normalization method.

        Parameters:
        - method (optional): the normalization method to use, either a NormalizationFunctions member or its value.
          If None, all available methods will be applied.
          Supported methods: 'minmax', 'target', 'standardized', 'rank'.

        Returns:
        - A DataFrame containing the normalized values of each indicator per normalization method.
          Columns are named "<indicator>_<suffix>", where the suffix identifies the normalization applied
          (e.g. "minmax_01", "minmax_without_zero", "rank").

        Raises:
        - ValueError if the method is not one of the NormalizationFunctions values.

        Notes:
        Some aggregation methods do not work with indicator values equal or smaller than zero. For that reason:
        - for the 'minmax' method, two sets of normalized indicators are returned: one with the range (0, 1) and
          another with the range (0.1, 1).
        - for the 'target' method, two sets of normalized indicators are returned: one with the range (0, 1) and
          another with the range (0.1, 1).
        - for the 'standardized' method, two sets of normalized indicators are returned: one with the range (-inf, +inf)
          and another with the range (0.1, +inf).
        """
        if isinstance(method, NormalizationFunctions):
            method = method.value

        supported_methods = [member.value for member in NormalizationFunctions]
        if method is not None and method not in supported_methods:
            logger.error('Error Message', stack_info=True)
            raise ValueError('The selected normalization method is not supported')

        norm = Normalization(self._input_matrix, self.polarity)
        indicator_names = self._input_matrix.columns.tolist()
        normalized_dataframes = []

        def add_normalized_df(df, suffix):
            # Tag every indicator with the normalization it went through
            df.columns = [f"{col}_{suffix}" for col in indicator_names]
            normalized_dataframes.append(df)

        def is_requested(candidate):
            return method is None or method == candidate.value

        if is_requested(NormalizationFunctions.MINMAX):
            add_normalized_df(norm.minmax(feature_range=(0, 1)), "minmax_01")
            add_normalized_df(norm.minmax(feature_range=(0.1, 1)), "minmax_without_zero")

        if is_requested(NormalizationFunctions.TARGET):
            add_normalized_df(norm.target(feature_range=(0, 1)), "target_01")
            add_normalized_df(norm.target(feature_range=(0.1, 1)), "target_without_zero")

        if is_requested(NormalizationFunctions.STANDARDIZED):
            add_normalized_df(norm.standardized(feature_range=('-inf', '+inf')), "standardized_any")
            add_normalized_df(norm.standardized(feature_range=(0.1, '+inf')), "standardized_without_zero")

        if is_requested(NormalizationFunctions.RANK):
            add_normalized_df(norm.rank(), "rank")

        # Concatenate all normalized DataFrames along columns
        return pd.concat(normalized_dataframes, axis=1)

    def aggregate_indicators(self, normalized_indicators: pd.DataFrame, weights: list, agg_method=None) -> pd.DataFrame:
        """
        Aggregate the normalized indicators using the specified aggregation method.

        Parameters:
        - normalized_indicators: a DataFrame containing the normalized values of each indicator per normalization
          method, with the column naming produced by normalize_indicators.
        - weights: the weights to be applied during aggregation.
        - agg_method (optional): The aggregation method to use, either an AggregationFunctions member or its value.
          If None, all available methods will be applied.
          Supported methods: 'weighted_sum', 'geometric', 'harmonic', 'minimum'.

        Returns:
        - A DataFrame containing the aggregated scores for each alternative, with one column per
          (normalization, aggregation) pair named "<normalization>_<aggregation>".

        Raises:
        - ValueError if agg_method is neither None nor a valid AggregationFunctions member/value.
        """
        if agg_method is not None and not isinstance(agg_method, AggregationFunctions):
            try:
                agg_method = AggregationFunctions(agg_method)
            except ValueError as err:
                logger.error('Error Message', stack_info=True)
                raise ValueError('The selected aggregation method is not supported') from err

        self.normalized_indicators = normalized_indicators
        self.weights = weights

        agg = Aggregation(self.weights)
        agg_functions = {
            AggregationFunctions.WEIGHTED_SUM: agg.weighted_sum,
            AggregationFunctions.GEOMETRIC: agg.geometric,
            AggregationFunctions.HARMONIC: agg.harmonic,
            AggregationFunctions.MINIMUM: agg.minimum,
        }

        # Column suffixes written by normalize_indicators, per normalization method:
        # (suffix of the values that may contain zero, suffix of the strictly positive values).
        # Ranks serve both purposes.
        suffixes = {
            NormalizationFunctions.MINMAX.value: ("minmax_01", "minmax_without_zero"),
            NormalizationFunctions.TARGET.value: ("target_01", "target_without_zero"),
            NormalizationFunctions.STANDARDIZED.value: ("standardized_any", "standardized_without_zero"),
            NormalizationFunctions.RANK.value: ("rank", "rank"),
        }

        column_names = [str(col) for col in normalized_indicators.columns]

        def select_columns(suffix):
            # A boolean mask keeps the indicators in their original order, so that they stay aligned
            # with the weights; matching on the suffix also works for indicator names with underscores.
            mask = [name.endswith(f"_{suffix}") for name in column_names]
            return normalized_indicators.loc[:, mask]

        def is_requested(candidate):
            return agg_method is None or agg_method == candidate

        score_frames = []

        def apply_aggregation(norm_method, function, df_subset):
            """Aggregate df_subset with the given function and store the scores as a one-column DataFrame."""
            if df_subset.shape[1] == 0:
                return
            aggregated_scores = agg_functions[function](df_subset)

            if isinstance(aggregated_scores, np.ndarray):
                aggregated_scores = pd.DataFrame(aggregated_scores, index=df_subset.index)
            elif isinstance(aggregated_scores, pd.Series):
                aggregated_scores = aggregated_scores.to_frame()

            aggregated_scores.columns = [f"{norm_method}_{function.value}"]
            score_frames.append(aggregated_scores)

        for norm_method, (with_zero_suffix, without_zero_suffix) in suffixes.items():
            with_zero_columns = select_columns(with_zero_suffix)
            without_zero_columns = select_columns(without_zero_suffix)

            # WEIGHTED_SUM accepts values equal to zero
            if is_requested(AggregationFunctions.WEIGHTED_SUM):
                apply_aggregation(norm_method, AggregationFunctions.WEIGHTED_SUM, with_zero_columns)
            # GEOMETRIC and HARMONIC need strictly positive values
            if is_requested(AggregationFunctions.GEOMETRIC):
                apply_aggregation(norm_method, AggregationFunctions.GEOMETRIC, without_zero_columns)
            if is_requested(AggregationFunctions.HARMONIC):
                apply_aggregation(norm_method, AggregationFunctions.HARMONIC, without_zero_columns)
            # MINIMUM is only combined with the standardized values
            if is_requested(AggregationFunctions.MINIMUM) \
                    and norm_method == NormalizationFunctions.STANDARDIZED.value:
                apply_aggregation(norm_method, AggregationFunctions.MINIMUM, with_zero_columns)

        if not score_frames:
            return pd.DataFrame()
        return pd.concat(score_frames, axis=1)
import MCDAWithoutRobustness +from mcda.models.mcda_with_robustness import MCDAWithRobustness DEFAULT_INPUT_DIRECTORY_PATH = './input_files' # present in the root directory of ProMCDA DEFAULT_OUTPUT_DIRECTORY_PATH = './output_files' # present in the root directory of ProMCDA @@ -39,287 +39,6 @@ logger = logging.getLogger("ProMCDA") -def check_config_error(condition: bool, error_message: str): - """ - Check a condition and raise a ValueError with a specified error message if the condition is True. - - Parameters: - - condition (bool): The condition to check. - - error_message (str): The error message to raise if the condition is True. - - Raises: - - ValueError: If the condition is True, with the specified error message. - - :param error_message: str - :param condition: bool - :return: None - """ - - if condition: - logger.error('Error Message', stack_info=True) - raise ValueError(error_message) - - -def check_config_setting(condition_robustness_on_weights: bool, condition_robustness_on_indicators: bool, mc_runs: int, - random_seed: int) -> (int, int): - """ - Checks configuration settings and logs information messages. - - Returns: - - is_robustness_weights, is_robustness_indicators, booleans indicating if robustness is considered - on weights or indicators. 
- - Example: - ```python - is_robustness_weights, is_robustness_indicators = check_config_setting(True, False, 1000, 42) - ``` - - :param condition_robustness_on_weights: bool - :param condition_robustness_on_indicators: bool - :param mc_runs: int - :param random_seed: int - :return: (is_robustness_weights, is_robustness_indicators) - :rtype: Tuple[int, int] - """ - is_robustness_weights = 0 - is_robustness_indicators = 0 - - if condition_robustness_on_weights: - logger.info("ProMCDA will consider uncertainty on the weights.") - logger.info("Number of Monte Carlo runs: {}".format(mc_runs)) - logger.info("The random seed used is: {}".format(random_seed)) - is_robustness_weights = 1 - - elif condition_robustness_on_indicators: - logger.info("ProMCDA will consider uncertainty on the indicators.") - logger.info("Number of Monte Carlo runs: {}".format(mc_runs)) - logger.info("The random seed used is: {}".format(random_seed)) - is_robustness_indicators = 1 - - return is_robustness_weights, is_robustness_indicators - - -def process_indicators_and_weights(config: dict, input_matrix: pd.DataFrame, - is_robustness_indicators: int, is_robustness_weights: int, polar: List[str], - mc_runs: int, num_indicators: int) \ - -> Tuple[List[str], Union[list, List[list], dict]]: - """ - Process indicators and weights based on input parameters in the configuration. - - Parameters: - - config: the configuration dictionary. - - input_matrix: the input matrix without alternatives. - - is_robustness_indicators: a flag indicating whether the matrix should include indicator uncertainties - (0 or 1). - - is_robustness_weights: a flag indicating whether robustness analysis is considered for the weights (0 or 1). - - marginal_pdf: a list of marginal probability density functions for indicators. - - mc_runs: number of Monte Carlo runs for robustness analysis. - - num_indicators: the number of indicators in the input matrix. 
- - Raises: - - ValueError: If there are duplicated rows in the input matrix or if there is an issue with the configuration. - - Returns: - - a shorter list of polarities if one has been dropped together with the relative indicator, - which brings no information. Otherwise, the same list. - - the normalised weights (either fixed or random sampled weights, depending on the settings) - - Notes: - - For is_robustness_indicators == 0: - - Identifies and removes columns with constant values. - - Logs the number of alternatives and indicators. - - - For is_robustness_indicators == 1: - - Handles uncertainty in indicators. - - Logs the number of alternatives and indicators. - - - For is_robustness_weights == 0: - - Processes fixed weights if given. - - Logs weights and normalised weights. - - - For is_robustness_weights == 1: - - Performs robustness analysis on weights. - - Logs randomly sampled weights. - - :param mc_runs: int - :param polar: List[str] - :param is_robustness_weights: int - :param is_robustness_indicators: int - :param input_matrix: pd.DataFrame - :param config: dict - :param num_indicators: int - :return: polar, norm_weights - :rtype: Tuple[List[str], Union[List[list], dict]] - """ - num_unique = input_matrix.nunique() - cols_to_drop = num_unique[num_unique == 1].index - col_to_drop_indexes = input_matrix.columns.get_indexer(cols_to_drop) - - if is_robustness_indicators == 0: - _handle_no_robustness_indicators(input_matrix) - else: # matrix with uncertainty on indicators - logger.info("Number of alternatives: {}".format(input_matrix.shape[0])) - logger.info("Number of indicators: {}".format(num_indicators)) - # TODO: eliminate indicators with constant values (i.e. 
same mean and 0 std) - optional - - polarities_and_weights = _handle_polarities_and_weights(is_robustness_indicators, is_robustness_weights, num_unique, - col_to_drop_indexes, polar, config, mc_runs, num_indicators) - - polar, norm_weights = tuple(item for item in polarities_and_weights if item is not None) - - return polar, norm_weights - - -def _handle_polarities_and_weights(is_robustness_indicators: int, is_robustness_weights: int, num_unique, - col_to_drop_indexes: np.ndarray, polar: List[str], config: dict, mc_runs: int, - num_indicators: int) \ - -> Union[Tuple[List[str], list, None, None], Tuple[List[str], None, List[List], None], - Tuple[List[str], None, None, dict]]: - """ - Manage polarities and weights based on the specified robustness settings, ensuring that the appropriate adjustments - and normalizations are applied before returning the necessary data structures. - """ - norm_random_weights = [] - rand_weight_per_indicator = {} - - # Managing polarities - if is_robustness_indicators == 0: - if any(value == 1 for value in num_unique): - polar = pop_indexed_elements(col_to_drop_indexes, polar) - logger.info("Polarities: {}".format(polar)) - - # Managing weights - if is_robustness_weights == 0: - fixed_weights = config.robustness["given_weights"] - if any(value == 1 for value in num_unique): - fixed_weights = pop_indexed_elements(col_to_drop_indexes, fixed_weights) - norm_fixed_weights = check_norm_sum_weights(fixed_weights) - logger.info("Weights: {}".format(fixed_weights)) - logger.info("Normalized weights: {}".format(norm_fixed_weights)) - return polar, norm_fixed_weights, None, None - # Return None for norm_random_weights and rand_weight_per_indicator - else: - output_weights = _handle_robustness_weights(config, mc_runs, num_indicators) - if output_weights is not None: - norm_random_weights, rand_weight_per_indicator = output_weights - if norm_random_weights: - return polar, None, norm_random_weights, None - else: - return polar, None, None, 
rand_weight_per_indicator - # Return None for norm_fixed_weights and one of the other two cases of randomness - - -def _handle_robustness_weights(config: dict, mc_runs: int, num_indicators: int) \ - -> Tuple[Union[List[list], None], Union[dict, None]]: - """ - Handle the generation and normalization of random weights based on the specified settings - when a robustness analysis is requested on all the weights. - """ - norm_random_weights = [] - rand_weight_per_indicator = {} - - if mc_runs == 0: - logger.error('Error Message', stack_info=True) - raise ValueError('The number of MC runs should be larger than 0 for a robustness analysis') - - if config.robustness["on_single_weights"] == "no" and config.robustness["on_all_weights"] == "yes": - random_weights = randomly_sample_all_weights(num_indicators, mc_runs) - for weights in random_weights: - weights = check_norm_sum_weights(weights) - norm_random_weights.append(weights) - return norm_random_weights, None # Return norm_random_weights, and None for rand_weight_per_indicator - elif config.robustness["on_single_weights"] == "yes" and config.robustness["on_all_weights"] == "no": - i = 0 - while i < num_indicators: - random_weights = randomly_sample_ix_weight(num_indicators, i, mc_runs) - norm_random_weight = [] - for weights in random_weights: - weights = check_norm_sum_weights(weights) - norm_random_weight.append(weights) - rand_weight_per_indicator["indicator_{}".format(i + 1)] = norm_random_weight - i += 1 - return None, rand_weight_per_indicator # Return None for norm_random_weights, and rand_weight_per_indicator - - -def _handle_no_robustness_indicators(input_matrix: pd.DataFrame): - """ - Handle the indicators in case of no robustness analysis required. 
- (The input matrix is without the alternative column) - """ - num_unique = input_matrix.nunique() - cols_to_drop = num_unique[num_unique == 1].index - - if any(value == 1 for value in num_unique): - logger.info("Indicators {} have been dropped because they carry no information".format(cols_to_drop)) - input_matrix = input_matrix.drop(cols_to_drop, axis=1) - - num_indicators = input_matrix.shape[1] - logger.info("Number of alternatives: {}".format(input_matrix.shape[0])) - logger.info("Number of indicators: {}".format(num_indicators)) - - -def check_indicator_weights_polarities(num_indicators: int, polar: List[str], config: dict): - """ - Check the consistency of indicators, polarities, and fixed weights in a configuration. - - Parameters: - - num_indicators: the number of indicators in the input matrix. - - polar: a list containing the polarity associated to each indicator. - - config: the configuration dictionary. - - This function raises a ValueError if the following conditions are not met: - 1. The number of indicators does not match the number of polarities. - 2. "on_all_weights" is set to "no," and the number of fixed weights - does not correspond to the number of indicators. - - Raises: - - ValueError: if the conditions for indicator-polarity and fixed weights consistency are not met. - - :param num_indicators: int - :param polar: List[str] - :param config: dict - :return: None - """ - if num_indicators != len(polar): - raise ValueError('The number of polarities does not correspond to the no. of indicators') - - # Check the number of fixed weights if "on_all_weights" is set to "no" - if (config.robustness["on_all_weights"] == "no") and ( - num_indicators != len(config.robustness["given_weights"])): - raise ValueError('The no. of fixed weights does not correspond to the no. 
of indicators') - - -def check_input_matrix(input_matrix: pd.DataFrame) -> pd.DataFrame: - """ - Check the input matrix for duplicated rows in the alternatives column, rescale negative indicator values - and drop the index column of alternatives. - - Parameters: - - input_matrix: The input matrix containing the alternatives and indicators. - - Raises: - - ValueError: If duplicated rows are found in the alternative column. - - UserStoppedInfo: If the user chooses to stop when duplicates are found. - - :param input_matrix: pd.DataFrame - :rtype: pd.DataFrame - :return: input_matrix - """ - if input_matrix.duplicated().any(): - raise ValueError('Error: Duplicated rows in the alternatives column.') - elif input_matrix.iloc[:, 0].duplicated().any(): - logger.info('Duplicated rows in the alternatives column.') - - index_column_values = input_matrix.index.tolist() - logger.info("Alternatives are {}".format(index_column_values)) - input_matrix_no_alternatives = input_matrix.reset_index(drop=True) # drop the alternative - - input_matrix_no_alternatives = _check_and_rescale_negative_indicators( - input_matrix_no_alternatives) - - return input_matrix_no_alternatives - - def ensure_directory_exists(path): """ Ensure that the directory specified by the given path exists. @@ -345,7 +64,7 @@ def ensure_directory_exists(path): raise # Re-raise the exception to propagate it to the caller - +# TODO: maybe give the option of giving either a pd.DataFrame or a path as input parameter in ProMCDA def read_matrix(input_matrix_path: str) -> pd.DataFrame: """ Read an input matrix from a CSV file and return it as a DataFrame. @@ -395,21 +114,6 @@ def reset_index_if_needed(series): return series -def _check_and_rescale_negative_indicators(input_matrix: pd.DataFrame) -> pd.DataFrame: - """ - Rescale indicators of the input matrix if negative into [0-1]. 
- """ - - if (input_matrix < 0).any().any(): - scaler = MinMaxScaler() - scaled_data = scaler.fit_transform(input_matrix) - scaled_matrix = pd.DataFrame( - scaled_data, columns=input_matrix.columns, index=input_matrix.index) - return scaled_matrix - else: - return input_matrix - - def parse_args(): """ Parse command line arguments for configuration path. @@ -487,6 +191,7 @@ def save_df(df: pd.DataFrame, folder_path: str, filename: str): except IOError as e: logging.error(f"Error while writing data frame into a CSV file: {e}") + def save_dict(dictionary: dict, folder_path: str, filename: str): """ Save a dictionary to a binary file using pickle with a timestamped filename. @@ -529,6 +234,29 @@ def save_dict(dictionary: dict, folder_path: str, filename: str): logging.error(f"Error while dumping the dictionary into a pickle file: {e}") +def preprocess_enums(data) -> Union[Union[dict, list[str]], Any]: + """ + Preprocess data to convert enums to strings + + Parameters: + - data: to be processed + + Example: + ```python + preprocess_enums(data) + ``` + :param data: enums + :return: string + """ + if isinstance(data, dict): + return {k: preprocess_enums(v) for k, v in data.items()} + elif isinstance(data, list): + return [preprocess_enums(v) for v in data] + elif isinstance(data, Enum): + return data.value + return data + + def save_config(config: dict, folder_path: str, filename: str): """ Save a configuration dictionary to a JSON file with a timestamped filename. 
@@ -567,11 +295,35 @@ def save_config(config: dict, folder_path: str, filename: str): try: with open(full_output_path, 'w') as fp: - json.dump(config, fp) + processed_config = preprocess_enums(config) + serializable_config = _prepare_config_for_json(processed_config) + json.dump(serializable_config, fp) except IOError as e: logging.error(f"Error while dumping the configuration into a JSON file: {e}") +def _convert_dataframe_to_serializable(df): + """ + Convert a pandas DataFrame into a serializable dictionary format. + """ + return { + 'data': df.values.tolist(), # Convert data to list of lists + 'columns': df.columns.tolist(), # Convert column names to list + 'index': df.index.tolist() # Convert index labels to list + } + + +def _prepare_config_for_json(config): + """ + Prepare the config dictionary by converting non-serializable objects into serializable ones. + """ + config_copy = config.copy() # Create a copy to avoid modifying the original config + if isinstance(config_copy['input_matrix'], pd.DataFrame): + # Convert DataFrame to serializable format + config_copy['input_matrix'] = _convert_dataframe_to_serializable(config_copy['input_matrix']) + return config_copy + + def check_path_exists(path: str): """ Check if a directory path exists, and create it if it doesn't. @@ -702,7 +454,7 @@ def check_norm_sum_weights(weights: list) -> list: return weights -def pop_indexed_elements(indexes: np.ndarray, original_list: list) -> list: +def pop_indexed_elements(indexes: np.ndarray, original_list: List[str]) -> list: """ Eliminate elements from a list at specified indexes. 
@@ -731,7 +483,8 @@ def pop_indexed_elements(indexes: np.ndarray, original_list: list) -> list: return new_list -def check_parameters_pdf(input_matrix: pd.DataFrame, config: dict, for_testing=False) -> Union[List[bool], None]: +def check_parameters_pdf(input_matrix: pd.DataFrame, marginal_distributions: Tuple[PDFType, ...], for_testing=False) \ + -> Union[List[bool], None]: """ Check conditions on parameters based on the type of probability distribution function (PDF) for each indicator and raise logging information in case of any problem. @@ -744,7 +497,7 @@ def check_parameters_pdf(input_matrix: pd.DataFrame, config: dict, for_testing=F Parameters: - input_matrix: the input matrix containing uncertainties for indicators, no alternatives. - - config: configuration dictionary containing the Monte Carlo sampling information. + - marginal_distributions: the PDFs associated to each indicator. - for_testing: true only for unit testing Returns: @@ -752,16 +505,15 @@ def check_parameters_pdf(input_matrix: pd.DataFrame, config: dict, for_testing=F - None: default :param input_matrix: pd.DataFrame - :param config: dict + :param marginal_distributions: PDFType :param for_testing: bool :return: Union[list, None] """ - config = Config(config) satisfies_condition = False problem_logged = False - marginal_pdf = config.monte_carlo_sampling["marginal_distribution_for_each_indicator"] + marginal_pdf = marginal_distributions is_exact_pdf_mask = check_if_pdf_is_exact(marginal_pdf) is_poisson_pdf_mask = check_if_pdf_is_poisson(marginal_pdf) is_uniform_pdf_mask = check_if_pdf_is_uniform(marginal_pdf) @@ -805,7 +557,7 @@ def check_parameters_pdf(input_matrix: pd.DataFrame, config: dict, for_testing=F return list_of_satisfied_conditions -def check_if_pdf_is_exact(marginal_pdf: list) -> list: +def check_if_pdf_is_exact(marginal_pdf: tuple[PDFType, ...]) -> list: """ Check if each indicator's probability distribution function (PDF) is of type 'exact'. 
@@ -829,7 +581,7 @@ def check_if_pdf_is_exact(marginal_pdf: list) -> list: return exact_pdf_mask -def check_if_pdf_is_poisson(marginal_pdf: list) -> list: +def check_if_pdf_is_poisson(marginal_pdf: tuple[PDFType, ...]) -> list: """ Check if each indicator's probability distribution function (PDF) is of type 'poisson'. @@ -877,10 +629,8 @@ def check_if_pdf_is_uniform(marginal_pdf: list) -> list: return uniform_pdf_mask -def run_mcda_without_indicator_uncertainty(input_config: dict, index_column_name: str, index_column_values: list, - input_matrix: pd.DataFrame, - weights: Union[list, List[list], dict], - f_norm: str, f_agg: str, is_robustness_weights: int): +def run_mcda_without_indicator_uncertainty(extracted_values: dict, is_robustness_weights: int, + weights: Union[List[str], List[pd.DataFrame], dict, None]): """ Runs ProMCDA without uncertainty on the indicators, i.e. without performing a robustness analysis. @@ -890,18 +640,12 @@ def run_mcda_without_indicator_uncertainty(input_config: dict, index_column_name and logs the completion time. Parameters: - - input_matrix: the input_matrix without the alternatives. - - index_column_name: the name of the index column of the original input matrix. - - index_column_values: the values of the index column of the original input matrix. + - extracted_values: a dictionary containing configuration values extracted from the input parameters. + - is_robustness_weights: a flag indicating whether robustness analysis will be performed on indicators or not. - weights: the normalised weights (either fixed or random sampled weights, depending on the settings). 
- :param input_config: dict - :param index_column_name: str - :param index_column_values: list - :param input_matrix: pd:DataFrame :param weights: Union[List[str], List[pd.DataFrame], dict, None] - :param f_norm: str - :param f_agg: str + :param extracted_values: dict :param is_robustness_weights: int :return: None """ @@ -915,11 +659,19 @@ def run_mcda_without_indicator_uncertainty(input_config: dict, index_column_name iterative_random_w_score_means = {} iterative_random_w_score_stds = {} + # Extract relevant values + input_matrix = extracted_values["input_matrix"] + index_column_name = input_matrix.index.name + index_column_values = input_matrix.index.tolist() + input_matrix_no_alternatives = check_input_matrix(input_matrix) + is_sensitivity = extracted_values['sensitivity_on'] + is_robustness = extracted_values['robustness_on'] + f_norm = extracted_values["normalization"] + f_agg = extracted_values["aggregation"] + + mcda_no_uncert \ + = MCDAWithoutRobustness(extracted_values, input_matrix_no_alternatives) logger.info("Start ProMCDA without robustness of the indicators") - config = Config(input_config) - is_sensitivity = config.sensitivity['sensitivity_on'] - is_robustness = config.robustness['robustness_on'] - mcda_no_uncert = MCDAWithoutRobustness(config, input_matrix) normalized_indicators = mcda_no_uncert.normalize_indicators() if is_sensitivity == "yes" \ else mcda_no_uncert.normalize_indicators(f_norm) @@ -929,15 +681,16 @@ def run_mcda_without_indicator_uncertainty(input_config: dict, index_column_name if is_sensitivity == "yes" \ else mcda_no_uncert.aggregate_indicators(normalized_indicators, weights, f_agg) normalized_scores = rescale_minmax(scores) - elif config.robustness["on_all_weights"] == "yes" and config.robustness["robustness_on"] == "yes": + elif extracted_values["on_all_weights"] == "yes" and extracted_values["robustness_on"] == "yes": # ALL RANDOMLY SAMPLED WEIGHTS (MCDA runs num_samples times) all_weights_score_means, all_weights_score_stds, 
\ all_weights_score_means_normalized, all_weights_score_stds_normalized = \ - _compute_scores_for_all_random_weights(normalized_indicators, is_sensitivity, weights, f_agg) - elif (config.robustness["on_single_weights"] == "yes") and (config.robustness["robustness_on"] == "yes"): + compute_scores_for_all_random_weights(normalized_indicators, is_sensitivity, weights, f_agg) + elif (extracted_values["on_single_weights"] == "yes") and (extracted_values["robustness_on"] == "yes"): # ONE RANDOMLY SAMPLED WEIGHT A TIME (MCDA runs (num_samples * num_indicators) times) - iterative_random_weights_statistics: dict = _compute_scores_for_single_random_weight( - normalized_indicators, weights, is_sensitivity, index_column_name, index_column_values, f_agg, input_matrix) + iterative_random_weights_statistics: dict = compute_scores_for_single_random_weight( + normalized_indicators, weights, is_sensitivity, index_column_name, index_column_values, f_agg, + input_matrix_no_alternatives) iterative_random_w_score_means = iterative_random_weights_statistics['score_means'] iterative_random_w_score_stds = iterative_random_weights_statistics['score_stds'] iterative_random_w_score_means_normalized = iterative_random_weights_statistics['score_means_normalized'] @@ -952,7 +705,7 @@ def run_mcda_without_indicator_uncertainty(input_config: dict, index_column_name iterative_random_w_score_means_normalized=iterative_random_w_score_means_normalized, iterative_random_w_score_stds=iterative_random_w_score_stds, index_column_name=index_column_name, index_column_values=index_column_values, - input_config=input_config) + input_config=extracted_values) _plot_and_save_charts(scores=scores, normalized_scores=normalized_scores, score_means=all_weights_score_means, score_stds=all_weights_score_stds, @@ -960,14 +713,12 @@ def run_mcda_without_indicator_uncertainty(input_config: dict, index_column_name iterative_random_w_score_means=iterative_random_w_score_means, 
iterative_random_w_score_stds=iterative_random_w_score_stds, iterative_random_w_score_means_normalized=iterative_random_w_score_means_normalized, - input_matrix=input_matrix, config=input_config, + input_matrix=input_matrix_no_alternatives, config=extracted_values, is_robustness_weights=is_robustness_weights) -def run_mcda_with_indicator_uncertainty(input_config: dict, input_matrix: pd.DataFrame, index_column_name: str, - index_column_values: list, mc_runs: int, random_seed: int, is_sensitivity: str, - f_agg: str, f_norm: str, weights: Union[List[list], List[pd.DataFrame], dict], - polar: List[str], marginal_pdf: List[str]) -> None: +def run_mcda_with_indicator_uncertainty(extracted_values: dict, weights: Union[List[str], List[pd.DataFrame], +dict, None]) -> None: """ Runs ProMCDA with uncertainty on the indicators, i.e. with a robustness analysis. @@ -981,24 +732,29 @@ def run_mcda_with_indicator_uncertainty(input_config: dict, input_matrix: pd.Dat - weights: the normalised weights (either fixed or random sampled weights, depending on the settings). In the context of the robustness analysis, only fixed normalised weights are used, i.e. weights[0]. 
- :param input_config: dict - :param index_column_name: str - :param index_column_values: list - :param input_matrix: pd:DataFrame - :param mc_runs: int - :param is_sensitivity: str + :param extracted_values: dict :param weights: Union[List[str], List[pd.DataFrame], dict, None] - :param f_norm: str - :param f_agg: str - :param polar: List[str] - :param marginal_pdf: List[str] :return: None """ logger.info("Start ProMCDA with uncertainty on the indicators") - config = Config(input_config) is_robustness_indicators = True all_indicators_scores_normalized = [] + # Extract relevant values + input_matrix = extracted_values["input_matrix"] + alternatives_column_name = input_matrix.columns[0] + input_matrix = input_matrix.set_index(alternatives_column_name) + index_column_name = input_matrix.index.name + index_column_values = input_matrix.index.tolist() + input_matrix_no_alternatives = check_input_matrix(input_matrix) + mc_runs = extracted_values["monte_carlo_runs"] + marginal_pdf = extracted_values["marginal_distribution_for_each_indicator"] + random_seed = extracted_values["random_seed"] + is_sensitivity = extracted_values['sensitivity_on'] + f_norm = extracted_values["normalization"] + f_agg = extracted_values["aggregation"] + polar = extracted_values["polarity_for_each_indicator"] + if mc_runs <= 0: logger.error('Error Message', stack_info=True) raise ValueError('The number of MC runs should be larger than 0 for a robustness analysis') @@ -1007,17 +763,20 @@ def run_mcda_with_indicator_uncertainty(input_config: dict, input_matrix: pd.Dat logger.info("The number of Monte-Carlo runs is only {}".format(mc_runs)) logger.info("A meaningful number of Monte-Carlo runs is equal or larger than 1000") - check_parameters_pdf(input_matrix, input_config) + check_parameters_pdf(input_matrix, extracted_values) is_exact_pdf_mask = check_if_pdf_is_exact(marginal_pdf) is_poisson_pdf_mask = check_if_pdf_is_poisson(marginal_pdf) - mcda_with_uncert = MCDAWithRobustness(config, 
input_matrix, is_exact_pdf_mask, is_poisson_pdf_mask, random_seed) + mcda_with_uncert = MCDAWithRobustness(extracted_values, input_matrix_no_alternatives, is_exact_pdf_mask, + is_poisson_pdf_mask, random_seed) n_random_input_matrices = mcda_with_uncert.create_n_randomly_sampled_matrices() if is_sensitivity == "yes": - n_normalized_input_matrices = utils_for_parallelization.parallelize_normalization(n_random_input_matrices, polar) + n_normalized_input_matrices = utils_for_parallelization.parallelize_normalization(n_random_input_matrices, + polar) else: - n_normalized_input_matrices = utils_for_parallelization.parallelize_normalization(n_random_input_matrices, polar, f_norm) + n_normalized_input_matrices = utils_for_parallelization.parallelize_normalization(n_random_input_matrices, + polar, f_norm) args_for_parallel_agg = [(weights, normalized_indicators) for normalized_indicators in n_normalized_input_matrices] @@ -1046,7 +805,7 @@ def run_mcda_with_indicator_uncertainty(input_config: dict, input_matrix: pd.Dat iterative_random_w_score_means=None, iterative_random_w_score_means_normalized=None, iterative_random_w_score_stds=None, - input_config=input_config, + input_config=extracted_values, index_column_name=index_column_name, index_column_values=index_column_values) _plot_and_save_charts(scores=None, normalized_scores=None, @@ -1055,13 +814,60 @@ def run_mcda_with_indicator_uncertainty(input_config: dict, input_matrix: pd.Dat iterative_random_w_score_means=None, iterative_random_w_score_stds=None, iterative_random_w_score_means_normalized=None, - input_matrix=input_matrix, config=input_config, + input_matrix=input_matrix, config=extracted_values, is_robustness_indicators=is_robustness_indicators) -def _compute_scores_for_all_random_weights(indicators: dict, is_sensitivity: str, - weights: Union[List[str], List[pd.DataFrame], dict, None], - f_agg: str) -> tuple[Any, Any, Any, Any]: +def check_input_matrix(input_matrix: pd.DataFrame) -> pd.DataFrame: + """ + Check 
the input matrix for duplicated rows in the alternatives column, rescale negative indicator values + and drop the index column of alternatives. + + Parameters: + - input_matrix: The input matrix containing the alternatives and indicators. + + Raises: + - ValueError: If duplicated rows are found in the alternative column. + - UserStoppedInfo: If the user chooses to stop when duplicates are found. + + :param input_matrix: pd.DataFrame + :rtype: pd.DataFrame + :return: input_matrix + """ + if input_matrix.duplicated().any(): + raise ValueError('Error: Duplicated rows in the alternatives column.') + elif input_matrix.iloc[:, 0].duplicated().any(): + logger.info('Duplicated rows in the alternatives column.') + + index_column_values = input_matrix.index.tolist() + logger.info("Alternatives are {}".format(index_column_values)) + input_matrix_no_alternatives = input_matrix.reset_index(drop=True) # drop the alternative + + input_matrix_no_alternatives = _check_and_rescale_negative_indicators( + input_matrix_no_alternatives) + + return input_matrix_no_alternatives + + +def _check_and_rescale_negative_indicators(input_matrix: pd.DataFrame) -> pd.DataFrame: + """ + Rescale indicators of the input matrix if negative into [0-1]. + """ + + if (input_matrix < 0).any().any(): + scaler = MinMaxScaler() + scaled_data = scaler.fit_transform(input_matrix) + scaled_matrix = pd.DataFrame( + scaled_data, columns=input_matrix.columns, index=input_matrix.index) + return scaled_matrix + else: + return input_matrix + + +def compute_scores_for_all_random_weights(indicators: pd.DataFrame, + weights: Union[List[str], List[pd.DataFrame], dict, None], + aggregation_method: Optional[AggregationFunctions] = None) \ + -> tuple[Any, Any, Any, Any]: """ Computes the normalized mean scores and std of the alternatives in the case of randomly sampled weights. 
""" @@ -1077,10 +883,10 @@ def _compute_scores_for_all_random_weights(indicators: dict, is_sensitivity: str args_for_parallel_agg = [(lst, indicators) for lst in random_weights] - if is_sensitivity == "yes": + if aggregation_method is None: all_weights_scores = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg) else: - all_weights_scores = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg, f_agg) + all_weights_scores = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg, aggregation_method) for matrix in all_weights_scores: normalized_matrix = rescale_minmax(matrix) # all score normalization @@ -1094,10 +900,11 @@ def _compute_scores_for_all_random_weights(indicators: dict, is_sensitivity: str all_weights_score_means_normalized, all_weights_score_stds_normalized -def _compute_scores_for_single_random_weight(indicators: dict, - weights: Union[List[str], List[pd.DataFrame], dict, None], - is_sensitivity: str, index_column_name: str, index_column_values: list, - f_agg: str, input_matrix: pd.DataFrame) -> dict: +def compute_scores_for_single_random_weight(indicators: pd.DataFrame, + weights: Union[List[str], List[pd.DataFrame], dict, None], + index_column_name: str, index_column_values: list, + input_matrix: pd.DataFrame, + aggregation_method: Optional[AggregationFunctions] = None) -> dict: """ Computes the mean scores and std of the alternatives in the case of one randomly sampled weight at time. 
""" @@ -1120,10 +927,11 @@ def _compute_scores_for_single_random_weight(indicators: dict, for index in range(num_indicators): norm_one_random_weight = rand_weight_per_indicator.get("indicator_{}".format(index + 1), []) args_for_parallel_agg = [(lst, indicators) for lst in norm_one_random_weight] - if is_sensitivity == "yes": + if aggregation_method is None: scores_one_random_weight = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg) else: - scores_one_random_weight = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg, f_agg) + scores_one_random_weight = utils_for_parallelization.parallelize_aggregation(args_for_parallel_agg, + aggregation_method) scores_one_random_weight_normalized["indicator_{}".format(index + 1)] = [] for matrix in scores_one_random_weight: @@ -1188,33 +996,33 @@ def _save_output_files(scores: Optional[pd.DataFrame], """ Save output files based of the computed scores, ranks, and configuration data. """ - config = Config(input_config) - full_output_path = os.path.join(output_directory_path, config.output_file_path) + output_path = input_config["output_path"] + full_output_path = os.path.join(output_directory_path, output_path) logger.info("Saving results in {}".format(full_output_path)) - check_path_exists(config.output_file_path) + check_path_exists(output_path) if scores is not None and not scores.empty: scores.insert(0, index_column_name, index_column_values) normalized_scores.insert(0, index_column_name, index_column_values) ranks.insert(0, index_column_name, index_column_values) - save_df(scores, config.output_file_path, 'scores.csv') - save_df(normalized_scores, config.output_file_path, 'normalized_scores.csv') - save_df(ranks, config.output_file_path, 'ranks.csv') + save_df(scores, output_path, 'scores.csv') + save_df(normalized_scores, output_path, 'normalized_scores.csv') + save_df(ranks, output_path, 'ranks.csv') elif score_means is not None and not score_means.empty: score_means.insert(0, 
index_column_name, index_column_values) score_stds.insert(0, index_column_name, index_column_values) score_means_normalized.insert(0, index_column_name, index_column_values) - save_df(score_means, config.output_file_path, 'score_means.csv') - save_df(score_stds, config.output_file_path, 'score_stds.csv') - save_df(score_means_normalized, config.output_file_path, 'score_means_normalized.csv') + save_df(score_means, output_path, 'score_means.csv') + save_df(score_stds, output_path, 'score_stds.csv') + save_df(score_means_normalized, output_path, 'score_means_normalized.csv') elif iterative_random_w_score_means is not None: - save_dict(iterative_random_w_score_means, config.output_file_path, 'score_means.pkl') - save_dict(iterative_random_w_score_stds, config.output_file_path, 'score_stds.pkl') - save_dict(iterative_random_w_score_means_normalized, config.output_file_path, 'score_means_normalized.pkl') + save_dict(iterative_random_w_score_means, output_path, 'score_means.pkl') + save_dict(iterative_random_w_score_stds, output_path, 'score_stds.pkl') + save_dict(iterative_random_w_score_means_normalized, output_path, 'score_means_normalized.pkl') - save_config(input_config, config.output_file_path, 'configuration.json') + save_config(input_config, output_path, 'configuration.json') def _plot_and_save_charts(scores: Optional[pd.DataFrame], @@ -1232,15 +1040,15 @@ def _plot_and_save_charts(scores: Optional[pd.DataFrame], """ Generate plots based on the computed scores and save them. 
""" - config = Config(config) + output_path = config["output_path"] num_indicators = input_matrix.shape[1] if scores is not None and not scores.empty: plot_no_norm_scores = utils_for_plotting.plot_non_norm_scores_without_uncert(scores) - utils_for_plotting.save_figure(plot_no_norm_scores, config.output_file_path, "MCDA_rough_scores.png") + utils_for_plotting.save_figure(plot_no_norm_scores, output_path, "MCDA_rough_scores.png") plot_norm_scores = utils_for_plotting.plot_norm_scores_without_uncert(normalized_scores) - utils_for_plotting.save_figure(plot_norm_scores, config.output_file_path, "MCDA_norm_scores.png") + utils_for_plotting.save_figure(plot_norm_scores, output_path, "MCDA_norm_scores.png") elif score_means is not None and not score_means.empty: if is_robustness_weights is not None and is_robustness_weights == 1: @@ -1252,8 +1060,8 @@ def _plot_and_save_charts(scores: Optional[pd.DataFrame], chart_mean_scores_norm = utils_for_plotting.plot_mean_scores(score_means_normalized, "not_plot_std", "indicators", score_stds) - utils_for_plotting.save_figure(chart_mean_scores, config.output_file_path, "MCDA_rough_scores.png") - utils_for_plotting.save_figure(chart_mean_scores_norm, config.output_file_path, "MCDA_norm_scores.png") + utils_for_plotting.save_figure(chart_mean_scores, output_path, "MCDA_rough_scores.png") + utils_for_plotting.save_figure(chart_mean_scores_norm, output_path, "MCDA_norm_scores.png") elif iterative_random_w_score_means is not None: images = [] @@ -1274,5 +1082,7 @@ def _plot_and_save_charts(scores: Optional[pd.DataFrame], images.append(plot_weight_mean_scores) images_norm.append(plot_weight_mean_scores_norm) - utils_for_plotting.combine_images(images, config.output_file_path, "MCDA_one_weight_randomness_rough_scores.png") - utils_for_plotting.combine_images(images_norm, config.output_file_path, "MCDA_one_weight_randomness_norm_scores.png") + utils_for_plotting.combine_images(images, output_path, + 
"MCDA_one_weight_randomness_rough_scores.png") + utils_for_plotting.combine_images(images_norm, output_path, + "MCDA_one_weight_randomness_norm_scores.png") diff --git a/mcda/utils/utils_for_parallelization.py b/mcda/utils/utils_for_parallelization.py index 73da4ce..85f3f16 100644 --- a/mcda/utils/utils_for_parallelization.py +++ b/mcda/utils/utils_for_parallelization.py @@ -1,18 +1,21 @@ -from mcda.mcda_functions.aggregation import Aggregation -from mcda.mcda_functions.normalization import Normalization import sys import logging import pandas as pd import multiprocessing from functools import partial -from typing import List, Tuple +from typing import List, Tuple, Optional + +from mcda.mcda_functions.aggregation import Aggregation +from mcda.mcda_functions.normalization import Normalization +from mcda.configuration.enums import NormalizationFunctions, AggregationFunctions, OutputColumnNames4Sensitivity formatter = '%(levelname)s: %(asctime)s - %(name)s - %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=formatter) logger = logging.getLogger("ProMCDA utils for parallelization") -def initialize_and_call_aggregation(args: Tuple[list, dict], method=None) -> pd.DataFrame: +def initialize_and_call_aggregation(args: Tuple[list, dict], method: Optional[AggregationFunctions] = None) \ + -> pd.DataFrame: """ Initialize an Aggregation object with given weights and call the aggregation method to calculate scores. @@ -52,13 +55,13 @@ def initialize_and_call_aggregation(args: Tuple[list, dict], method=None) -> pd. return scores_one_run -def initialize_and_call_normalization(args: Tuple[pd.DataFrame, list, str]) -> dict: +def initialize_and_call_normalization(args: Tuple[pd.DataFrame, Tuple[str, ...], NormalizationFunctions]) -> dict: """ Initialize a Normalization object with given matrix and polarities, and call the normalization method to calculate normalized indicators. 
Parameters: - - args: a tuple containing a DataFrame of indicators, a list of polarities, + - args: a tuple containing a DataFrame of indicators, a tuple of polarities, and a string specifying the normalization method. Returns: @@ -131,22 +134,38 @@ def normalize_indicators_in_parallel(norm: object, method=None) -> dict: indicators_scaled_target_without_zero = None indicators_scaled_rank = None - if method is None or method == 'minmax': + def _rename_columns(df, method_name): + """ Helper function to rename columns based on the normalization method """ + if df is not None: + df.columns = [f"{col}_{method_name}" for col in df.columns.tolist()] + return df + + if method is None or method == NormalizationFunctions.MINMAX: indicators_scaled_minmax_01 = norm.minmax(feature_range=(0, 1)) + indicators_scaled_minmax_01 = _rename_columns(indicators_scaled_minmax_01, "minmax_01") # for aggregation "geometric" and "harmonic" that accept no 0 indicators_scaled_minmax_without_zero = norm.minmax(feature_range=(0.1, 1)) - if method is None or method == 'target': + indicators_scaled_minmax_without_zero = _rename_columns(indicators_scaled_minmax_without_zero, + "minmax_without_zero") + if method is None or method == NormalizationFunctions.TARGET: indicators_scaled_target_01 = norm.target(feature_range=(0, 1)) + indicators_scaled_target_01 = _rename_columns(indicators_scaled_target_01, "target_01") # for aggregation "geometric" and "harmonic" that accept no 0 indicators_scaled_target_without_zero = norm.target(feature_range=(0.1, 1)) - if method is None or method == 'standardized': + indicators_scaled_target_without_zero = _rename_columns(indicators_scaled_target_without_zero, + "target_without_zero") + if method is None or method == NormalizationFunctions.STANDARDIZED: indicators_scaled_standardized_any = norm.standardized( feature_range=('-inf', '+inf')) + indicators_scaled_standardized_any = _rename_columns(indicators_scaled_standardized_any, "standardized_any") 
indicators_scaled_standardized_without_zero = norm.standardized( feature_range=(0.1, '+inf')) - if method is None or method == 'rank': + indicators_scaled_standardized_without_zero = _rename_columns(indicators_scaled_standardized_without_zero, + "standardized_without_zero") + if method is None or method == NormalizationFunctions.RANK: indicators_scaled_rank = norm.rank() - if method is not None and method not in ['minmax', 'target', 'standardized', 'rank']: + indicators_scaled_rank = _rename_columns(indicators_scaled_rank, "rank") + if method is not None and method not in [e for e in NormalizationFunctions]: logger.error('Error Message', stack_info=True) raise ValueError('The selected normalization method is not supported') @@ -165,7 +184,8 @@ def normalize_indicators_in_parallel(norm: object, method=None) -> dict: return normalized_indicators -def aggregate_indicators_in_parallel(agg: object, normalized_indicators: dict, method=None) -> pd.DataFrame: +def aggregate_indicators_in_parallel(agg: object, normalized_indicators: dict, + method: Optional[AggregationFunctions] = None) -> pd.DataFrame: """ Aggregate normalized indicators in parallel using different aggregation methods. 
@@ -201,37 +221,90 @@ def aggregate_indicators_in_parallel(agg: object, normalized_indicators: dict, m scores = pd.DataFrame() col_names_method = [] - col_names = ['ws-minmax_01', 'ws-target_01', 'ws-standardized_any', 'ws-rank', - 'geom-minmax_without_zero', 'geom-target_without_zero', 'geom-standardized_without_zero', 'geom-rank', - 'harm-minmax_without_zero', 'harm-target_without_zero', 'harm-standardized_without_zero', 'harm-rank', - 'min-standardized_any'] # same order as in the following loop - for key, values in normalized_indicators.items(): - if method is None or method == 'weighted_sum': + col_names = [member.value for member in OutputColumnNames4Sensitivity] + + if isinstance(normalized_indicators, dict): # robustness on indicators + for key, values in normalized_indicators.items(): + if method is None or method == AggregationFunctions.WEIGHTED_SUM: + # ws goes only with some specific normalizations + valid_suffixes = ["standardized_any", "minmax_01", "target_01", "rank"] + if any(substring in key for substring in valid_suffixes): + scores_weighted_sum[key] = agg.weighted_sum(values) + col_names_method.append("ws-" + key) + if method is None or method == AggregationFunctions.GEOMETRIC: + valid_suffixes = ["standardized_without_zero", "minmax_without_zero", "target_without_zero", "rank"] + # geom goes only with some specific normalizations + if any(substring in key for substring in valid_suffixes): + scores_geometric[key] = pd.Series(agg.geometric(values)) + col_names_method.append("geom-" + key) + if method is None or method == AggregationFunctions.HARMONIC: + valid_suffixes = ["standardized_without_zero", "minmax_without_zero", "target_without_zero", "rank"] + # harm goes only with some specific normalizations + if any(substring in key for substring in valid_suffixes): + scores_harmonic[key] = pd.Series(agg.harmonic(values)) + col_names_method.append("harm-" + key) + if method is None or method == AggregationFunctions.MINIMUM: + valid_suffixes = 
["standardized_any"] + if any(substring in key for substring in valid_suffixes): + scores_minimum[key] = pd.Series(agg.minimum( + normalized_indicators["standardized_any"])) + col_names_method.append("min-" + key) + elif isinstance(normalized_indicators, pd.DataFrame): # robustness on weights + if method is None or method == AggregationFunctions.WEIGHTED_SUM: # ws goes only with some specific normalizations - if key in ["standardized_any", "minmax_01", "target_01", "rank"]: - scores_weighted_sum[key] = agg.weighted_sum(values) - col_names_method.append("ws-" + key) - if method is None or method == 'geometric': + valid_suffixes = ["standardized_any", "minmax_01", "target_01", "rank"] + selected_columns = [ + column for column in normalized_indicators.columns + if any(substring in column for substring in valid_suffixes) + ] + if selected_columns: + scores_weighted_sum = agg.weighted_sum(normalized_indicators[selected_columns]) + col_names_method.extend( + ["ws-" + suffix for suffix in valid_suffixes + if any(column.endswith("_" + suffix) for column in normalized_indicators.columns)]) + if method is None or method == AggregationFunctions.GEOMETRIC: # geom goes only with some specific normalizations - if key in ["standardized_without_zero", "minmax_without_zero", "target_without_zero", "rank"]: - scores_geometric[key] = pd.Series(agg.geometric(values)) - col_names_method.append("geom-" + key) - if method is None or method == 'harmonic': + valid_suffixes = ["standardized_without_zero", "minmax_without_zero", "target_without_zero", "rank"] + selected_columns = [ + column for column in normalized_indicators.columns + if any(substring in column for substring in valid_suffixes) + ] + if selected_columns: + scores_weighted_sum = agg.geometric(normalized_indicators[selected_columns]) + col_names_method.extend( + ["ws-" + suffix for suffix in valid_suffixes + if any(column.endswith("_" + suffix) for column in normalized_indicators.columns)]) + if method is None or method == 
AggregationFunctions.HARMONIC: # harm goes only with some specific normalizations - if key in ["standardized_without_zero", "minmax_without_zero", "target_without_zero", "rank"]: - scores_harmonic[key] = pd.Series(agg.harmonic(values)) - col_names_method.append("harm-" + key) - if method is None or method == 'minimum': - if key == "standardized_any": - scores_minimum[key] = pd.Series(agg.minimum( - normalized_indicators["standardized_any"])) - col_names_method.append("min-" + key) + valid_suffixes = ["standardized_without_zero", "minmax_without_zero", "target_without_zero", "rank"] + selected_columns = [ + column for column in normalized_indicators.columns + if any(substring in column for substring in valid_suffixes) + ] + if selected_columns: + scores_weighted_sum = agg.harmonic(normalized_indicators[selected_columns]) + col_names_method.extend( + ["ws-" + suffix for suffix in valid_suffixes + if any(column.endswith("_" + suffix) for column in normalized_indicators.columns)]) + if method is None or method == AggregationFunctions.MINIMUM: + valid_suffixes = ["standardized_any"] + selected_columns = [ + column for column in normalized_indicators.columns + if any(substring in column for substring in valid_suffixes) + ] + if selected_columns: + scores_weighted_sum = agg.minimum(normalized_indicators[selected_columns]) + col_names_method.extend( + ["ws-" + suffix for suffix in valid_suffixes + if any(column.endswith("_" + suffix) for column in normalized_indicators.columns)]) dict_list = [scores_weighted_sum, scores_geometric, scores_harmonic, scores_minimum] for d in dict_list: - if d: + if isinstance(d, pd.Series): # Robustness weights + scores = pd.concat([scores, d.to_frame()], axis=1) + elif isinstance(d, dict): # Robustness indicators scores = pd.concat([scores, pd.DataFrame.from_dict(d)], axis=1) if method is None: @@ -242,8 +315,8 @@ def aggregate_indicators_in_parallel(agg: object, normalized_indicators: dict, m return scores -def parallelize_aggregation(args: 
List[tuple], method=None) -> List[pd.DataFrame]: - partial_func = partial(initialize_and_call_aggregation, method=method) +def parallelize_aggregation(args: List[tuple], aggregation_method=None) -> List[pd.DataFrame]: + partial_func = partial(initialize_and_call_aggregation, method=aggregation_method) # create a synchronous multiprocessing pool with the desired number of processes pool = multiprocessing.Pool() res = pool.map(partial_func, args) @@ -253,7 +326,7 @@ def parallelize_aggregation(args: List[tuple], method=None) -> List[pd.DataFrame return res -def parallelize_normalization(input_matrices: List[pd.DataFrame], polar: list, method=None) -> List[dict]: +def parallelize_normalization(input_matrices: List[pd.DataFrame], polar: Tuple[str, ...], method=None) -> List[dict]: """ Parallelize the normalization process for multiple input matrices using multiprocessing. diff --git a/mcda/utils/utils_for_plotting.py b/mcda/utils/utils_for_plotting.py index 2e9f516..a8de801 100644 --- a/mcda/utils/utils_for_plotting.py +++ b/mcda/utils/utils_for_plotting.py @@ -54,7 +54,7 @@ def plot_norm_scores_without_uncert(scores: pd.DataFrame) -> object: yaxis=dict( range=[scores.iloc[:, 1:].values.min() - 0.5, scores.iloc[:, 1:].values.max() + 0.5]) ) - fig.show() + # fig.show() return fig @@ -95,7 +95,7 @@ def plot_non_norm_scores_without_uncert(scores: pd.DataFrame) -> object: ticktext=scores[alternatives_column_name][:], tickangle=45) ) - fig.show() + # fig.show() it triggers an open socket warning when on return fig @@ -154,7 +154,7 @@ def plot_mean_scores(all_means: pd.DataFrame, plot_std: str, rand_on: str, all_s ticktext=all_means[alternatives_column_name][:], tickangle=45) ) - fig.show() + # fig.show() return fig diff --git a/requirements.txt b/requirements.txt index f8ef914..c1dd98b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,16 @@ # Requirements automatically generated by pigar. # https://github.com/damnever/pigar. 
-numpy -pandas -plotly -pytest -scikit-learn -scipy -Pillow +numpy~=2.0.2 +pandas~=2.2.3 +plotly~=5.24.1 +pytest~=8.3.3 +scikit-learn~=1.5.2 +scipy~=1.13.1 +Pillow~=11.0.0 kaleido + +matplotlib~=3.9.3 +setuptools~=68.2.0 \ No newline at end of file diff --git a/tests/unit_tests/test_mcda_with_robustness.py b/tests/unit_tests/test_mcda_with_robustness.py index c91ec7a..cc20f57 100644 --- a/tests/unit_tests/test_mcda_with_robustness.py +++ b/tests/unit_tests/test_mcda_with_robustness.py @@ -2,7 +2,7 @@ import numpy as np import unittest -from mcda.mcda_with_robustness import MCDAWithRobustness +from mcda.models.mcda_with_robustness import MCDAWithRobustness from mcda.configuration.config import Config diff --git a/tests/unit_tests/test_mcda_without_robustness.py b/tests/unit_tests/test_mcda_without_robustness.py index 687ab22..95a2506 100644 --- a/tests/unit_tests/test_mcda_without_robustness.py +++ b/tests/unit_tests/test_mcda_without_robustness.py @@ -4,7 +4,7 @@ from unittest import TestCase -from mcda.mcda_without_robustness import MCDAWithoutRobustness +from mcda.models.mcda_without_robustness import MCDAWithoutRobustness from mcda.configuration.config import Config from mcda.mcda_functions.aggregation import Aggregation import mcda.utils.utils_for_main as utils_for_main diff --git a/tests/unit_tests/test_promcda.py b/tests/unit_tests/test_promcda.py new file mode 100644 index 0000000..ca86934 --- /dev/null +++ b/tests/unit_tests/test_promcda.py @@ -0,0 +1,293 @@ +import unittest +import warnings + +import pandas as pd + +from mcda.models.ProMCDA import ProMCDA +from mcda.configuration.enums import NormalizationFunctions, AggregationFunctions, OutputColumnNames4Sensitivity, \ + NormalizationNames4Sensitivity, PDFType + + +class TestProMCDA(unittest.TestCase): + + def setUp(self): + warnings.filterwarnings("error", category=ResourceWarning) + # Mock input data for testing + self.input_matrix = pd.DataFrame({ + 'Alternatives': ['A', 'B', 'C'], + 'Criterion1': 
[0.5, 0.2, 0.8], + 'Criterion2': [0.3, 0.6, 0.1] + }) + self.input_matrix.set_index('Alternatives', inplace=True) + + self.input_matrix_with_uncertainty = pd.DataFrame({ + 'Alternatives': ['A', 'B', 'C'], + 'Criterion1_mean': [0.5, 0.2, 0.8], + 'Criterion1_std': [0.1, 0.02, 0.07], + 'Criterion2_mean': [0.3, 0.6, 0.1], + 'Criterion2_std': [0.03, 0.06, 0.01] + }) + self.input_matrix_with_uncertainty.set_index('Alternatives', inplace=True) + + self.polarity = ('+', '-',) + + # Define optional parameters + self.robustness_weights = False + self.robustness_indicators = False + self.marginal_distributions = (PDFType.NORMAL, PDFType.NORMAL) + self.num_runs = 5 + self.num_cores = 2 + self.random_seed = 123 + + def test_init(self): + """ + Test if ProMCDA initializes correctly. + """ + # Given + promcda = ProMCDA( + input_matrix=self.input_matrix, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=self.robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + + # Then + self.assertEqual(promcda.input_matrix.shape, (3, 2)) + self.assertEqual(promcda.polarity, self.polarity) + self.assertFalse(promcda.robustness_weights) + self.assertFalse(promcda.robustness_indicators) + self.assertEqual(promcda.marginal_distributions, self.marginal_distributions) + self.assertEqual(promcda.num_runs, self.num_runs) + self.assertEqual(promcda.num_cores, self.num_cores) + self.assertEqual(promcda.random_seed, self.random_seed) + self.assertIsNone(promcda.normalized_values_without_robustness) + self.assertIsNone(promcda.normalized_values_with_robustness) + self.assertIsNone(promcda.aggregated_scores) + self.assertIsNone(promcda.all_indicators_scores_means) + self.assertIsNone(promcda.all_indicators_scores_stds) + self.assertIsNone(promcda.all_indicators_means_scores_normalized) + 
self.assertIsNone(promcda.all_indicators_scores_stds_normalized) + self.assertIsNone(promcda.all_weights_score_means) + self.assertEqual(promcda.all_weights_score_stds, (None,)) + self.assertEqual(promcda.all_weights_score_means_normalized, (None,)) + self.assertEqual(promcda.all_weights_score_stds_normalized, (None,)) + self.assertEqual(promcda.iterative_random_w_score_means, (None,)) + self.assertEqual(promcda.iterative_random_w_score_stds, (None,)) + self.assertIsNone(promcda.iterative_random_w_score_means_normalized) + #self.assertIsNone(promcda.scores) + + # def test_validate_inputs(self): + # """ + # Test if input validation works and returns the expected values. + # """ + # # Given + # promcda = ProMCDA(self.input_matrix, self.polarity, self.robustness, self.monte_carlo) + # # When + # (is_robustness_indicators, is_robustness_weights, polar, weights, config) = promcda.validate_inputs() + # + # # Then + # self.assertIsInstance(is_robustness_indicators, int) + # self.assertIsInstance(is_robustness_weights, int) + # self.assertIsInstance(polar, tuple) + # self.assertIsInstance(weights, list) + # self.assertIsInstance(config, dict) + # self.assertEqual(is_robustness_indicators, 0) + # self.assertEqual(is_robustness_weights, 0) + + def test_normalize_all_methods(self): + # Given + normalization_method = None + promcda = ProMCDA( + input_matrix=self.input_matrix, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=self.robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + + # When + expected_suffixes = [method.value for method in NormalizationNames4Sensitivity] + normalized_values = promcda.normalize(normalization_method) + actual_suffixes = {"_".join(col.split("_", 2)[1:]) for col in normalized_values.columns} + + # Then + self.assertCountEqual(actual_suffixes, expected_suffixes, + "Not all methods were 
applied or extra suffixes found in column names.") + + def test_normalize_specific_method(self): + # Given + promcda = ProMCDA( + input_matrix=self.input_matrix, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=self.robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + + # When + normalized_values = promcda.normalize(normalization_method=NormalizationFunctions.MINMAX) + expected_keys = ['Criterion1_minmax_01', 'Criterion2_minmax_01', 'Criterion1_minmax_without_zero', 'Criterion2_minmax_without_zero'] + + # Then + self.assertCountEqual(expected_keys, list(normalized_values.keys())) + self.assertEqual(list(normalized_values), expected_keys) + + def test_normalization_with_robustness(self): + # Given + robustness_indicators=True + promcda = ProMCDA( + input_matrix=self.input_matrix_with_uncertainty, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + + # When + promcda.normalize(normalization_method=NormalizationFunctions.MINMAX) + + # Then + normalized_values = promcda.get_normalized_values_with_robustness() + self.assertIsNotNone(normalized_values) + self.assertEqual(len(normalized_values), self.num_runs) + + + def test_aggregate_all_methods(self): + # Given + promcda = ProMCDA( + input_matrix=self.input_matrix, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=self.robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + promcda.normalize() + + # When + aggregated_scores = promcda.aggregate() + expected_columns = [ + 'minmax_weighted_sum', 
'target_weighted_sum', 'standardized_weighted_sum', 'rank_weighted_sum', + 'minmax_geometric', 'target_geometric', 'standardized_geometric', 'rank_geometric', + 'minmax_harmonic', 'target_harmonic', 'standardized_harmonic', 'rank_harmonic', + 'standardized_minimum'] + + # Then + self.assertCountEqual(aggregated_scores.columns, expected_columns, + "Not all methods were applied or extra columns found.") + self.assertEqual(len(aggregated_scores), len(self.input_matrix), + "Number of alternatives does not match input matrix rows.") + + def test_aggregate_with_specific_aggregation_method(self): + # Given + normalization_method = NormalizationFunctions.MINMAX + aggregation_method = AggregationFunctions.WEIGHTED_SUM + + # When + promcda = ProMCDA( + input_matrix=self.input_matrix, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=self.robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + promcda.normalize(normalization_method) + aggregated_scores = promcda.aggregate(aggregation_method=aggregation_method) + expected_columns = ['minmax_weighted_sum'] + + # Then + self.assertCountEqual(aggregated_scores.columns, expected_columns, "Only specified methods should be applied.") + self.assertTrue( + (aggregated_scores['minmax_weighted_sum'] >= 0).all() and (aggregated_scores['minmax_weighted_sum'] <= 1).all(), + "Values should be in the range [0, 1] for minmax normalization with weighted sum.") + + def test_aggregate_with_robustness_indicators(self): + # Given + normalization_method = NormalizationFunctions.MINMAX + aggregation_method = AggregationFunctions.WEIGHTED_SUM + + # When + promcda = ProMCDA( + input_matrix=self.input_matrix_with_uncertainty, + polarity=self.polarity, + robustness_weights=self.robustness_weights, + robustness_indicators=True, + marginal_distributions=self.marginal_distributions, + 
num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + promcda.normalize(normalization_method) + promcda.aggregate(aggregation_method=aggregation_method) + aggregated_scores, aggregated_scores_normalized, aggregated_stds = promcda.get_aggregated_values_with_robustness_indicators() + expected_columns = ['ws-minmax_01'] + + # Then + self.assertCountEqual(aggregated_scores.columns, expected_columns, + "Only specified methods should be applied.") + self.assertTrue( + (aggregated_scores['ws-minmax_01'] >= 0).all() and ( + aggregated_scores['ws-minmax_01'] <= 1).all(), + "Values should be in the range [0, 1] for minmax normalization with weighted sum.") + + + def test_aggregate_with_robustness_weights(self): + # Given + normalization_method = NormalizationFunctions.MINMAX + aggregation_method = AggregationFunctions.WEIGHTED_SUM + + # When + promcda = ProMCDA( + input_matrix=self.input_matrix, + polarity=self.polarity, + robustness_weights=True, + robustness_indicators=self.robustness_indicators, + marginal_distributions=self.marginal_distributions, + num_runs=self.num_runs, + num_cores=self.num_cores, + random_seed=self.random_seed + ) + promcda.normalize(normalization_method) + promcda.aggregate(aggregation_method=aggregation_method) + aggregated_scores, aggregated_scores_normalized, aggregated_stds = promcda.get_aggregated_values_with_robustness_weights() + expected_columns = ['ws-minmax_01'] + + # Then + self.assertCountEqual(aggregated_scores.columns, expected_columns, + "Only specified methods should be applied.") + self.assertTrue( + (aggregated_scores['ws-minmax_01'] >= 0).all() and ( + aggregated_scores['ws-minmax_01'] <= 1).all(), + "Values should be in the range [0, 1] for minmax normalization with weighted sum.") + + +# def tearDown(self): +# """ +# Clean up temporary directories and files after each test. 
+# """ +# if os.path.exists(self.output_path): +# shutil.rmtree(self.output_path) + + +if __name__ == '__main__': + unittest.main() + + diff --git a/tests/unit_tests/test_utils_for_parallelization.py b/tests/unit_tests/test_utils_for_parallelization.py index 36d1a02..144da64 100644 --- a/tests/unit_tests/test_utils_for_parallelization.py +++ b/tests/unit_tests/test_utils_for_parallelization.py @@ -3,7 +3,7 @@ from pandas.testing import assert_frame_equal -from mcda.mcda_without_robustness import * +from mcda.models.mcda_without_robustness import * from mcda.utils.utils_for_parallelization import *