Add support for multiple occurrences of design matrices

- Add merging strategy for multiple design matrices in the config - Add UI tests for multiple design matrix occurrences
equinor · Jan 14, 2025 · 2491185 · 2491185
1 parent 60fc8f8
commit 2491185
Show file tree

Hide file tree

Showing 6 changed files with 262 additions and 42 deletions.
diff --git a/src/ert/config/analysis_config.py b/src/ert/config/analysis_config.py
@@ -80,7 +80,7 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:
 
         min_realization = min(min_realization, num_realization)
 
-        design_matrix_config_list = config_dict.get(ConfigKeys.DESIGN_MATRIX, None)
+        design_matrix_config_lists = config_dict.get(ConfigKeys.DESIGN_MATRIX, [])
 
         options: dict[str, dict[str, Any]] = {"STD_ENKF": {}, "IES_ENKF": {}}
         observation_settings: dict[str, Any] = {
@@ -186,15 +186,22 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:
         if all_errors:
             raise ConfigValidationError.from_collected(all_errors)
 
+        design_matrices = [
+            DesignMatrix.from_config_list(design_matrix_config_list)
+            for design_matrix_config_list in design_matrix_config_lists
+        ]
+        design_matrix: DesignMatrix | None = None
+        if design_matrices:
+            design_matrix = design_matrices[0]
+            for dm_other in design_matrices[1:]:
+                design_matrix.merge_with_other(dm_other)
         config = cls(
             minimum_required_realizations=min_realization,
             update_log_path=config_dict.get(ConfigKeys.UPDATE_LOG_PATH, "update_log"),
             observation_settings=obs_settings,
             es_module=es_settings,
             ies_module=ies_settings,
-            design_matrix=DesignMatrix.from_config_list(design_matrix_config_list)
-            if design_matrix_config_list is not None
-            else None,
+            design_matrix=design_matrix,
         )
         return config
 

diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py
@@ -78,6 +78,38 @@ def from_config_list(cls, config_list: list[str]) -> DesignMatrix:
             default_sheet=default_sheet,
         )
 
+    def merge_with_other(self, dm_other: DesignMatrix) -> None:
+        errors = []
+        if self.active_realizations != dm_other.active_realizations:
+            errors.append(
+                ErrorInfo("Design Matrices don't have the same active realizations!")
+            )
+
+        common_keys = set(self.design_matrix_df.columns) & set(
+            dm_other.design_matrix_df.columns
+        )
+        if common_keys:
+            errors.append(
+                ErrorInfo(f"Design Matrices do not have unique keys {common_keys}!")
+            )
+
+        try:
+            self.design_matrix_df = pd.concat(
+                [self.design_matrix_df, dm_other.design_matrix_df], axis=1
+            )
+        except ValueError as exc:
+            errors.append(ErrorInfo(f"Error when merging design matrices {exc}!"))
+
+        pc_other = dm_other.parameter_configuration[DESIGN_MATRIX_GROUP]
+        pc_self = self.parameter_configuration[DESIGN_MATRIX_GROUP]
+        assert isinstance(pc_other, GenKwConfig)
+        assert isinstance(pc_self, GenKwConfig)
+        for tfd in pc_other.transform_function_definitions:
+            pc_self.transform_function_definitions.append(tfd)
+
+        if errors:
+            raise ConfigValidationError.from_collected(errors)
+
     def merge_with_existing_parameters(
         self, existing_parameters: list[ParameterConfig]
     ) -> tuple[list[ParameterConfig], ParameterConfig | None]:

diff --git a/src/ert/config/parsing/config_schema.py b/src/ert/config/parsing/config_schema.py
@@ -264,7 +264,7 @@ def design_matrix_keyword() -> SchemaItem:
             SchemaItemType.STRING,
             SchemaItemType.STRING,
         ],
-        multi_occurrence=False,
+        multi_occurrence=True,
     )
 
 

diff --git a/tests/ert/ui_tests/cli/analysis/test_design_matrix.py b/tests/ert/ui_tests/cli/analysis/test_design_matrix.py
@@ -13,24 +13,30 @@
 from tests.ert.ui_tests.cli.run_cli import run_cli
 
 
+def _create_design_matrix(filename, design_sheet_df, default_sheet_df=None):
+    with pd.ExcelWriter(filename) as xl_write:
+        design_sheet_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
+        if default_sheet_df is not None:
+            default_sheet_df.to_excel(
+                xl_write, index=False, sheet_name="DefaultSheet", header=False
+            )
+
+
 @pytest.mark.usefixtures("copy_poly_case")
 def test_run_poly_example_with_design_matrix():
-    design_matrix = "poly_design.xlsx"
     num_realizations = 10
     a_values = list(range(num_realizations))
-    design_matrix_df = pd.DataFrame(
-        {
-            "REAL": list(range(num_realizations)),
-            "a": a_values,
-            "category": 5 * ["cat1"] + 5 * ["cat2"],
-        }
+    _create_design_matrix(
+        "poly_design.xlsx",
+        pd.DataFrame(
+            {
+                "REAL": list(range(num_realizations)),
+                "a": a_values,
+                "category": 5 * ["cat1"] + 5 * ["cat2"],
+            }
+        ),
+        pd.DataFrame([["b", 1], ["c", 2]]),
     )
-    default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]])
-    with pd.ExcelWriter(design_matrix) as xl_write:
-        design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
-        default_sheet_df.to_excel(
-            xl_write, index=False, sheet_name="DefaultSheet", header=False
-        )
 
     with open("poly.ert", "w", encoding="utf-8") as fout:
         fout.write(
@@ -105,21 +111,18 @@ def _evaluate(coeffs, x):
     ],
 )
 def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, error_msg):
-    design_matrix = "poly_design.xlsx"
     num_realizations = 10
     a_values = list(range(num_realizations))
-    design_matrix_df = pd.DataFrame(
-        {
-            "REAL": list(range(num_realizations)),
-            "a": a_values,
-        }
+    _create_design_matrix(
+        "poly_design.xlsx",
+        pd.DataFrame(
+            {
+                "REAL": list(range(num_realizations)),
+                "a": a_values,
+            }
+        ),
+        pd.DataFrame(default_values),
     )
-    default_sheet_df = pd.DataFrame(default_values)
-    with pd.ExcelWriter(design_matrix) as xl_write:
-        design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
-        default_sheet_df.to_excel(
-            xl_write, index=False, sheet_name="DefaultSheet", header=False
-        )
 
     with open("poly.ert", "w", encoding="utf-8") as fout:
         fout.write(
@@ -191,3 +194,92 @@ def _evaluate(coeffs, x):
         np.testing.assert_array_equal(params[:, 0], a_values)
         np.testing.assert_array_equal(params[:, 1], 10 * [1])
         np.testing.assert_array_equal(params[:, 2], 10 * [2])
+
+
+@pytest.mark.usefixtures("copy_poly_case")
+def test_run_poly_example_with_multiple_design_matrix_instances():
+    num_realizations = 10
+    a_values = list(range(num_realizations))
+    _create_design_matrix(
+        "poly_design_1.xlsx",
+        pd.DataFrame(
+            {
+                "REAL": list(range(num_realizations)),
+                "a": a_values,
+            }
+        ),
+        pd.DataFrame([["b", 1], ["c", 2]]),
+    )
+    _create_design_matrix(
+        "poly_design_2.xlsx",
+        pd.DataFrame(
+            {
+                "REAL": list(range(num_realizations)),
+                "d": num_realizations * [3],
+            }
+        ),
+        pd.DataFrame([["g", 4]]),
+    )
+
+    with open("poly.ert", "w", encoding="utf-8") as fout:
+        fout.write(
+            dedent(
+                """\
+                QUEUE_OPTION LOCAL MAX_RUNNING 10
+                RUNPATH poly_out/realization-<IENS>/iter-<ITER>
+                NUM_REALIZATIONS 10
+                MIN_REALIZATIONS 1
+                GEN_DATA POLY_RES RESULT_FILE:poly.out
+                DESIGN_MATRIX poly_design_1.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
+                DESIGN_MATRIX poly_design_2.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
+                INSTALL_JOB poly_eval POLY_EVAL
+                FORWARD_MODEL poly_eval
+                """
+            )
+        )
+
+    with open("poly_eval.py", "w", encoding="utf-8") as f:
+        f.write(
+            dedent(
+                """\
+                #!/usr/bin/env python
+                import json
+
+                def _load_coeffs(filename):
+                    with open(filename, encoding="utf-8") as f:
+                        return json.load(f)["DESIGN_MATRIX"]
+
+                def _evaluate(coeffs, x):
+                    return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"]
+
+                if __name__ == "__main__":
+                    coeffs = _load_coeffs("parameters.json")
+                    output = [_evaluate(coeffs, x) for x in range(10)]
+                    with open("poly.out", "w", encoding="utf-8") as f:
+                        f.write("\\n".join(map(str, output)))
+                """
+            )
+        )
+    os.chmod(
+        "poly_eval.py",
+        os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH,
+    )
+
+    run_cli(
+        ENSEMBLE_EXPERIMENT_MODE,
+        "--disable-monitor",
+        "poly.ert",
+        "--experiment-name",
+        "test-experiment",
+    )
+    storage_path = ErtConfig.from_file("poly.ert").ens_path
+    with open_storage(storage_path) as storage:
+        experiment = storage.get_experiment_by_name("test-experiment")
+        params = experiment.get_ensemble_by_name("default").load_parameters(
+            "DESIGN_MATRIX"
+        )["values"]
+        np.testing.assert_array_equal(params[:, 0], a_values)
+        np.testing.assert_array_equal(params[:, 1], 10 * [1])
+        np.testing.assert_array_equal(params[:, 2], 10 * [2])
+        np.testing.assert_array_equal(params[:, 3], 10 * [3])
+        np.testing.assert_array_equal(params[:, 4], 10 * [4])
diff --git a/tests/ert/unit_tests/config/test_analysis_config.py b/tests/ert/unit_tests/config/test_analysis_config.py
@@ -47,9 +47,11 @@ def test_analysis_config_from_file_is_same_as_from_dict(monkeypatch, tmp_path):
                 ("STD_ENKF", "ENKF_TRUNCATION", 0.8),
             ],
             ConfigKeys.DESIGN_MATRIX: [
-                "my_design_matrix.xlsx",
-                "DESIGN_SHEET:my_sheet",
-                "DEFAULT_SHEET:my_default_sheet",
+                [
+                    "my_design_matrix.xlsx",
+                    "DESIGN_SHEET:my_sheet",
+                    "DEFAULT_SHEET:my_default_sheet",
+                ]
             ],
         }
     )
@@ -110,9 +112,11 @@ def test_invalid_design_matrix_format_raises_validation_error():
             {
                 ConfigKeys.NUM_REALIZATIONS: 1,
                 ConfigKeys.DESIGN_MATRIX: [
-                    "my_matrix.txt",
-                    "DESIGN_SHEET:sheet1",
-                    "DEFAULT_SHEET:sheet2",
+                    [
+                        "my_matrix.txt",
+                        "DESIGN_SHEET:sheet1",
+                        "DEFAULT_SHEET:sheet2",
+                    ],
                 ],
             }
         )
@@ -123,9 +127,11 @@ def test_design_matrix_without_design_sheet_raises_validation_error():
         AnalysisConfig.from_dict(
             {
                 ConfigKeys.DESIGN_MATRIX: [
-                    "my_matrix.xlsx",
-                    "DESIGN_:design",
-                    "DEFAULT_SHEET:default",
+                    [
+                        "my_matrix.xlsx",
+                        "DESIGN_:design",
+                        "DEFAULT_SHEET:default",
+                    ]
                 ],
             }
         )
@@ -136,9 +142,11 @@ def test_design_matrix_without_default_sheet_raises_validation_error():
         AnalysisConfig.from_dict(
             {
                 ConfigKeys.DESIGN_MATRIX: [
-                    "my_matrix.xlsx",
-                    "DESIGN_SHEET:design",
-                    "DEFAULT_:default",
+                    [
+                        "my_matrix.xlsx",
+                        "DESIGN_SHEET:design",
+                        "DEFAULT_:default",
+                    ]
                 ],
             }
         )

diff --git a/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py b/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py
@@ -6,6 +6,87 @@
 from ert.config.gen_kw_config import GenKwConfig, TransformFunctionDefinition
 
 
+def _create_design_matrix(xls_path, design_matrix_df, default_sheet_df) -> DesignMatrix:
+    with pd.ExcelWriter(xls_path) as xl_write:
+        design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
+        default_sheet_df.to_excel(
+            xl_write, index=False, sheet_name="DefaultValues", header=False
+        )
+    return DesignMatrix(xls_path, "DesignSheet01", "DefaultValues")
+
+
+@pytest.mark.parametrize(
+    "design_sheet_pd, default_sheet_pd, error_msg",
+    [
+        pytest.param(
+            pd.DataFrame(
+                {
+                    "REAL": [0, 1, 2],
+                    "c": [1, 2, 3],
+                    "d": [0, 2, 0],
+                }
+            ),
+            pd.DataFrame([["e", 1]]),
+            "",
+            id="ok_merge",
+        ),
+        pytest.param(
+            pd.DataFrame(
+                {
+                    "REAL": [0, 1, 2],
+                    "a": [1, 2, 3],
+                }
+            ),
+            pd.DataFrame([["e", 1]]),
+            "Design Matrices do not have unique keys",
+            id="not_unique_keys",
+        ),
+        pytest.param(
+            pd.DataFrame(
+                {
+                    "REAL": [0, 1],
+                    "d": [1, 2],
+                }
+            ),
+            pd.DataFrame([["e", 1]]),
+            "Design Matrices don't have the same active realizations!",
+            id="not_same_acitve_realizations",
+        ),
+    ],
+)
+def test_merge_multiple_occurrences(
+    tmp_path, design_sheet_pd, default_sheet_pd, error_msg
+):
+    design_matrix_1 = _create_design_matrix(
+        tmp_path / "design_matrix_1.xlsx",
+        pd.DataFrame(
+            {
+                "REAL": [0, 1, 2],
+                "a": [1, 2, 3],
+                "b": [0, 2, 0],
+            },
+        ),
+        pd.DataFrame([["a", 1], ["b", 4]]),
+    )
+
+    design_matrix_2 = _create_design_matrix(
+        tmp_path / "design_matrix_2.xlsx", design_sheet_pd, default_sheet_pd
+    )
+    if error_msg:
+        with pytest.raises(ValueError, match=error_msg):
+            design_matrix_1.merge_with_other(design_matrix_2)
+    else:
+        design_matrix_1.merge_with_other(design_matrix_2)
+        design_params = design_matrix_1.parameter_configuration.get("DESIGN_MATRIX", [])
+        assert all(param in design_params for param in ("a", "b", "c", "d"))
+        assert design_matrix_1.active_realizations == [True, True, True]
+        df = design_matrix_1.design_matrix_df
+        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "a"], np.array([1, 2, 3]))
+        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "b"], np.array([0, 2, 0]))
+        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "c"], np.array([1, 2, 3]))
+        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "d"], np.array([0, 2, 0]))
+
+
 @pytest.mark.parametrize(
     "parameters, error_msg",
     [