Skip to content

Commit

Permalink
Add support for multiple occurrences of design matrices
Browse files Browse the repository at this point in the history
- Add merging strategy for multiple design matrices in the config
- Add UI tests for multiple design matrix occurrences
  • Loading branch information
xjules committed Jan 14, 2025
1 parent 60fc8f8 commit 2491185
Show file tree
Hide file tree
Showing 6 changed files with 262 additions and 42 deletions.
15 changes: 11 additions & 4 deletions src/ert/config/analysis_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:

min_realization = min(min_realization, num_realization)

design_matrix_config_list = config_dict.get(ConfigKeys.DESIGN_MATRIX, None)
design_matrix_config_lists = config_dict.get(ConfigKeys.DESIGN_MATRIX, [])

options: dict[str, dict[str, Any]] = {"STD_ENKF": {}, "IES_ENKF": {}}
observation_settings: dict[str, Any] = {
Expand Down Expand Up @@ -186,15 +186,22 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:
if all_errors:
raise ConfigValidationError.from_collected(all_errors)

design_matrices = [
DesignMatrix.from_config_list(design_matrix_config_list)
for design_matrix_config_list in design_matrix_config_lists
]
design_matrix: DesignMatrix | None = None
if design_matrices:
design_matrix = design_matrices[0]
for dm_other in design_matrices[1:]:
design_matrix.merge_with_other(dm_other)
config = cls(
minimum_required_realizations=min_realization,
update_log_path=config_dict.get(ConfigKeys.UPDATE_LOG_PATH, "update_log"),
observation_settings=obs_settings,
es_module=es_settings,
ies_module=ies_settings,
design_matrix=DesignMatrix.from_config_list(design_matrix_config_list)
if design_matrix_config_list is not None
else None,
design_matrix=design_matrix,
)
return config

Expand Down
32 changes: 32 additions & 0 deletions src/ert/config/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,38 @@ def from_config_list(cls, config_list: list[str]) -> DesignMatrix:
default_sheet=default_sheet,
)

def merge_with_other(self, dm_other: DesignMatrix) -> None:
    """Merge another design matrix into this one, in place.

    The columns of ``dm_other.design_matrix_df`` are appended to
    ``self.design_matrix_df``, and the transform function definitions of
    the other matrix's ``DESIGN_MATRIX_GROUP`` parameter configuration are
    appended to this instance's.

    All validation is done before any state is changed, so a failed merge
    leaves this instance exactly as it was.

    Args:
        dm_other: The design matrix whose columns and transform function
            definitions are merged into this one.

    Raises:
        ConfigValidationError: If the two matrices do not have the same
            active realizations, if they share column keys, or if the
            data frames cannot be concatenated.
    """
    errors = []
    if self.active_realizations != dm_other.active_realizations:
        errors.append(
            ErrorInfo("Design Matrices don't have the same active realizations!")
        )

    common_keys = set(self.design_matrix_df.columns) & set(
        dm_other.design_matrix_df.columns
    )
    if common_keys:
        errors.append(
            ErrorInfo(f"Design Matrices do not have unique keys {common_keys}!")
        )

    # Report every validation problem at once, and do it before touching
    # any state so that callers never observe a half-merged instance.
    if errors:
        raise ConfigValidationError.from_collected(errors)

    # Resolve and type-check both configurations up front; a failure here
    # must not leave the data frame already merged.
    pc_other = dm_other.parameter_configuration[DESIGN_MATRIX_GROUP]
    pc_self = self.parameter_configuration[DESIGN_MATRIX_GROUP]
    assert isinstance(pc_other, GenKwConfig)
    assert isinstance(pc_self, GenKwConfig)

    try:
        merged_df = pd.concat(
            [self.design_matrix_df, dm_other.design_matrix_df], axis=1
        )
    except ValueError as exc:
        raise ConfigValidationError.from_collected(
            [ErrorInfo(f"Error when merging design matrices {exc}!")]
        ) from exc

    # Commit the merge only once everything above has succeeded.
    self.design_matrix_df = merged_df
    pc_self.transform_function_definitions.extend(
        pc_other.transform_function_definitions
    )

def merge_with_existing_parameters(
self, existing_parameters: list[ParameterConfig]
) -> tuple[list[ParameterConfig], ParameterConfig | None]:
Expand Down
2 changes: 1 addition & 1 deletion src/ert/config/parsing/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def design_matrix_keyword() -> SchemaItem:
SchemaItemType.STRING,
SchemaItemType.STRING,
],
multi_occurrence=False,
multi_occurrence=True,
)


Expand Down
142 changes: 117 additions & 25 deletions tests/ert/ui_tests/cli/analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,30 @@
from tests.ert.ui_tests.cli.run_cli import run_cli


def _create_design_matrix(filename, design_sheet_df, default_sheet_df=None):
    """Write a design matrix workbook to ``filename``.

    The design data frame goes to the sheet "DesignSheet01" (with a header
    row, without the index). When ``default_sheet_df`` is given it is
    written to the sheet "DefaultSheet" without header and index.
    """
    sheets = [(design_sheet_df, {"sheet_name": "DesignSheet01"})]
    if default_sheet_df is not None:
        sheets.append(
            (default_sheet_df, {"sheet_name": "DefaultSheet", "header": False})
        )

    with pd.ExcelWriter(filename) as writer:
        for frame, options in sheets:
            frame.to_excel(writer, index=False, **options)


@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_design_matrix():
design_matrix = "poly_design.xlsx"
num_realizations = 10
a_values = list(range(num_realizations))
design_matrix_df = pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
"category": 5 * ["cat1"] + 5 * ["cat2"],
}
_create_design_matrix(
"poly_design.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
"category": 5 * ["cat1"] + 5 * ["cat2"],
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]])
with pd.ExcelWriter(design_matrix) as xl_write:
design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
default_sheet_df.to_excel(
xl_write, index=False, sheet_name="DefaultSheet", header=False
)

with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
Expand Down Expand Up @@ -105,21 +111,18 @@ def _evaluate(coeffs, x):
],
)
def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, error_msg):
design_matrix = "poly_design.xlsx"
num_realizations = 10
a_values = list(range(num_realizations))
design_matrix_df = pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
_create_design_matrix(
"poly_design.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
),
pd.DataFrame(default_values),
)
default_sheet_df = pd.DataFrame(default_values)
with pd.ExcelWriter(design_matrix) as xl_write:
design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
default_sheet_df.to_excel(
xl_write, index=False, sheet_name="DefaultSheet", header=False
)

with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
Expand Down Expand Up @@ -191,3 +194,92 @@ def _evaluate(coeffs, x):
np.testing.assert_array_equal(params[:, 0], a_values)
np.testing.assert_array_equal(params[:, 1], 10 * [1])
np.testing.assert_array_equal(params[:, 2], 10 * [2])


# Runs an ensemble experiment on the poly case with two DESIGN_MATRIX entries
# in the config and checks that the values from both spreadsheets end up in
# the stored "DESIGN_MATRIX" parameter group.
@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_multiple_design_matrix_instances():
num_realizations = 10
a_values = list(range(num_realizations))
# First workbook: design column "a" (one value per realization) and the
# default values b=1 and c=2.
_create_design_matrix(
"poly_design_1.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
# Second workbook: constant design column d=3 and the default value g=4.
# None of its keys overlap with the first workbook.
_create_design_matrix(
"poly_design_2.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"d": num_realizations * [3],
}
),
pd.DataFrame([["g", 4]]),
)

# The config registers both workbooks through two separate DESIGN_MATRIX lines.
with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
dedent(
"""\
QUEUE_OPTION LOCAL MAX_RUNNING 10
RUNPATH poly_out/realization-<IENS>/iter-<ITER>
NUM_REALIZATIONS 10
MIN_REALIZATIONS 1
GEN_DATA POLY_RES RESULT_FILE:poly.out
DESIGN_MATRIX poly_design_1.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
DESIGN_MATRIX poly_design_2.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
INSTALL_JOB poly_eval POLY_EVAL
FORWARD_MODEL poly_eval
"""
)
)

# Forward model script: reads the "DESIGN_MATRIX" entry of parameters.json and
# evaluates the polynomial from a, b and c only; d and g are not used by it.
with open("poly_eval.py", "w", encoding="utf-8") as f:
f.write(
dedent(
"""\
#!/usr/bin/env python
import json
def _load_coeffs(filename):
with open(filename, encoding="utf-8") as f:
return json.load(f)["DESIGN_MATRIX"]
def _evaluate(coeffs, x):
return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"]
if __name__ == "__main__":
coeffs = _load_coeffs("parameters.json")
output = [_evaluate(coeffs, x) for x in range(10)]
with open("poly.out", "w", encoding="utf-8") as f:
f.write("\\n".join(map(str, output)))
"""
)
)
# Add the execute bit for user, group and others on top of the existing mode.
os.chmod(
"poly_eval.py",
os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH,
)

run_cli(
ENSEMBLE_EXPERIMENT_MODE,
"--disable-monitor",
"poly.ert",
"--experiment-name",
"test-experiment",
)
# Load the stored parameters of the "default" ensemble of the experiment.
storage_path = ErtConfig.from_file("poly.ert").ens_path
with open_storage(storage_path) as storage:
experiment = storage.get_experiment_by_name("test-experiment")
params = experiment.get_ensemble_by_name("default").load_parameters(
"DESIGN_MATRIX"
)["values"]
# The assertions expect the column order a, b, c (first workbook) followed by
# d, g (second workbook), with every realization carrying the same defaults.
np.testing.assert_array_equal(params[:, 0], a_values)
np.testing.assert_array_equal(params[:, 1], 10 * [1])
np.testing.assert_array_equal(params[:, 2], 10 * [2])
np.testing.assert_array_equal(params[:, 3], 10 * [3])
np.testing.assert_array_equal(params[:, 4], 10 * [4])
32 changes: 20 additions & 12 deletions tests/ert/unit_tests/config/test_analysis_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ def test_analysis_config_from_file_is_same_as_from_dict(monkeypatch, tmp_path):
("STD_ENKF", "ENKF_TRUNCATION", 0.8),
],
ConfigKeys.DESIGN_MATRIX: [
"my_design_matrix.xlsx",
"DESIGN_SHEET:my_sheet",
"DEFAULT_SHEET:my_default_sheet",
[
"my_design_matrix.xlsx",
"DESIGN_SHEET:my_sheet",
"DEFAULT_SHEET:my_default_sheet",
]
],
}
)
Expand Down Expand Up @@ -110,9 +112,11 @@ def test_invalid_design_matrix_format_raises_validation_error():
{
ConfigKeys.NUM_REALIZATIONS: 1,
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.txt",
"DESIGN_SHEET:sheet1",
"DEFAULT_SHEET:sheet2",
[
"my_matrix.txt",
"DESIGN_SHEET:sheet1",
"DEFAULT_SHEET:sheet2",
],
],
}
)
Expand All @@ -123,9 +127,11 @@ def test_design_matrix_without_design_sheet_raises_validation_error():
AnalysisConfig.from_dict(
{
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.xlsx",
"DESIGN_:design",
"DEFAULT_SHEET:default",
[
"my_matrix.xlsx",
"DESIGN_:design",
"DEFAULT_SHEET:default",
]
],
}
)
Expand All @@ -136,9 +142,11 @@ def test_design_matrix_without_default_sheet_raises_validation_error():
AnalysisConfig.from_dict(
{
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.xlsx",
"DESIGN_SHEET:design",
"DEFAULT_:default",
[
"my_matrix.xlsx",
"DESIGN_SHEET:design",
"DEFAULT_:default",
]
],
}
)
Expand Down
81 changes: 81 additions & 0 deletions tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,87 @@
from ert.config.gen_kw_config import GenKwConfig, TransformFunctionDefinition


def _create_design_matrix(xls_path, design_matrix_df, default_sheet_df) -> DesignMatrix:
    """Write both sheets to ``xls_path`` and return a DesignMatrix for them.

    The design data goes to the sheet "DesignSheet01" (header row, no
    index); the defaults go to the sheet "DefaultValues" (no header, no
    index). The returned DesignMatrix is constructed from the same path
    and sheet names.
    """
    design_sheet, default_sheet = "DesignSheet01", "DefaultValues"
    with pd.ExcelWriter(xls_path) as writer:
        design_matrix_df.to_excel(writer, index=False, sheet_name=design_sheet)
        default_sheet_df.to_excel(
            writer, index=False, sheet_name=default_sheet, header=False
        )
    return DesignMatrix(xls_path, design_sheet, default_sheet)


@pytest.mark.parametrize(
    "design_sheet_pd, default_sheet_pd, error_msg",
    [
        pytest.param(
            pd.DataFrame(
                {
                    "REAL": [0, 1, 2],
                    "c": [1, 2, 3],
                    "d": [0, 2, 0],
                }
            ),
            pd.DataFrame([["e", 1]]),
            "",
            id="ok_merge",
        ),
        pytest.param(
            pd.DataFrame(
                {
                    "REAL": [0, 1, 2],
                    "a": [1, 2, 3],
                }
            ),
            pd.DataFrame([["e", 1]]),
            "Design Matrices do not have unique keys",
            id="not_unique_keys",
        ),
        pytest.param(
            pd.DataFrame(
                {
                    "REAL": [0, 1],
                    "d": [1, 2],
                }
            ),
            pd.DataFrame([["e", 1]]),
            "Design Matrices don't have the same active realizations!",
            id="not_same_active_realizations",
        ),
    ],
)
def test_merge_multiple_occurrences(
    tmp_path, design_sheet_pd, default_sheet_pd, error_msg
):
    """Merge a second design matrix into a fixed first one.

    The first matrix always holds the design columns "a" and "b" for three
    realizations. Each parameter set supplies the second matrix and the
    expected error message; an empty message means the merge must succeed
    and the merged matrix must expose the columns a, b, c and d.
    """
    design_matrix_1 = _create_design_matrix(
        tmp_path / "design_matrix_1.xlsx",
        pd.DataFrame(
            {
                "REAL": [0, 1, 2],
                "a": [1, 2, 3],
                "b": [0, 2, 0],
            },
        ),
        pd.DataFrame([["a", 1], ["b", 4]]),
    )

    design_matrix_2 = _create_design_matrix(
        tmp_path / "design_matrix_2.xlsx", design_sheet_pd, default_sheet_pd
    )
    if error_msg:
        with pytest.raises(ValueError, match=error_msg):
            design_matrix_1.merge_with_other(design_matrix_2)
    else:
        design_matrix_1.merge_with_other(design_matrix_2)
        # Look the group up through the shared constant, the same key that
        # is used for the data-frame columns below.
        design_params = design_matrix_1.parameter_configuration.get(
            DESIGN_MATRIX_GROUP, []
        )
        assert all(param in design_params for param in ("a", "b", "c", "d"))
        assert design_matrix_1.active_realizations == [True, True, True]
        df = design_matrix_1.design_matrix_df
        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "a"], np.array([1, 2, 3]))
        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "b"], np.array([0, 2, 0]))
        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "c"], np.array([1, 2, 3]))
        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "d"], np.array([0, 2, 0]))


@pytest.mark.parametrize(
"parameters, error_msg",
[
Expand Down

0 comments on commit 2491185

Please sign in to comment.