Skip to content

Commit

Permalink
Use read_summary for refcase
Browse files Browse the repository at this point in the history
Ensures consistent naming of summary keys. This also fixes an issue
where history keys were not handled correctly: e.g.

the history key of "BOPR:1,1,3" was previously interpreted to be
"BOPR:1,1,3H" but is now interpreted as "BOPRH:1,1,3".

Whether that always makes sense in all simulators is not confirmed, but
"BOPR:1,1,3H" is guaranteed not to be found in the summary file. The
keys that did work correctly before, FIELD, OTHER, GROUP and WELL, still
work correctly.
  • Loading branch information
eivindjahren committed Jan 17, 2024
1 parent adf59e8 commit 8a6fc5f
Show file tree
Hide file tree
Showing 16 changed files with 156 additions and 182 deletions.
1 change: 0 additions & 1 deletion src/clib/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ pybind11_add_module(
job_queue/slurm_driver.cpp
job_queue/torque_driver.cpp
job_queue/spawn.cpp
enkf/enkf_obs.cpp
enkf/row_scaling.cpp)

# -----------------------------------------------------------------
Expand Down
28 changes: 0 additions & 28 deletions src/clib/lib/enkf/enkf_obs.cpp

This file was deleted.

6 changes: 3 additions & 3 deletions src/ert/config/_read_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _find_file_matching(
kind: str, case: str, predicate: Callable[[str, str], bool]
) -> str:
dir, base = os.path.split(case)
candidates = list(filter(lambda x: predicate(base, x), os.listdir(dir)))
candidates = list(filter(lambda x: predicate(base, x), os.listdir(dir or ".")))
if not candidates:
raise ValueError(f"Could not find any {kind} matching case path {case}")
if len(candidates) > 1:
Expand All @@ -220,7 +220,7 @@ def _get_summary_filenames(filepath: str) -> Tuple[str, str]:

def read_summary(
filepath: str, fetch_keys: Sequence[str]
) -> Tuple[List[str], Sequence[datetime], Any]:
) -> Tuple[datetime, List[str], Sequence[datetime], Any]:
summary, spec = _get_summary_filenames(filepath)
try:
date_index, start_date, date_units, keys, indices = _read_spec(spec, fetch_keys)
Expand All @@ -229,7 +229,7 @@ def read_summary(
)
except resfo.ResfoParsingError as err:
raise ValueError(f"Failed to read summary file {filepath}: {err}") from err
return (keys, time_map, fetched)
return (start_date, keys, time_map, fetched)


def _key2str(key: Union[bytes, str]) -> str:
Expand Down
122 changes: 64 additions & 58 deletions src/ert/config/ensemble_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,27 @@
import logging
import os
from collections import Counter
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Type, Union, no_type_check, overload

from resdata.summary import Summary
from fnmatch import fnmatch
from typing import (
Any,
Dict,
List,
Optional,
Sequence,
Type,
Union,
no_type_check,
overload,
)

import numpy as np
import numpy.typing as npt

from ert.field_utils import get_shape

from ._read_summary import read_summary
from .field import Field
from .gen_data_config import GenDataConfig
from .gen_kw_config import GenKwConfig
Expand Down Expand Up @@ -39,36 +52,29 @@ def _get_abs_path(file: Optional[str]) -> Optional[str]:
return file


class EnsembleConfig:
@staticmethod
def _load_refcase(refcase_file: Optional[str]) -> Optional[Summary]:
if refcase_file is None:
return None

refcase_filepath = Path(refcase_file).absolute()
refcase_file = str(refcase_filepath.parent / refcase_filepath.stem)
@dataclass(eq=False)
class Refcase:
start_date: datetime
keys: List[str]
dates: Sequence[datetime]
values: npt.NDArray[Any]

if not os.path.exists(refcase_file + ".UNSMRY"):
raise ConfigValidationError(
f"Cannot find UNSMRY file for refcase provided! {refcase_file}.UNSMRY"
)
def __eq__(self, other: object) -> bool:
if not isinstance(other, Refcase):
return False
return bool(
self.start_date == other.start_date
and self.keys == other.keys
and self.dates == other.dates
and np.all(self.values == other.values)
)

if not os.path.exists(refcase_file + ".SMSPEC"):
raise ConfigValidationError(
f"Cannot find SMSPEC file for refcase provided! {refcase_file}.SMSPEC"
)
@property
def all_dates(self) -> List[datetime]:
return [self.start_date] + list(self.dates)

# defaults for loading refcase - necessary for using the function
# exposed in python part of ecl
refcase_load_args = {
"load_case": refcase_file,
"join_string": ":",
"include_restart": True,
"lazy_load": False,
"file_options": 0,
}
return Summary(**refcase_load_args)

class EnsembleConfig:
def __init__(
self,
grid_file: Optional[str] = None,
Expand All @@ -77,7 +83,7 @@ def __init__(
surface_list: Optional[List[SurfaceConfig]] = None,
summary_config: Optional[SummaryConfig] = None,
field_list: Optional[List[Field]] = None,
refcase: Optional[Summary] = None,
refcase: Optional[Refcase] = None,
) -> None:
_genkw_list = [] if genkw_list is None else genkw_list
_gendata_list = [] if gendata_list is None else gendata_list
Expand Down Expand Up @@ -150,28 +156,35 @@ def make_field(field_list: List[str]) -> Field:
ecl_base = config_dict.get("ECLBASE")
if ecl_base is not None:
ecl_base = ecl_base.replace("%d", "<IENS>")
refcase = None
summary_keys = [item for sublist in summary_list for item in sublist]
optional_keys = []
refcase_keys = []
time_map = []
data = None
if refcase_file_path is not None:
refcase = cls._load_refcase(refcase_file_path)
time_map = set(
datetime(date.year, date.month, date.day)
for date in refcase.report_dates
)
optional_keys = []
summary_keys = [item for sublist in summary_list for item in sublist]
try:
start_date, refcase_keys, time_map, data = read_summary(
refcase_file_path, ["*"]
)
except Exception as err:
raise ConfigValidationError(f"Could not read refcase: {err}") from err
to_add = list(refcase_keys)
for key in summary_keys:
if "*" in key and refcase:
optional_keys.extend(list(refcase.keys(pattern=key)))
if "*" in key and refcase_keys:
for i, rkey in list(enumerate(to_add))[::-1]:
if fnmatch(rkey, key) and rkey != "TIME":
optional_keys.append(rkey)
del to_add[i]
else:
optional_keys.append(key)

summary_config = None
if ecl_base:
summary_config = SummaryConfig(
name="summary",
input_file=ecl_base,
keys=optional_keys,
refcase=time_map,
refcase=set(time_map),
)

return cls(
Expand All @@ -181,7 +194,9 @@ def make_field(field_list: List[str]) -> Field:
surface_list=[SurfaceConfig.from_config_list(s) for s in surface_list],
summary_config=summary_config,
field_list=[make_field(f) for f in field_list],
refcase=refcase,
refcase=Refcase(start_date, refcase_keys, time_map, data)
if data is not None
else None,
)

def _node_info(self, object_type: Type[Any]) -> str:
Expand Down Expand Up @@ -277,21 +292,12 @@ def __eq__(self, other: object) -> bool:
if not isinstance(other, EnsembleConfig):
return False

if (
self.keys != other.keys
or self._grid_file != other._grid_file
or self.parameter_configs != other.parameter_configs
or self.response_configs != other.response_configs
):
return False

if self.refcase is None:
return other.refcase is None
if other.refcase is None:
return self.refcase is None

return os.path.realpath(self.refcase.case) == os.path.realpath(
other.refcase.case
return (
self.keys == other.keys
and self._grid_file == other._grid_file
and self.parameter_configs == other.parameter_configs
and self.response_configs == other.response_configs
and self.refcase == other.refcase
)

def get_summary_keys(self) -> List[str]:
Expand Down
9 changes: 3 additions & 6 deletions src/ert/config/ert_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
Dict,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
overload,
)

import xarray as xr
from resdata.util.util import CTime
from typing_extensions import Self

from ert.substitution_list import SubstitutionList
Expand Down Expand Up @@ -651,12 +651,9 @@ def preferred_num_cpu(self) -> int:

def _create_observations(self) -> EnkfObs:
obs_config_file = self.model_config.obs_config_file
obs_time_list: List[datetime] = []
obs_time_list: Sequence[datetime] = []
if self.ensemble_config.refcase is not None:
refcase = self.ensemble_config.refcase
obs_time_list = [refcase.get_start_time()] + [
CTime(t).datetime() for t in refcase.alloc_time_vector(True)
]
obs_time_list = self.ensemble_config.refcase.all_dates
elif self.model_config.time_map is not None:
obs_time_list = self.model_config.time_map
if obs_config_file:
Expand Down
45 changes: 16 additions & 29 deletions src/ert/config/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@

import numpy as np
import xarray as xr
from resdata.summary import SummaryVarType

from ert._clib.enkf_obs import read_from_refcase
from ert.validation import rangestring_to_list

from .enkf_observation_implementation_type import EnkfObservationImplementationType
Expand All @@ -32,6 +30,11 @@
DEFAULT_TIME_DELTA = timedelta(seconds=30)


def history_key(key: str) -> str:
    """Return the history variant of a summary key.

    An "H" is appended to the keyword, i.e. the part of ``key`` before the
    first ":"; whatever follows the keyword is kept unchanged. For example
    "BOPR:1,1,3" becomes "BOPRH:1,1,3" and "FOPR" becomes "FOPRH".
    """
    # partition() splits on the first ":" only, and yields an empty
    # separator and tail when the key has no ":" at all.
    keyword, separator, qualifiers = key.partition(":")
    return f"{keyword}H{separator}{qualifiers}"


class EnkfObs:
def __init__(self, obs_vectors: Dict[str, ObsVector], obs_time: List[datetime]):
self.obs_vectors = obs_vectors
Expand Down Expand Up @@ -111,24 +114,14 @@ def _handle_history_observation(
error_mode = history_observation["ERROR_MODE"]

if history_type == HistorySource.REFCASE_HISTORY:
var_type = refcase.var_type(summary_key)
local_key = None
if var_type in [
SummaryVarType.RD_SMSPEC_WELL_VAR,
SummaryVarType.RD_SMSPEC_GROUP_VAR,
]:
summary_node = refcase.smspec_node(summary_key)
local_key = summary_node.keyword + "H:" + summary_node.wgname
elif var_type == SummaryVarType.RD_SMSPEC_FIELD_VAR:
summary_node = refcase.smspec_node(summary_key)
local_key = summary_node.keyword + "H"
local_key = history_key(summary_key)
else:
local_key = summary_key
if local_key is None:
return {}
if local_key not in refcase:
if local_key not in refcase.keys:
return {}
valid, values = read_from_refcase(refcase, local_key)
values = refcase.values[refcase.keys.index(local_key)]
std_dev = cls._handle_error_mode(values, error, error_min, error_mode)
for segment_name, segment_instance in history_observation["SEGMENT"]:
start = segment_instance["START"]
Expand Down Expand Up @@ -169,21 +162,15 @@ def _handle_history_observation(
segment_instance["ERROR_MODE"],
)
data: Dict[Union[int, datetime], Union[GenObservation, SummaryObservation]] = {}
dates = [
datetime(date.year, date.month, date.day) for date in refcase.report_dates
]
for i, (good, error, value) in enumerate(zip(valid, std_dev, values)):
if good:
if error <= std_cutoff:
ConfigWarning.ert_context_warn(
"Too small observation error in observation"
f" {summary_key}:{i} - ignored",
summary_key,
)
continue
data[dates[i - 1]] = SummaryObservation(
summary_key, summary_key, value, error
for i, (date, error, value) in enumerate(zip(refcase.dates, std_dev, values)):
if error <= std_cutoff:
ConfigWarning.ert_context_warn(
"Too small observation error in observation"
f" {summary_key}:{i} - ignored",
summary_key,
)
continue
data[date] = SummaryObservation(summary_key, summary_key, value, error)

return {
summary_key: ObsVector(
Expand Down
2 changes: 1 addition & 1 deletion src/ert/config/summary_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __post_init__(self) -> None:

def read_from_file(self, run_path: str, iens: int) -> xr.Dataset:
filename = self.input_file.replace("<IENS>", str(iens))
keys, time_map, data = read_summary(f"{run_path}/{filename}", self.keys)
_, keys, time_map, data = read_summary(f"{run_path}/{filename}", self.keys)

if self.refcase:
assert isinstance(self.refcase, set)
Expand Down
5 changes: 2 additions & 3 deletions tests/performance_tests/test_memory_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,10 @@ def fill_storage_with_data(poly_template: Path, ert_config: ErtConfig) -> None:
real,
)
else:
obs_time_list = ens_config.refcase.all_dates
source.save_response(
data_key,
make_summary_data(
summary_obs_keys, ens_config.refcase.numpy_dates
),
make_summary_data(summary_obs_keys, obs_time_list),
real,
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Realization,FOPR,WOPR_OP1_108,WOPR_OP1_144,WOPR_OP1_190,WOPR_OP1_36,WOPR_OP1_72,WOPR_OP1_9,WPR_DIFF_1
0,1572.4551516177269,4.663157777414048,1.2280040962512109,24.150873738474047,0.16579549917171352,16.6031985311065,0.5786173169058768,17.52338148044444
1,564.7325457301008,4.368782497900839,32.6530612244898,2.25,7.513238617348759,7.502955389862009,4.0,3.917211792502779
2,760.2134818146507,0.6758601761400266,0.0495481141274234,0.878978328860087,0.53148428819629,10.315068719501141,0.5691876200100965,21.326953031224996
3,762.2886413785844,0.057372834892021204,2.003570229564708,89.39209245855437,1.0729962591656552,0.23633929814081966,1.963529806065796,4.454344394944445
4,978.6854544674984,0.6099979595035421,11.1651322515757,2.3617365751122437,0.5138762294603726,41.03398708587926,0.04177266072298375,27.46179846677778
0,1572.455126624968,4.663157777414048,1.2280040962512109,24.150873738474047,0.16579549917171352,16.6031985311065,0.5786173169058768,17.52338148044444
1,564.7325372587165,4.368782497900839,32.6530612244898,2.25,7.513238617348759,7.502955389862009,4.0,3.917211792502779
2,760.2134646694443,0.6758601761400266,0.0495481141274234,0.878978328860087,0.53148428819629,10.315068719501141,0.5691876200100965,21.326953031224996
3,762.2886383355541,0.057372834892021204,2.003570229564708,89.39209245855437,1.0729962591656552,0.23633929814081966,1.963529806065796,4.454344394944445
4,978.68543806519,0.6099979595035421,11.1651322515757,2.3617365751122437,0.5138762294603726,41.03398708587926,0.04177266072298375,27.46179846677778
Loading

0 comments on commit 8a6fc5f

Please sign in to comment.