diff --git a/src/clib/lib/CMakeLists.txt b/src/clib/lib/CMakeLists.txt index 749202c654f..919600d03bc 100644 --- a/src/clib/lib/CMakeLists.txt +++ b/src/clib/lib/CMakeLists.txt @@ -13,7 +13,6 @@ pybind11_add_module( job_queue/torque_driver.cpp job_queue/spawn.cpp enkf/enkf_obs.cpp - enkf/read_summary.cpp enkf/row_scaling.cpp) # ----------------------------------------------------------------- diff --git a/src/clib/lib/enkf/read_summary.cpp b/src/clib/lib/enkf/read_summary.cpp deleted file mode 100644 index 877dee3948a..00000000000 --- a/src/clib/lib/enkf/read_summary.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // must be included after pybind11.h - -static bool matches(const std::vector &patterns, const char *key) { - return std::any_of(patterns.cbegin(), patterns.cend(), - [key](const std::string &pattern) { - return fnmatch(pattern.c_str(), key, 0) == 0; - }); -} - -ERT_CLIB_SUBMODULE("_read_summary", m) { - m.def("read_dates", [](Cwrap summary) { - if (!PyDateTimeAPI) - PyDateTime_IMPORT; - - time_t_vector_type *tvec = rd_sum_alloc_time_vector(summary, true); - int size = time_t_vector_size(tvec); - pybind11::list result(size); - auto t = tm{}; - for (int i = 0; i < size; i++) { - auto timestamp = time_t_vector_iget(tvec, i); - auto success = ::gmtime_r(×tamp, &t); - if (success == nullptr) - throw std::runtime_error("Unable to parse unix timestamp: " + - std::to_string(timestamp)); - - if (!PyDateTimeAPI) // this is here to silence the linters. it will always be set. - throw std::runtime_error("Python DateTime API not loaded"); - - auto py_time = PyDateTime_FromDateAndTime( - t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, - t.tm_sec, 0); - if (py_time == nullptr) - throw std::runtime_error("Unable to create DateTime object " - "from unix timestamp: " + - std::to_string(timestamp)); - - result[i] = pybind11::reinterpret_steal(py_time); - } - - time_t_vector_free(tvec); - return result; - }); - m.def("read_summary", [](Cwrap summary, - std::vector keys) { - const rd_smspec_type *smspec = rd_sum_get_smspec(summary); - std::vector>> - summary_vectors{}; - std::vector seen_keys{}; - for (int i = 0; i < rd_smspec_num_nodes(smspec); i++) { - const rd::smspec_node &smspec_node = - rd_smspec_iget_node_w_node_index(smspec, i); - const char *key = smspec_node.get_gen_key1(); - if ((matches(keys, key)) && - !(std::find(seen_keys.begin(), seen_keys.end(), key) != - seen_keys.end())) { - seen_keys.push_back(key); - int start = rd_sum_get_first_report_step(summary); - int end = rd_sum_get_last_report_step(summary); - std::vector data{}; - int key_index = - rd_sum_get_general_var_params_index(summary, key); - for (int tstep = start; tstep <= end; tstep++) { - if (rd_sum_has_report_step(summary, tstep)) { - int time_index = rd_sum_iget_report_end(summary, tstep); - data.push_back( - rd_sum_iget(summary, time_index, key_index)); - } - } - summary_vectors.emplace_back(key, data); - } - } - return summary_vectors; - }); -} diff --git a/src/ert/config/_read_summary.py b/src/ert/config/_read_summary.py new file mode 100644 index 00000000000..6e2bc500be4 --- /dev/null +++ b/src/ert/config/_read_summary.py @@ -0,0 +1,446 @@ +from __future__ import annotations + +import os +import os.path +import re +from datetime import datetime, timedelta +from enum import Enum, auto +from fnmatch import fnmatch +from typing import TYPE_CHECKING + +import numpy as np +import resfo +from pydantic import PositiveInt + +if TYPE_CHECKING: + from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + + import numpy.typing as npt + +SPECIAL_KEYWORDS = [ + "NEWTON", + "NAIMFRAC", + "NLINEARS", + "NLINSMIN", + "NLINSMAX", + "ELAPSED", + "MAXDPR", + "MAXDSO", + "MAXDSG", + "MAXDSW", + "STEPTYPE", + "WNEWTON", +] + + +class _SummaryType(Enum): + AQUIFER = auto() + BLOCK = auto() + COMPLETION = auto() + FIELD = auto() + GROUP = auto() + LOCAL_BLOCK = auto() + LOCAL_COMPLETION = auto() + LOCAL_WELL = auto() + NETWORK = auto() + SEGMENT = auto() + WELL = auto() + REGION = auto() + INTER_REGION = auto() + OTHER = auto() + + @classmethod + def from_keyword(cls, summary_keyword: str) -> _SummaryType: + KEYWORD_TYPE_MAPPING = { + "A": cls.AQUIFER, + "B": cls.BLOCK, + "C": cls.COMPLETION, + "F": cls.FIELD, + "G": cls.GROUP, + "LB": cls.LOCAL_BLOCK, + "LC": cls.LOCAL_COMPLETION, + "LW": cls.LOCAL_WELL, + "N": cls.NETWORK, + "S": cls.SEGMENT, + "W": cls.WELL, + } + if summary_keyword == "": + raise ValueError("Got empty summary_keyword") + if any(special in summary_keyword for special in SPECIAL_KEYWORDS): + return cls.OTHER + if summary_keyword[0] in KEYWORD_TYPE_MAPPING: + return KEYWORD_TYPE_MAPPING[summary_keyword[0]] + if summary_keyword[0:2] in KEYWORD_TYPE_MAPPING: + return KEYWORD_TYPE_MAPPING[summary_keyword[0:2]] + if summary_keyword == "RORFR": + return cls.REGION + + if any( + re.match(pattern, summary_keyword) + for pattern in [r"R.FT.*", r"R..FT.*", r"R.FR.*", r"R..FR.*", r"R.F"] + ): + return cls.INTER_REGION + if summary_keyword[0] == "R": + return cls.REGION + + return cls.OTHER + + +def _cell_index( + array_index: int, nx: PositiveInt, ny: PositiveInt +) -> Tuple[int, int, int]: + k = array_index // (nx * ny) + array_index -= k * (nx * ny) + j = array_index // nx + array_index -= j * nx + + return array_index + 1, j + 1, k + 1 + + +def _check_missing( + keyword_name: str, missing_key: str, *test_vars: Optional[int] +) -> List[int]: + if any(v is None for v in test_vars): + raise ValueError( + f"Found {keyword_name} keyword in summary " + f"specification without {missing_key} keyword" + ) + return test_vars # type: ignore + + +def make_summary_key( + keyword: str, + number: Optional[int] = None, + name: Optional[str] = None, + nx: Optional[int] = None, + ny: Optional[int] = None, + lgr_name: Optional[str] = None, + li: Optional[int] = None, + lj: Optional[int] = None, + lk: Optional[int] = None, +) -> Optional[str]: + sum_type = _SummaryType.from_keyword(keyword) + if sum_type in [ + _SummaryType.FIELD, + _SummaryType.OTHER, + ]: + return keyword + if sum_type in [ + _SummaryType.REGION, + _SummaryType.AQUIFER, + ]: + return f"{keyword}:{number}" + if sum_type == _SummaryType.BLOCK: + nx, ny = _check_missing("block", "dimens", nx, ny) + (number,) = _check_missing("block", "nums", number) + i, j, k = _cell_index(number - 1, nx, ny) + return f"{keyword}:{i},{j},{k}" + if sum_type in [ + _SummaryType.GROUP, + _SummaryType.WELL, + ]: + return f"{keyword}:{name}" + if sum_type == _SummaryType.SEGMENT: + return f"{keyword}:{name}:{number}" + if sum_type == _SummaryType.COMPLETION: + nx, ny = _check_missing("completion", "dimens", nx, ny) + (number,) = _check_missing("completion", "nums", number) + i, j, k = _cell_index(number - 1, nx, ny) + return f"{keyword}:{name}:{i},{j},{k}" + if sum_type == _SummaryType.INTER_REGION: + (number,) = _check_missing("inter region", "nums", number) + r1 = number % 32768 + r2 = ((number - r1) // 32768) - 10 + return f"{keyword}:{r1}-{r2}" + if sum_type == _SummaryType.LOCAL_WELL: + return f"{keyword}:{lgr_name}:{name}" + if sum_type == _SummaryType.LOCAL_BLOCK: + return f"{keyword}:{lgr_name}:{li},{lj},{lk}" + if sum_type == _SummaryType.LOCAL_COMPLETION: + nx, ny = _check_missing("local completion", "dimens", nx, ny) + (number,) = _check_missing("local completion", "nums", number) + i, j, k = _cell_index(number - 1, nx, ny) + return f"{keyword}:{lgr_name}:{name}:{li},{lj},{lk}" + if sum_type == _SummaryType.NETWORK: + # This is consistent with resinsight but + # has a bug in resdata + # https://github.com/equinor/resdata/issues/943 + return keyword + raise ValueError(f"Unexpected keyword type: {sum_type}") + + +class DateUnit(Enum): + HOURS = auto() + DAYS = auto() + + def make_delta(self, val: float) -> timedelta: + if self == DateUnit.HOURS: + return timedelta(hours=val) + if self == DateUnit.DAYS: + return timedelta(days=val) + raise ValueError(f"Unknown date unit {val}") + + +def _is_unsmry(base: str, path: str) -> bool: + if "." not in path: + return False + splitted = path.split(".") + return splitted[-2].endswith(base) and splitted[-1].lower() in ["unsmry", "funsmry"] + + +def _is_smspec(base: str, path: str) -> bool: + if "." not in path: + return False + splitted = path.split(".") + return splitted[-2].endswith(base) and splitted[-1].lower() in ["smspec", "fsmspec"] + + +def _get_summary_filenames(filepath: str) -> Tuple[str, str]: + filepath = filepath + base = os.path.basename(filepath) + dir = os.path.dirname(filepath) + summary_candidates = list( + filter( + lambda x: _is_unsmry(base, x), + os.listdir(dir), + ) + ) + if not summary_candidates: + raise ValueError( + f"Could not find any unified summary file matching case path {filepath}" + ) + if len(summary_candidates) > 1: + raise ValueError( + "Ambigous reference to unified " + f"summary file {filepath}, could be any of {summary_candidates}" + ) + summary = os.path.join(dir, summary_candidates[0]) + spec_candidates = list(filter(lambda x: _is_smspec(base, x), os.listdir(dir))) + if not spec_candidates: + raise ValueError( + f"Could not find any smspec file matching the case path {filepath}" + ) + if len(spec_candidates) > 1: + raise ValueError( + "Ambigous reference to summary" + f" spec {filepath}, could be any of {spec_candidates}" + ) + spec = os.path.join(dir, spec_candidates[0]) + return summary, spec + + +def read_summary( + filepath: str, fetch_keys: Sequence[str] +) -> Tuple[List[str], Sequence[datetime], Any]: + summary, spec = _get_summary_filenames(filepath) + date_index, start_date, date_units, keys, mask = _read_spec(spec, fetch_keys) + fetched, time_map = _read_summary(summary, start_date, date_units, mask, date_index) + + return (keys, time_map, fetched) + + +def _key2str(key: Union[bytes, str]) -> str: + if isinstance(key, bytes): + ret = key.decode() + else: + ret = key + assert isinstance(ret, str) + return ret.rstrip() + + +def _read_spec( + spec: str, fetch_keys: Sequence[str] +) -> Tuple[int, datetime, DateUnit, List[str], npt.NDArray[Any]]: + date = None + n = None + nx = None + ny = None + + arrays: Dict[str, Optional[npt.NDArray[Any]]] = { + kw: None + for kw in [ + "WGNAMES ", + "NUMS ", + "KEYWORDS", + "NUMLX ", + "NUMLY ", + "NUMLZ ", + "LGRNAMES", + "UNITS ", + ] + } + + if spec.lower().endswith("fsmspec"): + mode = "rt" + format = resfo.Format.FORMATTED + else: + mode = "rb" + format = resfo.Format.UNFORMATTED + + with open(spec, mode) as fp: + for entry in resfo.lazy_read(fp, format): + if all( + p is not None + for p in ( + [ + date, + n, + nx, + ny, + ] + + list(arrays.values()) + ) + ): + break + kw = entry.read_keyword() + if kw in arrays: + vals = entry.read_array() + if vals is resfo.MESS or isinstance(vals, resfo.MESS): + raise ValueError(f"{kw} in {spec} was MESS") + arrays[kw] = vals + if kw == "DIMENS ": + vals = entry.read_array() + if vals is resfo.MESS or isinstance(vals, resfo.MESS): + raise ValueError(f"DIMENS in {spec} was MESS") + size = len(vals) + n = vals[0] if size > 0 else None + nx = vals[1] if size > 1 else None + ny = vals[2] if size > 2 else None + if kw == "STARTDAT": + vals = entry.read_array() + if vals is resfo.MESS or isinstance(vals, resfo.MESS): + raise ValueError(f"Startdate in {spec} was MESS") + size = len(vals) + day = vals[0] if size > 0 else 0 + month = vals[1] if size > 1 else 0 + year = vals[2] if size > 2 else 0 + hour = vals[3] if size > 3 else 0 + minute = vals[4] if size > 4 else 0 + microsecond = vals[5] if size > 5 else 0 + date = datetime( + day=day, + month=month, + year=year, + hour=hour, + minute=minute, + second=microsecond // 10**6, + microsecond=microsecond % 10**6, + ) + keywords = arrays["KEYWORDS"] + wgnames = arrays["WGNAMES "] + nums = arrays["NUMS "] + numlx = arrays["NUMLX "] + numly = arrays["NUMLY "] + numlz = arrays["NUMLZ "] + lgr_names = arrays["LGRNAMES"] + + if date is None: + raise ValueError(f"keyword startdat missing in {spec}") + if keywords is None: + raise ValueError(f"keywords missing in {spec}") + if n is None: + n = len(keywords) + + indices: List[int] = [] + keys: List[str] = [] + date_index = None + + def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any: + if arr is None: + return None + if len(arr) <= idx: + return None + return arr[idx] + + for i in range(n): + keyword = _key2str(keywords[i]) + if keyword == "TIME": + date_index = i + + name = optional_get(wgnames, i) + if name is not None: + name = _key2str(name) + num = optional_get(nums, i) + lgr_name = optional_get(lgr_names, i) + if lgr_name is not None: + lgr_name = _key2str(lgr_name) + li = optional_get(numlx, i) + lj = optional_get(numly, i) + lk = optional_get(numlz, i) + + key = make_summary_key(keyword, num, name, nx, ny, lgr_name, li, lj, lk) + if key is not None and _should_load_summary_key(key, fetch_keys): + if key in keys: + # only keep the index of the last occurrence of a key + # this is done for backwards compatability + # and to have unique keys + indices[keys.index(key)] = i + else: + indices.append(i) + keys.append(key) + + keys_array = np.array(keys) + rearranged = keys_array.argsort() + keys_array = keys_array[rearranged] + + indices_array = np.array(indices)[rearranged] + mask = np.in1d(np.arange(n), indices_array) + + units = arrays["UNITS "] + if units is None: + raise ValueError(f"keyword units missing in {spec}") + if date_index is None: + raise ValueError(f"KEYWORDS did not contain TIME in {spec}") + if date_index >= len(units): + raise ValueError(f"Unit missing for TIME in {spec}") + + return ( + date_index, + date, + DateUnit[_key2str(units[date_index])], + list(keys_array), + mask, + ) + + +def _read_summary( + summary: str, + start_date: datetime, + unit: DateUnit, + mask: npt.NDArray[Any], + date_index: int, +) -> Tuple[npt.NDArray[np.float32], List[datetime]]: + if summary.lower().endswith("funsmry"): + mode = "rt" + format = resfo.Format.FORMATTED + else: + mode = "rb" + format = resfo.Format.UNFORMATTED + + last_params = None + values = [] + dates = [] + + def read_params(): + nonlocal last_params, values + if last_params is not None: + vals = last_params.read_array() + if vals is resfo.MESS or isinstance(vals, resfo.MESS): + raise ValueError(f"PARAMS in {summary} was MESS") + values.append(vals[mask]) + dates.append(start_date + unit.make_delta(float(vals[date_index]))) + last_params = None + + with open(summary, mode) as fp: + for entry in resfo.lazy_read(fp, format): + kw = entry.read_keyword() + if kw == "PARAMS ": + last_params = entry + if kw == "SEQHDR ": + read_params() + read_params() + return np.array(values).T, dates + + +def _should_load_summary_key(data_key: Any, user_set_keys: Sequence[str]) -> bool: + return any(fnmatch(data_key, key) for key in user_set_keys) diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 1ec0fd1baae..a6760d02c21 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -6,13 +6,8 @@ from typing import TYPE_CHECKING, Set, Union import xarray as xr -from resdata.summary import Summary - -from ert._clib._read_summary import ( # pylint: disable=import-error - read_dates, - read_summary, -) +from ._read_summary import read_summary from .response_config import ResponseConfig if TYPE_CHECKING: @@ -34,18 +29,8 @@ def __post_init__(self) -> None: def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: filename = self.input_file.replace("", str(iens)) - try: - summary = Summary( - f"{run_path}/{filename}", - include_restart=False, - lazy_load=False, - ) - except IOError as e: - raise ValueError( - "Could not find SUMMARY file or using non unified SUMMARY " - f"file from: {run_path}/{filename}.UNSMRY", - ) from e - time_map = read_dates(summary) + keys, time_map, data = read_summary(f"{run_path}/{filename}", self.keys) + if self.refcase: assert isinstance(self.refcase, set) missing = self.refcase.difference(time_map) @@ -58,10 +43,6 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: f"{last} from: {run_path}/{filename}.UNSMRY" ) - summary_data = read_summary(summary, list(set(self.keys))) - summary_data.sort(key=lambda x: x[0]) - data = [d for _, d in summary_data] - keys = [k for k, _ in summary_data] ds = xr.Dataset( {"values": (["name", "time"], data)}, coords={"time": time_map, "name": keys}, diff --git a/tests/integration_tests/status/test_tracking_integration.py b/tests/integration_tests/status/test_tracking_integration.py index 131c8e7bf31..a6e0f1cb859 100644 --- a/tests/integration_tests/status/test_tracking_integration.py +++ b/tests/integration_tests/status/test_tracking_integration.py @@ -449,10 +449,10 @@ def test_tracking_missing_ecl( assert ( f"Realization: 0 failed after reaching max submit (1):\n\t\n" "status from done callback: " - "Could not find " - f"SUMMARY file or using non unified SUMMARY file from: " + "Could not find any unified " + f"summary file matching case path " f"{Path().absolute()}/simulations/realization-0/" - "iter-0/ECLIPSE_CASE.UNSMRY" + "iter-0/ECLIPSE_CASE" ) in caplog.messages # Just also check that it failed for the expected reason @@ -460,10 +460,10 @@ def test_tracking_missing_ecl( assert ( f"Realization: 0 failed after reaching max submit (1):\n\t\n" "status from done callback: " - "Could not find " - f"SUMMARY file or using non unified SUMMARY file from: " + "Could not find any unified " + f"summary file matching case path " f"{Path().absolute()}/simulations/realization-0/" - "iter-0/ECLIPSE_CASE.UNSMRY" + "iter-0/ECLIPSE_CASE" ) in failures[0].failed_msg thread.join() diff --git a/tests/unit_tests/config/config_dict_generator.py b/tests/unit_tests/config/config_dict_generator.py index d57cd991cad..c6026767dda 100644 --- a/tests/unit_tests/config/config_dict_generator.py +++ b/tests/unit_tests/config/config_dict_generator.py @@ -393,16 +393,7 @@ def ert_config_values(draw, use_eclbase=booleans): smspec = draw( smspecs( sum_keys=st.just(sum_keys), - start_date=st.just( - Date( - year=first_date.year, - month=first_date.month, - day=first_date.day, - hour=first_date.hour, - minutes=first_date.minute, - micro_seconds=first_date.second * 10**6 + first_date.microsecond, - ) - ), + start_date=st.just(Date.from_datetime(first_date)), ) ) std_cutoff = draw(small_floats) diff --git a/tests/unit_tests/config/summary_generator.py b/tests/unit_tests/config/summary_generator.py index d4fc227cee1..ba69399be94 100644 --- a/tests/unit_tests/config/summary_generator.py +++ b/tests/unit_tests/config/summary_generator.py @@ -4,56 +4,146 @@ See https://opm-project.org/?page_id=955 """ from dataclasses import astuple, dataclass +from datetime import datetime, timedelta from enum import Enum, unique -from typing import Any, List, Tuple +from typing import Any, List, Optional, Tuple import hypothesis.strategies as st import numpy as np import resfo +from hypothesis import assume from hypothesis.extra.numpy import from_dtype from pydantic import PositiveInt, conint +from typing_extensions import Self + +from ert.config._read_summary import SPECIAL_KEYWORDS from .egrid_generator import GrdeclKeyword +""" +See section 11.2 in opm flow reference manual 2022-10 +for definition of summary variable names. +""" + +inter_region_summary_variables = [ + "RGFR", + "RGFR+", + "RGFR-", + "RGFT", + "RGFT+", + "RGFT-", + "RGFTG", + "RGFTL", + "ROFR", + "ROFR+", + "ROFR-", + "ROFT", + "ROFT+", + "ROFT-", + "ROFTG", + "ROFTL", + "RWFR", + "RWFR+", + "RWFR-", + "RWFT", + "RWFT+", + "RWFT-", + "RCFT", + "RSFT", + "RNFT", +] + @st.composite -def summary_variables(draw): - """ - Generator for summary variable mnemonic, See - section 11.2.1 in opm flow reference manual 2022-10. - """ +def root_memnonic(draw): first_character = draw(st.sampled_from("ABFGRWCS")) if first_character == "A": second_character = draw(st.sampled_from("ALN")) third_character = draw(st.sampled_from("QL")) fourth_character = draw(st.sampled_from("RT")) return first_character + second_character + third_character + fourth_character + else: + second_character = draw(st.sampled_from("OWGVLPT")) + third_character = draw(st.sampled_from("PIF")) + fourth_character = draw(st.sampled_from("RT")) + # local = draw(st.sampled_from(["", "L"])) if first_character in "BCW" else "" + return first_character + second_character + third_character + fourth_character + - kind = draw(st.sampled_from([1, 2, 3, 4])) - if kind == 1: +@st.composite +def summary_variables(draw): + kind = draw( + st.sampled_from( + [ + "special", + "network", + "exceptions", + "directional", + "up_or_down", + "mnemonic", + "segment", + "well", + "region2region", + "memnonic", + "region", + ] + ) + ) + if kind == "special": + return draw(st.sampled_from(SPECIAL_KEYWORDS)) + if kind == "exceptions": return draw( st.sampled_from( - ["BAPI", "BOSAT", "BPR", "FAQR", "FPR", "FWCT", "WBHP", "WWCT"] + ["BAPI", "BOSAT", "BPR", "FAQR", "FPR", "FWCT", "WBHP", "WWCT", "ROFR"] ) ) - elif kind == 2: + elif kind == "directional": direction = draw(st.sampled_from("IJK")) return ( draw(st.sampled_from(["FLOO", "VELG", "VELO", "FLOW", "VELW"])) + direction ) - elif kind == 3: + elif kind == "up_or_down": dimension = draw(st.sampled_from("XYZRT")) direction = draw(st.sampled_from(["", "-"])) return draw(st.sampled_from(["GKR", "OKR", "WKR"])) + dimension + direction + elif kind == "network": + root = draw(root_memnonic()) + return "N" + root + elif kind == "segment": + return draw( + st.sampled_from(["SALQ", "SFR", "SGFR", "SGFRF", "SGFRS", "SGFTA", "SGFT"]) + ) + elif kind == "well": + return draw( + st.one_of( + st.builds(lambda r: "W" + r, root_memnonic()), + st.sampled_from( + [ + "WBHP", + "WBP5", + "WBP4", + "WBP9", + "WBP", + "WBHPH", + "WBHPT", + "WPIG", + "WPIL", + "WPIO", + "WPI5", + ] + ), + ) + ) + elif kind == "region2region": + return draw(st.sampled_from(inter_region_summary_variables)) + elif kind == "region": + return draw(st.builds(lambda r: "R" + r, root_memnonic())) else: - second_character = draw(st.sampled_from("OWGVLPT")) - third_character = draw(st.sampled_from("PIF")) - fourth_character = draw(st.sampled_from("RT")) - return first_character + second_character + third_character + fourth_character + return draw(root_memnonic()) unit_names = st.sampled_from( - ["SM3/DAY", "BARSA", "SM3/SM3", "FRACTION", "DAYS", "YEARS", "SM3", "SECONDS"] + ["SM3/DAY", "BARSA", "SM3/SM3", "FRACTION", "DAYS", "HOURS", "SM3"] ) names = st.text( @@ -101,6 +191,28 @@ class Date: def to_ecl(self): return astuple(self) + def to_datetime(self) -> datetime: + return datetime( + year=self.year, + month=self.month, + day=self.day, + hour=self.hour, + minute=self.minutes, + second=self.micro_seconds // 10**6, + microsecond=self.micro_seconds % 10**6, + ) + + @classmethod + def from_datetime(cls, dt: datetime) -> Self: + return cls( + year=dt.year, + month=dt.month, + day=dt.day, + hour=dt.hour, + minutes=dt.minute, + micro_seconds=dt.second * 10**6 + dt.microsecond, + ) + @dataclass class Smspec: @@ -113,9 +225,14 @@ class Smspec: restarted_from_step: PositiveInt keywords: List[str] well_names: List[str] - region_numbers: List[str] + region_numbers: List[int] units: List[str] start_date: Date + lgr_names: Optional[List[str]] = None + lgrs: Optional[List[str]] = None + numlx: Optional[List[PositiveInt]] = None + numly: Optional[List[PositiveInt]] = None + numlz: Optional[List[PositiveInt]] = None def to_ecl(self) -> List[Tuple[str, Any]]: # The restart field contains 9 strings of length 8 which @@ -124,29 +241,36 @@ def to_ecl(self) -> List[Tuple[str, Any]]: # are spaces. (opm manual table F.44, keyword name RESTART) restart = self.restart.ljust(72, " ") restart_list = [restart[i * 8 : i * 8 + 8] for i in range(9)] - return [ - ("INTEHEAD", np.array(self.intehead.to_ecl(), dtype=np.int32)), - ("RESTART ", restart_list), - ( - "DIMENS ", - np.array( - [ - self.num_keywords, - self.nx, - self.ny, - self.nz, - 0, - self.restarted_from_step, - ], - dtype=np.int32, + return ( + [ + ("INTEHEAD", np.array(self.intehead.to_ecl(), dtype=np.int32)), + ("RESTART ", restart_list), + ( + "DIMENS ", + np.array( + [ + self.num_keywords, + self.nx, + self.ny, + self.nz, + 0, + self.restarted_from_step, + ], + dtype=np.int32, + ), ), - ), - ("KEYWORDS", [kw.ljust(8) for kw in self.keywords]), - ("WGNAMES ", self.well_names), - ("NUMS ", np.array(self.region_numbers, dtype=np.int32)), - ("UNITS ", self.units), - ("STARTDAT", np.array(self.start_date.to_ecl(), dtype=np.int32)), - ] + ("KEYWORDS", [kw.ljust(8) for kw in self.keywords]), + ("WGNAMES ", self.well_names), + ("NUMS ", np.array(self.region_numbers, dtype=np.int32)), + ("UNITS ", self.units), + ("STARTDAT", np.array(self.start_date.to_ecl(), dtype=np.int32)), + ] + + ([("LGRS ", self.lgrs)] if self.lgrs is not None else []) + + ([("LGRNAMES", self.lgr_names)] if self.lgr_names is not None else []) + + ([("NUMLX ", self.numlx)] if self.numlx is not None else []) + + ([("NUMLY ", self.numly)] if self.numly is not None else []) + + ([("NUMLZ ", self.numlz)] if self.numlz is not None else []) + ) def to_file(self, filelike, file_format: resfo.Format = resfo.Format.UNFORMATTED): resfo.write(filelike, self.to_ecl(), file_format) @@ -166,6 +290,7 @@ def smspecs( Strategy for smspec that ensures that the TIME parameter, as required by ert, is in the parameters list. """ + use_locals = draw(st.booleans()) sum_keys = draw(sum_keys) n = len(sum_keys) + 1 nx = draw(small_ints) @@ -174,9 +299,21 @@ def smspecs( keywords = ["TIME "] + sum_keys units = ["DAYS "] + draw(st.lists(unit_names, min_size=n - 1, max_size=n - 1)) well_names = [":+:+:+:+"] + draw(st.lists(names, min_size=n - 1, max_size=n - 1)) + if use_locals: # use local + lgrs = draw(st.lists(names, min_size=n, max_size=n)) + numlx = draw(st.lists(small_ints, min_size=n, max_size=n)) + numly = draw(st.lists(small_ints, min_size=n, max_size=n)) + numlz = draw(st.lists(small_ints, min_size=n, max_size=n)) + lgr_names = list(set(lgrs)) + else: + lgrs = None + numlx = None + numly = None + numlz = None + lgr_names = None region_numbers = [-32676] + draw( st.lists( - from_dtype(np.dtype(np.int32), min_value=0, max_value=10), + from_dtype(np.dtype(np.int32), min_value=1, max_value=nx * ny * nz), min_size=len(sum_keys), max_size=len(sum_keys), ) @@ -195,6 +332,11 @@ def smspecs( restart=names, keywords=st.just(keywords), well_names=st.just(well_names), + lgrs=st.just(lgrs), + numlx=st.just(numlx), + numly=st.just(numly), + numlz=st.just(numlz), + lgr_names=st.just(lgr_names), region_numbers=st.just(region_numbers), units=st.just(units), start_date=start_date, @@ -271,3 +413,68 @@ def unsmrys( ] steps.append(SummaryStep(r, minis)) return Unsmry(steps) + + +@st.composite +def summaries(draw): + sum_keys = draw(st.lists(summary_variables(), min_size=1)) + first_date = draw(st.datetimes(min_value=datetime.strptime("1999-1-1", "%Y-%m-%d"))) + smspec = draw( + smspecs( + sum_keys=st.just(sum_keys), + start_date=st.just( + Date( + year=first_date.year, + month=first_date.month, + day=first_date.day, + hour=first_date.hour, + minutes=first_date.minute, + micro_seconds=first_date.second * 10**6 + first_date.microsecond, + ) + ), + ) + ) + assume( + len(set(zip(smspec.keywords, smspec.region_numbers, smspec.well_names))) + == len(smspec.keywords) + ) + dates = [0.0] + draw( + st.lists( + st.floats( + min_value=0.1, + allow_nan=False, + allow_infinity=False, + ), + min_size=2, + max_size=100, + ) + ) + try: + _ = first_date + timedelta(days=max(dates)) + except (ValueError, OverflowError): # datetime has a max year + assume(False) + + ds = sorted(dates, reverse=True) + steps = [] + i = 0 + j = 0 + while len(ds) > 0: + minis = [] + for _ in range(draw(st.integers(min_value=1, max_value=len(ds)))): + minis.append( + SummaryMiniStep( + i, + [ds.pop()] + + draw( + st.lists( + positive_floats, + min_size=len(sum_keys), + max_size=len(sum_keys), + ) + ), + ) + ) + i += 1 + steps.append(SummaryStep(j, minis)) + j += 1 + return smspec, Unsmry(steps) diff --git a/tests/unit_tests/config/test_read_summary.py b/tests/unit_tests/config/test_read_summary.py new file mode 100644 index 00000000000..7d9036c98a6 --- /dev/null +++ b/tests/unit_tests/config/test_read_summary.py @@ -0,0 +1,290 @@ +from datetime import datetime, timedelta +from itertools import zip_longest + +import hypothesis.strategies as st +import pytest +from ecl.summary import EclSum, EclSumVarType +from hypothesis import given +from resfo import Format + +from ert.config._read_summary import _SummaryType, make_summary_key, read_summary + +from .summary_generator import ( + inter_region_summary_variables, + summaries, + summary_variables, +) + + +def to_ecl(st: _SummaryType) -> EclSumVarType: + if st == _SummaryType.AQUIFER: + return EclSumVarType.ECL_SMSPEC_AQUIFER_VAR + if st == _SummaryType.BLOCK: + return EclSumVarType.ECL_SMSPEC_BLOCK_VAR + if st == _SummaryType.COMPLETION: + return EclSumVarType.ECL_SMSPEC_COMPLETION_VAR + if st == _SummaryType.FIELD: + return EclSumVarType.ECL_SMSPEC_FIELD_VAR + if st == _SummaryType.GROUP: + return EclSumVarType.ECL_SMSPEC_GROUP_VAR + if st == _SummaryType.LOCAL_BLOCK: + return EclSumVarType.ECL_SMSPEC_LOCAL_BLOCK_VAR + if st == _SummaryType.LOCAL_COMPLETION: + return EclSumVarType.ECL_SMSPEC_LOCAL_COMPLETION_VAR + if st == _SummaryType.LOCAL_WELL: + return EclSumVarType.ECL_SMSPEC_LOCAL_WELL_VAR + if st == _SummaryType.NETWORK: + return EclSumVarType.ECL_SMSPEC_NETWORK_VAR + if st == _SummaryType.SEGMENT: + return EclSumVarType.ECL_SMSPEC_SEGMENT_VAR + if st == _SummaryType.WELL: + return EclSumVarType.ECL_SMSPEC_WELL_VAR + if st == _SummaryType.REGION: + return EclSumVarType.ECL_SMSPEC_REGION_VAR + if st == _SummaryType.INTER_REGION: + return EclSumVarType.ECL_SMSPEC_REGION_2_REGION_VAR + if st == _SummaryType.OTHER: + return EclSumVarType.ECL_SMSPEC_MISC_VAR + + +@pytest.mark.parametrize("keyword", ["AAQR", "AAQT"]) +def test_aquifer_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_AQUIFER_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.AQUIFER + + +@pytest.mark.parametrize("keyword", ["BOSAT"]) +def test_block_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_BLOCK_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.BLOCK + + +@pytest.mark.parametrize("keyword", ["LBOSAT"]) +def test_local_block_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_LOCAL_BLOCK_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.LOCAL_BLOCK + + +@pytest.mark.parametrize("keyword", ["CGORL"]) +def test_completion_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_COMPLETION_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.COMPLETION + + +@pytest.mark.parametrize("keyword", ["LCGORL"]) +def test_local_completion_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_LOCAL_COMPLETION_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.LOCAL_COMPLETION + + +@pytest.mark.parametrize("keyword", ["FGOR", "FOPR"]) +def test_field_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_FIELD_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.FIELD + + +@pytest.mark.parametrize("keyword", ["GGFT", "GOPR"]) +def test_group_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_GROUP_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.GROUP + + +@pytest.mark.parametrize("keyword", ["NOPR", "NGPR"]) +def test_network_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_NETWORK_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.NETWORK + + +@pytest.mark.parametrize("keyword", inter_region_summary_variables) +def test_inter_region_summary_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_REGION_2_REGION_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.INTER_REGION + + +@pytest.mark.parametrize("keyword", ["RORFR", "RPR", "ROPT"]) +def test_region_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_REGION_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.REGION + + +@pytest.mark.parametrize("keyword", ["SOPR"]) +def test_segment_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_SEGMENT_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.SEGMENT + + +@pytest.mark.parametrize("keyword", ["WOPR"]) +def test_well_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_WELL_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.WELL + + +@pytest.mark.parametrize("keyword", ["LWOPR"]) +def test_local_well_variables_are_recognized(keyword): + assert EclSum.var_type(keyword) == EclSumVarType.ECL_SMSPEC_LOCAL_WELL_VAR + assert _SummaryType.from_keyword(keyword) == _SummaryType.LOCAL_WELL + + +@given(summary_variables()) +def test_that_identify_var_type_is_same_as_ecl(variable): + assert EclSum.var_type(variable) == to_ecl(_SummaryType.from_keyword(variable)) + + +@given(st.integers(), st.text(), st.integers(), st.integers()) +@pytest.mark.parametrize("keyword", ["FOPR", "NEWTON"]) +def test_summary_key_format_of_field_and_misc_is_identity( + keyword, number, name, nx, ny +): + assert make_summary_key(keyword, number, name, nx, ny) == keyword + + +@given(st.integers(), st.text(), st.integers(), st.integers()) +def test_network_variable_keys_has_keyword_as_summary_key(number, name, nx, ny): + assert make_summary_key("NOPR", number, name, nx, ny) == "NOPR" + + +@given(st.integers(), st.text(), st.integers(), st.integers()) +@pytest.mark.parametrize("keyword", ["GOPR", "WOPR"]) +def test_group_and_well_have_named_format(keyword, number, name, nx, ny): + assert make_summary_key(keyword, number, name, nx, ny) == f"{keyword}:{name}" + + +@given(st.text(), st.integers(), st.integers()) +@pytest.mark.parametrize("keyword", inter_region_summary_variables) +def test_inter_region_summary_format_contains_in_and_out_regions(keyword, name, nx, ny): + number = 3014660 + assert make_summary_key(keyword, number, name, nx, ny) == f"{keyword}:4-82" + + +@given(name=st.text()) +@pytest.mark.parametrize("keyword", ["BOPR", "BOSAT"]) +@pytest.mark.parametrize( + "nx,ny,number,indices", + [ + (1, 1, 1, "1,1,1"), + (2, 1, 2, "2,1,1"), + (1, 2, 2, "1,2,1"), + (3, 2, 3, "3,1,1"), + (3, 2, 9, "3,1,2"), + ], +) +def test_block_summary_format_have_cell_index(keyword, number, indices, name, nx, ny): + assert make_summary_key(keyword, number, name, nx, ny) == f"{keyword}:{indices}" + + +@given(name=st.text()) +@pytest.mark.parametrize("keyword", ["COPR"]) +@pytest.mark.parametrize( + "nx,ny,number,indices", + [ + (1, 1, 1, "1,1,1"), + (2, 1, 2, "2,1,1"), + (1, 2, 2, "1,2,1"), + (3, 2, 3, "3,1,1"), + (3, 2, 9, "3,1,2"), + ], +) +def test_completion_summary_format_have_cell_index_and_name( + keyword, number, indices, name, nx, ny +): + assert ( + make_summary_key(keyword, number, name, nx, ny) == f"{keyword}:{name}:{indices}" + ) + + +@given(summaries(), st.sampled_from(Format)) +def test_that_reading_summaries_returns_the_contents_of_the_file( + tmp_path_factory, summary, format +): + tmp_path = tmp_path_factory.mktemp("summary") + format_specifier = "F" if format == Format.FORMATTED else "" + smspec, unsmry = summary + unsmry.to_file(tmp_path / f"TEST.{format_specifier}UNSMRY", format) + smspec.to_file(tmp_path / f"TEST.{format_specifier}SMSPEC", format) + (keys, time_map, data) = read_summary(str(tmp_path / "TEST"), ["*"]) + + local_name = smspec.lgrs if smspec.lgrs else [] + lis = smspec.numlx if smspec.numlx else [] + ljs = smspec.numly if smspec.numly else [] + lks = smspec.numlz if smspec.numlz else [] + keys_in_smspec = [ + x + for x in map( + lambda x: make_summary_key(*x[:3], smspec.nx, smspec.ny, *x[3:]), + zip_longest( + [k.rstrip() for k in smspec.keywords], + smspec.region_numbers, + smspec.well_names, + local_name, + lis, + ljs, + lks, + fillvalue=None, + ), + ) + ] + assert set(keys) == set((k for k in keys_in_smspec if k)) + + def to_date(start_date: datetime, offset: float, unit: str) -> datetime: + if unit == "DAYS": + return start_date + timedelta(days=offset) + if unit == "HOURS": + return start_date + timedelta(hours=offset) + raise ValueError(f"Unknown time unit {unit}") + + assert all( + abs(actual - expected) <= timedelta(minutes=15) + for actual, expected in zip_longest( + time_map, + [ + to_date( + smspec.start_date.to_datetime(), + s.ministeps[-1].params[0], + smspec.units[0].strip(), + ) + for s in unsmry.steps + ], + ) + ) + for key, d in zip_longest(keys, data): + index = [i for i, k in enumerate(sorted(keys_in_smspec)) if k == key][-1] + assert [s.ministeps[-1].params[index] for s in unsmry.steps] == pytest.approx(d) + + +@pytest.mark.parametrize( + "spec_contents, smry_contents", + [ + (b"", b""), + (b"1", b"1"), + (b"\x00\x00\x00\x10", b"1"), + (b"\x00\x00\x00\x10UNEXPECTED", b"\x00\x00\x00\x10UNEXPECTED"), + ( + b"\x00\x00\x00\x10UNEXPECTED", + b"\x00\x00\x00\x10KEYWORD1" + (2200).to_bytes(4, byteorder="big"), + ), + ( + b"\x00\x00\x00\x10FOOOOOOO\x00", + b"\x00\x00\x00\x10FOOOOOOO" + + (2300).to_bytes(4, byteorder="big") + + b"INTE\x00\x00\x00\x10" + + b"\x00" * (4 * 2300 + 4 * 6), + ), + ( + b"\x00\x00\x00\x10FOOOOOOO\x00\x00\x00\x01" + + b"INTE" + + b"\x00\x00\x00\x10" + + (4).to_bytes(4, signed=True, byteorder="big") + + b"\x00" * 4 + + (4).to_bytes(4, signed=True, byteorder="big"), + b"\x00\x00\x00\x10FOOOOOOO\x00", + ), + ], +) +def test_that_incorrect_summary_files_raises_informative_errors( + smry_contents, spec_contents, tmp_path +): + (tmp_path / "test.UNSMRY").write_bytes(smry_contents) + (tmp_path / "test.SMSPEC").write_bytes(spec_contents) + + with pytest.raises(ValueError): + read_summary(str(tmp_path / "test"), ["*"])