From 57b9aa6b53cfd1abf51be97f7b7b21b413ffba3a Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Mon, 12 Aug 2024 14:15:47 +0200 Subject: [PATCH] Fix issue with from_pandas --- .github/workflows/testing.yml | 2 +- lib/include/resdata/rd_sum.hpp | 6 + lib/resdata/rd_sum.cpp | 16 + python/resdata/summary/rd_sum.py | 126 +++++-- python/tests/conftest.py | 11 + python/tests/egrid_generator.py | 366 +++++++++++++++++++ python/tests/rd_tests/test_rd_sum.py | 47 ++- python/tests/summary_generator.py | 515 +++++++++++++++++++++++++++ test_requirements.txt | 2 + 9 files changed, 1055 insertions(+), 36 deletions(-) create mode 100644 python/tests/conftest.py create mode 100644 python/tests/egrid_generator.py create mode 100644 python/tests/summary_generator.py create mode 100644 test_requirements.txt diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 5a9d9ce76..17a4c19ed 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -107,7 +107,7 @@ jobs: - name: Run Python tests run: | # Runs tests on installed distribution from an empty directory - python -m pip install pytest + python -m pip install -r test_requirements.txt # pytest adds every directory up-to and including python/ into sys.path, # meaning that "import resdata" will import python/resdata and not the installed diff --git a/lib/include/resdata/rd_sum.hpp b/lib/include/resdata/rd_sum.hpp index eb01070fe..4e6ebc39a 100644 --- a/lib/include/resdata/rd_sum.hpp +++ b/lib/include/resdata/rd_sum.hpp @@ -253,6 +253,12 @@ const rd::smspec_node *rd_sum_add_smspec_node(rd_sum_type *rd_sum, const rd::smspec_node *rd_sum_add_var(rd_sum_type *rd_sum, const char *keyword, const char *wgname, int num, const char *unit, float default_value); +const rd::smspec_node *rd_sum_add_local_var(rd_sum_type *rd_sum, + const char *keyword, + const char *wgname, int num, + const char *unit, const char *lgr, + int lgr_i, int lgr_j, int lgr_k, + float default_value); rd_sum_tstep_type *rd_sum_add_tstep(rd_sum_type *rd_sum, int report_step, double sim_seconds); diff --git a/lib/resdata/rd_sum.cpp b/lib/resdata/rd_sum.cpp index c62b0728f..a7fac9714 100644 --- a/lib/resdata/rd_sum.cpp +++ b/lib/resdata/rd_sum.cpp @@ -282,6 +282,22 @@ const rd::smspec_node *rd_sum_add_var(rd_sum_type *rd_sum, const char *keyword, default_value); } +const rd::smspec_node *rd_sum_add_local_var(rd_sum_type *rd_sum, + const char *keyword, + const char *wgname, int num, + const char *unit, const char *lgr, + int lgr_i, int lgr_j, int lgr_k, + float default_value) { + if (rd_sum_data_get_length(rd_sum->data) > 0) + throw std::invalid_argument( + "Can not interchange variable adding and timesteps.\n"); + + int params_index = rd_smspec_num_nodes(rd_sum->smspec); + return rd_smspec_add_node(rd_sum->smspec, params_index, keyword, wgname, + num, unit, lgr, lgr_i, lgr_j, lgr_k, + default_value); +} + const rd::smspec_node *rd_sum_add_smspec_node(rd_sum_type *rd_sum, const rd::smspec_node *node) { return rd_smspec_add_node(rd_sum->smspec, *node); diff --git a/python/resdata/summary/rd_sum.py b/python/resdata/summary/rd_sum.py index 065e461fd..3d7e32d22 100644 --- a/python/resdata/summary/rd_sum.py +++ b/python/resdata/summary/rd_sum.py @@ -13,6 +13,7 @@ import ctypes import pandas import re +from typing import Sequence, List, Tuple, Optional # Observe that there is some convention conflict with the C code # regarding order of arguments: The C code generally takes the time @@ -180,6 +181,9 @@ class Summary(BaseCClass): _add_variable = ResdataPrototype( "rd_smspec_node_ref rd_sum_add_var(rd_sum, char*, char*, int, char*, double)" ) + _add_local_variable = ResdataPrototype( + "rd_smspec_node_ref rd_sum_add_local_var(rd_sum, char*, char*, int, char*, char*, int, int, int, double)" + ) _add_tstep = ResdataPrototype( "rd_sum_tstep_ref rd_sum_add_tstep(rd_sum, int, double)" ) @@ -360,7 +364,21 @@ def restart_writer( smry._load_case = "restart_writer" return smry - def add_variable(self, variable, wgname=None, num=0, unit="None", default_value=0): + def add_variable( + self, + variable, + wgname=None, + num=0, + unit="None", + default_value=0, + lgr=None, + lgr_ijk=None, + ): + if lgr is not None: + return self._add_local_variable( + variable, wgname, num, unit, lgr, *lgr_ijk, default_value + ).setParent(parent=self) + return self._add_variable(variable, wgname, num, unit, default_value).setParent( parent=self ) @@ -606,36 +624,85 @@ def pandas_frame(self, time_index=None, column_keys=None): return frame @staticmethod - def _compile_headers_list(headers, dims): + def _compile_headers_list( + headers: Sequence[str], dims: Optional[List[int]] + ) -> List[Tuple[str, str, int, str, Optional[str], Optional[Tuple[int, int, int]]]]: + """ + Converts column names generated with `Summary.pandas_frame()` so + that `Summary.from_pandas(sum.pandas_frame()) == sum`. + + The column names are specified by `smspec_node::gen_key1` see + `smspec_node::gen_key1`. + """ var_list = [] for key in headers: lst = re.split(":", key) kw = lst[0] wgname = None - num = 0 + lgr = None + nums = None + num = None unit = "UNIT" - if len(lst) > 1: - nums = [] - if lst[1][0].isdigit(): - nums = re.split(",", lst[1]) - else: - wgname = lst[1] - if len(lst) == 3: - nums = re.split(",", lst[2]) - if len(nums) == 3: - i = int(nums[0]) - 1 - j = int(nums[1]) - 1 - k = int(nums[2]) - 1 - if dims is None: - raise ValueError( - "For key %s When using indexing i,j,k you must supply a valid value for the dims argument" - % key - ) - num = i + j * dims[0] + k * dims[0] * dims[1] + 1 - elif len(nums) == 1: - num = int(nums[0]) - - var_list.append([kw, wgname, num, unit]) + var_type = Summary.var_type(kw) + if var_type == SummaryVarType.RD_SMSPEC_INVALID_VAR: + raise ValueError(f"Invalid var type: {kw}") + elif var_type == SummaryVarType.RD_SMSPEC_FIELD_VAR: + pass + elif var_type == SummaryVarType.RD_SMSPEC_REGION_VAR: + num = int(lst[1]) + elif var_type == SummaryVarType.RD_SMSPEC_GROUP_VAR: + wgname = lst[1] + elif var_type == SummaryVarType.RD_SMSPEC_WELL_VAR: + wgname = lst[1] + elif var_type == SummaryVarType.RD_SMSPEC_SEGMENT_VAR: + kw, wgname, num = lst + num = int(num) + elif var_type == SummaryVarType.RD_SMSPEC_BLOCK_VAR: + kw, nums = lst + nums = tuple(int(i) for i in nums.split(",")) + elif var_type == SummaryVarType.RD_SMSPEC_AQUIFER_VAR: + kw, num = lst + num = int(num) + elif var_type == SummaryVarType.RD_SMSPEC_COMPLETION_VAR: + kw, wgname, nums = lst + nums = tuple(int(i) for i in nums.split(",")) + elif var_type == SummaryVarType.RD_SMSPEC_NETWORK_VAR: + kw, wgname = lst + elif var_type == SummaryVarType.RD_SMSPEC_REGION_2_REGION_VAR: + kw, r1r2 = lst + r1, r2 = tuple(int(i) for i in r1r2.split("-", 1)) + num = (r2 + 10) * 32768 + r1 + elif var_type == SummaryVarType.RD_SMSPEC_LOCAL_BLOCK_VAR: + kw, lgr, nums = lst + nums = tuple(int(i) for i in nums.split(",")) + elif var_type == SummaryVarType.RD_SMSPEC_LOCAL_COMPLETION_VAR: + kw, lgr, wgname, nums = lst + nums = tuple(int(i) for i in nums.split(",")) + elif var_type == SummaryVarType.RD_SMSPEC_LOCAL_WELL_VAR: + kw, lgr, wgname = lst + nums = (0, 0, 0) # We don't know from the list so use dummy + elif var_type == SummaryVarType.RD_SMSPEC_MISC_VAR: + pass + else: + raise ValueError(f"Unknown SummaryVarType {var_type}") + + if nums and num is None: + i = int(nums[0]) - 1 + j = int(nums[1]) - 1 + k = int(nums[2]) - 1 + if dims is None: + raise ValueError( + "For key %s When using indexing i,j,k you must supply a valid value for the dims argument" + % key + ) + num = i + j * dims[0] + k * dims[0] * dims[1] + 1 + + if num is None: + num = 0 + if wgname is None: + wgname = "" + + var_list.append((kw, wgname, num, unit, lgr, nums)) return var_list @classmethod @@ -656,18 +723,21 @@ def from_pandas(cls, case, frame, dims=None, headers=None): if dims is None: dims = [1, 1, 1] rd_sum = Summary.writer(case, start_time, dims[0], dims[1], dims[2]) - for kw, wgname, num, unit in header_list: + for kw, wgname, num, unit, lgr, lgr_ijk in header_list: var_list.append( - rd_sum.addVariable(kw, wgname=wgname, num=num, unit=unit).getKey1() + rd_sum.add_variable( + kw, wgname=wgname, num=num, unit=unit, lgr=lgr, lgr_ijk=lgr_ijk + ).getKey1() ) for i, time in enumerate(frame.index): - days = (time - start_time).days + days = (time - start_time).total_seconds() / 86400 t_step = rd_sum.addTStep(i + 1, days) for var in var_list: t_step[var] = frame.iloc[i][var] + rd_sum._load_case = case return rd_sum def get_key_index(self, key): diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 000000000..1295be434 --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,11 @@ +from hypothesis import HealthCheck, settings + +# Timeout settings are unreliable both on CI and +# when running pytest with xdist so we disable it +settings.register_profile( + "no_timeouts", + deadline=None, + suppress_health_check=[HealthCheck.too_slow], + print_blob=True, +) +settings.load_profile("no_timeouts") diff --git a/python/tests/egrid_generator.py b/python/tests/egrid_generator.py new file mode 100644 index 000000000..a6a2bb313 --- /dev/null +++ b/python/tests/egrid_generator.py @@ -0,0 +1,366 @@ +from dataclasses import astuple, dataclass +from enum import Enum, auto, unique +from typing import Any, List, Optional, Tuple + +import hypothesis.strategies as st +import numpy as np +import resfo +from hypothesis.extra.numpy import arrays + + +@unique +class Units(Enum): + METRES = auto() + CM = auto() + FEET = auto() + + def to_ecl(self): + return self.name.ljust(8) + + +@unique +class GridRelative(Enum): + """GridRelative is the second value given GRIDUNIT keyword. + + MAP means map relative units, while + leaving it blank means relative to the origin given by the + MAPAXES keyword. + """ + + MAP = auto() + ORIGIN = auto() + + def to_ecl(self) -> str: + if self == GridRelative.MAP: + return "MAP".ljust(8) + else: + return "".ljust(8) + + +@dataclass +class GrdeclKeyword: + """An abstract grdecl keyword. + + Gives a general implementation of to/from grdecl which recurses on + fields. Ie. a dataclass such as + >>> class A(GrdeclKeyword): + ... ... + >>> class B(GrdeclKeyword): + ... ... + + >>> @dataclass + ... class MyKeyword(GrdeclKeyword): + ... field1: A + ... field2: B + + will have a to_ecl method that will be similar to + + >>> def to_ecl(self): + ... return [self.field1.to_ecl(), self.field2.to_ecl] + """ + + def to_ecl(self) -> List[Any]: + return [value.to_ecl() for value in astuple(self)] + + +@dataclass +class GridUnit(GrdeclKeyword): + """Defines the units used for grid dimensions. + + The first value is a string describing the units used, defaults to METRES, + known accepted other units are FIELD and LAB. The last value describes + whether the measurements are relative to the map or to the origin of + MAPAXES. + """ + + unit: Units = Units.METRES + grid_relative: GridRelative = GridRelative.ORIGIN + + +@unique +class CoordinateType(Enum): + """The coordinate system type given in the SPECGRID keyword. + + This is given by either T or F in the last value of SPECGRID, meaning + either cylindrical or cartesian coordinates respectively. + """ + + CARTESIAN = auto() + CYLINDRICAL = auto() + + def to_ecl(self) -> int: + if self == CoordinateType.CARTESIAN: + return 0 + else: + return 1 + + +@unique +class TypeOfGrid(Enum): + """ + A Grid has three possible data layout formats, UNSTRUCTURED, CORNER_POINT, + BLOCK_CENTER and COMPOSITE (meaning combination of the two former). Only + CORNER_POINT layout is supported by XTGeo. + """ + + COMPOSITE = 0 + CORNER_POINT = 1 + UNSTRUCTURED = 2 + BLOCK_CENTER = 3 + + @property + def alternate_value(self): + """Inverse of alternate_code.""" + alternate_value = 0 + if self == TypeOfGrid.CORNER_POINT: + alternate_value = 0 + elif self == TypeOfGrid.UNSTRUCTURED: + alternate_value = 1 + elif self == TypeOfGrid.COMPOSITE: + alternate_value = 2 + elif self == TypeOfGrid.BLOCK_CENTER: + alternate_value = 3 + else: + raise ValueError(f"Unknown grid type {self}") + return alternate_value + + +@unique +class RockModel(Enum): + """ + Type of rock model. + """ + + SINGLE_PERMEABILITY_POROSITY = 0 + DUAL_POROSITY = 1 + DUAL_PERMEABILITY = 2 + + +@unique +class GridFormat(Enum): + """ + The format of the "original grid", ie., what + method was used to construct the values in the file. + """ + + UNKNOWN = 0 + IRREGULAR_CORNER_POINT = 1 + REGULAR_CARTESIAN = 2 + + +@dataclass +class Filehead: + """ + The first keyword in an egrid file is the FILEHEAD + keyword, containing metadata about the file and its + content. + """ + + version_number: int + year: int + version_bound: int + type_of_grid: TypeOfGrid + rock_model: RockModel + grid_format: GridFormat + + def to_ecl(self) -> np.ndarray: + """ + Returns: + List of values, as layed out after the FILEHEAD keyword for + the given filehead. + """ + # The data is expected to consist of + # 100 integers, but only a subset is used. + result = np.zeros((100,), dtype=np.int32) + result[0] = self.version_number + result[1] = self.year + result[3] = self.version_bound + result[4] = self.type_of_grid.alternate_value + result[5] = self.rock_model.value + result[6] = self.grid_format.value + return result + + +@dataclass +class GridHead: + """ + Both for lgr (see LGRSection) and the global grid (see GlobalGrid) + the GRIDHEAD keyword indicates the start of the grid layout for that + section. + """ + + type_of_grid: TypeOfGrid + num_x: int + num_y: int + num_z: int + grid_reference_number: int + numres: int + nseg: int + coordinate_type: CoordinateType + lgr_start: Tuple[int, int, int] + lgr_end: Tuple[int, int, int] + + def to_ecl(self) -> np.ndarray: + # The data is expected to consist of + # 100 integers, but only a subset is used. + result = np.zeros((100,), dtype=np.int32) + result[0] = self.type_of_grid.value + result[1] = self.num_x + result[2] = self.num_y + result[3] = self.num_z + result[4] = self.grid_reference_number + result[24] = self.numres + result[25] = self.nseg + result[26] = self.coordinate_type.to_ecl() + result[[27, 28, 29]] = np.array(self.lgr_start) + result[[30, 31, 32]] = np.array(self.lgr_end) + return result + + +@dataclass +class GlobalGrid: + """ + The global grid contains the layout of the grid before + refinements, and the sectioning into grid coarsening + through the optional corsnum keyword. + """ + + grid_head: GridHead + coord: np.ndarray + zcorn: np.ndarray + actnum: Optional[np.ndarray] = None + + def __eq__(self, other: object) -> bool: + if not isinstance(other, GlobalGrid): + return False + return ( + self.grid_head == other.grid_head + and np.array_equal(self.actnum, other.actnum) + and np.array_equal(self.coord, other.coord) + and np.array_equal(self.zcorn, other.zcorn) + ) + + def to_ecl(self) -> List[Tuple[str, Any]]: + result = [ + ("GRIDHEAD", self.grid_head.to_ecl()), + ("COORD ", self.coord.astype(np.float32)), + ("ZCORN ", self.zcorn.astype(np.float32)), + ] + if self.actnum is not None: + result.append(("ACTNUM ", self.actnum.astype(np.int32))) + result.append(("ENDGRID ", np.array([], dtype=np.int32))) + return result + + +@dataclass +class EGrid: + """Contains the data of an EGRID file. + + Args: + file_head: The file header starting with the FILEHEAD keyword + global_grid: The global grid + lgr_sections: List of local grid refinements. + nnc_sections: Describe non-neighboring sections as a list of either + NNCSections or AmalgamationSection's. + """ + + file_head: Filehead + global_grid: GlobalGrid + + @property + def shape(self) -> Tuple[int, int, int]: + grid_head = self.global_grid.grid_head + return (grid_head.num_x, grid_head.num_y, grid_head.num_z) + + def to_file( + self, + filelike, + ): + """ + write the EGrid to file. + Args: + filelike (str,Path,stream): The egrid file to write to. + """ + contents = [] + contents.append(("FILEHEAD", self.file_head.to_ecl())) + contents += self.global_grid.to_ecl() + resfo.write(filelike, contents) + + +finites = st.floats( + min_value=-100.0, max_value=100.0, allow_nan=False, allow_infinity=False, width=32 +) + +indices = st.integers(min_value=1, max_value=4) +units = st.sampled_from(Units) +grid_relatives = st.sampled_from(GridRelative) +coordinate_types = st.sampled_from(CoordinateType) +grid_units = st.builds(GridUnit, units, grid_relatives) + + +@st.composite +def zcorns(draw, dims): + return draw( + arrays( + shape=8 * dims[0] * dims[1] * dims[2], + dtype=np.float32, + elements=finites, + ) + ) + + +types_of_grid = st.just(TypeOfGrid.CORNER_POINT) +rock_models = st.sampled_from(RockModel) +grid_formats = st.just(GridFormat.IRREGULAR_CORNER_POINT) +file_heads = st.builds( + Filehead, + st.integers(min_value=0, max_value=5), + st.integers(min_value=2000, max_value=2022), + st.integers(min_value=0, max_value=5), + types_of_grid, + rock_models, + grid_formats, +) + +grid_heads = st.builds( + GridHead, + types_of_grid, + indices, + indices, + indices, + indices, + st.just(1), + st.just(1), + coordinate_types, + st.tuples(indices, indices, indices), + st.tuples(indices, indices, indices), +) + + +@st.composite +def global_grids(draw): + grid_head = draw(grid_heads) + dims = (grid_head.num_x, grid_head.num_y, grid_head.num_z) + corner_size = (dims[0] + 1) * (dims[1] + 1) * 6 + coord = arrays( + shape=corner_size, + dtype=np.float32, + elements=finites, + ) + actnum = st.one_of( + st.just(None), + arrays( + shape=dims[0] * dims[1] * dims[2], + dtype=np.int32, + elements=st.integers(min_value=0, max_value=3), + ), + ) + return GlobalGrid( + coord=draw(coord), + zcorn=draw(zcorns(dims)), + actnum=draw(actnum), + grid_head=grid_head, + ) + + +egrids = st.builds(EGrid, file_heads, global_grids()) diff --git a/python/tests/rd_tests/test_rd_sum.py b/python/tests/rd_tests/test_rd_sum.py index a18e71b2a..d29af0824 100644 --- a/python/tests/rd_tests/test_rd_sum.py +++ b/python/tests/rd_tests/test_rd_sum.py @@ -1,16 +1,17 @@ -#!/usr/bin/env python import datetime import os.path -import pandas as pd -import numpy as np -from cwrap import CFILE -from cwrap import Prototype, load, open as copen - -from resdata.resfile import ResdataFile, FortIO, openFortIO, openResdataFile, ResdataKW +import numpy as np +import pandas as pd +import pytest +from cwrap import open as copen +from hypothesis import assume, given +from pandas.testing import assert_frame_equal +from resdata.resfile import FortIO, ResdataKW, openFortIO, openResdataFile from resdata.summary import Summary, SummaryKeyWordVector from resdata.util.test import TestAreaContext from tests import ResdataTest, equinor_test +from tests.summary_generator import summaries def test_write_repr(): @@ -154,3 +155,35 @@ def test_labscale(self): self.assertEqual(sum.getStartTime(), datetime.datetime(2013, 1, 1, 0, 0, 0)) self.assertEqual(sum.getEndTime(), datetime.datetime(2013, 1, 1, 19, 30, 0)) self.assertFloatEqual(sum.getSimulationLength(), 19.50) + + +@pytest.fixture +def use_tmpdir(tmpdir): + with tmpdir.as_wcd(): + yield + + +@given(summaries()) +@pytest.mark.use_fixtures("use_tmpdir") +def test_to_from_pandas(summary): + smspec, unsmry = summary + assume(len(smspec.keywords) == len(set(smspec.keywords))) + smspec.to_file("TEST.SMSPEC") + unsmry.to_file("TEST.UNSMRY") + sum = Summary("TEST", lazy_load=False) + + baseline = sum.pandas_frame() + roundtrip = Summary.from_pandas( + "TEST", baseline, dims=(smspec.nx, smspec.ny, smspec.nz) + ).pandas_frame() + + # round to days + roundtrip.index = roundtrip.index.round(freq="D") + baseline.index = baseline.index.round(freq="D") + + assert_frame_equal( + roundtrip, + baseline, + check_exact=False, + atol=17, + ) diff --git a/python/tests/summary_generator.py b/python/tests/summary_generator.py new file mode 100644 index 000000000..763555d30 --- /dev/null +++ b/python/tests/summary_generator.py @@ -0,0 +1,515 @@ +""" +Implements a hypothesis strategy for unified summary files +(.SMSPEC and .UNSMRY) without any optional fields. +See https://opm-project.org/?page_id=955 +""" + +from dataclasses import astuple, dataclass +from datetime import datetime, timedelta +from enum import Enum, unique +from typing import Any, List, Optional, Tuple + +import hypothesis.strategies as st +import numpy as np +import resfo +from hypothesis import assume +from hypothesis.extra.numpy import from_dtype +from pydantic import PositiveInt, conint +from typing_extensions import Self + +from ert.config._read_summary import SPECIAL_KEYWORDS + +from .egrid_generator import GrdeclKeyword + +""" +See section 11.2 in opm flow reference manual 2022-10 +for definition of summary variable names. +""" + +inter_region_summary_variables = [ + "RGFR", + "RGFR+", + "RGFR-", + "RGFT", + "RGFT+", + "RGFT-", + "RGFTG", + "RGFTL", + "ROFR", + "ROFR+", + "ROFR-", + "ROFT", + "ROFT+", + "ROFT-", + "ROFTG", + "ROFTL", + "RWFR", + "RWFR+", + "RWFR-", + "RWFT", + "RWFT+", + "RWFT-", + "RCFT", + "RSFT", + "RNFT", +] + + +@st.composite +def root_memnonic(draw): + first_character = draw(st.sampled_from("ABFGRWCS")) + if first_character == "A": + second_character = draw(st.sampled_from("ALN")) + third_character = draw(st.sampled_from("QL")) + fourth_character = draw(st.sampled_from("RT")) + return first_character + second_character + third_character + fourth_character + else: + second_character = draw(st.sampled_from("OWGVLPT")) + third_character = draw(st.sampled_from("PIF")) + fourth_character = draw(st.sampled_from("RT")) + local = draw(st.sampled_from(["", "L"])) if first_character in "BCW" else "" + return ( + local + + first_character + + second_character + + third_character + + fourth_character + ) + + +@st.composite +def summary_variables(draw): + kind = draw( + st.sampled_from( + [ + "special", + "network", + "exceptions", + "directional", + "up_or_down", + "mnemonic", + "segment", + "well", + "region2region", + "memnonic", + "region", + ] + ) + ) + if kind == "special": + return draw(st.sampled_from(SPECIAL_KEYWORDS)) + if kind == "exceptions": + return draw( + st.sampled_from( + ["BAPI", "BOSAT", "BPR", "FAQR", "FPR", "FWCT", "WBHP", "WWCT", "ROFR"] + ) + ) + elif kind == "directional": + direction = draw(st.sampled_from("IJK")) + return ( + draw(st.sampled_from(["FLOO", "VELG", "VELO", "FLOW", "VELW"])) + direction + ) + elif kind == "up_or_down": + dimension = draw(st.sampled_from("XYZRT")) + direction = draw(st.sampled_from(["", "-"])) + return draw(st.sampled_from(["GKR", "OKR", "WKR"])) + dimension + direction + elif kind == "network": + root = draw(root_memnonic()) + return "N" + root + elif kind == "segment": + return draw( + st.sampled_from(["SALQ", "SFR", "SGFR", "SGFRF", "SGFRS", "SGFTA", "SGFT"]) + ) + elif kind == "well": + return draw( + st.one_of( + st.builds(lambda r: "W" + r, root_memnonic()), + st.sampled_from( + [ + "WBHP", + "WBP5", + "WBP4", + "WBP9", + "WBP", + "WBHPH", + "WBHPT", + "WPIG", + "WPIL", + "WPIO", + "WPI5", + ] + ), + ) + ) + elif kind == "region2region": + return draw(st.sampled_from(inter_region_summary_variables)) + elif kind == "region": + return draw(st.builds(lambda r: "R" + r, root_memnonic())) + else: + return draw(root_memnonic()) + + +unit_names = st.sampled_from( + ["SM3/DAY", "BARSA", "SM3/SM3", "FRACTION", "DAYS", "HOURS", "SM3"] +) + +names = st.text( + min_size=8, max_size=8, alphabet=st.characters(min_codepoint=65, max_codepoint=90) +) + + +@unique +class UnitSystem(Enum): + METRIC = 1 + FIELD = 2 + LAB = 3 + + def to_ecl(self): + return self.value + + +@unique +class Simulator(Enum): + ECLIPSE_100 = 100 + ECLIPSE_300 = 300 + ECLIPSE_300_THERMAL = 500 + INTERSECT = 700 + FRONTSIM = 800 + + def to_ecl(self): + return self.value + + +@dataclass +class SmspecIntehead(GrdeclKeyword): + unit: UnitSystem + simulator: Simulator + + +@dataclass +class Date: + day: conint(ge=1, le=31) # type: ignore + month: conint(ge=1, le=12) # type: ignore + year: conint(gt=1901, lt=2038) # type: ignore + hour: conint(ge=0, lt=24) # type: ignore + minutes: conint(ge=0, lt=60) # type: ignore + micro_seconds: conint(ge=0, lt=60000000) # type: ignore + + def to_ecl(self): + return astuple(self) + + def to_datetime(self) -> datetime: + return datetime( + year=self.year, + month=self.month, + day=self.day, + hour=self.hour, + minute=self.minutes, + second=self.micro_seconds // 10**6, + # Due to https://github.com/equinor/ert/issues/6952 + # microseconds have to be ignored to avoid overflow + # in netcdf3 files + # microsecond=self.micro_seconds % 10**6, + ) + + @classmethod + def from_datetime(cls, dt: datetime) -> Self: + return cls( + year=dt.year, + month=dt.month, + day=dt.day, + hour=dt.hour, + minutes=dt.minute, + # Due to https://github.com/equinor/ert/issues/6952 + # microseconds have to be ignored to avoid overflow + # in netcdf3 files + micro_seconds=dt.second * 10**6, + # micro_seconds=dt.second * 10**6 + dt.microsecond, + ) + + +@dataclass +class Smspec: + intehead: SmspecIntehead + restart: str + num_keywords: PositiveInt + nx: PositiveInt + ny: PositiveInt + nz: PositiveInt + restarted_from_step: PositiveInt + keywords: List[str] + well_names: List[str] + region_numbers: List[int] + units: List[str] + start_date: Date + lgrs: Optional[List[str]] = None + numlx: Optional[List[PositiveInt]] = None + numly: Optional[List[PositiveInt]] = None + numlz: Optional[List[PositiveInt]] = None + use_names: bool = False # whether to use the alias NAMES for WGNAMES + + def to_ecl(self) -> List[Tuple[str, Any]]: + # The restart field contains 9 strings of length 8 which + # should contain the name of the file restarted from. + # If shorter than 72 characters (most likely), the rest + # are spaces. (opm manual table F.44, keyword name RESTART) + restart = self.restart.ljust(72, " ") + restart_list = [restart[i * 8 : i * 8 + 8] for i in range(9)] + return ( + [ + ("INTEHEAD", np.array(self.intehead.to_ecl(), dtype=np.int32)), + ("RESTART ", restart_list), + ( + "DIMENS ", + np.array( + [ + self.num_keywords, + self.nx, + self.ny, + self.nz, + 0, + self.restarted_from_step, + ], + dtype=np.int32, + ), + ), + ("KEYWORDS", [kw.ljust(8) for kw in self.keywords]), + ( + ("NAMES ", self.well_names) + if self.use_names + else ("WGNAMES ", self.well_names) + ), + ("NUMS ", np.array(self.region_numbers, dtype=np.int32)), + ("UNITS ", self.units), + ("STARTDAT", np.array(self.start_date.to_ecl(), dtype=np.int32)), + ] + + ([("LGRS ", self.lgrs)] if self.lgrs is not None else []) + + ([("NUMLX ", self.numlx)] if self.numlx is not None else []) + + ([("NUMLY ", self.numly)] if self.numly is not None else []) + + ([("NUMLZ ", self.numlz)] if self.numlz is not None else []) + ) + + def to_file(self, filelike, file_format: resfo.Format = resfo.Format.UNFORMATTED): + resfo.write(filelike, self.to_ecl(), file_format) + + +positives = from_dtype(np.dtype(np.int32), min_value=1, max_value=10000) +small_ints = from_dtype(np.dtype(np.int32), min_value=1, max_value=10) + + +@st.composite +def smspecs(draw, sum_keys, start_date, use_days=None): + """ + Strategy for smspec that ensures that the TIME parameter, as required by + ert, is in the parameters list. + """ + use_days = st.booleans() if use_days is None else use_days + use_locals = draw(st.booleans()) + sum_keys = draw(sum_keys) + if any(sk.startswith("L") for sk in sum_keys): + use_locals = True + n = len(sum_keys) + 1 + nx = draw(small_ints) + ny = draw(small_ints) + nz = draw(small_ints) + keywords = ["TIME "] + sum_keys + if draw(use_days): + units = ["DAYS "] + draw( + st.lists(unit_names, min_size=n - 1, max_size=n - 1) + ) + else: + units = ["HOURS "] + draw( + st.lists(unit_names, min_size=n - 1, max_size=n - 1) + ) + well_names = [":+:+:+:+"] + draw(st.lists(names, min_size=n - 1, max_size=n - 1)) + if use_locals: # use local + lgrs = draw(st.lists(names, min_size=n, max_size=n)) + numlx = draw(st.lists(small_ints, min_size=n, max_size=n)) + numly = draw(st.lists(small_ints, min_size=n, max_size=n)) + numlz = draw(st.lists(small_ints, min_size=n, max_size=n)) + else: + lgrs = None + numlx = None + numly = None + numlz = None + region_numbers = [-32676] + draw( + st.lists( + from_dtype(np.dtype(np.int32), min_value=1, max_value=nx * ny * nz), + min_size=len(sum_keys), + max_size=len(sum_keys), + ) + ) + return draw( + st.builds( + Smspec, + nx=st.just(nx), + ny=st.just(ny), + nz=st.just(nz), + # restarted_from_step is hardcoded to 0 because + # of a bug in enkf_obs where it assumes that + # ecl_sum_get_first_report_step is always 1 + restarted_from_step=st.just(0), + num_keywords=st.just(n), + restart=names, + keywords=st.just(keywords), + well_names=st.just(well_names), + lgrs=st.just(lgrs), + numlx=st.just(numlx), + numly=st.just(numly), + numlz=st.just(numlz), + region_numbers=st.just(region_numbers), + units=st.just(units), + start_date=start_date, + use_names=st.booleans(), + ) + ) + + +@dataclass +class SummaryMiniStep: + mini_step: int + params: List[float] + + def to_ecl(self): + return [ + ("MINISTEP", np.array([self.mini_step], dtype=np.int32)), + ("PARAMS ", np.array(self.params, dtype=np.float32)), + ] + + +@dataclass +class SummaryStep: + seqnum: int + ministeps: List[SummaryMiniStep] + + def to_ecl(self): + return [("SEQHDR ", np.array([self.seqnum], dtype=np.int32))] + [ + i for ms in self.ministeps for i in ms.to_ecl() + ] + + +@dataclass +class Unsmry: + steps: List[SummaryStep] + + def to_ecl(self): + a = [i for step in self.steps for i in step.to_ecl()] + return a + + def to_file(self, filelike, file_format: resfo.Format = resfo.Format.UNFORMATTED): + resfo.write(filelike, self.to_ecl(), file_format) + + +positive_floats = from_dtype( + np.dtype(np.float32), + min_value=np.float32(0.1), + max_value=np.float32(1e19), + allow_nan=False, + allow_infinity=False, +) + + +@st.composite +def unsmrys( + draw, + num_params, + report_steps, + mini_steps, + days, +): + """ + Simplified strategy for unsmrys that will generate a summary file + with one ministep per report step. + """ + n = num_params + rs = sorted(draw(report_steps)) + ms = sorted(draw(mini_steps), reverse=True) + ds = sorted(draw(days), reverse=True) + steps = [] + for r in rs: + minis = [ + SummaryMiniStep( + ms.pop(), + [ds.pop()] + draw(st.lists(positive_floats, min_size=n, max_size=n)), + ) + ] + steps.append(SummaryStep(r, minis)) + return Unsmry(steps) + + +start_dates = st.datetimes( + min_value=datetime.strptime("1969-1-1", "%Y-%m-%d"), + max_value=datetime.strptime("2100-1-1", "%Y-%m-%d"), +) + +time_delta_lists = st.lists( + st.floats( + min_value=0.1, + max_value=25_000, # in days ~= 68.4 years + allow_nan=False, + allow_infinity=False, + ), + min_size=2, + max_size=100, +) + +summary_keys = st.lists(summary_variables(), min_size=1) + + +@st.composite +def summaries( + draw, + start_date=start_dates, + time_deltas=time_delta_lists, + summary_keys=summary_keys, + use_days=None, + report_step_only=False, +): + sum_keys = draw(summary_keys) + first_date = draw(start_date) + days = draw(use_days if use_days is not None else st.booleans()) + smspec = draw( + smspecs( + sum_keys=st.just(sum_keys), + start_date=st.just(Date.from_datetime(first_date)), + use_days=st.just(days), + ) + ) + assume( + len(set(zip(smspec.keywords, smspec.region_numbers, smspec.well_names))) + == len(smspec.keywords) + ) + dates = [0.0] + draw(time_deltas) + try: + if days: + _ = first_date + timedelta(days=max(dates)) + else: + _ = first_date + timedelta(hours=max(dates)) + except (ValueError, OverflowError): # datetime has a max year + assume(False) + + ds = sorted(dates, reverse=True) + steps = [] + i = 0 + j = 0 + while len(ds) > 0: + minis = [] + max_val = 1 if report_step_only else len(ds) + for _ in range(draw(st.integers(min_value=1, max_value=max_val))): + minis.append( + SummaryMiniStep( + i, + [ds.pop()] + + draw( + st.lists( + positive_floats, + min_size=len(sum_keys), + max_size=len(sum_keys), + ) + ), + ) + ) + i += 1 + steps.append(SummaryStep(j, minis)) + j += 1 + return smspec, Unsmry(steps) diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 000000000..9b9e5f998 --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis