From 05a8afb347ad0502ce16569ca3595f9ea1eb82fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Wed, 4 Oct 2023 20:28:42 +0200 Subject: [PATCH] WIP again --- pyproject.toml | 19 ++++---- src/xtgeo/well/_blockedwell_roxapi.py | 8 ++-- src/xtgeo/well/_well_oper.py | 56 +++++++++++----------- src/xtgeo/well/_well_roxapi.py | 2 +- src/xtgeo/well/well1.py | 14 ++++-- src/xtgeo/xyz_common/_xyz_data.py | 43 ++++++++++++++--- tests/test_well/test_well_xyzdata_class.py | 18 +++---- 7 files changed, 99 insertions(+), 61 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eddc1d771..a856dca1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [build-system] requires = [ - "scikit-build-core[pyproject]", - "swig", - "numpy==1.19.2; python_version == '3.8'", - "numpy==1.19.5; python_version == '3.9'", - "numpy==1.21.6; python_version == '3.10'", - "numpy==1.23.5; python_version == '3.11'", + "scikit-build-core[pyproject]", + "swig", + "numpy==1.19.2; python_version == '3.8'", + "numpy==1.19.5; python_version == '3.9'", + "numpy==1.21.6; python_version == '3.10'", + "numpy==1.23.5; python_version == '3.11'", ] build-backend = "scikit_build_core.build" @@ -22,9 +22,7 @@ description = "XTGeo is a Python library for 3D grids, surfaces, wells, etc" readme = "README.md" requires-python = ">=3.8" license = { text = "LGPL-3.0" } -authors = [ - { name = "Equinor", email = "jriv@equinor.com" }, -] +authors = [{ name = "Equinor", email = "jriv@equinor.com" }] keywords = ["grids", "surfaces", "wells", "cubes"] classifiers = [ "Development Status :: 5 - Production/Stable", @@ -50,6 +48,7 @@ dependencies = [ "ecl-data-io>=2.1", "h5py>=3", "hdf5plugin>=2.3", + "joblib", "matplotlib>=3.3", "numpy>=1.19", "pandas>=1.1", @@ -57,7 +56,7 @@ dependencies = [ "scipy>=1.5", "segyio>1.8.0", "shapely>=1.6.2", - "tables;platform_system != 'Darwin'", # TODO: update when fixed for mac + "tables;platform_system != 'Darwin'", # TODO: update when fixed for 
mac "typing-extensions", ] diff --git a/src/xtgeo/well/_blockedwell_roxapi.py b/src/xtgeo/well/_blockedwell_roxapi.py index f824d7ccd..039c2009f 100644 --- a/src/xtgeo/well/_blockedwell_roxapi.py +++ b/src/xtgeo/well/_blockedwell_roxapi.py @@ -112,11 +112,11 @@ def _roxapi_import_bwell( tmplog = npma.filled(tmplog, fill_value=np.nan) tmplog[tmplog == -999] = np.nan if "discrete" in str(bwprop.type): - self._wlogtypes[lname] = "DISC" - self._wlogrecords[lname] = bwprop.code_names + self.set_logtype(lname, "DISC") + self.set_logrecord(lname, bwprop.code_names) else: - self._wlogtypes[lname] = "CONT" - self._wlogrecords[lname] = None + self.set_logtype(lname, "CONT") + self.set_logrecord(lname, None) logs[lname] = tmplog diff --git a/src/xtgeo/well/_well_oper.py b/src/xtgeo/well/_well_oper.py index 81d675158..ef2312a90 100644 --- a/src/xtgeo/well/_well_oper.py +++ b/src/xtgeo/well/_well_oper.py @@ -75,8 +75,8 @@ def rescale(self, delta=0.15, tvdrange=None): dfr.reset_index(inplace=True, drop=True) for lname in dfr.columns: - if lname in self._wlogtypes: - ltype = self._wlogtypes[lname] + if lname in self.wlogtypes: + ltype = self.wlogtypes[lname] if ltype == "DISC": dfr = dfr.round({lname: 0}) @@ -143,7 +143,9 @@ def make_zone_qual_log(self, zqname): # now create the new log self.create_log(zqname, logtype="DISC", logrecord=codes) for key, val in dcode.items(): - self._df[zqname][dff["ztmp"] == key] = val + dff[zqname][dff["ztmp"] == key] = val + + self._wdata.set_dataframe(dff) # set the metadata self.set_logtype(zqname, "DISC") @@ -217,16 +219,16 @@ def _make_ijk_from_grid_v1(self, grid, grid_id=""): jcellname = "JCELL" + grid_id kcellname = "KCELL" + grid_id - self._df[icellname] = indarray - self._df[jcellname] = jndarray - self._df[kcellname] = kndarray + self._wdata.data[icellname] = indarray + self._wdata.data[jcellname] = jndarray + self._wdata.data[kcellname] = kndarray for cellname in [icellname, jcellname, kcellname]: - self._wlogtypes[cellname] = "DISC" + 
self.set_logtype(cellname, "DISC") - self._wlogrecords[icellname] = {ncel: str(ncel) for ncel in range(1, grid.ncol + 1)} - self._wlogrecords[jcellname] = {ncel: str(ncel) for ncel in range(1, grid.nrow + 1)} - self._wlogrecords[kcellname] = {ncel: str(ncel) for ncel in range(1, grid.nlay + 1)} + self.set_logrecord(icellname, {ncel: str(ncel) for ncel in range(1, grid.ncol + 1)}) + self.set_logrecord(jcellname, {ncel: str(ncel) for ncel in range(1, grid.nrow + 1)}) + self.set_logrecord(kcellname, {ncel: str(ncel) for ncel in range(1, grid.nlay + 1)}) _cxtgeo.delete_intarray(wivec) _cxtgeo.delete_intarray(wjvec) @@ -316,8 +318,8 @@ def get_gridproperties(self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_i pname = prop.name + prop_id self.dataframe[pname] = arr if prop.isdiscrete: - self._wlogtypes[pname] = "DISC" - self._wlogrecords[pname] = copy.deepcopy(prop.codes) + self.set_logtype(pname, "DISC") + self.set_logrecord(pname, copy.deepcopy(prop.codes)) self._ensure_consistency() self.delete_logs(["ICELL_tmp", "JCELL_tmp", "KCELL_tmp"]) @@ -331,15 +333,15 @@ def report_zonation_holes(self, threshold=5): wellreport = [] - zlog = self._df[self.zonelogname].values.copy() + zlog = self._wdata.data[self.zonelogname].values.copy() mdlog = None if self.mdlogname: - mdlog = self._df[self.mdlogname].values + mdlog = self._wdata.data[self.mdlogname].values - xvv = self._df["X_UTME"].values - yvv = self._df["Y_UTMN"].values - zvv = self._df["Z_TVDSS"].values + xvv = self._wdata.data["X_UTME"].values + yvv = self._wdata.data["Y_UTMN"].values + zvv = self._wdata.data["Z_TVDSS"].values zlog[np.isnan(zlog)] = const.UNDEF_INT ncv = 0 @@ -417,22 +419,22 @@ def mask_shoulderbeds(self, inputlogs, targetlogs, nsamples, strict): return False for inlog in useinputs: - inseries = self._df[inlog] + inseries = self._wdata.data[inlog] if use_numeric: bseries = _get_bseries(inseries, nsamples) else: mode, value = list(nsamples.items())[0] - depth = self._df["Z_TVDSS"] + depth = 
self._wdata.data["Z_TVDSS"] if mode == "md" and self.mdlogname is not None: - depth = self._df[self.mdlogname] + depth = self._wdata.data[self.mdlogname] elif mode == "md" and self.mdlogname is None: raise ValueError("There is no mdlogname attribute present.") bseries = _get_bseries_by_distance(depth, inseries, value) for target in usetargets: - self._df.loc[bseries, target] = np.nan + self._wdata.data.loc[bseries, target] = np.nan logger.info("Mask shoulderbeds for some logs... done") return True @@ -443,23 +445,23 @@ def _mask_shoulderbeds_checks(self, inputlogs, targetlogs, nsamples, strict): # check that inputlogs exists and that they are discrete, and targetlogs useinputs = [] for inlog in inputlogs: - if inlog not in self._wlogtypes.keys() and strict is True: + if inlog not in self.wlogtypes.keys() and strict is True: raise ValueError(f"Input log {inlog} is missing and strict=True") - if inlog in self._wlogtypes.keys() and self._wlogtypes[inlog] != "DISC": + if inlog in self.wlogtypes.keys() and self.wlogtypes[inlog] != "DISC": raise ValueError(f"Input log {inlog} is not of type DISC") - if inlog in self._wlogtypes.keys(): + if inlog in self.wlogtypes.keys(): useinputs.append(inlog) usetargets = [] for target in targetlogs: - if target not in self._wlogtypes.keys() and strict is True: + if target not in self.wlogtypes.keys() and strict is True: raise ValueError(f"Target log {target} is missing and strict=True") - if target in self._wlogtypes.keys(): + if target in self.wlogtypes.keys(): usetargets.append(target) use_numeric = True if isinstance(nsamples, int): - maxlen = len(self._df) // 2 + maxlen = self.nrow // 2 if nsamples < 1 or nsamples > maxlen: raise ValueError(f"Keyword nsamples must be an int > 1 and < {maxlen}") elif isinstance(nsamples, dict): diff --git a/src/xtgeo/well/_well_roxapi.py b/src/xtgeo/well/_well_roxapi.py index f43edc6f4..c00c162c7 100644 --- a/src/xtgeo/well/_well_roxapi.py +++ b/src/xtgeo/well/_well_roxapi.py @@ -205,7 +205,7 @@ 
def _roxapi_update_well(self, rox, wname, lognames, logrun, trajectory, realisat for lname in uselognames: isdiscrete = False xtglimit = xtgeo.UNDEF_LIMIT - if self._wlogtypes[lname] == "DISC": + if self.wlogtypes[lname] == "DISC": isdiscrete = True xtglimit = xtgeo.UNDEF_INT_LIMIT diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index 0688589cd..a612d5d3d 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -381,7 +381,7 @@ def wlogtypes(self): @property def wlogrecords(self): """Returns wlogrecords""" - return self._wdata.attr_records + return deepcopy(self._wdata.attr_records) # ================================================================================== # Methods @@ -782,8 +782,8 @@ def copy(self): self._wdata.data.copy(), self.mdlogname, self.zonelogname, - deepcopy(self._wdata.attr_types), - deepcopy(self._wdata.attr_records), + self.wlogtypes, + self.wlogrecords, self._filesrc, ) @@ -884,6 +884,10 @@ def get_logrecord_codename(self, lname, key): return None + def get_dataframe(self): + """Get, by intention, a copy of the dataframe""" + return self._wdata.get_dataframe_copy(infer_dtype=False, filled=False) + def get_filled_dataframe( self, fill_value=const.UNDEF, fill_value_int=const.UNDEF_INT ): @@ -907,6 +911,10 @@ def get_filled_dataframe( fill_value_int=fill_value_int, ) + def set_dataframe(self, dfr): + """Set the dataframe.""" + self._wdata.set_dataframe(dfr) + def create_relative_hlen(self): """Make a relative length of a well, as a log. 
diff --git a/src/xtgeo/xyz_common/_xyz_data.py b/src/xtgeo/xyz_common/_xyz_data.py index e81f19d19..e7f8729f0 100644 --- a/src/xtgeo/xyz_common/_xyz_data.py +++ b/src/xtgeo/xyz_common/_xyz_data.py @@ -38,6 +38,7 @@ import numpy as np import pandas as pd +from joblib import hash as jhash import xtgeo.common.constants as const from xtgeo import XTGeoCLibError # type: ignore[attr-defined] @@ -118,6 +119,8 @@ def __init__( if xyztype == "well": self._xyztype = _XYZType.WELL + self._hash = ("0", "0", "0") + self.ensure_consistency() @property @@ -205,13 +208,14 @@ def _ensure_consistency_attr_records(self): first. """ for attr_name, dtype in self._attr_types.items(): + print("XXXXXZ", attr_name, dtype) if attr_name not in self._attr_records or not isinstance( self._attr_records[attr_name], (dict, list, tuple) ): - if dtype == _AttrType.CONT.value: + if dtype == _AttrType.CONT: self._attr_records[attr_name] = CONT_DEFAULT_RECORD - if dtype == _AttrType.DISC.value: + if dtype == _AttrType.DISC: # it is a discrete log with missing record; try to find # a default one based on current values... lvalues = self._df[attr_name].values.round(decimals=0) @@ -229,7 +233,7 @@ def _ensure_consistency_attr_records(self): # correct when attr_types is CONT but attr_records for that entry is a dict if ( attr_name in self._attr_records - and self._attr_types[attr_name] == _AttrType.CONT.value + and self._attr_types[attr_name] == _AttrType.CONT ): if isinstance(self._attr_records[attr_name], dict): self._attr_records[attr_name] = CONT_DEFAULT_RECORD @@ -259,7 +263,7 @@ def _ensure_consistency_df_dtypes(self): self._undef_disc, np.int32(const.UNDEF_DISC) ) - def ensure_consistency(self): + def ensure_consistency(self) -> bool: """Ensure that data and wlog* are consistent. 
This is important for many operations on the dataframe, an should keep @@ -268,8 +272,22 @@ def ensure_consistency(self): * When adding one or columns to the dataframe * When removing one or more columns from the dataframe * ... + + Returns True if consistency is run, while False means that no changes have + occurred, hence no consistency checks are done """ + # the purpose of this hash check is to avoid spending time on consistency + # checks if no changes + hash_proposed = ( + jhash(self._df), + jhash(self._attr_types), + jhash(self._attr_records), + ) + + if self._hash == hash_proposed: + return False + if list(self._df.columns[:3]) != [self._xname, self._yname, self._zname]: raise ValueError( f"Dataframe must include '{self._xname}', '{self._yname}' " @@ -281,6 +299,14 @@ def ensure_consistency(self): self._ensure_consistency_attr_records() self._ensure_consistency_df_dtypes() + self._hash = ( + jhash(self._df), + jhash(self._attr_types), + jhash(self._attr_records), + ) + + return True + def set_attr_type(self, name: str, attrtype: str) -> None: """Set a type (DISC, CONT) for a named attribute. @@ -356,18 +382,21 @@ def get_dataframe_copy( ): """Get a deep copy of the dataframe, with options. 
- If infer_dtype is True, then DISC columns will be of "int32" type + If infer_dtype is True, then DISC columns will be of "int32" type, but + since int32 do not support np.nan, the value for undefined values will be + ``fill_value_int`` """ dfr = self._df.copy() if infer_dtype: for name, attrtype in self._attr_types.items(): - if "DISC" in attrtype: + if attrtype.name == "DISC": + dfr[name] = dfr[name].fillna(fill_value_int) dfr[name] = dfr[name].astype("int32") if filled: dfill = {} for attrname in self._df: - if "DISC" in self._attr_types[attrname]: + if self._attr_types[attrname] == _AttrType.DISC: dfill[attrname] = fill_value_int else: dfill[attrname] = fill_value diff --git a/tests/test_well/test_well_xyzdata_class.py b/tests/test_well/test_well_xyzdata_class.py index 0830a467a..761ef64f6 100644 --- a/tests/test_well/test_well_xyzdata_class.py +++ b/tests/test_well/test_well_xyzdata_class.py @@ -98,7 +98,7 @@ def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame): } instance.data["NEW"] = 1.992 - instance.ensure_consistency() + assert instance.ensure_consistency() is True assert instance.attr_types == { "X_UTME": _AttrType.CONT, @@ -112,7 +112,10 @@ def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame): } instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1] - instance.ensure_consistency() + assert instance.ensure_consistency() is True + + # rerun on SAME data shall not run ensure_consistency(), hence -> False + assert instance.ensure_consistency() is False assert instance.attr_types == { "X_UTME": _AttrType.CONT, @@ -128,6 +131,7 @@ def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame): empty = ("", "") + print("XXXX", instance.attr_records) assert instance.attr_records == { "X_UTME": empty, "Y_UTMN": empty, @@ -147,13 +151,9 @@ def test_attrtype_class(): assert _AttrType.DISC.value == 2 assert _AttrType.CONT.value == 1 - print("YYYY", list(_AttrType)) - assert "CONT" in _AttrType - assert "DISC" in 
_AttrType - assert "FOO" not in _AttrType - - assert _AttrType("DISC") - assert _AttrType("CONT") + assert "CONT" in _AttrType.__members__ + assert "DISC" in _AttrType.__members__ + assert "FOO" not in _AttrType.__members__ with pytest.raises(ValueError, match="is not a valid"): _AttrType("FOO")