From 05a8afb347ad0502ce16569ca3595f9ea1eb82fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= <jan@rivenaes.net>
Date: Wed, 4 Oct 2023 20:28:42 +0200
Subject: [PATCH] WIP again

---
 pyproject.toml                             | 19 ++++----
 src/xtgeo/well/_blockedwell_roxapi.py      |  8 ++--
 src/xtgeo/well/_well_oper.py               | 56 +++++++++++-----------
 src/xtgeo/well/_well_roxapi.py             |  2 +-
 src/xtgeo/well/well1.py                    | 14 ++++--
 src/xtgeo/xyz_common/_xyz_data.py          | 43 ++++++++++++++---
 tests/test_well/test_well_xyzdata_class.py | 18 +++----
 7 files changed, 99 insertions(+), 61 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index eddc1d771..a856dca1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,11 +1,11 @@
 [build-system]
 requires = [
-  "scikit-build-core[pyproject]",
-  "swig",
-  "numpy==1.19.2; python_version == '3.8'",
-  "numpy==1.19.5; python_version == '3.9'",
-  "numpy==1.21.6; python_version == '3.10'",
-  "numpy==1.23.5; python_version == '3.11'",
+    "scikit-build-core[pyproject]",
+    "swig",
+    "numpy==1.19.2; python_version == '3.8'",
+    "numpy==1.19.5; python_version == '3.9'",
+    "numpy==1.21.6; python_version == '3.10'",
+    "numpy==1.23.5; python_version == '3.11'",
 ]
 build-backend = "scikit_build_core.build"
 
@@ -22,9 +22,7 @@ description = "XTGeo is a Python library for 3D grids, surfaces, wells, etc"
 readme = "README.md"
 requires-python = ">=3.8"
 license = { text = "LGPL-3.0" }
-authors = [
-    { name = "Equinor", email = "jriv@equinor.com" },
-]
+authors = [{ name = "Equinor", email = "jriv@equinor.com" }]
 keywords = ["grids", "surfaces", "wells", "cubes"]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
@@ -50,6 +48,7 @@ dependencies = [
     "ecl-data-io>=2.1",
     "h5py>=3",
     "hdf5plugin>=2.3",
+    "joblib",
     "matplotlib>=3.3",
     "numpy>=1.19",
     "pandas>=1.1",
@@ -57,7 +56,7 @@ dependencies = [
     "scipy>=1.5",
     "segyio>1.8.0",
     "shapely>=1.6.2",
-    "tables;platform_system != 'Darwin'",  # TODO: update when fixed for mac
+    "tables;platform_system != 'Darwin'", # TODO: update when fixed for mac
     "typing-extensions",
 ]
 
diff --git a/src/xtgeo/well/_blockedwell_roxapi.py b/src/xtgeo/well/_blockedwell_roxapi.py
index f824d7ccd..039c2009f 100644
--- a/src/xtgeo/well/_blockedwell_roxapi.py
+++ b/src/xtgeo/well/_blockedwell_roxapi.py
@@ -112,11 +112,11 @@ def _roxapi_import_bwell(
         tmplog = npma.filled(tmplog, fill_value=np.nan)
         tmplog[tmplog == -999] = np.nan
         if "discrete" in str(bwprop.type):
-            self._wlogtypes[lname] = "DISC"
-            self._wlogrecords[lname] = bwprop.code_names
+            self.set_logtype(lname, "DISC")
+            self.set_logrecord(lname, bwprop.code_names)
         else:
-            self._wlogtypes[lname] = "CONT"
-            self._wlogrecords[lname] = None
+            self.set_logtype(lname, "CONT")
+            self.set_logrecord(lname, None)
 
         logs[lname] = tmplog
 
diff --git a/src/xtgeo/well/_well_oper.py b/src/xtgeo/well/_well_oper.py
index 81d675158..ef2312a90 100644
--- a/src/xtgeo/well/_well_oper.py
+++ b/src/xtgeo/well/_well_oper.py
@@ -75,8 +75,8 @@ def rescale(self, delta=0.15, tvdrange=None):
     dfr.reset_index(inplace=True, drop=True)
 
     for lname in dfr.columns:
-        if lname in self._wlogtypes:
-            ltype = self._wlogtypes[lname]
+        if lname in self.wlogtypes:
+            ltype = self.wlogtypes[lname]
             if ltype == "DISC":
                 dfr = dfr.round({lname: 0})
 
@@ -143,7 +143,9 @@ def make_zone_qual_log(self, zqname):
     # now create the new log
     self.create_log(zqname, logtype="DISC", logrecord=codes)
     for key, val in dcode.items():
-        self._df[zqname][dff["ztmp"] == key] = val
+        dff[zqname][dff["ztmp"] == key] = val
+
+    self._wdata.set_dataframe(dff)
 
     # set the metadata
     self.set_logtype(zqname, "DISC")
@@ -217,16 +219,16 @@ def _make_ijk_from_grid_v1(self, grid, grid_id=""):
     jcellname = "JCELL" + grid_id
     kcellname = "KCELL" + grid_id
 
-    self._df[icellname] = indarray
-    self._df[jcellname] = jndarray
-    self._df[kcellname] = kndarray
+    self._wdata.data[icellname] = indarray
+    self._wdata.data[jcellname] = jndarray
+    self._wdata.data[kcellname] = kndarray
 
     for cellname in [icellname, jcellname, kcellname]:
-        self._wlogtypes[cellname] = "DISC"
+        self.set_logtype(cellname, "DISC")
 
-    self._wlogrecords[icellname] = {ncel: str(ncel) for ncel in range(1, grid.ncol + 1)}
-    self._wlogrecords[jcellname] = {ncel: str(ncel) for ncel in range(1, grid.nrow + 1)}
-    self._wlogrecords[kcellname] = {ncel: str(ncel) for ncel in range(1, grid.nlay + 1)}
+    self.set_logrecord(icellname, {ncel: str(ncel) for ncel in range(1, grid.ncol + 1)})
+    self.set_logrecord(jcellname, {ncel: str(ncel) for ncel in range(1, grid.nrow + 1)})
+    self.set_logrecord(kcellname, {ncel: str(ncel) for ncel in range(1, grid.nlay + 1)})
 
     _cxtgeo.delete_intarray(wivec)
     _cxtgeo.delete_intarray(wjvec)
@@ -316,8 +318,8 @@ def get_gridproperties(self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_i
         pname = prop.name + prop_id
         self.dataframe[pname] = arr
         if prop.isdiscrete:
-            self._wlogtypes[pname] = "DISC"
-            self._wlogrecords[pname] = copy.deepcopy(prop.codes)
+            self.set_logtype(pname, "DISC")
+            self.set_logrecord(pname, copy.deepcopy(prop.codes))
     self._ensure_consistency()
     self.delete_logs(["ICELL_tmp", "JCELL_tmp", "KCELL_tmp"])
 
@@ -331,15 +333,15 @@ def report_zonation_holes(self, threshold=5):
 
     wellreport = []
 
-    zlog = self._df[self.zonelogname].values.copy()
+    zlog = self._wdata.data[self.zonelogname].values.copy()
 
     mdlog = None
     if self.mdlogname:
-        mdlog = self._df[self.mdlogname].values
+        mdlog = self._wdata.data[self.mdlogname].values
 
-    xvv = self._df["X_UTME"].values
-    yvv = self._df["Y_UTMN"].values
-    zvv = self._df["Z_TVDSS"].values
+    xvv = self._wdata.data["X_UTME"].values
+    yvv = self._wdata.data["Y_UTMN"].values
+    zvv = self._wdata.data["Z_TVDSS"].values
     zlog[np.isnan(zlog)] = const.UNDEF_INT
 
     ncv = 0
@@ -417,22 +419,22 @@ def mask_shoulderbeds(self, inputlogs, targetlogs, nsamples, strict):
         return False
 
     for inlog in useinputs:
-        inseries = self._df[inlog]
+        inseries = self._wdata.data[inlog]
         if use_numeric:
             bseries = _get_bseries(inseries, nsamples)
         else:
             mode, value = list(nsamples.items())[0]
 
-            depth = self._df["Z_TVDSS"]
+            depth = self._wdata.data["Z_TVDSS"]
             if mode == "md" and self.mdlogname is not None:
-                depth = self._df[self.mdlogname]
+                depth = self._wdata.data[self.mdlogname]
             elif mode == "md" and self.mdlogname is None:
                 raise ValueError("There is no mdlogname attribute present.")
 
             bseries = _get_bseries_by_distance(depth, inseries, value)
 
         for target in usetargets:
-            self._df.loc[bseries, target] = np.nan
+            self._wdata.data.loc[bseries, target] = np.nan
 
     logger.info("Mask shoulderbeds for some logs... done")
     return True
@@ -443,23 +445,23 @@ def _mask_shoulderbeds_checks(self, inputlogs, targetlogs, nsamples, strict):
     # check that inputlogs exists and that they are discrete, and targetlogs
     useinputs = []
     for inlog in inputlogs:
-        if inlog not in self._wlogtypes.keys() and strict is True:
+        if inlog not in self.wlogtypes.keys() and strict is True:
             raise ValueError(f"Input log {inlog} is missing and strict=True")
-        if inlog in self._wlogtypes.keys() and self._wlogtypes[inlog] != "DISC":
+        if inlog in self.wlogtypes.keys() and self.wlogtypes[inlog] != "DISC":
             raise ValueError(f"Input log {inlog} is not of type DISC")
-        if inlog in self._wlogtypes.keys():
+        if inlog in self.wlogtypes.keys():
             useinputs.append(inlog)
 
     usetargets = []
     for target in targetlogs:
-        if target not in self._wlogtypes.keys() and strict is True:
+        if target not in self.wlogtypes.keys() and strict is True:
             raise ValueError(f"Target log {target} is missing and strict=True")
-        if target in self._wlogtypes.keys():
+        if target in self.wlogtypes.keys():
             usetargets.append(target)
 
     use_numeric = True
     if isinstance(nsamples, int):
-        maxlen = len(self._df) // 2
+        maxlen = self.nrow // 2
         if nsamples < 1 or nsamples > maxlen:
             raise ValueError(f"Keyword nsamples must be an int > 1 and < {maxlen}")
     elif isinstance(nsamples, dict):
diff --git a/src/xtgeo/well/_well_roxapi.py b/src/xtgeo/well/_well_roxapi.py
index f43edc6f4..c00c162c7 100644
--- a/src/xtgeo/well/_well_roxapi.py
+++ b/src/xtgeo/well/_well_roxapi.py
@@ -205,7 +205,7 @@ def _roxapi_update_well(self, rox, wname, lognames, logrun, trajectory, realisat
     for lname in uselognames:
         isdiscrete = False
         xtglimit = xtgeo.UNDEF_LIMIT
-        if self._wlogtypes[lname] == "DISC":
+        if self.wlogtypes[lname] == "DISC":
             isdiscrete = True
             xtglimit = xtgeo.UNDEF_INT_LIMIT
 
diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py
index 0688589cd..a612d5d3d 100644
--- a/src/xtgeo/well/well1.py
+++ b/src/xtgeo/well/well1.py
@@ -381,7 +381,7 @@ def wlogtypes(self):
     @property
     def wlogrecords(self):
         """Returns wlogrecords"""
-        return self._wdata.attr_records
+        return deepcopy(self._wdata.attr_records)
 
     # ==================================================================================
     # Methods
@@ -782,8 +782,8 @@ def copy(self):
             self._wdata.data.copy(),
             self.mdlogname,
             self.zonelogname,
-            deepcopy(self._wdata.attr_types),
-            deepcopy(self._wdata.attr_records),
+            self.wlogtypes,
+            self.wlogrecords,
             self._filesrc,
         )
 
@@ -884,6 +884,10 @@ def get_logrecord_codename(self, lname, key):
 
         return None
 
+    def get_dataframe(self):
+        """Get, by intention, a copy of the dataframe"""
+        return self._wdata.get_dataframe_copy(infer_dtype=False, filled=False)
+
     def get_filled_dataframe(
         self, fill_value=const.UNDEF, fill_value_int=const.UNDEF_INT
     ):
@@ -907,6 +911,10 @@ def get_filled_dataframe(
             fill_value_int=fill_value_int,
         )
 
+    def set_dataframe(self, dfr):
+        """Set the dataframe."""
+        self._wdata.set_dataframe(dfr)
+
     def create_relative_hlen(self):
         """Make a relative length of a well, as a log.
 
diff --git a/src/xtgeo/xyz_common/_xyz_data.py b/src/xtgeo/xyz_common/_xyz_data.py
index e81f19d19..e7f8729f0 100644
--- a/src/xtgeo/xyz_common/_xyz_data.py
+++ b/src/xtgeo/xyz_common/_xyz_data.py
@@ -38,6 +38,7 @@
 
 import numpy as np
 import pandas as pd
+from joblib import hash as jhash
 
 import xtgeo.common.constants as const
 from xtgeo import XTGeoCLibError  # type: ignore[attr-defined]
@@ -118,6 +119,8 @@ def __init__(
         if xyztype == "well":
             self._xyztype = _XYZType.WELL
 
+        self._hash = ("0", "0", "0")
+
         self.ensure_consistency()
 
     @property
@@ -205,13 +208,14 @@ def _ensure_consistency_attr_records(self):
         first.
         """
         for attr_name, dtype in self._attr_types.items():
+            print("XXXXXZ", attr_name, dtype)
             if attr_name not in self._attr_records or not isinstance(
                 self._attr_records[attr_name], (dict, list, tuple)
             ):
-                if dtype == _AttrType.CONT.value:
+                if dtype == _AttrType.CONT:
                     self._attr_records[attr_name] = CONT_DEFAULT_RECORD
 
-                if dtype == _AttrType.DISC.value:
+                if dtype == _AttrType.DISC:
                     # it is a discrete log with missing record; try to find
                     # a default one based on current values...
                     lvalues = self._df[attr_name].values.round(decimals=0)
@@ -229,7 +233,7 @@ def _ensure_consistency_attr_records(self):
             # correct when attr_types is CONT but attr_records for that entry is a dict
             if (
                 attr_name in self._attr_records
-                and self._attr_types[attr_name] == _AttrType.CONT.value
+                and self._attr_types[attr_name] == _AttrType.CONT
             ):
                 if isinstance(self._attr_records[attr_name], dict):
                     self._attr_records[attr_name] = CONT_DEFAULT_RECORD
@@ -259,7 +263,7 @@ def _ensure_consistency_df_dtypes(self):
                     self._undef_disc, np.int32(const.UNDEF_DISC)
                 )
 
-    def ensure_consistency(self):
+    def ensure_consistency(self) -> bool:
         """Ensure that data and wlog* are consistent.
 
         This is important for many operations on the dataframe, an should keep
@@ -268,8 +272,22 @@ def ensure_consistency(self):
         * When adding one or columns to the dataframe
         * When removing one or more columns from the dataframe
         * ...
+
+        Returns True if the consistency checks were run, and False if nothing has
+        changed since the last run, in which case the checks are skipped.
         """
 
+        # the purpose of this hash check is to avoid spending time on consistency
+        # checks when nothing has changed
+        hash_proposed = (
+            jhash(self._df),
+            jhash(self._attr_types),
+            jhash(self._attr_records),
+        )
+
+        if self._hash == hash_proposed:
+            return False
+
         if list(self._df.columns[:3]) != [self._xname, self._yname, self._zname]:
             raise ValueError(
                 f"Dataframe must include '{self._xname}', '{self._yname}' "
@@ -281,6 +299,14 @@ def ensure_consistency(self):
         self._ensure_consistency_attr_records()
         self._ensure_consistency_df_dtypes()
 
+        self._hash = (
+            jhash(self._df),
+            jhash(self._attr_types),
+            jhash(self._attr_records),
+        )
+
+        return True
+
     def set_attr_type(self, name: str, attrtype: str) -> None:
         """Set a type (DISC, CONT) for a named attribute.
 
@@ -356,18 +382,21 @@ def get_dataframe_copy(
     ):
         """Get a deep copy of the dataframe, with options.
 
-        If infer_dtype is True, then DISC columns will be of "int32" type
+        If infer_dtype is True, then DISC columns will be of "int32" type, but
+        since int32 does not support np.nan, undefined values will be set to
+        ``fill_value_int``.
         """
         dfr = self._df.copy()
         if infer_dtype:
             for name, attrtype in self._attr_types.items():
-                if "DISC" in attrtype:
+                if attrtype.name == "DISC":
+                    dfr[name] = dfr[name].fillna(fill_value_int)
                     dfr[name] = dfr[name].astype("int32")
 
         if filled:
             dfill = {}
             for attrname in self._df:
-                if "DISC" in self._attr_types[attrname]:
+                if self._attr_types[attrname] == _AttrType.DISC:
                     dfill[attrname] = fill_value_int
                 else:
                     dfill[attrname] = fill_value
diff --git a/tests/test_well/test_well_xyzdata_class.py b/tests/test_well/test_well_xyzdata_class.py
index 0830a467a..761ef64f6 100644
--- a/tests/test_well/test_well_xyzdata_class.py
+++ b/tests/test_well/test_well_xyzdata_class.py
@@ -98,7 +98,7 @@ def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame):
     }
 
     instance.data["NEW"] = 1.992
-    instance.ensure_consistency()
+    assert instance.ensure_consistency() is True
 
     assert instance.attr_types == {
         "X_UTME": _AttrType.CONT,
@@ -112,7 +112,10 @@ def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame):
     }
 
     instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1]
-    instance.ensure_consistency()
+    assert instance.ensure_consistency() is True
+
+    # rerun on SAME data shall not run ensure_consistency(), hence -> False
+    assert instance.ensure_consistency() is False
 
     assert instance.attr_types == {
         "X_UTME": _AttrType.CONT,
@@ -128,6 +131,7 @@ def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame):
 
     empty = ("", "")
 
+    print("XXXX", instance.attr_records)
     assert instance.attr_records == {
         "X_UTME": empty,
         "Y_UTMN": empty,
@@ -147,13 +151,9 @@ def test_attrtype_class():
     assert _AttrType.DISC.value == 2
     assert _AttrType.CONT.value == 1
 
-    print("YYYY", list(_AttrType))
-    assert "CONT" in _AttrType
-    assert "DISC" in _AttrType
-    assert "FOO" not in _AttrType
-
-    assert _AttrType("DISC")
-    assert _AttrType("CONT")
+    assert "CONT" in _AttrType.__members__
+    assert "DISC" in _AttrType.__members__
+    assert "FOO" not in _AttrType.__members__
 
     with pytest.raises(ValueError, match="is not a valid"):
         _AttrType("FOO")