From 0a13ace87b4d3c905ff461fc573704050e0fdec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Tue, 3 Oct 2023 08:18:36 +0200 Subject: [PATCH] WIP --- src/xtgeo/common/constants.py | 4 + src/xtgeo/well/_well_io.py | 2 +- src/xtgeo/well/_welldata.py | 501 ------------------ src/xtgeo/well/well1.py | 84 +-- src/xtgeo/xyz_common/__init__.py | 3 + src/xtgeo/xyz_common/_xyz_data.py | 581 +++++++++++++++++++++ tests/test_well/test_well.py | 8 - tests/test_well/test_well_xyzdata_class.py | 192 +++++++ tests/test_well/test_welldata_class.py | 198 ------- 9 files changed, 828 insertions(+), 745 deletions(-) delete mode 100644 src/xtgeo/well/_welldata.py create mode 100644 src/xtgeo/xyz_common/__init__.py create mode 100644 src/xtgeo/xyz_common/_xyz_data.py create mode 100644 tests/test_well/test_well_xyzdata_class.py delete mode 100644 tests/test_well/test_welldata_class.py diff --git a/src/xtgeo/common/constants.py b/src/xtgeo/common/constants.py index 3bd0912b9..f22df3d7b 100644 --- a/src/xtgeo/common/constants.py +++ b/src/xtgeo/common/constants.py @@ -21,3 +21,7 @@ MAXKEYWORDS = cx.MAXKEYWORDS # maximum keywords for ECL and ROFF scanning MAXDATES = cx.MAXDATES # maximum keywords for ECL scanning + +# for XYZ data, restricted to float32 and int32 +UNDEF_CONT = UNDEF +UNDEF_DISC = UNDEF_INT diff --git a/src/xtgeo/well/_well_io.py b/src/xtgeo/well/_well_io.py index 42e798ab2..ddf333c75 100644 --- a/src/xtgeo/well/_well_io.py +++ b/src/xtgeo/well/_well_io.py @@ -95,7 +95,7 @@ def import_rms_ascii( xdict = {int(rxv[i]): rxv[i + 1] for i in range(0, len(rxv), 2)} wlogrecords[lname] = xdict else: - wlogrecords[lname] = rxv + wlogrecords[lname] = tuple(row[1:]) nlogread += 1 diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py deleted file mode 100644 index 972231893..000000000 --- a/src/xtgeo/well/_welldata.py +++ /dev/null @@ -1,501 +0,0 @@ -"""Module for private _WellData class - - X_UTME Y_UTMN Z_TVDSS MDepth PHIT KLOGH Sw -0 
463256.911 5930542.294 -49.0000 0.0000 NaN NaN NaN ... -1 463256.912 5930542.295 -48.2859 0.5000 NaN NaN NaN ... -2 463256.913 5930542.296 -47.5735 1.0000 NaN NaN NaN ... -3 463256.914 5930542.299 -46.8626 1.5000 NaN NaN NaN ... -4 463256.916 5930542.302 -46.1533 2.0000 NaN NaN NaN ... - ... ... ... ... ... ... ... - -Where each log has a wlogtypes dictionary, telling if the logs are treated -as discrete (DISC) or continuous (CONT). In addition there is a wlogrecords -dict, storing the unit for continuous logs (defaulted to None) or a dictionary -of codes if the log in DISC type. - -The 3 first columsn are the coordinates. - -The purpose here is to automate and improve; if a column is added to the dataframe, then -the class methods here will try to guess the wlogtype and wlogtype, and add those; -similarly of a column is removed, the corresponding entries in wlogtypes and wlogrecords -will be deleted. -""" -from __future__ import annotations - -import math -from dataclasses import dataclass, field -from enum import Enum, EnumMeta, unique -from typing import Any, Optional - -import numpy as np -import pandas as pd - -import xtgeo.common.constants as const -from xtgeo import XTGeoCLibError # type: ignore[attr-defined] -from xtgeo.cxtgeo import _cxtgeo - - -class _LogTypeMeta(EnumMeta): - """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618""" - - def __contains__(cls, item): - try: - cls(item) # pylint: disable=E1120 - except ValueError: - return False - else: - return True - - -@unique -class _LogType(Enum, metaclass=_LogTypeMeta): - """Enumerate type of log""" - - CONT = "CONT" - DISC = "DISC" - - -CONT_DEFAULT_WLOGRECORD = ("UNKNOWN", "LINEAR") - - -@dataclass -class _WellData: - """Private class for the Well log data, where a Pandas dataframe is core. - - The data are stored in pandas dataframes, and by default, all logs are float, and - np.nan defines undefined values. Even if they are DISC. 
The reason for this is - restrictions in older versions of Pandas. - - All values in the dataframe shall be numbers. - - The wlogstypes is on form {"PHIT": CONT, "FACIES": DISC, ...} - - The wlogrecords is somewhat heterogeneous, on form: - {"PHIT": ("unit", "scale"), "FACIES": {0:BG, 2: "SST", 4: "CALC"}} - Hence the CONT logs hold a tuple or list with 2 str members, or None, while DISC - log holds a dict where the key is an int and the value is a string. - - Note:: - - Callers shall not use properties, but methods, e.g.:: - - instance.well = some_new_dataframe # not - - but - - instance.set_dataframe(some_new_dataframe) - """ - - data: pd.DataFrame - wlogtypes: dict = field(default_factory=dict) - wlogrecords: dict = field(default_factory=dict) - xname: str = "X_UTME" - yname: str = "Y_UTMN" - zname: str = "Z_TVDSS" - - def __post_init__(self): - self.ensure_consistency() - - def _infer_log_dtypes(self): - """Return as dict on form {"X_UTME": "CONT", .... "FACIES": "DISC"}. - - There are some important restrictions: - * The first 3 columns X Y Z) are always CONT, even if input appears as DISC. - * A check is made towards existing wlogtypes; if the key,value pair exists - already, this function will *not* force a change but keep as is. 
- """ - - new_df = self.data.convert_dtypes() - - dlist = new_df.dtypes.to_dict() - print(self.wlogtypes) - - datatypes = {} - for name, dtype in dlist.items(): - if name in self.wlogtypes: - datatypes[name] = self.wlogtypes[name] - if "DISC" in datatypes[name]: - datatypes[name] = _LogType.DISC.value - else: - datatypes[name] = _LogType.CONT.value - - continue - - if name in (self.xname, self.yname, self.zname): - # force coordinates, first 3 columns, to be CONT - datatypes[name] = _LogType.CONT.value - continue - - if "Float" in str(dtype): - datatypes[name] = _LogType.CONT.value - elif "Int" in str(dtype): - datatypes[name] = _LogType.DISC.value - else: - raise RuntimeError( - f"Log type seems to be something else than Float or Int for {name}" - ) - return datatypes - - def _ensure_consistency_wlogtypes(self): - """Ensure that dataframe and wlogtypes are consistent. - - wlogtypes are on form {"GR": "CONT", "ZONES": "DISC", ...} - - The column data in the dataframe takes precedence; i.e. if a column is removed - in a pandas operation, then wlogtypes are adapted silently by removing the item - from the dict. - """ - # check first if a log is removed in the dataframe (e.g. by pandas operations) - for logname in list(self.wlogtypes.keys()): - if logname not in self.data.columns[3:]: - del self.wlogtypes[logname] - - self.wlogtypes = self._infer_log_dtypes() - - def _ensure_consistency_wlogrecords(self): - """Ensure that data and wloglogrecords are consistent; cf wlogtypes. - - Important that wlogtypes are correct; i.e. run _ensure_consistency_wlogtypes() - first. - """ - for logname, dtype in self.wlogtypes.items(): - if logname not in self.wlogrecords or not isinstance( - self.wlogrecords[logname], (dict, list, tuple) - ): - if dtype == _LogType.CONT.value: - self.wlogrecords[logname] = CONT_DEFAULT_WLOGRECORD - - if dtype == _LogType.DISC.value: - # it is a discrete log with missing record; try to find - # a default one based on current values... 
- lvalues = self.data[logname].values.round(decimals=0) - lmin = int(lvalues.min()) - lmax = int(lvalues.max()) - - lvalues = lvalues.astype("int") - codes = {} - for lval in range(lmin, lmax + 1): - if lval in lvalues: - codes[lval] = str(lval) - - self.wlogrecords[logname] = codes - - # correct when wlogtypes is CONT but wlogrecords for that entry is a dict - if ( - logname in self.wlogrecords - and self.wlogtypes[logname] == _LogType.CONT.value - ): - if isinstance(self.wlogrecords[logname], dict): - self.wlogrecords[logname] = CONT_DEFAULT_WLOGRECORD - - def _ensure_consistency_df_dtypes(self): - """Ensure that dataframe float32 for all logs, except for X Y Z -> float64.""" - - col = list(self.data) - - coords_dtypes = [str(entry) for entry in self.data[col[0:3]].dtypes] - - if not all(["float64" in entry for entry in coords_dtypes]): - self.data[col[0:3]] = self.data.iloc[:, 0:3].astype("float64") - - logs_dtypes = [str(entry) for entry in self.data[col[3:]].dtypes] - - if not all(["float32" in entry for entry in logs_dtypes]): - self.data[col[3:]] = self.data.iloc[:, 3:].astype("float32") - - def ensure_consistency(self): - """Ensure that data and wlog* are consistent. - - This is important for many operations on the dataframe, an should keep wlogtypes - and wlogrecords 'in sync' with the dataframe. - - * When adding one or columns to the dataframe - * When removing one or more columns from the dataframe - * ... - """ - - if list(self.data.columns[:3]) != [self.xname, self.yname, self.zname]: - raise ValueError( - f"Well dataframe must include '{self.xname}', '{self.yname}' " - f"and '{self.zname}', got {list(self.data.columns[:3])}" - ) - - # order matters: - self._ensure_consistency_wlogtypes() - self._ensure_consistency_wlogrecords() - self._ensure_consistency_df_dtypes() - - def set_wlogtype(self, name: str, wtype: str) -> None: - """Set a wlogtype for a named log. - - A bit flexibility is added for wtype, e.g. 
allowing "float*" for CONT etc, and - allow lowercase "cont" for CONT - - """ - - apply_wtype = wtype.upper() - if "FLOAT" in apply_wtype: - apply_wtype = "CONT" - if "INT" in apply_wtype: - apply_wtype = "DISC" - - if name not in self.wlogtypes: - raise ValueError(f"No such log name present: {name}") - - if apply_wtype in _LogType: - self.wlogtypes[name] = _LogType(apply_wtype) - else: - raise ValueError( - f"Cannot set wlogtype as {wtype}, not in {list(_LogType.__members__)}" - ) - - self.ensure_consistency() - - def set_wlogrecord(self, name: str, record: dict) -> None: - """Set a wlogrecord for a named log.""" - - if name not in self.wlogtypes: - raise ValueError(f"No such logname: {name}") - - if self.wlogtypes[name] == _LogType.CONT.value and isinstance( - record, (list, tuple) - ): - if len(record) == 2: - self.wlogrecords[name] = tuple(record) # prefer as tuple - elif self.wlogtypes[name] == _LogType.CONT.value and isinstance(record, dict): - raise ValueError( - "Cannot set a log record for a continuous log: input record is " - "dictionary, not a list or tuple" - ) - elif self.wlogtypes[name] == _LogType.DISC.value and isinstance(record, dict): - self.wlogrecords[name] = record - elif self.wlogtypes[name] == _LogType.DISC.value and not isinstance( - record, dict - ): - raise ValueError( - "Input is not a dictionary. Cannot set a log record for a discrete log" - ) - else: - raise ValueError( - "Something went wrong when setting logrecord: " - f"({self.wlogtypes[name]} {type(record)})." - ) - - self.ensure_consistency() - - def get_dataframe_copy( - self, - infer_dtype: bool = False, - filled=False, - fill_value=const.UNDEF, - fill_value_int=const.UNDEF_INT, - ): - """Get a deep copy of the dataframe, with options. 
- - If infer_dtype is True, then DISC columns will be of "int32" type - """ - dfr = self.data.copy() - if infer_dtype: - for name, wtype in self.wlogtypes.items(): - if "DISC" in wtype: - dfr[name] = dfr[name].astype("int32") - - if filled: - dfill = {} - for lname in self.data: - if "DISC" in self.wlogtypes[lname]: - dfill[lname] = fill_value_int - else: - dfill[lname] = fill_value - - dfr = dfr.fillna(dfill) - - return dfr - - def get_dataframe(self): - """Get the dataframe.""" - return self.data - - def set_dataframe(self, dfr): - """Set the dataframe in a controlled manner, shall be used""" - # TODO: more checks, and possibly acceptance of lists, dicts? - if isinstance(dfr, pd.DataFrame): - self.data = dfr - else: - raise ValueError("Input dfr is not a pandas dataframe") - self.ensure_consistency() - - def rename_log(self, lname, newname): - """Rename a log, e.g. Poro to PORO.""" - - if lname not in list(self.data): - raise ValueError("Input log does not exist") - - if newname in list(self.data): - raise ValueError("New log name exists already") - - # rename in dataframe - self.data.rename(index=str, columns={lname: newname}, inplace=True) - - self.wlogtypes[newname] = self.wlogtypes.pop(lname) - self.wlogrecords[newname] = self.wlogrecords.pop(lname) - - self.ensure_consistency() - - def create_log( - self, lname, logtype="CONT", logrecord=None, value=0.0, force=True - ) -> bool: - """Create a new log.""" - - if lname in list(self.data) and force is False: - return False - - self.wlogtypes[lname] = logtype - self.wlogrecords[lname] = logrecord - - # make a new column - self.data[lname] = float(value) - self.ensure_consistency() - return True - - def delete_log(self, lname): - """Delete/remove an existing log, or list of logs.""" - if not isinstance(lname, list): - lname = [lname] - - lcount = 0 - for logn in lname: - if logn not in list(self.data): - continue - - lcount += 1 - self.data.drop(logn, axis=1, inplace=True) - - self.ensure_consistency() - - return 
lcount - - def create_relative_hlen(self): - """Make a relative length of a well, as a log.""" - # extract numpies from XYZ trajectory logs - xv = self.data[self.xname].values - yv = self.data[self.yname].values - - distance = [] - previous_x, previous_y = xv[0], yv[0] - for _, (x, y) in enumerate(zip(xv, yv)): - distance.append(math.hypot((previous_x - x), (y - previous_y))) - previous_x, previous_y = x, y - - self.data["R_HLEN"] = pd.Series(np.cumsum(distance), index=self.data.index) - self.ensure_consistency() - - def geometrics(self): - """Compute some well geometrical arrays MD, INCL, AZI, as logs. - - These are kind of quasi measurements hence the logs will named - with a Q in front as Q_MDEPTH, Q_INCL, and Q_AZI. - - These logs will be added to the dataframe. If the mdlogname - attribute does not exist in advance, it will be set to 'Q_MDEPTH'. - - Returns: - False if geometrics cannot be computed - - """ - # TODO: rewrite in pure python? - if self.data.shape[0] < 3: - raise ValueError( - f"Cannot compute geometrics. 
Not enough " - f"trajectory points (need >3, have: {self.data.shape[0]})" - ) - - # extract numpies from XYZ trajetory logs - ptr_xv = self._get_carray(self.xname) - ptr_yv = self._get_carray(self.yname) - ptr_zv = self._get_carray(self.zname) - - # get number of rows in pandas - nlen = len(self.data.index) - - ptr_md = _cxtgeo.new_doublearray(nlen) - ptr_incl = _cxtgeo.new_doublearray(nlen) - ptr_az = _cxtgeo.new_doublearray(nlen) - - ier = _cxtgeo.well_geometrics( - nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 - ) - - if ier != 0: - raise XTGeoCLibError(f"well_geometrics failed with error code: {ier}") - - dnumpy = self._convert_carr_double_np(ptr_md) - self.data["Q_MDEPTH"] = pd.Series(dnumpy, index=self.data.index) - - dnumpy = self._convert_carr_double_np(ptr_incl) - self.data["Q_INCL"] = pd.Series(dnumpy, index=self.data.index) - - dnumpy = self._convert_carr_double_np(ptr_az) - self.data["Q_AZI"] = pd.Series(dnumpy, index=self.data.index) - - # delete tmp pointers - _cxtgeo.delete_doublearray(ptr_xv) - _cxtgeo.delete_doublearray(ptr_yv) - _cxtgeo.delete_doublearray(ptr_zv) - _cxtgeo.delete_doublearray(ptr_md) - _cxtgeo.delete_doublearray(ptr_incl) - _cxtgeo.delete_doublearray(ptr_az) - - return True - - # ---------------------------------------------------------------------------------- - # Special methods for nerds, todo is to move to private module - # ---------------------------------------------------------------------------------- - - def _convert_np_carr_int(self, np_array): - """Convert numpy 1D array to C array, assuming int type. 
- - The numpy is always a double (float64), so need to convert first - """ - carr = _cxtgeo.new_intarray(len(self.data.index)) - - np_array = np_array.astype(np.int32) - - _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) - - return carr - - def _convert_np_carr_double(self, np_array): - """Convert numpy 1D array to C array, assuming double type.""" - carr = _cxtgeo.new_doublearray(len(self.data.index)) - - _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) - - return carr - - def _convert_carr_double_np(self, carray, nlen=None): - """Convert a C array to numpy, assuming double type.""" - if nlen is None: - nlen = len(self.data.index) - - nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) - - return nparray - - def _get_carray(self, lname: str) -> Optional[Any]: - """Returns the C array pointer (via SWIG) for a given log. - - Type conversion is double if float64, int32 if DISC log. - Returns None if log does not exist. - """ - if lname in self.data: - np_array = self.data[lname].values - else: - return None - - if "DISC" in self.wlogtypes[lname]: - carr = self._convert_np_carr_int(np_array) - else: - carr = self._convert_np_carr_double(np_array) - - return carr diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index 35e23d555..1cbb105b5 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -15,9 +15,9 @@ import xtgeo import xtgeo.common.constants as const import xtgeo.cxtgeo._cxtgeo as _cxtgeo -from xtgeo import XTGeoCLibError # type: ignore[attr-defined] -from . import _well_aux, _well_io, _well_oper, _well_roxapi, _welldata, _wellmarkers +from ..xyz_common import _xyz_data # type: ignore[attr-defined] +from . 
import _well_aux, _well_io, _well_oper, _well_roxapi, _wellmarkers xtg = xtgeo.XTGeoDialog() logger = xtg.functionlogger(__name__) @@ -194,9 +194,9 @@ def __init__( self._mdlogname = mdlogname self._zonelogname = zonelogname - self._wdata = _welldata._WellData(df, wlogtypes, wlogrecords) - self._wlogtypes = self._wdata.wlogtypes - self._wlogrecords = self._wdata.wlogrecords + self._wdata = _xyz_data._XYZData(df, wlogtypes, wlogrecords) + # self._wlogtypes = self._wdata.attr_types + # self._wlogrecords = self._wdata.attr_records self._ensure_consistency() @@ -212,8 +212,9 @@ def __repr__(self): # noqa: D105 f"{self.__class__.__name__} (rkb={self._rkb}, xpos={self._xpos}, " f"ypos={self._ypos}, wname='{self._wname}', " f"filesrc='{self._filesrc}', mdlogname='{self._mdlogname}', " - f"zonelogname='{self._zonelogname}', \nwlogtypes='{self._wlogtypes}', " - f"\nwlogrecords='{self._wlogrecords}', " + f"zonelogname='{self._zonelogname}', \n" + f"wlogtypes='{self._wdata.attr_types}', " + f"\nwlogrecords='{self._wdata.attr_records}', " f"df=\n{repr(self._wdata.data)}))" ) @@ -694,10 +695,10 @@ def get_wlogs(self) -> dict: for key in self.get_lognames(): wtype = "CONT" wrecord = None - if key in self._wlogtypes: - wtype = self._wlogtypes[key] - if key in self._wlogrecords: - wrecord = self._wlogrecords[key] + if key in self._wdata.attr_types: + wtype = self._wdata.attr_types[key] + if key in self._wdata.attr_records: + wrecord = self._wdata.attr_records[key] res[key] = [wtype, wrecord] @@ -725,14 +726,17 @@ def set_wlogs(self, wlogs: dict): typ, rec = wlogs[key] if typ in Well.VALID_LOGTYPES: - self._wlogtypes[key] = deepcopy(typ) + # self._wlogtypes[key] = deepcopy(typ) + self._wdata.set_attr_type(key, deepcopy(typ)) else: raise ValueError(f"Invalid log type found in input: {typ}") if isinstance(rec, dict): - self._wlogrecords[key] = deepcopy(rec) + self._wdata.set_attr_record(key, deepcopy(rec)) + # self._wlogrecords[key] = deepcopy(rec) elif not rec: - 
self._wlogrecords[key] = "" + self._wdata.set_attr_record(key, ("", "")) + # self._wlogrecords[key] = "" else: raise ValueError(f"Invalid log record found in input: {rec}") @@ -767,14 +771,14 @@ def copy(self): self._wdata.data.copy(), self.mdlogname, self.zonelogname, - deepcopy(self._wlogtypes), - deepcopy(self._wlogrecords), + deepcopy(self._wdata.attr_types), + deepcopy(self._wdata.attr_records), self._filesrc, ) def rename_log(self, lname, newname): """Rename a log, e.g. Poro to PORO.""" - self._wdata.rename_log(lname, newname) + self._wdata.rename_attr(lname, newname) if self._mdlogname == lname: self._mdlogname = newname @@ -782,18 +786,25 @@ def rename_log(self, lname, newname): if self._zonelogname == lname: self._zonelogname = newname - def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=True): + def create_log( + self, + lname: str, + logtype: str = "CONT", + logrecord: Optional[dict] = None, + value: float = 0.0, + force: bool = True, + ) -> bool: """Create a new log with initial values. If the logname already exists, it will be silently overwritten, unless the option force=False. Args: - lname (str): name of new log - logtype (str): Must be 'CONT' (default) or 'DISC' (discrete) - logrecord (dict): A dictionary of key: values for 'DISC' logs - value (float): initia value to set_index - force (bool): If True, and lname exists, it will be overwritten, if + lname: name of new log + logtype: Must be 'CONT' (default) or 'DISC' (discrete) + logrecord: A dictionary of key: values for 'DISC' logs + value: initial value to set + force: If True, and lname exists, it will be overwritten, if False, no new log will be made. Will return False. Returns: @@ -803,18 +814,19 @@ def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=Tru Note:: - A new log can also be created + A new log can also be created by adding it to the dataframe directly, but + with less control over e.g. 
logrecord """ - self._wdata.create_log(lname, logtype, logrecord, value, force) + return self._wdata.create_attr(lname, logtype, logrecord, value, force) - def delete_log(self, lname): + def delete_log(self, lname: Union[str, List[str]]) -> int: """Delete/remove an existing log, or list of logs. Will continue silently if a log does not exist. Args: - lname(str or list): A logname or a list of lognames + lname: A logname or a list of lognames Returns: Number of logs deleted @@ -824,30 +836,28 @@ def delete_log(self, lname): A log can also be deleted by simply removing it from the dataframe. """ - return _well_oper.delete_log(self, lname) + return self._wdata.delete_attr(lname) delete_logs = delete_log # alias function def get_logtype(self, lname) -> Optional[str]: """Returns the type of a given log (e.g. DISC or CONT), None if not present.""" - if lname in self._wdata.wlogtypes: - return self._wdata.wlogtypes[lname] + if lname in self._wdata.attr_types: + return self._wdata.attr_types[lname] return None def set_logtype(self, lname, ltype): """Sets the type of a give log (e.g. DISC or CONT).""" - self._wdata.set_wlogtype(lname, ltype) + self._wdata.set_attr_type(lname, ltype) def get_logrecord(self, lname): """Returns the record (dict) of a given log name, None if not exists.""" - if lname in self._wlogtypes: - return self._wlogrecords[lname] - return None + return self._wdata.get_attr_record(lname) def set_logrecord(self, lname, newdict): """Sets the record (dict) of a given discrete log.""" - self._wdata.set_wlogrecord(lname, newdict) + self._wdata.set_attr_record(lname, newdict) def get_logrecord_codename(self, lname, key): """Returns the name entry of a log record, for a given key. 
@@ -940,8 +950,8 @@ def truncate_parallel_path( if self.dataframe.shape[0] < 3 or other.dataframe.shape[0] < 3: raise ValueError( - f"Too few points to truncate parallel path, was {self._wdata.data.size} and " - f"{other._df.size}, must be >3" + f"Too few points to truncate parallel path, was " + f"{self._wdata.data.size} and {other._df.size}, must be >3" ) # extract numpies from XYZ trajectory logs diff --git a/src/xtgeo/xyz_common/__init__.py b/src/xtgeo/xyz_common/__init__.py new file mode 100644 index 000000000..a250ef494 --- /dev/null +++ b/src/xtgeo/xyz_common/__init__.py @@ -0,0 +1,3 @@ +# common low level and private modules for XYZ and Well + +from xtgeo.xyz_common._xyz_data import _XYZData, _AttrType diff --git a/src/xtgeo/xyz_common/_xyz_data.py b/src/xtgeo/xyz_common/_xyz_data.py new file mode 100644 index 000000000..9f3cad1a7 --- /dev/null +++ b/src/xtgeo/xyz_common/_xyz_data.py @@ -0,0 +1,581 @@ +"""Module for private _XYZData class. + +Note that that the design of this targets Well and general XYZ data (Points/Polygons), +hence the intentions is to let this work as a general 'engine' for dataframe'ish data +in xtgeo, at least Well, Points, Polygons. (But in the first round, it is implemented +for Wells only). Dataframes looks like: + + X_UTME Y_UTMN Z_TVDSS MDepth PHIT KLOGH Sw +0 463256.911 5930542.294 -49.0000 0.0000 NaN NaN NaN ... +1 463256.912 5930542.295 -48.2859 0.5000 NaN NaN NaN ... +2 463256.913 5930542.296 -47.5735 1.0000 NaN NaN NaN ... +3 463256.914 5930542.299 -46.8626 1.5000 NaN NaN NaN ... +4 463256.916 5930542.302 -46.1533 2.0000 NaN NaN NaN ... + ... ... ... ... ... ... ... + +Where each attr (log) has a attr_types dictionary, telling if the columns are treated +as discrete (DISC) or continuous (CONT). In addition there is a attr_records +dict, storing the unit for continuous logs/attr (defaulted to ("", "")) or a dictionary +of codes if the column if DISC type (this is optional, and perhaps only relevant for +Well data). 
+ +The 3 first columns are the XYZ coordinates or XY coordinates + value: +X, Y, Z or X, Y, V. An optional fourth column as also possible as polygon_id. +All the rest are free 'attributes', which for wells will be well logs. Hence: + + attrtypes ~ refer to attr_types for XYZ and Well data + attrrecords ~ refer to attr_records for Well data and possibly Points/Polygons + +If a column is added to the dataframe, then the methods here will try to guess the +attr_type and attr_record, and add those; similarly of a column is removed, the +corresponding entries in attr_types and attr_records will be deleted. +""" +from __future__ import annotations + +import math +from enum import Enum, EnumMeta, unique +from typing import Any, Optional, Sequence, Union + +import numpy as np +import pandas as pd + +import xtgeo.common.constants as const +from xtgeo import XTGeoCLibError # type: ignore[attr-defined] +from xtgeo.cxtgeo import _cxtgeo + +# class _AttrTypeMeta(EnumMeta): +# """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618""" + +# def __contains__(cls, item): +# try: +# cls(item) # pylint: disable=E1120 +# except ValueError: +# return False +# else: +# return True + + +@unique +class _AttrType(Enum): # (Enum, metaclass=_AttrTypeMeta): + """Enumerate type of attribute/log""" + + CONT = 1 + DISC = 2 + + +@unique +class _XYZType(Enum): # (Enum, metaclass=_AttrTypeMeta): + """Enumerate type of context""" + + POINTS = 1 + POLYGONS = 2 # ie. same here as PolyLines + WELL = 3 + + +CONT_DEFAULT_RECORD = ("", "") # unit and scale, where emptry string indicates ~unknown + + +class _XYZData: + """Private class for the XYZ and Well log data, where a Pandas dataframe is core. + + The data are stored in pandas dataframes, and by default, all columns are float, and + np.nan defines undefined values. Even if they are DISC. The reason for this is + restrictions in older versions of Pandas. + + All values in the dataframe shall be numbers. 
+ + The attr_types is on form {"PHIT": CONT, "FACIES": DISC, ...} + + The attr_records is somewhat heterogeneous, on form: + {"PHIT": ("unit", "scale"), "FACIES": {0:BG, 2: "SST", 4: "CALC"}} + Hence the CONT logs hold a tuple or list with 2 str members, or None, while DISC + log holds a dict where the key is an int and the value is a string. + """ + + def __init__( + self, + dataframe: pd.DataFrame, + attr_types: Optional[dict] = None, + attr_records: Optional[dict] = None, + xname: str = "X_UTME", + yname: str = "Y_UTMN", + zname: str = "Z_TVDSS", + idname: Optional[str] = None, + undef: Union[float, Sequence[float, float]] = -999.0, + xyztype: str = "well", + ): + self._df = dataframe + + self._attr_types = {} + if isinstance(attr_types, dict): + for name, atype in attr_types.items(): + self._attr_types[name] = _AttrType[atype] + + self._attr_records = attr_records if attr_records is not None else {} + self._xname = xname + self._yname = yname + self._zname = zname + self._idname = idname + + # undefined data are given by a value, that may be different for cont vs disc + if isinstance(undef, list): + self._undef_disc = undef[0] + self._undef_cont = undef[1] + else: + self._undef_disc = undef + self._undef_cont = undef + + if xyztype == "well": + self._xyztype = _XYZType.WELL + + self.ensure_consistency() + + @property + def dataframe(self): + return self._df + + data = dataframe # alias + + @property + def attr_types(self): + return self._attr_types + + @property + def attr_records(self): + return self._attr_records + + @property + def xname(self): + return self._xname + + @property + def yname(self): + return self._yname + + @property + def zname(self): + return self._zname + + def _infer_attr_dtypes(self): + """Return as dict on form {"X_UTME": "CONT", .... "FACIES": "DISC"}. + + There are some important restrictions: + * The first 3 columns X Y Z) are always CONT, even if input appears as DISC. 
+ * A check is made towards existing attr_types; if the key,value pair exists + already, this function will *not* force a change but keep as is. + """ + + new_df = self._df.convert_dtypes() + + dlist = new_df.dtypes.to_dict() + print(self._attr_types) + + datatypes = {} + for name, dtype in dlist.items(): + if name in self._attr_types: + datatypes[name] = self._attr_types[name] + + continue + + if name in (self._xname, self._yname, self._zname): + # force coordinates, first 3 columns, to be CONT + datatypes[name] = _AttrType.CONT + continue + + if "Float" in str(dtype): + datatypes[name] = _AttrType.CONT + elif "Int" in str(dtype): + datatypes[name] = _AttrType.DISC + else: + raise RuntimeError( + f"Log type seems to be something else than Float or Int for {name}" + ) + return datatypes + + def _ensure_consistency_attr_types(self): + """Ensure that dataframe and attr_types are consistent. + + attr_types are on form {"GR": "CONT", "ZONES": "DISC", ...} + + The column data in the dataframe takes precedence; i.e. if a column is removed + in a pandas operation, then attr_types are adapted silently by removing the item + from the dict. + """ + # check first if a log is removed in the dataframe (e.g. by pandas operations) + for attr_name in list(self._attr_types.keys()): + if attr_name not in self._df.columns[3:]: + del self._attr_types[attr_name] + + self._attr_types = self._infer_attr_dtypes() + + def _ensure_consistency_attr_records(self): + """Ensure that data and attr_records are consistent; cf attr_types. + + Important that attr_types are correct; i.e. run _ensure_consistency_attr_types() + first. 
+ """ + for attr_name, dtype in self._attr_types.items(): + if attr_name not in self._attr_records or not isinstance( + self._attr_records[attr_name], (dict, list, tuple) + ): + if dtype == _AttrType.CONT.value: + self._attr_records[attr_name] = CONT_DEFAULT_RECORD + + if dtype == _AttrType.DISC.value: + # it is a discrete log with missing record; try to find + # a default one based on current values... + lvalues = self._df[attr_name].values.round(decimals=0) + lmin = int(lvalues.min()) + lmax = int(lvalues.max()) + + lvalues = lvalues.astype("int") + codes = {} + for lval in range(lmin, lmax + 1): + if lval in lvalues: + codes[lval] = str(lval) + + self._attr_records[attr_name] = codes + + # correct when attr_types is CONT but attr_records for that entry is a dict + if ( + attr_name in self._attr_records + and self._attr_types[attr_name] == _AttrType.CONT.value + ): + if isinstance(self._attr_records[attr_name], dict): + self._attr_records[attr_name] = CONT_DEFAULT_RECORD + + def _ensure_consistency_df_dtypes(self): + """Ensure that dataframe float32 for all logs, except for X Y Z -> float64.""" + + col = list(self._df) + + coords_dtypes = [str(entry) for entry in self._df[col[0:3]].dtypes] + + if not all(["float64" in entry for entry in coords_dtypes]): + self._df[col[0:3]] = self._df.iloc[:, 0:3].astype("float64") + + logs_dtypes = [str(entry) for entry in self._df[col[3:]].dtypes] + + if not all(["float32" in entry for entry in logs_dtypes]): + self._df[col[3:]] = self._df.iloc[:, 3:].astype("float32") + + for name, attr_type in self._attr_types.items(): + if attr_type == "CONT": + self._df[name] = self._df[name].replace( + self._undef_cont, np.float32(const.UNDEF_CONT) + ) + else: + self._df[name] = self._df[name].replace( + self._undef_disc, np.int32(const.UNDEF_DISC) + ) + + def ensure_consistency(self): + """Ensure that data and wlog* are consistent. 
+ + This is important for many operations on the dataframe, an should keep + attr_types and attr_records 'in sync' with the dataframe. + + * When adding one or columns to the dataframe + * When removing one or more columns from the dataframe + * ... + """ + + if list(self._df.columns[:3]) != [self._xname, self._yname, self._zname]: + raise ValueError( + f"Dataframe must include '{self._xname}', '{self._yname}' " + f"and '{self._zname}', got {list(self._df.columns[:3])}" + ) + + # order matters: + self._ensure_consistency_attr_types() + self._ensure_consistency_attr_records() + self._ensure_consistency_df_dtypes() + + def set_attr_type(self, name: str, attrtype: str) -> None: + """Set a type (DISC, CONT) for a named attribute. + + A bit flexibility is added for attrtype, e.g. allowing "float*" for CONT + etc, and allow lowercase "cont" for CONT + + """ + + apply_attrtype = attrtype.upper() + if "FLOAT" in apply_attrtype: + apply_attrtype = "CONT" + if "INT" in apply_attrtype: + apply_attrtype = "DISC" + + if name not in self._attr_types: + raise ValueError(f"No such log name present: {name}") + + if apply_attrtype in _AttrType: + self._attr_types[name] = _AttrType(apply_attrtype) + else: + raise ValueError( + f"Cannot set wlogtype as {attrtype}, not in " + f"{list(_AttrType.__members__)}" + ) + + self.ensure_consistency() + + def get_attr_record(self, name: str): + """Get a record for a named attribute.""" + return self._attr_records[name] + + def set_attr_record(self, name: str, record: dict) -> None: + """Set a record for a named log.""" + + if name not in self._attr_types: + raise ValueError(f"No such attr_name: {name}") + + if self._attr_types[name] == _AttrType.CONT.value and isinstance( + record, (list, tuple) + ): + if len(record) == 2: + self._attr_records[name] = tuple(record) # prefer as tuple + elif self._attr_types[name] == _AttrType.CONT.value and isinstance( + record, dict + ): + raise ValueError( + "Cannot set a log record for a continuous log: input 
record is " + "dictionary, not a list or tuple" + ) + elif self._attr_types[name] == _AttrType.DISC.value and isinstance( + record, dict + ): + self._attr_records[name] = record + elif self._attr_types[name] == _AttrType.DISC.value and not isinstance( + record, dict + ): + raise ValueError( + "Input is not a dictionary. Cannot set a log record for a discrete log" + ) + else: + raise ValueError( + "Something went wrong when setting logrecord: " + f"({self._attr_types[name]} {type(record)})." + ) + + self.ensure_consistency() + + def get_dataframe_copy( + self, + infer_dtype: bool = False, + filled=False, + fill_value=const.UNDEF_CONT, + fill_value_int=const.UNDEF_DISC, + ): + """Get a deep copy of the dataframe, with options. + + If infer_dtype is True, then DISC columns will be of "int32" type + """ + dfr = self._df.copy() + if infer_dtype: + for name, attrtype in self._attr_types.items(): + if "DISC" in attrtype: + dfr[name] = dfr[name].astype("int32") + + if filled: + dfill = {} + for attrname in self._df: + if "DISC" in self._attr_types[attrname]: + dfill[attrname] = fill_value_int + else: + dfill[attrname] = fill_value + + dfr = dfr.fillna(dfill) + + return dfr + + def get_dataframe(self): + """Get the dataframe.""" + return self._df + + def set_dataframe(self, dfr): + """Set the dataframe in a controlled manner, shall be used""" + # TODO: more checks, and possibly acceptance of lists, dicts? + if isinstance(dfr, pd.DataFrame): + self._df = dfr + else: + raise ValueError("Input dfr is not a pandas dataframe") + self.ensure_consistency() + + def rename_attr(self, attrname, newname): + """Rename a attribute, e.g. 
Poro to PORO.""" + + if attrname not in list(self._df): + raise ValueError("Input log does not exist") + + if newname in list(self._df): + raise ValueError("New log name exists already") + + # rename in dataframe + self._df.rename(index=str, columns={attrname: newname}, inplace=True) + + self._attr_types[newname] = self._attr_types.pop(attrname) + self._attr_records[newname] = self._attr_records.pop(attrname) + + self.ensure_consistency() + + def create_attr( + self, attrname, attr_type="CONT", attr_record=None, value=0.0, force=True + ) -> bool: + """Create a new attribute, e.g. a log.""" + + if attrname in list(self._df) and force is False: + return False + + self._attr_types[attrname] = attr_type + self._attr_records[attrname] = attr_record + + # make a new column + self._df[attrname] = float(value) + self.ensure_consistency() + return True + + def delete_attr(self, attrname: str) -> int: + """Delete/remove an existing attribute, or list of attributes. + + Returns number of logs deleted + """ + if not isinstance(attrname, list): + attrname = [attrname] + + lcount = 0 + for logn in attrname: + if logn not in list(self._df): + continue + + lcount += 1 + self._df.drop(logn, axis=1, inplace=True) + + self.ensure_consistency() + + return lcount + + def create_relative_hlen(self): + """Make a relative length of e.g. a well, as a attribute (log).""" + # extract numpies from XYZ trajectory logs + xv = self._df[self._xname].values + yv = self._df[self._yname].values + + distance = [] + previous_x, previous_y = xv[0], yv[0] + for _, (x, y) in enumerate(zip(xv, yv)): + distance.append(math.hypot((previous_x - x), (y - previous_y))) + previous_x, previous_y = x, y + + self._df["R_HLEN"] = pd.Series(np.cumsum(distance), index=self._df.index) + self.ensure_consistency() + + def geometrics(self): + """Compute geometrical arrays MD, INCL, AZI, as attributes (logs) (~well data). 
+ + These are kind of quasi measurements hence the attributes (logs) will named + with a Q in front as Q_MDEPTH, Q_INCL, and Q_AZI. + + These attributes will be added to the dataframe. + + TODO: If the mdlogname + attribute does not exist in advance, it will be set to 'Q_MDEPTH'. + + Returns: + False if geometrics cannot be computed + + """ + # TODO: rewrite in pure python? + if self._df.shape[0] < 3: + raise ValueError( + f"Cannot compute geometrics. Not enough " + f"trajectory points (need >3, have: {self._df.shape[0]})" + ) + + # extract numpies from XYZ trajetory logs + ptr_xv = self._get_carray(self._xname) + ptr_yv = self._get_carray(self._yname) + ptr_zv = self._get_carray(self._zname) + + # get number of rows in pandas + nlen = len(self._df.index) + + ptr_md = _cxtgeo.new_doublearray(nlen) + ptr_incl = _cxtgeo.new_doublearray(nlen) + ptr_az = _cxtgeo.new_doublearray(nlen) + + ier = _cxtgeo.well_geometrics( + nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 + ) + + if ier != 0: + raise XTGeoCLibError(f"XYZ/well_geometrics failed with error code: {ier}") + + dnumpy = self._convert_carr_double_np(ptr_md) + self._df["Q_MDEPTH"] = pd.Series(dnumpy, index=self._df.index) + + dnumpy = self._convert_carr_double_np(ptr_incl) + self._df["Q_INCL"] = pd.Series(dnumpy, index=self._df.index) + + dnumpy = self._convert_carr_double_np(ptr_az) + self._df["Q_AZI"] = pd.Series(dnumpy, index=self._df.index) + + # delete tmp pointers + _cxtgeo.delete_doublearray(ptr_xv) + _cxtgeo.delete_doublearray(ptr_yv) + _cxtgeo.delete_doublearray(ptr_zv) + _cxtgeo.delete_doublearray(ptr_md) + _cxtgeo.delete_doublearray(ptr_incl) + _cxtgeo.delete_doublearray(ptr_az) + + return True + + # ---------------------------------------------------------------------------------- + # Special methods for nerds, todo is to move to private module + # ---------------------------------------------------------------------------------- + + def _convert_np_carr_int(self, np_array): + """Convert numpy 
1D array to C array, assuming int type. + + The numpy is always a double (float64), so need to convert first + """ + carr = _cxtgeo.new_intarray(len(self._df.index)) + + np_array = np_array.astype(np.int32) + + _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) + + return carr + + def _convert_np_carr_double(self, np_array): + """Convert numpy 1D array to C array, assuming double type.""" + carr = _cxtgeo.new_doublearray(len(self._df.index)) + + _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) + + return carr + + def _convert_carr_double_np(self, carray, nlen=None): + """Convert a C array to numpy, assuming double type.""" + if nlen is None: + nlen = len(self._df.index) + + nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) + + return nparray + + def _get_carray(self, attrname: str) -> Optional[Any]: + """Returns the C array pointer (via SWIG) for a given attr. + + Type conversion is double if float64, int32 if DISC attr. + Returns None if log does not exist. + """ + if attrname in self._df: + np_array = self._df[attrname].values + else: + return None + + if "DISC" in self._attr_types[attrname]: + carr = self._convert_np_carr_int(np_array) + else: + carr = self._convert_np_carr_double(np_array) + + return carr diff --git a/tests/test_well/test_well.py b/tests/test_well/test_well.py index 56b186725..2a991114a 100644 --- a/tests/test_well/test_well.py +++ b/tests/test_well/test_well.py @@ -1,7 +1,3 @@ -# -*- coding: utf-8 -*- - - -from collections import OrderedDict from os.path import join import numpy as np @@ -1249,7 +1245,3 @@ def test_get_polygons_skipname(string_to_well): polygons = well.get_polygons(skipname=True) assert "NAME" not in polygons.dataframe.columns assert polygons.name == "custom_name" - - -def test_get_fence_poly(string_to_well): - pass diff --git a/tests/test_well/test_well_xyzdata_class.py b/tests/test_well/test_well_xyzdata_class.py new file mode 100644 index 000000000..0830a467a --- /dev/null +++ b/tests/test_well/test_well_xyzdata_class.py @@ 
"""Test _XYZData class, in a Well context."""
import pandas as pd
import pytest

from xtgeo.xyz_common import _AttrType, _XYZData


@pytest.fixture(name="generate_data")
def fixture_generate_data() -> pd.DataFrame:
    """Make a test dataframe with 3 coordinate columns and 4 logs."""
    data = {
        "X_UTME": [1.3, 2.0, 3.0, 4.0, 5.2, 6.0, 9.0],
        "Y_UTMN": [11.0, 21.0, 31.0, 41.1, 51.0, 61.0, 91.0],
        "Z_TVDSS": [21.0, 22.0, 23.0, 24.0, 25.3, 26.0, 29.0],
        "MDEPTH": [13.0, 23.0, 33.0, 43.0, 53.2, 63.0, 93.0],
        "GR": [133.0, 2234.0, -999, 1644.0, 2225.5, 6532.0, 92.0],
        "FACIES": [1, -999, 3, 4, 4, 1, 1],
        "ZONES": [1, 2, 3, 3, 3, 4, -999],
    }

    return pd.DataFrame(data)


def test_well_xyzdata_initialize(generate_data: pd.DataFrame):
    """Initialize data with no attr_records and attr_types given.

    The init shall then try to infer a 'best' guess.
    """
    instance = _XYZData(generate_data)

    assert instance.dataframe.columns[0] == instance.xname
    assert instance.dataframe.columns[2] == instance.zname


def test_well_xyzdata_ensure_attr_types(generate_data: pd.DataFrame):
    """Testing private method _ensure_consistency_attr_types."""
    # NOTE(review): accessor naming is inconsistent across these tests
    # (dataframe / data / _df) — verify which are actual _XYZData properties
    instance = _XYZData(generate_data)
    assert "FACIES" in instance._df.columns

    del instance.dataframe["FACIES"]

    instance._ensure_consistency_attr_types()
    assert "FACIES" not in instance.dataframe.columns

    instance.dataframe["NEW"] = 1
    instance._ensure_consistency_attr_types()
    assert "NEW" in instance.dataframe.columns
    assert "NEW" in instance.attr_types


def test_infer_attr_dtypes(generate_data: pd.DataFrame):
    """Testing private method _infer_attr_dtypes."""
    instance = _XYZData(generate_data)

    instance._attr_types = {}  # for testing, make private _attr_types empty

    res = instance._infer_attr_dtypes()
    assert res["X_UTME"].name == "CONT"
    assert res["FACIES"].name == "DISC"

    # next, FACIES is predefined in attr_types prior to parsing; here as CONT
    # which shall 'win' in this setting
    instance._attr_types = {"FACIES": _AttrType.CONT}
    res = instance._infer_attr_dtypes()
    assert res["X_UTME"].name == "CONT"
    assert res["FACIES"].name == "CONT"


def test_ensure_dataframe_dtypes(generate_data: pd.DataFrame):
    """Testing private method _ensure_consistency_df_dtypes."""
    instance = _XYZData(generate_data)

    assert instance.data["FACIES"].dtype == "float32"
    instance.data["FACIES"] = instance.data["FACIES"].astype("int32")
    assert instance.data["FACIES"].dtype == "int32"

    instance._ensure_consistency_df_dtypes()
    assert instance.data["FACIES"].dtype == "float32"


def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame):
    """Add column to the dataframe; check if attr_types and attr_records are updated."""
    instance = _XYZData(generate_data)

    assert instance.attr_types == {
        "X_UTME": _AttrType.CONT,
        "Y_UTMN": _AttrType.CONT,
        "Z_TVDSS": _AttrType.CONT,
        "MDEPTH": _AttrType.CONT,
        "GR": _AttrType.CONT,
        "FACIES": _AttrType.DISC,
        "ZONES": _AttrType.DISC,
    }

    instance.data["NEW"] = 1.992
    instance.ensure_consistency()

    assert instance.attr_types == {
        "X_UTME": _AttrType.CONT,
        "Y_UTMN": _AttrType.CONT,
        "Z_TVDSS": _AttrType.CONT,
        "MDEPTH": _AttrType.CONT,
        "GR": _AttrType.CONT,
        "FACIES": _AttrType.DISC,
        "ZONES": _AttrType.DISC,
        "NEW": _AttrType.CONT,
    }

    instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1]
    instance.ensure_consistency()

    assert instance.attr_types == {
        "X_UTME": _AttrType.CONT,
        "Y_UTMN": _AttrType.CONT,
        "Z_TVDSS": _AttrType.CONT,
        "MDEPTH": _AttrType.CONT,
        "GR": _AttrType.CONT,
        "FACIES": _AttrType.DISC,
        "ZONES": _AttrType.DISC,
        "NEW": _AttrType.CONT,
        "DNEW": _AttrType.DISC,
    }

    empty = ("", "")

    assert instance.attr_records == {
        "X_UTME": empty,
        "Y_UTMN": empty,
        "Z_TVDSS": empty,
        "MDEPTH": empty,
        "GR": empty,
        "FACIES": {-999: "-999", 1: "1", 3: "3", 4: "4"},
        "ZONES": {-999: "-999", 1: "1", 2: "2", 3: "3", 4: "4"},
        "NEW": empty,
        "DNEW": {-999: "-999", 1: "1", 3: "3", 4: "4"},
    }


def test_attrtype_class():
    """Test the ENUM type _AttrType."""
    assert _AttrType.DISC.value == 2
    assert _AttrType.CONT.value == 1

    assert "CONT" in _AttrType
    assert "DISC" in _AttrType
    assert "FOO" not in _AttrType

    assert _AttrType("DISC")
    assert _AttrType("CONT")

    with pytest.raises(ValueError, match="is not a valid"):
        _AttrType("FOO")


def test_well_xyzdata_dataframe_copy(generate_data: pd.DataFrame):
    """Test get dataframe method, with options."""
    instance = _XYZData(generate_data)

    copy = instance.get_dataframe_copy()
    col = list(copy)

    dtypes = [str(entry) for entry in copy[col].dtypes]
    assert dtypes == [
        "float64",
        "float64",
        "float64",
        "float32",
        "float32",
        "float32",
        "float32",
    ]

    copy = instance.get_dataframe_copy(infer_dtype=True)

    dtypes = [str(entry) for entry in copy[col].dtypes]
    assert dtypes == [
        "float64",
        "float64",
        "float64",
        "float32",
        "float32",
        "int32",
        "int32",
    ]
wlogtypes given. - - The init shall than then try to infer 'best' guess""" - - instance = _WellData(generate_data) - - assert instance.data.columns[0] == instance.xname - assert instance.data.columns[2] == instance.zname - - -def test_welldata_setters(generate_data: pd.DataFrame): - """Initialize data, and try a direct setter.""" - - instance = _WellData(generate_data) - - dfr = instance.data.copy() - with pytest.raises(AttributeError, match="Don't use direct metods"): - instance.data = dfr - - -def test_welldata_ensure_wlogtypes(generate_data: pd.DataFrame): - """Testing private method _ensure_wlogtypes""" - - instance = _WellData(generate_data) - assert "FACIES" in instance.data.columns - - del instance.data["FACIES"] - - instance._ensure_consistency_wlogtypes() - assert "FACIES" not in instance.data.columns - - instance.data["NEW"] = 1 - instance._ensure_consistency_wlogtypes() - assert "NEW" in instance.data.columns - assert "NEW" in instance.wlogtypes - - -def test_infer_log_dtypes(generate_data: pd.DataFrame): - """Testing private method _infer_log_dtypes""" - - instance = _WellData(generate_data) - - instance.wlogtypes = {} # for testing, make wlogtypes empty - - res = instance._infer_log_dtypes() - assert res["X_UTME"] == "CONT" - assert res["FACIES"] == "DISC" - - # next, FACIES is predefined in wlogtypes prior to parsing; here as CONT - # which shall 'win' in this setting - instance.wlogtypes = {"FACIES": "CONT"} - res = instance._infer_log_dtypes() - assert res["X_UTME"] == "CONT" - assert res["FACIES"] == "CONT" - - -def test_ensure_dataframe_dtypes(generate_data: pd.DataFrame): - """Testing private method _ensure_cosistency_df_dtypes""" - - instance = _WellData(generate_data) - - assert instance.data["FACIES"].dtype == "float32" - instance.data["FACIES"] = instance.data["FACIES"].astype("int32") - assert instance.data["FACIES"].dtype == "int32" - - instance._ensure_consistency_df_dtypes() - assert instance.data["FACIES"].dtype == "float32" - - -def 
test_welldata_consistency_add_column(generate_data: pd.DataFrame): - """Add a column to the dataframe; check if wlogtypes and wlogrecords are updated.""" - - instance = _WellData(generate_data) - - assert instance.wlogtypes == { - "X_UTME": "CONT", - "Y_UTMN": "CONT", - "Z_TVDSS": "CONT", - "MDEPTH": "CONT", - "GR": "CONT", - "FACIES": "DISC", - "ZONES": "DISC", - } - - instance.data["NEW"] = 1.992 - instance.ensure_consistency() - - assert instance.wlogtypes == { - "X_UTME": "CONT", - "Y_UTMN": "CONT", - "Z_TVDSS": "CONT", - "MDEPTH": "CONT", - "GR": "CONT", - "FACIES": "DISC", - "ZONES": "DISC", - "NEW": "CONT", - } - - instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1] - instance.ensure_consistency() - - assert instance.wlogtypes == { - "X_UTME": "CONT", - "Y_UTMN": "CONT", - "Z_TVDSS": "CONT", - "MDEPTH": "CONT", - "GR": "CONT", - "FACIES": "DISC", - "ZONES": "DISC", - "NEW": "CONT", - "DNEW": "DISC", - } - - assert instance.wlogrecords == { - "X_UTME": None, - "Y_UTMN": None, - "Z_TVDSS": None, - "MDEPTH": None, - "GR": None, - "FACIES": {-999: "-999", 1: "1", 3: "3", 4: "4"}, - "ZONES": {-999: "-999", 1: "1", 2: "2", 3: "3", 4: "4"}, - "NEW": None, - "DNEW": {-999: "-999", 1: "1", 3: "3", 4: "4"}, - } - - -def test_logtype_class(): - """Test the ENUM type _LogClass""" - - assert _LogType.DISC.value == "DISC" - assert _LogType.CONT.value == "CONT" - - assert "CONT" in _LogType - assert "DISC" in _LogType - assert "FOO" not in _LogType - - assert _LogType("DISC") - assert _LogType("CONT") - - with pytest.raises(ValueError, match="is not a valid"): - _LogType("FOO") - - -def test_welldata_dataframe_copy(generate_data: pd.DataFrame): - """Test get dataframe method, with option""" - - instance = _WellData(generate_data) - - copy = instance.get_dataframe_copy() - col = list(copy) - - dtypes = [str(entry) for entry in copy[col].dtypes] - assert dtypes == [ - "float64", - "float64", - "float64", - "float32", - "float32", - "float32", - "float32", - ] - - copy = 
instance.get_dataframe_copy(infer_dtype=True) - - dtypes = [str(entry) for entry in copy[col].dtypes] - assert dtypes == [ - "float64", - "float64", - "float64", - "float32", - "float32", - "int32", - "int32", - ]