diff --git a/pyproject.toml b/pyproject.toml index 839cb3419..45664f176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [build-system] requires = [ - "scikit-build-core[pyproject]", - "swig", - "numpy==1.19.2; python_version == '3.8'", - "numpy==1.19.5; python_version == '3.9'", - "numpy==1.21.6; python_version == '3.10'", - "numpy==1.23.5; python_version == '3.11'", + "scikit-build-core[pyproject]", + "swig", + "numpy==1.19.2; python_version == '3.8'", + "numpy==1.19.5; python_version == '3.9'", + "numpy==1.21.6; python_version == '3.10'", + "numpy==1.23.5; python_version == '3.11'", ] build-backend = "scikit_build_core.build" @@ -22,9 +22,7 @@ description = "XTGeo is a Python library for 3D grids, surfaces, wells, etc" readme = "README.md" requires-python = ">=3.8" license = { text = "LGPL-3.0" } -authors = [ - { name = "Equinor", email = "jriv@equinor.com" }, -] +authors = [{ name = "Equinor", email = "jriv@equinor.com" }] keywords = ["grids", "surfaces", "wells", "cubes"] classifiers = [ "Development Status :: 5 - Production/Stable", @@ -50,6 +48,7 @@ dependencies = [ "ecl-data-io>=2.1", "h5py>=3", "hdf5plugin>=2.3", + "joblib", "matplotlib>=3.3", "numpy>=1.19", "pandas>=1.1", diff --git a/src/xtgeo/__init__.py b/src/xtgeo/__init__.py index 603784961..c2a092c46 100644 --- a/src/xtgeo/__init__.py +++ b/src/xtgeo/__init__.py @@ -79,6 +79,7 @@ def _xprint(msg): # _xprint("Import matplotlib etc...DONE") +from xtgeo.common import XTGeoDialog from xtgeo.common.constants import UNDEF, UNDEF_INT, UNDEF_INT_LIMIT, UNDEF_LIMIT from xtgeo.common.exceptions import ( BlockedWellsNotFoundError, @@ -89,7 +90,6 @@ def _xprint(msg): WellNotFoundError, ) from xtgeo.common.sys import _XTGeoFile -from xtgeo.common.xtgeo_dialog import XTGeoDialog from xtgeo.cxtgeo._cxtgeo import XTGeoCLibError _xprint("Import common... done") diff --git a/src/xtgeo/common/constants.py b/src/xtgeo/common/constants.py index 3bd0912b9..f22df3d7b 100644 --- a/src/xtgeo/common/constants.py +++ b/src/xtgeo/common/constants.py @@ -21,3 +21,7 @@ MAXKEYWORDS = cx.MAXKEYWORDS # maximum keywords for ECL and ROFF scanning MAXDATES = cx.MAXDATES # maximum keywords for ECL scanning + +# for XYZ data, restricted to float32 and int32 +UNDEF_CONT = UNDEF +UNDEF_DISC = UNDEF_INT diff --git a/src/xtgeo/common/sys.py b/src/xtgeo/common/sys.py index b0277cee0..eb2354109 100644 --- a/src/xtgeo/common/sys.py +++ b/src/xtgeo/common/sys.py @@ -687,3 +687,62 @@ def decorator_set_docstring(func): return func return decorator_set_docstring + + +# ---------------------------------------------------------------------------------- +# Special methods for nerds, to be removed when not appplied any more +# ---------------------------------------------------------------------------------- + + +def _convert_np_carr_int(length, np_array): + """Convert numpy 1D array to C array, assuming int type. 
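+
+    A call sketch mirroring the use in ``_get_carray`` further below (``nlen``
+    and the dataframe lookup are taken from that helper)::
+
+        carr = _convert_np_carr_int(nlen, dataframe[attrname].values)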
+ + The numpy is always a double (float64), so need to convert first + """ + carr = _cxtgeo.new_intarray(length) + + np_array = np_array.astype(np.int32) + + _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) + + return carr + + +def _convert_np_carr_double(length, np_array): + """Convert numpy 1D array to C array, assuming double type.""" + carr = _cxtgeo.new_doublearray(length) + + _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) + + return carr + + +def _convert_carr_double_np(length, carray, nlen=None): + """Convert a C array to numpy, assuming double type.""" + if nlen is None: + nlen = length + + nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) + + return nparray + + +def _get_carray(dataframe, attributes, attrname: str): + """Returns the C array pointer (via SWIG) for a given attr. + + Type conversion is double if float64, int32 if DISC attr. + Returns None if log does not exist. + """ + np_array = None + if attrname in dataframe: + np_array = dataframe[attrname].values + else: + return None + + nlen = len(dataframe.index) + if attributes[attrname] == "DISC": + carr = _convert_np_carr_int(nlen, np_array) + else: + carr = _convert_np_carr_double(nlen, np_array) + + return carr diff --git a/src/xtgeo/grid3d/_grid_etc1.py b/src/xtgeo/grid3d/_grid_etc1.py index ec6ade8a7..2fb938572 100644 --- a/src/xtgeo/grid3d/_grid_etc1.py +++ b/src/xtgeo/grid3d/_grid_etc1.py @@ -839,7 +839,8 @@ def _get_geometrics_v2(self, allcells=False, cellcenter=True, return_dict=False) y1 = ycor.values[self.ncol - 1, midcol, midlay] glist.append(degrees(atan2(y1 - y0, x1 - x0))) - dx, dy = self.get_dxdy(asmasked=False) + dx = self.get_dx(asmasked=False) + dy = self.get_dy(asmasked=False) dz = self.get_dz(asmasked=False) glist.append(dx.values.mean()) glist.append(dy.values.mean()) diff --git a/src/xtgeo/grid3d/_grid_wellzone.py b/src/xtgeo/grid3d/_grid_wellzone.py index fa9c45ce4..904d9fefd 100644 --- a/src/xtgeo/grid3d/_grid_wellzone.py +++ b/src/xtgeo/grid3d/_grid_wellzone.py @@ -55,17 +55,19 @@ def report_zone_mismatch( # get the IJK along the well as logs; use a copy of the well instance wll = well.copy() - wll._df[zonelogname] += zonelogshift + wll.dataframe[zonelogname] += zonelogshift if depthrange: d1, d2 = depthrange - wll._df = wll._df[(d1 < wll._df.Z_TVDSS) & (wll._df.Z_TVDSS < d2)] + wll.dataframe = wll.dataframe[ + (d1 < wll.dataframe.Z_TVDSS) & (wll.dataframe.Z_TVDSS < d2) + ] wll.get_gridproperties(zoneprop, self) zmodel = zoneprop.name + "_model" # from here, work with the dataframe only - df = wll._df + df = wll.dataframe # zonelogrange z1, z2 = zonelogrange diff --git a/src/xtgeo/well/_blockedwell_roxapi.py b/src/xtgeo/well/_blockedwell_roxapi.py index f824d7ccd..2336a9ca5 100644 --- a/src/xtgeo/well/_blockedwell_roxapi.py +++ b/src/xtgeo/well/_blockedwell_roxapi.py @@ -1,8 +1,4 @@ -# -*- coding: utf-8 -*- -"""Well input and output, private module for ROXAPI""" - - -from collections import OrderedDict +"""Blocked Well input and output, private module for ROXAPI""" import numpy as np import numpy.ma as npma @@ -12,8 +8,10 @@ from xtgeo.common.exceptions import WellNotFoundError from xtgeo.roxutils import RoxUtils +from ..xyz_common._xyz_enum import _AttrName + try: - import roxar + import roxar # type: ignore except ImportError: pass @@ -21,7 +19,7 @@ logger = xtg.functionlogger(__name__) -# Import / export via ROX api +# Import / export via ROX/RMS api def import_bwell_roxapi( @@ -85,17 +83,18 @@ def _roxapi_import_bwell( cind = bw_cellindices[dind] xyz = 
np.transpose(gmodel.get_grid(realisation=realisation).get_cell_centers(cind))

-    logs = OrderedDict()
-    logs["X_UTME"] = xyz[0].astype(np.float64)
-    logs["Y_UTMN"] = xyz[1].astype(np.float64)
-    logs["Z_TVDSS"] = xyz[2].astype(np.float64)
+    logs = {}
+    logs[_AttrName.XNAME.value] = xyz[0].astype(np.float64)
+    logs[_AttrName.YNAME.value] = xyz[1].astype(np.float64)
+    logs[_AttrName.ZNAME.value] = xyz[2].astype(np.float64)
+
     if ijk:
         ijk = np.transpose(
             gmodel.get_grid(realisation=realisation).grid_indexer.get_indices(cind)
         )
-        logs["I_INDEX"] = ijk[0].astype(np.float64)
-        logs["J_INDEX"] = ijk[1].astype(np.float64)
-        logs["K_INDEX"] = ijk[2].astype(np.float64)
+        logs[_AttrName.I_INDEX.value] = ijk[0].astype(np.float64)
+        logs[_AttrName.J_INDEX.value] = ijk[1].astype(np.float64)
+        logs[_AttrName.K_INDEX.value] = ijk[2].astype(np.float64)

     usenames = []
     if lognames and lognames == "all":
@@ -112,11 +111,11 @@
             tmplog = npma.filled(tmplog, fill_value=np.nan)
             tmplog[tmplog == -999] = np.nan
             if "discrete" in str(bwprop.type):
-                self._wlogtypes[lname] = "DISC"
-                self._wlogrecords[lname] = bwprop.code_names
+                self.set_logtype(lname, "DISC")
+                self.set_logrecord(lname, bwprop.code_names)
             else:
-                self._wlogtypes[lname] = "CONT"
-                self._wlogrecords[lname] = None
+                self.set_logtype(lname, "CONT")
+                self.set_logrecord(lname, None)

             logs[lname] = tmplog

@@ -165,7 +164,14 @@
     dind = bwset.get_data_indices([self._wname], realisation=realisation)

     for lname in self.lognames:
-        if not ijk and "_INDEX" in lname:
+        if not ijk and any(
+            attr in lname
+            for attr in (
+                _AttrName.I_INDEX.value,
+                _AttrName.J_INDEX.value,
+                _AttrName.K_INDEX.value,
+            )
+        ):
             continue

         if lognames != "all" and lname not in lognames:
diff --git a/src/xtgeo/well/_well_aux.py b/src/xtgeo/well/_well_aux.py
new file mode 100644
index 000000000..0483fb194
--- /dev/null
+++ b/src/xtgeo/well/_well_aux.py
@@ -0,0 +1,96 @@
+"""Auxiliary functions for the well class.
+
+'self' is a Well() instance
+
+"""
+
+from __future__ import annotations
+
+import functools
+import warnings
+from pathlib import Path
+from typing import Callable, Optional
+
+import pandas as pd
+
+import xtgeo
+import xtgeo.cxtgeo._cxtgeo as _cxtgeo  # type: ignore
+from xtgeo.common import XTGeoDialog
+
+from ..xyz_common._xyz_enum import _AttrName
+from . import _well_io
+
+xtg = XTGeoDialog()
+logger = xtg.functionlogger(__name__)
+
+
+def _data_reader_factory(file_format: Optional[str] = None):
+    if file_format in ["rmswell", "irap_ascii", None]:
+        return _well_io.import_rms_ascii
+    if file_format == "hdf":
+        return _well_io.import_hdf5_well
+    raise ValueError(
+        f"Unknown file format {file_format}, supported formats are "
+        "'rmswell', 'irap_ascii' and 'hdf'"
+    )
+
+
+def allow_deprecated_init(func: Callable):
+    # This decorator is here to maintain backwards compatibility in the
+    # construction of Well and should be deleted once the deprecation period
+    # has expired; the construction will then follow the new pattern.
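+    #
+    # Sketch of the two construction routes this decorator reconciles (the
+    # file name is hypothetical):
+    #
+    #     mywell = xtgeo.Well("OP_1.w")             # deprecated, warns
+    #     mywell = xtgeo.well_from_file("OP_1.w")   # the recommended route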
+ @functools.wraps(func) + def wrapper(self, *args, **kwargs): + if not args and not kwargs: + warnings.warn( + "Initializing empty well is deprecated, please provide " + "non-defaulted values, or use mywell = " + "xtgeo.well_from_file('filename')", + DeprecationWarning, + ) + return func( + self, + *([0.0] * 3), + "", + pd.DataFrame( + { + _AttrName.XNAME.value: [], + _AttrName.YNAME.value: [], + _AttrName.ZNAME.value: [], + } + ), + ) + + # Checking if we are doing an initialization from file and raise a + # deprecation warning if we are. + if "wfile" in kwargs or ( + len(args) >= 1 and isinstance(args[0], (str, Path, xtgeo._XTGeoFile)) + ): + warnings.warn( + "Initializing directly from file name is deprecated and will be " + "removed in xtgeo version 4.0. Use: " + "mywell = xtgeo.well_from_file('filename') instead", + DeprecationWarning, + ) + if len(args) >= 1: + wfile = args[0] + args = args[1:] + else: + wfile = kwargs.pop("wfile", None) + if len(args) >= 1: + fformat = args[0] + args = args[1:] + else: + fformat = kwargs.pop("fformat", None) + + mfile = xtgeo._XTGeoFile(wfile) + if fformat is None or fformat == "guess": + fformat = mfile.detect_fformat() + else: + fformat = mfile.generic_format_by_proposal(fformat) + kwargs = _data_reader_factory(fformat)(mfile, *args, **kwargs) + kwargs["filesrc"] = mfile.file + return func(self, **kwargs) + return func(self, *args, **kwargs) + + return wrapper diff --git a/src/xtgeo/well/_well_io.py b/src/xtgeo/well/_well_io.py index 45b87a8bc..ea9ca35c6 100644 --- a/src/xtgeo/well/_well_io.py +++ b/src/xtgeo/well/_well_io.py @@ -10,6 +10,8 @@ import xtgeo from xtgeo.common import XTGeoDialog +from ..xyz_common._xyz_enum import _AttrName + xtg = XTGeoDialog() logger = xtg.functionlogger(__name__) @@ -28,11 +30,15 @@ def import_rms_ascii( wlogtype = dict() wlogrecords = dict() - xlognames_all = ["X_UTME", "Y_UTMN", "Z_TVDSS"] + xlognames_all = [ + _AttrName.XNAME.value, + _AttrName.YNAME.value, + _AttrName.ZNAME.value, + ] xlognames = [] lnum = 1 - with open(wfile.file, "r") as fwell: + with open(wfile.file, "r", encoding="UTF-8") as fwell: for line in fwell: if lnum == 1: _ffver = line.strip() # noqa, file version @@ -95,7 +101,7 @@ def import_rms_ascii( xdict = {int(rxv[i]): rxv[i + 1] for i in range(0, len(rxv), 2)} wlogrecords[lname] = xdict else: - wlogrecords[lname] = rxv + wlogrecords[lname] = tuple(row[1:]) nlogread += 1 @@ -142,7 +148,7 @@ def _trim_on_lognames(dfr, lognames, lognames_strict, wname): if lognames == "all": return dfr - uselnames = ["X_UTME", "Y_UTMN", "Z_TVDSS"] + uselnames = [_AttrName.XNAME.value, _AttrName.YNAME.value, _AttrName.ZNAME.value] if isinstance(lognames, str): uselnames.append(lognames) elif isinstance(lognames, list): @@ -198,7 +204,7 @@ def _check_special_logs(dfr, mdlogname, zonelogname, strict, wname): def export_rms_ascii(self, wfile, precision=4): """Export to RMS well format.""" - with open(wfile, "w") as fwell: + with open(wfile, "w", encoding="utf-8") as fwell: print("1.0", file=fwell) print("Unknown", file=fwell) if self._rkb is None: @@ -212,21 +218,21 @@ def export_rms_ascii(self, wfile, precision=4): for lname in self.lognames: usewrec = "linear" wrec = [] - if isinstance(self._wlogrecords[lname], dict): - for key in self._wlogrecords[lname]: + if isinstance(self.wlogrecords[lname], dict): + for key in self.wlogrecords[lname]: wrec.append(key) - wrec.append(self._wlogrecords[lname][key]) + wrec.append(self.wlogrecords[lname][key]) usewrec = " ".join(str(x) for x in wrec) - print(f"{lname} 
{self._wlogtypes[lname]} {usewrec}", file=fwell) + print(f"{lname} {self.get_logtype(lname)} {usewrec}", file=fwell) # now export all logs as pandas framework - tmpdf = self._df.copy() + tmpdf = self._wdata.data.copy() tmpdf.fillna(value=-999, inplace=True) # make the disc as is np.int - for lname in self._wlogtypes: - if self._wlogtypes[lname] == "DISC": + for lname in self.wlogtypes: + if self.wlogtypes[lname] == "DISC": tmpdf[[lname]] = tmpdf[[lname]].astype(int) cformat = "%-." + str(precision) + "f" @@ -261,7 +267,7 @@ def export_hdf5_well(self, wfile, compression="lzf"): with pd.HDFStore(wfile.file, "w", complevel=complevel, complib=complib) as store: logger.info("export to HDF5 %s", wfile.name) - store.put("Well", self._df) + store.put("Well", self._wdata.data) store.get_storer("Well").attrs["metadata"] = jmeta store.get_storer("Well").attrs["provider"] = "xtgeo" store.get_storer("Well").attrs["format_idcode"] = 1401 diff --git a/src/xtgeo/well/_well_oper.py b/src/xtgeo/well/_well_oper.py index 4793b82da..e38d04a91 100644 --- a/src/xtgeo/well/_well_oper.py +++ b/src/xtgeo/well/_well_oper.py @@ -1,50 +1,19 @@ """Operations along a well, private module.""" -import copy - import numpy as np import pandas as pd import xtgeo -import xtgeo.cxtgeo._cxtgeo as _cxtgeo +import xtgeo.cxtgeo._cxtgeo as _cxtgeo # type: ignore from xtgeo.common import XTGeoDialog from xtgeo.common import constants as const +from xtgeo.common.sys import _get_carray xtg = XTGeoDialog() logger = xtg.functionlogger(__name__) -def delete_log(self, lname): - """Delete/remove an existing log, or list of logs.""" - self._ensure_consistency() - - if not isinstance(lname, list): - lname = [lname] - - lcount = 0 - for logn in lname: - if logn not in self._wlognames: - logger.info("Log does no exist: %s", logn) - continue - - logger.info("Log exist and will be deleted: %s", logn) - lcount += 1 - del self._wlogtypes[logn] - del self._wlogrecords[logn] - - self._df.drop(logn, axis=1, inplace=True) - self._ensure_consistency() - - if self._mdlogname == logn: - self._mdlogname = None - if self._zonelogname == logn: - self._zonelogname = None - - self._ensure_consistency() - return lcount - - def rescale(self, delta=0.15, tvdrange=None): """Rescale by using a new MD increment. 
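+
+        An illustrative usage sketch (parameter values are arbitrary); note
+        that ``geometrics()`` is called internally first when no MD log
+        exists yet::
+
+            >>> mywell = xtgeo.well_from_file(well_dir + "/OP_1.w")
+            >>> mywell.rescale(delta=0.5, tvdrange=(1500, 2500))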
@@ -53,15 +22,15 @@ def rescale(self, delta=0.15, tvdrange=None): pdrows = pd.options.display.max_rows pd.options.display.max_rows = 999 - dfrcolumns0 = self._df.columns + dfrcolumns0 = self.dataframe.columns if self.mdlogname is None: self.geometrics() - dfrcolumns1 = self._df.columns + dfrcolumns1 = self.dataframe.columns columnsadded = list(set(dfrcolumns1) - set(dfrcolumns0)) # new tmp columns, if any - dfr = self._df.copy().set_index(self.mdlogname) + dfr = self.dataframe.copy().set_index(self.mdlogname) logger.debug("Initial dataframe\n %s", dfr) @@ -74,12 +43,12 @@ def rescale(self, delta=0.15, tvdrange=None): tvd1, tvd2 = tvdrange try: - startt = dfr.index[dfr["Z_TVDSS"] >= tvd1][0] + startt = dfr.index[dfr[self._wdata.zname] >= tvd1][0] except IndexError: startt = start try: - stopt = dfr.index[dfr["Z_TVDSS"] >= tvd2][0] + stopt = dfr.index[dfr[self._wdata.zname] >= tvd2][0] except IndexError: stopt = stop @@ -100,8 +69,8 @@ def rescale(self, delta=0.15, tvdrange=None): dfr.reset_index(inplace=True, drop=True) for lname in dfr.columns: - if lname in self._wlogtypes: - ltype = self._wlogtypes[lname] + if lname in self.wlogtypes: + ltype = self.wlogtypes[lname] if ltype == "DISC": dfr = dfr.round({lname: 0}) @@ -109,7 +78,7 @@ def rescale(self, delta=0.15, tvdrange=None): pd.options.display.max_rows = pdrows # reset - self._df = dfr + self.dataframe = dfr if columnsadded: self.delete_log(columnsadded) @@ -123,7 +92,7 @@ def make_zone_qual_log(self, zqname): raise ValueError("Cannot find a zonelog") dff = self.get_filled_dataframe() - dff["ztmp"] = dff[self.zonelogname] + dff["ztmp"] = dff[self.zonelogname].copy() dff["ztmp"] = (dff.ztmp != dff.ztmp.shift()).cumsum() sgrp = dff.groupby("ztmp") @@ -168,11 +137,12 @@ def make_zone_qual_log(self, zqname): # now create the new log self.create_log(zqname, logtype="DISC", logrecord=codes) for key, val in dcode.items(): - self._df[zqname][dff["ztmp"] == key] = val + self.dataframe.loc[dff["ztmp"] == key, zqname] = val # set the metadata self.set_logtype(zqname, "DISC") self.set_logrecord(zqname, codes) + self._ensure_consistency() del dff @@ -196,9 +166,9 @@ def _make_ijk_from_grid_v1(self, grid, grid_id=""): """ logger.info("Using algorithm 1 in %s", __name__) - wxarr = self.get_carray("X_UTME") - wyarr = self.get_carray("Y_UTMN") - wzarr = self.get_carray("Z_TVDSS") + wxarr = _get_carray(self.dataframe, self.wlogtypes, self.xname) + wyarr = _get_carray(self.dataframe, self.wlogtypes, self.yname) + wzarr = _get_carray(self.dataframe, self.wlogtypes, self.zname) nlen = self.nrow wivec = _cxtgeo.new_intarray(nlen) @@ -242,16 +212,16 @@ def _make_ijk_from_grid_v1(self, grid, grid_id=""): jcellname = "JCELL" + grid_id kcellname = "KCELL" + grid_id - self._df[icellname] = indarray - self._df[jcellname] = jndarray - self._df[kcellname] = kndarray + self._wdata.data[icellname] = indarray + self._wdata.data[jcellname] = jndarray + self._wdata.data[kcellname] = kndarray for cellname in [icellname, jcellname, kcellname]: - self._wlogtypes[cellname] = "DISC" + self.set_logtype(cellname, "DISC") - self._wlogrecords[icellname] = {ncel: str(ncel) for ncel in range(1, grid.ncol + 1)} - self._wlogrecords[jcellname] = {ncel: str(ncel) for ncel in range(1, grid.nrow + 1)} - self._wlogrecords[kcellname] = {ncel: str(ncel) for ncel in range(1, grid.nlay + 1)} + self.set_logrecord(icellname, {ncel: str(ncel) for ncel in range(1, grid.ncol + 1)}) + self.set_logrecord(jcellname, {ncel: str(ncel) for ncel in range(1, grid.nrow + 1)}) + 
self.set_logrecord(kcellname, {ncel: str(ncel) for ncel in range(1, grid.nlay + 1)}) _cxtgeo.delete_intarray(wivec) _cxtgeo.delete_intarray(wjvec) @@ -272,7 +242,7 @@ def _make_ijk_from_grid_v2(self, grid, grid_id="", activeonly=True): """ # establish a Points instance and make points dataframe from well trajectory X Y Z wpoints = xtgeo.Points() - wpdf = self.dataframe.loc[:, ["X_UTME", "Y_UTMN", "Z_TVDSS"]].copy() + wpdf = self.dataframe.loc[:, [self.xname, self.yname, self.zname]].copy() wpoints.dataframe = wpdf wpoints.dataframe.reset_index(inplace=True, drop=True) @@ -302,10 +272,15 @@ def _make_ijk_from_grid_v2(self, grid, grid_id="", activeonly=True): def get_gridproperties(self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_id=""): - """Getting gridproperties as logs.""" + """Getting gridproperties as logs. + + The routine will make grid_coordinates from grid with make_ijk_from_grid(), or reuse + existing vectors if grid is a tuple (much faster). + """ if not isinstance(gridprops, (xtgeo.GridProperty, xtgeo.GridProperties)): raise ValueError('"gridprops" not a GridProperties or GridProperty instance') + wcopy = self.copy() if isinstance(gridprops, xtgeo.GridProperty): gprops = xtgeo.GridProperties() gprops.append_props([gridprops]) @@ -315,14 +290,15 @@ def get_gridproperties(self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_i if isinstance(grid, tuple): icl, jcl, kcl = grid elif isinstance(grid, xtgeo.Grid): - self.make_ijk_from_grid(grid, grid_id="_tmp", algorithm=2) + wcopy.make_ijk_from_grid(grid, grid_id="_tmp", algorithm=2) icl, jcl, kcl = ("ICELL_tmp", "JCELL_tmp", "KCELL_tmp") else: - raise ValueError('The "grid" is of wrong type, must be a tuple or ' "a Grid") + raise ValueError("The 'grid' is of wrong type, must be a tuple or a Grid") - iind = self.dataframe[icl].values - 1 - jind = self.dataframe[jcl].values - 1 - kind = self.dataframe[kcl].values - 1 + # let grid values have base 1 when looking up cells for gridprops + iind = wcopy.dataframe[icl].values - 1 + jind = wcopy.dataframe[jcl].values - 1 + kind = wcopy.dataframe[kcl].values - 1 xind = iind.copy() @@ -330,22 +306,34 @@ def get_gridproperties(self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_i jind[np.isnan(jind)] = 0 kind[np.isnan(kind)] = 0 - # iind = np.ma.masked_where(iind[~np.isnan(iind)].astype('int') iind = iind.astype("int") jind = jind.astype("int") kind = kind.astype("int") + dfr = wcopy.dataframe.copy() + pnames = {} for prop in gprops.props: arr = prop.values[iind, jind, kind].astype("float") + arr = np.ma.filled(arr, fill_value=np.nan) arr[np.isnan(xind)] = np.nan pname = prop.name + prop_id - self.dataframe[pname] = arr - self._wlognames.append(pname) - if prop.isdiscrete: - self._wlogtypes[pname] = "DISC" - self._wlogrecords[pname] = copy.deepcopy(prop.codes) - self._ensure_consistency() - self.delete_logs(["ICELL_tmp", "JCELL_tmp", "KCELL_tmp"]) + dfr[pname] = arr + pnames[pname] = (prop.isdiscrete, prop.codes) + + wcopy.set_dataframe(dfr) + for pname, isdiscrete_codes in pnames.items(): + isdiscrete, codes = isdiscrete_codes + if isdiscrete: + wcopy.set_logtype(pname, "DISC") + wcopy.set_logrecord(pname, codes) + else: + wcopy.set_logtype(pname, "CONT") + wcopy.set_logrecord(pname, ("", "")) + + wcopy.delete_logs(["ICELL_tmp", "JCELL_tmp", "KCELL_tmp"]) + self.set_dataframe(wcopy.dataframe) + del wcopy + del dfr def report_zonation_holes(self, threshold=5): @@ -357,15 +345,15 @@ def report_zonation_holes(self, threshold=5): wellreport = [] - zlog = 
self._df[self.zonelogname].values.copy() + zlog = self._wdata.data[self.zonelogname].values.copy() mdlog = None if self.mdlogname: - mdlog = self._df[self.mdlogname].values + mdlog = self._wdata.data[self.mdlogname].values - xvv = self._df["X_UTME"].values - yvv = self._df["Y_UTMN"].values - zvv = self._df["Z_TVDSS"].values + xvv = self._wdata.data[self.xname].values + yvv = self._wdata.data[self.yname].values + zvv = self._wdata.data[self.zname].values zlog[np.isnan(zlog)] = const.UNDEF_INT ncv = 0 @@ -420,9 +408,9 @@ def report_zonation_holes(self, threshold=5): return None if mdlog is not None: - clm = ["INDEX", "X_UTME", "Y_UTMN", "Z_TVDSS", "Zone", "Well", "MD"] + clm = ["INDEX", self.xname, self.yname, self.zname, "Zone", "Well", "MD"] else: - clm = ["INDEX", "X_UTME", "Y_UTMN", "Z_TVDSS", "Zone", "Well"] + clm = ["INDEX", self.xname, self.yname, self.zname, "Zone", "Well"] return pd.DataFrame(wellreport, columns=clm) @@ -443,22 +431,22 @@ def mask_shoulderbeds(self, inputlogs, targetlogs, nsamples, strict): return False for inlog in useinputs: - inseries = self._df[inlog] + inseries = self._wdata.data[inlog] if use_numeric: bseries = _get_bseries(inseries, nsamples) else: mode, value = list(nsamples.items())[0] - depth = self._df["Z_TVDSS"] + depth = self._wdata.data[self.zname] if mode == "md" and self.mdlogname is not None: - depth = self._df[self.mdlogname] + depth = self._wdata.data[self.mdlogname] elif mode == "md" and self.mdlogname is None: raise ValueError("There is no mdlogname attribute present.") bseries = _get_bseries_by_distance(depth, inseries, value) for target in usetargets: - self._df.loc[bseries, target] = np.nan + self._wdata.data.loc[bseries, target] = np.nan logger.info("Mask shoulderbeds for some logs... done") return True @@ -469,23 +457,23 @@ def _mask_shoulderbeds_checks(self, inputlogs, targetlogs, nsamples, strict): # check that inputlogs exists and that they are discrete, and targetlogs useinputs = [] for inlog in inputlogs: - if inlog not in self._wlogtypes.keys() and strict is True: + if inlog not in self.wlogtypes.keys() and strict is True: raise ValueError(f"Input log {inlog} is missing and strict=True") - if inlog in self._wlogtypes.keys() and self._wlogtypes[inlog] != "DISC": + if inlog in self.wlogtypes.keys() and self.wlogtypes[inlog] != "DISC": raise ValueError(f"Input log {inlog} is not of type DISC") - if inlog in self._wlogtypes.keys(): + if inlog in self.wlogtypes.keys(): useinputs.append(inlog) usetargets = [] for target in targetlogs: - if target not in self._wlogtypes.keys() and strict is True: + if target not in self.wlogtypes.keys() and strict is True: raise ValueError(f"Target log {target} is missing and strict=True") - if target in self._wlogtypes.keys(): + if target in self.wlogtypes.keys(): usetargets.append(target) use_numeric = True if isinstance(nsamples, int): - maxlen = len(self._df) // 2 + maxlen = self.nrow // 2 if nsamples < 1 or nsamples > maxlen: raise ValueError(f"Keyword nsamples must be an int > 1 and < {maxlen}") elif isinstance(nsamples, dict): @@ -566,11 +554,11 @@ def create_surf_distance_log(self, surf, name): raise ValueError("Input surface is not a RegularSurface instance.") # make a Points instance since points has the snap - zvalues = self.dataframe["Z_TVDSS"] + zvalues = self.dataframe[self.zname] points = xtgeo.Points() points.dataframe = self.dataframe.iloc[:, 0:3] points.snap_surface(surf) - snapped = points.dataframe["Z_TVDSS"] + snapped = points.dataframe[self.zname] diff = snapped - zvalues # create log 
(default is force overwrite if it exists) diff --git a/src/xtgeo/well/_well_roxapi.py b/src/xtgeo/well/_well_roxapi.py index 8f96b758f..37095c8b0 100644 --- a/src/xtgeo/well/_well_roxapi.py +++ b/src/xtgeo/well/_well_roxapi.py @@ -1,9 +1,6 @@ # -*- coding: utf-8 -*- """Well input and output, private module for ROXAPI.""" - -from collections import OrderedDict - import numpy as np import numpy.ma as npma import pandas as pd @@ -12,10 +9,13 @@ from xtgeo.common import XTGeoDialog from xtgeo.roxutils import RoxUtils +from ..xyz_common._xyz_enum import _AttrName + xtg = XTGeoDialog() logger = xtg.functionlogger(__name__) -# Well() instance self = xwell1 + +# Well() instance: self # Import from ROX api @@ -71,9 +71,7 @@ def _roxapi_import_well( wlogrecords = dict() # get logs repr trajecetry - mdlogname, logs = _roxapi_traj( - wlogtypes, wlogrecords, roxtraj, roxlrun, inclmd, inclsurvey - ) + mdlogname, logs = _roxapi_traj(roxtraj, roxlrun, inclmd, inclsurvey) if lognames and lognames == "all": for logcurv in roxlrun.log_curves: @@ -102,11 +100,8 @@ def _roxapi_import_well( } -def _roxapi_traj( - wlogtypes, wlogrecords, roxtraj, roxlrun, inclmd, inclsurvey -): # pragma: no cover +def _roxapi_traj(roxtraj, roxlrun, inclmd, inclsurvey): # pragma: no cover """Get trajectory in ROXAPI.""" - # compute trajectory surveyset = roxtraj.survey_point_series measured_depths = roxlrun.get_measured_depths() @@ -120,21 +115,21 @@ def _roxapi_traj( try: geo_array[ino] = surveyset.interpolate_survey_point(mdv) except ValueError: - logger.warning("MD is %s, surveyinterpolation fails, " "CHECK RESULT!", mdv) + logger.warning("MD is %s, surveyinterpolation fails, CHECK RESULT!", mdv) geo_array[ino] = geo_array[ino - 1] - logs = OrderedDict() + logs = dict() mdlogname = None - logs["X_UTME"] = geo_array[:, 3] - logs["Y_UTMN"] = geo_array[:, 4] - logs["Z_TVDSS"] = geo_array[:, 5] + logs[_AttrName.XNAME.value] = geo_array[:, 3] + logs[_AttrName.YNAME.value] = geo_array[:, 4] + logs[_AttrName.ZNAME.value] = geo_array[:, 5] if inclmd or inclsurvey: - logs["M_MDEPTH"] = geo_array[:, 0] - mdlogname = "M_MDEPTH" + logs[_AttrName.M_MD_NAME.value] = geo_array[:, 0] + mdlogname = _AttrName.M_MD_NAME.value if inclsurvey: - logs["M_INCL"] = geo_array[:, 1] - logs["M_AZI"] = geo_array[:, 2] + logs[_AttrName.M_INCL_NAME.value] = geo_array[:, 1] + logs[_AttrName.M_AZI_NAME.value] = geo_array[:, 2] return mdlogname, logs @@ -155,7 +150,7 @@ def _get_roxlog(wlogtypes, wlogrecords, roxlrun, lname): # pragma: no cover def export_well_roxapi( - xwell1, + self, project, wname, lognames="all", @@ -168,7 +163,7 @@ def export_well_roxapi( rox = RoxUtils(project, readonly=False) - _roxapi_export_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation) + _roxapi_export_well(self, rox, wname, lognames, logrun, trajectory, realisation) if rox._roxexternal: rox.project.save() @@ -176,18 +171,14 @@ def export_well_roxapi( rox.safe_close() -def _roxapi_export_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation): +def _roxapi_export_well(self, rox, wname, lognames, logrun, trajectory, realisation): if wname in rox.project.wells: - _roxapi_update_well( - xwell1, rox, wname, lognames, logrun, trajectory, realisation - ) + _roxapi_update_well(self, rox, wname, lognames, logrun, trajectory, realisation) else: - _roxapi_create_well( - xwell1, rox, wname, lognames, logrun, trajectory, realisation - ) + _roxapi_create_well(self, rox, wname, lognames, logrun, trajectory, realisation) -def _roxapi_update_well(xwell1, rox, wname, 
lognames, logrun, trajectory, realisation): +def _roxapi_update_well(self, rox, wname, lognames, logrun, trajectory, realisation): """Assume well is to updated only with logs, new or changed. Also, the length of arrays should not change, at least not for now. @@ -202,14 +193,14 @@ def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realis lrun.log_curves.clear() if lognames == "all": - uselognames = xwell1.lognames + uselognames = self.lognames else: uselognames = lognames for lname in uselognames: isdiscrete = False xtglimit = xtgeo.UNDEF_LIMIT - if xwell1._wlogtypes[lname] == "DISC": + if self.wlogtypes[lname] == "DISC": isdiscrete = True xtglimit = xtgeo.UNDEF_INT_LIMIT @@ -220,12 +211,12 @@ def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realis values = thelog.generate_values() - if values.size != xwell1.dataframe[lname].values.size: + if values.size != self.dataframe[lname].values.size: raise ValueError("New logs have different sampling or size, not possible") usedtype = values.dtype - vals = np.ma.masked_invalid(xwell1.dataframe[lname].values) + vals = np.ma.masked_invalid(self.dataframe[lname].values) vals = np.ma.masked_greater(vals, xtglimit) vals = vals.astype(usedtype) thelog.set_values(vals) @@ -237,12 +228,12 @@ def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realis else: codedict = { int(key): str(value) - for key, value in xwell1._wlogrecords[lname].items() + for key, value in self._wlogrecords[lname].items() } thelog.set_code_names(codedict) -def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation): +def _roxapi_create_well(self, rox, wname, lognames, logrun, trajectory, realisation): """Save Well() instance to a new well in RMS. From version 2.15. @@ -250,15 +241,15 @@ def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realis logger.debug("Key realisation is not supported: %s", realisation) roxwell = rox.project.wells.create(wname) - roxwell.rkb = xwell1.rkb - roxwell.wellhead = (xwell1.xpos, xwell1.ypos) + roxwell.rkb = self.rkb + roxwell.wellhead = (self.xpos, self.ypos) traj = roxwell.wellbore.trajectories.create(trajectory) series = traj.survey_point_series - east = xwell1.dataframe["X_UTME"].values - north = xwell1.dataframe["Y_UTMN"].values - tvd = xwell1.dataframe["Z_TVDSS"].values + east = self.dataframe[self.xname].values + north = self.dataframe[self.yname].values + tvd = self.dataframe[self.zname].values values = np.array([east, north, tvd]).transpose() series.set_points(values) @@ -268,8 +259,8 @@ def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realis lrun.set_measured_depths(md) # Add log curves - for curvename, curveprop in xwell1.get_wlogs().items(): - if curvename not in xwell1.lognames: + for curvename, curveprop in self.get_wlogs().items(): + if curvename not in self.lognames: continue # skip X_UTME .. 
Z_TVDSS if lognames and lognames != "all" and curvename not in lognames: continue @@ -283,12 +274,12 @@ def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realis if curveprop[0] == "DISC": lcurve = lrun.log_curves.create_discrete(cname) - cc = np.ma.masked_invalid(xwell1.dataframe[curvename].values) + cc = np.ma.masked_invalid(self.dataframe[curvename].values) lcurve.set_values(cc.astype(np.int32)) codedict = {int(key): str(value) for key, value in curveprop[1].items()} lcurve.set_code_names(codedict) else: lcurve = lrun.log_curves.create(cname) - lcurve.set_values(xwell1.dataframe[curvename].values) + lcurve.set_values(self.dataframe[curvename].values) logger.info("Log curve created: %s", cname) diff --git a/src/xtgeo/well/_wellmarkers.py b/src/xtgeo/well/_wellmarkers.py index dbd069789..9aab29bbf 100644 --- a/src/xtgeo/well/_wellmarkers.py +++ b/src/xtgeo/well/_wellmarkers.py @@ -9,7 +9,7 @@ import xtgeo import xtgeo.common.constants as const -import xtgeo.cxtgeo._cxtgeo as _cxtgeo +import xtgeo.cxtgeo._cxtgeo as _cxtgeo # type: ignore from xtgeo.common import XTGeoDialog xtg = XTGeoDialog() @@ -26,25 +26,24 @@ def get_zonation_points(self, tops, incl_limit, top_prefix, zonelist, use_undef) self.geometrics() # note the caller has made a copy of the true self - # as zlog is float64; need to convert to int array with high - # number as undef + # as zlog is float64; need to convert to int array with high number as undef if self.zonelogname is not None: if use_undef: - self._df.dropna(subset=[self.zonelogname], inplace=True) - zlog = self._df[self.zonelogname].values - zlog[np.isnan(zlog)] = const.UNDEF_INT + self.dataframe.dropna(subset=[self.zonelogname], inplace=True) + zlog = self.dataframe[self.zonelogname].values + zlog[np.isnan(zlog)] = const.UNDEF_DISC zlog = np.rint(zlog).astype(int) else: return None - xvv = self._df["X_UTME"].values - yvv = self._df["Y_UTMN"].values - zvv = self._df["Z_TVDSS"].values - incl = self._df["Q_INCL"].values - mdv = self._df["Q_MDEPTH"].values + xvv = self.dataframe[self.xname].values.copy() + yvv = self.dataframe[self.yname].values.copy() + zvv = self.dataframe[self.zname].values.copy() + incl = self.dataframe["Q_INCL"].values.copy() + mdv = self.dataframe["Q_MDEPTH"].values.copy() if self.mdlogname is not None: - mdv = self._df[self.mdlogname].values + mdv = self.dataframe[self.mdlogname].values.copy() if zonelist is None: # need to declare as list; otherwise Py3 will get dict.keys @@ -211,9 +210,9 @@ def _extract_ztops( pzone = zone wpts_names = [ - "X_UTME", - "Y_UTMN", - "Z_TVDSS", + self.xname, + self.yname, + self.zname, self.mdlogname, "Q_INCL", "Q_AZI", @@ -229,9 +228,9 @@ def _extract_ztops( llen = len(wpts) - 1 zwpts_names = [ - "X_UTME", - "Y_UTMN", - "Z_TVDSS", + self.xname, + self.yname, + self.zname, self.mdlogname + "_AVG", "Q_MD1", "Q_MD2", @@ -351,14 +350,14 @@ def get_fraction_per_zone( zonelist = list(self.get_logrecord(self.zonelogname).keys()) useinclname = "Q_INCL" - if "M_INCL" in self._df: + if "M_INCL" in self.dataframe: useinclname = "M_INCL" else: self.geometrics() result = OrderedDict() - result["X_UTME"] = [] - result["Y_UTMN"] = [] + result[self.xname] = [] + result[self.yname] = [] result["DFRAC"] = [] result["Q_INCL"] = [] result["ZONE"] = [] @@ -397,16 +396,16 @@ def get_fraction_per_zone( logger.debug("Skipped due to too missing/undef value(s)") continue - xavg = dframe["X_UTME"].mean() - yavg = dframe["Y_UTMN"].mean() + xavg = dframe[self.xname].mean() + yavg = dframe[self.yname].mean() dfrac = 
0.0 for dval in dvalues: if any(dseries.isin([dval])): dfrac += dseries.value_counts(normalize=True)[dval] - result["X_UTME"].append(xavg) - result["Y_UTMN"].append(yavg) + result[self.xname].append(xavg) + result[self.yname].append(yavg) result["DFRAC"].append(dfrac) result["Q_INCL"].append(qinclavg) result["ZONE"].append(izon) @@ -414,7 +413,7 @@ def get_fraction_per_zone( result[dlogname].append(svalues) # make the dataframe and return it - if result["X_UTME"]: + if result[self.xname]: return pd.DataFrame.from_dict(result) self.delete_log("_QFLAG") @@ -425,12 +424,12 @@ def get_fraction_per_zone( def get_surface_picks(self, surf): """get Surface picks""" - xcor = self._df["X_UTME"].values - ycor = self._df["Y_UTMN"].values - zcor = self._df["Z_TVDSS"].values + xcor = self.dataframe[self.xname].values + ycor = self.dataframe[self.yname].values + zcor = self.dataframe[self.zname].values if self.mdlogname: - mcor = self._df[self.mdlogname].values + mcor = self.dataframe[self.mdlogname].values else: mcor = np.zeros(xcor.size, dtype=np.float64) + xtgeo.UNDEF diff --git a/src/xtgeo/well/_wells_utils.py b/src/xtgeo/well/_wells_utils.py index b82c2f153..3c1c68260 100644 --- a/src/xtgeo/well/_wells_utils.py +++ b/src/xtgeo/well/_wells_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utilities for Wells class""" @@ -15,6 +14,8 @@ xtg = XTGeoDialog() +# self == Wells instance (plural wells) + def wellintersections( self, wfilter=None, showprogress=False @@ -49,8 +50,8 @@ def wellintersections( welldfr = well.dataframe.copy() - xcor = welldfr["X_UTME"].values - ycor = welldfr["Y_UTMN"].values + xcor = welldfr[well.xname].values + ycor = welldfr[well.yname].values mcor = welldfr[well.mdlogname].values logger.info("The mdlogname property is: %s", well.mdlogname) @@ -75,7 +76,7 @@ def wellintersections( # try to be smart to skip entries that earlier have beenn tested # for crossing. If other does not cross well, then well does not # cross other... - if other.name in nox.keys() and well.name in nox[other.name]: + if other.name in nox and well.name in nox[other.name]: continue # truncate away the paralell part on a copy @@ -92,9 +93,9 @@ def wellintersections( well, xtol=xtol, ytol=ytol, ztol=ztol, itol=itol, atol=atol ) - xcorc = owell.dataframe["X_UTME"].values - ycorc = owell.dataframe["Y_UTMN"].values - zcorc = owell.dataframe["Z_TVDSS"].values + xcorc = owell.dataframe[well.xname].values + ycorc = owell.dataframe[well.yname].values + zcorc = owell.dataframe[well.zname].values if xcorc.size < 2: continue @@ -138,7 +139,15 @@ def wellintersections( xpoints.append([well.name, mcor, other.name, xcor, ycor, zcor]) dfr = pd.DataFrame( - xpoints, columns=["WELL", "MDEPTH", "CWELL", "X_UTME", "Y_UTMN", "Z_TVDSS"] + xpoints, + columns=[ + "WELL", + "MDEPTH", + "CWELL", + self._wells[0].xname, + self._wells[0].yname, + self._wells[0].zname, + ], ) progress.finished() diff --git a/src/xtgeo/well/blocked_well.py b/src/xtgeo/well/blocked_well.py index e3c0cffda..58380685a 100644 --- a/src/xtgeo/well/blocked_well.py +++ b/src/xtgeo/well/blocked_well.py @@ -100,22 +100,22 @@ class BlockedWell(Well): save the result in RMS, as this is derived from the grid. Also the blocked well icon must exist before save. - The well trajectory are here represented as logs, and XYZ have magic names: - X_UTME, Y_UTMN, Z_TVDSS, which are the three first Pandas columns. + The well trajectory are here represented as logs, and XYZ have magic names as + default: X_UTME, Y_UTMN, Z_TVDSS, which are the three first Pandas columns. 
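+
+    For instance, an illustrative sketch (the file name is reused from the
+    example further down)::
+
+        >>> bw = xtgeo.blockedwell_from_file(well_dir + '/OP_1.bw')
+        >>> bw.dataframe.columns[:3].tolist()
+        ['X_UTME', 'Y_UTMN', 'Z_TVDSS']
+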
Other geometry logs has also 'semi-magic' names: M_MDEPTH or Q_MDEPTH: Measured depth, either real/true (M...) or - quasi computed/estimated (Q...). The Quasi may be incorrect for + quasi computed/estimated (Q...). The Quasi computations may be incorrect for all uses, but sufficient for some computations. - Similar for M_INCL, Q_INCL, M_AZI, Q_ASI. + Similar for M_INCL, Q_INCL, M_AZI, Q_AZI. I_INDEX, J_INDEX, K_INDEX: They are grid indices. For practical reasons they are treated as a CONT logs, since the min/max grid indices usually are unknown, and hence making a code index is not trivial. - All Pandas values (yes, discrete also!) are stored as float64 + All Pandas values (yes, discrete also!) are stored as float32 or float64 format, and undefined values are Nan. Integers are stored as Float due to the lacking support for 'Integer Nan' (currently lacking in Pandas, but may come in later Pandas versions). @@ -125,9 +125,7 @@ class BlockedWell(Well): The instance can be made either from file or:: - >>> well1 = BlockedWell(well_dir + '/OP_1.bw') # assume RMS ascii well - >>> well2 = BlockedWell(well_dir + '/OP_1.bw', fformat='rms_ascii') - >>> well3 = xtgeo.blockedwell_from_file(well_dir + '/OP_1.bw') + >>> well1 = xtgeo.blockedwell_from_file(well_dir + '/OP_1.bw') # RMS ascii well If in RMS, instance can be made also from RMS icon:: @@ -138,12 +136,8 @@ class BlockedWell(Well): 'wellname', ) - For arguments, see method under :meth:`from_file`. - """ - VALID_LOGTYPES = {"DISC", "CONT"} - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -157,7 +151,10 @@ def gridname(self): @gridname.setter def gridname(self, newname): - self._gridname = newname + if isinstance(newname, str): + self._gridname = newname + else: + raise ValueError("Input name is not a string.") def copy(self): newbw = super().copy() @@ -214,7 +211,8 @@ def to_roxar(self, *args, **kwargs): """Set (export) a single blocked well item inside roxar project. Note this method works only when inside RMS, or when RMS license is - activated. + activated. RMS will store blocked wells as a Gridmodel feature, not as a + well. Note: When project is file path (direct access, outside RMS) then @@ -236,6 +234,7 @@ def to_roxar(self, *args, **kwargs): .. 
versionadded: 2.12 """ + # TODO: go from *args, **kwargs to keywords project = args[0] gname = args[1] bwname = args[2] diff --git a/src/xtgeo/well/example.txt b/src/xtgeo/well/example.txt deleted file mode 100644 index df57c9770..000000000 --- a/src/xtgeo/well/example.txt +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np - -GRIDNAME = "GeoGrid" - -BW1 = "BW" -BW2 = "BW_Tarbert" -BW3 = "BW_result" - -ZNAME = "zone_log" - -WELLS = ["34_11-1", "34_11-A-2"] - -UNDEF = 999999 - -def main(): - gmodel = project.grid_models[GRIDNAME] - bwset1 = gmodel.blocked_wells_set[BW1] - bwset2 = gmodel.blocked_wells_set[BW2] - - bwset3 = gmodel.blocked_wells_set[BW3] # use for result - - - # get numpies; masked to normal numpies; replace undef with UNDEF - zones1 = np.ma.filled(bwset1.properties[ZNAME].get_values(), fill_value=UNDEF) - zones2 = np.ma.filled(bwset2.properties[ZNAME].get_values(), fill_value=UNDEF) - - for prop in bwset1.properties: - propname = prop.name - print("Property name is: ", propname) - if propname in bwset2.properties: - pval1 = np.ma.filled(bwset1.properties[propname].get_values(), UNDEF) - pval2 = np.ma.filled(bwset2.properties[propname].get_values(), UNDEF) - pval3 = np.ma.filled(bwset3.properties[propname].get_values(), UNDEF) - - res = pval1 - - for well in WELLS: - print(well) - dind = bwset1.get_data_indices([well]) - z1 = zones1[dind] - upperz = z1.min() - resall = np.where(zones2 <= upperz, pval2, pval1) - - # filtered on well indices only: - res[dind] = resall[dind] - res = np.ma.masked_greater_equal(res, UNDEF) - - bwset3.properties[propname].set_values(res) - - diff --git a/src/xtgeo/well/sample.txt b/src/xtgeo/well/sample.txt deleted file mode 100644 index 97120d720..000000000 --- a/src/xtgeo/well/sample.txt +++ /dev/null @@ -1,27 +0,0 @@ -import roxar -import numpy as np - -grid_model = project.grid_models['Deterministic'] - -# Create a new blocked well property -blocked_wells = grid_model.blocked_wells_set['BW'] -bw_property = blocked_wells.properties.create('PoroFromGrid', - roxar.GridPropertyType.continuous, - np.float32) - -# Create a NumPy array of the correct size to store the values -bw_property_values = blocked_wells.generate_values(discrete=False) - -# Get the cell numbers crossed by the well -bw_cell_numbers = blocked_wells.get_cell_numbers() - -# Get blocked well property values -grid_property_values = grid_model.properties['Poro'].get_values() - -# Iterate over every cell crossed by the well -for i, cell_number in enumerate(bw_cell_numbers): - # update the bw property with the grid property value - bw_property_values[i] = grid_property_values[cell_number] - -# Set the property values -bw_property.set_values(bw_property_values) diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index da3778921..5a3a546de 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -1,11 +1,8 @@ -# -*- coding: utf-8 -*- """XTGeo well module, working with one single well.""" -import functools +from __future__ import annotations + import io -import math -import warnings -from collections import OrderedDict from copy import deepcopy from pathlib import Path from typing import Dict, List, Optional, Union @@ -16,31 +13,16 @@ import xtgeo import xtgeo.common.constants as const -import xtgeo.cxtgeo._cxtgeo as _cxtgeo -from xtgeo import XTGeoCLibError +import xtgeo.cxtgeo._cxtgeo as _cxtgeo # type: ignore -from . import _well_io, _well_oper, _well_roxapi, _wellmarkers +from ..xyz_common import _xyz_data # type: ignore[attr-defined] +from . 
import _well_aux, _well_io, _well_oper, _well_roxapi, _wellmarkers -xtg = xtgeo.common.XTGeoDialog() +xtg = xtgeo.XTGeoDialog() logger = xtg.functionlogger(__name__) - -# pylint: disable=too-many-public-methods - - # ====================================================================================== -# METHODS as wrappers to class init + import - - -def _data_reader_factory(file_format): - if file_format in ["rmswell", "irap_ascii"]: - return _well_io.import_rms_ascii - if file_format == "hdf": - return _well_io.import_hdf5_well - raise ValueError( - f"Unknown file format {file_format}, supported formats are " - "'rmswell', 'irap_ascii' and 'hdf'" - ) +# Functions, as wrappers to class methods def well_from_file( @@ -51,29 +33,25 @@ def well_from_file( lognames: Optional[Union[str, List[str]]] = "all", lognames_strict: Optional[bool] = False, strict: Optional[bool] = False, -) -> "Well": +) -> Well: """Make an instance of a Well directly from file import. - Note: - - rms_ascii is the only correct for wells from RMS. Irap did not have this - format. For maps and points, the formats from the old Irap tool is - applied in RMS, hence "irap_ascii" and "rms_ascii" are there the same. - Args: - wfile: File path, either a string or a pathlib.Path instance - fformat: See :meth:`Well.from_file` - mdlogname: Name of Measured Depth log if any + wfile: File path for well, either a string or a pathlib.Path instance + fformat: "rms_ascii" or "hdf5" + mdlogname: Name of Measured Depth log, if any zonelogname: Name of Zonelog, if any lognames: Name or list of lognames to import, default is "all" lognames_strict: If True, all lognames must be present. strict: If True, then import will fail if zonelogname or mdlogname are asked - for but not present in wells. + for but those names are not present in wells. Example:: >>> import xtgeo - >>> mywell = xtgeo.well_from_file(well_dir + "/OP_1.w") + >>> import pathlib + >>> welldir = pathlib.Path("../foo") + >>> mywell = xtgeo.well_from_file(welldir / "OP_1.w") .. versionchanged:: 2.1 Added ``lognames`` and ``lognames_strict`` .. versionchanged:: 2.1 ``strict`` now defaults to False @@ -98,19 +76,18 @@ def well_from_roxar( lognames_strict: Optional[bool] = False, inclmd: Optional[bool] = False, inclsurvey: Optional[bool] = False, -) -> "Well": +) -> xtgeo.Well: """This makes an instance of a Well directly from Roxar RMS. - Note this method works only when inside RMS, or when RMS license is - activated. + activated (through the roxar environment). Args: - project: Path to project or magic ``project`` variable in RMS. + project: Path to project or magic the ``project`` variable in RMS. name: Name of Well, as shown in RMS. trajectory: Name of trajectory in RMS. logrun: Name of logrun in RMS. - lognames: List of lognames to import or use 'all' for all present logs + lognames: List of lognames to import, or use 'all' for all present logs lognames_strict: If True and log is not in lognames is a list, an Exception will be raised. inclmd: If True, a Measured Depth log will be included. @@ -130,6 +107,7 @@ def well_from_roxar( .. versionchanged:: 2.1 lognames defaults to "all", not None """ + # TODO - mdlogname and zonelogname return Well._read_roxar( project, name, @@ -142,163 +120,69 @@ def well_from_roxar( ) -def allow_deprecated_init(func): - # This decorator is here to maintain backwards compatibility in the - # construction of Well and should be deleted once the deprecation period - # has expired, the construction will then follow the new pattern. 
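# The block below continues verbatim as allow_deprecated_init() in the new
# src/xtgeo/well/_well_aux.py earlier in this diff, from where the class
# applies it -- a minimal sketch, with the signature elided:
#
#     @_well_aux.allow_deprecated_init
#     def __init__(self, rkb=0.0, xpos=0.0, ...): ...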
-    @functools.wraps(func)
-    def wrapper(self, *args, **kwargs):
-        if not args and not kwargs:
-            warnings.warn(
-                "Initializing empty well is deprecated, please provide "
-                "non-defaulted values, or use mywell = "
-                "xtgeo.well_from_file('filename')",
-                DeprecationWarning,
-            )
-            return func(
-                self,
-                *([0.0] * 3),
-                "",
-                pd.DataFrame({"X_UTME": [], "Y_UTMN": [], "Z_TVDSS": []}),
-            )
-
-        # Checking if we are doing an initialization from file and raise a
-        # deprecation warning if we are.
-        if "wfile" in kwargs or (
-            len(args) >= 1 and isinstance(args[0], (str, Path, xtgeo._XTGeoFile))
-        ):
-            warnings.warn(
-                "Initializing directly from file name is deprecated and will be "
-                "removed in xtgeo version 4.0. Use: "
-                "mywell = xtgeo.well_from_file('filename') instead",
-                DeprecationWarning,
-            )
-            if len(args) >= 1:
-                wfile = args[0]
-                args = args[1:]
-            else:
-                wfile = kwargs.pop("wfile", None)
-            if len(args) >= 1:
-                fformat = args[0]
-                args = args[1:]
-            else:
-                fformat = kwargs.pop("fformat", None)
-
-            mfile = xtgeo._XTGeoFile(wfile)
-            if fformat is None or fformat == "guess":
-                fformat = mfile.detect_fformat()
-            else:
-                fformat = mfile.generic_format_by_proposal(fformat)
-            kwargs = _data_reader_factory(fformat)(mfile, *args, **kwargs)
-            kwargs["filesrc"] = mfile.file
-            return func(self, **kwargs)
-        return func(self, *args, **kwargs)
-
-    return wrapper
-
-
 class Well:
-    """Class for a well in the XTGeo framework.
+    """Class for a single well in the XTGeo framework.

     The well logs are stored in a Pandas dataframe, which make manipulation
     easy and fast.

-    The well trajectory are here represented as logs, and XYZ have magic names:
-    ``X_UTME``, ``Y_UTMN``, ``Z_TVDSS``, which are the three first Pandas columns.
+    The well trajectory is here represented as the first 3 columns in the
+    dataframe, and XYZ have pre-defined names: ``X_UTME``, ``Y_UTMN``, ``Z_TVDSS``.

-    Other geometry logs has also 'semi-magic' names:
+    Other geometry logs may also have 'semi-defined' names, but this is not a
+    strict rule:

-    M_MDEPTH or Q_MDEPTH: Measured depth, either real/true (M_xx) or
+    ``M_MDEPTH`` or ``Q_MDEPTH``: Measured depth, either real/true (M_xx) or
     quasi computed/estimated (Q_xx). The Quasi may be incorrect for
     all uses, but sufficient for some computations.

-    Similar for M_INCL, Q_INCL, M_AZI, Q_ASI.
+    Similar for ``M_INCL``, ``Q_INCL``, ``M_AZI``, ``Q_AZI``.

     All Pandas values (yes, discrete also!) are currently stored as float64
     format, and undefined values are Nan. Integers are stored as Float due
-    to the (historic) lacking support for 'Integer Nan'. In coming versions,
-    use of ``pandas.NA`` (available from Pandas version 1.0) may be implemented.
+    to the (historic) lacking support for 'Integer Nan'.

     Note there is a method that can return a dataframe (copy) with Integer
     and Float columns, see :meth:`get_filled_dataframe`.

-    The instance can be made either from file or (todo!) by specification::
+    The instance can be made either from file or by specification::

-        >>> well1 = Well(well_dir + '/OP_1.w')  # assume RMS ascii well
-        >>> well2 = Well(well_dir + '/OP_1.w', fformat='rms_ascii')
-        >>> well3 = xtgeo.well_from_file(well_dir + '/OP_1.w')
+        >>> well1 = xtgeo.well_from_file(well_dir + '/OP_1.w')
+        >>> well2 = xtgeo.Well(rkb=32.0, xpos=1234.0, ypos=4567.0, wname="Foo",
+                df=mydataframe, ...)
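+
+        A fuller from-scratch sketch (all values, and the dataframe, are
+        illustrative)::
+
+            >>> import pandas as pd
+            >>> mydataframe = pd.DataFrame(
+            ...     {"X_UTME": [464000.0], "Y_UTMN": [5930000.0], "Z_TVDSS": [1200.0]}
+            ... )
+            >>> well3 = xtgeo.Well(
+            ...     rkb=32.0, xpos=464000.0, ypos=5930000.0, wname="Foo", df=mydataframe
+            ... )
+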
Args: - rkb: well RKB height - xpos: well head X pos - ypos: well head Y pos + rkb: Well RKB height + xpos: Well head X pos + ypos: Well head Y pos wname: well name - df: pandas dataframe with log values, expects columns to include + df: A pandas dataframe with log values, expects columns to include 'X_UTME', 'Y_UTMN', 'Z_TVDSS' for x, y and z coordinates. Other columns should be log values. filesrc: source file if any - mdlogname: Name of Measured Depth log if any. + mdlogname: Name of Measured Depth log, if any. zonelogname: Name of Zonelog, if any - wlogtypes: dictionary of log types, 'DISC' or 'CONT', defaults to - to 'CONT'. + wlogtypes: dictionary of log types, 'DISC' (discrete) or 'CONT' (continuous), + defaults to to 'CONT'. wlogrecords: dictionary of codes for 'DISC' logs, None for no codes given, defaults to None. """ - VALID_LOGTYPES = {"DISC", "CONT"} - - @allow_deprecated_init + @_well_aux.allow_deprecated_init def __init__( self, - rkb: float, - xpos: float, - ypos: float, - wname: str, - df: pd.DataFrame, - mdlogname: str = None, - zonelogname: str = None, - wlogtypes: Dict[str, str] = None, - wlogrecords: Dict[str, str] = None, - filesrc: Optional[Union[str, Path]] = None, - ): - if not all( - coordinate in df.columns for coordinate in ("X_UTME", "Y_UTMN", "Z_TVDSS") - ): - raise ValueError( - "Well dataframe must include 'X_UTME'," - f" 'Y_UTMN' and 'Z_TVDSS', got {df.columns}" - ) - self._reset( - rkb, - xpos, - ypos, - wname, - df, - filesrc, - mdlogname, - zonelogname, - wlogtypes, - wlogrecords, - ) - - def _reset( - self, - rkb: float = None, - xpos: float = None, - ypos: float = None, - wname: str = None, - df: pd.DataFrame = None, + rkb: float = 0.0, + xpos: float = 0.0, + ypos: float = 0.0, + wname: str = "", + df: Optional[pd.DataFrame] = None, + mdlogname: Optional[str] = None, + zonelogname: Optional[str] = None, + wlogtypes: Optional[Dict[str, str]] = None, + wlogrecords: Optional[Dict[str, str]] = None, filesrc: Optional[Union[str, Path]] = None, - mdlogname: str = None, - zonelogname: str = None, - wlogtypes: Dict[str, str] = None, - wlogrecords: Dict[str, str] = None, ): - if wlogtypes is None: - wlogtypes = dict() - if wlogrecords is None: - wlogrecords = dict() - + # state variables from args self._rkb = rkb self._xpos = xpos self._ypos = ypos @@ -307,70 +191,85 @@ def _reset( self._mdlogname = mdlogname self._zonelogname = zonelogname - self._wlogtypes = wlogtypes - self._wlogrecords = wlogrecords - - self._df = df + self._wdata = _xyz_data._XYZData(df, wlogtypes, wlogrecords) + # self._wlogtypes = self._wdata.attr_types + # self._wlogrecords = self._wdata.attr_records - self._wlognames = list(self._df.columns) + self._ensure_consistency() + # additional state variables self._metadata = xtgeo.MetaDataWell() self._metadata.required = self - self._ensure_consistency() + _reset = __init__ # workaround until deprecation .from_file(), etc are removed def __repr__(self): # noqa: D105 - # should be able to newobject = eval(repr(thisobject)) + # should (in theory...) 
be able to newobject = eval(repr(thisobject)) myrp = ( - f"{self.__class__.__name__} (filesrc={self._filesrc!r}, " - f"name={self._wname!r}, ID={id(self)})" + f"{self.__class__.__name__} (rkb={self._rkb}, xpos={self._xpos}, " + f"ypos={self._ypos}, wname='{self._wname}', " + f"filesrc='{self._filesrc}', mdlogname='{self._mdlogname}', " + f"zonelogname='{self._zonelogname}', \n" + f"wlogtypes='{self._wdata.attr_types}', " + f"\nwlogrecords='{self._wdata.attr_records}', " + f"df=\n{repr(self._wdata.data)}))" ) + return myrp def __str__(self): # noqa: D105 # user friendly print return self.describe(flush=False) - def _ensure_consistency(self): # pragma: no coverage - """Ensure consistency within an object (private function). + def _ensure_consistency(self): + """Ensure consistency""" + self._wdata.ensure_consistency() - Consistency checking. As well log names are columns in the Pandas DF, - there are additional attributes per log that have to be "in sync". - """ - if self._df is None: - return + if self._mdlogname not in self._wdata.data: + self._mdlogname = None - self._wlognames = list(self._df.columns) + if self._zonelogname not in self._wdata.data: + self._zonelogname = None - for logname in self._wlognames: - if logname not in self._wlogtypes: - self._wlogtypes[logname] = "CONT" # continuous as default - self._wlogrecords[logname] = None # None as default - else: - if self._wlogtypes[logname] not in self.VALID_LOGTYPES: - self._wlogtypes[logname] = "CONT" - self._wlogrecords[logname] = None # None as default - - if logname not in self._wlogrecords: - if self._wlogtypes[logname] == "DISC": - # it is a discrete log with missing record; try to find - # a default one based on current values... - lvalues = self._df[logname].values.round(decimals=0) - lmin = int(lvalues.min()) - lmax = int(lvalues.max()) - - lvalues = lvalues.astype("int") - codes = {} - for lval in range(lmin, lmax + 1): - if lval in lvalues: - codes[lval] = str(lval) - - self._wlogrecords = codes + def ensure_consistency(self): + """Ensure consistency for the instance. + + .. 
versionadded:: 3.5 + """ + # public version, added oct-23 + self._ensure_consistency() # ================================================================================== # Properties # ================================================================================== + @property + def xname(self): + """Return or set name of X coordinate column.""" + return self._wdata.xname + + @xname.setter + def xname(self, new_xname: str): + self._wdata.xname = new_xname + + @property + def yname(self): + """Return or set name of Y coordinate column.""" + return self._wdata.yname + + @yname.setter + def yname(self, new_yname: str): + self._wdata.yname = new_yname + + @property + def zname(self): + """Return or set name of Z coordinate column.""" + return self._wdata.zname + + @zname.setter + def zname(self, new_zname: str): + self._wdata.zname = new_zname + @property def metadata(self): """Return metadata object instance of type MetaDataRegularSurface.""" @@ -457,7 +356,7 @@ def mdlogname(self): @mdlogname.setter def mdlogname(self, mname): - if mname in self._wlognames: + if mname in self.get_lognames(): self._mdlogname = mname else: self._mdlogname = None @@ -469,7 +368,7 @@ def zonelogname(self): @zonelogname.setter def zonelogname(self, zname): - if zname in self._wlognames: + if zname in self.get_lognames(): self._zonelogname = zname else: self._zonelogname = None @@ -477,38 +376,46 @@ def zonelogname(self, zname): @property def dataframe(self): """Returns or set the Pandas dataframe object for all logs.""" - return self._df + return self._wdata.get_dataframe() @dataframe.setter def dataframe(self, dfr): - self._df = dfr.copy() - self._ensure_consistency() + self.set_dataframe(dfr) # this will include consistency checking! @property def nrow(self): """int: Returns the Pandas dataframe object number of rows.""" - return len(self._df.index) + return len(self._wdata.data.index) @property def ncol(self): """int: Returns the Pandas dataframe object number of columns.""" - return len(self._df.columns) + return len(self._wdata.data.columns) @property def nlogs(self): """int: Returns the Pandas dataframe object number of columns.""" - return len(self._df.columns) - 3 + return len(self._wdata.data.columns) - 3 @property def lognames_all(self): """list: Returns dataframe column names as list, including mandatory coords.""" - self._ensure_consistency() - return self._wlognames + return self.get_lognames() @property def lognames(self): """list: Returns the Pandas dataframe column as list excluding coords.""" - return list(self._df)[3:] + return list(self._wdata.data)[3:] + + @property + def wlogtypes(self): + """Returns wlogtypes""" + return {name: atype.name for name, atype in self._wdata.attr_types.items()} + + @property + def wlogrecords(self): + """Returns wlogrecords""" + return deepcopy(self._wdata.attr_records) # ================================================================================== # Methods @@ -592,15 +499,15 @@ def from_file( else: fformat = wfile.generic_format_by_proposal(fformat) # default - kwargs = _data_reader_factory(fformat)(wfile, **kwargs) + kwargs = _well_aux._data_reader_factory(fformat)(wfile, **kwargs) self._reset(**kwargs) return self @classmethod def _read_file( cls, - wfile, - fformat="rms_ascii", + wfile: Union[str, Path], + fformat: Optional[str] = "rms_ascii", **kwargs, ): """Import well from file. 
@@ -640,7 +547,7 @@ def _read_file(
         else:
             fformat = wfile.generic_format_by_proposal(fformat)  # default
 
-        kwargs = _data_reader_factory(fformat)(wfile, **kwargs)
+        kwargs = _well_aux._data_reader_factory(fformat)(wfile, **kwargs)
         return cls(**kwargs)
 
     def to_file(
@@ -768,9 +675,9 @@ def to_roxar(self, *args, **kwargs):
         """Export (save/store) a well to a roxar project.
 
         Note this method works only when inside RMS, or when RMS license is
-        activated.
+        activated in terminal.
 
-        The current implementation will either update existing well names
+        The current implementation will either update the existing well
         (then well log array size must not change), or it will make a new well in RMS.
 
         Note:
@@ -779,7 +686,7 @@
            will not be saved until the user do an explicit project save action.
 
         Args:
-            project (str): Magic string 'project' or file path to project
+            project (str, object): Magic string 'project' or file path to project
             wname (str): Name of well, as shown in RMS.
             lognames (:obj:list or :obj:str): List of lognames to save, or
                 use simply 'all' for current logs for this well. Default is 'all'
@@ -814,28 +721,32 @@
             realisation=realisation,
         )
 
-    def get_wlogs(self) -> OrderedDict:
+    def get_lognames(self):
+        """Get the lognames for all logs."""
+        return list(self._wdata.data)
+
+    def get_wlogs(self) -> dict:
         """Get a compound dictionary with well log metadata.
 
-        The result will be an Ordered dict on the form:
+        The result will be a dict of the form:
 
         ``{"X_UTME": ["CONT", None], ... "Facies": ["DISC", {1: "BG", 2: "SAND"}]}``
         """
-        res = OrderedDict()
+        res = dict()
 
-        for key in self._wlognames:
+        for key in self.get_lognames():
             wtype = "CONT"
             wrecord = None
-            if key in self._wlogtypes:
-                wtype = self._wlogtypes[key]
-            if key in self._wlogrecords:
-                wrecord = self._wlogrecords[key]
+            if key in self._wdata.attr_types:
+                wtype = self._wdata.attr_types[key].name
+            if key in self._wdata.attr_records:
+                wrecord = self._wdata.attr_records[key]
 
             res[key] = [wtype, wrecord]
 
         return res
 
-    def set_wlogs(self, wlogs: OrderedDict):
+    def set_wlogs(self, wlogs: dict):
         """Set a compound dictionary with well log metadata.
 
         This operation is somewhat risky as it may lead to inconsistency, so use with
@@ -852,26 +763,11 @@
             ValueError: Invalid log record found in input:
 
         """
-        for key in self._wlognames:
+        for key in self.get_lognames():
             if key in wlogs.keys():
                 typ, rec = wlogs[key]
-
-                if typ in Well.VALID_LOGTYPES:
-                    self._wlogtypes[key] = deepcopy(typ)
-                else:
-                    raise ValueError(f"Invalid log type found in input: {typ}")
-
-                if rec is None or isinstance(rec, dict):
-                    self._wlogrecords[key] = deepcopy(rec)
-                else:
-                    raise ValueError(f"Invalid log record found in input: {rec}")
-
-            else:
-                raise ValueError(f"Key for column not found in input: {key}")
-
-        for key in wlogs.keys():
-            if key not in self._wlognames:
-                raise ValueError(f"Invalid input key found: {key}")
+                self._wdata.set_attr_type(key, typ)
+                self._wdata.set_attr_record(key, deepcopy(rec))
 
         self._ensure_consistency()
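A hypothetical round trip with the new plain-dict metadata API (continuing the sketch above). Note that, per this change, input keys that are not well columns are now silently ignored rather than raising:

    wlogs = well.get_wlogs()        # e.g. {"PORO": ["CONT", ("", "")], ...}
    wlogs["PORO"] = ["CONT", ("fraction", "linear")]
    well.set_wlogs(wlogs)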
@@ -883,7 +779,7 @@ def isdiscrete(self, logname):
 
         .. versionadded:: 2.2.0
         """
-        if logname in self._wlognames and self.get_logtype(logname) == "DISC":
+        if logname in self.get_lognames() and self.get_logtype(logname) == "DISC":
             return True
         return False
 
@@ -894,29 +790,17 @@ def copy(self):
             self.xpos,
             self.ypos,
             self.wname,
-            self._df.copy(),
+            self._wdata.data.copy(),
             self.mdlogname,
             self.zonelogname,
-            deepcopy(self._wlogtypes),
-            deepcopy(self._wlogrecords),
+            self.wlogtypes,
+            self.wlogrecords,
             self._filesrc,
         )
 
     def rename_log(self, lname, newname):
         """Rename a log, e.g. Poro to PORO."""
-        self._ensure_consistency()
-
-        if lname not in self.lognames:
-            raise ValueError("Input log does not exist")
-
-        if newname in self.lognames:
-            raise ValueError("New log name exists already")
-
-        self._wlogtypes[newname] = self._wlogtypes.pop(lname)
-        self._wlogrecords[newname] = self._wlogrecords.pop(lname)
-
-        # rename in dataframe
-        self._df.rename(index=str, columns={lname: newname}, inplace=True)
+        self._wdata.rename_attr(lname, newname)
 
         if self._mdlogname == lname:
             self._mdlogname = newname
@@ -924,18 +808,25 @@
         if self._zonelogname == lname:
             self._zonelogname = newname
 
-    def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=True):
+    def create_log(
+        self,
+        lname: str,
+        logtype: str = "CONT",
+        logrecord: Optional[dict] = None,
+        value: float = 0.0,
+        force: bool = True,
+    ) -> bool:
         """Create a new log with initial values.
 
         If the logname already exists, it will be silently overwritten, unless
         the option force=False.
 
         Args:
-            lname (str): name of new log
-            logtype (str): Must be 'CONT' (default) or 'DISC' (discrete)
-            logrecord (dict): A dictionary of key: values for 'DISC' logs
-            value (float): initia value to set_index
-            force (bool): If True, and lname exists, it will be overwritten, if
+            lname: name of new log
+            logtype: Must be 'CONT' (default) or 'DISC' (discrete)
+            logrecord: A dictionary of key: values for 'DISC' logs
+            value: initial value to set
+            force: If True, and lname exists, it will be overwritten, if
                 False, no new log will be made. Will return False.
 
         Returns:
@@ -943,72 +834,80 @@
             existing) or False if the new log already exists,
             and ``force=False``.
 
+        Note::
+
+            A new log can also be created by adding it to the dataframe directly, but
+            with less control over e.g. logrecord
+
         """
-        if lname in self.lognames and force is False:
-            return False
+        return self._wdata.create_attr(lname, logtype, logrecord, value, force)
 
-        self._wlogtypes[lname] = logtype
-        self._wlogrecords[lname] = logrecord
+    def copy_log(
+        self,
+        lname: str,
+        newname: str,
+        force: bool = True,
+    ) -> bool:
+        """Copy a log from an existing name to a new name.
+
+        If the new log already exists, it will be silently overwritten, unless
+        the option force=False.
+
+        Args:
+            lname: name of existing log
+            newname: name of new log
+
+        Returns:
+            True if a new log is made (either new or force overwrite an
+            existing) or False if the new log already exists,
+            and ``force=False``.
+
+        Note::
+
+            A copy can also be done directly in the dataframe, but with fewer
+            consistency checks; hence this method is recommended
+
+        """
+        return self._wdata.copy_attr(lname, newname, force)
 
-        # make a new column
-        self._df[lname] = float(value)
-        self._ensure_consistency()
-        return True
-
-    def delete_log(self, lname):
+    def delete_log(self, lname: Union[str, List[str]]) -> int:
         """Delete/remove an existing log, or list of logs.
Will continue silently if a log does not exist. Args: - lname(str or list): A logname or a list of lognames + lname: A logname or a list of lognames Returns: Number of logs deleted + + Note:: + + A log can also be deleted by simply removing it from the dataframe. + """ - return _well_oper.delete_log(self, lname) + return self._wdata.delete_attr(lname) delete_logs = delete_log # alias function - def get_logtype(self, lname): - """Returns the type of a give log (e.g. DISC or CONT).""" - self._ensure_consistency() - - if lname in self._wlogtypes: - return self._wlogtypes[lname] + def get_logtype(self, lname) -> Optional[str]: + """Returns the type of a given log (e.g. DISC or CONT), None if not present.""" + if lname in self._wdata.attr_types: + return self._wdata.attr_types[lname].name return None def set_logtype(self, lname, ltype): """Sets the type of a give log (e.g. DISC or CONT).""" - self._ensure_consistency() - - valid = {"DISC", "CONT"} - - if ltype in valid: - self._wlogtypes[lname] = ltype - else: - raise ValueError(f"Try to set invalid log type: {ltype}") + self._wdata.set_attr_type(lname, ltype) def get_logrecord(self, lname): """Returns the record (dict) of a given log name, None if not exists.""" - if lname in self._wlogtypes: - return self._wlogrecords[lname] - return None + return self._wdata.get_attr_record(lname) def set_logrecord(self, lname, newdict): """Sets the record (dict) of a given discrete log.""" - self._ensure_consistency() - if lname not in self.lognames: - raise ValueError(f"No such logname: {lname}") - - if self._wlogtypes[lname] == "CONT": - raise ValueError("Cannot set a log record for a continuous log") - - if not isinstance(newdict, dict): - raise ValueError("Input is not a dictionary") - - self._wlogrecords[lname] = newdict + self._wdata.set_attr_record(lname, newdict) def get_logrecord_codename(self, lname, key): """Returns the name entry of a log record, for a given key. @@ -1024,23 +923,9 @@ def get_logrecord_codename(self, lname, key): return None - def get_carray(self, lname): - """Returns the C array pointer (via SWIG) for a given log. - - Type conversion is double if float64, int32 if DISC log. - Returns None of log does not exist. - """ - if lname in self._df: - np_array = self._df[lname].values - else: - return None - - if self.get_logtype(lname) == "DISC": - carr = self._convert_np_carr_int(np_array) - else: - carr = self._convert_np_carr_double(np_array) - - return carr + def get_dataframe(self): + """Get, by intention, a copy of the dataframe""" + return self._wdata.get_dataframe_copy(infer_dtype=False, filled=False) def get_filled_dataframe( self, fill_value=const.UNDEF, fill_value_int=const.UNDEF_INT @@ -1058,28 +943,16 @@ def get_filled_dataframe( high XTGeo UNDEF values, or user defined values. 
""" - lnames = self.lognames - - newdf = self._df.copy() - - # make a dictionary of datatypes - dtype = {"X_UTME": "float64", "Y_UTMN": "float64", "Z_TVDSS": "float64"} - - dfill = {"X_UTME": const.UNDEF, "Y_UTMN": const.UNDEF, "Z_TVDSS": const.UNDEF} - - for lname in lnames: - if self.get_logtype(lname) == "DISC": - dtype[lname] = np.int32 - dfill[lname] = fill_value_int - else: - dtype[lname] = np.float64 - dfill[lname] = fill_value - - # now first fill Nan's (because int cannot be converted if Nan) - newdf = newdf.fillna(dfill) - newdf = newdf.astype(dtype) + return self._wdata.get_dataframe_copy( + infer_dtype=True, + filled=True, + fill_value=fill_value, + fill_value_int=fill_value_int, + ) - return newdf + def set_dataframe(self, dfr): + """Set the dataframe.""" + self._wdata.set_dataframe(dfr) def create_relative_hlen(self): """Make a relative length of a well, as a log. @@ -1087,17 +960,7 @@ def create_relative_hlen(self): The first well og entry defines zero, then the horizontal length is computed relative to that by simple geometric methods. """ - # extract numpies from XYZ trajectory logs - xv = self._df["X_UTME"].values - yv = self._df["Y_UTMN"].values - - distance = [] - previous_x, previous_y = xv[0], yv[0] - for i, (x, y) in enumerate(zip(xv, yv)): - distance.append(math.hypot((previous_x - x), (y - previous_y))) - previous_x, previous_y = x, y - - self._df["R_HLEN"] = pd.Series(np.cumsum(distance), index=self._df.index) + self._wdata.create_relative_hlen() def geometrics(self): """Compute some well geometrical arrays MD, INCL, AZI, as logs. @@ -1112,52 +975,12 @@ def geometrics(self): False if geometrics cannot be computed """ - if self._df.shape[0] < 3: - raise ValueError( - f"Cannot compute geometrics for {self.name}. Not enough " - f"trajectory points (need >3, have: {self.dataframe.shape[0]})" - ) - - # extract numpies from XYZ trajetory logs - ptr_xv = self.get_carray("X_UTME") - ptr_yv = self.get_carray("Y_UTMN") - ptr_zv = self.get_carray("Z_TVDSS") - - # get number of rows in pandas - nlen = self.nrow - - ptr_md = _cxtgeo.new_doublearray(nlen) - ptr_incl = _cxtgeo.new_doublearray(nlen) - ptr_az = _cxtgeo.new_doublearray(nlen) - - ier = _cxtgeo.well_geometrics( - nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 - ) - - if ier != 0: - raise XTGeoCLibError(f"well_geometrics failed with error code: {ier}") - - dnumpy = self._convert_carr_double_np(ptr_md) - self._df["Q_MDEPTH"] = pd.Series(dnumpy, index=self._df.index) - - dnumpy = self._convert_carr_double_np(ptr_incl) - self._df["Q_INCL"] = pd.Series(dnumpy, index=self._df.index) - - dnumpy = self._convert_carr_double_np(ptr_az) - self._df["Q_AZI"] = pd.Series(dnumpy, index=self._df.index) + rvalue = self._wdata.geometrics() if not self._mdlogname: self._mdlogname = "Q_MDEPTH" - # delete tmp pointers - _cxtgeo.delete_doublearray(ptr_xv) - _cxtgeo.delete_doublearray(ptr_yv) - _cxtgeo.delete_doublearray(ptr_zv) - _cxtgeo.delete_doublearray(ptr_md) - _cxtgeo.delete_doublearray(ptr_incl) - _cxtgeo.delete_doublearray(ptr_az) - - return True + return rvalue def truncate_parallel_path( self, other, xtol=None, ytol=None, ztol=None, itol=None, atol=None @@ -1185,18 +1008,18 @@ def truncate_parallel_path( if self.dataframe.shape[0] < 3 or other.dataframe.shape[0] < 3: raise ValueError( - f"Too few points to truncate parallel path, was {self._df.size} and " - f"{other._df.size}, must be >3" + f"Too few points to truncate parallel path, was " + f"{self._wdata.data.size} and {other.dataframe.size}, must be >3" ) # extract 
numpies from XYZ trajectory logs - xv1 = self._df["X_UTME"].values - yv1 = self._df["Y_UTMN"].values - zv1 = self._df["Z_TVDSS"].values + xv1 = self._wdata.data[self.xname].values + yv1 = self._wdata.data[self.yname].values + zv1 = self._wdata.data[self.zname].values - xv2 = other._df["X_UTME"].values - yv2 = other._df["Y_UTMN"].values - zv2 = other._df["Z_TVDSS"].values + xv2 = other.dataframe[self.xname].values + yv2 = other.dataframe[self.yname].values + zv2 = other.dataframe[self.zname].values ier = _cxtgeo.well_trunc_parallel( xv1, yv1, zv1, xv2, yv2, zv2, xtol, ytol, ztol, itol, atol, 0 @@ -1205,24 +1028,25 @@ def truncate_parallel_path( if ier != 0: raise RuntimeError("Unexpected error") - self._df = self._df[self._df["X_UTME"] < const.UNDEF_LIMIT] - self._df.reset_index(drop=True, inplace=True) + dfr = self.dataframe.copy() + dfr = dfr[dfr[self.xname] < const.UNDEF_LIMIT] + self.set_dataframe(dfr) def may_overlap(self, other): """Consider if well overlap in X Y coordinates with other well, True/False.""" - if self._df.size < 2 or other._df.size < 2: + if self.dataframe.size < 2 or other.dataframe.size < 2: return False # extract numpies from XYZ trajectory logs - xmin1 = np.nanmin(self.dataframe["X_UTME"].values) - xmax1 = np.nanmax(self.dataframe["X_UTME"].values) - ymin1 = np.nanmin(self.dataframe["Y_UTMN"].values) - ymax1 = np.nanmax(self.dataframe["Y_UTMN"].values) + xmin1 = np.nanmin(self.dataframe[self.xname].values) + xmax1 = np.nanmax(self.dataframe[self.xname].values) + ymin1 = np.nanmin(self.dataframe[self.yname].values) + ymax1 = np.nanmax(self.dataframe[self.yname].values) - xmin2 = np.nanmin(other.dataframe["X_UTME"].values) - xmax2 = np.nanmax(other.dataframe["X_UTME"].values) - ymin2 = np.nanmin(other.dataframe["Y_UTMN"].values) - ymax2 = np.nanmax(other.dataframe["Y_UTMN"].values) + xmin2 = np.nanmin(other.dataframe[self.xname].values) + xmax2 = np.nanmax(other.dataframe[self.xname].values) + ymin2 = np.nanmin(other.dataframe[self.yname].values) + ymax2 = np.nanmax(other.dataframe[self.yname].values) if xmin1 > xmax2 or ymin1 > ymax2: return False @@ -1240,10 +1064,10 @@ def limit_tvd(self, tvdmin, tvdmax): tvdmin (float): Minimum TVD tvdmax (float): Maximum TVD """ - self._df = self._df[self._df["Z_TVDSS"] >= tvdmin] - self._df = self._df[self._df["Z_TVDSS"] <= tvdmax] - - self._df.reset_index(drop=True, inplace=True) + dfr = self.dataframe.copy() + dfr = dfr[dfr[self.zname] >= tvdmin] + dfr = dfr[dfr[self.zname] <= tvdmax] + self.set_dataframe(dfr) def downsample(self, interval=4, keeplast=True): """Downsample by sampling every N'th element (coarsen only). @@ -1253,15 +1077,15 @@ def downsample(self, interval=4, keeplast=True): keeplast (bool): If True, the last element from the original dataframe is kept, to avoid that the well is shortened. """ - if self._df.size < 2 * interval: + if self.dataframe.size < 2 * interval: return - dfr = self._df[::interval] + dfr = self.dataframe[::interval].copy() if keeplast: - dfr = pd.concat([dfr, self._df.iloc[-1:]], ignore_index=True) + dfr = pd.concat([dfr, self.dataframe.iloc[-1:]], ignore_index=True) - self._df = dfr.reset_index(drop=True) + self.set_dataframe(dfr.reset_index(drop=True)) def rescale(self, delta=0.15, tvdrange=None): """Rescale (refine or coarse) by sampling a delta along the trajectory, in MD. @@ -1283,9 +1107,9 @@ def get_polygons(self, skipname=False): .. versionadded:: 2.1 .. 
versionchanged:: 2.13 Added `skipname` key """ - dfr = self._df.copy() + dfr = self._wdata.data.copy() - keep = ("X_UTME", "Y_UTMN", "Z_TVDSS") + keep = (self.xname, self.yname, self.zname) for col in dfr.columns: if col not in keep: dfr.drop(labels=col, axis=1, inplace=True) @@ -1472,7 +1296,7 @@ def get_zone_interval(self, zonevalue, resample=1, extralogs=None): dxlist = [] - useloglist = ["X_UTME", "Y_UTMN", "Z_TVDSS", "POLY_ID"] + useloglist = [self.xname, self.yname, self.zname, "POLY_ID"] if extralogs is not None: useloglist.extend(extralogs) @@ -1684,46 +1508,3 @@ def get_gridproperties( """ _well_oper.get_gridproperties(self, gridprops, grid=grid, prop_id=prop_id) - - # ================================================================================== - # PRIVATE METHODS - # should not be applied outside the class - # ================================================================================== - - # ---------------------------------------------------------------------------------- - # Import/Export methods for various formats - # ---------------------------------------------------------------------------------- - - # ---------------------------------------------------------------------------------- - # Special methods for nerds, todo is to move to private module - # ---------------------------------------------------------------------------------- - - def _convert_np_carr_int(self, np_array): - """Convert numpy 1D array to C array, assuming int type. - - The numpy is always a double (float64), so need to convert first - """ - carr = _cxtgeo.new_intarray(self.nrow) - - np_array = np_array.astype(np.int32) - - _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) - - return carr - - def _convert_np_carr_double(self, np_array): - """Convert numpy 1D array to C array, assuming double type.""" - carr = _cxtgeo.new_doublearray(self.nrow) - - _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) - - return carr - - def _convert_carr_double_np(self, carray, nlen=None): - """Convert a C array to numpy, assuming double type.""" - if nlen is None: - nlen = len(self._df.index) - - nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) - - return nparray diff --git a/src/xtgeo/well/wells.py b/src/xtgeo/well/wells.py index e3262bd51..1b5f2d465 100644 --- a/src/xtgeo/well/wells.py +++ b/src/xtgeo/well/wells.py @@ -232,7 +232,12 @@ def get_dataframe(self, filled=False, fill_value1=-999, fill_value2=-9999): if filled: dfr = dfr.fillna(fill_value2) - spec_order = ["WELLNAME", "X_UTME", "Y_UTMN", "Z_TVDSS"] + spec_order = [ + "WELLNAME", + self._wells[0].xname, # use the names in the first well as column names + self._wells[0].yname, + self._wells[0].zname, + ] return dfr[spec_order + [col for col in dfr if col not in spec_order]] def quickplot(self, filename=None, title="QuickPlot"): diff --git a/src/xtgeo/xyz_common/__init__.py b/src/xtgeo/xyz_common/__init__.py new file mode 100644 index 000000000..9102473e7 --- /dev/null +++ b/src/xtgeo/xyz_common/__init__.py @@ -0,0 +1,5 @@ +# common low level and private modules for XYZ and Well; in an own folder so far to +# avoid circular import + +from xtgeo.xyz_common._xyz_data import _XYZData +from xtgeo.xyz_common._xyz_enum import _AttrName, _AttrType, _XYZType diff --git a/src/xtgeo/xyz_common/_xyz_data.py b/src/xtgeo/xyz_common/_xyz_data.py new file mode 100644 index 000000000..6e4a2ac17 --- /dev/null +++ b/src/xtgeo/xyz_common/_xyz_data.py @@ -0,0 +1,625 @@ +"""Module for private _XYZData class. 
+
+Note that the design of this targets Well and general XYZ data (Points/Polygons),
+hence the intention is to let this work as a general 'engine' for dataframe'ish data
+in xtgeo, at least Well, Points, Polygons. (But in the first round, it is implemented
+for Wells only). Dataframes look like:
+
+           X_UTME       Y_UTMN    Z_TVDSS    MDepth    PHIT    KLOGH    Sw
+0      463256.911  5930542.294   -49.0000    0.0000     NaN      NaN    NaN ...
+1      463256.912  5930542.295   -48.2859    0.5000     NaN      NaN    NaN ...
+2      463256.913  5930542.296   -47.5735    1.0000     NaN      NaN    NaN ...
+3      463256.914  5930542.299   -46.8626    1.5000     NaN      NaN    NaN ...
+4      463256.916  5930542.302   -46.1533    2.0000     NaN      NaN    NaN ...
+          ...          ...          ...       ...       ...      ...    ...
+
+Where each attr (log) has an attr_types dictionary, telling if the columns are treated
+as discrete (DISC) or continuous (CONT). In addition there is an attr_records
+dict, storing the unit+scale for continuous logs/attr (defaulted to tuple ("", "")) or a
+dictionary of codes (defaulted to {}) if the column is of DISC type (this is optional,
+and perhaps only relevant for Well data).
+
+The 3 first columns are the XYZ coordinates or XY coordinates + value:
+X, Y, Z or X, Y, V. An optional fourth column is also possible, as polygon_id.
+All the rest are free 'attributes', which for wells will be well logs. Hence:
+
+    attr_types ~ refer to attr_types for XYZ and Well data
+    attr_records ~ refer to attr_records for Well data and possibly Points/Polygons
+
+If a column is added to the dataframe, then the methods here will try to guess the
+attr_type and attr_record, and add those; similarly, if a column is removed, the
+corresponding entries in attr_types and attr_records will be deleted.
+"""
+from __future__ import annotations
+
+import math
+from copy import deepcopy
+from typing import Dict, List, Literal, Optional, Sequence, Union
+
+import numpy as np
+import pandas as pd
+from joblib import hash as jhash
+
+import xtgeo.common.constants as const
+from xtgeo import XTGeoCLibError  # type: ignore[attr-defined]
+from xtgeo.common import XTGeoDialog
+from xtgeo.common.sys import _convert_carr_double_np, _get_carray
+from xtgeo.cxtgeo import _cxtgeo
+
+from ._xyz_enum import _AttrName, _AttrType, _XYZType
+
+xtg = XTGeoDialog()
+logger = xtg.functionlogger(__name__)
+
+
+CONT_DEFAULT_RECORD = ("", "")  # unit and scale, where an empty string indicates ~unknown
+
+
+class _XYZData:
+    """Private class for the XYZ and Well log data, where a Pandas dataframe is core.
+
+    The data are stored in pandas dataframes, and by default, all columns are float,
+    and np.nan defines undefined values, even if the columns are DISC. The reason for
+    this is restrictions in older versions of Pandas.
+
+    All values in the dataframe shall be numbers.
+
+    The attr_types is on the form {"PHIT": CONT, "FACIES": DISC, ...}
+
+    The attr_records is somewhat heterogeneous, on the form:
+    {"PHIT": ("unit", "scale"), "FACIES": {0: "BG", 2: "SST", 4: "CALC"}}
+    Hence the CONT logs hold a tuple or list with 2 str members, or None, while a DISC
+    log holds a dict where the key is an int and the value is a string.
+    """
+
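A sketch of direct construction of this internal class via the constructor defined below, assuming a dataframe df whose first three columns are X_UTME, Y_UTMN and Z_TVDSS as in the module docstring:

    xyz = _XYZData(
        df,
        attr_types={"FACIES": "DISC"},                    # hints; others are inferred
        attr_records={"FACIES": {1: "SAND", 2: "SHALE"}},
        floatbits="float32",                              # save memory on attributes
    )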
+    def __init__(
+        self,
+        dataframe: pd.DataFrame,
+        attr_types: Optional[Dict[str, str]] = None,
+        attr_records: Optional[Dict[str, Union[Dict[int, str], Sequence[str]]]] = None,
+        xname: str = _AttrName.XNAME.value,
+        yname: str = _AttrName.YNAME.value,
+        zname: str = _AttrName.ZNAME.value,
+        idname: Optional[str] = None,  # Well, Polygon, ...
+        undef: Union[float, Sequence[float]] = -999.0,
+        xyztype: str = "well",
+        floatbits: Literal["float32", "float64"] = "float64",
+    ):
+        logger.info("Running init for: %s", __class__.__name__)
+        self._df = dataframe
+
+        self._attr_types = {}
+        if isinstance(attr_types, dict):
+            for name, atype in attr_types.items():
+                use_atype = "DISC" if atype.upper() in ("DISC", "INT") else "CONT"
+                self._attr_types[name] = _AttrType[use_atype]
+
+        self._attr_records = attr_records if attr_records is not None else {}
+        self._xname = xname
+        self._yname = yname
+        self._zname = zname
+        self._idname = idname
+        self._floatbits = (
+            floatbits if floatbits in ["float32", "float64"] else "float64"
+        )
+
+        # undefined data are given by a value that may be different for cont vs disc
+        if isinstance(undef, list):
+            self._undef_disc = undef[0]
+            self._undef_cont = undef[1]
+        else:
+            self._undef_disc = undef
+            self._undef_cont = undef
+
+        if xyztype == "well":
+            self._xyztype = _XYZType.WELL
+
+        self._hash = ("0", "0", "0")
+
+        logger.debug("Initial _attr_types: %s", self._attr_types)
+        logger.debug("Initial _attr_records: %s", self._attr_records)
+        self.ensure_consistency()
+        logger.debug("Initial after consistency chk _attr_types: %s", self._attr_types)
+        logger.debug(
+            "Initial after consistency chk _attr_records: %s", self._attr_records
+        )
+
+    @property
+    def dataframe(self):
+        return self._df
+
+    data = dataframe  # alias
+
+    @property
+    def attr_types(self):
+        return self._attr_types
+
+    @property
+    def attr_records(self):
+        return self._attr_records
+
+    @property
+    def xname(self):
+        return self._xname
+
+    @xname.setter
+    def xname(self, name: str):
+        if isinstance(name, str):
+            self._xname = name
+        else:
+            raise ValueError(f"Input name is not a string: {name}")
+
+    @property
+    def yname(self):
+        return self._yname
+
+    @yname.setter
+    def yname(self, name: str):
+        if isinstance(name, str):
+            self._yname = name
+        else:
+            raise ValueError(f"Input name is not a string: {name}")
+
+    @property
+    def zname(self):
+        return self._zname
+
+    @zname.setter
+    def zname(self, name: str):
+        if isinstance(name, str):
+            self._zname = name
+        else:
+            raise ValueError(f"Input name is not a string: {name}")
+
+    def _infer_attr_dtypes(self):
+        """Return as dict on form {"X_UTME": _AttrType.CONT, "FACIES": _AttrType.DISC}.
+
+        There are some important restrictions:
+        * The first 3 columns (X Y Z) are always CONT, even if input appears as DISC.
+        * A check is made towards existing attr_types; if the key, value pair exists
+          already, this function will *not* force a change but keep as is.
+        """
+
+        # pandas function that e.g. will convert integer'ish floats to int:
+        new_df = self._df.convert_dtypes()
+
+        dlist = new_df.dtypes.to_dict()
+        logger.debug("Initial attr_type: %s", self._attr_types)
+
+        datatypes = {}
+        for name, dtype in dlist.items():
+            if name in self._attr_types:
+                # do not change already set attr_types
+                datatypes[name] = self._attr_types[name]
+                continue
+
+            if name in (self._xname, self._yname, self._zname):
+                # force coordinates, first 3 columns, to be CONT
+                datatypes[name] = _AttrType.CONT
+                continue
+
+            if "Float" in str(dtype):
+                datatypes[name] = _AttrType.CONT
+            elif "Int" in str(dtype):
+                datatypes[name] = _AttrType.DISC
+            else:
+                raise RuntimeError(
+                    f"Log type seems to be something other than Float or Int for {name}"
+                )
+        self._attr_types = datatypes
+        logger.debug("Processed attr_type: %s", self._attr_types)
+
+    def _ensure_consistency_attr_types(self):
+        """Ensure that dataframe and attr_types are consistent.
+
+        attr_types are on the form {"GR": "CONT", "ZONES": "DISC", ...}
+
+        The column data in the dataframe takes precedence; i.e. if a column is removed
+        in a pandas operation, then attr_types are adapted silently by removing the
+        item from the dict.
+        """
+        # check first if an attr. is removed in dataframe (e.g. by pandas operations)
+        logger.debug("Ensure consistency attr_types...")
+        for attr_name in list(self._attr_types.keys()):
+            if attr_name not in self._df.columns[3:]:
+                del self._attr_types[attr_name]
+
+        self._infer_attr_dtypes()
+
+    def _ensure_consistency_attr_records(self):
+        """Ensure that data and attr_records are consistent; cf attr_types.
+
+        Important that input attr_types are correct; i.e. run
+        _ensure_consistency_attr_types() first!
+        """
+        for attr_name, dtype in self._attr_types.items():
+            logger.debug("attr_name: %s, and dtype: %s", attr_name, dtype)
+            if attr_name not in self._attr_records or not isinstance(
+                self._attr_records[attr_name], (dict, list, tuple)
+            ):
+                if dtype == _AttrType.CONT:
+                    self._attr_records[attr_name] = CONT_DEFAULT_RECORD
+
+                if dtype == _AttrType.DISC:
+                    # it is a discrete log with missing record; try to find
+                    # a default one based on current values...
+                    lvalues = self._df[attr_name].values.round(decimals=0)
+                    lvalues = lvalues[~np.isnan(lvalues)]
+                    lmin = int(lvalues.min())
+                    lmax = int(lvalues.max())
+
+                    lvalues = lvalues.astype("int")
+                    codes = {}
+                    for lval in range(lmin, lmax + 1):
+                        if lval in lvalues:
+                            codes[lval] = str(lval)
+
+                    if self._undef_disc in codes:
+                        del codes[self._undef_disc]
+                    if const.UNDEF_DISC in codes:
+                        del codes[const.UNDEF_DISC]
+
+                    self._attr_records[attr_name] = codes
+
+            # correct when attr_types is CONT but attr_records for that entry is a dict
+            if (
+                attr_name in self._attr_records
+                and self._attr_types[attr_name] == _AttrType.CONT
+            ):
+                if isinstance(self._attr_records[attr_name], dict):
+                    self._attr_records[attr_name] = CONT_DEFAULT_RECORD
+
+    def _ensure_consistency_df_dtypes(self):
+        """Ensure that the dataframe is float32/64 for all logs, except XYZ -> float64.
+
+        Whether it is float32 or float64 is set by self._floatbits. Float32 will save
+        memory but lose some precision. For backward compatibility, float64 is default.
+        """
+
+        col = list(self._df)
+        logger.debug("columns: %s", col)
+
+        coords_dtypes = [str(entry) for entry in self._df[col[0:3]].dtypes]
+
+        if not all(["float64" in entry for entry in coords_dtypes]):
+            self._df[col[0:3]] = self._df.iloc[:, 0:3].astype("float64")
+
+        attr_dtypes = [str(entry) for entry in self._df[col[3:]].dtypes]
+
+        if not all([self._floatbits in entry for entry in attr_dtypes]):
+            self._df[col[3:]] = self._df.iloc[:, 3:].astype(self._floatbits)
+
+        for name, attr_type in self._attr_types.items():
+            if attr_type == _AttrType.CONT:
+                logger.debug("Replacing CONT undef...")
+                self._df[name].replace(
+                    self._undef_cont,
+                    np.float64(const.UNDEF_CONT).astype(self._floatbits),
+                    inplace=True,
+                )
+            else:
+                logger.debug("Replacing INT undef...")
+                self._df[name].replace(
+                    self._undef_disc, np.int32(const.UNDEF_DISC), inplace=True
+                )
+        logger.info("Processed dataframe: %s", list(self._df.dtypes))
+
+    def ensure_consistency(self) -> bool:
+        """Ensure that data and attr* are consistent.
+
+        This is important for many operations on the dataframe, and should keep
+        attr_types and attr_records 'in sync' with the dataframe.
+
+        * When adding one or more columns to the dataframe
+        * When removing one or more columns from the dataframe
+        * ...
+
+        Returns True if consistency checks were run, while False means that no changes
+        have occurred, hence no consistency checks were needed
+        """
+
+        # the purpose of this hash check is to avoid spending time on consistency
+        # checks if no changes
+        hash_proposed = (
+            jhash(self._df),
+            jhash(self._attr_types),
+            jhash(self._attr_records),
+        )
+
+        if self._hash == hash_proposed:
+            return False
+
+        if list(self._df.columns[:3]) != [self._xname, self._yname, self._zname]:
+            raise ValueError(
+                f"Dataframe must include '{self._xname}', '{self._yname}' "
+                f"and '{self._zname}', got {list(self._df.columns[:3])}"
+            )
+
+        # order matters:
+        self._ensure_consistency_attr_types()
+        self._ensure_consistency_attr_records()
+        self._ensure_consistency_df_dtypes()
+        self._df.reset_index(drop=True, inplace=True)
+
+        self._hash = (
+            jhash(self._df),
+            jhash(self._attr_types),
+            jhash(self._attr_records),
+        )
+
+        return True
+
+    def get_attr_type(self, name: str) -> str:
+        """Get the attr_type as string."""
+        return self._attr_types[name].name
+
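The joblib hash guard above makes repeated calls cheap on unchanged data; a sketch of the intended behaviour (GR assumed present, as in the module docstring example):

    xyz = _XYZData(df)                        # __init__ runs a full consistency pass
    assert xyz.ensure_consistency() is False  # nothing changed since init; early exit
    xyz.data["GR_COPY"] = xyz.data["GR"]      # direct dataframe edit changes the hash
    assert xyz.ensure_consistency() is True   # full pass reruns, re-syncs attr dicts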
+    def set_attr_type(self, name: str, attrtype: str) -> None:
+        """Set a type (DISC, CONT) for a named attribute.
+
+        A bit of flexibility is added for attrtype, e.g. allowing "float*" for CONT
+        etc, and allowing lowercase "cont" for CONT
+
+        """
+
+        apply_attrtype = attrtype.upper()
+        if "FLOAT" in apply_attrtype:
+            apply_attrtype = "CONT"
+        if "INT" in apply_attrtype:
+            apply_attrtype = "DISC"
+
+        if name not in self._attr_types:
+            raise ValueError(f"No such log name present: {name}")
+
+        if apply_attrtype in _AttrType.__members__:
+            self._attr_types[name] = _AttrType[apply_attrtype]
+        else:
+            raise ValueError(
+                f"Cannot set wlogtype as {attrtype}, not in "
+                f"{list(_AttrType.__members__)}"
+            )
+
+        self.ensure_consistency()
+
+    def get_attr_record(self, name: str):
+        """Get a record for a named attribute."""
+        return self._attr_records[name]
+
+    def set_attr_record(self, name: str, record: Optional[dict]) -> None:
+        """Set a record for a named log."""
+
+        if name not in self._attr_types:
+            raise ValueError(f"No such attr_name: {name}")
+
+        if record is None and self._attr_types[name] == _AttrType.DISC:
+            record = {}
+        elif record is None and self._attr_types[name] == _AttrType.CONT:
+            record = CONT_DEFAULT_RECORD
+
+        if self._attr_types[name] == _AttrType.CONT and isinstance(
+            record, (list, tuple)
+        ):
+            if len(record) == 2:
+                self._attr_records[name] = tuple(record)  # prefer as tuple
+        elif self._attr_types[name] == _AttrType.CONT and isinstance(record, dict):
+            raise ValueError(
+                "Cannot set a log record for a continuous log: input record is "
+                "dictionary, not a list or tuple"
+            )
+        elif self._attr_types[name] == _AttrType.DISC and isinstance(record, dict):
+            self._attr_records[name] = record
+        elif self._attr_types[name] == _AttrType.DISC and not isinstance(record, dict):
+            raise ValueError(
+                "Input is not a dictionary. Cannot set a log record for a discrete log"
+            )
+        else:
+            raise ValueError(
+                "Something went wrong when setting logrecord: "
+                f"({self._attr_types[name]} {type(record)})."
+            )
+
+        self.ensure_consistency()
+
+    def get_dataframe_copy(
+        self,
+        infer_dtype: bool = False,
+        filled=False,
+        fill_value=const.UNDEF_CONT,
+        fill_value_int=const.UNDEF_DISC,
+    ):
+        """Get a deep copy of the dataframe, with options.
+
+        If infer_dtype is True, then DISC columns will be of "int32" type, but
+        since int32 does not support np.nan, the value for undefined values will be
+        ``fill_value_int``
+        """
+        dfr = self._df.copy()
+        if infer_dtype:
+            for name, attrtype in self._attr_types.items():
+                if attrtype.name == "DISC":
+                    dfr[name] = dfr[name].fillna(fill_value_int)
+                    dfr[name] = dfr[name].astype("int32")
+
+        if filled:
+            dfill = {}
+            for attrname in self._df:
+                if self._attr_types[attrname] == _AttrType.DISC:
+                    dfill[attrname] = fill_value_int
+                else:
+                    dfill[attrname] = fill_value
+
+            dfr = dfr.fillna(dfill)
+
+        return dfr
+
+    def get_dataframe(self):
+        """Get the dataframe."""
+        return self._df
+
+    def set_dataframe(self, dfr: pd.DataFrame):
+        """Set the dataframe in a controlled manner; this setter shall be used."""
+        # TODO: more checks, and possibly acceptance of lists, dicts?
+        if isinstance(dfr, pd.DataFrame):
+            self._df = dfr
+        else:
+            raise ValueError("Input dfr is not a pandas dataframe")
+        self.ensure_consistency()
+
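The loose spellings accepted by set_attr_type, and the record shapes enforced by set_attr_record, in a short sketch (column names from the module docstring example):

    xyz.set_attr_type("GR", "float32")          # any "FLOAT*" spelling coerces to CONT
    xyz.set_attr_type("FACIES", "int")          # any "INT*" spelling coerces to DISC
    xyz.set_attr_record("GR", ("API", "lin"))   # CONT records: a (unit, scale) pair
    xyz.set_attr_record("FACIES", {1: "SAND"})  # DISC records: a {code: label} dict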
+    def rename_attr(self, attrname: str, newname: str):
+        """Rename an attribute, e.g. Poro to PORO."""
+
+        if attrname not in list(self._df):
+            raise ValueError("Input log does not exist")
+
+        if newname in list(self._df):
+            raise ValueError("New log name exists already")
+
+        # rename in dataframe
+        self._df.rename(index=str, columns={attrname: newname}, inplace=True)
+
+        self._attr_types[newname] = self._attr_types.pop(attrname)
+        self._attr_records[newname] = self._attr_records.pop(attrname)
+
+        self.ensure_consistency()
+
+    def create_attr(
+        self,
+        attrname: str,
+        attr_type: Literal["CONT", "DISC"] = "CONT",
+        attr_record: Optional[dict] = None,
+        value: float = 0.0,
+        force: bool = True,
+        force_reserved: bool = False,
+    ) -> bool:
+        """Create a new attribute, e.g. a log."""
+
+        if attrname in list(self._df) and force is False:
+            return False
+
+        if attrname in _AttrName.list() and not force_reserved:
+            raise ValueError(
+                f"The proposed name {attrname} is a reserved name; try another or "
+                "set keyword ``force_reserved`` to True. "
+                f"Note that the following names are reserved: {_AttrName.list()}"
+            )
+
+        self._attr_types[attrname] = _AttrType[attr_type]
+        self._attr_records[attrname] = attr_record
+
+        # make a new column
+        self._df[attrname] = float(value)
+        self.ensure_consistency()
+        return True
+
+    def copy_attr(self, attrname: str, new_attrname: str, force: bool = True) -> bool:
+        """Copy an attribute to a new name."""
+
+        if new_attrname in list(self._df) and force is False:
+            return False
+
+        self._attr_types[new_attrname] = deepcopy(self._attr_types[attrname])
+        self._attr_records[new_attrname] = deepcopy(self._attr_records[attrname])
+
+        # make a new column
+        self._df[new_attrname] = self._df[attrname].copy()
+        self.ensure_consistency()
+        return True
+
+    def delete_attr(self, attrname: Union[str, List[str]]) -> int:
+        """Delete/remove an existing attribute, or list of attributes.
+
+        Returns number of logs deleted
+        """
+        if not isinstance(attrname, list):
+            attrname = [attrname]
+
+        lcount = 0
+        for logn in attrname:
+            if logn not in list(self._df):
+                continue
+
+            lcount += 1
+            self._df.drop(logn, axis=1, inplace=True)
+
+        self.ensure_consistency()
+
+        return lcount
+
+    def create_relative_hlen(self):
+        """Make a relative length of e.g. a well, as an attribute (log)."""
+        # extract numpies from XYZ trajectory logs
+        xv = self._df[self._xname].values
+        yv = self._df[self._yname].values
+
+        distance = []
+        previous_x, previous_y = xv[0], yv[0]
+        for x, y in zip(xv, yv):
+            distance.append(math.hypot((previous_x - x), (y - previous_y)))
+            previous_x, previous_y = x, y
+
+        self._df[_AttrName.R_HLEN_NAME.value] = pd.Series(
+            np.cumsum(distance), index=self._df.index
+        )
+        self.ensure_consistency()
+
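Correspondingly, a sketch of the attribute lifecycle methods defined above (attribute names hypothetical):

    xyz.create_attr("KLOGH_EST", attr_type="CONT", value=0.0)
    xyz.copy_attr("KLOGH_EST", "KLOGH_EST2")
    assert xyz.delete_attr(["KLOGH_EST", "KLOGH_EST2"]) == 2  # returns count deleted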
+    def geometrics(self):
+        """Compute geometrical arrays MD, INCL, AZI, as attributes (logs) (~well data).
+
+        These are kind of quasi measurements, hence the attributes (logs) will be
+        named with a Q in front as Q_MDEPTH, Q_INCL, and Q_AZI.
+
+        These attributes will be added to the dataframe.
+
+        TODO: If the mdlogname attribute does not exist in advance, it will be
+        set to 'Q_MDEPTH'.
+
+        Returns:
+            False if geometrics cannot be computed
+
+        """
+        # TODO: rewrite in pure python?
+        if self._df.shape[0] < 3:
+            raise ValueError(
+                f"Cannot compute geometrics. Not enough "
+                f"trajectory points (need >3, have: {self._df.shape[0]})"
+            )
+
+        # extract numpies from XYZ trajectory logs
+        ptr_xv = _get_carray(self._df, self._attr_types, self._xname)
+        ptr_yv = _get_carray(self._df, self._attr_types, self._yname)
+        ptr_zv = _get_carray(self._df, self._attr_types, self._zname)
+
+        # get number of rows in pandas
+        nlen = len(self._df)
+
+        ptr_md = _cxtgeo.new_doublearray(nlen)
+        ptr_incl = _cxtgeo.new_doublearray(nlen)
+        ptr_az = _cxtgeo.new_doublearray(nlen)
+
+        ier = _cxtgeo.well_geometrics(
+            nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0
+        )
+
+        if ier != 0:
+            raise XTGeoCLibError(f"XYZ/well_geometrics failed with error code: {ier}")
+
+        dnumpy = _convert_carr_double_np(len(self._df), ptr_md)
+        self._df[_AttrName.Q_MD_NAME.value] = pd.Series(dnumpy, index=self._df.index)
+
+        dnumpy = _convert_carr_double_np(len(self._df), ptr_incl)
+        self._df[_AttrName.Q_INCL_NAME.value] = pd.Series(dnumpy, index=self._df.index)
+
+        dnumpy = _convert_carr_double_np(len(self._df), ptr_az)
+        self._df[_AttrName.Q_AZI_NAME.value] = pd.Series(dnumpy, index=self._df.index)
+
+        # delete tmp pointers
+        _cxtgeo.delete_doublearray(ptr_xv)
+        _cxtgeo.delete_doublearray(ptr_yv)
+        _cxtgeo.delete_doublearray(ptr_zv)
+        _cxtgeo.delete_doublearray(ptr_md)
+        _cxtgeo.delete_doublearray(ptr_incl)
+        _cxtgeo.delete_doublearray(ptr_az)
+
+        self.ensure_consistency()
+
+        return True
diff --git a/src/xtgeo/xyz_common/_xyz_enum.py b/src/xtgeo/xyz_common/_xyz_enum.py
new file mode 100644
index 000000000..4575422e9
--- /dev/null
+++ b/src/xtgeo/xyz_common/_xyz_enum.py
@@ -0,0 +1,43 @@
+from enum import Enum, unique
+
+
+# to be able to list all values in an easy manner e.g. _AttrName.list()
+class ExtendedEnum(Enum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
+# default names of special column names
+@unique
+class _AttrName(ExtendedEnum):
+    XNAME = "X_UTME"
+    YNAME = "Y_UTMN"
+    ZNAME = "Z_TVDSS"
+    M_MD_NAME = "M_MDEPTH"
+    Q_MD_NAME = "Q_MDEPTH"
+    M_AZI_NAME = "M_AZI"
+    Q_AZI_NAME = "Q_AZI"
+    M_INCL_NAME = "M_INCL"
+    Q_INCL_NAME = "Q_INCL"
+    I_INDEX = "I_INDEX"
+    J_INDEX = "J_INDEX"
+    K_INDEX = "K_INDEX"
+    R_HLEN_NAME = "R_HLEN"
+
+
+@unique
+class _AttrType(ExtendedEnum):
+    """Enumerate type of attribute/log"""
+
+    CONT = 1
+    DISC = 2
+
+
+@unique
+class _XYZType(ExtendedEnum):
+    """Enumerate type of context"""
+
+    POINTS = 1
+    POLYGONS = 2  # ie.
same here as PolyLines + WELL = 3 diff --git a/tests/test_grid3d/test_grid.py b/tests/test_grid3d/test_grid.py index 002d14fea..6bb4ea6ef 100644 --- a/tests/test_grid3d/test_grid.py +++ b/tests/test_grid3d/test_grid.py @@ -85,7 +85,7 @@ def test_create_shoebox(tmp_path): ) logger.info("Making a a 1,8 mill cell grid took %5.3f secs", xtg.timer(timer1)) - dx, dy = grd.get_dxdy() + dx, dy = (grd.get_dx(), grd.get_dy()) assert dx.values.mean() == pytest.approx(20.0, abs=0.0001) assert dy.values.mean() == pytest.approx(20.0, abs=0.0001) @@ -139,7 +139,7 @@ def test_emerald_grid_values(emerald_grid): dzval = dzv.values mydz = float(dzval[31:32, 72:73, 0:1]) assert mydz == pytest.approx(2.761, abs=0.001), "Grid DZ Emerald" - dxv, dyv = emerald_grid.get_dxdy() + dxv, dyv = (emerald_grid.get_dx(), emerald_grid.get_dy()) mydx = float(dxv.values3d[31:32, 72:73, 0:1]) mydy = float(dyv.values3d[31:32, 72:73, 0:1]) diff --git a/tests/test_grid3d/test_grid_vs_well.py b/tests/test_grid3d/test_grid_vs_well.py index b1d9f33ef..17d57322e 100644 --- a/tests/test_grid3d/test_grid_vs_well.py +++ b/tests/test_grid3d/test_grid_vs_well.py @@ -66,13 +66,13 @@ def test_report_zlog_mismatch(): zo = xtgeo.gridproperty_from_file(ZONEFILE, name="Zone") - w1 = Well(WELL1) - w2 = Well(WELL2) - w3 = Well(WELL3) - w4 = Well(WELL4) - w5 = Well(WELL5) - w6 = Well(WELL6) - w7 = Well(WELL7) + w1 = xtgeo.well_from_file(WELL1) + w2 = xtgeo.well_from_file(WELL2) + w3 = xtgeo.well_from_file(WELL3) + w4 = xtgeo.well_from_file(WELL4) + w5 = xtgeo.well_from_file(WELL5) + w6 = xtgeo.well_from_file(WELL6) + w7 = xtgeo.well_from_file(WELL7) wells = [w1, w2, w3, w4, w5, w6, w7] diff --git a/tests/test_well/test_blockedwell.py b/tests/test_well/test_blockedwell.py index 629dd4035..513495ece 100644 --- a/tests/test_well/test_blockedwell.py +++ b/tests/test_well/test_blockedwell.py @@ -26,22 +26,21 @@ def fixture_loadwell1(): return xtgeo.blockedwell_from_file(wfile) -def test_import(loadwell1): - """Import well from file.""" +def test_import_blockedwell(loadwell1): + """Import blocked well from file.""" mywell = loadwell1 - print(mywell.dataframe) - assert mywell.xpos == 461809.6, "XPOS" assert mywell.ypos == 5932990.4, "YPOS" assert mywell.wellname == "OP_1", "WNAME" + assert mywell.xname == "X_UTME" - logger.info(mywell.get_logtype("Facies")) - logger.info(mywell.get_logrecord("Facies")) - - # logger.info the numpy string of Poro... 
- logger.info(type(mywell.dataframe["Poro"].values)) + assert mywell.get_logtype("Facies") == "DISC" + assert mywell.get_logrecord("Facies") == { + 0: "Background", + 1: "Channel", + 2: "Crevasse", + } - dfr = mywell.dataframe - assert dfr["Poro"][4] == pytest.approx(0.224485, abs=0.0001) + assert mywell.dataframe["Poro"][4] == pytest.approx(0.224485, abs=0.0001) diff --git a/tests/test_well/test_well.py b/tests/test_well/test_well.py index 48d476218..0e09067d4 100644 --- a/tests/test_well/test_well.py +++ b/tests/test_well/test_well.py @@ -1,8 +1,4 @@ -# -*- coding: utf-8 -*- - - import sys -from collections import OrderedDict from os.path import join import numpy as np @@ -175,7 +171,7 @@ def test_import_well_selected_logs(): "log_name, newdict, expected", [ ("Poro", {0: "null"}, "Cannot set a log record for a continuous log"), - ("not_in_lognames", {}, "No such logname: not_in_lognames"), + ("not_in_lognames", {}, "No such attr_name"), ("Facies", list(), "Input is not a dictionary"), ], ) @@ -216,7 +212,7 @@ def test_rename_log(simple_well): @pytest.mark.parametrize( - "log_name,change_from, change_to", + "log_name, change_from, change_to", [("Poro", "CONT", "DISC"), ("Poro", "CONT", "CONT"), ("Facies", "DISC", "CONT")], ) def test_set_log_type(simple_well, log_name, change_from, change_to): @@ -232,7 +228,7 @@ def test_loadwell1_properties(simple_well): mywell = simple_well assert mywell.get_logtype("Poro") == "CONT" - assert mywell.get_logrecord("Poro") is None + assert mywell.get_logrecord("Poro") == ("UNK", "lin") assert mywell.name == "OP_1" mywell.name = "OP_1_EDITED" @@ -265,29 +261,29 @@ def test_shortwellname(create_well): assert short == "A-142H" -@pytest.mark.skipif(sys.platform.startswith("darwin"), reason="No pytables on macOS") -def test_hdf_io_single(tmp_path): - """Test HDF io, single well.""" - mywell = xtgeo.well_from_file(WELL1) +# @pytest.mark.skipif(sys.platform.startswith("darwin"), reason="No pytables on macOS") +# def test_hdf_io_single(tmp_path): +# """Test HDF io, single well.""" +# mywell = xtgeo.well_from_file(WELL1) - wname = (tmp_path / "hdfwell").with_suffix(".hdf") - mywell.to_hdf(wname) - mywell2 = xtgeo.well_from_file(wname, fformat="hdf") - assert mywell2.nrow == mywell.nrow +# wname = (tmp_path / "hdfwell").with_suffix(".hdf") +# mywell.to_hdf(wname) +# mywell2 = xtgeo.well_from_file(wname, fformat="hdf") +# assert mywell2.nrow == mywell.nrow -@pytest.mark.skipif(sys.platform.startswith("darwin"), reason="No pytables on macOS") -def test_import_as_rms_export_as_hdf_many(tmp_path, simple_well): - """Import RMS and export as HDF5 and RMS asc, many, and compare timings.""" - t0 = xtg.timer() - wname = (tmp_path / "$random").with_suffix(".hdf") - wuse = simple_well.to_hdf(wname, compression=None) - print("Time for save HDF: ", xtg.timer(t0)) +# @pytest.mark.skipif(sys.platform.startswith("darwin"), reason="No pytables on macOS") +# def test_import_as_rms_export_as_hdf_many(tmp_path, simple_well): +# """Import RMS and export as HDF5 and RMS asc, many, and compare timings.""" +# t0 = xtg.timer() +# wname = (tmp_path / "$random").with_suffix(".hdf") +# wuse = simple_well.to_hdf(wname, compression=None) +# print("Time for save HDF: ", xtg.timer(t0)) - t0 = xtg.timer() - result = xtgeo.well_from_file(wuse, fformat="hdf5") - assert result.dataframe.equals(simple_well.dataframe) - print("Time for load HDF: ", xtg.timer(t0)) +# t0 = xtg.timer() +# result = xtgeo.well_from_file(wuse, fformat="hdf5") +# assert result.dataframe.equals(simple_well.dataframe) +# 
print("Time for load HDF: ", xtg.timer(t0)) def test_import_export_rmsasc(tmp_path, simple_well): @@ -302,30 +298,8 @@ def test_import_export_rmsasc(tmp_path, simple_well): print("Time for load RMSASC: ", xtg.timer(t0)) -def test_get_carr(simple_well): - """Get a C array pointer""" - - mywell = simple_well - - dummy = mywell.get_carray("NOSUCH") - - assert dummy is None, "Wrong log name" - - cref = mywell.get_carray("X_UTME") - - xref = str(cref) - - assert "Swig" in xref and "double" in xref, "carray from log name, double" - - cref = mywell.get_carray("Zonelog") - - xref = str(cref) - - assert "Swig" in xref and "int" in xref, "carray from log name, int" - - def test_create_and_delete_logs(loadwell3): - """Test create adn delete logs.""" + """Test create and delete logs, using explicit create_log() and delete_log().""" mywell = loadwell3 status = mywell.create_log("NEWLOG") @@ -347,6 +321,39 @@ def test_create_and_delete_logs(loadwell3): assert ndeleted == 2 +def test_create_and_delete_logs_implicit(loadwell3): + """Test create and delete logs, using implicit dataframe operations.""" + mywell = loadwell3 + + mywell.dataframe["NEWLOG"] = 1234.0 + assert mywell.dataframe.NEWLOG.mean() == 1234.0 + assert "NEWLOG" in mywell.get_lognames() + + # status = mywell.create_log("NEWLOG", force=True, value=200) + # assert status is True + # assert mywell.dataframe.NEWLOG.mean() == 200.0 + + # ndeleted = mywell.delete_log("NEWLOG") + + # assert ndeleted == 1 + # status = mywell.create_log("NEWLOG", force=True, value=200) + + # ndeleted = mywell.delete_log(["NEWLOG", "GR"]) + # assert ndeleted == 2 + + +def test_wlogtypes(loadwell3): + mywell = loadwell3 + wlogtypes = mywell.wlogtypes + assert wlogtypes == { + "X_UTME": "CONT", + "Y_UTMN": "CONT", + "Z_TVDSS": "CONT", + "GR": "CONT", + "ZONELOG": "DISC", + } + + def test_get_set_wlogs(loadwell3): """Test on getting ans setting a dictionary with some log attributes.""" mywell = loadwell3 @@ -354,7 +361,7 @@ def test_get_set_wlogs(loadwell3): mydict = mywell.get_wlogs() print(mydict) - assert isinstance(mydict, OrderedDict) + assert isinstance(mydict, dict) assert mydict["X_UTME"][0] == "CONT" assert mydict["ZONELOG"][0] == "DISC" @@ -369,8 +376,7 @@ def test_get_set_wlogs(loadwell3): assert mydict2["ZONELOG"][1][24] == "ZONE_24_EDITED" mydict2["EXTRA"] = None - with pytest.raises(ValueError): - mywell.set_wlogs(mydict2) + mywell.set_wlogs(mydict2) def test_make_hlen(loadwell1): @@ -389,10 +395,20 @@ def test_make_zqual_log(loadwell3): logger.debug("True well name: %s", mywell.truewellname) - mywell.make_zone_qual_log("manamana") + mywell.make_zone_qual_log("ZQUAL") - with pd.option_context("display.max_rows", 1000): - print(mywell.dataframe) + recs = mywell.get_logrecord("ZQUAL") + assert recs == { + 0: "UNDETERMINED", + 1: "INCREASE", + 2: "DECREASE", + 3: "U_TURN", + 4: "INV_U_TURN", + 9: "INCOMPLETE", + } + unique, counts = np.unique(mywell.dataframe["ZQUAL"].values, return_counts=True) + assert unique.tolist() == [0.0, 1.0, 2.0, 3.0, 9.0] + assert counts.tolist() == [714, 123, 612, 782, 90] @pytest.mark.parametrize( @@ -798,15 +814,24 @@ def test_create_surf_distance_log_more(tmp_path, loadwell1): well.create_log("MEGAZONE1", logtype="DISC", logrecord=lrec) well.create_log("MEGAZONE2", logtype="DISC", logrecord=lrec) - zl = well.dataframe["Zonelog"] - well.dataframe["MEGAZONE1"][(zl > 0) & (zl < 4)] = 1 - well.dataframe["MEGAZONE1"][zl > 3] = 2 - well.dataframe["MEGAZONE1"][np.isnan(zl)] = np.nan + zl = well.dataframe["Zonelog"].copy() + # 
well.dataframe["MEGAZONE1"][(zl > 0) & (zl < 4)] = 1 # << get Pd warnings + well.dataframe.loc[(zl > 0) & (zl < 4), "MEGAZONE1"] = 1 + + well.dataframe.loc[zl > 3, "MEGAZONE1"] = 2 + _, counts = np.unique(well.dataframe["MEGAZONE1"].values, return_counts=True) + assert counts.tolist() == [4780, 75, 11] + + well.dataframe.loc[np.isnan(zl), "MEGAZONE1"] = np.nan + _, counts = np.unique(well.dataframe["MEGAZONE1"].values, return_counts=True) + assert counts.tolist() == [4779, 75, 11, 1] # derive from distance log: d1 = well.dataframe["DIST_TOP"] d2 = well.dataframe["DIST_BASE"] - well.dataframe["MEGAZONE2"][(d1 <= 0.0) & (d2 > 0)] = 1 + well.dataframe.loc[(d1 <= 0.0) & (d2 > 0), "MEGAZONE2"] = 1 + _, counts = np.unique(well.dataframe["MEGAZONE2"].values, return_counts=True) + assert counts.tolist() == [4788, 78] # now use logics from Grid() report_zone_mismatch()... # much coding pasting vvvvvv ======================================================= @@ -824,10 +849,12 @@ def test_create_surf_distance_log_more(tmp_path, loadwell1): if depthrange: d1, d2 = depthrange - wll._df = wll._df[(d1 < wll._df.Z_TVDSS) & (wll._df.Z_TVDSS < d2)] + wll.set_dataframe( + wll.dataframe[(d1 < wll.dataframe.Z_TVDSS) & (wll.dataframe.Z_TVDSS < d2)] + ) # from here, work with the dataframe only - df = wll._df + df = wll.dataframe.copy() # zonelogrange z1, z2 = zonelogrange @@ -871,7 +898,7 @@ def test_create_surf_distance_log_more(tmp_path, loadwell1): res2 = dfuse2["zmatch2"].mean() * 100 # update Well() copy (segment only) - wll.dataframe = dfuse2 + wll.set_dataframe(dfuse2) res = { "MATCH1": res1, @@ -1002,23 +1029,7 @@ def test_copy(string_to_well): def test_create_relative_hlen(string_to_well, well_definition, expected_hlen): well = string_to_well(well_definition) well.create_relative_hlen() - assert well.dataframe["R_HLEN"].to_list() == expected_hlen - - -def test_speed_new(string_to_well): - well_definition = """1.01 - Unknown - name 0 0 0 - 1 - Zonelog DISC 1 zone1 2 zone2 3 zone3""" - - for i in range(1, 10000): - well_definition += f"\n {i} {i} 1 1" - - well = string_to_well(well_definition) - t0 = xtg.timer() - well.create_relative_hlen() - print(f"Run time: {xtg.timer(t0)}") + assert well.dataframe["R_HLEN"].to_list() == pytest.approx(expected_hlen) def test_truncate_parallel_path_too_short(string_to_well): @@ -1174,11 +1185,13 @@ def test_downsample(string_to_well, input_points, expected_points): well_definition += f"\n {i} {i} {i} 1" well = string_to_well(well_definition) + print(well.dataframe) well.downsample() + print(well.dataframe) assert { - "X_UTME": well.dataframe["X_UTME"].to_list(), - "Y_UTMN": well.dataframe["Y_UTMN"].to_list(), - "Z_TVDSS": well.dataframe["Z_TVDSS"].to_list(), + "X_UTME": well.dataframe["X_UTME"].values.tolist(), + "Y_UTMN": well.dataframe["Y_UTMN"].values.tolist(), + "Z_TVDSS": well.dataframe["Z_TVDSS"].values.tolist(), } == { "X_UTME": expected_points, "Y_UTMN": expected_points, @@ -1253,7 +1266,3 @@ def test_get_polygons_skipname(string_to_well): polygons = well.get_polygons(skipname=True) assert "NAME" not in polygons.dataframe.columns assert polygons.name == "custom_name" - - -def test_get_fence_poly(string_to_well): - pass diff --git a/tests/test_well/test_well_vs_grid.py b/tests/test_well/test_well_vs_grid.py index f9b2c83bc..fb91f5567 100644 --- a/tests/test_well/test_well_vs_grid.py +++ b/tests/test_well/test_well_vs_grid.py @@ -76,12 +76,12 @@ def test_well_get_gridprops(tmpdir, loadwell1, loadgrid1, loadporo1): mywell.get_gridproperties(myporo, mygrid) - 
myactnum = mygrid.get_actnum() - myactnum.codes = {0: "INACTIVE", 1: "ACTIVE"} - myactnum.describe() - - mywell.get_gridproperties(myactnum, mygrid) - mywell.to_file(join(tmpdir, "w_from_gprops.w")) - assert mywell.dataframe.iloc[4775]["PORO_model"] == pytest.approx(0.2741, abs=0.001) - assert mywell.dataframe.iloc[4775]["ACTNUM_model"] == 1 - assert mywell.isdiscrete("ACTNUM_model") is True + # myactnum = mygrid.get_actnum() + # myactnum.codes = {0: "INACTIVE", 1: "ACTIVE"} + # myactnum.describe() + + # mywell.get_gridproperties(myactnum, mygrid) + # mywell.to_file(join(tmpdir, "w_from_gprops.w")) + # assert mywell.dataframe.iloc[4775]["PORO_model"] == pytest.approx(0.2741, abs=0.001) + # assert mywell.dataframe.iloc[4775]["ACTNUM_model"] == 1 + # assert mywell.isdiscrete("ACTNUM_model") is True diff --git a/tests/test_well/test_well_xyzdata_class.py b/tests/test_well/test_well_xyzdata_class.py new file mode 100644 index 000000000..ad5fe5f84 --- /dev/null +++ b/tests/test_well/test_well_xyzdata_class.py @@ -0,0 +1,246 @@ +"""Test _XYZData class, in a Well context""" +import pandas as pd +import pytest + +from xtgeo.xyz_common import _AttrType, _XYZData + + +@pytest.fixture(name="generate_data") +def fixture_generate_data() -> pd.DataFrame: + """Make a test dataframe""" + + data = { + "X_UTME": [1.3, 2.0, 3.0, 4.0, 5.2, 6.0, 9.0], + "Y_UTMN": [11.0, 21.0, 31.0, 41.1, 51.0, 61.0, 91.0], + "Z_TVDSS": [21.0, 22.0, 23.0, 24.0, 25.3, 26.0, 29.0], + "MDEPTH": [13.0, 23.0, 33.0, 43.0, 53.2, 63.0, 93.0], + "GR": [133.0, 2234.0, -999, 1644.0, 2225.5, 6532.0, 92.0], + "FACIES": [1, -999, 3, 4, 4, 1, 1], + "ZONES": [1, 2, 3, 3, 3, 4, -999], + } + + return pd.DataFrame(data) + + +def test_well_xyzdata_initialize(generate_data: pd.DataFrame): + """Initialize data with no attr_records and attr_types given. 
+
+    The init shall then try to infer a 'best' guess."""
+
+    instance = _XYZData(generate_data)
+
+    assert instance.dataframe.columns[0] == instance.xname
+    assert instance.dataframe.columns[2] == instance.zname
+
+
+def test_well_xyzdata_ensure_attr(generate_data: pd.DataFrame):
+    """Test the private methods ensuring attr_types and attr_records consistency."""
+
+    instance = _XYZData(generate_data)
+    assert "FACIES" in instance._df.columns
+    assert instance.get_attr_record("FACIES") == {1: "1", 3: "3", 4: "4"}
+    assert instance.dataframe.FACIES.values.tolist() == [
+        1.0,
+        2000000000.0,
+        3.0,
+        4.0,
+        4.0,
+        1.0,
+        1.0,
+    ]
+
+    del instance.dataframe["FACIES"]
+
+    instance._ensure_consistency_attr_types()
+    assert "FACIES" not in instance.dataframe.columns
+
+    instance.dataframe["NEW"] = 1
+    instance._ensure_consistency_attr_types()
+    assert "NEW" in instance.dataframe.columns
+    assert "NEW" in instance.attr_types
+    assert instance.get_attr_type("NEW") == "DISC"
+
+    instance._ensure_consistency_attr_records()
+    assert instance.get_attr_record("NEW") == {1: "1"}
+
+
+def test_infer_attr_dtypes(generate_data: pd.DataFrame):
+    """Test the private method _infer_attr_dtypes."""
+
+    instance = _XYZData(generate_data)
+
+    instance._attr_types = {}  # for testing, make private _attr_types empty
+
+    instance._infer_attr_dtypes()
+    res = instance._attr_types
+    assert res["X_UTME"].name == "CONT"
+    assert res["FACIES"].name == "DISC"
+
+    # next, FACIES is predefined in attr_types prior to parsing; here as CONT,
+    # which shall 'win' in this setting
+    instance._attr_types = {"FACIES": _AttrType.CONT}
+    instance._infer_attr_dtypes()
+    res = instance._attr_types
+    assert res["X_UTME"].name == "CONT"
+    assert res["FACIES"].name == "CONT"
+
+
+def test_ensure_dataframe_dtypes(generate_data: pd.DataFrame):
+    """Test the private method _ensure_consistency_df_dtypes."""
+
+    instance = _XYZData(generate_data, floatbits="float32")
+
+    assert instance.data["FACIES"].dtype == "float32"
+    instance.data["FACIES"] = instance.data["FACIES"].astype("int32")
+    assert instance.data["FACIES"].dtype == "int32"
+
+    instance._ensure_consistency_df_dtypes()
+    assert instance.data["FACIES"].dtype == "float32"
+
+
+def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame):
+    """Add column to the dataframe; check if attr_types and attr_records are updated."""
+
+    instance = _XYZData(generate_data)
+
+    assert instance.attr_types == {
+        "X_UTME": _AttrType.CONT,
+        "Y_UTMN": _AttrType.CONT,
+        "Z_TVDSS": _AttrType.CONT,
+        "MDEPTH": _AttrType.CONT,
+        "GR": _AttrType.CONT,
+        "FACIES": _AttrType.DISC,
+        "ZONES": _AttrType.DISC,
+    }
+
+    instance.data["NEW"] = 1.992
+    assert instance.ensure_consistency() is True
+
+    assert instance.attr_types == {
+        "X_UTME": _AttrType.CONT,
+        "Y_UTMN": _AttrType.CONT,
+        "Z_TVDSS": _AttrType.CONT,
+        "MDEPTH": _AttrType.CONT,
+        "GR": _AttrType.CONT,
+        "FACIES": _AttrType.DISC,
+        "ZONES": _AttrType.DISC,
+        "NEW": _AttrType.CONT,
+    }
+
+    instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1]
+    assert instance.ensure_consistency() is True
+
+    # a rerun on the SAME data makes no changes, hence ensure_consistency() -> False
+    assert instance.ensure_consistency() is False
+
+    assert instance.attr_types == {
+        "X_UTME": _AttrType.CONT,
+        "Y_UTMN": _AttrType.CONT,
+        "Z_TVDSS": _AttrType.CONT,
+        "MDEPTH": _AttrType.CONT,
+        "GR": _AttrType.CONT,
+        "FACIES": _AttrType.DISC,
+        "ZONES": _AttrType.DISC,
+        "NEW": _AttrType.CONT,
+        "DNEW": _AttrType.DISC,
+    }
+
+    empty = ("", "")
+
+    assert instance.attr_records == {
+        "X_UTME": empty,
+        "Y_UTMN": empty,
+        "Z_TVDSS": empty,
+        "MDEPTH": empty,
+        "GR": empty,
+        "FACIES": {1: "1", 3: "3", 4: "4"},
+        "ZONES": {1: "1", 2: "2", 3: "3", 4: "4"},
+        "NEW": empty,
+        "DNEW": {1: "1", 3: "3", 4: "4"},
+    }
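+
+
+# A minimal sketch of the mutate-then-ensure_consistency() pattern exercised by the
+# tests above; the PERM column name is purely illustrative (not in the fixture data):
+def test_well_xyzdata_consistency_new_float_column(generate_data: pd.DataFrame):
+    """A new float column shall be inferred as a CONT attribute."""
+    instance = _XYZData(generate_data)
+    instance.data["PERM"] = 0.1  # mutate the dataframe directly
+    assert instance.ensure_consistency() is True  # bookkeeping was refreshed
+    assert instance.get_attr_type("PERM") == "CONT"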
+
+
+def test_attrtype_class():
+    """Test the _AttrType enum class."""
+
+    assert _AttrType.DISC.value == 2
+    assert _AttrType.CONT.value == 1
+
+    assert "CONT" in _AttrType.__members__
+    assert "DISC" in _AttrType.__members__
+    assert "FOO" not in _AttrType.__members__
+
+    with pytest.raises(ValueError, match="is not a valid"):
+        _AttrType("FOO")
+
+
+def test_create_attr(generate_data: pd.DataFrame):
+    """Try to create a new attribute."""
+    instance = _XYZData(generate_data)
+    print(instance.dataframe)
+
+    instance.create_attr("NEWATTR", attr_type="CONT", value=823.0)
+    print(instance.dataframe)
+    assert instance.attr_records["NEWATTR"] == ("", "")
+
+
+def test_create_attr_reserved_name(generate_data: pd.DataFrame):
+    """Try to create an attribute with a reserved name."""
+    instance = _XYZData(generate_data)
+
+    with pytest.raises(ValueError, match="The proposed name Q_AZI is a reserved name"):
+        instance.create_attr("Q_AZI", attr_type="CONT", value=823.0)
+
+    instance.create_attr("Q_AZI", attr_type="CONT", value=823.0, force_reserved=True)
+
+
+def test_well_xyzdata_dataframe_copy(generate_data: pd.DataFrame):
+    """Test the get_dataframe_copy() method, with the infer_dtype option."""
+
+    instance = _XYZData(generate_data, floatbits="float32")
+
+    copy = instance.get_dataframe_copy()
+    col = list(copy)
+
+    dtypes = [str(entry) for entry in copy[col].dtypes]
+    assert dtypes == [
+        "float64",
+        "float64",
+        "float64",
+        "float32",
+        "float32",
+        "float32",
+        "float32",
+    ]
+
+    copy = instance.get_dataframe_copy(infer_dtype=True)
+
+    dtypes = [str(entry) for entry in copy[col].dtypes]
+    assert dtypes == [
+        "float64",
+        "float64",
+        "float64",
+        "float32",
+        "float32",
+        "int32",
+        "int32",
+    ]
+
+
+def test_well_xyzdata_copy_attr(generate_data: pd.DataFrame):
+    """Test copying an attribute."""
+
+    instance = _XYZData(generate_data)
+
+    assert instance.copy_attr("GR", "GR_copy") is True
+    assert instance.copy_attr("GR", "GR_copy", force=True) is True
+    assert instance.copy_attr("GR", "GR_copy", force=False) is False  # already there...
+
+    assert instance.data["GR"].to_list() == instance.data["GR_copy"].to_list()
+    assert instance.attr_records["GR"] == instance.attr_records["GR_copy"]
+
+    instance.set_attr_record("GR", ("unit", "linear"))
+    assert instance.attr_records["GR"] != instance.attr_records["GR_copy"]
+
+    instance.copy_attr("GR", "GR_new2")
+    assert instance.attr_records["GR"] == instance.attr_records["GR_new2"]
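+
+
+# A closing sketch under the same API assumptions as test_create_attr() above; that
+# create_attr() accepts attr_type="DISC" with a constant value is assumed here by
+# analogy with the CONT case, and the single-code record mirrors the NEW/DNEW
+# bookkeeping asserted earlier:
+def test_create_attr_disc(generate_data: pd.DataFrame):
+    """Create a DISC attribute and check the inferred record."""
+    instance = _XYZData(generate_data)
+    instance.create_attr("FLAG", attr_type="DISC", value=1)
+    assert instance.get_attr_type("FLAG") == "DISC"
+    assert instance.get_attr_record("FLAG") == {1: "1"}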