From 679b64b97f76a87a257b68e20ae720d44787bacb Mon Sep 17 00:00:00 2001 From: mferrera Date: Fri, 29 Sep 2023 12:50:40 +0200 Subject: [PATCH 01/13] BLD: Build with pyproject.toml and skbuild-core --- setup.cfg => .flake8 | 7 -- .pre-commit-config.yaml | 36 -------- CMakeLists.txt | 25 ++--- docs/contributing.rst | 21 ++--- pyproject.toml | 140 +++++++++++++++++++++++----- pytest.ini | 15 --- scripts/setup_utilities.py | 155 ------------------------------- scripts/swig_manylinux.sh | 27 ------ setup.py | 107 ---------------------- src/clib/CMakeLists.txt | 171 +++++++++++++++++++---------------- tests/test_well/test_well.py | 3 + 11 files changed, 228 insertions(+), 479 deletions(-) rename setup.cfg => .flake8 (62%) delete mode 100644 .pre-commit-config.yaml delete mode 100644 pytest.ini delete mode 100644 scripts/setup_utilities.py delete mode 100755 scripts/swig_manylinux.sh delete mode 100644 setup.py diff --git a/setup.cfg b/.flake8 similarity index 62% rename from setup.cfg rename to .flake8 index 9465f6cb4..4c7aaf9cd 100644 --- a/setup.cfg +++ b/.flake8 @@ -2,10 +2,3 @@ exclude = docs/conf.py, src/xtgeo/cxtgeo/*.py, .eggs, _skbuild, tests/**/snapshots max-line-length = 88 ignore = E402, W503, E203, C901 - -[pydocstyle] -convention = google -match = (?!(test_|_)).*\.py - -[aliases] -test = pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index f3c10a261..000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# to disable hooks: SKIP=pylint git commit ... -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: trailing-whitespace - exclude: "snapshots/" - - id: mixed-line-ending - exclude: "snapshots/" - - - repo: https://github.com/psf/black - rev: 20.8b1 - hooks: - - id: black - - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.3 - hooks: - - id: flake8 - - repo: https://github.com/pycqa/pydocstyle - rev: 5.1.1 - hooks: - - id: pydocstyle - files: ^src/xtgeo/plot - exclude: _.*\.py - - repo: https://github.com/pycqa/pylint - rev: pylint-2.6.0 - hooks: - - id: pylint - files: ^src/xtgeo/plot - # - repo: https://github.com/PyCQA/bandit - # rev: 1.7.0 - # hooks: - # - id: bandit - # args: [--skip, "B311", --recursive, --quiet] - # files: src/xtgeo diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f7993666..3d9b22a3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,20 +1,11 @@ -# Main CMakeLists.txt file. 
Most of the work is inside
-# the CMakeLists file under add_subdirectory()
-
-cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
-if (CMAKE_VERSION VERSION_LESS 3)
-  project(clib C)
-else()
-  project(clib LANGUAGES C)
-endif()
-
-
-set(CMAKE_VERBOSE_MAKEFILE ON)
-
-if(SKBUILD)
-  message(STATUS "The project is built using scikit-build")
+cmake_minimum_required(VERSION 3.15.3)
+project(${SKBUILD_PROJECT_NAME} LANGUAGES C)
+
+if(NOT SKBUILD)
+  message(
+    WARNING
+    "This CMakeLists.txt file should not be used directly.\n"
+    "Use 'pip install ${CMAKE_SOURCE_DIR}' to install this as a Python package.")
 endif()
 
-find_package(PythonExtensions REQUIRED)
-
 add_subdirectory(src/clib)
diff --git a/docs/contributing.rst b/docs/contributing.rst
index 50deaad96..e728f3335 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -83,9 +83,7 @@ up your fork for local development (first time):
       $ source bin/activate
       $ cd /your_path_to_git_clone/xtgeo/
       $ pip install pip -U
-      $ pip install -r requirements/requirements_dev.txt
-      $ python setup.py clean
-      $ python setup.py develop
+      $ pip install ".[dev,docs]"
       $ pytest  # to check that stuff works
 
 4. Create a branch for local development:
@@ -100,13 +98,12 @@ Now you can make your changes locally.
 
    .. code-block:: bash
 
-      $ flake8
-      $ pylint
-      $ Use `pytest` for running tests
-      $ Run `black` on your python code, then there is no discussions on formatting
-
-To get ``flake8``, ``pylint`` and ``black`` and just pip install them into
-your virtualenv.
+      $ black src tests
+      $ flake8 src tests
+      $ isort src tests
+      $ mypy src
+      $ pylint src tests
+      $ pytest tests
 
 6. If you want to edit C code, contact the author for detailed
    instructions.
@@ -137,9 +134,7 @@ and it is targeted to Equinor usage using bash shell in Linux:
    $ source ~/venv/py36_rms12.0.2/bin/activate
    $ cd path_to_xtgeo/
    $ python -m pip install pip -U
-   $ pip install requirements/requirements_dev.txt
-   $ python setup.py clean
-   $ python setup.py develop
+   $ pip install ".[dev]"
    $ pytest
 
 Now you have an editable install in your virtual environment that can be run
diff --git a/pyproject.toml b/pyproject.toml
index efc95c0ca..9f7f3940a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,102 @@
-# NOTE: you have to use single-quoted strings in TOML for regular expressions.
+[build-system]
+requires = [
+    "scikit-build-core[pyproject]",
+    "swig",
+    "numpy==1.19.2; python_version == '3.8'",
+    "numpy==1.19.5; python_version == '3.9'",
+    "numpy==1.21.6; python_version == '3.10'",
+    "numpy==1.23.5; python_version == '3.11'",
+]
+build-backend = "scikit_build_core.build"
+
+[tool.scikit-build]
+cmake.verbose = true
+logging.level = "INFO"
+metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
+sdist.include = ["src/xtgeo/_theversion.py"]
+wheel.install-dir = "xtgeo"
+
+[project]
+name = "xtgeo"
+description = "XTGeo is a Python library for 3D grids, surfaces, wells, etc"
+readme = "README.md"
+requires-python = ">=3.8"
+license = { text = "LGPL-3.0" }
+authors = [
+    { name = "Equinor", email = "jriv@equinor.com" },
+]
+keywords = ["grids", "surfaces", "wells", "cubes"]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
+    "Operating System :: POSIX :: Linux",
+    "Natural Language :: English",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Scientific/Engineering :: Physics",
+    "Topic :: Software Development :: Libraries",
+    "Topic :: Utilities",
+]
+dynamic = ["version"]
+dependencies = [
+    "deprecation",
+    "ecl-data-io>=2.1",
+    "h5py>=3",
+    "hdf5plugin>=2.3",
+    "matplotlib>=3.3",
+    "numpy>=1.19",
+    "pandas>=1.1",
+    "roffio>=0.0.2",
+    "scipy>=1.5",
+    "segyio>1.8.0",
+    "shapely>=1.6.2",
+    "tables;platform_system != 'Darwin'",  # TODO: update when fixed for mac
+    "typing-extensions",
+]
+
+[project.urls]
+Homepage = "https://github.com/equinor/xtgeo"
+Repository = "https://github.com/equinor/xtgeo"
+Issues = "https://github.com/equinor/xtgeo/issues"
+Documentation = "https://xtgeo.readthedocs.io"
+
+[project.optional-dependencies]
+dev = [
+    "black",
+    "coverage>=4.1",
+    "flake8",
+    "isort",
+    "hypothesis",
+    "mypy",
+    "pylint",
+    "pytest",
+    "pytest-benchmark",
+    "pytest-cov",
+    "pydocstyle",
+    "pytest-runner",
+    "pytest-mock",
+    "pytest-snapshot",
+    "pytest-xdist",
+]
+docs = [
+    "pydocstyle",
+    "Sphinx<4.0",
+    "sphinx-rtd-theme",
+    "sphinx-toolbox",
+    "autoclasstoc",
+    "myst-parser",
+]
 
 [tool.black]
 line-length = 88
-target-version = ['py38', 'py39', 'py310', 'py311']
+target-version = ["py38", "py39", "py310", "py311"]
 include = '\.pyi?$'
 exclude = '''
 /(
     \.eggs
   | \.git
   | \.hg
   | \.mypy_cache
   | \.tox
   | \.venv
   | _build
   | buck-out
   | build
   | dist
+  | _theversion.py
   # The following are specific to Black, you probably don't want those.
| blib2to3
   | tests/data
   | profiling
 )/
 '''
 
-# numpy version scheme to fulfill Roxar API compatibility:
-# RMS 12.1.* --> numpy == 1.19.2 with python 3.8
+[tool.isort]
+profile = "black"
 
-[build-system]
-requires = [
-    "setuptools>=43",
-    "scikit-build<0.17",
-    'cmake==3.15.3; python_version >= "3.8" and platform_system == "Linux"',
-    'cmake==3.18.0; platform_system != "Linux"',
-    "ninja",
-    'swig',
-    "setuptools_scm>=3.2.0",
-    'numpy==1.19.2; python_version == "3.8"',
-    'numpy==1.19.5; python_version == "3.9"',
-    'numpy==1.21.6; python_version == "3.10"',
-    'numpy==1.23.5; python_version == "3.11"',
-    'Sphinx<4.0',
-    'sphinx-rtd-theme',
-    'sphinx-toolbox',
-    'autoclasstoc',
-    'myst-parser',
+[tool.mypy]
+ignore_missing_imports = true
+
+[tool.pydocstyle]
+convention = "google"
+match = '(?!(test_|_)).*\.py'
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "--verbose"
+log_cli = "False"
+log_cli_format = "%(levelname)8s (%(relativeCreated)6.0fms) %(filename)44s [%(funcName)40s()] %(lineno)4d >> %(message)s"
+log_cli_level = "INFO"
+testpaths = "tests"
+markers = [
+    "bigtest: A big test not executed by default",
+    "requires_opm: A test requiring opm in the environment",
+    "requires_roxar: Test requires to run in RMS python",
 ]
+doctest_optionflags = "ELLIPSIS"
 
 [tool.setuptools_scm]
 write_to = "src/xtgeo/_theversion.py"
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 9c5a40285..000000000
--- a/pytest.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-#pytest tests/test_grid3d/test_grid.py::test_roffbin_import_v2_emerald -o log_cli=true -s
-
-[pytest]
-minversion = 6.0
-addopts = --verbose
-log_cli = False
-log_cli_format = %(levelname)8s (%(relativeCreated)6.0fms) %(filename)44s [%(funcName)40s()] %(lineno)4d >> %(message)s
-log_cli_level = INFO
-testpaths =
-    tests
-markers =
-    bigtest: A big test not executed by default
-    requires_opm: A test requiring opm in the environment
-    requires_roxar: Test requires to run in RMS python
-doctest_optionflags = ELLIPSIS
diff --git a/scripts/setup_utilities.py b/scripts/setup_utilities.py
deleted file mode 100644
index 72c178a42..000000000
--- a/scripts/setup_utilities.py
+++ /dev/null
@@ -1,155 +0,0 @@
-"""Functions/classes for use in setup.py in order to make the latter clean and lean."""
-import fnmatch
-import os
-import sys
-from distutils.command.clean import clean as _clean
-from glob import glob
-from os.path import dirname, exists
-from shutil import rmtree
-
-from setuptools_scm import get_version
-from skbuild.command import set_build_base_mixin
-from skbuild.constants import CMAKE_BUILD_DIR, CMAKE_INSTALL_DIR, SKBUILD_DIR
-
-CMD = sys.argv[1]
-
-
-def new_style(klass):
-    """This function is stolen from scikit-build <= 0.15, as it is gone in >= 0.16.
-
-    The distutils/setuptools command classes are old-style classes, which won't work
-    with mixins. To work around this limitation, we dynamically convert them to new
-    style classes by creating a new class that inherits from them and also ``object``.
-    This ensures that ``object`` is always at the end of the MRO, even after being
-    mixed in with other classes.
-    """
-    return type(f"NewStyleClass<{klass.__name__}>", (klass, object), {})
-
-
-# ======================================================================================
-# Overriding and extending setup commands
-# ======================================================================================
-class CleanUp(set_build_base_mixin, new_style(_clean)):
-    """Custom implementation of ``clean`` setuptools command.
- - Overriding clean in order to get rid if "dist" folder and etc - """ - - skroot = dirname(SKBUILD_DIR()) - - CLEANFOLDERS = ( - CMAKE_INSTALL_DIR(), - CMAKE_BUILD_DIR(), - SKBUILD_DIR(), - skroot, - "__pycache__", - "pip-wheel-metadata", - ".eggs", - "dist", - "sdist", - "wheel", - ".pytest_cache", - "docs/_apiref", - "docs/_build", - "htmlcov", - ) - - CLEANFOLDERSRECURSIVE = ["__pycache__", "_tmp_*", "xtgeo.egg-info"] - CLEANFILESRECURSIVE = ["*.pyc", "*.pyo", ".coverage", "coverage.xml"] - - CLEANFILES = glob("src/xtgeo/cxtgeo/cxtgeo*") - CLEANFILES.extend(glob("src/xtgeo/cxtgeo/_cxtgeo*")) - - @staticmethod - def ffind(pattern, path): - """Find files.""" - result = [] - for root, _, files in os.walk(path): - for name in files: - if fnmatch.fnmatch(name, pattern): - result.append(os.path.join(root, name)) - return result - - @staticmethod - def dfind(pattern, path): - """Find folders.""" - result = [] - for root, dirs, _ in os.walk(path): - for name in dirs: - if fnmatch.fnmatch(name, pattern): - result.append(os.path.join(root, name)) - return result - - def run(self): - """Execute run. - - After calling the super class implementation, this function removes - the directories specific to scikit-build ++. - """ - # super().run() - - for dir_ in CleanUp.CLEANFOLDERS: - if exists(dir_): - print(f"Removing: {dir_}") - if exists(dir_): - rmtree(dir_) - - for dir_ in CleanUp.CLEANFOLDERSRECURSIVE: - for pd in self.dfind(dir_, "."): - print(f"Remove folder {pd}") - rmtree(pd) - - for fil_ in CleanUp.CLEANFILESRECURSIVE: - for pf in self.ffind(fil_, "."): - print(f"Remove file {pf}") - os.unlink(pf) - - for fil_ in CleanUp.CLEANFILES: - if exists(fil_): - print(f"Removing: {fil_}") - if exists(fil_): - os.remove(fil_) - - -# ====================================================================================== -# Sphinx -# ====================================================================================== - -CMDSPHINX = { - "build_sphinx": { - "project": ("setup.py", "xtgeo"), - "version": ("setup.py", get_version()), - "release": ("setup.py", ""), - "source_dir": ("setup.py", "docs"), - } -} - - -# ====================================================================================== -# README stuff and Sphinx -# ====================================================================================== - - -def readmestuff(filename): - """For README, HISTORY etc.""" - response = "See " + filename - try: - with open(filename, encoding="utf-8") as some_file: - response = some_file.read() - except OSError: - pass - return response - - -# ====================================================================================== -# Other helpers -# ====================================================================================== - - -def parse_requirements(filename): - """Load requirements from a pip requirements file.""" - try: - lineiter = (line.strip() for line in open(filename, encoding="utf-8")) - return [line for line in lineiter if line and not line.startswith("#")] - except OSError: - return [] diff --git a/scripts/swig_manylinux.sh b/scripts/swig_manylinux.sh deleted file mode 100755 index cdd20ab66..000000000 --- a/scripts/swig_manylinux.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh -# For manylinux docker installs in CI - -echo "Download swig..." -SWIGURL="https://ftp.osuosl.org/pub/blfs/conglomeration/swig" -SWIG="swig-4.0.1" -curl -O ${SWIGURL}/${SWIG}.tar.gz -tar xzf ${SWIG}.tar.gz -cd $SWIG -echo "Download swig... done" - -echo "Download pcre..." 
-curl -O "https://ftp.pcre.org/pub/pcre/pcre-8.38.tar.gz" -sh Tools/pcre-build.sh > /dev/null -echo "PCRE is built locally" - -sh ./configure \ - --with-python \ - --with-python3 \ - --without-perl5 \ - --without-ruby \ - --without-tcl \ - --without-maximum-compile-warnings \ - > /dev/null -make > /dev/null -make install > /dev/null -echo "SWIG installed" diff --git a/setup.py b/setup.py deleted file mode 100644 index be90a84f4..000000000 --- a/setup.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python3 -"""Setup for XTGeo - subsurface reservoir tool for maps, 3D grids etc.""" -import sys - -try: - import setuptools - from setuptools import setup as setuptools_setup -except ImportError: - print("\n*** Some requirements are missing, please run:") - print("\n*** pip install -r requirements/requirements_setup.txt\n\n") - raise - -try: - import skbuild -except ImportError: - print("\n*** Some requirements are missing, please run:") - print("\n*** pip install -r requirements/requirements_setup.txt\n") - raise - -from scripts import setup_utilities as setuputils - -CMD = sys.argv[1] - -README = setuputils.readmestuff("README.md") -HISTORY = setuputils.readmestuff("HISTORY.md") - -REQUIREMENTS = setuputils.parse_requirements("requirements/requirements.txt") - -TEST_REQUIREMENTS = setuputils.parse_requirements("requirements/requirements_test.txt") -SETUP_REQUIREMENTS = setuputils.parse_requirements( - "requirements/requirements_setup.txt" -) -DOCS_REQUIREMENTS = setuputils.parse_requirements("requirements/requirements_docs.txt") -EXTRAS_REQUIRE = {"tests": TEST_REQUIREMENTS, "docs": DOCS_REQUIREMENTS} - -CMDCLASS = {"clean": setuputils.CleanUp} - - -skbuild.setup( - name="xtgeo", - description="XTGeo is a Python library for 3D grids, surfaces, wells, etc", - long_description=README + "\n\n" + HISTORY, - long_description_content_type="text/markdown", - author="Equinor R&T", - url="https://github.com/equinor/xtgeo", - project_urls={ - "Documentation": "https://xtgeo.readthedocs.io/", - "Issue Tracker": "https://github.com/equinor/xtgeo/issues", - }, - license="LGPL-3.0", - cmake_args=["-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON"], - packages=setuptools.find_packages(where="src"), - package_dir={"": "src"}, - cmdclass=CMDCLASS, - zip_safe=False, - keywords="xtgeo", - command_options=setuputils.CMDSPHINX, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: GNU Lesser General Public " - "License v3 or later (LGPLv3+)", - "Operating System :: POSIX :: Linux", - "Natural Language :: English", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Physics", - "Topic :: Software Development :: Libraries", - "Topic :: Utilities", - ], - test_suite="tests", - install_requires=REQUIREMENTS, - setup_requires=SETUP_REQUIREMENTS, - tests_require=TEST_REQUIREMENTS, - extras_require=EXTRAS_REQUIRE, -) - -# Below is a hack to make "python setup.py develop" or "pip install -e ." to work. -# Without this, the xtgeo.egg-link file will be wrong, e.g.: -# /home/jan/work/git/xtg/xtgeo -# . 
-# -# instead of the correct: -# /home/jan/work/git/xtg/xtgeo/src -# ../ -# -# The wrong egg-link comes when find_packages(where="src") finds a list of packages in -# scikit-build version of setup(). No clue why... - -if CMD == "develop": - print("Run in DEVELOP mode") - setuptools_setup( # use setuptools version of setup - name="xtgeo", - packages=setuptools.find_packages(where="src"), - package_dir={"": "src"}, - zip_safe=False, - test_suite="tests", - install_requires=REQUIREMENTS, - setup_requires=SETUP_REQUIREMENTS, - tests_require=TEST_REQUIREMENTS, - extras_require=EXTRAS_REQUIRE, - ) diff --git a/src/clib/CMakeLists.txt b/src/clib/CMakeLists.txt index 5552d9b1b..e51f19216 100644 --- a/src/clib/CMakeLists.txt +++ b/src/clib/CMakeLists.txt @@ -1,64 +1,21 @@ -message(STATUS "XTGeo library...") - include_directories(${CMAKE_CURRENT_LIST_DIR}/xtg) +set(SRC "${CMAKE_CURRENT_LIST_DIR}/xtg") -find_package(Threads) - - -if (MSVC) - set(XTGFLAGS /Ox /wd4996 /wd4267 /wd4244 /wd4305) - set(CXTGEOFLAGS /Ox /wd4996 /wd4267 /wd4244 /wd4305) -else() - set(XTGFLAGS -Wall -Wno-unused-but-set-variable -fPIC) - set(CXTGEOFLAGS -Wl,--no-undefined) -endif() - -set (SRC "${CMAKE_CURRENT_LIST_DIR}/xtg") - -# todo: replace globbing with unique list, as globbing is bad practice +# TODO: replace globbing with unique list, as globbing is bad practice FILE(GLOB SOURCES ${SRC}/*.c) +add_library(xtg STATIC ${SOURCES}) -add_library(xtg - STATIC - ${SOURCES} - ) - -target_compile_options(xtg PRIVATE ${XTGFLAGS}) +find_package(Python COMPONENTS Interpreter Development.Module NumPy REQUIRED) +find_package(SWIG 3.0.1 COMPONENTS REQUIRED) +include(UseSWIG) -# ====================================================================================== -# Find Python and SWIG -# ====================================================================================== - -# in case required SWIG is missing or too old and we are within Equinor, need a path to -# a SWIG version that works (RHEL7 have SWIG 2 by default which is too old) -if("$ENV{HOSTNAME}" MATCHES "statoil" AND CMAKE_SYSTEM_NAME MATCHES "Linux") - LIST(APPEND CMAKE_PROGRAM_PATH "/prog/res/opt/rhel7/swig_4.0.2/bin") - message(STATUS "Using Swig from /prog/res") -endif() - -# These policies are related to SWIG python libraries naming -if (CMAKE_VERSION VERSION_GREATER 3.12) - cmake_policy(SET CMP0074 NEW) - if (POLICY CMP0078) - cmake_policy(SET CMP0078 OLD) - endif() - - if (POLICY CMP0086) - cmake_policy(SET CMP0086 OLD) - endif() -else() - set(UseSWIG_TARGET_NAME_PREFERENCE LEGACY) +# CMP0053: Simplify variable reference and escape sequence evaluation. +# The OLD behavior for this policy is to honor the legacy behavior for +# variable references and escape sequences. 
+if (POLICY CMP0053) + cmake_policy(SET CMP0053 OLD) endif() -find_package(SWIG 3.0.1 REQUIRED) -include(${SWIG_USE_FILE}) - -find_package(PythonLibs REQUIRED) - -message(STATUS "XTGeo Python include path: ${PYTHON_INCLUDE_PATH}") -message(STATUS "XTGeo Python include dirs: ${PYTHON_INCLUDE_DIRS}") -message(STATUS "XTGeo Python executable ok: ${PYTHON_EXECUTABLE}") - # ====================================================================================== # Looking for numpy; need to avoid that weird old system numpy is used (aka RHEL6) # THIS WAS A PAIN TO DEBUG @@ -67,11 +24,7 @@ message(STATUS "XTGeo Python executable ok: ${PYTHON_EXECUTABLE}") # via https://github.com/DeepLearnPhysics/larcv3/blob/src/CMakeLists.txt # ====================================================================================== -if (POLICY CMP0053) - cmake_policy(SET CMP0053 OLD) -endif() - -if(NOT PYTHON_EXECUTABLE) +if(NOT Python_EXECUTABLE) if(NumPy_FIND_QUIETLY) find_package(PythonInterp QUIET) else() @@ -80,7 +33,7 @@ if(NOT PYTHON_EXECUTABLE) endif() endif() -if (PYTHON_EXECUTABLE) +if (Python_EXECUTABLE) # Find out the include path execute_process( COMMAND @PYTHON_EXECUTABLE@ -c @@ -94,7 +47,7 @@ if (PYTHON_EXECUTABLE) message(STATUS "Detected NumPy: ${__numpy_path}; version ${__numpy_version}") elseif(__numpy_out) message(ERROR "Python executable not found.") -endif(PYTHON_EXECUTABLE) +endif(Python_EXECUTABLE) find_path(PYTHON_NUMPY_INCLUDE_DIR numpy/arrayobject.h HINTS "${__numpy_path}" "${PYTHON_INCLUDE_PATH}" NO_DEFAULT_PATH) @@ -103,34 +56,92 @@ if(PYTHON_NUMPY_INCLUDE_DIR) set(PYTHON_NUMPY_FOUND 1 CACHE INTERNAL "Python numpy found") endif(PYTHON_NUMPY_INCLUDE_DIR) -message(STATUS "XTGeo Numpy include path: ${PYTHON_NUMPY_INCLUDE_DIR}") +message(STATUS "XTGeo Python include path: ${PYTHON_INCLUDE_PATH}") +message(STATUS "XTGeo Python include dirs: ${PYTHON_INCLUDE_DIRS}") +message(STATUS "XTGeo Python executable : ${PYTHON_EXECUTABLE}") +message(STATUS "XTGeo numpy include path : ${PYTHON_NUMPY_INCLUDE_DIR}") # ====================================================================================== -# Swigging and linking, with backward compatibility to older cmake versions +# Find swig # ====================================================================================== -message(STATUS "Swigging") -set(LIBRARYNAME cxtgeo) +# in case required SWIG is missing or too old and we are within Equinor, need a path to +# a SWIG version that works (RHEL7 have SWIG 2 by default which is too old) +if("$ENV{HOSTNAME}" MATCHES "statoil" AND CMAKE_SYSTEM_NAME MATCHES "Linux") + LIST(APPEND CMAKE_PROGRAM_PATH "/prog/res/opt/rhel7/swig_4.0.2/bin") + message(STATUS "Using Swig from /prog/res") +endif() -if (CMAKE_VERSION VERSION_GREATER 3.7) - swig_add_library(${LIBRARYNAME} LANGUAGE python SOURCES cxtgeo.i) -else() - swig_add_module(${LIBRARYNAME} python cxtgeo.i) +# ====================================================================================== +# Adjust policies +# ====================================================================================== + +# CMP0074: find_package() uses _ROOT variables. +# The OLD behavior for this policy is to ignore _ROOT variables. +# The NEW behavior for this policy is to use _ROOT variables. +cmake_policy(SET CMP0074 NEW) + +# CMP0078: UseSWIG generates standard target names. +# The OLD behavior for this policy relies on UseSWIG_TARGET_NAME_PREFERENCE +# variable that can be used to specify an explicit preference. 
+# The value may be one of: +# LEGACY: legacy strategy is applied. Variable SWIG_MODULE__REAL_NAME +# must be used to get real target name. This is the default if not specified. +# STANDARD: target name matches specified name. +if (POLICY CMP0078) + cmake_policy(SET CMP0078 OLD) +endif() + +# CMP0086: UseSWIG honors SWIG_MODULE_NAME via -module flag. +# The OLD behavior for this policy is to never pass -module option. +# The NEW behavior is to pass -module option to SWIG compiler if +# SWIG_MODULE_NAME is specified. +if (POLICY CMP0086) + cmake_policy(SET CMP0086 OLD) endif() -set(SWIGTARGET ${SWIG_MODULE_${LIBRARYNAME}_REAL_NAME}) +# ====================================================================================== +# Compile swig bindings +# ====================================================================================== -target_include_directories(${SWIGTARGET} PUBLIC - ${PYTHON_INCLUDE_DIRS} - ${PYTHON_NUMPY_INCLUDE_DIR} - ${CMAKE_CURRENT_LIST_DIR} - ) -target_compile_options(${SWIGTARGET} PUBLIC ${CXTGEOFLAGS}) +message(STATUS "Compiling swig bindings") -swig_link_libraries(${LIBRARYNAME} xtg ${PTHREAD_LIBRARY}) +if (MSVC) + set(XTGFLAGS /Ox /wd4996 /wd4267 /wd4244 /wd4305) + set(CXTGEOFLAGS /Ox /wd4996 /wd4267 /wd4244 /wd4305) +else() + set(XTGFLAGS -Wall -Wno-unused-but-set-variable -fPIC) + set(CXTGEOFLAGS -Wl,--no-undefined) +endif() +target_compile_options(xtg PRIVATE ${XTGFLAGS}) -python_extension_module(${SWIGTARGET}) +set(PYTHON_MODULE cxtgeo) +swig_add_library( + ${PYTHON_MODULE} + LANGUAGE python + OUTPUT_DIR "${SKBUILD_PLATLIB_DIR}" + SOURCES cxtgeo.i +) +set(SWIG_MODULE ${SWIG_MODULE_${PYTHON_MODULE}_REAL_NAME}) + +target_include_directories( + ${SWIG_MODULE} PUBLIC + ${PYTHON_INCLUDE_DIRS} + ${PYTHON_NUMPY_INCLUDE_DIR} + ${CMAKE_CURRENT_LIST_DIR}) +target_compile_options(${SWIG_MODULE} PUBLIC ${CXTGEOFLAGS}) +target_link_libraries(${SWIG_MODULE} xtg Python::Module) -set(CXTGEO src/xtgeo/cxtgeo) +# scikit-build-core docs recommend this +if(WIN32) + set_property(TARGET ${SWIG_MODULE} PROPERTY SUFFIX ".${Python_SOABI}.pyd") +else() + set_property( + TARGET ${SWIG_MODULE} + PROPERTY SUFFIX ".${Python_SOABI}${CMAKE_SHARED_MODULE_SUFFIX}") +endif() -install(TARGETS ${SWIGTARGET} LIBRARY DESTINATION ${CXTGEO}) +# Root installation directory is set in pyproject.toml +# SWIG_MODULE == _cxtgeo, so this installs to +# xtgeo.cxtgeo._cxtgeo +install(TARGETS ${SWIG_MODULE} LIBRARY DESTINATION ${PYTHON_MODULE}) diff --git a/tests/test_well/test_well.py b/tests/test_well/test_well.py index cdc7e83fa..9bfea8f3d 100644 --- a/tests/test_well/test_well.py +++ b/tests/test_well/test_well.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd import pytest +import sys import xtgeo from xtgeo.common import XTGeoDialog @@ -264,6 +265,7 @@ def test_shortwellname(create_well): assert short == "A-142H" +@pytest.mark.skipif(sys.platform.startswith("darwin"), reason="No pytables on macOS") def test_hdf_io_single(tmp_path): """Test HDF io, single well.""" mywell = xtgeo.well_from_file(WELL1) @@ -274,6 +276,7 @@ def test_hdf_io_single(tmp_path): assert mywell2.nrow == mywell.nrow +@pytest.mark.skipif(sys.platform.startswith("darwin"), reason="No pytables on macOS") def test_import_as_rms_export_as_hdf_many(tmp_path, simple_well): """Import RMS and export as HDF5 and RMS asc, many, and compare timings.""" t0 = xtg.timer() From 5f93c1c4aeaae5ee08bc8701a981e5c03c55d112 Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 2 Oct 2023 08:04:34 +0200 Subject: [PATCH 02/13] CLN: Lint with isort --- 
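Note on what this patch applies: the [tool.isort] table added in patch 01
sets profile = "black", which makes isort sort imports into three groups
(standard library, then third-party, then first-party), separated by single
blank lines, using quoting and line wrapping that black will not fight over.
A minimal sketch of the effect, with illustrative file contents rather than
code taken from this repository:

    # before
    import numpy as np
    import xtgeo
    import sys

    # after running `isort src tests` with profile = "black"
    import sys

    import numpy as np

    import xtgeo

xtgeo itself is treated as first-party, presumably because isort's default
src_paths include src/ where the package lives, which is why many hunks below
simply add a blank line between third-party imports and the xtgeo imports.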
src/xtgeo/common/__init__.py | 12 ++++-------- src/xtgeo/cube/__init__.py | 4 +++- src/xtgeo/cube/_cube_export.py | 6 +++--- src/xtgeo/grid3d/__init__.py | 2 +- src/xtgeo/grid3d/_grid3d_fence.py | 2 +- src/xtgeo/grid3d/_grid_etc1.py | 3 ++- src/xtgeo/grid3d/_grid_refine.py | 3 ++- src/xtgeo/grid3d/_grid_wellzone.py | 1 + src/xtgeo/grid3d/_gridprop_import_roff.py | 1 + src/xtgeo/grid3d/_gridprop_op1.py | 2 +- src/xtgeo/grid3d/_gridprops_import_eclrun.py | 3 ++- src/xtgeo/metadata/__init__.py | 3 +-- src/xtgeo/plot/__init__.py | 3 ++- src/xtgeo/plot/xtmap.py | 5 +++-- src/xtgeo/surface/_regsurf_cube_window_v2.py | 1 + src/xtgeo/surface/_regsurf_import.py | 1 + src/xtgeo/well/__init__.py | 6 +++--- src/xtgeo/well/_blockedwell_roxapi.py | 2 +- src/xtgeo/well/_blockedwells_roxapi.py | 1 + src/xtgeo/well/_wellmarkers.py | 3 ++- src/xtgeo/well/_wells_utils.py | 4 ++-- src/xtgeo/xyz/_xyz_lowlevel.py | 3 ++- tests/conftest.py | 3 +-- tests/test_common/test_calc.py | 1 + tests/test_cube/test_cube.py | 3 ++- tests/test_cube/test_cube_deprecations.py | 1 + tests/test_etc/test_etc_make_avg_maps.py | 1 + tests/test_grid3d/eclrun_fixtures.py | 1 + tests/test_grid3d/test_ecl_inte_head.py | 1 + tests/test_grid3d/test_eclrun.py | 1 + tests/test_grid3d/test_grid.py | 3 ++- tests/test_grid3d/test_grid_bytesio.py | 1 + tests/test_grid3d/test_grid_ecl_grid.py | 5 +++-- tests/test_grid3d/test_grid_egrid.py | 3 ++- tests/test_grid3d/test_grid_grdecl.py | 3 ++- tests/test_grid3d/test_grid_property_grdecl.py | 3 ++- tests/test_grid3d/test_grid_vs_points.py | 1 + tests/test_grid3d/test_grid_xtgformats_io.py | 3 ++- tests/test_grid3d/test_gridprop_import_eclrun.py | 3 ++- tests/test_plot/test_colortables.py | 1 + tests/test_surface/test_forks.py | 1 + tests/test_surface/test_regular_surface_vs_points.py | 1 + tests/test_surface/test_surf_xyz_from_ij.py | 5 +++-- tests/test_surface/test_zmap_spec.py | 1 + tests/test_well/conftest.py | 1 + tests/test_well/test_blockedwell.py | 1 + tests/test_well/test_blockedwells.py | 1 + tests/test_well/test_well.py | 2 +- tests/test_well/test_well_deprecations.py | 3 ++- tests/test_well/test_well_to_points.py | 1 + tests/test_well/test_well_vs_grid.py | 1 + tests/test_well/test_well_vs_surface.py | 1 + tests/test_well/test_wells.py | 1 + tests/test_xyz/test_points.py | 3 ++- tests/test_xyz/test_points_from_surface.py | 1 + tests/test_xyz/test_points_from_wells.py | 1 + tests/test_xyz/test_points_vs_other.py | 1 + tests/test_xyz/test_xyz_deprecated.py | 3 ++- tests/test_xyz/test_xyz_roxapi_mock.py | 3 ++- 59 files changed, 92 insertions(+), 49 deletions(-) diff --git a/src/xtgeo/common/__init__.py b/src/xtgeo/common/__init__.py index 377fdc73d..049094384 100644 --- a/src/xtgeo/common/__init__.py +++ b/src/xtgeo/common/__init__.py @@ -2,12 +2,8 @@ """The XTGeo common package""" -# flake8: noqa -from xtgeo.common.xtgeo_dialog import XTGeoDialog -from xtgeo.common.xtgeo_dialog import XTGDescription -from xtgeo.common.xtgeo_dialog import XTGShowProgress - -from xtgeo.common.sys import _XTGeoFile -from xtgeo.common.sys import inherit_docstring - from xtgeo.common.exceptions import WellNotFoundError +from xtgeo.common.sys import _XTGeoFile, inherit_docstring + +# flake8: noqa +from xtgeo.common.xtgeo_dialog import XTGDescription, XTGeoDialog, XTGShowProgress diff --git a/src/xtgeo/cube/__init__.py b/src/xtgeo/cube/__init__.py index 97282e5f7..65e839891 100644 --- a/src/xtgeo/cube/__init__.py +++ b/src/xtgeo/cube/__init__.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- """The XTGeo cube 
package.""" -from xtgeo.cube.cube1 import Cube # type: ignore # noqa # pylint: disable=undefined-variable +from xtgeo.cube.cube1 import ( # type: ignore # noqa # pylint: disable=undefined-variable + Cube, +) diff --git a/src/xtgeo/cube/_cube_export.py b/src/xtgeo/cube/_cube_export.py index 93c8db608..d258e8ebd 100644 --- a/src/xtgeo/cube/_cube_export.py +++ b/src/xtgeo/cube/_cube_export.py @@ -1,15 +1,15 @@ """Export Cube data via SegyIO library or XTGeo CLIB.""" +import json import shutil import struct -import json -import numpy as np +import numpy as np import segyio import xtgeo import xtgeo.cxtgeo._cxtgeo as _cxtgeo -from xtgeo.common import XTGeoDialog from xtgeo import XTGeoCLibError +from xtgeo.common import XTGeoDialog xtg = XTGeoDialog() diff --git a/src/xtgeo/grid3d/__init__.py b/src/xtgeo/grid3d/__init__.py index ca4e7325f..0bee383ca 100644 --- a/src/xtgeo/grid3d/__init__.py +++ b/src/xtgeo/grid3d/__init__.py @@ -11,5 +11,5 @@ from ._ecl_grid import GridRelative, Units from .grid import Grid -from .grid_property import GridProperty from .grid_properties import GridProperties +from .grid_property import GridProperty diff --git a/src/xtgeo/grid3d/_grid3d_fence.py b/src/xtgeo/grid3d/_grid3d_fence.py index 9e945ed75..426b93203 100644 --- a/src/xtgeo/grid3d/_grid3d_fence.py +++ b/src/xtgeo/grid3d/_grid3d_fence.py @@ -4,9 +4,9 @@ import numpy as np import xtgeo +import xtgeo.cxtgeo._cxtgeo as _cxtgeo from xtgeo.grid3d import _gridprop_lowlevel as gl from xtgeo.surface import _regsurf_lowlevel as rl -import xtgeo.cxtgeo._cxtgeo as _cxtgeo xtg = xtgeo.common.XTGeoDialog() logger = xtg.functionlogger(__name__) diff --git a/src/xtgeo/grid3d/_grid_etc1.py b/src/xtgeo/grid3d/_grid_etc1.py index 000a98e5d..ec6ade8a7 100644 --- a/src/xtgeo/grid3d/_grid_etc1.py +++ b/src/xtgeo/grid3d/_grid_etc1.py @@ -2,13 +2,14 @@ from collections import OrderedDict from copy import deepcopy -from packaging.version import parse as versionparse from math import atan2, degrees from typing import Tuple import numpy as np import numpy.ma as ma import pandas as pd +from packaging.version import parse as versionparse + import xtgeo import xtgeo.cxtgeo._cxtgeo as _cxtgeo from xtgeo.common import XTGeoDialog diff --git a/src/xtgeo/grid3d/_grid_refine.py b/src/xtgeo/grid3d/_grid_refine.py index 0c82cebdc..39d221239 100644 --- a/src/xtgeo/grid3d/_grid_refine.py +++ b/src/xtgeo/grid3d/_grid_refine.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- """Private module for refinement of a grid.""" from collections import OrderedDict + import numpy as np -from xtgeo.common import XTGeoDialog import xtgeo.cxtgeo._cxtgeo as _cxtgeo +from xtgeo.common import XTGeoDialog xtg = XTGeoDialog() diff --git a/src/xtgeo/grid3d/_grid_wellzone.py b/src/xtgeo/grid3d/_grid_wellzone.py index a346ba1f6..fa9c45ce4 100644 --- a/src/xtgeo/grid3d/_grid_wellzone.py +++ b/src/xtgeo/grid3d/_grid_wellzone.py @@ -1,6 +1,7 @@ """Private module for grid vs well zonelog checks.""" import numpy as np + import xtgeo xtg = xtgeo.common.XTGeoDialog() diff --git a/src/xtgeo/grid3d/_gridprop_import_roff.py b/src/xtgeo/grid3d/_gridprop_import_roff.py index 550e2e620..30f009eba 100644 --- a/src/xtgeo/grid3d/_gridprop_import_roff.py +++ b/src/xtgeo/grid3d/_gridprop_import_roff.py @@ -4,6 +4,7 @@ import warnings import numpy as np + import xtgeo from ._roff_parameter import RoffParameter diff --git a/src/xtgeo/grid3d/_gridprop_op1.py b/src/xtgeo/grid3d/_gridprop_op1.py index 6c81d0cba..c768532f2 100644 --- a/src/xtgeo/grid3d/_gridprop_op1.py +++ 
b/src/xtgeo/grid3d/_gridprop_op1.py @@ -4,9 +4,9 @@ import numpy as np import xtgeo +import xtgeo.cxtgeo._cxtgeo as _cxtgeo from xtgeo.common import XTGeoDialog from xtgeo.grid3d import _gridprop_lowlevel as gl -import xtgeo.cxtgeo._cxtgeo as _cxtgeo xtg = XTGeoDialog() diff --git a/src/xtgeo/grid3d/_gridprops_import_eclrun.py b/src/xtgeo/grid3d/_gridprops_import_eclrun.py index 3a5843760..f3daf3bd5 100644 --- a/src/xtgeo/grid3d/_gridprops_import_eclrun.py +++ b/src/xtgeo/grid3d/_gridprops_import_eclrun.py @@ -1,8 +1,9 @@ from copy import deepcopy from typing import List, Tuple, Union -import xtgeo from typing_extensions import Literal + +import xtgeo from xtgeo.common.constants import MAXKEYWORDS from . import _grid3d_utils as utils diff --git a/src/xtgeo/metadata/__init__.py b/src/xtgeo/metadata/__init__.py index 99529fffa..ffd35178b 100644 --- a/src/xtgeo/metadata/__init__.py +++ b/src/xtgeo/metadata/__init__.py @@ -2,5 +2,4 @@ # flake8: noqa """XTGeo metadata package.""" -from xtgeo.metadata.metadata import MetaDataRegularSurface -from xtgeo.metadata.metadata import MetaDataRegularCube +from xtgeo.metadata.metadata import MetaDataRegularCube, MetaDataRegularSurface diff --git a/src/xtgeo/plot/__init__.py b/src/xtgeo/plot/__init__.py index c27f7bbd4..b967ebbef 100644 --- a/src/xtgeo/plot/__init__.py +++ b/src/xtgeo/plot/__init__.py @@ -2,9 +2,10 @@ """The XTGeo plot package""" +from xtgeo.plot.grid3d_slice import Grid3DSlice + # flake8: noqa from xtgeo.plot.xsection import XSection from xtgeo.plot.xtmap import Map -from xtgeo.plot.grid3d_slice import Grid3DSlice # from ._colortables import random, random40, xtgeocolors, colorsfromfile diff --git a/src/xtgeo/plot/xtmap.py b/src/xtgeo/plot/xtmap.py index 7a170071c..778db6a9b 100644 --- a/src/xtgeo/plot/xtmap.py +++ b/src/xtgeo/plot/xtmap.py @@ -1,13 +1,14 @@ """Module for map plots of surfaces, using matplotlib.""" -import matplotlib.pyplot as plt import matplotlib.patches as mplp -from matplotlib import ticker +import matplotlib.pyplot as plt import numpy as np import numpy.ma as ma +from matplotlib import ticker from xtgeo.common import XTGeoDialog + from .baseplot import BasePlot xtg = XTGeoDialog() diff --git a/src/xtgeo/surface/_regsurf_cube_window_v2.py b/src/xtgeo/surface/_regsurf_cube_window_v2.py index 987fc8b39..b189ad00f 100644 --- a/src/xtgeo/surface/_regsurf_cube_window_v2.py +++ b/src/xtgeo/surface/_regsurf_cube_window_v2.py @@ -3,6 +3,7 @@ import numpy as np + import xtgeo import xtgeo.cxtgeo._cxtgeo as _cxtgeo from xtgeo.common import XTGeoDialog diff --git a/src/xtgeo/surface/_regsurf_import.py b/src/xtgeo/surface/_regsurf_import.py index 813336541..a05f472b8 100644 --- a/src/xtgeo/surface/_regsurf_import.py +++ b/src/xtgeo/surface/_regsurf_import.py @@ -8,6 +8,7 @@ import h5py import numpy as np import numpy.ma as ma + import xtgeo import xtgeo.common.sys as xsys import xtgeo.cxtgeo._cxtgeo as _cxtgeo # pylint: disable=no-name-in-module diff --git a/src/xtgeo/well/__init__.py b/src/xtgeo/well/__init__.py index 1ad210128..c1c2affd8 100644 --- a/src/xtgeo/well/__init__.py +++ b/src/xtgeo/well/__init__.py @@ -2,9 +2,9 @@ """The XTGeo well package""" +from .blocked_well import BlockedWell +from .blocked_wells import BlockedWells + # flake8: noqa from .well1 import Well from .wells import Wells - -from .blocked_well import BlockedWell -from .blocked_wells import BlockedWells diff --git a/src/xtgeo/well/_blockedwell_roxapi.py b/src/xtgeo/well/_blockedwell_roxapi.py index d86c56dc2..f824d7ccd 100644 --- 
a/src/xtgeo/well/_blockedwell_roxapi.py +++ b/src/xtgeo/well/_blockedwell_roxapi.py @@ -9,8 +9,8 @@ import pandas as pd from xtgeo.common import XTGeoDialog -from xtgeo.roxutils import RoxUtils from xtgeo.common.exceptions import WellNotFoundError +from xtgeo.roxutils import RoxUtils try: import roxar diff --git a/src/xtgeo/well/_blockedwells_roxapi.py b/src/xtgeo/well/_blockedwells_roxapi.py index a6a76ba0a..b50863c06 100644 --- a/src/xtgeo/well/_blockedwells_roxapi.py +++ b/src/xtgeo/well/_blockedwells_roxapi.py @@ -4,6 +4,7 @@ from xtgeo.common import XTGeoDialog from xtgeo.roxutils import RoxUtils + from .blocked_well import BlockedWell xtg = XTGeoDialog() diff --git a/src/xtgeo/well/_wellmarkers.py b/src/xtgeo/well/_wellmarkers.py index 43924a827..dbd069789 100644 --- a/src/xtgeo/well/_wellmarkers.py +++ b/src/xtgeo/well/_wellmarkers.py @@ -3,13 +3,14 @@ from collections import OrderedDict + import numpy as np import pandas as pd import xtgeo +import xtgeo.common.constants as const import xtgeo.cxtgeo._cxtgeo as _cxtgeo from xtgeo.common import XTGeoDialog -import xtgeo.common.constants as const xtg = XTGeoDialog() logger = xtg.functionlogger(__name__) diff --git a/src/xtgeo/well/_wells_utils.py b/src/xtgeo/well/_wells_utils.py index 8b60887dc..b82c2f153 100644 --- a/src/xtgeo/well/_wells_utils.py +++ b/src/xtgeo/well/_wells_utils.py @@ -3,12 +3,12 @@ import logging + import numpy as np import pandas as pd import shapely.geometry as sg -from xtgeo.common import XTGeoDialog -from xtgeo.common import XTGShowProgress +from xtgeo.common import XTGeoDialog, XTGShowProgress logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) diff --git a/src/xtgeo/xyz/_xyz_lowlevel.py b/src/xtgeo/xyz/_xyz_lowlevel.py index 8b90b9e1d..d646c5cb1 100644 --- a/src/xtgeo/xyz/_xyz_lowlevel.py +++ b/src/xtgeo/xyz/_xyz_lowlevel.py @@ -3,8 +3,9 @@ import numpy as np -from xtgeo.common import XTGeoDialog + import xtgeo.cxtgeo._cxtgeo as _cxtgeo +from xtgeo.common import XTGeoDialog xtg = XTGeoDialog() diff --git a/tests/conftest.py b/tests/conftest.py index 2e1243ab1..4630b01e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,8 @@ # -*- coding: utf-8 -*- """Conftest functions""" -from packaging.version import parse as versionparse - import pandas as pd import pytest +from packaging.version import parse as versionparse class Helpers: diff --git a/tests/test_common/test_calc.py b/tests/test_common/test_calc.py index e651f1ec7..7afd4e8a8 100644 --- a/tests/test_common/test_calc.py +++ b/tests/test_common/test_calc.py @@ -4,6 +4,7 @@ import numpy as np import pytest + import xtgeo import xtgeo.common.calc as xcalc import xtgeo.cxtgeo._cxtgeo as _cxtgeo diff --git a/tests/test_cube/test_cube.py b/tests/test_cube/test_cube.py index b2fdb16c7..9ba5c25ce 100644 --- a/tests/test_cube/test_cube.py +++ b/tests/test_cube/test_cube.py @@ -5,8 +5,9 @@ import numpy as np import pytest import segyio -import xtgeo from hypothesis import HealthCheck, given, settings + +import xtgeo from xtgeo.common import XTGeoDialog from xtgeo.cube import Cube from xtgeo.cube._cube_import import ( diff --git a/tests/test_cube/test_cube_deprecations.py b/tests/test_cube/test_cube_deprecations.py index 6caa48da4..3b1b15605 100644 --- a/tests/test_cube/test_cube_deprecations.py +++ b/tests/test_cube/test_cube_deprecations.py @@ -1,4 +1,5 @@ import pytest + import xtgeo diff --git a/tests/test_etc/test_etc_make_avg_maps.py b/tests/test_etc/test_etc_make_avg_maps.py index c26a18764..2337f1153 100644 --- 
a/tests/test_etc/test_etc_make_avg_maps.py +++ b/tests/test_etc/test_etc_make_avg_maps.py @@ -2,6 +2,7 @@ import numpy as np import pytest + import xtgeo from xtgeo.common import XTGeoDialog from xtgeo.surface import RegularSurface diff --git a/tests/test_grid3d/eclrun_fixtures.py b/tests/test_grid3d/eclrun_fixtures.py index 0846af879..5bca994ef 100644 --- a/tests/test_grid3d/eclrun_fixtures.py +++ b/tests/test_grid3d/eclrun_fixtures.py @@ -1,6 +1,7 @@ from os.path import join import pytest + import xtgeo diff --git a/tests/test_grid3d/test_ecl_inte_head.py b/tests/test_grid3d/test_ecl_inte_head.py index 2415e895b..210866ebb 100644 --- a/tests/test_grid3d/test_ecl_inte_head.py +++ b/tests/test_grid3d/test_ecl_inte_head.py @@ -1,5 +1,6 @@ import numpy as np import pytest + from xtgeo.grid3d._ecl_inte_head import InteHead, Phases from xtgeo.grid3d._ecl_output_file import Simulator, TypeOfGrid, UnitSystem diff --git a/tests/test_grid3d/test_eclrun.py b/tests/test_grid3d/test_eclrun.py index 9d968f7fe..1dc000df1 100644 --- a/tests/test_grid3d/test_eclrun.py +++ b/tests/test_grid3d/test_eclrun.py @@ -3,6 +3,7 @@ import numpy as np import pytest + import xtgeo # pylint: disable=wildcard-import, unused-wildcard-import diff --git a/tests/test_grid3d/test_grid.py b/tests/test_grid3d/test_grid.py index 904219c3c..002d14fea 100644 --- a/tests/test_grid3d/test_grid.py +++ b/tests/test_grid3d/test_grid.py @@ -6,8 +6,9 @@ import numpy as np import pytest -import xtgeo from hypothesis import given + +import xtgeo from xtgeo.common import XTGeoDialog from xtgeo.grid3d import Grid diff --git a/tests/test_grid3d/test_grid_bytesio.py b/tests/test_grid3d/test_grid_bytesio.py index bc52012d6..f3c22b1be 100644 --- a/tests/test_grid3d/test_grid_bytesio.py +++ b/tests/test_grid3d/test_grid_bytesio.py @@ -6,6 +6,7 @@ import io import numpy as np + import xtgeo from xtgeo.common import XTGeoDialog diff --git a/tests/test_grid3d/test_grid_ecl_grid.py b/tests/test_grid3d/test_grid_ecl_grid.py index 64e121a5b..02c78748e 100644 --- a/tests/test_grid3d/test_grid_ecl_grid.py +++ b/tests/test_grid3d/test_grid_ecl_grid.py @@ -1,11 +1,12 @@ import hypothesis.strategies as st import numpy as np import pytest +from hypothesis import HealthCheck, assume, given, settings +from numpy.testing import assert_allclose + import xtgeo import xtgeo.grid3d._egrid as xtg_egrid import xtgeo.grid3d._grdecl_grid as ggrid -from hypothesis import HealthCheck, assume, given, settings -from numpy.testing import assert_allclose from xtgeo.grid3d._ecl_grid import ( inverse_transform_xtgeo_coord_by_mapaxes, transform_xtgeo_coord_by_mapaxes, diff --git a/tests/test_grid3d/test_grid_egrid.py b/tests/test_grid3d/test_grid_egrid.py index adcb309a8..531ed86b0 100644 --- a/tests/test_grid3d/test_grid_egrid.py +++ b/tests/test_grid3d/test_grid_egrid.py @@ -5,9 +5,10 @@ import hypothesis.strategies as st import numpy as np import pytest +from hypothesis import HealthCheck, assume, given, settings + import xtgeo as xtg import xtgeo.grid3d._egrid as xtge -from hypothesis import HealthCheck, assume, given, settings from .egrid_generator import ( egrids, diff --git a/tests/test_grid3d/test_grid_grdecl.py b/tests/test_grid3d/test_grid_grdecl.py index 45cc118ca..6fa87c181 100644 --- a/tests/test_grid3d/test_grid_grdecl.py +++ b/tests/test_grid3d/test_grid_grdecl.py @@ -5,10 +5,11 @@ import hypothesis.strategies as st import numpy as np import pytest +from hypothesis import HealthCheck, assume, given, settings + import xtgeo import xtgeo.grid3d._ecl_grid as 
ecl_grid import xtgeo.grid3d._grdecl_grid as ggrid -from hypothesis import HealthCheck, assume, given, settings from xtgeo.grid3d import Grid from xtgeo.grid3d._grdecl_format import open_grdecl from xtgeo.grid3d._grid_import_ecl import grid_from_ecl_grid diff --git a/tests/test_grid3d/test_grid_property_grdecl.py b/tests/test_grid3d/test_grid_property_grdecl.py index d6d686951..4725f27e5 100644 --- a/tests/test_grid3d/test_grid_property_grdecl.py +++ b/tests/test_grid3d/test_grid_property_grdecl.py @@ -4,9 +4,10 @@ import hypothesis.strategies as st import numpy as np import pytest -import xtgeo from hypothesis import HealthCheck, assume, given, settings from numpy.testing import assert_allclose + +import xtgeo from xtgeo.grid3d._gridprop_import_grdecl import read_grdecl_3d_property from .grid_generator import xtgeo_grids as grids diff --git a/tests/test_grid3d/test_grid_vs_points.py b/tests/test_grid3d/test_grid_vs_points.py index dcbd8786f..d4a9f2d17 100644 --- a/tests/test_grid3d/test_grid_vs_points.py +++ b/tests/test_grid3d/test_grid_vs_points.py @@ -2,6 +2,7 @@ import pandas as pd import pytest + import xtgeo xtg = xtgeo.common.XTGeoDialog() diff --git a/tests/test_grid3d/test_grid_xtgformats_io.py b/tests/test_grid3d/test_grid_xtgformats_io.py index ebebbd547..e3a1e3b40 100644 --- a/tests/test_grid3d/test_grid_xtgformats_io.py +++ b/tests/test_grid3d/test_grid_xtgformats_io.py @@ -6,9 +6,10 @@ import hypothesis.strategies as st import numpy as np import pytest -import xtgeo from hypothesis import HealthCheck, given, settings from numpy.testing import assert_allclose + +import xtgeo from xtgeo.common import XTGeoDialog xtg = XTGeoDialog() diff --git a/tests/test_grid3d/test_gridprop_import_eclrun.py b/tests/test_grid3d/test_gridprop_import_eclrun.py index e84056111..a4b07382c 100644 --- a/tests/test_grid3d/test_gridprop_import_eclrun.py +++ b/tests/test_grid3d/test_gridprop_import_eclrun.py @@ -8,9 +8,10 @@ import hypothesis.strategies as st import numpy as np import pytest +from hypothesis import HealthCheck, assume, given, settings + import xtgeo import xtgeo.grid3d._find_gridprop_in_eclrun as xtg_im_ecl -from hypothesis import HealthCheck, assume, given, settings from xtgeo.grid3d._ecl_inte_head import InteHead from xtgeo.grid3d._ecl_logi_head import LogiHead from xtgeo.grid3d._ecl_output_file import Phases diff --git a/tests/test_plot/test_colortables.py b/tests/test_plot/test_colortables.py index 26818afcc..87dfe4be1 100644 --- a/tests/test_plot/test_colortables.py +++ b/tests/test_plot/test_colortables.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import sys + import xtgeo.plot._colortables as ct from xtgeo.common import XTGeoDialog diff --git a/tests/test_surface/test_forks.py b/tests/test_surface/test_forks.py index 624422d34..6ccbc56dd 100644 --- a/tests/test_surface/test_forks.py +++ b/tests/test_surface/test_forks.py @@ -1,6 +1,7 @@ # coding: utf-8 import subprocess + import xtgeo xtg = xtgeo.common.XTGeoDialog() diff --git a/tests/test_surface/test_regular_surface_vs_points.py b/tests/test_surface/test_regular_surface_vs_points.py index 576345e37..1c607467c 100644 --- a/tests/test_surface/test_regular_surface_vs_points.py +++ b/tests/test_surface/test_regular_surface_vs_points.py @@ -1,6 +1,7 @@ from os.path import join import pytest + import xtgeo from xtgeo.common import XTGeoDialog from xtgeo.surface import RegularSurface diff --git a/tests/test_surface/test_surf_xyz_from_ij.py b/tests/test_surface/test_surf_xyz_from_ij.py index d3f170061..87183f708 100644 --- 
a/tests/test_surface/test_surf_xyz_from_ij.py +++ b/tests/test_surface/test_surf_xyz_from_ij.py @@ -1,7 +1,8 @@ +import numpy as np import pytest -import xtgeo.cxtgeo._cxtgeo as _cxtgeo # type: ignore + import xtgeo -import numpy as np +import xtgeo.cxtgeo._cxtgeo as _cxtgeo # type: ignore class Surface: diff --git a/tests/test_surface/test_zmap_spec.py b/tests/test_surface/test_zmap_spec.py index 52df5457e..225286bc1 100644 --- a/tests/test_surface/test_zmap_spec.py +++ b/tests/test_surface/test_zmap_spec.py @@ -2,6 +2,7 @@ from pathlib import Path import pytest + import xtgeo from xtgeo.surface._zmap_parser import ZMAPFormatException, ZMAPSurface, parse_zmap diff --git a/tests/test_well/conftest.py b/tests/test_well/conftest.py index c9f374bc6..800df50b5 100644 --- a/tests/test_well/conftest.py +++ b/tests/test_well/conftest.py @@ -1,4 +1,5 @@ import pytest + import xtgeo diff --git a/tests/test_well/test_blockedwell.py b/tests/test_well/test_blockedwell.py index 3fe3a0ad2..629dd4035 100644 --- a/tests/test_well/test_blockedwell.py +++ b/tests/test_well/test_blockedwell.py @@ -4,6 +4,7 @@ from os.path import join import pytest + import xtgeo from xtgeo.common import XTGeoDialog diff --git a/tests/test_well/test_blockedwells.py b/tests/test_well/test_blockedwells.py index 024b6db0a..cf0340ebb 100644 --- a/tests/test_well/test_blockedwells.py +++ b/tests/test_well/test_blockedwells.py @@ -4,6 +4,7 @@ from os.path import join import pytest + import xtgeo from xtgeo.common import XTGeoDialog from xtgeo.well import BlockedWells diff --git a/tests/test_well/test_well.py b/tests/test_well/test_well.py index 9bfea8f3d..48d476218 100644 --- a/tests/test_well/test_well.py +++ b/tests/test_well/test_well.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- +import sys from collections import OrderedDict from os.path import join import numpy as np import pandas as pd import pytest -import sys import xtgeo from xtgeo.common import XTGeoDialog diff --git a/tests/test_well/test_well_deprecations.py b/tests/test_well/test_well_deprecations.py index eecde0c4e..3dc47c783 100644 --- a/tests/test_well/test_well_deprecations.py +++ b/tests/test_well/test_well_deprecations.py @@ -1,6 +1,7 @@ import pytest -import xtgeo from packaging import version + +import xtgeo from xtgeo import Well from xtgeo import version as xtgeo_version diff --git a/tests/test_well/test_well_to_points.py b/tests/test_well/test_well_to_points.py index 7568c4aed..3fa4f3460 100644 --- a/tests/test_well/test_well_to_points.py +++ b/tests/test_well/test_well_to_points.py @@ -1,4 +1,5 @@ import pytest + import xtgeo from xtgeo.common import XTGeoDialog diff --git a/tests/test_well/test_well_vs_grid.py b/tests/test_well/test_well_vs_grid.py index 5265c88ab..f9b2c83bc 100644 --- a/tests/test_well/test_well_vs_grid.py +++ b/tests/test_well/test_well_vs_grid.py @@ -4,6 +4,7 @@ from os.path import join import pytest + import xtgeo from xtgeo.common import XTGeoDialog diff --git a/tests/test_well/test_well_vs_surface.py b/tests/test_well/test_well_vs_surface.py index 98405673b..6d749d690 100644 --- a/tests/test_well/test_well_vs_surface.py +++ b/tests/test_well/test_well_vs_surface.py @@ -4,6 +4,7 @@ from os.path import join import pytest + import xtgeo xtg = xtgeo.common.XTGeoDialog() diff --git a/tests/test_well/test_wells.py b/tests/test_well/test_wells.py index 169a60e37..e9ed8e151 100644 --- a/tests/test_well/test_wells.py +++ b/tests/test_well/test_wells.py @@ -4,6 +4,7 @@ from os.path import join import pytest + import xtgeo from xtgeo.common 
import XTGeoDialog from xtgeo.well import Wells diff --git a/tests/test_xyz/test_points.py b/tests/test_xyz/test_points.py index 680c7884e..5070ff69f 100644 --- a/tests/test_xyz/test_points.py +++ b/tests/test_xyz/test_points.py @@ -4,9 +4,10 @@ import numpy as np import pandas as pd import pytest -import xtgeo from hypothesis import given, settings from hypothesis import strategies as st + +import xtgeo from xtgeo.xyz import Points PFILE = pathlib.Path("points/eme/1/emerald_10_random.poi") diff --git a/tests/test_xyz/test_points_from_surface.py b/tests/test_xyz/test_points_from_surface.py index 8775e5d03..7a1a27a21 100644 --- a/tests/test_xyz/test_points_from_surface.py +++ b/tests/test_xyz/test_points_from_surface.py @@ -1,6 +1,7 @@ import pathlib import pandas as pd + import xtgeo SURFACE = pathlib.Path("surfaces/reek/1/topreek_rota.gri") diff --git a/tests/test_xyz/test_points_from_wells.py b/tests/test_xyz/test_points_from_wells.py index a4f8025eb..7433bd206 100644 --- a/tests/test_xyz/test_points_from_wells.py +++ b/tests/test_xyz/test_points_from_wells.py @@ -2,6 +2,7 @@ import pathlib import pytest + import xtgeo from xtgeo.xyz import Points diff --git a/tests/test_xyz/test_points_vs_other.py b/tests/test_xyz/test_points_vs_other.py index e8db32aea..8d5d67211 100644 --- a/tests/test_xyz/test_points_vs_other.py +++ b/tests/test_xyz/test_points_vs_other.py @@ -1,6 +1,7 @@ import pathlib import pytest + import xtgeo SFILE1A = pathlib.Path("surfaces/reek/1/topupperreek.gri") diff --git a/tests/test_xyz/test_xyz_deprecated.py b/tests/test_xyz/test_xyz_deprecated.py index c5bd9e9e0..800e5f2bf 100644 --- a/tests/test_xyz/test_xyz_deprecated.py +++ b/tests/test_xyz/test_xyz_deprecated.py @@ -4,8 +4,9 @@ import pandas as pd import pytest -import xtgeo from packaging import version + +import xtgeo from xtgeo import version as xtgeo_version PFILE1A = pathlib.Path("polygons/reek/1/top_upper_reek_faultpoly.zmap") diff --git a/tests/test_xyz/test_xyz_roxapi_mock.py b/tests/test_xyz/test_xyz_roxapi_mock.py index 40339793a..fc3882c2b 100644 --- a/tests/test_xyz/test_xyz_roxapi_mock.py +++ b/tests/test_xyz/test_xyz_roxapi_mock.py @@ -5,9 +5,10 @@ import numpy as np import pandas as pd import pytest -import xtgeo from pandas.testing import assert_frame_equal +import xtgeo + @pytest.fixture def point_set(): From 5597436e1fd20676857f591dd87237835ea80e41 Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 2 Oct 2023 08:34:06 +0200 Subject: [PATCH 03/13] CLN: Update ci workflows --- .flake8 | 2 +- .github/actions/build-xtgeo/action.yml | 23 ---------- .github/actions/setup_testdata/action.yml | 9 ++++ .github/actions/setup_xtgeo/action.yml | 21 ++++++++++ .github/actions/test_setup/action.yml | 17 -------- .github/workflows/build_docs.yml | 12 +++--- .github/workflows/linting.yml | 34 ++++++++++----- .github/workflows/test.yml | 51 +++++++++++++++++------ .gitignore | 1 + 9 files changed, 101 insertions(+), 69 deletions(-) delete mode 100644 .github/actions/build-xtgeo/action.yml create mode 100644 .github/actions/setup_testdata/action.yml create mode 100644 .github/actions/setup_xtgeo/action.yml delete mode 100644 .github/actions/test_setup/action.yml diff --git a/.flake8 b/.flake8 index 4c7aaf9cd..f44d282a4 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] -exclude = docs/conf.py, src/xtgeo/cxtgeo/*.py, .eggs, _skbuild, tests/**/snapshots max-line-length = 88 ignore = E402, W503, E203, C901 +exclude = docs/conf.py, src/xtgeo/cxtgeo/*.py, .eggs, tests/**/snapshots, _theversion.py diff --git 
a/.github/actions/build-xtgeo/action.yml b/.github/actions/build-xtgeo/action.yml deleted file mode 100644 index c49928c7c..000000000 --- a/.github/actions/build-xtgeo/action.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Build -description: builds xtgeo - -inputs: - python-version: - required: true - - -runs: - using: "composite" - steps: - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: ${{ inputs.python-version }} - - - name: Upgrade pip - run: python -m pip install pip -U - shell: bash - - - name: install xtgeo - run: pip install . - shell: bash diff --git a/.github/actions/setup_testdata/action.yml b/.github/actions/setup_testdata/action.yml new file mode 100644 index 000000000..557f9d54d --- /dev/null +++ b/.github/actions/setup_testdata/action.yml @@ -0,0 +1,9 @@ +name: Setup xtgeo testdata +description: Set up the xtgeo test environment + +runs: + using: "composite" + steps: + - name: Install test data + shell: bash + run: git clone --depth 1 https://github.com/equinor/xtgeo-testdata ../xtgeo-testdata diff --git a/.github/actions/setup_xtgeo/action.yml b/.github/actions/setup_xtgeo/action.yml new file mode 100644 index 000000000..22e67f670 --- /dev/null +++ b/.github/actions/setup_xtgeo/action.yml @@ -0,0 +1,21 @@ +name: Setup xtgeo +description: Set up the xtgeo test environment + +inputs: + python-version: + required: true + type: string + +runs: + using: "composite" + steps: + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + + - name: Build and install xtgeo + shell: bash + run: | + pip install -U pip + pip install ".[dev]" diff --git a/.github/actions/test_setup/action.yml b/.github/actions/test_setup/action.yml deleted file mode 100644 index 1b9d77389..000000000 --- a/.github/actions/test_setup/action.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: "Set up for tests" -description: "Set up the xtgeo test environment" - -inputs: - python-version: - required: true - -runs: - using: "composite" - steps: - - uses: "./.github/actions/build-xtgeo" - with: - python-version: ${{ inputs.python-version }} - - run: git clone --depth 1 https://github.com/equinor/xtgeo-testdata ../xtgeo-testdata - shell: bash - - run: pip install -r requirements/requirements_test.txt - shell: bash diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index a8e0c64aa..70acc71dd 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -10,12 +10,14 @@ jobs: build_docs: name: Build docs runs-on: ubuntu-latest + steps: - - uses: actions/checkout@v3 - - uses: "./.github/actions/build-xtgeo" + - uses: actions/checkout@v4 with: - python-version: 3.9 - - name: Install doc requirements - run: pip install -r requirements/requirements_docs.txt + fetch-depth: 0 + + - name: Install xtgeo + run: pip install ".[docs]" + - name: Build docs run: sphinx-build -W docs tmp/docs diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1bac276b1..1c5af2afc 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,4 +1,4 @@ -name: linting +name: Linting on: [push, pull_request] @@ -6,14 +6,26 @@ jobs: linting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 with: - python-version: "3.10" - - name: Check black style and linting - run: | - pip install black - pip freeze - black --check *.py src tests --extend-exclude tests/**/snapshots --extend-exclude 
src/xtgeo/grid3d/grid_properties.py
-          pip install flake8
-          flake8 src tests
+          fetch-depth: 0
+
+      - name: Setup xtgeo
+        uses: "./.github/actions/setup_xtgeo"
+        with:
+          python-version: 3.11
+
+      - name: List dependencies
+        run: pip freeze
+
+      - name: Lint with isort
+        if: ${{ always() }}
+        run: isort --check-only src tests
+
+      - name: Lint with black
+        if: ${{ always() }}
+        run: black --check src tests
+
+      - name: Lint with flake8
+        if: ${{ always() }}
+        run: flake8 src tests
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 25a245ac4..e2cc57f7c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -23,49 +23,76 @@ jobs:
         python-version: 3.11
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
-      - uses: "./.github/actions/test_setup"
+
+      - name: Setup xtgeo
+        uses: "./.github/actions/setup_xtgeo"
        with:
          python-version: ${{ matrix.python-version }}
-      - name: Run test
-        run: python -m pytest --disable-warnings -x --hypothesis-profile ci-fast
+
+      - name: Setup testdata
+        uses: "./.github/actions/setup_testdata"
+
+      - name: Run fast tests
+        run: pytest --disable-warnings -x --hypothesis-profile ci-fast
+
  hypothesis:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
-      - uses: "./.github/actions/test_setup"
+
+      - name: Setup xtgeo
+        uses: "./.github/actions/setup_xtgeo"
        with:
          python-version: 3.9
+
+      - name: Setup testdata
+        uses: "./.github/actions/setup_testdata"
+
      - name: Run just hypothesis tests with more examples
        run: python -m pytest --disable-warnings -x -m hypothesis --hypothesis-profile ci --generate-plots
+
  big:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
-      - uses: "./.github/actions/test_setup"
+
+      - name: Setup xtgeo
+        uses: "./.github/actions/setup_xtgeo"
        with:
          python-version: 3.9
+
+      - name: Setup testdata
+        uses: "./.github/actions/setup_testdata"
+
      - name: Run just tests marked big
        run: XTG_BIGTEST=1 python -m pytest --disable-warnings -x -m bigtest --hypothesis-profile ci --generate-plots
+
  codecov:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
-      - uses: "./.github/actions/test_setup"
+
+      - name: Setup xtgeo
+        uses: "./.github/actions/setup_xtgeo"
        with:
          python-version: 3.9
-      - name: "Install codecov"
-        run: pip install pytest-cov
+
+      - name: Setup testdata
+        uses: "./.github/actions/setup_testdata"
+
      - name: Generate coverage report
        run: pytest tests --doctest-modules --generate-plots --disable-warnings --cov=xtgeo --hypothesis-profile ci-fast --cov-report=xml:xtgeocoverage.xml;
+
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v2
        with:
diff --git a/.gitignore b/.gitignore
index 960674f96..8d75d7692 100644
--- a/.gitignore
+++ b/.gitignore
@@ -82,3 +82,4 @@ pip-wheel-metadata/
 .vscode/
 .venv*/
 .nfs*
+tmp/

From 8d6a4f21ed1f18b790094e5fec05b00a59f469a3 Mon Sep 17 00:00:00 2001
From: mferrera
Date: Mon, 2 Oct 2023 09:21:00 +0200
Subject: [PATCH 04/13] BLD: Update cibuildwheel setup

---
 .../workflows/ci-test-xtgeo-cibuildwheel.yml | 24 ++++++-------------
 pyproject.toml                               | 14 +++++++++++
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/ci-test-xtgeo-cibuildwheel.yml b/.github/workflows/ci-test-xtgeo-cibuildwheel.yml
index ba6296291..ebc8a775b 100644
--- a/.github/workflows/ci-test-xtgeo-cibuildwheel.yml
+++
b/.github/workflows/ci-test-xtgeo-cibuildwheel.yml @@ -11,6 +11,7 @@ jobs: name: CIBW python ${{ matrix.cibw_python }} on ${{ matrix.os.runs_on }} runs-on: ${{ matrix.os.runs_on }} strategy: + fail-fast: false matrix: cibw_python: [cp38, cp39, cp310, cp311] os: @@ -22,32 +23,21 @@ jobs: cibw_image: macosx_x86_64 env: - CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 - # pillow < 8.4 required for CIBW build - CIBW_BEFORE_TEST: > - pushd {project} && - pip install -r requirements/requirements_test.txt && - git clone --depth 1 https://github.com/equinor/xtgeo-testdata ../xtgeo-testdata - # Ignore forking tests as they do not work well with CIBW - CIBW_TEST_COMMAND: > - pushd {project} && - pytest --disable-warnings -x -m "not hypothesis" --ignore tests/test_well --ignore-glob="*forks.py" CIBW_BUILD: ${{ matrix.cibw_python }}-${{ matrix.os.cibw_image }} - # CIBW_TEST_SKIP: "cp312*linux* cp312*macos*" - CIBW_BEFORE_BUILD: python -m pip install "pip<=22.0.4" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Set up Python uses: actions/setup-python@v2 - name: Install cibuildwheel - run: > - python -m pip install wheel && - python -m pip install cibuildwheel + run: pip install wheel cibuildwheel + - name: Run cibuildwheel - run: python -m cibuildwheel --output-dir wheelhouse + run: cibuildwheel --output-dir wheelhouse + - name: Publish if: ${{ github.event_name == 'release' }} env: diff --git a/pyproject.toml b/pyproject.toml index 9f7f3940a..eddc1d771 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,20 @@ exclude = ''' )/ ''' +[tool.cibuildwheel] +manylinux-x86_64-image = "manylinux2014" +test-extras = ["dev"] +before-build = 'python -m pip install "pip<=22.0.4"' +before-test = [ + "pushd {package}", + "git clone --depth 1 https://github.com/equinor/xtgeo-testdata ../xtgeo-testdata", +] +test-command = [ + "pushd {package}", + # Ignore forking tests as they do not work well with CIBW + 'pytest --disable-warnings -x -m "not hypothesis" --ignore tests/test_well --ignore-glob="*forks.py"', +] + [tool.isort] profile = "black" From fc181546e0289e406022ae04f7fac861654f8cea Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 2 Oct 2023 09:54:14 +0200 Subject: [PATCH 05/13] TST: Update Komodo ci script --- ci/testkomodo.sh | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/ci/testkomodo.sh b/ci/testkomodo.sh index 88da02744..5c4b96272 100644 --- a/ci/testkomodo.sh +++ b/ci/testkomodo.sh @@ -11,11 +11,7 @@ copy_test_files () { } install_package () { - pip install . -} - -install_test_dependencies () { - pip install -r requirements/requirements_test.txt + pip install ".[dev]" } start_tests () { @@ -31,12 +27,7 @@ cleanup () { run_tests() { copy_test_files - if [ ! 
-z "${CI_PR_RUN:-}" ] - then - install_package - fi - - install_test_dependencies + install_package pushd $CI_TEST_ROOT start_tests From 70fa992f0914d04fa548e6fe8fe12475c192733a Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 2 Oct 2023 09:56:13 +0200 Subject: [PATCH 06/13] CLN: Remove requirements directory --- docs/installation.rst | 3 +- requirements/pyproject.toml.komodo | 15 ---------- requirements/requirements.txt | 13 --------- requirements/requirements_dev.txt | 34 ---------------------- requirements/requirements_dev_rms.txt | 30 ------------------- requirements/requirements_docs.txt | 12 -------- requirements/requirements_komodo.txt | 4 --- requirements/requirements_setup.txt | 11 ------- requirements/requirements_setup_extras.txt | 9 ------ requirements/requirements_test.txt | 6 ---- 10 files changed, 1 insertion(+), 136 deletions(-) delete mode 100644 requirements/pyproject.toml.komodo delete mode 100644 requirements/requirements.txt delete mode 100644 requirements/requirements_dev.txt delete mode 100644 requirements/requirements_dev_rms.txt delete mode 100644 requirements/requirements_docs.txt delete mode 100644 requirements/requirements_komodo.txt delete mode 100644 requirements/requirements_setup.txt delete mode 100644 requirements/requirements_setup_extras.txt delete mode 100644 requirements/requirements_test.txt diff --git a/docs/installation.rst b/docs/installation.rst index e60c71b04..4bff1c726 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -66,8 +66,7 @@ Hence folder structure may look like /some/path/to/xtgeo /some/path/to/xtgeo-testdata -For required python packages, see the requirements*.txt files and the -pyproject.toml file in the root folder. +For required python packages, see the pyproject.toml file in the root folder. 
Once you have a copy of the source, and you have a `virtual environment`_, then always run tests (run first compile and install with ``pip install .``): diff --git a/requirements/pyproject.toml.komodo b/requirements/pyproject.toml.komodo deleted file mode 100644 index 60400194e..000000000 --- a/requirements/pyproject.toml.komodo +++ /dev/null @@ -1,15 +0,0 @@ -# version to be runnable by older pips in Komodo bleeding (Equinor) -[build-system] -requires = [ - "pip>=19.1.1", - "setuptools>=30.3.0", - "scikit-build<0.17", - "cmake>=3.6.0", - "ninja", - "setuptools_scm>=3.2.0", - "numpy>=1.19", - "Sphinx<4.0", # Due to sphinx-toolbox - "sphinx-rtd-theme", - "sphinxcontrib-apidoc", - "sphinx-autodoc-typehints", - ] diff --git a/requirements/requirements.txt b/requirements/requirements.txt deleted file mode 100644 index 252e1c41d..000000000 --- a/requirements/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -deprecation -numpy>=1.19 -shapely>=1.6.2 -matplotlib>=3.3 -scipy>=1.5 -segyio>1.8.0 -pandas>=1.1 -h5py>=3 -hdf5plugin>=2.3 -tables>=3.5.1 -roffio>=0.0.2 -ecl-data-io>=2.1 -typing-extensions diff --git a/requirements/requirements_dev.txt b/requirements/requirements_dev.txt deleted file mode 100644 index 335703ca7..000000000 --- a/requirements/requirements_dev.txt +++ /dev/null @@ -1,34 +0,0 @@ -setuptools>=43 -setuptools_scm>=3.2.0 -scikit-build<0.17 -ninja>=1.9.0.post1 -cmake>3.13.3 -pip>=20 -wheel>=0.38 -flake8 -pydocstyle -coverage>=4.1 -pytest-runner>=2.11.1 -pre-commit -coverage>=4.1 -Sphinx<4.0 # Due to sphinx-toolbox -sphinx-rtd-theme -sphinx-toolbox -autoclasstoc -myst-parser -bandit -numpy>1.19 -pandas>=1.1 -segyio>=1.8.6 -matplotlib>=3.3 -scipy>=1.5 -shapely>=1.6.2 -black>=23.1 -autopep8 -pylint -pytest>=6 -pytest-cov -h5py>=3 -hdf5plugin>=2.3 -tables>=3.5 -swig diff --git a/requirements/requirements_dev_rms.txt b/requirements/requirements_dev_rms.txt deleted file mode 100644 index 855019d63..000000000 --- a/requirements/requirements_dev_rms.txt +++ /dev/null @@ -1,30 +0,0 @@ -# requirements when used in RMS python venv's -pip>=20.3.3 -setuptools>=30.3.0 -setuptools_scm>=3.2.0 -scikit-build -ninja>=1.9.0.post1 -cmake==3.15.3 -wheel>=0.38 -flake8 -pydocstyle -coverage>=4.1 -pre-commit -coverage>=4.1 -Sphinx<4.0 # Due to sphinx-toolbox -sphinx-rtd-theme -sphinx-toolbox -autoclasstoc -myst-parser -bandit -segyio>=1.8.6 -shapely>=1.6.2 -black>=23.1 -autopep8 -pylint -pytest>=2.9.2 -pytest-cov -pytest-runner>=2.11.1 -h5py>=3 -hdf5plugin>=2.3 -tables>=3.5.1 diff --git a/requirements/requirements_docs.txt b/requirements/requirements_docs.txt deleted file mode 100644 index 3355768af..000000000 --- a/requirements/requirements_docs.txt +++ /dev/null @@ -1,12 +0,0 @@ -setuptools>=43 -cmake>=3.13.3 -scikit-build -ninja - -setuptools_scm -pydocstyle -Sphinx<4.0 -sphinx-rtd-theme -sphinx-toolbox -autoclasstoc -myst-parser diff --git a/requirements/requirements_komodo.txt b/requirements/requirements_komodo.txt deleted file mode 100644 index f8c5bc9b6..000000000 --- a/requirements/requirements_komodo.txt +++ /dev/null @@ -1,4 +0,0 @@ -numpy>=1.13 -cmake>=3.13.1 -setuptools_scm>=3.2.0 -scikit-build==0.10.0 diff --git a/requirements/requirements_setup.txt b/requirements/requirements_setup.txt deleted file mode 100644 index 77a487bb7..000000000 --- a/requirements/requirements_setup.txt +++ /dev/null @@ -1,11 +0,0 @@ -# requirements for setup on localhost -# see also ...setup_ci.txt and pyproject.toml -setuptools>=43 -wheel>=0.38 -numpy -cmake -setuptools_scm -scikit-build -ninja -pytest-runner -swig 
diff --git a/requirements/requirements_setup_extras.txt b/requirements/requirements_setup_extras.txt deleted file mode 100644 index 3c099d592..000000000 --- a/requirements/requirements_setup_extras.txt +++ /dev/null @@ -1,9 +0,0 @@ -flake8 -pydocstyle -coverage>=4.1 -pre-commit -coverage>=4.1 -bandit -autopep8 -pylint -pytest-cov diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt deleted file mode 100644 index 512ea3780..000000000 --- a/requirements/requirements_test.txt +++ /dev/null @@ -1,6 +0,0 @@ -pytest -hypothesis<=6.83.0;python_version=='3.8' # ipython pinned to 8.12.2 for python 3.8 support -hypothesis;python_version>='3.9' -pytest-benchmark -pytest-mock -pytest-snapshot From 2c678036b64ce0442420cec1deda901c26acd840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Thu, 28 Sep 2023 08:04:56 +0200 Subject: [PATCH 07/13] WIP --- .mypy.ini | 19 + src/xtgeo/__init__.py | 2 +- src/xtgeo/well/_well1_old.py | 1729 ++++++++++++++++++++++++ src/xtgeo/well/_well_aux.py | 190 +++ src/xtgeo/well/_well_io.py | 2 +- src/xtgeo/well/_well_oper.py | 11 +- src/xtgeo/well/_welldata.py | 254 ++++ src/xtgeo/well/well1.py | 377 ++---- tests/test_well/test_well.py | 47 +- tests/test_well/test_welldata_class.py | 155 +++ 10 files changed, 2482 insertions(+), 304 deletions(-) create mode 100644 .mypy.ini create mode 100644 src/xtgeo/well/_well1_old.py create mode 100644 src/xtgeo/well/_well_aux.py create mode 100644 src/xtgeo/well/_welldata.py create mode 100644 tests/test_well/test_welldata_class.py diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 000000000..92d70c69d --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,19 @@ +# Global options: + +[mypy] +warn_return_any = True +warn_unused_configs = True + +# Per-module options: + +[mypy-mycode.foo.*] +disallow_untyped_defs = True + +[mypy-mycode.bar] +warn_return_any = False + +[mypy-xtgeo.well._welldata] +ignore_missing_imports = True + +[mypy-xtgeo.cxtgeo._cxtgeo] +ignore_missing_imports = True diff --git a/src/xtgeo/__init__.py b/src/xtgeo/__init__.py index 603784961..c2a092c46 100644 --- a/src/xtgeo/__init__.py +++ b/src/xtgeo/__init__.py @@ -79,6 +79,7 @@ def _xprint(msg): # _xprint("Import matplotlib etc...DONE") +from xtgeo.common import XTGeoDialog from xtgeo.common.constants import UNDEF, UNDEF_INT, UNDEF_INT_LIMIT, UNDEF_LIMIT from xtgeo.common.exceptions import ( BlockedWellsNotFoundError, @@ -89,7 +90,6 @@ def _xprint(msg): WellNotFoundError, ) from xtgeo.common.sys import _XTGeoFile -from xtgeo.common.xtgeo_dialog import XTGeoDialog from xtgeo.cxtgeo._cxtgeo import XTGeoCLibError _xprint("Import common... done") diff --git a/src/xtgeo/well/_well1_old.py b/src/xtgeo/well/_well1_old.py new file mode 100644 index 000000000..da3778921 --- /dev/null +++ b/src/xtgeo/well/_well1_old.py @@ -0,0 +1,1729 @@ +# -*- coding: utf-8 -*- +"""XTGeo well module, working with one single well.""" + +import functools +import io +import math +import warnings +from collections import OrderedDict +from copy import deepcopy +from pathlib import Path +from typing import Dict, List, Optional, Union + +import deprecation +import numpy as np +import pandas as pd + +import xtgeo +import xtgeo.common.constants as const +import xtgeo.cxtgeo._cxtgeo as _cxtgeo +from xtgeo import XTGeoCLibError + +from . 
import _well_io, _well_oper, _well_roxapi, _wellmarkers + +xtg = xtgeo.common.XTGeoDialog() +logger = xtg.functionlogger(__name__) + + +# pylint: disable=too-many-public-methods + + +# ====================================================================================== +# METHODS as wrappers to class init + import + + +def _data_reader_factory(file_format): + if file_format in ["rmswell", "irap_ascii"]: + return _well_io.import_rms_ascii + if file_format == "hdf": + return _well_io.import_hdf5_well + raise ValueError( + f"Unknown file format {file_format}, supported formats are " + "'rmswell', 'irap_ascii' and 'hdf'" + ) + + +def well_from_file( + wfile: Union[str, Path], + fformat: Optional[str] = "rms_ascii", + mdlogname: Optional[str] = None, + zonelogname: Optional[str] = None, + lognames: Optional[Union[str, List[str]]] = "all", + lognames_strict: Optional[bool] = False, + strict: Optional[bool] = False, +) -> "Well": + """Make an instance of a Well directly from file import. + + Note: + + rms_ascii is the only correct for wells from RMS. Irap did not have this + format. For maps and points, the formats from the old Irap tool is + applied in RMS, hence "irap_ascii" and "rms_ascii" are there the same. + + Args: + wfile: File path, either a string or a pathlib.Path instance + fformat: See :meth:`Well.from_file` + mdlogname: Name of Measured Depth log if any + zonelogname: Name of Zonelog, if any + lognames: Name or list of lognames to import, default is "all" + lognames_strict: If True, all lognames must be present. + strict: If True, then import will fail if zonelogname or mdlogname are asked + for but not present in wells. + + Example:: + + >>> import xtgeo + >>> mywell = xtgeo.well_from_file(well_dir + "/OP_1.w") + + .. versionchanged:: 2.1 Added ``lognames`` and ``lognames_strict`` + .. versionchanged:: 2.1 ``strict`` now defaults to False + """ + return Well._read_file( + wfile, + fformat=fformat, + mdlogname=mdlogname, + zonelogname=zonelogname, + strict=strict, + lognames=lognames, + lognames_strict=lognames_strict, + ) + + +def well_from_roxar( + project: Union[str, object], + name: str, + trajectory: Optional[str] = "Drilled trajectory", + logrun: Optional[str] = "log", + lognames: Optional[Union[str, List[str]]] = "all", + lognames_strict: Optional[bool] = False, + inclmd: Optional[bool] = False, + inclsurvey: Optional[bool] = False, +) -> "Well": + """This makes an instance of a Well directly from Roxar RMS. + + + Note this method works only when inside RMS, or when RMS license is + activated. + + Args: + project: Path to project or magic ``project`` variable in RMS. + name: Name of Well, as shown in RMS. + trajectory: Name of trajectory in RMS. + logrun: Name of logrun in RMS. + lognames: List of lognames to import or use 'all' for all present logs + lognames_strict: If True and log is not in lognames is a list, an Exception will + be raised. + inclmd: If True, a Measured Depth log will be included. + inclsurvey: If True, logs for azimuth and deviation will be included. + + Returns: + Well instance. + + Example:: + + # inside RMS: + import xtgeo + mylogs = ['ZONELOG', 'GR', 'Facies'] + mywell = xtgeo.well_from_roxar( + project, "31_3-1", trajectory="Drilled", logrun="log", lognames=mylogs + ) + + .. 
versionchanged:: 2.1 lognames defaults to "all", not None + """ + return Well._read_roxar( + project, + name, + trajectory=trajectory, + logrun=logrun, + lognames=lognames, + lognames_strict=lognames_strict, + inclmd=inclmd, + inclsurvey=inclsurvey, + ) + + +def allow_deprecated_init(func): + # This decorator is here to maintain backwards compatibility in the + # construction of Well and should be deleted once the deprecation period + # has expired, the construction will then follow the new pattern. + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + if not args and not kwargs: + warnings.warn( + "Initializing empty well is deprecated, please provide " + "non-defaulted values, or use mywell = " + "xtgeo.well_from_file('filename')", + DeprecationWarning, + ) + return func( + self, + *([0.0] * 3), + "", + pd.DataFrame({"X_UTME": [], "Y_UTMN": [], "Z_TVDSS": []}), + ) + + # Checking if we are doing an initialization from file and raise a + # deprecation warning if we are. + if "wfile" in kwargs or ( + len(args) >= 1 and isinstance(args[0], (str, Path, xtgeo._XTGeoFile)) + ): + warnings.warn( + "Initializing directly from file name is deprecated and will be " + "removed in xtgeo version 4.0. Use: " + "mywell = xtgeo.well_from_file('filename') instead", + DeprecationWarning, + ) + if len(args) >= 1: + wfile = args[0] + args = args[1:] + else: + wfile = kwargs.pop("wfile", None) + if len(args) >= 1: + fformat = args[0] + args = args[1:] + else: + fformat = kwargs.pop("fformat", None) + + mfile = xtgeo._XTGeoFile(wfile) + if fformat is None or fformat == "guess": + fformat = mfile.detect_fformat() + else: + fformat = mfile.generic_format_by_proposal(fformat) + kwargs = _data_reader_factory(fformat)(mfile, *args, **kwargs) + kwargs["filesrc"] = mfile.file + return func(self, **kwargs) + return func(self, *args, **kwargs) + + return wrapper + + +class Well: + """Class for a well in the XTGeo framework. + + The well logs are stored in a Pandas dataframe, which make manipulation + easy and fast. + + The well trajectory are here represented as logs, and XYZ have magic names: + ``X_UTME``, ``Y_UTMN``, ``Z_TVDSS``, which are the three first Pandas columns. + + Other geometry logs has also 'semi-magic' names: + + M_MDEPTH or Q_MDEPTH: Measured depth, either real/true (M_xx) or + quasi computed/estimated (Q_xx). The Quasi may be incorrect for + all uses, but sufficient for some computations. + + Similar for M_INCL, Q_INCL, M_AZI, Q_ASI. + + All Pandas values (yes, discrete also!) are currently stored as float64 + format, and undefined values are Nan. Integers are stored as Float due + to the (historic) lacking support for 'Integer Nan'. In coming versions, + use of ``pandas.NA`` (available from Pandas version 1.0) may be implemented. + + Note there is a method that can return a dataframe (copy) with Integer + and Float columns, see :meth:`get_filled_dataframe`. + + The instance can be made either from file or (todo!) by specification:: + + >>> well1 = Well(well_dir + '/OP_1.w') # assume RMS ascii well + >>> well2 = Well(well_dir + '/OP_1.w', fformat='rms_ascii') + >>> well3 = xtgeo.well_from_file(well_dir + '/OP_1.w') + + Args: + rkb: well RKB height + xpos: well head X pos + ypos: well head Y pos + wname: well name + df: pandas dataframe with log values, expects columns to include + 'X_UTME', 'Y_UTMN', 'Z_TVDSS' for x, y and z coordinates. + Other columns should be log values. + filesrc: source file if any + mdlogname: Name of Measured Depth log if any. 
+ zonelogname: Name of Zonelog, if any + wlogtypes: dictionary of log types, 'DISC' or 'CONT', defaults to + to 'CONT'. + wlogrecords: dictionary of codes for 'DISC' logs, None for no codes given, + defaults to None. + """ + + VALID_LOGTYPES = {"DISC", "CONT"} + + @allow_deprecated_init + def __init__( + self, + rkb: float, + xpos: float, + ypos: float, + wname: str, + df: pd.DataFrame, + mdlogname: str = None, + zonelogname: str = None, + wlogtypes: Dict[str, str] = None, + wlogrecords: Dict[str, str] = None, + filesrc: Optional[Union[str, Path]] = None, + ): + if not all( + coordinate in df.columns for coordinate in ("X_UTME", "Y_UTMN", "Z_TVDSS") + ): + raise ValueError( + "Well dataframe must include 'X_UTME'," + f" 'Y_UTMN' and 'Z_TVDSS', got {df.columns}" + ) + self._reset( + rkb, + xpos, + ypos, + wname, + df, + filesrc, + mdlogname, + zonelogname, + wlogtypes, + wlogrecords, + ) + + def _reset( + self, + rkb: float = None, + xpos: float = None, + ypos: float = None, + wname: str = None, + df: pd.DataFrame = None, + filesrc: Optional[Union[str, Path]] = None, + mdlogname: str = None, + zonelogname: str = None, + wlogtypes: Dict[str, str] = None, + wlogrecords: Dict[str, str] = None, + ): + if wlogtypes is None: + wlogtypes = dict() + if wlogrecords is None: + wlogrecords = dict() + + self._rkb = rkb + self._xpos = xpos + self._ypos = ypos + self._wname = wname + self._filesrc = filesrc + self._mdlogname = mdlogname + self._zonelogname = zonelogname + + self._wlogtypes = wlogtypes + self._wlogrecords = wlogrecords + + self._df = df + + self._wlognames = list(self._df.columns) + + self._metadata = xtgeo.MetaDataWell() + self._metadata.required = self + + self._ensure_consistency() + + def __repr__(self): # noqa: D105 + # should be able to newobject = eval(repr(thisobject)) + myrp = ( + f"{self.__class__.__name__} (filesrc={self._filesrc!r}, " + f"name={self._wname!r}, ID={id(self)})" + ) + return myrp + + def __str__(self): # noqa: D105 + # user friendly print + return self.describe(flush=False) + + def _ensure_consistency(self): # pragma: no coverage + """Ensure consistency within an object (private function). + + Consistency checking. As well log names are columns in the Pandas DF, + there are additional attributes per log that have to be "in sync". + """ + if self._df is None: + return + + self._wlognames = list(self._df.columns) + + for logname in self._wlognames: + if logname not in self._wlogtypes: + self._wlogtypes[logname] = "CONT" # continuous as default + self._wlogrecords[logname] = None # None as default + else: + if self._wlogtypes[logname] not in self.VALID_LOGTYPES: + self._wlogtypes[logname] = "CONT" + self._wlogrecords[logname] = None # None as default + + if logname not in self._wlogrecords: + if self._wlogtypes[logname] == "DISC": + # it is a discrete log with missing record; try to find + # a default one based on current values... 
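+                    # (illustrative) e.g. a discrete column with values
+                    # [1.0, 1.0, 3.0] gets the default record {1: "1", 3: "3"}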
+                    lvalues = self._df[logname].values.round(decimals=0)
+                    lmin = int(lvalues.min())
+                    lmax = int(lvalues.max())
+
+                    lvalues = lvalues.astype("int")
+                    codes = {}
+                    for lval in range(lmin, lmax + 1):
+                        if lval in lvalues:
+                            codes[lval] = str(lval)
+
+                    self._wlogrecords[logname] = codes
+
+    # ==================================================================================
+    # Properties
+    # ==================================================================================
+
+    @property
+    def metadata(self):
+        """Return metadata object instance of type MetaDataWell."""
+        return self._metadata
+
+    @metadata.setter
+    def metadata(self, obj):
+        # The current metadata object can be replaced. This is a bit dangerous so
+        # further check must be done to validate. TODO.
+        if not isinstance(obj, xtgeo.MetaDataWell):
+            raise ValueError("Input obj not an instance of MetaDataWell")
+
+        self._metadata = obj
+
+    @property
+    def rkb(self):
+        """Returns RKB height for the well (read only)."""
+        return self._rkb
+
+    @property
+    def xpos(self):
+        """Returns well header X position (read only)."""
+        return self._xpos
+
+    @property
+    def ypos(self) -> float:
+        """Returns well header Y position (read only)."""
+        return self._ypos
+
+    @property
+    def wellname(self):
+        """str: Returns well name, read only."""
+        return self._wname
+
+    @property
+    def name(self):
+        """Returns or sets (renames) the well name."""
+        return self._wname
+
+    @name.setter
+    def name(self, newname):
+        self._wname = newname
+
+    # alias
+    wname = name
+
+    @property
+    def safewellname(self):
+        """Get well name on a syntax-safe form; '/' and spaces replaced with '_'."""
+        xname = self._wname
+        xname = xname.replace("/", "_")
+        xname = xname.replace(" ", "_")
+        return xname
+
+    @property
+    def xwellname(self):
+        """See safewellname."""
+        return self.safewellname
+
+    @property
+    def shortwellname(self):
+        """str: Well name on a short form where blockname/spaces removed (read only).
+
+        This should cope with both North Sea style and Haltenbanken style.
+ + E.g.: '31/2-G-5 AH' -> 'G-5AH', '6472_11-F-23_AH_T2' -> 'F-23AHT2' + + """ + return self.get_short_wellname(self.wellname) + + @property + def truewellname(self): + """Returns well name on the assummed form aka '31/2-E-4 AH2'.""" + xname = self.xwellname + if "/" not in xname: + xname = xname.replace("_", "/", 1) + xname = xname.replace("_", " ") + return xname + + @property + def mdlogname(self): + """str: Returns name of MD log, if any (None if missing).""" + return self._mdlogname + + @mdlogname.setter + def mdlogname(self, mname): + if mname in self._wlognames: + self._mdlogname = mname + else: + self._mdlogname = None + + @property + def zonelogname(self): + """str: Returns or sets name of zone log, return None if missing.""" + return self._zonelogname + + @zonelogname.setter + def zonelogname(self, zname): + if zname in self._wlognames: + self._zonelogname = zname + else: + self._zonelogname = None + + @property + def dataframe(self): + """Returns or set the Pandas dataframe object for all logs.""" + return self._df + + @dataframe.setter + def dataframe(self, dfr): + self._df = dfr.copy() + self._ensure_consistency() + + @property + def nrow(self): + """int: Returns the Pandas dataframe object number of rows.""" + return len(self._df.index) + + @property + def ncol(self): + """int: Returns the Pandas dataframe object number of columns.""" + return len(self._df.columns) + + @property + def nlogs(self): + """int: Returns the Pandas dataframe object number of columns.""" + return len(self._df.columns) - 3 + + @property + def lognames_all(self): + """list: Returns dataframe column names as list, including mandatory coords.""" + self._ensure_consistency() + return self._wlognames + + @property + def lognames(self): + """list: Returns the Pandas dataframe column as list excluding coords.""" + return list(self._df)[3:] + + # ================================================================================== + # Methods + # ================================================================================== + + @staticmethod + def get_short_wellname(wellname): + """Well name on a short name form where blockname and spaces are removed. + + This should cope with both North Sea style and Haltenbanken style. + E.g.: '31/2-G-5 AH' -> 'G-5AH', '6472_11-F-23_AH_T2' -> 'F-23AHT2' + """ + newname = [] + first1 = False + first2 = False + for letter in wellname: + if first1 and first2: + newname.append(letter) + continue + if letter in ("_", "/"): + first1 = True + continue + if first1 and letter == "-": + first2 = True + continue + + xname = "".join(newname) + xname = xname.replace("_", "") + xname = xname.replace(" ", "") + return xname + + def describe(self, flush=True): + """Describe an instance by printing to stdout.""" + dsc = xtgeo.common.XTGDescription() + + dsc.title("Description of Well instance") + dsc.txt("Object ID", id(self)) + dsc.txt("File source", self._filesrc) + dsc.txt("Well name", self._wname) + dsc.txt("RKB", self._rkb) + dsc.txt("Well head", self._xpos, self._ypos) + dsc.txt("Name of all columns", self.lognames_all) + dsc.txt("Name of log columns", self.lognames) + for wlog in self.lognames: + rec = self.get_logrecord(wlog) + if rec is not None and len(rec) > 3: + string = "(" + nlen = len(rec) + for idx, (code, val) in enumerate(rec.items()): + if idx < 2: + string += f"{code}: {val} " + elif idx == nlen - 1: + string += f"... 
{code}: {val})" + else: + string = f"{rec}" + dsc.txt("Logname", wlog, self.get_logtype(wlog), string) + + if flush: + dsc.flush() + return None + + return dsc.astext() + + @deprecation.deprecated( + deprecated_in="2.16", + removed_in="4.0", + current_version=xtgeo.version, + details="Use xtgeo.well_from_file() instead", + ) + def from_file( + self, + wfile, + fformat="rms_ascii", + **kwargs, + ): + """Deprecated, see :meth:`xtgeo.well_from_file`""" + + wfile = xtgeo._XTGeoFile(wfile) + if fformat is None or fformat == "guess": + fformat = wfile.detect_fformat() + else: + fformat = wfile.generic_format_by_proposal(fformat) # default + + kwargs = _data_reader_factory(fformat)(wfile, **kwargs) + self._reset(**kwargs) + return self + + @classmethod + def _read_file( + cls, + wfile, + fformat="rms_ascii", + **kwargs, + ): + """Import well from file. + + Args: + wfile (str): Name of file as string or pathlib.Path + fformat (str): File format, rms_ascii (rms well) is + currently supported and default format. + mdlogname (str): Name of measured depth log, if any + zonelogname (str): Name of zonation log, if any + strict (bool): If True, then import will fail if + zonelogname or mdlogname are asked for but not present + in wells. If False, and e.g. zonelogname is not present, the + attribute ``zonelogname`` will be set to None. + lognames (str or list): Name or list of lognames to import, default is "all" + lognames_strict (bool): Flag to require all logs in lognames (unless "all") + or to just accept that subset that is present. Default is `False`. + + + Returns: + Object instance (optionally) + + Example: + Here the from_file method is used to initiate the object + directly:: + + >>> mywell = Well().from_file(well_dir + '/OP_1.w') + + .. versionchanged:: 2.1 ``lognames`` and ``lognames_strict`` added + .. versionchanged:: 2.1 ``strict`` now defaults to False + """ + + wfile = xtgeo._XTGeoFile(wfile) + + if fformat is None or fformat == "guess": + fformat = wfile.detect_fformat() + else: + fformat = wfile.generic_format_by_proposal(fformat) # default + + kwargs = _data_reader_factory(fformat)(wfile, **kwargs) + return cls(**kwargs) + + def to_file( + self, + wfile: Union[str, Path, io.BytesIO], + fformat: Optional[str] = "rms_ascii", + ): + """Export well to file or memory stream. + + Args: + wfile: File name or stream. + fformat: File format ('rms_ascii'/'rmswell', 'hdf/hdf5/h5'). + + Example:: + + >>> xwell = Well(well_dir + '/OP_1.w') + >>> xwell.dataframe['Poro'] += 0.1 + >>> filename = xwell.to_file(outdir + "/somefile_copy.rmswell") + + """ + wfile = xtgeo._XTGeoFile(wfile, mode="wb", obj=self) + + wfile.check_folder(raiseerror=OSError) + + self._ensure_consistency() + + if fformat in (None, "rms_ascii", "rms_asc", "rmsasc", "rmswell"): + _well_io.export_rms_ascii(self, wfile.name) + + elif fformat in ("hdf", "hdf5", "h5"): + self.to_hdf(wfile) + + return wfile.file + + def from_hdf( + self, + wfile: Union[str, Path], + ): + """Deprecated, use :meth:`xtgeo.well_from_file()`""" + return self.from_file(wfile, fformat="hdf") + + def to_hdf( + self, + wfile: Union[str, Path], + compression: Optional[str] = "lzf", + ) -> Path: + """Export well to HDF based file. + + Warning: + This implementation is currently experimental and only recommended + for testing. + + Args: + wfile: HDF File name to write to export to. + + Returns: + A Path instance to actual file applied. + + .. 
versionadded:: 2.14 + """ + wfile = xtgeo._XTGeoFile(wfile, mode="wb", obj=self) + + wfile.check_folder(raiseerror=OSError) + + _well_io.export_hdf5_well(self, wfile, compression=compression) + + return wfile.file + + @deprecation.deprecated( + deprecated_in="2.16", + removed_in="4.0", + current_version=xtgeo.version, + details="Use xtgeo.well_from_roxar() instead", + ) + def from_roxar( + self, + project: Union[str, object], + name: str, + trajectory: Optional[str] = "Drilled trajectory", + logrun: Optional[str] = "log", + lognames: Optional[Union[str, List[str]]] = "all", + lognames_strict: Optional[bool] = False, + inclmd: Optional[bool] = False, + inclsurvey: Optional[bool] = False, + ): + """Deprecated, use :meth:`xtgeo.well_from_roxar()`""" + kwargs = _well_roxapi.import_well_roxapi( + project, + name, + trajectory=trajectory, + logrun=logrun, + lognames=lognames, + lognames_strict=lognames_strict, + inclmd=inclmd, + inclsurvey=inclsurvey, + ) + self._reset(**kwargs) + return self + + @classmethod + def _read_roxar( + cls, + project: Union[str, object], + name: str, + trajectory: Optional[str] = "Drilled trajectory", + logrun: Optional[str] = "log", + lognames: Optional[Union[str, List[str]]] = "all", + lognames_strict: Optional[bool] = False, + inclmd: Optional[bool] = False, + inclsurvey: Optional[bool] = False, + ): + kwargs = _well_roxapi.import_well_roxapi( + project, + name, + trajectory=trajectory, + logrun=logrun, + lognames=lognames, + lognames_strict=lognames_strict, + inclmd=inclmd, + inclsurvey=inclsurvey, + ) + return cls(**kwargs) + + def to_roxar(self, *args, **kwargs): + """Export (save/store) a well to a roxar project. + + Note this method works only when inside RMS, or when RMS license is + activated. + + The current implementation will either update existing well names + (then well log array size must not change), or it will make a new well in RMS. + + Note: + When project is file path (direct access, outside RMS) then + ``to_roxar()`` will implicitly do a project save. Otherwise, the project + will not be saved until the user do an explicit project save action. + + Args: + project (str): Magic string 'project' or file path to project + wname (str): Name of well, as shown in RMS. + lognames (:obj:list or :obj:str): List of lognames to save, or + use simply 'all' for current logs for this well. Default is 'all' + realisation (int): Currently inactive + trajectory (str): Name of trajectory in RMS + logrun (str): Name of logrun in RMS + + .. versionadded:: 2.12 + .. versionchanged:: 2.15 + Saving to new wells enabled (earlier only modifying existing) + + """ + # use *args, **kwargs since this method is overrided in blocked_well, and + # signature should be the same + + project = args[0] + wname = args[1] + lognames = kwargs.get("lognames", "all") + trajectory = kwargs.get("trajectory", "Drilled trajectory") + logrun = kwargs.get("logrun", "log") + realisation = kwargs.get("realisation", 0) + + logger.debug("Not in use: realisation %s", realisation) + + _well_roxapi.export_well_roxapi( + self, + project, + wname, + lognames=lognames, + trajectory=trajectory, + logrun=logrun, + realisation=realisation, + ) + + def get_wlogs(self) -> OrderedDict: + """Get a compound dictionary with well log metadata. + + The result will be an Ordered dict on the form: + + ``{"X_UTME": ["CONT", None], ... 
"Facies": ["DISC", {1: "BG", 2: "SAND"}]}`` + """ + res = OrderedDict() + + for key in self._wlognames: + wtype = "CONT" + wrecord = None + if key in self._wlogtypes: + wtype = self._wlogtypes[key] + if key in self._wlogrecords: + wrecord = self._wlogrecords[key] + + res[key] = [wtype, wrecord] + + return res + + def set_wlogs(self, wlogs: OrderedDict): + """Set a compound dictionary with well log metadata. + + This operation is somewhat risky as it may lead to inconsistency, so use with + care! Typically, one will use :meth:`get_wlogs` first and then modify some + attributes. + + Args: + wlogs: Input data dictionary + + Raises: + ValueError: Invalid log type found in input: + ValueError: Invalid log record found in input: + ValueError: Invalid input key found: + ValueError: Invalid log record found in input: + + """ + for key in self._wlognames: + if key in wlogs.keys(): + typ, rec = wlogs[key] + + if typ in Well.VALID_LOGTYPES: + self._wlogtypes[key] = deepcopy(typ) + else: + raise ValueError(f"Invalid log type found in input: {typ}") + + if rec is None or isinstance(rec, dict): + self._wlogrecords[key] = deepcopy(rec) + else: + raise ValueError(f"Invalid log record found in input: {rec}") + + else: + raise ValueError(f"Key for column not found in input: {key}") + + for key in wlogs.keys(): + if key not in self._wlognames: + raise ValueError(f"Invalid input key found: {key}") + + self._ensure_consistency() + + def isdiscrete(self, logname): + """Return True of log is discrete, otherwise False. + + Args: + logname (str): Name of log to check if discrete or not + + .. versionadded:: 2.2.0 + """ + if logname in self._wlognames and self.get_logtype(logname) == "DISC": + return True + return False + + def copy(self): + """Copy a Well instance to a new unique Well instance.""" + return Well( + self.rkb, + self.xpos, + self.ypos, + self.wname, + self._df.copy(), + self.mdlogname, + self.zonelogname, + deepcopy(self._wlogtypes), + deepcopy(self._wlogrecords), + self._filesrc, + ) + + def rename_log(self, lname, newname): + """Rename a log, e.g. Poro to PORO.""" + self._ensure_consistency() + + if lname not in self.lognames: + raise ValueError("Input log does not exist") + + if newname in self.lognames: + raise ValueError("New log name exists already") + + self._wlogtypes[newname] = self._wlogtypes.pop(lname) + self._wlogrecords[newname] = self._wlogrecords.pop(lname) + + # rename in dataframe + self._df.rename(index=str, columns={lname: newname}, inplace=True) + + if self._mdlogname == lname: + self._mdlogname = newname + + if self._zonelogname == lname: + self._zonelogname = newname + + def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=True): + """Create a new log with initial values. + + If the logname already exists, it will be silently overwritten, unless + the option force=False. + + Args: + lname (str): name of new log + logtype (str): Must be 'CONT' (default) or 'DISC' (discrete) + logrecord (dict): A dictionary of key: values for 'DISC' logs + value (float): initia value to set_index + force (bool): If True, and lname exists, it will be overwritten, if + False, no new log will be made. Will return False. + + Returns: + True ff a new log is made (either new or force overwrite an + existing) or False if the new log already exists, + and ``force=False``. 
+ + """ + if lname in self.lognames and force is False: + return False + + self._wlogtypes[lname] = logtype + self._wlogrecords[lname] = logrecord + + # make a new column + self._df[lname] = float(value) + self._ensure_consistency() + return True + + def delete_log(self, lname): + """Delete/remove an existing log, or list of logs. + + Will continue silently if a log does not exist. + + Args: + lname(str or list): A logname or a list of lognames + + Returns: + Number of logs deleted + """ + return _well_oper.delete_log(self, lname) + + delete_logs = delete_log # alias function + + def get_logtype(self, lname): + """Returns the type of a give log (e.g. DISC or CONT).""" + self._ensure_consistency() + + if lname in self._wlogtypes: + return self._wlogtypes[lname] + return None + + def set_logtype(self, lname, ltype): + """Sets the type of a give log (e.g. DISC or CONT).""" + self._ensure_consistency() + + valid = {"DISC", "CONT"} + + if ltype in valid: + self._wlogtypes[lname] = ltype + else: + raise ValueError(f"Try to set invalid log type: {ltype}") + + def get_logrecord(self, lname): + """Returns the record (dict) of a given log name, None if not exists.""" + if lname in self._wlogtypes: + return self._wlogrecords[lname] + + return None + + def set_logrecord(self, lname, newdict): + """Sets the record (dict) of a given discrete log.""" + self._ensure_consistency() + if lname not in self.lognames: + raise ValueError(f"No such logname: {lname}") + + if self._wlogtypes[lname] == "CONT": + raise ValueError("Cannot set a log record for a continuous log") + + if not isinstance(newdict, dict): + raise ValueError("Input is not a dictionary") + + self._wlogrecords[lname] = newdict + + def get_logrecord_codename(self, lname, key): + """Returns the name entry of a log record, for a given key. + + Example:: + + # get the name for zonelog entry no 4: + zname = well.get_logrecord_codename('ZONELOG', 4) + """ + zlogdict = self.get_logrecord(lname) + if key in zlogdict: + return zlogdict[key] + + return None + + def get_carray(self, lname): + """Returns the C array pointer (via SWIG) for a given log. + + Type conversion is double if float64, int32 if DISC log. + Returns None of log does not exist. + """ + if lname in self._df: + np_array = self._df[lname].values + else: + return None + + if self.get_logtype(lname) == "DISC": + carr = self._convert_np_carr_int(np_array) + else: + carr = self._convert_np_carr_double(np_array) + + return carr + + def get_filled_dataframe( + self, fill_value=const.UNDEF, fill_value_int=const.UNDEF_INT + ): + """Fill the Nan's in the dataframe with real UNDEF values. + + This module returns a copy of the dataframe in the object; it + does not change the instance. + + Note that DISC logs will be casted to columns with integer + as datatype. + + Returns: + A pandas dataframe where Nan er replaces with preset + high XTGeo UNDEF values, or user defined values. 
+ + """ + lnames = self.lognames + + newdf = self._df.copy() + + # make a dictionary of datatypes + dtype = {"X_UTME": "float64", "Y_UTMN": "float64", "Z_TVDSS": "float64"} + + dfill = {"X_UTME": const.UNDEF, "Y_UTMN": const.UNDEF, "Z_TVDSS": const.UNDEF} + + for lname in lnames: + if self.get_logtype(lname) == "DISC": + dtype[lname] = np.int32 + dfill[lname] = fill_value_int + else: + dtype[lname] = np.float64 + dfill[lname] = fill_value + + # now first fill Nan's (because int cannot be converted if Nan) + newdf = newdf.fillna(dfill) + newdf = newdf.astype(dtype) + + return newdf + + def create_relative_hlen(self): + """Make a relative length of a well, as a log. + + The first well og entry defines zero, then the horizontal length + is computed relative to that by simple geometric methods. + """ + # extract numpies from XYZ trajectory logs + xv = self._df["X_UTME"].values + yv = self._df["Y_UTMN"].values + + distance = [] + previous_x, previous_y = xv[0], yv[0] + for i, (x, y) in enumerate(zip(xv, yv)): + distance.append(math.hypot((previous_x - x), (y - previous_y))) + previous_x, previous_y = x, y + + self._df["R_HLEN"] = pd.Series(np.cumsum(distance), index=self._df.index) + + def geometrics(self): + """Compute some well geometrical arrays MD, INCL, AZI, as logs. + + These are kind of quasi measurements hence the logs will named + with a Q in front as Q_MDEPTH, Q_INCL, and Q_AZI. + + These logs will be added to the dataframe. If the mdlogname + attribute does not exist in advance, it will be set to 'Q_MDEPTH'. + + Returns: + False if geometrics cannot be computed + + """ + if self._df.shape[0] < 3: + raise ValueError( + f"Cannot compute geometrics for {self.name}. Not enough " + f"trajectory points (need >3, have: {self.dataframe.shape[0]})" + ) + + # extract numpies from XYZ trajetory logs + ptr_xv = self.get_carray("X_UTME") + ptr_yv = self.get_carray("Y_UTMN") + ptr_zv = self.get_carray("Z_TVDSS") + + # get number of rows in pandas + nlen = self.nrow + + ptr_md = _cxtgeo.new_doublearray(nlen) + ptr_incl = _cxtgeo.new_doublearray(nlen) + ptr_az = _cxtgeo.new_doublearray(nlen) + + ier = _cxtgeo.well_geometrics( + nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 + ) + + if ier != 0: + raise XTGeoCLibError(f"well_geometrics failed with error code: {ier}") + + dnumpy = self._convert_carr_double_np(ptr_md) + self._df["Q_MDEPTH"] = pd.Series(dnumpy, index=self._df.index) + + dnumpy = self._convert_carr_double_np(ptr_incl) + self._df["Q_INCL"] = pd.Series(dnumpy, index=self._df.index) + + dnumpy = self._convert_carr_double_np(ptr_az) + self._df["Q_AZI"] = pd.Series(dnumpy, index=self._df.index) + + if not self._mdlogname: + self._mdlogname = "Q_MDEPTH" + + # delete tmp pointers + _cxtgeo.delete_doublearray(ptr_xv) + _cxtgeo.delete_doublearray(ptr_yv) + _cxtgeo.delete_doublearray(ptr_zv) + _cxtgeo.delete_doublearray(ptr_md) + _cxtgeo.delete_doublearray(ptr_incl) + _cxtgeo.delete_doublearray(ptr_az) + + return True + + def truncate_parallel_path( + self, other, xtol=None, ytol=None, ztol=None, itol=None, atol=None + ): + """Truncate the part of the well trajectory that is ~parallel with other. 
+ + Args: + other (Well): Other well to compare with + xtol (float): Tolerance in X (East) coord for measuring unit + ytol (float): Tolerance in Y (North) coord for measuring unit + ztol (float): Tolerance in Z (TVD) coord for measuring unit + itol (float): Tolerance in inclination (degrees) + atol (float): Tolerance in azimuth (degrees) + """ + if xtol is None: + xtol = 0.0 + if ytol is None: + ytol = 0.0 + if ztol is None: + ztol = 0.0 + if itol is None: + itol = 0.0 + if atol is None: + atol = 0.0 + + if self.dataframe.shape[0] < 3 or other.dataframe.shape[0] < 3: + raise ValueError( + f"Too few points to truncate parallel path, was {self._df.size} and " + f"{other._df.size}, must be >3" + ) + + # extract numpies from XYZ trajectory logs + xv1 = self._df["X_UTME"].values + yv1 = self._df["Y_UTMN"].values + zv1 = self._df["Z_TVDSS"].values + + xv2 = other._df["X_UTME"].values + yv2 = other._df["Y_UTMN"].values + zv2 = other._df["Z_TVDSS"].values + + ier = _cxtgeo.well_trunc_parallel( + xv1, yv1, zv1, xv2, yv2, zv2, xtol, ytol, ztol, itol, atol, 0 + ) + + if ier != 0: + raise RuntimeError("Unexpected error") + + self._df = self._df[self._df["X_UTME"] < const.UNDEF_LIMIT] + self._df.reset_index(drop=True, inplace=True) + + def may_overlap(self, other): + """Consider if well overlap in X Y coordinates with other well, True/False.""" + if self._df.size < 2 or other._df.size < 2: + return False + + # extract numpies from XYZ trajectory logs + xmin1 = np.nanmin(self.dataframe["X_UTME"].values) + xmax1 = np.nanmax(self.dataframe["X_UTME"].values) + ymin1 = np.nanmin(self.dataframe["Y_UTMN"].values) + ymax1 = np.nanmax(self.dataframe["Y_UTMN"].values) + + xmin2 = np.nanmin(other.dataframe["X_UTME"].values) + xmax2 = np.nanmax(other.dataframe["X_UTME"].values) + ymin2 = np.nanmin(other.dataframe["Y_UTMN"].values) + ymax2 = np.nanmax(other.dataframe["Y_UTMN"].values) + + if xmin1 > xmax2 or ymin1 > ymax2: + return False + if xmin2 > xmax1 or ymin2 > ymax1: + return False + + return True + + def limit_tvd(self, tvdmin, tvdmax): + """Truncate the part of the well that is outside tvdmin, tvdmax. + + Range will be in tvdmin <= tvd <= tvdmax. + + Args: + tvdmin (float): Minimum TVD + tvdmax (float): Maximum TVD + """ + self._df = self._df[self._df["Z_TVDSS"] >= tvdmin] + self._df = self._df[self._df["Z_TVDSS"] <= tvdmax] + + self._df.reset_index(drop=True, inplace=True) + + def downsample(self, interval=4, keeplast=True): + """Downsample by sampling every N'th element (coarsen only). + + Args: + interval (int): Sampling interval. + keeplast (bool): If True, the last element from the original + dataframe is kept, to avoid that the well is shortened. + """ + if self._df.size < 2 * interval: + return + + dfr = self._df[::interval] + + if keeplast: + dfr = pd.concat([dfr, self._df.iloc[-1:]], ignore_index=True) + + self._df = dfr.reset_index(drop=True) + + def rescale(self, delta=0.15, tvdrange=None): + """Rescale (refine or coarse) by sampling a delta along the trajectory, in MD. + + Args: + delta (float): Step length + tvdrange (tuple of floats): Resampling can be limited to TVD interval + + .. versionchanged:: 2.2 Added tvdrange + """ + _well_oper.rescale(self, delta=delta, tvdrange=tvdrange) + + def get_polygons(self, skipname=False): + """Return a Polygons object from the well trajectory. + + Args: + skipname (bool): If True then name column is omitted + + .. versionadded:: 2.1 + .. 
versionchanged:: 2.13 Added `skipname` key
+        """
+        dfr = self._df.copy()
+
+        keep = ("X_UTME", "Y_UTMN", "Z_TVDSS")
+        for col in dfr.columns:
+            if col not in keep:
+                dfr.drop(labels=col, axis=1, inplace=True)
+        dfr["POLY_ID"] = 1
+
+        if not skipname:
+            dfr["NAME"] = self.xwellname
+        poly = xtgeo.Polygons()
+        poly.dataframe = dfr
+        poly.name = self.xwellname
+
+        return poly
+
+    def get_fence_polyline(self, sampling=20, nextend=2, tvdmin=None, asnumpy=True):
+        """Return a fence polyline as a numpy array or a Polygons object.
+
+        The result will aim for a regular sampling interval, useful for extracting
+        fence plots (cross-sections).
+
+        Args:
+            sampling (float): Sampling interval i.e. horizontal distance (input)
+            nextend (int): Number of samplings to extend; e.g. 2 * 20
+            tvdmin (float): Minimum TVD starting point.
+            asnumpy (bool): If True, a numpy array, otherwise a Polygons
+                object with 5 columns where the 2 last are HLEN and POLY_ID
+                and the POLY_ID will be set to 0.
+
+        Returns:
+            A numpy array of shape (NLEN, 5) in F order,
+            Or a Polygons object with 5 columns
+            If not possible, return False
+
+        .. versionchanged:: 2.1 improved algorithm
+        """
+        poly = self.get_polygons()
+
+        if tvdmin is not None:
+            poly.dataframe = poly.dataframe[poly.dataframe[poly.zname] >= tvdmin]
+            poly.dataframe.reset_index(drop=True, inplace=True)
+
+        return poly.get_fence(distance=sampling, nextend=nextend, asnumpy=asnumpy)
+
+    def create_surf_distance_log(
+        self,
+        surf: object,
+        name: Optional[str] = "DIST_SURF",
+    ):
+        """Make a log that is vertical distance to a regular surface.
+
+        If the trajectory is above the surface (i.e. more shallow), then the
+        distance sign is positive.
+
+        Args:
+            surf: The RegularSurface instance.
+            name: The name of the new log. If it exists it will be overwritten.
+
+        Example::
+
+            mywell.rescale()  # optional
+            thesurf = xtgeo.RegularSurface("some.gri")
+            mywell.create_surf_distance_log(thesurf, name="sdiff")
+
+        """
+        _well_oper.create_surf_distance_log(self, surf, name)
+
+    def report_zonation_holes(self, threshold=5):
+        """Reports if well has holes in zonation, less or equal to N samples.
+
+        Zonation may have holes due to various reasons, and
+        usually a few undef samples indicates that something is wrong.
+        This method reports the well name and the start interval of the "holes".
+
+        The well shall have a zonelog from import (via the zonelogname attribute)
+        and preferably an MD log (via the mdlogname attribute); however if the
+        latter is not present, a report without MD values will be made.
+
+        Args:
+            threshold (int): Number of samples (max.) that defines a hole, e.g.
+                5 means that undef samples in the range [1, 5] (including 5) is
+                applied
+
+        Returns:
+            A Pandas dataframe as a report. None if no list is made.
+
+        Raises:
+            RuntimeError if zonelog is not present
+        """
+        dfr = _well_oper.report_zonation_holes(self, threshold=threshold)
+
+        return dfr
+
+    def get_zonation_points(
+        self, tops=True, incl_limit=80, top_prefix="Top", zonelist=None, use_undef=False
+    ):
+        """Extract zonation points from Zonelog and make a marker list.
+
+        Currently it is either 'Tops' or 'Zone' (thicknesses); default
+        is tops (i.e. tops=True).
+
+        The `zonelist` can be a list of zones, or a tuple with two members specifying
+        first and last member. Note however that the zonation shall be without jumps
+        and increasing.
E.g.:: + + zonelist=(1, 5) # meaning [1, 2, 3, 4, 5] + # or + zonelist=[1, 2, 3, 4] + # while _not_ legal: + zonelist=[1, 4, 8] + + Zone numbers less than 0 are not accepted + + Args: + tops (bool): If True then compute tops, else (thickness) points. + incl_limit (float): If given, and usezone is True, the max + angle of inclination to be used as input to zonation points. + top_prefix (str): As well logs usually have isochore (zone) name, + this prefix could be Top, e.g. 'SO43' --> 'TopSO43' + zonelist (list of int or tuple): Zones to use + use_undef (bool): If True, then transition from UNDEF is also + used. + + + Returns: + A pandas dataframe (ready for the xyz/Points class), None + if a zonelog is missing + """ + # make a copy of the well instance as some tmp well logs are made + scopy = self.copy() + + dfr = _wellmarkers.get_zonation_points( + scopy, tops, incl_limit, top_prefix, zonelist, use_undef + ) + + del scopy + + return dfr + + def get_zone_interval(self, zonevalue, resample=1, extralogs=None): + """Extract the X Y Z ID line (polyline) segment for a given zonevalue. + + Args: + zonevalue (int): The zone value to extract + resample (int): If given, downsample every N'th sample to make + polylines smaller in terms of bit and bytes. + 1 = No downsampling. + extralogs (list of str): List of extra log names to include + + + Returns: + A pandas dataframe X Y Z ID (ready for the xyz/Polygon class), + None if a zonelog is missing or actual zone does dot + exist in the well. + """ + if resample < 1 or not isinstance(resample, int): + raise KeyError("Key resample of wrong type (must be int >= 1)") + + dff = self.get_filled_dataframe() + + # the technical solution here is to make a tmp column which + # will add one number for each time the actual segment is repeated, + # not straightforward... (thanks to H. Berland for tip) + + dff["ztmp"] = dff[self.zonelogname] + dff["ztmp"] = (dff[self.zonelogname] != zonevalue).astype(int) + + dff["ztmp"] = (dff.ztmp != dff.ztmp.shift()).cumsum() + + dff = dff[dff[self.zonelogname] == zonevalue] + + m1v = dff["ztmp"].min() + m2v = dff["ztmp"].max() + if np.isnan(m1v): + logger.debug("Returns (no data)") + return None + + df2 = dff.copy() + + dflist = [] + for mvv in range(m1v, m2v + 1): + dff9 = df2.copy() + dff9 = df2[df2["ztmp"] == mvv] + if dff9.index.shape[0] > 0: + dflist.append(dff9) + + dxlist = [] + + useloglist = ["X_UTME", "Y_UTMN", "Z_TVDSS", "POLY_ID"] + if extralogs is not None: + useloglist.extend(extralogs) + + # pylint: disable=consider-using-enumerate + for ivv in range(len(dflist)): + dxf = dflist[ivv] + dxf = dxf.rename(columns={"ztmp": "POLY_ID"}) + cols = [xxx for xxx in dxf.columns if xxx not in useloglist] + + dxf = dxf.drop(cols, axis=1) + + # now (down) resample every N'th + if resample > 1: + dxf = pd.concat([dxf.iloc[::resample, :], dxf.tail(1)]) + + dxlist.append(dxf) + + dff = pd.concat(dxlist) + dff.reset_index(inplace=True, drop=True) + + logger.debug("Dataframe from well:\n%s", dff) + return dff + + def get_fraction_per_zone( + self, + dlogname, + dcodes, + zonelist=None, + incl_limit=80, + count_limit=3, + zonelogname=None, + ): + """Get fraction of a discrete parameter, e.g. a facies, per zone. + + It can be constrained by an inclination. + + Also, it needs to be evaluated only of ZONE is complete; either + INCREASE or DECREASE ; hence a quality flag is made and applied. + + Args: + dlogname (str): Name of discrete log, e.g. 
'FACIES'
+            dcodes (list of int): Codes of facies (or similar) to report for
+            zonelist (list of int): Zones to use
+            incl_limit (float): Inclination limit for well path.
+            count_limit (int): Minimum number of counts required per segment
+                for valid calculations
+            zonelogname (str): If None, the Well().zonelogname attribute is
+                applied
+
+        Returns:
+            A pandas dataframe (ready for the xyz/Points class), None
+            if a zonelog or dlogname is missing, or the list is zero
+            length for any reason.
+        """
+        dfr = _wellmarkers.get_fraction_per_zone(
+            self,
+            dlogname,
+            dcodes,
+            zonelist=zonelist,
+            incl_limit=incl_limit,
+            count_limit=count_limit,
+            zonelogname=zonelogname,
+        )
+
+        return dfr
+
+    def mask_shoulderbeds(
+        self,
+        inputlogs: List[str],
+        targetlogs: List[str],
+        nsamples: Optional[Union[int, Dict[str, float]]] = 2,
+        strict: Optional[bool] = False,
+    ) -> bool:
+        """Mask data around zone boundaries or other discrete log boundaries.
+
+        This operates on number of samples, hence the actual distance which is masked
+        depends on the sampling interval (i.e. count) or on distance measures.
+        Distance measures are TVD (true vertical depth) or MD (measured depth).
+
+        .. image:: images/wells-mask-shoulderbeds.png
+           :width: 300
+           :align: center
+
+        Args:
+            inputlogs: List of input logs, must be of discrete type.
+            targetlogs: List of logs where mask is applied.
+            nsamples: Number of samples around boundaries to filter, per side, i.e.
+                value 2 means 2 above and 2 below, in total 4 samples.
+                As alternative specify nsamples indirectly with a relative distance,
+                as a dictionary with one record, as {"tvd": 0.5} or {"md": 0.7}.
+            strict: If True, will raise Exception if any of the input or target log
+                names are missing.
+
+        Returns:
+            True if any operation has been done. False in case nothing has been done,
+            e.g. no targetlogs for this particular well and ``strict`` is False.
+
+        Raises:
+            ValueError: Various messages when wrong or inconsistent input.
+
+        Example:
+            >>> mywell1 = Well(well_dir + '/OP_1.w')
+            >>> mywell2 = Well(well_dir + '/OP_2.w')
+            >>> did_succeed = mywell1.mask_shoulderbeds(["Zonelog", "Facies"], ["Perm"])
+            >>> did_succeed = mywell2.mask_shoulderbeds(
+            ...     ["Zonelog"],
+            ...     ["Perm"],
+            ...     nsamples={"tvd": 0.8}
+            ... )
+
+        """
+        return _well_oper.mask_shoulderbeds(
+            self, inputlogs, targetlogs, nsamples, strict
+        )
+
+    def get_surface_picks(self, surf):
+        """Return :class:`.Points` obj where well crosses the surface (horizon picks).
+
+        There may be several points in the Points() dataframe attribute.
+        Also a ``DIRECTION`` column will show 1 if surface is penetrated from
+        above, and -1 if penetrated from below.
+
+        Args:
+            surf (RegularSurface): The surface instance
+
+        Returns:
+            A :class:`.Points` instance, or None if no crossing points
+
+        .. versionadded:: 2.8
+
+        """
+        return _wellmarkers.get_surface_picks(self, surf)
+
+    def make_ijk_from_grid(self, grid, grid_id="", algorithm=2, activeonly=True):
+        """Look through a Grid and add grid I J K as discrete logs.
+
+        Note that the grid counting has base 1 (first row is 1 etc).
+
+        By default, the log names (i.e. column names in the dataframe) will be
+        ICELL, JCELL, KCELL, but you can add a tag (ID) to that name.
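+
+        Example (illustrative; assumes a Grid instance ``mygrid`` covering
+        the well trajectory)::
+
+            mywell.make_ijk_from_grid(mygrid, grid_id="_geo")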
+ + Args: + grid (Grid): A XTGeo Grid instance + grid_id (str): Add a tag (optional) to the current log name + algorithm (int): Which interbal algorithm to use, default is 2 (expert + setting) + activeonly (bool): If True, only active cells are applied (algorithm 2 only) + + Raises: + RuntimeError: 'Error from C routine, code is ...' + + .. versionchanged:: 2.9 Added keys for and `activeonly` + """ + _well_oper.make_ijk_from_grid( + self, grid, grid_id=grid_id, algorithm=algorithm, activeonly=activeonly + ) + + def make_zone_qual_log(self, zqname): + """Create a zone quality/indicator (flag) log. + + This routine looks through to zone log and flag intervals according + to neighbouring zones: + + * 0: Undetermined flag + + * 1: Zonelog interval numbering increases, + e.g. for zone 2: 1 1 1 1 2 2 2 2 2 5 5 5 5 5 + + * 2: Zonelog interval numbering decreases, + e.g. for zone 2: 6 6 6 2 2 2 2 1 1 1 + + * 3: Interval is a U turning point, e.g. 0 0 0 2 2 2 1 1 1 + + * 4: Interval is a inverse U turning point, 3 3 3 2 2 2 5 5 + + * 9: Interval is bounded by one or more missing sections, + e.g. 1 1 1 2 2 2 -999 -999 + + If a log with the name exists, it will be silently replaced + + Args: + zqname (str): Name of quality log + """ + _well_oper.make_zone_qual_log(self, zqname) + + def get_gridproperties( + self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_id="_model" + ): + """Look through a Grid and add a set of grid properties as logs. + + The name of the logs will ... + + This can be done to sample model properties along a well. + + Args: + gridprops (Grid): A XTGeo GridProperties instance (a collection + of properties) or a single GridProperty instance + grid (Grid or tuple): A XTGeo Grid instance or a reference + via tuple. If this is tuple with log names, + it states that these logs already contains + the gridcell IJK numbering. + prop_id (str): Add a tag (optional) to the current log name, e.g + as PORO_model, where _model is the tag. + + Raises: + None + + .. versionadded:: 2.1 + + """ + _well_oper.get_gridproperties(self, gridprops, grid=grid, prop_id=prop_id) + + # ================================================================================== + # PRIVATE METHODS + # should not be applied outside the class + # ================================================================================== + + # ---------------------------------------------------------------------------------- + # Import/Export methods for various formats + # ---------------------------------------------------------------------------------- + + # ---------------------------------------------------------------------------------- + # Special methods for nerds, todo is to move to private module + # ---------------------------------------------------------------------------------- + + def _convert_np_carr_int(self, np_array): + """Convert numpy 1D array to C array, assuming int type. 
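A hedged usage sketch of the grid-related methods above; ``mywell``, ``mygrid``
and ``myprops`` are placeholder names, not objects defined in this patch::

    # add ICELL/JCELL/KCELL discrete logs from the grid geometry ...
    mywell.make_ijk_from_grid(mygrid, algorithm=2, activeonly=True)

    # ... then sample grid properties along the well via those logs; a grid
    # property PORO should appear as the log PORO_model (default prop_id tag),
    # cf. the docstrings above
    mywell.get_gridproperties(myprops, grid=("ICELL", "JCELL", "KCELL"))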
+ + The numpy is always a double (float64), so need to convert first + """ + carr = _cxtgeo.new_intarray(self.nrow) + + np_array = np_array.astype(np.int32) + + _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) + + return carr + + def _convert_np_carr_double(self, np_array): + """Convert numpy 1D array to C array, assuming double type.""" + carr = _cxtgeo.new_doublearray(self.nrow) + + _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) + + return carr + + def _convert_carr_double_np(self, carray, nlen=None): + """Convert a C array to numpy, assuming double type.""" + if nlen is None: + nlen = len(self._df.index) + + nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) + + return nparray diff --git a/src/xtgeo/well/_well_aux.py b/src/xtgeo/well/_well_aux.py new file mode 100644 index 000000000..12c8eda83 --- /dev/null +++ b/src/xtgeo/well/_well_aux.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +"""Auxillary functions for the well class + +'self' is a Well() instance + +""" + +from __future__ import annotations + +import functools +import warnings +from pathlib import Path +from typing import Any, Callable, Optional + +import numpy as np +import numpy.typing as npt +import pandas as pd + +import xtgeo +import xtgeo.cxtgeo._cxtgeo as _cxtgeo # type: ignore +from xtgeo.common import XTGeoDialog + +from . import _well_io + +xtg = XTGeoDialog() +logger = xtg.functionlogger(__name__) + + +def _data_reader_factory(file_format: Optional[str] = None): + if file_format in ["rmswell", "irap_ascii", None]: + return _well_io.import_rms_ascii + if file_format == "hdf": + return _well_io.import_hdf5_well + raise ValueError( + f"Unknown file format {file_format}, supported formats are " + "'rmswell', 'irap_ascii' and 'hdf'" + ) + + +def allow_deprecated_init(func: Callable): + # This decorator is here to maintain backwards compatibility in the + # construction of Well and should be deleted once the deprecation period + # has expired, the construction will then follow the new pattern. + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + if not args and not kwargs: + warnings.warn( + "Initializing empty well is deprecated, please provide " + "non-defaulted values, or use mywell = " + "xtgeo.well_from_file('filename')", + DeprecationWarning, + ) + return func( + self, + *([0.0] * 3), + "", + pd.DataFrame({"X_UTME": [], "Y_UTMN": [], "Z_TVDSS": []}), + ) + + # Checking if we are doing an initialization from file and raise a + # deprecation warning if we are. + if "wfile" in kwargs or ( + len(args) >= 1 and isinstance(args[0], (str, Path, xtgeo._XTGeoFile)) + ): + warnings.warn( + "Initializing directly from file name is deprecated and will be " + "removed in xtgeo version 4.0. Use: " + "mywell = xtgeo.well_from_file('filename') instead", + DeprecationWarning, + ) + if len(args) >= 1: + wfile = args[0] + args = args[1:] + else: + wfile = kwargs.pop("wfile", None) + if len(args) >= 1: + fformat = args[0] + args = args[1:] + else: + fformat = kwargs.pop("fformat", None) + + mfile = xtgeo._XTGeoFile(wfile) + if fformat is None or fformat == "guess": + fformat = mfile.detect_fformat() + else: + fformat = mfile.generic_format_by_proposal(fformat) + kwargs = _data_reader_factory(fformat)(mfile, *args, **kwargs) + kwargs["filesrc"] = mfile.file + return func(self, **kwargs) + return func(self, *args, **kwargs) + + return wrapper + + +def ensure_consistency(self): + """Ensure consistency within an object (private function). + + Consistency checking. 
As well log names are columns in the Pandas DF, + there are additional attributes per log that have to be "in sync". + """ + if not all( + coordinate in self._df.columns for coordinate in ("X_UTME", "Y_UTMN", "Z_TVDSS") + ): + raise ValueError( + "Well dataframe must include 'X_UTME'," + f" 'Y_UTMN' and 'Z_TVDSS', got {self._df.columns}" + ) + if self._wlogtypes is None: + self._wlogtypes = dict() + if self._wlogrecords is None: + self._wlogrecords = dict() + + for logname in self.get_lognames(): + if logname not in self._wlogtypes: + self._wlogtypes[logname] = "CONT" # continuous as default + self._wlogrecords[logname] = None # None as default + else: + if self._wlogtypes[logname] not in self.VALID_LOGTYPES: + self._wlogtypes[logname] = "CONT" + self._wlogrecords[logname] = None # None as default + + if logname not in self._wlogrecords: + if self._wlogtypes[logname] == "DISC": + # it is a discrete log with missing record; try to find + # a default one based on current values... + lvalues = self._df[logname].values.round(decimals=0) + lmin = int(lvalues.min()) + lmax = int(lvalues.max()) + + lvalues = lvalues.astype("int") + codes = {} + for lval in range(lmin, lmax + 1): + if lval in lvalues: + codes[lval] = str(lval) + + self._wlogrecords = codes + + +def _convert_np_carr_int(self, np_array: npt.NDArray[np.int_]) -> Any: + """Convert numpy 1D array to C array, assuming int type. + + The numpy is always a double (float64), so need to convert first + """ + carr = _cxtgeo.new_intarray(self.nrow) + + np_array = np_array.astype(np.int32) + + _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) + + return carr + + +def _convert_np_carr_double(self, np_array: npt.NDArray[np.float64]) -> Any: + """Convert numpy 1D array to C array, assuming double type.""" + carr = _cxtgeo.new_doublearray(self.nrow) + + _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) + + return carr + + +def _convert_carr_double_np( + self, carray: Any, nlen: Optional[int] = None +) -> npt.NDArray[np.float64]: + """Convert a C array (SWIG pointer) to numpy, assuming double type.""" + if nlen is None: + nlen = len(self._df.index) + + nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) + + return nparray + + +def get_carray(self, lname: str) -> Optional[Any]: + """Returns the C array pointer (via SWIG) for a given log. + + Type conversion is double if float64, int32 if DISC log. + Returns None if log does not exist. 
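A sketch of how these module-level helpers chain together (internal use only;
names and signatures as defined above)::

    # a CONT log becomes a C double* pointer; a DISC log becomes an int*
    ptr_x = get_carray(mywell, "X_UTME")
    ptr_zone = get_carray(mywell, "Zonelog")

    # a double* can be pulled back into numpy; the row count defaults to the
    # length of the well dataframe
    arr = _convert_carr_double_np(mywell, ptr_x)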
+ """ + if lname in self._df: + np_array = self._df[lname].values + else: + return None + + if self.get_logtype(lname) == "DISC": + carr = _convert_np_carr_int(self, np_array) + else: + carr = _convert_np_carr_double(self, np_array) + + return carr diff --git a/src/xtgeo/well/_well_io.py b/src/xtgeo/well/_well_io.py index 45b87a8bc..c15305725 100644 --- a/src/xtgeo/well/_well_io.py +++ b/src/xtgeo/well/_well_io.py @@ -32,7 +32,7 @@ def import_rms_ascii( xlognames = [] lnum = 1 - with open(wfile.file, "r") as fwell: + with open(wfile.file, "r", encoding="UTF-8") as fwell: for line in fwell: if lnum == 1: _ffver = line.strip() # noqa, file version diff --git a/src/xtgeo/well/_well_oper.py b/src/xtgeo/well/_well_oper.py index 4793b82da..ccfd890a8 100644 --- a/src/xtgeo/well/_well_oper.py +++ b/src/xtgeo/well/_well_oper.py @@ -10,6 +10,8 @@ from xtgeo.common import XTGeoDialog from xtgeo.common import constants as const +from ._well_aux import get_carray + xtg = XTGeoDialog() logger = xtg.functionlogger(__name__) @@ -24,7 +26,7 @@ def delete_log(self, lname): lcount = 0 for logn in lname: - if logn not in self._wlognames: + if logn not in self.get_lognames(): logger.info("Log does no exist: %s", logn) continue @@ -196,9 +198,9 @@ def _make_ijk_from_grid_v1(self, grid, grid_id=""): """ logger.info("Using algorithm 1 in %s", __name__) - wxarr = self.get_carray("X_UTME") - wyarr = self.get_carray("Y_UTMN") - wzarr = self.get_carray("Z_TVDSS") + wxarr = get_carray(self, "X_UTME") + wyarr = get_carray(self, "Y_UTMN") + wzarr = get_carray(self, "Z_TVDSS") nlen = self.nrow wivec = _cxtgeo.new_intarray(nlen) @@ -340,7 +342,6 @@ def get_gridproperties(self, gridprops, grid=("ICELL", "JCELL", "KCELL"), prop_i arr[np.isnan(xind)] = np.nan pname = prop.name + prop_id self.dataframe[pname] = arr - self._wlognames.append(pname) if prop.isdiscrete: self._wlogtypes[pname] = "DISC" self._wlogrecords[pname] = copy.deepcopy(prop.codes) diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py new file mode 100644 index 000000000..a6b1ea31f --- /dev/null +++ b/src/xtgeo/well/_welldata.py @@ -0,0 +1,254 @@ +"""Module for private _WellData class + + X_UTME Y_UTMN Z_TVDSS MDepth PHIT KLOGH Sw +0 463256.911 5930542.294 -49.0000 0.0000 NaN NaN NaN ... +1 463256.912 5930542.295 -48.2859 0.5000 NaN NaN NaN ... +2 463256.913 5930542.296 -47.5735 1.0000 NaN NaN NaN ... +3 463256.914 5930542.299 -46.8626 1.5000 NaN NaN NaN ... +4 463256.916 5930542.302 -46.1533 2.0000 NaN NaN NaN ... + ... ... ... ... ... ... ... + +Where each log beside the 3 first has a wlogtypes dictoniary, telling if the logs are +treated as discrete (DISC) or continuous (CONT). In addition there is a wlogrecords +dict, storing the unit for continuous logs (defaulted to None) or a dictionary +of codes if the log in DISC type. + +The purpose here is to automate; if a column is added to the dataframe, then the +class methods here will try to guess the wlogtype and wlogtype, and add those; similarly +of a column is removed, the corresponding entries in wlogtypes and wlogrecords will be +deleted. 
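The intended automation can be sketched with a tiny frame (assumed behaviour,
mirroring the tests added later in this series)::

    import pandas as pd

    df = pd.DataFrame({"X_UTME": [0.0], "Y_UTMN": [0.0], "Z_TVDSS": [0.0]})
    wd = _WellData(df)

    wd.data["GR"] = 123.4    # plain pandas column assignment ...
    wd.ensure_consistency()  # ... and the log bookkeeping catches up
    assert wd.wlogtypes["GR"] == "CONT"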
+""" +from dataclasses import dataclass, field +from enum import Enum, EnumMeta, unique + +import pandas as pd + + +class _LogTypeMeta(EnumMeta): + """For 'in' method, cf https://stackoverflow.com/questions/43634618""" + + def __contains__(cls, item): + try: + cls(item) + except ValueError: + return False + else: + return True + + +@unique +class _LogType(Enum, metaclass=_LogTypeMeta): + """Enumerate type of log""" + + CONT = "CONT" + DISC = "DISC" + + +CONT_DEFAULT_WLOGRECORD = ("UNKNOWN", "LINEAR") + + +@dataclass +class _WellData: + """Private class for the Well log data, where a Pandas dataframe is core. + + The data are stored in pandas dataframes, and by default, all logs are float, and + np.nan defines undefined values. Even if they are DISC. The reason for this is + restrictions in older versions of Pandas. + + All values in the dataframe shall be numbers. + + The wlogstypes is on form {"PHIT": CONT, "FACIES": DISC, ...} + + The wlogrecords is somewhat heterogenous, on form: + {"PHIT": ("unit", "scale"), "FACIES": {0:BG, 2: "SST", 4: "CALC"}} + Hence the CONT logs hold a tuple or list with 2 str members, or None, while DISC + log holds a dict where the key is an int and the value is a string. + """ + + data: pd.DataFrame + wlogtypes: dict = field(default_factory=dict) + wlogrecords: dict = field(default_factory=dict) + xname: str = "X_UTME" + yname: str = "Y_UTMN" + zname: str = "Z_TVDSS" + + def __post_init__(self): + self.ensure_consistency() + + def _infer_log_dtypes(self): + """Return as dict on form {"X_UTME": "CONT", .... "FACIES": "DISC"}. + + There are some important restrictions: + * The first 3 columns X Y Z) are always CONT, even if input appears as DISC. + * A check is made towards existing wlogtypes; if the key,value pair exists + already, this function will *not* force a change but keep as is. + """ + + new_df = self.data.convert_dtypes() + + dlist = new_df.dtypes.to_dict() + print(self.wlogtypes) + + datatypes = {} + for name, dtype in dlist.items(): + if name in self.wlogtypes: + datatypes[name] = self.wlogtypes[name] # keep as is + continue + + if name in (self.xname, self.yname, self.zname): + # force coordinates, first 3 columns, to be CONT + datatypes[name] = _LogType.CONT.value + continue + + if "Float" in str(dtype): + datatypes[name] = _LogType.CONT.value + elif "Int" in str(dtype): + datatypes[name] = _LogType.DISC.value + else: + raise RuntimeError( + f"Log type seems to be something else than Float or Int for {name}" + ) + return datatypes + + def _ensure_consistency_wlogtypes(self): + """Ensure that dataframe and wlogtypes are consistent. + + wlogtypes are on form {"GR": "CONT", "ZONES": "DISC", ...} + + The column data in the dataframe takes precedence; i.e. if a column is removed + in a pandas operation, then wlogtypes are adapted silently by removing the item + from the dict. + """ + # check first if a log is removed in the dataframe (e.g. by pandas operations) + for logname in list(self.wlogtypes.keys()): + if logname not in self.data.columns[3:]: + del self.wlogtypes[logname] + + self.wlogtypes = self._infer_log_dtypes() + + def _ensure_consistency_wlogrecords(self): + """Ensure that data and wloglogrecords are consistent; cf wlogtypes. + + Important that wlogtypes are correct; i.e. run _ensure_consistency_wlogtypes() + first. 
+        for logname, dtype in self.wlogtypes.items():
+            if logname not in self.wlogrecords or not isinstance(
+                self.wlogrecords[logname], (dict, list, tuple)
+            ):
+                if dtype == _LogType.CONT.value:
+                    self.wlogrecords[logname] = CONT_DEFAULT_WLOGRECORD
+
+                if dtype == _LogType.DISC.value:
+                    # it is a discrete log with missing record; try to find
+                    # a default one based on current values...
+                    lvalues = self.data[logname].values.round(decimals=0)
+                    lmin = int(lvalues.min())
+                    lmax = int(lvalues.max())
+
+                    lvalues = lvalues.astype("int")
+                    codes = {}
+                    for lval in range(lmin, lmax + 1):
+                        if lval in lvalues:
+                            codes[lval] = str(lval)
+
+                    self.wlogrecords[logname] = codes
+
+            # correct when wlogtypes is CONT but wlogrecords for that entry is a dict
+            if (
+                logname in self.wlogrecords
+                and self.wlogtypes[logname] == _LogType.CONT.value
+            ):
+                if isinstance(self.wlogrecords[logname], dict):
+                    self.wlogrecords[logname] = CONT_DEFAULT_WLOGRECORD
+
+    def _ensure_consistency_df_dtypes(self):
+        """Ensure that the dataframe is float32 for all logs, except X Y Z -> float64."""
+
+        col = list(self.data)
+
+        coords_dtypes = [str(entry) for entry in self.data[col[0:3]].dtypes]
+
+        if not all(["float64" in entry for entry in coords_dtypes]):
+            self.data[col[0:3]] = self.data.iloc[:, 0:3].astype("float64")
+
+        logs_dtypes = [str(entry) for entry in self.data[col[3:]].dtypes]
+
+        if not all(["float32" in entry for entry in logs_dtypes]):
+            self.data[col[3:]] = self.data.iloc[:, 3:].astype("float32")
+
+    def ensure_consistency(self):
+        """Ensure that data and wlog* are consistent.
+
+        This is important for many operations on the dataframe, and should keep
+        wlogtypes and wlogrecords 'in sync' with the dataframe.
+
+        * When adding one or more columns to the dataframe
+        * When removing one or more columns from the dataframe
+        """
+
+        if list(self.data.columns[:3]) != [self.xname, self.yname, self.zname]:
+            raise ValueError(
+                f"Well dataframe must include '{self.xname}', '{self.yname}' "
+                f"and '{self.zname}', got {list(self.data.columns[:3])}"
+            )
+
+        self._ensure_consistency_wlogtypes()
+        self._ensure_consistency_wlogrecords()
+        self._ensure_consistency_df_dtypes()
+
+    def set_wlogtype(self, name: str, wtype: str) -> None:
+        """Set a wlogtype for a named log.
+
+        A bit of flexibility is added for wtype, e.g.
allowing "float*" for CONT etc, and + allow lowercase "cont" for CONT + + """ + + apply_wtype = wtype.upper() + if "FLOAT" in apply_wtype: + apply_wtype = "CONT" + if "INT" in apply_wtype: + apply_wtype = "DISC" + + if name not in self.wlogtypes: + raise ValueError(f"No such well log name present: {name}") + + if apply_wtype in _LogType: + self.wlogtypes[name] = _LogType(apply_wtype) + else: + raise ValueError( + f"Cannot set wlogtype as {wtype}, not in {list(_LogType.__members__)}" + ) + + self.ensure_consistency() + + def set_wlogrecord(self, name: str, record: dict) -> None: + """Set a wlogrecord for a named log.""" + + if name not in self.wlogtypes: + raise ValueError(f"No such well log name present: {name}") + + if self.wlogtypes[name] == _LogType.CONT.value and isinstance( + record, (list, tuple) + ): + if len(record) == 2: + self.wlogrecords[name] = tuple(record) # prefer as tuple + elif self.wlogtypes[name] == _LogType.CONT.value and isinstance(record, dict): + raise ValueError( + "Cannot set a log record for a continuous log: input record is " + "dictionary, not a list or tuple" + ) + elif self.wlogtypes[name] == _LogType.DISC.value and isinstance(record, dict): + self.wlogrecords[name] = record + elif self.wlogtypes[name] == _LogType.DISC.value and not isinstance( + record, dict + ): + raise ValueError( + "Cannot set a log record for a discrete log: input record is " + "not a dictionary" + ) + else: + raise ValueError("Something went wrong when setting logrecord.") + + self.ensure_consistency() diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index da3778921..27da6426d 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- """XTGeo well module, working with one single well.""" -import functools +from __future__ import annotations + import io import math -import warnings -from collections import OrderedDict from copy import deepcopy from pathlib import Path from typing import Dict, List, Optional, Union @@ -17,30 +16,15 @@ import xtgeo import xtgeo.common.constants as const import xtgeo.cxtgeo._cxtgeo as _cxtgeo -from xtgeo import XTGeoCLibError +from xtgeo import XTGeoCLibError # type: ignore[attr-defined] -from . import _well_io, _well_oper, _well_roxapi, _wellmarkers +from . import _well_aux, _well_io, _well_oper, _well_roxapi, _welldata, _wellmarkers -xtg = xtgeo.common.XTGeoDialog() +xtg = xtgeo.XTGeoDialog() logger = xtg.functionlogger(__name__) - -# pylint: disable=too-many-public-methods - - # ====================================================================================== -# METHODS as wrappers to class init + import - - -def _data_reader_factory(file_format): - if file_format in ["rmswell", "irap_ascii"]: - return _well_io.import_rms_ascii - if file_format == "hdf": - return _well_io.import_hdf5_well - raise ValueError( - f"Unknown file format {file_format}, supported formats are " - "'rmswell', 'irap_ascii' and 'hdf'" - ) +# Functions, as wrappers to class methods def well_from_file( @@ -51,29 +35,25 @@ def well_from_file( lognames: Optional[Union[str, List[str]]] = "all", lognames_strict: Optional[bool] = False, strict: Optional[bool] = False, -) -> "Well": +) -> Well: """Make an instance of a Well directly from file import. - Note: - - rms_ascii is the only correct for wells from RMS. Irap did not have this - format. For maps and points, the formats from the old Irap tool is - applied in RMS, hence "irap_ascii" and "rms_ascii" are there the same. 
- Args: - wfile: File path, either a string or a pathlib.Path instance - fformat: See :meth:`Well.from_file` - mdlogname: Name of Measured Depth log if any + wfile: File path for well, either a string or a pathlib.Path instance + fformat: "rms_ascii" or "hdf5" + mdlogname: Name of Measured Depth log, if any zonelogname: Name of Zonelog, if any lognames: Name or list of lognames to import, default is "all" lognames_strict: If True, all lognames must be present. strict: If True, then import will fail if zonelogname or mdlogname are asked - for but not present in wells. + for but those names are not present in wells. Example:: >>> import xtgeo - >>> mywell = xtgeo.well_from_file(well_dir + "/OP_1.w") + >>> import pathlib + >>> welldir = pathlib.Path("../foo") + >>> mywell = xtgeo.well_from_file(welldir / "OP_1.w") .. versionchanged:: 2.1 Added ``lognames`` and ``lognames_strict`` .. versionchanged:: 2.1 ``strict`` now defaults to False @@ -98,19 +78,18 @@ def well_from_roxar( lognames_strict: Optional[bool] = False, inclmd: Optional[bool] = False, inclsurvey: Optional[bool] = False, -) -> "Well": +) -> xtgeo.Well: """This makes an instance of a Well directly from Roxar RMS. - Note this method works only when inside RMS, or when RMS license is - activated. + activated (through the roxar environment). Args: - project: Path to project or magic ``project`` variable in RMS. + project: Path to project or magic the ``project`` variable in RMS. name: Name of Well, as shown in RMS. trajectory: Name of trajectory in RMS. logrun: Name of logrun in RMS. - lognames: List of lognames to import or use 'all' for all present logs + lognames: List of lognames to import, or use 'all' for all present logs lognames_strict: If True and log is not in lognames is a list, an Exception will be raised. inclmd: If True, a Measured Depth log will be included. @@ -130,6 +109,7 @@ def well_from_roxar( .. versionchanged:: 2.1 lognames defaults to "all", not None """ + # TODO - mdlogname and zonelogname return Well._read_roxar( project, name, @@ -142,163 +122,71 @@ def well_from_roxar( ) -def allow_deprecated_init(func): - # This decorator is here to maintain backwards compatibility in the - # construction of Well and should be deleted once the deprecation period - # has expired, the construction will then follow the new pattern. - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - if not args and not kwargs: - warnings.warn( - "Initializing empty well is deprecated, please provide " - "non-defaulted values, or use mywell = " - "xtgeo.well_from_file('filename')", - DeprecationWarning, - ) - return func( - self, - *([0.0] * 3), - "", - pd.DataFrame({"X_UTME": [], "Y_UTMN": [], "Z_TVDSS": []}), - ) - - # Checking if we are doing an initialization from file and raise a - # deprecation warning if we are. - if "wfile" in kwargs or ( - len(args) >= 1 and isinstance(args[0], (str, Path, xtgeo._XTGeoFile)) - ): - warnings.warn( - "Initializing directly from file name is deprecated and will be " - "removed in xtgeo version 4.0. 
Use: " - "mywell = xtgeo.well_from_file('filename') instead", - DeprecationWarning, - ) - if len(args) >= 1: - wfile = args[0] - args = args[1:] - else: - wfile = kwargs.pop("wfile", None) - if len(args) >= 1: - fformat = args[0] - args = args[1:] - else: - fformat = kwargs.pop("fformat", None) - - mfile = xtgeo._XTGeoFile(wfile) - if fformat is None or fformat == "guess": - fformat = mfile.detect_fformat() - else: - fformat = mfile.generic_format_by_proposal(fformat) - kwargs = _data_reader_factory(fformat)(mfile, *args, **kwargs) - kwargs["filesrc"] = mfile.file - return func(self, **kwargs) - return func(self, *args, **kwargs) - - return wrapper - - class Well: - """Class for a well in the XTGeo framework. + """Class for a single well in the XTGeo framework. The well logs are stored in a Pandas dataframe, which make manipulation easy and fast. - The well trajectory are here represented as logs, and XYZ have magic names: - ``X_UTME``, ``Y_UTMN``, ``Z_TVDSS``, which are the three first Pandas columns. + The well trajectory are here represented as first 3 columns in the dataframe, + and XYZ have pre-defined names: ``X_UTME``, ``Y_UTMN``, ``Z_TVDSS``. - Other geometry logs has also 'semi-magic' names: + Other geometry logs may has also 'semi-defined' names, but this is not a strict + rule: - M_MDEPTH or Q_MDEPTH: Measured depth, either real/true (M_xx) or + ``M_MDEPTH`` or ``Q_MDEPTH``: Measured depth, either real/true (M_xx) or quasi computed/estimated (Q_xx). The Quasi may be incorrect for all uses, but sufficient for some computations. - Similar for M_INCL, Q_INCL, M_AZI, Q_ASI. + Similar for ``M_INCL``, ``Q_INCL``, ``M_AZI``, ``Q_ASI``. All Pandas values (yes, discrete also!) are currently stored as float64 format, and undefined values are Nan. Integers are stored as Float due - to the (historic) lacking support for 'Integer Nan'. In coming versions, - use of ``pandas.NA`` (available from Pandas version 1.0) may be implemented. + to the (historic) lacking support for 'Integer Nan'. Note there is a method that can return a dataframe (copy) with Integer and Float columns, see :meth:`get_filled_dataframe`. - The instance can be made either from file or (todo!) by specification:: + The instance can be made either from file or by specification:: - >>> well1 = Well(well_dir + '/OP_1.w') # assume RMS ascii well - >>> well2 = Well(well_dir + '/OP_1.w', fformat='rms_ascii') - >>> well3 = xtgeo.well_from_file(well_dir + '/OP_1.w') + >>> well1 = xtgeo.well_from_file(well_dir + '/OP_1.w') + >>> well2 = xtgeo.Well(rkb=32.0, xpos=1234.0, ypos=4567.0, wname="Foo", + df: mydataframe, ...) Args: - rkb: well RKB height - xpos: well head X pos - ypos: well head Y pos + rkb: Well RKB height + xpos: Well head X pos + ypos: Well head Y pos wname: well name - df: pandas dataframe with log values, expects columns to include + df: A pandas dataframe with log values, expects columns to include 'X_UTME', 'Y_UTMN', 'Z_TVDSS' for x, y and z coordinates. Other columns should be log values. filesrc: source file if any - mdlogname: Name of Measured Depth log if any. + mdlogname: Name of Measured Depth log, if any. zonelogname: Name of Zonelog, if any - wlogtypes: dictionary of log types, 'DISC' or 'CONT', defaults to - to 'CONT'. + wlogtypes: dictionary of log types, 'DISC' (discrete) or 'CONT' (continuous), + defaults to to 'CONT'. wlogrecords: dictionary of codes for 'DISC' logs, None for no codes given, defaults to None. 
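Since the constructor example above is schematic, a concrete keyword-based
sketch (all values are illustrative only)::

    import pandas as pd
    import xtgeo

    df = pd.DataFrame(
        {
            "X_UTME": [463256.911, 463256.912],
            "Y_UTMN": [5930542.294, 5930542.295],
            "Z_TVDSS": [-49.0, -48.29],
        }
    )
    well = xtgeo.Well(rkb=32.0, xpos=463256.9, ypos=5930542.3, wname="Foo", df=df)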
""" VALID_LOGTYPES = {"DISC", "CONT"} - @allow_deprecated_init + @_well_aux.allow_deprecated_init def __init__( self, - rkb: float, - xpos: float, - ypos: float, - wname: str, - df: pd.DataFrame, - mdlogname: str = None, - zonelogname: str = None, - wlogtypes: Dict[str, str] = None, - wlogrecords: Dict[str, str] = None, + rkb: float = 0.0, + xpos: float = 0.0, + ypos: float = 0.0, + wname: str = "", + df: Optional[pd.DataFrame] = None, + mdlogname: Optional[str] = None, + zonelogname: Optional[str] = None, + wlogtypes: Optional[Dict[str, str]] = None, + wlogrecords: Optional[Dict[str, str]] = None, filesrc: Optional[Union[str, Path]] = None, ): - if not all( - coordinate in df.columns for coordinate in ("X_UTME", "Y_UTMN", "Z_TVDSS") - ): - raise ValueError( - "Well dataframe must include 'X_UTME'," - f" 'Y_UTMN' and 'Z_TVDSS', got {df.columns}" - ) - self._reset( - rkb, - xpos, - ypos, - wname, - df, - filesrc, - mdlogname, - zonelogname, - wlogtypes, - wlogrecords, - ) - - def _reset( - self, - rkb: float = None, - xpos: float = None, - ypos: float = None, - wname: str = None, - df: pd.DataFrame = None, - filesrc: Optional[Union[str, Path]] = None, - mdlogname: str = None, - zonelogname: str = None, - wlogtypes: Dict[str, str] = None, - wlogrecords: Dict[str, str] = None, - ): - if wlogtypes is None: - wlogtypes = dict() - if wlogrecords is None: - wlogrecords = dict() - + # state variables from args self._rkb = rkb self._xpos = xpos self._ypos = ypos @@ -307,65 +195,39 @@ def _reset( self._mdlogname = mdlogname self._zonelogname = zonelogname - self._wlogtypes = wlogtypes - self._wlogrecords = wlogrecords + self._wdata = _welldata._WellData(df, wlogtypes, wlogrecords) + self._wlogtypes = self._wdata.wlogtypes + self._wlogrecords = self._wdata.wlogrecords + self._df = self._wdata.data - self._df = df - - self._wlognames = list(self._df.columns) + self._ensure_consistency() + # additional state variables self._metadata = xtgeo.MetaDataWell() self._metadata.required = self - self._ensure_consistency() + _reset = __init__ # workaround until deprecation .from_file(), etc are removed def __repr__(self): # noqa: D105 # should be able to newobject = eval(repr(thisobject)) myrp = ( - f"{self.__class__.__name__} (filesrc={self._filesrc!r}, " - f"name={self._wname!r}, ID={id(self)})" + f"{self.__class__.__name__} (rkb={self._rkb}, xpos={self._xpos}, " + f"ypos={self._ypos}, wname='{self._wname}', " + f"filesrc='{self._filesrc}', mdlogname='{self._mdlogname}', " + f"zonelogname='{self._zonelogname}', \nwlogtypes='{self._wlogtypes}', " + f"\nwlogrecords='{self._wlogrecords}', " + f"df=\n{repr(self._df)}))" ) + return myrp def __str__(self): # noqa: D105 # user friendly print return self.describe(flush=False) - def _ensure_consistency(self): # pragma: no coverage - """Ensure consistency within an object (private function). - - Consistency checking. As well log names are columns in the Pandas DF, - there are additional attributes per log that have to be "in sync". 
- """ - if self._df is None: - return - - self._wlognames = list(self._df.columns) - - for logname in self._wlognames: - if logname not in self._wlogtypes: - self._wlogtypes[logname] = "CONT" # continuous as default - self._wlogrecords[logname] = None # None as default - else: - if self._wlogtypes[logname] not in self.VALID_LOGTYPES: - self._wlogtypes[logname] = "CONT" - self._wlogrecords[logname] = None # None as default - - if logname not in self._wlogrecords: - if self._wlogtypes[logname] == "DISC": - # it is a discrete log with missing record; try to find - # a default one based on current values... - lvalues = self._df[logname].values.round(decimals=0) - lmin = int(lvalues.min()) - lmax = int(lvalues.max()) - - lvalues = lvalues.astype("int") - codes = {} - for lval in range(lmin, lmax + 1): - if lval in lvalues: - codes[lval] = str(lval) - - self._wlogrecords = codes + def _ensure_consistency(self): + # _well_aux.ensure_consistency(self) + self._wdata.ensure_consistency() # ================================================================================== # Properties @@ -457,7 +319,7 @@ def mdlogname(self): @mdlogname.setter def mdlogname(self, mname): - if mname in self._wlognames: + if mname in self.get_lognames(): self._mdlogname = mname else: self._mdlogname = None @@ -469,7 +331,7 @@ def zonelogname(self): @zonelogname.setter def zonelogname(self, zname): - if zname in self._wlognames: + if zname in self.get_lognames(): self._zonelogname = zname else: self._zonelogname = None @@ -477,12 +339,12 @@ def zonelogname(self, zname): @property def dataframe(self): """Returns or set the Pandas dataframe object for all logs.""" - return self._df + return self._wdata.data @dataframe.setter def dataframe(self, dfr): - self._df = dfr.copy() - self._ensure_consistency() + self._wdata.data = dfr.copy() + self._wdata.ensure_consistency() @property def nrow(self): @@ -503,7 +365,7 @@ def nlogs(self): def lognames_all(self): """list: Returns dataframe column names as list, including mandatory coords.""" self._ensure_consistency() - return self._wlognames + return self.get_lognames() @property def lognames(self): @@ -592,15 +454,15 @@ def from_file( else: fformat = wfile.generic_format_by_proposal(fformat) # default - kwargs = _data_reader_factory(fformat)(wfile, **kwargs) + kwargs = _well_aux._data_reader_factory(fformat)(wfile, **kwargs) self._reset(**kwargs) return self @classmethod def _read_file( cls, - wfile, - fformat="rms_ascii", + wfile: Union[str, Path], + fformat: Optional[str] = "rms_ascii", **kwargs, ): """Import well from file. @@ -640,7 +502,7 @@ def _read_file( else: fformat = wfile.generic_format_by_proposal(fformat) # default - kwargs = _data_reader_factory(fformat)(wfile, **kwargs) + kwargs = _well_aux._data_reader_factory(fformat)(wfile, **kwargs) return cls(**kwargs) def to_file( @@ -768,9 +630,9 @@ def to_roxar(self, *args, **kwargs): """Export (save/store) a well to a roxar project. Note this method works only when inside RMS, or when RMS license is - activated. + activated in terminal. - The current implementation will either update existing well names + The current implementation will either update the existing well (then well log array size must not change), or it will make a new well in RMS. Note: @@ -779,7 +641,7 @@ def to_roxar(self, *args, **kwargs): will not be saved until the user do an explicit project save action. 
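A hedged usage sketch (only meaningful inside RMS or with a roxar license; the
trajectory name below is an assumption, not verified against this patch)::

    # update or create the well 'OP_1' in the current RMS project, where
    # ``project`` is the magic RMS variable
    mywell.to_roxar(project, "OP_1", lognames="all", trajectory="Drilled trajectory")

    # inside RMS, nothing is persisted until the user saves the project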
Args: - project (str): Magic string 'project' or file path to project + project (str, object): Magic string 'project' or file path to project wname (str): Name of well, as shown in RMS. lognames (:obj:list or :obj:str): List of lognames to save, or use simply 'all' for current logs for this well. Default is 'all' @@ -814,16 +676,20 @@ def to_roxar(self, *args, **kwargs): realisation=realisation, ) - def get_wlogs(self) -> OrderedDict: + def get_lognames(self): + """Get the lognames for all logs.""" + return list(self._df.columns) + + def get_wlogs(self) -> dict: """Get a compound dictionary with well log metadata. - The result will be an Ordered dict on the form: + The result will be an dict on the form: ``{"X_UTME": ["CONT", None], ... "Facies": ["DISC", {1: "BG", 2: "SAND"}]}`` """ - res = OrderedDict() + res = dict() - for key in self._wlognames: + for key in self.get_lognames(): wtype = "CONT" wrecord = None if key in self._wlogtypes: @@ -835,7 +701,7 @@ def get_wlogs(self) -> OrderedDict: return res - def set_wlogs(self, wlogs: OrderedDict): + def set_wlogs(self, wlogs: dict): """Set a compound dictionary with well log metadata. This operation is somewhat risky as it may lead to inconsistency, so use with @@ -852,7 +718,7 @@ def set_wlogs(self, wlogs: OrderedDict): ValueError: Invalid log record found in input: """ - for key in self._wlognames: + for key in self.get_lognames(): if key in wlogs.keys(): typ, rec = wlogs[key] @@ -861,8 +727,10 @@ def set_wlogs(self, wlogs: OrderedDict): else: raise ValueError(f"Invalid log type found in input: {typ}") - if rec is None or isinstance(rec, dict): + if isinstance(rec, dict): self._wlogrecords[key] = deepcopy(rec) + elif not rec: + self._wlogrecords[key] = "" else: raise ValueError(f"Invalid log record found in input: {rec}") @@ -870,7 +738,7 @@ def set_wlogs(self, wlogs: OrderedDict): raise ValueError(f"Key for column not found in input: {key}") for key in wlogs.keys(): - if key not in self._wlognames: + if key not in self.get_lognames(): raise ValueError(f"Invalid input key found: {key}") self._ensure_consistency() @@ -883,7 +751,7 @@ def isdiscrete(self, logname): .. versionadded:: 2.2.0 """ - if logname in self._wlognames and self.get_logtype(logname) == "DISC": + if logname in self.get_lognames() and self.get_logtype(logname) == "DISC": return True return False @@ -970,24 +838,15 @@ def delete_log(self, lname): delete_logs = delete_log # alias function - def get_logtype(self, lname): - """Returns the type of a give log (e.g. DISC or CONT).""" - self._ensure_consistency() - - if lname in self._wlogtypes: - return self._wlogtypes[lname] + def get_logtype(self, lname) -> Optional[str]: + """Returns the type of a given log (e.g. DISC or CONT), None if not present.""" + if lname in self._wdata.wlogtypes: + return self._wdata.wlogtypes[lname] return None def set_logtype(self, lname, ltype): """Sets the type of a give log (e.g. 
DISC or CONT).""" - self._ensure_consistency() - - valid = {"DISC", "CONT"} - - if ltype in valid: - self._wlogtypes[lname] = ltype - else: - raise ValueError(f"Try to set invalid log type: {ltype}") + self._wdata.set_wlogtype(lname, ltype) def get_logrecord(self, lname): """Returns the record (dict) of a given log name, None if not exists.""" @@ -998,17 +857,7 @@ def get_logrecord(self, lname): def set_logrecord(self, lname, newdict): """Sets the record (dict) of a given discrete log.""" - self._ensure_consistency() - if lname not in self.lognames: - raise ValueError(f"No such logname: {lname}") - - if self._wlogtypes[lname] == "CONT": - raise ValueError("Cannot set a log record for a continuous log") - - if not isinstance(newdict, dict): - raise ValueError("Input is not a dictionary") - - self._wlogrecords[lname] = newdict + self._wdata.set_wlogrecord(lname, newdict) def get_logrecord_codename(self, lname, key): """Returns the name entry of a log record, for a given key. @@ -1024,24 +873,6 @@ def get_logrecord_codename(self, lname, key): return None - def get_carray(self, lname): - """Returns the C array pointer (via SWIG) for a given log. - - Type conversion is double if float64, int32 if DISC log. - Returns None of log does not exist. - """ - if lname in self._df: - np_array = self._df[lname].values - else: - return None - - if self.get_logtype(lname) == "DISC": - carr = self._convert_np_carr_int(np_array) - else: - carr = self._convert_np_carr_double(np_array) - - return carr - def get_filled_dataframe( self, fill_value=const.UNDEF, fill_value_int=const.UNDEF_INT ): @@ -1093,7 +924,7 @@ def create_relative_hlen(self): distance = [] previous_x, previous_y = xv[0], yv[0] - for i, (x, y) in enumerate(zip(xv, yv)): + for _, (x, y) in enumerate(zip(xv, yv)): distance.append(math.hypot((previous_x - x), (y - previous_y))) previous_x, previous_y = x, y @@ -1119,9 +950,9 @@ def geometrics(self): ) # extract numpies from XYZ trajetory logs - ptr_xv = self.get_carray("X_UTME") - ptr_yv = self.get_carray("Y_UTMN") - ptr_zv = self.get_carray("Z_TVDSS") + ptr_xv = _well_aux.get_carray(self, "X_UTME") + ptr_yv = _well_aux.get_carray(self, "Y_UTMN") + ptr_zv = _well_aux.get_carray(self, "Z_TVDSS") # get number of rows in pandas nlen = self.nrow diff --git a/tests/test_well/test_well.py b/tests/test_well/test_well.py index 48d476218..0869dcda9 100644 --- a/tests/test_well/test_well.py +++ b/tests/test_well/test_well.py @@ -302,30 +302,8 @@ def test_import_export_rmsasc(tmp_path, simple_well): print("Time for load RMSASC: ", xtg.timer(t0)) -def test_get_carr(simple_well): - """Get a C array pointer""" - - mywell = simple_well - - dummy = mywell.get_carray("NOSUCH") - - assert dummy is None, "Wrong log name" - - cref = mywell.get_carray("X_UTME") - - xref = str(cref) - - assert "Swig" in xref and "double" in xref, "carray from log name, double" - - cref = mywell.get_carray("Zonelog") - - xref = str(cref) - - assert "Swig" in xref and "int" in xref, "carray from log name, int" - - def test_create_and_delete_logs(loadwell3): - """Test create adn delete logs.""" + """Test create and delete logs, using explicit create_log() and delete_log().""" mywell = loadwell3 status = mywell.create_log("NEWLOG") @@ -347,6 +325,27 @@ def test_create_and_delete_logs(loadwell3): assert ndeleted == 2 +def test_create_and_delete_logs_implicit(loadwell3): + """Test create and delete logs, using implicit dataframe operations.""" + mywell = loadwell3 + + mywell.dataframe["NEWLOG"] = 1234.0 + assert 
mywell.dataframe.NEWLOG.mean() == 1234.0 + assert "NEWLOG" in mywell.get_lognames() + + # status = mywell.create_log("NEWLOG", force=True, value=200) + # assert status is True + # assert mywell.dataframe.NEWLOG.mean() == 200.0 + + # ndeleted = mywell.delete_log("NEWLOG") + + # assert ndeleted == 1 + # status = mywell.create_log("NEWLOG", force=True, value=200) + + # ndeleted = mywell.delete_log(["NEWLOG", "GR"]) + # assert ndeleted == 2 + + def test_get_set_wlogs(loadwell3): """Test on getting ans setting a dictionary with some log attributes.""" mywell = loadwell3 @@ -354,7 +353,7 @@ def test_get_set_wlogs(loadwell3): mydict = mywell.get_wlogs() print(mydict) - assert isinstance(mydict, OrderedDict) + assert isinstance(mydict, dict) assert mydict["X_UTME"][0] == "CONT" assert mydict["ZONELOG"][0] == "DISC" diff --git a/tests/test_well/test_welldata_class.py b/tests/test_well/test_welldata_class.py new file mode 100644 index 000000000..091d51b78 --- /dev/null +++ b/tests/test_well/test_welldata_class.py @@ -0,0 +1,155 @@ +import pandas as pd +import pytest + +from xtgeo.well._welldata import _LogType, _WellData + + +@pytest.fixture(name="generate_data") +def fixture_generate_data() -> pd.DataFrame: + """Make a test dataframe""" + + data = { + "X_UTME": [1.3, 2.0, 3.0, 4.0, 5.2, 6.0, 9.0], + "Y_UTMN": [11.0, 21.0, 31.0, 41.1, 51.0, 61.0, 91.0], + "Z_TVDSS": [21.0, 22.0, 23.0, 24.0, 25.3, 26.0, 29.0], + "MDEPTH": [13.0, 23.0, 33.0, 43.0, 53.2, 63.0, 93.0], + "GR": [133.0, 2234.0, -999, 1644.0, 2225.5, 6532.0, 92.0], + "FACIES": [1, -999, 3, 4, 4, 1, 1], + "ZONES": [1, 2, 3, 3, 3, 4, -999], + } + + return pd.DataFrame(data) + + +def test_welldata_initialize(generate_data: pd.DataFrame): + """Initialize data with no wlogrecords and wlogtypes given. + + The init shall than then try to infer 'best' guess""" + + instance = _WellData(generate_data) + + assert instance.data.columns[0] == instance.xname + assert instance.data.columns[2] == instance.zname + + +def test_welldata_ensure_wlogtypes(generate_data: pd.DataFrame): + """Testing private method _ensure_wlogtypes""" + + instance = _WellData(generate_data) + assert "FACIES" in instance.data.columns + + del instance.data["FACIES"] + + instance._ensure_consistency_wlogtypes() + assert "FACIES" not in instance.data.columns + + instance.data["NEW"] = 1 + instance._ensure_consistency_wlogtypes() + assert "NEW" in instance.data.columns + assert "NEW" in instance.wlogtypes + + +def test_infer_log_dtypes(generate_data: pd.DataFrame): + """Testing private method _infer_log_dtypes""" + + instance = _WellData(generate_data) + + instance.wlogtypes = {} # for testing, make wlogtypes empty + + res = instance._infer_log_dtypes() + assert res["X_UTME"] == "CONT" + assert res["FACIES"] == "DISC" + + # next, FACIES is predefined in wlogtypes prior to parsing; here as CONT + # which shall 'win' in this setting + instance.wlogtypes = {"FACIES": "CONT"} + res = instance._infer_log_dtypes() + assert res["X_UTME"] == "CONT" + assert res["FACIES"] == "CONT" + + +def test_ensure_dataframe_dtypes(generate_data: pd.DataFrame): + """Testing private method _ensure_cosistency_df_dtypes""" + + instance = _WellData(generate_data) + + assert instance.data["FACIES"].dtype == "float32" + instance.data["FACIES"] = instance.data["FACIES"].astype("int32") + assert instance.data["FACIES"].dtype == "int32" + + instance._ensure_consistency_df_dtypes() + assert instance.data["FACIES"].dtype == "float32" + + +def test_welldata_consistency_add_column(generate_data: pd.DataFrame): + """Add 
a column to the dataframe; check if wlogtypes and wlogrecords are updated.""" + + instance = _WellData(generate_data) + + assert instance.wlogtypes == { + "X_UTME": "CONT", + "Y_UTMN": "CONT", + "Z_TVDSS": "CONT", + "MDEPTH": "CONT", + "GR": "CONT", + "FACIES": "DISC", + "ZONES": "DISC", + } + + instance.data["NEW"] = 1.992 + instance.ensure_consistency() + + assert instance.wlogtypes == { + "X_UTME": "CONT", + "Y_UTMN": "CONT", + "Z_TVDSS": "CONT", + "MDEPTH": "CONT", + "GR": "CONT", + "FACIES": "DISC", + "ZONES": "DISC", + "NEW": "CONT", + } + + instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1] + instance.ensure_consistency() + + assert instance.wlogtypes == { + "X_UTME": "CONT", + "Y_UTMN": "CONT", + "Z_TVDSS": "CONT", + "MDEPTH": "CONT", + "GR": "CONT", + "FACIES": "DISC", + "ZONES": "DISC", + "NEW": "CONT", + "DNEW": "DISC", + } + + assert instance.wlogrecords == { + "X_UTME": None, + "Y_UTMN": None, + "Z_TVDSS": None, + "MDEPTH": None, + "GR": None, + "FACIES": {-999: "-999", 1: "1", 3: "3", 4: "4"}, + "ZONES": {-999: "-999", 1: "1", 2: "2", 3: "3", 4: "4"}, + "NEW": None, + "DNEW": {-999: "-999", 1: "1", 3: "3", 4: "4"}, + } + + +def test_logtype_class(): + """Test the ENUM type _LogClass""" + + assert _LogType.DISC.value == "DISC" + assert _LogType.CONT.value == "CONT" + + assert "CONT" in _LogType + assert "DISC" in _LogType + assert "FOO" not in _LogType + + assert _LogType("DISC") + assert _LogType("CONT") + + with pytest.raises(ValueError, match="is not a valid"): + _LogType("FOO") From ab08c54c8033ed5a462d7ab6df39ae25f2afd08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Thu, 28 Sep 2023 16:54:30 +0200 Subject: [PATCH 08/13] WIP --- src/xtgeo/well/_welldata.py | 32 ++++++++++++++++++----- tests/test_well/test_welldata_class.py | 35 ++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py index a6b1ea31f..2aa41aaa2 100644 --- a/src/xtgeo/well/_welldata.py +++ b/src/xtgeo/well/_welldata.py @@ -8,15 +8,17 @@ 4 463256.916 5930542.302 -46.1533 2.0000 NaN NaN NaN ... ... ... ... ... ... ... ... -Where each log beside the 3 first has a wlogtypes dictoniary, telling if the logs are +Where each log has a wlogtypes dictionary, telling if the logs are treated as discrete (DISC) or continuous (CONT). In addition there is a wlogrecords dict, storing the unit for continuous logs (defaulted to None) or a dictionary of codes if the log in DISC type. -The purpose here is to automate; if a column is added to the dataframe, then the -class methods here will try to guess the wlogtype and wlogtype, and add those; similarly -of a column is removed, the corresponding entries in wlogtypes and wlogrecords will be -deleted. +The 3 first columsn are the coordinates. + +The purpose here is to automate and improve; if a column is added to the dataframe, then +the class methods here will try to guess the wlogtype and wlogtype, and add those; +similarly of a column is removed, the corresponding entries in wlogtypes and wlogrecords +will be deleted. 
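The copy-with-dtypes behaviour added below (``get_dataframe_copy``) can be
sketched directly from the tests in this patch; ``instance`` is a _WellData::

    copy = instance.get_dataframe_copy(infer_dtype=True)
    assert str(copy["FACIES"].dtype) == "int32"             # DISC as int32
    assert str(instance.data["FACIES"].dtype) == "float32"  # master unchanged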
""" from dataclasses import dataclass, field from enum import Enum, EnumMeta, unique @@ -25,7 +27,7 @@ class methods here will try to guess the wlogtype and wlogtype, and add those; s class _LogTypeMeta(EnumMeta): - """For 'in' method, cf https://stackoverflow.com/questions/43634618""" + """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618""" def __contains__(cls, item): try: @@ -252,3 +254,21 @@ def set_wlogrecord(self, name: str, record: dict) -> None: raise ValueError("Something went wrong when setting logrecord.") self.ensure_consistency() + + def get_dataframe_copy(self, infer_dtype: bool = False): + """Get a deep copy of the dataframe, with options. + + If infer_dtype is True, then DISC columns will be of "int32" type + """ + + if infer_dtype: + dfr = self.data.copy() + + for name, wtype in self.wlogtypes.items(): + if "DISC" in wtype: + dfr[name] = dfr[name].astype("int32") + + return dfr + + else: + return self.data.copy() diff --git a/tests/test_well/test_welldata_class.py b/tests/test_well/test_welldata_class.py index 091d51b78..0e330e9bc 100644 --- a/tests/test_well/test_welldata_class.py +++ b/tests/test_well/test_welldata_class.py @@ -153,3 +153,38 @@ def test_logtype_class(): with pytest.raises(ValueError, match="is not a valid"): _LogType("FOO") + + +def test_welldata_dataframe_copy(generate_data: pd.DataFrame): + """Test get dataframe method, with option""" + + instance = _WellData(generate_data) + + copy = instance.get_dataframe_copy() + col = list(copy) + + dtypes = [str(entry) for entry in copy[col].dtypes] + assert dtypes == [ + "float64", + "float64", + "float64", + "float32", + "float32", + "float32", + "float32", + ] + + copy = instance.get_dataframe_copy(infer_dtype=True) + + dtypes = [str(entry) for entry in copy[col].dtypes] + assert dtypes == [ + "float64", + "float64", + "float64", + "float32", + "float32", + "int32", + "int32", + ] + + instance.data = From 04c8c925c34250725b250f24f6a9f8db227cbc84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Thu, 28 Sep 2023 16:54:50 +0200 Subject: [PATCH 09/13] WIP --- src/xtgeo/well/_welldata.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py index 2aa41aaa2..634fa9207 100644 --- a/src/xtgeo/well/_welldata.py +++ b/src/xtgeo/well/_welldata.py @@ -272,3 +272,21 @@ def get_dataframe_copy(self, infer_dtype: bool = False): else: return self.data.copy() + + def get_dataframe(self): + """Get the dataframe, . 
+ + If infer_dtype is True, then DISC columns will be of "int32" type + """ + + if infer_dtype: + dfr = self.data.copy() + + for name, wtype in self.wlogtypes.items(): + if "DISC" in wtype: + dfr[name] = dfr[name].astype("int32") + + return dfr + + else: + return self.data.copy() From 2aef78a58bc5102989d4e60216e02c55442ae5cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Fri, 29 Sep 2023 15:54:44 +0200 Subject: [PATCH 10/13] WIP --- src/xtgeo/well/_well_aux.py | 90 +++++----- src/xtgeo/well/_well_io.py | 6 +- src/xtgeo/well/_well_oper.py | 49 ++---- src/xtgeo/well/_well_roxapi.py | 48 +++--- src/xtgeo/well/_welldata.py | 221 ++++++++++++++++++++++--- src/xtgeo/well/well1.py | 220 ++++++------------------ tests/test_well/test_welldata_class.py | 12 +- 7 files changed, 344 insertions(+), 302 deletions(-) diff --git a/src/xtgeo/well/_well_aux.py b/src/xtgeo/well/_well_aux.py index 12c8eda83..338152626 100644 --- a/src/xtgeo/well/_well_aux.py +++ b/src/xtgeo/well/_well_aux.py @@ -92,48 +92,48 @@ def wrapper(self, *args, **kwargs): return wrapper -def ensure_consistency(self): - """Ensure consistency within an object (private function). - - Consistency checking. As well log names are columns in the Pandas DF, - there are additional attributes per log that have to be "in sync". - """ - if not all( - coordinate in self._df.columns for coordinate in ("X_UTME", "Y_UTMN", "Z_TVDSS") - ): - raise ValueError( - "Well dataframe must include 'X_UTME'," - f" 'Y_UTMN' and 'Z_TVDSS', got {self._df.columns}" - ) - if self._wlogtypes is None: - self._wlogtypes = dict() - if self._wlogrecords is None: - self._wlogrecords = dict() - - for logname in self.get_lognames(): - if logname not in self._wlogtypes: - self._wlogtypes[logname] = "CONT" # continuous as default - self._wlogrecords[logname] = None # None as default - else: - if self._wlogtypes[logname] not in self.VALID_LOGTYPES: - self._wlogtypes[logname] = "CONT" - self._wlogrecords[logname] = None # None as default - - if logname not in self._wlogrecords: - if self._wlogtypes[logname] == "DISC": - # it is a discrete log with missing record; try to find - # a default one based on current values... - lvalues = self._df[logname].values.round(decimals=0) - lmin = int(lvalues.min()) - lmax = int(lvalues.max()) - - lvalues = lvalues.astype("int") - codes = {} - for lval in range(lmin, lmax + 1): - if lval in lvalues: - codes[lval] = str(lval) - - self._wlogrecords = codes +# def ensure_consistency(self): +# """Ensure consistency within an object (private function). + +# Consistency checking. As well log names are columns in the Pandas DF, +# there are additional attributes per log that have to be "in sync". 
+# """ +# if not all( +# coordinate in self._df.columns for coordinate in ("X_UTME", "Y_UTMN", "Z_TVDSS") +# ): +# raise ValueError( +# "Well dataframe must include 'X_UTME'," +# f" 'Y_UTMN' and 'Z_TVDSS', got {self._df.columns}" +# ) +# if self._wlogtypes is None: +# self._wlogtypes = dict() +# if self._wlogrecords is None: +# self._wlogrecords = dict() + +# for logname in self.get_lognames(): +# if logname not in self._wlogtypes: +# self._wlogtypes[logname] = "CONT" # continuous as default +# self._wlogrecords[logname] = None # None as default +# else: +# if self._wlogtypes[logname] not in self.VALID_LOGTYPES: +# self._wlogtypes[logname] = "CONT" +# self._wlogrecords[logname] = None # None as default + +# if logname not in self._wlogrecords: +# if self._wlogtypes[logname] == "DISC": +# # it is a discrete log with missing record; try to find +# # a default one based on current values... +# lvalues = self._df[logname].values.round(decimals=0) +# lmin = int(lvalues.min()) +# lmax = int(lvalues.max()) + +# lvalues = lvalues.astype("int") +# codes = {} +# for lval in range(lmin, lmax + 1): +# if lval in lvalues: +# codes[lval] = str(lval) + +# self._wlogrecords = codes def _convert_np_carr_int(self, np_array: npt.NDArray[np.int_]) -> Any: @@ -164,7 +164,7 @@ def _convert_carr_double_np( ) -> npt.NDArray[np.float64]: """Convert a C array (SWIG pointer) to numpy, assuming double type.""" if nlen is None: - nlen = len(self._df.index) + nlen = len(self._wdata.data.index) nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) @@ -177,8 +177,8 @@ def get_carray(self, lname: str) -> Optional[Any]: Type conversion is double if float64, int32 if DISC log. Returns None if log does not exist. """ - if lname in self._df: - np_array = self._df[lname].values + if lname in self._wdata.data: + np_array = self._wdata.data[lname].values else: return None diff --git a/src/xtgeo/well/_well_io.py b/src/xtgeo/well/_well_io.py index c15305725..42e798ab2 100644 --- a/src/xtgeo/well/_well_io.py +++ b/src/xtgeo/well/_well_io.py @@ -198,7 +198,7 @@ def _check_special_logs(dfr, mdlogname, zonelogname, strict, wname): def export_rms_ascii(self, wfile, precision=4): """Export to RMS well format.""" - with open(wfile, "w") as fwell: + with open(wfile, "w", encoding="utf-8") as fwell: print("1.0", file=fwell) print("Unknown", file=fwell) if self._rkb is None: @@ -221,7 +221,7 @@ def export_rms_ascii(self, wfile, precision=4): print(f"{lname} {self._wlogtypes[lname]} {usewrec}", file=fwell) # now export all logs as pandas framework - tmpdf = self._df.copy() + tmpdf = self._wdata.data.copy() tmpdf.fillna(value=-999, inplace=True) # make the disc as is np.int @@ -261,7 +261,7 @@ def export_hdf5_well(self, wfile, compression="lzf"): with pd.HDFStore(wfile.file, "w", complevel=complevel, complib=complib) as store: logger.info("export to HDF5 %s", wfile.name) - store.put("Well", self._df) + store.put("Well", self._wdata.data) store.get_storer("Well").attrs["metadata"] = jmeta store.get_storer("Well").attrs["provider"] = "xtgeo" store.get_storer("Well").attrs["format_idcode"] = 1401 diff --git a/src/xtgeo/well/_well_oper.py b/src/xtgeo/well/_well_oper.py index ccfd890a8..81d675158 100644 --- a/src/xtgeo/well/_well_oper.py +++ b/src/xtgeo/well/_well_oper.py @@ -17,36 +17,6 @@ logger = xtg.functionlogger(__name__) -def delete_log(self, lname): - """Delete/remove an existing log, or list of logs.""" - self._ensure_consistency() - - if not isinstance(lname, list): - lname = [lname] - - lcount = 0 - for logn in lname: - if logn not in 
self.get_lognames(): - logger.info("Log does no exist: %s", logn) - continue - - logger.info("Log exist and will be deleted: %s", logn) - lcount += 1 - del self._wlogtypes[logn] - del self._wlogrecords[logn] - - self._df.drop(logn, axis=1, inplace=True) - self._ensure_consistency() - - if self._mdlogname == logn: - self._mdlogname = None - if self._zonelogname == logn: - self._zonelogname = None - - self._ensure_consistency() - return lcount - - def rescale(self, delta=0.15, tvdrange=None): """Rescale by using a new MD increment. @@ -55,15 +25,18 @@ def rescale(self, delta=0.15, tvdrange=None): pdrows = pd.options.display.max_rows pd.options.display.max_rows = 999 - dfrcolumns0 = self._df.columns + # operate on a copy of the well + wcopy = self.copy() + + dfrcolumns0 = list(wcopy._wdata.data) - if self.mdlogname is None: - self.geometrics() + if wcopy.mdlogname is None: + wcopy.geometrics() - dfrcolumns1 = self._df.columns + dfrcolumns1 = list(wcopy._wdata.data) columnsadded = list(set(dfrcolumns1) - set(dfrcolumns0)) # new tmp columns, if any - dfr = self._df.copy().set_index(self.mdlogname) + dfr = wcopy._wdata.data.copy().set_index(self.mdlogname) logger.debug("Initial dataframe\n %s", dfr) @@ -76,12 +49,12 @@ def rescale(self, delta=0.15, tvdrange=None): tvd1, tvd2 = tvdrange try: - startt = dfr.index[dfr["Z_TVDSS"] >= tvd1][0] + startt = dfr.index[dfr[wcopy._wdata.zname] >= tvd1][0] except IndexError: startt = start try: - stopt = dfr.index[dfr["Z_TVDSS"] >= tvd2][0] + stopt = dfr.index[dfr[wcopy._wdata.zname] >= tvd2][0] except IndexError: stopt = stop @@ -111,7 +84,7 @@ def rescale(self, delta=0.15, tvdrange=None): pd.options.display.max_rows = pdrows # reset - self._df = dfr + self.dataframe = dfr if columnsadded: self.delete_log(columnsadded) diff --git a/src/xtgeo/well/_well_roxapi.py b/src/xtgeo/well/_well_roxapi.py index 8f96b758f..f43edc6f4 100644 --- a/src/xtgeo/well/_well_roxapi.py +++ b/src/xtgeo/well/_well_roxapi.py @@ -15,7 +15,7 @@ xtg = XTGeoDialog() logger = xtg.functionlogger(__name__) -# Well() instance self = xwell1 +# Well() instance: self # Import from ROX api @@ -155,7 +155,7 @@ def _get_roxlog(wlogtypes, wlogrecords, roxlrun, lname): # pragma: no cover def export_well_roxapi( - xwell1, + self, project, wname, lognames="all", @@ -168,7 +168,7 @@ def export_well_roxapi( rox = RoxUtils(project, readonly=False) - _roxapi_export_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation) + _roxapi_export_well(self, rox, wname, lognames, logrun, trajectory, realisation) if rox._roxexternal: rox.project.save() @@ -176,18 +176,14 @@ def export_well_roxapi( rox.safe_close() -def _roxapi_export_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation): +def _roxapi_export_well(self, rox, wname, lognames, logrun, trajectory, realisation): if wname in rox.project.wells: - _roxapi_update_well( - xwell1, rox, wname, lognames, logrun, trajectory, realisation - ) + _roxapi_update_well(self, rox, wname, lognames, logrun, trajectory, realisation) else: - _roxapi_create_well( - xwell1, rox, wname, lognames, logrun, trajectory, realisation - ) + _roxapi_create_well(self, rox, wname, lognames, logrun, trajectory, realisation) -def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation): +def _roxapi_update_well(self, rox, wname, lognames, logrun, trajectory, realisation): """Assume well is to updated only with logs, new or changed. Also, the length of arrays should not change, at least not for now. 
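The undefined-value handling used when pushing logs to RMS (seen below) in a
standalone numpy sketch; the limit constant only illustrates the magnitude of
``xtgeo.UNDEF_LIMIT``, it is not the exact value::

    import numpy as np

    UNDEF_LIMIT = 1.0e32  # stand-in for xtgeo.UNDEF_LIMIT
    vals = np.array([0.23, np.nan, 1.0e33])

    # mask NaN first, then anything above the undefined limit
    masked = np.ma.masked_greater(np.ma.masked_invalid(vals), UNDEF_LIMIT)
    # masked -> [0.23, --, --]; only defined samples reach the RMS log curve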
@@ -202,14 +198,14 @@ def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realis lrun.log_curves.clear() if lognames == "all": - uselognames = xwell1.lognames + uselognames = self.lognames else: uselognames = lognames for lname in uselognames: isdiscrete = False xtglimit = xtgeo.UNDEF_LIMIT - if xwell1._wlogtypes[lname] == "DISC": + if self._wlogtypes[lname] == "DISC": isdiscrete = True xtglimit = xtgeo.UNDEF_INT_LIMIT @@ -220,12 +216,12 @@ def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realis values = thelog.generate_values() - if values.size != xwell1.dataframe[lname].values.size: + if values.size != self.dataframe[lname].values.size: raise ValueError("New logs have different sampling or size, not possible") usedtype = values.dtype - vals = np.ma.masked_invalid(xwell1.dataframe[lname].values) + vals = np.ma.masked_invalid(self.dataframe[lname].values) vals = np.ma.masked_greater(vals, xtglimit) vals = vals.astype(usedtype) thelog.set_values(vals) @@ -237,12 +233,12 @@ def _roxapi_update_well(xwell1, rox, wname, lognames, logrun, trajectory, realis else: codedict = { int(key): str(value) - for key, value in xwell1._wlogrecords[lname].items() + for key, value in self._wlogrecords[lname].items() } thelog.set_code_names(codedict) -def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realisation): +def _roxapi_create_well(self, rox, wname, lognames, logrun, trajectory, realisation): """Save Well() instance to a new well in RMS. From version 2.15. @@ -250,15 +246,15 @@ def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realis logger.debug("Key realisation is not supported: %s", realisation) roxwell = rox.project.wells.create(wname) - roxwell.rkb = xwell1.rkb - roxwell.wellhead = (xwell1.xpos, xwell1.ypos) + roxwell.rkb = self.rkb + roxwell.wellhead = (self.xpos, self.ypos) traj = roxwell.wellbore.trajectories.create(trajectory) series = traj.survey_point_series - east = xwell1.dataframe["X_UTME"].values - north = xwell1.dataframe["Y_UTMN"].values - tvd = xwell1.dataframe["Z_TVDSS"].values + east = self.dataframe["X_UTME"].values + north = self.dataframe["Y_UTMN"].values + tvd = self.dataframe["Z_TVDSS"].values values = np.array([east, north, tvd]).transpose() series.set_points(values) @@ -268,8 +264,8 @@ def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realis lrun.set_measured_depths(md) # Add log curves - for curvename, curveprop in xwell1.get_wlogs().items(): - if curvename not in xwell1.lognames: + for curvename, curveprop in self.get_wlogs().items(): + if curvename not in self.lognames: continue # skip X_UTME .. 
Z_TVDSS if lognames and lognames != "all" and curvename not in lognames: continue @@ -283,12 +279,12 @@ def _roxapi_create_well(xwell1, rox, wname, lognames, logrun, trajectory, realis if curveprop[0] == "DISC": lcurve = lrun.log_curves.create_discrete(cname) - cc = np.ma.masked_invalid(xwell1.dataframe[curvename].values) + cc = np.ma.masked_invalid(self.dataframe[curvename].values) lcurve.set_values(cc.astype(np.int32)) codedict = {int(key): str(value) for key, value in curveprop[1].items()} lcurve.set_code_names(codedict) else: lcurve = lrun.log_curves.create(cname) - lcurve.set_values(xwell1.dataframe[curvename].values) + lcurve.set_values(self.dataframe[curvename].values) logger.info("Log curve created: %s", cname) diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py index 634fa9207..3709cdff6 100644 --- a/src/xtgeo/well/_welldata.py +++ b/src/xtgeo/well/_welldata.py @@ -8,8 +8,8 @@ 4 463256.916 5930542.302 -46.1533 2.0000 NaN NaN NaN ... ... ... ... ... ... ... ... -Where each log has a wlogtypes dictionary, telling if the logs are -treated as discrete (DISC) or continuous (CONT). In addition there is a wlogrecords +Where each log has a wlogtypes dictionary, telling if the logs are treated +as discrete (DISC) or continuous (CONT). In addition there is a wlogrecords dict, storing the unit for continuous logs (defaulted to None) or a dictionary of codes if the log in DISC type. @@ -20,18 +20,26 @@ similarly of a column is removed, the corresponding entries in wlogtypes and wlogrecords will be deleted. """ +import math from dataclasses import dataclass, field from enum import Enum, EnumMeta, unique +import numpy as np import pandas as pd +import xtgeo.common.constants as const +from xtgeo import XTGeoCLibError # type: ignore[attr-defined] +from xtgeo.cxtgeo import _cxtgeo + +from . import _well_aux + class _LogTypeMeta(EnumMeta): """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618""" def __contains__(cls, item): try: - cls(item) + cls(item) # pylint: disable=E1120 except ValueError: return False else: @@ -65,6 +73,16 @@ class _WellData: {"PHIT": ("unit", "scale"), "FACIES": {0:BG, 2: "SST", 4: "CALC"}} Hence the CONT logs hold a tuple or list with 2 str members, or None, while DISC log holds a dict where the key is an int and the value is a string. + + Note:: + + Callers shall not use properties, but methods, e.g.:: + + instance.well = some_new_dataframe # not + + but:: + + instance.set_dataframe(some_new_dataframe) """ data: pd.DataFrame @@ -255,38 +273,201 @@ def set_wlogrecord(self, name: str, record: dict) -> None: self.ensure_consistency() - def get_dataframe_copy(self, infer_dtype: bool = False): + def get_dataframe_copy( + self, + infer_dtype: bool = False, + filled=False, + fill_value=const.UNDEF, + fill_value_int=const.UNDEF_INT, + ): """Get a deep copy of the dataframe, with options. If infer_dtype is True, then DISC columns will be of "int32" type """ - + dfr = self.data.copy() if infer_dtype: - dfr = self.data.copy() - for name, wtype in self.wlogtypes.items(): if "DISC" in wtype: dfr[name] = dfr[name].astype("int32") - return dfr + if filled: + dfill = {} + for lname in self.data: + if "DISC" in self.wlogtypes[lname]: + dfill[lname] = fill_value_int + else: + dfill[lname] = fill_value - else: - return self.data.copy() + dfr = dfr.fillna(dfill) + + return dfr def get_dataframe(self): - """Get the dataframe, . 
+ """Get the dataframe.""" + return self.data + + def set_dataframe(self, dfr): + """Set the dataframe in a controlled manner, shall be used""" + # TODO: more checks, and possibly acceptance of lists, dicts? + if isinstance(dfr, pd.DataFrame): + self.data = dfr + else: + raise ValueError("Input dfr is not a pandas dataframe") + self.ensure_consistency() + + def rename_log(self, lname, newname): + """Rename a log, e.g. Poro to PORO.""" + + if lname not in list(self.data): + raise ValueError("Input log does not exist") + + if newname in list(self.data): + raise ValueError("New log name exists already") + + # rename in dataframe + self.data.rename(index=str, columns={lname: newname}, inplace=True) + + self.wlogtypes[newname] = self.wlogtypes.pop(lname) + self.wlogrecords[newname] = self.wlogrecords.pop(lname) + + self.ensure_consistency() + + def create_log( + self, lname, logtype="CONT", logrecord=None, value=0.0, force=True + ) -> bool: + """Create a new log.""" + + if lname in list(self.data) and force is False: + return False + + self.wlogtypes[lname] = logtype + self.wlogrecords[lname] = logrecord + + # make a new column + self.data[lname] = float(value) + self.ensure_consistency() + return True + + def delete_log(self, lname): + """Delete/remove an existing log, or list of logs.""" + if not isinstance(lname, list): + lname = [lname] + + lcount = 0 + for logn in lname: + if logn not in list(self.data): + continue + + lcount += 1 + self.data.drop(logn, axis=1, inplace=True) + + self.ensure_consistency() + + return lcount + + def create_relative_hlen(self): + """Make a relative length of a well, as a log.""" + # extract numpies from XYZ trajectory logs + xv = self.data[self.xname].values + yv = self.data[self.yname].values + + distance = [] + previous_x, previous_y = xv[0], yv[0] + for _, (x, y) in enumerate(zip(xv, yv)): + distance.append(math.hypot((previous_x - x), (y - previous_y))) + previous_x, previous_y = x, y + + self.data["R_HLEN"] = pd.Series(np.cumsum(distance), index=self.data.index) + self.ensure_consistency() + + def geometrics(self): + """Compute some well geometrical arrays MD, INCL, AZI, as logs. + + These are kind of quasi measurements hence the logs will named + with a Q in front as Q_MDEPTH, Q_INCL, and Q_AZI. + + These logs will be added to the dataframe. If the mdlogname + attribute does not exist in advance, it will be set to 'Q_MDEPTH'. + + Returns: + False if geometrics cannot be computed - If infer_dtype is True, then DISC columns will be of "int32" type """ + # TODO: rewrite in pure python? + if self.data.shape[0] < 3: + raise ValueError( + f"Cannot compute geometrics. 
Not enough " + f"trajectory points (need >3, have: {self.data.shape[0]})" + ) - if infer_dtype: - dfr = self.data.copy() + # extract numpies from XYZ trajetory logs + ptr_xv = _well_aux.get_carray(self, "X_UTME") + ptr_yv = _well_aux.get_carray(self, "Y_UTMN") + ptr_zv = _well_aux.get_carray(self, "Z_TVDSS") - for name, wtype in self.wlogtypes.items(): - if "DISC" in wtype: - dfr[name] = dfr[name].astype("int32") + # get number of rows in pandas + nlen = len(self.data.index) - return dfr + ptr_md = _cxtgeo.new_doublearray(nlen) + ptr_incl = _cxtgeo.new_doublearray(nlen) + ptr_az = _cxtgeo.new_doublearray(nlen) - else: - return self.data.copy() + ier = _cxtgeo.well_geometrics( + nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 + ) + + if ier != 0: + raise XTGeoCLibError(f"well_geometrics failed with error code: {ier}") + + dnumpy = self._convert_carr_double_np(ptr_md) + self.data["Q_MDEPTH"] = pd.Series(dnumpy, index=self.data.index) + + dnumpy = self._convert_carr_double_np(ptr_incl) + self.data["Q_INCL"] = pd.Series(dnumpy, index=self.data.index) + + dnumpy = self._convert_carr_double_np(ptr_az) + self.data["Q_AZI"] = pd.Series(dnumpy, index=self.data.index) + + # delete tmp pointers + _cxtgeo.delete_doublearray(ptr_xv) + _cxtgeo.delete_doublearray(ptr_yv) + _cxtgeo.delete_doublearray(ptr_zv) + _cxtgeo.delete_doublearray(ptr_md) + _cxtgeo.delete_doublearray(ptr_incl) + _cxtgeo.delete_doublearray(ptr_az) + + return True + + # ---------------------------------------------------------------------------------- + # Special methods for nerds, todo is to move to private module + # ---------------------------------------------------------------------------------- + + def _convert_np_carr_int(self, np_array): + """Convert numpy 1D array to C array, assuming int type. 
+ + The numpy is always a double (float64), so need to convert first + """ + carr = _cxtgeo.new_intarray(self.nrow) + + np_array = np_array.astype(np.int32) + + _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) + + return carr + + def _convert_np_carr_double(self, np_array): + """Convert numpy 1D array to C array, assuming double type.""" + carr = _cxtgeo.new_doublearray(self.nrow) + + _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) + + return carr + + def _convert_carr_double_np(self, carray, nlen=None): + """Convert a C array to numpy, assuming double type.""" + if nlen is None: + nlen = len(self._wdata.data.index) + + nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) + + return nparray diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index 27da6426d..d4cf6cf8b 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -198,7 +198,6 @@ def __init__( self._wdata = _welldata._WellData(df, wlogtypes, wlogrecords) self._wlogtypes = self._wdata.wlogtypes self._wlogrecords = self._wdata.wlogrecords - self._df = self._wdata.data self._ensure_consistency() @@ -216,7 +215,7 @@ def __repr__(self): # noqa: D105 f"filesrc='{self._filesrc}', mdlogname='{self._mdlogname}', " f"zonelogname='{self._zonelogname}', \nwlogtypes='{self._wlogtypes}', " f"\nwlogrecords='{self._wlogrecords}', " - f"df=\n{repr(self._df)}))" + f"df=\n{repr(self._wdata.data)}))" ) return myrp @@ -226,9 +225,15 @@ def __str__(self): # noqa: D105 return self.describe(flush=False) def _ensure_consistency(self): - # _well_aux.ensure_consistency(self) + """Ensure consistency""" self._wdata.ensure_consistency() + if self._mdlogname not in self._wdata.data: + self._mdlogname = None + + if self._zonelogname not in self._wdata.data: + self._zonelogname = None + # ================================================================================== # Properties # ================================================================================== @@ -339,38 +344,36 @@ def zonelogname(self, zname): @property def dataframe(self): """Returns or set the Pandas dataframe object for all logs.""" - return self._wdata.data + return self._wdata.get_dataframe() @dataframe.setter def dataframe(self, dfr): - self._wdata.data = dfr.copy() - self._wdata.ensure_consistency() + self._wdata.set_dataframe(dfr) @property def nrow(self): """int: Returns the Pandas dataframe object number of rows.""" - return len(self._df.index) + return len(self._wdata.data.index) @property def ncol(self): """int: Returns the Pandas dataframe object number of columns.""" - return len(self._df.columns) + return len(self._wdata.data.columns) @property def nlogs(self): """int: Returns the Pandas dataframe object number of columns.""" - return len(self._df.columns) - 3 + return len(self._wdata.data.columns) - 3 @property def lognames_all(self): """list: Returns dataframe column names as list, including mandatory coords.""" - self._ensure_consistency() return self.get_lognames() @property def lognames(self): """list: Returns the Pandas dataframe column as list excluding coords.""" - return list(self._df)[3:] + return list(self._wdata.data)[3:] # ================================================================================== # Methods @@ -678,7 +681,7 @@ def to_roxar(self, *args, **kwargs): def get_lognames(self): """Get the lognames for all logs.""" - return list(self._df.columns) + return list(self._wdata.data) def get_wlogs(self) -> dict: """Get a compound dictionary with well log metadata. 
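
[editor's note] A sketch of what the property delegation in the hunk above implies
for callers; any pandas edit assigned back through the setter is re-validated. The
"GR" log name and file path are hypothetical:

    well = xtgeo.well_from_file("55_33-1.rmswell")  # hypothetical path
    dfr = well.dataframe.copy()
    del dfr["GR"]               # drop a log column in a plain pandas operation
    well.dataframe = dfr        # setter routes through _wdata.set_dataframe()
    assert "GR" not in well.lognames  # wlogtypes/wlogrecords are pruned in sync
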
@@ -762,7 +765,7 @@ def copy(self): self.xpos, self.ypos, self.wname, - self._df.copy(), + self._wdata.data.copy(), self.mdlogname, self.zonelogname, deepcopy(self._wlogtypes), @@ -772,19 +775,7 @@ def copy(self): def rename_log(self, lname, newname): """Rename a log, e.g. Poro to PORO.""" - self._ensure_consistency() - - if lname not in self.lognames: - raise ValueError("Input log does not exist") - - if newname in self.lognames: - raise ValueError("New log name exists already") - - self._wlogtypes[newname] = self._wlogtypes.pop(lname) - self._wlogrecords[newname] = self._wlogrecords.pop(lname) - - # rename in dataframe - self._df.rename(index=str, columns={lname: newname}, inplace=True) + self._wdata.rename_log(lname, newname) if self._mdlogname == lname: self._mdlogname = newname @@ -811,17 +802,12 @@ def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=Tru existing) or False if the new log already exists, and ``force=False``. - """ - if lname in self.lognames and force is False: - return False + Note:: - self._wlogtypes[lname] = logtype - self._wlogrecords[lname] = logrecord + A new log can also be created - # make a new column - self._df[lname] = float(value) - self._ensure_consistency() - return True + """ + self._wdata.create_log(lname, logtype, logrecord, value, force) def delete_log(self, lname): """Delete/remove an existing log, or list of logs. @@ -833,6 +819,11 @@ def delete_log(self, lname): Returns: Number of logs deleted + + Note:: + + A log can also be deleted by simply removing it from the dataframe. + """ return _well_oper.delete_log(self, lname) @@ -889,28 +880,12 @@ def get_filled_dataframe( high XTGeo UNDEF values, or user defined values. """ - lnames = self.lognames - - newdf = self._df.copy() - - # make a dictionary of datatypes - dtype = {"X_UTME": "float64", "Y_UTMN": "float64", "Z_TVDSS": "float64"} - - dfill = {"X_UTME": const.UNDEF, "Y_UTMN": const.UNDEF, "Z_TVDSS": const.UNDEF} - - for lname in lnames: - if self.get_logtype(lname) == "DISC": - dtype[lname] = np.int32 - dfill[lname] = fill_value_int - else: - dtype[lname] = np.float64 - dfill[lname] = fill_value - - # now first fill Nan's (because int cannot be converted if Nan) - newdf = newdf.fillna(dfill) - newdf = newdf.astype(dtype) - - return newdf + return self._wdata.get_dataframe_copy( + infer_dtype=True, + filled=True, + fill_value=fill_value, + fill_value_int=fill_value_int, + ) def create_relative_hlen(self): """Make a relative length of a well, as a log. @@ -918,17 +893,7 @@ def create_relative_hlen(self): The first well og entry defines zero, then the horizontal length is computed relative to that by simple geometric methods. """ - # extract numpies from XYZ trajectory logs - xv = self._df["X_UTME"].values - yv = self._df["Y_UTMN"].values - - distance = [] - previous_x, previous_y = xv[0], yv[0] - for _, (x, y) in enumerate(zip(xv, yv)): - distance.append(math.hypot((previous_x - x), (y - previous_y))) - previous_x, previous_y = x, y - - self._df["R_HLEN"] = pd.Series(np.cumsum(distance), index=self._df.index) + self._wdata.create_relative_hlen() def geometrics(self): """Compute some well geometrical arrays MD, INCL, AZI, as logs. @@ -943,52 +908,12 @@ def geometrics(self): False if geometrics cannot be computed """ - if self._df.shape[0] < 3: - raise ValueError( - f"Cannot compute geometrics for {self.name}. 
Not enough " - f"trajectory points (need >3, have: {self.dataframe.shape[0]})" - ) - - # extract numpies from XYZ trajetory logs - ptr_xv = _well_aux.get_carray(self, "X_UTME") - ptr_yv = _well_aux.get_carray(self, "Y_UTMN") - ptr_zv = _well_aux.get_carray(self, "Z_TVDSS") - - # get number of rows in pandas - nlen = self.nrow - - ptr_md = _cxtgeo.new_doublearray(nlen) - ptr_incl = _cxtgeo.new_doublearray(nlen) - ptr_az = _cxtgeo.new_doublearray(nlen) - - ier = _cxtgeo.well_geometrics( - nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 - ) - - if ier != 0: - raise XTGeoCLibError(f"well_geometrics failed with error code: {ier}") - - dnumpy = self._convert_carr_double_np(ptr_md) - self._df["Q_MDEPTH"] = pd.Series(dnumpy, index=self._df.index) - - dnumpy = self._convert_carr_double_np(ptr_incl) - self._df["Q_INCL"] = pd.Series(dnumpy, index=self._df.index) - - dnumpy = self._convert_carr_double_np(ptr_az) - self._df["Q_AZI"] = pd.Series(dnumpy, index=self._df.index) + rvalue = self._wdata.geometrics() if not self._mdlogname: self._mdlogname = "Q_MDEPTH" - # delete tmp pointers - _cxtgeo.delete_doublearray(ptr_xv) - _cxtgeo.delete_doublearray(ptr_yv) - _cxtgeo.delete_doublearray(ptr_zv) - _cxtgeo.delete_doublearray(ptr_md) - _cxtgeo.delete_doublearray(ptr_incl) - _cxtgeo.delete_doublearray(ptr_az) - - return True + return rvalue def truncate_parallel_path( self, other, xtol=None, ytol=None, ztol=None, itol=None, atol=None @@ -1016,14 +941,14 @@ def truncate_parallel_path( if self.dataframe.shape[0] < 3 or other.dataframe.shape[0] < 3: raise ValueError( - f"Too few points to truncate parallel path, was {self._df.size} and " + f"Too few points to truncate parallel path, was {self._wdata.data.size} and " f"{other._df.size}, must be >3" ) # extract numpies from XYZ trajectory logs - xv1 = self._df["X_UTME"].values - yv1 = self._df["Y_UTMN"].values - zv1 = self._df["Z_TVDSS"].values + xv1 = self._wdata.data["X_UTME"].values + yv1 = self._wdata.data["Y_UTMN"].values + zv1 = self._wdata.data["Z_TVDSS"].values xv2 = other._df["X_UTME"].values yv2 = other._df["Y_UTMN"].values @@ -1036,12 +961,14 @@ def truncate_parallel_path( if ier != 0: raise RuntimeError("Unexpected error") - self._df = self._df[self._df["X_UTME"] < const.UNDEF_LIMIT] - self._df.reset_index(drop=True, inplace=True) + self._wdata.data = self._wdata.data[ + self._wdata.data["X_UTME"] < const.UNDEF_LIMIT + ] + self._wdata.data.reset_index(drop=True, inplace=True) def may_overlap(self, other): """Consider if well overlap in X Y coordinates with other well, True/False.""" - if self._df.size < 2 or other._df.size < 2: + if self._wdata.data.size < 2 or other._df.size < 2: return False # extract numpies from XYZ trajectory logs @@ -1071,10 +998,10 @@ def limit_tvd(self, tvdmin, tvdmax): tvdmin (float): Minimum TVD tvdmax (float): Maximum TVD """ - self._df = self._df[self._df["Z_TVDSS"] >= tvdmin] - self._df = self._df[self._df["Z_TVDSS"] <= tvdmax] + self._wdata.data = self._wdata.data[self._wdata.data["Z_TVDSS"] >= tvdmin] + self._wdata.data = self._wdata.data[self._wdata.data["Z_TVDSS"] <= tvdmax] - self._df.reset_index(drop=True, inplace=True) + self._wdata.data.reset_index(drop=True, inplace=True) def downsample(self, interval=4, keeplast=True): """Downsample by sampling every N'th element (coarsen only). @@ -1084,15 +1011,15 @@ def downsample(self, interval=4, keeplast=True): keeplast (bool): If True, the last element from the original dataframe is kept, to avoid that the well is shortened. 
""" - if self._df.size < 2 * interval: + if self._wdata.data.size < 2 * interval: return - dfr = self._df[::interval] + dfr = self._wdata.data[::interval] if keeplast: - dfr = pd.concat([dfr, self._df.iloc[-1:]], ignore_index=True) + dfr = pd.concat([dfr, self._wdata.data.iloc[-1:]], ignore_index=True) - self._df = dfr.reset_index(drop=True) + self._wdata.data = dfr.reset_index(drop=True) def rescale(self, delta=0.15, tvdrange=None): """Rescale (refine or coarse) by sampling a delta along the trajectory, in MD. @@ -1114,7 +1041,7 @@ def get_polygons(self, skipname=False): .. versionadded:: 2.1 .. versionchanged:: 2.13 Added `skipname` key """ - dfr = self._df.copy() + dfr = self._wdata.data.copy() keep = ("X_UTME", "Y_UTMN", "Z_TVDSS") for col in dfr.columns: @@ -1515,46 +1442,3 @@ def get_gridproperties( """ _well_oper.get_gridproperties(self, gridprops, grid=grid, prop_id=prop_id) - - # ================================================================================== - # PRIVATE METHODS - # should not be applied outside the class - # ================================================================================== - - # ---------------------------------------------------------------------------------- - # Import/Export methods for various formats - # ---------------------------------------------------------------------------------- - - # ---------------------------------------------------------------------------------- - # Special methods for nerds, todo is to move to private module - # ---------------------------------------------------------------------------------- - - def _convert_np_carr_int(self, np_array): - """Convert numpy 1D array to C array, assuming int type. - - The numpy is always a double (float64), so need to convert first - """ - carr = _cxtgeo.new_intarray(self.nrow) - - np_array = np_array.astype(np.int32) - - _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) - - return carr - - def _convert_np_carr_double(self, np_array): - """Convert numpy 1D array to C array, assuming double type.""" - carr = _cxtgeo.new_doublearray(self.nrow) - - _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) - - return carr - - def _convert_carr_double_np(self, carray, nlen=None): - """Convert a C array to numpy, assuming double type.""" - if nlen is None: - nlen = len(self._df.index) - - nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) - - return nparray diff --git a/tests/test_well/test_welldata_class.py b/tests/test_well/test_welldata_class.py index 0e330e9bc..c1eafd04e 100644 --- a/tests/test_well/test_welldata_class.py +++ b/tests/test_well/test_welldata_class.py @@ -32,6 +32,16 @@ def test_welldata_initialize(generate_data: pd.DataFrame): assert instance.data.columns[2] == instance.zname +def test_welldata_setters(generate_data: pd.DataFrame): + """Initialize data, and try a direct setter.""" + + instance = _WellData(generate_data) + + dfr = instance.data.copy() + with pytest.raises(AttributeError, match="Don't use direct metods"): + instance.data = dfr + + def test_welldata_ensure_wlogtypes(generate_data: pd.DataFrame): """Testing private method _ensure_wlogtypes""" @@ -186,5 +196,3 @@ def test_welldata_dataframe_copy(generate_data: pd.DataFrame): "int32", "int32", ] - - instance.data = From 9baf69bf9b5be76a455be0248dd5cd32115b3a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Sun, 1 Oct 2023 19:53:56 +0200 Subject: [PATCH 11/13] WIP --- src/xtgeo/well/_welldata.py | 66 ++++++++++++++++++++++++++----------- src/xtgeo/well/well1.py | 1 - 2 files changed, 47 
insertions(+), 20 deletions(-) diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py index 3709cdff6..972231893 100644 --- a/src/xtgeo/well/_welldata.py +++ b/src/xtgeo/well/_welldata.py @@ -20,9 +20,12 @@ similarly of a column is removed, the corresponding entries in wlogtypes and wlogrecords will be deleted. """ +from __future__ import annotations + import math from dataclasses import dataclass, field from enum import Enum, EnumMeta, unique +from typing import Any, Optional import numpy as np import pandas as pd @@ -31,8 +34,6 @@ from xtgeo import XTGeoCLibError # type: ignore[attr-defined] from xtgeo.cxtgeo import _cxtgeo -from . import _well_aux - class _LogTypeMeta(EnumMeta): """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618""" @@ -69,20 +70,20 @@ class _WellData: The wlogstypes is on form {"PHIT": CONT, "FACIES": DISC, ...} - The wlogrecords is somewhat heterogenous, on form: + The wlogrecords is somewhat heterogeneous, on form: {"PHIT": ("unit", "scale"), "FACIES": {0:BG, 2: "SST", 4: "CALC"}} Hence the CONT logs hold a tuple or list with 2 str members, or None, while DISC log holds a dict where the key is an int and the value is a string. Note:: - Callers shall not use properties, but methods, e.g.:: + Callers shall not use properties, but methods, e.g.:: - instance.well = some_new_dataframe # not + instance.well = some_new_dataframe # not - but:: + but - instance.set_dataframe(some_new_dataframe) + instance.set_dataframe(some_new_dataframe) """ data: pd.DataFrame @@ -112,7 +113,12 @@ def _infer_log_dtypes(self): datatypes = {} for name, dtype in dlist.items(): if name in self.wlogtypes: - datatypes[name] = self.wlogtypes[name] # keep as is + datatypes[name] = self.wlogtypes[name] + if "DISC" in datatypes[name]: + datatypes[name] = _LogType.DISC.value + else: + datatypes[name] = _LogType.CONT.value + continue if name in (self.xname, self.yname, self.zname): @@ -205,6 +211,7 @@ def ensure_consistency(self): * When adding one or columns to the dataframe * When removing one or more columns from the dataframe + * ... """ if list(self.data.columns[:3]) != [self.xname, self.yname, self.zname]: @@ -213,6 +220,7 @@ def ensure_consistency(self): f"and '{self.zname}', got {list(self.data.columns[:3])}" ) + # order matters: self._ensure_consistency_wlogtypes() self._ensure_consistency_wlogrecords() self._ensure_consistency_df_dtypes() @@ -232,7 +240,7 @@ def set_wlogtype(self, name: str, wtype: str) -> None: apply_wtype = "DISC" if name not in self.wlogtypes: - raise ValueError(f"No such well log name present: {name}") + raise ValueError(f"No such log name present: {name}") if apply_wtype in _LogType: self.wlogtypes[name] = _LogType(apply_wtype) @@ -247,7 +255,7 @@ def set_wlogrecord(self, name: str, record: dict) -> None: """Set a wlogrecord for a named log.""" if name not in self.wlogtypes: - raise ValueError(f"No such well log name present: {name}") + raise ValueError(f"No such logname: {name}") if self.wlogtypes[name] == _LogType.CONT.value and isinstance( record, (list, tuple) @@ -265,11 +273,13 @@ def set_wlogrecord(self, name: str, record: dict) -> None: record, dict ): raise ValueError( - "Cannot set a log record for a discrete log: input record is " - "not a dictionary" + "Input is not a dictionary. Cannot set a log record for a discrete log" ) else: - raise ValueError("Something went wrong when setting logrecord.") + raise ValueError( + "Something went wrong when setting logrecord: " + f"({self.wlogtypes[name]} {type(record)})." 
+ ) self.ensure_consistency() @@ -401,9 +411,9 @@ def geometrics(self): ) # extract numpies from XYZ trajetory logs - ptr_xv = _well_aux.get_carray(self, "X_UTME") - ptr_yv = _well_aux.get_carray(self, "Y_UTMN") - ptr_zv = _well_aux.get_carray(self, "Z_TVDSS") + ptr_xv = self._get_carray(self.xname) + ptr_yv = self._get_carray(self.yname) + ptr_zv = self._get_carray(self.zname) # get number of rows in pandas nlen = len(self.data.index) @@ -447,7 +457,7 @@ def _convert_np_carr_int(self, np_array): The numpy is always a double (float64), so need to convert first """ - carr = _cxtgeo.new_intarray(self.nrow) + carr = _cxtgeo.new_intarray(len(self.data.index)) np_array = np_array.astype(np.int32) @@ -457,7 +467,7 @@ def _convert_np_carr_int(self, np_array): def _convert_np_carr_double(self, np_array): """Convert numpy 1D array to C array, assuming double type.""" - carr = _cxtgeo.new_doublearray(self.nrow) + carr = _cxtgeo.new_doublearray(len(self.data.index)) _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) @@ -466,8 +476,26 @@ def _convert_np_carr_double(self, np_array): def _convert_carr_double_np(self, carray, nlen=None): """Convert a C array to numpy, assuming double type.""" if nlen is None: - nlen = len(self._wdata.data.index) + nlen = len(self.data.index) nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) return nparray + + def _get_carray(self, lname: str) -> Optional[Any]: + """Returns the C array pointer (via SWIG) for a given log. + + Type conversion is double if float64, int32 if DISC log. + Returns None if log does not exist. + """ + if lname in self.data: + np_array = self.data[lname].values + else: + return None + + if "DISC" in self.wlogtypes[lname]: + carr = self._convert_np_carr_int(np_array) + else: + carr = self._convert_np_carr_double(np_array) + + return carr diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index d4cf6cf8b..35e23d555 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -4,7 +4,6 @@ from __future__ import annotations import io -import math from copy import deepcopy from pathlib import Path from typing import Dict, List, Optional, Union From 1aaf8b0194846556d8311547f8c368dd7743b28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Tue, 3 Oct 2023 08:18:36 +0200 Subject: [PATCH 12/13] WIP --- src/xtgeo/common/constants.py | 4 + src/xtgeo/well/_well_io.py | 2 +- src/xtgeo/well/_welldata.py | 501 ------------------ src/xtgeo/well/well1.py | 84 +-- src/xtgeo/xyz_common/__init__.py | 3 + src/xtgeo/xyz_common/_xyz_data.py | 581 +++++++++++++++++++++ tests/test_well/test_well.py | 5 - tests/test_well/test_well_xyzdata_class.py | 192 +++++++ tests/test_well/test_welldata_class.py | 198 ------- 9 files changed, 828 insertions(+), 742 deletions(-) delete mode 100644 src/xtgeo/well/_welldata.py create mode 100644 src/xtgeo/xyz_common/__init__.py create mode 100644 src/xtgeo/xyz_common/_xyz_data.py create mode 100644 tests/test_well/test_well_xyzdata_class.py delete mode 100644 tests/test_well/test_welldata_class.py diff --git a/src/xtgeo/common/constants.py b/src/xtgeo/common/constants.py index 3bd0912b9..f22df3d7b 100644 --- a/src/xtgeo/common/constants.py +++ b/src/xtgeo/common/constants.py @@ -21,3 +21,7 @@ MAXKEYWORDS = cx.MAXKEYWORDS # maximum keywords for ECL and ROFF scanning MAXDATES = cx.MAXDATES # maximum keywords for ECL scanning + +# for XYZ data, restricted to float32 and int32 +UNDEF_CONT = UNDEF +UNDEF_DISC = UNDEF_INT diff --git a/src/xtgeo/well/_well_io.py b/src/xtgeo/well/_well_io.py index 
42e798ab2..ddf333c75 100644 --- a/src/xtgeo/well/_well_io.py +++ b/src/xtgeo/well/_well_io.py @@ -95,7 +95,7 @@ def import_rms_ascii( xdict = {int(rxv[i]): rxv[i + 1] for i in range(0, len(rxv), 2)} wlogrecords[lname] = xdict else: - wlogrecords[lname] = rxv + wlogrecords[lname] = tuple(row[1:]) nlogread += 1 diff --git a/src/xtgeo/well/_welldata.py b/src/xtgeo/well/_welldata.py deleted file mode 100644 index 972231893..000000000 --- a/src/xtgeo/well/_welldata.py +++ /dev/null @@ -1,501 +0,0 @@ -"""Module for private _WellData class - - X_UTME Y_UTMN Z_TVDSS MDepth PHIT KLOGH Sw -0 463256.911 5930542.294 -49.0000 0.0000 NaN NaN NaN ... -1 463256.912 5930542.295 -48.2859 0.5000 NaN NaN NaN ... -2 463256.913 5930542.296 -47.5735 1.0000 NaN NaN NaN ... -3 463256.914 5930542.299 -46.8626 1.5000 NaN NaN NaN ... -4 463256.916 5930542.302 -46.1533 2.0000 NaN NaN NaN ... - ... ... ... ... ... ... ... - -Where each log has a wlogtypes dictionary, telling if the logs are treated -as discrete (DISC) or continuous (CONT). In addition there is a wlogrecords -dict, storing the unit for continuous logs (defaulted to None) or a dictionary -of codes if the log in DISC type. - -The 3 first columsn are the coordinates. - -The purpose here is to automate and improve; if a column is added to the dataframe, then -the class methods here will try to guess the wlogtype and wlogtype, and add those; -similarly of a column is removed, the corresponding entries in wlogtypes and wlogrecords -will be deleted. -""" -from __future__ import annotations - -import math -from dataclasses import dataclass, field -from enum import Enum, EnumMeta, unique -from typing import Any, Optional - -import numpy as np -import pandas as pd - -import xtgeo.common.constants as const -from xtgeo import XTGeoCLibError # type: ignore[attr-defined] -from xtgeo.cxtgeo import _cxtgeo - - -class _LogTypeMeta(EnumMeta): - """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618""" - - def __contains__(cls, item): - try: - cls(item) # pylint: disable=E1120 - except ValueError: - return False - else: - return True - - -@unique -class _LogType(Enum, metaclass=_LogTypeMeta): - """Enumerate type of log""" - - CONT = "CONT" - DISC = "DISC" - - -CONT_DEFAULT_WLOGRECORD = ("UNKNOWN", "LINEAR") - - -@dataclass -class _WellData: - """Private class for the Well log data, where a Pandas dataframe is core. - - The data are stored in pandas dataframes, and by default, all logs are float, and - np.nan defines undefined values. Even if they are DISC. The reason for this is - restrictions in older versions of Pandas. - - All values in the dataframe shall be numbers. - - The wlogstypes is on form {"PHIT": CONT, "FACIES": DISC, ...} - - The wlogrecords is somewhat heterogeneous, on form: - {"PHIT": ("unit", "scale"), "FACIES": {0:BG, 2: "SST", 4: "CALC"}} - Hence the CONT logs hold a tuple or list with 2 str members, or None, while DISC - log holds a dict where the key is an int and the value is a string. - - Note:: - - Callers shall not use properties, but methods, e.g.:: - - instance.well = some_new_dataframe # not - - but - - instance.set_dataframe(some_new_dataframe) - """ - - data: pd.DataFrame - wlogtypes: dict = field(default_factory=dict) - wlogrecords: dict = field(default_factory=dict) - xname: str = "X_UTME" - yname: str = "Y_UTMN" - zname: str = "Z_TVDSS" - - def __post_init__(self): - self.ensure_consistency() - - def _infer_log_dtypes(self): - """Return as dict on form {"X_UTME": "CONT", .... "FACIES": "DISC"}. 
- - There are some important restrictions: - * The first 3 columns X Y Z) are always CONT, even if input appears as DISC. - * A check is made towards existing wlogtypes; if the key,value pair exists - already, this function will *not* force a change but keep as is. - """ - - new_df = self.data.convert_dtypes() - - dlist = new_df.dtypes.to_dict() - print(self.wlogtypes) - - datatypes = {} - for name, dtype in dlist.items(): - if name in self.wlogtypes: - datatypes[name] = self.wlogtypes[name] - if "DISC" in datatypes[name]: - datatypes[name] = _LogType.DISC.value - else: - datatypes[name] = _LogType.CONT.value - - continue - - if name in (self.xname, self.yname, self.zname): - # force coordinates, first 3 columns, to be CONT - datatypes[name] = _LogType.CONT.value - continue - - if "Float" in str(dtype): - datatypes[name] = _LogType.CONT.value - elif "Int" in str(dtype): - datatypes[name] = _LogType.DISC.value - else: - raise RuntimeError( - f"Log type seems to be something else than Float or Int for {name}" - ) - return datatypes - - def _ensure_consistency_wlogtypes(self): - """Ensure that dataframe and wlogtypes are consistent. - - wlogtypes are on form {"GR": "CONT", "ZONES": "DISC", ...} - - The column data in the dataframe takes precedence; i.e. if a column is removed - in a pandas operation, then wlogtypes are adapted silently by removing the item - from the dict. - """ - # check first if a log is removed in the dataframe (e.g. by pandas operations) - for logname in list(self.wlogtypes.keys()): - if logname not in self.data.columns[3:]: - del self.wlogtypes[logname] - - self.wlogtypes = self._infer_log_dtypes() - - def _ensure_consistency_wlogrecords(self): - """Ensure that data and wloglogrecords are consistent; cf wlogtypes. - - Important that wlogtypes are correct; i.e. run _ensure_consistency_wlogtypes() - first. - """ - for logname, dtype in self.wlogtypes.items(): - if logname not in self.wlogrecords or not isinstance( - self.wlogrecords[logname], (dict, list, tuple) - ): - if dtype == _LogType.CONT.value: - self.wlogrecords[logname] = CONT_DEFAULT_WLOGRECORD - - if dtype == _LogType.DISC.value: - # it is a discrete log with missing record; try to find - # a default one based on current values... - lvalues = self.data[logname].values.round(decimals=0) - lmin = int(lvalues.min()) - lmax = int(lvalues.max()) - - lvalues = lvalues.astype("int") - codes = {} - for lval in range(lmin, lmax + 1): - if lval in lvalues: - codes[lval] = str(lval) - - self.wlogrecords[logname] = codes - - # correct when wlogtypes is CONT but wlogrecords for that entry is a dict - if ( - logname in self.wlogrecords - and self.wlogtypes[logname] == _LogType.CONT.value - ): - if isinstance(self.wlogrecords[logname], dict): - self.wlogrecords[logname] = CONT_DEFAULT_WLOGRECORD - - def _ensure_consistency_df_dtypes(self): - """Ensure that dataframe float32 for all logs, except for X Y Z -> float64.""" - - col = list(self.data) - - coords_dtypes = [str(entry) for entry in self.data[col[0:3]].dtypes] - - if not all(["float64" in entry for entry in coords_dtypes]): - self.data[col[0:3]] = self.data.iloc[:, 0:3].astype("float64") - - logs_dtypes = [str(entry) for entry in self.data[col[3:]].dtypes] - - if not all(["float32" in entry for entry in logs_dtypes]): - self.data[col[3:]] = self.data.iloc[:, 3:].astype("float32") - - def ensure_consistency(self): - """Ensure that data and wlog* are consistent. 
- - This is important for many operations on the dataframe, an should keep wlogtypes - and wlogrecords 'in sync' with the dataframe. - - * When adding one or columns to the dataframe - * When removing one or more columns from the dataframe - * ... - """ - - if list(self.data.columns[:3]) != [self.xname, self.yname, self.zname]: - raise ValueError( - f"Well dataframe must include '{self.xname}', '{self.yname}' " - f"and '{self.zname}', got {list(self.data.columns[:3])}" - ) - - # order matters: - self._ensure_consistency_wlogtypes() - self._ensure_consistency_wlogrecords() - self._ensure_consistency_df_dtypes() - - def set_wlogtype(self, name: str, wtype: str) -> None: - """Set a wlogtype for a named log. - - A bit flexibility is added for wtype, e.g. allowing "float*" for CONT etc, and - allow lowercase "cont" for CONT - - """ - - apply_wtype = wtype.upper() - if "FLOAT" in apply_wtype: - apply_wtype = "CONT" - if "INT" in apply_wtype: - apply_wtype = "DISC" - - if name not in self.wlogtypes: - raise ValueError(f"No such log name present: {name}") - - if apply_wtype in _LogType: - self.wlogtypes[name] = _LogType(apply_wtype) - else: - raise ValueError( - f"Cannot set wlogtype as {wtype}, not in {list(_LogType.__members__)}" - ) - - self.ensure_consistency() - - def set_wlogrecord(self, name: str, record: dict) -> None: - """Set a wlogrecord for a named log.""" - - if name not in self.wlogtypes: - raise ValueError(f"No such logname: {name}") - - if self.wlogtypes[name] == _LogType.CONT.value and isinstance( - record, (list, tuple) - ): - if len(record) == 2: - self.wlogrecords[name] = tuple(record) # prefer as tuple - elif self.wlogtypes[name] == _LogType.CONT.value and isinstance(record, dict): - raise ValueError( - "Cannot set a log record for a continuous log: input record is " - "dictionary, not a list or tuple" - ) - elif self.wlogtypes[name] == _LogType.DISC.value and isinstance(record, dict): - self.wlogrecords[name] = record - elif self.wlogtypes[name] == _LogType.DISC.value and not isinstance( - record, dict - ): - raise ValueError( - "Input is not a dictionary. Cannot set a log record for a discrete log" - ) - else: - raise ValueError( - "Something went wrong when setting logrecord: " - f"({self.wlogtypes[name]} {type(record)})." - ) - - self.ensure_consistency() - - def get_dataframe_copy( - self, - infer_dtype: bool = False, - filled=False, - fill_value=const.UNDEF, - fill_value_int=const.UNDEF_INT, - ): - """Get a deep copy of the dataframe, with options. - - If infer_dtype is True, then DISC columns will be of "int32" type - """ - dfr = self.data.copy() - if infer_dtype: - for name, wtype in self.wlogtypes.items(): - if "DISC" in wtype: - dfr[name] = dfr[name].astype("int32") - - if filled: - dfill = {} - for lname in self.data: - if "DISC" in self.wlogtypes[lname]: - dfill[lname] = fill_value_int - else: - dfill[lname] = fill_value - - dfr = dfr.fillna(dfill) - - return dfr - - def get_dataframe(self): - """Get the dataframe.""" - return self.data - - def set_dataframe(self, dfr): - """Set the dataframe in a controlled manner, shall be used""" - # TODO: more checks, and possibly acceptance of lists, dicts? - if isinstance(dfr, pd.DataFrame): - self.data = dfr - else: - raise ValueError("Input dfr is not a pandas dataframe") - self.ensure_consistency() - - def rename_log(self, lname, newname): - """Rename a log, e.g. 
Poro to PORO.""" - - if lname not in list(self.data): - raise ValueError("Input log does not exist") - - if newname in list(self.data): - raise ValueError("New log name exists already") - - # rename in dataframe - self.data.rename(index=str, columns={lname: newname}, inplace=True) - - self.wlogtypes[newname] = self.wlogtypes.pop(lname) - self.wlogrecords[newname] = self.wlogrecords.pop(lname) - - self.ensure_consistency() - - def create_log( - self, lname, logtype="CONT", logrecord=None, value=0.0, force=True - ) -> bool: - """Create a new log.""" - - if lname in list(self.data) and force is False: - return False - - self.wlogtypes[lname] = logtype - self.wlogrecords[lname] = logrecord - - # make a new column - self.data[lname] = float(value) - self.ensure_consistency() - return True - - def delete_log(self, lname): - """Delete/remove an existing log, or list of logs.""" - if not isinstance(lname, list): - lname = [lname] - - lcount = 0 - for logn in lname: - if logn not in list(self.data): - continue - - lcount += 1 - self.data.drop(logn, axis=1, inplace=True) - - self.ensure_consistency() - - return lcount - - def create_relative_hlen(self): - """Make a relative length of a well, as a log.""" - # extract numpies from XYZ trajectory logs - xv = self.data[self.xname].values - yv = self.data[self.yname].values - - distance = [] - previous_x, previous_y = xv[0], yv[0] - for _, (x, y) in enumerate(zip(xv, yv)): - distance.append(math.hypot((previous_x - x), (y - previous_y))) - previous_x, previous_y = x, y - - self.data["R_HLEN"] = pd.Series(np.cumsum(distance), index=self.data.index) - self.ensure_consistency() - - def geometrics(self): - """Compute some well geometrical arrays MD, INCL, AZI, as logs. - - These are kind of quasi measurements hence the logs will named - with a Q in front as Q_MDEPTH, Q_INCL, and Q_AZI. - - These logs will be added to the dataframe. If the mdlogname - attribute does not exist in advance, it will be set to 'Q_MDEPTH'. - - Returns: - False if geometrics cannot be computed - - """ - # TODO: rewrite in pure python? - if self.data.shape[0] < 3: - raise ValueError( - f"Cannot compute geometrics. 
Not enough " - f"trajectory points (need >3, have: {self.data.shape[0]})" - ) - - # extract numpies from XYZ trajetory logs - ptr_xv = self._get_carray(self.xname) - ptr_yv = self._get_carray(self.yname) - ptr_zv = self._get_carray(self.zname) - - # get number of rows in pandas - nlen = len(self.data.index) - - ptr_md = _cxtgeo.new_doublearray(nlen) - ptr_incl = _cxtgeo.new_doublearray(nlen) - ptr_az = _cxtgeo.new_doublearray(nlen) - - ier = _cxtgeo.well_geometrics( - nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0 - ) - - if ier != 0: - raise XTGeoCLibError(f"well_geometrics failed with error code: {ier}") - - dnumpy = self._convert_carr_double_np(ptr_md) - self.data["Q_MDEPTH"] = pd.Series(dnumpy, index=self.data.index) - - dnumpy = self._convert_carr_double_np(ptr_incl) - self.data["Q_INCL"] = pd.Series(dnumpy, index=self.data.index) - - dnumpy = self._convert_carr_double_np(ptr_az) - self.data["Q_AZI"] = pd.Series(dnumpy, index=self.data.index) - - # delete tmp pointers - _cxtgeo.delete_doublearray(ptr_xv) - _cxtgeo.delete_doublearray(ptr_yv) - _cxtgeo.delete_doublearray(ptr_zv) - _cxtgeo.delete_doublearray(ptr_md) - _cxtgeo.delete_doublearray(ptr_incl) - _cxtgeo.delete_doublearray(ptr_az) - - return True - - # ---------------------------------------------------------------------------------- - # Special methods for nerds, todo is to move to private module - # ---------------------------------------------------------------------------------- - - def _convert_np_carr_int(self, np_array): - """Convert numpy 1D array to C array, assuming int type. - - The numpy is always a double (float64), so need to convert first - """ - carr = _cxtgeo.new_intarray(len(self.data.index)) - - np_array = np_array.astype(np.int32) - - _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr) - - return carr - - def _convert_np_carr_double(self, np_array): - """Convert numpy 1D array to C array, assuming double type.""" - carr = _cxtgeo.new_doublearray(len(self.data.index)) - - _cxtgeo.swig_numpy_to_carr_1d(np_array, carr) - - return carr - - def _convert_carr_double_np(self, carray, nlen=None): - """Convert a C array to numpy, assuming double type.""" - if nlen is None: - nlen = len(self.data.index) - - nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray) - - return nparray - - def _get_carray(self, lname: str) -> Optional[Any]: - """Returns the C array pointer (via SWIG) for a given log. - - Type conversion is double if float64, int32 if DISC log. - Returns None if log does not exist. - """ - if lname in self.data: - np_array = self.data[lname].values - else: - return None - - if "DISC" in self.wlogtypes[lname]: - carr = self._convert_np_carr_int(np_array) - else: - carr = self._convert_np_carr_double(np_array) - - return carr diff --git a/src/xtgeo/well/well1.py b/src/xtgeo/well/well1.py index 35e23d555..1cbb105b5 100644 --- a/src/xtgeo/well/well1.py +++ b/src/xtgeo/well/well1.py @@ -15,9 +15,9 @@ import xtgeo import xtgeo.common.constants as const import xtgeo.cxtgeo._cxtgeo as _cxtgeo -from xtgeo import XTGeoCLibError # type: ignore[attr-defined] -from . import _well_aux, _well_io, _well_oper, _well_roxapi, _welldata, _wellmarkers +from ..xyz_common import _xyz_data # type: ignore[attr-defined] +from . 
import _well_aux, _well_io, _well_oper, _well_roxapi, _wellmarkers xtg = xtgeo.XTGeoDialog() logger = xtg.functionlogger(__name__) @@ -194,9 +194,9 @@ def __init__( self._mdlogname = mdlogname self._zonelogname = zonelogname - self._wdata = _welldata._WellData(df, wlogtypes, wlogrecords) - self._wlogtypes = self._wdata.wlogtypes - self._wlogrecords = self._wdata.wlogrecords + self._wdata = _xyz_data._XYZData(df, wlogtypes, wlogrecords) + # self._wlogtypes = self._wdata.attr_types + # self._wlogrecords = self._wdata.attr_records self._ensure_consistency() @@ -212,8 +212,9 @@ def __repr__(self): # noqa: D105 f"{self.__class__.__name__} (rkb={self._rkb}, xpos={self._xpos}, " f"ypos={self._ypos}, wname='{self._wname}', " f"filesrc='{self._filesrc}', mdlogname='{self._mdlogname}', " - f"zonelogname='{self._zonelogname}', \nwlogtypes='{self._wlogtypes}', " - f"\nwlogrecords='{self._wlogrecords}', " + f"zonelogname='{self._zonelogname}', \n" + f"wlogtypes='{self._wdata.attr_types}', " + f"\nwlogrecords='{self._wdata.attr_records}', " f"df=\n{repr(self._wdata.data)}))" ) @@ -694,10 +695,10 @@ def get_wlogs(self) -> dict: for key in self.get_lognames(): wtype = "CONT" wrecord = None - if key in self._wlogtypes: - wtype = self._wlogtypes[key] - if key in self._wlogrecords: - wrecord = self._wlogrecords[key] + if key in self._wdata.attr_types: + wtype = self._wdata.attr_types[key] + if key in self._wdata.attr_records: + wrecord = self._wdata.attr_records[key] res[key] = [wtype, wrecord] @@ -725,14 +726,17 @@ def set_wlogs(self, wlogs: dict): typ, rec = wlogs[key] if typ in Well.VALID_LOGTYPES: - self._wlogtypes[key] = deepcopy(typ) + # self._wlogtypes[key] = deepcopy(typ) + self._wdata.set_attr_type(key, deepcopy(typ)) else: raise ValueError(f"Invalid log type found in input: {typ}") if isinstance(rec, dict): - self._wlogrecords[key] = deepcopy(rec) + self._wdata.set_attr_record(key, deepcopy(rec)) + # self._wlogrecords[key] = deepcopy(rec) elif not rec: - self._wlogrecords[key] = "" + self._wdata.set_attr_record(key, ("", "")) + # self._wlogrecords[key] = "" else: raise ValueError(f"Invalid log record found in input: {rec}") @@ -767,14 +771,14 @@ def copy(self): self._wdata.data.copy(), self.mdlogname, self.zonelogname, - deepcopy(self._wlogtypes), - deepcopy(self._wlogrecords), + deepcopy(self._wdata.attr_types), + deepcopy(self._wdata.attr_records), self._filesrc, ) def rename_log(self, lname, newname): """Rename a log, e.g. Poro to PORO.""" - self._wdata.rename_log(lname, newname) + self._wdata.rename_attr(lname, newname) if self._mdlogname == lname: self._mdlogname = newname @@ -782,18 +786,25 @@ def rename_log(self, lname, newname): if self._zonelogname == lname: self._zonelogname = newname - def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=True): + def create_log( + self, + lname: str, + logtype: str = "CONT", + logrecord: Optional[dict] = None, + value: float = 0.0, + force: bool = True, + ) -> bool: """Create a new log with initial values. If the logname already exists, it will be silently overwritten, unless the option force=False. 
Args: - lname (str): name of new log - logtype (str): Must be 'CONT' (default) or 'DISC' (discrete) - logrecord (dict): A dictionary of key: values for 'DISC' logs - value (float): initia value to set_index - force (bool): If True, and lname exists, it will be overwritten, if + lname: name of new log + logtype: Must be 'CONT' (default) or 'DISC' (discrete) + logrecord: A dictionary of key: values for 'DISC' logs + value: initial value to set + force: If True, and lname exists, it will be overwritten, if False, no new log will be made. Will return False. Returns: @@ -803,18 +814,19 @@ def create_log(self, lname, logtype="CONT", logrecord=None, value=0.0, force=Tru Note:: - A new log can also be created + A new log can also be created by adding it to the dataframe directly, but + with less control over e.g. logrecord """ - self._wdata.create_log(lname, logtype, logrecord, value, force) + return self._wdata.create_attr(lname, logtype, logrecord, value, force) - def delete_log(self, lname): + def delete_log(self, lname: Union[str, List[str]]) -> int: """Delete/remove an existing log, or list of logs. Will continue silently if a log does not exist. Args: - lname(str or list): A logname or a list of lognames + lname: A logname or a list of lognames Returns: Number of logs deleted @@ -824,30 +836,28 @@ def delete_log(self, lname): A log can also be deleted by simply removing it from the dataframe. """ - return _well_oper.delete_log(self, lname) + return self._wdata.delete_attr(lname) delete_logs = delete_log # alias function def get_logtype(self, lname) -> Optional[str]: """Returns the type of a given log (e.g. DISC or CONT), None if not present.""" - if lname in self._wdata.wlogtypes: - return self._wdata.wlogtypes[lname] + if lname in self._wdata.attr_types: + return self._wdata.attr_types[lname] return None def set_logtype(self, lname, ltype): """Sets the type of a give log (e.g. DISC or CONT).""" - self._wdata.set_wlogtype(lname, ltype) + self._wdata.set_attr_type(lname, ltype) def get_logrecord(self, lname): """Returns the record (dict) of a given log name, None if not exists.""" - if lname in self._wlogtypes: - return self._wlogrecords[lname] - return None + return self._wdata.get_attr_record(lname) def set_logrecord(self, lname, newdict): """Sets the record (dict) of a given discrete log.""" - self._wdata.set_wlogrecord(lname, newdict) + self._wdata.set_attr_record(lname, newdict) def get_logrecord_codename(self, lname, key): """Returns the name entry of a log record, for a given key. @@ -940,8 +950,8 @@ def truncate_parallel_path( if self.dataframe.shape[0] < 3 or other.dataframe.shape[0] < 3: raise ValueError( - f"Too few points to truncate parallel path, was {self._wdata.data.size} and " - f"{other._df.size}, must be >3" + f"Too few points to truncate parallel path, was " + f"{self._wdata.data.size} and {other._df.size}, must be >3" ) # extract numpies from XYZ trajectory logs diff --git a/src/xtgeo/xyz_common/__init__.py b/src/xtgeo/xyz_common/__init__.py new file mode 100644 index 000000000..a250ef494 --- /dev/null +++ b/src/xtgeo/xyz_common/__init__.py @@ -0,0 +1,3 @@ +# common low level and private modules for XYZ and Well + +from xtgeo.xyz_common._xyz_data import _XYZData, _AttrType diff --git a/src/xtgeo/xyz_common/_xyz_data.py b/src/xtgeo/xyz_common/_xyz_data.py new file mode 100644 index 000000000..9f3cad1a7 --- /dev/null +++ b/src/xtgeo/xyz_common/_xyz_data.py @@ -0,0 +1,581 @@ +"""Module for private _XYZData class. 
+
+Note that the design of this targets Well and general XYZ data (Points/Polygons),
+hence the intention is to let this work as a general 'engine' for dataframe'ish data
+in xtgeo, at least Well, Points and Polygons. (But in the first round, it is
+implemented for Wells only). Dataframes look like:
+
+        X_UTME       Y_UTMN    Z_TVDSS    MDepth    PHIT    KLOGH     Sw
+0   463256.911  5930542.294   -49.0000    0.0000     NaN      NaN    NaN ...
+1   463256.912  5930542.295   -48.2859    0.5000     NaN      NaN    NaN ...
+2   463256.913  5930542.296   -47.5735    1.0000     NaN      NaN    NaN ...
+3   463256.914  5930542.299   -46.8626    1.5000     NaN      NaN    NaN ...
+4   463256.916  5930542.302   -46.1533    2.0000     NaN      NaN    NaN ...
+       ...          ...          ...        ...      ...      ...    ...
+
+Each attr (log) has an entry in the attr_types dictionary, telling if the column is
+treated as discrete (DISC) or continuous (CONT). In addition there is an attr_records
+dict, storing the unit for continuous logs/attrs (defaulted to ("", "")) or a
+dictionary of codes if the column is of DISC type (this is optional, and perhaps only
+relevant for Well data).
+
+The 3 first columns are the XYZ coordinates or XY coordinates + value:
+X, Y, Z or X, Y, V. An optional fourth column is also possible, as polygon_id.
+All the rest are free 'attributes', which for wells will be well logs. Hence:
+
+    attrtypes ~ refer to attr_types for XYZ and Well data
+    attrrecords ~ refer to attr_records for Well data and possibly Points/Polygons
+
+If a column is added to the dataframe, then the methods here will try to guess the
+attr_type and attr_record, and add those; similarly, if a column is removed, the
+corresponding entries in attr_types and attr_records will be deleted.
+"""
+from __future__ import annotations
+
+import math
+from enum import Enum, EnumMeta, unique
+from typing import Any, Optional, Sequence, Union
+
+import numpy as np
+import pandas as pd
+
+import xtgeo.common.constants as const
+from xtgeo import XTGeoCLibError  # type: ignore[attr-defined]
+from xtgeo.cxtgeo import _cxtgeo
+
+# class _AttrTypeMeta(EnumMeta):
+#     """For enabling 'in' method, cf https://stackoverflow.com/questions/43634618"""
+
+#     def __contains__(cls, item):
+#         try:
+#             cls(item)  # pylint: disable=E1120
+#         except ValueError:
+#             return False
+#         else:
+#             return True
+
+
+@unique
+class _AttrType(Enum):  # (Enum, metaclass=_AttrTypeMeta):
+    """Enumerate type of attribute/log"""
+
+    CONT = 1
+    DISC = 2
+
+
+@unique
+class _XYZType(Enum):  # (Enum, metaclass=_AttrTypeMeta):
+    """Enumerate type of context"""
+
+    POINTS = 1
+    POLYGONS = 2  # i.e. the same here as PolyLines
+    WELL = 3
+
+
+CONT_DEFAULT_RECORD = ("", "")  # unit and scale; empty strings indicate unknown
+
+
+class _XYZData:
+    """Private class for the XYZ and Well log data, where a Pandas dataframe is core.
+
+    The data are stored in pandas dataframes; by default all columns are float, and
+    np.nan defines undefined values, even for DISC columns. The reason for this is
+    restrictions in older versions of Pandas.
+
+    All values in the dataframe shall be numbers.
+
+    The attr_types dict is on the form {"PHIT": CONT, "FACIES": DISC, ...}
+
+    The attr_records dict is somewhat heterogeneous, on the form:
+    {"PHIT": ("unit", "scale"), "FACIES": {0: "BG", 2: "SST", 4: "CALC"}}
+    Hence CONT logs hold a tuple or list with 2 str members, or None, while DISC
+    logs hold a dict where the key is an int and the value is a string.
+    """
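+
+    # [Editor's note] An illustrative construction sketch (added comment, not part
+    # of the original patch); assumes a dataframe with the three mandatory
+    # coordinate columns and one hypothetical "GR" log:
+    #
+    #     dfr = pd.DataFrame(
+    #         {"X_UTME": [0.0, 1.0], "Y_UTMN": [0.0, 1.0], "Z_TVDSS": [0.0, 1.0],
+    #          "GR": [55.0, 60.0]}
+    #     )
+    #     xyz = _XYZData(dfr)  # attr_types and attr_records are inferred
+    #     assert xyz.attr_types["GR"] == _AttrType.CONT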
+    """
+
+    def __init__(
+        self,
+        dataframe: pd.DataFrame,
+        attr_types: Optional[dict] = None,
+        attr_records: Optional[dict] = None,
+        xname: str = "X_UTME",
+        yname: str = "Y_UTMN",
+        zname: str = "Z_TVDSS",
+        idname: Optional[str] = None,
+        undef: Union[float, Sequence[float]] = -999.0,
+        xyztype: str = "well",
+    ):
+        self._df = dataframe
+
+        self._attr_types = {}
+        if isinstance(attr_types, dict):
+            for name, atype in attr_types.items():
+                self._attr_types[name] = _AttrType[atype]
+
+        self._attr_records = attr_records if attr_records is not None else {}
+        self._xname = xname
+        self._yname = yname
+        self._zname = zname
+        self._idname = idname
+
+        # undefined data are given by a value, that may be different for cont vs disc
+        if isinstance(undef, list):
+            self._undef_disc = undef[0]
+            self._undef_cont = undef[1]
+        else:
+            self._undef_disc = undef
+            self._undef_cont = undef
+
+        if xyztype == "well":
+            self._xyztype = _XYZType.WELL
+
+        self.ensure_consistency()
+
+    @property
+    def dataframe(self):
+        return self._df
+
+    data = dataframe  # alias
+
+    @property
+    def attr_types(self):
+        return self._attr_types
+
+    @property
+    def attr_records(self):
+        return self._attr_records
+
+    @property
+    def xname(self):
+        return self._xname
+
+    @property
+    def yname(self):
+        return self._yname
+
+    @property
+    def zname(self):
+        return self._zname
+
+    def _infer_attr_dtypes(self):
+        """Return as dict on form {"X_UTME": "CONT", .... "FACIES": "DISC"}.
+
+        There are some important restrictions:
+        * The first 3 columns (X Y Z) are always CONT, even if input appears as DISC.
+        * A check is made towards existing attr_types; if the key, value pair exists
+          already, this function will *not* force a change but keep as is.
+        """
+
+        new_df = self._df.convert_dtypes()
+
+        dlist = new_df.dtypes.to_dict()
+
+        datatypes = {}
+        for name, dtype in dlist.items():
+            if name in self._attr_types:
+                datatypes[name] = self._attr_types[name]
+
+                continue
+
+            if name in (self._xname, self._yname, self._zname):
+                # force coordinates, first 3 columns, to be CONT
+                datatypes[name] = _AttrType.CONT
+                continue
+
+            if "Float" in str(dtype):
+                datatypes[name] = _AttrType.CONT
+            elif "Int" in str(dtype):
+                datatypes[name] = _AttrType.DISC
+            else:
+                raise RuntimeError(
+                    f"Log type seems to be something other than Float or Int for {name}"
+                )
+        return datatypes
+
+    def _ensure_consistency_attr_types(self):
+        """Ensure that dataframe and attr_types are consistent.
+
+        attr_types are on the form {"GR": "CONT", "ZONES": "DISC", ...}
+
+        The column data in the dataframe takes precedence; i.e. if a column is
+        removed in a pandas operation, then attr_types are adapted silently by
+        removing the item from the dict.
+        """
+        # check first if a log is removed in the dataframe (e.g. by pandas operations)
+        for attr_name in list(self._attr_types.keys()):
+            if attr_name not in self._df.columns[3:]:
+                del self._attr_types[attr_name]
+
+        self._attr_types = self._infer_attr_dtypes()
+
+    def _ensure_consistency_attr_records(self):
+        """Ensure that data and attr_records are consistent; cf attr_types.
+
+        Important that attr_types are correct; i.e. run
+        _ensure_consistency_attr_types() first.
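+
+        As a worked example of the defaulting done below: a DISC attribute with
+        (rounded) values {1, 3, 4} and no prior record will get the generated
+        record {1: "1", 3: "3", 4: "4"}.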
+        """
+        for attr_name, dtype in self._attr_types.items():
+            if attr_name not in self._attr_records or not isinstance(
+                self._attr_records[attr_name], (dict, list, tuple)
+            ):
+                if dtype == _AttrType.CONT:
+                    self._attr_records[attr_name] = CONT_DEFAULT_RECORD
+
+                if dtype == _AttrType.DISC:
+                    # it is a discrete log with missing record; try to find
+                    # a default one based on current values...
+                    lvalues = self._df[attr_name].values.round(decimals=0)
+                    lmin = int(lvalues.min())
+                    lmax = int(lvalues.max())
+
+                    lvalues = lvalues.astype("int")
+                    codes = {}
+                    for lval in range(lmin, lmax + 1):
+                        if lval in lvalues:
+                            codes[lval] = str(lval)
+
+                    self._attr_records[attr_name] = codes
+
+            # correct when attr_types is CONT but attr_records for that entry is a dict
+            if (
+                attr_name in self._attr_records
+                and self._attr_types[attr_name] == _AttrType.CONT
+            ):
+                if isinstance(self._attr_records[attr_name], dict):
+                    self._attr_records[attr_name] = CONT_DEFAULT_RECORD
+
+    def _ensure_consistency_df_dtypes(self):
+        """Ensure float32 dtype for all logs in the dataframe, and float64 for X Y Z."""
+
+        col = list(self._df)
+
+        coords_dtypes = [str(entry) for entry in self._df[col[0:3]].dtypes]
+
+        if not all("float64" in entry for entry in coords_dtypes):
+            self._df[col[0:3]] = self._df.iloc[:, 0:3].astype("float64")
+
+        logs_dtypes = [str(entry) for entry in self._df[col[3:]].dtypes]
+
+        if not all("float32" in entry for entry in logs_dtypes):
+            self._df[col[3:]] = self._df.iloc[:, 3:].astype("float32")
+
+        for name, attr_type in self._attr_types.items():
+            if attr_type == _AttrType.CONT:
+                self._df[name] = self._df[name].replace(
+                    self._undef_cont, np.float32(const.UNDEF_CONT)
+                )
+            else:
+                self._df[name] = self._df[name].replace(
+                    self._undef_disc, np.int32(const.UNDEF_DISC)
+                )
+
+    def ensure_consistency(self):
+        """Ensure that the dataframe, attr_types and attr_records are consistent.
+
+        This is important for many operations on the dataframe, and should keep
+        attr_types and attr_records 'in sync' with the dataframe.
+
+        * When adding one or more columns to the dataframe
+        * When removing one or more columns from the dataframe
+        * ...
+        """
+
+        if list(self._df.columns[:3]) != [self._xname, self._yname, self._zname]:
+            raise ValueError(
+                f"Dataframe must include '{self._xname}', '{self._yname}' "
+                f"and '{self._zname}', got {list(self._df.columns[:3])}"
+            )
+
+        # order matters:
+        self._ensure_consistency_attr_types()
+        self._ensure_consistency_attr_records()
+        self._ensure_consistency_df_dtypes()
+
+    def set_attr_type(self, name: str, attrtype: str) -> None:
+        """Set a type (DISC, CONT) for a named attribute.
+
+        Some flexibility is allowed for attrtype: e.g. "float*" maps to CONT,
+        "int*" maps to DISC, and lowercase input such as "cont" is accepted.
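+
+        A minimal, illustrative call (the attribute name is hypothetical):
+
+            wdata.set_attr_type("GR", "float")  # stored as _AttrType.CONT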
allowing "float*" for CONT + etc, and allow lowercase "cont" for CONT + + """ + + apply_attrtype = attrtype.upper() + if "FLOAT" in apply_attrtype: + apply_attrtype = "CONT" + if "INT" in apply_attrtype: + apply_attrtype = "DISC" + + if name not in self._attr_types: + raise ValueError(f"No such log name present: {name}") + + if apply_attrtype in _AttrType: + self._attr_types[name] = _AttrType(apply_attrtype) + else: + raise ValueError( + f"Cannot set wlogtype as {attrtype}, not in " + f"{list(_AttrType.__members__)}" + ) + + self.ensure_consistency() + + def get_attr_record(self, name: str): + """Get a record for a named attribute.""" + return self._attr_records[name] + + def set_attr_record(self, name: str, record: dict) -> None: + """Set a record for a named log.""" + + if name not in self._attr_types: + raise ValueError(f"No such attr_name: {name}") + + if self._attr_types[name] == _AttrType.CONT.value and isinstance( + record, (list, tuple) + ): + if len(record) == 2: + self._attr_records[name] = tuple(record) # prefer as tuple + elif self._attr_types[name] == _AttrType.CONT.value and isinstance( + record, dict + ): + raise ValueError( + "Cannot set a log record for a continuous log: input record is " + "dictionary, not a list or tuple" + ) + elif self._attr_types[name] == _AttrType.DISC.value and isinstance( + record, dict + ): + self._attr_records[name] = record + elif self._attr_types[name] == _AttrType.DISC.value and not isinstance( + record, dict + ): + raise ValueError( + "Input is not a dictionary. Cannot set a log record for a discrete log" + ) + else: + raise ValueError( + "Something went wrong when setting logrecord: " + f"({self._attr_types[name]} {type(record)})." + ) + + self.ensure_consistency() + + def get_dataframe_copy( + self, + infer_dtype: bool = False, + filled=False, + fill_value=const.UNDEF_CONT, + fill_value_int=const.UNDEF_DISC, + ): + """Get a deep copy of the dataframe, with options. + + If infer_dtype is True, then DISC columns will be of "int32" type + """ + dfr = self._df.copy() + if infer_dtype: + for name, attrtype in self._attr_types.items(): + if "DISC" in attrtype: + dfr[name] = dfr[name].astype("int32") + + if filled: + dfill = {} + for attrname in self._df: + if "DISC" in self._attr_types[attrname]: + dfill[attrname] = fill_value_int + else: + dfill[attrname] = fill_value + + dfr = dfr.fillna(dfill) + + return dfr + + def get_dataframe(self): + """Get the dataframe.""" + return self._df + + def set_dataframe(self, dfr): + """Set the dataframe in a controlled manner, shall be used""" + # TODO: more checks, and possibly acceptance of lists, dicts? + if isinstance(dfr, pd.DataFrame): + self._df = dfr + else: + raise ValueError("Input dfr is not a pandas dataframe") + self.ensure_consistency() + + def rename_attr(self, attrname, newname): + """Rename a attribute, e.g. Poro to PORO.""" + + if attrname not in list(self._df): + raise ValueError("Input log does not exist") + + if newname in list(self._df): + raise ValueError("New log name exists already") + + # rename in dataframe + self._df.rename(index=str, columns={attrname: newname}, inplace=True) + + self._attr_types[newname] = self._attr_types.pop(attrname) + self._attr_records[newname] = self._attr_records.pop(attrname) + + self.ensure_consistency() + + def create_attr( + self, attrname, attr_type="CONT", attr_record=None, value=0.0, force=True + ) -> bool: + """Create a new attribute, e.g. 
+
+        if attrname in list(self._df) and force is False:
+            return False
+
+        self._attr_types[attrname] = _AttrType(attr_type)
+        self._attr_records[attrname] = attr_record
+
+        # make a new column
+        self._df[attrname] = float(value)
+        self.ensure_consistency()
+        return True
+
+    def delete_attr(self, attrname: Union[str, list]) -> int:
+        """Delete/remove an existing attribute, or list of attributes.
+
+        Returns the number of attributes deleted.
+        """
+        if not isinstance(attrname, list):
+            attrname = [attrname]
+
+        lcount = 0
+        for logn in attrname:
+            if logn not in list(self._df):
+                continue
+
+            lcount += 1
+            self._df.drop(logn, axis=1, inplace=True)
+
+        self.ensure_consistency()
+
+        return lcount
+
+    def create_relative_hlen(self):
+        """Make a relative length of e.g. a well, as an attribute (log)."""
+        # extract numpy arrays from the XYZ trajectory logs
+        xv = self._df[self._xname].values
+        yv = self._df[self._yname].values
+
+        distance = []
+        previous_x, previous_y = xv[0], yv[0]
+        for x, y in zip(xv, yv):
+            distance.append(math.hypot(x - previous_x, y - previous_y))
+            previous_x, previous_y = x, y
+
+        self._df["R_HLEN"] = pd.Series(np.cumsum(distance), index=self._df.index)
+        self.ensure_consistency()
+
+    def geometrics(self):
+        """Compute geometrical arrays MD, INCL, AZI, as attributes (logs) (~well data).
+
+        These are kind of quasi measurements, hence the attributes (logs) will be
+        named with a Q in front, as Q_MDEPTH, Q_INCL, and Q_AZI.
+
+        These attributes will be added to the dataframe.
+
+        TODO: If the mdlogname
+        attribute does not exist in advance, it will be set to 'Q_MDEPTH'.
+
+        Returns:
+            True on success; raises ValueError if there are too few trajectory
+            points to compute geometrics.
+
+        """
+        # TODO: rewrite in pure python?
+        if self._df.shape[0] < 3:
+            raise ValueError(
+                f"Cannot compute geometrics. Not enough "
+                f"trajectory points (need at least 3, have: {self._df.shape[0]})"
+            )
+
+        # extract C array pointers for the XYZ trajectory logs
+        ptr_xv = self._get_carray(self._xname)
+        ptr_yv = self._get_carray(self._yname)
+        ptr_zv = self._get_carray(self._zname)
+
+        # get number of rows in pandas
+        nlen = len(self._df.index)
+
+        ptr_md = _cxtgeo.new_doublearray(nlen)
+        ptr_incl = _cxtgeo.new_doublearray(nlen)
+        ptr_az = _cxtgeo.new_doublearray(nlen)
+
+        ier = _cxtgeo.well_geometrics(
+            nlen, ptr_xv, ptr_yv, ptr_zv, ptr_md, ptr_incl, ptr_az, 0
+        )
+
+        if ier != 0:
+            raise XTGeoCLibError(f"XYZ/well_geometrics failed with error code: {ier}")
+
+        dnumpy = self._convert_carr_double_np(ptr_md)
+        self._df["Q_MDEPTH"] = pd.Series(dnumpy, index=self._df.index)
+
+        dnumpy = self._convert_carr_double_np(ptr_incl)
+        self._df["Q_INCL"] = pd.Series(dnumpy, index=self._df.index)
+
+        dnumpy = self._convert_carr_double_np(ptr_az)
+        self._df["Q_AZI"] = pd.Series(dnumpy, index=self._df.index)
+
+        # delete tmp pointers
+        _cxtgeo.delete_doublearray(ptr_xv)
+        _cxtgeo.delete_doublearray(ptr_yv)
+        _cxtgeo.delete_doublearray(ptr_zv)
+        _cxtgeo.delete_doublearray(ptr_md)
+        _cxtgeo.delete_doublearray(ptr_incl)
+        _cxtgeo.delete_doublearray(ptr_az)
+
+        return True
+
+    # ----------------------------------------------------------------------------------
+    # Special methods for nerds, todo is to move to private module
+    # ----------------------------------------------------------------------------------
+
+    def _convert_np_carr_int(self, np_array):
+        """Convert numpy 1D array to C array, assuming int type.
+
+        The numpy array is always double (float64), so it needs to be converted first.
+        """
+        carr = _cxtgeo.new_intarray(len(self._df.index))
+
+        np_array = np_array.astype(np.int32)
+
+        _cxtgeo.swig_numpy_to_carr_i1d(np_array, carr)
+
+        return carr
+
+    def _convert_np_carr_double(self, np_array):
+        """Convert numpy 1D array to C array, assuming double type."""
+        carr = _cxtgeo.new_doublearray(len(self._df.index))
+
+        _cxtgeo.swig_numpy_to_carr_1d(np_array, carr)
+
+        return carr
+
+    def _convert_carr_double_np(self, carray, nlen=None):
+        """Convert a C array to numpy, assuming double type."""
+        if nlen is None:
+            nlen = len(self._df.index)
+
+        nparray = _cxtgeo.swig_carr_to_numpy_1d(nlen, carray)
+
+        return nparray
+
+    def _get_carray(self, attrname: str) -> Optional[Any]:
+        """Returns the C array pointer (via SWIG) for a given attr.
+
+        Type conversion is double if float64, int32 if DISC attr.
+        Returns None if log does not exist.
+        """
+        if attrname in self._df:
+            np_array = self._df[attrname].values
+        else:
+            return None
+
+        if self._attr_types[attrname] == _AttrType.DISC:
+            carr = self._convert_np_carr_int(np_array)
+        else:
+            carr = self._convert_np_carr_double(np_array)
+
+        return carr
diff --git a/tests/test_well/test_well.py b/tests/test_well/test_well.py
index 0869dcda9..217708c5f 100644
--- a/tests/test_well/test_well.py
+++ b/tests/test_well/test_well.py
@@ -2,7 +2,6 @@
 
 import sys
 
-from collections import OrderedDict
 from os.path import join
 
 import numpy as np
@@ -1252,7 +1251,3 @@ def test_get_polygons_skipname(string_to_well):
     polygons = well.get_polygons(skipname=True)
     assert "NAME" not in polygons.dataframe.columns
     assert polygons.name == "custom_name"
-
-
-def test_get_fence_poly(string_to_well):
-    pass
diff --git a/tests/test_well/test_well_xyzdata_class.py b/tests/test_well/test_well_xyzdata_class.py
new file mode 100644
index 000000000..0830a467a
--- /dev/null
+++ b/tests/test_well/test_well_xyzdata_class.py
@@ -0,0 +1,192 @@
+"""Test _XYZData class, in a Well context"""
+import pandas as pd
+import pytest
+
+from xtgeo.xyz_common import _AttrType, _XYZData
+
+
+@pytest.fixture(name="generate_data")
+def fixture_generate_data() -> pd.DataFrame:
+    """Make a test dataframe"""
+
+    data = {
+        "X_UTME": [1.3, 2.0, 3.0, 4.0, 5.2, 6.0, 9.0],
+        "Y_UTMN": [11.0, 21.0, 31.0, 41.1, 51.0, 61.0, 91.0],
+        "Z_TVDSS": [21.0, 22.0, 23.0, 24.0, 25.3, 26.0, 29.0],
+        "MDEPTH": [13.0, 23.0, 33.0, 43.0, 53.2, 63.0, 93.0],
+        "GR": [133.0, 2234.0, -999, 1644.0, 2225.5, 6532.0, 92.0],
+        "FACIES": [1, -999, 3, 4, 4, 1, 1],
+        "ZONES": [1, 2, 3, 3, 3, 4, -999],
+    }
+
+    return pd.DataFrame(data)
+
+
+def test_well_xyzdata_initialize(generate_data: pd.DataFrame):
+    """Initialize data with no attr_records and attr_types given.
+
+    The init shall then try to infer a 'best' guess"""
+
+    instance = _XYZData(generate_data)
+
+    assert instance.dataframe.columns[0] == instance.xname
+    assert instance.dataframe.columns[2] == instance.zname
+
+
+def test_well_xyzdata_ensure_attr_types(generate_data: pd.DataFrame):
+    """Testing private method _ensure_consistency_attr_types"""
+
+    instance = _XYZData(generate_data)
+    assert "FACIES" in instance._df.columns
+
+    del instance.dataframe["FACIES"]
+
+    instance._ensure_consistency_attr_types()
+    assert "FACIES" not in instance.dataframe.columns
+
+    instance.dataframe["NEW"] = 1
+    instance._ensure_consistency_attr_types()
+    assert "NEW" in instance.dataframe.columns
+    assert "NEW" in instance.attr_types
+
+
+def test_infer_attr_dtypes(generate_data: pd.DataFrame):
+    """Testing private method _infer_attr_dtypes"""
+
+    instance = _XYZData(generate_data)
+
+    instance._attr_types = {}  # for testing, make private _attr_types empty
+
+    res = instance._infer_attr_dtypes()
+    assert res["X_UTME"].name == "CONT"
+    assert res["FACIES"].name == "DISC"
+
+    # next, FACIES is predefined in attr_types prior to parsing; here as CONT
+    # which shall 'win' in this setting
+    instance._attr_types = {"FACIES": _AttrType.CONT}
+    res = instance._infer_attr_dtypes()
+    assert res["X_UTME"].name == "CONT"
+    assert res["FACIES"].name == "CONT"
+
+
+def test_ensure_dataframe_dtypes(generate_data: pd.DataFrame):
+    """Testing private method _ensure_consistency_df_dtypes"""
+
+    instance = _XYZData(generate_data)
+
+    assert instance.data["FACIES"].dtype == "float32"
+    instance.data["FACIES"] = instance.data["FACIES"].astype("int32")
+    assert instance.data["FACIES"].dtype == "int32"
+
+    instance._ensure_consistency_df_dtypes()
+    assert instance.data["FACIES"].dtype == "float32"
+
+
+def test_well_xyzdata_consistency_add_column(generate_data: pd.DataFrame):
+    """Add column to the dataframe; check that attr_types and attr_records update."""
+
+    instance = _XYZData(generate_data)
+
+    assert instance.attr_types == {
+        "X_UTME": _AttrType.CONT,
+        "Y_UTMN": _AttrType.CONT,
+        "Z_TVDSS": _AttrType.CONT,
+        "MDEPTH": _AttrType.CONT,
+        "GR": _AttrType.CONT,
+        "FACIES": _AttrType.DISC,
+        "ZONES": _AttrType.DISC,
+    }
+
+    instance.data["NEW"] = 1.992
+    instance.ensure_consistency()
+
+    assert instance.attr_types == {
+        "X_UTME": _AttrType.CONT,
+        "Y_UTMN": _AttrType.CONT,
+        "Z_TVDSS": _AttrType.CONT,
+        "MDEPTH": _AttrType.CONT,
+        "GR": _AttrType.CONT,
+        "FACIES": _AttrType.DISC,
+        "ZONES": _AttrType.DISC,
+        "NEW": _AttrType.CONT,
+    }
+
+    instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1]
+    instance.ensure_consistency()
+
+    assert instance.attr_types == {
+        "X_UTME": _AttrType.CONT,
+        "Y_UTMN": _AttrType.CONT,
+        "Z_TVDSS": _AttrType.CONT,
+        "MDEPTH": _AttrType.CONT,
+        "GR": _AttrType.CONT,
+        "FACIES": _AttrType.DISC,
+        "ZONES": _AttrType.DISC,
+        "NEW": _AttrType.CONT,
+        "DNEW": _AttrType.DISC,
+    }
+
+    empty = ("", "")
+
+    assert instance.attr_records == {
+        "X_UTME": empty,
+        "Y_UTMN": empty,
+        "Z_TVDSS": empty,
+        "MDEPTH": empty,
+        "GR": empty,
+        "FACIES": {-999: "-999", 1: "1", 3: "3", 4: "4"},
+        "ZONES": {-999: "-999", 1: "1", 2: "2", 3: "3", 4: "4"},
+        "NEW": empty,
+        "DNEW": {-999: "-999", 1: "1", 3: "3", 4: "4"},
+    }
+
+
+def test_attrtype_class():
+    """Test the Enum class _AttrType"""
+
+    assert _AttrType.DISC.value == "DISC"
+    assert _AttrType.CONT.value == "CONT"
+
+    assert "CONT" in _AttrType
+    assert "DISC" in _AttrType
+    assert "FOO" not in _AttrType
+
+    assert _AttrType("DISC")
+    assert _AttrType("CONT")
+
+    with pytest.raises(ValueError, match="is not a valid"):
+        _AttrType("FOO")
+
+
+def test_well_xyzdata_dataframe_copy(generate_data: pd.DataFrame):
+    """Test get dataframe method, with options"""
+
+    instance = _XYZData(generate_data)
+
+    copy = instance.get_dataframe_copy()
+    col = list(copy)
+
+    dtypes = [str(entry) for entry in copy[col].dtypes]
+    assert dtypes == [
+        "float64",
+        "float64",
+        "float64",
+        "float32",
+        "float32",
+        "float32",
+        "float32",
+    ]
+
+    copy = instance.get_dataframe_copy(infer_dtype=True)
+
+    dtypes = [str(entry) for entry in copy[col].dtypes]
+    assert dtypes == [
+        "float64",
+        "float64",
+        "float64",
+        "float32",
+        "float32",
+        "int32",
+        "int32",
+    ]
diff --git a/tests/test_well/test_welldata_class.py b/tests/test_well/test_welldata_class.py
deleted file mode 100644
index c1eafd04e..000000000
--- a/tests/test_well/test_welldata_class.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import pandas as pd
-import pytest
-
-from xtgeo.well._welldata import _LogType, _WellData
-
-
-@pytest.fixture(name="generate_data")
-def fixture_generate_data() -> pd.DataFrame:
-    """Make a test dataframe"""
-
-    data = {
-        "X_UTME": [1.3, 2.0, 3.0, 4.0, 5.2, 6.0, 9.0],
-        "Y_UTMN": [11.0, 21.0, 31.0, 41.1, 51.0, 61.0, 91.0],
-        "Z_TVDSS": [21.0, 22.0, 23.0, 24.0, 25.3, 26.0, 29.0],
-        "MDEPTH": [13.0, 23.0, 33.0, 43.0, 53.2, 63.0, 93.0],
-        "GR": [133.0, 2234.0, -999, 1644.0, 2225.5, 6532.0, 92.0],
-        "FACIES": [1, -999, 3, 4, 4, 1, 1],
-        "ZONES": [1, 2, 3, 3, 3, 4, -999],
-    }
-
-    return pd.DataFrame(data)
-
-
-def test_welldata_initialize(generate_data: pd.DataFrame):
-    """Initialize data with no wlogrecords and wlogtypes given.
-
-    The init shall than then try to infer 'best' guess"""
-
-    instance = _WellData(generate_data)
-
-    assert instance.data.columns[0] == instance.xname
-    assert instance.data.columns[2] == instance.zname
-
-
-def test_welldata_setters(generate_data: pd.DataFrame):
-    """Initialize data, and try a direct setter."""
-
-    instance = _WellData(generate_data)
-
-    dfr = instance.data.copy()
-    with pytest.raises(AttributeError, match="Don't use direct metods"):
-        instance.data = dfr
-
-
-def test_welldata_ensure_wlogtypes(generate_data: pd.DataFrame):
-    """Testing private method _ensure_wlogtypes"""
-
-    instance = _WellData(generate_data)
-    assert "FACIES" in instance.data.columns
-
-    del instance.data["FACIES"]
-
-    instance._ensure_consistency_wlogtypes()
-    assert "FACIES" not in instance.data.columns
-
-    instance.data["NEW"] = 1
-    instance._ensure_consistency_wlogtypes()
-    assert "NEW" in instance.data.columns
-    assert "NEW" in instance.wlogtypes
-
-
-def test_infer_log_dtypes(generate_data: pd.DataFrame):
-    """Testing private method _infer_log_dtypes"""
-
-    instance = _WellData(generate_data)
-
-    instance.wlogtypes = {}  # for testing, make wlogtypes empty
-
-    res = instance._infer_log_dtypes()
-    assert res["X_UTME"] == "CONT"
-    assert res["FACIES"] == "DISC"
-
-    # next, FACIES is predefined in wlogtypes prior to parsing; here as CONT
-    # which shall 'win' in this setting
-    instance.wlogtypes = {"FACIES": "CONT"}
-    res = instance._infer_log_dtypes()
-    assert res["X_UTME"] == "CONT"
-    assert res["FACIES"] == "CONT"
-
-
-def test_ensure_dataframe_dtypes(generate_data: pd.DataFrame):
-    """Testing private method _ensure_cosistency_df_dtypes"""
-
-    instance = _WellData(generate_data)
-
-    assert instance.data["FACIES"].dtype == "float32"
-    instance.data["FACIES"] = instance.data["FACIES"].astype("int32")
-    assert instance.data["FACIES"].dtype == "int32"
-
-    instance._ensure_consistency_df_dtypes()
-    assert instance.data["FACIES"].dtype == "float32"
-
-
-def test_welldata_consistency_add_column(generate_data: pd.DataFrame):
-    """Add a column to the dataframe; check if wlogtypes and wlogrecords are updated."""
-
-    instance = _WellData(generate_data)
-
-    assert instance.wlogtypes == {
-        "X_UTME": "CONT",
-        "Y_UTMN": "CONT",
-        "Z_TVDSS": "CONT",
-        "MDEPTH": "CONT",
-        "GR": "CONT",
-        "FACIES": "DISC",
-        "ZONES": "DISC",
-    }
-
-    instance.data["NEW"] = 1.992
-    instance.ensure_consistency()
-
-    assert instance.wlogtypes == {
-        "X_UTME": "CONT",
-        "Y_UTMN": "CONT",
-        "Z_TVDSS": "CONT",
-        "MDEPTH": "CONT",
-        "GR": "CONT",
-        "FACIES": "DISC",
-        "ZONES": "DISC",
-        "NEW": "CONT",
-    }
-
-    instance.data["DNEW"] = [1, -999, 3, 4, 4, 1, 1]
-    instance.ensure_consistency()
-
-    assert instance.wlogtypes == {
-        "X_UTME": "CONT",
-        "Y_UTMN": "CONT",
-        "Z_TVDSS": "CONT",
-        "MDEPTH": "CONT",
-        "GR": "CONT",
-        "FACIES": "DISC",
-        "ZONES": "DISC",
-        "NEW": "CONT",
-        "DNEW": "DISC",
-    }
-
-    assert instance.wlogrecords == {
-        "X_UTME": None,
-        "Y_UTMN": None,
-        "Z_TVDSS": None,
-        "MDEPTH": None,
-        "GR": None,
-        "FACIES": {-999: "-999", 1: "1", 3: "3", 4: "4"},
-        "ZONES": {-999: "-999", 1: "1", 2: "2", 3: "3", 4: "4"},
-        "NEW": None,
-        "DNEW": {-999: "-999", 1: "1", 3: "3", 4: "4"},
-    }
-
-
-def test_logtype_class():
-    """Test the ENUM type _LogClass"""
-
-    assert _LogType.DISC.value == "DISC"
-    assert _LogType.CONT.value == "CONT"
-
-    assert "CONT" in _LogType
-    assert "DISC" in _LogType
-    assert "FOO" not in _LogType
-
-    assert _LogType("DISC")
-    assert _LogType("CONT")
-
-    with pytest.raises(ValueError, match="is not a valid"):
-        _LogType("FOO")
-
-
-def test_welldata_dataframe_copy(generate_data: pd.DataFrame):
-    """Test get dataframe method, with option"""
-
-    instance = _WellData(generate_data)
-
-    copy = instance.get_dataframe_copy()
-    col = list(copy)
-
-    dtypes = [str(entry) for entry in copy[col].dtypes]
-    assert dtypes == [
-        "float64",
-        "float64",
-        "float64",
-        "float32",
-        "float32",
-        "float32",
-        "float32",
-    ]
-
-    copy = instance.get_dataframe_copy(infer_dtype=True)
-
-    dtypes = [str(entry) for entry in copy[col].dtypes]
-    assert dtypes == [
-        "float64",
-        "float64",
-        "float64",
-        "float32",
-        "float32",
-        "int32",
-        "int32",
-    ]

From 91af124165a74c5e84dac3fb046089c446a4a59d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?=
Date: Tue, 3 Oct 2023 08:19:01 +0200
Subject: [PATCH 13/13] WIP

---
 src/xtgeo/xyz_common/_xyz_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/xtgeo/xyz_common/_xyz_data.py b/src/xtgeo/xyz_common/_xyz_data.py
index 9f3cad1a7..293572c04 100644
--- a/src/xtgeo/xyz_common/_xyz_data.py
+++ b/src/xtgeo/xyz_common/_xyz_data.py
@@ -157,7 +157,7 @@ def zname(self):
         return self._zname
 
     def _infer_attr_dtypes(self):
-        """Return as dict on form {"X_UTME": "CONT", .... "FACIES": "DISC"}.
+        """Return a dict on the form {"X_UTME": _AttrType.CONT, "FACIES": _AttrType.DISC}.
 
         There are some important restrictions:
         * The first 3 columns (X Y Z) are always CONT, even if input appears as DISC.
@@ -173,8 +173,8 @@ def _infer_attr_dtypes(self):
         datatypes = {}
         for name, dtype in dlist.items():
             if name in self._attr_types:
+                # do not change already set attr_types
                 datatypes[name] = self._attr_types[name]
-
                 continue
 
             if name in (self._xname, self._yname, self._zname):