From 81662ffc7eef0a2463a09831f8b1f45992c6d53b Mon Sep 17 00:00:00 2001 From: mferrera Date: Tue, 28 May 2024 15:03:38 +0200 Subject: [PATCH] MAINT: Refactor ObjectDataProvider base class This PR tries to add some firmer standardization to how metadata is defined within objects we generate metadata for. Presently there are string literals defined and grouped into an unvalidated object returned from the child providers. This change makes progress toward having stronger guarantees that are also type checked. This PR cleans up the base class and moves toward having these defined as properties of provider classes, which they are. To keep the PR from being too large it makes this change to the classname only. This work precedes initializing internal metadata into a Pydantic object directly. --- src/fmu/dataio/aggregation.py | 8 +- src/fmu/dataio/providers/objectdata/_base.py | 288 ++++++------------ .../dataio/providers/objectdata/_faultroom.py | 8 +- .../dataio/providers/objectdata/_provider.py | 41 +-- .../dataio/providers/objectdata/_tables.py | 15 +- src/fmu/dataio/providers/objectdata/_xtgeo.py | 43 +-- src/fmu/dataio/types.py | 11 - .../test_units/test_filedataprovider_class.py | 58 ---- .../test_objectdataprovider_class.py | 40 +-- 9 files changed, 156 insertions(+), 356 deletions(-) diff --git a/src/fmu/dataio/aggregation.py b/src/fmu/dataio/aggregation.py index 617c0f639..73cc7999e 100644 --- a/src/fmu/dataio/aggregation.py +++ b/src/fmu/dataio/aggregation.py @@ -243,7 +243,9 @@ def _generate_aggrd_metadata( "model": template["fmu"]["model"], } etemp = dataio.ExportData(config=config, name=self.name) - objdata = objectdata_provider_factory(obj=obj, dataio=etemp).get_objectdata() + + objectdata_provider = objectdata_provider_factory(obj=obj, dataio=etemp) + objdata = objectdata_provider.get_objectdata() template["tracklog"] = [generate_meta_tracklog()[0].model_dump(mode="json")] template["file"] = { @@ -260,8 +262,8 @@ def _generate_aggrd_metadata( 
template["data"]["name"] = self.name if self.tagname: template["data"]["tagname"] = self.tagname - if objdata.bbox: - template["data"]["bbox"] = objdata.bbox + if bbox := objectdata_provider.get_bbox(): + template["data"]["bbox"] = bbox.model_dump(mode="json", exclude_none=True) self._metadata = template diff --git a/src/fmu/dataio/providers/objectdata/_base.py b/src/fmu/dataio/providers/objectdata/_base.py index 7079257ce..42b3f0533 100644 --- a/src/fmu/dataio/providers/objectdata/_base.py +++ b/src/fmu/dataio/providers/objectdata/_base.py @@ -4,8 +4,7 @@ from copy import deepcopy from dataclasses import dataclass, field from datetime import datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Final, TypeVar +from typing import TYPE_CHECKING, Final, TypeVar from warnings import warn from fmu.dataio._definitions import ConfigurationError @@ -23,8 +22,9 @@ if TYPE_CHECKING: from fmu.dataio.dataio import ExportData from fmu.dataio.datastructure.meta.content import BoundingBox2D, BoundingBox3D + from fmu.dataio.datastructure.meta.enums import FMUClassEnum from fmu.dataio.datastructure.meta.specification import AnySpecification - from fmu.dataio.types import Classname, Efolder, Inferrable, Layout, Subtype + from fmu.dataio.types import Efolder, Inferrable, Layout, Subtype logger: Final = null_logger(__name__) @@ -34,13 +34,10 @@ @dataclass class DerivedObjectDescriptor: subtype: Subtype - classname: Classname layout: Layout efolder: Efolder | str fmt: str extension: str - spec: dict[str, Any] | None - bbox: dict[str, Any] | None table_index: list[str] | None @@ -57,105 +54,6 @@ class DerivedNamedStratigraphy: top: str | None = field(default=None) -def derive_name( - export: ExportData, - obj: Inferrable, -) -> str: - """ - Derives and returns a name for an export operation based on the - provided ExportData instance and a 'sniffable' object. 
- """ - if name := export.name: - return name - - if isinstance(name := getattr(obj, "name", ""), str): - return name - - return "" - - -def get_timedata_from_existing(meta_timedata: dict) -> tuple[datetime, datetime | None]: - """Converts the time data in existing metadata from a string to a datetime. - - The time section under datablock has variants to parse. - - Formats:: - "time": { - "t0": { - "value": "2022-08-02T00:00:00", - "label": "base" - } - } - - # with or without t1 - # or legacy format: - - "time": [ - { - "value": "2030-01-01T00:00:00", - "label": "moni" - }, - { - "value": "2010-02-03T00:00:00", - "label": "base" - } - ], - """ - date1 = None - if isinstance(meta_timedata, list): - date0 = meta_timedata[0]["value"] - if len(meta_timedata) == 2: - date1 = meta_timedata[1]["value"] - elif isinstance(meta_timedata, dict): - date0 = meta_timedata["t0"].get("value") - if "t1" in meta_timedata: - date1 = meta_timedata["t1"].get("value") - - return ( - datetime.strptime(date0, "%Y-%m-%dT%H:%M:%S"), - datetime.strptime(date1, "%Y-%m-%dT%H:%M:%S") if date1 else None, - ) - - -def get_fmu_time_object(timedata_item: list[str]) -> FMUTimeObject: - """ - Returns a FMUTimeObject from a timedata item on list - format: ["20200101", "monitor"] where the first item is a date and - the last item is an optional label - """ - value, *label = timedata_item - return FMUTimeObject( - value=datetime.strptime(str(value), "%Y%m%d"), - label=label[0] if label else None, - ) - - -def get_validated_content(content: str | dict | None) -> AllowedContent: - """Check content and return a validated model.""" - logger.info("Evaluate content") - logger.debug("content is %s of type %s", str(content), type(content)) - - if not content: - return AllowedContent(content="unset") - - if isinstance(content, str): - return AllowedContent(content=ContentEnum(content)) - - if len(content) > 1: - raise ValueError( - "Found more than one content item in the 'content' dictionary. 
Ensure " - "input is formatted as content={'mycontent': {extra_key: extra_value}}." - ) - content = deepcopy(content) - usecontent, content_specific = next(iter(content.items())) - logger.debug("usecontent is %s", usecontent) - logger.debug("content_specific is %s", content_specific) - - return AllowedContent.model_validate( - {"content": ContentEnum(usecontent), "content_incl_specific": content} - ) - - @dataclass class ObjectDataProvider(Provider): """Base class for providing metadata for data objects in fmu-dataio, e.g. a surface. @@ -175,9 +73,8 @@ class ObjectDataProvider(Provider): # result properties; the most important is metadata which IS the 'data' part in # the resulting metadata. But other variables needed later are also given # as instance properties in addition (for simplicity in other classes/functions) - metadata: dict = field(default_factory=dict) + _metadata: dict = field(default_factory=dict) name: str = field(default="") - classname: str = field(default="") efolder: str = field(default="") extension: str = field(default="") fmt: str = field(default="") @@ -185,73 +82,97 @@ class ObjectDataProvider(Provider): time1: datetime | None = field(default=None) def __post_init__(self) -> None: - """Main function here, will populate the metadata block for 'data'.""" - - # Don't re-initialize data if it's coming from pre-existing metadata. 
- if self.metadata: - return + content_model = self._get_validated_content(self.dataio.content) + named_stratigraphy = self._get_named_stratigraphy() + obj_data = self.get_objectdata() - namedstratigraphy = self._derive_named_stratigraphy() - objres = self.get_objectdata() - content_model = get_validated_content(self.dataio.content) + self.name = named_stratigraphy.name + self.extension = obj_data.extension + self.fmt = obj_data.fmt + self.efolder = obj_data.efolder if self.dataio.forcefolder: if self.dataio.forcefolder.startswith("/"): raise ValueError("Can't use absolute path as 'forcefolder'") msg = ( - f"The standard folder name is overrided from {objres.efolder} to " + f"The standard folder name is overrided from {obj_data.efolder} to " f"{self.dataio.forcefolder}" ) - objres.efolder = self.dataio.forcefolder + self.efolder = self.dataio.forcefolder logger.info(msg) warn(msg, UserWarning) - self.metadata["name"] = namedstratigraphy.name - self.metadata["stratigraphic"] = namedstratigraphy.stratigraphic - self.metadata["offset"] = namedstratigraphy.offset - self.metadata["alias"] = namedstratigraphy.alias - self.metadata["top"] = namedstratigraphy.top - self.metadata["base"] = namedstratigraphy.base + self._metadata["name"] = self.name + self._metadata["stratigraphic"] = named_stratigraphy.stratigraphic + self._metadata["offset"] = named_stratigraphy.offset + self._metadata["alias"] = named_stratigraphy.alias + self._metadata["top"] = named_stratigraphy.top + self._metadata["base"] = named_stratigraphy.base - self.metadata["content"] = (usecontent := content_model.content) + self._metadata["content"] = (usecontent := content_model.content) if content_model.content_incl_specific: - self.metadata[usecontent] = getattr( + self._metadata[usecontent] = getattr( content_model.content_incl_specific, usecontent, None ) - self.metadata["tagname"] = self.dataio.tagname - self.metadata["format"] = objres.fmt - self.metadata["layout"] = objres.layout - 
self.metadata["unit"] = self.dataio.unit or "" - self.metadata["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0] - self.metadata["depth_reference"] = list(self.dataio.vertical_domain.values())[0] - self.metadata["spec"] = objres.spec - self.metadata["bbox"] = objres.bbox - self.metadata["table_index"] = objres.table_index - self.metadata["undef_is_zero"] = self.dataio.undef_is_zero + self._metadata["tagname"] = self.dataio.tagname + self._metadata["format"] = self.fmt + self._metadata["layout"] = obj_data.layout + self._metadata["unit"] = self.dataio.unit or "" + self._metadata["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0] + self._metadata["depth_reference"] = list(self.dataio.vertical_domain.values())[0] + + self._metadata["spec"] = ( + spec.model_dump(mode="json", exclude_none=True) + if (spec := self.get_spec()) + else None + ) + self._metadata["bbox"] = ( + bbox.model_dump(mode="json", exclude_none=True) + if (bbox := self.get_bbox()) + else None + ) + self._metadata["time"] = ( + timedata.model_dump(mode="json", exclude_none=True) + if (timedata := self._get_timedata()) + else None + ) + + self._metadata["table_index"] = obj_data.table_index + self._metadata["undef_is_zero"] = self.dataio.undef_is_zero # timedata: - self.metadata["time"] = self._derive_timedata() - self.metadata["is_prediction"] = self.dataio.is_prediction - self.metadata["is_observation"] = self.dataio.is_observation - self.metadata["description"] = generate_description(self.dataio.description) + self._metadata["is_prediction"] = self.dataio.is_prediction + self._metadata["is_observation"] = self.dataio.is_observation + self._metadata["description"] = generate_description(self.dataio.description) + logger.info("Derive all metadata for data object... 
DONE") - # the next is to give addition state variables identical values, and for - # consistency these are derived after all eventual validation and directly from - # the self.metadata fields: + def _get_validated_content(self, content: str | dict | None) -> AllowedContent: + """Check content and return a validated model.""" + logger.info("Evaluate content") + logger.debug("content is %s of type %s", str(content), type(content)) - self.name = self.metadata["name"] + if not content: + return AllowedContent(content="unset") - # then there are a few settings that are not in the ``data`` metadata, but - # needed as data/variables in other classes: + if isinstance(content, str): + return AllowedContent(content=ContentEnum(content)) - self.efolder = objres.efolder - self.classname = objres.classname - self.extension = objres.extension - self.fmt = objres.fmt - logger.info("Derive all metadata for data object... DONE") + if len(content) > 1: + raise ValueError( + "Found more than one content item in the 'content' dictionary. Ensure " + "input is formatted as content={'mycontent': {extra_key: extra_value}}." + ) + content = deepcopy(content) + usecontent, content_specific = next(iter(content.items())) + logger.debug("usecontent is %s", usecontent) + logger.debug("content_specific is %s", content_specific) + + return AllowedContent.model_validate( + {"content": ContentEnum(usecontent), "content_incl_specific": content} + ) - def _derive_named_stratigraphy(self) -> DerivedNamedStratigraphy: + def _get_named_stratigraphy(self) -> DerivedNamedStratigraphy: """Derive the name and stratigraphy for the object; may have several sources. If not in input settings it is tried to be inferred from the xtgeo/pandas/... @@ -260,7 +181,11 @@ def _derive_named_stratigraphy(self) -> DerivedNamedStratigraphy: `stratigraphy`. For example, if "TopValysar" is the model name and the actual name is "Valysar Top Fm." that latter name will be used. 
""" - name = derive_name(self.dataio, self.obj) + name = "" + if self.dataio.name: + name = self.dataio.name + elif isinstance(obj_name := getattr(self.obj, "name", ""), str): + name = obj_name # next check if usename has a "truename" and/or aliases from the config stratigraphy = self.dataio.config.get("stratigraphy", {}) @@ -283,7 +208,19 @@ def _derive_named_stratigraphy(self) -> DerivedNamedStratigraphy: return rv - def _derive_timedata(self) -> dict[str, str] | None: + def _get_fmu_time_object(self, timedata_item: list[str]) -> FMUTimeObject: + """ + Returns a FMUTimeObject from a timedata item on list + format: ["20200101", "monitor"] where the first item is a date and + the last item is an optional label + """ + value, *label = timedata_item + return FMUTimeObject( + value=datetime.strptime(str(value), "%Y%m%d"), + label=label[0] if label else None, + ) + + def _get_timedata(self) -> Time | None: """Format input timedata to metadata New format: @@ -294,7 +231,6 @@ def _derive_timedata(self) -> dict[str, str] | None: will be some--time1_time0 where time1 is the newest (unless a class variable is set for those who wants it turned around). 
""" - if not self.dataio.timedata: return None @@ -303,8 +239,8 @@ def _derive_timedata(self) -> dict[str, str] | None: start_input, *stop_input = self.dataio.timedata - start = get_fmu_time_object(start_input) - stop = get_fmu_time_object(stop_input[0]) if stop_input else None + start = self._get_fmu_time_object(start_input) + stop = self._get_fmu_time_object(stop_input[0]) if stop_input else None if stop: assert start and start.value is not None # for mypy @@ -314,7 +250,12 @@ def _derive_timedata(self) -> dict[str, str] | None: self.time0, self.time1 = start.value, stop.value if stop else None - return Time(t0=start, t1=stop).model_dump(mode="json", exclude_none=True) + return Time(t0=start, t1=stop) + + @property + @abstractmethod + def classname(self) -> FMUClassEnum: + raise NotImplementedError @abstractmethod def get_spec(self) -> AnySpecification | None: @@ -330,9 +271,9 @@ def get_objectdata(self) -> DerivedObjectDescriptor: def get_metadata(self) -> AnyContent | UnsetAnyContent: return ( - UnsetAnyContent.model_validate(self.metadata) - if self.metadata["content"] == "unset" - else AnyContent.model_validate(self.metadata) + UnsetAnyContent.model_validate(self._metadata) + if self._metadata["content"] == "unset" + else AnyContent.model_validate(self._metadata) ) @staticmethod @@ -345,30 +286,3 @@ def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V: f"The file format {fmt} is not supported. 
", f"Valid {subtype} formats are: {list(validator.keys())}", ) - - @classmethod - def from_metadata_dict( - cls, obj: Inferrable, dataio: ExportData, meta_existing: dict - ) -> ObjectDataProvider: - """Instantiate from existing metadata.""" - - relpath = Path(meta_existing["file"]["relative_path"]) - - time0, time1 = None, None - if "time" in meta_existing["data"]: - time0, time1 = get_timedata_from_existing(meta_existing["data"]["time"]) - - return cls( - obj=obj, - dataio=dataio, - metadata=meta_existing["data"], - name=meta_existing["data"]["name"], - classname=meta_existing["class"], - efolder=( - relpath.parent.parent.name if dataio.subfolder else relpath.parent.name - ), - extension=relpath.suffix, - fmt=meta_existing["data"]["format"], - time0=time0, - time1=time1, - ) diff --git a/src/fmu/dataio/providers/objectdata/_faultroom.py b/src/fmu/dataio/providers/objectdata/_faultroom.py index 2a6f772d3..3f928126d 100644 --- a/src/fmu/dataio/providers/objectdata/_faultroom.py +++ b/src/fmu/dataio/providers/objectdata/_faultroom.py @@ -6,6 +6,7 @@ from fmu.dataio._definitions import ValidFormats from fmu.dataio._logging import null_logger from fmu.dataio.datastructure.meta.content import BoundingBox3D +from fmu.dataio.datastructure.meta.enums import FMUClassEnum from fmu.dataio.datastructure.meta.specification import FaultRoomSurfaceSpecification from fmu.dataio.readers import FaultRoomSurface @@ -21,6 +22,10 @@ class FaultRoomSurfaceProvider(ObjectDataProvider): obj: FaultRoomSurface + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.surface + def get_bbox(self) -> BoundingBox3D: """Derive data.bbox for FaultRoomSurface.""" logger.info("Get bbox for FaultRoomSurface") @@ -49,12 +54,9 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for FaultRoomSurface""" return DerivedObjectDescriptor( subtype="JSON", - classname="surface", layout="faultroom_triangulated", efolder="maps", fmt=(fmt := self.dataio.dict_fformat), - 
spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary), table_index=None, ) diff --git a/src/fmu/dataio/providers/objectdata/_provider.py b/src/fmu/dataio/providers/objectdata/_provider.py index 8d6286e34..049281025 100644 --- a/src/fmu/dataio/providers/objectdata/_provider.py +++ b/src/fmu/dataio/providers/objectdata/_provider.py @@ -94,6 +94,7 @@ from fmu.dataio._definitions import ValidFormats from fmu.dataio._logging import null_logger +from fmu.dataio.datastructure.meta.enums import FMUClassEnum from fmu.dataio.readers import FaultRoomSurface from ._base import ( @@ -132,8 +133,6 @@ def objectdata_provider_factory( NotImplementedError: when receiving an object we don't know how to generated metadata for. """ - if meta_existing: - return ExistingDataProvider.from_metadata_dict(obj, dataio, meta_existing) if isinstance(obj, xtgeo.RegularSurface): return RegularSurfaceDataProvider(obj=obj, dataio=dataio) if isinstance(obj, xtgeo.Polygons): @@ -161,41 +160,14 @@ def objectdata_provider_factory( raise NotImplementedError(f"This data type is not currently supported: {type(obj)}") -@dataclass -class ExistingDataProvider(ObjectDataProvider): - """These getters should never be called because metadata was derived a priori.""" - - obj: Inferrable - - def get_spec(self) -> None: - """Derive data.spec from existing metadata.""" - - def get_bbox(self) -> None: - """Derive data.bbox from existing metadata.""" - - def get_objectdata(self) -> DerivedObjectDescriptor: - """Derive object data for existing metadata.""" - return DerivedObjectDescriptor( - subtype=self.metadata["subtype"], - classname=self.metadata["class"], - layout=self.metadata["layout"], - efolder=self.efolder, - fmt=self.fmt, - extension=self.extension, - spec=self.metadata["spec"], - bbox=self.metadata["bbox"], - table_index=None, - ) - - def 
derive_metadata(self) -> None: - """Metadata has already been derived for this provider, and is already set from - instantiation, so override this method and do nothing.""" - - @dataclass class DictionaryDataProvider(ObjectDataProvider): obj: dict + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.dictionary + def get_spec(self) -> None: """Derive data.spec for dict.""" @@ -206,12 +178,9 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for dict.""" return DerivedObjectDescriptor( subtype="JSON", - classname="dictionary", layout="dictionary", efolder="dictionaries", fmt=(fmt := self.dataio.dict_fformat), extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary), - spec=None, - bbox=None, table_index=None, ) diff --git a/src/fmu/dataio/providers/objectdata/_tables.py b/src/fmu/dataio/providers/objectdata/_tables.py index da68ce1e0..70205d93e 100644 --- a/src/fmu/dataio/providers/objectdata/_tables.py +++ b/src/fmu/dataio/providers/objectdata/_tables.py @@ -7,6 +7,7 @@ from fmu.dataio._definitions import STANDARD_TABLE_INDEX_COLUMNS, ValidFormats from fmu.dataio._logging import null_logger +from fmu.dataio.datastructure.meta.enums import FMUClassEnum from fmu.dataio.datastructure.meta.specification import TableSpecification from ._base import ( @@ -59,6 +60,10 @@ def _derive_index(table_index: list[str] | None, columns: list[str]) -> list[str class DataFrameDataProvider(ObjectDataProvider): obj: pd.DataFrame + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.table + def get_spec(self) -> TableSpecification: """Derive data.spec for pd.DataFrame.""" logger.info("Get spec for pd.DataFrame (tables)") @@ -75,13 +80,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: table_index = _derive_index(self.dataio.table_index, list(self.obj.columns)) return DerivedObjectDescriptor( subtype="DataFrame", - classname="table", layout="table", efolder="tables", fmt=(fmt := 
self.dataio.table_fformat), extension=self._validate_get_ext(fmt, "DataFrame", ValidFormats().table), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=None, table_index=table_index, ) @@ -90,6 +92,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class ArrowTableDataProvider(ObjectDataProvider): obj: pyarrow.Table + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.table + def get_spec(self) -> TableSpecification: """Derive data.spec for pyarrow.Table.""" logger.info("Get spec for pyarrow (tables)") @@ -106,12 +112,9 @@ def get_objectdata(self) -> DerivedObjectDescriptor: table_index = _derive_index(self.dataio.table_index, self.obj.column_names) return DerivedObjectDescriptor( subtype="ArrowTable", - classname="table", layout="table", efolder="tables", fmt=(fmt := self.dataio.arrow_fformat), extension=self._validate_get_ext(fmt, "ArrowTable", ValidFormats().table), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=None, table_index=table_index, ) diff --git a/src/fmu/dataio/providers/objectdata/_xtgeo.py b/src/fmu/dataio/providers/objectdata/_xtgeo.py index 87446c3b9..20cfdecb5 100644 --- a/src/fmu/dataio/providers/objectdata/_xtgeo.py +++ b/src/fmu/dataio/providers/objectdata/_xtgeo.py @@ -11,6 +11,7 @@ from fmu.dataio._logging import null_logger from fmu.dataio._utils import npfloat_to_float from fmu.dataio.datastructure.meta.content import BoundingBox2D, BoundingBox3D +from fmu.dataio.datastructure.meta.enums import FMUClassEnum from fmu.dataio.datastructure.meta.specification import ( CPGridPropertySpecification, CPGridSpecification, @@ -35,6 +36,10 @@ class RegularSurfaceDataProvider(ObjectDataProvider): obj: xtgeo.RegularSurface + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.surface + def get_spec(self) -> SurfaceSpecification: """Derive data.spec for xtgeo.RegularSurface.""" logger.info("Get spec for RegularSurface") @@ -81,12 +86,9 @@ def 
get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for xtgeo.RegularSurface.""" return DerivedObjectDescriptor( subtype="RegularSurface", - classname="surface", layout="regular", efolder="maps", fmt=(fmt := self.dataio.surface_fformat), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), extension=self._validate_get_ext( fmt, "RegularSurface", ValidFormats().surface ), @@ -98,6 +100,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class PolygonsDataProvider(ObjectDataProvider): obj: xtgeo.Polygons + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.polygons + def get_spec(self) -> PolygonsSpecification: """Derive data.spec for xtgeo.Polygons.""" logger.info("Get spec for Polygons") @@ -126,13 +132,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for xtgeo.Polygons.""" return DerivedObjectDescriptor( subtype="Polygons", - classname="polygons", layout="unset", efolder="polygons", fmt=(fmt := self.dataio.polygons_fformat), extension=self._validate_get_ext(fmt, "Polygons", ValidFormats().polygons), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -146,6 +149,10 @@ def obj_dataframe(self) -> pd.DataFrame: """Returns a dataframe of the referenced xtgeo.Points object.""" return self.obj.get_dataframe(copy=False) + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.points + def get_spec(self) -> PointSpecification: """Derive data.spec for xtgeo.Points.""" logger.info("Get spec for Points") @@ -174,13 +181,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for xtgeo.Points.""" return DerivedObjectDescriptor( subtype="Points", - classname="points", layout="unset", efolder="points", fmt=(fmt := self.dataio.points_fformat), 
extension=self._validate_get_ext(fmt, "Points", ValidFormats().points), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -189,6 +193,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class CubeDataProvider(ObjectDataProvider): obj: xtgeo.Cube + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.cube + def get_spec(self) -> CubeSpecification: """Derive data.spec for xtgeo.Cube.""" logger.info("Get spec for Cube") @@ -244,13 +252,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for xtgeo.Cube.""" return DerivedObjectDescriptor( subtype="RegularCube", - classname="cube", layout="regular", efolder="cubes", fmt=(fmt := self.dataio.cube_fformat), extension=self._validate_get_ext(fmt, "RegularCube", ValidFormats().cube), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -259,6 +264,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class CPGridDataProvider(ObjectDataProvider): obj: xtgeo.Grid + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.cpgrid + def get_spec(self) -> CPGridSpecification: """Derive data.spec for xtgeo.Grid.""" logger.info("Get spec for Grid geometry") @@ -298,13 +307,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for xtgeo.Grid.""" return DerivedObjectDescriptor( subtype="CPGrid", - classname="cpgrid", layout="cornerpoint", efolder="grids", fmt=(fmt := self.dataio.grid_fformat), extension=self._validate_get_ext(fmt, "CPGrid", ValidFormats().grid), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -313,6 +319,10 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class 
CPGridPropertyDataProvider(ObjectDataProvider): obj: xtgeo.GridProperty + @property + def classname(self) -> FMUClassEnum: + return FMUClassEnum.cpgrid_property + def get_spec(self) -> CPGridPropertySpecification: """Derive data.spec for xtgeo.GridProperty.""" logger.info("Get spec for GridProperty") @@ -330,14 +340,11 @@ def get_objectdata(self) -> DerivedObjectDescriptor: """Derive object data for xtgeo.GridProperty.""" return DerivedObjectDescriptor( subtype="CPGridProperty", - classname="cpgrid_property", layout="cornerpoint", efolder="grids", fmt=(fmt := self.dataio.grid_fformat), extension=self._validate_get_ext( fmt, "CPGridProperty", ValidFormats().grid ), - spec=self.get_spec().model_dump(mode="json", exclude_none=True), - bbox=None, table_index=None, ) diff --git a/src/fmu/dataio/types.py b/src/fmu/dataio/types.py index 9b33a8be4..8eb337b7b 100644 --- a/src/fmu/dataio/types.py +++ b/src/fmu/dataio/types.py @@ -78,17 +78,6 @@ class PolygonsProxy(Polygons): ... "ArrowTable", ] -Classname: TypeAlias = Literal[ - "surface", - "polygons", - "points", - "cube", - "cpgrid", - "cpgrid_property", - "table", - "dictionary", -] - Layout: TypeAlias = Literal[ "regular", "unset", diff --git a/tests/test_units/test_filedataprovider_class.py b/tests/test_units/test_filedataprovider_class.py index 8809419b3..eff443012 100644 --- a/tests/test_units/test_filedataprovider_class.py +++ b/tests/test_units/test_filedataprovider_class.py @@ -9,10 +9,7 @@ from fmu.dataio import ExportData from fmu.dataio.datastructure.meta import meta from fmu.dataio.providers._filedata import FileDataProvider -from fmu.dataio.providers.objectdata._base import derive_name from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory -from xtgeo.cube import Cube -from xtgeo.surface import RegularSurface @pytest.mark.parametrize( @@ -248,58 +245,3 @@ def test_filedata_has_nonascii_letters(regsurf, tmp_path): fdata = FileDataProvider(edataobj1, objdata) with 
pytest.raises(ValueError, match="Path has non-ascii elements"): fdata.get_metadata() - - -@pytest.mark.parametrize( - "exportdata, obj, expected_name", - ( - ( - ExportData(), - Cube( - ncol=1, - nrow=1, - nlay=1, - xinc=25.0, - yinc=25.0, - zinc=2.0, - ), - "", - ), - ( - ExportData(name="NamedExportData"), - Cube( - ncol=1, - nrow=1, - nlay=1, - xinc=25.0, - yinc=25.0, - zinc=2.0, - ), - "NamedExportData", - ), - ( - ExportData(), - RegularSurface( - name="NamedRegularSurface", - ncol=25, - nrow=25, - xinc=1, - yinc=1, - ), - "NamedRegularSurface", - ), - ( - ExportData(name="NamedExportData"), - RegularSurface( - name="NamedRegularSurface", - ncol=25, - nrow=25, - xinc=1, - yinc=1, - ), - "NamedExportData", - ), - ), -) -def test_derive_name(exportdata, obj, expected_name) -> None: - assert derive_name(exportdata, obj) == expected_name diff --git a/tests/test_units/test_objectdataprovider_class.py b/tests/test_units/test_objectdataprovider_class.py index be1788e26..7c06ad227 100644 --- a/tests/test_units/test_objectdataprovider_class.py +++ b/tests/test_units/test_objectdataprovider_class.py @@ -1,14 +1,10 @@ """Test the _ObjectData class from the _objectdata.py module""" import os -from datetime import datetime import pytest from fmu.dataio import dataio from fmu.dataio._definitions import ConfigurationError, ValidFormats -from fmu.dataio.providers.objectdata._base import ( - get_timedata_from_existing, -) from fmu.dataio.providers.objectdata._provider import ( objectdata_provider_factory, ) @@ -17,30 +13,6 @@ from ..conftest import remove_ert_env, set_ert_env_prehook from ..utils import inside_rms - -@pytest.mark.parametrize( - "given, expected", - ( - ( - {"t0": {"value": "2022-08-02T00:00:00", "label": "base"}}, - (datetime.strptime("2022-08-02T00:00:00", "%Y-%m-%dT%H:%M:%S"), None), - ), - ( - [ - {"value": "2030-01-01T00:00:00", "label": "moni"}, - {"value": "2010-02-03T00:00:00", "label": "base"}, - ], - ( - datetime.strptime("2030-01-01T00:00:00", 
"%Y-%m-%dT%H:%M:%S"), - datetime.strptime("2010-02-03T00:00:00", "%Y-%m-%dT%H:%M:%S"), - ), - ), - ), -) -def test_get_timedata_from_existing(given: dict, expected: tuple): - assert get_timedata_from_existing(given) == expected - - # -------------------------------------------------------------------------------------- # RegularSurface # -------------------------------------------------------------------------------------- @@ -51,19 +23,19 @@ def test_objectdata_regularsurface_derive_named_stratigraphy(regsurf, edataobj1) # mimic the stripped parts of configuations for testing here objdata = objectdata_provider_factory(regsurf, edataobj1) - res = objdata._derive_named_stratigraphy() + res = objdata._get_named_stratigraphy() assert res.name == "Whatever Top" assert "TopWhatever" in res.alias assert res.stratigraphic is True -def test_objectdata_regularsurface_derive_named_stratigraphy_differ(regsurf, edataobj2): +def test_objectdata_regularsurface_get_named_stratigraphy_differ(regsurf, edataobj2): """Get name and some stratigaphic keys for a valid RegularSurface object .""" # mimic the stripped parts of configuations for testing here objdata = objectdata_provider_factory(regsurf, edataobj2) - res = objdata._derive_named_stratigraphy() + res = objdata._get_named_stratigraphy() assert res.name == "VOLANTIS GP. 
Top" assert "TopVolantis" in res.alias @@ -106,10 +78,10 @@ def test_objectdata_regularsurface_derive_objectdata(regsurf, edataobj1): objdata = objectdata_provider_factory(regsurf, edataobj1) assert isinstance(objdata, RegularSurfaceDataProvider) + assert objdata.classname.value == "surface" res = objdata.get_objectdata() assert res.subtype == "RegularSurface" - assert res.classname == "surface" assert res.extension == ".gri" @@ -117,8 +89,8 @@ def test_objectdata_regularsurface_derive_metadata(regsurf, edataobj1): """Derive all metadata for the 'data' block in fmu-dataio.""" myobj = objectdata_provider_factory(regsurf, edataobj1) - assert myobj.metadata["content"] == "depth" - assert myobj.metadata["alias"] + assert myobj._metadata["content"] == "depth" + assert myobj._metadata["alias"] def test_objectdata_provider_factory_raises_on_unknown(edataobj1):