From 36c6a0fcd202182974bcee52ffd8b7a1d974e5d8 Mon Sep 17 00:00:00 2001 From: mferrera Date: Tue, 28 May 2024 08:01:37 +0200 Subject: [PATCH] ENH: Implement ObjectDataProvider as a Provider It now sets its state on initialization and returns a Pydantic model via the prescribed method. Does not do much of the possible clean-ups within the base class. --- src/fmu/dataio/_metadata.py | 23 +- src/fmu/dataio/providers/objectdata/_base.py | 208 ++++++++++-------- tests/test_units/test_metadata_class.py | 8 +- .../test_objectdataprovider_class.py | 6 +- 4 files changed, 120 insertions(+), 125 deletions(-) diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index 499be7350..b805b15c9 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -62,15 +62,6 @@ def generate_meta_tracklog() -> list[meta.TracklogEvent]: ] -def _get_objectdata_provider( - obj: types.Inferrable, dataio: ExportData, meta_existing: dict | None = None -) -> ObjectDataProvider: - """Derive metadata for the object. Reuse metadata if existing""" - objdata = objectdata_provider_factory(obj, dataio, meta_existing) - objdata.derive_metadata() - return objdata - - def _get_meta_filedata( dataio: ExportData, obj: types.Inferrable, @@ -88,16 +79,6 @@ def _get_meta_filedata( ).get_metadata() -def _get_meta_objectdata( - objdata: ObjectDataProvider, -) -> meta.content.AnyContent | internal.UnsetAnyContent: - return ( - internal.UnsetAnyContent.model_validate(objdata.metadata) - if objdata.metadata["content"] == "unset" - else meta.content.AnyContent.model_validate(objdata.metadata) - ) - - def _get_meta_fmu(fmudata: FmuProvider) -> internal.FMUClassMetaData | None: try: return fmudata.get_metadata() @@ -173,7 +154,7 @@ def generate_export_metadata( logger.info("Partially reuse existing metadata from %s", obj) meta_existing = read_metadata_from_file(obj) - objdata = _get_objectdata_provider(obj, dataio, meta_existing) + objdata = objectdata_provider_factory(obj, dataio, meta_existing) masterdata = dataio.config.get("masterdata") metadata = internal.DataClassMeta( @@ -184,7 +165,7 @@ def generate_export_metadata( fmu=_get_meta_fmu(fmudata) if fmudata else None, masterdata=_get_meta_masterdata(masterdata) if masterdata else None, access=_get_meta_access(dataio), - data=_get_meta_objectdata(objdata), + data=objdata.get_metadata(), file=_get_meta_filedata(dataio, obj, objdata, fmudata, compute_md5), tracklog=generate_meta_tracklog(), display=_get_meta_display(dataio, objdata), diff --git a/src/fmu/dataio/providers/objectdata/_base.py b/src/fmu/dataio/providers/objectdata/_base.py index 48fb9b831..326142d05 100644 --- a/src/fmu/dataio/providers/objectdata/_base.py +++ b/src/fmu/dataio/providers/objectdata/_base.py @@ -1,6 +1,6 @@ from __future__ import annotations -from abc import ABC, abstractmethod +from abc import abstractmethod from copy import deepcopy from dataclasses import dataclass, field from datetime import datetime @@ -11,8 +11,14 @@ from fmu.dataio._definitions import ConfigurationError from fmu.dataio._logging import null_logger from fmu.dataio._utils import generate_description -from fmu.dataio.datastructure._internal.internal import AllowedContent -from fmu.dataio.datastructure.meta import content, enums +from fmu.dataio.datastructure._internal.internal import AllowedContent, UnsetAnyContent +from fmu.dataio.datastructure.meta.content import ( + AnyContent, + FMUTimeObject, + Time, +) +from fmu.dataio.datastructure.meta.enums import ContentEnum +from fmu.dataio.providers._base import 
Provider if TYPE_CHECKING: from fmu.dataio.dataio import ExportData @@ -111,14 +117,14 @@ def get_timedata_from_existing(meta_timedata: dict) -> tuple[datetime, datetime ) -def get_fmu_time_object(timedata_item: list[str]) -> content.FMUTimeObject: +def get_fmu_time_object(timedata_item: list[str]) -> FMUTimeObject: """ Returns a FMUTimeObject from a timedata item on list format: ["20200101", "monitor"] where the first item is a date and the last item is an optional label """ value, *label = timedata_item - return content.FMUTimeObject( + return FMUTimeObject( value=datetime.strptime(str(value), "%Y%m%d"), label=label[0] if label else None, ) @@ -133,7 +139,7 @@ def get_validated_content(content: str | dict | None) -> AllowedContent: return AllowedContent(content="unset") if isinstance(content, str): - return AllowedContent(content=enums.ContentEnum(content)) + return AllowedContent(content=ContentEnum(content)) if len(content) > 1: raise ValueError( @@ -146,12 +152,12 @@ def get_validated_content(content: str | dict | None) -> AllowedContent: logger.debug("content_specific is %s", content_specific) return AllowedContent.model_validate( - {"content": enums.ContentEnum(usecontent), "content_incl_specific": content} + {"content": ContentEnum(usecontent), "content_incl_specific": content} ) @dataclass -class ObjectDataProvider(ABC): +class ObjectDataProvider(Provider): """Base class for providing metadata for data objects in fmu-dataio, e.g. a surface. The metadata for the 'data' are constructed by: @@ -178,16 +184,72 @@ class ObjectDataProvider(ABC): time0: datetime | None = field(default=None) time1: datetime | None = field(default=None) - @staticmethod - def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V: - """Validate that fmt (file format) matches data and return legal extension.""" - try: - return validator[fmt] - except KeyError: - raise ConfigurationError( - f"The file format {fmt} is not supported. ", - f"Valid {subtype} formats are: {list(validator.keys())}", + def __post_init__(self) -> None: + """Main function here, will populate the metadata block for 'data'.""" + + # Don't re-initialize data if it's coming from pre-existing metadata. 
+ if self.metadata: + return + + namedstratigraphy = self._derive_name_stratigraphy() + objres = self.get_objectdata() + content_model = get_validated_content(self.dataio.content) + + if self.dataio.forcefolder: + if self.dataio.forcefolder.startswith("/"): + raise ValueError("Can't use absolute path as 'forcefolder'") + msg = ( + f"The standard folder name is overrided from {objres.efolder} to " + f"{self.dataio.forcefolder}" ) + objres.efolder = self.dataio.forcefolder + logger.info(msg) + warn(msg, UserWarning) + + self.metadata["name"] = namedstratigraphy.name + self.metadata["stratigraphic"] = namedstratigraphy.stratigraphic + self.metadata["offset"] = namedstratigraphy.offset + self.metadata["alias"] = namedstratigraphy.alias + self.metadata["top"] = namedstratigraphy.top + self.metadata["base"] = namedstratigraphy.base + + self.metadata["content"] = (usecontent := content_model.content) + if content_model.content_incl_specific: + self.metadata[usecontent] = getattr( + content_model.content_incl_specific, usecontent, None + ) + + self.metadata["tagname"] = self.dataio.tagname + self.metadata["format"] = objres.fmt + self.metadata["layout"] = objres.layout + self.metadata["unit"] = self.dataio.unit + self.metadata["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0] + self.metadata["depth_reference"] = list(self.dataio.vertical_domain.values())[0] + self.metadata["spec"] = objres.spec + self.metadata["bbox"] = objres.bbox + self.metadata["table_index"] = objres.table_index + self.metadata["undef_is_zero"] = self.dataio.undef_is_zero + + # timedata: + self.metadata["time"] = self._derive_timedata() + self.metadata["is_prediction"] = self.dataio.is_prediction + self.metadata["is_observation"] = self.dataio.is_observation + self.metadata["description"] = generate_description(self.dataio.description) + + # the next is to give addition state variables identical values, and for + # consistency these are derived after all eventual validation and directly from + # the self.metadata fields: + + self.name = self.metadata["name"] + + # then there are a few settings that are not in the ``data`` metadata, but + # needed as data/variables in other classes: + + self.efolder = objres.efolder + self.classname = objres.classname + self.extension = objres.extension + self.fmt = objres.fmt + logger.info("Derive all metadata for data object... DONE") def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy: """Derive the name and stratigraphy for the object; may have several sources. 
@@ -202,23 +264,27 @@ def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy: # next check if usename has a "truename" and/or aliases from the config strat = self.dataio.config.get("stratigraphy", {}) - no_start_or_missing_name = strat is None or name not in strat + no_stratigraphy_or_name = strat is None or name not in strat rv = DerivedNamedStratigraphy( - name=name if no_start_or_missing_name else strat[name].get("name", name), - alias=[] if no_start_or_missing_name else strat[name].get("alias", []), - stratigraphic=False - if no_start_or_missing_name - else strat[name].get("stratigraphic", False), - stratigraphic_alias=[] - if no_start_or_missing_name - else strat[name].get("stratigraphic_alias"), - offset=0.0 if no_start_or_missing_name else strat[name].get("offset", 0.0), - top=None if no_start_or_missing_name else strat[name].get("top"), - base=None if no_start_or_missing_name else strat[name].get("base"), + name=name if no_stratigraphy_or_name else strat[name].get("name", name), + alias=[] if no_stratigraphy_or_name else strat[name].get("alias", []), + stratigraphic=( + False + if no_stratigraphy_or_name + else strat[name].get("stratigraphic", False) + ), + stratigraphic_alias=( + [] + if no_stratigraphy_or_name + else strat[name].get("stratigraphic_alias") + ), + offset=0.0 if no_stratigraphy_or_name else strat[name].get("offset", 0.0), + top=None if no_stratigraphy_or_name else strat[name].get("top"), + base=None if no_stratigraphy_or_name else strat[name].get("base"), ) - if not no_start_or_missing_name and rv.name != "name": + if not no_stratigraphy_or_name and rv.name != "name": rv.alias.append(name) return rv @@ -254,9 +320,7 @@ def _derive_timedata(self) -> dict[str, str] | None: self.time0, self.time1 = start.value, stop.value if stop else None - return content.Time(t0=start, t1=stop).model_dump( - mode="json", exclude_none=True - ) + return Time(t0=start, t1=stop).model_dump(mode="json", exclude_none=True) @abstractmethod def get_spec(self) -> AnySpecification | None: @@ -270,72 +334,24 @@ def get_bbox(self) -> BoundingBox2D | BoundingBox3D | None: def get_objectdata(self) -> DerivedObjectDescriptor: raise NotImplementedError - def derive_metadata(self) -> None: - """Main function here, will populate the metadata block for 'data'.""" - logger.info("Derive all metadata for data object...") - - namedstratigraphy = self._derive_name_stratigraphy() - objres = self.get_objectdata() - content_model = get_validated_content(self.dataio.content) - - if self.dataio.forcefolder: - if self.dataio.forcefolder.startswith("/"): - raise ValueError("Can't use absolute path as 'forcefolder'") - msg = ( - f"The standard folder name is overrided from {objres.efolder} to " - f"{self.dataio.forcefolder}" - ) - objres.efolder = self.dataio.forcefolder - logger.info(msg) - warn(msg, UserWarning) - - meta = self.metadata # shortform - - meta["name"] = namedstratigraphy.name - meta["stratigraphic"] = namedstratigraphy.stratigraphic - meta["offset"] = namedstratigraphy.offset - meta["alias"] = namedstratigraphy.alias - meta["top"] = namedstratigraphy.top - meta["base"] = namedstratigraphy.base + def get_metadata(self) -> AnyContent | UnsetAnyContent: + return ( + UnsetAnyContent.model_validate(self.metadata) + if self.metadata["content"] == "unset" + else AnyContent.model_validate(self.metadata) + ) - meta["content"] = (usecontent := content_model.content) - if content_model.content_incl_specific: - meta[usecontent] = getattr( - content_model.content_incl_specific, usecontent, None + 
@staticmethod + def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V: + """Validate that fmt (file format) matches data and return legal extension.""" + try: + return validator[fmt] + except KeyError: + raise ConfigurationError( + f"The file format {fmt} is not supported. ", + f"Valid {subtype} formats are: {list(validator.keys())}", ) - meta["tagname"] = self.dataio.tagname - meta["format"] = objres.fmt - meta["layout"] = objres.layout - meta["unit"] = self.dataio.unit - meta["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0] - meta["depth_reference"] = list(self.dataio.vertical_domain.values())[0] - meta["spec"] = objres.spec - meta["bbox"] = objres.bbox - meta["table_index"] = objres.table_index - meta["undef_is_zero"] = self.dataio.undef_is_zero - - # timedata: - meta["time"] = self._derive_timedata() - meta["is_prediction"] = self.dataio.is_prediction - meta["is_observation"] = self.dataio.is_observation - meta["description"] = generate_description(self.dataio.description) - - # the next is to give addition state variables identical values, and for - # consistency these are derived after all eventual validation and directly from - # the self.metadata fields: - - self.name = meta["name"] - - # then there are a few settings that are not in the ``data`` metadata, but - # needed as data/variables in other classes: - - self.efolder = objres.efolder - self.classname = objres.classname - self.extension = objres.extension - self.fmt = objres.fmt - logger.info("Derive all metadata for data object... DONE") - @classmethod def from_metadata_dict( cls, obj: Inferrable, dataio: ExportData, meta_existing: dict diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index 34c054ab0..1d7c51ad8 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -9,7 +9,6 @@ SCHEMA, SOURCE, VERSION, - _get_objectdata_provider, generate_export_metadata, ) from fmu.dataio._utils import prettyprint_dict @@ -17,6 +16,7 @@ SystemInformationOperatingSystem, TracklogEvent, ) +from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory # pylint: disable=no-member @@ -106,7 +106,7 @@ def test_generate_meta_tracklog_operating_system(edataobj1, regsurf): def test_populate_meta_objectdata(regsurf, edataobj2): mymeta = generate_export_metadata(regsurf, edataobj2) - objdata = _get_objectdata_provider(regsurf, edataobj2) + objdata = objectdata_provider_factory(regsurf, edataobj2) assert objdata.name == "VOLANTIS GP. Top" assert mymeta["display"]["name"] == objdata.name @@ -399,7 +399,7 @@ def test_metadata_display_name_not_given(regsurf, edataobj2): """Test that display.name == data.name when not explicitly provided.""" mymeta = generate_export_metadata(regsurf, edataobj2) - objdata = _get_objectdata_provider(regsurf, edataobj2) + objdata = objectdata_provider_factory(regsurf, edataobj2) assert "name" in mymeta["display"] assert mymeta["display"]["name"] == objdata.name @@ -411,7 +411,7 @@ def test_metadata_display_name_given(regsurf, edataobj2): edataobj2.display_name = "My Display Name" mymeta = generate_export_metadata(regsurf, edataobj2) - objdata = _get_objectdata_provider(regsurf, edataobj2) + objdata = objectdata_provider_factory(regsurf, edataobj2) assert mymeta["display"]["name"] == "My Display Name" assert objdata.name == "VOLANTIS GP. 
Top" diff --git a/tests/test_units/test_objectdataprovider_class.py b/tests/test_units/test_objectdataprovider_class.py index 0e4388103..16063f2bf 100644 --- a/tests/test_units/test_objectdataprovider_class.py +++ b/tests/test_units/test_objectdataprovider_class.py @@ -117,10 +117,8 @@ def test_objectdata_regularsurface_derive_metadata(regsurf, edataobj1): """Derive all metadata for the 'data' block in fmu-dataio.""" myobj = objectdata_provider_factory(regsurf, edataobj1) - myobj.derive_metadata() - res = myobj.metadata - assert res["content"] == "depth" - assert res["alias"] + assert myobj.metadata["content"] == "depth" + assert myobj.metadata["alias"] def test_objectdata_provider_factory_raises_on_unknown(edataobj1):