Skip to content

Commit

Permalink
ENH: Implement ObjectDataProvider as a Provider
Browse files Browse the repository at this point in the history
ObjectDataProvider now sets its state on initialization and returns a
Pydantic model via the prescribed method. This commit does not do much of
the cleanup that is possible within the base class.
  • Loading branch information
mferrera committed May 28, 2024
1 parent 93f39fd commit 36c6a0f
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 125 deletions.
23 changes: 2 additions & 21 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,6 @@ def generate_meta_tracklog() -> list[meta.TracklogEvent]:
]


def _get_objectdata_provider(
obj: types.Inferrable, dataio: ExportData, meta_existing: dict | None = None
) -> ObjectDataProvider:
"""Derive metadata for the object. Reuse metadata if existing"""
objdata = objectdata_provider_factory(obj, dataio, meta_existing)
objdata.derive_metadata()
return objdata


def _get_meta_filedata(
dataio: ExportData,
obj: types.Inferrable,
Expand All @@ -88,16 +79,6 @@ def _get_meta_filedata(
).get_metadata()


def _get_meta_objectdata(
objdata: ObjectDataProvider,
) -> meta.content.AnyContent | internal.UnsetAnyContent:
return (
internal.UnsetAnyContent.model_validate(objdata.metadata)
if objdata.metadata["content"] == "unset"
else meta.content.AnyContent.model_validate(objdata.metadata)
)


def _get_meta_fmu(fmudata: FmuProvider) -> internal.FMUClassMetaData | None:
try:
return fmudata.get_metadata()
Expand Down Expand Up @@ -173,7 +154,7 @@ def generate_export_metadata(
logger.info("Partially reuse existing metadata from %s", obj)
meta_existing = read_metadata_from_file(obj)

objdata = _get_objectdata_provider(obj, dataio, meta_existing)
objdata = objectdata_provider_factory(obj, dataio, meta_existing)
masterdata = dataio.config.get("masterdata")

metadata = internal.DataClassMeta(
Expand All @@ -184,7 +165,7 @@ def generate_export_metadata(
fmu=_get_meta_fmu(fmudata) if fmudata else None,
masterdata=_get_meta_masterdata(masterdata) if masterdata else None,
access=_get_meta_access(dataio),
data=_get_meta_objectdata(objdata),
data=objdata.get_metadata(),
file=_get_meta_filedata(dataio, obj, objdata, fmudata, compute_md5),
tracklog=generate_meta_tracklog(),
display=_get_meta_display(dataio, objdata),
Expand Down
208 changes: 112 additions & 96 deletions src/fmu/dataio/providers/objectdata/_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from abc import abstractmethod
from copy import deepcopy
from dataclasses import dataclass, field
from datetime import datetime
Expand All @@ -11,8 +11,14 @@
from fmu.dataio._definitions import ConfigurationError
from fmu.dataio._logging import null_logger
from fmu.dataio._utils import generate_description
from fmu.dataio.datastructure._internal.internal import AllowedContent
from fmu.dataio.datastructure.meta import content, enums
from fmu.dataio.datastructure._internal.internal import AllowedContent, UnsetAnyContent
from fmu.dataio.datastructure.meta.content import (
AnyContent,
FMUTimeObject,
Time,
)
from fmu.dataio.datastructure.meta.enums import ContentEnum
from fmu.dataio.providers._base import Provider

if TYPE_CHECKING:
from fmu.dataio.dataio import ExportData
Expand Down Expand Up @@ -111,14 +117,14 @@ def get_timedata_from_existing(meta_timedata: dict) -> tuple[datetime, datetime
)


def get_fmu_time_object(timedata_item: list[str]) -> content.FMUTimeObject:
def get_fmu_time_object(timedata_item: list[str]) -> FMUTimeObject:
"""
Returns a FMUTimeObject from a timedata item on list
format: ["20200101", "monitor"] where the first item is a date and
the last item is an optional label
"""
value, *label = timedata_item
return content.FMUTimeObject(
return FMUTimeObject(
value=datetime.strptime(str(value), "%Y%m%d"),
label=label[0] if label else None,
)
Expand All @@ -133,7 +139,7 @@ def get_validated_content(content: str | dict | None) -> AllowedContent:
return AllowedContent(content="unset")

if isinstance(content, str):
return AllowedContent(content=enums.ContentEnum(content))
return AllowedContent(content=ContentEnum(content))

if len(content) > 1:
raise ValueError(
Expand All @@ -146,12 +152,12 @@ def get_validated_content(content: str | dict | None) -> AllowedContent:
logger.debug("content_specific is %s", content_specific)

return AllowedContent.model_validate(
{"content": enums.ContentEnum(usecontent), "content_incl_specific": content}
{"content": ContentEnum(usecontent), "content_incl_specific": content}
)


@dataclass
class ObjectDataProvider(ABC):
class ObjectDataProvider(Provider):
"""Base class for providing metadata for data objects in fmu-dataio, e.g. a surface.
The metadata for the 'data' are constructed by:
Expand All @@ -178,16 +184,72 @@ class ObjectDataProvider(ABC):
time0: datetime | None = field(default=None)
time1: datetime | None = field(default=None)

@staticmethod
def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V:
"""Validate that fmt (file format) matches data and return legal extension."""
try:
return validator[fmt]
except KeyError:
raise ConfigurationError(
f"The file format {fmt} is not supported. ",
f"Valid {subtype} formats are: {list(validator.keys())}",
def __post_init__(self) -> None:
"""Main function here, will populate the metadata block for 'data'."""

# Don't re-initialize data if it's coming from pre-existing metadata.
if self.metadata:
return

namedstratigraphy = self._derive_name_stratigraphy()
objres = self.get_objectdata()
content_model = get_validated_content(self.dataio.content)

if self.dataio.forcefolder:
if self.dataio.forcefolder.startswith("/"):
raise ValueError("Can't use absolute path as 'forcefolder'")
msg = (
f"The standard folder name is overrided from {objres.efolder} to "
f"{self.dataio.forcefolder}"
)
objres.efolder = self.dataio.forcefolder
logger.info(msg)
warn(msg, UserWarning)

self.metadata["name"] = namedstratigraphy.name
self.metadata["stratigraphic"] = namedstratigraphy.stratigraphic
self.metadata["offset"] = namedstratigraphy.offset
self.metadata["alias"] = namedstratigraphy.alias
self.metadata["top"] = namedstratigraphy.top
self.metadata["base"] = namedstratigraphy.base

self.metadata["content"] = (usecontent := content_model.content)
if content_model.content_incl_specific:
self.metadata[usecontent] = getattr(
content_model.content_incl_specific, usecontent, None
)

self.metadata["tagname"] = self.dataio.tagname
self.metadata["format"] = objres.fmt
self.metadata["layout"] = objres.layout
self.metadata["unit"] = self.dataio.unit
self.metadata["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0]
self.metadata["depth_reference"] = list(self.dataio.vertical_domain.values())[0]
self.metadata["spec"] = objres.spec
self.metadata["bbox"] = objres.bbox
self.metadata["table_index"] = objres.table_index
self.metadata["undef_is_zero"] = self.dataio.undef_is_zero

# timedata:
self.metadata["time"] = self._derive_timedata()
self.metadata["is_prediction"] = self.dataio.is_prediction
self.metadata["is_observation"] = self.dataio.is_observation
self.metadata["description"] = generate_description(self.dataio.description)

# the next is to give addition state variables identical values, and for
# consistency these are derived after all eventual validation and directly from
# the self.metadata fields:

self.name = self.metadata["name"]

# then there are a few settings that are not in the ``data`` metadata, but
# needed as data/variables in other classes:

self.efolder = objres.efolder
self.classname = objres.classname
self.extension = objres.extension
self.fmt = objres.fmt
logger.info("Derive all metadata for data object... DONE")

def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy:
"""Derive the name and stratigraphy for the object; may have several sources.
Expand All @@ -202,23 +264,27 @@ def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy:

# next check if usename has a "truename" and/or aliases from the config
strat = self.dataio.config.get("stratigraphy", {})
no_start_or_missing_name = strat is None or name not in strat
no_stratigraphy_or_name = strat is None or name not in strat

rv = DerivedNamedStratigraphy(
name=name if no_start_or_missing_name else strat[name].get("name", name),
alias=[] if no_start_or_missing_name else strat[name].get("alias", []),
stratigraphic=False
if no_start_or_missing_name
else strat[name].get("stratigraphic", False),
stratigraphic_alias=[]
if no_start_or_missing_name
else strat[name].get("stratigraphic_alias"),
offset=0.0 if no_start_or_missing_name else strat[name].get("offset", 0.0),
top=None if no_start_or_missing_name else strat[name].get("top"),
base=None if no_start_or_missing_name else strat[name].get("base"),
name=name if no_stratigraphy_or_name else strat[name].get("name", name),
alias=[] if no_stratigraphy_or_name else strat[name].get("alias", []),
stratigraphic=(
False
if no_stratigraphy_or_name
else strat[name].get("stratigraphic", False)
),
stratigraphic_alias=(
[]
if no_stratigraphy_or_name
else strat[name].get("stratigraphic_alias")
),
offset=0.0 if no_stratigraphy_or_name else strat[name].get("offset", 0.0),
top=None if no_stratigraphy_or_name else strat[name].get("top"),
base=None if no_stratigraphy_or_name else strat[name].get("base"),
)

if not no_start_or_missing_name and rv.name != "name":
if not no_stratigraphy_or_name and rv.name != "name":
rv.alias.append(name)

return rv
Expand Down Expand Up @@ -254,9 +320,7 @@ def _derive_timedata(self) -> dict[str, str] | None:

self.time0, self.time1 = start.value, stop.value if stop else None

return content.Time(t0=start, t1=stop).model_dump(
mode="json", exclude_none=True
)
return Time(t0=start, t1=stop).model_dump(mode="json", exclude_none=True)

@abstractmethod
def get_spec(self) -> AnySpecification | None:
Expand All @@ -270,72 +334,24 @@ def get_bbox(self) -> BoundingBox2D | BoundingBox3D | None:
def get_objectdata(self) -> DerivedObjectDescriptor:
raise NotImplementedError

def derive_metadata(self) -> None:
"""Main function here, will populate the metadata block for 'data'."""
logger.info("Derive all metadata for data object...")

namedstratigraphy = self._derive_name_stratigraphy()
objres = self.get_objectdata()
content_model = get_validated_content(self.dataio.content)

if self.dataio.forcefolder:
if self.dataio.forcefolder.startswith("/"):
raise ValueError("Can't use absolute path as 'forcefolder'")
msg = (
f"The standard folder name is overrided from {objres.efolder} to "
f"{self.dataio.forcefolder}"
)
objres.efolder = self.dataio.forcefolder
logger.info(msg)
warn(msg, UserWarning)

meta = self.metadata # shortform

meta["name"] = namedstratigraphy.name
meta["stratigraphic"] = namedstratigraphy.stratigraphic
meta["offset"] = namedstratigraphy.offset
meta["alias"] = namedstratigraphy.alias
meta["top"] = namedstratigraphy.top
meta["base"] = namedstratigraphy.base
def get_metadata(self) -> AnyContent | UnsetAnyContent:
return (
UnsetAnyContent.model_validate(self.metadata)
if self.metadata["content"] == "unset"
else AnyContent.model_validate(self.metadata)
)

meta["content"] = (usecontent := content_model.content)
if content_model.content_incl_specific:
meta[usecontent] = getattr(
content_model.content_incl_specific, usecontent, None
@staticmethod
def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V:
"""Validate that fmt (file format) matches data and return legal extension."""
try:
return validator[fmt]
except KeyError:
raise ConfigurationError(
f"The file format {fmt} is not supported. ",
f"Valid {subtype} formats are: {list(validator.keys())}",
)

meta["tagname"] = self.dataio.tagname
meta["format"] = objres.fmt
meta["layout"] = objres.layout
meta["unit"] = self.dataio.unit
meta["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0]
meta["depth_reference"] = list(self.dataio.vertical_domain.values())[0]
meta["spec"] = objres.spec
meta["bbox"] = objres.bbox
meta["table_index"] = objres.table_index
meta["undef_is_zero"] = self.dataio.undef_is_zero

# timedata:
meta["time"] = self._derive_timedata()
meta["is_prediction"] = self.dataio.is_prediction
meta["is_observation"] = self.dataio.is_observation
meta["description"] = generate_description(self.dataio.description)

# the next is to give addition state variables identical values, and for
# consistency these are derived after all eventual validation and directly from
# the self.metadata fields:

self.name = meta["name"]

# then there are a few settings that are not in the ``data`` metadata, but
# needed as data/variables in other classes:

self.efolder = objres.efolder
self.classname = objres.classname
self.extension = objres.extension
self.fmt = objres.fmt
logger.info("Derive all metadata for data object... DONE")

@classmethod
def from_metadata_dict(
cls, obj: Inferrable, dataio: ExportData, meta_existing: dict
Expand Down
8 changes: 4 additions & 4 deletions tests/test_units/test_metadata_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
SCHEMA,
SOURCE,
VERSION,
_get_objectdata_provider,
generate_export_metadata,
)
from fmu.dataio._utils import prettyprint_dict
from fmu.dataio.datastructure.meta.meta import (
SystemInformationOperatingSystem,
TracklogEvent,
)
from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory

# pylint: disable=no-member

Expand Down Expand Up @@ -106,7 +106,7 @@ def test_generate_meta_tracklog_operating_system(edataobj1, regsurf):

def test_populate_meta_objectdata(regsurf, edataobj2):
mymeta = generate_export_metadata(regsurf, edataobj2)
objdata = _get_objectdata_provider(regsurf, edataobj2)
objdata = objectdata_provider_factory(regsurf, edataobj2)

assert objdata.name == "VOLANTIS GP. Top"
assert mymeta["display"]["name"] == objdata.name
Expand Down Expand Up @@ -399,7 +399,7 @@ def test_metadata_display_name_not_given(regsurf, edataobj2):
"""Test that display.name == data.name when not explicitly provided."""

mymeta = generate_export_metadata(regsurf, edataobj2)
objdata = _get_objectdata_provider(regsurf, edataobj2)
objdata = objectdata_provider_factory(regsurf, edataobj2)

assert "name" in mymeta["display"]
assert mymeta["display"]["name"] == objdata.name
Expand All @@ -411,7 +411,7 @@ def test_metadata_display_name_given(regsurf, edataobj2):
edataobj2.display_name = "My Display Name"

mymeta = generate_export_metadata(regsurf, edataobj2)
objdata = _get_objectdata_provider(regsurf, edataobj2)
objdata = objectdata_provider_factory(regsurf, edataobj2)

assert mymeta["display"]["name"] == "My Display Name"
assert objdata.name == "VOLANTIS GP. Top"
Expand Down
6 changes: 2 additions & 4 deletions tests/test_units/test_objectdataprovider_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,8 @@ def test_objectdata_regularsurface_derive_metadata(regsurf, edataobj1):
"""Derive all metadata for the 'data' block in fmu-dataio."""

myobj = objectdata_provider_factory(regsurf, edataobj1)
myobj.derive_metadata()
res = myobj.metadata
assert res["content"] == "depth"
assert res["alias"]
assert myobj.metadata["content"] == "depth"
assert myobj.metadata["alias"]


def test_objectdata_provider_factory_raises_on_unknown(edataobj1):
Expand Down

0 comments on commit 36c6a0f

Please sign in to comment.