From 1d858969bc35ebe3ac2e298134c945d4d4e5c5c6 Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 27 May 2024 08:22:16 +0200 Subject: [PATCH] ENH: Return BaseModel from spec getters This also allows the getter to be optional in the cases in which the method will never be called because a specification is not relevant to the data type. Schema was updated to reflect the added docstrings. --- schema/definitions/0.8.0/schema/fmu_meta.json | 9 ++- .../datastructure/meta/specification.py | 21 +++++- src/fmu/dataio/providers/objectdata/_base.py | 3 +- .../dataio/providers/objectdata/_faultroom.py | 26 ++++---- .../dataio/providers/objectdata/_provider.py | 13 ++-- .../dataio/providers/objectdata/_tables.py | 22 ++----- src/fmu/dataio/providers/objectdata/_xtgeo.py | 65 ++++++++----------- tests/test_units/test_metadata_class.py | 5 +- .../test_objectdataprovider_class.py | 2 +- 9 files changed, 84 insertions(+), 82 deletions(-) diff --git a/schema/definitions/0.8.0/schema/fmu_meta.json b/schema/definitions/0.8.0/schema/fmu_meta.json index 3425cc27f..f106d23bc 100644 --- a/schema/definitions/0.8.0/schema/fmu_meta.json +++ b/schema/definitions/0.8.0/schema/fmu_meta.json @@ -315,6 +315,7 @@ "type": "object" }, "CPGridPropertySpecification": { + "description": "Specifies relevant values describing a corner point grid property object.", "properties": { "ncol": { "description": "The number of columns", @@ -341,7 +342,7 @@ "type": "object" }, "CPGridSpecification": { - "description": "Corner point grid", + "description": "Specifies relevant values describing a corner point grid object.", "properties": { "ncol": { "description": "The number of columns", @@ -473,6 +474,7 @@ "type": "object" }, "CubeSpecification": { + "description": "Specifies relevant values describing a cube object, i.e. 
a seismic cube.", "properties": { "ncol": { "description": "The number of columns", @@ -2097,6 +2099,7 @@ "type": "object" }, "FaultRoomSurfaceSpecification": { + "description": "Specifies relevant values describing a Faultroom surface object.", "properties": { "faults": { "description": "Names of faults", @@ -5008,6 +5011,7 @@ "type": "object" }, "PointSpecification": { + "description": "Specifies relevant values describing an xyz points object.", "properties": { "attributes": { "anyOf": [ @@ -5042,6 +5046,7 @@ "type": "object" }, "PolygonsSpecification": { + "description": "Specifies relevant values describing a polygon object.", "properties": { "npolys": { "description": "The number of individual polygons in the data object", @@ -7005,6 +7010,7 @@ "type": "object" }, "SurfaceSpecification": { + "description": "Specifies relevant values describing a regular surface object.", "properties": { "ncol": { "description": "The number of columns", @@ -7169,6 +7175,7 @@ "type": "object" }, "TableSpecification": { + "description": "Specifies relevant values describing a generic tabular data object.", "properties": { "columns": { "description": "List of columns present in a table.", diff --git a/src/fmu/dataio/datastructure/meta/specification.py b/src/fmu/dataio/datastructure/meta/specification.py index b6baf8bbb..156cbdd54 100644 --- a/src/fmu/dataio/datastructure/meta/specification.py +++ b/src/fmu/dataio/datastructure/meta/specification.py @@ -8,6 +8,8 @@ class RowColumn(BaseModel): + """Specifies the number of rows and columns in a regular surface object.""" + nrow: int = Field( description="The number of rows", ) @@ -17,12 +19,16 @@ class RowColumn(BaseModel): class RowColumnLayer(RowColumn): + """Specifies the number of rows, columns, and layers in a grid object.""" + nlay: int = Field( description="The number of layers", ) class SurfaceSpecification(RowColumn): + """Specifies relevant values describing a regular surface object.""" + rotation: float = Field( 
description="Rotation angle", allow_inf_nan=False, @@ -54,6 +60,8 @@ class SurfaceSpecification(RowColumn): class PointSpecification(BaseModel): + """Specifies relevant values describing an xyz points object.""" + attributes: Optional[List[str]] = Field( description="List of columns present in a table.", ) @@ -64,6 +72,8 @@ class PointSpecification(BaseModel): class TableSpecification(BaseModel): + """Specifies relevant values describing a generic tabular data object.""" + columns: List[str] = Field( description="List of columns present in a table.", ) @@ -74,7 +84,7 @@ class TableSpecification(BaseModel): class CPGridSpecification(RowColumnLayer): - """Corner point grid""" + """Specifies relevant values describing a corner point grid object.""" xshift: float = Field( description="Shift along the x-axis", @@ -103,16 +113,21 @@ class CPGridSpecification(RowColumnLayer): ) -class CPGridPropertySpecification(RowColumnLayer): ... +class CPGridPropertySpecification(RowColumnLayer): + """Specifies relevant values describing a corner point grid property object.""" class PolygonsSpecification(BaseModel): + """Specifies relevant values describing a polygon object.""" + npolys: int = Field( description="The number of individual polygons in the data object", ) class FaultRoomSurfaceSpecification(BaseModel): + """Specifies relevant values describing a Faultroom surface object.""" + horizons: List[str] = Field( description="List of horizon names", ) @@ -134,6 +149,8 @@ class FaultRoomSurfaceSpecification(BaseModel): class CubeSpecification(SurfaceSpecification): + """Specifies relevant values describing a cube object, i.e. 
a seismic cube.""" + nlay: int = Field( description="The number of layers", ) diff --git a/src/fmu/dataio/providers/objectdata/_base.py b/src/fmu/dataio/providers/objectdata/_base.py index 432b0045f..48fb9b831 100644 --- a/src/fmu/dataio/providers/objectdata/_base.py +++ b/src/fmu/dataio/providers/objectdata/_base.py @@ -17,6 +17,7 @@ if TYPE_CHECKING: from fmu.dataio.dataio import ExportData from fmu.dataio.datastructure.meta.content import BoundingBox2D, BoundingBox3D + from fmu.dataio.datastructure.meta.specification import AnySpecification from fmu.dataio.types import Classname, Efolder, Inferrable, Layout, Subtype logger: Final = null_logger(__name__) @@ -258,7 +259,7 @@ def _derive_timedata(self) -> dict[str, str] | None: ) @abstractmethod - def get_spec(self) -> dict: + def get_spec(self) -> AnySpecification | None: raise NotImplementedError @abstractmethod diff --git a/src/fmu/dataio/providers/objectdata/_faultroom.py b/src/fmu/dataio/providers/objectdata/_faultroom.py index 2309ea39f..2a6f772d3 100644 --- a/src/fmu/dataio/providers/objectdata/_faultroom.py +++ b/src/fmu/dataio/providers/objectdata/_faultroom.py @@ -1,12 +1,12 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, Final +from typing import Final from fmu.dataio._definitions import ValidFormats from fmu.dataio._logging import null_logger -from fmu.dataio.datastructure.meta import specification from fmu.dataio.datastructure.meta.content import BoundingBox3D +from fmu.dataio.datastructure.meta.specification import FaultRoomSurfaceSpecification from fmu.dataio.readers import FaultRoomSurface from ._base import ( @@ -33,20 +33,16 @@ def get_bbox(self) -> BoundingBox3D: zmax=float(self.obj.bbox["zmax"]), ) - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> FaultRoomSurfaceSpecification: """Derive data.spec for FaultRoomSurface""" logger.info("Get spec for FaultRoomSurface") - faultsurf = self.obj - return 
specification.FaultRoomSurfaceSpecification( - horizons=faultsurf.horizons, - faults=faultsurf.faults, - juxtaposition_hw=faultsurf.juxtaposition_hw, - juxtaposition_fw=faultsurf.juxtaposition_fw, - properties=faultsurf.properties, - name=faultsurf.name, - ).model_dump( - mode="json", - exclude_none=True, + return FaultRoomSurfaceSpecification( + horizons=self.obj.horizons, + faults=self.obj.faults, + juxtaposition_hw=self.obj.juxtaposition_hw, + juxtaposition_fw=self.obj.juxtaposition_fw, + properties=self.obj.properties, + name=self.obj.name, ) def get_objectdata(self) -> DerivedObjectDescriptor: @@ -57,7 +53,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: layout="faultroom_triangulated", efolder="maps", fmt=(fmt := self.dataio.dict_fformat), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary), table_index=None, diff --git a/src/fmu/dataio/providers/objectdata/_provider.py b/src/fmu/dataio/providers/objectdata/_provider.py index 9e1b018bc..8d6286e34 100644 --- a/src/fmu/dataio/providers/objectdata/_provider.py +++ b/src/fmu/dataio/providers/objectdata/_provider.py @@ -87,7 +87,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Final import pandas as pd import xtgeo @@ -167,9 +167,8 @@ class ExistingDataProvider(ObjectDataProvider): obj: Inferrable - def get_spec(self) -> dict: + def get_spec(self) -> None: """Derive data.spec from existing metadata.""" - return self.metadata["spec"] def get_bbox(self) -> None: """Derive data.bbox from existing metadata.""" @@ -183,7 +182,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder=self.efolder, fmt=self.fmt, extension=self.extension, - spec=self.get_spec(), + spec=self.metadata["spec"], bbox=self.metadata["bbox"], 
table_index=None, ) @@ -197,10 +196,8 @@ def derive_metadata(self) -> None: class DictionaryDataProvider(ObjectDataProvider): obj: dict - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> None: """Derive data.spec for dict.""" - logger.info("Get spec for dictionary") - return {} def get_bbox(self) -> None: """Derive data.bbox for dict.""" @@ -214,7 +211,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="dictionaries", fmt=(fmt := self.dataio.dict_fformat), extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary), - spec=self.get_spec() or None, + spec=None, bbox=None, table_index=None, ) diff --git a/src/fmu/dataio/providers/objectdata/_tables.py b/src/fmu/dataio/providers/objectdata/_tables.py index 69ce649cc..da68ce1e0 100644 --- a/src/fmu/dataio/providers/objectdata/_tables.py +++ b/src/fmu/dataio/providers/objectdata/_tables.py @@ -7,7 +7,7 @@ from fmu.dataio._definitions import STANDARD_TABLE_INDEX_COLUMNS, ValidFormats from fmu.dataio._logging import null_logger -from fmu.dataio.datastructure.meta import specification +from fmu.dataio.datastructure.meta.specification import TableSpecification from ._base import ( DerivedObjectDescriptor, @@ -59,16 +59,12 @@ def _derive_index(table_index: list[str] | None, columns: list[str]) -> list[str class DataFrameDataProvider(ObjectDataProvider): obj: pd.DataFrame - def get_spec(self) -> dict: + def get_spec(self) -> TableSpecification: """Derive data.spec for pd.DataFrame.""" logger.info("Get spec for pd.DataFrame (tables)") - - return specification.TableSpecification( + return TableSpecification( columns=list(self.obj.columns), size=int(self.obj.size), - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> None: @@ -84,7 +80,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="tables", fmt=(fmt := self.dataio.table_fformat), extension=self._validate_get_ext(fmt, "DataFrame", ValidFormats().table), - spec=self.get_spec(), + 
spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=None, table_index=table_index, ) @@ -94,16 +90,12 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class ArrowTableDataProvider(ObjectDataProvider): obj: pyarrow.Table - def get_spec(self) -> dict: + def get_spec(self) -> TableSpecification: """Derive data.spec for pyarrow.Table.""" logger.info("Get spec for pyarrow (tables)") - - return specification.TableSpecification( + return TableSpecification( columns=list(self.obj.column_names), size=self.obj.num_columns * self.obj.num_rows, - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> None: @@ -119,7 +111,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="tables", fmt=(fmt := self.dataio.arrow_fformat), extension=self._validate_get_ext(fmt, "ArrowTable", ValidFormats().table), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=None, table_index=table_index, ) diff --git a/src/fmu/dataio/providers/objectdata/_xtgeo.py b/src/fmu/dataio/providers/objectdata/_xtgeo.py index 4cf75e6ac..87446c3b9 100644 --- a/src/fmu/dataio/providers/objectdata/_xtgeo.py +++ b/src/fmu/dataio/providers/objectdata/_xtgeo.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Final import numpy as np import pandas as pd @@ -10,8 +10,15 @@ from fmu.dataio._definitions import ValidFormats from fmu.dataio._logging import null_logger from fmu.dataio._utils import npfloat_to_float -from fmu.dataio.datastructure.meta import specification from fmu.dataio.datastructure.meta.content import BoundingBox2D, BoundingBox3D +from fmu.dataio.datastructure.meta.specification import ( + CPGridPropertySpecification, + CPGridSpecification, + CubeSpecification, + PointSpecification, + PolygonsSpecification, + SurfaceSpecification, +) from ._base import ( DerivedObjectDescriptor, @@ -28,12 
+35,12 @@ class RegularSurfaceDataProvider(ObjectDataProvider): obj: xtgeo.RegularSurface - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> SurfaceSpecification: """Derive data.spec for xtgeo.RegularSurface.""" logger.info("Get spec for RegularSurface") required = self.obj.metadata.required - return specification.SurfaceSpecification( + return SurfaceSpecification( ncol=npfloat_to_float(required["ncol"]), nrow=npfloat_to_float(required["nrow"]), xori=npfloat_to_float(required["xori"]), @@ -43,9 +50,6 @@ def get_spec(self) -> dict[str, Any]: yflip=npfloat_to_float(required["yflip"]), rotation=npfloat_to_float(required["rotation"]), undef=1.0e30, - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> BoundingBox2D | BoundingBox3D: @@ -81,7 +85,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: layout="regular", efolder="maps", fmt=(fmt := self.dataio.surface_fformat), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), extension=self._validate_get_ext( fmt, "RegularSurface", ValidFormats().surface @@ -94,17 +98,14 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class PolygonsDataProvider(ObjectDataProvider): obj: xtgeo.Polygons - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> PolygonsSpecification: """Derive data.spec for xtgeo.Polygons.""" logger.info("Get spec for Polygons") - return specification.PolygonsSpecification( + return PolygonsSpecification( npolys=np.unique( self.obj.get_dataframe(copy=False)[self.obj.pname].values ).size - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> BoundingBox3D: @@ -130,7 +131,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="polygons", fmt=(fmt := self.dataio.polygons_fformat), extension=self._validate_get_ext(fmt, "Polygons", ValidFormats().polygons), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", 
exclude_none=True), bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -145,17 +146,14 @@ def obj_dataframe(self) -> pd.DataFrame: """Returns a dataframe of the referenced xtgeo.Points object.""" return self.obj.get_dataframe(copy=False) - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> PointSpecification: """Derive data.spec for xtgeo.Points.""" logger.info("Get spec for Points") df = self.obj_dataframe - return specification.PointSpecification( + return PointSpecification( attributes=list(df.columns[3:]) if len(df.columns) > 3 else None, size=int(df.size), - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> BoundingBox3D: @@ -181,7 +179,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="points", fmt=(fmt := self.dataio.points_fformat), extension=self._validate_get_ext(fmt, "Points", ValidFormats().points), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -191,12 +189,12 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class CubeDataProvider(ObjectDataProvider): obj: xtgeo.Cube - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> CubeSpecification: """Derive data.spec for xtgeo.Cube.""" logger.info("Get spec for Cube") required = self.obj.metadata.required - return specification.CubeSpecification( + return CubeSpecification( ncol=npfloat_to_float(required["ncol"]), nrow=npfloat_to_float(required["nrow"]), nlay=npfloat_to_float(required["nlay"]), @@ -210,9 +208,6 @@ def get_spec(self) -> dict[str, Any]: zflip=npfloat_to_float(required["zflip"]), rotation=npfloat_to_float(required["rotation"]), undef=npfloat_to_float(required["undef"]), - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> BoundingBox3D: @@ -254,7 +249,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="cubes", fmt=(fmt := 
self.dataio.cube_fformat), extension=self._validate_get_ext(fmt, "RegularCube", ValidFormats().cube), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -264,12 +259,12 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class CPGridDataProvider(ObjectDataProvider): obj: xtgeo.Grid - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> CPGridSpecification: """Derive data.spec for xtgeo.Grid.""" logger.info("Get spec for Grid geometry") required = self.obj.metadata.required - return specification.CPGridSpecification( + return CPGridSpecification( ncol=npfloat_to_float(required["ncol"]), nrow=npfloat_to_float(required["nrow"]), nlay=npfloat_to_float(required["nlay"]), @@ -279,9 +274,6 @@ def get_spec(self) -> dict[str, Any]: xscale=npfloat_to_float(required["xscale"]), yscale=npfloat_to_float(required["yscale"]), zscale=npfloat_to_float(required["zscale"]), - ).model_dump( - mode="json", - exclude_none=True, ) def get_bbox(self) -> BoundingBox3D: @@ -311,7 +303,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="grids", fmt=(fmt := self.dataio.grid_fformat), extension=self._validate_get_ext(fmt, "CPGrid", ValidFormats().grid), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=self.get_bbox().model_dump(mode="json", exclude_none=True), table_index=None, ) @@ -321,17 +313,14 @@ def get_objectdata(self) -> DerivedObjectDescriptor: class CPGridPropertyDataProvider(ObjectDataProvider): obj: xtgeo.GridProperty - def get_spec(self) -> dict[str, Any]: + def get_spec(self) -> CPGridPropertySpecification: """Derive data.spec for xtgeo.GridProperty.""" logger.info("Get spec for GridProperty") - return specification.CPGridPropertySpecification( + return CPGridPropertySpecification( nrow=self.obj.nrow, ncol=self.obj.ncol, nlay=self.obj.nlay, - ).model_dump( - mode="json", - 
exclude_none=True, ) def get_bbox(self) -> None: @@ -348,7 +337,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: extension=self._validate_get_ext( fmt, "CPGridProperty", ValidFormats().grid ), - spec=self.get_spec(), + spec=self.get_spec().model_dump(mode="json", exclude_none=True), bbox=None, table_index=None, ) diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index 58f7f424a..34c054ab0 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -115,7 +115,10 @@ def test_populate_meta_objectdata(regsurf, edataobj2): # surfaces shall have data.spec assert mymeta["data"] assert mymeta["data"]["spec"] - assert mymeta["data"]["spec"] == objdata.get_spec() + assert mymeta["data"]["spec"] == objdata.get_spec().model_dump( + mode="json", + exclude_none=True, + ) def test_bbox_zmin_zmax_presence(polygons, edataobj2): diff --git a/tests/test_units/test_objectdataprovider_class.py b/tests/test_units/test_objectdataprovider_class.py index 92ffe8986..0e4388103 100644 --- a/tests/test_units/test_objectdataprovider_class.py +++ b/tests/test_units/test_objectdataprovider_class.py @@ -96,7 +96,7 @@ def test_objectdata_regularsurface_spec_bbox(regsurf, edataobj1): specs = objdata.get_spec() bbox = objdata.get_bbox() - assert specs["ncol"] == regsurf.ncol + assert specs.ncol == regsurf.ncol assert bbox.xmin == 0.0 assert bbox.zmin == 1234.0