Skip to content

Commit

Permalink
ENH: Add num_columns, num_rows to table spec
Browse files Browse the repository at this point in the history
  • Loading branch information
mferrera committed Sep 19, 2024
1 parent 3d517ab commit 4be386e
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 7 deletions.
2 changes: 2 additions & 0 deletions schema/definitions/0.8.0/examples/summary_table.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ data:
- FWPRH
- WWPT:R_A4
- WTIRWT1:A4
num_columns: 10
num_rows: 20
size: 200
table_index:
- DATE
Expand Down
4 changes: 3 additions & 1 deletion schema/definitions/0.8.0/examples/table_inplace.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,13 @@ data: # The data block describes the actual data (e.g. surface). Only present in
grid_model: # Making this an object to allow for expanding in the future
name: MyGrid # important for data identification, also important for other data types
spec: # class/layout dependent, optional? Can spec be expanded to work for all data types?
size: 123921
columns:
- BULK_OIL
- NET_OIL
- PORE_OIL
num_columns: 3
num_rows: 41306
size: 123918
is_prediction: true # A mechanism for separating pure QC output from actual predictions
is_observation: false
description:
Expand Down
4 changes: 3 additions & 1 deletion schema/definitions/0.8.0/examples/table_wellpicks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,16 @@ data: # The data block describes the actual data (e.g. surface). Only present in
vertical_domain: depth # / time / null
depth_reference: msl # / seabed / etc
spec: # class/layout dependent, optional? Can spec be expanded to work for all data types?
size: 123921
columns:
- X_UTME
- Y_UTMN
- Z_TVDSS
- MD
- WELL
- HORIZON
num_columns: 6
num_rows: 20653
size: 123918
table_index:
- WELL
- HORIZON
Expand Down
18 changes: 18 additions & 0 deletions schema/definitions/0.8.0/schema/fmu_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -7593,6 +7593,22 @@
"title": "Columns",
"type": "array"
},
"num_columns": {
"examples": [
1,
9999
],
"title": "Num Columns",
"type": "integer"
},
"num_rows": {
"examples": [
1,
9999
],
"title": "Num Rows",
"type": "integer"
},
"size": {
"examples": [
1,
Expand All @@ -7604,6 +7620,8 @@
},
"required": [
"columns",
"num_columns",
"num_rows",
"size"
],
"title": "TableSpecification",
Expand Down
8 changes: 7 additions & 1 deletion src/fmu/dataio/_model/specification.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ class TableSpecification(BaseModel):
columns: List[str]
"""List of columns present in a table."""

num_columns: int = Field(examples=[1, 9999])
"""The number of columns in a table."""

num_rows: int = Field(examples=[1, 9999])
"""The number of rows in a table."""

size: int = Field(examples=[1, 9999])
"""Size of data object."""
"""The total size of the table, i.e. `rows x cols`."""


class CPGridSpecification(RowColumnLayer):
Expand Down
5 changes: 5 additions & 0 deletions src/fmu/dataio/providers/objectdata/_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ def get_bbox(self) -> None:
def get_spec(self) -> TableSpecification:
    """Build the ``data.spec`` block for a pd.DataFrame.

    Returns:
        TableSpecification: the column names of ``self.obj`` together with
        its column count, row count and total ``size``.
    """
    logger.info("Get spec for pd.DataFrame (tables)")
    # ``shape`` is unpacked as (rows, columns).
    row_count, column_count = self.obj.shape
    column_names = list(self.obj.columns)
    total_size = int(self.obj.size)
    return TableSpecification(
        columns=column_names,
        num_columns=column_count,
        num_rows=row_count,
        size=total_size,
    )

Expand Down Expand Up @@ -142,5 +145,7 @@ def get_spec(self) -> TableSpecification:
logger.info("Get spec for pyarrow (tables)")
return TableSpecification(
columns=list(self.obj.column_names),
num_columns=self.obj.num_columns,
num_rows=self.obj.num_rows,
size=self.obj.num_columns * self.obj.num_rows,
)
6 changes: 6 additions & 0 deletions tests/test_units/test_ert_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ def test_dataframe_export_file_set_name(

metaout = dataio.read_metadata(output)
assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"]
assert metaout["data"]["spec"]["num_columns"] == 2
assert metaout["data"]["spec"]["num_rows"] == 4
assert metaout["data"]["spec"]["size"] == 8


def test_pyarrow_export_file_set_name(
Expand Down Expand Up @@ -443,3 +446,6 @@ def test_pyarrow_export_file_set_name(

metaout = dataio.read_metadata(output)
assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"]
assert metaout["data"]["spec"]["num_columns"] == 2
assert metaout["data"]["spec"]["num_rows"] == 4
assert metaout["data"]["spec"]["size"] == 8
8 changes: 4 additions & 4 deletions tests/test_units/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory


def _read_dict(file_path: str) -> dict:
    """Load the metadata stored next to an exported file, then clean up.

    The metadata is read from a hidden sidecar file named ``.<name>.yml``
    located in the same directory as ``file_path``. Both the exported file
    and its metadata file are deleted before returning.

    Args:
        file_path (str): path to generated file

    Returns:
        dict: contents of the metadata file, as produced by ``yaml_load``
    """
    path = Path(file_path)
    meta_path = path.parent / f".{path.name}.yml"
    # Read the metadata first: both files are removed immediately afterwards.
    meta = yaml_load(meta_path)
    path.unlink()
    meta_path.unlink()
    return meta

Expand Down

0 comments on commit 4be386e

Please sign in to comment.