API: Export pyarrow as parquet (equinor#652)
daniel-sol authored May 24, 2024
1 parent fe33877 commit 282ea0a
Showing 5 changed files with 9 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/fmu/dataio/_definitions.py
@@ -43,7 +43,7 @@ class ValidFormats:
default_factory=lambda: {
"hdf": ".hdf",
"csv": ".csv",
- "arrow": ".arrow",
+ "parquet": ".parquet",
}
)
polygons: dict[str, str] = field(
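For context, and not part of the commit: a minimal, self-contained sketch of how a format-to-suffix mapping like the one changed above can be used to pick an output extension. The field name `table` and the helper `resolve_extension` are illustrative assumptions, not fmu-dataio API.

    from dataclasses import dataclass, field

    @dataclass
    class ValidFormats:
        # mirrors the mapping shown in the diff above; "parquet" now maps to ".parquet"
        table: dict[str, str] = field(
            default_factory=lambda: {
                "hdf": ".hdf",
                "csv": ".csv",
                "parquet": ".parquet",
            }
        )

    def resolve_extension(fformat: str) -> str:
        # illustrative helper: look up the extension, failing loudly on unknown formats
        try:
            return ValidFormats().table[fformat]
        except KeyError as err:
            raise ValueError(f"Unsupported table format: {fformat}") from err

    print(resolve_extension("parquet"))  # -> ".parquet"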
18 changes: 5 additions & 13 deletions src/fmu/dataio/_utils.py
@@ -146,22 +146,14 @@ def export_file(
"preserved unless calling 'reset_index()' on the dataframe."
)
obj.to_csv(filename, index=False)
- elif filename.suffix == ".arrow":
+ elif filename.suffix == ".parquet":
from pyarrow import Table

if isinstance(obj, Table):
- from pyarrow import feather
-
- # comment taken from equinor/webviz_subsurface/smry2arrow.py
- # Writing here is done through the feather import, but could also be
- # done using pa.RecordBatchFileWriter.write_table() with a few
- # pa.ipc.IpcWriteOptions(). It is convenient to use feather since it
- # has ready configured defaults and the actual file format is the same
- # (https://arrow.apache.org/docs/python/feather.html)
-
- # Types in pyarrow-stubs package are wrong for the write_feather(...).
- # https://arrow.apache.org/docs/python/generated/pyarrow.feather.write_feather.html#pyarrow.feather.write_feather
- feather.write_feather(obj, dest=str(filename)) # type: ignore
+ from pyarrow import parquet
+
+ parquet.write_table(obj, where=str(filename))
+
elif filename.suffix == ".json" and isinstance(obj, FaultRoomSurface):
with open(filename, "w") as stream:
json.dump(obj.storage, stream, indent=4)
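As a side note, not taken from the commit: a minimal, runnable sketch of the new write path, assuming only that pyarrow is installed; the table contents and file name are made up for illustration.

    import pyarrow as pa
    from pyarrow import parquet

    # Build a small Arrow table (illustrative data only).
    table = pa.table({"DATE": ["2020-01-01", "2020-02-01"], "FOPT": [1234.5, 2345.6]})

    # Same call as in export_file above: write the Arrow table as Parquet.
    parquet.write_table(table, where="myarrowtable.parquet")

    # Round trip: Parquet preserves the schema and values of the Arrow table.
    assert parquet.read_table("myarrowtable.parquet").equals(table)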
2 changes: 1 addition & 1 deletion src/fmu/dataio/dataio.py
@@ -311,7 +311,7 @@ class ExportData:

# class variables
allow_forcefolder_absolute: ClassVar[bool] = False # deprecated
- arrow_fformat: ClassVar[str] = "arrow"
+ arrow_fformat: ClassVar[str] = "parquet"
case_folder: ClassVar[str] = "share/metadata"
createfolder: ClassVar[bool] = True # deprecated
cube_fformat: ClassVar[str] = "segy"
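A hedged usage sketch of what this class-variable change means in practice: pyarrow tables exported through ExportData should now get a ".parquet" suffix instead of ".arrow". The config path, name, and content value below are assumptions for illustration and must be adapted to the actual FMU project setup.

    import pyarrow as pa
    import yaml
    from fmu.dataio import ExportData

    # Assumed location of the global config in an FMU project; adjust as needed.
    CFG = yaml.safe_load(open("../../fmuconfig/output/global_variables.yml"))

    table = pa.table({"DATE": ["2020-01-01"], "FOPT": [1234.5]})

    # Export the Arrow table; with this commit the file should end in ".parquet".
    exporter = ExportData(config=CFG, name="myarrowtable", content="timeseries")
    outfile = exporter.export(table)
    assert str(outfile).endswith(".parquet")  # previously ".arrow"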
2 changes: 1 addition & 1 deletion tests/test_units/test_ert_context.py
@@ -438,7 +438,7 @@ def test_pyarrow_export_file_set_name(
assert str(output) == str(
(
edata._rootpath
- / "realization-0/iter-0/share/results/tables/myarrowtable.arrow"
+ / "realization-0/iter-0/share/results/tables/myarrowtable.parquet"
).resolve()
)

2 changes: 1 addition & 1 deletion tests/test_units/test_rms_context.py
@@ -595,7 +595,7 @@ def test_pyarrow_export_file_set_name(rmssetup, rmsglobalconfig, arrowtable):
logger.info("Output is %s", output)

assert str(output) == str(
- (edata._rootpath / "share/results/tables/myarrowtable.arrow").resolve()
+ (edata._rootpath / "share/results/tables/myarrowtable.parquet").resolve()
)

metaout = dataio.read_metadata(output)
