Skip to content

Commit

Permalink
Fix table read (#180)
Browse files Browse the repository at this point in the history
* Fix read pandas as csv

* Fix read arrow as parquet

* Correct return of pd.DataFrame when parquet

* Add test_table

Readdress test: like the other tests, it checks against an existing case
  • Loading branch information
daniel-sol authored Jun 8, 2023
1 parent 82f96bb commit 722f523
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 14 deletions.
46 changes: 35 additions & 11 deletions src/fmu/sumo/explorer/objects/table.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""module containing class for table"""
import logging
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
Expand All @@ -7,6 +8,8 @@
from fmu.sumo.explorer.objects._child import Child
from warnings import warn

# Attach a no-op handler to this module's own logger rather than configuring
# the root logger. basicConfig() iterates over ``handlers`` and expects handler
# *instances*; passing the NullHandler class itself raises TypeError at import
# time whenever the root logger has no handlers yet. Leaving the root logger
# alone also keeps logging configuration in the hands of the application.
logging.getLogger(__name__).addHandler(logging.NullHandler())


class Table(Child):
"""Class representing a table object in Sumo"""
Expand All @@ -20,6 +23,7 @@ def __init__(self, sumo: SumoClient, metadata: dict) -> None:
super().__init__(sumo, metadata)
self._dataframe = None
self._arrowtable = None
self._logger = logging.getLogger("__name__" + ".Table")

@property
def dataframe(self) -> pd.DataFrame:
Expand All @@ -42,22 +46,32 @@ def to_pandas(self) -> pd.DataFrame:
Returns:
DataFrame: A DataFrame object
"""
if not self._dataframe:
try:
self._dataframe = pd.read_parquet(self.blob)

except pa.lib.ArrowInvalid:
if self._dataframe is None:
if self["data"]["format"] == "csv":
worked = "csv"
self._logger.debug("Treating blob as csv")
try:
self._dataframe = pd.read_csv(self.blob)
worked = "csv"

except UnicodeDecodeError as ud_e:
raise UnicodeDecodeError("Maybe not csv?") from ud_e
else:
try:
worked = "feather"
self._dataframe = pf.read_feather(self.blob)
except pa.lib.ArrowInvalid:
try:
self._dataframe = pd.read_csv(self.blob)
worked = "parquet"
self._dataframe = pd.read_parquet(self.blob)

except UnicodeDecodeError as ud_error:
raise TypeError(
"Come on, no way this is converting to pandas!!"
) from ud_error

self._logger.debug("Read blob as %s to return pandas", worked)
return self._dataframe

@to_pandas.setter
Expand Down Expand Up @@ -86,17 +100,27 @@ def to_arrow(self) -> pa.Table:
Returns:
pa.Table: _description_
"""
if not self._arrowtable:
try:
self._arrowtable = pq.read_table(self.blob)
except pa.lib.ArrowInvalid:
if self._arrowtable is None:
if self["data"]["format"] == "arrow":
try:
worked = "feather"
self._arrowtable = pf.read_table(self.blob)
except pa.lib.ArrowInvalid:
worked = "parquet"
self._arrowtable = pq.read_table(self.blob)
else:
warn(
"Reading csv format into arrow, you will not get the full benefit of native arrow"
)
worked = "csv"
try:
self._arrowtable = pa.Table.from_pandas(
pd.read_csv(self.blob)
)
except TypeError as type_err:
raise OSError("Cannot read this") from type_err

except TypeError as type_err:
raise OSError("Cannot read this into arrow") from type_err

self._logger.debug("Read blob as %s to return arrow", worked)

return self._arrowtable
4 changes: 1 addition & 3 deletions tests/test_aggregated_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ def test_aggregated_summary_arrow_with_deprecated_function_name(
DeprecationWarning,
match=".arrowtable is deprecated, renamed to .to_arrow",
):
column.arrowtable

assert isinstance(column.arrowtable, pa.Table)
assert isinstance(column.arrowtable, pa.Table)
with pytest.raises(IndexError) as e_info:
table = table["banana"]
assert (
Expand Down
65 changes: 65 additions & 0 deletions tests/test_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from fmu.sumo.explorer import Explorer
import pytest


@pytest.fixture(name="doc", scope="function")
def _fixture_doc():
    """Return the first table matching the pvt filter in a fixed case.

    Connects an Explorer to the "dev" environment, looks the case up by its
    uuid and filters the case's tables on name, tagname, iteration and
    realization. The first hit of that filter is handed to the test.
    """
    explorer = Explorer("dev")
    sumo_case = explorer.get_case_by_uuid(
        "8575ed9a-b0c1-4f64-b9ab-fab9cec31f4a"
    )
    matching_tables = sumo_case.tables.filter(
        name="SNORRE", tagname="pvt", iteration="iter-0", realization=113
    )
    return matching_tables[0]


@pytest.fixture(name="correct_columns", scope="session")
def _fixture_cols():
    """Return the list of column names the tests compare against."""
    expected_names = (
        "PRESSURE",
        "VOLUMEFACTOR",
        "VISCOSITY",
        "RS",
        "PVTNUM",
        "KEYWORD",
        "OILDENSITY",
        "WATERDENSITY",
        "GASDENSITY",
        "COMPRESSIBILITY",
        "VISCOSIBILITY",
    )
    # Hand out a list, as callers expect list semantics (e.g. equality
    # against another list).
    return list(expected_names)


def test_to_pandas_with_csv(doc, correct_columns):
"""Test method to_pandas
Args:
doc (fmu.sumo.Document): the document to read from
correct_columns (list): list of correct columns
"""
returned = doc.to_pandas
check_columns = returned.columns.tolist()
correct_columns.sort()
check_columns.sort()
assert (
check_columns == correct_columns
), f"Cols should be {correct_columns}, {check_columns}, when csv"


def test_to_arrow_with_csv(doc, correct_columns):
"""Test method to_arrow
Args:
doc (fmu.sumo.Document): the document to read from
correct_columns (list): list of correct columns
"""
returned = doc.to_arrow
correct_columns.sort()
check_columns = returned.column_names
check_columns.sort()

assert (
check_columns == correct_columns
), f"Cols should be {correct_columns}, {check_columns}, when arrow"

0 comments on commit 722f523

Please sign in to comment.