Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For discussion only: Temporary changes to make timeseries work with new Sumo cases #343

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 51 additions & 9 deletions backend/src/services/sumo_access/summary_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ def get_available_vectors(self) -> List[VectorInfo]:

smry_table_collection = self._case.tables.filter(
aggregation="collection",
name="summary",
tagname="eclipse",
# name="summary", # New name is field-specific? e.g. DROGON, SNORRE. Not sure how to handle.
tagname="summary",
iteration=self._iteration_name,
)

if len(smry_table_collection.names) > 1:
raise ValueError(
f"Multiple summary table collections found. ({smry_table_collection.names}). Not sure what to do"
)
column_names = smry_table_collection.columns

ret_info_arr: List[VectorInfo] = []
Expand Down Expand Up @@ -98,7 +101,12 @@ def get_vector_table(
# For now, fail hard if metadata is not present. This test could be refined, but should suffice now.
vector_metadata = create_vector_metadata_from_field_meta(table.schema.field(vector_name))
if not vector_metadata:
raise ValueError(f"Did not find valid metadata for vector {vector_name}")
### TEMPORARY FIX SUMO
vector_metadata = VectorMetadata(
unit="m", is_total=False, is_rate=False, is_historical=False, keyword=vector_name, wgname="", get_num=0
)
LOGGER.warning(f"Did not find valid metadata for vector {vector_name}")
# raise ValueError(f"Did not find valid metadata for vector {vector_name}")

# Do the actual resampling
timer.lap_ms()
Expand All @@ -115,7 +123,6 @@ def get_vector_table(
f"resampling={et_resampling_ms}ms) "
f"{vector_name=} {resampling_frequency=} {table.shape=}"
)

return table, vector_metadata

def get_vector(
Expand Down Expand Up @@ -155,7 +162,6 @@ def get_matching_historical_vector(
non_historical_vector_name: str,
resampling_frequency: Optional[Frequency],
) -> Optional[HistoricalVector]:

timer = PerfTimer()

hist_vec_name = _construct_historical_vector_name(non_historical_vector_name)
Expand All @@ -179,7 +185,12 @@ def get_matching_historical_vector(
# Need metadata both for resampling and return value
vector_metadata = create_vector_metadata_from_field_meta(table.schema.field(hist_vec_name))
if not vector_metadata:
raise ValueError(f"Did not find valid metadata for vector {hist_vec_name}")
### TEMPORARY FIX SUMO
vector_metadata = VectorMetadata(
unit="m", is_total=True, is_rate=False, is_historical=True, keyword=hist_vec_name, wgname="", get_num=0
)
LOGGER.warning(f"Did not find valid metadata for vector {hist_vec_name}")
# raise ValueError(f"Did not find valid metadata for vector {hist_vec_name}")

# Do the actual resampling
if resampling_frequency is not None:
Expand Down Expand Up @@ -243,8 +254,8 @@ def _load_arrow_table_for_from_sumo(case: Case, iteration_name: str, vector_name

smry_table_collection = case.tables.filter(
aggregation="collection",
name="summary",
tagname="eclipse",
# name="summary", # New name is field-specific? e.g. DROGON, SNORRE. Not sure how to handle.
tagname="summary",
iteration=iteration_name,
column=vector_name,
)
Expand All @@ -264,6 +275,7 @@ def _load_arrow_table_for_from_sumo(case: Case, iteration_name: str, vector_name
# For now, just read the parquet data into an arrow table
byte_stream: BytesIO = sumo_table.blob
table = pq.read_table(byte_stream)

et_download_arrow_table_ms = timer.lap_ms()

# Verify that we got the expected columns
Expand All @@ -282,6 +294,36 @@ def _load_arrow_table_for_from_sumo(case: Case, iteration_name: str, vector_name

# Verify that the column datatypes are as we expect
schema = table.schema

### TEMPORARY FIX SUMO
print(f"Received table for {vector_name=}: {table=}")
if schema.field("DATE").type == pa.timestamp("us"):
LOGGER.warning("Sumo bug: DATE column is in microseconds. Casting to milliseconds")
table = table.cast(
pa.schema(
[
pa.field("DATE", pa.timestamp("ms")),
pa.field("REAL", pa.int16()),
pa.field(vector_name, schema.field(vector_name).type),
]
)
)
schema = table.schema
if schema.field(vector_name).type == pa.float64():
LOGGER.warning("Sumo bug: vector column is in float64. Casting to float32")
table = table.cast(
pa.schema(
[
pa.field("DATE", schema.field("DATE").type),
pa.field("REAL", pa.int16()),
pa.field(vector_name, pa.float32()),
]
)
)
schema = table.schema

###

if schema.field("DATE").type != pa.timestamp("ms"):
raise ValueError(f"Unexpected type for DATE column {schema.field('DATE').type=}")
if schema.field("REAL").type != pa.int16():
Expand Down