-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2e19f7a
commit a18e3c4
Showing
17 changed files
with
715 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import logging | ||
from typing import Annotated | ||
|
||
import pyarrow as pa | ||
import pyarrow.compute as pc | ||
from fastapi import APIRouter, Depends, HTTPException, Query | ||
|
||
from src.backend.auth.auth_helper import AuthHelper | ||
from src.services.summary_vector_statistics import compute_vector_statistics | ||
from src.services.sumo_access.generic_types import EnsembleScalarResponse | ||
from src.services.sumo_access.parameter_access import ParameterAccess | ||
from src.services.sumo_access.rft_access import RftAccess | ||
from src.services.utils.authenticated_user import AuthenticatedUser | ||
|
||
from . import schemas | ||
|
||
LOGGER = logging.getLogger(__name__) | ||
|
||
router = APIRouter() | ||
|
||
|
||
@router.get("/rft_info")
async def get_rft_info(
    authenticated_user: Annotated[AuthenticatedUser, Depends(AuthHelper.get_authenticated_user)],
    case_uuid: Annotated[str, Query(description="Sumo case uuid")],
    ensemble_name: Annotated[str, Query(description="Ensemble name")],
) -> list[schemas.RftInfo]:
    # Lists the RFT info entries (well name + timestamps) for the given case and ensemble.
    # A plain comment is used on purpose: FastAPI would publish a docstring in the OpenAPI schema.
    sumo_token = authenticated_user.get_sumo_access_token()
    rft_access = await RftAccess.from_case_uuid(sumo_token, case_uuid, ensemble_name)

    return await rft_access.get_rft_info()
|
||
|
||
@router.get("/realization_data")
async def get_realization_data(
    authenticated_user: Annotated[AuthenticatedUser, Depends(AuthHelper.get_authenticated_user)],
    case_uuid: Annotated[str, Query(description="Sumo case uuid")],
    ensemble_name: Annotated[str, Query(description="Ensemble name")],
    well_name: Annotated[str, Query(description="Well name")],
    response_name: Annotated[str, Query(description="Response name")],
    timestamps_utc_ms: Annotated[list[int] | None, Query(description="Timestamps utc ms")] = None,
    realizations: Annotated[list[int] | None, Query(description="Realizations")] = None,
) -> list[schemas.RftRealizationData]:
    # Fetches per-realization RFT data for one well and one response column.
    # The optional timestamp and realization filters are forwarded unchanged to the access layer.
    # A plain comment is used on purpose: FastAPI would publish a docstring in the OpenAPI schema.
    sumo_token = authenticated_user.get_sumo_access_token()
    rft_access = await RftAccess.from_case_uuid(sumo_token, case_uuid, ensemble_name)

    return await rft_access.get_rft_well_realization_data(
        well_name=well_name,
        response_name=response_name,
        timestamps_utc_ms=timestamps_utc_ms,
        realizations=realizations,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from pydantic import BaseModel | ||
|
||
|
||
class RftInfo(BaseModel):
    """API schema pairing a well name with a list of integer timestamps."""

    # Name of the well
    well_name: str
    # Timestamps for the well; presumably UTC epoch milliseconds, going by the field name
    timestamps_utc_ms: list[int]
|
||
|
||
class RftRealizationData(BaseModel):
    """API schema for the RFT values of one well, one realization and one timestamp."""

    # Name of the well
    well_name: str
    # Realization number the arrays belong to
    realization: int
    # Timestamp of the data; presumably UTC epoch milliseconds, going by the field name
    timestamp_utc_ms: int
    # Depth values; NOTE(review): looks index-aligned with value_arr -- confirm against the producer
    depth_arr: list[float]
    # Values of the requested response
    value_arr: list[float]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import logging | ||
from typing import List, Optional, Sequence | ||
from io import BytesIO | ||
|
||
import pandas as pd | ||
import pyarrow as pa | ||
import pyarrow.compute as pc | ||
import pyarrow.parquet as pq | ||
from fmu.sumo.explorer.objects import Case, TableCollection | ||
|
||
from ._helpers import SumoEnsemble | ||
from ..utils.perf_timer import PerfTimer | ||
from .rft_types import RftInfo, RftRealizationData | ||
|
||
LOGGER = logging.getLogger(__name__) | ||
|
||
|
||
class RftAccess(SumoEnsemble):
    """Access to RFT tables for one Sumo case and iteration.

    Reads ``self._case`` and ``self._iteration_name``, which are not assigned in this
    class (presumably set up by ``SumoEnsemble`` -- confirm).
    """

    async def get_rft_info(self) -> list[RftInfo]:
        """Return one ``RftInfo`` per well with its sorted, de-duplicated timestamps.

        Only the "PRESSURE" table is loaded; its WELL and DATE columns are used.
        """
        table = await get_concatenated_rft_table(self._case, self._iteration_name, column_names=["PRESSURE"])
        well_column = table["WELL"]

        rft_well_infos: list[RftInfo] = []
        for well_name in well_column.unique().tolist():
            well_table = table.filter(pc.equal(well_column, well_name))
            # Cast to an explicit 64-bit integer: a bare ``int`` maps to a platform-dependent
            # width on some numpy builds, which cannot hold epoch milliseconds.
            timestamps_utc_ms = sorted(set(well_table["DATE"].to_numpy().astype("int64").tolist()))

            rft_well_infos.append(RftInfo(well_name=well_name, timestamps_utc_ms=timestamps_utc_ms))

        return rft_well_infos

    async def get_rft_well_realization_data(
        self,
        well_name: str,
        response_name: str,
        timestamps_utc_ms: Optional[Sequence[int]],
        realizations: Optional[Sequence[int]],
    ) -> List[RftRealizationData]:
        """Return depth/value arrays for one well, grouped per realization and date.

        Args:
            well_name: Well to extract data for.
            response_name: Name of the response column to load together with "DEPTH".
            timestamps_utc_ms: Optional timestamps to keep; ``None`` keeps all dates.
            realizations: Optional realization numbers to keep; ``None`` keeps all.

        Returns:
            One ``RftRealizationData`` per (REAL, DATE) combination found in the filtered table.
        """
        column_names = [response_name, "DEPTH"]
        table = await self.get_rft_table(
            well_names=[well_name],
            column_names=column_names,
            timestamps_utc_ms=timestamps_utc_ms,
            realizations=realizations,
        )
        pandas_table = table.to_pandas(types_mapper=pd.ArrowDtype)

        ret_arr: List[RftRealizationData] = []

        for real, real_df in pandas_table.groupby("REAL"):
            for date, date_df in real_df.groupby("DATE"):
                ret_arr.append(
                    RftRealizationData(
                        well_name=well_name,
                        realization=int(real),
                        # timestamp() returns float seconds; round so the int field never
                        # receives a value with a floating-point fractional residue.
                        timestamp_utc_ms=round(date.timestamp() * 1000),
                        # Hand plain Python lists to the model rather than pandas Series.
                        depth_arr=date_df["DEPTH"].to_list(),
                        value_arr=date_df[response_name].to_list(),
                    )
                )

        return ret_arr

    async def get_rft_table(
        self,
        well_names: List[str],
        column_names: List[str],
        timestamps_utc_ms: Optional[Sequence[int]],
        realizations: Optional[Sequence[int]],
    ) -> pa.Table:
        """Load the concatenated RFT table and filter it on realization, well and date.

        Args:
            well_names: Wells to keep (always applied).
            column_names: Columns to load and concatenate.
            timestamps_utc_ms: Optional timestamps to keep; ``None`` disables the date filter.
            realizations: Optional realization numbers to keep; ``None`` disables the filter.

        Returns:
            The filtered arrow table.
        """
        table = await get_concatenated_rft_table(self._case, self._iteration_name, column_names)

        if realizations is not None:
            mask = pc.is_in(table["REAL"], value_set=pa.array(realizations))
            table = table.filter(mask)

        mask = pc.is_in(table["WELL"], value_set=pa.array(well_names))
        table = table.filter(mask)

        if timestamps_utc_ms is not None:
            # Build the lookup set as millisecond timestamps so it is compared against the
            # DATE column with a matching type instead of as bare integers.
            date_values = pa.array(timestamps_utc_ms, type=pa.timestamp("ms"))
            mask = pc.is_in(table["DATE"], value_set=date_values)
            table = table.filter(mask)

        return table
|
||
|
||
async def get_concatenated_rft_table(case: Case, iteration_name: str, column_names: List[str]) -> pa.Table:
    """Load one Sumo table per requested column and combine them into a single table.

    The first loaded table is kept whole; from each later table only the requested
    column is appended. Returns ``None`` when ``column_names`` is empty.
    """
    merged = None
    for name in column_names:
        loaded = await _load_arrow_table_for_from_sumo(case, iteration_name, column_name=name)
        # NOTE(review): appending by position assumes all tables share the same row order -- confirm
        merged = loaded if merged is None else merged.append_column(name, loaded[name])

    return merged
|
||
|
||
async def _load_arrow_table_for_from_sumo(case: Case, iteration_name: str, column_name: str) -> Optional[pa.Table]:
    """Download the single RFT table holding ``column_name`` and validate its layout.

    Args:
        case: Sumo case to search in.
        iteration_name: Iteration to search in.
        column_name: Value column the table must contain.

    Returns:
        The arrow table, or ``None`` if the table collection is empty.

    Raises:
        ValueError: If more than one table matches, if the columns are not exactly
            DATE, REAL, WELL and ``column_name``, or if a column has an unexpected type.
    """
    timer = PerfTimer()

    rft_table_collection = await get_rft_table_collection(case, iteration_name, column_name=column_name)

    # Query the collection size once and reuse it for both checks
    num_tables = await rft_table_collection.length_async()
    if num_tables == 0:
        return None
    if num_tables > 1:
        raise ValueError(f"Multiple tables found for vector {column_name=}")

    sumo_table = await rft_table_collection.getitem_async(0)
    et_locate_sumo_table_ms = timer.lap_ms()

    # Now, read as an arrow table
    # Note!!!
    # The tables we have seen so far have format set to 'arrow', but the actual data is in parquet format.
    # This must be a bug or a misunderstanding.
    # For now, just read the parquet data into an arrow table
    byte_stream: BytesIO = await sumo_table.blob_async
    table = pq.read_table(byte_stream)
    et_download_arrow_table_ms = timer.lap_ms()

    # Verify that we got the expected columns
    found_columns = table.column_names
    if "DATE" not in found_columns:
        raise ValueError("Table does not contain a DATE column")
    if "REAL" not in found_columns:
        raise ValueError("Table does not contain a REAL column")
    if column_name not in found_columns:
        raise ValueError(f"Table does not contain a {column_name} column")
    if table.num_columns != 4:
        raise ValueError("Table should contain exactly 4 columns")

    # This full comparison also catches a missing WELL column
    if sorted(found_columns) != sorted(["DATE", "REAL", "WELL", column_name]):
        raise ValueError(f"Unexpected columns in table {table.column_names=}")

    # Verify that the column datatypes are as we expect
    schema = table.schema
    if schema.field("DATE").type != pa.timestamp("ms"):
        raise ValueError(f"Unexpected type for DATE column {schema.field('DATE').type=}")
    if schema.field("REAL").type != pa.int16():
        raise ValueError(f"Unexpected type for REAL column {schema.field('REAL').type=}")
    if schema.field(column_name).type != pa.float32():
        raise ValueError(f"Unexpected type for {column_name} column {schema.field(column_name).type=}")

    # Lazy %-style arguments: the message is only rendered when debug logging is enabled
    LOGGER.debug(
        "Loaded arrow table from Sumo in: %sms (locate_sumo_table=%sms, download_arrow_table=%sms) "
        "column_name=%r table.shape=%r",
        timer.elapsed_ms(),
        et_locate_sumo_table_ms,
        et_download_arrow_table_ms,
        column_name,
        table.shape,
    )

    return table
|
||
|
||
async def get_rft_table_collection(
    case: Case, iteration_name: str, column_name: Optional[str] = None
) -> TableCollection:
    """Get a collection of rft tables for a case and iteration

    The case tables are filtered on aggregation="collection", tagname="rft", the given
    iteration and the given column. The filtered collection is returned only when it
    resolves to exactly one table name.

    Raises:
        ValueError: If no table name, or more than one table name, matches the filter.
    """
    rft_table_collection = case.tables.filter(
        aggregation="collection",
        tagname="rft",
        iteration=iteration_name,
        column=column_name,
    )
    table_names = await rft_table_collection.names_async

    num_names = len(table_names)
    if num_names == 0:
        raise ValueError("No rft table collections found")
    if num_names > 1:
        raise ValueError(f"Multiple rft table collections found: {table_names}. Expected only one.")

    return rft_table_collection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from enum import Enum | ||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class RftColumnNames(str, Enum):
    """String-valued enum of RFT table column names; each member's value equals its name."""

    WELL = "WELL"
    DEPTH = "DEPTH"
    PRESSURE = "PRESSURE"
|
||
|
||
class RftSumoTableSchema(BaseModel):
    """Model holding a tagname together with a list of column names."""

    # Tag name; presumably the Sumo tagname of the table -- verify against usage
    tagname: str
    # Names of the columns
    column_names: list[str]
|
||
|
||
class RftInfo(BaseModel):
    """A well name together with a list of integer timestamps."""

    # Name of the well
    well_name: str
    # Timestamps for the well; presumably UTC epoch milliseconds, going by the field name
    timestamps_utc_ms: list[int]
|
||
|
||
class RftRealizationData(BaseModel):
    """RFT values for one well, one realization and one timestamp."""

    # Name of the well
    well_name: str
    # Realization number the arrays belong to
    realization: int
    # Timestamp of the data; presumably UTC epoch milliseconds, going by the field name
    timestamp_utc_ms: int
    # Depth values; NOTE(review): looks index-aligned with value_arr -- confirm against the producer
    depth_arr: list[float]
    # Values of the requested response
    value_arr: list[float]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/* istanbul ignore file */ | ||
/* tslint:disable */ | ||
/* eslint-disable */ | ||
|
||
/** A well name together with its list of timestamps (numeric). */
export type RftInfo = {
    well_name: string;
    timestamps_utc_ms: number[];
};
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* istanbul ignore file */ | ||
/* tslint:disable */ | ||
/* eslint-disable */ | ||
|
||
/** RFT depth/value arrays for one well, one realization and one timestamp. */
export type RftRealizationData = {
    well_name: string;
    realization: number;
    timestamp_utc_ms: number;
    depth_arr: number[];
    value_arr: number[];
};
|
Oops, something went wrong.