Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Account for ert and ert3 discrepancy when uploading parameter data to storage #192

Merged
merged 2 commits into from
Dec 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions tests/data/snake_oil_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def content(self):
"id": "FOPR",
},
},
"http://127.0.0.1:5000/ensembles/1/records/OP1_DIVERGENCE_SCALE/labels": [],
"http://127.0.0.1:5000/ensembles/1/records/BPR_138_PERSISTENCE/labels": [],
"http://127.0.0.1:5000/ensembles/1/records/SNAKE_OIL_GPR_DIFF/observations?realization_index=0": [],
"http://127.0.0.1:5000/ensembles/3/records/SNAKE_OIL_GPR_DIFF?realization_index=0": pd.DataFrame(
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
Expand Down Expand Up @@ -235,6 +237,16 @@ def to_parquet_helper(dataframe: pd.DataFrame) -> bytes:
).transpose()
)

# Parquet payload for ensemble 1's SNAKE_OIL_GPR_DIFF record: a single
# row holding the values 0..9, with string column labels "0".."9".
ensembles_response[
    "http://127.0.0.1:5000/ensembles/1/records/SNAKE_OIL_GPR_DIFF"
] = to_parquet_helper(
    pd.DataFrame(
        list(range(10)),
        columns=["0"],
        index=[str(i) for i in range(10)],
    ).transpose()
)

ensembles_response[
"http://127.0.0.1:5000/ensembles/1/records/OP1_DIVERGENCE_SCALE"
] = to_parquet_helper(
Expand All @@ -255,3 +267,69 @@ def to_parquet_helper(dataframe: pd.DataFrame) -> bytes:
index=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
).transpose()
)

# Mock storage-API responses for ensemble 42, used by the labelled-parameter
# tests: ensemble metadata, the parameter/response listings, and the label
# list for each parameter record.
ensembles_response.update(
    {
        "http://127.0.0.1:5000/ensembles/42": {
            "data": {
                "ensemble": {
                    "children": [],
                    "experiment": {"id": "exp1_id"},
                    "parent": None,
                    "id": 1,
                    "timeCreated": "2020-04-29T09:36:26",
                    "size": 1,
                    "activeRealizations": [0],
                    "userdata": '{"name": "default"}',
                }
            }
        },
        "http://127.0.0.1:5000/ensembles/42/parameters": [
            "test_parameter_1",
            "test_parameter_2",
        ],
        "http://127.0.0.1:5000/ensembles/42/responses": {
            # NOTE(review): "test_resposne" looks like a typo for
            # "test_response" — left unchanged because it is only a mock
            # lookup key; verify nothing matches on its spelling.
            "test_resposne": {
                "name": "name_test_response",
                "id": "test_response_id_1",
            },
        },
        # test_parameter_1 is unlabelled; test_parameter_2 carries two labels.
        # (A duplicate ".../test_parameter_2/labels" entry was removed here:
        # repeating a key in a dict literal silently overrides the first.)
        "http://127.0.0.1:5000/ensembles/42/records/test_parameter_1/labels": [],
        "http://127.0.0.1:5000/ensembles/42/records/test_parameter_2/labels": [
            "a",
            "b",
        ],
    }
)

# Parquet-encoded parameter records for ensemble 42.  The unlabelled
# parameter is keyed by a bare "?" query; the labelled parameter has one
# entry per "?label=<label>" query.
# (Fix: dropped a stray "" literal that was implicitly concatenated onto
# the first URL key — a leftover formatting artifact with no effect on the
# resulting string, but misleading to readers.)
ensembles_response[
    "http://127.0.0.1:5000/ensembles/42/records/test_parameter_1?"
] = to_parquet_helper(
    pd.DataFrame(
        [0.1, 1.1, 2.1],
        columns=["0"],
        index=["0", "1", "2"],
    ).transpose()
)
ensembles_response[
    "http://127.0.0.1:5000/ensembles/42/records/test_parameter_2?label=a"
] = to_parquet_helper(
    pd.DataFrame(
        [0.01, 1.01, 2.01],
        columns=["a"],
        index=["0", "1", "2"],
    ).transpose()
)
ensembles_response[
    "http://127.0.0.1:5000/ensembles/42/records/test_parameter_2?label=b"
] = to_parquet_helper(
    pd.DataFrame(
        [0.02, 1.02, 2.02],
        columns=["b"],
        index=["0", "1", "2"],
    ).transpose()
)
41 changes: 41 additions & 0 deletions tests/models/test_ensemble_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,44 @@ def test_ensemble_model(mock_data):
assert ens_model.children[0]._name == "default_smoother_update"
assert ens_model._name == "default"
assert len(ens_model.responses) == 1


def test_ensemble_model_labled_parameters(mock_data):
    """Labelled parameters are exposed under "<name>::<label>" keys whose
    label part is one of the labels reported by the data loader."""
    ensemble_id = 42
    model = EnsembleModel(ensemble_id=ensemble_id, project_id=None)
    assert model._name == "default"
    parameters = model.parameters
    assert len(parameters) == 3
    for key in parameters:
        if "::" in key:
            name, label = key.split("::", maxsplit=1)
        else:
            name, label = key, None
        expected_labels = model._data_loader.get_record_labels(ensemble_id, name)
        if label is not None:
            assert label in expected_labels


def test_ensemble_model_parameter_data(mock_data):
    """Parameter data loads for both unlabelled and labelled parameters;
    labelled ones are addressed via "<name>::<label>" keys."""
    ensemble_id = 42
    model = EnsembleModel(ensemble_id=ensemble_id, project_id=None)
    parameters = model.parameters
    assert len(parameters) == 3

    # Parameter without labels:
    labels = model._data_loader.get_record_labels(ensemble_id, "test_parameter_1")
    assert labels == []
    values = parameters["test_parameter_1"].data_df().values
    assert values.flatten().tolist() == [0.1, 1.1, 2.1]

    # Parameter with labels:
    labels = model._data_loader.get_record_labels(ensemble_id, "test_parameter_2")
    assert labels == ["a", "b"]
    for label, expected in (
        ("a", [0.01, 1.01, 2.01]),
        ("b", [0.02, 1.02, 2.02]),
    ):
        column = parameters[f"test_parameter_2::{label}"].data_df()[label]
        assert column.values.tolist() == expected
75 changes: 46 additions & 29 deletions webviz_ert/data_loader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from typing import Any, Mapping, Optional, List, MutableMapping, Tuple
from typing import Any, Mapping, Optional, List, MutableMapping, Tuple, Dict
from collections import defaultdict
from pprint import pformat
import requests
Expand Down Expand Up @@ -113,7 +113,7 @@ class DataLoader:
_instances: MutableMapping[ServerIdentifier, "DataLoader"] = {}

baseurl: str
token: str
token: Optional[str]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain why this is optional now?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really sure why pylint was not complaining about this before; it only started recently, so I guess pylint was updated.

The Dataloader __new__ definition looked like this for quite some time

def __new__(cls, baseurl: str, token: Optional[str] = None) -> "DataLoader":

So token is expected to be Optional[str]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks :)

_graphql_cache: MutableMapping[str, MutableMapping[dict, Any]]

def __new__(cls, baseurl: str, token: Optional[str] = None) -> "DataLoader":
Expand Down Expand Up @@ -195,45 +195,62 @@ def get_ensemble_userdata(self, ensemble_id: str) -> dict:
def get_ensemble_parameters(self, ensemble_id: str) -> list:
return self._get(url=f"ensembles/{ensemble_id}/parameters").json()

def get_record_labels(self, ensemble_id: str, name: str) -> list:
    """Return the labels attached to record *name* in ensemble *ensemble_id*.

    An empty list means the record is unlabelled.
    """
    endpoint = f"ensembles/{ensemble_id}/records/{name}/labels"
    return self._get(url=endpoint).json()

def get_experiment_priors(self, experiment_id: str) -> dict:
    """Return the experiment's prior distributions, decoded from the JSON
    string stored in its "priors" field."""
    experiment = self._query(GET_PRIORS, id=experiment_id)["experiment"]
    return json.loads(experiment["priors"])

def get_ensemble_parameter_data(
self, ensemble_id: str, parameter_name: str
self,
ensemble_id: str,
parameter_name: str,
) -> pd.DataFrame:
resp = self._get(
url=f"ensembles/{ensemble_id}/records/{parameter_name}",
headers={"accept": "application/x-parquet"},
)
stream = io.BytesIO(resp.content)
df = pd.read_parquet(stream)
return df
try:
if "::" in parameter_name:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what I meant by future work. It might be that I'm misunderstanding something, but is this not somewhat of a hack to avoid having to make ERT2 upload parameters as it should?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we could make ert2 upload parameters the way ert3 does, but I thought the idea was that since the underlying data structure is the same for both ert2 and ert3, it should not matter how they are uploaded — ert2 or ert3 style — given that both ways are valid and supported by the storage API.

name, label = parameter_name.split("::", 1)
params = {"label": label}
else:
name = parameter_name
params = {}

resp = self._get(
url=f"ensembles/{ensemble_id}/records/{name}",
headers={"accept": "application/x-parquet"},
params=params,
)
stream = io.BytesIO(resp.content)
df = pd.read_parquet(stream).transpose()
return df
except DataLoaderException as e:
logger.error(e)
return pd.DataFrame()

def get_ensemble_record_data(
self, ensemble_id: str, record_name: str, active_realizations: List[int]
self,
ensemble_id: str,
record_name: str,
) -> pd.DataFrame:
dfs = []
for rel_idx in active_realizations:
try:
resp = self._get(
url=f"ensembles/{ensemble_id}/records/{record_name}",
headers={"accept": "application/x-parquet"},
params={"realization_index": rel_idx},
)
stream = io.BytesIO(resp.content)
df = pd.read_parquet(stream).transpose()
df.columns = [rel_idx]
dfs.append(df)

except DataLoaderException as e:
logger.error(e)

if dfs == []:
try:
resp = self._get(
url=f"ensembles/{ensemble_id}/records/{record_name}",
headers={"accept": "application/x-parquet"},
)
stream = io.BytesIO(resp.content)
df = pd.read_parquet(stream).transpose()

except DataLoaderException as e:
logger.error(e)
return pd.DataFrame()

return pd.concat(dfs, axis=1)
try:
df.index = df.index.astype(int)
except TypeError:
pass
df = df.sort_index()
return df

def get_ensemble_record_observations(
self, ensemble_id: str, record_name: str
Expand Down
18 changes: 13 additions & 5 deletions webviz_ert/models/ensemble_model.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import json
import pandas as pd
from typing import Mapping, List, Dict, Union, Any, Optional
from webviz_ert.data_loader import (
get_data_loader,
)
from webviz_ert.data_loader import get_data_loader, DataLoaderException

from webviz_ert.models import Response, PriorModel, ParametersModel


def _create_parameter_models(
parameters_names: list, priors: dict, ensemble_id: str, project_id: str
parameters_names: list,
priors: dict,
ensemble_id: str,
project_id: str,
) -> Optional[Mapping[str, ParametersModel]]:
parameters = {}
for param in parameters_names:
Expand Down Expand Up @@ -98,7 +99,14 @@ def parameters(
self,
) -> Optional[Mapping[str, ParametersModel]]:
if not self._parameters:
parameter_names = self._data_loader.get_ensemble_parameters(self._id)
parameter_names = []
for param_name in self._data_loader.get_ensemble_parameters(self._id):
labels = self._data_loader.get_record_labels(self._id, param_name)
if len(labels) > 0:
for label in labels:
parameter_names.append(f"{param_name}::{label}")
else:
parameter_names.append(param_name)
parameter_priors = (
self._data_loader.get_experiment_priors(self._experiment_id)
if not self._parent
Expand Down
4 changes: 2 additions & 2 deletions webviz_ert/models/parameter_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ def __init__(self, **kwargs: Any):
def data_df(self) -> pd.DataFrame:
if self._data_df.empty:
_data_df = self._data_loader.get_ensemble_parameter_data(
ensemble_id=self._ensemble_id, parameter_name=self.key
ensemble_id=self._ensemble_id,
parameter_name=self.key,
)
if _data_df is not None:
_data_df = _data_df.transpose()
_data_df.index.name = self.key
self._data_df = _data_df
return self._data_df
2 changes: 1 addition & 1 deletion webviz_ert/models/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def axis(self) -> Optional[List[Union[int, str, datetime.datetime]]]:
def data(self) -> pd.DataFrame:
if self._data is None:
self._data = self._data_loader.get_ensemble_record_data(
self._ensemble_id, self.name, self._active_realizations
self._ensemble_id, self.name
)
return self._data

Expand Down