Skip to content

Commit

Permalink
Fix keyword types in from_pandas
Browse files Browse the repository at this point in the history
The heuristic used to determine what kind of keyword
the column name used was ambiguous.
  • Loading branch information
eivindjahren committed Aug 12, 2024
1 parent ab09c20 commit 61b0054
Show file tree
Hide file tree
Showing 9 changed files with 1,080 additions and 35 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ jobs:
- name: Run Python tests
run: |
# Runs tests on installed distribution from an empty directory
python -m pip install pytest
python -m pip install -r test_requirements.txt
# pytest adds every directory up-to and including python/ into sys.path,
# meaning that "import resdata" will import python/resdata and not the installed
Expand Down
6 changes: 6 additions & 0 deletions lib/include/resdata/rd_sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,12 @@ const rd::smspec_node *rd_sum_add_smspec_node(rd_sum_type *rd_sum,
const rd::smspec_node *rd_sum_add_var(rd_sum_type *rd_sum, const char *keyword,
const char *wgname, int num,
const char *unit, float default_value);
// Adds a summary variable that carries local-grid information.
// Takes the same arguments as rd_sum_add_var() plus the name `lgr` and the
// three indices `lgr_i`, `lgr_j`, `lgr_k`, and returns the created node.
// The implementation throws std::invalid_argument when the summary already
// holds timestep data.
// NOTE(review): presumably `lgr` is the name of a local grid refinement and
// lgr_i/j/k are cell indices inside it -- confirm against rd_smspec_add_node().
const rd::smspec_node *rd_sum_add_local_var(rd_sum_type *rd_sum,
const char *keyword,
const char *wgname, int num,
const char *unit, const char *lgr,
int lgr_i, int lgr_j, int lgr_k,
float default_value);
rd_sum_tstep_type *rd_sum_add_tstep(rd_sum_type *rd_sum, int report_step,
double sim_seconds);

Expand Down
16 changes: 16 additions & 0 deletions lib/resdata/rd_sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,22 @@ const rd::smspec_node *rd_sum_add_var(rd_sum_type *rd_sum, const char *keyword,
default_value);
}

// Registers a new summary variable that carries local-grid information
// (`lgr`, `lgr_i`, `lgr_j`, `lgr_k`) in addition to the arguments accepted by
// rd_sum_add_var(), and returns the node created by rd_smspec_add_node().
//
// Throws std::invalid_argument when the summary data already has a non-zero
// length, i.e. when timesteps have been added before this variable.
const rd::smspec_node *rd_sum_add_local_var(rd_sum_type *rd_sum,
                                            const char *keyword,
                                            const char *wgname, int num,
                                            const char *unit, const char *lgr,
                                            int lgr_i, int lgr_j, int lgr_k,
                                            float default_value) {
    const bool has_timesteps = rd_sum_data_get_length(rd_sum->data) > 0;
    if (has_timesteps) {
        throw std::invalid_argument(
            "Can not interchange variable adding and timesteps.\n");
    }

    // The new node is appended, so its params index is the current node count.
    const int next_params_index = rd_smspec_num_nodes(rd_sum->smspec);
    const rd::smspec_node *added_node = rd_smspec_add_node(
        rd_sum->smspec, next_params_index, keyword, wgname, num, unit, lgr,
        lgr_i, lgr_j, lgr_k, default_value);
    return added_node;
}

const rd::smspec_node *rd_sum_add_smspec_node(rd_sum_type *rd_sum,
const rd::smspec_node *node) {
return rd_smspec_add_node(rd_sum->smspec, *node);
Expand Down
133 changes: 106 additions & 27 deletions python/resdata/summary/rd_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import ctypes
import pandas
import re
from typing import Sequence, List, Tuple, Optional

# Observe that there is some convention conflict with the C code
# regarding order of arguments: The C code generally takes the time
Expand Down Expand Up @@ -180,6 +181,9 @@ class Summary(BaseCClass):
# The trailing default_value argument is declared "float" because the C
# signatures of rd_sum_add_var() and rd_sum_add_local_var() take
# `float default_value`. Declaring it as "double" makes ctypes pass a 64-bit
# value where the C side reads a 32-bit float, which can silently corrupt any
# non-zero default value.
_add_variable = ResdataPrototype(
    "rd_smspec_node_ref rd_sum_add_var(rd_sum, char*, char*, int, char*, float)"
)
_add_local_variable = ResdataPrototype(
    "rd_smspec_node_ref rd_sum_add_local_var(rd_sum, char*, char*, int, char*, char*, int, int, int, float)"
)
# rd_sum_add_tstep() takes `double sim_seconds` in C, so "double" is correct here.
_add_tstep = ResdataPrototype(
    "rd_sum_tstep_ref rd_sum_add_tstep(rd_sum, int, double)"
)
Expand Down Expand Up @@ -360,7 +364,21 @@ def restart_writer(
smry._load_case = "restart_writer"
return smry

def add_variable(
    self,
    variable,
    wgname=None,
    num=0,
    unit="None",
    default_value=0,
    lgr=None,
    lgr_ijk=None,
):
    """Add a variable to the summary.

    Without `lgr` the variable is added through `_add_variable`. When `lgr`
    is given, the variable is added through `_add_local_variable`, which
    additionally receives `lgr` and the three entries of `lgr_ijk`.

    Args:
        variable: The summary keyword.
        wgname: Passed through as the wgname argument of the C call.
        num: Passed through as the num argument of the C call.
        unit: Unit string of the variable.
        default_value: Default value of the variable.
        lgr: Optional local grid name; selects the local-variable code path.
        lgr_ijk: Iterable of exactly three ints, required when `lgr` is given.

    Returns:
        The result of calling `setParent(parent=self)` on the created node.

    Raises:
        ValueError: If `lgr` is given but `lgr_ijk` is missing or does not
            contain exactly three entries.
    """
    if lgr is None:
        return self._add_variable(
            variable, wgname, num, unit, default_value
        ).setParent(parent=self)

    # Validate up front: unpacking None (or a wrong-length sequence) straight
    # into the C call would fail with an opaque TypeError / argument-count
    # error instead of telling the caller what is missing.
    ijk = None if lgr_ijk is None else tuple(lgr_ijk)
    if ijk is None or len(ijk) != 3:
        raise ValueError(
            f"lgr_ijk must contain exactly three indices when lgr is given, "
            f"got {lgr_ijk!r}"
        )

    lgr_i, lgr_j, lgr_k = ijk
    return self._add_local_variable(
        variable, wgname, num, unit, lgr, lgr_i, lgr_j, lgr_k, default_value
    ).setParent(parent=self)
Expand Down Expand Up @@ -606,36 +624,94 @@ def pandas_frame(self, time_index=None, column_keys=None):
return frame

@staticmethod
def _compile_headers_list(
    headers: Sequence[str], dims: Optional[List[int]]
) -> List[Tuple[str, str, int, str, Optional[str], Optional[Tuple[int, int, int]]]]:
    """
    Converts column names generated with `Summary.pandas_frame()` so
    that `Summary.from_pandas(sum.pandas_frame()) == sum`.

    The column names follow the format produced by `smspec_node::gen_key1`,
    but may also follow `smspec_node::gen_key2`. How the `:`-separated parts
    of a name are interpreted is decided by `Summary.var_type(keyword)`.

    Returns one tuple `(kw, wgname, num, unit, lgr, nums)` per header, where
    `wgname` defaults to `""`, `num` defaults to `0`, `unit` is always
    `"UNIT"`, and `lgr` / `nums` are `None` when the name does not carry them.

    Raises ValueError for an invalid or unknown variable type, and when a
    name uses i,j,k indexing while `dims` is None.
    """
    var_list = []
    for key in headers:
        lst = re.split(":", key)
        kw = lst[0]
        wgname = None
        lgr = None
        nums = None
        num = None
        unit = "UNIT"

        var_type = Summary.var_type(kw)
        if var_type == SummaryVarType.RD_SMSPEC_INVALID_VAR:
            raise ValueError(f"Invalid var type: {kw}")
        elif var_type == SummaryVarType.RD_SMSPEC_FIELD_VAR:
            pass
        elif var_type == SummaryVarType.RD_SMSPEC_REGION_VAR:
            num = int(lst[1])
        elif var_type == SummaryVarType.RD_SMSPEC_GROUP_VAR:
            wgname = lst[1]
        elif var_type == SummaryVarType.RD_SMSPEC_WELL_VAR:
            wgname = lst[1]
        elif var_type == SummaryVarType.RD_SMSPEC_SEGMENT_VAR:
            kw, wgname, num = lst
            num = int(num)
        elif var_type == SummaryVarType.RD_SMSPEC_BLOCK_VAR:
            kw, loc = lst
            # "i,j,k" form versus a single number.
            if loc.count(",") == 2:
                nums = tuple(int(i) for i in loc.split(","))
            else:
                num = int(loc)
        elif var_type == SummaryVarType.RD_SMSPEC_AQUIFER_VAR:
            kw, num = lst
            num = int(num)
        elif var_type == SummaryVarType.RD_SMSPEC_COMPLETION_VAR:
            kw, wgname, loc = lst
            if loc.count(",") == 2:
                nums = tuple(int(i) for i in loc.split(","))
            else:
                num = int(loc)
        elif var_type == SummaryVarType.RD_SMSPEC_NETWORK_VAR:
            kw, wgname = lst
        elif var_type == SummaryVarType.RD_SMSPEC_REGION_2_REGION_VAR:
            kw, r1r2 = lst
            if "-" in r1r2:
                # "r1-r2" form: both region numbers are folded into one num.
                r1, r2 = tuple(int(i) for i in r1r2.split("-", 1))
                num = (r2 + 10) * 32768 + r1
            else:
                num = int(r1r2)
        elif var_type == SummaryVarType.RD_SMSPEC_LOCAL_BLOCK_VAR:
            kw, lgr, nums = lst
            nums = tuple(int(i) for i in nums.split(","))
        elif var_type == SummaryVarType.RD_SMSPEC_LOCAL_COMPLETION_VAR:
            kw, lgr, wgname, nums = lst
            nums = tuple(int(i) for i in nums.split(","))
        elif var_type == SummaryVarType.RD_SMSPEC_LOCAL_WELL_VAR:
            kw, lgr, wgname = lst
            nums = (0, 0, 0)  # We don't know from the list so use dummy
        elif var_type == SummaryVarType.RD_SMSPEC_MISC_VAR:
            pass
        else:
            raise ValueError(f"Unknown SummaryVarType {var_type}")

        # Only i,j,k was given: derive num from the one-based indices and
        # the grid dimensions.
        if nums and num is None:
            i = int(nums[0]) - 1
            j = int(nums[1]) - 1
            k = int(nums[2]) - 1
            if dims is None:
                raise ValueError(
                    "For key %s When using indexing i,j,k you must supply a valid value for the dims argument"
                    % key
                )
            num = i + j * dims[0] + k * dims[0] * dims[1] + 1

        if num is None:
            num = 0
        if wgname is None:
            wgname = ""

        var_list.append((kw, wgname, num, unit, lgr, nums))
    return var_list

@classmethod
Expand All @@ -656,18 +732,21 @@ def from_pandas(cls, case, frame, dims=None, headers=None):
if dims is None:
dims = [1, 1, 1]
rd_sum = Summary.writer(case, start_time, dims[0], dims[1], dims[2])
for kw, wgname, num, unit in header_list:
for kw, wgname, num, unit, lgr, lgr_ijk in header_list:
var_list.append(
rd_sum.addVariable(kw, wgname=wgname, num=num, unit=unit).getKey1()
rd_sum.add_variable(
kw, wgname=wgname, num=num, unit=unit, lgr=lgr, lgr_ijk=lgr_ijk
).getKey1()
)

for i, time in enumerate(frame.index):
days = (time - start_time).days
days = (time - start_time).total_seconds() / 86400
t_step = rd_sum.addTStep(i + 1, days)

for var in var_list:
t_step[var] = frame.iloc[i][var]

rd_sum._load_case = case
return rd_sum

def get_key_index(self, key):
Expand Down
11 changes: 11 additions & 0 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from hypothesis import HealthCheck, settings

# Hypothesis timeout settings are unreliable both on CI and when pytest runs
# under xdist, so the profile below disables them for the whole test session.
_PROFILE_NAME = "no_timeouts"
_PROFILE_OPTIONS = {
    "deadline": None,
    "suppress_health_check": [HealthCheck.too_slow],
    "print_blob": True,
}

settings.register_profile(_PROFILE_NAME, **_PROFILE_OPTIONS)
settings.load_profile(_PROFILE_NAME)
Loading

0 comments on commit 61b0054

Please sign in to comment.