From 50664cb303c7c78a17bb9e84c47175154df94cba Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 15 Dec 2023 08:43:11 +0100 Subject: [PATCH] Optimize summary reading --- src/ert/config/_read_summary.py | 52 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/ert/config/_read_summary.py b/src/ert/config/_read_summary.py index ec0d2578840..f9a6361833a 100644 --- a/src/ert/config/_read_summary.py +++ b/src/ert/config/_read_summary.py @@ -257,12 +257,8 @@ def read_summary( filepath: str, fetch_keys: Sequence[str] ) -> Tuple[List[str], Sequence[datetime], Any]: summary, spec = _get_summary_filenames(filepath) - date_index, start_date, date_units, keys, key_indecies = _read_spec( - spec, fetch_keys - ) - fetched, time_map = _read_summary( - summary, start_date, date_units, key_indecies, date_index - ) + date_index, start_date, date_units, keys, mask = _read_spec(spec, fetch_keys) + fetched, time_map = _read_summary(summary, start_date, date_units, mask, date_index) return (keys, time_map, fetched) @@ -278,7 +274,7 @@ def _key2str(key: Union[bytes, str]) -> str: def _read_spec( spec: str, fetch_keys: Sequence[str] -) -> Tuple[int, datetime, DateUnit, List[str], List[int]]: +) -> Tuple[int, datetime, DateUnit, List[str], npt.NDArray[Any]]: date = None n = None nx = None @@ -287,14 +283,14 @@ def _read_spec( arrays: Dict[str, Optional[npt.NDArray[Any]]] = { kw: None for kw in [ - "WGNAMES", - "NUMS", + "WGNAMES ", + "NUMS ", "KEYWORDS", - "NUMLX", - "NUMLY", - "NUMLZ", + "NUMLX ", + "NUMLY ", + "NUMLZ ", "LGRNAMES", - "UNITS", + "UNITS ", ] } @@ -320,13 +316,13 @@ def _read_spec( ) ): break - kw = _key2str(entry.read_keyword()) + kw = entry.read_keyword() if kw in arrays: vals = entry.read_array() if vals is resfo.MESS or isinstance(vals, resfo.MESS): raise ValueError(f"{kw} in {spec} was MESS") arrays[kw] = vals - if kw == "DIMENS": + if kw == "DIMENS ": vals = entry.read_array() if vals is resfo.MESS or isinstance(vals, resfo.MESS): raise ValueError(f"DIMENS in {spec} was MESS") @@ -355,11 +351,11 @@ def _read_spec( microsecond=microsecond % 10**6, ) keywords = arrays["KEYWORDS"] - wgnames = arrays["WGNAMES"] - nums = arrays["NUMS"] - numlx = arrays["NUMLX"] - numly = arrays["NUMLY"] - numlz = arrays["NUMLZ"] + wgnames = arrays["WGNAMES "] + nums = arrays["NUMS "] + numlx = arrays["NUMLX "] + numly = arrays["NUMLY "] + numlz = arrays["NUMLZ "] lgr_names = arrays["LGRNAMES"] if date is None: @@ -407,7 +403,9 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any: indices.append(i) keys.append(key) - units = arrays["UNITS"] + mask = np.in1d(np.arange(n), indices) + + units = arrays["UNITS "] if units is None: raise ValueError(f"keyword units missing in {spec}") if date_index is None: @@ -415,14 +413,14 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any: if date_index >= len(units): raise ValueError(f"Unit missing for TIME in {spec}") - return date_index, date, DateUnit[_key2str(units[date_index])], keys, indices + return date_index, date, DateUnit[_key2str(units[date_index])], keys, mask def _read_summary( summary: str, start_date: datetime, unit: DateUnit, - indices: List[int], + mask: npt.NDArray[Any], date_index: int, ) -> Tuple[npt.NDArray[np.float32], List[datetime]]: if summary.lower().endswith("funsmry"): @@ -442,16 +440,16 @@ def read_params(): vals = last_params.read_array() if vals is resfo.MESS or isinstance(vals, resfo.MESS): raise ValueError(f"PARAMS in {summary} was MESS") - values.append(vals[indices]) + values.append(vals[mask]) dates.append(start_date + unit.make_delta(float(vals[date_index]))) last_params = None with open(summary, mode) as fp: for entry in resfo.lazy_read(fp, format): - kw = _key2str(entry.read_keyword()) - if kw == "PARAMS": + kw = entry.read_keyword() + if kw == "PARAMS ": last_params = entry - if kw == "SEQHDR": + if kw == "SEQHDR ": read_params() read_params() return np.array(values).T, dates