Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/split table aggregation #353

Merged
merged 2 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 77 additions & 78 deletions examples/table-aggregation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"class Timer:\n",
" def __init__(self):\n",
Expand All @@ -18,7 +19,7 @@
" t1 = time.perf_counter()\n",
" print(f\"Elapsed: {t1-self._t0:0.3f} seconds.\")\n",
" return\n",
" pass"
" pass\n"
]
},
{
Expand Down Expand Up @@ -59,17 +60,15 @@
" tot_size_bytes = total_blob_size(rels)\n",
" print(f\"Total size of input: {tot_size_bytes / (1024*1024*1024):.3f} GiB\")\n",
" with Timer():\n",
" agg=rels.filter(column=columns).aggregate(columns=columns)\n",
" agg=rels.filter(column=columns)._aggregate(columns=columns)\n",
" print(agg.to_pandas().sort_values(by=[\"REAL\", \"DATE\"]))\n",
"\n",
"def run_exp(caseuuid, itername, tagname, columns):\n",
" case = exp.get_case_by_uuid(caseuuid)\n",
" print(f\"{case.asset}: {case.name}: {caseuuid}\")\n",
" rels=case.tables.filter(iteration=itername, realization=True, tagname=tagname, \n",
" complex={\"bool\": {\"must_not\": [{\"term\": {\"_sumo.hidden\": True}}]}})\n",
" rels=case.tables.visible.filter(iteration=itername, realization=True, tagname=tagname, column=columns)\n",
" do_aggregate(tagname, rels, columns)\n",
" rels=case.tables.filter(iteration=itername, realization=True, tagname=tagname,\n",
" complex={\"term\": {\"_sumo.hidden\": True}})\n",
" rels=case.tables.hidden.filter(iteration=itername, realization=True, tagname=tagname, column=columns)\n",
" do_aggregate(tagname, rels, columns)"
]
},
Expand All @@ -86,36 +85,36 @@
"Troll: 24.0.0-20240828_ix_network_test5: 359e7c72-a4ca-43ee-9203-f09cd0f149a9\n",
"summary: 27 objects, 64996 columns.\n",
"Total size of input: 1.248 GiB\n",
"Elapsed: 15.166 seconds.\n",
"Elapsed: 15.108 seconds.\n",
" DATE FOPT REAL\n",
"137 2024-07-02 282442208.0 6\n",
"138 2024-07-03 282451072.0 6\n",
"139 2024-08-01 282677120.0 6\n",
"140 2024-09-01 282889760.0 6\n",
"141 2024-10-01 283077440.0 6\n",
"198 2024-07-02 282442208.0 6\n",
"199 2024-07-03 282451072.0 6\n",
"200 2024-08-01 282677120.0 6\n",
"201 2024-09-01 282889760.0 6\n",
"202 2024-10-01 283077440.0 6\n",
".. ... ... ...\n",
"47 2025-02-15 286229120.0 249\n",
"48 2025-04-01 286425696.0 249\n",
"49 2025-09-01 287060416.0 249\n",
"50 2025-10-01 287176832.0 249\n",
"51 2026-01-01 287523552.0 249\n",
"193 2025-02-15 286229120.0 249\n",
"194 2025-04-01 286425696.0 249\n",
"195 2025-09-01 287060416.0 249\n",
"196 2025-10-01 287176832.0 249\n",
"197 2026-01-01 287523552.0 249\n",
"\n",
"[265 rows x 3 columns]\n",
"summary: 3537 objects, 64996 columns.\n",
"Total size of input: 1.087 GiB\n",
"Elapsed: 1.692 seconds.\n",
"summary: 27 objects, 554 columns.\n",
"Total size of input: 0.009 GiB\n",
"Elapsed: 1.351 seconds.\n",
" DATE FOPT REAL\n",
"52 2024-07-02 282442208.0 6\n",
"53 2024-07-03 282451072.0 6\n",
"54 2024-08-01 282677120.0 6\n",
"55 2024-09-01 282889760.0 6\n",
"56 2024-10-01 283077440.0 6\n",
"114 2024-07-02 282442208.0 6\n",
"115 2024-07-03 282451072.0 6\n",
"116 2024-08-01 282677120.0 6\n",
"117 2024-09-01 282889760.0 6\n",
"118 2024-10-01 283077440.0 6\n",
".. ... ... ...\n",
"173 2025-02-15 286229120.0 249\n",
"174 2025-04-01 286425696.0 249\n",
"175 2025-09-01 287060416.0 249\n",
"176 2025-10-01 287176832.0 249\n",
"177 2026-01-01 287523552.0 249\n",
"16 2025-02-15 286229120.0 249\n",
"17 2025-04-01 286425696.0 249\n",
"18 2025-09-01 287060416.0 249\n",
"19 2025-10-01 287176832.0 249\n",
"20 2026-01-01 287523552.0 249\n",
"\n",
"[265 rows x 3 columns]\n"
]
Expand All @@ -138,36 +137,36 @@
"Troll: 24.0.0-20240820: fc6cc7d3-6162-46a3-9d69-48ad1eaecdfb\n",
"summary: 196 objects, 24568 columns.\n",
"Total size of input: 30.013 GiB\n",
"Elapsed: 32.124 seconds.\n",
"Elapsed: 32.407 seconds.\n",
" DATE FOPT REAL\n",
"708796 1990-02-01 0.000000e+00 1\n",
"708797 1990-03-01 1.445590e+05 1\n",
"708798 1990-04-01 2.741935e+05 1\n",
"708799 1990-05-01 4.145006e+05 1\n",
"708800 1990-06-01 5.512956e+05 1\n",
"778120 1990-02-01 0.000000e+00 1\n",
"778121 1990-03-01 1.445590e+05 1\n",
"778122 1990-04-01 2.741935e+05 1\n",
"778123 1990-05-01 4.145006e+05 1\n",
"778124 1990-06-01 5.512956e+05 1\n",
"... ... ... ...\n",
"841571 2024-06-27 2.980280e+08 249\n",
"841572 2024-06-28 2.980311e+08 249\n",
"841573 2024-06-29 2.980342e+08 249\n",
"841574 2024-06-30 2.980384e+08 249\n",
"841575 2024-07-01 2.980405e+08 249\n",
"249139 2024-06-27 2.980280e+08 249\n",
"249140 2024-06-28 2.980311e+08 249\n",
"249141 2024-06-29 2.980342e+08 249\n",
"249142 2024-06-30 2.980384e+08 249\n",
"249143 2024-07-01 2.980405e+08 249\n",
"\n",
"[952560 rows x 3 columns]\n",
"summary: 9800 objects, 24568 columns.\n",
"Total size of input: 29.907 GiB\n",
"Elapsed: 4.722 seconds.\n",
"summary: 196 objects, 500 columns.\n",
"Total size of input: 1.328 GiB\n",
"Elapsed: 3.757 seconds.\n",
" DATE FOPT REAL\n",
"34020 1990-02-01 0.000000e+00 1\n",
"34021 1990-03-01 1.445590e+05 1\n",
"34022 1990-04-01 2.741935e+05 1\n",
"34023 1990-05-01 4.145006e+05 1\n",
"34024 1990-06-01 5.512956e+05 1\n",
"488844 1990-02-01 0.000000e+00 1\n",
"488845 1990-03-01 1.445590e+05 1\n",
"488846 1990-04-01 2.741935e+05 1\n",
"488847 1990-05-01 4.145006e+05 1\n",
"488848 1990-06-01 5.512956e+05 1\n",
"... ... ... ...\n",
"316447 2024-06-27 2.980280e+08 249\n",
"316448 2024-06-28 2.980311e+08 249\n",
"316449 2024-06-29 2.980342e+08 249\n",
"316450 2024-06-30 2.980384e+08 249\n",
"316451 2024-07-01 2.980405e+08 249\n",
"352759 2024-06-27 2.980280e+08 249\n",
"352760 2024-06-28 2.980311e+08 249\n",
"352761 2024-06-29 2.980342e+08 249\n",
"352762 2024-06-30 2.980384e+08 249\n",
"352763 2024-07-01 2.980405e+08 249\n",
"\n",
"[952560 rows x 3 columns]\n"
]
Expand All @@ -190,36 +189,36 @@
"Drogon: ruaj_testcase: 5b558daf-61c5-400a-9aa2-c602bb471a16\n",
"summary: 160 objects, 974 columns.\n",
"Total size of input: 0.175 GiB\n",
"Elapsed: 2.485 seconds.\n",
"Elapsed: 2.270 seconds.\n",
" DATE FOPT REAL\n",
"4910 2018-01-01 0.000000e+00 0\n",
"4911 2018-01-02 0.000000e+00 0\n",
"4912 2018-01-05 0.000000e+00 0\n",
"4913 2018-01-06 3.991868e+03 0\n",
"4914 2018-01-09 1.596676e+04 0\n",
"8097 2018-01-01 0.000000e+00 0\n",
"8098 2018-01-02 0.000000e+00 0\n",
"8099 2018-01-05 0.000000e+00 0\n",
"8100 2018-01-06 3.991868e+03 0\n",
"8101 2018-01-09 1.596676e+04 0\n",
"... ... ... ...\n",
"36831 2020-06-14 7.278816e+06 159\n",
"36832 2020-06-27 7.349246e+06 159\n",
"36833 2020-06-28 7.354664e+06 159\n",
"36834 2020-06-30 7.365482e+06 159\n",
"36835 2020-07-01 7.370888e+06 159\n",
"39275 2020-06-14 7.278816e+06 159\n",
"39276 2020-06-27 7.349246e+06 159\n",
"39277 2020-06-28 7.354664e+06 159\n",
"39278 2020-06-30 7.365482e+06 159\n",
"39279 2020-07-01 7.370888e+06 159\n",
"\n",
"[39280 rows x 3 columns]\n",
"summary: 320 objects, 974 columns.\n",
"Total size of input: 0.163 GiB\n",
"Elapsed: 2.528 seconds.\n",
"summary: 160 objects, 500 columns.\n",
"Total size of input: 0.097 GiB\n",
"Elapsed: 1.794 seconds.\n",
" DATE FOPT REAL\n",
"19394 2018-01-01 0.000000e+00 0\n",
"19395 2018-01-02 0.000000e+00 0\n",
"19396 2018-01-05 0.000000e+00 0\n",
"19397 2018-01-06 3.991868e+03 0\n",
"19398 2018-01-09 1.596676e+04 0\n",
"247 2018-01-01 0.000000e+00 0\n",
"248 2018-01-02 0.000000e+00 0\n",
"249 2018-01-05 0.000000e+00 0\n",
"250 2018-01-06 3.991868e+03 0\n",
"251 2018-01-09 1.596676e+04 0\n",
"... ... ... ...\n",
"10795 2020-06-14 7.278816e+06 159\n",
"10796 2020-06-27 7.349246e+06 159\n",
"10797 2020-06-28 7.354664e+06 159\n",
"10798 2020-06-30 7.365482e+06 159\n",
"10799 2020-07-01 7.370888e+06 159\n",
"31149 2020-06-14 7.278816e+06 159\n",
"31150 2020-06-27 7.349246e+06 159\n",
"31151 2020-06-28 7.354664e+06 159\n",
"31152 2020-06-30 7.365482e+06 159\n",
"31153 2020-07-01 7.370888e+06 159\n",
"\n",
"[39280 rows x 3 columns]\n"
]
Expand Down
1 change: 1 addition & 0 deletions src/fmu/sumo/explorer/objects/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Sumo cases and child objects"""

from fmu.sumo.explorer.objects._search_context import SearchContext
from fmu.sumo.explorer.objects._child import Child
from fmu.sumo.explorer.objects._metrics import Metrics
from fmu.sumo.explorer.objects.case import Case
from fmu.sumo.explorer.objects.cases import Cases
Expand Down
5 changes: 5 additions & 0 deletions src/fmu/sumo/explorer/objects/_child.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,10 @@ def interval(self) -> str:

return None

@property
def template_path(self):
    """Relative path with its two leading components made generic.

    The first two "/"-separated components of ``relative_path`` are
    dropped and replaced by the literal placeholders ``{realization}``
    and ``{iteration}``; all remaining components are kept unchanged.
    """
    tail = self.relative_path.split("/")[2:]
    return "/".join(["{realization}", "{iteration}", *tail])


Child.map_properties(Child, _prop_desc)
68 changes: 58 additions & 10 deletions src/fmu/sumo/explorer/objects/_search_context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import uuid
import httpx
import deprecation
import warnings
from typing import List, Dict, Tuple
from datetime import datetime
from io import BytesIO
Expand Down Expand Up @@ -276,10 +277,14 @@ def __init__(
sumo: SumoClient,
must: List = [],
must_not: List = [],
hidden = False,
visible = True
):
self._sumo = sumo
self._must = must[:]
self._must_not = must_not[:]
self._visible = visible
self._hidden = hidden
self._field_values = {}
self._hits = None
self._cache = LRUCache(capacity=200)
Expand All @@ -288,17 +293,24 @@ def __init__(

@property
def _query(self):
if len(self._must_not) == 0:
if len(self._must) == 1:
return self._must[0]
must = self._must[:]
must_not = self._must_not[:]
if self._visible and not self._hidden:
must_not.append({"term": {"_sumo.hidden": True}})
elif not self._visible and self._hidden:
must.append({"term": {"_sumo.hidden": True}})
pass
if len(must_not) == 0:
if len(must) == 1:
return must[0]
else:
return {"bool": {"must": self._must}}
return {"bool": {"must": must}}
else:
if len(self._must) == 0:
return {"bool": {"must_not": self._must_not}}
if len(must) == 0:
return {"bool": {"must_not": must_not}}
else:
return {
"bool": {"must": self._must, "must_not": self._must_not}
"bool": {"must": must, "must_not": must_not}
}

def _to_sumo(self, obj, blob=None):
Expand All @@ -313,7 +325,9 @@ def _to_sumo(self, obj, blob=None):
"surface": objects.Surface,
"table": objects.Table,
}.get(cls)
assert constructor is not None
if constructor is None:
warnings.warn(f"No constructor for class {cls}")
constructor = objects.Child
return constructor(self._sumo, obj, blob)

def __len__(self):
Expand Down Expand Up @@ -768,6 +782,30 @@ async def _get_field_values_async(self, field: str) -> List:
def _context_for_class(self, cls):
return self.filter(cls=cls)

@property
def hidden(self):
    """New search context with the same filters, matching hidden objects only.

    The copy is built with ``hidden=True, visible=False``; for that
    combination ``_query`` appends ``{"term": {"_sumo.hidden": True}}``
    to the ``must`` clauses.
    """
    return SearchContext(
        self._sumo,
        must=self._must,
        must_not=self._must_not,
        hidden=True,
        visible=False,
    )

@property
def visible(self):
    """New search context with the same filters, excluding hidden objects.

    The copy is built with ``hidden=False, visible=True``; for that
    combination ``_query`` appends ``{"term": {"_sumo.hidden": True}}``
    to the ``must_not`` clauses.
    """
    return SearchContext(
        self._sumo,
        must=self._must,
        must_not=self._must_not,
        hidden=False,
        visible=True,
    )

@property
def all(self):
    """New search context with the same filters, hidden and visible alike.

    The copy is built with ``hidden=True, visible=True``; for that
    combination ``_query`` adds no ``_sumo.hidden`` clause at all.
    """
    return SearchContext(
        self._sumo,
        must=self._must,
        must_not=self._must_not,
        hidden=True,
        visible=True,
    )

@property
def cases(self):
"""Cases from current selection."""
Expand All @@ -783,6 +821,10 @@ def realizations(self):
"""Realizations from current selection."""
return objects.Realizations(self)

@property
def template_paths(self):
    """Set of distinct ``template_path`` values in the current selection.

    Iterates over this search context and collects the ``template_path``
    of every object it yields. Duplicates collapse, and an empty
    selection gives an empty set.
    """
    # Set comprehension: no intermediate list is built just to be
    # converted into a set.
    return {obj.template_path for obj in self}

@property
def metrics(self):
"""Metrics for current search context."""
Expand Down Expand Up @@ -874,7 +916,7 @@ def filter(self, **kwargs) -> "SearchContext":
if _must_not is not None:
must_not.append(_must_not)

sc = SearchContext(self._sumo, must=must, must_not=must_not)
sc = SearchContext(self._sumo, must=must, must_not=must_not, hidden=self._hidden, visible = self._visible)

if "has" in kwargs:
# Get list of cases matched by current filter set
Expand Down Expand Up @@ -1161,7 +1203,7 @@ def _verify_aggregation_operation(self):
rids = [hit["_source"]["fmu"]["realization"]["id"] for hit in hits]
return prototype, uuids, rids

def aggregate(self, columns=None, operation=None):
def _aggregate(self, columns=None, operation=None):
prototype, uuids, rids = self._verify_aggregation_operation()
spec = {
"object_ids": uuids,
Expand Down Expand Up @@ -1199,6 +1241,12 @@ def aggregate(self, columns=None, operation=None):
res._blob = blob
return res

def aggregate(self, columns=None, operation=None):
    """Aggregate the current selection, preferring hidden objects.

    If the hidden view of this context is non-empty the aggregation runs
    on it; otherwise it runs on the visible view.

    Args:
        columns: forwarded unchanged to ``_aggregate``.
        operation: forwarded unchanged to ``_aggregate``.

    Returns:
        Whatever ``_aggregate`` returns for the chosen view.
    """
    # Every access to ``self.hidden`` constructs a brand-new search
    # context, so bind it once and aggregate on the same instance whose
    # length was just checked instead of building a second one.
    hidden = self.hidden
    target = hidden if len(hidden) > 0 else self.visible
    return target._aggregate(columns=columns, operation=operation)

@deprecation.deprecated(details="Use the method 'aggregate' instead, with parameter 'operation'.")
def min(self):
return self.aggregate(operation="min")
Expand Down