Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'has' filter to CaseCollection #328

Merged
merged 5 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,6 @@ src/fmu/sumo/version.py
testing.ipynb
# files generated during testing
*.csv

# emacs backup files
*~
31 changes: 31 additions & 0 deletions docs/explorer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,37 @@ You can also use a case `uuid` to get a `Case` object:
my_case = sumo.get_case_by_uuid("1234567")


Finding cases with specific data types
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There is also a filter that searches for cases where there are objects
that match specific criteria. For example, if we define
``4d-seismic`` as objects that have ``data.content=seismic``,
``data.time.t0.label=base`` and ``data.time.t1.label=monitor``, we can use
the ``has`` filter to find cases that have ``4d-seismic`` data:

.. code-block::

from fmu.sumo.explorer import Explorer, Filters

exp = Explorer(env="prod")

cases = exp.cases.filter(asset="Heidrun", has=Filters.seismic4d)

In this case, we have a predefined filter for ``4d-seismic``, exposed
through ``fmu.sumo.explorer.Filters``. There is no magic involved; any
user can create their own filters, and either use them directly or ask
for them to be added to ``fmu.sumo.explorer.Filters``.

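It is also possible to chain filters. The previous example could also
be handled as shown below; note that each chained ``has`` is checked
separately, so the conditions need not all hold for the same object: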

.. code-block::

cases = exp.cases.filter(asset="Heidrun",
has={"term":{"data.content.keyword": "seismic"}})\
.filter(has={"term":{"data.time.t0.label.keyword":"base"}})\
.filter(has={"term":{"data.time.t1.label.keyword":"monitor"}})


Browsing data in a case
^^^^^^^^^^^^^^^^^^^^^^^
The `Case` object has properties for accessing different data types:
Expand Down
24 changes: 24 additions & 0 deletions src/fmu/sumo/explorer/Filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

# Predefined ``has`` filter for 4d-seismic objects: an elastic ``bool``
# query whose ``must`` clauses require seismic content with a "base" t0
# label and a "monitor" t1 label.

seismic4d = {
    "bool": {
        "must": [
            {"term": {"data.content.keyword": "seismic"}},
            {"term": {"data.time.t0.label.keyword": "base"}},
            {"term": {"data.time.t1.label.keyword": "monitor"}},
        ]
    }
}
46 changes: 44 additions & 2 deletions src/fmu/sumo/explorer/objects/case_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,17 @@ def _make_overview_query(ids, pit):
class CaseCollection(DocumentCollection):
"""A class for representing a collection of cases in Sumo"""

def __init__(self, sumo: SumoClient, query: Dict = None, pit: Pit = None):
def __init__(
    self,
    sumo: SumoClient,
    query: Dict = None,
    pit: Pit = None,
    has: Dict = None,
):
    """Create a collection of cases.

    Args:
        sumo (SumoClient): connection to Sumo
        query (dict): elastic query object
        pit (Pit): point in time
        has (dict): query for specific child objects; kept pending on
            the instance until a batch of documents is fetched
    """
    super().__init__("case", sumo, query, _CASE_FIELDS, pit)
    # Overview data looked up by case uuid; starts out empty.
    self._overviews = {}
    # Pending child-object query; None means there is nothing to apply.
    self._has = has

@property
def names(self) -> List[str]:
Expand All @@ -104,6 +106,16 @@ async def names_async(self) -> List[str]:
"""List of unique case names"""
return await self._get_field_values_async("fmu.case.name.keyword")

@property
def uuids(self) -> List[str]:
    """Unique case uuids in this collection"""
    uuid_field = "fmu.case.uuid.keyword"
    return self._get_field_values(uuid_field)

@property
async def uuids_async(self) -> List[str]:
    """Unique case uuids in this collection (awaitable)"""
    uuid_field = "fmu.case.uuid.keyword"
    values = await self._get_field_values_async(uuid_field)
    return values

@property
def statuses(self) -> List[str]:
"""List of unique statuses"""
Expand Down Expand Up @@ -160,6 +172,34 @@ async def getitem_async(self, index: int) -> Case:
overview = self._overviews[uuid]
return Case(self._sumo, doc, overview, self._pit)

def _next_batch(self) -> List[Dict]:
    """Fetch the next batch of documents.

    If a ``has`` query is pending it is resolved first: the case uuids of
    this collection are combined with ``has`` in a bucket query on the
    case uuid field, and the collection's query is replaced by an ``ids``
    query over the resulting bucket keys. ``has`` is then cleared so this
    is done only once.

    Returns:
        The next batch of documents
    """
    child_filter = self._has
    if child_filter is None:
        return super()._next_batch()

    # Read the current uuids before self._query is replaced below.
    candidates = self.uuids
    restricted = {
        "bool": {
            "must": [
                {"terms": {"fmu.case.uuid.keyword": candidates}},
                child_filter,
            ]
        }
    }
    buckets = self._utils.get_buckets("fmu.case.uuid.keyword", restricted)
    matching = [bucket["key"] for bucket in buckets]
    self._query = {"ids": {"values": matching}}
    self._has = None
    return super()._next_batch()

async def _next_batch_async(self) -> List[Dict]:
    """Fetch the next batch of documents (awaitable).

    If a ``has`` query is pending it is resolved first: the case uuids of
    this collection are combined with ``has`` in a bucket query on the
    case uuid field, and the collection's query is replaced by an ``ids``
    query over the resulting bucket keys. ``has`` is then cleared so this
    is done only once.

    Returns:
        The next batch of documents
    """
    if self._has is not None:
        # Read the current uuids before self._query is replaced below.
        uuids = await self.uuids_async
        query = {
            "bool": {
                "must": [
                    {"terms": {"fmu.case.uuid.keyword": uuids}},
                    self._has,
                ]
            }
        }
        # Await the bucket lookup instead of calling the synchronous
        # get_buckets, which would block the event loop in this coroutine.
        # NOTE(review): assumes the utils helper exposes get_buckets_async
        # with the same arguments as get_buckets — confirm.
        buckets = await self._utils.get_buckets_async(
            "fmu.case.uuid.keyword", query
        )
        nuuids = [bucket["key"] for bucket in buckets]
        self._query = {"ids": {"values": nuuids}}
        self._has = None
    return await super()._next_batch_async()

def _postprocess_batch(self, hits, pit):
ids = [hit["_id"] for hit in hits]
query = _make_overview_query(ids, pit)
Expand Down Expand Up @@ -215,6 +255,7 @@ def filter(
user: Union[int, List[int]] = None,
asset: Union[int, List[int]] = None,
field: Union[str, List[str]] = None,
has: Dict = None,
) -> "CaseCollection":
"""Filter cases

Expand All @@ -241,4 +282,5 @@ def filter(
)

query = super()._add_filter({"bool": {"must": must}})
return CaseCollection(self._sumo, query, self._pit)

return CaseCollection(self._sumo, query, self._pit, has = has)