Skip to content

Commit

Permalink
Add 'has' filter to CaseCollection (#328)
Browse files Browse the repository at this point in the history
* Ignore emacs backups.

* Implement 'has' filter on case collection.

* Added section on has filter to documentation.

---------

Co-authored-by: Raymond Wiker <[email protected]>
  • Loading branch information
rwiker and rwiker authored Jun 13, 2024
1 parent 4d6cdbe commit 4b4a36f
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,6 @@ src/fmu/sumo/version.py
testing.ipynb
# files generated during testing
*.csv

# emacs backup files
*~
31 changes: 31 additions & 0 deletions docs/explorer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,37 @@ You can also use a case `uuid` to get a `Case` object:
my_case = sumo.get_case_by_uuid("1234567")
Finding cases with specific data types
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There is also a filter that searches for cases where there are objects
that match specific criteria. For example, if we define
``4d-seismic`` as objects that have ``data.content=seismic``,
``data.time.t0.label=base`` and ``data.time.t1.label=monitor``, we can use
the ``has`` filter to find cases that have ``4d-seismic`` data:

.. code-block::
from fmu.sumo.explorer import Explorer, Filters
exp = Explorer(env="prod")
cases = exp.cases.filter(asset="Heidrun", has=Filters.seismic4d)
In this case, we have a predefined filter for ``4d-seismic``, exposed
through ``fmu.sumo.explorer.Filters``. There is no magic involved; any
user can create their own filters, and either use them directly or ask
for them to be added to ``fmu.sumo.explorer.Filters``.

It is also possible to chain filters. The previous example could also
be handled by

.. code-block::
cases = exp.cases.filter(asset="Heidrun",
has={"term":{"data.content.keyword": "seismic"}})\
.filter(has={"term":{"data.time.t0.label.keyword":"base"}})\
.filter(has={"term":{"data.time.t1.label.keyword":"monitor"}})
Browsing data in a case
^^^^^^^^^^^^^^^^^^^^^^^
The `Case` object has properties for accessing different data types:
Expand Down
24 changes: 24 additions & 0 deletions src/fmu/sumo/explorer/Filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

# Query clause matching 4d-seismic objects: every "term" below must hold,
# i.e. content is "seismic", the t0 label is "base" and the t1 label is
# "monitor".
seismic4d = {
    "bool": {
        "must": [
            {"term": {"data.content.keyword": "seismic"}},
            {"term": {"data.time.t0.label.keyword": "base"}},
            {"term": {"data.time.t1.label.keyword": "monitor"}},
        ]
    }
}
46 changes: 44 additions & 2 deletions src/fmu/sumo/explorer/objects/case_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,17 @@ def _make_overview_query(ids, pit):
class CaseCollection(DocumentCollection):
"""A class for representing a collection of cases in Sumo"""

def __init__(self, sumo: SumoClient, query: Dict = None, pit: Pit = None):
def __init__(
    self,
    sumo: SumoClient,
    query: Dict = None,
    pit: Pit = None,
    has: Dict = None,
):
    """Set up a collection of case documents.

    Args:
        sumo (SumoClient): connection to Sumo
        query (dict): elastic query object
        pit (Pit): point in time
        has (dict): query for specific child objects; stored as-is and
            consumed the first time a batch of documents is fetched
    """
    super().__init__("case", sumo, query, _CASE_FIELDS, pit)
    # Per-case overview cache, initially empty.
    self._overviews = {}
    # Pending child-object query (None when no ``has`` filter was given).
    self._has = has

@property
def names(self) -> List[str]:
Expand All @@ -104,6 +106,16 @@ async def names_async(self) -> List[str]:
"""List of unique case names"""
return await self._get_field_values_async("fmu.case.name.keyword")

@property
def uuids(self) -> List[str]:
    """Unique case uuids (values of the ``fmu.case.uuid.keyword`` field)"""
    uuid_field = "fmu.case.uuid.keyword"
    return self._get_field_values(uuid_field)

@property
async def uuids_async(self) -> List[str]:
    """Unique case uuids (values of the ``fmu.case.uuid.keyword`` field)

    Awaitable counterpart of ``uuids``.
    """
    uuid_field = "fmu.case.uuid.keyword"
    values = await self._get_field_values_async(uuid_field)
    return values

@property
def statuses(self) -> List[str]:
"""List of unique statuses"""
Expand Down Expand Up @@ -160,6 +172,34 @@ async def getitem_async(self, index: int) -> Case:
overview = self._overviews[uuid]
return Case(self._sumo, doc, overview, self._pit)

def _next_batch(self) -> List[Dict]:
    """Get next batch of documents

    If a ``has`` query is still pending, it is resolved first: the case
    uuids of the current collection are combined with the ``has`` clause,
    the bucket keys returned for that combined query become the new set
    of ids, and the collection query is replaced by an ``ids`` query over
    them. The pending ``has`` is then cleared so this happens only once.

    Returns:
        The next batch of documents
    """
    has_clause = self._has
    if has_clause is not None:
        uuid_field = "fmu.case.uuid.keyword"
        candidate_uuids = self.uuids
        combined_query = {
            "bool": {
                "must": [
                    {"terms": {uuid_field: candidate_uuids}},
                    has_clause,
                ]
            }
        }
        buckets = self._utils.get_buckets(uuid_field, combined_query)
        matching_uuids = [bucket["key"] for bucket in buckets]
        # From here on the collection is addressed purely by document id.
        self._query = {"ids": {"values": matching_uuids}}
        self._has = None
    return super()._next_batch()

async def _next_batch_async(self) -> List[Dict]:
    """Get next batch of documents

    If a ``has`` query is still pending, it is resolved first: the case
    uuids of the current collection are combined with the ``has`` clause,
    the bucket keys returned for that combined query become the new set
    of ids, and the collection query is replaced by an ``ids`` query over
    them. The pending ``has`` is then cleared so this happens only once.

    Returns:
        The next batch of documents
    """
    if self._has is not None:
        uuid_field = "fmu.case.uuid.keyword"
        uuids = await self.uuids_async
        query = {
            "bool": {
                "must": [
                    {"terms": {uuid_field: uuids}},
                    self._has,
                ]
            }
        }
        # Use the awaitable bucket lookup: calling the synchronous
        # ``get_buckets`` here would block the event loop while the
        # request is in flight.
        # NOTE(review): relies on ``self._utils.get_buckets_async`` taking
        # the same (field, query) arguments as ``get_buckets`` - confirm.
        buckets = await self._utils.get_buckets_async(uuid_field, query)
        nuuids = [bucket["key"] for bucket in buckets]
        # From here on the collection is addressed purely by document id.
        self._query = {"ids": {"values": nuuids}}
        self._has = None
    return await super()._next_batch_async()

def _postprocess_batch(self, hits, pit):
ids = [hit["_id"] for hit in hits]
query = _make_overview_query(ids, pit)
Expand Down Expand Up @@ -215,6 +255,7 @@ def filter(
user: Union[int, List[int]] = None,
asset: Union[int, List[int]] = None,
field: Union[str, List[str]] = None,
has: Dict = None,
) -> "CaseCollection":
"""Filter cases
Expand All @@ -241,4 +282,5 @@ def filter(
)

query = super()._add_filter({"bool": {"must": must}})
return CaseCollection(self._sumo, query, self._pit)

return CaseCollection(self._sumo, query, self._pit, has = has)

0 comments on commit 4b4a36f

Please sign in to comment.