From 4b4a36f1a22e5ed7c131c1e76dc15d5575d0ec9d Mon Sep 17 00:00:00 2001 From: Raymond Wiker Date: Thu, 13 Jun 2024 10:54:35 +0200 Subject: [PATCH] Add 'has' filter to CaseCollection (#328) * Ignore emacs backups. * Implement 'has' filter on case collection. * Added section on has filter to documentation. --------- Co-authored-by: Raymond Wiker --- .gitignore | 3 ++ docs/explorer.rst | 31 +++++++++++++ src/fmu/sumo/explorer/Filters.py | 24 ++++++++++ .../sumo/explorer/objects/case_collection.py | 46 ++++++++++++++++++- 4 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 src/fmu/sumo/explorer/Filters.py diff --git a/.gitignore b/.gitignore index 43ffe970..1aa985b9 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,6 @@ src/fmu/sumo/version.py testing.ipynb # files generated during testing *.csv + +# emacs backup files +*~ diff --git a/docs/explorer.rst b/docs/explorer.rst index 66072079..b4c542d9 100644 --- a/docs/explorer.rst +++ b/docs/explorer.rst @@ -191,6 +191,37 @@ You can also use a case `uuid` to get a `Case` object: my_case = sumo.get_case_by_uuid("1234567") +Finding cases with specific data types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +There is also a filter that searches for cases where there are objects +that match specific criteria. For example, if we define +``4d-seismic`` as objects that have ``data.content=seismic``, +``data.time.t0.label=base`` and ``data.time.t1.label=monitor``, we can use +the ``has`` filter to find cases that have ``4d-seismic`` data: + +.. code-block:: + + from fmu.sumo.explorer import Explorer, Filters + + exp = Explorer(env="prod") + + cases = exp.cases.filter(asset="Heidrun", has=Filters.seismic4d) + +In this case, we have a predefined filter for ``4d-seismic``, exposed +through ``fmu.sumo.explorer.Filters``. There is no magic involved; any +user can create their own filters, and either use them directly or ask +for them to be added to ``fmu.sumo.explorer.Filters``. 
+ +It is also possible to chain filters. The previous example could also +be handled by + +.. code-block:: + cases = exp.cases.filter(asset="Heidrun", + has={"term":{"data.content.keyword": "seismic"}})\ + .filter(has={"term":{"data.time.t0.label.keyword":"base"}})\ + .filter(has={"term":{"data.time.t1.label.keyword":"monitor"}}) + + Browsing data in a case ^^^^^^^^^^^^^^^^^^^^^^^ The `Case` object has properties for accessing different data types: diff --git a/src/fmu/sumo/explorer/Filters.py b/src/fmu/sumo/explorer/Filters.py new file mode 100644 index 00000000..c8a34442 --- /dev/null +++ b/src/fmu/sumo/explorer/Filters.py @@ -0,0 +1,24 @@ + +# Filter that matches 4d-seismic objects. + +seismic4d = { + "bool": { + "must": [ + { + "term": { + "data.content.keyword": "seismic" + } + }, + { + "term": { + "data.time.t0.label.keyword": "base" + } + }, + { + "term": { + "data.time.t1.label.keyword": "monitor" + } + } + ] + } +} diff --git a/src/fmu/sumo/explorer/objects/case_collection.py b/src/fmu/sumo/explorer/objects/case_collection.py index 5e225492..b242910a 100644 --- a/src/fmu/sumo/explorer/objects/case_collection.py +++ b/src/fmu/sumo/explorer/objects/case_collection.py @@ -84,15 +84,17 @@ def _make_overview_query(ids, pit): class CaseCollection(DocumentCollection): """A class for representing a collection of cases in Sumo""" - def __init__(self, sumo: SumoClient, query: Dict = None, pit: Pit = None): + def __init__(self, sumo: SumoClient, query: Dict = None, pit: Pit = None, has = None): """ Args: sumo (SumoClient): connection to Sumo query (dict): elastic query object pit (Pit): point in time + has (dict): query for specific child objects """ super().__init__("case", sumo, query, _CASE_FIELDS, pit) self._overviews = {} + self._has = has @property def names(self) -> List[str]: @@ -104,6 +106,16 @@ async def names_async(self) -> List[str]: """List of unique case names""" return await self._get_field_values_async("fmu.case.name.keyword") + @property + def 
uuids(self) -> List[str]: + """List of unique case uuids""" + return self._get_field_values("fmu.case.uuid.keyword") + + @property + async def uuids_async(self) -> List[str]: + """List of unique case uuids""" + return await self._get_field_values_async("fmu.case.uuid.keyword") + @property def statuses(self) -> List[str]: """List of unique statuses""" @@ -160,6 +172,34 @@ async def getitem_async(self, index: int) -> Case: overview = self._overviews[uuid] return Case(self._sumo, doc, overview, self._pit) + def _next_batch(self) -> List[Dict]: + """Get next batch of documents + + Returns: + The next batch of documents + """ + if self._has is not None: + uuids = self.uuids + query = { "bool": { "must": [ {"terms": { "fmu.case.uuid.keyword": uuids}}, self._has]}} + nuuids = [ x["key"] for x in self._utils.get_buckets("fmu.case.uuid.keyword", query)] + self._query = {"ids": {"values": nuuids}} + self._has = None + return super()._next_batch() + + async def _next_batch_async(self) -> List[Dict]: + """Get next batch of documents + + Returns: + The next batch of documents + """ + if self._has is not None: + uuids = await self.uuids_async + query = { "bool": { "must": [ {"terms": { "fmu.case.uuid.keyword": uuids}}, self._has]}} + nuuids = [ x["key"] for x in self._utils.get_buckets("fmu.case.uuid.keyword", query)] + self._query = {"ids": {"values": nuuids}} + self._has = None + return await super()._next_batch_async() + def _postprocess_batch(self, hits, pit): ids = [hit["_id"] for hit in hits] query = _make_overview_query(ids, pit) @@ -215,6 +255,7 @@ def filter( user: Union[int, List[int]] = None, asset: Union[int, List[int]] = None, field: Union[str, List[str]] = None, + has: Dict = None, ) -> "CaseCollection": """Filter cases @@ -241,4 +282,5 @@ def filter( ) query = super()._add_filter({"bool": {"must": must}}) - return CaseCollection(self._sumo, query, self._pit) + + return CaseCollection(self._sumo, query, self._pit, has = has)