Exclude/include time data (#92)
* Add exclude_time_data parameter in get_objects and get_object_property_values

* Add include_time_data parameter to get_objects and get_object_property_values

* TimeData extends str

* Raise exception for illegal include_time_data values

* More refined time data filtering

* Make it possible to create bucket aggregation on time_type

* Add some helpful enums for Property and ObjectType
adnejacobsen authored Oct 7, 2022
1 parent 10ddbcf commit 57b4828
Showing 4 changed files with 113 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/fmu/sumo/explorer/__init__.py
@@ -1,5 +1,5 @@
from fmu.sumo.explorer._explorer import Explorer
from fmu.sumo.explorer._case import Case
from fmu.sumo.explorer._utils import Utils
from fmu.sumo.explorer._utils import Utils, TimeData, Property, ObjectType
from fmu.sumo.explorer._child_object import ChildObject
from fmu.sumo.explorer._document_collection import DocumentCollection
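With the updated __init__.py, the new enums are re-exported from the package root. A minimal sketch of what downstream imports look like after this change (runnable once the package is installed):

# Re-exports added by the updated __init__.py above.
from fmu.sumo.explorer import Explorer, TimeData, Property, ObjectType

# TimeData, Property and ObjectType all extend str, so their members
# compare equal to their plain string values.
assert TimeData.TIMESTAMP == "TIMESTAMP"
assert ObjectType.SURFACE == "surface"
assert isinstance(Property.TIME_TYPE, str)
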
35 changes: 18 additions & 17 deletions src/fmu/sumo/explorer/_case.py
@@ -1,15 +1,9 @@
from typing import List
from fmu.sumo.explorer._utils import Utils
from fmu.sumo.explorer._utils import Utils, TimeData, Property, ObjectType
from fmu.sumo.explorer._document_collection import DocumentCollection
from fmu.sumo.explorer._child_object import ChildObject
import deprecation

OBJECT_TYPES = {
'surface': '.gri',
'polygons': '.csv',
'table': '.csv'
}

class Case:
def __init__(self, sumo_client, meta_data):
self.sumo = sumo_client
@@ -185,27 +179,29 @@ def _list_wrap(self, value):

def get_object_property_values(
self,
property: str,
object_type: str,
property: Property,
object_type: ObjectType,
object_names: List[str]=[],
tag_names: List[str]=[],
time_intervals: List[str]=[],
iteration_ids: List[str]=[],
realization_ids: List[int]=[],
aggregations: List[int]=[]
aggregations: List[int]=[],
include_time_data: TimeData = None
):
"""
Get a dictionary of unique values for a given property in case child objects.
Arguments:
`property`: tag_name | time_interval | aggregation | object_name | iteration_id | realization_id
`object_type`: surface | polygons | table
`property`: tag_name | time_interval | time_type | aggregation | object_name | iteration_id | realization_id (Property)
`object_type`: surface | polygons | table (ObjectType)
`object_names`: list of object names (strings)
`tag_names`: list of tag names (strings)
`time_intervals`: list of time intervals (strings)
`iteration_ids`: list of iteration ids (integers)
`realization_ids`: list of realization ids (integers)
`aggregations`: list of aggregation operations (strings)
`include_time_data`: ALL | TIMESTAMP | TIME_INTERVAL | NONE (TimeData)
Returns:
Dictionary of unique values and number of objects
@@ -214,6 +210,7 @@ def get_object_property_values(
accepted_properties = {
"tag_name": "tag_name",
"time_interval": "time_interval",
"time_type": "time_type",
"aggregation": "fmu.aggregation.operation.keyword",
"object_name": "data.name.keyword",
"iteration_id": "fmu.iteration.id",
@@ -250,7 +247,8 @@ def get_object_property_values(
elastic_query = self.utils.create_elastic_query(
object_type=object_type,
terms=terms,
aggregate_field=agg_field
aggregate_field=agg_field,
include_time_data=include_time_data
)

result = self.sumo.post("/search", json=elastic_query)
@@ -261,25 +259,27 @@ def get_object_property_values(

def get_objects(
self,
object_type: str,
object_type: ObjectType,
object_names: List[str]=[],
tag_names: List[str]=[],
time_intervals: List[str]=[],
iteration_ids: List[int]=[],
realization_ids: List[int]=[],
aggregations: List[str]=[]
aggregations: List[str]=[],
include_time_data: TimeData = None
):
"""
Search for child objects in a case.
Arguments:
`object_type`: surface | polygons | table
`object_type`: surface | polygons | table (ObjectType)
`object_names`: list of object names (strings)
`tag_names`: list of tag names (strings)
`time_intervals`: list of time intervals (strings)
`iteration_ids`: list of iteration ids (integers)
`realization_ids`: list of realization ids (integers)
`aggregations`: list of aggregation operations (strings)
`include_time_data`: ALL | TIMESTAMP | TIME_INTERVAL | NONE (TimeData)
Returns:
`DocumentCollection` used for retrieving search results
@@ -315,7 +315,8 @@ def get_objects(
fields_exists=fields_exists,
terms=terms,
size=20,
sort=[{"tracklog.datetime": "desc"}]
sort=[{"tracklog.datetime": "desc"}],
include_time_data=include_time_data
)

return DocumentCollection(
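Taken together, the _case.py changes let callers restrict child objects by the kind of time data they carry and aggregate on the new time_type property. A hedged sketch of the updated API, assuming case is a Case instance obtained from Explorer.get_cases(); the tag name below is a hypothetical example:

from fmu.sumo.explorer import TimeData, Property, ObjectType

# Surfaces in the case that carry a single timestamp (excludes time intervals
# and time-less objects). Returns a DocumentCollection.
surfaces = case.get_objects(
    object_type=ObjectType.SURFACE,
    tag_names=["ds_extracted_horizons"],  # hypothetical tag name
    include_time_data=TimeData.TIMESTAMP,
)

# Unique values of the new time_type property across surfaces in the case.
# Returns a dictionary of unique values and object counts.
time_types = case.get_object_property_values(
    property=Property.TIME_TYPE,
    object_type=ObjectType.SURFACE,
)
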
12 changes: 7 additions & 5 deletions src/fmu/sumo/explorer/_explorer.py
@@ -1,6 +1,6 @@
from sumo.wrapper import SumoClient
from fmu.sumo.explorer._case import Case
from fmu.sumo.explorer._utils import Utils
from fmu.sumo.explorer._utils import Utils, TimeData, ObjectType
from fmu.sumo.explorer._document_collection import DocumentCollection
from typing import List
from fmu.sumo.explorer._child_object import ChildObject
@@ -105,20 +105,21 @@ def get_cases(

def get_objects(
self,
object_type: str,
object_type: ObjectType,
case_ids: List[str]=[],
object_names: List[str]=[],
tag_names: List[str]=[],
time_intervals: List[str]=[],
iteration_ids: List[int]=[],
realization_ids: List[int]=[],
aggregations: List[str]=[]
aggregations: List[str]=[],
include_time_data: TimeData = None
):
"""
Search for child objects in a case.
Arguments:
`object_type`: surface | polygons | table
`object_type`: surface | polygons | table (ObjectType)
`object_names`: list of object names (strings)
`tag_names`: list of tag names (strings)
`time_intervals`: list of time intervals (strings)
@@ -161,7 +162,8 @@ def get_objects(
fields_exists=fields_exists,
terms=terms,
size=20,
sort=[{"tracklog.datetime": "desc"}]
sort=[{"tracklog.datetime": "desc"}],
include_time_data=include_time_data
)

return DocumentCollection(
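The same parameter is threaded through Explorer.get_objects, which searches across cases. A sketch under the same assumptions; the Explorer constructor arguments are not part of this diff and are left as a placeholder:

from fmu.sumo.explorer import Explorer, TimeData, ObjectType

explorer = Explorer(...)  # constructor arguments omitted: not shown in this diff

# Surfaces from selected cases that have any time data at all, i.e. either a
# timestamp or a time interval (TimeData.ALL excludes only time-less objects).
docs = explorer.get_objects(
    object_type=ObjectType.SURFACE,
    case_ids=["<case-id-1>", "<case-id-2>"],  # placeholder ids
    include_time_data=TimeData.ALL,
)

# Any other include_time_data value makes create_elastic_query raise ValueError.
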
97 changes: 87 additions & 10 deletions src/fmu/sumo/explorer/_utils.py
@@ -1,3 +1,26 @@
from enum import Enum

class ObjectType(str, Enum):
SURFACE = "surface"
POLYGONS = "polyons"
TABLE = "table"

class Property(str, Enum):
TAG_NAME = "tag_name"
TIME_INTERVAL = "time_interval"
TIME_TYPE = "time_type"
AGGREGATION = "aggregation"
OBJECT_NAME = "object_name"
ITERATION_ID = "iteration_id"
REALIZATION_ID = "realization_id"

class TimeData(str, Enum):
ALL = "ALL"
TIMESTAMP = "TIMESTAMP"
TIME_INTERVAL = "TIME_INTERVAL"
NONE = "NONE"


OBJECT_TYPES = {
'surface': '.gri',
'polygons': '.csv',
@@ -22,7 +45,8 @@ def create_elastic_query(
sort=None,
terms={},
fields_exists=[],
aggregate_field=None
aggregate_field=None,
include_time_data=None
):
if object_type not in list(OBJECT_TYPES.keys()):
raise Exception(f"Invalid object_type: {object_type}. Accepted object_types: {OBJECT_TYPES.keys()}")
Expand All @@ -47,7 +71,30 @@ def time = params['_source']['data']['time'];
emit(params['_source']['data']['time'][0]['value'].splitOnToken('T')[0]);
}
}else {
emit('NULL');
emit('NONE');
}
"""
}
},
"time_type": {
"type": "keyword",
"script": {
"lang": "painless",
"source": """
def time = params['_source']['data']['time'];
if(time != null) {
if(time.length == 0) {
emit("NONE");
} else if(time.length == 1) {
emit("TIMESTAMP");
} else if (time.length == 2) {
emit("TIME_INTERVAL");
} else {
emit("UNKNOWN");
}
} else {
emit("NONE");
}
"""
}
@@ -61,7 +108,7 @@ def time = params['_source']['data']['time'];
String[] split_file_name = file_name.splitOnToken('--');
if(split_file_name.length == 1) {{
emit('NULL');
emit('NONE');
}} else {{
String surface_content = split_file_name[1].replace('{OBJECT_TYPES[object_type]}', '');
emit(surface_content);
@@ -71,25 +118,24 @@ def time = params['_source']['data']['time'];
}
},
"query": {
"bool": {
"must": [
{"match": {"class": object_type}}
]
}
"bool": {}
},
"fields": ["tag_name", "time_interval"]
}

must = [{"match": {"class": object_type}}]
must_not = []

if sort:
elastic_query["sort"] = sort

for field in terms:
elastic_query["query"]["bool"]["must"].append({
must.append({
"terms": {field: terms[field]}
})

for field in fields_exists:
elastic_query["query"]["bool"]["must"].append({
must.append({
"exists": { "field": field}
})

@@ -103,4 +149,35 @@ def time = params['_source']['data']['time'];
}
}

if aggregate_field in ["tag_name", "time_interval"]:
must_not.append({
"term": {aggregate_field: "NONE"}
})

if include_time_data is not None:
if include_time_data == TimeData.ALL:
must.append({
"terms": {"time_type": ["TIMESTAMP", "TIME_INTERVAL"]}
})
elif include_time_data == TimeData.TIMESTAMP:
must.append({
"term": {"time_type": "TIMESTAMP"}
})
elif include_time_data == TimeData.TIME_INTERVAL:
must.append({
"term": {"time_type": "TIME_INTERVAL"}
})
elif include_time_data == TimeData.NONE:
must_not.append({
"terms": {"time_type": ["TIMESTAMP", "TIME_INTERVAL"]}
})
else:
raise ValueError(f"Invalid value for include_time_data: {include_time_data}")

if len(must) > 0:
elastic_query["query"]["bool"]["must"] = must

if len(must_not) > 0:
elastic_query["query"]["bool"]["must_not"] = must_not

return elastic_query
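
For reference, a sketch of the bool clause that create_elastic_query ends up building when include_time_data=TimeData.NONE is combined with an object_type filter, reconstructed from the branches above (only the bool part of the query is shown):

# Sketch of elastic_query["query"]["bool"] for object_type="surface" and
# include_time_data=TimeData.NONE, per the must/must_not handling above.
bool_clause = {
    "must": [
        {"match": {"class": "surface"}},  # always added for the requested object_type
    ],
    "must_not": [
        {"terms": {"time_type": ["TIMESTAMP", "TIME_INTERVAL"]}},  # NONE drops all timed objects
    ],
}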
