diff --git a/src/fmu/sumo/uploader/_caseondisk.py b/src/fmu/sumo/uploader/_caseondisk.py
index d0802b3a..3cca9071 100644
--- a/src/fmu/sumo/uploader/_caseondisk.py
+++ b/src/fmu/sumo/uploader/_caseondisk.py
@@ -1,6 +1,8 @@
 """Objectify an FMU case (results) as it appears on the disk."""
 
 import os
+from pathlib import Path
+import uuid
 import glob
 import time
 import logging
@@ -72,6 +74,7 @@ def __init__(self, case_metadata_path: str, sumo_connection, verbosity="DEBUG"):
         self.sumo_connection = sumo_connection
 
         logger.debug("case metadata path: %s", case_metadata_path)
+        self._case_metadata_path = Path(case_metadata_path)
         self.case_metadata = _load_case_metadata(case_metadata_path)
         self._fmu_case_uuid = self._get_fmu_case_uuid()
         logger.debug("self._fmu_case_uuid is %s", self._fmu_case_uuid)
@@ -125,9 +128,26 @@ def add_files(self, search_string):
 
     def _get_sumo_parent_id(self):
         """Get the sumo parent ID.
-
-        Call sumo, check if the case is already there. Use fmu_case_uuid for this."""
-
+
+        If the parent id is cached on disk and still fresh, use it. Otherwise call Sumo and look it up based on fmu_case_uuid."""
+
+        # If a reasonably fresh cached file exists, use it and avoid calling Sumo
+        cached_key = "sumo-case-id"
+        cached_file = self._case_metadata_path.parent / "sumo_parent_id.yml"
+        if cached_file.exists():
+            file_age = datetime.datetime.today() - datetime.datetime.fromtimestamp(cached_file.lstat().st_mtime)
+            if file_age.days < 14:
+                with open(cached_file, "r") as infile:
+                    filecontents = yaml.safe_load(infile)
+                sumo_parent_id = filecontents.get(cached_key)
+                try:
+                    uuid.UUID(sumo_parent_id)  # raises ValueError/TypeError if not a valid uuid
+                    logger.debug("Getting sumo parent id from cached file")
+                    return sumo_parent_id
+                except (ValueError, TypeError):
+                    pass  # Not a valid uuid, will call Sumo
+
+        # No valid cached file, need to call Sumo to get the parent id
         query = f"fmu.case.uuid:{self.fmu_case_uuid}"
         search_results = self.sumo_connection.api.get("/searchroot", query=query, size=2, **{'from': 0})
 
@@ -142,6 +162,17 @@ def _get_sumo_parent_id(self):
 
         if len(hits) == 1:
             sumo_parent_id = hits[0].get("_id")
+
+            try:
+                # Cache the parent id in a file
+                my_dict = {cached_key: sumo_parent_id}
+                with open(cached_file, "w") as outfile:
+                    yaml.dump(my_dict, outfile)
+                logger.debug("Caching sumo parent id")
+            except Exception:
+                # Might be concurrency issues, just skip caching to file this time
+                pass
+
             return sumo_parent_id
 
         raise ValueError(
diff --git a/src/fmu/sumo/uploader/_fileondisk.py b/src/fmu/sumo/uploader/_fileondisk.py
index 1513606b..aaa501e3 100644
--- a/src/fmu/sumo/uploader/_fileondisk.py
+++ b/src/fmu/sumo/uploader/_fileondisk.py
@@ -118,9 +118,7 @@ def __repr__(self):
         if self.byte_string is not None:
             s += f"\n# Byte string length: {len(self.byte_string)}"
 
-        if self.sumo_object_id is None:
-            s += "\n# Not uploaded to Sumo"
-        else:
+        if self.sumo_object_id is not None:
             s += f"\n# Uploaded to Sumo. Sumo_ID: {self.sumo_object_id}"
 
         return s
diff --git a/src/fmu/sumo/uploader/scripts/sumo_upload.py b/src/fmu/sumo/uploader/scripts/sumo_upload.py
index f677f759..1219c73f 100644
--- a/src/fmu/sumo/uploader/scripts/sumo_upload.py
+++ b/src/fmu/sumo/uploader/scripts/sumo_upload.py
@@ -179,7 +179,7 @@ def check_arguments(args) -> None:
     logger.debug("Running check_arguments()")
     logger.debug("Arguments are: %s", str(vars(args)))
 
-    if args.env not in ["dev", "test", "prod", "localhost"]:
+    if args.env not in ["preview", "dev", "test", "prod", "localhost"]:
         warnings.warn(f"Non-standard environment: {args.env}")
 
     if not Path(args.casepath).is_absolute():
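
Stripped of the uploader plumbing, the cache-then-fallback pattern that the new `_get_sumo_parent_id` follows looks roughly like the sketch below. This is illustrative only and not part of the patch; `get_parent_id` and `fetch_parent_id_from_sumo` are hypothetical names standing in for the method and the `/searchroot` call.

```python
import datetime
import uuid
from pathlib import Path

import yaml

CACHE_KEY = "sumo-case-id"
CACHE_MAX_AGE_DAYS = 14


def get_parent_id(case_metadata_path: Path, fetch_parent_id_from_sumo) -> str:
    """Return the Sumo parent id, preferring a fresh on-disk cache."""
    cache_file = case_metadata_path.parent / "sumo_parent_id.yml"

    # 1. Try the cache: only trust it if it is recent and holds a valid uuid.
    if cache_file.exists():
        age = datetime.datetime.today() - datetime.datetime.fromtimestamp(
            cache_file.stat().st_mtime
        )
        if age.days < CACHE_MAX_AGE_DAYS:
            cached = (yaml.safe_load(cache_file.read_text()) or {}).get(CACHE_KEY)
            try:
                uuid.UUID(cached)
                return cached
            except (TypeError, ValueError):
                pass  # missing or malformed entry, fall through to Sumo

    # 2. Cache miss: ask Sumo, then best-effort write the cache for next time.
    parent_id = fetch_parent_id_from_sumo()
    try:
        cache_file.write_text(yaml.safe_dump({CACHE_KEY: parent_id}))
    except OSError:
        pass  # e.g. concurrent writers or a read-only disk; caching is optional
    return parent_id
```

The 14-day limit mirrors the `file_age.days < 14` check in the patch; a cache hit saves one Sumo round-trip per uploader invocation.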