diff --git a/ai4papi/routers/v1/deployments/modules.py b/ai4papi/routers/v1/deployments/modules.py index 71870c5..9c1f74d 100644 --- a/ai4papi/routers/v1/deployments/modules.py +++ b/ai4papi/routers/v1/deployments/modules.py @@ -22,6 +22,12 @@ security = HTTPBearer() +# When deploying in production, force the definition of a provenance token +provenance_token = os.environ.get('PAPI_PROVENANCE_TOKEN', None) +if not papiconf.IS_DEV and not provenance_token: +    raise Exception("You need to define the variable \"PAPI_PROVENANCE_TOKEN\".") + + @router.get("") def get_deployments( vos: Union[Tuple, None] = Query(default=None), @@ -106,6 +112,10 @@ def get_deployment( Returns a dict with info """ + # Check if the query comes from the provenance-workflow, if so, search in snapshots + if authorization.credentials == provenance_token: + return utils.retrieve_from_snapshots(deployment_uuid) + # Retrieve authenticated user info auth_info = auth.get_user_info(token=authorization.credentials) auth.check_vo_membership(vo, auth_info['vos']) diff --git a/ai4papi/utils.py b/ai4papi/utils.py index 20ba25f..563ab20 100644 --- a/ai4papi/utils.py +++ b/ai4papi/utils.py @@ -2,10 +2,12 @@ Miscellaneous utils """ from datetime import datetime +import json +from pathlib import Path import os import re -from cachetools import cached, TTLCache +from cachetools import cached, TTLCache, LRUCache from fastapi import HTTPException import requests @@ -125,3 +127,47 @@ def get_github_info(owner, repo): print(f' [Error] Failed to parse Github repo info: {msg}') return out + + +@cached(cache=LRUCache(maxsize=20)) +def retrieve_from_snapshots( + deployment_uuid: str, + ): + """ + Retrieve the deployment info from Nomad periodic snapshots. + + This implementation is ugly as hell (iterate through all JSONs). Hopefully + after refactoring the "ai4-accounting" repo we will implement something cleaner + (e.g. a database).
+ + Anyway, not a big concern because this function is not meant to be called very + frequently and latency from reading JSONs is very small. + """ + main_dir = os.environ.get('ACCOUNTING_PTH', None) + if not main_dir: + raise HTTPException( + status_code=500, + detail="Accounting repo with snapshots not available.", + ) + snapshot_dir = Path(main_dir) / 'snapshots' + + # Iterate over snapshots, from recent to old + for snapshot_pth in sorted(snapshot_dir.glob('**/*.json'))[::-1]: + + # Load the snapshot + with open(snapshot_pth, 'r') as f: + snapshot = json.load(f) + + # Iterate over deployments until we find the correct one + for namespace, jobs in snapshot.items(): + for job in jobs: + if (job['job_ID'] == deployment_uuid) and (job['status'] == 'running'): + job['namespace'] = namespace + job['alloc_end'] = f'{snapshot_pth.stem}0000Z' # the end date is approximate (true value lies between this snapshot date and next one) + return job + + # If no deployment found, show error + raise HTTPException( + status_code=404, + detail="Could not find the deployment in the database." + ) diff --git a/tests/deployments/modules.py b/tests/deployments/modules.py index 28b5315..1f192f3 100644 --- a/tests/deployments/modules.py +++ b/tests/deployments/modules.py @@ -97,4 +97,14 @@ ) assert not any([d['job_ID']==rcreate['job_ID'] for d in rdeps3]) +# Check that we are able to retrieve info from Nomad snapshots (provenance) +modules.provenance_token = '1234' +r_prov = modules.get_deployment( + vo='', + deployment_uuid='de0599d6-a1b9-11ef-b98d-0242ac120005', + authorization=SimpleNamespace( + credentials='1234' + ), +) + print('Deployments (modules) tests passed!')