diff --git a/ai4papi/routers/v1/try_me/nomad.py b/ai4papi/routers/v1/try_me/nomad.py index f1b5698d..04c2e7e7 100644 --- a/ai4papi/routers/v1/try_me/nomad.py +++ b/ai4papi/routers/v1/try_me/nomad.py @@ -1,7 +1,14 @@ +""" +This route is meant to be public for everyone authenticated to try (no VO membership +required). We deploy jobs by default in the AI4EOSC namespace. +""" + from copy import deepcopy +from typing import Tuple, Union +import types import uuid -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.security import HTTPBearer from ai4papi import auth @@ -18,19 +25,101 @@ ) security = HTTPBearer() +# (!) try-me jobs are always deployed in AI4EOSC +VO = "vo.ai4eosc.eu" +NAMESPACE = papiconf.MAIN_CONF['nomad']['namespaces'][VO] + +@router.get("/") +def get_deployments( + full_info: bool = Query(default=False), + authorization=Depends(security), + ): + """ + Returns a list of all deployments belonging to a user. + + Parameters: + * **vo**: Virtual Organizations from where you want to retrieve your deployments. + If no vo is provided, it will retrieve the deployments of all VOs. + * **full_info**: retrieve the full information of each deployment. + Disabled by default, as it will increase latency too much if there are many + deployments. + """ + # Retrieve authenticated user info + auth_info = auth.get_user_info(token=authorization.credentials) + + # Retrieve all jobs in namespace + jobs = nomad.get_deployments( + namespace=NAMESPACE, + owner=auth_info['id'], + prefix='try', + ) + user_jobs = [] + for j in jobs: + try: + job_info = get_deployment( + deployment_uuid=j['ID'], + full_info=full_info, + authorization=types.SimpleNamespace( + credentials=authorization.credentials # token + ), + ) + except HTTPException: # not a try-me + continue + except Exception as e: # unexpected error + raise(e) + + user_jobs.append(job_info) + + # Sort deployments by creation date + seq = [j['submit_time'] for j in user_jobs] + args = sorted(range(len(seq)), key=seq.__getitem__)[::-1] + sorted_jobs = [user_jobs[i] for i in args] + + return sorted_jobs + + +@router.get("/{deployment_uuid}") +def get_deployment( + deployment_uuid: str, + full_info: bool = Query(default=True), + authorization=Depends(security), + ): + """ + This function is used mainly to be able to retrieve the endpoint of the try_me job. + We cannot return the endpoint when creating the job, because the final endpoint will + on which datacenter the job ends up landing. + + Parameters: + * **deployment_uuid**: uuid of deployment to gather info about + + Returns a dict with info + """ + # Retrieve authenticated user info + auth_info = auth.get_user_info(token=authorization.credentials) + + job = nomad.get_deployment( + deployment_uuid=deployment_uuid, + namespace=NAMESPACE, + owner=auth_info['id'], + full_info=full_info, + ) + + # Rewrite main endpoint, otherwise it automatically selects DEEPaaS API + job['main_endpoint'] = 'ui' + + return job + @router.post("") def create_deployment( module_name: str, + title: str = Query(default=""), authorization=Depends(security), ): """ Submit a try-me deployment to Nomad. The deployment will automatically kill himself after a short amount of time. - This endpoint is meant to be public for everyone to try (no authorization required). - We deploy jobs by default in the AI4EOSC namespace. - Returns a string with the endpoint to access the API. """ # Retrieve authenticated user info @@ -51,11 +140,12 @@ def create_deployment( nomad_conf = nomad_conf.safe_substitute( { 'JOB_UUID': job_uuid, - 'NAMESPACE': 'ai4eosc', # (!) try-me jobs are always deployed in "ai4eosc" + 'NAMESPACE': NAMESPACE, + 'TITLE': title[:45], 'OWNER': auth_info['id'], 'OWNER_NAME': auth_info['name'], 'OWNER_EMAIL': auth_info['email'], - 'BASE_DOMAIN': papiconf.MAIN_CONF['lb']['domain']['vo.ai4eosc.eu'], # idem + 'BASE_DOMAIN': papiconf.MAIN_CONF['lb']['domain'][VO], 'HOSTNAME': job_uuid, 'DOCKER_IMAGE': docker_image, } @@ -67,7 +157,7 @@ def create_deployment( # Check that the target node (ie. tag='tryme') resources are available because # these jobs cannot be left queueing # We check for every resource metric (cpu, disk, ram) - stats = get_cluster_stats(vo='vo.ai4eosc.eu') + stats = get_cluster_stats(vo=VO) resources = ['cpu', 'ram', 'disk'] keys = [f"{i}_used" for i in resources] + [f"{i}_total" for i in resources] status = {k: 0 for k in keys} @@ -89,16 +179,17 @@ def create_deployment( # Check that the user hasn't too many "try-me" jobs currently running jobs = nomad.get_deployments( - namespace="ai4eosc", # (!) try-me jobs are always deployed in "ai4eosc" + namespace=NAMESPACE, owner=auth_info['id'], prefix="try", ) - if len(jobs) >= 2: + if len(jobs) >= 3: raise HTTPException( status_code=503, - detail="Sorry, but you seem to be currently running two `Try-me` environments already. " \ + detail="Sorry, but you seem to be currently running 3 `try-me` environments already. " \ "Before launching a new one, you will need to wait till one of your " \ - "existing environments gets automatically deleted (ca. 10 min)." + "existing environments gets automatically deleted (ca. 10 min) or delete it manually " \ + "in the Dashboard." ) # Submit job @@ -107,32 +198,28 @@ def create_deployment( return r -@router.get("/{deployment_uuid}") -def get_deployment( +@router.delete("/{deployment_uuid}") +def delete_deployment( deployment_uuid: str, authorization=Depends(security), ): """ - This function is used mainly to be able to retrieve the endpoint of the try_me job. - We cannot return the endpoint when creating the job, because the final endpoint will - on which datacenter the job ends up landing. + Delete a deployment. Users can only delete their own deployments. Parameters: - * **deployment_uuid**: uuid of deployment to gather info about + * **vo**: Virtual Organization where your deployment is located + * **deployment_uuid**: uuid of deployment to delete - Returns a dict with info + Returns a dict with status """ # Retrieve authenticated user info auth_info = auth.get_user_info(token=authorization.credentials) - job = nomad.get_deployment( + # Delete deployment + r = nomad.delete_deployment( deployment_uuid=deployment_uuid, - namespace="ai4eosc", # (!) try-me jobs are always deployed in "ai4eosc" + namespace=NAMESPACE, owner=auth_info['id'], - full_info=True, ) - # Rewrite main endpoint, otherwise it automatically selects DEEPaaS API - job['main_endpoint'] = 'ui' - - return job + return r diff --git a/etc/try_me/nomad.hcl b/etc/try_me/nomad.hcl index d11b5802..e829962e 100644 --- a/etc/try_me/nomad.hcl +++ b/etc/try_me/nomad.hcl @@ -20,7 +20,7 @@ job "try-${JOB_UUID}" { owner = "${OWNER}" # user-id from OIDC owner_name = "${OWNER_NAME}" owner_email = "${OWNER_EMAIL}" - title = "" + title = "${TITLE}" description = "" } diff --git a/tests/main.py b/tests/main.py index 0383b684..a676bc2c 100644 --- a/tests/main.py +++ b/tests/main.py @@ -21,6 +21,7 @@ import catalog.tools import deployments.modules import deployments.tools +import try_me.test_nomad import routes import test_secrets import test_stats diff --git a/tests/try_me/test_nomad.py b/tests/try_me/test_nomad.py new file mode 100644 index 00000000..39afee74 --- /dev/null +++ b/tests/try_me/test_nomad.py @@ -0,0 +1,69 @@ +import os +import time +from types import SimpleNamespace + +from ai4papi.routers.v1.try_me import nomad + + +# Retrieve EGI token (not generated on the fly in case the are rate limiting issues +# if too many queries) +token = os.getenv('TMP_EGI_TOKEN') +if not token: + raise Exception( +'Please remember to set a token as ENV variable before executing \ +the tests! \n\n \ + export TMP_EGI_TOKEN="$(oidc-token egi-checkin)" \n\n \ +If running from VScode make sure to launch `code` from that terminal so it can access \ +that ENV variable.' + ) + +# Create deployment +rcreate = nomad.create_deployment( + module_name="ai4os-demo-app", + authorization=SimpleNamespace( + credentials=token + ), +) +assert isinstance(rcreate, dict) +assert 'job_ID' in rcreate.keys() + +# Retrieve that deployment +rdep = nomad.get_deployment( + deployment_uuid=rcreate['job_ID'], + authorization=SimpleNamespace( + credentials=token + ), +) +assert isinstance(rdep, dict) +assert 'job_ID' in rdep.keys() +assert rdep['job_ID']==rcreate['job_ID'] + +# Retrieve all deployments +rdeps = nomad.get_deployments( + authorization=SimpleNamespace( + credentials=token + ), +) +assert isinstance(rdeps, list) +assert any([d['job_ID']==rcreate['job_ID'] for d in rdeps]) + +# Delete deployment +rdel = nomad.delete_deployment( + deployment_uuid=rcreate['job_ID'], + authorization=SimpleNamespace( + credentials=token + ), +) +time.sleep(3) # Nomad takes some time to delete +assert isinstance(rdel, dict) +assert 'status' in rdel.keys() + +# Check module no longer exists +rdeps3 = nomad.get_deployments( + authorization=SimpleNamespace( + credentials=token + ), +) +assert not any([d['job_ID']==rcreate['job_ID'] for d in rdeps3]) + +print('Try-me (nomad) tests passed!')