Skip to content

Commit

Permalink
feat: add support for CVAT tool (#73)
Browse files Browse the repository at this point in the history
* feat: initial integration of CVAT

Co-authored-by: Stefan Dlugolinsky <[email protected]>

* feat: allow to configure username

* feat: add a `tool_name` in deployments info

* feat: ignore `server` and `grafana` endpoints

* fix: check hardware is not missing

* test: improve tool tests

* feat: rename `ai4-cvat` to `ai4os-cvat` for catalog consistency

* fix: do not replace active endpoint if `full_info` is False

* fix: improve task lifecycles

Even with this, the UI still throws an error because it can be launched before the server is _completely_ ready. I'm not sure we can do much more. Just warn the users about it.

* remote path for storage changed

remote path logic for CVAT data persistence was changed from multi- to single-instance

* fix: fix docker storage container path

* added -p flag to mkdir

* persistence logic changed

* cvat share fix

* raised memory resources for clickhouse

* fix: fix tests

* feat: return delete status

* fix: bring back double dollar signs in template

* test: add tests for CVAT

* fix: handle case where user belongs to a VO not supported by the project

* feat: allow picking from which backup to restore

* feat: move backup retrieval to a separate endpoint

* restoring from backups

* indentation fix

* backup name timestamp set at creating the backup

* fix in generating backup name

* escaping env vars in sh scripts

* logging level adjustment

* feat: add max client disconnect

* fix: fix Dockerfile

* feat: install latest rclone

* feat: update to new nextcloud endpoint

* fix for username setting, force pull server and ui images, rclone url fix

* feat: remove hostname from conf

* fix: align rshare_url with PAPI defaults

* build: add missing packages for rclone installation

---------

Co-authored-by: Stefan Dlugolinsky <[email protected]>
  • Loading branch information
IgnacioHeredia and Stifo authored Nov 13, 2024
1 parent d4a2dbc commit 8cc62ab
Show file tree
Hide file tree
Showing 17 changed files with 1,558 additions and 149 deletions.
9 changes: 9 additions & 0 deletions ai4papi/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ def load_yaml_conf(fpath):
}
}

# Translate the prefix of a Nomad job name into the ID of the tool it deploys
tools_nomad2id = dict(
    fl='ai4os-federated-server',
    cvat='ai4os-cvat',
)

# Fail early if a configured tool has no Nomad prefix pointing to it
for tool in TOOLS:
    if tool in tools_nomad2id.values():
        continue
    raise Exception(f"The tool {tool} is missing from the mapping dictionary.")

# OSCAR template
with open(paths['conf'] / 'oscar.yaml', 'r') as f:
OSCAR_TMPL = Template(f.read())
Expand Down
6 changes: 3 additions & 3 deletions ai4papi/nomad/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def get_deployment(
# Iterate through tags to find `Host` tag
for t in s['Tags']:
try:
url = re.search('Host\(`(.+?)`', t).group(1)
url = re.search(r'Host\(`(.+?)`', t).group(1)
break
except Exception:
url = "missing-endpoint"
Expand Down Expand Up @@ -166,7 +166,7 @@ def get_deployment(
info['main_endpoint'] = service2endpoint[service]

except Exception: # return first endpoint
info['main_endpoint'] = list(info['endpoints'].values())[0]
info['main_endpoint'] = list(info['endpoints'].keys())[0]

# Only fill resources if the job is allocated
allocs = Nomad.job.get_allocations(
Expand Down Expand Up @@ -274,7 +274,7 @@ def get_deployment(
# Something happened, job didn't deploy (eg. job needs port that's currently being used)
# We have to return `placement failures message`.
info['status'] = 'error'
info['error_msg'] = f"{evals[0]['FailedTGAllocs']}"
info['error_msg'] = f"{evals[0].get('FailedTGAllocs', '')}"

else:
# info['error_msg'] = f"Job has not been yet evaluated. Contact with support sharing your job ID: {j['ID']}."
Expand Down
3 changes: 2 additions & 1 deletion ai4papi/routers/v1/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import fastapi

from . import catalog, deployments, inference, secrets, stats, try_me
from . import catalog, deployments, inference, secrets, stats, storage, try_me


router = fastapi.APIRouter()
Expand All @@ -9,6 +9,7 @@
router.include_router(inference.router)
router.include_router(secrets.router)
router.include_router(stats.router)
router.include_router(storage.router)
router.include_router(try_me.router)


Expand Down
30 changes: 27 additions & 3 deletions ai4papi/routers/v1/catalog/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
This means you cannot name your modules like those names (eg. tags, detail, etc)
"""

import configparser
import re
from typing import Tuple, Union
import yaml
Expand All @@ -32,12 +33,17 @@
import requests

from ai4papi import utils
import ai4papi.conf as papiconf


class Catalog:

def __init__(self) -> None:
pass
def __init__(self, repo: str) -> None:
"""
Parameters:
* repo: Github repo where the catalog is hosted (via git submodules)
"""
self.repo = repo


@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
Expand All @@ -57,8 +63,26 @@ def get_items(
This is implemented in a separate function as many functions from this router
are using this function, so we need to avoid infinite recursions.
"""
return {}
gitmodules_url = f"https://raw.githubusercontent.com/{self.repo}/master/.gitmodules"
r = requests.get(gitmodules_url)

cfg = configparser.ConfigParser()
cfg.read_string(r.text)

modules = {}
for section in cfg.sections():
items = dict(cfg.items(section))
key = items.pop('path').lower()
items['url'] = items['url'].replace('.git', '') # remove `.git`, if present
modules[key] = items

# In the case of the tools repo, make sure to remove any tool that is not yet
# supported by PAPI (use the ^ operator to only keep common items)
if 'tool' in self.repo:
for tool_name in papiconf.TOOLS.keys() ^ modules.keys():
_ = modules.pop(tool_name)

return modules

@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_filtered_list(
Expand Down
26 changes: 3 additions & 23 deletions ai4papi/routers/v1/catalog/modules.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,14 @@
import configparser
from copy import deepcopy
import types

from cachetools import cached, TTLCache
from fastapi import APIRouter, HTTPException
from natsort import natsorted
import requests

from ai4papi import quotas, nomad
import ai4papi.conf as papiconf
from .common import Catalog, retrieve_docker_tags


@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_items(self):
    """
    Retrieve the catalog items listed in the `.gitmodules` file of the
    modules catalog repo.

    Returns:
    * dict mapping each submodule `path` (lowercased) to the remaining options
      of its section (eg. `url`), where a trailing `.git` is removed from the url.

    Raises:
    * requests.HTTPError: if the `.gitmodules` file could not be downloaded.

    The result is cached for 6 hours.
    """
    gitmodules_url = "https://raw.githubusercontent.com/ai4os-hub/modules-catalog/master/.gitmodules"
    # Use a timeout so that an unresponsive server cannot block the call forever
    r = requests.get(gitmodules_url, timeout=30)
    # Fail loudly instead of trying to parse (and possibly cache) an error response
    r.raise_for_status()

    cfg = configparser.ConfigParser()
    cfg.read_string(r.text)

    modules = {}
    for section in cfg.sections():
        items = dict(cfg.items(section))
        key = items.pop('path').lower()
        # Remove `.git` only when it is a suffix: a plain `replace` would also
        # mangle urls that contain `.git` somewhere else (eg. `.github`)
        if items['url'].endswith('.git'):
            items['url'] = items['url'][:-len('.git')]
        modules[key] = items

    return modules


def get_config(
self,
item_name: str,
Expand Down Expand Up @@ -91,8 +70,9 @@ def get_config(
return conf


Modules = Catalog()
Modules.get_items = types.MethodType(get_items, Modules)
Modules = Catalog(
repo='ai4os-hub/modules-catalog',
)
Modules.get_config = types.MethodType(get_config, Modules)


Expand Down
52 changes: 22 additions & 30 deletions ai4papi/routers/v1/catalog/tools.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,28 @@
from copy import deepcopy
import types

from cachetools import cached, TTLCache
from fastapi import APIRouter, HTTPException
from fastapi.security import HTTPBearer

from ai4papi import quotas
import ai4papi.conf as papiconf
from .common import Catalog, retrieve_docker_tags


@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_items(self):
    """
    Build the tools catalog from the tools configured in `papiconf.TOOLS`.

    Returns a dict mapping each tool name to its Github `url` and its `branch`.
    The result is cached for 6 hours.
    """
    # Set default branch manually (because we are not yet reading this from submodules)
    # TODO: start reading from submodules (only accept the submodules that have been
    # integrated in papiconf.TOOLS)
    default_branches = {
        'ai4os-federated-server': 'main',
    }

    return {
        name: {
            'url': f'https://github.com/ai4os/{name}',
            'branch': default_branches[name],
        }
        for name in papiconf.TOOLS
    }
security = HTTPBearer()


def get_config(
self,
item_name: str,
vo: str,
):
"""
Returns the default configuration (dict) for creating a deployment
for a specific item. It is prefilled with the appropriate
docker image and the available docker tags.
"""

# Retrieve tool configuration
try:
conf = deepcopy(papiconf.TOOLS[item_name]['user']['full'])
Expand All @@ -51,25 +41,27 @@ def get_config(
if repo not in ['deephdc', 'ai4oshub']:
repo = 'ai4oshub'

# Fill with correct Docker image
conf["general"]["docker_image"]["value"] = f"{repo}/{image}"
# Fill with correct Docker image and tags (not needed for CVAT because hardcoded)
if item_name != 'ai4os-cvat':
conf["general"]["docker_image"]["value"] = f"{repo}/{image}"

# Add available Docker tags
tags = retrieve_docker_tags(image=image, repo=repo)
conf["general"]["docker_tag"]["options"] = tags
conf["general"]["docker_tag"]["value"] = tags[0]
tags = retrieve_docker_tags(image=image, repo=repo)
conf["general"]["docker_tag"]["options"] = tags
conf["general"]["docker_tag"]["value"] = tags[0]

# Modify the resources limits for a given user or VO
conf["hardware"] = quotas.limit_resources(
item_name=item_name,
vo=vo,
)
if conf.get("hardware", None):
conf["hardware"] = quotas.limit_resources(
item_name=item_name,
vo=vo,
)

return conf


Tools = Catalog()
Tools.get_items = types.MethodType(get_items, Tools)
Tools = Catalog(
repo='ai4os/tools-catalog',
)
Tools.get_config = types.MethodType(get_config, Tools)


Expand Down
Loading

0 comments on commit 8cc62ab

Please sign in to comment.