From 619258d41278e48ad917326d4830cc61e6533c05 Mon Sep 17 00:00:00 2001 From: Andy Bayer Roswell <andyroswellrr@gmail.com> Date: Sat, 30 Nov 2024 00:36:04 +0100 Subject: [PATCH] [bugfix] install R dependencies used by rpy2 during image build [bugfix] mistaken exception when node_id exists [should update the existing cell] --- vreapis/Dockerfile | 2 + vreapis/containerizer/views.py | 21 ++++++--- vreapis/services/extractor/rextractor.py | 46 +++++++++---------- .../tests/emulated-frontend/containerizer.py | 6 +-- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/vreapis/Dockerfile b/vreapis/Dockerfile index 0ff8ad8..9777092 100644 --- a/vreapis/Dockerfile +++ b/vreapis/Dockerfile @@ -11,6 +11,8 @@ RUN /opt/venv/bin/pip install pip --upgrade COPY ./requirements.txt /app RUN /opt/venv/bin/pip install -r requirements.txt +RUN Rscript -e "install.packages(c('rlang', 'lobstr', 'purrr', 'renv'), repos = 'https://cloud.r-project.org')" + COPY . /app RUN chmod +x entrypoint.sh diff --git a/vreapis/containerizer/views.py b/vreapis/containerizer/views.py index cc2bcb3..ca36cb2 100644 --- a/vreapis/containerizer/views.py +++ b/vreapis/containerizer/views.py @@ -17,6 +17,7 @@ import jinja2 from django.db.models import QuerySet from rest_framework.authentication import BaseAuthentication +from rest_framework.exceptions import ValidationError from rest_framework.permissions import IsAuthenticated, BasePermission from rest_framework.response import Response from rest_framework.request import Request @@ -153,10 +154,10 @@ def post(self, request: Request): title += '-' + slugify(payload['JUPYTERHUB_USER']) # If any of these change, we create a new cell in the catalog. This matches the cell properties saved in workflows. - # cell_identity_dict = {'title': title, 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, } - # cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True) - # node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7] - node_id = str(time.time_ns())[len('0x'):] + cell_identity_dict = {'title': title, 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, } + cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True) + node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7] + # node_id = str(time.time_ns())[len('0x'):] cell = Cell( node_id=node_id, @@ -452,7 +453,7 @@ def create(self, request: Request, *args, **kwargs): except Exception as ex: return return_error('Error setting cell', ex) - common.logger.debug('current_cell: ' + current_cell.toJSON()) + # common.logger.debug('current_cell: ' + current_cell.toJSON()) all_vars = current_cell.params + current_cell.inputs + current_cell.outputs for param_name in all_vars: if param_name not in current_cell.types: @@ -462,8 +463,14 @@ def create(self, request: Request, *args, **kwargs): return return_error(f'{current_cell.task_name} has not selected base image') try: serializer: CellSerializer = self.get_serializer(data=request.data) - serializer.is_valid(raise_exception=True) - instance, created = Cell.objects.update_or_create(node_id=serializer.validated_data['node_id'], defaults=serializer.validated_data) + try: + serializer.is_valid(raise_exception=True) + except ValidationError as ex: + if 'node_id' in ex.detail and str(ex.detail) == 'cell with this node id already exists.': + Cell.objects.update(**serializer.validated_data) + else: + Cell.objects.create(**serializer.validated_data) + # instance, created = Cell.objects.update_or_create(node_id=serializer.validated_data['node_id'], defaults=serializer.validated_data) except Exception as ex: return return_error('Error adding or updating cell in catalog', ex) diff --git a/vreapis/services/extractor/rextractor.py b/vreapis/services/extractor/rextractor.py index 8a2a19a..7ea963c 100644 --- a/vreapis/services/extractor/rextractor.py +++ b/vreapis/services/extractor/rextractor.py @@ -21,29 +21,29 @@ # robjects.conversion.set_conversion(robject_converter) # install R packages -robjects.r(''' -install_package_with_retry <- function(package_name, max_attempts = 5) { - for(i in 1:max_attempts) { - print(paste("Attempt", i, "to install", package_name)) - tryCatch({ - install.packages(package_name, quiet = TRUE) - print(paste(package_name, "installed successfully.")) - return(TRUE) - }, warning = function(w) { - print(paste("Warning while installing", package_name, ":", w)) - Sys.sleep(2) - }, error = function(e) { - print(paste("Failed to install", package_name, ":", e)) - Sys.sleep(2) - }) - } - return(FALSE) -} -''') -packnames = ('rlang', 'lobstr', 'purrr', 'renv',) -for p in packnames: - if not rpackages.isinstalled(p): - robjects.r(f'install_package_with_retry("{p}")') +# robjects.r(''' +# install_package_with_retry <- function(package_name, max_attempts = 5) { +# for(i in 1:max_attempts) { +# print(paste("Attempt", i, "to install", package_name)) +# tryCatch({ +# install.packages(package_name, quiet = TRUE) +# print(paste(package_name, "installed successfully.")) +# return(TRUE) +# }, warning = function(w) { +# print(paste("Warning while installing", package_name, ":", w)) +# Sys.sleep(2) +# }, error = function(e) { +# print(paste("Failed to install", package_name, ":", e)) +# Sys.sleep(2) +# }) +# } +# return(FALSE) +# } +# ''') +# packnames = ('rlang', 'lobstr', 'purrr', 'renv',) +# for p in packnames: +# if not rpackages.isinstalled(p): +# robjects.r(f'install_package_with_retry("{p}")') # This R code is used to obtain all assignment variables (source https://adv-r.hadley.nz/expressions.html) r_env["result"] = robjects.r(""" diff --git a/vreapis/tests/emulated-frontend/containerizer.py b/vreapis/tests/emulated-frontend/containerizer.py index 26282a2..f8489da 100644 --- a/vreapis/tests/emulated-frontend/containerizer.py +++ b/vreapis/tests/emulated-frontend/containerizer.py @@ -32,9 +32,9 @@ def test_post(endpoint: str, files: list[str]): for file in files: with open(f'{script_path}/dat/{file}') as f: body: dict[str, any] = json.load(f) - match endpoint: - case 'addcell': - body['node_id'] = str(hex(time.time_ns())[len('0x'):]) + # match endpoint: + # case 'addcell': + # body['node_id'] = str(hex(time.time_ns())[len('0x'):]) # use a unique node_id response = session.post(f'{API_ENDPOINT}/{endpoint}', json.dumps(body), headers=headers, verify=False) print(response.text)