From 619258d41278e48ad917326d4830cc61e6533c05 Mon Sep 17 00:00:00 2001
From: Andy Bayer Roswell <andyroswellrr@gmail.com>
Date: Sat, 30 Nov 2024 00:36:04 +0100
Subject: [PATCH] [bugfix] install R dependencies used by rpy2 during image
 build; [bugfix] fix mistaken exception when node_id already exists (update
 the existing cell instead)

---
 vreapis/Dockerfile                            |  2 +
 vreapis/containerizer/views.py                | 21 ++++++---
 vreapis/services/extractor/rextractor.py      | 46 +++++++++----------
 .../tests/emulated-frontend/containerizer.py  |  6 +--
 4 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/vreapis/Dockerfile b/vreapis/Dockerfile
index 0ff8ad8..9777092 100644
--- a/vreapis/Dockerfile
+++ b/vreapis/Dockerfile
@@ -11,6 +11,9 @@ RUN /opt/venv/bin/pip install pip --upgrade
 COPY ./requirements.txt /app
 RUN /opt/venv/bin/pip install -r requirements.txt
 
+# install.packages() only warns when a package fails to install, so verify the result and fail the image build explicitly
+RUN Rscript -e "pkgs <- c('rlang', 'lobstr', 'purrr', 'renv'); install.packages(pkgs, repos = 'https://cloud.r-project.org'); missing <- setdiff(pkgs, rownames(installed.packages())); if (length(missing) > 0) stop('R packages failed to install: ', paste(missing, collapse = ', '))"
+
 COPY . /app
 RUN chmod +x entrypoint.sh
 
diff --git a/vreapis/containerizer/views.py b/vreapis/containerizer/views.py
index cc2bcb3..ca36cb2 100644
--- a/vreapis/containerizer/views.py
+++ b/vreapis/containerizer/views.py
@@ -17,6 +17,7 @@
 import jinja2
 from django.db.models import QuerySet
 from rest_framework.authentication import BaseAuthentication
+from rest_framework.exceptions import ValidationError
 from rest_framework.permissions import IsAuthenticated, BasePermission
 from rest_framework.response import Response
 from rest_framework.request import Request
@@ -153,10 +154,10 @@ def post(self, request: Request):
             title += '-' + slugify(payload['JUPYTERHUB_USER'])
 
         # If any of these change, we create a new cell in the catalog. This matches the cell properties saved in workflows.
-        # cell_identity_dict = {'title': title, 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, }
-        # cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True)
-        # node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]
-        node_id = str(time.time_ns())[len('0x'):]
+        cell_identity_dict = {'title': title, 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, }
+        cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True)
+        node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]
+        # node_id = str(time.time_ns())[len('0x'):]
 
         cell = Cell(
             node_id=node_id,
@@ -452,7 +453,7 @@ def create(self, request: Request, *args, **kwargs):
         except Exception as ex:
             return return_error('Error setting cell', ex)
 
-        common.logger.debug('current_cell: ' + current_cell.toJSON())
+        # common.logger.debug('current_cell: ' + current_cell.toJSON())
         all_vars = current_cell.params + current_cell.inputs + current_cell.outputs
         for param_name in all_vars:
             if param_name not in current_cell.types:
@@ -462,8 +463,12 @@ def create(self, request: Request, *args, **kwargs):
             return return_error(f'{current_cell.task_name} has not selected base image')
         try:
-            serializer: CellSerializer = self.get_serializer(data=request.data)
+            # Bind the serializer to the cell that already has this node_id (if any), so the
+            # uniqueness check on node_id skips that row and an existing cell gets updated
+            # below instead of being rejected with "cell with this node id already exists."
+            existing_cell = Cell.objects.filter(node_id=request.data.get('node_id')).first()
+            serializer: CellSerializer = self.get_serializer(existing_cell, data=request.data)
             serializer.is_valid(raise_exception=True)
             instance, created = Cell.objects.update_or_create(node_id=serializer.validated_data['node_id'], defaults=serializer.validated_data)
         except Exception as ex:
             return return_error('Error adding or updating cell in catalog', ex)
 
diff --git a/vreapis/services/extractor/rextractor.py b/vreapis/services/extractor/rextractor.py
index 8a2a19a..7ea963c 100644
--- a/vreapis/services/extractor/rextractor.py
+++ b/vreapis/services/extractor/rextractor.py
@@ -21,29 +21,29 @@
 # robjects.conversion.set_conversion(robject_converter)
 
 # install R packages
-robjects.r('''
-install_package_with_retry <- function(package_name, max_attempts = 5) {
-  for(i in 1:max_attempts) {
-    print(paste("Attempt", i, "to install", package_name))
-    tryCatch({
-      install.packages(package_name, quiet = TRUE)
-      print(paste(package_name, "installed successfully."))
-      return(TRUE)
-    }, warning = function(w) {
-      print(paste("Warning while installing", package_name, ":", w))
-      Sys.sleep(2)
-    }, error = function(e) {
-      print(paste("Failed to install", package_name, ":", e))
-      Sys.sleep(2)
-    })
-  }
-  return(FALSE)
-}
-''')
-packnames = ('rlang', 'lobstr', 'purrr', 'renv',)
-for p in packnames:
-    if not rpackages.isinstalled(p):
-        robjects.r(f'install_package_with_retry("{p}")')
+# robjects.r('''
+# install_package_with_retry <- function(package_name, max_attempts = 5) {
+#   for(i in 1:max_attempts) {
+#     print(paste("Attempt", i, "to install", package_name))
+#     tryCatch({
+#       install.packages(package_name, quiet = TRUE)
+#       print(paste(package_name, "installed successfully."))
+#       return(TRUE)
+#     }, warning = function(w) {
+#       print(paste("Warning while installing", package_name, ":", w))
+#       Sys.sleep(2)
+#     }, error = function(e) {
+#       print(paste("Failed to install", package_name, ":", e))
+#       Sys.sleep(2)
+#     })
+#   }
+#   return(FALSE)
+# }
+# ''')
+# packnames = ('rlang', 'lobstr', 'purrr', 'renv',)
+# for p in packnames:
+#     if not rpackages.isinstalled(p):
+#         robjects.r(f'install_package_with_retry("{p}")')
 
 # This R code is used to obtain all assignment variables (source https://adv-r.hadley.nz/expressions.html)
 r_env["result"] = robjects.r("""
diff --git a/vreapis/tests/emulated-frontend/containerizer.py b/vreapis/tests/emulated-frontend/containerizer.py
index 26282a2..f8489da 100644
--- a/vreapis/tests/emulated-frontend/containerizer.py
+++ b/vreapis/tests/emulated-frontend/containerizer.py
@@ -32,9 +32,9 @@ def test_post(endpoint: str, files: list[str]):
     for file in files:
         with open(f'{script_path}/dat/{file}') as f:
             body: dict[str, any] = json.load(f)
-            match endpoint:
-                case 'addcell':
-                    body['node_id'] = str(hex(time.time_ns())[len('0x'):])
+            # match endpoint:
+            #     case 'addcell':
+            #         body['node_id'] = str(hex(time.time_ns())[len('0x'):])  # use a unique node_id
         response = session.post(f'{API_ENDPOINT}/{endpoint}', json.dumps(body), headers=headers, verify=False)
         print(response.text)