Skip to content

Commit

Permalink
[bugfix] install R dependencies used by rpy2 during image build
Browse files (browse the repository at this point in the history)
[bugfix] spurious exception raised when node_id already exists [the existing cell should be updated instead]
  • Loading branch information
AndyBRoswell committed Nov 29, 2024
1 parent ea37ff6 commit 619258d
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 33 deletions.
2 changes: 2 additions & 0 deletions vreapis/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ RUN /opt/venv/bin/pip install pip --upgrade
COPY ./requirements.txt /app
RUN /opt/venv/bin/pip install -r requirements.txt

RUN Rscript -e "install.packages(c('rlang', 'lobstr', 'purrr', 'renv'), repos = 'https://cloud.r-project.org')"

COPY . /app
RUN chmod +x entrypoint.sh

Expand Down
21 changes: 14 additions & 7 deletions vreapis/containerizer/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import jinja2
from django.db.models import QuerySet
from rest_framework.authentication import BaseAuthentication
from rest_framework.exceptions import ValidationError
from rest_framework.permissions import IsAuthenticated, BasePermission
from rest_framework.response import Response
from rest_framework.request import Request
Expand Down Expand Up @@ -153,10 +154,10 @@ def post(self, request: Request):
title += '-' + slugify(payload['JUPYTERHUB_USER'])

# If any of these change, we create a new cell in the catalog. This matches the cell properties saved in workflows.
# cell_identity_dict = {'title': title, 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, }
# cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True)
# node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]
node_id = str(time.time_ns())[len('0x'):]
cell_identity_dict = {'title': title, 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, }
cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True)
node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]
# node_id = str(time.time_ns())[len('0x'):]

cell = Cell(
node_id=node_id,
Expand Down Expand Up @@ -452,7 +453,7 @@ def create(self, request: Request, *args, **kwargs):
except Exception as ex:
return return_error('Error setting cell', ex)

common.logger.debug('current_cell: ' + current_cell.toJSON())
# common.logger.debug('current_cell: ' + current_cell.toJSON())
all_vars = current_cell.params + current_cell.inputs + current_cell.outputs
for param_name in all_vars:
if param_name not in current_cell.types:
Expand All @@ -462,8 +463,14 @@ def create(self, request: Request, *args, **kwargs):
return return_error(f'{current_cell.task_name} has not selected base image')
try:
serializer: CellSerializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
instance, created = Cell.objects.update_or_create(node_id=serializer.validated_data['node_id'], defaults=serializer.validated_data)
try:
serializer.is_valid(raise_exception=True)
except ValidationError as ex:
if 'node_id' in ex.detail and str(ex.detail) == 'cell with this node id already exists.':
Cell.objects.update(**serializer.validated_data)
else:
Cell.objects.create(**serializer.validated_data)
# instance, created = Cell.objects.update_or_create(node_id=serializer.validated_data['node_id'], defaults=serializer.validated_data)
except Exception as ex:
return return_error('Error adding or updating cell in catalog', ex)

Expand Down
46 changes: 23 additions & 23 deletions vreapis/services/extractor/rextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,29 @@
# robjects.conversion.set_conversion(robject_converter)

# install R packages
robjects.r('''
install_package_with_retry <- function(package_name, max_attempts = 5) {
for(i in 1:max_attempts) {
print(paste("Attempt", i, "to install", package_name))
tryCatch({
install.packages(package_name, quiet = TRUE)
print(paste(package_name, "installed successfully."))
return(TRUE)
}, warning = function(w) {
print(paste("Warning while installing", package_name, ":", w))
Sys.sleep(2)
}, error = function(e) {
print(paste("Failed to install", package_name, ":", e))
Sys.sleep(2)
})
}
return(FALSE)
}
''')
packnames = ('rlang', 'lobstr', 'purrr', 'renv',)
for p in packnames:
if not rpackages.isinstalled(p):
robjects.r(f'install_package_with_retry("{p}")')
# robjects.r('''
# install_package_with_retry <- function(package_name, max_attempts = 5) {
# for(i in 1:max_attempts) {
# print(paste("Attempt", i, "to install", package_name))
# tryCatch({
# install.packages(package_name, quiet = TRUE)
# print(paste(package_name, "installed successfully."))
# return(TRUE)
# }, warning = function(w) {
# print(paste("Warning while installing", package_name, ":", w))
# Sys.sleep(2)
# }, error = function(e) {
# print(paste("Failed to install", package_name, ":", e))
# Sys.sleep(2)
# })
# }
# return(FALSE)
# }
# ''')
# packnames = ('rlang', 'lobstr', 'purrr', 'renv',)
# for p in packnames:
# if not rpackages.isinstalled(p):
# robjects.r(f'install_package_with_retry("{p}")')

# This R code is used to obtain all assignment variables (source https://adv-r.hadley.nz/expressions.html)
r_env["result"] = robjects.r("""
Expand Down
6 changes: 3 additions & 3 deletions vreapis/tests/emulated-frontend/containerizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def test_post(endpoint: str, files: list[str]):
for file in files:
with open(f'{script_path}/dat/{file}') as f:
body: dict[str, any] = json.load(f)
match endpoint:
case 'addcell':
body['node_id'] = str(hex(time.time_ns())[len('0x'):])
# match endpoint:
# case 'addcell':
# body['node_id'] = str(hex(time.time_ns())[len('0x'):]) # use a unique node_id
response = session.post(f'{API_ENDPOINT}/{endpoint}', json.dumps(body), headers=headers, verify=False)
print(response.text)

Expand Down

0 comments on commit 619258d

Please sign in to comment.