From ffa47955c5d72806e9b78aac70dc3929be5173f9 Mon Sep 17 00:00:00 2001
From: dafeliton
Date: Sun, 29 Dec 2024 00:40:04 -0800
Subject: [PATCH] revamp scipy

---
Reviewer note (this area is ignored by git and is not part of the commit):
  This patch was recovered from a copy whose line breaks and indentation had
  been collapsed. Line structure was rebuilt from the hunk headers and the
  diffstat (11 insertions, 34 deletions), which the hunks below satisfy.
  Two things could not be recovered and are assumptions to confirm:
    * leading indentation of continuation lines (4 spaces assumed);
    * exact position of blank context lines inside each hunk.
  If the patch does not apply cleanly, try `git apply --ignore-whitespace`,
  or regenerate it from commit ffa47955 with `git format-patch -1`.

 images/scipy-ml-notebook/Dockerfile | 45 +++++++----------------------
 1 file changed, 11 insertions(+), 34 deletions(-)

diff --git a/images/scipy-ml-notebook/Dockerfile b/images/scipy-ml-notebook/Dockerfile
index 28585ac7..0fd407a1 100644
--- a/images/scipy-ml-notebook/Dockerfile
+++ b/images/scipy-ml-notebook/Dockerfile
@@ -1,4 +1,4 @@
-ARG BASE_TAG=latest
+ARG BASE_TAG=2025.4-py312-cuda124-exp
 FROM ghcr.io/ucsd-ets/datascience-notebook:${BASE_TAG}
 
 USER root
@@ -9,9 +9,8 @@ USER root
 
 # Python/Mamba deps
 ## tf 2.13 does not work with torch 2.2.1. Both require conflicting versions of typing-extensions
-ARG CUDA_VERSION=12.4 LIBNVINFER=7.2.2 LIBNVINFER_MAJOR_VERSION=7 \
-    TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.5.1 \
-    PROTOBUF_VERSION=3.20.3
+ARG CUDA_VERSION=12.4 \
+    TENSORFLOW_VERSION=2.18.0 KERAS_VERSION=3.7.0 TENSORRT_VERSION=10.7.0 TORCH_VERSION=2.5.1
 
 # apt deps
 RUN apt-get update && \
@@ -47,34 +46,16 @@ RUN mamba install -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard \
 
 # CUDA setup w/mamba
 # cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
-RUN mamba install -c "nvidia/label/cuda-12.4" \
+RUN mamba install -c "nvidia/label/cuda-${CUDA_VERSION}" \
     -c conda-forge \
     cuda-nvcc \
-    cuda-toolkit=$CUDA_VERSION \
-    cuda-version=$CUDA_VERSION \
-    cudnn \
-    libcublas \
-    nccl \
-    -y && \
-    fix-permissions $CONDA_DIR && \
-    fix-permissions /home/$NB_USER && \
-    mamba clean -a -y
-
-RUN mamba install -c conda-forge -c pytorch \
-    pytorch=$TORCH_VERSION \
-    pytorch-cuda=$CUDA_VERSION \
-    torchvision \
-    torchaudio \
+    cuda-version=${CUDA_VERSION} \
     -y && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
     mamba clean -a -y
 
 # Install scipy pip packages
-## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice.
-## https://github.com/spesmilo/electrum/issues/7825
-## pip cache purge didnt work here for some reason.
-RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION
 ## cuda-python installed to have parity with tensorflow and cudnn
 ## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712
 ## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues.
@@ -95,7 +76,12 @@ RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
 ## Beware of potentially needing to update these if we update the drivers.
 ## Check tensorrt_env_vars.sh if you have to bump tensorrt!
 ## We install torch and its dependencies EXCLUDING any nvidia* deps. These are handled by our conda env.
-#RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
+RUN pip install --no-cache-dir \
+    torch==${TORCH_VERSION} \
+    torchvision \
+    torchaudio \
+    --index-url https://download.pytorch.org/whl/cu124
+
 RUN pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
@@ -117,14 +103,5 @@ RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
     cp $CONDA_DIR/nvvm/libdevice/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
 #CUDA 11: cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
 
-# TensorRT fix for tensorflow
-## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT)
-## This will most definitely have to be changed after 8.6.1... 
-RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \
-    ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION
-
-# Cleanup unnecessary pkgs TODO
-# RUN pip uninstall nvidia-cuda-runtime-cu12 nvidia-cublas-cu12-12.6.4.1 nvidia-cudnn-cu12
-
 # Run datahub scripts
 RUN . /tmp/activate.sh
\ No newline at end of file