From b406971bdc2e647aa151aeda108410c09eacd1ea Mon Sep 17 00:00:00 2001 From: dafeliton Date: Sun, 29 Dec 2024 00:44:46 -0800 Subject: [PATCH] revamp --- images/scipy-ml-notebook/Dockerfile | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/images/scipy-ml-notebook/Dockerfile b/images/scipy-ml-notebook/Dockerfile index 117ca183..ed4697e4 100644 --- a/images/scipy-ml-notebook/Dockerfile +++ b/images/scipy-ml-notebook/Dockerfile @@ -8,10 +8,8 @@ USER root # https://www.tensorflow.org/install/source#linux # Python/Mamba deps -## tf 2.13 does not work with torch 2.2.1. Both require conflicting versions of typing-extensions -ARG CUDA_VERSION=12.1 CUDNN_VERSION=8.9.2.26 LIBNVINFER=7.2.2 LIBNVINFER_MAJOR_VERSION=7 \ - TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.3.1 \ - PROTOBUF_VERSION=3.20.3 +ARG CUDA_VERSION=12.1 \ + TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.3.1 # apt deps RUN apt-get update && \ @@ -42,10 +40,8 @@ USER jovyan RUN mamba install -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard # CUDA setup w/mamba -## TODO: Investigate this command, seems to duplicate cuda packages for nvidia (pypi + conda-forge). -# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11 -RUN mamba install -c "nvidia/label/cuda-12.1.1" cuda-nvcc \ - cuda-toolkit=$CUDA_VERSION \ +RUN mamba install -c "nvidia/label/cuda-12.1.1" \ + cuda-nvcc \ cuda-version=$CUDA_VERSION \ nccl \ -y && \ @@ -54,12 +50,6 @@ RUN mamba install -c "nvidia/label/cuda-12.1.1" cuda-nvcc \ mamba clean -a -y # Install scipy pip packages -## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice. -## https://github.com/spesmilo/electrum/issues/7825 -## pip cache purge didnt work here for some reason. -RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION -## cuda-python installed to have parity with tensorflow and cudnn -## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712 ## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues. RUN pip install opencv-contrib-python-headless \ opencv-python && \ @@ -77,8 +67,8 @@ RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \ ## no purge required but no-cache-dir is used. pip purge will actually break the build here! ## Beware of potentially needing to update these if we update the drivers. ## Check tensorrt_env_vars.sh if you have to bump tensorrt! -RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \ - pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION && \ +RUN pip install torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 --no-cache-dir && \ + pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION --no-cache-dir && \ fix-permissions $CONDA_DIR && \ fix-permissions /home/$NB_USER && \ mamba clean -a -y && \ @@ -101,7 +91,7 @@ RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \ # TensorRT fix for tensorflow ## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT) -## This will most definitely have to be changed after 8.6.1... +## To be removed with TF 2.18+ RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \ ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION