Skip to content

Commit

Permalink
bump to py312, bump datasci deps, bump torch attempt 1
Browse files Browse the repository at this point in the history
  • Loading branch information
dafeliton committed Dec 29, 2024
1 parent 0d8c67a commit 23876e2
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 25 deletions.
8 changes: 4 additions & 4 deletions images/datascience-notebook/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# If you are building manually, pass PYTHON_VERSION/PY_VER_SHORT/JUPYTERHUB_VERSION with --build-arg
ARG PYTHON_VERSION=python-3.11.10
ARG PY_VER_SHORT=3.11
ARG JUPYTERHUB_VERSION=4.1.6
ARG PYTHON_VERSION=python-3.12.8
ARG PY_VER_SHORT=3.12
ARG JUPYTERHUB_VERSION=5.2.1

# Jupyter has changed its image URL (the base image is now pulled from quay.io)
FROM quay.io/jupyter/datascience-notebook:$PYTHON_VERSION
Expand Down Expand Up @@ -72,7 +72,7 @@ USER jovyan

# Python/Mamba Deps
## Package versions
ARG JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.3.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.3.1 NBCLASSIC_VERSION=1.1.0
ARG JUPYTERSERVER_VERSION=2.15.0 NBGRADER_VERSION=0.9.4 JUPYTERLAB_VERSION=4.3.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.3.2 NBCLASSIC_VERSION=1.1.0
ARG PANDAS_VERSION=2.2.3 STATSMODELS_VERSION=0.14.4 BOTTLENECK_VERSION=1.4.2 NUMEXPR_VERSION=2.10.2

# Install essential+datascience pip packages
Expand Down
51 changes: 36 additions & 15 deletions images/scipy-ml-notebook/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ USER root

# Python/Mamba deps
## tf 2.13 does not work with torch 2.2.1. Both require conflicting versions of typing-extensions
ARG CUDA_VERSION=12.1 CUDNN_VERSION=8.9.2.26 LIBNVINFER=7.2.2 LIBNVINFER_MAJOR_VERSION=7 \
TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.3.1 \
ARG CUDA_VERSION=12.4 LIBNVINFER=7.2.2 LIBNVINFER_MAJOR_VERSION=7 \
TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.5.1 \
PROTOBUF_VERSION=3.20.3

# apt deps
Expand Down Expand Up @@ -39,20 +39,37 @@ RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh
USER jovyan

# Install nvdashboard for GPU monitoring
RUN mamba install -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard
# jupyterlab-nvdashboard is pulled from rapidsai-nightly first, then conda-forge.
# fix-permissions and the mamba cache clean run in the same layer as the install.
RUN mamba install -y \
        -c rapidsai-nightly \
        -c conda-forge \
        jupyterlab-nvdashboard \
    && fix-permissions $CONDA_DIR \
    && fix-permissions /home/$NB_USER \
    && mamba clean -a -y

# CUDA setup w/mamba
## TODO: Investigate this command, seems to duplicate cuda packages for nvidia (pypi + conda-forge).
# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
RUN mamba install -c "nvidia/label/cuda-12.1.1" cuda-nvcc \
RUN mamba install -c "nvidia/label/cuda-12.4" \
-c conda-forge \
cuda-nvcc \
cuda-toolkit=$CUDA_VERSION \
cuda-version=$CUDA_VERSION \
cudnn \
libcublas \
nccl \
-y && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y

# Install the PyTorch stack with mamba: pytorch is pinned to TORCH_VERSION and
# pytorch-cuda to CUDA_VERSION; torchaudio/torchvision are left to the solver.
# Channel order (conda-forge, then pytorch) is kept exactly as before.
# fix-permissions and the mamba cache clean run in the same layer as the install.
RUN mamba install -y \
        -c conda-forge \
        -c pytorch \
        pytorch=$TORCH_VERSION \
        pytorch-cuda=$CUDA_VERSION \
        torchaudio \
        torchvision \
    && fix-permissions $CONDA_DIR \
    && fix-permissions /home/$NB_USER \
    && mamba clean -a -y

# Install scipy pip packages
## Install protobuf explicitly to avoid a base-type error; if we don't, it seems to get installed twice.
## https://github.com/spesmilo/electrum/issues/7825
Expand All @@ -77,18 +94,19 @@ RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
## no purge required but no-cache-dir is used. pip purge will actually break the build here!
## Beware of potentially needing to update these if we update the drivers.
## Check tensorrt_env_vars.sh if you have to bump tensorrt!
RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge
## We install torch and its dependencies EXCLUDING any nvidia* deps. These are handled by our conda env.
#RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
RUN pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge

RUN pip install transformers datasets accelerate huggingface-hub timm && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge

USER $NB_UID:$NB_GID
ENV PATH=${PATH}:/usr/local/nvidia/bin:/opt/conda/bin
Expand All @@ -105,5 +123,8 @@ RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
# Create versioned aliases (libnvinfer*.so.$TENSORRT_VERSION) that point at the
# .so.8 libraries in the pip-installed tensorrt_libs package.
# The site-packages directory is resolved from the active interpreter instead of
# hard-coding /opt/conda/lib/python3.11, so the links still land in the right
# place when the base image's Python minor version changes (e.g. 3.11 -> 3.12).
RUN TRT_LIBS="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')/tensorrt_libs" && \
    ln -s "$TRT_LIBS/libnvinfer_plugin.so.8" "$TRT_LIBS/libnvinfer_plugin.so.$TENSORRT_VERSION" && \
    ln -s "$TRT_LIBS/libnvinfer.so.8" "$TRT_LIBS/libnvinfer.so.$TENSORRT_VERSION"

# TODO: Clean up unnecessary packages
# RUN pip uninstall nvidia-cuda-runtime-cu12 nvidia-cublas-cu12-12.6.4.1 nvidia-cudnn-cu12

# Run datahub scripts
RUN . /tmp/activate.sh
12 changes: 6 additions & 6 deletions images/spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ images:
datascience-notebook:
image_name: ghcr.io/ucsd-ets/datascience-notebook
build_args:
PYTHON_VERSION: python-3.11.10
PY_VER_SHORT: "3.11"
JUPYTERHUB_VERSION: 4.1.6
PYTHON_VERSION: python-3.12.8
PY_VER_SHORT: "3.12"
JUPYTERHUB_VERSION: 5.2.1
info_cmds: [PY_VER, PIP_LIST, CONDA_INFO, CONDA_LIST, APT_PKG_LIST]

rstudio-notebook:
Expand All @@ -25,7 +25,7 @@ images:
#prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.

tag:
prefix: "2025.1"
prefix: "2025.4"

all_info_cmds:
PY_VER:
Expand All @@ -39,10 +39,10 @@ all_info_cmds:
command: conda info
CONDA_LIST:
description: Conda Packages
command: conda list
command: mamba list
APT_PKG_LIST:
description: System Packages
command: apt list --installed
CUDA_VERSION:
description: CUDA Version
command: bash -c 'conda list | grep "cuda\|cudnn\|nccl"'
command: bash -c 'mamba list | grep "cuda\|cudnn\|nccl"'

0 comments on commit 23876e2

Please sign in to comment.