scrna interactive docker updated

IntelLabs · Sep 4, 2024 · 9f208f1 · 9f208f1
1 parent eda599d
commit 9f208f1
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 52 deletions.
diff --git a/pipelines/single-cell-RNA-seq-analysis/Dockerfile b/pipelines/single-cell-RNA-seq-analysis/Dockerfile
@@ -23,69 +23,70 @@
 # Authors: Narendra Chaudhary <[email protected]>; Sanchit Misra <[email protected]>
 
 # Install Base miniconda image
-ARG BASE_IMAGE=continuumio/miniconda3
-FROM ${BASE_IMAGE}
+ARG FROM_IMAGE=ubuntu:22.04
 
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    git build-essential gcc curl gnupg gnupg2 gnupg1 sudo tar wget
-
-RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
-| gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
-RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
-RUN apt-get update && \
-  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    curl ca-certificates gpg-agent software-properties-common intel-basekit intel-hpckit && \
-  rm -rf /var/lib/apt/lists/*
+# Stage 1 (conda_setup): build the conda environment on a pinned miniforge base image
+ARG BASE_IMAGE=condaforge/miniforge3:23.1.0-3
+FROM ${BASE_IMAGE} AS conda_setup
 
 RUN conda update -n base conda
 COPY ./environment.yml ./
-RUN conda env create --name=single_cell -f environment.yml
-RUN echo "source activate single_cell" > ~/.bashrc
-ENV PATH /opt/conda/envs/single_cell/bin:$PATH
-# RUN conda install python==3.8.0
-# RUN conda install -y seaborn=0.12.2 scikit-learn=1.0.2 statsmodels=0.13.2 numba=0.53.1 pytables=3.7.0 matplotlib-base=3.6.2 pandas=1.5.2
-# RUN conda install -y -c conda-forge mkl-service=2.4.0
-# RUN conda install -y -c conda-forge python-igraph=0.10.3 leidenalg=0.9.1
-# RUN conda install -y -c conda-forge cython=0.29.33 jinja2=3.1.2 clang-tools=15.0.7
-# RUN conda install -y -c katanagraph/label/dev -c conda-forge katana-python
-
-# RUN pip install scanpy==1.8.1
-# RUN pip install scikit-learn-intelex==2023.0.1
-# RUN pip install pybind11
-# RUN pip install jupyter
-# RUN pip install wget
 
+RUN conda env create --name=single_cell -f ./environment.yml
 COPY ./_t_sne.py /opt/conda/lib/python3.8/site-packages/daal4py/sklearn/manifold/_t_sne.py
 
 
-WORKDIR /workspace
-ENV HOME /workspace
-COPY ./notebooks/ /workspace
+FROM ${FROM_IMAGE} AS builder
+# Install build tooling; purge apt caches and package lists last, in the same layer, so they stay out of the image.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    git build-essential gcc curl gnupg gnupg2 gnupg1 sudo wget tar ca-certificates \
+    && apt-get autoremove -y \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+
+# Reuse the conda tree built in the conda_setup stage and put the single_cell env first on PATH.
+COPY --from=conda_setup /opt/conda /opt/conda
+ENV PATH="/opt/conda/envs/single_cell/bin:$PATH"
+RUN echo "source /opt/conda/bin/activate single_cell" >> ~/.bashrc
+
+# Non-root user setup
+ENV SERVICE_NAME="scrna"
+
+RUN groupadd --gid 1001 $SERVICE_NAME && \
+    useradd -m -g $SERVICE_NAME --shell /bin/false --uid 1001 $SERVICE_NAME
 
 
 RUN pip uninstall -y umap-learn
 WORKDIR /
 RUN wget https://github.com/IntelLabs/Open-Omics-Acceleration-Framework/releases/download/2.1/Source_code_with_submodules.tar.gz
 RUN tar -xzf Source_code_with_submodules.tar.gz
-WORKDIR ./Open-Omics-Acceleration-Framework/lib/tal/applications/UMAP_fast/umap_extend/
+#RUN git clone --recursive https://github.com/IntelLabs/Open-Omics-Acceleration-Framework.git
+
+#SHELL ["/bin/bash", "-c", "source activate single_cell"]
+WORKDIR /Open-Omics-Acceleration-Framework/lib/tal/applications/UMAP_fast/umap_extend/
 RUN python setup.py install
 
 WORKDIR ../umap/
-RUN python setup.py install  
+RUN python setup.py install
+
+RUN chown -R $SERVICE_NAME:$SERVICE_NAME /Open-Omics-Acceleration-Framework /opt
+# Switch to non-root user
+USER $SERVICE_NAME
+
+
+WORKDIR /Open-Omics-Acceleration-Framework/pipelines/single-cell-RNA-seq-analysis/notebooks
 
-# ENV NUMEXPR_MAX_THREADS=64
-# ENV NUMBA_NUM_THREADS=64
 
-WORKDIR /workspace
 CMD jupyter notebook \
-		--no-browser \
-		--allow-root \
-		--port=8888 \
-		--ip=0.0.0.0 \
-		--notebook-dir=/workspace \
-		--NotebookApp.password="" \
-		--NotebookApp.token="" \
-		--NotebookApp.password_required=False
-
-# build with "docker build -t scanpy ."
-# run with "docker run -it -p 8888:8888 -v ~/Open-Omics-Acceleration-Framework/pipelines/single_cell_pipeline/data:/data scanpy"
+                --no-browser \
+                --allow-root \
+                --port=8888 \
+                --ip=0.0.0.0 \
+                --notebook-dir=/Open-Omics-Acceleration-Framework/pipelines/single-cell-RNA-seq-analysis/notebooks \
+                --NotebookApp.password="" \
+                --NotebookApp.token="" \
+                --NotebookApp.password_required=False
+
+
+
diff --git a/pipelines/single-cell-RNA-seq-analysis/README.md b/pipelines/single-cell-RNA-seq-analysis/README.md
@@ -9,9 +9,8 @@ Given a cell by gene matrix, this [scanpy](https://github.com/scverse/scanpy) ba
 
 # Download entire repository
 ```bash
-
-wget https://github.com/IntelLabs/Open-Omics-Acceleration-Framework/releases/download/2.1/Source_code_with_submodules.tar.gz
-tar -xzf Source_code_with_submodules.tar.gz
+cd ~
+git clone https://github.com/IntelLabs/Open-Omics-Acceleration-Framework.git
 cd ~/Open-Omics-Acceleration-Framework/pipelines/single-cell-RNA-seq-analysis
 ```
 
@@ -46,8 +45,7 @@ docker build -f Dockerfile.python -t scanpy_python . # Create a docker image nam
 # Download dataset
 wget -P  $DATA_DIR https://rapids-single-cell-examples.s3.us-east-2.amazonaws.com/1M_brain_cells_10X.sparse.h5ad
 
-docker run -v $OUTPUT_DIR:/workspace/figures -v $DATA_DIR:/data scanpy_python 
-
+docker run -v $OUTPUT_DIR:/Open-Omics-Acceleration-Framework/pipelines/single-cell-RNA-seq-analysis/notebooks/figures  -v $DATA_DIR:/data  -it scanpy_python
 ```
 
 

diff --git a/pipelines/single-cell-RNA-seq-analysis/notebooks/1.3_million_single_cell_analysis.ipynb b/pipelines/single-cell-RNA-seq-analysis/notebooks/1.3_million_single_cell_analysis.ipynb
@@ -60,7 +60,7 @@
    "outputs": [],
    "source": [
     "# Add path to input file here.\n",
-    "input_file = \"../data/1M_brain_cells_10X.sparse.h5ad\"\n",
+    "input_file = \"/data/1M_brain_cells_10X.sparse.h5ad\"\n",
     "# USE_FIRST_N_CELLS = 10000\n",
     "USE_FIRST_N_CELLS = -1           # -1 indicates use whole file"
    ]