From 73ab07c547e1096c7563cf9b17c408de12794a6e Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 1 Dec 2023 10:12:38 -0500 Subject: [PATCH 1/5] Add gpus example --- gpus/.dockerignore | 1 + gpus/Dockerfile | 8 ++++++++ gpus/README.md | 38 ++++++++++++++++++++++++++++++++++++++ gpus/check_gpus.sh | 17 +++++++++++++++++ gpus/mlcube.yaml | 24 ++++++++++++++++++++++++ 5 files changed, 88 insertions(+) create mode 100644 gpus/.dockerignore create mode 100644 gpus/Dockerfile create mode 100644 gpus/README.md create mode 100644 gpus/check_gpus.sh create mode 100644 gpus/mlcube.yaml diff --git a/gpus/.dockerignore b/gpus/.dockerignore new file mode 100644 index 0000000..382f954 --- /dev/null +++ b/gpus/.dockerignore @@ -0,0 +1 @@ +workspace/ \ No newline at end of file diff --git a/gpus/Dockerfile b/gpus/Dockerfile new file mode 100644 index 0000000..fbaa343 --- /dev/null +++ b/gpus/Dockerfile @@ -0,0 +1,8 @@ +FROM nvidia/cuda:11.0-base + +# Copy code +COPY . /workspace +RUN chmod +x /workspace/*.sh + +# Set working directory +WORKDIR /workspace \ No newline at end of file diff --git a/gpus/README.md b/gpus/README.md new file mode 100644 index 0000000..d993729 --- /dev/null +++ b/gpus/README.md @@ -0,0 +1,38 @@ +# GPUs example + +## Project setup + +An important requirement is that you must have Docker and/or Singularity installed. 
+ +```bash +# Create Python environment and install MLCube with runners +virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker mlcube-singularity +# Fetch the gpus example from GitHub +git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples +git fetch origin pull/xxx/head:feature/gpu_example && git checkout feature/gpu_example +cd ./gpu_example/ +``` + +## MLCube tasks + +There is only one task that will output the variable `CUDA_VISIBLE_DEVICES` along with the output of the `nvidia-smi` command: + +```shell +mlcube run --task=check_gpus +``` + +You can modify the number of gpus by editing the number of `accelerator_count` inside the **mlcube.yaml** file. + +Also you can override the number of gpus to use by using the `--gpus` flag when running the command, example: + +```shell +mlcube run --task=check_gpus --gpus=2 +``` + +### Singularity + +For running on Singularity, you can define the platform while running the command as follows: + +```shell +mlcube run --task=check_gpus --platform=singularity +``` diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh new file mode 100644 index 0000000..f172480 --- /dev/null +++ b/gpus/check_gpus.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +LOG_DIR=${LOG_DIR:-"/"} + +# Handle MLCube parameters +while [ $# -gt 0 ]; do + case "$1" in + --log_dir=*) + LOG_DIR="${1#*=}" + ;; + *) ;; + esac + shift +done + +echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/train_console.log" +nvidia-smi |& tee -a "$LOG_DIR/train_console.log" diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml new file mode 100644 index 0000000..f35d40e --- /dev/null +++ b/gpus/mlcube.yaml @@ -0,0 +1,24 @@ +name: check_gpus +description: Check gpus example +authors: + - { name: "MLCommons Best Practices Working Group" } + +platform: + accelerator_count: 1 + +docker: + # Image name. + image: dfjbtest/gpus_example:0.0.1 + # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
+ build_context: "./" + # Docker file name within docker build context, default is `Dockerfile`. + build_file: "Dockerfile" + # GPU arguments + gpu_args: "--gpus=all" + +tasks: + check_gpus: + entrypoint: ./check_gpus.sh -a + parameters: + outputs: + log_dir: logs/ From 8537210879338b3cb5ec5890f685f83b3c832d08 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 1 Dec 2023 10:14:02 -0500 Subject: [PATCH 2/5] Fix PR number --- gpus/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpus/README.md b/gpus/README.md index d993729..80bf69e 100644 --- a/gpus/README.md +++ b/gpus/README.md @@ -9,7 +9,7 @@ An important requirement is that you must have Docker and/or Singularity install virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker mlcube-singularity # Fetch the gpus example from GitHub git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples -git fetch origin pull/xxx/head:feature/gpu_example && git checkout feature/gpu_example +git fetch origin pull/68/head:feature/gpu_example && git checkout feature/gpu_example cd ./gpu_example/ ``` From 35ed0d2082c1b0fa83cffcd4c6960dfaea3542aa Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 1 Dec 2023 11:17:46 -0500 Subject: [PATCH 3/5] Fix example logic --- gpus/Dockerfile | 2 +- gpus/README.md | 2 +- gpus/check_gpus.sh | 5 +++-- gpus/mlcube.yaml | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gpus/Dockerfile b/gpus/Dockerfile index fbaa343..a274d64 100644 --- a/gpus/Dockerfile +++ b/gpus/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.0-base +FROM nvidia/cuda:11.6.1-base-ubuntu20.04 # Copy code COPY . 
/workspace diff --git a/gpus/README.md b/gpus/README.md index 80bf69e..bbeb3aa 100644 --- a/gpus/README.md +++ b/gpus/README.md @@ -10,7 +10,7 @@ virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-d # Fetch the gpus example from GitHub git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples git fetch origin pull/68/head:feature/gpu_example && git checkout feature/gpu_example -cd ./gpu_example/ +cd ./gpus/ ``` ## MLCube tasks diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh index f172480..a84fe08 100644 --- a/gpus/check_gpus.sh +++ b/gpus/check_gpus.sh @@ -13,5 +13,6 @@ while [ $# -gt 0 ]; do shift done -echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/train_console.log" -nvidia-smi |& tee -a "$LOG_DIR/train_console.log" +echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/gpus.log" +nvidia-smi |& tee -a "$LOG_DIR/gpus.log" +nvidia-smi --query-gpu=gpu_name,uuid --format=csv |& tee -a "$LOG_DIR/gpus.log" diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml index f35d40e..0968106 100644 --- a/gpus/mlcube.yaml +++ b/gpus/mlcube.yaml @@ -14,7 +14,7 @@ docker: # Docker file name within docker build context, default is `Dockerfile`. 
build_file: "Dockerfile" # GPU arguments - gpu_args: "--gpus=all" + gpu_args: "--gpus=1" tasks: check_gpus: From f77c491fb6b11681359dfb80ec56fb9b35971888 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 1 Dec 2023 11:19:24 -0500 Subject: [PATCH 4/5] Add gitignore --- gpus/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 gpus/.gitignore diff --git a/gpus/.gitignore b/gpus/.gitignore new file mode 100644 index 0000000..ece6ca2 --- /dev/null +++ b/gpus/.gitignore @@ -0,0 +1 @@ +/workspace \ No newline at end of file From 518174d896cbb97dd89cb4431ee9c34d1f6c2b40 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 1 Dec 2023 17:52:17 -0500 Subject: [PATCH 5/5] Add NVIDIA_VISIBLE_DEVICES in output --- gpus/check_gpus.sh | 1 + gpus/mlcube.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh index a84fe08..c9e1135 100644 --- a/gpus/check_gpus.sh +++ b/gpus/check_gpus.sh @@ -14,5 +14,6 @@ while [ $# -gt 0 ]; do done echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/gpus.log" +echo "NVIDIA_VISIBLE_DEVICES $NVIDIA_VISIBLE_DEVICES" |& tee -a "$LOG_DIR/gpus.log" nvidia-smi |& tee -a "$LOG_DIR/gpus.log" nvidia-smi --query-gpu=gpu_name,uuid --format=csv |& tee -a "$LOG_DIR/gpus.log" diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml index 0968106..ea62eca 100644 --- a/gpus/mlcube.yaml +++ b/gpus/mlcube.yaml @@ -18,7 +18,7 @@ docker: tasks: check_gpus: - entrypoint: ./check_gpus.sh -a + entrypoint: ./check_gpus.sh parameters: outputs: log_dir: logs/