From 73ab07c547e1096c7563cf9b17c408de12794a6e Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 1 Dec 2023 10:12:38 -0500
Subject: [PATCH 1/5] Add gpus example

---
 gpus/.dockerignore |  1 +
 gpus/Dockerfile    |  8 ++++++++
 gpus/README.md     | 38 ++++++++++++++++++++++++++++++++++++++
 gpus/check_gpus.sh | 17 +++++++++++++++++
 gpus/mlcube.yaml   | 24 ++++++++++++++++++++++++
 5 files changed, 88 insertions(+)
 create mode 100644 gpus/.dockerignore
 create mode 100644 gpus/Dockerfile
 create mode 100644 gpus/README.md
 create mode 100644 gpus/check_gpus.sh
 create mode 100644 gpus/mlcube.yaml

diff --git a/gpus/.dockerignore b/gpus/.dockerignore
new file mode 100644
index 0000000..382f954
--- /dev/null
+++ b/gpus/.dockerignore
@@ -0,0 +1 @@
+workspace/
\ No newline at end of file
diff --git a/gpus/Dockerfile b/gpus/Dockerfile
new file mode 100644
index 0000000..fbaa343
--- /dev/null
+++ b/gpus/Dockerfile
@@ -0,0 +1,8 @@
+FROM nvidia/cuda:11.0-base
+
+# Copy code
+COPY . /workspace
+RUN chmod +x /workspace/*.sh
+
+# Set working directory
+WORKDIR /workspace
\ No newline at end of file
diff --git a/gpus/README.md b/gpus/README.md
new file mode 100644
index 0000000..d993729
--- /dev/null
+++ b/gpus/README.md
@@ -0,0 +1,38 @@
+# GPUs example
+
+## Project setup
+
+An important requirement is that you must have Docker and/or Singularity installed.
+
+```bash
+# Create Python environment and install MLCube with runners 
+virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker mlcube-singularity
+# Fetch the gpus example from GitHub
+git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples
+git fetch origin pull/xxx/head:feature/gpu_example && git checkout feature/gpu_example
+cd ./gpu_example/
+```
+
+## MLCube tasks
+
+There is only one task that will output the variable `CUDA_VISIBLE_DEVICES` along with the output of the `nvidia-smi` command:
+
+```shell
+mlcube run --task=check_gpus
+```
+
+You can change the number of GPUs by editing the `accelerator_count` value inside the **mlcube.yaml** file.
+
+You can also override the number of GPUs at run time with the `--gpus` flag, for example:
+
+```shell
+mlcube run --task=check_gpus --gpus=2
+```
+
+### Singularity
+
+For running on Singularity, you can define the platform while running the command as follows:
+
+```shell
+mlcube run --task=check_gpus --platform=singularity
+```
diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh
new file mode 100644
index 0000000..f172480
--- /dev/null
+++ b/gpus/check_gpus.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+LOG_DIR=${LOG_DIR:-"/"}
+
+# Handle MLCube parameters
+while [ $# -gt 0 ]; do
+    case "$1" in
+    --log_dir=*)
+        LOG_DIR="${1#*=}"
+        ;;
+    *) ;;
+    esac
+    shift
+done
+
+echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/train_console.log"
+nvidia-smi |& tee -a "$LOG_DIR/train_console.log"
diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml
new file mode 100644
index 0000000..f35d40e
--- /dev/null
+++ b/gpus/mlcube.yaml
@@ -0,0 +1,24 @@
+name: check_gpus
+description: Check gpus example
+authors:
+  - { name: "MLCommons Best Practices Working Group" }
+
+platform:
+  accelerator_count: 1
+
+docker:
+  # Image name.
+  image: dfjbtest/gpus_example:0.0.1
+  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
+  build_context: "./"
+  # Docker file name within docker build context, default is `Dockerfile`.
+  build_file: "Dockerfile"
+  # GPU arguments
+  gpu_args: "--gpus=all"
+
+tasks:
+  check_gpus:
+    entrypoint: ./check_gpus.sh -a
+    parameters:
+      outputs:
+        log_dir: logs/

From 8537210879338b3cb5ec5890f685f83b3c832d08 Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 1 Dec 2023 10:14:02 -0500
Subject: [PATCH 2/5] Fix PR number

---
 gpus/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpus/README.md b/gpus/README.md
index d993729..80bf69e 100644
--- a/gpus/README.md
+++ b/gpus/README.md
@@ -9,7 +9,7 @@ An important requirement is that you must have Docker and/or Singularity install
 virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker mlcube-singularity
 # Fetch the gpus example from GitHub
 git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples
-git fetch origin pull/xxx/head:feature/gpu_example && git checkout feature/gpu_example
+git fetch origin pull/68/head:feature/gpu_example && git checkout feature/gpu_example
 cd ./gpu_example/
 ```
 

From 35ed0d2082c1b0fa83cffcd4c6960dfaea3542aa Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 1 Dec 2023 11:17:46 -0500
Subject: [PATCH 3/5] Fix example logic

---
 gpus/Dockerfile    | 2 +-
 gpus/README.md     | 2 +-
 gpus/check_gpus.sh | 5 +++--
 gpus/mlcube.yaml   | 2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/gpus/Dockerfile b/gpus/Dockerfile
index fbaa343..a274d64 100644
--- a/gpus/Dockerfile
+++ b/gpus/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.0-base
+FROM nvidia/cuda:11.6.1-base-ubuntu20.04
 
 # Copy code
 COPY . /workspace
diff --git a/gpus/README.md b/gpus/README.md
index 80bf69e..bbeb3aa 100644
--- a/gpus/README.md
+++ b/gpus/README.md
@@ -10,7 +10,7 @@ virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-d
 # Fetch the gpus example from GitHub
 git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples
 git fetch origin pull/68/head:feature/gpu_example && git checkout feature/gpu_example
-cd ./gpu_example/
+cd ./gpus/
 ```
 
 ## MLCube tasks
diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh
index f172480..a84fe08 100644
--- a/gpus/check_gpus.sh
+++ b/gpus/check_gpus.sh
@@ -13,5 +13,6 @@ while [ $# -gt 0 ]; do
     shift
 done
 
-echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/train_console.log"
-nvidia-smi |& tee -a "$LOG_DIR/train_console.log"
+echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/gpus.log"
+nvidia-smi |& tee -a "$LOG_DIR/gpus.log"
+nvidia-smi --query-gpu=gpu_name,uuid --format=csv |& tee -a "$LOG_DIR/gpus.log"
diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml
index f35d40e..0968106 100644
--- a/gpus/mlcube.yaml
+++ b/gpus/mlcube.yaml
@@ -14,7 +14,7 @@ docker:
   # Docker file name within docker build context, default is `Dockerfile`.
   build_file: "Dockerfile"
   # GPU arguments
-  gpu_args: "--gpus=all"
+  gpu_args: "--gpus=1"
 
 tasks:
   check_gpus:

From f77c491fb6b11681359dfb80ec56fb9b35971888 Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 1 Dec 2023 11:19:24 -0500
Subject: [PATCH 4/5] Add gitignore

---
 gpus/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 gpus/.gitignore

diff --git a/gpus/.gitignore b/gpus/.gitignore
new file mode 100644
index 0000000..ece6ca2
--- /dev/null
+++ b/gpus/.gitignore
@@ -0,0 +1 @@
+/workspace
\ No newline at end of file

From 518174d896cbb97dd89cb4431ee9c34d1f6c2b40 Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 1 Dec 2023 17:52:17 -0500
Subject: [PATCH 5/5] Add NVIDIA_VISIBLE_DEVICES in output

---
 gpus/check_gpus.sh | 1 +
 gpus/mlcube.yaml   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh
index a84fe08..c9e1135 100644
--- a/gpus/check_gpus.sh
+++ b/gpus/check_gpus.sh
@@ -14,5 +14,6 @@ while [ $# -gt 0 ]; do
 done
 
 echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/gpus.log"
+echo "NVIDIA_VISIBLE_DEVICES $NVIDIA_VISIBLE_DEVICES" |& tee -a "$LOG_DIR/gpus.log"
 nvidia-smi |& tee -a "$LOG_DIR/gpus.log"
 nvidia-smi --query-gpu=gpu_name,uuid --format=csv |& tee -a "$LOG_DIR/gpus.log"
diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml
index 0968106..ea62eca 100644
--- a/gpus/mlcube.yaml
+++ b/gpus/mlcube.yaml
@@ -18,7 +18,7 @@ docker:
 
 tasks:
   check_gpus:
-    entrypoint: ./check_gpus.sh -a
+    entrypoint: ./check_gpus.sh
     parameters:
       outputs:
         log_dir: logs/