From 130bc60b2e50b90a9cff3a1171b8cf20c57cc17d Mon Sep 17 00:00:00 2001
From: dafeliton <dafeliton@ucsd.edu>
Date: Tue, 23 Jul 2024 01:11:40 -0700
Subject: [PATCH] move some workflow_tests to datasci

---
 Documentation/actions.md                               |  2 +-
 Documentation/architecture.md                          | 10 ++++++++--
 images/datascience-notebook/Dockerfile                 |  4 ++++
 .../workflow_tests/test_matplotlib.py                  |  0
 .../workflow_tests/test_nltk.py                        |  1 +
 .../workflow_tests/test_pandas.py                      |  0
 .../workflow_tests/test_statsmodels.py                 |  0
 images/scipy-ml-notebook/Dockerfile                    |  5 ++++-
 .../scipy-ml-notebook/{test => old_tests}/__init__.py  |  0
 .../{test => old_tests}/data/test_tf.ipynb             |  0
 .../scipy-ml-notebook/{test => old_tests}/test_tf.py   |  0
 .../scipy-ml-notebook/workflow_tests/test_pytorch.py   |  8 ++++----
 12 files changed, 22 insertions(+), 8 deletions(-)
 rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_matplotlib.py (100%)
 rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_nltk.py (97%)
 rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_pandas.py (100%)
 rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_statsmodels.py (100%)
 rename images/scipy-ml-notebook/{test => old_tests}/__init__.py (100%)
 rename images/scipy-ml-notebook/{test => old_tests}/data/test_tf.ipynb (100%)
 rename images/scipy-ml-notebook/{test => old_tests}/test_tf.py (100%)

diff --git a/Documentation/actions.md b/Documentation/actions.md
index 82397426..387e6ba9 100644
--- a/Documentation/actions.md
+++ b/Documentation/actions.md
@@ -1,6 +1,6 @@
 # DataHub Docker Stack: GitHub Actions
 
-The images used to be built and pushed to [our organization at DockerHub](https://hub.docker.com/orgs/ucsdets/members) through GitHub Actions, but are now published as packages within this repo instead. We also use GitHub Actions for testing and pushing our stable images to production. [You may also check out scripts.md](/Documentation/scripts.md) for a more indepth look at the Python code underlying these actions.
+The images used to be built and pushed to [our organization at DockerHub](https://hub.docker.com/orgs/ucsdets/members) through GitHub Actions, but are now published as packages within this repo instead. We also use GitHub Actions for testing and pushing our stable images to production. [You may also check out scripts.md](/Documentation/scripts.md) for a more in-depth look at the Python code underlying these actions.
 
 We have four actions that we use to develop, test, and deploy our Docker Stack.
 
diff --git a/Documentation/architecture.md b/Documentation/architecture.md
index 7aa02642..3799a5c0 100644
--- a/Documentation/architecture.md
+++ b/Documentation/architecture.md
@@ -56,6 +56,11 @@ to run the pipeline. For testing, we use pytest.
 │   │   ├── Dockerfile  # image definition for docker
 │   │   ├── scripts     # .sh & .py scripts used for container setup
 │   │   │   └── ...
+│   │   ├── workflow_tests
+│   │   │   ├── test_matplotlib.py
+│   │   │   ├── test_nltk.py
+│   │   │   ├── test_pandas.py
+│   │   │   └── test_statsmodels.py
 │   │   └── test    # image acceptance tests
 │   │       ├── data
 │   │       │   └── test-notebook.ipynb
@@ -77,16 +82,17 @@ to run the pipeline. For testing, we use pytest.
 │   │   ├── activate.sh
 │   │   ├── cudatoolkit_env_vars.sh
 │   │   ├── cudnn_env_vars.sh
+│   │   ├── run_jupyter.sh
 │   │   ├── manual_tests
 │   │   │   ├── pytorch_mtest.ipynb
 │   │   │   └── tensorflow_mtest.ipynb
-│   │   ├── run_jupyter.sh
-│   │   ├── test
+│   │   ├── old_tests
 │   │   │   ├── __init__.py
 │   │   │   ├── data
 │   │   │   │   └── test_tf.ipynb
 │   │   │   └── test_tf.py
 │   │   └── workflow_tests
+│   │       ├── test_keras.py
 │   │       ├── test_pytorch.py
 │   │       └── test_tf.py
 │   ├── spec.yml        # image definition metadata (for all images)
diff --git a/images/datascience-notebook/Dockerfile b/images/datascience-notebook/Dockerfile
index 5af5f083..d692c176 100644
--- a/images/datascience-notebook/Dockerfile
+++ b/images/datascience-notebook/Dockerfile
@@ -54,6 +54,10 @@ RUN mkdir /opt/manual_tests
 COPY /test/test_r_dump_packages.R /opt/manual_tests
 COPY /test/test_r_func.R /opt/manual_tests
 
+# Add additional tests
+RUN mkdir -p /opt/workflow_tests
+COPY workflow_tests/* /opt/workflow_tests
+
 USER jovyan
 
 # Python/Mamba Deps
diff --git a/images/scipy-ml-notebook/workflow_tests/test_matplotlib.py b/images/datascience-notebook/workflow_tests/test_matplotlib.py
similarity index 100%
rename from images/scipy-ml-notebook/workflow_tests/test_matplotlib.py
rename to images/datascience-notebook/workflow_tests/test_matplotlib.py
diff --git a/images/scipy-ml-notebook/workflow_tests/test_nltk.py b/images/datascience-notebook/workflow_tests/test_nltk.py
similarity index 97%
rename from images/scipy-ml-notebook/workflow_tests/test_nltk.py
rename to images/datascience-notebook/workflow_tests/test_nltk.py
index 5aa0139a..4249ed86 100644
--- a/images/scipy-ml-notebook/workflow_tests/test_nltk.py
+++ b/images/datascience-notebook/workflow_tests/test_nltk.py
@@ -5,6 +5,7 @@ def setup_module(module):
     nltk.download('punkt', download_dir='/tmp/nltk_data')
     nltk.download('maxent_ne_chunker', download_dir='/tmp/nltk_data')
     nltk.download('words', download_dir='/tmp/nltk_data')
+    nltk.download('averaged_perceptron_tagger', download_dir='/tmp/nltk_data')
     nltk.data.path.append('/tmp/nltk_data')
 
 def test_tokenization():
diff --git a/images/scipy-ml-notebook/workflow_tests/test_pandas.py b/images/datascience-notebook/workflow_tests/test_pandas.py
similarity index 100%
rename from images/scipy-ml-notebook/workflow_tests/test_pandas.py
rename to images/datascience-notebook/workflow_tests/test_pandas.py
diff --git a/images/scipy-ml-notebook/workflow_tests/test_statsmodels.py b/images/datascience-notebook/workflow_tests/test_statsmodels.py
similarity index 100%
rename from images/scipy-ml-notebook/workflow_tests/test_statsmodels.py
rename to images/datascience-notebook/workflow_tests/test_statsmodels.py
diff --git a/images/scipy-ml-notebook/Dockerfile b/images/scipy-ml-notebook/Dockerfile
index c98fe2ad..b6be710d 100644
--- a/images/scipy-ml-notebook/Dockerfile
+++ b/images/scipy-ml-notebook/Dockerfile
@@ -30,7 +30,10 @@ RUN chmod +x /run_jupyter.sh
 # Scripts setup
 COPY cudatoolkit_env_vars.sh cudnn_env_vars.sh tensorrt_env_vars.sh /etc/datahub-profile.d/
 COPY activate.sh /tmp/activate.sh
-COPY workflow_tests /opt/workflow_tests
+
+# Add tests
+RUN mkdir -p /opt/workflow_tests
+COPY workflow_tests/* /opt/workflow_tests
 ADD manual_tests /opt/manual_tests
 
 RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh
diff --git a/images/scipy-ml-notebook/test/__init__.py b/images/scipy-ml-notebook/old_tests/__init__.py
similarity index 100%
rename from images/scipy-ml-notebook/test/__init__.py
rename to images/scipy-ml-notebook/old_tests/__init__.py
diff --git a/images/scipy-ml-notebook/test/data/test_tf.ipynb b/images/scipy-ml-notebook/old_tests/data/test_tf.ipynb
similarity index 100%
rename from images/scipy-ml-notebook/test/data/test_tf.ipynb
rename to images/scipy-ml-notebook/old_tests/data/test_tf.ipynb
diff --git a/images/scipy-ml-notebook/test/test_tf.py b/images/scipy-ml-notebook/old_tests/test_tf.py
similarity index 100%
rename from images/scipy-ml-notebook/test/test_tf.py
rename to images/scipy-ml-notebook/old_tests/test_tf.py
diff --git a/images/scipy-ml-notebook/workflow_tests/test_pytorch.py b/images/scipy-ml-notebook/workflow_tests/test_pytorch.py
index 3ba0e15f..962fd807 100644
--- a/images/scipy-ml-notebook/workflow_tests/test_pytorch.py
+++ b/images/scipy-ml-notebook/workflow_tests/test_pytorch.py
@@ -106,7 +106,7 @@ def length_of_dataset_no_cuda():
 
     # Download and load the training data
     train_data = datasets.MNIST(
-        root='./data', train=True, download=True, transform=transform)
+        root='/tmp', train=True, download=True, transform=transform)
 
     # Check the size of the training set
     ld = len(train_data)
@@ -131,9 +131,9 @@ def mean_pixel_value_cuda():
     transform = transforms.Compose(
         [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
     train_set = datasets.MNIST(
-        root='./data', train=True, download=True, transform=transform)
+        root='/tmp', train=True, download=True, transform=transform)
     test_set = datasets.MNIST(
-        root='./data', train=False, download=True, transform=transform)
+        root='/tmp', train=False, download=True, transform=transform)
 
     # Move dataset to device
     train_loader = torch.utils.data.DataLoader(
@@ -171,7 +171,7 @@ def multiply_dataset_calculate_mean_cuda():
     transform = transforms.Compose(
         [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
     train_dataset = datasets.MNIST(
-        './data', train=True, download=True, transform=transform)
+        '/tmp', train=True, download=True, transform=transform)
 
     # Create a DataLoader for the dataset
     train_loader = torch.utils.data.DataLoader(