move some workflow_tests to datasci

dafeliton committed Jul 23, 2024
1 parent b431dbe commit 130bc60
Showing 12 changed files with 22 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Documentation/actions.md
@@ -1,6 +1,6 @@
# DataHub Docker Stack: GitHub Actions

The images used to be built and pushed to [our organization at DockerHub](https://hub.docker.com/orgs/ucsdets/members) through GitHub Actions, but are now published as packages within this repo instead. We also use GitHub Actions for testing and pushing our stable images to production. [You may also check out scripts.md](/Documentation/scripts.md) for a more indepth look at the Python code underlying these actions.
The images used to be built and pushed to [our organization at DockerHub](https://hub.docker.com/orgs/ucsdets/members) through GitHub Actions, but are now published as packages within this repo instead. We also use GitHub Actions for testing and pushing our stable images to production. [You may also check out scripts.md](/Documentation/scripts.md) for a more in-depth look at the Python code underlying these actions.

We have four actions that we use to develop, test, and deploy our Docker Stack.

10 changes: 8 additions & 2 deletions Documentation/architecture.md
@@ -56,6 +56,11 @@ to run the pipeline. For testing, we use pytest.
│   │   ├── Dockerfile # image definition for docker
│   │   ├── scripts # .sh & .py scripts used for container setup
│   │   │   └── ...
│   │   ├── workflow_tests
│   │   │   ├── test_matplotlib.py
│   │   │   ├── test_nltk.py
│   │   │   ├── test_pandas.py
│   │   │   └── test_statsmodels.py
│   │   └── test # image acceptance tests
│   │      ├── data
│   │      │   └── test-notebook.ipynb
@@ -77,16 +82,17 @@ to run the pipeline. For testing, we use pytest.
│   │   ├── activate.sh
│   │   ├── cudatoolkit_env_vars.sh
│   │   ├── cudnn_env_vars.sh
│   │   ├── run_jupyter.sh
│   │   ├── manual_tests
│   │   │   ├── pytorch_mtest.ipynb
│   │   │   └── tensorflow_mtest.ipynb
│   │   ├── run_jupyter.sh
│   │   ├── test
│   │   ├── old_tests
│   │   │   ├── __init__.py
│   │   │   ├── data
│   │   │   │   └── test_tf.ipynb
│   │   │   └── test_tf.py
│   │   └── workflow_tests
│   │      ├── test_keras.py
│   │      ├── test_pytorch.py
│   │      └── test_tf.py
│   ├── spec.yml # image definition metadata (for all images)
4 changes: 4 additions & 0 deletions images/datascience-notebook/Dockerfile
@@ -54,6 +54,10 @@ RUN mkdir /opt/manual_tests
COPY /test/test_r_dump_packages.R /opt/manual_tests
COPY /test/test_r_func.R /opt/manual_tests

# Add additional tests
RUN mkdir -p /opt/workflow_tests
COPY workflow_tests/* /opt/workflow_tests

USER jovyan

# Python/Mamba Deps
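Both Dockerfile changes in this commit bake the workflow tests into the image at /opt/workflow_tests. As a hypothetical usage sketch (not part of the commit), the baked-in suite could then be run inside a container like so:

```python
# Hypothetical sketch, not part of this commit: run the baked-in
# workflow tests inside a running container. /opt/workflow_tests is
# the COPY destination used by the Dockerfile above.
import sys

import pytest

# pytest.main takes CLI-style args and returns an exit code.
sys.exit(pytest.main(["-q", "/opt/workflow_tests"]))
```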
images/datascience-notebook/workflow_tests/test_nltk.py
@@ -5,6 +5,7 @@ def setup_module(module):
nltk.download('punkt', download_dir='/tmp/nltk_data')
nltk.download('maxent_ne_chunker', download_dir='/tmp/nltk_data')
nltk.download('words', download_dir='/tmp/nltk_data')
nltk.download('averaged_perceptron_tagger', download_dir='/tmp/nltk_data')
nltk.data.path.append('/tmp/nltk_data')

def test_tokenization():
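The extra download matters because nltk.pos_tag() lazily loads the averaged_perceptron_tagger resource and raises LookupError when it is absent from nltk.data.path; the named-entity tests go through that tagger. A minimal sketch of the dependency (not from the diff):

```python
# Minimal sketch, not from the diff: pos_tag() needs the
# 'averaged_perceptron_tagger' resource; without the download added
# above, this raises LookupError.
import nltk

nltk.download('punkt', download_dir='/tmp/nltk_data')
nltk.download('averaged_perceptron_tagger', download_dir='/tmp/nltk_data')
nltk.data.path.append('/tmp/nltk_data')

tokens = nltk.word_tokenize("UCSD is in San Diego.")
print(nltk.pos_tag(tokens))  # e.g. [('UCSD', 'NNP'), ('is', 'VBZ'), ...]
```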
5 changes: 4 additions & 1 deletion images/scipy-ml-notebook/Dockerfile
@@ -30,7 +30,10 @@ RUN chmod +x /run_jupyter.sh
# Scripts setup
COPY cudatoolkit_env_vars.sh cudnn_env_vars.sh tensorrt_env_vars.sh /etc/datahub-profile.d/
COPY activate.sh /tmp/activate.sh
COPY workflow_tests /opt/workflow_tests

# Add tests
RUN mkdir -p /opt/workflow_tests
COPY workflow_tests/* /opt/workflow_tests
ADD manual_tests /opt/manual_tests

RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh
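Note that the old COPY workflow_tests /opt/workflow_tests and the new mkdir -p plus COPY workflow_tests/* /opt/workflow_tests land the same files, since Docker's COPY copies a directory source by its contents; the glob form simply requires the destination directory to exist first. A hypothetical in-container sanity check (not part of the commit):

```python
# Hypothetical sanity check, not part of the commit: list what the
# COPY/ADD lines above placed into the image.
from pathlib import Path

for dest in ("/opt/workflow_tests", "/opt/manual_tests"):
    names = sorted(p.name for p in Path(dest).iterdir())
    print(dest, "->", names)
```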
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions images/scipy-ml-notebook/workflow_tests/test_pytorch.py
@@ -106,7 +106,7 @@ def length_of_dataset_no_cuda():

# Download and load the training data
train_data = datasets.MNIST(
root='./data', train=True, download=True, transform=transform)
root='/tmp', train=True, download=True, transform=transform)

# Check the size of the training set
ld = len(train_data)
@@ -131,9 +131,9 @@ def mean_pixel_value_cuda():
transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_set = datasets.MNIST(
root='./data', train=True, download=True, transform=transform)
root='/tmp', train=True, download=True, transform=transform)
test_set = datasets.MNIST(
root='./data', train=False, download=True, transform=transform)
root='/tmp', train=False, download=True, transform=transform)

# Move dataset to device
train_loader = torch.utils.data.DataLoader(
@@ -171,7 +171,7 @@ def multiply_dataset_calculate_mean_cuda():
transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(
'./data', train=True, download=True, transform=transform)
'/tmp', train=True, download=True, transform=transform)

# Create a DataLoader for the dataset
train_loader = torch.utils.data.DataLoader(
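Switching root from './data' to '/tmp' decouples the MNIST download from the test runner's working directory, which inside the container may differ per invocation or not be writable. A sketch of the pattern the diff adopts:

```python
# Sketch of the pattern the diff adopts: use an absolute, writable
# download root rather than a path relative to pytest's CWD.
from torchvision import datasets, transforms

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# '/tmp' is writable regardless of where the tests are launched;
# './data' would write into the current working directory.
train_data = datasets.MNIST(
    root='/tmp', train=True, download=True, transform=transform)
print(len(train_data))  # 60,000 training images
```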
