From 130bc60b2e50b90a9cff3a1171b8cf20c57cc17d Mon Sep 17 00:00:00 2001 From: dafeliton Date: Tue, 23 Jul 2024 01:11:40 -0700 Subject: [PATCH] move some workflow_tests to datasci --- Documentation/actions.md | 2 +- Documentation/architecture.md | 10 ++++++++-- images/datascience-notebook/Dockerfile | 4 ++++ .../workflow_tests/test_matplotlib.py | 0 .../workflow_tests/test_nltk.py | 1 + .../workflow_tests/test_pandas.py | 0 .../workflow_tests/test_statsmodels.py | 0 images/scipy-ml-notebook/Dockerfile | 5 ++++- .../scipy-ml-notebook/{test => old_tests}/__init__.py | 0 .../{test => old_tests}/data/test_tf.ipynb | 0 .../scipy-ml-notebook/{test => old_tests}/test_tf.py | 0 .../scipy-ml-notebook/workflow_tests/test_pytorch.py | 8 ++++---- 12 files changed, 22 insertions(+), 8 deletions(-) rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_matplotlib.py (100%) rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_nltk.py (97%) rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_pandas.py (100%) rename images/{scipy-ml-notebook => datascience-notebook}/workflow_tests/test_statsmodels.py (100%) rename images/scipy-ml-notebook/{test => old_tests}/__init__.py (100%) rename images/scipy-ml-notebook/{test => old_tests}/data/test_tf.ipynb (100%) rename images/scipy-ml-notebook/{test => old_tests}/test_tf.py (100%) diff --git a/Documentation/actions.md b/Documentation/actions.md index 82397426..387e6ba9 100644 --- a/Documentation/actions.md +++ b/Documentation/actions.md @@ -1,6 +1,6 @@ # DataHub Docker Stack: GitHub Actions -The images used to be built and pushed to [our organization at DockerHub](https://hub.docker.com/orgs/ucsdets/members) through GitHub Actions, but are now published as packages within this repo instead. We also use GitHub Actions for testing and pushing our stable images to production. [You may also check out scripts.md](/Documentation/scripts.md) for a more indepth look at the Python code underlying these actions. +The images used to be built and pushed to [our organization at DockerHub](https://hub.docker.com/orgs/ucsdets/members) through GitHub Actions, but are now published as packages within this repo instead. We also use GitHub Actions for testing and pushing our stable images to production. [You may also check out scripts.md](/Documentation/scripts.md) for a more in-depth look at the Python code underlying these actions. We have four actions that we use to develop, test, and deploy our Docker Stack. diff --git a/Documentation/architecture.md b/Documentation/architecture.md index 7aa02642..3799a5c0 100644 --- a/Documentation/architecture.md +++ b/Documentation/architecture.md @@ -56,6 +56,11 @@ to run the pipeline. For testing, we use pytest. │   │   ├── Dockerfile # image definition for docker │   │   ├── scripts # .sh & .py scripts used for container setup │   │   │   └── ... +│   │   ├── workflow_tests +│   │   ├── test_matplotlib.py +│   │   ├── test_nltk.py +│   │   ├── test_pandas.py +│   │   └── test_statsmodels.py │   │   └── test # image acceptance tests │   │      ├── data │   │      │   └── test-notebook.ipynb @@ -77,16 +82,17 @@ to run the pipeline. For testing, we use pytest. │   │   ├── activate.sh │   │   ├── cudatoolkit_env_vars.sh │   │   ├── cudnn_env_vars.sh +│   │   ├── run_jupyter.sh │   │   ├── manual_tests │   │   │   ├── pytorch_mtest.ipynb │   │   │   └── tensorflow_mtest.ipynb -│   │   ├── run_jupyter.sh -│   │   ├── test +│   │   ├── old_tests │   │   │   ├── __init__.py │   │   │   ├── data │   │   │   │   └── test_tf.ipynb │   │   │   └── test_tf.py │   │   └── workflow_tests +│   │   ├── test_keras.py │   │   ├── test_pytorch.py │   │   └── test_tf.py │   ├── spec.yml # image definition metadata (for all images) diff --git a/images/datascience-notebook/Dockerfile b/images/datascience-notebook/Dockerfile index 5af5f083..d692c176 100644 --- a/images/datascience-notebook/Dockerfile +++ b/images/datascience-notebook/Dockerfile @@ -54,6 +54,10 @@ RUN mkdir /opt/manual_tests COPY /test/test_r_dump_packages.R /opt/manual_tests COPY /test/test_r_func.R /opt/manual_tests +# Add additional tests +RUN mkdir -p /opt/workflow_tests +COPY workflow_tests/* /opt/workflow_tests + USER jovyan # Python/Mamba Deps diff --git a/images/scipy-ml-notebook/workflow_tests/test_matplotlib.py b/images/datascience-notebook/workflow_tests/test_matplotlib.py similarity index 100% rename from images/scipy-ml-notebook/workflow_tests/test_matplotlib.py rename to images/datascience-notebook/workflow_tests/test_matplotlib.py diff --git a/images/scipy-ml-notebook/workflow_tests/test_nltk.py b/images/datascience-notebook/workflow_tests/test_nltk.py similarity index 97% rename from images/scipy-ml-notebook/workflow_tests/test_nltk.py rename to images/datascience-notebook/workflow_tests/test_nltk.py index 5aa0139a..4249ed86 100644 --- a/images/scipy-ml-notebook/workflow_tests/test_nltk.py +++ b/images/datascience-notebook/workflow_tests/test_nltk.py @@ -5,6 +5,7 @@ def setup_module(module): nltk.download('punkt', download_dir='/tmp/nltk_data') nltk.download('maxent_ne_chunker', download_dir='/tmp/nltk_data') nltk.download('words', download_dir='/tmp/nltk_data') + nltk.download('averaged_perceptron_tagger', download_dir='/tmp/nltk_data') nltk.data.path.append('/tmp/nltk_data') def test_tokenization(): diff --git a/images/scipy-ml-notebook/workflow_tests/test_pandas.py b/images/datascience-notebook/workflow_tests/test_pandas.py similarity index 100% rename from images/scipy-ml-notebook/workflow_tests/test_pandas.py rename to images/datascience-notebook/workflow_tests/test_pandas.py diff --git a/images/scipy-ml-notebook/workflow_tests/test_statsmodels.py b/images/datascience-notebook/workflow_tests/test_statsmodels.py similarity index 100% rename from images/scipy-ml-notebook/workflow_tests/test_statsmodels.py rename to images/datascience-notebook/workflow_tests/test_statsmodels.py diff --git a/images/scipy-ml-notebook/Dockerfile b/images/scipy-ml-notebook/Dockerfile index c98fe2ad..b6be710d 100644 --- a/images/scipy-ml-notebook/Dockerfile +++ b/images/scipy-ml-notebook/Dockerfile @@ -30,7 +30,10 @@ RUN chmod +x /run_jupyter.sh # Scripts setup COPY cudatoolkit_env_vars.sh cudnn_env_vars.sh tensorrt_env_vars.sh /etc/datahub-profile.d/ COPY activate.sh /tmp/activate.sh -COPY workflow_tests /opt/workflow_tests + +# Add tests +RUN mkdir -p /opt/workflow_tests +COPY workflow_tests/* /opt/workflow_tests ADD manual_tests /opt/manual_tests RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh diff --git a/images/scipy-ml-notebook/test/__init__.py b/images/scipy-ml-notebook/old_tests/__init__.py similarity index 100% rename from images/scipy-ml-notebook/test/__init__.py rename to images/scipy-ml-notebook/old_tests/__init__.py diff --git a/images/scipy-ml-notebook/test/data/test_tf.ipynb b/images/scipy-ml-notebook/old_tests/data/test_tf.ipynb similarity index 100% rename from images/scipy-ml-notebook/test/data/test_tf.ipynb rename to images/scipy-ml-notebook/old_tests/data/test_tf.ipynb diff --git a/images/scipy-ml-notebook/test/test_tf.py b/images/scipy-ml-notebook/old_tests/test_tf.py similarity index 100% rename from images/scipy-ml-notebook/test/test_tf.py rename to images/scipy-ml-notebook/old_tests/test_tf.py diff --git a/images/scipy-ml-notebook/workflow_tests/test_pytorch.py b/images/scipy-ml-notebook/workflow_tests/test_pytorch.py index 3ba0e15f..962fd807 100644 --- a/images/scipy-ml-notebook/workflow_tests/test_pytorch.py +++ b/images/scipy-ml-notebook/workflow_tests/test_pytorch.py @@ -106,7 +106,7 @@ def length_of_dataset_no_cuda(): # Download and load the training data train_data = datasets.MNIST( - root='./data', train=True, download=True, transform=transform) + root='/tmp', train=True, download=True, transform=transform) # Check the size of the training set ld = len(train_data) @@ -131,9 +131,9 @@ def mean_pixel_value_cuda(): transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]) train_set = datasets.MNIST( - root='./data', train=True, download=True, transform=transform) + root='/tmp', train=True, download=True, transform=transform) test_set = datasets.MNIST( - root='./data', train=False, download=True, transform=transform) + root='/tmp', train=False, download=True, transform=transform) # Move dataset to device train_loader = torch.utils.data.DataLoader( @@ -171,7 +171,7 @@ def multiply_dataset_calculate_mean_cuda(): transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) train_dataset = datasets.MNIST( - './data', train=True, download=True, transform=transform) + '/tmp', train=True, download=True, transform=transform) # Create a DataLoader for the dataset train_loader = torch.utils.data.DataLoader(