Skip to content

Commit

Permalink
laying out plans for test and creating test objects
Browse files Browse the repository at this point in the history
  • Loading branch information
ttimbers committed Nov 24, 2023
1 parent 350230e commit c055907
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 4 deletions.
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ FROM quay.io/jupyter/minimal-notebook:notebook-7.0.6
# install necessary packages for analysis
RUN conda install -y \
python=3.11.6 \
altair=5.1.2 \
altair=altair \
pandas=2.1.2 \
ipykernel=6.26.0 \
scikit-learn=1.3.2 \
requests=2.31.0 \
notebook=6.5.4
notebook=7.0.6 \
pytest=7.4.3 \
openpyxl=3.1.2
2 changes: 0 additions & 2 deletions src/breast_cancer_predictor_report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@
"with open(\"../data/raw/breast+cancer+wisconsin+original.zip\", 'wb') as f:\n",
" f.write(request.content)\n",
"\n",
"pd.arrays\n",
"\n",
"with zipfile.ZipFile(\"../data/raw/breast+cancer+wisconsin+original.zip\", 'r') as zip_ref:\n",
" zip_ref.extractall(\"../data/raw\")"
]
Expand Down
15 changes: 15 additions & 0 deletions src/read_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
def read_zip(url, directory):
    """
    Read a zip file from the given URL and extract its contents to the specified directory.

    NOTE: this function is currently a placeholder. Its body contains no
    download or extraction logic yet, so calling it has no effect and it
    simply returns None.

    Parameters:
    ----------
    url : str
        The URL of the zip file to be read.
    directory : str
        The directory where the contents of the zip file will be extracted.

    Returns:
    -------
    None
    """
9 changes: 9 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## How to run the test suite

### Preparation of test zip files
The test zip files used in `test_read_zip.py` were generated
by running the `generate_test_zip_files.py` script in the `tests` directory.
These files need to exist in the remote GitHub repository for the tests to pass.
If for some reason they go missing from the remote repository,
we can re-run the `generate_test_zip_files.py` script to re-generate them
and then push them to the remote repository.
Binary file added tests/empty.zip
Binary file not shown.
Binary file added tests/files_txt_subdir.zip
Binary file not shown.
Binary file added tests/files_txt_xlsx.zip
Binary file not shown.
46 changes: 46 additions & 0 deletions tests/generate_test_zip_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest
import os
import openpyxl
import zipfile
import shutil

# ---------------------------------------------------------------------------
# Step 1 - build the temporary input files that get packed into the archives
# ---------------------------------------------------------------------------

# 'subdir' holds the nested text file used by the sub-directory archive
os.makedirs('subdir', exist_ok=True)

# Two plain-text inputs, each containing "test data": one at the top level
# and one nested inside 'subdir'
for text_path in ('test1.txt', 'subdir/test2.txt'):
    with open(text_path, 'w') as handle:
        handle.write('test data')

# A spreadsheet input with "test data" in the first cell of the active sheet
book = openpyxl.Workbook()
book.active.cell(row=1, column=1, value='test data')
book.save('test1.xlsx')

# ---------------------------------------------------------------------------
# Step 2 - write the archives (archive name -> members, in insertion order)
# ---------------------------------------------------------------------------
archive_members = [
    # Case 1 - a text file plus a spreadsheet
    ('files_txt_xlsx.zip', ['test1.txt', 'test1.xlsx']),
    # Case 2 - a text file plus a text file inside a sub-directory
    ('files_txt_subdir.zip', ['test1.txt', 'subdir/test2.txt']),
    # Case 3 - an archive with no members at all
    ('empty.zip', []),
]

for archive_name, members in archive_members:
    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        for member in members:
            archive.write(member)

# ---------------------------------------------------------------------------
# Step 3 - remove the temporary inputs; only the .zip files are kept
# ---------------------------------------------------------------------------
for leftover in ('test1.txt', 'test1.xlsx'):
    if os.path.exists(leftover):
        os.remove(leftover)

if os.path.exists("subdir"):
    shutil.rmtree("subdir")
42 changes: 42 additions & 0 deletions tests/test_read_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest
import os
import shutil

# Test files setup
# Two target directories for downloaded data files are prepared up front:
#   - 'test_zip_data1' starts out empty
#   - 'test_zip_data2' already contains one file
for target_dir in ('test_zip_data1', 'test_zip_data2'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

# Opening in write mode and closing straight away leaves a zero-byte file
open('test_zip_data2/test3.txt', 'w').close()

# test read_zip function can download and extract a zip file containing files
# and subdirectories containing files
def test_read_zip_function():
    """Planned: read_zip downloads and extracts a zip with files and subdirectories."""
    # add tests here
    # A comment on its own is not a valid function body, so skip explicitly:
    # the module stays importable and the missing test shows up in reports.
    pytest.skip("test not implemented yet")


# test read_zip function throws an error if the zip file is empty
def test_read_zip_error_on_empty():
    """Planned: read_zip raises an error when the zip file is empty."""
    # add tests here
    # A comment on its own is not a valid function body, so skip explicitly:
    # the module stays importable and the missing test shows up in reports.
    pytest.skip("test not implemented yet")


# test read_zip function throws an error if the input URL is invalid
# (e.g., points to a non-existent file or a non-zip file)
def test_read_zip_error_on_invalid_url():
    """Planned: read_zip raises an error when the URL is invalid."""
    # add tests here
    # A comment on its own is not a valid function body, so skip explicitly:
    # the module stays importable and the missing test shows up in reports.
    pytest.skip("test not implemented yet")


# test read_zip function throws an error
# if the directory path provided does not exist
def test_read_zip_error_on_missing_dir():
    """Planned: read_zip raises an error when the target directory does not exist."""
    # add tests here
    # A comment on its own is not a valid function body, so skip explicitly:
    # the module stays importable and the missing test shows up in reports.
    pytest.skip("test not implemented yet")


# clean up data directories
@pytest.fixture(scope="module", autouse=True)
def _cleanup_test_dirs():
    """Remove the directories used by this module once all of its tests have run.

    Module-level cleanup code executes at import time, i.e. before any test
    runs, so it cannot remove anything the tests leave behind. Running the
    cleanup as the teardown half of a module-scoped autouse fixture makes it
    happen after the last test in this file instead.
    """
    yield
    # 'test_zip_data1' and 'test_zip_data2' are created by the setup at the
    # top of this file; 'subdir' is kept for parity with the earlier cleanup.
    for leftover_dir in ("test_zip_data1", "test_zip_data2", "subdir"):
        # ignore_errors makes a missing directory a no-op
        shutil.rmtree(leftover_dir, ignore_errors=True)

0 comments on commit c055907

Please sign in to comment.