Skip to content

Commit

Permalink
Merge pull request #4 from ttimbers/feat-read-data
Browse files Browse the repository at this point in the history
laying out plans for test and creating test objects
  • Loading branch information
ttimbers authored Nov 24, 2023
2 parents 350230e + 1411230 commit cb9ab13
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 3 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ RUN conda install -y \
ipykernel=6.26.0 \
scikit-learn=1.3.2 \
requests=2.31.0 \
notebook=6.5.4
notebook=7.0.6 \
pytest=7.4.3
2 changes: 0 additions & 2 deletions src/breast_cancer_predictor_report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@
"with open(\"../data/raw/breast+cancer+wisconsin+original.zip\", 'wb') as f:\n",
" f.write(request.content)\n",
"\n",
"pd.arrays\n",
"\n",
"with zipfile.ZipFile(\"../data/raw/breast+cancer+wisconsin+original.zip\", 'r') as zip_ref:\n",
" zip_ref.extractall(\"../data/raw\")"
]
Expand Down
15 changes: 15 additions & 0 deletions src/read_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
def read_zip(url: str, directory: str) -> None:
    """
    Read a zip file from the given URL and extract its contents to the specified directory.

    NOTE: this function is currently a stub. Its body consists of this
    docstring only, so calling it performs no download or extraction and
    simply returns None. The text below describes the intended contract.

    Parameters
    ----------
    url : str
        The URL of the zip file to be read.
    directory : str
        The directory where the contents of the zip file will be extracted.

    Returns
    -------
    None

    Notes
    -----
    The planned tests in ``tests/test_read_zip.py`` expect an error when the
    zip file is empty, when the URL is invalid (non-existent file or not a
    zip file), and when ``directory`` does not exist. None of this is
    implemented yet.
    """
9 changes: 9 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## How to run the test suite

### Preparation of test zip files
The test zip files used in `test_read_zip.py` were generated
by running the `generate_test_zip_files.py` script in the `tests` directory.
These files need to exist in the remote GitHub repository for the tests to pass.
If for some reason they go missing from the remote repository,
we can re-run the `generate_test_zip_files.py` script to re-generate them
and then push them to the remote repository.
Binary file added tests/empty.zip
Binary file not shown.
Binary file added tests/files_txt_csv.zip
Binary file not shown.
Binary file added tests/files_txt_subdir.zip
Binary file not shown.
43 changes: 43 additions & 0 deletions tests/generate_test_zip_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import zipfile
import shutil

# Temporary fixture files staged on disk, mapped to the text written into each.
# Note that 'test2.csv' holds comma-separated text, unlike the two .txt files.
fixture_contents = {
    'test1.txt': 'test data',
    'test2.csv': 'test,data',
    'subdir/test3.txt': 'test data',
}

# Archives to build, mapped to the members added to each (in this order):
#   Case 1 - two files at the top level of the archive
#   Case 2 - the same two files plus 'subdir/test3.txt' in a subdirectory
#   Case 3 - an empty archive with no members at all
archive_members = {
    'files_txt_csv.zip': ['test1.txt', 'test2.csv'],
    'files_txt_subdir.zip': ['test1.txt', 'test2.csv', 'subdir/test3.txt'],
    'empty.zip': [],
}

# The subdirectory must exist before 'subdir/test3.txt' can be written.
os.makedirs('subdir', exist_ok=True)

# Stage the fixture files in the current working directory.
for fixture_path, fixture_text in fixture_contents.items():
    with open(fixture_path, 'w') as handle:
        handle.write(fixture_text)

# Build each archive from the staged files.
for archive_name, members in archive_members.items():
    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        for member in members:
            archive.write(member)

# Remove the staged files and directory; only the .zip archives are kept.
for leftover in ('test1.txt', 'test2.csv'):
    if os.path.exists(leftover):
        os.remove(leftover)

if os.path.exists("subdir"):
    shutil.rmtree("subdir")
42 changes: 42 additions & 0 deletions tests/test_read_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest
import os
import shutil

# Test files setup
# These statements run when the module is imported (i.e. at pytest collection
# time), and every path is relative to the current working directory.

# Set up an empty directory for data files to be downloaded to.
if not os.path.exists('test_zip_data1'):
    os.makedirs('test_zip_data1')

# Set up a directory that already contains a file, for data files to be
# downloaded to.
if not os.path.exists('test_zip_data2'):
    os.makedirs('test_zip_data2')
with open('test_zip_data2/test3.txt', 'w') as file:
    pass  # The 'pass' statement does nothing, creating an empty file

# Test that the read_zip function can download and extract a zip file
# containing files and subdirectories containing files.
def test_read_zip_function():
    # TODO: add tests here.
    # A comment alone is not a valid function body in Python, so an explicit
    # `pass` is needed for this placeholder to parse.
    pass


# Test that the read_zip function throws an error if the zip file is empty.
def test_read_zip_error_on_empty():
    # TODO: add tests here.
    # A comment alone is not a valid function body in Python, so an explicit
    # `pass` is needed for this placeholder to parse.
    pass


# Test that the read_zip function throws an error if the input URL is invalid
# (e.g., points to a non-existent file or a non-zip file).
def test_read_zip_error_on_invalid_url():
    # TODO: add tests here.
    # A comment alone is not a valid function body in Python, so an explicit
    # `pass` is needed for this placeholder to parse.
    pass


# Test that the read_zip function throws an error
# if the directory path provided does not exist.
def test_read_zip_error_on_missing_dir():
    # TODO: add tests here.
    # A comment alone is not a valid function body in Python, so an explicit
    # `pass` is needed for this placeholder to parse.
    pass


# Clean up data directories.
def teardown_module():
    """Remove the directories this test module leaves behind.

    pytest calls a module-level function named ``teardown_module`` once,
    after the last test in the module has finished. Running the cleanup
    from this hook, rather than as a bare module-level statement, ensures
    it happens after the tests instead of at import/collection time
    (before any test has run).

    Removes ``test_zip_data1`` and ``test_zip_data2`` (created by the setup
    code at the top of this module) as well as ``subdir``, which the
    original module-level cleanup targeted. Paths that do not exist are
    skipped, so calling this more than once is harmless.
    """
    for leftover in ("test_zip_data1", "test_zip_data2", "subdir"):
        if os.path.exists(leftover):
            shutil.rmtree(leftover)

0 comments on commit cb9ab13

Please sign in to comment.