From c90f289132b57d026499d5a22b8bef17f6250886 Mon Sep 17 00:00:00 2001 From: Tiffany Timbers Date: Mon, 27 Nov 2023 17:05:45 -0800 Subject: [PATCH] added two scripts --- Dockerfile | 3 +- README.md | 9 +- results/cancer_preprocessor.pickle | Bin 0 -> 453 bytes results/cancer_test.csv | 172 +++++++++++++ results/cancer_train.csv | 399 +++++++++++++++++++++++++++++ results/scaled_cancer_test.csv | 172 +++++++++++++ results/scaled_cancer_train.csv | 399 +++++++++++++++++++++++++++++ scripts/download_data.py | 24 ++ scripts/split_n_preprocess.py | 91 +++++++ 9 files changed, 1266 insertions(+), 3 deletions(-) create mode 100644 results/cancer_preprocessor.pickle create mode 100644 results/cancer_test.csv create mode 100644 results/cancer_train.csv create mode 100644 results/scaled_cancer_test.csv create mode 100644 results/scaled_cancer_train.csv create mode 100644 scripts/download_data.py create mode 100644 scripts/split_n_preprocess.py diff --git a/Dockerfile b/Dockerfile index ac16de2..959cfec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,4 +11,5 @@ RUN conda install -y \ requests=2.31.0 \ notebook=7.0.6 \ pytest=7.4.3 \ - responses=0.24.1 + responses=0.24.1 \ + click=8.0.3 diff --git a/README.md b/README.md index 86c210a..65365c0 100644 --- a/README.md +++ b/README.md @@ -79,8 +79,13 @@ Copy and paste that URL into your browser. 3. To run the analysis, -open `src/breast_cancer_predict_report.ipynb` in Jupyter Lab you just launched -and under the "Kernel" menu click "Restart Kernel and Run All Cells...". +enter the following commands in the terminal in the project root: + +``` +python scripts/download_data.py --url="https://archive.ics.uci.edu/static/public/15/breast+cancer+wisconsin+original.zip" --write-to="data/raw" + +python scripts/split_n_preprocess.py --raw_data=data/raw/wdbc.data --write_to=results +``` #### Clean up diff --git a/results/cancer_preprocessor.pickle b/results/cancer_preprocessor.pickle new file mode 100644 index 0000000000000000000000000000000000000000..d90d8690e01b6c6114b4aabb4d7cea0f5fe7e9d3 GIT binary patch literal 453 zcmYk3J5B>J5QaetgouX{L4zo1pjioW0167aG@(V7cRh)htRM2&gcL|nK_qt!m*E;* z2=8VSWg3tF`TUQ^e((JJ?(Q@ZFL71@cRDMKvIZezg{gwl%sZ}eVw?g;ad6*=k6YR| zU4PIUUB#m<66uNJ33{#tcLEDu)u%7kJ*)%kOaX|co@PvN&l5gw@$m}Z829F-mrMcI z^c4@94SJFCPGRgKt$o%Svlnn!8clKUCL3nA6mq!}&lI1*dK`ob3U3^3?7NnG4^Go4 z9t*#)z)D?IK>)>l9aLW5ye+`%sxS)qC-K;Gpm?c8O&nT|=%q7ZDk=8Sa)-c@04Pl* z=;7{f*hDgA&nCyz?sV(L*c{5K^tIegQ{?FjXc^Djludy9;DBka0GSD%ma;DgA