Skip to content

Commit

Permalink
third script done
Browse files Browse the repository at this point in the history
  • Loading branch information
ttimbers committed Nov 28, 2023
1 parent a5eb2a7 commit 4318b52
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 13 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ RUN conda install -y \
notebook=7.0.6 \
pytest=7.4.3 \
responses=0.24.1 \
click=8.0.3
click=8.0.3 \
vl-convert-python=1.1.0
20 changes: 16 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,25 @@ enter the following commands in the terminal in the project root:

```
# download and extract data
python scripts/download_data.py --url="https://archive.ics.uci.edu/static/public/15/breast+cancer+wisconsin+original.zip" --write-to="data/raw"
python scripts/download_data.py --url="https://archive.ics.uci.edu/static/public/15/breast+cancer+wisconsin+original.zip" \
--write-to="data/raw"
# split data into train and test sets, preprocess data for eda
# and save preprocessor
python scripts/split_n_preprocess.py --raw-data=data/raw/wdbc.data --data-to=data/processed --preprocessor-to=results/models --seed=522
python scripts/split_n_preprocess.py --raw-data=data/raw/wdbc.data \
--data-to=data/processed \
--preprocessor-to=results/models \
--seed=522
# perform eda and save plots
# train model, visualize tuning results, and save plot and model
python scripts/fit_breast_cancer_classifier.py --training-data=data/processed/cancer_train.csv \
--preprocessor=results/models/cancer_preprocessor.pickle \
--columns-to-drop=data/processed/columns_to_drop.csv \
--pipeline-to=results/models \
--plot-to=results/figures \
--seed=523
```

#### Clean up
Expand Down
Binary file added results/figures/cancer_choose_k.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/models/cancer_pipeline.pickle
Binary file not shown.
17 changes: 9 additions & 8 deletions scripts/fit_breast_cancer_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@

import click
import os
import altair as alt
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn import set_config
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer, make_column_selector

from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import fbeta_score, make_scorer

@click.command()
@click.option('--training-data', type=str, help="Path to training data")
@click.option('--preprocessor', type=str, help="Path to preprocessor object")
@click.option('--columns-to-drop', type=str, help="Optional: columns to drop")
@click.option('--pipeline-to', type=str, help="Path to directory where the pipeline object will be written to")
@click.option('--plot-to', type=int, help="Path to directory where the plot will be written to")
@click.option('--plot-to', type=str, help="Path to directory where the plot will be written to")
@click.option('--seed', type=int, help="Random seed", default=123)

def main(training_data, preprocessor, columns_to_drop, pipeline_to, plot_to, seed):
Expand All @@ -30,10 +30,11 @@ def main(training_data, preprocessor, columns_to_drop, pipeline_to, plot_to, see

# read in data & preprocessor
cancer_train = pd.read_csv(training_data)
pickle.load(open(preprocessor, "rb"))
cancer_preprocessor = pickle.load(open(preprocessor, "rb"))

if columns_to_drop:
to_drop = pd.read_csv(columns_to_drop).['feats_to_drop'].tolist()
to_drop = pd.read_csv(columns_to_drop).feats_to_drop.tolist()
print(to_drop)
cancer_train = cancer_train.drop(columns=to_drop)

# tune model (here, find K for k-NN using 30-fold CV)
Expand Down

0 comments on commit 4318b52

Please sign in to comment.