forked from eyra/fertility-prediction-challenge
-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.py
68 lines (49 loc) · 2.01 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
This script calls submission.py. Add your method to submission.py to run your
prediction method.
To test your submission use the following command:
python run.py <data_path> <background_data_path> [--output <output_path>]
For example:
python run.py PreFer_fake_data.csv PreFer_fake_background_data.csv
Optionally, you can use the score function to calculate evaluation scores given
your predictions and the ground truth within the training dataset.
"""
import sys
import argparse
import pandas as pd
import submission
# Command-line interface definition.
#
# Only the parser is constructed at import time. The arguments themselves are
# parsed inside the ``if __name__ == "__main__"`` guard at the bottom of the
# file, so importing this module never reads ``sys.argv`` and can never
# terminate the interpreter with a usage error.
parser = argparse.ArgumentParser(description="Process data.")
# Required positional arguments: the two input CSV files.
parser.add_argument("data_path", help="Path to data CSV file.")
parser.add_argument("background_data_path", help="Path to background data CSV file.")
# Optional destination; when omitted the attribute is None.
parser.add_argument("--output", help="Path to prediction output CSV file.")
def predict(data_path, background_data_path, output):
    """Generate predictions for the given data and write them out as CSV.

    Both CSV files are loaded into DataFrames and handed to
    ``submission.predict_outcomes``. The returned predictions are checked
    (with ``assert`` statements) to consist of exactly the two columns
    ``nomem_encr`` and ``prediction``, in any order, and are then written
    without the index to ``output``.

    This function should not be modified.

    Args:
        data_path: Path to the data CSV file.
        background_data_path: Path to the background data CSV file.
        output: Destination handed to ``DataFrame.to_csv``. When ``None``,
            the predictions are written to standard output.

    Raises:
        AssertionError: If the predictions do not have exactly the columns
            ``nomem_encr`` and ``prediction``.
    """
    # Both files are read with the same settings; undecodable bytes are
    # replaced instead of raising.
    read_options = {
        "encoding": "latin-1",
        "encoding_errors": "replace",
        "low_memory": False,
    }
    data_df = pd.read_csv(data_path, **read_options)
    background_df = pd.read_csv(background_data_path, **read_options)

    predictions = submission.predict_outcomes(data_df, background_df)

    column_error = "Predictions must have two columns: nomem_encr and prediction"
    assert predictions.shape[1] == 2, column_error
    # Column order is irrelevant, only the set of names matters.
    assert set(predictions.columns) == {"nomem_encr", "prediction"}, column_error

    destination = sys.stdout if output is None else output
    predictions.to_csv(destination, index=False)
# Script entry point: parse the command line and run the prediction step.
if __name__ == "__main__":
    args = parser.parse_args()
    predict(
        data_path=args.data_path,
        background_data_path=args.background_data_path,
        output=args.output,
    )