-
Notifications
You must be signed in to change notification settings - Fork 0
/
keras_preds.py
executable file
·138 lines (103 loc) · 3.61 KB
/
keras_preds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Import libraries
import pandas as pd
from datetime import datetime
import pytz
import os
# Keras imports
import tensorflow as tf
# physical_devices = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], True)
import numpy as np
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
# Loading model
from keras.models import load_model
# Preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
# Shared preprocessing objects: fitted in fit_transform() and reused,
# already fitted, by transform().
imputer = SimpleImputer()
MMS = MinMaxScaler()
# Directory containing this script; every data/model path below is built from it.
dir_path = os.path.dirname(os.path.realpath(__file__))
# Keras model loaded once, at import time, from Model.h5 next to this script.
model = load_model(dir_path + '/Model.h5')
# Current date/time in the America/New_York timezone.
tz = pytz.timezone('America/New_York')
today = datetime.now(tz)
# Calendar year of `today`; clean_games() uses it to pick that year's stat columns.
year = today.year
def clean_games():
    """Build the model input frame for the upcoming games.

    Reads ``auto_files/future_games.csv`` and ``MLB All Stats.csv`` from
    ``dir_path``, keeps the ``Tm`` column plus the stat columns of the
    current ``year``, and inner-joins those stats onto every game twice:
    once for the home team (``'H '`` prefix) and once for the visiting team
    (``'V '`` prefix).

    Returns:
        tuple: ``(x, games)`` where ``x`` holds every merged column from
        ``'H <year> #Bat'`` onwards and ``games`` is the future-games frame
        exactly as read from disk.

    Raises:
        ValueError: from ``pandas.read_csv`` if the stats file has no
            columns for the current ``year``.
    """
    stats_path = dir_path + '/MLB All Stats.csv'

    # Get future games
    games = pd.read_csv(dir_path + '/auto_files/future_games.csv')

    # Only the header row is needed to discover the available stat names.
    header = pd.read_csv(stats_path, nrows=0).columns

    # Stat columns are named '<year> <stat>'; dropping the first five
    # characters leaves the bare stat name. dict.fromkeys de-duplicates
    # while keeping first-seen order.
    stat_names = list(dict.fromkeys(str(col)[5:] for col in header))
    # The first entry is discarded -- presumably the remainder of the leading
    # 'Tm' column after slicing; 'Tm' itself is re-added explicitly below.
    stat_names = stat_names[1:]

    wanted_cols = ['Tm'] + [f'{year} {name}' for name in stat_names]
    stats = pd.read_csv(stats_path, usecols=wanted_cols)

    # Make home and away stat frames, renaming the team column to the join key
    home_stats = stats.add_prefix('H ').rename(columns={'H Tm': 'Home'})
    vis_stats = stats.add_prefix('V ').rename(columns={'V Tm': 'Visitor'})

    # Merge stats and games
    merged = games.merge(home_stats, on='Home').merge(vis_stats, on='Visitor')

    # Slice from the first home stat column of the *current* year; this used
    # to be hard-coded to 2021 and raised KeyError in any other season.
    x = merged.loc[:, f'H {year} #Bat':]

    # NOTE(review): `games` is returned unmerged; if the inner joins drop or
    # duplicate rows, `x` and `games` will differ in length -- confirm that
    # every team in future_games.csv appears exactly once in the stats file.
    return x, games
# Get previous data so we can fit_transform on it then transform on predictions x
# Split columns with hyphens
def fit_transform(x, y):
    """Fit the shared imputer and scaler on ``x`` and return ``x`` transformed.

    Every column containing at least one ``<digit>-<digit>`` value is split
    on ``'-'`` into two new columns named ``<col>1`` and ``<col>2`` (appended
    at the end) and the original column is removed. The frame is then cast
    to float, imputed with the module-level ``imputer`` and scaled with the
    module-level ``MMS``; both are fitted by this call.

    Args:
        x: DataFrame of feature columns. Work is done on the string-cast
            result of ``astype``, not on the frame passed in.
        y: Target values, forwarded to ``imputer.fit_transform``.

    Returns:
        The output of ``MMS.fit_transform`` on the imputed data.
    """
    x = x.astype(str)

    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    hyphen_cols = [
        col for col in x.columns
        if x[col].str.contains(pat=r'\d-\d').any()
    ]
    for col in hyphen_cols:
        x[[col + '1', col + '2']] = x[col].str.split('-', expand=True)
        del x[col]
    x = x.astype(float)

    # Scale and normalise
    x = imputer.fit_transform(x, y)
    x = MMS.fit_transform(x)
    return x
# Transform predictive columns
def transform(x):
    """Apply the already-fitted imputer and scaler to prediction features.

    Every column containing at least one ``<digit>-<digit>`` value is split
    on ``'-'`` into two new columns named ``<col>1`` and ``<col>2`` (appended
    at the end) and the original column is removed. The frame is then cast
    to float and passed through ``imputer.transform`` and ``MMS.transform``,
    so both module-level objects must have been fitted beforehand.

    Args:
        x: DataFrame of feature columns. Work is done on the string-cast
            result of ``astype``, not on the frame passed in.

    Returns:
        The output of ``MMS.transform`` on the imputed data.
    """
    x = x.astype(str)

    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    hyphen_cols = [
        col for col in x.columns
        if x[col].str.contains(pat=r'\d-\d').any()
    ]
    for col in hyphen_cols:
        x[[col + '1', col + '2']] = x[col].str.split('-', expand=True)
        del x[col]
    x = x.astype(float)

    # Scale and normalise
    x = imputer.transform(x)
    x = MMS.transform(x)
    return x
def transform_all_data(x):
    """Fit the preprocessing on the stored data, then transform ``x``.

    Loads ``Current Stats and Games.csv`` from ``dir_path``, takes
    ``'Home Win'`` as the target and every column from ``'H #Bat'`` onwards
    as features, and runs ``fit_transform`` on them. The prediction frame
    ``x`` is then passed through ``transform`` and that result is returned.
    """
    history = pd.read_csv(
        dir_path + '/Current Stats and Games.csv',
        parse_dates=['Date'],
    )
    targets = history['Home Win']
    features = history.loc[:, 'H #Bat':]

    # The transformed training data itself is not used afterwards; the call
    # is made so that transform() below has fitted state to work with.
    fit_transform(features, targets)

    return transform(x)
def predict_and_save(x, games):
    """Run the model on ``x`` and write the predictions to CSV.

    Stores the output of ``model.predict(x)`` in a ``'Predictions'`` column
    on ``games`` (the frame passed in is modified), then writes the selected
    columns to ``auto_files/ne_input.csv`` under ``dir_path`` without the
    index.
    """
    output_columns = [
        'Time',
        'Date',
        'Home',
        'Visitor',
        'Home Odds',
        'Vis Odds',
        'Predictions',
    ]
    output_path = dir_path + '/auto_files/ne_input.csv'

    games['Predictions'] = model.predict(x)
    games[output_columns].to_csv(output_path, index=False)
#### RUN FUNCTIONS
# x, games = clean_games()
# x = transform_all_data(x)
# predict_and_save(x, games)