-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpredict.py
90 lines (74 loc) · 3.12 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import time
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer, PowerTransformer
from src.logger import setup_logger
from src.preprocessing import prepare_lstm_data
from src.model_dispatcher import MODEL_DISPATCHER
from src.indicators import enrich_data
# ---------------------------------------------------------------------------
# Module-level setup: runs once at import time. It builds the logger, the
# shared scaler, the run configuration, the model (with pretrained weights)
# and loads the company data used by make_prediction().
# ---------------------------------------------------------------------------
LOGGER = setup_logger()  # set up the logger.

# Shared scaler instance; make_prediction() calls fit_transform() on it.
scaler = StandardScaler()

# training or testing config
config = {
    'model_name': 'BiLSTM',
    'data_file': 'train_data',
    'mode': 'train_test',
    'optimizer': tf.keras.optimizers.SGD(learning_rate=0.01),
    # NOTE(review): the name says lr 0.001 while the optimizer above is
    # created with learning_rate=0.01 -- confirm which one is intended.
    'base_name': 'sgd_lr_0.001',
    'time_window': 5,
    'epochs': 1,
}

LOGGER.info("Initializing the model.")
model_init = MODEL_DISPATCHER[config['model_name']]
# NOTE(review): 326 is presumably the number of features per time step
# produced by the preprocessing pipeline -- confirm before changing it.
model = model_init(
    input_shape=(config['time_window'], 326),
    # Plain int on purpose: the previous spelling `(1)` was also the int 1,
    # not a one-element tuple.
    output_shape=1,
)
model = model.create_model()

LOGGER.info("Loading the pretrained model.")
model.load_weights('data/biLstm_5_folds_std_scaler.h5')

LOGGER.info('Loading the data from the database.')
# SECURITY: allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code. Only ever point this at a trusted, locally produced
# file.
data = np.load('data/train_data_v2.npy', allow_pickle=True)
# The file stores a single Python object inside a 0-d array; .item() unwraps
# it (make_prediction() uses it as a mapping).
data = data.item()
def _parse_numeric_cell(value):
    """Convert one raw table cell to a float.

    Thousands separators (",") and percent signs ("%") are removed and the
    placeholder " - " is replaced by "0" before the text is passed to float().
    """
    text = str(value).replace(",", "").replace(" - ", "0").replace("%", "")
    return float(text)


def make_prediction(user_input):
    """Make prediction for the company name provided.

    The company is looked up by case-insensitive substring match against the
    name stored at position 4 of each entry in the module-level ``data``
    mapping. The first matching entry that yields usable model input is
    scored; matching entries with no rows after enrichment, or whose
    windowed data is not 3-dimensional, are skipped.

    Args:
        user_input (str): Name of the company.

    Returns:
        [float]: Predicted score out of 100, or None if nothing was scored.
        [float]: Predicted probability of going to default (in percent),
            or None if nothing was scored.
    """
    # An empty (or whitespace-only) query is a substring of every company
    # name, so it would silently score an arbitrary company. Treat it as
    # "not found" instead.
    if not user_input or not user_input.strip():
        return None, None

    # Loop-invariant: lower-case the query once instead of once per company.
    query = user_input.lower()

    for company_data in data.values():
        if query not in company_data[4].lower():
            continue

        LOGGER.info(f"Found {user_input} in the database.")

        # Drop the non-feature columns up front so they are not parsed.
        dataframe = company_data[0].drop(['Date', 'index'], axis=1)

        # Cells may be formatted strings ("1,234", "12%", " - "); convert
        # every cell to a float column by column.
        dataframe = dataframe.apply(
            lambda column: column.map(_parse_numeric_cell))
        dataframe = dataframe.astype(np.float32)
        dataframe = enrich_data(dataframe)
        if dataframe.shape[0] == 0:
            continue

        # NOTE(review): the scaler is re-fit on this company's own rows at
        # prediction time rather than reusing statistics from training --
        # confirm this matches how the loaded weights were trained.
        normalized_data = scaler.fit_transform(dataframe.values)
        lstm_data = prepare_lstm_data(normalized_data, config['time_window'])
        if len(lstm_data.shape) != 3:
            continue

        LOGGER.info("Giving score to the company.")
        preds = model.predict(lstm_data, batch_size=32)
        # Use the prediction for the last window in the batch.
        default_probability = float(preds[-1][0])
        score = (1 - default_probability) * 100
        return round(score, 4), round(default_probability * 100, 4)

    # if we do not find anything then return None
    return None, None