diff --git a/engines/.gitignore b/engines/.gitignore new file mode 100644 index 0000000..7f9f422 --- /dev/null +++ b/engines/.gitignore @@ -0,0 +1,114 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ \ No newline at end of file diff --git a/engines/__pycache__/__init__.cpython-36.pyc b/engines/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 2f4bf95..0000000 Binary files a/engines/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/engines/__pycache__/auto_arima.cpython-36.pyc b/engines/__pycache__/auto_arima.cpython-36.pyc deleted file mode 100644 index 03fccb8..0000000 Binary files a/engines/__pycache__/auto_arima.cpython-36.pyc and /dev/null differ diff --git a/engines/__pycache__/functions_timeseries.cpython-36.pyc b/engines/__pycache__/functions_timeseries.cpython-36.pyc deleted file mode 100644 index 333fae8..0000000 Binary files a/engines/__pycache__/functions_timeseries.cpython-36.pyc and /dev/null differ diff --git a/engines/__pycache__/helpers.cpython-36.pyc b/engines/__pycache__/helpers.cpython-36.pyc deleted file mode 100644 index e8f60ee..0000000 Binary files a/engines/__pycache__/helpers.cpython-36.pyc and /dev/null differ diff --git a/engines/__pycache__/holtwinter.cpython-36.pyc b/engines/__pycache__/holtwinter.cpython-36.pyc deleted file mode 100644 index 768e712..0000000 Binary files a/engines/__pycache__/holtwinter.cpython-36.pyc and /dev/null differ diff --git a/engines/__pycache__/lstm.cpython-36.pyc b/engines/__pycache__/lstm.cpython-36.pyc deleted file mode 100644 index cff51d8..0000000 Binary files a/engines/__pycache__/lstm.cpython-36.pyc and /dev/null differ diff --git a/engines/__pycache__/var.cpython-36.pyc b/engines/__pycache__/var.cpython-36.pyc deleted file mode 100644 index 144ef6c..0000000 Binary files a/engines/__pycache__/var.cpython-36.pyc and /dev/null differ diff --git a/engines/lstm.py b/engines/lstm.py index d10d45c..514da45 100644 --- a/engines/lstm.py +++ b/engines/lstm.py @@ -10,13 +10,87 @@ #import helpers as h from keras.layers import Dropout from keras.layers.normalization import BatchNormalization +import numpy as np +import pandas as pd +from 
sklearn.preprocessing import MinMaxScaler +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import mean_squared_error,mean_absolute_error +from keras.models import Sequential +from keras.layers.recurrent import LSTM +from keras.layers.core import Dense +import math +from matplotlib import pyplot +from numpy.random import seed +seed(69) +from math import sqrt +from numpy import concatenate +import matplotlib.pyplot as plt +from pandas import read_csv +from pandas import DataFrame +from pandas import concat +from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import LabelEncoder +from sklearn.metrics import mean_squared_error +from keras.models import Sequential +from keras.layers import Dense +from keras.layers import LSTM +from keras.layers import Activation, Dropout +from keras.layers.normalization import BatchNormalization + + +def add_hlayer(model, num_nodes, return_sequences=False): + model.add(LSTM(num_nodes, return_sequences=return_sequences)) + +def define_model(n_nodes, n_hlayers, dropout, input_data, output_shape): + model = Sequential() + if n_hlayers == 1: + model.add(LSTM(output_dim =int(n_nodes), activation='relu', input_shape =(input_data.shape[1], input_data.shape[2]), + return_sequences=False)) + else: + model.add(LSTM(output_dim =int(n_nodes), activation='relu', input_shape =(input_data.shape[1], input_data.shape[2]), + return_sequences=True)) + model.add(Dropout(dropout)) + #print(n_hlayers) + + for i in range(n_hlayers-1): + #print(i) + if i == n_hlayers-2: + add_hlayer(model, n_nodes, return_sequences=False) + model.add(Dropout(dropout)) + model.add(BatchNormalization()) + else: + add_hlayer(model, n_nodes, return_sequences=True) + model.add(Dropout(dropout)) + model.add(BatchNormalization()) -def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): + model.add(Dense(int(n_nodes/2), activation='relu')) + model.add(Dropout(dropout)) + + model.add(Dense(output_dim=int(output_shape))) + + model.compile(loss='mse', optimizer='adam', metrics=['accuracy']) + return model + + +def hyperparameter_opt(list_hlayers, list_n_nodes, n_dropout, input_data, output_shape): + models_dict = {} + for hlayer in list_hlayers: + for nodes in list_n_nodes: + for drop in n_dropout: + model = define_model(nodes, hlayer, drop, input_data, output_shape) + name = 'model_nlayers_{}_nnodes_{}_dropout_{}'.format(hlayer, nodes, drop) + models_dict[name] = model + print(name) + + return models_dict + +def anomaly_uni_LSTM(lista_datos,num_forecast=10,desv_mse=2): + temp= pd.DataFrame(lista_datos,columns=['values']) scaler_x = MinMaxScaler(feature_range =(-1, 1)) @@ -27,17 +101,18 @@ def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): x = scaler_x.fit_transform(temp) x = x[:,0] + print ('x',x) TRAIN_SIZE = 0.7 train_size = int(len(x) * TRAIN_SIZE) test_size = len(x) - train_size x_train, x_test = x[0:train_size], x[train_size:len(x)] - #print 'x_train',x_train - #print 'x_test',x_test + print ('x_train',x_train) + print ('x_test',x_test) window_size = 1 - num_forecast = num_fut + num_fore = num_forecast + 1 win_train_x, win_train_y = [], [] @@ -50,64 +125,119 @@ def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): win_train_x = np.array(win_train_x) - #print 'win_train_x',win_train_x + print ('win_train_x',win_train_x) + print ('shape win_train_x',win_train_x.shape) win_train_y = np.array(win_train_y) - #print 'win_train_y',win_train_y + print ('win_train_y',win_train_y) + print ('shape win_train_y',win_train_y.shape) win_train_x = 
win_train_x.reshape((win_train_x.shape[0], 1, win_train_x.shape[1])) - #print(win_train_x.shape) + print('reshape win_train_x',win_train_x.shape) new_test_x = x_test.reshape((x_test.shape[0], 1, 1)) - #print 'new_test_x',new_test_x + print ('new_test_x',new_test_x) + + ##################neural network###################### + + models_dict = {} + n_hlayers = [1, 2] + n_nodes = [100, 300, 500] + n_dropout = [0, 0.1, 0.15, 0.20] + + #pruebas + #n_hlayers = [1] + #n_nodes = [500] + #n_dropout = [0.15] + + models_dict = hyperparameter_opt(n_hlayers, n_nodes, n_dropout, win_train_x, num_forecast) + + for model in models_dict: + print(model) + print(models_dict[model].summary()) + + print ('Numero de modelos',len(models_dict)) + + #####getting best model + dict_eval_models = {} + for model in models_dict: +# print 'fit model {}'.format(model) + try: + seed(69) + name_model = models_dict[model].fit(win_train_x, win_train_y, epochs=25, verbose=0, shuffle=False) + dict_eval_models[model] = name_model + except: + dict_eval_models[model] = 'Error' + + dict_mse_models = {} + for model in models_dict: + print(model) + yhat = models_dict[model].predict(new_test_x) + yhat_test = yhat[:,0] + + temp_res= pd.DataFrame(yhat_test,columns=['values']) + temp_res = np.array(temp_res) + y_yhat_inv = scaler_x.inverse_transform(temp_res) + y_yhat_inv= y_yhat_inv[:,0] + + temp_x_test= pd.DataFrame(x_test,columns=['values']) + temp_x_test = np.array(temp_x_test) + x_test_inv = scaler_x.inverse_transform(temp_x_test) + + #pyplot.plot(x_test_inv, label='real') + #pyplot.plot(y_yhat_inv, label='pred') + #pyplot.legend() + #pyplot.show() + + mse = (mean_squared_error(x_test_inv, y_yhat_inv)) + rmse = np.sqrt(mse) + print ('mse', mse) + print ('rmse', rmse) + dict_mse_models[model] = rmse + + best_model = min(dict_mse_models, key = dict_mse_models.get) - model = Sequential () - model.add(LSTM(output_dim = 100, activation='relu', input_shape =(win_train_x.shape[1], win_train_x.shape[2]))) - model.add(Dropout(0.2)) - model.add(Dense(output_dim=num_forecast)) - model.compile(loss='mse', optimizer='adam') - history = model.fit(win_train_x, win_train_y, epochs=200, verbose=1, shuffle=False) + print('best_model',best_model) - yhat = model.predict(new_test_x) - #print 'yhat',yhat + yhat = models_dict[best_model].predict(new_test_x) + print ('yhat',yhat) yhat_test = yhat[:,0] + print ('yhat_test',yhat_test) - temp_tes= pd.DataFrame(yhat_test,columns=['values']) - temp_tes = np.array(temp_tes) - y_hat_inv = scaler_x.inverse_transform(temp_tes) + temp_res= pd.DataFrame(yhat_test,columns=['values']) + temp_res = np.array(temp_res) + + y_yhat_inv = scaler_x.inverse_transform(temp_res) + print ('y_yhat_inv',y_yhat_inv) + + y_yhat_inv= y_yhat_inv[:,0] temp_x_test= pd.DataFrame(x_test,columns=['values']) temp_x_test = np.array(temp_x_test) x_test_inv = scaler_x.inverse_transform(temp_x_test) - - print ("paso 1") - + x_test_inv= x_test_inv[:,0] + print ('x_test_inv',x_test_inv) - #pyplot.plot(y_hat_inv, label='pred') #pyplot.plot(x_test_inv, label='real') + #pyplot.plot(y_yhat_inv, label='pred') #pyplot.legend() #pyplot.show() - #print x_test_inv[:,0] - #print y_hat_inv[:,0] - - mse = (mean_squared_error(x_test_inv[:,0], y_hat_inv[:,0])) + mse = (mean_squared_error(x_test_inv, y_yhat_inv)) rmse = np.sqrt(mse) - df_aler = pd.DataFrame() - #print ('mse' + mse) - #print ('rmse'+ rmse) - print ("paso 2") + print ('mse', mse) + print ('rmse', rmse) + df_aler = pd.DataFrame() lista_puntos = np.arange(train_size, train_size + test_size,1) - 
testing_data = pd.DataFrame(y_hat_inv[:,0],index =lista_puntos,columns=['expected value']) - #print testing_data + testing_data = pd.DataFrame(y_yhat_inv,index =lista_puntos,columns=['expected value']) - #print 'x_test_inv',x_test_inv - #print 'y_hat_inv',y_hat_inv + #print ('x_test_inv',x_test_inv) + #print ('y_yhat_inv',y_yhat_inv) - df_aler['real_value'] = x_test_inv[:,0] - df_aler['expected_value'] = y_hat_inv[:,0] + df_aler['real_value'] = x_test_inv + df_aler['expected_value'] = y_yhat_inv df_aler['mse'] = mse df_aler['puntos'] = df_aler.index @@ -116,18 +246,21 @@ def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): df_aler['rmse'] = rmse - mae = mean_absolute_error(y_hat_inv[:,0], x_test_inv[:,0]) + mae = mean_absolute_error(y_yhat_inv, x_test_inv) df_aler['mae'] = mae df_aler['anomaly_score'] = abs(df_aler['expected_value']-df_aler['real_value'])/df_aler['mae'] + #print (df_aler) + df_aler_ult = df_aler[:5] - df_aler = df_aler[(df_aler['anomaly_score']> desv_mae)] + df_aler = df_aler[(df_aler['anomaly_score']> desv_mse)] max_anom = df_aler['anomaly_score'].max() min_anom = df_aler['anomaly_score'].min() df_aler['anomaly_score'] = ( df_aler['anomaly_score'] - min_anom ) /(max_anom - min_anom) + print ('Anomaly') df_aler_ult = df_aler[:5] df_aler_ult = df_aler_ult[(df_aler_ult.index==df_aler.index.max())|(df_aler_ult.index==((df_aler.index.max())-1)) @@ -142,10 +275,45 @@ def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): df_aler_ult['anomaly_score']= ( df_aler_ult['anomaly_score'] - min_ult ) /(max_ult - min_ult) + #print (df_aler_ult) + + ################## forecast - temp_res= pd.DataFrame(yhat[-1],columns=['values']) - print (temp_res) + win_todo_x, win_todo_y = [], [] + for i in range(len(x) - window_size - 1): + if len(x)<(i+num_fore): + break + a = x[i:(i + window_size)] + win_todo_x.append(a) + win_todo_y.append(x[i + window_size: i+num_fore]) + + win_todo_x = np.array(win_todo_x) + #print ('win_todo_x',win_todo_x) + #print ('shape win_todo_x',win_todo_x.shape) + + win_todo_y = np.array(win_todo_y) + #print ('win_todo_y',win_todo_y) + #print ('shape win_todo_y',win_todo_y.shape) + + win_todo_x = win_todo_x.reshape((win_todo_x.shape[0], 1, win_todo_x.shape[1])) + #print('reshape win_todo_x',win_todo_x.shape) + + + name_model = models_dict[best_model].fit(win_todo_x, win_todo_y, epochs=25, verbose=0, shuffle=False) + + falta_win_todo_x = x[-num_forecast:] + #print ('falta_win_todo_x',falta_win_todo_x) + #print ('shape falta_win_todo_x',falta_win_todo_x.shape) + + falta_win_todo_x = falta_win_todo_x.reshape(falta_win_todo_x.shape[0],1,1) + #print ('x',x) + #print ('falta_win_todo_x',falta_win_todo_x) + yhat_todo = models_dict[best_model].predict(falta_win_todo_x) + #print ('yhat_todo',yhat_todo) + #print ('yhat_todo',yhat_todo[-1,:]) + + temp_res= pd.DataFrame(yhat_todo[-1],columns=['values']) temp_res = np.array(temp_res) y_fore_inv = scaler_x.inverse_transform(temp_res) @@ -156,8 +324,8 @@ def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): #pyplot.legend() #pyplot.show() - engine_output={} + engine_output={} engine_output['rmse'] = int(rmse) engine_output['mse'] = int(mse) @@ -171,15 +339,15 @@ def anomaly_uni_LSTM(lista_datos,num_fut,desv_mae=2): df_future['value']=df_future.value.astype("float64") df_future['step']= np.arange(len(x),len(x)+len(y_fore_inv),1) - - #print 'df_future',df_future engine_output['future'] = df_future.fillna(0).to_dict(orient='record') testing_data['step']=testing_data.index - engine_output['debug'] = testing_data.to_dict(orient='record') + 
engine_output['debug'] = testing_data.fillna(0).to_dict(orient='record') + + return engine_output + - return (engine_output) def series_to_supervised(data, n_in=1, n_out=1, dropnan=True): n_vars = 1 if type(data) is list else data.shape[1] @@ -209,110 +377,162 @@ def series_to_supervised(data, n_in=1, n_out=1, dropnan=True): -def anomaly_LSTM(list_var,num_fut,desv_mae=2): +def anomaly_LSTM(list_var,num_fut=10,desv_mae=2): + df_var = pd.DataFrame() for i in range(len(list_var)): df_var['var_{}'.format(i)] = list_var[i] - print (df_var.head(3)) + #print df_var + + temp_var_ult = pd.DataFrame(df_var[df_var.columns[-1]]) + scaler_y = MinMaxScaler(feature_range =(-1, 1)) + y = scaler_y.fit_transform(temp_var_ult) + #print ('y', y) + + scaler_x = MinMaxScaler(feature_range =(-1, 1)) + #x = np.array(df_var) + #print x + x = scaler_x.fit_transform(df_var) + #x = x[:,0] + #print ('x',x) + + TRAIN_SIZE = 0.7 + train_size = int(len(x) * TRAIN_SIZE) + test_size = len(x) - train_size - normalized_df = (df_var-df_var.min())/(df_var.max()-df_var.min()) - #print normalized_df.head(3) + x_train, x_test = x[0:train_size], x[train_size:len(x)] + #print ('x_train',x_train) + #print ('x_test',x_test) + #print ('shape x_test',x_test.shape) + window_size = 1 + num_fore = num_fut + 1 - values = normalized_df.values + win_train_x, win_train_y = [], [] + for i in range(len(x_train) - window_size - 1): + if len(x_train)<(i+num_fore): + break + a = x_train[i:(i + window_size)] + win_train_x.append(a) + win_train_y.append(x_train[i + window_size: i+num_fore]) - TRAIN_SIZE = 0.70 - train_size = int(len(values) * TRAIN_SIZE) - test_size = len(values) - train_size - train, test = values[0:train_size, :], values[train_size:len(values), :] + win_train_x = np.array(win_train_x) + print ('win_train_x',win_train_x) + print ('shape win_train_x',win_train_x.shape) + win_train_y = np.array(win_train_y) + print ('win_train_y',win_train_y) + print ('shape win_train_y',win_train_y.shape) + win_train_y_var_pred = win_train_y[:,:,-1] + print ('win_train_y_var_pred',win_train_y_var_pred) + print ('shape win_train_y_var_pred',win_train_y_var_pred.shape) - train_X, train_y = train[:, :-1], train[:, -1] - test_X, test_y = test[:, :-1], test[:, -1] + new_test_x = x_test.reshape((x_test.shape[0], 1, x_test.shape[1])) + print ('new_test_x',new_test_x) + print ('shape new_test_x',new_test_x.shape) - train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1])) - test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1])) - print(train_X.shape, train_y.shape, test_X.shape, test_y.shape) - model = Sequential() + ##################neural network###################### - model.add(LSTM(30, input_shape=(train_X.shape[1], train_X.shape[2]),return_sequences=True)) - model.add(Dropout(0.2)) - model.add(BatchNormalization()) + models_dict = {} + n_hlayers = [1, 2, 3] + n_nodes = [100, 300, 500,700] + n_dropout = [0, 0.1, 0.15, 0.20] - model.add(LSTM(30,return_sequences=True)) - model.add(Dropout(0.2)) - model.add(BatchNormalization()) + #pruebas + #n_hlayers = [1] + #n_nodes = [500] + #n_dropout = [0] - model.add(LSTM(30)) - model.add(Dropout(0.2)) - model.add(BatchNormalization()) + models_dict = hyperparameter_opt(n_hlayers, n_nodes, n_dropout, win_train_x, num_fut) - model.add(Dense(32,activation='relu')) + for model in models_dict: + print(model) + print(models_dict[model].summary()) - model.add(Dense(1,activation='sigmoid')) - model.compile(loss='mae', optimizer='adam') - - # fit network - history = 
model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X, test_y), shuffle=False) + print ('Numero de modelos',len(models_dict)) - yhat = model.predict(test_X) + #####getting best model + dict_eval_models = {} + for model in models_dict: + #print 'fit model {}'.format(model) + try: + seed(69) + name_model = models_dict[model].fit(win_train_x, win_train_y_var_pred, epochs=25, verbose=0, shuffle=False) + dict_eval_models[model] = name_model + except: + dict_eval_models[model] = 'Error' - ###################################Desnormalizacion############################################# - y_hat_df = pd.DataFrame() - y_hat_df['yhat'] = yhat[:,0] + dict_mse_models = {} + for model in models_dict: + print(model) + yhat = models_dict[model].predict(new_test_x) + yhat_test = yhat[:,0] - test_y_df = pd.DataFrame() - test_y_df['yhat'] = test_y + temp_res= pd.DataFrame(yhat_test,columns=['values']) + temp_res = np.array(temp_res) + y_yhat_inv = scaler_y.inverse_transform(temp_res) + y_yhat_inv= y_yhat_inv[:,0] - #nos quedamos con la columna inicial la cual predecimos para desnormalizar - ult = df_var[[df_var.columns[-1]]] - ult['yhat'] = ult[df_var.columns[-1]] - ult.drop(columns=[df_var.columns[-1]],inplace=True) + x_test_var_pred = x_test[:,-1] + #print ('x_test_var_pred', x_test_var_pred) + temp_x_test= pd.DataFrame(x_test_var_pred,columns=['values']) + temp_x_test = np.array(temp_x_test) + x_test_inv = scaler_y.inverse_transform(temp_x_test) + x_test_inv= x_test_inv[:,0] - #pyplot.plot(normalized_df[normalized_df.columns[-1]], label='real') - #pyplot.plot(y_hat_df, label='pred') - #pyplot.legend() - #pyplot.show() + #pyplot.plot(x_test_inv, label='real') + #pyplot.plot(y_yhat_inv, label='pred') + #pyplot.legend() + #pyplot.show() - op1= (ult.max()-ult.min()) + mse = (mean_squared_error(x_test_inv, y_yhat_inv)) + rmse = np.sqrt(mse) + print ('mse', mse) + print ('rmse', rmse) + dict_mse_models[model] = rmse - desnormalize_y_hat_df = (y_hat_df * op1)+ult.min() + best_model = min(dict_mse_models, key = dict_mse_models.get) - desnormalize_test_y_df = (test_y_df * op1)+ult.min() - #pyplot.plot(desnormalize_test_y_df, label='real') - #pyplot.plot(desnormalize_y_hat_df, label='pred') - #pyplot.legend() - #pyplot.show() + print('best_model',best_model) + yhat = models_dict[best_model].predict(new_test_x) + yhat_test = yhat[:,0] + temp_res= pd.DataFrame(yhat_test,columns=['values']) + temp_res = np.array(temp_res) + y_yhat_inv = scaler_y.inverse_transform(temp_res) + y_yhat_inv= y_yhat_inv[:,0] - test_y_list = desnormalize_test_y_df['yhat'].tolist() - yhat_list = desnormalize_y_hat_df['yhat'].tolist() + x_test_var_pred = x_test[:,-1] + #print ('x_test_var_pred', x_test_var_pred) + temp_x_test= pd.DataFrame(x_test_var_pred,columns=['values']) + temp_x_test = np.array(temp_x_test) + x_test_inv = scaler_y.inverse_transform(temp_x_test) + x_test_inv= x_test_inv[:,0] - ################################### Fin Desnormalizacion############################################# + #pyplot.plot(x_test_inv, label='real') + #pyplot.plot(y_yhat_inv, label='pred') + #pyplot.legend() + #pyplot.show() - mse = (mean_squared_error(test_y_list, yhat_list)) + mse = (mean_squared_error(x_test_inv, y_yhat_inv)) rmse = np.sqrt(mse) - df_aler = pd.DataFrame() - #print 'mse', mse - #print 'rmse', rmse + print ('mse', mse) + print ('rmse', rmse) - print ('yhat_list',len(yhat_list)) - print ('test_y_list',len(test_y_list)) - print ('values',len(values)) - print ('train_size',train_size) - print ('test_size',test_size) - 
lista_puntos = np.arange(train_size, train_size + test_size,1) - testing_data = pd.DataFrame(yhat_list,index =lista_puntos,columns=['expected value']) + df_aler = pd.DataFrame() + lista_puntos = np.arange(train_size, train_size + test_size,1) + testing_data = pd.DataFrame(y_yhat_inv,index =lista_puntos,columns=['expected value']) + #print ('x_test_inv',x_test_inv) + #print ('y_yhat_inv',y_yhat_inv) - df_aler['real_value'] = test_y_list - df_aler['expected_value'] = yhat_list + df_aler['real_value'] = x_test_inv + df_aler['expected_value'] = y_yhat_inv df_aler['mse'] = mse df_aler['puntos'] = df_aler.index @@ -321,22 +541,21 @@ def anomaly_LSTM(list_var,num_fut,desv_mae=2): df_aler['rmse'] = rmse - mae = mean_absolute_error(yhat_list, test_y_list) - df_aler['mae'] = mean_absolute_error(yhat_list, test_y_list) + mae = mean_absolute_error(y_yhat_inv, x_test_inv) + df_aler['mae'] = mae df_aler['anomaly_score'] = abs(df_aler['expected_value']-df_aler['real_value'])/df_aler['mae'] print (df_aler) + df_aler_ult = df_aler[:5] - df_aler = df_aler[(df_aler['anomaly_score']> desv_mae)] + df_aler = df_aler[(df_aler['anomaly_score']> desv_mae)] max_anom = df_aler['anomaly_score'].max() min_anom = df_aler['anomaly_score'].min() - df_aler['anomaly_score']= ( df_aler['anomaly_score'] - min_anom ) /(max_anom - min_anom) - - print ('Anomaly') + df_aler['anomaly_score'] = ( df_aler['anomaly_score'] - min_anom ) /(max_anom - min_anom) - print (df_aler) + #print ('Anomaly') df_aler_ult = df_aler[:5] df_aler_ult = df_aler_ult[(df_aler_ult.index==df_aler.index.max())|(df_aler_ult.index==((df_aler.index.max())-1)) @@ -350,84 +569,74 @@ def anomaly_LSTM(list_var,num_fut,desv_mae=2): min_ult = df_aler_ult['anomaly_score'].min() df_aler_ult['anomaly_score']= ( df_aler_ult['anomaly_score'] - min_ult ) /(max_ult - min_ult) - df_aler_ult = df_aler_ult.fillna(0) - - ################################### Forecast ############################################# - - - - test1_X, test1_y = values[:, :-1], values[:, -1] - test1_X = test1_X.reshape((test1_X.shape[0], 1, test1_X.shape[1])) - - model = Sequential() - - model.add(LSTM(30, input_shape=(test1_X.shape[1], test1_X.shape[2]),return_sequences=True)) - model.add(Dropout(0.2)) - model.add(BatchNormalization()) - model.add(LSTM(30,return_sequences=True)) - model.add(Dropout(0.2)) - model.add(BatchNormalization()) + #print (df_aler_ult) - model.add(LSTM(30)) - model.add(Dropout(0.2)) - model.add(BatchNormalization()) - - model.add(Dense(32,activation='relu')) - model.add(Dropout(0.2)) - - model.add(Dense(1,activation='sigmoid')) - model.compile(loss='mae', optimizer='adam') - - # fit network - history = model.fit(test1_X, test1_y, epochs=50, batch_size=72, verbose=0, shuffle=False) - - num_fut=num_fut - len_fore = len(test1_X) - num_fut - fore = test1_X[len_fore:] - yhat_fore = model.predict(fore) + ###forecast + win_todo_x, win_todo_y = [], [] + for i in range(len(x) - window_size - 1): + if len(x)<(i+num_fore): + break + a = x[i:(i + window_size)] + win_todo_x.append(a) + win_todo_y.append(x[i + window_size: i+num_fore]) + win_todo_x = np.array(win_todo_x) + #print ('win_todo_x',win_todo_x) + #print ('shape win_todo_x',win_todo_x.shape) + win_todo_y = np.array(win_todo_y) + #print ('win_todo_y',win_todo_y) + #print ('shape win_todo_y',win_todo_y.shape) - ###################################Desnormalizacion############################################# - y_hat_df_fore = pd.DataFrame() - y_hat_df_fore['yhat'] = yhat_fore[:,0] + win_todo_y_var_pred = win_todo_y[:,:,-1] + #print 
('win_todo_y_var_pred',win_todo_y_var_pred) + #print ('shape win_todo_y_var_pred',win_todo_y_var_pred.shape) + name_model = models_dict[best_model].fit(win_todo_x, win_todo_y_var_pred, epochs=25, verbose=0, shuffle=False) - op1= (ult.max()-ult.min()) + falta_win_todo_x = x[-num_fut:,:] + #print ('falta_win_todo_x',falta_win_todo_x) + #print ('shape falta_win_todo_x',falta_win_todo_x.shape) - desnormalize_y_hat_fore = (y_hat_df_fore * op1)+ult.min() + falta_win_todo_x = falta_win_todo_x.reshape(falta_win_todo_x.shape[0],1,falta_win_todo_x.shape[1]) + #print ('x',x) + #print ('falta_win_todo_x',falta_win_todo_x) + yhat_todo = models_dict[best_model].predict(falta_win_todo_x) + #print ('yhat_todo',yhat_todo) + #print ('yhat_todo',yhat_todo[-1,:]) + temp_res= pd.DataFrame(yhat_todo[-1],columns=['values']) + temp_res = np.array(temp_res) + y_fore_inv = scaler_y.inverse_transform(temp_res) + y_fore_inv= y_fore_inv[:,0] - #pyplot.plot(desnormalize_y_hat_fore, label='pred') + #pyplot.plot(y_fore_inv, label='pred') #pyplot.legend() #pyplot.show() - yhat_fore_list = desnormalize_y_hat_fore['yhat'].tolist() - - - - lista_result = np.arange(len(test1_X), (len(test1_X)+num_fut),1) - df_result_forecast = pd.DataFrame({'puntos':lista_result, 'valores':yhat_fore_list}) - df_result_forecast.set_index('puntos',inplace=True) - df_result_forecast['valores']=df_result_forecast['valores'].astype(str) - df_result_forecast['step'] = df_result_forecast.index - engine_output={} - engine_output['rmse'] = rmse - engine_output['mse'] = mse - engine_output['mae'] = mae + + engine_output['rmse'] = int(rmse) + engine_output['mse'] = int(mse) + engine_output['mae'] = int(mae) engine_output['present_status']=exists_anom_last_5 - engine_output['present_alerts']=df_aler_ult.to_dict(orient='record') - engine_output['past']=df_aler.to_dict(orient='record') + engine_output['present_alerts']=df_aler_ult.fillna(0).to_dict(orient='record') + engine_output['past']=df_aler.fillna(0).to_dict(orient='record') engine_output['engine']='LSTM' - engine_output['future']= df_result_forecast.to_dict(orient='record') + + df_future= pd.DataFrame(y_fore_inv,columns=['value']) + + df_future['value']=df_future.value.astype("float64") + df_future['step']= np.arange(len(x),len(x)+len(y_fore_inv),1) + engine_output['future'] = df_future.to_dict(orient='record') testing_data['step']=testing_data.index + engine_output['debug'] = testing_data.to_dict(orient='record') - return (engine_output) + return engine_output diff --git a/engines/var.py b/engines/var.py index 0c173b5..4ecde9f 100644 --- a/engines/var.py +++ b/engines/var.py @@ -13,6 +13,7 @@ def univariate_anomaly_VAR(lista_datos,num_fut): df = pd.DataFrame() df['valores'] = lista_datos + df['valores'] = df.valores.astype(np.float) tam_train = int(len(df)*0.7) #print tam_train @@ -21,14 +22,17 @@ def univariate_anomaly_VAR(lista_datos,num_fut): df_test = df[tam_train:] print('Tamanio test: {}'.format(df_test.shape)) + print (type(df_test)) mae_period = 99999999 best_lag=0 lags = int(round(len(df_train)/2)) + print ("empezamos el bucle") for lag in range(lags): model = pf.VAR(df_train,lags=lag) x = model.fit() + print ("fit ready") future_forecast_pred = model.predict(len(df_test)) future_forecast_pred = future_forecast_pred[['valores']] @@ -170,6 +174,8 @@ def anomaly_VAR(list_var,num_fut): mae_period = 99999999 best_lag=0 lags = int(round(len(df_train)/2)) + if (lags > 100): + lags=100 for lag in range(lags): print ("entra en el bucle con dato " + str(lag)) model = pf.VAR(df_train,lags=lag) 
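Reviewer note on the windowing scheme in engines/lstm.py above (illustration only, not part of the patch): both `anomaly_uni_LSTM` and `anomaly_LSTM` frame the series as supervised learning with window_size = 1, so each sample carries a single scaled observation and each target holds the next num_forecast values. A minimal standalone sketch of that framing, on made-up data:

    import numpy as np

    # Mirrors the sliding-window loop in anomaly_uni_LSTM (window_size = 1).
    x = np.linspace(-1.0, 1.0, 10)       # stand-in for the MinMax-scaled series
    window_size, num_forecast = 1, 3
    num_fore = num_forecast + 1

    win_x, win_y = [], []
    for i in range(len(x) - window_size - 1):
        if len(x) < (i + num_fore):      # stop once a full target window no longer fits
            break
        win_x.append(x[i:(i + window_size)])            # one input step
        win_y.append(x[i + window_size: i + num_fore])  # the next num_forecast steps

    win_x = np.array(win_x).reshape((-1, 1, window_size))  # (samples, timesteps, features)
    win_y = np.array(win_y)
    print(win_x.shape, win_y.shape)      # -> (7, 1, 1) (7, 3)

With one timestep per sample the LSTM recurrence sees no history, so model quality rests entirely on the hyperparameter_opt grid (hidden layers x nodes x dropout), where every candidate is trained for 25 epochs and the lowest test RMSE wins.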
diff --git a/server.py b/server.py index 4f3d69a..63552b7 100644 --- a/server.py +++ b/server.py @@ -52,8 +52,8 @@ def multivariate_engine(): list_var=[] for item in items: data = item['data'] - sub_name = item['name'] if(name != 'NA'): + sub_name = item['name'] filename= './lst/'+name + '_' + sub_name +'.lst' try: diff --git a/web_test/_arrivals_from_australia_monthly.csv b/web_test/_arrivals_from_australia_monthly.csv new file mode 100644 index 0000000..b490e14 --- /dev/null +++ b/web_test/_arrivals_from_australia_monthly.csv @@ -0,0 +1,129 @@ +Date,Number of arrivals +1991M01,27566 +1991M02,27621 +1991M03,25696 +1991M04,21653 +1991M05,21197 +1991M06,21620 +1991M07,25596 +1991M08,28327 +1991M09,29892 +1991M10,28206 +1991M11,28718 +1991M12,44288 +1992M01,29219 +1992M02,29644 +1992M03,32218 +1992M04,29586 +1992M05,22089 +1992M06,28209 +1992M07,33675 +1992M08,25075 +1992M09,32186 +1992M10,27235 +1992M11,29864 +1992M12,49103 +1993M01,29164 +1993M02,29603 +1993M03,32186 +1993M04,24415 +1993M05,19784 +1993M06,24414 +1993M07,27565 +1993M08,27195 +1993M09,34042 +1993M10,37434 +1993M11,35694 +1993M12,48706 +1994M01,31852 +1994M02,30595 +1994M03,34674 +1994M04,27604 +1994M05,22198 +1994M06,26123 +1994M07,28663 +1994M08,27622 +1994M09,40449 +1994M10,29890 +1994M11,28611 +1994M12,52268 +1995M01,33107 +1995M02,31596 +1995M03,38201 +1995M04,36443 +1995M05,23401 +1995M06,25007 +1995M07,30838 +1995M08,28133 +1995M09,40717 +1995M10,30156 +1995M11,31128 +1995M12,53754 +1996M01,35064 +1996M02,35736 +1996M03,39570 +1996M04,38185 +1996M05,24885 +1996M06,31983 +1996M07,31330 +1996M08,31692 +1996M09,41228 +1996M10,35142 +1996M11,36248 +1996M12,58774 +1997M01,36981 +1997M02,36434 +1997M03,41582 +1997M04,33090 +1997M05,27913 +1997M06,30197 +1997M07,32034 +1997M08,33434 +1997M09,41170 +1997M10,34119 +1997M11,35007 +1997M12,54617 +1998M01,39892 +1998M02,41970 +1998M03,41204 +1998M04,46232 +1998M05,31122 +1998M06,31839 +1998M07,40017 +1998M08,37335 +1998M09,46586 +1998M10,40656 +1998M11,43900 +1998M12,61656 +1999M01,41678 +1999M02,41267 +1999M03,46116 +1999M04,44875 +1999M05,32437 +1999M06,32732 +1999M07,41276 +1999M08,40579 +1999M09,49177 +1999M10,42140 +1999M11,44589 +1999M12,69672 +2000M01,46057 +2000M02,49286 +2000M03,51877 +2000M04,41966 +2000M05,33160 +2000M06,34671 +2000M07,44117 +2000M08,45356 +2000M09,51756 +2000M10,46904 +2000M11,48200 +2000M12,78352 +2001M01,53264 +2001M02,51909 +2001M03,58021 +2001M04,56304 +2001M05,39324 +2001M06,43422 +2001M07,49671 +2001M08,55260 diff --git a/web_test/production/index.html b/web_test/production/index.html index 6e349ba..71279d6 100644 --- a/web_test/production/index.html +++ b/web_test/production/index.html @@ -1 +1 @@ -
[truncated: the old and new revisions of web_test/production/index.html are each a single-line minified bundle; only the {{errorDialog.text}} fragment of each line survives in this patch]
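Reviewer note (illustration only): a hedged usage sketch of the refactored univariate entry point. The import path and the toy series are assumptions; the function name, its new defaults, and the output keys come from the patch.

    import numpy as np
    from engines.lstm import anomaly_uni_LSTM  # assumed import path

    series = list(np.sin(np.linspace(0, 20, 300)))   # synthetic univariate series

    # Defaults per the new signature: num_forecast=10, desv_mse=2.
    out = anomaly_uni_LSTM(series, num_forecast=10, desv_mse=2)

    print(out['rmse'], out['mse'])   # integer error summary over the 30% test split
    print(out['future'][:3])         # forecast records, e.g. {'value': ..., 'step': ...}
    print(out['debug'][:3])          # expected values across the test window

Note that the univariate grid is 2 layer counts x 3 node counts x 4 dropout rates = 24 candidate models, each trained for 25 epochs during selection; only the winner is refit on the full series before forecasting, so the call is expensive by design.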