-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis_ARIMA.py
120 lines (81 loc) · 3.64 KB
/
analysis_ARIMA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 9 15:23:52 2020
@author: tomdarmon
"""
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
import numpy as np
import pandas as pd
from functions import stationary_test, delete_image_folder, create_image_folder, compute_all_acf_plots, save_forecasted_values
import matplotlib.pyplot as plt
import os
from models_bench import *
from metrics import *
# Start every run from an empty 'image' folder: remove the previous one if
# it exists, then (re)create it.
# Original author's caveat: this reset does not work when the script is run
# through an import from another script.
if os.path.isdir('image'):
    delete_image_folder()
create_image_folder()
# Number of trailing observations held out of each series and requested
# from arima_bench as the forecast length.
fh = 14
# Passed through to mase() as its last argument
# (presumably the seasonal period used for scaling -- TODO confirm).
freq = 30

# Load the daily training set as a plain 2-D array with one series per row.
# The 'V1' column is dropped first (presumably the series identifier --
# TODO confirm); shorter series are NaN-padded and trimmed further below.
data_all = (
    pd.read_csv('/Users/tomdarmon/Documents/Thesis Bocconi/project/train/Daily-train.csv')
    .drop(columns='V1')
    .values
)
# Disabled exploration code, kept inside a bare string literal so that it
# never runs. It saved one time plot per series into the image folder, which
# is how the series indices recorded on its last line were picked.
'''
i = 0
### LOOKING FOR INTERESTING PLOTS ####
for ts in data_all:
plt.plot(ts)
plt.savefig(f'image/time_plots{i}')
i += 1
plt.clf()
### INTERESTING AT 379(seasonal), 1437 (maybe mult seasonality), 4130 (seasonal), 2131
'''
# Rows of data_all chosen for the study; each series is stripped of its NaN
# entries so that only observed values remain.
selected_rows = (379, 1437, 4130, 2131)
ts_all = [data_all[row][~np.isnan(data_all[row])] for row in selected_rows]

# One stationarity verdict per selected series, in the same order as ts_all.
stationary = list(map(stationary_test, ts_all))

# The test returned [False, False, True, False]. The first series nevertheless
# looks stationary: its p-value is 0.06, so it misses the cut-off only by a
# small margin, presumably because the series ends at a low point. It is
# therefore treated as stationary from here on.
stationary[0] = True
# Candidate orders handed to arima_bench through its ``o`` argument
# (presumably the ARIMA (p, d, q) order -- TODO confirm against models_bench).
# Series flagged non-stationary use the candidates whose middle term is 1;
# series flagged stationary use the candidates whose middle term is 0.
ORDERS_NON_STATIONARY = (
    (0, 1, 0), (1, 1, 0), (2, 1, 0), (3, 1, 0), (4, 1, 0),
    (0, 1, 1), (1, 1, 1), (3, 1, 1), (4, 1, 1), (5, 1, 1),
)
ORDERS_STATIONARY = (
    (2, 0, 0), (3, 0, 0), (4, 0, 0),
    (0, 0, 1), (1, 0, 1), (2, 0, 1), (3, 0, 1), (4, 0, 1),
)

# Per-series results for the selected candidate, in the same order as ts_all.
MAPE = []          # values returned by smape(), despite the list's name
MASE = []          # lowest mase() value among the candidates
y_hats_ARIMA = []  # forecast produced by the selected candidate

for ts, is_stationary in zip(ts_all, stationary):
    # Hold out the last fh observations for evaluation; the models are fitted
    # on everything before them. Both slices use fh so that they stay
    # complementary if the horizon is ever changed.
    y_train = ts[:-fh]
    y_test = ts[-fh:]

    orders = ORDERS_STATIONARY if is_stationary else ORDERS_NON_STATIONARY
    y_hat_ARIMAs = [arima_bench(y_train, fh, o=order) for order in orders]

    # Score every candidate and keep the one with the lowest MASE.
    # NOTE(review): the winner is chosen using the held-out values themselves,
    # so the reported scores are optimistic -- confirm this is intended.
    all_mase = [mase(y_train, y_test, y_hat, freq) for y_hat in y_hat_ARIMAs]
    min_index = int(np.argmin(all_mase))
    final_mase = all_mase[min_index]
    y_hat_ARIMA = y_hat_ARIMAs[min_index]
    final_mape = smape(y_test, y_hat_ARIMA)

    MAPE.append(final_mape)
    MASE.append(final_mase)
    y_hats_ARIMA.append(y_hat_ARIMA)

    # Save the last 200 training values together with the held-out values and
    # the selected forecast.
    # NOTE(review): assumed to run once per series; every call uses the same
    # name 'ARIMA' -- confirm save_forecasted_values does not overwrite.
    save_forecasted_values(y_train[-200:], y_test, y_hat_ARIMA, name='ARIMA')