forked from jacquessham/StockAnalytics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Prediction_AAPL_arima.py
86 lines (73 loc) · 3.2 KB
/
Prediction_AAPL_arima.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
import numpy as np
import psycopg2
import pmdarima as pm
import plotly
import plotly.graph_objs as go
from plotly.offline import *
from Graph import *
import plotly.io as pio
pio.renderers.default = 'notebook'
# Connect to database
conn = psycopg2.connect(host='localhost', port=5432, database='postgres')
# Query
query_train = """select tradedate, closeprice from stock.stockprice
where ticker = 'AAPL' and
date_part('year', tradedate) between 1997 and 2018
order by 1; """
query_test = """select tradedate as ds, closeprice as realprice
from stock.stockprice
where ticker = 'AAPL' and
date_part('year', tradedate) between 2019 and 2020
order by 1; """
df_train = pd.io.sql.read_sql(query_train, conn)
df_test = pd.io.sql.read_sql(query_test, conn)
# Calculate SST
ybar_test = df_test['realprice'].mean()*1.0
sst = ((df_test['realprice'] - ybar_test)**2).sum()
### Train model using 1997-2018 data, let's call model_max ###
# Obtain training data between 1997 and 2018
df_train = pd.io.sql.read_sql(query_train, conn)
X_train = df_train['closeprice']
model_max = pm.auto_arima(X_train, start_p=1, start_q=1,
max_p=3, max_q=3, m=12,
max_P=3, max_Q=3, seasonal=True,
d=1, D=1, max_d=3, max_D=3, trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True)
print(model_max.summary())
df_test_max = df_test.copy()
pred_max = model_max.predict(df_test.shape[0]) # It returns ndarray
df_test_max['yhat'] = pred_max
sse_max = ((df_test_max['yhat'] - df_test_max['realprice'])**2).sum()
rsqu_max = 1 - sse_max / sst
### Train model using 2010-2018 data, let's call model_max ###
# Obtain training data between 2010 and 2018
query_train = """select tradedate, closeprice from stock.stockprice
where ticker = 'AAPL' and
date_part('year', tradedate) between 2010 and 2018
order by 1; """
df_train = pd.io.sql.read_sql(query_train, conn)
X_train = df_train['closeprice']
model_8yr = pm.auto_arima(X_train, start_p=1, start_q=1,
max_p=3, max_q=3, m=12,
max_P=3, max_Q=3, seasonal=True,
d=1, D=1, max_d=3, max_D=3, trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True)
print(model_8yr.summary())
df_test_8yr = df_test.copy()
pred_8yr = model_8yr.predict(df_test.shape[0])
df_test_8yr['yhat'] = pred_8yr
sse_8yr = ((df_test_8yr['yhat'] - df_test_8yr['realprice'])**2).sum()
rsqu_8yr = 1 - sse_8yr / sst
print('The R-square of model_max is',f'{rsqu_max:.2f}')
print('The R-square of model_8yr is',f'{rsqu_8yr:.2f}')
# Generate graph of the results
df_train.columns = ['ds','y']
fig = generate_line_chart(df_train, df_test, df_test_max, 'model_max',
df_test_8yr, 'model_8yr',
'Prediction with pmdarima')
plotly.offline.plot(fig, filename='AAPLprice_pmdarima.html')