-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper_func.py
87 lines (69 loc) · 2.79 KB
/
helper_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Install pandas-datareader, an up-to-date remote data access for pandas
#!pip install git+https://github.com/pydata/pandas-datareader.git
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pandas_datareader.data as web
from datetime import datetime, timedelta
from sklearn.cluster import KMeans
def get_data(ticker, time_window=1000):
    """
    Download daily stock data with pandas-datareader and return it as a DataFrame.

    ticker: str, stock symbol; it is upper-cased before the request is made
    time_window: int, number of calendar days to look back from today (default = 1000)
    """
    symbol = ticker.upper()
    date_format = '%Y-%m-%d'

    # the requested range ends today and starts `time_window` days earlier
    today = datetime.today()
    first_day = today - timedelta(days=time_window)

    # both bounds are handed to the reader as 'YYYY-MM-DD' strings
    return web.DataReader(
        symbol,
        'yahoo',
        first_day.strftime(date_format),
        today.strftime(date_format),
    )
def plot_trend(x, y, labels, ticker):
    """
    Scatter plot the data with one colour per cluster label, then save and show it.

    x: ndarray, evenly spaced values within the interval (0, len(y))
    y: ndarray, closing prices
    labels: ndarray, integer labels of each point after K-Means clustering
    ticker: str, stock symbol which identifies shares of a stock

    The figure is saved as '<TICKER>.png' (ticker upper-cased) and then
    displayed with plt.show().
    """
    colors = ['tab:red', 'tab:orange', 'tab:green']
    name = ticker.upper()

    plt.figure(figsize=(15, 5))
    # Loop over the label values that are actually present rather than
    # range(number_of_unique_labels): the labels need not be the contiguous
    # ids 0..k-1 (e.g. when one id is absent), and counting would then skip
    # the points carrying the highest ids.
    for lab in np.unique(labels):
        lab = int(lab)
        mask = labels == lab
        # The colour is tied to the label value itself so a given label always
        # gets the same colour; the modulo cycles the palette instead of
        # raising IndexError when there are more labels than colours.
        color = colors[lab % len(colors)]
        plt.scatter(x[mask], y[mask], c=color, s=3, label=lab)

    plt.legend(frameon=False, fontsize='medium')
    plt.title(name)
    plt.savefig(f'{name}.png', dpi=300, bbox_inches='tight')
    plt.show()
def get_trend(c, window=20):
    '''
    Smooths the time series with a two-pass (forward, then backward) EMA and
    clusters the log-returns of the smoothed series into three groups with K-Means.
    Cluster ids are renumbered by ascending cluster centre, so label 0 has the
    smallest centre and label 2 the largest.
    Returns data points and labels for use with the plot_trend() function.
    c: ndarray, daily close prices
    window: int, span used by both EMA passes
    '''
    n_clusters = 3

    # forward pass, then a second pass over the reversed result, flipped back
    forward = pd.Series(c).ewm(span=window).mean()
    backward = forward[::-1].ewm(span=window).mean()
    smoothed = backward[::-1].values

    # one log-return per consecutive pair of smoothed prices,
    # shaped as a single-feature column for the clustering model
    log_returns = np.diff(np.log(smoothed))
    model = KMeans(n_clusters).fit(log_returns.reshape(-1, 1))

    # rank[k] is the position of cluster k once the centres are sorted
    # ascending; a stable sort keeps tied centres in their original id order
    centres = model.cluster_centers_.flatten()
    ascending = np.argsort(centres, kind='stable')
    rank = np.zeros(n_clusters, dtype=int)
    rank[ascending] = np.arange(n_clusters)
    labels = rank[model.labels_]

    # the first smoothed price has no return, so it is dropped from y
    x = np.arange(len(labels))
    y = smoothed[1:]
    return x, y, labels