-
Notifications
You must be signed in to change notification settings - Fork 498
/
Copy pathreinforcement_trader.py
77 lines (59 loc) · 2.63 KB
/
reinforcement_trader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from quanttrader import TradingEnv
def load_data(syms=('SPY',), sd='2015', ed='2020', data_dir='../data',
              max_price_scaler=5_000.0, max_volume_scaler=1.5e10):
    """Load daily bars from CSV and build observation and exchange frames.

    For each symbol, ``{data_dir}/{sym}.csv`` is read with its first column
    as the index. The file is expected to provide ``Open``, ``High``, ``Low``,
    ``Close``, ``Adj Close`` and ``Volume`` columns. The defaults reproduce the
    original hard-coded configuration (SPY, 2015 through 2020).

    Args:
        syms: Iterable of ticker symbols; a single string is treated as one
            symbol.
        sd: Start label used to slice the datetime index (``.loc[sd:ed]``).
        ed: End label used to slice the datetime index.
        data_dir: Directory that contains one ``<symbol>.csv`` per symbol.
        max_price_scaler: Divisor applied to the adjusted price columns.
        max_volume_scaler: Divisor applied to the adjusted volume column.

    Returns:
        A ``(df_obs, df_exch)`` tuple. ``df_obs`` holds, per symbol, the
        scaled ``open/high/low/close/volume`` columns plus MACD, RSI,
        Bollinger band and ATR columns, all named ``'<sym>:<feature>'``.
        ``df_exch`` holds the raw (unscaled) ``Close`` of each symbol in a
        column named after the symbol. Both are empty frames when ``syms``
        is empty.
    """
    if isinstance(syms, str):
        # A bare string would otherwise be iterated character by character.
        syms = [syms]
    syms = list(syms)
    if not syms:
        # Nothing to load; pd.concat([]) would raise on an empty list.
        return pd.DataFrame(), pd.DataFrame()

    # Kept local, as in the original script: only this function needs them.
    from datetime import timedelta
    import ta

    obs_frames = []
    exch_frames = []
    for sym in syms:
        raw = pd.read_csv(f'{data_dir}/{sym}.csv', index_col=0)
        # Stamp every daily bar at 15:59:59 (presumably just before the
        # 16:00 market close -- confirm against the exchange calendar).
        raw.index = pd.to_datetime(raw.index) + timedelta(hours=15, minutes=59, seconds=59)
        raw = raw.loc[sd:ed]

        # Raw close is what the environment uses for order matching.
        exch_frames.append(raw['Close'].rename(sym))

        # Ratio that converts raw prices into adjusted prices.
        adj_factor = raw['Adj Close'] / raw['Close']

        # Build a fresh frame instead of assigning into a slice of ``raw``;
        # this avoids pandas' chained-assignment (SettingWithCopy) warnings.
        # NOTE(review): volume is multiplied by the same price ratio, as in
        # the original code -- confirm this is the intended adjustment.
        feats = pd.DataFrame({
            f'{sym}:open': adj_factor * raw['Open'] / max_price_scaler,
            f'{sym}:high': adj_factor * raw['High'] / max_price_scaler,
            f'{sym}:low': adj_factor * raw['Low'] / max_price_scaler,
            f'{sym}:close': raw['Adj Close'] / max_price_scaler,
            f'{sym}:volume': adj_factor * raw['Volume'] / max_volume_scaler,
        })

        close = feats[f'{sym}:close']

        # Technical indicators are computed on the scaled, adjusted series.
        macd = ta.trend.MACD(close=close)
        feats[f'{sym}:macd'] = macd.macd()
        feats[f'{sym}:macd_diff'] = macd.macd_diff()
        feats[f'{sym}:macd_signal'] = macd.macd_signal()

        rsi = ta.momentum.RSIIndicator(close=close)
        feats[f'{sym}:rsi'] = rsi.rsi()

        bb = ta.volatility.BollingerBands(close=close, window=20, window_dev=2)
        feats[f'{sym}:bb_bbm'] = bb.bollinger_mavg()
        feats[f'{sym}:bb_bbh'] = bb.bollinger_hband()
        feats[f'{sym}:bb_bbl'] = bb.bollinger_lband()

        atr = ta.volatility.AverageTrueRange(
            high=feats[f'{sym}:high'],
            low=feats[f'{sym}:low'],
            close=close,
        )
        feats[f'{sym}:atr'] = atr.average_true_range()

        obs_frames.append(feats)

    # One concat per output instead of growing a frame inside the loop.
    df_obs = pd.concat(obs_frames, axis=1)      # observation
    df_exch = pd.concat(exch_frames, axis=1)    # exchange; for order match
    return df_obs, df_exch
def main():
    """Run one episode of the trading environment with random actions.

    Loads the data via ``load_data()``, configures a ``TradingEnv`` (cash,
    commission, step settings, feature scaling), resets it, and then keeps
    sampling actions from the environment's action space until the
    environment reports ``done``. Each step's action, rescaled reward and
    info are printed.
    """
    look_back = 10
    cash = 100_000.0
    max_nav_scaler = cash

    df_obs, df_exch = load_data()

    # NOTE(review): the meaning of the leading positional argument ``2`` is
    # defined by TradingEnv and is not visible in this file -- confirm.
    trading_env = TradingEnv(2, df_obs, df_exch)
    trading_env.set_cash(cash)
    trading_env.set_commission(0.0001)
    # Use look_back here rather than repeating the literal 10.
    trading_env.set_steps(n_lookback=look_back, n_warmup=50, n_maxsteps=250)
    trading_env.set_feature_scaling(max_nav_scaler)

    trading_env.reset()
    # Optional, to ignore randomness in the starting step:
    # trading_env._current_step = look_back - 1

    done = False
    while not done:
        action = trading_env.action_space.sample()
        _, reward, done, info = trading_env.step(action)
        # The reward is multiplied by max_nav_scaler before printing
        # (presumably to undo the environment's feature scaling).
        print(action, reward * max_nav_scaler, info)


if __name__ == '__main__':
    main()