-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmkt_research_funcs.jl
232 lines (187 loc) · 5.82 KB
/
mkt_research_funcs.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
module MarketResearchFuncs
using DataFrames, Arrow, Dates
"""
    py_bytes_to_arrow_table(py_bytes)

Build an `Arrow.Table` from a raw byte buffer (by its name, bytes received from Python).

`py_bytes` is reinterpreted as `UInt8` and copied into a fresh Julia array; the table
is constructed from that copy rather than from the caller's buffer.
"""
function py_bytes_to_arrow_table(py_bytes)
    byte_view = reinterpret(UInt8, py_bytes)
    return Arrow.Table(copy(byte_view))
end
"""
    py_bytes_to_arrow_table_1(vec_of_py_bytes::Vector)

Convert every element of `vec_of_py_bytes` with `py_bytes_to_arrow_table` and return
the resulting tables in an untyped `Set`.

Because the result is a `Set`, the order of the input vector is not preserved.
"""
function py_bytes_to_arrow_table_1(vec_of_py_bytes::Vector)
    tables = Set()
    foreach(vec_of_py_bytes) do item
        push!(tables, MarketResearchFuncs.py_bytes_to_arrow_table(item))
    end
    return tables
end
"""
    table_to_py_bytes(table)

Serialize `table` with `Arrow.write` into an in-memory buffer and return the written
bytes wrapped by `PyCall.pybytes`.
"""
function table_to_py_bytes(table)
    # NOTE(review): `PyCall` is not in this module's `using` list; unless it is brought
    # into scope some other way, the call below raises `UndefVarError` — confirm.
    buffer = IOBuffer()
    Arrow.write(buffer, table)
    seekstart(buffer)
    arrow_bytes = take!(buffer)
    return PyCall.pybytes(arrow_bytes)
end
"""
    calc_return(price_vec2, price_vec1, lookback)

Compute lagged log returns `log(price_vec2[i] / price_vec1[i - lookback])`.

# Arguments
- `price_vec2::AbstractArray`: numerator prices; its first dimension sets the output
  length.
- `price_vec1::AbstractArray`: denominator prices, read `lookback` positions earlier.
- `lookback::Integer`: non-negative lag between the two prices (`0` compares the two
  arrays at the same position).

# Returns
A `Vector{Float64}` as long as the first dimension of `price_vec2`. The first
`lookback` entries (or all of them when `lookback` exceeds the length) are `0.0`
because no lagged price exists for them.

# Throws
- `ArgumentError` if `lookback` is negative.
- `DimensionMismatch` if `price_vec1` is too short to supply every lagged price.
"""
function calc_return(
    price_vec2::AbstractArray, price_vec1::AbstractArray, lookback::Integer
)
    lookback < 0 && throw(ArgumentError("lookback must be >= 0, got $lookback"))
    len = size(price_vec2, 1)
    # The main loop reads price_vec1[1:(len - lookback)] without bounds checks.
    if size(price_vec1, 1) < len - lookback
        throw(DimensionMismatch(
            "price_vec1 needs at least $(len - lookback) elements, " *
            "got $(size(price_vec1, 1))",
        ))
    end
    ret_vec = Vector{Float64}(undef, len)
    # Clamp the warm-up region so a lookback longer than the data cannot write past
    # the end of ret_vec.
    n_warmup = min(lookback, len)
    @inbounds for i in 1:n_warmup
        ret_vec[i] = 0.0
    end
    @inbounds for i in (n_warmup + 1):len
        ret_vec[i] = log(price_vec2[i] / price_vec1[i - lookback])
    end
    return ret_vec
end
"""
    above_0(x)

Element-wise test `x > 0`, broadcast over `x` (a scalar input yields a single value).
"""
@inline above_0(x) = broadcast(>, x, 0)
"""
    bytes_to_df_with_preprocessing(bytes, is_disallowmissing=true)

Decode `bytes` with `py_bytes_to_arrow_table`, wrap the result in a `DataFrame`, and
keep only the rows whose `:low` and `:high` values are both greater than zero.

When `is_disallowmissing` is `true`, `disallowmissing!` is applied to the frame before
the rows are filtered. Returns the filtered `DataFrame`.
"""
function bytes_to_df_with_preprocessing(bytes, is_disallowmissing=true)
    df = DataFrame(py_bytes_to_arrow_table(bytes))
    if is_disallowmissing
        disallowmissing!(df)
    end
    subset!(df, :low => above_0, :high => above_0)
    return df
end
"""
    calc_tr_vec(df::AbstractDataFrame)

Take the natural log of the `high`, `low` and `close` columns of `df` and forward them
to the three-vector method of `calc_tr_vec`, returning its result.
"""
function calc_tr_vec(df::AbstractDataFrame)
    log_high = log.(df.high)
    log_low = log.(df.low)
    log_close = log.(df.close)
    return calc_tr_vec(log_high, log_low, log_close)
end
"""
    calc_tr_vec(high_vec, low_vec, close_vec)

Compute the true range of each bar from same-length high, low and close vectors.

For `i >= 2` the result is the largest of
- `high_vec[i] - low_vec[i]`,
- `abs(high_vec[i] - close_vec[i - 1])`,
- `abs(low_vec[i] - close_vec[i - 1])`,

so a bar that gaps entirely above or below the previous close is measured from that
close. The first entry is `0.0` because it has no previous close. The
`AbstractDataFrame` method of this function passes log prices, in which case the
result is a log range.

# Returns
A `Vector{Union{Float64,Missing}}` with one entry per bar (empty for empty input).

# Throws
- `DimensionMismatch` if the three vectors differ in length.
"""
function calc_tr_vec(high_vec::Vector{T}, low_vec::Vector{T}, close_vec::Vector{T}) where {T}
    len = size(high_vec, 1)
    # The loop below skips bounds checks, so the lengths must be verified up front.
    if size(low_vec, 1) != len || size(close_vec, 1) != len
        throw(DimensionMismatch("high, low and close vectors must have equal length"))
    end
    tr_vec = Vector{Union{Float64,Missing}}(undef, len)
    len == 0 && return tr_vec
    tr_vec[1] = 0.0
    @inbounds for i in 2:len
        prev_close = close_vec[i - 1]
        bar_range = high_vec[i] - low_vec[i]
        # abs() is required: without it the low-based term can never exceed the
        # high-based one and a gap below the previous close is under-measured.
        high_gap = abs(high_vec[i] - prev_close)
        low_gap = abs(low_vec[i] - prev_close)
        tr_vec[i] = max(bar_range, high_gap, low_gap)
    end
    return tr_vec
end
"""
    calc_expanding_rolling_mean(vec::Vector, lookback::Integer)

Compute a running mean that expands until `lookback` values are available and then
rolls over a fixed window of `lookback` values.

Entry `i` is the mean of `vec[1:i]` while `i <= lookback`, and the mean of
`vec[(i - lookback + 1):i]` afterwards. The window total is maintained incrementally
as a `Float64`.

# Returns
A `Vector{Float64}` with the same length as `vec`.

# Throws
- `ArgumentError` if `lookback` is smaller than 1.
"""
function calc_expanding_rolling_mean(vec::Vector{T}, lookback::Integer) where {T}
    # A non-positive window would divide by zero or index past the end of vec
    # inside the unchecked loop below.
    lookback >= 1 || throw(ArgumentError("lookback must be >= 1, got $lookback"))
    len = size(vec, 1)
    window_sum::Float64 = 0
    rolling_mean_vec = Vector{Float64}(undef, len)
    @inbounds for i in 1:len
        window_sum += vec[i]
        if i > lookback
            # Drop the value that just left the window.
            window_sum -= vec[i - lookback]
            rolling_mean_vec[i] = window_sum / lookback
        else
            rolling_mean_vec[i] = window_sum / i
        end
    end
    return rolling_mean_vec
end
"""
    calc_expanding_rolling_mean(vec::Vector{Union{Missing,T}}, lookback::Integer)

Missing-aware variant: the running mean is computed over the non-missing values only
(so the window counts `lookback` non-missing observations), and each result is written
back at the position of the value it belongs to.

Positions that are `missing` in `vec` are `missing` in the output. Returns a
`Vector{Union{Missing,Float64}}` with the same length as `vec`.
"""
function calc_expanding_rolling_mean(vec::Vector{Union{Missing,T}}, lookback::Integer) where {T}
    present_values = collect(skipmissing(vec))
    present_means = calc_expanding_rolling_mean(present_values, lookback)
    out = Vector{Union{Missing,Float64}}(undef, size(vec, 1))
    # Index of the next unused mean; advances only on non-missing inputs.
    next_mean = 1
    for i in eachindex(vec)
        if ismissing(vec[i])
            out[i] = missing
        else
            out[i] = present_means[next_mean]
            next_mean += 1
        end
    end
    return out
end
"""
    ffill!(v, init=zero(eltype(v)))

Forward-fill `v` in place: every `missing` entry is replaced by the most recent
non-missing value before it. Leading `missing` entries are replaced by `init`.

Returns `v`.
"""
function ffill!(v, init=zero(eltype(v)))
    # Keep the previous value whenever the current one is missing.
    carry_forward(prev, curr) = ismissing(curr) ? prev : curr
    return accumulate!(carry_forward, v, v; init=init)
end
"""
    calc_vwap(df, price_col="close", volume_col="volume")

Return the cumulative volume-weighted average price: the running sum of
`price .* volume` divided element-wise by the running sum of `volume`, using the
columns `df[!, price_col]` and `df[!, volume_col]`.
"""
function calc_vwap(df, price_col="close", volume_col="volume")
    prices = df[!, price_col]
    volumes = df[!, volume_col]
    cum_traded_value = cumsum(prices .* volumes)
    cum_volume = cumsum(volumes)
    return cum_traded_value ./ cum_volume
end
"""
    count_flips(vec, flip_point=0.0)

Return the total number of flips across `flip_point` in `vec`, i.e. the final entry of
`cum_count_flips(vec, flip_point)`.

An input with no rows has no flips and returns `0` instead of failing on the lookup
of a last element.
"""
function count_flips(vec, flip_point=0.0)
    size(vec, 1) == 0 && return 0
    return last(cum_count_flips(vec, flip_point))
end
"""
    cum_count_flips(vec, flip_point=0.0, buffer_thresh=0.0)

Return, for every position of `vec`, the number of flips seen so far.

Each value is classified as above (`> flip_point + buffer_thresh`), below
(`< flip_point - buffer_thresh`) or neutral. A flip is counted when the previous value
was above and the current one is below, or the previous was below and the current one
is above. A neutral value resets the previous state, so a move that passes through the
neutral band is not counted.

Returns a `Vector{Int}` with one entry per element of the first dimension of `vec`.
"""
function cum_count_flips(vec, flip_point=0.0, buffer_thresh=0.0)
    n = size(vec, 1)
    upper = flip_point + buffer_thresh
    lower = flip_point - buffer_thresh
    flips::Int = 0
    cum_flips_vec = Vector{Int}(undef, n)
    # 1 = previous value was above the band, -1 = below, 0 = neutral / no history.
    prior_zone = 0
    @inbounds for i in 1:n
        x = vec[i]
        is_above = x > upper
        is_below = x < lower
        if (prior_zone == 1 && is_below) || (prior_zone == -1 && is_above)
            flips += 1
        end
        prior_zone = is_above ? 1 : (is_below ? -1 : 0)
        cum_flips_vec[i] = flips
    end
    return cum_flips_vec
end
# Time-of-day bounds read by `calc_had_halt_signal`: rows whose time is before
# `mkt_open_time` or at/after `mkt_close_time` are never flagged.
# Declared `const` so functions reading them see a concretely typed global.
const mkt_open_time = Time(9, 30)
const mkt_close_time = Time(16, 0)
"""
    calc_had_halt_signal(df)

Flag the rows of `df` that, going by the function name, look like the first bar after
a trading halt. Reads the `:ts`, `:high` and `:low` columns.

Row `i` is flagged `true` only when all of the following hold:
- at least six earlier rows exist (rows `i - 1` and `i - 6` are both read);
- the time of day of `df[i, :ts]` lies in `[mkt_open_time, mkt_close_time)`;
- the time-of-day difference to the previous row is more than three minutes;
- the larger of `abs(high[i-1] / low[i-6] - 1)` and `abs(low[i-1] / high[i-6] - 1)`
  is not below `0.10`.

Only times of day are compared, so a previous row from an earlier date is not treated
specially.

# Returns
A `Vector{Bool}` with one entry per row of `df`.
"""
function calc_had_halt_signal(df)
    len = size(df, 1)
    halt_signal_vec = fill(false, len)
    max_normal_gap = Minute(3)
    min_pre_halt_move = 0.10
    # Rows 1-6 stay false: the price check reads row i - 6, which does not exist
    # for them.
    for i in 7:len
        curr_time = Time(df[i, :ts])
        if curr_time < mkt_open_time || curr_time >= mkt_close_time
            continue
        end
        if curr_time - Time(df[i - 1, :ts]) <= max_normal_gap
            continue
        end
        # Largest move between the bar six rows back and the bar just before the gap.
        up_move = abs(df[i - 1, :high] / df[i - 6, :low] - 1)
        down_move = abs(df[i - 1, :low] / df[i - 6, :high] - 1)
        if max(up_move, down_move) < min_pre_halt_move
            continue
        end
        halt_signal_vec[i] = true
    end
    return halt_signal_vec
end
"""
    calc_mae_mfe_pf(high_vec, low_vec, close_vec, start_idx, end_idx, entry_price)

Scan bars `start_idx + 1` through `end_idx` and return a named tuple
`(mae, mfe, pf)`:

- `mae = entry_price / highest_high - 1`, using the highest `high_vec` value seen;
- `mfe = entry_price / lowest_low - 1`, using the lowest `low_vec` value seen;
- `pf` = sum of the magnitudes of negative close-to-close returns divided by the sum
  of positive close-to-close returns.

NOTE(review): the signs read like metrics for a short position (higher prices are
adverse) — confirm against callers.

The scan starts from a highest high of `0.0` and a lowest low of `typemax(Float64)`,
so an empty range yields `mae == Inf`, `mfe == -1.0` and a `NaN` `pf`. Indexing is not
bounds-checked.
"""
function calc_mae_mfe_pf(high_vec, low_vec, close_vec, start_idx, end_idx, entry_price)
    highest_high::Float64 = 0.0
    lowest_low::Float64 = typemax(Float64)
    loss_sum = 0.0
    gain_sum = 0.0
    @inbounds for i in (start_idx + 1):end_idx
        low_vec[i] < lowest_low && (lowest_low = low_vec[i])
        high_vec[i] > highest_high && (highest_high = high_vec[i])
        bar_ret = close_vec[i] / close_vec[i - 1] - 1
        if bar_ret < 0
            loss_sum -= bar_ret
        elseif bar_ret > 0
            gain_sum += bar_ret
        end
    end
    adverse = entry_price / highest_high - 1.0
    favorable = entry_price / lowest_low - 1.0
    return (mae=adverse, mfe=favorable, pf=loss_sum / gain_sum)
end
end