Skip to content

Commit

Permalink
refactor and add functionality.
Browse files Browse the repository at this point in the history
  • Loading branch information
dominiktraxl committed Dec 19, 2017
1 parent eada0d0 commit 88da414
Showing 1 changed file with 79 additions and 76 deletions.
155 changes: 79 additions & 76 deletions scripts/download_trade_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
Use pd.read_pickle(file) to load data into memory.
Use the ``interval`` argument to sample trade data into ohlc format instead of
downloading/updating trade data (in that case, only the arguments ``folder``,
``pair`` and ``interval`` have an effect). Data is stored as a pandas.DataFrame
(in "pair_interval.pickle" format).
downloading/updating trade data. Data is stored as a pandas.DataFrame (in
"pair_interval.pickle" format).
"""

Expand All @@ -21,68 +20,14 @@
import krakenex
from pykrakenapi import KrakenAPI

from pykrakenapi.pykrakenapi import CallRateLimitError

# parser
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument(
'--folder',
help='which (parent) folder to store data in',
type=str,
default=str(Path.home()) + '/cryptodata/')

parser.add_argument(
'--pair',
help=('asset pair to get trade data for. '
'see KrakenAPI(api).get_tradable_asset_pairs().index.values'),
type=str,
default='XXBTZEUR')

parser.add_argument(
'--since',
help=("return trade data since given unixtime (exclusive). If 0 (default) "
"and this script was called before, only an update to the "
"most recent data is retrieved. If 0 and this function was not "
"called before, retrieve from earliest time possible."),
type=str,
default=0)

parser.add_argument(
'--timezone',
help=("convert the timezone of timestamps to ``timezone``, which must be "
"a string that pytz.timezone() accepts (see pytz.all_timezones)"),
type=str,
default='Europe/Berlin')

parser.add_argument(
'--interval',
help=('sample downloaded trade data to ohlc format with the given time '
'interval (minutes). If 0 (default), only download/update trade '
'data.'),
type=int,
default=0)

# args
args = parser.parse_args()

folder = args.folder
pair = args.pair
since = args.since
timezone = args.timezone
interval = args.interval


class GetTradeData(object):

def __init__(self, folder, pair, timezone):

# initiate api
self.api = krakenex.API()
self.k = KrakenAPI(self.api, tier=0, retry=.1)
api = krakenex.API()
self.k = KrakenAPI(api, tier=0, retry=.1)

# set pair
self.pair = pair
Expand Down Expand Up @@ -122,54 +67,112 @@ def download_trade_data(self, since):
print('storing', fname)
trades.to_pickle(fname)

except CallRateLimitError:
print('\n this should not happen. please report an issue on '
'github! thanks. \n')
raise

except ValueError:
print('download/update finished!')
print('\n download/update finished!')
break

def agg_ohlc(self, since, interval):
    """Aggregate stored trade data into OHLC bars and pickle the result.

    Reads every pickled trade file found in ``<folder><pair>/``,
    concatenates them, stores the combined trades as
    ``<folder><pair>.pickle`` and the resampled bars as
    ``<folder><pair>_<interval>.pickle``.

    Parameters
    ----------
    since : int, float or str
        Unixtime in seconds. If greater than 0, only files whose name
        stem (an integer, compared against ``since * 1e9``) is at least
        that value are used. Numeric strings are accepted because the
        command line passes this argument as ``str``.
    interval : int
        Bar width in minutes.

    Raises
    ------
    ValueError
        If no trade files are left to aggregate, or ``since`` is not
        numeric.
    """
    folder = self.folder + self.pair + '/'

    # the CLI hands ``since`` over as a string; comparing or multiplying
    # a str with a number would raise TypeError
    since = float(since)

    # fetch files (newest first), optionally restricted by ``since``
    fs = sorted(os.listdir(folder), reverse=True)
    if since > 0:
        threshold = since * 1e9
        fs = [f for f in fs if int(f.split('.')[0]) >= threshold]
    if not fs:
        raise ValueError(
            'no trade data files found in {}'.format(folder))

    # convert to a single dataframe
    trades = pd.concat([pd.read_pickle(folder + f) for f in fs], axis=0)
    trades['cost'] = trades['price'] * trades['volume']

    # store on disc
    fname = self.folder + self.pair + '.pickle'
    print('\n storing', fname)
    trades.to_pickle(fname)

    # resample
    gtrades = trades.resample('{}min'.format(interval))

    # ohlc; bars without trades carry the previous close in all four
    # price columns
    ohlc = gtrades['price'].ohlc()
    closes = ohlc['close'].ffill()
    ohlc = ohlc.apply(lambda col: col.fillna(closes))

    # volume; assigned as a whole column instead of a chained in-place
    # fillna, which has no effect under pandas copy-on-write
    ohlc['volume'] = gtrades['volume'].sum().fillna(0)

    # vwap; 0/0 for bars without trades yields NaN, use the close there
    vwap = gtrades['cost'].sum() / ohlc['volume']
    ohlc['vwap'] = vwap.fillna(ohlc['close'])

    # count
    ohlc['count'] = gtrades.size()

    # store on disc
    fname = self.folder + self.pair + '_{}.pickle'.format(interval)
    print('\n storing', fname)
    ohlc.to_pickle(fname)


dl = GetTradeData(folder, pair, timezone)

if interval == 0:
dl.download_trade_data(since)
else:
dl.agg_ohlc(interval)
def main(folder, pair, since, timezone, interval):
    """Download/update trade data, or aggregate it into OHLC bars.

    A ``GetTradeData`` instance is created from ``folder``, ``pair`` and
    ``timezone``. With ``interval == 0`` its ``download_trade_data(since)``
    is called; for any other ``interval`` its ``agg_ohlc(since, interval)``
    is called instead.
    """
    downloader = GetTradeData(folder, pair, timezone)

    # a non-zero interval selects aggregation
    if interval != 0:
        downloader.agg_ohlc(since, interval)
        return

    downloader.download_trade_data(since)


# Command-line entry point: parse the arguments and hand them to main().
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '--folder',
        help='which (parent) folder to store data in',
        type=str,
        default=str(Path.home()) + '/cryptodata/')

    parser.add_argument(
        '--pair',
        help=('asset pair to get trade data for. '
              'see KrakenAPI(api).get_tradable_asset_pairs().index.values'),
        type=str,
        default='XXBTZEUR')

    # NOTE(review): parsed as ``str`` although the default is the int 0 and
    # agg_ohlc compares ``since`` numerically -- consider ``type=int`` once
    # it is confirmed that download_trade_data accepts an int.
    parser.add_argument(
        '--since',
        help=("download/aggregate trade data since given unixtime (exclusive)."
              " If 0 (default) and this script was called before, only an"
              " update to the most recent data is retrieved. If 0 and this"
              " function was not called before, retrieve from earliest time"
              " possible. When aggregating (interval>0), aggregate from"
              " ``since`` onwards (unixtime)."),
        type=str,
        default=0)

    parser.add_argument(
        '--timezone',
        help=("convert the timezone of timestamps to ``timezone``, which must "
              "be a string that pytz.timezone() accepts (see "
              "pytz.all_timezones)"),
        type=str,
        default='Europe/Berlin')

    # the first fragment now ends with a space; previously the help text
    # rendered as "given timeinterval"
    parser.add_argument(
        '--interval',
        help=("sample downloaded trade data to ohlc format with the given time "
              "interval (minutes). If 0 (default), only download/update trade "
              "data."),
        type=int,
        default=0)

    args = parser.parse_args()

    # execute
    main(args.folder, args.pair, args.since, args.timezone, args.interval)

0 comments on commit 88da414

Please sign in to comment.