diff --git a/scripts/download_trade_data.py b/scripts/download_trade_data.py index 51aa9a6..55a9c79 100644 --- a/scripts/download_trade_data.py +++ b/scripts/download_trade_data.py @@ -6,9 +6,8 @@ Use pd.read_pickle(file) to load data into memory. Use the ``interval`` argument to sample trade data into ohlc format instead of -downloading/updating trade data (in that case, only the arguments ``folder``, -``pair`` and ``interval`` have an effect). Data is stored as a pandas.DataFrame -(in "pair_interval.pickle" format). +downloading/updating trade data. Data is stored as a pandas.DataFrame (in +"pair_interval.pickle" format). """ @@ -21,68 +20,14 @@ import krakenex from pykrakenapi import KrakenAPI -from pykrakenapi.pykrakenapi import CallRateLimitError - -# parser -parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) - -parser.add_argument( - '--folder', - help='which (parent) folder to store data in', - type=str, - default=str(Path.home()) + '/cryptodata/') - -parser.add_argument( - '--pair', - help=('asset pair to get trade data for. ' - 'see KrakenAPI(api).get_tradable_asset_pairs().index.values'), - type=str, - default='XXBTZEUR') - -parser.add_argument( - '--since', - help=("return trade data since given unixtime (exclusive). If 0 (default) " - "and this script was called before, only an update to the " - "most recent data is retrieved. If 0 and this function was not " - "called before, retrieve from earliest time possible."), - type=str, - default=0) - -parser.add_argument( - '--timezone', - help=("convert the timezone of timestamps to ``timezone``, which must be " - "a string that pytz.timezone() accepts (see pytz.all_timezones)"), - type=str, - default='Europe/Berlin') - -parser.add_argument( - '--interval', - help=('sample downloaded trade data to ohlc format with the given time ' - 'interval (minutes). 
If 0 (default), only download/update trade ' - 'data.'), - type=int, - default=0) - -# args -args = parser.parse_args() - -folder = args.folder -pair = args.pair -since = args.since -timezone = args.timezone -interval = args.interval - class GetTradeData(object): def __init__(self, folder, pair, timezone): # initiate api - self.api = krakenex.API() - self.k = KrakenAPI(self.api, tier=0, retry=.1) + api = krakenex.API() + self.k = KrakenAPI(api, tier=0, retry=.1) # set pair self.pair = pair @@ -122,40 +67,43 @@ def download_trade_data(self, since): print('storing', fname) trades.to_pickle(fname) - except CallRateLimitError: - print('\n this should not happen. please report an issue on ' - 'github! thanks. \n') - raise - except ValueError: - print('download/update finished!') + print('\n download/update finished!') break - def agg_ohlc(self, interval): + def agg_ohlc(self, since, interval): folder = self.folder + self.pair + '/' # fetch files and convert to dataframe fs = os.listdir(folder) fs.sort(reverse=True) + if since > 0: + fs = [f for f in fs if int(f.split('.')[0]) >= since*1e9] + trades = [] for f in fs: trades.append(pd.read_pickle(folder + f)) trades = pd.concat(trades, axis=0) trades.loc[:, 'cost'] = trades.price * trades.volume + # store on disc + fname = self.folder + self.pair + '.pickle' + print('\n storing', fname) + trades.to_pickle(fname) + # resample gtrades = trades.resample('{}min'.format(interval)) # ohlc, volume ohlc = gtrades.price.ohlc() - ohlc.loc[:, 'vol'] = gtrades.volume.sum() - ohlc.vol.fillna(0, inplace=True) + ohlc.loc[:, 'volume'] = gtrades.volume.sum() + ohlc.volume.fillna(0, inplace=True) closes = ohlc.close.fillna(method='pad') ohlc = ohlc.apply(lambda x: x.fillna(closes)) # vwap - ohlc.loc[:, 'vwap'] = gtrades.cost.sum() / ohlc.vol + ohlc.loc[:, 'vwap'] = gtrades.cost.sum() / ohlc.volume ohlc.vwap.fillna(ohlc.close, inplace=True) # count @@ -163,13 +111,68 @@ def agg_ohlc(self, interval): # store on disc fname = self.folder + 
self.pair + '_{}.pickle'.format(interval) - print('storing', fname) + print('\n storing', fname) ohlc.to_pickle(fname) -dl = GetTradeData(folder, pair, timezone) - -if interval == 0: - dl.download_trade_data(since) -else: - dl.agg_ohlc(interval) +def main(folder, pair, since, timezone, interval): + + dl = GetTradeData(folder, pair, timezone) + + if interval == 0: + dl.download_trade_data(since) + else: + dl.agg_ohlc(since, interval) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument( + '--folder', + help='which (parent) folder to store data in', + type=str, + default=str(Path.home()) + '/cryptodata/') + + parser.add_argument( + '--pair', + help=('asset pair to get trade data for. ' + 'see KrakenAPI(api).get_tradable_asset_pairs().index.values'), + type=str, + default='XXBTZEUR') + + parser.add_argument( + '--since', + help=("download/aggregate trade data since given unixtime (exclusive)." + " If 0 (default) and this script was called before, only an" + " update to the most recent data is retrieved. If 0 and this" + " function was not called before, retrieve from earliest time" + " possible. When aggregating (interval>0), aggregate from" + " ``since`` onwards (unixtime)."), + type=int, + default=0) + + parser.add_argument( + '--timezone', + help=("convert the timezone of timestamps to ``timezone``, which must " + "be a string that pytz.timezone() accepts (see " + "pytz.all_timezones)"), + type=str, + default='Europe/Berlin') + + parser.add_argument( + '--interval', + help=("sample downloaded trade data to ohlc format with the given time" + " interval (minutes). If 0 (default), only download/update trade " + "data."), + type=int, + default=0) + + args = parser.parse_args() + + # execute + main(args.folder, args.pair, args.since, args.timezone, args.interval)