-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathnumerai_dataset.py
45 lines (34 loc) · 1.09 KB
/
numerai_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from kedro.io.core import (
AbstractDataSet
)
import tempfile
import pandas as pd
import numerapi
import requests, zipfile
from kedro_work.utils import get_joblib_memory
# Module-level cache object obtained from the project's utilities; its
# ``cache`` attribute is used as a decorator in this module so that repeated
# downloads of the same URL can be served from the cache.
# NOTE(review): presumably a ``joblib.Memory`` instance (going by the helper's
# name) -- confirm in kedro_work.utils.
memory = get_joblib_memory()
@memory.cache
def download_url(url):
    """Download ``url`` and return the raw response body as bytes.

    The function is wrapped by ``memory.cache``, so the returned bytes are
    memoized per ``url`` by that cache object.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The response body (``bytes``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If connecting, or waiting for data, takes too long.
    """
    # (connect, read) timeout: the read value bounds the wait between chunks,
    # not the total transfer time, so large archives still download fine while
    # a stalled connection no longer hangs forever.
    response = requests.get(url, timeout=(10, 300))
    # Raise before returning so an HTTP error page is never handed to the
    # cache and later mistaken for a valid archive.
    response.raise_for_status()
    return response.content
class NumeraiDataset(AbstractDataSet):
    """Kedro dataset that loads one Numerai CSV file into a DataFrame.

    The archive URL is obtained from ``numerapi``; the archive is fetched via
    ``download_url`` and one CSV member is parsed with pandas. Saving is a
    no-op.
    """

    def __init__(self, is_train: bool):
        """Create the dataset.

        Args:
            is_train: When truthy, ``_load`` reads
                ``numerai_training_data.csv``; otherwise it reads
                ``numerai_tournament_data.csv``.
        """
        self._is_train = is_train
        self._napi = numerapi.NumerAPI(verbosity="info")

    def _load(self) -> pd.DataFrame:
        """Download the dataset archive and return the selected CSV.

        Returns:
            The CSV parsed by ``pd.read_csv`` with its first column used as
            the index.
        """
        url = self._napi.get_dataset_url()
        if self._is_train:
            fname = 'numerai_training_data.csv'
        else:
            fname = 'numerai_tournament_data.csv'

        with tempfile.TemporaryDirectory() as tmp_dir:
            cache_path = '{}/numerai_cache.zip'.format(tmp_dir)
            with open(cache_path, 'wb') as f:
                f.write(download_url(url))
            # Close both the archive and the member before the temporary
            # directory is removed; otherwise the handles leak and cleanup
            # can fail on platforms that refuse to delete open files.
            with zipfile.ZipFile(cache_path) as archive:
                with archive.open(fname) as member:
                    df = pd.read_csv(member, index_col=0)
        return df

    def _describe(self) -> dict:
        """Return the parameters that describe this dataset instance."""
        return dict(is_train=self._is_train)

    def _save(self, data) -> None:
        """Do nothing: ``data`` is ignored and nothing is written."""
        pass