feat: write data to parquet files instead of csv
kieran-mackle committed Dec 8, 2023
1 parent 39fc1d0 commit f84599c
Showing 3 changed files with 7 additions and 6 deletions.
1 change: 0 additions & 1 deletion README.md
@@ -71,7 +71,6 @@ need to be revised. An option is to partition dynamically, for example
daily for 1 minutely data, weekly for 30 minutely, and so on.
- Support for private downloads to assist in accounting, account tracking
and analysis, etc.
- Save files to parquet format instead of compressed CSV.
- Centralise data store across machine.
- Handling of incomplete day data. If data is downloaded for the present day, it will
not contain 24 hours worth, despite being timestamped for the day. This means that in
6 changes: 3 additions & 3 deletions src/ccxt_download/public.py
@@ -306,7 +306,7 @@ async def candles(
df["symbol"] = symbol

# Save
df.to_csv(path_or_buf=filename, compression="gzip")
df.to_parquet(path=filename)

if verbose:
print(
@@ -410,7 +410,7 @@ async def trades(
df.drop("timestamp", inplace=True, axis=1)

# Save
df.to_csv(filename)
df.to_parquet(filename)

if verbose:
print(
@@ -516,7 +516,7 @@ async def funding(
df.drop("timestamp", inplace=True, axis=1)

# Save
df.to_csv(filename)
df.to_parquet(filename)

if verbose:
print(
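For context, a minimal sketch of the new save path in public.py, not taken verbatim from the repository: it assumes a pandas DataFrame with a DatetimeIndex, a parquet engine such as pyarrow or fastparquet installed, and a placeholder filename standing in for the one normally produced by filename_builder.

import pandas as pd

# Hypothetical candle-shaped frame: a DatetimeIndex plus OHLCV columns and the
# symbol tag added in the diff above.
df = pd.DataFrame(
    {
        "open": [42000.0],
        "high": [42100.0],
        "low": [41900.0],
        "close": [42050.0],
        "volume": [12.3],
    },
    index=pd.to_datetime(["2023-12-08 00:00:00"]),
)
df["symbol"] = "BTC/USDT"

# to_parquet stores the index alongside the data; with no compression argument,
# pandas applies its default (snappy) compression, so the explicit gzip flag
# used for the old CSV path is no longer needed.
filename = "example.parquet"  # placeholder; the real name comes from filename_builder
df.to_parquet(path=filename)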
6 changes: 4 additions & 2 deletions src/ccxt_download/utilities.py
@@ -38,7 +38,9 @@ def filename_builder(
dtid = f"{data_type_id}_" if data_type_id else ""
filename = os.path.join(
download_dir,
format_str(f"{exchange.lower()}_{dtid}{data_type}_{start_str}_{symbol}.csv.gz"),
format_str(
f"{exchange.lower()}_{dtid}{data_type}_{start_str}_{symbol}.parquet"
),
)
return filename

@@ -145,7 +147,7 @@ def filter(unfiltered_files: list[str], match_strs: list[str]):
df = pd.DataFrame()
for f in files:
try:
_df = pd.read_csv(f, index_col=0, parse_dates=True)
_df = pd.read_parquet(f)
_df = _df[~_df.index.duplicated(keep="first")]
df = pd.concat([df, _df])
except:
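And a minimal sketch of the matching read path in utilities.py, under the same assumptions (placeholder file name, pyarrow or fastparquet available). Unlike read_csv, read_parquet restores the stored index and its datetime dtype directly, which is why the index_col=0, parse_dates=True arguments are dropped in the hunk above.

import pandas as pd

files = ["example.parquet"]  # placeholder list; normally the filtered download files

df = pd.DataFrame()
for f in files:
    # The parquet file already carries the DatetimeIndex, so no parsing arguments.
    _df = pd.read_parquet(f)
    # Drop duplicate timestamps, keeping the first occurrence, then append.
    _df = _df[~_df.index.duplicated(keep="first")]
    df = pd.concat([df, _df])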
