Skip to content

Commit

Permalink
add load_hf_dataset function
Browse files Browse the repository at this point in the history
  • Loading branch information
menamerai committed Feb 10, 2024
1 parent 9367171 commit 6887e30
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions scripts/get_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import os
from pickle import dump

from datasets import load_dataset


def load_hf_dataset(
    dataset_name: str, output_path: str, split: str | None = None
) -> None:
    """Load a dataset via ``datasets.load_dataset`` and pickle it to disk.

    Args:
        dataset_name: Name passed through to ``load_dataset``.
        output_path: File path the pickled object is written to. Missing
            parent directories are created; an existing file is overwritten.
        split: Forwarded as ``load_dataset``'s ``split`` keyword argument;
            ``None`` is passed through unchanged.

    Raises:
        OSError: If the parent directory cannot be created or the output
            file cannot be opened for writing.
    """
    hf_ds = load_dataset(dataset_name, split=split)

    # A bare filename such as "ds.pkl" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually has one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, "wb") as f:
        dump(hf_ds, f)

0 comments on commit 6887e30

Please sign in to comment.