Skip to content

Commit

Permalink
feat: Add custom dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
zzangsu committed Jun 1, 2024
1 parent 646378a commit 94a933c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
27 changes: 27 additions & 0 deletions autosink_model_training/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# dataset.py

from datasets import load_dataset

def load_custom_dataset(path_dataset_dir):
    """
    Load an image dataset stored under a directory.

    Delegates to ``datasets.load_dataset`` using the ``'imagefolder'``
    builder, passing the given directory as ``data_dir``.

    Parameters:
        path_dataset_dir (str): Directory that holds the dataset files.

    Returns:
        The object produced by ``load_dataset`` for that directory.
    """
    return load_dataset('imagefolder', data_dir=path_dataset_dir)

if __name__ == '__main__':
    # Script entry point: load the dataset named by the environment and
    # print its summary.
    import os

    # The dataset location is read from PATH_DATASET_DIR, which is the name
    # the error message below tells the user to set. Earlier versions of this
    # script read the lowercase 'data_dir' variable instead, so it is still
    # honored as a fallback to avoid breaking existing setups.
    path_dataset_dir = os.getenv('PATH_DATASET_DIR') or os.getenv('data_dir')

    # An empty value is treated the same as an unset variable: it cannot
    # name a dataset directory.
    if not path_dataset_dir:
        raise ValueError("Environment variable PATH_DATASET_DIR is not set")

    dataset = load_custom_dataset(path_dataset_dir)
    print(dataset)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ classifiers = [
requires-python = ">=3.8"
dependencies = [
"PyYAML",
"datasets",
"wandb"
]

[project.optional-dependencies]
Expand Down

0 comments on commit 94a933c

Please sign in to comment.