From 94a933cebed96b1398d113bfb249de5da3066f59 Mon Sep 17 00:00:00 2001
From: subin
Date: Sat, 1 Jun 2024 15:37:02 +0900
Subject: [PATCH] feat: Add custom dataset

---
 autosink_model_training/dataset.py | 31 +++++++++++++++++++++++++++++++
 pyproject.toml                     |  2 ++
 2 files changed, 33 insertions(+)
 create mode 100644 autosink_model_training/dataset.py

diff --git a/autosink_model_training/dataset.py b/autosink_model_training/dataset.py
new file mode 100644
index 0000000..5f1e543
--- /dev/null
+++ b/autosink_model_training/dataset.py
@@ -0,0 +1,31 @@
+# dataset.py
+
+from datasets import load_dataset
+
+
+def load_custom_dataset(path_dataset_dir):
+    """
+    Loads an image dataset from the specified directory.
+
+    The directory is read with the Hugging Face ``imagefolder`` builder.
+
+    Parameters:
+    path_dataset_dir (str): The path to the directory containing the dataset.
+
+    Returns:
+    The loaded dataset, as returned by ``datasets.load_dataset``.
+    """
+    return load_dataset('imagefolder', data_dir=path_dataset_dir)
+
+
+if __name__ == '__main__':
+    import os
+
+    # PATH_DATASET_DIR is the variable named in the error message below; the
+    # lowercase `data_dir` name is also accepted as a fallback.
+    path_dataset_dir = os.getenv('PATH_DATASET_DIR') or os.getenv('data_dir')
+    if not path_dataset_dir:
+        raise ValueError("Environment variable PATH_DATASET_DIR is not set")
+
+    dataset = load_custom_dataset(path_dataset_dir)
+    print(dataset)
diff --git a/pyproject.toml b/pyproject.toml
index 87b3ace..5c1170a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,8 @@ classifiers = [
 requires-python = ">=3.8"
 dependencies = [
     "PyYAML",
+    "datasets",
+    "wandb"
 ]
 
 [project.optional-dependencies]