From 7c8044e8bb09877d3ba9187ffdb312ce9e96282b Mon Sep 17 00:00:00 2001 From: Elron Bandel Date: Sun, 5 May 2024 15:41:07 +0300 Subject: [PATCH] Fix huggingface uploads (#793) Signed-off-by: Elron Bandel --- utils/hf/prepare_dataset.py | 26 ++++++++++++++++---------- utils/hf/prepare_metric.py | 22 ++++++++++++++-------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/utils/hf/prepare_dataset.py b/utils/hf/prepare_dataset.py index 53ba2c7ae..c0f177f43 100644 --- a/utils/hf/prepare_dataset.py +++ b/utils/hf/prepare_dataset.py @@ -1,5 +1,7 @@ import glob import os +import shutil +import tempfile from huggingface_hub import HfApi @@ -9,20 +11,24 @@ print("Uploading files from src/unitxt/ to hf:unitxt/data") -for file in files: - file_name = os.path.basename(file) +with tempfile.TemporaryDirectory() as temp_dir: + for file in files: + file_name = os.path.basename(file) - if file_name == "__init__.py": - continue + if file_name == "__init__.py": + continue - if file_name == "dataset.py": - file_name = "data.py" + if file_name == "dataset.py": + file_name = "data.py" - print(f" - {file_name}") + shutil.copy(file, os.path.join(temp_dir, file_name)) - api.upload_file( - path_or_fileobj=file, - path_in_repo=file_name, + print(f" - {file_name}") + + api.upload_folder( + folder_path=temp_dir, + delete_patterns="*.py", # delete any unused python files repo_id="unitxt/data", repo_type="dataset", + run_as_future=True, ) diff --git a/utils/hf/prepare_metric.py b/utils/hf/prepare_metric.py index 210b30869..efacace08 100644 --- a/utils/hf/prepare_metric.py +++ b/utils/hf/prepare_metric.py @@ -1,5 +1,7 @@ import glob import os +import shutil +import tempfile from huggingface_hub import HfApi @@ -9,17 +11,21 @@ print("\nUploading files from src/unitxt/ to hf:unitxt/metric") -for file in files: - file_name = os.path.basename(file) +with tempfile.TemporaryDirectory() as temp_dir: + for file in files: + file_name = os.path.basename(file) - if file_name == "__init__.py": - continue + if file_name == "__init__.py": + continue - print(f" - {file_name}") + shutil.copy(file, os.path.join(temp_dir, file_name)) - api.upload_file( - path_or_fileobj=file, - path_in_repo=file_name, + print(f" - {file_name}") + + api.upload_folder( + folder_path=temp_dir, + delete_patterns="*.py", # delete any unused python files repo_id="unitxt/metric", repo_type="space", + run_as_future=True, )