From b9d1430736a0f19589065b02edd8f47de564b923 Mon Sep 17 00:00:00 2001 From: John Wilkie <124276291+JBWilkie@users.noreply.github.com> Date: Thu, 7 Mar 2024 16:50:57 +0000 Subject: [PATCH] [PLA-683][external] 2 Minor fixes for importing of PASCAL VOC annotations (#783) * Fixed mislabelled default arg value * Added support for importing PASCAL VOC annotations to folders + unit test * Fixed misconfigured querystring filter name in fetch_remote_files() * removed test file --- darwin/cli_functions.py | 2 +- darwin/importer/formats/pascal_voc.py | 5 ++++- darwin/importer/importer.py | 2 +- .../importer/formats/import_pascalvoc_test.py | 14 ++++++++++++++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index ad8d75268..662280e75 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -881,7 +881,7 @@ def dataset_import( import_reviewers : bool, default: False If ``True`` it will import the reviewers from the files to the dataset, if . If ``False`` it will not import the reviewers. - use_multi_cpu : bool, default: True + use_multi_cpu : bool, default: False If ``True`` it will use all multiple CPUs to speed up the import process. cpu_limit : Optional[int], default: Core count - 2 The maximum number of CPUs to use for the import process. 
diff --git a/darwin/importer/formats/pascal_voc.py b/darwin/importer/formats/pascal_voc.py index 4470a6149..e6fadf328 100644 --- a/darwin/importer/formats/pascal_voc.py +++ b/darwin/importer/formats/pascal_voc.py @@ -3,6 +3,7 @@ from typing import List, Optional import darwin.datatypes as dt +from darwin.path_utils import deconstruct_full_path def parse_path(path: Path) -> Optional[dt.AnnotationFile]: @@ -57,8 +58,10 @@ def parse_path(path: Path) -> Optional[dt.AnnotationFile]: ) annotation_classes = {annotation.annotation_class for annotation in annotations} + remote_path, filename = deconstruct_full_path(filename) + return dt.AnnotationFile( - path, filename, annotation_classes, annotations, remote_path="/" + path, filename, annotation_classes, annotations, remote_path=remote_path ) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index eac3e295c..1dd0880e4 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -221,7 +221,7 @@ def get_remote_files( for i in range(0, len(filenames), chunk_size): chunk = filenames[i : i + chunk_size] for remote_file in dataset.fetch_remote_files( - {"types": "image,playback_video,video_frame", "filenames": chunk} + {"types": "image,playback_video,video_frame", "item_names": chunk} ): slot_name = _get_slot_name(remote_file) remote_files[remote_file.full_path] = (remote_file.id, slot_name) diff --git a/tests/darwin/importer/formats/import_pascalvoc_test.py b/tests/darwin/importer/formats/import_pascalvoc_test.py index 2c50dc24f..84d09aa9f 100644 --- a/tests/darwin/importer/formats/import_pascalvoc_test.py +++ b/tests/darwin/importer/formats/import_pascalvoc_test.py @@ -171,3 +171,17 @@ def test_returns_annotation_file_with_correct_annotations_with_float_values( assert annotation.subs == [] assert annotation_file.remote_path == "/" + + def test_deconstructs_filepath_properly_if_folder_included_in_filename( + self, annotation_path: Path + ): + annotation_path.write_text( + 
"<annotation><filename>folder/image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax><ymin>10</ymin><ymax>10</ymax></bndbox></object></annotation>" + ) + + annotation_file = parse_path(annotation_path) + + assert annotation_file is not None + assert annotation_file.path == annotation_path + assert annotation_file.filename == "image.jpg" + assert annotation_file.remote_path == "/folder"