From 8b0e213c07df3d6f4e304e2fcb0df3ab26fd8151 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 18 Oct 2023 20:12:19 +0100 Subject: [PATCH] Undo accidental changes meant as part of IO-1445 --- darwin/dataset/local_dataset.py | 24 +++++++++++++++-------- darwin/dataset/utils.py | 34 +++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py index 01d2d1d89..c686f516a 100644 --- a/darwin/dataset/local_dataset.py +++ b/darwin/dataset/local_dataset.py @@ -95,15 +95,23 @@ def __init__( stems = build_stems(release_path, annotations_dir, annotation_type, split, partition, split_type) # Find all the annotations and their corresponding images - for annotation_path in sorted(annotations_dir.glob("**/*.json")): - darwin_json = parse_darwin_json(annotation_path) - image_path = images_dir / Path(darwin_json.full_path.lstrip('/\\')) - if image_path.exists(): - self.images_path.append(image_path) - self.annotations_path.append(annotation_path) - continue - else: + for stem in stems: + annotation_path = annotations_dir / f"{stem}.json" + images = [] + for ext in SUPPORTED_IMAGE_EXTENSIONS: + image_path = images_dir / f"{stem}{ext}" + if image_path.exists(): + images.append(image_path) + continue + image_path = images_dir / f"{stem}{ext.upper()}" + if image_path.exists(): + images.append(image_path) + if len(images) < 1: raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image") + if len(images) > 1: + raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.") + self.images_path.append(images[0]) + self.annotations_path.append(annotation_path) if len(self.images_path) == 0: raise ValueError(f"Could not find any {SUPPORTED_IMAGE_EXTENSIONS} file", f" in {images_dir}") diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index cdaae8ce2..3f3bb865f 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -434,19 +434,29 @@ def get_annotations( # Find all the annotations and their corresponding images invalid_annotation_paths = [] - for annotation_path in annotations_dir.glob("**/*.json"): - darwin_json = parse_darwin_json(annotation_path) - image_path = images_dir / Path(darwin_json.full_path.lstrip('/\\')) - if image_path.exists(): - images_paths.append(image_path) - annotations_paths.append(annotation_path) - continue - else: - if ignore_inconsistent_examples: - invalid_annotation_paths.append(annotation_path) + for stem in stems: + annotation_path = annotations_dir / f"{stem}.json" + images = [] + for ext in SUPPORTED_EXTENSIONS: + image_path = images_dir / f"{stem}{ext}" + if image_path.exists(): + images.append(image_path) continue - else: - raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image") + image_path = images_dir / f"{stem}{ext.upper()}" + if image_path.exists(): + images.append(image_path) + + image_count = len(images) + if image_count != 1 and ignore_inconsistent_examples: + invalid_annotation_paths.append(annotation_path) + continue + elif image_count < 1: + raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image") + elif image_count > 1: + raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.") + + images_paths.append(images[0]) + annotations_paths.append(annotation_path) print(f"Found {len(invalid_annotation_paths)} invalid annotations") for p in invalid_annotation_paths: