Skip to content

Commit

Permalink
Undo accidental changes meant as part of IO-1445
Browse files (browse the repository at this point in the history)
  • Loading branch information
JBWilkie committed Oct 18, 2023
1 parent 760e49c commit 8b0e213
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 20 deletions.
24 changes: 16 additions & 8 deletions darwin/dataset/local_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -95,15 +95,23 @@ def __init__(
stems = build_stems(release_path, annotations_dir, annotation_type, split, partition, split_type)

# Find all the annotations and their corresponding images
for annotation_path in sorted(annotations_dir.glob("**/*.json")):
darwin_json = parse_darwin_json(annotation_path)
image_path = images_dir / Path(darwin_json.full_path.lstrip('/\\'))
if image_path.exists():
self.images_path.append(image_path)
self.annotations_path.append(annotation_path)
continue
else:
for stem in stems:
annotation_path = annotations_dir / f"{stem}.json"
images = []
for ext in SUPPORTED_IMAGE_EXTENSIONS:
image_path = images_dir / f"{stem}{ext}"
if image_path.exists():
images.append(image_path)
continue
image_path = images_dir / f"{stem}{ext.upper()}"
if image_path.exists():
images.append(image_path)
if len(images) < 1:
raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
if len(images) > 1:
raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")
self.images_path.append(images[0])
self.annotations_path.append(annotation_path)

if len(self.images_path) == 0:
raise ValueError(f"Could not find any {SUPPORTED_IMAGE_EXTENSIONS} file", f" in {images_dir}")
Expand Down
34 changes: 22 additions & 12 deletions darwin/dataset/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -434,19 +434,29 @@ def get_annotations(

# Find all the annotations and their corresponding images
invalid_annotation_paths = []
for annotation_path in annotations_dir.glob("**/*.json"):
darwin_json = parse_darwin_json(annotation_path)
image_path = images_dir / Path(darwin_json.full_path.lstrip('/\\'))
if image_path.exists():
images_paths.append(image_path)
annotations_paths.append(annotation_path)
continue
else:
if ignore_inconsistent_examples:
invalid_annotation_paths.append(annotation_path)
for stem in stems:
annotation_path = annotations_dir / f"{stem}.json"
images = []
for ext in SUPPORTED_EXTENSIONS:
image_path = images_dir / f"{stem}{ext}"
if image_path.exists():
images.append(image_path)
continue
else:
raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
image_path = images_dir / f"{stem}{ext.upper()}"
if image_path.exists():
images.append(image_path)

image_count = len(images)
if image_count != 1 and ignore_inconsistent_examples:
invalid_annotation_paths.append(annotation_path)
continue
elif image_count < 1:
raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
elif image_count > 1:
raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")

images_paths.append(images[0])
annotations_paths.append(annotation_path)

print(f"Found {len(invalid_annotation_paths)} invalid annotations")
for p in invalid_annotation_paths:
Expand Down

0 comments on commit 8b0e213

Please sign in to comment.