Skip to content

Commit

Permalink
Allow import of multi-array COCO segmentations
Browse files: browse the repository at this point in the history
  • Loading branch information
JBWilkie committed Jan 2, 2025
1 parent acf3178 commit 314ec61
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 19 deletions.
40 changes: 21 additions & 19 deletions darwin/importer/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def parse_json(
annotation["segmentation"]
if image_id not in image_annotations:
image_annotations[image_id] = []
image_annotations[image_id].append(
image_annotations[image_id].extend(
parse_annotation(annotation, category_lookup_table)
)

Expand All @@ -105,7 +105,7 @@ def parse_json(
def parse_annotation(
annotation: Dict[str, dt.UnknownType],
category_lookup_table: Dict[str, dt.UnknownType],
) -> Optional[dt.Annotation]:
) -> List[dt.Annotation]:
"""
Parses the given ``json`` dictionary into a darwin ``Annotation`` if possible.
Expand All @@ -128,20 +128,20 @@ def parse_annotation(
if iscrowd:
logger.warn(
f"Skipping annotation {annotation.get('id')} because it is a crowd "
"annotation, and Darwin does not support import of crowd annotations."
"annotation, and Darwin does not support import of COCO crowd annotations."
)
return None
return []

if len(segmentation) == 0 and len(annotation["bbox"]) == 4:
x, y, w, h = map(int, annotation["bbox"])
return dt.make_bounding_box(category["name"], x, y, w, h)
return [dt.make_bounding_box(category["name"], x, y, w, h)]
elif (
len(segmentation) == 0
and len(annotation["bbox"]) == 1
and len(annotation["bbox"][0]) == 4
):
x, y, w, h = map(int, annotation["bbox"][0])
return dt.make_bounding_box(category["name"], x, y, w, h)
return [dt.make_bounding_box(category["name"], x, y, w, h)]
elif isinstance(segmentation, dict):
logger.warn(
"warning, converting complex coco rle mask to polygon, could take some time"
Expand All @@ -167,21 +167,23 @@ def parse_annotation(
except StopIteration:
break
paths.append(path)
return dt.make_polygon(category["name"], paths)
return [dt.make_polygon(category["name"], paths)]
elif isinstance(segmentation, list):
path = []
points = iter(
segmentation[0] if isinstance(segmentation[0], list) else segmentation
)
while True:
try:
x, y = next(points), next(points)
path.append({"x": x, "y": y})
except StopIteration:
break
return dt.make_polygon(category["name"], path)
paths = segmentation if isinstance(segmentation[0], list) else [segmentation]
polygons = []
for path in paths:
point_path = []
points = iter(path)
while True:
try:
x, y = next(points), next(points)
point_path.append({"x": x, "y": y})
except StopIteration:
break
polygons.append(dt.make_polygon(category["name"], point_path))
return polygons
else:
return None
return []


def _decode_file(current_encoding: str, path: Path):
Expand Down
92 changes: 92 additions & 0 deletions tests/darwin/importer/formats/import_coco_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from typing import Dict, Any

import darwin.datatypes as dt
from darwin.importer.formats.coco import parse_annotation


def test_parse_annotation_single_polygon():
    """A segmentation with one coordinate array yields exactly one polygon."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    coco_annotation = {
        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
        "category_id": "1",
        "bbox": [10, 10, 10, 10],
        "iscrowd": 0,
    }

    parsed = parse_annotation(coco_annotation, lookup)

    # Exactly one annotation, carrying the class name from the lookup table.
    assert len(parsed) == 1
    polygon = parsed[0]
    assert isinstance(polygon, dt.Annotation)
    assert polygon.annotation_class.name == "test_class"

    # The flat [x0, y0, x1, y1, ...] list must have been paired into points.
    assert len(polygon.data["paths"]) == 1
    vertices = polygon.data["paths"][0]
    assert len(vertices) == 4
    assert vertices[0] == {"x": 10, "y": 10}
    assert vertices[2] == {"x": 20, "y": 20}


def test_parse_annotation_multiple_polygons():
    """Every array of a multi-array segmentation becomes its own annotation."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    coco_annotation = {
        "segmentation": [
            [10, 10, 20, 10, 20, 20, 10, 20],
            [30, 30, 40, 30, 40, 40, 30, 40],
        ],
        "category_id": "1",
        "bbox": [10, 10, 30, 30],
        "iscrowd": 0,
    }

    parsed = parse_annotation(coco_annotation, lookup)

    assert len(parsed) == 2
    assert all(isinstance(item, dt.Annotation) for item in parsed)
    assert all(item.annotation_class.name == "test_class" for item in parsed)

    # Expected first and third vertex of each polygon, in input order.
    expected_corners = (
        ({"x": 10, "y": 10}, {"x": 20, "y": 20}),
        ({"x": 30, "y": 30}, {"x": 40, "y": 40}),
    )
    for index, (first_vertex, third_vertex) in enumerate(expected_corners):
        vertices = parsed[index].data["paths"][0]
        assert len(vertices) == 4
        assert vertices[0] == first_vertex
        assert vertices[2] == third_vertex


def test_parse_annotation_bounding_box():
    """An empty segmentation with a four-value bbox parses to one bounding box."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    coco_annotation = {
        "segmentation": [],
        "category_id": "1",
        "bbox": [10, 20, 30, 40],
        "iscrowd": 0,
    }

    parsed = parse_annotation(coco_annotation, lookup)

    assert len(parsed) == 1
    box = parsed[0]
    assert isinstance(box, dt.Annotation)
    assert box.annotation_class.name == "test_class"

    # The bbox values must come through unchanged, in x, y, w, h order.
    for key, expected in (("x", 10), ("y", 20), ("w", 30), ("h", 40)):
        assert box.data[key] == expected


def test_parse_annotation_crowd():
    """An annotation flagged with ``iscrowd`` produces no annotations."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    crowd_annotation = {
        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
        "category_id": "1",
        "bbox": [10, 10, 10, 10],
        "iscrowd": 1,
    }

    parsed = parse_annotation(crowd_annotation, lookup)

    # A valid polygon is present, yet the crowd flag must cause a skip.
    assert len(parsed) == 0

0 comments on commit 314ec61

Please sign in to comment.