Allow import of multi-array COCO segmentations

v7labs · Jan 2, 2025 · 314ec61
1 parent acf3178
commit 314ec61
Show file tree

Hide file tree

Showing 2 changed files with 113 additions and 19 deletions.
diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py
@@ -88,7 +88,7 @@ def parse_json(
         annotation["segmentation"]
         if image_id not in image_annotations:
             image_annotations[image_id] = []
-        image_annotations[image_id].append(
+        image_annotations[image_id].extend(
             parse_annotation(annotation, category_lookup_table)
         )
 
@@ -105,7 +105,7 @@ def parse_json(
 def parse_annotation(
     annotation: Dict[str, dt.UnknownType],
     category_lookup_table: Dict[str, dt.UnknownType],
-) -> Optional[dt.Annotation]:
+) -> List[dt.Annotation]:
     """
     Parses the given ``json`` dictionary into a darwin ``Annotation`` if possible.
 
@@ -128,20 +128,20 @@ def parse_annotation(
     if iscrowd:
         logger.warn(
             f"Skipping annotation {annotation.get('id')} because it is a crowd "
-            "annotation, and Darwin does not support import of crowd annotations."
+            "annotation, and Darwin does not support import of COCO crowd annotations."
         )
-        return None
+        return []
 
     if len(segmentation) == 0 and len(annotation["bbox"]) == 4:
         x, y, w, h = map(int, annotation["bbox"])
-        return dt.make_bounding_box(category["name"], x, y, w, h)
+        return [dt.make_bounding_box(category["name"], x, y, w, h)]
     elif (
         len(segmentation) == 0
         and len(annotation["bbox"]) == 1
         and len(annotation["bbox"][0]) == 4
     ):
         x, y, w, h = map(int, annotation["bbox"][0])
-        return dt.make_bounding_box(category["name"], x, y, w, h)
+        return [dt.make_bounding_box(category["name"], x, y, w, h)]
     elif isinstance(segmentation, dict):
         logger.warn(
             "warning, converting complex coco rle mask to polygon, could take some time"
@@ -167,21 +167,23 @@ def parse_annotation(
                 except StopIteration:
                     break
             paths.append(path)
-        return dt.make_polygon(category["name"], paths)
+        return [dt.make_polygon(category["name"], paths)]
     elif isinstance(segmentation, list):
-        path = []
-        points = iter(
-            segmentation[0] if isinstance(segmentation[0], list) else segmentation
-        )
-        while True:
-            try:
-                x, y = next(points), next(points)
-                path.append({"x": x, "y": y})
-            except StopIteration:
-                break
-        return dt.make_polygon(category["name"], path)
+        paths = segmentation if isinstance(segmentation[0], list) else [segmentation]
+        polygons = []
+        for path in paths:
+            point_path = []
+            points = iter(path)
+            while True:
+                try:
+                    x, y = next(points), next(points)
+                    point_path.append({"x": x, "y": y})
+                except StopIteration:
+                    break
+            polygons.append(dt.make_polygon(category["name"], point_path))
+        return polygons
     else:
-        return None
+        return []
 
 
 def _decode_file(current_encoding: str, path: Path):

diff --git a/tests/darwin/importer/formats/import_coco_test.py b/tests/darwin/importer/formats/import_coco_test.py
@@ -0,0 +1,92 @@
+from typing import Dict, Any
+
+import darwin.datatypes as dt
+from darwin.importer.formats.coco import parse_annotation
+
+
+def test_parse_annotation_single_polygon():
+    """Test parsing a single polygon segmentation"""
+    annotation = {
+        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
+        "category_id": "1",
+        "bbox": [10, 10, 10, 10],
+        "iscrowd": 0,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 1
+    assert isinstance(result[0], dt.Annotation)
+    assert result[0].annotation_class.name == "test_class"
+    assert len(result[0].data["paths"]) == 1
+    path = result[0].data["paths"][0]
+    assert len(path) == 4
+    assert path[0] == {"x": 10, "y": 10}
+    assert path[2] == {"x": 20, "y": 20}
+
+
+def test_parse_annotation_multiple_polygons():
+    """Test parsing segmentation with multiple polygons"""
+    annotation = {
+        "segmentation": [
+            [10, 10, 20, 10, 20, 20, 10, 20],
+            [30, 30, 40, 30, 40, 40, 30, 40],
+        ],
+        "category_id": "1",
+        "bbox": [10, 10, 30, 30],
+        "iscrowd": 0,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 2
+    assert all(isinstance(r, dt.Annotation) for r in result)
+    assert all(r.annotation_class.name == "test_class" for r in result)
+
+    path1 = result[0].data["paths"][0]
+    assert len(path1) == 4
+    assert path1[0] == {"x": 10, "y": 10}
+    assert path1[2] == {"x": 20, "y": 20}
+
+    path2 = result[1].data["paths"][0]
+    assert len(path2) == 4
+    assert path2[0] == {"x": 30, "y": 30}
+    assert path2[2] == {"x": 40, "y": 40}
+
+
+def test_parse_annotation_bounding_box():
+    """Test parsing a bounding box annotation"""
+    annotation = {
+        "segmentation": [],
+        "category_id": "1",
+        "bbox": [10, 20, 30, 40],
+        "iscrowd": 0,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 1
+    assert isinstance(result[0], dt.Annotation)
+    assert result[0].annotation_class.name == "test_class"
+    assert result[0].data["x"] == 10
+    assert result[0].data["y"] == 20
+    assert result[0].data["w"] == 30
+    assert result[0].data["h"] == 40
+
+
+def test_parse_annotation_crowd():
+    """Test that crowd annotations are skipped"""
+    annotation = {
+        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
+        "category_id": "1",
+        "bbox": [10, 10, 10, 10],
+        "iscrowd": 1,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 0