From 314ec61cd6e35647b1ff23e68c1aeccdd33668d9 Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Thu, 2 Jan 2025 22:29:07 +0000
Subject: [PATCH 1/2] Allow import of multi-array COCO segmentations

---
 darwin/importer/formats/coco.py               | 40 ++++----
 .../importer/formats/import_coco_test.py      | 92 +++++++++++++++++++
 2 files changed, 113 insertions(+), 19 deletions(-)
 create mode 100644 tests/darwin/importer/formats/import_coco_test.py

diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py
index 616a4eeb5..5d7190345 100644
--- a/darwin/importer/formats/coco.py
+++ b/darwin/importer/formats/coco.py
@@ -88,7 +88,7 @@ def parse_json(
         annotation["segmentation"]
         if image_id not in image_annotations:
             image_annotations[image_id] = []
-        image_annotations[image_id].append(
+        image_annotations[image_id].extend(
             parse_annotation(annotation, category_lookup_table)
         )
 
@@ -105,7 +105,7 @@ def parse_json(
 def parse_annotation(
     annotation: Dict[str, dt.UnknownType],
     category_lookup_table: Dict[str, dt.UnknownType],
-) -> Optional[dt.Annotation]:
+) -> List[dt.Annotation]:
     """
     Parses the given ``json`` dictionary into a darwin ``Annotation`` if possible.
 
@@ -128,20 +128,20 @@ def parse_annotation(
     if iscrowd:
         logger.warn(
             f"Skipping annotation {annotation.get('id')} because it is a crowd "
-            "annotation, and Darwin does not support import of crowd annotations."
+            "annotation, and Darwin does not support import of COCO crowd annotations."
         )
-        return None
+        return []
 
     if len(segmentation) == 0 and len(annotation["bbox"]) == 4:
         x, y, w, h = map(int, annotation["bbox"])
-        return dt.make_bounding_box(category["name"], x, y, w, h)
+        return [dt.make_bounding_box(category["name"], x, y, w, h)]
     elif (
         len(segmentation) == 0
         and len(annotation["bbox"]) == 1
         and len(annotation["bbox"][0]) == 4
     ):
         x, y, w, h = map(int, annotation["bbox"][0])
-        return dt.make_bounding_box(category["name"], x, y, w, h)
+        return [dt.make_bounding_box(category["name"], x, y, w, h)]
     elif isinstance(segmentation, dict):
         logger.warn(
             "warning, converting complex coco rle mask to polygon, could take some time"
@@ -167,21 +167,23 @@ def parse_annotation(
                 except StopIteration:
                     break
             paths.append(path)
-        return dt.make_polygon(category["name"], paths)
+        return [dt.make_polygon(category["name"], paths)]
     elif isinstance(segmentation, list):
-        path = []
-        points = iter(
-            segmentation[0] if isinstance(segmentation[0], list) else segmentation
-        )
-        while True:
-            try:
-                x, y = next(points), next(points)
-                path.append({"x": x, "y": y})
-            except StopIteration:
-                break
-        return dt.make_polygon(category["name"], path)
+        paths = segmentation if isinstance(segmentation[0], list) else [segmentation]
+        polygons = []
+        for path in paths:
+            point_path = []
+            points = iter(path)
+            while True:
+                try:
+                    x, y = next(points), next(points)
+                    point_path.append({"x": x, "y": y})
+                except StopIteration:
+                    break
+            polygons.append(dt.make_polygon(category["name"], point_path))
+        return polygons
     else:
-        return None
+        return []
 
 
 def _decode_file(current_encoding: str, path: Path):
diff --git a/tests/darwin/importer/formats/import_coco_test.py b/tests/darwin/importer/formats/import_coco_test.py
new file mode 100644
index 000000000..ce8e6893b
--- /dev/null
+++ b/tests/darwin/importer/formats/import_coco_test.py
@@ -0,0 +1,92 @@
+from typing import Dict, Any
+
+import darwin.datatypes as dt
+from darwin.importer.formats.coco import parse_annotation
+
+
+def test_parse_annotation_single_polygon():
+    """Parse a one-path segmentation into one annotation with a 4-point path"""
+    annotation = {
+        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],  # flat x, y pairs
+        "category_id": "1",
+        "bbox": [10, 10, 10, 10],
+        "iscrowd": 0,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 1
+    assert isinstance(result[0], dt.Annotation)
+    assert result[0].annotation_class.name == "test_class"
+    assert len(result[0].data["paths"]) == 1
+    path = result[0].data["paths"][0]
+    assert len(path) == 4  # 8 coordinates pair up into 4 points
+    assert path[0] == {"x": 10, "y": 10}
+    assert path[2] == {"x": 20, "y": 20}
+
+
+def test_parse_annotation_multiple_polygons():
+    """Test parsing segmentation with multiple polygons"""
+    annotation = {
+        "segmentation": [
+            [10, 10, 20, 10, 20, 20, 10, 20],
+            [30, 30, 40, 30, 40, 40, 30, 40],
+        ],
+        "category_id": "1",
+        "bbox": [10, 10, 30, 30],
+        "iscrowd": 0,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 2
+    assert all(isinstance(r, dt.Annotation) for r in result)
+    assert all(r.annotation_class.name == "test_class" for r in result)
+
+    path1 = result[0].data["paths"][0]
+    assert len(path1) == 4
+    assert path1[0] == {"x": 10, "y": 10}
+    assert path1[2] == {"x": 20, "y": 20}
+
+    path2 = result[1].data["paths"][0]
+    assert len(path2) == 4
+    assert path2[0] == {"x": 30, "y": 30}
+    assert path2[2] == {"x": 40, "y": 40}
+
+
+def test_parse_annotation_bounding_box():
+    """Parse an annotation with an empty segmentation into one bounding box"""
+    annotation = {
+        "segmentation": [],  # empty, so the 4-value bbox is used instead
+        "category_id": "1",
+        "bbox": [10, 20, 30, 40],  # unpacked by the parser as x, y, w, h
+        "iscrowd": 0,
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 1
+    assert isinstance(result[0], dt.Annotation)
+    assert result[0].annotation_class.name == "test_class"
+    assert result[0].data["x"] == 10
+    assert result[0].data["y"] == 20
+    assert result[0].data["w"] == 30
+    assert result[0].data["h"] == 40
+
+
+def test_parse_annotation_crowd():
+    """Crowd annotations are skipped, so parsing one returns an empty list"""
+    annotation = {
+        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
+        "category_id": "1",
+        "bbox": [10, 10, 10, 10],
+        "iscrowd": 1,  # crowd flag set; all other fields match a valid polygon
+    }
+    category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
+
+    result = parse_annotation(annotation, category_lookup)
+
+    assert len(result) == 0

From 8ec70d0cee690c652fb2fee2c4f96e765f8130f4 Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Mon, 6 Jan 2025 10:49:43 +0000
Subject: [PATCH 2/2] Support for complex polygon import

---
 darwin/importer/formats/coco.py                   |  6 +++---
 tests/darwin/importer/formats/import_coco_test.py | 13 +++++++------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py
index 5d7190345..1dd3e5725 100644
--- a/darwin/importer/formats/coco.py
+++ b/darwin/importer/formats/coco.py
@@ -170,7 +170,7 @@ def parse_annotation(
         return [dt.make_polygon(category["name"], paths)]
     elif isinstance(segmentation, list):
         paths = segmentation if isinstance(segmentation[0], list) else [segmentation]
-        polygons = []
+        point_paths = []
         for path in paths:
             point_path = []
             points = iter(path)
@@ -180,8 +180,8 @@ def parse_annotation(
                     point_path.append({"x": x, "y": y})
                 except StopIteration:
                     break
-            polygons.append(dt.make_polygon(category["name"], point_path))
-        return polygons
+            point_paths.append(point_path)
+        return [dt.make_polygon(category["name"], point_paths)]
     else:
         return []
 
diff --git a/tests/darwin/importer/formats/import_coco_test.py b/tests/darwin/importer/formats/import_coco_test.py
index ce8e6893b..ff9e53cd9 100644
--- a/tests/darwin/importer/formats/import_coco_test.py
+++ b/tests/darwin/importer/formats/import_coco_test.py
@@ -26,8 +26,8 @@ def test_parse_annotation_single_polygon():
     assert path[2] == {"x": 20, "y": 20}
 
 
-def test_parse_annotation_multiple_polygons():
-    """Test parsing segmentation with multiple polygons"""
+def test_parse_annotation_multiple_paths():
+    """Test parsing segmentation with multiple paths in a single polygon"""
     annotation = {
         "segmentation": [
             [10, 10, 20, 10, 20, 20, 10, 20],
@@ -41,16 +41,17 @@ def test_parse_annotation_multiple_polygons():
 
     result = parse_annotation(annotation, category_lookup)
 
-    assert len(result) == 2
-    assert all(isinstance(r, dt.Annotation) for r in result)
-    assert all(r.annotation_class.name == "test_class" for r in result)
+    assert len(result) == 1
+    assert isinstance(result[0], dt.Annotation)
+    assert result[0].annotation_class.name == "test_class"
+    assert len(result[0].data["paths"]) == 2
 
     path1 = result[0].data["paths"][0]
     assert len(path1) == 4
     assert path1[0] == {"x": 10, "y": 10}
     assert path1[2] == {"x": 20, "y": 20}
 
-    path2 = result[1].data["paths"][0]
+    path2 = result[0].data["paths"][1]
     assert len(path2) == 4
     assert path2[0] == {"x": 30, "y": 30}
     assert path2[2] == {"x": 40, "y": 40}