IO-496: Request: Add support for importing polygons in Dataloop format (#497)

* Adds support for Dataloop polygons in dataloop video format, for simple polygons only
* Adds tests for entire Dataloop importer, to avoid future regressions

Co-authored-by: Owen Jones <[email protected]>
v7labs · Dec 5, 2022 · ff6e0cf · ff6e0cf
1 parent 9b64d8f
commit ff6e0cf
Show file tree

Hide file tree

Showing 8 changed files with 597 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,7 @@ __pycache__/
 
 output/
 data/
+!tests/darwin/data
 darwin_py.egg-info/PKG-INFO
 
 *.png

diff --git a/darwin/cli.py b/darwin/cli.py
@@ -2,7 +2,10 @@
 
 import getpass
 import os
+import platform
 from argparse import ArgumentParser, Namespace
+from datetime import datetime
+from json import dumps
 
 import requests.exceptions
 
@@ -40,6 +43,21 @@ def main() -> None:
         f._error("The team specified is not in the configuration, please authenticate first.")
     except requests.exceptions.ConnectionError:
         f._error("Darwin seems unreachable, please try again in a minute or contact support.")
+    except Exception as e:  # Last-resort handler: anything not caught above is written to a log file for support
+        import traceback  # stdlib; imported locally because it is only needed on this failure path
+
+        filename = f"darwin_error_{datetime.now().timestamp()}.log"
+        with open(filename, "w") as fd:  # context manager closes the log even if a write below raises
+            fd.write("Darwin CLI error log\n")
+            fd.write(f"Version: {__version__}\n")
+            fd.write(f"OS: {platform.platform()}\n")
+            fd.write(f"Command: {dumps(vars(args), default=str)}\n")  # assumes args is an argparse.Namespace, which json cannot dump directly
+            fd.write(f"Error: {type(e).__name__}: {e}\n")  # exception objects are not JSON-serialisable, so record them as text
+            fd.write(traceback.format_exc())
+
+        f._error(
+            f"An unexpected error occurred, errors have been written to {filename}, please contact support, and send them the file. "
+            + str(e)
+        )
 
 
 def _run(args: Namespace, parser: ArgumentParser) -> None:

diff --git a/darwin/exceptions.py b/darwin/exceptions.py
@@ -112,7 +112,7 @@ class Unauthorized(Exception):
     """
 
     def __str__(self):
-        return f"Unauthorized"
+        return "Unauthorized"
 
 
 class OutdatedDarwinJSONFormat(Exception):
@@ -142,3 +142,43 @@ def __init__(self, version: str):
 
     def __str__(self):
         return f"Unknown version: '{self.version}'"
+
+
+class UnsupportedImportAnnotationType(Exception):
+    """
+    Used when one tries to parse an annotation whose type the importer does not support.
+    """
+
+    def __init__(self, import_type: str, annotation_type: str):
+        """
+        Parameters
+        ----------
+        import_type: str
+            The type of import, e.g. "dataloop". Also stored as ``self.import_type``.
+        annotation_type: str
+            The unsupported annotation type. Also stored as ``self.annotation_type``.
+        """
+        super().__init__(
+            f"Unsupported annotation type {annotation_type} for {import_type} import"
+        )
+        self.import_type = import_type  # kept alongside the formatted message
+        self.annotation_type = annotation_type  # kept alongside the formatted message
+
+
+class DataloopComplexPolygonsNotYetSupported(Exception):
+    """
+    Used when one tries to parse an annotation with a complex polygon.
+    """
+
+    def __init__(
+        self,
+    ):
+        """
+        Initialise the exception with a fixed message.
+
+        This constructor takes no parameters: the message passed to
+        ``Exception`` is always the same string. In the dataloop importer
+        it is raised when a "segment" annotation's coordinates do not
+        contain exactly one point list.
+        """
+        super().__init__("Complex polygons not yet supported for dataloop import")
diff --git a/darwin/importer/formats/dataloop.py b/darwin/importer/formats/dataloop.py
@@ -3,6 +3,10 @@
 from typing import Any, Dict, List, Optional, Set
 
 import darwin.datatypes as dt
+from darwin.exceptions import (
+    DataloopComplexPolygonsNotYetSupported,
+    UnsupportedImportAnnotationType,
+)
 
 
 def parse_path(path: Path) -> Optional[dt.AnnotationFile]:
@@ -25,10 +29,18 @@ def parse_path(path: Path) -> Optional[dt.AnnotationFile]:
         return None
     with path.open() as f:
         data = json.load(f)
-        annotations: List[dt.Annotation] = list(filter(None, map(_parse_annotation, data["annotations"])))
-        annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations])
+        annotations: List[dt.Annotation] = list(
+            filter(None, map(_parse_annotation, data["annotations"]))
+        )
+        annotation_classes: Set[dt.AnnotationClass] = set(
+            [annotation.annotation_class for annotation in annotations]
+        )
         return dt.AnnotationFile(
-            path, _remove_leading_slash(data["filename"]), annotation_classes, annotations, remote_path="/"
+            path,
+            _remove_leading_slash(data["filename"]),
+            annotation_classes,
+            annotations,
+            remote_path="/",
         )
 
 
@@ -42,8 +54,8 @@ def _remove_leading_slash(filename: str) -> str:
 def _parse_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotation]:
     annotation_type = annotation["type"]
     annotation_label = annotation["label"]
-    if annotation_type not in ["box", "class"]:
-        raise ValueError(f"Unknown supported annotation type: {annotation_type}")
+    if annotation_type not in ["box", "class", "segment"]:
+        raise UnsupportedImportAnnotationType("dataloop", annotation_type)
 
     if len(annotation["metadata"]["system"].get("snapshots_", [])) > 1:
         raise ValueError("multiple snapshots per annotations are not supported")
@@ -58,4 +70,12 @@ def _parse_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotation]:
         x2, y2 = coords[1]["x"], coords[1]["y"]
         return dt.make_bounding_box(annotation_label, x1, y1, x2 - x1, y2 - y1)
 
+    if annotation_type == "segment":
+        coords = annotation["coordinates"]
+        if len(coords) != 1:
+            raise DataloopComplexPolygonsNotYetSupported()
+
+        points: List[dt.Point] = [{"x": c["x"], "y": c["y"]} for c in coords[0]]
+        return dt.make_polygon(annotation_label, point_path=points)
+
     return None
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 160
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 with open(Path(__file__).parent / "darwin" / "version" / "__init__.py", "r") as f:
     content = f.read()
     # from https://www.py4u.net/discuss/139845
-    version = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', content).group(1)
+    version = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', content).group(1)  # type: ignore
 
 with open("README.md", "rb") as f:
     long_description = f.read().decode("utf-8")
@@ -37,6 +37,7 @@
     ],
     extras_require={
         "test": ["responses", "pytest", "pytest-describe", "scikit-learn"],
+        "dev": ["black", "flake8", "isort", "mypy", "responses", "pytest", "pytest-describe", "scikit-learn"],
         "ml": ["scikit-learn", "torch", "torchvision"],
         "medical": ["nibabel", "connected-components-3d"],
     },
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,7 @@ __pycache__/ @@
     output/
     data/
+    !tests/darwin/data
     darwin_py.egg-info/PKG-INFO
     *.png
@@ Expand Down @@