From cfce1bc9268428f82674cbbd611a7b1079dcc4dd Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 14 Nov 2023 19:28:31 +0100 Subject: [PATCH] added support RemoteDatasetV1 parsing and updated tests --- darwin/dataset/remote_dataset.py | 5 +- darwin/dataset/remote_dataset_v1.py | 6 +- darwin/dataset/remote_dataset_v2.py | 6 +- darwin/exporter/formats/darwin_1_0.py | 73 ++++++++++++++++++++- tests/darwin/dataset/remote_dataset_test.py | 10 +-- 5 files changed, 89 insertions(+), 11 deletions(-) diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index c84d81394..59c04df03 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -159,7 +159,7 @@ def split_video_annotations(self, release_name: str = "latest") -> None: frame_annotations = split_video_annotation(darwin_annotation) for frame_annotation in frame_annotations: - annotation = build_image_annotation(frame_annotation) + annotation = self._build_image_annotation(frame_annotation) video_frame_annotations_path = annotations_path / annotation_file.stem video_frame_annotations_path.mkdir(exist_ok=True, parents=True) @@ -894,3 +894,6 @@ def local_images_path(self) -> Path: def identifier(self) -> DatasetIdentifier: """The ``DatasetIdentifier`` of this ``RemoteDataset``.""" return DatasetIdentifier(team_slug=self.team, dataset_slug=self.slug) + + def _build_image_annotation(self, annotation_file: AnnotationFile) -> Dict[str, Any]: + return build_image_annotation(annotation_file) \ No newline at end of file diff --git a/darwin/dataset/remote_dataset_v1.py b/darwin/dataset/remote_dataset_v1.py index 2872629bd..d69f1d58a 100644 --- a/darwin/dataset/remote_dataset_v1.py +++ b/darwin/dataset/remote_dataset_v1.py @@ -13,8 +13,9 @@ UploadHandlerV1, ) from darwin.dataset.utils import is_relative_to -from darwin.datatypes import ItemId, PathLike +from darwin.datatypes import AnnotationFile, ItemId, PathLike from darwin.exceptions import NotFound, ValidationError +from darwin.exporter.formats.darwin_1_0 import build_image_annotation from darwin.item import DatasetItem from darwin.item_sorter import ItemSorter from darwin.utils import find_files, urljoin @@ -457,3 +458,6 @@ def import_annotation(self, item_id: ItemId, payload: Dict[str, Any]) -> None: """ self.client.import_annotation(item_id, payload=payload) + + def _build_image_annotation(self, annotation_file: AnnotationFile) -> Dict[str, Any]: + return build_image_annotation(annotation_file) \ No newline at end of file diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 32555d4aa..a148e4cae 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -22,8 +22,9 @@ UploadHandlerV2, ) from darwin.dataset.utils import is_relative_to -from darwin.datatypes import ItemId, PathLike +from darwin.datatypes import AnnotationFile, ItemId, PathLike from darwin.exceptions import NotFound, UnknownExportVersion +from darwin.exporter.formats.darwin import build_image_annotation from darwin.item import DatasetItem from darwin.item_sorter import ItemSorter from darwin.utils import find_files, urljoin @@ -478,3 +479,6 @@ def _fetch_stages(self, stage_type): workflow_id = workflow_ids[0] workflow = self.client.api_v2.get_workflow(workflow_id, team_slug=self.team) return (workflow_id, [stage for stage in workflow["stages"] if stage["type"] == stage_type]) + + def _build_image_annotation(self, annotation_file: AnnotationFile) -> Dict[str, Any]: + return build_image_annotation(annotation_file) \ No newline at end of file diff --git a/darwin/exporter/formats/darwin_1_0.py b/darwin/exporter/formats/darwin_1_0.py index 28744817d..16fe8cd76 100644 --- a/darwin/exporter/formats/darwin_1_0.py +++ b/darwin/exporter/formats/darwin_1_0.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Iterable, List, Union +from typing import Any, Dict, Iterable, List, Union import orjson as json @@ -190,3 +190,74 @@ def _build_metadata(annotation_file: AnnotationFile) -> DictFreeForm: return {"metadata": annotation_file.slots[0].metadata} else: return {} + + +def build_image_annotation(annotation_file: AnnotationFile) -> Dict[str, Any]: + """ + Builds and returns a dictionary with the annotations present in the given file. + + Parameters + ---------- + annotation_file: dt.AnnotationFile + File with the image annotations to extract. + + Returns + ------- + Dict[str, Any] + A dictionary with the annotation from the given file. Has the following structure: + + .. code-block:: python + + { + "annotations": [ + { + "annotation_type": { ... }, # annotation_data + "name": "annotation class name", + "bounding_box": { ... } # Optional parameter, only present if the file has a bounding box as well + } + ], + "image": { + "filename": "a_file_name.json", + "height": 1000, + "width": 2000, + "url": "https://www.darwin.v7labs.com/..." + } + } + """ + annotations: List[Dict[str, Any]] = [] + for annotation in annotation_file.annotations: + payload = { + annotation.annotation_class.annotation_type: _build_annotation_data( + annotation + ), + "name": annotation.annotation_class.name, + } + + if ( + annotation.annotation_class.annotation_type == "complex_polygon" + or annotation.annotation_class.annotation_type == "polygon" + ) and "bounding_box" in annotation.data: + payload["bounding_box"] = annotation.data["bounding_box"] + + annotations.append(payload) + + return { + "annotations": annotations, + "image": { + "filename": annotation_file.filename, + "height": annotation_file.image_height, + "width": annotation_file.image_width, + "url": annotation_file.image_url, + }, + } + +def _build_annotation_data(annotation: Annotation) -> Dict[str, Any]: + if annotation.annotation_class.annotation_type == "complex_polygon": + return {"path": annotation.data["paths"]} + + if annotation.annotation_class.annotation_type == "polygon": + return dict( + filter(lambda item: item[0] != "bounding_box", annotation.data.items()) + ) + + return dict(annotation.data) diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index b81dfe29c..0788a4bc7 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -348,8 +348,10 @@ def test_works_on_videos( ) assert video_path.exists() + print(list(video_path.iterdir())) + assert (video_path / "0000000.json").exists() - assert (video_path / "0000001.json").exists() + assert not (video_path / "0000001.json").exists() assert (video_path / "0000002.json").exists() assert not (video_path / "0000003.json").exists() @@ -361,12 +363,6 @@ def test_works_on_videos( "image": {"filename": "test_video/0000000.png", "height": 1080, "url": "frame_1.jpg", "width": 1920}, } - with (video_path / "0000001.json").open() as f: - assert json.loads(f.read()) == { - "annotations": [], - "image": {"filename": "test_video/0000001.png", "height": 1080, "url": "frame_2.jpg", "width": 1920}, - } - with (video_path / "0000002.json").open() as f: assert json.loads(f.read()) == { "annotations": [