diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 303cb884d..fcfb3cf8d 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -996,24 +996,6 @@ def import_annotation(self, item_id: ItemId, payload: Dict[str, Any]) -> None: """ ... - @abstractmethod - def _get_remote_files_that_require_legacy_scaling(self) -> List[Path]: - """ - Get all remote files that have been scaled upon upload. These files require that - NifTI annotations are similarly scaled during import - - Parameters - ---------- - dataset : RemoteDataset - The remote dataset to get the files from - - Returns - ------- - List[Path] - A list of full remote paths of dataset items that require NifTI annotations to be scaled - """ - ... - @property def remote_path(self) -> Path: """Returns an URL specifying the location of the remote dataset.""" diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 8956a8a3a..e4b56b22a 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -11,7 +11,6 @@ Tuple, Union, ) -import numpy as np from pydantic import ValidationError from requests.models import Response @@ -873,51 +872,6 @@ def register_multi_slotted( print(f"Reistration complete. Check your items in the dataset: {self.slug}") return results - def _get_remote_files_that_require_legacy_scaling( - self, - ) -> Dict[str, Dict[str, Any]]: - """ - Get all remote files that have been scaled upon upload. These files require that - NifTI annotations are similarly scaled during import. - - The in-platform affines are returned for each legacy file, as this is required - to properly re-orient the annotations during import. - - Parameters - ---------- - dataset : RemoteDataset - The remote dataset to get the files from - - Returns - ------- - Dict[str, Dict[str, Any]] - A dictionary of remote file full paths to their slot affine maps - """ - remote_files_that_require_legacy_scaling = {} - remote_files = self.fetch_remote_files( - filters={"statuses": ["new", "annotate", "review", "complete", "archived"]} - ) - for remote_file in remote_files: - if not remote_file.slots[0].get("metadata", {}).get("medical", {}): - continue - if not ( - remote_file.slots[0] - .get("metadata", {}) - .get("medical", {}) - .get("handler") - ): - slot_affine_map = {} - for slot in remote_file.slots: - slot_affine_map[slot["slot_name"]] = np.array( - slot["metadata"]["medical"]["affine"], - dtype=np.float64, - ) - remote_files_that_require_legacy_scaling[ - Path(remote_file.full_path) - ] = slot_affine_map - - return remote_files_that_require_legacy_scaling - def _find_files_to_upload_as_multi_file_items( search_files: List[PathLike], diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 189a6ce94..4525af9f2 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -244,6 +244,55 @@ def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]: return sub return None + def scale_coordinates(self, x_scale: float, y_scale: float) -> None: + """ + Multiplies the coordinates of the annotation by the given values. 
+ + Parameters + ---------- + x_scale : float + Scale factor for x coordinates + y_scale : float + Scale factor for y coordinates + """ + annotation_type = ( + self.annotation_class.annotation_type + if hasattr(self, "annotation_class") + else None + ) + + if annotation_type == "bounding_box": + self.data["x"] *= x_scale + self.data["y"] *= y_scale + self.data["w"] *= x_scale + self.data["h"] *= y_scale + + elif annotation_type == "polygon": + for path in self.data["paths"]: + for point in path: + point["x"] *= x_scale + point["y"] *= y_scale + + elif annotation_type == "ellipse": + self.data["center"]["x"] *= x_scale + self.data["center"]["y"] *= y_scale + self.data["radius"]["x"] *= x_scale + self.data["radius"]["y"] *= y_scale + + elif annotation_type == "line": + for point in self.data["path"]: + point["x"] *= x_scale + point["y"] *= y_scale + + elif annotation_type == "keypoint": + self.data["x"] *= x_scale + self.data["y"] *= y_scale + + elif annotation_type == "skeleton": + for node in self.data["nodes"]: + node["x"] *= x_scale + node["y"] *= y_scale + @dataclass(frozen=False, eq=True) class VideoAnnotation: diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 0d4764b84..b9d846c87 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -20,7 +20,7 @@ Tuple, Union, ) - +import numpy as np from darwin.datatypes import ( AnnotationFile, @@ -117,7 +117,9 @@ def _find_and_parse( # noqa: C901 console: Optional[Console] = None, use_multi_cpu: bool = True, cpu_limit: int = 1, - remote_files_that_require_legacy_scaling: Optional[List[Path]] = None, + remote_files_that_require_legacy_scaling: Optional[ + Dict[str, Dict[str, Any]] + ] = None, ) -> Optional[Iterable[dt.AnnotationFile]]: is_console = console is not None @@ -185,6 +187,9 @@ def maybe_console(*args: Union[str, int, float]) -> None: maybe_console("Finished.") # Sometimes we have a list of lists of AnnotationFile, sometimes we have a list of AnnotationFile # We flatten the list of lists + if not parsed_files: + return None + if isinstance(parsed_files, list): if isinstance(parsed_files[0], list): parsed_files = [item for sublist in parsed_files for item in sublist] @@ -1252,21 +1257,34 @@ def import_annotations( # noqa: C901 console.print("Retrieving local annotations ...", style="info") local_files = [] local_files_missing_remotely = [] + + remote_files_targeted_by_import = _get_remote_files_targeted_by_import( + importer, file_paths, dataset, console, use_multi_cpu, cpu_limit + ) + ( + remote_files_that_require_legacy_nifti_scaling, + remote_files_that_require_pixel_to_mm_transform, + ) = _get_remote_medical_file_transform_requirements(remote_files_targeted_by_import) + if importer.__module__ == "darwin.importer.formats.nifti": - remote_files_that_require_legacy_scaling = ( - dataset._get_remote_files_that_require_legacy_scaling() - ) maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = _find_and_parse( importer, file_paths, console, use_multi_cpu, cpu_limit, - remote_files_that_require_legacy_scaling, + remote_files_that_require_legacy_nifti_scaling, ) else: maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = _find_and_parse( - importer, file_paths, console, use_multi_cpu, cpu_limit + importer, + file_paths, + console, + use_multi_cpu, + cpu_limit, + ) + maybe_parsed_files = _scale_coordinates_by_pixdims( + maybe_parsed_files, remote_files_that_require_pixel_to_mm_transform ) if not maybe_parsed_files: @@ -2312,3 +2330,189 @@ def _split_payloads( 
payloads.append(current_payload) return payloads + + +def _get_remote_files_targeted_by_import( + importer: Callable[[Path], Union[List[dt.AnnotationFile], dt.AnnotationFile, None]], + file_paths: List[PathLike], + dataset: "RemoteDataset", + console: Optional[Console] = None, + use_multi_cpu: bool = True, + cpu_limit: int = 1, +) -> List[DatasetItem]: + """ + Parses local annotations files for import and returns a list of remote dataset items + targeted by the import. Handles chunking of requests if there are many files to + avoid URL length issues. + + Parameters + ---------- + importer: Callable[[Path], Union[List[dt.AnnotationFile], dt.AnnotationFile, None]] + The importer used to parse local annotation files + file_paths: List[PathLike] + A list of local annotation files to be uploaded + dataset: RemoteDataset + The remote dataset to fetch files from + console: Optional[Console] + The console object + use_multi_cpu: bool + Whether to use multi-CPU processing + cpu_limit: int + The number of CPUs to use for processing + + Returns + ------- + List[DatasetItem] + A list of remote dataset items targeted by the import + + Raises + ------ + ValueError + If no files could be parsed or if the URL becomes too long even with minimum chunk size + """ + maybe_parsed_files = _find_and_parse( + importer, file_paths, console, use_multi_cpu, cpu_limit + ) + if not maybe_parsed_files: + raise ValueError("Not able to parse any files.") + + remote_filenames = list({file.filename for file in maybe_parsed_files}) + remote_filepaths = [file.full_path for file in maybe_parsed_files] + + chunk_size = 100 + all_remote_files: List[DatasetItem] = [] + while chunk_size > 0: + try: + for i in range(0, len(remote_filenames), chunk_size): + chunk = remote_filenames[i : i + chunk_size] + remote_files = dataset.fetch_remote_files(filters={"item_names": chunk}) + all_remote_files.extend(remote_files) + break + except RequestEntitySizeExceeded: + chunk_size -= 8 + if chunk_size <= 0: + raise ValueError( + "Unable to fetch remote file list - URL too long even with minimum chunk size." + ) + return [ + remote_file + for remote_file in all_remote_files + if remote_file.full_path in remote_filepaths + ] + + +def _get_remote_medical_file_transform_requirements( + remote_files_targeted_by_import: List[DatasetItem], +) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, List[str]]]: + """ + This function parses the remote files targeted by the import. If the remote file is + a medical file, it checks if it requires legacy NifTI scaling or a pixel to mm transform. + + If the file requires a pixel to mm transform, it select the correct pixdims values + based on the axis of acquisition. 
+ + Parameters + ---------- + remote_files_targeted_by_import: List[DatasetItem] + The remote files targeted by the import + + Returns + ------- + Tuple[Dict[str, Dict[Path, Any]], Dict[Path, Any]] + A tuple of 2 dictionaries: + - remote_files_that_require_legacy_nifti_scaling: A dictionary of remote files + that require legacy NifTI scaling and the slot name to affine matrix mapping + - remote_files_that_require_pixel_to_mm_transform: A dictionary of remote files + that require a pixel to mm transform and the pixdims of the (x, y) axes + """ + remote_files_that_require_legacy_nifti_scaling = {} + remote_files_that_require_pixel_to_mm_transform = {} + for remote_file in remote_files_targeted_by_import: + if not remote_file.slots: + continue + slot_pixdim_map = {} + slot_affine_map = {} + for slot in remote_file.slots: + if not slot_is_medical(slot): + continue + if slot_is_handled_by_monai(slot): + slot_name = slot["slot_name"] + primary_plane = slot["metadata"]["medical"]["plane_map"][slot_name] + pixdims = slot["metadata"]["medical"]["pixdims"] + if primary_plane == "AXIAL": + pixdims = [pixdims[0], pixdims[1]] + elif primary_plane == "CORONAL": + pixdims = [pixdims[0], pixdims[2]] + elif primary_plane == "SAGITTAL": + pixdims = [pixdims[1], pixdims[2]] + slot_pixdim_map[slot_name] = pixdims + else: + slot_affine_map[slot["slot_name"]] = np.array( + slot["metadata"]["medical"]["affine"], dtype=np.float64 + ) + if slot_pixdim_map: + remote_files_that_require_pixel_to_mm_transform[remote_file.full_path] = ( + slot_pixdim_map + ) + if slot_affine_map: + remote_files_that_require_legacy_nifti_scaling[remote_file.full_path] = ( + slot_affine_map + ) + + return ( + remote_files_that_require_legacy_nifti_scaling, + remote_files_that_require_pixel_to_mm_transform, + ) + + +def _scale_coordinates_by_pixdims( + maybe_parsed_files: List[dt.AnnotationFile], + remote_files_that_require_pixel_to_mm_transform: Dict[Path, Any], +) -> List[dt.AnnotationFile]: + """ + This function scales coordinates by the pixdims of the (x, y) axes. 
+ + Parameters + ---------- + maybe_parsed_files: List[dt.AnnotationFile] + The parsed annotation files + remote_files_that_require_pixel_to_mm_transform: Dict[Path, Any] + The remote files that require a pixel to mm transform + + Returns + ------- + List[dt.AnnotationFile] + The parsed annotation files with coordinates scaled by the pixdims of the (x, y) axes + """ + if not remote_files_that_require_pixel_to_mm_transform: + return maybe_parsed_files + for file in maybe_parsed_files: + if file.full_path in remote_files_that_require_pixel_to_mm_transform: + for annotation in file.annotations: + slot_name = annotation.slot_names[0] + if ( + slot_name + not in remote_files_that_require_pixel_to_mm_transform[ + file.full_path + ] + ): + continue + pixdims = remote_files_that_require_pixel_to_mm_transform[ + file.full_path + ][slot_name] + if isinstance(annotation, dt.VideoAnnotation): + for _, frame_annotation in annotation.frames.items(): + frame_annotation.scale_coordinates( + float(pixdims[0]), float(pixdims[1]) + ) + elif isinstance(annotation, dt.Annotation): + annotation.scale_coordinates(float(pixdims[0]), float(pixdims[1])) + return maybe_parsed_files + + +def slot_is_medical(slot: Dict[str, Any]) -> bool: + return slot.get("metadata", {}).get("medical") is not None + + +def slot_is_handled_by_monai(slot: Dict[str, Any]) -> bool: + return slot.get("metadata", {}).get("medical", {}).get("handler") == "MONAI" diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 0fb12d703..b15c55fff 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -6,7 +6,6 @@ from pathlib import Path from typing import Any, Dict from unittest.mock import MagicMock, patch -import numpy as np import orjson as json import pytest @@ -1948,22 +1947,3 @@ def mock_remote_files(self): current_workflow=None, ), ] - - @patch.object(RemoteDatasetV2, "fetch_remote_files") - def test_get_remote_files_that_require_legacy_scaling( - self, mock_fetch_remote_files, mock_remote_files - ): - mock_fetch_remote_files.return_value = mock_remote_files - remote_dataset = RemoteDatasetV2( - client=MagicMock(), - team="test-team", - name="test-dataset", - slug="test-dataset", - dataset_id=1, - ) - - result = remote_dataset._get_remote_files_that_require_legacy_scaling() - assert Path("/path/to/file/filename") in result - np.testing.assert_array_equal( - result[Path("/path/to/file/filename")]["0"], np.array([[-1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) # type: ignore - ) diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index e2fb6d662..d01a39108 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -15,6 +15,8 @@ make_polygon, parse_property_classes, split_paths_by_metadata, + Annotation, + AnnotationClass, ) @@ -188,3 +190,116 @@ def test_repr(self, object_store): repr(object_store) == "ObjectStore(name=test, prefix=test_prefix, readonly=False, provider=aws)" ) + + +class TestAnnotation: + def test_scale_coordinates_bounding_box(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_bb", "bounding_box"), + data={"x": 10, "y": 20, "w": 30, "h": 40}, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == {"x": 20, "y": 30, "w": 60, "h": 60} + + def test_scale_coordinates_polygon(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_polygon", "polygon"), + data={ + "paths": [ + [ + {"x": 0, 
"y": 0}, + {"x": 100, "y": 0}, + {"x": 100, "y": 100}, + {"x": 0, "y": 100}, + {"x": 0, "y": 0}, + ], + [ + {"x": 100, "y": 0}, + {"x": 200, "y": 0}, + {"x": 200, "y": 100}, + {"x": 100, "y": 100}, + {"x": 100, "y": 0}, + ], + ] + }, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == { + "paths": [ + [ + {"x": 0, "y": 0}, + {"x": 200, "y": 0}, + {"x": 200, "y": 150}, + {"x": 0, "y": 150}, + {"x": 0, "y": 0}, + ], + [ + {"x": 200, "y": 0}, + {"x": 400, "y": 0}, + {"x": 400, "y": 150}, + {"x": 200, "y": 150}, + {"x": 200, "y": 0}, + ], + ] + } + + def test_scale_coordinates_ellipse(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_ellipse", "ellipse"), + data={ + "center": {"x": 0, "y": 0}, + "radius": {"x": 100, "y": 50}, + }, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == { + "center": {"x": 0, "y": 0}, + "radius": {"x": 200, "y": 75}, + } + + def test_scale_coordinates_line(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_line", "line"), + data={"path": [{"x": 0, "y": 0}, {"x": 100, "y": 100}]}, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == {"path": [{"x": 0, "y": 0}, {"x": 200, "y": 150}]} + + def test_scale_coordinates_keypoint(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_keypoint", "keypoint"), + data={"x": 50, "y": 100}, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == {"x": 100, "y": 150} + + def test_scale_coordinates_skeleton(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_skeleton", "skeleton"), + data={ + "nodes": [ + {"x": 0, "y": 0}, + {"x": 100, "y": 100}, + ] + }, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == {"nodes": [{"x": 0, "y": 0}, {"x": 200, "y": 150}]} + + def test_scale_coordinates_skips_raster_layer(self): + annotation = Annotation( + annotation_class=AnnotationClass("__raster_layer__", "raster_layer"), + data={"dense_rle": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == { + "dense_rle": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + } + + def test_scale_coordinates_skips_tag(self): + annotation = Annotation( + annotation_class=AnnotationClass("test_tag", "tag"), + data={}, + ) + annotation.scale_coordinates(x_scale=2, y_scale=1.5) + assert annotation.data == {} diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index 9b55d9cc5..7cf00bf7a 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -1,7 +1,7 @@ import json import tempfile from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Optional, Dict from unittest.mock import MagicMock, Mock, _patch, patch from zipfile import ZipFile @@ -11,7 +11,9 @@ FullProperty, PropertyValue, ) +from darwin.item import DatasetItem import pytest +import numpy as np from darwin import datatypes as dt from darwin.importer import get_importer from darwin.importer.importer import ( @@ -34,6 +36,25 @@ _warn_for_annotations_with_multiple_instance_ids, _serialize_item_level_properties, _split_payloads, + _get_remote_files_targeted_by_import, + _get_remote_medical_file_transform_requirements, + _scale_coordinates_by_pixdims, + slot_is_medical, + slot_is_handled_by_monai, +) +from darwin.exceptions import 
RequestEntitySizeExceeded +from darwin.datatypes import ( + AnnotationFile, + make_bounding_box, + make_polygon, + make_ellipse, + make_line, + make_keypoint, + make_skeleton, + make_video_annotation, + make_tag, + make_mask, + make_raster_layer, ) @@ -145,6 +166,200 @@ def setup_data(request, multiple_annotations=False): return client, team_slug, annotation_class_ids_map, annotations +def make_all_image_annotations_test(slot_name: str) -> List[dt.Annotation]: + """ + Create sample image annotations of all types for a given slot name + """ + image_annotations: List[dt.Annotation] = [] + image_annotations.append( + make_bounding_box("bbox_class", x=10, y=20, w=30, h=40, slot_names=[slot_name]) + ) + image_annotations.append( + make_polygon( + "poly_class", [[{"x": 1, "y": 2}, {"x": 3, "y": 4}]], slot_names=[slot_name] + ) + ) + image_annotations.append( + make_ellipse( + "ellipse_class", + {"center": {"x": 5, "y": 6}, "radius": {"x": 7, "y": 8}, "angle": 0}, + slot_names=[slot_name], + ) + ) + image_annotations.append( + make_line( + "line_class", + [{"x": 9, "y": 10}, {"x": 11, "y": 12}], + slot_names=[slot_name], + ) + ) + image_annotations.append( + make_keypoint("point_class", x=13, y=14, slot_names=[slot_name]) + ) + image_annotations.append( + make_skeleton( + "skeleton_class", + [{"name": "node1", "x": 15, "y": 16}], + slot_names=[slot_name], + ) + ) + image_annotations.append(make_tag("tag_class", slot_names=[slot_name])) + image_annotations.append(make_mask("mask_class", slot_names=[slot_name])) + image_annotations.append( + make_raster_layer( + "raster_layer_class", {}, 100, [1, 2, 3, 4, 5], slot_names=[slot_name] + ) + ) + return image_annotations + + +def make_all_video_annotations_test(slot_name: str) -> List[dt.VideoAnnotation]: + """ + Create sample video annotations of all types for a given slot name + """ + + video_annotations: List[dt.VideoAnnotation] = [] + start_frame = 0 + end_frame = 10 + video_annotations.append( + make_video_annotation( + frames={ + i: make_bounding_box( + "bbox_class", x=10, y=20, w=30, h=40, slot_names=[slot_name] + ) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_polygon( + "poly_class", + [[{"x": 1, "y": 2}, {"x": 3, "y": 4}]], + slot_names=[slot_name], + ) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_ellipse( + "ellipse_class", + { + "center": {"x": 5, "y": 6}, + "radius": {"x": 7, "y": 8}, + "angle": 0, + }, + slot_names=[slot_name], + ) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_line( + "line_class", + [{"x": 9, "y": 10}, {"x": 11, "y": 12}], + slot_names=[slot_name], + ) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_keypoint("point_class", x=13, y=14, slot_names=[slot_name]) + for i in range(start_frame, end_frame) + }, + 
keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_skeleton( + "skeleton_class", + [{"name": "node1", "x": 15, "y": 16}], + slot_names=[slot_name], + ) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_tag("tag_class", slot_names=[slot_name]) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_mask("mask_class", slot_names=[slot_name]) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + video_annotations.append( + make_video_annotation( + frames={ + i: make_raster_layer( + "raster_layer_class", + {}, + 100, + [1, 2, 3, 4, 5], + slot_names=[slot_name], + ) + for i in range(start_frame, end_frame) + }, + keyframes={i: True for i in range(start_frame, end_frame)}, + segments=[], + interpolated=False, + slot_names=[slot_name], + ) + ) + return video_annotations + + def root_path(x: str) -> str: return f"darwin.importer.importer.{x}" @@ -1434,12 +1649,72 @@ def test_import_properties_creates_missing_item_level_properties_from_manifest_a property_values=[ PropertyValue(type="string", value="1"), PropertyValue(type="string", value="2"), - PropertyValue(type="string", value="3"), ], ), }, ), - ({}, {}), + ( + {}, + { + "prop1": FullProperty( + name="prop1", + type="single_select", + description="", + required=True, + slug="test_team", + dataset_ids=[], + granularity=PropertyGranularity("item"), + property_values=[ + PropertyValue(type="string", value="1"), + PropertyValue(type="string", value="2"), + ], + ), + "prop2": FullProperty( + name="prop2", + type="multi_select", + description="", + required=False, + slug="test_team", + dataset_ids=[], + granularity=PropertyGranularity("item"), + property_values=[ + PropertyValue(type="string", value="1"), + PropertyValue(type="string", value="2"), + ], + ), + }, + ), + ( + {}, + { + "prop1": FullProperty( + name="prop1", + type="single_select", + description="", + required=True, + slug="test_team", + dataset_ids=[], + granularity=PropertyGranularity("item"), + property_values=[ + PropertyValue(type="string", value="1"), + PropertyValue(type="string", value="2"), + ], + ), + "prop2": FullProperty( + name="prop2", + type="multi_select", + description="", + required=False, + slug="test_team", + dataset_ids=[], + granularity=PropertyGranularity("item"), + property_values=[ + PropertyValue(type="string", value="1"), + PropertyValue(type="string", value="2"), + ], + ), + }, + ), ] mock_create_update_props.side_effect = _create_update_item_properties @@ -1795,7 +2070,6 @@ def test_import_properties_creates_missing_item_level_property_values_from_annot property_values=[ PropertyValue(type="string", value="1"), PropertyValue(type="string", value="2"), - PropertyValue(type="string", value="3"), ], ), }, @@ -1827,7 +2101,6 @@ def test_import_properties_creates_missing_item_level_property_values_from_annot property_values=[ PropertyValue(type="string", value="1"), PropertyValue(type="string", 
value="2"), - PropertyValue(type="string", value="3"), ], ), }, @@ -1942,7 +2215,6 @@ def test_import_properties_creates_missing_item_level_property_values_from_manif granularity=PropertyGranularity("item"), property_values=[ PropertyValue(type="string", value="1"), - PropertyValue(type="string", value="2"), ], ), }, @@ -2005,7 +2277,6 @@ def test_import_properties_creates_missing_item_level_property_values_from_manif property_values=[ PropertyValue(type="string", value="1"), PropertyValue(type="string", value="2"), - PropertyValue(type="string", value="3"), ], ), }, @@ -3615,3 +3886,454 @@ def test__split_payloads_overwrites_on_first_payload_and_appends_on_the_rest(): assert result[0]["overwrite"] assert not result[1]["overwrite"] assert not result[2]["overwrite"] + + +def test__get_remote_files_targeted_by_import_success() -> None: + """Test successful case where files are found remotely.""" + mock_dataset = Mock() + mock_console = Mock() + + # Mock the remote files that will be returned + mock_remote_file1 = Mock(full_path="/path/to/file1.json") + mock_remote_file2 = Mock(full_path="/path/to/file2.json") + + mock_dataset.fetch_remote_files.return_value = [ + mock_remote_file1, + mock_remote_file2, + ] + + def mock_importer(path: Path) -> List[dt.AnnotationFile]: + file_num = int(path.stem.replace("file", "")) + mock_file = Mock( + spec=dt.AnnotationFile, + filename=f"file{file_num}.json", + full_path=f"/path/to/file{file_num}.json", + ) + return [mock_file] + + result = _get_remote_files_targeted_by_import( + importer=mock_importer, + file_paths=[Path("file1.json"), Path("file2.json")], + dataset=mock_dataset, + console=mock_console, + ) + + assert len(result) == 2 + assert result[0] == mock_remote_file1 + assert result[1] == mock_remote_file2 + mock_dataset.fetch_remote_files.assert_called_once() + + +def test__get_remote_files_targeted_by_import_no_files_parsed() -> None: + """Test error case when no files can be parsed.""" + mock_dataset = Mock() + mock_console = Mock() + + def mock_importer(path: Path) -> Optional[List[dt.AnnotationFile]]: + return None + + with pytest.raises(ValueError, match="Not able to parse any files."): + _get_remote_files_targeted_by_import( + importer=mock_importer, + file_paths=[Path("file1.json")], + dataset=mock_dataset, + console=mock_console, + ) + + +def test__get_remote_files_targeted_by_import_url_too_long() -> None: + """Test error case when URL becomes too long even with minimum chunk size.""" + mock_dataset = Mock() + mock_console = Mock() + + def mock_importer(path: Path) -> List[dt.AnnotationFile]: + file_num = int(path.stem.replace("file", "")) + mock_file = Mock( + spec=dt.AnnotationFile, + filename=f"file{file_num}.json", + full_path=f"/path/to/file{file_num}.json", + ) + return [mock_file] + + mock_dataset.fetch_remote_files.side_effect = RequestEntitySizeExceeded() + + with pytest.raises( + ValueError, + match="Unable to fetch remote file list - URL too long even with minimum chunk size.", + ): + _get_remote_files_targeted_by_import( + importer=mock_importer, + file_paths=[Path("file1.json")], + dataset=mock_dataset, + console=mock_console, + ) + + +def test__get_remote_files_targeted_by_import_partial_match() -> None: + """Test case where some files exist remotely and others don't.""" + mock_dataset = Mock() + mock_console = Mock() + + mock_remote_file1 = Mock(full_path="/path/to/file1.json") + mock_dataset.fetch_remote_files.return_value = [mock_remote_file1] + + def mock_importer(path: Path) -> List[dt.AnnotationFile]: + file_num = 
int(path.stem.replace("file", "")) + mock_file = Mock( + spec=dt.AnnotationFile, + filename=f"file{file_num}.json", + full_path=f"/path/to/file{file_num}.json", + ) + return [mock_file] + + result = _get_remote_files_targeted_by_import( + importer=mock_importer, + file_paths=[Path("file1.json"), Path("file2.json")], + dataset=mock_dataset, + console=mock_console, + ) + + assert len(result) == 1 + assert result[0] == mock_remote_file1 + mock_dataset.fetch_remote_files.assert_called_once() + + +def test__get_remote_medical_file_transform_requirements_empty_list(): + """Test that empty input list returns empty dictionaries""" + remote_files: List[DatasetItem] = [] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert legacy_scaling == {} + assert pixel_transform == {} + + +def test__get_remote_medical_file_transform_requirements_no_slots(): + """Test that files with no slots are handled correctly""" + mock_file = MagicMock(spec=DatasetItem) + mock_file.slots = None + mock_file.full_path = "/path/to/file" + remote_files: List[DatasetItem] = [mock_file] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert legacy_scaling == {} + assert pixel_transform == {} + + +def test__get_remote_medical_file_transform_requirements_non_medical_slots(): + """Test that files with non-medical slots are handled correctly""" + mock_file = MagicMock(spec=DatasetItem) + mock_file.slots = [{"slot_name": "slot1", "metadata": {}}] + mock_file.full_path = "/path/to/file" + remote_files: List[DatasetItem] = [mock_file] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert legacy_scaling == {} + assert pixel_transform == {} + + +def test__get_remote_medical_file_transform_requirements_monai_axial(): + """Test MONAI-handled medical slots with AXIAL plane""" + mock_file = MagicMock(spec=DatasetItem) + mock_file.slots = [ + { + "slot_name": "slot1", + "metadata": { + "medical": { + "handler": "MONAI", + "plane_map": {"slot1": "AXIAL"}, + "pixdims": [1.0, 2.0, 3.0], + } + }, + } + ] + mock_file.full_path = "/path/to/file" + remote_files: List[DatasetItem] = [mock_file] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert legacy_scaling == {} + assert pixel_transform == {"/path/to/file": {"slot1": [1.0, 2.0]}} + + +def test__get_remote_medical_file_transform_requirements_monai_coronal(): + """Test MONAI-handled medical slots with CORONAL plane""" + mock_file = MagicMock(spec=DatasetItem) + mock_file.slots = [ + { + "slot_name": "slot1", + "metadata": { + "medical": { + "handler": "MONAI", + "plane_map": {"slot1": "CORONAL"}, + "pixdims": [1.0, 2.0, 3.0], + } + }, + } + ] + mock_file.full_path = "/path/to/file" + remote_files: List[DatasetItem] = [mock_file] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert legacy_scaling == {} + assert pixel_transform == {"/path/to/file": {"slot1": [1.0, 3.0]}} + + +def test__get_remote_medical_file_transform_requirements_monai_saggital(): + """Test MONAI-handled medical slots with SAGGITAL plane""" + mock_file = MagicMock(spec=DatasetItem) + mock_file.slots = [ + { + "slot_name": "slot1", + "metadata": { + "medical": { + "handler": "MONAI", + "plane_map": {"slot1": "SAGITTAL"}, + "pixdims": [1.0, 2.0, 3.0], + } + }, + } + ] + mock_file.full_path = "/path/to/file" + remote_files: List[DatasetItem] 
= [mock_file] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert legacy_scaling == {} + assert pixel_transform == {"/path/to/file": {"slot1": [2.0, 3.0]}} + + +def test__get_remote_medical_file_transform_requirements_legacy_nifti(): + """Test legacy NifTI scaling""" + mock_file = MagicMock(spec=DatasetItem) + mock_file.slots = [ + { + "slot_name": "slot1", + "metadata": { + "medical": { + "affine": [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]] + } + }, + } + ] + mock_file.full_path = "/path/to/file" + remote_files: List[DatasetItem] = [mock_file] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + assert pixel_transform == {} + assert "/path/to/file" in legacy_scaling + assert "slot1" in legacy_scaling["/path/to/file"] + assert legacy_scaling["/path/to/file"]["slot1"].shape == (4, 4) + assert ( + legacy_scaling["/path/to/file"]["slot1"] + == np.array( + [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float64 + ) + ).all() + + +def test__get_remote_medical_file_transform_requirements_mixed(): + """Test mixed case with both MONAI and legacy NifTI files""" + mock_file1 = MagicMock(spec=DatasetItem) + mock_file1.slots = [ + { + "slot_name": "slot1", + "metadata": { + "medical": { + "handler": "MONAI", + "plane_map": {"slot1": "AXIAL"}, + "pixdims": [1.0, 2.0, 3.0], + } + }, + } + ] + mock_file1.full_path = "/path/to/file1" + + mock_file2 = MagicMock(spec=DatasetItem) + mock_file2.slots = [ + { + "slot_name": "slot2", + "metadata": { + "medical": { + "affine": [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]] + } + }, + } + ] + mock_file2.full_path = "/path/to/file2" + + remote_files: List[DatasetItem] = [mock_file1, mock_file2] + legacy_scaling, pixel_transform = _get_remote_medical_file_transform_requirements( + remote_files + ) + + # Check MONAI file results + assert pixel_transform == {"/path/to/file1": {"slot1": [1.0, 2.0]}} + + # Check legacy NifTI file results + assert "/path/to/file2" in legacy_scaling + assert "slot2" in legacy_scaling["/path/to/file2"] + assert legacy_scaling["/path/to/file2"]["slot2"].shape == (4, 4) + assert ( + legacy_scaling["/path/to/file2"]["slot2"] + == np.array( + [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float64 + ) + ).all() + + +def test_scale_coordinates_by_pixdims(): + """ + Test scaling of coordinates by pixdims for image and video annotations + """ + image_annotations_slot_1 = make_bounding_box( + "bbox_class", x=10, y=20, w=30, h=40, slot_names=["slot1"] + ) + image_annotations_slot_2 = make_polygon( + "polygon_class", + [[{"x": 1, "y": 2}, {"x": 3, "y": 4}]], + slot_names=["slot2"], + ) + + video_annotations_slot_1 = make_video_annotation( + frames={ + 0: make_bounding_box( + "bbox_class", x=10, y=20, w=30, h=40, slot_names=["slot1"] + ) + }, + keyframes={0: True}, + segments=[], + interpolated=False, + slot_names=["slot1"], + ) + video_annotations_slot_2 = make_video_annotation( + frames={ + 0: make_polygon( + "polygon_class", + [[{"x": 1, "y": 2}, {"x": 3, "y": 4}]], + slot_names=["slot2"], + ) + }, + keyframes={0: True}, + segments=[], + interpolated=False, + slot_names=["slot2"], + ) + + image_annotations_path = Path("test.json") + video_annotations_path = Path("test2.json") + + multi_slotted_image_annotations = AnnotationFile( + path=image_annotations_path, + filename=image_annotations_path.name, + annotation_classes=set(), + annotations=[image_annotations_slot_1, 
image_annotations_slot_2], + ) + + multi_slotted_video_annotations = AnnotationFile( + path=video_annotations_path, + filename=video_annotations_path.name, + annotation_classes=set(), + annotations=[video_annotations_slot_1, video_annotations_slot_2], + ) + + remote_files_that_require_pixel_to_mm_transform: Dict[ + str, Dict[str, List[float]] + ] = { + str(image_annotations_path): { + "slot1": [2.0, 3.0], + "slot2": [1.5, 2.5], + }, + str(video_annotations_path): { + "slot1": [4.0, 6.0], + "slot2": [3.0, 5.0], + }, + } + + scaled_files = _scale_coordinates_by_pixdims( + [multi_slotted_image_annotations, multi_slotted_video_annotations], + remote_files_that_require_pixel_to_mm_transform, + ) + + scaled_image_annotations = scaled_files[0].annotations + scaled_video_annotations = scaled_files[1].annotations + + assert scaled_image_annotations[0].data == { + "x": 20.0, + "y": 60.0, + "w": 60.0, + "h": 120.0, + } + assert scaled_image_annotations[1].data == { + "paths": [[{"x": 1.5, "y": 5}, {"x": 4.5, "y": 10.0}]] + } + assert scaled_video_annotations[0].frames[0].data == { + "x": 40.0, + "y": 120.0, + "w": 120.0, + "h": 240.0, + } + assert scaled_video_annotations[1].frames[0].data == { + "paths": [[{"x": 3.0, "y": 10.0}, {"x": 9.0, "y": 20.0}]] + } + + +def test_scale_coordinates_by_pixdims_no_scaling_needed(): + """ + Test that annotations are not scaled if no scaling is needed + """ + single_image_annotation = make_bounding_box( + "bbox_class", x=10, y=20, w=30, h=40, slot_names=["slot1"] + ) + + image_annotations_path = Path("test.json") + single_slotted_annotation_file = AnnotationFile( + path=image_annotations_path, + filename=image_annotations_path.name, + annotation_classes=set(), + annotations=[single_image_annotation], + ) + + remote_files_that_require_pixel_to_mm_transform: Dict[ + str, Dict[str, List[float]] + ] = { + str(image_annotations_path): { + "slot2": [1.0, 1.0], + }, + } + + scaled_files = _scale_coordinates_by_pixdims( + [single_slotted_annotation_file], + remote_files_that_require_pixel_to_mm_transform, + ) + + assert scaled_files[0].annotations[0].data == { + "x": 10, + "y": 20, + "w": 30, + "h": 40, + } + + +def test_slot_is_medical(): + """ + Test that slot_is_medical returns True if the slot has medical metadata + """ + medical_slot = {"metadata": {"medical": {}}} + non_medical_slot = {"metadata": {}} + assert slot_is_medical(medical_slot) is True + assert slot_is_medical(non_medical_slot) is False + + +def test_slot_is_handled_by_monai(): + """ + Test that slot_is_handled_by_monai returns True if the slot has MONAI handler + """ + monai_slot = {"metadata": {"medical": {"handler": "MONAI"}}} + non_monai_slot = {"metadata": {"medical": {}}} + assert slot_is_handled_by_monai(monai_slot) is True + assert slot_is_handled_by_monai(non_monai_slot) is False