Merge branch 'master' into DAR-4304
JBWilkie committed Nov 14, 2024
2 parents f7d876f + f0ba378 commit bee2eea
Showing 127 changed files with 9,880 additions and 239 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -9,7 +9,6 @@ data/
!tests/darwin/data
darwin_py.egg-info/PKG-INFO

- *.png
*.jpeg
*.jpg
*.bpm
15 changes: 7 additions & 8 deletions darwin/cli_functions.py
@@ -919,8 +919,8 @@ def dataset_import(
If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present.
If ``False`` this warning will be skipped and the import will overwrite the current annotations without warning.
legacy : bool, default: False
- If ``True`` it will not resize the annotations to be isotropic.
- If ``False`` it will resize the annotations to be isotropic.
+ If ``True`` it will resize the annotations to be isotropic.
+ If ``False`` it will not resize the annotations to be isotropic.
use_multi_cpu : bool, default: False
If ``True`` it will use all multiple CPUs to speed up the import process.
cpu_limit : Optional[int], default: Core count - 2
@@ -931,7 +931,6 @@ def dataset_import(

try:
importer: ImportParser = get_importer(format)
-
if format == "nifti" and legacy:
importer = partial(importer, legacy=True)

@@ -954,7 +953,7 @@
overwrite,
use_multi_cpu,
cpu_limit,
- no_legacy=False if legacy else True,
+ legacy,
)

except ImporterNotFoundError:
@@ -1228,8 +1227,8 @@ def dataset_convert(
annotations folder of the dataset under 'other_formats/{format}'.
legacy : bool, default: False
This flag is only for the nifti format.
- If True, it will not export the annotations using legacy calculations.
- If False, it will resize the annotations using the new calculation by dividing with pixdims.
+ If True, it will resize the annotations by dividing by pixdims.
+ If False, it will not export the annotations using legacy calculations
"""
identifier: DatasetIdentifier = DatasetIdentifier.parse(dataset_identifier)
client: Client = _load_client(team_slug=identifier.team_slug)
@@ -1286,8 +1285,8 @@ def convert(
Folder where the exported annotations will be placed.
legacy: bool, default: False
This flag is only for the nifti format.
- If True, it will not export the annotations using legacy calculations.
- If False, it will resize the annotations using the new calculation by dividing with pixdims.
+ If True, it will resize the annotations by dividing by pixdims
+ If False, it will not export the annotations using legacy calculations.
"""
try:
parser: ExportParser = get_exporter(format)
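With this change, legacy isotropic resizing becomes strictly opt-in: dataset_import now forwards legacy as-is instead of deriving an inverted no_legacy value. A hypothetical invocation of the updated CLI (dataset name and path are placeholders):

    darwin dataset import my-dataset nifti ./annotations --legacy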
2 changes: 2 additions & 0 deletions darwin/dataset/remote_dataset_v2.py
@@ -986,6 +986,8 @@ def _find_files_to_upload_as_single_file_items(
local_path = str(
found_file.relative_to(source_files[0]).parent.as_posix()
)
+ if local_path == ".":
+ local_path = "/"
uploading_files.append(
LocalFile(
found_file,
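The two added lines handle files sitting directly in the upload root, where relative_to(...).parent collapses to ".". A minimal sketch of the behaviour being corrected (paths are illustrative):

    from pathlib import Path

    source_root = Path("images")
    found_file = Path("images/image_1.jpg")

    # For a top-level file, the relative parent is ".", which would be
    # registered as a literal "." folder remotely; map it to "/" instead.
    local_path = str(found_file.relative_to(source_root).parent.as_posix())
    if local_path == ".":
        local_path = "/"
    print(local_path)  # -> "/"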
10 changes: 4 additions & 6 deletions darwin/importer/formats/nifti.py
@@ -103,7 +103,7 @@ def _parse_nifti(
is_mpr: bool,
legacy: bool = False,
) -> dt.AnnotationFile:
- img, pixdims = process_nifti(nib.load(nifti_path), legacy=legacy)
+ img, pixdims = process_nifti(nib.load(nifti_path))

processed_class_map = process_class_map(class_map)
video_annotations = []
@@ -513,11 +513,10 @@ def correct_nifti_header_if_necessary(img_nii):
def process_nifti(
input_data: nib.nifti1.Nifti1Image,
ornt: Optional[List[List[float]]] = [[0.0, -1.0], [1.0, -1.0], [2.0, -1.0]],
- legacy: bool = False,
) -> Tuple[np.ndarray, Tuple[float]]:
"""
- Function that converts a nifti object to RAS orientation (if legacy), then converts to the passed ornt orientation.
- The default ornt is for LPI.
+ Function that converts a nifti object to the RAS orientation, then converts to the passed ornt orientation.
+ The default ornt is LPI.
Args:
input_data: nibabel nifti object.
@@ -530,8 +529,7 @@ def process_nifti(
pixdims: tuple of nifti header zoom values.
"""
img = correct_nifti_header_if_necessary(input_data)
- if legacy:
- img = nib.funcs.as_closest_canonical(img)
+ img = nib.funcs.as_closest_canonical(img)
data_array = nib.orientations.apply_orientation(img.get_fdata(), ornt)
pixdims = img.header.get_zooms()
return data_array, pixdims
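After this change the RAS canonicalisation is unconditional rather than gated on legacy. A minimal sketch of the new process_nifti flow, assuming a local file path:

    import nibabel as nib

    img = nib.funcs.as_closest_canonical(nib.load("scan.nii.gz"))  # always snap to RAS
    lpi_ornt = [[0.0, -1.0], [1.0, -1.0], [2.0, -1.0]]  # the default ornt (LPI)
    data_array = nib.orientations.apply_orientation(img.get_fdata(), lpi_ornt)
    pixdims = img.header.get_zooms()  # voxel spacing from the header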
10 changes: 5 additions & 5 deletions darwin/importer/importer.py
@@ -1095,7 +1095,7 @@ def import_annotations( # noqa: C901
overwrite: bool = False,
use_multi_cpu: bool = False,
cpu_limit: Optional[int] = None,
- no_legacy: Optional[bool] = False,
+ legacy: Optional[bool] = False,
) -> None:
"""
Imports the given given Annotations into the given Dataset.
@@ -1137,9 +1137,9 @@
If ``cpu_limit`` is greater than the number of available CPU cores, it will be set to the number of available cores.
If ``cpu_limit`` is less than 1, it will be set to CPU count - 2.
If ``cpu_limit`` is omitted, it will be set to CPU count - 2.
- no_legacy : bool, default: False
- If ``True`` will not use the legacy isotropic transformation to resize annotations
- If ``False`` will use the legacy isotropic transformation to resize annotations
+ legacy : bool, default: False
+ If ``True`` will use the legacy isotropic transformation to resize annotations
+ If ``False`` will not use the legacy isotropic transformation to resize annotations
Raises
-------
ValueError
@@ -1157,7 +1157,7 @@
# CLI-initiated imports will raise an AttributeError because of the partial function
# This block handles SDK-initiated imports
try:
- if importer.__module__ == "darwin.importer.formats.nifti" and not no_legacy:
+ if importer.__module__ == "darwin.importer.formats.nifti" and legacy:
importer = partial(importer, legacy=True)
except AttributeError:
pass
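The surrounding try/except exists because a functools.partial wrapper does not expose the wrapped function's __module__, so this check raises AttributeError for CLI-initiated imports, which arrive pre-wrapped in a partial. A small illustration with a hypothetical importer:

    from functools import partial

    def nifti_importer(path, legacy=False):
        ...

    print(nifti_importer.__module__)  # the defining module's name
    wrapped = partial(nifti_importer, legacy=True)
    # wrapped.__module__  # raises AttributeError: partial objects lack __module__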
21 changes: 11 additions & 10 deletions darwin/options.py
@@ -61,10 +61,10 @@ def __init__(self) -> None:
help="Annotation files (or folders) to convert.",
)
parser_convert.add_argument(
"--no-legacy",
action="store_false",
dest="legacy",
help="Do not convert annotation using legacy process (isotropic transformation).",
"--legacy",
action="store_true",
default=False,
help="Import annotation files using legacy process (isotropic transformation).",
)
parser_convert.add_argument(
"output_dir", type=str, help="Where to store output files."
@@ -375,10 +375,10 @@ def __init__(self) -> None:
help="Bypass warnings about overwiting existing annotations.",
)
parser_import.add_argument(
"--no-legacy",
action="store_false",
dest="legacy",
help="Do not importing annotation files using legacy process (isotropic transformation).",
"--legacy",
action="store_true",
default=False,
help="Import annotation files using legacy process (isotropic transformation).",
)

# Cpu limit for multiprocessing tasks
@@ -410,9 +410,10 @@ def cpu_default_types(input: Any) -> Optional[int]: # type: ignore
"format", type=str, help="Annotation format to convert to."
)
parser_convert.add_argument(
"legacy",
"--legacy",
action="store_true",
help="Convert annotation using legacy process (isotropic transformation).",
default=False,
help="Import annotation files using legacy process (isotropic transformation).",
)
parser_convert.add_argument(
"-o", "--output_dir", type=str, help="Where to store output files."
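The inverted defaults were the root of the bug: with --no-legacy and action="store_false", omitting the flag left legacy=True, so the legacy transformation ran unless explicitly disabled. A minimal sketch contrasting the two parsers:

    import argparse

    old = argparse.ArgumentParser()
    old.add_argument("--no-legacy", action="store_false", dest="legacy")
    assert old.parse_args([]).legacy is True   # legacy behaviour on by default

    new = argparse.ArgumentParser()
    new.add_argument("--legacy", action="store_true", default=False)
    assert new.parse_args([]).legacy is False  # legacy behaviour now opt-in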
2 changes: 1 addition & 1 deletion darwin/version/__init__.py
@@ -1 +1 @@
__version__ = "1.0.11"
__version__ = "1.0.12"
e2e_tests/cli/test_convert.py
@@ -4,13 +4,14 @@

import orjson as json
import pytest
+ import xml.etree.ElementTree as ET

from e2e_tests.helpers import assert_cli, run_cli_command


class TestExportCli:
this_file_path = Path(dirname(__file__)).absolute()
- data_path = (this_file_path / ".." / ".." / "data").resolve()
+ data_path = (this_file_path / ".." / "data" / "convert").resolve()

@pytest.fixture(autouse=True)
def config(self) -> None:
@@ -33,10 +34,10 @@ def compare_directories(self, path: Path, expected_path: Path) -> None:
continue

# Compare files
with file.open("r") as f:
with file.open("rb") as f:
content = f.read()

- with Path(expected_path / file.name).open() as f:
+ with Path(expected_path / file.name).open("rb") as f:
expected_content = f.read()

if content != expected_content:
@@ -52,6 +53,14 @@ def compare_directories(self, path: Path, expected_path: Path) -> None:
[
("yolo_segmented", data_path / "yolov8/from", data_path / "yolov8/to"),
("yolo", data_path / "yolo/from", data_path / "yolo/to"),
("cvat", data_path / "cvat/from", data_path / "cvat/to"),
("pascalvoc", data_path / "pascalvoc/from", data_path / "pascalvoc/to"),
("nifti", data_path / "nifti/from", data_path / "nifti/to"),
(
"instance_mask",
data_path / "instance_mask/from",
data_path / "instance_mask/to",
),
pytest.param(
"coco",
data_path / "coco/from",
@@ -87,30 +96,64 @@ def test_darwin_convert(
result = run_cli_command(
f"darwin convert {format} {str(input_path)} {str(tmp_path)}"
)
if format == "coco":
self.patch_coco(tmp_path / "output.json")
self.patch_format(format, tmp_path)
assert_cli(result, 0)
self.compare_directories(expectation_path, tmp_path)

+ def patch_format(self, format: str, path: Path) -> None:
+ """
+ Patch files based on format to match the expected output.
+ """
+ patch_methods = {
+ "coco": self.patch_coco,
+ "cvat": self.patch_cvat,
+ }
+ patch_method = patch_methods.get(format)
+ if patch_method:
+ patch_method(path)

def patch_coco(self, path: Path) -> None:
"""
Patch coco file to match the expected output, includes changes to year and date_created,
wrapped in try except so that format errors are still caught later with correct error messages
"""
try:
with open(path, "r") as f:
with open(path / "output.json", "r") as f:
contents = f.read()
temp = json.loads(contents)
temp["info"]["year"] = 2023
temp["info"]["date_created"] = "2023/12/05"
with open(path, "w") as f:
with open(path / "output.json", "w") as f:
op = json.dumps(
temp, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY
).decode("utf-8")
f.write(op)
except Exception:
print(f"Error patching {path}")

+ def patch_cvat(self, path: Path) -> None:
+ """
+ Patch cvat file to match the expected output.
+ """
+ try:
+ tree = ET.parse(path / "output.xml")
+ root = tree.getroot()
+ # Adjust the required fields
+ dumped_elem = root.find(".//meta/dumped")
+ if dumped_elem is not None:
+ dumped_elem.text = "2024-10-25 10:33:01.789498+00:00"
+ created_elem = root.find(".//meta/task/created")
+ if created_elem is not None:
+ created_elem.text = "2024-10-25 10:33:01.789603+00:00"
+ updated_elem = root.find(".//meta/task/updated")
+ if updated_elem is not None:
+ updated_elem.text = "2024-10-25 10:33:01.789608+00:00"
+ tree.write(path / "output.xml")
+ except ET.ParseError:
+ print(f"Error parsing XML in {path}")
+ except Exception as e:
+ print(f"Error patching {path}: {e}")


if __name__ == "__main__":
pytest.main(["-vv", "-s", __file__])
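Switching compare_directories to "rb" makes the comparison byte-exact and lets the same code handle binary fixtures (such as instance-mask PNGs) that would fail to decode in text mode. For example, with illustrative paths:

    from pathlib import Path

    produced = Path("tmp/output/mask_1.png")
    expected = Path("data/convert/instance_mask/to/mask_1.png")
    with produced.open("rb") as f, expected.open("rb") as g:
        assert f.read() == g.read()  # byte comparison works for text and binary alike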
105 changes: 105 additions & 0 deletions e2e_tests/cli/test_full_cycle.py
@@ -0,0 +1,105 @@
import shutil
from pathlib import Path

from e2e_tests.helpers import assert_cli, run_cli_command, export_release
from e2e_tests.objects import E2EDataset, ConfigValues
from e2e_tests.cli.test_import import compare_annotations_export


def test_full_cycle(
local_dataset: E2EDataset,
config_values: ConfigValues,
):
"""
This test performs the following steps:
- 1: Registers a set of files from external storage to a dataset
- 2: Imports some annotations
- 3: Creates and pulls a release of the dataset
- 4: Deletes all items from the dataset
- 5: Pushes and imports the pulled files & annotations to the dataset
- 6: Deletes locally pulled copies of the dataset files
- 7: Creates and pulls a new release of the dataset
- 8: Assert that the pulled data is as expected
It is designed to catch errors that may arise from changes to exported Darwin JSON
"""
item_type = "single_slotted"
annotation_format = "darwin"
first_release_name = "first_release"
second_release_name = "second_release"
pull_dir = Path(
f"{Path.home()}/.darwin/datasets/{config_values.team_slug}/{local_dataset.slug}"
)
annotations_import_dir = (
Path(__file__).parents[1]
/ "data"
/ "import"
/ "image_annotations_with_item_level_properties"
)
expected_filepaths = [
f"{pull_dir}/images/image_1.jpg",
f"{pull_dir}/images/image_2.jpg",
f"{pull_dir}/images/dir1/image_3.jpg",
f"{pull_dir}/images/dir1/image_4.jpg",
f"{pull_dir}/images/dir2/image_5.jpg",
f"{pull_dir}/images/dir2/image_6.jpg",
f"{pull_dir}/images/dir1/dir3/image_7.jpg",
f"{pull_dir}/images/dir1/dir3/image_8.jpg",
]

# Populate the dataset with items and annotations
local_dataset.register_read_only_items(config_values, item_type)
result = run_cli_command(
f"darwin dataset import {local_dataset.name} {annotation_format} {annotations_import_dir}"
)
assert_cli(result, 0)

# Pull a first release of the dataset
original_release = export_release(
annotation_format, local_dataset, config_values, release_name=first_release_name
)
result = run_cli_command(
f"darwin dataset pull {local_dataset.name}:{original_release.name}"
)
assert_cli(result, 0)

# Delete all items in the dataset
local_dataset.delete_items(config_values)

# Push and import the pulled files and annotations to the dataset
result = run_cli_command(
f"darwin dataset push {local_dataset.name} {pull_dir}/images --preserve-folders"
)
assert_cli(result, 0)
result = run_cli_command(
f"darwin dataset import {local_dataset.name} {annotation_format} {pull_dir}/releases/{first_release_name}/annotations"
)
assert_cli(result, 0)

# Delete local copies of the dataset files for the dataset
shutil.rmtree(f"{pull_dir}/images")

# Pull a second release of the dataset
new_release = export_release(
annotation_format,
local_dataset,
config_values,
release_name=second_release_name,
)
result = run_cli_command(
f"darwin dataset pull {local_dataset.name}:{new_release.name}"
)
assert_cli(result, 0)

# Check that all expected files have been downloaded
all_filepaths = list(pull_dir.rglob("*"))
for expected_file in expected_filepaths:
assert Path(expected_file) in all_filepaths

# Check that all downloaded annotations are as expected
compare_annotations_export(
Path(f"{pull_dir}/releases/{first_release_name}/annotations"),
Path(f"{pull_dir}/releases/{second_release_name}/annotations"),
item_type,
unzip=False,
)