Merge branch 'master' into DAR-4304
JBWilkie committed Nov 14, 2024
2 parents f7d876f + f0ba378 commit bee2eea
Showing 127 changed files with 9,880 additions and 239 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -9,7 +9,6 @@ data/
!tests/darwin/data
darwin_py.egg-info/PKG-INFO

- *.png
*.jpeg
*.jpg
*.bpm
15 changes: 7 additions & 8 deletions darwin/cli_functions.py
@@ -919,8 +919,8 @@ def dataset_import(
If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present.
If ``False`` this warning will be skipped and the import will overwrite the current annotations without warning.
legacy : bool, default: False
- If ``True`` it will not resize the annotations to be isotropic.
- If ``False`` it will resize the annotations to be isotropic.
+ If ``True`` it will resize the annotations to be isotropic.
+ If ``False`` it will not resize the annotations to be isotropic.
use_multi_cpu : bool, default: False
If ``True`` it will use all multiple CPUs to speed up the import process.
cpu_limit : Optional[int], default: Core count - 2
@@ -931,7 +931,6 @@ def dataset_import(

try:
importer: ImportParser = get_importer(format)
-
if format == "nifti" and legacy:
importer = partial(importer, legacy=True)

@@ -954,7 +953,7 @@
overwrite,
use_multi_cpu,
cpu_limit,
- no_legacy=False if legacy else True,
+ legacy,
)

except ImporterNotFoundError:
@@ -1228,8 +1227,8 @@ def dataset_convert(
annotations folder of the dataset under 'other_formats/{format}'.
legacy : bool, default: False
This flag is only for the nifti format.
- If True, it will not export the annotations using legacy calculations.
- If False, it will resize the annotations using the new calculation by dividing with pixdims.
+ If True, it will resize the annotations by dividing by pixdims.
+ If False, it will not export the annotations using legacy calculations
"""
identifier: DatasetIdentifier = DatasetIdentifier.parse(dataset_identifier)
client: Client = _load_client(team_slug=identifier.team_slug)
@@ -1286,8 +1285,8 @@ def convert(
Folder where the exported annotations will be placed.
legacy: bool, default: False
This flag is only for the nifti format.
- If True, it will not export the annotations using legacy calculations.
- If False, it will resize the annotations using the new calculation by dividing with pixdims.
+ If True, it will resize the annotations by dividing by pixdims
+ If False, it will not export the annotations using legacy calculations.
"""
try:
parser: ExportParser = get_exporter(format)
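With this change, legacy isotropic resizing becomes strictly opt-in: dataset_import now forwards legacy as-is instead of deriving an inverted no_legacy value. A hypothetical invocation of the updated CLI (dataset name and path are placeholders):

    darwin dataset import my-dataset nifti ./annotations --legacy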
2 changes: 2 additions & 0 deletions darwin/dataset/remote_dataset_v2.py
@@ -986,6 +986,8 @@ def _find_files_to_upload_as_single_file_items(
local_path = str(
found_file.relative_to(source_files[0]).parent.as_posix()
)
+ if local_path == ".":
+ local_path = "/"
uploading_files.append(
LocalFile(
found_file,
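The two added lines handle files sitting directly in the upload root, where relative_to(...).parent collapses to ".". A minimal sketch of the behaviour being corrected (paths are illustrative):

    from pathlib import Path

    source_root = Path("images")
    found_file = Path("images/image_1.jpg")

    # For a top-level file, the relative parent is ".", which would be
    # registered as a literal "." folder remotely; map it to "/" instead.
    local_path = str(found_file.relative_to(source_root).parent.as_posix())
    if local_path == ".":
        local_path = "/"
    print(local_path)  # -> "/"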
10 changes: 4 additions & 6 deletions darwin/importer/formats/nifti.py
@@ -103,7 +103,7 @@ def _parse_nifti(
is_mpr: bool,
legacy: bool = False,
) -> dt.AnnotationFile:
- img, pixdims = process_nifti(nib.load(nifti_path), legacy=legacy)
+ img, pixdims = process_nifti(nib.load(nifti_path))

processed_class_map = process_class_map(class_map)
video_annotations = []
@@ -513,11 +513,10 @@ def correct_nifti_header_if_necessary(img_nii):
def process_nifti(
input_data: nib.nifti1.Nifti1Image,
ornt: Optional[List[List[float]]] = [[0.0, -1.0], [1.0, -1.0], [2.0, -1.0]],
- legacy: bool = False,
) -> Tuple[np.ndarray, Tuple[float]]:
"""
- Function that converts a nifti object to RAS orientation (if legacy), then converts to the passed ornt orientation.
- The default ornt is for LPI.
+ Function that converts a nifti object to the RAS orientation, then converts to the passed ornt orientation.
+ The default ornt is LPI.
Args:
input_data: nibabel nifti object.
@@ -530,8 +529,7 @@ def process_nifti(
pixdims: tuple of nifti header zoom values.
"""
img = correct_nifti_header_if_necessary(input_data)
- if legacy:
- img = nib.funcs.as_closest_canonical(img)
+ img = nib.funcs.as_closest_canonical(img)
data_array = nib.orientations.apply_orientation(img.get_fdata(), ornt)
pixdims = img.header.get_zooms()
return data_array, pixdims
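After this change the RAS canonicalisation is unconditional rather than gated on legacy. A minimal sketch of the new process_nifti flow, assuming a local file path:

    import nibabel as nib

    img = nib.funcs.as_closest_canonical(nib.load("scan.nii.gz"))  # always snap to RAS
    lpi_ornt = [[0.0, -1.0], [1.0, -1.0], [2.0, -1.0]]  # the default ornt (LPI)
    data_array = nib.orientations.apply_orientation(img.get_fdata(), lpi_ornt)
    pixdims = img.header.get_zooms()  # voxel spacing from the header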
10 changes: 5 additions & 5 deletions darwin/importer/importer.py
@@ -1095,7 +1095,7 @@ def import_annotations( # noqa: C901
overwrite: bool = False,
use_multi_cpu: bool = False,
cpu_limit: Optional[int] = None,
- no_legacy: Optional[bool] = False,
+ legacy: Optional[bool] = False,
) -> None:
"""
Imports the given given Annotations into the given Dataset.
@@ -1137,9 +1137,9 @@
If ``cpu_limit`` is greater than the number of available CPU cores, it will be set to the number of available cores.
If ``cpu_limit`` is less than 1, it will be set to CPU count - 2.
If ``cpu_limit`` is omitted, it will be set to CPU count - 2.
- no_legacy : bool, default: False
- If ``True`` will not use the legacy isotropic transformation to resize annotations
- If ``False`` will use the legacy isotropic transformation to resize annotations
+ legacy : bool, default: False
+ If ``True`` will use the legacy isotropic transformation to resize annotations
+ If ``False`` will not use the legacy isotropic transformation to resize annotations
Raises
-------
ValueError
@@ -1157,7 +1157,7 @@
# CLI-initiated imports will raise an AttributeError because of the partial function
# This block handles SDK-initiated imports
try:
- if importer.__module__ == "darwin.importer.formats.nifti" and not no_legacy:
+ if importer.__module__ == "darwin.importer.formats.nifti" and legacy:
importer = partial(importer, legacy=True)
except AttributeError:
pass
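The surrounding try/except exists because a functools.partial wrapper does not expose the wrapped function's __module__, so this check raises AttributeError for CLI-initiated imports, which arrive pre-wrapped in a partial. A small illustration with a hypothetical importer:

    from functools import partial

    def nifti_importer(path, legacy=False):
        ...

    print(nifti_importer.__module__)  # the defining module's name
    wrapped = partial(nifti_importer, legacy=True)
    # wrapped.__module__  # raises AttributeError: partial objects lack __module__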
21 changes: 11 additions & 10 deletions darwin/options.py
@@ -61,10 +61,10 @@ def __init__(self) -> None:
help="Annotation files (or folders) to convert.",
)
parser_convert.add_argument(
"--no-legacy",
action="store_false",
dest="legacy",
help="Do not convert annotation using legacy process (isotropic transformation).",
"--legacy",
action="store_true",
default=False,
help="Import annotation files using legacy process (isotropic transformation).",
)
parser_convert.add_argument(
"output_dir", type=str, help="Where to store output files."
@@ -375,10 +375,10 @@ def __init__(self) -> None:
help="Bypass warnings about overwiting existing annotations.",
)
parser_import.add_argument(
"--no-legacy",
action="store_false",
dest="legacy",
help="Do not importing annotation files using legacy process (isotropic transformation).",
"--legacy",
action="store_true",
default=False,
help="Import annotation files using legacy process (isotropic transformation).",
)

# Cpu limit for multiprocessing tasks
@@ -410,9 +410,10 @@ def cpu_default_types(input: Any) -> Optional[int]: # type: ignore
"format", type=str, help="Annotation format to convert to."
)
parser_convert.add_argument(
"legacy",
"--legacy",
action="store_true",
help="Convert annotation using legacy process (isotropic transformation).",
default=False,
help="Import annotation files using legacy process (isotropic transformation).",
)
parser_convert.add_argument(
"-o", "--output_dir", type=str, help="Where to store output files."
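The inverted defaults were the root of the bug: with --no-legacy and action="store_false", omitting the flag left legacy=True, so the legacy transformation ran unless explicitly disabled. A minimal sketch contrasting the two parsers:

    import argparse

    old = argparse.ArgumentParser()
    old.add_argument("--no-legacy", action="store_false", dest="legacy")
    assert old.parse_args([]).legacy is True   # legacy behaviour on by default

    new = argparse.ArgumentParser()
    new.add_argument("--legacy", action="store_true", default=False)
    assert new.parse_args([]).legacy is False  # legacy behaviour now opt-in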
2 changes: 1 addition & 1 deletion darwin/version/__init__.py
@@ -1 +1 @@
__version__ = "1.0.11"
__version__ = "1.0.12"
e2e_tests/cli/test_convert.py
@@ -4,13 +4,14 @@

import orjson as json
import pytest
+ import xml.etree.ElementTree as ET

from e2e_tests.helpers import assert_cli, run_cli_command


class TestExportCli:
this_file_path = Path(dirname(__file__)).absolute()
- data_path = (this_file_path / ".." / ".." / "data").resolve()
+ data_path = (this_file_path / ".." / "data" / "convert").resolve()

@pytest.fixture(autouse=True)
def config(self) -> None:
@@ -33,10 +34,10 @@ def compare_directories(self, path: Path, expected_path: Path) -> None:
continue

# Compare files
with file.open("r") as f:
with file.open("rb") as f:
content = f.read()

- with Path(expected_path / file.name).open() as f:
+ with Path(expected_path / file.name).open("rb") as f:
expected_content = f.read()

if content != expected_content:
@@ -52,6 +53,14 @@ def compare_directories(self, path: Path, expected_path: Path) -> None:
[
("yolo_segmented", data_path / "yolov8/from", data_path / "yolov8/to"),
("yolo", data_path / "yolo/from", data_path / "yolo/to"),
("cvat", data_path / "cvat/from", data_path / "cvat/to"),
("pascalvoc", data_path / "pascalvoc/from", data_path / "pascalvoc/to"),
("nifti", data_path / "nifti/from", data_path / "nifti/to"),
(
"instance_mask",
data_path / "instance_mask/from",
data_path / "instance_mask/to",
),
pytest.param(
"coco",
data_path / "coco/from",
@@ -87,30 +96,64 @@ def test_darwin_convert(
result = run_cli_command(
f"darwin convert {format} {str(input_path)} {str(tmp_path)}"
)
if format == "coco":
self.patch_coco(tmp_path / "output.json")
self.patch_format(format, tmp_path)
assert_cli(result, 0)
self.compare_directories(expectation_path, tmp_path)

+ def patch_format(self, format: str, path: Path) -> None:
+ """
+ Patch files based on format to match the expected output.
+ """
+ patch_methods = {
+ "coco": self.patch_coco,
+ "cvat": self.patch_cvat,
+ }
+ patch_method = patch_methods.get(format)
+ if patch_method:
+ patch_method(path)

def patch_coco(self, path: Path) -> None:
"""
Patch coco file to match the expected output, includes changes to year and date_created,
wrapped in try except so that format errors are still caught later with correct error messages
"""
try:
with open(path, "r") as f:
with open(path / "output.json", "r") as f:
contents = f.read()
temp = json.loads(contents)
temp["info"]["year"] = 2023
temp["info"]["date_created"] = "2023/12/05"
with open(path, "w") as f:
with open(path / "output.json", "w") as f:
op = json.dumps(
temp, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY
).decode("utf-8")
f.write(op)
except Exception:
print(f"Error patching {path}")

+ def patch_cvat(self, path: Path) -> None:
+ """
+ Patch cvat file to match the expected output.
+ """
+ try:
+ tree = ET.parse(path / "output.xml")
+ root = tree.getroot()
+ # Adjust the required fields
+ dumped_elem = root.find(".//meta/dumped")
+ if dumped_elem is not None:
+ dumped_elem.text = "2024-10-25 10:33:01.789498+00:00"
+ created_elem = root.find(".//meta/task/created")
+ if created_elem is not None:
+ created_elem.text = "2024-10-25 10:33:01.789603+00:00"
+ updated_elem = root.find(".//meta/task/updated")
+ if updated_elem is not None:
+ updated_elem.text = "2024-10-25 10:33:01.789608+00:00"
+ tree.write(path / "output.xml")
+ except ET.ParseError:
+ print(f"Error parsing XML in {path}")
+ except Exception as e:
+ print(f"Error patching {path}: {e}")


if __name__ == "__main__":
pytest.main(["-vv", "-s", __file__])
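Switching compare_directories to "rb" makes the comparison byte-exact and lets the same code handle binary fixtures (such as instance-mask PNGs) that would fail to decode in text mode. For example, with illustrative paths:

    from pathlib import Path

    produced = Path("tmp/output/mask_1.png")
    expected = Path("data/convert/instance_mask/to/mask_1.png")
    with produced.open("rb") as f, expected.open("rb") as g:
        assert f.read() == g.read()  # byte comparison works for text and binary alike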
105 changes: 105 additions & 0 deletions e2e_tests/cli/test_full_cycle.py
@@ -0,0 +1,105 @@
import shutil
from pathlib import Path

from e2e_tests.helpers import assert_cli, run_cli_command, export_release
from e2e_tests.objects import E2EDataset, ConfigValues
from e2e_tests.cli.test_import import compare_annotations_export


def test_full_cycle(
local_dataset: E2EDataset,
config_values: ConfigValues,
):
"""
This test performs the following steps:
- 1: Registers a set of files from external storage to a dataset
- 2: Imports some annotations
- 3: Creates and pulls a release of the dataset
- 4: Deletes all items from the dataset
- 5: Pushes and imports the pulled files & annotations to the dataset
- 6: Deletes locally pulled copies of the dataset files
- 7: Creates and pulls a new release of the dataset
- 8: Assert that the pulled data is as expected
It is designed to catch errors that may arise from changes to exported Darwin JSON
"""
item_type = "single_slotted"
annotation_format = "darwin"
first_release_name = "first_release"
second_release_name = "second_release"
pull_dir = Path(
f"{Path.home()}/.darwin/datasets/{config_values.team_slug}/{local_dataset.slug}"
)
annotations_import_dir = (
Path(__file__).parents[1]
/ "data"
/ "import"
/ "image_annotations_with_item_level_properties"
)
expected_filepaths = [
f"{pull_dir}/images/image_1.jpg",
f"{pull_dir}/images/image_2.jpg",
f"{pull_dir}/images/dir1/image_3.jpg",
f"{pull_dir}/images/dir1/image_4.jpg",
f"{pull_dir}/images/dir2/image_5.jpg",
f"{pull_dir}/images/dir2/image_6.jpg",
f"{pull_dir}/images/dir1/dir3/image_7.jpg",
f"{pull_dir}/images/dir1/dir3/image_8.jpg",
]

# Populate the dataset with items and annotations
local_dataset.register_read_only_items(config_values, item_type)
result = run_cli_command(
f"darwin dataset import {local_dataset.name} {annotation_format} {annotations_import_dir}"
)
assert_cli(result, 0)

# Pull a first release of the dataset
original_release = export_release(
annotation_format, local_dataset, config_values, release_name=first_release_name
)
result = run_cli_command(
f"darwin dataset pull {local_dataset.name}:{original_release.name}"
)
assert_cli(result, 0)

# Delete all items in the dataset
local_dataset.delete_items(config_values)

# Push and import the pulled files and annotations to the dataset
result = run_cli_command(
f"darwin dataset push {local_dataset.name} {pull_dir}/images --preserve-folders"
)
assert_cli(result, 0)
result = run_cli_command(
f"darwin dataset import {local_dataset.name} {annotation_format} {pull_dir}/releases/{first_release_name}/annotations"
)
assert_cli(result, 0)

# Delete local copies of the dataset files for the dataset
shutil.rmtree(f"{pull_dir}/images")

# Pull a second release of the dataset
new_release = export_release(
annotation_format,
local_dataset,
config_values,
release_name=second_release_name,
)
result = run_cli_command(
f"darwin dataset pull {local_dataset.name}:{new_release.name}"
)
assert_cli(result, 0)

# Check that all expected files have been downloaded
all_filepaths = list(pull_dir.rglob("*"))
for expected_file in expected_filepaths:
assert Path(expected_file) in all_filepaths

# Check that all downloaded annotations are as expected
compare_annotations_export(
Path(f"{pull_dir}/releases/{first_release_name}/annotations"),
Path(f"{pull_dir}/releases/{second_release_name}/annotations"),
item_type,
unzip=False,
)