Skip to content

Commit

Permalink
Allow download of frames extracted from videos & multi-slotted items (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
JBWilkie authored Nov 21, 2024
1 parent cc274ca commit c3e29a8
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 22 deletions.
51 changes: 45 additions & 6 deletions darwin/dataset/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from rich.console import Console

import darwin.datatypes as dt
from darwin.dataset.utils import sanitize_filename
from darwin.dataset.utils import (
sanitize_filename,
SUPPORTED_IMAGE_EXTENSIONS,
SUPPORTED_VIDEO_EXTENSIONS,
)
from darwin.datatypes import AnnotationFile
from darwin.exceptions import MissingDependency
from darwin.utils import (
Expand Down Expand Up @@ -301,12 +305,12 @@ def _download_all_slots_from_json_annotation(
)
else:
for upload in slot.source_files:
file_path = slot_path / sanitize_filename(upload["file_name"])
file_path = slot_path / sanitize_filename(upload.file_name)
generator.append(
functools.partial(
_download_image_with_trace,
annotation,
upload["url"],
upload.url,
file_path,
api_key,
)
Expand Down Expand Up @@ -357,8 +361,8 @@ def _download_single_slot_from_json_annotation(
else:
if len(slot.source_files) > 0:
image = slot.source_files[0]
image_url = image["url"]
image_filename = image["file_name"]
image_url = image.url
image_filename = image.file_name
if image_filename.endswith(".nii.gz"):
suffix = ".nii.gz"
stem = annotation.filename[: -len(suffix)]
Expand Down Expand Up @@ -670,9 +674,44 @@ def _get_planned_image_paths(
return [images_path / filename]
else:
for slot in annotation.slots:
if len(slot.source_files) > 1:
# Check that the item is either a DICOM series or a frame extracted from a video
is_dicom_series = all(
source_file.file_name.endswith(".dcm") # type: ignore
for source_file in slot.source_files
)
is_extracted_frame = (
len(slot.source_files) == 2
and any(
source_file.file_name.endswith(ext) # type: ignore
for ext in SUPPORTED_VIDEO_EXTENSIONS
for source_file in slot.source_files
)
and any(
source_file.file_name.endswith(ext) # type: ignore
for ext in SUPPORTED_IMAGE_EXTENSIONS
for source_file in slot.source_files
)
)
if is_extracted_frame:
# Select only the image if it's an extracted frame
frame_source_file = next(
source_file
for source_file in slot.source_files
if any(
source_file.file_name.endswith(ext) # type: ignore
for ext in SUPPORTED_IMAGE_EXTENSIONS
)
)
slot.source_files = [frame_source_file]
if not is_dicom_series and not is_extracted_frame:
raise ValueError(
"This slot contains data that is not a DICOM series or a frame extracted from a video"
)

slot_name = Path(slot.name)
for source_file in slot.source_files:
file_name = source_file.file_name
file_name = source_file.file_name # type: ignore
if use_folders and annotation.remote_path != "/":
file_paths.append(
images_path
Expand Down
4 changes: 2 additions & 2 deletions darwin/dataset/remote_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ def split_video_annotations(self, release_name: str = "latest") -> None:
# When splitting into frames, we need to read each frame individually
# Because we use the source name suffix, we need to adjust this to .png here
current_stem = Path(
annotation["item"]["slots"][0]["source_files"][0]["file_name"]
annotation["item"]["slots"][0]["source_files"][0].file_name
).stem
annotation["item"]["slots"][0]["source_files"][0]["file_name"] = (
annotation["item"]["slots"][0]["source_files"][0].file_name = (
current_stem + ".png"
)
# We also need to account for the folder that this function creates
Expand Down
2 changes: 1 addition & 1 deletion darwin/exporter/formats/nifti.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def check_for_error_and_return_imageid(
# Check if all item slots have the correct file-extension
for slot in video_annotation.slots:
for source_file in slot.source_files:
filename = Path(source_file["file_name"])
filename = Path(source_file.file_name)
if not (
filename.name.lower().endswith(".nii.gz")
or filename.name.lower().endswith(".nii")
Expand Down
12 changes: 8 additions & 4 deletions darwin/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,9 +612,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
image_width=slot.width,
image_height=slot.height,
image_url=(
None
if len(slot.source_files or []) == 0
else slot.source_files[0]["url"]
None if len(slot.source_files or []) == 0 else slot.source_files[0].url
),
image_thumbnail_url=slot.thumbnail_url,
workview_url=item_source.get("workview_url", None),
Expand All @@ -630,12 +628,18 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:


def _parse_darwin_slot(data: Dict[str, Any]) -> dt.Slot:
source_files_data = data.get("source_files", [])
source_files = [
dt.SourceFile(file_name=source_file["file_name"], url=source_file.get("url"))
for source_file in source_files_data
]

return dt.Slot(
name=data["slot_name"],
type=data["type"],
width=data.get("width"),
height=data.get("height"),
source_files=data.get("source_files", []),
source_files=source_files,
thumbnail_url=data.get("thumbnail_url"),
frame_count=data.get("frame_count"),
frame_urls=data.get("frame_urls"),
Expand Down
82 changes: 73 additions & 9 deletions tests/darwin/dataset/download_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ def test_multi_slot_without_folders_planned_image_paths():
Slot(
name="slot1",
type="image",
source_files=[SourceFile(file_name="source_name_1.jpg")],
source_files=[
SourceFile(file_name="source_name_1.jpg"),
],
),
Slot(
name="slot2",
Expand Down Expand Up @@ -208,34 +210,96 @@ def test_single_slot_root_path_with_folders_planned_image_paths():
assert result == expected


def test_multiple_source_files_planned_image_paths():
def test_dicom_series_planned_image_paths():
annotation = AnnotationFile(
path=Path("/local/annotations/image.json"),
filename="image.jpg",
path=Path("/local/annotations/series.json"),
filename="series.dcm",
annotation_classes={
AnnotationClass(name="test_class", annotation_type="polygon")
},
annotations=[],
slots=[
Slot(
name="slot1",
type="image",
type="dicom",
source_files=[
SourceFile(file_name="source_name_1.jpg"),
SourceFile(file_name="source_name_2.jpg"),
SourceFile(file_name="slice_1.dcm"),
SourceFile(file_name="slice_2.dcm"),
SourceFile(file_name="slice_3.dcm"),
],
)
],
remote_path="/",
)
images_path = Path("/local/images")
results = dm._get_planned_image_paths(annotation, images_path, use_folders=False)
expected = [
images_path / "image.jpg" / "slot1" / "source_name_1.jpg",
images_path / "image.jpg" / "slot1" / "source_name_2.jpg",
images_path / "series.dcm" / "slot1" / "slice_1.dcm",
images_path / "series.dcm" / "slot1" / "slice_2.dcm",
images_path / "series.dcm" / "slot1" / "slice_3.dcm",
]
assert results == expected


def test_extracted_frames_planned_image_paths():
    """A slot holding a frame image plus its source video plans only the frame."""
    images_path = Path("/local/images")

    # One slot with exactly two source files: the extracted frame and the video.
    frame_slot = Slot(
        name="0",
        type="image",
        source_files=[
            SourceFile(file_name="frame_0.jpg"),
            SourceFile(file_name="video.mp4"),
        ],
    )
    annotation = AnnotationFile(
        path=Path("/local/annotations/video.json"),
        filename="video.mp4",
        annotation_classes={
            AnnotationClass(name="test_class", annotation_type="polygon")
        },
        annotations=[],
        slots=[frame_slot],
        remote_path="/",
    )

    planned = dm._get_planned_image_paths(annotation, images_path, use_folders=False)

    # Only the image is planned; the video source file is not.
    assert planned == [images_path / "video.mp4" / "0" / "frame_0.jpg"]


def test_multiple_source_files_raises_error():
    """Two plain images in a single slot are rejected with a ValueError."""
    expected_message = "This slot contains data that is not a DICOM series or a frame extracted from a video"

    # Neither a DICOM series nor an image/video pair.
    two_image_slot = Slot(
        name="slot1",
        type="image",
        source_files=[
            SourceFile(file_name="image1.jpg"),
            SourceFile(file_name="image2.jpg"),
        ],
    )
    annotation = AnnotationFile(
        path=Path("/local/annotations/image.json"),
        filename="image.jpg",
        annotation_classes={
            AnnotationClass(name="test_class", annotation_type="polygon")
        },
        annotations=[],
        slots=[two_image_slot],
        remote_path="/",
    )
    images_path = Path("/local/images")

    with pytest.raises(ValueError) as exc_info:
        dm._get_planned_image_paths(annotation, images_path, use_folders=False)

    assert str(exc_info.value) == expected_message


def test__remove_empty_directories(tmp_path: Path) -> None:
root_dir = tmp_path / "root"
root_dir.mkdir()
Expand Down

0 comments on commit c3e29a8

Please sign in to comment.