Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAR-4878][External] Addition of the handle_as_slices argument for push #962

Merged
merged 1 commit into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions darwin/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None:
args.path,
args.frames,
args.extract_views,
args.handle_as_slices,
args.preserve_folders,
args.verbose,
args.item_merge_mode,
Expand Down
4 changes: 4 additions & 0 deletions darwin/cli_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ def upload_data(
path: Optional[str],
frames: bool,
extract_views: bool = False,
handle_as_slices: bool = False,
preserve_folders: bool = False,
verbose: bool = False,
item_merge_mode: Optional[str] = None,
Expand Down Expand Up @@ -682,6 +683,8 @@ def upload_data(
Specify whether the files will be uploaded as a list of frames or not.
extract_views : bool
If providing a volume, specify whether to extract the orthogonal views or not.
handle_as_slices : bool
Whether to upload DICOM files as slices
preserve_folders : bool
Specify whether or not to preserve folder paths when uploading.
verbose : bool
Expand Down Expand Up @@ -779,6 +782,7 @@ def file_upload_callback(
fps=fps,
as_frames=frames,
extract_views=extract_views,
handle_as_slices=handle_as_slices,
path=path,
preserve_folders=preserve_folders,
progress_callback=progress_callback,
Expand Down
15 changes: 11 additions & 4 deletions darwin/dataset/remote_dataset_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def push(
fps: int = 0,
as_frames: bool = False,
extract_views: bool = False,
handle_as_slices: Optional[bool] = False,
files_to_exclude: Optional[List[PathLike]] = None,
path: Optional[str] = None,
preserve_folders: bool = False,
Expand Down Expand Up @@ -199,6 +200,8 @@ def push(
When the uploading file is a video, specify whether it's going to be uploaded as a list of frames.
extract_views: bool, default: False
When the uploading file is a volume, specify whether it's going to be split into orthogonal views.
handle_as_slices: Optional[bool], default: False
Whether to upload DICOM files as slices
files_to_exclude : Optional[PathLike]], default: None
Optional list of files to exclude from the file scan. These can be folders.
path: Optional[str], default: None
Expand Down Expand Up @@ -267,7 +270,9 @@ def push(
local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
search_files, files_to_exclude, fps, item_merge_mode
)
handler = UploadHandlerV2(self, local_files, multi_file_items)
handler = UploadHandlerV2(
self, local_files, multi_file_items, handle_as_slices=handle_as_slices
)
else:
local_files = _find_files_to_upload_as_single_file_items(
search_files,
Expand All @@ -279,7 +284,9 @@ def push(
extract_views,
preserve_folders,
)
handler = UploadHandlerV2(self, local_files)
handler = UploadHandlerV2(
self, local_files, handle_as_slices=handle_as_slices
)
if blocking:
handler.upload(
max_workers=max_workers,
Expand Down Expand Up @@ -883,10 +890,10 @@ def _find_files_to_upload_as_multi_file_items(
List of directories to search for files.
files_to_exclude : List[PathLike]
List of files to exclude from the file scan.
item_merge_mode : str
Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'.
fps : int
When uploading video files, specify the framerate
item_merge_mode : str
Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'.

Returns
-------
Expand Down
33 changes: 26 additions & 7 deletions darwin/dataset/upload_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ def __init__(
dataset: "RemoteDataset",
local_files: List[LocalFile],
multi_file_items: Optional[List[MultiFileItem]] = None,
handle_as_slices: Optional[bool] = False,
):
self._progress: Optional[
Iterator[Callable[[Optional[ByteReadCallback]], None]]
Expand All @@ -391,11 +392,17 @@ def __init__(
self.dataset: RemoteDataset = dataset
self.errors: List[UploadRequestError] = []
self.skip_existing_full_remote_filepaths()
self.blocked_items, self.pending_items = self._request_upload()
self.blocked_items, self.pending_items = self._request_upload(
handle_as_slices=handle_as_slices
)

@staticmethod
def build(dataset: "RemoteDataset", local_files: List[LocalFile]):
return UploadHandlerV2(dataset, local_files)
def build(
dataset: "RemoteDataset",
local_files: List[LocalFile],
handle_as_slices: Optional[bool] = False,
):
return UploadHandlerV2(dataset, local_files, handle_as_slices=handle_as_slices)

@property
def client(self) -> "Client":
Expand Down Expand Up @@ -542,7 +549,9 @@ def callback(file_name, file_total_bytes, file_bytes_sent):
file_to_upload(callback)

@abstractmethod
def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
def _request_upload(
self, handle_as_slices: Optional[bool] = False
) -> Tuple[List[ItemPayload], List[ItemPayload]]:
pass

@abstractmethod
Expand All @@ -565,14 +574,18 @@ def __init__(
dataset: "RemoteDataset",
local_files: List[LocalFile],
multi_file_items: Optional[List[MultiFileItem]] = None,
handle_as_slices: Optional[bool] = False,
):
super().__init__(
dataset=dataset,
local_files=local_files,
multi_file_items=multi_file_items,
handle_as_slices=handle_as_slices,
)

def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
def _request_upload(
self, handle_as_slices: Optional[bool] = False
) -> Tuple[List[ItemPayload], List[ItemPayload]]:
blocked_items = []
items = []
chunk_size: int = _upload_chunk_size()
Expand All @@ -585,7 +598,10 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
"items": [
file.serialize_darwin_json_v2() for file in file_chunk
],
"options": {"ignore_dicom_layout": True},
"options": {
"ignore_dicom_layout": True,
"handle_as_slices": handle_as_slices,
},
}
for file_chunk in chunk(self.multi_file_items, chunk_size)
]
Expand All @@ -603,7 +619,10 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:

upload_payloads.extend(
[
{"items": [file.serialize_darwin_json_v2() for file in file_chunk]}
{
"items": [file.serialize_darwin_json_v2() for file in file_chunk],
"options": {"handle_as_slices": handle_as_slices},
}
for file_chunk in chunk(single_file_items, chunk_size)
]
)
Expand Down
5 changes: 5 additions & 0 deletions darwin/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ def __init__(self) -> None:
action="store_true",
help="Upload a volume with all 3 orthogonal views.",
)
parser_push.add_argument(
"--handle_as_slices",
action="store_true",
help="Upload DICOM files as slices",
)

parser_push.add_argument(
"--path", type=str, default=None, help="Folder to upload the files into."
Expand Down
1 change: 1 addition & 0 deletions tests/darwin/cli_functions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def test_with_verbose_flag(
None,
False,
False,
False,
True,
)
get_remote_dataset_mock.assert_called_once()
Expand Down
93 changes: 93 additions & 0 deletions tests/darwin/dataset/upload_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest
import responses
import inspect


from darwin.client import Client
from darwin.config import Config
Expand Down Expand Up @@ -416,7 +418,98 @@ def test_upload_files(dataset: RemoteDataset, request_upload_endpoint: str):
assert upload_handler.error_count == 0


@pytest.mark.usefixtures("file_read_write_test")
@responses.activate
def test_upload_files_adds_handle_as_slices_option_to_upload_payload(
    dataset: RemoteDataset, request_upload_endpoint: str
):
    """Registering an upload with handle_as_slices=True must put
    {"handle_as_slices": True} into the payload's "options" dict."""
    # Canned backend reply for the registration endpoint: one DICOM item,
    # nothing blocked.
    slot = {
        "type": "image",
        "file_name": "test.dcm",
        "slot_name": "0",
        "upload_id": "123e4567-e89b-12d3-a456-426614174000",
        "as_frames": False,
        "extract_views": False,
    }
    registered_item = {
        "id": "3b241101-e2bb-4255-8caf-4136c566a964",
        "name": "test.dcm",
        "path": "/",
        "slots": [slot],
    }
    responses.add(
        responses.POST,
        request_upload_endpoint,
        json={"blocked_items": [], "items": [registered_item]},
        status=200,
    )
    Path("test.dcm").touch()
    dicom_file = LocalFile(local_path=Path("test.dcm"))
    with patch("darwin.backend_v2.BackendV2.register_data") as mock_register_data:
        with patch.object(dataset, "fetch_remote_files", return_value=[]):
            UploadHandler.build(dataset, [dicom_file], handle_as_slices=True)
    # Exactly one registration call, carrying the slices flag in "options".
    assert mock_register_data.call_count == 1
    sent_options = mock_register_data.call_args[0][1]["options"]
    assert sent_options == {"handle_as_slices": True}


@pytest.mark.usefixtures("file_read_write_test")
@responses.activate
def test_upload_files_does_not_add_handle_as_slices_option_to_upload_payload(
    dataset: RemoteDataset, request_upload_endpoint: str
):
    """Registering an upload with handle_as_slices=False must put
    {"handle_as_slices": False} into the payload's "options" dict."""
    # Canned backend reply for the registration endpoint: one DICOM item,
    # nothing blocked.
    slot = {
        "type": "image",
        "file_name": "test.dcm",
        "slot_name": "0",
        "upload_id": "123e4567-e89b-12d3-a456-426614174000",
        "as_frames": False,
        "extract_views": False,
    }
    registered_item = {
        "id": "3b241101-e2bb-4255-8caf-4136c566a964",
        "name": "test.dcm",
        "path": "/",
        "slots": [slot],
    }
    responses.add(
        responses.POST,
        request_upload_endpoint,
        json={"blocked_items": [], "items": [registered_item]},
        status=200,
    )
    Path("test.dcm").touch()
    dicom_file = LocalFile(local_path=Path("test.dcm"))
    with patch("darwin.backend_v2.BackendV2.register_data") as mock_register_data:
        with patch.object(dataset, "fetch_remote_files", return_value=[]):
            UploadHandler.build(dataset, [dicom_file], handle_as_slices=False)
    # Exactly one registration call; the flag is still sent, explicitly False.
    assert mock_register_data.call_count == 1
    sent_options = mock_register_data.call_args[0][1]["options"]
    assert sent_options == {"handle_as_slices": False}


def test_default_value_for_handle_as_slices():
    """The handle_as_slices parameter of _request_upload must default to False."""
    params = inspect.signature(UploadHandlerV2._request_upload).parameters
    assert params["handle_as_slices"].default is False


class TestUploadChunkSize:

def test_default_value_when_env_var_is_not_set(self):
assert _upload_chunk_size() == 500

Expand Down