diff --git a/darwin/cli.py b/darwin/cli.py
index c48f55e38..f5fbe7325 100644
--- a/darwin/cli.py
+++ b/darwin/cli.py
@@ -124,6 +124,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None:
                 args.path,
                 args.frames,
                 args.extract_views,
+                args.handle_as_slices,
                 args.preserve_folders,
                 args.verbose,
                 args.item_merge_mode,
diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py
index 8ff145054..6286d01fe 100644
--- a/darwin/cli_functions.py
+++ b/darwin/cli_functions.py
@@ -655,6 +655,7 @@ def upload_data(
     path: Optional[str],
     frames: bool,
     extract_views: bool = False,
+    handle_as_slices: bool = False,
     preserve_folders: bool = False,
     verbose: bool = False,
     item_merge_mode: Optional[str] = None,
@@ -682,6 +683,8 @@ def upload_data(
         Specify whether the files will be uploaded as a list of frames or not.
     extract_views : bool
         If providing a volume, specify whether to extract the orthogonal views or not.
+    handle_as_slices : bool
+        Whether to upload DICOM files as slices.
     preserve_folders : bool
         Specify whether or not to preserve folder paths when uploading.
     verbose : bool
@@ -779,6 +782,7 @@ def file_upload_callback(
            fps=fps,
            as_frames=frames,
            extract_views=extract_views,
+           handle_as_slices=handle_as_slices,
            path=path,
            preserve_folders=preserve_folders,
            progress_callback=progress_callback,
diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py
index ce94c4985..662a7658c 100644
--- a/darwin/dataset/remote_dataset_v2.py
+++ b/darwin/dataset/remote_dataset_v2.py
@@ -171,6 +171,7 @@ def push(
         fps: int = 0,
         as_frames: bool = False,
         extract_views: bool = False,
+        handle_as_slices: Optional[bool] = False,
         files_to_exclude: Optional[List[PathLike]] = None,
         path: Optional[str] = None,
         preserve_folders: bool = False,
@@ -199,6 +200,8 @@ def push(
             When the uploading file is a video, specify whether it's going to be uploaded as a list of frames.
         extract_views: bool, default: False
             When the uploading file is a volume, specify whether it's going to be split into orthogonal views.
+        handle_as_slices: Optional[bool], default: False
+            Whether to upload DICOM files as slices.
         files_to_exclude : Optional[PathLike]], default: None
             Optional list of files to exclude from the file scan. These can be folders.
         path: Optional[str], default: None
@@ -267,7 +270,9 @@ def push(
             local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
                 search_files, files_to_exclude, fps, item_merge_mode
             )
-            handler = UploadHandlerV2(self, local_files, multi_file_items)
+            handler = UploadHandlerV2(
+                self, local_files, multi_file_items, handle_as_slices=handle_as_slices
+            )
         else:
             local_files = _find_files_to_upload_as_single_file_items(
                 search_files,
@@ -279,7 +284,9 @@ def push(
                 extract_views,
                 preserve_folders,
             )
-            handler = UploadHandlerV2(self, local_files)
+            handler = UploadHandlerV2(
+                self, local_files, handle_as_slices=handle_as_slices
+            )
         if blocking:
             handler.upload(
                 max_workers=max_workers,
@@ -883,10 +890,10 @@ def _find_files_to_upload_as_multi_file_items(
         List of directories to search for files.
     files_to_exclude : List[PathLike]
         List of files to exclude from the file scan.
-    item_merge_mode : str
-        Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'.
     fps : int
         When uploading video files, specify the framerate
+    item_merge_mode : str
+        Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'.
 
     Returns
     -------
diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py
index 9997c7630..f401a4d12 100644
--- a/darwin/dataset/upload_manager.py
+++ b/darwin/dataset/upload_manager.py
@@ -382,6 +382,7 @@ def __init__(
         dataset: "RemoteDataset",
         local_files: List[LocalFile],
         multi_file_items: Optional[List[MultiFileItem]] = None,
+        handle_as_slices: Optional[bool] = False,
     ):
         self._progress: Optional[
             Iterator[Callable[[Optional[ByteReadCallback]], None]]
@@ -391,11 +392,17 @@ def __init__(
         self.dataset: RemoteDataset = dataset
         self.errors: List[UploadRequestError] = []
         self.skip_existing_full_remote_filepaths()
-        self.blocked_items, self.pending_items = self._request_upload()
+        self.blocked_items, self.pending_items = self._request_upload(
+            handle_as_slices=handle_as_slices
+        )
 
     @staticmethod
-    def build(dataset: "RemoteDataset", local_files: List[LocalFile]):
-        return UploadHandlerV2(dataset, local_files)
+    def build(
+        dataset: "RemoteDataset",
+        local_files: List[LocalFile],
+        handle_as_slices: Optional[bool] = False,
+    ):
+        return UploadHandlerV2(dataset, local_files, handle_as_slices=handle_as_slices)
 
     @property
     def client(self) -> "Client":
@@ -542,7 +549,9 @@ def callback(file_name, file_total_bytes, file_bytes_sent):
         file_to_upload(callback)
 
     @abstractmethod
-    def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
+    def _request_upload(
+        self, handle_as_slices: Optional[bool] = False
+    ) -> Tuple[List[ItemPayload], List[ItemPayload]]:
         pass
 
     @abstractmethod
@@ -565,14 +574,18 @@ def __init__(
         dataset: "RemoteDataset",
         local_files: List[LocalFile],
         multi_file_items: Optional[List[MultiFileItem]] = None,
+        handle_as_slices: Optional[bool] = False,
     ):
         super().__init__(
             dataset=dataset,
             local_files=local_files,
             multi_file_items=multi_file_items,
+            handle_as_slices=handle_as_slices,
         )
 
-    def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
+    def _request_upload(
+        self, handle_as_slices: Optional[bool] = False
+    ) -> Tuple[List[ItemPayload], List[ItemPayload]]:
         blocked_items = []
         items = []
         chunk_size: int = _upload_chunk_size()
@@ -585,7 +598,10 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
                     "items": [
                         file.serialize_darwin_json_v2() for file in file_chunk
                     ],
-                    "options": {"ignore_dicom_layout": True},
+                    "options": {
+                        "ignore_dicom_layout": True,
+                        "handle_as_slices": handle_as_slices,
+                    },
                 }
                 for file_chunk in chunk(self.multi_file_items, chunk_size)
             ]
@@ -603,7 +619,10 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
 
         upload_payloads.extend(
             [
-                {"items": [file.serialize_darwin_json_v2() for file in file_chunk]}
+                {
+                    "items": [file.serialize_darwin_json_v2() for file in file_chunk],
+                    "options": {"handle_as_slices": handle_as_slices},
+                }
                 for file_chunk in chunk(single_file_items, chunk_size)
             ]
         )
diff --git a/darwin/options.py b/darwin/options.py
index 7a59f137d..4784e7ae7 100644
--- a/darwin/options.py
+++ b/darwin/options.py
@@ -168,6 +168,11 @@ def __init__(self) -> None:
             action="store_true",
             help="Upload a volume with all 3 orthogonal views.",
         )
+        parser_push.add_argument(
+            "--handle_as_slices",
+            action="store_true",
+            help="Upload DICOM files as slices.",
+        )
         parser_push.add_argument(
             "--path", type=str, default=None, help="Folder to upload the files into."
         )
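
Note on the upload_manager.py change above: after this patch, every registration request carries an "options" map, on both the multi-file and single-file code paths. A minimal sketch of the two request-body shapes this produces, assuming a single chunk ("..." stands for the serialized items; values are illustrative, not taken from the diff):

    # Multi-file items keep the pre-existing ignore_dicom_layout option:
    {"items": [...], "options": {"ignore_dicom_layout": True, "handle_as_slices": True}}
    # Single-file items previously sent no "options" key at all:
    {"items": [...], "options": {"handle_as_slices": True}}

Because the flag is always serialized, a default push now sends "handle_as_slices": False rather than omitting the key, which is what the second new test below pins down.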
diff --git a/test.dcm b/test.dcm
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/darwin/cli_functions_test.py b/tests/darwin/cli_functions_test.py
index 497278b2a..8e6569bfd 100644
--- a/tests/darwin/cli_functions_test.py
+++ b/tests/darwin/cli_functions_test.py
@@ -229,6 +229,7 @@ def test_with_verbose_flag(
                 None,
                 False,
                 False,
+                False,
                 True,
             )
             get_remote_dataset_mock.assert_called_once()
diff --git a/tests/darwin/dataset/upload_manager_test.py b/tests/darwin/dataset/upload_manager_test.py
index 914ab7e19..7640dfb39 100644
--- a/tests/darwin/dataset/upload_manager_test.py
+++ b/tests/darwin/dataset/upload_manager_test.py
@@ -3,6 +3,8 @@
 import pytest
 import responses
 
+import inspect
+
 from darwin.client import Client
 from darwin.config import Config
@@ -416,7 +418,98 @@ def test_upload_files(dataset: RemoteDataset, request_upload_endpoint: str):
     assert upload_handler.error_count == 0
 
 
+@pytest.mark.usefixtures("file_read_write_test")
+@responses.activate
+def test_upload_files_adds_handle_as_slices_option_to_upload_payload(
+    dataset: RemoteDataset, request_upload_endpoint: str
+):
+    request_upload_response = {
+        "blocked_items": [],
+        "items": [
+            {
+                "id": "3b241101-e2bb-4255-8caf-4136c566a964",
+                "name": "test.dcm",
+                "path": "/",
+                "slots": [
+                    {
+                        "type": "image",
+                        "file_name": "test.dcm",
+                        "slot_name": "0",
+                        "upload_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "as_frames": False,
+                        "extract_views": False,
+                    }
+                ],
+            }
+        ],
+    }
+    responses.add(
+        responses.POST,
+        request_upload_endpoint,
+        json=request_upload_response,
+        status=200,
+    )
+    Path("test.dcm").touch()
+    local_file = LocalFile(local_path=Path("test.dcm"))
+    with patch("darwin.backend_v2.BackendV2.register_data") as mock_register_data:
+        with patch.object(dataset, "fetch_remote_files", return_value=[]):
+            UploadHandler.build(dataset, [local_file], handle_as_slices=True)
+            assert mock_register_data.call_count == 1
+            assert mock_register_data.call_args[0][1]["options"] == {
+                "handle_as_slices": True
+            }
+
+
+@pytest.mark.usefixtures("file_read_write_test")
+@responses.activate
+def test_upload_files_sets_handle_as_slices_option_to_false_in_upload_payload(
+    dataset: RemoteDataset, request_upload_endpoint: str
+):
+    request_upload_response = {
+        "blocked_items": [],
+        "items": [
+            {
+                "id": "3b241101-e2bb-4255-8caf-4136c566a964",
+                "name": "test.dcm",
+                "path": "/",
+                "slots": [
+                    {
+                        "type": "image",
+                        "file_name": "test.dcm",
+                        "slot_name": "0",
+                        "upload_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "as_frames": False,
+                        "extract_views": False,
+                    }
+                ],
+            }
+        ],
+    }
+    responses.add(
+        responses.POST,
+        request_upload_endpoint,
+        json=request_upload_response,
+        status=200,
+    )
+    Path("test.dcm").touch()
+    local_file = LocalFile(local_path=Path("test.dcm"))
+    with patch("darwin.backend_v2.BackendV2.register_data") as mock_register_data:
+        with patch.object(dataset, "fetch_remote_files", return_value=[]):
+            UploadHandler.build(dataset, [local_file], handle_as_slices=False)
+            assert mock_register_data.call_count == 1
+            assert mock_register_data.call_args[0][1]["options"] == {
+                "handle_as_slices": False
+            }
+
+
+def test_default_value_for_handle_as_slices():
+    signature = inspect.signature(UploadHandlerV2._request_upload)
+    handle_as_slices_default_value = signature.parameters["handle_as_slices"].default
+    assert handle_as_slices_default_value is False
+
+
 class TestUploadChunkSize:
+
     def test_default_value_when_env_var_is_not_set(self):
         assert _upload_chunk_size() == 500
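
For reviewers who want to exercise the flag end to end, a hedged sketch of SDK usage (the team/dataset slug and file path are placeholders, not taken from this diff; Client.local() assumes an existing local Darwin configuration):

    from darwin.client import Client

    client = Client.local()  # loads the API key from the local darwin config
    dataset = client.get_remote_dataset("my-team/my-dataset")  # placeholder slug
    dataset.push(["scans/"], handle_as_slices=True)  # register DICOM files as slices

or, through the CLI flag registered in darwin/options.py:

    darwin dataset push my-team/my-dataset scans/ --handle_as_slices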