From 91dc524dae4f7b6f6dc28bf2ea01d068bb400860 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Tue, 27 Aug 2024 22:54:39 +0100 Subject: [PATCH 01/12] Added to CLI & SDK + created MultiFileItem parsing & layout building --- darwin/cli.py | 1 + darwin/cli_functions.py | 9 ++ darwin/dataset/remote_dataset.py | 1 + darwin/dataset/remote_dataset_v2.py | 134 ++++++++++++++++---- darwin/dataset/upload_manager.py | 32 +++++ darwin/options.py | 6 + tests/darwin/dataset/remote_dataset_test.py | 14 ++ 7 files changed, 169 insertions(+), 28 deletions(-) diff --git a/darwin/cli.py b/darwin/cli.py index bf493341a..c48f55e38 100644 --- a/darwin/cli.py +++ b/darwin/cli.py @@ -126,6 +126,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: args.extract_views, args.preserve_folders, args.verbose, + args.item_merge_mode, ) # Remove a project (remotely) elif args.action == "remove": diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 5d01d7a9d..f2b11999d 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -656,6 +656,7 @@ def upload_data( extract_views: bool = False, preserve_folders: bool = False, verbose: bool = False, + item_merge_mode: Optional[str] = None, ) -> None: """ Uploads the provided files to the remote dataset. @@ -684,6 +685,13 @@ def upload_data( Specify whether or not to preserve folder paths when uploading. verbose : bool Specify whether to have full traces print when uploading files or not. + item_merge_mode : Optional[str] + If set, each file path passed to `files_to_upload` behaves as follows: + - Every path that points directly to a file is ignored + - Each folder of files passed to `files_to_upload` will be uploaded according to the following modes: + - "slots": Each file in the folder will be uploaded to a different slot of the same item. + - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. + - "channels": Each file in the folder will be uploaded to a different channel of the same item. """ client: Client = _load_client() try: @@ -773,6 +781,7 @@ def file_upload_callback( preserve_folders=preserve_folders, progress_callback=progress_callback, file_upload_callback=file_upload_callback, + item_merge_mode=item_merge_mode, ) console = Console(theme=_console_theme()) diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 154c26c85..34f642912 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -138,6 +138,7 @@ def push( preserve_folders: bool = False, progress_callback: Optional[ProgressCallback] = None, file_upload_callback: Optional[FileUploadCallback] = None, + item_merge_mode: Optional[str] = None, ) -> UploadHandler: pass diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 59f87b320..72241785f 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -18,7 +18,9 @@ from darwin.dataset.release import Release from darwin.dataset.upload_manager import ( FileUploadCallback, + ItemMergeMode, LocalFile, + MultiFileItem, ProgressCallback, UploadHandler, UploadHandlerV2, @@ -166,6 +168,7 @@ def push( preserve_folders: bool = False, progress_callback: Optional[ProgressCallback] = None, file_upload_callback: Optional[FileUploadCallback] = None, + item_merge_mode: Optional[str] = None, ) -> UploadHandler: """ Uploads a local dataset (images ONLY) in the datasets directory. 
@@ -173,7 +176,8 @@ def push( Parameters ---------- files_to_upload : Optional[List[Union[PathLike, LocalFile]]] - List of files to upload. Those can be folders. + List of files to upload. These can be folders. + If `item_merge_mode` is set, these must be folders. blocking : bool, default: True If False, the dataset is not uploaded and a generator function is returned instead. multi_threaded : bool, default: True @@ -188,7 +192,7 @@ def push( extract_views: bool, default: False When the uploading file is a volume, specify whether it's going to be split into orthogonal views. files_to_exclude : Optional[PathLike]], default: None - Optional list of files to exclude from the file scan. Those can be folders. + Optional list of files to exclude from the file scan. These can be folders. path: Optional[str], default: None Optional path to store the files in. preserve_folders : bool, default: False @@ -197,6 +201,13 @@ def push( Optional callback, called every time the progress of an uploading files is reported. file_upload_callback: Optional[FileUploadCallback], default: None Optional callback, called every time a file chunk is uploaded. + item_merge_mode: Optional[str], default: None + If set, each file path passed to `files_to_upload` behaves as follows: + - Every path that points directly to a file is ignored + - Each folder of files passed to `files_to_upload` will be uploaded according to the following modes: + - "slots": Each file in the folder will be uploaded to a different slot of the same item. + - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. + - "channels": Each file in the folder will be uploaded to a different channel of the same item. Returns ------- @@ -216,39 +227,41 @@ def push( if files_to_upload is None: raise ValueError("No files or directory specified.") + if item_merge_mode: + try: + item_merge_mode = ItemMergeMode(item_merge_mode) + except ValueError: + raise ValueError( + f"Invalid item merge mode: {item_merge_mode}. 
Valid options are: 'slots', 'series', 'channels'"
+                )
+
+        if item_merge_mode and preserve_folders:
+            raise TypeError("Cannot use `preserve_folders` with `item_merge_mode`")
+
+        # Direct file paths
         uploading_files = [
             item for item in files_to_upload if isinstance(item, LocalFile)
         ]
+
+        # Folder paths
         search_files = [
             item for item in files_to_upload if not isinstance(item, LocalFile)
         ]
 
-        generic_parameters_specified = (
-            path is not None or fps != 0 or as_frames is not False
-        )
-        if uploading_files and generic_parameters_specified:
-            raise ValueError("Cannot specify a path when uploading a LocalFile object.")
-
-        for found_file in find_files(search_files, files_to_exclude=files_to_exclude):
-            local_path = path
-            if preserve_folders:
-                source_files = [
-                    source_file
-                    for source_file in search_files
-                    if is_relative_to(found_file, source_file)
-                ]
-                if source_files:
-                    local_path = str(
-                        found_file.relative_to(source_files[0]).parent.as_posix()
-                    )
-            uploading_files.append(
-                LocalFile(
-                    found_file,
-                    fps=fps,
-                    as_frames=as_frames,
-                    extract_views=extract_views,
-                    path=local_path,
-                )
+        if item_merge_mode:
+            uploading_files = find_files_to_upload_merging(
+                search_files, files_to_exclude, item_merge_mode
+            )
+        else:
+            uploading_files = find_files_to_upload_no_merging(
+                search_files,
+                files_to_exclude,
+                path,
+                fps,
+                as_frames,
+                extract_views,
+                preserve_folders,
+                uploading_files,
             )
 
         if not uploading_files:
             raise ValueError(
                 "No files to upload, check your path, exclusion filters and resume flag"
             )
@@ -842,3 +855,68 @@ def register_multi_slotted(
             print(f" - {item}")
     print(f"Registration complete. Check your items in the dataset: {self.slug}")
     return results
+
+
+def find_files_to_upload_merging(
+    search_files: List[PathLike],
+    files_to_exclude: Optional[List[PathLike]],
+    item_merge_mode: str,
+) -> List[MultiFileItem]:
+    multi_file_items = []
+    for directory in search_files:
+        files_in_directory = list(
+            find_files([directory], files_to_exclude=files_to_exclude, recursive=False)
+        )
+        if not files_in_directory:
+            print(
+                f"Warning: There are no uploading files in the first level of {directory}, skipping"
+            )
+            continue
+        multi_file_items.append(
+            MultiFileItem(directory, files_in_directory, item_merge_mode)
+        )
+    if not multi_file_items:
+        raise ValueError(
+            "No valid folders to upload after searching the passed directories for files"
+        )
+    return multi_file_items
+
+
+def find_files_to_upload_no_merging(
+    search_files: List[PathLike],
+    files_to_exclude: Optional[List[PathLike]],
+    path: Optional[str],
+    fps: int,
+    as_frames: bool,
+    extract_views: bool,
+    preserve_folders: bool,
+    uploading_files: List[LocalFile],
+) -> List[LocalFile]:
+    generic_parameters_specified = (
+        path is not None or fps != 0 or as_frames is not False
+    )
+    if uploading_files and generic_parameters_specified:
+        raise ValueError("Cannot specify a path when uploading a LocalFile object.")
+
+    for found_file in find_files(search_files, files_to_exclude=files_to_exclude):
+        local_path = path
+        if preserve_folders:
+            source_files = [
+                source_file
+                for source_file in search_files
+                if is_relative_to(found_file, source_file)
+            ]
+            if source_files:
+                local_path = str(
+                    found_file.relative_to(source_files[0]).parent.as_posix()
+                )
+        uploading_files.append(
+            LocalFile(
+                found_file,
+                fps=fps,
+                as_frames=as_frames,
+                extract_views=extract_views,
+                path=local_path,
+            )
+        )
+    return uploading_files
diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py
index bcd28f8f7..53fad692c 100644
--- a/darwin/dataset/upload_manager.py
+++ b/darwin/dataset/upload_manager.py
@@ -2,6 
+2,7 @@ import os import time from dataclasses import dataclass +from enum import Enum from pathlib import Path from typing import ( TYPE_CHECKING, @@ -31,6 +32,12 @@ from typing import Dict +class ItemMergeMode(Enum): + SLOTS = "slots" + SERIES = "series" + CHANNELS = "channels" + + class ItemPayload: """ Represents an item's payload. @@ -186,6 +193,31 @@ def full_path(self) -> str: return construct_full_path(self.data["path"], self.data["filename"]) +class MultiFileItem: + def __init__( + self, directory: PathLike, files: List[PathLike], merge_mode: ItemMergeMode + ): + self.directory = directory + self.files = files + self.merge_mode = merge_mode + self.layout = self._create_layout() + self.temp = {"version": 2, "slots": ["1", "2", "3"], "type": "grid"} + + def _create_layout(self): + # TODO + if ( + self.merge_mode == ItemMergeMode.slots + or self.merge_mode == ItemMergeMode.series + ): + return { + "version": 2, + "slots": [str(i) for i in range(len(self.files))], + "type": "grid", # Worth experimenting with - Is this the best option? Should we change this dynamically? + } + else: + return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]} + + class FileMonitor(object): """ Monitors the progress of a :class:``BufferedReader``. diff --git a/darwin/options.py b/darwin/options.py index b6a292394..996c27b3a 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -183,6 +183,12 @@ def __init__(self) -> None: action="store_true", help="Preserve the local folder structure in the dataset.", ) + parser_push.add_argument( + "--item-merge-mode", + type=str, + choices=["slots", "series", "channels"], + help="Specify the item merge mode: slots, series, or channels", + ) # Remove parser_remove = dataset_action.add_parser( diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index c974ace0e..b0cf10276 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -659,6 +659,20 @@ def test_raises_with_unsupported_files(self, remote_dataset: RemoteDataset): with pytest.raises(UnsupportedFileType): remote_dataset.push(["test.txt"]) + def test_raises_if_invalid_item_merge_mode(self, remote_dataset: RemoteDataset): + with pytest.raises(ValueError): + remote_dataset.push(["path/to/dir"], item_merge_mode="invalid") + + def test_raises_if_preserve_folders_with_item_merge_mode( + self, remote_dataset: RemoteDataset + ): + with pytest.raises(TypeError): + remote_dataset.push( + ["path/to/dir"], + item_merge_mode="slots", + preserve_folders=True, + ) + @pytest.mark.usefixtures("file_read_write_test") class TestPull: From cd806a9bde32159bcfb97b008c757f251d7758be Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Thu, 29 Aug 2024 18:00:39 +0100 Subject: [PATCH 02/12] Unit test coverage --- darwin/cli_functions.py | 3 +- darwin/dataset/remote_dataset_v2.py | 82 +++++++++-- darwin/dataset/upload_manager.py | 48 +++++-- tests/darwin/data/push_test_dir.zip | Bin 0 -> 24374 bytes tests/darwin/dataset/remote_dataset_test.py | 146 +++++++++++++++++++- 5 files changed, 250 insertions(+), 29 deletions(-) create mode 100644 tests/darwin/data/push_test_dir.zip diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index f2b11999d..ce00f6a54 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -688,7 +688,8 @@ def upload_data( item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - Every path that points directly to a file is ignored - - 
Each folder of files passed to `files_to_upload` will be uploaded according to the following modes: + - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules. + Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. - "channels": Each file in the folder will be uploaded to a different channel of the same item. diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 72241785f..eecc725d9 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -1,4 +1,5 @@ import json +from pathlib import Path from typing import ( TYPE_CHECKING, Any, @@ -204,7 +205,8 @@ def push( item_merge_mode: Optional[str], default: None If set, each file path passed to `files_to_upload` behaves as follows: - Every path that points directly to a file is ignored - - Each folder of files passed to `files_to_upload` will be uploaded according to the following modes: + - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules. + Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. - "channels": Each file in the folder will be uploaded to a different channel of the same item. @@ -229,7 +231,7 @@ def push( if item_merge_mode: try: - item_merge_mode = ItemMergeMode(item_merge_mode) + ItemMergeMode(item_merge_mode) except ValueError: raise ValueError( f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels" @@ -249,11 +251,11 @@ def push( ] if item_merge_mode: - uploading_files = find_files_to_upload_merging( + uploading_files = _find_files_to_upload_merging( search_files, files_to_exclude, item_merge_mode ) else: - uploading_files = find_files_to_upload_no_merging( + uploading_files = _find_files_to_upload_no_merging( search_files, files_to_exclude, path, @@ -264,11 +266,6 @@ def push( uploading_files, ) - if not uploading_files: - raise ValueError( - "No files to upload, check your path, exclusion filters and resume flag" - ) - handler = UploadHandlerV2(self, uploading_files) if blocking: handler.upload( @@ -857,11 +854,33 @@ def register_multi_slotted( return results -def find_files_to_upload_merging( +def _find_files_to_upload_merging( search_files: List[PathLike], - files_to_exclude: Optional[List[PathLike]], + files_to_exclude: List[PathLike], item_merge_mode: str, ) -> List[MultiFileItem]: + """ + Finds files to upload as either: + - Multi-slotted items + - Multi-channel items + - Single-slotted items containing multiple `.dcm` files + + Does not search each directory recursively, only considers files in the first level of each directory. + + Parameters + ---------- + search_files : List[PathLike] + List of directories to search for files. + files_to_exclude : List[PathLike] + List of files to exclude from the file scan. + item_merge_mode : str + Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'. 
+ + Returns + ------- + List[MultiFileItem] + List of files to upload. + """ multi_file_items = [] for directory in search_files: files_in_directory = list( @@ -873,7 +892,9 @@ def find_files_to_upload_merging( ) continue multi_file_items.append( - MultiFileItem(directory, files_in_directory, item_merge_mode) + MultiFileItem( + Path(directory), files_in_directory, ItemMergeMode(item_merge_mode) + ) ) if not multi_file_items: raise ValueError( @@ -882,9 +903,9 @@ def find_files_to_upload_merging( return multi_file_items -def find_files_to_upload_no_merging( +def _find_files_to_upload_no_merging( search_files: List[PathLike], - files_to_exclude: Optional[List[PathLike]], + files_to_exclude: List[PathLike], path: Optional[str], fps: int, as_frames: bool, @@ -892,6 +913,33 @@ def find_files_to_upload_no_merging( preserve_folders: bool, uploading_files: List[LocalFile], ) -> List[LocalFile]: + """ + Finds files to upload as single-slotted dataset items. Recursively searches the passed directories for files. + + Parameters + ---------- + search_files : List[PathLike] + List of directories to search for files. + files_to_exclude : Optional[List[PathLike]] + List of files to exclude from the file scan. + path : Optional[str] + Path to store the files in. + fps: int + When uploading video files, specify the framerate. + as_frames: bool + When uploading video files, specify whether to upload as a list of frames. + extract_views: bool + When uploading volume files, specify whether to split into orthogonal views. + preserve_folders: bool + Specify whether or not to preserve folder paths when uploading. + uploading_files : List[LocalFile] + List of files to upload. + + Returns + ------- + List[LocalFile] + List of files to upload. + """ generic_parameters_specified = ( path is not None or fps != 0 or as_frames is not False ) @@ -919,4 +967,10 @@ def find_files_to_upload_no_merging( path=local_path, ) ) + + if not uploading_files: + raise ValueError( + "No files to upload, check your path, exclusion filters and resume flag" + ) + return uploading_files diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index 53fad692c..855d9e415 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -14,6 +14,7 @@ Optional, Set, Tuple, + Union, ) import requests @@ -194,27 +195,46 @@ def full_path(self) -> str: class MultiFileItem: - def __init__( - self, directory: PathLike, files: List[PathLike], merge_mode: ItemMergeMode - ): + def __init__(self, directory: Path, files: List[Path], merge_mode: ItemMergeMode): self.directory = directory + self.name = directory.name self.files = files self.merge_mode = merge_mode self.layout = self._create_layout() - self.temp = {"version": 2, "slots": ["1", "2", "3"], "type": "grid"} def _create_layout(self): - # TODO - if ( - self.merge_mode == ItemMergeMode.slots - or self.merge_mode == ItemMergeMode.series - ): + """ + Creates the layout to be used when uploading the files: + - For multi-slotted items: LayoutV2 + - For series items: LayoutV2, but only with `.dcm` files + - For multi-channel items: LayoutV3 + + Raises + ------ + ValueError + - If no DICOM files are found in the directory for ItemMergeMode.SEIRES items + - If the number of files is greater than 16 for ItemMergeMode.CHANNELS items + """ + if self.merge_mode == ItemMergeMode.SLOTS: + return { + "version": 2, + "slots": [str(i) for i in range(len(self.files))], + "type": "grid", + } + elif self.merge_mode == ItemMergeMode.SERIES: + self.files = [file for 
file in self.files if file.suffix.lower() == ".dcm"] + if not self.files: + raise ValueError("No DICOM files found in 1st level of directory") return { "version": 2, "slots": [str(i) for i in range(len(self.files))], - "type": "grid", # Worth experimenting with - Is this the best option? Should we change this dynamically? + "type": "grid", } - else: + elif self.merge_mode == ItemMergeMode.CHANNELS: + if len(self.files) > 16: + raise ValueError( + f"No multi-channel item can have more than 16 channels. The following directory has {len(self.files)} files: {self.directory}" + ) return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]} @@ -434,7 +454,11 @@ def _upload_file( class UploadHandlerV2(UploadHandler): - def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]): + def __init__( + self, + dataset: "RemoteDataset", + local_files: Union[List[LocalFile], List[MultiFileItem]], + ): super().__init__(dataset=dataset, local_files=local_files) def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: diff --git a/tests/darwin/data/push_test_dir.zip b/tests/darwin/data/push_test_dir.zip new file mode 100644 index 0000000000000000000000000000000000000000..2a09ce77336d274582539635428692163674766f GIT binary patch literal 24374 zcmd^{c|4Tg_rOQi5E3GL*|#xcPo*qvh$2a8EZNs=5tXsEpv6av6sakNh*Gj;D$G#j{R97)^DPOeyAh~vpZ3kgsO8!! zj?RFFS{9GjLUZv#rpqnT0&LFn@wD-zyFDs-*Wgmm+BGu{+Y}od{+RPgP@J&FgTWK` zwtx9?LrGsV(;_WbVE>uVcH%TCG;!_wu`_Zk&6*kG`96anR0W zuPdUxHsT?3X;j*p7L5@ z+3CR@h=|A#jwv^DyYA&5R}Y1GIC=UUWxo9TAf{C&wI$T|u;Lt#s@S+6;tX5| zZbpln8?&-4mU^HRJcF#=WECD!ZfI|C?4#Z&b0#P^d9&b0c1z#MI_D~~azmeSiVbzh zPTngROim1Shujl7#0~F~(4;zN=mXmsE7a3}+URI|Kt_uj8EsAq;%G!eihmr>2(h`RR7w2jz8egcGV_6$56CKfbem>tm2KhOQn7&rl zmibG3V>#K*xcRB_d4r2hyY3D){?r@jP~XOhIisU64}-%P+6j7xTg*Np&s-&{)kUZ~+qbtb{GAO!nnV-ekLMee>LQ zXvgMtXNklzjNP&07d@D)SF+?qClE}R8JJmEW2w)$Vck_Bz^{MZ#9i!Y$M5K3=U~@m zYGT#JZMD}te8wR`Mp+(VhFoEu$bSB%wHS+LiL!7;?U*R0Wl8+1wr!9tVP)gkTc)i+ z9t$*{$xQ(tyn51M4`|%Je=Gyg_8o5+rQiqe=j0gZ=kFHeMDTaA_j7awvXp#eOF7Q3 zki%eLcP|bLzj}~^G0p~e+Sq#Xt>;_f;%e_4?zhP<{M_f&MyC@~zZABLU+)x%DfJmjuh%3-xW0+&uBi!^)ZBm++kFI@5nab9sx|6IS*53*YWW zv6iU$A2YfB$|{7I_#@%d%g~29+aHvC)b;QC!iTvmFZ-O4i6NYIL&8RL-bF>X!yWUE zZdp0V@W@Vv^}fkEwS;rk|29|d;TB&K*64g%u|Mg$7AN#Q@p3BnGMTd`Tt&P|DUNxy z?+lJ};)R_2UL3j8p)GUo`1>R&6PBbnyiUjJ-a7rD{-r@KTV?r%fA%{^exPgHhVmsv zW~@S%D~W3ZcHKN)>zd(q$##I>?{Q5{`okudF0~&kbXnrcxfJbNYtI|@hQI!-&Z)ey z|K!Rq7Dvjplb(CFS?NDLlJ)L%4ZFstcB{abf}VK4LQkXK{gBf}^o$sG7`L@8uMr%z``CKs;)go7`hjSmU*0iumSibKT z`=y(u3!F%5*>5ZI9CkMBz4gMcYtv1ULkA4?+(bMUn7RKW)b&P|?bgq*6;{tVOT-&p z6u*6z;F?+IW+oyQ$thQ-7yU^6lS#?N8+A>bGlZdB6H` z=T66d9a7$EzR*vPJ|Ep-y*|%2GQ)FLhuLv?qhsq!ZtCWoa9?M!u*c9w@*|Udlx1y_ zvhpE;Z={&7ccq`6)p@$3QOJE;zEtmB^%zZqYpnm=Wi4)Ujd+?6Q1(pSh5zS<4)iV(`az<=w(0`;CMzQ&ZN9)=f{^*v*!_h!!8Xf(kY(-X zv{kDgCmlaxU0rzH^9U)j_tbV-lg!0u+tf1@J9AmJo_qvq%J$Gc=T zEFFvT?_@Y_z4$0;NB)!B-2YbyQR%u1U=N@Z-?vu|FB z(l-rIzQ@WPYo{@I^akTkxx;v0-Zhm%+8iI74GLzYR($lGRk}yKX+{6cUD0oww)Zps zUS7p6Wxa2Gf#9)bzs*F+&3lA>7kF0)UTaZljQ1+w9_W&|{f#Fhn9ua8>K?Ne`7nXh z%E$c*rO`g`bi!D%P13*E^lsnl7ups1wq<*;=tBt~h4+|)5=Cf9MV{zeJ%e?k82WoI{WR#PdZCGzfFKX45-=1uI%W09* z!lW;n9*lvDE-6>&m~JyYz2!8&;rSc)8&aQhovN27AVLADlhQy$ULuU1Wpo zwR-Ka=gn_dzA)1{5vYBlI^9>$tZ7x)JVM#rZ_;5xgmSfD0YbG_muXEycfVLEp{nYX zkx+^HiG6QuuXW}p7q!Z9gz(1J>$-enQT-w&bRr^j>xJ*+uf7L4L;ksJgw3sz^x<8- z$>excUBN0gvvR2@V_C^X_PiBuq(nUBbF1wC{BKvms=bL^+Iai2**;~}HfHg&WLstx zTs7JkzO&KU#NVjy_ux&ZUwuPM!upR_l;v5k$eQV0$|J%!aJ^Tego|Iqu4OL6$pLPI zdm9POx*eyw@nw2L10RQqe*GH4LdPiIa}KjoX7YhgJ`F~l-tmVV>`8>fZgq3?@$x6w 
zdv7JU`#QP&@gsrGy72@dxU&w2bqjFvQX4to(C(HN$C`%GYW#0TON$@zeO$;g4}NY( zR&QjuS;&zKe4GXAT;Z_zzX`VB2?}r{$73f3Fxo2azAn?i)lpe(z5+e2BB;DLb#w&= z2Ww21#9Gq@)|{@X#!nYmZMvo!H(g-WX_{*MbV-b#Hi@TAt5H59z)dE~hXnAUi1JJW zb8&yWnMRTnW78l|=2#j2P@Iqkq0U-BnPW=!czo94xGsCeQa+=vu?&pl2>O9Q0JA|I ze*r_idIXOu$*i?-^NHIx+$D1Bme+^cY2O->Ht}8;xKUddMe%ECmmbXnGNB(#5_q+PPW}TILo-$;-=+sa& zk6zrl*r)TWcS^L?r{w?6dseISnmu(hQ$l~G@BS4j>5Q)p+wl@rvf7N0Ltn98v*IN= zCpMaMhY~+y=M>7AX_My^DT6n0a>u3gDAW7DOzFYr%as7t!9UPp^d8}_f(P12U>-WW zpDPb26UxrlB=-Xo7GE7b4;NHYlwA;5+XWNA;#5_(I(q$$xFfb}Y@j%)?SkP_+b&33 zDH=P`c5zjx@))|ra~P|u$iMc*c-tA>omqfk-f+6FUb10!9^X21>4=bo9;^Mk-9*eV zFS-f{;hYlX@oGyFUI;{GYd#>GCjGrT9kT-(gM{_>isDnbqq772g;QA<207ww+g58` z&n28*aH`E@ZsDW*_YLj|E{RhVp0ghpZJpM>AxEj|t*>wEj{!Y7D)$PEU>s!agM`?i$i%2f=;vc-)=$rR#$U+uhv9t zojQHTW2a8v@z|-;cRY6L^c|0#I(^5ZdE0@Y@3<+{cia@}J8lZ~9XEygj+;V#$4#NW zqk6k10q@WJxtF9j6dzl-35DVd5NXr_I8?}a`OlT;|Dyo>zbpvKVYfKqf}k91Bg0S* zIgw!~2b#z*l*3CL+?wIB6EcRBDEo4>;!$?*Xv5>MDBriyil0{1 zL*0j>Rr+aFy}$Of7-sM>PX3yToIoCKoy(vv)OF{>*^6Z5_=*LCc+E@0 zLk&o`CsRGB5RlSH#AsI7#iL38kk#WOJb|JfAUCn2>M?$!Lba}x*11fd4OW;54$rW5 z{Fj{z**40Wjjg+-)(#RJk%l)JV~x=GX{45|mJ3EUn2VtQ+_XU!X%eeMvD@Q`P+c#j zbuLrkh9!bl`Hn6cGA4R*3`o^4D5i083`i9)C`OHE(n+ID)8XoZN7rUQJO;Q*7}Pwp zz;gid#*-7Et5ixO5e;XLPhy|#lo!pL4ODZ4=CMn&AGWBkbhr_~zjG&iyqq`Z3=*E_b zQtxJFF&J@SxYWBDp^g?Qok1-!UK|XK1H2vsUEvHsF5pGk?=Sl-5Ut1Q=&8yah(*bv?Dn^z)QHv^@y5Pio~K@d8k-!dYggHc6-)oRqDC#?2VN#iZt5QMr zA_a+svc9t`BFKfZeGfunp{(yMNGz1~eT@>r9HOl6g-9&A^c^aDq%;z7zA~&#G*+dK z2DVTs#n`|WszaqT67disjXIn~s7gf&XQ{=zqJk(M-9ao=j!0=Fq9hiHMtA5+4Lg7c zqdS13h7BOX=nmVcVXmr3(@1x)1{If48j1K64n}{C=j3MqHQo{jBP#-)?qh)(|CmNR z-6sV#ejT1R>FGW^sPXkQ;el&BLD$|~jTY%qa6hP}e@G)9Wz}1wPMh>7tKNMY@qntw WOs+x0V78F|JRy6mCeZKf#rz-t0Wf6% literal 0 HcmV?d00001 diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index b0cf10276..5af8d04ab 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -20,8 +20,17 @@ download_all_images_from_annotations, ) from darwin.dataset.release import Release, ReleaseStatus -from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2 -from darwin.dataset.upload_manager import LocalFile, UploadHandlerV2 +from darwin.dataset.remote_dataset_v2 import ( + RemoteDatasetV2, + _find_files_to_upload_merging, + _find_files_to_upload_no_merging, +) +from darwin.dataset.upload_manager import ( + ItemMergeMode, + LocalFile, + MultiFileItem, + UploadHandlerV2, +) from darwin.datatypes import ManifestItem, ObjectStore, SegmentManifest from darwin.exceptions import UnsupportedExportFormat, UnsupportedFileType from darwin.item import DatasetItem @@ -601,6 +610,14 @@ def remote_dataset( @pytest.mark.usefixtures("file_read_write_test") class TestPush: + @pytest.fixture(scope="class") + def setup_zip(self): + zip_path = Path("tests/darwin/data/push_test_dir.zip") + with tempfile.TemporaryDirectory() as tmpdir: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(tmpdir) + yield Path(tmpdir) + def test_raises_if_files_are_not_provided(self, remote_dataset: RemoteDataset): with pytest.raises(ValueError): remote_dataset.push(None) @@ -673,6 +690,131 @@ def test_raises_if_preserve_folders_with_item_merge_mode( preserve_folders=True, ) + def test_find_files_to_upload_merging_slots(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" + search_files = [base_path / "item1", base_path / "item2"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + assert len(multi_file_items) == 2 + 
assert all(isinstance(item, MultiFileItem) for item in multi_file_items) + + def test_find_files_to_upload_merging_series(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" + search_files = [base_path / "dicoms"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "series") + assert len(multi_file_items) == 1 + assert all(isinstance(item, MultiFileItem) for item in multi_file_items) + + def test_find_files_to_upload_merging_channels(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" + search_files = [base_path / "item1", base_path / "item2"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + assert len(multi_file_items) == 2 + + def test_find_files_to_upload_merging_does_not_search_recursively(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" + search_files = [base_path / "recursive_search"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + assert len(multi_file_items) == 1 + assert len(multi_file_items[0].files) == 2 + + def test_find_files_to_upload_no_merging_searches_recursively(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" + search_files = [base_path / "recursive_search"] + local_files = _find_files_to_upload_no_merging( + search_files, + [], + None, + 0, + False, + False, + False, + [], + ) + assert len(local_files) == 11 + assert all(isinstance(file, LocalFile) for file in local_files) + + def test_find_files_to_upload_no_merging_no_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" + search_files = [base_path / "no_files1", base_path / "no_files2"] + with pytest.raises( + ValueError, + match="No files to upload, check your path, exclusion filters and resume flag", + ): + _find_files_to_upload_no_merging( + search_files, + [], + None, + 0, + False, + False, + False, + [], + ) + + +class TestMultiFileItem: + @pytest.fixture(scope="class") + def setup_zip(self): + zip_path = Path("tests/darwin/data/push_test_dir.zip") + with tempfile.TemporaryDirectory() as tmpdir: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(tmpdir) + yield Path(tmpdir) + + def test_create_multi_file_item_slots(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + files = list(base_path.glob("*")) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS) + assert len(item.files) == 7 + assert item.name == "item1" + assert item.layout == { + "version": 2, + "slots": ["0", "1", "2", "3", "4", "5", "6"], + "type": "grid", + } + + def test_create_multi_file_item_series(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "dicoms" + files = list(base_path.glob("*")) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) + assert len(item.files) == 6 + assert item.name == "dicoms" + assert item.layout == { + "version": 2, + "slots": ["0", "1", "2", "3", "4", "5"], + "type": "grid", + } + + def test_create_multi_file_item_channels(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + files = list(base_path.glob("*")) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) + assert len(item.files) == 7 + assert item.name == "item1" + assert item.layout == { + "version": 3, + "slots_grid": [ + [["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG", "2"]] + ], + } + + def test_create_series_no_valid_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + files = 
list(base_path.glob("*")) + with pytest.raises( + ValueError, match="No DICOM files found in 1st level of directory" + ): + MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) + + def test_create_channels_too_many_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" / "too_many_channels" + files = list(base_path.glob("*")) + with pytest.raises( + ValueError, + match=f"No multi-channel item can have more than 16 channels. The following directory has 17 files: {base_path}", + ): + MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) + @pytest.mark.usefixtures("file_read_write_test") class TestPull: From bd325172817ab76de91d12b35f38d44a9544ccdd Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 30 Aug 2024 14:13:01 +0100 Subject: [PATCH 03/12] Cleaned up tests --- tests/darwin/data/push_test_dir.zip | Bin 24374 -> 14762 bytes tests/darwin/dataset/remote_dataset_test.py | 28 +++++++++----------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tests/darwin/data/push_test_dir.zip b/tests/darwin/data/push_test_dir.zip index 2a09ce77336d274582539635428692163674766f..6d76d817411da1608ab0374474f13321e222aaed 100644 GIT binary patch literal 14762 zcmc(lO>7%Q6vx;3AS9)wr7bjteo)#Nj;nIZ}SQh ze-%8hZ(||T*6x(8$XI~czGM&w}md>lQQwx6@KJ@j=evMHbUOtzDOaFl-Ety*QX z)n2(?3);2CR%O}3`R_D!=aU5CS63>H^>(mWuLi5l+LfX2CvW$brlUu`X8I>dn58rb zKVepyv@*M&HamFZ1BZ-wyEERS(SG~x#sO{bouurR{B$&)DXuoJ^!Me5U)<@3BN>uY z67t`Y`_z$pw)pzOc|Pv*s9rJ8D;NjAn3v+0@&cdE%hjd4z;k)ITI2;jm6NMWc_}XC zrZ~5#X3i1#k;$AS;H1dxX_zY>b~6nM@MKB)0-Y37bys`y-ME^T>AyFsGIb*j>(mh> zy1+OAYGTf|D3&<^kdZ^wwc=ExD5mU^!0`#+G-a#mqc>|H17}X76zF`_!%)w)1F-MG%#e8)% zvbHidm~SnIscJ}VcAvW{7(o-fN%@db8*>FCj@N*44;Xh>P6~^|AXU`hdcp@yzzicOu z*|z2EQLV82X3BQlJ5ggz`I?$6p@6R_05X}30m=4^oK30~mj9pt)NH7Kua{k#%t>X8 zq-K>d@~uMh1L-@>YH}H~sM%%Ax;cV{Ofq8@HOq`y(_^HrRKDwb5{!=Ie2`!AxjMFVc+1*jA+xdv% zP|*+OCzSgD<$_J`Y_9CDtJUqSO%Q&i^rDkd6#P3326G__zC4L42D2av{uKs;`40sz zJc23)+b<~iCm0M~vQhA>Q>bF_#*Kn+!C-Jsh=S*%Z(!&(O+SUEztMqCMp5uh7|g8r z_%y1RIjX-#!NNI5=x6o58BnosP7-=j{2T>4ds^pCZ$kxRlhL@?NS45I7OGZQ?$F#+ z-G)y_QSx0YGBPR6-p8^zu2#2m@iEfZDb3E=SjYYhVx1F&9$DwM6KYjC`M)|D?U81C zS?AtDxnQ?6o2$EdZU?3p?3-p|b-CYwSiyX79C;j*i%w3n_3Cc^66As%V-&kuR|Pt)XQnyV0&>WwR=4x=lO}D>6@U)=)(CUXgF0;XDU&ki?5D$S7-7yi zPY$fq>UQo^F!#x6_ED+hTl)yd8t~4ir;h*A67PJV>iFiYoqFdpR>$A7!o!nO^s}$; uw^1*A=Pa$O4=wS`p||$5oqFcbyK9LDLvJ!_%zEA{(a*^Pp11mp{O?~z7hfX) literal 24374 zcmd^{c|4Tg_rOQi5E3GL*|#xcPo*qvh$2a8EZNs=5tXsEpv6av6sakNh*Gj;D$G#j{R97)^DPOeyAh~vpZ3kgsO8!! 
zj?RFFS{9GjLUZv#rpqnT0&LFn@wD-zyFDs-*Wgmm+BGu{+Y}od{+RPgP@J&FgTWK` zwtx9?LrGsV(;_WbVE>uVcH%TCG;!_wu`_Zk&6*kG`96anR0W zuPdUxHsT?3X;j*p7L5@ z+3CR@h=|A#jwv^DyYA&5R}Y1GIC=UUWxo9TAf{C&wI$T|u;Lt#s@S+6;tX5| zZbpln8?&-4mU^HRJcF#=WECD!ZfI|C?4#Z&b0#P^d9&b0c1z#MI_D~~azmeSiVbzh zPTngROim1Shujl7#0~F~(4;zN=mXmsE7a3}+URI|Kt_uj8EsAq;%G!eihmr>2(h`RR7w2jz8egcGV_6$56CKfbem>tm2KhOQn7&rl zmibG3V>#K*xcRB_d4r2hyY3D){?r@jP~XOhIisU64}-%P+6j7xTg*Np&s-&{)kUZ~+qbtb{GAO!nnV-ekLMee>LQ zXvgMtXNklzjNP&07d@D)SF+?qClE}R8JJmEW2w)$Vck_Bz^{MZ#9i!Y$M5K3=U~@m zYGT#JZMD}te8wR`Mp+(VhFoEu$bSB%wHS+LiL!7;?U*R0Wl8+1wr!9tVP)gkTc)i+ z9t$*{$xQ(tyn51M4`|%Je=Gyg_8o5+rQiqe=j0gZ=kFHeMDTaA_j7awvXp#eOF7Q3 zki%eLcP|bLzj}~^G0p~e+Sq#Xt>;_f;%e_4?zhP<{M_f&MyC@~zZABLU+)x%DfJmjuh%3-xW0+&uBi!^)ZBm++kFI@5nab9sx|6IS*53*YWW zv6iU$A2YfB$|{7I_#@%d%g~29+aHvC)b;QC!iTvmFZ-O4i6NYIL&8RL-bF>X!yWUE zZdp0V@W@Vv^}fkEwS;rk|29|d;TB&K*64g%u|Mg$7AN#Q@p3BnGMTd`Tt&P|DUNxy z?+lJ};)R_2UL3j8p)GUo`1>R&6PBbnyiUjJ-a7rD{-r@KTV?r%fA%{^exPgHhVmsv zW~@S%D~W3ZcHKN)>zd(q$##I>?{Q5{`okudF0~&kbXnrcxfJbNYtI|@hQI!-&Z)ey z|K!Rq7Dvjplb(CFS?NDLlJ)L%4ZFstcB{abf}VK4LQkXK{gBf}^o$sG7`L@8uMr%z``CKs;)go7`hjSmU*0iumSibKT z`=y(u3!F%5*>5ZI9CkMBz4gMcYtv1ULkA4?+(bMUn7RKW)b&P|?bgq*6;{tVOT-&p z6u*6z;F?+IW+oyQ$thQ-7yU^6lS#?N8+A>bGlZdB6H` z=T66d9a7$EzR*vPJ|Ep-y*|%2GQ)FLhuLv?qhsq!ZtCWoa9?M!u*c9w@*|Udlx1y_ zvhpE;Z={&7ccq`6)p@$3QOJE;zEtmB^%zZqYpnm=Wi4)Ujd+?6Q1(pSh5zS<4)iV(`az<=w(0`;CMzQ&ZN9)=f{^*v*!_h!!8Xf(kY(-X zv{kDgCmlaxU0rzH^9U)j_tbV-lg!0u+tf1@J9AmJo_qvq%J$Gc=T zEFFvT?_@Y_z4$0;NB)!B-2YbyQR%u1U=N@Z-?vu|FB z(l-rIzQ@WPYo{@I^akTkxx;v0-Zhm%+8iI74GLzYR($lGRk}yKX+{6cUD0oww)Zps zUS7p6Wxa2Gf#9)bzs*F+&3lA>7kF0)UTaZljQ1+w9_W&|{f#Fhn9ua8>K?Ne`7nXh z%E$c*rO`g`bi!D%P13*E^lsnl7ups1wq<*;=tBt~h4+|)5=Cf9MV{zeJ%e?k82WoI{WR#PdZCGzfFKX45-=1uI%W09* z!lW;n9*lvDE-6>&m~JyYz2!8&;rSc)8&aQhovN27AVLADlhQy$ULuU1Wpo zwR-Ka=gn_dzA)1{5vYBlI^9>$tZ7x)JVM#rZ_;5xgmSfD0YbG_muXEycfVLEp{nYX zkx+^HiG6QuuXW}p7q!Z9gz(1J>$-enQT-w&bRr^j>xJ*+uf7L4L;ksJgw3sz^x<8- z$>excUBN0gvvR2@V_C^X_PiBuq(nUBbF1wC{BKvms=bL^+Iai2**;~}HfHg&WLstx zTs7JkzO&KU#NVjy_ux&ZUwuPM!upR_l;v5k$eQV0$|J%!aJ^Tego|Iqu4OL6$pLPI zdm9POx*eyw@nw2L10RQqe*GH4LdPiIa}KjoX7YhgJ`F~l-tmVV>`8>fZgq3?@$x6w zdv7JU`#QP&@gsrGy72@dxU&w2bqjFvQX4to(C(HN$C`%GYW#0TON$@zeO$;g4}NY( zR&QjuS;&zKe4GXAT;Z_zzX`VB2?}r{$73f3Fxo2azAn?i)lpe(z5+e2BB;DLb#w&= z2Ww21#9Gq@)|{@X#!nYmZMvo!H(g-WX_{*MbV-b#Hi@TAt5H59z)dE~hXnAUi1JJW zb8&yWnMRTnW78l|=2#j2P@Iqkq0U-BnPW=!czo94xGsCeQa+=vu?&pl2>O9Q0JA|I ze*r_idIXOu$*i?-^NHIx+$D1Bme+^cY2O->Ht}8;xKUddMe%ECmmbXnGNB(#5_q+PPW}TILo-$;-=+sa& zk6zrl*r)TWcS^L?r{w?6dseISnmu(hQ$l~G@BS4j>5Q)p+wl@rvf7N0Ltn98v*IN= zCpMaMhY~+y=M>7AX_My^DT6n0a>u3gDAW7DOzFYr%as7t!9UPp^d8}_f(P12U>-WW zpDPb26UxrlB=-Xo7GE7b4;NHYlwA;5+XWNA;#5_(I(q$$xFfb}Y@j%)?SkP_+b&33 zDH=P`c5zjx@))|ra~P|u$iMc*c-tA>omqfk-f+6FUb10!9^X21>4=bo9;^Mk-9*eV zFS-f{;hYlX@oGyFUI;{GYd#>GCjGrT9kT-(gM{_>isDnbqq772g;QA<207ww+g58` z&n28*aH`E@ZsDW*_YLj|E{RhVp0ghpZJpM>AxEj|t*>wEj{!Y7D)$PEU>s!agM`?i$i%2f=;vc-)=$rR#$U+uhv9t zojQHTW2a8v@z|-;cRY6L^c|0#I(^5ZdE0@Y@3<+{cia@}J8lZ~9XEygj+;V#$4#NW zqk6k10q@WJxtF9j6dzl-35DVd5NXr_I8?}a`OlT;|Dyo>zbpvKVYfKqf}k91Bg0S* zIgw!~2b#z*l*3CL+?wIB6EcRBDEo4>;!$?*Xv5>MDBriyil0{1 zL*0j>Rr+aFy}$Of7-sM>PX3yToIoCKoy(vv)OF{>*^6Z5_=*LCc+E@0 zLk&o`CsRGB5RlSH#AsI7#iL38kk#WOJb|JfAUCn2>M?$!Lba}x*11fd4OW;54$rW5 z{Fj{z**40Wjjg+-)(#RJk%l)JV~x=GX{45|mJ3EUn2VtQ+_XU!X%eeMvD@Q`P+c#j zbuLrkh9!bl`Hn6cGA4R*3`o^4D5i083`i9)C`OHE(n+ID)8XoZN7rUQJO;Q*7}Pwp zz;gid#*-7Et5ixO5e;XLPhy|#lo!pL4ODZ4=CMn&AGWBkbhr_~zjG&iyqq`Z3=*E_b zQtxJFF&J@SxYWBDp^g?Qok1-!UK|XK1H2vsUEvHsF5pGk?=Sl-5Ut1Q=&8yah(*bv?Dn^z)QHv^@y5Pio~K@d8k-!dYggHc6-)oRqDC#?2VN#iZt5QMr 
zA_a+svc9t`BFKfZeGfunp{(yMNGz1~eT@>r9HOl6g-9&A^c^aDq%;z7zA~&#G*+dK z2DVTs#n`|WszaqT67disjXIn~s7gf&XQ{=zqJk(M-9ao=j!0=Fq9hiHMtA5+4Lg7c zqdS13h7BOX=nmVcVXmr3(@1x)1{If48j1K64n}{C=j3MqHQo{jBP#-)?qh)(|CmNR z-6sV#ejT1R>FGW^sPXkQ;el&BLD$|~jTY%qa6hP}e@G)9Wz}1wPMh>7tKNMY@qntw WOs+x0V78F|JRy6mCeZKf#rz-t0Wf6% diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 5af8d04ab..897364580 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -692,7 +692,7 @@ def test_raises_if_preserve_folders_with_item_merge_mode( def test_find_files_to_upload_merging_slots(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" - search_files = [base_path / "item1", base_path / "item2"] + search_files = [base_path / "jpegs", base_path / "dicoms"] multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") assert len(multi_file_items) == 2 assert all(isinstance(item, MultiFileItem) for item in multi_file_items) @@ -706,8 +706,8 @@ def test_find_files_to_upload_merging_series(self, setup_zip): def test_find_files_to_upload_merging_channels(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" - search_files = [base_path / "item1", base_path / "item2"] - multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + search_files = [base_path / "jpegs", base_path / "dicoms"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "channels") assert len(multi_file_items) == 2 def test_find_files_to_upload_merging_does_not_search_recursively(self, setup_zip): @@ -735,7 +735,7 @@ def test_find_files_to_upload_no_merging_searches_recursively(self, setup_zip): def test_find_files_to_upload_no_merging_no_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir2" - search_files = [base_path / "no_files1", base_path / "no_files2"] + search_files = [base_path / "no_files_1", base_path / "no_files_2"] with pytest.raises( ValueError, match="No files to upload, check your path, exclusion filters and resume flag", @@ -762,14 +762,14 @@ def setup_zip(self): yield Path(tmpdir) def test_create_multi_file_item_slots(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" files = list(base_path.glob("*")) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS) - assert len(item.files) == 7 - assert item.name == "item1" + assert len(item.files) == 6 + assert item.name == "jpegs" assert item.layout == { "version": 2, - "slots": ["0", "1", "2", "3", "4", "5", "6"], + "slots": ["0", "1", "2", "3", "4", "5"], "type": "grid", } @@ -786,20 +786,18 @@ def test_create_multi_file_item_series(self, setup_zip): } def test_create_multi_file_item_channels(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" files = list(base_path.glob("*")) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) - assert len(item.files) == 7 - assert item.name == "item1" + assert len(item.files) == 6 + assert item.name == "jpegs" assert item.layout == { "version": 3, - "slots_grid": [ - [["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG", "2"]] - ], + "slots_grid": [[["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG"]]], } def test_create_series_no_valid_files(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + base_path = setup_zip / 
"push_test_dir" / "dir1" / "jpegs" files = list(base_path.glob("*")) with pytest.raises( ValueError, match="No DICOM files found in 1st level of directory" From a123c88f724b6e12dd0541e4c1ab4f10779f96c6 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 30 Aug 2024 14:33:11 +0100 Subject: [PATCH 04/12] Added natural sorting to the order of files in multi-file items --- darwin/dataset/remote_dataset_v2.py | 7 ++++++- darwin/utils/utils.py | 13 ++++++++++--- poetry.lock | 17 ++++++++++++++++- pyproject.toml | 1 + tests/darwin/dataset/remote_dataset_test.py | 13 +++++++------ 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index eecc725d9..063b402d6 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -884,7 +884,12 @@ def _find_files_to_upload_merging( multi_file_items = [] for directory in search_files: files_in_directory = list( - find_files([directory], files_to_exclude=files_to_exclude, recursive=False) + find_files( + [directory], + files_to_exclude=files_to_exclude, + recursive=False, + sort=True, + ) ) if not files_in_directory: print( diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index 5f57449ad..f2cbd8d85 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -25,6 +25,7 @@ import requests from json_stream.base import PersistentStreamingJSONList, PersistentStreamingJSONObject from jsonschema import validators +from natsort import natsorted from requests import Response from rich.progress import ProgressType, track from upolygon import draw_polygon @@ -216,6 +217,7 @@ def find_files( *, files_to_exclude: List[dt.PathLike] = [], recursive: bool = True, + sort: bool = False, ) -> List[Path]: """ Retrieve a list of all files belonging to supported extensions. The exploration can be made @@ -229,7 +231,8 @@ def find_files( List of files to exclude from the search. recursive : bool Flag for recursive search. - + sort : bool + Flag for sorting the files naturally, i.e. file2.txt will come before file10.txt. Returns ------- List[Path] @@ -255,8 +258,12 @@ def find_files( raise UnsupportedFileType(path) files_to_exclude_full_paths = [str(Path(f)) for f in files_to_exclude] - - return [f for f in found_files if str(f) not in files_to_exclude_full_paths] + filtered_files = [ + f for f in found_files if str(f) not in files_to_exclude_full_paths + ] + if sort: + return natsorted(filtered_files) + return filtered_files def secure_continue_request() -> bool: diff --git a/poetry.lock b/poetry.lock index 9161c0ff2..a850e8c4c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -913,6 +913,21 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, + {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, +] + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + [[package]] name = "networkx" version = "3.1" @@ -2238,4 +2253,4 @@ test = ["pytest", "responses"] [metadata] lock-version = "2.0" python-versions = ">=3.8.0,<3.12" -content-hash = "6e6c0628c98652df5dd76a8d82a0f67af9ec2037388350412152d21d84fa9d57" +content-hash = "3ea848bf4d0e5e0f22170f20321ce5d426eb79c6bc0a536b36519fd6f7c6782e" diff --git a/pyproject.toml b/pyproject.toml index 10c69fc3e..ef4146782 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ types-requests = "^2.28.11.8" upolygon = "0.1.11" tenacity = "8.5.0" +natsort = "^8.4.0" [tool.poetry.extras] dev = ["black", "isort", "flake8", "mypy", "debugpy", "responses", "pytest", "flake8-pyproject", "pytest-rerunfailures", "ruff", "validate-pyproject"] medical = ["nibabel", "connected-components-3d", "scipy"] diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 897364580..0c69b054c 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -10,6 +10,7 @@ import orjson as json import pytest import responses +from natsort import natsorted from pydantic import ValidationError from darwin.client import Client @@ -763,7 +764,7 @@ def setup_zip(self): def test_create_multi_file_item_slots(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS) assert len(item.files) == 6 assert item.name == "jpegs" @@ -775,7 +776,7 @@ def test_create_multi_file_item_slots(self, setup_zip): def test_create_multi_file_item_series(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "dicoms" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) assert len(item.files) == 6 assert item.name == "dicoms" @@ -787,18 +788,18 @@ def test_create_multi_file_item_series(self, setup_zip): def test_create_multi_file_item_channels(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) assert len(item.files) == 6 assert item.name == "jpegs" assert item.layout == { "version": 3, - "slots_grid": [[["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG"]]], + "slots_grid": [[["1.JPG", "3.JPG", "4.jpg", "5.JPG", "6.jpg", "7.JPG"]]], } def test_create_series_no_valid_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) with pytest.raises( ValueError, match="No DICOM files found in 1st level of directory" ): @@ -806,7 +807,7 @@ def test_create_series_no_valid_files(self, setup_zip): def test_create_channels_too_many_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir2" / "too_many_channels" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) with pytest.raises( 
ValueError, match=f"No multi-channel item can have more than 16 channels. The following directory has 17 files: {base_path}", From 3c678b650f4608f2a58f628a407d9468606c698c Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 30 Aug 2024 16:14:50 +0100 Subject: [PATCH 05/12] Fix for failing test on Windows --- darwin/cli_functions.py | 4 ++-- darwin/dataset/remote_dataset_v2.py | 15 ++++++++------- darwin/dataset/upload_manager.py | 10 +++++----- darwin/options.py | 2 +- tests/darwin/dataset/remote_dataset_test.py | 4 ++-- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index ce00f6a54..9f67fc1b7 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -687,8 +687,8 @@ def upload_data( Specify whether to have full traces print when uploading files or not. item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - - Every path that points directly to a file is ignored - - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules. + - Paths pointing directly to individual files are ignored + - Paths pointing to folders of files will be uploaded according to the following mode rules. Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 063b402d6..fba60c8ff 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -202,19 +202,18 @@ def push( Optional callback, called every time the progress of an uploading files is reported. file_upload_callback: Optional[FileUploadCallback], default: None Optional callback, called every time a file chunk is uploaded. - item_merge_mode: Optional[str], default: None + item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - - Every path that points directly to a file is ignored - - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules. - Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: + - Paths pointing directly to individual files are ignored + - Paths pointing to folders of files will be uploaded according to the below mode rules. + Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. - "channels": Each file in the folder will be uploaded to a different channel of the same item. - Returns ------- handler : UploadHandler - Class for handling uploads, progress and error messages. + Class for handling uploads, progress and error messages. 
Raises ------ @@ -238,7 +237,9 @@ def push( ) if item_merge_mode and preserve_folders: - raise TypeError("Cannot use `preserve_folders` with `item_merge_mode`") + raise TypeError( + "`item_merge_mode` does not support preserving local file structures with `preserve_folders` or `--folders`" + ) # Direct file paths uploading_files = [ diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index 855d9e415..7ba3fcd86 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -204,7 +204,7 @@ def __init__(self, directory: Path, files: List[Path], merge_mode: ItemMergeMode def _create_layout(self): """ - Creates the layout to be used when uploading the files: + Creates the layout to be used when uploading the files as a dataset item: - For multi-slotted items: LayoutV2 - For series items: LayoutV2, but only with `.dcm` files - For multi-channel items: LayoutV3 @@ -212,8 +212,8 @@ def _create_layout(self): Raises ------ ValueError - - If no DICOM files are found in the directory for ItemMergeMode.SEIRES items - - If the number of files is greater than 16 for ItemMergeMode.CHANNELS items + - If no DICOM files are found in the directory for `ItemMergeMode.SERIES` items + - If the number of files is greater than 16 for `ItemMergeMode.CHANNELS` items """ if self.merge_mode == ItemMergeMode.SLOTS: return { @@ -224,7 +224,7 @@ def _create_layout(self): elif self.merge_mode == ItemMergeMode.SERIES: self.files = [file for file in self.files if file.suffix.lower() == ".dcm"] if not self.files: - raise ValueError("No DICOM files found in 1st level of directory") + raise ValueError("No `.dcm` files found in 1st level of directory") return { "version": 2, "slots": [str(i) for i in range(len(self.files))], @@ -233,7 +233,7 @@ def _create_layout(self): elif self.merge_mode == ItemMergeMode.CHANNELS: if len(self.files) > 16: raise ValueError( - f"No multi-channel item can have more than 16 channels. The following directory has {len(self.files)} files: {self.directory}" + f"No multi-channel item can have more than 16 files. The following directory has {len(self.files)} files: {self.directory}" ) return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]} diff --git a/darwin/options.py b/darwin/options.py index 996c27b3a..6ac6c6717 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -187,7 +187,7 @@ def __init__(self) -> None: "--item-merge-mode", type=str, choices=["slots", "series", "channels"], - help="Specify the item merge mode: slots, series, or channels", + help="Specify the item merge mode: `slots`, `series`, or `channels`", ) # Remove diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 0c69b054c..72dea3d65 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -801,7 +801,7 @@ def test_create_series_no_valid_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" files = natsorted(list(base_path.glob("*"))) with pytest.raises( - ValueError, match="No DICOM files found in 1st level of directory" + ValueError, match="No `.dcm` files found in 1st level of directory" ): MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) @@ -810,7 +810,7 @@ def test_create_channels_too_many_files(self, setup_zip): files = natsorted(list(base_path.glob("*"))) with pytest.raises( ValueError, - match=f"No multi-channel item can have more than 16 channels. 
The following directory has 17 files: {base_path}", + match=r"No multi-channel item can have more than 16 files. The following directory has 17 files: .*", ): MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) From e066974f11c85a2fb90d447f602caf1de7df35bd Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 2 Sep 2024 11:22:02 +0100 Subject: [PATCH 06/12] Working upload for multi-slot, multi-channel, & DICOM series items --- darwin/dataset/remote_dataset_v2.py | 46 ++++---- darwin/dataset/upload_manager.py | 173 +++++++++++++++++++++------- 2 files changed, 155 insertions(+), 64 deletions(-) diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index fba60c8ff..fe62b7000 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -235,16 +235,13 @@ def push( raise ValueError( f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels" ) - - if item_merge_mode and preserve_folders: - raise TypeError( - "`item_merge_mode` does not support preserving local file structures with `preserve_folders` or `--folders`" - ) + if preserve_folders: + raise TypeError( + "`item_merge_mode` does not support preserving local file structures with `preserve_folders` or `--folders`" + ) # Direct file paths - uploading_files = [ - item for item in files_to_upload if isinstance(item, LocalFile) - ] + local_files = [item for item in files_to_upload if isinstance(item, LocalFile)] # Folder paths search_files = [ @@ -252,22 +249,23 @@ def push( ] if item_merge_mode: - uploading_files = _find_files_to_upload_merging( - search_files, files_to_exclude, item_merge_mode + local_files, multi_file_items = _find_files_to_upload_merging( + search_files, files_to_exclude, fps, item_merge_mode ) + handler = UploadHandlerV2(self, local_files, multi_file_items) else: - uploading_files = _find_files_to_upload_no_merging( + local_files = _find_files_to_upload_no_merging( search_files, files_to_exclude, path, fps, - as_frames, extract_views, + as_frames, preserve_folders, - uploading_files, + local_files, ) + handler = UploadHandlerV2(self, local_files) - handler = UploadHandlerV2(self, uploading_files) if blocking: handler.upload( max_workers=max_workers, @@ -858,8 +856,9 @@ def register_multi_slotted( def _find_files_to_upload_merging( search_files: List[PathLike], files_to_exclude: List[PathLike], + fps: int, item_merge_mode: str, -) -> List[MultiFileItem]: +) -> Tuple[List[LocalFile], List[MultiFileItem]]: """ Finds files to upload as either: - Multi-slotted items @@ -876,13 +875,15 @@ def _find_files_to_upload_merging( List of files to exclude from the file scan. item_merge_mode : str Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'. + fps : int + When uploading video files, specify the framerate Returns ------- List[MultiFileItem] List of files to upload. 
""" - multi_file_items = [] + multi_file_items, local_files = [], [] for directory in search_files: files_in_directory = list( find_files( @@ -894,19 +895,20 @@ def _find_files_to_upload_merging( ) if not files_in_directory: print( - f"Warning: There are no uploading files in the first level of {directory}, skipping" + f"Warning: There are no files in the first level of {directory}, skipping directory" ) continue - multi_file_items.append( - MultiFileItem( - Path(directory), files_in_directory, ItemMergeMode(item_merge_mode) - ) + multi_file_item = MultiFileItem( + Path(directory), files_in_directory, ItemMergeMode(item_merge_mode), fps ) + multi_file_items.append(multi_file_item) + local_files.extend(multi_file_item.files) + if not multi_file_items: raise ValueError( "No valid folders to upload after searching the passed directories for files" ) - return multi_file_items + return local_files, multi_file_items def _find_files_to_upload_no_merging( diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index 7ba3fcd86..f9ce956bd 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -1,3 +1,4 @@ +from __future__ import annotations import concurrent.futures import os import time @@ -14,7 +15,6 @@ Optional, Set, Tuple, - Union, ) import requests @@ -32,6 +32,21 @@ from abc import ABC, abstractmethod from typing import Dict +LAYOUT_SHAPE_MAP = { + 1: [1, 1], + 2: [2, 1], + 3: [3, 1], + 4: [2, 2], + 5: [3, 2], + 6: [3, 2], + 7: [4, 2], + 8: [4, 2], + 9: [3, 3], + 10: [4, 3], + 11: [4, 3], + 12: [4, 3], +} + class ItemMergeMode(Enum): SLOTS = "slots" @@ -51,8 +66,8 @@ class ItemPayload: The filename of where this ``ItemPayload``'s data is. path : str The path to ``filename``. - reason : Optional[str], default: None - A reason to upload this ``ItemPayload``. + reasons : Optional[List[str]], default: None + A per-slot reason to upload this ``ItemPayload``. Attributes ---------- @@ -62,8 +77,8 @@ class ItemPayload: The filename of where this ``ItemPayload``'s data is. path : str The path to ``filename``. - reason : Optional[str], default: None - A reason to upload this ``ItemPayload``. + reasons : Optional[List[str]], default: None + A per-slot reason to upload this ``ItemPayload``. 
""" def __init__( @@ -72,25 +87,22 @@ def __init__( dataset_item_id: int, filename: str, path: str, - reason: Optional[str] = None, + reasons: Optional[List[str]] = None, slots: Optional[any] = None, ): self.dataset_item_id = dataset_item_id self.filename = filename self.path = path - self.reason = reason + self.reason = reasons self.slots = slots @staticmethod def parse_v2(payload): - if len(payload["slots"]) > 1: - raise NotImplementedError("multiple files support not yet implemented") - slot = payload["slots"][0] return ItemPayload( dataset_item_id=payload.get("id", None), filename=payload["name"], path=payload["path"], - reason=slot.get("reason", None), + reasons=[slot.get("reason", None) for slot in payload["slots"]], slots=payload["slots"], ) @@ -160,7 +172,11 @@ class LocalFile: """ - def __init__(self, local_path: PathLike, **kwargs): + def __init__( + self, + local_path: PathLike, + **kwargs, + ): self.local_path = Path(local_path) self.data = kwargs self._type_check(kwargs) @@ -195,10 +211,12 @@ def full_path(self) -> str: class MultiFileItem: - def __init__(self, directory: Path, files: List[Path], merge_mode: ItemMergeMode): + def __init__( + self, directory: Path, files: List[Path], merge_mode: ItemMergeMode, fps: int + ): self.directory = directory self.name = directory.name - self.files = files + self.files = [LocalFile(file, fps=fps) for file in files] self.merge_mode = merge_mode self.layout = self._create_layout() @@ -216,18 +234,23 @@ def _create_layout(self): - If the number of files is greater than 16 for `ItemMergeMode.CHANNELS` items """ if self.merge_mode == ItemMergeMode.SLOTS: + num_files = len(self.files) + layout_shape = LAYOUT_SHAPE_MAP.get(num_files, [4, 4]) return { "version": 2, - "slots": [str(i) for i in range(len(self.files))], - "type": "grid", + "slots": [str(i) for i in range(num_files)], + "type": "grid" if num_files >= 4 else "horizontal", + "layout_shape": layout_shape, } elif self.merge_mode == ItemMergeMode.SERIES: - self.files = [file for file in self.files if file.suffix.lower() == ".dcm"] + self.files = [ + file for file in self.files if file.local_path.suffix.lower() == ".dcm" + ] if not self.files: raise ValueError("No `.dcm` files found in 1st level of directory") return { "version": 2, - "slots": [str(i) for i in range(len(self.files))], + "slots": [self.name], "type": "grid", } elif self.merge_mode == ItemMergeMode.CHANNELS: @@ -235,7 +258,32 @@ def _create_layout(self): raise ValueError( f"No multi-channel item can have more than 16 files. The following directory has {len(self.files)} files: {self.directory}" ) - return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]} + return { + "version": 3, + "slots_grid": [[[file.local_path.name for file in self.files]]], + } + + # Need to add a `data` attribute for MultFileItem? Where do we get `fps` from? 
+    def serialize_v2(self):
+        optional_properties = ["fps"]
+        slots = []
+        if self.merge_mode == ItemMergeMode.SLOTS:
+            slot_names = self.layout["slots"]
+        elif self.merge_mode == ItemMergeMode.SERIES:
+            slot_names = [self.name] * len(self.files)
+        elif self.merge_mode == ItemMergeMode.CHANNELS:
+            slot_names = self.layout["slots_grid"][0][0]
+        for idx, local_file in enumerate(self.files):
+            slot = {
+                "file_name": local_file.data["filename"],
+                "slot_name": slot_names[idx],
+            }
+            for optional_property in optional_properties:
+                if optional_property in local_file.data:
+                    slot[optional_property] = local_file.data.get(optional_property)
+            slots.append(slot)
+
+        return {"slots": slots, "layout": self.layout, "name": self.name, "path": "/"}
 
 
 class FileMonitor(object):
@@ -311,16 +359,18 @@ class UploadHandler(ABC):
     ----------
     dataset: RemoteDataset
         Target ``RemoteDataset`` where we want to upload our files to.
-    local_files : List[LocalFile]
-        List of ``LocalFile``\\s to be uploaded.
+    uploading_files : Union[List[LocalFile], List[MultiFileItem]]
+        List of ``LocalFile``\\s or ``MultiFileItem``\\s to be uploaded.
 
     Attributes
     ----------
     dataset : RemoteDataset
-        Target ``RemoteDataset`` where we want to upload our files to..
+        Target ``RemoteDataset`` where we want to upload our files to.
     errors : List[UploadRequestError]
         List of errors that happened during the upload process.
-    local_files : List[LocalFile]
+    item_type : str
+        Either ``single_file`` or ``multi_file``. Indicates what type of data we are uploading
+    local_files : Optional[List[LocalFile]]
        List of ``LocalFile``\\s to be uploaded.
     blocked_items : List[ItemPayload]
         List of items that were not able to be uploaded.
     pending_items : List[ItemPayload]
         List of items waiting to be uploaded.
""" - def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]): - self.dataset: RemoteDataset = dataset - self.errors: List[UploadRequestError] = [] - self.local_files: List[LocalFile] = local_files + def __init__( + self, + dataset: "RemoteDataset", + local_files: List[LocalFile], + multi_file_items: Optional[List[MultiFileItem]] = None, + ): self._progress: Optional[ Iterator[Callable[[Optional[ByteReadCallback]], None]] ] = None - + self.multi_file_items = multi_file_items + self.local_files = local_files + self.dataset: RemoteDataset = dataset + self.errors: List[UploadRequestError] = [] self.blocked_items, self.pending_items = self._request_upload() @staticmethod @@ -457,16 +512,50 @@ class UploadHandlerV2(UploadHandler): def __init__( self, dataset: "RemoteDataset", - local_files: Union[List[LocalFile], List[MultiFileItem]], + local_files: List[LocalFile], + multi_file_items: Optional[List[MultiFileItem]] = None, ): - super().__init__(dataset=dataset, local_files=local_files) + super().__init__( + dataset=dataset, + local_files=local_files, + multi_file_items=multi_file_items, + ) def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: blocked_items = [] items = [] chunk_size: int = _upload_chunk_size() - for file_chunk in chunk(self.local_files, chunk_size): - upload_payload = {"items": [file.serialize_v2() for file in file_chunk]} + single_file_items = self.local_files + upload_payloads = [] + if self.multi_file_items: + upload_payloads.extend( + [ + { + "items": [file.serialize_v2() for file in file_chunk], + "options": {"ignore_dicom_layout": True}, + } + for file_chunk in chunk(self.multi_file_items, chunk_size) + ] + ) + local_files_for_multi_file_items = [ + file + for multi_file_item in self.multi_file_items + for file in multi_file_item.files + ] + single_file_items = [ + file + for file in single_file_items + if file not in local_files_for_multi_file_items + ] + + upload_payloads.extend( + [ + {"items": [file.serialize_v2() for file in file_chunk]} + for file_chunk in chunk(single_file_items, chunk_size) + ] + ) + + for upload_payload in upload_payloads: dataset_slug: str = self.dataset_identifier.dataset_slug team_slug: Optional[str] = self.dataset_identifier.team_slug @@ -490,17 +579,17 @@ def upload_function( file_lookup = {file.full_path: file for file in self.local_files} for item in self.pending_items: - if len(item.slots) != 1: - raise NotImplementedError("Multi file upload is not supported") - upload_id = item.slots[0]["upload_id"] - file = file_lookup.get(item.full_path) - if not file: - raise ValueError( - f"Cannot match {item.full_path} from payload with files to upload" + for slot in item.slots: + upload_id = slot["upload_id"] + slot_path = item.path + (slot["file_name"]) + file = file_lookup.get(slot_path) + if not file: + raise ValueError( + f"Cannot match {item.full_path} from payload with files to upload" + ) + yield upload_function( + self.dataset.identifier.dataset_slug, file.local_path, upload_id ) - yield upload_function( - self.dataset.identifier.dataset_slug, file.local_path, upload_id - ) def _upload_file( self, From 2fa72f828706a9a892c6bbb6f72db0a4e842aa27 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 2 Sep 2024 11:55:02 +0100 Subject: [PATCH 07/12] Multi-slot, multi-channel, & DICOM series --- darwin/cli_functions.py | 28 +++++++++++--------- darwin/dataset/remote_dataset_v2.py | 12 +++++---- darwin/dataset/upload_manager.py | 19 ++++++-------- tests/darwin/dataset/remote_dataset_test.py | 29 
++++++++++++++------- tests/darwin/dataset/upload_manager_test.py | 4 +-- 5 files changed, 52 insertions(+), 40 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 9f67fc1b7..22fd99df7 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -798,10 +798,13 @@ def file_upload_callback( already_existing_items = [] other_skipped_items = [] for item in upload_manager.blocked_items: - if (item.reason is not None) and (item.reason.upper() == "ALREADY_EXISTS"): - already_existing_items.append(item) - else: - other_skipped_items.append(item) + for slot in item.slots: + if (slot["reason"] is not None) and ( + slot["reason"].upper() == "ALREADY_EXISTS" + ): + already_existing_items.append(item) + else: + other_skipped_items.append(item) if already_existing_items: console.print( @@ -831,14 +834,15 @@ def file_upload_callback( ) for item in upload_manager.blocked_items: - if item.reason != "ALREADY_EXISTS": - error_table.add_row( - str(item.dataset_item_id), - item.filename, - item.path, - "UPLOAD_REQUEST", - item.reason, - ) + for slot in item.slots: + if slot["reason"] != "ALREADY_EXISTS": + error_table.add_row( + str(item.dataset_item_id), + item.filename, + item.path, + "UPLOAD_REQUEST", + slot["reason"], + ) for error in upload_manager.errors: for local_file in upload_manager.local_files: diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index fe62b7000..9296dbcbe 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -178,7 +178,7 @@ def push( ---------- files_to_upload : Optional[List[Union[PathLike, LocalFile]]] List of files to upload. These can be folders. - If `item_merge_mode` is set, these must be folders. + If `item_merge_mode` is set, these paths must be folders. blocking : bool, default: True If False, the dataset is not uploaded and a generator function is returned instead. multi_threaded : bool, default: True @@ -205,7 +205,7 @@ def push( item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - Paths pointing directly to individual files are ignored - - Paths pointing to folders of files will be uploaded according to the below mode rules. + - Paths pointing to folders of files will be uploaded according to the following mode rules. Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. @@ -233,7 +233,7 @@ def push( ItemMergeMode(item_merge_mode) except ValueError: raise ValueError( - f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels" + f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels'" ) if preserve_folders: raise TypeError( @@ -259,8 +259,8 @@ def push( files_to_exclude, path, fps, - extract_views, as_frames, + extract_views, preserve_folders, local_files, ) @@ -880,8 +880,10 @@ def _find_files_to_upload_merging( Returns ------- + List[LocalFile] + List of `LocalFile` objects contained within each `MultiFileItem` List[MultiFileItem] - List of files to upload. 
+ List of `MultiFileItem` objects to be uploaded """ multi_file_items, local_files = [], [] for directory in search_files: diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index f9ce956bd..d0ee0c799 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -93,7 +93,7 @@ def __init__( self.dataset_item_id = dataset_item_id self.filename = filename self.path = path - self.reason = reasons + self.reasons = reasons self.slots = slots @staticmethod @@ -263,7 +263,6 @@ def _create_layout(self): "slots_grid": [[[file.local_path.name for file in self.files]]], } - # Need to add a `data` attribute for MultFileItem? Where do we get `fps` from? def serialize_v2(self): optional_properties = ["fps"] slots = [] @@ -367,11 +366,11 @@ class UploadHandler(ABC): dataset : RemoteDataset Target ``RemoteDataset`` where we want to upload our files to. errors : List[UploadRequestError] - List of errors that happened during the upload process. - item_type : str - Either ``single_file`` or ``multi_file``. Indicates what type of data we are uploading - local_files : Optional[List[LocalFile]] + List of errors that happened during the upload process + local_files : List[LocalFile] List of ``LocalFile``\\s to be uploaded. + multi_file_items : List[MultiFileItem] + List of ``MultiFileItem``\\s to be uploaded. blocked_items : List[ItemPayload] List of items that were not able to be uploaded. pending_items : List[ItemPayload] @@ -555,14 +554,12 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: ] ) + dataset_slug: str = self.dataset_identifier.dataset_slug + team_slug: Optional[str] = self.dataset_identifier.team_slug for upload_payload in upload_payloads: - dataset_slug: str = self.dataset_identifier.dataset_slug - team_slug: Optional[str] = self.dataset_identifier.team_slug - data: Dict[str, Any] = self.client.api_v2.register_data( dataset_slug, upload_payload, team_slug=team_slug ) - blocked_items.extend( [ItemPayload.parse_v2(item) for item in data["blocked_items"]] ) @@ -585,7 +582,7 @@ def upload_function( file = file_lookup.get(slot_path) if not file: raise ValueError( - f"Cannot match {item.full_path} from payload with files to upload" + f"Cannot match {slot_path} from payload with files to upload" ) yield upload_function( self.dataset.identifier.dataset_slug, file.local_path, upload_id diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 72dea3d65..4228f7e53 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -694,27 +694,35 @@ def test_raises_if_preserve_folders_with_item_merge_mode( def test_find_files_to_upload_merging_slots(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" search_files = [base_path / "jpegs", base_path / "dicoms"] - multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + local_files, multi_file_items = _find_files_to_upload_merging( + search_files, [], 0, "slots" + ) assert len(multi_file_items) == 2 assert all(isinstance(item, MultiFileItem) for item in multi_file_items) def test_find_files_to_upload_merging_series(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" search_files = [base_path / "dicoms"] - multi_file_items = _find_files_to_upload_merging(search_files, [], "series") + local_files, multi_file_items = _find_files_to_upload_merging( + search_files, [], 0, "series" + ) assert len(multi_file_items) == 1 assert 
all(isinstance(item, MultiFileItem) for item in multi_file_items) def test_find_files_to_upload_merging_channels(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" search_files = [base_path / "jpegs", base_path / "dicoms"] - multi_file_items = _find_files_to_upload_merging(search_files, [], "channels") + local_files, multi_file_items = _find_files_to_upload_merging( + search_files, [], 0, "channels" + ) assert len(multi_file_items) == 2 def test_find_files_to_upload_merging_does_not_search_recursively(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir2" search_files = [base_path / "recursive_search"] - multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + local_files, multi_file_items = _find_files_to_upload_merging( + search_files, [], 0, "slots" + ) assert len(multi_file_items) == 1 assert len(multi_file_items[0].files) == 2 @@ -765,31 +773,32 @@ def setup_zip(self): def test_create_multi_file_item_slots(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" files = natsorted(list(base_path.glob("*"))) - item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS, fps=0) assert len(item.files) == 6 assert item.name == "jpegs" assert item.layout == { "version": 2, "slots": ["0", "1", "2", "3", "4", "5"], "type": "grid", + "layout_shape": [3, 2], } def test_create_multi_file_item_series(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "dicoms" files = natsorted(list(base_path.glob("*"))) - item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES, fps=0) assert len(item.files) == 6 assert item.name == "dicoms" assert item.layout == { "version": 2, - "slots": ["0", "1", "2", "3", "4", "5"], + "slots": ["dicoms"], "type": "grid", } def test_create_multi_file_item_channels(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" files = natsorted(list(base_path.glob("*"))) - item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS, fps=0) assert len(item.files) == 6 assert item.name == "jpegs" assert item.layout == { @@ -803,7 +812,7 @@ def test_create_series_no_valid_files(self, setup_zip): with pytest.raises( ValueError, match="No `.dcm` files found in 1st level of directory" ): - MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) + MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES, fps=0) def test_create_channels_too_many_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir2" / "too_many_channels" @@ -812,7 +821,7 @@ def test_create_channels_too_many_files(self, setup_zip): ValueError, match=r"No multi-channel item can have more than 16 files. 
The following directory has 17 files: .*", ): - MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) + MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS, fps=0) @pytest.mark.usefixtures("file_read_write_test") diff --git a/tests/darwin/dataset/upload_manager_test.py b/tests/darwin/dataset/upload_manager_test.py index 03e65be37..a6ca355fb 100644 --- a/tests/darwin/dataset/upload_manager_test.py +++ b/tests/darwin/dataset/upload_manager_test.py @@ -107,7 +107,7 @@ def test_pending_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert pending_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert pending_item.filename == "test.jpg" assert pending_item.path == "/" - assert pending_item.reason is None + assert pending_item.reasons[0] is None @pytest.mark.usefixtures("file_read_write_test") @@ -149,7 +149,7 @@ def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert blocked_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert blocked_item.filename == "test.jpg" assert blocked_item.path == "/" - assert blocked_item.reason == "ALREADY_EXISTS" + assert blocked_item.reasons[0] == "ALREADY_EXISTS" @pytest.mark.usefixtures("file_read_write_test") From ce85a68c87123856e11e6701a6581a53a19c8b2e Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 2 Sep 2024 16:17:50 +0100 Subject: [PATCH 08/12] Test improvements --- darwin/cli_functions.py | 4 +- darwin/dataset/remote_dataset_v2.py | 2 +- darwin/dataset/upload_manager.py | 15 +- tests/darwin/data/push_test_dir.zip | Bin 14762 -> 151592 bytes tests/darwin/dataset/remote_dataset_test.py | 339 +++++++++++++------- 5 files changed, 239 insertions(+), 121 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 22fd99df7..682e502ca 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -869,8 +869,8 @@ def file_upload_callback( _error(f"No dataset with name '{e.name}'") except UnsupportedFileType as e: _error(f"Unsupported file type {e.path.suffix} ({e.path.name})") - except ValueError: - _error("No files found") + except ValueError as e: + _error(f"{e}") def dataset_import( diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 9296dbcbe..4741c97d9 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -237,7 +237,7 @@ def push( ) if preserve_folders: raise TypeError( - "`item_merge_mode` does not support preserving local file structures with `preserve_folders` or `--folders`" + "`item_merge_mode` does not support preserving local file structures with `preserve_folders`" ) # Direct file paths diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index d0ee0c799..c49ad3a40 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -23,6 +23,7 @@ from darwin.doc_enum import DocEnum from darwin.path_utils import construct_full_path from darwin.utils import chunk +from darwin.utils.utils import is_image_extension_allowed_by_filename if TYPE_CHECKING: from darwin.client import Client @@ -254,6 +255,16 @@ def _create_layout(self): "type": "grid", } elif self.merge_mode == ItemMergeMode.CHANNELS: + # Currently, only image files are supported in multi-channel items. 
This is planned to change in the future
+            self.files = [
+                file
+                for file in self.files
+                if is_image_extension_allowed_by_filename(str(file.local_path))
+            ]
+            if not self.files:
+                raise ValueError(
+                    "No supported filetypes found in 1st level of directory. Currently, multi-channel items only support images."
+                )
             if len(self.files) > 16:
                 raise ValueError(
                     f"No multi-channel item can have more than 16 files. The following directory has {len(self.files)} files: {self.directory}"
@@ -578,8 +589,8 @@ def upload_function(
         for item in self.pending_items:
             for slot in item.slots:
                 upload_id = slot["upload_id"]
-                slot_path = item.path + (slot["file_name"])
-                file = file_lookup.get(slot_path)
+                slot_path = Path(item.path) / Path((slot["file_name"]))
+                file = file_lookup.get(str(slot_path))
                 if not file:
                     raise ValueError(
                         f"Cannot match {slot_path} from payload with files to upload"
diff --git a/tests/darwin/data/push_test_dir.zip b/tests/darwin/data/push_test_dir.zip
index 6d76d817411da1608ab0374474f13321e222aaed..073acd0618a1ff73686df044d256eae6b4d3273e 100644
Binary files a/tests/darwin/data/push_test_dir.zip and b/tests/darwin/data/push_test_dir.zip differ
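Editor's note: for orientation while reading the reworked tests below, this is roughly how the feature is driven from the SDK once this patch series is applied. It is a sketch, not part of the PR; the team/dataset slug and the folder paths are placeholders, and only `item_merge_mode` comes from the changes above.

```python
from darwin.client import Client

# Placeholder identifiers; substitute a real team/dataset and local folders.
client = Client.local()
dataset = client.get_remote_dataset("my-team/my-dataset")

# Each folder passed here becomes one item: "slots" puts every file in its
# own slot, "channels" puts every image in its own channel, and "series"
# concatenates the folder's .dcm files into a single slot.
dataset.push(
    ["data/study_01", "data/study_02"],
    item_merge_mode="slots",
)
```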
diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py
index 4228f7e53..119b5d9ff 100644
--- a/tests/darwin/dataset/remote_dataset_test.py
+++ b/tests/darwin/dataset/remote_dataset_test.py
@@ -10,7 +10,6 @@
 import orjson as json
 import pytest
 import responses
-from natsort import natsorted
 from pydantic import ValidationError
 
 from darwin.client import Client
@@ -24,13 +23,12 @@
 from darwin.dataset.remote_dataset_v2 import (
     RemoteDatasetV2,
     _find_files_to_upload_merging,
-    _find_files_to_upload_no_merging,
 )
 from darwin.dataset.upload_manager import (
     ItemMergeMode,
     LocalFile,
-    MultiFileItem,
     UploadHandlerV2,
+    LAYOUT_SHAPE_MAP,
 )
 from darwin.datatypes import ManifestItem, ObjectStore, SegmentManifest
 from darwin.exceptions import UnsupportedExportFormat, UnsupportedFileType
@@ -358,6 +356,15 @@ def files_content() -> Dict[str, Any]:
 #     assert dataset.release == None
 
 
+@pytest.fixture()
+def setup_zip():
+    zip_path = Path("tests/darwin/data/push_test_dir.zip")
+    with tempfile.TemporaryDirectory() as tmpdir:
+        with zipfile.ZipFile(zip_path, "r") as zip_ref:
+            zip_ref.extractall(tmpdir)
+        yield Path(tmpdir)
+
+
 @pytest.mark.usefixtures("file_read_write_test", "create_annotation_file")
 class TestSplitVideoAnnotations:
     def test_works_on_videos(
@@ -611,14 +618,6 @@ def remote_dataset(
 
 @pytest.mark.usefixtures("file_read_write_test")
 class TestPush:
-    @pytest.fixture(scope="class")
-    def setup_zip(self):
-        zip_path = Path("tests/darwin/data/push_test_dir.zip")
-        with tempfile.TemporaryDirectory() as tmpdir:
-            with zipfile.ZipFile(zip_path, "r") as zip_ref:
-                zip_ref.extractall(tmpdir)
-            yield Path(tmpdir)
-
     def test_raises_if_files_are_not_provided(self, remote_dataset: RemoteDataset):
         with pytest.raises(ValueError):
             remote_dataset.push(None)
@@ -691,137 +690,245 @@ def test_raises_if_preserve_folders_with_item_merge_mode(
             preserve_folders=True,
         )
 
-    def 
test_find_files_to_upload_merging_slots(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" - search_files = [base_path / "jpegs", base_path / "dicoms"] + +@pytest.mark.usefixtures("setup_zip") +class TestPushMultiSlotItem: + def test_different_numbers_of_input_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "num_files_tests" + directories = [d for d in base_path.iterdir() if d.is_dir()] + for directory in directories: + if directory.name == "0": + with pytest.raises( + ValueError, + match="No valid folders to upload after searching the passed directories for files", + ): + _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="slots" + ) + continue + + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="slots" + ) + num_local_files = len(local_files) + expected_num_files = int(directory.name) + assert len(multi_file_items) == 1 + assert num_local_files == expected_num_files + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 2, + "slots": [str(i) for i in range(num_local_files)], + "type": "grid" if num_local_files >= 4 else "horizontal", + "layout_shape": LAYOUT_SHAPE_MAP.get(num_local_files, [4, 4]), + } + + def test_does_not_recursively_search(self, setup_zip): + directory = setup_zip / "push_test_dir" / "topdir" local_files, multi_file_items = _find_files_to_upload_merging( - search_files, [], 0, "slots" + [directory], [], 0, item_merge_mode="slots" ) - assert len(multi_file_items) == 2 - assert all(isinstance(item, MultiFileItem) for item in multi_file_items) + assert len(multi_file_items) == 1 + assert len(local_files) == 2 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 2, + "slots": [str(i) for i in range(len(local_files))], + "type": "horizontal", + "layout_shape": [2, 1], + } - def test_find_files_to_upload_merging_series(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" - search_files = [base_path / "dicoms"] + def test_dicoms(self, setup_zip): + directory = setup_zip / "push_test_dir" / "dicom_tests" / "dicoms" local_files, multi_file_items = _find_files_to_upload_merging( - search_files, [], 0, "series" + [directory], [], 0, item_merge_mode="slots" ) assert len(multi_file_items) == 1 - assert all(isinstance(item, MultiFileItem) for item in multi_file_items) + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 2, + "slots": [str(i) for i in range(len(local_files))], + "type": "grid", + "layout_shape": [3, 2], + } - def test_find_files_to_upload_merging_channels(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" - search_files = [base_path / "jpegs", base_path / "dicoms"] + def test_dicoms_and_other_files(self, setup_zip): + directory = ( + setup_zip / "push_test_dir" / "dicom_tests" / "dicoms_and_other_files" + ) local_files, multi_file_items = 
_find_files_to_upload_merging( - search_files, [], 0, "channels" + [directory], [], 0, item_merge_mode="slots" ) - assert len(multi_file_items) == 2 + assert len(multi_file_items) == 1 + assert len(local_files) == 10 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 2, + "slots": [str(i) for i in range(len(local_files))], + "type": "grid", + "layout_shape": [4, 3], + } - def test_find_files_to_upload_merging_does_not_search_recursively(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir2" - search_files = [base_path / "recursive_search"] + def test_multiple_file_types(self, setup_zip): + directory = setup_zip / "push_test_dir" / "multiple_file_types" local_files, multi_file_items = _find_files_to_upload_merging( - search_files, [], 0, "slots" + [directory], [], 0, item_merge_mode="slots" ) assert len(multi_file_items) == 1 - assert len(multi_file_items[0].files) == 2 - - def test_find_files_to_upload_no_merging_searches_recursively(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir2" - search_files = [base_path / "recursive_search"] - local_files = _find_files_to_upload_no_merging( - search_files, - [], - None, - 0, - False, - False, - False, - [], - ) - assert len(local_files) == 11 - assert all(isinstance(file, LocalFile) for file in local_files) - - def test_find_files_to_upload_no_merging_no_files(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir2" - search_files = [base_path / "no_files_1", base_path / "no_files_2"] - with pytest.raises( - ValueError, - match="No files to upload, check your path, exclusion filters and resume flag", - ): - _find_files_to_upload_no_merging( - search_files, - [], - None, - 0, - False, - False, - False, - [], - ) + assert len(local_files) == 12 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 2, + "slots": [str(i) for i in range(len(local_files))], + "type": "grid", + "layout_shape": [4, 3], + } + +@pytest.mark.usefixtures("setup_zip") +class TestPushDICOMSeries: + def test_dicoms(self, setup_zip): + directory = setup_zip / "push_test_dir" / "dicom_tests" / "dicoms" + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="series" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 2, + "slots": [directory.name], + "type": "grid", + } -class TestMultiFileItem: - @pytest.fixture(scope="class") - def setup_zip(self): - zip_path = Path("tests/darwin/data/push_test_dir.zip") - with tempfile.TemporaryDirectory() as tmpdir: - with zipfile.ZipFile(zip_path, "r") as zip_ref: - zip_ref.extractall(tmpdir) - yield Path(tmpdir) - - def test_create_multi_file_item_slots(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = natsorted(list(base_path.glob("*"))) - item = MultiFileItem(base_path, files, 
merge_mode=ItemMergeMode.SLOTS, fps=0) - assert len(item.files) == 6 - assert item.name == "jpegs" - assert item.layout == { + def test_dicoms_and_other_files(self, setup_zip): + directory = ( + setup_zip / "push_test_dir" / "dicom_tests" / "dicoms_and_other_files" + ) + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="series" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { "version": 2, - "slots": ["0", "1", "2", "3", "4", "5"], + "slots": [directory.name], "type": "grid", - "layout_shape": [3, 2], } - def test_create_multi_file_item_series(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "dicoms" - files = natsorted(list(base_path.glob("*"))) - item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES, fps=0) - assert len(item.files) == 6 - assert item.name == "dicoms" - assert item.layout == { + def test_multiple_file_types(self, setup_zip): + directory = setup_zip / "push_test_dir" / "multiple_file_types" + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="series" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 3 + assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { "version": 2, - "slots": ["dicoms"], + "slots": [directory.name], "type": "grid", } - def test_create_multi_file_item_channels(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = natsorted(list(base_path.glob("*"))) - item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS, fps=0) - assert len(item.files) == 6 - assert item.name == "jpegs" - assert item.layout == { + +@pytest.mark.usefixtures("setup_zip") +class TestPushMultiChannelItem: + def test_different_numbers_of_input_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "num_files_tests" + directories = [d for d in base_path.iterdir() if d.is_dir()] + for directory in directories: + if directory.name == "0": + with pytest.raises( + ValueError, + match="No valid folders to upload after searching the passed directories for files", + ): + _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="channels" + ) + continue + + if directory.name == "17": + with pytest.raises( + ValueError, + match="No multi-channel item can have more than 16 files. 
The following directory has 17 files: ", + ): + _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="channels" + ) + continue + + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="channels" + ) + num_local_files = len(local_files) + expected_num_files = int(directory.name) + assert len(multi_file_items) == 1 + assert num_local_files == expected_num_files + assert multi_file_items[0].merge_mode == ItemMergeMode.CHANNELS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[[file.local_path.name for file in local_files]]], + } + + def test_does_not_recursively_search(self, setup_zip): + directory = setup_zip / "push_test_dir" / "topdir" + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="channels" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 2 + assert multi_file_items[0].merge_mode == ItemMergeMode.CHANNELS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { "version": 3, - "slots_grid": [[["1.JPG", "3.JPG", "4.jpg", "5.JPG", "6.jpg", "7.JPG"]]], + "slots_grid": [[[file.local_path.name for file in local_files]]], } - def test_create_series_no_valid_files(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = natsorted(list(base_path.glob("*"))) - with pytest.raises( - ValueError, match="No `.dcm` files found in 1st level of directory" - ): - MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES, fps=0) - - def test_create_channels_too_many_files(self, setup_zip): - base_path = setup_zip / "push_test_dir" / "dir2" / "too_many_channels" - files = natsorted(list(base_path.glob("*"))) - with pytest.raises( - ValueError, - match=r"No multi-channel item can have more than 16 files. 
The following directory has 17 files: .*", - ): - MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS, fps=0) + def test_multiple_file_types(self, setup_zip): + directory = setup_zip / "push_test_dir" / "multiple_file_types" + local_files, multi_file_items = _find_files_to_upload_merging( + [directory], [], 0, item_merge_mode="channels" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.CHANNELS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[[file.local_path.name for file in local_files]]], + } @pytest.mark.usefixtures("file_read_write_test") From da925540077c8c84cf69309f1fd776eda65b49c2 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 4 Sep 2024 15:27:15 +0100 Subject: [PATCH 09/12] Improved checking of args that are incompatible with `item_merge_mode` --- darwin/dataset/remote_dataset_v2.py | 15 +++++++++++++-- tests/darwin/dataset/remote_dataset_test.py | 20 +++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 4741c97d9..0fd8d4a6a 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -222,6 +222,12 @@ def push( - If a path is specified when uploading a LocalFile object. - If there are no files to upload (because path is wrong or the exclude filter excludes everything). """ + merge_incompatible_args = { + "preserve_folders": preserve_folders, + "as_frames": as_frames, + "extract_views": extract_views, + } + if files_to_exclude is None: files_to_exclude = [] @@ -235,9 +241,14 @@ def push( raise ValueError( f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels'" ) - if preserve_folders: + incompatible_args = [ + arg for arg, value in merge_incompatible_args.items() if value + ] + + if incompatible_args: + incompatible_args_str = ", ".join(incompatible_args) raise TypeError( - "`item_merge_mode` does not support preserving local file structures with `preserve_folders`" + f"`item_merge_mode` does not support the following incompatible arguments: {incompatible_args_str}." 
) # Direct file paths diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 119b5d9ff..b8b86d321 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -680,15 +680,21 @@ def test_raises_if_invalid_item_merge_mode(self, remote_dataset: RemoteDataset): with pytest.raises(ValueError): remote_dataset.push(["path/to/dir"], item_merge_mode="invalid") - def test_raises_if_preserve_folders_with_item_merge_mode( + def test_raises_if_incompatible_args_with_item_merge_mode( self, remote_dataset: RemoteDataset ): - with pytest.raises(TypeError): - remote_dataset.push( - ["path/to/dir"], - item_merge_mode="slots", - preserve_folders=True, - ) + incompatible_args = [ + {"preserve_folders": True}, + {"as_frames": True}, + {"extract_views": True}, + ] + for args in incompatible_args: + with pytest.raises(TypeError): + remote_dataset.push( + ["path/to/dir"], + item_merge_mode="slots", + **args, # type: ignore + ) @pytest.mark.usefixtures("setup_zip") From b5ef828fd83be5baadaf6c23efb3d8bf3f1b4d89 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 6 Sep 2024 09:39:48 +0100 Subject: [PATCH 10/12] Fix for Windows filepaths --- darwin/dataset/upload_manager.py | 6 +++--- darwin/path_utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index c49ad3a40..160207552 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -4,7 +4,7 @@ import time from dataclasses import dataclass from enum import Enum -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import ( TYPE_CHECKING, Any, @@ -93,7 +93,7 @@ def __init__( ): self.dataset_item_id = dataset_item_id self.filename = filename - self.path = path + self.path = PurePosixPath(path).as_posix() self.reasons = reasons self.slots = slots @@ -589,7 +589,7 @@ def upload_function( for item in self.pending_items: for slot in item.slots: upload_id = slot["upload_id"] - slot_path = Path(item.path) / Path((slot["file_name"])) + slot_path = (Path(item.path) / Path((slot["file_name"]))).as_posix() file = file_lookup.get(str(slot_path)) if not file: raise ValueError( diff --git a/darwin/path_utils.py b/darwin/path_utils.py index a25c056a2..904c41fe7 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -24,7 +24,7 @@ def construct_full_path(remote_path: Optional[str], filename: str) -> str: if remote_path is None: return filename else: - return (PurePosixPath("/") / remote_path / filename).as_posix() + return PurePosixPath("/", remote_path, filename).as_posix() def deconstruct_full_path(filename: str) -> Tuple[str, str]: From 10826c174fd7066f5a33b4bab125b6a88695f764 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 13 Sep 2024 17:31:38 +0100 Subject: [PATCH 11/12] Always use LayoutV3 + misc improvements --- darwin/cli_functions.py | 9 ++- darwin/dataset/remote_dataset_v2.py | 51 +++++++----- darwin/dataset/upload_manager.py | 45 +++++------ darwin/dataset/utils.py | 1 - darwin/utils/utils.py | 62 ++++++++++++++ tests/darwin/cli_functions_test.py | 5 +- tests/darwin/dataset/remote_dataset_test.py | 89 ++++++++++----------- tests/darwin/dataset/upload_manager_test.py | 5 +- tests/darwin/utils/flatten_list_test.py | 2 - 9 files changed, 165 insertions(+), 104 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 682e502ca..d352c78e0 100644 --- a/darwin/cli_functions.py +++ 
b/darwin/cli_functions.py @@ -70,6 +70,7 @@ prompt, secure_continue_request, validate_file_against_schema, + BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS, ) @@ -800,7 +801,7 @@ def file_upload_callback( for item in upload_manager.blocked_items: for slot in item.slots: if (slot["reason"] is not None) and ( - slot["reason"].upper() == "ALREADY_EXISTS" + slot["reason"].upper() == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS ): already_existing_items.append(item) else: @@ -832,10 +833,11 @@ def file_upload_callback( show_header=True, header_style="bold cyan", ) - for item in upload_manager.blocked_items: for slot in item.slots: - if slot["reason"] != "ALREADY_EXISTS": + if (slot["reason"] is not None) and ( + slot["reason"].upper() != BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS + ): error_table.add_row( str(item.dataset_item_id), item.filename, @@ -843,7 +845,6 @@ def file_upload_callback( "UPLOAD_REQUEST", slot["reason"], ) - for error in upload_manager.errors: for local_file in upload_manager.local_files: if local_file.local_path != error.file_path: diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 0fd8d4a6a..b55d60d6d 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -45,7 +45,14 @@ from darwin.exporter.formats.darwin import build_image_annotation from darwin.item import DatasetItem from darwin.item_sorter import ItemSorter -from darwin.utils import SUPPORTED_EXTENSIONS, find_files, urljoin +from darwin.utils import ( + SUPPORTED_EXTENSIONS, + PRESERVE_FOLDERS_KEY, + AS_FRAMES_KEY, + EXTRACT_VIEWS_KEY, + find_files, + urljoin, +) if TYPE_CHECKING: from darwin.client import Client @@ -223,9 +230,9 @@ def push( - If there are no files to upload (because path is wrong or the exclude filter excludes everything). """ merge_incompatible_args = { - "preserve_folders": preserve_folders, - "as_frames": as_frames, - "extract_views": extract_views, + PRESERVE_FOLDERS_KEY: preserve_folders, + AS_FRAMES_KEY: as_frames, + EXTRACT_VIEWS_KEY: extract_views, } if files_to_exclude is None: @@ -251,32 +258,28 @@ def push( f"`item_merge_mode` does not support the following incompatible arguments: {incompatible_args_str}." ) - # Direct file paths - local_files = [item for item in files_to_upload if isinstance(item, LocalFile)] - # Folder paths search_files = [ item for item in files_to_upload if not isinstance(item, LocalFile) ] if item_merge_mode: - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( search_files, files_to_exclude, fps, item_merge_mode ) handler = UploadHandlerV2(self, local_files, multi_file_items) else: - local_files = _find_files_to_upload_no_merging( + local_files = _find_files_to_upload_as_single_file_items( search_files, + files_to_upload, files_to_exclude, path, fps, as_frames, extract_views, preserve_folders, - local_files, ) handler = UploadHandlerV2(self, local_files) - if blocking: handler.upload( max_workers=max_workers, @@ -864,18 +867,14 @@ def register_multi_slotted( return results -def _find_files_to_upload_merging( +def _find_files_to_upload_as_multi_file_items( search_files: List[PathLike], files_to_exclude: List[PathLike], fps: int, item_merge_mode: str, ) -> Tuple[List[LocalFile], List[MultiFileItem]]: """ - Finds files to upload as either: - - Multi-slotted items - - Multi-channel items - - Single-slotted items containing multiple `.dcm` files - + Finds files to upload according to the `item_merge_mode`. 
Does not search each directory recursively; only files in the first level of each directory are considered.

     Parameters
@@ -924,15 +923,15 @@ def _find_files_to_upload_merging(
     return local_files, multi_file_items


-def _find_files_to_upload_no_merging(
+def _find_files_to_upload_as_single_file_items(
     search_files: List[PathLike],
+    files_to_upload: Optional[Sequence[Union[PathLike, LocalFile]]],
     files_to_exclude: List[PathLike],
     path: Optional[str],
     fps: int,
     as_frames: bool,
     extract_views: bool,
     preserve_folders: bool,
-    uploading_files: List[LocalFile],
 ) -> List[LocalFile]:
     """
     Finds files to upload as single-slotted dataset items. Recursively searches the passed directories for files.
@@ -941,8 +940,10 @@ def _find_files_to_upload_no_merging(
     ----------
     search_files : List[PathLike]
         List of directories to search for files.
+    files_to_upload : Optional[List[Union[PathLike, LocalFile]]]
+        List of files to upload. These can be folders.
     files_to_exclude : Optional[List[PathLike]]
         List of files to exclude from the file scan.
     path : Optional[str]
         Path to store the files in.
     fps: int
@@ -953,18 +954,20 @@ def _find_files_to_upload_no_merging(
         When uploading volume files, specify whether to split into orthogonal views.
     preserve_folders: bool
         Specify whether or not to preserve folder paths when uploading.
-    uploading_files : List[LocalFile]
-        List of files to upload.

     Returns
    -------
     List[LocalFile]
         List of files to upload.
     """
+    # Direct file paths
+    uploading_files = [item for item in files_to_upload if isinstance(item, LocalFile)]
+
     generic_parameters_specified = (
         path is not None or fps != 0 or as_frames is not False
     )
+
     if uploading_files and generic_parameters_specified:
         raise ValueError("Cannot specify a path when uploading a LocalFile object.")

     for found_file in find_files(search_files, files_to_exclude=files_to_exclude):
diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py
index 160207552..83b56d0d3 100644
--- a/darwin/dataset/upload_manager.py
+++ b/darwin/dataset/upload_manager.py
@@ -15,6 +15,7 @@
     Optional,
     Set,
     Tuple,
+    Dict,
 )

 import requests
@@ -23,7 +24,7 @@
 from darwin.doc_enum import DocEnum
 from darwin.path_utils import construct_full_path
 from darwin.utils import chunk
-from darwin.utils.utils import is_image_extension_allowed_by_filename
+from darwin.utils.utils import is_image_extension_allowed_by_filename, SLOTS_GRID_MAP

 if TYPE_CHECKING:
     from darwin.client import Client
@@ -31,7 +32,7 @@
     from darwin.dataset.identifier import DatasetIdentifier

 from abc import ABC, abstractmethod
-from typing import Dict
+

 LAYOUT_SHAPE_MAP = {
     1: [1, 1],
@@ -219,14 +220,18 @@ def __init__(
         self.name = directory.name
         self.files = [LocalFile(file, fps=fps) for file in files]
         self.merge_mode = merge_mode
-        self.layout = self._create_layout()
+        self._create_layout()

     def _create_layout(self):
         """
-        Creates the layout to be used when uploading the files as a dataset item:
-        - For multi-slotted items: LayoutV2
-        - For series items: LayoutV2, but only with `.dcm` files
-        - For multi-channel items: LayoutV3
+        Sets the layout as a LayoutV3 object to be used when uploading the files as a dataset item.
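+
+        For example, a directory named `dir` containing three files produces
+        (filenames illustrative):
+        - "slots": {"version": 3, "slots_grid": [[["0"]], [["1"]], [["2"]]]}
+        - "series" (`.dcm` files only): {"version": 3, "slots_grid": [[["dir"]]]}
+        - "channels": {"version": 3, "slots_grid": [[["1.jpg", "2.jpg", "3.jpg"]]]}
+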
Raises ------ @@ -234,25 +232,25 @@ def _create_layout(self): - If no DICOM files are found in the directory for `ItemMergeMode.SERIES` items - If the number of files is greater than 16 for `ItemMergeMode.CHANNELS` items """ + self.slot_names = [] if self.merge_mode == ItemMergeMode.SLOTS: - num_files = len(self.files) - layout_shape = LAYOUT_SHAPE_MAP.get(num_files, [4, 4]) - return { - "version": 2, - "slots": [str(i) for i in range(num_files)], - "type": "grid" if num_files >= 4 else "horizontal", - "layout_shape": layout_shape, + num_viewports = min(len(self.files), 16) + slots_grid = SLOTS_GRID_MAP.get(num_viewports) + self.layout = { + "version": 3, + "slots_grid": slots_grid, } + self.slot_names = [str(i) for i in range(len(self.files))] elif self.merge_mode == ItemMergeMode.SERIES: self.files = [ file for file in self.files if file.local_path.suffix.lower() == ".dcm" ] if not self.files: raise ValueError("No `.dcm` files found in 1st level of directory") - return { - "version": 2, - "slots": [self.name], - "type": "grid", + self.slot_names = [self.name] * len(self.files) + self.layout = { + "version": 3, + "slots_grid": [[[self.name]]], } elif self.merge_mode == ItemMergeMode.CHANNELS: # Currently, only image files are supported in multi-channel items. This is planned to change in the future @@ -269,24 +267,19 @@ def _create_layout(self): raise ValueError( f"No multi-channel item can have more than 16 files. The following directory has {len(self.files)} files: {self.directory}" ) - return { + self.layout = { "version": 3, "slots_grid": [[[file.local_path.name for file in self.files]]], } + self.slot_names = self.layout["slots_grid"][0][0] def serialize_v2(self): optional_properties = ["fps"] slots = [] - if self.merge_mode == ItemMergeMode.SLOTS: - slot_names = self.layout["slots"] - elif self.merge_mode == ItemMergeMode.SERIES: - slot_names = [self.name] * len(self.files) - elif self.merge_mode == ItemMergeMode.CHANNELS: - slot_names = self.layout["slots_grid"][0][0] for idx, local_file in enumerate(self.files): slot = { "file_name": local_file.data["filename"], - "slot_name": slot_names[idx], + "slot_name": self.slot_names[idx], } for optional_property in optional_properties: if optional_property in local_file.data: diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index e948c11b2..6c8d75441 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -11,7 +11,6 @@ import darwin.datatypes as dt -# from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2 from darwin.datatypes import PathLike from darwin.exceptions import NotFound from darwin.importer.formats.darwin import parse_path diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index f2cbd8d85..f314e936b 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -74,6 +74,68 @@ ] SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS +# Define incompatible `item_merge_mode` arguments +PRESERVE_FOLDERS_KEY = "preserve_folders" +AS_FRAMES_KEY = "as_frames" +EXTRACT_VIEWS_KEY = "extract_views" + +# Define reasons for blocking slot uploads +BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS = "ALREADY_EXISTS" +BLOCKED_UPLOAD_ERROR_FILE_UPLOAD_TIMEOUT = "FILE_UPLOAD_TIMEOUT" +BLOCKED_UPLOAD_ERROR_FILE_UPLOAD_FAILED = "FILE_UPLOAD_FAILED" +BLOCKED_UPLOAD_ERROR_UNEXPECTED_ERROR = "UNEXPECTED_ERROR" +BLOCKED_UPLOAD_ERROR_ITEM_COUNT_LIMIT_EXCEEDED = "ITEM_COUNT_LIMIT_EXCEEDED" + +SLOTS_GRID_MAP = { + 1: [[["0"]]], + 2: [[["0"]], [["1"]]], + 3: [[["0"]], [["1"]], [["2"]]], + 4: 
[[["0"], ["2"]], [["1"], ["3"]]],
+    5: [[["0"], ["3"]], [["1"], ["4"]], [["2"]]],
+    6: [[["0"], ["3"]], [["1"], ["4"]], [["2"], ["5"]]],
+    7: [[["0"], ["3"], ["6"]], [["1"], ["4"]], [["2"], ["5"]]],
+    8: [[["0"], ["3"], ["6"]], [["1"], ["4"], ["7"]], [["2"], ["5"]]],
+    9: [[["0"], ["3"], ["6"]], [["1"], ["4"], ["7"]], [["2"], ["5"], ["8"]]],
+    10: [[["0"], ["4"], ["8"]], [["1"], ["5"], ["9"]], [["2"], ["6"]], [["3"], ["7"]]],
+    11: [
+        [["0"], ["4"], ["8"]],
+        [["1"], ["5"], ["9"]],
+        [["2"], ["6"], ["10"]],
+        [["3"], ["7"]],
+    ],
+    12: [
+        [["0"], ["4"], ["8"]],
+        [["1"], ["5"], ["9"]],
+        [["2"], ["6"], ["10"]],
+        [["3"], ["7"], ["11"]],
+    ],
+    13: [
+        [["0"], ["4"], ["8"], ["12"]],
+        [["1"], ["5"], ["9"]],
+        [["2"], ["6"], ["10"]],
+        [["3"], ["7"], ["11"]],
+    ],
+    14: [
+        [["0"], ["4"], ["8"], ["12"]],
+        [["1"], ["5"], ["9"], ["13"]],
+        [["2"], ["6"], ["10"]],
+        [["3"], ["7"], ["11"]],
+    ],
+    15: [
+        [["0"], ["4"], ["8"], ["12"]],
+        [["1"], ["5"], ["9"], ["13"]],
+        [["2"], ["6"], ["10"], ["14"]],
+        [["3"], ["7"], ["11"]],
+    ],
+    16: [
+        [["0"], ["4"], ["8"], ["12"]],
+        [["1"], ["5"], ["9"], ["13"]],
+        [["2"], ["6"], ["10"], ["14"]],
+        [["3"], ["7"], ["11"], ["15"]],
+    ],
+}
+
+
 _darwin_schema_cache = {}
diff --git a/tests/darwin/cli_functions_test.py b/tests/darwin/cli_functions_test.py
index 7ddf1a78f..6f5c0fe9e 100644
--- a/tests/darwin/cli_functions_test.py
+++ b/tests/darwin/cli_functions_test.py
@@ -12,6 +12,7 @@
 from darwin.dataset import RemoteDataset
 from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2
 from tests.fixtures import *
+from darwin.utils import BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS


 @pytest.fixture
@@ -57,7 +58,7 @@ def test_default_non_verbose(
                         {
                             "type": "image",
                             "file_name": "test_1.jpg",
-                            "reason": "ALREADY_EXISTS",
+                            "reason": BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS,
                             "slot_name": "0",
                             "upload_id": "123e4567-e89b-12d3-a456-426614174000",
                             "as_frames": False,
@@ -160,7 +161,7 @@ def test_with_verbose_flag(
                         {
                             "type": "image",
                             "file_name": "test_1.jpg",
-                            "reason": "ALREADY_EXISTS",
+                            "reason": BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS,
                             "slot_name": "0",
                             "upload_id": "123e4567-e89b-12d3-a456-426614174000",
                             "as_frames": False,
diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py
index b8b86d321..3eb4f7acb 100644
--- a/tests/darwin/dataset/remote_dataset_test.py
+++ b/tests/darwin/dataset/remote_dataset_test.py
@@ -22,14 +22,14 @@
 from darwin.dataset.release import Release, ReleaseStatus
 from darwin.dataset.remote_dataset_v2 import (
     RemoteDatasetV2,
-    _find_files_to_upload_merging,
+    _find_files_to_upload_as_multi_file_items,
 )
 from darwin.dataset.upload_manager import (
     ItemMergeMode,
     LocalFile,
     UploadHandlerV2,
-    LAYOUT_SHAPE_MAP,
 )
+from darwin.utils.utils import SLOTS_GRID_MAP
 from darwin.datatypes import ManifestItem, ObjectStore, SegmentManifest
 from darwin.exceptions import UnsupportedExportFormat, UnsupportedFileType
 from darwin.item import DatasetItem
@@ -708,34 +708,38 @@ def test_different_numbers_of_input_files(self, setup_zip):
                 ValueError,
                 match="No valid folders to upload after searching the passed directories for files",
             ):
-                _find_files_to_upload_merging(
+                _find_files_to_upload_as_multi_file_items(
                     [directory], [], 0, item_merge_mode="slots"
                 )
                 continue
-            local_files, multi_file_items = _find_files_to_upload_merging(
+            local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
                 [directory], [], 0, item_merge_mode="slots"
             )
             num_local_files = len(local_files)
             expected_num_files = int(directory.name)
+            num_viewports = min(num_local_files, 16)
             assert len(multi_file_items) == 1
             assert num_local_files == expected_num_files
             assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS
             assert multi_file_items[0].files == local_files
             assert multi_file_items[0].directory == directory
             assert multi_file_items[0].name == directory.name
+            assert multi_file_items[0].slot_names == [
+                str(i) for i in range(num_local_files)
+            ]
             assert multi_file_items[0].layout == {
-                "version": 2,
-                "slots": [str(i) for i in range(num_local_files)],
-                "type": "grid" if num_local_files >= 4 else "horizontal",
-                "layout_shape": LAYOUT_SHAPE_MAP.get(num_local_files, [4, 4]),
+                "version": 3,
+                "slots_grid": SLOTS_GRID_MAP.get(num_viewports),
             }

     def test_does_not_recursively_search(self, setup_zip):
         directory = setup_zip / "push_test_dir" / "topdir"
-        local_files, multi_file_items = _find_files_to_upload_merging(
+        local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
             [directory], [], 0, item_merge_mode="slots"
         )
+        num_local_files = len(local_files)
+        num_viewports = min(num_local_files, 16)
         assert len(multi_file_items) == 1
         assert len(local_files) == 2
         assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS
@@ -743,17 +747,17 @@ def test_does_not_recursively_search(self, setup_zip):
         assert multi_file_items[0].directory == directory
         assert multi_file_items[0].name == directory.name
         assert multi_file_items[0].layout == {
-            "version": 2,
-            "slots": [str(i) for i in range(len(local_files))],
-            "type": "horizontal",
-            "layout_shape": [2, 1],
+            "version": 3,
+            "slots_grid": SLOTS_GRID_MAP.get(num_viewports),
         }

     def test_dicoms(self, setup_zip):
         directory = setup_zip / "push_test_dir" / "dicom_tests" / "dicoms"
-        local_files, multi_file_items = _find_files_to_upload_merging(
+        local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
             [directory], [], 0, item_merge_mode="slots"
         )
+        num_local_files = len(local_files)
+        num_viewports = min(num_local_files, 16)
         assert len(multi_file_items) == 1
         assert len(local_files) == 5
         assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS
@@ -761,19 +765,19 @@ def test_dicoms(self, setup_zip):
         assert multi_file_items[0].directory == directory
         assert multi_file_items[0].name == directory.name
         assert multi_file_items[0].layout == {
-            "version": 2,
-            "slots": [str(i) for i in range(len(local_files))],
-            "type": "grid",
-            "layout_shape": [3, 2],
+            "version": 3,
+            "slots_grid": SLOTS_GRID_MAP.get(num_viewports),
         }

     def test_dicoms_and_other_files(self, setup_zip):
         directory = (
             setup_zip / "push_test_dir" / "dicom_tests" / "dicoms_and_other_files"
         )
-        local_files, multi_file_items = _find_files_to_upload_merging(
+        local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
             [directory], [], 0, item_merge_mode="slots"
         )
+        num_local_files = len(local_files)
+        num_viewports = min(num_local_files, 16)
         assert len(multi_file_items) == 1
         assert len(local_files) == 10
         assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS
@@ -781,17 +785,17 @@ def test_dicoms_and_other_files(self, setup_zip):
         assert multi_file_items[0].directory == directory
         assert multi_file_items[0].name == directory.name
         assert multi_file_items[0].layout == {
-            "version": 2,
-            "slots": [str(i) for i in range(len(local_files))],
-            "type": "grid",
-            "layout_shape": [4, 3],
+            "version": 3,
+            "slots_grid": SLOTS_GRID_MAP.get(num_viewports),
         }

     def test_multiple_file_types(self, setup_zip):
         directory = setup_zip / "push_test_dir" / "multiple_file_types"
-
local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="slots" ) + num_local_files = len(local_files) + num_viewports = min(num_local_files, 16) assert len(multi_file_items) == 1 assert len(local_files) == 12 assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS @@ -799,10 +803,8 @@ def test_multiple_file_types(self, setup_zip): assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name assert multi_file_items[0].layout == { - "version": 2, - "slots": [str(i) for i in range(len(local_files))], - "type": "grid", - "layout_shape": [4, 3], + "version": 3, + "slots_grid": SLOTS_GRID_MAP.get(num_viewports), } @@ -810,7 +812,7 @@ def test_multiple_file_types(self, setup_zip): class TestPushDICOMSeries: def test_dicoms(self, setup_zip): directory = setup_zip / "push_test_dir" / "dicom_tests" / "dicoms" - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="series" ) assert len(multi_file_items) == 1 @@ -820,16 +822,15 @@ def test_dicoms(self, setup_zip): assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name assert multi_file_items[0].layout == { - "version": 2, - "slots": [directory.name], - "type": "grid", + "version": 3, + "slots_grid": [[["dicoms"]]], } def test_dicoms_and_other_files(self, setup_zip): directory = ( setup_zip / "push_test_dir" / "dicom_tests" / "dicoms_and_other_files" ) - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="series" ) assert len(multi_file_items) == 1 @@ -839,14 +840,13 @@ def test_dicoms_and_other_files(self, setup_zip): assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name assert multi_file_items[0].layout == { - "version": 2, - "slots": [directory.name], - "type": "grid", + "version": 3, + "slots_grid": [[["dicoms_and_other_files"]]], } def test_multiple_file_types(self, setup_zip): directory = setup_zip / "push_test_dir" / "multiple_file_types" - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="series" ) assert len(multi_file_items) == 1 @@ -856,9 +856,8 @@ def test_multiple_file_types(self, setup_zip): assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name assert multi_file_items[0].layout == { - "version": 2, - "slots": [directory.name], - "type": "grid", + "version": 3, + "slots_grid": [[["multiple_file_types"]]], } @@ -873,7 +872,7 @@ def test_different_numbers_of_input_files(self, setup_zip): ValueError, match="No valid folders to upload after searching the passed directories for files", ): - _find_files_to_upload_merging( + _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="channels" ) continue @@ -883,12 +882,12 @@ def test_different_numbers_of_input_files(self, setup_zip): ValueError, match="No multi-channel item can have more than 16 files. 
The following directory has 17 files: ", ): - _find_files_to_upload_merging( + _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="channels" ) continue - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="channels" ) num_local_files = len(local_files) @@ -906,7 +905,7 @@ def test_different_numbers_of_input_files(self, setup_zip): def test_does_not_recursively_search(self, setup_zip): directory = setup_zip / "push_test_dir" / "topdir" - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="channels" ) assert len(multi_file_items) == 1 @@ -922,7 +921,7 @@ def test_does_not_recursively_search(self, setup_zip): def test_multiple_file_types(self, setup_zip): directory = setup_zip / "push_test_dir" / "multiple_file_types" - local_files, multi_file_items = _find_files_to_upload_merging( + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( [directory], [], 0, item_merge_mode="channels" ) assert len(multi_file_items) == 1 diff --git a/tests/darwin/dataset/upload_manager_test.py b/tests/darwin/dataset/upload_manager_test.py index a6ca355fb..7c0a0199a 100644 --- a/tests/darwin/dataset/upload_manager_test.py +++ b/tests/darwin/dataset/upload_manager_test.py @@ -16,6 +16,7 @@ _upload_chunk_size, ) from tests.fixtures import * +from darwin.utils import BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS @pytest.fixture @@ -123,7 +124,7 @@ def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoin { "type": "image", "file_name": "test.jpg", - "reason": "ALREADY_EXISTS", + "reason": BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS, "slot_name": "0", "upload_id": "123e4567-e89b-12d3-a456-426614174000", "as_frames": False, @@ -149,7 +150,7 @@ def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert blocked_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert blocked_item.filename == "test.jpg" assert blocked_item.path == "/" - assert blocked_item.reasons[0] == "ALREADY_EXISTS" + assert blocked_item.reasons[0] == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS @pytest.mark.usefixtures("file_read_write_test") diff --git a/tests/darwin/utils/flatten_list_test.py b/tests/darwin/utils/flatten_list_test.py index 41ab75b56..62d321134 100644 --- a/tests/darwin/utils/flatten_list_test.py +++ b/tests/darwin/utils/flatten_list_test.py @@ -28,6 +28,4 @@ def test_returns_flattened_list_if_passed_nested_list_with_different_depth() -> if __name__ == "__main__": import sys - import pytest - sys.exit(pytest.main(["-v", "-x", __file__])) From 09c623e04eb79707099760e1db6f87cdadc4d29b Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Tue, 17 Sep 2024 11:10:24 +0100 Subject: [PATCH 12/12] Better API response typing + other misc changes --- darwin/cli_functions.py | 10 ++-- darwin/dataset/download_manager.py | 2 +- darwin/dataset/upload_manager.py | 52 ++++++++----------- darwin/datatypes.py | 17 +++++- darwin/importer/formats/nifti.py | 2 +- darwin/utils/utils.py | 16 +++--- tests/darwin/dataset/download_manager_test.py | 20 +++---- tests/darwin/dataset/upload_manager_test.py | 4 +- 8 files changed, 66 insertions(+), 57 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index d352c78e0..32f996366 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -800,8 +800,8 
@@ def file_upload_callback( other_skipped_items = [] for item in upload_manager.blocked_items: for slot in item.slots: - if (slot["reason"] is not None) and ( - slot["reason"].upper() == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS + if (slot.reason is not None) and ( + slot.reason.upper() == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS ): already_existing_items.append(item) else: @@ -835,15 +835,15 @@ def file_upload_callback( ) for item in upload_manager.blocked_items: for slot in item.slots: - if (slot["reason"] is not None) and ( - slot["reason"].upper() != BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS + if (slot.reason is not None) and ( + slot.reason.upper() != BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS ): error_table.add_row( str(item.dataset_item_id), item.filename, item.path, "UPLOAD_REQUEST", - slot["reason"], + slot.reason, ) for error in upload_manager.errors: for local_file in upload_manager.local_files: diff --git a/darwin/dataset/download_manager.py b/darwin/dataset/download_manager.py index 752f37579..2fc0d061e 100644 --- a/darwin/dataset/download_manager.py +++ b/darwin/dataset/download_manager.py @@ -672,7 +672,7 @@ def _get_planned_image_paths( for slot in annotation.slots: slot_name = Path(slot.name) for source_file in slot.source_files: - file_name = source_file["file_name"] + file_name = source_file.file_name if use_folders and annotation.remote_path != "/": file_paths.append( images_path diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index 83b56d0d3..d47342276 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -20,7 +20,7 @@ import requests -from darwin.datatypes import PathLike +from darwin.datatypes import PathLike, Slot, SourceFile from darwin.doc_enum import DocEnum from darwin.path_utils import construct_full_path from darwin.utils import chunk @@ -34,22 +34,6 @@ from abc import ABC, abstractmethod -LAYOUT_SHAPE_MAP = { - 1: [1, 1], - 2: [2, 1], - 3: [3, 1], - 4: [2, 2], - 5: [3, 2], - 6: [3, 2], - 7: [4, 2], - 8: [4, 2], - 9: [3, 3], - 10: [4, 3], - 11: [4, 3], - 12: [4, 3], -} - - class ItemMergeMode(Enum): SLOTS = "slots" SERIES = "series" @@ -79,8 +63,6 @@ class ItemPayload: The filename of where this ``ItemPayload``'s data is. path : str The path to ``filename``. - reasons : Optional[List[str]], default: None - A per-slot reason to upload this ``ItemPayload``. 
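+    slots : List[Dict[str, str]]
+        The per-slot payloads for this item, as returned by the item registration
+        API; each dict is parsed into a typed ``Slot`` on construction.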
""" def __init__( @@ -90,13 +72,21 @@ def __init__( filename: str, path: str, reasons: Optional[List[str]] = None, - slots: Optional[any] = None, + slots: List[Dict[str, str]], ): self.dataset_item_id = dataset_item_id self.filename = filename self.path = PurePosixPath(path).as_posix() - self.reasons = reasons - self.slots = slots + self.slots = [ + Slot( + type=slot["type"], + source_files=[SourceFile(file_name=slot["file_name"])], + name=slot["slot_name"], + upload_id=slot["upload_id"] if "upload_id" in slot else None, + reason=slot["reason"] if "reason" in slot else None, + ) + for slot in slots + ] @staticmethod def parse_v2(payload): @@ -193,7 +183,7 @@ def serialize(self): "name": self.data["filename"], } - def serialize_v2(self): + def serialize_darwin_json_v2(self): optional_properties = ["tags", "fps", "as_frames", "extract_views"] slot = {"file_name": self.data["filename"], "slot_name": "0"} for optional_property in optional_properties: @@ -235,7 +225,7 @@ def _create_layout(self): self.slot_names = [] if self.merge_mode == ItemMergeMode.SLOTS: num_viewports = min(len(self.files), 16) - slots_grid = SLOTS_GRID_MAP.get(num_viewports) + slots_grid = SLOTS_GRID_MAP[num_viewports] self.layout = { "version": 3, "slots_grid": slots_grid, @@ -273,7 +263,7 @@ def _create_layout(self): } self.slot_names = self.layout["slots_grid"][0][0] - def serialize_v2(self): + def serialize_darwin_json_v2(self): optional_properties = ["fps"] slots = [] for idx, local_file in enumerate(self.files): @@ -534,7 +524,9 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: upload_payloads.extend( [ { - "items": [file.serialize_v2() for file in file_chunk], + "items": [ + file.serialize_darwin_json_v2() for file in file_chunk + ], "options": {"ignore_dicom_layout": True}, } for file_chunk in chunk(self.multi_file_items, chunk_size) @@ -553,7 +545,7 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: upload_payloads.extend( [ - {"items": [file.serialize_v2() for file in file_chunk]} + {"items": [file.serialize_darwin_json_v2() for file in file_chunk]} for file_chunk in chunk(single_file_items, chunk_size) ] ) @@ -581,8 +573,10 @@ def upload_function( file_lookup = {file.full_path: file for file in self.local_files} for item in self.pending_items: for slot in item.slots: - upload_id = slot["upload_id"] - slot_path = (Path(item.path) / Path((slot["file_name"]))).as_posix() + upload_id = slot.upload_id + slot_path = ( + Path(item.path) / Path((slot.source_files[0].file_name)) + ).as_posix() file = file_lookup.get(str(slot_path)) if not file: raise ValueError( diff --git a/darwin/datatypes.py b/darwin/datatypes.py index c2ef99d26..2f75ef8d0 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -361,7 +361,7 @@ class Slot: type: str #: Original upload information for the slot - source_files: List[Dict[str, str]] + source_files: List[SourceFile] #: Thumbnail url to the file thumbnail_url: Optional[str] = None @@ -390,6 +390,21 @@ class Slot: #: Segments for video slots segments: Optional[List[Dict[str, UnknownType]]] = None + #: Upload ID + upload_id: Optional[str] = None + + #: The reason for blocking upload of this slot, if it was blocked + reason: Optional[str] = None + + +@dataclass +class SourceFile: + #: File name of source file + file_name: str + + #: URL of file + url: Optional[str] = None + @dataclass class AnnotationFileVersion: diff --git a/darwin/importer/formats/nifti.py b/darwin/importer/formats/nifti.py index e871b2532..a86a70d89 100644 --- 
a/darwin/importer/formats/nifti.py +++ b/darwin/importer/formats/nifti.py @@ -170,7 +170,7 @@ def _parse_nifti( dt.Slot( name=slot_name, type="dicom", - source_files=[{"url": None, "file_name": str(filename)}], + source_files=[dt.SourceFile(file_name=str(filename), url=None)], ) for slot_name in slot_names ], diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index dcbc068e3..ba95198c3 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -658,10 +658,10 @@ def _parse_darwin_image( name=None, type="image", source_files=[ - { - "url": data["image"].get("url"), - "file_name": _get_local_filename(data["image"]), - } + dt.SourceFile( + file_name=_get_local_filename(data["image"]), + url=data["image"].get("url"), + ) ], thumbnail_url=data["image"].get("thumbnail_url"), width=data["image"].get("width"), @@ -708,10 +708,10 @@ def _parse_darwin_video( name=None, type="video", source_files=[ - { - "url": data["image"].get("url"), - "file_name": _get_local_filename(data["image"]), - } + dt.SourceFile( + file_name=_get_local_filename(data["image"]), + url=data["image"].get("url"), + ) ], thumbnail_url=data["image"].get("thumbnail_url"), width=data["image"].get("width"), diff --git a/tests/darwin/dataset/download_manager_test.py b/tests/darwin/dataset/download_manager_test.py index 06d9045d3..ee0f5380c 100644 --- a/tests/darwin/dataset/download_manager_test.py +++ b/tests/darwin/dataset/download_manager_test.py @@ -6,7 +6,7 @@ import responses from darwin.dataset import download_manager as dm -from darwin.datatypes import AnnotationClass, AnnotationFile, Slot +from darwin.datatypes import AnnotationClass, AnnotationFile, Slot, SourceFile from tests.fixtures import * @@ -89,7 +89,7 @@ def test_single_slot_without_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name.jpg"}], + source_files=[SourceFile(file_name="source_name.jpg")], ) ], remote_path="/", @@ -112,7 +112,7 @@ def test_single_slot_with_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name.jpg"}], + source_files=[SourceFile(file_name="source_name.jpg")], ) ], remote_path="/remote/path", @@ -135,12 +135,12 @@ def test_multi_slot_without_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name_1.jpg"}], + source_files=[SourceFile(file_name="source_name_1.jpg")], ), Slot( name="slot2", type="image", - source_files=[{"file_name": "source_name_2.jpg"}], + source_files=[SourceFile(file_name="source_name_2.jpg")], ), ], remote_path="/", @@ -166,12 +166,12 @@ def test_multi_slot_with_folders_planned_image_path(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name_1.jpg"}], + source_files=[SourceFile(file_name="source_name_1.jpg")], ), Slot( name="slot2", type="image", - source_files=[{"file_name": "source_name_2.jpg"}], + source_files=[SourceFile(file_name="source_name_2.jpg")], ), ], remote_path="/remote/path", @@ -197,7 +197,7 @@ def test_single_slot_root_path_with_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name.jpg"}], + source_files=[SourceFile(file_name="source_name.jpg")], ) ], remote_path="/", @@ -221,8 +221,8 @@ def test_multiple_source_files_planned_image_paths(): name="slot1", type="image", source_files=[ - {"file_name": "source_name_1.jpg"}, - {"file_name": "source_name_2.jpg"}, + SourceFile(file_name="source_name_1.jpg"), + SourceFile(file_name="source_name_2.jpg"), ], ) ], 
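The net effect of the typing changes in this patch is that consumers of
`ItemPayload.slots` move from dict indexing to attribute access. A minimal
sketch of the new behaviour (the ID and filenames are illustrative, not a real
API response):

    from darwin.dataset.upload_manager import ItemPayload

    payload = ItemPayload(
        dataset_item_id="3b241101-e2bb-4255-8caf-4136c566a964",
        filename="test.jpg",
        path="/",
        slots=[
            {
                "type": "image",
                "file_name": "test.jpg",
                "slot_name": "0",
                "reason": "ALREADY_EXISTS",
            }
        ],
    )

    # Previously payload.slots[0]["reason"] on a raw dict; now a typed Slot
    assert payload.slots[0].reason == "ALREADY_EXISTS"
    assert payload.slots[0].source_files[0].file_name == "test.jpg"
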
diff --git a/tests/darwin/dataset/upload_manager_test.py b/tests/darwin/dataset/upload_manager_test.py index 7c0a0199a..ca695232a 100644 --- a/tests/darwin/dataset/upload_manager_test.py +++ b/tests/darwin/dataset/upload_manager_test.py @@ -108,7 +108,7 @@ def test_pending_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert pending_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert pending_item.filename == "test.jpg" assert pending_item.path == "/" - assert pending_item.reasons[0] is None + assert pending_item.slots[0].reason is None @pytest.mark.usefixtures("file_read_write_test") @@ -150,7 +150,7 @@ def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert blocked_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert blocked_item.filename == "test.jpg" assert blocked_item.path == "/" - assert blocked_item.reasons[0] == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS + assert blocked_item.slots[0].reason == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS @pytest.mark.usefixtures("file_read_write_test")
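
With the full series applied, each folder passed to `push` can be uploaded as a
single multi-slotted, series, or multi-channel item. A minimal usage sketch (the
team, dataset, and folder names are illustrative, and the CLI flag name assumes
the `--item-merge-mode` option added to darwin/options.py in this series):

    from darwin.client import Client

    client = Client.local()
    dataset = client.get_remote_dataset("my-team/my-dataset")
    # One folder -> one item; its files are uploaded to slots "0", "1", ...
    dataset.push(["path/to/scans"], item_merge_mode="slots")

Or, from the command line:

    darwin dataset push my-team/my-dataset path/to/scans --item-merge-mode slots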