diff --git a/darwin/cli.py b/darwin/cli.py index bf493341a..c48f55e38 100644 --- a/darwin/cli.py +++ b/darwin/cli.py @@ -126,6 +126,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: args.extract_views, args.preserve_folders, args.verbose, + args.item_merge_mode, ) # Remove a project (remotely) elif args.action == "remove": diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 5d01d7a9d..32f996366 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -70,6 +70,7 @@ prompt, secure_continue_request, validate_file_against_schema, + BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS, ) @@ -656,6 +657,7 @@ def upload_data( extract_views: bool = False, preserve_folders: bool = False, verbose: bool = False, + item_merge_mode: Optional[str] = None, ) -> None: """ Uploads the provided files to the remote dataset. @@ -684,6 +686,14 @@ def upload_data( Specify whether or not to preserve folder paths when uploading. verbose : bool Specify whether to have full traces print when uploading files or not. + item_merge_mode : Optional[str] + If set, each file path passed to `files_to_upload` behaves as follows: + - Paths pointing directly to individual files are ignored + - Paths pointing to folders of files will be uploaded according to the following mode rules. + Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: + - "slots": Each file in the folder will be uploaded to a different slot of the same item. + - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. + - "channels": Each file in the folder will be uploaded to a different channel of the same item. """ client: Client = _load_client() try: @@ -773,6 +783,7 @@ def file_upload_callback( preserve_folders=preserve_folders, progress_callback=progress_callback, file_upload_callback=file_upload_callback, + item_merge_mode=item_merge_mode, ) console = Console(theme=_console_theme()) @@ -788,10 +799,13 @@ def file_upload_callback( already_existing_items = [] other_skipped_items = [] for item in upload_manager.blocked_items: - if (item.reason is not None) and (item.reason.upper() == "ALREADY_EXISTS"): - already_existing_items.append(item) - else: - other_skipped_items.append(item) + for slot in item.slots: + if (slot.reason is not None) and ( + slot.reason.upper() == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS + ): + already_existing_items.append(item) + else: + other_skipped_items.append(item) if already_existing_items: console.print( @@ -819,17 +833,18 @@ def file_upload_callback( show_header=True, header_style="bold cyan", ) - for item in upload_manager.blocked_items: - if item.reason != "ALREADY_EXISTS": - error_table.add_row( - str(item.dataset_item_id), - item.filename, - item.path, - "UPLOAD_REQUEST", - item.reason, - ) - + for slot in item.slots: + if (slot.reason is not None) and ( + slot.reason.upper() != BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS + ): + error_table.add_row( + str(item.dataset_item_id), + item.filename, + item.path, + "UPLOAD_REQUEST", + slot.reason, + ) for error in upload_manager.errors: for local_file in upload_manager.local_files: if local_file.local_path != error.file_path: @@ -855,8 +870,8 @@ def file_upload_callback( _error(f"No dataset with name '{e.name}'") except UnsupportedFileType as e: _error(f"Unsupported file type {e.path.suffix} ({e.path.name})") - except ValueError: - _error("No files found") + except ValueError as e: + _error(f"{e}") def dataset_import( diff --git a/darwin/dataset/download_manager.py b/darwin/dataset/download_manager.py index 752f37579..2fc0d061e 100644 --- a/darwin/dataset/download_manager.py +++ b/darwin/dataset/download_manager.py @@ -672,7 +672,7 @@ def _get_planned_image_paths( for slot in annotation.slots: slot_name = Path(slot.name) for source_file in slot.source_files: - file_name = source_file["file_name"] + file_name = source_file.file_name if use_folders and annotation.remote_path != "/": file_paths.append( images_path diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index e95cc00e9..744b73860 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -138,6 +138,7 @@ def push( preserve_folders: bool = False, progress_callback: Optional[ProgressCallback] = None, file_upload_callback: Optional[FileUploadCallback] = None, + item_merge_mode: Optional[str] = None, ) -> UploadHandler: pass diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 59f87b320..b55d60d6d 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -1,4 +1,5 @@ import json +from pathlib import Path from typing import ( TYPE_CHECKING, Any, @@ -18,7 +19,9 @@ from darwin.dataset.release import Release from darwin.dataset.upload_manager import ( FileUploadCallback, + ItemMergeMode, LocalFile, + MultiFileItem, ProgressCallback, UploadHandler, UploadHandlerV2, @@ -42,7 +45,14 @@ from darwin.exporter.formats.darwin import build_image_annotation from darwin.item import DatasetItem from darwin.item_sorter import ItemSorter -from darwin.utils import SUPPORTED_EXTENSIONS, find_files, urljoin +from darwin.utils import ( + SUPPORTED_EXTENSIONS, + PRESERVE_FOLDERS_KEY, + AS_FRAMES_KEY, + EXTRACT_VIEWS_KEY, + find_files, + urljoin, +) if TYPE_CHECKING: from darwin.client import Client @@ -166,6 +176,7 @@ def push( preserve_folders: bool = False, progress_callback: Optional[ProgressCallback] = None, file_upload_callback: Optional[FileUploadCallback] = None, + item_merge_mode: Optional[str] = None, ) -> UploadHandler: """ Uploads a local dataset (images ONLY) in the datasets directory. @@ -173,7 +184,8 @@ def push( Parameters ---------- files_to_upload : Optional[List[Union[PathLike, LocalFile]]] - List of files to upload. Those can be folders. + List of files to upload. These can be folders. + If `item_merge_mode` is set, these paths must be folders. blocking : bool, default: True If False, the dataset is not uploaded and a generator function is returned instead. multi_threaded : bool, default: True @@ -188,7 +200,7 @@ def push( extract_views: bool, default: False When the uploading file is a volume, specify whether it's going to be split into orthogonal views. files_to_exclude : Optional[PathLike]], default: None - Optional list of files to exclude from the file scan. Those can be folders. + Optional list of files to exclude from the file scan. These can be folders. path: Optional[str], default: None Optional path to store the files in. preserve_folders : bool, default: False @@ -197,11 +209,18 @@ def push( Optional callback, called every time the progress of an uploading files is reported. file_upload_callback: Optional[FileUploadCallback], default: None Optional callback, called every time a file chunk is uploaded. - + item_merge_mode : Optional[str] + If set, each file path passed to `files_to_upload` behaves as follows: + - Paths pointing directly to individual files are ignored + - Paths pointing to folders of files will be uploaded according to the following mode rules. + Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: + - "slots": Each file in the folder will be uploaded to a different slot of the same item. + - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. + - "channels": Each file in the folder will be uploaded to a different channel of the same item. Returns ------- handler : UploadHandler - Class for handling uploads, progress and error messages. + Class for handling uploads, progress and error messages. Raises ------ @@ -210,53 +229,57 @@ def push( - If a path is specified when uploading a LocalFile object. - If there are no files to upload (because path is wrong or the exclude filter excludes everything). """ + merge_incompatible_args = { + PRESERVE_FOLDERS_KEY: preserve_folders, + AS_FRAMES_KEY: as_frames, + EXTRACT_VIEWS_KEY: extract_views, + } + if files_to_exclude is None: files_to_exclude = [] if files_to_upload is None: raise ValueError("No files or directory specified.") - uploading_files = [ - item for item in files_to_upload if isinstance(item, LocalFile) - ] + if item_merge_mode: + try: + ItemMergeMode(item_merge_mode) + except ValueError: + raise ValueError( + f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels'" + ) + incompatible_args = [ + arg for arg, value in merge_incompatible_args.items() if value + ] + + if incompatible_args: + incompatible_args_str = ", ".join(incompatible_args) + raise TypeError( + f"`item_merge_mode` does not support the following incompatible arguments: {incompatible_args_str}." + ) + + # Folder paths search_files = [ item for item in files_to_upload if not isinstance(item, LocalFile) ] - generic_parameters_specified = ( - path is not None or fps != 0 or as_frames is not False - ) - if uploading_files and generic_parameters_specified: - raise ValueError("Cannot specify a path when uploading a LocalFile object.") - - for found_file in find_files(search_files, files_to_exclude=files_to_exclude): - local_path = path - if preserve_folders: - source_files = [ - source_file - for source_file in search_files - if is_relative_to(found_file, source_file) - ] - if source_files: - local_path = str( - found_file.relative_to(source_files[0]).parent.as_posix() - ) - uploading_files.append( - LocalFile( - found_file, - fps=fps, - as_frames=as_frames, - extract_views=extract_views, - path=local_path, - ) + if item_merge_mode: + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + search_files, files_to_exclude, fps, item_merge_mode ) - - if not uploading_files: - raise ValueError( - "No files to upload, check your path, exclusion filters and resume flag" + handler = UploadHandlerV2(self, local_files, multi_file_items) + else: + local_files = _find_files_to_upload_as_single_file_items( + search_files, + files_to_upload, + files_to_exclude, + path, + fps, + as_frames, + extract_views, + preserve_folders, ) - - handler = UploadHandlerV2(self, uploading_files) + handler = UploadHandlerV2(self, local_files) if blocking: handler.upload( max_workers=max_workers, @@ -842,3 +865,140 @@ def register_multi_slotted( print(f" - {item}") print(f"Reistration complete. Check your items in the dataset: {self.slug}") return results + + +def _find_files_to_upload_as_multi_file_items( + search_files: List[PathLike], + files_to_exclude: List[PathLike], + fps: int, + item_merge_mode: str, +) -> Tuple[List[LocalFile], List[MultiFileItem]]: + """ + Finds files to upload according to the `item_merge_mode`. + Does not search each directory recursively, only considers files in the first level of each directory. + + Parameters + ---------- + search_files : List[PathLike] + List of directories to search for files. + files_to_exclude : List[PathLike] + List of files to exclude from the file scan. + item_merge_mode : str + Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'. + fps : int + When uploading video files, specify the framerate + + Returns + ------- + List[LocalFile] + List of `LocalFile` objects contained within each `MultiFileItem` + List[MultiFileItem] + List of `MultiFileItem` objects to be uploaded + """ + multi_file_items, local_files = [], [] + for directory in search_files: + files_in_directory = list( + find_files( + [directory], + files_to_exclude=files_to_exclude, + recursive=False, + sort=True, + ) + ) + if not files_in_directory: + print( + f"Warning: There are no files in the first level of {directory}, skipping directory" + ) + continue + multi_file_item = MultiFileItem( + Path(directory), files_in_directory, ItemMergeMode(item_merge_mode), fps + ) + multi_file_items.append(multi_file_item) + local_files.extend(multi_file_item.files) + + if not multi_file_items: + raise ValueError( + "No valid folders to upload after searching the passed directories for files" + ) + return local_files, multi_file_items + + +def _find_files_to_upload_as_single_file_items( + search_files: List[PathLike], + files_to_upload: Optional[Sequence[Union[PathLike, LocalFile]]], + files_to_exclude: List[PathLike], + path: Optional[str], + fps: int, + as_frames: bool, + extract_views: bool, + preserve_folders: bool, +) -> List[LocalFile]: + """ + Finds files to upload as single-slotted dataset items. Recursively searches the passed directories for files. + + Parameters + ---------- + search_files : List[PathLike] + List of directories to search for files. + + files_to_exclude : Optional[List[PathLike]] + List of files to exclude from the file scan. + files_to_upload : Optional[List[Union[PathLike, LocalFile]]] + List of files to upload. These can be folders. + path : Optional[str] + Path to store the files in. + fps: int + When uploading video files, specify the framerate. + as_frames: bool + When uploading video files, specify whether to upload as a list of frames. + extract_views: bool + When uploading volume files, specify whether to split into orthogonal views. + preserve_folders: bool + Specify whether or not to preserve folder paths when uploading. + + Returns + ------- + List[LocalFile] + List of files to upload. + """ + # Direct file paths + uploading_files = [item for item in files_to_upload if isinstance(item, LocalFile)] + + generic_parameters_specified = ( + path is not None or fps != 0 or as_frames is not False + ) + + if ( + any(isinstance(item, LocalFile) for item in uploading_files) + and generic_parameters_specified + ): + raise ValueError("Cannot specify a path when uploading a LocalFile object.") + + for found_file in find_files(search_files, files_to_exclude=files_to_exclude): + local_path = path + if preserve_folders: + source_files = [ + source_file + for source_file in search_files + if is_relative_to(found_file, source_file) + ] + if source_files: + local_path = str( + found_file.relative_to(source_files[0]).parent.as_posix() + ) + uploading_files.append( + LocalFile( + found_file, + fps=fps, + as_frames=as_frames, + extract_views=extract_views, + path=local_path, + ) + ) + + if not uploading_files: + raise ValueError( + "No files to upload, check your path, exclusion filters and resume flag" + ) + + return uploading_files diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index bcd28f8f7..d47342276 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -1,8 +1,10 @@ +from __future__ import annotations import concurrent.futures import os import time from dataclasses import dataclass -from pathlib import Path +from enum import Enum +from pathlib import Path, PurePosixPath from typing import ( TYPE_CHECKING, Any, @@ -13,14 +15,16 @@ Optional, Set, Tuple, + Dict, ) import requests -from darwin.datatypes import PathLike +from darwin.datatypes import PathLike, Slot, SourceFile from darwin.doc_enum import DocEnum from darwin.path_utils import construct_full_path from darwin.utils import chunk +from darwin.utils.utils import is_image_extension_allowed_by_filename, SLOTS_GRID_MAP if TYPE_CHECKING: from darwin.client import Client @@ -28,7 +32,12 @@ from darwin.dataset.identifier import DatasetIdentifier from abc import ABC, abstractmethod -from typing import Dict + + +class ItemMergeMode(Enum): + SLOTS = "slots" + SERIES = "series" + CHANNELS = "channels" class ItemPayload: @@ -43,8 +52,8 @@ class ItemPayload: The filename of where this ``ItemPayload``'s data is. path : str The path to ``filename``. - reason : Optional[str], default: None - A reason to upload this ``ItemPayload``. + reasons : Optional[List[str]], default: None + A per-slot reason to upload this ``ItemPayload``. Attributes ---------- @@ -54,8 +63,6 @@ class ItemPayload: The filename of where this ``ItemPayload``'s data is. path : str The path to ``filename``. - reason : Optional[str], default: None - A reason to upload this ``ItemPayload``. """ def __init__( @@ -64,25 +71,30 @@ def __init__( dataset_item_id: int, filename: str, path: str, - reason: Optional[str] = None, - slots: Optional[any] = None, + reasons: Optional[List[str]] = None, + slots: List[Dict[str, str]], ): self.dataset_item_id = dataset_item_id self.filename = filename - self.path = path - self.reason = reason - self.slots = slots + self.path = PurePosixPath(path).as_posix() + self.slots = [ + Slot( + type=slot["type"], + source_files=[SourceFile(file_name=slot["file_name"])], + name=slot["slot_name"], + upload_id=slot["upload_id"] if "upload_id" in slot else None, + reason=slot["reason"] if "reason" in slot else None, + ) + for slot in slots + ] @staticmethod def parse_v2(payload): - if len(payload["slots"]) > 1: - raise NotImplementedError("multiple files support not yet implemented") - slot = payload["slots"][0] return ItemPayload( dataset_item_id=payload.get("id", None), filename=payload["name"], path=payload["path"], - reason=slot.get("reason", None), + reasons=[slot.get("reason", None) for slot in payload["slots"]], slots=payload["slots"], ) @@ -152,7 +164,11 @@ class LocalFile: """ - def __init__(self, local_path: PathLike, **kwargs): + def __init__( + self, + local_path: PathLike, + **kwargs, + ): self.local_path = Path(local_path) self.data = kwargs self._type_check(kwargs) @@ -167,7 +183,7 @@ def serialize(self): "name": self.data["filename"], } - def serialize_v2(self): + def serialize_darwin_json_v2(self): optional_properties = ["tags", "fps", "as_frames", "extract_views"] slot = {"file_name": self.data["filename"], "slot_name": "0"} for optional_property in optional_properties: @@ -186,6 +202,83 @@ def full_path(self) -> str: return construct_full_path(self.data["path"], self.data["filename"]) +class MultiFileItem: + def __init__( + self, directory: Path, files: List[Path], merge_mode: ItemMergeMode, fps: int + ): + self.directory = directory + self.name = directory.name + self.files = [LocalFile(file, fps=fps) for file in files] + self.merge_mode = merge_mode + self._create_layout() + + def _create_layout(self): + """ + Sets the layout as a LayoutV3 object to be used when uploading the files as a dataset item. + + Raises + ------ + ValueError + - If no DICOM files are found in the directory for `ItemMergeMode.SERIES` items + - If the number of files is greater than 16 for `ItemMergeMode.CHANNELS` items + """ + self.slot_names = [] + if self.merge_mode == ItemMergeMode.SLOTS: + num_viewports = min(len(self.files), 16) + slots_grid = SLOTS_GRID_MAP[num_viewports] + self.layout = { + "version": 3, + "slots_grid": slots_grid, + } + self.slot_names = [str(i) for i in range(len(self.files))] + elif self.merge_mode == ItemMergeMode.SERIES: + self.files = [ + file for file in self.files if file.local_path.suffix.lower() == ".dcm" + ] + if not self.files: + raise ValueError("No `.dcm` files found in 1st level of directory") + self.slot_names = [self.name] * len(self.files) + self.layout = { + "version": 3, + "slots_grid": [[[self.name]]], + } + elif self.merge_mode == ItemMergeMode.CHANNELS: + # Currently, only image files are supported in multi-channel items. This is planned to change in the future + self.files = [ + file + for file in self.files + if is_image_extension_allowed_by_filename(str(file.local_path)) + ] + if not self.files: + raise ValueError( + "No supported filetypes found in 1st level of directory. Currently, multi-channel items only support images." + ) + if len(self.files) > 16: + raise ValueError( + f"No multi-channel item can have more than 16 files. The following directory has {len(self.files)} files: {self.directory}" + ) + self.layout = { + "version": 3, + "slots_grid": [[[file.local_path.name for file in self.files]]], + } + self.slot_names = self.layout["slots_grid"][0][0] + + def serialize_darwin_json_v2(self): + optional_properties = ["fps"] + slots = [] + for idx, local_file in enumerate(self.files): + slot = { + "file_name": local_file.data["filename"], + "slot_name": self.slot_names[idx], + } + for optional_property in optional_properties: + if optional_property in local_file.data: + slot[optional_property] = local_file.data.get(optional_property) + slots.append(slot) + + return {"slots": slots, "layout": self.layout, "name": self.name, "path": "/"} + + class FileMonitor(object): """ Monitors the progress of a :class:``BufferedReader``. @@ -259,31 +352,38 @@ class UploadHandler(ABC): ---------- dataset: RemoteDataset Target ``RemoteDataset`` where we want to upload our files to. - local_files : List[LocalFile] - List of ``LocalFile``\\s to be uploaded. + uploading_files : Union[List[LocalFile], List[MultiFileItems]] + List of ``LocalFile``\\s or ``MultiFileItem``\\s to be uploaded. Attributes ---------- dataset : RemoteDataset - Target ``RemoteDataset`` where we want to upload our files to.. + Target ``RemoteDataset`` where we want to upload our files to. errors : List[UploadRequestError] - List of errors that happened during the upload process. + List of errors that happened during the upload process local_files : List[LocalFile] List of ``LocalFile``\\s to be uploaded. + multi_file_items : List[MultiFileItem] + List of ``MultiFileItem``\\s to be uploaded. blocked_items : List[ItemPayload] List of items that were not able to be uploaded. pending_items : List[ItemPayload] List of items waiting to be uploaded. """ - def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]): - self.dataset: RemoteDataset = dataset - self.errors: List[UploadRequestError] = [] - self.local_files: List[LocalFile] = local_files + def __init__( + self, + dataset: "RemoteDataset", + local_files: List[LocalFile], + multi_file_items: Optional[List[MultiFileItem]] = None, + ): self._progress: Optional[ Iterator[Callable[[Optional[ByteReadCallback]], None]] ] = None - + self.multi_file_items = multi_file_items + self.local_files = local_files + self.dataset: RemoteDataset = dataset + self.errors: List[UploadRequestError] = [] self.blocked_items, self.pending_items = self._request_upload() @staticmethod @@ -402,22 +502,60 @@ def _upload_file( class UploadHandlerV2(UploadHandler): - def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]): - super().__init__(dataset=dataset, local_files=local_files) + def __init__( + self, + dataset: "RemoteDataset", + local_files: List[LocalFile], + multi_file_items: Optional[List[MultiFileItem]] = None, + ): + super().__init__( + dataset=dataset, + local_files=local_files, + multi_file_items=multi_file_items, + ) def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: blocked_items = [] items = [] chunk_size: int = _upload_chunk_size() - for file_chunk in chunk(self.local_files, chunk_size): - upload_payload = {"items": [file.serialize_v2() for file in file_chunk]} - dataset_slug: str = self.dataset_identifier.dataset_slug - team_slug: Optional[str] = self.dataset_identifier.team_slug + single_file_items = self.local_files + upload_payloads = [] + if self.multi_file_items: + upload_payloads.extend( + [ + { + "items": [ + file.serialize_darwin_json_v2() for file in file_chunk + ], + "options": {"ignore_dicom_layout": True}, + } + for file_chunk in chunk(self.multi_file_items, chunk_size) + ] + ) + local_files_for_multi_file_items = [ + file + for multi_file_item in self.multi_file_items + for file in multi_file_item.files + ] + single_file_items = [ + file + for file in single_file_items + if file not in local_files_for_multi_file_items + ] + + upload_payloads.extend( + [ + {"items": [file.serialize_darwin_json_v2() for file in file_chunk]} + for file_chunk in chunk(single_file_items, chunk_size) + ] + ) + dataset_slug: str = self.dataset_identifier.dataset_slug + team_slug: Optional[str] = self.dataset_identifier.team_slug + for upload_payload in upload_payloads: data: Dict[str, Any] = self.client.api_v2.register_data( dataset_slug, upload_payload, team_slug=team_slug ) - blocked_items.extend( [ItemPayload.parse_v2(item) for item in data["blocked_items"]] ) @@ -434,17 +572,19 @@ def upload_function( file_lookup = {file.full_path: file for file in self.local_files} for item in self.pending_items: - if len(item.slots) != 1: - raise NotImplementedError("Multi file upload is not supported") - upload_id = item.slots[0]["upload_id"] - file = file_lookup.get(item.full_path) - if not file: - raise ValueError( - f"Cannot match {item.full_path} from payload with files to upload" + for slot in item.slots: + upload_id = slot.upload_id + slot_path = ( + Path(item.path) / Path((slot.source_files[0].file_name)) + ).as_posix() + file = file_lookup.get(str(slot_path)) + if not file: + raise ValueError( + f"Cannot match {slot_path} from payload with files to upload" + ) + yield upload_function( + self.dataset.identifier.dataset_slug, file.local_path, upload_id ) - yield upload_function( - self.dataset.identifier.dataset_slug, file.local_path, upload_id - ) def _upload_file( self, diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index 978c6e431..a55113abb 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -12,7 +12,6 @@ import darwin.datatypes as dt -# from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2 from darwin.datatypes import PathLike from darwin.exceptions import NotFound from darwin.importer.formats.darwin import parse_path diff --git a/darwin/datatypes.py b/darwin/datatypes.py index c2ef99d26..2f75ef8d0 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -361,7 +361,7 @@ class Slot: type: str #: Original upload information for the slot - source_files: List[Dict[str, str]] + source_files: List[SourceFile] #: Thumbnail url to the file thumbnail_url: Optional[str] = None @@ -390,6 +390,21 @@ class Slot: #: Segments for video slots segments: Optional[List[Dict[str, UnknownType]]] = None + #: Upload ID + upload_id: Optional[str] = None + + #: The reason for blocking upload of this slot, if it was blocked + reason: Optional[str] = None + + +@dataclass +class SourceFile: + #: File name of source file + file_name: str + + #: URL of file + url: Optional[str] = None + @dataclass class AnnotationFileVersion: diff --git a/darwin/importer/formats/nifti.py b/darwin/importer/formats/nifti.py index e871b2532..a86a70d89 100644 --- a/darwin/importer/formats/nifti.py +++ b/darwin/importer/formats/nifti.py @@ -170,7 +170,7 @@ def _parse_nifti( dt.Slot( name=slot_name, type="dicom", - source_files=[{"url": None, "file_name": str(filename)}], + source_files=[dt.SourceFile(file_name=str(filename), url=None)], ) for slot_name in slot_names ], diff --git a/darwin/options.py b/darwin/options.py index b6a292394..6ac6c6717 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -183,6 +183,12 @@ def __init__(self) -> None: action="store_true", help="Preserve the local folder structure in the dataset.", ) + parser_push.add_argument( + "--item-merge-mode", + type=str, + choices=["slots", "series", "channels"], + help="Specify the item merge mode: `slots`, `series`, or `channels`", + ) # Remove parser_remove = dataset_action.add_parser( diff --git a/darwin/path_utils.py b/darwin/path_utils.py index a25c056a2..904c41fe7 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -24,7 +24,7 @@ def construct_full_path(remote_path: Optional[str], filename: str) -> str: if remote_path is None: return filename else: - return (PurePosixPath("/") / remote_path / filename).as_posix() + return PurePosixPath("/", remote_path, filename).as_posix() def deconstruct_full_path(filename: str) -> Tuple[str, str]: diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index b7b13c9f3..ba95198c3 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -25,6 +25,7 @@ import requests from json_stream.base import PersistentStreamingJSONList, PersistentStreamingJSONObject from jsonschema import validators +from natsort import natsorted from requests import Response from rich.progress import ProgressType, track from upolygon import draw_polygon @@ -73,6 +74,68 @@ ] SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS +# Define incompatible `item_merge_mode` arguments +PRESERVE_FOLDERS_KEY = "preserve_folders" +AS_FRAMES_KEY = "as_frames" +EXTRACT_VIEWS_KEY = "extract_views" + +# Define reasons for blocking slot uploads +BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS = "ALREADY_EXISTS" +BLOCKED_UPLOAD_ERROR_FILE_UPLOAD_TIMEOUT = "FILE_UPLOAD_TIMEOUT" +BLOCKED_UPLOAD_ERROR_FILE_UPLOAD_FAILED = "FILE_UPLOAD_FAILED" +BLOCKED_UPLOAD_ERROR_UNEXPECTED_ERROR = "UNEXPECTED_ERROR" +BLOCKED_UPLOAD_ERROR_ITEM_COUNT_LIMIT_EXCEEDED = "ITEM_COUNT_LIMIT_EXCEEDED" + +SLOTS_GRID_MAP = { + 1: [[["0"]]], + 2: [[["0"]], [["1"]]], + 3: [[["0"]], [["1"]], [["2"]]], + 4: [[["0"], ["2"]], [["1"], ["3"]]], + 5: [[["0"], ["3"]], [["1"], ["4"]], [["2"]]], + 6: [[["0"], ["3"]], [["1"], ["4"]], [["2"], ["5"]]], + 7: [[["0"], ["3"], ["6"]], [["1"], ["4"]], [["2"], ["5"]]], + 8: [[["0"], ["3"], ["6"]], [["1"], ["4"], ["7"]], [["2"], ["5"]]], + 9: [[["0"], ["3"], ["6"]], [["1"], ["4"], ["7"]], [["2"], ["5"], ["8"]]], + 10: [[["0"], ["4"], ["8"]], [["1"], ["5"], ["9"]], [["2"], ["6"]], [["3"], ["7"]]], + 11: [ + [["0"], ["4"], ["8"]], + [["1"], ["5"], ["9"]], + [["2"], ["6"], ["10"]], + [["3"], ["7"]], + ], + 12: [ + [["0"], ["4"], ["8"]], + [["1"], ["5"], ["9"]], + [["2"], ["6"], ["10"]], + [["3"], ["7"], ["11"]], + ], + 13: [ + [["0"], ["4"], ["8"], ["12"]], + [["1"], ["5"], ["9"]], + [["2"], ["6"], ["10"]], + [["3"], ["7"], ["11"]], + ], + 14: [ + [["0"], ["4"], ["8"], ["12"]], + [["1"], ["5"], ["9"], ["13"]], + [["2"], ["6"], ["10"]], + [["3"], ["7"], ["11"]], + ], + 15: [ + [["0"], ["4"], ["8"], ["12"]], + [["1"], ["5"], ["9"], ["13"]], + [["2"], ["6"], ["10"], ["14"]], + [["3"], ["7"], ["11"]], + ], + 16: [ + [["0"], ["4"], ["8"], ["12"]], + [["1"], ["5"], ["9"], ["13"]], + [["2"], ["6"], ["10"], ["14"]], + [["3"], ["7"], ["11"], ["15"]], + ], +} + + _darwin_schema_cache = {} @@ -216,6 +279,7 @@ def find_files( *, files_to_exclude: List[dt.PathLike] = [], recursive: bool = True, + sort: bool = False, ) -> List[Path]: """ Retrieve a list of all files belonging to supported extensions. The exploration can be made @@ -229,7 +293,8 @@ def find_files( List of files to exclude from the search. recursive : bool Flag for recursive search. - + sort : bool + Flag for sorting the files naturally, i.e. file2.txt will come before file10.txt. Returns ------- List[Path] @@ -255,8 +320,12 @@ def find_files( raise UnsupportedFileType(path) files_to_exclude_full_paths = [str(Path(f)) for f in files_to_exclude] - - return [f for f in found_files if str(f) not in files_to_exclude_full_paths] + filtered_files = [ + f for f in found_files if str(f) not in files_to_exclude_full_paths + ] + if sort: + return natsorted(filtered_files) + return filtered_files def secure_continue_request() -> bool: @@ -589,10 +658,10 @@ def _parse_darwin_image( name=None, type="image", source_files=[ - { - "url": data["image"].get("url"), - "file_name": _get_local_filename(data["image"]), - } + dt.SourceFile( + file_name=_get_local_filename(data["image"]), + url=data["image"].get("url"), + ) ], thumbnail_url=data["image"].get("thumbnail_url"), width=data["image"].get("width"), @@ -639,10 +708,10 @@ def _parse_darwin_video( name=None, type="video", source_files=[ - { - "url": data["image"].get("url"), - "file_name": _get_local_filename(data["image"]), - } + dt.SourceFile( + file_name=_get_local_filename(data["image"]), + url=data["image"].get("url"), + ) ], thumbnail_url=data["image"].get("thumbnail_url"), width=data["image"].get("width"), diff --git a/poetry.lock b/poetry.lock index 4fb0e6474..558dd7fd1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -912,6 +912,21 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, + {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, +] + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + [[package]] name = "networkx" version = "3.1" @@ -2237,4 +2252,4 @@ test = ["pytest", "responses"] [metadata] lock-version = "2.0" python-versions = ">=3.8.0,<3.12" -content-hash = "6e6c0628c98652df5dd76a8d82a0f67af9ec2037388350412152d21d84fa9d57" +content-hash = "3ea848bf4d0e5e0f22170f20321ce5d426eb79c6bc0a536b36519fd6f7c6782e" diff --git a/pyproject.toml b/pyproject.toml index 70f59b36a..a409a060e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ types-requests = "^2.28.11.8" upolygon = "0.1.11" tenacity = "8.5.0" +natsort = "^8.4.0" [tool.poetry.extras] dev = ["black", "isort", "flake8", "mypy", "debugpy", "responses", "pytest", "flake8-pyproject", "pytest-rerunfailures", "ruff", "validate-pyproject"] medical = ["nibabel", "connected-components-3d", "scipy"] diff --git a/tests/darwin/cli_functions_test.py b/tests/darwin/cli_functions_test.py index 7ddf1a78f..6f5c0fe9e 100644 --- a/tests/darwin/cli_functions_test.py +++ b/tests/darwin/cli_functions_test.py @@ -12,6 +12,7 @@ from darwin.dataset import RemoteDataset from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2 from tests.fixtures import * +from darwin.utils import BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS @pytest.fixture @@ -57,7 +58,7 @@ def test_default_non_verbose( { "type": "image", "file_name": "test_1.jpg", - "reason": "ALREADY_EXISTS", + "reason": BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS, "slot_name": "0", "upload_id": "123e4567-e89b-12d3-a456-426614174000", "as_frames": False, @@ -160,7 +161,7 @@ def test_with_verbose_flag( { "type": "image", "file_name": "test_1.jpg", - "reason": "ALREADY_EXISTS", + "reason": BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS, "slot_name": "0", "upload_id": "123e4567-e89b-12d3-a456-426614174000", "as_frames": False, diff --git a/tests/darwin/data/push_test_dir.zip b/tests/darwin/data/push_test_dir.zip new file mode 100644 index 000000000..073acd061 Binary files /dev/null and b/tests/darwin/data/push_test_dir.zip differ diff --git a/tests/darwin/dataset/download_manager_test.py b/tests/darwin/dataset/download_manager_test.py index 06d9045d3..ee0f5380c 100644 --- a/tests/darwin/dataset/download_manager_test.py +++ b/tests/darwin/dataset/download_manager_test.py @@ -6,7 +6,7 @@ import responses from darwin.dataset import download_manager as dm -from darwin.datatypes import AnnotationClass, AnnotationFile, Slot +from darwin.datatypes import AnnotationClass, AnnotationFile, Slot, SourceFile from tests.fixtures import * @@ -89,7 +89,7 @@ def test_single_slot_without_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name.jpg"}], + source_files=[SourceFile(file_name="source_name.jpg")], ) ], remote_path="/", @@ -112,7 +112,7 @@ def test_single_slot_with_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name.jpg"}], + source_files=[SourceFile(file_name="source_name.jpg")], ) ], remote_path="/remote/path", @@ -135,12 +135,12 @@ def test_multi_slot_without_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name_1.jpg"}], + source_files=[SourceFile(file_name="source_name_1.jpg")], ), Slot( name="slot2", type="image", - source_files=[{"file_name": "source_name_2.jpg"}], + source_files=[SourceFile(file_name="source_name_2.jpg")], ), ], remote_path="/", @@ -166,12 +166,12 @@ def test_multi_slot_with_folders_planned_image_path(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name_1.jpg"}], + source_files=[SourceFile(file_name="source_name_1.jpg")], ), Slot( name="slot2", type="image", - source_files=[{"file_name": "source_name_2.jpg"}], + source_files=[SourceFile(file_name="source_name_2.jpg")], ), ], remote_path="/remote/path", @@ -197,7 +197,7 @@ def test_single_slot_root_path_with_folders_planned_image_paths(): Slot( name="slot1", type="image", - source_files=[{"file_name": "source_name.jpg"}], + source_files=[SourceFile(file_name="source_name.jpg")], ) ], remote_path="/", @@ -221,8 +221,8 @@ def test_multiple_source_files_planned_image_paths(): name="slot1", type="image", source_files=[ - {"file_name": "source_name_1.jpg"}, - {"file_name": "source_name_2.jpg"}, + SourceFile(file_name="source_name_1.jpg"), + SourceFile(file_name="source_name_2.jpg"), ], ) ], diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 7ed48a448..ce7fcba60 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -20,8 +20,16 @@ download_all_images_from_annotations, ) from darwin.dataset.release import Release, ReleaseStatus -from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2 -from darwin.dataset.upload_manager import LocalFile, UploadHandlerV2 +from darwin.dataset.remote_dataset_v2 import ( + RemoteDatasetV2, + _find_files_to_upload_as_multi_file_items, +) +from darwin.dataset.upload_manager import ( + ItemMergeMode, + LocalFile, + UploadHandlerV2, +) +from darwin.utils.utils import SLOTS_GRID_MAP from darwin.datatypes import ManifestItem, ObjectStore, SegmentManifest from darwin.exceptions import UnsupportedExportFormat, UnsupportedFileType from darwin.item import DatasetItem @@ -348,6 +356,15 @@ def files_content() -> Dict[str, Any]: # assert dataset.release == None +@pytest.fixture() +def setup_zip(): + zip_path = Path("tests/darwin/data/push_test_dir.zip") + with tempfile.TemporaryDirectory() as tmpdir: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(tmpdir) + yield Path(tmpdir) + + @pytest.mark.usefixtures("file_read_write_test", "create_annotation_file") class TestSplitVideoAnnotations: def test_works_on_videos( @@ -732,6 +749,265 @@ def test_raises_with_unsupported_files(self, remote_dataset: RemoteDataset): with pytest.raises(UnsupportedFileType): remote_dataset.push(["test.txt"]) + def test_raises_if_invalid_item_merge_mode(self, remote_dataset: RemoteDataset): + with pytest.raises(ValueError): + remote_dataset.push(["path/to/dir"], item_merge_mode="invalid") + + def test_raises_if_incompatible_args_with_item_merge_mode( + self, remote_dataset: RemoteDataset + ): + incompatible_args = [ + {"preserve_folders": True}, + {"as_frames": True}, + {"extract_views": True}, + ] + for args in incompatible_args: + with pytest.raises(TypeError): + remote_dataset.push( + ["path/to/dir"], + item_merge_mode="slots", + **args, # type: ignore + ) + + +@pytest.mark.usefixtures("setup_zip") +class TestPushMultiSlotItem: + def test_different_numbers_of_input_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "num_files_tests" + directories = [d for d in base_path.iterdir() if d.is_dir()] + for directory in directories: + if directory.name == "0": + with pytest.raises( + ValueError, + match="No valid folders to upload after searching the passed directories for files", + ): + _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="slots" + ) + continue + + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="slots" + ) + num_local_files = len(local_files) + expected_num_files = int(directory.name) + num_viewports = min(num_local_files, 16) + assert len(multi_file_items) == 1 + assert num_local_files == expected_num_files + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].slot_names == [ + str(i) for i in range(num_local_files) + ] + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": SLOTS_GRID_MAP.get(num_viewports), + } + + def test_does_not_recursively_search(self, setup_zip): + directory = setup_zip / "push_test_dir" / "topdir" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="slots" + ) + num_local_files = len(local_files) + num_viewports = min(num_local_files, 16) + assert len(multi_file_items) == 1 + assert len(local_files) == 2 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": SLOTS_GRID_MAP.get(num_viewports), + } + + def test_dicoms(self, setup_zip): + directory = setup_zip / "push_test_dir" / "dicom_tests" / "dicoms" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="slots" + ) + num_local_files = len(local_files) + num_viewports = min(num_local_files, 16) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": SLOTS_GRID_MAP.get(num_viewports), + } + + def test_dicoms_and_other_files(self, setup_zip): + directory = ( + setup_zip / "push_test_dir" / "dicom_tests" / "dicoms_and_other_files" + ) + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="slots" + ) + num_local_files = len(local_files) + num_viewports = min(num_local_files, 16) + assert len(multi_file_items) == 1 + assert len(local_files) == 10 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": SLOTS_GRID_MAP.get(num_viewports), + } + + def test_multiple_file_types(self, setup_zip): + directory = setup_zip / "push_test_dir" / "multiple_file_types" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="slots" + ) + num_local_files = len(local_files) + num_viewports = min(num_local_files, 16) + assert len(multi_file_items) == 1 + assert len(local_files) == 12 + assert multi_file_items[0].merge_mode == ItemMergeMode.SLOTS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": SLOTS_GRID_MAP.get(num_viewports), + } + + +@pytest.mark.usefixtures("setup_zip") +class TestPushDICOMSeries: + def test_dicoms(self, setup_zip): + directory = setup_zip / "push_test_dir" / "dicom_tests" / "dicoms" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="series" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[["dicoms"]]], + } + + def test_dicoms_and_other_files(self, setup_zip): + directory = ( + setup_zip / "push_test_dir" / "dicom_tests" / "dicoms_and_other_files" + ) + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="series" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[["dicoms_and_other_files"]]], + } + + def test_multiple_file_types(self, setup_zip): + directory = setup_zip / "push_test_dir" / "multiple_file_types" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="series" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 3 + assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[["multiple_file_types"]]], + } + + +@pytest.mark.usefixtures("setup_zip") +class TestPushMultiChannelItem: + def test_different_numbers_of_input_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "num_files_tests" + directories = [d for d in base_path.iterdir() if d.is_dir()] + for directory in directories: + if directory.name == "0": + with pytest.raises( + ValueError, + match="No valid folders to upload after searching the passed directories for files", + ): + _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="channels" + ) + continue + + if directory.name == "17": + with pytest.raises( + ValueError, + match="No multi-channel item can have more than 16 files. The following directory has 17 files: ", + ): + _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="channels" + ) + continue + + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="channels" + ) + num_local_files = len(local_files) + expected_num_files = int(directory.name) + assert len(multi_file_items) == 1 + assert num_local_files == expected_num_files + assert multi_file_items[0].merge_mode == ItemMergeMode.CHANNELS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[[file.local_path.name for file in local_files]]], + } + + def test_does_not_recursively_search(self, setup_zip): + directory = setup_zip / "push_test_dir" / "topdir" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="channels" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 2 + assert multi_file_items[0].merge_mode == ItemMergeMode.CHANNELS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[[file.local_path.name for file in local_files]]], + } + + def test_multiple_file_types(self, setup_zip): + directory = setup_zip / "push_test_dir" / "multiple_file_types" + local_files, multi_file_items = _find_files_to_upload_as_multi_file_items( + [directory], [], 0, item_merge_mode="channels" + ) + assert len(multi_file_items) == 1 + assert len(local_files) == 5 + assert multi_file_items[0].merge_mode == ItemMergeMode.CHANNELS + assert multi_file_items[0].files == local_files + assert multi_file_items[0].directory == directory + assert multi_file_items[0].name == directory.name + assert multi_file_items[0].layout == { + "version": 3, + "slots_grid": [[[file.local_path.name for file in local_files]]], + } + @pytest.mark.usefixtures("file_read_write_test") class TestPull: diff --git a/tests/darwin/dataset/upload_manager_test.py b/tests/darwin/dataset/upload_manager_test.py index 03e65be37..ca695232a 100644 --- a/tests/darwin/dataset/upload_manager_test.py +++ b/tests/darwin/dataset/upload_manager_test.py @@ -16,6 +16,7 @@ _upload_chunk_size, ) from tests.fixtures import * +from darwin.utils import BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS @pytest.fixture @@ -107,7 +108,7 @@ def test_pending_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert pending_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert pending_item.filename == "test.jpg" assert pending_item.path == "/" - assert pending_item.reason is None + assert pending_item.slots[0].reason is None @pytest.mark.usefixtures("file_read_write_test") @@ -123,7 +124,7 @@ def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoin { "type": "image", "file_name": "test.jpg", - "reason": "ALREADY_EXISTS", + "reason": BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS, "slot_name": "0", "upload_id": "123e4567-e89b-12d3-a456-426614174000", "as_frames": False, @@ -149,7 +150,7 @@ def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoin assert blocked_item.dataset_item_id == "3b241101-e2bb-4255-8caf-4136c566a964" assert blocked_item.filename == "test.jpg" assert blocked_item.path == "/" - assert blocked_item.reason == "ALREADY_EXISTS" + assert blocked_item.slots[0].reason == BLOCKED_UPLOAD_ERROR_ALREADY_EXISTS @pytest.mark.usefixtures("file_read_write_test") diff --git a/tests/darwin/utils/flatten_list_test.py b/tests/darwin/utils/flatten_list_test.py index 41ab75b56..62d321134 100644 --- a/tests/darwin/utils/flatten_list_test.py +++ b/tests/darwin/utils/flatten_list_test.py @@ -28,6 +28,4 @@ def test_returns_flattened_list_if_passed_nested_list_with_different_depth() -> if __name__ == "__main__": import sys - import pytest - sys.exit(pytest.main(["-v", "-x", __file__]))