diff --git a/darwin/backend_v2.py b/darwin/backend_v2.py index dacb39ac0..dca5327f6 100644 --- a/darwin/backend_v2.py +++ b/darwin/backend_v2.py @@ -24,9 +24,12 @@ def __init__(self, client: "Client", default_team): @inject_default_team_slug def register_data( - self, dataset_slug: str, payload: Dict[str, Any], *, team_slug: Optional[str] = None + self, + dataset_slug: str, + payload: Dict[str, Any], + *, + team_slug: Optional[str] = None, ) -> Dict[str, Any]: - payload["dataset_slug"] = dataset_slug response = self._client._post( endpoint=f"v2/teams/{team_slug}/items/register_upload", @@ -36,11 +39,17 @@ def register_data( return response @inject_default_team_slug - def sign_upload(self, dataset_slug: str, upload_id: str, *, team_slug: Optional[str] = None) -> Dict[str, Any]: - return self._client._get(f"v2/teams/{team_slug}/items/uploads/{upload_id}/sign", team_slug=team_slug) + def sign_upload( + self, dataset_slug: str, upload_id: str, *, team_slug: Optional[str] = None + ) -> Dict[str, Any]: + return self._client._get( + f"v2/teams/{team_slug}/items/uploads/{upload_id}/sign", team_slug=team_slug + ) @inject_default_team_slug - def confirm_upload(self, dataset_slug: str, upload_id: str, *, team_slug: Optional[str] = None) -> Dict[str, Any]: + def confirm_upload( + self, dataset_slug: str, upload_id: str, *, team_slug: Optional[str] = None + ) -> Dict[str, Any]: return self._client._post( f"v2/teams/{team_slug}/items/uploads/{upload_id}/confirm", payload={}, @@ -49,7 +58,11 @@ def confirm_upload(self, dataset_slug: str, upload_id: str, *, team_slug: Option @inject_default_team_slug def fetch_items( - self, dataset_id: int, cursor: Union[Dict[str, Any], List[Tuple[str, Any]]], *, team_slug: Optional[str] = None + self, + dataset_id: int, + cursor: Union[Dict[str, Any], List[Tuple[str, Any]]], + *, + team_slug: Optional[str] = None, ) -> Dict[str, Any]: """ Fetch the remote items from the given dataset. @@ -75,10 +88,14 @@ def fetch_items( cursor.append(("dataset_ids[]", dataset_id)) - return self._client._get(f"/v2/teams/{team_slug}/items?{parse.urlencode(cursor, True)}", team_slug) + return self._client._get( + f"/v2/teams/{team_slug}/items?{parse.urlencode(cursor, True)}", team_slug + ) @inject_default_team_slug - def archive_items(self, payload: Dict[str, Any], *, team_slug: Optional[str] = None) -> None: + def archive_items( + self, payload: Dict[str, Any], *, team_slug: Optional[str] = None + ) -> None: """ Archives the item from the given dataset. @@ -92,7 +109,9 @@ def archive_items(self, payload: Dict[str, Any], *, team_slug: Optional[str] = N self._client._post(f"v2/teams/{team_slug}/items/archive", payload, team_slug) @inject_default_team_slug - def restore_archived_items(self, payload: Dict[str, Any], *, team_slug: Optional[str] = None) -> None: + def restore_archived_items( + self, payload: Dict[str, Any], *, team_slug: Optional[str] = None + ) -> None: """ Restores the archived item from the given dataset. 
@@ -107,7 +126,12 @@ def restore_archived_items(self, payload: Dict[str, Any], *, team_slug: Optional @inject_default_team_slug def move_to_stage( - self, filters: Dict[str, Any], stage_id: str, workflow_id: str, *, team_slug: Optional[str] = None + self, + filters: Dict[str, Any], + stage_id: str, + workflow_id: str, + *, + team_slug: Optional[str] = None, ) -> None: """ Moves the given items to the specified stage @@ -125,16 +149,22 @@ def move_to_stage( self._client._post_raw(f"v2/teams/{team_slug}/items/stage", payload, team_slug) @inject_default_team_slug - def get_dataset(self, id: str, *, team_slug: Optional[str] = None) -> Dict[str, Any]: + def get_dataset( + self, id: str, *, team_slug: Optional[str] = None + ) -> Dict[str, Any]: return self._client._get(f"datasets/{id}", team_slug) @inject_default_team_slug - def get_workflow(self, id: str, *, team_slug: Optional[str] = None) -> Dict[str, Any]: + def get_workflow( + self, id: str, *, team_slug: Optional[str] = None + ) -> Dict[str, Any]: return self._client._get(f"v2/teams/{team_slug}/workflows/{id}", team_slug) @inject_default_team_slug def delete_items(self, filters, *, team_slug: Optional[str] = None): - self._client._delete(f"v2/teams/{team_slug}/items", {"filters": filters}, team_slug) + self._client._delete( + f"v2/teams/{team_slug}/items", {"filters": filters}, team_slug + ) @inject_default_team_slug def export_dataset( @@ -159,27 +189,39 @@ def export_dataset( payload["format"] = format if annotation_class_ids: - payload["annotation_filters"] = {"annotation_class_ids": list(map(int, annotation_class_ids))} + payload["annotation_filters"] = { + "annotation_class_ids": list(map(int, annotation_class_ids)) + } if filters is not None: # Backend assumes default filters only if those are completely missing. payload["filters"] = filters - return self._client._post(f"v2/teams/{team_slug}/datasets/{dataset_slug}/exports", payload, team_slug) + return self._client._post( + f"v2/teams/{team_slug}/datasets/{dataset_slug}/exports", payload, team_slug + ) def get_exports(self, dataset_slug, *, team_slug: Optional[str] = None): - return self._client._get(f"v2/teams/{team_slug}/datasets/{dataset_slug}/exports", team_slug) + return self._client._get( + f"v2/teams/{team_slug}/datasets/{dataset_slug}/exports", team_slug + ) @inject_default_team_slug - def post_comment(self, item_id, text, x, y, w, h, slot_name, team_slug: Optional[str] = None): + def post_comment( + self, item_id, text, x, y, w, h, slot_name, team_slug: Optional[str] = None + ): payload = { "bounding_box": {"h": h, "w": w, "x": x, "y": y}, "comments": [{"body": text}], "slot_name": slot_name, } - return self._client._post(f"v2/teams/{team_slug}/items/{item_id}/comment_threads", payload, team_slug) + return self._client._post( + f"v2/teams/{team_slug}/items/{item_id}/comment_threads", payload, team_slug + ) @inject_default_team_slug - def import_annotation(self, item_id: ItemId, payload: Dict[str, Any], team_slug: Optional[str] = None) -> None: + def import_annotation( + self, item_id: ItemId, payload: Dict[str, Any], team_slug: Optional[str] = None + ) -> None: """ Imports the annotation for the item with the given id. 
@@ -192,4 +234,6 @@ def import_annotation(self, item_id: ItemId, payload: Dict[str, Any], team_slug: `{"annotations": serialized_annotations, "overwrite": "false"}` """ - return self._client._post_raw(f"v2/teams/{team_slug}/items/{item_id}/import", payload=payload) + return self._client._post_raw( + f"v2/teams/{team_slug}/items/{item_id}/import", payload=payload + ) diff --git a/darwin/cli.py b/darwin/cli.py index 3f3f5221b..da18d2ec6 100644 --- a/darwin/cli.py +++ b/darwin/cli.py @@ -41,14 +41,20 @@ def main() -> None: except Unauthenticated: f._error("You need to specify a valid API key to do that action.") except InvalidTeam: - f._error("The team specified is not in the configuration, please authenticate first.") + f._error( + "The team specified is not in the configuration, please authenticate first." + ) except requests.exceptions.ConnectionError: - f._error("Darwin seems unreachable, please try again in a minute or contact support.") + f._error( + "Darwin seems unreachable, please try again in a minute or contact support." + ) except GracefulExit as e: f._error(e.message) except Exception: # Catch unhandled exceptions console = Console() - console.print("An unexpected error occurred, please contact support, and send them the file.") + console.print( + "An unexpected error occurred, please contact support, and send them the file." + ) console.print_exception() exit(255) @@ -67,7 +73,9 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: api_key = getpass.getpass(prompt="API key: ", stream=None) api_key = api_key.strip() if api_key == "": - print("API Key needed, generate one for your team: https://darwin.v7labs.com/?settings=api-keys") + print( + "API Key needed, generate one for your team: https://darwin.v7labs.com/?settings=api-keys" + ) return f.authenticate(api_key) print("Authentication succeeded.") @@ -121,10 +129,17 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: f.dataset_report(args.dataset, args.granularity or "day", args.pretty) elif args.action == "export": f.export_dataset( - args.dataset, args.include_url_token, args.name, args.class_ids, args.include_authorship, args.version + args.dataset, + args.include_url_token, + args.name, + args.class_ids, + args.include_authorship, + args.version, ) elif args.action == "files": - f.list_files(args.dataset, args.status, args.path, args.only_filenames, args.sort_by) + f.list_files( + args.dataset, args.status, args.path, args.only_filenames, args.sort_by + ) elif args.action == "releases": f.dataset_list_releases(args.dataset) elif args.action == "pull": @@ -170,7 +185,12 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: ) # Annotation schema validation elif args.command == "validate": - f.validate_schemas(location=args.location, pattern=args.pattern, silent=args.silent, output=args.output) + f.validate_schemas( + location=args.location, + pattern=args.pattern, + silent=args.silent, + output=args.output, + ) if __name__ == "__main__": diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 9bd6fc903..c0ce35322 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -93,7 +93,11 @@ def validate_api_key(api_key: str) -> None: _error(f"Expected key prefix to be 7 characters long\n(example: {example_key})") -def authenticate(api_key: str, default_team: Optional[bool] = None, datasets_dir: Optional[Path] = None) -> Config: +def authenticate( + api_key: str, + default_team: Optional[bool] = None, + datasets_dir: Optional[Path] = None, +) -> Config: """ Authenticate the API 
key against the server and creates a configuration file for it. @@ -121,7 +125,9 @@ def authenticate(api_key: str, default_team: Optional[bool] = None, datasets_dir config_path.parent.mkdir(exist_ok=True) if default_team is None: - default_team = input(f"Make {client.default_team} the default team? [y/N] ") in ["Y", "y"] + default_team = input( + f"Make {client.default_team} the default team? [y/N] " + ) in ["Y", "y"] if datasets_dir is None: datasets_dir = Path(prompt("Datasets directory", "~/.darwin/datasets")) @@ -225,7 +231,9 @@ def local(team: Optional[str] = None) -> None: table.add_row( f"{dataset_path.parent.name}/{dataset_path.name}", str(len(files_in_dataset_path)), - humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)), + humanize.naturaldate( + datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime) + ), humanize.naturalsize(sum(p.stat().st_size for p in files_in_dataset_path)), ) @@ -273,7 +281,9 @@ def url(dataset_slug: str) -> None: """ client: Client = _load_client(offline=True) try: - remote_dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + remote_dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) print(remote_dataset.remote_path) except NotFound as e: _error(f"Dataset '{e.name}' does not exist.") @@ -296,7 +306,9 @@ def dataset_report(dataset_slug: str, granularity: str, pretty: bool) -> None: client: Client = _load_client(offline=True) console = Console(theme=_console_theme()) try: - remote_dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + remote_dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) report: str = remote_dataset.get_report(granularity) if not pretty: @@ -412,7 +424,9 @@ def pull_dataset( version: str = DatasetIdentifier.parse(dataset_slug).version or "latest" client: Client = _load_client(offline=False, maybe_guest=True) try: - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) except NotFound: _error( f"Dataset '{dataset_slug}' does not exist, please check the spelling. " @@ -450,7 +464,9 @@ def pull_dataset( print(f"Dataset {release.identifier} downloaded at {dataset.local_path} .") -def split(dataset_slug: str, val_percentage: float, test_percentage: float, seed: int = 0) -> None: +def split( + dataset_slug: str, val_percentage: float, test_percentage: float, seed: int = 0 +) -> None: """ Splits a local version of a dataset into train, validation, and test partitions. 
@@ -526,7 +542,11 @@ def list_remote_datasets(all_teams: bool, team: Optional[str] = None) -> None: datasets = list(client.list_remote_datasets()) for dataset in datasets: - table.add_row(f"{dataset.team}/{dataset.slug}", str(dataset.item_count), f"{dataset.progress * 100:.1f}%") + table.add_row( + f"{dataset.team}/{dataset.slug}", + str(dataset.item_count), + f"{dataset.progress * 100:.1f}%", + ) if table.row_count == 0: print("No dataset available.") else: @@ -547,7 +567,9 @@ def remove_remote_dataset(dataset_slug: str) -> None: """ client: Client = _load_client(offline=False) try: - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) print(f"About to delete {dataset.identifier} on darwin.") if not secure_continue_request(): print("Cancelled.") @@ -571,7 +593,9 @@ def dataset_list_releases(dataset_slug: str) -> None: """ client: Client = _load_client(offline=False) try: - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) releases: List[Release] = dataset.get_releases() if len(releases) == 0: print("No available releases, export one first.") @@ -587,7 +611,10 @@ def dataset_list_releases(dataset_slug: str) -> None: if not release.available: continue table.add_row( - str(release.identifier), str(release.image_count), str(release.class_count), str(release.export_date) + str(release.identifier), + str(release.image_count), + str(release.class_count), + str(release.export_date), ) Console().print(table) @@ -639,12 +666,18 @@ def upload_data( try: max_workers: int = concurrent.futures.ThreadPoolExecutor()._max_workers # type: ignore - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_identifier) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_identifier + ) - sync_metadata: Progress = Progress(SpinnerColumn(), TextColumn("[bold blue]Syncing metadata")) + sync_metadata: Progress = Progress( + SpinnerColumn(), TextColumn("[bold blue]Syncing metadata") + ) overall_progress = Progress( - TextColumn("[bold blue]{task.fields[filename]}"), BarColumn(), "{task.completed} of {task.total}" + TextColumn("[bold blue]{task.fields[filename]}"), + BarColumn(), + "{task.completed} of {task.total}", ) file_progress = Progress( @@ -666,14 +699,28 @@ def upload_data( sync_task: TaskID = sync_metadata.add_task("") file_tasks: Dict[str, TaskID] = {} overall_task = overall_progress.add_task( - "[green]Total progress", filename="Total progress", total=0, visible=False + "[green]Total progress", + filename="Total progress", + total=0, + visible=False, ) - def progress_callback(total_file_count: NumberLike, file_advancement: NumberLike) -> None: + def progress_callback( + total_file_count: NumberLike, file_advancement: NumberLike + ) -> None: sync_metadata.update(sync_task, visible=False) - overall_progress.update(overall_task, total=total_file_count, advance=file_advancement, visible=True) + overall_progress.update( + overall_task, + total=total_file_count, + advance=file_advancement, + visible=True, + ) - def file_upload_callback(file_name: str, file_total_bytes: NumberLike, file_bytes_sent: NumberLike) -> None: + def file_upload_callback( + file_name: str, + file_total_bytes: NumberLike, + file_bytes_sent: NumberLike, + ) -> None: if file_name not in file_tasks: file_tasks[file_name] = 
file_progress.add_task( f"[blue]{file_name}", filename=file_name, total=file_total_bytes @@ -683,7 +730,9 @@ def file_upload_callback(file_name: str, file_total_bytes: NumberLike, file_byte # or removing a task fails. Wrapping this logic around a try/catch block # is a workaround, we should consider solving this properly (e.g.: using locks) try: - file_progress.update(file_tasks[file_name], completed=file_bytes_sent) + file_progress.update( + file_tasks[file_name], completed=file_bytes_sent + ) for task in file_progress.tasks: if task.finished and len(file_progress.tasks) >= max_workers: @@ -707,7 +756,10 @@ def file_upload_callback(file_name: str, file_total_bytes: NumberLike, file_byte console.print() if not upload_manager.blocked_count and not upload_manager.error_count: - console.print(f"All {upload_manager.total_count} files have been successfully uploaded.\n", style="success") + console.print( + f"All {upload_manager.total_count} files have been successfully uploaded.\n", + style="success", + ) return already_existing_items = [] @@ -736,12 +788,24 @@ def file_upload_callback(file_name: str, file_total_bytes: NumberLike, file_byte return error_table: Table = Table( - "Dataset Item ID", "Filename", "Remote Path", "Stage", "Reason", show_header=True, header_style="bold cyan" + "Dataset Item ID", + "Filename", + "Remote Path", + "Stage", + "Reason", + show_header=True, + header_style="bold cyan", ) for item in upload_manager.blocked_items: if item.reason != "ALREADY_EXISTS": - error_table.add_row(str(item.dataset_item_id), item.filename, item.path, "UPLOAD_REQUEST", item.reason) + error_table.add_row( + str(item.dataset_item_id), + item.filename, + item.path, + "UPLOAD_REQUEST", + item.reason, + ) for error in upload_manager.errors: for local_file in upload_manager.local_files: @@ -821,7 +885,9 @@ def dataset_import( try: importer: ImportParser = get_importer(format) - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) import_annotations( dataset, @@ -837,7 +903,9 @@ def dataset_import( ) except ImporterNotFoundError: - _error(f"Unsupported import format: {format}, currently supported: {import_formats}") + _error( + f"Unsupported import format: {format}, currently supported: {import_formats}" + ) except AttributeError as e: _error(f"Internal problem with import occured: {str(e)}") except NotFound as e: @@ -880,13 +948,17 @@ def list_files( """ client: Client = _load_client(dataset_identifier=dataset_slug) try: - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) filters: Dict[str, UnknownType] = {} if statuses: for status in statuses.split(","): if not _has_valid_status(status): - _error(f"Invalid status '{status}', available statuses: annotate, archived, complete, new, review") + _error( + f"Invalid status '{status}', available statuses: annotate, archived, complete, new, review" + ) filters["statuses"] = statuses else: filters["statuses"] = "new,annotate,review,complete" @@ -909,7 +981,11 @@ def list_files( table.add_row(file.filename) else: image_url = dataset.workview_url_for_item(file) - table.add_row(file.filename, f"{file.status if not file.archived else 'archived'}", image_url) + table.add_row( + file.filename, + f"{file.status if not file.archived else 'archived'}", + image_url, + ) Console().print(table) except NotFound as e: @@ 
-934,12 +1010,18 @@ def set_file_status(dataset_slug: str, status: str, files: List[str]) -> None: """ available_statuses = ["archived", "clear", "new", "restore-archived", "complete"] if status not in available_statuses: - _error(f"Invalid status '{status}', available statuses: {', '.join(available_statuses)}") + _error( + f"Invalid status '{status}', available statuses: {', '.join(available_statuses)}" + ) client: Client = _load_client(dataset_identifier=dataset_slug) try: - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) - items: Iterator[DatasetItem] = dataset.fetch_remote_files({"filenames": ",".join(files)}) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) + items: Iterator[DatasetItem] = dataset.fetch_remote_files( + {"filenames": ",".join(files)} + ) if status == "archived": dataset.archive(items) elif status == "clear": @@ -956,7 +1038,9 @@ def set_file_status(dataset_slug: str, status: str, files: List[str]) -> None: _error(str(e)) -def delete_files(dataset_slug: str, files: List[str], skip_user_confirmation: bool = False) -> None: +def delete_files( + dataset_slug: str, files: List[str], skip_user_confirmation: bool = False +) -> None: """ Deletes the files from the given dataset. Exits the application if no dataset with the given slug is found or a general error occurs. @@ -973,7 +1057,9 @@ def delete_files(dataset_slug: str, files: List[str], skip_user_confirmation: bo client: Client = _load_client(dataset_identifier=dataset_slug) try: console = Console(theme=_console_theme(), stderr=True) - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=dataset_slug) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=dataset_slug + ) items, items_2 = tee(dataset.fetch_remote_files({"filenames": files})) if not skip_user_confirmation and not secure_continue_request(): console.print("Cancelled.") @@ -1035,7 +1121,10 @@ def validate_schemas( for file in to_validate: try: - errors = [{"message": e.message, "location": e.json_path} for e in validate_file_against_schema(file)] + errors = [ + {"message": e.message, "location": e.json_path} + for e in validate_file_against_schema(file) + ] except MissingSchema as e: errors = [{"message": e.message, "location": "schema link"}] @@ -1062,7 +1151,9 @@ def validate_schemas( console.print("Did you supply an invalid filename?") -def dataset_convert(dataset_identifier: str, format: str, output_dir: Optional[PathLike] = None) -> None: +def dataset_convert( + dataset_identifier: str, format: str, output_dir: Optional[PathLike] = None +) -> None: """ Converts the annotations from the given dataset to the given format. 
Exits the application if no dataset with the given slug exists or no releases for the dataset @@ -1083,7 +1174,9 @@ def dataset_convert(dataset_identifier: str, format: str, output_dir: Optional[P try: parser: ExportParser = get_exporter(format) - dataset: RemoteDataset = client.get_remote_dataset(dataset_identifier=identifier) + dataset: RemoteDataset = client.get_remote_dataset( + dataset_identifier=identifier + ) if not dataset.local_path.exists(): _error( f"No annotations downloaded for dataset f{dataset}, first pull a release using " @@ -1100,9 +1193,13 @@ def dataset_convert(dataset_identifier: str, format: str, output_dir: Optional[P export_annotations(parser, [annotations_path], output_dir) except ExporterNotFoundError: - _error(f"Unsupported export format: {format}, currently supported: {export_formats}") + _error( + f"Unsupported export format: {format}, currently supported: {export_formats}" + ) except AttributeError: - _error(f"Unsupported export format: {format}, currently supported: {export_formats}") + _error( + f"Unsupported export format: {format}, currently supported: {export_formats}" + ) except NotFound as e: _error(f"No dataset with name '{e.name}'") @@ -1127,11 +1224,22 @@ def convert(format: str, files: List[PathLike], output_dir: Path) -> None: except AttributeError: _error(f"Unsupported export format, currently supported: {export_formats}") - export_annotations(parser, files, output_dir, split_sequences=(format not in ["darwin_1.0", "nifti"])) + export_annotations( + parser, + files, + output_dir, + split_sequences=(format not in ["darwin_1.0", "nifti"]), + ) def post_comment( - dataset_slug: str, filename: str, text: str, x: float = 1, y: float = 1, w: float = 1, h: float = 1 + dataset_slug: str, + filename: str, + text: str, + x: float = 1, + y: float = 1, + w: float = 1, + h: float = 1, ) -> None: """ Creates a comment box with a comment for the given file in the given dataset. 
@@ -1166,7 +1274,9 @@ def post_comment( except NotFound: _error(f"unable to find dataset: {dataset_slug}") - items: List[DatasetItem] = list(dataset.fetch_remote_files(filters={"filenames": [filename]})) + items: List[DatasetItem] = list( + dataset.fetch_remote_files(filters={"filenames": [filename]}) + ) if len(items) == 0: console.print(f"[bold yellow]No files matching '{filename}' found...") @@ -1197,10 +1307,15 @@ def help(parser: argparse.ArgumentParser, subparser: Optional[str] = None) -> No parser = next( action.choices[subparser] for action in parser._actions - if isinstance(action, argparse._SubParsersAction) and subparser in action.choices + if isinstance(action, argparse._SubParsersAction) + and subparser in action.choices ) - actions = [action for action in parser._actions if isinstance(action, argparse._SubParsersAction)] + actions = [ + action + for action in parser._actions + if isinstance(action, argparse._SubParsersAction) + ] print(parser.description) print("\nCommands:") @@ -1289,7 +1404,9 @@ def _load_client( def _console_theme() -> Theme: - return Theme({"success": "bold green", "warning": "bold yellow", "error": "bold red"}) + return Theme( + {"success": "bold green", "warning": "bold yellow", "error": "bold red"} + ) def _has_valid_status(status: str) -> bool: diff --git a/darwin/client.py b/darwin/client.py index 296c09b2e..b87bf371d 100644 --- a/darwin/client.py +++ b/darwin/client.py @@ -40,7 +40,12 @@ class Client: - def __init__(self, config: Config, default_team: Optional[str] = None, log: Optional[Logger] = None): + def __init__( + self, + config: Config, + default_team: Optional[str] = None, + log: Optional[Logger] = None, + ): self.config: Config = config self.url: str = config.get("global/api_endpoint") self.base_url: str = config.get("global/base_url") @@ -110,7 +115,9 @@ def list_local_datasets(self, team_slug: Optional[str] = None) -> Iterator[Path] if project_path.is_dir() and is_project_dir(project_path): yield Path(project_path) - def list_remote_datasets(self, team_slug: Optional[str] = None) -> Iterator[RemoteDataset]: + def list_remote_datasets( + self, team_slug: Optional[str] = None + ) -> Iterator[RemoteDataset]: """ Returns a list of all available datasets with the team currently authenticated against. @@ -150,7 +157,9 @@ def list_remote_datasets(self, team_slug: Optional[str] = None) -> Iterator[Remo client=self, ) - def get_remote_dataset(self, dataset_identifier: Union[str, DatasetIdentifier]) -> RemoteDataset: + def get_remote_dataset( + self, dataset_identifier: Union[str, DatasetIdentifier] + ) -> RemoteDataset: """ Get a remote dataset based on its identifier. @@ -169,7 +178,9 @@ def get_remote_dataset(self, dataset_identifier: Union[str, DatasetIdentifier]) NotFound If no dataset with the given identifier was found. 
""" - parsed_dataset_identifier: DatasetIdentifier = DatasetIdentifier.parse(dataset_identifier) + parsed_dataset_identifier: DatasetIdentifier = DatasetIdentifier.parse( + dataset_identifier + ) if not parsed_dataset_identifier.team_slug: parsed_dataset_identifier.team_slug = self.default_team @@ -177,21 +188,29 @@ def get_remote_dataset(self, dataset_identifier: Union[str, DatasetIdentifier]) try: matching_datasets: List[RemoteDataset] = [ dataset - for dataset in self.list_remote_datasets(team_slug=parsed_dataset_identifier.team_slug) + for dataset in self.list_remote_datasets( + team_slug=parsed_dataset_identifier.team_slug + ) if dataset.slug == parsed_dataset_identifier.dataset_slug ] except Unauthorized: # There is a chance that we tried to access an open dataset dataset: Dict[str, UnknownType] = cast( Dict[str, UnknownType], - self._get(f"{parsed_dataset_identifier.team_slug}/{parsed_dataset_identifier.dataset_slug}"), + self._get( + f"{parsed_dataset_identifier.team_slug}/{parsed_dataset_identifier.dataset_slug}" + ), ) # If there isn't a record of this team, create one. - if not self.config.get_team(parsed_dataset_identifier.team_slug, raise_on_invalid_team=False): + if not self.config.get_team( + parsed_dataset_identifier.team_slug, raise_on_invalid_team=False + ): datasets_dir: Path = Path.home() / ".darwin" / "datasets" self.config.set_team( - team=parsed_dataset_identifier.team_slug, api_key="", datasets_dir=str(datasets_dir) + team=parsed_dataset_identifier.team_slug, + api_key="", + datasets_dir=str(datasets_dir), ) if dataset.get("version", 1) == 2: @@ -220,7 +239,9 @@ def get_remote_dataset(self, dataset_identifier: Union[str, DatasetIdentifier]) matching_datasets[0].release = parsed_dataset_identifier.version return matching_datasets[0] - def create_dataset(self, name: str, team_slug: Optional[str] = None) -> RemoteDataset: + def create_dataset( + self, name: str, team_slug: Optional[str] = None + ) -> RemoteDataset: """ Create a remote dataset. @@ -237,7 +258,8 @@ def create_dataset(self, name: str, team_slug: Optional[str] = None) -> RemoteDa The created dataset. """ dataset: Dict[str, UnknownType] = cast( - Dict[str, UnknownType], self._post("/datasets", {"name": name}, team_slug=team_slug) + Dict[str, UnknownType], + self._post("/datasets", {"name": name}, team_slug=team_slug), ) if dataset.get("version", 1) == 2: @@ -275,7 +297,11 @@ def archive_remote_dataset(self, dataset_id: int, team_slug: str) -> None: self._put(f"datasets/{dataset_id}/archive", payload={}, team_slug=team_slug) def fetch_remote_files( - self, dataset_id: int, cursor: Dict[str, UnknownType], payload: Dict[str, UnknownType], team_slug: str + self, + dataset_id: int, + cursor: Dict[str, UnknownType], + payload: Dict[str, UnknownType], + team_slug: str, ) -> Dict[str, UnknownType]: """ Download the remote files from the given dataset. @@ -298,11 +324,17 @@ def fetch_remote_files( """ response: Dict[str, UnknownType] = cast( Dict[str, UnknownType], - self._post(f"/datasets/{dataset_id}/items?{parse.urlencode(cursor)}", payload, team_slug), + self._post( + f"/datasets/{dataset_id}/items?{parse.urlencode(cursor)}", + payload, + team_slug, + ), ) return response - def fetch_remote_classes(self, team_slug: Optional[str] = None) -> List[Dict[str, UnknownType]]: + def fetch_remote_classes( + self, team_slug: Optional[str] = None + ) -> List[Dict[str, UnknownType]]: """ Fetches all remote classes on the remote dataset. 
@@ -328,12 +360,15 @@ def fetch_remote_classes(self, team_slug: Optional[str] = None) -> List[Dict[str the_team_slug: str = the_team.slug response: Dict[str, UnknownType] = cast( - Dict[str, UnknownType], self._get(f"/teams/{the_team_slug}/annotation_classes?include_tags=true") + Dict[str, UnknownType], + self._get(f"/teams/{the_team_slug}/annotation_classes?include_tags=true"), ) return response["annotation_classes"] - def update_annotation_class(self, class_id: int, payload: Dict[str, UnknownType]) -> Dict[str, UnknownType]: + def update_annotation_class( + self, class_id: int, payload: Dict[str, UnknownType] + ) -> Dict[str, UnknownType]: """ Updates the AnnotationClass with the given id. @@ -350,11 +385,14 @@ def update_annotation_class(self, class_id: int, payload: Dict[str, UnknownType] A dictionary with the result of the operation. """ response: Dict[str, UnknownType] = cast( - Dict[str, UnknownType], self._put(f"/annotation_classes/{class_id}", payload) + Dict[str, UnknownType], + self._put(f"/annotation_classes/{class_id}", payload), ) return response - def create_annotation_class(self, dataset_id: int, type_ids: List[int], name: str) -> Dict[str, UnknownType]: + def create_annotation_class( + self, dataset_id: int, type_ids: List[int], name: str + ) -> Dict[str, UnknownType]: """ Creates an AnnotationClass. @@ -387,7 +425,9 @@ def create_annotation_class(self, dataset_id: int, type_ids: List[int], name: st ) return response - def import_annotation(self, item_id: ItemId, payload: Dict[str, UnknownType]) -> None: + def import_annotation( + self, item_id: ItemId, payload: Dict[str, UnknownType] + ) -> None: """ Imports the annotation for the item with the given id. @@ -418,7 +458,8 @@ def fetch_remote_attributes(self, dataset_id: int) -> List[Dict[str, UnknownType A List with the attributes, where each attribute is a dictionary. """ response: List[Dict[str, UnknownType]] = cast( - List[Dict[str, UnknownType]], self._get(f"/datasets/{dataset_id}/attributes") + List[Dict[str, UnknownType]], + self._get(f"/datasets/{dataset_id}/attributes"), ) return response @@ -460,11 +501,15 @@ def get_team_features(self, team_slug: str) -> List[Feature]: features: List[Feature] = [] for feature in response: - features.append(Feature(name=str(feature["name"]), enabled=bool(feature["enabled"]))) + features.append( + Feature(name=str(feature["name"]), enabled=bool(feature["enabled"])) + ) return features - def feature_enabled(self, feature_name: str, team_slug: Optional[str] = None) -> bool: + def feature_enabled( + self, feature_name: str, team_slug: Optional[str] = None + ) -> bool: """ Returns whether or not a given feature is enabled for a team. @@ -524,7 +569,9 @@ def get_datasets_dir(self, team_slug: Optional[str] = None) -> str: return the_team.datasets_dir - def set_datasets_dir(self, datasets_dir: Path, team_slug: Optional[str] = None) -> None: + def set_datasets_dir( + self, datasets_dir: Path, team_slug: Optional[str] = None + ) -> None: """ Sets the dataset directory of the specified team or the default one. @@ -535,9 +582,13 @@ def set_datasets_dir(self, datasets_dir: Path, team_slug: Optional[str] = None) team_slug: Optional[str] Team slug of the team the dataset will belong to. Defaults to None. 
""" - self.config.put(f"teams/{team_slug or self.default_team}/datasets_dir", datasets_dir) + self.config.put( + f"teams/{team_slug or self.default_team}/datasets_dir", datasets_dir + ) - def confirm_upload(self, dataset_item_id: int, team_slug: Optional[str] = None) -> None: + def confirm_upload( + self, dataset_item_id: int, team_slug: Optional[str] = None + ) -> None: """ Confirms that the item was uploaded. @@ -555,9 +606,15 @@ def confirm_upload(self, dataset_item_id: int, team_slug: Optional[str] = None) the_team_slug: str = the_team.slug - self._put_raw(endpoint=f"/dataset_items/{dataset_item_id}/confirm_upload", payload={}, team_slug=the_team_slug) + self._put_raw( + endpoint=f"/dataset_items/{dataset_item_id}/confirm_upload", + payload={}, + team_slug=the_team_slug, + ) - def sign_upload(self, dataset_item_id: int, team_slug: Optional[str] = None) -> Dict[str, UnknownType]: + def sign_upload( + self, dataset_item_id: int, team_slug: Optional[str] = None + ) -> Dict[str, UnknownType]: """ Signs the upload of the given DatasetItem. @@ -586,12 +643,18 @@ def sign_upload(self, dataset_item_id: int, team_slug: Optional[str] = None) -> the_team_slug: str = the_team.slug response: Dict[str, UnknownType] = cast( - Dict[str, UnknownType], self._get(f"/dataset_items/{dataset_item_id}/sign_upload", team_slug=the_team_slug) + Dict[str, UnknownType], + self._get( + f"/dataset_items/{dataset_item_id}/sign_upload", team_slug=the_team_slug + ), ) return response def upload_data( - self, dataset_slug: str, payload: Dict[str, UnknownType], team_slug: Optional[str] = None + self, + dataset_slug: str, + payload: Dict[str, UnknownType], + team_slug: Optional[str] = None, ) -> Dict[str, UnknownType]: """ Uploads the given data to the given dataset. @@ -642,10 +705,14 @@ def annotation_types(self) -> List[Dict[str, UnknownType]]: List[Dict[str, UnknownType]] A list with the annotation types as dictionaries. """ - response: List[Dict[str, UnknownType]] = cast(List[Dict[str, UnknownType]], self._get("/annotation_types")) + response: List[Dict[str, UnknownType]] = cast( + List[Dict[str, UnknownType]], self._get("/annotation_types") + ) return response - def get_exports(self, dataset_id: int, team_slug: Optional[str] = None) -> List[Dict[str, UnknownType]]: + def get_exports( + self, dataset_id: int, team_slug: Optional[str] = None + ) -> List[Dict[str, UnknownType]]: """ Get all the exports from the given dataset. @@ -674,11 +741,14 @@ def get_exports(self, dataset_id: int, team_slug: Optional[str] = None) -> List[ the_team_slug: str = the_team.slug response: List[Dict[str, UnknownType]] = cast( - List[Dict[str, UnknownType]], self._get(f"/datasets/{dataset_id}/exports", team_slug=the_team_slug) + List[Dict[str, UnknownType]], + self._get(f"/datasets/{dataset_id}/exports", team_slug=the_team_slug), ) return response - def create_export(self, dataset_id: int, payload: Dict[str, UnknownType], team_slug: str) -> None: + def create_export( + self, dataset_id: int, payload: Dict[str, UnknownType], team_slug: str + ) -> None: """ Create an export for the given dataset. @@ -691,9 +761,13 @@ def create_export(self, dataset_id: int, payload: Dict[str, UnknownType], team_s team_slug: Optional[str] Team slug of the team the dataset will belong to. Defaults to None. 
""" - self._post(f"/datasets/{dataset_id}/exports", payload=payload, team_slug=team_slug) + self._post( + f"/datasets/{dataset_id}/exports", payload=payload, team_slug=team_slug + ) - def get_report(self, dataset_id: int, granularity: str, team_slug: Optional[str] = None) -> Response: + def get_report( + self, dataset_id: int, granularity: str, team_slug: Optional[str] = None + ) -> Response: """ Gets the report for the given dataset. @@ -728,7 +802,9 @@ def get_report(self, dataset_id: int, granularity: str, team_slug: Optional[str] the_team_slug, ) - def delete_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType]) -> None: + def delete_item( + self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType] + ) -> None: """ Gets the report for the given dataset. @@ -741,9 +817,13 @@ def delete_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, Unkn payload: Dict[str, UnknownType] A filter Dictionary that defines the items to be deleted. """ - self._delete(f"teams/{team_slug}/datasets/{dataset_slug}/items", payload, team_slug) + self._delete( + f"teams/{team_slug}/datasets/{dataset_slug}/items", payload, team_slug + ) - def archive_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType]) -> None: + def archive_item( + self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType] + ) -> None: """ Archives the item from the given dataset. @@ -756,9 +836,15 @@ def archive_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, Unk payload: Dict[str, UnknownType] A filter Dictionary that defines the items to be archived. """ - self._put_raw(f"teams/{team_slug}/datasets/{dataset_slug}/items/archive", payload, team_slug) + self._put_raw( + f"teams/{team_slug}/datasets/{dataset_slug}/items/archive", + payload, + team_slug, + ) - def restore_archived_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType]) -> None: + def restore_archived_item( + self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType] + ) -> None: """ Restores the archived item from the given dataset. @@ -771,9 +857,15 @@ def restore_archived_item(self, dataset_slug: str, team_slug: str, payload: Dict payload: Dict[str, UnknownType] A filter Dictionary that defines the items to be restored. """ - self._put_raw(f"teams/{team_slug}/datasets/{dataset_slug}/items/restore", payload, team_slug) + self._put_raw( + f"teams/{team_slug}/datasets/{dataset_slug}/items/restore", + payload, + team_slug, + ) - def move_item_to_new(self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType]) -> None: + def move_item_to_new( + self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType] + ) -> None: """ Moves the given item's status to new. @@ -786,9 +878,15 @@ def move_item_to_new(self, dataset_slug: str, team_slug: str, payload: Dict[str, payload: Dict[str, UnknownType] A filter Dictionary that defines the items to have the 'new' status. """ - self._put_raw(f"teams/{team_slug}/datasets/{dataset_slug}/items/move_to_new", payload, team_slug) + self._put_raw( + f"teams/{team_slug}/datasets/{dataset_slug}/items/move_to_new", + payload, + team_slug, + ) - def reset_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType]) -> None: + def reset_item( + self, dataset_slug: str, team_slug: str, payload: Dict[str, UnknownType] + ) -> None: """ Resets the given item. 
@@ -801,9 +899,17 @@ def reset_item(self, dataset_slug: str, team_slug: str, payload: Dict[str, Unkno payload: Dict[str, UnknownType] A filter Dictionary that defines the items to be reset. """ - self._put_raw(f"teams/{team_slug}/datasets/{dataset_slug}/items/reset", payload, team_slug) + self._put_raw( + f"teams/{team_slug}/datasets/{dataset_slug}/items/reset", payload, team_slug + ) - def move_to_stage(self, dataset_slug: str, team_slug: str, filters: Dict[str, UnknownType], stage_id: int) -> None: + def move_to_stage( + self, + dataset_slug: str, + team_slug: str, + filters: Dict[str, UnknownType], + stage_id: int, + ) -> None: """ Moves the given items to the specified stage @@ -822,10 +928,18 @@ def move_to_stage(self, dataset_slug: str, team_slug: str, filters: Dict[str, Un "filter": filters, "workflow_stage_template_id": stage_id, } - self._put_raw(f"teams/{team_slug}/datasets/{dataset_slug}/set_stage", payload, team_slug) + self._put_raw( + f"teams/{team_slug}/datasets/{dataset_slug}/set_stage", payload, team_slug + ) def post_workflow_comment( - self, workflow_id: int, text: str, x: float = 1, y: float = 1, w: float = 1, h: float = 1 + self, + workflow_id: int, + text: str, + x: float = 1, + y: float = 1, + w: float = 1, + h: float = 1, ) -> int: """ Creates a comment box with the given text for the given workflow. @@ -854,17 +968,24 @@ def post_workflow_comment( Dict[str, UnknownType], self._post( f"workflows/{workflow_id}/workflow_comment_threads", - {"bounding_box": {"x": x, "y": y, "w": w, "h": h}, "workflow_comments": [{"body": text}]}, + { + "bounding_box": {"x": x, "y": y, "w": w, "h": h}, + "workflow_comments": [{"body": text}], + }, ), ) comment_id: Optional[int] = response.get("id") if comment_id is None: - raise ValueError(f"Unable to retrieve comment id for workflow: {workflow_id}.") + raise ValueError( + f"Unable to retrieve comment id for workflow: {workflow_id}." + ) return comment_id - def instantiate_item(self, item_id: int, include_metadata: bool = False) -> Union[int, Tuple[int, DatasetItem]]: + def instantiate_item( + self, item_id: int, include_metadata: bool = False + ) -> Union[int, Tuple[int, DatasetItem]]: """ Instantiates the given item with a workflow. @@ -886,7 +1007,9 @@ def instantiate_item(self, item_id: int, include_metadata: bool = False) -> Unio ValueError If due to an error, no workflow was instantiated for this item an therefore no workflow id can be returned. 
""" - response: Dict[str, UnknownType] = cast(Dict[str, UnknownType], self._post(f"dataset_items/{item_id}/workflow")) + response: Dict[str, UnknownType] = cast( + Dict[str, UnknownType], self._post(f"dataset_items/{item_id}/workflow") + ) id: Optional[int] = response.get("current_workflow_id") if id is None: @@ -933,7 +1056,9 @@ def local(cls, team_slug: Optional[str] = None) -> "Client": return Client.from_config(config_path, team_slug=team_slug) @classmethod - def from_config(cls, config_path: Path, team_slug: Optional[str] = None) -> "Client": + def from_config( + cls, config_path: Path, team_slug: Optional[str] = None + ) -> "Client": """ Factory method to create a client from the configuration file passed as parameter @@ -975,12 +1100,16 @@ def from_guest(cls, datasets_dir: Optional[Path] = None) -> "Client": datasets_dir = Path.home() / ".darwin" / "datasets" config: Config = Config(path=None) - config.set_global(api_endpoint=Client.default_api_url(), base_url=Client.default_base_url()) + config.set_global( + api_endpoint=Client.default_api_url(), base_url=Client.default_base_url() + ) return cls(config=config) @classmethod - def from_api_key(cls, api_key: str, datasets_dir: Optional[Path] = None) -> "Client": + def from_api_key( + cls, api_key: str, datasets_dir: Optional[Path] = None + ) -> "Client": """ Factory method to create a client given an API key. @@ -999,9 +1128,14 @@ def from_api_key(cls, api_key: str, datasets_dir: Optional[Path] = None) -> "Cli if not datasets_dir: datasets_dir = Path.home() / ".darwin" / "datasets" - headers: Dict[str, str] = {"Content-Type": "application/json", "Authorization": f"ApiKey {api_key}"} + headers: Dict[str, str] = { + "Content-Type": "application/json", + "Authorization": f"ApiKey {api_key}", + } api_url: str = Client.default_api_url() - response: requests.Response = requests.get(urljoin(api_url, "/users/token_info"), headers=headers) + response: requests.Response = requests.get( + urljoin(api_url, "/users/token_info"), headers=headers + ) if not response.ok: raise InvalidLogin() @@ -1039,11 +1173,15 @@ def default_base_url() -> str: """ return os.getenv("DARWIN_BASE_URL", "https://darwin.v7labs.com") - def _get_headers(self, team_slug: Optional[str] = None, compressed: bool = False) -> Dict[str, str]: + def _get_headers( + self, team_slug: Optional[str] = None, compressed: bool = False + ) -> Dict[str, str]: headers: Dict[str, str] = {"Content-Type": "application/json"} api_key: Optional[str] = None - team_config: Optional[Team] = self.config.get_team(team_slug or self.default_team, raise_on_invalid_team=False) + team_config: Optional[Team] = self.config.get_team( + team_slug or self.default_team, raise_on_invalid_team=False + ) if team_config: api_key = team_config.api_key @@ -1060,9 +1198,15 @@ def _get_headers(self, team_slug: Optional[str] = None, compressed: bool = False return headers def _get_raw_from_full_url( - self, url: str, team_slug: Optional[str] = None, retry: bool = False, stream: bool = False + self, + url: str, + team_slug: Optional[str] = None, + retry: bool = False, + stream: bool = False, ) -> Response: - response: Response = self.session.get(url, headers=self._get_headers(team_slug), stream=stream) + response: Response = self.session.get( + url, headers=self._get_headers(team_slug), stream=stream + ) self.log.debug( f"Client GET request response ({get_response_content(response)}) with status " @@ -1075,16 +1219,24 @@ def _get_raw_from_full_url( if not response.ok and retry: time.sleep(10) - return 
self._get_raw_from_full_url(url=url, team_slug=team_slug, retry=False, stream=stream) + return self._get_raw_from_full_url( + url=url, team_slug=team_slug, retry=False, stream=stream + ) response.raise_for_status() return response def _get_raw( - self, endpoint: str, team_slug: Optional[str] = None, retry: bool = False, stream: bool = False + self, + endpoint: str, + team_slug: Optional[str] = None, + retry: bool = False, + stream: bool = False, ) -> Response: - return self._get_raw_from_full_url(urljoin(self.url, endpoint), team_slug, retry=retry, stream=stream) + return self._get_raw_from_full_url( + urljoin(self.url, endpoint), team_slug, retry=retry, stream=stream + ) def _get( self, endpoint: str, team_slug: Optional[str] = None, retry: bool = False @@ -1093,10 +1245,16 @@ def _get( return self._decode_response(response) def _put_raw( - self, endpoint: str, payload: Dict[str, UnknownType], team_slug: Optional[str] = None, retry: bool = False + self, + endpoint: str, + payload: Dict[str, UnknownType], + team_slug: Optional[str] = None, + retry: bool = False, ) -> Response: response: requests.Response = self.session.put( - urljoin(self.url, endpoint), json=payload, headers=self._get_headers(team_slug) + urljoin(self.url, endpoint), + json=payload, + headers=self._get_headers(team_slug), ) self.log.debug( @@ -1117,7 +1275,11 @@ def _put_raw( return response def _put( - self, endpoint: str, payload: Dict[str, UnknownType], team_slug: Optional[str] = None, retry: bool = False + self, + endpoint: str, + payload: Dict[str, UnknownType], + team_slug: Optional[str] = None, + retry: bool = False, ) -> Union[Dict[str, UnknownType], List[Dict[str, UnknownType]]]: response: Response = self._put_raw(endpoint, payload, team_slug, retry) return self._decode_response(response) @@ -1132,10 +1294,14 @@ def _post_raw( if payload is None: payload = {} - compression_level = int(self.config.get("global/payload_compression_level", "0")) + compression_level = int( + self.config.get("global/payload_compression_level", "0") + ) if compression_level > 0: - compressed_payload = zlib.compress(json.dumps(payload).encode("utf-8"), level=compression_level) + compressed_payload = zlib.compress( + json.dumps(payload).encode("utf-8"), level=compression_level + ) response: Response = requests.post( urljoin(self.url, endpoint), @@ -1144,7 +1310,9 @@ def _post_raw( ) else: response: Response = requests.post( - urljoin(self.url, endpoint), json=payload, headers=self._get_headers(team_slug) + urljoin(self.url, endpoint), + json=payload, + headers=self._get_headers(team_slug), ) self.log.debug( @@ -1185,7 +1353,9 @@ def _delete( payload = {} response: requests.Response = self.session.delete( - urljoin(self.url, endpoint), json=payload, headers=self._get_headers(team_slug) + urljoin(self.url, endpoint), + json=payload, + headers=self._get_headers(team_slug), ) self.log.debug( @@ -1268,7 +1438,11 @@ def _decode_response( except ValueError: self.log.error(f"[ERROR {response.status_code}] {response.text}") response.close() - return {"error": "Response is not JSON encoded", "status_code": response.status_code, "text": response.text} + return { + "error": "Response is not JSON encoded", + "status_code": response.status_code, + "text": response.text, + } def _handle_latest_darwin_py(self, server_latest_version: str) -> None: try: diff --git a/darwin/config.py b/darwin/config.py index e867810c2..f4cb38981 100644 --- a/darwin/config.py +++ b/darwin/config.py @@ -149,7 +149,9 @@ def set_compression_level(self, level: int) -> None: 
raise InvalidCompressionLevel(level) self.put("global/payload_compression_level", level) - def set_global(self, api_endpoint: str, base_url: str, default_team: Optional[str] = None) -> None: + def set_global( + self, api_endpoint: str, base_url: str, default_team: Optional[str] = None + ) -> None: """ Stores the url to access teams. If a default team is given, it also stores that team as the globaly default one. @@ -168,7 +170,9 @@ def set_global(self, api_endpoint: str, base_url: str, default_team: Optional[st if default_team: self.put("global/default_team", default_team) - def get_team(self, team: Optional[str] = None, raise_on_invalid_team: bool = True) -> Optional[Team]: + def get_team( + self, team: Optional[str] = None, raise_on_invalid_team: bool = True + ) -> Optional[Team]: """ Returns the Team object from the team with the given slug if an authentication with an API key was performed earlier. @@ -201,10 +205,15 @@ def get_team(self, team: Optional[str] = None, raise_on_invalid_team: bool = Tru raise InvalidTeam() else: return None - default: bool = self.get("global/default_team") == team or len(list(self.get("teams").keys())) == 1 + default: bool = ( + self.get("global/default_team") == team + or len(list(self.get("teams").keys())) == 1 + ) datasets_dir = self.get(f"teams/{team}/datasets_dir") - return Team(slug=team, api_key=api_key, default=default, datasets_dir=datasets_dir) + return Team( + slug=team, api_key=api_key, default=default, datasets_dir=datasets_dir + ) def get_default_team(self, raise_on_invalid_team: bool = True) -> Optional[Team]: """ diff --git a/darwin/dataset/download_manager.py b/darwin/dataset/download_manager.py index 1b9326e0c..b851a6c59 100644 --- a/darwin/dataset/download_manager.py +++ b/darwin/dataset/download_manager.py @@ -93,8 +93,12 @@ def download_all_images_from_annotations( raise ValueError(f"Annotation format {annotation_format} not supported") # Verify that there is not already image in the images folder - unfiltered_files = images_path.rglob(f"*") if use_folders else images_path.glob(f"*") - existing_images = {image for image in unfiltered_files if is_image_extension_allowed(image.suffix)} + unfiltered_files = ( + images_path.rglob(f"*") if use_folders else images_path.glob(f"*") + ) + existing_images = { + image for image in unfiltered_files if is_image_extension_allowed(image.suffix) + } annotations_to_download_path = [] for annotation_path in annotations_path.glob(f"*.{annotation_format}"): @@ -104,7 +108,11 @@ def download_all_images_from_annotations( if not force_replace: # Check the planned path for the image against the existing images - planned_image_path = images_path / Path(annotation.remote_path.lstrip('/\\')).resolve().absolute() / Path(annotation.filename) + planned_image_path = ( + images_path + / Path(annotation.remote_path.lstrip("/\\")).resolve().absolute() + / Path(annotation.filename) + ) if planned_image_path in existing_images: continue @@ -118,12 +126,16 @@ def download_all_images_from_annotations( if remove_extra: # Removes existing images for which there is not corresponding annotation - annotations_downloaded_stem = [a.stem for a in annotations_path.glob(f"*.{annotation_format}")] + annotations_downloaded_stem = [ + a.stem for a in annotations_path.glob(f"*.{annotation_format}") + ] for existing_image in existing_images: if existing_image.stem not in annotations_downloaded_stem: - print(f"Removing {existing_image} as there is no corresponding annotation") + print( + f"Removing {existing_image} as there is no 
corresponding annotation" + ) existing_image.unlink() - + # Create the generator with the partial functions download_functions: List = [] for annotation_path in annotations_to_download_path: @@ -191,7 +203,13 @@ def download_image_from_annotation( if annotation_format == "json": downloadables = _download_image_from_json_annotation( - api_key, annotation_path, images_path, use_folders, video_frames, force_slots, ignore_slots + api_key, + annotation_path, + images_path, + use_folders, + video_frames, + force_slots, + ignore_slots, ) for downloadable in downloadables: downloadable() @@ -239,7 +257,13 @@ def lazy_download_image_from_annotation( if annotation_format == "json": return _download_image_from_json_annotation( - api_key, annotation_path, images_path, use_folders, video_frames, force_slots, ignore_slots + api_key, + annotation_path, + images_path, + use_folders, + video_frames, + force_slots, + ignore_slots, ) else: console = Console() @@ -269,13 +293,25 @@ def _download_image_from_json_annotation( if len(annotation.slots) > 0: if ignore_slots: return _download_single_slot_from_json_annotation( - annotation, api_key, parent_path, annotation_path, video_frames, use_folders + annotation, + api_key, + parent_path, + annotation_path, + video_frames, + use_folders, ) if force_slots: - return _download_all_slots_from_json_annotation(annotation, api_key, parent_path, video_frames) + return _download_all_slots_from_json_annotation( + annotation, api_key, parent_path, video_frames + ) else: return _download_single_slot_from_json_annotation( - annotation, api_key, parent_path, annotation_path, video_frames, use_folders + annotation, + api_key, + parent_path, + annotation_path, + video_frames, + use_folders, ) return [] @@ -288,7 +324,11 @@ def _download_all_slots_from_json_annotation( for slot in annotation.slots: if not slot.name: raise ValueError("Slot name is required to download all slots") - slot_path = parent_path / sanitize_filename(annotation.filename) / sanitize_filename(slot.name) + slot_path = ( + parent_path + / sanitize_filename(annotation.filename) + / sanitize_filename(slot.name) + ) slot_path.mkdir(exist_ok=True, parents=True) if video_frames and slot.type != "image": @@ -302,17 +342,33 @@ def _download_all_slots_from_json_annotation( segment_url = slot.segments[index]["url"] path = video_path / f".{index:07d}.ts" generator.append( - functools.partial(_download_and_extract_video_segment, segment_url, api_key, path, manifest) + functools.partial( + _download_and_extract_video_segment, + segment_url, + api_key, + path, + manifest, + ) ) else: for i, frame_url in enumerate(slot.frame_urls or []): path = video_path / f"{i:07d}.png" - generator.append(functools.partial(_download_image, frame_url, path, api_key, slot)) + generator.append( + functools.partial( + _download_image, frame_url, path, api_key, slot + ) + ) else: for upload in slot.source_files: file_path = slot_path / sanitize_filename(upload["file_name"]) generator.append( - functools.partial(_download_image_with_trace, annotation, upload["url"], file_path, api_key) + functools.partial( + _download_image_with_trace, + annotation, + upload["url"], + file_path, + api_key, + ) ) return generator @@ -341,12 +397,20 @@ def _download_single_slot_from_json_annotation( segment_url = slot.segments[index]["url"] path = video_path / f".{index:07d}.ts" generator.append( - functools.partial(_download_and_extract_video_segment, segment_url, api_key, path, manifest) + functools.partial( + _download_and_extract_video_segment, + 
segment_url, + api_key, + path, + manifest, + ) ) else: for i, frame_url in enumerate(slot.frame_urls): path = video_path / f"{i:07d}.png" - generator.append(functools.partial(_download_image, frame_url, path, api_key, slot)) + generator.append( + functools.partial(_download_image, frame_url, path, api_key, slot) + ) else: if len(slot.source_files) > 0: image = slot.source_files[0] @@ -359,9 +423,19 @@ def _download_single_slot_from_json_annotation( filename = str(Path(stem + suffix)) else: filename = slot.source_files[0]["file_name"] - image_path = parent_path / sanitize_filename(filename or annotation.filename) + image_path = parent_path / sanitize_filename( + filename or annotation.filename + ) - generator.append(functools.partial(_download_image_with_trace, annotation, image_url, image_path, api_key)) + generator.append( + functools.partial( + _download_image_with_trace, + annotation, + image_url, + image_path, + api_key, + ) + ) return generator @@ -389,7 +463,12 @@ def _update_local_path(annotation: AnnotationFile, url, local_path): details="Use the ``download_image_from_annotation`` instead.", ) def download_image_from_json_annotation( - api_key: str, api_url: str, annotation_path: Path, image_path: Path, use_folders: bool, video_frames: bool + api_key: str, + api_url: str, + annotation_path: Path, + image_path: Path, + use_folders: bool, + video_frames: bool, ) -> None: """ Downloads an image given a ``.json`` annotation path and renames the json after the image's @@ -456,7 +535,9 @@ def download_image(url: str, path: Path, api_key: str) -> None: if "token" in url: response: requests.Response = requests.get(url, stream=True) else: - response = requests.get(url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True) + response = requests.get( + url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True + ) # Correct status: download image if response.ok: with open(str(path), "wb") as file: @@ -472,7 +553,9 @@ def download_image(url: str, path: Path, api_key: str) -> None: time.sleep(1) -def _download_image(url: str, path: Path, api_key: str, slot: Optional[dt.Slot] = None) -> None: +def _download_image( + url: str, path: Path, api_key: str, slot: Optional[dt.Slot] = None +) -> None: if path.exists(): return TIMEOUT: int = 60 @@ -485,7 +568,9 @@ def _download_image(url: str, path: Path, api_key: str, slot: Optional[dt.Slot] if "token" in url: response: requests.Response = requests.get(url, stream=True) else: - response = requests.get(url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True) + response = requests.get( + url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True + ) # Correct status: download image if response.ok and has_json_content_type(response): # this branch is a workaround for edge case in V1 when video file from external storage could be registered @@ -511,7 +596,9 @@ def _download_image_with_trace(annotation, image_url, image_path, api_key): _update_local_path(annotation, image_url, image_path) -def _fetch_multiple_files(path: Path, response: requests.Response, transform_file_function=None) -> None: +def _fetch_multiple_files( + path: Path, response: requests.Response, transform_file_function=None +) -> None: obj = response.json() if "urls" not in obj: raise Exception(f"Malformed response: {obj}") @@ -533,7 +620,9 @@ def _fetch_multiple_files(path: Path, response: requests.Response, transform_fil ) -def _write_file(path: Path, response: requests.Response, transform_file_function=None) -> None: +def _write_file( + path: Path, response: 
requests.Response, transform_file_function=None +) -> None: with open(str(path), "wb") as file: for chunk in response: file.write(chunk) @@ -558,7 +647,9 @@ def _rg16_to_grayscale(path): new_image.save(path) -def _download_and_extract_video_segment(url: str, api_key: str, path: Path, manifest: dt.SegmentManifest) -> None: +def _download_and_extract_video_segment( + url: str, api_key: str, path: Path, manifest: dt.SegmentManifest +) -> None: _download_video_segment_file(url, api_key, path) _extract_frames_from_segment(path, manifest) path.unlink() @@ -575,14 +666,18 @@ def _extract_frames_from_segment(path: Path, manifest: dt.SegmentManifest) -> No cap = VideoCapture(str(path)) # Read and save frames. Iterates over every frame because frame seeking in OCV is not reliable or guaranteed. - frames_to_extract = dict([(item.frame, item.visible_frame) for item in manifest.items if item.visibility]) + frames_to_extract = dict( + [(item.frame, item.visible_frame) for item in manifest.items if item.visibility] + ) frame_index = 0 while cap.isOpened(): success, frame = cap.read() if frame is None: break if not success: - raise ValueError(f"Failed to read frame {frame_index} from video segment {path}") + raise ValueError( + f"Failed to read frame {frame_index} from video segment {path}" + ) if frame_index in frames_to_extract: visible_frame = frames_to_extract.pop(frame_index) frame_path = path.parent / f"{visible_frame:07d}.png" @@ -595,7 +690,9 @@ def _extract_frames_from_segment(path: Path, manifest: dt.SegmentManifest) -> No def _download_video_segment_file(url: str, api_key: str, path: Path) -> None: with requests.Session() as session: - retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504]) + retries = Retry( + total=5, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504] + ) session.mount("https://", HTTPAdapter(max_retries=retries)) if "token" in url: response = session.get(url) @@ -615,7 +712,9 @@ def _download_video_segment_file(url: str, api_key: str, path: Path) -> None: def download_manifest_txts(urls: List[str], api_key: str, folder: Path) -> List[Path]: paths = [] with requests.Session() as session: - retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504]) + retries = Retry( + total=5, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504] + ) session.mount("https://", HTTPAdapter(max_retries=retries)) for index, url in enumerate(urls): if "token" in url: @@ -636,7 +735,9 @@ def download_manifest_txts(urls: List[str], api_key: str, folder: Path) -> List[ return paths -def get_segment_manifests(slot: dt.Slot, parent_path: Path, api_key: str) -> List[dt.SegmentManifest]: +def get_segment_manifests( + slot: dt.Slot, parent_path: Path, api_key: str +) -> List[dt.SegmentManifest]: with TemporaryDirectory(dir=parent_path) as tmpdirname: tmpdir = Path(tmpdirname) if slot.frame_manifest is None: @@ -659,19 +760,33 @@ def _parse_manifests(paths: List[Path], slot: str) -> List[dt.SegmentManifest]: all_manifests[segment_int] = [] if bool(int(visibility)): all_manifests[segment_int].append( - dt.ManifestItem(int(frame), None, segment_int, True, float(timestamp), visible_frame_index) + dt.ManifestItem( + int(frame), + None, + segment_int, + True, + float(timestamp), + visible_frame_index, + ) ) visible_frame_index += 1 else: all_manifests[segment_int].append( - dt.ManifestItem(int(frame), None, segment_int, False, float(timestamp), None) + dt.ManifestItem( + int(frame), None, segment_int, False, float(timestamp), None + ) ) # Create a 
list of segments, sorted by segment number and all items sorted by frame number segments = [] for segment_int, seg_manifests in all_manifests.items(): seg_manifests.sort(key=lambda x: x.frame) segments.append( - dt.SegmentManifest(slot=slot, segment=segment_int, total_frames=len(seg_manifests), items=seg_manifests) + dt.SegmentManifest( + slot=slot, + segment=segment_int, + total_frames=len(seg_manifests), + items=seg_manifests, + ) ) # Calculate the absolute frame number for each item, as manifests are per segment diff --git a/darwin/dataset/identifier.py b/darwin/dataset/identifier.py index 7b7a1ddf1..ad23ff346 100644 --- a/darwin/dataset/identifier.py +++ b/darwin/dataset/identifier.py @@ -31,7 +31,12 @@ class DatasetIdentifier: The version of the identifier. """ - def __init__(self, dataset_slug: str, team_slug: Optional[str] = None, version: Optional[str] = None): + def __init__( + self, + dataset_slug: str, + team_slug: Optional[str] = None, + version: Optional[str] = None, + ): self.dataset_slug = dataset_slug self.team_slug = team_slug self.version = version @@ -96,4 +101,7 @@ def _parse(slug: str) -> Tuple[Optional[str], str, Optional[str]]: def _is_slug_valid(slug: str) -> bool: slug_format = "[\\_a-zA-Z0-9.-]+" version_format = "[\\_a-zA-Z0-9.:-]+" - return re.fullmatch(rf"({slug_format}/)?{slug_format}(:{version_format})?", slug) is not None + return ( + re.fullmatch(rf"({slug_format}/)?{slug_format}(:{version_format})?", slug) + is not None + ) diff --git a/darwin/dataset/release.py b/darwin/dataset/release.py index e74dfa17f..a8ce9e92d 100644 --- a/darwin/dataset/release.py +++ b/darwin/dataset/release.py @@ -90,7 +90,9 @@ def __init__( self.format = format @classmethod - def parse_json(cls, dataset_slug: str, team_slug: str, payload: Dict[str, Any]) -> "Release": + def parse_json( + cls, dataset_slug: str, team_slug: str, payload: Dict[str, Any] + ) -> "Release": """ Given a json, parses it into a ``Release`` object instance. @@ -138,10 +140,14 @@ def parse_json(cls, dataset_slug: str, team_slug: str, payload: Dict[str, Any]) A ``Release`` created from the given payload. 
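# --- Illustrative aside (not part of the diff) ---
# A minimal sketch of how the DatasetIdentifier constructor reformatted in the
# darwin/dataset/identifier.py hunk above is used. The slug values are
# hypothetical; only the three keyword parameters come from the hunk.
from darwin.dataset.identifier import DatasetIdentifier

identifier = DatasetIdentifier(
    dataset_slug="cars",   # required slug, matching [\_a-zA-Z0-9.-]+
    team_slug="my-team",   # optional "team/" prefix of the identifier
    version="1.0",         # optional ":version" suffix (a release name)
)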
""" try: - export_date: datetime.datetime = datetime.datetime.strptime(payload["inserted_at"], "%Y-%m-%dT%H:%M:%S%z") + export_date: datetime.datetime = datetime.datetime.strptime( + payload["inserted_at"], "%Y-%m-%dT%H:%M:%S%z" + ) except ValueError: # For python version older than 3.7 - export_date = datetime.datetime.strptime(payload["inserted_at"], "%Y-%m-%dT%H:%M:%SZ") + export_date = datetime.datetime.strptime( + payload["inserted_at"], "%Y-%m-%dT%H:%M:%SZ" + ) if payload["download_url"] is None: return cls( @@ -203,4 +209,6 @@ def download_zip(self, path: Path) -> Path: @property def identifier(self) -> DatasetIdentifier: """DatasetIdentifier : The ``DatasetIdentifier`` for this ``Release``.""" - return DatasetIdentifier(team_slug=self.team_slug, dataset_slug=self.dataset_slug, version=self.name) + return DatasetIdentifier( + team_slug=self.team_slug, dataset_slug=self.dataset_slug, version=self.name + ) diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index c84d81394..336523326 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -153,7 +153,9 @@ def split_video_annotations(self, release_name: str = "latest") -> None: annotations_path: Path = release_dir / "annotations" for count, annotation_file in enumerate(annotations_path.glob("*.json")): - darwin_annotation: Optional[AnnotationFile] = parse_darwin_json(annotation_file, count) + darwin_annotation: Optional[AnnotationFile] = parse_darwin_json( + annotation_file, count + ) if not darwin_annotation or not darwin_annotation.is_video: continue @@ -259,14 +261,20 @@ def pull( if subset_filter_annotations_function is not None: subset_filter_annotations_function(tmp_dir) if subset_folder_name is None: - subset_folder_name = datetime.now().strftime("%m/%d/%Y_%H:%M:%S") - annotations_dir: Path = release_dir / (subset_folder_name or "") / "annotations" + subset_folder_name = datetime.now().strftime( + "%m/%d/%Y_%H:%M:%S" + ) + annotations_dir: Path = ( + release_dir / (subset_folder_name or "") / "annotations" + ) # Remove existing annotations if necessary if annotations_dir.exists(): try: shutil.rmtree(annotations_dir) except PermissionError: - print(f"Could not remove dataset in {annotations_dir}. Permission denied.") + print( + f"Could not remove dataset in {annotations_dir}. Permission denied." + ) annotations_dir.mkdir(parents=True, exist_ok=False) stems: dict = {} @@ -277,7 +285,9 @@ def pull( if annotation is None: continue - if video_frames and any([not slot.frame_urls for slot in annotation.slots]): + if video_frames and any( + [not slot.frame_urls for slot in annotation.slots] + ): # will raise if not installed via pip install darwin-py[ocv] try: from cv2 import ( @@ -294,7 +304,9 @@ def pull( else: stems[filename] = 1 - destination_name = annotations_dir / f"{filename}{annotation_path.suffix}" + destination_name = ( + annotations_dir / f"{filename}{annotation_path.suffix}" + ) shutil.move(str(annotation_path), str(destination_name)) # Extract the list of classes and create the text files @@ -309,7 +321,9 @@ def pull( target_link: Path = self.local_releases_path / release_dir.name latest_dir.symlink_to(target_link) except OSError: - self.console.log(f"Could not mark release {release.name} as latest. Continuing...") + self.console.log( + f"Could not mark release {release.name} as latest. Continuing..." 
+ ) if only_annotations: # No images will be downloaded @@ -344,18 +358,33 @@ def pull( if env_max_workers and int(env_max_workers) > 0: max_workers = int(env_max_workers) - console.print(f"Going to download {str(count)} files to {self.local_images_path.as_posix()} .") + console.print( + f"Going to download {str(count)} files to {self.local_images_path.as_posix()} ." + ) successes, errors = exhaust_generator( - progress=progress(), count=count, multi_threaded=multi_threaded, worker_count=max_workers + progress=progress(), + count=count, + multi_threaded=multi_threaded, + worker_count=max_workers, ) if errors: - self.console.print(f"Encountered errors downloading {len(errors)} files") + self.console.print( + f"Encountered errors downloading {len(errors)} files" + ) for error in errors: self.console.print(f"\t - {error}") - downloaded_file_count = len([f for f in self.local_images_path.rglob("*") if f.is_file() and not f.name.startswith('.')]) + downloaded_file_count = len( + [ + f + for f in self.local_images_path.rglob("*") + if f.is_file() and not f.name.startswith(".") + ] + ) - console.print(f"Total file count after download completed {str(downloaded_file_count)}.") + console.print( + f"Total file count after download completed {str(downloaded_file_count)}." + ) return None, count else: @@ -367,7 +396,9 @@ def remove_remote(self) -> None: @abstractmethod def fetch_remote_files( - self, filters: Optional[Dict[str, Union[str, List[str]]]] = None, sort: Optional[Union[str, ItemSorter]] = None + self, + filters: Optional[Dict[str, Union[str, List[str]]]] = None, + sort: Optional[Union[str, ItemSorter]] = None, ) -> Iterator[DatasetItem]: """ Fetch and lists all files on the remote dataset. @@ -476,7 +507,9 @@ def fetch_annotation_type_id_for_name(self, name: str) -> Optional[int]: return None - def create_annotation_class(self, name: str, type: str, subtypes: List[str] = []) -> Dict[str, Any]: + def create_annotation_class( + self, name: str, type: str, subtypes: List[str] = [] + ) -> Dict[str, Any]: """ Creates an annotation class for this ``RemoteDataset``. @@ -502,9 +535,13 @@ def create_annotation_class(self, name: str, type: str, subtypes: List[str] = [] type_ids: List[int] = [] for annotation_type in [type] + subtypes: - type_id: Optional[int] = self.fetch_annotation_type_id_for_name(annotation_type) + type_id: Optional[int] = self.fetch_annotation_type_id_for_name( + annotation_type + ) if not type_id and self.annotation_types is not None: - list_of_annotation_types = ", ".join([type["name"] for type in self.annotation_types]) + list_of_annotation_types = ", ".join( + [type["name"] for type in self.annotation_types] + ) raise ValueError( f"Unknown annotation type: '{annotation_type}', valid values: {list_of_annotation_types}" ) @@ -514,7 +551,9 @@ def create_annotation_class(self, name: str, type: str, subtypes: List[str] = [] return self.client.create_annotation_class(self.dataset_id, type_ids, name) - def add_annotation_class(self, annotation_class: Union[AnnotationClass, int]) -> Optional[Dict[str, Any]]: + def add_annotation_class( + self, annotation_class: Union[AnnotationClass, int] + ) -> Optional[Dict[str, Any]]: """ Adds an annotation class to this ``RemoteDataset``. 
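# --- Illustrative aside (not part of the diff) ---
# A minimal usage sketch for the create_annotation_class signature shown in the
# remote_dataset.py hunk above. Client.local(), get_remote_dataset and the
# team, dataset and class names are assumptions, not taken from the diff.
from darwin.client import Client

client = Client.local()  # assumes an API key is configured locally
dataset = client.get_remote_dataset("my-team/my-dataset")
dataset.create_annotation_class("car", "polygon", subtypes=["text"])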
@@ -541,13 +580,19 @@ def add_annotation_class(self, annotation_class: Union[AnnotationClass, int]) -> if isinstance(annotation_class, int): match = [cls for cls in all_classes if cls["id"] == annotation_class] if not match: - raise ValueError(f"Annotation class id: `{annotation_class}` does not exist in Team.") + raise ValueError( + f"Annotation class id: `{annotation_class}` does not exist in Team." + ) else: - annotation_class_type = annotation_class.annotation_internal_type or annotation_class.annotation_type + annotation_class_type = ( + annotation_class.annotation_internal_type + or annotation_class.annotation_type + ) match = [ cls for cls in all_classes - if cls["name"] == annotation_class.name and annotation_class_type in cls["annotation_types"] + if cls["name"] == annotation_class.name + and annotation_class_type in cls["annotation_types"] ] if not match: # We do not expect to reach here; as pervious logic divides annotation classes in imports @@ -586,7 +631,9 @@ def fetch_remote_classes(self, team_wide=False) -> List[Dict[str, Any]]: classes_to_return = [] for cls in all_classes: - belongs_to_current_dataset = any([dataset["id"] == self.dataset_id for dataset in cls["datasets"]]) + belongs_to_current_dataset = any( + [dataset["id"] == self.dataset_id for dataset in cls["datasets"]] + ) cls["available"] = belongs_to_current_dataset if team_wide or belongs_to_current_dataset: classes_to_return.append(cls) @@ -740,7 +787,9 @@ def split( make_default_split=make_default_split, ) - def classes(self, annotation_type: str, release_name: Optional[str] = None) -> List[str]: + def classes( + self, annotation_type: str, release_name: Optional[str] = None + ) -> List[str]: """ Returns the list of ``class_type`` classes. @@ -762,7 +811,9 @@ def classes(self, annotation_type: str, release_name: Optional[str] = None) -> L release = self.get_release("latest") release_name = release.name - return get_classes(self.local_path, release_name=release_name, annotation_type=annotation_type) + return get_classes( + self.local_path, release_name=release_name, annotation_type=annotation_type + ) def annotations( self, @@ -829,7 +880,9 @@ def workview_url_for_item(self, item: DatasetItem) -> str: """ @abstractmethod - def post_comment(self, item: DatasetItem, text: str, x: float, y: float, w: float, h: float) -> None: + def post_comment( + self, item: DatasetItem, text: str, x: float, y: float, w: float, h: float + ) -> None: """ Adds a comment to an item in this dataset. The comment will be added with a bounding box. Creates the workflow for said item if necessary. diff --git a/darwin/dataset/remote_dataset_v1.py b/darwin/dataset/remote_dataset_v1.py index 2872629bd..3d9b41ed0 100644 --- a/darwin/dataset/remote_dataset_v1.py +++ b/darwin/dataset/remote_dataset_v1.py @@ -101,12 +101,21 @@ def get_releases(self) -> List["Release"]: Returns a sorted list of available ``Release``\\s with the most recent first. 
""" try: - releases_json: List[Dict[str, Any]] = self.client.get_exports(self.dataset_id, self.team) + releases_json: List[Dict[str, Any]] = self.client.get_exports( + self.dataset_id, self.team + ) except NotFound: return [] - releases = [Release.parse_json(self.slug, self.team, payload) for payload in releases_json] - return sorted(filter(lambda x: x.available, releases), key=lambda x: x.version, reverse=True) + releases = [ + Release.parse_json(self.slug, self.team, payload) + for payload in releases_json + ] + return sorted( + filter(lambda x: x.available, releases), + key=lambda x: x.version, + reverse=True, + ) def push( self, @@ -172,23 +181,37 @@ def push( if files_to_upload is None: raise ValueError("No files or directory specified.") - uploading_files = [item for item in files_to_upload if isinstance(item, LocalFile)] - search_files = [item for item in files_to_upload if not isinstance(item, LocalFile)] + uploading_files = [ + item for item in files_to_upload if isinstance(item, LocalFile) + ] + search_files = [ + item for item in files_to_upload if not isinstance(item, LocalFile) + ] - generic_parameters_specified = path is not None or fps != 0 or as_frames is not False + generic_parameters_specified = ( + path is not None or fps != 0 or as_frames is not False + ) if uploading_files and generic_parameters_specified: raise ValueError("Cannot specify a path when uploading a LocalFile object.") for found_file in find_files(search_files, files_to_exclude=files_to_exclude): local_path = path if preserve_folders: - source_files = [source_file for source_file in search_files if is_relative_to(found_file, source_file)] + source_files = [ + source_file + for source_file in search_files + if is_relative_to(found_file, source_file) + ] if source_files: local_path = str(found_file.relative_to(source_files[0]).parent) - uploading_files.append(LocalFile(found_file, fps=fps, as_frames=as_frames, path=local_path)) + uploading_files.append( + LocalFile(found_file, fps=fps, as_frames=as_frames, path=local_path) + ) if not uploading_files: - raise ValueError("No files to upload, check your path, exclusion filters and resume flag") + raise ValueError( + "No files to upload, check your path, exclusion filters and resume flag" + ) handler = UploadHandlerV1(self, uploading_files) if blocking: @@ -204,7 +227,9 @@ def push( return handler def fetch_remote_files( - self, filters: Optional[Dict[str, Union[str, List[str]]]] = None, sort: Optional[Union[str, ItemSorter]] = None + self, + filters: Optional[Dict[str, Union[str, List[str]]]] = None, + sort: Optional[Union[str, ItemSorter]] = None, ) -> Iterator[DatasetItem]: """ Fetch and lists all files on the remote dataset. @@ -245,7 +270,9 @@ def fetch_remote_files( cursor = {"page[size]": 500} while True: payload = {"filter": post_filters, "sort": post_sort} - response = self.client.fetch_remote_files(self.dataset_id, cursor, payload, self.team) + response = self.client.fetch_remote_files( + self.dataset_id, cursor, payload, self.team + ) yield from [DatasetItem.parse(item) for item in response["items"]] @@ -263,7 +290,9 @@ def archive(self, items: Iterator[DatasetItem]) -> None: items : Iterator[DatasetItem] The ``DatasetItem``\\s to be archived. 
""" - payload: Dict[str, Any] = {"filter": {"dataset_item_ids": [item.id for item in items]}} + payload: Dict[str, Any] = { + "filter": {"dataset_item_ids": [item.id for item in items]} + } self.client.archive_item(self.slug, self.team, payload) def restore_archived(self, items: Iterator[DatasetItem]) -> None: @@ -275,7 +304,9 @@ def restore_archived(self, items: Iterator[DatasetItem]) -> None: items : Iterator[DatasetItem] The ``DatasetItem``\\s to be restored. """ - payload: Dict[str, Any] = {"filter": {"dataset_item_ids": [item.id for item in items]}} + payload: Dict[str, Any] = { + "filter": {"dataset_item_ids": [item.id for item in items]} + } self.client.restore_archived_item(self.slug, self.team, payload) def move_to_new(self, items: Iterator[DatasetItem]) -> None: @@ -287,7 +318,9 @@ def move_to_new(self, items: Iterator[DatasetItem]) -> None: items : Iterator[DatasetItem] The ``DatasetItem``\\s whose status will change. """ - payload: Dict[str, Any] = {"filter": {"dataset_item_ids": [item.id for item in items]}} + payload: Dict[str, Any] = { + "filter": {"dataset_item_ids": [item.id for item in items]} + } self.client.move_item_to_new(self.slug, self.team, payload) def reset(self, items: Iterator[DatasetItem]) -> None: @@ -299,7 +332,9 @@ def reset(self, items: Iterator[DatasetItem]) -> None: items : Iterator[DatasetItem] The ``DatasetItem``\\s to be resetted. """ - payload: Dict[str, Any] = {"filter": {"dataset_item_ids": [item.id for item in items]}} + payload: Dict[str, Any] = { + "filter": {"dataset_item_ids": [item.id for item in items]} + } self.client.reset_item(self.slug, self.team, payload) def complete(self, items: Iterator[DatasetItem]) -> None: @@ -311,7 +346,9 @@ def complete(self, items: Iterator[DatasetItem]) -> None: items : Iterator[DatasetItem] The ``DatasetItem``\\s to be completed. """ - wf_template_id_mapper = lambda item: item.current_workflow["workflow_template_id"] + wf_template_id_mapper = lambda item: item.current_workflow[ + "workflow_template_id" + ] input_items: List[DatasetItem] = list(items) # We split into items with and without workflow @@ -339,13 +376,22 @@ def complete(self, items: Iterator[DatasetItem]) -> None: sample_item = current_items[0] deep_sample_stages = sample_item.current_workflow["stages"].values() sample_stages = [item for sublist in deep_sample_stages for item in sublist] - complete_stage = list(filter(lambda stage: stage["type"] == "complete", sample_stages))[0] + complete_stage = list( + filter(lambda stage: stage["type"] == "complete", sample_stages) + )[0] filters = {"dataset_item_ids": [item.id for item in current_items]} try: - self.client.move_to_stage(self.slug, self.team, filters, complete_stage["workflow_stage_template_id"]) + self.client.move_to_stage( + self.slug, + self.team, + filters, + complete_stage["workflow_stage_template_id"], + ) except ValidationError: - raise ValueError("Unable to complete some of provided items. Make sure to assign them to a user first.") + raise ValueError( + "Unable to complete some of provided items. Make sure to assign them to a user first." + ) def delete_items(self, items: Iterator[DatasetItem]) -> None: """ @@ -356,7 +402,9 @@ def delete_items(self, items: Iterator[DatasetItem]) -> None: items : Iterator[DatasetItem] The ``DatasetItem``\\s to be deleted. 
""" - payload: Dict[str, Any] = {"filter": {"dataset_item_ids": [item.id for item in items]}} + payload: Dict[str, Any] = { + "filter": {"dataset_item_ids": [item.id for item in items]} + } self.client.delete_item(self.slug, self.team, payload) def export( @@ -410,7 +458,9 @@ def get_report(self, granularity: str = "day") -> str: str A CSV report. """ - response: Response = self.client.get_report(self.dataset_id, granularity, self.team) + response: Response = self.client.get_report( + self.dataset_id, granularity, self.team + ) return response.text def workview_url_for_item(self, item: DatasetItem) -> str: @@ -427,9 +477,14 @@ def workview_url_for_item(self, item: DatasetItem) -> str: str The url. """ - return urljoin(self.client.base_url, f"/workview?dataset={self.dataset_id}&image={item.seq}") + return urljoin( + self.client.base_url, + f"/workview?dataset={self.dataset_id}&image={item.seq}", + ) - def post_comment(self, item: DatasetItem, text: str, x: float, y: float, w: float, h: float): + def post_comment( + self, item: DatasetItem, text: str, x: float, y: float, w: float, h: float + ): """ Adds a comment to an item in this dataset Instantiates a workflow if needed diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 32555d4aa..ecb4817db 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -109,12 +109,21 @@ def get_releases(self) -> List["Release"]: Returns a sorted list of available ``Release``\\s with the most recent first. """ try: - releases_json: List[Dict[str, Any]] = self.client.api_v2.get_exports(self.slug, team_slug=self.team) + releases_json: List[Dict[str, Any]] = self.client.api_v2.get_exports( + self.slug, team_slug=self.team + ) except NotFound: return [] - releases = [Release.parse_json(self.slug, self.team, payload) for payload in releases_json] - return sorted(filter(lambda x: x.available, releases), key=lambda x: x.version, reverse=True) + releases = [ + Release.parse_json(self.slug, self.team, payload) + for payload in releases_json + ] + return sorted( + filter(lambda x: x.available, releases), + key=lambda x: x.version, + reverse=True, + ) def push( self, @@ -181,25 +190,43 @@ def push( if files_to_upload is None: raise ValueError("No files or directory specified.") - uploading_files = [item for item in files_to_upload if isinstance(item, LocalFile)] - search_files = [item for item in files_to_upload if not isinstance(item, LocalFile)] + uploading_files = [ + item for item in files_to_upload if isinstance(item, LocalFile) + ] + search_files = [ + item for item in files_to_upload if not isinstance(item, LocalFile) + ] - generic_parameters_specified = path is not None or fps != 0 or as_frames is not False + generic_parameters_specified = ( + path is not None or fps != 0 or as_frames is not False + ) if uploading_files and generic_parameters_specified: raise ValueError("Cannot specify a path when uploading a LocalFile object.") for found_file in find_files(search_files, files_to_exclude=files_to_exclude): local_path = path if preserve_folders: - source_files = [source_file for source_file in search_files if is_relative_to(found_file, source_file)] + source_files = [ + source_file + for source_file in search_files + if is_relative_to(found_file, source_file) + ] if source_files: local_path = str(found_file.relative_to(source_files[0]).parent) uploading_files.append( - LocalFile(found_file, fps=fps, as_frames=as_frames, extract_views=extract_views, path=local_path) + LocalFile( + 
found_file, + fps=fps, + as_frames=as_frames, + extract_views=extract_views, + path=local_path, + ) ) if not uploading_files: - raise ValueError("No files to upload, check your path, exclusion filters and resume flag") + raise ValueError( + "No files to upload, check your path, exclusion filters and resume flag" + ) handler = UploadHandlerV2(self, uploading_files) if blocking: @@ -215,7 +242,9 @@ def push( return handler def fetch_remote_files( - self, filters: Optional[Dict[str, Union[str, List[str]]]] = None, sort: Optional[Union[str, ItemSorter]] = None + self, + filters: Optional[Dict[str, Union[str, List[str]]]] = None, + sort: Optional[Union[str, ItemSorter]] = None, ) -> Iterator[DatasetItem]: """ Fetch and lists all files on the remote dataset. @@ -256,8 +285,13 @@ def fetch_remote_files( cursor = {"page[size]": 500, "include_workflow_data": "true"} while True: query = post_filters + list(post_sort.items()) + list(cursor.items()) - response = self.client.api_v2.fetch_items(self.dataset_id, query, team_slug=self.team) - yield from [DatasetItem.parse(item, dataset_slug=self.slug) for item in response["items"]] + response = self.client.api_v2.fetch_items( + self.dataset_id, query, team_slug=self.team + ) + yield from [ + DatasetItem.parse(item, dataset_slug=self.slug) + for item in response["items"] + ] if response["page"]["next"]: cursor["page[from]"] = response["page"]["next"] @@ -274,7 +308,10 @@ def archive(self, items: Iterator[DatasetItem]) -> None: The ``DatasetItem``\\s to be archived. """ payload: Dict[str, Any] = { - "filters": {"item_ids": [item.id for item in items], "dataset_ids": [self.dataset_id]} + "filters": { + "item_ids": [item.id for item in items], + "dataset_ids": [self.dataset_id], + } } self.client.api_v2.archive_items(payload, team_slug=self.team) @@ -288,7 +325,10 @@ def restore_archived(self, items: Iterator[DatasetItem]) -> None: The ``DatasetItem``\\s to be restored. """ payload: Dict[str, Any] = { - "filters": {"item_ids": [item.id for item in items], "dataset_ids": [self.dataset_id]} + "filters": { + "item_ids": [item.id for item in items], + "dataset_ids": [self.dataset_id], + } } self.client.api_v2.restore_archived_items(payload, team_slug=self.team) @@ -355,7 +395,8 @@ def delete_items(self, items: Iterator[DatasetItem]) -> None: The ``DatasetItem``\\s to be deleted. """ self.client.api_v2.delete_items( - {"dataset_ids": [self.dataset_id], "item_ids": [item.id for item in items]}, team_slug=self.team + {"dataset_ids": [self.dataset_id], "item_ids": [item.id for item in items]}, + team_slug=self.team, ) def export( @@ -393,9 +434,13 @@ def export( format = None else: raise UnknownExportVersion(version) - - filters = None if not annotation_class_ids else {"annotation_class_ids": list(map(int, annotation_class_ids))} - + + filters = ( + None + if not annotation_class_ids + else {"annotation_class_ids": list(map(int, annotation_class_ids))} + ) + self.client.api_v2.export_dataset( format=format, name=name, @@ -421,7 +466,9 @@ def get_report(self, granularity: str = "day") -> str: str A CSV report. """ - response: Response = self.client.get_report(self.dataset_id, granularity, self.team) + response: Response = self.client.get_report( + self.dataset_id, granularity, self.team + ) return response.text def workview_url_for_item(self, item: DatasetItem) -> str: @@ -438,10 +485,19 @@ def workview_url_for_item(self, item: DatasetItem) -> str: str The url. 
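# --- Illustrative aside (not part of the diff) ---
# A sketch of requesting a v2 export with the export() method reformatted
# above: when annotation_class_ids is given it is converted to an
# {"annotation_class_ids": [...]} filter (the ids are passed through int()).
# The export name, the ids and the keyword-only call shape are assumptions.
# assumes: dataset = Client.local().get_remote_dataset("my-team/my-dataset")
dataset.export(name="weekly-export", annotation_class_ids=["42", "43"])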
""" - return urljoin(self.client.base_url, f"/workview?dataset={self.dataset_id}&item={item.id}") + return urljoin( + self.client.base_url, f"/workview?dataset={self.dataset_id}&item={item.id}" + ) def post_comment( - self, item: DatasetItem, text: str, x: float, y: float, w: float, h: float, slot_name: Optional[str] = None + self, + item: DatasetItem, + text: str, + x: float, + y: float, + w: float, + h: float, + slot_name: Optional[str] = None, ): """ Adds a comment to an item in this dataset, @@ -449,10 +505,14 @@ def post_comment( """ if not slot_name: if len(item.slots) != 1: - raise ValueError(f"Unable to infer slot for '{item.id}', has multiple slots: {','.join(item.slots)}") + raise ValueError( + f"Unable to infer slot for '{item.id}', has multiple slots: {','.join(item.slots)}" + ) slot_name = item.slots[0]["slot_name"] - self.client.api_v2.post_comment(item.id, text, x, y, w, h, slot_name, team_slug=self.team) + self.client.api_v2.post_comment( + item.id, text, x, y, w, h, slot_name, team_slug=self.team + ) def import_annotation(self, item_id: ItemId, payload: Dict[str, Any]) -> None: """ @@ -467,7 +527,9 @@ def import_annotation(self, item_id: ItemId, payload: Dict[str, Any]) -> None: `{"annotations": serialized_annotations, "overwrite": "false"}` """ - self.client.api_v2.import_annotation(item_id, payload=payload, team_slug=self.team) + self.client.api_v2.import_annotation( + item_id, payload=payload, team_slug=self.team + ) def _fetch_stages(self, stage_type): detailed_dataset = self.client.api_v2.get_dataset(self.dataset_id) @@ -477,4 +539,7 @@ def _fetch_stages(self, stage_type): # currently we can only be part of one workflow workflow_id = workflow_ids[0] workflow = self.client.api_v2.get_workflow(workflow_id, team_slug=self.team) - return (workflow_id, [stage for stage in workflow["stages"] if stage["type"] == stage_type]) + return ( + workflow_id, + [stage for stage in workflow["stages"] if stage["type"] == stage_type], + ) diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index c6ee1502b..79b6213b2 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -99,7 +99,10 @@ class UploadStage(DocEnum): REQUEST_SIGNATURE = 0, "First stage, when authentication is being performed." UPLOAD_TO_S3 = 1, "Second stage, when the file is being uploaded to S3." - CONFIRM_UPLOAD_COMPLETE = 2, "Final stage, when we confirm the file was correctly uploaded." + CONFIRM_UPLOAD_COMPLETE = ( + 2, + "Final stage, when we confirm the file was correctly uploaded.", + ) OTHER = 3, "If the stage of the upload process is unknown." @@ -159,7 +162,10 @@ def _type_check(self, args) -> None: self.data["path"] = args.get("path") or "/" def serialize(self): - return {"files": [{"file_name": self.data["filename"], "slot_name": "0"}], "name": self.data["filename"]} + return { + "files": [{"file_name": self.data["filename"], "slot_name": "0"}], + "name": self.data["filename"], + } def serialize_v2(self): optional_properties = ["tags", "fps", "as_frames", "extract_views"] @@ -208,7 +214,9 @@ class FileMonitor(object): Total size of the IO. 
""" - def __init__(self, io: BinaryIO, file_size: int, callback: Callable[["FileMonitor"], None]): + def __init__( + self, io: BinaryIO, file_size: int, callback: Callable[["FileMonitor"], None] + ): self.io: BinaryIO = io self.callback: Callable[["FileMonitor"], None] = callback @@ -272,7 +280,9 @@ def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]): self.dataset: RemoteDataset = dataset self.errors: List[UploadRequestError] = [] self.local_files: List[LocalFile] = local_files - self._progress: Optional[Iterator[Callable[[Optional[ByteReadCallback]], None]]] = None + self._progress: Optional[ + Iterator[Callable[[Optional[ByteReadCallback]], None]] + ] = None self.blocked_items, self.pending_items = self._request_upload() @@ -320,7 +330,9 @@ def progress(self): """Current level of upload progress.""" return self._progress - def prepare_upload(self) -> Optional[Iterator[Callable[[Optional[ByteReadCallback]], None]]]: + def prepare_upload( + self, + ) -> Optional[Iterator[Callable[[Optional[ByteReadCallback]], None]]]: self._progress = self._upload_files() return self._progress @@ -345,7 +357,10 @@ def callback(file_name, file_total_bytes, file_bytes_sent): file_upload_callback(file_name, file_total_bytes, file_bytes_sent) if progress_callback: - if file_total_bytes == file_bytes_sent and file_name not in file_complete: + if ( + file_total_bytes == file_bytes_sent + and file_name not in file_complete + ): file_complete.add(file_name) progress_callback(self.pending_count, 1) @@ -358,8 +373,12 @@ def callback(file_name, file_total_bytes, file_bytes_sent): ) if multi_threaded and self.progress: - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - future_to_progress = {executor.submit(f, callback) for f in self.progress} + with concurrent.futures.ThreadPoolExecutor( + max_workers=max_workers + ) as executor: + future_to_progress = { + executor.submit(f, callback) for f in self.progress + } for future in concurrent.futures.as_completed(future_to_progress): try: future.result() @@ -379,7 +398,10 @@ def _upload_files(self) -> Iterator[Callable[[Optional[ByteReadCallback]], None] @abstractmethod def _upload_file( - self, dataset_item_id: int, file_path: Path, byte_read_callback: Optional[ByteReadCallback] + self, + dataset_item_id: int, + file_path: Path, + byte_read_callback: Optional[ByteReadCallback], ) -> None: pass @@ -397,32 +419,49 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: dataset_slug: str = self.dataset_identifier.dataset_slug team_slug: Optional[str] = self.dataset_identifier.team_slug - data: Dict[str, Any] = self.client.upload_data(dataset_slug, upload_payload, team_slug) + data: Dict[str, Any] = self.client.upload_data( + dataset_slug, upload_payload, team_slug + ) - blocked_items.extend([ItemPayload(**item) for item in data["blocked_items"]]) + blocked_items.extend( + [ItemPayload(**item) for item in data["blocked_items"]] + ) items.extend([ItemPayload(**item) for item in data["items"]]) return blocked_items, items def _upload_files(self) -> Iterator[Callable[[Optional[ByteReadCallback]], None]]: - def upload_function(dataset_item_id, local_path) -> Callable[[Optional[ByteReadCallback]], None]: - return lambda byte_read_callback=None: self._upload_file(dataset_item_id, local_path, byte_read_callback) + def upload_function( + dataset_item_id, local_path + ) -> Callable[[Optional[ByteReadCallback]], None]: + return lambda byte_read_callback=None: self._upload_file( + dataset_item_id, local_path, 
byte_read_callback + ) file_lookup = {file.full_path: file for file in self.local_files} for item in self.pending_items: file = file_lookup.get(item.full_path) if not file: - raise ValueError(f"Cannot match {item.full_path} from payload with files to upload") + raise ValueError( + f"Cannot match {item.full_path} from payload with files to upload" + ) yield upload_function(item.dataset_item_id, file.local_path) def _upload_file( - self, dataset_item_id: int, file_path: Path, byte_read_callback: Optional[ByteReadCallback] + self, + dataset_item_id: int, + file_path: Path, + byte_read_callback: Optional[ByteReadCallback], ) -> None: try: self._do_upload_file(dataset_item_id, file_path, byte_read_callback) except UploadRequestError as e: self.errors.append(e) except Exception as e: - self.errors.append(UploadRequestError(file_path=file_path, stage=UploadStage.OTHER, error=e)) + self.errors.append( + UploadRequestError( + file_path=file_path, stage=UploadStage.OTHER, error=e + ) + ) def _do_upload_file( self, @@ -433,9 +472,13 @@ def _do_upload_file( team_slug: Optional[str] = self.dataset_identifier.team_slug try: - sign_response: Dict[str, Any] = self.client.sign_upload(dataset_item_id, team_slug) + sign_response: Dict[str, Any] = self.client.sign_upload( + dataset_item_id, team_slug + ) except Exception as e: - raise UploadRequestError(file_path=file_path, stage=UploadStage.REQUEST_SIGNATURE, error=e) + raise UploadRequestError( + file_path=file_path, stage=UploadStage.REQUEST_SIGNATURE, error=e + ) upload_url = sign_response["upload_url"] @@ -463,12 +506,16 @@ def callback(monitor): upload_response.raise_for_status() except Exception as e: - raise UploadRequestError(file_path=file_path, stage=UploadStage.UPLOAD_TO_S3, error=e) + raise UploadRequestError( + file_path=file_path, stage=UploadStage.UPLOAD_TO_S3, error=e + ) try: self.client.confirm_upload(dataset_item_id, team_slug) except Exception as e: - raise UploadRequestError(file_path=file_path, stage=UploadStage.CONFIRM_UPLOAD_COMPLETE, error=e) + raise UploadRequestError( + file_path=file_path, stage=UploadStage.CONFIRM_UPLOAD_COMPLETE, error=e + ) class UploadHandlerV2(UploadHandler): @@ -484,14 +531,20 @@ def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]: dataset_slug: str = self.dataset_identifier.dataset_slug team_slug: Optional[str] = self.dataset_identifier.team_slug - data: Dict[str, Any] = self.client.api_v2.register_data(dataset_slug, upload_payload, team_slug=team_slug) + data: Dict[str, Any] = self.client.api_v2.register_data( + dataset_slug, upload_payload, team_slug=team_slug + ) - blocked_items.extend([ItemPayload.parse_v2(item) for item in data["blocked_items"]]) + blocked_items.extend( + [ItemPayload.parse_v2(item) for item in data["blocked_items"]] + ) items.extend([ItemPayload.parse_v2(item) for item in data["items"]]) return blocked_items, items def _upload_files(self) -> Iterator[Callable[[Optional[ByteReadCallback]], None]]: - def upload_function(dataset_slug, local_path, upload_id) -> Callable[[Optional[ByteReadCallback]], None]: + def upload_function( + dataset_slug, local_path, upload_id + ) -> Callable[[Optional[ByteReadCallback]], None]: return lambda byte_read_callback=None: self._upload_file( dataset_slug, local_path, upload_id, byte_read_callback ) @@ -503,18 +556,30 @@ def upload_function(dataset_slug, local_path, upload_id) -> Callable[[Optional[B upload_id = item.slots[0]["upload_id"] file = file_lookup.get(item.full_path) if not file: - raise ValueError(f"Cannot match 
{item.full_path} from payload with files to upload") - yield upload_function(self.dataset.identifier.dataset_slug, file.local_path, upload_id) + raise ValueError( + f"Cannot match {item.full_path} from payload with files to upload" + ) + yield upload_function( + self.dataset.identifier.dataset_slug, file.local_path, upload_id + ) def _upload_file( - self, dataset_slug: str, file_path: Path, upload_id: str, byte_read_callback: Optional[ByteReadCallback] + self, + dataset_slug: str, + file_path: Path, + upload_id: str, + byte_read_callback: Optional[ByteReadCallback], ) -> None: try: self._do_upload_file(dataset_slug, file_path, upload_id, byte_read_callback) except UploadRequestError as e: self.errors.append(e) except Exception as e: - self.errors.append(UploadRequestError(file_path=file_path, stage=UploadStage.OTHER, error=e)) + self.errors.append( + UploadRequestError( + file_path=file_path, stage=UploadStage.OTHER, error=e + ) + ) def _do_upload_file( self, @@ -526,9 +591,13 @@ def _do_upload_file( team_slug: Optional[str] = self.dataset_identifier.team_slug try: - sign_response: Dict[str, Any] = self.client.api_v2.sign_upload(dataset_slug, upload_id, team_slug=team_slug) + sign_response: Dict[str, Any] = self.client.api_v2.sign_upload( + dataset_slug, upload_id, team_slug=team_slug + ) except Exception as e: - raise UploadRequestError(file_path=file_path, stage=UploadStage.REQUEST_SIGNATURE, error=e) + raise UploadRequestError( + file_path=file_path, stage=UploadStage.REQUEST_SIGNATURE, error=e + ) upload_url = sign_response["upload_url"] @@ -556,12 +625,18 @@ def callback(monitor): upload_response.raise_for_status() except Exception as e: - raise UploadRequestError(file_path=file_path, stage=UploadStage.UPLOAD_TO_S3, error=e) + raise UploadRequestError( + file_path=file_path, stage=UploadStage.UPLOAD_TO_S3, error=e + ) try: - self.client.api_v2.confirm_upload(dataset_slug, upload_id, team_slug=team_slug) + self.client.api_v2.confirm_upload( + dataset_slug, upload_id, team_slug=team_slug + ) except Exception as e: - raise UploadRequestError(file_path=file_path, stage=UploadStage.CONFIRM_UPLOAD_COMPLETE, error=e) + raise UploadRequestError( + file_path=file_path, stage=UploadStage.CONFIRM_UPLOAD_COMPLETE, error=e + ) DEFAULT_UPLOAD_CHUNK_SIZE: int = 500 diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index c11e3576a..7aa3fa1bc 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -230,7 +230,7 @@ def exhaust_generator( Exhausts the generator passed as parameter. Can be done multi threaded if desired. Creates and returns a coco record from the given annotation. - + Uses ``BoxMode.XYXY_ABS`` from ``detectron2.structures`` if available, defaults to ``box_mode = 0`` otherwise. 
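# --- Illustrative aside (not part of the diff) ---
# The three stages the v2 upload handler above walks through per file,
# expressed as plain calls against the api_v2 wrapper it uses. The dataset
# slug, team slug and upload_id are hypothetical, and the PUT of the file
# bytes to the signed URL (the UPLOAD_TO_S3 stage) is elided.
# assumes: client = Client.local(); upload_id comes from a prior register_data call
sign = client.api_v2.sign_upload("my-dataset", upload_id, team_slug="my-team")
upload_url = sign["upload_url"]
# ... stream the file to upload_url, then:
client.api_v2.confirm_upload("my-dataset", upload_id, team_slug="my-team")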
Parameters @@ -583,7 +583,9 @@ def _map_annotations_to_images( invalid_annotation_paths.append(annotation_path) continue else: - raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image") + raise ValueError( + f"Annotation ({annotation_path}) does not have a corresponding image" + ) return images_paths, annotations_paths, invalid_annotation_paths diff --git a/darwin/datatypes.py b/darwin/datatypes.py index ad177986e..bd39b90cd 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -25,7 +25,9 @@ # Utility types -NumberLike = Union[int, float] # Used for functions that can take either an int or a float +NumberLike = Union[ + int, float +] # Used for functions that can take either an int or a float # Used for functions that _genuinely_ don't know what type they're dealing with, such as those that test if something is of a certain type. UnknownType = Any # type:ignore @@ -269,7 +271,9 @@ class VideoAnnotation: def get_data( self, only_keyframes: bool = True, - post_processing: Optional[Callable[[Annotation, UnknownType], UnknownType]] = None, + post_processing: Optional[ + Callable[[Annotation, UnknownType], UnknownType] + ] = None, ) -> Dict: """ Return the post-processed frames and the additional information from this @@ -303,7 +307,9 @@ def get_data( """ if not post_processing: - def post_processing(annotation: Annotation, data: UnknownType) -> UnknownType: + def post_processing( + annotation: Annotation, data: UnknownType + ) -> UnknownType: return data # type: ignore output = { @@ -502,7 +508,9 @@ def make_bounding_box( def make_tag( - class_name: str, subs: Optional[List[SubAnnotation]] = None, slot_names: Optional[List[str]] = None + class_name: str, + subs: Optional[List[SubAnnotation]] = None, + slot_names: Optional[List[str]] = None, ) -> Annotation: """ Creates and returns a tag annotation. @@ -519,7 +527,9 @@ def make_tag( Annotation A tag ``Annotation``. """ - return Annotation(AnnotationClass(class_name, "tag"), {}, subs or [], slot_names=slot_names or []) + return Annotation( + AnnotationClass(class_name, "tag"), {}, subs or [], slot_names=slot_names or [] + ) def make_polygon( @@ -643,7 +653,10 @@ def make_keypoint( A point ``Annotation``. """ return Annotation( - AnnotationClass(class_name, "keypoint"), {"x": x, "y": y}, subs or [], slot_names=slot_names or [] + AnnotationClass(class_name, "keypoint"), + {"x": x, "y": y}, + subs or [], + slot_names=slot_names or [], ) @@ -678,7 +691,12 @@ def make_line( Annotation A line ``Annotation``. """ - return Annotation(AnnotationClass(class_name, "line"), {"path": path}, subs or [], slot_names=slot_names or []) + return Annotation( + AnnotationClass(class_name, "line"), + {"path": path}, + subs or [], + slot_names=slot_names or [], + ) def make_skeleton( @@ -715,7 +733,10 @@ def make_skeleton( A skeleton ``Annotation``. """ return Annotation( - AnnotationClass(class_name, "skeleton"), {"nodes": nodes}, subs or [], slot_names=slot_names or [] + AnnotationClass(class_name, "skeleton"), + {"nodes": nodes}, + subs or [], + slot_names=slot_names or [], ) @@ -764,7 +785,12 @@ def make_ellipse( Annotation An ellipse ``Annotation``. """ - return Annotation(AnnotationClass(class_name, "ellipse"), parameters, subs or [], slot_names=slot_names or []) + return Annotation( + AnnotationClass(class_name, "ellipse"), + parameters, + subs or [], + slot_names=slot_names or [], + ) def make_cuboid( @@ -804,7 +830,12 @@ def make_cuboid( Annotation A cuboid ``Annotation``. 
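# --- Illustrative aside (not part of the diff) ---
# The darwin.datatypes factory helpers reformatted above build Annotation
# objects directly; make_keypoint's x/y end up in the {"x": ..., "y": ...}
# data dict shown in the hunk. The class names and coordinates are
# hypothetical, and make_keypoint's positional order is an assumption.
import darwin.datatypes as dt

tag = dt.make_tag("reviewed")
keypoint = dt.make_keypoint("left-eye", x=112.5, y=64.0)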
""" - return Annotation(AnnotationClass(class_name, "cuboid"), cuboid, subs or [], slot_names=slot_names or []) + return Annotation( + AnnotationClass(class_name, "cuboid"), + cuboid, + subs or [], + slot_names=slot_names or [], + ) def make_table( @@ -902,7 +933,10 @@ def make_string( A string ``Annotation``. """ return Annotation( - AnnotationClass(class_name, "string"), {"sources": sources}, subs or [], slot_names=slot_names or [] + AnnotationClass(class_name, "string"), + {"sources": sources}, + subs or [], + slot_names=slot_names or [], ) @@ -956,12 +990,17 @@ def make_graph( A graph ``Annotation``. """ return Annotation( - AnnotationClass(class_name, "graph"), {"nodes": nodes, "edges": edges}, subs or [], slot_names=slot_names or [] + AnnotationClass(class_name, "graph"), + {"nodes": nodes, "edges": edges}, + subs or [], + slot_names=slot_names or [], ) def make_mask( - class_name: str, subs: Optional[List[SubAnnotation]] = None, slot_names: Optional[List[str]] = None + class_name: str, + subs: Optional[List[SubAnnotation]] = None, + slot_names: Optional[List[str]] = None, ) -> Annotation: """ Creates and returns a mask annotation. @@ -978,7 +1017,9 @@ def make_mask( Annotation A mask ``Annotation``. """ - return Annotation(AnnotationClass(class_name, "mask"), {}, subs or [], slot_names=slot_names or []) + return Annotation( + AnnotationClass(class_name, "mask"), {}, subs or [], slot_names=slot_names or [] + ) def make_raster_layer( @@ -1167,11 +1208,18 @@ def make_video_annotation( raise ValueError("invalid argument to make_video_annotation") return VideoAnnotation( - first_annotation.annotation_class, frames, keyframes, segments, interpolated, slot_names=slot_names or [] + first_annotation.annotation_class, + frames, + keyframes, + segments, + interpolated, + slot_names=slot_names or [], ) -def _maybe_add_bounding_box_data(data: Dict[str, UnknownType], bounding_box: Optional[Dict]) -> Dict[str, UnknownType]: +def _maybe_add_bounding_box_data( + data: Dict[str, UnknownType], bounding_box: Optional[Dict] +) -> Dict[str, UnknownType]: if bounding_box: data["bounding_box"] = { "x": bounding_box["x"], diff --git a/darwin/exceptions.py b/darwin/exceptions.py index 1c6b5e72b..e688dd6ee 100644 --- a/darwin/exceptions.py +++ b/darwin/exceptions.py @@ -63,7 +63,10 @@ def __repr__(self) -> str: @classmethod def from_multiple_exceptions( - cls, exceptions: List[Exception], echo: bool = False, console: Optional[Console] = None + cls, + exceptions: List[Exception], + echo: bool = False, + console: Optional[Console] = None, ) -> "DarwinException": """ Creates a new exception from a list of exceptions. @@ -298,7 +301,10 @@ class UnknownAnnotationFileSchema(Exception): """ def __init__( - self, file_path: Path, supported_versions: List[AnnotationFileVersion], detected_version: AnnotationFileVersion + self, + file_path: Path, + supported_versions: List[AnnotationFileVersion], + detected_version: AnnotationFileVersion, ): """ Parameters @@ -361,7 +367,9 @@ def __init__(self, import_type: str, annotation_type: str): annotation_type: str The unsupported annotation type. 
""" - super().__init__(f"Unsupported annotation type {annotation_type} for {import_type} import") + super().__init__( + f"Unsupported annotation type {annotation_type} for {import_type} import" + ) self.import_type = import_type self.annotation_type = annotation_type diff --git a/darwin/exporter/exporter.py b/darwin/exporter/exporter.py index 57aa62a8f..0d1a97ca9 100644 --- a/darwin/exporter/exporter.py +++ b/darwin/exporter/exporter.py @@ -5,7 +5,9 @@ from darwin.utils import parse_darwin_json, split_video_annotation -def darwin_to_dt_gen(file_paths: List[PathLike], split_sequences: bool) -> Iterator[AnnotationFile]: +def darwin_to_dt_gen( + file_paths: List[PathLike], split_sequences: bool +) -> Iterator[AnnotationFile]: """ Parses the given paths recursively and into an ``Iterator`` of ``AnnotationFile``\\s. diff --git a/darwin/exporter/formats/coco.py b/darwin/exporter/formats/coco.py index 01e1bb417..53e85353b 100644 --- a/darwin/exporter/formats/coco.py +++ b/darwin/exporter/formats/coco.py @@ -36,7 +36,9 @@ def export(annotation_files: Iterator[dt.AnnotationFile], output_dir: Path) -> N output = _build_json(list(annotation_files)) output_file_path = (output_dir / "output").with_suffix(".json") with open(output_file_path, "w") as f: - op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY).decode("utf-8") + op = json.dumps( + output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY + ).decode("utf-8") f.write(op) @@ -69,12 +71,18 @@ def calculate_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str, categories: Dict[str, int] = {} for annotation_file in annotation_files: for annotation_class in annotation_file.annotation_classes: - if annotation_class.name not in categories and annotation_class.annotation_type in [ - "polygon", - "complex_polygon", - "bounding_box", - ]: - categories[annotation_class.name] = _calculate_category_id(annotation_class) + if ( + annotation_class.name not in categories + and annotation_class.annotation_type + in [ + "polygon", + "complex_polygon", + "bounding_box", + ] + ): + categories[annotation_class.name] = _calculate_category_id( + annotation_class + ) return categories @@ -84,12 +92,19 @@ def calculate_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str, current_version=__version__, details=DEPRECATION_MESSAGE, ) -def calculate_tag_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str, int]: +def calculate_tag_categories( + annotation_files: List[dt.AnnotationFile], +) -> Dict[str, int]: categories: Dict[str, int] = {} for annotation_file in annotation_files: for annotation_class in annotation_file.annotation_classes: - if annotation_class.name not in categories and annotation_class.annotation_type == "tag": - categories[annotation_class.name] = _calculate_category_id(annotation_class) + if ( + annotation_class.name not in categories + and annotation_class.annotation_type == "tag" + ): + categories[annotation_class.name] = _calculate_category_id( + annotation_class + ) return categories @@ -128,7 +143,9 @@ def build_licenses() -> List[Dict[str, Any]]: current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_images(annotation_files: List[dt.AnnotationFile], tag_categories: Dict[str, int]) -> List[Dict[str, Any]]: +def build_images( + annotation_files: List[dt.AnnotationFile], tag_categories: Dict[str, int] +) -> List[Dict[str, Any]]: return [ build_image(annotation_file, tag_categories) for annotation_file in sorted(annotation_files, key=lambda x: x.seq) @@ -141,9 +158,13 @@ 
def build_images(annotation_files: List[dt.AnnotationFile], tag_categories: Dict current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_image(annotation_file: dt.AnnotationFile, tag_categories: Dict[str, int]) -> Dict[str, Any]: +def build_image( + annotation_file: dt.AnnotationFile, tag_categories: Dict[str, int] +) -> Dict[str, Any]: tags = [ - annotation for annotation in annotation_file.annotations if annotation.annotation_class.annotation_type == "tag" + annotation + for annotation in annotation_file.annotations + if annotation.annotation_class.annotation_type == "tag" ] return { "license": 0, @@ -173,7 +194,9 @@ def build_annotations( for annotation_file in annotation_files: for annotation in annotation_file.annotations: annotation_id += 1 - annotation_data = build_annotation(annotation_file, annotation_id, annotation, categories) + annotation_data = build_annotation( + annotation_file, annotation_id, annotation, categories + ) if annotation_data: yield annotation_data @@ -185,11 +208,16 @@ def build_annotations( details=DEPRECATION_MESSAGE, ) def build_annotation( - annotation_file: dt.AnnotationFile, annotation_id: int, annotation: dt.Annotation, categories: Dict[str, int] + annotation_file: dt.AnnotationFile, + annotation_id: int, + annotation: dt.Annotation, + categories: Dict[str, int], ) -> Optional[Dict[str, Any]]: annotation_type = annotation.annotation_class.annotation_type if annotation_type == "polygon": - sequences = convert_polygons_to_sequences(annotation.data["path"], rounding=False) + sequences = convert_polygons_to_sequences( + annotation.data["path"], rounding=False + ) x_coords = [s[0::2] for s in sequences] y_coords = [s[1::2] for s in sequences] min_x = np.min([np.min(x_coord) for x_coord in x_coords]) @@ -199,7 +227,12 @@ def build_annotation( w = max_x - min_x h = max_y - min_y # Compute the area of the polygon - poly_area = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) + poly_area = np.sum( + [ + polygon_area(x_coord, y_coord) + for x_coord, y_coord in zip(x_coords, y_coords) + ] + ) return { "id": annotation_id, @@ -230,7 +263,10 @@ def build_annotation( "id": annotation_id, "image_id": _build_image_id(annotation_file), "category_id": categories[annotation.annotation_class.name], - "segmentation": {"counts": counts, "size": [annotation_file.image_height, annotation_file.image_width]}, + "segmentation": { + "counts": counts, + "size": [annotation_file.image_height, annotation_file.image_width], + }, "area": np.sum(mask), "bbox": [min_x, min_y, w, h], "iscrowd": 1, @@ -249,7 +285,12 @@ def build_annotation( annotation_id, dt.make_polygon( annotation.annotation_class.name, - [{"x": x, "y": y}, {"x": x + w, "y": y}, {"x": x + w, "y": y + h}, {"x": x, "y": y + h}], + [ + {"x": x, "y": y}, + {"x": x + w, "y": y}, + {"x": x + w, "y": y + h}, + {"x": x, "y": y + h}, + ], None, annotation.subs, ), @@ -333,21 +374,34 @@ def _calculate_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str categories: Dict[str, int] = {} for annotation_file in annotation_files: for annotation_class in annotation_file.annotation_classes: - if annotation_class.name not in categories and annotation_class.annotation_type in [ - "polygon", - "complex_polygon", - "bounding_box", - ]: - categories[annotation_class.name] = _calculate_category_id(annotation_class) + if ( + annotation_class.name not in categories + and annotation_class.annotation_type + in [ + "polygon", + "complex_polygon", + "bounding_box", + ] + ): + 
categories[annotation_class.name] = _calculate_category_id( + annotation_class + ) return categories -def _calculate_tag_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str, int]: +def _calculate_tag_categories( + annotation_files: List[dt.AnnotationFile], +) -> Dict[str, int]: categories: Dict[str, int] = {} for annotation_file in annotation_files: for annotation_class in annotation_file.annotation_classes: - if annotation_class.name not in categories and annotation_class.annotation_type == "tag": - categories[annotation_class.name] = _calculate_category_id(annotation_class) + if ( + annotation_class.name not in categories + and annotation_class.annotation_type == "tag" + ): + categories[annotation_class.name] = _calculate_category_id( + annotation_class + ) return categories @@ -372,16 +426,22 @@ def _build_licenses() -> List[Dict[str, Any]]: return [{"url": "n/a", "id": 0, "name": "placeholder license"}] -def _build_images(annotation_files: List[dt.AnnotationFile], tag_categories: Dict[str, int]) -> List[Dict[str, Any]]: +def _build_images( + annotation_files: List[dt.AnnotationFile], tag_categories: Dict[str, int] +) -> List[Dict[str, Any]]: return [ _build_image(annotation_file, tag_categories) for annotation_file in sorted(annotation_files, key=lambda x: x.seq) ] -def _build_image(annotation_file: dt.AnnotationFile, tag_categories: Dict[str, int]) -> Dict[str, Any]: +def _build_image( + annotation_file: dt.AnnotationFile, tag_categories: Dict[str, int] +) -> Dict[str, Any]: tags = [ - annotation for annotation in annotation_file.annotations if annotation.annotation_class.annotation_type == "tag" + annotation + for annotation in annotation_file.annotations + if annotation.annotation_class.annotation_type == "tag" ] return { @@ -406,7 +466,9 @@ def _build_image_id(annotation_file: dt.AnnotationFile) -> int: if annotation_file.seq: return annotation_file.seq else: - full_path = str(Path(annotation_file.remote_path or "/") / Path(annotation_file.filename)) + full_path = str( + Path(annotation_file.remote_path or "/") / Path(annotation_file.filename) + ) return crc32(str.encode(full_path)) @@ -417,17 +479,24 @@ def _build_annotations( for annotation_file in annotation_files: for annotation in annotation_file.annotations: annotation_id += 1 - annotation_data = _build_annotation(annotation_file, annotation_id, annotation, categories) + annotation_data = _build_annotation( + annotation_file, annotation_id, annotation, categories + ) if annotation_data: yield annotation_data def _build_annotation( - annotation_file: dt.AnnotationFile, annotation_id: int, annotation: dt.Annotation, categories: Dict[str, int] + annotation_file: dt.AnnotationFile, + annotation_id: int, + annotation: dt.Annotation, + categories: Dict[str, int], ) -> Optional[Dict[str, Any]]: annotation_type = annotation.annotation_class.annotation_type if annotation_type == "polygon": - sequences = convert_polygons_to_sequences(annotation.data["path"], rounding=False) + sequences = convert_polygons_to_sequences( + annotation.data["path"], rounding=False + ) x_coords = [s[0::2] for s in sequences] y_coords = [s[1::2] for s in sequences] min_x = np.min([np.min(x_coord) for x_coord in x_coords]) @@ -437,7 +506,12 @@ def _build_annotation( w = max_x - min_x h = max_y - min_y # Compute the area of the polygon - poly_area = np.sum([_polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) + poly_area = np.sum( + [ + _polygon_area(x_coord, y_coord) + for x_coord, y_coord in zip(x_coords, y_coords) + 
] + ) return { "id": annotation_id, @@ -468,7 +542,10 @@ def _build_annotation( "id": annotation_id, "image_id": _build_image_id(annotation_file), "category_id": categories[annotation.annotation_class.name], - "segmentation": {"counts": counts, "size": [annotation_file.image_height, annotation_file.image_width]}, + "segmentation": { + "counts": counts, + "size": [annotation_file.image_height, annotation_file.image_width], + }, "area": np.sum(mask), "bbox": [min_x, min_y, w, h], "iscrowd": 1, @@ -487,7 +564,12 @@ def _build_annotation( annotation_id, dt.make_polygon( annotation.annotation_class.name, - [{"x": x, "y": y}, {"x": x + w, "y": y}, {"x": x + w, "y": y + h}, {"x": x, "y": y + h}], + [ + {"x": x, "y": y}, + {"x": x + w, "y": y}, + {"x": x + w, "y": y + h}, + {"x": x, "y": y + h}, + ], None, annotation.subs, ), diff --git a/darwin/exporter/formats/cvat.py b/darwin/exporter/formats/cvat.py index 5648ec062..20d6dcfa5 100644 --- a/darwin/exporter/formats/cvat.py +++ b/darwin/exporter/formats/cvat.py @@ -68,7 +68,11 @@ def build_xml(annotation_files: List[dt.AnnotationFile]) -> Element: current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_images(root: Element, annotation_files: List[dt.AnnotationFile], label_lookup: Dict[str, int]) -> None: +def build_images( + root: Element, + annotation_files: List[dt.AnnotationFile], + label_lookup: Dict[str, int], +) -> None: for id, annotation_file in enumerate(annotation_files, 1): image = SubElement(root, "image") image.attrib["id"] = str(id) @@ -112,9 +116,13 @@ def build_attributes(box: Element, annotation: dt.Annotation) -> None: attribute = add_subelement_text(box, "attribute", annotation_text.data) attribute.attrib["name"] = "__text" - annotation_instance_id: Optional[dt.SubAnnotation] = annotation.get_sub("instance_id") + annotation_instance_id: Optional[dt.SubAnnotation] = annotation.get_sub( + "instance_id" + ) if annotation_instance_id: - attribute = add_subelement_text(box, "attribute", str(annotation_instance_id.data)) + attribute = add_subelement_text( + box, "attribute", str(annotation_instance_id.data) + ) attribute.attrib["name"] = "__instance_id" annotation_attributes: Optional[dt.SubAnnotation] = annotation.get_sub("attributes") @@ -130,9 +138,15 @@ def build_attributes(box: Element, annotation: dt.Annotation) -> None: current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_meta(root: Element, annotation_files: List[dt.AnnotationFile], label_lookup: Dict[str, int]) -> None: +def build_meta( + root: Element, + annotation_files: List[dt.AnnotationFile], + label_lookup: Dict[str, int], +) -> None: meta: Element = SubElement(root, "meta") - add_subelement_text(meta, "dumped", str(datetime.datetime.now(tz=datetime.timezone.utc))) + add_subelement_text( + meta, "dumped", str(datetime.datetime.now(tz=datetime.timezone.utc)) + ) task: Element = SubElement(meta, "task") add_subelement_text(task, "id", 1) @@ -142,8 +156,12 @@ def build_meta(root: Element, annotation_files: List[dt.AnnotationFile], label_l add_subelement_text(task, "overlapp", 0) add_subelement_text(task, "bugtracker", None) add_subelement_text(task, "flipped", False) - add_subelement_text(task, "created", str(datetime.datetime.now(tz=datetime.timezone.utc))) - add_subelement_text(task, "updated", str(datetime.datetime.now(tz=datetime.timezone.utc))) + add_subelement_text( + task, "created", str(datetime.datetime.now(tz=datetime.timezone.utc)) + ) + add_subelement_text( + task, "updated", str(datetime.datetime.now(tz=datetime.timezone.utc)) 
+ ) labels: Element = SubElement(task, "labels") build_labels(labels, label_lookup) @@ -162,7 +180,9 @@ def build_meta(root: Element, annotation_files: List[dt.AnnotationFile], label_l current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_segments(segments: Element, annotation_files: List[dt.AnnotationFile]) -> None: +def build_segments( + segments: Element, annotation_files: List[dt.AnnotationFile] +) -> None: segment: Element = SubElement(segments, "segment") add_subelement_text(segment, "id", 1) add_subelement_text(segment, "start", 1) @@ -193,7 +213,10 @@ def build_label_lookup(annotation_files: List[dt.AnnotationFile]) -> Dict[str, i labels: Dict[str, int] = {} for annotation_file in annotation_files: for annotation_class in annotation_file.annotation_classes: - if annotation_class.name not in labels and annotation_class.annotation_type == "bounding_box": + if ( + annotation_class.name not in labels + and annotation_class.annotation_type == "bounding_box" + ): labels[annotation_class.name] = len(labels) return labels @@ -213,7 +236,11 @@ def _build_xml(annotation_files: List[dt.AnnotationFile]) -> Element: return root -def _build_images(root: Element, annotation_files: List[dt.AnnotationFile], label_lookup: Dict[str, int]) -> None: +def _build_images( + root: Element, + annotation_files: List[dt.AnnotationFile], + label_lookup: Dict[str, int], +) -> None: for id, annotation_file in enumerate(annotation_files, 1): image = SubElement(root, "image") image.attrib["id"] = str(id) @@ -245,9 +272,13 @@ def _build_attributes(box: Element, annotation: dt.Annotation) -> None: attribute = _add_subelement_text(box, "attribute", annotation_text.data) attribute.attrib["name"] = "__text" - annotation_instance_id: Optional[dt.SubAnnotation] = annotation.get_sub("instance_id") + annotation_instance_id: Optional[dt.SubAnnotation] = annotation.get_sub( + "instance_id" + ) if annotation_instance_id: - attribute = _add_subelement_text(box, "attribute", str(annotation_instance_id.data)) + attribute = _add_subelement_text( + box, "attribute", str(annotation_instance_id.data) + ) attribute.attrib["name"] = "__instance_id" annotation_attributes: Optional[dt.SubAnnotation] = annotation.get_sub("attributes") @@ -257,9 +288,15 @@ def _build_attributes(box: Element, annotation: dt.Annotation) -> None: attribute.attrib["name"] = attrib -def _build_meta(root: Element, annotation_files: List[dt.AnnotationFile], label_lookup: Dict[str, int]) -> None: +def _build_meta( + root: Element, + annotation_files: List[dt.AnnotationFile], + label_lookup: Dict[str, int], +) -> None: meta: Element = SubElement(root, "meta") - _add_subelement_text(meta, "dumped", str(datetime.datetime.now(tz=datetime.timezone.utc))) + _add_subelement_text( + meta, "dumped", str(datetime.datetime.now(tz=datetime.timezone.utc)) + ) task: Element = SubElement(meta, "task") _add_subelement_text(task, "id", 1) @@ -269,8 +306,12 @@ def _build_meta(root: Element, annotation_files: List[dt.AnnotationFile], label_ _add_subelement_text(task, "overlapp", 0) _add_subelement_text(task, "bugtracker", None) _add_subelement_text(task, "flipped", False) - _add_subelement_text(task, "created", str(datetime.datetime.now(tz=datetime.timezone.utc))) - _add_subelement_text(task, "updated", str(datetime.datetime.now(tz=datetime.timezone.utc))) + _add_subelement_text( + task, "created", str(datetime.datetime.now(tz=datetime.timezone.utc)) + ) + _add_subelement_text( + task, "updated", str(datetime.datetime.now(tz=datetime.timezone.utc)) + ) labels: 
Element = SubElement(task, "labels") _build_labels(labels, label_lookup) @@ -283,7 +324,9 @@ def _build_meta(root: Element, annotation_files: List[dt.AnnotationFile], label_ _add_subelement_text(owner, "email", "user@example.com") -def _build_segments(segments: Element, annotation_files: List[dt.AnnotationFile]) -> None: +def _build_segments( + segments: Element, annotation_files: List[dt.AnnotationFile] +) -> None: segment: Element = SubElement(segments, "segment") _add_subelement_text(segment, "id", 1) _add_subelement_text(segment, "start", 1) @@ -302,6 +345,9 @@ def _build_label_lookup(annotation_files: List[dt.AnnotationFile]) -> Dict[str, labels: Dict[str, int] = {} for annotation_file in annotation_files: for annotation_class in annotation_file.annotation_classes: - if annotation_class.name not in labels and annotation_class.annotation_type == "bounding_box": + if ( + annotation_class.name not in labels + and annotation_class.annotation_type == "bounding_box" + ): labels[annotation_class.name] = len(labels) return labels diff --git a/darwin/exporter/formats/darwin_1_0.py b/darwin/exporter/formats/darwin_1_0.py index 28744817d..5654e5f5b 100644 --- a/darwin/exporter/formats/darwin_1_0.py +++ b/darwin/exporter/formats/darwin_1_0.py @@ -38,19 +38,30 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> N filename = annotation_file.path.parts[-1] output_file_path = (output_dir / filename).with_suffix(".json") except Exception as e: - raise ExportException_CouldNotAssembleOutputPath(f"Could not export file {annotation_file.path} to {output_dir}") from e + raise ExportException_CouldNotAssembleOutputPath( + f"Could not export file {annotation_file.path} to {output_dir}" + ) from e try: output: DictFreeForm = _build_json(annotation_file) except Exception as e: - raise ExportException_CouldNotBuildOutput(f"Could not build output for {annotation_file.path}") from e + raise ExportException_CouldNotBuildOutput( + f"Could not build output for {annotation_file.path}" + ) from e try: with open(output_file_path, "w") as f: - op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS).decode("utf-8") + op = json.dumps( + output, + option=json.OPT_INDENT_2 + | json.OPT_SERIALIZE_NUMPY + | json.OPT_NON_STR_KEYS, + ).decode("utf-8") f.write(op) except Exception as e: - raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e + raise ExportException_CouldNotWriteFile( + f"Could not write output for {annotation_file.path}" + ) from e def _build_json(annotation_file: AnnotationFile) -> DictFreeForm: @@ -125,11 +136,17 @@ def _build_sub_annotation(sub: SubAnnotation) -> DictFreeForm: def _build_authorship(annotation: Union[VideoAnnotation, Annotation]) -> DictFreeForm: annotators = {} if annotation.annotators: - annotators = {"annotators": [_build_author(annotator) for annotator in annotation.annotators]} + annotators = { + "annotators": [ + _build_author(annotator) for annotator in annotation.annotators + ] + } reviewers = {} if annotation.reviewers: - reviewers = {"annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]} + reviewers = { + "annotators": [_build_author(reviewer) for reviewer in annotation.reviewers] + } return {**annotators, **reviewers} @@ -138,7 +155,9 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm: return { **annotation.get_data( only_keyframes=False, - post_processing=lambda annotation, _: _build_image_annotation(annotation, 
skip_slots=True), + post_processing=lambda annotation, _: _build_image_annotation( + annotation, skip_slots=True + ), ), "name": annotation.annotation_class.name, "slot_names": annotation.slot_names, @@ -146,7 +165,9 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm: } -def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> DictFreeForm: +def _build_image_annotation( + annotation: Annotation, skip_slots: bool = False +) -> DictFreeForm: json_subs = {} for sub in annotation.subs: json_subs.update(_build_sub_annotation(sub)) @@ -164,7 +185,9 @@ def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> return {**base_json, "slot_names": annotation.slot_names} -def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm: +def _build_legacy_annotation_data( + annotation_class: AnnotationClass, data: DictFreeForm +) -> DictFreeForm: v1_data = {} polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"} diff --git a/darwin/exporter/formats/dataloop.py b/darwin/exporter/formats/dataloop.py index c442867a4..1aedd9d77 100644 --- a/darwin/exporter/formats/dataloop.py +++ b/darwin/exporter/formats/dataloop.py @@ -40,9 +40,13 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N ) def export_file(annotation_file: dt.AnnotationFile, id: int, output_dir: Path) -> None: output: Dict[str, Any] = _build_json(annotation_file, id) - output_file_path: Path = (output_dir / annotation_file.filename).with_suffix(".json") + output_file_path: Path = (output_dir / annotation_file.filename).with_suffix( + ".json" + ) with open(output_file_path, "w") as f: - op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY).decode("utf-8") + op = json.dumps( + output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY + ).decode("utf-8") f.write(op) @@ -67,7 +71,9 @@ def build_json(annotation_file: dt.AnnotationFile, id: int) -> Dict[str, Any]: current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_annotations(annotation_file: dt.AnnotationFile, id: int) -> Iterable[Dict[str, Any]]: +def build_annotations( + annotation_file: dt.AnnotationFile, id: int +) -> Iterable[Dict[str, Any]]: output = [] for annotation_id, annotation in enumerate(annotation_file.annotations): print(annotation) @@ -96,7 +102,10 @@ def build_annotations(annotation_file: dt.AnnotationFile, id: int) -> Iterable[D "type": "segment", "label": annotation.annotation_class.name, "attributes": [], - "coordinates": [{"x": point["x"], "y": point["y"], "z": 0} for point in annotation.data["path"]], + "coordinates": [ + {"x": point["x"], "y": point["y"], "z": 0} + for point in annotation.data["path"] + ], "metadata": {}, } output.append(entry) @@ -106,13 +115,19 @@ def build_annotations(annotation_file: dt.AnnotationFile, id: int) -> Iterable[D def _export_file(annotation_file: dt.AnnotationFile, id: int, output_dir: Path) -> None: output: Dict[str, Any] = _build_json(annotation_file, id) - output_file_path: Path = (output_dir / annotation_file.filename).with_suffix(".json") + output_file_path: Path = (output_dir / annotation_file.filename).with_suffix( + ".json" + ) with open(output_file_path, "w") as f: - op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY).decode("utf-8") + op = json.dumps( + output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY + ).decode("utf-8") f.write(op) -def _build_annotations(annotation_file: 
dt.AnnotationFile, id: int) -> Iterable[Dict[str, Any]]: +def _build_annotations( + annotation_file: dt.AnnotationFile, id: int +) -> Iterable[Dict[str, Any]]: output = [] for annotation_id, annotation in enumerate(annotation_file.annotations): print(annotation) @@ -141,7 +156,10 @@ def _build_annotations(annotation_file: dt.AnnotationFile, id: int) -> Iterable[ "type": "segment", "label": annotation.annotation_class.name, "attributes": [], - "coordinates": [{"x": point["x"], "y": point["y"], "z": 0} for point in annotation.data["path"]], + "coordinates": [ + {"x": point["x"], "y": point["y"], "z": 0} + for point in annotation.data["path"] + ], "metadata": {}, } output.append(entry) diff --git a/darwin/exporter/formats/helpers/yolo_class_builder.py b/darwin/exporter/formats/helpers/yolo_class_builder.py index 68bb172d0..ac20a495b 100644 --- a/darwin/exporter/formats/helpers/yolo_class_builder.py +++ b/darwin/exporter/formats/helpers/yolo_class_builder.py @@ -30,7 +30,9 @@ def export_file( # do it manually. filename = annotation_file.path.name - filename_to_write = filename.replace(".json", ".txt") if ".json" in filename else filename + ".txt" + filename_to_write = ( + filename.replace(".json", ".txt") if ".json" in filename else filename + ".txt" + ) output_file_path = output_dir / filename_to_write output_file_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/darwin/exporter/formats/instance_mask.py b/darwin/exporter/formats/instance_mask.py index 7e7b882ee..2991faaa9 100644 --- a/darwin/exporter/formats/instance_mask.py +++ b/darwin/exporter/formats/instance_mask.py @@ -28,11 +28,15 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N masks_dir.mkdir(parents=True, exist_ok=True) with open(output_dir / "instance_mask_annotations.csv", "w") as f: f.write("image_id,mask_id,class_name\n") - for annotation_file in get_progress_bar(list(annotation_files), "Processing annotations"): + for annotation_file in get_progress_bar( + list(annotation_files), "Processing annotations" + ): image_id = os.path.splitext(annotation_file.filename)[0] height = annotation_file.image_height width = annotation_file.image_width - annotations = [a for a in annotation_file.annotations if ispolygon(a.annotation_class)] + annotations = [ + a for a in annotation_file.annotations if ispolygon(a.annotation_class) + ] for i, annotation in enumerate(annotations): cat = annotation.annotation_class.name if annotation.annotation_class.annotation_type == "polygon": @@ -41,7 +45,9 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N polygon = annotation.data["paths"] else: continue - mask = convert_polygons_to_mask(polygon, height=height, width=width, value=255) + mask = convert_polygons_to_mask( + polygon, height=height, width=width, value=255 + ) mask = Image.fromarray(mask.astype(np.uint8)) mask_id = f"{image_id}_{i:05}" outfile = masks_dir / f"{mask_id}.png" diff --git a/darwin/exporter/formats/mask.py b/darwin/exporter/formats/mask.py index 9a2220c64..0f7e8b735 100644 --- a/darwin/exporter/formats/mask.py +++ b/darwin/exporter/formats/mask.py @@ -53,9 +53,13 @@ def get_palette(mode: dt.MaskTypes.Mode, categories: List[str]) -> dt.MaskTypes. if num_categories > 254: raise ValueError("maximum number of classes supported: 254.") elif num_categories == 1: - raise ValueError("only having the '__background__' class is not allowed. Please add more classes.") + raise ValueError( + "only having the '__background__' class is not allowed. 
Please add more classes." + ) - palette = {c: int(i * 255 / (num_categories - 1)) for i, c in enumerate(categories)} + palette = { + c: int(i * 255 / (num_categories - 1)) for i, c in enumerate(categories) + } if mode == "rgb": if num_categories > 360: @@ -63,12 +67,16 @@ def get_palette(mode: dt.MaskTypes.Mode, categories: List[str]) -> dt.MaskTypes. palette = {c: i for i, c in enumerate(categories)} if not palette: - raise ValueError(f"Failed to generate a palette.", mode, categories) from DarwinException + raise ValueError( + f"Failed to generate a palette.", mode, categories + ) from DarwinException return palette -def get_rgb_colours(categories: dt.MaskTypes.CategoryList) -> Tuple[dt.MaskTypes.RgbColors, dt.MaskTypes.RgbPalette]: +def get_rgb_colours( + categories: dt.MaskTypes.CategoryList, +) -> Tuple[dt.MaskTypes.RgbColors, dt.MaskTypes.RgbPalette]: """ Returns a list of RGB colours and a dict of categories and their corresponding RGB palette value. @@ -90,14 +98,17 @@ def get_rgb_colours(categories: dt.MaskTypes.CategoryList) -> Tuple[dt.MaskTypes SATURATION_OF_COLOUR: float = 0.8 VALUE_OF_COLOUR: float = 1.0 hsv_colours: dt.MaskTypes.HsvColors = [ - (x / num_categories, SATURATION_OF_COLOUR, VALUE_OF_COLOUR) for x in range(num_categories - 1) + (x / num_categories, SATURATION_OF_COLOUR, VALUE_OF_COLOUR) + for x in range(num_categories - 1) ] rgb_colour_list: dt.MaskTypes.RgbColorList = list( map(lambda x: [int(e * 255) for e in colorsys.hsv_to_rgb(*x)], hsv_colours) ) # Now we add BG class with [0 0 0] RGB value rgb_colour_list.insert(0, [0, 0, 0]) - palette_rgb: dt.MaskTypes.RgbPalette = {c: rgb for c, rgb in zip(categories, rgb_colour_list)} + palette_rgb: dt.MaskTypes.RgbPalette = { + c: rgb for c, rgb in zip(categories, rgb_colour_list) + } rgb_colours: dt.MaskTypes.RgbColors = [c for e in rgb_colour_list for c in e] return rgb_colours, palette_rgb @@ -117,12 +128,16 @@ def get_render_mode(annotations: List[dt.AnnotationLike]) -> dt.MaskTypes.TypeOf TypeOfRenderType A string reading either "raster" or "polygon". 
""" - non_video_annotations: List[dt.Annotation] = [a for a in annotations if not isinstance(a, dt.VideoAnnotation)] + non_video_annotations: List[dt.Annotation] = [ + a for a in annotations if not isinstance(a, dt.VideoAnnotation) + ] if not non_video_annotations: return "polygon" - list_of_types: List[str] = [a.annotation_class.annotation_type for a in non_video_annotations] + list_of_types: List[str] = [ + a.annotation_class.annotation_type for a in non_video_annotations + ] types: Set[str] = set(list_of_types) is_raster_mask = ("mask" in types) and ("raster_layer" in types) @@ -131,7 +146,9 @@ def get_render_mode(annotations: List[dt.AnnotationLike]) -> dt.MaskTypes.TypeOf raster_layer_count = len([a for a in types if a == "raster_layer"]) if is_raster_mask and is_polygon: - raise ValueError("Cannot have both raster and polygon annotations in the same file") + raise ValueError( + "Cannot have both raster and polygon annotations in the same file" + ) if is_raster_mask and raster_layer_count > 1: raise ValueError("Cannot have more than one raster layer in the same file") @@ -142,7 +159,10 @@ def get_render_mode(annotations: List[dt.AnnotationLike]) -> dt.MaskTypes.TypeOf if is_polygon: return "polygon" - raise ValueError("No renderable annotations found in file, found types: " + ",".join(list_of_types)) + raise ValueError( + "No renderable annotations found in file, found types: " + + ",".join(list_of_types) + ) def colours_in_rle( @@ -171,7 +191,9 @@ def colours_in_rle( mask: Optional[dt.AnnotationMask] = mask_lookup.get(uuid) if mask is None: - raise ValueError(f"Could not find mask with uuid {uuid} in mask lookup table.") + raise ValueError( + f"Could not find mask with uuid {uuid} in mask lookup table." + ) if not mask.name in colours: colours[mask.name] = colour_value @@ -255,7 +277,9 @@ def render_polygons( errors: List[Exception] = [] - filtered_annotations: List[dt.Annotation] = [a for a in annotations if not isinstance(a, dt.VideoAnnotation)] + filtered_annotations: List[dt.Annotation] = [ + a for a in annotations if not isinstance(a, dt.VideoAnnotation) + ] beyond_window = annotations_exceed_window(filtered_annotations, height, width) if beyond_window: # If the annotations exceed the window, we need to offset the mask to fit them all in. 
@@ -277,14 +301,20 @@ def render_polygons( elif a.annotation_class.annotation_type == "complex_polygon": polygon = a.data["paths"] else: - raise ValueError(f"Unknown annotation type {a.annotation_class.annotation_type}") + raise ValueError( + f"Unknown annotation type {a.annotation_class.annotation_type}" + ) if beyond_window: # Offset the polygon by the minimum x and y values to shift it to new frame of reference polygon_off = offset_polygon(polygon, offset_x, offset_y) - sequence = convert_polygons_to_sequences(polygon_off, height=new_height, width=new_width) + sequence = convert_polygons_to_sequences( + polygon_off, height=new_height, width=new_width + ) else: - sequence = convert_polygons_to_sequences(polygon, height=height, width=width) + sequence = convert_polygons_to_sequences( + polygon, height=height, width=width + ) colour_to_draw = categories.index(cat) mask = draw_polygon(mask, sequence, colour_to_draw) @@ -369,7 +399,9 @@ def render_raster( if a.annotation_class.annotation_type == "raster_layer" and (rl := data): if raster_layer: - errors.append(ValueError(f"Annotation {a.id} has more than one raster layer")) + errors.append( + ValueError(f"Annotation {a.id} has more than one raster layer") + ) break new_rl = dt.RasterLayer( @@ -400,16 +432,27 @@ def render_raster( return errors, mask, categories, colours -def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path, mode: dt.MaskTypes.Mode) -> None: +def export( + annotation_files: Iterable[dt.AnnotationFile], + output_dir: Path, + mode: dt.MaskTypes.Mode, +) -> None: masks_dir: Path = output_dir / "masks" masks_dir.mkdir(exist_ok=True, parents=True) annotation_files = list(annotation_files) accepted_types = ["polygon", "complex_polygon", "raster_layer", "mask"] - all_classes_sets: List[Set[dt.AnnotationClass]] = [a.annotation_classes for a in annotation_files] + all_classes_sets: List[Set[dt.AnnotationClass]] = [ + a.annotation_classes for a in annotation_files + ] if len(all_classes_sets) > 0: all_classes: Set[dt.AnnotationClass] = set.union(*all_classes_sets) categories: List[str] = ["__background__"] + sorted( - list(set([c.name for c in all_classes if c.annotation_type in accepted_types])), key=lambda x: x.lower() + list( + set( + [c.name for c in all_classes if c.annotation_type in accepted_types] + ) + ), + key=lambda x: x.lower(), ) palette = get_palette(mode, categories) else: @@ -426,11 +469,15 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path, mode height = annotation_file.image_height width = annotation_file.image_width if height is None or width is None: - raise ValueError(f"Annotation file {annotation_file.filename} references an image with no height or width") + raise ValueError( + f"Annotation file {annotation_file.filename} references an image with no height or width" + ) mask: NDArray = np.zeros((height, width)).astype(np.uint8) annotations: List[dt.AnnotationLike] = [ - a for a in annotation_file.annotations if a.annotation_class.annotation_type in accepted_types + a + for a in annotation_file.annotations + if a.annotation_class.annotation_type in accepted_types ] render_type = get_render_mode(annotations) @@ -455,7 +502,9 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path, mode raise DarwinException.from_multiple_exceptions(errors) # Map to palette - mask = np.array(mask, dtype=np.uint8) # Final double check that type is using correct dtype + mask = np.array( + mask, dtype=np.uint8 + ) # Final double check that type is using correct dtype 
if mode == "rgb": rgb_colours, palette_rgb = get_rgb_colours(categories) @@ -482,7 +531,9 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path, mode writer.writerow([class_key, f"{palette[class_key]}"]) -def annotations_exceed_window(annotations: List[dt.Annotation], height: int, width: int) -> bool: +def annotations_exceed_window( + annotations: List[dt.Annotation], height: int, width: int +) -> bool: """Check if any annotations exceed the image window Args: @@ -508,7 +559,9 @@ def annotations_exceed_window(annotations: List[dt.Annotation], height: int, wid return False -def get_extents(annotations: List[dt.Annotation], height: int = 0, width: int = 0) -> Tuple[int, int, int, int]: +def get_extents( + annotations: List[dt.Annotation], height: int = 0, width: int = 0 +) -> Tuple[int, int, int, int]: """Create a bounding box around all annotations in discrete pixel space Args: diff --git a/darwin/exporter/formats/pascalvoc.py b/darwin/exporter/formats/pascalvoc.py index 4250d9b7f..af8acffa4 100644 --- a/darwin/exporter/formats/pascalvoc.py +++ b/darwin/exporter/formats/pascalvoc.py @@ -145,7 +145,9 @@ def save_xml(xml: Element, path: Path) -> None: current_version=__version__, details=REMOVAL_MESSAGE, ) -def build_voc(metadata: Dict[str, Any], annotations: Iterable[Dict[str, Any]]) -> Element: +def build_voc( + metadata: Dict[str, Any], annotations: Iterable[Dict[str, Any]] +) -> Element: print(metadata) root: Element = Element("annotation") add_subelement_text(root, "folder", "images") diff --git a/darwin/exporter/formats/semantic_mask.py b/darwin/exporter/formats/semantic_mask.py index 71e7e95b9..0726abd35 100644 --- a/darwin/exporter/formats/semantic_mask.py +++ b/darwin/exporter/formats/semantic_mask.py @@ -16,4 +16,6 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N output_dir : Path The folder where the new semantic mask files will be. 
""" - return export_mask(annotation_files=annotation_files, output_dir=output_dir, mode="rgb") + return export_mask( + annotation_files=annotation_files, output_dir=output_dir, mode="rgb" + ) diff --git a/darwin/exporter/formats/semantic_mask_grey.py b/darwin/exporter/formats/semantic_mask_grey.py index b1e6b8a28..f0287f00b 100644 --- a/darwin/exporter/formats/semantic_mask_grey.py +++ b/darwin/exporter/formats/semantic_mask_grey.py @@ -6,4 +6,6 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> None: - return export_mask(annotation_files=annotation_files, output_dir=output_dir, mode="grey") + return export_mask( + annotation_files=annotation_files, output_dir=output_dir, mode="grey" + ) diff --git a/darwin/exporter/formats/semantic_mask_index.py b/darwin/exporter/formats/semantic_mask_index.py index d08a067d0..b4784888f 100644 --- a/darwin/exporter/formats/semantic_mask_index.py +++ b/darwin/exporter/formats/semantic_mask_index.py @@ -6,4 +6,6 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> None: - return export_mask(annotation_files=annotation_files, output_dir=output_dir, mode="index") + return export_mask( + annotation_files=annotation_files, output_dir=output_dir, mode="index" + ) diff --git a/darwin/exporter/formats/yolo.py b/darwin/exporter/formats/yolo.py index 00f1ceca5..73ac5ddb3 100644 --- a/darwin/exporter/formats/yolo.py +++ b/darwin/exporter/formats/yolo.py @@ -39,7 +39,9 @@ def _build_txt(annotation_file: dt.AnnotationFile, class_index: ClassIndex) -> s annotation_type = annotation.annotation_class.annotation_type if isinstance(annotation, dt.VideoAnnotation): - raise ValueError("YOLO format does not support video annotations for export or conversion.") + raise ValueError( + "YOLO format does not support video annotations for export or conversion." 
+ ) if annotation_type == "bounding_box": data = annotation.data diff --git a/darwin/exporter/formats/yolo_segmented.py b/darwin/exporter/formats/yolo_segmented.py index 00ee40189..ef599e7e5 100644 --- a/darwin/exporter/formats/yolo_segmented.py +++ b/darwin/exporter/formats/yolo_segmented.py @@ -103,7 +103,9 @@ def _determine_annotation_type(annotation: Annotation) -> YoloSegmentedAnnotatio return YoloSegmentedAnnotationType.UNKNOWN -def _handle_bounding_box(data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point]) -> bool: +def _handle_bounding_box( + data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point] +) -> bool: logger.debug(f"Exporting bounding box at index {annotation_index}.") try: @@ -163,7 +165,8 @@ def _handle_bounding_box(data: dict, im_w: int, im_h: int, annotation_index: int except KeyError as exc: logger.warn( - f"Skipped annotation at index {annotation_index} because an" "expected key was not found in the data.", + f"Skipped annotation at index {annotation_index} because an " + "expected key was not found in the data.", exc_info=exc, ) return False @@ -171,7 +174,9 @@ def _handle_bounding_box(data: dict, im_w: int, im_h: int, annotation_index: int return True -def _handle_polygon(data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point]) -> bool: +def _handle_polygon( + data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point] +) -> bool: logger.debug(f"Exporting polygon at index {annotation_index}.") last_point = None @@ -204,7 +209,9 @@ def _handle_polygon(data: dict, im_w: int, im_h: int, annotation_index: int, poi return False except Exception as exc: - logger.error(f"An unexpected error occured while exporting annotation at index {annotation_index}.") + logger.error( + f"An unexpected error occurred while exporting annotation at index {annotation_index}." + ) return True @@ -246,7 +253,9 @@ def _build_text(annotation_file: AnnotationFile, class_index: ClassIndex) -> str continue if annotation.data is None: - logger.warn(f"Skipped annotation at index {annotation_index} because it's data fields are empty.'") + logger.warn( + f"Skipped annotation at index {annotation_index} because its data fields are empty." + ) continue # Process annotations @@ -259,11 +268,15 @@ def _build_text(annotation_file: AnnotationFile, class_index: ClassIndex) -> str points: List[Point] = [] if annotation_type == YoloSegmentedAnnotationType.BOUNDING_BOX: - bb_success = _handle_bounding_box(data, im_w, im_h, annotation_index, points) + bb_success = _handle_bounding_box( + data, im_w, im_h, annotation_index, points + ) if not bb_success: continue elif annotation_type == YoloSegmentedAnnotationType.POLYGON: - polygon_success = _handle_polygon(data, im_w, im_h, annotation_index, points) + polygon_success = _handle_polygon( + data, im_w, im_h, annotation_index, points + ) if not polygon_success: continue else: diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py index 0cf90f975..05034b479 100644 --- a/darwin/importer/formats/coco.py +++ b/darwin/importer/formats/coco.py @@ -44,7 +44,9 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: return list(parse_json(path, data)) -def parse_json(path: Path, data: Dict[str, dt.UnknownType]) -> Iterator[dt.AnnotationFile]: +def parse_json( + path: Path, data: Dict[str, dt.UnknownType] +) -> Iterator[dt.AnnotationFile]: """ Parses the given ``json`` structure into an ``Iterator[dt.AnnotationFile]``.
@@ -62,9 +64,13 @@ def parse_json(path: Path, data: Dict[str, dt.UnknownType]) -> Iterator[dt.Annot """ annotations = data["annotations"] image_lookup_table = {image["id"]: image for image in data["images"]} - category_lookup_table = {category["id"]: category for category in data["categories"]} + category_lookup_table = { + category["id"]: category for category in data["categories"] + } tag_categories = data.get("tag_categories") or [] - tag_category_lookup_table = {category["id"]: category for category in tag_categories} + tag_category_lookup_table = { + category["id"]: category for category in tag_categories + } image_annotations: Dict[str, dt.UnknownType] = {} for image in data["images"]: @@ -84,18 +90,25 @@ def parse_json(path: Path, data: Dict[str, dt.UnknownType]) -> Iterator[dt.Annot annotation["segmentation"] if image_id not in image_annotations: image_annotations[image_id] = [] - image_annotations[image_id].append(parse_annotation(annotation, category_lookup_table)) + image_annotations[image_id].append( + parse_annotation(annotation, category_lookup_table) + ) for image_id in image_annotations.keys(): image = image_lookup_table[int(image_id)] annotations = list(filter(None, image_annotations[image_id])) - annotation_classes = set([annotation.annotation_class for annotation in annotations]) + annotation_classes = set( + [annotation.annotation_class for annotation in annotations] + ) remote_path, filename = deconstruct_full_path(image["file_name"]) - yield dt.AnnotationFile(path, filename, annotation_classes, annotations, remote_path=remote_path) + yield dt.AnnotationFile( + path, filename, annotation_classes, annotations, remote_path=remote_path + ) def parse_annotation( - annotation: Dict[str, dt.UnknownType], category_lookup_table: Dict[str, dt.UnknownType] + annotation: Dict[str, dt.UnknownType], + category_lookup_table: Dict[str, dt.UnknownType], ) -> Optional[dt.Annotation]: """ Parses the given ``json`` dictionary into a darwin ``Annotation`` if possible. 
@@ -126,11 +139,17 @@ def parse_annotation( if len(segmentation) == 0 and len(annotation["bbox"]) == 4: x, y, w, h = map(int, annotation["bbox"]) return dt.make_bounding_box(category["name"], x, y, w, h) - elif len(segmentation) == 0 and len(annotation["bbox"]) == 1 and len(annotation["bbox"][0]) == 4: + elif ( + len(segmentation) == 0 + and len(annotation["bbox"]) == 1 + and len(annotation["bbox"][0]) == 4 + ): x, y, w, h = map(int, annotation["bbox"][0]) return dt.make_bounding_box(category["name"], x, y, w, h) elif isinstance(segmentation, dict): - logger.warn("warning, converting complex coco rle mask to polygon, could take some time") + logger.warn( + "warning, converting complex coco rle mask to polygon, could take some time" + ) if isinstance(segmentation["counts"], list): mask = rle_decode(segmentation["counts"], segmentation["size"][::-1]) else: @@ -155,7 +174,9 @@ def parse_annotation( return dt.make_complex_polygon(category["name"], paths) elif isinstance(segmentation, list): path = [] - points = iter(segmentation[0] if isinstance(segmentation[0], list) else segmentation) + points = iter( + segmentation[0] if isinstance(segmentation[0], list) else segmentation + ) while True: try: x, y = next(points), next(points) diff --git a/darwin/importer/formats/csv_tags.py b/darwin/importer/formats/csv_tags.py index 93fbc7130..f181b8296 100644 --- a/darwin/importer/formats/csv_tags.py +++ b/darwin/importer/formats/csv_tags.py @@ -32,7 +32,17 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: if filename == "": continue annotations = [dt.make_tag(tag) for tag in tags if len(tag) > 0] - annotation_classes = set([annotation.annotation_class for annotation in annotations]) + annotation_classes = set( + [annotation.annotation_class for annotation in annotations] + ) remote_path, filename = deconstruct_full_path(filename) - files.append(dt.AnnotationFile(path, filename, annotation_classes, annotations, remote_path=remote_path)) + files.append( + dt.AnnotationFile( + path, + filename, + annotation_classes, + annotations, + remote_path=remote_path, + ) + ) return files diff --git a/darwin/importer/formats/csv_tags_video.py b/darwin/importer/formats/csv_tags_video.py index cea345293..35f05e230 100644 --- a/darwin/importer/formats/csv_tags_video.py +++ b/darwin/importer/formats/csv_tags_video.py @@ -43,12 +43,25 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: frames = {i: annotation for i in range(start_frame, end_frame + 1)} keyframes = {i: i == start_frame for i in range(start_frame, end_frame + 1)} - annotation = dt.make_video_annotation(frames, keyframes, [[start_frame, end_frame]], False, slot_names=[]) + annotation = dt.make_video_annotation( + frames, keyframes, [[start_frame, end_frame]], False, slot_names=[] + ) if filename not in file_annotation_map: file_annotation_map[filename] = [] file_annotation_map[filename].append(annotation) for filename in file_annotation_map: annotations = file_annotation_map[filename] - annotation_classes = set([annotation.annotation_class for annotation in annotations]) - files.append(dt.AnnotationFile(path, filename, annotation_classes, annotations, is_video=True, remote_path="/")) + annotation_classes = set( + [annotation.annotation_class for annotation in annotations] + ) + files.append( + dt.AnnotationFile( + path, + filename, + annotation_classes, + annotations, + is_video=True, + remote_path="/", + ) + ) return files diff --git a/darwin/importer/formats/dataloop.py b/darwin/importer/formats/dataloop.py index 
65c4db87b..207027316 100644 --- a/darwin/importer/formats/dataloop.py +++ b/darwin/importer/formats/dataloop.py @@ -30,8 +30,12 @@ def parse_path(path: Path) -> Optional[dt.AnnotationFile]: if path.suffix != ".json": return None data = attempt_decode(path) - annotations: List[dt.Annotation] = list(filter(None, map(_parse_annotation, data["annotations"]))) - annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations]) + annotations: List[dt.Annotation] = list( + filter(None, map(_parse_annotation, data["annotations"])) + ) + annotation_classes: Set[dt.AnnotationClass] = set( + [annotation.annotation_class for annotation in annotations] + ) return dt.AnnotationFile( path, _remove_leading_slash(data["filename"]), diff --git a/darwin/importer/formats/labelbox.py b/darwin/importer/formats/labelbox.py index 450e880f5..d6cd48eaa 100644 --- a/darwin/importer/formats/labelbox.py +++ b/darwin/importer/formats/labelbox.py @@ -96,19 +96,31 @@ def parse_path(path: Path) -> Optional[List[AnnotationFile]]: def _convert(file_data: Dict[str, Any], path) -> AnnotationFile: filename: str = str(file_data.get("External ID")) label: Dict[str, Any] = cast(Dict[str, Any], file_data.get("Label")) - label_objects: List[Dict[str, Any]] = cast(List[Dict[str, Any]], label.get("objects")) - label_classifications: List[Dict[str, Any]] = cast(List[Dict[str, Any]], label.get("classifications")) + label_objects: List[Dict[str, Any]] = cast( + List[Dict[str, Any]], label.get("objects") + ) + label_classifications: List[Dict[str, Any]] = cast( + List[Dict[str, Any]], label.get("classifications") + ) classification_annotations: List[Annotation] = [] if len(label_classifications) > 0: - classification_annotations = _flat_map_list(_map_list(_convert_label_classifications, label_classifications)) + classification_annotations = _flat_map_list( + _map_list(_convert_label_classifications, label_classifications) + ) - object_annotations: List[Annotation] = _map_list(_convert_label_objects, label_objects) + object_annotations: List[Annotation] = _map_list( + _convert_label_objects, label_objects + ) annotations: List[Annotation] = object_annotations + classification_annotations classes: Set[AnnotationClass] = set(map(_get_class, annotations)) return AnnotationFile( - annotations=annotations, path=path, filename=filename, annotation_classes=classes, remote_path="/" + annotations=annotations, + path=path, + filename=filename, + annotation_classes=classes, + remote_path="/", ) @@ -174,7 +186,9 @@ def _to_line_annotation(line: List[Point], title: str) -> Annotation: return make_line(title, line, None) -def _to_tag_annotations_from_radio_box(question: str, radio_button: Dict[str, Any]) -> Annotation: +def _to_tag_annotations_from_radio_box( + question: str, radio_button: Dict[str, Any] +) -> Annotation: answer: str = str(radio_button.get("value")) return make_tag(f"{question}:{answer}") diff --git a/darwin/importer/formats/labelbox_schemas.py b/darwin/importer/formats/labelbox_schemas.py index 61f6cfbc9..9b49f94ae 100644 --- a/darwin/importer/formats/labelbox_schemas.py +++ b/darwin/importer/formats/labelbox_schemas.py @@ -30,7 +30,10 @@ "description": "Schema of a Polygon", "title": "Polygon", "default": [{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}], - "examples": [[{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}], []], + "examples": [ + [{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}], + [], + ], "type": "array", "items": point, } 
@@ -40,7 +43,10 @@ "description": "Schema of a Polyline", "title": "Polyline", "default": [{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}], - "examples": [[{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}], []], + "examples": [ + [{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}], + [], + ], "type": "array", "items": point, } @@ -52,8 +58,18 @@ "default": {"title": "Banana", "point": {"x": 3665.814, "y": 351.628}}, "examples": [ {"title": "Banana", "point": {"x": 3665.814, "y": 351.628}}, - {"title": "Orange", "bbox": {"top": 1.2, "left": 2.5, "height": 10, "width": 20}}, - {"title": "Apple", "polygon": [{"x": 1.2, "y": 2.5}, {"x": 2.5, "y": 3.6}, {"x": 1.2, "y": 2.5}]}, + { + "title": "Orange", + "bbox": {"top": 1.2, "left": 2.5, "height": 10, "width": 20}, + }, + { + "title": "Apple", + "polygon": [ + {"x": 1.2, "y": 2.5}, + {"x": 2.5, "y": 3.6}, + {"x": 1.2, "y": 2.5}, + ], + }, ], "type": "object", "required": ["title"], @@ -93,7 +109,10 @@ "default": {"value": "a_question", "answer": {"value": "an_answer"}}, "examples": [ {"value": "a_question", "answer": {"value": "an_answer"}}, - {"value": "a_question", "answers": [{"value": "an_answer_1"}, {"value": "an_answer_2"}]}, + { + "value": "a_question", + "answers": [{"value": "an_answer_1"}, {"value": "an_answer_2"}], + }, ], "type": "object", "required": ["value"], @@ -101,9 +120,21 @@ "oneOf": [ { "required": ["answer"], - "properties": {"answer": {"oneOf": [classification_answer_free_text, classification_answer_obj]}}, + "properties": { + "answer": { + "oneOf": [ + classification_answer_free_text, + classification_answer_obj, + ] + } + }, + }, + { + "required": ["answers"], + "properties": { + "answers": {"type": "array", "items": classification_answer_obj} + }, }, - {"required": ["answers"], "properties": {"answers": {"type": "array", "items": classification_answer_obj}}}, ], } diff --git a/darwin/importer/formats/nifti.py b/darwin/importer/formats/nifti.py index 0e06623bf..49be7b951 100644 --- a/darwin/importer/formats/nifti.py +++ b/darwin/importer/formats/nifti.py @@ -49,17 +49,26 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: if not isinstance(path, Path): path = Path(path) if path.suffix != ".json": - console.print("Skipping file: {} (not a json file)".format(path), style="bold yellow") + console.print( + "Skipping file: {} (not a json file)".format(path), style="bold yellow" + ) return None data = attempt_decode(path) try: validate(data, schema=nifti_import_schema) except Exception as e: - console.print("Skipping file: {} (invalid json file, see schema for details)".format(path), style="bold yellow") + console.print( + "Skipping file: {} (invalid json file, see schema for details)".format( + path + ), + style="bold yellow", + ) return None nifti_annotations = data.get("data") if nifti_annotations is None or nifti_annotations == []: - console.print("Skipping file: {} (no data found)".format(path), style="bold yellow") + console.print( + "Skipping file: {} (no data found)".format(path), style="bold yellow" + ) return None annotation_files = [] for nifti_annotation in nifti_annotations: @@ -85,7 +94,6 @@ def _parse_nifti( slot_names: List[str], is_mpr: bool, ) -> dt.AnnotationFile: - img, pixdims = process_nifti(nib.load(nifti_path)) shape = img.shape @@ -113,13 +121,21 @@ def _parse_nifti( if class_name == "background": continue _video_annotations = get_video_annotation( - img, class_idxs=class_idxs, class_name=class_name, slot_names=slot_names, is_mpr=is_mpr, 
pixdims=pixdims + img, + class_idxs=class_idxs, + class_name=class_name, + slot_names=slot_names, + is_mpr=is_mpr, + pixdims=pixdims, ) if _video_annotations is None: continue video_annotations += _video_annotations annotation_classes = set( - [dt.AnnotationClass(class_name, "polygon", "polygon") for class_name in class_map.values()] + [ + dt.AnnotationClass(class_name, "polygon", "polygon") + for class_name in class_map.values() + ] ) return dt.AnnotationFile( path=json_path, @@ -128,7 +144,11 @@ def _parse_nifti( annotation_classes=annotation_classes, annotations=video_annotations, slots=[ - dt.Slot(name=slot_name, type="dicom", source_files=[{"url": None, "file_name": str(filename)}]) + dt.Slot( + name=slot_name, + type="dicom", + source_files=[{"url": None, "file_name": str(filename)}], + ) for slot_name in slot_names ], ) @@ -142,14 +162,20 @@ def get_video_annotation( is_mpr: bool, pixdims: Tuple[float], ) -> Optional[List[dt.VideoAnnotation]]: - if not is_mpr: - return nifti_to_video_annotation(volume, class_name, class_idxs, slot_names, view_idx=2, pixdims=pixdims) + return nifti_to_video_annotation( + volume, class_name, class_idxs, slot_names, view_idx=2, pixdims=pixdims + ) elif is_mpr and len(slot_names) == 3: video_annotations = [] for view_idx, slot_name in enumerate(slot_names): _video_annotations = nifti_to_video_annotation( - volume, class_name, class_idxs, [slot_name], view_idx=view_idx, pixdims=pixdims + volume, + class_name, + class_idxs, + [slot_name], + view_idx=view_idx, + pixdims=pixdims, ) video_annotations += _video_annotations return video_annotations @@ -157,7 +183,9 @@ def get_video_annotation( raise Exception("If is_mpr is True, slot_names must be of length 3") -def nifti_to_video_annotation(volume, class_name, class_idxs, slot_names, view_idx=2, pixdims=(1, 1, 1)): +def nifti_to_video_annotation( + volume, class_name, class_idxs, slot_names, view_idx=2, pixdims=(1, 1, 1) +): frame_annotations = OrderedDict() for i in range(volume.shape[view_idx]): if view_idx == 2: @@ -172,7 +200,9 @@ def nifti_to_video_annotation(volume, class_name, class_idxs, slot_names, view_i class_mask = np.isin(slice_mask, class_idxs).astype(np.uint8).copy() if class_mask.sum() == 0: continue - polygon = mask_to_polygon(mask=class_mask, class_name=class_name, pixdims=_pixdims) + polygon = mask_to_polygon( + mask=class_mask, class_name=class_name, pixdims=_pixdims + ) if polygon is None: continue frame_annotations[i] = polygon @@ -193,7 +223,9 @@ def nifti_to_video_annotation(volume, class_name, class_idxs, slot_names, view_i return [video_annotation] -def mask_to_polygon(mask: np.ndarray, class_name: str, pixdims: List[float]) -> Optional[dt.Annotation]: +def mask_to_polygon( + mask: np.ndarray, class_name: str, pixdims: List[float] +) -> Optional[dt.Annotation]: def adjust_for_pixdims(x, y, pixdims): if pixdims[1] > pixdims[0]: return {"x": y, "y": x * pixdims[1] / pixdims[0]} @@ -209,7 +241,10 @@ def adjust_for_pixdims(x, y, pixdims): # skip paths with less than 2 points if len(external_path) // 2 <= 2: continue - path = [adjust_for_pixdims(x, y, pixdims) for x, y in zip(external_path[0::2], external_path[1::2])] + path = [ + adjust_for_pixdims(x, y, pixdims) + for x, y in zip(external_path[0::2], external_path[1::2]) + ] paths.append(path) if len(paths) > 1: polygon = dt.make_complex_polygon(class_name, paths) @@ -226,7 +261,10 @@ def adjust_for_pixdims(x, y, pixdims): return None polygon = dt.make_polygon( class_name, - point_path=[adjust_for_pixdims(x, y, pixdims) for x, y in 
zip(external_path[0::2], external_path[1::2])], + point_path=[ + adjust_for_pixdims(x, y, pixdims) + for x, y in zip(external_path[0::2], external_path[1::2]) + ], ) else: return None @@ -282,7 +320,9 @@ def rectify_header_sform_qform(img_nii): return img_nii -def affine_to_spacing(affine: np.ndarray, r: int = 3, dtype=float, suppress_zeros: bool = True) -> np.ndarray: +def affine_to_spacing( + affine: np.ndarray, r: int = 3, dtype=float, suppress_zeros: bool = True +) -> np.ndarray: """ Copied over from monai.data.utils - https://docs.monai.io/en/stable/_modules/monai/data/utils.html @@ -326,7 +366,9 @@ def correct_nifti_header_if_necessary(img_nii): return img_nii -def process_nifti(input_data: Union[Sequence[nib.nifti1.Nifti1Image], nib.nifti1.Nifti1Image]): +def process_nifti( + input_data: Union[Sequence[nib.nifti1.Nifti1Image], nib.nifti1.Nifti1Image] +): """ Function which takes in a single nifti path or a list of nifti paths and returns the pixel_array, affine and pixdim diff --git a/darwin/importer/formats/nifti_schemas.py b/darwin/importer/formats/nifti_schemas.py index 626c967cd..4b10133de 100644 --- a/darwin/importer/formats/nifti_schemas.py +++ b/darwin/importer/formats/nifti_schemas.py @@ -1,4 +1,8 @@ -class_map = {"type": "object", "patternProperties": {"^([0-9]+)+$": {"type": "string"}}, "additionalProperties": False} +class_map = { + "type": "object", + "patternProperties": {"^([0-9]+)+$": {"type": "string"}}, + "additionalProperties": False, +} nifti_image_label_pair = { "type": "object", diff --git a/darwin/importer/formats/pascal_voc.py b/darwin/importer/formats/pascal_voc.py index ca701ff29..f8abf441d 100644 --- a/darwin/importer/formats/pascal_voc.py +++ b/darwin/importer/formats/pascal_voc.py @@ -52,10 +52,16 @@ def parse_path(path: Path) -> Optional[dt.AnnotationFile]: filename = _find_text_value(root, "filename") - annotations: List[dt.Annotation] = list(filter(None, map(_parse_annotation, root.findall("object")))) - annotation_classes = set([annotation.annotation_class for annotation in annotations]) - - return dt.AnnotationFile(path, filename, annotation_classes, annotations, remote_path="/") + annotations: List[dt.Annotation] = list( + filter(None, map(_parse_annotation, root.findall("object"))) + ) + annotation_classes = set( + [annotation.annotation_class for annotation in annotations] + ) + + return dt.AnnotationFile( + path, filename, annotation_classes, annotations, remote_path="/" + ) def _parse_annotation(annotation_object: ET.Element) -> dt.Annotation: diff --git a/darwin/importer/formats/superannotate.py b/darwin/importer/formats/superannotate.py index c56f76760..95e989d33 100644 --- a/darwin/importer/formats/superannotate.py +++ b/darwin/importer/formats/superannotate.py @@ -104,7 +104,9 @@ def parse_path(path: Path) -> Optional[AnnotationFile]: classes_path = path.parent / "classes.json" if not classes_path.is_file(): - raise ValueError("Folder must contain a 'classes.json' file with classes information.") + raise ValueError( + "Folder must contain a 'classes.json' file with classes information." 
+ ) with classes_path.open(encoding="utf-8") as classes_file: classes = json.loads(classes_file.read()) @@ -126,7 +128,9 @@ def _convert( metadata: Dict[str, Any], tags: List[str], ) -> AnnotationFile: - conver_to_darwin_object = partial(_convert_instance, superannotate_classes=superannotate_classes) + conver_to_darwin_object = partial( + _convert_instance, superannotate_classes=superannotate_classes + ) filename: str = str(metadata.get("name")) darwin_tags: List[Annotation] = _map_to_list(make_tag, tags) @@ -143,7 +147,9 @@ def _convert( ) -def _convert_instance(obj: Dict[str, Any], superannotate_classes: List[Dict[str, Any]]) -> Annotation: +def _convert_instance( + obj: Dict[str, Any], superannotate_classes: List[Dict[str, Any]] +) -> Annotation: type: str = str(obj.get("type")) if type == "point": @@ -167,7 +173,9 @@ def _convert_instance(obj: Dict[str, Any], superannotate_classes: List[Dict[str, raise ValueError(f"Unknown label object {obj}") -def _to_keypoint_annotation(point: Dict[str, Any], classes: List[Dict[str, Any]]) -> Annotation: +def _to_keypoint_annotation( + point: Dict[str, Any], classes: List[Dict[str, Any]] +) -> Annotation: x: float = cast(float, point.get("x")) y: float = cast(float, point.get("y")) class_id: int = cast(int, point.get("classId")) @@ -181,7 +189,9 @@ def _to_keypoint_annotation(point: Dict[str, Any], classes: List[Dict[str, Any]] return make_keypoint(f"{name}-point", x, y, subannotations) -def _to_bbox_annotation(bbox: Dict[str, Any], classes: List[Dict[str, Any]]) -> Annotation: +def _to_bbox_annotation( + bbox: Dict[str, Any], classes: List[Dict[str, Any]] +) -> Annotation: points: Dict[str, float] = cast(Dict[str, float], bbox.get("points")) x: float = cast(float, points.get("x1")) y: float = cast(float, points.get("y1")) @@ -199,11 +209,23 @@ def _to_bbox_annotation(bbox: Dict[str, Any], classes: List[Dict[str, Any]]) -> return make_bounding_box(f"{name}-bbox", x, y, w, h, subannotations) -def _to_ellipse_annotation(ellipse: Dict[str, Any], classes: List[Dict[str, Any]]) -> Annotation: +def _to_ellipse_annotation( + ellipse: Dict[str, Any], classes: List[Dict[str, Any]] +) -> Annotation: angle: float = cast(float, ellipse.get("angle")) - center: Point = {"x": cast(float, ellipse.get("cx")), "y": cast(float, ellipse.get("cy"))} - radius: Point = {"x": cast(float, ellipse.get("rx")), "y": cast(float, ellipse.get("ry"))} - ellipse_data: Dict[str, Union[float, Point]] = {"angle": angle, "center": center, "radius": radius} + center: Point = { + "x": cast(float, ellipse.get("cx")), + "y": cast(float, ellipse.get("cy")), + } + radius: Point = { + "x": cast(float, ellipse.get("rx")), + "y": cast(float, ellipse.get("ry")), + } + ellipse_data: Dict[str, Union[float, Point]] = { + "angle": angle, + "center": center, + "radius": radius, + } class_id: int = cast(int, ellipse.get("classId")) instance_class: Dict[str, Any] = _find_class(class_id, classes) @@ -216,23 +238,41 @@ def _to_ellipse_annotation(ellipse: Dict[str, Any], classes: List[Dict[str, Any] return make_ellipse(f"{name}-ellipse", ellipse_data, subannotations) -def _to_cuboid_annotation(cuboid: Dict[str, Any], classes: List[Dict[str, Any]]) -> Annotation: - points: Dict[str, Dict[str, float]] = cast(Dict[str, Dict[str, float]], cuboid.get("points")) +def _to_cuboid_annotation( + cuboid: Dict[str, Any], classes: List[Dict[str, Any]] +) -> Annotation: + points: Dict[str, Dict[str, float]] = cast( + Dict[str, Dict[str, float]], cuboid.get("points") + ) back_top_left_point: Dict[str, float] = 
cast(Dict[str, float], points.get("r1")) back_bottom_right_point: Dict[str, float] = cast(Dict[str, float], points.get("r2")) front_top_left_point: Dict[str, float] = cast(Dict[str, float], points.get("f1")) - front_bottom_right_point: Dict[str, float] = cast(Dict[str, float], points.get("f2")) + front_bottom_right_point: Dict[str, float] = cast( + Dict[str, float], points.get("f2") + ) cuboid_data: CuboidData = { "back": { - "h": abs(cast(float, back_top_left_point.get("y")) - cast(float, back_bottom_right_point.get("y"))), - "w": abs(cast(float, back_bottom_right_point.get("x")) - cast(float, back_top_left_point.get("x"))), + "h": abs( + cast(float, back_top_left_point.get("y")) + - cast(float, back_bottom_right_point.get("y")) + ), + "w": abs( + cast(float, back_bottom_right_point.get("x")) + - cast(float, back_top_left_point.get("x")) + ), "x": cast(float, back_top_left_point.get("x")), "y": cast(float, back_top_left_point.get("y")), }, "front": { - "h": abs(cast(float, front_top_left_point.get("y")) - cast(float, front_bottom_right_point.get("y"))), - "w": abs(cast(float, front_bottom_right_point.get("x")) - cast(float, front_top_left_point.get("x"))), + "h": abs( + cast(float, front_top_left_point.get("y")) + - cast(float, front_bottom_right_point.get("y")) + ), + "w": abs( + cast(float, front_bottom_right_point.get("x")) + - cast(float, front_top_left_point.get("x")) + ), "x": cast(float, front_top_left_point.get("x")), "y": cast(float, front_top_left_point.get("y")), }, @@ -249,7 +289,9 @@ def _to_cuboid_annotation(cuboid: Dict[str, Any], classes: List[Dict[str, Any]]) return make_cuboid(f"{name}-cuboid", cuboid_data, subannotations) -def _to_polygon_annotation(polygon: Dict[str, Any], classes: List[Dict[str, Any]]) -> Annotation: +def _to_polygon_annotation( + polygon: Dict[str, Any], classes: List[Dict[str, Any]] +) -> Annotation: data: List[float] = cast(List[float], polygon.get("points")) class_id: int = cast(int, polygon.get("classId")) instance_class: Dict[str, Any] = _find_class(class_id, classes) @@ -263,7 +305,9 @@ def _to_polygon_annotation(polygon: Dict[str, Any], classes: List[Dict[str, Any] return make_polygon(f"{name}-polygon", points, None, subannotations) -def _to_line_annotation(line: Dict[str, Any], classes: List[Dict[str, Any]]) -> Annotation: +def _to_line_annotation( + line: Dict[str, Any], classes: List[Dict[str, Any]] +) -> Annotation: data: List[float] = cast(List[float], line.get("points")) class_id: int = cast(int, line.get("classId")) instance_class: Dict[str, Any] = _find_class(class_id, classes) @@ -278,7 +322,9 @@ def _to_line_annotation(line: Dict[str, Any], classes: List[Dict[str, Any]]) -> def _find_class(class_id: int, classes: List[Dict[str, Any]]) -> Dict[str, Any]: - obj: Optional[Dict[str, Any]] = next((class_obj for class_obj in classes if class_obj.get("id") == class_id), None) + obj: Optional[Dict[str, Any]] = next( + (class_obj for class_obj in classes if class_obj.get("id") == class_id), None + ) if obj is None: raise ValueError( @@ -288,9 +334,15 @@ def _find_class(class_id: int, classes: List[Dict[str, Any]]) -> Dict[str, Any]: return obj -def _get_attributes(instance: Dict[str, Any], instance_class: Dict[str, Any]) -> Optional[SubAnnotation]: - attribute_info: List[Dict[str, int]] = cast(List[Dict[str, int]], instance.get("attributes")) - groups: List[Dict[str, Any]] = cast(List[Dict[str, Any]], instance_class.get("attribute_groups")) +def _get_attributes( + instance: Dict[str, Any], instance_class: Dict[str, Any] +) -> 
Optional[SubAnnotation]: + attribute_info: List[Dict[str, int]] = cast( + List[Dict[str, int]], instance.get("attributes") + ) + groups: List[Dict[str, Any]] = cast( + List[Dict[str, Any]], instance_class.get("attribute_groups") + ) all_attributes: List[str] = [] for info in attribute_info: @@ -303,15 +355,24 @@ def _get_attributes(instance: Dict[str, Any], instance_class: Dict[str, Any]) -> if info_group_id != group_id: continue - group_attributes: List[AttributeGroup] = cast(List[AttributeGroup], group.get("attributes")) + group_attributes: List[AttributeGroup] = cast( + List[AttributeGroup], group.get("attributes") + ) attribute: Optional[AttributeGroup] = next( - (attribute for attribute in group_attributes if attribute.get("id") == attribute_id), None + ( + attribute + for attribute in group_attributes + if attribute.get("id") == attribute_id + ), + None, ) if attribute is None: raise ValueError(f"No attribute data found for {info}.") - final_attribute: str = f"{str(group.get('name'))}:{str(attribute.get('name'))}" + final_attribute: str = ( + f"{str(group.get('name'))}:{str(attribute.get('name'))}" + ) all_attributes.append(final_attribute) if all_attributes == []: diff --git a/darwin/importer/formats/superannotate_schemas.py b/darwin/importer/formats/superannotate_schemas.py index 39a894241..8b8205785 100644 --- a/darwin/importer/formats/superannotate_schemas.py +++ b/darwin/importer/formats/superannotate_schemas.py @@ -72,7 +72,12 @@ "title": "Polygon", "default": {"type": "polygon", "points": [1, 2, 3, 4], "classId": 1}, "examples": [ - {"type": "polygon", "points": [1, 2, 3, 4], "classId": 1, "attributes": [{"id": 1, "groupId": 2}]}, + { + "type": "polygon", + "points": [1, 2, 3, 4], + "classId": 1, + "attributes": [{"id": 1, "groupId": 2}], + }, {"type": "polygon", "points": [], "classId": 1, "attributes": []}, ], "type": "object", @@ -91,7 +96,12 @@ "title": "Polyline", "default": {"type": "polyline", "points": [1, 2, 3, 4], "classId": 1}, "examples": [ - {"type": "polyline", "points": [1, 2, 3, 4], "classId": 1, "attributes": [{"id": 1, "groupId": 2}]}, + { + "type": "polyline", + "points": [1, 2, 3, 4], + "classId": 1, + "attributes": [{"id": 1, "groupId": 2}], + }, {"type": "polyline", "points": [], "classId": 1, "attributes": []}, ], "type": "object", @@ -214,7 +224,13 @@ "default": {"type": "point", "x": 1.2, "y": 2.5, "classId": 1, "attributes": []}, "examples": [ {"type": "point", "x": 1.2, "y": 2.5, "classId": 1, "attributes": []}, - {"type": "point", "x": 0, "y": 1, "classId": 2, "attributes": [{"id": 1, "groupId": 2}]}, + { + "type": "point", + "x": 0, + "y": 1, + "classId": 2, + "attributes": [{"id": 1, "groupId": 2}], + }, ], "type": "object", "properties": { @@ -236,7 +252,11 @@ "type": "array", "items": {"oneOf": [point, ellipse, cuboid, polygon, bbox, polyline]}, }, - "metadata": {"type": "object", "required": ["name"], "properties": {"name": {"type": "string"}}}, + "metadata": { + "type": "object", + "required": ["name"], + "properties": {"name": {"type": "string"}}, + }, "tags": {"type": "array", "items": {"type": "string"}}, }, } @@ -258,7 +278,10 @@ "itmes": { "type": "object", "required": ["id", "name"], - "properties": {"id": {"type": "integer"}, "name": {"type": "string"}}, + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + }, }, }, }, @@ -270,6 +293,10 @@ "items": { "type": "object", "required": ["name", "id", "attribute_groups"], - "properties": {"name": {"type": "string"}, "id": {"type": "integer"}, "attribute_groups": 
attribute_groups}, + "properties": { + "name": {"type": "string"}, + "id": {"type": "integer"}, + "attribute_groups": attribute_groups, + }, }, } diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 9dc1fcac9..06b5f5af7 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -69,7 +69,9 @@ current_version=__version__, details=DEPRECATION_MESSAGE, ) -def build_main_annotations_lookup_table(annotation_classes: List[Dict[str, Unknown]]) -> Dict[str, Unknown]: +def build_main_annotations_lookup_table( + annotation_classes: List[Dict[str, Unknown]] +) -> Dict[str, Unknown]: MAIN_ANNOTATION_TYPES = [ "bounding_box", "cuboid", @@ -164,7 +166,10 @@ def maybe_console(*args: Union[str, int, float]) -> None: def _get_files_for_parsing(file_paths: List[PathLike]) -> List[Path]: - packed_files = [filepath.glob("**/*") if filepath.is_dir() else [filepath] for filepath in map(Path, file_paths)] + packed_files = [ + filepath.glob("**/*") if filepath.is_dir() else [filepath] + for filepath in map(Path, file_paths) + ] return [file for files in packed_files for file in files] @@ -191,7 +196,9 @@ def build_attribute_lookup(dataset: "RemoteDataset") -> Dict[str, Unknown]: current_version=__version__, details=DEPRECATION_MESSAGE, ) -def get_remote_files(dataset: "RemoteDataset", filenames: List[str], chunk_size: int = 100) -> Dict[str, Tuple[int, str]]: +def get_remote_files( + dataset: "RemoteDataset", filenames: List[str], chunk_size: int = 100 +) -> Dict[str, Tuple[int, str]]: """ Fetches remote files from the datasets in chunks; by default 100 filenames at a time. @@ -204,7 +211,9 @@ def get_remote_files(dataset: "RemoteDataset", filenames: List[str], chunk_size: remote_files = {} for i in range(0, len(filenames), chunk_size): chunk = filenames[i : i + chunk_size] - for remote_file in dataset.fetch_remote_files({"types": "image,playback_video,video_frame", "filenames": chunk}): + for remote_file in dataset.fetch_remote_files( + {"types": "image,playback_video,video_frame", "filenames": chunk} + ): slot_name = _get_slot_name(remote_file) remote_files[remote_file.full_path] = (remote_file.id, slot_name) return remote_files @@ -227,18 +236,30 @@ def _resolve_annotation_classes( local_classes_not_in_team: Set[dt.AnnotationClass] = set() for local_cls in local_annotation_classes: - local_annotation_type = local_cls.annotation_internal_type or local_cls.annotation_type + local_annotation_type = ( + local_cls.annotation_internal_type or local_cls.annotation_type + ) # Only add the new class if it doesn't exist remotely already - if local_annotation_type in classes_in_dataset and local_cls.name in classes_in_dataset[local_annotation_type]: + if ( + local_annotation_type in classes_in_dataset + and local_cls.name in classes_in_dataset[local_annotation_type] + ): continue # Only add the new class if it's not included in the list of the missing classes already - if local_cls.name in [missing_class.name for missing_class in local_classes_not_in_dataset]: + if local_cls.name in [ + missing_class.name for missing_class in local_classes_not_in_dataset + ]: continue - if local_cls.name in [missing_class.name for missing_class in local_classes_not_in_team]: + if local_cls.name in [ + missing_class.name for missing_class in local_classes_not_in_team + ]: continue - if local_annotation_type in classes_in_team and local_cls.name in classes_in_team[local_annotation_type]: + if ( + local_annotation_type in classes_in_team + and local_cls.name in 
classes_in_team[local_annotation_type] + ): local_classes_not_in_dataset.add(local_cls) else: local_classes_not_in_team.add(local_cls) @@ -313,16 +334,22 @@ def import_annotations( # noqa: C901 console = Console(theme=_console_theme()) if append and delete_for_empty: - raise IncompatibleOptions("The options 'append' and 'delete_for_empty' cannot be used together. Use only one of them.") + raise IncompatibleOptions( + "The options 'append' and 'delete_for_empty' cannot be used together. Use only one of them." + ) - cpu_limit, use_multi_cpu = _get_multi_cpu_settings(cpu_limit, cpu_count(), use_multi_cpu) + cpu_limit, use_multi_cpu = _get_multi_cpu_settings( + cpu_limit, cpu_count(), use_multi_cpu + ) if use_multi_cpu: console.print(f"Using {cpu_limit} CPUs for parsing...", style="info") else: console.print("Using 1 CPU for parsing...", style="info") if not isinstance(file_paths, list): - raise ValueError(f"file_paths must be a list of 'Path' or 'str'. Current value: {file_paths}") + raise ValueError( + f"file_paths must be a list of 'Path' or 'str'. Current value: {file_paths}" + ) console.print("Fetching remote class list...", style="info") team_classes: List[dt.DictFreeForm] = dataset.fetch_remote_classes(True) @@ -336,10 +363,18 @@ def import_annotations( # noqa: C901 ) classes_in_dataset: dt.DictFreeForm = build_main_annotations_lookup_table( - [cls for cls in team_classes if cls["available"] or cls["name"] in GLOBAL_CLASSES] + [ + cls + for cls in team_classes + if cls["available"] or cls["name"] in GLOBAL_CLASSES + ] ) classes_in_team: dt.DictFreeForm = build_main_annotations_lookup_table( - [cls for cls in team_classes if not cls["available"] and cls["name"] not in GLOBAL_CLASSES] + [ + cls + for cls in team_classes + if not cls["available"] and cls["name"] not in GLOBAL_CLASSES + ] ) attributes = build_attribute_lookup(dataset) @@ -348,14 +383,18 @@ def import_annotations( # noqa: C901 local_files_missing_remotely = [] # ! 
Other place we can use multiprocessing - hard to pass in the importer though - maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = find_and_parse(importer, file_paths, console, use_multi_cpu, cpu_limit) + maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = find_and_parse( + importer, file_paths, console, use_multi_cpu, cpu_limit + ) if not maybe_parsed_files: raise ValueError("Not able to parse any files.") parsed_files: List[AnnotationFile] = flatten_list(list(maybe_parsed_files)) - filenames: List[str] = [parsed_file.filename for parsed_file in parsed_files if parsed_file is not None] + filenames: List[str] = [ + parsed_file.filename for parsed_file in parsed_files if parsed_file is not None + ] console.print("Fetching remote file list...", style="info") # This call will only filter by filename; so can return a superset of matched files across different paths @@ -380,25 +419,47 @@ def import_annotations( # noqa: C901 else: local_files.append(parsed_file) - console.print(f"{len(local_files) + len(local_files_missing_remotely)} annotation file(s) found.", style="info") + console.print( + f"{len(local_files) + len(local_files_missing_remotely)} annotation file(s) found.", + style="info", + ) if local_files_missing_remotely: - console.print(f"{len(local_files_missing_remotely)} file(s) are missing from the dataset", style="warning") + console.print( + f"{len(local_files_missing_remotely)} file(s) are missing from the dataset", + style="warning", + ) for local_file in local_files_missing_remotely: - console.print(f"\t{local_file.path}: '{local_file.full_path}'", style="warning") + console.print( + f"\t{local_file.path}: '{local_file.full_path}'", style="warning" + ) if class_prompt and not secure_continue_request(): return - local_classes_not_in_dataset, local_classes_not_in_team = _resolve_annotation_classes( - [annotation_class for file in local_files for annotation_class in file.annotation_classes], + ( + local_classes_not_in_dataset, + local_classes_not_in_team, + ) = _resolve_annotation_classes( + [ + annotation_class + for file in local_files + for annotation_class in file.annotation_classes + ], classes_in_dataset, classes_in_team, ) - console.print(f"{len(local_classes_not_in_team)} classes needs to be created.", style="info") - console.print(f"{len(local_classes_not_in_dataset)} classes needs to be added to {dataset.identifier}", style="info") + console.print( + f"{len(local_classes_not_in_team)} classes needs to be created.", style="info" + ) + console.print( + f"{len(local_classes_not_in_dataset)} classes needs to be added to {dataset.identifier}", + style="info", + ) - missing_skeletons: List[dt.AnnotationClass] = list(filter(_is_skeleton_class, local_classes_not_in_team)) + missing_skeletons: List[dt.AnnotationClass] = list( + filter(_is_skeleton_class, local_classes_not_in_team) + ) missing_skeleton_names: str = ", ".join(map(_get_skeleton_name, missing_skeletons)) if missing_skeletons: console.print( @@ -417,9 +478,14 @@ def import_annotations( # noqa: C901 if class_prompt and not secure_continue_request(): return for missing_class in local_classes_not_in_team: - dataset.create_annotation_class(missing_class.name, missing_class.annotation_internal_type or missing_class.annotation_type) + dataset.create_annotation_class( + missing_class.name, + missing_class.annotation_internal_type or missing_class.annotation_type, + ) if local_classes_not_in_dataset: - console.print(f"About to add the following classes to {dataset.identifier}", style="info") + console.print( + 
f"About to add the following classes to {dataset.identifier}", style="info" + ) for cls in local_classes_not_in_dataset: dataset.add_annotation_class(cls) @@ -434,7 +500,9 @@ def import_annotations( # noqa: C901 remote_classes = build_main_annotations_lookup_table(team_classes) if dataset.version == 1: - console.print("Importing annotations...\nEmpty annotations will be skipped.", style="info") + console.print( + "Importing annotations...\nEmpty annotations will be skipped.", style="info" + ) elif dataset.version == 2 and delete_for_empty: console.print( "Importing annotations...\nEmpty annotation file(s) will clear all existing annotations in matching remote files.", @@ -448,7 +516,9 @@ def import_annotations( # noqa: C901 # Need to re parse the files since we didn't save the annotations in memory for local_path in set(local_file.path for local_file in local_files): # noqa: C401 - imported_files: Union[List[dt.AnnotationFile], dt.AnnotationFile, None] = importer(local_path) + imported_files: Union[ + List[dt.AnnotationFile], dt.AnnotationFile, None + ] = importer(local_path) if imported_files is None: parsed_files = [] elif not isinstance(imported_files, List): @@ -457,17 +527,31 @@ def import_annotations( # noqa: C901 parsed_files = imported_files # remove files missing on the server - missing_files = [missing_file.full_path for missing_file in local_files_missing_remotely] - parsed_files = [parsed_file for parsed_file in parsed_files if parsed_file.full_path not in missing_files] + missing_files = [ + missing_file.full_path for missing_file in local_files_missing_remotely + ] + parsed_files = [ + parsed_file + for parsed_file in parsed_files + if parsed_file.full_path not in missing_files + ] files_to_not_track = [ - file_to_track for file_to_track in parsed_files if not file_to_track.annotations and (not delete_for_empty or dataset.version == 1) + file_to_track + for file_to_track in parsed_files + if not file_to_track.annotations + and (not delete_for_empty or dataset.version == 1) ] for file in files_to_not_track: - console.print(f"{file.filename} has no annotations. Skipping upload...", style="warning") + console.print( + f"{file.filename} has no annotations. 
Skipping upload...", + style="warning", + ) - files_to_track = [file for file in parsed_files if file not in files_to_not_track] + files_to_track = [ + file for file in parsed_files if file not in files_to_not_track + ] if files_to_track: _warn_unsupported_annotations(files_to_track) for parsed_file in track(files_to_track): @@ -492,12 +576,16 @@ def import_annotations( # noqa: C901 ) if errors: - console.print(f"Errors importing {parsed_file.filename}", style="error") + console.print( + f"Errors importing {parsed_file.filename}", style="error" + ) for error in errors: console.print(f"\t{error}", style="error") -def _get_multi_cpu_settings(cpu_limit: Optional[int], cpu_count: int, use_multi_cpu: bool) -> Tuple[int, bool]: +def _get_multi_cpu_settings( + cpu_limit: Optional[int], cpu_count: int, use_multi_cpu: bool +) -> Tuple[int, bool]: if cpu_limit == 1 or cpu_count == 1 or not use_multi_cpu: return 1, False @@ -515,7 +603,9 @@ def _warn_unsupported_annotations(parsed_files: List[AnnotationFile]) -> None: if annotation.annotation_class.annotation_type in UNSUPPORTED_CLASSES: skipped_annotations.append(annotation) if len(skipped_annotations) > 0: - types = set(map(lambda c: c.annotation_class.annotation_type, skipped_annotations)) # noqa: C417 + types = set( + map(lambda c: c.annotation_class.annotation_type, skipped_annotations) + ) # noqa: C417 console.print( f"Import of annotation class types '{', '.join(types)}' is not yet supported. Skipping {len(skipped_annotations)} " + "annotations from '{parsed_file.full_path}'.\n", @@ -524,24 +614,36 @@ def _warn_unsupported_annotations(parsed_files: List[AnnotationFile]) -> None: def _is_skeleton_class(the_class: dt.AnnotationClass) -> bool: - return (the_class.annotation_internal_type or the_class.annotation_type) == "skeleton" + return ( + the_class.annotation_internal_type or the_class.annotation_type + ) == "skeleton" def _get_skeleton_name(skeleton: dt.AnnotationClass) -> str: return skeleton.name -def _handle_subs(annotation: dt.Annotation, data: dt.DictFreeForm, annotation_class_id: str, attributes: Dict[str, dt.UnknownType]) -> dt.DictFreeForm: +def _handle_subs( + annotation: dt.Annotation, + data: dt.DictFreeForm, + annotation_class_id: str, + attributes: Dict[str, dt.UnknownType], +) -> dt.DictFreeForm: for sub in annotation.subs: if sub.annotation_type == "text": data["text"] = {"text": sub.data} elif sub.annotation_type == "attributes": attributes_with_key = [] for attr in sub.data: - if annotation_class_id in attributes and attr in attributes[annotation_class_id]: + if ( + annotation_class_id in attributes + and attr in attributes[annotation_class_id] + ): attributes_with_key.append(attributes[annotation_class_id][attr]) else: - print(f"The attribute '{attr}' for class '{annotation.annotation_class.name}' was not imported.") + print( + f"The attribute '{attr}' for class '{annotation.annotation_class.name}' was not imported." 
+ ) data["attributes"] = {"attributes": attributes_with_key} elif sub.annotation_type == "instance_id": @@ -552,37 +654,59 @@ def _handle_subs(annotation: dt.Annotation, data: dt.DictFreeForm, annotation_cl return data -def _handle_complex_polygon(annotation: dt.Annotation, data: dt.DictFreeForm) -> dt.DictFreeForm: +def _handle_complex_polygon( + annotation: dt.Annotation, data: dt.DictFreeForm +) -> dt.DictFreeForm: if "complex_polygon" in data: del data["complex_polygon"] - data["polygon"] = {"path": annotation.data["paths"][0], "additional_paths": annotation.data["paths"][1:]} + data["polygon"] = { + "path": annotation.data["paths"][0], + "additional_paths": annotation.data["paths"][1:], + } return data -def _annotators_or_reviewers_to_payload(actors: List[dt.AnnotationAuthor], role: dt.AnnotationAuthorRole) -> List[dt.DictFreeForm]: +def _annotators_or_reviewers_to_payload( + actors: List[dt.AnnotationAuthor], role: dt.AnnotationAuthorRole +) -> List[dt.DictFreeForm]: return [{"email": actor.email, "role": role.value} for actor in actors] -def _handle_reviewers(annotation: dt.Annotation, import_reviewers: bool) -> List[dt.DictFreeForm]: +def _handle_reviewers( + annotation: dt.Annotation, import_reviewers: bool +) -> List[dt.DictFreeForm]: if import_reviewers: if annotation.reviewers: - return _annotators_or_reviewers_to_payload(annotation.reviewers, dt.AnnotationAuthorRole.REVIEWER) + return _annotators_or_reviewers_to_payload( + annotation.reviewers, dt.AnnotationAuthorRole.REVIEWER + ) return [] -def _handle_annotators(annotation: dt.Annotation, import_annotators: bool) -> List[dt.DictFreeForm]: +def _handle_annotators( + annotation: dt.Annotation, import_annotators: bool +) -> List[dt.DictFreeForm]: if import_annotators: if annotation.annotators: - return _annotators_or_reviewers_to_payload(annotation.annotators, dt.AnnotationAuthorRole.ANNOTATOR) + return _annotators_or_reviewers_to_payload( + annotation.annotators, dt.AnnotationAuthorRole.ANNOTATOR + ) return [] -def _get_annotation_data(annotation: dt.AnnotationLike, annotation_class_id: str, attributes: dt.DictFreeForm) -> dt.DictFreeForm: +def _get_annotation_data( + annotation: dt.AnnotationLike, annotation_class_id: str, attributes: dt.DictFreeForm +) -> dt.DictFreeForm: annotation_class = annotation.annotation_class if isinstance(annotation, dt.VideoAnnotation): data = annotation.get_data( only_keyframes=True, - post_processing=lambda annotation, data: _handle_subs(annotation, _handle_complex_polygon(annotation, data), annotation_class_id, attributes), + post_processing=lambda annotation, data: _handle_subs( + annotation, + _handle_complex_polygon(annotation, data), + annotation_class_id, + attributes, + ), ) else: data = {annotation_class.annotation_type: annotation.data} @@ -592,7 +716,9 @@ def _get_annotation_data(annotation: dt.AnnotationLike, annotation_class_id: str return data -def _handle_slot_names(annotation: dt.Annotation, dataset_version: int, default_slot_name: str) -> dt.Annotation: +def _handle_slot_names( + annotation: dt.Annotation, dataset_version: int, default_slot_name: str +) -> dt.Annotation: if not annotation.slot_names and dataset_version > 1: annotation.slot_names.extend([default_slot_name]) @@ -622,16 +748,28 @@ def _import_annotations( serialized_annotations = [] for annotation in annotations: annotation_class = annotation.annotation_class - annotation_type = annotation_class.annotation_internal_type or annotation_class.annotation_type + annotation_type = ( + 
annotation_class.annotation_internal_type + or annotation_class.annotation_type + ) - if annotation_type not in remote_classes or annotation_class.name not in remote_classes[annotation_type]: + if ( + annotation_type not in remote_classes + or annotation_class.name not in remote_classes[annotation_type] + ): if annotation_type not in remote_classes: - logger.warning(f"Annotation type '{annotation_type}' is not in the remote classes, skipping import of annotation '{annotation_class.name}'") + logger.warning( + f"Annotation type '{annotation_type}' is not in the remote classes, skipping import of annotation '{annotation_class.name}'" + ) else: - logger.warning(f"Annotation '{annotation_class.name}' is not in the remote classes, skipping import") + logger.warning( + f"Annotation '{annotation_class.name}' is not in the remote classes, skipping import" + ) continue - annotation_class_id: str = remote_classes[annotation_type][annotation_class.name] + annotation_class_id: str = remote_classes[annotation_type][ + annotation_class.name + ] data = _get_annotation_data(annotation, annotation_class_id, attributes) @@ -670,4 +808,11 @@ def _import_annotations( # mypy: ignore-errors def _console_theme() -> Theme: - return Theme({"success": "bold green", "warning": "bold yellow", "error": "bold red", "info": "bold deep_sky_blue1"}) + return Theme( + { + "success": "bold green", + "warning": "bold yellow", + "error": "bold red", + "info": "bold deep_sky_blue1", + } + ) diff --git a/darwin/item_sorter.py b/darwin/item_sorter.py index 12c9278e8..731b4d235 100644 --- a/darwin/item_sorter.py +++ b/darwin/item_sorter.py @@ -38,7 +38,9 @@ def parse(cls, direction: str) -> "SortDirection": if cls._is_descending(normalized_direction): return cls.DESCENDING - raise ValueError(f"Invalid direction '{direction}', use 'asc' or 'ascending', 'desc' or 'descending'.") + raise ValueError( + f"Invalid direction '{direction}', use 'asc' or 'ascending', 'desc' or 'descending'." + ) @staticmethod def _is_ascending(direction: str) -> bool: @@ -117,7 +119,13 @@ def _has_valid_format(sort_by: str) -> bool: @staticmethod def _has_valid_field(sort: str) -> bool: - return sort in ["inserted_at", "updated_at", "file_size", "filename", "priority"] + return sort in [ + "inserted_at", + "updated_at", + "file_size", + "filename", + "priority", + ] def __str__(self): return f"{self.field}:{self.direction.value}" diff --git a/darwin/options.py b/darwin/options.py index e8b6ab1ea..16dfefd70 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -22,7 +22,9 @@ def __init__(self) -> None: subparsers.add_parser("authenticate", help="Authenticate the user. ") # SET COMPRESSION LEVEL - parser_compression = subparsers.add_parser("compression", help="Set compression level.") + parser_compression = subparsers.add_parser( + "compression", help="Set compression level." + ) parser_compression.add_argument( "compression_level", type=int, @@ -32,17 +34,34 @@ def __init__(self) -> None: # SELECT TEAM parser_create = subparsers.add_parser("team", help="List or pick teams.") - parser_create.add_argument("team_name", nargs="?", type=str, help="Team name to use.") parser_create.add_argument( - "-c", "--current", action="store_true", required=False, help="Shows only the current team." + "team_name", nargs="?", type=str, help="Team name to use." 
+ ) + parser_create.add_argument( + "-c", + "--current", + action="store_true", + required=False, + help="Shows only the current team.", ) - parser_convert = subparsers.add_parser("convert", help="Converts darwin json to other annotation formats.") - parser_convert.add_argument("format", type=str, help="Annotation format to convert to.") + parser_convert = subparsers.add_parser( + "convert", help="Converts darwin json to other annotation formats." + ) + parser_convert.add_argument( + "format", type=str, help="Annotation format to convert to." + ) - parser_convert.add_argument("files", type=str, nargs="+", help="Annotation files (or folders) to convert.") + parser_convert.add_argument( + "files", + type=str, + nargs="+", + help="Annotation files (or folders) to convert.", + ) - parser_convert.add_argument("output_dir", type=str, help="Where to store output files.") + parser_convert.add_argument( + "output_dir", type=str, help="Where to store output files." + ) # VALIDATE SCHEMA parser_validate_schema = subparsers.add_parser( @@ -59,38 +78,58 @@ def __init__(self) -> None: ) parser_validate_schema.add_argument( - "--silent", action="store_true", help="Flag to suppress all output except errors to console" + "--silent", + action="store_true", + help="Flag to suppress all output except errors to console", + ) + parser_validate_schema.add_argument( + "--output", help="name of file to write output json to" ) - parser_validate_schema.add_argument("--output", help="name of file to write output json to") # DATASET dataset = subparsers.add_parser( - "dataset", help="Dataset related functions.", description="Arguments to interact with datasets" + "dataset", + help="Dataset related functions.", + description="Arguments to interact with datasets", ) dataset_action = dataset.add_subparsers(dest="action") # Remote - parser_remote = dataset_action.add_parser("remote", help="List remote datasets.") + parser_remote = dataset_action.add_parser( + "remote", help="List remote datasets." + ) parser_remote.add_argument("-t", "--team", help="Specify team.") - parser_remote.add_argument("-a", "--all", action="store_true", help="List datasets for all teams.") + parser_remote.add_argument( + "-a", "--all", action="store_true", help="List datasets for all teams." + ) # Local - parser_local = dataset_action.add_parser("local", help="List downloaded datasets.") + parser_local = dataset_action.add_parser( + "local", help="List downloaded datasets." + ) parser_local.add_argument("-t", "--team", help="Specify team.") # Create - parser_create = dataset_action.add_parser("create", help="Creates a new dataset on darwin.") + parser_create = dataset_action.add_parser( + "create", help="Creates a new dataset on darwin." + ) parser_create.add_argument("dataset", type=str, help="Dataset name.") # Path - parser_path = dataset_action.add_parser("path", help="Print local path to dataset.") + parser_path = dataset_action.add_parser( + "path", help="Print local path to dataset." + ) parser_path.add_argument("dataset", type=str, help="Dataset name.") # Url - parser_url = dataset_action.add_parser("url", help="Print url to dataset on darwin.") + parser_url = dataset_action.add_parser( + "url", help="Print url to dataset on darwin." + ) parser_url.add_argument("dataset", type=str, help="Dataset name.") # Push - parser_push = dataset_action.add_parser("push", help="Upload data to an existing (remote) dataset.") + parser_push = dataset_action.add_parser( + "push", help="Upload data to an existing (remote) dataset." 
+ ) parser_push.add_argument( "dataset", type=str, @@ -111,29 +150,53 @@ def __init__(self) -> None: default="native", help="Frames per second for video split (recommended: 1), use 'native' to use the videos intrinsic fps.", ) - parser_push.add_argument("--frames", action="store_true", help="Annotate a video as independent frames.") + parser_push.add_argument( + "--frames", + action="store_true", + help="Annotate a video as independent frames.", + ) parser_push.add_argument( - "--extract_views", action="store_true", help="Upload a volume with all 3 orthogonal views." + "--extract_views", + action="store_true", + help="Upload a volume with all 3 orthogonal views.", ) - parser_push.add_argument("--path", type=str, default=None, help="Folder to upload the files into.") + parser_push.add_argument( + "--path", type=str, default=None, help="Folder to upload the files into." + ) - parser_push.add_argument("--verbose", action="store_true", help="Flag to show upload details.") + parser_push.add_argument( + "--verbose", action="store_true", help="Flag to show upload details." + ) parser_push.add_argument( - "-p", "--preserve-folders", action="store_true", help="Preserve the local folder structure in the dataset." + "-p", + "--preserve-folders", + action="store_true", + help="Preserve the local folder structure in the dataset.", ) # Remove - parser_remove = dataset_action.add_parser("remove", help="Remove a remote or remote and local dataset.") - parser_remove.add_argument("dataset", type=str, help="Remote dataset name to delete.") + parser_remove = dataset_action.add_parser( + "remove", help="Remove a remote or remote and local dataset." + ) + parser_remove.add_argument( + "dataset", type=str, help="Remote dataset name to delete." + ) # Report - parser_report = dataset_action.add_parser("report", help="Report about the annotators.") - parser_report.add_argument("dataset", type=str, help="Remote dataset name to report on.") + parser_report = dataset_action.add_parser( + "report", help="Report about the annotators." + ) + parser_report.add_argument( + "dataset", type=str, help="Remote dataset name to report on." + ) parser_report.add_argument( - "-g", "--granularity", choices=["day", "week", "month", "total"], help="Granularity of the report." + "-g", + "--granularity", + choices=["day", "week", "month", "total"], + help="Granularity of the report.", ) parser_report.add_argument( "-r", @@ -143,9 +206,15 @@ def __init__(self) -> None: help="Prints the results formatted in a rich table.", ) # Export - parser_export = dataset_action.add_parser("export", help="Export a version of a dataset.") - parser_export.add_argument("dataset", type=str, help="Remote dataset name to export.") - parser_export.add_argument("name", type=str, help="Name with with the version gets tagged.") + parser_export = dataset_action.add_parser( + "export", help="Export a version of a dataset." + ) + parser_export.add_argument( + "dataset", type=str, help="Remote dataset name to export." + ) + parser_export.add_argument( + "name", type=str, help="Name with with the version gets tagged." + ) parser_export.add_argument( "--class-ids", type=str, @@ -178,18 +247,32 @@ def __init__(self) -> None: ) # Releases - parser_dataset_version = dataset_action.add_parser("releases", help="Available version of a dataset.") - parser_dataset_version.add_argument("dataset", type=str, help="Remote dataset name to list.") + parser_dataset_version = dataset_action.add_parser( + "releases", help="Available version of a dataset." 
+ ) + parser_dataset_version.add_argument( + "dataset", type=str, help="Remote dataset name to list." + ) # Pull - parser_pull = dataset_action.add_parser("pull", help="Download a version of a dataset.") - parser_pull.add_argument("dataset", type=str, help="Remote dataset name to download.") + parser_pull = dataset_action.add_parser( + "pull", help="Download a version of a dataset." + ) parser_pull.add_argument( - "--only-annotations", action="store_true", help="Download only annotations and no corresponding images." + "dataset", type=str, help="Remote dataset name to download." ) - parser_pull.add_argument("--folders", action="store_true", help="Recreates image folders.") parser_pull.add_argument( - "--video-frames", action="store_true", help="Pulls video frame images instead of video files." + "--only-annotations", + action="store_true", + help="Download only annotations and no corresponding images.", + ) + parser_pull.add_argument( + "--folders", action="store_true", help="Recreates image folders." + ) + parser_pull.add_argument( + "--video-frames", + action="store_true", + help="Pulls video frame images instead of video files.", ) slots_group = parser_pull.add_mutually_exclusive_group() slots_group.add_argument( @@ -204,13 +287,17 @@ def __init__(self) -> None: help="Ignores slots and only pulls the first slot of each item into a flat file structure ({prefix}/{file_name}).", ) # Import - parser_import = dataset_action.add_parser("import", help="Import data to an existing (remote) dataset.") + parser_import = dataset_action.add_parser( + "import", help="Import data to an existing (remote) dataset." + ) parser_import.add_argument( "dataset", type=str, help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.", ) - parser_import.add_argument("format", type=str, help="The format of the annotations to import.") + parser_import.add_argument( + "format", type=str, help="The format of the annotations to import." + ) parser_import.add_argument( "files", @@ -218,9 +305,15 @@ def __init__(self) -> None: nargs="+", help="The location of the annotation files, or the folder where the annotation files are.", ) - parser_import.add_argument("--append", action="store_true", help="Append annotations instead of overwriting.") parser_import.add_argument( - "--yes", action="store_true", help="Skips prompts for creating and adding classes to dataset." + "--append", + action="store_true", + help="Append annotations instead of overwriting.", + ) + parser_import.add_argument( + "--yes", + action="store_true", + help="Skips prompts for creating and adding classes to dataset.", ) parser_import.add_argument( "--delete-for-empty", @@ -255,35 +348,67 @@ def cpu_default_types(input: Any) -> Optional[int]: # type: ignore ) # Convert - parser_convert = dataset_action.add_parser("convert", help="Converts darwin json to other annotation formats.") + parser_convert = dataset_action.add_parser( + "convert", help="Converts darwin json to other annotation formats." + ) parser_convert.add_argument( "dataset", type=str, help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.", ) - parser_convert.add_argument("format", type=str, help="Annotation format to convert to.") + parser_convert.add_argument( + "format", type=str, help="Annotation format to convert to." 
+ ) - parser_convert.add_argument("-o", "--output_dir", type=str, help="Where to store output files.") + parser_convert.add_argument( + "-o", "--output_dir", type=str, help="Where to store output files." + ) # Split parser_split = dataset_action.add_parser( - "split", help="Splits a local dataset following random and stratified split types." + "split", + help="Splits a local dataset following random and stratified split types.", + ) + parser_split.add_argument( + "dataset", type=str, help="Local dataset name to split." + ) + parser_split.add_argument( + "-v", + "--val-percentage", + required=True, + type=float, + help="Validation percentage.", + ) + parser_split.add_argument( + "-t", + "--test-percentage", + required=True, + type=float, + help="Test percentage.", + ) + parser_split.add_argument( + "-s", "--seed", type=int, required=False, default=0, help="Split seed." ) - parser_split.add_argument("dataset", type=str, help="Local dataset name to split.") - parser_split.add_argument("-v", "--val-percentage", required=True, type=float, help="Validation percentage.") - parser_split.add_argument("-t", "--test-percentage", required=True, type=float, help="Test percentage.") - parser_split.add_argument("-s", "--seed", type=int, required=False, default=0, help="Split seed.") # List Files - parser_files = dataset_action.add_parser("files", help="Lists file in a remote dataset.") + parser_files = dataset_action.add_parser( + "files", help="Lists file in a remote dataset." + ) parser_files.add_argument( "dataset", type=str, help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.", ) - parser_files.add_argument("--only-filenames", action="store_true", help="Only prints out filenames.") - parser_files.add_argument("--status", type=str, required=False, help="Comma separated list of statuses.") + parser_files.add_argument( + "--only-filenames", action="store_true", help="Only prints out filenames." + ) + parser_files.add_argument( + "--status", + type=str, + required=False, + help="Comma separated list of statuses.", + ) parser_files.add_argument( "--path", type=str, @@ -298,23 +423,31 @@ def cpu_default_types(input: Any) -> Optional[int]: # type: ignore ) # Set file status - parser_file_status = dataset_action.add_parser("set-file-status", help="Sets the status of one or more files.") + parser_file_status = dataset_action.add_parser( + "set-file-status", help="Sets the status of one or more files." + ) parser_file_status.add_argument( "dataset", type=str, help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.", ) parser_file_status.add_argument("status", type=str, help="Status to change to.") - parser_file_status.add_argument("files", type=str, nargs="+", help="Files to change status.") + parser_file_status.add_argument( + "files", type=str, nargs="+", help="Files to change status." + ) # Delete files - parser_delete_files = dataset_action.add_parser("delete-files", help="Delete one or more files remotely.") + parser_delete_files = dataset_action.add_parser( + "delete-files", help="Delete one or more files remotely." + ) parser_delete_files.add_argument( "dataset", type=str, help="[Remote] Dataset name: to list all the existing dataset, run 'darwin dataset remote'.", ) - parser_delete_files.add_argument("files", type=str, nargs="+", help="Files to delete.") + parser_delete_files.add_argument( + "files", type=str, nargs="+", help="Files to delete." 
+ ) parser_delete_files.add_argument( "-y", "--yes", @@ -333,20 +466,44 @@ def cpu_default_types(input: Any) -> Optional[int]: # type: ignore ) parser_comment.add_argument("file", type=str, help="File to comment") parser_comment.add_argument("--text", type=str, help="Comment: list of words") - parser_comment.add_argument("--x", required=False, type=float, default=1, help="X coordinate for comment box") - parser_comment.add_argument("--y", required=False, type=float, default=1, help="Y coordinate for comment box") parser_comment.add_argument( - "--w", "--width", required=False, type=float, default=1, help="Comment box width in pixels" + "--x", + required=False, + type=float, + default=1, + help="X coordinate for comment box", + ) + parser_comment.add_argument( + "--y", + required=False, + type=float, + default=1, + help="Y coordinate for comment box", + ) + parser_comment.add_argument( + "--w", + "--width", + required=False, + type=float, + default=1, + help="Comment box width in pixels", ) parser_comment.add_argument( - "--h", "--height", required=False, type=float, default=1, help="Comment box height in pixels" + "--h", + "--height", + required=False, + type=float, + default=1, + help="Comment box height in pixels", ) # Help dataset_action.add_parser("help", help="Show this help message and exit.") # VERSION - subparsers.add_parser("version", help="Check current version of the repository. ") + subparsers.add_parser( + "version", help="Check current version of the repository. " + ) argcomplete.autocomplete(self.parser) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 0ad2d1be3..080fef467 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -99,7 +99,9 @@ class ClassificationDataset(LocalDataset): be composed via torchvision. 
""" - def __init__(self, transform: Optional[Union[Callable, List]] = None, **kwargs) -> None: + def __init__( + self, transform: Optional[Union[Callable, List]] = None, **kwargs + ) -> None: super().__init__(annotation_type="tag", **kwargs) if transform is not None and isinstance(transform, list): @@ -152,7 +154,11 @@ def get_target(self, index: int) -> Tensor: data = self.parse_json(index) annotations = data.pop("annotations") - tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"] + tags = [ + a.annotation_class.name + for a in annotations + if a.annotation_class.annotation_type == "tag" + ] if not self.is_multi_label: # Binary or multiclass must have a label per image @@ -176,7 +182,11 @@ def check_if_multi_label(self) -> None: for idx in range(len(self)): target = self.parse_json(idx) annotations = target.pop("annotations") - tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"] + tags = [ + a.annotation_class.name + for a in annotations + if a.annotation_class.annotation_type == "tag" + ] if len(tags) > 1: self.is_multi_label = True @@ -319,7 +329,9 @@ def get_target(self, index: int) -> Dict[str, Any]: annotation_type: str = annotation.annotation_class.annotation_type path_key = "paths" if annotation_type == "complex_polygon" else "path" if path_key not in annotation.data: - print(f"Warning: missing polygon in annotation {self.annotations_path[index]}") + print( + f"Warning: missing polygon in annotation {self.annotations_path[index]}" + ) # Extract the sequences of coordinates from the polygon annotation sequences = convert_polygons_to_sequences( annotation.data[path_key], @@ -348,7 +360,12 @@ def get_target(self, index: int) -> Dict[str, Any]: # Compute the area of the polygon # TODO fix with addictive/subtractive paths in complex polygons - poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) + poly_area: float = np.sum( + [ + polygon_area(x_coord, y_coord) + for x_coord, y_coord in zip(x_coords, y_coords) + ] + ) # Create and append the new entry for this annotation annotations.append( @@ -400,7 +417,9 @@ class SemanticSegmentationDataset(LocalDataset): Object used to convert polygons to semantic masks. """ - def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs): + def __init__( + self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs + ): super().__init__(annotation_type="polygon", **kwargs) if not "__background__" in self.classes: self.classes.insert(0, "__background__") diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index 8b2dbf27d..f5ffcac94 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -45,7 +45,9 @@ def flatten_masks_by_category(masks: torch.Tensor, cats: List[int]) -> torch.Ten return torch.as_tensor(mapped, dtype=masks.dtype) -def convert_segmentation_to_mask(segmentations: List[Segment], height: int, width: int) -> torch.Tensor: +def convert_segmentation_to_mask( + segmentations: List[Segment], height: int, width: int +) -> torch.Tensor: """ Converts a polygon represented as a sequence of coordinates into a mask. 
@@ -172,7 +174,9 @@ def detectron2_register_dataset( if partition: catalog_name += f"_{partition}" - classes = get_classes(dataset_path=dataset_path, release_name=release_name, annotation_type="polygon") + classes = get_classes( + dataset_path=dataset_path, release_name=release_name, annotation_type="polygon" + ) DatasetCatalog.register( catalog_name, @@ -219,8 +223,12 @@ def clamp_bbox_to_image_size(annotations, img_width, img_height, format="xywh"): boxes[:, 0].clamp_(min=0, max=img_width - 1) boxes[:, 1].clamp_(min=0, max=img_height - 1) # Then, clamp the width and height - boxes[:, 2].clamp_(min=torch.tensor(0), max=img_width - boxes[:, 0] - 1) # -1 since we images are zero-indexed - boxes[:, 3].clamp_(min=torch.tensor(0), max=img_height - boxes[:, 1] - 1) # -1 since we images are zero-indexed + boxes[:, 2].clamp_( + min=torch.tensor(0), max=img_width - boxes[:, 0] - 1 + ) # -1 since we images are zero-indexed + boxes[:, 3].clamp_( + min=torch.tensor(0), max=img_height - boxes[:, 1] - 1 + ) # -1 since we images are zero-indexed else: raise ValueError(f"Unsupported bounding box format: {format}") diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index 68f81a53f..1d4f135af 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -216,7 +216,9 @@ def is_project_dir(project_path: Path) -> bool: return (project_path / "releases").exists() and (project_path / "images").exists() -def get_progress_bar(array: List[dt.AnnotationFile], description: Optional[str] = None) -> Iterable[ProgressType]: +def get_progress_bar( + array: List[dt.AnnotationFile], description: Optional[str] = None +) -> Iterable[ProgressType]: """ Get a rich a progress bar for the given list of annotation files. @@ -265,7 +267,10 @@ def prompt(msg: str, default: Optional[str] = None) -> str: def find_files( - files: List[dt.PathLike], *, files_to_exclude: List[dt.PathLike] = [], recursive: bool = True + files: List[dt.PathLike], + *, + files_to_exclude: List[dt.PathLike] = [], + recursive: bool = True, ) -> List[Path]: """ Retrieve a list of all files belonging to supported extensions. The exploration can be made @@ -322,7 +327,9 @@ def secure_continue_request() -> bool: def persist_client_configuration( - client: "Client", default_team: Optional[str] = None, config_path: Optional[Path] = None + client: "Client", + default_team: Optional[str] = None, + config_path: Optional[Path] = None, ) -> Config: """ Authenticate user against the server and creates a configuration file for him/her. 
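For the clamp_bbox_to_image_size hunk above, a small self-contained sketch of the same "xywh" clamping arithmetic applied to a single box (plain Python with assumed example numbers, rather than the in-place tensor ops shown in the diff):

# Clamp one xywh box into a zero-indexed 100 x 80 image.
img_width, img_height = 100, 80
x, y, w, h = 90.0, 75.0, 30.0, 20.0

x = min(max(x, 0), img_width - 1)       # 90.0 (already in range)
y = min(max(y, 0), img_height - 1)      # 75.0
w = min(max(w, 0), img_width - x - 1)   # 9.0 -> right edge ends on column 99
h = min(max(h, 0), img_height - y - 1)  # 4.0 -> bottom edge ends on row 79

assert (x + w, y + h) == (99.0, 79.0)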
@@ -350,8 +357,14 @@ def persist_client_configuration( raise ValueError("Unable to get default team.") config: Config = Config(config_path) - config.set_team(team=team_config.slug, api_key=team_config.api_key, datasets_dir=team_config.datasets_dir) - config.set_global(api_endpoint=client.url, base_url=client.base_url, default_team=default_team) + config.set_team( + team=team_config.slug, + api_key=team_config.api_key, + datasets_dir=team_config.datasets_dir, + ) + config.set_global( + api_endpoint=client.url, base_url=client.base_url, default_team=default_team + ) return config @@ -408,7 +421,9 @@ def attempt_decode(path: Path) -> dict: return data except Exception: continue - raise UnrecognizableFileEncoding(f"Unable to load file {path} with any encodings: {encodings}") + raise UnrecognizableFileEncoding( + f"Unable to load file {path} with any encodings: {encodings}" + ) def load_data_from_file(path: Path) -> Tuple[dict, dt.AnnotationFileVersion]: @@ -417,7 +432,9 @@ def load_data_from_file(path: Path) -> Tuple[dict, dt.AnnotationFileVersion]: return data, version -def parse_darwin_json(path: Path, count: Optional[int] = None) -> Optional[dt.AnnotationFile]: +def parse_darwin_json( + path: Path, count: Optional[int] = None +) -> Optional[dt.AnnotationFile]: """ Parses the given JSON file in v7's darwin proprietary format. Works for images, split frame videos (treated as images) and playback videos. @@ -456,6 +473,7 @@ def parse_darwin_json(path: Path, count: Optional[int] = None) -> Optional[dt.An else: return _parse_darwin_image(path, data, count) + def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject: """ Returns a Darwin JSON file as a persistent stream. This allows for parsing large files without @@ -474,8 +492,11 @@ def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject: with path.open() as infile: return json_stream.load(infile, persistent=True) - -def get_image_path_from_stream(darwin_json: PersistentStreamingJSONObject, images_dir: Path) -> Path: + + +def get_image_path_from_stream( + darwin_json: PersistentStreamingJSONObject, images_dir: Path +) -> Path: """ Returns the path to the image file associated with the given darwin json file (V1 or V2). @@ -492,16 +513,31 @@ def get_image_path_from_stream(darwin_json: PersistentStreamingJSONObject, image Path to the image file. 
""" try: - return images_dir / (Path(darwin_json['item']['path'].lstrip('/\\'))) / Path(darwin_json['item']['name']) + return ( + images_dir + / (Path(darwin_json["item"]["path"].lstrip("/\\"))) + / Path(darwin_json["item"]["name"]) + ) except KeyError: - return images_dir / (Path(darwin_json['image']['path'].lstrip('/\\'))) / Path(darwin_json['image']['filename']) + return ( + images_dir + / (Path(darwin_json["image"]["path"].lstrip("/\\"))) + / Path(darwin_json["image"]["filename"]) + ) + def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: item = data["item"] item_source = item.get("source_info", {}) - slots: List[dt.Slot] = list(filter(None, map(_parse_darwin_slot, item.get("slots", [])))) - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data) - annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations]) + slots: List[dt.Slot] = list( + filter(None, map(_parse_darwin_slot, item.get("slots", []))) + ) + annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations( + data + ) + annotation_classes: Set[dt.AnnotationClass] = set( + [annotation.annotation_class for annotation in annotations] + ) if len(slots) == 0: annotation_file = dt.AnnotationFile( @@ -509,7 +545,9 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: path=path, filename=item["name"], item_id=item.get("source_info", {}).get("item_id", None), - dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None), + dataset_name=item.get("source_info", {}) + .get("dataset", {}) + .get("name", None), annotation_classes=annotation_classes, annotations=annotations, is_video=False, @@ -530,13 +568,17 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: path=path, filename=item["name"], item_id=item.get("source_info", {}).get("item_id", None), - dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None), + dataset_name=item.get("source_info", {}) + .get("dataset", {}) + .get("name", None), annotation_classes=annotation_classes, annotations=annotations, is_video=slot.frame_urls is not None, image_width=slot.width, image_height=slot.height, - image_url=None if len(slot.source_files or []) == 0 else slot.source_files[0]["url"], + image_url=None + if len(slot.source_files or []) == 0 + else slot.source_files[0]["url"], image_thumbnail_url=slot.thumbnail_url, workview_url=item_source.get("workview_url", None), seq=0, @@ -565,14 +607,25 @@ def _parse_darwin_slot(data: Dict[str, Any]) -> dt.Slot: ) -def _parse_darwin_image(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile: - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data) - annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations]) +def _parse_darwin_image( + path: Path, data: Dict[str, Any], count: Optional[int] +) -> dt.AnnotationFile: + annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations( + data + ) + annotation_classes: Set[dt.AnnotationClass] = set( + [annotation.annotation_class for annotation in annotations] + ) slot = dt.Slot( name=None, type="image", - source_files=[{"url": data["image"].get("url"), "file_name": _get_local_filename(data["image"])}], + source_files=[ + { + "url": data["image"].get("url"), + "file_name": _get_local_filename(data["image"]), + } + ], thumbnail_url=data["image"].get("thumbnail_url"), 
width=data["image"].get("width"), height=data["image"].get("height"), @@ -599,17 +652,30 @@ def _parse_darwin_image(path: Path, data: Dict[str, Any], count: Optional[int]) return annotation_file -def _parse_darwin_video(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile: - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data) - annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations]) +def _parse_darwin_video( + path: Path, data: Dict[str, Any], count: Optional[int] +) -> dt.AnnotationFile: + annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations( + data + ) + annotation_classes: Set[dt.AnnotationClass] = set( + [annotation.annotation_class for annotation in annotations] + ) if "width" not in data["image"] or "height" not in data["image"]: - raise OutdatedDarwinJSONFormat("Missing width/height in video, please re-export") + raise OutdatedDarwinJSONFormat( + "Missing width/height in video, please re-export" + ) slot = dt.Slot( name=None, type="video", - source_files=[{"url": data["image"].get("url"), "file_name": _get_local_filename(data["image"])}], + source_files=[ + { + "url": data["image"].get("url"), + "file_name": _get_local_filename(data["image"]), + } + ], thumbnail_url=data["image"].get("thumbnail_url"), width=data["image"].get("width"), height=data["image"].get("height"), @@ -645,23 +711,41 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati main_annotation: Optional[dt.Annotation] = None # Darwin JSON 2.0 representation of complex polygons - if "polygon" in annotation and "paths" in annotation["polygon"] and len(annotation["polygon"]["paths"]) > 1: + if ( + "polygon" in annotation + and "paths" in annotation["polygon"] + and len(annotation["polygon"]["paths"]) > 1 + ): bounding_box = annotation.get("bounding_box") paths = annotation["polygon"]["paths"] - main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names) + main_annotation = dt.make_complex_polygon( + name, paths, bounding_box, slot_names=slot_names + ) # Darwin JSON 2.0 representation of simple polygons - elif "polygon" in annotation and "paths" in annotation["polygon"] and len(annotation["polygon"]["paths"]) == 1: + elif ( + "polygon" in annotation + and "paths" in annotation["polygon"] + and len(annotation["polygon"]["paths"]) == 1 + ): bounding_box = annotation.get("bounding_box") paths = annotation["polygon"]["paths"] - main_annotation = dt.make_polygon(name, paths[0], bounding_box, slot_names=slot_names) + main_annotation = dt.make_polygon( + name, paths[0], bounding_box, slot_names=slot_names + ) # Darwin JSON 1.0 representation of complex and simple polygons elif "polygon" in annotation: bounding_box = annotation.get("bounding_box") if "additional_paths" in annotation["polygon"]: - paths = [annotation["polygon"]["path"]] + annotation["polygon"]["additional_paths"] - main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names) + paths = [annotation["polygon"]["path"]] + annotation["polygon"][ + "additional_paths" + ] + main_annotation = dt.make_complex_polygon( + name, paths, bounding_box, slot_names=slot_names + ) else: - main_annotation = dt.make_polygon(name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names) + main_annotation = dt.make_polygon( + name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names + ) # Darwin JSON 1.0 representation of 
complex polygons elif "complex_polygon" in annotation: bounding_box = annotation.get("bounding_box") @@ -673,42 +757,72 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati if "additional_paths" in annotation["complex_polygon"]: paths.extend(annotation["complex_polygon"]["additional_paths"]) - main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names) + main_annotation = dt.make_complex_polygon( + name, paths, bounding_box, slot_names=slot_names + ) elif "bounding_box" in annotation: bounding_box = annotation["bounding_box"] main_annotation = dt.make_bounding_box( - name, bounding_box["x"], bounding_box["y"], bounding_box["w"], bounding_box["h"], slot_names=slot_names + name, + bounding_box["x"], + bounding_box["y"], + bounding_box["w"], + bounding_box["h"], + slot_names=slot_names, ) elif "tag" in annotation: main_annotation = dt.make_tag(name, slot_names=slot_names) elif "line" in annotation: - main_annotation = dt.make_line(name, annotation["line"]["path"], slot_names=slot_names) + main_annotation = dt.make_line( + name, annotation["line"]["path"], slot_names=slot_names + ) elif "keypoint" in annotation: main_annotation = dt.make_keypoint( - name, annotation["keypoint"]["x"], annotation["keypoint"]["y"], slot_names=slot_names + name, + annotation["keypoint"]["x"], + annotation["keypoint"]["y"], + slot_names=slot_names, ) elif "ellipse" in annotation: - main_annotation = dt.make_ellipse(name, annotation["ellipse"], slot_names=slot_names) + main_annotation = dt.make_ellipse( + name, annotation["ellipse"], slot_names=slot_names + ) elif "cuboid" in annotation: - main_annotation = dt.make_cuboid(name, annotation["cuboid"], slot_names=slot_names) + main_annotation = dt.make_cuboid( + name, annotation["cuboid"], slot_names=slot_names + ) elif "skeleton" in annotation: - main_annotation = dt.make_skeleton(name, annotation["skeleton"]["nodes"], slot_names=slot_names) + main_annotation = dt.make_skeleton( + name, annotation["skeleton"]["nodes"], slot_names=slot_names + ) elif "table" in annotation: main_annotation = dt.make_table( - name, annotation["table"]["bounding_box"], annotation["table"]["cells"], slot_names=slot_names + name, + annotation["table"]["bounding_box"], + annotation["table"]["cells"], + slot_names=slot_names, ) elif "string" in annotation: - main_annotation = dt.make_string(name, annotation["string"]["sources"], slot_names=slot_names) + main_annotation = dt.make_string( + name, annotation["string"]["sources"], slot_names=slot_names + ) elif "graph" in annotation: main_annotation = dt.make_graph( - name, annotation["graph"]["nodes"], annotation["graph"]["edges"], slot_names=slot_names + name, + annotation["graph"]["nodes"], + annotation["graph"]["edges"], + slot_names=slot_names, ) elif "mask" in annotation: main_annotation = dt.make_mask(name, slot_names=slot_names) elif "raster_layer" in annotation: raster_layer = annotation["raster_layer"] main_annotation = dt.make_raster_layer( - name, raster_layer["mask_annotation_ids_mapping"], raster_layer["total_pixels"], raster_layer["dense_rle"], slot_names=slot_names + name, + raster_layer["mask_annotation_ids_mapping"], + raster_layer["total_pixels"], + raster_layer["dense_rle"], + slot_names=slot_names, ) if not main_annotation: @@ -718,19 +832,29 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati if "id" in annotation: main_annotation.id = annotation["id"] if "instance_id" in annotation: - 
main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"])) + main_annotation.subs.append( + dt.make_instance_id(annotation["instance_id"]["value"]) + ) if "attributes" in annotation: main_annotation.subs.append(dt.make_attributes(annotation["attributes"])) if "text" in annotation: main_annotation.subs.append(dt.make_text(annotation["text"]["text"])) if "inference" in annotation: - main_annotation.subs.append(dt.make_opaque_sub("inference", annotation["inference"])) + main_annotation.subs.append( + dt.make_opaque_sub("inference", annotation["inference"]) + ) if "directional_vector" in annotation: - main_annotation.subs.append(dt.make_opaque_sub("directional_vector", annotation["directional_vector"])) + main_annotation.subs.append( + dt.make_opaque_sub("directional_vector", annotation["directional_vector"]) + ) if "measures" in annotation: - main_annotation.subs.append(dt.make_opaque_sub("measures", annotation["measures"])) + main_annotation.subs.append( + dt.make_opaque_sub("measures", annotation["measures"]) + ) if "auto_annotate" in annotation: - main_annotation.subs.append(dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"])) + main_annotation.subs.append( + dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"]) + ) if annotation.get("annotators") is not None: main_annotation.annotators = _parse_annotators(annotation["annotators"]) @@ -784,7 +908,9 @@ def _parse_darwin_raster_annotation(annotation: dict) -> Optional[dt.Annotation] slot_names: Optional[List[str]] = parse_slot_names(annotation) if not id or not name or not raster_layer: - raise ValueError("Raster annotation must have an 'id', 'name' and 'raster_layer' field") + raise ValueError( + "Raster annotation must have an 'id', 'name' and 'raster_layer' field" + ) dense_rle, mask_annotation_ids_mapping, total_pixels = ( raster_layer.get("dense_rle", None), @@ -835,9 +961,14 @@ def _parse_darwin_mask_annotation(annotation: dict) -> Optional[dt.Annotation]: def _parse_annotators(annotators: List[Dict[str, Any]]) -> List[dt.AnnotationAuthor]: if not (hasattr(annotators, "full_name") or not hasattr(annotators, "email")): - raise AttributeError("JSON file must contain annotators with 'full_name' and 'email' fields") + raise AttributeError( + "JSON file must contain annotators with 'full_name' and 'email' fields" + ) - return [dt.AnnotationAuthor(annotator["full_name"], annotator["email"]) for annotator in annotators] + return [ + dt.AnnotationAuthor(annotator["full_name"], annotator["email"]) + for annotator in annotators + ] def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationFile]: @@ -870,9 +1001,13 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF frame_annotations = [] for i, frame_url in enumerate(annotation.frame_urls): annotations = [ - a.frames[i] for a in annotation.annotations if isinstance(a, dt.VideoAnnotation) and i in a.frames + a.frames[i] + for a in annotation.annotations + if isinstance(a, dt.VideoAnnotation) and i in a.frames ] - annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations]) + annotation_classes: Set[dt.AnnotationClass] = set( + [annotation.annotation_class for annotation in annotations] + ) filename: str = f"{Path(annotation.filename).stem}/{i:07d}.png" frame_annotations.append( dt.AnnotationFile( @@ -956,7 +1091,9 @@ def convert_polygons_to_sequences( else: list_polygons = cast(List[dt.Polygon], [polygons]) - if not isinstance(list_polygons[0], 
list) or not isinstance(list_polygons[0][0], dict): + if not isinstance(list_polygons[0], list) or not isinstance( + list_polygons[0][0], dict + ): raise ValueError("Unknown input format") sequences: List[List[Union[int, float]]] = [] @@ -964,8 +1101,8 @@ def convert_polygons_to_sequences( path: List[Union[int, float]] = [] for point in polygon: # Clip coordinates to the image size - x = max(min(point["x"], width -1) if width else point["x"], 0) - y = max(min(point["y"], height -1) if height else point["y"], 0) + x = max(min(point["x"], width - 1) if width else point["x"], 0) + y = max(min(point["y"], height - 1) if height else point["y"], 0) if rounding: path.append(round(x)) path.append(round(y)) @@ -983,7 +1120,9 @@ def convert_polygons_to_sequences( details="Do not use.", ) def convert_sequences_to_polygons( - sequences: List[Union[List[int], List[float]]], height: Optional[int] = None, width: Optional[int] = None + sequences: List[Union[List[int], List[float]]], + height: Optional[int] = None, + width: Optional[int] = None, ) -> Dict[str, List[dt.Polygon]]: """ Converts a list of polygons, encoded as a list of dictionaries of into a list of nd.arrays @@ -1095,7 +1234,9 @@ def convert_bounding_box_to_xyxy(box: dt.BoundingBox) -> List[float]: return [box["x"], box["y"], x2, y2] -def convert_polygons_to_mask(polygons: List, height: int, width: int, value: Optional[int] = 1) -> np.ndarray: +def convert_polygons_to_mask( + polygons: List, height: int, width: int, value: Optional[int] = 1 +) -> np.ndarray: """ Converts a list of polygons, encoded as a list of dictionaries into an ``nd.array`` mask. @@ -1139,7 +1280,7 @@ def chunk(items: List[Any], size: int) -> Iterator[Any]: A chunk of the of the given size. """ for i in range(0, len(items), size): - yield items[i:i + size] + yield items[i : i + size] def is_unix_like_os() -> bool: @@ -1189,31 +1330,58 @@ def _parse_version(data: dict) -> dt.AnnotationFileVersion: return dt.AnnotationFileVersion(int(major), int(minor), suffix) -def _data_to_annotations(data: Dict[str, Any]) -> List[Union[dt.Annotation, dt.VideoAnnotation]]: +def _data_to_annotations( + data: Dict[str, Any] +) -> List[Union[dt.Annotation, dt.VideoAnnotation]]: raw_image_annotations = filter( lambda annotation: ( - ("frames" not in annotation) and ("raster_layer" not in annotation) and ("mask" not in annotation) + ("frames" not in annotation) + and ("raster_layer" not in annotation) + and ("mask" not in annotation) ), data["annotations"], ) - raw_video_annotations = filter(lambda annotation: "frames" in annotation, data["annotations"]) - raw_raster_annotations = filter(lambda annotation: "raster_layer" in annotation, data["annotations"]) - raw_mask_annotations = filter(lambda annotation: "mask" in annotation, data["annotations"]) - image_annotations: List[dt.Annotation] = list(filter(None, map(_parse_darwin_annotation, raw_image_annotations))) + raw_video_annotations = filter( + lambda annotation: "frames" in annotation, data["annotations"] + ) + raw_raster_annotations = filter( + lambda annotation: "raster_layer" in annotation, data["annotations"] + ) + raw_mask_annotations = filter( + lambda annotation: "mask" in annotation, data["annotations"] + ) + image_annotations: List[dt.Annotation] = list( + filter(None, map(_parse_darwin_annotation, raw_image_annotations)) + ) video_annotations: List[dt.VideoAnnotation] = list( filter(None, map(_parse_darwin_video_annotation, raw_video_annotations)) ) raster_annotations: List[dt.Annotation] = list( filter(None, 
map(_parse_darwin_raster_annotation, raw_raster_annotations)) ) - mask_annotations: List[dt.Annotation] = list(filter(None, map(_parse_darwin_mask_annotation, raw_mask_annotations))) + mask_annotations: List[dt.Annotation] = list( + filter(None, map(_parse_darwin_mask_annotation, raw_mask_annotations)) + ) - return [*image_annotations, *video_annotations, *raster_annotations, *mask_annotations] + return [ + *image_annotations, + *video_annotations, + *raster_annotations, + *mask_annotations, + ] def _supported_schema_versions() -> Dict[Tuple[int, int, str], str]: - return {(2, 0, ""): "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json"} + return { + ( + 2, + 0, + "", + ): "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json" + } def _default_schema(version: dt.AnnotationFileVersion) -> Optional[str]: - return _supported_schema_versions().get((version.major, version.minor, version.suffix)) + return _supported_schema_versions().get( + (version.major, version.minor, version.suffix) + ) diff --git a/darwin_demo.py b/darwin_demo.py index b7822c851..1f911aef9 100644 --- a/darwin_demo.py +++ b/darwin_demo.py @@ -47,7 +47,9 @@ def run_demo( else: client = Client.local(team_slug=team_slug) # Create a dataset identifier - dataset_identifier = DatasetIdentifier(dataset_slug=dataset_slug, team_slug=team_slug) + dataset_identifier = DatasetIdentifier( + dataset_slug=dataset_slug, team_slug=team_slug + ) # Get an object representing the remote dataset ds = client.get_remote_dataset(dataset_identifier=dataset_identifier) # Download the dataset on the local file system @@ -68,12 +70,32 @@ def run_demo( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="This script can be used to download a dataset from Darwin", ) - parser.add_argument("--datasets-dir", help="Path to where the dataset will be downloaded", default=None, type=Path) - parser.add_argument("--dataset-slug", help="Dataset slug (see Darwin documentation)", default=None, type=str) - parser.add_argument("--team-slug", help="Team slug (see Darwin documentation)", default=None, type=str) - parser.add_argument("--api-key", help="API key to authenticate the client", default=None, type=str) parser.add_argument( - "--config-path", help="Path to the configuration file to authenticate the client", default=None, type=Path + "--datasets-dir", + help="Path to where the dataset will be downloaded", + default=None, + type=Path, + ) + parser.add_argument( + "--dataset-slug", + help="Dataset slug (see Darwin documentation)", + default=None, + type=str, + ) + parser.add_argument( + "--team-slug", + help="Team slug (see Darwin documentation)", + default=None, + type=str, + ) + parser.add_argument( + "--api-key", help="API key to authenticate the client", default=None, type=str + ) + parser.add_argument( + "--config-path", + help="Path to the configuration file to authenticate the client", + default=None, + type=Path, ) args = parser.parse_args() diff --git a/deploy/_move_tickets_to_done.py b/deploy/_move_tickets_to_done.py index ade220a28..e1e317903 100644 --- a/deploy/_move_tickets_to_done.py +++ b/deploy/_move_tickets_to_done.py @@ -8,8 +8,16 @@ from confirm_main_branch_deployability import ExitCodes, _exit, _run_command parser = argparse.ArgumentParser(description="Move tickets in release to done.") -parser.add_argument("--release-tag", "--tag", "-t", help="The release tag to move tickets from", required=True) -parser.add_argument("--dry-run", action="store_true", help="Don't actually move 
tickets to done") +parser.add_argument( + "--release-tag", + "--tag", + "-t", + help="The release tag to move tickets from", + required=True, +) +parser.add_argument( + "--dry-run", action="store_true", help="Don't actually move tickets to done" +) args = parser.parse_args() release_tag = args.release_tag @@ -23,14 +31,18 @@ # get details body, error = _run_command("gh", "release", "view", release_tag, "--json", "body") -assert error == 0, _exit("Failed to get last release body", ExitCodes.GETTING_RELEASE_METADATA_THREW_EXITCODE) +assert error == 0, _exit( + "Failed to get last release body", ExitCodes.GETTING_RELEASE_METADATA_THREW_EXITCODE +) body_parsed = json.loads(body) body = body_parsed["body"].split("\n") TICKET_MATCHER = re.compile(r"^\* \[([A-z]+-[0-9]+)\]") -wrong_lines = [line for line in body if line.startswith("* ") and not TICKET_MATCHER.match(line)] +wrong_lines = [ + line for line in body if line.startswith("* ") and not TICKET_MATCHER.match(line) +] body_lines = [line.upper() for line in body if TICKET_MATCHER.match(line)] unmoved_tickets = len(wrong_lines) - len(body_lines) diff --git a/deploy/confirm_main_branch_deployability.py b/deploy/confirm_main_branch_deployability.py index c647f5243..53b0f745b 100755 --- a/deploy/confirm_main_branch_deployability.py +++ b/deploy/confirm_main_branch_deployability.py @@ -11,7 +11,9 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler(sys.stdout)) -logger.setLevel(logging.DEBUG) if environ.get("DEBUG") else logger.setLevel(logging.INFO) +logger.setLevel(logging.DEBUG) if environ.get("DEBUG") else logger.setLevel( + logging.INFO +) # Set up default constants @@ -85,10 +87,15 @@ def _get_most_recent_release_tag() -> str: """ output, error = _run_command("gh", "release", "list", "--limit", "1") - assert error == 0, _exit("Failed to get last release tag", ExitCodes.GETTING_LAST_RELEASE_TAG_THREW_EXITCODE) + assert error == 0, _exit( + "Failed to get last release tag", + ExitCodes.GETTING_LAST_RELEASE_TAG_THREW_EXITCODE, + ) release_tag = str(output).split()[0] - assert release_tag, _exit("No release tag found", ExitCodes.COULD_NOT_PARSE_LAST_RELEASE_TAG) + assert release_tag, _exit( + "No release tag found", ExitCodes.COULD_NOT_PARSE_LAST_RELEASE_TAG + ) return release_tag @@ -97,20 +104,31 @@ def _get_most_recent_release_timestamp(release_tag: str) -> Tuple[str, datetime] """ Gets the last release timestamp from the repo """ - output, error = _run_command("gh", "release", "view", release_tag, "--json", "name,publishedAt") - assert error == 0, _exit("Failed to get last release timestamp", ExitCodes.GETTING_RELEASE_METADATA_THREW_EXITCODE) + output, error = _run_command( + "gh", "release", "view", release_tag, "--json", "name,publishedAt" + ) + assert error == 0, _exit( + "Failed to get last release timestamp", + ExitCodes.GETTING_RELEASE_METADATA_THREW_EXITCODE, + ) json_output = {} try: json_output = json.loads(output) except json.JSONDecodeError: - _exit("Could not parse release metadata", ExitCodes.COULD_NOT_PARSE_RELEASE_METADATA) + _exit( + "Could not parse release metadata", + ExitCodes.COULD_NOT_PARSE_RELEASE_METADATA, + ) assert "name" in json_output and "publishedAt" in json_output, _exit( - "Expected release name and timestamp in metadata", ExitCodes.UNEXPECTED_STRUCTURE_TO_RELEASE_METADATA + "Expected release name and timestamp in metadata", + ExitCodes.UNEXPECTED_STRUCTURE_TO_RELEASE_METADATA, ) - return json_output["name"], datetime.fromisoformat(json_output["publishedAt"].replace("Z", "+00:00")) 
+ return json_output["name"], datetime.fromisoformat( + json_output["publishedAt"].replace("Z", "+00:00") + ) def _get_changes_since_last_release(last_release_timestamp: datetime) -> List[str]: @@ -120,16 +138,25 @@ def _get_changes_since_last_release(last_release_timestamp: datetime) -> List[st SECONDS_IN_A_DAY = 86400 seconds_since_last_release: int = int( ( - datetime.utcnow().astimezone(timezone.utc) - last_release_timestamp.astimezone(timezone.utc) + datetime.utcnow().astimezone(timezone.utc) + - last_release_timestamp.astimezone(timezone.utc) ).total_seconds() # Whose idea was it to create timedelta.seconds _and_ datetime.total_seconds ) - gitref_to_compare = "{}@{{{} seconds ago}}".format(DEFAULT_BRANCH, seconds_since_last_release) + gitref_to_compare = "{}@{{{} seconds ago}}".format( + DEFAULT_BRANCH, seconds_since_last_release + ) - print(f"It's been {seconds_since_last_release} seconds since the last release, about {int(seconds_since_last_release / SECONDS_IN_A_DAY)} days ago") + print( + f"It's been {seconds_since_last_release} seconds since the last release, about {int(seconds_since_last_release / SECONDS_IN_A_DAY)} days ago" + ) printl(f"Getting changes since {gitref_to_compare}") - output, error = _run_command("git", "diff", DEFAULT_BRANCH, gitref_to_compare, "--name-only") - assert error == 0, _exit("Failed to get changes since last release", ExitCodes.GIT_DIFF_THREW_EXITCODE) + output, error = _run_command( + "git", "diff", DEFAULT_BRANCH, gitref_to_compare, "--name-only" + ) + assert error == 0, _exit( + "Failed to get changes since last release", ExitCodes.GIT_DIFF_THREW_EXITCODE + ) files_changed = output.split("\n") @@ -144,7 +171,9 @@ def main() -> None: last_release_tag = _get_most_recent_release_tag() printl("Getting last release timestamp") - last_release_tag, last_release_timestamp = _get_most_recent_release_timestamp(last_release_tag) + last_release_tag, last_release_timestamp = _get_most_recent_release_timestamp( + last_release_tag + ) printl(f"Last release timestamp: {last_release_timestamp}") printl(f"Last release tag: {last_release_tag}") diff --git a/deploy/increase_version.py b/deploy/increase_version.py index 84a04fbb3..8c7bc3d67 100755 --- a/deploy/increase_version.py +++ b/deploy/increase_version.py @@ -12,7 +12,9 @@ from validate_pyproject import api as pyproject_api from validate_pyproject import errors as pyproject_errors -DARWIN_PYPI_INFO_PAGE = environ.get("PYPY_INFO_PAGE", "https://pypi.org/pypi/darwin-py/json") +DARWIN_PYPI_INFO_PAGE = environ.get( + "PYPY_INFO_PAGE", "https://pypi.org/pypi/darwin-py/json" +) @dataclass @@ -26,7 +28,11 @@ class Version: def __eq__(self, other: object) -> bool: if not isinstance(other, Version): return False - return self.major == other.major and self.minor == other.minor and self.patch == other.patch + return ( + self.major == other.major + and self.minor == other.minor + and self.patch == other.patch + ) def __gt__(self, other: object) -> bool: if not isinstance(other, Version): @@ -158,14 +164,20 @@ def _get_pypi_version(force: bool, cicd: bool) -> Version: return Version(int(major), int(minor), int(patch)) -def _sanity_check(version: Version, pyproject_version: Version, pypi_version: Version, force: bool) -> None: +def _sanity_check( + version: Version, pyproject_version: Version, pypi_version: Version, force: bool +) -> None: if version != pyproject_version: - raise ValueError("Version in darwin.version module and pyproject.toml do not match") + raise ValueError( + "Version in darwin.version module and 
pyproject.toml do not match" + ) # pypi version should be either equal to or one greater difference_between_versions = version - pypi_version if difference_between_versions not in [(0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0)]: - print(f"Version in PyPI is not equal to or one greater than local version: {version} != {pypi_version}") + print( + f"Version in PyPI is not equal to or one greater than local version: {version} != {pypi_version}" + ) print("Your local version is probably too old, check your version number") if not force or confirm("Continue with updating version number?"): @@ -180,7 +192,9 @@ def _sanity_check(version: Version, pyproject_version: Version, pypi_version: Ve def _update_version(new_version: Version) -> None: - version_file = (Path(__file__).parent / "..").resolve() / "darwin" / "version" / "__init__.py" + version_file = ( + (Path(__file__).parent / "..").resolve() / "darwin" / "version" / "__init__.py" + ) print(f"Updating version in {version_file.absolute()}") assert version_file.exists(), "Version file not found" @@ -219,7 +233,12 @@ def _update_pyproject_version(new_version: Version) -> None: def arguments() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Increase version number") - parser.add_argument("-f", "--force", action="store_true", help="force actions, do not ask for confirmation") + parser.add_argument( + "-f", + "--force", + action="store_true", + help="force actions, do not ask for confirmation", + ) parser.add_argument( "-c", "--cicd", @@ -227,16 +246,35 @@ def arguments() -> argparse.Namespace: help="run in CI/CD mode (no confirmation, assume failure unless --force specified)", ) - parser.add_argument("-v", "--version", action="store_true", help="show version number and exit", default=True) - parser.add_argument("-M", "--major", action="store_true", help="increase major version") - parser.add_argument("-m", "--minor", action="store_true", help="increase minor version") - parser.add_argument("-p", "--patch", action="store_true", help="increase patch version") - parser.add_argument("-N", "--new-version", type=str, help="set new version number (overrides -M, -m, -p)") + parser.add_argument( + "-v", + "--version", + action="store_true", + help="show version number and exit", + default=True, + ) + parser.add_argument( + "-M", "--major", action="store_true", help="increase major version" + ) + parser.add_argument( + "-m", "--minor", action="store_true", help="increase minor version" + ) + parser.add_argument( + "-p", "--patch", action="store_true", help="increase patch version" + ) + parser.add_argument( + "-N", + "--new-version", + type=str, + help="set new version number (overrides -M, -m, -p)", + ) return parser.parse_args() -def validate_args(args: argparse.Namespace, force_actions: bool, cicd_mode: bool) -> Tuple[bool, bool]: +def validate_args( + args: argparse.Namespace, force_actions: bool, cicd_mode: bool +) -> Tuple[bool, bool]: if args.force: print("Force mode enabled, no confirmation will be asked") force_actions = True @@ -246,7 +284,9 @@ def validate_args(args: argparse.Namespace, force_actions: bool, cicd_mode: bool cicd_mode = True if args.major and args.minor and args.patch: - print("Cannot increase major, minor and patch at the same time. Specify only one of these.") + print( + "Cannot increase major, minor and patch at the same time. Specify only one of these." 
+ ) exit(2) return force_actions, cicd_mode @@ -275,7 +315,9 @@ def main() -> None: new_version = LOCAL_VERSION.copy() if (args.major or args.minor or args.patch) and args.new_version: - print("Cannot increase version and set new version at the same time. Specify only one of these.") + print( + "Cannot increase version and set new version at the same time. Specify only one of these." + ) exit(2) if args.new_version: @@ -292,7 +334,11 @@ def main() -> None: if args.patch: new_version.increment_patch() - if new_version.was_changed() and (force_actions or cicd_mode or confirm(f"Update version from {str(LOCAL_VERSION)} to {str(new_version)}?")): + if new_version.was_changed() and ( + force_actions + or cicd_mode + or confirm(f"Update version from {str(LOCAL_VERSION)} to {str(new_version)}?") + ): _update_version(new_version) _update_pyproject_version(new_version) print(f"Version updated successfully to {str(new_version)}") diff --git a/deploy/nightly_package_setup.py b/deploy/nightly_package_setup.py index 216251ecd..ea357bb7e 100644 --- a/deploy/nightly_package_setup.py +++ b/deploy/nightly_package_setup.py @@ -37,7 +37,9 @@ def main() -> None: path_to_pyproject.write_text(linesep.join(lines_to_write)) - print(f"Set build to a nightly in pyproject.toml - darwin-nightly@{epoch_timestring}") + print( + f"Set build to a nightly in pyproject.toml - darwin-nightly@{epoch_timestring}" + ) if __name__ == "__main__": diff --git a/deploy/revert_nightly_setup.py b/deploy/revert_nightly_setup.py index 12737b6aa..bd1e6f698 100644 --- a/deploy/revert_nightly_setup.py +++ b/deploy/revert_nightly_setup.py @@ -29,7 +29,7 @@ def main() -> None: if line.startswith("name ="): line = f'name = "{new_package_name}"\n' if line.startswith("version ="): - line = f'version = {new_version}\n' + line = f"version = {new_version}\n" lines_to_write.append(line) path_to_pyproject.write_text("\n".join(lines_to_write)) diff --git a/e2e_tests/cli/convert/test_convert.py b/e2e_tests/cli/convert/test_convert.py index f5f93c83c..4cd8f3273 100644 --- a/e2e_tests/cli/convert/test_convert.py +++ b/e2e_tests/cli/convert/test_convert.py @@ -49,11 +49,15 @@ def compare_directories(self, path: Path, expected_path: Path) -> Tuple[bool, st "format, input_path, expectation_path", [("yolo_segmented", data_path / "yolov8/from", data_path / "yolov8/to")], ) - def test_darwin_convert(self, format: str, input_path: Path, expectation_path: Path, tmp_path: Path) -> None: + def test_darwin_convert( + self, format: str, input_path: Path, expectation_path: Path, tmp_path: Path + ) -> None: """ Test converting a file format to another format """ - assert input_path is not None and expectation_path is not None, "Input or expectation path is None" + assert ( + input_path is not None and expectation_path is not None + ), "Input or expectation path is None" assert ( input_path.exists() and expectation_path.exists() ), f"Input path {input_path.absolute()} or expectation path {expectation_path.absolute()} does not exist" @@ -61,7 +65,9 @@ def test_darwin_convert(self, format: str, input_path: Path, expectation_path: P input_path.is_dir() and expectation_path.is_dir() ), f"Input path {input_path.absolute()} or expectation path {expectation_path.absolute()} is not a directory" - result = run_cli_command(f"darwin convert {format} {str(input_path)} {str(tmp_path)}") + result = run_cli_command( + f"darwin convert {format} {str(input_path)} {str(tmp_path)}" + ) assert_cli(result, 0) assert self.compare_directories(expectation_path, tmp_path)[0] diff --git 
a/e2e_tests/conftest.py b/e2e_tests/conftest.py index 207437e23..b44f786da 100644 --- a/e2e_tests/conftest.py +++ b/e2e_tests/conftest.py @@ -14,7 +14,9 @@ def pytest_configure(config: pytest.Config) -> None: - config.addinivalue_line("addopts", "--ignore=../tests/, ../future --capture=tee-sys") + config.addinivalue_line( + "addopts", "--ignore=../tests/, ../future --capture=tee-sys" + ) def pytest_sessionstart(session: pytest.Session) -> None: @@ -43,7 +45,9 @@ def pytest_sessionstart(session: pytest.Session) -> None: session.config.cache.set("api_key", api_key) session.config.cache.set("team_slug", team_slug) - datasets = setup_tests(ConfigValues(server=server, api_key=api_key, team_slug=team_slug)) + datasets = setup_tests( + ConfigValues(server=server, api_key=api_key, team_slug=team_slug) + ) # pytest.datasets = datasets setattr(pytest, "datasets", datasets) # Set the environment variables for running CLI arguments diff --git a/e2e_tests/helpers.py b/e2e_tests/helpers.py index af143ff4b..8d3b3d393 100644 --- a/e2e_tests/helpers.py +++ b/e2e_tests/helpers.py @@ -28,7 +28,10 @@ class CLIResult: def run_cli_command( - command: str, working_directory: Optional[str] = None, yes: bool = False, server_wait: int = SERVER_WAIT_TIME + command: str, + working_directory: Optional[str] = None, + yes: bool = False, + server_wait: int = SERVER_WAIT_TIME, ) -> CLIResult: """ Run a CLI command and return the return code, stdout, and stderr. @@ -68,9 +71,17 @@ def run_cli_command( ) sleep(server_wait) # wait for server to catch up try: - return CLIResult(result.returncode, result.stdout.decode("utf-8"), result.stderr.decode("utf-8")) + return CLIResult( + result.returncode, + result.stdout.decode("utf-8"), + result.stderr.decode("utf-8"), + ) except UnicodeDecodeError: - return CLIResult(result.returncode, result.stdout.decode("cp437"), result.stderr.decode("cp437")) + return CLIResult( + result.returncode, + result.stdout.decode("cp437"), + result.stderr.decode("cp437"), + ) def format_cli_output(result: CLIResult) -> str: diff --git a/e2e_tests/objects.py b/e2e_tests/objects.py index 31dc8452d..6beaec09b 100644 --- a/e2e_tests/objects.py +++ b/e2e_tests/objects.py @@ -41,8 +41,10 @@ class E2EDataset: slug: str items: List[E2EItem] directory: Optional[str] = None - - def __init__(self, id: int, name: str, slug: Optional[str], directory: Optional[str]=None) -> None: + + def __init__( + self, id: int, name: str, slug: Optional[str], directory: Optional[str] = None + ) -> None: self.id = id self.name = name self.slug = slug or name.lower().replace(" ", "_") diff --git a/e2e_tests/setup_tests.py b/e2e_tests/setup_tests.py index 273f935a7..7fb1424a5 100644 --- a/e2e_tests/setup_tests.py +++ b/e2e_tests/setup_tests.py @@ -14,7 +14,12 @@ from e2e_tests.objects import ConfigValues, E2EDataset, E2EItem -def api_call(verb: Literal["get", "post", "put", "delete"], url: str, payload: Optional[dict], api_key: str) -> requests.Response: +def api_call( + verb: Literal["get", "post", "put", "delete"], + url: str, + payload: Optional[dict], + api_key: str, +) -> requests.Response: """ Make an API call to the server (Written independently of the client library to avoid relying on tested items) @@ -44,7 +49,9 @@ def api_call(verb: Literal["get", "post", "put", "delete"], url: str, payload: O return response -def generate_random_string(length: int = 6, alphabet: str = (string.ascii_lowercase + string.digits)) -> str: +def generate_random_string( + length: int = 6, alphabet: str = (string.ascii_lowercase + string.digits) 
+) -> str: """ A random-enough to avoid collision on test runs prefix generator @@ -82,7 +89,9 @@ def create_dataset(prefix: str, config: ConfigValues) -> E2EDataset: url = f"{host}/api/datasets" if not url.startswith("http"): - raise E2EException(f"Invalid server URL {host} - need to specify protocol in var E2E_ENVIRONMENT") + raise E2EException( + f"Invalid server URL {host} - need to specify protocol in var E2E_ENVIRONMENT" + ) try: response = api_call("post", url, {"name": name}, api_key) @@ -97,13 +106,17 @@ def create_dataset(prefix: str, config: ConfigValues) -> E2EDataset: # fmt: on ) - raise E2EException(f"Failed to create dataset {name} - {response.status_code} - {response.text}") + raise E2EException( + f"Failed to create dataset {name} - {response.status_code} - {response.text}" + ) except Exception as e: print(f"Failed to create dataset {name} - {e}") pytest.exit("Test run failed in test setup stage") -def create_item(dataset_slug: str, prefix: str, image: Path, config: ConfigValues) -> E2EItem: +def create_item( + dataset_slug: str, prefix: str, image: Path, config: ConfigValues +) -> E2EItem: """ Creates a randomised new item, and return its minimal info for reference @@ -168,7 +181,9 @@ def create_item(dataset_slug: str, prefix: str, image: Path, config: ConfigValue annotations=[], ) - raise E2EException(f"Failed to create item {name} - {response.status_code} - {response.text}") + raise E2EException( + f"Failed to create item {name} - {response.status_code} - {response.text}" + ) except E2EException as e: print(f"Failed to create item {name} - {e}") @@ -179,7 +194,13 @@ def create_item(dataset_slug: str, prefix: str, image: Path, config: ConfigValue pytest.exit("Test run failed in test setup stage") -def create_random_image(prefix: str, directory: Path, height: int = 100, width: int = 100, fixed_name: bool=False) -> Path: +def create_random_image( + prefix: str, + directory: Path, + height: int = 100, + width: int = 100, + fixed_name: bool = False, +) -> Path: """ Create a random image file in the given directory @@ -275,8 +296,9 @@ def teardown_tests(config: ConfigValues, datasets: List[E2EDataset]) -> None: response = api_call("put", url, {}, api_key) if not response.ok: - failures.append(f"Failed to delete dataset {dataset.name} - {response.status_code} - {response.text}") - + failures.append( + f"Failed to delete dataset {dataset.name} - {response.status_code} - {response.text}" + ) # Teardown workflows as they need to be disconnected before datasets can be deleted url = f"{host}/api/v2/teams/{team_slug}/workflows" @@ -286,25 +308,26 @@ def teardown_tests(config: ConfigValues, datasets: List[E2EDataset]) -> None: for item in items: if not item["dataset"]: continue - if not item['dataset']['name'].startswith("test_dataset_"): + if not item["dataset"]["name"].startswith("test_dataset_"): continue - new_item = { - "name": item['name'], - "stages": item['stages'] - } - for stage in new_item['stages']: - if stage['type'] == 'dataset': + new_item = {"name": item["name"], "stages": item["stages"]} + for stage in new_item["stages"]: + if stage["type"] == "dataset": stage["config"]["dataset_id"] = None url = f"{host}/api/v2/teams/{team_slug}/workflows/{item['id']}" response = api_call("put", url, new_item, api_key) if not response.ok: - failures.append(f"Failed to delete workflow {item['name']} - {response.status_code} - {response.text}") - + failures.append( + f"Failed to delete workflow {item['name']} - {response.status_code} - {response.text}" + ) + # Now Delete the workflow 
once dataset is disconnected response = api_call("delete", url, None, api_key) if not response.ok: - failures.append(f"Failed to delete workflow {item['name']} - {response.status_code} - {response.text}") - + failures.append( + f"Failed to delete workflow {item['name']} - {response.status_code} - {response.text}" + ) + # teardown any other datasets of specific format url = f"{host}/api/datasets" response = api_call("get", url, {}, api_key) @@ -316,8 +339,10 @@ def teardown_tests(config: ConfigValues, datasets: List[E2EDataset]) -> None: url = f"{host}/api/datasets/{item['id']}/archive" response = api_call("put", url, None, api_key) if not response.ok: - failures.append(f"Failed to delete dataset {item['name']} - {response.status_code} - {response.text}") - + failures.append( + f"Failed to delete dataset {item['name']} - {response.status_code} - {response.text}" + ) + if failures: for item in failures: print(item) diff --git a/e2e_tests/test_darwin.py b/e2e_tests/test_darwin.py index a01d48e44..dd03d8f3c 100644 --- a/e2e_tests/test_darwin.py +++ b/e2e_tests/test_darwin.py @@ -42,7 +42,9 @@ def local_dataset(new_dataset: E2EDataset) -> Generator[E2EDataset, None, None]: def local_dataset_with_images(local_dataset: E2EDataset) -> E2EDataset: assert local_dataset.directory is not None for x in range(3): - path = create_random_image(str(x), Path(local_dataset.directory), fixed_name=True) + path = create_random_image( + str(x), Path(local_dataset.directory), fixed_name=True + ) local_dataset.add_item( E2EItem( name=path.name, @@ -116,7 +118,9 @@ def test_darwin_import(local_dataset_with_annotations: E2EDataset) -> None: assert_cli(result, 0) -def test_darwin_export(local_dataset_with_annotations: E2EDataset, config_values: ConfigValues) -> None: +def test_darwin_export( + local_dataset_with_annotations: E2EDataset, config_values: ConfigValues +) -> None: """ Test exporting a dataset via the darwin cli, dataset created via fixture with annotations added to objects """ @@ -146,8 +150,12 @@ def test_darwin_export(local_dataset_with_annotations: E2EDataset, config_values f"darwin dataset export {local_dataset_with_annotations.name} test_darwin_export --class-ids {class_str}" ) assert_cli(result, 0, in_stdout="successfully exported") - result = run_cli_command(f"darwin dataset releases {local_dataset_with_annotations.name}") - assert_cli(result, 0, in_stdout="No available releases, export one first", inverse=True) + result = run_cli_command( + f"darwin dataset releases {local_dataset_with_annotations.name}" + ) + assert_cli( + result, 0, in_stdout="No available releases, export one first", inverse=True + ) # Check that a release is there via inverse, the CLI will truncate outputs and pass/fail is not clear # if we check for release name diff --git a/source/conf.py b/source/conf.py index 08e9049eb..e50fc9d97 100644 --- a/source/conf.py +++ b/source/conf.py @@ -33,7 +33,11 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions: List[str] = ["sphinx.ext.viewcode", "sphinx.ext.napoleon", "sphinx.ext.autodoc"] +extensions: List[str] = [ + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.autodoc", +] # Add any paths that contain templates here, relative to this directory. 
templates_path: List[str] = ["_templates"] diff --git a/tests/darwin/cli_functions_test.py b/tests/darwin/cli_functions_test.py index d29394c5f..8bcaa0aa8 100644 --- a/tests/darwin/cli_functions_test.py +++ b/tests/darwin/cli_functions_test.py @@ -17,7 +17,13 @@ @pytest.fixture def remote_dataset(team_slug: str, dataset_slug: str, local_config_file: Config): client = Client(local_config_file) - return RemoteDatasetV1(client=client, team=team_slug, name="TEST_DATASET", slug=dataset_slug, dataset_id=1) + return RemoteDatasetV1( + client=client, + team=team_slug, + name="TEST_DATASET", + slug=dataset_slug, + dataset_id=1, + ) @pytest.fixture @@ -33,19 +39,40 @@ def request_upload_endpoint(self, team_slug: str, dataset_slug: str): @pytest.mark.usefixtures("file_read_write_test") @responses.activate def test_default_non_verbose( - self, team_slug: str, dataset_slug: str, remote_dataset: RemoteDataset, request_upload_endpoint: str + self, + team_slug: str, + dataset_slug: str, + remote_dataset: RemoteDataset, + request_upload_endpoint: str, ): request_upload_response = { "blocked_items": [ - {"dataset_item_id": 1, "filename": "test_1.jpg", "path": "/", "reason": "ALREADY_EXISTS"}, - {"dataset_item_id": 2, "filename": "test_2.jpg", "path": "/", "reason": "UNKNOWN_TAGS"}, + { + "dataset_item_id": 1, + "filename": "test_1.jpg", + "path": "/", + "reason": "ALREADY_EXISTS", + }, + { + "dataset_item_id": 2, + "filename": "test_2.jpg", + "path": "/", + "reason": "UNKNOWN_TAGS", + }, ], "items": [{"dataset_item_id": 3, "filename": "test_3.jpg", "path": "/"}], } - responses.add(responses.PUT, request_upload_endpoint, json=request_upload_response, status=200) + responses.add( + responses.PUT, + request_upload_endpoint, + json=request_upload_response, + status=200, + ) - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: with patch.object(Console, "print", return_value=None) as print_mock: upload_data( f"{team_slug}/{dataset_slug}", @@ -60,29 +87,59 @@ def test_default_non_verbose( ) get_remote_dataset_mock.assert_called_once() - assert call("Skipped 1 files already in the dataset.\n", style="warning") in print_mock.call_args_list assert ( - call("2 files couldn't be uploaded because an error occurred.\n", style="error") + call("Skipped 1 files already in the dataset.\n", style="warning") + in print_mock.call_args_list + ) + assert ( + call( + "2 files couldn't be uploaded because an error occurred.\n", + style="error", + ) + in print_mock.call_args_list + ) + assert ( + call('Re-run with "--verbose" for further details') in print_mock.call_args_list ) - assert call('Re-run with "--verbose" for further details') in print_mock.call_args_list @pytest.mark.usefixtures("file_read_write_test") @responses.activate def test_with_verbose_flag( - self, team_slug: str, dataset_slug: str, remote_dataset: RemoteDataset, request_upload_endpoint: str + self, + team_slug: str, + dataset_slug: str, + remote_dataset: RemoteDataset, + request_upload_endpoint: str, ): request_upload_response = { "blocked_items": [ - {"dataset_item_id": 1, "filename": "test_1.jpg", "path": "/", "reason": "ALREADY_EXISTS"}, - {"dataset_item_id": 2, "filename": "test_2.jpg", "path": "/", "reason": "UNKNOWN_TAGS"}, + { + "dataset_item_id": 1, + "filename": "test_1.jpg", + "path": "/", + "reason": "ALREADY_EXISTS", + }, + { + "dataset_item_id": 2, + "filename": "test_2.jpg", + 
"path": "/", + "reason": "UNKNOWN_TAGS", + }, ], "items": [{"dataset_item_id": 3, "filename": "test_3.jpg", "path": "/"}], } - responses.add(responses.PUT, request_upload_endpoint, json=request_upload_response, status=200) + responses.add( + responses.PUT, + request_upload_endpoint, + json=request_upload_response, + status=200, + ) - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: with patch.object(Console, "print", return_value=None) as print_mock: upload_data( f"{team_slug}/{dataset_slug}", @@ -97,12 +154,21 @@ def test_with_verbose_flag( ) get_remote_dataset_mock.assert_called_once() - assert call("Skipped 1 files already in the dataset.\n", style="warning") in print_mock.call_args_list assert ( - call("2 files couldn't be uploaded because an error occurred.\n", style="error") + call("Skipped 1 files already in the dataset.\n", style="warning") in print_mock.call_args_list ) - assert call('Re-run with "--verbose" for further details') not in print_mock.call_args_list + assert ( + call( + "2 files couldn't be uploaded because an error occurred.\n", + style="error", + ) + in print_mock.call_args_list + ) + assert ( + call('Re-run with "--verbose" for further details') + not in print_mock.call_args_list + ) class TestSetFileStatus: @@ -115,40 +181,84 @@ def test_raises_if_status_not_supported(self, dataset_identifier: str): set_file_status(dataset_identifier, "unknown", []) assert exception.value.code == 1 - def test_calls_dataset_archive(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_calls_dataset_archive( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(RemoteDatasetV1, "archive") as mock: - set_file_status(dataset_identifier, "archived", ["one.jpg", "two.jpg"]) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": "one.jpg,two.jpg"}) + set_file_status( + dataset_identifier, "archived", ["one.jpg", "two.jpg"] + ) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": "one.jpg,two.jpg"} + ) mock.assert_called_once_with(fetch_remote_files_mock.return_value) - def test_calls_dataset_clear(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_calls_dataset_clear( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(RemoteDatasetV1, "reset") as mock: set_file_status(dataset_identifier, "clear", ["one.jpg", "two.jpg"]) - 
get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": "one.jpg,two.jpg"}) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": "one.jpg,two.jpg"} + ) mock.assert_called_once_with(fetch_remote_files_mock.return_value) - def test_calls_dataset_new(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_calls_dataset_new( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(RemoteDatasetV1, "move_to_new") as mock: set_file_status(dataset_identifier, "new", ["one.jpg", "two.jpg"]) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": "one.jpg,two.jpg"}) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": "one.jpg,two.jpg"} + ) mock.assert_called_once_with(fetch_remote_files_mock.return_value) - def test_calls_dataset_restore_archived(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_calls_dataset_restore_archived( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(RemoteDatasetV1, "restore_archived") as mock: - set_file_status(dataset_identifier, "restore-archived", ["one.jpg", "two.jpg"]) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": "one.jpg,two.jpg"}) + set_file_status( + dataset_identifier, "restore-archived", ["one.jpg", "two.jpg"] + ) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": "one.jpg,two.jpg"} + ) mock.assert_called_once_with(fetch_remote_files_mock.return_value) @@ -157,46 +267,88 @@ class TestDeleteFiles: def dataset_identifier(self, team_slug: str, dataset_slug: str): return f"{team_slug}/{dataset_slug}" - def test_bypasses_user_prompt_if_yes_flag_is_true(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_bypasses_user_prompt_if_yes_flag_is_true( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + 
RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(RemoteDatasetV1, "delete_items") as mock: delete_files(dataset_identifier, ["one.jpg", "two.jpg"], True) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": ["one.jpg", "two.jpg"]}) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": ["one.jpg", "two.jpg"]} + ) mock.assert_called_once() - def test_deletes_items_if_user_accepts_prompt(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_deletes_items_if_user_accepts_prompt( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(builtins, "input", lambda _: "y"): with patch.object(RemoteDatasetV1, "delete_items") as mock: delete_files(dataset_identifier, ["one.jpg", "two.jpg"]) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": ["one.jpg", "two.jpg"]}) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": ["one.jpg", "two.jpg"]} + ) mock.assert_called_once() - def test_does_not_delete_items_if_user_refuses_prompt(self, dataset_identifier: str, remote_dataset: RemoteDataset): - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: + def test_does_not_delete_items_if_user_refuses_prompt( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: with patch.object(builtins, "input", lambda _: "n"): with patch.object(RemoteDatasetV1, "delete_items") as mock: delete_files(dataset_identifier, ["one.jpg", "two.jpg"]) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": ["one.jpg", "two.jpg"]}) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": ["one.jpg", "two.jpg"]} + ) mock.assert_not_called() - def test_exits_if_error_occurs(self, dataset_identifier: str, remote_dataset: RemoteDataset): + def test_exits_if_error_occurs( + self, dataset_identifier: str, remote_dataset: RemoteDataset + ): def error_mock(): raise ValueError("Something went Wrong") with patch.object(sys, "exit") as exception: - with patch.object(Client, "get_remote_dataset", return_value=remote_dataset) as get_remote_dataset_mock: - with patch.object(RemoteDatasetV1, "fetch_remote_files") as fetch_remote_files_mock: - with patch.object(RemoteDatasetV1, "delete_items", 
side_effect=error_mock) as mock: + with patch.object( + Client, "get_remote_dataset", return_value=remote_dataset + ) as get_remote_dataset_mock: + with patch.object( + RemoteDatasetV1, "fetch_remote_files" + ) as fetch_remote_files_mock: + with patch.object( + RemoteDatasetV1, "delete_items", side_effect=error_mock + ) as mock: delete_files(dataset_identifier, ["one.jpg", "two.jpg"], True) - get_remote_dataset_mock.assert_called_once_with(dataset_identifier=dataset_identifier) - fetch_remote_files_mock.assert_called_once_with({"filenames": ["one.jpg", "two.jpg"]}) + get_remote_dataset_mock.assert_called_once_with( + dataset_identifier=dataset_identifier + ) + fetch_remote_files_mock.assert_called_once_with( + {"filenames": ["one.jpg", "two.jpg"]} + ) mock.assert_called_once() exception.assert_called_once_with(1) diff --git a/tests/darwin/client_test.py b/tests/darwin/client_test.py index 4cf318ee2..deb8d3e34 100644 --- a/tests/darwin/client_test.py +++ b/tests/darwin/client_test.py @@ -15,7 +15,9 @@ @pytest.fixture -def darwin_client(darwin_config_path: Path, darwin_datasets_path: Path, team_slug: str) -> Client: +def darwin_client( + darwin_config_path: Path, darwin_datasets_path: Path, team_slug: str +) -> Client: config = Config(darwin_config_path) config.put(["global", "api_endpoint"], "http://localhost/api") config.put(["global", "base_url"], "http://localhost") @@ -49,7 +51,9 @@ def test_returns_list_of_datasets(self, darwin_client: Client) -> None: }, ] - responses.add(responses.GET, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.GET, darwin_client.url + endpoint, json=json_response, status=200 + ) remote_datasets = list(darwin_client.list_remote_datasets(team_slug)) @@ -108,7 +112,9 @@ def test_coalesces_null_item_counts_to_zeroes(self, darwin_client: Client) -> No }, ] - responses.add(responses.GET, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.GET, darwin_client.url + endpoint, json=json_response, status=200 + ) remote_datasets = list(darwin_client.list_remote_datasets(team_slug)) @@ -133,7 +139,9 @@ def test_raises_if_dataset_is_not_found(self, darwin_client: Client) -> None: } ] - responses.add(responses.GET, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.GET, darwin_client.url + endpoint, json=json_response, status=200 + ) with pytest.raises(NotFound): darwin_client.get_remote_dataset("v7/dataset-slug-2") @@ -152,7 +160,9 @@ def test_returns_the_dataset(self, darwin_client: Client) -> None: } ] - responses.add(responses.GET, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.GET, darwin_client.url + endpoint, json=json_response, status=200 + ) actual_dataset = darwin_client.get_remote_dataset("v7/dataset-slug-1") for version in [RemoteDatasetV1, RemoteDatasetV2]: @@ -182,7 +192,9 @@ def test_returns_the_created_dataset(self, darwin_client: Client) -> None: "progress": 0, } - responses.add(responses.POST, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.POST, darwin_client.url + endpoint, json=json_response, status=200 + ) actual_dataset = darwin_client.create_dataset("my-dataset", "v7") expected_dataset = RemoteDatasetV1( @@ -218,16 +230,22 @@ class TestFetchRemoteFiles: @responses.activate def test_returns_remote_files(self, darwin_client: Client) -> None: dataset_id = 1 - endpoint: str = f"/datasets/{dataset_id}/items?page%5Bsize%5D=500&page%5Bfrom%5D=0" + 
endpoint: str = ( + f"/datasets/{dataset_id}/items?page%5Bsize%5D=500&page%5Bfrom%5D=0" + ) responses.add(responses.POST, darwin_client.url + endpoint, json={}, status=200) - darwin_client.fetch_remote_files(dataset_id, {"page[size]": 500, "page[from]": 0}, {}, "v7") + darwin_client.fetch_remote_files( + dataset_id, {"page[size]": 500, "page[from]": 0}, {}, "v7" + ) @pytest.mark.usefixtures("file_read_write_test") class TestFetchRemoteClasses: @responses.activate - def test_returns_remote_classes(self, team_slug: str, darwin_client: Client) -> None: + def test_returns_remote_classes( + self, team_slug: str, darwin_client: Client + ) -> None: endpoint: str = f"/teams/{team_slug}/annotation_classes?include_tags=true" response: JSONFreeForm = { "annotation_classes": [ @@ -248,7 +266,9 @@ def test_returns_remote_classes(self, team_slug: str, darwin_client: Client) -> ] } - responses.add(responses.GET, darwin_client.url + endpoint, json=response, status=200) + responses.add( + responses.GET, darwin_client.url + endpoint, json=response, status=200 + ) result: List[JSONFreeForm] = darwin_client.fetch_remote_classes(team_slug) annotation_class: JSONFreeForm = result[0] @@ -263,14 +283,18 @@ def test_returns_remote_classes(self, team_slug: str, darwin_client: Client) -> @pytest.mark.usefixtures("file_read_write_test") class TestGetTeamFeatures: @responses.activate - def test_returns_list_of_features(self, team_slug: str, darwin_client: Client) -> None: + def test_returns_list_of_features( + self, team_slug: str, darwin_client: Client + ) -> None: endpoint: str = f"/teams/{team_slug}/features" json_response = [ {"enabled": False, "name": "WORKFLOW_V2"}, {"enabled": True, "name": "BLIND_STAGE"}, ] - responses.add(responses.GET, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.GET, darwin_client.url + endpoint, json=json_response, status=200 + ) assert darwin_client.get_team_features(team_slug) == [ Feature(name="WORKFLOW_V2", enabled=False), @@ -286,7 +310,9 @@ def test_raises_if_workflow_id_is_not_found(self, darwin_client: Client) -> None endpoint: str = f"/dataset_items/{item_id}/workflow" json_response: JSONFreeForm = {} - responses.add(responses.POST, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.POST, darwin_client.url + endpoint, json=json_response, status=200 + ) with pytest.raises(ValueError) as exception: darwin_client.instantiate_item(item_id) @@ -300,7 +326,9 @@ def test_returns_workflow_id(self, darwin_client: Client) -> None: endpoint: str = f"/dataset_items/{item_id}/workflow" json_response: JSONFreeForm = {"current_workflow_id": workflow_id} - responses.add(responses.POST, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.POST, darwin_client.url + endpoint, json=json_response, status=200 + ) assert darwin_client.instantiate_item(item_id) == workflow_id @@ -312,12 +340,17 @@ def test_raises_if_comment_id_is_not_found(self, darwin_client: Client) -> None: endpoint: str = f"/workflows/{workflow_id}/workflow_comment_threads" json_response: JSONFreeForm = {} - responses.add(responses.POST, darwin_client.url + endpoint, json=json_response, status=200) + responses.add( + responses.POST, darwin_client.url + endpoint, json=json_response, status=200 + ) with pytest.raises(ValueError) as exception: darwin_client.post_workflow_comment(workflow_id, "My comment.") - assert str(exception.value) == f"Unable to retrieve comment id for workflow: {workflow_id}." 
+ assert ( + str(exception.value) + == f"Unable to retrieve comment id for workflow: {workflow_id}." + ) @responses.activate def test_returns_comment_id(self, darwin_client: Client) -> None: @@ -326,8 +359,13 @@ def test_returns_comment_id(self, darwin_client: Client) -> None: endpoint: str = f"/workflows/{workflow_id}/workflow_comment_threads" json_response: JSONFreeForm = {"id": comment_id} - responses.add(responses.POST, darwin_client.url + endpoint, json=json_response, status=200) - assert darwin_client.post_workflow_comment(workflow_id, "My comment.") == comment_id + responses.add( + responses.POST, darwin_client.url + endpoint, json=json_response, status=200 + ) + assert ( + darwin_client.post_workflow_comment(workflow_id, "My comment.") + == comment_id + ) def assert_dataset(dataset_1: RemoteDataset, dataset_2: RemoteDataset) -> None: diff --git a/tests/darwin/dataset/download_manager_test.py b/tests/darwin/dataset/download_manager_test.py index 622d7226b..de9e1da4e 100644 --- a/tests/darwin/dataset/download_manager_test.py +++ b/tests/darwin/dataset/download_manager_test.py @@ -54,7 +54,9 @@ def test_parse_manifests(manifest_paths: List[Path]) -> None: assert segment_manifests[3].items[1].visibility == True -def test_get_segment_manifests(manifest_paths: List[Path], slot_w_manifests: Slot) -> None: +def test_get_segment_manifests( + manifest_paths: List[Path], slot_w_manifests: Slot +) -> None: parent_path = Path("tests/darwin/dataset/data/manifest_examples") files = [open(path, "r").read() for path in manifest_paths] with responses.RequestsMock() as rsps: diff --git a/tests/darwin/dataset/item_test.py b/tests/darwin/dataset/item_test.py index 52690e421..db9d2d5a0 100644 --- a/tests/darwin/dataset/item_test.py +++ b/tests/darwin/dataset/item_test.py @@ -30,6 +30,8 @@ def test_item_parse_w_slots(response_json_slots: dict) -> None: assert item.dataset_id == response_json_slots["dataset_id"] assert item.dataset_slug == "test_dataset_slug" assert item.seq == response_json_slots["seq"] - assert item.current_workflow_id == response_json_slots["workflow_data"]["workflow_id"] + assert ( + item.current_workflow_id == response_json_slots["workflow_data"]["workflow_id"] + ) assert item.current_workflow == response_json_slots["workflow_data"] assert item.slots == response_json_slots["slots"] diff --git a/tests/darwin/dataset/local_dataset_test.py b/tests/darwin/dataset/local_dataset_test.py index 48a8fd7f5..6e0039462 100644 --- a/tests/darwin/dataset/local_dataset_test.py +++ b/tests/darwin/dataset/local_dataset_test.py @@ -16,7 +16,11 @@ def test_look_into_annotations_directory_if_no_partition_specified( (annotations_path / "2" / "2.json").mkdir(parents=True) (annotations_path / "test" / "3" / "3.json").mkdir(parents=True) - stems = list(build_stems(team_dataset_release_path, annotations_path, "tag", split_path.name)) + stems = list( + build_stems( + team_dataset_release_path, annotations_path, "tag", split_path.name + ) + ) assert "1" in stems assert "2/2" in stems or "2\\2" in stems @@ -26,15 +30,34 @@ def test_raise_value_error_if_split_type_is_unknown( self, team_dataset_release_path: Path, annotations_path: Path, split_path: Path ): with pytest.raises(ValueError) as e: - build_stems(team_dataset_release_path, annotations_path, "tag", split_path.name, "train", "unknown") + build_stems( + team_dataset_release_path, + annotations_path, + "tag", + split_path.name, + "train", + "unknown", + ) assert str(e.value) == 'Unknown split type "unknown"' - def test_stems_ending_with_spaces(self, 
team_dataset_release_path: Path, annotations_path: Path, split_path: Path): - resource_file = Path("tests") / "darwin" / "dataset" / "resources" / "random_train" + def test_stems_ending_with_spaces( + self, team_dataset_release_path: Path, annotations_path: Path, split_path: Path + ): + resource_file = ( + Path("tests") / "darwin" / "dataset" / "resources" / "random_train" + ) copyfile(resource_file, split_path / "random_train.txt") - stems = list(build_stems(team_dataset_release_path, annotations_path, "tag", split_path.name, "train")) + stems = list( + build_stems( + team_dataset_release_path, + annotations_path, + "tag", + split_path.name, + "train", + ) + ) assert "one" in stems assert "two " in stems @@ -44,7 +67,13 @@ def test_raise_file_not_found_if_split_file_does_not_exists( self, team_dataset_release_path: Path, annotations_path: Path, split_path: Path ): with pytest.raises(FileNotFoundError) as e: - build_stems(team_dataset_release_path, annotations_path, "tag", split_path.name, "train") + build_stems( + team_dataset_release_path, + annotations_path, + "tag", + split_path.name, + "train", + ) assert ( str(e.value) diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index b81dfe29c..1fced67f5 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -37,9 +37,33 @@ def annotation_content() -> Dict[str, Any]: "annotations": [ { "frames": { - "0": {"polygon": {"path": [{"x": 0, "y": 0}, {"x": 1, "y": 1}, {"x": 1, "y": 0}]}}, - "2": {"polygon": {"path": [{"x": 5, "y": 5}, {"x": 6, "y": 6}, {"x": 6, "y": 5}]}}, - "4": {"polygon": {"path": [{"x": 9, "y": 9}, {"x": 8, "y": 8}, {"x": 8, "y": 9}]}}, + "0": { + "polygon": { + "path": [ + {"x": 0, "y": 0}, + {"x": 1, "y": 1}, + {"x": 1, "y": 0}, + ] + } + }, + "2": { + "polygon": { + "path": [ + {"x": 5, "y": 5}, + {"x": 6, "y": 6}, + {"x": 6, "y": 5}, + ] + } + }, + "4": { + "polygon": { + "path": [ + {"x": 9, "y": 9}, + {"x": 8, "y": 8}, + {"x": 8, "y": 9}, + ] + } + }, }, "name": "test_class", "segments": [[0, 3]], @@ -49,7 +73,9 @@ def annotation_content() -> Dict[str, Any]: @pytest.fixture -def darwin_client(darwin_config_path: Path, darwin_datasets_path: Path, team_slug: str) -> Client: +def darwin_client( + darwin_config_path: Path, darwin_datasets_path: Path, team_slug: str +) -> Client: config = Config(darwin_config_path) config.put(["global", "api_endpoint"], "http://localhost/api") config.put(["global", "base_url"], "http://localhost") @@ -67,7 +93,14 @@ def create_annotation_file( annotation_name: str, annotation_content: dict, ): - annotations: Path = darwin_datasets_path / team_slug / dataset_slug / "releases" / release_name / "annotations" + annotations: Path = ( + darwin_datasets_path + / team_slug + / dataset_slug + / "releases" + / release_name + / "annotations" + ) annotations.mkdir(exist_ok=True, parents=True) with (annotations / annotation_name).open("w") as f: @@ -338,13 +371,23 @@ def test_works_on_videos( team_slug: str, ): remote_dataset = RemoteDatasetV1( - client=darwin_client, team=team_slug, name=dataset_name, slug=dataset_slug, dataset_id=1 + client=darwin_client, + team=team_slug, + name=dataset_name, + slug=dataset_slug, + dataset_id=1, ) remote_dataset.split_video_annotations() video_path = ( - darwin_datasets_path / team_slug / dataset_slug / "releases" / release_name / "annotations" / "test_video" + darwin_datasets_path + / team_slug + / dataset_slug + / "releases" + / release_name + / "annotations" + / 
"test_video" ) assert video_path.exists() @@ -356,23 +399,56 @@ def test_works_on_videos( with (video_path / "0000000.json").open() as f: assert json.loads(f.read()) == { "annotations": [ - {"name": "test_class", "polygon": {"path": [{"x": 0, "y": 0}, {"x": 1, "y": 1}, {"x": 1, "y": 0}]}} + { + "name": "test_class", + "polygon": { + "path": [ + {"x": 0, "y": 0}, + {"x": 1, "y": 1}, + {"x": 1, "y": 0}, + ] + }, + } ], - "image": {"filename": "test_video/0000000.png", "height": 1080, "url": "frame_1.jpg", "width": 1920}, + "image": { + "filename": "test_video/0000000.png", + "height": 1080, + "url": "frame_1.jpg", + "width": 1920, + }, } with (video_path / "0000001.json").open() as f: assert json.loads(f.read()) == { "annotations": [], - "image": {"filename": "test_video/0000001.png", "height": 1080, "url": "frame_2.jpg", "width": 1920}, + "image": { + "filename": "test_video/0000001.png", + "height": 1080, + "url": "frame_2.jpg", + "width": 1920, + }, } with (video_path / "0000002.json").open() as f: assert json.loads(f.read()) == { "annotations": [ - {"name": "test_class", "polygon": {"path": [{"x": 5, "y": 5}, {"x": 6, "y": 6}, {"x": 6, "y": 5}]}} + { + "name": "test_class", + "polygon": { + "path": [ + {"x": 5, "y": 5}, + {"x": 6, "y": 6}, + {"x": 6, "y": 5}, + ] + }, + } ], - "image": {"filename": "test_video/0000002.png", "height": 1080, "url": "frame_3.jpg", "width": 1920}, + "image": { + "filename": "test_video/0000002.png", + "height": 1080, + "url": "frame_3.jpg", + "width": 1920, + }, } @@ -380,10 +456,19 @@ def test_works_on_videos( class TestFetchRemoteFiles: @responses.activate def test_works( - self, darwin_client: Client, dataset_name: str, dataset_slug: str, team_slug: str, files_content: dict + self, + darwin_client: Client, + dataset_name: str, + dataset_slug: str, + team_slug: str, + files_content: dict, ): remote_dataset = RemoteDatasetV1( - client=darwin_client, team=team_slug, name=dataset_name, slug=dataset_slug, dataset_id=1 + client=darwin_client, + team=team_slug, + name=dataset_name, + slug=dataset_slug, + dataset_id=1, ) url = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500" responses.add( @@ -406,10 +491,19 @@ def test_works( @responses.activate def test_fetches_files_with_commas( - self, darwin_client: Client, dataset_name: str, dataset_slug: str, team_slug: str, files_content: dict + self, + darwin_client: Client, + dataset_name: str, + dataset_slug: str, + team_slug: str, + files_content: dict, ): remote_dataset = RemoteDatasetV1( - client=darwin_client, team=team_slug, name=dataset_name, slug=dataset_slug, dataset_id=1 + client=darwin_client, + team=team_slug, + name=dataset_name, + slug=dataset_slug, + dataset_id=1, ) url = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500" responses.add( @@ -419,7 +513,11 @@ def test_fetches_files_with_commas( status=200, ) - list(remote_dataset.fetch_remote_files({"filenames": ["example,with, comma.mp4"]})) + list( + remote_dataset.fetch_remote_files( + {"filenames": ["example,with, comma.mp4"]} + ) + ) request_body = json.loads(responses.calls[0].request.body) @@ -427,8 +525,16 @@ def test_fetches_files_with_commas( @pytest.fixture -def remote_dataset(darwin_client: Client, dataset_name: str, dataset_slug: str, team_slug: str): - return RemoteDatasetV1(client=darwin_client, team=team_slug, name=dataset_name, slug=dataset_slug, dataset_id=1) +def remote_dataset( + darwin_client: Client, dataset_name: str, dataset_slug: str, team_slug: str +): + return RemoteDatasetV1( + client=darwin_client, + 
team=team_slug, + name=dataset_name, + slug=dataset_slug, + dataset_id=1, + ) @pytest.mark.usefixtures("file_read_write_test") @@ -437,20 +543,28 @@ def test_raises_if_files_are_not_provided(self, remote_dataset: RemoteDataset): with pytest.raises(ValueError): remote_dataset.push(None) - def test_raises_if_both_path_and_local_files_are_given(self, remote_dataset: RemoteDataset): + def test_raises_if_both_path_and_local_files_are_given( + self, remote_dataset: RemoteDataset + ): with pytest.raises(ValueError): remote_dataset.push([LocalFile("test.jpg")], path="test") - def test_raises_if_both_fps_and_local_files_are_given(self, remote_dataset: RemoteDataset): + def test_raises_if_both_fps_and_local_files_are_given( + self, remote_dataset: RemoteDataset + ): with pytest.raises(ValueError): remote_dataset.push([LocalFile("test.jpg")], fps=2) - def test_raises_if_both_as_frames_and_local_files_are_given(self, remote_dataset: RemoteDataset): + def test_raises_if_both_as_frames_and_local_files_are_given( + self, remote_dataset: RemoteDataset + ): with pytest.raises(ValueError): remote_dataset.push([LocalFile("test.jpg")], as_frames=True) def test_works_with_local_files_list(self, remote_dataset: RemoteDataset): - assert_upload_mocks_are_correctly_called(remote_dataset, [LocalFile("test.jpg")]) + assert_upload_mocks_are_correctly_called( + remote_dataset, [LocalFile("test.jpg")] + ) def test_works_with_path_list(self, remote_dataset: RemoteDataset): assert_upload_mocks_are_correctly_called(remote_dataset, [Path("test.jpg")]) @@ -487,7 +601,9 @@ def test_raises_with_unsupported_files(self, remote_dataset: RemoteDataset): @pytest.mark.usefixtures("file_read_write_test") class TestPull: @patch("platform.system", return_value="Linux") - def test_gets_latest_release_when_not_given_one(self, system_mock: MagicMock, remote_dataset: RemoteDataset): + def test_gets_latest_release_when_not_given_one( + self, system_mock: MagicMock, remote_dataset: RemoteDataset + ): stub_release_response = Release( "dataset-slug", "team-slug", @@ -507,13 +623,17 @@ def fake_download_zip(self, path): shutil.copy(zip, path) return path - with patch.object(RemoteDataset, "get_release", return_value=stub_release_response) as get_release_stub: + with patch.object( + RemoteDataset, "get_release", return_value=stub_release_response + ) as get_release_stub: with patch.object(Release, "download_zip", new=fake_download_zip): remote_dataset.pull(only_annotations=True) get_release_stub.assert_called_once() @patch("platform.system", return_value="Windows") - def test_does_not_create_symlink_on_windows(self, mocker: MagicMock, remote_dataset: RemoteDataset): + def test_does_not_create_symlink_on_windows( + self, mocker: MagicMock, remote_dataset: RemoteDataset + ): stub_release_response = Release( "dataset-slug", "team-slug", @@ -535,13 +655,17 @@ def fake_download_zip(self, path): latest: Path = remote_dataset.local_releases_path / "latest" - with patch.object(RemoteDataset, "get_release", return_value=stub_release_response): + with patch.object( + RemoteDataset, "get_release", return_value=stub_release_response + ): with patch.object(Release, "download_zip", new=fake_download_zip): remote_dataset.pull(only_annotations=True) assert not latest.is_symlink() @patch("platform.system", return_value="Linux") - def test_continues_if_symlink_creation_fails(self, system_mock: MagicMock, remote_dataset: RemoteDataset): + def test_continues_if_symlink_creation_fails( + self, system_mock: MagicMock, remote_dataset: RemoteDataset + ): 
stub_release_response = Release( "dataset-slug", "team-slug", @@ -564,14 +688,18 @@ def fake_download_zip(self, path): latest: Path = remote_dataset.local_releases_path / "latest" with patch.object(Path, "symlink_to") as mock_symlink_to: - with patch.object(RemoteDataset, "get_release", return_value=stub_release_response): + with patch.object( + RemoteDataset, "get_release", return_value=stub_release_response + ): with patch.object(Release, "download_zip", new=fake_download_zip): mock_symlink_to.side_effect = OSError() remote_dataset.pull(only_annotations=True) assert not latest.is_symlink() @patch("platform.system", return_value="Linux") - def test_raises_if_release_format_is_not_json(self, system_mock: MagicMock, remote_dataset: RemoteDataset): + def test_raises_if_release_format_is_not_json( + self, system_mock: MagicMock, remote_dataset: RemoteDataset + ): a_release = Release( remote_dataset.slug, remote_dataset.team, @@ -611,61 +739,96 @@ def dataset_item(dataset_slug: str) -> DatasetItem: @pytest.mark.usefixtures("file_read_write_test") class TestArchive: def test_calls_client_put( - self, remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str + self, + remote_dataset: RemoteDataset, + dataset_item: DatasetItem, + team_slug: str, + dataset_slug: str, ): with patch.object(Client, "archive_item", return_value={}) as stub: remote_dataset.archive([dataset_item]) - stub.assert_called_once_with(dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}}) + stub.assert_called_once_with( + dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}} + ) @pytest.mark.usefixtures("file_read_write_test") class TestMoveToNew: def test_calls_client_put( - self, remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str + self, + remote_dataset: RemoteDataset, + dataset_item: DatasetItem, + team_slug: str, + dataset_slug: str, ): with patch.object(Client, "move_item_to_new", return_value={}) as stub: remote_dataset.move_to_new([dataset_item]) - stub.assert_called_once_with(dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}}) + stub.assert_called_once_with( + dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}} + ) @pytest.mark.usefixtures("file_read_write_test") class TestReset: def test_calls_client_put( - self, remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str + self, + remote_dataset: RemoteDataset, + dataset_item: DatasetItem, + team_slug: str, + dataset_slug: str, ): with patch.object(Client, "reset_item", return_value={}) as stub: remote_dataset.reset([dataset_item]) - stub.assert_called_once_with(dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}}) + stub.assert_called_once_with( + dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}} + ) @pytest.mark.usefixtures("file_read_write_test") class TestRestoreArchived: def test_calls_client_put( - self, remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str + self, + remote_dataset: RemoteDataset, + dataset_item: DatasetItem, + team_slug: str, + dataset_slug: str, ): with patch.object(Client, "restore_archived_item", return_value={}) as stub: remote_dataset.restore_archived([dataset_item]) - stub.assert_called_once_with(dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}}) + stub.assert_called_once_with( + dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}} + ) @pytest.mark.usefixtures("file_read_write_test") class TestDeleteItems: 
def test_calls_client_delete( - self, remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str + self, + remote_dataset: RemoteDataset, + dataset_item: DatasetItem, + team_slug: str, + dataset_slug: str, ): with patch.object(Client, "delete_item", return_value={}) as stub: remote_dataset.delete_items([dataset_item]) - stub.assert_called_once_with("test-dataset", team_slug, {"filter": {"dataset_item_ids": [1]}}) + stub.assert_called_once_with( + "test-dataset", team_slug, {"filter": {"dataset_item_ids": [1]}} + ) def assert_upload_mocks_are_correctly_called(remote_dataset: RemoteDataset, *args): - with patch.object(UploadHandlerV1, "_request_upload", return_value=([], [])) as request_upload_mock: + with patch.object( + UploadHandlerV1, "_request_upload", return_value=([], []) + ) as request_upload_mock: with patch.object(UploadHandlerV1, "upload") as upload_mock: remote_dataset.push(*args) request_upload_mock.assert_called_once() upload_mock.assert_called_once_with( - multi_threaded=True, progress_callback=None, file_upload_callback=None, max_workers=None + multi_threaded=True, + progress_callback=None, + file_upload_callback=None, + max_workers=None, ) diff --git a/tests/darwin/dataset/split_manager_test.py b/tests/darwin/dataset/split_manager_test.py index 95b3426e4..1d9cb9ebd 100644 --- a/tests/darwin/dataset/split_manager_test.py +++ b/tests/darwin/dataset/split_manager_test.py @@ -21,17 +21,33 @@ def test_requires_scikit_learn(): class TestClassificationDataset: - @pytest.mark.parametrize("val_percentage,test_percentage", [(0, 0.3), (0, 0), (0.2, 0), (0.5, 0.5), (1, 0.1)]) + @pytest.mark.parametrize( + "val_percentage,test_percentage", + [(0, 0.3), (0, 0), (0.2, 0), (0.5, 0.5), (1, 0.1)], + ) def test_raises_for_invalid_split_configuration( - self, team_slug: str, team_extracted_dataset_path: Path, val_percentage: float, test_percentage: float + self, + team_slug: str, + team_extracted_dataset_path: Path, + val_percentage: float, + test_percentage: float, ): with pytest.raises(ValueError): root = team_extracted_dataset_path / team_slug / "sl" - split_dataset(root, release_name="latest", val_percentage=val_percentage, test_percentage=test_percentage) + split_dataset( + root, + release_name="latest", + val_percentage=val_percentage, + test_percentage=test_percentage, + ) @pytest.mark.parametrize("val_percentage,test_percentage", [(0.2, 0.3), (0.3, 0.2)]) def test_should_split_a_dataset( - self, team_slug: str, team_extracted_dataset_path: Path, val_percentage: float, test_percentage: float + self, + team_slug: str, + team_extracted_dataset_path: Path, + val_percentage: float, + test_percentage: float, ): root = team_extracted_dataset_path / team_slug / "sl" @@ -39,7 +55,10 @@ def test_should_split_a_dataset( tot_size: int = len(list((root / "images").glob("*"))) splits: Path = split_dataset( - root, release_name="latest", val_percentage=val_percentage, test_percentage=test_percentage + root, + release_name="latest", + val_percentage=val_percentage, + test_percentage=test_percentage, ) sizes = (train_percentage, val_percentage, test_percentage) diff --git a/tests/darwin/dataset/upload_manager_test.py b/tests/darwin/dataset/upload_manager_test.py index b4d212b4d..8172e0b1d 100644 --- a/tests/darwin/dataset/upload_manager_test.py +++ b/tests/darwin/dataset/upload_manager_test.py @@ -19,7 +19,9 @@ @pytest.fixture -def darwin_client(darwin_config_path: Path, darwin_datasets_path: Path, team_slug: str) -> Client: +def darwin_client( + darwin_config_path: 
Path, darwin_datasets_path: Path, team_slug: str +) -> Client: config = Config(darwin_config_path) config.put(["global", "api_endpoint"], "http://localhost/api") config.put(["global", "base_url"], "http://localhost") @@ -40,12 +42,20 @@ def request_upload_endpoint(team_slug: str, dataset_slug: str): @pytest.fixture def dataset(darwin_client: Client, team_slug: str, dataset_slug: str) -> RemoteDataset: - return RemoteDatasetV1(client=darwin_client, team=team_slug, name=dataset_slug, slug=dataset_slug, dataset_id=1) + return RemoteDatasetV1( + client=darwin_client, + team=team_slug, + name=dataset_slug, + slug=dataset_slug, + dataset_id=1, + ) @pytest.mark.usefixtures("file_read_write_test") @responses.activate -def test_request_upload_is_not_called_on_init(dataset: RemoteDataset, request_upload_endpoint: str): +def test_request_upload_is_not_called_on_init( + dataset: RemoteDataset, request_upload_endpoint: str +): upload_handler = UploadHandler.build(dataset, []) assert upload_handler.pending_count == 0 @@ -58,7 +68,10 @@ def test_request_upload_is_not_called_on_init(dataset: RemoteDataset, request_up @pytest.mark.usefixtures("file_read_write_test") @responses.activate def test_pending_count_is_correct(dataset: RemoteDataset, request_upload_endpoint: str): - response = {"blocked_items": [], "items": [{"dataset_item_id": 1, "filename": "test.jpg", "path": "/"}]} + response = { + "blocked_items": [], + "items": [{"dataset_item_id": 1, "filename": "test.jpg", "path": "/"}], + } responses.add(responses.PUT, request_upload_endpoint, json=response, status=200) @@ -81,7 +94,14 @@ def test_pending_count_is_correct(dataset: RemoteDataset, request_upload_endpoin @responses.activate def test_blocked_count_is_correct(dataset: RemoteDataset, request_upload_endpoint: str): response = { - "blocked_items": [{"dataset_item_id": 1, "filename": "test.jpg", "path": "/", "reason": "ALREADY_EXISTS"}], + "blocked_items": [ + { + "dataset_item_id": 1, + "filename": "test.jpg", + "path": "/", + "reason": "ALREADY_EXISTS", + } + ], "items": [], } @@ -111,11 +131,15 @@ def test_error_count_is_correct(dataset: RemoteDataset, request_upload_endpoint: } sign_upload_endpoint = "http://localhost/api/dataset_items/1/sign_upload" - upload_to_s3_endpoint = "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + upload_to_s3_endpoint = ( + "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + ) confirm_upload_endpoint = "http://localhost/api/dataset_items/1/confirm_upload" - responses.add(responses.PUT, request_upload_endpoint, json=request_upload_response, status=200) + responses.add( + responses.PUT, request_upload_endpoint, json=request_upload_response, status=200 + ) responses.add(responses.GET, sign_upload_endpoint, status=500) local_file = LocalFile(local_path=Path("test.jpg")) @@ -147,14 +171,20 @@ def test_error_count_is_correct(dataset: RemoteDataset, request_upload_endpoint: "items": [{"dataset_item_id": 1, "filename": "test.jpg", "path": "/"}], } - upload_to_s3_endpoint = "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + upload_to_s3_endpoint = ( + "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + ) confirm_upload_endpoint = "http://localhost/api/dataset_items/1/confirm_upload" sign_upload_endpoint = "http://localhost/api/dataset_items/1/sign_upload" sign_upload_response = {"upload_url": upload_to_s3_endpoint} - responses.add(responses.PUT, request_upload_endpoint, json=request_upload_response, status=200) 
- responses.add(responses.GET, sign_upload_endpoint, json=sign_upload_response, status=200) + responses.add( + responses.PUT, request_upload_endpoint, json=request_upload_response, status=200 + ) + responses.add( + responses.GET, sign_upload_endpoint, json=sign_upload_response, status=200 + ) responses.add(responses.PUT, upload_to_s3_endpoint, status=500) Path("test.jpg").touch() @@ -187,14 +217,20 @@ def test_error_count_is_correct(dataset: RemoteDataset, request_upload_endpoint: "items": [{"dataset_item_id": 1, "filename": "test.jpg", "path": "/"}], } - upload_to_s3_endpoint = "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + upload_to_s3_endpoint = ( + "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + ) confirm_upload_endpoint = "http://localhost/api/dataset_items/1/confirm_upload" sign_upload_endpoint = "http://localhost/api/dataset_items/1/sign_upload" sign_upload_response = {"upload_url": upload_to_s3_endpoint} - responses.add(responses.PUT, request_upload_endpoint, json=request_upload_response, status=200) - responses.add(responses.GET, sign_upload_endpoint, json=sign_upload_response, status=200) + responses.add( + responses.PUT, request_upload_endpoint, json=request_upload_response, status=200 + ) + responses.add( + responses.GET, sign_upload_endpoint, json=sign_upload_response, status=200 + ) responses.add(responses.PUT, upload_to_s3_endpoint, status=201) responses.add(responses.PUT, confirm_upload_endpoint, status=500) @@ -228,14 +264,20 @@ def test_upload_files(dataset: RemoteDataset, request_upload_endpoint: str): "items": [{"dataset_item_id": 1, "filename": "test.jpg", "path": "/"}], } - upload_to_s3_endpoint = "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + upload_to_s3_endpoint = ( + "https://darwin-data.s3.eu-west-1.amazonaws.com/test.jpg?X-Amz-Signature=abc" + ) confirm_upload_endpoint = "http://localhost/api/dataset_items/1/confirm_upload" sign_upload_endpoint = "http://localhost/api/dataset_items/1/sign_upload" sign_upload_response = {"upload_url": upload_to_s3_endpoint} - responses.add(responses.PUT, request_upload_endpoint, json=request_upload_response, status=200) - responses.add(responses.GET, sign_upload_endpoint, json=sign_upload_response, status=200) + responses.add( + responses.PUT, request_upload_endpoint, json=request_upload_response, status=200 + ) + responses.add( + responses.GET, sign_upload_endpoint, json=sign_upload_response, status=200 + ) responses.add(responses.PUT, upload_to_s3_endpoint, status=201) responses.add(responses.PUT, confirm_upload_endpoint, status=200) diff --git a/tests/darwin/exporter/formats/export_coco_test.py b/tests/darwin/exporter/formats/export_coco_test.py index 399bf78aa..ecdac9aed 100644 --- a/tests/darwin/exporter/formats/export_coco_test.py +++ b/tests/darwin/exporter/formats/export_coco_test.py @@ -9,7 +9,12 @@ class TestBuildAnnotations: @pytest.fixture def annotation_file(self) -> dt.AnnotationFile: - return dt.AnnotationFile(path=Path("test.json"), filename="test.json", annotation_classes=set(), annotations=[]) + return dt.AnnotationFile( + path=Path("test.json"), + filename="test.json", + annotation_classes=set(), + annotations=[], + ) def test_polygon_include_extras(self, annotation_file: dt.AnnotationFile): polygon = dt.Annotation( @@ -20,7 +25,9 @@ def test_polygon_include_extras(self, annotation_file: dt.AnnotationFile): categories = {"polygon_class": 1} - assert coco._build_annotation(annotation_file, "test-id", polygon, 
categories)["extra"] == {"instance_id": 1} + assert coco._build_annotation(annotation_file, "test-id", polygon, categories)[ + "extra" + ] == {"instance_id": 1} def test_bounding_boxes_include_extras(self, annotation_file: dt.AnnotationFile): bbox = dt.Annotation( @@ -31,4 +38,6 @@ def test_bounding_boxes_include_extras(self, annotation_file: dt.AnnotationFile) categories = {"bbox_class": 1} - assert coco._build_annotation(annotation_file, "test-id", bbox, categories)["extra"] == {"instance_id": 1} + assert coco._build_annotation(annotation_file, "test-id", bbox, categories)[ + "extra" + ] == {"instance_id": 1} diff --git a/tests/darwin/exporter/formats/export_darwin_test.py b/tests/darwin/exporter/formats/export_darwin_test.py index af4eb34c8..5274c9b45 100644 --- a/tests/darwin/exporter/formats/export_darwin_test.py +++ b/tests/darwin/exporter/formats/export_darwin_test.py @@ -6,7 +6,10 @@ def test_empty_annotation_file(): annotation_file = AnnotationFile( - path=Path("test.json"), filename="test.json", annotation_classes=[], annotations=[] + path=Path("test.json"), + filename="test.json", + annotation_classes=[], + annotations=[], ) assert build_image_annotation(annotation_file) == { @@ -17,7 +20,9 @@ def test_empty_annotation_file(): def test_complete_annotation_file(): annotation_class = AnnotationClass(name="test", annotation_type="polygon") - annotation = Annotation(annotation_class=annotation_class, data={"path": []}, subs=[]) + annotation = Annotation( + annotation_class=annotation_class, data={"path": []}, subs=[] + ) annotation_file = AnnotationFile( path=Path("test.json"), @@ -31,5 +36,10 @@ def test_complete_annotation_file(): assert build_image_annotation(annotation_file) == { "annotations": [{"name": "test", "polygon": {"path": []}}], - "image": {"filename": "test.json", "height": 1080, "url": "https://darwin.v7labs.com/image.jpg", "width": 1920}, + "image": { + "filename": "test.json", + "height": 1080, + "url": "https://darwin.v7labs.com/image.jpg", + "width": 1920, + }, } diff --git a/tests/darwin/exporter/formats/export_mask_test.py b/tests/darwin/exporter/formats/export_mask_test.py index 3274c045a..d8a47e502 100644 --- a/tests/darwin/exporter/formats/export_mask_test.py +++ b/tests/darwin/exporter/formats/export_mask_test.py @@ -66,19 +66,26 @@ def test_in_rgb_mode_spreads_colors() -> None: assert palette == {"red": 0, "green": 1, "blue": 2, "yellow": 3, "purple": 4} -def test_get_palette_raises_value_error_when_num_categories_exceeds_maximum_for_index_mode() -> None: +def test_get_palette_raises_value_error_when_num_categories_exceeds_maximum_for_index_mode() -> ( + None +): with pytest.raises(ValueError, match="maximum number of classes supported: 254."): get_palette("index", ["category"] * 255) -def test_get_palette_raises_value_error_when_only_one_category_provided_for_grey_mode() -> None: +def test_get_palette_raises_value_error_when_only_one_category_provided_for_grey_mode() -> ( + None +): with pytest.raises( - ValueError, match="only having the '__background__' class is not allowed. Please add more classes." + ValueError, + match="only having the '__background__' class is not allowed. 
Please add more classes.", ): get_palette("grey", ["__background__"]) -def test_get_palette_raises_value_error_when_num_categories_exceeds_maximum_for_rgb_mode() -> None: +def test_get_palette_raises_value_error_when_num_categories_exceeds_maximum_for_rgb_mode() -> ( + None +): with pytest.raises(ValueError, match="maximum number of classes supported: 360."): get_palette("rgb", ["category"] * 361) @@ -159,18 +166,31 @@ def test_get_or_generate_colour() -> None: @pytest.fixture def annotations() -> List[dt.Annotation]: return [ - dt.Annotation(dt.AnnotationClass("class_1", "raster_layer"), data={"dense_rle": [], "mask_annotation_ids_mapping": {}, "total_pixels": 247500}), + dt.Annotation( + dt.AnnotationClass("class_1", "raster_layer"), + data={ + "dense_rle": [], + "mask_annotation_ids_mapping": {}, + "total_pixels": 247500, + }, + ), dt.Annotation(dt.AnnotationClass("class_2", "mask"), data={"sparse_rle": []}), dt.Annotation(dt.AnnotationClass("class_3", "polygon"), data={"path": "data"}), - dt.Annotation(dt.AnnotationClass("class_4", "complex_polygon"), data={"paths": "data"}), + dt.Annotation( + dt.AnnotationClass("class_4", "complex_polygon"), data={"paths": "data"} + ), ] -def test_get_render_mode_returns_raster_when_given_raster_mask(annotations: List[dt.AnnotationLike]) -> None: +def test_get_render_mode_returns_raster_when_given_raster_mask( + annotations: List[dt.AnnotationLike], +) -> None: assert get_render_mode([annotations[0], annotations[1]]) == "raster" -def test_get_render_mode_returns_polygon_when_given_polygon(annotations: List[dt.AnnotationLike]) -> None: +def test_get_render_mode_returns_polygon_when_given_polygon( + annotations: List[dt.AnnotationLike], +) -> None: assert get_render_mode([annotations[2]]) == "polygon" assert get_render_mode([annotations[3]]) == "polygon" @@ -178,12 +198,19 @@ def test_get_render_mode_returns_polygon_when_given_polygon(annotations: List[dt def test_get_render_mode_raises_value_error_when_given_both_raster_mask_and_polygon( annotations: List[dt.AnnotationLike], ) -> None: - with pytest.raises(ValueError, match="Cannot have both raster and polygon annotations in the same file"): + with pytest.raises( + ValueError, + match="Cannot have both raster and polygon annotations in the same file", + ): get_render_mode(annotations) -def test_get_render_mode_raises_value_error_when_no_renderable_annotations_found() -> None: - with pytest.raises(ValueError, match="No renderable annotations found in file, found types:"): +def test_get_render_mode_raises_value_error_when_no_renderable_annotations_found() -> ( + None +): + with pytest.raises( + ValueError, match="No renderable annotations found in file, found types:" + ): get_render_mode([dt.Annotation(dt.AnnotationClass("class_3", "invalid"), data={"line": "data"})]) # type: ignore @@ -200,18 +227,25 @@ def raster_layer() -> dt.RasterLayer: @pytest.fixture def mask_lookup() -> Dict[str, dt.AnnotationMask]: - return {"uuid1": dt.AnnotationMask("mask3", name="mask3"), "uuid2": dt.AnnotationMask("mask3", name="mask4")} + return { + "uuid1": dt.AnnotationMask("mask3", name="mask3"), + "uuid2": dt.AnnotationMask("mask3", name="mask4"), + } def test_colours_in_rle_returns_expected_dict( - colours: dt.MaskTypes.ColoursDict, raster_layer: dt.RasterLayer, mask_lookup: Dict[str, dt.AnnotationMask] + colours: dt.MaskTypes.ColoursDict, + raster_layer: dt.RasterLayer, + mask_lookup: Dict[str, dt.AnnotationMask], ) -> None: expected_dict = {"mask1": 1, "mask2": 2, "mask3": 3, "mask4": 4} assert 
colours_in_rle(colours, raster_layer, mask_lookup) == expected_dict def test_colours_in_rle_raises_value_error_when_mask_not_in_lookup( - colours: dt.MaskTypes.ColoursDict, raster_layer: dt.RasterLayer, mask_lookup: Dict[str, dt.AnnotationMask] + colours: dt.MaskTypes.ColoursDict, + raster_layer: dt.RasterLayer, + mask_lookup: Dict[str, dt.AnnotationMask], ) -> None: with pytest.raises(ValueError): colours_in_rle( @@ -268,7 +302,14 @@ def test_beyond_polygon_beyond_window() -> None: ) expected = np.array( - [[1, 1, 0, 0, 0], [1, 1, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], dtype=np.uint8 + [ + [1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ], + dtype=np.uint8, ) assert np.array_equal(new_mask, expected) assert not errors @@ -283,8 +324,20 @@ def test_beyond_complex_polygon() -> None: dt.AnnotationClass("cat3", "complex_polygon"), { "paths": [ - [{"x": -1, "y": -1}, {"x": -1, "y": 1}, {"x": 1, "y": 1}, {"x": 1, "y": -1}, {"x": -1, "y": -1}], - [{"x": 3, "y": 3}, {"x": 3, "y": 4}, {"x": 4, "y": 4}, {"x": 4, "y": 3}, {"x": 3, "y": 3}], + [ + {"x": -1, "y": -1}, + {"x": -1, "y": 1}, + {"x": 1, "y": 1}, + {"x": 1, "y": -1}, + {"x": -1, "y": -1}, + ], + [ + {"x": 3, "y": 3}, + {"x": 3, "y": 4}, + {"x": 4, "y": 4}, + {"x": 4, "y": 3}, + {"x": 3, "y": 3}, + ], ], "bounding_box": {"x": -1, "y": -1, "w": 6, "h": 6}, }, @@ -302,7 +355,14 @@ def test_beyond_complex_polygon() -> None: ) expected = np.array( - [[1, 1, 0, 0, 0], [1, 1, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1]], dtype=np.uint8 + [ + [1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 1, 1], + [0, 0, 0, 1, 1], + ], + dtype=np.uint8, ) assert np.array_equal(new_mask, expected) assert not errors @@ -320,33 +380,56 @@ def test_render_polygons() -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [{"x": 10, "y": 10}, {"x": 20, "y": 10}, {"x": 20, "y": 20}, {"x": 10, "y": 20}], + "path": [ + {"x": 10, "y": 10}, + {"x": 20, "y": 10}, + {"x": 20, "y": 20}, + {"x": 10, "y": 20}, + ], "bounding_box": base_bb, }, ), dt.Annotation( dt.AnnotationClass("cat2", "polygon"), { - "path": [{"x": 30, "y": 30}, {"x": 40, "y": 30}, {"x": 40, "y": 40}, {"x": 30, "y": 40}], + "path": [ + {"x": 30, "y": 30}, + {"x": 40, "y": 30}, + {"x": 40, "y": 40}, + {"x": 30, "y": 40}, + ], "bounding_box": base_bb, }, ), dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [{"x": 50, "y": 50}, {"x": 60, "y": 50}, {"x": 60, "y": 60}, {"x": 50, "y": 60}], + "path": [ + {"x": 50, "y": 50}, + {"x": 60, "y": 50}, + {"x": 60, "y": 60}, + {"x": 50, "y": 60}, + ], "bounding_box": base_bb, }, ), dt.Annotation( dt.AnnotationClass("cat1", "polygon"), - {"path": [{"x": 10, "y": 80}, {"x": 20, "y": 80}, {"x": 20, "y": 60}], "bounding_box": base_bb}, + { + "path": [{"x": 10, "y": 80}, {"x": 20, "y": 80}, {"x": 20, "y": 60}], + "bounding_box": base_bb, + }, ), dt.Annotation( dt.AnnotationClass("cat3", "complex_polygon"), { "paths": [ - [{"x": 70, "y": 70}, {"x": 80, "y": 70}, {"x": 80, "y": 80}, {"x": 70, "y": 80}], + [ + {"x": 70, "y": 70}, + {"x": 80, "y": 70}, + {"x": 80, "y": 80}, + {"x": 70, "y": 80}, + ], [{"x": 75, "y": 75}, {"x": 75, "y": 78}, {"x": 78, "y": 78}], ], "bounding_box": base_bb, @@ -402,21 +485,21 @@ def test_render_raster() -> None: {"sparse_rle": None}, subs=[], id="mask1", - slot_names=["slot1"] + slot_names=["slot1"], ), dt.Annotation( dt.AnnotationClass("mask2", "mask"), {"sparse_rle": None}, subs=[], id="mask2", - 
slot_names=["slot1"] + slot_names=["slot1"], ), dt.Annotation( dt.AnnotationClass("mask3", "mask"), {"sparse_rle": None}, subs=[], id="mask3", - slot_names=["slot1"] + slot_names=["slot1"], ), dt.Annotation( dt.AnnotationClass("__raster_layer__", "raster_layer"), @@ -473,8 +556,17 @@ def test_render_raster() -> None: GREEN = [0, 255, 0] BLUE = [0, 0, 255] BLACK = [0, 0, 0] -colours_for_test: Callable[[], dt.MaskTypes.RgbColors] = lambda: [*BLACK, *RED, *GREEN, *BLUE] -colour_list_for_test: Callable[[], dt.MaskTypes.ColoursDict] = lambda: {"mask1": 0, "mask2": 1, "mask3": 2} +colours_for_test: Callable[[], dt.MaskTypes.RgbColors] = lambda: [ + *BLACK, + *RED, + *GREEN, + *BLUE, +] +colour_list_for_test: Callable[[], dt.MaskTypes.ColoursDict] = lambda: { + "mask1": 0, + "mask2": 1, + "mask3": 2, +} data_path = (Path(__file__).parent / ".." / ".." / "data").resolve() @@ -572,7 +664,9 @@ def test_export( ) -> None: with TemporaryDirectory() as output_dir, patch( "darwin.exporter.formats.mask.get_render_mode" - ) as mock_get_render_mode, patch("darwin.exporter.formats.mask.render_raster") as mock_render_raster, patch( + ) as mock_get_render_mode, patch( + "darwin.exporter.formats.mask.render_raster" + ) as mock_render_raster, patch( "darwin.exporter.formats.mask.render_polygons" ) as mock_render_polygons, patch( "darwin.exporter.formats.mask.get_palette" @@ -583,7 +677,12 @@ def test_export( annotation_files = [ dt.AnnotationFile( - Path("test"), "test", annotation_classes=set(), annotations=[], image_height=height, image_width=width + Path("test"), + "test", + annotation_classes=set(), + annotations=[], + image_height=height, + image_width=width, ) ] @@ -592,7 +691,12 @@ def test_export( if colour_mode == "rgb": mock_get_rgb_colours.return_value = ( colours_for_test(), - {"__background__": [0, 0, 0], "class1": [255, 0, 0], "class2": [0, 255, 0], "class3": [0, 0, 255]}, + { + "__background__": [0, 0, 0], + "class1": [255, 0, 0], + "class2": [0, 255, 0], + "class3": [0, 0, 255], + }, ) if colour_mode == "rgb" or colour_mode == "index": @@ -646,7 +750,9 @@ def test_export( assert expected_csv_file.exists() assert test_csv_path.exists() - with expected_csv_file.open("r") as expected_csv, test_csv_path.open("r") as test_output_csv: + with expected_csv_file.open("r") as expected_csv, test_csv_path.open( + "r" + ) as test_output_csv: assert expected_csv.read() == test_output_csv.read() # PNG File @@ -664,7 +770,9 @@ def test_export( for x in range(expected.width): for y in range(expected.height): - assert expected.getpixel((x, y)) == test_output.getpixel((x, y)), f"Pixel {x},{y} is different" + assert expected.getpixel((x, y)) == test_output.getpixel( + (x, y) + ), f"Pixel {x},{y} is different" def test_class_mappings_preserved_on_large_export(tmpdir) -> None: @@ -680,33 +788,69 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [{"x": 0, "y": 0}, {"x": 1, "y": 0}, {"x": 1, "y": 1}, {"x": 0, "y": 1}, {"x": 0, "y": 1}], + "path": [ + {"x": 0, "y": 0}, + {"x": 1, "y": 0}, + {"x": 1, "y": 1}, + {"x": 0, "y": 1}, + {"x": 0, "y": 1}, + ], }, ), dt.Annotation( dt.AnnotationClass("cat2", "polygon"), { - "path": [{"x": 2, "y": 2}, {"x": 4, "y": 2}, {"x": 4, "y": 4}, {"x": 2, "y": 4}, {"x": 2, "y": 2}], + "path": [ + {"x": 2, "y": 2}, + {"x": 4, "y": 2}, + {"x": 4, "y": 4}, + {"x": 2, "y": 4}, + {"x": 2, "y": 2}, + ], }, ), dt.Annotation( dt.AnnotationClass("cat3", "polygon"), { - "path": [{"x": 5, "y": 5}, {"x": 8, "y": 5}, 
{"x": 8, "y": 8}, {"x": 5, "y": 8}, {"x": 5, "y": 5}], + "path": [ + {"x": 5, "y": 5}, + {"x": 8, "y": 5}, + {"x": 8, "y": 8}, + {"x": 5, "y": 8}, + {"x": 5, "y": 5}, + ], }, ), dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [{"x": 4, "y": 0}, {"x": 5, "y": 0}, {"x": 5, "y": 1}, {"x": 4, "y": 1}, {"x": 4, "y": 0}], + "path": [ + {"x": 4, "y": 0}, + {"x": 5, "y": 0}, + {"x": 5, "y": 1}, + {"x": 4, "y": 1}, + {"x": 4, "y": 0}, + ], }, ), dt.Annotation( dt.AnnotationClass("cat4", "complex_polygon"), { "paths": [ - [{"x": 0, "y": 3}, {"x": 1, "y": 3}, {"x": 1, "y": 5}, {"x": 0, "y": 5}, {"x": 0, "y": 3}], - [{"x": 0, "y": 7}, {"x": 1, "y": 7}, {"x": 1, "y": 8}, {"x": 0, "y": 8}, {"x": 0, "y": 7}], + [ + {"x": 0, "y": 3}, + {"x": 1, "y": 3}, + {"x": 1, "y": 5}, + {"x": 0, "y": 5}, + {"x": 0, "y": 3}, + ], + [ + {"x": 0, "y": 7}, + {"x": 1, "y": 7}, + {"x": 1, "y": 8}, + {"x": 0, "y": 8}, + {"x": 0, "y": 7}, + ], ] }, ), diff --git a/tests/darwin/exporter/formats/export_nifti_test.py b/tests/darwin/exporter/formats/export_nifti_test.py index d7eaadf7e..d987d7779 100644 --- a/tests/darwin/exporter/formats/export_nifti_test.py +++ b/tests/darwin/exporter/formats/export_nifti_test.py @@ -14,12 +14,20 @@ def test_video_annotation_nifti_export_single_slot(team_slug: str): with tempfile.TemporaryDirectory() as tmpdir: with ZipFile("tests/data.zip") as zfile: zfile.extractall(tmpdir) - annotations_dir = Path(tmpdir) / team_slug / "nifti/releases/latest/annotations" + annotations_dir = ( + Path(tmpdir) / team_slug / "nifti/releases/latest/annotations" + ) video_annotation_filepaths = [annotations_dir / "hippocampus_001.nii.json"] - video_annotations = list(darwin_to_dt_gen(video_annotation_filepaths, False)) + video_annotations = list( + darwin_to_dt_gen(video_annotation_filepaths, False) + ) nifti.export(video_annotations, output_dir=tmpdir) - export_im = nib.load(annotations_dir / "hippocampus_001_hippocampus.nii.gz").get_fdata() - expected_im = nib.load(annotations_dir / "hippocampus_001_hippocampus.nii.gz").get_fdata() + export_im = nib.load( + annotations_dir / "hippocampus_001_hippocampus.nii.gz" + ).get_fdata() + expected_im = nib.load( + annotations_dir / "hippocampus_001_hippocampus.nii.gz" + ).get_fdata() assert np.allclose(export_im, expected_im) @@ -27,14 +35,26 @@ def test_video_annotation_nifti_export_multi_slot(team_slug: str): with tempfile.TemporaryDirectory() as tmpdir: with ZipFile("tests/data.zip") as zfile: zfile.extractall(tmpdir) - annotations_dir = Path(tmpdir) / team_slug / "nifti/releases/latest/annotations" - video_annotation_filepaths = [annotations_dir / "hippocampus_multislot.nii.json"] - video_annotations = list(darwin_to_dt_gen(video_annotation_filepaths, False)) + annotations_dir = ( + Path(tmpdir) / team_slug / "nifti/releases/latest/annotations" + ) + video_annotation_filepaths = [ + annotations_dir / "hippocampus_multislot.nii.json" + ] + video_annotations = list( + darwin_to_dt_gen(video_annotation_filepaths, False) + ) nifti.export(video_annotations, output_dir=tmpdir) names = ["1", "2", "3", "4", "5"] for slotname in names: - export_im = nib.load(annotations_dir / f"hippocampus_multislot_{slotname}_test_hippo.nii.gz").get_fdata() - expected_im = nib.load(annotations_dir / f"hippocampus_multislot_{slotname}_test_hippo.nii.gz").get_fdata() + export_im = nib.load( + annotations_dir + / f"hippocampus_multislot_{slotname}_test_hippo.nii.gz" + ).get_fdata() + expected_im = nib.load( + annotations_dir + / 
f"hippocampus_multislot_{slotname}_test_hippo.nii.gz" + ).get_fdata() assert np.allclose(export_im, expected_im) @@ -42,11 +62,20 @@ def test_video_annotation_nifti_export_mpr(team_slug: str): with tempfile.TemporaryDirectory() as tmpdir: with ZipFile("tests/data.zip") as zfile: zfile.extractall(tmpdir) - annotations_dir = Path(tmpdir) / team_slug / "nifti/releases/latest/annotations" - video_annotation_filepaths = [annotations_dir / "hippocampus_multislot_001_mpr.json"] - video_annotations = list(darwin_to_dt_gen(video_annotation_filepaths, False)) + annotations_dir = ( + Path(tmpdir) / team_slug / "nifti/releases/latest/annotations" + ) + video_annotation_filepaths = [ + annotations_dir / "hippocampus_multislot_001_mpr.json" + ] + video_annotations = list( + darwin_to_dt_gen(video_annotation_filepaths, False) + ) nifti.export(video_annotations, output_dir=tmpdir) - export_im = nib.load(annotations_dir / f"hippocampus_001_mpr_1_test_hippo.nii.gz").get_fdata() - expected_im = nib.load(annotations_dir / f"hippocampus_001_mpr_1_test_hippo.nii.gz").get_fdata() + export_im = nib.load( + annotations_dir / f"hippocampus_001_mpr_1_test_hippo.nii.gz" + ).get_fdata() + expected_im = nib.load( + annotations_dir / f"hippocampus_001_mpr_1_test_hippo.nii.gz" + ).get_fdata() assert np.allclose(export_im, expected_im) - diff --git a/tests/darwin/exporter/formats/export_pascalvoc_test.py b/tests/darwin/exporter/formats/export_pascalvoc_test.py index da4ad0103..d31251267 100644 --- a/tests/darwin/exporter/formats/export_pascalvoc_test.py +++ b/tests/darwin/exporter/formats/export_pascalvoc_test.py @@ -44,7 +44,9 @@ def test_it_creates_missing_folders(self, folder_path: Path): class TestBuildXml: def test_xml_has_bounding_boxes_of_polygons(self): - annotation_class = AnnotationClass(name="car", annotation_type="polygon", annotation_internal_type=None) + annotation_class = AnnotationClass( + name="car", annotation_type="polygon", annotation_internal_type=None + ) annotation = Annotation( annotation_class=annotation_class, data={ @@ -80,11 +82,21 @@ def test_xml_has_bounding_boxes_of_polygons(self): def test_xml_has_bounding_boxes_of_complex_polygons(self): annotation_class = AnnotationClass( - name="rubber", annotation_type="complex_polygon", annotation_internal_type="polygon" + name="rubber", + annotation_type="complex_polygon", + annotation_internal_type="polygon", ) annotation = Annotation( annotation_class=annotation_class, - data={"paths": [{...}], "bounding_box": {"x": 1174.28, "y": 2379.17, "w": 824.9000000000001, "h": 843.52}}, + data={ + "paths": [{...}], + "bounding_box": { + "x": 1174.28, + "y": 2379.17, + "w": 824.9000000000001, + "h": 843.52, + }, + }, subs=[], ) @@ -114,7 +126,9 @@ def test_xml_has_bounding_boxes_of_complex_polygons(self): assert_xml_element_text(bndbox, "ymax", "3223") def test_xml_has_bounding_boxes(self): - annotation_class = AnnotationClass(name="tire", annotation_type="bounding_box", annotation_internal_type=None) + annotation_class = AnnotationClass( + name="tire", annotation_type="bounding_box", annotation_internal_type=None + ) annotation = Annotation( annotation_class=annotation_class, data={"x": 574.88, "y": 427.0, "w": 137.04, "h": 190.66}, diff --git a/tests/darwin/exporter/formats/export_yolo_segmented_test.py b/tests/darwin/exporter/formats/export_yolo_segmented_test.py index 8c45ca33b..a31c770ec 100644 --- a/tests/darwin/exporter/formats/export_yolo_segmented_test.py +++ b/tests/darwin/exporter/formats/export_yolo_segmented_test.py @@ -76,7 +76,9 @@ def 
annotations(annotation_classes: List[AnnotationClass]) -> List[Annotation]: @pytest.fixture -def annotation_files(annotation_classes: List[AnnotationClass], annotations: List[Annotation]) -> List[AnnotationFile]: +def annotation_files( + annotation_classes: List[AnnotationClass], annotations: List[Annotation] +) -> List[AnnotationFile]: return [ # fmt: off AnnotationFile( @@ -91,7 +93,9 @@ def annotation_files(annotation_classes: List[AnnotationClass], annotations: Lis ] -def test_export_yolo_segmented(annotation_files: List[AnnotationFile], tmp_path: Path) -> None: +def test_export_yolo_segmented( + annotation_files: List[AnnotationFile], tmp_path: Path +) -> None: export(annotation_files, tmp_path) assert (tmp_path / "darknet.labels").exists() assert (tmp_path / "file1.txt").exists() @@ -99,8 +103,14 @@ def test_export_yolo_segmented(annotation_files: List[AnnotationFile], tmp_path: output_lines = (tmp_path / "file1.txt").read_text().splitlines() if CLOSE_VERTICES: assert output_lines[0] == "0 0.02 0.03 0.27 0.03 0.27 0.16 0.02 0.16 0.02 0.03" - assert output_lines[1] == "1 0.0 0.0 0.0 0.1 0.05 0.15 0.1 0.1 0.0 0.1 0.0 0.0 0.0 0.0" - assert output_lines[2] == "2 0.0 0.0 0.0 0.1 0.05 0.15 0.1 0.1 0.0 0.1 0.0 0.0 0.0 0.0" + assert ( + output_lines[1] + == "1 0.0 0.0 0.0 0.1 0.05 0.15 0.1 0.1 0.0 0.1 0.0 0.0 0.0 0.0" + ) + assert ( + output_lines[2] + == "2 0.0 0.0 0.0 0.1 0.05 0.15 0.1 0.1 0.0 0.1 0.0 0.0 0.0 0.0" + ) else: assert output_lines[0] == "0 0.02 0.03 0.27 0.03 0.27 0.16 0.02 0.16" assert output_lines[1] == "1 0.0 0.0 0.0 0.1 0.05 0.15 0.1 0.1 0.0 0.1 0.0 0.0" diff --git a/tests/darwin/exporter/formats/export_yolo_test.py b/tests/darwin/exporter/formats/export_yolo_test.py index 90ba5d801..d66586134 100644 --- a/tests/darwin/exporter/formats/export_yolo_test.py +++ b/tests/darwin/exporter/formats/export_yolo_test.py @@ -48,7 +48,10 @@ def test_it_creates_missing_folders(self, folder_path: Path): yolo_lines = (folder_path / "annotation_test.txt").read_text().split("\n") assert yolo_lines[0] == "0 {} {} {} {}".format( - (bbox["x"] + bbox["w"] / 2) / 1920, (bbox["y"] + bbox["h"] / 2) / 1080, bbox["w"] / 1920, bbox["h"] / 1080 + (bbox["x"] + bbox["w"] / 2) / 1920, + (bbox["y"] + bbox["h"] / 2) / 1080, + bbox["w"] / 1920, + bbox["h"] / 1080, ) yolo_classes = (folder_path / "darknet.labels").read_text().split("\n") diff --git a/tests/darwin/importer/formats/import_darwin_test.py b/tests/darwin/importer/formats/import_darwin_test.py index 6d87bf31f..6dd6c2335 100644 --- a/tests/darwin/importer/formats/import_darwin_test.py +++ b/tests/darwin/importer/formats/import_darwin_test.py @@ -18,7 +18,9 @@ def test_it_returns_none_if_there_are_no_annotations(self): path = Path("path/to/file.xml") assert parse_path(path) is None - def test_it_parses_slot_names_properly_if_present_for_sequences(self, file_path: Path): + def test_it_parses_slot_names_properly_if_present_for_sequences( + self, file_path: Path + ): json: str = """ { "dataset": "test", @@ -130,7 +132,9 @@ def test_it_parses_slot_names_properly_if_present_for_images(self, file_path: Pa for annotation in annotation_file.annotations: assert annotation.slot_names == ["my_slot"] - def test_it_skips_slot_names_when_no_slot_names_for_sequences(self, file_path: Path): + def test_it_skips_slot_names_when_no_slot_names_for_sequences( + self, file_path: Path + ): json: str = """ { "dataset": "test", diff --git a/tests/darwin/importer/formats/import_dataloop_test.py b/tests/darwin/importer/formats/import_dataloop_test.py index 
2e6c4654c..944d6be57 100644 --- a/tests/darwin/importer/formats/import_dataloop_test.py +++ b/tests/darwin/importer/formats/import_dataloop_test.py @@ -20,11 +20,17 @@ class DataLoopTestCase(TestCase): def setUp(self) -> None: - _fd = open(realpath(join(dirname(__file__), "..", "..", "data", "dataloop.example.json"))) + _fd = open( + realpath( + join(dirname(__file__), "..", "..", "data", "dataloop.example.json") + ) + ) self.DATALOOP_MOCK_DATA = _fd.read() _fd.close() - def assertApproximatelyEqualNumber(self, a: Union[int, float], b: Union[int, float], places: int = 8): + def assertApproximatelyEqualNumber( + self, a: Union[int, float], b: Union[int, float], places: int = 8 + ): math_isclose(a, b, rel_tol=10**-places) DARWIN_PARSED_DATA = { @@ -44,7 +50,9 @@ def tearDown(self): @patch( "darwin.importer.formats.dataloop._remove_leading_slash", ) - def test_returns_none_if_file_extension_is_not_json(self, mock_remove_leading_slash): + def test_returns_none_if_file_extension_is_not_json( + self, mock_remove_leading_slash + ): self.assertIsNone(parse_path(Path("foo.bar"))) @patch( @@ -93,20 +101,28 @@ def tearDown(self) -> None: def test_handles_box_type(self): from darwin.importer.formats.dataloop import _parse_annotation as pa - with patch("darwin.importer.formats.dataloop.dt.make_bounding_box") as make_bounding_box_mock: + with patch( + "darwin.importer.formats.dataloop.dt.make_bounding_box" + ) as make_bounding_box_mock: make_bounding_box_mock.return_value = Annotation("class_1", 0, 0, 0, 0) pa(self.parsed_json["annotations"][0]) # 0 is a box type - make_bounding_box_mock.assert_called_with("box_class", 288.81, 845.49, 1932.5100000000002, 2682.75) + make_bounding_box_mock.assert_called_with( + "box_class", 288.81, 845.49, 1932.5100000000002, 2682.75 + ) def test_handles_class_type(self): - annotation = _parse_annotation(self.parsed_json["annotations"][1]) # 1 is a class type + annotation = _parse_annotation( + self.parsed_json["annotations"][1] + ) # 1 is a class type self.assertEqual(annotation, None) def test_handles_segment_type(self): from darwin.importer.formats.dataloop import _parse_annotation as pa - with patch("darwin.importer.formats.dataloop.dt.make_polygon") as make_polygon_mock: + with patch( + "darwin.importer.formats.dataloop.dt.make_polygon" + ) as make_polygon_mock: pa(self.parsed_json["annotations"][2]) # 2 is a segment type if "kwargs" in make_polygon_mock.call_args: @@ -114,7 +130,10 @@ def test_handles_segment_type(self): def make_tuple_entry(point: Dict[str, float]) -> Tuple[float, float]: return (point["x"], point["y"]) - point_path = [make_tuple_entry(p) for p in make_polygon_mock.call_args.kwargs["point_path"]] + point_path = [ + make_tuple_entry(p) + for p in make_polygon_mock.call_args.kwargs["point_path"] + ] expectation_points = [ (856.73076923, 1077.88461538), (575, 657.69230769), @@ -126,14 +145,17 @@ def make_tuple_entry(point: Dict[str, float]) -> Tuple[float, float]: ] [ - self.assertApproximatelyEqualNumber(a[0], b[0]) and self.assertApproximatelyEqualNumber(a[1], b[1]) + self.assertApproximatelyEqualNumber(a[0], b[0]) + and self.assertApproximatelyEqualNumber(a[1], b[1]) for a, b in zip(point_path, expectation_points) ] self.assertTrue(make_polygon_mock.call_args[0][0], "segment_class") def test_throws_on_unknown_type(self): try: - _parse_annotation(self.parsed_json["annotations"][3]) # 3 is an unsupported type + _parse_annotation( + self.parsed_json["annotations"][3] + ) # 3 is an unsupported type except UnsupportedImportAnnotationType as e: 
self.assertEqual(e.import_type, "dataloop") self.assertEqual(e.annotation_type, "UNSUPPORTED_TYPE") @@ -142,4 +164,6 @@ def test_throws_on_unknown_type(self): def test_rejects_complex_polygons(self): with self.assertRaises(DataloopComplexPolygonsNotYetSupported): - _parse_annotation(self.parsed_json["annotations"][4]) # 4 is a complex polygon + _parse_annotation( + self.parsed_json["annotations"][4] + ) # 4 is a complex polygon diff --git a/tests/darwin/importer/formats/import_labelbox_test.py b/tests/darwin/importer/formats/import_labelbox_test.py index 69558e1da..f01f7b320 100644 --- a/tests/darwin/importer/formats/import_labelbox_test.py +++ b/tests/darwin/importer/formats/import_labelbox_test.py @@ -277,7 +277,9 @@ def test_it_imports_bbox_images(self, file_path: Path): assert annotation_file.remote_path == "/" assert annotation_file.annotations - bbox_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + bbox_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_bbox(bbox_annotation, 145, 3558, 623, 449) annotation_class = bbox_annotation.annotation_class @@ -376,10 +378,16 @@ def test_it_imports_polygon_images(self, file_path: Path): assert annotation_file.annotations - polygon_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + polygon_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_polygon( polygon_annotation, - [{"x": 3665.814, "y": 351.628}, {"x": 3762.93, "y": 810.419}, {"x": 3042.93, "y": 914.233}], + [ + {"x": 3665.814, "y": 351.628}, + {"x": 3762.93, "y": 810.419}, + {"x": 3042.93, "y": 914.233}, + ], ) annotation_class = polygon_annotation.annotation_class @@ -416,7 +424,9 @@ def test_it_imports_point_images(self, file_path: Path): assert annotation_file.annotations - point_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + point_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_point(point_annotation, {"x": 342.93, "y": 914.233}) annotation_class = point_annotation.annotation_class @@ -457,10 +467,16 @@ def test_it_imports_polyline_images(self, file_path: Path): assert annotation_file.annotations - line_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + line_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_line( line_annotation, - [{"x": 198.027, "y": 1979.196}, {"x": 321.472, "y": 1801.743}, {"x": 465.491, "y": 1655.152}], + [ + {"x": 198.027, "y": 1979.196}, + {"x": 321.472, "y": 1801.743}, + {"x": 465.491, "y": 1655.152}, + ], ) annotation_class = line_annotation.annotation_class @@ -596,7 +612,9 @@ def test_it_imports_classification_from_radio_buttons(self, file_path: Path): tag_annotation: Annotation = cast(Annotation, annotation_file.annotations[1]) tag_annotation_class = tag_annotation.annotation_class - assert_annotation_class(tag_annotation_class, "r_c_or_l_side_radiograph:right", "tag") + assert_annotation_class( + tag_annotation_class, "r_c_or_l_side_radiograph:right", "tag" + ) def test_it_imports_classification_from_checklist(self, file_path: Path): json: str = """ @@ -640,11 +658,15 @@ def test_it_imports_classification_from_checklist(self, file_path: Path): tag_annotation_1: Annotation = cast(Annotation, annotation_file.annotations[1]) tag_annotation_class_1 = tag_annotation_1.annotation_class - assert_annotation_class(tag_annotation_class_1, "r_c_or_l_side_radiograph:right", "tag") + 
assert_annotation_class( + tag_annotation_class_1, "r_c_or_l_side_radiograph:right", "tag" + ) tag_annotation_2: Annotation = cast(Annotation, annotation_file.annotations[2]) tag_annotation_class_2 = tag_annotation_2.annotation_class - assert_annotation_class(tag_annotation_class_2, "r_c_or_l_side_radiograph:left", "tag") + assert_annotation_class( + tag_annotation_class_2, "r_c_or_l_side_radiograph:left", "tag" + ) def test_it_imports_classification_from_free_text(self, file_path: Path): json: str = """ @@ -687,8 +709,13 @@ def test_it_imports_classification_from_free_text(self, file_path: Path): assert_annotation_class(point_annotation_class, "Shark", "keypoint") tag_annotation: Annotation = cast(Annotation, annotation_file.annotations[1]) - assert_annotation_class(tag_annotation.annotation_class, "r_c_or_l_side_radiograph", "tag") - assert_subannotations(tag_annotation.subs, [SubAnnotation(annotation_type="text", data="righ side")]) + assert_annotation_class( + tag_annotation.annotation_class, "r_c_or_l_side_radiograph", "tag" + ) + assert_subannotations( + tag_annotation.subs, + [SubAnnotation(annotation_type="text", data="righ side")], + ) def assert_bbox(annotation: Annotation, x: float, y: float, h: float, w: float) -> None: @@ -721,7 +748,10 @@ def assert_line(annotation: Annotation, line: List[Point]) -> None: def assert_annotation_class( - annotation_class: AnnotationClass, name: str, type: str, internal_type: Optional[str] = None + annotation_class: AnnotationClass, + name: str, + type: str, + internal_type: Optional[str] = None, ) -> None: assert annotation_class assert annotation_class.name == name @@ -729,7 +759,9 @@ def assert_annotation_class( assert annotation_class.annotation_internal_type == internal_type -def assert_subannotations(actual_subs: List[SubAnnotation], expected_subs: List[SubAnnotation]) -> None: +def assert_subannotations( + actual_subs: List[SubAnnotation], expected_subs: List[SubAnnotation] +) -> None: assert actual_subs for actual_sub in actual_subs: for expected_sub in expected_subs: diff --git a/tests/darwin/importer/formats/import_nifti_test.py b/tests/darwin/importer/formats/import_nifti_test.py index a97611eff..8527d153d 100644 --- a/tests/darwin/importer/formats/import_nifti_test.py +++ b/tests/darwin/importer/formats/import_nifti_test.py @@ -21,22 +21,43 @@ def test_image_annotation_nifti_import_single_slot(team_slug: str): with ZipFile("tests/data.zip") as zfile: zfile.extractall(tmpdir) label_path = ( - Path(tmpdir) / team_slug / "nifti" / "releases" / "latest" / "annotations" / "vol0_brain.nii.gz" + Path(tmpdir) + / team_slug + / "nifti" + / "releases" + / "latest" + / "annotations" + / "vol0_brain.nii.gz" ) input_dict = { "data": [ - {"image": "vol0 (1).nii", "label": str(label_path), "class_map": {"1": "brain"}, "mode": "video"} + { + "image": "vol0 (1).nii", + "label": str(label_path), + "class_map": {"1": "brain"}, + "mode": "video", + } ] } upload_json = Path(tmpdir) / "annotations.json" - upload_json.write_text(json.dumps(input_dict, indent=4, sort_keys=True, default=str)) + upload_json.write_text( + json.dumps(input_dict, indent=4, sort_keys=True, default=str) + ) annotation_files = parse_path(path=upload_json) annotation_file = annotation_files[0] - output_json_string = json.loads(serialise_annotation_file(annotation_file, as_dict=False)) + output_json_string = json.loads( + serialise_annotation_file(annotation_file, as_dict=False) + ) expected_json_string = json.load( - open(Path(tmpdir) / team_slug / "nifti" / 
"vol0_annotation_file.json", "r") + open( + Path(tmpdir) / team_slug / "nifti" / "vol0_annotation_file.json", + "r", + ) + ) + assert ( + output_json_string["annotations"][0]["frames"] + == expected_json_string["annotations"][0]["frames"] ) - assert output_json_string["annotations"][0]["frames"] == expected_json_string["annotations"][0]["frames"] def test_image_annotation_nifti_import_multi_slot(team_slug: str): @@ -44,7 +65,13 @@ def test_image_annotation_nifti_import_multi_slot(team_slug: str): with ZipFile("tests/data.zip") as zfile: zfile.extractall(tmpdir) label_path = ( - Path(tmpdir) / team_slug / "nifti" / "releases" / "latest" / "annotations" / "vol0_brain.nii.gz" + Path(tmpdir) + / team_slug + / "nifti" + / "releases" + / "latest" + / "annotations" + / "vol0_brain.nii.gz" ) input_dict = { "data": [ @@ -59,14 +86,27 @@ def test_image_annotation_nifti_import_multi_slot(team_slug: str): ] } upload_json = Path(tmpdir) / "annotations.json" - upload_json.write_text(json.dumps(input_dict, indent=4, sort_keys=True, default=str)) + upload_json.write_text( + json.dumps(input_dict, indent=4, sort_keys=True, default=str) + ) annotation_files = parse_path(path=upload_json) annotation_file = annotation_files[0] - output_json_string = json.loads(serialise_annotation_file(annotation_file, as_dict=False)) + output_json_string = json.loads( + serialise_annotation_file(annotation_file, as_dict=False) + ) expected_json_string = json.load( - open(Path(tmpdir) / team_slug / "nifti" / "vol0_annotation_file_multi_slot.json", "r") + open( + Path(tmpdir) + / team_slug + / "nifti" + / "vol0_annotation_file_multi_slot.json", + "r", + ) + ) + assert ( + output_json_string["annotations"][0]["frames"] + == expected_json_string["annotations"][0]["frames"] ) - assert output_json_string["annotations"][0]["frames"] == expected_json_string["annotations"][0]["frames"] def test_image_annotation_nifti_import_incorrect_number_slot(team_slug: str): @@ -74,7 +114,13 @@ def test_image_annotation_nifti_import_incorrect_number_slot(team_slug: str): with ZipFile("tests/data.zip") as zfile: zfile.extractall(tmpdir) label_path = ( - Path(tmpdir) / team_slug / "nifti" / "releases" / "latest" / "annotations" / "vol0_brain.nii.gz" + Path(tmpdir) + / team_slug + / "nifti" + / "releases" + / "latest" + / "annotations" + / "vol0_brain.nii.gz" ) input_dict = { "data": [ @@ -89,12 +135,16 @@ def test_image_annotation_nifti_import_incorrect_number_slot(team_slug: str): ] } upload_json = Path(tmpdir) / "annotations.json" - upload_json.write_text(json.dumps(input_dict, indent=4, sort_keys=True, default=str)) + upload_json.write_text( + json.dumps(input_dict, indent=4, sort_keys=True, default=str) + ) with pytest.raises(Exception): annotation_files = parse_path(path=upload_json) -def serialise_annotation_file(annotation_file: AnnotationFile, as_dict) -> Union[str, dict]: +def serialise_annotation_file( + annotation_file: AnnotationFile, as_dict +) -> Union[str, dict]: """ Serialises an ``AnnotationFile`` into a string. 
@@ -112,9 +162,12 @@ def serialise_annotation_file(annotation_file: AnnotationFile, as_dict) -> Union
         "path": str(annotation_file.path),
         "filename": annotation_file.filename,
         "annotation_classes": [
-            serialise_annotation_class(ac, as_dict=True) for ac in annotation_file.annotation_classes
+            serialise_annotation_class(ac, as_dict=True)
+            for ac in annotation_file.annotation_classes
+        ],
+        "annotations": [
+            serialise_annotation(a, as_dict=True) for a in annotation_file.annotations
         ],
-        "annotations": [serialise_annotation(a, as_dict=True) for a in annotation_file.annotations],
         "is_video": annotation_file.is_video,
         "image_width": annotation_file.image_width,
         "image_height": annotation_file.image_height,
@@ -133,7 +186,9 @@ def serialise_annotation_file(annotation_file: AnnotationFile, as_dict) -> Union
     return output_dict if as_dict else json_string
 
 
-def serialise_annotation(annotation: Union[Annotation, VideoAnnotation], as_dict) -> Union[str, dict]:
+def serialise_annotation(
+    annotation: Union[Annotation, VideoAnnotation], as_dict
+) -> Union[str, dict]:
     if isinstance(annotation, VideoAnnotation):
         return serialise_video_annotation(annotation, as_dict=as_dict)
     elif isinstance(annotation, Annotation):
@@ -157,7 +212,9 @@ def serialise_general_annotation(annotation: Annotation, as_dict) -> Union[str,
     return output_dict if as_dict else json_string
 
 
-def serialise_video_annotation(video_annotation: VideoAnnotation, as_dict: bool = True) -> Union[str, dict]:
+def serialise_video_annotation(
+    video_annotation: VideoAnnotation, as_dict: bool = True
+) -> Union[str, dict]:
     data = video_annotation.get_data()
     output_dict = {
         "annotation_class": video_annotation.annotation_class.name,
@@ -172,7 +229,9 @@ def serialise_video_annotation(video_annotation: VideoAnnotation, as_dict: bool
     return output_dict if as_dict else json_string
 
 
-def serialise_annotation_class(annotation_class: AnnotationClass, as_dict: bool = True) -> Union[str, dict]:
+def serialise_annotation_class(
+    annotation_class: AnnotationClass, as_dict: bool = True
+) -> Union[str, dict]:
     output_dict = {
         "name": annotation_class.name,
         "annotation_type": annotation_class.annotation_type,
@@ -182,7 +241,9 @@ def serialise_annotation_class(annotation_class: AnnotationClass, as_dict: bool
     return output_dict if as_dict else json_string
 
 
-def serialise_sub_annotation(sub_annotation: SubAnnotation, as_dict: bool = True) -> Union[str, dict]:
+def serialise_sub_annotation(
+    sub_annotation: SubAnnotation, as_dict: bool = True
+) -> Union[str, dict]:
     output_dict = {
         "type": sub_annotation.annotation_type,
         "data": sub_annotation.data,
@@ -197,7 +258,9 @@ def serialise_sub_annotation(sub_annotation: SubAnnotation, as_dict: bool = True
 
 
 if __name__ == "__main__":
-    args = argparse.ArgumentParser(description="Update the serialisation of AnnotationFile with the current version.")
+    args = argparse.ArgumentParser(
+        description="Update the serialisation of AnnotationFile with the current version."
+ ) input_json_string: str = """ { "data": [ @@ -222,6 +285,11 @@ def serialise_sub_annotation(sub_annotation: SubAnnotation, as_dict: bool = True annotation_file = annotation_files[0] output_json_string = serialise_annotation_file(annotation_file, as_dict=False) with open( - Path("tests") / "v7" / "v7-darwin-json-v1" / "nifti" / "vol0_annotation_file_multi_slot.json", "w" + Path("tests") + / "v7" + / "v7-darwin-json-v1" + / "nifti" + / "vol0_annotation_file_multi_slot.json", + "w", ) as f: f.write(output_json_string) diff --git a/tests/darwin/importer/formats/import_pascalvoc_test.py b/tests/darwin/importer/formats/import_pascalvoc_test.py index 7f8340e01..2c50dc24f 100644 --- a/tests/darwin/importer/formats/import_pascalvoc_test.py +++ b/tests/darwin/importer/formats/import_pascalvoc_test.py @@ -32,7 +32,9 @@ def test_raises_value_error_if_filename_tag_not_found(self, annotation_path: Pat assert str(info.value) == "Could not find filename element in annotation file" - def test_raises_value_error_if_filename_tag_has_empty_text(self, annotation_path: Path): + def test_raises_value_error_if_filename_tag_has_empty_text( + self, annotation_path: Path + ): annotation_path.write_text(" ") with pytest.raises(ValueError) as info: @@ -48,7 +50,9 @@ def test_raises_value_error_if_filename_is_empty(self, annotation_path: Path): assert str(info.value) == "filename element does not have a text value" - def test_returns_annotation_file_with_empty_annotations_otherwise(self, annotation_path: Path): + def test_returns_annotation_file_with_empty_annotations_otherwise( + self, annotation_path: Path + ): annotation_path.write_text("image.jpg") annotation_file = parse_path(annotation_path) @@ -61,7 +65,9 @@ def test_returns_annotation_file_with_empty_annotations_otherwise(self, annotati assert annotation_file.remote_path == "/" def test_raises_if_name_tag_not_found_in_object(self, annotation_path: Path): - annotation_path.write_text("image.jpg") + annotation_path.write_text( + "image.jpg" + ) with pytest.raises(ValueError) as info: parse_path(annotation_path) @@ -69,7 +75,9 @@ def test_raises_if_name_tag_not_found_in_object(self, annotation_path: Path): assert str(info.value) == "Could not find name element in annotation file" def test_raises_if_bndbox_tag_not_found_in_object(self, annotation_path: Path): - annotation_path.write_text("image.jpgClass") + annotation_path.write_text( + "image.jpgClass" + ) with pytest.raises(ValueError) as info: parse_path(annotation_path) @@ -116,7 +124,9 @@ def test_raises_if_ymax_tag_not_found_in_object(self, annotation_path: Path): assert str(info.value) == "Could not find ymax element in annotation file" - def test_returns_annotation_file_with_correct_annotations_otherwise(self, annotation_path: Path): + def test_returns_annotation_file_with_correct_annotations_otherwise( + self, annotation_path: Path + ): annotation_path.write_text( "image.jpgClass10101010" ) @@ -138,7 +148,9 @@ def test_returns_annotation_file_with_correct_annotations_otherwise(self, annota assert annotation_file.remote_path == "/" - def test_returns_annotation_file_with_correct_annotations_with_float_values(self, annotation_path: Path): + def test_returns_annotation_file_with_correct_annotations_with_float_values( + self, annotation_path: Path + ): annotation_path.write_text( "image.jpgClass10.010.010.010.0" ) diff --git a/tests/darwin/importer/formats/import_superannotate_test.py b/tests/darwin/importer/formats/import_superannotate_test.py index 781a8ac02..2b28ada40 100644 --- 
a/tests/darwin/importer/formats/import_superannotate_test.py +++ b/tests/darwin/importer/formats/import_superannotate_test.py @@ -53,7 +53,9 @@ def test_raises_if_folder_has_no_classes_file(self, annotations_file_path: Path) assert "Folder must contain a 'classes.json'" in str(error.value) - def test_returns_empty_file_if_there_are_no_annotations(self, annotations_file_path: Path, classes_file_path: Path): + def test_returns_empty_file_if_there_are_no_annotations( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [], @@ -76,7 +78,9 @@ def test_returns_empty_file_if_there_are_no_annotations(self, annotations_file_p remote_path="/", ) - def test_raises_if_annotation_has_no_type(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_annotation_has_no_type( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -100,7 +104,9 @@ def test_raises_if_annotation_has_no_type(self, annotations_file_path: Path, cla assert "'type' is a required property" in str(error.value) - def test_raises_if_annotation_has_no_class_id(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_annotation_has_no_class_id( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -125,7 +131,9 @@ def test_raises_if_annotation_has_no_class_id(self, annotations_file_path: Path, assert "'classId' is a required property" in str(error.value) - def test_raises_if_metadata_is_missing(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_metadata_is_missing( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -147,7 +155,9 @@ def test_raises_if_metadata_is_missing(self, annotations_file_path: Path, classe assert "'metadata' is a required property" in str(error.value) - def test_raises_if_metadata_is_missing_name(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_metadata_is_missing_name( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -169,7 +179,9 @@ def test_raises_if_metadata_is_missing_name(self, annotations_file_path: Path, c assert "'name' is a required property" in str(error.value) - def test_raises_if_point_has_missing_coordinate(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_point_has_missing_coordinate( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -194,7 +206,9 @@ def test_raises_if_point_has_missing_coordinate(self, annotations_file_path: Pat error_str = str(error.value) assert all(["point" in error_str, "ellipse" in error_str]) - def test_imports_point_vectors(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_point_vectors( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -222,13 +236,17 @@ def test_imports_point_vectors(self, annotations_file_path: Path, classes_file_p assert annotation_file.annotations - point_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + point_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_point(point_annotation, {"x": 1.93, "y": 0.233}) annotation_class = point_annotation.annotation_class 
assert_annotation_class(annotation_class, "Person-point", "keypoint") - def test_raises_if_ellipse_has_missing_coordinate(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_ellipse_has_missing_coordinate( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -248,7 +266,9 @@ def test_raises_if_ellipse_has_missing_coordinate(self, annotations_file_path: P error_str = str(error.value) assert all(["ellipse" in error_str, "point" in error_str]) - def test_imports_ellipse_vectors(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_ellipse_vectors( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -285,15 +305,24 @@ def test_imports_ellipse_vectors(self, annotations_file_path: Path, classes_file assert annotation_file.annotations - ellipse_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + ellipse_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_ellipse( - ellipse_annotation, {"angle": 0, "center": {"x": 922.1, "y": 475.8}, "radius": {"x": 205.4, "y": 275.7}} + ellipse_annotation, + { + "angle": 0, + "center": {"x": 922.1, "y": 475.8}, + "radius": {"x": 205.4, "y": 275.7}, + }, ) annotation_class = ellipse_annotation.annotation_class assert_annotation_class(annotation_class, "Person-ellipse", "ellipse") - def test_raises_if_cuboid_has_missing_point(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_cuboid_has_missing_point( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -320,7 +349,9 @@ def test_raises_if_cuboid_has_missing_point(self, annotations_file_path: Path, c error_str = str(error.value) assert all(["cuboid" in error_str, "point" in error_str]) - def test_imports_cuboid_vectors(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_cuboid_vectors( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -358,7 +389,9 @@ def test_imports_cuboid_vectors(self, annotations_file_path: Path, classes_file_ assert annotation_file.annotations - cuboid_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + cuboid_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_cuboid( cuboid_annotation, { @@ -370,7 +403,9 @@ def test_imports_cuboid_vectors(self, annotations_file_path: Path, classes_file_ annotation_class = cuboid_annotation.annotation_class assert_annotation_class(annotation_class, "Person-cuboid", "cuboid") - def test_raises_if_polygon_has_missing_points(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_polygon_has_missing_points( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -395,7 +430,9 @@ def test_raises_if_polygon_has_missing_points(self, annotations_file_path: Path, error_str = str(error.value) assert all(["polygon" in error_str, "point" in error_str]) - def test_imports_polygon_vectors(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_polygon_vectors( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -428,16 +465,24 @@ def test_imports_polygon_vectors(self, annotations_file_path: Path, classes_file assert 
annotation_file.annotations - polygon_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + polygon_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_polygon( polygon_annotation, - [{"x": 1053, "y": 587.2}, {"x": 1053.1, "y": 586}, {"x": 1053.8, "y": 585.4}], + [ + {"x": 1053, "y": 587.2}, + {"x": 1053.1, "y": 586}, + {"x": 1053.8, "y": 585.4}, + ], ) annotation_class = polygon_annotation.annotation_class assert_annotation_class(annotation_class, "Person-polygon", "polygon") - def test_raises_if_polyline_has_missing_points(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_polyline_has_missing_points( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -462,7 +507,9 @@ def test_raises_if_polyline_has_missing_points(self, annotations_file_path: Path error_str = str(error.value) assert all(["polyline" in error_str, "point" in error_str]) - def test_imports_polyline_vectors(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_polyline_vectors( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -495,16 +542,24 @@ def test_imports_polyline_vectors(self, annotations_file_path: Path, classes_fil assert annotation_file.annotations - line_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + line_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_line( line_annotation, - [{"x": 1053, "y": 587.2}, {"x": 1053.1, "y": 586}, {"x": 1053.8, "y": 585.4}], + [ + {"x": 1053, "y": 587.2}, + {"x": 1053.1, "y": 586}, + {"x": 1053.8, "y": 585.4}, + ], ) annotation_class = line_annotation.annotation_class assert_annotation_class(annotation_class, "Person-polyline", "line") - def test_raises_if_bbox_has_missing_points(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_bbox_has_missing_points( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -528,7 +583,9 @@ def test_raises_if_bbox_has_missing_points(self, annotations_file_path: Path, cl error_str = str(error.value) assert all(["bbox" in error_str, "point" in error_str]) - def test_imports_bbox_vectors(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_bbox_vectors( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -561,13 +618,17 @@ def test_imports_bbox_vectors(self, annotations_file_path: Path, classes_file_pa assert annotation_file.annotations - bbox_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + bbox_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_bbox(bbox_annotation, 1642.9, 516.5, 217.5, 277.1) annotation_class = bbox_annotation.annotation_class assert_annotation_class(annotation_class, "Person-bbox", "bounding_box") - def test_raises_if_an_attributes_is_missing(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_an_attributes_is_missing( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -608,7 +669,9 @@ def test_raises_if_an_attributes_is_missing(self, annotations_file_path: Path, c error_str = str(error.value) assert all(["type" in error_str, "bbox" in error_str]) - def 
test_raises_if_an_attribute_from_a_group_is_missing(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_an_attribute_from_a_group_is_missing( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -647,9 +710,13 @@ def test_raises_if_an_attribute_from_a_group_is_missing(self, annotations_file_p with pytest.raises(ValueError) as error: parse_path(annotations_file_path) - assert "No attribute data found for {'id': 2, 'groupId': 1}." in str(error.value) + assert "No attribute data found for {'id': 2, 'groupId': 1}." in str( + error.value + ) - def test_imports_attributes(self, annotations_file_path: Path, classes_file_path: Path): + def test_imports_attributes( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -704,15 +771,22 @@ def test_imports_attributes(self, annotations_file_path: Path, classes_file_path assert annotation_file.annotations - bbox_annotation: Annotation = cast(Annotation, annotation_file.annotations.pop()) + bbox_annotation: Annotation = cast( + Annotation, annotation_file.annotations.pop() + ) assert_bbox(bbox_annotation, 1642.9, 516.5, 217.5, 277.1) annotation_class = bbox_annotation.annotation_class assert_annotation_class(annotation_class, "Person-bbox", "bounding_box") - assert_subannotations(bbox_annotation.subs, [SubAnnotation("attributes", ["Sex:Female", "Emotion:Smiling"])]) + assert_subannotations( + bbox_annotation.subs, + [SubAnnotation("attributes", ["Sex:Female", "Emotion:Smiling"])], + ) - def test_raises_if_tags_is_missing(self, annotations_file_path: Path, classes_file_path: Path): + def test_raises_if_tags_is_missing( + self, annotations_file_path: Path, classes_file_path: Path + ): annotations_json: str = """ { "instances": [ @@ -855,7 +929,10 @@ def assert_line(annotation: Annotation, line: List[Point]) -> None: def assert_annotation_class( - annotation_class: AnnotationClass, name: str, type: str, internal_type: Optional[str] = None + annotation_class: AnnotationClass, + name: str, + type: str, + internal_type: Optional[str] = None, ) -> None: assert annotation_class assert annotation_class.name == name @@ -863,7 +940,9 @@ def assert_annotation_class( assert annotation_class.annotation_internal_type == internal_type -def assert_subannotations(actual_subs: List[SubAnnotation], expected_subs: List[SubAnnotation]) -> None: +def assert_subannotations( + actual_subs: List[SubAnnotation], expected_subs: List[SubAnnotation] +) -> None: assert actual_subs for actual_sub in actual_subs: for expected_sub in expected_subs: diff --git a/tests/darwin/importer/importer_mcpu_test.py b/tests/darwin/importer/importer_mcpu_test.py index 00c4b2a7a..ce7528325 100644 --- a/tests/darwin/importer/importer_mcpu_test.py +++ b/tests/darwin/importer/importer_mcpu_test.py @@ -24,7 +24,9 @@ def setUp(self) -> None: def tearDown(self) -> None: return super().tearDown() - def test_get_multi_cpu_settings__disables_multiprocessing_if_either_core_count_or_core_limit_is_one(self) -> None: + def test_get_multi_cpu_settings__disables_multiprocessing_if_either_core_count_or_core_limit_is_one( + self, + ) -> None: from darwin.importer.importer import _get_multi_cpu_settings as gmcus res_1 = gmcus(None, 1, True) @@ -32,13 +34,17 @@ def test_get_multi_cpu_settings__disables_multiprocessing_if_either_core_count_o self.assertEqual(res_1, (1, False)) self.assertEqual(res_2, (1, False)) - def 
test_get_multi_cpu_settings__sets_cpu_count_to_cpu_count_minus_two_if_omitted(self) -> None: + def test_get_multi_cpu_settings__sets_cpu_count_to_cpu_count_minus_two_if_omitted( + self, + ) -> None: from darwin.importer.importer import _get_multi_cpu_settings as gmcus cpu_limit, _ = gmcus(None, 768, True) self.assertEqual(cpu_limit, 766) - def test_get_multi_cpu_settings__sets_cpu_count_to_cpu_count_if_greater_that_total_available_passed(self) -> None: + def test_get_multi_cpu_settings__sets_cpu_count_to_cpu_count_if_greater_that_total_available_passed( + self, + ) -> None: from darwin.importer.importer import _get_multi_cpu_settings as gmcus cpu_limit, _ = gmcus(900, 768, True) @@ -56,9 +62,15 @@ def test_get_files_for_parsing_dir_handling(self) -> None: with patch.object(Path, "is_dir") as mock_is_dir: with patch.object(Path, "glob") as mock_glob: mock_is_dir.return_value = True - mock_glob.return_value = [Path("example_dir/file1.txt"), Path("example_dir/file2.txt")] + mock_glob.return_value = [ + Path("example_dir/file1.txt"), + Path("example_dir/file2.txt"), + ] result = _get_files_for_parsing([Path("example_dir")]) - assert result == [Path("example_dir/file1.txt"), Path("example_dir/file2.txt")] + assert result == [ + Path("example_dir/file1.txt"), + Path("example_dir/file2.txt"), + ] def test_get_files_for_parsing_single_file(self) -> None: with patch.object(Path, "is_dir") as mock_is_dir: @@ -87,11 +99,13 @@ def tearDown(self) -> None: def test_uses_mpire_if_use_multi_cpu_true( self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock ) -> None: - from darwin.importer.importer import find_and_parse mock_gmcus.return_value = (2, True) - mock_gffp.return_value = [Path("example_dir/file1.txt"), Path("example_dir/file2.txt")] + mock_gffp.return_value = [ + Path("example_dir/file1.txt"), + Path("example_dir/file2.txt"), + ] mock_importer = MagicMock() mock_map = MagicMock() @@ -109,7 +123,9 @@ def __exit__(self, *args) -> None: # type: ignore mock_wp.return_value = MockWorkerPool() mock_map.return_value = ["1", "2"] - result = find_and_parse(mock_importer, [Path("example_dir")], self.mock_console, True, 2) + result = find_and_parse( + mock_importer, [Path("example_dir")], self.mock_console, True, 2 + ) mock_wp.assert_called_once() mock_wp.assert_called_with(2) @@ -119,16 +135,22 @@ def __exit__(self, *args) -> None: # type: ignore @patch("darwin.importer.importer._get_files_for_parsing") @patch("darwin.importer.importer.WorkerPool") - def test_runs_single_threaded_if_use_multi_cpu_false(self, mock_wp: MagicMock, mock_gffp: MagicMock) -> None: - + def test_runs_single_threaded_if_use_multi_cpu_false( + self, mock_wp: MagicMock, mock_gffp: MagicMock + ) -> None: from darwin.importer.importer import find_and_parse - mock_gffp.return_value = [Path("example_dir/file1.txt"), Path("example_dir/file2.txt")] + mock_gffp.return_value = [ + Path("example_dir/file1.txt"), + Path("example_dir/file2.txt"), + ] mock_importer = MagicMock() mock_importer.side_effect = ["1", "2"] - result = find_and_parse(mock_importer, [Path("example_dir")], self.mock_console, False) + result = find_and_parse( + mock_importer, [Path("example_dir")], self.mock_console, False + ) mock_wp.assert_not_called() mock_importer.assert_called() @@ -138,12 +160,16 @@ def test_runs_single_threaded_if_use_multi_cpu_false(self, mock_wp: MagicMock, m @patch("darwin.importer.importer._get_multi_cpu_settings") @patch("darwin.importer.importer._get_files_for_parsing") @patch("darwin.importer.importer.WorkerPool") - def 
test_returns_list_if_solo_value(self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock) -> None: - + def test_returns_list_if_solo_value( + self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock + ) -> None: from darwin.importer.importer import find_and_parse mock_gmcus.return_value = (2, True) - mock_gffp.return_value = [Path("example_dir/file1.txt"), Path("example_dir/file2.txt")] + mock_gffp.return_value = [ + Path("example_dir/file1.txt"), + Path("example_dir/file2.txt"), + ] mock_importer = MagicMock() mock_map = MagicMock() @@ -161,7 +187,9 @@ def __exit__(self, *args) -> None: # type: ignore mock_wp.return_value = MockWorkerPool() mock_map.return_value = "1" - result = find_and_parse(mock_importer, [Path("example_dir")], self.mock_console, True, 2) + result = find_and_parse( + mock_importer, [Path("example_dir")], self.mock_console, True, 2 + ) mock_wp.assert_called_once() mock_wp.assert_called_with(2) @@ -175,11 +203,13 @@ def __exit__(self, *args) -> None: # type: ignore def test_returns_none_if_pool_raises_error( self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock ) -> None: - from darwin.importer.importer import find_and_parse mock_gmcus.return_value = (2, True) - mock_gffp.return_value = [Path("example_dir/file1.txt"), Path("example_dir/file2.txt")] + mock_gffp.return_value = [ + Path("example_dir/file1.txt"), + Path("example_dir/file2.txt"), + ] mock_importer = MagicMock() mock_map = MagicMock() @@ -197,7 +227,9 @@ def __exit__(self, *args) -> None: # type: ignore mock_wp.return_value = MockWorkerPool() mock_map.side_effect = Exception("Test") - result = find_and_parse(mock_importer, [Path("example_dir")], self.mock_console, True, 2) + result = find_and_parse( + mock_importer, [Path("example_dir")], self.mock_console, True, 2 + ) mock_wp.assert_called_once() mock_wp.assert_called_with(2) diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index c68e928ce..240ffb28a 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -106,13 +106,22 @@ def test_handle_subs() -> None: def test__handle_complex_polygon() -> None: from darwin.importer.importer import _handle_complex_polygon - assert _handle_complex_polygon({}, {"example": "data", "example2": "data2", "example3": "data3",},) == { # type: ignore + assert _handle_complex_polygon( + {}, + { + "example": "data", + "example2": "data2", + "example3": "data3", + }, + ) == { # type: ignore "example": "data", "example2": "data2", "example3": "data3", } assert _handle_complex_polygon( - dt.Annotation(dt.AnnotationClass("Class", "bbox"), {"paths": [1, 2, 3, 4, 5]}, [], []), + dt.Annotation( + dt.AnnotationClass("Class", "bbox"), {"paths": [1, 2, 3, 4, 5]}, [], [] + ), {"complex_polygon": "test_data"}, ) == { "polygon": {"path": 1, "additional_paths": [2, 3, 4, 5]}, @@ -127,12 +136,16 @@ def test__annotators_or_reviewers_to_payload() -> None: dt.AnnotationAuthor("Jane Doe", "jane@doe.com"), ] - assert _annotators_or_reviewers_to_payload(authors, dt.AnnotationAuthorRole.ANNOTATOR) == [ + assert _annotators_or_reviewers_to_payload( + authors, dt.AnnotationAuthorRole.ANNOTATOR + ) == [ {"email": "john@doe.com", "role": "annotator"}, {"email": "jane@doe.com", "role": "annotator"}, ] - assert _annotators_or_reviewers_to_payload(authors, dt.AnnotationAuthorRole.REVIEWER) == [ + assert _annotators_or_reviewers_to_payload( + authors, dt.AnnotationAuthorRole.REVIEWER + ) == [ {"email": "john@doe.com", "role": 
"reviewer"}, {"email": "jane@doe.com", "role": "reviewer"}, ] @@ -169,11 +182,15 @@ def test__get_annotation_data() -> None: video_annotation_class = dt.AnnotationClass("video_class", "video") annotation = dt.Annotation(annotation_class, {}, [], []) - video_annotation = dt.VideoAnnotation(video_annotation_class, dict(), dict(), [], False) + video_annotation = dt.VideoAnnotation( + video_annotation_class, dict(), dict(), [], False + ) annotation.data = "TEST DATA" - with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory("_handle_subs") as mock_hs, patch.object( + with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory( + "_handle_subs" + ) as mock_hs, patch.object( dt.VideoAnnotation, "get_data", return_value="TEST VIDEO DATA" ): from darwin.importer.importer import _get_annotation_data @@ -181,32 +198,46 @@ def test__get_annotation_data() -> None: mock_hcp.return_value = "TEST DATA_HCP" mock_hs.return_value = "TEST DATA_HS" - assert _get_annotation_data(video_annotation, "video_class_id", {}) == "TEST VIDEO DATA" + assert ( + _get_annotation_data(video_annotation, "video_class_id", {}) + == "TEST VIDEO DATA" + ) assert _get_annotation_data(annotation, "class_id", {}) == "TEST DATA_HS" assert mock_hcp.call_count == 1 assert mock_hs.call_count == 1 - with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory("_handle_subs") as mock_hs: + with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory( + "_handle_subs" + ) as mock_hs: from darwin.importer.importer import _get_annotation_data mock_hs.return_value = {"TEST_TYPE": "TEST DATA"} - assert _get_annotation_data(annotation, "class_id", {}) == {"TEST_TYPE": "TEST DATA"} + assert _get_annotation_data(annotation, "class_id", {}) == { + "TEST_TYPE": "TEST DATA" + } assert mock_hcp.call_args_list[0][0][0] == annotation assert mock_hcp.call_args_list[0][0][1] == {"TEST_TYPE": "TEST DATA"} def __expectation_factory(i: int, slot_names: List[str]) -> dt.Annotation: - annotation = dt.Annotation(dt.AnnotationClass(f"class_{i}", f"TEST_TYPE_{i}"), {}, [], []) + annotation = dt.Annotation( + dt.AnnotationClass(f"class_{i}", f"TEST_TYPE_{i}"), {}, [], [] + ) annotation.slot_names.extend(slot_names) return annotation expectations_hsr: List[Tuple[dt.Annotation, int, str, dt.Annotation]] = [ - (__expectation_factory(0, []), 1, "default_slot_name", __expectation_factory(0, [])), + ( + __expectation_factory(0, []), + 1, + "default_slot_name", + __expectation_factory(0, []), + ), ( __expectation_factory(1, ["slot", "names"]), 1, @@ -219,13 +250,23 @@ def __expectation_factory(i: int, slot_names: List[str]) -> dt.Annotation: "default_slot_name", __expectation_factory(2, ["default_slot_name"]), ), - (__expectation_factory(3, ["slot", "names"]), 2, "default_slot_name", __expectation_factory(3, ["slot", "names"])), + ( + __expectation_factory(3, ["slot", "names"]), + 2, + "default_slot_name", + __expectation_factory(3, ["slot", "names"]), + ), ] -@pytest.mark.parametrize("annotation, version, default_slot_name, expected", expectations_hsr) +@pytest.mark.parametrize( + "annotation, version, default_slot_name, expected", expectations_hsr +) def test__handle_slot_names( - annotation: dt.Annotation, version: int, default_slot_name: str, expected: dt.Annotation + annotation: dt.Annotation, + version: int, + default_slot_name: str, + expected: dt.Annotation, ) -> None: from darwin.importer.importer import _handle_slot_names @@ -243,7 +284,6 @@ def test_get_overwrite_value() -> None: def 
test__import_annotations() -> None: - with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory( "_handle_reviewers" ) as mock_hr, patch_factory("_handle_annotators") as mock_ha, patch_factory( @@ -272,10 +312,15 @@ def test__import_annotations() -> None: mock_gov.return_value = "test_append_out" mock_hs.return_value = "test_sub" mock_hsn.return_value = dt.Annotation( - dt.AnnotationClass("test_class", "bbox"), {"paths": [1, 2, 3, 4, 5]}, [], ["test_slot_name"] + dt.AnnotationClass("test_class", "bbox"), + {"paths": [1, 2, 3, 4, 5]}, + [], + ["test_slot_name"], ) - annotation = dt.Annotation(dt.AnnotationClass("test_class", "bbox"), {"paths": [1, 2, 3, 4, 5]}, [], []) + annotation = dt.Annotation( + dt.AnnotationClass("test_class", "bbox"), {"paths": [1, 2, 3, 4, 5]}, [], [] + ) _import_annotations( mock_client, diff --git a/tests/darwin/item_sorter_test.py b/tests/darwin/item_sorter_test.py index e910f147c..5620b8e66 100644 --- a/tests/darwin/item_sorter_test.py +++ b/tests/darwin/item_sorter_test.py @@ -65,4 +65,7 @@ def test_raises_when_direction_is_invalid(self): with pytest.raises(ValueError) as error: SortDirection.parse(direction) - assert f"Invalid direction '{direction}', use 'asc' or 'ascending', 'desc' or 'descending'." in str(error.value) + assert ( + f"Invalid direction '{direction}', use 'asc' or 'ascending', 'desc' or 'descending'." + in str(error.value) + ) diff --git a/tests/darwin/path_utils_test.py b/tests/darwin/path_utils_test.py index 6e276ad92..04b6b002a 100644 --- a/tests/darwin/path_utils_test.py +++ b/tests/darwin/path_utils_test.py @@ -12,8 +12,14 @@ def test_path_construction(): assert "/file.name" == (PurePosixPath("/") / "/file.name").as_posix() # note; this is not in /one path assert "/file.name" == (PurePosixPath("/one") / "/file.name").as_posix() - assert "/one/two/file.name" == (PurePosixPath("/") / "one/two/" / "file.name").as_posix() - assert "/one/two/file.name" == (PurePosixPath("/") / "/one/two/" / "file.name").as_posix() + assert ( + "/one/two/file.name" + == (PurePosixPath("/") / "one/two/" / "file.name").as_posix() + ) + assert ( + "/one/two/file.name" + == (PurePosixPath("/") / "/one/two/" / "file.name").as_posix() + ) assert "onlyfile.name" == construct_full_path(None, "onlyfile.name") assert "/file.name" == construct_full_path("/", "file.name") diff --git a/tests/darwin/torch/utils_test.py b/tests/darwin/torch/utils_test.py index 1be480569..28fc2c280 100644 --- a/tests/darwin/torch/utils_test.py +++ b/tests/darwin/torch/utils_test.py @@ -35,7 +35,9 @@ def multiple_overlap_masks() -> Tuple[torch.Tensor, List[int]]: class TestFlattenMasks: - def test_should_raise_with_incorrect_shaped_inputs(self, basic_masks_with_cats: Tuple) -> None: + def test_should_raise_with_incorrect_shaped_inputs( + self, basic_masks_with_cats: Tuple + ) -> None: masks, _ = basic_masks_with_cats cats = [0] with pytest.raises(AssertionError) as error: @@ -52,12 +54,16 @@ def test_should_correctly_set_overlap(self, basic_masks_with_cats: Tuple) -> Non assert torch.equal(unique, expected_unique) assert torch.equal(counts, expected_counts) - def test_should_handle_fully_masked_image(self, multiple_overlap_masks: Tuple) -> None: + def test_should_handle_fully_masked_image( + self, multiple_overlap_masks: Tuple + ) -> None: masks, cats = multiple_overlap_masks flattened: torch.Tensor = flatten_masks_by_category(masks, cats) assert 0 not in np.unique(flattened) - def test_should_handle_multiple_overlaps(self, multiple_overlap_masks: Tuple) -> None: + def 
test_should_handle_multiple_overlaps( + self, multiple_overlap_masks: Tuple + ) -> None: masks, cats = multiple_overlap_masks flattened: torch.Tensor = flatten_masks_by_category(masks, cats) unique, counts = flattened.unique(return_counts=True) @@ -69,23 +75,32 @@ def test_should_handle_multiple_overlaps(self, multiple_overlap_masks: Tuple) -> assert torch.equal(unique, expected_unique) assert torch.equal(counts, expected_counts) + class TestClampBboxToImageSize: def test_clamp_bbox_xyxy(self): - annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 25.0, 25.0]])} + annotations = { + "boxes": torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 25.0, 25.0]]) + } width = 20 height = 20 - - clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xyxy") + + clamped_annotations = clamp_bbox_to_image_size( + annotations, width, height, format="xyxy" + ) expected_boxes = torch.tensor([[5.0, 5.0, 15.0, 15.0], [0.0, 0.0, 19.0, 19.0]]) - - assert torch.equal(clamped_annotations['boxes'], expected_boxes) + + assert torch.equal(clamped_annotations["boxes"], expected_boxes) def test_clamp_bbox_xywh(self): - annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 30.0, 30.0]])} + annotations = { + "boxes": torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 30.0, 30.0]]) + } width = 20 height = 20 - - clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xywh") + + clamped_annotations = clamp_bbox_to_image_size( + annotations, width, height, format="xywh" + ) expected_boxes = torch.tensor([[5.0, 5.0, 14.0, 14.0], [0.0, 0.0, 19.0, 19.0]]) - - assert torch.equal(clamped_annotations['boxes'], expected_boxes) + + assert torch.equal(clamped_annotations["boxes"], expected_boxes) diff --git a/tests/darwin/utils/find_files_test.py b/tests/darwin/utils/find_files_test.py index eea229399..184ae9925 100644 --- a/tests/darwin/utils/find_files_test.py +++ b/tests/darwin/utils/find_files_test.py @@ -20,7 +20,9 @@ class FindFileTestCase(TestCase): "/testdir/testdir2.invalidextension", ] fake_supported_files = [f"testdir/testfile{ext}" for ext in SUPPORTED_EXTENSIONS] - fake_supported_files_varied_case = [f"testdir/testdir2/testfile{ext.upper()}" for ext in SUPPORTED_EXTENSIONS] + fake_supported_files_varied_case = [ + f"testdir/testdir2/testfile{ext.upper()}" for ext in SUPPORTED_EXTENSIONS + ] fake_files = [ "testdir/testdir2/testfile.png", "testdir/testdir2/testfile2.png", @@ -46,7 +48,9 @@ def test_find_files_returns_a_list_of_files(self, mock_is_extension_allowed): [self.assertIsInstance(file, Path) for file in output] @patch("darwin.utils.is_extension_allowed_by_filename", return_value=True) - def test_find_files_excludes_files_in_excluded_list(self, mock_is_extension_allowed): + def test_find_files_excludes_files_in_excluded_list( + self, mock_is_extension_allowed + ): output = find_files( self.fake_files, files_to_exclude=[ @@ -76,12 +80,18 @@ def test_uses_correct_glob_if_recursive(self, mock_is_extension_allowed): self.assertEqual(mock_glob.call_args[0][0], "**/*") @patch("darwin.utils.is_extension_allowed_by_filename") - def test_glob_results_in_correct_call_to_is_extension_allowed_by_filename(self, mock_is_extension_allowed): + def test_glob_results_in_correct_call_to_is_extension_allowed_by_filename( + self, mock_is_extension_allowed + ): mock_is_extension_allowed.return_value = True with patch("darwin.utils.Path.is_dir") as mock_is_dir: with patch("darwin.utils.Path.glob") as mock_glob: mock_is_dir.return_value = 
True - mock_glob.return_value = [Path("1.png"), Path("1/b/c/2.png"), Path("1/b/c/3.png")] + mock_glob.return_value = [ + Path("1.png"), + Path("1/b/c/2.png"), + Path("1/b/c/3.png"), + ] result = find_files(["1"], files_to_exclude=[], recursive=True) @@ -128,32 +138,47 @@ def dependency_factory(self) -> Dependencies: return self.Dependencies(ieabf=ieabf, iveabf=iveabf, iieabf=iieabf) def test_ieabf_returns_true_for_a_valid_extension(self): - valid_extensions = [*self.fake_supported_files, *self.fake_supported_files_varied_case] + valid_extensions = [ + *self.fake_supported_files, + *self.fake_supported_files_varied_case, + ] results = [self.dependency_factory().ieabf(file) for file in valid_extensions] self.assertTrue(all(results)) def test_ieabf_returns_false_for_an_invalid_extension(self): - results = [self.dependency_factory().ieabf(file) for file in self.fake_invalid_files] + results = [ + self.dependency_factory().ieabf(file) for file in self.fake_invalid_files + ] self.assertFalse(all(results)) def test_iveabf_returns_true_for_a_valid_extension(self): - results = [self.dependency_factory().iveabf(file) for file in SUPPORTED_VIDEO_EXTENSIONS] + results = [ + self.dependency_factory().iveabf(file) + for file in SUPPORTED_VIDEO_EXTENSIONS + ] self.assertTrue(all(results)) def test_iveabf_returns_false_for_an_invalid_extension(self): - results = [self.dependency_factory().iveabf(file) for file in self.fake_invalid_files] + results = [ + self.dependency_factory().iveabf(file) for file in self.fake_invalid_files + ] self.assertFalse(all(results)) def test_iieabf_returns_true_for_a_valid_extension(self): - results = [self.dependency_factory().iieabf(file) for file in SUPPORTED_IMAGE_EXTENSIONS] + results = [ + self.dependency_factory().iieabf(file) + for file in SUPPORTED_IMAGE_EXTENSIONS + ] self.assertTrue(all(results)) def test_iieabf_returns_false_for_an_invalid_extension(self): - results = [self.dependency_factory().iieabf(file) for file in self.fake_invalid_files] + results = [ + self.dependency_factory().iieabf(file) for file in self.fake_invalid_files + ] self.assertFalse(all(results)) diff --git a/tests/darwin/utils_test.py b/tests/darwin/utils_test.py index deecae7f1..aef7be2f3 100644 --- a/tests/darwin/utils_test.py +++ b/tests/darwin/utils_test.py @@ -149,7 +149,9 @@ def test_parses_darwin_images_correctly(self, tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg" assert annotation_file.dataset_name == None - assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="") + assert annotation_file.version == dt.AnnotationFileVersion( + major=1, minor=0, suffix="" + ) assert len(annotation_file.annotations) == 2 assert len(annotation_file.annotation_classes) == 2 @@ -236,34 +238,59 @@ def test_parses_darwin_videos_correctly(self, tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "above tractor.mp4" assert annotation_file.dataset_name == None - assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="") + assert annotation_file.version == dt.AnnotationFileVersion( + major=1, minor=0, suffix="" + ) assert len(annotation_file.annotations) == 1 assert len(annotation_file.annotation_classes) == 1 assert annotation_file.is_video assert annotation_file.image_width == 3840 assert annotation_file.image_height == 2160 - assert annotation_file.image_url == "https://my-website.com/api/videos/209/original" - assert 
annotation_file.workview_url == "https://my-website.com/workview?dataset=102&image=530" + assert ( + annotation_file.image_url + == "https://my-website.com/api/videos/209/original" + ) + assert ( + annotation_file.workview_url + == "https://my-website.com/workview?dataset=102&image=530" + ) assert not annotation_file.seq - assert annotation_file.frame_urls == ["https://my-website.com/api/videos/209/frames/0"] + assert annotation_file.frame_urls == [ + "https://my-website.com/api/videos/209/frames/0" + ] assert annotation_file.remote_path == "/" assert annotation_file.annotations == [ dt.VideoAnnotation( annotation_class=dt.AnnotationClass( - name="Hand", annotation_type="polygon", annotation_internal_type=None + name="Hand", + annotation_type="polygon", + annotation_internal_type=None, ), frames={ 3: dt.Annotation( annotation_class=dt.AnnotationClass( - name="Hand", annotation_type="polygon", annotation_internal_type=None + name="Hand", + annotation_type="polygon", + annotation_internal_type=None, ), data={ - "path": [{"x": 748.0, "y": 732.0}, {"x": 751.0, "y": 735.0}, {"x": 748.0, "y": 733.0}], - "bounding_box": {"x": 363.0, "y": 701.0, "w": 400.0, "h": 547.0}, + "path": [ + {"x": 748.0, "y": 732.0}, + {"x": 751.0, "y": 735.0}, + {"x": 748.0, "y": 733.0}, + ], + "bounding_box": { + "x": 363.0, + "y": 701.0, + "w": 400.0, + "h": 547.0, + }, }, - subs=[dt.SubAnnotation(annotation_type="instance_id", data=119)], + subs=[ + dt.SubAnnotation(annotation_type="instance_id", data=119) + ], ) }, keyframes={3: True}, @@ -356,11 +383,15 @@ def test_parses_darwin_v2_images_correctly(self, tmp_path): assert annotation_file.filename == "item-0.jpg" assert annotation_file.dataset_name == "Dataset 0" assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e" - assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="") + assert annotation_file.version == dt.AnnotationFileVersion( + major=2, minor=0, suffix="" + ) assert len(annotation_file.annotations) == 1 assert len(annotation_file.annotation_classes) == 1 - assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + assert ( + annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + ) assert not annotation_file.is_video assert annotation_file.image_width == 123 assert annotation_file.image_height == 456 @@ -463,13 +494,20 @@ def test_parses_darwin_v2_videos_correctly(self, tmp_path): assert annotation_file.filename == "item-0.mp4" assert annotation_file.dataset_name == "Dataset 0" assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e" - assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="") + assert annotation_file.version == dt.AnnotationFileVersion( + major=2, minor=0, suffix="" + ) assert len(annotation_file.annotations) == 1 assert len(annotation_file.annotation_classes) == 1 - assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + assert ( + annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + ) assert list(annotation_file.annotations[0].frames.keys()) == [3] - assert annotation_file.annotations[0].frames[3].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + assert ( + annotation_file.annotations[0].frames[3].id + == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + ) assert annotation_file.is_video assert annotation_file.image_width == 123 assert annotation_file.image_height == 456 @@ -599,8 +637,13 @@ def test_imports_a_skeleton(self, tmp_path): annotation_file: 
dt.AnnotationFile = parse_darwin_json(import_file, None) - assert annotation_file.annotations[0].annotation_class.annotation_type == "polygon" - assert annotation_file.annotations[1].annotation_class.annotation_type == "skeleton" + assert ( + annotation_file.annotations[0].annotation_class.annotation_type == "polygon" + ) + assert ( + annotation_file.annotations[1].annotation_class.annotation_type + == "skeleton" + ) def test_imports_multiple_skeletetons(self, tmp_path): content = """ @@ -696,9 +739,17 @@ def test_imports_multiple_skeletetons(self, tmp_path): annotation_file: dt.AnnotationFile = parse_darwin_json(import_file, None) - assert annotation_file.annotations[0].annotation_class.annotation_type == "polygon" - assert annotation_file.annotations[1].annotation_class.annotation_type == "skeleton" - assert annotation_file.annotations[2].annotation_class.annotation_type == "skeleton" + assert ( + annotation_file.annotations[0].annotation_class.annotation_type == "polygon" + ) + assert ( + annotation_file.annotations[1].annotation_class.annotation_type + == "skeleton" + ) + assert ( + annotation_file.annotations[2].annotation_class.annotation_type + == "skeleton" + ) def test_returns_true_w_json_content_type(self): response: Response = Response() @@ -737,7 +788,9 @@ def good_raster_annotation(self) -> dt.JSONFreeForm: "slot_names": ["0"], } - def test_parses_a_raster_annotation(self, good_raster_annotation: dt.JSONFreeForm) -> None: + def test_parses_a_raster_annotation( + self, good_raster_annotation: dt.JSONFreeForm + ) -> None: annotation = _parse_darwin_raster_annotation(good_raster_annotation) assert annotation is not None @@ -761,7 +814,9 @@ def test_raises_value_error_for_missing_top_level_fields( with pytest.raises(ValueError): _parse_darwin_raster_annotation(annotation) - @pytest.mark.parametrize("parameter_name", ["dense_rle", "mask_annotation_ids_mapping", "total_pixels"]) + @pytest.mark.parametrize( + "parameter_name", ["dense_rle", "mask_annotation_ids_mapping", "total_pixels"] + ) def test_raises_value_error_for_missing_raster_layer_fields( self, good_raster_annotation: dt.JSONFreeForm, parameter_name: str ) -> None: @@ -783,7 +838,9 @@ def good_mask_annotation(self) -> dt.JSONFreeForm: "slot_names": ["0"], } - def test_parses_a_raster_annotation(self, good_mask_annotation: dt.JSONFreeForm) -> None: + def test_parses_a_raster_annotation( + self, good_mask_annotation: dt.JSONFreeForm + ) -> None: annotation = _parse_darwin_mask_annotation(good_mask_annotation) assert annotation is not None @@ -804,13 +861,17 @@ def test_raises_value_error_for_missing_top_level_fields( with pytest.raises(ValueError): _parse_darwin_raster_annotation(annotation) - def test_raises_value_error_for_missing_mask_fields(self, good_mask_annotation: dt.JSONFreeForm) -> None: + def test_raises_value_error_for_missing_mask_fields( + self, good_mask_annotation: dt.JSONFreeForm + ) -> None: annotation = good_mask_annotation del annotation["mask"]["sparse_rle"] with pytest.raises(ValueError): _parse_darwin_raster_annotation(annotation) - def test_raises_value_error_for_invalid_mask_fields(self, good_mask_annotation: dt.JSONFreeForm) -> None: + def test_raises_value_error_for_invalid_mask_fields( + self, good_mask_annotation: dt.JSONFreeForm + ) -> None: annotation = good_mask_annotation annotation["mask"]["sparse_rle"] = "invalid" with pytest.raises(ValueError): diff --git a/tests/e2e_test_internals/test_run_cli_command.py b/tests/e2e_test_internals/test_run_cli_command.py index c8ce45201..4fae8aa54 100644 
--- a/tests/e2e_test_internals/test_run_cli_command.py
+++ b/tests/e2e_test_internals/test_run_cli_command.py
@@ -15,12 +15,16 @@ def test_does_not_allow_directory_traversal() -> None:
     assert excinfo.value == "Cannot pass directory traversal to 'run_cli_command'."
 
     with pytest.raises(DarwinException) as excinfo:
-        run_cli_command("darwin --help", working_directory="/usr/bin/../", server_wait=0)
+        run_cli_command(
+            "darwin --help", working_directory="/usr/bin/../", server_wait=0
+        )
     assert excinfo.value == "Cannot pass directory traversal to 'run_cli_command'."
 
 
 @mock.patch("e2e_tests.helpers.run")
-def test_passes_working_directory_to_run_cli_command(mock_subprocess_run: mock.Mock) -> None:
+def test_passes_working_directory_to_run_cli_command(
+    mock_subprocess_run: mock.Mock,
+) -> None:
     mock_subprocess_run.reset_mock()
     run_cli_command("darwin --help", "/usr/bin", server_wait=0)
 
@@ -30,8 +34,12 @@ def test_passes_working_directory_to_run_cli_command(mock_subprocess_run: mock.M
 
 
 @mock.patch("e2e_tests.helpers.run")
-def test_passes_back_returncode_stdout_and_stderr(mock_subprocess_run: mock.Mock) -> None:
-    CompletedProcess = namedtuple("CompletedProcess", ["returncode", "stdout", "stderr"])
+def test_passes_back_returncode_stdout_and_stderr(
+    mock_subprocess_run: mock.Mock,
+) -> None:
+    CompletedProcess = namedtuple(
+        "CompletedProcess", ["returncode", "stdout", "stderr"]
+    )
     mocked_output = CompletedProcess(returncode=137, stdout=b"stdout", stderr=b"stderr")
     mock_subprocess_run.return_value = mocked_output
 
@@ -46,7 +54,9 @@ def test_passes_back_returncode_stdout_and_stderr(mock_subprocess_Mock
 
 
 @mock.patch("e2e_tests.helpers.run")
-def test_does_not_pass_working_directory_to_run_cli_command(mock_subprocess_run: mock.Mock) -> None:
+def test_does_not_pass_working_directory_to_run_cli_command(
+    mock_subprocess_run: mock.Mock,
+) -> None:
     mock_subprocess_run.reset_mock()
     run_cli_command("darwin --help", server_wait=0)
 
diff --git a/tests/fixtures.py b/tests/fixtures.py
index f59fead65..89394d24f 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -49,7 +49,9 @@ def release_name() -> str:
 
 
 @pytest.fixture
-def team_dataset_path(darwin_datasets_path: Path, team_slug: str, dataset_name: str) -> Path:
+def team_dataset_path(
+    darwin_datasets_path: Path, team_slug: str, dataset_name: str
+) -> Path:
     return darwin_datasets_path / team_slug / dataset_name
 
 
@@ -114,7 +116,9 @@ def local_config_file(
     config.put(["teams", team_slug, "datasets_dir"], str(darwin_datasets_path))
 
     config.put(["teams", team_slug_darwin_json_v2, "api_key"], "mock_api_key")
-    config.put(["teams", team_slug_darwin_json_v2, "datasets_dir"], str(darwin_datasets_path))
+    config.put(
+        ["teams", team_slug_darwin_json_v2, "datasets_dir"], str(darwin_datasets_path)
+    )
 
     # Useful if the test needs to reuse attrs
     yield config