Commit

Merge branch 'master' into DAR-2991
JBWilkie authored Jul 23, 2024
2 parents ed5fa54 + ce48079 commit c7afdc8
Showing 15 changed files with 404 additions and 62 deletions.
10 changes: 8 additions & 2 deletions darwin/cli.py
@@ -100,7 +100,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None:
         print(__version__)

     elif args.command == "convert":
-        f.convert(args.format, args.files, args.output_dir)
+        f.convert(args.format, args.files, args.output_dir, legacy=args.legacy)
     elif args.command == "dataset":
         if args.action == "remote":
             f.list_remote_datasets(args.all, args.team)
@@ -156,6 +156,9 @@ def _run(args: Namespace, parser: ArgumentParser) -> None:
                 args.force_slots,
                 args.ignore_slots,
                 args.no_folders,
+                args.retry,
+                args.retry_timeout,
+                args.retry_interval,
             )
         elif args.action == "import":
             f.dataset_import(
@@ -168,10 +171,13 @@ def _run(args: Namespace, parser: ArgumentParser) -> None:
                 args.import_annotators,
                 args.import_reviewers,
                 args.overwrite,
+                legacy=args.legacy,
                 cpu_limit=args.cpu_limit,
             )
         elif args.action == "convert":
-            f.dataset_convert(args.dataset, args.format, args.output_dir)
+            f.dataset_convert(
+                args.dataset, args.format, args.output_dir, legacy=args.legacy
+            )
         elif args.action == "set-file-status":
             f.set_file_status(args.dataset, args.status, args.files)
         elif args.action == "delete-files":
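
These cli.py hunks only forward the new options; the argument-parser changes that define the flags are not among the files shown. A minimal sketch of the equivalent direct call into cli_functions, assuming the parser wires flags named after the args attributes above:

from darwin import cli_functions as f

# Roughly `darwin dataset pull team-slug/dataset-slug --retry`
# (the CLI flag spellings and the slug are assumptions; only the
# args.* attribute names appear in this diff).
f.pull_dataset(
    "team-slug/dataset-slug",
    retry=True,           # keep checking while the release is still processing
    retry_timeout=600,    # give up after 600 seconds in total
    retry_interval=10,    # re-check every 10 seconds
)
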
49 changes: 45 additions & 4 deletions darwin/cli_functions.py
@@ -5,6 +5,7 @@
 import os
 import sys
 import traceback
+from functools import partial
 from glob import glob
 from itertools import tee
 from pathlib import Path
@@ -411,6 +412,9 @@ def pull_dataset(
     force_slots: bool = False,
     ignore_slots: bool = False,
     no_folders: bool = False,
+    retry: bool = False,
+    retry_timeout: int = 600,
+    retry_interval: int = 10,
 ) -> None:
     """
     Downloads a remote dataset (images and annotations) in the datasets directory.
@@ -431,6 +435,12 @@ def pull_dataset(
         Pulls all slots of items into deeper file structure ({prefix}/{item_name}/{slot_name}/{file_name})
     no_folders: bool
         Does not recreate the folders in the dataset. Defaults to False.
+    retry: bool
+        If True, will repeatedly try to download the release if it is still processing until the timeout is reached.
+    retry_timeout: int
+        If retrying, total time to wait for the release to be ready for download
+    retry_interval: int
+        If retrying, time to wait between retries of checking if the release is ready for download.
     """
     version: str = DatasetIdentifier.parse(dataset_slug).version or "latest"
     client: Client = _load_client(offline=False, maybe_guest=True)
@@ -448,19 +458,22 @@ def pull_dataset(
     if no_folders:
         folders = False
     try:
-        release: Release = dataset.get_release(version)
+        release: Release = dataset.get_release(version, retry)
         dataset.pull(
             release=release,
             only_annotations=only_annotations,
             use_folders=folders,
             video_frames=video_frames,
             force_slots=force_slots,
             ignore_slots=ignore_slots,
+            retry=retry,
+            retry_timeout=retry_timeout,
+            retry_interval=retry_interval,
         )
         print_new_version_info(client)
     except NotFound:
         _error(
-            f"Version '{dataset.identifier}:{version}' does not exist "
+            f"Version '{dataset.identifier}:{version}' does not exist. "
             f"Use 'darwin dataset releases' to list all available versions."
         )
     except UnsupportedExportFormat as uef:
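
The retry behaviour itself lives in RemoteDataset.get_release and RemoteDataset.pull, neither of which is shown in this diff. Under the semantics the new docstring describes, the waiting logic presumably reduces to a bounded polling loop along these lines (a sketch under that assumption; is_ready is a hypothetical readiness check, not a darwin-py function):

import time

def wait_until_ready(is_ready, retry_timeout: int = 600, retry_interval: int = 10) -> None:
    # Poll until the release is ready for download or the timeout elapses.
    deadline = time.monotonic() + retry_timeout
    while not is_ready():
        if time.monotonic() >= deadline:
            raise TimeoutError(f"release still processing after {retry_timeout}s")
        time.sleep(retry_interval)  # still processing; wait before re-checking
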
@@ -856,6 +869,7 @@ def dataset_import(
     import_annotators: bool = False,
     import_reviewers: bool = False,
     overwrite: bool = False,
+    legacy: bool = False,
     use_multi_cpu: bool = False,
     cpu_limit: Optional[int] = None,
 ) -> None:
@@ -889,6 +903,9 @@ def dataset_import(
     overwrite : bool, default: False
         If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present.
         If ``False`` this warning will be skipped and the import will overwrite the current annotations without warning.
+    legacy : bool, default: False
+        If ``True`` it will not resize the annotations to be isotropic.
+        If ``False`` it will resize the annotations to be isotropic.
     use_multi_cpu : bool, default: False
         If ``True`` it will use all multiple CPUs to speed up the import process.
     cpu_limit : Optional[int], default: Core count - 2
@@ -899,6 +916,10 @@ def dataset_import(

     try:
         importer: ImportParser = get_importer(format)
+
+        if format == "nifti" and legacy:
+            importer = partial(importer, legacy=True)
+
         dataset: RemoteDataset = client.get_remote_dataset(
             dataset_identifier=dataset_slug
         )
@@ -918,6 +939,7 @@ def dataset_import(
             overwrite,
             use_multi_cpu,
             cpu_limit,
+            no_legacy=False if legacy else True,
         )

     except ImporterNotFoundError:
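
Both the import and convert paths use the same functools.partial trick: the nifti-only legacy flag is pre-bound once, so downstream code keeps calling the importer with its usual arguments. A self-contained sketch of the pattern; import_annotations here is a hypothetical stand-in, not the darwin-py importer:

from functools import partial

def import_annotations(path: str, legacy: bool = False) -> None:
    # Stand-in for a format importer such as the nifti parser (hypothetical).
    mode = "legacy (no isotropic resize)" if legacy else "isotropic resize"
    print(f"importing {path} with {mode}")

fmt, legacy = "nifti", True
importer = import_annotations
if fmt == "nifti" and legacy:
    # Pre-bind the flag; callers of importer(path) need no changes.
    importer = partial(importer, legacy=True)

importer("scan_001.nii.gz")
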
@@ -1170,7 +1192,10 @@ def validate_schemas(


 def dataset_convert(
-    dataset_identifier: str, format: str, output_dir: Optional[PathLike] = None
+    dataset_identifier: str,
+    format: str,
+    output_dir: Optional[PathLike] = None,
+    legacy: bool = False,
 ) -> None:
     """
     Converts the annotations from the given dataset to the given format.
@@ -1186,12 +1211,20 @@ def dataset_convert(
     output_dir : Optional[PathLike], default: None
         The folder where the exported annotation files will be. If None it will be the inside the
         annotations folder of the dataset under 'other_formats/{format}'.
+    legacy : bool, default: False
+        This flag is only for the nifti format.
+        If True, it will not export the annotations using legacy calculations.
+        If False, it will resize the annotations using the new calculation by dividing with pixdims.
     """
     identifier: DatasetIdentifier = DatasetIdentifier.parse(dataset_identifier)
     client: Client = _load_client(team_slug=identifier.team_slug)

     try:
         parser: ExportParser = get_exporter(format)
+
+        if format == "nifti" and legacy:
+            parser = partial(parser, legacy=True)
+
         dataset: RemoteDataset = client.get_remote_dataset(
             dataset_identifier=identifier
         )
@@ -1222,7 +1255,9 @@ def dataset_convert(
         _error(f"No dataset with name '{e.name}'")


-def convert(format: str, files: List[PathLike], output_dir: Path) -> None:
+def convert(
+    format: str, files: List[PathLike], output_dir: Path, legacy: bool = False
+) -> None:
     """
     Converts the given files to the specified format.

@@ -1234,9 +1269,15 @@ def convert(
         List of files to be converted.
     output_dir: Path
         Folder where the exported annotations will be placed.
+    legacy: bool, default: False
+        This flag is only for the nifti format.
+        If True, it will not export the annotations using legacy calculations.
+        If False, it will resize the annotations using the new calculation by dividing with pixdims.
     """
     try:
         parser: ExportParser = get_exporter(format)
+        if format == "nifti" and legacy:
+            parser = partial(parser, legacy=True)
     except ExporterNotFoundError:
         _error(f"Unsupported export format, currently supported: {export_formats}")
     except AttributeError:
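
As a usage sketch of the updated export surface (the file and directory names are hypothetical), a legacy nifti conversion would look like this:

from pathlib import Path

from darwin.cli_functions import convert

# `legacy` only affects the nifti format, per the docstring above.
convert(
    "nifti",
    files=[Path("annotations/scan_001.json")],
    output_dir=Path("exports/nifti"),
    legacy=True,
)
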
15 changes: 15 additions & 0 deletions darwin/dataset/release.py
@@ -1,5 +1,6 @@
 import datetime
 import shutil
+from enum import Enum
 from pathlib import Path
 from typing import Any, Dict, Optional

@@ -8,6 +9,12 @@
 from darwin.dataset.identifier import DatasetIdentifier


+class ReleaseStatus(Enum):
+    PENDING = "pending"
+    COMPLETE = "complete"
+    FAILED = "failed"
+
+
 class Release:
     """
     Represents a release/export. Releases created this way can only contain items with 'completed'
@@ -23,6 +30,8 @@ class Release:
         The version of the ``Release``.
     name : str
         The name of the ``Release``.
+    status : ReleaseStatus
+        The status of the ``Release``.
     url : Optional[str]
         The full url used to download the ``Release``.
     export_date : datetime.datetime
@@ -48,6 +57,8 @@ class Release:
         The version of the ``Release``.
     name : str
         The name of the ``Release``.
+    status : ReleaseStatus
+        The status of the ``Release``.
     url : Optional[str]
         The full url used to download the ``Release``.
     export_date : datetime.datetime
@@ -70,6 +81,7 @@ def __init__(
         team_slug: str,
         version: str,
         name: str,
+        status: ReleaseStatus,
         url: Optional[str],
         export_date: datetime.datetime,
         image_count: Optional[int],
@@ -82,6 +94,7 @@ def __init__(
         self.team_slug = team_slug
         self.version = version
         self.name = name
+        self.status = ReleaseStatus(status)
         self.url = url
         self.export_date = export_date
         self.image_count = image_count
@@ -156,6 +169,7 @@ def parse_json(
                 team_slug=team_slug,
                 version=payload["version"],
                 name=payload["name"],
+                status=payload["status"],
                 export_date=export_date,
                 url=None,
                 available=False,
@@ -170,6 +184,7 @@ def parse_json(
                 team_slug=team_slug,
                 version=payload["version"],
                 name=payload["name"],
+                status=payload["status"],
                 image_count=payload["metadata"]["num_images"],
                 class_count=len(payload["metadata"]["annotation_classes"]),
                 export_date=export_date,
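
Because __init__ stores ReleaseStatus(status), parse_json can pass the raw payload["status"] string straight through and the Enum constructor performs the coercion. A quick illustration of that standard-library Enum behaviour:

from enum import Enum

class ReleaseStatus(Enum):
    PENDING = "pending"
    COMPLETE = "complete"
    FAILED = "failed"

assert ReleaseStatus("pending") is ReleaseStatus.PENDING                # string -> member
assert ReleaseStatus(ReleaseStatus.COMPLETE) is ReleaseStatus.COMPLETE  # member passes through

try:
    ReleaseStatus("bogus")
except ValueError as err:
    print(err)  # 'bogus' is not a valid ReleaseStatus
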
[Diffs for the remaining 12 changed files did not load and are omitted here.]
