Skip to content

Commit

Permalink
adapted always-include option to use mime-likes
Browse files Browse the repository at this point in the history
  • Loading branch information
tclose committed Feb 15, 2024
1 parent 12c2988 commit 6b9119f
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 32 deletions.
39 changes: 21 additions & 18 deletions xnat_ingest/cli/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,16 @@
@click.option(
"--always-include",
"-i",
default=None,
type=click.Choice(("all", "dicom", "associated"), case_sensitive=False),
default=(),
type=str,
multiple=True,
envvar="XNAT_INGEST_ALWAYSINCLUDE",
help=(
"Whether to include scans in the upload regardless of whether they are "
"specified in a column or not"
        "Scan types to always include in the upload, regardless of whether they are "
"specified in a column or not. Specified using the scan types IANA mime-type or "
"fileformats \"mime-like\" (see https://arcanaframework.github.io/fileformats/), "
        "e.g. 'application/json', 'medimage/dicom-series', "
        "'image/jpeg'. Use 'core/file-set' to include all file-types in the session"
),
)
@click.option(
Expand Down Expand Up @@ -145,7 +149,7 @@ def upload(
log_file: Path,
log_emails: LogEmail,
mail_server: MailServer,
always_include: str,
always_include: ty.Sequence[str],
raise_errors: bool,
store_credentials: ty.Tuple[str, str],
work_dir: ty.Optional[Path],
Expand Down Expand Up @@ -275,6 +279,18 @@ def iter_staged_sessions():

# Create corresponding session on XNAT
xproject = xnat_repo.connection.projects[session.project_id]

# Access Arcana dataset associated with project
try:
dataset = Dataset.load(session.project_id, xnat_repo)
except Exception as e:
logger.warning(
e,
f"Did not load dataset definition from {session.project_id} project "
f"on {server}. Only the scan types specified in --always-include",
)
dataset = None

xsubject = xnat_repo.connection.classes.SubjectData(
label=session.subject_id, parent=xproject
)
Expand All @@ -297,19 +313,6 @@ def iter_staged_sessions():
f"{session.project_id}:{session.subject_id}:{session.visit_id}"
)

# Access Arcana dataset associated with project
try:
dataset = Dataset.load(session.project_id, xnat_repo)
except Exception as e:
add_exc_note(
e,
f"Did not load dataset definition from {session.project_id} project "
f"on {server}. Please set one up using the Arcana command line tool "
"in order to check presence of required scans and associated "
"files (e.g. raw-data exports)",
)
raise e

# Anonymise DICOMs and save to directory prior to upload
if always_include:
logger.info(
Expand Down
23 changes: 9 additions & 14 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,19 @@ def dicom_dirs(self) -> ty.List[Path]:

def select_resources(
self,
dataset: Dataset,
always_include: ty.Optional[str] = None,
dataset: ty.Optional[Dataset],
always_include: ty.Sequence[str] = (),
) -> ty.Iterator[ty.Tuple[str, str, str, FileSet]]:
"""Returns selected resources that match the columns in the dataset definition
Parameters
----------
dataset : Dataset
Arcana dataset definition
always_include : str, optional
whether to scans regardless of whether they are explicitly
specified by a column in the dataset or not. Valid options are
'all', 'dicoms', 'associated'
        always_include : sequence[str]
            MIME types, or fileformats "mime-like" identifiers (see
            https://arcanaframework.github.io/fileformats/), of file formats to always
            include in the upload, regardless of whether they are specified in the
            dataset or not
Yields
------
Expand All @@ -132,16 +132,11 @@ def select_resources(
store = MockDataStore(self)

uploaded = set()
if always_include:
for mime_like in always_include:
fileformat = from_mime(mime_like)
for scan in self.scans.values():
for resource_name, fileset in scan.resources.items():
if (
always_include == "all"
or always_include == "dicom"
and resource_name == "DICOM"
or always_include == "associated"
and resource_name != "DICOM"
):
if isinstance(fileset, fileformat):
uploaded.add((scan.id, resource_name))
yield scan.id, scan.type, resource_name, fileset
for column in dataset.columns.values():
Expand Down

0 comments on commit 6b9119f

Please sign in to comment.