From 6b9119f51e876aa52afad40f080821fb1dea58a1 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Thu, 15 Feb 2024 22:24:00 +1100
Subject: [PATCH] adapted always-include option to use mime-likes

---
 xnat_ingest/cli/upload.py | 39 +++++++++++++++++++++------------------
 xnat_ingest/session.py    | 23 +++++++++--------------
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/xnat_ingest/cli/upload.py b/xnat_ingest/cli/upload.py
index 897f1ae..c8a9fb0 100644
--- a/xnat_ingest/cli/upload.py
+++ b/xnat_ingest/cli/upload.py
@@ -95,12 +95,16 @@
 @click.option(
     "--always-include",
     "-i",
-    default=None,
-    type=click.Choice(("all", "dicom", "associated"), case_sensitive=False),
+    default=(),
+    type=str,
+    multiple=True,
     envvar="XNAT_INGEST_ALWAYSINCLUDE",
     help=(
-        "Whether to include scans in the upload regardless of whether they are "
-        "specified in a column or not"
+        "Scan types to always include in the upload, regardless of whether they are"
+        "specified in a column or not. Specified using the scan types IANA mime-type or "
+        "fileformats \"mime-like\" (see https://arcanaframework.github.io/fileformats/), "
+        "e.g. 'application/json', 'medimage/dicom-series', "
+        "'image/jpeg'). Use 'core/file-set' to include all file-types in the session"
     ),
 )
 @click.option(
@@ -145,7 +149,7 @@ def upload(
     log_file: Path,
     log_emails: LogEmail,
     mail_server: MailServer,
-    always_include: str,
+    always_include: ty.Sequence[str],
     raise_errors: bool,
     store_credentials: ty.Tuple[str, str],
     work_dir: ty.Optional[Path],
@@ -275,6 +279,18 @@ def iter_staged_sessions():
 
             # Create corresponding session on XNAT
             xproject = xnat_repo.connection.projects[session.project_id]
+
+            # Access Arcana dataset associated with project
+            try:
+                dataset = Dataset.load(session.project_id, xnat_repo)
+            except Exception as e:
+                logger.warning(
+                    e,
+                    f"Did not load dataset definition from {session.project_id} project "
+                    f"on {server}. Only the scan types specified in --always-include",
+                )
+                dataset = None
+
             xsubject = xnat_repo.connection.classes.SubjectData(
                 label=session.subject_id, parent=xproject
             )
@@ -297,19 +313,6 @@ def iter_staged_sessions():
                 f"{session.project_id}:{session.subject_id}:{session.visit_id}"
             )
 
-            # Access Arcana dataset associated with project
-            try:
-                dataset = Dataset.load(session.project_id, xnat_repo)
-            except Exception as e:
-                add_exc_note(
-                    e,
-                    f"Did not load dataset definition from {session.project_id} project "
-                    f"on {server}. Please set one up using the Arcana command line tool "
-                    "in order to check presence of required scans and associated "
-                    "files (e.g. raw-data exports)",
-                )
-                raise e
-
             # Anonymise DICOMs and save to directory prior to upload
             if always_include:
                 logger.info(
diff --git a/xnat_ingest/session.py b/xnat_ingest/session.py
index 6503699..cf6a64a 100644
--- a/xnat_ingest/session.py
+++ b/xnat_ingest/session.py
@@ -104,8 +104,8 @@ def dicom_dirs(self) -> ty.List[Path]:
 
     def select_resources(
         self,
-        dataset: Dataset,
-        always_include: ty.Optional[str] = None,
+        dataset: ty.Optional[Dataset],
+        always_include: ty.Sequence[str] = (),
     ) -> ty.Iterator[ty.Tuple[str, str, str, FileSet]]:
         """Returns selected resources that match the columns in the dataset definition
 
@@ -113,10 +113,10 @@ def select_resources(
         ----------
         dataset : Dataset
            Arcana dataset definition
-        always_include : str, optional
-            whether to scans regardless of whether they are explicitly
-            specified by a column in the dataset or not. Valid options are
-            'all', 'dicoms', 'associated'
+        always_include : sequence[str]
+            mime-types or "mime-like" (see https://arcanaframework.github.io/fileformats/)
+            of file-format to always include in the upload, regardless of whether they are
+            specified in the dataset or not
 
         Yields
         ------
@@ -132,16 +132,11 @@ def select_resources(
         store = MockDataStore(self)
 
         uploaded = set()
-        if always_include:
+        for mime_like in always_include:
+            fileformat = from_mime(mime_like)
             for scan in self.scans.values():
                 for resource_name, fileset in scan.resources.items():
-                    if (
-                        always_include == "all"
-                        or always_include == "dicom"
-                        and resource_name == "DICOM"
-                        or always_include == "associated"
-                        and resource_name != "DICOM"
-                    ):
+                    if isinstance(fileset, fileformat):
                         uploaded.add((scan.id, resource_name))
                         yield scan.id, scan.type, resource_name, fileset
         for column in dataset.columns.values():