Skip to content

Commit

Permalink
Merge pull request #9 from Australian-Imaging-Service/develop
Browse files Browse the repository at this point in the history
debugging reworked stage command
  • Loading branch information
tclose authored May 29, 2024
2 parents d7a9100 + ed6033a commit 438b0c7
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 14 deletions.
26 changes: 26 additions & 0 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@
import typing as ty
import traceback
import click
import tempfile
from tqdm import tqdm
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from arcana.xnat import Xnat
from xnat_ingest.utils import (
DicomField,
AssociatedFiles,
logger,
LogFile,
LogEmail,
MailServer,
XnatLogin,
set_logger_handling,
)

Expand Down Expand Up @@ -149,6 +152,15 @@
envvar="XNAT_INGEST_STAGE_DEIDENTIFY",
help="whether to deidentify the file names and DICOM metadata before staging",
)
@click.option(
"--xnat-login",
nargs=3,
type=XnatLogin.cli_type,
default=None,
metavar="<host> <user> <password>",
help="The XNAT server to upload to plus the user and password to use",
envvar="XNAT_INGEST_TRANSFER_XNAT_LOGIN",
)
def stage(
dicoms_path: str,
staging_dir: Path,
Expand All @@ -164,6 +176,7 @@ def stage(
mail_server: MailServer,
raise_errors: bool,
deidentify: bool,
xnat_login: XnatLogin,
):
set_logger_handling(
log_level=log_level,
Expand All @@ -172,6 +185,18 @@ def stage(
mail_server=mail_server,
)

if xnat_login:
xnat_repo = Xnat(
server=xnat_login.host,
user=xnat_login.user,
password=xnat_login.password,
cache_dir=Path(tempfile.mkdtemp()),
)
with xnat_repo.connection:
project_list = list(xnat_repo.connection.projects)
else:
project_list = None

msg = f"Loading DICOM sessions from '{dicoms_path}'"

if associated_files:
Expand Down Expand Up @@ -207,6 +232,7 @@ def stage(
associated_files=associated_files,
remove_original=delete,
deidentify=deidentify,
project_list=project_list,
)
except Exception as e:
if not raise_errors:
Expand Down
36 changes: 33 additions & 3 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from .dicom import dcmedit_path
import random
import string
import platform

logger = logging.getLogger("xnat-ingest")

Expand Down Expand Up @@ -80,6 +81,14 @@ def __getitem__(self, fieldname: str) -> ty.Any:
def name(self):
return f"{self.project_id}-{self.subject_id}-{self.visit_id}"

@property
def invalid_ids(self):
    """Report whether any of the session's IDs is marked as invalid.

    Returns
    -------
    bool
        True if the project, subject or visit ID starts with "INVALID",
        False otherwise
    """
    # Attributes are looked up lazily and in the same order as before, so the
    # check still stops at the first ID carrying the "INVALID" prefix
    return any(
        getattr(self, id_attr).startswith("INVALID")
        for id_attr in ("project_id", "subject_id", "visit_id")
    )

@property
def staging_relpath(self):
return [self.project_id, self.subject_id, self.visit_id]
Expand Down Expand Up @@ -304,7 +313,7 @@ def get_id(field):
)
id_ = None
if not id_:
id_ = "UNKNOWN" + "".join(
id_ = "INVALID-MISSING-ID-" + "".join(
random.choices(string.ascii_letters + string.digits, k=8)
)
return id_
Expand Down Expand Up @@ -497,6 +506,7 @@ def stage(
associated_files: ty.Optional[AssociatedFiles] = None,
remove_original: bool = False,
deidentify: bool = True,
project_list: ty.Optional[ty.List[str]] = None,
) -> "ImagingSession":
r"""Stages and deidentifies files by removing the fields listed in `FIELDS_TO_ANONYMISE`
and replacing the birth date with 01/01/<BIRTH-YEAR>, returning a new imaging session
Expand All @@ -507,6 +517,9 @@ def stage(
destination directory to save the deidentified files. The session will be saved
to a directory with the project, subject and session IDs as subdirectories of
this directory, along with the scans manifest
work_dir : Path, optional
the directory the staged sessions are created in before they are moved into
the staging directory
associated_files : ty.Tuple[str, str], optional
Glob pattern used to select the non-dicom files to include in the session. Note
that the pattern is relative to the parent directory containing the DICOM files
Expand All @@ -524,6 +537,9 @@ def stage(
delete original files after they have been staged, false by default
deidentify : bool, optional
deidentify the scans in the staging process, true by default
project_list : list[str], optional
list of available projects in the store, used to check whether the project ID
is valid
Returns
-------
Expand All @@ -535,9 +551,14 @@ def stage(
"Did not find `dcmedit` tool from the MRtrix package on the system path, "
"de-identification will be performed by pydicom instead and may be slower"
)

staged_scans = []
staged_metadata = {}
session_dir = dest_dir / self.project_id / self.subject_id / self.visit_id
if project_list is None or self.project_id in project_list:
project_dir = self.project_id
else:
project_dir = "INVALID-UNRECOGNISED-PROJECT-" + self.project_id
session_dir = dest_dir / project_dir / self.subject_id / self.visit_id
session_dir.mkdir(parents=True)
for scan in tqdm(
self.scans.values(), f"Staging DICOM sessions to {session_dir}"
Expand Down Expand Up @@ -595,9 +616,18 @@ def stage(

if deidentify:
# Transform the names of the paths to remove any identifiable information
if associated_files.glob.startswith("/") or (
platform.system() == "Windows"
and re.match(r"[a-zA-Z]:\\", associated_files.glob)
):
assoc_glob_pattern = associated_files.glob
else:
assoc_glob_pattern = (
str(dicom_dir) + os.path.sep + associated_files.glob
)
transformed_fspaths = transform_paths(
list(associated_fspaths),
f"{dicom_dir}/{associated_files.glob}",
assoc_glob_pattern,
self.metadata,
staged_metadata,
)
Expand Down
4 changes: 4 additions & 0 deletions xnat_ingest/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ def test_stage_and_upload(
# "info",
"--raise-errors",
"--delete",
"--xnat-login",
"http://localhost:8080",
"admin",
"admin",
],
)

Expand Down
23 changes: 12 additions & 11 deletions xnat_ingest/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ def imaging_session() -> ImagingSession:
ImagingScan(
id=str(d.metadata["SeriesNumber"]),
type=str(d.metadata["SeriesDescription"]),
resources={"DICOM": d}) for d in dicoms]
resources={"DICOM": d},
)
for d in dicoms
]
return ImagingSession(
project_id="PROJECTID",
subject_id="SUBJECTID",
Expand Down Expand Up @@ -110,17 +113,17 @@ def dataset(tmp_path: Path) -> Dataset:
return dataset


@pytest.mark.xfail(condition=platform.system() == "Linux", reason="Not working on ubuntu")
@pytest.mark.xfail(
condition=platform.system() == "Linux", reason="Not working on ubuntu"
)
def test_session_select_resources(
imaging_session: ImagingSession, dataset: Dataset, tmp_path: Path
):

assoc_dir = tmp_path / "assoc"
assoc_dir.mkdir()

for fspath in get_raw_data_files(
first_name=FIRST_NAME, last_name=LAST_NAME
):
for fspath in get_raw_data_files(first_name=FIRST_NAME, last_name=LAST_NAME):
fspath.rename(assoc_dir / fspath.name)

staging_dir = tmp_path / "staging"
Expand All @@ -130,7 +133,7 @@ def test_session_select_resources(
staging_dir,
associated_files=AssociatedFiles(
str(assoc_dir) + "/{PatientName.given_name}_{PatientName.family_name}*.ptd",
r".*/[^\.]+.[^\.]+.[^\.]+.(?P<id>\d+)\.[A-Z]+_(?P<resource>[^\.]+).*"
r".*/[^\.]+.[^\.]+.[^\.]+.(?P<id>\d+)\.[A-Z]+_(?P<resource>[^\.]+).*",
),
)

Expand All @@ -141,16 +144,14 @@ def test_session_select_resources(
assert set(ids) == set(("1", "2", "4", "602", "603"))
assert set(descs) == set(
[
"AC CT 3.0 SWB HD_FoV",
"AC CT 30 SWB HD_FoV",
"PET SWB 8MIN",
"Topogram 0.6 Tr60",
"Topogram 06 Tr60",
"602",
"603",
]
)
assert set(resource_names) == set(
("DICOM", "LISTMODE", "COUNTRATE", "EM_SINO")
)
assert set(resource_names) == set(("DICOM", "LISTMODE", "COUNTRATE", "EM_SINO"))
assert set(type(s) for s in scans) == set(
[
DicomSeries,
Expand Down

0 comments on commit 438b0c7

Please sign in to comment.