Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allows primary files to be of any format supported by fileformats #19

Merged
merged 7 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import os
from pathlib import Path
import logging
import typing as ty
import tempfile
from logging.handlers import SMTPHandler
import pytest
from click.testing import CliRunner
import xnat4tests
import xnat4tests # type: ignore[import-untyped]
from datetime import datetime
from xnat_ingest.utils import logger
from medimages4tests.dummy.raw.pet.siemens.biograph_vision.vr20b.pet_listmode import (
get_data as get_listmode_data,
)
from medimages4tests.dummy.raw.pet.siemens.biograph_vision.vr20b.pet_countrate import (
get_data as get_countrate_data,
)

# Set DEBUG logging for unittests

Expand Down Expand Up @@ -110,3 +117,9 @@ def emit(self, record):
# Capture the email message and append it to the list
msg = self.format(record)
self.emails.append(msg)


def get_raw_data_files(out_dir: ty.Optional[Path] = None, **kwargs) -> ty.List[Path]:
    """Generate the dummy PET list-mode and count-rate raw data files.

    Parameters
    ----------
    out_dir : Path, optional
        directory passed to both generators; when omitted a new temporary
        directory is created with ``tempfile.mkdtemp()``
    **kwargs
        forwarded unchanged to both ``get_listmode_data`` and ``get_countrate_data``

    Returns
    -------
    list[Path]
        the result of ``get_listmode_data`` followed by that of ``get_countrate_data``
    """
    target_dir = Path(tempfile.mkdtemp()) if out_dir is None else out_dir
    listmode_files = get_listmode_data(target_dir, **kwargs)
    countrate_files = get_countrate_data(target_dir, **kwargs)
    return listmode_files + countrate_files
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ dependencies = [
"natsort",
"paramiko",
"xnat",
"arcana",
"arcana-xnat >=0.4.1",
"frametree",
"frametree-xnat",
]
license = { file = "LICENSE" }
authors = [{ name = "Thomas G. Close", email = "[email protected]" }]
Expand Down
1 change: 0 additions & 1 deletion real-tests/usyd_transfer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from click.testing import CliRunner
from xnat_ingest.cli import transfer
from xnat_ingest.utils import show_cli_trace
Expand Down
6 changes: 3 additions & 3 deletions scripts/dcm_performance_mrtrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
"StudyInstanceUID",
"StudyID",
"PatientID",
"AccessionNumber"
"AccessionNumber",
]

series = DicomSeries(get_image().iterdir())
series = DicomSeries(get_image().iterdir(), specific_tags=METADATA_KEYS)

timeit.timeit(lambda: series.select_metadata(METADATA_KEYS))
timeit.timeit(lambda: series.metadata)
6 changes: 3 additions & 3 deletions scripts/dcm_performance_pydicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
"StudyInstanceUID",
"StudyID",
"PatientID",
"AccessionNumber"
"AccessionNumber",
]

series = DicomSeries(get_image().iterdir())
series = DicomSeries(get_image().iterdir(), specific_tags=METADATA_KEYS)

timeit.timeit(lambda: series.select_metadata(METADATA_KEYS))
timeit.timeit(lambda: series.metadata)
2 changes: 1 addition & 1 deletion scripts/run_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from xnat_ingest.utils import show_cli_trace
from click.testing import CliRunner

PATTERN = "{PatientName.given_name}_{PatientName.family_name}_{SeriesDate}.*"
PATTERN = "{PatientName.family_name}_{PatientName.given_name}_{SeriesDate}.*"

runner = CliRunner()

Expand Down
109 changes: 83 additions & 26 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
from tqdm import tqdm
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from arcana.xnat import Xnat
from frametree.xnat import Xnat # type: ignore[import-untyped]
from xnat_ingest.utils import (
DicomField,
AssociatedFiles,
logger,
LogFile,
Expand All @@ -30,31 +29,76 @@
are uploaded to XNAT
""",
)
@click.argument("dicoms_path", type=str, envvar="XNAT_INGEST_STAGE_DICOMS_PATH")
@click.argument("files_path", type=str, envvar="XNAT_INGEST_STAGE_DICOMS_PATH")
@click.argument(
"staging_dir", type=click.Path(path_type=Path), envvar="XNAT_INGEST_STAGE_DIR"
)
@click.option(
"--datatype",
type=str,
metavar="<mime-type>",
multiple=True,
default=["medimage/dicom-series"],
envvar="XNAT_INGEST_STAGE_DATATYPE",
help="The datatype of the primary files to upload",
)
@click.option(
"--project-field",
type=DicomField,
type=str,
default="StudyID",
envvar="XNAT_INGEST_STAGE_PROJECT",
help=("The keyword or tag of the DICOM field to extract the XNAT project ID from "),
help=("The keyword of the metadata field to extract the XNAT project ID from "),
)
@click.option(
"--subject-field",
type=DicomField,
type=str,
default="PatientID",
envvar="XNAT_INGEST_STAGE_SUBJECT",
help=("The keyword or tag of the DICOM field to extract the XNAT subject ID from "),
help=("The keyword of the metadata field to extract the XNAT subject ID from "),
)
@click.option(
"--visit-field",
type=DicomField,
type=str,
default="AccessionNumber",
envvar="XNAT_INGEST_STAGE_VISIT",
help=(
"The keyword of the metadata field to extract the XNAT imaging session ID from "
),
)
@click.option(
"--session-field",
type=str,
default=None,
envvar="XNAT_INGEST_STAGE_SESSION",
help=(
"The keyword or tag of the DICOM field to extract the XNAT imaging session ID from "
"The keyword of the metadata field to extract the XNAT imaging session ID from "
),
)
@click.option(
"--scan-id-field",
type=str,
default="SeriesNumber",
envvar="XNAT_INGEST_STAGE_SCAN_ID",
help=(
"The keyword of the metadata field to extract the XNAT imaging scan ID from "
),
)
@click.option(
"--scan-desc-field",
type=str,
default="SeriesDescription",
envvar="XNAT_INGEST_STAGE_SCAN_DESC",
help=(
"The keyword of the metadata field to extract the XNAT imaging scan description from "
),
)
@click.option(
"--resource-field",
type=str,
default="ImageType[-1]",
envvar="XNAT_INGEST_STAGE_RESOURCE",
help=(
"The keyword of the metadata field to extract the XNAT imaging resource ID from "
),
)
@click.option(
Expand All @@ -66,18 +110,19 @@
@click.option(
"--associated-files",
type=AssociatedFiles.cli_type,
nargs=2,
nargs=3,
default=None,
multiple=True,
envvar="XNAT_INGEST_STAGE_ASSOCIATED",
metavar="<glob> <id-pattern>",
metavar="<datatype> <glob> <id-pattern>",
help=(
'The "glob" arg is a glob pattern by which to detect associated files to be '
"attached to the DICOM sessions. Note that when this pattern corresponds to a "
"relative path it is considered to be relative to the parent directory containing "
"the DICOMs for the session NOT the current working directory Can contain string "
"templates corresponding to DICOM metadata fields, which are substituted before "
"the glob is called. For example, "
'"./associated/{PatientName.given_name}_{PatientName.family_name}/*)" '
'"./associated/{PatientName.family_name}_{PatientName.given_name}/*)" '
"will find all files under the subdirectory within '/path/to/dicoms/associated' that matches "
"<GIVEN-NAME>_<FAMILY-NAME>. Will be interpreted as being relative to `dicoms_dir` "
"if a relative path is provided.\n"
Expand Down Expand Up @@ -181,12 +226,17 @@
type=bool,
)
def stage(
dicoms_path: str,
files_path: str,
staging_dir: Path,
associated_files: AssociatedFiles,
project_field: DicomField,
subject_field: DicomField,
visit_field: DicomField,
datatype: str,
associated_files: ty.List[AssociatedFiles],
project_field: str,
subject_field: str,
visit_field: str,
session_field: str | None,
scan_id_field: str,
scan_desc_field: str,
resource_field: str,
project_id: str | None,
delete: bool,
log_level: str,
Expand Down Expand Up @@ -219,26 +269,33 @@
else:
project_list = None

msg = f"Loading DICOM sessions from '{dicoms_path}'"
if session_field is None and datatype == "medimage/dicom-series":
session_field = "StudyInstanceUID"

Check warning on line 273 in xnat_ingest/cli/stage.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/cli/stage.py#L273

Added line #L273 was not covered by tests

msg = f"Loading {datatype} sessions from '{files_path}'"

if associated_files:
msg += f" with associated files selected from '{associated_files.glob}'"
if not associated_files.glob.startswith("/"):
msg += " (relative to the directories in which the DICOMs are found)"
for assoc_files in associated_files:
msg += f" with associated files selected from '{assoc_files.glob}'"
if not assoc_files.glob.startswith("/"):
msg += " (relative to the directories in which the primary files are found)"

Check warning on line 280 in xnat_ingest/cli/stage.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/cli/stage.py#L280

Added line #L280 was not covered by tests

logger.info(msg)

sessions = ImagingSession.from_dicoms(
dicoms_path=dicoms_path,
sessions = ImagingSession.from_paths(
files_path=files_path,
project_field=project_field,
subject_field=subject_field,
visit_field=visit_field,
session_field=session_field,
scan_id_field=scan_id_field,
scan_desc_field=scan_desc_field,
resource_field=resource_field,
project_id=project_id,
)

logger.info("Staging sessions to '%s'", str(staging_dir))

for session in tqdm(sessions, f"Staging DICOM sessions found in '{dicoms_path}'"):
for session in tqdm(sessions, f"Staging DICOM sessions found in '{files_path}'"):
try:
session_staging_dir = staging_dir.joinpath(*session.staging_relpath)
if session_staging_dir.exists():
Expand All @@ -251,7 +308,7 @@
# Deidentify files if necessary and save them to the staging directory
session.stage(
staging_dir,
associated_files=associated_files,
associated_file_groups=associated_files,
remove_original=delete,
deidentify=deidentify,
project_list=project_list,
Expand Down
13 changes: 7 additions & 6 deletions xnat_ingest/cli/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
import click
from tqdm import tqdm
from natsort import natsorted
import xnat
import xnat # type: ignore[import-untyped]
import boto3
import paramiko
from fileformats.generic import File
from arcana.core.data.set import Dataset
from arcana.xnat import Xnat
from xnat.exceptions import XNATResponseError
from frametree.core.frameset import FrameSet # type: ignore[import-untyped]
from frametree.xnat import Xnat # type: ignore[import-untyped]
from xnat.exceptions import XNATResponseError # type: ignore[import-untyped]
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from xnat_ingest.utils import (
Expand Down Expand Up @@ -349,7 +349,7 @@
missing_datasets = set()
for project_id in project_ids:
try:
dataset = Dataset.load(project_id, xnat_repo)
dataset = FrameSet.load(project_id, xnat_repo)

Check warning on line 352 in xnat_ingest/cli/upload.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/cli/upload.py#L352

Added line #L352 was not covered by tests
except Exception:
missing_datasets.add(project_id)
else:
Expand Down Expand Up @@ -392,7 +392,7 @@

# Access Arcana dataset associated with project
try:
dataset = Dataset.load(session.project_id, xnat_repo)
dataset = FrameSet.load(session.project_id, xnat_repo)
except Exception as e:
logger.warning(
"Did not load dataset definition (%s) from %s project "
Expand Down Expand Up @@ -446,6 +446,7 @@
image_type = scan.metadata.get("ImageType")
if image_type and image_type[:2] == ["DERIVED", "SECONDARY"]:
modality = "SC"
resource_name = "secondary"
else:
modality = scan.metadata.get(
"Modality", default_scan_modality
Expand Down
37 changes: 9 additions & 28 deletions xnat_ingest/dicom.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
import typing as ty
import subprocess as sp

# import re
import pydicom

# from fileformats.core import FileSet
# from fileformats.application import Dicom
# from fileformats.extras.application.medical import dicom_read_metadata


# Locate the optional ``dcmedit`` and ``dcminfo`` executables once, at import time.
# ``shutil.which`` searches the PATH directly (no shell or external ``which`` binary
# needed) and returns ``None`` when the executable cannot be found.
import shutil

dcmedit_path: ty.Optional[str] = shutil.which("dcmedit")


dcminfo_path: ty.Optional[str] = shutil.which("dcminfo")


def tag2keyword(tag: ty.Tuple[str, str]) -> str:
    """Look up the DICOM keyword for a ``(group, element)`` tag.

    Parameters
    ----------
    tag : tuple[str, str]
        group and element numbers as hexadecimal strings, e.g. ``("0010", "0020")``,
        i.e. the same form that ``keyword2tag`` returns

    Returns
    -------
    str
        the keyword returned by ``pydicom.datadict.dictionary_keyword`` for the tag
    """
    # The parts are hexadecimal strings, so they must be parsed in base 16: a plain
    # int() would read "0010" as ten and fail outright on digits such as "000D"
    group, element = (int(part, 16) for part in tag)
    return pydicom.datadict.dictionary_keyword((group, element))

Check warning on line 25 in xnat_ingest/dicom.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/dicom.py#L25

Added line #L25 was not covered by tests


def keyword2tag(keyword: str) -> ty.Tuple[str, str]:
tag_str = hex(pydicom.datadict.tag_for_keyword(keyword))[2:]
tag = pydicom.datadict.tag_for_keyword(keyword)
if not tag:
raise ValueError(f"Could not find tag for keyword '{keyword}'")
tag_str = hex(tag)[2:]

Check warning on line 32 in xnat_ingest/dicom.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/dicom.py#L29-L32

Added lines #L29 - L32 were not covered by tests
return (f"{tag_str[:-4].zfill(4)}", tag_str[-4:])


Expand All @@ -49,27 +54,3 @@

def __str__(self):
return f"'{self.keyword}' field ({','.join(self.tag)})"


# @FileSet.read_metadata.register
# def mrtrix_dicom_read_metadata(
# dcm: Dicom, selected_keys: ty.Optional[ty.Sequence[str]] = None
# ) -> ty.Mapping[str, ty.Any]:
# if dcminfo_path is None or selected_keys is None:
# return dicom_read_metadata(dcm, selected_keys)

# tags = [keyword2tag(k) for k in selected_keys]
# tag_str = " ".join(f"-t {t[0]} {t[1]}" for t in tags)
# cmd = f"dcminfo {tag_str} {dcm.fspath}"
# line_re = re.compile(r"\[([0-9A-F]{4}),([0-9A-F]{4})] (.*)")
# dcminfo_output = sp.check_output(cmd, shell=True).decode("utf-8")
# metadata = {}
# for line in dcminfo_output.splitlines():
# match = line_re.match(line)
# if not match:
# continue
# t1, t2, val = match.groups()
# key = tag2keyword((t1, t2))
# val = val.strip()
# metadata[key] = val
# return metadata
Loading
Loading