Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debugged datatype, logging and missing ID handling #22

Merged
merged 9 commits into from
Oct 2, 2024
Merged
50 changes: 25 additions & 25 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
exclude: ^(arcana/_version\.py|versioneer\.py)$
args:
- -l 88
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
exclude: ^(xnat_checks/_version\.py|versioneer\.py)$
args:
- --ignore-words=.codespell-ignorewords
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
exclude: ^(arcana/_version\.py|versioneer\.py)$
args:
- -l 88
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
exclude: ^(xnat_checks/_version\.py|versioneer\.py)$
args:
- --ignore-words=.codespell-ignorewords
- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
23 changes: 23 additions & 0 deletions scripts/get_pet_tst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import tempfile
from pathlib import Path
from fileformats.medimage import DicomSeries
from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_pet_image,
)


# Generate a dummy Siemens Biograph Vision PET DICOM series with overridden
# header values and print the StudyID read back from its metadata.
# NOTE(review): presumably a manual check that the ``StudyID`` override passed
# to ``get_image`` ends up in the generated DICOM headers -- confirm intent.
#
# The scratch directory is created with ``TemporaryDirectory`` (rather than
# ``mkdtemp``) so that it is removed when the block exits instead of leaving a
# new directory of generated DICOM files behind on every run.
with tempfile.TemporaryDirectory() as tmp_dir:
    tmp_path = Path(tmp_dir)

    series = DicomSeries(
        get_pet_image(
            tmp_path,
            first_name="first",
            last_name="last",
            StudyInstanceUID="StudyInstanceUID",
            PatientID="PatientID",
            AccessionNumber="AccessionNumber",
            StudyID="xnat_project",
        ).iterdir()
    )

    # Read the metadata inside the ``with`` block, while the generated files
    # still exist on disk.
    print(series.metadata["StudyID"])
35 changes: 24 additions & 11 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import tempfile
from tqdm import tqdm
from fileformats.core import FileSet
from fileformats.medimage import DicomSeries
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from frametree.xnat import Xnat # type: ignore[import-untyped]
Expand All @@ -25,13 +26,13 @@


@cli.command(
help="""Stages DICOM and associated files found in the input directories into separate
directories for each session
help="""Stages images found in the input directories into separate directories for each
imaging acquisition session

DICOMS_PATH is either the path to a directory containing the DICOM files to upload, or
a glob pattern that selects the DICOM paths directly
FILES_PATH is either the path to a directory containing the files to upload, or
a glob pattern that selects the paths directly

STAGING_DIR is the directory that the files for each session are collated to before they
OUTPUT_DIR is the directory that the files for each session are collated to before they
are uploaded to XNAT
""",
)
Expand All @@ -42,9 +43,15 @@
type=str,
metavar="<mime-type>",
multiple=True,
default=["medimage/dicom-series"],
envvar="XINGEST_DATATYPE",
help="The datatype of the primary files to to upload",
default=None,
envvar="XINGEST_DATATYPES",
help=(
'The MIME-type(s) (or "MIME-like" see FileFormats docs) of potential datatype(s) '
"of the primary files to to upload, defaults to 'medimage/dicom-series'. "
"Any formats implemented in the FileFormats Python package "
"(https://github.com/ArcanaFramework/fileformats) that implement the 'read_metadata' "
'"extra" are supported, see FF docs on how to add support for new formats.'
),
)
@click.option(
"--project-field",
Expand Down Expand Up @@ -250,7 +257,7 @@
def stage(
files_path: str,
output_dir: Path,
datatype: str,
datatype: list[str] | None,
associated_files: ty.List[AssociatedFiles],
project_field: str,
subject_field: str,
Expand Down Expand Up @@ -279,6 +286,11 @@
logger_configs=loggers,
additional_loggers=additional_loggers,
)
datatypes: list[ty.Type[FileSet]]
if not datatype:
datatypes = [DicomSeries]
else:
datatypes = [FileSet.from_mime(dt) for dt in datatype] # type: ignore[misc]

Check warning on line 293 in xnat_ingest/cli/stage.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/cli/stage.py#L293

Added line #L293 was not covered by tests

if xnat_login:
xnat_repo = Xnat(
Expand All @@ -292,10 +304,10 @@
else:
project_list = None

if session_field is None and datatype == "medimage/dicom-series":
if session_field is None and DicomSeries in datatypes:
session_field = "StudyInstanceUID"

msg = f"Loading {datatype} sessions from '{files_path}'"
msg = f"Loading {list(datatypes)} sessions from '{files_path}'"

for assoc_files in associated_files:
msg += f" with associated files selected from '{assoc_files.glob}'"
Expand All @@ -319,6 +331,7 @@
def do_stage() -> None:
sessions = ImagingSession.from_paths(
files_path=files_path,
datatypes=datatypes,
project_field=project_field,
subject_field=subject_field,
visit_field=visit_field,
Expand Down
4 changes: 3 additions & 1 deletion xnat_ingest/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ class ImagingResource:

@checksums.default
def calculate_checksums(self) -> dict[str, str]:
return self.fileset.hash_files(crypto=hashlib.md5)
return self.fileset.hash_files(
crypto=hashlib.md5, relative_to=self.fileset.parent
)

@property
def datatype(self) -> ty.Type[FileSet]:
Expand Down
33 changes: 21 additions & 12 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@
multiple_sessions: ty.DefaultDict[str, ty.Set[ty.Tuple[str, str, str]]] = (
defaultdict(set)
)
missing_ids: dict[str, dict[str, str]] = defaultdict(dict)
for resource in tqdm(
resources,
"Sorting resources into XNAT tree structure...",
Expand All @@ -338,21 +339,28 @@
try:
value = resource.metadata[field_name]
except KeyError:
value = ""

Check warning on line 342 in xnat_ingest/session.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/session.py#L342

Added line #L342 was not covered by tests
if not value:
if session_uid and field_type in ("project", "subject", "visit"):
value = (
"INVALID_MISSING_"
+ field_type.upper()
+ "_"
+ "".join(
random.choices(
string.ascii_letters + string.digits, k=8
try:
value = missing_ids[session_uid][field_type]
except KeyError:
value = missing_ids[session_uid][field_type] = (
"INVALID_MISSING_"
+ field_type.upper()
+ "_"
+ "".join(
random.choices(
string.ascii_letters + string.digits, k=8
)
)
)
else:
raise ImagingSessionParseError(

Check warning on line 359 in xnat_ingest/session.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/session.py#L359

Added line #L359 was not covered by tests
f"Did not find '{field_name}' field in {resource!r}, "
"cannot uniquely identify the resource, found:\n"
+ "\n".join(resource.metadata)
)
raise ImagingSessionParseError(
f"Did not find '{field_name}' field in {resource}, "
"cannot uniquely identify the resource"
)
if index is not None:
value = value[index]
value_str = str(value)
Expand Down Expand Up @@ -399,7 +407,8 @@
raise ImagingSessionParseError(
"Multiple session UIDs found with the same project/subject/visit ID triplets: "
+ "\n".join(
f"{i} -> {p}:{s}:{v}" for i, (p, s, v) in multiple_sessions.items()
f"{i} -> " + str(["{p}:{s}:{v}" for p, s, v in sess])
for i, sess in multiple_sessions.items()
)
)
return list(sessions.values())
Expand Down
15 changes: 1 addition & 14 deletions xnat_ingest/tests/test_session.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
import pytest
import typing as ty
from fileformats.core import from_mime, FileSet
from fileformats.core import from_mime
from fileformats.medimage import (
DicomSeries,
Vnd_Siemens_Biograph128Vision_Vr20b_PetRawData,
Expand All @@ -10,22 +10,17 @@
)
from frametree.core.frameset import FrameSet # type: ignore[import-untyped]
from frametree.common import FileSystem # type: ignore[import-untyped]
from medimages4tests.dummy.dicom.base import default_dicom_dir # type: ignore[import-untyped]
from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_pet_image,
__file__ as pet_src_file,
)
from medimages4tests.dummy.dicom.ct.ac.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_ac_image,
__file__ as ac_src_file,
)
from medimages4tests.dummy.dicom.pet.topogram.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_topogram_image,
__file__ as topogram_src_file,
)
from medimages4tests.dummy.dicom.pet.statistics.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_statistics_image,
__file__ as statistics_src_file,
)
from xnat_ingest.session import ImagingSession, ImagingScan
from xnat_ingest.store import DummyAxes
Expand Down Expand Up @@ -66,26 +61,18 @@ def imaging_session() -> ImagingSession:
DicomSeries(d.iterdir())
for d in (
get_pet_image(
out_dir=default_dicom_dir(pet_src_file).with_suffix(".with-spaces"),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
get_ac_image(
out_dir=default_dicom_dir(ac_src_file).with_suffix(".with-spaces"),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
get_topogram_image(
out_dir=default_dicom_dir(topogram_src_file).with_suffix(
".with-spaces"
),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
get_statistics_image(
out_dir=default_dicom_dir(statistics_src_file).with_suffix(
".with-spaces"
),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
Expand Down
3 changes: 3 additions & 0 deletions xnat_ingest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@
) -> None:
"""Set up logging for the application"""

if not logger_configs:
logger_configs = [LoggerConfig("stream", "info", "stdout")]

Check warning on line 130 in xnat_ingest/utils.py

View check run for this annotation

Codecov / codecov/patch

xnat_ingest/utils.py#L130

Added line #L130 was not covered by tests

loggers = [logger]
for log in additional_loggers:
loggers.append(logging.getLogger(log))
Expand Down
Loading