Skip to content

Commit

Permalink
escape scan types so they are valid Windows/POSIX file/directory names
Browse files Browse the repository at this point in the history
  • Loading branch information
tclose committed Mar 14, 2024
1 parent 6437665 commit da34b2f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 26 deletions.
30 changes: 19 additions & 11 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from arcana.core.data.tree import DataTree
from arcana.core.exceptions import ArcanaDataMatchError
from .exceptions import DicomParseError, StagingError
from .utils import add_exc_note, transform_paths
from .utils import add_exc_note, transform_paths, DicomField, AssociatedFiles
from .dicom import dcmedit_path
import random
import string
Expand Down Expand Up @@ -91,7 +91,9 @@ def make_session_id(cls, project_id, subject_id, visit_id):
def modalities(self) -> ty.Set[str]:
modalities = self.metadata["Modality"]
if not isinstance(modalities, str):
modalities = set(tuple(m) if not isinstance(m, str) else m for m in modalities)
modalities = set(
tuple(m) if not isinstance(m, str) else m for m in modalities
)
return modalities

@property
Expand Down Expand Up @@ -200,9 +202,9 @@ def metadata(self):
def from_dicoms(
cls,
dicoms_path: str | Path,
project_field: str = "StudyID",
subject_field: str = "PatientID",
visit_field: str = "AccessionNumber",
project_field: DicomField = DicomField("StudyID"),
subject_field: DicomField = DicomField("PatientID"),
visit_field: DicomField = DicomField("AccessionNumber"),
project_id: str | None = None,
) -> ty.List["ImagingSession"]:
"""Loads all imaging sessions from a list of DICOM files
Expand Down Expand Up @@ -478,7 +480,7 @@ def save(self, save_dir: Path, just_manifest: bool = False) -> "ImagingSession":
def stage(
self,
dest_dir: Path,
associated_files: ty.Optional[ty.Tuple[str, str]] = None,
associated_files: ty.Optional[AssociatedFiles] = None,
remove_original: bool = False,
deidentify: bool = True,
) -> "ImagingSession":
Expand Down Expand Up @@ -528,7 +530,11 @@ def stage(
):
staged_resources = {}
for resource_name, fileset in scan.resources.items():
scan_dir = session_dir / f"{scan.id}-{scan.type}" / resource_name
# Ensure scan type is a valid directory name
scan_type = re.sub(
r"[\"\*\/\:\<\>\?\\\|\+\,\.\;\=\[\]]+", "", scan.type
)
scan_dir = session_dir / f"{scan.id}-{scan_type}" / resource_name
scan_dir.mkdir(parents=True, exist_ok=True)
if isinstance(fileset, DicomSeries):
staged_dicom_paths = []
Expand Down Expand Up @@ -559,7 +565,7 @@ def stage(
# substitute string templates int the glob template with values from the
# DICOM metadata to construct a glob pattern to select files associated
# with current session
associated_fspaths = set()
associated_fspaths: ty.Set[Path] = set()
for dicom_dir in self.dicom_dirs:
assoc_glob = dicom_dir / associated_files.glob.format(**self.metadata)
# Select files using the constructed glob pattern
Expand All @@ -579,7 +585,7 @@ def stage(
if deidentify:
# Transform the names of the paths to remove any identiable information
transformed_fspaths = transform_paths(
associated_fspaths,
list(associated_fspaths),
associated_files.glob,
self.metadata,
staged_metadata,
Expand All @@ -603,7 +609,7 @@ def stage(
shutil.copyfile(old, dest_path)
staged_associated_fspaths.append(dest_path)
else:
staged_associated_fspaths = associated_fspaths
staged_associated_fspaths = list(associated_fspaths)

# Identify scan id, type and resource names from deidentified file paths
assoc_scans = {}
Expand All @@ -624,7 +630,9 @@ def stage(
except IndexError:
scan_type = scan_id
if scan_id not in assoc_scans:
assoc_resources = defaultdict(list)
assoc_resources: ty.DefaultDict[str, ty.List[Path]] = defaultdict(
list
)
assoc_scans[scan_id] = (scan_type, assoc_resources)
else:
prev_scan_type, assoc_resources = assoc_scans[scan_id]
Expand Down
34 changes: 19 additions & 15 deletions xnat_ingest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def split_envvar_value(cls, envvar):
@attrs.define
class LogEmail(CliType):

address: str = None
loglevel: str = None
subject: str = None
address: str
loglevel: str
subject: str

def __str__(self):
return self.address
Expand All @@ -64,7 +64,7 @@ def path_or_none_converter(path: str | Path | None):
class LogFile(MultiCliType):

path: Path = attrs.field(converter=path_or_none_converter, default=None)
loglevel: str = None
loglevel: ty.Optional[str] = None

def __str__(self):
return str(self.path)
Expand All @@ -76,27 +76,30 @@ def __fspath__(self):
@attrs.define
class MailServer(CliType):

host: str = None
sender_email: str = None
user: str = None
password: str = None
host: str
sender_email: str
user: str
password: str


@attrs.define
class AssociatedFiles(CliType):

glob: str = None
identity_pattern: str = None
glob: str
identity_pattern: str


def set_logger_handling(
log_level: str, log_emails: ty.List[LogEmail] | None, log_file: LogFile | None, mail_server: MailServer
log_level: str,
log_emails: ty.List[LogEmail] | None,
log_file: LogFile | None,
mail_server: MailServer,
):

levels = [log_level]
if log_emails:
levels.extend(le.loglevel for le in log_emails)
if log_file:
if log_file and log_file.loglevel:
levels.append(log_file.loglevel)

min_log_level = min(getattr(logging, ll.upper()) for ll in levels)
Expand All @@ -108,7 +111,7 @@ def set_logger_handling(
raise ValueError(
"Mail server needs to be provided, either by `--mail-server` option or "
"XNAT_INGEST_MAILSERVER environment variable if logger emails "
"are provided: " + ", ".join(log_emails)
"are provided: " + ", ".join(str(le) for le in log_emails)
)
for log_email in log_emails:
smtp_hdle = logging.handlers.SMTPHandler(
Expand All @@ -126,7 +129,8 @@ def set_logger_handling(
if log_file:
log_file.path.parent.mkdir(exist_ok=True)
log_file_hdle = logging.FileHandler(log_file)
log_file_hdle.setLevel(getattr(logging, log_file.loglevel.upper()))
if log_file.loglevel:
log_file_hdle.setLevel(getattr(logging, log_file.loglevel.upper()))
log_file_hdle.setFormatter(
logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
Expand Down Expand Up @@ -280,7 +284,7 @@ def transform_paths(
while templ_attr_re.findall(expr):
expr = templ_attr_re.sub(r"{\1.\2}", expr)

group_count = Counter()
group_count: Counter[str] = Counter()

# Create regex groups for string template args
def str_templ_to_regex_group(match) -> str:
Expand Down

0 comments on commit da34b2f

Please sign in to comment.