ENH: Crawl dataset's metadata only once and before Nipype's workflow #1317

Merged
merged 9 commits on Aug 16, 2024
fix: mechanism to store/retrieve inputs
oesteban committed Aug 16, 2024
commit 446b6b9a08a75f2df997249914af0757955dbf1d
5 changes: 4 additions & 1 deletion mriqc/cli/run.py
@@ -266,7 +266,10 @@ def main(argv=None):
             )
         ),
     )
-    config.to_filename(config.execution.log_dir / f'config-{config.execution.run_uuid}.toml')
+    config.to_filename(
+        config.execution.log_dir / f'config-{config.execution.run_uuid}.toml',
+        store_inputs=False,  # Inputs are not necessary anymore
+    )
     sys.exit(exitcode)
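Note that this final config dump at exit passes store_inputs=False because the workflow has already consumed the crawled inputs. A minimal sketch of the two call modes (store_inputs defaults to True per the change to mriqc/config.py below; paths mirror the call above):

    from mriqc import config

    # While setting up: persist the crawled inputs alongside the TOML settings
    config.to_filename(config.execution.log_dir / f'config-{config.execution.run_uuid}.toml')

    # At exit, after the workflow has run: skip re-pickling the inputs
    config.to_filename(
        config.execution.log_dir / f'config-{config.execution.run_uuid}.toml',
        store_inputs=False,
    )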


46 changes: 43 additions & 3 deletions mriqc/config.py
@@ -91,6 +91,7 @@
 from __future__ import annotations
 
 import os
+import pickle
 import sys
 from contextlib import suppress
 from pathlib import Path
@@ -576,8 +577,8 @@ class workflow(_Config):
 
     analysis_level: list[str] = ['participant']
     """Level of analysis."""
-    biggest_file_gb: int = 1
-    """Size of largest file in GB."""
+    biggest_file_gb: dict[int] = 1
+    """Dictionary holding the size of largest file in GB (per modality)."""
     deoblique: bool = False
     """Deoblique the functional scans during head motion correction preprocessing."""
     despike: bool = False
@@ -590,6 +591,12 @@ class workflow(_Config):
     """Turn on FFT based spike detector (slow)."""
     inputs: list[str | os.PathLike] | None = None
     """List of files to be processed with MRIQC."""
+    inputs_entities: dict[list[dict]]
+    """List of entities corresponding to inputs."""
+    inputs_metadata: dict[list[dict | list[dict]]] | None = None
+    """List of metadata corresponding to inputs."""
+    inputs_path: Path | None = None
+    """Path to a pickle file with the input paths and metadata."""
     min_len_dwi: int = 7
     """
     Minimum DWI length to be considered a "processable" dataset
@@ -602,6 +609,21 @@ class workflow(_Config):
     template_id: str = 'MNI152NLin2009cAsym'
     """TemplateFlow ID of template used for the anatomical processing."""
 
+    _hidden: tuple[str, ...] = ('inputs', 'inputs_entities', 'inputs_metadata')
+
+    @classmethod
+    def init(cls) -> None:
+        if cls.inputs_path is None:
+            cls.inputs_path = execution.work_dir / f'inputs-{execution.run_uuid}.pkl'
+
+        if cls.inputs_path.exists():
+            with open(cls.inputs_path, 'rb') as handle:
+                _inputs = pickle.load(handle)
+
+            cls.inputs = _inputs['paths']
+            cls.inputs_metadata = _inputs['metadata']
+            cls.inputs_entities = _inputs['entities']
+
 
 class loggers:
     """Keep loggers easily accessible (see :py:func:`init`)."""
@@ -727,7 +749,10 @@ def dumps() -> str:
     return dumps(get())
 
 
-def to_filename(filename: str | os.PathLike | None = None) -> Path:
+def to_filename(
+    filename: str | os.PathLike | None = None,
+    store_inputs: bool = True,
+) -> Path:
     """Write settings to file."""
 
     if filename:
@@ -738,6 +763,21 @@ def to_filename(filename: str | os.PathLike | None = None) -> Path:
     settings.file_path.parent.mkdir(exist_ok=True, parents=True)
     settings.file_path.write_text(dumps())
     loggers.cli.debug(f'Saved MRIQC config file: {settings.file_path}.')
+
+    if store_inputs:
+        if workflow.inputs_path is None:
+            workflow.inputs_path = execution.work_dir / f'inputs-{execution.run_uuid}.pkl'
+
+        # Pickle inputs
+        with open(workflow.inputs_path, 'wb') as handle:
+            inputs_dict = {
+                'paths': workflow.inputs,
+                'metadata': workflow.inputs_metadata,
+                'entities': workflow.inputs_entities,
+            }
+            pickle.dump(inputs_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+        loggers.cli.debug(f'Saved MRIQC inputs file: {workflow.inputs_path}.')
     return settings.file_path
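The pickle written here is a plain dictionary, so it can also be inspected outside MRIQC; a minimal sketch (the path is hypothetical):

    import pickle
    from pathlib import Path

    inputs_path = Path('work') / 'inputs-<run_uuid>.pkl'  # hypothetical file name
    with open(inputs_path, 'rb') as handle:
        inputs_dict = pickle.load(handle)

    # The keys match what workflow.init() expects when restoring state
    print(sorted(inputs_dict))  # ['entities', 'metadata', 'paths']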


8 changes: 4 additions & 4 deletions mriqc/utils/misc.py
@@ -447,8 +447,8 @@ def initialize_meta_and_data(
     _datalad_get(dataset)
 
     # Extract metadata and filesize
-    config.workflow.input_metadata = {}
-    config.workflow.input_entities = {}
+    config.workflow.inputs_metadata = {}
+    config.workflow.inputs_entities = {}
     config.workflow.biggest_file_gb = {}
     for mod, input_list in config.workflow.inputs.items():
         config.loggers.cli.log(
@@ -495,8 +495,8 @@ def initialize_meta_and_data(
 
         # Finalizing (write to config so that values are propagated)
         _max_size = np.max(size)
-        config.workflow.input_metadata[mod] = metadata
-        config.workflow.input_entities[mod] = entities
+        config.workflow.inputs_metadata[mod] = metadata
+        config.workflow.inputs_entities[mod] = entities
         config.workflow.biggest_file_gb[mod] = float(_max_size)  # Cast required to store YAML
 
         config.loggers.cli.log(
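After this rename, each refreshed attribute is a dictionary keyed by modality, matching biggest_file_gb. An illustrative (made-up) example of the resulting shapes:

    config.workflow.inputs             # {'T1w': [Path(...), ...], 'bold': [Path(...), ...]}
    config.workflow.inputs_metadata    # {'bold': [{'RepetitionTime': 2.0, ...}, ...]}
    config.workflow.inputs_entities    # {'bold': [{'subject': '01', 'task': 'rest', ...}, ...]}
    config.workflow.biggest_file_gb    # {'T1w': 0.03, 'bold': 1.2}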