From e37d3b5157bc07fe67e96dd6f7d8ea449b9c3235 Mon Sep 17 00:00:00 2001 From: Stefan Appelhoff Date: Mon, 5 Aug 2024 09:38:57 +0200 Subject: [PATCH] Expose ignore_json, add ignore_nosub to file match/search funcs (#1281) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * _return_root_paths now only looks in subfolders of root starting with 'sub-' * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * changed length of l. 2444 * Update mne_bids/path.py Co-authored-by: Richard Höchenberger * make it a parameter and expose it * add missing entries to mailmap, and sort * add Kaare to authors * add changelog entry --------- Co-authored-by: kaare-mikkelsen Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: kaare-mikkelsen Co-authored-by: Richard Höchenberger --- .mailmap | 33 ++++++++++++++++++----------- CITATION.cff | 4 ++++ doc/authors.rst | 1 + doc/whats_new.rst | 3 ++- mne_bids/path.py | 53 ++++++++++++++++++++++++++++++++++++++++------- 5 files changed, 73 insertions(+), 21 deletions(-) diff --git a/.mailmap b/.mailmap index 528f023a5..8ce007771 100644 --- a/.mailmap +++ b/.mailmap @@ -8,26 +8,35 @@ Ariel Rokem Chris Holdgraf Chris Holdgraf Clemens Brunner +Dominik Welke +Dominik Welke dominikwelke <33089761+dominikwelke@users.noreply.github.com> +Eduard Ort +Eduard Ort +Evgenii Kalenkovich +Ezequiel Mikulan +Franziska von Albedyll +Franziska von Albedyll <59033598+fravona2211@users.noreply.github.com> +Fu-Te Wong +Julius Welzel <52565341+JuliusWelzel@users.noreply.github.com> +Kaare Mikkelsen +Kaare Mikkelsen Kambiz Tavabi +Laetitia Fesselier +Laetitia Fesselier Mainak Jas Mainak Jas Marijn van Vliet +Mathieu Scheltienne +Mathieu Scheltienne +Matt Sanderson Maximilien Chaumon Maximilien Chaumon -Romain Quentin +Pierre Guetschel <25532709+PierreGtch@users.noreply.github.com> Richard Höchenberger +Richard Koehler +Robert Luke <748691+rob-luke@users.noreply.github.com> +Romain Quentin Sophie Herbst Stefan Appelhoff Teon Brooks -Dominik Welke -Dominik Welke dominikwelke <33089761+dominikwelke@users.noreply.github.com> -Ezequiel Mikulan -Fu-Te Wong -Matt Sanderson -Robert Luke <748691+rob-luke@users.noreply.github.com> -Evgenii Kalenkovich -Eduard Ort -Eduard Ort Tom Donoghue -Franziska von Albedyll -Franziska von Albedyll <59033598+fravona2211@users.noreply.github.com> diff --git a/CITATION.cff b/CITATION.cff index 29e9e1aef..727e91000 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -179,6 +179,10 @@ authors: family-names: Welzel affiliation: 'Department of Neurology, Kiel University, Germany' orcid: 'https://orcid.org/0000-0003-4782-5360' + - given-names: Kaare + family-names: Mikkelsen + affiliation: 'Department of Electrical and Computer Engineering, Aarhus University, Denmark' + orcid: 'https://orcid.org/0000-0002-7360-8629' - given-names: Amaia family-names: Benitez affiliation: 'Magnetoencephalography Core, National Institutes of Health, Bethesda, Maryland, USA' diff --git a/doc/authors.rst b/doc/authors.rst index fe8ae8881..487d69cb5 100644 --- a/doc/authors.rst +++ b/doc/authors.rst @@ -46,4 +46,5 @@ .. _Pierre Guetschel: https://github.com/PierreGtch .. _Mara Wolter: https://github.com/marakw .. _Julius Welzel: https://github.com/JuliusWelzel +.. _Kaare Mikkelsen: https://github.com/kaare-mikkelsen .. _Amaia Benitez: https://github.com/AmaiaBA diff --git a/doc/whats_new.rst b/doc/whats_new.rst index d8f0798dc..4b6d2c846 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -17,6 +17,7 @@ Version 0.16 (unreleased) The following authors contributed for the first time. Thank you so much! 🤩 +* `Kaare Mikkelsen`_ * `Amaia Benitez`_ The following authors had contributed before. Thank you for sticking around! 🤘 @@ -31,7 +32,7 @@ Detailed list of changes 🚀 Enhancements ^^^^^^^^^^^^^^^ -- nothing yet +- :meth:`mne_bids.BIDSPath.match()` and :func:`mne_bids.find_matching_paths` now have additional parameters ``ignore_json`` and ``ignore_nosub``, to give users more control over which type of files are matched, by `Kaare Mikkelsen`_ (:gh:`1281`) 🧐 API and behavior changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/mne_bids/path.py b/mne_bids/path.py index dd99b6b9c..17fa557e9 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -695,7 +695,9 @@ def rm(self, *, safe_remove=True, verbose=None): raise RuntimeError("The root must not be None to remove files.") # Planning: - paths_matched = self.match(ignore_json=False, check=self.check) + paths_matched = self.match( + ignore_json=False, ignore_nosub=False, check=self.check + ) subjects = set() paths_to_delete = list() paths_to_update = {} @@ -1005,7 +1007,7 @@ def update(self, *, check=None, **kwargs): raise e return self - def match(self, ignore_json=True, check=False): + def match(self, *, ignore_json=True, ignore_nosub=False, check=False): """Get a list of all matching paths in the root directory. Performs a recursive search, starting in ``.root`` (if set), based on @@ -1015,6 +1017,9 @@ def match(self, ignore_json=True, check=False): ---------- ignore_json : bool If ``True``, ignores json files. Defaults to ``True``. + ignore_nosub : bool + If ``True``, ignores all files that are not of the form ``root/sub-*``. + Defaults to ``False``. check : bool If ``True``, only returns paths that conform to BIDS. If ``False`` (default), the ``.check`` attribute of the returned @@ -1035,7 +1040,10 @@ def match(self, ignore_json=True, check=False): ) paths = _return_root_paths( - self.root, datatype=self.datatype, ignore_json=ignore_json + self.root, + datatype=self.datatype, + ignore_json=ignore_json, + ignore_nosub=ignore_nosub, ) fnames = _filter_fnames( @@ -2325,6 +2333,9 @@ def find_matching_paths( extensions=None, datatypes=None, check=False, + *, + ignore_json=False, + ignore_nosub=False, ): """Get list of all matching paths for all matching entity values. @@ -2384,6 +2395,11 @@ def find_matching_paths( (default), the ``.check`` attribute of the returned :class:`mne_bids.BIDSPath` object will be set to ``True`` for paths that do conform to BIDS, and to ``False`` for those that don't. + ignore_json : bool + If ``True``, ignores json files. Defaults to ``False``. + ignore_nosub : bool + If ``True``, ignores all files that are not of the form ``root/sub-*``. + Defaults to ``False``. Returns ------- @@ -2391,7 +2407,9 @@ def find_matching_paths( The matching paths. """ - fpaths = _return_root_paths(root, datatype=datatypes, ignore_json=False) + fpaths = _return_root_paths( + root, datatype=datatypes, ignore_json=ignore_json, ignore_nosub=ignore_nosub + ) fpaths_filtered = _filter_fnames( fpaths, @@ -2413,16 +2431,29 @@ def find_matching_paths( return bids_paths -def _return_root_paths(root, datatype=None, ignore_json=True): - """Return all paths in root. +def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False): + """Return all file paths in root. Can be filtered by datatype (which is present in the path but not in the BIDSPath basename). Can also be list of datatypes. + + Parameters + ---------- root : pathlib.Path | str The root of the BIDS path. datatype : str | array-like of str | None The BIDS data type, e.g., ``'anat'``, ``'func'``, ``'eeg'``, ``'meg'``, ``'ieeg'``. + ignore_json : bool + If ``True`` (default), do not return files ending with ``.json``. + ignore_nosub : bool + If ``True``, return only files of the form ``root/sub-*``. Defaults to + ``False``. + + Returns + ------- + paths : list of pathlib.Path + All paths in `root`, filtered according to the function parameters. """ root = Path(root) # if root is str @@ -2433,13 +2464,19 @@ def _return_root_paths(root, datatype=None, ignore_json=True): search_str = "*.*" paths = root.rglob(search_str) - # Only keep files (not directories), and omit the JSON sidecars - # if ignore_json is True. + # Only keep files (not directories), ... + # and omit the JSON sidecars if `ignore_json` is True. if ignore_json: paths = [p for p in paths if p.is_file() and p.suffix != ".json"] else: paths = [p for p in paths if p.is_file()] + # only keep files which are of the form root/sub-*, + # such that we only look in 'sub'-folders: + if ignore_nosub: + root_sub = str(root / "sub-") + paths = [p for p in paths if str(p).startswith(root_sub)] + return paths