From 550ee7b8e0e54eb1b3bd4ae7daf6adac02ad184b Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Thu, 14 Nov 2024 15:06:04 -0500 Subject: [PATCH 1/2] BUG: Fix bug with anonymization --- mne/_fiff/meas_info.py | 8 +- mne/_fiff/proc_history.py | 54 +++++--- mne/_fiff/tests/test_meas_info.py | 199 +++++++++++++++++++++-------- mne/_fiff/write.py | 4 +- mne/io/fiff/tests/test_raw_fiff.py | 15 +++ mne/io/tests/test_raw.py | 4 +- mne/utils/_testing.py | 4 +- mne/utils/numerics.py | 5 +- 8 files changed, 206 insertions(+), 87 deletions(-) diff --git a/mne/_fiff/meas_info.py b/mne/_fiff/meas_info.py index c881822e44a..9edb8a96d3b 100644 --- a/mne/_fiff/meas_info.py +++ b/mne/_fiff/meas_info.py @@ -1059,7 +1059,9 @@ class HeliumInfo(ValidatedDict): _check_types, name='helium_info["orig_file_guid"]', types=str ), "meas_date": partial( - _check_types, name='helium_info["meas_date"]', types=datetime.datetime + _check_types, + name='helium_info["meas_date"]', + types=(datetime.datetime, None), ), } @@ -3525,9 +3527,7 @@ def anonymize_info(info, daysback=None, keep_his=False, verbose=None): if none_meas_date and hi.get("meas_date") is not None: hi["meas_date"] = _ensure_meas_date_none_or_dt(DATE_NONE) elif hi.get("meas_date") is not None: - hi["meas_date"] = _ensure_meas_date_none_or_dt( - _add_timedelta_to_stamp(hi["meas_date"], -delta_t) - ) + hi["meas_date"] = hi["meas_date"] - delta_t di = info.get("device_info") if di is not None: diff --git a/mne/_fiff/proc_history.py b/mne/_fiff/proc_history.py index 86a094cfa41..caa2d3de554 100644 --- a/mne/_fiff/proc_history.py +++ b/mne/_fiff/proc_history.py @@ -24,24 +24,38 @@ write_string, ) -_proc_keys = [ - "parent_file_id", - "block_id", - "parent_block_id", - "date", - "experimenter", - "creator", -] -_proc_ids = [ - FIFF.FIFF_PARENT_FILE_ID, - FIFF.FIFF_BLOCK_ID, - FIFF.FIFF_PARENT_BLOCK_ID, - FIFF.FIFF_MEAS_DATE, - FIFF.FIFF_EXPERIMENTER, - FIFF.FIFF_CREATOR, -] -_proc_writers = [write_id, write_id, write_id, write_int, write_string, write_string] -_proc_casters = [dict, dict, dict, np.array, str, str] +_proc_map = dict( # ID, caster, writer + parent_file_id=( + FIFF.FIFF_PARENT_FILE_ID, + dict, + write_id, + ), + block_id=( + FIFF.FIFF_BLOCK_ID, + dict, + write_id, + ), + parent_block_id=( + FIFF.FIFF_PARENT_BLOCK_ID, + dict, + write_id, + ), + date=( + FIFF.FIFF_MEAS_DATE, + lambda d: tuple(int(dd) for dd in d), + write_int, + ), + experimenter=( + FIFF.FIFF_EXPERIMENTER, + str, + write_string, + ), + creator=( + FIFF.FIFF_CREATOR, + str, + write_string, + ), +) def _read_proc_history(fid, tree): @@ -98,7 +112,7 @@ def _read_proc_history(fid, tree): for i_ent in range(proc_record["nent"]): kind = proc_record["directory"][i_ent].kind pos = proc_record["directory"][i_ent].pos - for key, id_, cast in zip(_proc_keys, _proc_ids, _proc_casters): + for key, (id_, cast, _) in _proc_map.items(): if kind == id_: tag = read_tag(fid, pos) record[key] = cast(tag.data) @@ -122,7 +136,7 @@ def _write_proc_history(fid, info): start_block(fid, FIFF.FIFFB_PROCESSING_HISTORY) for record in info["proc_history"]: start_block(fid, FIFF.FIFFB_PROCESSING_RECORD) - for key, id_, writer in zip(_proc_keys, _proc_ids, _proc_writers): + for key, (id_, _, writer) in _proc_map.items(): if key in record: writer(fid, id_, record[key]) _write_maxfilter_record(fid, record["max_info"]) diff --git a/mne/_fiff/tests/test_meas_info.py b/mne/_fiff/tests/test_meas_info.py index 6624ee0dd94..3e3c150573f 100644 --- a/mne/_fiff/tests/test_meas_info.py +++ b/mne/_fiff/tests/test_meas_info.py @@ -69,7 +69,13 @@ write_inverse_operator, ) from mne.transforms import Transform -from mne.utils import _empty_hash, _record_warnings, assert_object_equal, catch_logging +from mne.utils import ( + _empty_hash, + _record_warnings, + assert_object_equal, + catch_logging, + object_diff, +) root_dir = Path(__file__).parents[2] fiducials_fname = root_dir / "data" / "fsaverage" / "fsaverage-fiducials.fif" @@ -582,6 +588,8 @@ def test_check_consistency(): def _test_anonymize_info(base_info): """Test that sensitive information can be anonymized.""" pytest.raises(TypeError, anonymize_info, "foo") + assert isinstance(base_info, Info) + base_info = base_info.copy() default_anon_dos = datetime(2000, 1, 1, 0, 0, 0, tzinfo=timezone.utc) default_str = "mne_anonymize" @@ -589,22 +597,20 @@ def _test_anonymize_info(base_info): default_desc = "Anonymized using a time shift" + " to preserve age at acquisition" # Test no error for incomplete info - info = base_info.copy() - info.pop("file_id") - anonymize_info(info) + bad_info = base_info.copy() + bad_info.pop("file_id") + anonymize_info(bad_info) + del bad_info - # Fake some subject data + # Fake some additional data + _complete_info(base_info) meas_date = datetime(2010, 1, 1, 0, 0, 0, tzinfo=timezone.utc) with base_info._unlock(): base_info["meas_date"] = meas_date - base_info["subject_info"] = dict( - id=1, - his_id="foobar", - last_name="bar", - first_name="bar", + base_info["subject_info"].update( birthday=date(1987, 4, 8), + his_id="foobar", sex=0, - hand=1, ) # generate expected info... @@ -613,15 +619,29 @@ def _test_anonymize_info(base_info): exp_info = base_info.copy() exp_info._unlocked = True exp_info["description"] = default_desc - exp_info["experimenter"] = default_str - exp_info["proj_name"] = default_str + erase_strs = ( + ("experimenter",), + ("proj_name",), + ("subject_info", "first_name"), + ("subject_info", "middle_name"), + ("subject_info", "last_name"), + ("device_info", "site"), + ("device_info", "serial"), + ("helium_info", "orig_file_guid"), + ("proc_history", 0, "experimenter"), + ) + for tp in erase_strs: + this = exp_info + for lev in tp[:-1]: + this = this[lev] + this[tp[-1]] = default_str exp_info["proj_id"] = np.array([0]) - exp_info["subject_info"]["first_name"] = default_str - exp_info["subject_info"]["last_name"] = default_str - exp_info["subject_info"]["id"] = default_subject_id + for key in ("sex", "id", "height", "weight"): + exp_info["subject_info"][key] = 0 exp_info["subject_info"]["his_id"] = str(default_subject_id) - exp_info["subject_info"]["sex"] = 0 del exp_info["subject_info"]["hand"] # there's no "unknown" setting + exp_info["utc_offset"] = None + exp_info["proc_history"][0]["block_id"]["machid"][:] = 0 # this bday is 3653 days different. the change in day is due to a # different number of leap days between 1987 and 1977 than between @@ -635,14 +655,25 @@ def _test_anonymize_info(base_info): # adjust each expected outcome delta_t = timedelta(days=3653) - for key in ("file_id", "meas_id"): - value = exp_info.get(key) - if value is not None: - assert "msecs" not in value - tmp = _add_timedelta_to_stamp((value["secs"], value["usecs"]), -delta_t) - value["secs"] = tmp[0] - value["usecs"] = tmp[1] - value["machid"][:] = 0 + + def _adjust_back(e_i, dt): + for key in ("file_id", "meas_id"): + value = e_i.get(key) + if value is not None: + assert "msecs" not in value + tmp = _add_timedelta_to_stamp((value["secs"], value["usecs"]), -dt) + value["secs"] = tmp[0] + value["usecs"] = tmp[1] + value["machid"][:] = 0 + e_i["helium_info"]["meas_date"] -= dt + ds = int(round(dt.total_seconds())) + e_i["proc_history"][0]["date"] = ( + e_i["proc_history"][0]["date"][0] - ds, + e_i["proc_history"][0]["date"][1], + ) + e_i["proc_history"][0]["block_id"]["secs"] -= ds + + _adjust_back(exp_info, delta_t) # exp 2 tests the keep_his option exp_info_2 = exp_info.copy() @@ -656,26 +687,19 @@ def _test_anonymize_info(base_info): with exp_info_3._unlock(): exp_info_3["subject_info"]["birthday"] = date(1987, 2, 24) exp_info_3["meas_date"] = meas_date - delta_t_2 - for key in ("file_id", "meas_id"): - value = exp_info_3.get(key) - if value is not None: - assert "msecs" not in value - tmp = _add_timedelta_to_stamp((value["secs"], value["usecs"]), -delta_t_2) - value["secs"] = tmp[0] - value["usecs"] = tmp[1] - value["machid"][:] = 0 + _adjust_back(exp_info_3, delta_t_2) # exp 4 tests is a supplied daysback delta_t_3 = timedelta(days=223 + 364 * 500) new_info = anonymize_info(base_info.copy()) - assert_object_equal(new_info, exp_info) + assert_object_equal(new_info, exp_info, err_msg="anon mismatch") new_info = anonymize_info(base_info.copy(), keep_his=True) - assert_object_equal(new_info, exp_info_2) + assert_object_equal(new_info, exp_info_2, err_msg="anon keep_his mismatch") new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days) - assert_object_equal(new_info, exp_info_3) + assert_object_equal(new_info, exp_info_3, err_msg="anon daysback mismatch") with pytest.raises(RuntimeError, match="anonymize_info generated"): anonymize_info(base_info.copy(), daysback=delta_t_3.days) @@ -684,25 +708,33 @@ def _test_anonymize_info(base_info): # test with meas_date = None with base_info._unlock(): base_info["meas_date"] = None - exp_info_3._unlocked = True - exp_info_3["meas_date"] = None - exp_info_3["file_id"]["secs"] = DATE_NONE[0] - exp_info_3["file_id"]["usecs"] = DATE_NONE[1] - exp_info_3["meas_id"]["secs"] = DATE_NONE[0] - exp_info_3["meas_id"]["usecs"] = DATE_NONE[1] - exp_info_3["subject_info"].pop("birthday", None) - exp_info_3._unlocked = False + with exp_info_3._unlock(): + exp_info_3["meas_date"] = None + exp_info_3["helium_info"]["meas_date"] = None + for var in ( + exp_info_3["file_id"], + exp_info_3["meas_id"], + exp_info_3["proc_history"][0]["block_id"], + ): + var["secs"] = DATE_NONE[0] + var["usecs"] = DATE_NONE[1] + exp_info_3["subject_info"].pop("birthday", None) + exp_info_3["proc_history"][0]["date"] = DATE_NONE if base_info["meas_date"] is None: with pytest.warns(RuntimeWarning, match="all information"): new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days) else: new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days) - assert_object_equal(new_info, exp_info_3) + assert_object_equal( + new_info, + exp_info_3, + err_msg="meas_date=None daysback mismatch", + ) with _record_warnings(): # meas_date is None new_info = anonymize_info(base_info.copy()) - assert_object_equal(new_info, exp_info_3) + assert_object_equal(new_info, exp_info_3, err_msg="meas_date=None mismatch") @pytest.mark.parametrize( @@ -728,12 +760,65 @@ def test_meas_date_convert(stamp, dt): assert str(dt[0]) in repr(info) +def _complete_info(info): + """Complete the meas info fields.""" + for key in ("file_id", "meas_id"): + assert info[key] is not None + info["subject_info"] = dict( + id=1, + sex=1, + hand=1, + first_name="a", + middle_name="b", + last_name="c", + his_id="d", + birthday=date(2000, 1, 1), + weight=1.0, + height=2.0, + ) + info["helium_info"] = dict( + he_level_raw=12.34, + helium_level=45.67, + meas_date=datetime(2024, 11, 14, 14, 8, 2, tzinfo=timezone.utc), + orig_file_guid="e", + ) + info["experimenter"] = "f" + info["description"] = "g" + with info._unlock(): + info["proj_id"] = np.ones(1, int) + info["proj_name"] = "h" + info["utc_offset"] = "i" + d = (1717707794, 2) + info["proc_history"] = [ + dict( + block_id=dict( + version=4, + machid=np.ones(2, int), + secs=d[0], + usecs=d[1], + date=d, + ), + experimenter="j", + max_info=dict( + max_st=[], + sss_ctc=[], + sss_cal=[], + sss_info=dict(head_pos=None, in_order=8), + ), + date=d, + ), + ] + info["device_info"] = dict(serial="k", site="l") + info._check_consistency() + + def test_anonymize(tmp_path): """Test that sensitive information can be anonymized.""" pytest.raises(TypeError, anonymize_info, "foo") # Fake some subject data raw = read_raw_fif(raw_fname) + _complete_info(raw.info) raw.set_annotations( Annotations(onset=[0, 1], duration=[1, 1], description="dummy", orig_time=None) ) @@ -745,8 +830,8 @@ def test_anonymize(tmp_path): # test mne.anonymize_info() events = read_events(event_name) epochs = Epochs(raw, events[:1], 2, 0.0, 0.1, baseline=None) - _test_anonymize_info(raw.info.copy()) - _test_anonymize_info(epochs.info.copy()) + _test_anonymize_info(raw.info) + _test_anonymize_info(epochs.info) # test instance methods & I/O roundtrip for inst, keep_his in zip((raw, epochs), (True, False)): @@ -792,17 +877,19 @@ def test_anonymize(tmp_path): assert_allclose(raw.annotations.onset, expected_onset) -def test_anonymize_with_io(tmp_path): - """Test that IO does not break anonymization.""" - raw = read_raw_fif(raw_fname) - +@pytest.mark.parametrize("daysback", [None, 28826]) +def test_anonymize_with_io(tmp_path, daysback): + """Test that IO does not break anonymization and all fields.""" + raw = read_raw_fif(raw_fname).crop(0, 1) + _complete_info(raw.info) temp_path = tmp_path / "tmp_raw.fif" raw.save(temp_path) - - raw2 = read_raw_fif(temp_path) - - daysback = (raw2.info["meas_date"].date() - date(1924, 1, 1)).days + raw2 = read_raw_fif(temp_path).load_data() raw2.anonymize(daysback=daysback) + raw2.save(temp_path, overwrite=True) + raw3 = read_raw_fif(temp_path) + d = object_diff(raw2.info, raw3.info) + assert d == "['file_id']['machid'] array mismatch\n" @testing.requires_testing_data diff --git a/mne/_fiff/write.py b/mne/_fiff/write.py index ec8dddfd41a..427d4f12e54 100644 --- a/mne/_fiff/write.py +++ b/mne/_fiff/write.py @@ -72,7 +72,9 @@ def write_int(fid, kind, data): data_size = 4 data = np.asarray(data) if data.dtype.kind not in "uib" and data.size > 0: - raise TypeError(f"Cannot safely write data with dtype {data.dtype} as int") + raise TypeError( + f"Cannot safely write data kind {kind} with dtype {data.dtype} as int", + ) max_val = data.max() if data.size > 0 else 0 if max_val > INT32_MAX: raise TypeError( diff --git a/mne/io/fiff/tests/test_raw_fiff.py b/mne/io/fiff/tests/test_raw_fiff.py index 2978a09287c..fec1731deff 100644 --- a/mne/io/fiff/tests/test_raw_fiff.py +++ b/mne/io/fiff/tests/test_raw_fiff.py @@ -51,6 +51,7 @@ fif_fname = data_dir / "sample_audvis_trunc_raw.fif" ms_fname = testing_path / "SSS" / "test_move_anon_raw.fif" skip_fname = testing_path / "misc" / "intervalrecording_raw.fif" +tri_fname = testing_path / "SSS" / "TRIUX" / "triux_bmlhus_erm_raw.fif" base_dir = Path(__file__).parents[2] / "tests" / "data" test_fif_fname = base_dir / "test_raw.fif" @@ -2189,3 +2190,17 @@ def test_init_kwargs(cast): raw2 = read_raw_fif(**raw._init_kwargs) for r in (raw, raw2): assert isinstance(r._init_kwargs["fname"], pathlib.Path) + + +@pytest.mark.slowtest +@pytest.mark.parametrize("fname", [ms_fname, tri_fname]) +def test_fif_files(fname): + """Test reading of various FIF files.""" + _test_raw_reader( + read_raw_fif, + fname=fname, + allow_maxshield="yes", + verbose="error", + test_kwargs=False, + test_preloading=False, + ) diff --git a/mne/io/tests/test_raw.py b/mne/io/tests/test_raw.py index fa2224136d7..c2e6b6b151f 100644 --- a/mne/io/tests/test_raw.py +++ b/mne/io/tests/test_raw.py @@ -315,7 +315,7 @@ def _test_raw_reader( ids = [id(ch["loc"]) for ch in raw.info["chs"]] assert len(set(ids)) == len(ids) - full_data = raw._data + full_data = raw.get_data() assert raw.__class__.__name__ in repr(raw) # to test repr assert raw.info.__class__.__name__ in repr(raw.info) assert isinstance(raw.info["dig"], type(None) | list) @@ -355,7 +355,7 @@ def _test_raw_reader( with pytest.raises(OSError, match="raw must end with .fif or .fif.gz"): raw.save(out_fname_h5) - raw3 = read_raw_fif(out_fname) + raw3 = read_raw_fif(out_fname, allow_maxshield="yes") assert_named_constants(raw3.info) assert set(raw.info.keys()) == set(raw3.info.keys()) assert_allclose( diff --git a/mne/utils/_testing.py b/mne/utils/_testing.py index cdc65d7ce5c..323b530a641 100644 --- a/mne/utils/_testing.py +++ b/mne/utils/_testing.py @@ -179,10 +179,10 @@ def assert_and_remove_boundary_annot(annotations, n=1): annotations.delete(idx) -def assert_object_equal(a, b): +def assert_object_equal(a, b, *, err_msg="Object mismatch"): """Assert two objects are equal.""" d = object_diff(a, b) - assert d == "", d + assert d == "", f"{err_msg}\n{d}" def _raw_annot(meas_date, orig_time): diff --git a/mne/utils/numerics.py b/mne/utils/numerics.py index 07ba9ff7f56..c287fb42305 100644 --- a/mne/utils/numerics.py +++ b/mne/utils/numerics.py @@ -809,8 +809,9 @@ def object_diff(a, b, pre="", *, allclose=False): if a.getvalue() != b.getvalue(): out += pre + " StringIO mismatch\n" elif isinstance(a, datetime | date): - if (a - b).total_seconds() != 0: - out += pre + f" {a.__class__.__name__} mismatch\n" + ts = (a - b).total_seconds() + if ts != 0: + out += pre + f" {a.__class__.__name__} mismatch ({a} vs {b} by {ts} sec)\n" elif sparse.issparse(a): # sparsity and sparse type of b vs a already checked above by type() if b.shape != a.shape: From 05b051107779e9b47b83c016a63ca868bbf98b5f Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Thu, 14 Nov 2024 16:06:24 -0500 Subject: [PATCH 2/2] FIX: Dec --- mne/io/fiff/tests/test_raw_fiff.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mne/io/fiff/tests/test_raw_fiff.py b/mne/io/fiff/tests/test_raw_fiff.py index fec1731deff..760b98b9502 100644 --- a/mne/io/fiff/tests/test_raw_fiff.py +++ b/mne/io/fiff/tests/test_raw_fiff.py @@ -2193,6 +2193,7 @@ def test_init_kwargs(cast): @pytest.mark.slowtest +@testing.requires_testing_data @pytest.mark.parametrize("fname", [ms_fname, tri_fname]) def test_fif_files(fname): """Test reading of various FIF files."""