From 550ee7b8e0e54eb1b3bd4ae7daf6adac02ad184b Mon Sep 17 00:00:00 2001
From: Eric Larson <larson.eric.d@gmail.com>
Date: Thu, 14 Nov 2024 15:06:04 -0500
Subject: [PATCH 1/2] BUG: Fix bug with anonymization

---
 mne/_fiff/meas_info.py             |   8 +-
 mne/_fiff/proc_history.py          |  54 +++++---
 mne/_fiff/tests/test_meas_info.py  | 199 +++++++++++++++++++++--------
 mne/_fiff/write.py                 |   4 +-
 mne/io/fiff/tests/test_raw_fiff.py |  15 +++
 mne/io/tests/test_raw.py           |   4 +-
 mne/utils/_testing.py              |   4 +-
 mne/utils/numerics.py              |   5 +-
 8 files changed, 206 insertions(+), 87 deletions(-)

diff --git a/mne/_fiff/meas_info.py b/mne/_fiff/meas_info.py
index c881822e44a..9edb8a96d3b 100644
--- a/mne/_fiff/meas_info.py
+++ b/mne/_fiff/meas_info.py
@@ -1059,7 +1059,9 @@ class HeliumInfo(ValidatedDict):
             _check_types, name='helium_info["orig_file_guid"]', types=str
         ),
         "meas_date": partial(
-            _check_types, name='helium_info["meas_date"]', types=datetime.datetime
+            _check_types,
+            name='helium_info["meas_date"]',
+            types=(datetime.datetime, None),
         ),
     }
 
@@ -3525,9 +3527,7 @@ def anonymize_info(info, daysback=None, keep_his=False, verbose=None):
         if none_meas_date and hi.get("meas_date") is not None:
             hi["meas_date"] = _ensure_meas_date_none_or_dt(DATE_NONE)
         elif hi.get("meas_date") is not None:
-            hi["meas_date"] = _ensure_meas_date_none_or_dt(
-                _add_timedelta_to_stamp(hi["meas_date"], -delta_t)
-            )
+            hi["meas_date"] = hi["meas_date"] - delta_t
 
     di = info.get("device_info")
     if di is not None:
diff --git a/mne/_fiff/proc_history.py b/mne/_fiff/proc_history.py
index 86a094cfa41..caa2d3de554 100644
--- a/mne/_fiff/proc_history.py
+++ b/mne/_fiff/proc_history.py
@@ -24,24 +24,38 @@
     write_string,
 )
 
-_proc_keys = [
-    "parent_file_id",
-    "block_id",
-    "parent_block_id",
-    "date",
-    "experimenter",
-    "creator",
-]
-_proc_ids = [
-    FIFF.FIFF_PARENT_FILE_ID,
-    FIFF.FIFF_BLOCK_ID,
-    FIFF.FIFF_PARENT_BLOCK_ID,
-    FIFF.FIFF_MEAS_DATE,
-    FIFF.FIFF_EXPERIMENTER,
-    FIFF.FIFF_CREATOR,
-]
-_proc_writers = [write_id, write_id, write_id, write_int, write_string, write_string]
-_proc_casters = [dict, dict, dict, np.array, str, str]
+_proc_map = dict(  # ID, caster, writer
+    parent_file_id=(
+        FIFF.FIFF_PARENT_FILE_ID,
+        dict,
+        write_id,
+    ),
+    block_id=(
+        FIFF.FIFF_BLOCK_ID,
+        dict,
+        write_id,
+    ),
+    parent_block_id=(
+        FIFF.FIFF_PARENT_BLOCK_ID,
+        dict,
+        write_id,
+    ),
+    date=(
+        FIFF.FIFF_MEAS_DATE,
+        lambda d: tuple(int(dd) for dd in d),
+        write_int,
+    ),
+    experimenter=(
+        FIFF.FIFF_EXPERIMENTER,
+        str,
+        write_string,
+    ),
+    creator=(
+        FIFF.FIFF_CREATOR,
+        str,
+        write_string,
+    ),
+)
 
 
 def _read_proc_history(fid, tree):
@@ -98,7 +112,7 @@ def _read_proc_history(fid, tree):
             for i_ent in range(proc_record["nent"]):
                 kind = proc_record["directory"][i_ent].kind
                 pos = proc_record["directory"][i_ent].pos
-                for key, id_, cast in zip(_proc_keys, _proc_ids, _proc_casters):
+                for key, (id_, cast, _) in _proc_map.items():
                     if kind == id_:
                         tag = read_tag(fid, pos)
                         record[key] = cast(tag.data)
@@ -122,7 +136,7 @@ def _write_proc_history(fid, info):
         start_block(fid, FIFF.FIFFB_PROCESSING_HISTORY)
         for record in info["proc_history"]:
             start_block(fid, FIFF.FIFFB_PROCESSING_RECORD)
-            for key, id_, writer in zip(_proc_keys, _proc_ids, _proc_writers):
+            for key, (id_, _, writer) in _proc_map.items():
                 if key in record:
                     writer(fid, id_, record[key])
             _write_maxfilter_record(fid, record["max_info"])
diff --git a/mne/_fiff/tests/test_meas_info.py b/mne/_fiff/tests/test_meas_info.py
index 6624ee0dd94..3e3c150573f 100644
--- a/mne/_fiff/tests/test_meas_info.py
+++ b/mne/_fiff/tests/test_meas_info.py
@@ -69,7 +69,13 @@
     write_inverse_operator,
 )
 from mne.transforms import Transform
-from mne.utils import _empty_hash, _record_warnings, assert_object_equal, catch_logging
+from mne.utils import (
+    _empty_hash,
+    _record_warnings,
+    assert_object_equal,
+    catch_logging,
+    object_diff,
+)
 
 root_dir = Path(__file__).parents[2]
 fiducials_fname = root_dir / "data" / "fsaverage" / "fsaverage-fiducials.fif"
@@ -582,6 +588,8 @@ def test_check_consistency():
 def _test_anonymize_info(base_info):
     """Test that sensitive information can be anonymized."""
     pytest.raises(TypeError, anonymize_info, "foo")
+    assert isinstance(base_info, Info)
+    base_info = base_info.copy()
 
     default_anon_dos = datetime(2000, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
     default_str = "mne_anonymize"
@@ -589,22 +597,20 @@ def _test_anonymize_info(base_info):
     default_desc = "Anonymized using a time shift" + " to preserve age at acquisition"
 
     # Test no error for incomplete info
-    info = base_info.copy()
-    info.pop("file_id")
-    anonymize_info(info)
+    bad_info = base_info.copy()
+    bad_info.pop("file_id")
+    anonymize_info(bad_info)
+    del bad_info
 
-    # Fake some subject data
+    # Fake some additional data
+    _complete_info(base_info)
     meas_date = datetime(2010, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
     with base_info._unlock():
         base_info["meas_date"] = meas_date
-        base_info["subject_info"] = dict(
-            id=1,
-            his_id="foobar",
-            last_name="bar",
-            first_name="bar",
+        base_info["subject_info"].update(
             birthday=date(1987, 4, 8),
+            his_id="foobar",
             sex=0,
-            hand=1,
         )
 
     # generate expected info...
@@ -613,15 +619,29 @@ def _test_anonymize_info(base_info):
     exp_info = base_info.copy()
     exp_info._unlocked = True
     exp_info["description"] = default_desc
-    exp_info["experimenter"] = default_str
-    exp_info["proj_name"] = default_str
+    erase_strs = (
+        ("experimenter",),
+        ("proj_name",),
+        ("subject_info", "first_name"),
+        ("subject_info", "middle_name"),
+        ("subject_info", "last_name"),
+        ("device_info", "site"),
+        ("device_info", "serial"),
+        ("helium_info", "orig_file_guid"),
+        ("proc_history", 0, "experimenter"),
+    )
+    for tp in erase_strs:
+        this = exp_info
+        for lev in tp[:-1]:
+            this = this[lev]
+        this[tp[-1]] = default_str
     exp_info["proj_id"] = np.array([0])
-    exp_info["subject_info"]["first_name"] = default_str
-    exp_info["subject_info"]["last_name"] = default_str
-    exp_info["subject_info"]["id"] = default_subject_id
+    for key in ("sex", "id", "height", "weight"):
+        exp_info["subject_info"][key] = 0
     exp_info["subject_info"]["his_id"] = str(default_subject_id)
-    exp_info["subject_info"]["sex"] = 0
     del exp_info["subject_info"]["hand"]  # there's no "unknown" setting
+    exp_info["utc_offset"] = None
+    exp_info["proc_history"][0]["block_id"]["machid"][:] = 0
 
     # this bday is 3653 days different. the change in day is due to a
     # different number of leap days between 1987 and 1977 than between
@@ -635,14 +655,25 @@ def _test_anonymize_info(base_info):
 
     # adjust each expected outcome
     delta_t = timedelta(days=3653)
-    for key in ("file_id", "meas_id"):
-        value = exp_info.get(key)
-        if value is not None:
-            assert "msecs" not in value
-            tmp = _add_timedelta_to_stamp((value["secs"], value["usecs"]), -delta_t)
-            value["secs"] = tmp[0]
-            value["usecs"] = tmp[1]
-            value["machid"][:] = 0
+
+    def _adjust_back(e_i, dt):
+        for key in ("file_id", "meas_id"):
+            value = e_i.get(key)
+            if value is not None:
+                assert "msecs" not in value
+                tmp = _add_timedelta_to_stamp((value["secs"], value["usecs"]), -dt)
+                value["secs"] = tmp[0]
+                value["usecs"] = tmp[1]
+                value["machid"][:] = 0
+        e_i["helium_info"]["meas_date"] -= dt
+        ds = int(round(dt.total_seconds()))
+        e_i["proc_history"][0]["date"] = (
+            e_i["proc_history"][0]["date"][0] - ds,
+            e_i["proc_history"][0]["date"][1],
+        )
+        e_i["proc_history"][0]["block_id"]["secs"] -= ds
+
+    _adjust_back(exp_info, delta_t)
 
     # exp 2 tests the keep_his option
     exp_info_2 = exp_info.copy()
@@ -656,26 +687,19 @@ def _test_anonymize_info(base_info):
     with exp_info_3._unlock():
         exp_info_3["subject_info"]["birthday"] = date(1987, 2, 24)
         exp_info_3["meas_date"] = meas_date - delta_t_2
-    for key in ("file_id", "meas_id"):
-        value = exp_info_3.get(key)
-        if value is not None:
-            assert "msecs" not in value
-            tmp = _add_timedelta_to_stamp((value["secs"], value["usecs"]), -delta_t_2)
-            value["secs"] = tmp[0]
-            value["usecs"] = tmp[1]
-            value["machid"][:] = 0
+    _adjust_back(exp_info_3, delta_t_2)
 
     # exp 4 tests is a supplied daysback
     delta_t_3 = timedelta(days=223 + 364 * 500)
 
     new_info = anonymize_info(base_info.copy())
-    assert_object_equal(new_info, exp_info)
+    assert_object_equal(new_info, exp_info, err_msg="anon mismatch")
 
     new_info = anonymize_info(base_info.copy(), keep_his=True)
-    assert_object_equal(new_info, exp_info_2)
+    assert_object_equal(new_info, exp_info_2, err_msg="anon keep_his mismatch")
 
     new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days)
-    assert_object_equal(new_info, exp_info_3)
+    assert_object_equal(new_info, exp_info_3, err_msg="anon daysback mismatch")
 
     with pytest.raises(RuntimeError, match="anonymize_info generated"):
         anonymize_info(base_info.copy(), daysback=delta_t_3.days)
@@ -684,25 +708,33 @@ def _test_anonymize_info(base_info):
     # test with meas_date = None
     with base_info._unlock():
         base_info["meas_date"] = None
-    exp_info_3._unlocked = True
-    exp_info_3["meas_date"] = None
-    exp_info_3["file_id"]["secs"] = DATE_NONE[0]
-    exp_info_3["file_id"]["usecs"] = DATE_NONE[1]
-    exp_info_3["meas_id"]["secs"] = DATE_NONE[0]
-    exp_info_3["meas_id"]["usecs"] = DATE_NONE[1]
-    exp_info_3["subject_info"].pop("birthday", None)
-    exp_info_3._unlocked = False
+    with exp_info_3._unlock():
+        exp_info_3["meas_date"] = None
+        exp_info_3["helium_info"]["meas_date"] = None
+        for var in (
+            exp_info_3["file_id"],
+            exp_info_3["meas_id"],
+            exp_info_3["proc_history"][0]["block_id"],
+        ):
+            var["secs"] = DATE_NONE[0]
+            var["usecs"] = DATE_NONE[1]
+        exp_info_3["subject_info"].pop("birthday", None)
+        exp_info_3["proc_history"][0]["date"] = DATE_NONE
 
     if base_info["meas_date"] is None:
         with pytest.warns(RuntimeWarning, match="all information"):
             new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days)
     else:
         new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days)
-    assert_object_equal(new_info, exp_info_3)
+    assert_object_equal(
+        new_info,
+        exp_info_3,
+        err_msg="meas_date=None daysback mismatch",
+    )
 
     with _record_warnings():  # meas_date is None
         new_info = anonymize_info(base_info.copy())
-    assert_object_equal(new_info, exp_info_3)
+    assert_object_equal(new_info, exp_info_3, err_msg="meas_date=None mismatch")
 
 
 @pytest.mark.parametrize(
@@ -728,12 +760,65 @@ def test_meas_date_convert(stamp, dt):
     assert str(dt[0]) in repr(info)
 
 
+def _complete_info(info):
+    """Complete the meas info fields."""
+    for key in ("file_id", "meas_id"):
+        assert info[key] is not None
+    info["subject_info"] = dict(
+        id=1,
+        sex=1,
+        hand=1,
+        first_name="a",
+        middle_name="b",
+        last_name="c",
+        his_id="d",
+        birthday=date(2000, 1, 1),
+        weight=1.0,
+        height=2.0,
+    )
+    info["helium_info"] = dict(
+        he_level_raw=12.34,
+        helium_level=45.67,
+        meas_date=datetime(2024, 11, 14, 14, 8, 2, tzinfo=timezone.utc),
+        orig_file_guid="e",
+    )
+    info["experimenter"] = "f"
+    info["description"] = "g"
+    with info._unlock():
+        info["proj_id"] = np.ones(1, int)
+        info["proj_name"] = "h"
+        info["utc_offset"] = "i"
+        d = (1717707794, 2)
+        info["proc_history"] = [
+            dict(
+                block_id=dict(
+                    version=4,
+                    machid=np.ones(2, int),
+                    secs=d[0],
+                    usecs=d[1],
+                    date=d,
+                ),
+                experimenter="j",
+                max_info=dict(
+                    max_st=[],
+                    sss_ctc=[],
+                    sss_cal=[],
+                    sss_info=dict(head_pos=None, in_order=8),
+                ),
+                date=d,
+            ),
+        ]
+        info["device_info"] = dict(serial="k", site="l")
+    info._check_consistency()
+
+
 def test_anonymize(tmp_path):
     """Test that sensitive information can be anonymized."""
     pytest.raises(TypeError, anonymize_info, "foo")
 
     # Fake some subject data
     raw = read_raw_fif(raw_fname)
+    _complete_info(raw.info)
     raw.set_annotations(
         Annotations(onset=[0, 1], duration=[1, 1], description="dummy", orig_time=None)
     )
@@ -745,8 +830,8 @@ def test_anonymize(tmp_path):
     # test mne.anonymize_info()
     events = read_events(event_name)
     epochs = Epochs(raw, events[:1], 2, 0.0, 0.1, baseline=None)
-    _test_anonymize_info(raw.info.copy())
-    _test_anonymize_info(epochs.info.copy())
+    _test_anonymize_info(raw.info)
+    _test_anonymize_info(epochs.info)
 
     # test instance methods & I/O roundtrip
     for inst, keep_his in zip((raw, epochs), (True, False)):
@@ -792,17 +877,19 @@ def test_anonymize(tmp_path):
     assert_allclose(raw.annotations.onset, expected_onset)
 
 
-def test_anonymize_with_io(tmp_path):
-    """Test that IO does not break anonymization."""
-    raw = read_raw_fif(raw_fname)
-
+@pytest.mark.parametrize("daysback", [None, 28826])
+def test_anonymize_with_io(tmp_path, daysback):
+    """Test that IO does not break anonymization and all fields."""
+    raw = read_raw_fif(raw_fname).crop(0, 1)
+    _complete_info(raw.info)
     temp_path = tmp_path / "tmp_raw.fif"
     raw.save(temp_path)
-
-    raw2 = read_raw_fif(temp_path)
-
-    daysback = (raw2.info["meas_date"].date() - date(1924, 1, 1)).days
+    raw2 = read_raw_fif(temp_path).load_data()
     raw2.anonymize(daysback=daysback)
+    raw2.save(temp_path, overwrite=True)
+    raw3 = read_raw_fif(temp_path)
+    d = object_diff(raw2.info, raw3.info)
+    assert d == "['file_id']['machid'] array mismatch\n"
 
 
 @testing.requires_testing_data
diff --git a/mne/_fiff/write.py b/mne/_fiff/write.py
index ec8dddfd41a..427d4f12e54 100644
--- a/mne/_fiff/write.py
+++ b/mne/_fiff/write.py
@@ -72,7 +72,9 @@ def write_int(fid, kind, data):
     data_size = 4
     data = np.asarray(data)
     if data.dtype.kind not in "uib" and data.size > 0:
-        raise TypeError(f"Cannot safely write data with dtype {data.dtype} as int")
+        raise TypeError(
+            f"Cannot safely write data kind {kind} with dtype {data.dtype} as int",
+        )
     max_val = data.max() if data.size > 0 else 0
     if max_val > INT32_MAX:
         raise TypeError(
diff --git a/mne/io/fiff/tests/test_raw_fiff.py b/mne/io/fiff/tests/test_raw_fiff.py
index 2978a09287c..fec1731deff 100644
--- a/mne/io/fiff/tests/test_raw_fiff.py
+++ b/mne/io/fiff/tests/test_raw_fiff.py
@@ -51,6 +51,7 @@
 fif_fname = data_dir / "sample_audvis_trunc_raw.fif"
 ms_fname = testing_path / "SSS" / "test_move_anon_raw.fif"
 skip_fname = testing_path / "misc" / "intervalrecording_raw.fif"
+tri_fname = testing_path / "SSS" / "TRIUX" / "triux_bmlhus_erm_raw.fif"
 
 base_dir = Path(__file__).parents[2] / "tests" / "data"
 test_fif_fname = base_dir / "test_raw.fif"
@@ -2189,3 +2190,17 @@ def test_init_kwargs(cast):
     raw2 = read_raw_fif(**raw._init_kwargs)
     for r in (raw, raw2):
         assert isinstance(r._init_kwargs["fname"], pathlib.Path)
+
+
+@pytest.mark.slowtest
+@pytest.mark.parametrize("fname", [ms_fname, tri_fname])
+def test_fif_files(fname):
+    """Test reading of various FIF files."""
+    _test_raw_reader(
+        read_raw_fif,
+        fname=fname,
+        allow_maxshield="yes",
+        verbose="error",
+        test_kwargs=False,
+        test_preloading=False,
+    )
diff --git a/mne/io/tests/test_raw.py b/mne/io/tests/test_raw.py
index fa2224136d7..c2e6b6b151f 100644
--- a/mne/io/tests/test_raw.py
+++ b/mne/io/tests/test_raw.py
@@ -315,7 +315,7 @@ def _test_raw_reader(
     ids = [id(ch["loc"]) for ch in raw.info["chs"]]
     assert len(set(ids)) == len(ids)
 
-    full_data = raw._data
+    full_data = raw.get_data()
     assert raw.__class__.__name__ in repr(raw)  # to test repr
     assert raw.info.__class__.__name__ in repr(raw.info)
     assert isinstance(raw.info["dig"], type(None) | list)
@@ -355,7 +355,7 @@ def _test_raw_reader(
     with pytest.raises(OSError, match="raw must end with .fif or .fif.gz"):
         raw.save(out_fname_h5)
 
-    raw3 = read_raw_fif(out_fname)
+    raw3 = read_raw_fif(out_fname, allow_maxshield="yes")
     assert_named_constants(raw3.info)
     assert set(raw.info.keys()) == set(raw3.info.keys())
     assert_allclose(
diff --git a/mne/utils/_testing.py b/mne/utils/_testing.py
index cdc65d7ce5c..323b530a641 100644
--- a/mne/utils/_testing.py
+++ b/mne/utils/_testing.py
@@ -179,10 +179,10 @@ def assert_and_remove_boundary_annot(annotations, n=1):
         annotations.delete(idx)
 
 
-def assert_object_equal(a, b):
+def assert_object_equal(a, b, *, err_msg="Object mismatch"):
     """Assert two objects are equal."""
     d = object_diff(a, b)
-    assert d == "", d
+    assert d == "", f"{err_msg}\n{d}"
 
 
 def _raw_annot(meas_date, orig_time):
diff --git a/mne/utils/numerics.py b/mne/utils/numerics.py
index 07ba9ff7f56..c287fb42305 100644
--- a/mne/utils/numerics.py
+++ b/mne/utils/numerics.py
@@ -809,8 +809,9 @@ def object_diff(a, b, pre="", *, allclose=False):
         if a.getvalue() != b.getvalue():
             out += pre + " StringIO mismatch\n"
     elif isinstance(a, datetime | date):
-        if (a - b).total_seconds() != 0:
-            out += pre + f" {a.__class__.__name__} mismatch\n"
+        ts = (a - b).total_seconds()
+        if ts != 0:
+            out += pre + f" {a.__class__.__name__} mismatch ({a} vs {b} by {ts} sec)\n"
     elif sparse.issparse(a):
         # sparsity and sparse type of b vs a already checked above by type()
         if b.shape != a.shape:

From 05b051107779e9b47b83c016a63ca868bbf98b5f Mon Sep 17 00:00:00 2001
From: Eric Larson <larson.eric.d@gmail.com>
Date: Thu, 14 Nov 2024 16:06:24 -0500
Subject: [PATCH 2/2] FIX: Dec

---
 mne/io/fiff/tests/test_raw_fiff.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mne/io/fiff/tests/test_raw_fiff.py b/mne/io/fiff/tests/test_raw_fiff.py
index fec1731deff..760b98b9502 100644
--- a/mne/io/fiff/tests/test_raw_fiff.py
+++ b/mne/io/fiff/tests/test_raw_fiff.py
@@ -2193,6 +2193,7 @@ def test_init_kwargs(cast):
 
 
 @pytest.mark.slowtest
+@testing.requires_testing_data
 @pytest.mark.parametrize("fname", [ms_fname, tri_fname])
 def test_fif_files(fname):
     """Test reading of various FIF files."""