harp-tech · jsiegle · Apr 25, 2024 · Apr 25, 2024 · Apr 26, 2024 · Apr 29, 2024
diff --git a/harp/clock.py b/harp/clock.py
@@ -0,0 +1,248 @@
+import numpy as np
+import warnings
+
+
+def align_timestamps_to_anchor_points(timestamps_to_align, start_times, anchor_times):
+    """
+    Aligns a set of timestamps to anchor points (e.g., Harp clock times)
+
+    We assume these timestamps are acquired by a system that is
+    not aligned to the Harp clock. This function finds the nearest
+    anchor point in the Harp clock for each timestamp, and then
+    interpolates between the anchor points to align the timestamps.
+
+    `decode_harp_clock` must be run first, in order to find the
+    start of each second in the Harp clock.
+
+    Parameters
+    ----------
+    timestamps_to_align : np.array
+        Local timestamps (in seconds) to align to the Harp clock
+    start_times : np.array
+        Local start times of each second in the Harp clock
+        (output by `decode_harp_clock`)
+    anchor_times : np.array
+        Global clock times in seconds (typically the Harp clock times,
+        but can be any set of anchor points to align to)
+        (output by `decode_harp_clock`)
+
+    Returns
+    -------
+    aligned_times : np.array
+        Aligned timestamps
+    """
+
+    if len(start_times) != len(anchor_times):
+        raise ValueError(
+            "The number of start times must equal the number of Harp times"
+        )
+
+    N = len(start_times)
+
+    slopes = np.zeros((N + 1,))
+    offsets = np.zeros((N + 1,))
+
+    # compute overall slope and offset
+    A = np.vstack([start_times, np.ones(len(start_times))]).T
+    slopes[0], offsets[0] = np.linalg.lstsq(A, anchor_times, rcond=None)[0]
+
+    # compute slope and offset for each segment
+    for i in range(N):
+        x = start_times[i : i + 2]
+        y = anchor_times[i : i + 2]
+        A = np.vstack([x, np.ones(len(x))]).T
+        slopes[i + 1], offsets[i + 1] = np.linalg.lstsq(A, y, rcond=None)[0]
+
+    # find the nearest anchor point for each timestamp to align
+    nearest = np.searchsorted(start_times, timestamps_to_align, side="left")
+
+    nearest[nearest == N] = 0
+
+    # interpolate between the anchor points
+    aligned_times = timestamps_to_align * slopes[nearest] + offsets[nearest]
+
+    return aligned_times
+
+
+def decode_harp_clock(timestamps, states, baud_rate=1000.0):
+    """
+    Decodes Harp clock times (in seconds) from a sequence of local
+    event timestamps and states.
+
+    The Harp Behavior board can be configured to output a digital
+    signal that encodes the current Harp time as a 32-bit integer,
+    which is emitted once per second.
+
+    The format of the signal is as follows:
+
+      Default value: high
+      Start bit: low -- indicates the transition to the next second
+      8 bits: byte 0, with least significant bit first
+      2 bits: high / low transition
+      8 bits: byte 1, with least significant bit first
+      2 bits: high / low transition
+      8 bits: byte 2, with least significant bit first
+      2 bits: high / low transition
+      8 bits: byte 3, with least significant bit first
+      Final bit: reset to high
+
+    Although the baud rate of the internal Harp clock is
+    100 kHz, the Behavior board outputs the clock signal
+    at a lower baud rate (typically 1 kHz), so it can be
+    acquired by data acquisition systems with sample rates
+    as low as 5 kHz.
+
+    Parameters
+    ----------
+    timestamps : np.array
+        Float times in seconds for each clock line transition
+        If the acquisition system outputs integer sample numbers
+        for each event, divide by the sample rate to convert to seconds
+    states : np.array
+        States (1 or 0) for each clock line transition
+    baud_rate : float
+        The baud rate of the Harp clock signal
+
+    Returns
+    -------
+    start_times : np.array
+        Timestamps at which each second begins
+    harp_times : np.array
+        Harp clock times in seconds
+    """
+
+    min_delta = 0.5  # seconds -- Harp clock events must always be
+    # at least this far apart
+
+    barcode_edges = get_barcode_edges(timestamps, min_delta)
+
+    start_times = np.array([timestamps[edges[0]] for edges in barcode_edges])
+
+    harp_times = np.array(
+        [
+            convert_barcode(
+                timestamps[edges[0] : edges[1]],
+                states[edges[0] : edges[1]],
+                baud_rate=baud_rate,
+            )
+            for edges in barcode_edges
+        ]
+    )
+
+    start_times_corrected, harp_times_corrected = remove_outliers(
+        start_times, harp_times
+    )
+
+    return start_times_corrected, harp_times_corrected
+
+
+def get_barcode_edges(timestamps, min_delta):
+    """
+    Returns the start and end indices of each barcode
+
+    Parameters
+    ----------
+    timestamps : np.array
+        Timestamps (ins seconds) of clock line events
+        (high and low states)
+    min_delta : int
+        The minimum length between the end of one
+        barcode and the start of the next
+
+    Returns
+    -------
+    edges : list of tuples
+        Contains the start and end indices of each barcode
+    """
+
+    (splits,) = np.where(np.diff(timestamps) > min_delta)
+
+    return list(zip(splits[:-1] + 1, splits[1:] + 1))
+
+
+def convert_barcode(transition_times, states, baud_rate):
+    """
+    Converts Harp clock barcode to a clock time in seconds
+
+    Parameters
+    ----------
+    transition_times : np.array
+        Times (in seconds) each clock line transition
+    states : np.array
+        states (1 or 0) for each clock line transition
+    baud_rate : float
+        The baud rate of the clock signal
+
+    Returns
+    -------
+    harp_time : int
+        Harp time in seconds for the current barcode
+
+    """
+
+    intervals = np.round(np.diff(transition_times * baud_rate)).astype("int")
+
+    barcode = np.concatenate(
+        [np.ones((count,)) * state for state, count in zip(states[:-1], intervals)]
+    ).astype("int")
+
+    val = np.concatenate(
+        (np.arange(1, 9), np.arange(11, 19), np.arange(21, 29), np.arange(31, 39))
+    )
+
+    s = np.flip(barcode[val])
+    harp_time = s.dot(2 ** np.arange(s.size)[::-1])
+
+    return harp_time
+
+
+def remove_outliers(start_times, harp_times):
+    """
+    Removes outliers from the Harp clock times
+
+    These outliers are caused by problems decoding
+    the Harp clock signal, leading to consecutive times that
+    do not increase by exactly 1. These will be removed from
+    the array of Harp times, so they will not be used
+    as anchor points during subsequent clock alignment.
+
+    If the times jump to a new value and continue to
+    increase by 1, either due to a reset of the Harp clock
+    or a gap in the data, these will be ignored.
+
+    Parameters
+    ----------
+    start_times : np.array
+        Harp clock start times in seconds
+    harp_times : np.array
+        Harp clock times in seconds
+
+    Returns
+    -------
+    corrected_start_times : np.array
+        Corrected Harp clock times in seconds
+    corrected_harp_times : np.array
+        Corrected Harp clock times in seconds
+    """
+
+    original_indices = np.arange(len(harp_times))
+
+    new_indices = np.concatenate(
+        [
+            sub_array
+            for sub_array in np.split(
+                original_indices, np.where(np.diff(harp_times) != 1)[0] + 1
+            )
+            if len(sub_array) > 1
+        ]
+    )
+
+    num_outliers = len(original_indices) - len(new_indices)
+
+    if num_outliers > 0:
+        warnings.warn(
+            f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} "
+            + "found in the decoded Harp clock. Removing..."
+        )
+
+    return start_times[new_indices], harp_times[new_indices]
diff --git a/tests/test_clock.py b/tests/test_clock.py
@@ -0,0 +1,87 @@
+import numpy as np
+from pytest import mark
+from harp.clock import decode_harp_clock, align_timestamps_to_anchor_points
+import warnings
+
+# fmt: off
+testinput = [
+    {
+        # contains two valid Harp clock barcodes
+        'timestamps': np.array([
+            0.        , 0.96106667, 0.96306667, 0.96406667, 0.96506667,
+            0.96706667, 0.96906667, 0.97106667, 0.97306667, 0.97506667,
+            0.97606667, 0.97706667, 0.98006667, 0.98106667, 0.98306667,
+            0.98406667, 0.98506667, 0.98806667, 0.99006667, 0.99106667,
+            1.00006667, 1.96116667, 1.96216667, 1.96416667, 1.96516667,
+            1.96716667, 1.96916667, 1.97116667, 1.97316667, 1.97516667,
+            1.97616667, 1.97716667, 1.98016667, 1.98116667, 1.98316667,
+            1.98416667, 1.98516667, 1.98816667, 1.99016667, 1.99116667,
+            2.00016667, 2.96126667, 2.96426667, 2.96726667, 2.96926667,
+            2.97126667, 2.97326667, 2.97526667, 2.97626667, 2.97726667,
+            2.98026667, 2.98126667, 2.98326667, 2.98426667, 2.98526667,
+            2.98826667, 2.99026667, 2.99126667]),
+        'states': np.array([
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
+        'expected_start_times': np.array([0.96106667, 1.96116667]),
+        'expected_harp_times': np.array([3806874, 3806875])
+    },
+    {
+        # contains 4 valid Harp clock barcodes and one invalid barcode
+        'timestamps': np.array([
+            0.14036667, 1.10146667, 1.10246667, 1.10346667, 1.10446667,
+            1.11146667, 1.11246667, 1.11646667, 1.11746667, 1.11846667,
+            1.11946667, 1.12146667, 1.12246667, 1.12546667, 1.12746667,
+            1.12846667, 1.13046667, 1.13146667, 1.14046667, 2.10156667,
+            2.10356667, 2.11156667, 2.11256667, 2.11656667, 2.11756667,
+            2.11856667, 2.11956667, 2.12156667, 2.12256667, 2.12556667,
+            2.12756667, 2.12856667, 2.13056667, 2.13156667, 2.14056667,
+            3.10163333, 3.10263333, 3.11163333, 3.11263333, 3.11663333,
+            3.11763333, 3.11863333, 3.11963333, 3.12163333, 3.12263333,
+            3.12563333, 3.12763333, 3.12863333, 3.13063333, 3.13163333,
+            3.14063333, 4.10173333, 4.11073333, 4.11173333, 4.11673333,
+            4.11873333, 4.11973333, 4.12173333, 4.12273333, 4.12573333,
+            4.12773333, 4.12873333, 4.13073333, 4.13173333, 4.14073333,
+            5.1018    , 5.1028    , 5.1038    , 5.1108    , 5.1118    ,
+            5.1168    , 5.1188    , 5.1198    , 5.1218    , 5.1228    ,
+            5.1258    , 5.1278    , 5.1288    , 5.1308    , 5.1318    ,
+            5.1368    , 5.1388    , 5.1398    , 5.14183333, 5.1428    ,
+            5.14583333, 5.14783333, 5.14883333, 5.15083333, 5.15183333,
+            5.16083333, 6.1059    ]),
+        'states': np.array([
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0]),
+        'expected_start_times': np.array([1.10146667, 2.10156667, 3.10163333, 4.10173333]),
+        'expected_harp_times': np.array([2600957, 2600958, 2600959, 2600960])
+    }
+]
+# fmt: on
+
+
+@mark.parametrize("test_input", testinput)
+def test_create_reader(test_input):
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        start_times, harp_times = decode_harp_clock(
+            test_input["timestamps"],
+            test_input["states"],
+            baud_rate=1000,
+        )
+
+    assert np.allclose(start_times, test_input["expected_start_times"])
+    assert np.allclose(harp_times, test_input["expected_harp_times"])
+
+    # test alignment for samples 1/2 second after anchors
+    ts = start_times + 0.5
+    aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times)
+    assert np.allclose(aligned_times, harp_times + 0.5)
+
+    # test alignment for samples 1/2 second before anchors
+    ts = start_times - 0.5
+    aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times)
+    assert np.allclose(aligned_times, harp_times - 0.5)