add alex cde (#6)

* add alex cde * fret cde * faster fretcde * allow front end to do alex/fret cde * make fret config optional for bursts * fix namespace clash in dataclass * derive field options from dataframe * fret / alex 2cde in example * add result to bursts * add citation information * add fret cde test * add fret cde reference data * suppress hooks warning * remove old fret cde * make stream names configurable * do fret and alex cda via post burst search hooks * add default hook functions * cde accepts stream kwargs * fix typing * cosmetics * fix tests * ensure tuples are returned from goupby * check if turning off hooks fixes ci tests * revert config * uv pip pytest * use system python * let uv install python * use uv to pin requirements * specify python version * test ubuntu only * remove system flag * update pinned requirements, drop py39, use pins in tests * remove ' * where do these keep coming from * use astral ruff * cache uv * refactor and test process photon data method * remove self-explanatory ai comments
Jhsmit · Nov 26, 2024 · 53129f2 · 53129f2
1 parent 0163d6a
commit 53129f2
Show file tree

Hide file tree

Showing 31 changed files with 1,575 additions and 1,573 deletions.
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -1,9 +1,8 @@
 name: Ruff
 on: [push, pull_request]
-
 jobs:
   ruff:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: chartboost/ruff-action@v1
+      - uses: astral-sh/ruff-action@v1
diff --git a/.github/workflows/pin_requirements.yml b/.github/workflows/pin_requirements.yml
@@ -15,16 +15,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v3
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
         with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install pip-tools
-        run: pip install pip-tools
+          # Install a specific version of uv.
+          version: "0.5.4"
 
       - name: Generate requirements file
-        run: pip-compile --extra web --output-file requirements-${{ matrix.os }}-${{ matrix.python-version }}.txt pyproject.toml
+        run: uv pip compile --all-extras pyproject.toml -o requirements-${{ matrix.os }}-${{ matrix.python-version }}.txt
 
       - name: Upload requirements file
         uses: actions/upload-artifact@v3

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -6,40 +6,32 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ "ubuntu-latest", "macos-latest" , "windows-latest"]
         python-version: ["3.10" ]
-    defaults:
-      run:
-        shell: bash
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
     steps:
       - name: Check out repository
         uses: actions/checkout@v3
 
-      - name: Download test file windows
-        if: runner.os == 'Windows'
-        run: |
-            C:\\msys64\\usr\\bin\\wget.exe "https://filedn.eu/loRXwzWCNnU4XoFPGbllt1y/datafile_1.ptu" -O tests/test_data/input/ds1/datafile_1.ptu
-      - name: Download test file other platforms
-        if: runner.os != 'Windows'
-        run: |
-          wget "https://filedn.eu/loRXwzWCNnU4XoFPGbllt1y/datafile_1.ptu" -O tests/test_data/input/ds1/datafile_1.ptu
-
-      - name: Set up python ${{ matrix.python-version }}
-        id: setup-python
-        uses: actions/setup-python@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
         with:
-          python-version: ${{ matrix.python-version }}
-          cache: pip
-          cache-dependency-path: requirements/requirements-${{ matrix.os }}-${{ matrix.python-version }}.txt
-
-      - name: Install pinned requirements
+          # Install a specific version of uv.
+          version: "0.5.4"
+          enable-cache: true
+          cache-dependency-glob: requirements/requirements-ubuntu-latest-${{ matrix.python-version }}.txt
+
+      - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements/requirements-${{ matrix.os }}-${{ matrix.python-version }}.txt --prefer-binary
+          python -m pip install uv
+          uv venv -p ${{ matrix.python-version }}
+          . .venv/bin/activate
+          echo PATH=$PATH >> $GITHUB_ENV
+          uv pip install -r requirements/requirements-ubuntu-latest-${{ matrix.python-version }}.txt
+          uv pip install -e .
 
-      - name: Install test requirements
-        run: pip install .[test]
+      - name: Download test file
+        run: |
+          wget "https://filedn.eu/loRXwzWCNnU4XoFPGbllt1y/datafile_1.ptu" -O tests/test_data/input/ds1/datafile_1.ptu
 
       - name: Run tests
         run: |

diff --git a/CITE.txt b/CITE.txt
@@ -14,4 +14,8 @@ https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4805879/
 
 burst search:
 "Data registration and selective single-molecule analysis using multi-parameter fluorescence detection"
-DOI: 10.1016/S0168-1656(00)00412-0
+DOI: 10.1016/S0168-1656(00)00412-0
+
+fret/alex cde:
+"Disentangling Subpopulations in Single-Molecule FRET and ALEX Experiments with Photon Distribution Analysis"
+https://doi.org/10.1016/j.bpj.2011.11.4025
diff --git a/default_testing.yaml b/default_testing.yaml
@@ -40,6 +40,12 @@ burst_search:
       M: 100
       T: 500.e-6
 
+hooks:
+  alex_2cde:
+    tau: 50.e-6 # make sure to format as float
+  fret_2cde:
+    tau: 50.e-6
+
 # settings related to dont-fret's web interface
 web:
   password: null
@@ -48,3 +54,5 @@ web:
   burst_filters: # default filters to apply to burst search filters
     - name: n_photons
       min: 150
+    - name: alex_2cde
+      max: 100
diff --git a/dont_fret/channel_kde.py b/dont_fret/channel_kde.py
@@ -0,0 +1,205 @@
+"""
+Module for channel based kernel density estimation
+
+This is based on:
+Disentangling Subpopulations in Single-Molecule FRET and ALEX Experiments with Photon Distribution Analysis
+https://doi.org/10.1016/j.bpj.2011.11.4025
+
+Please cite the paper when using this module.
+
+"""
+
+import warnings
+from typing import Optional
+
+import numpy as np
+import polars as pl
+from numba import float64, int64, jit, types
+
+from dont_fret.expr import is_in_expr
+
+
+def compute_fret_2cde(
+    burst_photons: pl.DataFrame,
+    kde_rates: pl.DataFrame,
+    dem_stream: str = "DD",
+    aem_stream: str = "DA",
+) -> np.ndarray:
+    """
+    burst_photons: Dataframe with columns: timestamps, stream, burst_index
+    kde_rates: Dataframe with columns: timestamps, D_em, A_em.
+    dem_stream: photon stream which is donor emission (default: DD)
+    aem_stream: photon stream which is acceptor emission (default: DA)
+    """
+    joined_df = burst_photons.join(kde_rates, on=["timestamps"], how="left").filter(
+        pl.col("stream") == pl.col("stream_right")
+    )
+
+    f_DA = pl.col("stream") == aem_stream
+    f_DD = pl.col("stream") == dem_stream
+    df_DA = joined_df.filter(f_DA)
+    df_DD = joined_df.filter(f_DD)
+
+    matching_indices = np.intersect1d(df_DA["burst_index"].unique(), df_DD["burst_index"].unique())
+
+    DA_groups = {k: v for k, v in df_DA.group_by(["burst_index"])}
+    DD_groups = {k: v for k, v in df_DD.group_by(["burst_index"])}
+
+    N: int = burst_photons["burst_index"].max() + 1  # type: ignore
+    output = np.full(N, fill_value=np.nan)
+    for j in matching_indices:
+        df_DA_j = DA_groups[(j,)]
+        df_DD_j = DD_groups[(j,)]
+
+        kde_DA_DA = df_DA_j["A_em"].to_numpy()  # select DA density - kde^DA_DA
+        kde_DA_DD = df_DD_j["A_em"].to_numpy()  # kde^DA_DD (in the paper called kde^A_D)
+        kde_DD_DD = df_DD_j["D_em"].to_numpy()  # kde^DD_DD
+        kde_DD_DA = df_DA_j["D_em"].to_numpy()  # kde^DD_DA
+
+        try:
+            nbkde_DA_DA = (1 + 2 / len(kde_DA_DA)) * (kde_DA_DA - 1)
+            nbkde_DD_DD = (1 + 2 / len(kde_DD_DD)) * (kde_DD_DD - 1)
+
+            # when denom is zero, it doesnt count towards number of photons
+            # see "Such cases are removed by the computer algorithm", in Tomov et al.
+            denom = kde_DA_DD + nbkde_DD_DD
+            ED = (kde_DA_DD / denom).sum() / np.count_nonzero(denom)  # when
+
+            denom = kde_DD_DA + nbkde_DA_DA
+            EA = (kde_DD_DA / denom).sum() / np.count_nonzero(denom)  # = (1 - E)_A
+
+            fret_cde = 110 - 100 * (ED + EA)
+            output[j] = fret_cde
+        except ZeroDivisionError:
+            output[j] = np.nan
+    return output
+
+
+# refactor to indices / loop
+def compute_alex_2cde(
+    burst_photons: pl.DataFrame,
+    kde_rates: pl.DataFrame,
+    dex_streams: Optional[list[str]] = None,
+    aex_streams: Optional[list[str]] = None,
+) -> pl.Series:
+    """
+    burst_photons: Dataframe with columns: timestamps, stream, burst_index
+    kde_rates: Dataframe with columns: timestamps, D_ex, A_ex.
+    dex_streams: list of photon streams which are donor excitation (default: DD, DA)
+    aex_streams: list of photon streams which are acceptor excitation (default: AA)
+    """
+    dex_streams = dex_streams if dex_streams else ["DD", "DA"]
+    aex_streams = aex_streams if aex_streams else ["AA"]
+
+    f_dex = is_in_expr("stream", dex_streams)
+    f_aex = is_in_expr("stream", aex_streams)
+
+    # equivalent to (but faster):
+    # joined_df = burst_photons.join(kde_rates, on=['timestamps', 'stream'], how='inner')
+    # still, this is the slowest step. would be nice if we can improve since we know the indices
+
+    joined_df = burst_photons.join(kde_rates, on=["timestamps"], how="left").filter(
+        pl.col("stream") == pl.col("stream_right")
+    )
+
+    b_df = joined_df.select(
+        [
+            pl.col("burst_index"),
+            pl.col("stream"),
+            (pl.col("A_ex") / pl.col("D_ex")).alias("ratio_AD"),
+        ]
+    )
+
+    # tomov et al eqn 10 and 11
+    df_f_dex = b_df.filter(f_dex)
+    agg_dex = df_f_dex.group_by("burst_index", maintain_order=True).agg(
+        [pl.col("ratio_AD").sum(), pl.len().alias("N_dex")]
+    )
+
+    df_f_aex = b_df.filter(f_aex)
+    agg_dax = df_f_aex.group_by("burst_index", maintain_order=True).agg(
+        [(1 / pl.col("ratio_AD")).sum().alias("ratio_DA"), pl.len().alias("N_aex")]
+    )
+
+    combined = pl.concat([agg_dex, agg_dax], how="align")
+
+    # tomov et al eqn 12
+    # this is an addition in the paper
+    # in fretbursts its subtracted
+    ax_2cde_bracket = (1 / pl.col("N_aex")) * pl.col("ratio_AD") + (1 / pl.col("N_dex")) * pl.col(
+        "ratio_DA"
+    )
+
+    ax_2cde_norm = pl.lit(100) - pl.lit(50) * ax_2cde_bracket
+
+    alex_2cde = combined.select(ax_2cde_norm.alias("alex_2cde")).to_series()
+
+    return alex_2cde
+
+
+def make_kernel(
+    tau: float, timestamps_unit: float, domain_size: int = 10, kernel="laplace"
+) -> np.ndarray:
+    window_size = domain_size * (tau / timestamps_unit)
+    window_size_even_int = 2 * round(window_size / 2)
+
+    # check that rounding error isnt too large
+    rel_dev = (window_size - window_size_even_int) / window_size
+    if np.abs(rel_dev) > 0.01:
+        warnings.warn(
+            "Kernel window size deviation from rounding larger than 1 percent. Choose a smaller `tau` with respect to `timestamps_unit'"
+        )
+
+    t_eval = np.linspace(-domain_size / 2, domain_size / 2, window_size_even_int + 1, endpoint=True)
+    kernel = np.exp(-np.abs(t_eval))
+
+    return kernel
+
+
+def convolve_stream(data: pl.DataFrame, streams: list[str], kernel: np.ndarray) -> np.ndarray:
+    f_expr = is_in_expr("stream", streams)
+
+    df = data.filter(f_expr)
+    # TODO warn on copy
+    # dataframes read from .pq cannot be converted zero-copy
+    event_times = df["timestamps"].to_numpy(allow_copy=True)
+    eval_times = data["timestamps"].to_numpy(allow_copy=True)
+    return async_convolve(event_times, eval_times, kernel)
+
+
+@jit(
+    float64[:](
+        types.Array(int64, 1, "C", readonly=True),
+        types.Array(int64, 1, "C", readonly=True),
+        types.Array(float64, 1, "C", readonly=True),
+    ),
+    nopython=True,
+    nogil=True,
+)
+def async_convolve(event_times, eval_times, kernel):
+    """convolve integer timestamps with a kernel"""
+
+    i_lower = 0
+    i_upper = 0
+
+    window_half_size = len(kernel) // 2
+    result = np.zeros_like(eval_times, dtype=np.float64)
+
+    for i, time in enumerate(eval_times):
+        while event_times[i_upper] < time + window_half_size:
+            i_upper += 1
+            if i_upper == len(event_times):
+                i_upper -= 1
+                break
+
+        while event_times[i_lower] < time - window_half_size:
+            i_lower += 1
+            if i_lower == len(event_times):
+                i_lower -= 1
+                break
+
+        for j in range(i_lower, i_upper):
+            idx = event_times[j] - time + window_half_size
+            result[i] += kernel[idx]
+
+    return result
diff --git a/dont_fret/config/config.py b/dont_fret/config/config.py
@@ -3,7 +3,7 @@
 import os
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
 import polars as pl
 import yaml
@@ -43,11 +43,15 @@ def as_expr(self) -> list[pl.Expr]:
 class Web:
     """settings related to web application"""
 
-    default_dir: Path
+    default_dir: Path = field(default_factory=Path)
     protect_filebrowser: bool = True
     burst_filters: list[BurstFilterItem] = field(default_factory=list)
     password: Optional[str] = None
 
+    # todo configurable settings
+    fret_2cde: bool = True  # calculate fret_2cde after burst search with default settings
+    alex_2cde: bool = True
+
 
 @dataclass
 class BurstColor:
@@ -62,7 +66,8 @@ class DontFRETConfig:
     channels: dict[str, Channel]
     streams: dict[str, list[str]]
     burst_search: dict[str, list[BurstColor]]
-    web: Web
+    hooks: dict[str, dict[str, Any]] = field(default_factory=dict)
+    web: Web = field(default_factory=Web)
 
     @classmethod
     def from_dict(cls, data: Data):

diff --git a/dont_fret/config/default.yaml b/dont_fret/config/default.yaml
@@ -34,6 +34,15 @@ burst_search:
       M: 100
       T: 500.e-6
 
+# post-burst search hooks to apply
+# hooks are of the form my_hook(burst_data, photon_data, **kwargs)
+# kwargs are as specified here
+hooks:
+  alex_2cde:
+    tau: 150.e-6 # make sure to format as float
+  fret_2cde:
+    tau: 50.e-6
+
 # settings related to dont-fret's web interface
 web:
   password: null # set to null to disable password protection