Skip to content

Commit

Permalink
show the uncalibrated line in the comparative plot
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Jun 7, 2024
1 parent 90d6c22 commit de1ba04
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,22 @@
import vegafusion
from typer import Option, Argument

from depiction_targeted_preproc.workflow.qc.plot_peak_density import plot_density_combined
from depiction_targeted_preproc.workflow.qc.plot_peak_density import plot_density_combined_full


def exp_plot_compare_peak_density(
tables_marker_distances_calib: Annotated[list[Path], Argument()],
table_marker_distance_uncalib: Annotated[Path, Option()],
output_pdf: Annotated[Path, Option()],
) -> None:
vegafusion.enable()

table = pl.concat(
[pl.read_parquet(path).with_columns(variant=pl.lit(path.parents[1].name)) for path in
tables_marker_distances_calib])
table = pl.concat([table, pl.read_parquet(table_marker_distance_uncalib).with_columns(variant=pl.lit("uncalib"))])

plot_density_combined(df_peak_dist=table, out_pdf=output_pdf)
plot_density_combined_full(df_peak_dist=table, out_pdf=output_pdf)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion src/depiction_targeted_preproc/workflow/experimental.smk
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ rule exp_mass_list_preparation:

rule exp_plot_compare_peak_density:
input:
tables_marker_distance=expand("{{sample}}/{exp_variant}/qc/table_marker_distances_calib.parquet", exp_variant=exp_variants)
tables_marker_distance=expand("{{sample}}/{exp_variant}/qc/table_marker_distances_calib.parquet", exp_variant=exp_variants),
table_marker_distance_uncalib="{sample}/reg_shift/qc/table_marker_distances_baseline.parquet",
output:
pdf="{sample}/exp_plot_compare_peak_density.pdf"
shell:
"python -m depiction_targeted_preproc.workflow.exp.plot_compare_peak_density"
" {input.tables_marker_distance}"
" --table-marker-distance-uncalib {input.table_marker_distance_uncalib}"
" --output-pdf {output.pdf}"
53 changes: 42 additions & 11 deletions src/depiction_targeted_preproc/workflow/qc/plot_peak_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars as pl
import typer
import vegafusion
from KDEpy import FFTKDE
from typer import Option

from depiction_targeted_preproc.workflow.qc.plot_calibration_map import get_mass_groups
Expand All @@ -15,27 +16,57 @@ def subsample_dataframe(df: pl.DataFrame) -> pl.DataFrame:
return df.sample(n_samples, seed=1, shuffle=True)


def plot_density_combined(df_peak_dist: pl.DataFrame, out_pdf: Path) -> None:
n_tot = len(df_peak_dist)
df_peak_dist = subsample_dataframe(df_peak_dist)
# def plot_density_combined(df_peak_dist: pl.DataFrame, out_pdf: Path) -> None:
# n_tot = len(df_peak_dist)
# df_peak_dist = subsample_dataframe(df_peak_dist)
# chart = (
# (
# alt.Chart(df_peak_dist)
# .mark_line()
# .transform_density(
# density="dist", as_=["dist", "density"], groupby=["variant"], maxsteps=250, bandwidth=0.01
# )
# .encode(x="dist:Q", color="variant:N")
# .properties(width=500, height=300)
# )
# .encode(y=alt.Y("density:Q"))
# .properties(title="Linear scale")
# )
# chart = chart.properties(
# title=f"Density of target-surrounding peak distances (n_tot = {n_tot} sampled to n = {len(df_peak_dist)})")
# chart.save(out_pdf)


def plot_density_combined_full(df_peak_dist: pl.DataFrame, out_pdf: Path) -> None:
variants = df_peak_dist["variant"].unique().to_list()
collect = []
for variant in variants:
df_variant = df_peak_dist.filter(variant=variant)
dist, density = FFTKDE(bw="ISJ").fit(df_variant["dist"].to_numpy()).evaluate(2 ** 10)
collect.append(pl.DataFrame({"dist": list(dist), "density": list(density), "variant": variant}))

dist_min = df_peak_dist["dist"].min()
dist_max = df_peak_dist["dist"].max()
df_density = pl.concat(collect).filter((pl.col("dist") >= dist_min) & (pl.col("dist") <= dist_max))

chart = (
(
alt.Chart(df_peak_dist)
alt.Chart(df_density)
.mark_line()
.transform_density(
density="dist", as_=["dist", "density"], groupby=["variant"], maxsteps=250, bandwidth=0.01
)
.encode(x="dist:Q", color="variant:N")
.encode(x="dist:Q", y="density:Q", color="variant:N")
.properties(width=500, height=300)
)
.encode(y=alt.Y("density:Q"))
.properties(title="Linear scale")
)
chart = chart.properties(title=f"Density of target-surrounding peak distances (n_tot = {n_tot} sampled to n = {len(df_peak_dist)})")
chart = chart.properties(
title=f"Density of target-surrounding peak distances (N={len(df_peak_dist):,})"
)
chart.save(out_pdf)


def plot_density_groups(df_peak_dist: pl.DataFrame, mass_groups: pl.DataFrame, out_peak_density_ranges: Path) -> None:
# TODO use kdepy as above

# TODO merge these two functions
df_peak_dist_grouped = df_peak_dist.sort("mz_target").join_asof(
mass_groups, left_on="mz_target", right_on="mz_min", strategy="backward"
Expand Down Expand Up @@ -81,7 +112,7 @@ def qc_plot_peak_density(
out_peak_density_ranges=output_pdf,
)
else:
plot_density_combined(df_peak_dist=table, out_pdf=output_pdf)
plot_density_combined_full(df_peak_dist=table, out_pdf=output_pdf)


if __name__ == "__main__":
Expand Down

0 comments on commit de1ba04

Please sign in to comment.