Skip to content

Commit

Permalink
Added multiplet recovery metrics to graph stage's report.json
Browse files Browse the repository at this point in the history
  • Loading branch information
ptajvar committed Oct 22, 2024
1 parent 3d4cc74 commit b63fb91
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Add `depth` column to `discarded_edgelist.parquet` output of the GRAPH stage that indicates at which refinement iteration the edge is removed.
- Add `edges_removed_in_multiplet_recovery_first_iteration`, `edges_removed_in_multiplet_recovery_refinement` and `fraction_edges_removed_in_refinement` to graph report.json.
- Add `is_potential_doublet` and `n_edges_to_split_doublet` columns to adata.obs.
- Add `fraction_potential_doublets` and `n_edges_to_split_potential_doublets` to annotate report.json.
- Add `--max-edges-to-split` option to `graph` to specify the maximum number of edges that can be removed between two sub-components during multiplet recovery.
Expand Down
20 changes: 18 additions & 2 deletions src/pixelator/graph/community_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ def connect_components(

# save the edge list (discarded)
logger.debug("Save discarded edge list")
removed_edgelist.collect().write_parquet(
removed_edgelist = removed_edgelist.collect()
removed_edgelist.write_parquet(
Path(output) / f"{sample_name}.discarded_edgelist.parquet"
)

Expand All @@ -186,8 +187,23 @@ def connect_components(

logger.debug("Generate graph report")
result_metrics = edgelist_metrics(graph_output_edgelist)
result_metrics["edges_with_colliding_upi_count"] = len(problematic_edges)

result_metrics["edges_with_colliding_upi_count"] = (
removed_edgelist["depth"] == 0
).sum()
result_metrics["edges_removed_in_multiplet_recovery_first_iteration"] = (
removed_edgelist["depth"] == 1
).sum()
result_metrics["edges_removed_in_multiplet_recovery_refinement"] = (
removed_edgelist["depth"] > 1
).sum()
result_metrics["fraction_edges_removed_in_refinement"] = (
removed_edgelist["depth"] > 1
).sum() / max(len(removed_edgelist), 1)

del graph_output_edgelist
del removed_edgelist

report = GraphSampleReport(
sample_id=sample_name,
**result_metrics,
Expand Down
3 changes: 3 additions & 0 deletions src/pixelator/graph/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ class EdgelistMetrics(typing.TypedDict, total=True):
fraction_pixels_in_largest_component: float

edges_with_colliding_upi_count: int
edges_removed_in_multiplet_recovery_first_iteration: int
edges_removed_in_multiplet_recovery_refinement: int
fraction_edges_removed_in_refinement: float


MetricsDict = typing.TypeVar(
Expand Down
17 changes: 17 additions & 0 deletions src/pixelator/report/models/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,23 @@ class GraphSampleReport(SampleReport):
description="The number of edges with UPIs that have appeared both as UPIA and UPIB.",
)

edges_removed_in_multiplet_recovery_first_iteration: int = pydantic.Field(
...,
description="The number of edges removed in the first iteration of multiplet recovery.",
)

edges_removed_in_multiplet_recovery_refinement: int = pydantic.Field(
...,
description="The number of edges removed in the refinement of multiplet recovery.",
)

fraction_edges_removed_in_refinement: float = pydantic.Field(
...,
ge=0,
le=1,
description="The fraction of total removed edges that are removed in the refinement of multiplet recovery.",
)

@pydantic.computed_field(
return_type=float,
description="The ratio of the total number of A-pixels and the total number of B-pixels in the graph.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
"fraction_molecules_in_largest_component": 0.0006480881399870382,
"fraction_pixels_in_largest_component": 0.0004888381945576015,
"edges_with_colliding_upi_count": 0,
"edges_removed_in_multiplet_recovery_first_iteration": 0,
"edges_removed_in_multiplet_recovery_refinement": 0,
"fraction_edges_removed_in_refinement": 0.0,
"a_pixel_b_pixel_ratio": 1.0055555555555555,
"pixel_count": 6137.0
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
"fraction_molecules_in_largest_component": 0.0007451564828614009,
"fraction_pixels_in_largest_component": 0.000501378791677112,
"edges_with_colliding_upi_count": 0,
"edges_removed_in_multiplet_recovery_first_iteration": 0,
"edges_removed_in_multiplet_recovery_refinement": 0,
"fraction_edges_removed_in_refinement": 0.0,
"a_pixel_b_pixel_ratio": 1.0035158211953792,
"pixel_count": 7978.0
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
"fraction_molecules_in_largest_component": 0.0006480881399870382,
"fraction_pixels_in_largest_component": 0.0004888381945576015,
"edges_with_colliding_upi_count": 0,
"edges_removed_in_multiplet_recovery_first_iteration": 0,
"edges_removed_in_multiplet_recovery_refinement": 0,
"fraction_edges_removed_in_refinement": 0.0,
"a_pixel_b_pixel_ratio": 1.0055555555555555,
"pixel_count": 6137
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
"fraction_molecules_in_largest_component": 0.0007451564828614009,
"fraction_pixels_in_largest_component": 0.000501378791677112,
"edges_with_colliding_upi_count": 0,
"edges_removed_in_multiplet_recovery_first_iteration": 0,
"edges_removed_in_multiplet_recovery_refinement": 0,
"fraction_edges_removed_in_refinement": 0.0,
"a_pixel_b_pixel_ratio": 1.0035158211953792,
"pixel_count": 7978
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sample_id,component_count,molecule_count,read_count,marker_count,a_pixel_count,b_pixel_count,fraction_molecules_in_largest_component,fraction_pixels_in_largest_component,edges_with_colliding_upi_count,a_pixel_b_pixel_ratio,pixel_count,read_count_per_molecule_mean,read_count_per_molecule_std,read_count_per_molecule_min,read_count_per_molecule_q1,read_count_per_molecule_q2,read_count_per_molecule_q3,read_count_per_molecule_max,read_count_per_molecule_count,read_count_per_molecule_iqr
pbmcs_unstimulated,3052,3086,6237,68,3077,3060,0.0006480881399870382,0.0004888381945576015,0,1.0055555555555555,6137,2.0210628645495787,0.1458331527003784,2.0,2.0,2.0,2.0,4.0,3086,0.0
uropod_control,3963,4026,8117,68,3996,3982,0.0007451564828614009,0.000501378791677112,0,1.0035158211953792,7978,2.016145057128664,0.12798892626922903,2.0,2.0,2.0,2.0,4.0,4026,0.0
sample_id,component_count,molecule_count,read_count,marker_count,a_pixel_count,b_pixel_count,fraction_molecules_in_largest_component,fraction_pixels_in_largest_component,edges_with_colliding_upi_count,edges_removed_in_multiplet_recovery_first_iteration,edges_removed_in_multiplet_recovery_refinement,fraction_edges_removed_in_refinement,a_pixel_b_pixel_ratio,pixel_count,read_count_per_molecule_mean,read_count_per_molecule_std,read_count_per_molecule_min,read_count_per_molecule_q1,read_count_per_molecule_q2,read_count_per_molecule_q3,read_count_per_molecule_max,read_count_per_molecule_count,read_count_per_molecule_iqr
pbmcs_unstimulated,3052,3086,6237,68,3077,3060,0.0006480881399870382,0.0004888381945576015,0,0,0,0.0,1.0055555555555555,6137,2.0210628645495787,0.1458331527003784,2.0,2.0,2.0,2.0,4.0,3086,0.0
uropod_control,3963,4026,8117,68,3996,3982,0.0007451564828614009,0.000501378791677112,0,0,0,0.0,1.0035158211953792,7978,2.016145057128664,0.12798892626922903,2.0,2.0,2.0,2.0,4.0,4026,0.0

0 comments on commit b63fb91

Please sign in to comment.