diff --git a/CHANGELOG.md b/CHANGELOG.md index f3b5f507..d31a873c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added _ Add `depth` column to `discarded_edgelist.parquet` output of the GRAPH stage that indicates at which refinement iteration the edge is removed. +_ Add `edges_removed_in_multiplet_recovery_first_iteration`, `edges_removed_in_multiplet_recovery_refinement` and `fraction_edges_removed_in_refinement` to graph report.json. - Add `is_potential_doublet` and `n_edges_to_split_doublet` columns to adata.obs. - Add `fraction_potential_doublets` and `n_edges_to_split_potential_doublets` to annotate report.json. - Add `--max-edges-to-split` option to `graph` to specify the maximum number of edges that can be removed between two sub-components during multiplet recovery. diff --git a/src/pixelator/graph/community_detection.py b/src/pixelator/graph/community_detection.py index da4d60a4..73f6ee0d 100644 --- a/src/pixelator/graph/community_detection.py +++ b/src/pixelator/graph/community_detection.py @@ -172,7 +172,8 @@ def connect_components( # save the edge list (discarded) logger.debug("Save discarded edge list") - removed_edgelist.collect().write_parquet( + removed_edgelist = removed_edgelist.collect() + removed_edgelist.write_parquet( Path(output) / f"{sample_name}.discarded_edgelist.parquet" ) @@ -186,8 +187,23 @@ def connect_components( logger.debug("Generate graph report") result_metrics = edgelist_metrics(graph_output_edgelist) - result_metrics["edges_with_colliding_upi_count"] = len(problematic_edges) + + result_metrics["edges_with_colliding_upi_count"] = ( + removed_edgelist["depth"] == 0 + ).sum() + result_metrics["edges_removed_in_multiplet_recovery_first_iteration"] = ( + removed_edgelist["depth"] == 1 + ).sum() + result_metrics["edges_removed_in_multiplet_recovery_refinement"] = ( + removed_edgelist["depth"] > 1 + ).sum() + result_metrics["fraction_edges_removed_in_refinement"] = ( + removed_edgelist["depth"] > 1 + ).sum() / max(len(removed_edgelist), 1) + del graph_output_edgelist + del removed_edgelist + report = GraphSampleReport( sample_id=sample_name, **result_metrics, diff --git a/src/pixelator/graph/utils.py b/src/pixelator/graph/utils.py index 4a850f07..c7d7eec9 100644 --- a/src/pixelator/graph/utils.py +++ b/src/pixelator/graph/utils.py @@ -257,6 +257,9 @@ class EdgelistMetrics(typing.TypedDict, total=True): fraction_pixels_in_largest_component: float edges_with_colliding_upi_count: int + edges_removed_in_multiplet_recovery_first_iteration: int + edges_removed_in_multiplet_recovery_refinement: int + fraction_edges_removed_in_refinement: float MetricsDict = typing.TypeVar( diff --git a/src/pixelator/report/models/graph.py b/src/pixelator/report/models/graph.py index b0fd768b..70150c86 100644 --- a/src/pixelator/report/models/graph.py +++ b/src/pixelator/report/models/graph.py @@ -68,6 +68,23 @@ class GraphSampleReport(SampleReport): description="The number of edges with UPIs that have appeared both as UPIA and UPIB.", ) + edges_removed_in_multiplet_recovery_first_iteration: int = pydantic.Field( + ..., + description="The number of edges removed in the first iteration of multiplet recovery.", + ) + + edges_removed_in_multiplet_recovery_refinement: int = pydantic.Field( + ..., + description="The number of edges removed in the refinement of multiplet recovery.", + ) + + fraction_edges_removed_in_refinement: float = pydantic.Field( + ..., + ge=0, + le=1, + description="The fraction of total removed edges that are removed in the refinement of multiplet recovery.", + ) + @pydantic.computed_field( return_type=float, description="The ratio of the total number of A-pixels and the total number of B-pixels in the graph.", diff --git a/tests/report/assets/reports_only/graph/pbmcs_unstimulated.report.json b/tests/report/assets/reports_only/graph/pbmcs_unstimulated.report.json index fb4ca9f4..264d8875 100644 --- a/tests/report/assets/reports_only/graph/pbmcs_unstimulated.report.json +++ b/tests/report/assets/reports_only/graph/pbmcs_unstimulated.report.json @@ -20,6 +20,9 @@ "fraction_molecules_in_largest_component": 0.0006480881399870382, "fraction_pixels_in_largest_component": 0.0004888381945576015, "edges_with_colliding_upi_count": 0, + "edges_removed_in_multiplet_recovery_first_iteration": 0, + "edges_removed_in_multiplet_recovery_refinement": 0, + "fraction_edges_removed_in_refinement": 0.0, "a_pixel_b_pixel_ratio": 1.0055555555555555, "pixel_count": 6137.0 } diff --git a/tests/report/assets/reports_only/graph/uropod_control.report.json b/tests/report/assets/reports_only/graph/uropod_control.report.json index f42c08ac..9a123767 100644 --- a/tests/report/assets/reports_only/graph/uropod_control.report.json +++ b/tests/report/assets/reports_only/graph/uropod_control.report.json @@ -20,6 +20,9 @@ "fraction_molecules_in_largest_component": 0.0007451564828614009, "fraction_pixels_in_largest_component": 0.000501378791677112, "edges_with_colliding_upi_count": 0, + "edges_removed_in_multiplet_recovery_first_iteration": 0, + "edges_removed_in_multiplet_recovery_refinement": 0, + "fraction_edges_removed_in_refinement": 0.0, "a_pixel_b_pixel_ratio": 1.0035158211953792, "pixel_count": 7978.0 } diff --git a/tests/report/snapshots/test_graph/test_graph_metrics_lookup/pbmcs_unstimulated/pbmcs_unstimulated_graph_metrics.json b/tests/report/snapshots/test_graph/test_graph_metrics_lookup/pbmcs_unstimulated/pbmcs_unstimulated_graph_metrics.json index d66ac5fa..2dfeacb0 100644 --- a/tests/report/snapshots/test_graph/test_graph_metrics_lookup/pbmcs_unstimulated/pbmcs_unstimulated_graph_metrics.json +++ b/tests/report/snapshots/test_graph/test_graph_metrics_lookup/pbmcs_unstimulated/pbmcs_unstimulated_graph_metrics.json @@ -20,6 +20,9 @@ "fraction_molecules_in_largest_component": 0.0006480881399870382, "fraction_pixels_in_largest_component": 0.0004888381945576015, "edges_with_colliding_upi_count": 0, + "edges_removed_in_multiplet_recovery_first_iteration": 0, + "edges_removed_in_multiplet_recovery_refinement": 0, + "fraction_edges_removed_in_refinement": 0.0, "a_pixel_b_pixel_ratio": 1.0055555555555555, "pixel_count": 6137 } \ No newline at end of file diff --git a/tests/report/snapshots/test_graph/test_graph_metrics_lookup/uropod_control/uropod_control_graph_metrics.json b/tests/report/snapshots/test_graph/test_graph_metrics_lookup/uropod_control/uropod_control_graph_metrics.json index d5a7b4d2..18f5719f 100644 --- a/tests/report/snapshots/test_graph/test_graph_metrics_lookup/uropod_control/uropod_control_graph_metrics.json +++ b/tests/report/snapshots/test_graph/test_graph_metrics_lookup/uropod_control/uropod_control_graph_metrics.json @@ -20,6 +20,9 @@ "fraction_molecules_in_largest_component": 0.0007451564828614009, "fraction_pixels_in_largest_component": 0.000501378791677112, "edges_with_colliding_upi_count": 0, + "edges_removed_in_multiplet_recovery_first_iteration": 0, + "edges_removed_in_multiplet_recovery_refinement": 0, + "fraction_edges_removed_in_refinement": 0.0, "a_pixel_b_pixel_ratio": 1.0035158211953792, "pixel_count": 7978 } \ No newline at end of file diff --git a/tests/report/snapshots/test_graph/test_graph_summary/graph_summary.csv b/tests/report/snapshots/test_graph/test_graph_summary/graph_summary.csv index 7698a141..0370acbb 100644 --- a/tests/report/snapshots/test_graph/test_graph_summary/graph_summary.csv +++ b/tests/report/snapshots/test_graph/test_graph_summary/graph_summary.csv @@ -1,3 +1,3 @@ -sample_id,component_count,molecule_count,read_count,marker_count,a_pixel_count,b_pixel_count,fraction_molecules_in_largest_component,fraction_pixels_in_largest_component,edges_with_colliding_upi_count,a_pixel_b_pixel_ratio,pixel_count,read_count_per_molecule_mean,read_count_per_molecule_std,read_count_per_molecule_min,read_count_per_molecule_q1,read_count_per_molecule_q2,read_count_per_molecule_q3,read_count_per_molecule_max,read_count_per_molecule_count,read_count_per_molecule_iqr -pbmcs_unstimulated,3052,3086,6237,68,3077,3060,0.0006480881399870382,0.0004888381945576015,0,1.0055555555555555,6137,2.0210628645495787,0.1458331527003784,2.0,2.0,2.0,2.0,4.0,3086,0.0 -uropod_control,3963,4026,8117,68,3996,3982,0.0007451564828614009,0.000501378791677112,0,1.0035158211953792,7978,2.016145057128664,0.12798892626922903,2.0,2.0,2.0,2.0,4.0,4026,0.0 +sample_id,component_count,molecule_count,read_count,marker_count,a_pixel_count,b_pixel_count,fraction_molecules_in_largest_component,fraction_pixels_in_largest_component,edges_with_colliding_upi_count,edges_removed_in_multiplet_recovery_first_iteration,edges_removed_in_multiplet_recovery_refinement,fraction_edges_removed_in_refinement,a_pixel_b_pixel_ratio,pixel_count,read_count_per_molecule_mean,read_count_per_molecule_std,read_count_per_molecule_min,read_count_per_molecule_q1,read_count_per_molecule_q2,read_count_per_molecule_q3,read_count_per_molecule_max,read_count_per_molecule_count,read_count_per_molecule_iqr +pbmcs_unstimulated,3052,3086,6237,68,3077,3060,0.0006480881399870382,0.0004888381945576015,0,0,0,0.0,1.0055555555555555,6137,2.0210628645495787,0.1458331527003784,2.0,2.0,2.0,2.0,4.0,3086,0.0 +uropod_control,3963,4026,8117,68,3996,3982,0.0007451564828614009,0.000501378791677112,0,0,0,0.0,1.0035158211953792,7978,2.016145057128664,0.12798892626922903,2.0,2.0,2.0,2.0,4.0,4026,0.0