Skip to content

Commit

Permalink
New additional benchmarking to check scaling with input size.
Browse files Browse the repository at this point in the history
 - Also changed `unwrap()` to `expect()`.
  • Loading branch information
vsbuffalo committed May 18, 2024
1 parent 39217cd commit 7448e82
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
57 changes: 57 additions & 0 deletions additional_benchmarks/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import numpy as np

GRANGES = "../target/release/granges"
SEQLENS = "../tests_data/hg38_seqlens.tsv"
NREPS = 1

rule granges:
output: GRANGES
shell:
"""
(cargo build --release --features=dev-commands)
"""

rule random_bed:
input: seqlens=SEQLENS, granges=GRANGES
output: "random_bed/{size}__{rep}.bed.gz"
shell:
"""
{input.granges} random-bed --sort --num {wildcards.size} {input.seqlens} | gzip > {output}
"""

rule granges_filter:
input: a="random_bed/{size}__A.bed.gz", b="random_bed/{size}__B.bed.gz",
genome=SEQLENS, granges=GRANGES
output: "results/granges_filter__size_{size}.bed"
benchmark: repeat("benchmarks/granges_filter__size_{size}.tsv", NREPS)
shell:
"""
{input.granges} filter --left {input.a} --right {input.b} --genome {input.genome} > {output}
"""


rule bedtools_intersect:
input: a="random_bed/{size}__A.bed.gz", b="random_bed/{size}__B.bed.gz"
output: "results/bedtools_intersect__size_{size}.bed"
benchmark: repeat("benchmarks/bedtools_intersect__size_{size}.tsv", NREPS)
shell:
"""
bedtools intersect -a {input.a} -b {input.b} > {output}
"""

rule plyranges_join_overlap_inner:
input: a="random_bed/{size}__A.bed.gz", b="random_bed/{size}__B.bed.gz"
output: "results/plyranges_join_overlap_inner__size_{size}.bed"
benchmark: repeat("benchmarks/plyranges_join_overlap_inner__size_{size}.tsv", NREPS)
shell:
"""
Rscript Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output}
"""

tools = ["bedtools_intersect", "granges_filter", "plyranges_join_overlap_inner"]
sizes = np.logspace(3, 9, 10)

all_benches = expand("results/{tool}__size_{size}.bed", tool=tools, size=sizes)

rule all:
input: all_benches
18 changes: 18 additions & 0 deletions additional_benchmarks/rscripts/plyranges_join_overlap_inner.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
library(plyranges)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 3) {
stop("Usage: Rscript script.R <bed_file_a> <bed_file_b> <output_file>")
}

bed_file_a <- args[1]
bed_file_b <- args[2]
output_file <- args[3]

# load data into mem
granges_a <- read_bed(bed_file_a)
granges_b <- read_bed(bed_file_b)

intersection_result <- join_overlap_inner(granges_a, granges_b)

write_bed(intersection_result, output_file)
3 changes: 2 additions & 1 deletion src/granges.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,8 @@ where
for (seqname, indices) in self.data_indices()? {
let mut new_data = Vec::new();
for index in indices {
let value = data.get(index).unwrap();
let value = data.get(index)
.expect("Invalid index in GRanges::data_by_seqname(). This indicates invalid data indices (developer error).");
new_data.push((*value).clone());
}
all_new_data.insert(&seqname, new_data)?;
Expand Down

0 comments on commit 7448e82

Please sign in to comment.