Skip to content

Commit

Permalink
Merge pull request #422 from sunbeam-labs/421-improve-benchmarking-re…
Browse files Browse the repository at this point in the history
…ports-to-include-relevant-info

Improve benchmarking reports
  • Loading branch information
Ulthran authored Nov 10, 2023
2 parents 68722cc + a28619b commit facec83
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_tar_release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test Tarball on Release
name: Test Tar

on:
release:
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ extensions/*
!extensions/.placeholder
build
projects/
.vscode/
.vscode/
4 changes: 3 additions & 1 deletion docs/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ Usage examples:
1. To run all targets (not including extensions):
``sunbeam run --profile /path/to/project/``
2. To specify multiple targets:
``sunbeam run --profile /path/to/project/ all_decontam all_assembly all_annotation``
3. The equivalent of 2, using the deprecated ``--target_list`` option:
``sunbeam run --profile /path/to/project/ --target_list all_decontam all_assembly all_annotation``
.. code-block:: shell
-h/--help: Display help.
-s/--sunbeam_dir: Path to sunbeam installation.
--target_list: A list of targets to run successively.
--target_list: A list of targets to run successively. (DEPRECATED)
<snakemake options>: You can pass further arguments to Snakemake after ``--``, e.g: ``$ sunbeam run -- --cores 12``. See http://snakemake.readthedocs.io for more information.
.. tip::
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ channels:
- conda-forge
- bioconda
dependencies:
- snakemake
- snakemake >=7.32.0
- git # Ensure sunbeam extend works even with tar installation of main pipeline
- python =3.12.0
91 changes: 86 additions & 5 deletions src/sunbeamlib/post.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import csv
import datetime
import os
from pathlib import Path
from snakemake.common import Rules
from typing import Dict


def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None:
def compile_benchmarks(
benchmark_fp: str, Cfg: Dict[str, Dict | str], rules: Rules
) -> None:
"""Aggregate all the benchmark files into one and put it in stats_fp"""
stats_fp = Path(Cfg["all"]["root"]) / "stats"
benchmarks = []
try:
benchmarks = os.listdir(benchmark_fp)
Expand All @@ -21,10 +27,10 @@ def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None:

if not os.path.exists(stats_fp):
os.makedirs(stats_fp)
stats_file = os.path.join(
stats_fp,
f"{str(int(datetime.datetime.now().timestamp() * 1000))}_benchmarks.tsv",
)

dt = str(int(datetime.datetime.now().timestamp() * 1000))
stats_file = os.path.join(stats_fp, f"{dt}_benchmarks.tsv")

with open(stats_file, "w") as f:
writer = csv.writer(f, delimiter="\t")
writer.writerow(headers)
Expand All @@ -33,3 +39,78 @@ def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None:
reader = csv.reader(g, delimiter="\t")
next(reader) # Headers line
writer.writerow([fp[:-4]] + next(reader))

compile_file_stats(stats_fp, Cfg, dt, rules)


def compile_file_stats(
stats_fp: str, Cfg: Dict[str, Dict | str], dt: str, rules: Rules
) -> None:
"""Collect data on all inputs and outputs (as long as they still exist at this point) as well as dbs"""
file_stats = {}
output_fp = Path(Cfg["all"]["root"]) / Cfg["all"]["output_fp"]

stats_file = os.path.join(stats_fp, f"{dt}_file_stats.tsv")

# Collect info on all files in params
param_fps = set()
for n, r in rules._rules.items():
if r.params:
for p in r.params:
if isinstance(p, dict):
for k, v in p.items():
try:
param_fps.add(Path(v))
except TypeError:
continue
elif isinstance(p, list):
for i in p:
try:
param_fps.add(Path(i))
except TypeError:
continue
else:
try:
param_fps.add(Path(p))
except TypeError:
continue

for fp in param_fps:
if fp.exists():
if fp.is_file():
file_stats[fp] = fp.stat().st_size
elif fp.is_dir():
for file in os.listdir(fp):
nfp = fp / file
if nfp.exists() and nfp.is_file():
file_stats[nfp] = nfp.stat().st_size

# Collect info on all files in output_fp
for root, dirs, files in os.walk(output_fp):
for file in files:
fp = Path(root) / file
file_stats[fp] = fp.stat().st_size

# Collect info on all dbs in Cfg
for section, values in Cfg.items():
if section == "all":
continue
for k, v in values.items():
try:
vp = Path(v)
except TypeError:
continue
if vp.exists():
if vp.is_file():
file_stats[vp] = vp.stat().st_size
elif vp.is_dir():
for file in os.listdir(vp):
fp = vp / file
file_stats[fp] = fp.stat().st_size

# Write to file
with open(stats_file, "w") as f:
writer = csv.writer(f, delimiter="\t")
writer.writerow(["file", "size"])
for fp, size in file_stats.items():
writer.writerow([fp, size])
52 changes: 25 additions & 27 deletions src/sunbeamlib/script_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ def main(argv=sys.argv):
help="Use mamba instead of conda to manage environments",
)
parser.add_argument(
"--target_list", nargs="+", default=[], help="List of sunbeam targets"
"--target_list",
nargs="+",
default=[],
help="List of sunbeam targets (DEPRECATED)",
)

# The remaining args (after --) are passed to Snakemake
Expand All @@ -51,32 +54,27 @@ def main(argv=sys.argv):

conda_cmd = "conda" if not args.mamba else "mamba"

cmds = list()
if args.target_list == []:
args.target_list = [""]

for target in args.target_list:
if target:
print(f"Running sunbeam on target: {target}")
if args.target_list:
sys.stderr.write(
"Warning: passing targets to '--target_list' is deprecated. "
"Please use 'sunbeam run <opts> target1 target2 target3' instead.\n"
)

# Including target when it's an empty string breaks stuff so the extra
# list comp avoids that
snakemake_args = [
arg
for arg in [
"snakemake",
"--snakefile",
str(snakefile),
"--conda-prefix",
str(conda_prefix),
"--conda-frontend",
conda_cmd,
target,
]
if arg
] + remaining
print("Running: " + " ".join(snakemake_args))
snakemake_args = (
[
"snakemake",
"--snakefile",
str(snakefile),
"--conda-prefix",
str(conda_prefix),
"--conda-frontend",
conda_cmd,
]
+ remaining
+ args.target_list
)
print("Running: " + " ".join(snakemake_args))

cmds.append(subprocess.run(snakemake_args))
cmd = subprocess.run(snakemake_args)

sys.exit(cmds[0].returncode)
sys.exit(cmd.returncode)
4 changes: 2 additions & 2 deletions src/sunbeamlib/slurm_profile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ set-resources:
- remove_low_complexity:runtime=120
- align_to_host:mem_mb=16000
- align_to_host:runtime=240
- filter_unmapped_reads:mem_mb=24000
- filter_unmapped_reads:runtime=240
- filter_reads:mem_mb=24000
- filter_reads:runtime=240
- megahit_paired:mem_mb=20000
- megahit_paired:runtime=720
- final_filter:mem_mb=4000
Expand Down
9 changes: 7 additions & 2 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ rule samples:
[print(sample) for sample in sorted(list(Samples.keys()))]


localrules:
all,
samples,


onstart:
try:
shutil.rmtree(BENCHMARK_FP)
Expand All @@ -151,9 +156,9 @@ onstart:

onsuccess:
print("Sunbeam finished!")
compile_benchmarks(BENCHMARK_FP, Cfg["all"]["root"] / "stats")
compile_benchmarks(BENCHMARK_FP, Cfg, rules)


onerror:
print("Sunbeam failed with error.")
compile_benchmarks(BENCHMARK_FP, Cfg["all"]["root"] / "stats")
compile_benchmarks(BENCHMARK_FP, Cfg, rules)

0 comments on commit facec83

Please sign in to comment.