Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve benchmarking reports #422

Merged
2 changes: 1 addition & 1 deletion .github/workflows/test_tar_release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test Tarball on Release
name: Test Tar

on:
release:
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ extensions/*
!extensions/.placeholder
build
projects/
.vscode/
.vscode/
4 changes: 3 additions & 1 deletion docs/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ Usage examples:
1. To run all targets (not including extensions):
``sunbeam run --profile /path/to/project/``
2. To specify multiple targets:
``sunbeam run --profile /path/to/project/ all_decontam all_assembly all_annotation``
3. The equivalent of 2, using the deprecated ``--target_list`` option:
``sunbeam run --profile /path/to/project/ --target_list all_decontam all_assembly all_annotation``

.. code-block:: shell

-h/--help: Display help.
-s/--sunbeam_dir: Path to sunbeam installation.
--target_list: A list of targets to run successively.
--target_list: A list of targets to run successively. (DEPRECATED)
<snakemake options>: You can pass further arguments to Snakemake after ``--``, e.g: ``$ sunbeam run -- --cores 12``. See http://snakemake.readthedocs.io for more information.

.. tip::
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ channels:
- conda-forge
- bioconda
dependencies:
- snakemake
- snakemake >=7.32.0
- git # Ensure sunbeam extend works even with tar installation of main pipeline
- python =3.12.0
91 changes: 86 additions & 5 deletions src/sunbeamlib/post.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import csv
import datetime
import os
from pathlib import Path
from snakemake.common import Rules
from typing import Dict


def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None:
def compile_benchmarks(
benchmark_fp: str, Cfg: Dict[str, Dict | str], rules: Rules
) -> None:
"""Aggregate all the benchmark files into one and put it in stats_fp"""
stats_fp = Path(Cfg["all"]["root"]) / "stats"
benchmarks = []
try:
benchmarks = os.listdir(benchmark_fp)
Expand All @@ -21,10 +27,10 @@ def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None:

if not os.path.exists(stats_fp):
os.makedirs(stats_fp)
stats_file = os.path.join(
stats_fp,
f"{str(int(datetime.datetime.now().timestamp() * 1000))}_benchmarks.tsv",
)

dt = str(int(datetime.datetime.now().timestamp() * 1000))
stats_file = os.path.join(stats_fp, f"{dt}_benchmarks.tsv")

with open(stats_file, "w") as f:
writer = csv.writer(f, delimiter="\t")
writer.writerow(headers)
Expand All @@ -33,3 +39,78 @@ def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None:
reader = csv.reader(g, delimiter="\t")
next(reader) # Headers line
writer.writerow([fp[:-4]] + next(reader))

compile_file_stats(stats_fp, Cfg, dt, rules)


def compile_file_stats(
stats_fp: str, Cfg: Dict[str, Dict | str], dt: str, rules: Rules
) -> None:
"""Collect data on all inputs and outputs (as long as they still exist at this point) as well as dbs"""
file_stats = {}
output_fp = Path(Cfg["all"]["root"]) / Cfg["all"]["output_fp"]

stats_file = os.path.join(stats_fp, f"{dt}_file_stats.tsv")

# Collect info on all files in params
param_fps = set()
for n, r in rules._rules.items():
if r.params:
for p in r.params:
if isinstance(p, dict):
for k, v in p.items():
try:
param_fps.add(Path(v))
except TypeError:
continue
elif isinstance(p, list):
for i in p:
try:
param_fps.add(Path(i))
except TypeError:
continue
else:
try:
param_fps.add(Path(p))
except TypeError:
continue

for fp in param_fps:
if fp.exists():
if fp.is_file():
file_stats[fp] = fp.stat().st_size
elif fp.is_dir():
for file in os.listdir(fp):
nfp = fp / file
if nfp.exists() and nfp.is_file():
file_stats[nfp] = nfp.stat().st_size

# Collect info on all files in output_fp
for root, dirs, files in os.walk(output_fp):
for file in files:
fp = Path(root) / file
file_stats[fp] = fp.stat().st_size

# Collect info on all dbs in Cfg
for section, values in Cfg.items():
if section == "all":
continue
for k, v in values.items():
try:
vp = Path(v)
except TypeError:
continue
if vp.exists():
if vp.is_file():
file_stats[vp] = vp.stat().st_size
elif vp.is_dir():
for file in os.listdir(vp):
fp = vp / file
file_stats[fp] = fp.stat().st_size

# Write to file
with open(stats_file, "w") as f:
writer = csv.writer(f, delimiter="\t")
writer.writerow(["file", "size"])
for fp, size in file_stats.items():
writer.writerow([fp, size])
52 changes: 25 additions & 27 deletions src/sunbeamlib/script_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ def main(argv=sys.argv):
help="Use mamba instead of conda to manage environments",
)
parser.add_argument(
"--target_list", nargs="+", default=[], help="List of sunbeam targets"
"--target_list",
nargs="+",
default=[],
help="List of sunbeam targets (DEPRECATED)",
)

# The remaining args (after --) are passed to Snakemake
Expand All @@ -51,32 +54,27 @@ def main(argv=sys.argv):

conda_cmd = "conda" if not args.mamba else "mamba"

cmds = list()
if args.target_list == []:
args.target_list = [""]

for target in args.target_list:
if target:
print(f"Running sunbeam on target: {target}")
if args.target_list:
sys.stderr.write(
"Warning: passing targets to '--target_list' is deprecated. "
"Please use 'sunbeam run <opts> target1 target2 target3' instead.\n"
)

# Including target when it's an empty string breaks stuff so the extra
# list comp avoids that
snakemake_args = [
arg
for arg in [
"snakemake",
"--snakefile",
str(snakefile),
"--conda-prefix",
str(conda_prefix),
"--conda-frontend",
conda_cmd,
target,
]
if arg
] + remaining
print("Running: " + " ".join(snakemake_args))
snakemake_args = (
[
"snakemake",
"--snakefile",
str(snakefile),
"--conda-prefix",
str(conda_prefix),
"--conda-frontend",
conda_cmd,
]
+ remaining
+ args.target_list
)
print("Running: " + " ".join(snakemake_args))

cmds.append(subprocess.run(snakemake_args))
cmd = subprocess.run(snakemake_args)

sys.exit(cmds[0].returncode)
sys.exit(cmd.returncode)
4 changes: 2 additions & 2 deletions src/sunbeamlib/slurm_profile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ set-resources:
- remove_low_complexity:runtime=120
- align_to_host:mem_mb=16000
- align_to_host:runtime=240
- filter_unmapped_reads:mem_mb=24000
- filter_unmapped_reads:runtime=240
- filter_reads:mem_mb=24000
- filter_reads:runtime=240
- megahit_paired:mem_mb=20000
- megahit_paired:runtime=720
- final_filter:mem_mb=4000
Expand Down
9 changes: 7 additions & 2 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ rule samples:
[print(sample) for sample in sorted(list(Samples.keys()))]


localrules:
all,
samples,


onstart:
try:
shutil.rmtree(BENCHMARK_FP)
Expand All @@ -151,9 +156,9 @@ onstart:

onsuccess:
print("Sunbeam finished!")
compile_benchmarks(BENCHMARK_FP, Cfg["all"]["root"] / "stats")
compile_benchmarks(BENCHMARK_FP, Cfg, rules)


onerror:
print("Sunbeam failed with error.")
compile_benchmarks(BENCHMARK_FP, Cfg["all"]["root"] / "stats")
compile_benchmarks(BENCHMARK_FP, Cfg, rules)