Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add bbmap tool #748

Merged
merged 12 commits into from
Aug 21, 2024
31 changes: 31 additions & 0 deletions .github/workflows/build-push-bbmap-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds the BBMap tool image and pushes it to GHCR by delegating to the
# shared reusable workflow in this repository.
name: Build & Push BBMap Image to GHCR

on:
  # Run on pull requests that touch the BBMap version file or either workflow file.
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    paths:
      - 'src/loaders/compute_tools/bbmap/versions.yaml'
      - '.github/workflows/build-push-bbmap-image.yml'
      - '.github/workflows/build-push-tool-images.yml'

  # Run on pushes to the long-lived branches that touch the same files.
  push:
    branches: [main, master, develop]
    paths:
      - 'src/loaders/compute_tools/bbmap/versions.yaml'
      - '.github/workflows/build-push-bbmap-image.yml'
      - '.github/workflows/build-push-tool-images.yml'

jobs:
  trigger-build-push:
    # Reusable workflow; it receives the tool name and the path of its version file.
    uses: ./.github/workflows/build-push-tool-images.yml
    with:
      tool_name: bbmap
      version_file: 'src/loaders/compute_tools/bbmap/versions.yaml'
    # Forward the caller's secrets to the reusable workflow.
    secrets: inherit
31 changes: 31 additions & 0 deletions src/loaders/compute_tools/bbmap/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
FROM continuumio/miniconda3:24.5.0-0

# NOTE: If the tool version changes, ensure the metadata information saved after running the tool in the _run_bbmap_single method is updated
ARG BBMAP_VER=39.06
# Name of the conda environment that holds BBMap (key=value form; the
# space-separated ENV form is a legacy syntax).
ENV CONDA_ENV=bbmap-$BBMAP_VER

# Add Bioconda and Conda-Forge channels
# (`--add` prepends, so conda-forge ends up with the highest priority)
RUN conda config --add channels bioconda
RUN conda config --add channels conda-forge

# Install BBMap
# -y keeps the build non-interactive; cleaning in the same layer keeps the
# package caches out of the image.
ARG PYTHON_VER=3.11
RUN conda create -y -n $CONDA_ENV python=$PYTHON_VER bbmap=$BBMAP_VER \
    && conda clean -afy

# Activate the environment
RUN echo "source activate $CONDA_ENV" >> ~/.bashrc

# Set up directories
RUN mkdir -p /app
COPY ./ /app/collections
# -f: do not fail the build when .git is not part of the build context
RUN rm -rf /app/collections/.git


ENV PYTHONPATH=/app/collections
WORKDIR /app

# NOTE(review): presumably read by entrypoint.sh to locate the script to run - confirm
ENV PY_SCRIPT=/app/collections/src/loaders/compute_tools/bbmap/bbmap.py

# NOTE(review): world-writable tree; presumably needed because the container
# runs under an arbitrary UID - confirm before tightening
RUN chmod -R 777 /app/collections

ENTRYPOINT ["/app/collections/src/loaders/compute_tools/entrypoint.sh"]
50 changes: 50 additions & 0 deletions src/loaders/compute_tools/bbmap/bbmap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Run BBMap tool on a set of faa files.

This tool serves a purpose distinct from the collection tools: it is intended for CDM work.
Therefore, the parser program is not compatible with data generated by this tool.

"""
from pathlib import Path

from src.loaders.common.loader_common_names import TOOL_METADATA
from src.loaders.compute_tools.tool_common import ToolRunner, run_command, create_tool_metadata


def _run_bbmap_single(
        tool_safe_data_id: str,
        data_id: str,
        source_file: Path,
        output_dir: Path,
        threads_per_tool_run: int,
        debug: bool) -> None:
    """
    Run BBMap's ``stats.sh`` on one source file and record the run in a metadata file.

    The run is skipped when the metadata file already exists in ``output_dir``,
    so re-running over the same output directory does not repeat finished work.

    :param tool_safe_data_id: not used by this function (presumably part of the
        callback signature expected by ``ToolRunner`` - confirm)
    :param data_id: identifier stored in the metadata under ``data_id``
    :param source_file: input file passed to ``stats.sh`` as ``in=``
    :param output_dir: directory that receives ``result.json`` and the metadata file
    :param threads_per_tool_run: not used by this function
    :param debug: when true, ``output_dir`` is passed to ``run_command`` as its
        second argument; otherwise ``None`` is passed
    """
    # The metadata file is written last, so its presence marks a completed run.
    metadata_file = output_dir / TOOL_METADATA
    if metadata_file.exists():
        print(f"Skipping {source_file} as it has already been processed.")
        return

    # NOTE(review): no explicit output format is passed to stats.sh - confirm
    # that the content written to result.json is actually JSON.
    command = [
        'stats.sh',
        'in=' + str(source_file),
        'out=' + str(output_dir / 'result.json'),
    ]

    run_command(command, output_dir if debug else None)

    # Save run info to a metadata file in the output directory for parsing later.
    # The version is hard-coded and must be kept in step with BBMAP_VER in the
    # tool's Dockerfile.
    metadata = {
        'tool_name': 'bbmap',
        'version': '39.06',
        'command': command,
        'data_id': data_id,
        'source_file': str(source_file),
    }
    create_tool_metadata(output_dir, metadata)


def main():
    """Build the ``bbmap`` ToolRunner and hand it ``_run_bbmap_single`` for parallel execution."""
    ToolRunner("bbmap").parallel_single_execution(_run_bbmap_single, unzip=True)


# Run the tool only when this module is executed as a script.
if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions src/loaders/compute_tools/bbmap/versions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Version history of the BBMap tool image.
versions:
  - version: 0.1.0
    date: 2024-08-16
    notes: |
      - initial BBMap implementation
2 changes: 1 addition & 1 deletion src/loaders/jobs/taskfarmer/task_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

Copy link
Collaborator Author

@Tianhao-Gu Tianhao-Gu Aug 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The BBMap tool runs very quickly, processing each genome in under a second, so the default task-scheduling configuration is sufficient.

'''

# Names of the tools this module accepts; 'bbmap' is the newest entry.
TOOLS_AVAILABLE = ['gtdb_tk', 'checkm2', 'microtrait', 'mash', 'eggnog', 'bbmap']

NODE_TIME_LIMIT_DEFAULT = 5 # hours
# Used as THREADS variable in the batch script which controls the number of parallel tasks per node
Expand Down
Loading