diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..ff16024 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +on: + push: + branches: + - main + +name: Release + +jobs: + bump-version: + name: Release version + runs-on: ubuntu-latest + + steps: + - uses: GoogleCloudPlatform/release-please-action@v3 + id: release + with: + release-type: python # just keep a changelog, no version anywhere outside of git tags + package-name: juno_template + lint: + name: Lint Code Base + runs-on: ubuntu-latest + + steps: + - name: Checkout Code + uses: actions/checkout@v3 + + - name: Lint Code Base + uses: github/super-linter@v4 + env: + VALIDATE_ALL_CODEBASE: false + DEFAULT_BRANCH: main + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + VALIDATE_SNAKEMAKE_SNAKEFMT: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a151261 --- /dev/null +++ b/.gitignore @@ -0,0 +1,144 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Vscode stuff: +.vscode + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Added stuff +.snakemake +envs/src +input +output +sample_sheet.yaml +config/sample_sheet.yaml +config/variables.yaml +config/juno_call.txt +config/user_parameters.yaml +core.* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f73a69e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "juno-library"] + path = base_juno_pipeline + url = https://github.com/RIVM-bioinformatics/base_juno_pipeline.git diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7558497 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## [1.0.1](https://github.com/RIVM-bioinformatics/juno-template/compare/v1.0.0...v1.0.1) (2023-07-12) + + +### Dependencies + +* remove anaconda and defaults and add no defaults channel ([0b4fccb](https://github.com/RIVM-bioinformatics/juno-template/commit/0b4fccb29d192570060ed81f6222b78293e195a7)) diff --git a/README.md b/README.md new file mode 100644 index 0000000..a4802bc --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Juno-Template +A template pipeline on which the other Juno pipelines are based. + +## Contribution guidelines +Juno pipelines use a [feature branch workflow](https://www.atlassian.com/git/tutorials/comparing-workflows/feature-branch-workflow). To work on a feature, create a branch from the `main` branch and make your changes there. This branch can be merged into the `main` branch via a pull request. Hotfixes for bugs can be committed to the `main` branch. 
+ +Please adhere to the [conventional commits](https://www.conventionalcommits.org/) specification for commit messages. These commit messages can be picked up by [release please](https://github.com/googleapis/release-please) to create meaningful release messages. diff --git a/Snakefile b/Snakefile new file mode 100644 index 0000000..cd1fde2 --- /dev/null +++ b/Snakefile @@ -0,0 +1,25 @@ +import yaml + + +sample_sheet=config["sample_sheet"] +with open(sample_sheet) as f: + SAMPLES = yaml.safe_load(f) + +for param in ["threads", "mem_gb"]: + for k in config[param]: + config[param][k] = int(config[param][k]) + +# print(SAMPLES) + +OUT = config["output_dir"] + +localrules: + all, + + +include: "workflow/rules/rule.smk" + + +rule all: + input: + expand(OUT + "/{sample}_combined.fastq", sample=SAMPLES), diff --git a/base_juno_pipeline b/base_juno_pipeline new file mode 160000 index 0000000..3d05d8b --- /dev/null +++ b/base_juno_pipeline @@ -0,0 +1 @@ +Subproject commit 3d05d8bd7f0010547e3ac5a2d8751a000f44dffb diff --git a/config/pipeline_parameters.yaml b/config/pipeline_parameters.yaml new file mode 100644 index 0000000..d240a8f --- /dev/null +++ b/config/pipeline_parameters.yaml @@ -0,0 +1,5 @@ +threads: + template_rule: 1 + +mem_gb: + template_rule: 1 diff --git a/envs/mamba.yaml b/envs/mamba.yaml new file mode 100644 index 0000000..57a4e0e --- /dev/null +++ b/envs/mamba.yaml @@ -0,0 +1,5 @@ +name: mamba +channels: + - conda-forge +dependencies: + - mamba diff --git a/envs/template_master.yaml b/envs/template_master.yaml new file mode 100644 index 0000000..9ca076d --- /dev/null +++ b/envs/template_master.yaml @@ -0,0 +1,15 @@ +name: juno_template +channels: + - conda-forge + - bioconda + - biocore + - nodefaults +dependencies: + - git=2.40.* + - mamba=1.3.* + - pandas=1.5.* + - snakemake=7.18.* + - pip=23.* + - python=3.11.* + - pip: + - "--editable=git+https://github.com/RIVM-bioinformatics/juno-library.git@v2.0.0#egg=juno_library" diff --git a/run_pipeline.sh 
b/run_pipeline.sh new file mode 100755 index 0000000..56cb9f6 --- /dev/null +++ b/run_pipeline.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +set -euo pipefail + +#----------------------------------------------# +# User parameters (both positional arguments are required; ":-" keeps "set -u" from aborting before the message below) +if [ -n "${1:-}" ] && [ -n "${2:-}" ] #&& [ -n "${irods_input_projectID:-}" ] +then + input_dir="${1}" + output_dir="${2}" +# PROJECT_NAME="${irods_input_projectID}" +else + echo "One of the parameters is missing, make sure there is an input directory, output directory and project name(param 1, 2 or irods_input_projectID)." + exit 1 +fi + +if [ ! -d "${input_dir}" ] || [ ! -d "${output_dir}" ] +then + echo "The input directory $input_dir, output directory $output_dir or fastq dir ${input_dir}/clean_fastq does not exist" + exit 1 +else + input_fastq="${input_dir}/clean_fastq" +fi + +#----------------------------------------------# +# Create/update necessary environments +PATH_MAMBA_YAML="envs/mamba.yaml" +PATH_MASTER_YAML="envs/template_master.yaml" +MAMBA_NAME=$(head -n 1 ${PATH_MAMBA_YAML} | cut -f2 -d ' ') +MASTER_NAME=$(head -n 1 ${PATH_MASTER_YAML} | cut -f2 -d ' ') + +echo -e "\nUpdating necessary environments to run the pipeline..." + +# Removing strict mode because it sometimes breaks the code for +# activating an environment and for testing whether some variables +# are set or not +set +euo pipefail + +conda env update -f "${PATH_MAMBA_YAML}" +source activate "${MAMBA_NAME}" + +mamba env update -f "${PATH_MASTER_YAML}" + +source activate "${MASTER_NAME}" + +#----------------------------------------------# +# Run the pipeline + +echo -e "\nRun pipeline..." + +if [ ! -z "${irods_runsheet_sys__runsheet__lsf_queue}" ]; then + QUEUE="${irods_runsheet_sys__runsheet__lsf_queue}" +else + QUEUE="bio" +fi + +set -euo pipefail + +result=0 + +python template.py --queue "${QUEUE}" -i "${input_dir}" -o "${output_dir}" || result=$? # keep the exit status without tripping errexit, so metadata is still propagated 
+ +# Propagate metadata (irods_input_* environment variables are appended to metadata.yml in the output directory) + +set +euo pipefail + +SEQ_KEYS= +SEQ_ENV=`env | grep irods_input_sequencing` +for SEQ_AVU in ${SEQ_ENV} +do + SEQ_KEYS="${SEQ_KEYS} ${SEQ_AVU%%=*}" +done + +for key in $SEQ_KEYS irods_input_illumina__Flowcell irods_input_illumina__Instrument \ + irods_input_illumina__Date irods_input_illumina__Run_number irods_input_illumina__Run_Id +do + if [ ! -z "${!key}" ] ; then + attrname=${key:12} + attrname=${attrname/__/::} + echo "${attrname}: '${!key}'" >> "${output_dir}/metadata.yml" + fi +done + +set -euo pipefail + +exit ${result} diff --git a/template.py b/template.py new file mode 100644 index 0000000..36724d2 --- /dev/null +++ b/template.py @@ -0,0 +1,85 @@ +""" +Juno template +Authors: Karim Hajji, Roxanne Wolthuis +Organization: Rijksinstituut voor Volksgezondheid en Milieu (RIVM) +Department: Infektieziekteonderzoek, Diagnostiek en Laboratorium + Surveillance (IDS), Bacteriologie (BPD) +Date: 05-04-2023 +""" + +from pathlib import Path +import pathlib +import yaml +import argparse +import sys +from dataclasses import dataclass, field +from juno_library import Pipeline +from typing import Optional +from version import __package_name__, __version__, __description__ + +def main() -> None: + juno_template = JunoTemplate() + juno_template.run() + +@dataclass +class JunoTemplate(Pipeline): + pipeline_name: str = __package_name__ + pipeline_version: str = __version__ + input_type: str = "fastq" + + def _add_args_to_parser(self) -> None: + super()._add_args_to_parser() + + self.parser.description = "Template juno pipeline. If you see this message please change it to something appropriate" + + self.add_argument( + "--example-option", + dest="example", + type=str, + required=False, + metavar="STR", + help="This is an optional argument, specific for this pipeline. 
General options are included in juno-library.", + ) + + def _parse_args(self) -> argparse.Namespace: + args = super()._parse_args() + + # Optional arguments are loaded into self here (--example-option is a string and is None when not given) + self.example: Optional[str] = args.example + + return args + + # Extra class methods for this pipeline can be defined here + def example_class_method(self) -> None: + print(f"example option is set to {self.example}") + + def setup(self) -> None: + super().setup() + + if self.snakemake_args["use_singularity"]: + self.snakemake_args["singularity_args"] = " ".join( + [ + self.snakemake_args["singularity_args"] + ] # paths that singularity should be able to read from can be bound by adding to the above list + ) + + # Extra class methods for this pipeline can be invoked here + if self.example: + self.example_class_method() + + with open( + Path(__file__).parent.joinpath("config/pipeline_parameters.yaml") + ) as f: + parameters_dict = yaml.safe_load(f) + self.snakemake_config.update(parameters_dict) + + self.user_parameters = { + "input_dir": str(self.input_dir), + "output_dir": str(self.output_dir), + "exclusion_file": str(self.exclusion_file), + "example": str(self.example), # other user parameters can be included in user_parameters.yaml here + } + + +if __name__ == "__main__": + main() diff --git a/version.py b/version.py new file mode 100644 index 0000000..e5d8a19 --- /dev/null +++ b/version.py @@ -0,0 +1,6 @@ +__package_name__ = "juno_template" +__authors__ = "author" +__email__ = "author@rivm.nl" +__license__ = "AGPLv3" +__version__ = "1.0" +__description__ = "Template juno pipeline. 
If you see this message please change it to something appropriate" \ No newline at end of file diff --git a/workflow/envs/env.yaml b/workflow/envs/env.yaml new file mode 100644 index 0000000..e69de29 diff --git a/workflow/rules/rule.smk b/workflow/rules/rule.smk new file mode 100644 index 0000000..1730f76 --- /dev/null +++ b/workflow/rules/rule.smk @@ -0,0 +1,17 @@ +rule template_rule: + input: + lambda wc: SAMPLES[wc.sample]["R1"], + lambda wc: SAMPLES[wc.sample]["R2"], + output: + OUT + "/{sample}_combined.fastq", + log: + OUT + "/log/{sample}_template_rule.log" + message: + "Merging {input}." + resources: + mem_gb=config["mem_gb"]["template_rule"], + params: script = "workflow/scripts/script.py" + threads: config["threads"]["template_rule"] + shell: """ + python {params.script} {input} > {output} 2> {log} + """ diff --git a/workflow/scripts/script.py b/workflow/scripts/script.py new file mode 100644 index 0000000..81656b6 --- /dev/null +++ b/workflow/scripts/script.py @@ -0,0 +1,6 @@ +import subprocess +import sys + +subprocess.check_call( + ["cat"] + sys.argv[1:], +)