diff --git a/.circleci/config.yml b/.circleci/config.yml index d575bd62..2ae5674e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ version: 2 jobs: build: docker: - - image: cimg/python:3.10 + - image: continuumio/miniconda3 steps: - checkout diff --git a/.github/workflows/check_conda_envs.yml b/.github/workflows/check_conda_envs.yml new file mode 100755 index 00000000..d1879f3c --- /dev/null +++ b/.github/workflows/check_conda_envs.yml @@ -0,0 +1,27 @@ +name: Check Conda Envs + +on: + pull_request: + branches: [main] + push: + branches: [main] + workflow_dispatch: + inputs: + envs: + description: 'Regex for envs' + required: false + default: './,workflow/envs/' + +jobs: + check_conda_envs: + name: Check Conda Envs + runs-on: ubuntu-latest + + steps: + - name: Checkout Code + uses: actions/checkout@v3 + + - name: Check Conda Envs + uses: Ulthran/conda_env_check@v0 + with: + envs: "./,workflow/envs/" diff --git a/MANIFEST.in b/MANIFEST.in index 6ee0cab1..d3fc5818 100755 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include sunbeamlib/data/*.yml -include sunbeamlib/data/*.yaml \ No newline at end of file +include src/sunbeamlib/*.yml +include src/sunbeamlib/*.yaml \ No newline at end of file diff --git a/Readme.md b/README.md similarity index 83% rename from Readme.md rename to README.md index 912539cb..210630e8 100644 --- a/Readme.md +++ b/README.md @@ -2,7 +2,11 @@ # Sunbeam: a robust, extensible metagenomic sequencing pipeline -[![CircleCI](https://circleci.com/gh/sunbeam-labs/sunbeam/tree/main.svg?style=shield)](https://circleci.com/gh/sunbeam-labs/sunbeam/tree/main) [![Super-Linter](https://github.com/sunbeam-labs/sunbeam/actions/workflows/linter.yml/badge.svg)](https://github.com/sunbeam-labs/sunbeam/actions/workflows/linter.yml) [![Documentation Status](https://readthedocs.org/projects/sunbeam/badge/?version=stable)](https://sunbeam.readthedocs.io/en/stable/?badge=stable) 
[![DOI:10.1186/s40168-019-0658-x](https://img.shields.io/badge/Published%20in-Microbiome-1abc9c.svg)](https://doi.org/10.1186/s40168-019-0658-x) +[![CircleCI](https://circleci.com/gh/sunbeam-labs/sunbeam/tree/main.svg?style=shield)](https://circleci.com/gh/sunbeam-labs/sunbeam/tree/main) +[![Super-Linter](https://github.com/sunbeam-labs/sunbeam/actions/workflows/linter.yml/badge.svg)](https://github.com/sunbeam-labs/sunbeam/actions/workflows/linter.yml) +[![Conda Envs Status](https://byob.yarr.is/sunbeam-labs/sunbeam/env_check)](https://github.com/sunbeam-labs/sunbeam/actions/workflows/check_conda_envs.yml) +[![Documentation Status](https://readthedocs.org/projects/sunbeam/badge/?version=stable)](https://sunbeam.readthedocs.io/en/stable/?badge=stable) +[![DOI:10.1186/s40168-019-0658-x](https://img.shields.io/badge/Published%20in-Microbiome-1abc9c.svg)](https://doi.org/10.1186/s40168-019-0658-x) Sunbeam is a pipeline written in [snakemake](http://snakemake.readthedocs.io) that simplifies and automates many of the steps in metagenomic sequencing diff --git a/dev_scripts/reformat.sh b/dev_scripts/reformat.sh index fe0627c3..289244ab 100755 --- a/dev_scripts/reformat.sh +++ b/dev_scripts/reformat.sh @@ -5,4 +5,4 @@ cd $SUNBEAM_DIR snakefmt workflow/rules/ snakefmt workflow/Snakefile -black workflow/scripts/ sunbeamlib/ tests/ +black workflow/scripts/ src/sunbeamlib/ tests/e2e/*.py tests/unit/rules/*.py tests/unit/sunbeamlib/*.py tests/*.py diff --git a/docs/install.rst b/docs/install.rst index 45846cfa..6298859d 100755 --- a/docs/install.rst +++ b/docs/install.rst @@ -48,14 +48,6 @@ code under the 'sunbeamlib/' directory. If you have major or incompatible changes to make to the environment you should consider creating a new one under a different name so that you always have a working version installed. --m/--no_mamba -++++++++++++++++ - -Don't use mamba in base environment as dependency solver. 
It is the default -option to use mamba because it is considerably faster than conda in solving new -environments. However it can also sometimes be a pain to install, especially -with crowded 'base' environments. - -v/--verbose +++++++++++++++ diff --git a/environment.yml b/environment.yml index 59c9a6f5..e3346ba9 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,5 @@ channels: - bioconda dependencies: - snakemake - - ruamel.yaml - - semantic_version - git # Ensure sunbeam extend works even with tar installation of main pipeline - python>=3.10 diff --git a/install.sh b/install.sh index cf6dba27..d4175ea5 100755 --- a/install.sh +++ b/install.sh @@ -9,7 +9,6 @@ read -r -d '' __usage <<-'EOF' -s --sunbeam_dir [arg] Location of Sunbeam source code. Default: this directory -c --conda [arg] Location of Conda installation. Default: $CONDA_PREFIX -u --update [arg] Update sunbeam [lib]rary, conda [env], or [all]. - -m --no_mamba Don't use mamba in base environment as dependency solver. -v --verbose Show subcommand output. -d --debug Run in debug mode. -h --help Display this message and exit. 
@@ -67,7 +66,6 @@ __sunbeam_dir="${arg_s:-$(readlink -f ${__dir})}" __sunbeam_env="${arg_e:-sunbeam${__version_tag}}" __update_lib=false __update_env=false -__install_mamba=true if [[ "${arg_u}" = "all" || "${arg_u}" = "env" ]]; then __update_lib=true __update_env=true @@ -75,10 +73,6 @@ elif [[ "${arg_u}" = "lib" ]]; then __update_lib=true fi -if [[ "${arg_m:?}" = "1" ]]; then - __install_mamba=false -fi - __old_path=$PATH PATH=$PATH:${__conda_path}/bin @@ -94,10 +88,6 @@ function __test_conda() { command -v conda &> /dev/null && echo true || echo false } -function __test_mamba() { - command -v mamba &> /dev/null && echo true || echo false -} - function __detect_conda_install() { local discovered=$(__test_conda) if [[ $discovered = true ]]; then @@ -160,12 +150,7 @@ function install_conda () { } function install_environment () { - if [[ $(__test_mamba) = true ]]; then - cmd=mamba - else - cmd=conda - fi - debug_capture $cmd env create --name=$__sunbeam_env \ + debug_capture conda env create --name=$__sunbeam_env \ --quiet --file environment.yml if [[ $(__test_env) != true ]]; then installation_error "Environment creation" @@ -198,8 +183,6 @@ info " Sunbeam env: '${__sunbeam_env}'" debug "Components detected:" __conda_installed=$(__test_conda) debug " Conda: ${__conda_installed}" -__mamba_installed=$(__test_mamba) -debug " Mamba: ${__mamba_installed}" __env_exists=$(__test_env) debug " Environment: ${__env_exists}" __sunbeam_installed=$(__test_sunbeam) @@ -221,16 +204,6 @@ else __env_changed=true fi -# Install mamba -if [[ $__mamba_installed = true ]]; then - info "Mamba already installed." -else - if [[ $__install_mamba = true ]]; then - info "Installing mamba..." - conda install --yes --quiet -n base -c conda-forge mamba || (info "Mamba failed to install, this is usually because you have too many packages already installed to your base environment. Install again without mamba (--no_mamba) or try to fix conflicts in base env." 
&& exit 1) - fi -fi - conda config --set channel_priority strict # Set channel priority on new install # Create Conda environment for Sunbeam diff --git a/pyproject.toml b/pyproject.toml new file mode 100755 index 00000000..f89d7daf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,160 @@ +[project] +# This is the name of your project. The first time you publish this +# package, this name will be registered for you. It will determine how +# users can install this project, e.g.: +# +# $ pip install sampleproject +# +# And where it will live on PyPI: https://pypi.org/project/sampleproject/ +# +# There are some restrictions on what makes a valid project name +# specification here: +# https://packaging.python.org/specifications/core-metadata/#name +name = "sunbeamlib" # Required + +# Versions should comply with PEP 440: +# https://www.python.org/dev/peps/pep-0440/ +# +# For a discussion on single-sourcing the version, see +# https://packaging.python.org/guides/single-sourcing-package-version/ +#version = "0.0.0" # Required +dynamic = ["version"] + +# This is a one-line description or tagline of what your project does. This +# corresponds to the "Summary" metadata field: +# https://packaging.python.org/specifications/core-metadata/#summary +description = "A helper package for running the sunbeam pipeline" # Optional + +# This is an optional longer description of your project that represents +# the body of text which users will see when they visit PyPI. +# +# Often, this is the same as your README, so you can just read it in from +# that file directly (as we have already done above) +# +# This field corresponds to the "Description" metadata field: +# https://packaging.python.org/specifications/core-metadata/#description-optional +readme = "README.md" # Optional + +# Specify which Python versions you support. In contrast to the +# 'Programming Language' classifiers above, 'pip install' will check this +# and refuse to install the project if the version does not match. 
See +# https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires +requires-python = ">=3.7" + +# This is either text indicating the license for the distribution, or a file +# that contains the license +# https://packaging.python.org/en/latest/specifications/core-metadata/#license +# license = {file = "LICENSE.txt"} + +# This field adds keywords for your project which will appear on the +# project page. What does your project relate to? +# +# Note that this is a list of additional keywords, separated +# by commas, to be used to assist searching for the distribution in a +# larger catalog. +#keywords = ["bioinformatics", "metagenomics"] # Optional + +# This should be your name or the name of the organization who originally +# authored the project, and a valid email address corresponding to the name +# listed. +# authors = [ +# {name = "A. Random Developer", email = "author@example.com" } # Optional +# ] + +# This should be your name or the names of the organization who currently +# maintains the project, and a valid email address corresponding to the name +# listed. +maintainers = [ + {name = "Charlie Bushman", email = "ctbushman@gmail.com" } # Optional +] + +# Classifiers help users find your project by categorizing it. +# +# For a list of valid classifiers, see https://pypi.org/classifiers/ +classifiers = [ # Optional + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + "Development Status :: 5 - Production/Stable", + + # Indicate who your project is intended for + # "Intended Audience :: Developers", + #"Topic :: Bioinformatics", + + # Pick your license as you wish + "License :: OSI Approved :: MIT License", + + # Specify the Python versions you support here. In particular, ensure + # that you indicate you support Python 3. These classifiers are *not* + # checked by "pip install". See instead "python_requires" below. 
+ "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3 :: Only", +] + +# This field lists other packages that your project depends on to run. +# Any package you put here will be installed by pip when your project is +# installed, so they must be valid existing projects. +# +# For an analysis of this field vs pip's requirements files see: +# https://packaging.python.org/discussions/install-requires-vs-requirements/ +dependencies = [ # Optional + "more-itertools", + "pyyaml", +] + +# List additional groups of dependencies here (e.g. development +# dependencies). Users will be able to install these using the "extras" +# syntax, for example: +# +# $ pip install sampleproject[dev] +# +# Similar to `dependencies` above, these must be valid existing +# projects. +[project.optional-dependencies] # Optional +dev = ["black"] +test = ["pytest"] + +# List URLs that are relevant to your project +# +# This field corresponds to the "Project-URL" and "Home-Page" metadata fields: +# https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use +# https://packaging.python.org/specifications/core-metadata/#home-page-optional +# +# Examples listed include a pattern for specifying where the package tracks +# issues, where the source is hosted, where to say thanks to the package +# maintainers, and where to support the project financially. The key is +# what's used to render the link text on PyPI. +[project.urls] # Optional +"Homepage" = "https://github.com/sunbeam-labs/sunbeam" +"Bug Reports" = "https://github.com/sunbeam-labs/sunbeam/issues" +"Docs" = "https://sunbeam.readthedocs.io/en/stable/" +# "Funding" = "https://donate.pypi.org" +# "Say Thanks!" 
= "http://saythanks.io/to/example" +"Source" = "https://github.com/sunbeam-labs/sunbeam" + +# The following would provide a command line executable called `sample` +# which executes the function `main` from this package when invoked. +[project.scripts] # Optional +sunbeam = "sunbeamlib.script_sunbeam:main" + +# This is configuration specific to the `setuptools` build backend. +# If you are using a different build backend, you will need to change this. +[tool.setuptools] +# If there are data files included in your packages that need to be +# installed, specify them here. +#package-data = {"sunbeamlib" = ["*.yml", "*.yaml"]} + +[build-system] +# These are the assumed default build requirements from pip: +# https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support +requires = ["setuptools>=61.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "sunbeamlib.__version__"} \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index b19e5b66..00000000 --- a/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name="sunbeam", - setup_requires=['setuptools'], - install_requires=['more-itertools', 'semantic_version', 'pytest'], - packages=find_packages(), - include_package_data=True, - package_data={"sunbeamlib": ["sunbeamlib/data/*.yml", "sunbeamlib/data/*.yaml"]}, - entry_points={'console_scripts': [ - 'sunbeam = sunbeamlib.scripts.command:main' - ]}, - classifiers=[ - 'Programming Language :: Python :: 3.9' - ] -) diff --git a/sunbeamlib/__init__.py b/src/sunbeamlib/__init__.py similarity index 74% rename from sunbeamlib/__init__.py rename to src/sunbeamlib/__init__.py index 277ce17d..f0adf88e 100755 --- a/sunbeamlib/__init__.py +++ b/src/sunbeamlib/__init__.py @@ -1,24 +1,47 @@ -__author__ = "Erik Clarke" -__license__ = "GPL2+" - import os import re import sys import csv - from pathlib import Path +from typing import Dict, List -from 
semantic_version import Version -from sunbeamlib.parse import parse_fasta -__version__ = str(Version.coerce(os.environ.get("SUNBEAM_VER", "0.0.0"))) +class Version: + def __init__(self, version: str) -> None: + self.version = version + if self.version.startswith("v"): + self.version = self.version[1:] + version_parts = self.version.split(".") + self.major = version_parts[0] + try: + self.minor = version_parts[1] + except IndexError: + self.minor = 0 + try: + self.patch = version_parts[2] + except IndexError: + self.patch = 0 + if len(self.patch.split("-")) > 1: + self.patch = self.patch.split("-")[0] -def load_sample_list(samplelist_fp, paired_end=True, root_proj=""): + def __str__(self) -> str: + return f"{self.major}.{self.minor}.{self.patch}" + + +__version__ = str(Version(os.environ.get("SUNBEAM_VER", "0.0.0"))) +__author__ = "Erik Clarke" +__license__ = "GPL2+" + + +def load_sample_list( + samplelist_fp: Path, paired_end: bool = True +) -> Dict[str, Dict[str, str]]: """ Build a list of samples from a sample list file. :param samplelist_fp: a Path to a whitespace-delimited samplelist file, where the first entry is the sample name and the rest is ignored. + :param paired_end: if True, will look for a second column with mate pair :returns: A dictionary of samples with sample name and associated file(s) """ Samples = {} @@ -43,7 +66,9 @@ def load_sample_list(samplelist_fp, paired_end=True, root_proj=""): return Samples -def guess_format_string(fnames, paired_end=True, split_pattern="([_.])"): +def guess_format_string( + fnames: List[str], paired_end: bool = True, split_pattern: str = "([_.])" +) -> str: """ Try to guess the format string given a list of filenames. 
:param fnames: a list of filename strings @@ -93,7 +118,7 @@ class SampleFormatError(Exception): pass -def _verify_path(fp): +def _verify_path(fp: str) -> str: if not fp: raise ValueError("Missing filename") path = Path(fp) @@ -102,20 +127,13 @@ def _verify_path(fp): return str(path.resolve()) -def circular(seq, kmin, kmax, min_len): +def circular(seq: str, kmin: int, kmax: int, min_len: int) -> bool: """Determine if a sequence is circular. Checks for repeated k-mer at beginning and end of a sequence for a given - range of values for k.""" + range of values for k. + """ if len(seq) < min_len: return False # Short-circuit checking: returns True for the first kmer that matches return any([k for k in range(kmin, kmax + 1) if seq[0:k] == seq[len(seq) - k :]]) - - -def read_seq_ids(fasta_fp): - """ - Return the sequence identifiers for a given fasta filename. - """ - with open(str(fasta_fp)) as f: - return list(parse_fasta(f)) diff --git a/sunbeamlib/config.py b/src/sunbeamlib/config.py similarity index 66% rename from sunbeamlib/config.py rename to src/sunbeamlib/config.py index bca8dd6d..7ac27565 100644 --- a/sunbeamlib/config.py +++ b/src/sunbeamlib/config.py @@ -1,19 +1,18 @@ import os import sys -from collections.abc import Mapping +import yaml from pathlib import Path from pkg_resources import resource_stream +from typing import Dict, TextIO, Tuple, Union -from semantic_version import Version -import ruamel.yaml -from sunbeamlib import __version__ +from sunbeamlib import __version__, Version -def makepath(path): +def makepath(path: str) -> Path: return Path(path).expanduser() -def verify(path): +def verify(path: str) -> Path: path = Path(path) if path.exists(): return path.resolve() @@ -21,7 +20,7 @@ def verify(path): raise ValueError("Path %s does not exist" % path) -def validate_paths(cfg, root): +def validate_paths(cfg: Dict[str, str], root: Path) -> Dict[str, Union[str, Path]]: """Process paths in config file subsection. 
For each key ending in _fp, the value is: @@ -31,6 +30,7 @@ def validate_paths(cfg, root): - expanded home directory ~ :param cfg: a config file subsection + :param root: the root directory for the project :returns: an updated copy of cfg """ new_cfg = dict() @@ -51,7 +51,7 @@ def validate_paths(cfg, root): return new_cfg -def check_compatibility(cfg): +def check_compatibility(cfg: Dict[str, Dict[str, str]]) -> Tuple[str, str]: """Returns the major version numbers from the package and config file, respectively""" cfg_version = Version(cfg["all"].get("version", "0.0.0")) @@ -60,7 +60,7 @@ def check_compatibility(cfg): return (pkg_version.major, cfg_version.major) -def check_config(cfg): +def check_config(cfg: Dict[str, Dict[str, str]]) -> Dict[str, Dict[str, str]]: """Resolve root in config file, then validate paths.""" if "root" in cfg["all"]: @@ -75,42 +75,37 @@ def check_config(cfg): return new_cfg -def output_subdir(cfg, section): +def output_subdir(cfg: Dict[str, Dict[str, str]], section: str) -> Path: return cfg["all"]["output_fp"] / cfg[section]["suffix"] -def _update_dict(target, new): +def _update_dict( + target: Dict[str, Union[str, Dict]], new: Dict[str, Union[str, Dict]] +) -> Dict[str, Union[str, Dict]]: for k, v in new.items(): - if isinstance(v, Mapping): - # We could use .get() here but ruamel.yaml's weird Mapping - # subclass outputs errors to stdout if the key doesn't exist - if k in target: - target[k] = _update_dict(target[k], v) - else: - target[k] = _update_dict({}, v) + if isinstance(v, dict): + target[k] = _update_dict(target.get(k, {}), v) else: target[k] = v return target -def _update_dict_strict(target, new): +def _update_dict_strict( + target: Dict[str, Union[str, Dict]], new: Dict[str, Union[str, Dict]] +) -> Dict[str, Union[str, Dict]]: for k, v in new.items(): - if isinstance(v, Mapping) and k in target.keys(): - target[k] = _update_dict_strict(target.get(k, {}), v) - elif k in target.keys(): - target[k] = v - else: - 
sys.stderr.write("Key '%s' not found in target, skipping\n" % k) - continue + target[k] = _update_dict_strict(target.get(k, {}), v) return target -def update(config_str, new, strict=False): - config = ruamel.yaml.round_trip_load(config_str) +def update( + config_str: str, new: Dict[str, Union[str, Dict]], strict: bool = False +) -> Dict[str, Union[str, Dict]]: + config = yaml.safe_load(config_str) if strict: config = _update_dict_strict(config, new) else: - sbx_config = ruamel.yaml.round_trip_load(extension_config()) + sbx_config = yaml.safe_load(extension_config()) if sbx_config: for k, v in sbx_config.items(): if k not in config.keys(): @@ -120,12 +115,14 @@ def update(config_str, new, strict=False): return config -def new(project_fp, version=__version__, template=None): +def new( + project_fp: Union[str, Path], version: str = __version__, template: TextIO = None +) -> str: if template: config = template.read() else: config = str( - resource_stream("sunbeamlib", "data/default_config.yml").read().decode() + resource_stream("sunbeamlib", "default_config.yml").read().decode() ) # add config from extensions config = config + extension_config() @@ -133,7 +130,7 @@ def new(project_fp, version=__version__, template=None): return config.format(PROJECT_FP=project_fp, SB_VERSION=version) -def extension_config(): +def extension_config() -> str: config = "" sunbeam_dir = Path(os.getenv("SUNBEAM_DIR", os.getcwd())) for sbx in os.listdir(sunbeam_dir / "extensions"): @@ -153,16 +150,14 @@ def extension_config(): return config -def load_defaults(default_name): - return ruamel.yaml.safe_load( - resource_stream("sunbeamlib", "data/{}.yml".format(default_name)) - .read() - .decode() +def load_defaults(default_name: str) -> Dict[str, Union[str, Dict]]: + return yaml.safe_load( + resource_stream("sunbeamlib", "{}.yml".format(default_name)).read().decode() ) -def dump(config, out=sys.stdout): - if isinstance(config, Mapping): - ruamel.yaml.round_trip_dump(config, out) +def 
dump(config: Union[str, Dict], out: TextIO = sys.stdout) -> None: + if isinstance(config, dict): + yaml.safe_dump(config, out) else: out.write(config) diff --git a/src/sunbeamlib/decontam.py b/src/sunbeamlib/decontam.py new file mode 100755 index 00000000..85f59f86 --- /dev/null +++ b/src/sunbeamlib/decontam.py @@ -0,0 +1,31 @@ +from typing import Dict, Iterator, Tuple, Union + +from sunbeamlib.parse import parse_sam + + +def get_mapped_reads(fp: str, min_pct_id: float, min_len_frac: float) -> Iterator[str]: + with open(fp, "r") as sam_file: + for read in parse_sam(sam_file): + if ( + (not read["FLAG"] & 0x4) # not unmapped + and (_get_frac(read) > min_len_frac) + and (_get_pct_identity(read) > min_pct_id) + ): + yield read["QNAME"] + + +def _get_pct_identity( + read: Dict[str, Union[int, float, str, Tuple[int, str]]] +) -> float: + edit_dist = read.get("NM", 0) + pct_mm = float(edit_dist) / len(read["SEQ"]) + return 1 - pct_mm + + +def _get_frac(read: Dict[str, Union[int, float, str, Tuple[int, str]]]) -> float: + clip = 0 + for pair in read["CIGAR"]: + if pair[1] == "S" or pair[1] == "H": + clip += pair[0] + frac = float(len(read["SEQ"])) / (len(read["SEQ"]) + clip) + return frac diff --git a/sunbeamlib/data/default_config.yml b/src/sunbeamlib/default_config.yml similarity index 100% rename from sunbeamlib/data/default_config.yml rename to src/sunbeamlib/default_config.yml diff --git a/sunbeamlib/data/default_profile.yaml b/src/sunbeamlib/default_profile.yaml similarity index 100% rename from sunbeamlib/data/default_profile.yaml rename to src/sunbeamlib/default_profile.yaml diff --git a/sunbeamlib/data/microb120.yml b/src/sunbeamlib/microb120.yml similarity index 100% rename from sunbeamlib/data/microb120.yml rename to src/sunbeamlib/microb120.yml diff --git a/src/sunbeamlib/parse.py b/src/sunbeamlib/parse.py new file mode 100755 index 00000000..686f7bd3 --- /dev/null +++ b/src/sunbeamlib/parse.py @@ -0,0 +1,120 @@ +from itertools import groupby +from 
more_itertools import grouper +from pathlib import Path +from typing import Dict, Iterator, List, TextIO, Tuple, Union + + +BLAST6_DEFAULTS = [ + "qseqid", + "sseqid", + "pident", + "length", + "mismatch", + "gapopen", + "qstart", + "qend", + "sstart", + "send", + "evalue", + "bitscore", +] + + +# Source: https://www.biostars.org/p/710/ +def parse_fasta(f: TextIO) -> Iterator[Tuple[str, str]]: + faiter = (x[1] for x in groupby(f, lambda line: line[0] == ">")) + + for header in faiter: + # drop the ">" + header_str = header.__next__()[1:].strip() + + # join all sequence lines to one. + seq_str = "".join(s.strip() for s in faiter.__next__()) + + yield (header_str, seq_str) + + +def read_seq_ids(fasta_fp: Union[str, Path]) -> List[Tuple[str, str]]: + """ + Return the sequence identifiers for a given fasta filepath. + """ + with open(str(fasta_fp)) as f: + return list(parse_fasta(f)) + + +def write_fasta(record: Tuple[str, str], f: TextIO) -> None: + f.write(f">{record[0]}\n") + f.write(f"{record[1]}\n") + + +def parse_fastq(f: TextIO) -> Iterator[Tuple[str, str, str, str]]: + for g in grouper(f.readlines(), 4): + header_str = g[0][1:].strip() + seq_str = g[1].strip() + plus_str = g[2].strip() + quality_str = g[3].strip() + + yield (header_str, seq_str, plus_str, quality_str) + + +def write_fastq(record: Tuple[str, str, str, str], f: TextIO) -> None: + for i, l in enumerate(record): + if i == 0: + f.write(f"@{l}\n") + else: + f.write(f"{l}\n") + + +def write_many_fastq(record_list: List[Tuple[str, str, str, str]], f: TextIO) -> None: + record_list = [ + [f"@{r[0]}\n", f"{r[1]}\n", f"{r[2]}\n", f"{r[3]}\n"] for r in record_list + ] + record_list = [item for sublist in record_list for item in sublist] + f.writelines(record_list) + + +def parse_sam( + f: TextIO, +) -> Iterator[Dict[str, Union[int, float, str, Tuple[int, str]]]]: + for line in f: + if line.startswith("@"): + continue + + fields = line.strip().split("\t") + result = { + "QNAME": fields[0], + "FLAG": 
int(fields[1]), + "RNAME": fields[2], + "POS": int(fields[3]), + "MAPQ": int(fields[4]), + "CIGAR": fields[5], + "RNEXT": fields[6], + "PNEXT": int(fields[7]), + "TLEN": int(fields[8]), + "SEQ": fields[9], + "QUAL": fields[10], + } + + cigar_tuples = [] + current_length = "0" + + for char in result["CIGAR"]: + if char.isdigit(): + current_length += char + else: + cigar_tuples.append((int(current_length), char)) + current_length = "0" + + result["CIGAR"] = cigar_tuples + + # Parse optional fields + optional_fields = fields[11:] + for field in optional_fields: + tag, data_type, value = field.split(":") + if data_type == "i": + value = int(value) + elif data_type == "f": + value = float(value) + result[tag] = value + + yield result diff --git a/sunbeamlib/data/pmacs.yml b/src/sunbeamlib/pmacs.yml similarity index 100% rename from sunbeamlib/data/pmacs.yml rename to src/sunbeamlib/pmacs.yml diff --git a/sunbeamlib/post.py b/src/sunbeamlib/post.py similarity index 91% rename from sunbeamlib/post.py rename to src/sunbeamlib/post.py index 402041ce..92754b15 100755 --- a/sunbeamlib/post.py +++ b/src/sunbeamlib/post.py @@ -3,7 +3,7 @@ import os -def compile_benchmarks(benchmark_fp: str, stats_fp: str): +def compile_benchmarks(benchmark_fp: str, stats_fp: str) -> None: """Aggregate all the benchmark files into one and put it in stats_fp""" benchmarks = [] try: diff --git a/sunbeamlib/qc.py b/src/sunbeamlib/qc.py similarity index 86% rename from sunbeamlib/qc.py rename to src/sunbeamlib/qc.py index 49a91490..afc4c54e 100644 --- a/sunbeamlib/qc.py +++ b/src/sunbeamlib/qc.py @@ -3,10 +3,12 @@ """ import gzip +from pathlib import Path from sunbeamlib.parse import parse_fastq, write_many_fastq +from typing import List, TextIO -def filter_ids(fp_in, fp_out, ids, log): +def filter_ids(fp_in: Path, fp_out: Path, ids: List[str], log: TextIO) -> None: """Remove ids from FASTQ file. 
fp_in: path to input FASTQ @@ -31,7 +33,7 @@ def filter_ids(fp_in, fp_out, ids, log): write_many_fastq(records, f_out) -def remove_pair_id(id, log): +def remove_pair_id(id: str, log: TextIO) -> str: """Remove the 1 or 2 from a paired read ID id: id string diff --git a/sunbeamlib/reports.py b/src/sunbeamlib/reports.py similarity index 81% rename from sunbeamlib/reports.py rename to src/sunbeamlib/reports.py index bd0c2422..004958b0 100644 --- a/sunbeamlib/reports.py +++ b/src/sunbeamlib/reports.py @@ -1,13 +1,13 @@ -from collections import OrderedDict +import pandas import re import os import sys - -import pandas +from collections import OrderedDict from io import StringIO +from typing import TextIO -def parse_trim_summary_paired(f): +def parse_trim_summary_paired(f: TextIO) -> OrderedDict[str, str]: for line in f.readlines(): if line.startswith("Input Read"): vals = re.findall("\D+\: (\d+)", line) @@ -15,7 +15,7 @@ def parse_trim_summary_paired(f): return OrderedDict(zip(keys, vals)) -def parse_trim_summary_single(f): +def parse_trim_summary_single(f: TextIO) -> OrderedDict[str, str]: for line in f: if line.startswith("Input Read"): vals = re.findall("\D+\: (\d+)", line) @@ -23,17 +23,19 @@ def parse_trim_summary_single(f): return OrderedDict(zip(keys, vals)) -def parse_decontam_log(f): +def parse_decontam_log(f: TextIO) -> OrderedDict[str, str]: keys = f.readline().rstrip().split("\t") vals = f.readline().rstrip().split("\t") return OrderedDict(zip(keys, vals)) -def parse_komplexity_log(f): - return OrderedDict([("komplexity", len(f.readlines()))]) +def parse_komplexity_log(f: TextIO) -> OrderedDict[str, str]: + return OrderedDict([("komplexity", str(len(f.readlines())))]) -def summarize_qual_decontam(tfile, dfile, kfile, paired_end): +def summarize_qual_decontam( + tfile: str, dfile: str, kfile: str, paired_end: bool +) -> pandas.DataFrame: """Return a dataframe for summary information for trimmomatic and decontam rule""" tname = 
os.path.basename(tfile).split(".out")[0] with open(tfile) as tf: @@ -56,7 +58,7 @@ def summarize_qual_decontam(tfile, dfile, kfile, paired_end): ) -def parse_fastqc_quality(filename): +def parse_fastqc_quality(filename: str) -> pandas.DataFrame: with open(filename) as f: report = f.read() try: diff --git a/sunbeamlib/scripts/_config.py b/src/sunbeamlib/script_config.py similarity index 97% rename from sunbeamlib/scripts/_config.py rename to src/sunbeamlib/script_config.py index 95e595f5..6f9998d1 100644 --- a/sunbeamlib/scripts/_config.py +++ b/src/sunbeamlib/script_config.py @@ -1,5 +1,5 @@ import sys -import ruamel.yaml +import yaml import argparse from sunbeamlib import config @@ -107,7 +107,7 @@ def update(args): "to prevent unexpected loss of config settings." ) - old_config = ruamel.yaml.safe_load(args.config_file) + old_config = yaml.safe_load(args.config_file) # Remove the old version number old_config.get("all", {}).pop("version", None) @@ -134,7 +134,7 @@ def update(args): def modify(args): update_src = args.str if args.str else args.file - new_values = ruamel.yaml.safe_load(update_src) + new_values = yaml.safe_load(update_src) if isinstance(new_values, str) and args.str: raise SystemExit( "Invalid YAML in --str. 
Did you make sure to put spaces between " diff --git a/sunbeamlib/scripts/extend.py b/src/sunbeamlib/script_extend.py similarity index 100% rename from sunbeamlib/scripts/extend.py rename to src/sunbeamlib/script_extend.py diff --git a/sunbeamlib/scripts/init.py b/src/sunbeamlib/script_init.py similarity index 97% rename from sunbeamlib/scripts/init.py rename to src/sunbeamlib/script_init.py index b78df2eb..3279ccb7 100644 --- a/sunbeamlib/scripts/init.py +++ b/src/sunbeamlib/script_init.py @@ -2,10 +2,10 @@ import shutil import sys import argparse -import ruamel.yaml +import yaml from pathlib import Path -from .list_samples import ( +from sunbeamlib.script_list_samples import ( build_sample_list, MissingMatePairError, SampleFormatError, @@ -170,7 +170,7 @@ def write_config(args, project_fp, samplelists): cfg = config.new(project_fp=project_fp, template=args.template) defaults = {} if args.defaults: - defaults = ruamel.yaml.safe_load(args.defaults) + defaults = yaml.safe_load(args.defaults) # Override loaded config defaults (if any) for a few specific items. 
paired = layout == "paired" defaults["all"] = defaults.get("all", {}) @@ -189,7 +189,7 @@ def write_config(args, project_fp, samplelists): def write_profile(args, project_fp): sunbeam_dir = Path(os.getenv("SUNBEAM_DIR", os.getcwd())) - template_fp = sunbeam_dir / "sunbeamlib" / "data" / f"{args.profile}_profile.yaml" + template_fp = sunbeam_dir / "src" / "sunbeamlib" / f"{args.profile}_profile.yaml" config_fp = project_fp / "config.yaml" shutil.copyfile(template_fp, config_fp) with open(config_fp, "a") as f: diff --git a/sunbeamlib/scripts/list_samples.py b/src/sunbeamlib/script_list_samples.py similarity index 100% rename from sunbeamlib/scripts/list_samples.py rename to src/sunbeamlib/script_list_samples.py diff --git a/sunbeamlib/scripts/run.py b/src/sunbeamlib/script_run.py similarity index 88% rename from sunbeamlib/scripts/run.py rename to src/sunbeamlib/script_run.py index 8868281c..99ded2f3 100644 --- a/sunbeamlib/scripts/run.py +++ b/src/sunbeamlib/script_run.py @@ -27,6 +27,12 @@ def main(argv=sys.argv): default=os.getenv("SUNBEAM_DIR", os.getcwd()), help="Path to Sunbeam installation", ) + parser.add_argument( + "-m", + "--mamba", + action="store_true", + help="Use mamba instead of conda to manage environments", + ) parser.add_argument( "--target_list", nargs="+", default=[], help="List of sunbeam targets" ) @@ -43,6 +49,8 @@ def main(argv=sys.argv): conda_prefix = Path(args.sunbeam_dir) / ".snakemake" + conda_cmd = "conda" if not args.mamba else "mamba" + cmds = list() if args.target_list == []: args.target_list = [""] @@ -61,6 +69,8 @@ def main(argv=sys.argv): str(snakefile), "--conda-prefix", str(conda_prefix), + "--conda-frontend", + conda_cmd, target, ] if arg diff --git a/sunbeamlib/scripts/command.py b/src/sunbeamlib/script_sunbeam.py similarity index 84% rename from sunbeamlib/scripts/command.py rename to src/sunbeamlib/script_sunbeam.py index ef59e01b..f3b2e5fa 100644 --- a/sunbeamlib/scripts/command.py +++ b/src/sunbeamlib/script_sunbeam.py @@ 
-1,11 +1,12 @@ -import sys import argparse +import sys + import sunbeamlib -from sunbeamlib.scripts.run import main as Run -from sunbeamlib.scripts.init import main as Init -from sunbeamlib.scripts._config import main as Config -from sunbeamlib.scripts.list_samples import main as ListSamples -from sunbeamlib.scripts.extend import main as Extend +from sunbeamlib.script_run import main as Run +from sunbeamlib.script_init import main as Init +from sunbeamlib.script_config import main as Config +from sunbeamlib.script_list_samples import main as ListSamples +from sunbeamlib.script_extend import main as Extend def main(): diff --git a/sunbeamlib/data/slurm_profile.yaml b/src/sunbeamlib/slurm_profile.yaml similarity index 100% rename from sunbeamlib/data/slurm_profile.yaml rename to src/sunbeamlib/slurm_profile.yaml diff --git a/src/sunbeamlib/tibanna_profile.yaml b/src/sunbeamlib/tibanna_profile.yaml new file mode 100755 index 00000000..d70ccd39 --- /dev/null +++ b/src/sunbeamlib/tibanna_profile.yaml @@ -0,0 +1,85 @@ +# Default options for running sunbeam on tibanna +rerun-incomplete: True +rerun-triggers: "mtime" +latency-wait: 90 +jobs: 10 +keep-going: True +notemp: True +printshellcmds: True +nolock: True +verbose: True +use-conda: True +cores: 24 +tibanna: True +default-remote-prefix: "" + + +# Default resource configuration +default-resources: + - runtime=15 + - mem_mb=8000 + - disk_mb=1000 +# set-threads: map rule names to threads +set-threads: + - diamond_reads=8 + - gene_hits=8 + - adapter_removal_unpaired=4 + - adapter_removal_paired=4 + - trimmomatic_unpaired=4 + - trimmomatic_paired=4 + - align_to_host=4 + - kraken2_classify_report=8 + - megahit_paired=4 + - megahit_unpaired=4 + - run_blastn=4 + - run_diamond_blastp=4 + - run_diamond_blastx=4 + - align_to_genome=4 + # - samtools_convert=4 +# set-resources: map rule names to resources in general +set-resources: + - fq_2_fa:mem_mb=8000 + - fq_2_fa:runtime=60 + - build_gene_clusters_diamond_db:mem_mb=32000 + - 
build_gene_clusters_diamond_db:runtime=30 + - diamond_reads:mem_mb=32000 + - diamond_reads:runtime=1440 + - gene_hits:mem_mb=32000 + - gene_hits:runtime=1440 + - sample_intake:mem_mb=4000 + - sample_intake:runtime=20 + - adapter_removal_paired:mem_mb=8000 + - adapter_removal_paired:runtime=60 + - qc_final:mem_mb=8000 + - qc_final:runtime=120 + - demultiplex:mem_mb=10000 + - demultiplex:runtime=720 + - trimmomatic_paired:mem_mb=8000 + - trimmomatic_paired:runtime=240 + - fastqc:mem_mb=4000 + - fastqc:runtime=120 + - remove_low_complexity:mem_mb=32000 + - remove_low_complexity:runtime=120 + - align_to_host:mem_mb=16000 + - align_to_host:runtime=240 + - filter_unmapped_reads:mem_mb=24000 + - filter_unmapped_reads:runtime=240 + - megahit_paired:mem_mb=20000 + - megahit_paired:runtime=720 + - final_filter:mem_mb=4000 + - kraken2_classify_report:mem_mb=48000 + - kraken2_classify_report:runtime=2880 + - kraken2_biom:mem_mb=4000 + - kraken2_biom:runtime=240 + - classic_k2_biom:mem_mb=4000 + - classic_k2_biom:runtime=240 + - run_blastn:mem_mb=16000 + - run_blastn:runtime=720 + - run_diamond_blastp:mem_mb=16000 + - run_diamond_blastp:runtime=720 + - run_diamond_blastx:mem_mb=24000 + - run_diamond_blastx:runtime=720 + - prodigal:mem_mb=16000 + - prodigal:runtime=720 + - aggregate_results:mem_mb=8000 + - aggregate_results:runtime=120 \ No newline at end of file diff --git a/sunbeamlib/decontam.py b/sunbeamlib/decontam.py deleted file mode 100755 index 624ecf58..00000000 --- a/sunbeamlib/decontam.py +++ /dev/null @@ -1,31 +0,0 @@ -import pysam - - -def get_mapped_reads(fp, min_pct_id, min_len_frac): - sam = pysam.AlignmentFile(fp) - for read in sam: - if ( - (not read.is_unmapped) - and (_get_frac(read) > min_len_frac) - and (_get_pct_identity(read) > min_pct_id) - ): - yield read.query_name - - -def _get_pct_identity(read): - if read.has_tag("NM"): - edit_dist = read.get_tag("NM") - else: - edit_dist = 0 - pct_mm = float(edit_dist) / read.alen - return 1 - pct_mm - - -def 
_get_frac(read): - cigar = read.cigartuples - clip = 0 - for pair in cigar: - if pair[0] == 4 or pair[0] == 5: - clip = clip + pair[1] - frac = float(read.query_alignment_length) / (read.query_alignment_length + clip) - return frac diff --git a/sunbeamlib/parse.py b/sunbeamlib/parse.py deleted file mode 100755 index 28184795..00000000 --- a/sunbeamlib/parse.py +++ /dev/null @@ -1,70 +0,0 @@ -from itertools import groupby -from more_itertools import grouper - - -BLAST6_DEFAULTS = [ - "qseqid", - "sseqid", - "pident", - "length", - "mismatch", - "gapopen", - "qstart", - "qend", - "sstart", - "send", - "evalue", - "bitscore", -] - - -# Source: https://www.biostars.org/p/710/ -def parse_fasta(f): - faiter = (x[1] for x in groupby(f, lambda line: line[0] == ">")) - - for header in faiter: - # drop the ">" - header_str = header.__next__()[1:].strip() - - # join all sequence lines to one. - seq_str = "".join(s.strip() for s in faiter.__next__()) - - yield (header_str, seq_str) - - -def write_fasta(record, f): - f.write(f">{record[0]}\n") - f.write(f"{record[1]}\n") - - -def parse_fastq(f): - for g in grouper(f.readlines(), 4): - header_str = g[0][1:].strip() - seq_str = g[1].strip() - plus_str = g[2].strip() - quality_str = g[3].strip() - - yield (header_str, seq_str, plus_str, quality_str) - - -def write_fastq(record, f): - for i, l in enumerate(record): - if i == 0: - f.write(f"@{l}\n") - else: - f.write(f"{l}\n") - - -def write_many_fastq(record_list, f): - record_list = [ - [f"@{r[0]}\n", f"{r[1]}\n", f"{r[2]}\n", f"{r[3]}\n"] for r in record_list - ] - record_list = [item for sublist in record_list for item in sublist] - f.writelines(record_list) - - -def parse_blast6(f, outfmt=BLAST6_DEFAULTS): - for line in f.readlines(): - vals = line.strip().split("\t") - if len(outfmt) == len(vals): - yield dict(zip(outfmt, vals)) diff --git a/sunbeamlib/scripts/__init__.py b/sunbeamlib/scripts/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git 
a/tests/config_fixture.py b/tests/config_fixture.py index e090d171..51d288e9 100755 --- a/tests/config_fixture.py +++ b/tests/config_fixture.py @@ -3,38 +3,32 @@ import shutil import sys import tempfile +import yaml from pathlib import Path -from ruamel.yaml import YAML @pytest.fixture def config(): tests_dir = Path(__file__).parent.resolve() - yaml = YAML(typ="safe") with open(tests_dir / "test_config.yml") as f: - config_dict = yaml.load(f) - - if not shutil.which("mamba"): - sys.exit( - "Sunbeam needs mamba to be installed to run tests `conda install -c conda-forge mamba`. If you want to run sunbeam (not the tests) without mamba you can use `sunbeam run --profile /path/to/project/ --conda-frontend=conda`." - ) + config_dict = yaml.safe_load(f) yield config_dict @pytest.fixture def output_dir(config): - yaml = config + config = config output_dir = Path() - if not yaml["output_dir"]: + if not config["output_dir"]: output_dir = Path(tempfile.mkdtemp()) else: - output_dir = Path(yaml["output_dir"]) + output_dir = Path(config["output_dir"]) output_dir.mkdir(parents=True, exist_ok=True) if not os.listdir(output_dir) == []: - if yaml["overwrite"]: + if config["overwrite"]: shutil.rmtree(output_dir) output_dir.mkdir() else: @@ -42,7 +36,7 @@ def output_dir(config): "overwrite is set to false but output_dir points to a non-empty directory" ) - if not yaml["temp_env"]: + if not config["temp_env"]: pass else: # TODO: Create temp_env @@ -67,5 +61,5 @@ def output_dir(config): shutil.move(extensions_moved_fp, extensions_fp) except FileNotFoundError as e: pass - if not yaml["output_dir"]: + if not config["output_dir"]: shutil.rmtree(output_dir) diff --git a/tests/e2e/test_sunbeam_config.py b/tests/e2e/test_sunbeam_config.py index 906eda58..caf932bd 100755 --- a/tests/e2e/test_sunbeam_config.py +++ b/tests/e2e/test_sunbeam_config.py @@ -3,8 +3,8 @@ import shutil import subprocess as sp import sys +import yaml from pathlib import Path -from ruamel.yaml import YAML test_dir = 
Path(__file__).parent.parent.resolve() sys.path.append(test_dir) @@ -58,9 +58,8 @@ def config_modify(init): def test_config_modify(config_modify): output_dir, hosts_fp = config_modify - yaml = YAML(typ="safe") with open(output_dir / "sunbeam_config.yml") as f: - config_dict = yaml.load(f) + config_dict = yaml.safe_load(f) assert config_dict["qc"]["host_fp"] == str(hosts_fp) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/tests/unit/sunbeamlib/__init__.py b/tests/unit/sunbeamlib/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/tests/unit/sunbeamlib/test__init__.py b/tests/unit/sunbeamlib/test__init__.py index 3c11186b..2af759ee 100755 --- a/tests/unit/sunbeamlib/test__init__.py +++ b/tests/unit/sunbeamlib/test__init__.py @@ -14,7 +14,6 @@ guess_format_string, _verify_path, circular, - read_seq_ids, ) data_dir = Path(__file__).parent / "data" diff --git a/tests/unit/sunbeamlib/test_parse.py b/tests/unit/sunbeamlib/test_parse.py index a5f748c4..a0d1c1ee 100755 --- a/tests/unit/sunbeamlib/test_parse.py +++ b/tests/unit/sunbeamlib/test_parse.py @@ -1,7 +1,4 @@ import gzip -import os -import pytest -import shutil import sys from pathlib import Path diff --git a/workflow/Snakefile b/workflow/Snakefile index ad7e2bd2..962e74c8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -17,8 +17,9 @@ from pathlib import Path, PurePath from snakemake.utils import update_config, listfiles from snakemake.exceptions import WorkflowError -from sunbeamlib import load_sample_list, read_seq_ids +from sunbeamlib import load_sample_list from sunbeamlib.config import * +from sunbeamlib.parse import read_seq_ids from sunbeamlib.post import * from sunbeamlib.reports import * @@ -74,7 +75,6 @@ for sbx in sbxs: Samples = load_sample_list( Cfg["all"]["samplelist_fp"], Cfg["all"]["paired_end"], - Cfg["all"]["root"] / Cfg["all"]["output_fp"], ) Pairs = ["1", "2"] if Cfg["all"]["paired_end"] 
else ["1"] diff --git a/workflow/envs/cutadapt.yml b/workflow/envs/cutadapt.yml index fa062e41..2058827b 100755 --- a/workflow/envs/cutadapt.yml +++ b/workflow/envs/cutadapt.yml @@ -1,5 +1,7 @@ -name: cutadapt channels: + - conda-forge - bioconda dependencies: - - cutadapt \ No newline at end of file + - cutadapt + - python =3.10.12 +name: cutadapt diff --git a/workflow/envs/qc.yml b/workflow/envs/qc.yml index 278e499a..a47f0781 100755 --- a/workflow/envs/qc.yml +++ b/workflow/envs/qc.yml @@ -1,9 +1,9 @@ -name: qc -channels: - - bioconda - - conda-forge -dependencies: - - bwa - - fastqc - - trimmomatic - - python>=3.10 \ No newline at end of file +name: qc +channels: + - bioconda + - conda-forge +dependencies: + - bwa + - fastqc + - trimmomatic + - python =3.11.5 \ No newline at end of file diff --git a/workflow/envs/samtools.linux-64.pin.txt b/workflow/envs/samtools.linux-64.pin.txt deleted file mode 100644 index 8a3f3031..00000000 --- a/workflow/envs/samtools.linux-64.pin.txt +++ /dev/null @@ -1,40 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.10-3_cp310.conda#4eb33d14d794b0f4be116443ffed3853 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a -https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.1.0-he5830b7_0.conda#56ca14d57ac29a75d23a39eb3ee0ddeb 
-https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.19.1-hd590300_0.conda#e8c18d865be43e2fb3f7a145b6adf1f5 -https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-h516909a_1.tar.bz2#6f8720dff19e17ce5d48cfe7f3d2f0a3 -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.2-hd590300_0.conda#e5ac5227582d6c83ccf247288c0eb095 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.52.0-h61bc06f_0.conda#613955a50485812985c059e7b269f42e -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 
-https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe -https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-hfc55251_7.conda#32ae18eb2a687912fc9e92a501c0a11b -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.1-h659d440_0.conda#1b5126ec25763eb17ef74c8763d26e84 -https://conda.anaconda.org/conda-forge/linux-64/python-3.10.12-hd12c33a_0_cpython.conda#eb6f1df105f37daedd6dca78523baa75 -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.2.1-hca28451_0.conda#96aec6156d58591f5a4e67056521ce1b -https://conda.anaconda.org/conda-forge/noarch/setuptools-68.0.0-pyhd8ed1ab_0.conda#5a7739d0f57ee64133c9d32e6507c46d -https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.1-pyhd8ed1ab_0.conda#8f467ba2db2b5470d297953d9c1f9c7d -https://conda.anaconda.org/conda-forge/noarch/pip-23.2.1-pyhd8ed1ab_0.conda#e2783aa3f9235225eec92f9081c5b801 -https://conda.anaconda.org/bioconda/linux-64/pysam-0.21.0-py310h41dec4a_1.tar.bz2#9af70a0f7f85e62d8262bcb3c439a7e8 diff --git a/workflow/envs/samtools.yml b/workflow/envs/samtools.yml deleted file mode 100755 index 2ace65da..00000000 --- a/workflow/envs/samtools.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: samtools -channels: - - conda-forge - - bioconda -dependencies: - - pysam - - python>=3.10 \ No newline at end of file diff --git a/workflow/rules/decontaminate.smk b/workflow/rules/decontaminate.smk index 5f755e39..6c09fd53 100644 --- a/workflow/rules/decontaminate.smk +++ b/workflow/rules/decontaminate.smk @@ -60,8 +60,6 @@ rule get_mapped_reads: params: pct_id=Cfg["qc"]["pct_id"], frac=Cfg["qc"]["frac"], - conda: - "../envs/samtools.yml" 
script: "../scripts/get_mapped_reads.py"