Skip to content

Commit

Permalink
Merge pull request #73 from mdmparis/merge_folders
Browse files Browse the repository at this point in the history
- defense-finder is now able to run defense-finder-models >= v2, in which all models (except Cas and RM) are in a single model directory.
- add retrocompatibility with older models
- add a check to let the user know whether they run the last version of defense-finder-models or not. (checks for new updates once a month)
- pin to Macsyfinder version 2.1.4
- remove support for python < 3.10
  • Loading branch information
jeanrjc authored Feb 6, 2025
2 parents 96a1ba5 + c6df78f commit feace4b
Show file tree
Hide file tree
Showing 17 changed files with 48,792 additions and 22,718 deletions.
52 changes: 32 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ To retrieve it run:
defense-finder update
```

The **Conda/Mamba** package is not maintained in this repository and is published by a third party, so there may be a delay between a release here (on pip) and its availability on conda.
***We recommend installing with pip*** whenever possible.

### Updating DefenseFinder
If you have not used DefenseFinder recently, make sure you have the latest version of the models.
To check for the latest models, and download them if necessary, run:
Expand Down Expand Up @@ -204,25 +207,34 @@ Usage: defense-finder run [OPTIONS] FILE
Search for all known anti-phage defense systems in the target fasta file.

Options:
-o, --out-dir TEXT The target directory where to store the results.
Defaults to the current directory.
-w, --workers INTEGER The workers count. By default all cores will be used
(w=0).
-c, --coverage FLOAT Minimal percentage of coverage for each profiles. By
default set to 0.4
--db-type TEXT The macsyfinder --db-type option. Run macsyfinder
--help for more details. Possible values are
ordered_replicon, gembase, unordered, defaults to
ordered_replicon.
--preserve-raw Preserve raw MacsyFinder outputs alongside Defense
Finder results inside the output directory.
--models-dir TEXT Specify a directory containing your models.
--no-cut-ga Advanced! Run macsyfinder in no-cut-ga mode. The
validity of the genes and systems found is not
guaranteed!
--log-level TEXT set the logging level among DEBUG, [INFO], WARNING,
ERROR, CRITICAL
-h, --help Show this message and exit.
-o, --out-dir TEXT The target directory where to store the
results. Defaults to the current directory.
-w, --workers INTEGER The workers count. By default all cores will
be used (w=0).
-c, --coverage FLOAT Minimal percentage of coverage for each
profiles. By default set to 0.4
--db-type TEXT The macsyfinder --db-type option. Run
macsyfinder --help for more details. Possible
values are ordered_replicon,
gembase, unordered, defaults to
ordered_replicon.
--preserve-raw Preserve raw MacsyFinder outputs alongside
Defense Finder results inside the output
directory.
--models-dir TEXT Specify a directory containing your models.
--no-cut-ga Advanced! Run macsyfinder in no-cut-ga mode.
The validity of the genes and systems found is
not guaranteed!
-a, --antidefensefinder Also run AntiDefenseFinder models to find
antidefense systems.
-A, --antidefensefinder-only Run only AntiDefenseFinder for antidefense
system and not DefenseFinder
--log-level TEXT set the logging level among DEBUG, [INFO],
WARNING, ERROR, CRITICAL
--index-dir TEXT Specify a directory to write the index files
required by macsyfinder when the input file is
in a read-only folder
-h, --help Show this message and exit.
```
## Development
Expand Down Expand Up @@ -254,4 +266,4 @@ done
```
---
For questions: you can contact [email protected]
For questions: you can contact [email protected], [email protected], or [email protected]
15 changes: 11 additions & 4 deletions defense_finder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,24 @@
import colorlog

from macsypy.scripts import macsyfinder
from warnings import simplefilter
import pandas as pd
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)


def run(protein_file_name, dbtype, workers, coverage, adf, adf_only, tmp_dir, models_dir, nocut_ga, loglevel, index_dir):
def run(protein_file_name, dbtype, workers, coverage, adf, adf_only, tmp_dir, models_dir, nocut_ga, loglevel, index_dir, models_main_ver):
scripts=[]

if adf_only == False:
gen_args = ['--db-type', dbtype, '--sequence-db', protein_file_name, '--models', 'defense-finder-models/DefenseFinder_{i}', 'all',
if models_main_ver >= 2:
scripts.append(['--db-type', dbtype, '--sequence-db',protein_file_name, '--models', 'defense-finder-models/DefenseFinder', 'all',
'--out-dir', os.path.join(tmp_dir, 'DefenseFinder'), '--w', str(workers),
'--coverage-profile', str(coverage), '--exchangeable-weight', '1'])
else:
gen_args = ['--db-type', dbtype, '--sequence-db', protein_file_name, '--models', 'defense-finder-models/DefenseFinder_{i}', 'all',
'--out-dir', os.path.join(tmp_dir, 'DF_{i}'), '--w', str(workers),
'--coverage-profile', str(coverage), '--exchangeable-weight', '1']
scripts = [[f.format(i=i) for f in gen_args] for i in range(1, 6)]

scripts = [[f.format(i=i) for f in gen_args] for i in range(1, 6)]
scripts.append(['--db-type', dbtype, '--sequence-db',protein_file_name, '--models', 'defense-finder-models/RM', 'all',
'--out-dir', os.path.join(tmp_dir, 'RM'), '--w', str(workers),
'--coverage-profile', str(coverage), '--exchangeable-weight', '1'])
Expand Down
2 changes: 1 addition & 1 deletion defense_finder_cli/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.3.0"
__version__ = "2.0.0"
75 changes: 66 additions & 9 deletions defense_finder_cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,21 @@
import shutil
import click
import defense_finder
import defense_finder_updater
import defense_finder_posttreat
from pyhmmer.easel import SequenceFile, TextSequence, Alphabet
import pyrodigal
import sys

from warnings import simplefilter, catch_warnings

with catch_warnings():
simplefilter("ignore")
import defense_finder_updater

from macsypy.scripts.macsydata import get_version_message
from macsypy.scripts.macsydata import _find_all_installed_packages
from macsypy.scripts.macsydata import RemoteModelIndex
import datetime

import colorlog
try:
Expand All @@ -16,6 +25,28 @@
logging = colorlog.wrappers.logging


def check_last_version_models():
    """Return the latest known version of the ``defense-finder-models`` package.

    The result of the last remote lookup is cached in the file
    ``.defensefinder_model_lastversion`` in the user's home directory, stored
    as ``<YYYY-MM-DD>___<version>``. The remote index is queried again only
    when that cache is missing, unreadable, malformed, or older than 30 days.

    Returns:
        The version string, stripped of surrounding whitespace.

    Raises:
        IndexError: if the remote index does not list ``defense-finder-models``
            or lists no version for it.
        OSError: if the refreshed cache file cannot be written.
    """
    # expanduser("~") also works where HOME is not set (e.g. Windows), whereas
    # os.environ["HOME"] raises KeyError there.
    cache_path = os.path.join(os.path.expanduser("~"), ".defensefinder_model_lastversion")
    now = datetime.datetime.now()

    checked_on = None
    last_version = None
    try:
        with open(cache_path, "r", encoding="utf-8") as cache_file:
            fields = cache_file.read().split("___")
        if len(fields) == 2 and fields[1].strip():
            checked_on = datetime.datetime.strptime(fields[0].strip(), "%Y-%m-%d")
            last_version = fields[1].strip()
    except (OSError, ValueError):
        # Missing, unreadable or corrupted cache: fall through to a fresh lookup
        # rather than aborting the whole run.
        checked_on = None
        last_version = None

    if checked_on is None or checked_on < now - datetime.timedelta(days=30):
        remote = RemoteModelIndex(org="mdmparis")
        packages = remote.list_packages()
        dfmods = [pack for pack in packages if pack == "defense-finder-models"][0]
        # NOTE(review): index 0 is taken as the latest version; this presumes
        # list_package_vers returns versions newest first -- confirm.
        last_version = remote.list_package_vers(dfmods)[0]
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(f"{now.strftime('%Y-%m-%d')}___{last_version}")

    return last_version

@click.group(context_settings=dict(help_option_names=["-h", "--help"]))
def cli():
"""Systematic search of all known anti-phage systems by MDM Labs, Paris.
Expand Down Expand Up @@ -55,6 +86,8 @@ def update(models_dir=None, force_reinstall: bool = False):
defense_finder_updater.update_models(models_dir, force_reinstall)




@cli.command()
@click.argument('file', type=click.Path(exists=True))
@click.option('-o', '--out-dir', 'outdir',
Expand All @@ -79,7 +112,6 @@ def update(models_dir=None, force_reinstall: bool = False):
help='set the logging level among DEBUG, [INFO], WARNING, ERROR, CRITICAL')
@click.option('--index-dir', 'index_dir', required=False, help='Specify a directory to write the index files required by macsyfinder when the input file is in a read-only folder')


def run(file: str, outdir: str, dbtype: str, workers: int, coverage: float, preserve_raw: bool, adf: bool,
adf_only: bool, no_cut_ga: bool, models_dir: str = None, loglevel : str = "INFO",
index_dir: str = None):
Expand Down Expand Up @@ -155,20 +187,46 @@ def run(file: str, outdir: str, dbtype: str, workers: int, coverage: float, pres

else:
protein_file_name = filename
versions_models = []

models = _find_all_installed_packages(models_dir=models_dir).models()
modelok = False
for m in models:
if "casfinder" in m.path.lower() or "defense-finder-models" in m.path.lower():
versions_models.append([m.path, m.version])
if ("defense-finder" in m.path.lower()):
last_version_df = check_last_version_models()
if m.version != last_version_df.strip():
models_main_ver = int(m.version.split(".")[0])
logger.warning(f"Be careful, this is not the latest version of the model, last version = {last_version_df}")
logger.warning(">>> Run `defense-finder update` to be up to date")
else:
logger.info(f"Awesome, you are using the last version of the defense-finder-models : {last_version_df}")
models_main_ver = int(m.version.split(".")[0])

if len(versions_models) != 2:
logger.error(f"Uncomplete defense-finder models, we found only {' '.join([vm[0] for vm in versions_models])}. Cas and defense-finder models are required")
logger.error(f">>> Run `defense-finder update` to download the models")
sys.exit(1)

logger.info(f"Running DefenseFinder version {__version__}")
defense_finder.run(protein_file_name, dbtype, workers, coverage, adf,adf_only, tmp_dir, models_dir, no_cut_ga, loglevel, index_dir)
nl = '\n'
tab = "\t"

logger.info(f"""Using the following models:
{nl.join([f"{path+tab+version}" for path, version in versions_models])}
""")

defense_finder.run(protein_file_name, dbtype, workers, coverage, adf,adf_only, tmp_dir, models_dir, no_cut_ga, loglevel, index_dir, models_main_ver)
logger.info("Post-treatment of the data")
defense_finder_posttreat.run(tmp_dir, outdir, os.path.splitext(os.path.basename(filename))[0])

if not preserve_raw:
shutil.rmtree(tmp_dir)

models = _find_all_installed_packages().models()
versions_models = []
for m in models:
if "cas" in m.path.lower() or "defense-finder" in m.path.lower():
versions_models.append([m.path, m.version])


nl = "\n"
tab = "\t"

Expand All @@ -187,7 +245,6 @@ def run(file: str, outdir: str, dbtype: str, workers: int, coverage: float, pres
Using the following models:
{nl.join([f"{path+tab+version}" for path, version in versions_models])}
""")

if __name__ == "__main__":
Expand Down
7 changes: 4 additions & 3 deletions defense_finder_updater/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import shlex
from macsypy.scripts import macsydata

from macsypy.scripts.macsydata import main as mdmain


def update_models(models_dir, force_reinstall: bool):
# Updating DefenseFinder models
args_models_dir = f"-t {models_dir}" if models_dir is not None else "-u"
args_force = "-f" if force_reinstall else ""
cmd_args = f"install -U {args_models_dir} {args_force} --org mdmparis defense-finder-models"
macsydata.main(shlex.split(cmd_args))
mdmain(shlex.split(cmd_args))

# Updating CASFinder models
args_models_dir = f"-t {models_dir}" if models_dir is not None else "-u"
cmd_args = f"install -U {args_models_dir} {args_force} CasFinder"
macsydata.main(shlex.split(cmd_args))
mdmain(shlex.split(cmd_args))

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
macsyfinder==2.1.1
macsyfinder==2.1.4
colorlog==6.3.0a1
click==8.0.3
pyhmmer
Expand Down
Loading

0 comments on commit feace4b

Please sign in to comment.