diff --git a/tb-profiler b/tb-profiler index 70e4691..e71e1dc 100644 --- a/tb-profiler +++ b/tb-profiler @@ -187,6 +187,10 @@ def main_profile(args): pp.logging.info("[green]Profiling finished sucessfully![/]",extra={"markup": True}) def main_update_tbdb(args): + + if args.match_ref: + args.match_ref = os.path.abspath(args.match_ref) + if pp.nofolder("tbdb"): pp.run_cmd(f"git clone {args.repo}") os.chdir("tbdb") @@ -379,7 +383,7 @@ algorithm.add_argument('--snp_diff_db','--snp-diff_db',type=str,help=argparse.SU algorithm.add_argument('--snp_diff_no_store','--snp-diff-no-store',action='store_true',help=argparse.SUPPRESS) algorithm.add_argument('--no_trim','--no-trim',action="store_true",help="Don't trim files using trimmomatic") algorithm.add_argument('--no_coverage_qc','--no-coverage-qc',action="store_true",help="Don't collect flagstats") -algorithm.add_argument('--no_samclip','--no-samclip',action="store_false",help="Don't remove clipped reads from variant calling") +algorithm.add_argument('--no_samclip','--no-samclip',action="store_true",help="Don't remove clipped reads from variant calling") algorithm.add_argument('--no_delly','--no-delly',action="store_true",help="Don't run delly") algorithm.add_argument('--no_lineage','--no-lineage',action="store_true",help=argparse.SUPPRESS) algorithm.add_argument('--add_variant_annotations','--add-variant-annotations',action="store_true",help=argparse.SUPPRESS) @@ -400,17 +404,17 @@ parser_sub.set_defaults(func=main_profile) parser_sub = subparsers.add_parser('lineage', help='Profile only lineage', formatter_class=ArgumentDefaultsRichHelpFormatter) parser_sub.add_argument('--bam','-a',required=True, help='BAM file. 
Make sure it has been generated using the H37Rv genome (GCA_000195955.2)') parser_sub.add_argument('--prefix','-p',default="tbprofiler",help='Sample prefix') -parser_sub.add_argument('--snps',action="store_true",help='Sample prefix') +parser_sub.add_argument('--barcode_snps','--barcode-snps',help='Dump barcoding mutations to a file') parser_sub.add_argument('--caller',default='freebayes',choices=["bcftools","freebayes","gatk"],type=str,help="Variant caller") -parser_sub.add_argument('--kmer_counter',default='kmc',choices=["kmc","dsk"],type=str,help="Kmer counter") +parser_sub.add_argument('--kmer_counter','--kmer-counter',default='kmc',choices=["kmc","dsk"],type=str,help="Kmer counter") parser_sub.add_argument('--platform','-m',choices=["illumina","nanopore","pacbio"],default="illumina",help='NGS Platform used to generate data') parser_sub.add_argument('--db',default='tbdb',help='Mutation panel name') parser_sub.add_argument('--spoligotype',action="store_true",help="Perform in-silico spoligotyping") -parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)') -parser_sub.add_argument('--text_template',type=str,help='Jinja2 formatted template for output') +parser_sub.add_argument('--external_db','--external-db',type=str,help='Path to db files prefix (overrides "--db" parameter)') +parser_sub.add_argument('--text_template','--text-template',type=str,help='Jinja2 formatted template for output') parser_sub.add_argument('--threads','-t',default=1,help='Threads to use',type=int) parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % 
tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -427,12 +431,12 @@ parser_sub.add_argument('--txt',action="store_true",help="Add text output") parser_sub.add_argument('--csv',action="store_true",help="Add CSV output") parser_sub.add_argument('--docx',action="store_true",help="Add docx output. This requires docxtpl to be installed") parser_sub.add_argument('--text_template',type=str,help='Jinja2 formatted template for output') -parser_sub.add_argument('--kmer_counter',default='kmc',choices=["kmc","dsk"],type=str,help="Kmer counter") +parser_sub.add_argument('--kmer_counter','--kmer-counter',default='kmc',choices=["kmc","dsk"],type=str,help="Kmer counter") parser_sub.add_argument('--platform','-m',choices=["illumina","nanopore","pacbio"],default="illumina",help='NGS Platform used to generate data') parser_sub.add_argument('--db',default='tbdb',help='Mutation panel name') -parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)') +parser_sub.add_argument('--external_db','--external-db',type=str,help='Path to db files prefix (overrides "--db" parameter)') parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--threads','-t',default=1,help='Threads to use',type=int) parser_sub.add_argument('--ram',default=2,type=int_2_or_more,help='Maximum memory to use in Gb') parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") @@ -445,13 +449,13 @@ parser_sub.add_argument('--prefix','-p',default="tbprofiler",help='Sample prefix parser_sub.add_argument('--samples',help='File with samples (one per line)') 
parser_sub.add_argument('--itol',action="store_true",help='Generate itol config files') parser_sub.add_argument('--full',action="store_true",help='Output mutations in main result file') -parser_sub.add_argument('--all_variants',action="store_true",help='Output all variants in variant matrix') -parser_sub.add_argument('--mark_missing',action="store_true",help='An asteriks will be use to mark predictions which are affected by missing data at a drug resistance position') +parser_sub.add_argument('--all_variants','--all-variants',action="store_true",help='Output all variants in variant matrix') +parser_sub.add_argument('--mark_missing','--mark-missing',action="store_true",help='An asterisk will be used to mark predictions which are affected by missing data at a drug resistance position') parser_sub.add_argument('--db',default='tbdb',help='Full path to mutation database json file to use') parser_sub.add_argument('--format',default='txt',choices=['txt','csv'],help='Format of the output') -parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)') +parser_sub.add_argument('--external_db','--external-db',type=str,help='Path to db files prefix (overrides "--db" parameter)') parser_sub.add_argument('--dir','-d',nargs="+",default=["results"],help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -463,12 +467,12 @@ parser_sub.add_argument('json',default="tbprofiler",help='Sample prefix') 
parser_sub.add_argument('--txt',action="store_true",help="Add text output") parser_sub.add_argument('--csv',action="store_true",help="Add CSV output") parser_sub.add_argument('--docx',action="store_true",help="Add docx output. This requires docxtpl to be installed") -parser_sub.add_argument('--docx_template',help="Supply custom template for --docx output") -parser_sub.add_argument('--text_template',type=str,help='Jinja2 formatted template for output') +parser_sub.add_argument('--docx_template','--docx-template',help="Supply custom template for --docx output") +parser_sub.add_argument('--text_template','--text-template',type=str,help='Jinja2 formatted template for output') parser_sub.add_argument('--db',default='tbdb',help='Mutation panel name') -parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)') +parser_sub.add_argument('--external_db','--external-db',type=str,help='Path to db files prefix (overrides "--db" parameter)') parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--suspect',action="store_true",help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) @@ -476,26 +480,26 @@ parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBU parser_sub.set_defaults(func=main_reformat) parser_sub = subparsers.add_parser('create_db', help='Generate the files required to run tb-profiler', formatter_class=ArgumentDefaultsRichHelpFormatter) -parser_sub.add_argument('--prefix','-p',type=str,help='The input CSV file containing the mutations',required=True) 
-parser_sub.add_argument('--csv','-c',nargs="+",default=["mutations.csv"],type=str,help='The prefix for all output files') +parser_sub.add_argument('--prefix','-p',type=str,help='The prefix for all output files',required=True) +parser_sub.add_argument('--csv','-c',nargs="+",default=["mutations.csv"],type=str,help='The input CSV file containing the mutations') parser_sub.add_argument('--watchlist','-w',default="watchlist.csv",type=str,help='A csv file containing genes to profile but without any specific associated mutations') parser_sub.add_argument('--spoligotypes',default="spoligotype_spacers.txt",type=str,help='A file containing a list of spoligotype spacers') -parser_sub.add_argument('--spoligotype_annotations',default="spoligotype_list.csv") +parser_sub.add_argument('--spoligotype_annotations','--spoligotype-annotations',default="spoligotype_list.csv") parser_sub.add_argument('--barcode',default="barcode.bed",type=str,help='A bed file containing lineage barcode SNPs') parser_sub.add_argument('--bedmask',default="mask.bed",type=str,help='A bed file containing a list of low-complexity regions') parser_sub.add_argument('--rules',type=str,default="rules.txt",help='A file containing python rules') -parser_sub.add_argument('--amplicon_primers',type=str,help='A file containing a list of amplicon primers') -parser_sub.add_argument('--match_ref',type=str,help='The prefix for all output files') +parser_sub.add_argument('--amplicon_primers','--amplicon-primers',type=str,help='A file containing a list of amplicon primers') +parser_sub.add_argument('--match_ref','--match-ref',type=str,help='Match the chromosome name to the given fasta file') parser_sub.add_argument('--custom',action="store_true",help='Tells the script this is a custom database, this is used to alter the generation of the version definition') parser_sub.add_argument('--db_name',help='Overrides the name of the database in the version file') -parser_sub.add_argument('--db_commit',help='Overrides the commit 
string of the database in the version file') -parser_sub.add_argument('--db_author',help='Overrides the author of the database in the version file') -parser_sub.add_argument('--db_date',help='Overrides the date of the database in the version file') -parser_sub.add_argument('--include_original_mutation',action="store_true", help='Include the original mutation (before reformatting) as part of the variant annotaion') +parser_sub.add_argument('--db_commit','--db-commit',help='Overrides the commit string of the database in the version file') +parser_sub.add_argument('--db_author','--db-author',help='Overrides the author of the database in the version file') +parser_sub.add_argument('--db_date','--db-date',help='Overrides the date of the database in the version file') +parser_sub.add_argument('--include_original_mutation','--include-original-mutation',action="store_true", help='Include the original mutation (before reformatting) as part of the variant annotation') parser_sub.add_argument('--load',action="store_true", help='Automaticaly load database') -parser_sub.add_argument('--no_overwrite',action="store_true", help="Don't load if existing database with prefix exists") +parser_sub.add_argument('--no_overwrite','--no-overwrite',action="store_true", help="Don't load if existing database with prefix exists") parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -504,7 +508,7 @@ parser_sub.set_defaults(func=main_create_db) parser_sub 
= subparsers.add_parser('load_library', help='Load new library', formatter_class=ArgumentDefaultsRichHelpFormatter) parser_sub.add_argument('prefix',type=str,help='Prefix to the library files') parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -515,9 +519,9 @@ parser_sub.add_argument('--prefix','-p',help='Database name') parser_sub.add_argument('--repo','-r',default="https://github.com/jodyphelan/tbdb.git",help='Repository to pull from') parser_sub.add_argument('--branch','-b',default="tbdb",help='Branch to pull from') parser_sub.add_argument('--commit','-c',help='Git commit hash to checkout (default: latest)') -parser_sub.add_argument('--match_ref',type=str,help='The prefix for all output files') +parser_sub.add_argument('--match_ref','--match-ref',type=str,help='Match the chromosome name to the given fasta file') parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -527,9 +531,9 @@ parser_sub = 
subparsers.add_parser('batch', help='Run tb-profiler for several sa parser_sub.add_argument('--csv',help='CSV with samples and files',required=True) parser_sub.add_argument('--args',type=str, help='Arguments to use with tb-profiler') parser_sub.add_argument('--jobs','-j',default=1,help='Threads to use',type=int) -parser_sub.add_argument('--threads_per_job','-t',default=1,help='Threads to use',type=int) +parser_sub.add_argument('--threads_per_job','--threads-per-job','-t',default=1,help='Threads to use',type=int) parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -537,7 +541,7 @@ parser_sub.set_defaults(func=main_batch) parser_sub = subparsers.add_parser('list_db', help='List loaded databases', formatter_class=ArgumentDefaultsRichHelpFormatter) parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') @@ -545,7 +549,7 @@ parser_sub.set_defaults(func=main_list_db) parser_sub = 
subparsers.add_parser('version', help='Output program version and exit', formatter_class=ArgumentDefaultsRichHelpFormatter) parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') -parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS) +parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') diff --git a/tbprofiler/reformat.py b/tbprofiler/reformat.py index 84edcc9..02da7ba 100644 --- a/tbprofiler/reformat.py +++ b/tbprofiler/reformat.py @@ -203,6 +203,11 @@ def create_lineage_result( lineage: List[Lineage] ): main_lineage, sub_lineage = get_main_lineage(lineage) + pipeline = Pipeline( + software_version=args.version, + db_version=args.conf['version'], + software=[{'process':k,'software':v} for k,v in shared_dict.items()] + ) data = { 'id':args.prefix, 'lineage':lineage, @@ -210,6 +215,7 @@ def create_lineage_result( 'main_lineage':main_lineage, 'tbprofiler_version':args.version, 'db_version':args.conf['version'], + 'pipeline':pipeline } return LineageResult(**data) diff --git a/tests/example_collate.txt b/tests/example_collate.txt index 0840729..f65153f 100644 --- a/tests/example_collate.txt +++ b/tests/example_collate.txt @@ -1,7 +1,7 @@ sample main_lineage sub_lineage spoligotype drtype target_median_depth pct_reads_mapped num_reads_mapped num_dr_variants num_other_variants rifampicin isoniazid ethambutol pyrazinamide moxifloxacin levofloxacin bedaquiline delamanid pretomanid linezolid streptomycin amikacin kanamycin capreomycin clofazimine ethionamide por5A_bcftools lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 24 rpoB p.Ser450Leu 
(1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) por5A_freebayes lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 24 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) -por5A_gatk lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 28 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) +por5A_gatk lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 24 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) por5A_lofreq lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 23 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) -por5A_pilon lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 25 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) +por5A_pilon lineage4 lineage4.3.4.2 - MDR-TB 59.0 99.9 54238 4 24 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00) por5_vcf lineage4 lineage4.3.4.2 - MDR-TB 4 24 rpoB p.Ser450Leu (1.00) inhA c.-777C>T (1.00) embB p.Met306Val (1.00) pncA p.Val125Gly (1.00) - - - - - - - - - - - inhA c.-777C>T (1.00)