From 3faf9809d3a09f35e4cf0b33a3eb354e4cac4cca Mon Sep 17 00:00:00 2001 From: iquasere Date: Tue, 6 Apr 2021 13:39:35 +0100 Subject: [PATCH] Now creates the output directories if non existent --- upimapi.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/upimapi.py b/upimapi.py index f3a12e0..1ba5a51 100644 --- a/upimapi.py +++ b/upimapi.py @@ -15,6 +15,7 @@ import urllib.request import subprocess import psutil +import pathlib from io import StringIO import pandas as pd @@ -22,7 +23,7 @@ from uniprot_support import UniprotSupport -__version__ = '1.1.3' +__version__ = '1.1.4' upmap = UniprotSupport() @@ -71,8 +72,12 @@ def get_arguments(self): diamond_args.add_argument("-t", "--threads", default='1', help="Number of threads to use in annotation steps") diamond_args.add_argument("-mts", "--max-target-seqs", default='50', help="Number of annotations to output per sequence inputed") - diamond_args.add_argument("-b", "--block-size", help="Number of annotations to output per sequence inputed") - diamond_args.add_argument("-c", "--index-chunks", help="Number of annotations to output per sequence inputed") + diamond_args.add_argument("-b", "--block-size", help="Billions of sequence letters to be processed at a time " + "(UPIMAPI determines best value for this parameter if not " + "set") + diamond_args.add_argument("-c", "--index-chunks", help="Number of chunks for processing the seed index " + "(UPIMAPI determines best value for this parameter if not " + "set") args = parser.parse_args() args.diamond_output = args.diamond_output.rstrip('/') @@ -240,8 +245,8 @@ def recursive_uniprot_fasta(self, all_ids, output, max_iter=5, step=1000): 'information are available at {} and information obtained is available at {}'.format( str(len(ids_missing)), ids_unmapped_output, output)) - def recursive_uniprot_information(self, ids, output, max_iter=5, excel=False, - columns=list(), databases=list(), step=1000): + def recursive_uniprot_information(self, ids, output, max_iter=5, excel=False, columns=list(), databases=list(), + step=1000): if os.path.isfile(output) and os.stat(output).st_size > 1: try: result = (pd.read_csv(output, sep='\t', low_memory=False) if not @@ -341,9 +346,12 @@ def run_diamond(self, query, aligned, unaligned, database, threads='12', max_tar def upimapi(self): args = self.get_arguments() + pathlib.Path('/'.join(args.output.split('/')[:-1])).mkdir(parents=True, exist_ok=True) # Using annotation with DIAMOND if args.use_diamond: + pathlib.Path(args.diamond_output).mkdir(parents=True, exist_ok=True) + if not args.database.endswith(".dmnd"): self.generate_diamond_database(args.database, '{}.dmnd'.format('.'.join(args.database.split('.')[:-1]))) args.database = '{}.dmnd'.format('.'.join(args.database.split('.')[:-1]))