From c9e9ebb44449efe09d90a649841016054d33555e Mon Sep 17 00:00:00 2001 From: genomewalker Date: Thu, 25 Jul 2024 16:05:17 +0200 Subject: [PATCH] Updated README Fixed location for sorted bam files --- bam_filter/filter.py | 9 +-------- bam_filter/reassign.py | 9 ++------- bam_filter/sam_utils.py | 10 ++++++---- bam_filter/utils.py | 1 + 4 files changed, 10 insertions(+), 19 deletions(-) diff --git a/bam_filter/filter.py b/bam_filter/filter.py index 54b19c0..c24bfda 100644 --- a/bam_filter/filter.py +++ b/bam_filter/filter.py @@ -93,12 +93,12 @@ def filter_references(args): mode="filter", ) - sorted_bam = args.bam.replace(".bam", ".bf-sorted.bam") bam = check_bam_file( bam=args.bam, threads=4, reference_lengths=args.reference_lengths, sort_memory=args.sort_memory, + sorted_bam=out_files["sorted_bam"], ) if bam is None: logging.warning("No reference sequences with alignments found in the BAM file") @@ -264,11 +264,4 @@ def filter_references(args): if args.low_memory: os.remove(out_files["bam_tmp_sorted"]) # check if sorted BAM file exists, if yes remove it - if os.path.exists(sorted_bam): - os.remove(sorted_bam) - # check if sorted BAM index file exists, if yes remove it - if os.path.exists(sorted_bam + ".bai"): - os.remove(sorted_bam + ".bai") - elif os.path.exists(sorted_bam + ".csi"): - os.remove(sorted_bam + ".csi") logging.info("ALL DONE.") diff --git a/bam_filter/reassign.py b/bam_filter/reassign.py index 6db3778..72cd49e 100644 --- a/bam_filter/reassign.py +++ b/bam_filter/reassign.py @@ -966,12 +966,12 @@ def reassign(args): mode="reassign", bam_reassigned=args.bam_reassigned, ) - sorted_bam = bam.replace(".bam", ".bf-sorted.bam") bam = check_bam_file( bam=args.bam, threads=args.threads, reference_lengths=args.reference_lengths, sort_memory=args.sort_memory, + sorted_bam=out_files["sorted_bam"], ) if bam is None: logging.warning("No reference sequences with alignments found in the BAM file") @@ -1009,10 +1009,5 @@ def reassign(args): disable_sort=args.disable_sort, tmp_dir=tmp_dir, ) - if os.path.exists(sorted_bam): - os.remove(sorted_bam) - if os.path.exists(sorted_bam + ".bai"): - os.remove(sorted_bam + ".bai") - elif os.path.exists(sorted_bam + ".csi"): - os.remove(sorted_bam + ".csi") + log.info("Done!") diff --git a/bam_filter/sam_utils.py b/bam_filter/sam_utils.py index cc1a1d2..7eb93b4 100644 --- a/bam_filter/sam_utils.py +++ b/bam_filter/sam_utils.py @@ -788,11 +788,12 @@ def check_bam_file( threads=1, reference_lengths=None, sort_memory="1G", + sorted_bam=None, ): logging.info("Checking BAM file status") save = pysam.set_verbosity(0) - def process_bam(bam, s_threads): + def evaluate_bam(bam, s_threads, sorted_bam=None): with pysam.AlignmentFile(bam, "rb", threads=s_threads) as samfile: references = samfile.references log.info(f"::: Found {samfile.nreferences:,} reference sequences") @@ -816,7 +817,8 @@ def process_bam(bam, s_threads): if samfile.header["HD"]["SO"] != "coordinate": log.info("::: BAM file is not sorted by coordinates, sorting it...") - sorted_bam = bam.replace(".bam", ".bf-sorted.bam") + if sorted_bam is None: + sorted_bam = bam.replace(".bam", ".bf-sorted.bam") pysam.sort( "-@", str(s_threads), "-m", str(sort_memory), "-o", sorted_bam, bam ) @@ -833,11 +835,11 @@ def process_bam(bam, s_threads): try: s_threads = min(threads, 4) - bam, reopened = process_bam(bam, s_threads) + bam, reopened = evaluate_bam(bam, s_threads, sorted_bam) # If the BAM file was sorted and reopened, check it again if reopened: - bam, _ = process_bam(bam, s_threads) + bam, _ = evaluate_bam(bam, s_threads, sorted_bam) pysam.set_verbosity(save) return bam diff --git a/bam_filter/utils.py b/bam_filter/utils.py index ae71eb4..ad01338 100644 --- a/bam_filter/utils.py +++ b/bam_filter/utils.py @@ -1520,6 +1520,7 @@ def create_output_files( log.error("Mode not recognized") exit(1) out_files["tmp_dir"] = tmp_dir + out_files["sorted_bam"] = f"{tmp_dir}/{prefix}.bf-sorted.bam" return out_files # out_files = {