Skip to content

Commit

Permalink
Merge pull request #36 from genomewalker:fix-sorted-location
Browse files (browse the repository at this point in the history)
Updated README
  • Loading branch information
genomewalker authored Jul 25, 2024
2 parents f67632d + c9e9ebb commit 393cc58
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 19 deletions.
9 changes: 1 addition & 8 deletions bam_filter/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ def filter_references(args):
mode="filter",
)

sorted_bam = args.bam.replace(".bam", ".bf-sorted.bam")
bam = check_bam_file(
bam=args.bam,
threads=4,
reference_lengths=args.reference_lengths,
sort_memory=args.sort_memory,
sorted_bam=out_files["sorted_bam"],
)
if bam is None:
logging.warning("No reference sequences with alignments found in the BAM file")
Expand Down Expand Up @@ -264,11 +264,4 @@ def filter_references(args):
if args.low_memory:
os.remove(out_files["bam_tmp_sorted"])
# check if sorted BAM file exists, if yes remove it
if os.path.exists(sorted_bam):
os.remove(sorted_bam)
# check if sorted BAM index file exists, if yes remove it
if os.path.exists(sorted_bam + ".bai"):
os.remove(sorted_bam + ".bai")
elif os.path.exists(sorted_bam + ".csi"):
os.remove(sorted_bam + ".csi")
logging.info("ALL DONE.")
9 changes: 2 additions & 7 deletions bam_filter/reassign.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,12 +966,12 @@ def reassign(args):
mode="reassign",
bam_reassigned=args.bam_reassigned,
)
sorted_bam = bam.replace(".bam", ".bf-sorted.bam")
bam = check_bam_file(
bam=args.bam,
threads=args.threads,
reference_lengths=args.reference_lengths,
sort_memory=args.sort_memory,
sorted_bam=out_files["sorted_bam"],
)
if bam is None:
logging.warning("No reference sequences with alignments found in the BAM file")
Expand Down Expand Up @@ -1009,10 +1009,5 @@ def reassign(args):
disable_sort=args.disable_sort,
tmp_dir=tmp_dir,
)
if os.path.exists(sorted_bam):
os.remove(sorted_bam)
if os.path.exists(sorted_bam + ".bai"):
os.remove(sorted_bam + ".bai")
elif os.path.exists(sorted_bam + ".csi"):
os.remove(sorted_bam + ".csi")

log.info("Done!")
10 changes: 6 additions & 4 deletions bam_filter/sam_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,11 +788,12 @@ def check_bam_file(
threads=1,
reference_lengths=None,
sort_memory="1G",
sorted_bam=None,
):
logging.info("Checking BAM file status")
save = pysam.set_verbosity(0)

def process_bam(bam, s_threads):
def evaluate_bam(bam, s_threads, sorted_bam=None):
with pysam.AlignmentFile(bam, "rb", threads=s_threads) as samfile:
references = samfile.references
log.info(f"::: Found {samfile.nreferences:,} reference sequences")
Expand All @@ -816,7 +817,8 @@ def process_bam(bam, s_threads):

if samfile.header["HD"]["SO"] != "coordinate":
log.info("::: BAM file is not sorted by coordinates, sorting it...")
sorted_bam = bam.replace(".bam", ".bf-sorted.bam")
if sorted_bam is None:
sorted_bam = bam.replace(".bam", ".bf-sorted.bam")
pysam.sort(
"-@", str(s_threads), "-m", str(sort_memory), "-o", sorted_bam, bam
)
Expand All @@ -833,11 +835,11 @@ def process_bam(bam, s_threads):

try:
s_threads = min(threads, 4)
bam, reopened = process_bam(bam, s_threads)
bam, reopened = evaluate_bam(bam, s_threads, sorted_bam)

# If the BAM file was sorted and reopened, check it again
if reopened:
bam, _ = process_bam(bam, s_threads)
bam, _ = evaluate_bam(bam, s_threads, sorted_bam)

pysam.set_verbosity(save)
return bam
Expand Down
1 change: 1 addition & 0 deletions bam_filter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1520,6 +1520,7 @@ def create_output_files(
log.error("Mode not recognized")
exit(1)
out_files["tmp_dir"] = tmp_dir
out_files["sorted_bam"] = f"{tmp_dir}/{prefix}.bf-sorted.bam"
return out_files

# out_files = {
Expand Down

0 comments on commit 393cc58

Please sign in to comment.