From 2fab1f3442adc9ebaa94406bebccb34db4695544 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 21 Nov 2024 13:24:17 +0100 Subject: [PATCH] black --- .../scripts/merge_snv_variantcallers.py | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/BALSAMIC/assets/scripts/merge_snv_variantcallers.py b/BALSAMIC/assets/scripts/merge_snv_variantcallers.py index e26de237d..0c1dcec32 100644 --- a/BALSAMIC/assets/scripts/merge_snv_variantcallers.py +++ b/BALSAMIC/assets/scripts/merge_snv_variantcallers.py @@ -4,9 +4,10 @@ import re import click + def collect_header(vcf): header = [] - with gzip.open(vcf, 'rt') as gz_vcf: + with gzip.open(vcf, "rt") as gz_vcf: for line in gz_vcf: if line.startswith("#"): header.append(line) @@ -14,6 +15,7 @@ def collect_header(vcf): break return header + def add_header_categories(header_categories, header): for hdr_row in header: hdr_row_split = hdr_row.split("=") @@ -25,6 +27,7 @@ def add_header_categories(header_categories, header): header_categories[cat].append("=".join(hdr_row_split[1:])) return header_categories + def get_description_field(header_row): pattern = r'Description="(.*?)"' match = re.search(pattern, header_row) @@ -32,18 +35,22 @@ def get_description_field(header_row): return match.group(1) return None + def update_description_text(row, new_description): return re.sub(r'Description="(.*?)"', f'Description="{new_description}"', row) + def get_number_field(header_row): - pattern = r'Number=([^,]+)' + pattern = r"Number=([^,]+)" match = re.search(pattern, header_row) if match: return match.group(1) return None + def update_number_text(row, new_number): - return re.sub(r'Number=([^,]+)', f'Number="{new_number}"', row) + return re.sub(r"Number=([^,]+)", f'Number="{new_number}"', row) + def merge_headers(vcf1, vcf2): """ @@ -126,7 +133,7 @@ def merge_header_row(line1, line2): # Print warning to standard error print( f"Warning: Number fields differ for header lines. line1: {line1}, line2: {line2}", - file=sys.stderr + file=sys.stderr, ) updated_line = update_number_text(updated_line, ".") @@ -136,7 +143,7 @@ def merge_header_row(line1, line2): def read_variants(vcf): variants = {} - with gzip.open(vcf, 'rt') as gz_vcf: + with gzip.open(vcf, "rt") as gz_vcf: for line in gz_vcf: if line.startswith("#"): continue # Skip header lines @@ -144,9 +151,23 @@ def read_variants(vcf): normal = False if len(fields) > 10: # TUMOR + NORMAL - chrom, pos, id, ref, alt, qual, filter, info, format, tumor, normal = fields[:11] + ( + chrom, + pos, + id, + ref, + alt, + qual, + filter, + info, + format, + tumor, + normal, + ) = fields[:11] else: # TUMOR ONLY - chrom, pos, id, ref, alt, qual, filter, info, format, tumor = fields[:11] + chrom, pos, id, ref, alt, qual, filter, info, format, tumor = fields[ + :11 + ] # Use a tuple of (chrom, pos, ref, alt) as the key key = (chrom, pos, ref, alt) @@ -193,7 +214,9 @@ def merge_info_fields(info_fields): # Recreate INFO field with merged key-value pairs merged_info_str = ";".join([f"{key}" for key in merged_info_single_fields]) - merged_info_str = merged_info_str + ";".join([f"{key}={value}" for key, value in merged_info.items()]) + merged_info_str = merged_info_str + ";".join( + [f"{key}={value}" for key, value in merged_info.items()] + ) return merged_info_str @@ -248,6 +271,7 @@ def merge_variants(vcf1, vcf2): return merged_variants + def write_merged_vcf(output_file, header, merged_variants): """ Writes the merged variants along with the header to the output VCF file. @@ -260,6 +284,7 @@ def write_merged_vcf(output_file, header, merged_variants): for variant in merged_variants: f.write(variant + "\n") + @click.command() @click.argument("vcf1", type=click.Path(exists=True)) @click.argument("vcf2", type=click.Path(exists=True)) @@ -283,5 +308,6 @@ def main(vcf1: str, vcf2: str, output_file: str) -> None: # Write the merged VCF to output file write_merged_vcf(output_file, merged_header, merged_variants) + if __name__ == "__main__": - main() \ No newline at end of file + main()