From 43b77e8564607b452926dfb5e0264d909b70a4c5 Mon Sep 17 00:00:00 2001 From: e-sollier Date: Tue, 4 Jun 2024 10:43:04 +0200 Subject: [PATCH] add warning for missing basemod --- figeno/bam.py | 99 ++++++------------------------------- figeno/cli/cli.py | 2 +- figeno/gui/gui_browser.py | 2 +- figeno/gui/gui_server.py | 2 +- figeno/gui/gui_webview.py | 2 +- figeno/gui/package.json | 2 +- figeno/track_alignments.py | 6 +-- figeno/track_basemodfreq.py | 18 +++---- pyproject.toml | 2 +- 9 files changed, 33 insertions(+), 102 deletions(-) diff --git a/figeno/bam.py b/figeno/bam.py index 32321dc..405304f 100644 --- a/figeno/bam.py +++ b/figeno/bam.py @@ -297,63 +297,6 @@ def read_read_groups(samfile,region): else: add_read_pile(read,reads_unphased,margin) return reads_HP1, reads_HP2, reads_unphased, reads_all -""" -def decode_read_basemod(read,base="C",mod="m"): - if "H" in read.cigarstring: return [],0 - index_seq2ref_coord = {} - for x in read.get_aligned_pairs(): - if x[1] is not None: - index_seq2ref_coord[x[0]] = x[1] - - if (base, 1, mod) in read.modified_bases: - methyl = read.modified_bases[(base, 1, mod)] - strand=1 - elif (base, 0, mod) in read.modified_bases: - methyl = read.modified_bases[(base, 0, mod)] - strand=0 - else: - return [],0 - l=[] - for pos,lik in methyl: - if pos in index_seq2ref_coord: - if lik<110: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,0)) - else: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,1)) - return l,strand - -def decode_read_basemods2(read,basemods): - #basemods is a list of 1 or 2 tuples: (base,mod,color) - if "H" in read.cigarstring: return [],0 - index_seq2ref_coord = {} - for x in read.get_aligned_pairs(): - if x[1] is not None: - index_seq2ref_coord[x[0]] = x[1] - - if (basemods[0][0], 1, basemods[0][1]) in read.modified_bases: - strand=1 - elif (basemods[0][0], 0, basemods[0][1]) in read.modified_bases: - strand=0 - else: - return [] - - basemods1 = read.modified_bases[(basemods[0][0],strand,basemods[0][1])] - d={} - for pos,lik in basemods1: - if pos in index_seq2ref_coord: - if lik<110: d[index_seq2ref_coord[pos]] = 0 - else: d[index_seq2ref_coord[pos]] = basemods[0][2] - - if len(basemods)>1: - basemods2 = read.modified_bases[(basemods[1][0],strand,basemods[1][1])] - for pos,lik in basemods2: - if pos in index_seq2ref_coord: - if lik<110 and (not index_seq2ref_coord[pos] in d): d[index_seq2ref_coord[pos]] = 0 - elif lik >=110: d[index_seq2ref_coord[pos]] = basemods[1][2] - - l=[] - for x in sorted(d.keys()): - l.append((x,x+1,d[x])) - return l -""" def reverse_complement(seq): l="" @@ -388,7 +331,7 @@ def fix_hardclipped_read(read,samfile): -def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True): +def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True,warnings=[]): """basemods is a list of 1 or 2 tuples: (base,mod,color)""" if fix_hardclip_basemod: read = fix_hardclipped_read(read,samfile) if "H" in read.cigarstring: return [] @@ -410,13 +353,19 @@ def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True): ML_index=0 seq=read.get_forward_sequence() if seq is None: return [] + basemods_in_bam=[] + basemods_missing=set() + for b,m,c in basemods: basemods_missing.add((b,m)) d={} # reference position to color of base modification for MM in MMs: MM = MM.split(",") base,mod = (MM[0][0],MM[0][2]) # Assume MM[0] is: base+mod? + basemods_in_bam.append("("+base+","+mod+")") color=None for b,m,c in basemods: - if b==base and m==mod: color=c + if b==base and m==mod: + color=c + basemods_missing.remove((b,m)) if color is None: # Ignore this basemod. ML_index+= len(MM)-1 continue @@ -435,32 +384,14 @@ def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True): elif not ref_index in d: d[ref_index] = 0 ML_index+=1 seq_index+=1 + + if len(basemods_missing)>0: + basemods_missing=["("+b+","+m+")" for b,m in basemods_missing] + warning_message="The following base modifications were not found in the bam file: "+", ".join(basemods_missing)+".\n" + warning_message+="Only the following base modifications were found in the bam file: "+", ".join(basemods_in_bam)+"." + if not warning_message in warnings: + warnings.append(warning_message) l=[] for x in sorted(d.keys()): l.append((x,x+1,d[x])) return l - -""" -def decode_read_m_hm(read): - index_seq2ref_coord = {} - for x in read.get_aligned_pairs(): - if x[1] is not None: - index_seq2ref_coord[x[0]] = x[1] - if ("C", 1, "m") in read.modified_bases: - strand=1 - elif ("C", 0, "m") in read.modified_bases: - strand=0 - else: - return [],0 - l=[] - for i in range(len(read.modified_bases[("C",strand,"m")])): - pos = read.modified_bases[("C",strand,"m")][i][0] - lik_m = read.modified_bases[("C",strand,"m")][i][1] - lik_h = read.modified_bases[("C",strand,"h")][i][1] - if pos in index_seq2ref_coord: - if lik_m>120: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,1)) - elif lik_h>120: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,2)) - else: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,0)) - return l,strand - -""" \ No newline at end of file diff --git a/figeno/cli/cli.py b/figeno/cli/cli.py index 808844d..046b482 100644 --- a/figeno/cli/cli.py +++ b/figeno/cli/cli.py @@ -2,7 +2,7 @@ from figeno.cli import gui, init,make -__version__ = "1.2.0" +__version__ = "1.2.1" def main(): parser = ArgumentParser("figeno",formatter_class=ArgumentDefaultsHelpFormatter) diff --git a/figeno/gui/gui_browser.py b/figeno/gui/gui_browser.py index 290769e..e089b0d 100644 --- a/figeno/gui/gui_browser.py +++ b/figeno/gui/gui_browser.py @@ -94,7 +94,7 @@ def run(): warnings=[] figeno_make(data,warnings=warnings) warning="\n\n".join(warnings) - if warning!="": print(warning) + #if warning!="": print(warning) return jsonify({"status":"success","message":data["output"]["file"],"warning":warning}) except KnownException as e: print(str(e)) diff --git a/figeno/gui/gui_server.py b/figeno/gui/gui_server.py index 212cf50..2cdbbcd 100644 --- a/figeno/gui/gui_server.py +++ b/figeno/gui/gui_server.py @@ -79,7 +79,7 @@ def run(): warnings=[] figeno_make(data,warnings=warnings) warning="\n\n".join(warnings) - if warning!="": print(warning) + #if warning!="": print(warning) return jsonify({"status":"success","message":data["output"]["file"],"warning":warning}) except KnownException as e: print(str(e)) diff --git a/figeno/gui/gui_webview.py b/figeno/gui/gui_webview.py index 41caefc..198cbe1 100644 --- a/figeno/gui/gui_webview.py +++ b/figeno/gui/gui_webview.py @@ -103,7 +103,7 @@ def run(): warnings=[] figeno_make(data,warnings=warnings) warning="\n\n".join(warnings) - if warning!="": print(warning) + #if warning!="": print(warning) return jsonify({"status":"success","message":data["output"]["file"],"warning":warning}) except KnownException as e: print(str(e)) diff --git a/figeno/gui/package.json b/figeno/gui/package.json index 2e2d0d7..633c8ee 100644 --- a/figeno/gui/package.json +++ b/figeno/gui/package.json @@ -1,6 +1,6 @@ { "name": "figeno", - "version": "1.2.0", + "version": "1.2.1", "private": true, "homepage": "./", "dependencies": { diff --git a/figeno/track_alignments.py b/figeno/track_alignments.py index 9ee5fca..35d425a 100644 --- a/figeno/track_alignments.py +++ b/figeno/track_alignments.py @@ -100,7 +100,7 @@ def draw(self, regions, box ,hmargin,warnings=[]): #self.update_group_sizes(regions,box) # Compute group sizes so that the same borders can be used for all regions boxes = split_box(box,regions,hmargin) for i in range(len(regions)): - self.draw_region(regions[i][0],boxes[i],self.region_group_piles[i]) + self.draw_region(regions[i][0],boxes[i],self.region_group_piles[i],warnings=warnings) if self.link_splitreads: self.draw_splitread_lines(box) self.draw_title(box) @@ -108,7 +108,7 @@ def draw(self, regions, box ,hmargin,warnings=[]): for x in self.kwargs: warnings.append(x+" parameter was ignored in the alignments track because it is not one of the accepted parameters.") - def draw_region(self,region,box,group_piles): + def draw_region(self,region,box,group_piles,warnings=[]): if abs(region.end-region.start)>1e6: print("WARNING: you are using an alignments track for a region larger than 1Mb. This might be very slow; the alignments track is intended for regions < 100kb.") if self.bounding_box: draw_bounding_box(box) @@ -226,7 +226,7 @@ def convert_y(y): if self.color_by=="basemod": if (not "projection" in box) or box["projection"]!="polar": patches_methyl=[] #methyl = decode_read_basemod(read,"C","m")[0] - basemods = decode_read_basemods(read,self.basemods,self.samfile,fix_hardclip_basemod=self.fix_hardclip_basemod) + basemods = decode_read_basemods(read,self.basemods,self.samfile,fix_hardclip_basemod=self.fix_hardclip_basemod,warnings=warnings) #methyl = decode_read_m_hm(read)[0] methyl_merged = merge_methylation_rectangles(basemods,int(region.end-region.start)/1000) for rect_start,rect_end,color in methyl_merged: diff --git a/figeno/track_basemodfreq.py b/figeno/track_basemodfreq.py index aa7fae8..0ba1afc 100644 --- a/figeno/track_basemodfreq.py +++ b/figeno/track_basemodfreq.py @@ -37,7 +37,7 @@ def __init__(self,style="lines",smooth=4,gap_frac=0.1,bams=[],bedmethyls=[],show def draw(self, regions, box ,hmargin,warnings=[]): boxes = split_box(box,regions,hmargin) for i in range(len(regions)): - self.draw_region(regions[i][0],boxes[i]) + self.draw_region(regions[i][0],boxes[i],warnings=warnings) self.draw_title(box) if self.is_empty: warnings.append("No data was found in the displayed regions for the basemod_freq track.") @@ -45,14 +45,14 @@ def draw(self, regions, box ,hmargin,warnings=[]): for x in self.kwargs: warnings.append(x+" parameter was ignored in the basemod_freq track because it is not one of the accepted parameters.") - def draw_region(self,region,box): + def draw_region(self,region,box,warnings=[]): margin_y = (box["top"]-box["bottom"]) * 0.03 if self.bounding_box: draw_bounding_box(box) for bam_params in self.bams: - self.draw_region_bam(region,box,**bam_params) + self.draw_region_bam(region,box,**bam_params,warnings=warnings) for bedmethyl_params in self.bedmethyls: self.draw_region_bedmethyl(region,box,**bedmethyl_params) @@ -68,7 +68,7 @@ def draw_region(self,region,box): box["ax"].add_patch(rect) box["ax"].text(legend_x+legend_width*1.1,legend_y - hp*legend_height*3.0,"Haplotype "+str(hp+1),horizontalalignment="left",verticalalignment="center") - def draw_region_bam(self,region,box,file,base,mod,min_coverage=5,linewidth=3,opacity=1,fix_hardclip=False,split_by_haplotype=False,labels=[""],colors=["#27ae60"]): + def draw_region_bam(self,region,box,file,base,mod,min_coverage=5,linewidth=3,opacity=1,fix_hardclip=False,split_by_haplotype=False,labels=[""],colors=["#27ae60"],warnings=[]): min_coverage=int(min_coverage) linewidth=float(linewidth) opacity=float(opacity) @@ -97,11 +97,11 @@ def transform_coord(pos): # Read methylation reads_HP1,reads_HP2,reads_unphased,reads_all = read_read_groups(samfile,region) if split_by_haplotype: - df_met1 = smooth_methylation(create_basemod_table_bam(reads_HP1,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip),w=self.smooth,start=region.start,end=region.end) - df_met2 = smooth_methylation(create_basemod_table_bam(reads_HP2,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip),w=self.smooth,start=region.start,end=region.end) + df_met1 = smooth_methylation(create_basemod_table_bam(reads_HP1,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip,warnings=warnings),w=self.smooth,start=region.start,end=region.end) + df_met2 = smooth_methylation(create_basemod_table_bam(reads_HP2,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip,warnings=warnings),w=self.smooth,start=region.start,end=region.end) dfs = [df_met1,df_met2] else: - dfs = [smooth_methylation(create_basemod_table_bam(reads_all,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip),w=self.smooth,start=region.start,end=region.end)] + dfs = [smooth_methylation(create_basemod_table_bam(reads_all,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip,warnings=warnings),w=self.smooth,start=region.start,end=region.end)] for j in range(len(dfs)): df = dfs[j] @@ -251,14 +251,14 @@ def smooth_methylation(df,w=2,start=None,end=None): if end is not None: df = df.loc[df["pos"]<=end,:] return df -def create_basemod_table_bam(reads,base,mod,chr,start,end,samfile=None,fix_hardclip=False,min_coverage=5): +def create_basemod_table_bam(reads,base,mod,chr,start,end,samfile=None,fix_hardclip=False,min_coverage=5,warnings=[]): d={"chr":[],"pos":[],"metPercentage":[],"coverage":[]} pos2methylated={} pos2unmethylated={} for x in reads: for read in x: - methyl = decode_read_basemods(read,[(base,mod,1)],samfile,fix_hardclip) + methyl = decode_read_basemods(read,[(base,mod,1)],samfile,fix_hardclip,warnings=warnings) for pos,end2,state in methyl: if base=="C" and (read.flag&16)!=0: pos-=1 # if reverse strand, consider the position of the C in the CpG in the forward orientation. diff --git a/pyproject.toml b/pyproject.toml index abfde06..cb15870 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ packages = ["figeno", "figeno.data", "figeno.cli", "figeno.gui"] [project] name = 'figeno' -version = "1.2.0" +version = "1.2.1" description = 'Package for generating genomics figures.' readme = 'README.md' authors = [