Skip to content

Commit

Permalink
add warning for missing basemod
Browse files Browse the repository at this point in the history
  • Loading branch information
e-sollier committed Jun 4, 2024
1 parent 09308ca commit 43b77e8
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 102 deletions.
99 changes: 15 additions & 84 deletions figeno/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,63 +297,6 @@ def read_read_groups(samfile,region):
else: add_read_pile(read,reads_unphased,margin)
return reads_HP1, reads_HP2, reads_unphased, reads_all

"""
def decode_read_basemod(read,base="C",mod="m"):
if "H" in read.cigarstring: return [],0
index_seq2ref_coord = {}
for x in read.get_aligned_pairs():
if x[1] is not None:
index_seq2ref_coord[x[0]] = x[1]
if (base, 1, mod) in read.modified_bases:
methyl = read.modified_bases[(base, 1, mod)]
strand=1
elif (base, 0, mod) in read.modified_bases:
methyl = read.modified_bases[(base, 0, mod)]
strand=0
else:
return [],0
l=[]
for pos,lik in methyl:
if pos in index_seq2ref_coord:
if lik<110: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,0))
else: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,1))
return l,strand
def decode_read_basemods2(read,basemods):
#basemods is a list of 1 or 2 tuples: (base,mod,color)
if "H" in read.cigarstring: return [],0
index_seq2ref_coord = {}
for x in read.get_aligned_pairs():
if x[1] is not None:
index_seq2ref_coord[x[0]] = x[1]
if (basemods[0][0], 1, basemods[0][1]) in read.modified_bases:
strand=1
elif (basemods[0][0], 0, basemods[0][1]) in read.modified_bases:
strand=0
else:
return []
basemods1 = read.modified_bases[(basemods[0][0],strand,basemods[0][1])]
d={}
for pos,lik in basemods1:
if pos in index_seq2ref_coord:
if lik<110: d[index_seq2ref_coord[pos]] = 0
else: d[index_seq2ref_coord[pos]] = basemods[0][2]
if len(basemods)>1:
basemods2 = read.modified_bases[(basemods[1][0],strand,basemods[1][1])]
for pos,lik in basemods2:
if pos in index_seq2ref_coord:
if lik<110 and (not index_seq2ref_coord[pos] in d): d[index_seq2ref_coord[pos]] = 0
elif lik >=110: d[index_seq2ref_coord[pos]] = basemods[1][2]
l=[]
for x in sorted(d.keys()):
l.append((x,x+1,d[x]))
return l
"""

def reverse_complement(seq):
l=""
Expand Down Expand Up @@ -388,7 +331,7 @@ def fix_hardclipped_read(read,samfile):



def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True):
def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True,warnings=[]):
"""basemods is a list of 1 or 2 tuples: (base,mod,color)"""
if fix_hardclip_basemod: read = fix_hardclipped_read(read,samfile)
if "H" in read.cigarstring: return []
Expand All @@ -410,13 +353,19 @@ def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True):
ML_index=0
seq=read.get_forward_sequence()
if seq is None: return []
basemods_in_bam=[]
basemods_missing=set()
for b,m,c in basemods: basemods_missing.add((b,m))
d={} # reference position to color of base modification
for MM in MMs:
MM = MM.split(",")
base,mod = (MM[0][0],MM[0][2]) # Assume MM[0] is: base+mod?
basemods_in_bam.append("("+base+","+mod+")")
color=None
for b,m,c in basemods:
if b==base and m==mod: color=c
if b==base and m==mod:
color=c
basemods_missing.remove((b,m))
if color is None: # Ignore this basemod.
ML_index+= len(MM)-1
continue
Expand All @@ -435,32 +384,14 @@ def decode_read_basemods(read,basemods,samfile,fix_hardclip_basemod=True):
elif not ref_index in d: d[ref_index] = 0
ML_index+=1
seq_index+=1

if len(basemods_missing)>0:
basemods_missing=["("+b+","+m+")" for b,m in basemods_missing]
warning_message="The following base modifications were not found in the bam file: "+", ".join(basemods_missing)+".\n"
warning_message+="Only the following base modifications were found in the bam file: "+", ".join(basemods_in_bam)+"."
if not warning_message in warnings:
warnings.append(warning_message)
l=[]
for x in sorted(d.keys()):
l.append((x,x+1,d[x]))
return l

"""
def decode_read_m_hm(read):
index_seq2ref_coord = {}
for x in read.get_aligned_pairs():
if x[1] is not None:
index_seq2ref_coord[x[0]] = x[1]
if ("C", 1, "m") in read.modified_bases:
strand=1
elif ("C", 0, "m") in read.modified_bases:
strand=0
else:
return [],0
l=[]
for i in range(len(read.modified_bases[("C",strand,"m")])):
pos = read.modified_bases[("C",strand,"m")][i][0]
lik_m = read.modified_bases[("C",strand,"m")][i][1]
lik_h = read.modified_bases[("C",strand,"h")][i][1]
if pos in index_seq2ref_coord:
if lik_m>120: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,1))
elif lik_h>120: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,2))
else: l.append((index_seq2ref_coord[pos],index_seq2ref_coord[pos]+1,0))
return l,strand
"""
2 changes: 1 addition & 1 deletion figeno/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from figeno.cli import gui, init,make

__version__ = "1.2.0"
__version__ = "1.2.1"

def main():
parser = ArgumentParser("figeno",formatter_class=ArgumentDefaultsHelpFormatter)
Expand Down
2 changes: 1 addition & 1 deletion figeno/gui/gui_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def run():
warnings=[]
figeno_make(data,warnings=warnings)
warning="\n\n".join(warnings)
if warning!="": print(warning)
#if warning!="": print(warning)
return jsonify({"status":"success","message":data["output"]["file"],"warning":warning})
except KnownException as e:
print(str(e))
Expand Down
2 changes: 1 addition & 1 deletion figeno/gui/gui_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def run():
warnings=[]
figeno_make(data,warnings=warnings)
warning="\n\n".join(warnings)
if warning!="": print(warning)
#if warning!="": print(warning)
return jsonify({"status":"success","message":data["output"]["file"],"warning":warning})
except KnownException as e:
print(str(e))
Expand Down
2 changes: 1 addition & 1 deletion figeno/gui/gui_webview.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def run():
warnings=[]
figeno_make(data,warnings=warnings)
warning="\n\n".join(warnings)
if warning!="": print(warning)
#if warning!="": print(warning)
return jsonify({"status":"success","message":data["output"]["file"],"warning":warning})
except KnownException as e:
print(str(e))
Expand Down
2 changes: 1 addition & 1 deletion figeno/gui/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "figeno",
"version": "1.2.0",
"version": "1.2.1",
"private": true,
"homepage": "./",
"dependencies": {
Expand Down
6 changes: 3 additions & 3 deletions figeno/track_alignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,15 @@ def draw(self, regions, box ,hmargin,warnings=[]):
#self.update_group_sizes(regions,box) # Compute group sizes so that the same borders can be used for all regions
boxes = split_box(box,regions,hmargin)
for i in range(len(regions)):
self.draw_region(regions[i][0],boxes[i],self.region_group_piles[i])
self.draw_region(regions[i][0],boxes[i],self.region_group_piles[i],warnings=warnings)
if self.link_splitreads:
self.draw_splitread_lines(box)
self.draw_title(box)

for x in self.kwargs:
warnings.append(x+" parameter was ignored in the alignments track because it is not one of the accepted parameters.")

def draw_region(self,region,box,group_piles):
def draw_region(self,region,box,group_piles,warnings=[]):
if abs(region.end-region.start)>1e6: print("WARNING: you are using an alignments track for a region larger than 1Mb. This might be very slow; the alignments track is intended for regions < 100kb.")
if self.bounding_box:
draw_bounding_box(box)
Expand Down Expand Up @@ -226,7 +226,7 @@ def convert_y(y):
if self.color_by=="basemod":
if (not "projection" in box) or box["projection"]!="polar": patches_methyl=[]
#methyl = decode_read_basemod(read,"C","m")[0]
basemods = decode_read_basemods(read,self.basemods,self.samfile,fix_hardclip_basemod=self.fix_hardclip_basemod)
basemods = decode_read_basemods(read,self.basemods,self.samfile,fix_hardclip_basemod=self.fix_hardclip_basemod,warnings=warnings)
#methyl = decode_read_m_hm(read)[0]
methyl_merged = merge_methylation_rectangles(basemods,int(region.end-region.start)/1000)
for rect_start,rect_end,color in methyl_merged:
Expand Down
18 changes: 9 additions & 9 deletions figeno/track_basemodfreq.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,22 @@ def __init__(self,style="lines",smooth=4,gap_frac=0.1,bams=[],bedmethyls=[],show
def draw(self, regions, box ,hmargin,warnings=[]):
boxes = split_box(box,regions,hmargin)
for i in range(len(regions)):
self.draw_region(regions[i][0],boxes[i])
self.draw_region(regions[i][0],boxes[i],warnings=warnings)
self.draw_title(box)
if self.is_empty:
warnings.append("No data was found in the displayed regions for the basemod_freq track.")

for x in self.kwargs:
warnings.append(x+" parameter was ignored in the basemod_freq track because it is not one of the accepted parameters.")

def draw_region(self,region,box):
def draw_region(self,region,box,warnings=[]):
margin_y = (box["top"]-box["bottom"]) * 0.03

if self.bounding_box:
draw_bounding_box(box)

for bam_params in self.bams:
self.draw_region_bam(region,box,**bam_params)
self.draw_region_bam(region,box,**bam_params,warnings=warnings)

for bedmethyl_params in self.bedmethyls:
self.draw_region_bedmethyl(region,box,**bedmethyl_params)
Expand All @@ -68,7 +68,7 @@ def draw_region(self,region,box):
box["ax"].add_patch(rect)
box["ax"].text(legend_x+legend_width*1.1,legend_y - hp*legend_height*3.0,"Haplotype "+str(hp+1),horizontalalignment="left",verticalalignment="center")

def draw_region_bam(self,region,box,file,base,mod,min_coverage=5,linewidth=3,opacity=1,fix_hardclip=False,split_by_haplotype=False,labels=[""],colors=["#27ae60"]):
def draw_region_bam(self,region,box,file,base,mod,min_coverage=5,linewidth=3,opacity=1,fix_hardclip=False,split_by_haplotype=False,labels=[""],colors=["#27ae60"],warnings=[]):
min_coverage=int(min_coverage)
linewidth=float(linewidth)
opacity=float(opacity)
Expand Down Expand Up @@ -97,11 +97,11 @@ def transform_coord(pos):
# Read methylation
reads_HP1,reads_HP2,reads_unphased,reads_all = read_read_groups(samfile,region)
if split_by_haplotype:
df_met1 = smooth_methylation(create_basemod_table_bam(reads_HP1,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip),w=self.smooth,start=region.start,end=region.end)
df_met2 = smooth_methylation(create_basemod_table_bam(reads_HP2,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip),w=self.smooth,start=region.start,end=region.end)
df_met1 = smooth_methylation(create_basemod_table_bam(reads_HP1,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip,warnings=warnings),w=self.smooth,start=region.start,end=region.end)
df_met2 = smooth_methylation(create_basemod_table_bam(reads_HP2,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip,warnings=warnings),w=self.smooth,start=region.start,end=region.end)
dfs = [df_met1,df_met2]
else:
dfs = [smooth_methylation(create_basemod_table_bam(reads_all,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip),w=self.smooth,start=region.start,end=region.end)]
dfs = [smooth_methylation(create_basemod_table_bam(reads_all,base,mod,region.chr,region.start-200,region.end+200,min_coverage=min(4,min_coverage),samfile=samfile,fix_hardclip=fix_hardclip,warnings=warnings),w=self.smooth,start=region.start,end=region.end)]

for j in range(len(dfs)):
df = dfs[j]
Expand Down Expand Up @@ -251,14 +251,14 @@ def smooth_methylation(df,w=2,start=None,end=None):
if end is not None: df = df.loc[df["pos"]<=end,:]
return df

def create_basemod_table_bam(reads,base,mod,chr,start,end,samfile=None,fix_hardclip=False,min_coverage=5):
def create_basemod_table_bam(reads,base,mod,chr,start,end,samfile=None,fix_hardclip=False,min_coverage=5,warnings=[]):
d={"chr":[],"pos":[],"metPercentage":[],"coverage":[]}

pos2methylated={}
pos2unmethylated={}
for x in reads:
for read in x:
methyl = decode_read_basemods(read,[(base,mod,1)],samfile,fix_hardclip)
methyl = decode_read_basemods(read,[(base,mod,1)],samfile,fix_hardclip,warnings=warnings)
for pos,end2,state in methyl:
if base=="C" and (read.flag&16)!=0:
pos-=1 # if reverse strand, consider the position of the C in the CpG in the forward orientation.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ packages = ["figeno", "figeno.data", "figeno.cli", "figeno.gui"]

[project]
name = 'figeno'
version = "1.2.0"
version = "1.2.1"
description = 'Package for generating genomics figures.'
readme = 'README.md'
authors = [
Expand Down

0 comments on commit 43b77e8

Please sign in to comment.