-
Notifications
You must be signed in to change notification settings - Fork 0
/
Prob_agreement_by_chance.py
92 lines (74 loc) · 2.22 KB
/
Prob_agreement_by_chance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import sys, os
flank_match = "51M"
allowed_dis = 20
outs_path = "/home/qmtran/Indel_Analysis/Data_reads22_29/outs50/out"
fq_path = "/home/qmtran/Indel_Analysis/Data_reads22_29/reads50/"
sam_path = "/home/qmtran/Indel_Analysis/Data_reads22_29/reads50/alignment/"
fq_list_fn = sys.argv[1]
if __name__ == '__main__':
print ("###########################################################")
print ("Prob. of agreement by chance ")
print ("###########################################################")
#alist = ["Bowtie2."]
#alist = ["SHRiMP."]
#alist = ["Razers3."]
#alist = ["Cushaw2."]
#alist = ["Bwasw."]
#alist = ["Bwamem."]
#alist = ["Bwa."]
#alist = ["Gassst."]
#alist = ["Smalt."]
alist = ["Bowtie2.", "SHRiMP.", "Razers3.", "Cushaw2.", "Bwasw.", "Bwamem.", "Bwa.", "Gassst.", "Smalt."]
for f in open(fq_list_fn):
total = sum(1 for line in open(fq_path + f.strip()))
#print(f.strip())
c = 0
t = -1
m = {}
for outline in open(outs_path+f.strip()[:-3]+".txt"):
c+=1
if c%3 == 1 :
osl = outline.split()
#print(osl)
#print(osl[3])
t = int(osl[3])
if c%3 == 0 :
#print(outline)
m[outline.strip()] = t
#print(m)
#chr_agreed = 0
total_mapped = 0
total_r_oa = 0
for aligner in alist:
#print(aligner)
agreed = 0
mapped = 0
r_oa = 0
for line in open(sam_path + f[:-3] + aligner + "sam"):
if line[0] == "@":
pass
else:
sl = line.split()
#print(f)
#print(sl)
#print(int(sl[0][sl[0].find("##")+2:])-int(flank_match[:-1])) #true pos
#print(sl[3]) #estimate
if (sl[3] == ''):
continue
if (abs(int(sl[0][sl[0].find("##")+2:])-int(flank_match[:-1])-int(sl[3])) < allowed_dis):
mapped += 1
if sl[9] in m:
#print(m[sl[9]])
r_oa += 1.0/m[sl[9]]
total_r_oa += r_oa
if (sl[5] == '*'):
continue
if(sl[5].find('M') > 2):
continue
if (abs(int(sl[0][sl[0].find("##")+2:])-int(sl[3])) == int(sl[5][:(sl[5].find('M'))])-1):
agreed += 1
total_mapped += mapped
#chr_agreed += agreed
print(str(mapped)+"\t"+str(r_oa))
#print(str(total/4)+"\t"+str(total_mapped)+"\t"+str(r_oa/total_mapped))
print ("###########################################################")