-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLcount.py
90 lines (76 loc) · 3.06 KB
/
Lcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import pandas as pd
from ast import literal_eval
import numpy as np
from collections import defaultdict
class merger():
    """Collect per-sample tRNA mutation counts and pivot them into one table.

    State is held in three plain mappings filled by the methods below:

    * ``genes``  -- gene name -> list of position labels seen for that gene
      in any parsed file (insertion order, no duplicates).
    * ``muts``   -- sample key -> gene ->
      ``{"Count": total, "Pos": {position: count}}``.
    * ``counts`` -- output column name -> ``{"<gene>.p<pos>": value}``;
      filled by ``make_H_file`` and written by ``export_mutations_table``.
    """

    def __init__(self):
        self.genes = {}  # tRNA gene -> positions seen across all samples
        self.muts = {}  # sample -> gene -> total and per-position counts
        self.counts = defaultdict(dict)  # output column -> row label -> value

    def add_gene(self, gene, pos):
        """Record that *pos* was observed for *gene*; repeats are ignored."""
        known = self.genes.setdefault(gene, [])
        if pos not in known:
            known.append(pos)

    def add_muts(self, gene, sample, pos, cnt, totalcnt):
        """Store one per-position count for *gene* in *sample*.

        *totalcnt* overwrites the gene's stored ``"Count"`` on every call and
        *cnt* is stored under the whitespace-stripped *pos*.
        """
        entry = self.muts.setdefault(sample, {}).setdefault(
            gene, {"Count": 0, "Pos": {}}
        )
        entry["Count"] = totalcnt
        entry["Pos"][pos.strip()] = cnt

    def make_mutation_tables(self, file):
        """Parse one tab-separated count file into ``genes`` and ``muts``.

        A line whose first field starts with ``tRNA`` or ``m`` opens a new
        gene, and its second field is kept (as text) as that gene's total.
        Every other line is a position record: the first field is the
        position label and the second is a Python dict literal whose
        ``"count"`` entry is stored.  *file* itself is used as the sample key
        in ``muts``.

        Blank lines are skipped.  A non-blank line without a tab still raises
        ``IndexError``.
        """
        current_gene = ""
        totalcnt = 0
        with open(file, "r") as handle:
            for raw_line in handle:
                line = raw_line.strip()
                if not line:
                    # e.g. an empty line at the end of the file
                    continue
                fields = line.split("\t")
                key, payload = fields[0], fields[1]
                if key.startswith(("tRNA", "m")):
                    current_gene = key.strip()
                    totalcnt = payload
                    continue
                # Normalise once so both tables are keyed by the same label.
                pos = key.strip()
                cnt = literal_eval(payload)["count"]
                self.add_gene(current_gene, pos)
                self.add_muts(current_gene, file, pos, cnt, totalcnt)

    def make_H_file(self):
        """Fill ``counts`` with two columns per sample.

        For every sample in ``muts`` and every ``<gene>.p<pos>`` row known
        from ``genes``:

        * ``<sample>.mDA`` receives the count stored for that position;
        * ``<sample>.rDA`` receives the gene total minus that count, as a
          string.

        Both are the string ``"0"`` when the sample has no entry for the gene
        or for the position.  The sample name is the ``muts`` key with its
        last eight characters removed (the length of ``.tRNAcnt``).  Rows are
        visited in sorted order of their label text.
        """
        # Keep gene and position next to the label instead of re-parsing the
        # label later: splitting on "." breaks for names containing a dot.
        rows = []
        for gene, poses in self.genes.items():
            gene_name = gene.strip()
            for pos in poses:
                label = (gene_name + ".p" + pos).rstrip()
                rows.append((label, gene_name, pos.strip()))
        rows.sort(key=lambda row: row[0])

        for sample, by_gene in self.muts.items():
            name = sample[:-8]
            real_col = name + ".rDA"
            mut_col = name + ".mDA"
            for label, gene_name, pos in rows:
                entry = by_gene.get(gene_name)
                if entry is not None and pos in entry["Pos"]:
                    mutated = entry["Pos"][pos]
                    self.counts[real_col][label] = str(
                        int(entry["Count"]) - int(mutated)
                    )
                    self.counts[mut_col][label] = mutated
                else:
                    self.counts[real_col][label] = "0"
                    self.counts[mut_col][label] = "0"

    def export_mutations_table(self, outfile="countfile.txt"):
        """Write ``counts`` to *outfile* as a tab-separated table.

        Each key of ``counts`` becomes a column and each ``<gene>.p<pos>``
        label a row; both the header and the row index are written.
        """
        table = pd.DataFrame.from_dict(self.counts)
        table.to_csv(outfile, sep="\t", header=True, index=True)
def main():
    """Merge every ``.tRNAcnt`` file in the current directory into one table.

    Matching files are parsed in sorted name order, the per-sample columns
    are built, and the result is written to the default output file of
    ``export_mutations_table``.
    """
    collector = merger()
    count_files = sorted(
        name for name in os.listdir() if name.endswith(".tRNAcnt")
    )
    for count_file in count_files:
        collector.make_mutation_tables(count_file)
    collector.make_H_file()
    collector.export_mutations_table()


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()