-
Notifications
You must be signed in to change notification settings - Fork 0
/
GPCR_Project_func.py
100 lines (83 loc) · 3.32 KB
/
GPCR_Project_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
def process_SMAP(SMAP_output):
    """Parse an SMAP output file into a dict of protein names and alignments.

    Four kinds of lines are recognised (each must start at column 0):

    * ``>Query Chain:..._``  -- names the query protein
    * ``Template=..._``      -- names the template protein
    * ``Query:...``          -- alignment row, stored under the query protein
    * ``Template:...``       -- alignment row, stored under the template protein

    A protein name is the four characters immediately before the last
    underscore on its header line.  An alignment is the text from column 10
    onwards of its row, split on single spaces.

    Args:
        SMAP_output: path of the SMAP output file to read.

    Returns:
        dict mapping each protein name to the most recently seen alignment
        for it (a list of strings), or to None when no alignment row has
        followed its header line.

    Raises:
        ValueError: if an alignment row appears before the header line that
            names the protein it belongs to.
    """
    import re

    # Header lines: the greedy '.+_' runs up to the LAST underscore on the
    # line, so [-5:-1] below picks the four characters just before it.
    templ_prot_re = re.compile(r'Template=.+_')
    query_prot_re = re.compile(r'>Query Chain:.+_')
    # Alignment rows.  These two patterns used to be swapped, which filed the
    # 'Query:' row under the template protein and vice versa.
    # NOTE(review): confirm against real SMAP output that this is the
    # intended pairing.
    templ_prot_aln_re = re.compile(r'Template:.+')
    query_prot_aln_re = re.compile(r'Query:.+')

    SMAP_dict = {}
    templ_prot = None
    query_prot = None
    with open(SMAP_output, 'r') as output:
        for line in output:
            # The four patterns cannot match the same line, so stop at the
            # first hit and reuse its match object instead of re-scanning.
            found = templ_prot_re.match(line)
            if found:
                templ_prot = found.group(0)[-5:-1]
                SMAP_dict[templ_prot] = None
                continue

            found = query_prot_re.match(line)
            if found:
                query_prot = found.group(0)[-5:-1]
                SMAP_dict[query_prot] = None
                continue

            found = templ_prot_aln_re.match(line)
            if found:
                if templ_prot is None:
                    raise ValueError(
                        "'Template:' alignment row found before any "
                        "'Template=' header line"
                    )
                SMAP_dict[templ_prot] = found.group(0)[10:].split(' ')
                continue

            found = query_prot_aln_re.match(line)
            if found:
                if query_prot is None:
                    raise ValueError(
                        "'Query:' alignment row found before any "
                        "'>Query Chain:' header line"
                    )
                SMAP_dict[query_prot] = found.group(0)[10:].split(' ')
    return SMAP_dict
def records_to_list(filename):
    """Read every record of a FASTA file into a list.

    Args:
        filename: path of the FASTA file to read.

    Returns:
        list of the records produced by ``SeqIO.parse(handle, 'fasta')``,
        in file order.
    """
    from Bio import SeqIO

    # Plain 'r' instead of 'rU': the 'U' flag is rejected by open() on
    # Python 3.11+, and text mode already translates newlines on Python 3.
    with open(filename, 'r') as handle:
        return list(SeqIO.parse(handle, 'fasta'))
def write_data_to_csv(data, filename):
    """Append rows to a comma-delimited CSV file.

    Args:
        data: iterable of rows, each row an iterable of field values, as
            accepted by ``csv.writer.writerows``.
        filename: path of the CSV file; created if it does not exist,
            otherwise the rows are appended to it.
    """
    import csv
    import sys

    # The csv module needs a text-mode file opened with newline='' on
    # Python 3 (binary mode raises TypeError there), but a binary-mode file
    # on Python 2.
    if sys.version_info[0] >= 3:
        output = open(filename, 'a', newline='')
    else:
        output = open(filename, 'ab')
    with output:
        csv.writer(output, delimiter=',').writerows(data)
def read_csv(filename):
    """Open a comma-delimited CSV file and return an iterator over its rows.

    The file is opened immediately, so a missing or unreadable file raises
    at call time.  It is closed once the returned iterator is exhausted
    (or closed / garbage-collected).

    Args:
        filename: path of the CSV file to read.

    Returns:
        An iterator yielding each row as a list of strings.
    """
    import csv
    import sys

    # The csv module needs a text-mode file opened with newline='' on
    # Python 3 (binary mode makes the reader fail there), but a binary-mode
    # file on Python 2.
    if sys.version_info[0] >= 3:
        csvfile = open(filename, 'r', newline='')
    else:
        csvfile = open(filename, 'rb')

    def _rows():
        # Owning the handle inside the generator lets it be closed when
        # iteration finishes instead of being left open.
        with csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                yield row

    return _rows()
def dict_to_csv(dictfile, csv_name):
    """Write a dictionary to a CSV file by way of a pandas DataFrame.

    Args:
        dictfile: dictionary passed to ``DataFrame.from_dict``.
        csv_name: destination path passed to ``DataFrame.to_csv``.
    """
    from pandas import DataFrame

    DataFrame.from_dict(dictfile).to_csv(csv_name)
def clean_records(records_list):
    """Placeholder: not implemented yet.

    Args:
        records_list: currently ignored.

    Returns:
        None.
    """
    return None
def aln_merger(seq_1, seq_2):
    """Merge two gapped alignments of the same sequence into one alignment.

    Mutable copies of both inputs are compared position by position.  Where
    the characters differ and one of them is a gap ('-'), a gap is inserted
    into the other copy at that index.

    Args:
        seq_1: first alignment; must provide ``tomutable()``
            (presumably a Bio.Seq.Seq -- confirm against callers).
        seq_2: second alignment; same requirements as ``seq_1``.

    Returns:
        * the first copy, converted with ``toseq()``, when both copies
          compare equal after gap insertion;
        * otherwise the second copy converted with ``toseq()`` -- after
          trimming trailing characters from whichever copy is longer;
        * None (implicitly) when the copies have the same length but do
          not compare equal.
    """
    # NOTE(review): MutableSeq is imported but never referenced by name below.
    from Bio.Seq import MutableSeq
    # NOTE(review): tomutable()/toseq() appear to have been removed from
    # recent Biopython releases -- confirm against the version in use.
    mut_seq_1 = seq_1.tomutable()
    mut_seq_2 = seq_2.tomutable()
    # Only the pairs with i_1 == i_2 are acted on, so this nested scan is
    # effectively a position-by-position comparison of the two copies.
    # NOTE(review): both copies are modified while they are being enumerated;
    # the outcome depends on how MutableSeq iteration reacts to insertions.
    for i_1, char_1 in enumerate(mut_seq_1):
        for i_2, char_2 in enumerate(mut_seq_2):
            if i_1 == i_2:
                if char_1 != char_2:
                    if char_1 == '-':
                        # Gap only in the first copy: mirror it in the second.
                        mut_seq_2.insert(i_2, '-')
                    elif char_2 == '-':
                        # Gap only in the second copy: mirror it in the first.
                        mut_seq_1.insert(i_1, '-')
                    # A mismatch where neither character is a gap is left alone.
    if mut_seq_1 == mut_seq_2:
        return mut_seq_1.toseq()
    elif len(mut_seq_1) > len(mut_seq_2):
        # NOTE(review): this trims the first copy but returns the second --
        # confirm that is intended.
        for num in range(len(mut_seq_1) - len(mut_seq_2)):
            mut_seq_1.pop()
        return mut_seq_2.toseq()
    elif len(mut_seq_2) > len(mut_seq_1):
        # Drop trailing characters so the second copy matches the first in length.
        for num in range(len(mut_seq_2) - len(mut_seq_1)):
            mut_seq_2.pop()
        return mut_seq_2.toseq()
#seq1 = Seq('-AAA-G--F--')
#seq2 = Seq('ACAG-F-')
#
#merge_seq = aln_merger(seq1, seq2)
#print merge_seq