# ExtractClusters.py
import os
import sys
def _read_sequences(input_path):
    """Collect the FASTA records found between ``<seq>`` and ``</seq>``.

    Records are numbered in file order starting at 0; a new record starts at
    every line beginning with ``>`` and includes all lines up to the next
    header. Line endings are kept so records can be written back verbatim.

    Returns a dict mapping record number -> record text (header + body).
    """
    records = {}
    index = -1
    in_section = False
    with open(input_path, 'rt') as handle:
        for line in handle:
            if not in_section:
                # Skip everything up to and including the opening marker.
                in_section = line.rstrip() == "<seq>"
                continue
            if line.rstrip() == "</seq>":
                break
            if line.startswith(">"):
                index += 1
                records[index] = [line]
            elif index >= 0:
                # Lines before the first header belong to no record and are
                # ignored (the flat script used to fail with KeyError here).
                records[index].append(line)
    return dict((number, "".join(parts)) for number, parts in records.items())


def _write_cluster_files(input_path, sequences, output_folder):
    """Write one ``.fasta`` file per group listed in ``<seqgroups>``.

    Inside the section, a ``name=<text>`` line selects the output file
    (``<text>`` with spaces removed, plus ``.fasta``) and the following
    ``numbers=<i>;<j>;...`` line lists the record numbers to write to it.
    Other ``key=value`` lines are ignored.

    Raises:
        ValueError: a ``numbers`` line appears before any ``name`` line, or
            an entry in the list is not an integer.
        KeyError: a listed number has no matching record in ``sequences``.
    """
    out_path = None
    in_section = False
    with open(input_path, 'rt') as handle:
        for line in handle:
            if not in_section:
                in_section = line.rstrip() == "<seqgroups>"
                continue
            if line.rstrip() == "</seqgroups>":
                break
            # Split on the first "=" only, so a group name that itself
            # contains "=" is kept whole.
            key, _, value = line.partition("=")
            if key == "name":
                file_name = value.replace(" ", "").strip() + ".fasta"
                out_path = os.path.join(output_folder, file_name)
            elif key == "numbers":
                if out_path is None:
                    raise ValueError(
                        "'numbers' entry found before any 'name' entry "
                        "in <seqgroups>")
                # Drop empty tokens so a trailing ";" or an empty list
                # ("numbers=") does not end up in int("").
                tokens = [tok for tok in value.strip().split(";")
                          if tok.strip()]
                with open(out_path, 'w') as cluster_file:
                    for tok in tokens:
                        cluster_file.write(sequences[int(tok)])


def main(argv=None):
    """Split the sequences of an input file into one FASTA file per group.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the input file and
    ``argv[2]`` the output folder, which is created if it does not exist.
    The expected layout (``<seq>`` / ``<seqgroups>`` sections) looks like a
    CLANS save file -- NOTE(review): confirm that is the intended input.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit("usage: ExtractClusters.py <inputFile> <outputFolder>")
    inputFileName = args[1]
    outputFolderName = args[2]

    if not os.path.isdir(outputFolderName):
        os.makedirs(outputFolderName)

    # Two independent passes over the input, so the result does not depend
    # on which of the two sections comes first in the file.
    sequences = _read_sequences(inputFileName)
    _write_cluster_files(inputFileName, sequences, outputFolderName)


if __name__ == "__main__":
    main()