-
Notifications
You must be signed in to change notification settings - Fork 0
/
rmdup_seqs.py
57 lines (46 loc) · 1.39 KB
/
rmdup_seqs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
import argparse
__author__ = 'Connor Morgan-Lang'
def set_arguments():
    """Define the command-line options and parse the process arguments.

    :return: The argparse namespace with two attributes: ``fasta`` (the
        required input path) and ``output`` (the output path, which falls
        back to "duprm.fasta" when the option is not given).
    """
    cli = argparse.ArgumentParser()
    # The input file is mandatory; argparse exits with a usage error without it.
    cli.add_argument("-i", "--fasta", required=True,
                     help="The fasta file to be subsetted.")
    cli.add_argument("-o", "--output", required=False, default="duprm.fasta",
                     help="The output fasta file [DEFAULT = duprm.fasta]")
    return cli.parse_args()
def read_fasta(fasta):
    """Parse a FASTA file into a dictionary keyed by header line.

    A header is any line starting with '>'; it is stored with surrounding
    whitespace stripped and the leading '>' kept. The lines that follow a
    header are stripped and concatenated into one sequence string. When the
    same header occurs more than once, the sequence of its last occurrence
    is the one kept. Lines that appear before the first header are ignored.

    :param fasta: Path to the FASTA file to read.
    :return: Dictionary mapping each header line to its sequence. The
        dictionary is empty when the file contains no header line.
    """
    seqs = dict()
    header = ""
    # Collect the pieces of the current sequence and join them once per
    # record instead of growing a string line by line.
    chunks = []
    with open(fasta) as fas:
        for line in fas:
            if line.startswith('>'):
                if header:
                    seqs[header] = "".join(chunks)
                header = line.strip()
                chunks = []
            else:
                chunks.append(line.strip())
    # Flush the final record, but only if a header was ever seen; otherwise
    # an empty or header-less file would produce a bogus "" entry.
    if header:
        seqs[header] = "".join(chunks)
    return seqs
def write_unique(seqs, output):
    """Write every header and its sequence to a FASTA file.

    Each record is written as two lines: the header, then the sequence.
    Records appear in the iteration order of ``seqs``. The output file is
    created, or truncated if it already exists.

    :param seqs: Mapping of header line to sequence string.
    :param output: Path of the FASTA file to write.
    :return: None
    """
    # The context manager closes the file even if a write raises.
    with open(output, 'w') as fasta_out:
        # Mapping keys are already unique, so each header is written once
        # without tracking which ones have been seen.
        for header, sequence in seqs.items():
            fasta_out.write(header + "\n")
            fasta_out.write(sequence + "\n")
def main():
    """Run the script: parse the options, read the input, write the output.

    The input FASTA named on the command line is loaded with read_fasta and
    the resulting records are handed to write_unique for the output path.
    """
    options = set_arguments()
    write_unique(read_fasta(options.fasta), options.output)
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module does not
    # parse sys.argv or touch any files.
    main()