forked from smirarab/pasta
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_seqtools.py
executable file
·63 lines (51 loc) · 2.73 KB
/
run_seqtools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#! /usr/bin/env python
'''
Created on Aug 8, 2013
@author: smirarab
'''
import sys
from pasta.alignment import CompactAlignment
import argparse
from copy import copy
def read_name_map(lines):
    """Parse a PASTA name-mapping file into a ``{current_name: new_name}`` dict.

    The mapping is read as alternating lines: a name as it appears in the
    alignment, followed by the name it should be renamed to.  Blank
    (whitespace-only) lines are ignored and surrounding whitespace is
    stripped from every name.

    :param lines: any iterable of text lines (an open file, a list, ...).
    :return: dict mapping each odd-numbered entry to the entry after it.
             A trailing unpaired entry is ignored.
    """
    # Materialise into a list: the entries are walked twice (keys and
    # values), which silently yields an empty map if a one-shot iterator
    # such as Python 3's filter() object is used instead.
    entries = [name for name in (line.strip() for line in lines) if name]
    return dict(zip(entries[0::2], entries[1::2]))


def filter_fragments(alg, min_length):
    """Remove every entry of ``alg`` whose ``.seq`` is shorter than ``min_length``.

    :param alg: dict-like alignment mapping names to objects with a ``seq``
                attribute; modified in place.
    :param min_length: entries with ``len(value.seq) < min_length`` are dropped.
    :return: list of the names that were removed.
    """
    # Collect first, delete afterwards: a mapping must not change size
    # while it is being iterated.
    too_short = [name for name, sequence in alg.items()
                 if len(sequence.seq) < min_length]
    for name in too_short:
        alg.pop(name, None)
    return too_short


def rename_sequences(alg, namemap):
    """Rename all keys of ``alg`` in place according to ``namemap``.

    :param alg: dict-like alignment; modified in place.
    :param namemap: dict mapping every current name to its new name.
    :raises KeyError: if a name in ``alg`` has no entry in ``namemap``
                      (raised before ``alg`` is touched).
    """
    old_names = list(alg.keys())
    # Resolve every new name up front so a missing mapping fails before any
    # sequence has been moved.
    new_names = [namemap[name] for name in old_names]
    # Detach all sequences before re-inserting them; renaming one key at a
    # time would overwrite a not-yet-renamed sequence whenever a new name
    # equals another sequence's current name.
    sequences = [alg.pop(name) for name in old_names]
    for new_name, sequence in zip(new_names, sequences):
        alg[new_name] = sequence


def main():
    """Command-line entry point: read an alignment, optionally mask sites,
    drop short fragments and rename sequences, then write the result."""
    parser = argparse.ArgumentParser(description='Manipulate Alignments')
    parser.add_argument('-infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help='name of the input file (default: standard input)')
    parser.add_argument('-outfile', nargs='?', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help='name of the output file (default: standard output)')
    parser.add_argument('-informat', nargs='?', default="FASTA",
                        choices=["COMPACT3", "FASTA"],
                        help='format of the input file (default: FASTA)')
    parser.add_argument('-outformat', nargs='?', default="FASTA",
                        choices=["COMPACT3", "FASTA", "PHYLIP"],
                        help='format of the output file (default: FASTA)')
    parser.add_argument('-masksites', metavar='N', type=int,
                        help='sites with less than N non-gap characters will be masked out')
    parser.add_argument('-filterfragments', metavar='N', type=int,
                        help='sequences with less than N non-gap characters will be removed')
    parser.add_argument('-rename', metavar='MappingFile', type=argparse.FileType('r'),
                        help='Rename sequences, according to the mapping file generated by PASTA')
    args = parser.parse_args()

    alg = CompactAlignment()
    alg.read_file_object(args.infile, args.informat)

    # Each step is skipped when its option is absent (None) or zero.
    if args.masksites:
        alg.mask_gapy_sites(args.masksites)

    if args.filterfragments:
        filter_fragments(alg, args.filterfragments)

    if args.rename:
        try:
            namemap = read_name_map(args.rename)
        finally:
            # argparse opened the mapping file for us; release it once read.
            args.rename.close()
        rename_sequences(alg, namemap)

    alg.write(args.outfile, args.outformat)


if __name__ == '__main__':
    main()