-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpartition.py
82 lines (60 loc) · 2.37 KB
/
partition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
import sys
import os
from itertools import starmap, chain, imap
from operator import itemgetter
from seqio import iteratorFromExtension, recordToString, fastaRecordToString, seqlen
from nucio import fileIterator, openerFromExtension
from args import parseArgs, getHelpStr, argflag, CLArgument
from misc import defdef
# Usage banner passed to getHelpStr whenever the command line is incomplete.
description = (
    "Usage: partition.py [-options] "
    "<reads_per_file (int)> <files_per_dir (int)> "
    "<input.{fa,fq}> [input2.{fa,fq} ...]"
)

# One five-field spec per option; each spec is turned into a CLArgument below.
# The fourth field is the value the script falls back on (False / 1).
samefmt_option = [
    "sameformat", "samefmt", argflag, False,
    ("Output files will be in the same format "
     "as the input files. By default they are converted "
     "to fasta."),
]
minlen_option = [
    "minlen", "minlen", int, 1,
    ("Only output reads that are greater than or equal to 'minlen' "
     "Default: 1"),
]
argument_list = [samefmt_option, minlen_option]
arguments = [CLArgument._make(option_spec) for option_spec in argument_list]


def _exit_with_usage():
    """Terminate the script, passing the generated help text to sys.exit."""
    sys.exit(getHelpStr(description, arguments) + "\n")


# No command-line arguments at all: show the help text and stop.
if len(sys.argv) <= 1:
    _exit_with_usage()

p_arg_map, args_remaining = parseArgs(sys.argv[1:], arguments)

# At least three leftover arguments are required: the two integer counts
# and one input file.
if len(args_remaining) < 3:
    _exit_with_usage()
def pstr(num):
    """Return *num* rendered as a decimal integer, zero-padded to 4 characters.

    Values that need more than four characters are returned unpadded.
    """
    digits = "%d" % num
    return digits.zfill(4)
# --- Partition the input reads into numbered directories and files ---------
#
# Reads that pass the length filter are written, in input order, to files
# named "<dir>/p<file>" (both zero-padded by pstr, e.g. "0001/p0001").
# A new file is started every `rpf` written reads and a new directory every
# `rpf * fpd` written reads.  "ReadIndex.txt" receives one line per written
# read: "<read name>\t<file it was written to>".

(rpf, fpd) = map(int, args_remaining[:2])
if rpf < 1 or fpd < 1:
    # A zero here would otherwise surface as a bare ZeroDivisionError from
    # the modulo tests in the loop below.
    sys.exit("reads_per_file and files_per_dir must be positive integers\n")

in_files = args_remaining[2:]

# openerFromExtension returns an indexable pair per input file: element 1
# is handed to iteratorFromExtension and element 0 to defdef(open).
# NOTE(review): presumably (open function, extension) -- confirm in nucio.
openers = [openerFromExtension(path) for path in in_files]
iterators = [iteratorFromExtension(opener[1]) for opener in openers]
open_with_default = defdef(open)
openfuncs = [open_with_default(opener[0]) for opener in openers]

# Lazily chain the records of every input file into a single stream.
input_data = chain.from_iterable(
    starmap(fileIterator, zip(in_files, iterators, openfuncs)))

# Loop-invariant values, looked up once.
recordString = recordToString if p_arg_map["samefmt"] else fastaRecordToString
minlen = p_arg_map["minlen"]
reads_per_dir = rpf * fpd

total_reads = 0   # reads written so far (filtered reads are not counted)
dnum = 0          # current directory number
fnum = 0          # current file number within the directory
fh = None         # currently open output file, if any
current_file = None

readidx_fh = open("ReadIndex.txt", "w")
try:
    for record in input_data:
        if seqlen(record) < minlen:
            continue

        if total_reads % rpf == 0:
            # Time to roll over to a new output file ...
            if total_reads % reads_per_dir == 0:
                # ... and, every `fpd` files, to a new directory as well.
                dnum += 1
                fnum = 0
                os.mkdir(pstr(dnum))
            fnum += 1
            if fh is not None:
                fh.close()
            current_file = "%s/p%s" % (pstr(dnum), pstr(fnum))
            fh = open(current_file, "w")

        # Keep only the first whitespace-delimited token of the read name.
        clean_name = str(record.name).split()[0]
        clean_record = record._replace(name=clean_name)

        readidx_fh.write(clean_name + "\t" + current_file + "\n")
        fh.write(recordString(clean_record))
        fh.write("\n")
        total_reads += 1
finally:
    # Always release the handles, even if a read fails part-way through.
    readidx_fh.close()
    # fh is still None when no read was written (empty input, or every
    # read shorter than minlen); closing it unconditionally would crash.
    if fh is not None:
        fh.close()