-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrim_to_exact_level.py
49 lines (43 loc) · 2.11 KB
/
trim_to_exact_level.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
'''
This script will take a file that has gone through qiimeToMaaslin.py
and will trim it to exactly a certain number of taxa levels
Inputs:
- Abundance table with levels delineated by a consistent marker
-- SampleNumber 1 2 3 4
Metadata yes no yes yes
k_Bacteria|f_ 1 1 1 1
- Marker of taxa level division (default = "|")
- Row number (starting at zero) that contains the last metadata row
- Output filepath
- Taxa level requested
- Indicator of whether to keep full string or just leaf
'''
import argparse, csv
def _open_for_csv(path, mode):
    """Open *path* the way the csv module expects on this interpreter.

    *mode* is 'r' or 'w'. On Python 3 the csv module wants text-mode files
    opened with newline=''; on Python 2 it wants universal-newline reads
    ('rU') and binary writes ('wb').
    """
    import sys  # local import so the file's top-level imports stay untouched

    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, 'rU' if mode == 'r' else 'wb')


def main():
    """Copy the metadata rows, then keep only rows at exactly one taxa level.

    Command-line arguments:
      -i/--input-fp     tab-delimited abundance table to read
      -o/--output-fp    path the trimmed table is written to
      -s/--separator    string dividing taxa levels in the first column
      -r/--row          zero-based index of the last metadata row; rows
                        0..row inclusive are copied through unchanged
      -t/--taxa-level   a row is kept only when its first column splits into
                        exactly this many levels
      -l/--leaves-only  replace the first column of kept rows with just the
                        last (leaf) level instead of the full string

    Blank rows after the metadata block are skipped. An input with fewer rows
    than ``row + 1`` is copied as far as it goes.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-i','--input-fp',help='qiimeToMaaslin.py output file', type=str, required=True)
    parser.add_argument('-o','--output-fp',help='Location for file to be written to', type=str, required=True)
    parser.add_argument('-s','--separator',help='Character/string that marks separation of taxa levels', type=str, default="|")
    parser.add_argument('-r','--row',help='Row index (starting at zero) that marks the last metadata row', type=int, required=True)
    parser.add_argument('-t','--taxa-level',help='Number of level that you would like to retain (starting from one)', type=int, required=True)
    parser.add_argument('-l','--leaves-only',help='Output only leaves', action='store_true', default=False)
    args = parser.parse_args()

    with _open_for_csv(args.input_fp, 'r') as input_file, \
            _open_for_csv(args.output_fp, 'w') as output_file:
        input_reader = csv.reader(input_file, delimiter='\t')
        output_writer = csv.writer(output_file, delimiter='\t')

        for row_index, row in enumerate(input_reader):
            # --row is the index of the LAST metadata row, so that row itself
            # must be copied too (hence <=, not <).
            if row_index <= args.row:
                output_writer.writerow(row)
                continue

            # csv.reader yields [] for blank lines; there is no taxon to test.
            if not row:
                continue

            taxa = row[0].split(args.separator)
            if len(taxa) != args.taxa_level:
                continue

            if args.leaves_only:
                # The row has exactly taxa_level entries, so the requested
                # level is the final one.
                row = [taxa[-1]] + row[1:]
            output_writer.writerow(row)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()