-
Notifications
You must be signed in to change notification settings - Fork 3
/
collapse_expr_from_binary_csv.py
44 lines (37 loc) · 1.39 KB
/
collapse_expr_from_binary_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import argparse
import os
# Row count per output file; the chunking loop further down starts a new
# file once this many data rows have been collected.
lines_per_file = 20000

# Directory that receives the generated chunk files.
out_dir = 'exprTable'

# Command line interface: one positional argument, the path to the input CSV.
parser = argparse.ArgumentParser()
parser.add_argument('csv_path')
args = parser.parse_args()

# Make sure the output directory exists before any chunk is written.
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# idx numbers the output files; out_lines buffers the transformed rows that
# have not been written out yet.
idx, out_lines = 0, []
def transform_line(line, spacer=''):
    '''Rearrange one already-split CSV row and return it as a single string.

    The first eight fields stay where they are, the last two fields are moved
    up to follow them, and every field in between is concatenated (with
    ``spacer`` as separator) into one final column. The result is joined
    with commas.

    NOTE(review): the previous docstring described the trailing fields as
    "3 PCA columns" and the middle fields as gene expression values, but the
    slices below move only the last two fields -- confirm against the actual
    input layout.
    '''
    leading = line[:8]
    trailing = line[-2:]
    collapsed = spacer.join(line[8:-2])
    return ','.join(leading + trailing + [collapsed])
def _write_chunk(chunk_idx, header_line, rows):
    '''Write one output file: the header line followed by one row per line.

    The file is created inside ``out_dir`` and is named after the input file
    with its extension replaced by ``.expstr.<chunk_idx>.csv``.
    '''
    # Use only the file name of the input so the chunk always lands inside
    # out_dir, even when csv_path carries directory components (os.path.join
    # would otherwise need a matching sub-directory, or drop out_dir entirely
    # for an absolute path).
    stem = os.path.splitext(os.path.basename(args.csv_path))[0]
    out_file = stem + '.expstr.{!s}.csv'.format(chunk_idx)
    with open(os.path.join(out_dir, out_file), 'w') as out:
        out.write(header_line + '\n')
        # Terminate every row with its own newline so rows are never fused.
        out.writelines(row + '\n' for row in rows)


with open(args.csv_path) as csv_in:
    # The header goes through the same column rearrangement as the data rows,
    # but its collapsed column names are kept apart with spaces.
    header = csv_in.readline().strip().split(',')
    header = transform_line(header, spacer=' ')
    for line in csv_in:
        line = line.strip().split(',')
        out_lines.append(transform_line(line))
        # Start a new output file each time the buffer holds a full chunk.
        if len(out_lines) == lines_per_file:
            _write_chunk(idx, header, out_lines)
            out_lines = []
            idx += 1

# Flush the rows left over after the last full chunk. When the row count is
# an exact multiple of lines_per_file there is nothing left to write, so no
# extra header-only file is produced; an input without any data rows still
# yields a single header-only file.
if out_lines or idx == 0:
    _write_chunk(idx, header, out_lines)
    out_lines = []
    idx += 1