Skip to content

Commit

Permalink
Add scripts for pre-processing data
Browse files Browse the repository at this point in the history
  • Loading branch information
morispi committed May 10, 2021
1 parent 2069177 commit bed6112
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 0 deletions.
Binary file added utils/.preprocessTELL-Seq.py.swp
Binary file not shown.
16 changes: 16 additions & 0 deletions utils/preprocessBAMstLFR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pysam
import sys
import os

# Move barcodes from read names into the BX tag of a BAM file.
#
# Usage: preprocessBAMstLFR.py <reads.bam>
#
# Every read name is expected to look like "<name>#<barcode>".  The part
# before the first "#" becomes the new read name and the part between the
# first and second "#" (or the end of the name) is stored in the BX tag.
# The result is written next to the input as
# "<input stem>_barcodes_extracted<input extension>".


def _split_read_name(query_name):
    """Return ``(name, barcode)`` extracted from ``"<name>#<barcode>"``.

    Raises:
        ValueError: if *query_name* contains no ``#`` separator.
    """
    parts = query_name.split("#")
    if len(parts) < 2:
        raise ValueError(
            "read name %r has no '#<barcode>' suffix" % (query_name,)
        )
    return parts[0], parts[1]


def main(argv=None):
    """Rewrite the BAM file named in ``argv[1]`` with barcodes in BX tags.

    Args:
        argv: argument vector; defaults to ``sys.argv``.

    Raises:
        ValueError: if a read name carries no ``#<barcode>`` suffix.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: preprocessBAMstLFR.py <reads.bam>")

    in_path = argv[1]
    stem, ext = os.path.splitext(in_path)
    out_path = stem + "_barcodes_extracted" + ext

    # Context managers close both handles even if a record fails midway.
    with pysam.AlignmentFile(in_path, "rb") as bam_in:
        with pysam.AlignmentFile(out_path, "wb", template=bam_in) as bam_out:
            # until_eof=True streams the file in on-disk order: it needs no
            # index and also yields unmapped reads, which a plain fetch()
            # would leave out of the output.
            for record in bam_in.fetch(until_eof=True):
                name, barcode = _split_read_name(record.query_name)
                record.query_name = name
                record.set_tag("BX", barcode)
                bam_out.write(record)


if __name__ == "__main__":
    main()
27 changes: 27 additions & 0 deletions utils/preprocessFASTQstLFR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import sys
import re
import csv
import os

# Move barcodes embedded in FASTQ read names into a BX:Z: header comment.
#
# Usage: preprocessFASTQstLFR.py <reads.fastq>
#
# Each header is expected to look like "<name>#<barcode>/<mate>[ ...]".
# It is rewritten as "<name>/<mate>\tBX:Z:<barcode>"; the three lines that
# follow the header are copied unchanged.  The result is written next to the
# input as "<input stem>_barcodes_in_headers<input extension>".


def _reformat_header(header):
    """Return *header* rewritten as ``<name>/<mate>\\tBX:Z:<barcode>``.

    Args:
        header: a FASTQ header line without its trailing newline.

    Raises:
        ValueError: if the header lacks the ``#<barcode>`` part or the
            ``/<mate>`` part.
    """
    fields = header.split("#")
    if len(fields) < 2:
        raise ValueError("header %r has no '#<barcode>' part" % (header,))
    barcode_and_mate = fields[1].split("/")
    if len(barcode_and_mate) < 2:
        raise ValueError("header %r has no '/<mate>' part" % (header,))
    barcode = barcode_and_mate[0]
    # Anything after the first tab or space following the mate is dropped.
    mate = barcode_and_mate[1].split("\t")[0].split(" ")[0]
    return fields[0] + "/" + mate + "\tBX:Z:" + barcode


def main(argv=None):
    """Rewrite the FASTQ file named in ``argv[1]`` with BX:Z: header tags.

    Args:
        argv: argument vector; defaults to ``sys.argv``.

    Raises:
        ValueError: if a header does not match ``<name>#<barcode>/<mate>``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: preprocessFASTQstLFR.py <reads.fastq>")

    in_path = argv[1]
    stem, ext = os.path.splitext(in_path)
    out_path = stem + "_barcodes_in_headers" + ext

    # Context managers close both files even if a header is malformed.
    with open(in_path) as reads, open(out_path, "w") as out:
        while True:
            raw_header = reads.readline()
            if not raw_header:
                # readline() returns '' only at end of file.
                break
            # Strip just the newline; slicing with [:-1] would eat a real
            # character when the last line has no trailing newline.
            header = raw_header.rstrip("\n")
            if not header:
                # Skip blank lines instead of treating them as end of
                # input, which silently dropped every later record.
                continue
            body = reads.readline() + reads.readline() + reads.readline()
            out.write(_reformat_header(header) + "\n")
            out.write(body)


if __name__ == "__main__":
    main()
48 changes: 48 additions & 0 deletions utils/preprocessTELL-Seq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import sys
import re
import csv
import os

# Copy barcodes from an index FASTQ into the headers of paired read FASTQs.
#
# Usage: preprocessTELL-Seq.py <R1.fastq> <R2.fastq> <I1.fastq>
#
# The three files are read in lockstep, one 4-line record at a time.  The
# second line of each index record is taken as the barcode.  Each read
# header is cut at its first space or tab and "\tBX:Z:<barcode>" is
# appended; the remaining three lines of the read record are copied
# unchanged.  Outputs are written next to R1 and R2 as
# "<input stem>_barcodes_in_headers<input extension>".


def _next_record(handle):
    """Read the next 4-line record from *handle*.

    Blank lines in front of a record are skipped.

    Returns:
        ``(header, lines)`` where *header* has no trailing newline and
        *lines* is the list of the three raw lines that follow it, or
        ``None`` at end of file.
    """
    while True:
        raw_header = handle.readline()
        if not raw_header:
            # readline() returns '' only at end of file.
            return None
        # Strip just the newline; slicing with [:-1] would eat a real
        # character when the last line has no trailing newline.
        header = raw_header.rstrip("\n")
        if header:
            return header, [handle.readline() for _ in range(3)]


def _tag_header(header, barcode):
    """Return *header* cut at its first space/tab plus a BX:Z: tag line."""
    name = header.split(" ")[0].split("\t")[0]
    return name + "\tBX:Z:" + barcode + "\n"


def main(argv=None):
    """Write barcode-tagged copies of the R1/R2 files named in *argv*.

    Args:
        argv: argument vector ``[prog, R1, R2, I1]``; defaults to
            ``sys.argv``.

    Raises:
        ValueError: if R2 or the index file runs out of records before R1,
            or an index record has no barcode line.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        sys.exit(
            "usage: preprocessTELL-Seq.py <R1.fastq> <R2.fastq> <I1.fastq>"
        )

    r1_path, r2_path, i1_path = argv[1], argv[2], argv[3]
    r1_stem, r1_ext = os.path.splitext(r1_path)
    r2_stem, r2_ext = os.path.splitext(r2_path)
    o1_path = r1_stem + "_barcodes_in_headers" + r1_ext
    o2_path = r2_stem + "_barcodes_in_headers" + r2_ext

    # Context managers close all five files even if an error is raised.
    with open(r1_path) as r1, open(r2_path) as r2, open(i1_path) as i1, \
            open(o1_path, "w") as o1, open(o2_path, "w") as o2:
        while True:
            rec1 = _next_record(r1)
            if rec1 is None:
                break
            rec2 = _next_record(r2)
            index_rec = _next_record(i1)
            if rec2 is None or index_rec is None:
                # Continuing here would emit headers with no mate or no
                # barcode and corrupt the output without any warning.
                raise ValueError(
                    "R2 or index file has fewer records than R1 "
                    "(at R1 header %r)" % (rec1[0],)
                )
            if not index_rec[1][0]:
                raise ValueError(
                    "index record %r has no barcode line" % (index_rec[0],)
                )
            barcode = index_rec[1][0].rstrip("\n")

            # The newline is added explicitly rather than borrowed from the
            # barcode line, so a header can never fuse with its sequence.
            o1.write(_tag_header(rec1[0], barcode))
            o1.write("".join(rec1[1]))
            o2.write(_tag_header(rec2[0], barcode))
            o2.write("".join(rec2[1]))


if __name__ == "__main__":
    main()

0 comments on commit bed6112

Please sign in to comment.