-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrRNA_Split_Jobs.py
68 lines (55 loc) · 4.19 KB
/
rRNA_Split_Jobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
import os
import os.path
import shutil
import subprocess
import sys

try:
    from shlex import quote as shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as shell_quote  # Python 2.7

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
# Command-line arguments (a missing argument raises IndexError).
#   argv[1]: path of the input file. Only its directory and its basename are
#            used; the loops below derive the "<stem>_unpaired_n_contaminants"
#            and "<stem>_paired_n_contaminants" directories from them, where
#            <stem> is the basename without its extension.
Input_File = sys.argv[1]
Input_Path = os.path.dirname(Input_File)
Input_FName = os.path.basename(Input_File)
#   argv[2]: read here but not referenced again in the visible part of this
#            script (presumably the ID of an upstream job -- TODO confirm).
JobID1 = sys.argv[2]
# Interpreter and rRNA filter script that every generated PBS job invokes.
Python = "/home/j/jparkins/mobolaji/python"
Filter_rRNA = "/home/j/jparkins/mobolaji/Metatranscriptome_Scripts/Mobolaji/rRNA_Filter.py"
# Template for the generated PBS job scripts. It is processed line by line:
#   * "NAME" (on the "#PBS -N" line) is replaced by the job name;
#   * the line containing "COMMANDS" is replaced by the job's command line(s)
#     and ends the script, so it must stay the last line of the template.
PBS_Submit_LowMem = """#!/bin/bash
#PBS -l nodes=1:ppn=8,walltime=6:00:00
#PBS -N NAME
module load gcc intel/15.0.2 openmpi java blast extras python
cd $PBS_O_WORKDIR
export OMP_NUM_THREADS=8
OLDPATH=$PATH:/home/j/jparkins/ctorma/emboss/bin/:/home/j/jparkins/mobolaji/Tools/Barrnap/bin/:/home/j/jparkins/mobolaji/Tools/HMMer/hmmer-3.1b2-linux-intel-x86_64/binaries/:/home/j/jparkins/mobolaji/Tools/Python27/Python-2.7.12/:/home/j/jparkins/mobolaji/Tools/Bowtie2/bowtie2-2.3.0/:/home/j/jparkins/mobolaji/Tools/SAMTOOLS/samtools-1.3.1/
NEWPATH=/home/j/jparkins/mobolaji/:$OLDPATH
export PATH=$NEWPATH
COMMANDS"""
# Submit one rRNA-filter PBS job per unpaired split FASTQ file.
#
# For every "*.fastq" entry of "<stem>_unpaired_n_contaminants" a script
# "<split>_rRNA_Filter.pbs" is written next to the split and handed to qsub on
# gpc01 over ssh. The job ID printed by qsub is appended to Preprocess_jobs.
#
# NOTE(review): "*_mRNA.fastq" / "*_rRNA.fastq" outputs of an earlier run also
# end in ".fastq" and would be submitted as inputs on a re-run -- confirm
# whether they should be skipped.
Preprocess_jobs = []
# Loop-invariant: directory that holds the unpaired splits.
Unpaired_Dir = os.path.join(Input_Path, os.path.splitext(Input_FName)[0] + "_unpaired_n_contaminants")
for split in sorted(os.listdir(Unpaired_Dir)):
    if not split.endswith(".fastq"):
        continue
    Split_Name = os.path.splitext(split)[0]
    # Split path without its extension; inputs and outputs derive from it.
    Split_File = os.path.join(Unpaired_Dir, Split_Name)
    Job_Name = Split_Name + "_rRNA_Filter"
    PBS_Script = os.path.join(Unpaired_Dir, Job_Name + ".pbs")
    # Filter command: <input> <mRNA output> <rRNA output>. Paths are quoted so
    # that spaces or shell metacharacters survive inside the job script;
    # ordinary paths are left unchanged by shell_quote.
    COMMANDSx = [" ".join([
        Python,
        Filter_rRNA,
        shell_quote(Split_File + ".fastq"),
        shell_quote(Split_File + "_mRNA.fastq"),
        shell_quote(Split_File + "_rRNA.fastq"),
    ])]
    with open(PBS_Script, "w") as PBS_script_out:
        for line in PBS_Submit_LowMem.splitlines():
            # Test the raw template line, not the substituted one, so a job
            # name that happens to contain "COMMANDS" cannot truncate the
            # script at the "#PBS -N" line.
            if "COMMANDS" in line:
                PBS_script_out.write("\n".join(COMMANDSx) + "\n")
                break
            PBS_script_out.write(line.replace("NAME", Job_Name) + "\n")
    # Argument list instead of a shell string: no local shell is involved, and
    # the remote shell only sees quoted paths.
    Remote_Command = "cd " + shell_quote(Unpaired_Dir) + ";qsub " + shell_quote(PBS_Script)
    JobIDx = subprocess.check_output(["ssh", "gpc01", Remote_Command])
    Preprocess_jobs.append(JobIDx.strip("\n"))
# Submit one rRNA-filter PBS job per pair of paired-end split FASTQ files.
#
# An entry of "<stem>_paired_n_contaminants" is treated as the read-1 file of a
# pair when it is named "<prefix>1_paired_n_contaminants_split_<suffix>.fastq".
# Its mate is the same name with the trailing "1" of the prefix replaced by
# "2". One script "<read-1 split>_rRNA_Filter.pbs" is written per pair and
# handed to qsub on gpc01 over ssh; the job ID is appended to Preprocess_jobs.
#
# NOTE(review): "*_mRNA.fastq" / "*_rRNA.fastq" outputs of an earlier run also
# match this pattern and would be submitted as inputs on a re-run -- confirm
# whether they should be skipped.
Paired_Dir = os.path.join(Input_Path, os.path.splitext(Input_FName)[0] + "_paired_n_contaminants")
Paired_Tag = "_paired_n_contaminants_split_"
for split in sorted(os.listdir(Paired_Dir)):
    Prefix, Tag, Suffix = split.partition(Paired_Tag)
    # Skip entries without the split tag (indexing the second piece of such a
    # name would raise IndexError), read-2 files, and non-FASTQ entries such
    # as generated .pbs scripts or PBS log files. The ".fastq" test mirrors
    # the unpaired loop.
    if not Tag or not Prefix.endswith("1") or not split.endswith(".fastq"):
        continue
    Split_File1 = os.path.join(Paired_Dir, split)
    Split_File2 = os.path.join(Paired_Dir, Prefix[:-1] + "2" + Paired_Tag + Suffix)
    Base1 = os.path.splitext(Split_File1)[0]
    Base2 = os.path.splitext(Split_File2)[0]
    Job_Name = os.path.splitext(split)[0] + "_rRNA_Filter"
    PBS_Script = os.path.join(Paired_Dir, Job_Name + ".pbs")
    # Filter command: <in1> <mRNA1> <rRNA1> <in2> <mRNA2> <rRNA2>. Paths are
    # quoted so that spaces or shell metacharacters survive inside the job
    # script; ordinary paths are left unchanged by shell_quote.
    COMMANDSy = [" ".join([
        Python,
        Filter_rRNA,
        shell_quote(Split_File1),
        shell_quote(Base1 + "_mRNA.fastq"),
        shell_quote(Base1 + "_rRNA.fastq"),
        shell_quote(Split_File2),
        shell_quote(Base2 + "_mRNA.fastq"),
        shell_quote(Base2 + "_rRNA.fastq"),
    ])]
    with open(PBS_Script, "w") as PBS_script_out:
        for line in PBS_Submit_LowMem.splitlines():
            # Test the raw template line, not the substituted one, so a job
            # name that happens to contain "COMMANDS" cannot truncate the
            # script at the "#PBS -N" line.
            if "COMMANDS" in line:
                PBS_script_out.write("\n".join(COMMANDSy) + "\n")
                break
            PBS_script_out.write(line.replace("NAME", Job_Name) + "\n")
    # Argument list instead of a shell string: no local shell is involved, and
    # the remote shell only sees quoted paths.
    Remote_Command = "cd " + shell_quote(Paired_Dir) + ";qsub " + shell_quote(PBS_Script)
    JobIDy = subprocess.check_output(["ssh", "gpc01", Remote_Command])
    Preprocess_jobs.append(JobIDy.strip("\n"))
# Write the IDs of (at most) the last ten submitted jobs to stdout as a single
# colon-separated line.
Last_Jobs = Preprocess_jobs[-10:]
print(":".join(Last_Jobs))