Blastx relevant scripts uploaded

mitchgill16 · Jan 23, 2022 · de6f6fd · de6f6fd
1 parent 535c0e3
commit de6f6fd
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 0 deletions.
diff --git a/Scripts/blastx_pipeline.sh b/Scripts/blastx_pipeline.sh
@@ -0,0 +1,12 @@
+#Arg1 = cds of a crop to make the database for blast
+#Arg2 = original 'lncrna' fastas to be filtered
+cds=$1
+query=$2
+makeblastdb -dbtype nucl -in $cds
+blastn -query $query -db $1 -outfmt 6 -evalue 1e-10 > blast_results.txt
+python extract_blast.py blast_results.txt
+uniq blast_results.txt_to_filter.txt > transcripts_to_filter.txt      
+awk 'BEGIN{while((getline<"transcripts_to_filter.txt")>0)l[">"$1]=1}/^>/{f=!l[$1]}f' $2 > transcripts_filtered.fa
+wc -l $2
+wc -l transcripts_filtered.fa
+wc -l transcripts_to_filter.txt
diff --git a/Scripts/extract_blast.py b/Scripts/extract_blast.py
@@ -0,0 +1,19 @@
+import pandas as pd
+import sys
+my_file = sys.argv[1]
+print(my_file)
+col_heads = ["qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore"]
+df = pd.read_csv(my_file, sep = "\t", header=None)
+df.columns = col_heads
+outfile = my_file+"_to_filter.txt"
+f = open(outfile, "w")
+for index, row in df.iterrows():
+   length = row["length"]
+   cell = row["qseqid"]
+   cell = cell.split(':')
+   cell = cell[3].split('-')
+   l = int(cell[1]) - int(cell[0])
+   l = l*0.9
+   if(length >= l):
+       f.write(row["qseqid"] + "\n")
+f.close()