Skip to content

Commit

Permalink
Blastx relevant scripts uploaded
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchgill16 authored Jan 23, 2022
1 parent 535c0e3 commit de6f6fd
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
12 changes: 12 additions & 0 deletions Scripts/blastx_pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Remove putative lncRNA transcripts that align to a crop's coding sequences.
#
# Usage: blastx_pipeline.sh <cds.fasta> <lncrna.fasta>
#   Arg1 = cds of a crop to make the database for blast
#   Arg2 = original 'lncrna' fastas to be filtered
#
# Files written to the current directory:
#   blast_results.txt                 blastn tabular output (-outfmt 6)
#   blast_results.txt_to_filter.txt   query IDs selected by extract_blast.py
#   transcripts_to_filter.txt         de-duplicated list of those IDs
#   transcripts_filtered.fa           Arg2 without the listed transcripts
if [ "$#" -lt 2 ]; then
    echo "usage: $0 <cds.fasta> <lncrna.fasta>" >&2
    exit 1
fi
cds=$1
query=$2
# Paths are quoted so names containing spaces or glob characters are passed
# through as a single argument.
makeblastdb -dbtype nucl -in "$cds"
blastn -query "$query" -db "$cds" -outfmt 6 -evalue 1e-10 > blast_results.txt
# NOTE: extract_blast.py is resolved relative to the current directory.
python extract_blast.py blast_results.txt
# sort -u rather than uniq: uniq only collapses *adjacent* duplicate lines.
sort -u blast_results.txt_to_filter.txt > transcripts_to_filter.txt
# Load the IDs into l[] keyed as ">ID"; on each FASTA header line set f to
# "header's first field is not listed", then print every line while f is true.
awk 'BEGIN{while((getline<"transcripts_to_filter.txt")>0)l[">"$1]=1}/^>/{f=!l[$1]}f' "$query" > transcripts_filtered.fa
# Line counts (not sequence counts) of the input, the output and the ID list.
wc -l "$query"
wc -l transcripts_filtered.fa
wc -l transcripts_to_filter.txt
19 changes: 19 additions & 0 deletions Scripts/extract_blast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pandas as pd
import sys
# Column names of BLAST tabular output as produced with ``-outfmt 6``.
BLAST_COLUMNS = [
    "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen",
    "qstart", "qend", "sstart", "send", "evalue", "bitscore",
]

# A hit is reported when its alignment length reaches this fraction of the
# span encoded in the query ID.
MIN_QUERY_COVERAGE = 0.9


def query_span(qseqid):
    """Return ``end - start`` parsed from a query ID.

    The fourth ``:``-separated field of *qseqid* must look like
    ``start-end`` with integer coordinates.

    Raises:
        ValueError: if the ID does not contain such a field.
    """
    try:
        coords = str(qseqid).split(":")[3].split("-")
        return int(coords[1]) - int(coords[0])
    except (IndexError, ValueError) as err:
        raise ValueError(
            f"cannot parse 'start-end' from the 4th ':' field of {qseqid!r}"
        ) from err


def ids_to_filter(hits, min_coverage=MIN_QUERY_COVERAGE):
    """Yield the ``qseqid`` of every hit that is long enough.

    *hits* needs the columns ``qseqid`` and ``length``.  A row is yielded
    when ``length >= query_span(qseqid) * min_coverage``.  IDs are yielded
    once per qualifying row, in input order; duplicates are not removed.
    """
    for qseqid, aln_length in zip(hits["qseqid"], hits["length"]):
        if aln_length >= query_span(qseqid) * min_coverage:
            yield qseqid


def main(argv=None):
    """Write the IDs selected from a BLAST table to ``<input>_to_filter.txt``.

    *argv* defaults to ``sys.argv[1:]``; its first item is the path of the
    tab-separated BLAST result file.  The path is echoed to stdout.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("usage: extract_blast.py <blast_outfmt6.tsv>")
    blast_path = args[0]
    print(blast_path)

    try:
        # Column 0 (qseqid) is forced to str so purely numeric IDs are not
        # converted to integers.
        hits = pd.read_csv(blast_path, sep="\t", header=None, dtype={0: str})
        hits.columns = BLAST_COLUMNS
    except pd.errors.EmptyDataError:
        # BLAST found nothing: still produce an (empty) output file so the
        # following pipeline steps have something to read.
        hits = pd.DataFrame(columns=BLAST_COLUMNS)

    # Collect first so a malformed ID cannot leave a half-written file behind.
    selected = list(ids_to_filter(hits))
    with open(blast_path + "_to_filter.txt", "w") as out:
        out.writelines(f"{qseqid}\n" for qseqid in selected)


if __name__ == "__main__":
    main()

0 comments on commit de6f6fd

Please sign in to comment.