-
Notifications
You must be signed in to change notification settings - Fork 9
/
run_comb_annot.sh
executable file
·63 lines (53 loc) · 3.28 KB
/
run_comb_annot.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/sh
#
# run_comb_annot.sh - drive three successive annotation passes inside one
# working directory and accumulate their results.
#
# Usage:
#   run_comb_annot.sh comb_annot BLAST out_name maker_dir out_dir snap_dir SCRIPTS
#
# Arguments:
#   $1 comb_annot  working directory; the script cd's into it and writes every
#                  intermediate and final file below it
#   $2 BLAST       directory containing the makeblastdb executable
#   $3 out_name    base name of the result files (<out_name>.genes.gff,
#                  <out_name>.blastx.out, <out_name>.codex)
#   $4 maker_dir   directory providing seq.fasta, the sequence handed to
#                  combined_annot.sh and unannot_regions.sh
#   $5 out_dir     its annot/ subdirectory is handed to the first
#                  combined_annot.sh call
#   $6 snap_dir    passed through to combined_annot.sh and maker.sh
#   $7 SCRIPTS     directory holding configs.cf, combined_annot.sh,
#                  unannot_regions.sh and maker.sh
#
# Pass absolute paths: the script changes directory into comb_annot and keeps
# using all arguments afterwards.
#
# Variables read but not assigned here (presumably set by configs.cf - verify):
#   PROTEIN1 PROTEIN2 EST2 REPEAT_PROTEIN CFG_DIR BIN AUGUSTUS AUGUSTUS_REF
#
# Results accumulate in:
#   $comb_annot/gff/<out_name>.genes.gff
#   $comb_annot/blast_out/<out_name>.blastx.out
#
# `set -e` is deliberately not used: as before, every pipeline step runs even
# if an earlier one reported failure. Only the preconditions below are fatal.

# Print a message on stderr and abort.
die() {
  printf '%s\n' "run_comb_annot.sh: $*" >&2
  exit 1
}

[ "$#" -ge 7 ] ||
  die "usage: run_comb_annot.sh comb_annot BLAST out_name maker_dir out_dir snap_dir SCRIPTS"

comb_annot=$1
BLAST=$2
out_name=$3
maker_dir=$4
out_dir=$5
snap_dir=$6
SCRIPTS=$7

# An empty comb_annot would turn the rm -rf calls below into deletions
# directly under /.
[ -n "$comb_annot" ] || die "comb_annot (argument 1) must not be empty"
[ -r "$SCRIPTS/configs.cf" ] || die "cannot read $SCRIPTS/configs.cf"
. "$SCRIPTS/configs.cf"

# makeblastdb and augustus below are invoked with file names relative to this
# directory, so a failed cd must stop the run.
cd "$comb_annot" || die "cannot cd to $comb_annot"

# Symlink "$1" at path "$2". -f replaces a link left behind by a previous run,
# which plain `ln -s` would refuse with "File exists".
link_into() {
  if [ -z "$1" ]; then
    printf '%s\n' "run_comb_annot.sh: no source configured for $2; link skipped" >&2
    return 1
  fi
  ln -sf -- "$1" "$2"
}

# Run unannot_regions.sh against the genes collected so far.
# Results in $comb_annot/non_orf.fasta.
find_unannotated() {
  "$SCRIPTS/unannot_regions.sh" "$maker_dir/seq.fasta" \
    "$comb_annot/gff/$out_name.genes.gff" "$comb_annot" "$SCRIPTS"
}

# Collect the FASTA header lines of non_orf.fasta in head.txt and feed them,
# together with the GFF file "$1", to conv_scf_pos; its output becomes
# $comb_annot/genes.gff.
convert_positions() {
  grep ">" "$comb_annot/non_orf.fasta" > "$comb_annot/head.txt"
  "$BIN/conv_scf_pos" "$comb_annot/head.txt" "$1" > "$comb_annot/genes.gff"
}

# Run combined_annot.sh (ENSEMBL 80) in the scratch directory more_annot,
# append its gene and blastx output to the accumulated results, then remove
# the scratch directory.
merge_extra_annot() {
  echo "#" > "$comb_annot/$out_name.codex"
  mkdir -p "$comb_annot/more_annot"
  # results in $comb_annot/more_annot/gff/$out_name.genes.gff
  "$SCRIPTS/combined_annot.sh" "$out_name" "$comb_annot/more_annot" \
    "$maker_dir/seq.fasta" "$comb_annot" "$comb_annot" "$snap_dir" \
    "$SCRIPTS" ENSEMBL 80 "$comb_annot"
  cat "$comb_annot/more_annot/gff/$out_name.genes.gff" \
    >> "$comb_annot/gff/$out_name.genes.gff"
  cat "$comb_annot/more_annot/blast_out/$out_name.blastx.out" \
    >> "$comb_annot/blast_out/$out_name.blastx.out"
  rm -rf -- "$comb_annot/more_annot"
}

# ---- Pass 1: combined annotation against a BLAST db built from PROTEIN1 ----
rm -rf -- "$comb_annot"/ref.*
link_into "$PROTEIN1" "$comb_annot/ref1_protein.fasta"
"$BLAST/makeblastdb" -in ref1_protein.fasta -dbtype prot -parse_seqids -out ref
# results in $comb_annot/gff/$out_name.genes.gff
"$SCRIPTS/combined_annot.sh" "$out_name" "$comb_annot" "$maker_dir/seq.fasta" \
  "$out_dir/annot" "$maker_dir" "$snap_dir" "$SCRIPTS" SGD 90 "$comb_annot"

# ---- Pass 2: run maker on the regions pass 1 left unannotated ----
rm -rf -- "$comb_annot"/ref.*
link_into "$PROTEIN2" "$comb_annot/ref_protein.fasta"
link_into "$EST2" "$comb_annot/ref_est.fasta"
link_into "$REPEAT_PROTEIN" "$comb_annot/te_protein.fasta"
link_into "$CFG_DIR/maker_opts.ctl" "$comb_annot/maker_opts.ctl"
link_into "$CFG_DIR/maker_bopts.ctl" "$comb_annot/maker_bopts.ctl"
link_into "$CFG_DIR/maker_exe.ctl" "$comb_annot/maker_exe.ctl"
"$BLAST/makeblastdb" -in ref_protein.fasta -dbtype prot -parse_seqids -out ref
find_unannotated
# Make seq.fasta in the working directory point at the unannotated regions
# before maker.sh is run.
rm -f -- "$comb_annot/seq.fasta"
ln -s -- "$comb_annot/non_orf.fasta" "$comb_annot/seq.fasta"
# results in $comb_annot/genes.gff
"$SCRIPTS/maker.sh" "$comb_annot" "$snap_dir" "$SCRIPTS"
mv -- "$comb_annot/genes.gff" "$comb_annot/add1.genes.gff"
convert_positions "$comb_annot/add1.genes.gff"
#$BIN/conv_scf_head /home/sj/Desktop/GCA_000766165.2_ASM76616v2_genomic.fna > $comb_annot/$out_name.scf.fasta
merge_extra_annot
rm -rf -- "$comb_annot/seq.fasta"
rm -rf -- "$comb_annot/non_orf.fasta"
rm -rf -- "$comb_annot/genes.gff"

# ---- Pass 3: run augustus on what is still unannotated ----
find_unannotated
"$AUGUSTUS/augustus" --gff3=on --species="$AUGUSTUS_REF" non_orf.fasta \
  > "$comb_annot/genes.gff"
# Drop comment and blank lines, keep only lines mentioning "gene" or "CDS".
# -E is sufficient for a plain alternation and, unlike -P, is not GNU-only.
sed -e '/^#/d' -e '/^$/d' "$comb_annot/genes.gff" |
  grep -E "gene|CDS" > "$comb_annot/add2.genes.gff"
#$GeneMark --format=GFF --imod $genemark_mod $comb_annot/non_orf.fasta
#less $comb_annot/non_orf.fasta.gff | sed '/^#/ d' | sed '/^$/d' | awk '{print $1" maker gene "$5" "$6" . "$8" . UNDEF"}' > $comb_annot/genes.gff
convert_positions "$comb_annot/add2.genes.gff"
merge_extra_annot