-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathexampleAnalysis.sh
79 lines (58 loc) · 2.7 KB
/
exampleAnalysis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
# This is an example script for a full pipeline of assembly, annotation and core phylogeny
# In order to use it you need prokka, spades.py, hmmsearch, muscle, Gblocks, and raxmlHPC in your PATH or adjust the location below
##### Settings if submitted on a SLURM workload manager by 'sbatch exampleAnalysis.sh'
#SBATCH -J bcgTree
#SBATCH -n 1
#SBATCH -c 32
#SBATCH --mem 100G
##### Global settings (if on a SLURM manager, should be consistent with settings above
THREADS=32
RAM=100
mkdir -p testdata_phylogeny
cd testdata_phylogeny
##### Getting genome data as test data set
### Reference strain assemblies
# Paenibacillus polymyxa
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/146/875/GCA_000146875.2_ASM14687v2/GCA_000146875.2_ASM14687v2_genomic.fna.gz
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/463/565/GCA_000463565.1_S6/GCA_000463565.1_S6_genomic.fna.gz
# Paenibacillus odorifer
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/758/725/GCA_000758725.1_ASM75872v1/GCA_000758725.1_ASM75872v1_genomic.fna.gz
# Paenibacillus larvae
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/003/265/GCA_002003265.1_ASM200326v1/GCA_002003265.1_ASM200326v1_genomic.fna.gz
# Brevibacillus brevis (outgroup)
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/010/165/GCA_000010165.1_ASM1016v1/GCA_000010165.1_ASM1016v1_genomic.fna.gz
gunzip *.fna.gz
###raw sequencing reads
# Paenibacillus polymyxa MBD06
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR340/009/ERR3402559/ERR3402559_1.fastq.gz
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR340/009/ERR3402559/ERR3402559_2.fastq.gz
##### Assembly of Paenibacillus polymyxa MBD06 (raw reads)
spades.py --careful \
--pe1-1 ERR3402559_1.fastq.gz \
--pe1-2 ERR3402559_2.fastq.gz \
-t $THREADS -m $RAM -o spades_PPol
cp spades_PPol/scaffolds.fasta ./PPol_MBD06.fna
##### Annotation
for genome in $(ls *.fna);
do
prokka --outdir prokka_$genome --cpus $THREADS --norrna --notrna $genome;
cp prokka_$genome/*.faa ./$genome.faa
done
##### Config file creation
# basic parameters
echo "--threads=$THREADS" > testdata_config.txt
echo "--bootstraps=1000" >> testdata_config.txt
echo "--outdir=testdata_phylogeny" >> testdata_config.txt
# proteome files
for proteome in $(ls *.faa);
do
echo '--proteome "'$proteome'"="'$proteome'"' >> testdata_config.txt
done
## optional binary locations, need to be changed for the user if they are necessary (and # removed)
#echo "--hmmsearch-bin=/PATH/TO/BINARY" >> testdata_config.txt
#echo "--muscle-bin=/PATH/TO/BINARY" >> testdata_config.txt
#echo "--gblocks-bin=/PATH/TO/BINARY" >> testdata_config.txt
#echo "--raxml-bin=/PATH/TO/BINARY" >> testdata_config.txt
##### Phylogenetic calculation
bcgTree.pl @testdata_config.txt