Skip to content

Commit

Permalink
1.0.2 changes (#19)
Browse files Browse the repository at this point in the history
* MTBseq 1.0.2

check for dependencies
+ --check option
prepared for bioconda

* code correction

* rename MTBseq.pl
  • Loading branch information
cutpatel authored Sep 6, 2018
1 parent 1ee33c9 commit 5c8501a
Show file tree
Hide file tree
Showing 940 changed files with 128 additions and 310,456 deletions.
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
*.fai text eol=lf
*.fasta text eol=lf
*.vcf text eol=lf
*.pl text eol=lf
*.pm text eol=lf
*.txt eol=lf


# Keep as is
*.vcf.idx binary
Expand Down
105 changes: 85 additions & 20 deletions MTBseq.pl → MTBseq
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use lib "$RealBin/lib";
use Cwd;
use Getopt::Long;
use IPC::Cmd qw[can_run run];
use TBbwa;
use TBstats;
use TBrefine;
Expand All @@ -19,7 +20,7 @@
use TBtools;


my $VERSION = "1.0.1";
my $VERSION = "1.0.2";

# get current working directory and time.
my $W_dir = getcwd();
Expand Down Expand Up @@ -49,6 +50,8 @@
my $SAMTOOLS_dir = "$RealBin/opt/samtools_1.6";
my $PICARD_dir = "$RealBin/opt/picard_2.17.0";
my $GATK_dir = "$RealBin/opt/GenomeAnalysisTK_3.8";
my $BIN_dir = "$RealBin/opt";
#my $TEST_dir = "$RealBin/test";

# initialize command-line parameter.
my $step = "";
Expand Down Expand Up @@ -76,6 +79,8 @@
my $naming_scheme = "";
my $help = "";
my $opt_version = "";
#my $test_run = "";
my $check = "";

# get command-line parameter.
GetOptions('step:s' => \$step,
Expand All @@ -101,13 +106,70 @@
'quiet' => \$quiet,
'threads:i' => \$threads,
'help' => \$help,
'version' => \$opt_version
'version' => \$opt_version,
# 'test' => \$test_run,
'check' => \$check,
);

#set tool calls
my $GATK_call = "gatk";
my $PICARD_call = "picard";
my $SAMTOOLS_call = "samtools";
my $BWA_call = "bwa";

# print help message if specified or error if step is not defined or wrong.
if($help eq '1') { help($VERSION); exit 0; }
if($opt_version eq '1') { version($VERSION); exit 0; }

#check for neccessary perl modules
for my $module (qw(
MCE
Statistics::Basic)) {
eval "use $module";
if ($@) {
die "<ERROR>\t",timer(),"\tNot found perl module: $module\n" if $@;}
else {
print "<INFO>\t",timer(),"\tFound perl module: $module\n";}}

# check for neccessary programs
for my $executable (qw(bwa samtools gatk picard)) {
if (my $canrun = can_run($executable)){
print "<INFO>\t",timer(),"\tFound $executable in your PATH!\n";}
elsif ($canrun = can_run("$BIN_dir\/$executable")){
print "<INFO>\t",timer(),"\tFound $executable in the MTBseq /opt folder!\n";
if ($executable=~"samtools"){
$SAMTOOLS_call="$SAMTOOLS_dir\/$executable";}
elsif ($executable=~"bwa"){
$BWA_call="$BWA_dir\/$executable";}}
elsif ($executable=~"gatk"){
if (-f "$GATK_dir\/GenomeAnalysisTK.jar"){
$GATK_call="java -jar $GATK_dir\/GenomeAnalysisTK.jar";
print "<INFO>\t",timer(),"\tFound $executable in the MTBseq $GATK_dir folder!\n";}
else {die "<ERROR>\t",timer(),"\t$executable is not installed or not in your PATH!\n\n";}}
elsif ($executable=~"picard"){
if (-f "$PICARD_dir\/picard.jar"){
$PICARD_call="java -jar $PICARD_dir\/picard.jar";
print "<INFO>\t",timer(),"\tFound $executable in the MTBseq $PICARD_dir folder!\n";}
else {die "<ERROR>\t",timer(),"\t$executable is not installed or not in your PATH!\n\n";}}
else {die "<ERROR>\t",timer(),"\t$executable is not installed or not in your PATH!\n\n";}}

#check version
open (my $sam_ver, '-|', 'samtools --version 2>&1') or die "<ERROR>\t",timer(),"\tCould not test samtools version\n"; my $sam_ver_out = <$sam_ver>; close $sam_ver;
$sam_ver_out =~ qr/samtools\s(\d+\.\d+)/ms;
my $sam_version = defined $1 ? $1 : 0;
if ($sam_version < 1.6) {
warn "<WARN>\t",timer(),"\tNeed samtools >= 1.6, please upgrade the version of samtools in your PATH.\n";
warn "<WARN>\t",timer(),"\tFor this execution of MTBseq $SAMTOOLS_dir from the MTBseq /opt directory will be used\n";
$SAMTOOLS_call="$SAMTOOLS_dir\/samtools";}

if($check eq '1') {exit 0;}

#if($test_run eq '1') {
#chdir $TEST_dir;
#my $commandline = "$RealBin/MTBseq --step TBfull";
#system($commandline)==0 or die "$commandline failed: $?\n"; }
if($step eq '' ) { nostep(); exit 1; }

unless(($step eq 'TBfull' ) ||
($step eq 'TBbwa' ) ||
($step eq 'TBrefine' ) ||
Expand Down Expand Up @@ -143,7 +205,6 @@
if($distance eq '' ) { $distance = 12; }
if($quiet eq '' ) { $quiet = 0; }
if($threads eq '' ) { $threads = 1; }
if($threads > 8 ) { $threads = 8; }

# set name of $ref fasta file and gene annotation.
my $refg = $ref;
Expand Down Expand Up @@ -226,16 +287,6 @@
print $logprint "<INFO>\t",timer(),"\t$STRAIN_OUT\n";
print $logprint "<INFO>\t",timer(),"\t$GROUPS_OUT\n";

system("mkdir -p $BAM_OUT");
system("mkdir -p $GATK_OUT");
system("mkdir -p $MPILE_OUT");
system("mkdir -p $POS_OUT");
system("mkdir -p $CALL_OUT");
system("mkdir -p $STATS_OUT");
system("mkdir -p $JOIN_OUT");
system("mkdir -p $AMEND_OUT");
system("mkdir -p $STRAIN_OUT");
system("mkdir -p $GROUPS_OUT");

# initialize check up and content arrays.
my %check_up;
Expand Down Expand Up @@ -294,15 +345,29 @@
print $logprint "\n<INFO>\t",timer(),"\t### [TBbwa] selected ###\n";
}
opendir(WORKDIR,"$W_dir") || die print $logprint "<ERROR>\t",timer(),"\tCan\'t open directory $W_dir: MTBseq.pl line: ", __LINE__ ," \n";
opendir(BAMDIR,"$BAM_OUT") || die print $logprint "<ERROR>\t",timer(),"\tCan\'t open directory $BAM_OUT: MTBseq.pl line: ", __LINE__ , "\n";
@fastq_files = grep { $_ =~ /^\w.*R\d+\.f(ast)?q\.gz/ && -f "$W_dir/$_" } readdir(WORKDIR);
@bam_files = grep { $_ =~ /^\w.*\.bam$/ && -f "$BAM_OUT/$_" } readdir(BAMDIR);
closedir(WORKDIR);
closedir(BAMDIR);

if(scalar(@fastq_files) == 0) {
print $logprint "\n<ERROR>\t",timer(),"\tNo read files to map! Check content of $W_dir!\n";
exit 1;
}

system("mkdir -p $BAM_OUT");
system("mkdir -p $GATK_OUT");
system("mkdir -p $MPILE_OUT");
system("mkdir -p $POS_OUT");
system("mkdir -p $CALL_OUT");
system("mkdir -p $STATS_OUT");
system("mkdir -p $JOIN_OUT");
system("mkdir -p $AMEND_OUT");
system("mkdir -p $STRAIN_OUT");
system("mkdir -p $GROUPS_OUT");

opendir(BAMDIR,"$BAM_OUT") || die print $logprint "<ERROR>\t",timer(),"\tCan\'t open directory $BAM_OUT: MTBseq.pl line: ", __LINE__ , "\n";
@bam_files = grep { $_ =~ /^\w.*\.bam$/ && -f "$BAM_OUT/$_" } readdir(BAMDIR);
closedir(BAMDIR);

%check_up = map { (my $id = $_) =~ s/\.bam$//; $id => $id; } @bam_files;
for(my $i = 0; $i < scalar(@fastq_files); $i++) {
my $tmp = $fastq_files[$i];
Expand All @@ -320,7 +385,7 @@
print $logprint "<INFO>\t",timer(),"\t$fastq\n";
}
print $logprint "\n<INFO>\t",timer(),"\tStart BWA mapping...\n";
tbbwa($logprint,$W_dir,$VAR_dir,$BWA_dir,$SAMTOOLS_dir,$BAM_OUT,$ref,$threads,$naming_scheme,@fastq_files_new);
tbbwa($logprint,$W_dir,$VAR_dir,$BWA_dir,$SAMTOOLS_dir,$BWA_call,$SAMTOOLS_call,$BAM_OUT,$ref,$threads,$naming_scheme,@fastq_files_new);
print $logprint "<INFO>\t",timer(),"\tFinished BWA mapping!\n";
@fastq_files = ();
@bam_files = ();
Expand Down Expand Up @@ -361,7 +426,7 @@
print $logprint "<INFO>\t",timer(),"\t$bam\n";
}
print $logprint "\n<INFO>\t",timer(),"\tStart GATK refinement...\n";
tbrefine($logprint,$W_dir,$VAR_dir,$PICARD_dir,$GATK_dir,$BAM_OUT,$GATK_OUT,$ref,$basecalib,$threads,@gatk_files_new);
tbrefine($logprint,$W_dir,$VAR_dir,$PICARD_dir,$GATK_dir,$PICARD_call,$GATK_call,$BAM_OUT,$GATK_OUT,$ref,$basecalib,$threads,@gatk_files_new);
print $logprint "<INFO>\t",timer(),"\tFinished GATK refinement!\n";
@bam_files = ();
@gatk_files = ();
Expand Down Expand Up @@ -402,7 +467,7 @@
print $logprint "<INFO>\t",timer(),"\t$bam\n";
}
print $logprint "\n<INFO>\t",timer(),"\tStart creating .mpileup files...\n";
tbpile($logprint,$VAR_dir,$SAMTOOLS_dir,$GATK_dir,$GATK_OUT,$MPILE_OUT,$ref,$threads,@gatk_files_new);
tbpile($logprint,$VAR_dir,$SAMTOOLS_dir,$SAMTOOLS_call,$GATK_dir,$GATK_OUT,$MPILE_OUT,$ref,$threads,@gatk_files_new);
print $logprint "<INFO>\t",timer(),"\tFinished creating .mpileup files!\n";
@gatk_files = ();
@mpile_files = ();
Expand Down Expand Up @@ -524,7 +589,7 @@
print $logprint "<INFO>\t",timer(),"\t$bam\n";
}
print $logprint "\n<INFO>\t",timer(),"\tStart statistics calculation...\n";
tbstats($logprint,$W_dir,$VAR_dir,$SAMTOOLS_dir,$BAM_OUT,$POS_OUT,$STATS_OUT,$ref,$refg,$micovf,$micovr,$miphred20,$mifreq,$all_vars,$snp_vars,$lowfreq_vars,$date_string,@bam_files_new);
tbstats($logprint,$W_dir,$VAR_dir,$SAMTOOLS_dir,$SAMTOOLS_call,$BAM_OUT,$POS_OUT,$STATS_OUT,$ref,$refg,$micovf,$micovr,$miphred20,$mifreq,$all_vars,$snp_vars,$lowfreq_vars,$date_string,@bam_files_new);
print $logprint "<INFO>\t",timer(),"\tFinished statistics calculation!\n";
@bam_files = ();
@pos_files = ();
Expand Down
36 changes: 19 additions & 17 deletions lib/TBbwa.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use TBtools;
use Exporter;
use vars qw($VERSION @ISA @EXPORT);

$VERSION = 1.0.1;
$VERSION = 1.0.2;
@ISA = qw(Exporter);
@EXPORT = qw(tbbwa);

Expand All @@ -20,6 +20,8 @@ sub tbbwa {
my $VAR_dir = shift;
my $BWA_dir = shift;
my $SAMTOOLS_dir = shift;
my $BWA_call = shift;
my $SAMTOOLS_call = shift;
my $BAM_OUT = shift;
my $ref = shift;
my $threads = shift;
Expand Down Expand Up @@ -67,53 +69,53 @@ sub tbbwa {
# index reference with bwa, if it isn't indexed already.
unless(-f "$VAR_dir/$ref.amb" && -f "$VAR_dir/$ref.ann" && -f "$VAR_dir/$ref.bwt" && -f "$VAR_dir/$ref.pac" && -f "$VAR_dir/$ref.sa"){
print $logprint "<INFO>\t",timer(),"\tStart indexing reference genome $ref...\n";
print $logprint "<INFO>\t",timer(),"\t$BWA_dir/bwa index $VAR_dir/$ref >> $BAM_OUT/$logfile\n";
$commandline = "$BWA_dir/bwa index $VAR_dir/$ref 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$BWA_call index $VAR_dir/$ref >> $BAM_OUT/$logfile\n";
$commandline = "$BWA_call index $VAR_dir/$ref 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished indexing reference genome $ref!\n";
}
# index reference with samtools, if it isn't indexed already.
unless(-f "$VAR_dir/$ref.fai") {
print $logprint "<INFO>\t",timer(),"\tStart using samtools for indexing of $ref...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools faidx $VAR_dir/$ref >> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_dir/samtools faidx $VAR_dir/$ref >> $BAM_OUT/$logfile >> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call faidx $VAR_dir/$ref >> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_call faidx $VAR_dir/$ref >> $BAM_OUT/$logfile >> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished using samtools for indexing of $ref!\n";
}
# map reads with bwa-mem and -t parameter.
print $logprint "<INFO>\t",timer(),"\tStart BWA mapping for $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$BWA_dir/bwa mem -t $threads -R $read_naming_scheme $VAR_dir/$ref $files_string > $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile\n";
$commandline = "$BWA_dir/bwa mem -t $threads -R $read_naming_scheme $VAR_dir/$ref $files_string > $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$BWA_call mem -t $threads -R $read_naming_scheme $VAR_dir/$ref $files_string > $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile\n";
$commandline = "$BWA_call mem -t $threads -R $read_naming_scheme $VAR_dir/$ref $files_string > $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished BWA mapping for $fullID!\n";
# convert from .sam to .bam format with samtools -S (sam input) and (-b bam output) and -T (reference).
print $logprint "<INFO>\t",timer(),"\tStart using samtools to convert from .sam to .bam for $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools view -@ $threads -b -T $VAR_dir/$ref -o $BAM_OUT/$fullID.bam $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_dir/samtools view -@ $threads -b -T $VAR_dir/$ref -o $BAM_OUT/$fullID.bam $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call view -@ $threads -b -T $VAR_dir/$ref -o $BAM_OUT/$fullID.bam $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_call view -@ $threads -b -T $VAR_dir/$ref -o $BAM_OUT/$fullID.bam $BAM_OUT/$fullID.sam 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished file conversion for $fullID!\n";
# sort with samtools.
print $logprint "<INFO>\t",timer(),"\tStart using samtools for sorting of $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools sort -@ $threads -T /tmp/$fullID.sorted -o $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_dir/samtools sort -@ $threads -T /tmp/$fullID.sorted -o $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.bam 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call sort -@ $threads -T /tmp/$fullID.sorted -o $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_call sort -@ $threads -T /tmp/$fullID.sorted -o $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.bam 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished using samtools for sorting of $fullID!\n";
# indexing with samtools.
print $logprint "<INFO>\t",timer(),"\tStart using samtools for indexing of $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools index -b $BAM_OUT/$fullID.sorted.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_dir/samtools index -b $BAM_OUT/$fullID.sorted.bam 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call index -b $BAM_OUT/$fullID.sorted.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_call index -b $BAM_OUT/$fullID.sorted.bam 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished using samtools for indexing of $fullID!\n";
# removing pcr duplicates with samtools.
print $logprint "<INFO>\t",timer(),"\tStart removing putative PCR duplicates from $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools rmdup $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_dir/samtools rmdup $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call rmdup $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_call rmdup $BAM_OUT/$fullID.sorted.bam $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished removing putative PCR duplicates for $fullID!\n";
# recreate index with samtools.
print $logprint "<INFO>\t",timer(),"\tStart recreating index for $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools index -b $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_dir/samtools index -b $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call index -b $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile\n";
$commandline = "$SAMTOOLS_call index -b $BAM_OUT/$fullID.nodup.bam 2>> $BAM_OUT/$logfile";
system($commandline)==0 or die "$commandline failed: $?\n";
print $logprint "<INFO>\t",timer(),"\tFinished recreating index for $fullID!\n";
# removing temporary files.
Expand Down
7 changes: 4 additions & 3 deletions lib/TBpile.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use TBtools;
use Exporter;
use vars qw($VERSION @ISA @EXPORT);

$VERSION = 1.0.0;
$VERSION = 1.0.1;
@ISA = qw(Exporter);
@EXPORT = qw(tbpile);

Expand All @@ -18,6 +18,7 @@ sub tbpile {
my $logprint = shift;
my $VAR_dir = shift;
my $SAMTOOLS_dir = shift;
my $SAMTOOLS_call = shift;
my $GATK_dir = shift;
my $GATK_OUT = shift;
my $MPILE_OUT = shift;
Expand Down Expand Up @@ -79,8 +80,8 @@ sub tbpile {

# create .mpileup files.
print $logprint "<INFO>\t",timer(),"\tStart using samtools for creating a .mpileup file for $fullID...\n";
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_dir/samtools mpileup -B -A -x -f $VAR_dir/$ref $GATK_OUT/$file > $MPILE_OUT/$mpile_file 2>> $MPILE_OUT/$mpile_logfile\n";
system("$SAMTOOLS_dir/samtools mpileup -B -A -x -f $VAR_dir/$ref $GATK_OUT/$file > $MPILE_OUT/$mpile_file 2>> $MPILE_OUT/$mpile_logfile");
print $logprint "<INFO>\t",timer(),"\t$SAMTOOLS_call mpileup -B -A -x -f $VAR_dir/$ref $GATK_OUT/$file > $MPILE_OUT/$mpile_file 2>> $MPILE_OUT/$mpile_logfile\n";
system("$SAMTOOLS_call mpileup -B -A -x -f $VAR_dir/$ref $GATK_OUT/$file > $MPILE_OUT/$mpile_file 2>> $MPILE_OUT/$mpile_logfile");
print $logprint "<INFO>\t",timer(),"\tFinished using samtools for creating a .mpileup file for $fullID!\n";
# finished.
}
Expand Down
Loading

0 comments on commit 5c8501a

Please sign in to comment.