From 41c43e104871e3e015cf3269da8650406e3d2f86 Mon Sep 17 00:00:00 2001 From: mmterpstra Date: Fri, 13 May 2016 12:35:19 +0200 Subject: [PATCH] - Made the fastq filter in src/NugeneMergeFastqFiles.pl optional - Added eb files for new version --- ...roups-0.1.2-foss-2016a-Perl-5.20.2-bare.eb | 36 +++++++++++++++++++ ...roups-0.1.4-foss-2016a-Perl-5.20.2-bare.eb | 36 +++++++++++++++++++ ...ups-0.1.4-goolf-1.7.20-Perl-5.20.2-bare.eb | 36 +++++++++++++++++++ src/NugeneMergeFastqFiles.pl | 27 +++++++++----- 4 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 eb/DigitalBarcodeReadgroups-0.1.2-foss-2016a-Perl-5.20.2-bare.eb create mode 100644 eb/DigitalBarcodeReadgroups-0.1.4-foss-2016a-Perl-5.20.2-bare.eb create mode 100644 eb/DigitalBarcodeReadgroups-0.1.4-goolf-1.7.20-Perl-5.20.2-bare.eb diff --git a/eb/DigitalBarcodeReadgroups-0.1.2-foss-2016a-Perl-5.20.2-bare.eb b/eb/DigitalBarcodeReadgroups-0.1.2-foss-2016a-Perl-5.20.2-bare.eb new file mode 100644 index 0000000..6068b6f --- /dev/null +++ b/eb/DigitalBarcodeReadgroups-0.1.2-foss-2016a-Perl-5.20.2-bare.eb @@ -0,0 +1,36 @@ +easyblock = 'Tarball' + +name = 'DigitalBarcodeReadgroups' +version = '0.1.2' + +homepage = 'https://github.com/mmterpstra/%s' % (name) +description = """Misc tools""" + +toolchain = {'name': 'foss', 'version': '2016a'} + +source_urls = [('https://github.com/mmterpstra/%s/archive/' % (name) )] +sources = [ '%s.tar.gz' % version ] + +perl = 'Perl' +perlver = '5.20.2' +perlversuffix = '-bare' +versionsuffix = '-%s-%s%s' % (perl, perlver, perlversuffix) + +samtools='SAMtools' +samtoolsver='1.3' + +dependencies = [ + (perl, perlver, perlversuffix), + (samtools, samtoolsver), +] + +# +# Check if all binaries are present. +# + +sanity_check_paths = { + 'files': [ 'src/NugeneDigitalSplitter.pl', 'src/NugeneMergeFastqFiles.pl' ], + 'dirs': [ 'src/' ], +} + +moduleclass = 'bio' diff --git a/eb/DigitalBarcodeReadgroups-0.1.4-foss-2016a-Perl-5.20.2-bare.eb b/eb/DigitalBarcodeReadgroups-0.1.4-foss-2016a-Perl-5.20.2-bare.eb new file mode 100644 index 0000000..990c552 --- /dev/null +++ b/eb/DigitalBarcodeReadgroups-0.1.4-foss-2016a-Perl-5.20.2-bare.eb @@ -0,0 +1,36 @@ +easyblock = 'Tarball' + +name = 'DigitalBarcodeReadgroups' +version = '0.1.4' + +homepage = 'https://github.com/mmterpstra/%s' % (name) +description = """Misc tools""" + +toolchain = {'name': 'foss', 'version': '2016a'} + +source_urls = [('https://github.com/mmterpstra/%s/archive/' % (name) )] +sources = [ '%s.tar.gz' % version ] + +perl = 'Perl' +perlver = '5.20.2' +perlversuffix = '-bare' +versionsuffix = '-%s-%s%s' % (perl, perlver, perlversuffix) + +samtools='SAMtools' +samtoolsver='1.3' + +dependencies = [ + (perl, perlver, perlversuffix), + (samtools, samtoolsver), +] + +# +# Check if all binaries are present. +# + +sanity_check_paths = { + 'files': [ 'src/NugeneDigitalSplitter.pl', 'src/NugeneMergeFastqFiles.pl' ], + 'dirs': [ 'src/' ], +} + +moduleclass = 'bio' diff --git a/eb/DigitalBarcodeReadgroups-0.1.4-goolf-1.7.20-Perl-5.20.2-bare.eb b/eb/DigitalBarcodeReadgroups-0.1.4-goolf-1.7.20-Perl-5.20.2-bare.eb new file mode 100644 index 0000000..60d0695 --- /dev/null +++ b/eb/DigitalBarcodeReadgroups-0.1.4-goolf-1.7.20-Perl-5.20.2-bare.eb @@ -0,0 +1,36 @@ +easyblock = 'Tarball' + +name = 'DigitalBarcodeReadgroups' +version = '0.1.4' + +homepage = 'https://github.com/mmterpstra/%s' % (name) +description = """Misc tools""" + +toolchain = {'name': 'goolf', 'version': '1.7.20'} + +source_urls = [('https://github.com/mmterpstra/%s/archive/' % (name) )] +sources = [ '%s.tar.gz' % version ] + +perl = 'Perl' +perlver = '5.20.2' +perlversuffix = '-bare' +versionsuffix = '-%s-%s%s' % (perl, perlver, perlversuffix) + +samtools='SAMtools' +samtoolsver='1.2' + +dependencies = [ + (perl, perlver, perlversuffix), + (samtools, samtoolsver), +] + +# +# Check if all binaries are present. +# + +sanity_check_paths = { + 'files': [ 'src/NugeneDigitalSplitter.pl', 'src/NugeneMergeFastqFiles.pl' ], + 'dirs': [ 'src/' ], +} + +moduleclass = 'bio' diff --git a/src/NugeneMergeFastqFiles.pl b/src/NugeneMergeFastqFiles.pl index a025c9f..4471a4f 100644 --- a/src/NugeneMergeFastqFiles.pl +++ b/src/NugeneMergeFastqFiles.pl @@ -2,18 +2,27 @@ use warnings; use strict; use Data::Dumper; +use Getopt::Std; +use Scalar::Util qw(looks_like_number); + &main(); -our $qualFilter = 10; sub main { #use List::Util qw/max/; my $in; - my $use="$0 randombarcodes.fq.gz outdir reads_1.fq.gz reads_2.fq.gz. + + my $use="$0 [-q INT] randombarcodes.fq.gz outdir reads_1.fq.gz reads_2.fq.gz. Note: -now filters for q<$qualFilter barcode reads and N bases in randombarcodes.fq.gz"; +now filters for illumina 1.8 base quality < -q INT barcode reads and N bases in randombarcodes.fq.gz"; #open/check some resuired files + my $opts; + getopts('q:', \%{$opts}); + die "[FATAL] if '-q INT' is specified use integer for INT".$use if(defined($opts -> {'q'}) && not(looks_like_number($opts -> {'q'}))); + + die $use."\n" if(scalar(@ARGV) < 2); + open(my $randomBcHandle,"-|",'gzip -dc '.$ARGV[0]) or die "Cannot read open randombarcode file ".$ARGV[0]."\n".$use; @@ -21,14 +30,13 @@ sub main { die "Outdir does not exist! ".$ARGV[1]."\n".$use;; } - #open handles fastq files open(my $fastq1Handle,"-|",'gzip -dc '.$ARGV[2]) - or die "Cannot read open fq1 file ".$ARGV[2]; + or die "[FATAL] Cannot read open fq1 file ".$ARGV[2]; open(my $fastq1OutHandle,"|-",'gzip -c > '.GetOutFileName($ARGV[1],$ARGV[2])) - or die "Cannot write fq1 file ".$ARGV[2]; + or die "[FATAL] Cannot write fq1 file ".$ARGV[2]; my $fastq2Handle; my $fastq2OutHandle; @@ -63,7 +71,7 @@ sub main { my $fcid= getFCID($fq1); $stats -> {'recordcount'}++; - if(TestRandomBarcodeQual($rfq)){ + if(not(defined($opts -> {'q'})) || TestRandomBarcodeQual($opts -> {'q'},$rfq)){ $stats -> {'passcount'}++; $fq1 = setFCID($fq1,$fcid."_".$rbc); @@ -150,6 +158,7 @@ sub GetOutFileName { } sub TestRandomBarcodeQual { + my $q = shift @_; my $fq = shift @_; if(not( $fq->[1] =~ /^[ATCGatcg]*$/)){ #warn $fq->[1] . $&; @@ -162,9 +171,9 @@ sub TestRandomBarcodeQual { my @qualsOrd = map{ord()}(@quals); for my $qual (@qualsOrd){ #die Dumper($fq,\@quals); - if($qual < (33+$qualFilter)){ + if($qual < (33+$q)){ #die Dumper($fq,\@quals,\@qualsOrd); - return 0 if($qual < (33+10)); + return 0 ;#if($qual < (33+$q)); } } return 1;