Skip to content

Commit

Permalink
- Made the fastq filter in src/NugeneMergeFastqFiles.pl optional
Browse files Browse the repository at this point in the history
- Added eb files for new version
  • Loading branch information
mmterpstra committed May 13, 2016
1 parent f39822b commit 41c43e1
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 9 deletions.
36 changes: 36 additions & 0 deletions eb/DigitalBarcodeReadgroups-0.1.2-foss-2016a-Perl-5.20.2-bare.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# EasyBuild easyconfig for DigitalBarcodeReadgroups 0.1.2, built with the
# generic 'Tarball' easyblock under the foss/2016a toolchain.
easyblock = 'Tarball'

name = 'DigitalBarcodeReadgroups'
version = '0.1.2'

# The version suffix is derived from the Perl dependency this build is tied to.
perl = 'Perl'
perlver = '5.20.2'
perlversuffix = '-bare'
versionsuffix = '-%s-%s%s' % (perl, perlver, perlversuffix)

homepage = 'https://github.com/mmterpstra/%s' % name
description = 'Misc tools'

toolchain = dict(name='foss', version='2016a')

# The source archive is named after the version and lives under the
# project's GitHub archive URL.
source_urls = ['https://github.com/mmterpstra/%s/archive/' % name]
sources = ['%s.tar.gz' % version]

samtools = 'SAMtools'
samtoolsver = '1.3'

dependencies = [
    (perl, perlver, perlversuffix),
    (samtools, samtoolsver),
]

# Sanity check: both Perl scripts and the src/ directory that holds them
# must exist in the installation.
sanity_check_paths = {
    'files': [
        'src/NugeneDigitalSplitter.pl',
        'src/NugeneMergeFastqFiles.pl',
    ],
    'dirs': ['src/'],
}

moduleclass = 'bio'
36 changes: 36 additions & 0 deletions eb/DigitalBarcodeReadgroups-0.1.4-foss-2016a-Perl-5.20.2-bare.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# EasyBuild easyconfig for DigitalBarcodeReadgroups 0.1.4, built with the
# generic 'Tarball' easyblock under the foss/2016a toolchain.
easyblock = 'Tarball'

name = 'DigitalBarcodeReadgroups'
version = '0.1.4'

# The version suffix is derived from the Perl dependency this build is tied to.
perl = 'Perl'
perlver = '5.20.2'
perlversuffix = '-bare'
versionsuffix = '-%s-%s%s' % (perl, perlver, perlversuffix)

homepage = 'https://github.com/mmterpstra/%s' % name
description = 'Misc tools'

toolchain = dict(name='foss', version='2016a')

# The source archive is named after the version and lives under the
# project's GitHub archive URL.
source_urls = ['https://github.com/mmterpstra/%s/archive/' % name]
sources = ['%s.tar.gz' % version]

samtools = 'SAMtools'
samtoolsver = '1.3'

dependencies = [
    (perl, perlver, perlversuffix),
    (samtools, samtoolsver),
]

# Sanity check: both Perl scripts and the src/ directory that holds them
# must exist in the installation.
sanity_check_paths = {
    'files': [
        'src/NugeneDigitalSplitter.pl',
        'src/NugeneMergeFastqFiles.pl',
    ],
    'dirs': ['src/'],
}

moduleclass = 'bio'
36 changes: 36 additions & 0 deletions eb/DigitalBarcodeReadgroups-0.1.4-goolf-1.7.20-Perl-5.20.2-bare.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# EasyBuild easyconfig for DigitalBarcodeReadgroups 0.1.4, built with the
# generic 'Tarball' easyblock under the goolf/1.7.20 toolchain.
easyblock = 'Tarball'

name = 'DigitalBarcodeReadgroups'
version = '0.1.4'

# The version suffix is derived from the Perl dependency this build is tied to.
perl = 'Perl'
perlver = '5.20.2'
perlversuffix = '-bare'
versionsuffix = '-%s-%s%s' % (perl, perlver, perlversuffix)

homepage = 'https://github.com/mmterpstra/%s' % name
description = 'Misc tools'

toolchain = dict(name='goolf', version='1.7.20')

# The source archive is named after the version and lives under the
# project's GitHub archive URL.
source_urls = ['https://github.com/mmterpstra/%s/archive/' % name]
sources = ['%s.tar.gz' % version]

# This toolchain variant depends on SAMtools 1.2.
samtools = 'SAMtools'
samtoolsver = '1.2'

dependencies = [
    (perl, perlver, perlversuffix),
    (samtools, samtoolsver),
]

# Sanity check: both Perl scripts and the src/ directory that holds them
# must exist in the installation.
sanity_check_paths = {
    'files': [
        'src/NugeneDigitalSplitter.pl',
        'src/NugeneMergeFastqFiles.pl',
    ],
    'dirs': ['src/'],
}

moduleclass = 'bio'
27 changes: 18 additions & 9 deletions src/NugeneMergeFastqFiles.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,41 @@
use warnings;
use strict;
use Data::Dumper;
use Getopt::Std;
use Scalar::Util qw(looks_like_number);

&main();

our $qualFilter = 10;
sub main {
#use List::Util qw/max/; my $in;
my $use="$0 randombarcodes.fq.gz outdir reads_1.fq.gz reads_2.fq.gz.

my $use="$0 [-q INT] randombarcodes.fq.gz outdir reads_1.fq.gz reads_2.fq.gz.
Note:
now filters for q<$qualFilter barcode reads and N bases in randombarcodes.fq.gz";
now filters for illumina 1.8 base quality < -q INT barcode reads and N bases in randombarcodes.fq.gz";

#open/check some required files

my $opts;
getopts('q:', \%{$opts});
die "[FATAL] if '-q INT' is specified use integer for INT".$use if(defined($opts -> {'q'}) && not(looks_like_number($opts -> {'q'})));

die $use."\n" if(scalar(@ARGV) < 2);

open(my $randomBcHandle,"-|",'gzip -dc '.$ARGV[0])
or die "Cannot read open randombarcode file ".$ARGV[0]."\n".$use;

if(! -e $ARGV[1]){
die "Outdir does not exist! ".$ARGV[1]."\n".$use;;
}


#open handles fastq files

open(my $fastq1Handle,"-|",'gzip -dc '.$ARGV[2])
or die "Cannot read open fq1 file ".$ARGV[2];
or die "[FATAL] Cannot read open fq1 file ".$ARGV[2];

open(my $fastq1OutHandle,"|-",'gzip -c > '.GetOutFileName($ARGV[1],$ARGV[2]))
or die "Cannot write fq1 file ".$ARGV[2];
or die "[FATAL] Cannot write fq1 file ".$ARGV[2];

my $fastq2Handle;
my $fastq2OutHandle;
Expand Down Expand Up @@ -63,7 +71,7 @@ sub main {
my $fcid= getFCID($fq1);

$stats -> {'recordcount'}++;
if(TestRandomBarcodeQual($rfq)){
if(not(defined($opts -> {'q'})) || TestRandomBarcodeQual($opts -> {'q'},$rfq)){
$stats -> {'passcount'}++;

$fq1 = setFCID($fq1,$fcid."_".$rbc);
Expand Down Expand Up @@ -150,6 +158,7 @@ sub GetOutFileName {
}

sub TestRandomBarcodeQual {
my $q = shift @_;
my $fq = shift @_;
if(not( $fq->[1] =~ /^[ATCGatcg]*$/)){
#warn $fq->[1] . $&;
Expand All @@ -162,9 +171,9 @@ sub TestRandomBarcodeQual {
my @qualsOrd = map{ord()}(@quals);
for my $qual (@qualsOrd){
#die Dumper($fq,\@quals);
if($qual < (33+$qualFilter)){
if($qual < (33+$q)){
#die Dumper($fq,\@quals,\@qualsOrd);
return 0 if($qual < (33+10));
return 0 ;#if($qual < (33+$q));
}
}
return 1;
Expand Down

0 comments on commit 41c43e1

Please sign in to comment.