-
Notifications
You must be signed in to change notification settings - Fork 7
/
0_get_cp_reads.pl
executable file
·40 lines (29 loc) · 1.13 KB
/
0_get_cp_reads.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/perl
# Script that fishes chloroplast reads from whole-genome read sets
#Carlos P Cantalapiedra (1), Ruben Sancho (1,2), Bruno Contreras Moreira (1,3)
#1) Estacion Experimental de Aula Dei-CSIC, Zaragoza, Spain
#2) Escuela Politecnica Superior de Huesca, U.Zaragoza, Spain
#3) Fundacion ARAID, Zaragoza, Spain
use strict;
use warnings;
use FindBin '$Bin';

# Path to the bundled duk binary, relative to this script's own folder.
# It is passed to system() in list form, so no shell quoting is required
# even when the installation path contains spaces.
my $DUKEXE = "$Bin/bin/duk/duk";

#my $refcpFASTA = $Bin.'/poaceae.fna';
# contains several cps from Poaceae species:
#>NC_015820 Acidosasa purpurea
#>NC_008591 Agrostis stolonifera
#...

# Three positional arguments are required:
#   1) existing folder with the whole-genome read files (*.fastq / *.fq)
#   2) output folder for the fished cp reads (created if missing)
#   3) FASTA file with related chloroplast genomes used as bait
if(@ARGV < 3 || !-d $ARGV[0])
{
	# $0 holds the name of the running script
	die "# usage: $0 <folder with all-read files> <output folder with cp-read files> <FASTA file with related cp genomes>\n";
}

my ($inpDIR,$outDIR,$refcpFASTA) = @ARGV;

if(!-e $refcpFASTA)
{
	die "# ERROR: cannot find FASTA file $refcpFASTA\n";
}

if(!-d $outDIR)
{
	mkdir($outDIR) or die "# ERROR: cannot create folder $outDIR: $!\n";
}

# Collect read files: skip hidden entries and require a literal
# '.fastq' or '.fq' in the name (the dot is escaped on purpose).
# Sorting makes the processing order reproducible across runs.
opendir(my $readsdh,$inpDIR) or die "# ERROR: cannot list folder $inpDIR: $!\n";
my @readfiles = sort grep { !/^\./ && /\.(?:fastq|fq)/ } readdir($readsdh);
closedir($readsdh);

foreach my $readf (@readfiles)
{
	my $allreads = "$inpDIR/$readf";
	my $cpreads  = "$outDIR/cp-$readf";

	print "# fishing cp reads from $inpDIR/$readf ...\n";

	# List-form system() bypasses the shell, so file names with spaces
	# or shell metacharacters are passed to duk verbatim.
	my $dukstatus = system($DUKEXE,'-k',24,'-c',2,
		'-m',$cpreads,'-o',"$cpreads.duk.log",
		$refcpFASTA,$allreads);
	if($dukstatus != 0)
	{
		warn "# WARNING: duk reported a problem with $allreads (status $dukstatus)\n";
	}

	# Compress only when duk actually produced an output file
	if(-e $cpreads)
	{
		system('gzip',$cpreads) == 0
			or warn "# WARNING: could not gzip $cpreads\n";
	}
	else
	{
		warn "# WARNING: no cp reads file produced for $allreads\n";
	}
}