-
Notifications
You must be signed in to change notification settings - Fork 7
/
gffmask.pl
executable file
·60 lines (48 loc) · 1.4 KB
/
gffmask.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/perl
# ---- Settings, usage text and command-line option parsing ----
#
# $maskchar : single character written over every masked residue.
#             The original script carried a commented-out default of "x";
#             "n" is used so that the program defaults to DNA (pass "-c x"
#             to mask with "x" instead).
# $width    : number of residues per output line.
#
# Both are package variables (declared with `our`) because the rest of the
# script reads them by their unqualified names.
our $maskchar = "n";
our $width    = 50;

# Usage text.  Note that the "Default maskchar" line is built before the
# options are parsed, so it always shows the built-in default.
my $usage = <<"END_USAGE";
$0 -- mask GFF-denoted segments out of a FASTA format file

Usage: $0 [-c maskchar] <GFF files...>

Acts as a filter on STDIN

Default maskchar="$maskchar"

END_USAGE

# Consume leading "-..." options; everything that remains in @ARGV is
# treated as a GFF file name.
while (@ARGV && $ARGV[0] =~ /^-/) {
    my $arg = shift @ARGV;
    if ($arg eq "-c") {
        my $value = shift @ARGV;

        # An absent or empty value would make the mask character the empty
        # string, which would delete residues instead of masking them.
        die "Option -c requires a mask character\n\n$usage"
            unless defined $value && length $value;

        # Only the first character of the value is used.
        $maskchar = substr($value, 0, 1);
    }
    else {
        die "Unknown option $arg\n\n$usage";
    }
}

# At least one GFF file must be named.
@ARGV >= 1 or die "$usage\nInsufficient number of GFF files specified\n";
# ---- Load the ranges to mask from every GFF file named in @ARGV ----
#
# %gff maps an upper-cased sequence name to a list of "start end" strings
# (coordinates exactly as given in GFF columns 4 and 5).  It is a package
# variable because the FASTA filtering code reads it by name.
our %gff;

# _parse_gff_line($line)
#   $line : one GFF line with comments already stripped.
# Returns ($seqname, $start, $end) with $start <= $end, or the empty list
# when the line has fewer than five tab-separated columns or when columns
# 4/5 are not non-negative integers.  Columns 2, 3 and 6 onwards are not
# used by this script.
sub _parse_gff_line {
    my ($line) = @_;
    my ($seqname, undef, undef, $start, $end) = split /\t/, $line;
    return unless defined $start && defined $end;

    # Column 5 can be the last field on a short line and then still carries
    # the line terminator, so trim before validating.
    s/^\s+|\s+$//g for $start, $end;
    return unless $start =~ /^\d+$/ && $end =~ /^\d+$/;

    # Store reversed coordinates in ascending order so that the masking
    # length computed from them can never be negative.
    ($start, $end) = ($end, $start) if $start > $end;
    return ($seqname, $start, $end);
}

foreach my $gff_file (@ARGV) {
    # Three-argument open: the name is always taken literally as a file to
    # read, never as a mode prefix or a pipe.
    open my $gff_fh, '<', $gff_file or die "Couldn't open $gff_file: $!";
    while (my $line = <$gff_fh>) {
        $line =~ s/#.*//;               # drop comments
        next unless $line =~ /\S/;      # skip blank lines

        my @record = _parse_gff_line($line);
        unless (@record) {
            warn "Skipping malformed GFF line $. in $gff_file\n";
            next;
        }
        my ($seqname, $start, $end) = @record;

        # Sequence names are compared case-insensitively.
        push @{ $gff{uc $seqname} }, "$start $end";
    }
    close $gff_fh;
}
# mask()
#   Prints the package variable $tagline to the currently selected output
#   handle and returns the result of that print.
#
# NOTE(review): nothing in this file calls mask() or assigns $tagline; it
# looks like an unfinished leftover.  It is kept so that any external caller
# keeps working.  The unused local it used to declare has been removed.
sub mask {
    print $tagline;
}
# ---- Filter FASTA from STDIN to STDOUT, masking the loaded GFF ranges ----
#
# These package variables are set earlier in the script:
#   $maskchar : character written over masked residues
#   $width    : residues per output line
#   %gff      : upper-cased sequence name => list of "start end" strings
our ($maskchar, $width, %gff);

# _mask_ranges($sequence, $char, @ranges)
#   $sequence : residue string without whitespace
#   $char     : replacement character
#   @ranges   : "start end" strings, 1-based inclusive coordinates
# Returns a copy of $sequence in which every range is overwritten with $char.
# Ranges are made safe before being applied, so the result always has the
# same length as the input (for a one-character $char):
#   * entries that are not two integers are ignored,
#   * reversed ranges (start > end) are swapped,
#   * ranges are clipped to 1 .. length($sequence),
#   * ranges lying entirely outside the sequence are ignored.
sub _mask_ranges {
    my ($sequence, $char, @ranges) = @_;
    my $length = length $sequence;

    for my $range (@ranges) {
        my ($start, $end) = split ' ', $range;
        next unless defined $start && defined $end;
        next unless $start =~ /^-?\d+$/ && $end =~ /^-?\d+$/;

        ($start, $end) = ($end, $start) if $start > $end;
        $start = 1       if $start < 1;
        $end   = $length if $end > $length;
        next if $start > $end;          # nothing left inside the sequence

        my $span = $end - $start + 1;
        substr($sequence, $start - 1, $span) = $char x $span;
    }
    return $sequence;
}

{
    # Discard everything up to and including the first ">".
    local $/ = ">";
    my $preamble = <STDIN>;
}

while (1) {
    # The header is the remainder of the line after ">".
    my $header = do { local $/ = "\n"; <STDIN> };
    last unless defined $header;

    # Only the first whitespace-delimited word of the header is kept; any
    # description after it is not written to the output.
    my ($seqname) = split ' ', $header;
    $seqname = '' unless defined $seqname;

    # The sequence body runs up to the next ">" (or end of input).  chomp
    # must run while $/ is still ">" so that it strips that delimiter.
    my $sequence = do {
        local $/ = ">";
        my $chunk = <STDIN>;
        chomp $chunk if defined $chunk;
        $chunk;
    };
    $sequence = '' unless defined $sequence;    # header with no body
    $sequence =~ s/\s//g;

    # Names are matched case-insensitively against the GFF data.
    my $ranges = $gff{uc $seqname} || [];
    $sequence = _mask_ranges($sequence, $maskchar, @$ranges);

    print ">$seqname\n";
    for (my $offset = 0; $offset < length $sequence; $offset += $width) {
        print substr($sequence, $offset, $width), "\n";
    }
}