forked from vikas0633/perl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathN50Intergener.pl
100 lines (67 loc) · 1.96 KB
/
N50Intergener.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env perl
use strict;
use warnings;
my $usage = "usage: $0 gff3_file\n\n";
my $gff3 = $ARGV[0] or die $usage;
main: {
my %contig_to_gene_list;
my $gene_counter = 0;
open (my $fh, $gff3) or die "Error, cannot open file $gff3";
while (<$fh>) {
chomp;
unless (/\w/) { next; }
my @x = split (/\t/);
if ($x[2] eq 'gene') {
$gene_counter++;
my ($scaff, $lend, $rend, $name) = ($x[0], $x[3], $x[4], $x[8]);
push (@{$contig_to_gene_list{$scaff}}, { name => $name,
lend => $lend,
rend => $rend,
midpt => ($lend + $rend) / 2,
} );
}
}
close $fh;
my @intergenics;
foreach my $scaffold (keys %contig_to_gene_list) {
my @genes = sort {$a->{midpt}<=>$b->{midpt}} @{$contig_to_gene_list{$scaffold}};
my $prev_gene = shift @genes;
while (@genes) {
my $next_gene = shift @genes;
my $prev_acc = $prev_gene->{name};
my $next_acc = $next_gene->{name};
my $delta = $next_gene->{midpt} - $prev_gene->{midpt};
# print "$scaffold\t$prev_acc\t$next_acc\t$delta\n";
if ($delta > 0) {
push (@intergenics, { dist => $delta,
accA => $prev_acc,
accB => $next_acc,
} );
}
$prev_gene = $next_gene;
}
}
@intergenics = sort {$a->{dist}<=>$b->{dist}} @intergenics;
my $intergene_counter = 0;
my %seen;
my $sum_interdist = 0;
my $prev_percent = 0;
foreach my $intergenic (@intergenics) {
my ($dist, $accA, $accB) = ($intergenic->{dist}, $intergenic->{accA}, $intergenic->{accB});
$sum_interdist += $dist;
unless ($seen{$accA}) {
$seen{$accA} = 1;
$intergene_counter++;
}
unless ($seen{$accB}) {
$seen{$accB} = 1;
$intergene_counter++;
}
my $percent = sprintf ("%.2f", $intergene_counter / $gene_counter * 100);
if (int($percent) > $prev_percent && int($percent) % 5 == 0) {
print "$percent\t$intergene_counter\t$sum_interdist\n";
$prev_percent = int($percent);
}
}
exit(0);
}