-
Notifications
You must be signed in to change notification settings - Fork 0
/
append_abundance_blast_hits.pl
59 lines (57 loc) · 1.36 KB
/
append_abundance_blast_hits.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
use strict;
use warnings;
my %tax;
my %hit;
my %unc;
my %name;
my %exp;
open DIAMOND, "zcat nHd.2.3.1.aug.proteins.fasta.uniref90.blastp.daa.tagc.taxonhierarchy.gz |";
while (<DIAMOND>) {
if (/^(\S+)\t(\S+).*\t.*,(.*?,.*?,.*?),.*?,.*?,$/) {
if (not defined $hit {$1}) {
$hit {$1} = $2;
$tax {$1} = $3;
}
}
}
close DIAMOND;
open NAMES, "</exports/blast_db/uniref90.fasta";
while (<NAMES>) {
if (/>(\S+)\s+(.*)/) {
$name {$1} = $2;
}
}
close NAMES;
open EXP, "<edi_abundance.tsv";
while (<EXP>) {
if (/^(\S+).*\t(\S+\t\S+)$/) {
if (not defined $exp {$1}) {
$exp {$1} = $2;
}
}
}
close EXP;
open UNC, "<nHd.2.3.1.aug.proteins.fasta.tg.default.maker.proteins.final.fasta.1e-5.tophit.txt";
while (<UNC>) {
if (/^(\S+)\t(\S+)/) {
if (not defined $unc {$1}) {
$unc {$1} = $2;
}
}
}
close UNC;
####
while (<>) {
chomp;
print $_;
print "\t" . ( defined $exp {"$_"} ? $exp {"$_"} : "NO_EXPRESSION" );
print "\t" . ( defined $unc {"$_"} ? $unc {"$_"} : "NO_UNC_HIT" );
print "\t" . ( defined $hit {"$_"} ? $hit {"$_"} : "NO_UNIREF90_HIT" );
if (defined $hit {"$_"}) {
my $uniref = $hit {$_};
print "\t" . $name { $uniref } . "\t" . $tax { $_ } . "\n";
}
else {
print "\t.\t.\n";
}
}