-
Notifications
You must be signed in to change notification settings - Fork 0
/
allele_effect_lookup.pl
executable file
·118 lines (94 loc) · 3 KB
/
allele_effect_lookup.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl
#inputvariables segment file made by gwas_segment_maker.pl and a "3.txt" allele effects file output from Tassel
#segmentfile header
#CHR Start Stop Peak Peak_val Merged
#Tassel "3.txt" allele effects file
#Trait Marker Locus Site Allele Effect Obs
use strict;
use Getopt::Std;
use Data::Dumper;
#reading options
our ($opt_e, $opt_s);
getopts('e:s:');
if (!$opt_e) {
print STDERR "\nAllele effects file (-e) required\n\n\n";
die;
}
if (!$opt_s) {
print STDERR "\Segment file (-s) required\n\n\n";
die;
}
#open the allele effects file or die
open EFFECTSINFILE, "<", $opt_e or die "No such input file $opt_e";
my $seen_SNPs;
my $effect_lookup; #this is a pointer to the hash to lookup allele effects;
my $first_line = 1; #used to recognize the header;
#reads the effects file line by line
while (<EFFECTSINFILE>) {
# the variable $_ contains the current line.
chomp $_; #strips the new line character from the current input
#skip the header row
if ($first_line==1) {
#TODO add some checks to make sure the header is what we expect
$first_line=0;
next;
}
my @row = split('\t', $_);
my $chromosome = $row[2];
my $position = $row[3];
unless($effect_lookup->{$chromosome}->{$position}) {
$effect_lookup->{$chromosome}->{$position} = [];
}
push @{$effect_lookup->{$chromosome}->{$position}}, $_;
}
#print Dumper($effect_lookup);
close EFFECTSINFILE;
#open the segment file or die
open SEGMENTINFILE, "<", $opt_s or die "No such input file $opt_e";
#my $seen_SNPs;
$first_line = 1;
#reads the segment file line by line
while (<SEGMENTINFILE>) {
chomp $_;
if ($first_line==1) {
#TODO add some checks to make sure the header is what we expect
print $_."\tAllele1\tAllele2\tAllele1Count\tAllele2Count\tAllele1Effect\tAllele2Effect\tTotalAlleles\n";
$first_line=0;
next;
}
my @row = split('\t', $_);
my $chromosome = $row[0];
my $position = $row[3];
my @effectlines = @{$effect_lookup->{$chromosome}->{$position}};
#check for multi-allelic SNPs
my $total_allele_count = scalar(@effectlines)/2;
my %snp_info;
my $which_of_pair = 0;
foreach my $effectline (@effectlines) {
my @effect_row = split('\t', $effectline);
my $allele = $effect_row[4];
my $effect = $effect_row[5];
my $allele_count = $effect_row[6];
if ($which_of_pair == 0) {
$snp_info{'1_allele'} = $allele;
$snp_info{'1_allele_effect'} = $effect;
$snp_info{'1_count'} = $allele_count;
$which_of_pair++;
} elsif ($which_of_pair == 1) {
$snp_info{'2_allele'} = $allele;
$snp_info{'2_allele_effect'} = $effect;
$snp_info{'2_count'} = $allele_count;
$which_of_pair = 0;
print $_."\t".
$snp_info{'1_allele'}."\t".
$snp_info{'2_allele'}."\t".
$snp_info{'1_count'}."\t".
$snp_info{'2_count'}."\t".
$snp_info{'1_allele_effect'}."\t".
$snp_info{'2_allele_effect'}."\t".
$total_allele_count."\n";
} else {
die "ERROR: SNPs pairs out of sync\n";
}
}
}