-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBimFileHandler.pm
executable file
·118 lines (83 loc) · 2.54 KB
/
BimFileHandler.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/perl -w
package BimFileHandler;
use strict;
use warnings;
use Cwd;
#Constructor
sub new{
my $self = {};
bless($self, "BimFileHandler");
return $self;
}
1;
# This subroutine parses a BIM files and returns the set of SNPs that lie
# between two limiting positions on the chromosome. It also writes the
# selected SNPs into a new file.
# The limiting positions are 50 MB on either side of a SNP of interest
# INVOKE: getDifferentSNPsFromBimFiles();
sub getSNPsFromBimFile{
use constant START_POSITION => 49366316; #50 MB to left of SNP of interest
use constant END_POSITION => 149366316; #50 MB to the right of SNP of interest
my %snpMap = ();
print "Enter the name of the BIM file:";
my $filename = <STDIN>;
chomp($filename);
my $line;
my $snpID;
my $pos;
my @elements;
my @inputfilenameparts = split(/\//, $filename);
my @fileNameAndExtension = split(/\./, $inputfilenameparts[-1]);
my $outputfilename = $fileNameAndExtension[0]."-selectSNPs.".$fileNameAndExtension[1];
open (INPUT, "$filename") or die "Unable to open $filename. Exiting\n";
open (OUTPUT, ">$outputfilename");
foreach $line (<INPUT>){
chomp($line);
@elements = split(/\t/, $line);
$snpID = $elements[1];
$pos = $elements[3];
if($pos <= END_POSITION && $pos >= START_POSITION){
$snpMap{$snpID} = $line;
print OUTPUT "$line\n";
}
}
close OUTPUT;
close INPUT;
return %snpMap;
}
# Given two hashmaps, this function finds the key-value pairs that are found in the first map
# AND are not found in the second map. I.E. The K-V pairs that are unique to the first map.
# The assumption is that the second map is a subset of the first. This is not a generic method
# for determining the proper difference between two hashmaps.
# INVOKE: findDifferenceBetweenSnpSets(\%Map1, \%Map2). Note map arguments are passed by reference
sub findDifferenceBetweenSnpSets{
my @dif = ();
my (%map1) = %{$_[1]}; # $_[0] gives the name of the method as a ref!!!
my @keys1 = keys %map1;
my (%map2) = %{$_[2]};
my @keys2 = keys %map2;
my $i;
my $j;
my $found = 0;
my $key1;
foreach $key1 (@keys1){
if(!defined($map2{$key1}) && $key1 =~ /rs[0-9]+/){
push(@dif, "$map1{$key1}");
}
}
return @dif;
}
# A method for writing lines from an array into a file
# INVOKE: writeFile(@array)
sub writeFile{
print "Enter the name of the output BIM file:";
my $filename = <STDIN>;
chomp($filename);
my (@dif) = @{$_[1]};
my $line;
open(OUTFILE, ">$filename");
foreach $line (@dif){
print OUTFILE "$line\n";
}
close OUTFILE;
}