-
Notifications
You must be signed in to change notification settings - Fork 0
/
binifyBedgraph.cpp
134 lines (118 loc) · 2.68 KB
/
binifyBedgraph.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include <fstream>
#include <iostream>
#include <math.h>
using namespace std;
char chrom[5];
int start;
int stop;
float map;
class binEntry
{ public:
int start;
int stop;
float map;
binEntry()
{
start = -1;
stop = -1;
map = -1;
}
binEntry(int startInp, int stopInp, float mapInp)
{
start = startInp;
stop = stopInp;
map = mapInp;
}
};
float getWeightedAverage(binEntry *entries, int numEntries)
{
int i = 0;
float sum = 0.0;
for (i = 0; i < numEntries; ++i) {
binEntry curEntry = entries[i];
sum += curEntry.map * (curEntry.stop - curEntry.start) / 5000.0;
}
return sum;
}
int main(int argc, char** argv)
{
if(argc < 2)
{
cout << "Please pass an input file location." << endl;
return 1;
}
std::ifstream infile(argv[1]);
int firstBin = 0;
float curSum = 0;
binEntry curEntries[5000]; // worst case, we'll have 1 range for every bp in bin.
int numEntries = 0; // keep track of how many entries we have.
while (infile >> chrom >> start >> stop >> map)
{
int roundedStart = floor(start / 5000) * 5000;
int roundedEnd = floor(stop / 5000) * 5000;
// if firstBin < rounded start, that means our current bin is before the next
// range's start, aka we are moving onto the next bin.
// save any data we have so far and start incrementing to find the right bin.
if (firstBin < roundedStart)
{
if( numEntries > 0)
{
float avgMap = getWeightedAverage(curEntries, numEntries);
cout << firstBin << " " << avgMap << endl;
numEntries = 0;
firstBin += 5000;
}
while(firstBin < roundedStart)
{
//cout << firstBin << " " << 0 << endl;
firstBin += 5000;
}
}
// we've found the right bin now.
// find the minimum start, for later knowledge.
int min = 0;
if(start > firstBin)
{
min = start; // this range starts within this bin
}
else
{
min = firstBin; // range started before this bin.
}
int end = 0;
// if this range ends within this bin...
if(roundedEnd == firstBin)
{
end = stop;
binEntry entry(min, end, map);
curEntries[numEntries] = entry;
numEntries += 1;
}
else
{
while(roundedEnd > firstBin)
{
end = firstBin + 5000;
binEntry entry(min, end, map);
curEntries[numEntries] = entry;
numEntries += 1;
// & do completion logic.
float avgMap = getWeightedAverage(curEntries, numEntries);
cout << firstBin << " " << avgMap << endl;
firstBin += 5000;
numEntries = 0;
min = firstBin;
}
end = stop;
binEntry entry(min, end, map);
curEntries[numEntries] = entry;
numEntries += 1;
}
}
if(numEntries > 0)
{
float avgMap = getWeightedAverage(curEntries, numEntries);
cout << firstBin << " " << avgMap << endl;
}
return 0;
}