-
Notifications
You must be signed in to change notification settings - Fork 1
/
simulate.cpp
140 lines (132 loc) · 4.96 KB
/
simulate.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include <numeric>
#include <iostream>
#include "simulate.h"
namespace TrajectoryLikelihood {
// Private function prototypes
vector<int> runLengthEncodeSequence (const vector<int>& seq);
// Function definitions
// runLengthEncodeSequence converts a sequence of 0's (for inserted residues) and +ve numbers (for ancestral residues)
// to a run-length encoded form as in trajec.cpp
vector<int> runLengthEncodeSequence (const vector<int>& seq) {
vector<int> rle;
int lastSign = 0, count = 0;
for (int c: seq) {
const int currentSign = (c == 0 ? -1 : +1);
if (currentSign == lastSign)
++count;
else {
if (count)
rle.push_back (lastSign * count);
count = 1;
lastSign = currentSign;
}
}
if (count)
rle.push_back (lastSign * count);
return rle;
}
void simulateIndels (vector<int>& seq, const IndelParams& params, double time, mt19937& rnd, int verbose, int insertedValue) {
uniform_real_distribution<double> uniform (0.0, 1.0);
const double insRate = params.totalInsertionRatePerSite();
const double delRate = params.totalRightwardDeletionRatePerSite();
if (verbose > 2)
cerr << "Initial sequence length is " << seq.size() << ", time remaining is " << time << endl;
while (time > 0) {
const int seqLen = seq.size();
const double totalInsRate = insRate * (seqLen + 1), totalDelRate = delRate * seqLen;
const double totalIndelRate = totalInsRate + totalDelRate;
const double timeToNextEvent = -log(uniform(rnd)) / totalIndelRate;
if (verbose > 3)
cerr << "Sequence length is " << seq.size() << ", insertion rate " << totalInsRate << ", deletion rate " << totalDelRate << ", time to next event is " << timeToNextEvent << endl;
time -= timeToNextEvent;
if (time < 0)
break;
double r = uniform(rnd) * totalIndelRate;
int pos = 0, k = 1;
if (r < totalInsRate) {
// insertion
while (r > insRate && pos <= seqLen) {
r -= insRate;
++pos;
}
while ((r -= params.insertionRate(k)) > 0 && (pos + k) < seqLen)
++k;
seq.insert (seq.begin() + pos, k, insertedValue);
if (verbose > 2)
cerr << "With time " << time << " remaining, " << k << "-residue insertion at position " << pos << " increased sequence length to " << seq.size() << endl;
} else {
// deletion
r -= totalInsRate;
while (r > delRate && pos < seqLen) {
r -= delRate;
++pos;
}
while ((r -= params.rightwardDeletionRate(k)) > 0 && (pos + k) < seqLen)
++k;
seq.erase (seq.begin() + pos, seq.begin() + pos + k);
if (verbose > 2)
cerr << "With time " << time << " remaining, " << k << "-residue deletion at position " << pos << " reduced sequence length to " << seq.size() << endl;
}
if (verbose > 4)
cerr << "Sequence: (" << to_string_join (runLengthEncodeSequence (seq)) << ")" << endl;
}
if (verbose > 3)
cerr << "Final sequence: (" << to_string_join (runLengthEncodeSequence (seq)) << ")" << endl;
}
SimulationCounts chopZoneSimulatedCounts (const IndelParams& params, double time, const SimulationConfig& config, mt19937& rnd) {
SimulationCounts simCounts (config.maxGapLen);
for (int trial = 0; trial < config.trials; ++trial) {
if (config.verbose > 1)
cerr << "Beginning simulation trial #" << (trial + 1) << endl;
vector<int> seq (config.initSeqLen);
iota (seq.begin(), seq.end(), 1);
simulateIndels (seq, params, time, rnd, config.verbose, 0);
int pos = 0;
while (pos < seq.size() && seq[pos] == 0)
++pos;
int endPos = ((int) seq.size()) - 1;
while (endPos >= 0 && seq[endPos] == 0)
--endPos;
while (pos < endPos) {
const int lastAncIndex = seq[pos];
int nInsertions = 0;
while (seq[++pos] == 0)
++nInsertions;
const int nDeletions = seq[pos] - lastAncIndex - 1;
if (nInsertions <= config.maxGapLen && nDeletions <= config.maxGapLen)
++simCounts.count[nInsertions][nDeletions];
else
++simCounts.overflowCount;
}
}
return simCounts;
}
vector<vector<double> > chopZoneSimulatedProbabilities (const IndelParams& params, double time, const SimulationConfig& config, mt19937& rnd, bool reportCountsInstead) {
const SimulationCounts simCounts = chopZoneSimulatedCounts (params, time, config, rnd);
if (config.verbose > 1) {
cerr << "Counts:" << endl;
for (const auto& v: simCounts.count) {
for (auto c: v)
cerr << c << ' ';
cerr << endl;
}
cerr << simCounts.overflowCount << endl;
}
vector<vector<double> > probs (config.maxGapLen + 1, vector<double> (config.maxGapLen + 1, 0.));
long long norm;
if (reportCountsInstead)
norm = 1;
else {
norm = simCounts.overflowCount;
for (const auto& v: simCounts.count)
for (auto c: v)
norm += c;
}
for (int i = 0; i <= config.maxGapLen; ++i)
for (int j = 0; j <= config.maxGapLen; ++j)
probs[i][j] = simCounts.count[i][j] / (double) norm;
if (reportCountsInstead)
probs.push_back (vector<double> (1, simCounts.overflowCount));
return probs;
}
}