-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathCrossSim.py
145 lines (123 loc) · 4.85 KB
/
CrossSim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env python
# encoding: utf-8
"""
CrossSim.py
Simulates crosses among haploid individuals. 8 parent crosses with various crossing schemes.
Created by Joshua Shapiro on 2008-10-06.
"""
from __future__ import division
import sys
import os
import optparse
from numpy import *
import Crosses
import Individual
def getOptions():
"""Get command line options"""
parser = optparse.OptionParser()
parser.add_option("-r", "--replicates",
action = "store", dest = "reps",
type = "int", default = 1,
help = "Number of simulations to run")
parser.add_option("-s","--scheme",
action = "store", dest = "scheme", default = 'collab',
help = "Crossing scheme. One of: 'collab' (collaborative cross), 'round' (round robin crosses), 'random' (random crossing)")
parser.add_option("-g", "--generations",
action = "store", dest = "generations",
type = "int", default = 0,
help = "Number of generations fo simulate. For Collaborative cross, this is the number of additional random mating generations")
parser.add_option("-n", "--nInd",
action = "store", dest = "nInd",
type = "int", default = None,
help = "Number of individuals to simulate for each generation.")
parser.add_option("-f", "--finalInd",
action = "store", dest = "fInd",
type = "int", default = 315,
help = "Final number of individuals to simulate. If it is greater than the number of individuals present in the last generation, an additional round of crosses may be added")
(options, args) = parser.parse_args()
return options
class SimStats(object):
"""structure for holding simulation statistics, filled dynamically"""
def __init__(self):
pass
def main():
options = getOptions()
parentIDs = [0,1,2,3,4,5,6,7]
parents = [Individual.Haploid(name = p, newChr = 1) for p in parentIDs ]
allStats = list()
for i in range(0, int(options.reps)):
stats = SimStats()
if options.scheme == 'collab':
population = Crosses.collabCross(parents)
if options.generations > 0:
for i in range(0, options.generations - 1):
population = Crosses.randomCross(population, nOffspring = options.nInd)
population = Crosses.randomCross(population, nOffspring = options.fInd)
elif options.scheme == 'random':
if options.generations == 0:
gens = 1
else:
gens = options.generations
population = Crosses.abaCross(parents)
for i in range(0, gens - 1):
population = Crosses.randomCross(population, nOffspring = options.nInd)
population = Crosses.randomCross(population, nOffspring = options.fInd)
elif options.scheme == 'bigRand':
if options.generations == 0:
gens = 1
else:
gens = options.generations
population = Crosses.randInfCross(parents, options.fInd, generations = gens)
#crosses done, data in population list
#calculate stats for each cross
#longest unrecombined segment
breaks = list()
for ind in population:
breaks += [s[0] for s in ind.chromosomes[0].segments]
breaks.sort()
breaks = [x for i,x in enumerate(breaks) if (i == 0 or breaks[i] != breaks[i-1])]
breaks.append(1.0)
lastbp = 0
maxSeg = 0
segs = list()
for bp in breaks:
segLen = float(bp - lastbp)
lastbp = bp
segs.append(segLen * 200)
stats.medSeg = median(segs)
stats.meanSeg = mean(segs)
stats.varSeg = var(segs)
stats.maxSeg = max(segs)
#proportion of genome covered for each parent
sites = range(0,201)
freqs = [[0 for _ in parents] for _ in sites]
for i in sites:
for ind in population:
#increment count at that parental location
freqs[i][ind.chromosomes[0].getParentAtLocation(float(i)/200)] += 1
missingSegment = 0
anyMissing = 0
lowSegment = 0
anyLow = 0
for site in freqs:
hasMissing = 0
hasLow = 0
for parent in site:
if parent == 0:
missingSegment += 1
hasMissing = 1
if parent < 20:
lowSegment += 1
hasLow = 1
anyMissing += hasMissing
anyLow += hasLow
stats.missing = missingSegment/len(freqs) * len(parents)
stats.low = lowSegment/len(freqs) * len (parents)
stats.anyMissing = anyMissing/len(freqs)
stats.anyLow = anyLow/len(freqs)
allStats.append(stats)
print "median\tmean\tvar\tmax\tFracMissing\tFracLowFreq\ttotFracMissing\ttotFracLow"
for stat in allStats:
print "%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f" % (stat.medSeg, stat.meanSeg, stat.varSeg, stat.maxSeg, stat.anyMissing, stat.anyLow, stat.missing, stat.low)
if __name__ == '__main__':
main()