#!/usr/bin/env python
"""
kepler_analyser.py
Provides classes that collect information about a grid of models and use the
routines in model_analysis.py to analyse the light curves.
Author: Nathanael Lampe
Created: June 27, 2015
How to use me:
import kepler_analyser
grid = kepler_analyser.ModelGrid('example_data', 'xrba', 'MODELS.txt', 'output_data')
number_of_runs = len(grid)
grid.analyse_all()
The ModelGrid class expects a directory structure (see for example the 'example_data'
directory) and a file with a "key table" describing the parameters of each model run
("MODELS.txt" in the example). Different directory structures and key tables can be
accommodated by making a subclass: see for example ElmoGrid.
TODO:
- cut out accretion dips?
- Work out why lmfit doesn't work on certain configurations
"""
from __future__ import division, unicode_literals, print_function
import numpy as np
import os
import logging
try:
    from utils import kepler  # For reading binary light curve files
except ImportError:
    # Only needed by ElmoGrid; ModelGrid works without it
    pass
import model_analysis
# You can set any models to ignore or flag. There are two lists for this,
# each holding model name strings:
#   notAnalysable : model will be skipped entirely
#   twinPeaks     : model will be flagged as a twin-peaked burst
notAnalysable = ['xrba324', 'xrba325', 'xrba326']
twinPeaks = ['xrba076', 'xrba281', 'xrba282', 'xrba362', 'xrba363', 'xrba364',
             'xrba366', 'xrba387', 'xrba400', 'xrba401', 'xrba402', 'xrba403',
             'xrba408'] + ['xrba%i' % ii for ii in range(410, 422)]


class ModelGrid:
"""
A class to facilitate the automated analysis of the light curves from a series
of KEPLER runs.
    This class is set up for the grid from A. Heger, and reads ascii versions of the
    light curve files. For a new model grid, make a subclass and modify the necessary
    parts. See ElmoGrid as an example.
All data will go in a specified output directory. It will have the following structure:
outputDir
|>modelId1
|>0.data # burst 1
|>1.data # burst 2
|>2.data # burst 3
|>mean.data # mean lightcurve
|>modelId2
|>0.data # burst 1
|>1.data # burst 2
|>2.data # burst 3
|>mean.data # mean lightcurve
...
|>db.csv # information for each separated burst
|>summ.csv # mean burst information
Example:
grid = ModelGrid('model_files', 'xrba', 'MODELS.txt', 'burst_analysis')
grid.analyse_all()
"""
    def __init__(self, base_dir, base_name, parameter_filename, output_dir, notAnalysable=(), twinPeaks=()):
"""
base_dir: all files are in (subdirectories of) this directory
base_name: all problem names are base_name followed by a number
parameter_filename: name of the file in base_dir that contains the
parameter values for each run
output_dir: location of output files
notAnalysable: manually categorize models
twinPeaks: manually categorize models
"""
self.base_dir = base_dir
self.base_name = base_name
self.parameter_filename = parameter_filename
self.output_dir = output_dir
self.notAnalysable = notAnalysable
self.twinPeaks = twinPeaks
self.dbVals = {}
self.summVals = {}
self.key_table = self.load_key_table()
def load_key_table(self):
"""
Return a dictionary with arrays that lists the problem names as well as some information
on them. Fields are:
name : model filename without .data extension
acc : accretion rate
z : metallicity
h : hydrogen fraction
        pul  : comment line describing the lightcurve (may be empty, must exist);
               the comments on this line will be printed in the output table
cyc : Number of numerical steps simulated
comm : area for comments
"""
path = os.path.join(self.base_dir, self.parameter_filename)
        # The str() wrappers keep the field names native strings under
        # Python 2, where unicode_literals would otherwise make them unicode
        dt = [(str('name'), 'S7'), (str('acc'), float), (str('z'), float),
              (str('h'), float), (str('lAcc'), float), (str('pul'), 'S20'),
              (str('cyc'), int), (str('comm'), 'S200')]
        data = np.genfromtxt(path, dtype=dt, delimiter=[7, 15, 8, 8, 10, 20, 10, 200])
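        # Columns are fixed-width: name(7) acc(15) z(8) h(8) lAcc(10)
        # pul(20) cyc(10) comm(200), matching the delimiter list above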
table = {}
for name in data.dtype.names:
table[name] = data[name]
table['name'] = [name.strip() for name in data['name']]
#table['cyc'] = [cyc.strip() for cyc in data['cyc']]
table['pul'] = [pul.strip() for pul in data['pul']]
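        # The key table has no base-heating column; assume a constant
        # Qb = 0.15 (presumably MeV/nucleon) for every run in this grid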
table['qb'] = np.ones(len(table['name']))*0.15
return table
def __len__(self):
"""
Return the number of runs listed in the key_table
"""
return len(self.key_table['name'])
def build_output_directories(self):
"""
        Create the directories where the output will be placed
"""
self.safe_mkdir(self.output_dir)
for name in self.key_table["name"]:
self.safe_mkdir(os.path.join(self.output_dir, name.strip()))
def safe_mkdir(self, path):
"""
Create a directory only if it does not exist yet
"""
if not os.path.exists(path):
os.mkdir(path)
elif not os.path.isdir(path):
            logging.warning(
                'Path already exists, but is not a directory: {}'.format(path))
def load_light_curve(self, modelID):
"""
Load the light curve from filename, and return time, luminosity and
radius
"""
filename = os.path.join(self.base_dir, '{}.data'.format(modelID))
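        # Expect one header row followed by three columns (time, luminosity,
        # radius); unpack=True returns the columns as separate arrays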
return np.loadtxt(filename, skiprows=1, unpack=True)
def get_model_id(self, index):
"""
For given index, return the model id
"""
return self.key_table['name'][index]
def is_completed(self, index):
"""
Return whether the analysis of the run with given index is completed
"""
        modelID = self.get_model_id(index)
        return (modelID in self.dbVals) and (modelID in self.summVals)
def analyse_all(self, step=1, skip_completed=False):
"""
Iterate over all model runs and analyse the light curves
step: analyse every 'step' run. Useful for debugging.
skip_completed: whether to skip runs that have already been analysed.
Useful for resuming.
"""
self.build_output_directories()
        for i in range(0, len(self), step):
if not (skip_completed and self.is_completed(i)):
self.analyse_one(i)
self.save_burst_data()
def analyse_one(self, index):
"""
Analyse a single light curve with given index. Results are returned as
well as stored in self.dbVals[modelID] and self.summVals[modelID].
"""
modelID = self.get_model_id(index)
        try:
            burstTime, burstLum, burstRad = self.load_light_curve(modelID)
        except (IOError, ValueError):
            # Problem reading light curve; file may not exist or be corrupt
            logging.warning(
                'Problem loading light curve {}. Skipping.'.format(modelID))
            return None, None
        # If the number of cycles was unknown, get it from the light curve
        if self.key_table['cyc'][index] == 0:
            self.key_table['cyc'][index] = len(burstTime)
ma = model_analysis.ModelAnalysis(modelID, burstTime, burstLum,
burstRad)
        if modelID not in self.notAnalysable:
            # Locate bursts
            ma.separate(os.path.join(self.output_dir, modelID))
        flag = ma.get_flag(modelID in self.twinPeaks,
                           modelID in self.notAnalysable)
        print('model: %s; flag: %i' % (modelID, flag))
# For 1.4Msun, 10km NS (rest frame) and solar composition
# TODO: make more generic?
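        # The constant is roughly G*M/R per unit accretion rate:
        # (G * 1.4 Msun / 10 km) * (1 Msun / 1 yr) ~= 1.17e46 erg, so if
        # 'acc' is in Msun/yr (an assumption), lAccErgs is in erg/s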
lAccErgs = float(1.17e46*self.key_table['acc'][index])
alphas = ma.get_alpha(lAccErgs)
dbVals = ma.get_burst_values()
ma.get_mean_lcv(self.output_dir)
summVals = ma.get_mean_values(
{k: v[index] for k, v in self.key_table.items()})
self.dbVals[modelID] = dbVals
self.summVals[modelID] = summVals
return dbVals, summVals
def save_burst_data(self):
"""
Save the results from the analyses to CSV files
"""
# Collect data from all runs and bursts
dbVals = []
summVals = []
        for i in range(len(self)):
            modelID = self.get_model_id(i)
            if self.is_completed(i):
                db = self.dbVals[modelID]
                summ = self.summVals[modelID]
                if db is not None:
                    dbVals.extend(db)
                summVals.append(summ)
summHead = ['model', 'num', 'acc', 'z', 'h', 'lAcc', 'pul', 'cyc',
'burstLength', 'uBurstLength', 'peakLum', 'uPeakLum',
'persLum', 'uPersLum', 'fluence', 'uFluence', 'tau',
'uTau', 'tDel', 'uTDel', 'conv', 'uConv', 'r1090',
'uR1090', 'r2590', 'uR2590', 'singAlpha', 'uSingAlpha',
'singDecay', 'uSingDecay', 'alpha', 'uAlpha', 'flag', 'Qb']
dbHead = ['model', 'burst', 'bstart', 'btime', 'fluence', 'peakLum',
'persLum', 'tau', 'tdel', 'conv', 't10', 't25', 't90',
'fitAlpha', 'fitDecay', 'alpha']
        with open(os.path.join(self.output_dir, 'db.csv'), 'w') as ofile:
            ofile.write('# ' + ','.join(dbHead) + '\n')
            for line in dbVals:
                ofile.write(','.join(map(repr, line)) + '\n')
        with open(os.path.join(self.output_dir, 'summ.csv'), 'w') as ofile:
            ofile.write('# ' + ','.join(summHead) + '\n')
            for line in summVals:
                ofile.write(','.join(map(repr, line)) + '\n')


class ElmoGrid(ModelGrid):
"""
    Set up for analysing the "elmo" grid from L. Keek. Directly reads KEPLER's
    binary lc files.
Example:
grid = ElmoGrid('example_data', 'xrbh', 'params.txt', 'elmo_analysis')
grid.analyse_all()
"""
def load_key_table(self):
"""
Alternate version that loads from a simpler file format
"""
path = os.path.join(self.base_dir, self.parameter_filename)
X = 0.71 # Accretion composition is solar
Z = 0.02
data = np.loadtxt(path, unpack=True)
        number = np.array(data[0], dtype=int)
mdot = data[1]
qb = data[2]
table = {}
table['name'] = ['{base}{number}'.format(base=self.base_name, number=i) for i in number]
table['lAcc'] = mdot
table['acc'] = mdot*1.75e-8
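        # Presumably mdot is in Eddington units: 1.75e-8 Msun/yr is a
        # commonly used Eddington accretion rate, so 'acc' is in Msun/yr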
table['z'] = np.ones_like(mdot)*Z
table['h'] = np.ones_like(mdot)*X
        # Why is this a string? Because sometimes it uses characters to
        # describe the burst train
        table['pul'] = np.zeros(len(mdot), dtype=str)
        # This should be pulled from the lcv, not from any table..
        table['cyc'] = np.zeros(len(mdot), dtype=int)
        table['comm'] = np.zeros(len(mdot), dtype=str)
table['qb'] = qb
return table
def load_light_curve(self, modelID):
"""
Load the light curve from filename, and return time, luminosity and
radius
"""
filename = os.path.join(self.base_dir, modelID,
'{}.lc'.format(modelID))
lc = kepler.LcFile(filename)
return lc.time, lc.lum, lc.radius
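

if __name__ == '__main__':
    # Minimal usage sketch, mirroring the module docstring's example. It
    # assumes an 'example_data' directory with a MODELS.txt key table next
    # to this script; adjust the paths for your own grid.
    logging.basicConfig(level=logging.INFO)
    grid = ModelGrid('example_data', 'xrba', 'MODELS.txt', 'output_data',
                     notAnalysable=notAnalysable, twinPeaks=twinPeaks)
    print('{} runs in the grid'.format(len(grid)))
    grid.analyse_all()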