#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 24 14:09:02 2019
@author: fearthekraken
"""
import sys
import re
import os.path
import numpy as np
import pandas as pd
import copy
from itertools import chain
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import scipy.io as so
import scipy.stats as stats
import scipy.signal
import scipy.linalg
from scipy.signal import hilbert
from statsmodels.stats.multicomp import MultiComparison
from statsmodels.stats.anova import AnovaRM
import pdb
import pyphi
import sleepy
import AS
############## SUPPORTING FUNCTIONS ##############
def mx1d(rec_dict, mouse_avg):
    """
    Create a 1-dimensional vector of values for each trial, recording, or mouse
    @Params
    rec_dict - dictionary with recordings as keys and 1D data arrays as values
    mouse_avg - method for data averaging; by 'mouse', 'rec'[ording], or 'trial'
    @Returns
    data - 1D vector of data, with 1 value per trial, recording, or mouse
    labels - list of mouse/recording names, or integers to number trials
    """
    recordings = list(rec_dict.keys())
    # average values for each recording
    if mouse_avg == 'rec':
        data = [np.nanmean(rec_dict[rec]) for rec in recordings]
        labels = recordings
    # collect all values
    elif mouse_avg in ['trial', 'trials']:
        data = []
        for rec in recordings:
            data.extend(rec_dict[rec])
        labels = np.arange(0, len(data))
    # collect all values for each mouse and average within mice
    elif mouse_avg == 'mouse':
        mdict = {rec.split('_')[0]:[] for rec in recordings}
        mice = list(mdict.keys())
        for rec in recordings:
            idf = rec.split('_')[0]
            mdict[idf].extend(rec_dict[rec])
        data = [np.nanmean(mdict[idf]) for idf in mice]
        labels = mice
    else:
        raise ValueError(f"'{mouse_avg}' is not a valid averaging method")
    return data, labels
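# Usage sketch for mx1d() (hypothetical data; assumes the '<mouse>_<session>'
# recording-name convention used throughout this module):
#
#   rec_dict = {'Bob_n1': np.array([1., 2.]),
#               'Bob_n2': np.array([4.]),
#               'Sue_n1': np.array([3., 5.])}
#   mx1d(rec_dict, 'rec')    # -> ([1.5, 4.0, 4.0], ['Bob_n1', 'Bob_n2', 'Sue_n1'])
#   mx1d(rec_dict, 'mouse')  # -> ([~2.33, 4.0], ['Bob', 'Sue'])
#   mx1d(rec_dict, 'trial')  # -> ([1., 2., 4., 3., 5.], array([0, 1, 2, 3, 4]))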
def mx2d(rec_dict, mouse_avg='trials', d1_size=''):
    """
    Create a 2-dimensional matrix (subjects x time) from 1-dimensional data (time only)
    @Params
    rec_dict - dictionary with recordings as keys and lists of 1D data arrays as values
    mouse_avg - method for data averaging
                'trials' - mx rows are individual trials from all recordings
                'rec'    - mx rows are trial averages for each recording
                'mouse'  - mx rows are trial averages for each mouse
    d1_size - number of data points/time bins per trial
    @Returns
    mx - 2D data matrix (rows = trials/recordings/mice, columns = time bins)
    labels - list of mouse/recording names, or integers to number trials
    """
    recordings = list(rec_dict.keys())
    # if d1_size not provided, find a sample trial to determine number of data points
    if d1_size == '':
        for rec in recordings:
            if len(rec_dict[rec]) > 0:
                d1_size = len(rec_dict[rec][0])
                break
    # create matrix of trials x time bins
    if mouse_avg in ['trial', 'trials']:
        ntrials = sum([len(rec_dict[rec]) for rec in recordings])
        tmx = np.zeros((ntrials, d1_size))
        row = 0
        for rec in recordings:
            for data in rec_dict[rec]:
                tmx[row, :] = data
                row += 1
        mx = tmx
        labels = np.arange(0, tmx.shape[0])
    else:
        # create matrix of recording averages x time bins
        mx_dict = mx2d_dict(rec_dict, mouse_avg, d1_size)
        if mouse_avg == 'rec':
            rmx = np.zeros((len(recordings), d1_size))
            for i, rec in enumerate(recordings):
                rmx[i, :] = np.nanmean(mx_dict[rec], axis=0)
            mx = rmx
            labels = recordings
        # create matrix of mouse averages x time bins
        elif mouse_avg == 'mouse':
            mouse_dict = {rec.split('_')[0]:[] for rec in recordings}
            mice = list(mouse_dict.keys())
            mmx = np.zeros((len(mice), d1_size))
            for i, m in enumerate(mice):
                mmx[i, :] = np.nanmean(mx_dict[m], axis=0)
            mx = mmx
            labels = mice
    return mx, labels
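# Usage sketch for mx2d() (hypothetical data; each trial is a 1D array of equal length):
#
#   rec_dict = {'Bob_n1': [np.array([1., 2., 3.]), np.array([3., 4., 5.])],
#               'Sue_n1': [np.array([5., 6., 7.])]}
#   mx, labels = mx2d(rec_dict, 'trials')  # mx.shape == (3, 3), one row per trial
#   mx, labels = mx2d(rec_dict, 'rec')     # mx.shape == (2, 3), rows = [2,3,4], [5,6,7]
#   mx, labels = mx2d(rec_dict, 'mouse')   # same as 'rec' here (one recording per mouse)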
def mx2d_dict(rec_dict, mouse_avg, d1_size=''):
    """
    Create a 2-dimensional matrix (trials x time) from 1-dimensional data (time only)
    for EACH recording or mouse, and return as dictionary
    @Params
    rec_dict - dictionary with recordings as keys and lists of 1D data arrays as values
    mouse_avg - method for data collecting
                'rec'   - 1 dict key per recording, mx rows contain all
                          individual trials for that recording
                'mouse' - 1 dict key per mouse, mx rows contain all
                          individual trials for that mouse (multiple recordings)
    d1_size - number of data points/time bins per trial
    @Returns
    mx_dict - dictionary (key = recording/mouse, value = mx of trials x time bins)
    """
    recordings = list(rec_dict.keys())
    # if d1_size not provided, find a sample trial to determine number of data points
    if d1_size == '':
        for rec in recordings:
            if len(rec_dict[rec]) > 0:
                d1_size = len(rec_dict[rec][0])
                break
    if mouse_avg == 'rec':
        mx_dict = {rec:0 for rec in recordings}  # 1 key per recording
    elif mouse_avg == 'mouse':
        mx_dict = {rec.split('_')[0]:[] for rec in recordings}  # 1 key per mouse
    for rec in recordings:
        idf = rec.split('_')[0]
        trials = rec_dict[rec]
        # if there are no trials for a recording, create empty mx of correct size
        if len(trials) == 0:
            rmx = np.empty((1, d1_size))
            rmx[:] = np.nan
        # if there are 1+ trials, create mx of trials x time bins
        else:
            rmx = np.zeros((len(trials), d1_size))
            for i, data in enumerate(trials):
                rmx[i, :] = data
        if mouse_avg == 'rec':
            mx_dict[rec] = rmx
        # if averaging by mouse, collect mxs for all recordings
        elif mouse_avg == 'mouse':
            mx_dict[idf].append(rmx)
    if mouse_avg == 'rec':
        return mx_dict
    elif mouse_avg == 'mouse':
        # if averaging by mouse, vertically stack trials from all recordings
        for m in mx_dict.keys():
            mx_dict[m] = np.concatenate(mx_dict[m], axis=0)
        return mx_dict
def mx3d(rec_dict, mouse_avg, d_shape=''):
    """
    Create a 3-dimensional matrix (freq x time x subject) from 2-dimensional data (freq x time)
    @Params
    rec_dict - dictionary with recordings as keys and lists of 2D data arrays as values
    mouse_avg - method for data averaging
                'trials' - array layers are individual trials from all recordings
                'rec'    - array layers are trial averages for each recording
                'mouse'  - array layers are trial averages for each mouse
    d_shape - 2-element tuple containing the number of rows and columns for each trial mx
    @Returns
    mx - 3D data matrix (rows=frequencies, columns=time bins, layers=trials/recordings/mice)
    labels - list of mouse/recording names, or integers to number trials
    """
    recordings = list(rec_dict.keys())
    # if d_shape not provided, find a sample trial to determine number of data points
    if len(d_shape) == 0:
        for rec in recordings:
            if len(rec_dict[rec]) > 0:
                d_shape = rec_dict[rec][0].shape
                break
    if len(d_shape) == 0:
        raise ValueError('\n\n ### ERROR: input dictionary does not contain any data. ###')
    # create empty 3D matrix of appropriate size
    if mouse_avg == 'trials':
        ntrials = sum([len(rec_dict[rec]) for rec in recordings])
        mx = np.zeros((d_shape[0], d_shape[1], ntrials))
        labels = np.arange(0, ntrials)
    elif mouse_avg == 'rec':
        mx = np.zeros((d_shape[0], d_shape[1], len(recordings)))
        labels = recordings
    elif mouse_avg == 'mouse':
        mouse_dict = {rec.split('_')[0]:[] for rec in recordings}
        mice = list(mouse_dict.keys())
        mx = np.zeros((d_shape[0], d_shape[1], len(mice)))
        labels = mice
    layer = 0
    for rec in recordings:
        trials = rec_dict[rec]
        # if averaging by trial, each mx layer = 1 trial
        if mouse_avg == 'trials':
            for data in trials:
                mx[:,:,layer] = data
                layer += 1
        else:
            # if there are no trials for a recording, create empty 2D mx of correct size
            if len(trials) == 0:
                rmx = np.empty((d_shape[0], d_shape[1]))
                rmx[:] = np.nan
            else:
                rmx = np.array(trials).mean(axis=0)
            # if averaging by recording, each mx layer = 1 recording average
            if mouse_avg == 'rec':
                mx[:,:,layer] = rmx
                layer += 1
            elif mouse_avg == 'mouse':
                idf = rec.split('_')[0]
                mouse_dict[idf].append(rmx)
    if mouse_avg in ['trials', 'rec']:
        return mx, labels
    # if averaging by mouse, each mx layer = 1 mouse average
    elif mouse_avg == 'mouse':
        for m in mice:
            mx[:,:,layer] = np.nanmean(mouse_dict[m], axis=0)
            layer += 1
        return mx, labels
def build_featmx(MX, pre, post):
    """
    For each time bin in the input spectrogram, collect the SP for a surrounding time
    window, and reshape the collected SP into 1D vectors stored in feature matrix rows
    @Params
    MX - EEG spectrogram from one recording (frequencies x time bins)
    pre, post - no. of time bins to collect before and after each bin of $MX
    @Returns
    A - feature matrix with consecutive rows as reshaped SP windows, centered on
        consecutive time bins
    """
    N = MX.shape[1]      # no. of time bins
    nfreq = MX.shape[0]  # no. of frequencies
    j = 0
    # create feature mx (1 row per time bin, 1 column per freq per time bin in collection window)
    A = np.zeros((N - pre - post, nfreq * (pre + post)))
    for i in range(pre, N - post):
        # collect SP window surrounding each consecutive time bin
        C = MX[:, i - pre:i + post]
        # reshape SP window to 1D vector, store in next row of matrix $A
        A[j, :] = np.reshape(C.T, (nfreq * (pre + post),))
        j += 1
    return A
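# Shape check for build_featmx() (hypothetical numbers): with a 10-frequency
# spectrogram of 100 bins and a window of 5 bins before / 5 bins after each
# center bin, the feature matrix has one row per valid center bin and one
# column per (frequency x window-bin) pair:
#
#   MX = np.random.rand(10, 100)
#   A = build_featmx(MX, pre=5, post=5)
#   assert A.shape == (90, 100)   # (100-5-5) rows, 10*(5+5) columns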
def cross_validation(S, r, theta, nfold=5):
    """
    Perform $nfold cross-validation for linear regression with power constraint (ridge regression)
    @Params
    S - stimulus matrix of trials x variables
        e.g. for spectrogram stimulus, trials = time bins and variables = frequencies
    r - response vector of measured values for each trial
    theta - list of floats, specifying candidate regularization parameters for linear
            regression model. Optimal parameter value chosen to maximize avg model performance
    nfold - no. of subsets to divide data
    @Returns
    Etest - model performance on test set (R2 values) for each power constraint param
    Etrain - model performance on training set for each param
    """
    # no. of trials and model variables
    ntheta = len(theta)
    ninp = r.shape[0]
    nsub = round(ninp / nfold)
    nparam = S.shape[1]
    # normalize variable values to mean of zero
    for i in range(nparam):
        S[:,i] -= S[:,i].mean()
    # get $nfold subsets of training & test data
    test_idx = []
    train_idx = []
    for i in range(nfold):
        idx = np.arange(i*nsub, min(((i+1)*nsub, ninp)), dtype='int')
        idy = np.setdiff1d(np.arange(0, ninp), idx)
        test_idx.append(idx)
        train_idx.append(idy)
    l = 0
    # for each regularization param, collect avg model performance (R2 values)
    Etest = np.zeros((ntheta,))
    Etrain = np.zeros((ntheta,))
    for th in theta:
        ptest = np.zeros((nfold,))
        ptrain = np.zeros((nfold,))
        j = 0
        # perform $nfold iterations of training/testing for regression model
        for (p, q) in zip(test_idx, train_idx):
            k = ridge_regression(S[q,:], r[q], th)
            pred_test = np.dot(S[p,:], k)   # predicted values
            pred_train = np.dot(S[q,:], k)
            rtest = r[p]                    # measured values
            rtrain = r[q]
            # model performance
            ptest[j] = 1 - np.var(pred_test - rtest) / np.var(rtest)
            ptrain[j] = 1 - np.var(pred_train - rtrain) / np.var(rtrain)
            j += 1
        Etest[l] = np.mean(ptest)
        Etrain[l] = np.mean(ptrain)
        l += 1
    return Etest, Etrain
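# Usage sketch for cross_validation() (hypothetical inputs: $MX is a spectrogram,
# $neural_signal a per-bin response; the theta candidates are arbitrary, log-spaced):
#
#   S = build_featmx(MX, pre=5, post=5)      # stimulus matrix: center bins x features
#   r = neural_signal[5:-5]                  # one measured value per center bin
#   theta = [10**p for p in range(5)]
#   Etest, Etrain = cross_validation(S, r, theta, nfold=5)
#   best_theta = theta[np.argmax(Etest)]     # regularization maximizing test-set R2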
def ridge_regression(A, r, theta):
    """
    Estimate linear filter to optimally predict neural activity from EEG spectrogram
    @Params
    A - feature matrix; norm. power value for SP frequencies * relative times * trials
    r - response vector; measured neural signal for each trial
    theta - regularization/power constraint parameter
    @Returns
    k - coefficient matrix optimally relating measured signal to EEG features (freqs * times)
    """
    S = A.copy()
    n = S.shape[1]
    AC = np.dot(S.T, S)        # stimulus autocorrelation matrix (S'S)
    AC = AC + np.eye(n)*theta  # power constraint
    SR = np.dot(S.T, r)        # stimulus-response cross-correlation (S'r)
    # linear solution optimally approximating S * k = r
    k = scipy.linalg.solve(AC, SR)
    return k
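# Minimal sanity check for ridge_regression() (synthetic data): with theta=0 the
# solve reduces to ordinary least squares, so a noiseless linear response should
# recover the true filter:
#
#   S_demo = np.random.rand(200, 3)
#   true_k = np.array([2.0, -1.0, 0.5])
#   resp = np.dot(S_demo, true_k)
#   k = ridge_regression(S_demo, resp, theta=0)   # k ~ [2.0, -1.0, 0.5]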
def df_from_timecourse_dict(tdict_list, mice_list, dose_list, state_cols=[]):
    """
    Create dataframe from equivalent-length lists of timecourse dictionaries
    (key = brain state, value = 2D matrix of mice x time bins)
    @Params
    tdict_list - list of dictionaries, each with brain states as keys and timecourse
                 matrices as values; each drug dose has 1 dictionary
    mice_list - list of mouse names, corresponding to the rows in each matrix
    dose_list - list of drug doses, corresponding to each dictionary
    state_cols - list of brain state labels (e.g. ['REM', 'Wake', 'NREM']), corresponding
                 to the keys in each dictionary (e.g. [1,2,3]); if empty, the dictionary
                 keys themselves are used as labels
    @Returns
    df - dataframe with info columns for mouse, dose, and brain state, and
         data columns for each time bin
         e.g. 'Mouse'  'dose'  'state'  't0'  't1'  't2'
              'Bob'    0.25    'REM'    0.2   0.4   0.6
    """
    # pair each dictionary key with its brain state label
    keys = list(tdict_list[0].keys())
    if len(state_cols) != len(keys):
        state_cols = keys
    # create labels for each mx column/time bin (e.g. column 0 --> t0)
    times = ['t' + str(i) for i in range(tdict_list[0][keys[0]].shape[1])]
    df = pd.DataFrame(columns=['Mouse', 'dose', 'state'] + times)
    for (tdict, mice, dose) in zip(tdict_list, mice_list, dose_list):
        # create mini-dataframe for each brain state
        for key, state in zip(keys, state_cols):
            state_df = pd.DataFrame(columns=['Mouse', 'dose', 'state'] + times)
            # for each subject, add mouse name, drug dose, and brain state
            state_df['Mouse'] = mice
            state_df['dose'] = [dose]*len(mice)
            state_df['state'] = [state]*len(mice)
            # add data values for each subject in each time bin
            for col, t in enumerate(times):
                state_df[t] = tdict[key][:, col]
            # DataFrame.append was removed in pandas 2.0; concatenate instead
            df = pd.concat([df, state_df], ignore_index=True)
    return df
def df_from_rec_dict(rec_dict, val_label=''):
    """
    Create dataframe from dictionary of recording data
    @Params
    rec_dict - dictionary with recording names as keys and 1D data arrays as values
    val_label - optional label for data in $rec_dict
    @Returns
    df - dataframe with rows containing mouse name, recording name, and data value
    """
    if not val_label:
        val_label = 'value'
    recordings = list(rec_dict.keys())
    data = []
    for rec in recordings:
        idf = rec.split('_')[0]
        for trial_val in rec_dict[rec]:
            data.append((idf, rec, trial_val))
    df = pd.DataFrame(columns=['mouse', 'rec', val_label], data=data)
    return df
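# Usage sketch for df_from_rec_dict() (hypothetical data):
#
#   rec_dict = {'Bob_n1': [0.5, 0.7], 'Sue_n1': [0.9]}
#   df = df_from_rec_dict(rec_dict, val_label='p-wave freq')
#   #   mouse    rec       p-wave freq
#   #   'Bob'    'Bob_n1'  0.5
#   #   'Bob'    'Bob_n1'  0.7
#   #   'Sue'    'Sue_n1'  0.9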
def get_iwins(win, sr, inclusive=True):
    """
    Calculate start (iwin1) and stop (iwin2) indices of data collection, relative
    to an event at idx=0
    @Params
    win - float or 2-element list, specifying the time window (s) to collect data
          e.g. win=[-1.0, 0.2] --> collect interval from -1.0 to +0.2 s surrounding event
          e.g. win=-0.5 --> collect interval from -0.5 s up to the event
          e.g. win=0.5  --> collect interval from the event to +0.5 s
    sr - sampling rate (Hz)
    inclusive - if True, include the last sample in the time window
    @Returns
    iwin1, iwin2 - number of data samples before and after the event to collect
    """
    # if $win has 2 elements, convert both to number of samples (abs. value)
    if isinstance(win, (list, tuple)):
        if len(win) == 2:
            iwin1 = int(round(np.abs(win[0])*sr))
            iwin2 = int(round(win[1]*sr))
            # include last idx in $win unless it's zero
            if inclusive and win[1] != 0:
                iwin2 += 1
    # if $win is a NEGATIVE number, collect iwin1 samples BEFORE the event
    elif win < 0:
        iwin1 = int(round(np.abs(win)*sr))
        iwin2 = 0
    # if $win is a POSITIVE number, collect iwin2 samples AFTER the event
    elif win > 0:
        iwin1 = 0
        iwin2 = int(round(win*sr)) + 1
    # if $win is zero, collect the event sample only
    else:
        iwin1, iwin2 = 0, 0
    return iwin1, iwin2
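# Examples for get_iwins() at sr=1000 Hz:
#
#   get_iwins([-1.0, 0.2], 1000)  # -> (1000, 201); 1 s before to 0.2 s after event
#   get_iwins(-0.5, 1000)         # -> (500, 0);    0.5 s before event only
#   get_iwins(0.5, 1000)          # -> (0, 501);    event to 0.5 s after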
def jitter_laser(lsr, jitter, sr):
    """
    Create a sham laser vector with randomly "jittered" stimulations (step pulses only!)
    @Params
    lsr - laser stimulation vector
    jitter - randomly shift indices of true laser pulses by up to +/- $jitter seconds
    sr - sampling rate (Hz)
    @Returns
    jlsr - sham laser vector with randomly jittered pulses
    """
    np.random.seed(0)
    # get the first idx of each true laser pulse, and the pulse duration
    lsr_seq = sleepy.get_sequences(np.where(lsr==1)[0])
    pulse_bins = len(lsr_seq[0])
    laser_start = [seq[0] for seq in lsr_seq]
    # get random shifts in range -$jitter to +$jitter s, and apply to laser onset indices
    jwin = int(round(jitter*sr))
    jter = np.random.randint(-jwin, jwin, len(laser_start))
    jidx = laser_start + jter
    # create new sham laser vector from jittered laser indices
    jlsr = np.zeros((len(lsr)))
    for ji in jidx:
        # make sure each pulse fits within the laser vector
        if ji < 0:
            ji += jwin
        elif ji > len(lsr) - pulse_bins:
            ji = ji - jwin - pulse_bins
        # fill in jittered laser pulses
        jlsr[ji : ji + pulse_bins] = 1
    return jlsr
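# Usage sketch for jitter_laser() (hypothetical recording; $lsr is the binary laser
# vector loaded elsewhere in this module): build a sham stimulation control by
# shifting each pulse by up to +/- 10 s:
#
#   lsr = sleepy.load_laser(ppath, rec)
#   jlsr = jitter_laser(lsr, jitter=10, sr=1000)
#   # jlsr.sum() == lsr.sum() unless jittered pulses happen to overlap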
def spike_threshold(data, th, sign=1):
    """
    Detect potential spikes as waveforms crossing the given threshold $th
    @Params
    data - raw spiking data
    th - threshold to pass to qualify as a spike
    sign - direction of detected spikes
           1 - spikes are "valleys" below the signal
           -1 - spikes are "mountains" above the signal
    @Returns
    sidx - indices of spike waveforms
    """
    if sign == 1:
        lidx = np.where(data[0:-2] > data[1:-1])[0]   # < previous point
        ridx = np.where(data[1:-1] <= data[2:])[0]    # <= following point
        thidx = np.where(data[1:-1] < (-1 * th))[0]   # < -th
        sidx = np.intersect1d(lidx, np.intersect1d(ridx, thidx)) + 1
    else:
        lidx = np.where(data[0:-2] < data[1:-1])[0]   # > previous point
        ridx = np.where(data[1:-1] >= data[2:])[0]    # >= following point
        thidx = np.where(data[1:-1] > th)[0]          # > th
        sidx = np.intersect1d(lidx, np.intersect1d(ridx, thidx)) + 1
    return sidx
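# Usage sketch for spike_threshold() (synthetic trace): detect the local minimum
# dipping below -2 as a downward "valley" spike:
#
#   trace = np.array([0., 1., -3., 0., 1., 0.])
#   spike_threshold(trace, th=2, sign=1)   # -> array([2])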
def detect_noise(LFP, sr, thres=800, win=5):
    """
    Return indices of LFP signal within detected "noise" windows
    @Params
    LFP - LFP signal
    sr - sampling rate (Hz)
    thres - threshold for detecting LFP noise (uV, absolute value)
    win - size of noise window (s) surrounding each threshold crossing
    @Returns
    noise_idx - indices corresponding to LFP noise
    """
    # indices of all noise threshold crossings (+/-)
    noise = np.where(np.abs(LFP) > thres)[0]
    noise_idx = []
    while len(noise) > 0:
        # collect $win s window surrounding each instance of LFP noise
        pre = 0 if (noise[0] - sr*win/2) < 0 else int(noise[0] - sr*win/2)
        if (noise[0] + sr*win/2) >= len(LFP):
            noise_win = np.arange(pre, len(LFP))
        else:
            noise_win = np.arange(pre, int(noise[0] + sr*win/2))
        # update noise indices, continue to next detected noise outside window
        noise_idx.extend(noise_win)
        noise = [n for n in noise if n not in noise_win]
    # return indices within noise windows
    noise_idx = np.unique(noise_idx)
    return noise_idx
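# Usage sketch for detect_noise() (synthetic LFP at sr=1000 Hz): one sample
# exceeding +/-800 uV marks a 5 s noise window around it:
#
#   LFP = np.zeros(10000)
#   LFP[5000] = 1200.
#   noise_idx = detect_noise(LFP, sr=1000, thres=800, win=5)
#   # noise_idx spans samples 2500..7499 (2.5 s on either side of the artifact)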
############## DATA COLLECTION FUNCTIONS ##############
def get_SP_subset(rec_dict, win, sub_win, pnorm, d_shape=''):
    """
    Isolate specified time window from each spectrogram in data dictionary
    @Params
    rec_dict - data dictionary (key=recording name, value=list of SPs;
               freq x time bins)
    win - time window (s) of SPs in $rec_dict
    sub_win - time window (s) indicating subset of SP columns to isolate
    pnorm - if > 0, normalize frequencies in original SP by their mean values
    d_shape - 2-element tuple with no. of rows and columns for each SP
    @Returns
    rec_dict - data dictionary with the isolated SP windows specified by $sub_win
    """
    recordings = list(rec_dict.keys())
    # if d_shape not provided, find a sample SP to determine number of data points
    if len(d_shape) == 0:
        for rec in recordings:
            if len(rec_dict[rec]) > 0:
                d_shape = rec_dict[rec][0].shape
                break
    # get indices of SP columns/time points to isolate
    t = np.linspace(win[0], win[1], d_shape[1])
    sub_idx = np.intersect1d(np.where(t >= sub_win[0])[0], np.where(t <= sub_win[1])[0])
    for rec in recordings:
        # isolate and normalize subset from each SP
        sub_sps = [AS.adjust_spectrogram(sp, pnorm=pnorm, psmooth=0)[:, sub_idx] for sp in rec_dict[rec]]
        rec_dict[rec] = sub_sps
    return rec_dict
def get_lsr_phase(ppath, recordings, istate, bp_filt, min_state_dur=5, ma_thr=20,
                  ma_state=3, flatten_tnrem=False, post_stim=0.1, psave=False):
    """
    Get instantaneous phase for each P-wave and laser pulse during any brain state
    @Params
    ppath - base folder
    recordings - list of recordings
    istate - brain state to analyze
    bp_filt - 2-element list specifying the lowest and highest frequencies to use
              for bandpass filtering
    min_state_dur - minimum brain state duration (s) to be included in analysis
    ma_thr, ma_state - max duration and brain state for microarousals
    flatten_tnrem - brain state for transition sleep
    post_stim - P-waves within $post_stim s of laser onset are "laser-triggered"
                (all others are "spontaneous")
                Laser pulses followed by 1+ P-waves within $post_stim s are "successful"
                (all others are "failed")
    psave - optional string specifying a filename to save the data (if False, data is not saved)
    @Returns
    lsr_pwaves, spon_pwaves, success_lsr, fail_lsr - dictionaries (key=recording) with
        instantaneous phases of laser-triggered P-waves, spontaneous P-waves,
        successful laser pulses, and failed laser pulses
    all_phases, success_latencies - dictionaries (key=recording) for state sequence
        phase vectors and laser-to-P-wave latencies
    amp_lfp, lsr_p_lfp, spon_p_lfp - dictionaries of (phase, surrounding LFP) tuples
    """
    # clean data inputs
    if not isinstance(recordings, list):
        recordings = [recordings]
    if isinstance(istate, list):
        istate = istate[0]
    lsr_pwaves = {rec:[] for rec in recordings}   # phase of laser-triggered P-waves
    spon_pwaves = {rec:[] for rec in recordings}  # phase of spontaneous P-waves
    success_lsr = {rec:[] for rec in recordings}  # phase of successful laser pulses
    fail_lsr = {rec:[] for rec in recordings}     # phase of failed laser pulses
    # collect phase vector for each state sequence
    all_phases = {rec:[] for rec in recordings}
    success_latencies = {rec:[] for rec in recordings}
    # collect raw LFPs associated with instantaneous phases
    lsr_p_lfp = {rec:[] for rec in recordings}
    spon_p_lfp = {rec:[] for rec in recordings}
    amp_lfp = {rec:[] for rec in recordings}
    for rec in recordings:
        print('Getting P-waves for ' + rec + ' ...')
        # load sampling rate
        sr = sleepy.get_snr(ppath, rec)
        nbin = int(np.round(sr) * 2.5)
        dt = (1.0 / sr) * nbin
        # load and adjust brain state annotation
        M, _ = sleepy.load_stateidx(ppath, rec)
        M = AS.adjust_brainstate(M, dt, ma_thr=ma_thr, ma_state=ma_state,
                                 flatten_tnrem=flatten_tnrem)
        # load EEG, LFP, and P-wave indices
        EEG = so.loadmat(os.path.join(ppath, rec, 'EEG.mat'), squeeze_me=True)['EEG']
        LFP, p_idx = load_pwaves(ppath, rec)
        # load laser and find laser-triggered P-waves
        lsr = sleepy.load_laser(ppath, rec)
        lsr_p_idx, spon_p_idx, lsr_yes_p, lsr_no_p = get_lsr_pwaves(p_idx, lsr, post_stim)
        # create event vectors to match EEG indices
        lvec = np.zeros((len(EEG)))
        lvec[lsr_p_idx] = 1
        svec = np.zeros((len(EEG)))
        svec[spon_p_idx] = 1
        yvec = np.zeros((len(EEG)))
        yvec[lsr_yes_p] = 1
        nvec = np.zeros((len(EEG)))
        nvec[lsr_no_p] = 1
        # get brain state sequences
        sseq = sleepy.get_sequences(np.where(M==istate)[0])
        for seq in sseq:
            if len(seq) > min_state_dur/2.5:
                # get EEG data for seq
                sidx = int(round(seq[0]*nbin)) + 20
                eidx = int(round(seq[-1]*nbin))
                seq_eeg = EEG[sidx : eidx]
                seq_lfp = LFP[sidx : eidx]
                # bandpass filter EEG
                a1 = float(bp_filt[0]) / (sr/2.0)
                a2 = float(bp_filt[1]) / (sr/2.0)
                seq_eeg = sleepy.my_bpfilter(seq_eeg, a1, a2)
                res = hilbert(seq_eeg)  # hilbert transform
                iphase = np.angle(res)  # get instantaneous phase
                # get event indices in seq
                li, si, yi, ni = [np.where(vec[sidx:eidx])[0] for vec in [lvec, svec, yvec, nvec]]
                lsr_i = np.concatenate((yi, ni))
                # get idx of first P-wave triggered by each successful lsr pulse
                fli = [li[np.where(li>=y)[0][0]] for y in yi]
                tri = [yi[np.where(yi<=l)[0][-1]] for l in li]
                yi = [y for y,f in zip(yi,fli) if f-y > 12]
                li = [l for l,t in zip(li,tri) if l-t > 12]
                # get event phases
                lphase, sphase, yphase, nphase, lsrphase = [iphase[i] for i in [li, si, yi, ni, lsr_i]]
                lsr_pwaves[rec].extend(lphase)
                spon_pwaves[rec].extend(sphase)
                success_lsr[rec].extend(yphase)
                fail_lsr[rec].extend(nphase)
                # get tuples of (P-wave phase, surrounding LFP waveform)
                _ = [lsr_p_lfp[rec].append((iphase[i], seq_lfp[i-50 : i+50])) for i in li]
                _ = [spon_p_lfp[rec].append((iphase[i], seq_lfp[i-50 : i+50])) for i in si]
                # get tuples of (lsr pulse phase, LFP amplitude in post_stim seconds)
                _ = [amp_lfp[rec].append((iphase[l], seq_lfp[l : l + int(post_stim*sr)])) for l in lsr_i]
    # save files
    if psave:
        filename = psave if isinstance(psave, str) else 'lsrSurround_LFP'
        so.savemat(os.path.join(ppath, f'{filename}_lsr_phase_{istate}.mat'), lsr_pwaves)
        so.savemat(os.path.join(ppath, f'{filename}_spon_phase_{istate}.mat'), spon_pwaves)
        so.savemat(os.path.join(ppath, f'{filename}_success_phase_{istate}.mat'), success_lsr)
        so.savemat(os.path.join(ppath, f'{filename}_fail_phase_{istate}.mat'), fail_lsr)
        so.savemat(os.path.join(ppath, f'{filename}_all_phase_{istate}.mat'), all_phases)
        so.savemat(os.path.join(ppath, f'{filename}_phase_lfp_{istate}.mat'), amp_lfp)
        so.savemat(os.path.join(ppath, f'{filename}_lsr_p_lfp_{istate}.mat'), lsr_p_lfp)
        so.savemat(os.path.join(ppath, f'{filename}_spon_p_lfp_{istate}.mat'), spon_p_lfp)
    return lsr_pwaves, spon_pwaves, success_lsr, fail_lsr, all_phases, success_latencies, amp_lfp, lsr_p_lfp, spon_p_lfp
def get_surround(ppath, recordings, istate, win, signal_type, recalc_highres=False,
                 tstart=0, tend=-1, ma_thr=20, ma_state=3, flatten_tnrem=False,
                 nsr_seg=2, perc_overlap=0.95, null=False, null_win=[0.5,0.5],
                 p_iso=0, pcluster=0, clus_event='waves', psave=False):
    """
    Collect raw signal surrounding events
    @Params
    ppath - base folder
    recordings - list of recordings
    istate - brain state(s) to analyze
    win - time window (s) to collect data relative to the event
    signal_type - specifies the type of data to collect
                  'EEG', 'EEG2' --> raw hippocampal or prefrontal EEG
                  'SP', 'SP2' --> hippocampal or prefrontal SP
                  'SP_NORM', 'SP2_NORM' --> norm. hippocampal or prefrontal SP
                  'SP_CALC', 'SP2_CALC' --> calculate each SP using surrounding EEG
                  'SP_CALC_NORM', 'SP2_CALC_NORM' --> normalize calculated SP by whole SP mean
                  'LFP' --> processed LFP signal
    recalc_highres - if True, recalculate high-resolution spectrogram from EEG,
                     using $nsr_seg and $perc_overlap params
    tstart, tend - time (s) into recording to start and stop collecting data
    ma_thr, ma_state - max duration and brain state for microarousals
    flatten_tnrem - brain state for transition sleep
    nsr_seg, perc_overlap - set FFT bin size (s) and overlap (%) for spectrogram calculation
    null - if True, also collect data surrounding randomized control points in $istate
    null_win - if > 0, qualifying "null" points must be free of P-waves and laser pulses
               in surrounding $null_win interval (s)
               if = 0, "null" points are randomly selected from all state indices
    p_iso, pcluster, clus_event - see get_p_iso() and get_pclusters()
    psave - optional string specifying a filename to save the data (if False, data is not saved)
    @Returns
    p_signal - dictionary with brain states as keys, and sub-dictionaries as values
               Sub-dictionaries have mouse recordings as keys, with lists of 2D or 3D signals as values
               Signals (SPs, EEGs, or LFPs) represent the time window ($win s) surrounding each P-wave
    null_signal - dictionary structured as above, but containing signals surrounding each
                  randomly selected control point
    data.shape - tuple with shape of the data from one trial
    """
    import time
    START = time.perf_counter()
    # clean data inputs
    if not isinstance(recordings, list):
        recordings = [recordings]
    if not isinstance(istate, list):
        istate = [istate]
    if len(istate) == 0:
        istate = ['total']
    brstate = 'total'
    p_signal = {s:{rec:[] for rec in recordings} for s in istate}     # signal surrounding P-waves
    null_signal = {s:{rec:[] for rec in recordings} for s in istate}  # signal surrounding randomized time points
    for rec in recordings:
        print('Getting P-waves for ' + rec + ' ...')
        p_signal = {s:[] for s in istate}     # signal surrounding P-waves
        null_signal = {s:[] for s in istate}  # signal surrounding randomized time points
        # load sampling rate
        sr = sleepy.get_snr(ppath, rec)
        nbin = int(np.round(sr) * 2.5)
        dt = (1.0 / sr) * nbin
        iwin1, iwin2 = get_iwins(win, sr)
        # load EEG and EEG2
        EEG = so.loadmat(os.path.join(ppath, rec, 'EEG.mat'), squeeze_me=True)['EEG']
        if os.path.exists(os.path.join(ppath, rec, 'EEG2.mat')):
            EEG2 = so.loadmat(os.path.join(ppath, rec, 'EEG2.mat'), squeeze_me=True)['EEG2']
        # adjust Intan idx to properly translate to SP idx
        spi_adjust = np.linspace(-sr, sr, len(EEG))
        # load or calculate entire high-res spectrogram
        # SP calculated using EEG2
        if ('SP2' in signal_type) and (signal_type != 'SP2_CALC'):
            SP, f, t, sp_dt, sp_nbin, _ = AS.highres_spectrogram(ppath, rec, nsr_seg=nsr_seg, perc_overlap=perc_overlap,
                                                                 recalc_highres=recalc_highres, mode='EEG2')
            #sp_nbin = len(EEG) / SP.shape[1]
            sp_win1 = int(round(iwin1/sp_nbin))
            sp_win2 = int(round(iwin2/sp_nbin))
        # SP calculated using EEG
        elif ('SP' in signal_type) and (signal_type != 'SP_CALC'):
            SP, f, t, sp_dt, sp_nbin, _ = AS.highres_spectrogram(ppath, rec, nsr_seg=nsr_seg, perc_overlap=perc_overlap,
                                                                 recalc_highres=recalc_highres, mode='EEG')
            sp_win1 = int(round(iwin1/sp_nbin))
            sp_win2 = int(round(iwin2/sp_nbin))
        # calculate SP mean
        if '_NORM' in signal_type:
            SP_mean = SP.mean(axis=1)
            SP_norm = np.divide(SP, np.repeat([SP_mean], SP.shape[1], axis=0).T)  # normalize entire spectrogram
        # load and adjust brain state annotation
        M, _ = sleepy.load_stateidx(ppath, rec)
        M = AS.adjust_brainstate(M, dt, ma_thr=ma_thr, ma_state=ma_state,
                                 flatten_tnrem=flatten_tnrem)
        # load LFP and P-wave indices
        LFP, p_idx = load_pwaves(ppath, rec)
        # isolate single or clustered P-waves
        if p_iso and pcluster:
            print('ERROR: cannot accept both p_iso and pcluster arguments')
            return
        elif p_iso:
            p_idx = get_p_iso(p_idx, sr, win=p_iso)
        elif pcluster:
            p_idx = get_pclusters(p_idx, sr, win=pcluster, return_event=clus_event)
        # define start and end points of analysis
        istart = int(np.round(tstart*sr))
        if tend == -1:
            iend = len(EEG) - 1
        else:
            iend = int(np.round(tend*sr))
        for pi in p_idx:
            if pi >= iwin1 and pi + iwin2 < len(EEG) and istart <= pi <= iend:
                if istate[0] != 'total':
                    brstate = int(M[int(pi/nbin)])
                # get data of desired signal type
                if signal_type == 'EEG':
                    data = EEG[pi-iwin1 : pi+iwin2]
                elif signal_type == 'EEG2':
                    data = EEG2[pi-iwin1 : pi+iwin2]
                # calculate SP from EEG or EEG2
                elif 'CALC' in signal_type:
                    if 'SP2' in signal_type:
                        tmp = EEG2[pi-iwin1 : pi+iwin2]
                    elif 'SP' in signal_type:
                        tmp = EEG[pi-iwin1 : pi+iwin2]
                    f, t, data = scipy.signal.spectrogram(tmp, fs=sr, window='hann',
                                                          nperseg=int(nsr_seg*sr), noverlap=int(nsr_seg*sr*perc_overlap))
                    # normalize calculated SP based on entire recording
                    if 'NORM' in signal_type:
                        data = np.divide(data, np.repeat([SP_mean], data.shape[1], axis=0).T)
                # if not calculating, get SP or SP_NORM from whole recording calculation
                elif 'SP' in signal_type:
                    spi = int(round((pi + spi_adjust[pi])/sp_nbin))
                    if 'NORM' in signal_type:
                        data = SP_norm[:, spi-sp_win1 : spi+sp_win2]
                    else:
                        data = SP[:, spi-sp_win1 : spi+sp_win2]
                elif signal_type == 'LFP':
                    data = LFP[pi-iwin1 : pi+iwin2]
                else:
                    print(signal_type + ' IS AN INVALID SIGNAL TYPE')
                    return
                # collect data in relevant dictionary
                if brstate in istate:
                    p_signal[brstate].append(data)
        # collect signals surrounding random control time points
        if null:
            null_iwin1, null_iwin2 = get_iwins(null_win, sr)
            # sample "null" epochs with no P-waves/laser pulses
            if null_win != 0:
                # find all points that don't qualify as "null"
                not_null_idx = np.zeros((10000000))
                for i, pi in enumerate(p_idx):
                    p_win = np.arange(pi-null_iwin1, pi+null_iwin2)
                    not_null_idx[i*len(p_win) : i*len(p_win)+len(p_win)] = p_win
                # get rid of trailing zeros (computational efficiency)
                not_null_idx = np.trim_zeros(not_null_idx, 'b')
            for s in istate:
                if istate[0] != 'total':
                    # get array of all possible indices in state s
                    sseq = sleepy.get_sequences(np.where(M==s)[0])
                    sseq_idx = [np.arange(seq[0]*nbin, seq[-1]*nbin+nbin) for seq in sseq]
                    sseq_idx = np.array(list(chain.from_iterable(sseq_idx)))
                    sseq_idx = [sidx for sidx in sseq_idx if sidx > iwin1 and sidx < len(EEG)-iwin2 and istart < sidx < iend]
                else:
                    sseq_idx = np.arange(iwin1, len(EEG)-iwin2)
                # keep only state indices that are not next to a P-wave/laser pulse
                if null_win != 0:
                    sseq_idx = np.setdiff1d(sseq_idx, not_null_idx)
                # randomly select from all state indices
                else:
                    sseq_idx = np.array(sseq_idx)
                np.random.seed(0)
                # select number of random indices matching the number of P-waves
                r_idx = np.random.randint(low=0, high=len(sseq_idx), size=len(p_signal[s]))
                null_idx = sseq_idx[r_idx]
                for ni in null_idx:
                    # get data of desired signal type
                    if signal_type == 'EEG':
                        data = EEG[ni-iwin1 : ni+iwin2]
                    elif signal_type == 'EEG2':
                        data = EEG2[ni-iwin1 : ni+iwin2]
                    # calculate SP from EEG or EEG2
                    elif 'CALC' in signal_type:
                        if 'SP2' in signal_type:
                            tmp = EEG2[ni-iwin1 : ni+iwin2]
                        elif 'SP' in signal_type:
                            tmp = EEG[ni-iwin1 : ni+iwin2]
                        f, t, data = scipy.signal.spectrogram(tmp, fs=sr, window='hann',
                                                              nperseg=int(nsr_seg * sr), noverlap=int(nsr_seg * sr * perc_overlap))
                        # normalize calculated SP based on entire recording
                        if 'NORM' in signal_type:
                            data = np.divide(data, np.repeat([SP_mean], data.shape[1], axis=0).T)
                    # if not calculating, get SP or SP_NORM from whole recording calculation
                    elif 'SP' in signal_type:
                        spi = int(round((ni + spi_adjust[ni])/sp_nbin))
                        if 'NORM' in signal_type:
                            data = SP_norm[:, spi-sp_win1 : spi+sp_win2]
                        else:
                            data = SP[:, spi-sp_win1 : spi+sp_win2]
                    elif signal_type == 'LFP':
                        data = LFP[ni-iwin1 : ni+iwin2]
                    else:
                        print(signal_type + ' IS AN INVALID SIGNAL TYPE')
                        return
                    # collect data in null dictionary
                    null_signal[s].append(data)
        # save tmp files to free up more room for computation
        for s in istate:
            so.savemat(f'TMP_{rec}_pwaves_{s}.mat', {'data':p_signal[s]})
            so.savemat(f'TMP_{rec}_null_{s}.mat', {'data':null_signal[s]})
    if psave:
        print('\n Assembling data dictionaries and saving .mat files ...\n')
    else:
        print('\n Assembling data dictionaries ...\n')
    # collect data from all recordings for each state from tmp files
    p_signal = {s:{rec:0 for rec in recordings} for s in istate}
    null_signal = {s:{rec:0 for rec in recordings} for s in istate}
    # assemble final data dictionaries
    for s in istate:
        for rec in recordings:
            p_signal[s][rec] = so.loadmat(f'TMP_{rec}_pwaves_{s}.mat')['data']
            null_signal[s][rec] = so.loadmat(f'TMP_{rec}_null_{s}.mat')['data']
        # remove temporary files
        for rec in recordings:
            os.remove(f'TMP_{rec}_pwaves_{s}.mat')
            os.remove(f'TMP_{rec}_null_{s}.mat')
    # save files
    if psave:
        filename = psave if isinstance(psave, str) else f'Surround_{signal_type}'
        for s in istate:
            so.savemat(os.path.join(ppath, f'{filename}_pwaves_{s}.mat'), p_signal[s])
            so.savemat(os.path.join(ppath, f'{filename}_null_{s}.mat'), null_signal[s])
        so.savemat(os.path.join(ppath, f'{filename}_data_shape.mat'), {'data_shape':data.shape})
    END = time.perf_counter()
    print(f'COMPUTING TIME --> {END-START:0.2f} seconds ({len(recordings)} recordings, {len(istate)} brainstates, signal type = {signal_type})')
    return p_signal, null_signal, data.shape
def get_lsr_surround(ppath, recordings, istate, win, signal_type, recalc_highres=False,
                     tstart=0, tend=-1, ma_thr=20, ma_state=3, flatten_tnrem=False,
                     nsr_seg=2, perc_overlap=0.95, null=False, null_win=[0.5,0.5],
                     null_match='spon', post_stim=0.1, lsr_iso=0, lsr_win=[],
                     p_iso=0, pcluster=0, clus_event='waves', psave=False):
    """
    Collect raw signal surrounding spontaneous and laser-triggered P-waves, and
    successful and failed laser pulses
    @Params
    ppath - base folder
    recordings - list of recordings
    istate - brain state(s) to analyze
    win - time window (s) to collect data relative to P-waves and randomized points
    signal_type - specifies the type of data to collect
                  'EEG', 'EEG2' --> raw hippocampal or prefrontal EEG
                  'SP', 'SP2' --> hippocampal or prefrontal SP
                  'SP_NORM', 'SP2_NORM' --> norm. hippocampal or prefrontal SP
                  'SP_CALC', 'SP2_CALC' --> calculate each SP using surrounding EEG
                  'SP_CALC_NORM', 'SP2_CALC_NORM' --> normalize calculated SP by whole SP mean
                  'LFP' --> processed LFP signal
    recalc_highres - if True, recalculate high-resolution spectrogram from EEG,
                     using $nsr_seg and $perc_overlap params
    tstart, tend - time (s) into recording to start and stop collecting data
    ma_thr, ma_state - max duration and brain state for microarousals
    flatten_tnrem - brain state for transition sleep
    nsr_seg, perc_overlap - set FFT bin size (s) and overlap (%) for spectrogram calculation
    null - if True, also collect data surrounding randomized control points in $istate
    null_win - if > 0, qualifying "null" points must be free of P-waves and laser pulses
               in surrounding $null_win interval (s)
               if = 0, "null" points are randomly selected from all state indices
    null_match - the no. of random control points is matched with the no. of some other event type
                 'spon' - # control points equals the # of spontaneous P-waves
                 'lsr' - # control points equals the # of laser-triggered P-waves
                 'success' - # control points equals the # of successful laser pulses
                 'fail' - # control points equals the # of failed laser pulses
                 'all lsr' - # control points equals the # of total laser pulses
    post_stim - P-waves within $post_stim s of laser onset are "laser-triggered"
                (all others are "spontaneous")
                Laser pulses followed by 1+ P-waves within $post_stim s are "successful"
                (all others are "failed")
    lsr_iso - if > 0, do not analyze laser pulses that are preceded within $lsr_iso s by 1+ P-waves
    lsr_win - time window (s) to collect data relative to successful and failed laser pulses
    p_iso, pcluster, clus_event - see get_p_iso() and get_pclusters()
    psave - optional string specifying a filename to save the data (if False, data is not saved)
    @Returns
    lsr_pwaves - dictionary with brain states as keys, and sub-dictionaries as values
                 Sub-dictionaries have mouse recordings as keys, with lists of 2D or 3D signals as values
                 Signals (SPs, EEGs, or LFPs) represent the time window ($win s) surrounding each laser-triggered P-wave
    spon_pwaves - signals surrounding each spontaneous P-wave
    success_lsr - signals surrounding each successful laser pulse
    fail_lsr - signals surrounding each failed laser pulse
    null_pts - signals surrounding each randomly selected control point
    data.shape - tuple with shape of the data from one trial
    """
    import time
    START = time.perf_counter()
    # clean data inputs
    if not isinstance(recordings, list):
        recordings = [recordings]
    if not isinstance(istate, list):
        istate = [istate]
    if len(istate) == 0:
        istate = ['total']
    brstate = 'total'
    # no. of laser pulses not collected due to preceding P-waves
    lsr_elim_counts = {'success':0, 'fail':0}
    for rec in recordings:
        print('Getting P-waves for ' + rec + ' ...')
        lsr_pwaves = {s:[] for s in istate}   # signal surrounding laser-triggered P-waves
        spon_pwaves = {s:[] for s in istate}  # signal surrounding spontaneous P-waves
        success_lsr = {s:[] for s in istate}  # signal surrounding successful laser pulses
        fail_lsr = {s:[] for s in istate}     # signal surrounding failed laser pulses