-
Notifications
You must be signed in to change notification settings - Fork 1
/
preprocessing_funcs.py
119 lines (102 loc) · 5.49 KB
/
preprocessing_funcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
######## BIN_SPIKES ########
def bin_spikes(spike_times,dt,wdw_start,wdw_end):
    """
    Function that puts spikes into bins

    Parameters
    ----------
    spike_times: an array of arrays (or any sequence of sequences)
        an array of neurons. within each neuron's array is an array containing all the spike times of that neuron
        a plain Python list of per-neuron lists/arrays is accepted as well
    dt: number (any format)
        size of time bins
    wdw_start: number (any format)
        the start time for putting spikes in bins
    wdw_end: number (any format)
        the end time for putting spikes in bins

    Returns
    -------
    neural_data: a matrix of size "number of time bins" x "number of neurons"
        the number of spikes in each time bin for each neuron
    """
    # Bin edges come from np.arange, so wdw_end itself is never an edge and
    # the last bin ends at the last multiple of dt that is below wdw_end.
    edges = np.arange(wdw_start, wdw_end, dt)
    num_bins = edges.shape[0] - 1  # N edges delimit N-1 bins
    # len() instead of .shape[0] so that lists of ragged spike trains work too
    num_neurons = len(spike_times)
    neural_data = np.empty([num_bins, num_neurons])

    # Count the spikes falling in each bin, one neuron (column) at a time
    for neuron_idx, neuron_spikes in enumerate(spike_times):
        neural_data[:, neuron_idx] = np.histogram(neuron_spikes, edges)[0]
    return neural_data
######## BIN_OUTPUT #######
def bin_output(outputs,output_times,dt,wdw_start,wdw_end,downsample_factor=1):
    """
    Function that puts outputs into bins

    Parameters
    ----------
    outputs: matrix of size "number of times the output was recorded" x "number of features in the output"
        each entry in the matrix is the value of the output feature
    output_times: a vector of size "number of times the output was recorded"
        each entry has the time the output was recorded
        (a column vector of shape (N, 1) is flattened before use)
    dt: number (any format)
        size of time bins
    wdw_start: number (any format)
        the start time for binning the outputs
    wdw_end: number (any format)
        the end time for binning the outputs
    downsample_factor: integer, optional, default=1
        how much to downsample the outputs prior to binning
        larger values will increase speed, but decrease precision

    Returns
    -------
    outputs_binned: matrix of size "number of time bins" x "number of features in the output"
        the average value of each output feature in every time bin
        bins that contain no samples are filled with NaN
    """
    outputs = np.asarray(outputs)
    # Flatten once, up front, instead of squeezing inside the bin loop
    output_times = np.ravel(output_times)

    ###Downsample output###
    # We just take 1 out of every "downsample_factor" values
    if downsample_factor != 1:  # Don't downsample if downsample_factor=1
        keep_idxs = np.arange(0, output_times.shape[0], downsample_factor)
        outputs = outputs[keep_idxs, :]
        output_times = output_times[keep_idxs]

    ###Put outputs into bins###
    edges = np.arange(wdw_start, wdw_end, dt)  # Edges of time bins (wdw_end excluded)
    num_bins = edges.shape[0] - 1  # N edges delimit N-1 bins
    output_dim = outputs.shape[1]  # Number of output features
    outputs_binned = np.empty([num_bins, output_dim])

    for bin_idx in range(num_bins):
        # Half-open interval [left edge, right edge): a sample recorded exactly
        # on an edge belongs to the bin that starts there, instead of being
        # dropped from every bin as with two strict comparisons.
        in_bin = np.flatnonzero(
            (output_times >= edges[bin_idx]) & (output_times < edges[bin_idx + 1])
        )
        if in_bin.size:
            # Average all features of the selected samples in one call
            outputs_binned[bin_idx, :] = np.mean(outputs[in_bin, :], axis=0)
        else:
            # No samples in this bin: the mean is undefined
            outputs_binned[bin_idx, :] = np.nan
    return outputs_binned
######## GET_SPIKES_WITH_HISTORY ########
def get_spikes_with_history(neural_data,bins_before,bins_after,bins_current=1):
    """
    Function that creates the covariate matrix of neural activity

    Parameters
    ----------
    neural_data: a matrix of size "number of time bins" x "number of neurons"
        the number of spikes in each time bin for each neuron
    bins_before: integer
        How many bins of neural data prior to the output are used for decoding
    bins_after: integer
        How many bins of neural data after the output are used for decoding
    bins_current: 0 or 1, optional, default=1
        Whether to use the concurrent time bin of neural data for decoding

    Returns
    -------
    X: a matrix of size "number of total time bins" x "number of surrounding time bins used for prediction" x "number of neurons"
        For every time bin, there are the firing rates of all neurons from the specified number of time bins before (and after)
        The first "bins_before" and last "bins_after" rows are filled with NaN,
        because their full window would extend outside the recorded data.
    """
    neural_data = np.asarray(neural_data)
    num_examples, num_neurons = neural_data.shape
    use_current = 1 if bins_current else 0
    surrounding_bins = bins_before + bins_after + use_current

    # Start from all-NaN so rows without a complete window stay marked.
    # (np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.)
    X = np.full((num_examples, surrounding_bins, num_neurons), np.nan)

    # Offsets of the bins used for each output bin, relative to that bin.
    # They are listed explicitly so that with bins_current=0 the concurrent
    # bin (offset 0) is really skipped instead of displacing the last
    # "after" bin, which is what one contiguous slice would do.
    offsets = np.concatenate((
        np.arange(-bins_before, 0),
        np.arange(0, use_current),
        np.arange(1, bins_after + 1),
    ))

    # Output bins whose whole window lies inside the data (empty if too short)
    valid_rows = np.arange(bins_before, num_examples - bins_after)

    # Row r of X receives neural_data at rows r + offsets
    X[valid_rows] = neural_data[valid_rows[:, None] + offsets]
    return X