-
Notifications
You must be signed in to change notification settings - Fork 0
/
Spectrogram().r
579 lines (470 loc) · 32.1 KB
/
Spectrogram().r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
# Spectrogram()
# Input: Any of the following three options:
# (A) a path to a soundfile saved somewhere on your computer
# (B) an R object containing the information contained in this soundfile (imported into R using the 'audio', 'phonTools', 'tuneR', or 'sound' package)
# (C) a numeric vector containing the information inside the soundfile
# Note: Option (A) requires at least one of the following packages to be installed: 'audio', 'phonTools', 'tuneR', or 'sound'.
# Output: if plot=FALSE, returns a matrix containing the result of the analysis, with attributes indicating all the non-NULL argument specifications that influenced the result of the analysis;
# if plot=TRUE (the default), nothing is returned, and instead a plot of the spectrogram is produced
# Usage - Option (A): Spectrogram("C:/Users/MyUsername/Desktop/soundfile.wav")
# Option (B): library("audio")
# x = load.wave("C:/Users/MyUsername/Desktop/soundfile.wav")
# Spectrogram(x)
# Option (C): x = c(3.094482e-02, 2.154541e-02, 3.213501e-02, ...) # Created using sine-waves, etc.
# Spectrogram(x)
# -------------------------------------------------------------------------------
Spectrogram = function( # Begin argument list
# [1]
Audio,
# The 'Audio' argument specifies the audio data from which the spectrogram is to be made.
# Three options are available for using this argument.
# Option (A):
# You can set 'Audio' to be a character string (i.e. a character vector of length 1) with the path to a soundfile saved somewhere on your computer
# Note that this requires at least one of the following packages to be installed: 'audio', 'phonTools', 'tuneR', or 'sound'.
# Option (B)
# You can also set 'Audio' to be one of the following four kinds of R objects (each of which contains an R-internal representation of the information inside a soundfile):
# - An object of class 'audioSample', created with the 'load.wave()' function from the package 'audio'
# - An object of class 'sound', created with the 'loadsound()' function from the package 'phonTools'
# - An object of class 'Wave', created with the 'readWave()' function from the package 'tuneR'
# - An object of class 'Sample', created with the 'loadSample()' function from the package 'sound'
# In table form, this can be represented as follows:
#_CLASS_______FUNCTION_____PACKAGE__
# audioSample load.wave() audio
# sound loadsound() phonTools
# Wave readWave() tuneR
# Sample loadSample() sound
# Option (C)
# You can also set 'Audio' to be a numeric vector (representing a sequence of samples taken from a sound wave) if and only if the SamplingFrequency argument is specified (i.e. not NULL).
# (Note that setting 'Audio' to be a 2-row or 2-column matrix (representing the two channels in a stereo soundfile) is currently unsupported.)
# [2]
SamplingFrequency=NULL,
# SamplingFrequency. The sampling frequency/rate of the sound in Hertz.
# Only necessary to specify if 'Audio' is of class 'numeric'; if it is of any other class, this is ignored (hence does not need to be specified).
# The default value for SamplingFrequency is set to 22050 in the phonTools spectrogram function, but it is safer to make the user always specify it if they want to use a numeric vector for the Audio object.
# [3]
WindowLength = 5,
# The desired length (in milliseconds) of the analysis window used to create the spectrogram
# The default is 5 milliseconds in spectrogram() from package 'phonTools'
# [4]
FrequencyResolution = 4,
# Set this to any positive integer, with higher numbers mean better resolution.
# Specifically, for any integer X provided, 1/X the analysis bandwidth (as determined by the number of samples in the analysis window) will be used.
# Note that this greatly impacts the processing time, so adjust with care!
# Either both of the following can be left as NULL, or one (not both!) can be specified:
# [5]
TimeStepSize = NULL, # Number of milliseconds that the window will be moved for each adjacent analysis
nTimeSteps = NULL, # The overall total number of time steps
# If both of the above are left at NULL, nTimeSteps=400 will be used.
# Note that also this greatly impacts the processing time, so adjust with care!
# [6]
Preemphasis = TRUE,
# Should pre-emphasis be applied (to all frequency regions, i.e. with a dummy frequency cutoff of 0)?
# In other words, should the spectral slope at all frequencies increase by 6 dB per octave using a single-pole filter?
# This affects both the underlying spectrogram matrix as well as the plot thereof.
# [7a]
DynamicRange = 70,
# The default for spectrogram() in package 'phonTools' was 50, whereas Praat's is 70.
# 70 seems to be preferable because you can see more things without distorting what's important.
# Values less than this many dB below the maximum are 'clipped' to that value.
# If this is set to NULL, no such clipping occurs.
## This affects both the underlying spectrogram matrix as well as the plot thereof.
# [7b]
Omit0Frequency = FALSE,
# The frequency band at 0 Hz is usually at very low values (e.g. -400 to -300 dB).
# Select TRUE to omit this frequency band from the resulting spectrogram (both the matrix and the plot thereof).
# [8a]
WindowType = "kaiser",
# A character string indicating the desired type of window function to be applied to the signal
# All of the following types are supported:
# * "rectangular" / "square"
# * "blackman"
# * "hann" / "hanning" (i.e. sine-squared)
# * "hamming" (i.e. raised sine-squared)
# * "cosine" / "sine"
# * "bartlett"
# * "gaussian"
# * "kaiser"
# (Note that all names are in lowercase.)
# [8b]
WindowParameter = NULL,
# This is only relevant if the WindowType is set to "gaussian" or "kaiser"; it will be ignored in all other cases
# For those two kinds of window function, this specifies the relevant parameter behind the gaussian/kaiser function.
# If 'WindowParameter' is set to NULL for these two window types, the following defaults will be used:
# - kaiser: 3 (not 2!)
# - gaussian: 0.4
# [9]
plot = TRUE,
# Whether the spectrogram should be plotted or not
# If FALSE, no spectrogram is plotted, and instead, a matrix is returned containing the magnitude at each bin center.
# The column names of this matrix correspond to time, and the row names correspond to frequency.
# Note that both are fully unrounded numbers stored as a character string (e.g. "115.532879818594").
# ^-^-^-^-^-^-^ Plotting parameters ^-^-^-^-^-^-^
#(All of the following will do nothing if 'plot' is set to FALSE.)
# [10]
PlotFast=TRUE,
# If set to FALSE, the filled.contour() function will be used. This produces much better looking graphics (which is best for putting into publications), but takes considerably longer to plot.
# If set to TRUE (the default), the image() function will be used instead, with 'useRaster' set to TRUE. This makes the plotting very fast, which is optimal for when one is dynamically exploring/interacting with the signal.
# (This may not work properly if raster graphics are not supported on your device. See help("image") for details.)
# [11]
add = FALSE,
# This determines whether an entirely new plot is drawn (with all the annotation) or whether just the core image is drawn
# Careful - this should only be set to TRUE if a spectrogram has already been drawn (and therefore a graphics device / window is already open.
# Note: If 'add' is set to TRUE, the coordinate system of the pre-existing plot will be used; hence, any specifications of xlim and ylim will be ignored for the subsequent call to Spectrogram(..., add=TRUE).
# [12]
col = NULL,
# At present, you can use this argument in four ways:
# - If you leave this at NULL, the color map will be DarkBlue-Blue-Cyan-Yellow-Orange-Red-Brown
# - If you set this to "alternate", the color map will be Black-Red-Orange-Yellow-White
# - You can also set this to "greyscale"/"grayscale" to have things mapped onto a continuum from black to white.
# - Finally, you can also provide a custom vector of colors to use.
# [13a]
xlim = NULL,
# If left NULL, this will be set to the full time range of the soundfile
# [13b]
ylim = NULL,
# If left NULL, this will go from 0 to the soundfile's Nyquist frequency.
# Note that Praat and phonTools default to 5000 Hz.
# (Avoiding a fixed arbitrary number makes the user think more carefully about what they are zooming into.)
# [14]
# Main title, x axis label, and y axis label
main = "",
xlab = "Time (ms)",
ylab = "Frequency (Hz)"
){ # Begin function definition
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Check to make sure Audio and SamplingFrequency are acceptable (and compatible with each other)
AudioClass = class(Audio)
AcceptableClasses = c("character","audioSample","sound","Wave","Sample","numeric")
AudioClassAcceptable = as.logical( sum( AudioClass == AcceptableClasses ) )
if(!AudioClassAcceptable){
stop("The 'Audio' argument must be one of the following classes:\n character, audioSample, sound, Wave, Sample, numeric")
} # End 'if class for 'Audio' argument is not one of the acceptable classes'
# If Audio is numeric, ensure that the SamplingFrequency is specified
if( (AudioClass=="numeric" & is.null(SamplingFrequency)) ){
stop("Must specify SamplingFrequency.")
} # End 'if/else using numeric vector but did not specify sampling rate'
if( AudioClass!="numeric" # If the Audio object is one of the four audio classes (sound/Wave/Sample/audioSample), ...
& !is.null(SamplingFrequency) ){ # and if they provide a superfluous SamplingFrequency, ...
# ...then issue a warning that it will be ignored:
warning("Specified SamplingFrequency ignored; the one associated with Audio object has been used instead.")
} # End check whether to issue a warning
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Samples and SamplingFrequency
# Now store the actual samples themselves in a variable 'Samples' (doing stereo-to-mono conversion as necessary)
# Also, store the sample rate in a variable 'SamplingFrequency' (over-writing the NULL input to the function as necessary)
# /------------------------------------\
# | Notes on stereo-to-mono conversion:|
# \------------------------------------/
# | spectrogram() in package 'phonTools' and specgram() in package 'signal' require the input to be mono (in one way or another)
# | spectro() in package 'seewave' allows stereo files as input but throws away the second channel.
# | However, here, information from both channels will be used, in order not to throw away information
# | There are two ways of doing so: either summing and re-normalizing the two channels, or averaging the two channels.
# | (See http://www.mathworks.com/matlabcentral/newsreader/view_thread/44379 for discussion.)
# | The latter will be done here, due to (1) its conceptual simplicity, (2) this is what is implemented 'tuneR:::mono', and (3) this appears to be the standard: http://dsp.stackexchange.com/questions/2484/converting-from-stereo-to-mono-by-averaging
# | This will be consistently implemented as '(object$right + object$left)/2', following the mono() function in 'tuneR'.
if( AudioClass=="character"){ # A character string containing a file path
if( length(Audio)>=2 ){ stop("'Audio' is of length >= 2.\n If specifying a file path, the character vector must be of length 1.") }
if( !file.exists(Audio) ){ stop("No file exists at the path specified in the 'Audio' argument.") }
InstalledPackages = rownames(installed.packages())
audioInstalled = ( "audio" %in% InstalledPackages )
phonToolsInstalled = ( "phonTools" %in% InstalledPackages )
tuneRInstalled = ( "tuneR" %in% InstalledPackages )
soundInstalled = ( "sound" %in% InstalledPackages )
if( !phonToolsInstalled & !tuneRInstalled & !soundInstalled & !audioInstalled ){
stop("To specify a character-string file path for the 'Audio' argument,\n at least one of the following packages must be installed:\n audio, phonTools, tuneR, or sound")
} # End 'if none of the four packages for handling audio are installed'
# Now only read in one required package, with the hierarchy audio > phonTools > tuneR > sound.
# Using that package, over-write the character string file path with the actual audio data.
# Finally, adjust the AudioClass variable as appropriate (so as to flow into the code below for determining the sampling frequency).
if(audioInstalled){
require("audio")
Audio = load.wave(Audio)
AudioClass = "audioSample"
}else{
if(audioInstalled){
require("phonTools")
Audio = loadsound(Audio)
AudioClass = "sound"
}else{
if(audioInstalled){
require("tuneR")
Audio = readWave(Audio)
AudioClass = "Wave"
}else{
if(audioInstalled){
require("sound")
Audio = loadSample(Audio)
AudioClass = "Sample"
} # End if package 'sound' is installed
} # End if/else package 'tuneR' is installed
} # End if/else package 'phonTools' is installed
} # End if/else package 'audio' is installed
} # End "if Audio is of class 'character'"
if( AudioClass=="audioSample"){ # From 'audio' package
IsStereo = is.matrix(Audio) # A stereo audioSample is stored as a matrix (and a mono one is not)
if(IsStereo){ Samples = ( Audio[1,] + Audio[2,] ) / 2
}else{ Samples = as.numeric( Audio )
} # End 'if/else is stereo'
SamplingFrequency = attributes(Audio)$rate
} # End "if Audio is of class 'audioSample'"
if( AudioClass=="sound"){ # From 'phonTools' package
# Since, at present, phonTools only supports mono soundfiles (i.e. a 'sound' object is by definition mono), so no checking of stereo/mono status or stereo-to-mono conversion is necessary
Samples = as.numeric( Audio$sound )
SamplingFrequency = Audio$fs
} # End "if Audio is of class 'sound'"
if( AudioClass=="Wave"){ # From 'tuneR' package
IsStereo = attributes(Wave)$stereo
if(IsStereo){ Samples = ( attributes(Audio)$left + attributes(Wave)$right ) / 2
}else{ Samples = attributes(Audio)$left # Recall that 'attributes(Wave)$right' is 'numeric(0)' if a mono file is being dealt with
} # End 'if/else is stereo'
SamplingFrequency = attributes(Audio)$samp.rate
} # End "if Audio is of class 'Wave'"
if( AudioClass=="Sample"){ # From 'sound' package
IsStereo = ( nrow(Audio)==2 ) # Recall the stereo-mono distinction is encoded via the differing number of rows in the Sample object
if(IsStereo){ Samples = ( Audio$sound[1,] + Audio$sound[2,] ) / 2
}else{ Samples = Audio$sound[1,]
} # End 'if/else is stereo'
SamplingFrequency = Audio$rate
} # End "if Audio is of class 'Sample'"
if( AudioClass=="numeric"){
Samples = Audio # As-is (since it must be numeric by this point in the code)
# SamplingFrequency has already been specified as an argument to the function (as enforced above)
} # End "if Audio is of class 'numeric'"
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Convert the window length that the user specified into the actual number of samples to be used.
nSamplesInWindow = ceiling( (SamplingFrequency/1000) * WindowLength ) # 1000 because 'WindowLength' is defined above as being in milliseconds
# 'nSamplesInWindow' needs to be odd. So if it is even (i.e. if the remainder of dividing it by 2 is 0), add +1 to make it odd
if(nSamplesInWindow%%2==0){ nSamplesInWindow = nSamplesInWindow + 1 }
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Run checks on the settings for the timestep (i.e. the number of samples advanced from one analysis to the next)
# Stop computation if the user has provided both 'TimeStepSize' *and* 'nTimeSteps':
if( !is.null(TimeStepSize) & !is.null(nTimeSteps) ){
stop("You can only specify either TimeStepSize *or* nTimeSteps, not both.")
} # End 'if the user simultaneously specified both TimeStepSize *and* nTimeSteps'
# If the 'TimeStepSize' and 'nTimeSteps' are both set to NULL, use an nTimeSteps of 400 (as per the default in spectrogram() from the 'phonTools' package)
if( is.null(TimeStepSize) & is.null(nTimeSteps) ){ nTimeSteps = 400 }
# If the user manually specified 'TimeStepSize' (i.e. a number of milliseconds)
if(!is.null(TimeStepSize)){ TimeStep = floor( TimeStepSize/1000 * SamplingFrequency ) }
# If the user manually specified 'nTimeSteps' (i.e. an integer value)
if(!is.null(nTimeSteps)){ TimeStep = floor( length(Samples)/nTimeSteps ) }
# NOTE: Keep in mind that the units of 'TimeStep' is *samples*, not milliseconds.
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# If the user selected to apply preemphasis...
if(Preemphasis==TRUE){
PreemphasizedSamples = as.numeric( stats:::filter(Samples, c(1, -1), method = "convolution", sides = 1) )
# filter() is a low-level function from the 'stats' package
# The package is specified in case it the function name masked by other identically-named functions in other packages (e.g. package 'signal')
# The filter() function leaves the first sample as NA, so just fill this in with the original value
PreemphasizedSamples[1] <- Samples[1]
# Now, over-write the original samples object with the pre-emphasized samples
# In the function that this code will ultimately reside in, there is no danger of re-running these lines of code multiple times and thereby cyclically distort the Samples data
Samples = PreemphasizedSamples
} # End 'if applying pre-emphasis'
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Create the necessary padding
HalfWindowSize = floor(nSamplesInWindow/2)
ZeroPadding = rep(0, HalfWindowSize)
ZeroPaddedSamples = c(ZeroPadding, Samples, ZeroPadding)
to = length(ZeroPaddedSamples) - HalfWindowSize*2 # 'Twice half' because it was rounded above
LeftEdgeLocations = seq(from=1, to=to, by=TimeStep)
AnalysisBandwidth = SamplingFrequency/nSamplesInWindow
FrequencyResolutionFactor = AnalysisBandwidth/FrequencyResolution # 'FrequencyResolutionModifier' is an argument to the function. Default=4.
nSamplesPadding = (SamplingFrequency - FrequencyResolutionFactor * nSamplesInWindow)/FrequencyResolutionFactor
# If 'nSamplesPadding' ends up being negative, clip it to 0 instead.
if(nSamplesPadding < 0){ nSamplesPadding = 0 }
nSamplesInPaddedWindow = nSamplesInWindow + nSamplesPadding
## Create an adjusted version of 'nSamplesPadding' that is constrained to be an odd number
#if(nSamplesPadding%%2==0){ # i.e. if the remainder is 0 (hence even)
#nSamplesPadding_Adjusted = nSamplesPadding + 1
#}else{ # i.e. if the remainder is 1 (hence already odd)
#nSamplesPadding_Adjusted = nSamplesPadding
#} # End 'if/else 'nSamplesPadding' is odd
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Create a windowing function to apply to the signal
Windowing = function(CurrentLeftEdge){ # CurrentLeftEdge = LeftEdgeLocations[4]
# Extract out the target samples for the current analysis
TargetSamples = ZeroPaddedSamples[CurrentLeftEdge:(CurrentLeftEdge + nSamplesInWindow - 1)]
# Append zeroes to the end based on the adjusted padding
#TrailingZeroesAppended = c(TargetSamples, rep(0, times=nSamplesPadding_Adjusted))
times = FrequencyResolution*length(TargetSamples) - length(TargetSamples)
TrailingZeroesAppended = c(TargetSamples, rep(0, times=times))
# First make sure the 'WindowType' argument is OK
AcceptableWindowTypes = c("rectangular", "square", "blackman", "hann", "hanning", "hamming", "cosine", "sine", "bartlett", "gaussian", "kaiser" )
WindowTypeAcceptable = as.logical( sum( WindowType == AcceptableWindowTypes ) )
if(!WindowTypeAcceptable){
stop("The 'WindowType' argument must be one of the following:\n rectangular / square, blackman, hann / hanning, hamming, cosine / sine, bartlett, gaussian, kaiser")
} # End 'if the WindowType argument is not one of the acceptable values'
IntegerSequence = 0:(nSamplesInWindow-1) # Technically not needed for 'rectangular', but include here nonetheless
if(WindowType == "rectangular"|
WindowType == "square" ){ WindowFunction = rep(1, nSamplesInWindow) }
if(WindowType == "blackman" ){ WindowFunction = (7938/18608) - (9240/18608) *
cos((2 * pi * IntegerSequence)/(nSamplesInWindow - 1)) +
(1430/18608) *
cos((4 * pi * IntegerSequence)/(nSamplesInWindow - 1)) }
if(WindowType == "hann" |
WindowType == "hanning" ){ WindowFunction = 0.5 * (1 - cos((2 * pi * IntegerSequence)/(nSamplesInWindow - 1))) }
if(WindowType == "hamming" ){ WindowFunction = 0.54 - 0.46 * cos((2 * pi * IntegerSequence)/(nSamplesInWindow - 1)) }
if(WindowType == "cosine" |
WindowType == "sine" ){ WindowFunction = sin((IntegerSequence * pi)/(nSamplesInWindow - 1)) }
if(WindowType == "bartlett" ){ WindowFunction = (2/(nSamplesInWindow - 1)) *
((nSamplesInWindow - 1)/2 - abs(IntegerSequence - (nSamplesInWindow - 1)/2)) }
if(WindowType == "gaussian" ){ if(is.null(WindowParameter)){ WindowParameter = 0.4 }
WindowFunction = exp(-0.5 * ((IntegerSequence - (nSamplesInWindow - 1)/2)/
(WindowParameter * (nSamplesInWindow - 1)/2))^2)
}
if(WindowType == "kaiser" ){ if(is.null(WindowParameter)){ WindowParameter = 3 } # Note that the default argument for spectrogram() in phonTools is 3, whereas the default value for windowfunc() in phonTools is 2.
WindowFunction = besselI(WindowParameter * pi * sqrt(1 - (2 * (IntegerSequence)/(nSamplesInWindow - 1) - 1)^2),0)/
besselI(WindowParameter * pi, 0)
}
if( WindowType!="gaussian" & WindowType!="kaiser" & !is.null(WindowParameter) ){warning("The specification of WindowParameter was ignored.\n(WindowParameter is only used if WindowType is 'gaussian' or 'kaiser'.)")}
# Now use this window function to do the core computations of the spectrogram
WindowedSamples = TrailingZeroesAppended * WindowFunction
MeanSubtracted = WindowedSamples - mean(WindowedSamples)
FFT = fft(MeanSubtracted)[1:(nSamplesInPaddedWindow/2 + 1)]
AbsoluteValueSquared = abs(FFT)^2
LogScaled = log(AbsoluteValueSquared, 10) * 10
return(LogScaled)
} # End function 'Windowing()'
# Now produce a matrix containing the magnitude at each bin center.
UnnormalizedSpectrogramMatrix = t( sapply(X=LeftEdgeLocations, FUN=Windowing) )
# Transpose in order to get time to extend across the rows and frequency to extend across the columns.
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Post-processing
# Normalize the spectrogram matrix by making the highest value to be 0.
SpectrogramMatrix = UnnormalizedSpectrogramMatrix - max(UnnormalizedSpectrogramMatrix)
# Clip very low values to be within the dynamic range
if(!is.null(DynamicRange)){
NegativeDecibelThreshold = -1 * DynamicRange # 'DynamicRange' is assumed to be positive. Perhaps eventually enforce this with 'stop()'.
SpectrogramMatrix[which(SpectrogramMatrix < NegativeDecibelThreshold)] = NegativeDecibelThreshold
} # End 'if DynamicRange is non-NULL'
# Determine the time and frequency values for every row and column in the matrix
TimeSequence = (LeftEdgeLocations + floor(nSamplesInWindow/2)) * (1000/SamplingFrequency)
FrequencySequence = (0:(nSamplesInPaddedWindow/2)) * (SamplingFrequency/nSamplesInPaddedWindow)
if( Omit0Frequency ){
SpectrogramMatrix = SpectrogramMatrix[,-1]
FrequencySequence = FrequencySequence[-1]
# Leave TimeSequence alone because that is is unrelated to the omission of the 0-frequency band.
} # End 'if omitting the 0-frequency band'
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# If requested, return the matrix object
if(plot==FALSE){
# Add the times as row names and frequencies as column names
rownames(SpectrogramMatrix) = as.numeric(TimeSequence)
colnames(SpectrogramMatrix) = as.numeric(FrequencySequence)
# For reconstructability purposes, include *all* the arguments/parameters used in the creation of the spectrogram as attributes to the matrix object
# Note that any NULL attributes will NOT be included! (because of how attributes work in R in general)
attributes(SpectrogramMatrix) = append( attributes(SpectrogramMatrix), list(SamplingFrequency=SamplingFrequency,
WindowLength=WindowLength,
FrequencyResolution=FrequencyResolution,
TimeStepSize=TimeStepSize,
nTimeSteps=nTimeSteps,
Preemphasis=Preemphasis,
DynamicRange=DynamicRange,
Omit0Frequency=Omit0Frequency,
WindowType=WindowType,
WindowParameter=WindowParameter))
return(SpectrogramMatrix)
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# If requested, plot the spectrogram
}else{ # i.f. is plot==TRUE
if(is.null(col)){ ColorGenerator = colorRampPalette(c("dark blue", "blue", "cyan", "yellow", "orange", "red", "brown")) }else{
if(identical(col,"alternate")){ ColorGenerator = colorRampPalette(c("black", "red", "orange", "yellow", "white")) }else{
if( identical(col,"greyscale") | identical(col,"grayscale") ){ ColorGenerator = colorRampPalette(c("white", "black")) }else{
ColorGenerator = colorRampPalette( col ) # By this point in the code, it is assumed that the user has supplied a vector of colors to create the spectrogram
}}} # End 'if/else' for greyscale, alternate, and NULL
AmplitudeRange = range(SpectrogramMatrix, finite = TRUE) # 'finite=TRUE' excludes values of positive/negative infinity
MinimumAmplitude = AmplitudeRange[1]
MaximumAmplitude = AmplitudeRange[2] # Should always be zero due to the normalization in the post-processing
# For the number of color levels, have one color per decibel in the amplitude range
MinimumAmplitude = AmplitudeRange[1]
MaximumAmplitude = AmplitudeRange[2] # Should always be zero due to the normalization in the post-processing
nColorLevels = abs(MinimumAmplitude) - abs(MaximumAmplitude) # In the original code from spectrogram() in package 'phonTools', the number from the line of code above is multiplied by 1.2
ColorLevels = seq(from=MinimumAmplitude, to=MaximumAmplitude, length.out=nColorLevels+1) # Add 1 so that a color is assigned to both endpoints of the range
ColorPalette = ColorGenerator(round(nColorLevels)) # Rounded in case the user provides a decimal number, or if DynamicRange is set to a non-whole number.
# If xlim is NULL, set it to the full time range of the soundfile
if( is.null(xlim) ){ xlim = range(TimeSequence) }
# if ylim is NULL, set it to cap out at the Nyquist frequency (i.e. the highest visible frequency given the sampling rate of the soundfile)
if( is.null(ylim) ){ ylim = range(FrequencySequence) }
# If the user provides a maximum ylim value that is beyond the Nyquist frquency (i.e. half of the sampling frequency), clip it to the Nyquist frequency
MaximumFrequency = ylim[which.max(ylim)]
if( MaximumFrequency > (SamplingFrequency/2) ){ ylim[which.max(ylim)] = SamplingFrequency/2 }
if( PlotFast == TRUE){ # i.e. if the user wants to use image(..., useRaster=TRUE)
image( x=TimeSequence,
y=FrequencySequence,
z=SpectrogramMatrix,
xlim=xlim,
ylim=ylim,
zlim=AmplitudeRange,
col=ColorPalette,
add=add,
xaxs="i",
yaxs="i",
xlab=xlab,
ylab=ylab,
# breaks, # Leave this unspecified since it defaults to equidistant breaks, which is exactly what I want
main=main,
oldstyle=FALSE, # FALSE = there are color intervals of equal lengths between the limits
# TRUE = the midpoints of the color intervals are equally spaced, with zlim[1] and zlim[2] as midpoints
useRaster=TRUE # As described up above, this will significantly improve processing speed (which is the whole point of this being 'PlotFast') but may not always work (depending on the device the user is sending the image to)
) # End call to 'image()'
}else{ # i.e. if the user wants to use filled.contour
# Set up the plot (only if 'add' is FALSE - the default)
if(add==FALSE){
plot.new()
plot.window(xlim=xlim, ylim=ylim, xaxs="i", yaxs="i")
} # End 'if add is FALSE'
# The following version of filled.contour(), with the period at the front, omits the color bar to the right of the normal filled.contour() function
.filled.contour(x=TimeSequence, y=FrequencySequence, z=SpectrogramMatrix, levels=ColorLevels, col=ColorPalette)
# Add all the other typical dressings to the plot (again, only if 'add' is FALSE - the default)
if(add==FALSE){
Axis(TimeSequence, side=1)
Axis(FrequencySequence, side=2)
title(main=main, xlab=xlab, ylab=ylab)
} # End 'if add is FALSE'
} # End 'if/else PlotFast is TRUE'
box() # Putting this here makes sure that there always is a box, no matter what (e.g. it doesn't disappear with repeated over-plottings)
} # End 'if/else plot==TRUE'
#########################################
# Quick synopsis of arguments: #
# Spectrogram( Audio, #
# SamplingFrequency=NULL, #
# WindowLength = 5, #
# FrequencyResolution = 4, #
# TimeStepSize = NULL, #
# nTimeSteps = NULL, #
# Preemphasis = TRUE, #
# DynamicRange = 70, #
# Omit0Frequency = FALSE, #
# WindowType = "kaiser", #
# WindowParameter = NULL, #
# plot = TRUE, #
# PlotFast = TRUE, #
# add = FALSE, #
# col = NULL, #
# xlim = NULL, #
# ylim = NULL, #
# main = "", #
# xlab = "Time (ms)", #
# ylab = "Frequency (Hz)") #
#########################################
} # End definition of function 'Spectrogram()'
########################################################################
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but without any warranty; without even the implied warranty of #
# merchantability or fitness for a particular purpose. See the #
# GNU General Public License for more details. #
# #
# To receive a copy of the GNU General Public License, see: #
# http://www.gnu.org/licenses/ #
########################################################################
# LICENSE NOTE:
# This script was modified from the spectrogram() function in the 'phonTools' package.
# http://cran.r-project.org/web/packages/phonTools/index.html
# The latter is released under the two-clause Berkeley Software Distribution (BSD) license.
# http://opensource.org/licenses/BSD-2-Clause
# (c) 2012-2015 Santiago Barreda