-
Notifications
You must be signed in to change notification settings - Fork 0
/
SSA_Analysis.m
203 lines (187 loc) · 7.43 KB
/
SSA_Analysis.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
function[s] = SSA_Analysis(ts, M, varargin)
%% Performs singular spectra analyses for a collection of time series.
% Computes RCs (reconstructed components), and performs a Monte Carlo
% significance test
%
% [s] = SSA_Analysis(ts, M)
% Conducts a singular spectrum analysis of a time series using the
% Broomhead-King algorithm. Applies a 1000 iteration Monte Carlo SSA (MC-SSA)
% procedure to test singular vector significance (p<0.05) against a red
% AR(1)-noise background. Calculates reconstructed components. Estimates
% the period of singular vectors. Returns a structure 's' with analysis
% output and metadata. Plots analysis data.
%
% [s] = SSA_Analysis(..., 'noplot')
% Suppresses the output plots.
%
% ANALYSIS OPTIONS:
%
% SSA_Analysis(..., 'algorithm', algorithm)
% Specifies whether to use the Broomhead-King or Vautard-Ghil algorithm.
%
% SSA_Analysis(..., 'MC', MC)
% Specifies the number of Monte Carlo iterations to use during significance
% testing. A greater number of iterations increases statistical robustness,
% but also raises runtime.
%
% SSA_Analysis(..., 'noiseType', noise)
% Specifies wheter to perform the significance test against a red AR(1), or
% white noise process. By default, SSA_Analysis uses a red significance
% test. Use a white significance test (Gaussian noise, variance=1, mean=0)
% for processes with minimal autocorrelation. Use a red test for processes
% with high autocorrelation or long memory. A red test is generally more
% stringent and applicable to many geophysical / geological processes.
%
% SSA_Analysis(..., 'p', p)
% Specify the significance level cutoff to use for significance testing. By
% default SSA_Analysis uses p<0.05 to assign significance.
%
% SSA_Analysis(..., 'noSigTest')
% Blocks the MC-SSA significance test.
%
% RUNTIME OPTIONS:
% Huge time series? MC-SSA taking forever? Try these options...
%
% SSA_Analysis(..., 'parallel')
% If possible, runs the MC-SSA significance tests in parallel. This can
% significantly speed runtime for calculations longer than several minutes.
% If the "Distributed Computing Toolbox" is not licensed, notifies the user
% and proceeds in serial.
%
% SSA_Analysis(..., 'estimateRuntime')
% Displays an estimate of total runtime for the MC-SSA.
%
% SSA_Analysis(..., 'showProgress')
% Displays a progress bar with percent completion and estimated remaining
% time for MC-SSA. Not available for parallel computations.
%
% SSA_Analysis(..., 'noConvergeTest')
% A flag to block the test for Monte Carlo convergence. This may slightly
% improve runtime for very large significance tests.
%
%
% ----- Inputs -----
%
% ts: A time series vector with equally spaced observations.
%
% M: The embedding dimension / sampling window length. M corresponds to the
% longest singular vector wavelength that can be extracted from the
% data. Larger values of M thus provide information on more
% wavelengths, while smaller values of M yield higher confidence in
% extracted information.
%
% algorithm: The algorithm used to compute the SSA
% 'BK': Broomhead-King - Slightly less bias for nonstationary time series
% 'VG': Vautard-Ghil - Enhanced noise reduction for short time series
%
% MC: The number of Monte Carlo iterations used in the Rule N significance
% test. Must be a positive integer.
%
% noise: A flag for the noise used in the Rule N significance test
% 'red' (Default): AR(1) noise with added white noise
% 'white': white Gaussian noise. (mean = 0, variance = 1)
%
% p: The significance level that the significance test should pass. Must be
% a positive number on the interval (0, 1).
%
%
% ----- Outputs -----
%
% s: a structure containing some or all of the following fields.
%
% tsm0: The analysis time series. This is the time series normalized to a
% mean of 0.
%
% singVals: The singular values of the time series.
%
% singVecs: The singular vectors of each time series. Each column is a
% singular vector.
%
% singFreq: An estimate of the frequency of each singular vector.
%
% singPeriod: An estimate of the period of each singular vector.
%
% T: The trajectory matrix constructed for the analyses. Each dim1 x
% dim2 matrix is the trajectory matrix for one time series.
%
% C: The covariance matrix for the trajectory matrix.
%
% RCs: The reconstructed components of each time series. Each dim1 x dim2
% matrix corresponds to a particular time series. Each column of each
% matrix contains 1 reconstructed component.
%
% surrVals: The surrogate eigenvalues from the Monte Carlo significance test.
%
% isSigVal: A boolean/logical vector indicating whether singular values
% passed the significance test.
%
% sigThreshold: The threshold singular value for significance. Singular
% values must be greater than or equal to this threshold to be significant.
%
% sig_p: The actual significance level of the MC-SSA. (Depending on
% the number of Monte Carlo iterations, the significance level of the
% Rule N test may be slightly higher than the user-specified value).
% *** NOTE: To ensure that p = sig_p, choose values of p and MC such
% that p*MC is an integer. Equivalently, MC must be a multiple of 1/p ***
%
% metadata: Information to support replication of the analysis.
%
%
% ----- Written By -----
%
% Jonathan King, 2018, University of Arizona ([email protected])
% Parse the inputs
[plotting, algorithm, sigTest, mcssaArgs, p, convergeTest] = setup(varargin{:});
% Declare the initial structure
s = struct();
% Run an SSA on the time series
[s.singVals, s.singVecs, s.tsm0, s.T, s.C] = simpleSSA(ts, M, algorithm);
s.singVals = s.singVals';
% Get the RCs
s.RCs = getRCs( s.singVecs, s.T, algorithm);
% If testing significance
if sigTest
% Generate the surrogate singular values
s.surrVals = MC_SSA(ts, M, algorithm, s.singVecs, mcssaArgs{:} );
% Get the significance threshold
[s.sigThreshold, s.lowerTail, s.sig_p] = mcthreshold( s.surrVals, p, '2tail');
% Get the indices of significant singular values
s.isSigVal = ( s.singVals >= s.sigThreshold )';
% Record Monte Carlo convergence data
if convergeTest
[s.MCsigThresh, s.MC_p, s.MCisSig] = mcssaConvergence( s.surrVals, p, s.singVals);
end
end
% Estimate the periods/frequencies of the singular vectors
s.singPeriod = singVecPeriod(s.singVecs);
s.singFreq = 1 ./ s.singPeriod;
% Assign the metadata
s.metadata = [{'M';'algorithm'}, {M;algorithm}];
if sigTest
s.metadata(3:5,1:2) = [{'MC';'noise';'p'}, {mcssaArgs{1};mcssaArgs{2};p}];
end
% Make plots if desired
if plotting
ssaconvergence(s);
ssasignificance(s);
end
end
%%%%% Helper Functions %%%%%
function[plotting, algorithm, sigTest, mcssaArgs, p, convergeTest] = setup( varargin)
% Parse the inputs
[plotting, algorithm, MC, noise, p, sigTest, parallel, estimateRuntime, showProgress, convergeTest] = parseInputs( varargin,...
{'noplot','algorithm','MC','noiseType','p','noSigTest','parallel','estimateRuntime','showProgress','noConvergeTest'},...
{true, 'BK', 1000, 'red', 0.05, true, false, false, false, true},...
{ 'b', {'BK','VG'}, {}, {'red','white'},{}, 'b', 'b', 'b', 'b', 'b'} );
% Get the arguments for MC_SSA
mcssaArgs = {MC, noise};
if parallel
mcssaArgs = [mcssaArgs, 'parallel'];
end
if estimateRuntime
mcssaArgs = [mcssaArgs, 'estimateRuntime'];
end
if showProgress
mcssaArgs = [mcssaArgs, 'showProgress'];
end
end