-
Notifications
You must be signed in to change notification settings - Fork 1
/
comparison.m
212 lines (169 loc) · 6.63 KB
/
comparison.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
%% Comparison script for Streaming r-truncated SVD (Moses, PM, FD, & GROUSE)
%
% Description:
% This code is supplied as additional material alongside our paper:
% "MOSES: A Streaming Algorithm for Linear Dimensionality Reduction"
%
%
% Please ensure you have an up-to-date MATLAB version (> 2017a), as older
% versions have problems handling character/string arrays in certain
% cases, and these are used extensively in this script.
%
% The script is segmented into four main categories:
%
% -- Synthetic data evaluation: bench PM, MOSES, FD, RFD, & GROUSE using
% synthetic datasets
% -- Real data evaluation: bench PM, MOSES, FD, RFD, & GROUSE using real
% datasets
% -- Speed tests: compare the execution speed of MOSES against
% PM, FD, RFD, & GROUSE
% -- MOSES scaling tests: compare the performance of MOSES, in terms of
% error across different parameters of
% block size (b), rank (r), and ambient dim. (n)
%
% Author: Andreas Grammenos ([email protected])
%
% Last touched date: 30/12/2018
%
% License:
% code: GPLv3, author: A. Grammenos
% paper: A. Eftekhari, R. Hauser, and A. Grammenos retain their respective
% copyrights (pre-print link: https://arxiv.org/abs/1806.01304)
%
%
%% Initialisation
% start from a clean slate: command window, workspace, and open figures
clc;
clear;
close all;

% fixed seed for reproducibility; comment out for (slightly) different
% (~random) results
rng(200);

% declare the global variables used by the evaluation, grouped by purpose
global pflag pdf_print fig_print datasetPath
global use_fast_moses_only use_offline_svds use_fdr use_blk_err
global run_synthetic run_real run_speed_test run_moses_scaling
global run_full_scaling run_exp1 run_exp2 run_exp3

% -- experiments to run (1 runs the section, 0 skips it)
% synthetic data evaluation
run_synthetic = 1;
% real data evaluation
run_real = 0;
% calculation speed tests
run_speed_test = 0;
% MOSES scaling tests
run_moses_scaling = 0;

% -- printing flags
% print resulting figures to ./graphs/
pflag = 1;
% print resulting figures as .pdf
pdf_print = 0;
% print resulting figures as .fig
fig_print = 1;

% -- execution configuration
% speed up by using fast moses <-- USE IT :)
use_fast_moses_only = 1;
% offline svds calculation; WARNING: enabling this option is PAINFULLY
% SLOW, keeping it disabled (0) drastically speeds up execution
% <- DEF. DISABLE IT :)
use_offline_svds = 0;
% use robust fd -- same as fd but on the recon. we normalise using a*Id;
% using the shifted subspace by a*Id does not work well in our case.
use_fdr = 0;
% calc. errors per block instead of per column; provides a DRASTIC
% improvement in speed but less granular error reporting. The block is
% 100 for GROUSE & FD, while for PM and MOSES it equals their respective
% block sizes for each run. <- Prob. use it
use_blk_err = 0;

% -- moses scaling flags
% no need to run unless performing full error check
run_full_scaling = 0;
% experiment 1: fixed b, r, variable n
run_exp1 = 1;
% experiment 2: fixed r, n, variable b
run_exp2 = 1;
% experiment 3: fixed b, n, variable r
run_exp3 = 1;

% setup the vars for the workspace
% NOTE(review): datasetPath is never assigned in this script, so it is
% presumably populated in here -- confirm in setup_vars
setup_vars();
%% Synthetic data
% runs the synthetic evaluation once per alpha value when enabled
if run_synthetic == 1
  fprintf("\n ** Running synthetic data evaluation ** \n");

  % synthetic data problem parameters
  n = 200;      % ambient dimension
  r = 10;       % the algorithm aims to find a rank-r truncation of the data
  T = 10*n;     % scope of the algorithm (namely, max time)
  nSim = 10;    % number of simulations

  % NOTE: change alpha to see differences.
  alpha = [0.01, 0.1, 0.5, 1];

  % alpha is a row vector, so each iteration receives one scalar entry
  for a = alpha
    synthetic_dataset_eval(n, T, r, a, nSim);
  end

  fprintf("\n ** Finished synthetic data evaluation ** \n");
else
  fprintf("\n ** Skipping synthetic data evaluation **\n");
end
%% Real data
% evaluates the four real datasets (light, temperature, voltage, humidity)
% only when run_real is set
if run_real ~= 1
  fprintf("\n ** Skipping real data evaluation **\n");
else
  fprintf("\n ** Running real data evaluation ** \n");

  % resolve the dataset locations up front; the order matches `labels`
  dataFiles = { ...
    strcat(datasetPath, 'q8calibLight.dat'), ...
    strcat(datasetPath, 'q8calibHumTemp.dat'), ...
    strcat(datasetPath, 'q8calibVolt.dat'), ...
    strcat(datasetPath, 'q8calibHumid.dat')};
  % label handed to the evaluation for each dataset
  labels = ["Light", "Temperature", "Voltage", "Humidity"];
  % every dataset is evaluated with the same target rank
  targetRank = 20;

  % evaluate in order: light, temperature, voltage, humidity
  for k = 1:numel(dataFiles)
    real_dataset_eval(dataFiles{k}, targetRank, labels(k));
  end

  fprintf("\n ** Finished real data evaluation ** \n");
end
%% Speed tests
% runs the speed test once per target rank when run_speed_test is set
if run_speed_test ~= 1
  fprintf("\n ** Skipping algorithm speed evaluation **\n");
else
  fprintf("\n ** Running algorithm speed evaluation **\n");

  alpha = 1;              % power law distribution params
  trials = 5;             % no. of trials
  n_arr = 200:200:1000;   % different ambient dims

  % target ranks and the banner printed ahead of each one (same order)
  targetRanks = [1, 10, 50, 100];
  banners = [ ...
    "\n !! Testing thin-r recovery n >>>> r, with r=%d !!\n", ...
    "\n !! Testing avg-r recovery n >>> r, with r=%d !!\n", ...
    "\n !! Testing fat-r recovery n > r, with r=%d !!\n", ...
    "\n !! Testing super fat-r recovery n > r, with r=%d !!\n"];

  % with the parameters set, run the speed test for every target rank
  for k = 1:numel(targetRanks)
    r = targetRanks(k);
    fprintf(banners(k), r);
    speed_test(n_arr, r, alpha, trials)
  end

  fprintf("\n ** Finished algorithm speed evaluation **\n");
end
%% MOSES Scaling tests
% runs the MOSES scaling evaluation when run_moses_scaling is set
if run_moses_scaling ~= 1
  fprintf("\n ** Skipping MOSES scaling evaluation **\n");
else
  fprintf("\n ** Running MOSES scaling evaluation **\n");

  % scaling test parameters
  n_arr = 200 * (1:6);    % ambient dimension array: 200, 400, ..., 1200
  r_arr = 5 * (1:5);      % r-rank: 5, 10, ..., 25
  m_blk_mul = 1:15;       % block multiplier (we are bound by r)

  % execute the scaling test
  moses_scaling(n_arr, r_arr, m_blk_mul);

  fprintf("\n ** Finished MOSES scaling evaluation **\n");
end
%% Comparison script end.