-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathnesta.m
139 lines (110 loc) · 3.68 KB
/
nesta.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
% -*- texinfo -*-
% @deftypefn {} {[@var{mu}, @dots{}] =} nesta (@var{y}, @var{A}, @dots{})
% Recover a sparse signal @var{x} from incomplete data @var{y} and a
% measurement matrix (@var{A} or @var{A},@var{At}) using Nesterov's
% algorithm for quadratically constrained l1-norm minimization.
%
% See @ref{vrvm} for detailed usage information.
% @end deftypefn
%
function [x, obj, parms] = ...
nesta (b, A, At, nu0, lambda0, alpha0, beta0, iters)
% check for the minimum number of arguments.
if (nargin < 2)
error('at least two arguments required');
end
% check the required measurement vector.
if (isempty(b) || !isvector(b) || !iscolumn(b))
error('measurement must be a vector');
end
% check the required measurement operator.
if (!isempty(A) && (nargin < 3 || isempty(At)))
% matrix specification. check the data type.
if (!ismatrix(A))
error('invalid measurement: expected matrix');
end
% store copies of the measurement matrix and its gramian.
B = A;
G = A' * A;
% create function handles for forward, inverse, and projection.
A = @(x) B * x;
At = @(y) B' * y;
AtA = @(x) G * x;
elseif (!isempty(A) && nargin >= 3 && !isempty(At))
% function handle specification. check the data types.
if (!is_function_handle(A) || !is_function_handle(At))
error('invalid measurement: expected function handles');
end
% create a function handle for projection.
AtA = @(x) At(A(x));
end
% check for a prior nu parameter.
if (nargin < 4 || isempty(nu0))
% none specified. use a default value.
nu0 = 1e-3;
end
% check for a prior lambda parameter.
if (nargin < 5 || isempty(lambda0))
% none specified. use a default value.
lambda0 = 1e-3;
end
% check for an iteration count argument.
if (nargin < 8 || isempty(iters))
% none specified. use a default value.
iters = [10, 50];
elseif (isscalar(iters))
% adjust the iteration scheme to accomodate nesta.
iters = [round(iters / 50), 50];
end
% define the gradient of the l1 term.
df = @(x, nu) (x ./ nu) .* (abs(x) < nu) + sign(x) .* (abs(x) >= nu);
% initialize the transformed data vector.
h = At(b);
% get the problem sizes.
m = length(b);
n = length(h);
% initialize the iterates.
x = zeros(n, 1);
y = zeros(n, 1);
z = zeros(n, 1);
% set up the vector of thresholds, and compute the max threshold.
nuv = linspace(0.9, 0.01, iters(1));
nu0 = max(abs(h));
% compute the constant for the quadratic constraint.
vareps = 0.1 * sqrt(m / (nu0 / lambda0));
% initialize the objective vector.
obj = repmat(0, prod(iters), 1);
% iterate, outer loop.
for it = 1 : iters(1)
% set the current threshold value and lipschitz constant.
nu = nu0 * nuv(it);
L = 1 / nu;
% initialize the gradient accumulator.
xc = x;
adf = zeros(size(x));
% iterate, inner loop.
for jt = 1 : iters(2)
% compute the gradient.
dfdx = df(x, nu);
% compute the scale factors for the accumulator and iterates.
alpha = (jt + 1) / 2;
tau = 2 / (jt + 3);
% update the accumulator.
adf += alpha .* dfdx;
% update the y-iterate.
ly = max(0, (L/vareps) * norm(b - A(x - dfdx ./ L)) - L);
y = x + (ly / L) .* h - dfdx ./ L;
y -= (ly / (ly + L)) .* AtA(y);
% update the z-iterate.
lz = max(0, (L/vareps) * norm(b - A(xc - adf ./ L)) - L);
z = xc + (lz / L) .* h - adf ./ L;
z -= (lz / (lz + L)) .* AtA(z);
% update the x-iterate.
x = tau .* z + (1 - tau) .* y;
% store the objective.
obj((it-1) * iters(2) + jt) = sum(abs(x));
end
end
% return an empty set of variational parameters.
parms = [];
end