import numpy as np
import utils
def _inside(img, pt):
    # check whether point (row, col) lies inside the image bounds
    return 0 <= pt[0] < img.shape[0] and 0 <= pt[1] < img.shape[1]
def _transf(pt, A, s, b):
    # apply the similarity transformation: rotate by A, scale by s, shift by b
    return s * np.dot(pt, A.T) + b
def _inv_transf(pt, A, s, b):
    # invert _transf(), assuming A is orthogonal (so A^-1 == A.T)
    return np.dot(pt - b, A) / s
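# Note (round-trip property): the A returned by _horn() below is orthogonal,
# so _inv_transf() exactly inverts _transf():
#   _inv_transf(_transf(p, A, s, b), A, s, b) recovers p (up to float error).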
def _horn(L, R, weights=None, scale=True):
    '''
    Uses Horn's closed-form absolute orientation
    method with orthogonal matrices to align a set
    of points L to a set of points R. L and R are
    sets in which each row is a point and the ith
    point of L corresponds to the ith point of R.
    Args:
      L: set of points to align, one point per row.
      R: set of points to align to, one point per row.
      weights: an array of weights in [0, 1], one per
        correspondence. If None, correspondences are
        unweighted.
      scale: whether to also solve for the scale. If
        False, s = 1.
    Returns:
      A, b, s: a transformation such that
          s * (L @ A.T) + b ~ R,
        minimizing the (weighted) sum of squared
        errors. See the paper (Horn et al., 1988)
        for details.
    '''
L = np.asarray(L)
R = np.asarray(R)
    if weights is None:
        weights = np.ones((len(L), 1))
    else:
        weights = np.expand_dims(weights, axis=-1)
    # compute the points' weighted centroids,
    # normalizing by the total weight
    weights_sum = np.sum(weights)
    L_centroid = np.sum(weights * L, axis=0) / weights_sum
    R_centroid = np.sum(weights * R, axis=0) / weights_sum
    # translate points so that the centroids are at the origin
L_ = L - L_centroid
R_ = R - R_centroid
# find scale 's'
if scale:
Sr = np.sum(np.reshape(weights, -1) * np.sum(R_ * R_, axis=1), axis=0)
Sl = np.sum(np.reshape(weights, -1) * np.sum(L_ * L_, axis=1), axis=0)
s = np.sqrt(Sr / Sl)
else:
s = 1
# find rotation 'A'
M = np.dot(R_.T, weights * L_)
MTM = np.dot(M.T, M)
    w, u = np.linalg.eigh(MTM)
    u = u.T  # rows of u are now the eigenvectors of MTM
    # solve even if M is not full rank
    rank = M.shape[0] - np.sum(np.isclose(w, 0))
    if rank < M.shape[0] - 1:
        raise ValueError(
            'rank of M must be at least M.shape[0] - 1 for a unique solution')
    elif rank == M.shape[0] - 1:
        # get nonzero eigenvalues and their eigenvectors
        mask = np.logical_not(np.isclose(w, 0))
        u = u[mask]
        w = w[mask]
        # compute the pseudoinverse of S = sqrt(MTM)
        w = np.expand_dims(w, 1)
        lhs = np.expand_dims(u / np.sqrt(w), axis=-1)
        rhs = np.expand_dims(u, axis=1)
        S_pseudoinv = np.sum(np.matmul(lhs, rhs), axis=0)
        # compute Tomasi's fix: the component of the rotation lost
        # to the rank deficiency is the outer product of the last
        # left and right singular vectors of M itself
        u_m, _, vh_m = np.linalg.svd(M)
        lhs = np.expand_dims(u_m[:, -1], axis=-1)
        rhs = np.expand_dims(vh_m[-1], axis=0)
        tfix = np.dot(lhs, rhs)
        # choose the sign that makes A a proper rotation (det(A) > 0)
        A_base = np.dot(M, S_pseudoinv)
        A = A_base + tfix
        if np.linalg.det(A) < 0:
            A = A_base - tfix
    else:
        # compute the inverse of S = sqrt(MTM) from its eigendecomposition
        w = np.expand_dims(w, 1)
        lhs = np.expand_dims(u / np.sqrt(w), axis=-1)
        rhs = np.expand_dims(u, axis=1)
        S_inv = np.sum(np.matmul(lhs, rhs), axis=0)
        A = np.dot(M, S_inv)
# find translation 'b'
b = R_centroid - s * np.dot(A, L_centroid)
return A, b, s
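# Usage sketch for _horn() on synthetic data (hypothetical values, not part
# of the pipeline): recovering a known similarity transformation.
#   L = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.]])
#   c, si = np.cos(np.pi / 6), np.sin(np.pi / 6)
#   A_true = np.array([[c, -si], [si, c]])
#   R = 2. * np.dot(L, A_true.T) + np.array([3., -1.])
#   A, b, s = _horn(L, R)
#   # s * np.dot(L, A.T) + b now reproduces R up to numerical error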
def iterative(img1,
pts1,
img2,
pts2,
descs1=None,
descs2=None,
euclidean_lambda=500,
weighted=False,
max_iter=10):
    '''
    Iteratively aligns image 'img1' to 'img2', using
    Horn's absolute orientation method to minimize
    the mean squared error between keypoint
    correspondences in the sets 'pts1' and 'pts2'.
    Correspondences between keypoints are found
    with the following metric:
      d(u, v) = ||SIFT(u) - SIFT(v)||^2 +
                + (\lambda * ||T(u) - v||^2) / MSE
    where 'T' is the current alignment, '\lambda' is
    a user-specified weight and 'MSE' is the mean
    squared error of the previous alignment. For the
    first iteration, MSE = Inf, so the euclidean
    term vanishes.
    Args:
      img1: np array with the image to align.
      pts1: np array with img1's keypoints, one
        keypoint coordinate per row.
      img2: np array with the image to align to.
      pts2: same as pts1, but for img2.
      descs1: precomputed descriptors for img1.
      descs2: same as descs1, but for img2.
      euclidean_lambda: \lambda in the equation above.
      weighted: whether to weight each correspondence
        in Horn's method by its confidence, computed
        as the normalized complement of its distance,
        (max_dist - dist) / max_dist.
      max_iter: maximum number of iterations.
    Returns:
      A, b, s: the found alignment. For further
        information, read the _horn() documentation.
    '''
    # initialize before first alignment
    mse = np.inf
    euclidean_weight = -1  # ensures the convergence check fails at first
    A = np.identity(2)
    s = 1
    b = np.array([0, 0])
# precompute sift descriptors, if not given
if descs1 is None:
descs1 = utils.sift_descriptors(img1, pts1, scale=8)
if descs2 is None:
descs2 = utils.sift_descriptors(img2, pts2, scale=8)
# iteratively align
for _ in range(max_iter):
        # convergence criterion: mse * euclidean_weight is close to
        # euclidean_lambda exactly when the mse stopped changing
        # between consecutive iterations
        if np.isclose(mse * euclidean_weight, euclidean_lambda):
            break
# compute weight of correspondences' euclidean distance
euclidean_weight = euclidean_lambda / (mse + 1e-5)
# find correspondences
pairs = utils.find_correspondences(
descs1,
descs2,
pts1=pts1,
pts2=pts2,
euclidean_weight=euclidean_weight,
transf=lambda x: _transf(x, A, s, b),
thr=0.8)
        # stop if too few correspondences are found; Horn's method
        # needs at least two to fix a 2D similarity
        if len(pairs) <= 1:
            break
        # build aligned correspondence arrays
        L = []
        R = []
        w = [] if weighted else None
        if weighted:
            max_dist = np.max(np.asarray(pairs)[:, 2])
        for pair in pairs:
            L.append(pts1[pair[0]])
            R.append(pts2[pair[1]])
            if weighted:
                # confidence decreases linearly with descriptor distance
                w.append((max_dist - pair[2]) / max_dist)
# find alignment transformation
A, b, s = _horn(L, R, weights=w)
# compute alignment mse
L = np.array(L)
R = np.array(R)
error = R - (s * np.dot(L, A.T) + b)
dists = np.sum(error * error, axis=1)
mse = np.mean(dists)
        # filter out points (and corresponding descriptors)
        # that fall outside the images' overlap
pts1_ = []
descs1_ = []
for i, pt in enumerate(pts1):
t_pt = _transf(pt, A, s, b)
if _inside(img2, t_pt):
pts1_.append(pt)
descs1_.append(descs1[i])
pts1 = pts1_
descs1 = np.array(descs1_)
# same for second set
pts2_ = []
descs2_ = []
for i, pt in enumerate(pts2):
t_pt = _inv_transf(pt, A, s, b)
if _inside(img1, t_pt):
pts2_.append(pt)
descs2_.append(descs2[i])
pts2 = pts2_
descs2 = np.array(descs2_)
return A, b, s
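# Usage sketch for iterative() (hypothetical paths; assumes the utils
# helpers used above and OpenCV for image loading):
#   img1 = cv2.imread('query.png', 0)
#   img2 = cv2.imread('reference.png', 0)
#   pts1 = utils.load_dets_txt('query_dets.txt')
#   pts2 = utils.load_dets_txt('reference_dets.txt')
#   A, b, s = iterative(img1, pts1, img2, pts2, weighted=True)
#   t_pt = _transf(pts1[0], A, s, b)  # map a keypoint into img2's frame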
if __name__ == '__main__':
    import sys
    import cv2
    # require exactly 4 arguments; '< 5' would let extra arguments
    # through and break the unpacking below
    if len(sys.argv) != 5:
        raise ValueError(
            'Expected 4 arguments: <img1_path> <pts1_path> <img2_path> <pts2_path>, found {}'.
            format(len(sys.argv) - 1))
    img1_path, pts1_path, img2_path, pts2_path = sys.argv[1:]
    # load images in grayscale
img1 = cv2.imread(img1_path, 0)
img2 = cv2.imread(img2_path, 0)
# load detection points
pts1 = utils.load_dets_txt(pts1_path)
pts2 = utils.load_dets_txt(pts2_path)
A, b, s = iterative(img1, pts1, img2, pts2)
    # warp img1 into img2's coordinate frame: for each output pixel,
    # sample img1 at the inversely transformed coordinates
    aligned = np.zeros_like(img2)
    for ref_row in range(img2.shape[0]):
        for ref_col in range(img2.shape[1]):
            t_row, t_col = _inv_transf(np.array([ref_row, ref_col]), A, s, b)
            if 0 <= t_row < img1.shape[0] - 1 and 0 <= t_col < img1.shape[1] - 1:
                aligned[ref_row, ref_col] = utils.bilinear_interpolation(
                    t_row, t_col, img1)
    # display the alignment: 'diff' shows img2 in the blue and green
    # channels and the warped img1 in red, so misalignments show as color
    diff = np.stack([img2, img2, aligned], axis=-1)
    cv2.imshow('prealignment', img1)
    cv2.imshow('target', img2)
    cv2.imshow('aligned', aligned)
    cv2.imshow('diff', diff)
    cv2.waitKey(0)
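# Example invocation (hypothetical file names; argument order matches
# the check above):
#   python align.py query.png query_dets.txt reference.png reference_dets.txt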