# AUTHOR    : Lv Wenchao
# coding    : utf-8
# @Time     : 2021/8/23 10:43
# @FileName : sift.py
# @Software : PyCharm
from numpy import all, any, array, arctan2, cos, sin, exp, dot, log, logical_and, roll, sqrt, stack, trace, \
unravel_index, pi, deg2rad, rad2deg, where, zeros, floor, full, nan, isnan, round, float32
from numpy.linalg import det, lstsq, norm
from cv2 import resize, GaussianBlur, subtract, KeyPoint, INTER_LINEAR, INTER_NEAREST
from functools import cmp_to_key
import logging
import cv2
####################
# Global variables #
####################
logger = logging.getLogger(__name__)
float_tolerance = 1e-7
#################
# Main function #
#################
def computeKeypointsAndDescriptors(image, sigma=1.6, num_intervals=3, assumed_blur=0.5, image_border_width=5,
eigenvalue_ratio=10):
"""Compute SIFT keypoints and descriptors for an input image
"""
image = image.astype('float32')
# Build the enlarged, pre-blurred base image
base_image = _generateBaseImage(image, sigma, assumed_blur)
# Compute the number of octaves in the Gaussian pyramid
num_octaves = _computeNumberOfOctaves(base_image.shape)
# Generate the incremental Gaussian kernels
gaussian_kernels = _generateGaussianKernels(sigma, num_intervals)
gaussian_images = _generateGaussianImages(base_image, num_octaves, gaussian_kernels)
dog_images = _generateDoGImages(gaussian_images)
keypoints = _findScaleSpaceExtrema(gaussian_images, dog_images, num_intervals, sigma, image_border_width,
eigenvalue_ratio)
# Remove duplicate keypoints
keypoints = _removeDuplicateKeypoints(keypoints)
keypoints = _convertKeypointsToInputImageSize(keypoints)
# Generate the descriptors
descriptors = _generateDescriptors(keypoints, gaussian_images)
return keypoints, descriptors
#########################
# Image pyramid related #
#########################
# Build the upsampled base image
def _generateBaseImage(image, sigma, assumed_blur):
"""Generate base image from input image by upsampling by 2 in both directions and blurring
"""
logger.debug('Generating base image...')
image = resize(image, (0, 0), fx=2, fy=2, interpolation=INTER_LINEAR)
sigma_diff = sqrt(max((sigma ** 2) - ((2 * assumed_blur) ** 2), 0.01))
return GaussianBlur(image, (0, 0), sigmaX=sigma_diff,
sigmaY=sigma_diff) # the image blur is now sigma instead of assumed_blur
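# Gaussian blurs compose as sigma_total^2 = sigma_1^2 + sigma_2^2, so after the
# 2x upsampling (which doubles the assumed blur to 2 * assumed_blur) the extra
# blur needed to reach sigma is sqrt(sigma^2 - (2 * assumed_blur)^2); with the
# defaults sigma = 1.6 and assumed_blur = 0.5 this works out to sqrt(1.56) ≈ 1.25.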
def _computeNumberOfOctaves(image_shape):
"""Compute number of octaves in image pyramid as function of base image shape (OpenCV default)
"""
return int(round(log(min(image_shape)) / log(2) - 1))
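# Example: a 512 x 512 base image gives int(round(log2(512) - 1)) = 8 octaves,
# leaving the shortest side of the final octave at roughly 4 pixels.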
def _generateGaussianKernels(sigma, num_intervals):
"""Generate list of gaussian kernels at which to blur the input image. Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper.
:param sigma: initial blur sigma (σ)
:param num_intervals: s, the number of scale intervals per octave
"""
logger.debug('Generating scales...')
# Number of images per octave
num_images_per_octave = num_intervals + 3
# Scale step k = 2^(1/s)
k = 2 ** (1. / num_intervals)
gaussian_kernels = zeros(
num_images_per_octave) # scale of gaussian blur necessary to go from one blur scale to the next within an octave
gaussian_kernels[0] = sigma
for image_index in range(1, num_images_per_octave):
sigma_previous = (k ** (image_index - 1)) * sigma
sigma_total = k * sigma_previous
gaussian_kernels[image_index] = sqrt(sigma_total ** 2 - sigma_previous ** 2)
return gaussian_kernels
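# Worked example with the defaults sigma = 1.6 and num_intervals = 3
# (k = 2^(1/3) ≈ 1.26): the six kernels come out to approximately
# [1.60, 1.23, 1.55, 1.95, 2.45, 3.09], each being the incremental blur
# needed to step the running scale from k^(i-1) * sigma up to k^i * sigma.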
def _generateGaussianImages(image, num_octaves, gaussian_kernels):
"""Generate scale-space pyramid of Gaussian images
"""
logger.debug('Generating Gaussian images...')
gaussian_images = []
for octave_index in range(num_octaves):
# Build every layer within this octave
gaussian_images_in_octave = [image]
for gaussian_kernel in gaussian_kernels[1:]:
image = GaussianBlur(image, (0, 0), sigmaX=gaussian_kernel, sigmaY=gaussian_kernel)
gaussian_images_in_octave.append(image)
# Append the completed octave to the Gaussian pyramid
gaussian_images.append(gaussian_images_in_octave)
octave_base = gaussian_images_in_octave[-3]
image = resize(octave_base, (int(octave_base.shape[1] / 2), int(octave_base.shape[0] / 2)),
interpolation=INTER_NEAREST)
return array(gaussian_images, dtype=object)
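# Each octave holds num_intervals + 3 images; the [-3] image above has twice
# the blur of the octave base, so downsampling it by 2 seeds the next octave
# at the correct starting scale.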
def _generateDoGImages(gaussian_images):
"""Generate Difference-of-Gaussians image pyramid
"""
logger.debug('Generating Difference-of-Gaussian images...')
dog_images = []
for gaussian_images_in_octave in gaussian_images:
dog_images_in_octave = []
for first_image, second_image in zip(gaussian_images_in_octave, gaussian_images_in_octave[1:]):
dog_images_in_octave.append(subtract(second_image,
first_image)) # ordinary subtraction will not work because the images are unsigned integers
dog_images.append(dog_images_in_octave)
return array(dog_images, dtype=object)
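# Pairwise differences of the num_intervals + 3 Gaussian images leave
# num_intervals + 2 DoG images per octave, exactly enough for num_intervals
# three-layer extremum comparisons in _findScaleSpaceExtrema().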
###############################
# Scale-space extrema related #
###############################
def _findScaleSpaceExtrema(gaussian_images, dog_images, num_intervals, sigma, image_border_width, eigenvalue_ratio,
contrast_threshold=0.04):
"""Find pixel positions of all scale-space extrema in the image pyramid
"""
logger.debug('Finding scale-space extrema...')
threshold = floor(0.5 * contrast_threshold / num_intervals * 255) # from OpenCV implementation
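# With the defaults (contrast_threshold = 0.04, num_intervals = 3) this is
# floor(0.5 * 0.04 / 3 * 255) = 1.0, i.e. nearly every pixel passes this
# cheap pre-filter and the real contrast check happens after localization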
keypoints = []
for octave_index, dog_images_in_octave in enumerate(dog_images):
for image_index, (first_image, second_image, third_image) in enumerate(
zip(dog_images_in_octave, dog_images_in_octave[1:], dog_images_in_octave[2:])):
# (i, j) is the center of the 3x3 array
for i in range(image_border_width, first_image.shape[0] - image_border_width):
for j in range(image_border_width, first_image.shape[1] - image_border_width):
# Test for a discrete extremum in the 3x3x3 scale-space neighborhood
if _isPixelAnExtremum(first_image[i - 1:i + 2, j - 1:j + 2], second_image[i - 1:i + 2, j - 1:j + 2],
third_image[i - 1:i + 2, j - 1:j + 2], threshold):
# Localize the keypoint to subpixel accuracy
localization_result = _localizeExtremumViaQuadraticFit(i, j, image_index + 1, octave_index,
num_intervals, dog_images_in_octave,
sigma, contrast_threshold,
image_border_width, eigenvalue_ratio)
# Localization succeeded, so the point is a keypoint
if localization_result is not None:
keypoint, localized_image_index = localization_result
# Compute the keypoint's dominant orientation(s)
keypoints_with_orientations = _computeKeypointsWithOrientations(keypoint, octave_index,
gaussian_images[
octave_index][
localized_image_index])
for keypoint_with_orientation in keypoints_with_orientations:
keypoints.append(keypoint_with_orientation)
return keypoints
def _isPixelAnExtremum(first_subimage, second_subimage, third_subimage, threshold):
"""Return True if the center element of the 3x3x3 input array is strictly greater than or less than all its neighbors, False otherwise
"""
center_pixel_value = second_subimage[1, 1]
if abs(center_pixel_value) > threshold:
if center_pixel_value > 0:
return all(center_pixel_value >= first_subimage) and \
all(center_pixel_value >= third_subimage) and \
all(center_pixel_value >= second_subimage[0, :]) and \
all(center_pixel_value >= second_subimage[2, :]) and \
center_pixel_value >= second_subimage[1, 0] and \
center_pixel_value >= second_subimage[1, 2]
elif center_pixel_value < 0:
return all(center_pixel_value <= first_subimage) and \
all(center_pixel_value <= third_subimage) and \
all(center_pixel_value <= second_subimage[0, :]) and \
all(center_pixel_value <= second_subimage[2, :]) and \
center_pixel_value <= second_subimage[1, 0] and \
center_pixel_value <= second_subimage[1, 2]
return False
# Keypoint localization with offset refinement
def _localizeExtremumViaQuadraticFit(i, j, image_index, octave_index, num_intervals, dog_images_in_octave, sigma,
contrast_threshold, image_border_width, eigenvalue_ratio=10,
num_attempts_until_convergence=5):
"""Iteratively refine pixel positions of scale-space extrema via quadratic fit around each extremum's neighbors
"""
logger.debug('Localizing scale-space extrema...')
extremum_is_outside_image = False
image_shape = dog_images_in_octave[0].shape
# Up to num_attempts_until_convergence refinement iterations
gradient = array([])
hessian = array([])
pixel_cube = array([])
for attempt_index in range(num_attempts_until_convergence):
# need to convert from uint8 to float32 to compute derivatives and need to rescale pixel values to [0, 1] to apply Lowe's thresholds
first_image, second_image, third_image = dog_images_in_octave[image_index - 1:image_index + 2]
pixel_cube = stack([first_image[i - 1:i + 2, j - 1:j + 2],
second_image[i - 1:i + 2, j - 1:j + 2],
third_image[i - 1:i + 2, j - 1:j + 2]]).astype('float32') / 255.
# Compute the gradient and Hessian matrix via central differences
gradient = _computeGradientAtCenterPixel(pixel_cube)
hessian = _computeHessianAtCenterPixel(pixel_cube)
# Solve the linear system for the subpixel offset (quadratic fit)
extremum_update = -lstsq(hessian, gradient, rcond=None)[0]
if abs(extremum_update[0]) < 0.5 and abs(extremum_update[1]) < 0.5 and abs(extremum_update[2]) < 0.5:
break
# Update the position
j += int(round(extremum_update[0]))
i += int(round(extremum_update[1]))
image_index += int(round(extremum_update[2]))
# make sure the new pixel_cube will lie entirely within the image
if i < image_border_width or i >= image_shape[0] - image_border_width or j < image_border_width or j >= \
image_shape[1] - image_border_width or image_index < 1 or image_index > num_intervals:
extremum_is_outside_image = True
break
if extremum_is_outside_image:
logger.debug('Updated extremum moved outside of image before reaching convergence. Skipping...')
return None
if attempt_index >= num_attempts_until_convergence - 1:
logger.debug('Exceeded maximum number of attempts without reaching convergence for this extremum. Skipping...')
return None
functionValueAtUpdatedExtremum = pixel_cube[1, 1, 1] + 0.5 * dot(gradient, extremum_update)
# Reject low-contrast points
if abs(functionValueAtUpdatedExtremum) * num_intervals >= contrast_threshold:
xy_hessian = hessian[:2, :2]
xy_hessian_trace = trace(xy_hessian)
xy_hessian_det = det(xy_hessian)
if xy_hessian_det > 0 and eigenvalue_ratio * (xy_hessian_trace ** 2) < (
(eigenvalue_ratio + 1) ** 2) * xy_hessian_det:
# Contrast check passed -- construct and return OpenCV KeyPoint object
keypoint = KeyPoint()
keypoint.pt = (
(j + extremum_update[0]) * (2 ** octave_index), (i + extremum_update[1]) * (2 ** octave_index))
# Pack octave, layer, and subpixel scale offset into keypoint.octave (OpenCV convention)
keypoint.octave = octave_index + image_index * (2 ** 8) + int(round((extremum_update[2] + 0.5) * 255)) * (
2 ** 16)
keypoint.size = sigma * (2 ** ((image_index + extremum_update[2]) / float32(num_intervals))) * (
2 ** (octave_index + 1)) # octave_index + 1 because the input image was doubled
keypoint.response = abs(functionValueAtUpdatedExtremum)
# Return the refined keypoint and the layer it was localized in
return keypoint, image_index
return None
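# The fit above follows Lowe (2004), section 4: around a sampled extremum,
# D(x) ≈ D + g^T x + 0.5 * x^T H x, whose stationary point is
# x_hat = -H^(-1) g (the lstsq solve) with value D(x_hat) = D + 0.5 * g^T x_hat.
# The eigenvalue-ratio test keeps a point only if tr(H_xy)^2 / det(H_xy) stays
# below (r + 1)^2 / r with r = eigenvalue_ratio, rejecting extrema that lie
# along edges.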
def _computeGradientAtCenterPixel(pixel_array):
"""Approximate gradient at center pixel [1, 1, 1] of 3x3x3 array using central difference formula of order O(h^2), where h is the step size
"""
# With step size h, the central difference formula of order O(h^2) for f'(x) is (f(x + h) - f(x - h)) / (2 * h)
# Here h = 1, so the formula simplifies to f'(x) = (f(x + 1) - f(x - 1)) / 2
# NOTE: x corresponds to second array axis, y corresponds to first array axis, and s (scale) corresponds to third array axis
dx = 0.5 * (pixel_array[1, 1, 2] - pixel_array[1, 1, 0])
dy = 0.5 * (pixel_array[1, 2, 1] - pixel_array[1, 0, 1])
ds = 0.5 * (pixel_array[2, 1, 1] - pixel_array[0, 1, 1])
return array([dx, dy, ds])
def _computeHessianAtCenterPixel(pixel_array):
"""Approximate Hessian at center pixel [1, 1, 1] of 3x3x3 array using central difference formula of order O(h^2), where h is the step size
"""
# With step size h, the central difference formula of order O(h^2) for f''(x) is (f(x + h) - 2 * f(x) + f(x - h)) / (h ^ 2)
# Here h = 1, so the formula simplifies to f''(x) = f(x + 1) - 2 * f(x) + f(x - 1)
# With step size h, the central difference formula of order O(h^2) for (d^2) f(x, y) / (dx dy) = (f(x + h, y + h) - f(x + h, y - h) - f(x - h, y + h) + f(x - h, y - h)) / (4 * h ^ 2)
# Here h = 1, so the formula simplifies to (d^2) f(x, y) / (dx dy) = (f(x + 1, y + 1) - f(x + 1, y - 1) - f(x - 1, y + 1) + f(x - 1, y - 1)) / 4
# NOTE: x corresponds to second array axis, y corresponds to first array axis, and s (scale) corresponds to third array axis
center_pixel_value = pixel_array[1, 1, 1]
dxx = pixel_array[1, 1, 2] - 2 * center_pixel_value + pixel_array[1, 1, 0]
dyy = pixel_array[1, 2, 1] - 2 * center_pixel_value + pixel_array[1, 0, 1]
dss = pixel_array[2, 1, 1] - 2 * center_pixel_value + pixel_array[0, 1, 1]
dxy = 0.25 * (pixel_array[1, 2, 2] - pixel_array[1, 2, 0] - pixel_array[1, 0, 2] + pixel_array[1, 0, 0])
dxs = 0.25 * (pixel_array[2, 1, 2] - pixel_array[2, 1, 0] - pixel_array[0, 1, 2] + pixel_array[0, 1, 0])
dys = 0.25 * (pixel_array[2, 2, 1] - pixel_array[2, 0, 1] - pixel_array[0, 2, 1] + pixel_array[0, 0, 1])
return array([[dxx, dxy, dxs],
[dxy, dyy, dys],
[dxs, dys, dss]])
#########################
# Keypoint orientations #
#########################
def _computeKeypointsWithOrientations(keypoint, octave_index, gaussian_image, radius_factor=3, num_bins=36,
peak_ratio=0.8, scale_factor=1.5):
"""Compute orientations for each keypoint
"""
logger.debug('Computing keypoint orientations...')
keypoints_with_orientations = []
image_shape = gaussian_image.shape
scale = scale_factor * keypoint.size / float32(
2 ** (octave_index + 1)) # compare with keypoint.size computation in localizeExtremumViaQuadraticFit()
radius = int(round(radius_factor * scale))
weight_factor = -0.5 / (scale ** 2)
raw_histogram = zeros(num_bins)
smooth_histogram = zeros(num_bins)
for i in range(-radius, radius + 1):
region_y = int(round(keypoint.pt[1] / float32(2 ** octave_index))) + i
if 0 < region_y < image_shape[0] - 1:
for j in range(-radius, radius + 1):
region_x = int(round(keypoint.pt[0] / float32(2 ** octave_index))) + j
if 0 < region_x < image_shape[1] - 1:
dx = gaussian_image[region_y, region_x + 1] - gaussian_image[region_y, region_x - 1]
dy = gaussian_image[region_y - 1, region_x] - gaussian_image[region_y + 1, region_x]
gradient_magnitude = sqrt(dx * dx + dy * dy)
gradient_orientation = rad2deg(arctan2(dy, dx))
weight = exp(weight_factor * (
i ** 2 + j ** 2)) # constant in front of exponential can be dropped because we will find peaks later
histogram_index = int(round(gradient_orientation * num_bins / 360.))
raw_histogram[histogram_index % num_bins] += weight * gradient_magnitude
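# Smooth the raw histogram with the 5-tap binomial kernel [1, 4, 6, 4, 1] / 16
# (negative indices wrap around via Python's negative indexing)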
for n in range(num_bins):
smooth_histogram[n] = (6 * raw_histogram[n] + 4 * (raw_histogram[n - 1] + raw_histogram[(n + 1) % num_bins]) +
raw_histogram[n - 2] + raw_histogram[(n + 2) % num_bins]) / 16.
orientation_max = max(smooth_histogram)
orientation_peaks = \
where(logical_and(smooth_histogram > roll(smooth_histogram, 1), smooth_histogram > roll(smooth_histogram, -1)))[
0]
for peak_index in orientation_peaks:
peak_value = smooth_histogram[peak_index]
if peak_value >= peak_ratio * orientation_max:
# Quadratic peak interpolation
# The interpolation update is given by equation (6.30) in https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
left_value = smooth_histogram[(peak_index - 1) % num_bins]
right_value = smooth_histogram[(peak_index + 1) % num_bins]
interpolated_peak_index = (peak_index + 0.5 * (left_value - right_value) / (
left_value - 2 * peak_value + right_value)) % num_bins
orientation = 360. - interpolated_peak_index * 360. / num_bins
if abs(orientation - 360.) < float_tolerance:
orientation = 0
new_keypoint = KeyPoint(*keypoint.pt, keypoint.size, orientation, keypoint.response, keypoint.octave)
keypoints_with_orientations.append(new_keypoint)
return keypoints_with_orientations
##############################
# Duplicate keypoint removal #
##############################
def _compareKeypoints(keypoint1, keypoint2):
"""Return True if keypoint1 is less than keypoint2
"""
if keypoint1.pt[0] != keypoint2.pt[0]:
return keypoint1.pt[0] - keypoint2.pt[0]
if keypoint1.pt[1] != keypoint2.pt[1]:
return keypoint1.pt[1] - keypoint2.pt[1]
if keypoint1.size != keypoint2.size:
return keypoint2.size - keypoint1.size
if keypoint1.angle != keypoint2.angle:
return keypoint1.angle - keypoint2.angle
if keypoint1.response != keypoint2.response:
return keypoint2.response - keypoint1.response
if keypoint1.octave != keypoint2.octave:
return keypoint2.octave - keypoint1.octave
return keypoint2.class_id - keypoint1.class_id
def _removeDuplicateKeypoints(keypoints):
"""Sort keypoints and remove duplicate keypoints
"""
if len(keypoints) < 2:
return keypoints
keypoints.sort(key=cmp_to_key(_compareKeypoints))
unique_keypoints = [keypoints[0]]
for next_keypoint in keypoints[1:]:
last_unique_keypoint = unique_keypoints[-1]
if last_unique_keypoint.pt[0] != next_keypoint.pt[0] or \
last_unique_keypoint.pt[1] != next_keypoint.pt[1] or \
last_unique_keypoint.size != next_keypoint.size or \
last_unique_keypoint.angle != next_keypoint.angle:
unique_keypoints.append(next_keypoint)
return unique_keypoints
#############################
# Keypoint scale conversion #
#############################
def _convertKeypointsToInputImageSize(keypoints):
"""Convert keypoint point, size, and octave to input image size
"""
converted_keypoints = []
for keypoint in keypoints:
keypoint.pt = tuple(0.5 * array(keypoint.pt))
keypoint.size *= 0.5
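# Decrement the octave byte by 1 so octave indices refer to the original
# input image rather than the doubled base image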
keypoint.octave = (keypoint.octave & ~255) | ((keypoint.octave - 1) & 255)
converted_keypoints.append(keypoint)
return converted_keypoints
#########################
# Descriptor generation #
#########################
def _unpackOctave(keypoint):
"""Compute octave, layer, and scale from a keypoint
"""
octave = keypoint.octave & 255
layer = (keypoint.octave >> 8) & 255
if octave >= 128:
octave = octave | -128
scale = 1 / float32(1 << octave) if octave >= 0 else float32(1 << -octave)
return octave, layer, scale
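# Example of the sign handling: a keypoint from the doubled base image stores
# 255 in the low byte, which decodes to octave = 255 | -128 = -1 (two's
# complement) and hence scale = float32(1 << 1) = 2.0.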
def _generateDescriptors(keypoints, gaussian_images, window_width=4, num_bins=8, scale_multiplier=3,
descriptor_max_value=0.2):
"""Generate descriptors for each keypoint
"""
logger.debug('Generating descriptors...')
descriptors = []
for keypoint in keypoints:
octave, layer, scale = _unpackOctave(keypoint)
gaussian_image = gaussian_images[octave + 1, layer]
num_rows, num_cols = gaussian_image.shape
point = round(scale * array(keypoint.pt)).astype('int')
bins_per_degree = num_bins / 360.
angle = 360. - keypoint.angle
cos_angle = cos(deg2rad(angle))
sin_angle = sin(deg2rad(angle))
weight_multiplier = -0.5 / ((0.5 * window_width) ** 2)
row_bin_list = []
col_bin_list = []
magnitude_list = []
orientation_bin_list = []
histogram_tensor = zeros((window_width + 2, window_width + 2,
num_bins)) # first two dimensions are increased by 2 to account for border effects
# Descriptor window size (described by half_width) follows OpenCV convention
hist_width = scale_multiplier * 0.5 * scale * keypoint.size
half_width = int(
round(hist_width * sqrt(2) * (window_width + 1) * 0.5)) # sqrt(2) corresponds to diagonal length of a pixel
half_width = int(min(half_width, sqrt(num_rows ** 2 + num_cols ** 2))) # ensure half_width lies within image
for row in range(-half_width, half_width + 1):
for col in range(-half_width, half_width + 1):
row_rot = col * sin_angle + row * cos_angle
col_rot = col * cos_angle - row * sin_angle
row_bin = (row_rot / hist_width) + 0.5 * window_width - 0.5
col_bin = (col_rot / hist_width) + 0.5 * window_width - 0.5
if -1 < row_bin < window_width and -1 < col_bin < window_width:
window_row = int(round(point[1] + row))
window_col = int(round(point[0] + col))
if 0 < window_row < num_rows - 1 and 0 < window_col < num_cols - 1:
dx = gaussian_image[window_row, window_col + 1] - gaussian_image[window_row, window_col - 1]
dy = gaussian_image[window_row - 1, window_col] - gaussian_image[window_row + 1, window_col]
gradient_magnitude = sqrt(dx * dx + dy * dy)
gradient_orientation = rad2deg(arctan2(dy, dx)) % 360
weight = exp(weight_multiplier * ((row_rot / hist_width) ** 2 + (col_rot / hist_width) ** 2))
row_bin_list.append(row_bin)
col_bin_list.append(col_bin)
magnitude_list.append(weight * gradient_magnitude)
orientation_bin_list.append((gradient_orientation - angle) * bins_per_degree)
for row_bin, col_bin, magnitude, orientation_bin in zip(row_bin_list, col_bin_list, magnitude_list,
orientation_bin_list):
# Smoothing via trilinear interpolation
# Notations follows https://en.wikipedia.org/wiki/Trilinear_interpolation
# Note that we are really doing the inverse of trilinear interpolation here (we take the center value of the cube and distribute it among its eight neighbors)
row_bin_floor, col_bin_floor, orientation_bin_floor = floor([row_bin, col_bin, orientation_bin]).astype(int)
row_fraction, col_fraction, orientation_fraction = row_bin - row_bin_floor, col_bin - col_bin_floor, orientation_bin - orientation_bin_floor
if orientation_bin_floor < 0:
orientation_bin_floor += num_bins
if orientation_bin_floor >= num_bins:
orientation_bin_floor -= num_bins
c1 = magnitude * row_fraction
c0 = magnitude * (1 - row_fraction)
c11 = c1 * col_fraction
c10 = c1 * (1 - col_fraction)
c01 = c0 * col_fraction
c00 = c0 * (1 - col_fraction)
c111 = c11 * orientation_fraction
c110 = c11 * (1 - orientation_fraction)
c101 = c10 * orientation_fraction
c100 = c10 * (1 - orientation_fraction)
c011 = c01 * orientation_fraction
c010 = c01 * (1 - orientation_fraction)
c001 = c00 * orientation_fraction
c000 = c00 * (1 - orientation_fraction)
histogram_tensor[row_bin_floor + 1, col_bin_floor + 1, orientation_bin_floor] += c000
histogram_tensor[row_bin_floor + 1, col_bin_floor + 1, (orientation_bin_floor + 1) % num_bins] += c001
histogram_tensor[row_bin_floor + 1, col_bin_floor + 2, orientation_bin_floor] += c010
histogram_tensor[row_bin_floor + 1, col_bin_floor + 2, (orientation_bin_floor + 1) % num_bins] += c011
histogram_tensor[row_bin_floor + 2, col_bin_floor + 1, orientation_bin_floor] += c100
histogram_tensor[row_bin_floor + 2, col_bin_floor + 1, (orientation_bin_floor + 1) % num_bins] += c101
histogram_tensor[row_bin_floor + 2, col_bin_floor + 2, orientation_bin_floor] += c110
histogram_tensor[row_bin_floor + 2, col_bin_floor + 2, (orientation_bin_floor + 1) % num_bins] += c111
descriptor_vector = histogram_tensor[1:-1, 1:-1, :].flatten() # Remove histogram borders
# Threshold and normalize descriptor_vector
threshold = norm(descriptor_vector) * descriptor_max_value
descriptor_vector[descriptor_vector > threshold] = threshold
descriptor_vector /= max(norm(descriptor_vector), float_tolerance)
# Multiply by 512, round, and saturate between 0 and 255 to convert from float32 to unsigned char (OpenCV convention)
descriptor_vector = round(512 * descriptor_vector)
descriptor_vector[descriptor_vector < 0] = 0
descriptor_vector[descriptor_vector > 255] = 255
descriptors.append(descriptor_vector)
return array(descriptors, dtype='float32')
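####################
# Usage example    #
####################
# A minimal usage sketch, not part of the original module: it assumes a
# grayscale test image named 'box.png' next to this script (a hypothetical
# path -- substitute any image you have). It runs the pipeline above and
# saves a visualization of the detected keypoints using OpenCV.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    test_image = cv2.imread('box.png', cv2.IMREAD_GRAYSCALE)  # hypothetical test image path
    if test_image is None:
        raise FileNotFoundError("'box.png' not found; point cv2.imread at your own grayscale image")
    keypoints, descriptors = computeKeypointsAndDescriptors(test_image)
    print('Detected %d keypoints; descriptor array shape: %s' % (len(keypoints), descriptors.shape))
    # Draw keypoints with their size and orientation and write the result to disk
    visualization = cv2.drawKeypoints(test_image, keypoints, None,
                                      flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    cv2.imwrite('sift_keypoints.png', visualization)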