From 86184157b9e0719dba019c78769b89788e2ce432 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 01:23:17 +0900 Subject: [PATCH 01/24] rename psroi_pooling_2d -> ps_roi_average_pooling_2d --- .../links/model/fcis/fcis_resnet101.py | 6 +++--- chainercv/functions/__init__.py | 3 +-- ...pooling_2d.py => ps_roi_average_pooling_2d.py} | 14 +++++++------- ...ng_2d.py => test_ps_roi_average_pooling_2d.py} | 15 ++++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) rename chainercv/functions/{psroi_pooling_2d.py => ps_roi_average_pooling_2d.py} (97%) rename tests/functions_tests/{test_psroi_pooling_2d.py => test_ps_roi_average_pooling_2d.py} (89%) diff --git a/chainercv/experimental/links/model/fcis/fcis_resnet101.py b/chainercv/experimental/links/model/fcis/fcis_resnet101.py index c706fe68c6..70e5419d13 100644 --- a/chainercv/experimental/links/model/fcis/fcis_resnet101.py +++ b/chainercv/experimental/links/model/fcis/fcis_resnet101.py @@ -6,7 +6,7 @@ import numpy as np from chainercv.experimental.links.model.fcis import FCIS -from chainercv.functions import psroi_pooling_2d +from chainercv.functions import ps_roi_average_pooling_2d from chainercv.links import Conv2DBNActiv from chainercv.links.model.faster_rcnn.region_proposal_network import \ RegionProposalNetwork @@ -365,7 +365,7 @@ def _pool( self, h_cls_seg, h_ag_loc, rois, roi_indices, gt_roi_labels): # PSROI Pooling # shape: (n_roi, n_class, 2, roi_size, roi_size) - roi_cls_ag_seg_scores = psroi_pooling_2d( + roi_cls_ag_seg_scores = ps_roi_average_pooling_2d( h_cls_seg, rois, roi_indices, self.n_class * 2, self.roi_size, self.roi_size, self.spatial_scale, self.group_size) @@ -374,7 +374,7 @@ def _pool( (-1, self.n_class, 2, self.roi_size, self.roi_size)) # shape: (n_roi, 2*4, roi_size, roi_size) - roi_ag_loc_scores = psroi_pooling_2d( + roi_ag_loc_scores = ps_roi_average_pooling_2d( h_ag_loc, rois, roi_indices, 2 * 4, self.roi_size, self.roi_size, self.spatial_scale, self.group_size) 
diff --git a/chainercv/functions/__init__.py b/chainercv/functions/__init__.py index 5e5e7dde05..71fa9b6802 100644 --- a/chainercv/functions/__init__.py +++ b/chainercv/functions/__init__.py @@ -1,2 +1 @@ -from chainercv.functions.psroi_pooling_2d import psroi_pooling_2d # NOQA -from chainercv.functions.psroi_pooling_2d import PSROIPooling2D # NOQA +from chainercv.functions.ps_roi_average_pooling_2d import ps_roi_average_pooling_2d # NOQA diff --git a/chainercv/functions/psroi_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py similarity index 97% rename from chainercv/functions/psroi_pooling_2d.py rename to chainercv/functions/ps_roi_average_pooling_2d.py index 4944d9433d..08b2518927 100644 --- a/chainercv/functions/psroi_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -47,7 +47,7 @@ def _roi_pooling_slice(size, stride, max_size, roi_offset): return slice(start, end), end - start -class PSROIPooling2D(function.Function): +class PSROIAveragePooling2D(function.Function): def __init__(self, out_c, out_h, out_w, spatial_scale, group_size): self.out_c, self.out_h, self.out_w = out_c, out_h, out_w @@ -197,7 +197,7 @@ def forward_gpu(self, inputs): float bin_area = (hend - hstart) * (wend - wstart); top_data = is_empty? (float) 0. 
: out_sum / bin_area; - ''', 'psroi_pooling_2d_fwd' + ''', 'ps_roi_average_pooling_2d_fwd' )(bottom_data, bottom_rois, bottom_roi_indices, self.spatial_scale, channels, height, width, self.out_c, self.out_h, self.out_w, self.group_size, @@ -327,7 +327,7 @@ def backward_gpu(self, inputs, gy): &bottom_diff[bottom_diff_offset + bottom_index], diff_val); } } - ''', 'psroi_pooling_2d_bwd' + ''', 'ps_roi_average_pooling_2d_bwd' )(gy[0], bottom_rois, bottom_roi_indices, self.spatial_scale, channels, height, width, self.out_c, self.out_h, self.out_w, @@ -336,11 +336,11 @@ def backward_gpu(self, inputs, gy): return bottom_diff, None, None -def psroi_pooling_2d( +def ps_roi_average_pooling_2d( x, rois, roi_indices, out_c, out_h, out_w, spatial_scale, group_size ): - """Position Sensitive Region of Interest (ROI) pooling function. + """Position Sensitive Region of Interest (ROI) Average pooling function. This function computes position sensitive average of input spatial patch with the given region of interests. Each ROI is splitted into @@ -368,5 +368,5 @@ def psroi_pooling_2d( `R-FCN `_. 
""" - return PSROIPooling2D(out_c, out_h, out_w, spatial_scale, - group_size)(x, rois, roi_indices) + return PSROIAveragePooling2D(out_c, out_h, out_w, spatial_scale, + group_size)(x, rois, roi_indices) diff --git a/tests/functions_tests/test_psroi_pooling_2d.py b/tests/functions_tests/test_ps_roi_average_pooling_2d.py similarity index 89% rename from tests/functions_tests/test_psroi_pooling_2d.py rename to tests/functions_tests/test_ps_roi_average_pooling_2d.py index 59c66562d5..2809b9bf9d 100644 --- a/tests/functions_tests/test_psroi_pooling_2d.py +++ b/tests/functions_tests/test_ps_roi_average_pooling_2d.py @@ -10,7 +10,7 @@ from chainercv import functions -class TestPSROIPolling2D(unittest.TestCase): +class TestPSROIAveragePolling2D(unittest.TestCase): def setUp(self): self.N = 3 @@ -43,7 +43,7 @@ def check_forward(self, x_data, roi_data, roi_index_data): x = chainer.Variable(x_data) rois = chainer.Variable(roi_data) roi_indices = chainer.Variable(roi_index_data) - y = functions.psroi_pooling_2d( + y = functions.ps_roi_average_pooling_2d( x, rois, roi_indices, self.out_c, self.out_h, self.out_w, self.spatial_scale, self.group_size) self.assertEqual(y.data.dtype, np.float32) @@ -63,11 +63,12 @@ def test_forward_gpu(self): cuda.to_gpu(self.roi_indices)) def check_backward(self, x_data, roi_data, roi_index_data, y_grad_data): + def f(x, rois, roi_indices): + return functions.ps_roi_average_pooling_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size) gradient_check.check_backward( - functions.PSROIPooling2D( - self.out_c, self.out_h, self.out_w, - self.spatial_scale, self.group_size), - (x_data, roi_data, roi_index_data), y_grad_data, + f, (x_data, roi_data, roi_index_data), y_grad_data, no_grads=[False, True, True], **self.check_backward_options) @condition.retry(3) @@ -85,7 +86,7 @@ def apply_backward(self, x_data, roi_data, roi_index_data, y_grad_data): x = chainer.Variable(x_data) rois = 
chainer.Variable(roi_data) roi_indices = chainer.Variable(roi_index_data) - y = functions.psroi_pooling_2d( + y = functions.ps_roi_average_pooling_2d( x, rois, roi_indices, self.out_c, self.out_h, self.out_w, self.spatial_scale, self.group_size) x.cleargrad() From d988ad6c8284307fdc51f6186af6e115d7fcd720 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 01:55:48 +0900 Subject: [PATCH 02/24] add ps_roi_average_align_2d --- chainercv/functions/__init__.py | 1 + .../functions/ps_roi_average_align_2d.py | 629 ++++++++++++++++++ .../test_ps_roi_average_align_2d.py | 115 ++++ 3 files changed, 745 insertions(+) create mode 100644 chainercv/functions/ps_roi_average_align_2d.py create mode 100644 tests/functions_tests/test_ps_roi_average_align_2d.py diff --git a/chainercv/functions/__init__.py b/chainercv/functions/__init__.py index 71fa9b6802..a78f903a83 100644 --- a/chainercv/functions/__init__.py +++ b/chainercv/functions/__init__.py @@ -1 +1,2 @@ +from chainercv.functions.ps_roi_average_align_2d import ps_roi_average_align_2d # NOQA from chainercv.functions.ps_roi_average_pooling_2d import ps_roi_average_pooling_2d # NOQA diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py new file mode 100644 index 0000000000..d3b29f3ebc --- /dev/null +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -0,0 +1,629 @@ +# Modified work: +# ----------------------------------------------------------------------------- +# Copyright (c) 2018 Preferred Infrastructure, Inc. +# Copyright (c) 2018 Preferred Networks, Inc. 
+# ----------------------------------------------------------------------------- + +# Original work: +# ----------------------------------------------------------------------------- +# Copyright (c) 2015 by Contributors +# \file roi_pooling.cu +# \brief roi pooling operator +# \author Ross Girshick, Kye-Hyeon Kim, Jian Guo +# \changed to roi_align by Elaine Bao +# \file roi_align.cu +# \roi align operator described in Mask RCNN +# ----------------------------------------------------------------------------- + +from __future__ import division + +import numpy as np +import six + +import chainer +from chainer.backends import cuda +from chainer import function +from chainer.utils import type_check + + +def _pair(x): + if isinstance(x, chainer.utils.collections_abc.Iterable): + return x + return x, x + + +def _get_bilinear_interp_params(y, x, height, width): + if y < -1 or y > height or x < -1 or x > width: + # out of range, so it is empty + return (None,) * 8 + + if y <= 0: + y = 0 + if x <= 0: + x = 0 + + y_low = int(y) + x_low = int(x) + + if y_low >= height - 1: + y_high = y_low = height - 1 + y = float(y_low) + else: + y_high = y_low + 1 + + if x_low >= width - 1: + x_high = x_low = width - 1 + x = float(x_low) + else: + x_high = x_low + 1 + + ly = y - y_low + lx = x - x_low + hy = 1. - ly + hx = 1. - lx + + w1 = hy * hx + w2 = hy * lx + w3 = ly * hx + w4 = ly * lx + + return y_low, x_low, y_high, x_high, w1, w2, w3, w4 + + +_GET_BILINEAR_INTERP_KERNEL = ''' +__device__ +bool get_bilinear_interp_params( + T x, T y, const int height, const int width, + int &y_low, int &x_low, int &y_high, int &x_high, + T &w1, T &w2, T &w3, T &w4) { + // deal with cases that inverse elements are + // out of feature map boundary + if (y < -1. || y > height || x < -1. 
|| x > width) { + // empty + return false; + } + + if (y <= 0) { + y = 0; + } + if (x <= 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly; + T hx = 1. - lx; + + w1 = hy * hx; + w2 = hy * lx; + w3 = ly * hx; + w4 = ly * lx; + + return true; +} +''' + + +class PSROIAverageAlign2D(function.Function): + + def __init__( + self, out_c, out_h, out_w, spatial_scale, + group_size, sampling_ratio=None + ): + if not (isinstance(out_c, int) and out_c > 0): + raise TypeError( + 'out_c must be positive integer: {}, {}' + .format(type(out_c), out_c)) + if not (isinstance(out_h, int) and out_h > 0): + raise TypeError( + 'out_h must be positive integer: {}, {}' + .format(type(out_h), out_h)) + if not (isinstance(out_w, int) and out_w > 0): + raise TypeError( + 'out_w must be positive integer: {}, {}' + .format(type(out_w), out_w)) + if isinstance(spatial_scale, int): + spatial_scale = float(spatial_scale) + if not (isinstance(group_size, int) and group_size > 0): + raise TypeError( + 'group_size must be positive integer: {}, {}' + .format(type(group_size), group_size)) + if not (isinstance(spatial_scale, float) and spatial_scale > 0): + raise TypeError( + 'spatial_scale must be a positive float number: {}, {}' + .format(type(spatial_scale), spatial_scale)) + sampling_ratio = _pair(sampling_ratio) + if not all((isinstance(s, int) and s >= 1) or s is None + for s in sampling_ratio): + raise TypeError( + 'sampling_ratio must be integer >= 1 or a pair of it: {}' + .format(sampling_ratio)) + + self.out_c, self.out_h, self.out_w = out_c, out_h, out_w + self.spatial_scale = spatial_scale + self.group_size = group_size + self.sampling_ratio = sampling_ratio + + def check_type_forward(self, in_types): 
+ type_check.expect(in_types.size() == 3) + + x_type, roi_type, roi_index_type = in_types + type_check.expect( + x_type.dtype == np.float32, + x_type.ndim == 4, + roi_type.dtype == np.float32, + roi_type.ndim == 2, + roi_type.shape[1] == 4, + roi_index_type.dtype == np.int32, + roi_index_type.ndim == 1, + roi_type.shape[0] == roi_index_type.shape[0] + ) + + def forward_cpu(self, inputs): + self.retain_inputs((1, 2)) + self._bottom_data_shape = inputs[0].shape + + bottom_data, bottom_rois, bottom_roi_indices = inputs + channels, height, width = bottom_data.shape[1:] + n_roi = bottom_rois.shape[0] + top_data = np.empty( + (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) + + group_size = self.group_size + pooled_dim, pooled_width, pooled_height \ + = self.out_c, self.out_w, self.out_h + spatial_scale = self.spatial_scale + + for i in six.moves.range(top_data.size): + pw = i % pooled_width + ph = int(i / pooled_width) % pooled_height + ctop = int(i / pooled_width / pooled_height) % pooled_dim + n = int(i / pooled_width / pooled_height / pooled_dim) + + roi_batch_ind = int(bottom_roi_indices[n]) + roi_start_h = bottom_rois[n, 0] * spatial_scale + roi_start_w = bottom_rois[n, 1] * spatial_scale + roi_end_h = bottom_rois[n, 2] * spatial_scale + roi_end_w = bottom_rois[n, 3] * spatial_scale + + roi_height = max(roi_end_h - roi_start_h, 1.) + roi_width = max(roi_end_w - roi_start_w, 1.) + bin_size_h = 1. * roi_height / pooled_height + bin_size_w = 1. 
* roi_width / pooled_width + + gh = np.floor(float(ph) * group_size / pooled_height) + gw = np.floor(float(pw) * group_size / pooled_width) + gh = int(min(max(gh, 0), group_size - 1)) + gw = int(min(max(gw, 0), group_size - 1)) + c = (ctop * group_size + gh) * group_size + gw + + if self.sampling_ratio[0] is None: + roi_bin_grid_h = np.ceil(roi_height / pooled_height) + else: + roi_bin_grid_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + roi_bin_grid_w = np.ceil(roi_width / pooled_width) + else: + roi_bin_grid_w = self.sampling_ratio[1] + + count = roi_bin_grid_h * roi_bin_grid_w + + output_val = 0. + iy = 0 + while iy < roi_bin_grid_h: + y = roi_start_h + ph * bin_size_h + \ + (iy + .5) * bin_size_h / roi_bin_grid_h + ix = 0 + while ix < roi_bin_grid_w: + x = roi_start_w + pw * bin_size_w + \ + (ix + .5) * bin_size_w / roi_bin_grid_w + + # bilinear interpolation {{ + y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ + _get_bilinear_interp_params(y, x, height, width) + if y_low is None: + continue + + v1 = bottom_data[roi_batch_ind, c, y_low, x_low] + v2 = bottom_data[roi_batch_ind, c, y_low, x_high] + v3 = bottom_data[roi_batch_ind, c, y_high, x_low] + v4 = bottom_data[roi_batch_ind, c, y_high, x_high] + + output_val += w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4 + + # }} + + ix += 1 + iy += 1 + + output_val /= count + top_data[n, ctop, ph, pw] = output_val + + return top_data, + + def forward_gpu(self, inputs): + self.retain_inputs((1, 2)) + self._bottom_data_shape = inputs[0].shape + + bottom_data, bottom_rois, bottom_roi_indices = inputs + channels, height, width = bottom_data.shape[1:] + n_roi = bottom_rois.shape[0] + top_data = cuda.cupy.empty( + (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) + if self.sampling_ratio[0] is None: + sampling_ratio_h = 0 + else: + sampling_ratio_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + sampling_ratio_w = 0 + else: + sampling_ratio_w = self.sampling_ratio[1] + cuda.elementwise( 
+ ''' + raw T bottom_data, T spatial_scale, int32 channels, + int32 height, int32 width, + int32 pooled_dim, int32 pooled_height, int32 pooled_width, + int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w, + raw T bottom_rois, raw int32 bottom_roi_indices + + ''', + 'T top_data', + ''' + // pos in output filter + int ph = (i / pooled_width) % pooled_height; + int pw = i % pooled_width; + int ctop = (i / pooled_width / pooled_height) % pooled_dim; + int n = i / pooled_width / pooled_height / pooled_dim; + + int roi_batch_ind = bottom_roi_indices[n]; + T roi_start_h = static_cast<T>( + round(bottom_rois[n * 4 + 0])) * spatial_scale; + T roi_start_w = static_cast<T>( + round(bottom_rois[n * 4 + 1])) * spatial_scale; + T roi_end_h = static_cast<T>( + round(bottom_rois[n * 4 + 2])) * spatial_scale; + T roi_end_w = static_cast<T>( + round(bottom_rois[n * 4 + 3])) * spatial_scale; + + // Clamp too small ROIs to at least 0.1 + T roi_height = max(roi_end_h - roi_start_h, 0.1); + T roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 + + // Compute w and h at bottom + T bin_size_h = roi_height / static_cast<T>(pooled_height); + T bin_size_w = roi_width / static_cast<T>(pooled_width); + + // Compute c at bottom + int gh = floor( + static_cast<T>(ph) * group_size / pooled_height); + int gw = floor( + static_cast<T>(pw) * group_size / pooled_width); + gh = min(max(gh, 0), group_size - 1); + gw = min(max(gw, 0), group_size - 1); + int c = (ctop * group_size + gh) * group_size + gw; + + int bottom_data_offset = + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio_h > 0) + ? sampling_ratio_h + : ceil(roi_height / pooled_height); // e.g. = 2 + int roi_bin_grid_w = (sampling_ratio_w > 0) + ? sampling_ratio_w + : ceil(roi_width / pooled_width); + + T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g. 
iy = 0, 1 + { + T y = roi_start_h + ph * bin_size_h + + static_cast<T>(iy + .5f) * bin_size_h / + static_cast<T>(roi_bin_grid_h); // e.g. 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + T x = roi_start_w + pw * bin_size_w + + static_cast<T>(ix + .5f) * bin_size_w / + static_cast<T>(roi_bin_grid_w); + + // bilinear_interpolation {{ + int y_low, x_low, y_high, x_high; + T w1, w2, w3, w4; + bool ret = get_bilinear_interp_params( + x, y, height, width, + y_low, x_low, y_high, x_high, + w1, w2, w3, w4 + ); + if (!ret) { + continue; + } + + T v1 = bottom_data[bottom_data_offset + + y_low * width + x_low]; + T v2 = bottom_data[bottom_data_offset + + y_low * width + x_high]; + T v3 = bottom_data[bottom_data_offset + + y_high * width + x_low]; + T v4 = bottom_data[bottom_data_offset + + y_high * width + x_high]; + // }} + + output_val += (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + } + } + output_val /= count; + + top_data = output_val; + ''', + 'ps_roi_average_align_2d_fwd', + preamble=_GET_BILINEAR_INTERP_KERNEL, + )(bottom_data, self.spatial_scale, channels, height, width, + self.out_c, self.out_h, self.out_w, self.group_size, + sampling_ratio_h, sampling_ratio_w, + bottom_rois, bottom_roi_indices, top_data) + + return top_data, + + def backward_cpu(self, inputs, gy): + _, bottom_rois, bottom_roi_indices = inputs + channels, height, width = self._bottom_data_shape[1:] + bottom_diff = np.zeros(self._bottom_data_shape, np.float32) + + spatial_scale = self.spatial_scale + pooled_dim = self.out_c + pooled_height = self.out_h + pooled_width = self.out_w + group_size = self.group_size + top_diff = gy[0] + + for i in six.moves.range(top_diff.size): + pw = i % pooled_width + ph = int(i / pooled_width) % pooled_height + ctop = int(i / pooled_width / pooled_height) % pooled_dim + n = int(i / pooled_width / pooled_height / pooled_dim) + + roi_batch_ind = int(bottom_roi_indices[n]) + roi_start_h = bottom_rois[n, 0] * spatial_scale + roi_start_w = bottom_rois[n, 1] * spatial_scale + 
roi_end_h = bottom_rois[n, 2] * spatial_scale + roi_end_w = bottom_rois[n, 3] * spatial_scale + + roi_width = max(roi_end_w - roi_start_w, 1.) + roi_height = max(roi_end_h - roi_start_h, 1.) + bin_size_h = 1. * roi_height / pooled_height + bin_size_w = 1. * roi_width / pooled_width + + gh = np.floor(float(ph) * group_size / pooled_height) + gw = np.floor(float(pw) * group_size / pooled_width) + gh = int(min(max(gh, 0), group_size - 1)) + gw = int(min(max(gw, 0), group_size - 1)) + c = (ctop * group_size + gh) * group_size + gw + + top_diff_this_bin = top_diff[n, ctop, ph, pw] + + if self.sampling_ratio[0] is None: + roi_bin_grid_h = np.ceil(roi_height / pooled_height) + else: + roi_bin_grid_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + roi_bin_grid_w = np.ceil(roi_width / pooled_width) + else: + roi_bin_grid_w = self.sampling_ratio[1] + + count = roi_bin_grid_h * roi_bin_grid_w + + iy = 0 + while iy < roi_bin_grid_h: + y = roi_start_h + ph * bin_size_h + \ + (iy + .5) * bin_size_h / roi_bin_grid_h + ix = 0 + while ix < roi_bin_grid_w: + x = roi_start_w + pw * bin_size_w + \ + (ix + .5) * bin_size_w / roi_bin_grid_w + + # bilinear_interpolation_gradient {{ + y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ + _get_bilinear_interp_params(y, x, height, width) + if y_low is None: + continue + + g1 = top_diff_this_bin * w1 / count + g2 = top_diff_this_bin * w2 / count + g3 = top_diff_this_bin * w3 / count + g4 = top_diff_this_bin * w4 / count + + if (x_low >= 0 and x_high >= 0 and + y_low >= 0 and y_high >= 0): + bottom_diff[roi_batch_ind, c, y_low, x_low] += g1 + bottom_diff[roi_batch_ind, c, y_low, x_high] += g2 + bottom_diff[roi_batch_ind, c, y_high, x_low] += g3 + bottom_diff[roi_batch_ind, c, y_high, x_high] += g4 + ix += 1 + iy += 1 + + return bottom_diff, None, None + + def backward_gpu(self, inputs, gy): + _, bottom_rois, bottom_roi_indices = inputs + channels, height, width = self._bottom_data_shape[1:] + bottom_diff = 
cuda.cupy.zeros(self._bottom_data_shape, np.float32) + + if self.sampling_ratio[0] is None: + sampling_ratio_h = 0 + else: + sampling_ratio_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + sampling_ratio_w = 0 + else: + sampling_ratio_w = self.sampling_ratio[1] + cuda.elementwise( + ''' + raw T top_diff, T spatial_scale, + int32 channels, int32 height, int32 width, + int32 pooled_dim, int32 pooled_height, int32 pooled_width, + int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w, + raw T bottom_rois, raw int32 bottom_roi_indices + ''', + 'raw T bottom_diff', + ''' + // (n, c, h, w) coords in bottom data + int pw = i % pooled_width; + int ph = (i / pooled_width) % pooled_height; + int ctop = (i / pooled_width / pooled_height) % pooled_dim; + int n = i / pooled_width / pooled_height / pooled_dim; + + // NOTE(review): coords are rounded here but not on the CPU path + int roi_batch_ind = bottom_roi_indices[n]; + T roi_start_h = static_cast<T>( + round(bottom_rois[n * 4 + 0])) * spatial_scale; + T roi_start_w = static_cast<T>( + round(bottom_rois[n * 4 + 1])) * spatial_scale; + T roi_end_h = static_cast<T>( + round(bottom_rois[n * 4 + 2])) * spatial_scale; + T roi_end_w = static_cast<T>( + round(bottom_rois[n * 4 + 3])) * spatial_scale; + + // Clamp too small ROIs to at least 0.1 + T roi_height = max(roi_end_h - roi_start_h, 0.1); + T roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 + + // Compute w and h at bottom + T bin_size_h = roi_height / static_cast<T>(pooled_height); + T bin_size_w = roi_width / static_cast<T>(pooled_width); + + // Compute c at bottom + int gh = floor( + static_cast<T>(ph) * group_size / pooled_height); + int gw = floor( + static_cast<T>(pw) * group_size / pooled_width); + gh = min(max(gh, 0), group_size - 1); + gw = min(max(gw, 0), group_size - 1); + int c = (ctop * group_size + gh) * group_size + gw; + + int bottom_diff_offset = + (roi_batch_ind * channels + c) * height * width; + + int top_offset = + (n * pooled_dim + ctop) * 
pooled_height * pooled_width; + T top_diff_this_bin = + top_diff[top_offset + ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio_h > 0) + ? sampling_ratio_h + : ceil(roi_height / pooled_height); // e.g. = 2 + int roi_bin_grid_w = (sampling_ratio_w > 0) + ? sampling_ratio_w + : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + T y = roi_start_h + ph * bin_size_h + + static_cast<T>(iy + .5f) * bin_size_h / + static_cast<T>(roi_bin_grid_h); // e.g. 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + T x = roi_start_w + pw * bin_size_w + + static_cast<T>(ix + .5f) * bin_size_w / + static_cast<T>(roi_bin_grid_w); + + // bilinear_interpolation_gradient {{ + int y_low, x_low, y_high, x_high; + T w1, w2, w3, w4; + bool ret = get_bilinear_interp_params( + x, y, height, width, + y_low, x_low, y_high, x_high, + w1, w2, w3, w4 + ); + if (!ret) { + continue; + } + + T g1 = top_diff_this_bin * w1 / count; + T g2 = top_diff_this_bin * w2 / count; + T g3 = top_diff_this_bin * w3 / count; + T g4 = top_diff_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && + y_low >= 0 && y_high >= 0) { + atomicAdd(&bottom_diff[bottom_diff_offset + + y_low * width + x_low], g1); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_low * width + x_high], g2); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_high * width + x_low], g3); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_high * width + x_high], g4); + } + } + } + ''', 'ps_roi_average_align_2d_bwd', + preamble=_GET_BILINEAR_INTERP_KERNEL, + )(gy[0], self.spatial_scale, channels, height, width, + self.out_c, self.out_h, self.out_w, + self.group_size, sampling_ratio_h, sampling_ratio_w, + bottom_rois, bottom_roi_indices, bottom_diff, size=gy[0].size) + + return bottom_diff, None, None + + +def ps_roi_average_align_2d( + 
x, rois, roi_indices, out_c, out_h, out_w, + spatial_scale, group_size, sampling_ratio=None +): + """Position Sensitive Region of Interest (ROI) Average align function. + + This function computes position sensitive average of input spatial patch + with the given regions of interest. Each ROI is split into + :math:`(group\_size, group\_size)` regions, and position sensitive values + in each region are computed. + + Args: + x (~chainer.Variable): Input variable. The shape is expected to be + 4 dimensional: (n: batch, c: channel, h: height, w: width). + rois (array): Input roi. The shape is expected to + be :math:`(R, 4)`, and each datum is set as below: + (y_min, x_min, y_max, x_max). The dtype is :obj:`numpy.float32`. + roi_indices (array): Input roi indices. The shape is expected to + be :math:`(R, )`. The dtype is :obj:`numpy.int32`. + out_c (int): Channels of output image after pooled. + out_h (int): Height of output image after pooled. + out_w (int): Width of output image after pooled. + spatial_scale (float): Scale by which the roi is resized. + group_size (int): Position sensitive group size. + sampling_ratio ((int, int) or int): Sampling step for the alignment. + It must be a positive integer or :obj:`None`, and the value + is automatically decided when :obj:`None` is passed. Use of + different ratio in height and width axis is also supported by + passing tuple of int as ``(sampling_ratio_h, sampling_ratio_w)``. + ``sampling_ratio=s`` and ``sampling_ratio=(s, s)`` are equivalent. + + Returns: + ~chainer.Variable: Output variable. + + See the original paper proposing PSROIPooling: + `R-FCN <https://arxiv.org/abs/1605.06409>`_. 
+ + """ + return PSROIAverageAlign2D( + out_c, out_h, out_w, spatial_scale, + group_size, sampling_ratio)(x, rois, roi_indices) diff --git a/tests/functions_tests/test_ps_roi_average_align_2d.py b/tests/functions_tests/test_ps_roi_average_align_2d.py new file mode 100644 index 0000000000..16425ad4f1 --- /dev/null +++ b/tests/functions_tests/test_ps_roi_average_align_2d.py @@ -0,0 +1,115 @@ +import chainer +from chainer.backends import cuda +from chainer import gradient_check +from chainer import testing +from chainer.testing import attr +from chainer.testing import condition +import numpy as np +import unittest + +from chainercv import functions + + +@testing.parameterize(*testing.product({ + 'sampling_ratio': [None, 1, 2, (None, 3), (1, 2)], +})) +class TestPSROIAveragePolling2D(unittest.TestCase): + + def setUp(self): + self.N = 3 + self.group_size = 2 + self.out_c = 2 + self.n_channels = self.group_size * self.group_size * self.out_c + self.x = np.arange( + self.N * self.n_channels * 10 * 12, + dtype=np.float32).reshape((self.N, self.n_channels, 10, 12)) + np.random.shuffle(self.x) + self.x = 2 * self.x / self.x.size - 1 + self.x = self.x.astype(np.float32) + self.rois = np.array( + [[0, 0, 7, 7], + [1, 0, 5, 12], + [0, 1, 10, 5], + [3, 3, 4, 4]], + dtype=np.float32 + ) + self.roi_indices = np.array([0, 2, 1, 0], dtype=np.int32) + self.n_roi = self.rois.shape[0] + self.out_h, self.out_w = 4, 4 + self.spatial_scale = 1.0 + self.gy = np.random.uniform( + -1, 1, (self.n_roi, self.out_c, self.out_h, self.out_w)) + self.gy = self.gy.astype(np.float32) + self.check_backward_options = {'atol': 5e-4, 'rtol': 5e-3} + + def check_forward(self, x_data, roi_data, roi_index_data): + x = chainer.Variable(x_data) + rois = chainer.Variable(roi_data) + roi_indices = chainer.Variable(roi_index_data) + y = functions.ps_roi_average_align_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size, + sampling_ratio=self.sampling_ratio) + 
self.assertEqual(y.data.dtype, np.float32) + y_data = cuda.to_cpu(y.data) + self.assertEqual( + (self.n_roi, self.out_c, self.out_h, self.out_w), y_data.shape) + + @condition.retry(3) + def test_forward_cpu(self): + self.check_forward(self.x, self.rois, self.roi_indices) + + @attr.gpu + @condition.retry(3) + def test_forward_gpu(self): + self.check_forward( + cuda.to_gpu(self.x), cuda.to_gpu(self.rois), + cuda.to_gpu(self.roi_indices)) + + def check_backward(self, x_data, roi_data, roi_index_data, y_grad_data): + def f(x, rois, roi_indices): + return functions.ps_roi_average_align_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size, + sampling_ratio=self.sampling_ratio) + gradient_check.check_backward( + f, (x_data, roi_data, roi_index_data), y_grad_data, + no_grads=[False, True, True], **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu(self): + self.check_backward(self.x, self.rois, self.roi_indices, self.gy) + + @attr.gpu + @condition.retry(3) + def test_backward_gpu(self): + self.check_backward( + cuda.to_gpu(self.x), cuda.to_gpu(self.rois), + cuda.to_gpu(self.roi_indices), cuda.to_gpu(self.gy)) + + def apply_backward(self, x_data, roi_data, roi_index_data, y_grad_data): + x = chainer.Variable(x_data) + rois = chainer.Variable(roi_data) + roi_indices = chainer.Variable(roi_index_data) + y = functions.ps_roi_average_align_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size, + sampling_ratio=self.sampling_ratio) + x.cleargrad() + y.grad = y_grad_data + y.backward() + return x, y + + @attr.gpu + @condition.retry(3) + def test_consistency_with_gpu(self): + x_cpu, y_cpu = self.apply_backward( + self.x, self.rois, self.roi_indices, self.gy) + x_gpu, y_gpu = self.apply_backward( + cuda.to_gpu(self.x), cuda.to_gpu(self.rois), + cuda.to_gpu(self.roi_indices), cuda.to_gpu(self.gy)) + testing.assert_allclose(y_cpu.data, y_gpu.data) + 
testing.assert_allclose(x_cpu.grad, x_gpu.grad) + + +testing.run_module(__name__, __file__) From 4f21814367e95f71af9f528039071d668a481986 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 02:00:47 +0900 Subject: [PATCH 03/24] check input types in ps_roi_average_pooling_2d --- .../functions/ps_roi_average_pooling_2d.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index 08b2518927..6aace954cb 100644 --- a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -50,6 +50,24 @@ def _roi_pooling_slice(size, stride, max_size, roi_offset): class PSROIAveragePooling2D(function.Function): def __init__(self, out_c, out_h, out_w, spatial_scale, group_size): + if not (isinstance(out_c, int) and out_c > 0): + raise TypeError( + 'out_c must be positive integer: {}, {}' + .format(type(out_c), out_c)) + if not (isinstance(out_h, int) and out_h > 0): + raise TypeError( + 'out_h must be positive integer: {}, {}' + .format(type(out_h), out_h)) + if not (isinstance(out_w, int) and out_w > 0): + raise TypeError( + 'out_w must be positive integer: {}, {}' + .format(type(out_w), out_w)) + if isinstance(spatial_scale, int): + spatial_scale = float(spatial_scale) + if not (isinstance(group_size, int) and group_size > 0): + raise TypeError( + 'group_size must be positive integer: {}, {}' + .format(type(group_size), group_size)) self.out_c, self.out_h, self.out_w = out_c, out_h, out_w self.spatial_scale = spatial_scale self.group_size = group_size From 3bd04523d54dd8a81b5c22c3176953bd7b6fde12 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 02:18:31 +0900 Subject: [PATCH 04/24] refactor ps_roi_average_align_2d.py --- .../functions/ps_roi_average_align_2d.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git 
a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index d3b29f3ebc..38e92def19 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -1,7 +1,7 @@ # Modified work: # ----------------------------------------------------------------------------- -# Copyright (c) 2018 Preferred Infrastructure, Inc. -# Copyright (c) 2018 Preferred Networks, Inc. +# Copyright (c) 2019 Preferred Infrastructure, Inc. +# Copyright (c) 2019 Preferred Networks, Inc. # ----------------------------------------------------------------------------- # Original work: @@ -268,6 +268,7 @@ def forward_gpu(self, inputs): n_roi = bottom_rois.shape[0] top_data = cuda.cupy.empty( (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) + if self.sampling_ratio[0] is None: sampling_ratio_h = 0 else: @@ -278,12 +279,10 @@ def forward_gpu(self, inputs): sampling_ratio_w = self.sampling_ratio[1] cuda.elementwise( ''' - raw T bottom_data, T spatial_scale, int32 channels, - int32 height, int32 width, + raw T bottom_data, raw T bottom_rois, raw int32 bottom_roi_indices, + T spatial_scale, int32 channels, int32 height, int32 width, int32 pooled_dim, int32 pooled_height, int32 pooled_width, - int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w, - raw T bottom_rois, raw int32 bottom_roi_indices - + int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w ''', 'T top_data', ''' @@ -375,10 +374,10 @@ def forward_gpu(self, inputs): ''', 'ps_roi_average_align_2d_fwd', preamble=_GET_BILINEAR_INTERP_KERNEL, - )(bottom_data, self.spatial_scale, channels, height, width, + )(bottom_data, bottom_rois, bottom_roi_indices, + self.spatial_scale, channels, height, width, self.out_c, self.out_h, self.out_w, self.group_size, - sampling_ratio_h, sampling_ratio_w, - bottom_rois, bottom_roi_indices, top_data) + sampling_ratio_h, sampling_ratio_w, top_data) return top_data, @@ -476,11 +475,10 @@ def 
backward_gpu(self, inputs, gy): sampling_ratio_w = self.sampling_ratio[1] cuda.elementwise( ''' - raw T top_diff, T spatial_scale, - int32 channels, int32 height, int32 width, + raw T top_diff, raw T bottom_rois, raw int32 bottom_roi_indices, + T spatial_scale, int32 channels, int32 height, int32 width, int32 pooled_dim, int32 pooled_height, int32 pooled_width, - int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w, - raw T bottom_rois, raw int32 bottom_roi_indices + int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w ''', 'raw T bottom_diff', ''' @@ -546,7 +544,6 @@ def backward_gpu(self, inputs, gy): static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - // bilinear_interpolation_gradient {{ int y_low, x_low, y_high, x_high; T w1, w2, w3, w4; bool ret = get_bilinear_interp_params( @@ -578,10 +575,11 @@ def backward_gpu(self, inputs, gy): } ''', 'ps_roi_average_align_2d_bwd', preamble=_GET_BILINEAR_INTERP_KERNEL, - )(gy[0], self.spatial_scale, channels, height, width, + )(gy[0], bottom_rois, bottom_roi_indices, + self.spatial_scale, channels, height, width, self.out_c, self.out_h, self.out_w, self.group_size, sampling_ratio_h, sampling_ratio_w, - bottom_rois, bottom_roi_indices, bottom_diff, size=gy[0].size) + bottom_diff, size=gy[0].size) return bottom_diff, None, None @@ -622,6 +620,8 @@ def ps_roi_average_align_2d( See the original paper proposing PSROIPooling: `R-FCN `_. + See the original paper proposing ROIAlign: + `Mask R-CNN `_. 
""" return PSROIAverageAlign2D( From d8e0ad672d8e4623342299ec219c082d36201726 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 02:21:39 +0900 Subject: [PATCH 05/24] fix typo in functions_tests --- tests/functions_tests/test_ps_roi_average_align_2d.py | 2 +- tests/functions_tests/test_ps_roi_average_pooling_2d.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functions_tests/test_ps_roi_average_align_2d.py b/tests/functions_tests/test_ps_roi_average_align_2d.py index 16425ad4f1..64cb0d0435 100644 --- a/tests/functions_tests/test_ps_roi_average_align_2d.py +++ b/tests/functions_tests/test_ps_roi_average_align_2d.py @@ -13,7 +13,7 @@ @testing.parameterize(*testing.product({ 'sampling_ratio': [None, 1, 2, (None, 3), (1, 2)], })) -class TestPSROIAveragePolling2D(unittest.TestCase): +class TestPSROIAverageAlign2D(unittest.TestCase): def setUp(self): self.N = 3 diff --git a/tests/functions_tests/test_ps_roi_average_pooling_2d.py b/tests/functions_tests/test_ps_roi_average_pooling_2d.py index 2809b9bf9d..1b1d74ced8 100644 --- a/tests/functions_tests/test_ps_roi_average_pooling_2d.py +++ b/tests/functions_tests/test_ps_roi_average_pooling_2d.py @@ -10,7 +10,7 @@ from chainercv import functions -class TestPSROIAveragePolling2D(unittest.TestCase): +class TestPSROIAveragePooling2D(unittest.TestCase): def setUp(self): self.N = 3 From 6301201688c0cccacd3e7462565261fb577976a3 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 02:21:54 +0900 Subject: [PATCH 06/24] add ps_roi_max_align_2d.py --- chainercv/functions/__init__.py | 1 + chainercv/functions/ps_roi_max_align_2d.py | 549 ++++++++++++++++++ .../functions_tests/test_ps_roi_max_align.py | 115 ++++ 3 files changed, 665 insertions(+) create mode 100644 chainercv/functions/ps_roi_max_align_2d.py create mode 100644 tests/functions_tests/test_ps_roi_max_align.py diff --git a/chainercv/functions/__init__.py b/chainercv/functions/__init__.py index 
a78f903a83..cf68d729e6 100644 --- a/chainercv/functions/__init__.py +++ b/chainercv/functions/__init__.py @@ -1,2 +1,3 @@ from chainercv.functions.ps_roi_average_align_2d import ps_roi_average_align_2d # NOQA from chainercv.functions.ps_roi_average_pooling_2d import ps_roi_average_pooling_2d # NOQA +from chainercv.functions.ps_roi_max_align_2d import ps_roi_max_align_2d # NOQA diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py new file mode 100644 index 0000000000..fd1a6284db --- /dev/null +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -0,0 +1,549 @@ +# Modified work: +# ----------------------------------------------------------------------------- +# Copyright (c) 2019 Preferred Infrastructure, Inc. +# Copyright (c) 2019 Preferred Networks, Inc. +# ----------------------------------------------------------------------------- + +# Original work: +# ----------------------------------------------------------------------------- +# Copyright (c) 2015 by Contributors +# \file roi_pooling.cu +# \brief roi pooling operator +# \author Ross Girshick, Kye-Hyeon Kim, Jian Guo +# \changed to roi_align by Elaine Bao +# \file roi_align.cu +# \roi align operator described in Mask RCNN +# ----------------------------------------------------------------------------- + +from __future__ import division + +import numpy as np +import six + +import chainer +from chainer.backends import cuda +from chainer import function +from chainer.utils import type_check + +from chainercv.functions.ps_roi_average_align_2d \ + import _GET_BILINEAR_INTERP_KERNEL +from chainercv.functions.ps_roi_average_align_2d \ + import _get_bilinear_interp_params + + +def _pair(x): + if isinstance(x, chainer.utils.collections_abc.Iterable): + return x + return x, x + + +class PSROIMaxAlign2D(function.Function): + + def __init__( + self, out_c, out_h, out_w, spatial_scale, + group_size, sampling_ratio=None + ): + if not (isinstance(out_c, int) and out_c > 0): + 
raise TypeError( + 'out_c must be positive integer: {}, {}' + .format(type(out_c), out_c)) + if not (isinstance(out_h, int) and out_h > 0): + raise TypeError( + 'out_h must be positive integer: {}, {}' + .format(type(out_h), out_h)) + if not (isinstance(out_w, int) and out_w > 0): + raise TypeError( + 'out_w must be positive integer: {}, {}' + .format(type(out_w), out_w)) + if isinstance(spatial_scale, int): + spatial_scale = float(spatial_scale) + if not (isinstance(group_size, int) and group_size > 0): + raise TypeError( + 'group_size must be positive integer: {}, {}' + .format(type(group_size), group_size)) + if not (isinstance(spatial_scale, float) and spatial_scale > 0): + raise TypeError( + 'spatial_scale must be a positive float number: {}, {}' + .format(type(spatial_scale), spatial_scale)) + sampling_ratio = _pair(sampling_ratio) + if not all((isinstance(s, int) and s >= 1) or s is None + for s in sampling_ratio): + raise TypeError( + 'sampling_ratio must be integer >= 1 or a pair of it: {}' + .format(sampling_ratio)) + + self.out_c, self.out_h, self.out_w = out_c, out_h, out_w + self.spatial_scale = spatial_scale + self.group_size = group_size + self.sampling_ratio = sampling_ratio + + def check_type_forward(self, in_types): + type_check.expect(in_types.size() == 3) + + x_type, roi_type, roi_index_type = in_types + type_check.expect( + x_type.dtype == np.float32, + x_type.ndim == 4, + roi_type.dtype == np.float32, + roi_type.ndim == 2, + roi_type.shape[1] == 4, + roi_index_type.dtype == np.int32, + roi_index_type.ndim == 1, + roi_type.shape[0] == roi_index_type.shape[0] + ) + + def forward_cpu(self, inputs): + self.retain_inputs((1, 2)) + self._bottom_data_shape = inputs[0].shape + + bottom_data, bottom_rois, bottom_roi_indices = inputs + channels, height, width = bottom_data.shape[1:] + n_roi = bottom_rois.shape[0] + top_data = np.empty( + (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) + self.argmax_data = np.empty(top_data.shape, 
dtype=np.int32) + + group_size = self.group_size + pooled_dim, pooled_width, pooled_height \ + = self.out_c, self.out_w, self.out_h + spatial_scale = self.spatial_scale + + for i in six.moves.range(top_data.size): + pw = i % pooled_width + ph = int(i / pooled_width) % pooled_height + ctop = int(i / pooled_width / pooled_height) % pooled_dim + n = int(i / pooled_width / pooled_height / pooled_dim) + + roi_batch_ind = int(bottom_roi_indices[n]) + roi_start_h = bottom_rois[n, 0] * spatial_scale + roi_start_w = bottom_rois[n, 1] * spatial_scale + roi_end_h = bottom_rois[n, 2] * spatial_scale + roi_end_w = bottom_rois[n, 3] * spatial_scale + + roi_height = max(roi_end_h - roi_start_h, 1.) + roi_width = max(roi_end_w - roi_start_w, 1.) + bin_size_h = 1. * roi_height / pooled_height + bin_size_w = 1. * roi_width / pooled_width + + gh = np.floor(float(ph) * group_size / pooled_height) + gw = np.floor(float(pw) * group_size / pooled_width) + gh = int(min(max(gh, 0), group_size - 1)) + gw = int(min(max(gw, 0), group_size - 1)) + c = (ctop * group_size + gh) * group_size + gw + + if self.sampling_ratio[0] is None: + roi_bin_grid_h = np.ceil(roi_height / pooled_height) + else: + roi_bin_grid_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + roi_bin_grid_w = np.ceil(roi_width / pooled_width) + else: + roi_bin_grid_w = self.sampling_ratio[1] + + maxval = -1e20 + maxidx = -1 + iy = 0 + while iy < roi_bin_grid_h: + y = roi_start_h + ph * bin_size_h + \ + (iy + .5) * bin_size_h / roi_bin_grid_h + ix = 0 + while ix < roi_bin_grid_w: + x = roi_start_w + pw * bin_size_w + \ + (ix + .5) * bin_size_w / roi_bin_grid_w + + # bilinear interpolation {{ + y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ + _get_bilinear_interp_params(y, x, height, width) + if y_low is None: + continue + + v1 = bottom_data[roi_batch_ind, c, y_low, x_low] + v2 = bottom_data[roi_batch_ind, c, y_low, x_high] + v3 = bottom_data[roi_batch_ind, c, y_high, x_low] + v4 = bottom_data[roi_batch_ind, c, 
y_high, x_high] + + tmpval = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4 + bottom_index = iy * roi_bin_grid_w + ix + if (tmpval > maxval): + maxval = tmpval + maxidx = bottom_index + + ix += 1 + iy += 1 + + top_data[n, ctop, ph, pw] = maxval + self.argmax_data[n, ctop, ph, pw] = maxidx + + return top_data, + + def forward_gpu(self, inputs): + self.retain_inputs((1, 2)) + self._bottom_data_shape = inputs[0].shape + + bottom_data, bottom_rois, bottom_roi_indices = inputs + channels, height, width = bottom_data.shape[1:] + n_roi = bottom_rois.shape[0] + top_data = cuda.cupy.empty( + (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) + self.argmax_data = cuda.cupy.empty(top_data.shape, np.int32) + + if self.sampling_ratio[0] is None: + sampling_ratio_h = 0 + else: + sampling_ratio_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + sampling_ratio_w = 0 + else: + sampling_ratio_w = self.sampling_ratio[1] + cuda.elementwise( + ''' + raw T bottom_data, raw T bottom_rois, + raw int32 bottom_roi_indices, + T spatial_scale, int32 channels, + int32 height, int32 width, + int32 pooled_dim, int32 pooled_height, int32 pooled_width, + int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w + ''', + 'T top_data, int32 argmax_data', + ''' + // pos in output filter + int ph = (i / pooled_width) % pooled_height; + int pw = i % pooled_width; + int ctop = (i / pooled_width / pooled_height) % pooled_dim; + int n = i / pooled_width / pooled_height / pooled_dim; + + int roi_batch_ind = bottom_roi_indices[n]; + T roi_start_h = static_cast( + round(bottom_rois[n * 4 + 0])) * spatial_scale; + T roi_start_w = static_cast( + round(bottom_rois[n * 4 + 1])) * spatial_scale; + T roi_end_h = static_cast( + round(bottom_rois[n * 4 + 2])) * spatial_scale; + T roi_end_w = static_cast( + round(bottom_rois[n * 4 + 3])) * spatial_scale; + + // Force too small ROIs to be 1x1 + T roi_height = max(roi_end_h - roi_start_h, 0.1); + T roi_width = max(roi_end_w - roi_start_w, 0.1); 
// avoid 0 + + // Compute w and h at bottom + T bin_size_h = roi_height / static_cast(pooled_height); + T bin_size_w = roi_width / static_cast(pooled_width); + + // Compute c at bottom + int gh = floor( + static_cast(ph) * group_size / pooled_height); + int gw = floor( + static_cast(pw) * group_size / pooled_width); + gh = min(max(gh, 0), group_size - 1); + gw = min(max(gw, 0), group_size - 1); + int c = (ctop * group_size + gh) * group_size + gw; + + int bottom_data_offset = + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio_h > 0) + ? sampling_ratio_h + : ceil(roi_height / pooled_height); // e.g. = 2 + int roi_bin_grid_w = (sampling_ratio_w > 0) + ? sampling_ratio_w + : ceil(roi_width / pooled_width); + + T maxval = -1E+20; + int maxidx = -1; + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g. iy = 0, 1 + { + T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g. 
0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // bilinear_interpolation {{ + int y_low, x_low, y_high, x_high; + T w1, w2, w3, w4; + bool ret = get_bilinear_interp_params( + x, y, height, width, + y_low, x_low, y_high, x_high, + w1, w2, w3, w4 + ); + if (!ret) { + continue; + } + + T v1 = bottom_data[bottom_data_offset + + y_low * width + x_low]; + T v2 = bottom_data[bottom_data_offset + + y_low * width + x_high]; + T v3 = bottom_data[bottom_data_offset + + y_high * width + x_low]; + T v4 = bottom_data[bottom_data_offset + + y_high * width + x_high]; + + // }} + + T tmpval = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4; + int bottom_index = iy * roi_bin_grid_w + ix; + if (tmpval > maxval) { + maxval = tmpval; + maxidx = bottom_index; + } + } + } + top_data = maxval; + argmax_data = maxidx; + ''', + 'ps_roi_max_align_2d_fwd', + preamble=_GET_BILINEAR_INTERP_KERNEL, + )(bottom_data, bottom_rois, bottom_roi_indices, + self.spatial_scale, channels, height, width, + self.out_c, self.out_h, self.out_w, + self.group_size, sampling_ratio_h, sampling_ratio_w, + top_data, self.argmax_data) + + return top_data, + + def backward_cpu(self, inputs, gy): + _, bottom_rois, bottom_roi_indices = inputs + channels, height, width = self._bottom_data_shape[1:] + bottom_diff = np.zeros(self._bottom_data_shape, np.float32) + + spatial_scale = self.spatial_scale + pooled_dim = self.out_c + pooled_height = self.out_h + pooled_width = self.out_w + group_size = self.group_size + top_diff = gy[0] + + for i in six.moves.range(top_diff.size): + pw = i % pooled_width + ph = int(i / pooled_width) % pooled_height + ctop = int(i / pooled_width / pooled_height) % pooled_dim + n = int(i / pooled_width / pooled_height / pooled_dim) + + roi_batch_ind = int(bottom_roi_indices[n]) + roi_start_h = bottom_rois[n, 0] * spatial_scale + roi_start_w = bottom_rois[n, 1] * spatial_scale + 
roi_end_h = bottom_rois[n, 2] * spatial_scale + roi_end_w = bottom_rois[n, 3] * spatial_scale + + roi_width = max(roi_end_w - roi_start_w, 1.) + roi_height = max(roi_end_h - roi_start_h, 1.) + bin_size_h = 1. * roi_height / pooled_height + bin_size_w = 1. * roi_width / pooled_width + + gh = np.floor(float(ph) * group_size / pooled_height) + gw = np.floor(float(pw) * group_size / pooled_width) + gh = int(min(max(gh, 0), group_size - 1)) + gw = int(min(max(gw, 0), group_size - 1)) + c = (ctop * group_size + gh) * group_size + gw + + top_diff_this_bin = top_diff[n, ctop, ph, pw] + + if self.sampling_ratio[0] is None: + roi_bin_grid_h = np.ceil(roi_height / pooled_height) + else: + roi_bin_grid_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + roi_bin_grid_w = np.ceil(roi_width / pooled_width) + else: + roi_bin_grid_w = self.sampling_ratio[1] + + maxidx = self.argmax_data[n, ctop, ph, pw] + iy = int(maxidx / roi_bin_grid_w) + ix = maxidx % roi_bin_grid_w + + y = roi_start_h + ph * bin_size_h + \ + (iy + .5) * bin_size_h / roi_bin_grid_h + x = roi_start_w + pw * bin_size_w + \ + (ix + .5) * bin_size_w / roi_bin_grid_w + + # bilinear_interpolation_gradient {{ + y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ + _get_bilinear_interp_params(y, x, height, width) + if y_low is None: + continue + + g1 = top_diff_this_bin * w1 + g2 = top_diff_this_bin * w2 + g3 = top_diff_this_bin * w3 + g4 = top_diff_this_bin * w4 + + if (x_low >= 0 and x_high >= 0 and + y_low >= 0 and y_high >= 0): + bottom_diff[roi_batch_ind, c, y_low, x_low] += g1 + bottom_diff[roi_batch_ind, c, y_low, x_high] += g2 + bottom_diff[roi_batch_ind, c, y_high, x_low] += g3 + bottom_diff[roi_batch_ind, c, y_high, x_high] += g4 + + return bottom_diff, None, None + + def backward_gpu(self, inputs, gy): + _, bottom_rois, bottom_roi_indices = inputs + channels, height, width = self._bottom_data_shape[1:] + bottom_diff = cuda.cupy.zeros(self._bottom_data_shape, np.float32) + + if 
self.sampling_ratio[0] is None: + sampling_ratio_h = 0 + else: + sampling_ratio_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + sampling_ratio_w = 0 + else: + sampling_ratio_w = self.sampling_ratio[1] + cuda.elementwise( + ''' + raw T top_diff, raw int32 argmax_data, + raw T bottom_rois, raw int32 bottom_roi_indices, + T spatial_scale, int32 channels, int32 height, int32 width, + int32 pooled_dim, int32 pooled_height, int32 pooled_width, + int32 group_size, int32 sampling_ratio_h, int32 sampling_ratio_w + ''', + 'raw T bottom_diff', + ''' + // (n, c, h, w) coords in bottom data + int pw = i % pooled_width; + int ph = (i / pooled_width) % pooled_height; + int ctop = (i / pooled_width / pooled_height) % pooled_dim; + int n = i / pooled_width / pooled_height / pooled_dim; + + // Do not using rounding; this implementation detail is critical + int roi_batch_ind = bottom_roi_indices[n]; + T roi_start_h = static_cast( + round(bottom_rois[n * 4 + 0])) * spatial_scale; + T roi_start_w = static_cast( + round(bottom_rois[n * 4 + 1])) * spatial_scale; + T roi_end_h = static_cast( + round(bottom_rois[n * 4 + 2])) * spatial_scale; + T roi_end_w = static_cast( + round(bottom_rois[n * 4 + 3])) * spatial_scale; + + // Force too small ROIs to be 1x1 + T roi_height = max(roi_end_h - roi_start_h, 0.1); + T roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 + + // Compute w and h at bottom + T bin_size_h = roi_height / static_cast(pooled_height); + T bin_size_w = roi_width / static_cast(pooled_width); + + // Compute c at bottom + int gh = floor( + static_cast(ph) * group_size / pooled_height); + int gw = floor( + static_cast(pw) * group_size / pooled_width); + gh = min(max(gh, 0), group_size - 1); + gw = min(max(gw, 0), group_size - 1); + int c = (ctop * group_size + gh) * group_size + gw; + + int bottom_diff_offset = + (roi_batch_ind * channels + c) * height * width; + + int top_offset = + (n * pooled_dim + ctop) * pooled_height * pooled_width; + T 
top_diff_this_bin = + top_diff[top_offset + ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio_h > 0) + ? sampling_ratio_h + : ceil(roi_height / pooled_height); // e.g. = 2 + int roi_bin_grid_w = (sampling_ratio_w > 0) + ? sampling_ratio_w + : ceil(roi_width / pooled_width); + + int maxidx = argmax_data[top_offset + ph * pooled_width + pw]; + int iy = maxidx / roi_bin_grid_w; + int ix = maxidx % roi_bin_grid_w; + + T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g. 0.5, 1.5 + T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + int y_low, x_low, y_high, x_high; + T w1, w2, w3, w4; + bool ret = get_bilinear_interp_params( + x, y, height, width, + y_low, x_low, y_high, x_high, + w1, w2, w3, w4 + ); + if (!ret) { + continue; + } + + T g1 = top_diff_this_bin * w1; + T g2 = top_diff_this_bin * w2; + T g3 = top_diff_this_bin * w3; + T g4 = top_diff_this_bin * w4; + + if (x_low >= 0 && x_high >= 0 && + y_low >= 0 && y_high >= 0) { + atomicAdd(&bottom_diff[bottom_diff_offset + + y_low * width + x_low], g1); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_low * width + x_high], g2); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_high * width + x_low], g3); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_high * width + x_high], g4); + } + ''', + 'ps_roi_max_align_2d_bwd', + preamble=_GET_BILINEAR_INTERP_KERNEL, + )(gy[0], self.argmax_data, bottom_rois, bottom_roi_indices, + self.spatial_scale, channels, height, width, + self.out_c, self.out_h, self.out_w, + self.group_size, sampling_ratio_h, sampling_ratio_w, + bottom_diff, size=gy[0].size) + + return bottom_diff, None, None + + +def ps_roi_max_align_2d( + x, rois, roi_indices, out_c, out_h, out_w, + spatial_scale, group_size, sampling_ratio=None +): + """Position Sensitive Region of Interest (ROI) Max align function. 
+ + This function computes position sensitive max value of input spatial patch + with the given regions of interest. Each ROI is split into + :math:`(group\_size, group\_size)` regions, and position sensitive values + in each region are computed. + + Args: + x (~chainer.Variable): Input variable. The shape is expected to be + 4 dimensional: (n: batch, c: channel, h: height, w: width). + rois (array): Input roi. The shape is expected to + be :math:`(R, 4)`, and each datum is set as below: + (y_min, x_min, y_max, x_max). The dtype is :obj:`numpy.float32`. + roi_indices (array): Input roi indices. The shape is expected to + be :math:`(R, )`. The dtype is :obj:`numpy.int32`. + out_c (int): Channels of output image after pooling. + out_h (int): Height of output image after pooling. + out_w (int): Width of output image after pooling. + spatial_scale (float): Scale by which the roi is resized. + group_size (int): Position sensitive group size. + sampling_ratio ((int, int) or int): Sampling step for the alignment. + It must be an integer of at least :math:`1` or :obj:`None`, and the + value is automatically decided when :obj:`None` is passed. Use of + different ratio in height and width axis is also supported by + passing tuple of int as ``(sampling_ratio_h, sampling_ratio_w)``. + ``sampling_ratio=s`` and ``sampling_ratio=(s, s)`` are equivalent. + + Returns: + ~chainer.Variable: Output variable. + + See the original paper proposing PSROIPooling: + `R-FCN <https://arxiv.org/abs/1605.06409>`_. + See the original paper proposing ROIAlign: + `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_.
+ + """ + return PSROIMaxAlign2D( + out_c, out_h, out_w, spatial_scale, + group_size, sampling_ratio)(x, rois, roi_indices) diff --git a/tests/functions_tests/test_ps_roi_max_align.py b/tests/functions_tests/test_ps_roi_max_align.py new file mode 100644 index 0000000000..a48fe4661c --- /dev/null +++ b/tests/functions_tests/test_ps_roi_max_align.py @@ -0,0 +1,115 @@ +import chainer +from chainer.backends import cuda +from chainer import gradient_check +from chainer import testing +from chainer.testing import attr +from chainer.testing import condition +import numpy as np +import unittest + +from chainercv import functions + + +@testing.parameterize(*testing.product({ + 'sampling_ratio': [None, 1, 2, (None, 3), (1, 2)], +})) +class TestPSROIMaxAlign2D(unittest.TestCase): + + def setUp(self): + self.N = 3 + self.group_size = 2 + self.out_c = 2 + self.n_channels = self.group_size * self.group_size * self.out_c + self.x = np.arange( + self.N * self.n_channels * 10 * 12, + dtype=np.float32).reshape((self.N, self.n_channels, 10, 12)) + np.random.shuffle(self.x) + self.x = 2 * self.x / self.x.size - 1 + self.x = self.x.astype(np.float32) + self.rois = np.array( + [[0, 0, 7, 7], + [1, 0, 5, 12], + [0, 1, 10, 5], + [3, 3, 4, 4]], + dtype=np.float32 + ) + self.roi_indices = np.array([0, 2, 1, 0], dtype=np.int32) + self.n_roi = self.rois.shape[0] + self.out_h, self.out_w = 4, 4 + self.spatial_scale = 1.0 + self.gy = np.random.uniform( + -1, 1, (self.n_roi, self.out_c, self.out_h, self.out_w)) + self.gy = self.gy.astype(np.float32) + self.check_backward_options = {'atol': 5e-4, 'rtol': 5e-3} + + def check_forward(self, x_data, roi_data, roi_index_data): + x = chainer.Variable(x_data) + rois = chainer.Variable(roi_data) + roi_indices = chainer.Variable(roi_index_data) + y = functions.ps_roi_max_align_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size, + sampling_ratio=self.sampling_ratio) + self.assertEqual(y.data.dtype, 
np.float32) + y_data = cuda.to_cpu(y.data) + self.assertEqual( + (self.n_roi, self.out_c, self.out_h, self.out_w), y_data.shape) + + @condition.retry(3) + def test_forward_cpu(self): + self.check_forward(self.x, self.rois, self.roi_indices) + + @attr.gpu + @condition.retry(3) + def test_forward_gpu(self): + self.check_forward( + cuda.to_gpu(self.x), cuda.to_gpu(self.rois), + cuda.to_gpu(self.roi_indices)) + + def check_backward(self, x_data, roi_data, roi_index_data, y_grad_data): + def f(x, rois, roi_indices): + return functions.ps_roi_max_align_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size, + sampling_ratio=self.sampling_ratio) + gradient_check.check_backward( + f, (x_data, roi_data, roi_index_data), y_grad_data, + no_grads=[False, True, True], **self.check_backward_options) + + @condition.retry(3) + def test_backward_cpu(self): + self.check_backward(self.x, self.rois, self.roi_indices, self.gy) + + @attr.gpu + @condition.retry(3) + def test_backward_gpu(self): + self.check_backward( + cuda.to_gpu(self.x), cuda.to_gpu(self.rois), + cuda.to_gpu(self.roi_indices), cuda.to_gpu(self.gy)) + + def apply_backward(self, x_data, roi_data, roi_index_data, y_grad_data): + x = chainer.Variable(x_data) + rois = chainer.Variable(roi_data) + roi_indices = chainer.Variable(roi_index_data) + y = functions.ps_roi_max_align_2d( + x, rois, roi_indices, self.out_c, self.out_h, self.out_w, + self.spatial_scale, self.group_size, + sampling_ratio=self.sampling_ratio) + x.cleargrad() + y.grad = y_grad_data + y.backward() + return x, y + + @attr.gpu + @condition.retry(3) + def test_consistency_with_gpu(self): + x_cpu, y_cpu = self.apply_backward( + self.x, self.rois, self.roi_indices, self.gy) + x_gpu, y_gpu = self.apply_backward( + cuda.to_gpu(self.x), cuda.to_gpu(self.rois), + cuda.to_gpu(self.roi_indices), cuda.to_gpu(self.gy)) + testing.assert_allclose(y_cpu.data, y_gpu.data) + testing.assert_allclose(x_cpu.grad, x_gpu.grad) + + 
+testing.run_module(__name__, __file__) From 127bd1206d6751c31914e8265e3cfe26e27fe518 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 9 Feb 2019 02:23:58 +0900 Subject: [PATCH 07/24] remove unnecessary import --- chainercv/functions/ps_roi_average_pooling_2d.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index 6aace954cb..5c78b4b99d 100644 --- a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -33,9 +33,6 @@ from chainer import function from chainer.utils import type_check -if cuda.available: - import cupy as cp - def _roi_pooling_slice(size, stride, max_size, roi_offset): start = int(np.floor(size * stride)) @@ -142,7 +139,7 @@ def forward_gpu(self, inputs): bottom_data, bottom_rois, bottom_roi_indices = inputs channels, height, width = bottom_data.shape[1:] n_roi = bottom_rois.shape[0] - top_data = cp.empty( + top_data = cuda.cupy.empty( (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) cuda.elementwise( ''' From 526f014729e5cab7a9906a29432fe75be0511bd6 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 13:21:43 +0900 Subject: [PATCH 08/24] update ps_roi_average_align_2d --- .../functions/ps_roi_average_align_2d.py | 172 ++++++++---------- 1 file changed, 73 insertions(+), 99 deletions(-) diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index 38e92def19..78fb632b27 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -32,92 +32,66 @@ def _pair(x): return x, x -def _get_bilinear_interp_params(y, x, height, width): - if y < -1 or y > height or x < -1 or x > width: +def _get_bounds(p, limit): + if p < -1 or p > limit: # out of range, so it is empty - return (None,) * 8 - - if y <= 0: - y = 0 - if x <= 0: - x = 0 - - y_low = 
int(y) - x_low = int(x) - - if y_low >= height - 1: - y_high = y_low = height - 1 - y = float(y_low) + return None, None, None + if p <= 0: + p = 0 + low = int(np.floor(p)) + if low >= limit - 1: + high = low = limit - 1 + p = float(low) else: - y_high = y_low + 1 + high = low + 1 + return p, low, high - if x_low >= width - 1: - x_high = x_low = width - 1 - x = float(x_low) - else: - x_high = x_low + 1 +def _get_bilinear_interp_params(y, x, y_low, x_low, y_high, x_high): ly = y - y_low lx = x - x_low hy = 1. - ly hx = 1. - lx - w1 = hy * hx w2 = hy * lx w3 = ly * hx w4 = ly * lx - - return y_low, x_low, y_high, x_high, w1, w2, w3, w4 + return w1, w2, w3, w4 _GET_BILINEAR_INTERP_KERNEL = ''' __device__ -bool get_bilinear_interp_params( - T x, T y, const int height, const int width, - int &y_low, int &x_low, int &y_high, int &x_high, - T &w1, T &w2, T &w3, T &w4) { - // deal with cases that inverse elements are - // out of feature map boundary - if (y < -1. || y > height || x < -1. || x > width) { +bool get_bounds( + T &p, const int limit, int &low, int &high) { + if (p < -1. || p > limit) { // empty return false; } - - if (y <= 0) { - y = 0; + if (p <= 0) { + p = 0; } - if (x <= 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; + low = (int)p; + if (low >= limit - 1) { + high = low = limit - 1; + p = (T)low; } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; + high = low + 1; } + return true; +} +__device__ +void get_bilinear_interp_params( + T y, T x, int y_low, int x_low, int y_high, int x_high, + T &w1, T &w2, T &w3, T &w4) { T ly = y - y_low; T lx = x - x_low; T hy = 1. - ly; T hx = 1. 
- lx; - w1 = hy * hx; w2 = hy * lx; w3 = ly * hx; w4 = ly * lx; - - return true; } ''' @@ -216,32 +190,35 @@ def forward_cpu(self, inputs): c = (ctop * group_size + gh) * group_size + gw if self.sampling_ratio[0] is None: - roi_bin_grid_h = np.ceil(roi_height / pooled_height) + roi_bin_grid_h = int(np.ceil(roi_height / pooled_height)) else: roi_bin_grid_h = self.sampling_ratio[0] if self.sampling_ratio[1] is None: - roi_bin_grid_w = np.ceil(roi_width / pooled_width) + roi_bin_grid_w = int(np.ceil(roi_width / pooled_width)) else: roi_bin_grid_w = self.sampling_ratio[1] count = roi_bin_grid_h * roi_bin_grid_w output_val = 0. - iy = 0 - while iy < roi_bin_grid_h: + for iy in six.moves.range(roi_bin_grid_h): y = roi_start_h + ph * bin_size_h + \ (iy + .5) * bin_size_h / roi_bin_grid_h - ix = 0 - while ix < roi_bin_grid_w: + y, y_low, y_high = _get_bounds(y, height) + if y is None or y_low is None or y_high is None: + continue + for ix in six.moves.range(roi_bin_grid_w): x = roi_start_w + pw * bin_size_w + \ (ix + .5) * bin_size_w / roi_bin_grid_w - # bilinear interpolation {{ - y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ - _get_bilinear_interp_params(y, x, height, width) - if y_low is None: + x, x_low, x_high = _get_bounds(x, width) + if x is None or x_low is None or x_high is None: continue + # bilinear interpolation {{ + w1, w2, w3, w4 = _get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high) + v1 = bottom_data[roi_batch_ind, c, y_low, x_low] v2 = bottom_data[roi_batch_ind, c, y_low, x_high] v3 = bottom_data[roi_batch_ind, c, y_high, x_low] @@ -251,9 +228,6 @@ def forward_cpu(self, inputs): # }} - ix += 1 - iy += 1 - output_val /= count top_data[n, ctop, ph, pw] = output_val @@ -338,22 +312,21 @@ def forward_gpu(self, inputs): T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g. 
0.5, 1.5 + int y_low, y_high; + bool y_ret = get_bounds(y, height, y_low, y_high); + if (!y_ret) continue; for (int ix = 0; ix < roi_bin_grid_w; ix++) { T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - // bilinear_interpolation {{ - int y_low, x_low, y_high, x_high; + int x_low, x_high; + bool x_ret = get_bounds(x, width, x_low, x_high); + if (!x_ret) continue; + // bilinear_interpolation_gradient {{ T w1, w2, w3, w4; - bool ret = get_bilinear_interp_params( - x, y, height, width, - y_low, x_low, y_high, x_high, - w1, w2, w3, w4 - ); - if (!ret) { - continue; - } + get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high, w1, w2, w3, w4); T v1 = bottom_data[bottom_data_offset + y_low * width + x_low]; @@ -419,30 +392,32 @@ def backward_cpu(self, inputs, gy): top_diff_this_bin = top_diff[n, ctop, ph, pw] if self.sampling_ratio[0] is None: - roi_bin_grid_h = np.ceil(roi_height / pooled_height) + roi_bin_grid_h = int(np.ceil(roi_height / pooled_height)) else: roi_bin_grid_h = self.sampling_ratio[0] if self.sampling_ratio[1] is None: - roi_bin_grid_w = np.ceil(roi_width / pooled_width) + roi_bin_grid_w = int(np.ceil(roi_width / pooled_width)) else: roi_bin_grid_w = self.sampling_ratio[1] count = roi_bin_grid_h * roi_bin_grid_w - iy = 0 - while iy < roi_bin_grid_h: + for iy in six.moves.range(roi_bin_grid_h): y = roi_start_h + ph * bin_size_h + \ (iy + .5) * bin_size_h / roi_bin_grid_h - ix = 0 - while ix < roi_bin_grid_w: + y, y_low, y_high = _get_bounds(y, height) + if y is None or y_low is None or y_high is None: + continue + for ix in six.moves.range(roi_bin_grid_w): x = roi_start_w + pw * bin_size_w + \ (ix + .5) * bin_size_w / roi_bin_grid_w - # bilinear_interpolation_gradient {{ - y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ - _get_bilinear_interp_params(y, x, height, width) - if y_low is None: + x, x_low, x_high = _get_bounds(x, width) + if x is None or x_low is None or x_high is None: continue 
+ # bilinear_interpolation_gradient {{ + w1, w2, w3, w4 = _get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high) g1 = top_diff_this_bin * w1 / count g2 = top_diff_this_bin * w2 / count @@ -455,8 +430,7 @@ def backward_cpu(self, inputs, gy): bottom_diff[roi_batch_ind, c, y_low, x_high] += g2 bottom_diff[roi_batch_ind, c, y_high, x_low] += g3 bottom_diff[roi_batch_ind, c, y_high, x_high] += g4 - ix += 1 - iy += 1 + # }} return bottom_diff, None, None @@ -539,21 +513,21 @@ def backward_gpu(self, inputs, gy): T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g. 0.5, 1.5 + int y_low, y_high; + bool y_ret = get_bounds(y, height, y_low, y_high); + if (!y_ret) continue; for (int ix = 0; ix < roi_bin_grid_w; ix++) { T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - int y_low, x_low, y_high, x_high; + int x_low, x_high; + bool x_ret = get_bounds(x, width, x_low, x_high); + if (!x_ret) continue; + // bilinear_interpolation_gradient {{ T w1, w2, w3, w4; - bool ret = get_bilinear_interp_params( - x, y, height, width, - y_low, x_low, y_high, x_high, - w1, w2, w3, w4 - ); - if (!ret) { - continue; - } + get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high, w1, w2, w3, w4); T g1 = top_diff_this_bin * w1 / count; T g2 = top_diff_this_bin * w2 / count; From 40c0a236936888d2ed7cef0cd600391031c8bf28 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 13:30:52 +0900 Subject: [PATCH 09/24] update ps_roi_max_align_2d.py --- chainercv/functions/ps_roi_max_align_2d.py | 80 ++++++++++++---------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py index fd1a6284db..b056297904 100644 --- a/chainercv/functions/ps_roi_max_align_2d.py +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -29,6 +29,7 @@ import _GET_BILINEAR_INTERP_KERNEL from 
chainercv.functions.ps_roi_average_align_2d \ import _get_bilinear_interp_params +from chainercv.functions.ps_roi_average_align_2d import _get_bounds def _pair(x): @@ -132,31 +133,34 @@ def forward_cpu(self, inputs): c = (ctop * group_size + gh) * group_size + gw if self.sampling_ratio[0] is None: - roi_bin_grid_h = np.ceil(roi_height / pooled_height) + roi_bin_grid_h = int(np.ceil(roi_height / pooled_height)) else: roi_bin_grid_h = self.sampling_ratio[0] if self.sampling_ratio[1] is None: - roi_bin_grid_w = np.ceil(roi_width / pooled_width) + roi_bin_grid_w = int(np.ceil(roi_width / pooled_width)) else: roi_bin_grid_w = self.sampling_ratio[1] maxval = -1e20 maxidx = -1 - iy = 0 - while iy < roi_bin_grid_h: + for iy in six.moves.range(roi_bin_grid_h): y = roi_start_h + ph * bin_size_h + \ (iy + .5) * bin_size_h / roi_bin_grid_h - ix = 0 - while ix < roi_bin_grid_w: + y, y_low, y_high = _get_bounds(y, height) + if y is None or y_low is None or y_high is None: + continue + for ix in six.moves.range(roi_bin_grid_w): x = roi_start_w + pw * bin_size_w + \ (ix + .5) * bin_size_w / roi_bin_grid_w - # bilinear interpolation {{ - y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ - _get_bilinear_interp_params(y, x, height, width) - if y_low is None: + x, x_low, x_high = _get_bounds(x, width) + if x is None or x_low is None or x_high is None: continue + # bilinear interpolation {{ + w1, w2, w3, w4 = _get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high) + v1 = bottom_data[roi_batch_ind, c, y_low, x_low] v2 = bottom_data[roi_batch_ind, c, y_low, x_high] v3 = bottom_data[roi_batch_ind, c, y_high, x_low] @@ -168,8 +172,7 @@ def forward_cpu(self, inputs): maxval = tmpval maxidx = bottom_index - ix += 1 - iy += 1 + # }} top_data[n, ctop, ph, pw] = maxval self.argmax_data[n, ctop, ph, pw] = maxidx @@ -257,22 +260,21 @@ def forward_gpu(self, inputs): T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g. 
0.5, 1.5 + int y_low, y_high; + bool y_ret = get_bounds(y, height, y_low, y_high); + if (!y_ret) continue; for (int ix = 0; ix < roi_bin_grid_w; ix++) { T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); + int x_low, x_high; + bool x_ret = get_bounds(x, width, x_low, x_high); + if (!x_ret) continue; // bilinear_interpolation {{ - int y_low, x_low, y_high, x_high; T w1, w2, w3, w4; - bool ret = get_bilinear_interp_params( - x, y, height, width, - y_low, x_low, y_high, x_high, - w1, w2, w3, w4 - ); - if (!ret) { - continue; - } + get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high, w1, w2, w3, w4); T v1 = bottom_data[bottom_data_offset + y_low * width + x_low]; @@ -344,11 +346,11 @@ def backward_cpu(self, inputs, gy): top_diff_this_bin = top_diff[n, ctop, ph, pw] if self.sampling_ratio[0] is None: - roi_bin_grid_h = np.ceil(roi_height / pooled_height) + roi_bin_grid_h = int(np.ceil(roi_height / pooled_height)) else: roi_bin_grid_h = self.sampling_ratio[0] if self.sampling_ratio[1] is None: - roi_bin_grid_w = np.ceil(roi_width / pooled_width) + roi_bin_grid_w = int(np.ceil(roi_width / pooled_width)) else: roi_bin_grid_w = self.sampling_ratio[1] @@ -361,11 +363,16 @@ def backward_cpu(self, inputs, gy): x = roi_start_w + pw * bin_size_w + \ (ix + .5) * bin_size_w / roi_bin_grid_w - # bilinear_interpolation_gradient {{ - y_low, x_low, y_high, x_high, w1, w2, w3, w4 = \ - _get_bilinear_interp_params(y, x, height, width) - if y_low is None: + y, y_low, y_high = _get_bounds(y, height) + if y is None or y_low is None or y_high is None: continue + x, x_low, x_high = _get_bounds(x, width) + if x is None or x_low is None or x_high is None: + continue + + # bilinear_interpolation_gradient {{ + w1, w2, w3, w4 = _get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high) g1 = top_diff_this_bin * w1 g2 = top_diff_this_bin * w2 @@ -465,16 +472,17 @@ def backward_gpu(self, inputs, gy): static_cast(ix + .5f) * 
bin_size_w / static_cast(roi_bin_grid_w); - int y_low, x_low, y_high, x_high; + int y_low, y_high; + bool y_ret = get_bounds(y, height, y_low, y_high); + if (!y_ret) continue; + int x_low, x_high; + bool x_ret = get_bounds(x, width, x_low, x_high); + if (!x_ret) continue; + + // bilinear_interpolation_gradient {{ T w1, w2, w3, w4; - bool ret = get_bilinear_interp_params( - x, y, height, width, - y_low, x_low, y_high, x_high, - w1, w2, w3, w4 - ); - if (!ret) { - continue; - } + get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high, w1, w2, w3, w4); T g1 = top_diff_this_bin * w1; T g2 = top_diff_this_bin * w2; From 74d22742920c5540d48e1459b238d606ed0a264d Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 13:48:09 +0900 Subject: [PATCH 10/24] do more spatial_scale test for ps roi pooling/align functions --- tests/functions_tests/test_ps_roi_average_align_2d.py | 2 +- tests/functions_tests/test_ps_roi_average_pooling_2d.py | 4 +++- tests/functions_tests/test_ps_roi_max_align.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/functions_tests/test_ps_roi_average_align_2d.py b/tests/functions_tests/test_ps_roi_average_align_2d.py index 64cb0d0435..6197c52db3 100644 --- a/tests/functions_tests/test_ps_roi_average_align_2d.py +++ b/tests/functions_tests/test_ps_roi_average_align_2d.py @@ -12,6 +12,7 @@ @testing.parameterize(*testing.product({ 'sampling_ratio': [None, 1, 2, (None, 3), (1, 2)], + 'spatial_scale': [0.6, 1.0, 2.0], })) class TestPSROIAverageAlign2D(unittest.TestCase): @@ -36,7 +37,6 @@ def setUp(self): self.roi_indices = np.array([0, 2, 1, 0], dtype=np.int32) self.n_roi = self.rois.shape[0] self.out_h, self.out_w = 4, 4 - self.spatial_scale = 1.0 self.gy = np.random.uniform( -1, 1, (self.n_roi, self.out_c, self.out_h, self.out_w)) self.gy = self.gy.astype(np.float32) diff --git a/tests/functions_tests/test_ps_roi_average_pooling_2d.py b/tests/functions_tests/test_ps_roi_average_pooling_2d.py index 
1b1d74ced8..232438515c 100644 --- a/tests/functions_tests/test_ps_roi_average_pooling_2d.py +++ b/tests/functions_tests/test_ps_roi_average_pooling_2d.py @@ -10,6 +10,9 @@ from chainercv import functions +@testing.parameterize(*testing.product({ + 'spatial_scale': [0.6, 1.0, 2.0], +})) class TestPSROIAveragePooling2D(unittest.TestCase): def setUp(self): @@ -33,7 +36,6 @@ def setUp(self): self.roi_indices = np.array([0, 2, 1, 0], dtype=np.int32) self.n_roi = self.rois.shape[0] self.out_h, self.out_w = 4, 4 - self.spatial_scale = 1.0 self.gy = np.random.uniform( -1, 1, (self.n_roi, self.out_c, self.out_h, self.out_w)) self.gy = self.gy.astype(np.float32) diff --git a/tests/functions_tests/test_ps_roi_max_align.py b/tests/functions_tests/test_ps_roi_max_align.py index a48fe4661c..06a36f6947 100644 --- a/tests/functions_tests/test_ps_roi_max_align.py +++ b/tests/functions_tests/test_ps_roi_max_align.py @@ -12,6 +12,7 @@ @testing.parameterize(*testing.product({ 'sampling_ratio': [None, 1, 2, (None, 3), (1, 2)], + 'spatial_scale': [0.6, 1.0, 2.0], })) class TestPSROIMaxAlign2D(unittest.TestCase): @@ -36,7 +37,6 @@ def setUp(self): self.roi_indices = np.array([0, 2, 1, 0], dtype=np.int32) self.n_roi = self.rois.shape[0] self.out_h, self.out_w = 4, 4 - self.spatial_scale = 1.0 self.gy = np.random.uniform( -1, 1, (self.n_roi, self.out_c, self.out_h, self.out_w)) self.gy = self.gy.astype(np.float32) From d6c07ebb3060520b7385e435eb8717a9a9b9353e Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 14:37:24 +0900 Subject: [PATCH 11/24] refactor ps_roi_average/max_align_2d --- .../functions/ps_roi_average_align_2d.py | 24 +++++++++---------- chainercv/functions/ps_roi_max_align_2d.py | 24 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index 78fb632b27..1f27a0c039 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ 
b/chainercv/functions/ps_roi_average_align_2d.py @@ -180,13 +180,13 @@ def forward_cpu(self, inputs): roi_height = max(roi_end_h - roi_start_h, 1.) roi_width = max(roi_end_w - roi_start_w, 1.) - bin_size_h = 1. * roi_height / pooled_height - bin_size_w = 1. * roi_width / pooled_width + bin_size_h = roi_height / pooled_height + bin_size_w = roi_width / pooled_width - gh = np.floor(float(ph) * group_size / pooled_height) - gw = np.floor(float(pw) * group_size / pooled_width) - gh = int(min(max(gh, 0), group_size - 1)) - gw = int(min(max(gw, 0), group_size - 1)) + gh = int(np.floor(ph * group_size / pooled_height)) + gw = int(np.floor(pw * group_size / pooled_width)) + gh = min(max(gh, 0), group_size - 1) + gw = min(max(gw, 0), group_size - 1) c = (ctop * group_size + gh) * group_size + gw if self.sampling_ratio[0] is None: @@ -380,13 +380,13 @@ def backward_cpu(self, inputs, gy): roi_width = max(roi_end_w - roi_start_w, 1.) roi_height = max(roi_end_h - roi_start_h, 1.) - bin_size_h = 1. * roi_height / pooled_height - bin_size_w = 1. * roi_width / pooled_width + bin_size_h = roi_height / pooled_height + bin_size_w = roi_width / pooled_width - gh = np.floor(float(ph) * group_size / pooled_height) - gw = np.floor(float(pw) * group_size / pooled_width) - gh = int(min(max(gh, 0), group_size - 1)) - gw = int(min(max(gw, 0), group_size - 1)) + gh = int(np.floor(ph * group_size / pooled_height)) + gw = int(np.floor(pw * group_size / pooled_width)) + gh = min(max(gh, 0), group_size - 1) + gw = min(max(gw, 0), group_size - 1) c = (ctop * group_size + gh) * group_size + gw top_diff_this_bin = top_diff[n, ctop, ph, pw] diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py index b056297904..b011fabac5 100644 --- a/chainercv/functions/ps_roi_max_align_2d.py +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -123,13 +123,13 @@ def forward_cpu(self, inputs): roi_height = max(roi_end_h - roi_start_h, 1.) 
roi_width = max(roi_end_w - roi_start_w, 1.) - bin_size_h = 1. * roi_height / pooled_height - bin_size_w = 1. * roi_width / pooled_width + bin_size_h = roi_height / pooled_height + bin_size_w = roi_width / pooled_width - gh = np.floor(float(ph) * group_size / pooled_height) - gw = np.floor(float(pw) * group_size / pooled_width) - gh = int(min(max(gh, 0), group_size - 1)) - gw = int(min(max(gw, 0), group_size - 1)) + gh = int(np.floor(float(ph) * group_size / pooled_height)) + gw = int(np.floor(float(pw) * group_size / pooled_width)) + gh = min(max(gh, 0), group_size - 1) + gw = min(max(gw, 0), group_size - 1) c = (ctop * group_size + gh) * group_size + gw if self.sampling_ratio[0] is None: @@ -334,13 +334,13 @@ def backward_cpu(self, inputs, gy): roi_width = max(roi_end_w - roi_start_w, 1.) roi_height = max(roi_end_h - roi_start_h, 1.) - bin_size_h = 1. * roi_height / pooled_height - bin_size_w = 1. * roi_width / pooled_width + bin_size_h = roi_height / pooled_height + bin_size_w = roi_width / pooled_width - gh = np.floor(float(ph) * group_size / pooled_height) - gw = np.floor(float(pw) * group_size / pooled_width) - gh = int(min(max(gh, 0), group_size - 1)) - gw = int(min(max(gw, 0), group_size - 1)) + gh = int(np.floor(float(ph) * group_size / pooled_height)) + gw = int(np.floor(float(pw) * group_size / pooled_width)) + gh = min(max(gh, 0), group_size - 1) + gw = min(max(gw, 0), group_size - 1) c = (ctop * group_size + gh) * group_size + gw top_diff_this_bin = top_diff[n, ctop, ph, pw] From 8d4ab8c9803f63c0ffb52c8ce1b4d32d34d61549 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 16:05:19 +0900 Subject: [PATCH 12/24] fix typo in ps_roi_average_pooling_2d --- chainercv/functions/ps_roi_average_pooling_2d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index 5c78b4b99d..481bff7a0a 100644 --- 
a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -3,7 +3,7 @@ # Copyright (c) 2018 Preferred Networks, Inc. # ------------------------------------------------------------------------ -# Original works of CUDA kernel in forward_gpu and forward_gpu: +# Original works of CUDA kernel in forward_gpu and backward_gpu: # ------------------------------------------------------------------------ # Copyright (c) 2017 Microsoft # From 635ce7427459ae0acffb17d16e1031fa5b307cbe Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 17:27:51 +0900 Subject: [PATCH 13/24] update ps_roi_average_pooling_2d --- .../functions/ps_roi_average_pooling_2d.py | 173 ++++++++++-------- 1 file changed, 98 insertions(+), 75 deletions(-) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index 481bff7a0a..5ecf6d13b9 100644 --- a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -89,47 +89,60 @@ def forward_cpu(self, inputs): self._bottom_data_shape = inputs[0].shape bottom_data, bottom_rois, bottom_roi_indices = inputs - channels, height, width = bottom_data.shape[1:] + height, width = bottom_data.shape[2:] n_roi = bottom_rois.shape[0] top_data = np.empty( (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) - for i_roi in six.moves.range(n_roi): - y_min, x_min, y_max, x_max = bottom_rois[i_roi] - batch_index = bottom_roi_indices[i_roi] - y_min = round(y_min * self.spatial_scale) - x_min = round(x_min * self.spatial_scale) - y_max = round(y_max * self.spatial_scale) - x_max = round(x_max * self.spatial_scale) - roi_height = max(y_max - y_min, 0.1) - roi_width = max(x_max - x_min, 0.1) - - stride_c = channels / self.out_c - stride_h = roi_height / self.out_h - stride_w = roi_width / self.out_w - group_h = int(round(self.out_h / self.group_size)) - group_w = int(round(self.out_w / self.group_size)) 
- - for out_h in six.moves.range(self.out_h): - slice_h, len_h = _roi_pooling_slice( - out_h, stride_h, height, int(y_min)) - if slice_h.stop <= slice_h.start: - continue - for out_w in six.moves.range(self.out_w): - slice_w, len_w = _roi_pooling_slice( - out_w, stride_w, width, int(x_min)) - if slice_w.stop <= slice_w.start: - continue - for out_c in six.moves.range(self.out_c): - slice_c, len_c = _roi_pooling_slice( - out_c, stride_c, channels, 0) - roi_data = bottom_data[ - batch_index, slice_c, slice_h, slice_w]\ - .reshape((len_c, -1)) - c = (out_h // group_h) * self.group_size \ - + (out_w // group_w) - top_data[i_roi, out_c, out_h, out_w] = np.average( - roi_data[c]) + group_size = self.group_size + pooled_dim, pooled_height, pooled_width \ + = self.out_c, self.out_h, self.out_w + spatial_scale = self.spatial_scale + + for i in six.moves.range(top_data.size): + pw = i % pooled_width + ph = int(i / pooled_width) % pooled_height + ctop = int(i / pooled_width / pooled_height) % pooled_dim + n = int(i / pooled_width / pooled_height / pooled_dim) + + roi_batch_ind = bottom_roi_indices[n] + roi_start_h = bottom_rois[n, 0] * spatial_scale + roi_start_w = bottom_rois[n, 1] * spatial_scale + roi_end_h = bottom_rois[n, 2] * spatial_scale + roi_end_w = bottom_rois[n, 3] * spatial_scale + + roi_height = max(roi_end_h - roi_start_h, 0.1) + roi_width = max(roi_end_w - roi_start_w, 0.1) + bin_size_h = roi_height / pooled_height + bin_size_w = roi_width / pooled_width + + hstart = int(np.floor(ph * bin_size_h + roi_start_h)) + wstart = int(np.floor(pw * bin_size_w + roi_start_w)) + hend = int(np.floor((ph + 1) * bin_size_h + roi_start_h)) + wend = int(np.floor((pw + 1) * bin_size_w + roi_start_w)) + hstart = min(max(hstart, 0), height) + wstart = min(max(wstart, 0), width) + hend = min(max(hend, 0), height) + wend = min(max(wend, 0), width) + + gh = int(np.floor(ph * group_size / pooled_height)) + gw = int(np.floor(pw * group_size / pooled_width)) + gh = min(max(gh, 0), 
group_size - 1) + gw = min(max(gw, 0), group_size - 1) + c = (ctop * group_size + gh) * group_size + gw + + if hstart >= hend or wstart >= wend: + top_data[n, ctop, ph, pw] = 0 + continue + + output_val = 0. + count = (hend - hstart) * (wend - wstart) + for iy in six.moves.range(hstart, hend): + for ix in six.moves.range(wstart, wend): + output_val += bottom_data[roi_batch_ind, c, iy, ix] + output_val /= count + top_data[n, ctop, ph, pw] = output_val + return top_data, def forward_gpu(self, inputs): @@ -222,46 +235,56 @@ def forward_gpu(self, inputs): def backward_cpu(self, inputs, gy): _, bottom_rois, bottom_roi_indices = inputs - channels, height, width = self._bottom_data_shape[1:] - n_roi = bottom_rois.shape[0] + top_diff = gy[0] + height, width = self._bottom_data_shape[2:] bottom_diff = np.zeros(self._bottom_data_shape, np.float32) - for i_roi in six.moves.range(n_roi): - y_min, x_min, y_max, x_max = bottom_rois[i_roi] - batch_index = bottom_roi_indices[i_roi] - y_min = round(y_min * self.spatial_scale) - x_min = round(x_min * self.spatial_scale) - y_max = round(y_max * self.spatial_scale) - x_max = round(x_max * self.spatial_scale) - roi_height = max(y_max - y_min, 0.1) - roi_width = max(x_max - x_min, 0.1) - - stride_c = channels / self.out_c - stride_h = roi_height / self.out_h - stride_w = roi_width / self.out_w - group_h = int(round(self.out_h / self.group_size)) - group_w = int(round(self.out_w / self.group_size)) - - for out_h in six.moves.range(self.out_h): - slice_h, len_h = _roi_pooling_slice( - out_h, stride_h, height, int(y_min)) - if slice_h.stop <= slice_h.start: - continue - for out_w in six.moves.range(self.out_w): - slice_w, len_w = _roi_pooling_slice( - out_w, stride_w, width, int(x_min)) - if slice_w.stop <= slice_w.start: - continue - for out_c in six.moves.range(self.out_c): - diff_val = gy[0][i_roi, out_c, out_h, out_w] - diff_val = diff_val / len_h / len_w - start_c = int(np.floor(out_c * stride_c)) - start_c = min(max(start_c, 0), 
channels) - - c = (out_h // group_h) * self.group_size \ - + (out_w // group_w) + start_c - bottom_diff[batch_index, c, slice_h, slice_w] \ - += diff_val + group_size = self.group_size + pooled_dim, pooled_width, pooled_height \ + = self.out_c, self.out_w, self.out_h + spatial_scale = self.spatial_scale + + for i in six.moves.range(top_diff.size): + pw = i % pooled_width + ph = int(i / pooled_width) % pooled_height + ctop = int(i / pooled_width / pooled_height) % pooled_dim + n = int(i / pooled_width / pooled_height / pooled_dim) + + roi_batch_ind = int(bottom_roi_indices[n]) + roi_start_h = bottom_rois[n, 0] * spatial_scale + roi_start_w = bottom_rois[n, 1] * spatial_scale + roi_end_h = bottom_rois[n, 2] * spatial_scale + roi_end_w = bottom_rois[n, 3] * spatial_scale + + roi_height = max(roi_end_h - roi_start_h, 0.1) + roi_width = max(roi_end_w - roi_start_w, 0.1) + bin_size_h = roi_height / pooled_height + bin_size_w = roi_width / pooled_width + + hstart = int(np.floor(ph * bin_size_h + roi_start_h)) + wstart = int(np.floor(pw * bin_size_w + roi_start_w)) + hend = int(np.floor((ph + 1) * bin_size_h + roi_start_h)) + wend = int(np.floor((pw + 1) * bin_size_w + roi_start_w)) + hstart = min(max(hstart, 0), height) + wstart = min(max(wstart, 0), width) + hend = min(max(hend, 0), height) + wend = min(max(wend, 0), width) + + gh = int(np.floor(ph * group_size / pooled_height)) + gw = int(np.floor(pw * group_size / pooled_width)) + gh = min(max(gh, 0), group_size - 1) + gw = min(max(gw, 0), group_size - 1) + c = (ctop * group_size + gh) * group_size + gw + + if (hstart >= hend) or (wstart >= wend): + continue + + count = (hend - hstart) * (wend - wstart) + diff_val = top_diff[n, ctop, ph, pw] / count + for iy in six.moves.range(hstart, hend): + for ix in six.moves.range(wstart, wend): + bottom_diff[roi_batch_ind, c, iy, ix] += diff_val + return bottom_diff, None, None def backward_gpu(self, inputs, gy): From 65710e5bdc0c5e917eed67b1fde68665022920f9 Mon Sep 17 00:00:00 
2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:00:10 +0900 Subject: [PATCH 14/24] fix typo in ps_roi_average_pooling_2d --- chainercv/functions/ps_roi_average_pooling_2d.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index 5ecf6d13b9..8d0c26c854 100644 --- a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -118,8 +118,8 @@ def forward_cpu(self, inputs): hstart = int(np.floor(ph * bin_size_h + roi_start_h)) wstart = int(np.floor(pw * bin_size_w + roi_start_w)) - hend = int(np.floor((ph + 1) * bin_size_h + roi_start_h)) - wend = int(np.floor((pw + 1) * bin_size_w + roi_start_w)) + hend = int(np.ceil((ph + 1) * bin_size_h + roi_start_h)) + wend = int(np.ceil((pw + 1) * bin_size_w + roi_start_w)) hstart = min(max(hstart, 0), height) wstart = min(max(wstart, 0), width) hend = min(max(hend, 0), height) @@ -263,8 +263,8 @@ def backward_cpu(self, inputs, gy): hstart = int(np.floor(ph * bin_size_h + roi_start_h)) wstart = int(np.floor(pw * bin_size_w + roi_start_w)) - hend = int(np.floor((ph + 1) * bin_size_h + roi_start_h)) - wend = int(np.floor((pw + 1) * bin_size_w + roi_start_w)) + hend = int(np.ceil((ph + 1) * bin_size_h + roi_start_h)) + wend = int(np.ceil((pw + 1) * bin_size_w + roi_start_w)) hstart = min(max(hstart, 0), height) wstart = min(max(wstart, 0), width) hend = min(max(hend, 0), height) From 0aa99570ff53780af0e4f70a2d44881b9b606ddc Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:02:26 +0900 Subject: [PATCH 15/24] fix typo in ps_roi_max_align_2d and ps_roi_average_align_2d --- chainercv/functions/ps_roi_average_align_2d.py | 2 +- chainercv/functions/ps_roi_max_align_2d.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index 
1f27a0c039..cdcfefc83c 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -172,7 +172,7 @@ def forward_cpu(self, inputs): ctop = int(i / pooled_width / pooled_height) % pooled_dim n = int(i / pooled_width / pooled_height / pooled_dim) - roi_batch_ind = int(bottom_roi_indices[n]) + roi_batch_ind = bottom_roi_indices[n] roi_start_h = bottom_rois[n, 0] * spatial_scale roi_start_w = bottom_rois[n, 1] * spatial_scale roi_end_h = bottom_rois[n, 2] * spatial_scale diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py index b011fabac5..6503740c45 100644 --- a/chainercv/functions/ps_roi_max_align_2d.py +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -115,7 +115,7 @@ def forward_cpu(self, inputs): ctop = int(i / pooled_width / pooled_height) % pooled_dim n = int(i / pooled_width / pooled_height / pooled_dim) - roi_batch_ind = int(bottom_roi_indices[n]) + roi_batch_ind = bottom_roi_indices[n] roi_start_h = bottom_rois[n, 0] * spatial_scale roi_start_w = bottom_rois[n, 1] * spatial_scale roi_end_h = bottom_rois[n, 2] * spatial_scale @@ -326,7 +326,7 @@ def backward_cpu(self, inputs, gy): ctop = int(i / pooled_width / pooled_height) % pooled_dim n = int(i / pooled_width / pooled_height / pooled_dim) - roi_batch_ind = int(bottom_roi_indices[n]) + roi_batch_ind = bottom_roi_indices[n] roi_start_h = bottom_rois[n, 0] * spatial_scale roi_start_w = bottom_rois[n, 1] * spatial_scale roi_end_h = bottom_rois[n, 2] * spatial_scale From 7395fea7950ed5ae5955ead1f15b087c335aee39 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:16:23 +0900 Subject: [PATCH 16/24] refactor ps_roi_average_pooling_2d --- .../functions/ps_roi_average_pooling_2d.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index 
8d0c26c854..e227652132 100644 --- a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -94,10 +94,11 @@ def forward_cpu(self, inputs): top_data = np.empty( (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) - group_size = self.group_size - pooled_dim, pooled_height, pooled_width \ - = self.out_c, self.out_h, self.out_w spatial_scale = self.spatial_scale + pooled_dim = self.out_c + pooled_height = self.out_h + pooled_width = self.out_w + group_size = self.group_size for i in six.moves.range(top_data.size): pw = i % pooled_width @@ -137,9 +138,9 @@ def forward_cpu(self, inputs): output_val = 0. count = (hend - hstart) * (wend - wstart) - for iy in six.moves.range(hstart, hend): - for ix in six.moves.range(wstart, wend): - output_val += bottom_data[roi_batch_ind, c, iy, ix] + for y in six.moves.range(hstart, hend): + for x in six.moves.range(wstart, wend): + output_val += bottom_data[roi_batch_ind, c, y, x] output_val /= count top_data[n, ctop, ph, pw] = output_val @@ -239,10 +240,11 @@ def backward_cpu(self, inputs, gy): height, width = self._bottom_data_shape[2:] bottom_diff = np.zeros(self._bottom_data_shape, np.float32) - group_size = self.group_size - pooled_dim, pooled_width, pooled_height \ - = self.out_c, self.out_w, self.out_h spatial_scale = self.spatial_scale + pooled_dim = self.out_c + pooled_height = self.out_h + pooled_width = self.out_w + group_size = self.group_size for i in six.moves.range(top_diff.size): pw = i % pooled_width @@ -281,9 +283,9 @@ def backward_cpu(self, inputs, gy): count = (hend - hstart) * (wend - wstart) diff_val = top_diff[n, ctop, ph, pw] / count - for iy in six.moves.range(hstart, hend): - for ix in six.moves.range(wstart, wend): - bottom_diff[roi_batch_ind, c, iy, ix] += diff_val + for y in six.moves.range(hstart, hend): + for x in six.moves.range(wstart, wend): + bottom_diff[roi_batch_ind, c, y, x] += diff_val return bottom_diff, None, None From 
f166c7eca212fb1b74af841e8a6adef845e08384 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:24:06 +0900 Subject: [PATCH 17/24] refactor ps_roi_average_align_2d --- chainercv/functions/ps_roi_average_align_2d.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index cdcfefc83c..424b2fabd0 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -161,10 +161,11 @@ def forward_cpu(self, inputs): top_data = np.empty( (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) - group_size = self.group_size - pooled_dim, pooled_width, pooled_height \ - = self.out_c, self.out_w, self.out_h spatial_scale = self.spatial_scale + pooled_dim = self.out_c + pooled_height = self.out_h + pooled_width = self.out_w + group_size = self.group_size for i in six.moves.range(top_data.size): pw = i % pooled_width From 098d5f274ed03b6cbdf06dd1a0af8b588012755a Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:24:12 +0900 Subject: [PATCH 18/24] fix typo in ps_roi_average_align_2d --- chainercv/functions/ps_roi_average_align_2d.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index 424b2fabd0..8833247683 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -179,8 +179,8 @@ def forward_cpu(self, inputs): roi_end_h = bottom_rois[n, 2] * spatial_scale roi_end_w = bottom_rois[n, 3] * spatial_scale - roi_height = max(roi_end_h - roi_start_h, 1.) - roi_width = max(roi_end_w - roi_start_w, 1.) 
+ roi_height = max(roi_end_h - roi_start_h, 0.1) + roi_width = max(roi_end_w - roi_start_w, 0.1) bin_size_h = roi_height / pooled_height bin_size_w = roi_width / pooled_width @@ -379,8 +379,8 @@ def backward_cpu(self, inputs, gy): roi_end_h = bottom_rois[n, 2] * spatial_scale roi_end_w = bottom_rois[n, 3] * spatial_scale - roi_width = max(roi_end_w - roi_start_w, 1.) - roi_height = max(roi_end_h - roi_start_h, 1.) + roi_width = max(roi_end_w - roi_start_w, 0.1) + roi_height = max(roi_end_h - roi_start_h, 0.1) bin_size_h = roi_height / pooled_height bin_size_w = roi_width / pooled_width From 92afda6d475950216626995ba822009020573987 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:40:54 +0900 Subject: [PATCH 19/24] refactor cuda kernel in ps_roi_average_pooling_2d --- .../functions/ps_roi_average_pooling_2d.py | 96 +++++++++---------- 1 file changed, 44 insertions(+), 52 deletions(-) diff --git a/chainercv/functions/ps_roi_average_pooling_2d.py b/chainercv/functions/ps_roi_average_pooling_2d.py index e227652132..ae5074f6b5 100644 --- a/chainercv/functions/ps_roi_average_pooling_2d.py +++ b/chainercv/functions/ps_roi_average_pooling_2d.py @@ -157,14 +157,14 @@ def forward_gpu(self, inputs): (n_roi, self.out_c, self.out_h, self.out_w), dtype=np.float32) cuda.elementwise( ''' - raw float32 bottom_data, raw float32 bottom_rois, + raw T bottom_data, raw T bottom_rois, raw int32 bottom_roi_indices, - float32 spatial_scale, int32 channels, + T spatial_scale, int32 channels, int32 height, int32 width, int32 pooled_dim, int32 pooled_height, int32 pooled_width, int32 group_size ''', - 'float32 top_data', + 'T top_data', ''' // pos in output filter int ph = (i / pooled_width) % pooled_height; @@ -173,31 +173,27 @@ def forward_gpu(self, inputs): int n = i / pooled_width / pooled_height / pooled_dim; int roi_batch_ind = bottom_roi_indices[n]; - float roi_start_h = static_cast( - round(bottom_rois[n * 4 + 0])) * spatial_scale; - float roi_start_w = 
static_cast<float>( - round(bottom_rois[n * 4 + 1])) * spatial_scale; - float roi_end_h = static_cast<float>( - round(bottom_rois[n * 4 + 2])) * spatial_scale; - float roi_end_w = static_cast<float>( - round(bottom_rois[n * 4 + 3])) * spatial_scale; + T roi_start_h = bottom_rois[n * 4 + 0] * spatial_scale; + T roi_start_w = bottom_rois[n * 4 + 1] * spatial_scale; + T roi_end_h = bottom_rois[n * 4 + 2] * spatial_scale; + T roi_end_w = bottom_rois[n * 4 + 3] * spatial_scale; // Force too small ROIs to be 1x1 - float roi_height = max(roi_end_h - roi_start_h, 0.1); - float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 + T roi_height = max(roi_end_h - roi_start_h, 0.1); + T roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 // Compute w and h at bottom - float bin_size_h = roi_height / static_cast<float>(pooled_height); - float bin_size_w = roi_width / static_cast<float>(pooled_width); - - int hstart = static_cast<int>(floor(static_cast<float>(ph) - * bin_size_h + roi_start_h)); - int wstart = static_cast<int>(floor(static_cast<float>(pw) - * bin_size_w + roi_start_w)); - int hend = static_cast<int>(ceil(static_cast<float>(ph + 1) - * bin_size_h + roi_start_h)); - int wend = static_cast<int>(ceil(static_cast<float>(pw + 1) - * bin_size_w + roi_start_w)); + T bin_size_h = roi_height / static_cast<T>(pooled_height); + T bin_size_w = roi_width / static_cast<T>(pooled_width); + + int hstart = floor( + static_cast<T>(ph) * bin_size_h + roi_start_h); + int wstart = floor( + static_cast<T>(pw) * bin_size_w + roi_start_w); + int hend = ceil( + static_cast<T>(ph + 1) * bin_size_h + roi_start_h); + int wend = ceil( + static_cast<T>(pw + 1) * bin_size_w + roi_start_w); // Add roi offsets and clip to input boundaries hstart = min(max(hstart, 0), height); @@ -208,15 +204,15 @@ def forward_gpu(self, inputs): // Compute c at bottom int gh = floor( - static_cast<float>(ph) * group_size / pooled_height); + static_cast<T>(ph) * group_size / pooled_height); int gw = floor( - static_cast<float>(pw) * group_size / pooled_width); + static_cast<T>(pw) * group_size / pooled_width); gh =
min(max(gh, 0), group_size - 1); gw = min(max(gw, 0), group_size - 1); int c = (ctop * group_size + gh) * group_size + gw; int data_offset = (roi_batch_ind * channels + c) * height * width; - float out_sum = 0; + T out_sum = 0; for (int h = hstart; h < hend; ++h){ for (int w = wstart; w < wend; ++w){ int bottom_index = h * width + w; @@ -224,8 +220,8 @@ def forward_gpu(self, inputs): } } - float bin_area = (hend - hstart) * (wend - wstart); - top_data = is_empty? (float) 0. : out_sum / bin_area; + T bin_area = (hend - hstart) * (wend - wstart); + top_data = is_empty? (T) 0. : out_sum / bin_area; ''', 'ps_roi_average_pooling_2d_fwd' )(bottom_data, bottom_rois, bottom_roi_indices, self.spatial_scale, channels, height, width, @@ -295,13 +291,13 @@ def backward_gpu(self, inputs, gy): bottom_diff = cuda.cupy.zeros(self._bottom_data_shape, np.float32) cuda.elementwise( ''' - raw float32 top_diff, raw float32 bottom_rois, + raw T top_diff, raw T bottom_rois, raw int32 bottom_roi_indices, - float32 spatial_scale, int32 channels, int32 height, int32 width, + T spatial_scale, int32 channels, int32 height, int32 width, int32 pooled_dim, int32 pooled_height, int32 pooled_width, int32 group_size ''', - 'raw float32 bottom_diff', + 'raw T bottom_diff', ''' int ph = (i / pooled_width) % pooled_height; int pw = i % pooled_width; @@ -310,31 +306,27 @@ def backward_gpu(self, inputs, gy): // [start, end) interval for spatial sampling int roi_batch_ind = bottom_roi_indices[n]; - float roi_start_h = static_cast( - round(bottom_rois[n * 4 + 0])) * spatial_scale; - float roi_start_w = static_cast( - round(bottom_rois[n * 4 + 1])) * spatial_scale; - float roi_end_h = static_cast( - round(bottom_rois[n * 4 + 2])) * spatial_scale; - float roi_end_w = static_cast( - round(bottom_rois[n * 4 + 3])) * spatial_scale; + T roi_start_h = bottom_rois[n * 4 + 0] * spatial_scale; + T roi_start_w = bottom_rois[n * 4 + 1] * spatial_scale; + T roi_end_h = bottom_rois[n * 4 + 2] * spatial_scale; + T 
roi_end_w = bottom_rois[n * 4 + 3] * spatial_scale; // Force too small ROIs to be 1x1 - float roi_height = max(roi_end_h - roi_start_h, 0.1); - float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 + T roi_height = max(roi_end_h - roi_start_h, 0.1); + T roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 // Compute w and h at bottom - float bin_size_h = roi_height / static_cast<float>(pooled_height); - float bin_size_w = roi_width / static_cast<float>(pooled_width); + T bin_size_h = roi_height / static_cast<T>(pooled_height); + T bin_size_w = roi_width / static_cast<T>(pooled_width); int hstart = floor( - static_cast<float>(ph) * bin_size_h + roi_start_h); + static_cast<T>(ph) * bin_size_h + roi_start_h); int wstart = floor( - static_cast<float>(pw) * bin_size_w + roi_start_w); + static_cast<T>(pw) * bin_size_w + roi_start_w); int hend = ceil( - static_cast<float>(ph + 1.0) * bin_size_h + roi_start_h); + static_cast<T>(ph + 1.0) * bin_size_h + roi_start_h); int wend = ceil( - static_cast<float>(pw + 1.0) * bin_size_w + roi_start_w); + static_cast<T>(pw + 1.0) * bin_size_w + roi_start_w); // Add roi offsets and clip to input boundaries hstart = min(max(hstart, 0), height); @@ -345,9 +337,9 @@ def backward_gpu(self, inputs, gy): // Compute c at bottom int gh = floor( - static_cast<float>(ph) * group_size / pooled_height); + static_cast<T>(ph) * group_size / pooled_height); int gw = floor( - static_cast<float>(pw) * group_size / pooled_width); + static_cast<T>(pw) * group_size / pooled_width); gh = min(max(gh, 0), group_size - 1); gw = min(max(gw, 0), group_size - 1); int c = (ctop * group_size + gh) * group_size + gw; @@ -357,8 +349,8 @@ def backward_gpu(self, inputs, gy): int top_offset = (n * pooled_dim + ctop) * pooled_height * pooled_width; - float bin_area = (hend - hstart) * (wend - wstart); - float diff_val = is_empty ? (float) 0. : + T bin_area = (hend - hstart) * (wend - wstart); + T diff_val = is_empty ? (T) 0.
: top_diff[top_offset + ph * pooled_width + pw] / bin_area; for (int h = hstart; h < hend; ++h){ for (int w = wstart; w < wend; ++w){ From 7001c903485cadd146956e812e7c05e47040c0cd Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 18:41:11 +0900 Subject: [PATCH 20/24] refactor cuda kernel in ps_roi_average_align_2d --- .../functions/ps_roi_average_align_2d.py | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/chainercv/functions/ps_roi_average_align_2d.py b/chainercv/functions/ps_roi_average_align_2d.py index 8833247683..80d198cce9 100644 --- a/chainercv/functions/ps_roi_average_align_2d.py +++ b/chainercv/functions/ps_roi_average_align_2d.py @@ -268,14 +268,10 @@ def forward_gpu(self, inputs): int n = i / pooled_width / pooled_height / pooled_dim; int roi_batch_ind = bottom_roi_indices[n]; - T roi_start_h = static_cast( - round(bottom_rois[n * 4 + 0])) * spatial_scale; - T roi_start_w = static_cast( - round(bottom_rois[n * 4 + 1])) * spatial_scale; - T roi_end_h = static_cast( - round(bottom_rois[n * 4 + 2])) * spatial_scale; - T roi_end_w = static_cast( - round(bottom_rois[n * 4 + 3])) * spatial_scale; + T roi_start_h = bottom_rois[n * 4 + 0] * spatial_scale; + T roi_start_w = bottom_rois[n * 4 + 1] * spatial_scale; + T roi_end_h = bottom_rois[n * 4 + 2] * spatial_scale; + T roi_end_w = bottom_rois[n * 4 + 3] * spatial_scale; // Force too small ROIs to be 1x1 T roi_height = max(roi_end_h - roi_start_h, 0.1); @@ -465,14 +461,10 @@ def backward_gpu(self, inputs, gy): // Do not using rounding; this implementation detail is critical int roi_batch_ind = bottom_roi_indices[n]; - T roi_start_h = static_cast( - round(bottom_rois[n * 4 + 0])) * spatial_scale; - T roi_start_w = static_cast( - round(bottom_rois[n * 4 + 1])) * spatial_scale; - T roi_end_h = static_cast( - round(bottom_rois[n * 4 + 2])) * spatial_scale; - T roi_end_w = static_cast( - round(bottom_rois[n * 4 + 3])) * spatial_scale; + T roi_start_h = 
bottom_rois[n * 4 + 0] * spatial_scale; + T roi_start_w = bottom_rois[n * 4 + 1] * spatial_scale; + T roi_end_h = bottom_rois[n * 4 + 2] * spatial_scale; + T roi_end_w = bottom_rois[n * 4 + 3] * spatial_scale; // Force too small ROIs to be 1x1 T roi_height = max(roi_end_h - roi_start_h, 0.1); From 4dbb9136a9d24a100ded372b42cf62a609f6c709 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 19:21:06 +0900 Subject: [PATCH 21/24] refactor ps_roi_max_align_2d --- chainercv/functions/ps_roi_max_align_2d.py | 36 +++++++++------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py index 6503740c45..0907b52ca4 100644 --- a/chainercv/functions/ps_roi_max_align_2d.py +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -121,13 +121,13 @@ def forward_cpu(self, inputs): roi_end_h = bottom_rois[n, 2] * spatial_scale roi_end_w = bottom_rois[n, 3] * spatial_scale - roi_height = max(roi_end_h - roi_start_h, 1.) - roi_width = max(roi_end_w - roi_start_w, 1.) 
+ roi_height = max(roi_end_h - roi_start_h, 0.1) + roi_width = max(roi_end_w - roi_start_w, 0.1) bin_size_h = roi_height / pooled_height bin_size_w = roi_width / pooled_width - gh = int(np.floor(float(ph) * group_size / pooled_height)) - gw = int(np.floor(float(pw) * group_size / pooled_width)) + gh = int(np.floor(ph * group_size / pooled_height)) + gw = int(np.floor(pw * group_size / pooled_width)) gh = min(max(gh, 0), group_size - 1) gw = min(max(gw, 0), group_size - 1) c = (ctop * group_size + gh) * group_size + gw @@ -216,14 +216,10 @@ def forward_gpu(self, inputs): int n = i / pooled_width / pooled_height / pooled_dim; int roi_batch_ind = bottom_roi_indices[n]; - T roi_start_h = static_cast( - round(bottom_rois[n * 4 + 0])) * spatial_scale; - T roi_start_w = static_cast( - round(bottom_rois[n * 4 + 1])) * spatial_scale; - T roi_end_h = static_cast( - round(bottom_rois[n * 4 + 2])) * spatial_scale; - T roi_end_w = static_cast( - round(bottom_rois[n * 4 + 3])) * spatial_scale; + T roi_start_h = bottom_rois[n * 4 + 0] * spatial_scale; + T roi_start_w = bottom_rois[n * 4 + 1] * spatial_scale; + T roi_end_h = bottom_rois[n * 4 + 2] * spatial_scale; + T roi_end_w = bottom_rois[n * 4 + 3] * spatial_scale; // Force too small ROIs to be 1x1 T roi_height = max(roi_end_h - roi_start_h, 0.1); @@ -332,8 +328,8 @@ def backward_cpu(self, inputs, gy): roi_end_h = bottom_rois[n, 2] * spatial_scale roi_end_w = bottom_rois[n, 3] * spatial_scale - roi_width = max(roi_end_w - roi_start_w, 1.) - roi_height = max(roi_end_h - roi_start_h, 1.) 
+ roi_height = max(roi_end_h - roi_start_h, 0.1) + roi_width = max(roi_end_w - roi_start_w, 0.1) bin_size_h = roi_height / pooled_height bin_size_w = roi_width / pooled_width @@ -419,14 +415,10 @@ def backward_gpu(self, inputs, gy): // Do not using rounding; this implementation detail is critical int roi_batch_ind = bottom_roi_indices[n]; - T roi_start_h = static_cast( - round(bottom_rois[n * 4 + 0])) * spatial_scale; - T roi_start_w = static_cast( - round(bottom_rois[n * 4 + 1])) * spatial_scale; - T roi_end_h = static_cast( - round(bottom_rois[n * 4 + 2])) * spatial_scale; - T roi_end_w = static_cast( - round(bottom_rois[n * 4 + 3])) * spatial_scale; + T roi_start_h = bottom_rois[n * 4 + 0] * spatial_scale; + T roi_start_w = bottom_rois[n * 4 + 1] * spatial_scale; + T roi_end_h = bottom_rois[n * 4 + 2] * spatial_scale; + T roi_end_w = bottom_rois[n * 4 + 3] * spatial_scale; // Force too small ROIs to be 1x1 T roi_height = max(roi_end_h - roi_start_h, 0.1); From 86c33ba849869edfe9563d08ca4e9f970a1666c5 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 19:41:44 +0900 Subject: [PATCH 22/24] fix typo in ps_roi_max_align_2d --- chainercv/functions/ps_roi_max_align_2d.py | 173 +++++++++++---------- 1 file changed, 89 insertions(+), 84 deletions(-) diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py index 0907b52ca4..a679d9b38c 100644 --- a/chainercv/functions/ps_roi_max_align_2d.py +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -340,47 +340,49 @@ def backward_cpu(self, inputs, gy): c = (ctop * group_size + gh) * group_size + gw top_diff_this_bin = top_diff[n, ctop, ph, pw] + maxidx = self.argmax_data[n, ctop, ph, pw] - if self.sampling_ratio[0] is None: - roi_bin_grid_h = int(np.ceil(roi_height / pooled_height)) - else: - roi_bin_grid_h = self.sampling_ratio[0] - if self.sampling_ratio[1] is None: - roi_bin_grid_w = int(np.ceil(roi_width / pooled_width)) - else: - roi_bin_grid_w = 
self.sampling_ratio[1] + if maxidx != -1: + if self.sampling_ratio[0] is None: + roi_bin_grid_h = int(np.ceil(roi_height / pooled_height)) + else: + roi_bin_grid_h = self.sampling_ratio[0] + if self.sampling_ratio[1] is None: + roi_bin_grid_w = int(np.ceil(roi_width / pooled_width)) + else: + roi_bin_grid_w = self.sampling_ratio[1] - maxidx = self.argmax_data[n, ctop, ph, pw] - iy = int(maxidx / roi_bin_grid_w) - ix = maxidx % roi_bin_grid_w - - y = roi_start_h + ph * bin_size_h + \ - (iy + .5) * bin_size_h / roi_bin_grid_h - x = roi_start_w + pw * bin_size_w + \ - (ix + .5) * bin_size_w / roi_bin_grid_w - - y, y_low, y_high = _get_bounds(y, height) - if y is None or y_low is None or y_high is None: - continue - x, x_low, x_high = _get_bounds(x, width) - if x is None or x_low is None or x_high is None: - continue - - # bilinear_interpolation_gradient {{ - w1, w2, w3, w4 = _get_bilinear_interp_params( - y, x, y_low, x_low, y_high, x_high) - - g1 = top_diff_this_bin * w1 - g2 = top_diff_this_bin * w2 - g3 = top_diff_this_bin * w3 - g4 = top_diff_this_bin * w4 - - if (x_low >= 0 and x_high >= 0 and - y_low >= 0 and y_high >= 0): - bottom_diff[roi_batch_ind, c, y_low, x_low] += g1 - bottom_diff[roi_batch_ind, c, y_low, x_high] += g2 - bottom_diff[roi_batch_ind, c, y_high, x_low] += g3 - bottom_diff[roi_batch_ind, c, y_high, x_high] += g4 + iy = int(maxidx / roi_bin_grid_w) + ix = maxidx % roi_bin_grid_w + + y = roi_start_h + ph * bin_size_h + \ + (iy + .5) * bin_size_h / roi_bin_grid_h + x = roi_start_w + pw * bin_size_w + \ + (ix + .5) * bin_size_w / roi_bin_grid_w + + y, y_low, y_high = _get_bounds(y, height) + if y is None or y_low is None or y_high is None: + continue + x, x_low, x_high = _get_bounds(x, width) + if x is None or x_low is None or x_high is None: + continue + + # bilinear_interpolation_gradient {{ + w1, w2, w3, w4 = _get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high) + + g1 = top_diff_this_bin * w1 + g2 = top_diff_this_bin * w2 + g3 = 
top_diff_this_bin * w3 + g4 = top_diff_this_bin * w4 + + if (x_low >= 0 and x_high >= 0 and + y_low >= 0 and y_high >= 0): + bottom_diff[roi_batch_ind, c, y_low, x_low] += g1 + bottom_diff[roi_batch_ind, c, y_low, x_high] += g2 + bottom_diff[roi_batch_ind, c, y_high, x_low] += g3 + bottom_diff[roi_batch_ind, c, y_high, x_high] += g4 + # }} return bottom_diff, None, None @@ -444,53 +446,56 @@ def backward_gpu(self, inputs, gy): (n * pooled_dim + ctop) * pooled_height * pooled_width; T top_diff_this_bin = top_diff[top_offset + ph * pooled_width + pw]; + int maxidx = argmax_data[top_offset + ph * pooled_width + pw]; - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio_h > 0) - ? sampling_ratio_h - : ceil(roi_height / pooled_height); // e.g. = 2 - int roi_bin_grid_w = (sampling_ratio_w > 0) - ? sampling_ratio_w - : ceil(roi_width / pooled_width); + if (maxidx != -1) { + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio_h > 0) + ? sampling_ratio_h + : ceil(roi_height / pooled_height); // e.g. = 2 + int roi_bin_grid_w = (sampling_ratio_w > 0) + ? sampling_ratio_w + : ceil(roi_width / pooled_width); - int maxidx = argmax_data[top_offset + ph * pooled_width + pw]; - int iy = maxidx / roi_bin_grid_w; - int ix = maxidx % roi_bin_grid_w; - - T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g. 
0.5, 1.5 - T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - int y_low, y_high; - bool y_ret = get_bounds(y, height, y_low, y_high); - if (!y_ret) continue; - int x_low, x_high; - bool x_ret = get_bounds(x, width, x_low, x_high); - if (!x_ret) continue; - - // bilinear_interpolation_gradient {{ - T w1, w2, w3, w4; - get_bilinear_interp_params( - y, x, y_low, x_low, y_high, x_high, w1, w2, w3, w4); - - T g1 = top_diff_this_bin * w1; - T g2 = top_diff_this_bin * w2; - T g3 = top_diff_this_bin * w3; - T g4 = top_diff_this_bin * w4; - - if (x_low >= 0 && x_high >= 0 && - y_low >= 0 && y_high >= 0) { - atomicAdd(&bottom_diff[bottom_diff_offset + - y_low * width + x_low], g1); - atomicAdd(&bottom_diff[bottom_diff_offset + - y_low * width + x_high], g2); - atomicAdd(&bottom_diff[bottom_diff_offset + - y_high * width + x_low], g3); - atomicAdd(&bottom_diff[bottom_diff_offset + - y_high * width + x_high], g4); + int iy = maxidx / roi_bin_grid_w; + int ix = maxidx % roi_bin_grid_w; + + T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g. 
0.5, 1.5 + T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + int y_low, y_high; + bool y_ret = get_bounds(y, height, y_low, y_high); + if (!y_ret) continue; + int x_low, x_high; + bool x_ret = get_bounds(x, width, x_low, x_high); + if (!x_ret) continue; + + // bilinear_interpolation_gradient {{ + T w1, w2, w3, w4; + get_bilinear_interp_params( + y, x, y_low, x_low, y_high, x_high, w1, w2, w3, w4); + + T g1 = top_diff_this_bin * w1; + T g2 = top_diff_this_bin * w2; + T g3 = top_diff_this_bin * w3; + T g4 = top_diff_this_bin * w4; + + if (x_low >= 0 && x_high >= 0 && + y_low >= 0 && y_high >= 0) { + atomicAdd(&bottom_diff[bottom_diff_offset + + y_low * width + x_low], g1); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_low * width + x_high], g2); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_high * width + x_low], g3); + atomicAdd(&bottom_diff[bottom_diff_offset + + y_high * width + x_high], g4); + } + // }} } ''', 'ps_roi_max_align_2d_bwd', From 64b8ec4fcab84c8ac13254e33ed8c785b73fded0 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 20:02:45 +0900 Subject: [PATCH 23/24] set initial maxval as - np.inf for ps_roi_max_align_2d --- chainercv/functions/ps_roi_max_align_2d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chainercv/functions/ps_roi_max_align_2d.py b/chainercv/functions/ps_roi_max_align_2d.py index a679d9b38c..66a648129d 100644 --- a/chainercv/functions/ps_roi_max_align_2d.py +++ b/chainercv/functions/ps_roi_max_align_2d.py @@ -141,7 +141,7 @@ def forward_cpu(self, inputs): else: roi_bin_grid_w = self.sampling_ratio[1] - maxval = -1e20 + maxval = - np.inf maxidx = -1 for iy in six.moves.range(roi_bin_grid_h): y = roi_start_h + ph * bin_size_h + \ @@ -249,7 +249,7 @@ def forward_gpu(self, inputs): ? 
sampling_ratio_w : ceil(roi_width / pooled_width); - T maxval = -1E+20; + T maxval = - (T) (1.0 / 0.0); int maxidx = -1; for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g. iy = 0, 1 { From a2cfcc19a6b65f7528d4f1597dffb765cbc41cd0 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 18 Feb 2019 20:03:07 +0900 Subject: [PATCH 24/24] fix ps_roi_max_align_2d test to pass --- tests/functions_tests/test_ps_roi_max_align.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/functions_tests/test_ps_roi_max_align.py b/tests/functions_tests/test_ps_roi_max_align.py index 06a36f6947..8998a3652f 100644 --- a/tests/functions_tests/test_ps_roi_max_align.py +++ b/tests/functions_tests/test_ps_roi_max_align.py @@ -1,5 +1,6 @@ import chainer from chainer.backends import cuda +import chainer.functions as F from chainer import gradient_check from chainer import testing from chainer.testing import attr @@ -68,10 +69,15 @@ def test_forward_gpu(self): def check_backward(self, x_data, roi_data, roi_index_data, y_grad_data): def f(x, rois, roi_indices): - return functions.ps_roi_max_align_2d( + y = functions.ps_roi_max_align_2d( x, rois, roi_indices, self.out_c, self.out_h, self.out_w, self.spatial_scale, self.group_size, sampling_ratio=self.sampling_ratio) + xp = cuda.get_array_module(y) + y = F.where( + xp.isinf(y.array), xp.zeros(y.shape, dtype=y.dtype), y) + return y + gradient_check.check_backward( f, (x_data, roi_data, roi_index_data), y_grad_data, no_grads=[False, True, True], **self.check_backward_options)