diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index ecb405dcc3c7..5dc1fe687e71 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -219,7 +219,11 @@ class DeconvolutionOp { using namespace mshadow::expr; if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "If not using CUDNN, only 1D or 2D Deconvolution is supported"; + LOG(FATAL) << "Only 1D or 2D Deconvolution is natively supported. " + << ((MXNET_USE_MKLDNN || MXNET_USE_CUDNN) + ? "Fallback to native implementation (if occurred) is therefore " + "impossible for 3D Deconvolution." + : ""); } CHECK_EQ(req[deconv::kOut], kWriteTo); diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index 35d17c5ce0b6..30efd1f7749f 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -42,9 +42,14 @@ static void DeconvolutionComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector<NDArray>& outputs) { const DeconvolutionParam& params = nnvm::get<DeconvolutionParam>(attrs.parsed); if (SupportMKLDNNDeconv(params, inputs[0])) { - MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs); - MKLDNNRun(MKLDNNDeconvolutionForward, attrs, ctx, inputs, req, outputs); - MKLDNN_OPCHECK_RUN(DeconvolutionCompute, attrs, ctx, inputs, req, outputs); + if (params.kernel.ndim() == 3) { + // we cannot check the output, as 3D deconvolution is not natively supported yet + MKLDNNRun(MKLDNNDeconvolutionForward, attrs, ctx, inputs, req, outputs); + } else { + MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs); + MKLDNNRun(MKLDNNDeconvolutionForward, attrs, ctx, inputs, req, outputs); + MKLDNN_OPCHECK_RUN(DeconvolutionCompute, attrs, ctx, inputs, req, outputs); + } return; } FallBackCompute(DeconvolutionCompute, attrs, ctx, inputs, req, outputs); @@ -57,9 +62,14 @@ static void DeconvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector<NDArray>& outputs) { const DeconvolutionParam& params = nnvm::get<DeconvolutionParam>(attrs.parsed); if
(SupportMKLDNNDeconv(params, inputs[0])) { - MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs); - MKLDNNRun(MKLDNNDeconvolutionBackward, attrs, ctx, inputs, req, outputs); - MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req, outputs); + if (params.kernel.ndim() == 3) { + // we cannot check the output, as 3D deconvolution is not natively supported yet + MKLDNNRun(MKLDNNDeconvolutionBackward, attrs, ctx, inputs, req, outputs); + } else { + MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs); + MKLDNNRun(MKLDNNDeconvolutionBackward, attrs, ctx, inputs, req, outputs); + MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req, outputs); + } return; } FallBackCompute(DeconvolutionGradCompute, attrs, ctx, inputs, req, outputs); @@ -99,12 +109,12 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape) { const DeconvolutionParam& param_ = nnvm::get<DeconvolutionParam>(attrs.parsed); -#if MXNET_USE_CUDNN == 0 +#if MXNET_USE_CUDNN == 0 && MXNET_USE_MKLDNN == 0 if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "If not using CUDNN, only 1D or 2D Deconvolution is supported"; + LOG(FATAL) << "If not using CUDNN or MKLDNN, only 1D or 2D Deconvolution is supported"; return false; } -#endif // CUDNN +#endif using namespace mshadow; if (!param_.no_bias) { diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc index 7678567d95c8..21608153bd5a 100644 --- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc @@ -29,7 +29,8 @@ namespace mxnet { namespace op { bool SupportMKLDNNDeconv(const DeconvolutionParam &params, const NDArray &input) { - return params.kernel.ndim() == 2 && input.shape().ndim() == 4 && + return params.kernel.ndim() >= 1 && params.kernel.ndim() <= 3 && + input.shape().ndim() == (params.kernel.ndim() + 2) && (input.dtype() == mshadow::kFloat32 || input.dtype() ==
mshadow::kBfloat16); } @@ -322,10 +323,10 @@ DeconvDescCreator::DeconvDescCreator(const DeconvolutionParam &param, const NDAr strides(param.stride.ndim()), padding(param.pad.ndim()), dilates(param.dilate.ndim()) { - // assuming only deconv2D is supported for now CHECK_EQ(param.stride.ndim(), param.pad.ndim()); CHECK_EQ(param.stride.ndim(), param.dilate.ndim()); - CHECK_EQ(param.stride.ndim(), 2); + CHECK_GE(param.stride.ndim(), 1); + CHECK_LE(param.stride.ndim(), 3); for (int i = 0; i < param.stride.ndim(); ++i) { strides[i] = param.stride[i]; padding[i] = param.pad[i]; diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index de0c249f52ab..061cc180f383 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -471,8 +471,8 @@ def check_convolution_training(stype): @with_seed() def test_Deconvolution(): def check_Deconvolution_training(stype): - for shape in [(3, 3, 10, 10)]: # testing only 2D for now - data_tmp = np.random.randint(256, size=shape) + for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: + data_tmp = np.random.normal(-0.1, 1, size=shape) data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: @@ -481,6 +481,11 @@ def check_Deconvolution_training(stype): elif np.array(shape).shape[0] == 4: test = mx.symbol.Deconvolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3)) + elif np.array(shape).shape[0] == 5 and stype == "default": + # Unable to test fallback to native implementation for non-default storage types + # as 3D deconvolution is not natively supported + test = mx.symbol.Deconvolution(data=data, kernel=(3,3,3), stride=(2,2,2), num_filter=4) + weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,))