diff --git a/dace/runtime/include/dace/math.h b/dace/runtime/include/dace/math.h index e743f1410f..afc08a64d3 100644 --- a/dace/runtime/include/dace/math.h +++ b/dace/runtime/include/dace/math.h @@ -525,7 +525,13 @@ namespace dace return (T)std::pow(a, (T)b); } - template<typename T> + template<typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> + DACE_CONSTEXPR DACE_HDFI T ifloor(const T& a) + { + return a; + } + + template<typename T, typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr> DACE_CONSTEXPR DACE_HDFI int ifloor(const T& a) { return (int)std::floor(a); diff --git a/tests/numpy/common.py b/tests/numpy/common.py index 5e84062dec..2784c8a0eb 100644 --- a/tests/numpy/common.py +++ b/tests/numpy/common.py @@ -11,7 +11,8 @@ rng = default_rng(42) -def compare_numpy_output(non_zero=False, +def compare_numpy_output(device=dace.dtypes.DeviceType.CPU, + non_zero=False, positive=False, check_dtype=False, validation_func=None, @@ -27,6 +28,7 @@ def compare_numpy_output(non_zero=False, Note that this should be used *instead* of the `@dace.program` annotation, not along with it! + :param device: Selects the target device for test execution. :param non_zero: if `True`, replace `0` inputs with `1`. :param positive: if `False`, floats sample from [-10.0, 10.0], and ints sample from [-3, 3). 
Else, floats sample from @@ -41,7 +43,7 @@ def compare_numpy_output(non_zero=False, """ def decorator(func): def test(): - dp = dace.program(func) + dp = dace.program(device=device)(func) def get_rand_arr(ddesc): if type(ddesc) is dace.dtypes.typeclass: @@ -115,7 +117,13 @@ def get_rand_arr(ddesc): numpy_thrown = e try: - dace_result = dp(**dace_input) + if device == dace.dtypes.DeviceType.GPU: + sdfg = dp.to_sdfg() + sdfg.apply_gpu_transformations() + dace_result = sdfg(**dace_input) + else: + dace_result = dp(**dace_input) + except Exception as e: dace_thrown = e diff --git a/tests/numpy/gpu_test.py b/tests/numpy/gpu_test.py new file mode 100644 index 0000000000..9225145b86 --- /dev/null +++ b/tests/numpy/gpu_test.py @@ -0,0 +1,26 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import pytest + +from common import compare_numpy_output + +""" +Test CUDA code generation for a subset of numpy-like functions on GPU target. + +Only a subset of the numpy tests is executed on GPU target to keep the test +execution time within a reasonable limit. This is of particular interest for +CI regression tests. These testcases are mainly supposed to cover GPU-related +issues reported to the DaCe project or special cases for GPU code generation. +""" +gpu_device = dace.dtypes.DeviceType.GPU + + +# special case where `dace::math::ifloor` argument is integral +@pytest.mark.gpu +@compare_numpy_output(device=gpu_device, non_zero=True, positive=True) +def test_floordiv(A: dace.int64[5, 5], B: dace.int64[5, 5]): + return A // B + + +if __name__ == '__main__': + test_floordiv() \ No newline at end of file