Skip to content

Commit

Permalink
Fix for floordiv on GPU target (#1471)
Browse files Browse the repository at this point in the history
This PR addresses a `floordiv` runtime error detected on GPU target:
`std::ifloor` returns zero with integer argument (for example, the
result of an `int` division).

---------

Co-authored-by: Tal Ben-Nun <[email protected]>
  • Loading branch information
edopao and tbennun authored Dec 12, 2023
1 parent ae378e1 commit 2dcd74a
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 4 deletions.
8 changes: 7 additions & 1 deletion dace/runtime/include/dace/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,13 @@ namespace dace
return (T)std::pow(a, (T)b);
}

template<typename T>
// Integral overload of ifloor, selected via enable_if when T is an integral
// type. The floor of an integer is the integer itself, so the argument is
// returned unchanged and keeps its type T.
template<typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
DACE_CONSTEXPR DACE_HDFI T ifloor(const T& a)
{
    return a;
}

template<typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
DACE_CONSTEXPR DACE_HDFI int ifloor(const T& a)
{
return (int)std::floor(a);
Expand Down
14 changes: 11 additions & 3 deletions tests/numpy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
rng = default_rng(42)


def compare_numpy_output(non_zero=False,
def compare_numpy_output(device=dace.dtypes.DeviceType.CPU,
non_zero=False,
positive=False,
check_dtype=False,
validation_func=None,
Expand All @@ -27,6 +28,7 @@ def compare_numpy_output(non_zero=False,
Note that this should be used *instead* of the `@dace.program`
annotation, not along with it!
:param device: Selects the target device for test execution.
:param non_zero: if `True`, replace `0` inputs with `1`.
:param positive: if `False`, floats sample from [-10.0, 10.0], and ints
sample from [-3, 3). Else, floats sample from
Expand All @@ -41,7 +43,7 @@ def compare_numpy_output(non_zero=False,
"""
def decorator(func):
def test():
dp = dace.program(func)
dp = dace.program(device=device)(func)

def get_rand_arr(ddesc):
if type(ddesc) is dace.dtypes.typeclass:
Expand Down Expand Up @@ -115,7 +117,13 @@ def get_rand_arr(ddesc):
numpy_thrown = e

try:
dace_result = dp(**dace_input)
if device == dace.dtypes.DeviceType.GPU:
sdfg = dp.to_sdfg()
sdfg.apply_gpu_transformations()
dace_result = sdfg(**dace_input)
else:
dace_result = dp(**dace_input)

except Exception as e:
dace_thrown = e

Expand Down
26 changes: 26 additions & 0 deletions tests/numpy/gpu_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
import dace
import pytest

from common import compare_numpy_output

"""
Test CUDA code generation for a subset of numpy-like functions on GPU target.
Only a subset of the numpy tests is executed on GPU target to keep the test
execution time within a reasonable limit. This is of particular interest for
CI regression tests. These testcases are mainly supposed to cover GPU-related
issues reported to the DaCe project or special cases for GPU code generation.
"""
gpu_device = dace.dtypes.DeviceType.GPU


# special case where `dace::math::ifloor` argument is integral
# Floor-divides two 5x5 int64 arrays on the GPU device and compares the result
# against numpy via `compare_numpy_output`. `non_zero=True` replaces `0` inputs
# with `1` (presumably to keep the divisor away from zero), and `positive=True`
# is passed so that the sampled inputs are positive.
@pytest.mark.gpu
@compare_numpy_output(device=gpu_device, non_zero=True, positive=True)
def test_floordiv(A: dace.int64[5, 5], B: dace.int64[5, 5]):
    return A // B


# Allow running the GPU test directly as a script, without going through pytest.
if __name__ == '__main__':
    test_floordiv()

0 comments on commit 2dcd74a

Please sign in to comment.