diff --git a/dace/runtime/include/dace/math.h b/dace/runtime/include/dace/math.h
index e743f1410f..afc08a64d3 100644
--- a/dace/runtime/include/dace/math.h
+++ b/dace/runtime/include/dace/math.h
@@ -525,7 +525,13 @@ namespace dace
             return (T)std::pow(a, (T)b);
         }
 
-        template<typename T>
+        template<typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>  // overload enabled only when T is an integral type
+        DACE_CONSTEXPR DACE_HDFI T ifloor(const T& a)
+        {
+            return a;  // hands the argument back unchanged, keeping its type T
+        }
+
+        template<typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
         DACE_CONSTEXPR DACE_HDFI int ifloor(const T& a)
         {
             return (int)std::floor(a);
diff --git a/tests/numpy/common.py b/tests/numpy/common.py
index 5e84062dec..2784c8a0eb 100644
--- a/tests/numpy/common.py
+++ b/tests/numpy/common.py
@@ -11,7 +11,8 @@
 rng = default_rng(42)
 
 
-def compare_numpy_output(non_zero=False,
+def compare_numpy_output(device=dace.dtypes.DeviceType.CPU,
+                         non_zero=False,
                          positive=False,
                          check_dtype=False,
                          validation_func=None,
@@ -27,6 +28,7 @@ def compare_numpy_output(non_zero=False,
         Note that this should be used *instead* of the `@dace.program`
         annotation, not along with it!
 
+        :param device: Selects the target device for test execution.
         :param non_zero: if `True`, replace `0` inputs with `1`.
         :param positive: if `False`, floats sample from [-10.0, 10.0], and ints
                          sample from [-3, 3). Else, floats sample from
@@ -41,7 +43,7 @@ def compare_numpy_output(non_zero=False,
     """
     def decorator(func):
         def test():
-            dp = dace.program(func)
+            dp = dace.program(device=device)(func)
 
             def get_rand_arr(ddesc):
                 if type(ddesc) is dace.dtypes.typeclass:
@@ -115,7 +117,13 @@ def get_rand_arr(ddesc):
                 numpy_thrown = e
 
             try:
-                dace_result = dp(**dace_input)
+                if device == dace.dtypes.DeviceType.GPU:
+                    sdfg = dp.to_sdfg()
+                    sdfg.apply_gpu_transformations()
+                    dace_result = sdfg(**dace_input)
+                else:
+                    dace_result = dp(**dace_input)
+
             except Exception as e:
                 dace_thrown = e
 
diff --git a/tests/numpy/gpu_test.py b/tests/numpy/gpu_test.py
new file mode 100644
index 0000000000..9225145b86
--- /dev/null
+++ b/tests/numpy/gpu_test.py
@@ -0,0 +1,26 @@
+# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
+import dace
+import pytest
+
+from common import compare_numpy_output
+
+"""
+Test CUDA code generation for a subset of numpy-like functions on GPU target.
+
+Only a subset of the numpy tests is executed on GPU target to keep the test
+execution time within a reasonable limit. This is of particular interest for
+CI regression tests. These testcases are mainly supposed to cover GPU-related
+issues reported to the DaCe project or special cases for GPU code generation.
+"""
+gpu_device = dace.dtypes.DeviceType.GPU
+
+
+# Special case where the `dace::math::ifloor` argument is integral.
+@pytest.mark.gpu
+@compare_numpy_output(device=gpu_device, non_zero=True, positive=True)  # non_zero=True replaces 0 inputs with 1
+def test_floordiv(A: dace.int64[5, 5], B: dace.int64[5, 5]):
+    return A // B
+
+
+if __name__ == '__main__':  # lets the file be run directly as a script
+    test_floordiv()
\ No newline at end of file