From d61122d6122b7fe2c515e30442ae3ab77a47bb11 Mon Sep 17 00:00:00 2001
From: Tal Ben-Nun <tbennun@users.noreply.github.com>
Date: Fri, 8 Nov 2024 00:42:55 -0800
Subject: [PATCH] Fix type inference and code generation for typeclasses and
 numpy types (#1725)

Fixes #1710
Supersedes #1721
---
 dace/codegen/cppunparse.py                 |  2 +
 dace/codegen/targets/cpp.py                |  4 ++
 dace/codegen/targets/fpga.py               |  6 ++-
 dace/codegen/targets/framecode.py          |  7 +++-
 dace/codegen/targets/intel_fpga.py         | 33 ++++++++--------
 dace/dtypes.py                             | 44 ++++++++++++++++------
 dace/runtime/include/dace/reduction.h      | 18 +++++++--
 dace/runtime/include/dace/types.h          |  1 +
 dace/sdfg/infer_types.py                   |  3 --
 dace/sdfg/validation.py                    |  2 +-
 tests/passes/dead_code_elimination_test.py | 41 +++++++++++++-------
 11 files changed, 112 insertions(+), 49 deletions(-)

diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py
index c375147930..e5e5a57f09 100644
--- a/dace/codegen/cppunparse.py
+++ b/dace/codegen/cppunparse.py
@@ -349,6 +349,8 @@ def _Assign(self, t):
                             # if the veclen is greater than one, this should be defined with a vector data type
                             self.write("{}{} ".format(dace.dtypes._OCL_VECTOR_TYPES[inferred_type.type],
                                                       inferred_type.veclen))
+                        elif self.language == dace.dtypes.Language.OpenCL:
+                            self.write(dace.dtypes._OCL_TYPES[inferred_type.type] + " ")
                         else:
                             self.write(dace.dtypes._CTYPES[inferred_type.type] + " ")
                     else:
diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py
index 3f02d0e6cc..911a792ac9 100644
--- a/dace/codegen/targets/cpp.py
+++ b/dace/codegen/targets/cpp.py
@@ -1339,6 +1339,10 @@ def visit_Attribute(self, node):
         attrname = rname(node)
         module_name = attrname[:attrname.rfind(".")]
         func_name = attrname[attrname.rfind(".") + 1:]
+        if module_name == 'dace' and isinstance(getattr(dace, func_name, False), dtypes.typeclass):
+            # A type definition
+            dtype: dtypes.typeclass = getattr(dace, func_name)
+            return ast.copy_location(ast.Name(id=dtype.ctype, ctx=ast.Load), node)
         if module_name in dtypes._ALLOWED_MODULES:
             cppmodname = dtypes._ALLOWED_MODULES[module_name]
             return ast.copy_location(ast.Name(id=(cppmodname + func_name), ctx=ast.Load), node)
diff --git a/dace/codegen/targets/fpga.py b/dace/codegen/targets/fpga.py
index 0c74d6ec07..61ba9f95ad 100644
--- a/dace/codegen/targets/fpga.py
+++ b/dace/codegen/targets/fpga.py
@@ -2112,7 +2112,11 @@ def _generate_MapEntry(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgr
                                 end_type = None
                             if end_type is not None:
                                 if np.dtype(end_type.dtype.type) > np.dtype('uint32'):
-                                    loop_var_type = end_type.ctype
+                                    v = dace.config.Config.get("compiler", "fpga", "vendor")
+                                    if v.casefold() == 'intel_fpga'.casefold():
+                                        loop_var_type = end_type.ocltype
+                                    else:
+                                        loop_var_type = end_type.ctype
                                 elif np.issubdtype(np.dtype(end_type.dtype.type), np.unsignedinteger):
                                     loop_var_type = "size_t"
                     except (UnboundLocalError):
diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py
index d71ea40fee..0b8fa739fe 100644
--- a/dace/codegen/targets/framecode.py
+++ b/dace/codegen/targets/framecode.py
@@ -947,7 +947,12 @@ def generate_code(self,
             if not is_top_level and isvarName in sdfg.parent_nsdfg_node.symbol_mapping:
                 continue
             isvar = data.Scalar(isvarType)
-            callsite_stream.write('%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)), sdfg)
+            if (schedule in (dtypes.ScheduleType.FPGA_Device, dtypes.ScheduleType.FPGA_Multi_Pumped)
+                    and config.Config.get('compiler', 'fpga', 'vendor').lower() == 'intel_fpga'):
+                # Emit OpenCL type
+                callsite_stream.write(f'{isvarType.ocltype} {isvarName};\n', sdfg)
+            else:
+                callsite_stream.write('%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)), sdfg)
             self.dispatcher.defined_vars.add(isvarName, disp.DefinedType.Scalar, isvarType.ctype)
 
         callsite_stream.write('\n', sdfg)
diff --git a/dace/codegen/targets/intel_fpga.py b/dace/codegen/targets/intel_fpga.py
index 513dc0bbfc..9437dccbe3 100644
--- a/dace/codegen/targets/intel_fpga.py
+++ b/dace/codegen/targets/intel_fpga.py
@@ -169,15 +169,16 @@ def get_generated_codeobjects(self):
                                    "cpp",
                                    IntelFPGACodeGen,
                                    "Intel FPGA",
-                                   target_type="host")
+                                   target_type="host",
+                                   sdfg=self._global_sdfg)
 
         kernel_code_objs = [
-            CodeObject(kernel_name, code, "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device")
+            CodeObject(kernel_name, code, "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device", sdfg=self._global_sdfg)
             for (kernel_name, code, _) in self._kernel_codes
         ]
         # add the util header if present
         other_code_objs = [
-            CodeObject(file_name, code.getvalue(), "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device")
+            CodeObject(file_name, code.getvalue(), "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device", sdfg=self._global_sdfg)
             for (file_name, code) in self._other_codes.items()
         ]
 
@@ -299,8 +300,8 @@ def make_kernel_argument(self, data, var_name, is_output, with_vectorization):
             return "__global volatile  {}* restrict {}".format(vec_type, var_name)
         elif isinstance(data, dace.data.Stream):
             return None  # Streams are global objects
-        else:
-            return data.as_arg(with_types=True, name=var_name)
+        else: # Scalar or structure
+            return f'{data.dtype.ocltype} {var_name}'
 
     @staticmethod
     def generate_unroll_loop_pre(kernel_stream, factor, sdfg, cfg, state_id, node):
@@ -570,8 +571,9 @@ def generate_module(self, sdfg, cfg, state, kernel_name, module_name, subgraph,
             arg = self.make_kernel_argument(p, pname, is_output, True)
 
             if arg is not None:
-                #change c type long long to opencl type long
-                arg = arg.replace("long long", "long")
+                # Translate the C type name to its OpenCL equivalent, if one is mapped
+                if arg in dtypes._CTYPES_TO_OCLTYPES:
+                    arg = dtypes._CTYPES_TO_OCLTYPES[arg]
 
                 kernel_args_opencl.append(arg)
                 kernel_args_host.append(p.as_arg(True, name=pname))
@@ -733,7 +735,7 @@ def generate_nsdfg_header(self, sdfg, cfg, state, state_id, node, memlet_referen
         arguments = [f'{atype} {aname}' for atype, aname, _ in memlet_references]
         fsyms = node.sdfg.used_symbols(all_symbols=False, keep_defined_in_mapping=True)
         arguments += [
-            f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys())
+            f'{node.sdfg.symbols[aname].ocltype} {aname}' for aname in sorted(node.symbol_mapping.keys())
             if aname in fsyms and aname not in sdfg.constants
         ]
         arguments = ', '.join(arguments)
@@ -769,8 +771,9 @@ def generate_nsdfg_arguments(self, sdfg, cfg, dfg, state, node):
             ptrname = cpp.ptr(in_memlet.data, desc, sdfg, self._frame)
             defined_type, defined_ctype = self._dispatcher.defined_vars.get(ptrname, 1)
 
-            #change c type long long to opencl type long
-            defined_ctype = defined_ctype.replace("long long", "long")
+            # Translate the C type name to its OpenCL equivalent, if one is mapped
+            if defined_ctype in dtypes._CTYPES_TO_OCLTYPES:
+                defined_ctype = dtypes._CTYPES_TO_OCLTYPES[defined_ctype]
 
             if isinstance(desc, dace.data.Array) and (desc.storage == dtypes.StorageType.FPGA_Global
                                                       or desc.storage == dtypes.StorageType.FPGA_Local):
@@ -822,9 +825,9 @@ def generate_nsdfg_arguments(self, sdfg, cfg, dfg, state, node):
                 ptrname = cpp.ptr(out_memlet.data, desc, sdfg, self._frame)
                 defined_type, defined_ctype = self._dispatcher.defined_vars.get(ptrname, 1)
 
-                #change c type long long to opencl type long
-                if defined_ctype.__contains__("long long"):
-                    defined_ctype = defined_ctype.replace("long long", "long")
+                # Translate the C type name to its OpenCL equivalent, if one is mapped
+                if defined_ctype in dtypes._CTYPES_TO_OCLTYPES:
+                    defined_ctype = dtypes._CTYPES_TO_OCLTYPES[defined_ctype]
 
                 if isinstance(desc, dace.data.Array) and (desc.storage == dtypes.StorageType.FPGA_Global
                                                           or desc.storage == dtypes.StorageType.FPGA_Local):
@@ -908,7 +911,7 @@ def allocate_view(self, sdfg: dace.SDFG, cfg: ControlFlowRegion, dfg: SDFGState,
             # derive the declaration/definition
 
             qualifier = "__global volatile "
-            atype = dtypes.pointer(nodedesc.dtype).ctype + " restrict"
+            atype = dtypes.pointer(nodedesc.dtype).ocltype + " restrict"
             aname = ptrname
             viewed_desc = sdfg.arrays[edge.data.data]
             eptr = cpp.ptr(edge.data.data, viewed_desc, sdfg, self._frame)
@@ -1261,7 +1264,7 @@ def generate_constants(self, sdfg, callsite_stream):
 
         for cstname, (csttype, cstval) in sdfg.constants_prop.items():
             if isinstance(csttype, dace.data.Array):
-                const_str = "__constant " + csttype.dtype.ctype + \
+                const_str = "__constant " + csttype.dtype.ocltype + \
                             " " + cstname + "[" + str(cstval.size) + "]"
 
                 if cstname not in self.generated_constants:
diff --git a/dace/dtypes.py b/dace/dtypes.py
index d0c6f23e03..465e73b2b1 100644
--- a/dace/dtypes.py
+++ b/dace/dtypes.py
@@ -250,12 +250,12 @@ class TilingType(aenum.AutoNumberEnum):
     numpy.int16: "short",
     numpy.int32: "int",
     numpy.intc: "int",
-    numpy.int64: "long long",
-    numpy.uint8: "unsigned char",
-    numpy.uint16: "unsigned short",
-    numpy.uint32: "unsigned int",
-    numpy.uintc: "unsigned int",
-    numpy.uint64: "unsigned long long",
+    numpy.int64: "int64_t",
+    numpy.uint8: "uint8_t",
+    numpy.uint16: "uint16_t",
+    numpy.uint32: "uint32_t",
+    numpy.uintc: "dace::uint",
+    numpy.uint64: "uint64_t",
     numpy.float16: "dace::float16",
     numpy.float32: "float",
     numpy.float64: "double",
@@ -275,17 +275,37 @@ class TilingType(aenum.AutoNumberEnum):
     numpy.int32: "int",
     numpy.intc: "int",
     numpy.int64: "long",
-    numpy.uint8: "unsigned char",
-    numpy.uint16: "unsigned short",
-    numpy.uint32: "unsigned int",
-    numpy.uint64: "unsigned long",
-    numpy.uintc: "unsigned int",
+    numpy.uint8: "uchar",
+    numpy.uint16: "ushort",
+    numpy.uint32: "uint",
+    numpy.uint64: "ulong",
+    numpy.uintc: "uint",
     numpy.float32: "float",
     numpy.float64: "double",
     numpy.complex64: "complex float",
     numpy.complex128: "complex double",
 }
 
+_CTYPES_TO_OCLTYPES = {
+    # C type name -> equivalent OpenCL C type name (looked up by the Intel FPGA code generator)
+    "void": "void",
+    "int": "int",
+    "float": "float",
+    "double": "double",
+    "dace::complex64": "complex float",
+    "dace::complex128": "complex double",
+    "bool": "bool",
+    "char": "char",
+    "short": "short",
+    "int64_t": "long",
+    "uint8_t": "uchar",
+    "uint16_t": "ushort",
+    "uint32_t": "uint",
+    "dace::uint": "uint",
+    "uint64_t": "ulong",
+    "dace::float16": "half",
+}
+
 # Translation of types to OpenCL vector types
 _OCL_VECTOR_TYPES = {
     numpy.int8: "char",
@@ -1295,7 +1315,7 @@ def dtype_to_typeclass(dtype=None):
 bool = bool_
 
 TYPECLASS_TO_STRING = {
-    bool: "dace::bool",
+    bool: "dace::bool_",
     bool_: "dace::bool_",
     uint8: "dace::uint8",
     uint16: "dace::uint16",
diff --git a/dace/runtime/include/dace/reduction.h b/dace/runtime/include/dace/reduction.h
index 927bf449de..81017610ae 100644
--- a/dace/runtime/include/dace/reduction.h
+++ b/dace/runtime/include/dace/reduction.h
@@ -205,15 +205,27 @@ namespace dace {
 
 #if defined(DACE_USE_GPU_ATOMICS)
     template <>
-    struct _wcr_fixed<ReductionType::Sum, long long> {
+    struct _wcr_fixed<ReductionType::Sum, int64_t> {
        
-        static DACE_HDFI long long reduce_atomic(long long *ptr, const long long& value) {
+        static DACE_HDFI int64_t reduce_atomic(int64_t *ptr, const int64_t& value) {
             return _wcr_fixed<ReductionType::Sum, unsigned long long>::reduce_atomic((
                 unsigned long long *)ptr, 
                 static_cast<unsigned long long>(value));
         }
 
-        DACE_HDFI long long operator()(const long long &a, const long long &b) const { return a + b; }
+        DACE_HDFI int64_t operator()(const int64_t &a, const int64_t &b) const { return a + b; }
+    };
+
+    template <>
+    struct _wcr_fixed<ReductionType::Sum, uint64_t> {
+        // reduce_atomic casts the pointer and defers to the unsigned long long variant (assumes matching 64-bit layout -- TODO confirm).
+        static DACE_HDFI uint64_t reduce_atomic(uint64_t *ptr, const uint64_t& value) {
+            return _wcr_fixed<ReductionType::Sum, unsigned long long>::reduce_atomic((
+                unsigned long long *)ptr, 
+                static_cast<unsigned long long>(value));
+        }
+        // Combines two values by plain addition.
+        DACE_HDFI uint64_t operator()(const uint64_t &a, const uint64_t &b) const { return a + b; }
     };
 #endif
 
diff --git a/dace/runtime/include/dace/types.h b/dace/runtime/include/dace/types.h
index aa20877549..e5eed1e35e 100644
--- a/dace/runtime/include/dace/types.h
+++ b/dace/runtime/include/dace/types.h
@@ -74,6 +74,7 @@ namespace dace
     typedef uint16_t uint16;
     typedef uint32_t uint32;
     typedef uint64_t uint64;
+    typedef unsigned int uint;
     typedef float float32;
     typedef double float64;
 
diff --git a/dace/sdfg/infer_types.py b/dace/sdfg/infer_types.py
index 97010e95a7..c05708670e 100644
--- a/dace/sdfg/infer_types.py
+++ b/dace/sdfg/infer_types.py
@@ -34,9 +34,6 @@ def infer_out_connector_type(sdfg: SDFG, state: SDFGState, node: nodes.CodeNode,
     else:
         allocated_as_scalar = True
 
-    if node.out_connectors[cname].type is not None:
-        return node.out_connectors[cname].type
-
     # If nested SDFG, try to use internal array type
     if isinstance(node, nodes.NestedSDFG):
         scalar = (isinstance(node.sdfg.arrays[cname], data.Scalar) and allocated_as_scalar)
diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py
index 1f5c263206..c603597fb1 100644
--- a/dace/sdfg/validation.py
+++ b/dace/sdfg/validation.py
@@ -244,7 +244,7 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context
                     warnings.warn(f'Mismatch between constant and data descriptor of "{const_name}", '
                                   f'expected to find "{const_type}" but found "{sdfg.arrays[const_name]}".')
             elif const_name in sdfg.symbols:
-                if const_type != sdfg.symbols[const_name]:
+                if const_type.dtype != sdfg.symbols[const_name]:
                     # This should actually be an error, but there is a lots of code that depends on it.
                     warnings.warn(f'Mismatch between constant and symobl type of "{const_name}", '
                                   f'expected to find "{const_type}" but found "{sdfg.symbols[const_name]}".')
diff --git a/tests/passes/dead_code_elimination_test.py b/tests/passes/dead_code_elimination_test.py
index f8920b0538..a41a11c4d6 100644
--- a/tests/passes/dead_code_elimination_test.py
+++ b/tests/passes/dead_code_elimination_test.py
@@ -254,21 +254,31 @@ def test_dce_callback_manual():
     sdfg.validate()
 
 
-def test_dce_add_type_hint_of_variable():
+@pytest.mark.parametrize('dtype', (dace.float64, dace.bool, np.float64))
+def test_dce_add_type_hint_of_variable(dtype):
     """
     The code of this test comes from this issue: https://github.com/spcl/dace/issues/1150#issue-1445418361
+    and this issue: https://github.com/spcl/dace/issues/1710
+    and this PR: https://github.com/spcl/dace/pull/1721
     """
+    if dtype is dace.bool:
+        true_value = True
+        false_value = False
+    else:
+        true_value = 3.0
+        false_value = 7.0
+
     sdfg = dace.SDFG("test")
     state = sdfg.add_state()
-    sdfg.add_array("out", dtype=dace.float64, shape=(10,))
-    sdfg.add_array("cond", dtype=dace.bool, shape=(10,))
-    sdfg.add_array("tmp", dtype=dace.float64, shape=(10,), transient=True)
+    sdfg.add_array("out", dtype=dtype, shape=(10, ))
+    sdfg.add_array("cond", dtype=dace.bool, shape=(10, ))
+    sdfg.add_array("tmp", dtype=dtype, shape=(10, ), transient=True)
     tasklet, *_ = state.add_mapped_tasklet(
-        code="""
+        code=f"""
 if _cond:
-    _tmp = 3.0
+    _tmp = {true_value}
 else:
-    _tmp = 7.0
+    _tmp = {false_value}
 _out = _tmp
         """,
         inputs={"_cond": dace.Memlet(subset="k", data="cond")},
@@ -281,14 +291,17 @@ def test_dce_add_type_hint_of_variable():
         external_edges=True,
     )
     sdfg.simplify()
-    assert tasklet.code.as_string.startswith("_tmp: dace.float64")
+    assert tasklet.code.as_string.startswith("_tmp:")
 
     compiledsdfg = sdfg.compile()
-    cond = np.random.choice(a=[True, False], size=(10,))
-    out = np.zeros((10,))
-    compiledsdfg(cond=cond, out=out)
-    assert np.all(out == np.where(cond, 3.0, 7.0))
+    cond = np.random.choice(a=[True, False], size=(10, ))
+    if isinstance(dtype, dace.typeclass):
+        out = np.zeros((10, ), dtype=dtype.as_numpy_dtype())
+    else:
+        out = np.zeros((10, ), dtype=dtype)
 
+    compiledsdfg(cond=cond, out=out)
+    assert np.all(out == np.where(cond, true_value, false_value))
 
 
 if __name__ == '__main__':
@@ -305,4 +318,6 @@ def test_dce_add_type_hint_of_variable():
     test_dce()
     test_dce_callback()
     test_dce_callback_manual()
-    test_dce_add_type_hint_of_variable()
+    test_dce_add_type_hint_of_variable(dace.float64)
+    test_dce_add_type_hint_of_variable(dace.bool)
+    test_dce_add_type_hint_of_variable(np.float64)