From d61122d6122b7fe2c515e30442ae3ab77a47bb11 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 8 Nov 2024 00:42:55 -0800 Subject: [PATCH] Fix type inference and code generation for typeclasses and numpy types (#1725) Fixes #1710 Supersedes #1721 --- dace/codegen/cppunparse.py | 2 + dace/codegen/targets/cpp.py | 4 ++ dace/codegen/targets/fpga.py | 6 ++- dace/codegen/targets/framecode.py | 7 +++- dace/codegen/targets/intel_fpga.py | 33 ++++++++-------- dace/dtypes.py | 44 ++++++++++++++++------ dace/runtime/include/dace/reduction.h | 18 +++++++-- dace/runtime/include/dace/types.h | 1 + dace/sdfg/infer_types.py | 3 -- dace/sdfg/validation.py | 2 +- tests/passes/dead_code_elimination_test.py | 41 +++++++++++++------- 11 files changed, 112 insertions(+), 49 deletions(-) diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index c375147930..e5e5a57f09 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -349,6 +349,8 @@ def _Assign(self, t): # if the veclen is greater than one, this should be defined with a vector data type self.write("{}{} ".format(dace.dtypes._OCL_VECTOR_TYPES[inferred_type.type], inferred_type.veclen)) + elif self.language == dace.dtypes.Language.OpenCL: + self.write(dace.dtypes._OCL_TYPES[inferred_type.type] + " ") else: self.write(dace.dtypes._CTYPES[inferred_type.type] + " ") else: diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index 3f02d0e6cc..911a792ac9 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -1339,6 +1339,10 @@ def visit_Attribute(self, node): attrname = rname(node) module_name = attrname[:attrname.rfind(".")] func_name = attrname[attrname.rfind(".") + 1:] + if module_name == 'dace' and isinstance(getattr(dace, func_name, False), dtypes.typeclass): + # A type definition + dtype: dtypes.typeclass = getattr(dace, func_name) + return ast.copy_location(ast.Name(id=dtype.ctype, ctx=ast.Load), node) if module_name in dtypes._ALLOWED_MODULES: 
cppmodname = dtypes._ALLOWED_MODULES[module_name] return ast.copy_location(ast.Name(id=(cppmodname + func_name), ctx=ast.Load), node) diff --git a/dace/codegen/targets/fpga.py b/dace/codegen/targets/fpga.py index 0c74d6ec07..61ba9f95ad 100644 --- a/dace/codegen/targets/fpga.py +++ b/dace/codegen/targets/fpga.py @@ -2112,7 +2112,11 @@ def _generate_MapEntry(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgr end_type = None if end_type is not None: if np.dtype(end_type.dtype.type) > np.dtype('uint32'): - loop_var_type = end_type.ctype + v = dace.config.Config.get("compiler", "fpga", "vendor") + if v.casefold() == 'intel_fpga'.casefold(): + loop_var_type = end_type.ocltype + else: + loop_var_type = end_type.ctype elif np.issubdtype(np.dtype(end_type.dtype.type), np.unsignedinteger): loop_var_type = "size_t" except (UnboundLocalError): diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index d71ea40fee..0b8fa739fe 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -947,7 +947,12 @@ def generate_code(self, if not is_top_level and isvarName in sdfg.parent_nsdfg_node.symbol_mapping: continue isvar = data.Scalar(isvarType) - callsite_stream.write('%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)), sdfg) + if (schedule in (dtypes.ScheduleType.FPGA_Device, dtypes.ScheduleType.FPGA_Multi_Pumped) + and config.Config.get('compiler', 'fpga', 'vendor').lower() == 'intel_fpga'): + # Emit OpenCL type + callsite_stream.write(f'{isvarType.ocltype} {isvarName};\n', sdfg) + else: + callsite_stream.write('%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)), sdfg) self.dispatcher.defined_vars.add(isvarName, disp.DefinedType.Scalar, isvarType.ctype) callsite_stream.write('\n', sdfg) diff --git a/dace/codegen/targets/intel_fpga.py b/dace/codegen/targets/intel_fpga.py index 513dc0bbfc..9437dccbe3 100644 --- a/dace/codegen/targets/intel_fpga.py +++ b/dace/codegen/targets/intel_fpga.py @@ -169,15 +169,16 
@@ def get_generated_codeobjects(self): "cpp", IntelFPGACodeGen, "Intel FPGA", - target_type="host") + target_type="host", + sdfg=self._global_sdfg) kernel_code_objs = [ - CodeObject(kernel_name, code, "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device") + CodeObject(kernel_name, code, "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device", sdfg=self._global_sdfg) for (kernel_name, code, _) in self._kernel_codes ] # add the util header if present other_code_objs = [ - CodeObject(file_name, code.getvalue(), "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device") + CodeObject(file_name, code.getvalue(), "cl", IntelFPGACodeGen, "Intel FPGA", target_type="device", sdfg=self._global_sdfg) for (file_name, code) in self._other_codes.items() ] @@ -299,8 +300,8 @@ def make_kernel_argument(self, data, var_name, is_output, with_vectorization): return "__global volatile {}* restrict {}".format(vec_type, var_name) elif isinstance(data, dace.data.Stream): return None # Streams are global objects - else: - return data.as_arg(with_types=True, name=var_name) + else: # Scalar or structure + return f'{data.dtype.ocltype} {var_name}' @staticmethod def generate_unroll_loop_pre(kernel_stream, factor, sdfg, cfg, state_id, node): @@ -570,8 +571,9 @@ def generate_module(self, sdfg, cfg, state, kernel_name, module_name, subgraph, arg = self.make_kernel_argument(p, pname, is_output, True) if arg is not None: - #change c type long long to opencl type long - arg = arg.replace("long long", "long") + #change c type to opencl type + if arg in dtypes._CTYPES_TO_OCLTYPES: + arg = dtypes._CTYPES_TO_OCLTYPES[arg] kernel_args_opencl.append(arg) kernel_args_host.append(p.as_arg(True, name=pname)) @@ -733,7 +735,7 @@ def generate_nsdfg_header(self, sdfg, cfg, state, state_id, node, memlet_referen arguments = [f'{atype} {aname}' for atype, aname, _ in memlet_references] fsyms = node.sdfg.used_symbols(all_symbols=False, keep_defined_in_mapping=True) arguments += [ - 
f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) + f'{node.sdfg.symbols[aname].ocltype} {aname}' for aname in sorted(node.symbol_mapping.keys()) if aname in fsyms and aname not in sdfg.constants ] arguments = ', '.join(arguments) @@ -769,8 +771,9 @@ def generate_nsdfg_arguments(self, sdfg, cfg, dfg, state, node): ptrname = cpp.ptr(in_memlet.data, desc, sdfg, self._frame) defined_type, defined_ctype = self._dispatcher.defined_vars.get(ptrname, 1) - #change c type long long to opencl type long - defined_ctype = defined_ctype.replace("long long", "long") + #change c type to opencl type + if defined_ctype in dtypes._CTYPES_TO_OCLTYPES: + defined_ctype = dtypes._CTYPES_TO_OCLTYPES[defined_ctype] if isinstance(desc, dace.data.Array) and (desc.storage == dtypes.StorageType.FPGA_Global or desc.storage == dtypes.StorageType.FPGA_Local): @@ -822,9 +825,9 @@ def generate_nsdfg_arguments(self, sdfg, cfg, dfg, state, node): ptrname = cpp.ptr(out_memlet.data, desc, sdfg, self._frame) defined_type, defined_ctype = self._dispatcher.defined_vars.get(ptrname, 1) - #change c type long long to opencl type long - if defined_ctype.__contains__("long long"): - defined_ctype = defined_ctype.replace("long long", "long") + #change c type to opencl type + if defined_ctype in dtypes._CTYPES_TO_OCLTYPES: + defined_ctype = dtypes._CTYPES_TO_OCLTYPES[defined_ctype] if isinstance(desc, dace.data.Array) and (desc.storage == dtypes.StorageType.FPGA_Global or desc.storage == dtypes.StorageType.FPGA_Local): @@ -908,7 +911,7 @@ def allocate_view(self, sdfg: dace.SDFG, cfg: ControlFlowRegion, dfg: SDFGState, # derive the declaration/definition qualifier = "__global volatile " - atype = dtypes.pointer(nodedesc.dtype).ctype + " restrict" + atype = dtypes.pointer(nodedesc.dtype).ocltype + " restrict" aname = ptrname viewed_desc = sdfg.arrays[edge.data.data] eptr = cpp.ptr(edge.data.data, viewed_desc, sdfg, self._frame) @@ -1261,7 +1264,7 @@ def 
generate_constants(self, sdfg, callsite_stream): for cstname, (csttype, cstval) in sdfg.constants_prop.items(): if isinstance(csttype, dace.data.Array): - const_str = "__constant " + csttype.dtype.ctype + \ + const_str = "__constant " + csttype.dtype.ocltype + \ " " + cstname + "[" + str(cstval.size) + "]" if cstname not in self.generated_constants: diff --git a/dace/dtypes.py b/dace/dtypes.py index d0c6f23e03..465e73b2b1 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -250,12 +250,12 @@ class TilingType(aenum.AutoNumberEnum): numpy.int16: "short", numpy.int32: "int", numpy.intc: "int", - numpy.int64: "long long", - numpy.uint8: "unsigned char", - numpy.uint16: "unsigned short", - numpy.uint32: "unsigned int", - numpy.uintc: "unsigned int", - numpy.uint64: "unsigned long long", + numpy.int64: "int64_t", + numpy.uint8: "uint8_t", + numpy.uint16: "uint16_t", + numpy.uint32: "uint32_t", + numpy.uintc: "dace::uint", + numpy.uint64: "uint64_t", numpy.float16: "dace::float16", numpy.float32: "float", numpy.float64: "double", @@ -275,17 +275,37 @@ class TilingType(aenum.AutoNumberEnum): numpy.int32: "int", numpy.intc: "int", numpy.int64: "long", - numpy.uint8: "unsigned char", - numpy.uint16: "unsigned short", - numpy.uint32: "unsigned int", - numpy.uint64: "unsigned long", - numpy.uintc: "unsigned int", + numpy.uint8: "uchar", + numpy.uint16: "ushort", + numpy.uint32: "uint", + numpy.uint64: "ulong", + numpy.uintc: "uint", numpy.float32: "float", numpy.float64: "double", numpy.complex64: "complex float", numpy.complex128: "complex double", } +_CTYPES_TO_OCLTYPES = { + "void": "void", + "int": "int", + "float": "float", + "double": "double", + "dace::complex64": "complex float", + "dace::complex128": "complex double", + "bool": "bool", + "char": "char", + "short": "short", + "int": "int", + "int64_t": "long", + "uint8_t": "uchar", + "uint16_t": "ushort", + "uint32_t": "uint", + "dace::uint": "uint", + "uint64_t": "ulong", + "dace::float16": "half", +} + # Translation 
of types to OpenCL vector types _OCL_VECTOR_TYPES = { numpy.int8: "char", @@ -1295,7 +1315,7 @@ def dtype_to_typeclass(dtype=None): bool = bool_ TYPECLASS_TO_STRING = { - bool: "dace::bool", + bool: "dace::bool_", bool_: "dace::bool_", uint8: "dace::uint8", uint16: "dace::uint16", diff --git a/dace/runtime/include/dace/reduction.h b/dace/runtime/include/dace/reduction.h index 927bf449de..81017610ae 100644 --- a/dace/runtime/include/dace/reduction.h +++ b/dace/runtime/include/dace/reduction.h @@ -205,15 +205,27 @@ namespace dace { #if defined(DACE_USE_GPU_ATOMICS) template <> - struct _wcr_fixed<ReductionType::Sum, long long> { + struct _wcr_fixed<ReductionType::Sum, int64_t> { - static DACE_HDFI long long reduce_atomic(long long *ptr, const long long& value) { + static DACE_HDFI int64_t reduce_atomic(int64_t *ptr, const int64_t& value) { return _wcr_fixed<ReductionType::Sum, unsigned long long>::reduce_atomic(( unsigned long long *)ptr, static_cast<unsigned long long>(value)); } - DACE_HDFI long long operator()(const long long &a, const long long &b) const { return a + b; } + DACE_HDFI int64_t operator()(const int64_t &a, const int64_t &b) const { return a + b; } + }; + + template <> + struct _wcr_fixed<ReductionType::Sum, uint64_t> { + + static DACE_HDFI uint64_t reduce_atomic(uint64_t *ptr, const uint64_t& value) { + return _wcr_fixed<ReductionType::Sum, unsigned long long>::reduce_atomic(( + unsigned long long *)ptr, + static_cast<unsigned long long>(value)); + } + + DACE_HDFI uint64_t operator()(const uint64_t &a, const uint64_t &b) const { return a + b; } }; #endif diff --git a/dace/runtime/include/dace/types.h b/dace/runtime/include/dace/types.h index aa20877549..e5eed1e35e 100644 --- a/dace/runtime/include/dace/types.h +++ b/dace/runtime/include/dace/types.h @@ -74,6 +74,7 @@ namespace dace typedef uint16_t uint16; typedef uint32_t uint32; typedef uint64_t uint64; + typedef unsigned int uint; typedef float float32; typedef double float64; diff --git a/dace/sdfg/infer_types.py b/dace/sdfg/infer_types.py index 97010e95a7..c05708670e 100644 --- a/dace/sdfg/infer_types.py +++ b/dace/sdfg/infer_types.py @@ -34,9 +34,6 @@ def infer_out_connector_type(sdfg: SDFG, state: 
SDFGState, node: nodes.CodeNode, else: allocated_as_scalar = True - if node.out_connectors[cname].type is not None: - return node.out_connectors[cname].type - # If nested SDFG, try to use internal array type if isinstance(node, nodes.NestedSDFG): scalar = (isinstance(node.sdfg.arrays[cname], data.Scalar) and allocated_as_scalar) diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index 1f5c263206..c603597fb1 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -244,7 +244,7 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context warnings.warn(f'Mismatch between constant and data descriptor of "{const_name}", ' f'expected to find "{const_type}" but found "{sdfg.arrays[const_name]}".') elif const_name in sdfg.symbols: - if const_type != sdfg.symbols[const_name]: + if const_type.dtype != sdfg.symbols[const_name]: # This should actually be an error, but there is a lots of code that depends on it. warnings.warn(f'Mismatch between constant and symobl type of "{const_name}", ' f'expected to find "{const_type}" but found "{sdfg.symbols[const_name]}".') diff --git a/tests/passes/dead_code_elimination_test.py b/tests/passes/dead_code_elimination_test.py index f8920b0538..a41a11c4d6 100644 --- a/tests/passes/dead_code_elimination_test.py +++ b/tests/passes/dead_code_elimination_test.py @@ -254,21 +254,31 @@ def test_dce_callback_manual(): sdfg.validate() -def test_dce_add_type_hint_of_variable(): +@pytest.mark.parametrize('dtype', (dace.float64, dace.bool, np.float64)) +def test_dce_add_type_hint_of_variable(dtype): """ The code of this test comes from this issue: https://github.com/spcl/dace/issues/1150#issue-1445418361 + and this issue: https://github.com/spcl/dace/issues/1710 + and this PR: https://github.com/spcl/dace/pull/1721 """ + if dtype is dace.bool: + true_value = True + false_value = False + else: + true_value = 3.0 + false_value = 7.0 + sdfg = dace.SDFG("test") state = sdfg.add_state() - 
sdfg.add_array("out", dtype=dace.float64, shape=(10,)) - sdfg.add_array("cond", dtype=dace.bool, shape=(10,)) - sdfg.add_array("tmp", dtype=dace.float64, shape=(10,), transient=True) + sdfg.add_array("out", dtype=dtype, shape=(10, )) + sdfg.add_array("cond", dtype=dace.bool, shape=(10, )) + sdfg.add_array("tmp", dtype=dtype, shape=(10, ), transient=True) tasklet, *_ = state.add_mapped_tasklet( - code=""" + code=f""" if _cond: - _tmp = 3.0 + _tmp = {true_value} else: - _tmp = 7.0 + _tmp = {false_value} _out = _tmp """, inputs={"_cond": dace.Memlet(subset="k", data="cond")}, @@ -281,14 +291,17 @@ def test_dce_add_type_hint_of_variable(): external_edges=True, ) sdfg.simplify() - assert tasklet.code.as_string.startswith("_tmp: dace.float64") + assert tasklet.code.as_string.startswith("_tmp:") compiledsdfg = sdfg.compile() - cond = np.random.choice(a=[True, False], size=(10,)) - out = np.zeros((10,)) - compiledsdfg(cond=cond, out=out) - assert np.all(out == np.where(cond, 3.0, 7.0)) + cond = np.random.choice(a=[True, False], size=(10, )) + if isinstance(dtype, dace.typeclass): + out = np.zeros((10, ), dtype=dtype.as_numpy_dtype()) + else: + out = np.zeros((10, ), dtype=dtype) + compiledsdfg(cond=cond, out=out) + assert np.all(out == np.where(cond, true_value, false_value)) if __name__ == '__main__': @@ -305,4 +318,6 @@ def test_dce_add_type_hint_of_variable(): test_dce() test_dce_callback() test_dce_callback_manual() - test_dce_add_type_hint_of_variable() + test_dce_add_type_hint_of_variable(dace.float64) + test_dce_add_type_hint_of_variable(dace.bool) + test_dce_add_type_hint_of_variable(np.float64)