Modernize HIP CMake commands, fix corner cases (#1518)
Enables support for newer HIP versions in CMake and fixes branch ordering when creating data descriptors.
tbennun authored Feb 19, 2024
1 parent 47ef29a commit f28e960
Showing 3 changed files with 38 additions and 35 deletions.
6 changes: 4 additions & 2 deletions dace/codegen/CMakeLists.txt
@@ -265,6 +265,8 @@ endif()

# Create HIP object files
if(DACE_ENABLE_HIP)
+  enable_language(HIP)
+
  # Get local AMD architectures
  if (NOT DEFINED LOCAL_HIP_ARCHITECTURES)
    # Compile and run a test program
@@ -304,8 +306,8 @@ if(DACE_ENABLE_HIP)
  set(DACE_LIBS ${DACE_LIBS} hip::host)

  set_source_files_properties(${DACE_HIP_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-  hip_prepare_target_commands(${DACE_PROGRAM_NAME} OBJ DACE_HIP_OBJECTS DACE_HIP_SOURCES ${DACE_HIP_FILES})
-  set(DACE_OBJECTS ${DACE_OBJECTS} ${DACE_HIP_OBJECTS})
+  set_source_files_properties(${DACE_HIP_FILES} PROPERTIES LANGUAGE HIP)
+  set(DACE_OBJECTS ${DACE_OBJECTS} ${DACE_HIP_FILES})
endif() # DACE_ENABLE_HIP

# create verilator RTL simulation objects
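For orientation, a minimal Python sketch (not part of this commit) of how the HIP build path is reached from DaCe. The compiler.cuda.backend configuration key is an assumption about DaCe's settings schema; the CMake hunk above only changes how the emitted HIP sources are compiled (native enable_language(HIP) and the LANGUAGE HIP source property instead of hip_prepare_target_commands).

# Hypothetical usage sketch; assumes DaCe selects HIP via compiler.cuda.backend.
import dace

dace.Config.set('compiler', 'cuda', 'backend', value='hip')

@dace.program
def scale(a: dace.float64[1024]):
    for i in dace.map[0:1024]:
        a[i] = a[i] * 2.0

sdfg = scale.to_sdfg()
sdfg.apply_gpu_transformations()  # retarget maps and arrays to the GPU
# On an AMD GPU system configured with DACE_ENABLE_HIP, compiling this SDFG
# emits HIP sources that the modified CMakeLists.txt now builds through
# CMake's native HIP language support.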
64 changes: 32 additions & 32 deletions dace/data.py
@@ -37,6 +37,38 @@ def create_datadescriptor(obj, no_custom_desc=False):
        return obj.__descriptor__()
    elif not no_custom_desc and hasattr(obj, 'descriptor'):
        return obj.descriptor
+    elif type(obj).__module__ == "torch" and type(obj).__name__ == "Tensor":
+        # special case for torch tensors. Maybe __array__ could be used here for a more
+        # general solution, but torch doesn't support __array__ for cuda tensors.
+        try:
+            # If torch is importable, define translations between typeclasses and torch types. These are reused by daceml.
+            # conversion happens here in pytorch:
+            # https://github.com/pytorch/pytorch/blob/143ef016ee1b6a39cf69140230d7c371de421186/torch/csrc/utils/tensor_numpy.cpp#L237
+            import torch
+            TYPECLASS_TO_TORCH_DTYPE = {
+                dtypes.bool_: torch.bool,
+                dtypes.int8: torch.int8,
+                dtypes.int16: torch.int16,
+                dtypes.int32: torch.int32,
+                dtypes.int64: torch.int64,
+                dtypes.uint8: torch.uint8,
+                dtypes.float16: torch.float16,
+                dtypes.float32: torch.float32,
+                dtypes.float64: torch.float64,
+                dtypes.complex64: torch.complex64,
+                dtypes.complex128: torch.complex128,
+            }
+
+            TORCH_DTYPE_TO_TYPECLASS = {v: k for k, v in TYPECLASS_TO_TORCH_DTYPE.items()}
+
+            storage = dtypes.StorageType.GPU_Global if obj.device.type == 'cuda' else dtypes.StorageType.Default
+
+            return Array(dtype=TORCH_DTYPE_TO_TYPECLASS[obj.dtype],
+                         strides=obj.stride(),
+                         shape=tuple(obj.shape),
+                         storage=storage)
+        except ImportError:
+            raise ValueError("Attempted to convert a torch.Tensor, but torch could not be imported")
    elif dtypes.is_array(obj) and (hasattr(obj, '__array_interface__') or hasattr(obj, '__cuda_array_interface__')):
        if dtypes.is_gpu_array(obj):
            interface = obj.__cuda_array_interface__
@@ -79,38 +111,6 @@ def create_datadescriptor(obj, no_custom_desc=False):
        dtype = dtypes.typeclass(obj.dtype.type)
        itemsize = obj.itemsize
        return Array(dtype=dtype, shape=obj.shape, strides=tuple(s // itemsize for s in obj.strides), storage=storage)
-    elif type(obj).__module__ == "torch" and type(obj).__name__ == "Tensor":
-        # special case for torch tensors. Maybe __array__ could be used here for a more
-        # general solution, but torch doesn't support __array__ for cuda tensors.
-        try:
-            # If torch is importable, define translations between typeclasses and torch types. These are reused by daceml.
-            # conversion happens here in pytorch:
-            # https://github.com/pytorch/pytorch/blob/143ef016ee1b6a39cf69140230d7c371de421186/torch/csrc/utils/tensor_numpy.cpp#L237
-            import torch
-            TYPECLASS_TO_TORCH_DTYPE = {
-                dtypes.bool_: torch.bool,
-                dtypes.int8: torch.int8,
-                dtypes.int16: torch.int16,
-                dtypes.int32: torch.int32,
-                dtypes.int64: torch.int64,
-                dtypes.uint8: torch.uint8,
-                dtypes.float16: torch.float16,
-                dtypes.float32: torch.float32,
-                dtypes.float64: torch.float64,
-                dtypes.complex64: torch.complex64,
-                dtypes.complex128: torch.complex128,
-            }
-
-            TORCH_DTYPE_TO_TYPECLASS = {v: k for k, v in TYPECLASS_TO_TORCH_DTYPE.items()}
-
-            storage = dtypes.StorageType.GPU_Global if obj.device.type == 'cuda' else dtypes.StorageType.Default
-
-            return Array(dtype=TORCH_DTYPE_TO_TYPECLASS[obj.dtype],
-                         strides=obj.stride(),
-                         shape=tuple(obj.shape),
-                         storage=storage)
-        except ImportError:
-            raise ValueError("Attempted to convert a torch.Tensor, but torch could not be imported")
    elif symbolic.issymbolic(obj):
        return Scalar(symbolic.symtype(obj))
    elif isinstance(obj, dtypes.typeclass):
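For context, a minimal sketch (not part of the diff) of what the relocated torch branch returns, assuming torch is installed. Moving the branch above the generic __array_interface__/__cuda_array_interface__ check is the branch-ordering fix from the commit message, likely so that CUDA tensors, which also expose __cuda_array_interface__, are resolved by the torch-specific code.

# Usage sketch; create_datadescriptor, the Array fields, and the StorageType
# values are taken from the diff above, the concrete tensor and asserts are
# illustrative.
import torch
from dace import data, dtypes

t = torch.ones(4, 8, dtype=torch.float32)
desc = data.create_datadescriptor(t)
assert desc.dtype == dtypes.float32          # mapped via TORCH_DTYPE_TO_TYPECLASS
assert tuple(desc.shape) == (4, 8)
assert desc.storage == dtypes.StorageType.Default

# For a tensor on a CUDA (or ROCm/HIP) device, the same call would report
# GPU_Global storage instead:
#   desc_gpu = data.create_datadescriptor(t.cuda())
#   assert desc_gpu.storage == dtypes.StorageType.GPU_Global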
3 changes: 2 additions & 1 deletion dace/runtime/include/dace/cuda/halfvec.cuh
@@ -530,12 +530,13 @@ namespace dace { namespace math {
    HALF_VEC_UFUNC(exp)
    HALF_VEC_UFUNC(tanh)
} }
+#endif

// Vector comparison functions
DACE_DFI half2 max(half2 a, half2 b) {
    return make_half2(max(a.x, b.x), max(a.y, b.y));
}
-#endif


DACE_DFI half4 max(half4 a, half b) {
    half2 bvec = __half2half2(b);
