diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES index 2d334ee4e326e..a560ca0b8eece 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES @@ -15,6 +15,9 @@ atomic/atomic_max.cl atomic/atomic_sub.cl atomic/atomic_store.cl synchronization/barrier.cl +images/image_common.cl +images/image.cl +images/image_array.cl math/acos.cl math/acosh.cl math/asin.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl new file mode 100644 index 0000000000000..d20ee55f2cb04 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl @@ -0,0 +1,531 @@ +#include "image_common.h" +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +#ifdef cl_khr_3d_image_writes +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#endif + +// Declare ockl functions/builtins that we link from the ROCm device libs. +float4 __ockl_image_load_1D(_CLC_CONST_AS unsigned int *tex, int coord); +float4 __ockl_image_load_2D(_CLC_CONST_AS unsigned int *tex, int2 coord); +float4 __ockl_image_load_3D(_CLC_CONST_AS unsigned int *tex, int3 coord); +half4 __ockl_image_loadh_1D(_CLC_CONST_AS unsigned int *tex, int coord); +half4 __ockl_image_loadh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord); +half4 __ockl_image_loadh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord); + +void __ockl_image_store_1D(_CLC_CONST_AS unsigned int *tex, int coord, + float4 color); +void __ockl_image_store_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, + float4 color); +void __ockl_image_store_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, + float4 color); +void __ockl_image_storeh_1D(_CLC_CONST_AS unsigned int *tex, int coord, + half4 color); +void __ockl_image_storeh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, + half4 color); +void __ockl_image_storeh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, + half4 color); + +float4 __ockl_image_sample_1D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float coord); +float4 __ockl_image_sample_2D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +float4 __ockl_image_sample_3D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float3 coord); +half4 __ockl_image_sampleh_1D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float coord); +half4 __ockl_image_sampleh_2D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +half4 __ockl_image_sampleh_3D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float3 coord); + +// +// IMAGES +// + +// Fetch Ops + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, builtin_ret_t, \ + builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageFetch, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + builtin_ret_t##4 color = \ + __ockl_image_load##builtin_ret_postfix##_##dimension##D(tex, coord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float, ) + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, half, h) + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) + +// Float +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, float, f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, float, f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float, f, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, float2, Dv2_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float2, Dv2_f, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, float4, Dv4_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float4, Dv4_f, int3, Dv3_i) + +// Half +#ifdef cl_khr_fp16 +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half, DF16_, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half2, Dv2_DF16_, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half4, Dv4_DF16_, int3, Dv3_i) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, int, i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, int, i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, int, i, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, int2, Dv2_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, int2, Dv2_i, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, int4, Dv4_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, int4, Dv4_i, int3, Dv3_i) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, uint, j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, uint, j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, uint, j, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, uint2, Dv2_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, uint2, Dv2_j, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, uint4, Dv4_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, uint4, Dv4_j, int3, Dv3_i) + +// Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, short, s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, short, s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, short, s, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, short2, Dv2_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, short2, Dv2_s, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, short4, Dv4_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, short4, Dv4_s, int3, Dv3_i) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, ushort, t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, ushort, t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, ushort, t, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, ushort2, Dv2_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, ushort2, Dv2_t, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, ushort4, Dv4_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, ushort4, Dv4_t, int3, Dv3_i) + +// Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, char, a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, char, a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, char, a, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, char2, Dv2_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, char2, Dv2_a, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, char4, Dv4_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, char4, Dv4_a, int3, Dv3_i) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, uchar, h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, uchar, h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar, h, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, uchar2, Dv2_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar2, Dv2_h, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, uchar4, Dv4_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar4, Dv4_h, int3, Dv3_i) + +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN + +// Write Ops + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, builtin_ret_t, \ + builtin_ret_postfix) \ + _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageWrite, I, coord_mangled##elem_t_mangled##EvT_T0_T1_)( \ + ulong imageHandle, coord_t coord, elem_t color) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + builtin_ret_t##4 outColor = \ + __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ + __ockl_image_store##builtin_ret_postfix##_##dimension##D(tex, coord, \ + outColor); \ + } + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float, ) + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, half, h) + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) + +// Float +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, float, f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, float, f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float, f, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, float2, Dv2_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float2, Dv2_f, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, float4, Dv4_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float4, Dv4_f, int3, Dv3_i) + +// Half +#ifdef cl_khr_fp16 +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half, DF16_, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half2, Dv2_DF16_, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half4, Dv4_DF16_, int3, Dv3_i) +#endif + +// Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, int, i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, int, i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, int, i, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, int2, Dv2_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, int2, Dv2_i, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, int4, Dv4_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, int4, Dv4_i, int3, Dv3_i) + +// Unsigned Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, uint, j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, uint, j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, uint, j, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, uint2, Dv2_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, uint2, Dv2_j, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, uint4, Dv4_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, uint4, Dv4_j, int3, Dv3_i) + +// Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, short, s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, short, s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, short, s, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, short2, Dv2_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, short2, Dv2_s, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, short4, Dv4_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, short4, Dv4_s, int3, Dv3_i) + +// Unsigned Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, ushort, t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, ushort, t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, ushort, t, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, ushort2, Dv2_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, ushort2, Dv2_t, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, ushort4, Dv4_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, ushort4, Dv4_t, int3, Dv3_i) + +// Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, char, a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, char, a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, char, a, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, char2, Dv2_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, char2, Dv2_a, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, char4, Dv4_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, char4, Dv4_a, int3, Dv3_i) + +// Unsigned Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, uchar, h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, uchar, h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar, h, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, uchar2, Dv2_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar2, Dv2_h, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, uchar4, Dv4_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar4, Dv4_h, int3, Dv3_i) + +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN + +// +// SAMPLED IMAGES +// + +// Read Ops + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, builtin_ret_t, \ + builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 17, __spirv_ImageRead, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ + builtin_ret_t##4 color = \ + __ockl_image_sample##builtin_ret_postfix##_##dimension##D(tex, samp, \ + coord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, float, ) + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, half, h) + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) + +// Float +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float, f, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float, f, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float, f, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float2, Dv2_f, float3, + Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float4, Dv4_f, float3, + Dv3_f) + +// Half +#ifdef cl_khr_fp16 +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half, DF16_, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half2, Dv2_DF16_, float3, + Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half4, Dv4_DF16_, float3, + Dv3_f) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, int, i, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, int, i, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, int, i, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, int2, Dv2_i, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, int2, Dv2_i, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, int2, Dv2_i, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, int4, Dv4_i, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, int4, Dv4_i, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, int4, Dv4_i, float3, Dv3_f) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint, j, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint, j, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint2, Dv2_j, float3, + Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint4, Dv4_j, float3, + Dv3_f) + +// Short +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short, s, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short, s, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short, s, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short2, Dv2_s, float3, + Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short4, Dv4_s, float3, + Dv3_f) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort, t, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort, t, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort2, Dv2_t, float3, + Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort4, Dv4_t, float3, + Dv3_f) + +// Char +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, char, a, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, char, a, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, char, a, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, char2, Dv2_a, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, char2, Dv2_a, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, char2, Dv2_a, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, char4, Dv4_a, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, char4, Dv4_a, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, char4, Dv4_a, float3, Dv3_f) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar, h, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar, h, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar2, Dv2_h, float3, + Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar4, Dv4_h, float3, + Dv3_f) + +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN + +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN + +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN + +#undef _CLC_CONST_AS +#undef _CLC_MANGLE_FUNC_IMG_HANDLE diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl new file mode 100644 index 0000000000000..62efb4257a95e --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl @@ -0,0 +1,525 @@ +#include "image_common.h" +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +#define _CLC_ARRAY_COORD_PARAMS_1D(coord, layer) coord, layer +#define _CLC_ARRAY_COORD_PARAMS_2D(coord, layer) coord.x, coord.y, layer, 0 + +// Declare ockl functions/builtins that we link from the ROCm device libs. +float4 __ockl_image_load_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); +float4 __ockl_image_load_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); +half4 __ockl_image_loadh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); +half4 __ockl_image_loadh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); + +void __ockl_image_store_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, + float4 color); +void __ockl_image_store_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, + float4 color); +void __ockl_image_storeh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, + half4 color); +void __ockl_image_storeh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, + half4 color); + +float4 __ockl_image_sample_1Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +float4 __ockl_image_sample_2Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float4 coord); +half4 __ockl_image_sampleh_1Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +half4 __ockl_image_sampleh_2Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float4 coord); + +// +// IMAGE ARRAYS +// + +// Read Ops + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE(23, __spirv_ImageArrayFetch, \ + I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_i)( \ + ulong imageHandle, coord_t coord, int layer) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + int##vec_size arrayCoord = \ + (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ + builtin_ret_t##4 color = \ + __ockl_image_load##builtin_ret_postfix##_##dimension##Da(tex, \ + arrayCoord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + float, ) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) + +// Float +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float, f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float, f, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float2, Dv2_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float4, Dv4_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, + 4) + +// Half +#ifdef cl_khr_fp16 +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i, + 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, + Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, + Dv2_i, 4) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int, i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int, i, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int2, Dv2_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int4, Dv4_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, + 4) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint, j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, + 4) + +// Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short, s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short, s, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short2, Dv2_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short4, Dv4_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, + 4) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort, t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort2, Dv2_t, int2, + Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort4, Dv4_t, int2, + Dv2_i, 4) + +// Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char, a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char, a, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char2, Dv2_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char4, Dv4_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, + 4) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar, h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, + 4) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN + +// Write Ops + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 23, __spirv_ImageArrayWrite, I, \ + coord_mangled##elem_t_mangled##EvT_T0_iT1_)( \ + ulong imageHandle, coord_t coord, int layer, elem_t color) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + int##vec_size arrayCoord = \ + (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ + builtin_ret_t##4 outColor = \ + __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ + __ockl_image_store##builtin_ret_postfix##_##dimension##Da(tex, arrayCoord, \ + outColor); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + float, ) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) + +// Float +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float, f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float, f, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float2, Dv2_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float4, Dv4_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, + 4) + +// Half +#ifdef cl_khr_fp16 +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i, + 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, + Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, + Dv2_i, 4) +#endif + +// Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int, i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int, i, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int2, Dv2_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int4, Dv4_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, + 4) + +// Unsigned Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint, j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, + 4) + +// Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short, s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short, s, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short2, Dv2_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short4, Dv4_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, + 4) + +// Unsigned Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort, t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort2, Dv2_t, int2, + Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort4, Dv4_t, int2, + Dv2_i, 4) + +// Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char, a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char, a, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char2, Dv2_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char4, Dv4_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, + 4) + +// Unsigned Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar, h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, + 4) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN + +// +// SAMPLED IMAGE ARRAYS +// + +// Read Ops + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE(22, __spirv_ImageArrayRead, \ + I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_i)( \ + ulong imageHandle, coord_t coord, int layer) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ + float##vec_size arrayCoord = (float##vec_size)( \ + _CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, (float)layer)); \ + builtin_ret_t##4 color = \ + __ockl_image_sample##builtin_ret_postfix##_##dimension##Da( \ + tex, samp, arrayCoord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + float, ) + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) + +// Float +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float, f, float, f, + 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float, f, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, + float2, Dv2_f, 4) + +// Half +#ifdef cl_khr_fp16 +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, + float2, Dv2_f, 4) +#endif + +// Int +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int, i, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int, i, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int2, Dv2_i, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int2, Dv2_i, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int4, Dv4_i, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int4, Dv4_i, float2, + Dv2_f, 4) + +// Unsigned Int +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint, j, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, + Dv2_f, 4) + +// Short +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short, s, float, f, + 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short, s, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, + float2, Dv2_f, 4) + +// Unsigned Short +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort, t, float, f, + 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, + float2, Dv2_f, 4) + +// Char +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char, a, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char, a, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char2, Dv2_a, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char2, Dv2_a, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char4, Dv4_a, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char4, Dv4_a, float2, + Dv2_f, 4) + +// Unsigned Char +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar, h, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, + Dv2_f, 4) + +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN + +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN + +#undef _CLC_ARRAY_COORD_PARAMS_1D +#undef _CLC_ARRAY_COORD_PARAMS_2D + +#undef _CLC_CONST_AS +#undef _CLC_MANGLE_FUNC_IMG_HANDLE diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl new file mode 100644 index 0000000000000..af2734bb04ce4 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl @@ -0,0 +1,159 @@ +#include "image_common.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +// From +// https://github.com/ROCm/clr/tree/amd-staging/hipamd/include/hip/amd_detail/texture_fetch_functions.h +_CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD = 12; + +// Using the builtin as_type() and as_typen() functions to reinterpret types. +// The restriction being is that element "type"s need to be of the same size. +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DEF to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from) { \ + vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ + return as_##to_t##3(casted); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DEF to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from) { \ + vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ + return as_##to_t##2((vec4_elem_t##2)(casted.x, casted.y)); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DEF to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from) { \ + vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ + return as_##to_t(casted.x); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from) { \ + vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ + return as_##vec4_elem_t##4(casted); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from) { \ + vec4_elem_t##2 casted = as_##vec4_elem_t##2(from); \ + return (vec4_elem_t##4)(casted.x, casted.y, 0, 0); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from) { \ + vec4_elem_t casted = as_##vec4_elem_t(from); \ + return (vec4_elem_t##4)(casted, 0, 0, 0); \ + } + +// Generic casts between builtin types. +#define _CLC_DEFINE_CAST_VEC4(vec4_elem_t, to_t) \ + _CLC_DEF to_t##4 __clc_cast_from_##vec4_elem_t##4_to_##to_t##4( \ + vec4_elem_t##4 from) { \ + return (to_t##4)(from.x, from.y, from.z, from.w); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DEF to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from) { \ + return (to_t##3)(from.x, from.y, from.z); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DEF to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from) { \ + return (to_t##2)(from.x, from.y); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DEF to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from) { \ + return (to_t)from.x; \ + } +#define _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from) { \ + return (vec4_elem_t##4)(from.x, from.y, from.z, 0); \ + } +#define _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from) { \ + return (vec4_elem_t##4)(from.x, from.y, 0, 0); \ + } +#define _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from) { \ + return (vec4_elem_t##4)(from, 0, 0, 0); \ + } + +// Helpers to extract N channel(s) from a four-channel (RGBA/XYZW) color type. + +#define _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, to_t) + +#define _CLC_DEFINE_EXTRACT_COLOR_HELPERS(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, to_t) + +// Define casts between supported builtin types for image color + +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, int) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(int, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, uint) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(uint, float) +#ifdef cl_khr_fp16 +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, half) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, short) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(short, half) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, ushort) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(ushort, half) +#endif + +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, short) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(short, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, ushort) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(ushort, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, char) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, uchar) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, float) +#ifdef cl_khr_fp16 +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, int) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(int, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uint) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uint, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, char) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uchar) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, half) +#endif + +#undef _CLC_DEFINE_EXTRACT_COLOR_HELPERS +#undef _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS + +#undef _CLC_DEFINE_CAST_SCALAR_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC2_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC3_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC4_TO_SCALAR +#undef _CLC_DEFINE_CAST_VEC4_TO_VEC3 +#undef _CLC_DEFINE_CAST_VEC4_TO_VEC2 +#undef _CLC_DEFINE_CAST_VEC4 +#undef _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_VEC3_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_SCALAR +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC3 +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC2 +#undef _CLC_DEFINE_BUILTIN_VEC4 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h new file mode 100644 index 0000000000000..eb32fb77d5c2d --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h @@ -0,0 +1,164 @@ +#ifndef CLC_SPIRV_IMAGE_COMMON +#define CLC_SPIRV_IMAGE_COMMON + +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +#ifdef _WIN32 +#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ + _Z##namelength##name##prefix##y##postfix +#else +#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ + _Z##namelength##name##prefix##m##postfix +#endif + +// The ockl functions/builtins we link against from the ROCm device libs expect +// resources to reside in constant address space. +#if __clang_major__ >= 8 +#define _CLC_CONST_AS __constant +#elif __clang_major__ >= 7 +#define _CLC_CONST_AS __attribute__((address_space(4))) +#else +#define _CLC_CONST_AS __attribute__((address_space(2))) +#endif + +// From +// https://github.com/ROCm/clr/tree/amd-staging/hipamd/include/hip/amd_detail/texture_fetch_functions.h +// defined in "image_common.cl" +extern _CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD; + +// Helpers for casting between two builitin vector types and/or scalar types. + +// Using the builtin as_type() and as_typen() functions to reinterpret types. +// The restriction being is that element "type"s need to be of the same size. +#define _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DECL to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DECL to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DECL to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from); + +#define _CLC_DECLARE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from); + +// Generic casts between builtin types. +#define _CLC_DECLARE_CAST_VEC4(vec4_elem_t, to_t) \ + _CLC_DECL to_t##4 __clc_cast_from_##vec4_elem_t##4_to_##to_t##4( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DECL to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DECL to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DECL to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from); + +#define _CLC_DECLARE_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from); + +#define _CLC_DECLARE_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from); + +// Helpers to extract N channel(s) from a four-channel (RGBA/XYZW) color type. + +#define _CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, to_t) + +#define _CLC_DECLARE_EXTRACT_COLOR_HELPERS(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_CAST_SCALAR_TO_VEC4(from_t, to_t) + +// Define casts between supported builtin types for image color + +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, float) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, int) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(int, float) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, uint) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(uint, float) +#ifdef cl_khr_fp16 +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, half) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, short) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(short, half) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, ushort) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(ushort, half) +#endif + +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, short) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(short, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, ushort) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(ushort, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, char) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(char, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, uchar) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(uchar, float) +#ifdef cl_khr_fp16 +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, int) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(int, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, uint) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(uint, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, char) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(char, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, uchar) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(uchar, half) +#endif + +#undef _CLC_DECLARE_EXTRACT_COLOR_HELPERS +#undef _CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS + +#undef _CLC_DECLARE_CAST_SCALAR_TO_VEC4 +#undef _CLC_DECLARE_CAST_VEC2_TO_VEC4 +#undef _CLC_DECLARE_CAST_VEC3_TO_VEC4 +#undef _CLC_DECLARE_CAST_VEC4_TO_SCALAR +#undef _CLC_DECLARE_CAST_VEC4_TO_VEC3 +#undef _CLC_DECLARE_CAST_VEC4_TO_VEC2 +#undef _CLC_DECLARE_CAST_VEC4 +#undef _CLC_DECLARE_BUILTIN_CAST_SCALAR_TO_VEC4 +#undef _CLC_DECLARE_BUILTIN_CAST_VEC2_TO_VEC4 +#undef _CLC_DECLARE_BUILTIN_VEC3_TO_VEC4 +#undef _CLC_DECLARE_BUILTIN_VEC4_TO_SCALAR +#undef _CLC_DECLARE_BUILTIN_VEC4_TO_VEC3 +#undef _CLC_DECLARE_BUILTIN_VEC4_TO_VEC2 +#undef _CLC_DECLARE_BUILTIN_VEC4 + +#endif // CLC_SPIRV_IMAGE_COMMON diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..83fb3ad6652b0 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/GeorgeWeb/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 8860fc1dd9fb1..f20369fb6eb31 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Thu Dec 19 11:26:01 2024 +0000 # Merge pull request #2277 from igchor/cooperative_fix # [Spec] fix urKernelSuggestMaxCooperativeGroupCountExp -set(UNIFIED_RUNTIME_TAG ea0f3a1f5f15f9af7bf40bd13669afeb9ada569c) +set(UNIFIED_RUNTIME_TAG georgi/bindless-hip) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 130e54b386cda..3d1b2484b9a77 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -2039,7 +2039,7 @@ void release_external_semaphore(external_semaphore semaphoreHandle, ```cpp #include -include::../../../test-e2e/bindless_images/examples/example_1_1D_read_write.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_1_1D_read_write.cpp[lines=12..-1] ``` === Reading from a dynamically sized array of 2D images @@ -2055,14 +2055,14 @@ include::../../../test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cp ```cpp #include -include::../../../test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp[lines=10..-1] ``` === 1D image array read/write ```cpp #include -include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp[lines=14..-1] ``` === Sampling a cubemap @@ -2070,7 +2070,7 @@ include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_writ ```c++ #include -include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp[lines=10..-1] ``` === Using imported memory and semaphore objects @@ -2078,7 +2078,7 @@ include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp ```c++ #include -include::../../../test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp[lines=8..-1] +include::../../../test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp[lines=14..-1] ``` == Implementation notes diff --git a/sycl/test-e2e/bindless_images/3_channel_format.cpp b/sycl/test-e2e/bindless_images/3_channel_format.cpp index a3668f4f31973..c82544f01dc53 100644 --- a/sycl/test-e2e/bindless_images/3_channel_format.cpp +++ b/sycl/test-e2e/bindless_images/3_channel_format.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Unimplemented in the HIP adapter yet. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp b/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp index 723c7233ea9a8..c6a1fad34d187 100644 --- a/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp index 65aabee94c242..f093910e3bfda 100644 --- a/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -137,7 +137,17 @@ static bool runTest(sycl::range dims, sycl::range localSize, unsigned int seed = 0) { using VecType = sycl::vec; - sycl::device dev; + sycl::device dev{}; + // skip half tests if the device does not support the aspect. + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + sycl::queue q(dev); auto ctxt = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp b/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp index 4815661efc2d2..47d84af028293 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp @@ -1,7 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: %if !any-device-is-hip %{ %{build} -o %t.out %} +// RUN: %if !any-device-is-hip %{ %{run} %t.out %} #include #include diff --git a/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp b/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp index 1b72a57bed47c..1bae687b08dd1 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp index ffc6a34db202a..7f0db5936adce 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp @@ -1,5 +1,4 @@ -// REQUIRES: linux -// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp index d228b308ab72e..2b63336c907f5 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp @@ -1,6 +1,4 @@ -// REQUIRES: cuda,aspect-ext_oneapi_cubemap // REQUIRES: aspect-ext_oneapi_cubemap_seamless_filtering -// REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp index d74b728593ce2..5f2ef75faafd1 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda,aspect-ext_oneapi_cubemap +// REQUIRES: aspect-ext_oneapi_cubemap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy.cpp index 4a9263e44a13e..f6d091feb1a43 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp index 250195358011a..43884b58b2cc4 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -15,9 +18,9 @@ namespace syclexp = sycl::ext::oneapi::experimental; void copy_image_mem_handle_to_image_mem_handle( const syclexp::image_descriptor &dataInDesc, - const syclexp::image_descriptor &outDesc, - const std::vector &dataIn1, const std::vector &dataIn2, - sycl::device dev, sycl::queue q, std::vector &out) { + const syclexp::image_descriptor &outDesc, const std::vector &dataIn1, + const std::vector &dataIn2, sycl::device dev, sycl::queue q, + std::vector &out) { // Check that output image is double size of input images assert(outDesc.width == dataInDesc.width * 2); diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp index 0dea97a3f745e..1c5be23383f80 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp index 680814bf6be77..3121affc105fd 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp index 85bde0e9a0ee5..e9e9be06bbf71 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp @@ -1,6 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: windows -// XFAIL: * + +// XFAIL: run-mode // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15851 // RUN: %{build} -l d3d12 -l dxgi -l dxguid -o %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp index cc7425a1e585c..beef3c7fcf09c 100644 --- a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp b/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp index f98c4c4fb073a..75f0b4c4a1061 100644 --- a/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp b/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp index d46eb88fa084c..c4fad4c74ee9c 100644 --- a/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap +// REQUIRES: aspect-ext_oneapi_mipmap_anisotropy // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp index 69fb804f977c5..cba3d324158e4 100644 --- a/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp index c6ba9b48fad52..73c011f87e93d 100644 --- a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_cubemap // REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp index d555b73d49aac..7c7443f9e3ed5 100644 --- a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp @@ -1,6 +1,12 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_external_memory_import +// REQUIRES: aspect-ext_oneapi_external_semaphore_import // RUN: %{build} -o %t.out +// This test is not being executed via the {run} command due to using invalid +// external input and output file descriptors for the external resource that is +// being imported. The purpose of this test is to showcase the interop APIs and +// in order to properly obtain those descriptors we would need a lot of Vulkan +// context and texture setup as a prerequisite to the example and complicate it. #include #include diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp index 8c36f431ba942..a7ea4825d8494 100644 --- a/sycl/test-e2e/bindless_images/image_get_info.cpp +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Image channels queries not working correctly on HIP. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp index 3f95d87f39b1b..39a81d68190c3 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp index a6e185a2b112c..49011973ef089 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp index 7f3ee74cdc949..778dd187aa2d9 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/read_norm_types.cpp b/sycl/test-e2e/bindless_images/read_norm_types.cpp index 80dace1ba0a53..9eec60c61178c 100644 --- a/sycl/test-e2e/bindless_images/read_norm_types.cpp +++ b/sycl/test-e2e/bindless_images/read_norm_types.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Returning non fp[32/16] values from sampling fails. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index 3e786855381e0..e0b3c3a109a25 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Returning non-FP values from fetching fails on HIP. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out @@ -127,7 +131,17 @@ static bool runTest(sycl::range dims, sycl::range localSize, unsigned int seed = 0) { using VecType = sycl::vec; - sycl::device dev; + sycl::device dev{}; + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + sycl::queue q(dev); auto ctxt = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/read_write_1D.cpp b/sycl/test-e2e/bindless_images/read_write_1D.cpp index e42f234a07642..ab148eee7f788 100644 --- a/sycl/test-e2e/bindless_images/read_write_1D.cpp +++ b/sycl/test-e2e/bindless_images/read_write_1D.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp index b768e9eb668eb..6b48b056c66cb 100644 --- a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp index 1c8157d9760bd..860f8ae9377e4 100644 --- a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp index 77f3ec299b7c1..f684075f1dcca 100644 --- a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Returning non fp[32/16] values from sampling fails. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp index 965dc9f00c1c4..5f537ea8855cb 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_1d_usm // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp index 0a6da2d97f136..4518832215e9e 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp index 834ec5b6e8c79..2dccb63645abf 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d_usm // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index ccb096dbfbdc5..0cffd4ef864a6 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_3d // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D.cpp b/sycl/test-e2e/bindless_images/sampling_2D.cpp index 316eebc0ace2c..2e253201f7713 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp index 96007abe1b511..aa98be888d7be 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp @@ -1,6 +1,12 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_images_shared_usm +// This test is unstable (sometimes passes) on HIP-AMD platforms. +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: While rarely, urBindlessImagesSampledImageCreateExp for +// USM image memory type (with linear sampler) sometimes returns an unsupported +// feature result code (1:1 mapping from the native errc from the HIP runtime). +// We think this is likely an issue in the ROCm drivers(could be arch-specific). + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp index f23fc4c470889..9e0c3a4e8f820 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp @@ -1,6 +1,13 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-fp16 +// This test is unstable (sometimes passes) on HIP-AMD platforms. +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: While rarely, urBindlessImagesSampledImageCreateExp for +// USM image memory type (with linear sampler) sometimes returns an unsupported +// feature result code (1:1 mapping from the native errc from the HIP runtime). +// We think this is likely an issue in the ROCm drivers(could be arch-specific). + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_3D.cpp b/sycl/test-e2e/bindless_images/sampling_3D.cpp index bf11c21191013..da6346e54a5d4 100644 --- a/sycl/test-e2e/bindless_images/sampling_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_3D.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: arch-amd_gpu_gfx90a +// UNSUPPORTED-INTENDED: AMD gfx90a devices don't support 3D linear filter mode + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp b/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp index 895f7082adce6..17c8cd71000da 100644 --- a/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp +++ b/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_unique_addressing_per_dim // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp b/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp index 77913a2836565..087b341bb74f8 100644 --- a/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp +++ b/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp b/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp index db9347f9895e6..d855ff43113bc 100644 --- a/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp +++ b/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Returning non-FP values from fetching fails on HIP. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp index 57623c1b2da03..412d6e6f2c3cb 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_external_memory_import +// REQUIRES: aspect-ext_oneapi_mipmap // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index 328c01355599d..fdafcdbeef4a9 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda || (windows && level_zero && aspect-ext_oneapi_bindless_images) +// REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp index 159b1c9ec1b44..47b1316e96360 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index b93ac2f55c47e..90acb67737b74 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda || (windows && level_zero && aspect-ext_oneapi_bindless_images) +// REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp index af9163311727c..b1778d4da32cc 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_external_semaphore_import +// REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: vulkan // REQUIRES: build-and-run-mode