diff --git a/aten/src/ATen/native/vulkan/api/Tensor.cpp b/aten/src/ATen/native/vulkan/api/Tensor.cpp index b74c463c0d8c41..0e1f385066ed84 100644 --- a/aten/src/ATen/native/vulkan/api/Tensor.cpp +++ b/aten/src/ATen/native/vulkan/api/Tensor.cpp @@ -7,36 +7,6 @@ namespace vulkan { namespace { -/** - * Determines an appropriate GPU Memory Layout qualifier based on the the - * StorageType requested and the c10::MemoryFormat specified. - */ -api::GPUMemoryLayout get_gpu_memory_layout( - const api::StorageType storage_type, - const c10::MemoryFormat memory_format) { - if (storage_type == api::StorageType::BUFFER) { - switch (memory_format) { - case c10::MemoryFormat::Contiguous: - return api::GPUMemoryLayout::TENSOR_WIDTH_PACKED; - case c10::MemoryFormat::ChannelsLast: - return api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED; - default: - VK_THROW("Invalid memory format used to create vTensor!"); - } - } - // For texture storage, always return a memory layout that packs the channels - // dimension. for now. With the way texture storage currently works, for 2-dim - // tensors, a channel dimension is added, as well as 3 channels of zero - // padding resulting in a final shape of {4, H, W}. For 1-dim tensors, it is - // unsqueezed to size {1, 1, L} and 3 channels of zero padding are added to - // produce a final size of {4, 1, L}. This is to ensure that physical texture - // positions correspond directly to logical tensor coordinates (so - // texelFetch(ivec3(x, y, 0), 0) will correspond to tensor[y, x]. - // - // TODO(ssjia): have 2D and 1D tensors use TENSOR_WIDTH_PACKED by default. - return api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED; -} - /* * Calculates the strides of a contiguous tensor. empty_tensor_restride from * TensorImpl.h was used as a reference. 
@@ -423,36 +393,6 @@ vTensor::vTensor( gpu_sizes_, dtype_)) {} -vTensor::vTensor( - api::Context* const context, - const std::vector& sizes, - const api::ScalarType dtype, - const api::StorageType storage_type, - const c10::MemoryFormat memory_format) - : vTensor( - context, - sizes, - dtype, - storage_type, - get_gpu_memory_layout(storage_type, memory_format)) {} - -vTensor::vTensor( - api::Context* const context, - const std::vector& sizes, - double q_scale, - int64_t q_zero_point, - const api::ScalarType dtype, - const api::StorageType storage_type, - const c10::MemoryFormat memory_format) - : vTensor( - context, - sizes, - q_scale, - q_zero_point, - dtype, - storage_type, - get_gpu_memory_layout(storage_type, memory_format)) {} - api::VulkanImage& vTensor::image( api::PipelineBarrier& pipeline_barrier, const api::PipelineStageFlags stage) const& { diff --git a/aten/src/ATen/native/vulkan/api/Tensor.h b/aten/src/ATen/native/vulkan/api/Tensor.h index ad6909119ae943..a7af6af5e4c320 100644 --- a/aten/src/ATen/native/vulkan/api/Tensor.h +++ b/aten/src/ATen/native/vulkan/api/Tensor.h @@ -6,7 +6,6 @@ #include #include -#include namespace at { namespace native { @@ -91,8 +90,9 @@ class vTensor final { api::Context* context, const std::vector& sizes, const api::ScalarType dtype, - const api::StorageType storage_type, - const api::GPUMemoryLayout memory_layout); + const api::StorageType storage_type = api::StorageType::TEXTURE_3D, + const api::GPUMemoryLayout memory_layout = + api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED); // Default constructor for quantized vTensor vTensor( @@ -101,26 +101,9 @@ class vTensor final { double q_scale, int64_t q_zero_point, const api::ScalarType dtype, - const api::StorageType storage_type, - const api::GPUMemoryLayout memory_layout); - - // Allows construction of vTensor from aten Tensor params - vTensor( - api::Context* context, - const std::vector& sizes, - const api::ScalarType dtype = api::kFloat, - const api::StorageType storage_type 
= api::StorageType::TEXTURE_3D, - const c10::MemoryFormat memory_format = c10::MemoryFormat::Contiguous); - - // Allows construction of quantized vTensor from aten Tensor params - vTensor( - api::Context* const context, - const std::vector& sizes, - double q_scale, - int64_t q_zero_point, - const api::ScalarType dtype = api::kQUInt8, const api::StorageType storage_type = api::StorageType::TEXTURE_3D, - const c10::MemoryFormat memory_format = c10::MemoryFormat::Contiguous); + const api::GPUMemoryLayout memory_layout = + api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED); // Copy Constructor and Assignment; Ideally copying would be disabled // (see the reasoning for move assignment below) but it is required for diff --git a/aten/src/ATen/native/vulkan/graph/Staging.h b/aten/src/ATen/native/vulkan/graph/Staging.h index dbe2d70145c874..e3a04620247bc7 100644 --- a/aten/src/ATen/native/vulkan/graph/Staging.h +++ b/aten/src/ATen/native/vulkan/graph/Staging.h @@ -2,6 +2,8 @@ #ifdef USE_VULKAN_API +#include + #include namespace at { diff --git a/aten/src/ATen/native/vulkan/ops/Common.h b/aten/src/ATen/native/vulkan/ops/Common.h index 7549e500095577..3cf0932240f5b1 100644 --- a/aten/src/ATen/native/vulkan/ops/Common.h +++ b/aten/src/ATen/native/vulkan/ops/Common.h @@ -2,6 +2,8 @@ #ifdef USE_VULKAN_API +#include + #include #include #include diff --git a/aten/src/ATen/native/vulkan/ops/Convert.h b/aten/src/ATen/native/vulkan/ops/Convert.h index fec6a281dcb516..d5d65434b26b30 100644 --- a/aten/src/ATen/native/vulkan/ops/Convert.h +++ b/aten/src/ATen/native/vulkan/ops/Convert.h @@ -12,6 +12,36 @@ namespace native { namespace vulkan { namespace ops { +/** + * Determines an appropriate GPU Memory Layout qualifier based on the + * StorageType requested and the c10::MemoryFormat specified. 
+ */ +inline api::GPUMemoryLayout get_gpu_memory_layout( + const api::StorageType storage_type, + const c10::MemoryFormat memory_format) { + if (storage_type == api::StorageType::BUFFER) { + switch (memory_format) { + case c10::MemoryFormat::Contiguous: + return api::GPUMemoryLayout::TENSOR_WIDTH_PACKED; + case c10::MemoryFormat::ChannelsLast: + return api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED; + default: + VK_THROW("Invalid memory format used to create vTensor!"); + } + } + // For texture storage, always return a memory layout that packs the channels + // dimension, for now. With the way texture storage currently works, for 2-dim + // tensors, a channel dimension is added, as well as 3 channels of zero + // padding resulting in a final shape of {4, H, W}. For 1-dim tensors, it is + // unsqueezed to size {1, 1, L} and 3 channels of zero padding are added to + // produce a final size of {4, 1, L}. This is to ensure that physical texture + // positions correspond directly to logical tensor coordinates (so + // texelFetch(ivec3(x, y, 0), 0) will correspond to tensor[y, x]). + // + // TODO(ssjia): have 2D and 1D tensors use TENSOR_WIDTH_PACKED by default. + return api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED; +} + /* * Converts a `c10::ScalarType` to an equivalent * `::at::native::vulkan::api::ScalarType`. 
diff --git a/aten/src/ATen/native/vulkan/ops/Copy.cpp b/aten/src/ATen/native/vulkan/ops/Copy.cpp index 890d0650783205..60bc3a341ba0df 100644 --- a/aten/src/ATen/native/vulkan/ops/Copy.cpp +++ b/aten/src/ATen/native/vulkan/ops/Copy.cpp @@ -282,7 +282,7 @@ vTensor to_vulkan(at::Tensor& src, const api::StorageType storage_type) { src.sizes().vec(), convert_dtype(src.scalar_type()), storage_type, - src.suggest_memory_format(), + get_gpu_memory_layout(storage_type, src.suggest_memory_format()), }; ops::pack_cpu_to_vulkan(src, v_ret); diff --git a/aten/src/ATen/native/vulkan/ops/Factory.cpp b/aten/src/ATen/native/vulkan/ops/Factory.cpp index 177745dbf485db..b746868c238fd1 100644 --- a/aten/src/ATen/native/vulkan/ops/Factory.cpp +++ b/aten/src/ATen/native/vulkan/ops/Factory.cpp @@ -15,14 +15,16 @@ Tensor _empty_affine_quantized( const double scale, const int64_t zero_point, const optional memory_format) { + api::StorageType storage_type = api::StorageType::TEXTURE_3D; return convert_quantized(vTensor{ api::context(), sizes.vec(), scale, zero_point, convert_dtype(dtype ? *dtype : c10::kFloat), - api::StorageType::TEXTURE_3D, - memory_format ? *memory_format : c10::MemoryFormat::Contiguous, + storage_type, + memory_format ? get_gpu_memory_layout(storage_type, *memory_format) + : api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, }); } @@ -33,12 +35,14 @@ Tensor empty_memory_format( const c10::optional device, const c10::optional pin_memory, const optional memory_format) { + api::StorageType storage_type = api::StorageType::TEXTURE_3D; return convert(vTensor{ api::context(), sizes.vec(), convert_dtype(dtype ? *dtype : c10::kFloat), - api::StorageType::TEXTURE_3D, - memory_format ? *memory_format : c10::MemoryFormat::Contiguous, + storage_type, + memory_format ? get_gpu_memory_layout(storage_type, *memory_format) + : api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, }); }