From 5d7bd718cd72d9407dddd2af4fc55946ed92f11d Mon Sep 17 00:00:00 2001
From: trivedivivek <5340687+trivedivivek@users.noreply.github.com>
Date: Tue, 22 Oct 2024 08:40:20 -0500
Subject: [PATCH] Reduced int precision for texture coordinates in q_linear op,
 to reduce shader register pressure.

Differential Revision: D64191093

Pull Request resolved: https://github.com/pytorch/executorch/pull/6354
---
 .../vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl
index 02cae3ed98..624878a17c 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl
@@ -89,13 +89,15 @@ void main() {
 
 #else // USING_TEXTURE
 
+#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
+
 VEC4_T q_8w_linear(const ivec3 out_pos, const int K) {
-  ivec3 mat1_pos = ivec3(0, out_pos.yz);
-  ivec3 qmat2_pos = ivec3(0, out_pos.x * 4, 0);
+  u16vec3 mat1_pos = u16vec3(0, out_pos.yz);
+  u16vec3 qmat2_pos = u16vec3(0, out_pos.x * 4, 0);
 
   VEC4_T outtex = VEC4_T(0);
 
-  const ivec3 scales_pos = ivec3(out_pos.x, 0, 0);
+  const u16vec3 scales_pos = u16vec3(out_pos.x, 0, 0);
   const VEC4_T scales = load_texel(t_scales, scales_pos);
 
   for (int i = 0; i < K; i += 4) {
@@ -104,11 +106,11 @@ VEC4_T q_8w_linear(const ivec3 out_pos, const int K) {
     const VEC4_T sums = VEC4_T(
         dot(mat1_tex, load_texel(t_qmat2, qmat2_pos) * scales.x),
         dot(mat1_tex,
-            load_texel(t_qmat2, qmat2_pos + ivec3(0, 1, 0)) * scales.y),
+            load_texel(t_qmat2, qmat2_pos + u16vec3(0, 1, 0)) * scales.y),
         dot(mat1_tex,
-            load_texel(t_qmat2, qmat2_pos + ivec3(0, 2, 0)) * scales.z),
+            load_texel(t_qmat2, qmat2_pos + u16vec3(0, 2, 0)) * scales.z),
         dot(mat1_tex,
-            load_texel(t_qmat2, qmat2_pos + ivec3(0, 3, 0)) * scales.w));
+            load_texel(t_qmat2, qmat2_pos + u16vec3(0, 3, 0)) * scales.w));
 
     outtex += sums;