Skip to content

Commit

Permalink
ggml-cuda : remove some superfluous comments for TQ2_0 tile loading
Browse files — browse the repository at this point in the history
  • Loading branch information
compilade committed Jan 10, 2025
1 parent 983aa09 commit f5fddb6
Showing 1 changed file with 0 additions and 2 deletions.
2 changes: 0 additions & 2 deletions ggml/src/ggml-cuda/mmq.cuh
Original file line number | Diff line number | Diff line change
Expand Up @@ -1848,13 +1848,11 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
#ifdef INT8_MMA_AVAILABLE
x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k] = q;
#else
- // NOTE: this might assume WARP_SIZE is >= 32
x_qs[i*(2*WARP_SIZE + 1) + k] = q;
#endif // INT8_MMA_AVAILABLE
}
}

- // TODO: does this work with WARP_SIZE != 32?
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps * WARP_SIZE/(QI2_0/2)) {
int i = i0 + threadIdx.y*(2*WARP_SIZE/QI2_0) + threadIdx.x/(QI2_0/2);
Expand Down

0 comments on commit f5fddb6

Please sign in to comment.