Skip to content

Commit

Permalink
Define and optimize RDNA1
Browse files Browse the repository at this point in the history
  • Loading branch information
daniandtheweb committed Jul 3, 2024
1 parent e112b61 commit 1d1754f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 4 additions & 0 deletions ggml-cuda/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ typedef float2 dfloat2;
#define RDNA2
#endif

+#if defined(__gfx1010__) || defined(__gfx1012__)
+#define RDNA1
+#endif
+
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
Expand Down
4 changes: 2 additions & 2 deletions ggml-cuda/mmq.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1972,9 +1972,9 @@ static __device__ void mul_mat_q_process_tile(

template <ggml_type type, int mmq_x, int nwarps, bool need_check>
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-#if defined(RDNA3) || defined(RDNA2)
+#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
__launch_bounds__(WARP_SIZE*nwarps, 2)
-#endif // defined(RDNA3) || defined(RDNA2)
+#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
#else
#if __CUDA_ARCH__ >= CC_VOLTA
__launch_bounds__(WARP_SIZE*nwarps, 1)
Expand Down

0 comments on commit 1d1754f

Please sign in to comment.