diff --git a/src/interface/blas3/backend/amd_gpu.hpp b/src/interface/blas3/backend/amd_gpu.hpp index c8748829d..f61f60cd0 100644 --- a/src/interface/blas3/backend/amd_gpu.hpp +++ b/src/interface/blas3/backend/amd_gpu.hpp @@ -45,13 +45,13 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, if constexpr (s_a && s_b || ((s_a && _t_b) || (s_b && _t_a))) { return _dependencies; } else { - // computing arithmetic intensity with combination of input to use it as + // computing arithmetic ratio with combination of input to use it as // heuristic numerator is the number of fma, denominator is the number of // bytes access. const auto n_fma = (static_cast(_M) * static_cast(_K) * static_cast(_N)); const auto n_bytes_access = (_M * _K + _K * _N + _M * _N); - const auto arith_intensity = n_fma / n_bytes_access; + const auto arith_ratio = n_fma / n_bytes_access; static constexpr int ClSize = 64; static constexpr int tileWgSize = ClSize / sizeof(element_t); if (batch_type == gemm_batch_type_t::interleaved) { @@ -132,10 +132,10 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, } else #endif // GEMM_TALL_SKINNY_SUPPORT // Following configurations are taken using the auto tuner on amd-mi210 - // and divided following their arith_intensity or another ratio between _N + // and divided following their arith_ratio or another ratio between _N // and _K input size if ((_N >> 4) > _K) { - if (arith_intensity <= 100) { + if (arith_ratio <= 100) { return blas::Gemm_Launcher< container_0_t, container_1_t, container_2_t, 256, false, false, true, 64, Tile<4, 8, 16, 16>, _t_a, _t_b, s_a, s_b, @@ -158,7 +158,7 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, _stridea, _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, batch_size, _dependencies); } - } else if (arith_intensity >= 360) { + } else if (arith_ratio >= 360) { return blas::Gemm_Launcher< container_0_t, container_1_t, container_2_t, 256, false, false, true, 32, Tile<8, 8, 16, 16>, _t_a, _t_b, s_a, s_b, @@ -169,7 +169,7 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, batch_size, _dependencies); - } else if (arith_intensity >= 240) { + } else if (arith_ratio >= 240) { return blas::Gemm_Launcher< container_0_t, container_1_t, container_2_t, 256, false, false, true, 64, Tile<4, 4, 16, 8>, _t_a, _t_b, s_a, s_b, @@ -180,7 +180,7 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, batch_size, _dependencies); - } else if (arith_intensity > 162) { + } else if (arith_ratio > 162) { return blas::Gemm_Launcher< container_0_t, container_1_t, container_2_t, 256, false, false, true, 128, Tile<4, 4, 16, 8>, _t_a, _t_b, s_a, s_b, @@ -191,7 +191,7 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, batch_size, _dependencies); - } else if (arith_intensity >= 100 && arith_intensity <= 162) { + } else if (arith_ratio >= 100 && arith_ratio <= 162) { return blas::Gemm_Launcher< container_0_t, container_1_t, container_2_t, 256, false, true, true, 128, Tile<2, 2, 16, 8>, _t_a, _t_b, s_a, s_b, @@ -202,7 +202,7 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K, template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea, _b, _ldb, _strideb, _beta, _c, _ldc, _stridec, batch_size, _dependencies); - } else if (arith_intensity <= 100) { + } else if (arith_ratio <= 100) { return blas::Gemm_Launcher< container_0_t, container_1_t, container_2_t, 256, false, false, true, 128, Tile<1, 1, 16, 8>, _t_a, _t_b, s_a, s_b,