From 037e57d1a109d69fd989d865cb4c6c3dcb788ae3 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Mon, 13 Jan 2025 11:17:59 -0800 Subject: [PATCH] Update GenAI CK Version Summary: This diff pulls in the latest from the develop branch of CK. There have been a few small API changes that we also accommodate in fbgemm. It looks like AMD also fixed a bunch of their compilation issues so we were able to re-add a bunch of files to the ck-library target. I don't see any performance or accuracy regressions from this bump in fbgemm kernels. Differential Revision: D68113590 --- .../quantize/ck_extensions/bf16_grouped/bf16_grouped_gemm.hip | 2 +- .../ck_extensions/bf16_grouped/kernels/bf16_grouped_common.h | 2 +- .../fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip | 2 +- .../fp8_rowwise_grouped/kernels/fp8_rowwise_grouped_common.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/bf16_grouped_gemm.hip b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/bf16_grouped_gemm.hip index a00564b906..012884f09e 100644 --- a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/bf16_grouped_gemm.hip +++ b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/bf16_grouped_gemm.hip @@ -28,7 +28,7 @@ namespace fbgemm_gpu { // Define useful types that are needed for various kernels. 
using KernelArguments = - ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<0>; + ck::tensor_operation::device::GroupedGemmKernelArgument<0>; using ADataType = ck::bhalf_t; using BDataType = ck::bhalf_t; using CDataType = ck::bhalf_t; diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/kernels/bf16_grouped_common.h b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/kernels/bf16_grouped_common.h index 05aa80b6c5..dcf49d5cec 100644 --- a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/kernels/bf16_grouped_common.h +++ b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/kernels/bf16_grouped_common.h @@ -125,7 +125,7 @@ std::vector bf16_grouped_impl( // Get input information. int group_count = A.size(); using KernelArguments = - ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<0>; + ck::tensor_operation::device::GroupedGemmKernelArgument<0>; using GemmDesc = ck::tensor_operation::device::GemmDesc; // Create gemm shape containers. std::vector gemm_descs; diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip index e053eebb00..343a98c8d2 100644 --- a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip +++ b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip @@ -29,7 +29,7 @@ namespace fbgemm_gpu { // Define useful types that are needed for various kernels. 
using KernelArguments = - ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<2>; + ck::tensor_operation::device::GroupedGemmKernelArgument<2>; using ADataType = ck::f8_t; using BDataType = ck::f8_t; using D0DataType = float; diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped_common.h b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped_common.h index 806adabca2..894a10eef7 100644 --- a/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped_common.h +++ b/fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped_common.h @@ -133,7 +133,7 @@ std::vector f8f8bf16_rowwise_grouped_impl( // Get input information. int group_count = XQ.size(); using KernelArguments = - ck::tensor_operation::device::GroupedGemmTileLoopKernelArguments<2>; + ck::tensor_operation::device::GroupedGemmKernelArgument<2>; using GemmDesc = ck::tensor_operation::device::GemmDesc; // Create gemm shape containers. std::vector gemm_descs;