Skip to content

Commit

Permalink
HIP: Only call rocblas_initialize on rocblas versions with the multip…
Browse files Browse the repository at this point in the history
…le instantation bug (#11080)

This disables the workaround on rocblas fixed versions (>=4.0.0) to eliminate the runtime cost and unnecessary VRAM allocation of loading all tensile objects.
  • Loading branch information
sARY77 authored Jan 28, 2025
1 parent 7fee288 commit cae9fb4
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions ggml/src/ggml-cuda/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <algorithm>
#include <array>
#include <atomic>
#include <charconv>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -172,8 +173,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
#ifdef __HIP_PLATFORM_AMD__
// Workaround for a rocBLAS bug when using multiple graphics cards:
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
rocblas_initialize();
CUDA_CHECK(cudaDeviceSynchronize());
{
int major_version = 0;
size_t version_length = 0;
if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
std::string version(version_length, '\0');
if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
version.resize(::strlen(version.c_str()));
int parsed_value = 0;
if (std::from_chars(version.c_str(), version.c_str() + version.length(), parsed_value).ec == std::errc()) {
major_version = parsed_value;
}
}
}
if (major_version < 4) {
GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
rocblas_initialize();
CUDA_CHECK(cudaDeviceSynchronize());
}
}
#endif

ggml_cuda_device_info info = {};
Expand Down

0 comments on commit cae9fb4

Please sign in to comment.