Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ggml: Add run-time detection of neon, i8mm and sve #9331

Merged
merged 3 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2507,6 +2507,9 @@ extern "C" {
GGML_API int ggml_cpu_has_cann (void);
GGML_API int ggml_cpu_has_llamafile (void);

// get the sve vector length in bytes
GGML_API int ggml_cpu_get_sve_cnt(void);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is better to not be facing the public API. Will merge this for now, but consider making it private.


//
// Internal types and functions exposed for tests and benchmarks
//
Expand Down
13 changes: 2 additions & 11 deletions ggml/src/ggml-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -598,15 +598,6 @@ size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_
return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
}

// Return the number of byte lanes in the SVE vector if SVE is supported; otherwise, returns 0 if SVE is not supported.
static int sve_lane_count(void) {
#if defined(__ARM_FEATURE_SVE)
return ggml_sve_cnt_b;
#else
return 0;
#endif
}

void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, const void * restrict vy, int nr, int nc) {
const int qk = QK8_0;
const int nb = n / qk;
Expand Down Expand Up @@ -843,7 +834,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *

#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
#if defined(__ARM_FEATURE_SVE)
if (ggml_cpu_has_sve() && sve_lane_count() == QK8_0) {
if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
Expand Down Expand Up @@ -2020,7 +2011,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *

#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && sve_lane_count() == QK8_0) {
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -4013,7 +4013,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);

const int vector_length = ggml_sve_cnt_b*8;
const int vector_length = ggml_cpu_get_sve_cnt()*8;

// VLA Implementation using switch case
switch (vector_length) {
Expand Down Expand Up @@ -5597,7 +5597,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);

const int vector_length = ggml_sve_cnt_b*8;
const int vector_length = ggml_cpu_get_sve_cnt()*8;

//VLA Implemenation for SVE
switch (vector_length) {
Expand Down
4 changes: 0 additions & 4 deletions ggml/src/ggml-quants.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,6 @@ void iq2xs_free_impl(enum ggml_type type);
void iq3xs_init_impl(int grid_size);
void iq3xs_free_impl(int grid_size);

#if defined(__ARM_FEATURE_SVE)
extern int ggml_sve_cnt_b;
#endif

#ifdef __cplusplus
}
#endif
101 changes: 86 additions & 15 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@
#include <unistd.h>
#endif

#if defined(__ARM_FEATURE_SVE)
int ggml_sve_cnt_b = 0;
#endif
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#undef GGML_USE_LLAMAFILE
#endif
Expand Down Expand Up @@ -455,6 +452,15 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
float ggml_table_f32_f16[1 << 16];

#if defined(__ARM_ARCH)
struct ggml_arm_arch_features_type {
int has_neon;
int has_i8mm;
int has_sve;
int sve_cnt;
} ggml_arm_arch_features = {-1, -1, -1, 0};
#endif

GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
switch (status) {
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
Expand Down Expand Up @@ -3673,6 +3679,66 @@ static inline int ggml_up(int n, int m) {

////////////////////////////////////////////////////////////////////////////////

#if defined(__ARM_ARCH)

#if defined(__linux__) && defined(__aarch64__)
#include <sys/auxv.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#endif

static void ggml_init_arm_arch_features(void) {
#if defined(__linux__) && defined(__aarch64__)
uint32_t hwcap = getauxval(AT_HWCAP);
uint32_t hwcap2 = getauxval(AT_HWCAP2);

ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);

#if defined(__ARM_FEATURE_SVE)
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
#endif
#elif defined(__APPLE__)
int oldp = 0;
size_t size = sizeof(oldp);
if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
oldp = 0;
}
ggml_arm_arch_features.has_neon = oldp;

if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
oldp = 0;
}
ggml_arm_arch_features.has_i8mm = oldp;

ggml_arm_arch_features.has_sve = 0;
ggml_arm_arch_features.sve_cnt = 0;
#else
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
#if defined(__ARM_NEON)
ggml_arm_arch_features.has_neon = 1;
#else
ggml_arm_arch_features.has_neon = 0;
#endif

#if defined(__ARM_FEATURE_MATMUL_INT8)
ggml_arm_arch_features.has_i8mm = 1;
#else
ggml_arm_arch_features.has_i8mm = 0;
#endif

#if defined(__ARM_FEATURE_SVE)
ggml_arm_arch_features.has_sve = 1;
ggml_arm_arch_features.sve_cnt = 16;
#else
ggml_arm_arch_features.has_sve = 0;
ggml_arm_arch_features.sve_cnt = 0;
#endif
#endif
}
#endif

struct ggml_context * ggml_init(struct ggml_init_params params) {
// make this function thread safe
ggml_critical_section_start();
Expand Down Expand Up @@ -3723,6 +3789,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
}

#if defined(__ARM_ARCH)
ggml_init_arm_arch_features();
#endif

is_first_call = false;
}

Expand Down Expand Up @@ -3771,12 +3841,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

GGML_ASSERT_ALIGNED(ctx->mem_buffer);

#if defined(__ARM_FEATURE_SVE)
if (!ggml_sve_cnt_b) {
ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
}
#endif

GGML_PRINT_DEBUG("%s: context initialized\n", __func__);

ggml_critical_section_end();
Expand Down Expand Up @@ -23578,16 +23642,16 @@ int ggml_cpu_has_fma(void) {
}

int ggml_cpu_has_neon(void) {
#if defined(__ARM_NEON)
return 1;
#if defined(__ARM_ARCH)
return ggml_arm_arch_features.has_neon;
#else
return 0;
#endif
}

int ggml_cpu_has_sve(void) {
#if defined(__ARM_FEATURE_SVE)
return 1;
#if defined(__ARM_ARCH)
return ggml_arm_arch_features.has_sve;
#else
return 0;
#endif
Expand Down Expand Up @@ -23734,11 +23798,18 @@ int ggml_cpu_has_vsx(void) {
}

int ggml_cpu_has_matmul_int8(void) {
#if defined(__ARM_FEATURE_MATMUL_INT8)
return 1;
#if defined(__ARM_ARCH)
return ggml_arm_arch_features.has_i8mm;
#else
return 0;
#endif
}

int ggml_cpu_get_sve_cnt(void) {
#if defined(__ARM_ARCH)
return ggml_arm_arch_features.sve_cnt;
#else
return 0;
#endif
}
////////////////////////////////////////////////////////////////////////////////
Loading