Skip to content

Commit

Permalink
AVX2 and AVX10 vpmaddubsw QD8_F16_QC4W GEMM move xor 0x88 from GEMM t…
Browse files Browse the repository at this point in the history
…o pack functions

- Generate FP16 versions of AVX vpmaddubsw GEMM microkernels
- Use uint4 packed weights using xnn_pack_qs8_qc4uw_gemm_goi_w

PiperOrigin-RevId: 658201549
  • Loading branch information
fbarchard authored and xnnpack-bot committed Aug 1, 2024
1 parent a8b769d commit 5d4acf1
Show file tree
Hide file tree
Showing 47 changed files with 11,886 additions and 0 deletions.
352 changes: 352 additions & 0 deletions bench/qd8-f16-qc4w-gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,182 @@


#if XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64)
// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=1, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/1,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=5, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/5,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=7, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/7,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=8, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/8,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=9, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/9,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=10, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/10,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=12, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/12,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=14, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/14,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=1, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/1,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=5, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/5,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=7, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/7,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=8, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/8,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=9, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/9,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=10, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/10,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=12, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/12,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256skx_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=14, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX256SKX. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/14,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX256SKX);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256skx_madd_prfm)

static void qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256skx,
Expand Down Expand Up @@ -915,6 +1091,182 @@


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=1, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/1,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=2, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/2,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=3, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/3,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=4, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/4,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=5, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/5,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=6, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/6,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=7, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/7,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=8, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/8,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=1, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/1,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=2, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/2,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=3, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/3,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=4, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/4,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=5, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/5,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=6, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/6,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=7, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/7,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx2_madd_prfm)

// Benchmark wrapper for xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd_prfm.
// Forwards the microkernel, the xnn_init_* params function and the xnn_pack_*
// function to GEMMBenchmark with mr=8, nr=8, kr=8, sr=1, followed by
// benchmark::utils::CheckAVX2. `net` is not referenced in this body.
static void qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd_prfm(
    benchmark::State& state, const char* net) {
  GEMMBenchmark(
      state,
      xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd_prfm,
      xnn_init_f16_qc4w_minmax_avxvnni_madd_params,
      xnn_pack_qs8_qc4uw_gemm_goi_w,
      /*mr=*/8,
      /*nr=*/8,
      /*kr=*/8,
      /*sr=*/1,
      benchmark::utils::CheckAVX2);
}

// Passes the wrapper above to the BENCHMARK_GEMM macro.
BENCHMARK_GEMM(qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx2_madd_prfm)

static void qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2,
Expand Down
16 changes: 16 additions & 0 deletions cmake/gen/avx256skx_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,30 @@


SET(ALL_AVX256SKX_MICROKERNEL_SRCS
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-1x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-1x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-1x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-2x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-3x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-4x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-5x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-5x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-5x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-6x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-7x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-7x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-7x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-8x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-8x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-8x8c8-minmax-avx256skx.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-9x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-9x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-10x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-10x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-12x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-12x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-14x8c8-minmax-avx256skx-madd-prfm.c
src/qd8-f16-qc4w-gemm/gen/qd8-f16-qc4w-gemm-14x8c8-minmax-avx256skx-madd.c
src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-1x8c8-minmax-avx256skx.c
src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-5x8c8-minmax-avx256skx.c
src/qd8-f16-qc8w-gemm/gen/qd8-f16-qc8w-gemm-7x8c8-minmax-avx256skx.c
Expand Down
Loading

0 comments on commit 5d4acf1

Please sign in to comment.