Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove f16_f32_vcvt_params struct #6792

Merged
merged 1 commit into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 52 additions & 80 deletions bench/f16-f32-vcvt.cc

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions bench/vcvt-benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace {
static void f16_f32_vcvt(
benchmark::State& state,
xnn_f16_f32_vcvt_ukernel_fn cvt,
xnn_init_f16_f32_cvt_params_fn init_params = nullptr,
void* /*init_params*/ = nullptr,
benchmark::utils::IsaCheckFunction isa_check = nullptr)
{
if (isa_check && !isa_check(state)) {
Expand All @@ -45,12 +45,8 @@ static void f16_f32_vcvt(
std::generate(x.begin(), x.end(), std::ref(f16rng));
std::fill(y.begin(), y.end(), std::nanf(""));

xnn_f16_f32_cvt_params params;
if (init_params != nullptr) {
init_params(&params);
}
for (auto _ : state) {
cvt(num_elements * sizeof(uint16_t), x.data(), y.data(), &params);
cvt(num_elements * sizeof(uint16_t), x.data(), y.data(), nullptr);
}

const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
Expand Down
21 changes: 14 additions & 7 deletions src/amalgam/gen/avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,26 @@ void xnn_f16_f32_vcvt_ukernel__avx_int16_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const __m128i vsign_mask = _mm_load_si128((const __m128i*) params->sse_int16.sign_mask);
const __m128i vexp_offset = _mm_load_si128((const __m128i*) params->sse_int16.exp_offset);
const __m128 vexp_scale = _mm_load_ps(params->sse_int16.exp_scale);
const __m128i vmagic_mask = _mm_load_si128((const __m128i*) params->sse_int16.magic_mask);
const __m128 vmagic_bias = _mm_load_ps(params->sse_int16.magic_bias);
const __m128i vdenorm_cutoff = _mm_load_si128((const __m128i*) params->sse_int16.denorm_cutoff);
const __m128i vsign_mask = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vexp_offset = _mm_set1_epi16(UINT16_C(0x7000));
const __m128 vexp_scale = _mm_set1_ps(0x1.0p-112f);
const __m128i vmagic_mask = _mm_set1_epi16(UINT16_C(0x3F00));
const __m128 vmagic_bias = _mm_set1_ps(0.5f);
const __m128i vdenorm_cutoff = _mm_set1_epi16(UINT16_C(0x0400));

XNN_FORCE_REALIZATION(vsign_mask);
XNN_FORCE_REALIZATION(vexp_offset);
XNN_FORCE_REALIZATION(vexp_scale);
XNN_FORCE_REALIZATION(vmagic_mask);
XNN_FORCE_REALIZATION(vmagic_bias);
XNN_FORCE_REALIZATION(vdenorm_cutoff);

const uint16_t* i = (const uint16_t*) input;
for (; batch >= 16 * sizeof(uint16_t); batch -= 16 * sizeof(uint16_t)) {
Expand Down
2 changes: 1 addition & 1 deletion src/amalgam/gen/avx512skx.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void xnn_f16_f32_vcvt_ukernel__avx512skx_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
Expand Down
2 changes: 1 addition & 1 deletion src/amalgam/gen/f16c.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ void xnn_f16_f32_vcvt_ukernel__f16c_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
Expand Down
18 changes: 12 additions & 6 deletions src/amalgam/gen/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,24 @@ void xnn_f16_f32_vcvt_ukernel__neon_int16_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const uint16x8_t vsign_mask = vmovq_n_u16(0x8000);
const uint16x8_t vexp_offset = vmovq_n_u16(0x7000);
const float32x4_t vexp_scale = vld1q_dup_f32(&params->neon.exp_scale);
const uint32x4_t vmagic_bias = vmovq_n_u32(0x3F000000);
const uint16x8_t vdenorm_cutoff = vmovq_n_u16(0x0400);
const uint16x8_t vsign_mask = vmovq_n_u16(UINT16_C(0x8000));
const uint16x8_t vexp_offset = vmovq_n_u16(UINT16_C(0x7000));
const float32x4_t vexp_scale = vmovq_n_f32(0x1.0p-112f);
const uint32x4_t vmagic_bias = vmovq_n_u32(UINT32_C(0x3F000000));
const uint16x8_t vdenorm_cutoff = vmovq_n_u16(UINT16_C(0x0400));

XNN_FORCE_REALIZATION(vsign_mask);
XNN_FORCE_REALIZATION(vexp_offset);
XNN_FORCE_REALIZATION(vexp_scale);
XNN_FORCE_REALIZATION(vmagic_bias);
XNN_FORCE_REALIZATION(vdenorm_cutoff);

const uint16_t* i = (const uint16_t*) input;
for (; batch >= 16 * sizeof(uint16_t); batch -= 16 * sizeof(uint16_t)) {
Expand Down
2 changes: 1 addition & 1 deletion src/amalgam/gen/neonfp16.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ void xnn_f16_f32_vcvt_ukernel__neonfp16_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
Expand Down
28 changes: 14 additions & 14 deletions src/amalgam/gen/scalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,19 @@ void xnn_f16_f32_vcvt_ukernel__scalar_u1(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const uint32_t vsign_mask = params->scalar.sign_mask;
const uint32_t vexp_offset = params->scalar.exp_offset;
const float vexp_scale = params->scalar.exp_scale;
const uint32_t vmagic_mask = params->scalar.magic_mask;
const float vmagic_bias = params->scalar.magic_bias;
const uint32_t vdenorm_cutoff = params->scalar.denorm_cutoff;
const uint32_t vsign_mask = 0x80000000;
const uint32_t vexp_offset = 0x70000000;
const float vexp_scale = 0x1.0p-112f;
const uint32_t vmagic_mask = 0x3F000000;
const float vmagic_bias = 0.5f;
const uint32_t vdenorm_cutoff = 0x08000000;

const uint16_t* i = (const uint16_t*) input;
uint32_t* o = (uint32_t*) output;
Expand All @@ -93,19 +93,19 @@ void xnn_f16_f32_vcvt_ukernel__scalar_u4(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const uint32_t vsign_mask = params->scalar.sign_mask;
const uint32_t vexp_offset = params->scalar.exp_offset;
const float vexp_scale = params->scalar.exp_scale;
const uint32_t vmagic_mask = params->scalar.magic_mask;
const float vmagic_bias = params->scalar.magic_bias;
const uint32_t vdenorm_cutoff = params->scalar.denorm_cutoff;
const uint32_t vsign_mask = 0x80000000;
const uint32_t vexp_offset = 0x70000000;
const float vexp_scale = 0x1.0p-112f;
const uint32_t vmagic_mask = 0x3F000000;
const float vmagic_bias = 0.5f;
const uint32_t vdenorm_cutoff = 0x08000000;

const uint16_t* i = (const uint16_t*) input;
uint32_t* o = (uint32_t*) output;
Expand Down
21 changes: 14 additions & 7 deletions src/amalgam/gen/sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,26 @@ void xnn_f16_f32_vcvt_ukernel__sse2_int16_u32(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const __m128i vsign_mask = _mm_load_si128((const __m128i*) params->sse_int16.sign_mask);
const __m128i vexp_offset = _mm_load_si128((const __m128i*) params->sse_int16.exp_offset);
const __m128 vexp_scale = _mm_load_ps(params->sse_int16.exp_scale);
const __m128i vmagic_mask = _mm_load_si128((const __m128i*) params->sse_int16.magic_mask);
const __m128 vmagic_bias = _mm_load_ps(params->sse_int16.magic_bias);
const __m128i vdenorm_cutoff = _mm_load_si128((const __m128i*) params->sse_int16.denorm_cutoff);
const __m128i vsign_mask = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vexp_offset = _mm_set1_epi16(UINT16_C(0x7000));
const __m128 vexp_scale = _mm_set1_ps(0x1.0p-112f);
const __m128i vmagic_mask = _mm_set1_epi16(UINT16_C(0x3F00));
const __m128 vmagic_bias = _mm_set1_ps(0.5f);
const __m128i vdenorm_cutoff = _mm_set1_epi16(UINT16_C(0x0400));

XNN_FORCE_REALIZATION(vsign_mask);
XNN_FORCE_REALIZATION(vexp_offset);
XNN_FORCE_REALIZATION(vexp_scale);
XNN_FORCE_REALIZATION(vmagic_mask);
XNN_FORCE_REALIZATION(vmagic_bias);
XNN_FORCE_REALIZATION(vdenorm_cutoff);

const uint16_t* i = (const uint16_t*) input;
for (; batch >= 32 * sizeof(uint16_t); batch -= 32 * sizeof(uint16_t)) {
Expand Down
21 changes: 14 additions & 7 deletions src/amalgam/gen/sse41.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,26 @@ void xnn_f16_f32_vcvt_ukernel__sse41_int16_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const __m128i vsign_mask = _mm_load_si128((const __m128i*) params->sse_int16.sign_mask);
const __m128i vexp_offset = _mm_load_si128((const __m128i*) params->sse_int16.exp_offset);
const __m128 vexp_scale = _mm_load_ps(params->sse_int16.exp_scale);
const __m128i vmagic_mask = _mm_load_si128((const __m128i*) params->sse_int16.magic_mask);
const __m128 vmagic_bias = _mm_load_ps(params->sse_int16.magic_bias);
const __m128i vdenorm_cutoff = _mm_load_si128((const __m128i*) params->sse_int16.denorm_cutoff);
const __m128i vsign_mask = _mm_set1_epi16(UINT16_C(0x8000));
const __m128i vexp_offset = _mm_set1_epi16(UINT16_C(0x7000));
const __m128 vexp_scale = _mm_set1_ps(0x1.0p-112f);
const __m128i vmagic_mask = _mm_set1_epi16(UINT16_C(0x3F00));
const __m128 vmagic_bias = _mm_set1_ps(0.5f);
const __m128i vdenorm_cutoff = _mm_set1_epi16(UINT16_C(0x0400));

XNN_FORCE_REALIZATION(vsign_mask);
XNN_FORCE_REALIZATION(vexp_offset);
XNN_FORCE_REALIZATION(vexp_scale);
XNN_FORCE_REALIZATION(vmagic_mask);
XNN_FORCE_REALIZATION(vmagic_bias);
XNN_FORCE_REALIZATION(vdenorm_cutoff);

const uint16_t* i = (const uint16_t*) input;
for (; batch >= 16 * sizeof(uint16_t); batch -= 16 * sizeof(uint16_t)) {
Expand Down
21 changes: 14 additions & 7 deletions src/amalgam/gen/wasmrelaxedsimd.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,26 @@ void xnn_f16_f32_vcvt_ukernel__wasmrelaxedsimd_int16_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const v128_t vsign_mask = wasm_v128_load64_splat(params->wasmsimd_int16.sign_mask);
const v128_t vexp_offset = wasm_v128_load64_splat(params->wasmsimd_int16.exp_offset);
const v128_t vexp_scale = wasm_v128_load64_splat(params->wasmsimd_int16.exp_scale);
const v128_t vmagic_mask = wasm_v128_load64_splat(params->wasmsimd_int16.magic_mask);
const v128_t vmagic_bias = wasm_v128_load64_splat(params->wasmsimd_int16.magic_bias);
const v128_t vdenorm_cutoff = wasm_v128_load64_splat(params->wasmsimd_int16.denorm_cutoff);
const v128_t vsign_mask = wasm_u16x8_const_splat(UINT16_C(0x8000));
const v128_t vexp_offset = wasm_u16x8_const_splat(UINT16_C(0x7000));
const v128_t vexp_scale = wasm_f32x4_const_splat(0x1.0p-112f);
const v128_t vmagic_mask = wasm_u16x8_const_splat(UINT16_C(0x3F00));
const v128_t vmagic_bias = wasm_f32x4_const_splat(0.5f);
const v128_t vdenorm_cutoff = wasm_u16x8_const_splat(UINT16_C(0x0400));

XNN_FORCE_REALIZATION(vsign_mask);
XNN_FORCE_REALIZATION(vexp_offset);
XNN_FORCE_REALIZATION(vexp_scale);
XNN_FORCE_REALIZATION(vmagic_mask);
XNN_FORCE_REALIZATION(vmagic_bias);
XNN_FORCE_REALIZATION(vdenorm_cutoff);

const uint16_t* i = (const uint16_t*) input;
for (; batch >= 16 * sizeof(uint16_t); batch -= 16 * sizeof(uint16_t)) {
Expand Down
21 changes: 14 additions & 7 deletions src/amalgam/gen/wasmsimd.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,26 @@ void xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
assert(input != NULL);
assert(output != NULL);

const v128_t vsign_mask = wasm_v128_load64_splat(params->wasmsimd_int16.sign_mask);
const v128_t vexp_offset = wasm_v128_load64_splat(params->wasmsimd_int16.exp_offset);
const v128_t vexp_scale = wasm_v128_load64_splat(params->wasmsimd_int16.exp_scale);
const v128_t vmagic_mask = wasm_v128_load64_splat(params->wasmsimd_int16.magic_mask);
const v128_t vmagic_bias = wasm_v128_load64_splat(params->wasmsimd_int16.magic_bias);
const v128_t vdenorm_cutoff = wasm_v128_load64_splat(params->wasmsimd_int16.denorm_cutoff);
const v128_t vsign_mask = wasm_u16x8_const_splat(UINT16_C(0x8000));
const v128_t vexp_offset = wasm_u16x8_const_splat(UINT16_C(0x7000));
const v128_t vexp_scale = wasm_f32x4_const_splat(0x1.0p-112f);
const v128_t vmagic_mask = wasm_u16x8_const_splat(UINT16_C(0x3F00));
const v128_t vmagic_bias = wasm_f32x4_const_splat(0.5f);
const v128_t vdenorm_cutoff = wasm_u16x8_const_splat(UINT16_C(0x0400));

XNN_FORCE_REALIZATION(vsign_mask);
XNN_FORCE_REALIZATION(vexp_offset);
XNN_FORCE_REALIZATION(vexp_scale);
XNN_FORCE_REALIZATION(vmagic_mask);
XNN_FORCE_REALIZATION(vmagic_bias);
XNN_FORCE_REALIZATION(vdenorm_cutoff);

const uint16_t* i = (const uint16_t*) input;
for (; batch >= 16 * sizeof(uint16_t); batch -= 16 * sizeof(uint16_t)) {
Expand Down
10 changes: 0 additions & 10 deletions src/configs/unary-elementwise-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,12 +520,10 @@ static void init_f16_to_f32_cvt_config(void) {
f16_to_f32_cvt_config.element_tile = 16;
} else {
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__neon_int16_u16;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_neon_params;
f16_to_f32_cvt_config.element_tile = 16;
}
} else if (!XNN_PLATFORM_MOBILE) {
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_u4;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params;
f16_to_f32_cvt_config.element_tile = 4;
}
#elif XNN_ARCH_ARM64
Expand All @@ -542,38 +540,30 @@ static void init_f16_to_f32_cvt_config(void) {
f16_to_f32_cvt_config.element_tile = 16;
} else if (hardware_config->use_x86_avx) {
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__avx_int16_u16;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params;
f16_to_f32_cvt_config.element_tile = 16;
} else if (hardware_config->use_x86_sse4_1) {
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__sse41_int16_u16;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params;
f16_to_f32_cvt_config.element_tile = 16;
} else {
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__sse2_int16_u32;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params;
f16_to_f32_cvt_config.element_tile = 32;
}
#elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
#if XNN_ARCH_WASMRELAXEDSIMD
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__wasmrelaxedsimd_int16_u16;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_wasmsimd_int16_params;
f16_to_f32_cvt_config.element_tile = 16;
#else
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_u16;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_wasmsimd_int16_params;
f16_to_f32_cvt_config.element_tile = 16;
#endif
#elif XNN_ARCH_WASM
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_u1;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params;
f16_to_f32_cvt_config.element_tile = 1;
#elif XNN_ARCH_RISCV
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_u4;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params;
f16_to_f32_cvt_config.element_tile = 4;
#else
f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_u4;
f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params;
f16_to_f32_cvt_config.element_tile = 4;
#endif
}
Expand Down
2 changes: 1 addition & 1 deletion src/f16-f32-vcvt/avx512skx.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void xnn_f16_f32_vcvt_ukernel__avx512skx_u${BATCH_TILE}(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
const void* params)
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
Expand Down
2 changes: 1 addition & 1 deletion src/f16-f32-vcvt/f16c.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ void xnn_f16_f32_vcvt_ukernel__f16c_u${BATCH_TILE}(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
const void* params) XNN_OOB_READS
{
assert(batch != 0);
assert(batch % sizeof(uint16_t) == 0);
Expand Down
Loading
Loading