Skip to content

Commit

Permalink
Remove min/max parameters from cvt kernels
Browse files: browse the repository at this point in the history
The removed min/max parameters were only ever set to the min/max of the output type.

PiperOrigin-RevId: 688619374
  • Loading branch information
dsharletg authored and xnnpack-bot committed Oct 22, 2024
1 parent 743f95f commit e9975bc
Show file tree
Hide file tree
Showing 160 changed files with 245 additions and 1,438 deletions.
8 changes: 2 additions & 6 deletions bench/convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ void xnnpack_convert_f32_qs8(benchmark::State& state) {
benchmark_unary_operator<float, int8_t>(
[](uint32_t flags, xnn_operator_t* op) {
return xnn_create_convert_nc_f32_qs8(
1.0f / 128.0f /* scale */, 1 /* zero point */,
std::numeric_limits<int8_t>::min(),
std::numeric_limits<int8_t>::max(), flags, op);
1.0f / 128.0f /* scale */, 1 /* zero point */, flags, op);
},
xnn_reshape_convert_nc_f32_qs8, xnn_setup_convert_nc_f32_qs8, state);
}
Expand All @@ -44,9 +42,7 @@ void xnnpack_convert_f32_qu8(benchmark::State& state) {
benchmark_unary_operator<float, uint8_t>(
[](uint32_t flags, xnn_operator_t* op) {
return xnn_create_convert_nc_f32_qu8(
1.0f / 128.0f /* scale */, 127 /* zero point */,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(), flags, op);
1.0f / 128.0f /* scale */, 127 /* zero point */, flags, op);
},
xnn_reshape_convert_nc_f32_qu8, xnn_setup_convert_nc_f32_qu8, state);
}
Expand Down
4 changes: 1 addition & 3 deletions bench/f16-qs8-vcvt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ static void f16_qs8_vcvt(
xnn_f16_qs8_cvt_params params;
init_params(&params,
1.0f /* scale */,
1 /* output zero point */,
std::numeric_limits<int8_t>::min() + 1 /* output min */,
std::numeric_limits<int8_t>::max() - 1 /* output max */);
1 /* output zero point */);

cvt_benchmark<xnn_float16, int8_t>(state, arch_flags, cvt, &params);
}
Expand Down
4 changes: 1 addition & 3 deletions bench/f32-qs8-vcvt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ static void f32_qs8_vcvt(
xnn_f32_qs8_cvt_params params;
init_params(&params,
25.0f /* scale */,
1 /* output zero point */,
std::numeric_limits<int8_t>::min() + 1 /* output min */,
std::numeric_limits<int8_t>::max() - 1 /* output max */);
1 /* output zero point */);

cvt_benchmark<float, int8_t>(state, arch_flags, cvt, &params);
}
Expand Down
4 changes: 1 addition & 3 deletions bench/f32-qu8-vcvt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ static void f32_qu8_vcvt(
xnn_f32_qu8_cvt_params params;
init_params(&params,
25.0f /* scale */,
127 /* output zero point */,
std::numeric_limits<uint8_t>::min() + 1 /* output min */,
std::numeric_limits<uint8_t>::max() - 1 /* output max */);
127 /* output zero point */);

cvt_benchmark<float, uint8_t>(state, arch_flags, cvt, &params);
}
Expand Down
4 changes: 0 additions & 4 deletions include/xnnpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -2949,8 +2949,6 @@ enum xnn_status xnn_run_convert_nc_f32_f16(
enum xnn_status xnn_create_convert_nc_f32_qs8(
float output_scale,
int8_t output_zero_point,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* convert_op_out);

Expand Down Expand Up @@ -2982,8 +2980,6 @@ enum xnn_status xnn_run_convert_nc_f32_qs8(
enum xnn_status xnn_create_convert_nc_f32_qu8(
float output_scale,
uint8_t output_zero_point,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* convert_op_out);

Expand Down
10 changes: 0 additions & 10 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-neonfp16arith-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u16(

const float16x8_t vscale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.scale));
const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->scalar.output_zero_point);
const int8x16_t voutput_min = vld1q_dup_s8(&params->scalar.output_min);
const int8x16_t voutput_max = vld1q_dup_s8(&params->scalar.output_max);
for (; batch >= 16 * sizeof(uint16_t); batch -= 16 * sizeof(uint16_t)) {
float16x8_t vx0 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
float16x8_t vx8 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
Expand All @@ -48,10 +46,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u16(

int8x16_t vy0 = vcombine_s8(vqmovn_s16(vacc0), vqmovn_s16(vacc8));

vy0 = vmaxq_s8(vy0, voutput_min);

vy0 = vminq_s8(vy0, voutput_max);

vst1q_s8(output, vy0); output += 16;
}
for (; batch >= 8 * sizeof(uint16_t); batch -= 8 * sizeof(uint16_t)) {
Expand All @@ -64,8 +58,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u16(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));
vst1_s8(output, vy); output += 8;
}
if XNN_UNLIKELY(batch != 0) {
Expand All @@ -79,8 +71,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u16(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));

if (batch & (4 * sizeof(uint16_t))) {
vst1_lane_u32((void*) output, vreinterpret_u32_s8(vy), 0); output += 4;
Expand Down
12 changes: 0 additions & 12 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-neonfp16arith-u24.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u24(

const float16x8_t vscale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.scale));
const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->scalar.output_zero_point);
const int8x16_t voutput_min = vld1q_dup_s8(&params->scalar.output_min);
const int8x16_t voutput_max = vld1q_dup_s8(&params->scalar.output_max);
for (; batch >= 24 * sizeof(uint16_t); batch -= 24 * sizeof(uint16_t)) {
float16x8_t vx0 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
float16x8_t vx8 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
Expand All @@ -53,12 +51,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u24(
int8x16_t vy0 = vcombine_s8(vqmovn_s16(vacc0), vqmovn_s16(vacc8));
int8x8_t vy16 = vqmovn_s16(vacc16);

vy0 = vmaxq_s8(vy0, voutput_min);
vy16 = vmax_s8(vy16, vget_low_s8(voutput_min));

vy0 = vminq_s8(vy0, voutput_max);
vy16 = vmin_s8(vy16, vget_low_s8(voutput_max));

vst1q_s8(output, vy0); output += 16;
vst1_s8(output, vy16); output += 8;
}
Expand All @@ -72,8 +64,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u24(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));
vst1_s8(output, vy); output += 8;
}
if XNN_UNLIKELY(batch != 0) {
Expand All @@ -87,8 +77,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u24(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));

if (batch & (4 * sizeof(uint16_t))) {
vst1_lane_u32((void*) output, vreinterpret_u32_s8(vy), 0); output += 4;
Expand Down
12 changes: 0 additions & 12 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-neonfp16arith-u32.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u32(

const float16x8_t vscale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.scale));
const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->scalar.output_zero_point);
const int8x16_t voutput_min = vld1q_dup_s8(&params->scalar.output_min);
const int8x16_t voutput_max = vld1q_dup_s8(&params->scalar.output_max);
for (; batch >= 32 * sizeof(uint16_t); batch -= 32 * sizeof(uint16_t)) {
float16x8_t vx0 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
float16x8_t vx8 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
Expand All @@ -57,12 +55,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u32(
int8x16_t vy0 = vcombine_s8(vqmovn_s16(vacc0), vqmovn_s16(vacc8));
int8x16_t vy16 = vcombine_s8(vqmovn_s16(vacc16), vqmovn_s16(vacc24));

vy0 = vmaxq_s8(vy0, voutput_min);
vy16 = vmaxq_s8(vy16, voutput_min);

vy0 = vminq_s8(vy0, voutput_max);
vy16 = vminq_s8(vy16, voutput_max);

vst1q_s8(output, vy0); output += 16;
vst1q_s8(output, vy16); output += 16;
}
Expand All @@ -76,8 +68,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u32(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));
vst1_s8(output, vy); output += 8;
}
if XNN_UNLIKELY(batch != 0) {
Expand All @@ -91,8 +81,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u32(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));

if (batch & (4 * sizeof(uint16_t))) {
vst1_lane_u32((void*) output, vreinterpret_u32_s8(vy), 0); output += 4;
Expand Down
16 changes: 0 additions & 16 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-neonfp16arith-u64.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u64(

const float16x8_t vscale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.scale));
const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->scalar.output_zero_point);
const int8x16_t voutput_min = vld1q_dup_s8(&params->scalar.output_min);
const int8x16_t voutput_max = vld1q_dup_s8(&params->scalar.output_max);
for (; batch >= 64 * sizeof(uint16_t); batch -= 64 * sizeof(uint16_t)) {
float16x8_t vx0 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
float16x8_t vx8 = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;
Expand Down Expand Up @@ -75,16 +73,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u64(
int8x16_t vy32 = vcombine_s8(vqmovn_s16(vacc32), vqmovn_s16(vacc40));
int8x16_t vy48 = vcombine_s8(vqmovn_s16(vacc48), vqmovn_s16(vacc56));

vy0 = vmaxq_s8(vy0, voutput_min);
vy16 = vmaxq_s8(vy16, voutput_min);
vy32 = vmaxq_s8(vy32, voutput_min);
vy48 = vmaxq_s8(vy48, voutput_min);

vy0 = vminq_s8(vy0, voutput_max);
vy16 = vminq_s8(vy16, voutput_max);
vy32 = vminq_s8(vy32, voutput_max);
vy48 = vminq_s8(vy48, voutput_max);

vst1q_s8(output, vy0); output += 16;
vst1q_s8(output, vy16); output += 16;
vst1q_s8(output, vy32); output += 16;
Expand All @@ -100,8 +88,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u64(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));
vst1_s8(output, vy); output += 8;
}
if XNN_UNLIKELY(batch != 0) {
Expand All @@ -115,8 +101,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u64(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, vget_low_s8(voutput_min));
vy = vmin_s8(vy, vget_low_s8(voutput_max));

if (batch & (4 * sizeof(uint16_t))) {
vst1_lane_u32((void*) output, vreinterpret_u32_s8(vy), 0); output += 4;
Expand Down
6 changes: 0 additions & 6 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-neonfp16arith-u8.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u8(

const float16x8_t vscale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.scale));
const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->scalar.output_zero_point);
const int8x8_t voutput_min = vld1_dup_s8(&params->scalar.output_min);
const int8x8_t voutput_max = vld1_dup_s8(&params->scalar.output_max);
for (; batch >= 8 * sizeof(uint16_t); batch -= 8 * sizeof(uint16_t)) {
float16x8_t vx = vreinterpretq_f16_u16(vld1q_u16(i)); i += 8;

Expand All @@ -43,8 +41,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u8(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, voutput_min);
vy = vmin_s8(vy, voutput_max);
vst1_s8(output, vy); output += 8;
}
if XNN_UNLIKELY(batch != 0) {
Expand All @@ -58,8 +54,6 @@ void xnn_f16_qs8_vcvt_ukernel__neonfp16arith_u8(
vacc = vqaddq_s16(vacc, voutput_zero_point);

int8x8_t vy = vqmovn_s16(vacc);
vy = vmax_s8(vy, voutput_min);
vy = vmin_s8(vy, voutput_max);

if (batch & (4 * sizeof(uint16_t))) {
vst1_lane_u32((void*) output, vreinterpret_u32_s8(vy), 0); output += 4;
Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-fmagic-u1.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_fmagic_u1(

const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float voutput_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float voutput_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const float vmagic_bias = 12582912.0f;
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;

Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-fmagic-u2.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_fmagic_u2(

const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float voutput_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float voutput_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const float vmagic_bias = 12582912.0f;
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;

Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-fmagic-u3.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_fmagic_u3(

const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float voutput_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float voutput_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const float vmagic_bias = 12582912.0f;
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;

Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-fmagic-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_fmagic_u4(

const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float voutput_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float voutput_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float voutput_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const float vmagic_bias = 12582912.0f;
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;

Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u1.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_imagic_u1(
const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float vmagic_bias = 12582912.0f;
const float output_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float output_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point);
const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point);
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;
Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u2.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_imagic_u2(
const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float vmagic_bias = 12582912.0f;
const float output_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float output_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point);
const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point);
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;
Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u3.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_imagic_u3(
const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float vmagic_bias = 12582912.0f;
const float output_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float output_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point);
const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point);
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;
Expand Down
4 changes: 2 additions & 2 deletions src/f16-qs8-vcvt/gen/f16-qs8-vcvt-scalar-imagic-u4.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ void xnn_f16_qs8_vcvt_ukernel__scalar_imagic_u4(
const xnn_float16* i = input;
const float vscale = xnn_float16_to_float(params->scalar.scale);
const float vmagic_bias = 12582912.0f;
const float output_min_less_zero_point = (float) ((int32_t) params->scalar.output_min - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) params->scalar.output_max - (int32_t) params->scalar.output_zero_point);
const float output_min_less_zero_point = (float) ((int32_t) -128 - (int32_t) params->scalar.output_zero_point);
const float output_max_less_zero_point = (float) ((int32_t) 127 - (int32_t) params->scalar.output_zero_point);
const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point);
const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point);
const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point;
Expand Down
Loading

0 comments on commit e9975bc

Please sign in to comment.