Skip to content

Commit

Permalink
w
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Jan 25, 2025
1 parent 7aa9fcf commit 1cf015d
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 18 deletions.
12 changes: 6 additions & 6 deletions src/layer/arm/requantize_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
for (; i + 7 < size; i += 8)
{
float32x4_t _v0 = vcvtq_f32_s32(vld1q_s32(intptr));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32((intptr + 4)));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32(intptr + 4));
_v0 = vmulq_f32(_v0, _scale0);
_v1 = vmulq_f32(_v1, _scale1);
vst1_s8(ptr, float2int8relu(_v0, _v1));
Expand Down Expand Up @@ -158,7 +158,7 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
for (; i + 7 < size; i += 8)
{
float32x4_t _v0 = vcvtq_f32_s32(vld1q_s32(intptr));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32((intptr + 4)));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32(intptr + 4));
#if __aarch64__
_v0 = vfmaq_f32(_bias0, _v0, _scale0);
_v1 = vfmaq_f32(_bias1, _v1, _scale1);
Expand Down Expand Up @@ -265,7 +265,7 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
for (; i + 7 < size; i += 8)
{
float32x4_t _v0 = vcvtq_f32_s32(vld1q_s32(intptr));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32((intptr + 4)));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32(intptr + 4));
_v0 = vmulq_f32(_v0, _scale0);
_v1 = vmulq_f32(_v1, _scale1);
vst1_s8(ptr, float2int8leakyrelu(_v0, _v1, _slope));
Expand Down Expand Up @@ -326,7 +326,7 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
for (; i + 7 < size; i += 8)
{
float32x4_t _v0 = vcvtq_f32_s32(vld1q_s32(intptr));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32((intptr + 4)));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32(intptr + 4));
#if __aarch64__
_v0 = vfmaq_f32(_bias0, _v0, _scale0);
_v1 = vfmaq_f32(_bias1, _v1, _scale1);
Expand Down Expand Up @@ -433,7 +433,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
for (; i + 7 < size; i += 8)
{
float32x4_t _v0 = vcvtq_f32_s32(vld1q_s32(intptr));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32((intptr + 4)));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32(intptr + 4));
_v0 = vmulq_f32(_v0, _scale_in0);
_v1 = vmulq_f32(_v1, _scale_in1);
_v0 = activation_ps(_v0, activation_type, activation_params);
Expand Down Expand Up @@ -494,7 +494,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
for (; i + 7 < size; i += 8)
{
float32x4_t _v0 = vcvtq_f32_s32(vld1q_s32(intptr));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32((intptr + 4)));
float32x4_t _v1 = vcvtq_f32_s32(vld1q_s32(intptr + 4));
#if __aarch64__
_v0 = vfmaq_f32(_bias0, _v0, _scale_in0);
_v1 = vfmaq_f32(_bias1, _v1, _scale_in1);
Expand Down
12 changes: 6 additions & 6 deletions src/layer/loongarch/requantize_loongarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
{
__builtin_prefetch(intptr + 32);
__m128 _v0 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr, 0));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld((intptr + 4, 0)));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr + 4, 0));
_v0 = __lsx_vfmul_s(_v0, _scale0);
_v1 = __lsx_vfmul_s(_v1, _scale1);
*((int64_t*)ptr) = float2int8relu(_v0, _v1);
Expand Down Expand Up @@ -159,7 +159,7 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
{
__builtin_prefetch(intptr + 32);
__m128 _v0 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr, 0));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld((intptr + 4, 0)));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr + 4, 0));
_v0 = __lsx_vfmadd_s(_v0, _scale0, _bias0);
_v1 = __lsx_vfmadd_s(_v1, _scale1, _bias1);
*((int64_t*)ptr) = float2int8relu(_v0, _v1);
Expand Down Expand Up @@ -258,7 +258,7 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
{
__builtin_prefetch(intptr + 32);
__m128 _v0 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr, 0));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld((intptr + 4, 0)));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr + 4, 0));
_v0 = __lsx_vfmul_s(_v0, _scale0);
_v1 = __lsx_vfmul_s(_v1, _scale1);
*((int64_t*)ptr) = float2int8leakyrelu(_v0, _v1, _slope);
Expand Down Expand Up @@ -320,7 +320,7 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
{
__builtin_prefetch(intptr + 32);
__m128 _v0 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr, 0));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld((intptr + 4, 0)));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr + 4, 0));
_v0 = __lsx_vfmadd_s(_v0, _scale0, _bias0);
_v1 = __lsx_vfmadd_s(_v1, _scale1, _bias1);
*((int64_t*)ptr) = float2int8leakyrelu(_v0, _v1, _slope);
Expand Down Expand Up @@ -419,7 +419,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
{
__builtin_prefetch(intptr + 32);
__m128 _v0 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr, 0));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld((intptr + 4, 0)));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr + 4, 0));
_v0 = __lsx_vfmul_s(_v0, _scale_in0);
_v1 = __lsx_vfmul_s(_v1, _scale_in1);
_v0 = activation_ps(_v0, activation_type, activation_params);
Expand Down Expand Up @@ -481,7 +481,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
{
__builtin_prefetch(intptr + 32);
__m128 _v0 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr, 0));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld((intptr + 4, 0)));
__m128 _v1 = (__m128)__msa_ffint_s_w(__lsx_vld(intptr + 4, 0));
_v0 = __lsx_vfmadd_s(_v0, _scale_in0, _bias0);
_v1 = __lsx_vfmadd_s(_v1, _scale_in1, _bias1);
_v0 = activation_ps(_v0, activation_type, activation_params);
Expand Down
12 changes: 6 additions & 6 deletions src/layer/mips/requantize_mips.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
{
__builtin_prefetch(intptr + 32);
v4f32 _v0 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr, 0));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w((intptr + 4, 0)));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr + 4, 0));
_v0 = __msa_fmul_w(_v0, _scale0);
_v1 = __msa_fmul_w(_v1, _scale1);
*((int64_t*)ptr) = float2int8relu(_v0, _v1);
Expand Down Expand Up @@ -159,7 +159,7 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
{
__builtin_prefetch(intptr + 32);
v4f32 _v0 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr, 0));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w((intptr + 4, 0)));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr + 4, 0));
_v0 = __msa_fmadd_w(_bias0, _v0, _scale0);
_v1 = __msa_fmadd_w(_bias1, _v1, _scale1);
*((int64_t*)ptr) = float2int8relu(_v0, _v1);
Expand Down Expand Up @@ -258,7 +258,7 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
{
__builtin_prefetch(intptr + 32);
v4f32 _v0 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr, 0));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w((intptr + 4, 0)));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr + 4, 0));
_v0 = __msa_fmul_w(_v0, _scale0);
_v1 = __msa_fmul_w(_v1, _scale1);
*((int64_t*)ptr) = float2int8leakyrelu(_v0, _v1, _slope);
Expand Down Expand Up @@ -320,7 +320,7 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
{
__builtin_prefetch(intptr + 32);
v4f32 _v0 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr, 0));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w((intptr + 4, 0)));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr + 4, 0));
_v0 = __msa_fmadd_w(_bias0, _v0, _scale0);
_v1 = __msa_fmadd_w(_bias1, _v1, _scale1);
*((int64_t*)ptr) = float2int8leakyrelu(_v0, _v1, _slope);
Expand Down Expand Up @@ -419,7 +419,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
{
__builtin_prefetch(intptr + 32);
v4f32 _v0 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr, 0));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w((intptr + 4, 0)));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr + 4, 0));
_v0 = __msa_fmul_w(_v0, _scale_in0);
_v1 = __msa_fmul_w(_v1, _scale_in1);
_v0 = activation_ps(_v0, activation_type, activation_params);
Expand Down Expand Up @@ -481,7 +481,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
{
__builtin_prefetch(intptr + 32);
v4f32 _v0 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr, 0));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w((intptr + 4, 0)));
v4f32 _v1 = (v4f32)__msa_ffint_s_w(__msa_ld_w(intptr + 4, 0));
_v0 = __msa_fmadd_w(_bias0, _v0, _scale_in0);
_v1 = __msa_fmadd_w(_bias1, _v1, _scale_in1);
_v0 = activation_ps(_v0, activation_type, activation_params);
Expand Down

0 comments on commit 1cf015d

Please sign in to comment.