restrict one dim quantize scale size, test quantize oom (#5892)
* restrict one dim quantize scale size

* sse2 requantize pack8
nihui authored Feb 5, 2025
1 parent 956bccd commit ff5b554
Showing 9 changed files with 2,061 additions and 2,943 deletions.
1,336 changes: 597 additions & 739 deletions src/layer/arm/quantize_arm.cpp

Large diffs are not rendered by default.

1,202 changes: 475 additions & 727 deletions src/layer/arm/quantize_arm_asimdhp.cpp

Large diffs are not rendered by default.

624 changes: 226 additions & 398 deletions src/layer/loongarch/quantize_loongarch.cpp

Large diffs are not rendered by default.

624 changes: 226 additions & 398 deletions src/layer/mips/quantize_mips.cpp

Large diffs are not rendered by default.

64 changes: 24 additions & 40 deletions src/layer/quantize.cpp
@@ -46,72 +46,59 @@ static inline signed char float2int8(float v)
     return (signed char)int32;
 }
 
+static void quantize(const float* ptr, signed char* s8ptr, float scale, int size)
+{
+    for (int i = 0; i < size; i++)
+    {
+        *s8ptr = float2int8(*ptr * scale);
+        ptr++;
+        s8ptr++;
+    }
+}
+
 int Quantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
-    int dims = bottom_blob.dims;
+    const int dims = bottom_blob.dims;
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+    const int channels = bottom_blob.c;
 
     if (dims == 1)
     {
-        int w = bottom_blob.w;
-
         top_blob.create(w, (size_t)1u, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
+        // assert scale_data_size == 1
+
         const float* ptr = bottom_blob;
-        signed char* outptr = top_blob;
+        signed char* s8ptr = top_blob;
 
-        if (scale_data_size == 1)
-        {
-            const float scale = scale_data[0];
+        const float scale = scale_data[0];
 
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < w; i++)
-            {
-                outptr[i] = float2int8(ptr[i] * scale);
-            }
-        }
-        else
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < w; i++)
-            {
-                outptr[i] = float2int8(ptr[i] * scale_data[i]);
-            }
-        }
+        quantize(ptr, s8ptr, scale, w);
     }
 
     if (dims == 2)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
-
         top_blob.create(w, h, (size_t)1u, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int i = 0; i < h; i++)
         {
-            const float* ptr0 = bottom_blob.row(i);
-            signed char* outptr0 = top_blob.row<signed char>(i);
+            const float* ptr = bottom_blob.row(i);
+            signed char* s8ptr = top_blob.row<signed char>(i);
 
             const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
 
-            for (int j = 0; j < w; j++)
-            {
-                outptr0[j] = float2int8(ptr0[j] * scale);
-            }
+            quantize(ptr, s8ptr, scale, w);
         }
     }
 
     if (dims == 3)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
-        int channels = bottom_blob.c;
-        int size = w * h;
-
         top_blob.create(w, h, channels, (size_t)1u, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
@@ -120,14 +107,11 @@ int Quantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt)
         for (int q = 0; q < channels; q++)
         {
             const float* ptr = bottom_blob.channel(q);
-            signed char* outptr = top_blob.channel(q);
+            signed char* s8ptr = top_blob.channel(q);
 
             const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
 
-            for (int i = 0; i < size; i++)
-            {
-                outptr[i] = float2int8(ptr[i] * scale);
-            }
+            quantize(ptr, s8ptr, scale, w * h);
         }
     }
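
For readers skimming the diff: every scalar path in Quantize::forward now funnels through the new quantize() helper, and the 1-D case is restricted to a single per-tensor scale (scale_data_size == 1), per the commit title. The standalone sketch below shows the round trip. quantize() is copied verbatim from the diff; the body of float2int8() is an assumption (only its final return statement appears in the hunk above), modeled on the usual round-to-nearest, saturate-to-[-127, 127] behavior.

// Hedged sketch of the scalar quantize path after this commit.
// quantize() is taken from the diff above; float2int8() is an assumed
// round-to-nearest implementation saturating to [-127, 127] -- only its
// "return (signed char)int32;" tail is visible in the hunk.
#include <math.h>
#include <stdio.h>

static inline signed char float2int8(float v)
{
    int int32 = (int)roundf(v); // assumed rounding mode
    if (int32 > 127) return 127;
    if (int32 < -127) return -127;
    return (signed char)int32;
}

static void quantize(const float* ptr, signed char* s8ptr, float scale, int size)
{
    for (int i = 0; i < size; i++)
    {
        *s8ptr = float2int8(*ptr * scale);
        ptr++;
        s8ptr++;
    }
}

int main()
{
    // 1-D case: one per-tensor scale, matching the scale_data_size == 1
    // restriction this commit introduces for one-dim blobs.
    const float src[4] = {0.1f, -0.5f, 1.0f, -2.0f};
    signed char dst[4];
    quantize(src, dst, 127.f, 4);
    for (int i = 0; i < 4; i++)
        printf("%d\n", dst[i]); // prints 13, -64, 127, -127
    return 0;
}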

