restrict one dim quantize scale size, test quantize oom (#5892)
* restrict one dim quantize scale size

* sse2 requantize pack8
nihui authored Feb 5, 2025
1 parent 956bccd commit ff5b554
Showing 9 changed files with 2,061 additions and 2,943 deletions.
1,336 changes: 597 additions & 739 deletions src/layer/arm/quantize_arm.cpp

Large diffs are not rendered by default.

1,202 changes: 475 additions & 727 deletions src/layer/arm/quantize_arm_asimdhp.cpp

Large diffs are not rendered by default.

624 changes: 226 additions & 398 deletions src/layer/loongarch/quantize_loongarch.cpp

Large diffs are not rendered by default.

624 changes: 226 additions & 398 deletions src/layer/mips/quantize_mips.cpp

Large diffs are not rendered by default.

64 changes: 24 additions & 40 deletions src/layer/quantize.cpp
@@ -46,72 +46,59 @@ static inline signed char float2int8(float v)
     return (signed char)int32;
 }
 
+static void quantize(const float* ptr, signed char* s8ptr, float scale, int size)
+{
+    for (int i = 0; i < size; i++)
+    {
+        *s8ptr = float2int8(*ptr * scale);
+        ptr++;
+        s8ptr++;
+    }
+}
+
 int Quantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
-    int dims = bottom_blob.dims;
+    const int dims = bottom_blob.dims;
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+    const int channels = bottom_blob.c;
 
     if (dims == 1)
     {
-        int w = bottom_blob.w;
-
         top_blob.create(w, (size_t)1u, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
+        // assert scale_data_size == 1
+
         const float* ptr = bottom_blob;
-        signed char* outptr = top_blob;
+        signed char* s8ptr = top_blob;
 
-        if (scale_data_size == 1)
-        {
-            const float scale = scale_data[0];
+        const float scale = scale_data[0];
 
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < w; i++)
-            {
-                outptr[i] = float2int8(ptr[i] * scale);
-            }
-        }
-        else
-        {
-            #pragma omp parallel for num_threads(opt.num_threads)
-            for (int i = 0; i < w; i++)
-            {
-                outptr[i] = float2int8(ptr[i] * scale_data[i]);
-            }
-        }
+        quantize(ptr, s8ptr, scale, w);
     }
 
     if (dims == 2)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
-
         top_blob.create(w, h, (size_t)1u, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int i = 0; i < h; i++)
         {
-            const float* ptr0 = bottom_blob.row(i);
-            signed char* outptr0 = top_blob.row<signed char>(i);
+            const float* ptr = bottom_blob.row(i);
+            signed char* s8ptr = top_blob.row<signed char>(i);
 
             const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
 
-            for (int j = 0; j < w; j++)
-            {
-                outptr0[j] = float2int8(ptr0[j] * scale);
-            }
+            quantize(ptr, s8ptr, scale, w);
         }
     }
 
     if (dims == 3)
     {
-        int w = bottom_blob.w;
-        int h = bottom_blob.h;
-        int channels = bottom_blob.c;
-        int size = w * h;
-
         top_blob.create(w, h, channels, (size_t)1u, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
@@ -120,14 +107,11 @@ int Quantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt)
         for (int q = 0; q < channels; q++)
         {
             const float* ptr = bottom_blob.channel(q);
-            signed char* outptr = top_blob.channel(q);
+            signed char* s8ptr = top_blob.channel(q);
 
             const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
 
-            for (int i = 0; i < size; i++)
-            {
-                outptr[i] = float2int8(ptr[i] * scale);
-            }
+            quantize(ptr, s8ptr, scale, w * h);
         }
     }
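
For readers skimming the diff: every scalar path in Quantize::forward now funnels through the new quantize() helper, and the 1-D case is restricted to a single per-tensor scale (scale_data_size == 1), per the commit title. The standalone sketch below shows the round trip. quantize() is copied verbatim from the diff; the body of float2int8() is an assumption (only its final return statement appears in the hunk above), modeled on the usual round-to-nearest, saturate-to-[-127, 127] behavior.

// Hedged sketch of the scalar quantize path after this commit.
// quantize() is taken from the diff above; float2int8() is an assumed
// round-to-nearest implementation saturating to [-127, 127] -- only its
// "return (signed char)int32;" tail is visible in the hunk.
#include <math.h>
#include <stdio.h>

static inline signed char float2int8(float v)
{
    int int32 = (int)roundf(v); // assumed rounding mode
    if (int32 > 127) return 127;
    if (int32 < -127) return -127;
    return (signed char)int32;
}

static void quantize(const float* ptr, signed char* s8ptr, float scale, int size)
{
    for (int i = 0; i < size; i++)
    {
        *s8ptr = float2int8(*ptr * scale);
        ptr++;
        s8ptr++;
    }
}

int main()
{
    // 1-D case: one per-tensor scale, matching the scale_data_size == 1
    // restriction this commit introduces for one-dim blobs.
    const float src[4] = {0.1f, -0.5f, 1.0f, -2.0f};
    signed char dst[4];
    quantize(src, dst, 127.f, 4);
    for (int i = 0; i < 4; i++)
        printf("%d\n", dst[i]); // prints 13, -64, 127, -127
    return 0;
}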

