fix preloaded kernels to avoid mult PE optimization

StanfordAHA · Jun 1, 2024 · b02e1b8 · b02e1b8
1 parent 11f3ebf
commit b02e1b8
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/depthwise_conv_preload_fp_generator.cpp b/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/depthwise_conv_preload_fp_generator.cpp
@@ -43,8 +43,8 @@ class DepthwiseConv : public Halide::Generator<DepthwiseConv> {
         // create preload kernel
         const int block_size = int(ksize);
         Func kernel_preload("kernel_preload");
-        float step = 0.03f;
-        Expr value = cast<bfloat16_t>(-1.0f + step * (y * block_size + x));
+        float step = 0.3f;
+        Expr value = cast<bfloat16_t>(2.8f + step * (y * block_size + x));
         kernel_preload(c, x, y) = cast<bfloat16_t>(value);
 
         // DepthwiseConv Expression

diff --git a/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/process.cpp b/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/process.cpp
@@ -233,14 +233,14 @@ int main( int argc, char **argv ) {
 
     // Kernel generation similar to the preload kernel in the Halide generator
     int block_size = ksize;
-    float step = 0.03f;
+    float step = 0.3f;
     // Assuming the kernel buffer dimensions are (C, block_size, block_size)
     Buffer<uint16_t> kernel_stencil(C, block_size, block_size);
     // Populate the kernel buffer
     for (int c = 0; c < C; ++c) {
         for (int y = 0; y < block_size; ++y) {
             for (int x = 0; x < block_size; ++x) {
-                float value = -1.0f + step * (y * block_size + x);
+                float value = 2.8f + step * (y * block_size + x);
                 kernel_stencil(c, x, y) = float_to_bfloat16_process(value);
             }
         }