From b02e1b84deaa917358c7cdd5f3ec6ee137a50df1 Mon Sep 17 00:00:00 2001 From: yuchen-mei Date: Sat, 1 Jun 2024 11:38:26 -0700 Subject: [PATCH] fix preloaded kernels to avoid mult PE optimization --- .../depthwise_conv_preload_fp_generator.cpp | 4 ++-- .../apps/depthwise_conv_preload_fp/process.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/depthwise_conv_preload_fp_generator.cpp b/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/depthwise_conv_preload_fp_generator.cpp index 67c87fd0b..1a98d9a3d 100644 --- a/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/depthwise_conv_preload_fp_generator.cpp +++ b/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/depthwise_conv_preload_fp_generator.cpp @@ -43,8 +43,8 @@ class DepthwiseConv : public Halide::Generator { // create preload kernel const int block_size = int(ksize); Func kernel_preload("kernel_preload"); - float step = 0.03f; - Expr value = cast(-1.0f + step * (y * block_size + x)); + float step = 0.3f; + Expr value = cast(2.8f + step * (y * block_size + x)); kernel_preload(c, x, y) = cast(value); // DepthwiseConv Expression diff --git a/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/process.cpp b/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/process.cpp index 2955de472..16d20391d 100644 --- a/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/process.cpp +++ b/apps/hardware_benchmarks/apps/depthwise_conv_preload_fp/process.cpp @@ -233,14 +233,14 @@ int main( int argc, char **argv ) { // Kernel generation similar to the preload kernel in the Halide generator int block_size = ksize; - float step = 0.03f; + float step = 0.3f; // Assuming the kernel buffer dimensions are (C, block_size, block_size) Buffer kernel_stencil(C, block_size, block_size); // Populate the kernel buffer for (int c = 0; c < C; ++c) { for (int y = 0; y < block_size; ++y) { for (int x = 0; x < block_size; ++x) { - float value = -1.0f + step * (y * block_size + x); + float value = 2.8f + step * (y * block_size + x); kernel_stencil(c, x, y) = float_to_bfloat16_process(value); } }