Skip to content

Commit

Permalink
fix preloaded kernels to avoid mult PE optimization
Browse files: browse the repository at this point in the history
  • Loading branch information
yuchen-mei committed Jun 1, 2024
1 parent 11f3ebf commit b02e1b8
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ class DepthwiseConv : public Halide::Generator<DepthwiseConv> {
// create preload kernel
const int block_size = int(ksize);
Func kernel_preload("kernel_preload");
float step = 0.03f;
Expr value = cast<bfloat16_t>(-1.0f + step * (y * block_size + x));
float step = 0.3f;
Expr value = cast<bfloat16_t>(2.8f + step * (y * block_size + x));
kernel_preload(c, x, y) = cast<bfloat16_t>(value);

// DepthwiseConv Expression
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,14 @@ int main( int argc, char **argv ) {

// Kernel generation similar to the preload kernel in the Halide generator
int block_size = ksize;
float step = 0.03f;
float step = 0.3f;
// Assuming the kernel buffer dimensions are (C, block_size, block_size)
Buffer<uint16_t> kernel_stencil(C, block_size, block_size);
// Populate the kernel buffer
for (int c = 0; c < C; ++c) {
for (int y = 0; y < block_size; ++y) {
for (int x = 0; x < block_size; ++x) {
float value = -1.0f + step * (y * block_size + x);
float value = 2.8f + step * (y * block_size + x);
kernel_stencil(c, x, y) = float_to_bfloat16_process(value);
}
}
Expand Down

0 comments on commit b02e1b8

Please sign in to comment.