Skip to content

Commit

Permalink
pointwise_fp for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
yuchen-mei committed May 27, 2024
1 parent 11b82f7 commit 11f3ebf
Show file tree
Hide file tree
Showing 4 changed files with 434 additions and 0 deletions.
52 changes: 52 additions & 0 deletions apps/hardware_benchmarks/apps/pointwise_fp/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Usage:
# make all: compiles all code without running
# generator: create Halide generator
# design: create cpu design
# design-clockwork: create clockwork design
# image: create an image with random data
# run: run cpu design with image
# run-clockwork: run clockwork design with image
# compare: compare two output images
# test: run and compare to cpu output
# eval: evaluate runtime
# clean: remove bin directory

###############################################################################
# MODIFICATION: NONE
#------------------------------------------------------------------------------
# Makefile parameter/variable declarations
#
###############################################################################

include ../../hw_support/Makefile.inc



###############################################################################
# MODIFICATION: OPTIONAL
#------------------------------------------------------------------------------
# App-specific info
#
# TESTNAME : name of the app
# USE_COREIR_VALID : whether to generate valid signal for coreir codegen
# HL_TARGET : Halide target
# RDAI_PLATFORM_RUNTIME : RDAI platform runtime to use
###############################################################################

# App-specific settings for the pointwise_fp test. Comments are kept on their
# own lines so that no trailing whitespace ends up in the variable values.
# Presumably these are read by ../../hw_support/hardware_targets.mk -- confirm.
TESTNAME = pointwise_fp
USE_COREIR_VALID = 1
HL_TARGET = host-x86-64-bfloat_hardware-enable_ponds
RDAI_PLATFORM_RUNTIME = clockwork_sim
# NOTE(review): EXT is not described in the header comment of this file;
# presumably the file extension of the test data files -- confirm against
# the shared hw_support makefiles.
EXT = mat


###############################################################################
# MODIFICATION : NONE
#------------------------------------------------------------------------------
# Include hardware build targets
#
###############################################################################

include ../../hw_support/hardware_targets.mk


22 changes: 22 additions & 0 deletions apps/hardware_benchmarks/apps/pointwise_fp/cgra_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"IOs": {
"inputs": [
{
"name": "hw_input",
"bitwidth": 16,
"shape": [32, 56, 56],
"pixels_per_cycle": 1,
"datafile": "bin/hw_input_stencil.raw"
}
],
"output": {
"name": "hw_output",
"bitwidth": 16,
"pixels_per_cycle": 1,
"datafile": "bin/hw_output.raw"
}
},
"testing": {
"total_cycles": -1
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "Halide.h"

namespace {

using namespace std;

using namespace Halide;
using namespace Halide::ConciseCasts;

// Halide generator for the pointwise_fp test app.
//
// The pipeline is purely pointwise: every input element is cast to
// bfloat16_t, multiplied by 2.0, and cast back to uint16_t.
//
// NOTE(review): despite the names `ReluLayer` and `relu6`, no max/min/clamp
// is applied anywhere in this class -- the only arithmetic is the multiply
// by 2.0 below.
class ReluLayer : public Halide::Generator<ReluLayer> {
public:
// 3-dimensional uint16_t input and output buffers; generate() indexes both
// as (w, x, y).
Input<Buffer<uint16_t>> input{"input", 3};
Output<Buffer<uint16_t>> output{"output", 3};

// Extent used to bound (and tile) both the x and y dimensions of the output
// in the Clockwork schedule. Default: 56.
GeneratorParam<int> out_img{"out_img", 56};
// Extent used to bound the w dimension of the output in the Clockwork
// schedule. Default: 32.
GeneratorParam<int> n_oc{"n_oc", 32};

// Unroll factor applied along w to hw_output, relu6 and hw_input.in() in the
// Clockwork schedule. Default: 8.
GeneratorParam<int> unroll{"unroll", 8};

// Defines the algorithm and, for Clockwork targets, its schedule.
void generate() {
/* THE ALGORITHM */

Var x("x"), y("y"), w("w");
// NOTE(review): input_host, input_glb, input_cgra, output_glb and
// output_cgra are declared here but never referenced again in this class.
Func hw_input("hw_input"), input_host("input_host"), input_glb("input_glb"), input_cgra("input_cgra");
Func hw_output("hw_output"), output_glb("output_glb"), output_cgra("output_cgra");
// Unnamed Func: its Halide-side name is auto-generated.
Func relu6;

// NOTE(review): this uses cast<>, not a bit reinterpretation. If the uint16_t
// input is meant to carry raw bfloat16 bit patterns, confirm that the
// bfloat_hardware target handles this cast as intended.
hw_input(w, x, y) = cast<bfloat16_t>(input(w, x, y));

// The whole computation: scale every element by 2.0 in bfloat16.
relu6(w, x, y) = hw_input(w, x, y) * cast<bfloat16_t>(2.0f);

hw_output(w, x, y) = relu6(w, x, y);
output(w, x, y) = cast<uint16_t>(hw_output(w, x, y));

/* THE SCHEDULE */
// CoreIR / HLS targets: branch intentionally left empty, so no scheduling
// directives are applied for them.
if (get_target().has_feature(Target::CoreIR) ||
get_target().has_feature(Target::HLS)) {

} else if (get_target().has_feature(Target::Clockwork)) {
Var xi,yi, xo,yo;

// Fix the output extents: x and y span [0, out_img), w spans [0, n_oc).
output.bound(x, 0, out_img);
output.bound(y, 0, out_img);
output.bound(w, 0, n_oc);

//hw_input.compute_root();
// Tile x/y by out_img x out_img; with the bounds above this is a single tile
// covering the whole image. w is made the innermost loop.
// NOTE(review): hw_accelerate(xi, xo) is specific to the hardware-targeting
// Halide fork; presumably it marks hw_output as the accelerated region --
// confirm the meaning of its two arguments against that fork's docs.
hw_output.compute_root();
hw_output
.tile(x,y, xo,yo, xi,yi, out_img, out_img)
.reorder(w, xi, yi, xo, yo)
.hw_accelerate(xi, xo);
hw_output.unroll(w, unroll);

// Compute relu6 inside hw_output's xo loop, with the same unrolling along w.
relu6.compute_at(hw_output, xo).unroll(w, unroll);

// NOTE(review): stream_to_accelerator() is also fork-specific; presumably it
// marks hw_input as the data streamed into the accelerator -- confirm.
hw_input.stream_to_accelerator();
// hw_input.in() is unrolled along w by the same factor.
hw_input.in().unroll(w, unroll);

} else {
// Any other target: no scheduling directives are applied.

}
}
};

} // namespace

// Register the ReluLayer generator class under the generator name pointwise_fp.
HALIDE_REGISTER_GENERATOR(ReluLayer, pointwise_fp)
Loading

0 comments on commit 11f3ebf

Please sign in to comment.