Skip to content

Commit

Permalink
Added Halide implementations for a subset of NPBench
Browse files Browse the repository at this point in the history
  • Loading branch information
lukastruemper committed May 13, 2022
1 parent 0384ddf commit c070ebc
Show file tree
Hide file tree
Showing 29 changed files with 1,510 additions and 0 deletions.
32 changes: 32 additions & 0 deletions halide/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Ignore rules for files produced while building the Halide benchmarks.
# None of the patterns below contains a slash, so each one applies in this
# directory and in every directory beneath it.

# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app
14 changes: 14 additions & 0 deletions halide/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
cmake_minimum_required(VERSION 3.16)
project(NPBench_Halide)

# Dependencies shared by every benchmark below. Both are REQUIRED, so
# configuration stops here if either is missing and no *_FOUND check is needed.
find_package(likwid REQUIRED COMPONENTS marker)
find_package(OpenMP REQUIRED)

# Enable OpenMP for every target created in the subdirectories. This is kept
# directory-wide on purpose: the benchmark drivers (e.g. conv2d_bias/process.cpp,
# which calls omp_get_wtime()) do not all declare an OpenMP dependency on their
# own targets. The language flags are also passed to the compiler driver when
# linking, so no separate linker-flag variable is required (FindOpenMP does not
# define one).
string(APPEND CMAKE_C_FLAGS " ${OpenMP_C_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}")

# Benchmark groups
add_subdirectory(deep_learning)
add_subdirectory(image_processing)
add_subdirectory(weather_stencils)
5 changes: 5 additions & 0 deletions halide/deep_learning/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 3.16)

# Each deep-learning benchmark lives in its own directory with its own
# CMakeLists.txt; add them in a fixed order.
foreach(benchmark IN ITEMS conv2d_bias softmax mlp)
  add_subdirectory(${benchmark})
endforeach()
28 changes: 28 additions & 0 deletions halide/deep_learning/conv2d_bias/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.16)
project(conv2d_bias)

# Set up language settings
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS NO)

# Dependencies
find_package(Halide REQUIRED)

# process.cpp includes <omp.h> and times the kernel with omp_get_wtime(), so
# the executable needs the OpenMP runtime on its own link line.
find_package(OpenMP REQUIRED COMPONENTS CXX)

# The executable links likwid::likwid. Reuse the imported target when an
# enclosing project has already located likwid; otherwise locate it here so
# this directory also configures when used as the top-level project.
if(NOT TARGET likwid::likwid)
  find_package(likwid REQUIRED COMPONENTS marker)
endif()

# Generator
add_halide_generator(conv2d_bias.generator SOURCES conv2d_bias_generator.cpp)

# Filters: one library with the generator's own schedule and one scheduled by
# the Adams2019 autoscheduler. Both come from the same registered generator.
add_halide_library(conv2d_bias FROM conv2d_bias.generator)
add_halide_library(conv2d_bias_auto_schedule FROM conv2d_bias.generator
                   GENERATOR conv2d_bias
                   AUTOSCHEDULER Halide::Adams2019)

# Main executable
add_executable(conv2d_bias_process process.cpp)
target_link_libraries(conv2d_bias_process
  PRIVATE
    likwid::likwid
    OpenMP::OpenMP_CXX
    Halide::ImageIO
    conv2d_bias
    conv2d_bias_auto_schedule)
70 changes: 70 additions & 0 deletions halide/deep_learning/conv2d_bias/conv2d_bias_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "Halide.h"

namespace {

using namespace Halide;

// Halide generator for a batched 2-D convolution followed by a per-output-channel
// bias. All buffer shapes are fixed at generation time (see the constants in
// generate()), so the resulting pipeline only accepts exactly those sizes.
class Conv2dBias : public Halide::Generator<Conv2dBias> {
public:
    Input<Buffer<float, 4>> input{"input"};     // indexed (channel_in, x, y, batch)
    Input<Buffer<float, 4>> filter{"filter"};   // indexed (channel_in, kx, ky, channel_out)
    Input<Buffer<float, 1>> bias{"bias"};       // indexed (channel_out)
    Output<Buffer<float, 4>> output{"output"};  // indexed (channel_out, x, y, batch)

    void generate() {
        // Problem size baked into the compiled pipeline.
        constexpr int batch = 8, ch_in = 3, ch_out = 16;
        constexpr int width = 256, height = 256, kernel = 20;

        // No padding is applied, so the output shrinks by (kernel - 1) per axis.
        constexpr int out_w = width - (kernel - 1);
        constexpr int out_h = height - (kernel - 1);

        /* THE ALGORITHM */

        Var c("c"), x("x"), y("y"), n("n");

        // Reduction over (input channel, kernel x, kernel y).
        RDom win(0, ch_in, 0, kernel, 0, kernel);

        output(c, x, y, n) = bias(c);
        output(c, x, y, n) += filter(win.x, win.y, win.z, c) * input(win.x, x + win.y, y + win.z, n);

        /* THE SCHEDULE */

        // Fix one buffer dimension to [0, extent) with the given stride, and
        // record the same range as the estimate used by the autoscheduler.
        const auto pin = [](auto dim, int extent, int stride) {
            dim.set_bounds(0, extent).set_stride(stride);
            dim.set_estimate(0, extent);
        };

        // Every buffer is densely packed with the channel dimension innermost.
        pin(input.dim(0), ch_in, 1);
        pin(input.dim(1), width, ch_in);
        pin(input.dim(2), height, ch_in * width);
        pin(input.dim(3), batch, ch_in * width * height);

        pin(filter.dim(0), ch_in, 1);
        pin(filter.dim(1), kernel, ch_in);
        pin(filter.dim(2), kernel, ch_in * kernel);
        pin(filter.dim(3), ch_out, ch_in * kernel * kernel);

        pin(bias.dim(0), ch_out, 1);

        pin(output.dim(0), ch_out, 1);
        pin(output.dim(1), out_w, ch_out);
        pin(output.dim(2), out_h, ch_out * out_w);
        pin(output.dim(3), batch, ch_out * out_w * out_h);
    }
};

} // namespace

HALIDE_REGISTER_GENERATOR(Conv2dBias, conv2d_bias)
107 changes: 107 additions & 0 deletions halide/deep_learning/conv2d_bias/process.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <vector>

#include <omp.h>

#include "conv2d_bias.h"
#include "conv2d_bias_auto_schedule.h"

#include "HalideBuffer.h"

// LIKWID marker API.
// When LIKWID_PERFMON is defined, the real macros come from <likwid-marker.h>.
// Otherwise every marker macro used in this file is defined to expand to
// nothing, so the instrumentation in main() compiles away and the program
// builds without the LIKWID header.
#ifdef LIKWID_PERFMON
#include <likwid-marker.h>
#else
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#endif

using namespace Halide::Runtime;

// Returns the median of `samples`; for an even count this is the mean of the
// two middle values. An empty input yields 0.0.
// The vector is taken by value because std::nth_element reorders it.
static double median_runtime(std::vector<double> samples) {
    if (samples.empty()) {
        return 0.0;
    }

    const auto mid = samples.size() / 2;
    std::nth_element(samples.begin(), samples.begin() + mid, samples.end());

    double med = samples[mid];
    if (samples.size() % 2 == 0) {
        // After nth_element everything before `mid` is <= samples[mid], so the
        // largest of those elements is the lower of the two middle values.
        const auto lower_mid = std::max_element(samples.begin(), samples.begin() + mid);
        med = (*lower_mid + med) / 2.0;
    }
    return med;
}

// Benchmark driver for the auto-scheduled conv2d_bias pipeline.
//
// Fills the inputs with rand() values, runs the pipeline a fixed number of
// times and prints the median wall-clock runtime, then runs it once more
// inside a LIKWID marker region named "Compute".
//
// Returns 0 on success and 1 if the pipeline reports an error.
int main(int argc, char **argv) {
    // Must match the sizes compiled into the generator.
    const int N = 8, CI = 3, CO = 16, W = 256, H = 256, K = 20;

    const int border = K - 1;

    // Number of timed repetitions the median is taken over.
    const int timed_runs = 30;

    Buffer<float, 4> input(CI, W, H, N);
    Buffer<float, 4> filter(CI, K, K, CO);
    Buffer<float, 1> bias(CO);
    Buffer<float, 4> output(CO, W - border, H - border, N);

    for (int n = 0; n < N; n++) {
        for (int y = 0; y < H; y++) {
            for (int x = 0; x < W; x++) {
                for (int c = 0; c < CI; c++) {
                    input(c, x, y, n) = std::rand();
                }
            }
        }
    }

    for (int co = 0; co < CO; co++) {
        for (int y = 0; y < K; y++) {
            for (int x = 0; x < K; x++) {
                for (int ci = 0; ci < CI; ci++) {
                    filter(ci, x, y, co) = std::rand();
                }
            }
        }
    }

    for (int x = 0; x < CO; x++) {
        bias(x) = std::rand();
    }

    // Timing code

    std::vector<double> runtimes;
    runtimes.reserve(timed_runs);
    for (int i = 0; i < timed_runs; i++) {
        // Fresh copies for every repetition, made outside the timed region.
        Buffer<float, 4> input_ = input.copy();
        Buffer<float, 4> filter_ = filter.copy();
        Buffer<float, 1> bias_ = bias.copy();
        Buffer<float, 4> output_ = output.copy();

        const double t_start = omp_get_wtime();

        const int err = conv2d_bias_auto_schedule(input_, filter_, bias_, output_);
        output_.device_sync();

        const double t_end = omp_get_wtime();

        // The generated pipeline returns a non-zero code on failure; a timing
        // taken from a failed run would be meaningless.
        if (err != 0) {
            fprintf(stderr, "conv2d_bias_auto_schedule failed with error code %d\n", err);
            return 1;
        }
        runtimes.push_back(t_end - t_start);
    }

    printf("Runtime: %f\n", median_runtime(runtimes));

    // One additional run inside a LIKWID marker region.
    Buffer<float, 4> input_ = input.copy();
    Buffer<float, 4> filter_ = filter.copy();
    Buffer<float, 1> bias_ = bias.copy();
    Buffer<float, 4> output_ = output.copy();

    LIKWID_MARKER_INIT;
    LIKWID_MARKER_THREADINIT;

    LIKWID_MARKER_START("Compute");

    const int err = conv2d_bias_auto_schedule(input_, filter_, bias_, output_);
    output_.device_sync();

    LIKWID_MARKER_STOP("Compute");

    // Close the marker API before reporting, so it is shut down on both paths.
    LIKWID_MARKER_CLOSE;

    if (err != 0) {
        fprintf(stderr, "conv2d_bias_auto_schedule failed with error code %d\n", err);
        return 1;
    }

    printf("Success!\n");
    return 0;
}
28 changes: 28 additions & 0 deletions halide/deep_learning/mlp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.16)
project(mlp)

# Set up language settings
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS NO)

# Dependencies
find_package(Halide REQUIRED)

# NOTE(review): presumably process.cpp times the kernel with omp_get_wtime()
# like the other benchmark drivers - confirm. Linking OpenMP here keeps the
# target's dependencies explicit.
find_package(OpenMP REQUIRED COMPONENTS CXX)

# The executable links likwid::likwid. Reuse the imported target when an
# enclosing project has already located likwid; otherwise locate it here so
# this directory also configures when used as the top-level project.
if(NOT TARGET likwid::likwid)
  find_package(likwid REQUIRED COMPONENTS marker)
endif()

# Generator
add_halide_generator(mlp.generator SOURCES mlp_generator.cpp)

# Filters: one library with the generator's own schedule and one scheduled by
# the Adams2019 autoscheduler. Both come from the same registered generator.
add_halide_library(mlp FROM mlp.generator)
add_halide_library(mlp_auto_schedule FROM mlp.generator
                   GENERATOR mlp
                   AUTOSCHEDULER Halide::Adams2019)

# Main executable
add_executable(mlp_process process.cpp)
target_link_libraries(mlp_process
  PRIVATE
    likwid::likwid
    OpenMP::OpenMP_CXX
    Halide::ImageIO
    mlp
    mlp_auto_schedule)
Loading

0 comments on commit c070ebc

Please sign in to comment.