Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BLAS scal and trsv benchmarks #201

Merged
merged 6 commits into from
Mar 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions benchmarks/linear_algebra/blas/level1/scal/scal_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#include <tiramisu/tiramisu.h>
#include <string>
#include "benchmarks.h"

using namespace tiramisu;

/*
* Benchmark for BLAS DSCAL
* X = alpha*X
*
* inputs:
* --------
* - n: size of vector X.
* - alpha: scale factor.
* - X: vector to scale.
*
* outputs:
* ---------
* The operation is done in place: X is overwritten with alpha*X.
*/

// Vectorization factor passed to vectorize() in generate_function() below:
// 512 when TIRAMISU_XLARGE or TIRAMISU_LARGE is non-zero, 256 when
// TIRAMISU_MEDIUM is non-zero, and 16 otherwise.
// NOTE(review): the TIRAMISU_* size switches are not defined in this file;
// presumably they come from benchmarks.h or the build flags -- confirm.
#if TIRAMISU_XLARGE || TIRAMISU_LARGE
#define VECTORIZE_V 512
#elif TIRAMISU_MEDIUM
#define VECTORIZE_V 256
#else
#define VECTORIZE_V 16
#endif

void generate_function(std::string name)
{
tiramisu::init();

// -------------------------------------------------------
// Layer I
// -------------------------------------------------------
function scal(name);

computation SIZES("{SIZES[0]}", expr(), false, p_int32, &scal);
computation alpha("{alpha[0]}", expr(), false, p_float64, &scal);
computation X("[M]->{X[i]: 0<=i<M}", expr(), false, p_float64, &scal);

constant M_cst("M", SIZES(0), p_int32, true, NULL, 0, &scal);

var i("i");
computation result("[M]->{result[i]: 0<=i<M}", alpha(0)*X(i), true, p_float64, &scal);

// -------------------------------------------------------
// Layer II
// -------------------------------------------------------
result.vectorize(i, VECTORIZE_V);

// -------------------------------------------------------
// Layer III
// -------------------------------------------------------
buffer b_SIZES("b_SIZES", {expr(1)}, p_int32, a_input, &scal);
buffer b_alpha("b_alpha", {expr(1)}, p_float64, a_input, &scal);
buffer b_X("b_X", {M_cst}, p_float64, a_output, &scal);

SIZES.set_access("{SIZES[0]->b_SIZES[0]}");
alpha.set_access("{alpha[0]->b_alpha[0]}");
X.set_access("{X[i]->b_X[i]}");

result.set_access("{result[i]->b_X[i]}");

// -------------------------------------------------------
// Code Generation
// -------------------------------------------------------
scal.set_arguments({&b_SIZES, &b_alpha, &b_X});
scal.gen_time_space_domain();
scal.gen_isl_ast();
scal.gen_halide_stmt();
scal.gen_halide_obj("generated_" + name + ".o");
}

/*
 * Entry point of the generator program: emits the kernel named "scal".
 * Command-line arguments are ignored. Always returns 0.
 */
int main(int argc, char** argv)
{
    const std::string kernel_name("scal");
    generate_function(kernel_name);

    return 0;
}
94 changes: 94 additions & 0 deletions benchmarks/linear_algebra/blas/level1/scal/scal_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#include <Halide.h>
#include <tiramisu/tiramisu.h>
#include <iostream>
#include "generated_scal.o.h"
#include "benchmarks.h"

// Number of elements of the vector X used by this benchmark.
// NOTE(review): SIZE is not defined in this file; presumably it comes from
// benchmarks.h -- confirm.
#define nrow SIZE

/*
 * Plain C++ reference for scal: multiplies the first n entries of X by
 * alpha, overwriting X. Does nothing when n <= 0. Always returns 0.
 */
int scal_ref(int n, double alpha, double* X)
{
    int pos = 0;
    while (pos < n) {
        const double scaled = alpha * X[pos];
        X[pos] = scaled;
        ++pos;
    }

    return 0;
}

/*
 * Benchmark driver for scal.
 *
 * Times NB_TESTS runs of the reference implementation and NB_TESTS runs of
 * the Tiramisu-generated kernel, reports the median of each through
 * print_time(), and optionally compares and prints the resulting vectors.
 *
 * Environment variables:
 *   RUN_REF      - the reference is executed only if the value starts with '1'.
 *   RUN_TIRAMISU - the generated kernel is executed only if the value
 *                  starts with '1'.
 * A run that is switched off is still timed, but the timed region does no work.
 *
 * Command-line arguments are ignored. Always returns 0.
 */
int main(int argc, char** argv)
{
    const char* ref_switch = std::getenv("RUN_REF");
    const bool run_ref = (ref_switch != NULL && ref_switch[0] == '1');

    const char* tiramisu_switch = std::getenv("RUN_TIRAMISU");
    const bool run_tiramisu = (tiramisu_switch != NULL && tiramisu_switch[0] == '1');

    std::vector<std::chrono::duration<double, std::milli>> ref_times, tiramisu_times;

    // ----------------------------- Problem set-up ------------------------------
    const double alpha = 2.5;

    Halide::Buffer<int> SIZES(1);
    SIZES(0) = nrow;

    Halide::Buffer<double> b_alpha(1);
    b_alpha(0) = alpha;

    Halide::Buffer<double> b_X(nrow), b_X_ref(nrow);

    // ---------------------------- Reference timings ----------------------------
    for (int run = 0; run < NB_TESTS; ++run)
    {
        // The kernel works in place, so the vector is reset (outside the
        // timed region) before every run.
        init_buffer(b_X_ref, (double)1);

        auto t_begin = std::chrono::high_resolution_clock::now();
        if (run_ref)
            scal_ref(nrow, alpha, b_X_ref.data());
        auto t_end = std::chrono::high_resolution_clock::now();

        ref_times.push_back(t_end - t_begin);
    }

    // ---------------------------- Tiramisu timings -----------------------------
    for (int run = 0; run < NB_TESTS; ++run)
    {
        init_buffer(b_X, (double)1);

        auto t_begin = std::chrono::high_resolution_clock::now();
        if (run_tiramisu)
            scal(SIZES.raw_buffer(), b_alpha.raw_buffer(), b_X.raw_buffer());
        auto t_end = std::chrono::high_resolution_clock::now();

        tiramisu_times.push_back(t_end - t_begin);
    }

    // -------------------------------- Reporting --------------------------------
    print_time("performance_cpu.csv", "scal",
               {"Ref", "Tiramisu"},
               {median(ref_times), median(tiramisu_times)});

    // The comparison is only meaningful when both implementations ran.
    if (CHECK_CORRECTNESS && run_ref && run_tiramisu)
        compare_buffers("scal", b_X_ref, b_X);

    if (PRINT_OUTPUT)
    {
        std::cout << "Tiramisu " << std::endl;
        print_buffer(b_X);

        std::cout << "Reference " << std::endl;
        print_buffer(b_X_ref);
    }

    return 0;
}
102 changes: 102 additions & 0 deletions benchmarks/linear_algebra/blas/level2/trsv/trsv_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include <tiramisu/tiramisu.h>
#include <string>
#include "benchmarks.h"

using namespace tiramisu;

/*
* Benchmark for BLAS DTRSV
*
* Solve the linear system:
* AX = b
* where A is a lower triangular matrix.
*
* inputs:
* --------
* - n: order of matrix A.
* - A: matrix of size nxn.
* - b: a vector of size n.
* right-hand side of the linear system.
*
* outputs:
* ---------
* - X: a vector of size n.
* solution of the linear system.
*
* Algorithm:
* for (i = 0; i < n; ++i)
* forward = 0;
* for (j = 0; j < i; ++j)
* forward += A[i][j] * X[j]
*
* X[i] = (b[i] - forward) / A[i][i]
*/

/*
 * Describes the triangular solve with the Tiramisu API and writes the
 * compiled result to the object file "generated_<name>.o".
 *
 * name: name given to the Tiramisu function; also used to build the
 *       object file name.
 *
 * The generated function takes four buffers, in this order:
 * b_SIZES (one int32), b_A (N x N float64), b_b (N float64) and
 * b_X (N float64).
 */
void generate_function(std::string name)
{
tiramisu::init();

// -------------------------------------------------------
// Layer I
// -------------------------------------------------------
function trsv(name);

// Inputs
computation SIZES("{SIZES[0]}", expr(), false, p_int32, &trsv);

computation A("[N]->{A[i,j]: 0<=i<N and 0<=j<N}", expr(), false, p_float64, &trsv);
computation b("[N]->{b[i]: 0<=i<N}", expr(), false, p_float64, &trsv);

// The order N of the system is taken from the first element of SIZES.
constant N_cst("N", SIZES(0), p_int32, true, NULL, 0, &trsv);

// Outputs
// X and forward are declared with an empty expression here; their
// expressions are attached below with set_expression().
computation X("[N]->{X[i]: 0<=i<N}", expr(), true, p_float64, &trsv);
// forward_init[i] is the constant 0 for every row i.
computation forward_init("[N]->{forward_init[i]: 0<=i<N}",
expr((double)0), true, p_float64, &trsv);

// forward[i,j] is only defined for j < i, i.e. for the entries of row i
// that lie before the diagonal.
computation forward("[N]->{forward[i,j]: 0<=i<N and 0<=j<i}",
expr(), true, p_float64, &trsv);

var i("i"), j("j");
// forward(i, j-1) at j = 0 and forward(i, i) below are outside the domain
// declared for forward. Every forward and forward_init access is mapped to
// b_forward[0] in Layer III, so these reads presumably resolve to the
// running accumulator held in that single element -- TODO confirm against
// the Tiramisu access semantics.
forward.set_expression(forward(i, j-1) + A(i, j)*X(j));
X.set_expression((b(i) - forward(i, i)) / A(i, i));

// -------------------------------------------------------
// Layer II
// -------------------------------------------------------
// Schedule forward after forward_init, and X after forward, both at loop
// level i.
forward.after(forward_init, i);
X.after(forward, i);

// -------------------------------------------------------
// Layer III
// -------------------------------------------------------
buffer b_SIZES("b_SIZES", {expr(1)}, p_int32, a_input, &trsv);
buffer b_A("b_A", {N_cst, N_cst}, p_float64, a_input, &trsv);
buffer b_b("b_b", {N_cst}, p_float64, a_input, &trsv);

buffer b_X("b_X", {N_cst}, p_float64, a_output, &trsv);
// One-element temporary shared by forward_init and forward; it is not part
// of the argument list passed to set_arguments() below.
buffer b_forward("b_forward", {expr(1)}, p_float64, a_temporary, &trsv);

SIZES.set_access("{SIZES[0]->b_SIZES[0]}");
A.set_access("{A[i,j]->b_A[i,j]}");
b.set_access("{b[i]->b_b[i]}");

X.set_access("{X[i]->b_X[i]}");
// Both computations write the same element b_forward[0].
forward_init.set_access("{forward_init[i]->b_forward[0]}");
forward.set_access("{forward[i,j]->b_forward[0]}");

// -------------------------------------------------------
// Code Generation
// -------------------------------------------------------
trsv.set_arguments({&b_SIZES, &b_A, &b_b, &b_X});
trsv.gen_time_space_domain();
trsv.gen_isl_ast();
trsv.gen_halide_stmt();
trsv.gen_halide_obj("generated_" + name + ".o");
}

/*
 * Entry point of the generator program: emits the kernel named "trsv".
 * Command-line arguments are ignored. Always returns 0.
 */
int main(int argc, char** argv)
{
    const std::string kernel_name("trsv");
    generate_function(kernel_name);

    return 0;
}
114 changes: 114 additions & 0 deletions benchmarks/linear_algebra/blas/level2/trsv/trsv_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#include <Halide.h>
#include <tiramisu/tiramisu.h>
#include <iostream>
#include "generated_trsv.o.h"
#include "benchmarks.h"

// Order of the square matrix A, and length of the vectors b and X, used by
// this benchmark.
// NOTE(review): N is not defined in this file; presumably it comes from
// benchmarks.h -- confirm.
#define MAT_N N

/*
 * Plain C++ reference for the triangular solve A*X = b by forward
 * substitution.
 *
 * n: order of the system.
 * A: n*n values read as A[i*n + j] (row i, column j); only entries with
 *    j <= i are read.
 * b: right-hand side, n values.
 * X: receives the n solution values.
 *
 * Diagonal entries are used as divisors without any check.
 * Always returns 0.
 */
int trsv_ref(int n, double* const A, double* const b, double* X)
{
    int row = 0;
    while (row < n) {
        const double* const a_row = A + row*n;

        // Contribution of the unknowns that have already been solved.
        double partial = 0;
        for (int col = 0; col != row; ++col)
            partial += a_row[col]*X[col];

        X[row] = (b[row] - partial) / a_row[row];
        ++row;
    }

    return 0;
}

/*
 * Benchmark driver for trsv.
 *
 * Builds a lower triangular test system, times NB_TESTS runs of the
 * reference implementation and NB_TESTS runs of the Tiramisu-generated
 * kernel, reports the median of each through print_time(), and optionally
 * compares and prints the two solution vectors.
 *
 * Environment variables:
 *   RUN_REF      - the reference is executed only if the value starts with '1'.
 *   RUN_TIRAMISU - the generated kernel is executed only if the value
 *                  starts with '1'.
 * A run that is switched off is still timed, but the timed region does no work.
 *
 * Command-line arguments are ignored. Always returns 0.
 */
int main(int argc, char** argv)
{
    std::vector<std::chrono::duration<double, std::milli>> duration_vector_1, duration_vector_2;

    bool run_ref = false, run_tiramisu = false;

    const char* env_ref = std::getenv("RUN_REF");
    if (env_ref != NULL && env_ref[0] == '1')
        run_ref = true;

    const char* env_tiramisu = std::getenv("RUN_TIRAMISU");
    if (env_tiramisu != NULL && env_tiramisu[0] == '1')
        run_tiramisu = true;

    // ---------------------------------------------------------------------
    // Problem set-up
    // ---------------------------------------------------------------------

    Halide::Buffer<int> SIZES(1);
    SIZES(0) = MAT_N;

    Halide::Buffer<double> b_A(MAT_N, MAT_N);
    Halide::Buffer<double> b_b(MAT_N);
    Halide::Buffer<double> b_X(MAT_N), b_X_ref(MAT_N);

    // Give both solution vectors a defined content. Without this, a vector
    // whose implementation is switched off (RUN_REF / RUN_TIRAMISU unset)
    // would be printed under PRINT_OUTPUT without ever having been written.
    init_buffer(b_X, (double)0);
    init_buffer(b_X_ref, (double)0);

    /*
     * The example system has the form:
     * | 1 0 0 ... 0 | |X1|   |1|
     * | 1 2 0 ... 0 | |X2|   |2|
     * | 1 2 3 ... 0 | |X3| = |3|
     * | ........... | |..|   |.|
     * | 1 2 3 ... N | |XN|   |N|
     *
     * Its solution is Xk = 1/k.
     */
    init_buffer(b_A, (double)0);
    // NOTE(review): assumes the first Halide coordinate is the fastest-varying
    // one, so that b_A(j, i) is the element trsv_ref reads as A[i*n + j]
    // (row i, column j) -- confirm.
    for (int i = 0; i < MAT_N; ++i)
        for (int j = 0; j <= i; ++j)
            b_A(j, i) = j+1;

    for (int i = 0; i < MAT_N; ++i)
        b_b(i) = i+1;

    // ---------------------------------------------------------------------
    // Reference timings
    // ---------------------------------------------------------------------

    for (int i = 0; i < NB_TESTS; ++i)
    {
        auto start = std::chrono::high_resolution_clock::now();

        if (run_ref)
            trsv_ref(MAT_N, b_A.data(), b_b.data(), b_X_ref.data());

        auto end = std::chrono::high_resolution_clock::now();
        duration_vector_1.push_back(end - start);
    }

    // ---------------------------------------------------------------------
    // Tiramisu timings
    // ---------------------------------------------------------------------

    for (int i = 0; i < NB_TESTS; ++i)
    {
        auto start = std::chrono::high_resolution_clock::now();

        if (run_tiramisu)
            trsv(SIZES.raw_buffer(), b_A.raw_buffer(), b_b.raw_buffer(), b_X.raw_buffer());

        auto end = std::chrono::high_resolution_clock::now();
        duration_vector_2.push_back(end - start);
    }

    // ---------------------------------------------------------------------
    // Reporting
    // ---------------------------------------------------------------------

    print_time("performance_cpu.csv", "trsv",
               {"Ref", "Tiramisu"},
               {median(duration_vector_1), median(duration_vector_2)});

    // The comparison is only meaningful when both implementations ran.
    if (CHECK_CORRECTNESS && run_ref && run_tiramisu)
        compare_buffers("trsv", b_X_ref, b_X);

    if (PRINT_OUTPUT)
    {
        std::cout << "Tiramisu " << std::endl;
        print_buffer(b_X);

        std::cout << "Reference " << std::endl;
        print_buffer(b_X_ref);
    }

    return 0;
}