Skip to content
This repository has been archived by the owner on Nov 2, 2023. It is now read-only.

Commit

Permalink
Merge pull request #46 from mhaseeb123/fix_1d
Browse files Browse the repository at this point in the history
format 1d stencil codes and add openmp impl
  • Loading branch information
Muhammad Haseeb authored Oct 23, 2023
2 parents 1aef2b2 + 2a0ec55 commit 6711d32
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 159 deletions.
20 changes: 13 additions & 7 deletions apps/1d_stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,26 @@ target_include_directories(
PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/../../include
${ARGPARSE_INCLUDE_DIR} ${MDSPAN_INCLUDE_DIR})

add_executable(stencil_omp stencil_omp.cpp)
target_include_directories(
stencil_omp
PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/../../include
${ARGPARSE_INCLUDE_DIR} ${MDSPAN_INCLUDE_DIR})

add_executable(stencil_stdpar stencil_stdpar.cpp)
target_link_libraries(stencil_stdpar stdexec)
target_include_directories(
stencil_stdpar
PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/../../include
${ARGPARSE_INCLUDE_DIR} ${MDSPAN_INCLUDE_DIR})

# TODO, fix cmake
add_executable(stencil_stdexec stencil_stdexec.cpp)
target_link_libraries(stencil_stdexec stdexec)
target_include_directories(
stencil_stdexec
PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/../../include
${ARGPARSE_INCLUDE_DIR} ${MDSPAN_INCLUDE_DIR})
# TODO, fix cmake
add_executable(stencil_stdexec stencil_stdexec.cpp)
target_link_libraries(stencil_stdexec stdexec)
target_include_directories(
stencil_stdexec
PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/../../include
${ARGPARSE_INCLUDE_DIR} ${MDSPAN_INCLUDE_DIR})

if("${STDPAR}" STREQUAL "gpu")
add_executable(stencil_cuda stencil_cuda.cpp)
Expand Down
55 changes: 25 additions & 30 deletions apps/1d_stencil/stencil_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@
struct args_params_t : public argparse::Args {
bool& results = kwarg("results", "print generated results (default: false)")
.set_default(false);
std::uint64_t& nx =
kwarg("nx", "Local x dimension (of each partition)").set_default(10);
std::uint64_t& nt = kwarg("nt", "Number of time steps").set_default(45);
std::uint64_t& np = kwarg("np", "Number of partitions").set_default(10);
std::uint64_t& size = kwarg("size", "Number of elements").set_default(10);
bool& k = kwarg("k", "Heat transfer coefficient").set_default(0.5);
double& dt = kwarg("dt", "Timestep unit (default: 1.0[s])").set_default(1.0);
double& dx = kwarg("dx", "Local x dimension").set_default(1.0);
Expand All @@ -23,51 +21,51 @@ struct args_params_t : public argparse::Args {
bool& time = kwarg("t, time", "print time").set_default(true);
};

using Real_t = double;
///////////////////////////////////////////////////////////////////////////////
// Command-line variables
bool header = true; // print csv heading
constexpr double k = 0.5; // heat transfer coefficient
constexpr double dt = 1.; // time step
constexpr double dx = 1.; // grid spacing
constexpr bool header = true; // print csv heading
constexpr Real_t k = 0.5; // heat transfer coefficient
constexpr Real_t dt = 1.; // time step
constexpr Real_t dx = 1.; // grid spacing

// Our operator
__device__ double heat(double left, double middle, double right) {
__device__ Real_t heat(const Real_t left, const Real_t middle, const Real_t right,
const Real_t k, const Real_t dt, const Real_t dx) {
return middle + (k * dt / (dx * dx)) * (left - 2 * middle + right);
}

__global__ void heat_equation(double* current, double* next, std::size_t size) {
__global__ void heat_equation(Real_t* current, Real_t* next, std::size_t size,
const Real_t k, const Real_t dt, const Real_t dx) {
std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;

if (i < size) {
std::size_t left = (i == 0) ? size - 1 : i - 1;
std::size_t right = (i == size - 1) ? 0 : i + 1;
next[i] = heat(current[left], current[i], current[right]);
next[i] = heat(current[left], current[i], current[right], k, dt, dx);
}
}

int benchmark(args_params_t const& args) {
// Parameters (for simplicity, some are hardcoded)
std::uint64_t np = args.np; // Number of partitions.
std::uint64_t nx = args.nx; // Number of grid points.
std::uint64_t size = args.size; // Number of elements.
std::uint64_t nt = args.nt; // Number of steps.
std::size_t size = np * nx;

double* h_current = nullptr;
double* h_next = nullptr;
Real_t* h_current = nullptr;
Real_t* h_next = nullptr;

// Measure execution time.
Timer timer;

// Memory allocation
if (args.results) {
h_current = new double[size];
h_next = new double[size];
h_current = new Real_t[size];
h_next = new Real_t[size];
}

double* d_current;
double* d_next;
cudaMalloc(&d_current, size * sizeof(double));
cudaMalloc(&d_next, size * sizeof(double));
Real_t* d_current;
Real_t* d_next;
cudaMalloc(&d_current, size * sizeof(Real_t));
cudaMalloc(&d_next, size * sizeof(Real_t));
thrust::sequence(thrust::device, d_current, d_current + size, 0);
thrust::sequence(thrust::device, d_next, d_next + size, 0);

Expand All @@ -77,25 +75,22 @@ int benchmark(args_params_t const& args) {

// Actual time step loop
for (std::size_t t = 0; t < nt; ++t) {
heat_equation<<<blocks, threadsPerBlock>>>(d_current, d_next, size);
heat_equation<<<blocks, threadsPerBlock>>>(d_current, d_next, size, k, dt, dx);
std::swap(d_current, d_next);
}
cudaDeviceSynchronize();
auto time = timer.stop();

if (args.results) {
// Copy result back to host
cudaMemcpy(h_current, d_current, size * sizeof(double),
cudaMemcpy(h_current, d_current, size * sizeof(Real_t),
cudaMemcpyDeviceToHost);

// Print results
for (std::size_t i = 0; i < np; ++i) {
std::cout << "U[" << i << "] = {";
for (std::size_t j = 0; j < nx; ++j) {
std::cout << h_current[i * nx + j] << " ";
}
std::cout << "}\n";
for (std::size_t i = 0; i != size; ++i) {
std::cout << h_current[i] << " ";
}
std::cout << "\n";
// Cleanup
delete[] h_current;
delete[] h_next;
Expand Down
130 changes: 130 additions & 0 deletions apps/1d_stencil/stencil_omp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* MIT License
*
* Copyright (c) 2023 Weile Wei
* Copyright (c) 2023 The Regents of the University of California,
* through Lawrence Berkeley National Laboratory (subject to receipt of any
* required approvals from the U.S. Dept. of Energy).All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "argparse/argparse.hpp"
#include "commons.hpp"
#include <omp.h>

// parameters
struct args_params_t : public argparse::Args {
bool& results = kwarg("results", "print generated results (default: false)")
.set_default(false);
std::uint64_t& nt = kwarg("nt", "Number of time steps").set_default(45);
std::uint64_t& size = kwarg("size", "Number of elements").set_default(10);
int& nthreads = kwarg("nthreads", "Number of openmp threads").set_default(1);
bool& k = kwarg("k", "Heat transfer coefficient").set_default(0.5);
double& dt = kwarg("dt", "Timestep unit (default: 1.0[s])").set_default(1.0);
double& dx = kwarg("dx", "Local x dimension").set_default(1.0);
bool& help = flag("h, help", "print help");
bool& time = kwarg("t, time", "print time").set_default(true);
};

using Real_t = double;
///////////////////////////////////////////////////////////////////////////////
// Command-line variables
constexpr Real_t k = 0.5; // heat transfer coefficient
constexpr Real_t dt = 1.; // time step
constexpr Real_t dx = 1.; // grid spacing

///////////////////////////////////////////////////////////////////////////////
//[stepper_1
struct stepper {
// Our operator
Real_t heat(const Real_t left, const Real_t middle, const Real_t right, const Real_t k = ::k,
const Real_t dt = ::dt, const Real_t dx = ::dx) {
return middle + (k * dt / (dx * dx)) * (left - 2 * middle + right);
}

// do all the work on 'size' data points for 'nt' time steps
[[nodiscard]] std::vector<Real_t> do_work(const std::size_t size, const std::size_t nt, const int nthreads) {
std::vector<Real_t> current(size);
std::vector<Real_t> next(size);

#pragma omp parallel for num_threads(nthreads)
for (std::size_t i = 0; i < size; ++i) {
current[i] = Real_t(i);
}

// Actual time step loop
for (std::size_t t = 0; t != nt; ++t) {
// OpenMP parallel for loop
#pragma omp parallel for num_threads(nthreads)
for (std::size_t i = 0; i < size; ++i) {
std::size_t left = (i == 0) ? size - 1 : i - 1;
std::size_t right = (i == size - 1) ? 0 : i + 1;
next[i] = heat(current[left], current[i], current[right], k, dt, dx);
}
std::swap(current, next);
}

return current;
}
};

///////////////////////////////////////////////////////////////////////////////
int benchmark(args_params_t const& args) {
std::uint64_t size = args.size; // Number of elements.
std::uint64_t nt = args.nt; // Number of steps.
int nthreads = args.nthreads;

// Create the stepper object
stepper step;

// Measure execution time.
Timer timer;

auto solution = step.do_work(size, nt, nthreads);
auto time = timer.stop();

// Print the final solution
if (args.results) {
for (const auto& ele: solution) {
std::cout << ele << " ";
}
std::cout << "\n";
}

if (args.time) {
std::cout << "Duration: " << time << " ms."
<< "\n";
}

return 0;
}

int main(int argc, char* argv[]) {
// parse params
args_params_t args = argparse::parse<args_params_t>(argc, argv);
// see if help wanted
if (args.help) {
args.print(); // prints all variables
return 0;
}

benchmark(args);

return 0;
}
Loading

0 comments on commit 6711d32

Please sign in to comment.