Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update benchmarks #53

Merged
merged 4 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ jobs:
platform: x64

- name: configure gcc
run: cmake -S . -B build -DTP_BUILD_EXAMPLES=OFF -DTP_BUILD_BENCHMARKS=OFF -DCMAKE_BUILD_TYPE=Debug
run: cmake -S . -B build -DTP_BUILD_EXAMPLES=OFF -DTP_BUILD_BENCHMARKS=OFF -DTP_THREAD_SANITIZER=OFF -DCMAKE_BUILD_TYPE=Debug

- name: configure clang
run: cmake -S . -B build-clang -DTP_BUILD_EXAMPLES=OFF -DTP_BUILD_BENCHMARKS=OFF -DCMAKE_BUILD_TYPE=Debug
run: cmake -S . -B build-clang -DTP_BUILD_EXAMPLES=OFF -DTP_BUILD_BENCHMARKS=OFF -DTP_THREAD_SANITIZER=OFF -DCMAKE_BUILD_TYPE=Debug
env:
CC: clang
CXX: clang++
Expand Down
9 changes: 5 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,20 @@ install(FILES ${PROJECT_BINARY_DIR}/include/thread_pool/version.h
option(TP_BUILD_TESTS "Turn on to build unit tests." ON)
option(TP_BUILD_EXAMPLES "Turn on to build examples." ON)
option(TP_BUILD_BENCHMARKS "Turn on to build benchmarks." ON)
option(TP_THREAD_SANITIZER "Turn on to build with thread sanitizer." OFF)

if(${TP_BUILD_TESTS} OR ${TP_BUILD_EXAMPLES} OR ${TP_BUILD_BENCHMARKS})
if(TP_BUILD_TESTS OR TP_BUILD_EXAMPLES OR TP_BUILD_BENCHMARKS)
# see https://github.com/TheLartians/CPM.cmake for more info
include(cmake/CPM.cmake)
endif()

if(${TP_BUILD_TESTS})
if(TP_BUILD_TESTS)
enable_testing()
add_subdirectory(test)
endif()
if(${TP_BUILD_EXAMPLES})
if(TP_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
if(${TP_BUILD_BENCHMARKS})
if(TP_BUILD_BENCHMARKS)
add_subdirectory(benchmark)
endif()
33 changes: 26 additions & 7 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
"binaryDir": "${sourceDir}/out/build/${presetName}",
"installDir": "${sourceDir}/out/install/${presetName}",
"cacheVariables": {
"CMAKE_C_COMPILER": "cl",
"CMAKE_CXX_COMPILER": "cl"
},
"condition": {
Expand All @@ -40,15 +39,17 @@
"hidden": true,
"inherits": "linux-base",
"cacheVariables": {
"CMAKE_C_COMPILER": "gcc",
"CMAKE_CXX_COMPILER": "g++"
}
},
{
"name": "gcc-debug",
"inherits": "gcc-base",
"displayName": "GCC Debug",
"cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" }
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"TP_THREAD_SANITIZER": "ON"
}
},
{
"name": "gcc-release",
Expand All @@ -61,22 +62,33 @@
"hidden": true,
"inherits": "linux-base",
"cacheVariables": {
"CMAKE_C_COMPILER": "clang",
"CMAKE_CXX_COMPILER": "clang++"
}
},
{
"name": "clang-debug",
"inherits": "clang-base",
"displayName": "Clang Debug",
"cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" }
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"TP_THREAD_SANITIZER": "ON"
}
},
{
"name": "clang-release",
"inherits": "clang-base",
"displayName": "Clang Release",
"cacheVariables": { "CMAKE_BUILD_TYPE": "Release" }
},
{
"name": "clang-release-with-debug-info",
"inherits": "clang-base",
"displayName": "Clang RelWithDebInfo",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo",
"TP_THREAD_SANITIZER": "ON"
}
},
{
"name": "x64-debug",
"displayName": "x64 Debug",
Expand Down Expand Up @@ -112,10 +124,17 @@
{
"name": "x64-release",
"displayName": "x64 Release",
"description": "Target Windows (64-bit) with the Visual Studio development environment. (RelWithDebInfo)",
"description": "Target Windows (64-bit) with the Visual Studio development environment. (Release)",
"inherits": "x64-debug",
"cacheVariables": { "CMAKE_BUILD_TYPE": "Release" }
},
{
"name": "x64-release-with-debug",
"displayName": "x64 Release w/Debug",
"description": "Target Windows (64-bit) with the Visual Studio development environment. (RelWithDebInfo)",
"inherits": "x64-debug",
"cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" }
},
{
"name": "x64-windows-vcpkg",
"displayName": "Install only",
Expand All @@ -125,7 +144,7 @@
"CMAKE_BUILD_TYPE": "Release",
"TP_BUILD_TESTS": "OFF",
"TP_BUILD_EXAMPLES": "OFF",
"TP_BUILD_BENCHMARKS":"OFF"
"TP_BUILD_BENCHMARKS": "OFF"
}
}
]
Expand Down
11 changes: 11 additions & 0 deletions benchmark/include/utilities.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <nanobench.h>

#include <algorithm>
#include <cmath>
#include <iterator>
Expand Down Expand Up @@ -53,3 +55,12 @@ template <typename T, typename Rng = ankerl::nanobench::Rng>

return computations;
}

// Overwrites every element of `seq` with a random integer drawn uniformly from the
// full range of the sequence's value type.
//
// seq: any range whose value type is an integral type; its elements are replaced in place.
//
// The generator is a function-local static, so each instantiation of this template owns
// one generator that is seeded a single time from std::random_device.
template <std::ranges::range Seq, typename ValueType = std::ranges::range_value_t<Seq>>
    requires std::is_integral_v<ValueType>
void generate_random_data(Seq&& seq) {
    using Limits = std::numeric_limits<ValueType>;

    static ankerl::nanobench::Rng engine(std::random_device{}());
    std::uniform_int_distribution<ValueType> pick(Limits::min(), Limits::max());

    const auto next_value = [&] { return pick(engine); };
    std::ranges::generate(seq, next_value);
}
137 changes: 137 additions & 0 deletions benchmark/source/count_primes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#include <doctest/doctest.h>
#include <nanobench.h>
#include <thread_pool/thread_pool.h>
#include <utilities.h>

#include <BS_thread_pool_light.hpp>
#include <iostream>
#include <random>
#include <riften/thiefpool.hpp>

template <std::integral ValueType>
bool is_prime(const ValueType& value) {
if (value <= 3 && value > 1) return true;

// no need to check above sqrt(n)
const auto n = static_cast<ValueType>(std::ceil(std::sqrt(value) + 1));

for (auto i = 2; i < n; ++i) {
if (n % i == 0) {
return false;
}
}
return true;
}

// Adds one to `count` when `value` is prime and leaves `count` untouched otherwise.
//
// value: number to test with is_prime.
// count: running total owned by the caller; not synchronised, so single-threaded use only.
template <std::integral ValueType>
void count_if_prime(const ValueType& value, std::uint64_t& count) {
    const bool prime = is_prime(value);
    if (!prime) {
        return;
    }
    count += 1;
}

// Atomic-counter variant of count_if_prime, used as the task body submitted to thread pools.
//
// value: number to test with is_prime.
// count: shared atomic total; incremented by one when `value` is prime.
template <std::integral ValueType>
void count_if_prime_tp(const ValueType& value, std::atomic<std::uint64_t>& count) {
    if (!is_prime(value)) {
        return;
    }
    // same default (sequentially consistent) ordering as operator++ on the atomic
    count.fetch_add(1);
}

template <std::integral ValueType>
std::uint64_t count_primes(const std::vector<ValueType>& values) {
std::uint64_t count = 0;
for (const auto& value : values) count_if_prime(value, std::ref(count));
return count;
}

// Counts the primes in `values` by submitting one detached task per element to a
// dp::thread_pool.
//
// values: numbers to test.
// Returns the value of the shared atomic counter, read after the pool has gone out of scope.
template <std::integral ValueType>
std::uint64_t count_primes_thread_pool(const std::vector<ValueType>& values) {
    std::atomic<std::uint64_t> count(0);

    {
        // The pool lives in its own scope so that it is destroyed before `count` is read.
        // NOTE(review): this relies on the pool's destructor finishing every detached
        // task before returning - confirm against dp::thread_pool.
        dp::thread_pool<> pool{};
        for (const auto& value : values) {
            // Use the ValueType instantiation (not a fixed std::uint64_t one) so the pool
            // path exercises the same is_prime instantiation as count_primes and
            // run_benchmark, without widening every element on the way in.
            pool.enqueue_detach(count_if_prime_tp<ValueType>, value, std::ref(count));
        }
    }

    return count.load();
}

template <std::integral ValueType>
void run_benchmark(const std::size_t& size) {
ankerl::nanobench::Bench bench;
auto bench_title = std::string("count primes ") + std::to_string(sizeof(ValueType) * 8) +
" bit " + std::to_string(size);
bench.title(bench_title).warmup(10).relative(true);

// generate the data
std::vector<ValueType> values(size);
generate_random_data(values);

std::atomic<std::uint64_t> count(0);
bench.run("dp::thread_pool", [&] {
{
dp::thread_pool<> pool{};
for (const auto& value : values) {
pool.enqueue_detach(count_if_prime_tp<ValueType>, value, std::ref(count));
}
}
});

count.store(0);
bench.run("BS::thread_pool_light", [&] {
BS::thread_pool_light bs_thread_pool{std::thread::hardware_concurrency()};
for (const auto& value : values) {
bs_thread_pool.push_task(count_if_prime_tp<ValueType>, value, std::ref(count));
}
});

count.store(0);
bench.run("riften::thief_pool", [&] {
riften::Thiefpool pool{};
for (const auto& value : values) {
pool.enqueue_detach(count_if_prime_tp<ValueType>, value, std::ref(count));
}
});
}

// First checks that the sequential and thread-pool prime counts agree on 100 random
// values for 64-, 32- and 16-bit unsigned integers, then runs the pool benchmarks for
// each width over a list of input sizes.
TEST_CASE("count primes") {
    using namespace std::chrono_literals;

    // Fills the given vector with random data and compares both counting paths.
    // The sequential count is always computed before the pooled one.
    const auto expect_same_count = [](auto values) {
        generate_random_data(values);

        const auto result = count_primes(values);
        const auto pool_result = count_primes_thread_pool(values);

        CHECK(result == pool_result);
    };

    expect_same_count(std::vector<std::uint64_t>(100));
    expect_same_count(std::vector<std::uint32_t>(100));
    expect_same_count(std::vector<std::uint16_t>(100));

    // 16-bit values are cheap to test, so they get the largest input sizes
    for (const std::size_t size :
         {std::size_t{10'000}, std::size_t{100'000}, std::size_t{1'000'000}}) {
        run_benchmark<std::uint16_t>(size);
    }

    for (const std::size_t size : {std::size_t{100}, std::size_t{1000}, std::size_t{10'000}}) {
        run_benchmark<std::uint32_t>(size);
    }

    for (const std::size_t size : {std::size_t{100}, std::size_t{1000}}) {
        run_benchmark<std::uint64_t>(size);
    }
}
79 changes: 79 additions & 0 deletions benchmark/source/thread_pool_scaling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include <doctest/doctest.h>
#include <nanobench.h>
#include <thread_pool/thread_pool.h>

#include <chrono>
#include <riften/thiefpool.hpp>
#include <thread>

// Small fixed-size CPU workload used as the task body in the scaling benchmarks:
// 50 * 25 = 1250 steps of a Fibonacci-style recurrence on two local integers.
// Takes no input and returns nothing.
inline void thread_task() {
    // Unsigned on purpose: the recurrence outgrows 32 bits after roughly two dozen steps.
    // Signed overflow is undefined behaviour, whereas unsigned arithmetic wraps.
    unsigned int a = 0;
    unsigned int b = 1;

    #pragma unroll
    for (int i = 0; i < 50; ++i) {
        #pragma unroll
        for (int j = 0; j < 25; ++j) {
            a = a + b;
            b = b + a;
        }
    }
    [[maybe_unused]] const unsigned int result = b;
    // NOTE(review): with the call below disabled nothing observes `result`, so an
    // optimising build is free to remove the loops entirely - confirm that is intended.
    // ankerl::nanobench::doNotOptimizeAway(result);
}

// Measures how dp::thread_pool copes with a fixed batch of 64,000 thread_task jobs as
// the worker count grows from 1 to std::thread::hardware_concurrency().
// The pool is created once per worker count, outside the timed lambda; each timed
// iteration enqueues the whole batch and then waits on every future.
TEST_CASE("dp::thread_pool scaling") {
    using namespace std::chrono_literals;

    const std::string suite_name("equilibrium 64,000");
    ankerl::nanobench::Bench bench;

    // clang-format off
    bench.title(suite_name)
         .warmup(10)
         .minEpochIterations(10)
         .relative(true)
         .timeUnit(1ms, "ms");
    // clang-format on

    const unsigned int max_threads = std::thread::hardware_concurrency();
    for (unsigned int n_threads = 1; n_threads <= max_threads; ++n_threads) {
        const std::string label = "dp::thread_pool n_threads: " + std::to_string(n_threads);
        dp::thread_pool pool{n_threads};

        // one future slot per task, reused across timed iterations
        std::vector<std::future<void>> pending(64'000);
        bench.run(label, [&] {
            for (auto& slot : pending) {
                slot = pool.enqueue(thread_task);
            }
            for (auto& slot : pending) {
                slot.get();
            }
        });
        pending.clear();
    }
}

// Counterpart of the dp::thread_pool scaling case for riften::Thiefpool: for each
// worker count from 1 to std::thread::hardware_concurrency(), every timed iteration
// constructs a pool with that many threads and submits 64,000 detached thread_task
// jobs. The pool goes out of scope at the end of the timed lambda.
TEST_CASE("riften::ThiefPool scaling") {
    using namespace std::chrono_literals;

    const std::string suite_name("equilibrium 64,000");
    ankerl::nanobench::Bench bench;

    // clang-format off
    bench.title(suite_name)
         .warmup(10)
         .minEpochIterations(100)
         .relative(true)
         .timeUnit(1ms, "ms");
    // clang-format on

    const unsigned int max_threads = std::thread::hardware_concurrency();
    for (unsigned int n_threads = 1; n_threads <= max_threads; ++n_threads) {
        const std::string label = "riften::ThiefPool n_threads: " + std::to_string(n_threads);

        bench.run(label, [n_threads] {
            riften::Thiefpool pool(n_threads);

            int submitted = 0;
            while (submitted < 64'000) {
                pool.enqueue_detach(thread_task);
                ++submitted;
            }
        });
    }
}
2 changes: 1 addition & 1 deletion include/thread_pool/thread_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ namespace dp {
typename ReturnType = std::invoke_result_t<Function &&, Args &&...>>
requires std::invocable<Function, Args...>
[[nodiscard]] std::future<ReturnType> enqueue(Function f, Args... args) {
#if __cpp_lib_move_only_function
#ifdef __cpp_lib_move_only_function
// we can do this in C++23 because we now have support for move only functions
std::promise<ReturnType> promise;
auto future = promise.get_future();
Expand Down
Loading