From f72e7c4b69651a2cba460187f19b494d497e3767 Mon Sep 17 00:00:00 2001 From: Trent Houliston Date: Sun, 1 Sep 2024 18:50:46 +1000 Subject: [PATCH] Improve the reliability of windows time based actions (#145) Occasionally in CI, the chrono controller would fail to stop and continue to run even after shutdown had happened. Looking at the traces it seems that the shutdown from chrono controller stays running forever as well as chrono controller continuing to emit tasks. The only way this could happen is if shutdown failed to acquire the mutex forever. By setting running to false first outside of the mutex the main chrono thread should stop emitting tasks if it's looping. If it's sleeping on the condition variable then locking the mutex then notifying should ensure it wakes up. Also when windows ran tests, often they would time out. This was because the timer that was used for sleeping would often vastly overestimate the amount of sleep time. For example, the sleep for 1ms tests were often sleeping for 15 to 30ms instead. --- .github/workflows/sonarcloud.yaml | 2 +- .github/workflows/windows.yaml | 2 ++ src/extension/ChronoController.cpp | 2 +- src/util/precise_sleep.cpp | 12 +++++++++++- tests/test_util/TestBase.hpp | 3 ++- tests/tests/api/ReactionStatisticsTiming.cpp | 3 ++- tests/tests/api/TimeTravelFrozen.cpp | 3 ++- tests/tests/dsl/IdleCrossPool.cpp | 3 ++- tests/tests/dsl/IdleGlobal.cpp | 3 ++- tests/tests/dsl/IdleSingle.cpp | 3 ++- tests/tests/dsl/IdleSingleGlobal.cpp | 3 ++- tests/tests/dsl/IdleSync.cpp | 6 ++++-- tests/tests/dsl/Inline.cpp | 5 +++-- tests/tests/dsl/Sync.cpp | 14 ++++++++------ tests/tests/dsl/SyncMulti.cpp | 3 ++- 15 files changed, 46 insertions(+), 21 deletions(-) diff --git a/.github/workflows/sonarcloud.yaml b/.github/workflows/sonarcloud.yaml index 829bba961..3a02f51e5 100644 --- a/.github/workflows/sonarcloud.yaml +++ b/.github/workflows/sonarcloud.yaml @@ -84,7 +84,7 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: traces-macos + name: traces-sonar path: build/tests/**/*.trace retention-days: 1 # This sets the artifact TTL to 1 day (minimum is 1 day) overwrite: true diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml index 9f72fb49b..2163cc35b 100644 --- a/.github/workflows/windows.yaml +++ b/.github/workflows/windows.yaml @@ -56,6 +56,8 @@ jobs: uses: ilammy/msvc-dev-cmd@v1 - name: Configure CMake + env: + CXXFLAGS: -DNUCLEAR_TEST_TIME_UNIT_DEN=10 shell: cmd run: | cmake -E make_directory build diff --git a/src/extension/ChronoController.cpp b/src/extension/ChronoController.cpp index d32e758c1..f7c01bf36 100644 --- a/src/extension/ChronoController.cpp +++ b/src/extension/ChronoController.cpp @@ -90,8 +90,8 @@ namespace extension { // When we shutdown we notify so we quit now on().then("Shutdown Chrono Controller", [this] { - const std::lock_guard lock(mutex); running.store(false, std::memory_order_release); + const std::lock_guard lock(mutex); wait.notify_all(); }); diff --git a/src/util/precise_sleep.cpp b/src/util/precise_sleep.cpp index 1162f98fd..37b536172 100644 --- a/src/util/precise_sleep.cpp +++ b/src/util/precise_sleep.cpp @@ -26,6 +26,7 @@ #include #include + #include #include "platform.hpp" @@ -38,8 +39,17 @@ namespace util { // Negative for relative time, positive for absolute time // Measures in 100ns increments so divide by 100 ft.QuadPart = -static_cast(ns.count() / 100); + // Create a waitable timer with as high resolution as possible + ::HANDLE timer{}; + if ( + #ifdef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION + (timer = CreateWaitableTimerEx(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS)) == NULL + && + #endif + (timer = CreateWaitableTimer(NULL, TRUE, NULL)) == NULL) { + throw std::runtime_error("Failed to create waitable timer"); + } - ::HANDLE timer = ::CreateWaitableTimer(nullptr, TRUE, nullptr); ::SetWaitableTimer(timer, &ft, 0, nullptr, nullptr, 0); ::WaitForSingleObject(timer, INFINITE); ::CloseHandle(timer); diff --git a/tests/test_util/TestBase.hpp b/tests/test_util/TestBase.hpp index a8fa5b293..8b18a15ad 100644 --- a/tests/test_util/TestBase.hpp +++ b/tests/test_util/TestBase.hpp @@ -82,7 +82,8 @@ class TestBase : public NUClear::Reactor { if (!clean_shutdown) { powerplant.shutdown(true); - emit(std::make_unique("Test timed out")); + emit( + std::make_unique("Test timed out after " + std::to_string(timeout.count()) + " ms")); } }); diff --git a/tests/tests/api/ReactionStatisticsTiming.cpp b/tests/tests/api/ReactionStatisticsTiming.cpp index e1af4020b..ba54639fe 100644 --- a/tests/tests/api/ReactionStatisticsTiming.cpp +++ b/tests/tests/api/ReactionStatisticsTiming.cpp @@ -26,6 +26,7 @@ #include "test_util/TestBase.hpp" #include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" using TimeUnit = test_util::TimeUnit; @@ -81,7 +82,7 @@ class TestReactor : public test_util::TestBase { }); on>().then(light_name, [] { code_events.emplace_back("Started " + light_name, NUClear::clock::now()); - std::this_thread::sleep_for(TimeUnit(scale)); + NUClear::util::precise_sleep(TimeUnit(scale)); code_events.emplace_back("Finished " + light_name, NUClear::clock::now()); }); diff --git a/tests/tests/api/TimeTravelFrozen.cpp b/tests/tests/api/TimeTravelFrozen.cpp index da3019394..bad45e46d 100644 --- a/tests/tests/api/TimeTravelFrozen.cpp +++ b/tests/tests/api/TimeTravelFrozen.cpp @@ -5,6 +5,7 @@ #include "test_util/TestBase.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" constexpr std::chrono::milliseconds EVENT_1_TIME = std::chrono::milliseconds(4); constexpr std::chrono::milliseconds EVENT_2_TIME = std::chrono::milliseconds(8); @@ -48,7 +49,7 @@ class TestReactor : public test_util::TestBase { }); on>().then([this] { - std::this_thread::sleep_for(SHUTDOWN_TIME); + NUClear::util::precise_sleep(SHUTDOWN_TIME); add_event("Finished"); powerplant.shutdown(); }); diff --git a/tests/tests/dsl/IdleCrossPool.cpp b/tests/tests/dsl/IdleCrossPool.cpp index d4afcf777..76b67f651 100644 --- a/tests/tests/dsl/IdleCrossPool.cpp +++ b/tests/tests/dsl/IdleCrossPool.cpp @@ -26,6 +26,7 @@ #include "test_util/TestBase.hpp" #include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" class TestReactor : public test_util::TestBase { public: @@ -44,7 +45,7 @@ class TestReactor : public test_util::TestBase { default_thread_id = std::this_thread::get_id(); events.push_back("Step<2>"); // Sleep for a bit to coax out any more idle triggers - std::this_thread::sleep_for(test_util::TimeUnit(2)); + NUClear::util::precise_sleep(test_util::TimeUnit(2)); powerplant.shutdown(); }); diff --git a/tests/tests/dsl/IdleGlobal.cpp b/tests/tests/dsl/IdleGlobal.cpp index bb632f824..0256c1671 100644 --- a/tests/tests/dsl/IdleGlobal.cpp +++ b/tests/tests/dsl/IdleGlobal.cpp @@ -26,6 +26,7 @@ #include "test_util/TestBase.hpp" #include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" class TestReactor : public test_util::TestBase { public: @@ -100,7 +101,7 @@ class TestReactor : public test_util::TestBase { static void wait_for_set(const std::atomic& flag) { while (!flag.load(std::memory_order_acquire)) { - std::this_thread::sleep_for(test_util::TimeUnit(1)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); } } diff --git a/tests/tests/dsl/IdleSingle.cpp b/tests/tests/dsl/IdleSingle.cpp index c0570dc2d..9bad95873 100644 --- a/tests/tests/dsl/IdleSingle.cpp +++ b/tests/tests/dsl/IdleSingle.cpp @@ -26,6 +26,7 @@ #include "test_util/TestBase.hpp" #include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" namespace Catch { @@ -69,7 +70,7 @@ class TestReactor : public test_util::TestBase { on, Pool<>, Sync>().then([this](const TaskA& t) { entry_calls[t.i].fetch_add(1, std::memory_order_relaxed); emit(std::make_unique(t.i)); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + NUClear::util::precise_sleep(std::chrono::milliseconds(1)); }); // Run this at low priority but have it first diff --git a/tests/tests/dsl/IdleSingleGlobal.cpp b/tests/tests/dsl/IdleSingleGlobal.cpp index 68bb50cfb..1fa09a52a 100644 --- a/tests/tests/dsl/IdleSingleGlobal.cpp +++ b/tests/tests/dsl/IdleSingleGlobal.cpp @@ -50,7 +50,8 @@ class TestReactor : public test_util::TestBase { public: static constexpr int n_loops = 10000; - explicit TestReactor(std::unique_ptr environment) : TestBase(std::move(environment), false) { + explicit TestReactor(std::unique_ptr environment) + : TestBase(std::move(environment), false, std::chrono::seconds(2)) { /* * Run idle on the default pool, and a task on the main pool. diff --git a/tests/tests/dsl/IdleSync.cpp b/tests/tests/dsl/IdleSync.cpp index fc53cd75c..783a45f29 100644 --- a/tests/tests/dsl/IdleSync.cpp +++ b/tests/tests/dsl/IdleSync.cpp @@ -24,7 +24,9 @@ #include #include "test_util/TestBase.hpp" +#include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" class TestReactor : public test_util::TestBase { public: @@ -32,14 +34,14 @@ class TestReactor : public test_util::TestBase { on>, MainThread>().then([this] { emit(std::make_unique>()); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); emit(std::make_unique>()); }); // Idle testing for default thread on>, Sync>().then([this] { add_event("Default Start"); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); + NUClear::util::precise_sleep(test_util::TimeUnit(3)); add_event("Default End"); }); diff --git a/tests/tests/dsl/Inline.cpp b/tests/tests/dsl/Inline.cpp index aa9c7b9d5..3a21a73fc 100644 --- a/tests/tests/dsl/Inline.cpp +++ b/tests/tests/dsl/Inline.cpp @@ -26,6 +26,7 @@ #include "test_util/TestBase.hpp" #include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" class TestReactor : public test_util::TestBase { public: @@ -60,12 +61,12 @@ class TestReactor : public test_util::TestBase { on>, MainThread>().then([this] { emit(std::make_unique("Main Local")); emit(std::make_unique("Main Inline")); - std::this_thread::sleep_for(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance + NUClear::util::precise_sleep(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance }); on>, Pool<>>().then([this] { emit(std::make_unique("Default Local")); emit(std::make_unique("Default Inline")); - std::this_thread::sleep_for(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance + NUClear::util::precise_sleep(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance }); on().then([this] { diff --git a/tests/tests/dsl/Sync.cpp b/tests/tests/dsl/Sync.cpp index cd83dba5d..67e7fb463 100644 --- a/tests/tests/dsl/Sync.cpp +++ b/tests/tests/dsl/Sync.cpp @@ -24,7 +24,9 @@ #include #include "test_util/TestBase.hpp" +#include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" class TestReactor : public test_util::TestBase { public: @@ -40,14 +42,14 @@ class TestReactor : public test_util::TestBase { events.push_back("Sync A " + m.data); // Sleep for some time to be safe - std::this_thread::sleep_for(std::chrono::milliseconds(5)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); // Emit a message 1 here, it should not run yet events.push_back("Sync A emitting"); emit(std::make_unique>("From Sync A")); // Sleep for some time again - std::this_thread::sleep_for(std::chrono::milliseconds(5)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); events.push_back("Sync A " + m.data + " finished"); }); @@ -56,14 +58,14 @@ class TestReactor : public test_util::TestBase { events.push_back("Sync B " + m.data); // Sleep for some time to be safe - std::this_thread::sleep_for(std::chrono::milliseconds(5)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); // Emit a message 1 here, it should not run yet events.push_back("Sync B emitting"); emit(std::make_unique>("From Sync B")); // Sleep for some time again - std::this_thread::sleep_for(std::chrono::milliseconds(5)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); events.push_back("Sync B " + m.data + " finished"); }); @@ -72,13 +74,13 @@ class TestReactor : public test_util::TestBase { events.push_back("Sync C " + m.data); // Sleep for some time to be safe - std::this_thread::sleep_for(std::chrono::milliseconds(5)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); // Emit a message 1 here, it should not run yet events.push_back("Sync C waiting"); // Sleep for some time again - std::this_thread::sleep_for(std::chrono::milliseconds(5)); + NUClear::util::precise_sleep(test_util::TimeUnit(1)); events.push_back("Sync C " + m.data + " finished"); diff --git a/tests/tests/dsl/SyncMulti.cpp b/tests/tests/dsl/SyncMulti.cpp index 7f22836a8..649784205 100644 --- a/tests/tests/dsl/SyncMulti.cpp +++ b/tests/tests/dsl/SyncMulti.cpp @@ -26,6 +26,7 @@ #include "test_util/TestBase.hpp" #include "test_util/TimeUnit.hpp" #include "test_util/common.hpp" +#include "util/precise_sleep.hpp" class TestReactor : public test_util::TestBase { public: @@ -36,7 +37,7 @@ class TestReactor : public test_util::TestBase { auto start = test_util::round_to_test_units(std::chrono::steady_clock::now() - start_time); events.push_back(event + " started @ " + std::to_string(start.count())); // Sleep for a bit to give a chance for the other threads to cause problems - std::this_thread::sleep_for(test_util::TimeUnit(2)); + NUClear::util::precise_sleep(test_util::TimeUnit(2)); auto end = test_util::round_to_test_units(std::chrono::steady_clock::now() - start_time); events.push_back(event + " finished @ " + std::to_string(end.count())); }