Skip to content

Commit

Permalink
Improve the reliability of windows time based actions (#145)
Browse files Browse the repository at this point in the history
Occasionally in CI, the chrono controller would fail to stop and
continue to run even after shutdown had happened.

Looking at the traces it seems that the shutdown from chrono controller
stays running forever as well as chrono controller continuing to emit
tasks.

The only way this could happen is if shutdown failed to acquire the
mutex forever.

By setting running to false first outside of the mutex the main chrono
thread should stop emitting tasks if it's looping.

If it's sleeping on the condition variable then locking the mutex then
notifying should ensure it wakes up.

Also when windows ran tests, often they would time out.
This was because the timer that was used for sleeping would often vastly
overestimate the amount of sleep time.
For example, the sleep for 1ms tests were often sleeping for 15 to 30ms
instead.
  • Loading branch information
TrentHouliston authored Sep 1, 2024
1 parent 1f9aee6 commit f72e7c4
Show file tree
Hide file tree
Showing 15 changed files with 46 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sonarcloud.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
if: always()
uses: actions/upload-artifact@v4
with:
name: traces-macos
name: traces-sonar
path: build/tests/**/*.trace
retention-days: 1 # This sets the artifact TTL to 1 day (minimum is 1 day)
overwrite: true
2 changes: 2 additions & 0 deletions .github/workflows/windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ jobs:
uses: ilammy/msvc-dev-cmd@v1

- name: Configure CMake
env:
CXXFLAGS: -DNUCLEAR_TEST_TIME_UNIT_DEN=10
shell: cmd
run: |
cmake -E make_directory build
Expand Down
2 changes: 1 addition & 1 deletion src/extension/ChronoController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ namespace extension {

// When we shutdown we notify so we quit now
on<Shutdown>().then("Shutdown Chrono Controller", [this] {
const std::lock_guard<std::mutex> lock(mutex);
running.store(false, std::memory_order_release);
const std::lock_guard<std::mutex> lock(mutex);
wait.notify_all();
});

Expand Down
12 changes: 11 additions & 1 deletion src/util/precise_sleep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <chrono>
#include <cstdint>
#include <stdexcept>

#include "platform.hpp"

Expand All @@ -38,8 +39,17 @@ namespace util {
// Negative for relative time, positive for absolute time
// Measures in 100ns increments so divide by 100
ft.QuadPart = -static_cast<int64_t>(ns.count() / 100);
// Create a waitable timer with as high resolution as possible
::HANDLE timer{};
if (
#ifdef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
(timer = CreateWaitableTimerEx(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS)) == NULL
&&
#endif
(timer = CreateWaitableTimer(NULL, TRUE, NULL)) == NULL) {
throw std::runtime_error("Failed to create waitable timer");
}

::HANDLE timer = ::CreateWaitableTimer(nullptr, TRUE, nullptr);
::SetWaitableTimer(timer, &ft, 0, nullptr, nullptr, 0);
::WaitForSingleObject(timer, INFINITE);
::CloseHandle(timer);
Expand Down
3 changes: 2 additions & 1 deletion tests/test_util/TestBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ class TestBase : public NUClear::Reactor {

if (!clean_shutdown) {
powerplant.shutdown(true);
emit<Scope::INLINE>(std::make_unique<Fail>("Test timed out"));
emit<Scope::INLINE>(
std::make_unique<Fail>("Test timed out after " + std::to_string(timeout.count()) + " ms"));
}
});

Expand Down
3 changes: 2 additions & 1 deletion tests/tests/api/ReactionStatisticsTiming.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

using TimeUnit = test_util::TimeUnit;

Expand Down Expand Up @@ -81,7 +82,7 @@ class TestReactor : public test_util::TestBase<TestReactor> {
});
on<Trigger<LightTask>>().then(light_name, [] {
code_events.emplace_back("Started " + light_name, NUClear::clock::now());
std::this_thread::sleep_for(TimeUnit(scale));
NUClear::util::precise_sleep(TimeUnit(scale));
code_events.emplace_back("Finished " + light_name, NUClear::clock::now());
});

Expand Down
3 changes: 2 additions & 1 deletion tests/tests/api/TimeTravelFrozen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "test_util/TestBase.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

constexpr std::chrono::milliseconds EVENT_1_TIME = std::chrono::milliseconds(4);
constexpr std::chrono::milliseconds EVENT_2_TIME = std::chrono::milliseconds(8);
Expand Down Expand Up @@ -48,7 +49,7 @@ class TestReactor : public test_util::TestBase<TestReactor> {
});

on<Trigger<WaitForShutdown>>().then([this] {
std::this_thread::sleep_for(SHUTDOWN_TIME);
NUClear::util::precise_sleep(SHUTDOWN_TIME);
add_event("Finished");
powerplant.shutdown();
});
Expand Down
3 changes: 2 additions & 1 deletion tests/tests/dsl/IdleCrossPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

class TestReactor : public test_util::TestBase<TestReactor> {
public:
Expand All @@ -44,7 +45,7 @@ class TestReactor : public test_util::TestBase<TestReactor> {
default_thread_id = std::this_thread::get_id();
events.push_back("Step<2>");
// Sleep for a bit to coax out any more idle triggers
std::this_thread::sleep_for(test_util::TimeUnit(2));
NUClear::util::precise_sleep(test_util::TimeUnit(2));
powerplant.shutdown();
});

Expand Down
3 changes: 2 additions & 1 deletion tests/tests/dsl/IdleGlobal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

class TestReactor : public test_util::TestBase<TestReactor> {
public:
Expand Down Expand Up @@ -100,7 +101,7 @@ class TestReactor : public test_util::TestBase<TestReactor> {

static void wait_for_set(const std::atomic<bool>& flag) {
while (!flag.load(std::memory_order_acquire)) {
std::this_thread::sleep_for(test_util::TimeUnit(1));
NUClear::util::precise_sleep(test_util::TimeUnit(1));
}
}

Expand Down
3 changes: 2 additions & 1 deletion tests/tests/dsl/IdleSingle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

namespace Catch {

Expand Down Expand Up @@ -69,7 +70,7 @@ class TestReactor : public test_util::TestBase<TestReactor> {
on<Trigger<TaskA>, Pool<>, Sync<TestReactor>>().then([this](const TaskA& t) {
entry_calls[t.i].fetch_add(1, std::memory_order_relaxed);
emit(std::make_unique<TaskB>(t.i));
std::this_thread::sleep_for(std::chrono::milliseconds(1));
NUClear::util::precise_sleep(std::chrono::milliseconds(1));
});

// Run this at low priority but have it first
Expand Down
3 changes: 2 additions & 1 deletion tests/tests/dsl/IdleSingleGlobal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class TestReactor : public test_util::TestBase<TestReactor> {
public:
static constexpr int n_loops = 10000;

explicit TestReactor(std::unique_ptr<NUClear::Environment> environment) : TestBase(std::move(environment), false) {
explicit TestReactor(std::unique_ptr<NUClear::Environment> environment)
: TestBase(std::move(environment), false, std::chrono::seconds(2)) {

/*
* Run idle on the default pool, and a task on the main pool.
Expand Down
6 changes: 4 additions & 2 deletions tests/tests/dsl/IdleSync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,24 @@
#include <nuclear>

#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

class TestReactor : public test_util::TestBase<TestReactor> {
public:
explicit TestReactor(std::unique_ptr<NUClear::Environment> environment) : TestBase(std::move(environment), false) {

on<Trigger<Step<1>>, MainThread>().then([this] {
emit(std::make_unique<Step<2>>());
std::this_thread::sleep_for(std::chrono::milliseconds(100));
NUClear::util::precise_sleep(test_util::TimeUnit(1));
emit(std::make_unique<Step<3>>());
});

// Idle testing for default thread
on<Trigger<Step<2>>, Sync<TestReactor>>().then([this] {
add_event("Default Start");
std::this_thread::sleep_for(std::chrono::milliseconds(300));
NUClear::util::precise_sleep(test_util::TimeUnit(3));
add_event("Default End");
});

Expand Down
5 changes: 3 additions & 2 deletions tests/tests/dsl/Inline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

class TestReactor : public test_util::TestBase<TestReactor> {
public:
Expand Down Expand Up @@ -60,12 +61,12 @@ class TestReactor : public test_util::TestBase<TestReactor> {
on<Trigger<Step<1>>, MainThread>().then([this] {
emit(std::make_unique<SimpleMessage>("Main Local"));
emit<Scope::INLINE>(std::make_unique<SimpleMessage>("Main Inline"));
std::this_thread::sleep_for(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance
NUClear::util::precise_sleep(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance
});
on<Trigger<Step<2>>, Pool<>>().then([this] {
emit(std::make_unique<SimpleMessage>("Default Local"));
emit<Scope::INLINE>(std::make_unique<SimpleMessage>("Default Inline"));
std::this_thread::sleep_for(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance
NUClear::util::precise_sleep(test_util::TimeUnit(2)); // Sleep for a bit to give other threads a chance
});

on<Startup>().then([this] {
Expand Down
14 changes: 8 additions & 6 deletions tests/tests/dsl/Sync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
#include <nuclear>

#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

class TestReactor : public test_util::TestBase<TestReactor> {
public:
Expand All @@ -40,14 +42,14 @@ class TestReactor : public test_util::TestBase<TestReactor> {
events.push_back("Sync A " + m.data);

// Sleep for some time to be safe
std::this_thread::sleep_for(std::chrono::milliseconds(5));
NUClear::util::precise_sleep(test_util::TimeUnit(1));

// Emit a message 1 here, it should not run yet
events.push_back("Sync A emitting");
emit(std::make_unique<Message<1>>("From Sync A"));

// Sleep for some time again
std::this_thread::sleep_for(std::chrono::milliseconds(5));
NUClear::util::precise_sleep(test_util::TimeUnit(1));

events.push_back("Sync A " + m.data + " finished");
});
Expand All @@ -56,14 +58,14 @@ class TestReactor : public test_util::TestBase<TestReactor> {
events.push_back("Sync B " + m.data);

// Sleep for some time to be safe
std::this_thread::sleep_for(std::chrono::milliseconds(5));
NUClear::util::precise_sleep(test_util::TimeUnit(1));

// Emit a message 1 here, it should not run yet
events.push_back("Sync B emitting");
emit(std::make_unique<Message<1>>("From Sync B"));

// Sleep for some time again
std::this_thread::sleep_for(std::chrono::milliseconds(5));
NUClear::util::precise_sleep(test_util::TimeUnit(1));

events.push_back("Sync B " + m.data + " finished");
});
Expand All @@ -72,13 +74,13 @@ class TestReactor : public test_util::TestBase<TestReactor> {
events.push_back("Sync C " + m.data);

// Sleep for some time to be safe
std::this_thread::sleep_for(std::chrono::milliseconds(5));
NUClear::util::precise_sleep(test_util::TimeUnit(1));

// Emit a message 1 here, it should not run yet
events.push_back("Sync C waiting");

// Sleep for some time again
std::this_thread::sleep_for(std::chrono::milliseconds(5));
NUClear::util::precise_sleep(test_util::TimeUnit(1));

events.push_back("Sync C " + m.data + " finished");

Expand Down
3 changes: 2 additions & 1 deletion tests/tests/dsl/SyncMulti.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "test_util/TestBase.hpp"
#include "test_util/TimeUnit.hpp"
#include "test_util/common.hpp"
#include "util/precise_sleep.hpp"

class TestReactor : public test_util::TestBase<TestReactor> {
public:
Expand All @@ -36,7 +37,7 @@ class TestReactor : public test_util::TestBase<TestReactor> {
auto start = test_util::round_to_test_units(std::chrono::steady_clock::now() - start_time);
events.push_back(event + " started @ " + std::to_string(start.count()));
// Sleep for a bit to give a chance for the other threads to cause problems
std::this_thread::sleep_for(test_util::TimeUnit(2));
NUClear::util::precise_sleep(test_util::TimeUnit(2));
auto end = test_util::round_to_test_units(std::chrono::steady_clock::now() - start_time);
events.push_back(event + " finished @ " + std::to_string(end.count()));
}
Expand Down

0 comments on commit f72e7c4

Please sign in to comment.