From 1b4c5999994df214e3cf84250ad6d33ad51c479a Mon Sep 17 00:00:00 2001 From: Trent Houliston Date: Thu, 15 Aug 2024 12:33:05 +1000 Subject: [PATCH] Change how ReactionStatistics works to be more useful for monitoring events --- src/dsl/operation/TypeBind.hpp | 3 + src/id.hpp | 6 + src/message/ReactionStatistics.hpp | 105 +++++++---- src/threading/ReactionTask.hpp | 22 +-- src/util/CallbackGenerator.hpp | 36 +++- src/util/usage_clock.cpp | 44 +++++ src/util/usage_clock.hpp | 30 +++ tests/tests/api/ReactionStatistics.cpp | 14 +- tests/tests/api/ReactionStatisticsTiming.cpp | 181 +++++++++++++++++++ 9 files changed, 383 insertions(+), 58 deletions(-) create mode 100644 src/util/usage_clock.cpp create mode 100644 src/util/usage_clock.hpp create mode 100644 tests/tests/api/ReactionStatisticsTiming.cpp diff --git a/src/dsl/operation/TypeBind.hpp b/src/dsl/operation/TypeBind.hpp index ca19057a..15f015aa 100644 --- a/src/dsl/operation/TypeBind.hpp +++ b/src/dsl/operation/TypeBind.hpp @@ -29,6 +29,7 @@ namespace NUClear { // Forward declarations namespace message { + struct ReactionEvent; struct ReactionStatistics; struct LogMessage; } // namespace message @@ -40,6 +41,8 @@ namespace dsl { template struct EmitStats : std::true_type {}; template <> + struct EmitStats : std::false_type {}; + template <> struct EmitStats : std::false_type {}; template <> struct EmitStats : std::false_type {}; diff --git a/src/id.hpp b/src/id.hpp index b5953524..453832fb 100644 --- a/src/id.hpp +++ b/src/id.hpp @@ -30,6 +30,12 @@ namespace NUClear { /// This type is used when NUClear requires a unique identifier using id_t = std::size_t; +/// A reaction and task id pair identify a specific task/reaction combination +struct IDPair { + id_t reaction_id{0}; + id_t task_id{0}; +}; + } // namespace NUClear #endif // NUCLEAR_UTIL_ID_HPP diff --git a/src/message/ReactionStatistics.hpp b/src/message/ReactionStatistics.hpp index 840d9a4a..666dd0c1 100644 --- a/src/message/ReactionStatistics.hpp +++ b/src/message/ReactionStatistics.hpp @@ -25,11 +25,15 @@ #include #include +#include #include #include "../clock.hpp" #include "../id.hpp" #include "../threading/ReactionIdentifiers.hpp" +#include "../util/GroupDescriptor.hpp" +#include "../util/ThreadPoolDescriptor.hpp" +#include "../util/usage_clock.hpp" namespace NUClear { namespace message { @@ -39,43 +43,76 @@ namespace message { */ struct ReactionStatistics { + struct Event { + struct ThreadInfo { + std::thread::id thread_id; + util::ThreadPoolDescriptor pool{util::ThreadPoolDescriptor::AllPools()}; + }; + + ThreadInfo thread{}; + NUClear::clock::time_point nuclear_time; + std::chrono::steady_clock::time_point realtime; + util::cpu_clock::time_point cpu_time; + + static Event now() { + + // Get the thread pool information for this thread if it's a NUClear thread + const Event::ThreadInfo thread_info; + + return Event{ + thread_info, + NUClear::clock::now(), + std::chrono::steady_clock::now(), + util::cpu_clock::now(), + }; + } + }; + ReactionStatistics(std::shared_ptr identifiers, - const NUClear::id_t& reaction_id, - const NUClear::id_t& task_id, - const NUClear::id_t& cause_reaction_id, - const NUClear::id_t& cause_task_id, - const clock::time_point& emitted, - const clock::time_point& start, - const clock::time_point& finish, - std::exception_ptr exception) + const IDPair& cause, + const IDPair& target, + util::ThreadPoolDescriptor target_threadpool, + util::GroupDescriptor target_group) : identifiers(std::move(identifiers)) - , reaction_id(reaction_id) - , task_id(task_id) - , cause_reaction_id(cause_reaction_id) - , cause_task_id(cause_task_id) - , emitted(emitted) - , started(start) - , finished(finish) - , exception(std::move(exception)) {} - - /// A string containing the username/on arguments/and callback name of the reaction + , cause(cause) + , target(target) + , target_threadpool(std::move(target_threadpool)) + , target_group(std::move(target_group)) + , created(Event::now()) {}; + + + /// The identifiers for the reaction that was executed std::shared_ptr identifiers; - /// The id of this reaction - NUClear::id_t reaction_id{0}; - /// The task id of this reaction - NUClear::id_t task_id{0}; - /// The reaction id of the reaction that caused this one or 0 if there was not one - NUClear::id_t cause_reaction_id{0}; - /// The reaction id of the task that caused this task or 0 if there was not one - NUClear::id_t cause_task_id{0}; - /// The time that this reaction was emitted to the thread pool - clock::time_point emitted; - /// The time that execution started on this reaction - clock::time_point started; - /// The time that execution finished on this reaction - clock::time_point finished; - /// An exception pointer that can be rethrown (if the reaction threw an exception) - std::exception_ptr exception{nullptr}; + + /// The reaction/task pair that caused this reaction or 0s if it was a non NUClear cause + IDPair cause; + /// The reaction/task pair that was executed + IDPair target; + + /// The thread pool that this reaction was intended to run on + util::ThreadPoolDescriptor target_threadpool; + /// The group that this reaction was intended to run in + util::GroupDescriptor target_group; + + /// The time and thread information for when this reaction was created + Event created; + /// The time and thread information for when this reaction was started + Event started; + /// The time and thread information for when this reaction was finished + Event finished; + + /// The exception that was thrown by this reaction or nullptr if no exception was thrown + std::exception_ptr exception; + }; + + struct ReactionEvent { + enum Event : uint8_t { CREATED, MISSING_DATA, BLOCKED, STARTED, FINISHED }; + + ReactionEvent(const Event& type, std::shared_ptr statistics) + : type(type), statistics(std::move(statistics)) {} + + Event type; + std::shared_ptr statistics; }; } // namespace message diff --git a/src/threading/ReactionTask.hpp b/src/threading/ReactionTask.hpp index 39aaab40..a83159bb 100644 --- a/src/threading/ReactionTask.hpp +++ b/src/threading/ReactionTask.hpp @@ -84,19 +84,15 @@ namespace threading { , pool_descriptor(thread_pool_fn(*this)) , group_descriptor(group_fn(*this)) // Only create a stats object if we wouldn't cause an infinite loop of stats - , stats(parent != nullptr && parent->emit_stats - && (current_task == nullptr || current_task->stats != nullptr) - ? std::make_shared( - parent->identifiers, - parent->id, - id, - current_task != nullptr ? current_task->parent->id : 0, - current_task != nullptr ? current_task->id : 0, - clock::now(), - clock::time_point(std::chrono::seconds(0)), - clock::time_point(std::chrono::seconds(0)), - nullptr) - : nullptr) { + , stats( + parent != nullptr && parent->emit_stats && (current_task == nullptr || current_task->stats != nullptr) + ? std::make_shared( + parent->identifiers, + IDPair{parent->id, id}, + current_task != nullptr ? IDPair{current_task->parent->id, current_task->id} : IDPair{0, 0}, + pool_descriptor, + group_descriptor) + : nullptr) { // Increment the number of active tasks if (parent != nullptr) { parent->active_tasks.fetch_add(1, std::memory_order_release); diff --git a/src/util/CallbackGenerator.hpp b/src/util/CallbackGenerator.hpp index 3818de88..f18746b1 100644 --- a/src/util/CallbackGenerator.hpp +++ b/src/util/CallbackGenerator.hpp @@ -74,6 +74,14 @@ namespace util { // Check if we should even run if (!DSL::precondition(*task)) { + + // Set the created status as rejected and emit it + if (task->stats != nullptr) { + PowerPlant::powerplant->emit( + std::make_unique(message::ReactionEvent::BLOCKED, task->stats)); + } + + // Nothing to run return nullptr; } @@ -87,18 +95,33 @@ namespace util { // Check if our data is good (all the data exists) otherwise terminate the call if (!check_data(data)) { + + // Set the created status as no data and emit it + if (task->stats != nullptr) { + PowerPlant::powerplant->emit( + std::make_unique(message::ReactionEvent::MISSING_DATA, task->stats)); + } + + // Nothing to run return nullptr; } + // Set the created status as no data and emit it + if (task->stats != nullptr) { + PowerPlant::powerplant->emit( + std::make_unique(message::ReactionEvent::CREATED, task->stats)); + } + // We have to make a copy of the callback because the "this" variable can go out of scope auto c = callback; task->callback = [c, data](threading::ReactionTask& task) noexcept { // Update our thread's priority to the correct level update_current_thread_priority(task.priority); - // Record our start time if (task.stats != nullptr) { - task.stats->started = clock::now(); + task.stats->started = message::ReactionStatistics::Event::now(); + PowerPlant::powerplant->emit( + std::make_unique(message::ReactionEvent::STARTED, task.stats)); } // We have to catch any exceptions @@ -113,17 +136,14 @@ namespace util { } } - // Our finish time - if (task.stats != nullptr) { - task.stats->finished = clock::now(); - } - // Run our postconditions DSL::postcondition(task); // Emit our reaction statistics if it wouldn't cause a loop if (task.stats != nullptr) { - PowerPlant::powerplant->emit_shared(task.stats); + task.stats->finished = message::ReactionStatistics::Event::now(); + PowerPlant::powerplant->emit( + std::make_unique(message::ReactionEvent::FINISHED, task.stats)); } }; diff --git a/src/util/usage_clock.cpp b/src/util/usage_clock.cpp new file mode 100644 index 00000000..fd132277 --- /dev/null +++ b/src/util/usage_clock.cpp @@ -0,0 +1,44 @@ +#include "usage_clock.hpp" + +#include + +// Windows +#if defined(_WIN32) + #include "platform.hpp" + +namespace NUClear { +namespace util { + + cpu_clock::time_point cpu_clock::now() noexcept { + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + if (GetThreadTimes(GetCurrentThread(), &creation_time, &exit_time, &kernel_time, &user_time) != -1) { + // Time in in 100 nanosecond intervals + uint64_t time = ((uint64_t(user_time.dwHighDateTime) << 32) | user_time.dwLowDateTime) + + ((uint64_t(kernel_time.dwHighDateTime) << 32) | kernel_time.dwLowDateTime); + return time_point(std::chrono::duration>(time)); + } + return time_point(); + } + +} // namespace util +} // namespace NUClear + +#else + #include + +namespace NUClear { +namespace util { + + cpu_clock::time_point cpu_clock::now() noexcept { + ::timespec ts{}; + ::clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); + return time_point(std::chrono::seconds(ts.tv_sec) + std::chrono::nanoseconds(ts.tv_nsec)); + } + +} // namespace util +} // namespace NUClear + +#endif // _WIN32 diff --git a/src/util/usage_clock.hpp b/src/util/usage_clock.hpp new file mode 100644 index 00000000..03322672 --- /dev/null +++ b/src/util/usage_clock.hpp @@ -0,0 +1,30 @@ +#ifndef NUCLEAR_UTIL_USAGE_CLOCK_HPP +#define NUCLEAR_UTIL_USAGE_CLOCK_HPP + +#include + +namespace NUClear { +namespace util { + + /** + * A clock that measures CPU time. + */ + struct cpu_clock { + using duration = std::chrono::nanoseconds; ///< The duration type of the clock. + using rep = duration::rep; ///< The representation type of the duration. + using period = duration::period; ///< The tick period of the clock. + using time_point = std::chrono::time_point; ///< The time point type of the clock. + static const bool is_steady = true; ///< Indicates if the clock is steady. + + /** + * Get the current time point of the cpu clock for the current thread + * + * @return The current time point. + */ + static time_point now() noexcept; + }; + +} // namespace util +} // namespace NUClear + +#endif // NUCLEAR_UTIL_USAGE_CLOCK_HPP diff --git a/tests/tests/api/ReactionStatistics.cpp b/tests/tests/api/ReactionStatistics.cpp index 6974249d..1a7c0621 100644 --- a/tests/tests/api/ReactionStatistics.cpp +++ b/tests/tests/api/ReactionStatistics.cpp @@ -35,7 +35,7 @@ template struct Message {}; struct LoopMessage {}; -using NUClear::message::ReactionStatistics; +using NUClear::message::ReactionEvent; class TestReactor : public test_util::TestBase { public: @@ -43,18 +43,26 @@ class TestReactor : public test_util::TestBase { // This reaction is here to emit something from a ReactionStatistics trigger // This shouldn't cause reaction statistics of their own otherwise everything would explode - on>().then("Loop Statistics", [this](const ReactionStatistics&) { + on>().then("Loop Statistics", [this](const ReactionEvent&) { // emit(std::make_unique()); }); on>().then("No Statistics", [] {}); - on>().then("Reaction Stats Handler", [this](const ReactionStatistics& stats) { + on>().then("Reaction Stats Handler", [this](const ReactionEvent& event) { + const auto& stats = *event.statistics; + // Other reactions statistics run on this because of built in NUClear reactors (e.g. chrono controller etc) // We want to filter those out so only our own stats are shown if (stats.identifiers->name.empty() || stats.identifiers->reactor != reactor_name) { return; } + + // Skip stats until the finished event + if (event.type != ReactionEvent::FINISHED) { + return; + } + events.push_back("Stats for " + stats.identifiers->name + " from " + stats.identifiers->reactor); events.push_back(stats.identifiers->dsl); diff --git a/tests/tests/api/ReactionStatisticsTiming.cpp b/tests/tests/api/ReactionStatisticsTiming.cpp new file mode 100644 index 00000000..f01efb7f --- /dev/null +++ b/tests/tests/api/ReactionStatisticsTiming.cpp @@ -0,0 +1,181 @@ +/* + * MIT License + * + * Copyright (c) 2023 NUClear Contributors + * + * This file is part of the NUClear codebase. + * See https://github.com/Fastcode/NUClear for further info. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "test_util/TestBase.hpp" +#include "test_util/TimeUnit.hpp" + +// Anonymous namespace to keep everything file local +namespace { + +using TimeUnit = test_util::TimeUnit; + +/// Events that occur during the test and the time they occur +/// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +std::vector> code_events; +/// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +std::vector> stat_events; + +struct Usage { + std::map real; + std::map cpu; +}; + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +Usage usage; + +struct DoTest {}; +struct HeavyTask {}; +struct LightTask {}; + +const std::string heavy_name = "Heavy"; // NOLINT(cert-err58-cpp) +const std::string light_name = "Light"; // NOLINT(cert-err58-cpp) +const std::string initial_name = "Initial"; // NOLINT(cert-err58-cpp) +constexpr int scale = 5; // Number of time units to sleep/wait for + +using NUClear::message::ReactionEvent; + +class TestReactor : public test_util::TestBase { +public: + TestReactor(std::unique_ptr environment) : TestBase(std::move(environment)) { + + on>, Priority::LOW>().then(initial_name + ":" + heavy_name, [this] { + code_events.emplace_back("Started " + initial_name + ":" + heavy_name, NUClear::clock::now()); + code_events.emplace_back("Created " + heavy_name, NUClear::clock::now()); + emit(std::make_unique()); + code_events.emplace_back("Finished " + initial_name + ":" + heavy_name, NUClear::clock::now()); + }); + on>().then(heavy_name, [] { + code_events.emplace_back("Started " + heavy_name, NUClear::clock::now()); + auto start = NUClear::clock::now(); + while (NUClear::clock::now() - start < TimeUnit(scale)) { + } + code_events.emplace_back("Finished " + heavy_name, NUClear::clock::now()); + }); + + on>, Priority::LOW>().then(initial_name + ":" + light_name, [this] { + code_events.emplace_back("Started " + initial_name + ":" + light_name, NUClear::clock::now()); + code_events.emplace_back("Created " + light_name, NUClear::clock::now()); + emit(std::make_unique()); + code_events.emplace_back("Finished " + initial_name + ":" + light_name, NUClear::clock::now()); + }); + on>().then(light_name, [] { + code_events.emplace_back("Started " + light_name, NUClear::clock::now()); + std::this_thread::sleep_for(TimeUnit(scale)); + code_events.emplace_back("Finished " + light_name, NUClear::clock::now()); + }); + + on>().then([](const ReactionEvent& event) { + const auto& stats = *event.statistics; + // Check the name ends with light_name or heavy_name + if (stats.identifiers->name.substr(stats.identifiers->name.size() - light_name.size()) == light_name + || stats.identifiers->name.substr(stats.identifiers->name.size() - heavy_name.size()) == heavy_name) { + + switch (event.type) { + case ReactionEvent::CREATED: + stat_events.emplace_back("Created " + stats.identifiers->name, stats.created.nuclear_time); + break; + case ReactionEvent::STARTED: + stat_events.emplace_back("Started " + stats.identifiers->name, stats.started.nuclear_time); + break; + case ReactionEvent::FINISHED: + stat_events.emplace_back("Finished " + stats.identifiers->name, stats.finished.nuclear_time); + usage.real[stats.identifiers->name] = stats.finished.realtime - stats.started.realtime; + usage.cpu[stats.identifiers->name] = stats.finished.cpu_time - stats.started.cpu_time; + break; + default: break; + } + } + }); + + on().then("Startup", [this] { + auto start = NUClear::clock::now(); + code_events.emplace_back("Created " + initial_name + ":" + heavy_name, start); + code_events.emplace_back("Created " + initial_name + ":" + light_name, start); + emit(std::make_unique>()); + }); + } +}; +} // namespace + +TEST_CASE("Testing reaction statistics timing", "[api][reactionstatistics][timing]") { + + NUClear::Configuration config; + config.thread_count = 1; + NUClear::PowerPlant plant(config); + plant.install(); + plant.start(); + + // Sort the stats events by timestamp as they are not always going to be in order due to how stats are processed + std::stable_sort(stat_events.begin(), stat_events.end(), [](const auto& lhs, const auto& rhs) { + return lhs.second < rhs.second; + }); + + + auto make_delta = [](const std::vector>& events) { + std::vector delta_events; + auto first = events.front().second; + for (const auto& event : events) { + auto delta = event.second - first; + auto units = test_util::round_to_test_units(delta / scale).count(); + delta_events.push_back(event.first + " @ Step " + std::to_string(units)); + } + return delta_events; + }; + + // Convert the events to delta strings where 1 unit is 1 step unit + std::vector delta_code_events = make_delta(code_events); + std::vector delta_stat_events = make_delta(stat_events); + + const std::vector expected = { + "Created Initial:Heavy @ Step 0", + "Created Initial:Light @ Step 0", + "Started Initial:Heavy @ Step 0", + "Created Heavy @ Step 0", + "Finished Initial:Heavy @ Step 0", + "Started Heavy @ Step 0", + "Finished Heavy @ Step 1", + "Started Initial:Light @ Step 1", + "Created Light @ Step 1", + "Finished Initial:Light @ Step 1", + "Started Light @ Step 1", + "Finished Light @ Step 2", + }; + + + /* Info Scope */ { + INFO("Code Events:\n" << test_util::diff_string(expected, delta_code_events)); + REQUIRE(delta_code_events == expected); + } + /* Info Scope */ { + INFO("Statistic Events:\n" << test_util::diff_string(expected, delta_stat_events)); + REQUIRE(delta_stat_events == expected); + } + + // Most of heavy real time should be cpu time + REQUIRE(usage.cpu[heavy_name] > usage.real[heavy_name] / 2); + + // Most of light real time should be sleeping + REQUIRE(usage.cpu[light_name] < usage.real[light_name] / 2); +}