From 563cd01f98c4fcfa7de9e85937ddbda3cc2fc9fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=84=E9=A3=8F?= Date: Fri, 21 Feb 2025 08:00:47 +0000 Subject: [PATCH 1/3] add global metrics registration --- core/monitor/Monitor.cpp | 32 ++++++++++ core/monitor/Monitor.h | 13 ++++ core/monitor/SelfMonitorServer.cpp | 5 ++ .../metric_models/SelfMonitorMetricEvent.cpp | 19 +++++- .../metric_models/SelfMonitorMetricEvent.h | 7 ++- .../SelfMonitorMetricEventUnittest.cpp | 59 +++++++++++++++++++ 6 files changed, 133 insertions(+), 2 deletions(-) diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 0fa2b2a4e3..64c8dab8a9 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include "Monitor.h" + +#include "MetricRecord.h" #if defined(__linux__) #include #include @@ -635,4 +637,34 @@ void LoongCollectorMonitor::Stop() { LOG_INFO(sLogger, ("LoongCollector monitor", "stopped successfully")); } +bool LoongCollectorMonitor::GetAgentMetricData(SelfMonitorMetricEvent& event) { + lock_guard lock(mGlobalMetricsMux); + if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].find("") + != mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].end()) { + event.Copy(mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][""]); + return true; + } + return false; +} + +void LoongCollectorMonitor::SetAgentMetricData(const SelfMonitorMetricEvent& event) { + lock_guard lock(mGlobalMetricsMux); + mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][""].Copy(event); +} + +bool LoongCollectorMonitor::GetRunnerMetricData(const std::string& runnerName, SelfMonitorMetricEvent& event) { + lock_guard lock(mGlobalMetricsMux); + if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER].find(runnerName) + != mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER].end()) { + event.Copy(mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName]); + return true; + } + return false; +} + +void LoongCollectorMonitor::SetRunnerMetricData(const std::string& runnerName, const SelfMonitorMetricEvent& event) { + lock_guard lock(mGlobalMetricsMux); + mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName].Copy(event); +} + } // namespace logtail diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index a76acd6008..8a54107f84 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -20,8 +20,10 @@ #include #include #include +#include #include "MetricManager.h" +#include "MetricTypes.h" #if defined(_MSC_VER) #include @@ -188,6 +190,11 @@ class LoongCollectorMonitor { void Init(); void Stop(); + bool GetAgentMetricData(SelfMonitorMetricEvent& event); + void SetAgentMetricData(const SelfMonitorMetricEvent& event); + bool GetRunnerMetricData(const std::string& runnerName, SelfMonitorMetricEvent& event); + void SetRunnerMetricData(const std::string& runnerName, const SelfMonitorMetricEvent& event); + void SetAgentCpu(double cpu) { SET_GAUGE(mAgentCpu, cpu); } void SetAgentMemory(uint64_t mem) { SET_GAUGE(mAgentMemory, mem); } void SetAgentGoMemory(uint64_t mem) { SET_GAUGE(mAgentGoMemory, mem); } @@ -214,6 +221,12 @@ class LoongCollectorMonitor { LoongCollectorMonitor(); ~LoongCollectorMonitor(); + std::mutex mGlobalMetricsMux; + // 一个全局级别指标的副本,由 SelfMonitorServer::PushSelfMonitorMetricEvents 更新,格式为: + // {MetricCategory: {key:MetricValue}} + // 现支持 Agent 和 Runner 指标的保存、获取 + std::map > mGlobalMetrics; + // MetricRecord MetricsRecordRef mMetricsRecordRef; diff --git a/core/monitor/SelfMonitorServer.cpp b/core/monitor/SelfMonitorServer.cpp index c7fd5692ae..bc888f05c4 100644 --- a/core/monitor/SelfMonitorServer.cpp +++ b/core/monitor/SelfMonitorServer.cpp @@ -16,6 +16,8 @@ #include "monitor/SelfMonitorServer.h" +#include "MetricConstants.h" +#include "Monitor.h" #include "common/LogtailCommonFlags.h" #include "runner/ProcessorRunner.h" @@ -139,8 +141,11 @@ void SelfMonitorServer::PushSelfMonitorMetricEvents(std::vectorSetAgentMetricData(event); shouldSkip = !ProcessSelfMonitorMetricEvent(event, mSelfMonitorMetricRules->mAgentMetricsRule); } else if (event.mCategory == MetricCategory::METRIC_CATEGORY_RUNNER) { + LoongCollectorMonitor::GetInstance()->SetRunnerMetricData(event.GetLabels()[METRIC_LABEL_KEY_RUNNER_NAME], + event); shouldSkip = !ProcessSelfMonitorMetricEvent(event, mSelfMonitorMetricRules->mRunnerMetricsRule); } else if (event.mCategory == MetricCategory::METRIC_CATEGORY_COMPONENT) { shouldSkip = !ProcessSelfMonitorMetricEvent(event, mSelfMonitorMetricRules->mComponentMetricsRule); diff --git a/core/monitor/metric_models/SelfMonitorMetricEvent.cpp b/core/monitor/metric_models/SelfMonitorMetricEvent.cpp index 7eccca1e49..18b3ba21ad 100644 --- a/core/monitor/metric_models/SelfMonitorMetricEvent.cpp +++ b/core/monitor/metric_models/SelfMonitorMetricEvent.cpp @@ -135,7 +135,7 @@ void SelfMonitorMetricEvent::SetInterval(size_t interval) { mSendInterval = interval; } -void SelfMonitorMetricEvent::Merge(SelfMonitorMetricEvent& event) { +void SelfMonitorMetricEvent::Merge(const SelfMonitorMetricEvent& event) { if (mSendInterval != event.mSendInterval) { mSendInterval = event.mSendInterval; mLastSendInterval = 0; @@ -152,6 +152,23 @@ void SelfMonitorMetricEvent::Merge(SelfMonitorMetricEvent& event) { mUpdatedFlag = true; } +void SelfMonitorMetricEvent::Copy(const SelfMonitorMetricEvent& event) { + mSendInterval = event.mSendInterval; + mLastSendInterval = event.mLastSendInterval; + mCategory = event.mCategory; + mKey = event.mKey; + for (auto label = event.mLabels.begin(); label != event.mLabels.end(); label++) { + mLabels[label->first] = label->second; + } + for (auto counter = event.mCounters.begin(); counter != event.mCounters.end(); counter++) { + mCounters[counter->first] = counter->second; + } + for (auto gauge = event.mGauges.begin(); gauge != event.mGauges.end(); gauge++) { + mGauges[gauge->first] = gauge->second; + } + mUpdatedFlag = event.mUpdatedFlag; +} + bool SelfMonitorMetricEvent::ShouldSend() { mLastSendInterval++; return (mLastSendInterval >= mSendInterval) && mUpdatedFlag; diff --git a/core/monitor/metric_models/SelfMonitorMetricEvent.h b/core/monitor/metric_models/SelfMonitorMetricEvent.h index 98b185aea9..1d70867690 100644 --- a/core/monitor/metric_models/SelfMonitorMetricEvent.h +++ b/core/monitor/metric_models/SelfMonitorMetricEvent.h @@ -42,12 +42,17 @@ class SelfMonitorMetricEvent { SelfMonitorMetricEvent(const std::map& metricRecord); void SetInterval(size_t interval); - void Merge(SelfMonitorMetricEvent& event); + void Merge(const SelfMonitorMetricEvent& event); + void Copy(const SelfMonitorMetricEvent& event); bool ShouldSend(); bool ShouldDelete(); void ReadAsMetricEvent(MetricEvent* metricEventPtr); + std::unordered_map& GetLabels() { return mLabels; } + std::unordered_map& GetCounters() { return mCounters; } + std::unordered_map& GetGauges() { return mGauges; } + SelfMonitorMetricEventKey mKey; // labels + category std::string mCategory; // category private: diff --git a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp index 91f1712bcd..5748be9d47 100644 --- a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp +++ b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "MetricConstants.h" +#include "MetricRecord.h" +#include "Monitor.h" #include "monitor/MetricManager.h" #include "monitor/metric_models/SelfMonitorMetricEvent.h" #include "unittest/Unittest.h" @@ -27,6 +30,7 @@ class SelfMonitorMetricEventUnittest : public ::testing::Test { void TestCreateFromGoMetricMap(); void TestMerge(); void TestSendInterval(); + void TestGlobalMetrics(); private: std::shared_ptr mSourceBuffer; @@ -38,6 +42,7 @@ APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestCreateFromMetricEvent, APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestCreateFromGoMetricMap, 1); APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestMerge, 2); APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestSendInterval, 3); +APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestGlobalMetrics, 4); void SelfMonitorMetricEventUnittest::TestCreateFromMetricEvent() { std::vector> labels; @@ -198,6 +203,60 @@ void SelfMonitorMetricEventUnittest::TestSendInterval() { APSARA_TEST_TRUE(event.ShouldDelete()); // 第三次调用,间隔计数达到3,应返回true } +void SelfMonitorMetricEventUnittest::TestGlobalMetrics() { + { // test set/get agent metric + SelfMonitorMetricEvent originAgentEvent; + SelfMonitorMetricEvent wantAgentEvent; + APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetAgentMetricData(wantAgentEvent)); + + // set + originAgentEvent.mCategory = MetricCategory::METRIC_CATEGORY_AGENT; + originAgentEvent.mLabels = {{METRIC_LABEL_KEY_PROJECT, "test_project"}, {METRIC_LABEL_KEY_OS, "Linux"}}; + originAgentEvent.mCounters = {{"test_counter", 1}}; + originAgentEvent.mGauges = {{METRIC_AGENT_CPU, 0.3}, {METRIC_AGENT_MEMORY, 99}}; + LoongCollectorMonitor::GetInstance()->SetAgentMetricData(originAgentEvent); + + // get + APSARA_TEST_TRUE(LoongCollectorMonitor::GetInstance()->GetAgentMetricData(wantAgentEvent)); + APSARA_TEST_EQUAL(MetricCategory::METRIC_CATEGORY_AGENT, wantAgentEvent.mCategory); + APSARA_TEST_EQUAL("test_project", wantAgentEvent.GetLabels()[METRIC_LABEL_KEY_PROJECT]); + APSARA_TEST_EQUAL("Linux", wantAgentEvent.GetLabels()[METRIC_LABEL_KEY_OS]); + APSARA_TEST_EQUAL("", wantAgentEvent.GetLabels()[""]); + APSARA_TEST_EQUAL(1, wantAgentEvent.GetCounters()["test_counter"]); + APSARA_TEST_EQUAL(0, wantAgentEvent.GetCounters()[""]); + APSARA_TEST_EQUAL(0.3, wantAgentEvent.GetGauges()[METRIC_AGENT_CPU]); + APSARA_TEST_EQUAL(99, wantAgentEvent.GetGauges()[METRIC_AGENT_MEMORY]); + APSARA_TEST_EQUAL(0, wantAgentEvent.GetGauges()[""]); + } + { // test set/get runner metric + SelfMonitorMetricEvent originRunnerEvent; + SelfMonitorMetricEvent wantRunnerEvent; + APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetRunnerMetricData("", wantRunnerEvent)); + + // set + std::string runnerName = METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK; + originRunnerEvent.mCategory = MetricCategory::METRIC_CATEGORY_RUNNER; + originRunnerEvent.mLabels + = {{METRIC_LABEL_KEY_RUNNER_NAME, runnerName}, {METRIC_LABEL_KEY_PROJECT, "test_project"}}; + originRunnerEvent.mCounters = {{METRIC_RUNNER_IN_EVENTS_TOTAL, 1}, {METRIC_RUNNER_TOTAL_DELAY_MS, 99}}; + originRunnerEvent.mGauges = {{METRIC_RUNNER_LAST_RUN_TIME, 1111111}}; + LoongCollectorMonitor::GetInstance()->SetRunnerMetricData(runnerName, originRunnerEvent); + + // get + APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetRunnerMetricData("", wantRunnerEvent)); + APSARA_TEST_TRUE(LoongCollectorMonitor::GetInstance()->GetRunnerMetricData(runnerName, wantRunnerEvent)); + APSARA_TEST_EQUAL(MetricCategory::METRIC_CATEGORY_RUNNER, wantRunnerEvent.mCategory); + APSARA_TEST_EQUAL("test_project", wantRunnerEvent.GetLabels()[METRIC_LABEL_KEY_PROJECT]); + APSARA_TEST_EQUAL(runnerName, wantRunnerEvent.GetLabels()[METRIC_LABEL_KEY_RUNNER_NAME]); + APSARA_TEST_EQUAL("", wantRunnerEvent.GetLabels()[""]); + APSARA_TEST_EQUAL(1, wantRunnerEvent.GetCounters()[METRIC_RUNNER_IN_EVENTS_TOTAL]); + APSARA_TEST_EQUAL(99, wantRunnerEvent.GetCounters()[METRIC_RUNNER_TOTAL_DELAY_MS]); + APSARA_TEST_EQUAL(0, wantRunnerEvent.GetCounters()[""]); + APSARA_TEST_EQUAL(1111111, wantRunnerEvent.GetGauges()[METRIC_RUNNER_LAST_RUN_TIME]); + APSARA_TEST_EQUAL(0, wantRunnerEvent.GetGauges()[""]); + } +} + } // namespace logtail int main(int argc, char** argv) { From 6aa689e5477ef05f9828f2fee0d36adc84c02e7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=84=E9=A3=8F?= Date: Wed, 26 Feb 2025 06:01:22 +0000 Subject: [PATCH 2/3] polish --- core/monitor/Monitor.cpp | 18 +++---- core/monitor/Monitor.h | 9 ++-- core/monitor/SelfMonitorServer.cpp | 8 ++- .../metric_models/SelfMonitorMetricEvent.cpp | 53 ++++++++++--------- .../metric_models/SelfMonitorMetricEvent.h | 20 ++++--- .../SelfMonitorMetricEventUnittest.cpp | 48 ++++++++--------- 6 files changed, 80 insertions(+), 76 deletions(-) diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 64c8dab8a9..1cc08a37ce 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -637,34 +637,34 @@ void LoongCollectorMonitor::Stop() { LOG_INFO(sLogger, ("LoongCollector monitor", "stopped successfully")); } -bool LoongCollectorMonitor::GetAgentMetricData(SelfMonitorMetricEvent& event) { +bool LoongCollectorMonitor::GetAgentMetric(SelfMonitorMetricEvent& event) { lock_guard lock(mGlobalMetricsMux); - if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].find("") + if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].find(mAgentMetricKey) != mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].end()) { - event.Copy(mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][""]); + event = mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][mAgentMetricKey]; return true; } return false; } -void LoongCollectorMonitor::SetAgentMetricData(const SelfMonitorMetricEvent& event) { +void LoongCollectorMonitor::SetAgentMetric(const SelfMonitorMetricEvent& event) { lock_guard lock(mGlobalMetricsMux); - mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][""].Copy(event); + mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][mAgentMetricKey] = event; } -bool LoongCollectorMonitor::GetRunnerMetricData(const std::string& runnerName, SelfMonitorMetricEvent& event) { +bool LoongCollectorMonitor::GetRunnerMetric(const std::string& runnerName, SelfMonitorMetricEvent& event) { lock_guard lock(mGlobalMetricsMux); if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER].find(runnerName) != mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER].end()) { - event.Copy(mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName]); + event = mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName]; return true; } return false; } -void LoongCollectorMonitor::SetRunnerMetricData(const std::string& runnerName, const SelfMonitorMetricEvent& event) { +void LoongCollectorMonitor::SetRunnerMetric(const std::string& runnerName, const SelfMonitorMetricEvent& event) { lock_guard lock(mGlobalMetricsMux); - mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName].Copy(event); + mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName] = event; } } // namespace logtail diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index 8a54107f84..c1f33935fa 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -190,10 +190,10 @@ class LoongCollectorMonitor { void Init(); void Stop(); - bool GetAgentMetricData(SelfMonitorMetricEvent& event); - void SetAgentMetricData(const SelfMonitorMetricEvent& event); - bool GetRunnerMetricData(const std::string& runnerName, SelfMonitorMetricEvent& event); - void SetRunnerMetricData(const std::string& runnerName, const SelfMonitorMetricEvent& event); + bool GetAgentMetric(SelfMonitorMetricEvent& event); + void SetAgentMetric(const SelfMonitorMetricEvent& event); + bool GetRunnerMetric(const std::string& runnerName, SelfMonitorMetricEvent& event); + void SetRunnerMetric(const std::string& runnerName, const SelfMonitorMetricEvent& event); void SetAgentCpu(double cpu) { SET_GAUGE(mAgentCpu, cpu); } void SetAgentMemory(uint64_t mem) { SET_GAUGE(mAgentMemory, mem); } @@ -226,6 +226,7 @@ class LoongCollectorMonitor { // {MetricCategory: {key:MetricValue}} // 现支持 Agent 和 Runner 指标的保存、获取 std::map > mGlobalMetrics; + const std::string mAgentMetricKey = "agent"; // MetricRecord MetricsRecordRef mMetricsRecordRef; diff --git a/core/monitor/SelfMonitorServer.cpp b/core/monitor/SelfMonitorServer.cpp index bc888f05c4..00b55a9411 100644 --- a/core/monitor/SelfMonitorServer.cpp +++ b/core/monitor/SelfMonitorServer.cpp @@ -18,7 +18,6 @@ #include "MetricConstants.h" #include "Monitor.h" -#include "common/LogtailCommonFlags.h" #include "runner/ProcessorRunner.h" using namespace std; @@ -141,11 +140,10 @@ void SelfMonitorServer::PushSelfMonitorMetricEvents(std::vectorSetAgentMetricData(event); + LoongCollectorMonitor::GetInstance()->SetAgentMetric(event); shouldSkip = !ProcessSelfMonitorMetricEvent(event, mSelfMonitorMetricRules->mAgentMetricsRule); } else if (event.mCategory == MetricCategory::METRIC_CATEGORY_RUNNER) { - LoongCollectorMonitor::GetInstance()->SetRunnerMetricData(event.GetLabels()[METRIC_LABEL_KEY_RUNNER_NAME], - event); + LoongCollectorMonitor::GetInstance()->SetRunnerMetric(event.GetLabel(METRIC_LABEL_KEY_RUNNER_NAME), event); shouldSkip = !ProcessSelfMonitorMetricEvent(event, mSelfMonitorMetricRules->mRunnerMetricsRule); } else if (event.mCategory == MetricCategory::METRIC_CATEGORY_COMPONENT) { shouldSkip = !ProcessSelfMonitorMetricEvent(event, mSelfMonitorMetricRules->mComponentMetricsRule); @@ -162,7 +160,7 @@ void SelfMonitorServer::PushSelfMonitorMetricEvents(std::vectorGetCategory(); @@ -131,14 +128,14 @@ void SelfMonitorMetricEvent::CreateKey() { } void SelfMonitorMetricEvent::SetInterval(size_t interval) { - mLastSendInterval = 0; + mIntervalsSinceLastSend = 0; mSendInterval = interval; } void SelfMonitorMetricEvent::Merge(const SelfMonitorMetricEvent& event) { if (mSendInterval != event.mSendInterval) { mSendInterval = event.mSendInterval; - mLastSendInterval = 0; + mIntervalsSinceLastSend = 0; } for (auto counter = event.mCounters.begin(); counter != event.mCounters.end(); counter++) { if (mCounters.find(counter->first) != mCounters.end()) @@ -152,30 +149,13 @@ void SelfMonitorMetricEvent::Merge(const SelfMonitorMetricEvent& event) { mUpdatedFlag = true; } -void SelfMonitorMetricEvent::Copy(const SelfMonitorMetricEvent& event) { - mSendInterval = event.mSendInterval; - mLastSendInterval = event.mLastSendInterval; - mCategory = event.mCategory; - mKey = event.mKey; - for (auto label = event.mLabels.begin(); label != event.mLabels.end(); label++) { - mLabels[label->first] = label->second; - } - for (auto counter = event.mCounters.begin(); counter != event.mCounters.end(); counter++) { - mCounters[counter->first] = counter->second; - } - for (auto gauge = event.mGauges.begin(); gauge != event.mGauges.end(); gauge++) { - mGauges[gauge->first] = gauge->second; - } - mUpdatedFlag = event.mUpdatedFlag; -} - bool SelfMonitorMetricEvent::ShouldSend() { - mLastSendInterval++; - return (mLastSendInterval >= mSendInterval) && mUpdatedFlag; + mIntervalsSinceLastSend++; + return (mIntervalsSinceLastSend >= mSendInterval) && mUpdatedFlag; } bool SelfMonitorMetricEvent::ShouldDelete() { - return (mLastSendInterval >= mSendInterval) && !mUpdatedFlag; + return (mIntervalsSinceLastSend >= mSendInterval) && !mUpdatedFlag; } void SelfMonitorMetricEvent::ReadAsMetricEvent(MetricEvent* metricEventPtr) { @@ -199,8 +179,29 @@ void SelfMonitorMetricEvent::ReadAsMetricEvent(MetricEvent* metricEventPtr) { gauge->first, {UntypedValueMetricType::MetricTypeGauge, gauge->second}); } // set flags - mLastSendInterval = 0; + mIntervalsSinceLastSend = 0; mUpdatedFlag = false; } +std::string SelfMonitorMetricEvent::GetLabel(const std::string& labelKey) { + if (mLabels.find(labelKey) != mLabels.end()) { + return mLabels.at(labelKey); + } + return ""; +} + +uint64_t SelfMonitorMetricEvent::GetCounter(const std::string& counterName) { + if (mCounters.find(counterName) != mCounters.end()) { + return mCounters.at(counterName); + } + return 0; +} + +double SelfMonitorMetricEvent::GetGauge(const std::string& gaugeName) { + if (mGauges.find(gaugeName) != mGauges.end()) { + return mGauges.at(gaugeName); + } + return 0; +} + } // namespace logtail diff --git a/core/monitor/metric_models/SelfMonitorMetricEvent.h b/core/monitor/metric_models/SelfMonitorMetricEvent.h index 1d70867690..902857975e 100644 --- a/core/monitor/metric_models/SelfMonitorMetricEvent.h +++ b/core/monitor/metric_models/SelfMonitorMetricEvent.h @@ -15,8 +15,10 @@ */ #pragma once -#include "MetricRecord.h" -#include "models/PipelineEventGroup.h" +#include + +#include "models/MetricEvent.h" +#include "monitor/metric_models/MetricRecord.h" namespace logtail { @@ -37,21 +39,23 @@ struct SelfMonitorMetricRules { using SelfMonitorMetricEventKey = int64_t; class SelfMonitorMetricEvent { public: - SelfMonitorMetricEvent(); + SelfMonitorMetricEvent() = default; + SelfMonitorMetricEvent(const SelfMonitorMetricEvent& event) = default; + SelfMonitorMetricEvent(MetricsRecord* metricRecord); SelfMonitorMetricEvent(const std::map& metricRecord); void SetInterval(size_t interval); void Merge(const SelfMonitorMetricEvent& event); - void Copy(const SelfMonitorMetricEvent& event); bool ShouldSend(); bool ShouldDelete(); void ReadAsMetricEvent(MetricEvent* metricEventPtr); - std::unordered_map& GetLabels() { return mLabels; } - std::unordered_map& GetCounters() { return mCounters; } - std::unordered_map& GetGauges() { return mGauges; } + // 调用的对象应是不再修改的只读对象,不用加锁 + std::string GetLabel(const std::string& labelKey); + uint64_t GetCounter(const std::string& counterName); + double GetGauge(const std::string& gaugeName); SelfMonitorMetricEventKey mKey; // labels + category std::string mCategory; // category @@ -62,7 +66,7 @@ class SelfMonitorMetricEvent { std::unordered_map mCounters; std::unordered_map mGauges; int32_t mSendInterval; - int32_t mLastSendInterval; + int32_t mIntervalsSinceLastSend; bool mUpdatedFlag; #ifdef APSARA_UNIT_TEST_MAIN diff --git a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp index 5748be9d47..66d45f8130 100644 --- a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp +++ b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp @@ -170,7 +170,7 @@ void SelfMonitorMetricEventUnittest::TestMerge() { event1.Merge(event2); // 检验间隔是否被设置为 event2 的间隔 - APSARA_TEST_EQUAL(0, event1.mLastSendInterval); + APSARA_TEST_EQUAL(0, event1.mIntervalsSinceLastSend); APSARA_TEST_EQUAL(10, event1.mSendInterval); // 检验计数器是否正确合并 APSARA_TEST_EQUAL(300, event1.mCounters["counter1"]); @@ -207,31 +207,31 @@ void SelfMonitorMetricEventUnittest::TestGlobalMetrics() { { // test set/get agent metric SelfMonitorMetricEvent originAgentEvent; SelfMonitorMetricEvent wantAgentEvent; - APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetAgentMetricData(wantAgentEvent)); + APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetAgentMetric(wantAgentEvent)); // set originAgentEvent.mCategory = MetricCategory::METRIC_CATEGORY_AGENT; originAgentEvent.mLabels = {{METRIC_LABEL_KEY_PROJECT, "test_project"}, {METRIC_LABEL_KEY_OS, "Linux"}}; originAgentEvent.mCounters = {{"test_counter", 1}}; originAgentEvent.mGauges = {{METRIC_AGENT_CPU, 0.3}, {METRIC_AGENT_MEMORY, 99}}; - LoongCollectorMonitor::GetInstance()->SetAgentMetricData(originAgentEvent); + LoongCollectorMonitor::GetInstance()->SetAgentMetric(originAgentEvent); // get - APSARA_TEST_TRUE(LoongCollectorMonitor::GetInstance()->GetAgentMetricData(wantAgentEvent)); + APSARA_TEST_TRUE(LoongCollectorMonitor::GetInstance()->GetAgentMetric(wantAgentEvent)); APSARA_TEST_EQUAL(MetricCategory::METRIC_CATEGORY_AGENT, wantAgentEvent.mCategory); - APSARA_TEST_EQUAL("test_project", wantAgentEvent.GetLabels()[METRIC_LABEL_KEY_PROJECT]); - APSARA_TEST_EQUAL("Linux", wantAgentEvent.GetLabels()[METRIC_LABEL_KEY_OS]); - APSARA_TEST_EQUAL("", wantAgentEvent.GetLabels()[""]); - APSARA_TEST_EQUAL(1, wantAgentEvent.GetCounters()["test_counter"]); - APSARA_TEST_EQUAL(0, wantAgentEvent.GetCounters()[""]); - APSARA_TEST_EQUAL(0.3, wantAgentEvent.GetGauges()[METRIC_AGENT_CPU]); - APSARA_TEST_EQUAL(99, wantAgentEvent.GetGauges()[METRIC_AGENT_MEMORY]); - APSARA_TEST_EQUAL(0, wantAgentEvent.GetGauges()[""]); + APSARA_TEST_EQUAL("test_project", wantAgentEvent.GetLabel(METRIC_LABEL_KEY_PROJECT)); + APSARA_TEST_EQUAL("Linux", wantAgentEvent.GetLabel(METRIC_LABEL_KEY_OS)); + APSARA_TEST_EQUAL("", wantAgentEvent.GetLabel("")); + APSARA_TEST_EQUAL(1, wantAgentEvent.GetCounter("test_counter")); + APSARA_TEST_EQUAL(0, wantAgentEvent.GetCounter("")); + APSARA_TEST_EQUAL(0.3, wantAgentEvent.GetGauge(METRIC_AGENT_CPU)); + APSARA_TEST_EQUAL(99, wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY)); + APSARA_TEST_EQUAL(0, wantAgentEvent.GetGauge("")); } { // test set/get runner metric SelfMonitorMetricEvent originRunnerEvent; SelfMonitorMetricEvent wantRunnerEvent; - APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetRunnerMetricData("", wantRunnerEvent)); + APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetRunnerMetric("", wantRunnerEvent)); // set std::string runnerName = METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK; @@ -240,20 +240,20 @@ void SelfMonitorMetricEventUnittest::TestGlobalMetrics() { = {{METRIC_LABEL_KEY_RUNNER_NAME, runnerName}, {METRIC_LABEL_KEY_PROJECT, "test_project"}}; originRunnerEvent.mCounters = {{METRIC_RUNNER_IN_EVENTS_TOTAL, 1}, {METRIC_RUNNER_TOTAL_DELAY_MS, 99}}; originRunnerEvent.mGauges = {{METRIC_RUNNER_LAST_RUN_TIME, 1111111}}; - LoongCollectorMonitor::GetInstance()->SetRunnerMetricData(runnerName, originRunnerEvent); + LoongCollectorMonitor::GetInstance()->SetRunnerMetric(runnerName, originRunnerEvent); // get - APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetRunnerMetricData("", wantRunnerEvent)); - APSARA_TEST_TRUE(LoongCollectorMonitor::GetInstance()->GetRunnerMetricData(runnerName, wantRunnerEvent)); + APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetRunnerMetric("", wantRunnerEvent)); + APSARA_TEST_TRUE(LoongCollectorMonitor::GetInstance()->GetRunnerMetric(runnerName, wantRunnerEvent)); APSARA_TEST_EQUAL(MetricCategory::METRIC_CATEGORY_RUNNER, wantRunnerEvent.mCategory); - APSARA_TEST_EQUAL("test_project", wantRunnerEvent.GetLabels()[METRIC_LABEL_KEY_PROJECT]); - APSARA_TEST_EQUAL(runnerName, wantRunnerEvent.GetLabels()[METRIC_LABEL_KEY_RUNNER_NAME]); - APSARA_TEST_EQUAL("", wantRunnerEvent.GetLabels()[""]); - APSARA_TEST_EQUAL(1, wantRunnerEvent.GetCounters()[METRIC_RUNNER_IN_EVENTS_TOTAL]); - APSARA_TEST_EQUAL(99, wantRunnerEvent.GetCounters()[METRIC_RUNNER_TOTAL_DELAY_MS]); - APSARA_TEST_EQUAL(0, wantRunnerEvent.GetCounters()[""]); - APSARA_TEST_EQUAL(1111111, wantRunnerEvent.GetGauges()[METRIC_RUNNER_LAST_RUN_TIME]); - APSARA_TEST_EQUAL(0, wantRunnerEvent.GetGauges()[""]); + APSARA_TEST_EQUAL("test_project", wantRunnerEvent.GetLabel(METRIC_LABEL_KEY_PROJECT)); + APSARA_TEST_EQUAL(runnerName, wantRunnerEvent.GetLabel(METRIC_LABEL_KEY_RUNNER_NAME)); + APSARA_TEST_EQUAL("", wantRunnerEvent.GetLabel("")); + APSARA_TEST_EQUAL(1, wantRunnerEvent.GetCounter(METRIC_RUNNER_IN_EVENTS_TOTAL)); + APSARA_TEST_EQUAL(99, wantRunnerEvent.GetCounter(METRIC_RUNNER_TOTAL_DELAY_MS)); + APSARA_TEST_EQUAL(0, wantRunnerEvent.GetCounter("")); + APSARA_TEST_EQUAL(1111111, wantRunnerEvent.GetGauge(METRIC_RUNNER_LAST_RUN_TIME)); + APSARA_TEST_EQUAL(0, wantRunnerEvent.GetGauge("")); } } From 3639a5bccb1f94d066ecb6723b509206ff5f9a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=84=E9=A3=8F?= Date: Wed, 26 Feb 2025 07:03:05 +0000 Subject: [PATCH 3/3] polish --- core/monitor/Monitor.cpp | 29 +++++++++---------- core/monitor/Monitor.h | 9 ++++-- .../SelfMonitorMetricEventUnittest.cpp | 1 - 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 1cc08a37ce..e1e3ae3c47 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -179,7 +179,9 @@ void LogtailMonitor::Monitor() { lastCheckHardLimitTime = monitorTime; GetMemStat(); + LoongCollectorMonitor::GetInstance()->SetAgentMemory(mMemStat.mRss); CalCpuStat(curCpuStat, mCpuStat); + LoongCollectorMonitor::GetInstance()->SetAgentCpu(mCpuStat.mCpuUsage); if (CheckHardMemLimit()) { LOG_ERROR(sLogger, ("Resource used by program exceeds hard limit", @@ -249,10 +251,6 @@ bool LogtailMonitor::SendStatusProfile(bool suicide) { sleep(10); _exit(1); } - // CPU usage of Logtail process. - LoongCollectorMonitor::GetInstance()->SetAgentCpu(mCpuStat.mCpuUsage); - // Memory usage of Logtail process. - LoongCollectorMonitor::GetInstance()->SetAgentMemory(mMemStat.mRss); return mIsThreadRunning; } @@ -639,32 +637,33 @@ void LoongCollectorMonitor::Stop() { bool LoongCollectorMonitor::GetAgentMetric(SelfMonitorMetricEvent& event) { lock_guard lock(mGlobalMetricsMux); - if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].find(mAgentMetricKey) - != mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT].end()) { - event = mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][mAgentMetricKey]; - return true; - } - return false; + event = mGlobalMetrics.mAgentMetric; + return true; } void LoongCollectorMonitor::SetAgentMetric(const SelfMonitorMetricEvent& event) { lock_guard lock(mGlobalMetricsMux); - mGlobalMetrics[MetricCategory::METRIC_CATEGORY_AGENT][mAgentMetricKey] = event; + mGlobalMetrics.mAgentMetric = event; } bool LoongCollectorMonitor::GetRunnerMetric(const std::string& runnerName, SelfMonitorMetricEvent& event) { + if (runnerName.empty()) { + return false; + } lock_guard lock(mGlobalMetricsMux); - if (mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER].find(runnerName) - != mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER].end()) { - event = mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName]; + if (mGlobalMetrics.mRunnerMetrics.find(runnerName) != mGlobalMetrics.mRunnerMetrics.end()) { + event = mGlobalMetrics.mRunnerMetrics[runnerName]; return true; } return false; } void LoongCollectorMonitor::SetRunnerMetric(const std::string& runnerName, const SelfMonitorMetricEvent& event) { + if (runnerName.empty()) { + return; + } lock_guard lock(mGlobalMetricsMux); - mGlobalMetrics[MetricCategory::METRIC_CATEGORY_RUNNER][runnerName] = event; + mGlobalMetrics.mRunnerMetrics[runnerName] = event; } } // namespace logtail diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index c1f33935fa..6fff5c6508 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -221,12 +221,15 @@ class LoongCollectorMonitor { LoongCollectorMonitor(); ~LoongCollectorMonitor(); - std::mutex mGlobalMetricsMux; // 一个全局级别指标的副本,由 SelfMonitorServer::PushSelfMonitorMetricEvents 更新,格式为: // {MetricCategory: {key:MetricValue}} // 现支持 Agent 和 Runner 指标的保存、获取 - std::map > mGlobalMetrics; - const std::string mAgentMetricKey = "agent"; + struct GlobalMetrics { + SelfMonitorMetricEvent mAgentMetric; + std::unordered_map mRunnerMetrics; + }; + std::mutex mGlobalMetricsMux; + GlobalMetrics mGlobalMetrics; // MetricRecord MetricsRecordRef mMetricsRecordRef; diff --git a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp index 66d45f8130..669c103fbf 100644 --- a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp +++ b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp @@ -207,7 +207,6 @@ void SelfMonitorMetricEventUnittest::TestGlobalMetrics() { { // test set/get agent metric SelfMonitorMetricEvent originAgentEvent; SelfMonitorMetricEvent wantAgentEvent; - APSARA_TEST_FALSE(LoongCollectorMonitor::GetInstance()->GetAgentMetric(wantAgentEvent)); // set originAgentEvent.mCategory = MetricCategory::METRIC_CATEGORY_AGENT;