From 4d77d321df142b9f9371111dcaf10e63ab06a43c Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jun 2024 14:13:18 +0800 Subject: [PATCH 1/3] fix:print error msg --- source/client/src/clientMonitor.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 9e990dd545e2..3067961696d8 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -147,7 +147,8 @@ static void monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet * while(1){ int64_t readSize = taosReadFile(pFile, buf + offset, SLOW_LOG_SEND_SIZE - offset); if (readSize <= 0) { - uError("failed to read len from file:%p since %s", pFile, terrstr()); + if (readSize < 0) + uError("failed to read len from file:%p since %s", pFile, terrstr()); return; } @@ -423,7 +424,7 @@ void monitorCounterInc(int64_t clusterId, const char* counterName, const char** MonitorClient* pMonitor = *ppMonitor; taos_counter_t** ppCounter = (taos_counter_t**)taosHashGet(pMonitor->counters, counterName, strlen(counterName)); - if (ppCounter == NULL || *ppCounter != NULL) { + if (ppCounter == NULL || *ppCounter == NULL) { uError("monitorCounterInc not found pCounter %"PRIx64":%s.", clusterId, counterName); goto end; } @@ -543,10 +544,6 @@ static void* monitorThreadFunc(void *param){ } #endif - if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { - return NULL; - } - char tmpPath[PATH_MAX] = {0}; if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0){ return NULL; @@ -568,6 +565,10 @@ static void* monitorThreadFunc(void *param){ uError("open queue error since %s", terrstr()); return NULL; } + + if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { + return NULL; + } uDebug("monitorThreadFunc start"); while (1) { if (slowLogFlag > 0) break; @@ -666,8 +667,8 @@ int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value){ slowLogData->clusterId = clusterId; slowLogData->value = value; uDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); - while (monitorQueue == NULL) { - taosMsleep(100); + while (atomic_load_32(&slowLogFlag) == -1) { + taosMsleep(5); } if (taosWriteQitem(monitorQueue, slowLogData) == 0){ tsem2_post(&monitorSem); From e73f6ba47e44159d606a8cd1f3b32d0554e7760f Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jun 2024 16:05:49 +0800 Subject: [PATCH 2/3] fix:[TD-30769]make monitorInterval effect right now --- source/client/src/clientHb.c | 8 ++++++++ source/client/src/clientMonitor.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 19b6655af16f..0a480e1cbdca 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -19,6 +19,7 @@ #include "scheduler.h" #include "trpc.h" #include "tglobal.h" +#include "clientMonitor.h" typedef struct { union { @@ -546,7 +547,14 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { } SAppInstInfo *pInst = pAppHbMgr->pAppInstInfo; + int32_t oldInterval = pInst->monitorParas.tsMonitorInterval; pInst->monitorParas = pRsp.monitorParas; + if(oldInterval > pInst->monitorParas.tsMonitorInterval){ + char* value = taosStrdup(""); + if(monitorPutData2MonitorQueue(pInst->clusterId, value) < 0){ + taosMemoryFree(value); + } + } tscDebug("[monitor] paras from hb, clusterId:%" PRIx64 " monitorParas threshold:%d scope:%d", pInst->clusterId, pRsp.monitorParas.tsSlowLogThreshold, pRsp.monitorParas.tsSlowLogScope); diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 3067961696d8..260a0ebbaa8b 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -221,8 +221,8 @@ static void reportSendProcess(void* param, void* tmrId) { SEpSet ep = getEpSet_s(&pInst->mgmtEp); generateClusterReport(pMonitor->registry, pInst->pTransporter, &ep); - taosRUnLockLatch(&monitorLock); taosTmrReset(reportSendProcess, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &tmrId); + taosRUnLockLatch(&monitorLock); } static void sendAllSlowLog(){ @@ -450,7 +450,7 @@ static void monitorFreeSlowLogData(MonitorSlowLogData* pData) { static void monitorThreadFuncUnexpectedStopped(void) { atomic_store_32(&slowLogFlag, -1); } static void reportSlowLog(void* param, void* tmrId) { - taosRLockLatch(&monitorLock); + taosWLockLatch(&monitorLock); if (atomic_load_32(&monitorFlag) == 1) { taosRUnLockLatch(&monitorLock); return; @@ -471,9 +471,11 @@ static void reportSlowLog(void* param, void* tmrId) { SEpSet ep = getEpSet_s(&pInst->mgmtEp); monitorReadSendSlowLog((*(SlowLogClient**)tmp)->pFile, pInst->pTransporter, &ep); - taosRUnLockLatch(&monitorLock); - taosTmrReset(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &tmrId); + if((*(SlowLogClient**)tmp)->timer == tmrId){ + taosTmrReset(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &(*(SlowLogClient**)tmp)->timer); + } + taosWUnLockLatch(&monitorLock); } static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpPath){ @@ -535,6 +537,23 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP taosWUnLockLatch(&monitorLock); } +static void restartReportTimer(int64_t clusterId){ + taosWLockLatch(&monitorLock); + + void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); + if(tmp){ + taosTmrStopA(&(*(SlowLogClient**)tmp)->timer); + SAppInstInfo* pInst = getAppInstByClusterId(clusterId); + if(pInst == NULL){ + uError("failed to get app inst, clusterId:%"PRIx64, clusterId); + return; + } + (*(SlowLogClient**)tmp)->timer = taosTmrStart(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, (void*)clusterId, monitorTimer); + + } + taosWUnLockLatch(&monitorLock); +} + static void* monitorThreadFunc(void *param){ setThreadName("client-monitor-slowlog"); @@ -579,7 +598,9 @@ static void* monitorThreadFunc(void *param){ uDebug("[monitor] read slow log data from queue, clusterId:%" PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); if (slowLogData->value == NULL){ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); - }else{ + } else if(strlen(slowLogData->value) == 0){ + restartReportTimer(slowLogData->clusterId); + } else{ monitorWriteSlowLog2File(slowLogData, tmpPath); } } From ad6eb205241de8eb7846d9121f2268a03f34946d Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jun 2024 16:52:10 +0800 Subject: [PATCH 3/3] fix:disable slow log monitor --- source/client/src/clientMonitor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 260a0ebbaa8b..1c636a5505d3 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -680,6 +680,7 @@ void monitorClose() { } int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value){ + return -1; // disable slow log monitor MonitorSlowLogData* slowLogData = taosAllocateQitem(sizeof(MonitorSlowLogData), DEF_QITEM, 0); if (slowLogData == NULL) { uError("[monitor] failed to allocate slow log data");