From cb10eb87cd1452eb1fa050747d9bd5cd7a1af8f6 Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Wed, 28 Aug 2024 20:30:54 +0800 Subject: [PATCH 1/9] =?UTF-8?q?feat:=20bmw=20cmdb=E7=BC=93=E5=AD=98?= =?UTF-8?q?=E6=97=B6=E6=95=88=E6=80=A7=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../internal/alarm/cmdbcache/base.go | 64 +- .../internal/alarm/cmdbcache/base_test.go | 2 +- .../internal/alarm/cmdbcache/business.go | 16 +- .../internal/alarm/cmdbcache/business_test.go | 2 +- .../internal/alarm/cmdbcache/cmdb_watch.go | 581 ------------------ .../alarm/cmdbcache/cmdb_watch_test.go | 95 --- .../internal/alarm/cmdbcache/daemon.go | 157 +++++ .../internal/alarm/cmdbcache/dynamic_group.go | 15 +- .../alarm/cmdbcache/dynamic_group_test.go | 2 +- .../internal/alarm/cmdbcache/event_process.go | 405 ++++++++++++ .../alarm/cmdbcache/event_process_test.go | 23 + .../internal/alarm/cmdbcache/event_watch.go | 226 +++++++ .../internal/alarm/cmdbcache/host.go | 5 +- .../internal/alarm/cmdbcache/host_test.go | 2 +- .../internal/alarm/cmdbcache/module.go | 2 +- .../internal/alarm/cmdbcache/module_test.go | 2 +- .../internal/alarm/cmdbcache/ratelimit.go | 2 +- .../alarm/cmdbcache/relation_builder.go | 3 +- .../alarm/cmdbcache/service_instance.go | 5 +- .../alarm/cmdbcache/service_instance_test.go | 2 +- .../internal/alarm/cmdbcache/set.go | 2 +- .../internal/alarm/cmdbcache/set_test.go | 2 +- .../apm/pre_calculate/window/distributive.go | 5 +- 23 files changed, 894 insertions(+), 726 deletions(-) delete mode 100644 pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch.go delete mode 100644 pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch_test.go create mode 100644 pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go create mode 100644 pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go create mode 100644 
pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go index bb4d6acc2..75c05fdc9 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -90,11 +90,6 @@ type Manager interface { useBiz() bool // GetConcurrentLimit 并发限制 GetConcurrentLimit() int - - // CleanByEvents 根据事件清理缓存 - CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error - // UpdateByEvents 根据事件更新缓存 - UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error } // BaseCacheManager 基础缓存管理器 @@ -359,3 +354,60 @@ func RefreshAll(ctx context.Context, cacheManager Manager, concurrentLimit int) return nil } + +// RefreshByBizList 按业务列表刷新缓存 +func RefreshByBizList(ctx context.Context, cacheManager Manager, bizIDs []int, concurrentLimit int) error { + // 并发控制 + wg := sync.WaitGroup{} + limitChan := make(chan struct{}, concurrentLimit) + + // 按业务刷新缓存 + errChan := make(chan error, len(bizIDs)) + for _, bizId := range bizIDs { + limitChan <- struct{}{} + wg.Add(1) + go func(bizId int) { + defer func() { + wg.Done() + <-limitChan + }() + err := cacheManager.RefreshByBiz(ctx, bizId) + if err != nil { + errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) + } + }(bizId) + } + + // 等待所有任务完成 + wg.Wait() + close(errChan) + for err := range errChan { + return err + } + + // 按业务清理缓存 + errChan = make(chan error, len(bizIDs)) + for _, bizId := range bizIDs { + limitChan <- struct{}{} + wg.Add(1) + go func(bizId int) { + defer func() { + wg.Done() + 
<-limitChan + }() + err := cacheManager.CleanByBiz(ctx, bizId) + if err != nil { + errChan <- errors.Wrapf(err, "clean %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) + } + }(bizId) + } + + // 等待所有任务完成 + wg.Wait() + close(errChan) + for err := range errChan { + return err + } + + return nil +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base_test.go index 65e3095ef..f75f06976 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go index ec713f190..1f20629e7 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -286,17 +286,3 @@ func (m *BusinessCacheManager) CleanByEvents(ctx context.Context, resourceType s return nil } - -// UpdateByEvents 根据事件更新缓存 -func (m *BusinessCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if resourceType != "biz" || len(events) == 0 { - return nil - } - - // 如果有更新就直接刷新全局缓存 - if err := m.RefreshGlobal(ctx); err != nil { - return err - } - - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go 
b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go index 80d93d523..c5a15ba2d 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch.go deleted file mode 100644 index d47989f26..000000000 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch.go +++ /dev/null @@ -1,581 +0,0 @@ -// MIT License - -// Copyright (c) 2021~2022 腾讯蓝鲸 - -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -package cmdbcache - -import ( - "context" - "encoding/json" - "fmt" - "strconv" - "sync" - "time" - - "github.com/pkg/errors" - - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/config" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/remote" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" -) - -// CmdbResourceType cmdb监听资源类型 -type CmdbResourceType string - -const ( - CmdbResourceTypeHost CmdbResourceType = "host" - CmdbResourceTypeHostRelation CmdbResourceType = "host_relation" - CmdbResourceTypeBiz CmdbResourceType = "biz" - CmdbResourceTypeSet CmdbResourceType = "set" - CmdbResourceTypeModule CmdbResourceType = "module" - CmdbResourceTypeMainlineInstance CmdbResourceType = "mainline_instance" - CmdbResourceTypeProcess CmdbResourceType = "process" - CmdbResourceTypeDynamicGroup CmdbResourceType = "dynamic_group" -) - -// CmdbResourceTypeFields cmdb资源类型对应的监听字段 -var CmdbResourceTypeFields = map[CmdbResourceType][]string{ - CmdbResourceTypeHost: {"bk_host_id", "bk_host_innerip", "bk_cloud_id", "bk_agent_id"}, - CmdbResourceTypeHostRelation: {"bk_host_id", "bk_biz_id"}, - CmdbResourceTypeBiz: {"bk_biz_id"}, - CmdbResourceTypeSet: {"bk_biz_id", "bk_set_id", "set_template_id"}, - CmdbResourceTypeModule: {"bk_module_id", "bk_biz_id", "service_template_id"}, - CmdbResourceTypeMainlineInstance: {"bk_obj_id", "bk_inst_id", "bk_obj_name", "bk_inst_name"}, - CmdbResourceTypeProcess: {"bk_biz_id"}, -} - -// CmdbResourceWatcher cmdb资源监听器 -type CmdbResourceWatcher struct { - // 缓存key前缀 - prefix string - // cmdb api client - cmdbApi *cmdb.Client - - // redis client - redisClient redis.UniversalClient -} - -// NewCmdbResourceWatcher 创建cmdb资源监听器 
-func NewCmdbResourceWatcher(prefix string, rOpt *redis.Options) (*CmdbResourceWatcher, error) { - // 创建redis client - redisClient, err := redis.GetClient(rOpt) - if err != nil { - return nil, errors.Wrap(err, "failed to create redis client") - } - - // 创建cmdb api client - cmdbApi := getCmdbApi() - - return &CmdbResourceWatcher{ - prefix: prefix, - redisClient: redisClient, - cmdbApi: cmdbApi, - }, nil - -} - -// getBkCursor 获取cmdb资源变更事件游标 -func (w *CmdbResourceWatcher) getBkCursor(ctx context.Context, resourceType CmdbResourceType) string { - // 从redis中获取cmdb资源变更游标 - bkCursorKey := fmt.Sprintf("%s.cmdb_resource_watch_cursor.%s", w.prefix, resourceType) - bkCursorResult := w.redisClient.Get(ctx, bkCursorKey) - if bkCursorResult.Err() != nil { - if !errors.Is(bkCursorResult.Err(), redis.Nil) { - logger.Errorf("get cmdb resource watch cursor error: %v", bkCursorResult.Err()) - return "" - } - } - return bkCursorResult.Val() -} - -// setBkCursor 记录cmdb资源变更事件游标 -func (w *CmdbResourceWatcher) setBkCursor(ctx context.Context, resourceType CmdbResourceType, cursor string) error { - // 设置cmdb资源变更游标 - bkCursorKey := fmt.Sprintf("%s.cmdb_resource_watch_cursor.%s", w.prefix, resourceType) - if _, err := w.redisClient.Set(ctx, bkCursorKey, cursor, time.Hour).Result(); err != nil { - return errors.Wrap(err, "set cmdb resource watch cursor error") - } - return nil -} - -// Watch 监听资源变更事件并记录 -func (w *CmdbResourceWatcher) Watch(ctx context.Context, resourceType CmdbResourceType) (bool, error) { - params := map[string]interface{}{ - "bk_fields": CmdbResourceTypeFields[resourceType], - "bk_resource": resourceType, - "bk_supplier_account": "0", - } - - // 获取资源变更事件游标 - bkCursor := w.getBkCursor(ctx, resourceType) - if bkCursor != "" { - params["bk_cursor"] = bkCursor - } - - // 请求监听资源变化事件API - var resp cmdb.ResourceWatchResp - _, err := w.cmdbApi.ResourceWatch().SetContext(ctx).SetBody(params).SetResult(&resp).Request() - err = api.HandleApiResultError(resp.ApiCommonRespMeta, err, 
"watch cmdb resource api failed") - if err != nil { - return false, err - } - - // 无资源变更事件 - if !resp.Data.BkWatched { - if len(resp.Data.BkEvents) == 0 { - return false, nil - } - - // 记录资源变更事件游标 - newCursor := resp.Data.BkEvents[len(resp.Data.BkEvents)-1].BkCursor - if newCursor != "" && newCursor != bkCursor { - if err := w.setBkCursor(ctx, resourceType, newCursor); err != nil { - logger.Error("set cmdb resource watch cursor error: %v", err) - } - } - - return false, nil - } - - // 记录cmdb资源变更事件 - events := make([]string, 0) - for _, event := range resp.Data.BkEvents { - val, _ := json.Marshal(event) - events = append(events, string(val)) - } - bkEventKey := fmt.Sprintf("%s.cmdb_resource_watch_event.%s", w.prefix, resourceType) - w.redisClient.RPush(ctx, bkEventKey, events) - - // 记录最后一个cmdb资源变更事件游标 - if len(resp.Data.BkEvents) > 0 { - err = w.setBkCursor(ctx, resourceType, resp.Data.BkEvents[len(resp.Data.BkEvents)-1].BkCursor) - if err != nil { - logger.Error("set cmdb resource watch cursor error: %v", err) - } - } - - return true, nil -} - -// Run 启动cmdb资源监听任务 -func (w *CmdbResourceWatcher) Run(ctx context.Context) { - waitGroup := sync.WaitGroup{} - logger.Info("start watch cmdb resource") - - // 按资源类型启动处理任务 - for resourceType := range CmdbResourceTypeFields { - waitGroup.Add(1) - resourceType := resourceType - // 启动监听任务 - go func() { - defer waitGroup.Done() - lastTime := time.Now() - haveEvent, err := true, error(nil) - for { - select { - case <-ctx.Done(): - return - default: - // 如果上次监听时间小于5秒且监听无事件,则等待到5秒 - if !haveEvent && time.Now().Sub(lastTime) < time.Second*5 { - time.Sleep(time.Second*5 - time.Now().Sub(lastTime)) - } - - haveEvent, err = w.Watch(ctx, resourceType) - if err != nil { - logger.Errorf("watch cmdb resource(%s) error: %v", resourceType, err) - } - } - // 记录上次监听时间 - lastTime = time.Now() - } - }() - } - - // 等待任务结束 - waitGroup.Wait() -} - -// WatchCmdbResourceChangeEventTaskParams 监听cmdb资源变更任务参数 -type 
WatchCmdbResourceChangeEventTaskParams struct { - Prefix string `json:"prefix" mapstructure:"prefix"` - Redis redis.Options `json:"redis" mapstructure:"redis"` -} - -// WatchCmdbResourceChangeEventTask 监听cmdb资源变更任务 -func WatchCmdbResourceChangeEventTask(ctx context.Context, payload []byte) error { - // 任务参数解析 - var params WatchCmdbResourceChangeEventTaskParams - err := json.Unmarshal(payload, ¶ms) - if err != nil { - return errors.Wrapf(err, "unmarshal payload failed, payload: %s", string(payload)) - } - - // 创建cmdb资源变更事件监听器 - watcher, err := NewCmdbResourceWatcher(params.Prefix, ¶ms.Redis) - if err != nil { - return errors.Wrap(err, "new cmdb resource watcher failed") - } - - watcher.Run(ctx) - return nil -} - -// CmdbEventHandler cmdb资源变更事件处理器 -type CmdbEventHandler struct { - // 缓存key前缀 - prefix string - - // redis client - redisClient redis.UniversalClient - - // cache cacheManager - cacheManager Manager - - // 资源类型 - resourceTypes []CmdbResourceType - - // full refresh interval - fullRefreshInterval time.Duration -} - -// NewCmdbEventHandler 创建cmdb资源变更事件处理器 -func NewCmdbEventHandler(prefix string, rOpt *redis.Options, cacheType string, fullRefreshInterval time.Duration, concurrentLimit int) (*CmdbEventHandler, error) { - // 创建redis client - redisClient, err := redis.GetClient(rOpt) - if err != nil { - return nil, errors.Wrap(err, "failed to create redis client") - } - - // 创建缓存管理器 - cacheManager, err := NewCacheManagerByType(rOpt, prefix, cacheType, concurrentLimit) - if err != nil { - return nil, errors.Wrap(err, "new cache Manager failed") - } - - // 获取关联资源类型 - resourceTypes, ok := cmdbEventHandlerResourceTypeMap[cacheType] - if !ok { - return nil, errors.Errorf("unsupported cache type: %s", cacheType) - } - - return &CmdbEventHandler{ - prefix: prefix, - redisClient: redisClient, - cacheManager: cacheManager, - resourceTypes: resourceTypes, - fullRefreshInterval: fullRefreshInterval, - }, nil -} - -// Close 关闭操作 -func (h *CmdbEventHandler) Close() { - 
GetRelationMetricsBuilder().ClearAllMetrics() -} - -// getBkEvents 获取全部资源变更事件 -func (h *CmdbEventHandler) getBkEvents(ctx context.Context, resourceType CmdbResourceType) ([]cmdb.ResourceWatchEvent, error) { - // 获取资源变更事件 - bkEventKey := fmt.Sprintf("%s.cmdb_resource_watch_event.%s", h.prefix, resourceType) - - // 从redis中获取该资源类型的所有事件 - eventStrings := make([]string, 0) - for { - result, err := h.redisClient.LPop(ctx, bkEventKey).Result() - if err != nil { - if !errors.Is(err, redis.Nil) { - logger.Errorf("get cmdb resource(%s) watch event error: %v", resourceType, err) - break - } - } - // 如果没有事件了,退出 - if result == "" { - break - } - - eventStrings = append(eventStrings, result) - } - - // 解析事件 - events := make([]cmdb.ResourceWatchEvent, 0) - for _, eventStr := range eventStrings { - var event cmdb.ResourceWatchEvent - err := json.Unmarshal([]byte(eventStr), &event) - if err != nil { - logger.Errorf("unmarshal cmdb resource(%s) watch event error: %v", resourceType, err) - continue - } - events = append(events, event) - } - - return events, nil -} - -// ifRunRefreshAll 判断是否执行全量刷新 -func (h *CmdbEventHandler) ifRunRefreshAll(ctx context.Context, cacheType string) bool { - // 获取最后一次全量刷新时间 - lastUpdateTimeKey := fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, cacheType) - lastUpdateTime, err := h.redisClient.Get(ctx, lastUpdateTimeKey).Result() - if err != nil { - if !errors.Is(err, redis.Nil) { - logger.Errorf("get last update time error: %v", err) - return false - } - } - var lastUpdateTimestamp int64 - if lastUpdateTime != "" { - lastUpdateTimestamp, err = strconv.ParseInt(lastUpdateTime, 10, 64) - } else { - lastUpdateTimestamp = 0 - } - - // 如果超过全量刷新间隔时间,执行全量刷新 - if time.Now().Unix()-lastUpdateTimestamp > int64(h.fullRefreshInterval.Seconds()) { - return true - } - - return false -} - -// Handle 处理cmdb资源变更事件 -func (h *CmdbEventHandler) Handle(ctx context.Context) { - // 如果超过全量刷新间隔时间,执行全量刷新 - if h.ifRunRefreshAll(ctx, h.cacheManager.Type()) { - // 全量刷新 - 
err := RefreshAll(ctx, h.cacheManager, h.cacheManager.GetConcurrentLimit()) - if err != nil { - logger.Errorf("refresh all cache failed: %v", err) - } - - logger.Infof("refresh all cmdb resource(%s) cache", h.cacheManager.Type()) - - // 记录全量刷新时间 - lastUpdateTimeKey := fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, h.cacheManager.Type()) - _, err = h.redisClient.Set(ctx, lastUpdateTimeKey, strconv.FormatInt(time.Now().Unix(), 10), 24*time.Hour).Result() - if err != nil { - logger.Errorf("set last update time error: %v", err) - } - - return - } - - // 处理资源变更事件 - for _, resourceType := range h.resourceTypes { - // 获取资源变更事件 - events, err := h.getBkEvents(ctx, resourceType) - if err != nil { - logger.Errorf("get cmdb resource(%s) watch event error: %v", resourceType, err) - continue - } - - logger.Infof("get cmdb resource(%s) watch event: %d", resourceType, len(events)) - - // 重置 - h.cacheManager.Reset() - - // 无事件 - if len(events) == 0 { - continue - } - - updateEvents := make([]map[string]interface{}, 0) - cleanEvents := make([]map[string]interface{}, 0) - - for _, event := range events { - switch event.BkEventType { - case "update", "create": - updateEvents = append(updateEvents, event.BkDetail) - case "delete": - cleanEvents = append(cleanEvents, event.BkDetail) - } - } - - // 更新缓存 - if len(updateEvents) > 0 { - logger.Infof("update cmdb resource(%s) cache by events: %d", resourceType, len(updateEvents)) - err := h.cacheManager.UpdateByEvents(ctx, string(resourceType), updateEvents) - if err != nil { - logger.Errorf("update cache by events failed: %v", err) - } - } - - // 清理缓存 - if len(cleanEvents) > 0 { - logger.Infof("clean cmdb resource(%s) cache by events: %d", resourceType, len(cleanEvents)) - err := h.cacheManager.CleanByEvents(ctx, string(resourceType), cleanEvents) - if err != nil { - logger.Errorf("clean cache by events failed: %v", err) - } - } - } -} - -// cmdbEventHandlerResourceTypeMap cmdb资源事件执行器与资源类型映射 -var cmdbEventHandlerResourceTypeMap = 
map[string][]CmdbResourceType{ - "host_topo": {CmdbResourceTypeHost, CmdbResourceTypeHostRelation, CmdbResourceTypeMainlineInstance}, - "business": {CmdbResourceTypeBiz}, - "module": {CmdbResourceTypeModule}, - "set": {CmdbResourceTypeSet}, - "service_instance": {CmdbResourceTypeProcess}, - "dynamic_group": {CmdbResourceTypeDynamicGroup}, -} - -// RefreshTaskParams cmdb缓存刷新任务参数 -type RefreshTaskParams struct { - // 缓存key前缀 - Prefix string `json:"prefix" mapstructure:"prefix"` - // redis配置 - Redis redis.Options `json:"redis" mapstructure:"redis"` - - // 事件处理间隔时间(秒) - EventHandleInterval int `json:"event_handle_interval" mapstructure:"event_handle_interval"` - // 全量刷新间隔时间(秒) - FullRefreshIntervals map[string]int `json:"full_refresh_intervals" mapstructure:"full_refresh_intervals"` - - // 业务执行并发数 - BizConcurrent int `json:"biz_concurrent" mapstructure:"biz_concurrent"` - - CacheTypes []string `json:"cache_types" mapstructure:"cache_types"` -} - -// CacheRefreshTask cmdb缓存刷新任务 -func CacheRefreshTask(ctx context.Context, payload []byte) error { - // 任务参数解析 - var params RefreshTaskParams - err := json.Unmarshal(payload, ¶ms) - if err != nil { - return errors.Wrapf(err, "unmarshal payload failed, payload: %s", string(payload)) - } - - // 业务执行并发数 - bizConcurrent := params.BizConcurrent - if bizConcurrent <= 0 { - bizConcurrent = 5 - } - - // 事件处理间隔时间,最低1分钟 - eventHandleInterval := time.Second * time.Duration(params.EventHandleInterval) - if eventHandleInterval <= 60 { - eventHandleInterval = time.Hour - } - - // 全量刷新间隔时间,最低10分钟 - fullRefreshIntervals := make(map[string]time.Duration, len(params.FullRefreshIntervals)) - for cacheType, interval := range params.FullRefreshIntervals { - fullRefreshIntervals[cacheType] = time.Second * time.Duration(interval) - } - - // 需要刷新的缓存类型 - cacheTypes := params.CacheTypes - if len(cacheTypes) == 0 { - for cacheType := range cmdbEventHandlerResourceTypeMap { - cacheTypes = append(cacheTypes, cacheType) - } - } else { - for _, cacheType := 
range cacheTypes { - if _, ok := cmdbEventHandlerResourceTypeMap[cacheType]; !ok { - return errors.Errorf("unsupported cache type: %s", cacheType) - } - } - } - - wg := sync.WaitGroup{} - cancelCtx, cancel := context.WithCancel(ctx) - defer cancel() - - // 推送自定义上报数据 - wg.Add(1) - go func() { - // 启动指标上报 - reporter, err := remote.NewSpaceReporter(config.BuildInResultTableDetailKey, config.PromRemoteWriteUrl) - if err != nil { - logger.Errorf("[cmdb_relation] new space reporter: %v", err) - return - } - defer func() { - err = reporter.Close(ctx) - }() - spaceReport := GetRelationMetricsBuilder().WithSpaceReport(reporter) - - for { - ticker := time.NewTicker(time.Minute) - - // 事件处理间隔时间 - select { - case <-cancelCtx.Done(): - GetRelationMetricsBuilder().ClearAllMetrics() - ticker.Stop() - return - case <-ticker.C: - // 上报指标 - logger.Infof("[cmdb_relation] space report push all") - if err = spaceReport.PushAll(cancelCtx, time.Now()); err != nil { - logger.Errorf("[cmdb_relation] relation metrics builder push all error: %v", err.Error()) - } - } - } - }() - - for _, cacheType := range cacheTypes { - wg.Add(1) - cacheType := cacheType - fullRefreshInterval, ok := fullRefreshIntervals[cacheType] - // 最低600秒的间隔 - if !ok { - fullRefreshInterval = time.Second * 600 - } - - go func() { - defer wg.Done() - - // 创建资源变更事件处理器 - handler, err := NewCmdbEventHandler(params.Prefix, ¶ms.Redis, cacheType, fullRefreshInterval, bizConcurrent) - if err != nil { - logger.Errorf("[cmdb_relation] new cmdb event handler failed: %v", err) - cancel() - return - } - - logger.Infof("[cmdb_relation] start handle cmdb resource(%s) event", cacheType) - defer logger.Infof("[cmdb_relation] end handle cmdb resource(%s) event", cacheType) - - for { - tn := time.Now() - // 事件处理 - handler.Handle(cancelCtx) - - // 事件处理间隔时间 - select { - case <-cancelCtx.Done(): - handler.Close() - return - case <-time.After(eventHandleInterval - time.Now().Sub(tn)): - } - } - }() - } - - wg.Wait() - return nil -} diff --git 
a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch_test.go deleted file mode 100644 index 434cbd948..000000000 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/cmdb_watch_test.go +++ /dev/null @@ -1,95 +0,0 @@ -// MIT License - -// Copyright (c) 2021~2022 腾讯蓝鲸 - -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -package cmdbcache - -//import ( -// "context" -// "encoding/json" -// "os" -// "os/signal" -// "sync" -// "testing" -// -// "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" -//) -// -//func TestResourceWatch(t *testing.T) { -// redisOptions := redis.Options{ -// Mode: "standalone", -// Addrs: []string{"127.0.0.1:6379"}, -// } -// -// // 系统信号 -// signalChan := make(chan os.Signal, 1) -// signal.Notify(signalChan, os.Interrupt, os.Kill) -// -// //调用cancel函数取消 -// ctx, cancel := context.WithCancel(context.Background()) -// defer cancel() -// -// // 监听信号 -// go func() { -// <-signalChan -// cancel() -// }() -// -// prefix := t.Name() -// -// wg := &sync.WaitGroup{} -// wg.Add(1) -// -// //go func() { -// // defer cancel() -// // defer wg.Done() -// // -// // params := &WatchCmdbResourceChangeEventTaskParams{ -// // Redis: redisOptions, -// // Prefix: prefix, -// // } -// // payload, _ := json.Marshal(params) -// // if err := WatchCmdbResourceChangeEventTask(ctx, payload); err != nil { -// // t.Errorf("TestWatch failed, err: %v", err) -// // return -// // } -// //}() -// -// go func() { -// defer cancel() -// defer wg.Done() -// -// params := &RefreshTaskParams{ -// Redis: redisOptions, -// Prefix: prefix, -// EventHandleInterval: 60, -// CacheTypes: []string{"host_topo"}, -// FullRefreshIntervals: map[string]int{"host_topo": 1800, "business": 1800, "module": 1800, "set": 1800, "service_instance": 60}, -// } -// payload, _ := json.Marshal(params) -// if err := CacheRefreshTask(ctx, payload); err != nil { -// t.Errorf("TestHandle failed, err: %v", err) -// return -// } -// }() -// -// wg.Wait() -//} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go index 1940f6b9d..865cc4cc1 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go @@ -11,8 +11,43 @@ package cmdbcache import 
( "context" + "encoding/json" + "sync" + "time" + + "github.com/pkg/errors" + + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/config" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/remote" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) +// WatchCmdbResourceChangeEventTaskParams 监听cmdb资源变更任务参数 +type WatchCmdbResourceChangeEventTaskParams struct { + Prefix string `json:"prefix" mapstructure:"prefix"` + Redis redis.Options `json:"redis" mapstructure:"redis"` +} + +// WatchCmdbResourceChangeEventTask 监听cmdb资源变更任务 +func WatchCmdbResourceChangeEventTask(ctx context.Context, payload []byte) error { + // 任务参数解析 + var params WatchCmdbResourceChangeEventTaskParams + err := json.Unmarshal(payload, ¶ms) + if err != nil { + return errors.Wrapf(err, "unmarshal payload failed, payload: %s", string(payload)) + } + + // 创建cmdb资源变更事件监听器 + watcher, err := NewCmdbResourceWatcher(params.Prefix, ¶ms.Redis) + if err != nil { + return errors.Wrap(err, "new cmdb resource watcher failed") + } + + watcher.Run(ctx) + return nil +} + type ResourceWatchDaemon struct { } @@ -29,6 +64,128 @@ func (c *ResourceWatchDaemon) GetTaskDimension(payload []byte) string { return "" } +// RefreshTaskParams cmdb缓存刷新任务参数 +var cmdbCacheTypes = []string{"host_topo", "business", "module", "set", "service_instance", "dynamic_group"} + +// RefreshTaskParams cmdb缓存刷新任务参数 +type RefreshTaskParams struct { + // 缓存key前缀 + Prefix string `json:"prefix" mapstructure:"prefix"` + // redis配置 + Redis redis.Options `json:"redis" mapstructure:"redis"` + + // 事件处理间隔时间(秒) + EventHandleInterval int `json:"event_handle_interval" mapstructure:"event_handle_interval"` + // 全量刷新间隔时间(秒) + FullRefreshIntervals map[string]int `json:"full_refresh_intervals" mapstructure:"full_refresh_intervals"` + + // 业务执行并发数 + BizConcurrent int `json:"biz_concurrent" 
mapstructure:"biz_concurrent"` + + CacheTypes []string `json:"cache_types" mapstructure:"cache_types"` +} + +// CacheRefreshTask cmdb缓存刷新任务 +func CacheRefreshTask(ctx context.Context, payload []byte) error { + // 任务参数解析 + var params RefreshTaskParams + err := json.Unmarshal(payload, ¶ms) + if err != nil { + return errors.Wrapf(err, "unmarshal payload failed, payload: %s", string(payload)) + } + + // 业务执行并发数 + bizConcurrent := params.BizConcurrent + if bizConcurrent <= 0 { + bizConcurrent = 5 + } + + // 事件处理间隔时间,最低1分钟 + eventHandleInterval := time.Second * time.Duration(params.EventHandleInterval) + if eventHandleInterval <= 60 { + eventHandleInterval = time.Hour + } + + // 全量刷新间隔时间 + fullRefreshIntervals := make(map[string]time.Duration, len(params.FullRefreshIntervals)) + for cacheType, interval := range params.FullRefreshIntervals { + fullRefreshIntervals[cacheType] = time.Second * time.Duration(interval) + } + + // 需要刷新的缓存类型 + cacheTypes := params.CacheTypes + if len(cacheTypes) == 0 { + cacheTypes = cmdbCacheTypes + } + + wg := sync.WaitGroup{} + cancelCtx, cancel := context.WithCancel(ctx) + defer cancel() + + // 推送自定义上报数据 + wg.Add(1) + go func() { + // 启动指标上报 + reporter, err := remote.NewSpaceReporter(config.BuildInResultTableDetailKey, config.PromRemoteWriteUrl) + if err != nil { + logger.Errorf("[cmdb_relation] new space reporter: %v", err) + return + } + defer func() { + err = reporter.Close(ctx) + }() + spaceReport := GetRelationMetricsBuilder().WithSpaceReport(reporter) + + for { + ticker := time.NewTicker(time.Minute) + + // 事件处理间隔时间 + select { + case <-cancelCtx.Done(): + GetRelationMetricsBuilder().ClearAllMetrics() + ticker.Stop() + return + case <-ticker.C: + // 上报指标 + logger.Infof("[cmdb_relation] space report push all") + if err = spaceReport.PushAll(cancelCtx, time.Now()); err != nil { + logger.Errorf("[cmdb_relation] relation metrics builder push all error: %v", err.Error()) + } + } + } + }() + + wg.Add(1) + go func() { + defer wg.Done() + + // 
创建资源变更事件处理器 + handler, err := NewCmdbEventHandler(params.Prefix, ¶ms.Redis, cacheTypes, fullRefreshIntervals, bizConcurrent) + if err != nil { + logger.Errorf("[cmdb_relation] new cmdb event handler failed: %v", err) + cancel() + return + } + + for { + tn := time.Now() + // 事件处理 + handler.Run(cancelCtx) + + // 事件处理间隔时间 + select { + case <-cancelCtx.Done(): + handler.Close() + return + case <-time.After(eventHandleInterval - time.Now().Sub(tn)): + } + } + }() + + wg.Wait() + return nil +} + type CacheRefreshDaemon struct{} // Start 启动缓存刷新 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go index 7add5e4cf..f624e62fb 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -30,10 +30,11 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -233,13 +234,3 @@ func (m *DynamicGroupCacheManager) CleanGlobal(ctx context.Context) error { } return nil } - -// CleanByEvents 清除事件相关的动态分组缓存 -func (m *DynamicGroupCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - return nil -} - -// UpdateByEvents 更新事件相关的动态分组缓存 -func (m *DynamicGroupCacheManager) 
UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group_test.go index 4a2195232..2cd9b286c 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go new file mode 100644 index 000000000..be1b6e4d1 --- /dev/null +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -0,0 +1,405 @@ +// MIT License + +// Copyright (c) 2021~2024 腾讯蓝鲸 + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package cmdbcache + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "sync" + "time" + + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/pkg/errors" + + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" +) + +// CmdbEventHandler cmdb资源变更事件处理器 +type CmdbEventHandler struct { + // 缓存key前缀 + prefix string + + // redis client + redisClient redis.UniversalClient + + // 缓存管理器 + cacheManagers []Manager + + // 全量刷新间隔时间 + fullRefreshIntervals map[string]time.Duration + + // 预处理结果 + // 是否刷新业务列表 + refreshBiz bool + // 待刷新主机拓扑业务列表 + refreshBizHostTopo sync.Map + // 待清理主机相关key + cleanHostKeys sync.Map + // 待刷新服务实例业务列表 + refreshBizServiceInstance sync.Map + // 待清理服务实例相关key + cleanServiceInstanceKeys sync.Map + // 待更新拓扑节点 + refreshTopoNode sync.Map + // 待删除拓扑节点 + cleanTopoNode sync.Map + // 待刷新动态分组业务列表 + refreshBizDynamicGroup sync.Map + // 待刷新集群业务列表 + refreshBizSet sync.Map + // 待清理集群相关key + cleanSetKeys sync.Map + // 待刷新模块业务列表 + refreshBizModule sync.Map + // 待清理模块相关key + cleanModuleKeys sync.Map +} + +// NewCmdbEventHandler 创建cmdb资源变更事件处理器 +func NewCmdbEventHandler(prefix string, rOpt *redis.Options, cacheTypes []string, fullRefreshIntervals map[string]time.Duration, concurrentLimit int) (*CmdbEventHandler, error) { + // 创建redis client + redisClient, err := redis.GetClient(rOpt) + if err != nil { + return nil, errors.Wrap(err, "failed to create redis client") + } + + // 创建缓存管理器 + cacheManagers := make([]Manager, 0, len(cacheTypes)) + for _, cacheType := range cacheTypes { + cacheManager, err := NewCacheManagerByType(rOpt, prefix, 
cacheType, concurrentLimit) + if err != nil { + return nil, errors.Wrap(err, "new cache Manager failed") + } + cacheManagers = append(cacheManagers, cacheManager) + } + + return &CmdbEventHandler{ + prefix: prefix, + redisClient: redisClient, + cacheManagers: cacheManagers, + fullRefreshIntervals: fullRefreshIntervals, + }, nil +} + +// Close 关闭操作 +func (h *CmdbEventHandler) Close() { + GetRelationMetricsBuilder().ClearAllMetrics() +} + +// getEvents 获取资源变更事件 +func (h *CmdbEventHandler) getEvents(ctx context.Context, resourceType CmdbResourceType) ([]cmdb.ResourceWatchEvent, error) { + // 获取资源变更事件 + bkEventKey := fmt.Sprintf("%s.cmdb_resource_watch_event.%s", h.prefix, resourceType) + + // 从redis中获取该资源类型的所有事件 + eventStrings := make([]string, 0) + for { + result, err := h.redisClient.LPop(ctx, bkEventKey).Result() + if err != nil { + if !errors.Is(err, redis.Nil) { + logger.Errorf("get cmdb resource(%s) watch event error: %v", resourceType, err) + break + } + } + // 如果没有事件了,退出 + if result == "" { + break + } + + eventStrings = append(eventStrings, result) + } + + // 解析事件 + events := make([]cmdb.ResourceWatchEvent, 0) + for _, eventStr := range eventStrings { + var event cmdb.ResourceWatchEvent + err := json.Unmarshal([]byte(eventStr), &event) + if err != nil { + logger.Errorf("unmarshal cmdb resource(%s) watch event error: %v", resourceType, err) + continue + } + events = append(events, event) + } + + return events, nil +} + +// resetPreprocessResults 重置预处理结果 +func (h *CmdbEventHandler) resetPreprocessResults() { + h.refreshBiz = false + h.refreshBizHostTopo = sync.Map{} + h.cleanHostKeys = sync.Map{} + h.refreshBizServiceInstance = sync.Map{} + h.cleanServiceInstanceKeys = sync.Map{} + h.refreshTopoNode = sync.Map{} + h.cleanTopoNode = sync.Map{} + h.refreshBizDynamicGroup = sync.Map{} + h.refreshBizSet = sync.Map{} + h.cleanSetKeys = sync.Map{} + h.refreshBizModule = sync.Map{} + h.cleanModuleKeys = sync.Map{} +} + +// preprocessEvents 预处理资源变更事件 +func (h 
*CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType CmdbResourceType, events []cmdb.ResourceWatchEvent) error { + var host *AlarmHostInfo + hosts := make(map[int]*AlarmHostInfo) + + for _, event := range events { + // 尝试获取主机信息 + bkHostId, ok := event.BkDetail["bk_host_id"].(float64) + if ok { + host, ok = hosts[int(bkHostId)] + if !ok { + result := h.redisClient.HGet(ctx, fmt.Sprintf("%s.%s", h.prefix, hostCacheKey), strconv.Itoa(int(bkHostId))) + if result.Err() != nil { + if !errors.Is(result.Err(), redis.Nil) { + logger.Errorf("get host(%d) info error: %v", int(bkHostId), result.Err()) + } + } else { + err := json.Unmarshal([]byte(result.Val()), &host) + if err != nil { + logger.Errorf("unmarshal host(%d) info error: %v", int(bkHostId), err) + } else { + hosts[int(bkHostId)] = host + } + } + } + } else { + host = nil + } + + switch resourceType { + case CmdbResourceTypeBiz: + // 如果是业务事件,将刷新业务标志置为true + h.refreshBiz = true + case CmdbResourceTypeSet: + bizId, ok1 := event.BkDetail["bk_biz_id"].(float64) + bkSetId, ok2 := event.BkDetail["bk_set_id"].(float64) + if !ok1 || !ok2 { + continue + } + h.refreshBizSet.Store(int(bizId), struct{}{}) + + // 如果是删除事件,将集群ID加入待清理列表 + if event.BkEventType == "delete" { + h.cleanSetKeys.Store(int(bkSetId), struct{}{}) + } + case CmdbResourceTypeModule: + bizId, ok1 := event.BkDetail["bk_biz_id"].(float64) + bkModuleId, ok2 := event.BkDetail["bk_module_id"].(float64) + if !ok1 || !ok2 { + continue + } + h.refreshBizModule.Store(int(bizId), struct{}{}) + + // 如果是删除事件,将模块ID加入待清理列表 + if event.BkEventType == "delete" { + h.cleanModuleKeys.Store(int(bkModuleId), struct{}{}) + } + case CmdbResourceTypeHost: + // todo: implement this + continue + case CmdbResourceTypeHostRelation: + bkBizId, ok := event.BkDetail["bk_biz_id"].(float64) + if !ok { + continue + } + + // 如果拉不到主机信息,直接刷新业务并清理主机ID + if host == nil { + h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) + h.cleanHostKeys.Store(int(bkHostId), struct{}{}) + 
continue + } + + // 尝试将主机关联字段加入待清理列表,如果刷新业务时发现这些字段不存在,将会进行清理 + if host.BkAgentId != "" { + h.cleanHostKeys.Store(host.BkAgentId, struct{}{}) + } + if host.BkHostInnerip != "" { + h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", host.BkHostInnerip, host.BkCloudId), struct{}{}) + } + + if event.BkEventType == "delete" || host.BkBizId != int(bkBizId) { + // 如果是删除事件,将主机ID加入待清理列表 + h.cleanHostKeys.Store(int(bkHostId), struct{}{}) + // 如果是删除事件,将业务ID加入待刷新列表 + h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) + h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) + } else { + // 如果是更新事件,将业务ID加入待刷新列表 + h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) + } + case CmdbResourceTypeMainlineInstance: + bkObjId := event.BkDetail["bk_obj_id"].(string) + bkInstId, ok := event.BkDetail["bk_inst_id"].(float64) + if !ok { + continue + } + topoNodeKey := fmt.Sprintf("%s.%d", bkObjId, int(bkInstId)) + if event.BkEventType == "delete" { + // 如果是删除事件,将拓扑节点ID加入待清理列表 + h.cleanTopoNode.Store(topoNodeKey, struct{}{}) + } else { + // 如果是更新事件,将拓扑节点ID加入待刷新列表 + topo := map[string]interface{}{ + "bk_inst_id": int(bkInstId), + "bk_inst_name": event.BkDetail["bk_inst_name"], + "bk_obj_id": bkObjId, + "bk_obj_name": event.BkDetail["bk_obj_name"], + } + value, _ := json.Marshal(topo) + h.refreshTopoNode.Store(topoNodeKey, string(value)) + } + case CmdbResourceTypeProcess: + serviceInstanceId, ok1 := event.BkDetail["service_instance_id"].(float64) + bkBizId, ok2 := event.BkDetail["bk_biz_id"].(float64) + if !ok1 || !ok2 { + continue + } + + if event.BkEventType == "delete" { + // 如果是删除事件,将服务实例ID加入待清理列表 + h.cleanServiceInstanceKeys.Store(int(serviceInstanceId), struct{}{}) + } else { + // 如果是更新事件,将业务ID加入待刷新列表 + h.refreshBizServiceInstance.Store(int(bkBizId), struct{}{}) + } + } + } + return nil +} + +// refreshEvents 刷新资源变更事件 +func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { + // todo: implement this + return nil +} + +// getFullRefreshInterval 获取全量刷新间隔时间 +func (h *CmdbEventHandler) 
getFullRefreshInterval(cacheType string) time.Duration { + fullRefreshInterval, ok := h.fullRefreshIntervals[cacheType] + // 最低600秒的间隔 + if !ok { + fullRefreshInterval = time.Second * 300 + } + return fullRefreshInterval +} + +// ifRunRefreshAll 判断是否执行全量刷新 +func (h *CmdbEventHandler) ifRunRefreshAll(ctx context.Context, cacheType string) bool { + // 获取最后一次全量刷新时间 + lastUpdateTimeKey := fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, cacheType) + lastUpdateTime, err := h.redisClient.Get(ctx, lastUpdateTimeKey).Result() + if err != nil { + if !errors.Is(err, redis.Nil) { + logger.Errorf("get last update time error: %v", err) + return false + } + } + var lastUpdateTimestamp int64 + if lastUpdateTime != "" { + lastUpdateTimestamp, err = strconv.ParseInt(lastUpdateTime, 10, 64) + } else { + lastUpdateTimestamp = 0 + } + + // 如果超过全量刷新间隔时间,执行全量刷新 + if time.Now().Unix()-lastUpdateTimestamp > int64(h.getFullRefreshInterval(cacheType).Seconds()) { + return true + } + + return false +} + +// Run 处理cmdb资源变更事件 +// 1. 遍历所有缓存类型,如果超过全量刷新间隔时间,先执行全量刷新 +// 2. 从缓存中获取资源变更并进行预处理 +// 3. 
根据预处理结果,执行缓存变更动作 +func (h *CmdbEventHandler) Run(ctx context.Context) { + wg := sync.WaitGroup{} + + // 如果超过全量刷新间隔时间,先执行全量刷新 + for _, cacheManager := range h.cacheManagers { + wg.Add(1) + + cacheManager := cacheManager + go func() { + defer wg.Done() + + if h.ifRunRefreshAll(ctx, cacheManager.Type()) { + // 全量刷新 + err := RefreshAll(ctx, cacheManager, cacheManager.GetConcurrentLimit()) + if err != nil { + logger.Errorf("refresh all cache failed: %v", err) + } + + logger.Infof("refresh all cmdb resource(%s) cache", cacheManager.Type()) + + // 记录全量刷新时间 + lastUpdateTimeKey := fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, cacheManager.Type()) + _, err = h.redisClient.Set(ctx, lastUpdateTimeKey, strconv.FormatInt(time.Now().Unix(), 10), 24*time.Hour).Result() + if err != nil { + logger.Errorf("set last update time error: %v", err) + } + } + }() + } + wg.Wait() + + // 重置预处理结果 + h.resetPreprocessResults() + + // 从缓存中获取资源变更并进行预处理 + for _, resourceType := range CmdbResourceTypes { + wg.Add(1) + resourceType := resourceType + go func() { + defer wg.Done() + + // 获取资源变更事件 + events, err := h.getEvents(ctx, resourceType) + if err != nil { + logger.Errorf("get cmdb resource(%s) watch event error: %v", resourceType, err) + return + } + + // 预处理资源变更事件 + err = h.preprocessEvents(ctx, resourceType, events) + if err != nil { + logger.Errorf("preprocess cmdb resource(%s) watch event error: %v", resourceType, err) + } + }() + } + wg.Wait() + + // 根据预处理结果,执行缓存变更动作 + err := h.refreshEvents(ctx) + if err != nil { + logger.Errorf("refresh cmdb resource event error: %v", err) + } +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go new file mode 100644 index 000000000..ab305f12b --- /dev/null +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go @@ -0,0 +1,23 @@ +// MIT License + +// Copyright (c) 2021~2024 腾讯蓝鲸 + +// Permission is hereby granted, free of 
charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package cmdbcache diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go new file mode 100644 index 000000000..e194f0bfe --- /dev/null +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go @@ -0,0 +1,226 @@ +// MIT License + +// Copyright (c) 2021~2024 腾讯蓝鲸 + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package cmdbcache + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "time" + + "github.com/pkg/errors" + + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" +) + +// CmdbResourceType cmdb监听资源类型 +type CmdbResourceType string + +const ( + CmdbResourceTypeHost CmdbResourceType = "host" + CmdbResourceTypeHostRelation CmdbResourceType = "host_relation" + CmdbResourceTypeBiz CmdbResourceType = "biz" + CmdbResourceTypeSet CmdbResourceType = "set" + CmdbResourceTypeModule CmdbResourceType = "module" + CmdbResourceTypeMainlineInstance CmdbResourceType = "mainline_instance" + CmdbResourceTypeProcess CmdbResourceType = "process" +) + +// CmdbResourceTypes cmdb资源类型 +var CmdbResourceTypes = []CmdbResourceType{ + CmdbResourceTypeHost, + CmdbResourceTypeHostRelation, + CmdbResourceTypeBiz, + CmdbResourceTypeSet, + CmdbResourceTypeModule, + CmdbResourceTypeMainlineInstance, + CmdbResourceTypeProcess, +} + +// CmdbResourceTypeFields cmdb资源类型对应的监听字段 +var CmdbResourceTypeFields = map[CmdbResourceType][]string{ + CmdbResourceTypeHost: {"bk_host_id", "bk_host_innerip", "bk_cloud_id", "bk_agent_id"}, + CmdbResourceTypeBiz: {"bk_biz_id"}, + CmdbResourceTypeSet: {"bk_biz_id", "bk_set_id", "set_template_id"}, + 
CmdbResourceTypeModule: {"bk_module_id", "bk_biz_id", "service_template_id"}, +} + +// CmdbResourceWatcher cmdb资源监听器 +type CmdbResourceWatcher struct { + // 缓存key前缀 + prefix string + // cmdb api client + cmdbApi *cmdb.Client + + // redis client + redisClient redis.UniversalClient +} + +// NewCmdbResourceWatcher 创建cmdb资源监听器 +func NewCmdbResourceWatcher(prefix string, rOpt *redis.Options) (*CmdbResourceWatcher, error) { + // 创建redis client + redisClient, err := redis.GetClient(rOpt) + if err != nil { + return nil, errors.Wrap(err, "failed to create redis client") + } + + // 创建cmdb api client + cmdbApi := getCmdbApi() + + return &CmdbResourceWatcher{ + prefix: prefix, + redisClient: redisClient, + cmdbApi: cmdbApi, + }, nil + +} + +// getBkCursor 获取cmdb资源变更事件游标 +func (w *CmdbResourceWatcher) getBkCursor(ctx context.Context, resourceType CmdbResourceType) string { + // 从redis中获取cmdb资源变更游标 + bkCursorKey := fmt.Sprintf("%s.cmdb_resource_watch_cursor.%s", w.prefix, resourceType) + bkCursorResult := w.redisClient.Get(ctx, bkCursorKey) + if bkCursorResult.Err() != nil { + if !errors.Is(bkCursorResult.Err(), redis.Nil) { + logger.Errorf("get cmdb resource watch cursor error: %v", bkCursorResult.Err()) + return "" + } + } + return bkCursorResult.Val() +} + +// setBkCursor 记录cmdb资源变更事件游标 +func (w *CmdbResourceWatcher) setBkCursor(ctx context.Context, resourceType CmdbResourceType, cursor string) error { + // 设置cmdb资源变更游标 + bkCursorKey := fmt.Sprintf("%s.cmdb_resource_watch_cursor.%s", w.prefix, resourceType) + if _, err := w.redisClient.Set(ctx, bkCursorKey, cursor, time.Hour).Result(); err != nil { + return errors.Wrap(err, "set cmdb resource watch cursor error") + } + return nil +} + +// Watch 监听资源变更事件并记录 +func (w *CmdbResourceWatcher) Watch(ctx context.Context, resourceType CmdbResourceType) (bool, error) { + params := map[string]interface{}{ + "bk_resource": resourceType, + "bk_supplier_account": "0", + } + + // 获取资源变更事件游标 + bkCursor := w.getBkCursor(ctx, resourceType) + 
if bkCursor != "" { + params["bk_cursor"] = bkCursor + } + + // 补充bk_fields参数 + if fields, ok := CmdbResourceTypeFields[resourceType]; ok { + params["bk_fields"] = fields + } + // 请求监听资源变化事件API + var resp cmdb.ResourceWatchResp + _, err := w.cmdbApi.ResourceWatch().SetContext(ctx).SetBody(params).SetResult(&resp).Request() + err = api.HandleApiResultError(resp.ApiCommonRespMeta, err, "watch cmdb resource api failed") + if err != nil { + return false, err + } + + // 无资源变更事件 + if !resp.Data.BkWatched { + if len(resp.Data.BkEvents) == 0 { + return false, nil + } + + // 记录资源变更事件游标 + newCursor := resp.Data.BkEvents[len(resp.Data.BkEvents)-1].BkCursor + if newCursor != "" && newCursor != bkCursor { + if err := w.setBkCursor(ctx, resourceType, newCursor); err != nil { + logger.Error("set cmdb resource watch cursor error: %v", err) + } + } + + return false, nil + } + + // 记录cmdb资源变更事件 + events := make([]string, 0) + for _, event := range resp.Data.BkEvents { + val, _ := json.Marshal(event) + _ = fmt.Sprintf("%s", val) + events = append(events, string(val)) + } + bkEventKey := fmt.Sprintf("%s.cmdb_resource_watch_event.%s", w.prefix, resourceType) + w.redisClient.RPush(ctx, bkEventKey, events) + + // 记录最后一个cmdb资源变更事件游标 + if len(resp.Data.BkEvents) > 0 { + err = w.setBkCursor(ctx, resourceType, resp.Data.BkEvents[len(resp.Data.BkEvents)-1].BkCursor) + if err != nil { + logger.Error("set cmdb resource watch cursor error: %v", err) + } + } + + return true, nil +} + +// Run 启动cmdb资源监听任务 +func (w *CmdbResourceWatcher) Run(ctx context.Context) { + waitGroup := sync.WaitGroup{} + logger.Info("start watch cmdb resource") + + // 按资源类型启动处理任务 + for _, resourceType := range CmdbResourceTypes { + waitGroup.Add(1) + resourceType := resourceType + // 启动监听任务 + go func() { + defer waitGroup.Done() + lastTime := time.Now() + haveEvent, err := true, error(nil) + for { + select { + case <-ctx.Done(): + return + default: + // 如果上次监听时间小于5秒且监听无事件,则等待到5秒 + if !haveEvent && time.Now().Sub(lastTime) 
< time.Second*5 { + time.Sleep(time.Second*5 - time.Now().Sub(lastTime)) + } + + haveEvent, err = w.Watch(ctx, resourceType) + if err != nil { + logger.Errorf("watch cmdb resource(%s) error: %v", resourceType, err) + } + } + // 记录上次监听时间 + lastTime = time.Now() + } + }() + } + + // 等待任务结束 + waitGroup.Wait() +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go index 672d8ae29..a87df86e6 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -35,10 +35,11 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // hostFields 主机字段 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go index fad45b9d4..087e65412 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go 
b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go index 9c097b7da..21be68355 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go index 0d7e5d66c..dc057e552 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/ratelimit.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/ratelimit.go index 0a2dc8a1f..1930f92da 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/ratelimit.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/ratelimit.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/relation_builder.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/relation_builder.go index 11003ea48..e8b64375b 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/relation_builder.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/relation_builder.go @@ -18,8 +18,9 @@ import ( 
"github.com/prometheus/prometheus/prompb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/remote" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/remote" ) var ( diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go index b821e158f..de54fdc02 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -34,10 +34,11 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance_test.go index ba36c01f7..8ce9df527 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git 
a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go index 38cc7d75d..f6d5dff28 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go index 6089bb3be..5f7ad8d5c 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go @@ -1,6 +1,6 @@ // MIT License -// Copyright (c) 2021~2022 腾讯蓝鲸 +// Copyright (c) 2021~2024 腾讯蓝鲸 // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/bk-monitor-worker/internal/apm/pre_calculate/window/distributive.go b/pkg/bk-monitor-worker/internal/apm/pre_calculate/window/distributive.go index 44cc2fd67..a20b279f5 100644 --- a/pkg/bk-monitor-worker/internal/apm/pre_calculate/window/distributive.go +++ b/pkg/bk-monitor-worker/internal/apm/pre_calculate/window/distributive.go @@ -20,10 +20,11 @@ import ( "go.uber.org/zap" "golang.org/x/sync/semaphore" + monitorLogger "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/apm/pre_calculate/storage" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/metrics" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/runtimex" - monitorLogger "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // DistributiveWindowOptions all configs @@ -214,7 +215,7 @@ 
loop: } metrics.RecordApmPreCalcLocateSpanDuration(w.dataId, start) case <-w.ctx.Done(): - w.logger.Infof("Handle span stopped.") + w.logger.Infof("Run span stopped.") // clear data for _, subWindow := range w.subWindows { subWindow.m = &sync.Map{} From a0f3402abba2d24cfe49647f9ca52c124c9a4b40 Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:04:37 +0800 Subject: [PATCH 2/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../internal/alarm/cmdbcache/business_test.go | 31 ------- .../internal/alarm/cmdbcache/event_process.go | 77 +++++++++++++---- .../alarm/cmdbcache/event_process_test.go | 83 ++++++++++++++++++ .../alarm/cmdbcache/event_watch_test.go | 84 +++++++++++++++++++ 4 files changed, 226 insertions(+), 49 deletions(-) create mode 100644 pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go index c5a15ba2d..19d21ab90 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business_test.go @@ -223,35 +223,4 @@ func TestBusinessCacheManager(t *testing.T) { exists := client.Exists(ctx, cacheManager.GetCacheKey(businessCacheKey)) assert.EqualValues(t, 0, exists.Val()) }) - - t.Run("Event", func(t *testing.T) { - // 创建业务缓存管理器 - cacheManager, err := NewBusinessCacheManager(t.Name(), rOpts, 1) - if err != nil { - t.Error(err) - return - } - - err = cacheManager.UpdateByEvents(ctx, "biz", []map[string]interface{}{ - {"bk_biz_id": float64(2)}, - }) - if err != nil { - t.Error(err) - return - } - - assert.Len(t, 
client.HKeys(ctx, cacheManager.GetCacheKey(businessCacheKey)).Val(), 3) - - err = cacheManager.CleanByEvents(ctx, "biz", []map[string]interface{}{ - {"bk_biz_id": float64(2)}, - }) - err = cacheManager.CleanByEvents(ctx, "other", []map[string]interface{}{ - {"bk_biz_id": float64(3)}, - }) - err = cacheManager.UpdateByEvents(ctx, "other", []map[string]interface{}{ - {"bk_biz_id": float64(3)}, - }) - - assert.Len(t, client.HKeys(ctx, cacheManager.GetCacheKey(businessCacheKey)).Val(), 2) - }) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go index be1b6e4d1..d309deef2 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -30,11 +30,11 @@ import ( "sync" "time" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" "github.com/pkg/errors" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // CmdbEventHandler cmdb资源变更事件处理器 @@ -109,15 +109,17 @@ func (h *CmdbEventHandler) Close() { GetRelationMetricsBuilder().ClearAllMetrics() } +// getEventKey 获取资源变更事件key +func (h *CmdbEventHandler) getEventKey(resourceType CmdbResourceType) string { + return fmt.Sprintf("%s.cmdb_resource_watch_event.%s", h.prefix, resourceType) +} + // getEvents 获取资源变更事件 func (h *CmdbEventHandler) getEvents(ctx context.Context, resourceType CmdbResourceType) ([]cmdb.ResourceWatchEvent, error) { - // 获取资源变更事件 - bkEventKey := fmt.Sprintf("%s.cmdb_resource_watch_event.%s", h.prefix, resourceType) - // 从redis中获取该资源类型的所有事件 eventStrings := make([]string, 0) for { - result, err := h.redisClient.LPop(ctx, bkEventKey).Result() + result, err := h.redisClient.LPop(ctx, h.getEventKey(resourceType)).Result() if err != nil 
{ if !errors.Is(err, redis.Nil) { logger.Errorf("get cmdb resource(%s) watch event error: %v", resourceType, err) @@ -221,8 +223,33 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm h.cleanModuleKeys.Store(int(bkModuleId), struct{}{}) } case CmdbResourceTypeHost: - // todo: implement this - continue + ip, _ := event.BkDetail["bk_host_innerip"].(string) + cloudId, _ := event.BkDetail["bk_cloud_id"].(float64) + agentId, _ := event.BkDetail["bk_agent_id"].(string) + + // 尝试将主机关联字段加入待清理列表,如果刷新业务时发现这些字段不存在,将会进行清理 + if ip != "" { + h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", ip, int(cloudId)), struct{}{}) + } + if agentId != "" { + h.cleanHostKeys.Store(agentId, struct{}{}) + } + + // 如果是删除事件,将主机ID加入待清理列表 + if event.BkEventType == "delete" { + h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) + } + + // 将主机所属业务加入待刷新列表 + if host != nil { + h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) + if host.BkAgentId != "" { + h.cleanHostKeys.Store(host.BkAgentId, struct{}{}) + } + if host.BkHostInnerip != "" { + h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", host.BkHostInnerip, host.BkCloudId), struct{}{}) + } + } case CmdbResourceTypeHostRelation: bkBizId, ok := event.BkDetail["bk_biz_id"].(float64) if !ok { @@ -232,7 +259,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm // 如果拉不到主机信息,直接刷新业务并清理主机ID if host == nil { h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) - h.cleanHostKeys.Store(int(bkHostId), struct{}{}) + h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) continue } @@ -246,7 +273,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm if event.BkEventType == "delete" || host.BkBizId != int(bkBizId) { // 如果是删除事件,将主机ID加入待清理列表 - h.cleanHostKeys.Store(int(bkHostId), struct{}{}) + h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) // 如果是删除事件,将业务ID加入待刷新列表 h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) 
h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) @@ -300,21 +327,31 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { return nil } +// getLastUpdateTime 获取最后一次全量刷新时间 +func (h *CmdbEventHandler) getLastUpdateTimeKey(cacheType string) string { + return fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, cacheType) +} + // getFullRefreshInterval 获取全量刷新间隔时间 func (h *CmdbEventHandler) getFullRefreshInterval(cacheType string) time.Duration { fullRefreshInterval, ok := h.fullRefreshIntervals[cacheType] - // 最低600秒的间隔 + // 默认全量刷新间隔时间为10分钟 if !ok { - fullRefreshInterval = time.Second * 300 + fullRefreshInterval = time.Second * 600 } + + // 最低全量刷新间隔时间为1分钟 + if fullRefreshInterval < time.Minute { + fullRefreshInterval = time.Minute + } + return fullRefreshInterval } // ifRunRefreshAll 判断是否执行全量刷新 -func (h *CmdbEventHandler) ifRunRefreshAll(ctx context.Context, cacheType string) bool { +func (h *CmdbEventHandler) ifRunRefreshAll(ctx context.Context, cacheType string, now int64) bool { // 获取最后一次全量刷新时间 - lastUpdateTimeKey := fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, cacheType) - lastUpdateTime, err := h.redisClient.Get(ctx, lastUpdateTimeKey).Result() + lastUpdateTime, err := h.redisClient.Get(ctx, h.getLastUpdateTimeKey(cacheType)).Result() if err != nil { if !errors.Is(err, redis.Nil) { logger.Errorf("get last update time error: %v", err) @@ -329,7 +366,7 @@ func (h *CmdbEventHandler) ifRunRefreshAll(ctx context.Context, cacheType string } // 如果超过全量刷新间隔时间,执行全量刷新 - if time.Now().Unix()-lastUpdateTimestamp > int64(h.getFullRefreshInterval(cacheType).Seconds()) { + if now-lastUpdateTimestamp > int64(h.getFullRefreshInterval(cacheType).Seconds()) { return true } @@ -351,7 +388,7 @@ func (h *CmdbEventHandler) Run(ctx context.Context) { go func() { defer wg.Done() - if h.ifRunRefreshAll(ctx, cacheManager.Type()) { + if h.ifRunRefreshAll(ctx, cacheManager.Type(), time.Now().Unix()) { // 全量刷新 err := RefreshAll(ctx, cacheManager, 
cacheManager.GetConcurrentLimit()) if err != nil { @@ -361,8 +398,12 @@ func (h *CmdbEventHandler) Run(ctx context.Context) { logger.Infof("refresh all cmdb resource(%s) cache", cacheManager.Type()) // 记录全量刷新时间 - lastUpdateTimeKey := fmt.Sprintf("%s.cmdb_last_refresh_all_time.%s", h.prefix, cacheManager.Type()) - _, err = h.redisClient.Set(ctx, lastUpdateTimeKey, strconv.FormatInt(time.Now().Unix(), 10), 24*time.Hour).Result() + _, err = h.redisClient.Set( + ctx, + h.getLastUpdateTimeKey(cacheManager.Type()), + strconv.FormatInt(time.Now().Unix(), 10), + 24*time.Hour, + ).Result() if err != nil { logger.Errorf("set last update time error: %v", err) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go index ab305f12b..58272b9b4 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go @@ -21,3 +21,86 @@ // SOFTWARE. 
package cmdbcache + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" +) + +func TestGetEvents(t *testing.T) { + rOpts := &redis.Options{ + Mode: "standalone", + Addrs: []string{testRedisAddr}, + } + + client, _ := redis.GetClient(rOpts) + ctx := context.Background() + + // 注入测试数据 + eventData := []interface{}{ + "{\"bk_cursor\":\"123\",\"bk_resource\":\"host_relation\",\"bk_event_type\":\"delete\",\"bk_detail\":{\"bk_biz_id\":2,\"bk_host_id\":1,\"bk_module_id\":1,\"bk_set_id\":1,\"bk_supplier_account\":\"0\"}}", + "{\"bk_cursor\":\"124\",\"bk_resource\":\"host_relation\",\"bk_event_type\":\"create\",\"bk_detail\":{\"bk_biz_id\":2,\"bk_host_id\":2,\"bk_module_id\":2,\"bk_set_id\":2,\"bk_supplier_account\":\"0\"}}", + } + + handler, err := NewCmdbEventHandler(t.Name(), rOpts, []string{"host_topo"}, map[string]time.Duration{"host_topo": 61 * time.Second, "set": time.Second}, 1) + if err != nil { + t.Fatalf("failed to create handler: %v", err) + } + + // 验证刷新间隔设置 + assert.EqualValues(t, handler.getFullRefreshInterval("host_topo"), 61*time.Second) + + // 验证最小刷新间隔1分钟 + assert.EqualValues(t, handler.getFullRefreshInterval("set"), time.Minute) + + // 验证默认值10分钟 + assert.EqualValues(t, handler.getFullRefreshInterval("module"), 600*time.Second) + + key := handler.getEventKey("host_relation") + client.RPush(ctx, key, eventData...) 
+ + // 获取事件 + events, err := handler.getEvents(ctx, CmdbResourceTypeHostRelation) + if err != nil { + t.Fatalf("failed to get events: %v", err) + } + + assert.EqualValues(t, len(events), 2) + + // 验证事件内容 + assert.EqualValues(t, events[0].BkCursor, "123") + assert.EqualValues(t, events[0].BkResource, "host_relation") + assert.EqualValues(t, events[0].BkEventType, "delete") + assert.EqualValues(t, events[1].BkCursor, "124") + assert.EqualValues(t, events[1].BkResource, "host_relation") + assert.EqualValues(t, events[1].BkEventType, "create") +} + +func TestIfRunRefreshAll(t *testing.T) { + rOpts := &redis.Options{ + Mode: "standalone", + Addrs: []string{testRedisAddr}, + } + + client, _ := redis.GetClient(rOpts) + ctx := context.Background() + + cacheType := "host_topo" + handler, err := NewCmdbEventHandler(t.Name(), rOpts, []string{cacheType}, map[string]time.Duration{cacheType: 61 * time.Second}, 1) + if err != nil { + t.Fatalf("failed to create handler: %v", err) + } + + now := time.Now() + client.Set(ctx, handler.getLastUpdateTimeKey(cacheType), now.Add(-60*time.Second).Unix(), 0) + + // 验证刷新时间间隔 + assert.False(t, handler.ifRunRefreshAll(ctx, cacheType, now.Unix())) + assert.False(t, handler.ifRunRefreshAll(ctx, cacheType, now.Add(time.Second).Unix())) + assert.True(t, handler.ifRunRefreshAll(ctx, cacheType, now.Add(2*time.Second).Unix())) +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go new file mode 100644 index 000000000..7ef8fcf3c --- /dev/null +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go @@ -0,0 +1,84 @@ +// MIT License + +// Copyright (c) 2021~2024 腾讯蓝鲸 + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package cmdbcache + +//func TestResourceWatch(t *testing.T) { +// redisOptions := redis.Options{ +// Mode: "standalone", +// Addrs: []string{"127.0.0.1:6379"}, +// } +// +// // 系统信号 +// signalChan := make(chan os.Signal, 1) +// signal.Notify(signalChan, os.Interrupt, os.Kill) +// +// //调用cancel函数取消 +// ctx, cancel := context.WithCancel(context.Background()) +// defer cancel() +// +// // 监听信号 +// go func() { +// <-signalChan +// cancel() +// }() +// +// prefix := t.Name() +// +// wg := &sync.WaitGroup{} +// wg.Add(1) +// +// go func() { +// defer cancel() +// defer wg.Done() +// +// params := &WatchCmdbResourceChangeEventTaskParams{ +// Redis: redisOptions, +// Prefix: prefix, +// } +// payload, _ := json.Marshal(params) +// if err := WatchCmdbResourceChangeEventTask(ctx, payload); err != nil { +// t.Errorf("TestWatch failed, err: %v", err) +// return +// } +// }() + +//go func() { +// defer cancel() +// defer wg.Done() +// +// params := &RefreshTaskParams{ +// Redis: redisOptions, +// Prefix: prefix, +// EventHandleInterval: 60, +// CacheTypes: []string{"host_topo"}, +// FullRefreshIntervals: map[string]int{"host_topo": 1800, "business": 1800, "module": 1800, "set": 
1800, "service_instance": 60}, +// } +// payload, _ := json.Marshal(params) +// if err := CacheRefreshTask(ctx, payload); err != nil { +// t.Errorf("TestHandle failed, err: %v", err) +// return +// } +//}() + +//wg.Wait() +//} From 955df428299e2c12baa71767e484627ccfeb1c1f Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:11:42 +0800 Subject: [PATCH 3/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../internal/alarm/cmdbcache/base.go | 180 +++++------------- .../internal/alarm/cmdbcache/business.go | 35 +--- .../internal/alarm/cmdbcache/daemon.go | 5 +- .../internal/alarm/cmdbcache/dynamic_group.go | 13 +- .../internal/alarm/cmdbcache/event_process.go | 166 ++++++++++++++-- .../alarm/cmdbcache/event_process_test.go | 4 +- .../internal/alarm/cmdbcache/event_watch.go | 3 +- .../internal/alarm/cmdbcache/host.go | 59 +++--- .../internal/alarm/cmdbcache/module.go | 26 +-- .../alarm/cmdbcache/service_instance.go | 12 +- .../internal/alarm/cmdbcache/set.go | 28 ++- 11 files changed, 273 insertions(+), 258 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go index 75c05fdc9..5e64ed217 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go @@ -33,7 +33,6 @@ import ( cfg "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/config" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" 
"github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/jsonx" ) @@ -75,14 +74,18 @@ func getCmdbApi() *cmdb.Client { type Manager interface { // Type 缓存类型 Type() string + // GetCacheKey 获取缓存key + GetCacheKey(key string) string // RefreshByBiz 按业务刷新缓存 RefreshByBiz(ctx context.Context, bizID int) error + // RefreshByBizIds 按业务列表刷新缓存,并清理指定的缓存 + RefreshByBizIds(ctx context.Context, bizIds []int, concurrentLimit int) error // RefreshGlobal 刷新全局缓存 RefreshGlobal(ctx context.Context) error - // CleanByBiz 按业务清理缓存 - CleanByBiz(ctx context.Context, bizID int) error // CleanGlobal 清理全局缓存 CleanGlobal(ctx context.Context) error + // CleanPartial 清理部分缓存 + CleanPartial(ctx context.Context, cacheKey string, cleanFields []string) error // Reset 重置 Reset() @@ -119,20 +122,26 @@ func NewBaseCacheManager(prefix string, opt *redis.Options, concurrentLimit int) }, nil } +// Type 缓存类型 +func (c *BaseCacheManager) Type() string { + return "base" +} + // Reset 重置 func (c *BaseCacheManager) Reset() { - for key := range c.updatedFieldSet { - c.updateFieldLocks[key].Lock() - c.updatedFieldSet[key] = make(map[string]struct{}) - c.updateFieldLocks[key].Unlock() + for cacheKey := range c.updatedFieldSet { + c.updateFieldLocks[cacheKey].Lock() + c.updatedFieldSet[cacheKey] = make(map[string]struct{}) + c.updateFieldLocks[cacheKey].Unlock() } } // initUpdatedFieldSet 初始化更新字段集合,确保后续不存在并发问题 func (c *BaseCacheManager) initUpdatedFieldSet(keys ...string) { for _, key := range keys { - c.updatedFieldSet[c.GetCacheKey(key)] = make(map[string]struct{}) - c.updateFieldLocks[c.GetCacheKey(key)] = &sync.Mutex{} + cacheKey := c.GetCacheKey(key) + c.updatedFieldSet[cacheKey] = make(map[string]struct{}) + c.updateFieldLocks[cacheKey] = &sync.Mutex{} } } @@ -149,19 +158,20 @@ func (c *BaseCacheManager) GetCacheKey(key string) string { // UpdateHashMapCache 更新hashmap类型缓存 func (c *BaseCacheManager) 
UpdateHashMapCache(ctx context.Context, key string, data map[string]string) error { client := c.RedisClient + cacheKey := c.GetCacheKey(key) // 初始化更新字段集合 - updatedFieldSet, ok := c.updatedFieldSet[key] + updatedFieldSet, ok := c.updatedFieldSet[cacheKey] if !ok { return errors.Errorf("key %s not found in updatedFieldSet", key) } - lock, _ := c.updateFieldLocks[key] + lock, _ := c.updateFieldLocks[cacheKey] // 执行更新 pipeline := client.Pipeline() lock.Lock() for field, value := range data { - pipeline.HSet(ctx, key, field, value) + pipeline.HSet(ctx, cacheKey, field, value) updatedFieldSet[field] = struct{}{} if pipeline.Len() > 500 { @@ -185,16 +195,17 @@ func (c *BaseCacheManager) UpdateHashMapCache(ctx context.Context, key string, d // DeleteMissingHashMapFields 删除hashmap类型缓存中不存在的字段 func (c *BaseCacheManager) DeleteMissingHashMapFields(ctx context.Context, key string) error { client := c.RedisClient + cacheKey := c.GetCacheKey(key) // 获取已更新的字段,如果不存在则删除 - updatedFieldSet, ok := c.updatedFieldSet[key] + updatedFieldSet, ok := c.updatedFieldSet[cacheKey] if !ok || len(updatedFieldSet) == 0 { - client.Del(ctx, key) + client.Del(ctx, cacheKey) return nil } // 获取已存在的字段 - existsFields, err := client.HKeys(ctx, key).Result() + existsFields, err := client.HKeys(ctx, cacheKey).Result() if err != nil { return err } @@ -212,7 +223,7 @@ func (c *BaseCacheManager) DeleteMissingHashMapFields(ctx context.Context, key s } // 执行删除 - client.HDel(ctx, key, needDeleteFields...) + client.HDel(ctx, cacheKey, needDeleteFields...) 
return nil } @@ -220,7 +231,7 @@ func (c *BaseCacheManager) DeleteMissingHashMapFields(ctx context.Context, key s // UpdateExpire 更新缓存过期时间 func (c *BaseCacheManager) UpdateExpire(ctx context.Context, key string) error { client := c.RedisClient - result := client.Expire(ctx, key, time.Duration(c.Expire)*time.Second) + result := client.Expire(ctx, c.GetCacheKey(key), c.Expire*time.Second) if err := result.Err(); err != nil { return errors.Wrap(err, "expire hashmap failed") } @@ -237,13 +248,24 @@ func (c *BaseCacheManager) RefreshGlobal(ctx context.Context) error { return nil } -// CleanByBiz 清理业务缓存 -func (c *BaseCacheManager) CleanByBiz(ctx context.Context, bizID int) error { +// CleanGlobal 清理全局缓存 +func (c *BaseCacheManager) CleanGlobal(ctx context.Context) error { return nil } -// CleanGlobal 清理全局缓存 -func (c *BaseCacheManager) CleanGlobal(ctx context.Context) error { +// CleanPartial 清理部分缓存 +func (c *BaseCacheManager) CleanPartial(ctx context.Context, key string, cleanFields []string) error { + cacheKey := c.GetCacheKey(key) + needCleanFields := make([]string, 0) + for _, field := range cleanFields { + if _, ok := c.updatedFieldSet[cacheKey][field]; ok { + needCleanFields = append(needCleanFields, field) + } + } + + if len(needCleanFields) == 0 { + c.RedisClient.HDel(ctx, cacheKey, cleanFields...) 
+ } return nil } @@ -275,119 +297,15 @@ func NewCacheManagerByType(opt *redis.Options, prefix string, cacheType string, return cacheManager, err } -// RefreshAll 执行缓存管理器 -func RefreshAll(ctx context.Context, cacheManager Manager, concurrentLimit int) error { - // 判断是否启用业务缓存刷新 - if cacheManager.useBiz() { - // 获取业务列表 - cmdbApi := getCmdbApi() - var result cmdb.SearchBusinessResp - _, err := cmdbApi.SearchBusiness().SetResult(&result).Request() - if err = api.HandleApiResultError(result.ApiCommonRespMeta, err, "search business failed"); err != nil { - return err - } - - // 并发控制 - wg := sync.WaitGroup{} - limitChan := make(chan struct{}, concurrentLimit) - - // 按业务刷新缓存 - errChan := make(chan error, len(result.Data.Info)) - for _, biz := range result.Data.Info { - limitChan <- struct{}{} - wg.Add(1) - go func(bizId int) { - defer func() { - wg.Done() - <-limitChan - }() - err := cacheManager.RefreshByBiz(ctx, bizId) - if err != nil { - errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) - } - }(biz.BkBizId) - } - - // 等待所有任务完成 - wg.Wait() - close(errChan) - for err := range errChan { - return err - } - - // 按业务清理缓存 - errChan = make(chan error, len(result.Data.Info)) - for _, biz := range result.Data.Info { - limitChan <- struct{}{} - wg.Add(1) - go func(bizId int) { - defer func() { - wg.Done() - <-limitChan - }() - err := cacheManager.CleanByBiz(ctx, bizId) - if err != nil { - errChan <- errors.Wrapf(err, "clean %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) - } - }(biz.BkBizId) - } - - // 等待所有任务完成 - wg.Wait() - close(errChan) - for err := range errChan { - return err - } - } - - // 刷新全局缓存 - err := cacheManager.RefreshGlobal(ctx) - if err != nil { - return errors.Wrapf(err, "refresh global %s cache failed", cacheManager.Type()) - } - - // 清理全局缓存 - err = cacheManager.CleanGlobal(ctx) - if err != nil { - return errors.Wrapf(err, "clean global %s cache failed", cacheManager.Type()) - } - - return nil -} - -// 
RefreshByBizList 按业务列表刷新缓存 -func RefreshByBizList(ctx context.Context, cacheManager Manager, bizIDs []int, concurrentLimit int) error { +// RefreshByBizIds 按业务列表刷新缓存,并清理指定的缓存 +func (c *BaseCacheManager) RefreshByBizIds(ctx context.Context, bizIds []int, concurrentLimit int) error { // 并发控制 wg := sync.WaitGroup{} limitChan := make(chan struct{}, concurrentLimit) // 按业务刷新缓存 - errChan := make(chan error, len(bizIDs)) - for _, bizId := range bizIDs { - limitChan <- struct{}{} - wg.Add(1) - go func(bizId int) { - defer func() { - wg.Done() - <-limitChan - }() - err := cacheManager.RefreshByBiz(ctx, bizId) - if err != nil { - errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) - } - }(bizId) - } - - // 等待所有任务完成 - wg.Wait() - close(errChan) - for err := range errChan { - return err - } - - // 按业务清理缓存 - errChan = make(chan error, len(bizIDs)) - for _, bizId := range bizIDs { + errChan := make(chan error, len(bizIds)) + for _, bizId := range bizIds { limitChan <- struct{}{} wg.Add(1) go func(bizId int) { @@ -395,9 +313,9 @@ func RefreshByBizList(ctx context.Context, cacheManager Manager, bizIDs []int, c wg.Done() <-limitChan }() - err := cacheManager.CleanByBiz(ctx, bizId) + err := c.RefreshByBiz(ctx, bizId) if err != nil { - errChan <- errors.Wrapf(err, "clean %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) + errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", c.Type(), bizId) } }(bizId) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go index 1f20629e7..a3dea76e1 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go @@ -32,12 +32,13 @@ import ( "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + 
"github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/metadata/models/space" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/store/mysql" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -242,15 +243,14 @@ func (m *BusinessCacheManager) RefreshGlobal(ctx context.Context) error { } // 更新缓存 - key := m.GetCacheKey(businessCacheKey) - err = m.UpdateHashMapCache(ctx, key, bizCacheData) + err = m.UpdateHashMapCache(ctx, businessCacheKey, bizCacheData) if err != nil { return errors.Wrap(err, "update business cache failed") } // 更新缓存过期时间 - if err := m.RedisClient.Expire(ctx, key, m.Expire).Err(); err != nil { - return errors.Wrap(err, "set business cache expire time failed") + if err := m.UpdateExpire(ctx, businessCacheKey); err != nil { + return errors.Wrap(err, "update expire failed") } return nil @@ -258,31 +258,8 @@ func (m *BusinessCacheManager) RefreshGlobal(ctx context.Context) error { // CleanGlobal 清理全局缓存 func (m *BusinessCacheManager) CleanGlobal(ctx context.Context) error { - key := m.GetCacheKey(businessCacheKey) - if err := m.DeleteMissingHashMapFields(ctx, key); err != nil { + if err := m.DeleteMissingHashMapFields(ctx, businessCacheKey); err != nil { return errors.Wrap(err, "delete missing fields failed") } return nil } - -// CleanByEvents 根据事件清理缓存 -func (m *BusinessCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if resourceType != "biz" { - return nil - } - - // 获取业务ID - bizIds := make([]string, 0, len(events)) - for _, event := range events { - if bizID, ok := event["bk_biz_id"].(float64); ok { - bizIds = append(bizIds, strconv.Itoa(int(bizID))) - } - } - - // 
删除缓存 - if len(bizIds) > 0 { - m.RedisClient.HDel(ctx, m.GetCacheKey(businessCacheKey), bizIds...) - } - - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go index 865cc4cc1..677c246d4 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go @@ -17,10 +17,11 @@ import ( "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/config" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/remote" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // WatchCmdbResourceChangeEventTaskParams 监听cmdb资源变更任务参数 @@ -160,7 +161,7 @@ func CacheRefreshTask(ctx context.Context, payload []byte) error { defer wg.Done() // 创建资源变更事件处理器 - handler, err := NewCmdbEventHandler(params.Prefix, &params.Redis, cacheTypes, fullRefreshIntervals, bizConcurrent) + handler, err := NewCmdbEventHandler(params.Prefix, &params.Redis, fullRefreshIntervals, bizConcurrent) if err != nil { logger.Errorf("[cmdb_relation] new cmdb event handler failed: %v", err) cancel() diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go index f624e62fb..22567a41a 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go @@ -30,11 +30,10 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api"
"github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -208,7 +207,7 @@ func (m *DynamicGroupCacheManager) RefreshByBiz(ctx context.Context, bizID int) } // 更新动态分组缓存 - err = m.UpdateHashMapCache(ctx, m.GetCacheKey(DynamicGroupCacheKey), dataMap) + err = m.UpdateHashMapCache(ctx, DynamicGroupCacheKey, dataMap) if err != nil { return errors.Wrap(err, "failed to update dynamic group cache") } @@ -218,17 +217,15 @@ func (m *DynamicGroupCacheManager) RefreshByBiz(ctx context.Context, bizID int) // RefreshGlobal 更新全局动态分组缓存 func (m *DynamicGroupCacheManager) RefreshGlobal(ctx context.Context) error { - result := m.RedisClient.Expire(ctx, m.GetCacheKey(DynamicGroupCacheKey), m.Expire) - if err := result.Err(); err != nil { - return errors.Wrap(err, "set dynamic group cache expire failed") + if err := m.UpdateExpire(ctx, DynamicGroupCacheKey); err != nil { + logger.Errorf("failed to update dynamic group cache expire time: %v", err) } return nil } // CleanGlobal 清除全局动态分组缓存 func (m *DynamicGroupCacheManager) CleanGlobal(ctx context.Context) error { - key := m.GetCacheKey(DynamicGroupCacheKey) - err := m.DeleteMissingHashMapFields(ctx, key) + err := m.DeleteMissingHashMapFields(ctx, DynamicGroupCacheKey) if err != nil { return errors.Wrap(err, "failed to clean global dynamic group cache") } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go index d309deef2..68b56e459 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -33,6 +33,7 @@ import ( "github.com/pkg/errors" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" 
"github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) @@ -45,12 +46,15 @@ type CmdbEventHandler struct { // redis client redisClient redis.UniversalClient - // 缓存管理器 - cacheManagers []Manager - // 全量刷新间隔时间 fullRefreshIntervals map[string]time.Duration + // 全量刷新间隔时间 + concurrentLimit int + + // 缓存管理器 + cacheManagers map[string]Manager + // 预处理结果 // 是否刷新业务列表 refreshBiz bool @@ -58,6 +62,8 @@ type CmdbEventHandler struct { refreshBizHostTopo sync.Map // 待清理主机相关key cleanHostKeys sync.Map + // 待清理AgentId相关key + cleanAgentIdKeys sync.Map // 待刷新服务实例业务列表 refreshBizServiceInstance sync.Map // 待清理服务实例相关key @@ -79,7 +85,7 @@ type CmdbEventHandler struct { } // NewCmdbEventHandler 创建cmdb资源变更事件处理器 -func NewCmdbEventHandler(prefix string, rOpt *redis.Options, cacheTypes []string, fullRefreshIntervals map[string]time.Duration, concurrentLimit int) (*CmdbEventHandler, error) { +func NewCmdbEventHandler(prefix string, rOpt *redis.Options, fullRefreshIntervals map[string]time.Duration, concurrentLimit int) (*CmdbEventHandler, error) { // 创建redis client redisClient, err := redis.GetClient(rOpt) if err != nil { @@ -87,20 +93,21 @@ func NewCmdbEventHandler(prefix string, rOpt *redis.Options, cacheTypes []string } // 创建缓存管理器 - cacheManagers := make([]Manager, 0, len(cacheTypes)) - for _, cacheType := range cacheTypes { + cacheManagers := make(map[string]Manager) + for _, cacheType := range cmdbCacheTypes { cacheManager, err := NewCacheManagerByType(rOpt, prefix, cacheType, concurrentLimit) if err != nil { return nil, errors.Wrap(err, "new cache Manager failed") } - cacheManagers = append(cacheManagers, cacheManager) + cacheManagers[cacheType] = cacheManager } return &CmdbEventHandler{ prefix: prefix, redisClient: redisClient, - cacheManagers: cacheManagers, fullRefreshIntervals: fullRefreshIntervals, + concurrentLimit: concurrentLimit, + cacheManagers: cacheManagers, }, nil } @@ -154,6 
+161,7 @@ func (h *CmdbEventHandler) resetPreprocessResults() { h.refreshBiz = false h.refreshBizHostTopo = sync.Map{} h.cleanHostKeys = sync.Map{} + h.cleanAgentIdKeys = sync.Map{} h.refreshBizServiceInstance = sync.Map{} h.cleanServiceInstanceKeys = sync.Map{} h.refreshTopoNode = sync.Map{} @@ -232,7 +240,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", ip, int(cloudId)), struct{}{}) } if agentId != "" { - h.cleanHostKeys.Store(agentId, struct{}{}) + h.cleanAgentIdKeys.Store(agentId, struct{}{}) } // 如果是删除事件,将主机ID加入待清理列表 @@ -244,7 +252,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm if host != nil { h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) if host.BkAgentId != "" { - h.cleanHostKeys.Store(host.BkAgentId, struct{}{}) + h.cleanAgentIdKeys.Store(host.BkAgentId, struct{}{}) } if host.BkHostInnerip != "" { h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", host.BkHostInnerip, host.BkCloudId), struct{}{}) @@ -265,7 +273,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm // 尝试将主机关联字段加入待清理列表,如果刷新业务时发现这些字段不存在,将会进行清理 if host.BkAgentId != "" { - h.cleanHostKeys.Store(host.BkAgentId, struct{}{}) + h.cleanAgentIdKeys.Store(host.BkAgentId, struct{}{}) } if host.BkHostInnerip != "" { h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", host.BkHostInnerip, host.BkCloudId), struct{}{}) @@ -323,7 +331,85 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm // refreshEvents 刷新资源变更事件 func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { - // todo: implement this + wg := sync.WaitGroup{} + + // 刷新业务列表 + if h.refreshBiz { + businessCacheManager := h.cacheManagers["business"] + wg.Add(1) + go func() { + defer wg.Done() + + err := RefreshAll(ctx, businessCacheManager, h.concurrentLimit) + if err != nil { + logger.Errorf("refresh all business cache failed: %v", err) + } + }() + } + + // 刷新主机拓扑业务列表 
+ hostTopoBizIds := make([]int, 0) + h.refreshBizHostTopo.Range(func(key, value interface{}) bool { + bizId, _ := key.(int) + hostTopoBizIds = append(hostTopoBizIds, bizId) + return true + }) + if len(hostTopoBizIds) > 0 { + hostTopoCacheManager := h.cacheManagers["host_topo"] + + wg.Add(1) + go func() { + defer wg.Done() + + // 刷新主机拓扑缓存 + if err := hostTopoCacheManager.RefreshByBizIds(ctx, hostTopoBizIds, h.concurrentLimit); err != nil { + logger.Errorf("refresh host topo cache by biz failed: %v", err) + } + + // 清理hostCacheKey缓存 + cleanFields := make([]string, 0) + h.cleanHostKeys.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, key.(string)) + return true + }) + if err := hostTopoCacheManager.CleanPartial(ctx, hostCacheKey, cleanFields); err != nil { + logger.Errorf("clean host topo cache partial failed: %v", err) + } + + // 清理hostAgentIDCacheKey缓存 + cleanFields = make([]string, 0) + h.cleanAgentIdKeys.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, key.(string)) + return true + }) + if err := hostTopoCacheManager.CleanPartial(ctx, hostAgentIDCacheKey, cleanFields); err != nil { + logger.Errorf("clean host agentId cache partial failed: %v", err) + } + + // 清理topoCacheKey缓存 + cleanFields = make([]string, 0) + h.cleanTopoNode.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, key.(string)) + return true + }) + if err := hostTopoCacheManager.CleanPartial(ctx, topoCacheKey, cleanFields); err != nil { + logger.Errorf("clean topo cache partial failed: %v", err) + } + }() + } + + // 刷新服务实例业务列表 + serviceInstanceBizIds := make([]int, 0) + h.refreshBizServiceInstance.Range(func(key, value interface{}) bool { + bizId, _ := key.(int) + serviceInstanceBizIds = append(serviceInstanceBizIds, bizId) + return true + }) + + if len(serviceInstanceBizIds) > 0 { + + } + return nil } @@ -444,3 +530,59 @@ func (h *CmdbEventHandler) Run(ctx context.Context) { logger.Errorf("refresh cmdb resource event 
error: %v", err) } } + +// RefreshAll 执行缓存管理器 +func RefreshAll(ctx context.Context, cacheManager Manager, concurrentLimit int) error { + // 判断是否启用业务缓存刷新 + if cacheManager.useBiz() { + // 获取业务列表 + cmdbApi := getCmdbApi() + var result cmdb.SearchBusinessResp + _, err := cmdbApi.SearchBusiness().SetResult(&result).Request() + if err = api.HandleApiResultError(result.ApiCommonRespMeta, err, "search business failed"); err != nil { + return err + } + + // 并发控制 + wg := sync.WaitGroup{} + limitChan := make(chan struct{}, concurrentLimit) + + // 按业务刷新缓存 + errChan := make(chan error, len(result.Data.Info)) + for _, biz := range result.Data.Info { + limitChan <- struct{}{} + wg.Add(1) + go func(bizId int) { + defer func() { + wg.Done() + <-limitChan + }() + err := cacheManager.RefreshByBiz(ctx, bizId) + if err != nil { + errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) + } + }(biz.BkBizId) + } + + // 等待所有任务完成 + wg.Wait() + close(errChan) + for err := range errChan { + return err + } + } + + // 刷新全局缓存 + err := cacheManager.RefreshGlobal(ctx) + if err != nil { + return errors.Wrapf(err, "refresh global %s cache failed", cacheManager.Type()) + } + + // 清理全局缓存 + err = cacheManager.CleanGlobal(ctx) + if err != nil { + return errors.Wrapf(err, "clean global %s cache failed", cacheManager.Type()) + } + + return nil +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go index 58272b9b4..24fc0f3f6 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go @@ -47,7 +47,7 @@ func TestGetEvents(t *testing.T) { "{\"bk_cursor\":\"124\",\"bk_resource\":\"host_relation\",\"bk_event_type\":\"create\",\"bk_detail\":{\"bk_biz_id\":2,\"bk_host_id\":2,\"bk_module_id\":2,\"bk_set_id\":2,\"bk_supplier_account\":\"0\"}}", } - handler, err := 
NewCmdbEventHandler(t.Name(), rOpts, []string{"host_topo"}, map[string]time.Duration{"host_topo": 61 * time.Second, "set": time.Second}, 1) + handler, err := NewCmdbEventHandler(t.Name(), rOpts, map[string]time.Duration{"host_topo": 61 * time.Second, "set": time.Second}, 1) if err != nil { t.Fatalf("failed to create handler: %v", err) } @@ -91,7 +91,7 @@ func TestIfRunRefreshAll(t *testing.T) { ctx := context.Background() cacheType := "host_topo" - handler, err := NewCmdbEventHandler(t.Name(), rOpts, []string{cacheType}, map[string]time.Duration{cacheType: 61 * time.Second}, 1) + handler, err := NewCmdbEventHandler(t.Name(), rOpts, map[string]time.Duration{cacheType: 61 * time.Second}, 1) if err != nil { t.Fatalf("failed to create handler: %v", err) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go index e194f0bfe..a3312adad 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go @@ -31,10 +31,11 @@ import ( "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // CmdbResourceType cmdb监听资源类型 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go index a87df86e6..41fbd32a8 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go @@ -239,8 +239,8 @@ func NewAlarmHostInfoByListBizHostsTopoDataInfo(info *cmdb.ListBizHostsTopoDataI return host } -// HostAndTopoCacheManager 主机及拓扑缓存管理器 -type 
HostAndTopoCacheManager struct { +// HostTopoCacheManager 主机及拓扑缓存管理器 +type HostTopoCacheManager struct { *BaseCacheManager hostIpMap map[string]map[string]struct{} @@ -248,26 +248,26 @@ type HostAndTopoCacheManager struct { } // NewHostAndTopoCacheManager 创建主机及拓扑缓存管理器 -func NewHostAndTopoCacheManager(prefix string, opt *redis.Options, concurrentLimit int) (*HostAndTopoCacheManager, error) { +func NewHostAndTopoCacheManager(prefix string, opt *redis.Options, concurrentLimit int) (*HostTopoCacheManager, error) { manager, err := NewBaseCacheManager(prefix, opt, concurrentLimit) if err != nil { return nil, errors.Wrap(err, "new cache Manager failed") } manager.initUpdatedFieldSet(hostCacheKey, hostAgentIDCacheKey, hostIPCacheKey, topoCacheKey) - return &HostAndTopoCacheManager{ + return &HostTopoCacheManager{ BaseCacheManager: manager, hostIpMap: make(map[string]map[string]struct{}), }, nil } // Type 缓存类型 -func (m *HostAndTopoCacheManager) Type() string { +func (m *HostTopoCacheManager) Type() string { return "host_topo" } // RefreshByBiz 按业务刷新缓存 -func (m *HostAndTopoCacheManager) RefreshByBiz(ctx context.Context, bkBizId int) error { +func (m *HostTopoCacheManager) RefreshByBiz(ctx context.Context, bkBizId int) error { logger.Infof("start refresh cmdb cache by biz: %d", bkBizId) startTime := time.Now() defer func() { @@ -337,9 +337,8 @@ func (m *HostAndTopoCacheManager) RefreshByBiz(ctx context.Context, bkBizId int) } // RefreshGlobal 刷新全局缓存 -func (m *HostAndTopoCacheManager) RefreshGlobal(ctx context.Context) error { +func (m *HostTopoCacheManager) RefreshGlobal(ctx context.Context) error { // 刷新主机IP映射缓存 - key := m.GetCacheKey(hostIPCacheKey) data := make(map[string]string) for ip, hostIdMapping := range m.hostIpMap { hostIds := make([]string, 0, len(hostIdMapping)) @@ -349,15 +348,15 @@ func (m *HostAndTopoCacheManager) RefreshGlobal(ctx context.Context) error { data[ip] = fmt.Sprintf("[%s]", strings.Join(hostIds, ",")) } - err := m.UpdateHashMapCache(ctx, key, 
data) + err := m.UpdateHashMapCache(ctx, hostIPCacheKey, data) if err != nil { return errors.Wrap(err, "update host ip cache failed") } // 刷新缓存过期时间 for _, key := range []string{hostCacheKey, topoCacheKey, hostAgentIDCacheKey, hostIPCacheKey} { - if err := m.RedisClient.Expire(ctx, m.GetCacheKey(key), m.Expire).Err(); err != nil { - logger.Error("set cache expire time failed, key: %s, err: %v", key, err) + if err := m.UpdateExpire(ctx, key); err != nil { + logger.Errorf("update expire failed, key: %s, err: %v", key, err) } } @@ -365,18 +364,18 @@ func (m *HostAndTopoCacheManager) RefreshGlobal(ctx context.Context) error { } // Reset 重置 -func (m *HostAndTopoCacheManager) Reset() { +func (m *HostTopoCacheManager) Reset() { m.BaseCacheManager.Reset() m.hostIpMap = make(map[string]map[string]struct{}) } // CleanGlobal 清理全局缓存 -func (m *HostAndTopoCacheManager) CleanGlobal(ctx context.Context) error { +func (m *HostTopoCacheManager) CleanGlobal(ctx context.Context) error { keys := []string{ - m.GetCacheKey(hostIPCacheKey), - m.GetCacheKey(hostCacheKey), - m.GetCacheKey(topoCacheKey), - m.GetCacheKey(hostAgentIDCacheKey), + hostIPCacheKey, + hostCacheKey, + topoCacheKey, + hostAgentIDCacheKey, } for _, key := range keys { @@ -389,9 +388,7 @@ func (m *HostAndTopoCacheManager) CleanGlobal(ctx context.Context) error { } // 刷新拓扑缓存 -func (m *HostAndTopoCacheManager) refreshTopoCache(ctx context.Context, bkBizId int, topo *cmdb.SearchBizInstTopoData) error { - key := m.GetCacheKey(topoCacheKey) - +func (m *HostTopoCacheManager) refreshTopoCache(ctx context.Context, bkBizId int, topo *cmdb.SearchBizInstTopoData) error { topoNodes := make(map[string]string) topo.Traverse(func(node *cmdb.SearchBizInstTopoData) { value, _ := json.Marshal(map[string]interface{}{ @@ -403,7 +400,7 @@ func (m *HostAndTopoCacheManager) refreshTopoCache(ctx context.Context, bkBizId topoNodes[node.GetId()] = string(value) }) - err := m.UpdateHashMapCache(ctx, key, topoNodes) + err := 
m.UpdateHashMapCache(ctx, topoCacheKey, topoNodes) if err != nil { return errors.Wrap(err, "update cmdb topo hashmap cache failed") } @@ -413,8 +410,7 @@ func (m *HostAndTopoCacheManager) refreshTopoCache(ctx context.Context, bkBizId } // 刷新主机信息缓存 -func (m *HostAndTopoCacheManager) refreshHostCache(ctx context.Context, bkBizId int, hosts []*AlarmHostInfo) error { - key := m.GetCacheKey(hostCacheKey) +func (m *HostTopoCacheManager) refreshHostCache(ctx context.Context, bkBizId int, hosts []*AlarmHostInfo) error { hostMapping := make(map[string]string) for _, host := range hosts { value, _ := json.Marshal(host) @@ -426,7 +422,7 @@ func (m *HostAndTopoCacheManager) refreshHostCache(ctx context.Context, bkBizId } } - err := m.UpdateHashMapCache(ctx, key, hostMapping) + err := m.UpdateHashMapCache(ctx, hostCacheKey, hostMapping) if err != nil { return errors.Wrap(err, "update cmdb host hashmap cache failed") } @@ -436,9 +432,7 @@ func (m *HostAndTopoCacheManager) refreshHostCache(ctx context.Context, bkBizId } // 刷新主机AgentID缓存 -func (m *HostAndTopoCacheManager) refreshHostAgentIDCache(ctx context.Context, bkBizId int, hosts []*AlarmHostInfo) error { - key := m.GetCacheKey(hostAgentIDCacheKey) - +func (m *HostTopoCacheManager) refreshHostAgentIDCache(ctx context.Context, bkBizId int, hosts []*AlarmHostInfo) error { agentIDs := make(map[string]string) for _, host := range hosts { if host.BkAgentId != "" { @@ -446,7 +440,7 @@ func (m *HostAndTopoCacheManager) refreshHostAgentIDCache(ctx context.Context, b } } - err := m.UpdateHashMapCache(ctx, key, agentIDs) + err := m.UpdateHashMapCache(ctx, hostAgentIDCacheKey, agentIDs) if err != nil { return errors.Wrap(err, "update hashmap cmdb host agent id cache failed") } @@ -574,7 +568,7 @@ func getHostAndTopoByBiz(ctx context.Context, bkBizID int) ([]*AlarmHostInfo, *c } // CleanByEvents 通过变更事件清理缓存 -func (m *HostAndTopoCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { 
+func (m *HostTopoCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { if len(events) == 0 { return nil } @@ -659,7 +653,7 @@ func (m *HostAndTopoCacheManager) CleanByEvents(ctx context.Context, resourceTyp } // UpdateByEvents 通过变更事件更新缓存 -func (m *HostAndTopoCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { +func (m *HostTopoCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { if len(events) == 0 { return nil } @@ -721,7 +715,6 @@ func (m *HostAndTopoCacheManager) UpdateByEvents(ctx context.Context, resourceTy } } case "mainline_instance": - key := m.GetCacheKey(topoCacheKey) topoNodes := make(map[string]string) for _, event := range events { bkObjId := event["bk_obj_id"].(string) @@ -735,9 +728,9 @@ func (m *HostAndTopoCacheManager) UpdateByEvents(ctx context.Context, resourceTy value, _ := json.Marshal(topo) topoNodes[fmt.Sprintf("%s|%d", bkObjId, int(bkInstId))] = string(value) } - err := m.UpdateHashMapCache(ctx, key, topoNodes) + err := m.UpdateHashMapCache(ctx, topoCacheKey, topoNodes) if err != nil { - return errors.Wrapf(err, "update hashmap cache failed, key: %s", key) + return errors.Wrapf(err, "update hashmap cache failed, key: %s", topoCacheKey) } case "host_relation": for _, event := range events { diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go index 21be68355..2443befaa 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go @@ -163,7 +163,7 @@ func (m *ModuleCacheManager) RefreshByBiz(ctx context.Context, bizID int) error // 更新模块缓存 if moduleCacheData != nil { - err = m.UpdateHashMapCache(ctx, m.GetCacheKey(moduleCacheKey), moduleCacheData) + err = m.UpdateHashMapCache(ctx, moduleCacheKey, moduleCacheData) if err != nil 
{ return errors.Wrapf(err, "refresh module cache by biz: %d failed", bizID) } @@ -176,7 +176,7 @@ func (m *ModuleCacheManager) RefreshByBiz(ctx context.Context, bizID int) error for templateID, moduleIDs := range templateToModules { serviceTemplateCacheData[templateID] = fmt.Sprintf("[%s]", strings.Join(moduleIDs, ",")) } - err = m.UpdateHashMapCache(ctx, m.GetCacheKey(serviceTemplateCacheKey), serviceTemplateCacheData) + err = m.UpdateHashMapCache(ctx, serviceTemplateCacheKey, serviceTemplateCacheData) if err != nil { return errors.Wrapf(err, "refresh service_template cache by biz: %d failed", bizID) } @@ -188,29 +188,23 @@ func (m *ModuleCacheManager) RefreshByBiz(ctx context.Context, bizID int) error // RefreshGlobal 刷新全局模块缓存 func (m *ModuleCacheManager) RefreshGlobal(ctx context.Context) error { - result := m.RedisClient.Expire(ctx, m.GetCacheKey(moduleCacheKey), m.Expire) - if err := result.Err(); err != nil { - return errors.Wrap(err, "set module cache expire time failed") - } - - result = m.RedisClient.Expire(ctx, m.GetCacheKey(serviceTemplateCacheKey), m.Expire) - if err := result.Err(); err != nil { - return errors.Wrap(err, "set service_template cache expire time failed") + keys := []string{moduleCacheKey, serviceTemplateCacheKey} + for _, key := range keys { + if err := m.UpdateExpire(ctx, key); err != nil { + logger.Errorf("failed to update %s cache expire time: %v", key, err) + } } - return nil } // CleanGlobal 清理全局模块缓存 func (m *ModuleCacheManager) CleanGlobal(ctx context.Context) error { - key := m.GetCacheKey(moduleCacheKey) - err := m.DeleteMissingHashMapFields(ctx, key) + err := m.DeleteMissingHashMapFields(ctx, moduleCacheKey) if err != nil { return errors.Wrap(err, "failed to delete missing hashmap fields") } - key = m.GetCacheKey(serviceTemplateCacheKey) - err = m.DeleteMissingHashMapFields(ctx, key) + err = m.DeleteMissingHashMapFields(ctx, serviceTemplateCacheKey) if err != nil { return errors.Wrap(err, "failed to delete missing hashmap 
fields") } @@ -283,7 +277,7 @@ func (m *ModuleCacheManager) CleanByEvents(ctx context.Context, resourceType str // 更新服务模板关联的模块缓存 if len(serviceTemplateCacheData) > 0 { - err := m.UpdateHashMapCache(ctx, m.GetCacheKey(serviceTemplateCacheKey), serviceTemplateCacheData) + err := m.UpdateHashMapCache(ctx, serviceTemplateCacheKey, serviceTemplateCacheData) if err != nil { return errors.Wrap(err, "failed to update service_template hashmap cache") } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go index de54fdc02..b93509946 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go @@ -197,7 +197,6 @@ func (m *ServiceInstanceCacheManager) RefreshByBiz(ctx context.Context, bkBizId } // 刷新服务实例缓存 - key := m.GetCacheKey(serviceInstanceCacheKey) serviceInstanceMap := make(map[string]string) for _, instance := range serviceInstances { value, err := json.Marshal(instance) @@ -206,13 +205,12 @@ func (m *ServiceInstanceCacheManager) RefreshByBiz(ctx context.Context, bkBizId } serviceInstanceMap[strconv.Itoa(instance.ID)] = string(value) } - err = m.UpdateHashMapCache(ctx, key, serviceInstanceMap) + err = m.UpdateHashMapCache(ctx, serviceInstanceCacheKey, serviceInstanceMap) if err != nil { return errors.Wrap(err, "update hashmap cmdb service instance cache failed") } // 刷新主机到服务实例缓存 - key = m.GetCacheKey(hostToServiceInstanceCacheKey) hostToServiceInstances := make(map[string][]string) for _, instance := range serviceInstances { hostToServiceInstances[strconv.Itoa(instance.BkHostId)] = append(hostToServiceInstances[strconv.Itoa(instance.BkHostId)], strconv.Itoa(instance.ID)) @@ -221,7 +219,7 @@ func (m *ServiceInstanceCacheManager) RefreshByBiz(ctx context.Context, bkBizId for hostId, instances := range hostToServiceInstances { hostToServiceInstancesStr[hostId] = fmt.Sprintf("[%s]", 
strings.Join(instances, ",")) } - err = m.UpdateHashMapCache(ctx, key, hostToServiceInstancesStr) + err = m.UpdateHashMapCache(ctx, hostToServiceInstanceCacheKey, hostToServiceInstancesStr) if err != nil { return errors.Wrap(err, "update hashmap host to service instance cache failed") } @@ -231,13 +229,11 @@ func (m *ServiceInstanceCacheManager) RefreshByBiz(ctx context.Context, bkBizId // CleanGlobal 清理全局缓存 func (m *ServiceInstanceCacheManager) CleanGlobal(ctx context.Context) error { - key := m.GetCacheKey(serviceInstanceCacheKey) - if err := m.DeleteMissingHashMapFields(ctx, key); err != nil { + if err := m.DeleteMissingHashMapFields(ctx, serviceInstanceCacheKey); err != nil { return errors.Wrap(err, "delete missing fields failed") } - key = m.GetCacheKey(hostToServiceInstanceCacheKey) - if err := m.DeleteMissingHashMapFields(ctx, key); err != nil { + if err := m.DeleteMissingHashMapFields(ctx, hostToServiceInstanceCacheKey); err != nil { return errors.Wrap(err, "delete missing fields failed") } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go index f6d5dff28..9beeb6b2f 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go @@ -34,10 +34,11 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -146,8 +147,7 @@ func (m *SetCacheManager) RefreshByBiz(ctx context.Context, bizID int) error { // 更新集群缓存 if len(setCacheData) > 0 { - key := m.GetCacheKey(setCacheKey) - err = m.UpdateHashMapCache(ctx, key, 
setCacheData) + err = m.UpdateHashMapCache(ctx, setCacheKey, setCacheData) if err != nil { return errors.Wrapf(err, "refresh set cache by biz: %d failed", bizID) } @@ -156,12 +156,11 @@ func (m *SetCacheManager) RefreshByBiz(ctx context.Context, bizID int) error { // 更新服务模板关联的模块缓存 if len(templateToSets) > 0 { - key := m.GetCacheKey(setTemplateCacheKey) setTemplateCacheData := make(map[string]string) for templateID, setIDs := range templateToSets { setTemplateCacheData[templateID] = fmt.Sprintf("[%s]", strings.Join(setIDs, ",")) } - err = m.UpdateHashMapCache(ctx, key, setTemplateCacheData) + err = m.UpdateHashMapCache(ctx, setTemplateCacheKey, setTemplateCacheData) if err != nil { return errors.Wrapf(err, "refresh set template cache by biz: %d failed", bizID) } @@ -173,26 +172,23 @@ func (m *SetCacheManager) RefreshByBiz(ctx context.Context, bizID int) error { // RefreshGlobal 刷新全局模块缓存 func (m *SetCacheManager) RefreshGlobal(ctx context.Context) error { - result := m.RedisClient.Expire(ctx, m.GetCacheKey(setCacheKey), m.Expire) - if err := result.Err(); err != nil { - return errors.Wrap(err, "set module cache expire time failed") - } - - result = m.RedisClient.Expire(ctx, m.GetCacheKey(setTemplateCacheKey), m.Expire) - if err := result.Err(); err != nil { - return errors.Wrap(err, "set template module cache expire time failed") + keys := []string{setCacheKey, setTemplateCacheKey} + for _, key := range keys { + if err := m.UpdateExpire(ctx, key); err != nil { + logger.Errorf("expire hashmap failed, key: %s, err: %v", key, err) + } } return nil } // CleanGlobal 清理全局模块缓存 func (m *SetCacheManager) CleanGlobal(ctx context.Context) error { - err := m.DeleteMissingHashMapFields(ctx, m.GetCacheKey(setCacheKey)) + err := m.DeleteMissingHashMapFields(ctx, setCacheKey) if err != nil { return errors.Wrap(err, "failed to delete missing hashmap fields") } - err = m.DeleteMissingHashMapFields(ctx, m.GetCacheKey(setTemplateCacheKey)) + err = m.DeleteMissingHashMapFields(ctx, 
setTemplateCacheKey) if err != nil { return errors.Wrap(err, "failed to delete missing hashmap fields") } @@ -269,7 +265,7 @@ func (m *SetCacheManager) CleanByEvents(ctx context.Context, resourceType string // 更新集群模板关联的集群缓存 if len(setTemplateCacheData) > 0 { - err := m.UpdateHashMapCache(ctx, m.GetCacheKey(setTemplateCacheKey), setTemplateCacheData) + err := m.UpdateHashMapCache(ctx, setTemplateCacheKey, setTemplateCacheData) if err != nil { return errors.Wrap(err, "failed to update set template hashmap cache") } From 532762d48e92fb0fe9fc78c0b616420eceafb447 Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Sat, 31 Aug 2024 17:13:03 +0800 Subject: [PATCH 4/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../internal/alarm/cmdbcache/business.go | 5 +- .../internal/alarm/cmdbcache/event_process.go | 260 +++++++++---- .../alarm/cmdbcache/event_process_test.go | 353 +++++++++++++++++- .../internal/alarm/cmdbcache/event_watch.go | 11 +- .../internal/alarm/cmdbcache/host.go | 244 +----------- .../internal/alarm/cmdbcache/host_test.go | 84 +---- .../alarm/cmdbcache/service_instance.go | 8 +- 7 files changed, 564 insertions(+), 401 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go index a3dea76e1..363ce03b9 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/business.go @@ -32,13 +32,12 @@ import ( "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - 
"github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/metadata/models/space" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/store/mysql" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -153,7 +152,7 @@ func (m *BusinessCacheManager) Type() string { // UseBiz 是否按业务执行 func (m *BusinessCacheManager) useBiz() bool { - return true + return false } // RefreshGlobal 刷新全局缓存 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go index 68b56e459..65d2b69f2 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -32,12 +32,14 @@ import ( "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) +const DefaultFullRefreshInterval = time.Second * 600 + // CmdbEventHandler cmdb资源变更事件处理器 type CmdbEventHandler struct { // 缓存key前缀 @@ -72,8 +74,6 @@ type CmdbEventHandler struct { refreshTopoNode sync.Map // 待删除拓扑节点 cleanTopoNode sync.Map - // 待刷新动态分组业务列表 - refreshBizDynamicGroup sync.Map // 待刷新集群业务列表 refreshBizSet sync.Map // 待清理集群相关key @@ -166,7 +166,6 @@ func (h *CmdbEventHandler) resetPreprocessResults() { h.cleanServiceInstanceKeys = sync.Map{} h.refreshTopoNode = sync.Map{} h.cleanTopoNode = sync.Map{} - 
h.refreshBizDynamicGroup = sync.Map{} h.refreshBizSet = sync.Map{} h.cleanSetKeys = sync.Map{} h.refreshBizModule = sync.Map{} @@ -235,6 +234,15 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm cloudId, _ := event.BkDetail["bk_cloud_id"].(float64) agentId, _ := event.BkDetail["bk_agent_id"].(string) + if event.BkEventType == "create" { + continue + } + + if event.BkEventType == "delete" { + // 如果是删除事件,将主机ID加入待清理列表 + h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) + } + // 尝试将主机关联字段加入待清理列表,如果刷新业务时发现这些字段不存在,将会进行清理 if ip != "" { h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", ip, int(cloudId)), struct{}{}) @@ -243,11 +251,6 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm h.cleanAgentIdKeys.Store(agentId, struct{}{}) } - // 如果是删除事件,将主机ID加入待清理列表 - if event.BkEventType == "delete" { - h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) - } - // 将主机所属业务加入待刷新列表 if host != nil { h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) @@ -264,30 +267,25 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm continue } - // 如果拉不到主机信息,直接刷新业务并清理主机ID - if host == nil { - h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) + // 将主机所属业务加入待刷新列表 + h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) + + // 如果是删除事件或业务与主机业务不一致,将主机相关加入待清理列表 + if event.BkEventType == "delete" || (host != nil && host.BkBizId != int(bkBizId)) { + // 如果是删除事件,将主机相关加入待清理列表 h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) - continue - } - // 尝试将主机关联字段加入待清理列表,如果刷新业务时发现这些字段不存在,将会进行清理 - if host.BkAgentId != "" { - h.cleanAgentIdKeys.Store(host.BkAgentId, struct{}{}) - } - if host.BkHostInnerip != "" { - h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", host.BkHostInnerip, host.BkCloudId), struct{}{}) - } + if host != nil { + if host.BkAgentId != "" { + h.cleanAgentIdKeys.Store(host.BkAgentId, struct{}{}) + } + if host.BkHostInnerip != "" { + h.cleanHostKeys.Store(fmt.Sprintf("%s|%d", 
host.BkHostInnerip, host.BkCloudId), struct{}{}) + } + // 如果是删除事件,将业务ID加入待刷新列表 + h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) + } - if event.BkEventType == "delete" || host.BkBizId != int(bkBizId) { - // 如果是删除事件,将主机ID加入待清理列表 - h.cleanHostKeys.Store(strconv.Itoa(int(bkHostId)), struct{}{}) - // 如果是删除事件,将业务ID加入待刷新列表 - h.refreshBizHostTopo.Store(host.BkBizId, struct{}{}) - h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) - } else { - // 如果是更新事件,将业务ID加入待刷新列表 - h.refreshBizHostTopo.Store(int(bkBizId), struct{}{}) } case CmdbResourceTypeMainlineInstance: bkObjId := event.BkDetail["bk_obj_id"].(string) @@ -295,7 +293,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm if !ok { continue } - topoNodeKey := fmt.Sprintf("%s.%d", bkObjId, int(bkInstId)) + topoNodeKey := fmt.Sprintf("%s|%d", bkObjId, int(bkInstId)) if event.BkEventType == "delete" { // 如果是删除事件,将拓扑节点ID加入待清理列表 h.cleanTopoNode.Store(topoNodeKey, struct{}{}) @@ -345,6 +343,9 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { logger.Errorf("refresh all business cache failed: %v", err) } }() + + // 重置 + businessCacheManager.Reset() } // 刷新主机拓扑业务列表 @@ -364,6 +365,8 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { // 刷新主机拓扑缓存 if err := hostTopoCacheManager.RefreshByBizIds(ctx, hostTopoBizIds, h.concurrentLimit); err != nil { logger.Errorf("refresh host topo cache by biz failed: %v", err) + // 如果刷新不顺利,后续清理操作也不执行,否则可能会清理掉正常的缓存 + return } // 清理hostCacheKey缓存 @@ -395,6 +398,25 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { if err := hostTopoCacheManager.CleanPartial(ctx, topoCacheKey, cleanFields); err != nil { logger.Errorf("clean topo cache partial failed: %v", err) } + + // todo: 清理hostIpCacheKey缓存 + + // 重置 + hostTopoCacheManager.Reset() + }() + + // 刷新动态分组业务列表 + wg.Add(1) + go func() { + defer wg.Done() + + dynamicGroupCacheManager := h.cacheManagers["dynamic_group"] + if err := 
dynamicGroupCacheManager.RefreshByBizIds(ctx, hostTopoBizIds, h.concurrentLimit); err != nil { + logger.Errorf("refresh dynamic group cache by biz failed: %v", err) + } + + // 重置 + dynamicGroupCacheManager.Reset() }() } @@ -405,11 +427,108 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { serviceInstanceBizIds = append(serviceInstanceBizIds, bizId) return true }) - if len(serviceInstanceBizIds) > 0 { + serviceInstanceCacheManager := h.cacheManagers["service_instance"] + + wg.Add(1) + go func() { + defer wg.Done() + // 刷新服务实例缓存 + if err := serviceInstanceCacheManager.RefreshByBizIds(ctx, serviceInstanceBizIds, h.concurrentLimit); err != nil { + logger.Errorf("refresh service instance cache by biz failed: %v", err) + } + + // 清理serviceInstanceCacheKey缓存 + cleanFields := make([]string, 0) + h.cleanServiceInstanceKeys.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, strconv.Itoa(key.(int))) + return true + }) + if err := serviceInstanceCacheManager.CleanPartial(ctx, serviceInstanceCacheKey, cleanFields); err != nil { + logger.Errorf("clean service instance cache partial failed: %v", err) + } + + // todo: 清理hostToServiceInstanceCacheKey缓存 + + // 重置 + serviceInstanceCacheManager.Reset() + }() } + // 刷新集群业务列表 + setBizIds := make([]int, 0) + h.refreshBizSet.Range(func(key, value interface{}) bool { + bizId, _ := key.(int) + setBizIds = append(setBizIds, bizId) + return true + }) + if len(setBizIds) > 0 { + setCacheManager := h.cacheManagers["set"] + + wg.Add(1) + go func() { + defer wg.Done() + + // 刷新集群缓存 + if err := setCacheManager.RefreshByBizIds(ctx, setBizIds, h.concurrentLimit); err != nil { + logger.Errorf("refresh set cache by biz failed: %v", err) + } + + // 清理setCacheKey缓存 + cleanFields := make([]string, 0) + h.cleanSetKeys.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, strconv.Itoa(key.(int))) + return true + }) + if err := setCacheManager.CleanPartial(ctx, setCacheKey, 
cleanFields); err != nil { + logger.Errorf("clean set cache partial failed: %v", err) + } + + // todo: 清理setTemplateCacheKey缓存 + + // 重置 + setCacheManager.Reset() + }() + } + + // 刷新模块业务列表 + moduleBizIds := make([]int, 0) + h.refreshBizModule.Range(func(key, value interface{}) bool { + bizId, _ := key.(int) + moduleBizIds = append(moduleBizIds, bizId) + return true + }) + if len(moduleBizIds) > 0 { + moduleCacheManager := h.cacheManagers["module"] + wg.Add(1) + go func() { + defer wg.Done() + + // 刷新模块缓存 + if err := moduleCacheManager.RefreshByBizIds(ctx, moduleBizIds, h.concurrentLimit); err != nil { + logger.Errorf("refresh module cache by biz failed: %v", err) + } + + // 清理moduleCacheKey缓存 + cleanFields := make([]string, 0) + h.cleanModuleKeys.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, strconv.Itoa(key.(int))) + return true + }) + if err := moduleCacheManager.CleanPartial(ctx, moduleCacheKey, cleanFields); err != nil { + logger.Errorf("clean module cache partial failed: %v", err) + } + + // todo: 清理serviceTemplateCacheKey缓存 + + // 重置 + moduleCacheManager.Reset() + }() + } + + wg.Wait() + return nil } @@ -423,7 +542,7 @@ func (h *CmdbEventHandler) getFullRefreshInterval(cacheType string) time.Duratio fullRefreshInterval, ok := h.fullRefreshIntervals[cacheType] // 默认全量刷新间隔时间为10分钟 if !ok { - fullRefreshInterval = time.Second * 600 + fullRefreshInterval = DefaultFullRefreshInterval } // 最低全量刷新间隔时间为1分钟 @@ -459,14 +578,9 @@ func (h *CmdbEventHandler) ifRunRefreshAll(ctx context.Context, cacheType string return false } -// Run 处理cmdb资源变更事件 -// 1. 遍历所有缓存类型,如果超过全量刷新间隔时间,先执行全量刷新 -// 2. 从缓存中获取资源变更并进行预处理 -// 3. 
根据预处理结果,执行缓存变更动作 -func (h *CmdbEventHandler) Run(ctx context.Context) { +// runRefreshAll 判断所有的缓存类型,如果超过全量刷新间隔时间,先执行全量刷新 +func (h *CmdbEventHandler) runRefreshAll(ctx context.Context) { wg := sync.WaitGroup{} - - // 如果超过全量刷新间隔时间,先执行全量刷新 for _, cacheManager := range h.cacheManagers { wg.Add(1) @@ -474,34 +588,50 @@ func (h *CmdbEventHandler) Run(ctx context.Context) { go func() { defer wg.Done() - if h.ifRunRefreshAll(ctx, cacheManager.Type(), time.Now().Unix()) { - // 全量刷新 - err := RefreshAll(ctx, cacheManager, cacheManager.GetConcurrentLimit()) - if err != nil { - logger.Errorf("refresh all cache failed: %v", err) - } + // 判断是否执行全量刷新 + if !h.ifRunRefreshAll(ctx, cacheManager.Type(), time.Now().Unix()) { + return + } - logger.Infof("refresh all cmdb resource(%s) cache", cacheManager.Type()) + // 全量刷新 + err := RefreshAll(ctx, cacheManager, cacheManager.GetConcurrentLimit()) + if err != nil { + logger.Errorf("refresh all cache failed: %v", err) + } - // 记录全量刷新时间 - _, err = h.redisClient.Set( - ctx, - h.getLastUpdateTimeKey(cacheManager.Type()), - strconv.FormatInt(time.Now().Unix(), 10), - 24*time.Hour, - ).Result() - if err != nil { - logger.Errorf("set last update time error: %v", err) - } + // 重置 + cacheManager.Reset() + + logger.Infof("refresh all cmdb resource(%s) cache", cacheManager.Type()) + + // 记录全量刷新时间 + _, err = h.redisClient.Set( + ctx, + h.getLastUpdateTimeKey(cacheManager.Type()), + strconv.FormatInt(time.Now().Unix(), 10), + 24*time.Hour, + ).Result() + if err != nil { + logger.Errorf("set last update time error: %v", err) } }() } wg.Wait() +} + +// Run 处理cmdb资源变更事件 +// 1. 遍历所有缓存类型,如果超过全量刷新间隔时间,先执行全量刷新 +// 2. 从缓存中获取资源变更并进行预处理 +// 3. 
根据预处理结果,执行缓存变更动作 +func (h *CmdbEventHandler) Run(ctx context.Context) { + // 如果超过全量刷新间隔时间,先执行全量刷新 + h.runRefreshAll(ctx) // 重置预处理结果 h.resetPreprocessResults() // 从缓存中获取资源变更并进行预处理 + wg := sync.WaitGroup{} for _, resourceType := range CmdbResourceTypes { wg.Add(1) resourceType := resourceType @@ -536,11 +666,9 @@ func RefreshAll(ctx context.Context, cacheManager Manager, concurrentLimit int) // 判断是否启用业务缓存刷新 if cacheManager.useBiz() { // 获取业务列表 - cmdbApi := getCmdbApi() - var result cmdb.SearchBusinessResp - _, err := cmdbApi.SearchBusiness().SetResult(&result).Request() - if err = api.HandleApiResultError(result.ApiCommonRespMeta, err, "search business failed"); err != nil { - return err + businesses, err := getBusinessList(ctx) + if err != nil { + return errors.Wrap(err, "get business list failed") } // 并发控制 @@ -548,8 +676,8 @@ func RefreshAll(ctx context.Context, cacheManager Manager, concurrentLimit int) limitChan := make(chan struct{}, concurrentLimit) // 按业务刷新缓存 - errChan := make(chan error, len(result.Data.Info)) - for _, biz := range result.Data.Info { + errChan := make(chan error, len(businesses)) + for _, biz := range businesses { limitChan <- struct{}{} wg.Add(1) go func(bizId int) { @@ -561,7 +689,7 @@ func RefreshAll(ctx context.Context, cacheManager Manager, concurrentLimit int) if err != nil { errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) } - }(biz.BkBizId) + }(int(biz["bk_biz_id"].(float64))) } // 等待所有任务完成 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go index 24fc0f3f6..97599d2c1 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go @@ -24,12 +24,17 @@ package cmdbcache import ( "context" + "encoding/json" + "fmt" + "reflect" "testing" "time" + "github.com/agiledragon/gomonkey/v2" 
"github.com/stretchr/testify/assert" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" ) func TestGetEvents(t *testing.T) { @@ -59,7 +64,7 @@ func TestGetEvents(t *testing.T) { assert.EqualValues(t, handler.getFullRefreshInterval("set"), time.Minute) // 验证默认值10分钟 - assert.EqualValues(t, handler.getFullRefreshInterval("module"), 600*time.Second) + assert.EqualValues(t, handler.getFullRefreshInterval("module"), DefaultFullRefreshInterval) key := handler.getEventKey("host_relation") client.RPush(ctx, key, eventData...) @@ -99,8 +104,352 @@ func TestIfRunRefreshAll(t *testing.T) { now := time.Now() client.Set(ctx, handler.getLastUpdateTimeKey(cacheType), now.Add(-60*time.Second).Unix(), 0) - // 验证刷新时间间隔 + // 验证全量刷新时间间隔判断 assert.False(t, handler.ifRunRefreshAll(ctx, cacheType, now.Unix())) assert.False(t, handler.ifRunRefreshAll(ctx, cacheType, now.Add(time.Second).Unix())) assert.True(t, handler.ifRunRefreshAll(ctx, cacheType, now.Add(2*time.Second).Unix())) } + +func TestRefreshAll(t *testing.T) { + getBusinessListPatch := gomonkey.ApplyFunc(getBusinessList, func(ctx context.Context) ([]map[string]interface{}, error) { + return DemoBusinesses, nil + }) + defer getBusinessListPatch.Reset() + + rOpts := &redis.Options{ + Mode: "standalone", + Addrs: []string{testRedisAddr}, + } + + ctx := context.Background() + + t.Run("TestRefreshAllWithBiz", func(t *testing.T) { + cm, _ := NewSetCacheManager(t.Name(), rOpts, 1) + refreshByBizCount := 0 + patchRefreshByBiz := gomonkey.ApplyMethod(reflect.TypeOf(cm), "RefreshByBiz", func(cm *SetCacheManager, ctx context.Context, bizID int) error { + refreshByBizCount++ + return nil + }) + defer patchRefreshByBiz.Reset() + + refreshGlobalCount := 0 + patchRefreshGlobal := gomonkey.ApplyMethod(reflect.TypeOf(cm), "RefreshGlobal", func(cm *SetCacheManager, ctx context.Context) error { + refreshGlobalCount++ + 
return nil + }) + defer patchRefreshGlobal.Reset() + + cleanGlobalCount := 0 + patchCleanGlobal := gomonkey.ApplyMethod(reflect.TypeOf(cm), "CleanGlobal", func(cm *SetCacheManager, ctx context.Context) error { + cleanGlobalCount++ + return nil + }) + defer patchCleanGlobal.Reset() + + err := RefreshAll(ctx, cm, 1) + if err != nil { + t.Fatalf("RefreshAll failed, err: %v", err) + } + + assert.Equal(t, refreshByBizCount, 2) + assert.Equal(t, refreshGlobalCount, 1) + assert.Equal(t, cleanGlobalCount, 1) + }) + + t.Run("TestRefreshAllWithoutBiz", func(t *testing.T) { + cm, _ := NewBusinessCacheManager(t.Name(), rOpts, 1) + refreshByBizCount := 0 + patchRefreshByBiz := gomonkey.ApplyMethod(reflect.TypeOf(cm), "RefreshByBiz", func(cm *BusinessCacheManager, ctx context.Context, bizID int) error { + refreshByBizCount++ + return nil + }) + defer patchRefreshByBiz.Reset() + + refreshGlobalCount := 0 + patchRefreshGlobal := gomonkey.ApplyMethod(reflect.TypeOf(cm), "RefreshGlobal", func(cm *BusinessCacheManager, ctx context.Context) error { + refreshGlobalCount++ + return nil + }) + defer patchRefreshGlobal.Reset() + + cleanGlobalCount := 0 + patchCleanGlobal := gomonkey.ApplyMethod(reflect.TypeOf(cm), "CleanGlobal", func(cm *BusinessCacheManager, ctx context.Context) error { + cleanGlobalCount++ + return nil + }) + defer patchCleanGlobal.Reset() + + err := RefreshAll(ctx, cm, 1) + if err != nil { + t.Fatalf("RefreshAll failed, err: %v", err) + } + + assert.Equal(t, refreshByBizCount, 0) + assert.Equal(t, refreshGlobalCount, 1) + assert.Equal(t, cleanGlobalCount, 1) + }) +} + +func TestRunRefreshAll(t *testing.T) { + rOpts := &redis.Options{ + Mode: "standalone", + Addrs: []string{testRedisAddr}, + } + + client, _ := redis.GetClient(rOpts) + ctx := context.Background() + + handler, err := NewCmdbEventHandler(t.Name(), rOpts, map[string]time.Duration{}, 1) + if err != nil { + t.Fatalf("failed to create handler: %v", err) + } + + refreshAllCount := 0 + patchRefreshAll := 
gomonkey.ApplyFunc(RefreshAll, func(ctx context.Context, cacheManager Manager, concurrentLimit int) error { + refreshAllCount++ + return nil + }) + defer patchRefreshAll.Reset() + + now := time.Now() + handler.runRefreshAll(ctx) + + // 验证RefreshAll调用次数 + assert.Equal(t, refreshAllCount, len(handler.cacheManagers)) + + // 验证全量刷新时间戳 + for cacheType := range handler.cacheManagers { + lastUpdateTimeKey := handler.getLastUpdateTimeKey(cacheType) + lastUpdateTime, _ := client.Get(ctx, lastUpdateTimeKey).Int64() + t.Logf("lastUpdateTime: %d", lastUpdateTime) + assert.True(t, lastUpdateTime >= now.Unix()) + } +} + +func TestPreprocessEvent(t *testing.T) { + rOpts := &redis.Options{ + Mode: "standalone", + Addrs: []string{testRedisAddr}, + } + + ctx := context.Background() + + // 测试用例 + testCases := []struct { + name string + resourceType CmdbResourceType + events []string + checkFunc func(*testing.T, *CmdbEventHandler) + }{ + { + name: "BizCreate", + resourceType: CmdbResourceTypeBiz, + events: []string{ + `{"bk_cursor":"1","bk_resource":"biz","bk_event_type":"create","bk_detail":{"bk_biz_id":1}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + assert.Equal(t, handler.refreshBiz, true) + }, + }, + { + name: "BizDelete", + resourceType: CmdbResourceTypeBiz, + events: []string{ + `{"bk_cursor":"1","bk_resource":"biz","bk_event_type":"delete","bk_detail":{"bk_biz_id":1}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + assert.Equal(t, handler.refreshBiz, true) + }, + }, + { + name: "BizUpdate", + resourceType: CmdbResourceTypeBiz, + events: []string{ + `{"bk_cursor":"1","bk_resource":"biz","bk_event_type":"update","bk_detail":{"bk_biz_id":1}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + assert.Equal(t, handler.refreshBiz, true) + }, + }, + { + name: "Set", + resourceType: CmdbResourceTypeSet, + events: []string{ + 
`{"bk_cursor":"1","bk_resource":"set","bk_event_type":"update","bk_detail":{"bk_biz_id":1,"bk_set_id":10}}`, + `{"bk_cursor":"2","bk_resource":"set","bk_event_type":"create","bk_detail":{"bk_biz_id":2,"bk_set_id":11}}`, + `{"bk_cursor":"3","bk_resource":"set","bk_event_type":"delete","bk_detail":{"bk_biz_id":2,"bk_set_id":12}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + _, ok := handler.refreshBizSet.Load(1) + assert.True(t, ok) + + _, ok = handler.refreshBizSet.Load(2) + assert.True(t, ok) + + _, ok = handler.cleanSetKeys.Load(12) + assert.True(t, ok) + + _, ok = handler.cleanSetKeys.Load(10) + assert.False(t, ok) + + _, ok = handler.cleanSetKeys.Load(11) + assert.False(t, ok) + }, + }, + { + name: "Module", + resourceType: CmdbResourceTypeModule, + events: []string{ + `{"bk_cursor":"1","bk_resource":"module","bk_event_type":"update","bk_detail":{"bk_biz_id":1,"bk_module_id":10}}`, + `{"bk_cursor":"2","bk_resource":"module","bk_event_type":"create","bk_detail":{"bk_biz_id":2,"bk_module_id":11}}`, + `{"bk_cursor":"3","bk_resource":"module","bk_event_type":"delete","bk_detail":{"bk_biz_id":2,"bk_module_id":12}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + _, ok := handler.refreshBizModule.Load(1) + assert.True(t, ok) + + _, ok = handler.refreshBizModule.Load(2) + assert.True(t, ok) + + _, ok = handler.cleanModuleKeys.Load(12) + assert.True(t, ok) + + _, ok = handler.cleanModuleKeys.Load(10) + assert.False(t, ok) + + _, ok = handler.cleanModuleKeys.Load(11) + assert.False(t, ok) + }, + }, + { + name: "Topo", + resourceType: CmdbResourceTypeMainlineInstance, + events: []string{ + `{"bk_cursor":"1","bk_resource":"mainline_instance","bk_event_type":"update","bk_detail":{"bk_obj_id":"set","bk_inst_id":1, "bk_obj_name":"集群", "bk_inst_name":"node1"}}`, + `{"bk_cursor":"2","bk_resource":"mainline_instance","bk_event_type":"create","bk_detail":{"bk_obj_id":"module","bk_inst_id":2, "bk_obj_name":"模块", 
"bk_inst_name":"node2"}}`, + `{"bk_cursor":"3","bk_resource":"mainline_instance","bk_event_type":"delete","bk_detail":{"bk_obj_id":"module","bk_inst_id":3, "bk_obj_name":"集群", "bk_inst_name":"node3"}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + _, ok := handler.refreshTopoNode.Load("set|1") + assert.True(t, ok) + + _, ok = handler.refreshTopoNode.Load("module|2") + assert.True(t, ok) + + _, ok = handler.cleanTopoNode.Load("module|3") + assert.True(t, ok) + }, + }, + { + name: "Host", + resourceType: CmdbResourceTypeHost, + events: []string{ + `{"bk_cursor":"1","bk_resource":"host","bk_event_type":"delete","bk_detail":{"bk_host_id":1, "bk_host_innerip":"127.0.0.1", "bk_cloud_id":0, "bk_agent_id":"xxx1"}}`, + `{"bk_cursor":"2","bk_resource":"host","bk_event_type":"update","bk_detail":{"bk_host_id":2, "bk_host_innerip":"127.0.0.1", "bk_cloud_id":2, "bk_agent_id":"xxx2"}}`, + `{"bk_cursor":"2","bk_resource":"host","bk_event_type":"create","bk_detail":{"bk_host_id":3, "bk_host_innerip":"127.0.0.1", "bk_cloud_id":3, "bk_agent_id":""}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + _, ok := handler.refreshBizHostTopo.Load(1) + assert.True(t, ok) + + _, ok = handler.refreshBizHostTopo.Load(2) + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("1") + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("127.0.0.1|0") + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("127.0.0.1|2") + assert.True(t, ok) + + _, ok = handler.cleanAgentIdKeys.Load("xxx1") + assert.True(t, ok) + + _, ok = handler.cleanAgentIdKeys.Load("xxx2") + assert.True(t, ok) + }, + }, + { + name: "HostRelation", + resourceType: CmdbResourceTypeHostRelation, + events: []string{ + `{"bk_cursor":"1","bk_resource":"host_relation","bk_event_type":"delete","bk_detail":{"bk_host_id":1, "bk_biz_id":1, "bk_module_id":1, "bk_set_id":1}}`, + `{"bk_cursor":"2","bk_resource":"host_relation","bk_event_type":"update","bk_detail":{"bk_host_id":2, 
"bk_biz_id":4, "bk_module_id":2, "bk_set_id":2}}`, + `{"bk_cursor":"3","bk_resource":"host_relation","bk_event_type":"create","bk_detail":{"bk_host_id":3, "bk_biz_id":3, "bk_module_id":3, "bk_set_id":3}}`, + }, + checkFunc: func(t *testing.T, handler *CmdbEventHandler) { + _, ok := handler.refreshBizHostTopo.Load(1) + assert.True(t, ok) + + _, ok = handler.refreshBizHostTopo.Load(2) + assert.True(t, ok) + + _, ok = handler.refreshBizHostTopo.Load(3) + assert.True(t, ok) + + _, ok = handler.refreshBizHostTopo.Load(4) + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("1") + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("2") + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("127.0.0.1|0") + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("127.0.0.1|2") + assert.True(t, ok) + + _, ok = handler.cleanHostKeys.Load("3") + assert.False(t, ok) + }, + }, + } + + redisClient, err := redis.GetClient(rOpts) + if err != nil { + t.Fatalf("failed to create redis client: %v", err) + } + prefix := t.Name() + + hostKey := fmt.Sprintf("%s.%s", t.Name(), hostCacheKey) + redisClient.HSet( + ctx, + hostKey, + "1", + `{"bk_host_id":1,"bk_host_innerip":"127.0.0.1","bk_cloud_id":0,"bk_agent_id":"xxx1", "bk_biz_id":1}`, + "2", + `{"bk_host_id":2,"bk_host_innerip":"127.0.0.1","bk_cloud_id":2,"bk_agent_id":"xxx3", "bk_biz_id":2}`, + ) + + // 执行测试用例 + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // 创建事件处理器 + handler, err := NewCmdbEventHandler(prefix, rOpts, map[string]time.Duration{}, 1) + if err != nil { + t.Fatalf("failed to create handler: %v", err) + } + + // 事件处理 + events := make([]cmdb.ResourceWatchEvent, len(tc.events)) + for i, event := range tc.events { + err := json.Unmarshal([]byte(event), &events[i]) + if err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + } + err = handler.preprocessEvents(ctx, tc.resourceType, events) + if err != nil { + t.Fatalf("failed to preprocess events: %v", err) + } + + // 
验证处理结果 + tc.checkFunc(t, handler) + }) + } +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go index a3312adad..796bf418f 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go @@ -64,10 +64,13 @@ var CmdbResourceTypes = []CmdbResourceType{ // CmdbResourceTypeFields cmdb资源类型对应的监听字段 var CmdbResourceTypeFields = map[CmdbResourceType][]string{ - CmdbResourceTypeHost: {"bk_host_id", "bk_host_innerip", "bk_cloud_id", "bk_agent_id"}, - CmdbResourceTypeBiz: {"bk_biz_id"}, - CmdbResourceTypeSet: {"bk_biz_id", "bk_set_id", "set_template_id"}, - CmdbResourceTypeModule: {"bk_module_id", "bk_biz_id", "service_template_id"}, + CmdbResourceTypeHost: {"bk_host_id", "bk_host_innerip", "bk_cloud_id", "bk_agent_id"}, + CmdbResourceTypeHostRelation: {"bk_host_id", "bk_biz_id", "bk_module_id", "bk_set_id"}, + CmdbResourceTypeBiz: {"bk_biz_id"}, + CmdbResourceTypeSet: {"bk_biz_id", "bk_set_id", "set_template_id"}, + CmdbResourceTypeModule: {"bk_module_id", "bk_biz_id", "service_template_id"}, + CmdbResourceTypeMainlineInstance: {"bk_obj_id", "bk_inst_id", "bk_obj_name", "bk_inst_name"}, + CmdbResourceTypeProcess: {"bk_biz_id"}, } // CmdbResourceWatcher cmdb资源监听器 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go index 41fbd32a8..2101a6b33 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go @@ -130,7 +130,7 @@ type AlarmHostInfo struct { const ( hostCacheKey = "cmdb.host" hostAgentIDCacheKey = "cmdb.agent_id" - hostIPCacheKey = "cmdb.host_ip" + hostIpCacheKey = "cmdb.host_ip" topoCacheKey = "cmdb.topo" ) @@ -254,7 +254,7 @@ func NewHostAndTopoCacheManager(prefix string, opt *redis.Options, concurrentLim return nil, errors.Wrap(err, "new cache Manager failed") } - 
manager.initUpdatedFieldSet(hostCacheKey, hostAgentIDCacheKey, hostIPCacheKey, topoCacheKey) + manager.initUpdatedFieldSet(hostCacheKey, hostAgentIDCacheKey, hostIpCacheKey, topoCacheKey) return &HostTopoCacheManager{ BaseCacheManager: manager, hostIpMap: make(map[string]map[string]struct{}), @@ -348,13 +348,13 @@ func (m *HostTopoCacheManager) RefreshGlobal(ctx context.Context) error { data[ip] = fmt.Sprintf("[%s]", strings.Join(hostIds, ",")) } - err := m.UpdateHashMapCache(ctx, hostIPCacheKey, data) + err := m.UpdateHashMapCache(ctx, hostIpCacheKey, data) if err != nil { return errors.Wrap(err, "update host ip cache failed") } // 刷新缓存过期时间 - for _, key := range []string{hostCacheKey, topoCacheKey, hostAgentIDCacheKey, hostIPCacheKey} { + for _, key := range []string{hostCacheKey, topoCacheKey, hostAgentIDCacheKey, hostIpCacheKey} { if err := m.UpdateExpire(ctx, key); err != nil { logger.Errorf("update expire failed, key: %s, err: %v", key, err) } @@ -372,7 +372,7 @@ func (m *HostTopoCacheManager) Reset() { // CleanGlobal 清理全局缓存 func (m *HostTopoCacheManager) CleanGlobal(ctx context.Context) error { keys := []string{ - hostIPCacheKey, + hostIpCacheKey, hostCacheKey, topoCacheKey, hostAgentIDCacheKey, @@ -566,237 +566,3 @@ func getHostAndTopoByBiz(ctx context.Context, bkBizID int) ([]*AlarmHostInfo, *c return hosts, &bizInstTopoResp.Data[0], nil } - -// CleanByEvents 通过变更事件清理缓存 -func (m *HostTopoCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if len(events) == 0 { - return nil - } - - client := m.RedisClient - switch resourceType { - case "host": - agentIds := make([]string, 0) - hostKeys := make([]string, 0) - - // 提取需要删除的缓存key - for _, event := range events { - agentId, ok := event["bk_agent_id"].(string) - if ok && agentId != "" { - agentIds = append(agentIds, agentId) - } - - hostId, ok := event["bk_host_id"].(float64) - if ok && hostId != 0 { - hostKeys = append(hostKeys, 
strconv.Itoa(int(hostId))) - } - - ip, ok := event["bk_host_innerip"].(string) - bkCloudId, ok := event["bk_cloud_id"].(float64) - if ok && ip != "" { - hostKeys = append(hostKeys, fmt.Sprintf("%s|%d", ip, int(bkCloudId))) - } - } - - // 删除缓存 - if len(agentIds) > 0 { - err := client.HDel(ctx, m.GetCacheKey(hostAgentIDCacheKey), agentIds...).Err() - if err != nil { - logger.Errorf("hdel failed, key: %s, err: %v", m.GetCacheKey(hostAgentIDCacheKey), err) - } - } - if len(hostKeys) > 0 { - // 清理 relationMetrics 里的缓存数据 - result := m.RedisClient.HMGet(ctx, m.GetCacheKey(hostCacheKey), hostKeys...) - clearNodes := make([]*AlarmHostInfo, 0) - for _, value := range result.Val() { - // 如果找不到对应的缓存,不需要更新 - if value == nil { - continue - } - - var host *AlarmHostInfo - err := json.Unmarshal([]byte(value.(string)), &host) - if err != nil { - continue - } - clearNodes = append(clearNodes, host) - } - GetRelationMetricsBuilder().ClearMetricsWithHostID(clearNodes...) - - // 记录需要更新的业务ID - err := client.HDel(ctx, m.GetCacheKey(hostCacheKey), hostKeys...).Err() - if err != nil { - logger.Errorf("hdel failed, key: %s, err: %v", m.GetCacheKey(hostCacheKey), err) - } - } - case "mainline_instance": - key := m.GetCacheKey(topoCacheKey) - topoIds := make([]string, 0) - for _, event := range events { - bkObjId := event["bk_obj_id"].(string) - bkInstId, ok := event["bk_inst_id"].(float64) - if !ok { - continue - } - topoIds = append(topoIds, fmt.Sprintf("%s|%d", bkObjId, int(bkInstId))) - } - if len(topoIds) == 0 { - return nil - } - err := client.HDel(ctx, key, topoIds...).Err() - if err != nil { - return errors.Wrap(err, "hdel failed") - } - } - return nil -} - -// UpdateByEvents 通过变更事件更新缓存 -func (m *HostTopoCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if len(events) == 0 { - return nil - } - - needCleanAgentIds := make(map[string]struct{}) - needCleanHostKeys := make(map[string]struct{}) - needUpdateBizIds := 
make(map[int]struct{}) - switch resourceType { - case "host": - key := m.GetCacheKey(hostCacheKey) - // 提取需要更新的缓存key - for _, event := range events { - cacheKeys := make([]string, 0) - - ip, ok := event["bk_host_innerip"].(string) - bkCloudId, ok := event["bk_cloud_id"].(float64) - hostKey := "" - - if ok && ip != "" { - hostKey = fmt.Sprintf("%s|%d", ip, int(bkCloudId)) - cacheKeys = append(cacheKeys, hostKey) - } - - bkHostId, ok := event["bk_host_id"].(float64) - if ok && bkHostId > 0 { - cacheKeys = append(cacheKeys, strconv.Itoa(int(bkHostId))) - } - - result := m.RedisClient.HMGet(ctx, key, cacheKeys...) - if result.Err() != nil { - return errors.Wrapf(result.Err(), "hmget failed, key: %s", key) - } - - agentId, ok := event["bk_agent_id"].(string) - - for _, value := range result.Val() { - if value == nil { - continue - } - var host *AlarmHostInfo - err := json.Unmarshal([]byte(value.(string)), &host) - if err != nil { - continue - } - needUpdateBizIds[host.BkBizId] = struct{}{} - - // 如果有agentId变更,需要清理agentId缓存 - if ok && agentId != host.BkAgentId && host.BkAgentId != "" { - needCleanAgentIds[host.BkAgentId] = struct{}{} - } - - // 如果有ip变更,需要清理ip缓存 - if host.BkHostInnerip != "" { - oldHostKey := fmt.Sprintf("%s|%d", host.BkHostInnerip, host.BkCloudId) - if hostKey != oldHostKey { - needCleanHostKeys[oldHostKey] = struct{}{} - } - } - } - } - case "mainline_instance": - topoNodes := make(map[string]string) - for _, event := range events { - bkObjId := event["bk_obj_id"].(string) - bkInstId := event["bk_inst_id"].(float64) - topo := map[string]interface{}{ - "bk_inst_id": int(bkInstId), - "bk_inst_name": event["bk_inst_name"], - "bk_obj_id": bkObjId, - "bk_obj_name": event["bk_obj_name"], - } - value, _ := json.Marshal(topo) - topoNodes[fmt.Sprintf("%s|%d", bkObjId, int(bkInstId))] = string(value) - } - err := m.UpdateHashMapCache(ctx, topoCacheKey, topoNodes) - if err != nil { - return errors.Wrapf(err, "update hashmap cache failed, key: %s", topoCacheKey) - 
} - case "host_relation": - for _, event := range events { - bkBizID, ok := event["bk_biz_id"].(float64) - if !ok { - continue - } - needUpdateBizIds[int(bkBizID)] = struct{}{} - } - } - - // 记录需要更新的业务ID - needUpdateBizIdSlice := make([]string, 0, len(needUpdateBizIds)) - for bizID := range needUpdateBizIds { - needUpdateBizIdSlice = append(needUpdateBizIdSlice, strconv.Itoa(bizID)) - } - logger.Infof("need update host cache biz ids: %v", strings.Join(needUpdateBizIdSlice, ",")) - - // 按业务刷新缓存 - wg := sync.WaitGroup{} - limitChan := make(chan struct{}, m.ConcurrentLimit) - for bizID := range needUpdateBizIds { - wg.Add(1) - limitChan <- struct{}{} - - go func(bizId int) { - defer func() { - <-limitChan - wg.Done() - }() - err := m.RefreshByBiz(ctx, bizId) - if err != nil { - logger.Errorf("failed to refresh host cache by biz: %d, err: %v", bizId, err) - } - }(bizID) - } - wg.Wait() - - // 清理agentId缓存 - if len(needCleanAgentIds) > 0 { - key := m.GetCacheKey(hostAgentIDCacheKey) - agentIds := make([]string, 0, len(needCleanAgentIds)) - for agentId := range needCleanAgentIds { - agentIds = append(agentIds, agentId) - } - - logger.Infof("clean agent id cache, agent ids: %v", agentIds) - err := m.RedisClient.HDel(ctx, key, agentIds...).Err() - if err != nil { - logger.Errorf("hdel failed, key: %s, err: %v", key, err) - } - } - - // 清理ip缓存 - if len(needCleanHostKeys) > 0 { - key := m.GetCacheKey(hostCacheKey) - hostKeys := make([]string, 0, len(needCleanHostKeys)) - for hostKey := range needCleanHostKeys { - hostKeys = append(hostKeys, hostKey) - } - - logger.Infof("clean host cache, host keys: %v", hostKeys) - err := m.RedisClient.HDel(ctx, key, hostKeys...).Err() - if err != nil { - logger.Errorf("hdel failed, key: %s, err: %v", key, err) - } - } - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go index 087e65412..050d346fe 100644 --- 
a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host_test.go @@ -24,7 +24,6 @@ package cmdbcache import ( "context" - "encoding/json" "fmt" "sort" "strconv" @@ -211,82 +210,7 @@ func TestHostAndTopoCacheManager(t *testing.T) { t.Error(err) return } - assert.EqualValues(t, expectedHostIpKeys, client.HKeys(ctx, cacheManager.GetCacheKey(hostIPCacheKey)).Val()) - - // 生成变更事件数据 - allResult := client.HGetAll(ctx, cacheManager.GetCacheKey(hostCacheKey)) - if allResult.Err() != nil { - t.Error(allResult.Err()) - return - } - events := make([]map[string]interface{}, 0, len(allResult.Val())) - for _, v := range allResult.Val() { - var host *AlarmHostInfo - err := json.Unmarshal([]byte(v), &host) - if err != nil { - t.Error(err) - return - } - events = append(events, map[string]interface{}{ - "bk_host_id": float64(host.BkHostId), - "bk_host_innerip": host.BkHostInnerip, - "bk_cloud_id": float64(host.BkCloudId), - // 测试agent_id变化后是否会被删除 - "bk_agent_id": fmt.Sprintf("%s-change", host.BkAgentId), - }) - } - - fmt.Printf(client.HGet(ctx, cacheManager.GetCacheKey(hostCacheKey), "1").Val()) - - // 基于事件更新缓存 - err = cacheManager.UpdateByEvents(ctx, "host", events) - if err != nil { - t.Error(err) - return - } - - // 判断agent_id是否被删除 - for _, event := range events { - agentID := event["bk_agent_id"].(string) - oldAgentID := agentID[:len(agentID)-7] - host := client.HGet(ctx, cacheManager.GetCacheKey(hostCacheKey), strconv.Itoa(int(event["bk_host_id"].(float64)))).Val() - assert.NotEmpty(t, host) - assert.False(t, client.HExists(ctx, cacheManager.GetCacheKey(hostAgentIDCacheKey), oldAgentID).Val()) - } - - // 基于事件清理缓存 - err = cacheManager.CleanByEvents(ctx, "host", events) - if err != nil { - t.Error(err) - return - } - - topoEvent := map[string]interface{}{ - "bk_obj_id": "module", - "bk_inst_id": float64(6), - "bk_inst_name": "测试模块", - "bk_obj_name": "模块", - } - - err = cacheManager.CleanByEvents(ctx, 
"mainline_instance", []map[string]interface{}{topoEvent}) - if err != nil { - t.Error(err) - return - } - - assert.False(t, client.HExists(ctx, cacheManager.GetCacheKey(topoCacheKey), "module|6").Val()) - - err = cacheManager.UpdateByEvents(ctx, "mainline_instance", []map[string]interface{}{topoEvent}) - if err != nil { - t.Error(err) - return - } - - assert.True(t, client.HExists(ctx, cacheManager.GetCacheKey(topoCacheKey), "module|6").Val()) - - // 判断清理后是否为空 - assert.Empty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostAgentIDCacheKey)).Val()) - assert.Empty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostCacheKey)).Val()) + assert.EqualValues(t, expectedHostIpKeys, client.HKeys(ctx, cacheManager.GetCacheKey(hostIpCacheKey)).Val()) }) t.Run("Clean", func(t *testing.T) { @@ -313,11 +237,11 @@ func TestHostAndTopoCacheManager(t *testing.T) { // 判断是否存在所有的缓存键 assert.NotEmpty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostAgentIDCacheKey)).Val()) assert.NotEmpty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostCacheKey)).Val()) - assert.NotEmpty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostIPCacheKey)).Val()) + assert.NotEmpty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostIpCacheKey)).Val()) assert.NotEmpty(t, client.HKeys(ctx, cacheManager.GetCacheKey(topoCacheKey)).Val()) // 清理缓存 - cacheManager.initUpdatedFieldSet(hostAgentIDCacheKey, hostCacheKey, hostIPCacheKey, topoCacheKey) + cacheManager.initUpdatedFieldSet(hostAgentIDCacheKey, hostCacheKey, hostIpCacheKey, topoCacheKey) err = cacheManager.CleanGlobal(ctx) if err != nil { t.Error(err) @@ -327,7 +251,7 @@ func TestHostAndTopoCacheManager(t *testing.T) { // 判断清理后是否为空 assert.Empty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostAgentIDCacheKey)).Val()) assert.Empty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostCacheKey)).Val()) - assert.Empty(t, client.HKeys(ctx, cacheManager.GetCacheKey(hostIPCacheKey)).Val()) + assert.Empty(t, client.HKeys(ctx, 
cacheManager.GetCacheKey(hostIpCacheKey)).Val()) assert.Empty(t, client.HKeys(ctx, cacheManager.GetCacheKey(topoCacheKey)).Val()) }) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go index b93509946..d65182f37 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go @@ -34,11 +34,10 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -86,11 +85,6 @@ func (m *ServiceInstanceCacheManager) Type() string { return "service_instance" } -// UseBiz 是否按业务执行 -func (m *ServiceInstanceCacheManager) useBiz() bool { - return true -} - // getServiceInstances 获取服务实例列表 func getServiceInstances(ctx context.Context, bkBizId int) ([]*AlarmServiceInstanceInfo, error) { cmdbApi := getCmdbApi() From f1b1573313856f95c7b1a916aa3e57394bf45e54 Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Sat, 31 Aug 2024 19:22:02 +0800 Subject: [PATCH 5/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../internal/alarm/cmdbcache/event_process.go | 63 ++++++++-- .../alarm/cmdbcache/event_process_test.go | 56 ++++++++- .../internal/alarm/cmdbcache/module.go | 119 +----------------- 
.../internal/alarm/cmdbcache/module_test.go | 47 ------- .../alarm/cmdbcache/service_instance.go | 55 -------- .../internal/alarm/cmdbcache/set.go | 117 ----------------- .../internal/alarm/cmdbcache/set_test.go | 47 ------- 7 files changed, 105 insertions(+), 399 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go index 65d2b69f2..b6b40dc02 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -32,10 +32,9 @@ import ( "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const DefaultFullRefreshInterval = time.Second * 600 @@ -66,22 +65,30 @@ type CmdbEventHandler struct { cleanHostKeys sync.Map // 待清理AgentId相关key cleanAgentIdKeys sync.Map - // 待刷新服务实例业务列表 - refreshBizServiceInstance sync.Map - // 待清理服务实例相关key - cleanServiceInstanceKeys sync.Map + // 待更新拓扑节点 refreshTopoNode sync.Map // 待删除拓扑节点 cleanTopoNode sync.Map + + // 待刷新服务实例业务列表 + refreshBizServiceInstance sync.Map + // 待清理服务实例相关key + cleanServiceInstanceKeys sync.Map + // 待刷新集群业务列表 refreshBizSet sync.Map // 待清理集群相关key cleanSetKeys sync.Map + // 待清理集群模板相关key + cleanSetTemplateIds sync.Map + // 待刷新模块业务列表 refreshBizModule sync.Map // 待清理模块相关key cleanModuleKeys sync.Map + // 待清理服务模板相关key + cleanServiceTemplateIds sync.Map } // NewCmdbEventHandler 创建cmdb资源变更事件处理器 @@ -211,11 +218,17 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm if !ok1 || !ok2 { continue } + + // 将业务ID加入待刷新列表 h.refreshBizSet.Store(int(bizId), struct{}{}) // 如果是删除事件,将集群ID加入待清理列表 if event.BkEventType == "delete" { 
h.cleanSetKeys.Store(int(bkSetId), struct{}{}) + setTemplateId, _ := event.BkDetail["set_template_id"].(float64) + if int(setTemplateId) != 0 { + h.cleanSetTemplateIds.Store(int(setTemplateId), struct{}{}) + } } case CmdbResourceTypeModule: bizId, ok1 := event.BkDetail["bk_biz_id"].(float64) @@ -223,11 +236,17 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm if !ok1 || !ok2 { continue } + + // 将业务ID加入待刷新列表 h.refreshBizModule.Store(int(bizId), struct{}{}) // 如果是删除事件,将模块ID加入待清理列表 if event.BkEventType == "delete" { h.cleanModuleKeys.Store(int(bkModuleId), struct{}{}) + serviceInstanceTemplateId, _ := event.BkDetail["service_template_id"].(float64) + if int(serviceInstanceTemplateId) != 0 { + h.cleanServiceTemplateIds.Store(int(serviceInstanceTemplateId), struct{}{}) + } } case CmdbResourceTypeHost: ip, _ := event.BkDetail["bk_host_innerip"].(string) @@ -328,7 +347,7 @@ func (h *CmdbEventHandler) preprocessEvents(ctx context.Context, resourceType Cm } // refreshEvents 刷新资源变更事件 -func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { +func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { wg := sync.WaitGroup{} // 刷新业务列表 @@ -427,6 +446,14 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { serviceInstanceBizIds = append(serviceInstanceBizIds, bizId) return true }) + h.refreshBizHostTopo.Range(func(key, value interface{}) bool { + bizId, _ := key.(int) + _, ok := h.refreshBizServiceInstance.Load(bizId) + if !ok { + serviceInstanceBizIds = append(serviceInstanceBizIds, bizId) + } + return true + }) if len(serviceInstanceBizIds) > 0 { serviceInstanceCacheManager := h.cacheManagers["service_instance"] @@ -485,7 +512,15 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { logger.Errorf("clean set cache partial failed: %v", err) } - // todo: 清理setTemplateCacheKey缓存 + // 清理setTemplateCacheKey缓存 + cleanFields = make([]string, 0) + h.cleanSetTemplateIds.Range(func(key, 
value interface{}) bool { + cleanFields = append(cleanFields, strconv.Itoa(key.(int))) + return true + }) + if err := setCacheManager.CleanPartial(ctx, setTemplateCacheKey, cleanFields); err != nil { + logger.Errorf("clean set template cache partial failed: %v", err) + } // 重置 setCacheManager.Reset() @@ -520,7 +555,15 @@ func (h *CmdbEventHandler) refreshEvents(ctx context.Context) error { logger.Errorf("clean module cache partial failed: %v", err) } - // todo: 清理serviceTemplateCacheKey缓存 + // 清理serviceTemplateCacheKey缓存 + cleanFields = make([]string, 0) + h.cleanServiceTemplateIds.Range(func(key, value interface{}) bool { + cleanFields = append(cleanFields, strconv.Itoa(key.(int))) + return true + }) + if err := moduleCacheManager.CleanPartial(ctx, serviceTemplateCacheKey, cleanFields); err != nil { + logger.Errorf("clean service template cache partial failed: %v", err) + } // 重置 moduleCacheManager.Reset() @@ -655,7 +698,7 @@ func (h *CmdbEventHandler) Run(ctx context.Context) { wg.Wait() // 根据预处理结果,执行缓存变更动作 - err := h.refreshEvents(ctx) + err := h.refreshByEvents(ctx) if err != nil { logger.Errorf("refresh cmdb resource event error: %v", err) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go index 97599d2c1..0262c5084 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process_test.go @@ -275,9 +275,9 @@ func TestPreprocessEvent(t *testing.T) { name: "Set", resourceType: CmdbResourceTypeSet, events: []string{ - `{"bk_cursor":"1","bk_resource":"set","bk_event_type":"update","bk_detail":{"bk_biz_id":1,"bk_set_id":10}}`, - `{"bk_cursor":"2","bk_resource":"set","bk_event_type":"create","bk_detail":{"bk_biz_id":2,"bk_set_id":11}}`, - `{"bk_cursor":"3","bk_resource":"set","bk_event_type":"delete","bk_detail":{"bk_biz_id":2,"bk_set_id":12}}`, + 
`{"bk_cursor":"1","bk_resource":"set","bk_event_type":"update","bk_detail":{"bk_biz_id":1,"bk_set_id":10, "set_template_id":1}}`, + `{"bk_cursor":"2","bk_resource":"set","bk_event_type":"create","bk_detail":{"bk_biz_id":2,"bk_set_id":11, "set_template_id":2}}`, + `{"bk_cursor":"3","bk_resource":"set","bk_event_type":"delete","bk_detail":{"bk_biz_id":2,"bk_set_id":12, "set_template_id":3}}`, }, checkFunc: func(t *testing.T, handler *CmdbEventHandler) { _, ok := handler.refreshBizSet.Load(1) @@ -294,15 +294,18 @@ func TestPreprocessEvent(t *testing.T) { _, ok = handler.cleanSetKeys.Load(11) assert.False(t, ok) + + _, ok = handler.cleanSetTemplateIds.Load(3) + assert.True(t, ok) }, }, { name: "Module", resourceType: CmdbResourceTypeModule, events: []string{ - `{"bk_cursor":"1","bk_resource":"module","bk_event_type":"update","bk_detail":{"bk_biz_id":1,"bk_module_id":10}}`, - `{"bk_cursor":"2","bk_resource":"module","bk_event_type":"create","bk_detail":{"bk_biz_id":2,"bk_module_id":11}}`, - `{"bk_cursor":"3","bk_resource":"module","bk_event_type":"delete","bk_detail":{"bk_biz_id":2,"bk_module_id":12}}`, + `{"bk_cursor":"1","bk_resource":"module","bk_event_type":"update","bk_detail":{"bk_biz_id":1,"bk_module_id":10, "service_template_id":1}}`, + `{"bk_cursor":"2","bk_resource":"module","bk_event_type":"create","bk_detail":{"bk_biz_id":2,"bk_module_id":11, "service_template_id":2}}`, + `{"bk_cursor":"3","bk_resource":"module","bk_event_type":"delete","bk_detail":{"bk_biz_id":2,"bk_module_id":12, "service_template_id":3}}`, }, checkFunc: func(t *testing.T, handler *CmdbEventHandler) { _, ok := handler.refreshBizModule.Load(1) @@ -319,6 +322,9 @@ func TestPreprocessEvent(t *testing.T) { _, ok = handler.cleanModuleKeys.Load(11) assert.False(t, ok) + + _, ok = handler.cleanServiceTemplateIds.Load(3) + assert.True(t, ok) }, }, { @@ -453,3 +459,41 @@ func TestPreprocessEvent(t *testing.T) { }) } } + +func TestRefreshByEvents(t *testing.T) { + rOpts := &redis.Options{ + Mode: 
"standalone", + Addrs: []string{testRedisAddr}, + } + + ctx := context.Background() + + handler, err := NewCmdbEventHandler(t.Name(), rOpts, map[string]time.Duration{}, 1) + if err != nil { + t.Fatalf("failed to create handler: %v", err) + } + + t.Run("Biz", func(t *testing.T) { + refreshAllCount := 0 + patchRefreshAll := gomonkey.ApplyFunc(RefreshAll, func(ctx context.Context, cacheManager Manager, concurrentLimit int) error { + refreshAllCount++ + return nil + }) + defer patchRefreshAll.Reset() + + handler.refreshBiz = true + + err := handler.refreshByEvents(ctx) + if err != nil { + t.Fatalf("failed to refresh by events: %v", err) + } + + assert.Equal(t, refreshAllCount, 1) + }) + + t.Run("Host", func(t *testing.T) { + handler.refreshBizHostTopo.Store(1, struct{}{}) + handler.cleanHostKeys.Store("1", struct{}{}) + handler.cleanAgentIdKeys.Store("xxx1", struct{}{}) + }) +} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go index 2443befaa..f41b8fdd1 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go @@ -28,16 +28,16 @@ import ( "fmt" "strconv" "strings" - "sync" "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" "github.com/mitchellh/mapstructure" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -210,118 +210,3 @@ func (m *ModuleCacheManager) CleanGlobal(ctx context.Context) error { } return nil } - -// CleanByEvents 根据事件清理缓存 -func (m *ModuleCacheManager) CleanByEvents(ctx context.Context, resourceType string, events 
[]map[string]interface{}) error { - // 只处理模块事件 - if resourceType != "module" || len(events) == 0 { - return nil - } - - // 提取模块ID及服务模板ID - needDeleteModuleIds := make(map[int]struct{}) - needUpdateServiceTemplateIds := make(map[string]struct{}) - for _, event := range events { - moduleID, ok := event["bk_module_id"].(float64) - if !ok { - continue - } - // 记录需要删除的模块ID - needDeleteModuleIds[int(moduleID)] = struct{}{} - - // 记录各个服务模板下需要删除的模块ID - if serviceTemplateID, ok := event["service_template_id"].(float64); ok && serviceTemplateID > 0 { - needUpdateServiceTemplateIds[strconv.Itoa(int(serviceTemplateID))] = struct{}{} - } - } - - // 删除服务模板关联的模块缓存 - serviceTemplateCacheData := make(map[string]string) - needDeleteServiceTemplateIds := make([]string, 0) - for serviceTemplateID := range needUpdateServiceTemplateIds { - // 查询存量缓存 - result := m.RedisClient.HGet(ctx, m.GetCacheKey(serviceTemplateCacheKey), serviceTemplateID) - if result.Err() != nil { - continue - } - var oldModuleIDs []int - err := json.Unmarshal([]byte(result.Val()), &oldModuleIDs) - if err != nil { - continue - } - - // 清理需要删除的模块ID - var newModuleIDs []string - for _, moduleID := range oldModuleIDs { - if _, ok := needDeleteModuleIds[moduleID]; !ok { - newModuleIDs = append(newModuleIDs, strconv.Itoa(moduleID)) - } - } - - // 如果删除后,服务模板下没有模块,则需要清理服务模板缓存,否则更新缓存 - if len(newModuleIDs) > 0 { - serviceTemplateCacheData[serviceTemplateID] = fmt.Sprintf("[%s]", strings.Join(newModuleIDs, ",")) - } else { - needDeleteServiceTemplateIds = append(needDeleteServiceTemplateIds, serviceTemplateID) - } - } - - // 删除模块缓存 - if len(needDeleteModuleIds) > 0 { - moduleIds := make([]string, 0, len(needDeleteModuleIds)) - for moduleID := range needDeleteModuleIds { - moduleIds = append(moduleIds, strconv.Itoa(moduleID)) - } - m.RedisClient.HDel(ctx, m.GetCacheKey(moduleCacheKey), moduleIds...) 
- } - - // 更新服务模板关联的模块缓存 - if len(serviceTemplateCacheData) > 0 { - err := m.UpdateHashMapCache(ctx, serviceTemplateCacheKey, serviceTemplateCacheData) - if err != nil { - return errors.Wrap(err, "failed to update service_template hashmap cache") - } - } - - // 清理服务模板关联的模块缓存 - if len(needDeleteServiceTemplateIds) > 0 { - m.RedisClient.HDel(ctx, m.GetCacheKey(serviceTemplateCacheKey), needDeleteServiceTemplateIds...) - } - - return nil -} - -// UpdateByEvents 根据事件更新缓存 -func (m *ModuleCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if resourceType != "module" || len(events) == 0 { - return nil - } - - // 提取业务ID - needUpdateBizIds := make(map[int]struct{}) - for _, event := range events { - if bizID, ok := event["bk_biz_id"].(float64); ok { - needUpdateBizIds[int(bizID)] = struct{}{} - } - } - - // 按业务更新缓存 - wg := sync.WaitGroup{} - limitChan := make(chan struct{}, m.ConcurrentLimit) - for bizID := range needUpdateBizIds { - wg.Add(1) - limitChan <- struct{}{} - go func(bizID int) { - defer func() { - <-limitChan - wg.Done() - }() - err := m.RefreshByBiz(ctx, bizID) - if err != nil { - logger.Errorf("failed to refresh module cache by biz: %d, err: %v", bizID, err) - } - }(bizID) - } - wg.Wait() - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go index dc057e552..2a32eae4b 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module_test.go @@ -113,51 +113,4 @@ func TestModuleCacheManager(t *testing.T) { assert.EqualValues(t, 0, client.HLen(ctx, cacheManager.GetCacheKey(moduleCacheKey)).Val()) assert.EqualValues(t, 0, client.HLen(ctx, cacheManager.GetCacheKey(serviceTemplateCacheKey)).Val()) }) - - t.Run("TestModuleCacheManager_Events", func(t *testing.T) { - cacheManager, err := NewModuleCacheManager(t.Name(), rOpts, 1) - if err 
!= nil { - t.Error(err) - return - } - - events := []map[string]interface{}{ - { - "bk_biz_id": float64(2), - "bk_module_id": float64(1), - }, - } - - err = cacheManager.UpdateByEvents(ctx, "module", events) - if err != nil { - t.Error(err) - return - } - - assert.EqualValues(t, 3, client.HLen(ctx, cacheManager.GetCacheKey(moduleCacheKey)).Val()) - assert.EqualValues(t, 2, client.HLen(ctx, cacheManager.GetCacheKey(serviceTemplateCacheKey)).Val()) - - events = []map[string]interface{}{ - { - "bk_biz_id": float64(2), - "bk_module_id": float64(1), - "service_template_id": float64(1), - }, - { - "bk_biz_id": float64(2), - "bk_module_id": float64(2), - "service_template_id": float64(2), - }, - } - - err = cacheManager.CleanByEvents(ctx, "module", events) - if err != nil { - t.Error(err) - return - } - - assert.EqualValues(t, 1, client.HLen(ctx, cacheManager.GetCacheKey(moduleCacheKey)).Val()) - assert.EqualValues(t, 1, client.HLen(ctx, cacheManager.GetCacheKey(serviceTemplateCacheKey)).Val()) - assert.EqualValues(t, "[3]", client.HGet(ctx, cacheManager.GetCacheKey(serviceTemplateCacheKey), "2").Val()) - }) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go index d65182f37..72bef010b 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go @@ -28,7 +28,6 @@ import ( "fmt" "strconv" "strings" - "sync" "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" "github.com/mitchellh/mapstructure" @@ -37,7 +36,6 @@ import ( "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -233,56 +231,3 @@ func 
(m *ServiceInstanceCacheManager) CleanGlobal(ctx context.Context) error { return nil } - -// CleanByEvents 根据事件清理缓存 -func (m *ServiceInstanceCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - return nil -} - -// UpdateByEvents 根据事件更新缓存 -func (m *ServiceInstanceCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if len(events) == 0 { - return nil - } - - needUpdateBizIds := make(map[int]struct{}) - switch resourceType { - case "process": - for _, event := range events { - bkBizID, ok := event["bk_biz_id"].(float64) - if !ok { - continue - } - needUpdateBizIds[int(bkBizID)] = struct{}{} - } - } - - // 记录需要更新的业务ID - needUpdateBizIdSlice := make([]string, 0, len(needUpdateBizIds)) - for bizID := range needUpdateBizIds { - needUpdateBizIdSlice = append(needUpdateBizIdSlice, strconv.Itoa(bizID)) - } - logger.Infof("need update service instance cache biz ids: %v", strings.Join(needUpdateBizIdSlice, ",")) - - // 按业务刷新缓存 - wg := sync.WaitGroup{} - limitChan := make(chan struct{}, m.ConcurrentLimit) - for bizID := range needUpdateBizIds { - wg.Add(1) - limitChan <- struct{}{} - - go func(bizId int) { - defer func() { - <-limitChan - wg.Done() - }() - err := m.RefreshByBiz(ctx, bizId) - if err != nil { - logger.Errorf("failed to refresh service instance cache by biz: %d, err: %v", bizId, err) - } - }(bizID) - } - - wg.Wait() - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go index 9beeb6b2f..fb9ca0969 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go @@ -28,7 +28,6 @@ import ( "fmt" "strconv" "strings" - "sync" "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" "github.com/mitchellh/mapstructure" @@ -194,119 +193,3 @@ func (m *SetCacheManager) CleanGlobal(ctx context.Context) error { 
} return nil } - -// CleanByEvents 根据事件清理缓存 -func (m *SetCacheManager) CleanByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if resourceType != "set" || len(events) == 0 { - return nil - } - - // 提取集群ID及集群模板ID - needDeleteSetIds := make(map[int]struct{}) - needUpdateSetTemplateIds := make(map[string]struct{}) - for _, event := range events { - setID, ok := event["bk_set_id"].(float64) - if !ok { - continue - } - // 记录需要删除的集群ID - needDeleteSetIds[int(setID)] = struct{}{} - - // 记录需要删除的集群模板关联的集群ID - if setTemplateID, ok := event["set_template_id"].(float64); ok && setTemplateID > 0 { - needUpdateSetTemplateIds[strconv.Itoa(int(setTemplateID))] = struct{}{} - } - } - - setTemplateCacheData := make(map[string]string) - needDeleteSetTemplateIds := make([]string, 0) - for setTemplateID := range needUpdateSetTemplateIds { - // 获取原有的集群ID - result := m.RedisClient.HGet(ctx, m.GetCacheKey(setTemplateCacheKey), setTemplateID) - if result.Err() != nil { - continue - } - - var oldSetIds []int - err := json.Unmarshal([]byte(result.Val()), &oldSetIds) - if err != nil { - continue - } - - // 计算新的集群ID - var newSetIds []string - for _, oldSetID := range oldSetIds { - if _, ok := needDeleteSetIds[oldSetID]; !ok { - newSetIds = append(newSetIds, strconv.Itoa(oldSetID)) - } - } - - // 更新集群模板关联的集群缓存 - if len(newSetIds) > 0 { - setTemplateCacheData[setTemplateID] = fmt.Sprintf("[%s]", strings.Join(newSetIds, ",")) - } else { - needDeleteSetTemplateIds = append(needDeleteSetTemplateIds, setTemplateID) - } - } - - // 删除缓存 - if len(needDeleteSetIds) > 0 { - setIds := make([]string, 0, len(needDeleteSetIds)) - for setID := range needDeleteSetIds { - setIds = append(setIds, strconv.Itoa(setID)) - } - m.RedisClient.HDel(ctx, m.GetCacheKey(setCacheKey), setIds...) - } - - // 删除集群模板关联的集群缓存 - if len(needDeleteSetTemplateIds) > 0 { - m.RedisClient.HDel(ctx, m.GetCacheKey(setTemplateCacheKey), needDeleteSetTemplateIds...) 
- } - - // 更新集群模板关联的集群缓存 - if len(setTemplateCacheData) > 0 { - err := m.UpdateHashMapCache(ctx, setTemplateCacheKey, setTemplateCacheData) - if err != nil { - return errors.Wrap(err, "failed to update set template hashmap cache") - } - } - - return nil -} - -// UpdateByEvents 根据事件更新缓存 -func (m *SetCacheManager) UpdateByEvents(ctx context.Context, resourceType string, events []map[string]interface{}) error { - if resourceType != "set" || len(events) == 0 { - return nil - } - - // 提取业务ID - needUpdateBizIds := make(map[int]struct{}) - for _, event := range events { - bizID, ok := event["bk_biz_id"].(float64) - if ok { - needUpdateBizIds[int(bizID)] = struct{}{} - } - } - - // 按业务更新缓存 - wg := sync.WaitGroup{} - limitChan := make(chan struct{}, m.ConcurrentLimit) - for bizID := range needUpdateBizIds { - wg.Add(1) - limitChan <- struct{}{} - go func(bizID int) { - defer func() { - <-limitChan - wg.Done() - }() - err := m.RefreshByBiz(ctx, bizID) - if err != nil { - logger.Errorf("failed to refresh set cache by biz: %d, err: %v", bizID, err) - } - }(bizID) - } - wg.Wait() - - return nil -} diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go index 5f7ad8d5c..edbf113ee 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set_test.go @@ -113,51 +113,4 @@ func TestSetCacheManager(t *testing.T) { assert.EqualValues(t, 0, client.HLen(ctx, cacheManager.GetCacheKey(setCacheKey)).Val()) assert.EqualValues(t, 0, client.HLen(ctx, cacheManager.GetCacheKey(setTemplateCacheKey)).Val()) }) - - t.Run("TestSetCacheManager_Events", func(t *testing.T) { - cacheManager, err := NewSetCacheManager(t.Name(), rOpts, 1) - if err != nil { - t.Error(err) - return - } - - events := []map[string]interface{}{ - { - "bk_biz_id": float64(2), - "bk_set_id": float64(1), - }, - } - - err = cacheManager.UpdateByEvents(ctx, "set", events) - if err != nil { 
- t.Error(err) - return - } - - assert.EqualValues(t, 3, client.HLen(ctx, cacheManager.GetCacheKey(setCacheKey)).Val()) - assert.EqualValues(t, 2, client.HLen(ctx, cacheManager.GetCacheKey(setTemplateCacheKey)).Val()) - - events = []map[string]interface{}{ - { - "bk_biz_id": float64(2), - "bk_set_id": float64(1), - "set_template_id": float64(1), - }, - { - "bk_biz_id": float64(2), - "bk_set_id": float64(3), - "set_template_id": float64(2), - }, - } - - err = cacheManager.CleanByEvents(ctx, "set", events) - if err != nil { - t.Error(err) - return - } - - assert.EqualValues(t, 1, client.HLen(ctx, cacheManager.GetCacheKey(setCacheKey)).Val()) - assert.EqualValues(t, 1, client.HLen(ctx, cacheManager.GetCacheKey(setTemplateCacheKey)).Val()) - assert.EqualValues(t, `[2]`, client.HGet(ctx, cacheManager.GetCacheKey(setTemplateCacheKey), "1").Val()) - }) } From 89ef743918f72ec1e77d2b864c27c5dcb98af351 Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Sat, 31 Aug 2024 22:04:53 +0800 Subject: [PATCH 6/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../internal/alarm/cmdbcache/base.go | 26 ++--- .../internal/alarm/cmdbcache/daemon.go | 8 -- .../internal/alarm/cmdbcache/dynamic_group.go | 4 +- .../internal/alarm/cmdbcache/event_process.go | 51 ++++------ .../internal/alarm/cmdbcache/event_watch.go | 2 +- .../alarm/cmdbcache/service_instance.go | 94 +++++++++++++++++++ 6 files changed, 132 insertions(+), 53 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go index 5e64ed217..28e23bc55 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go +++ 
b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/base.go @@ -35,6 +35,7 @@ import ( "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/utils/jsonx" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -78,14 +79,12 @@ type Manager interface { GetCacheKey(key string) string // RefreshByBiz 按业务刷新缓存 RefreshByBiz(ctx context.Context, bizID int) error - // RefreshByBizIds 按业务列表刷新缓存,并清理指定的缓存 - RefreshByBizIds(ctx context.Context, bizIds []int, concurrentLimit int) error // RefreshGlobal 刷新全局缓存 RefreshGlobal(ctx context.Context) error // CleanGlobal 清理全局缓存 CleanGlobal(ctx context.Context) error // CleanPartial 清理部分缓存 - CleanPartial(ctx context.Context, cacheKey string, cleanFields []string) error + CleanPartial(ctx context.Context, cacheKey string, cleanFields []string) // Reset 重置 Reset() @@ -254,19 +253,24 @@ func (c *BaseCacheManager) CleanGlobal(ctx context.Context) error { } // CleanPartial 清理部分缓存 -func (c *BaseCacheManager) CleanPartial(ctx context.Context, key string, cleanFields []string) error { +func (c *BaseCacheManager) CleanPartial(ctx context.Context, key string, cleanFields []string) { + if len(cleanFields) == 0 { + return + } + cacheKey := c.GetCacheKey(key) needCleanFields := make([]string, 0) for _, field := range cleanFields { - if _, ok := c.updatedFieldSet[cacheKey][field]; ok { + if _, ok := c.updatedFieldSet[cacheKey][field]; !ok { needCleanFields = append(needCleanFields, field) } } - if len(needCleanFields) == 0 { - c.RedisClient.HDel(ctx, cacheKey, cleanFields...) + logger.Info(fmt.Sprintf("clean partial cache, key: %s, expect clean fields: %v, actual clean fields: %v", key, cleanFields, needCleanFields)) + + if len(needCleanFields) != 0 { + c.RedisClient.HDel(ctx, cacheKey, needCleanFields...) 
} - return nil } // UseBiz 是否按业务执行 @@ -298,7 +302,7 @@ func NewCacheManagerByType(opt *redis.Options, prefix string, cacheType string, } // RefreshByBizIds 按业务列表刷新缓存,并清理指定的缓存 -func (c *BaseCacheManager) RefreshByBizIds(ctx context.Context, bizIds []int, concurrentLimit int) error { +func RefreshByBizIds(ctx context.Context, cacheManager Manager, bizIds []int, concurrentLimit int) error { // 并发控制 wg := sync.WaitGroup{} limitChan := make(chan struct{}, concurrentLimit) @@ -313,9 +317,9 @@ func (c *BaseCacheManager) RefreshByBizIds(ctx context.Context, bizIds []int, co wg.Done() <-limitChan }() - err := c.RefreshByBiz(ctx, bizId) + err := cacheManager.RefreshByBiz(ctx, bizId) if err != nil { - errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", c.Type(), bizId) + errChan <- errors.Wrapf(err, "refresh %s cache by biz failed, biz: %d", cacheManager.Type(), bizId) } }(bizId) } diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go index 677c246d4..51cc38dd5 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/daemon.go @@ -82,8 +82,6 @@ type RefreshTaskParams struct { // 业务执行并发数 BizConcurrent int `json:"biz_concurrent" mapstructure:"biz_concurrent"` - - CacheTypes []string `json:"cache_types" mapstructure:"cache_types"` } // CacheRefreshTask cmdb缓存刷新任务 @@ -113,12 +111,6 @@ func CacheRefreshTask(ctx context.Context, payload []byte) error { fullRefreshIntervals[cacheType] = time.Second * time.Duration(interval) } - // 需要刷新的缓存类型 - cacheTypes := params.CacheTypes - if len(cacheTypes) == 0 { - cacheTypes = cmdbCacheTypes - } - wg := sync.WaitGroup{} cancelCtx, cancel := context.WithCancel(ctx) defer cancel() diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go index 22567a41a..13d9c2379 100644 --- 
a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go @@ -30,10 +30,11 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( @@ -194,6 +195,7 @@ func getDynamicGroupList(ctx context.Context, bizID int) (map[string]map[string] // RefreshByBiz 更新业务下的动态分组缓存 func (m *DynamicGroupCacheManager) RefreshByBiz(ctx context.Context, bizID int) error { + logger.Infof("refresh dynamic group cache by biz: %d", bizID) dynamicGroupToRelatedIDs, err := getDynamicGroupList(ctx, bizID) if err != nil { return errors.Wrap(err, "failed to get dynamic group list") diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go index b6b40dc02..db5145816 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -359,7 +359,9 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { err := RefreshAll(ctx, businessCacheManager, h.concurrentLimit) if err != nil { - logger.Errorf("refresh all business cache failed: %v", err) + logger.Errorf("refresh all business cache by event failed: %v", err) + } else { + logger.Infof("refresh all business cache by event success") } }() @@ -382,10 +384,12 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { defer wg.Done() // 刷新主机拓扑缓存 - if err := hostTopoCacheManager.RefreshByBizIds(ctx, hostTopoBizIds, h.concurrentLimit); err != nil { + if err := 
RefreshByBizIds(ctx, hostTopoCacheManager, hostTopoBizIds, h.concurrentLimit); err != nil { logger.Errorf("refresh host topo cache by biz failed: %v", err) // 如果刷新不顺利,后续清理操作也不执行,否则可能会清理掉正常的缓存 return + } else { + logger.Infof("refresh host topo cache by event success, biz count: %d", len(hostTopoBizIds)) } // 清理hostCacheKey缓存 @@ -394,9 +398,8 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, key.(string)) return true }) - if err := hostTopoCacheManager.CleanPartial(ctx, hostCacheKey, cleanFields); err != nil { - logger.Errorf("clean host topo cache partial failed: %v", err) - } + + hostTopoCacheManager.CleanPartial(ctx, hostCacheKey, cleanFields) // 清理hostAgentIDCacheKey缓存 cleanFields = make([]string, 0) @@ -404,9 +407,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, key.(string)) return true }) - if err := hostTopoCacheManager.CleanPartial(ctx, hostAgentIDCacheKey, cleanFields); err != nil { - logger.Errorf("clean host agentId cache partial failed: %v", err) - } + hostTopoCacheManager.CleanPartial(ctx, hostAgentIDCacheKey, cleanFields) // 清理topoCacheKey缓存 cleanFields = make([]string, 0) @@ -414,9 +415,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, key.(string)) return true }) - if err := hostTopoCacheManager.CleanPartial(ctx, topoCacheKey, cleanFields); err != nil { - logger.Errorf("clean topo cache partial failed: %v", err) - } + hostTopoCacheManager.CleanPartial(ctx, topoCacheKey, cleanFields) // todo: 清理hostIpCacheKey缓存 @@ -430,7 +429,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { defer wg.Done() dynamicGroupCacheManager := h.cacheManagers["dynamic_group"] - if err := dynamicGroupCacheManager.RefreshByBizIds(ctx, hostTopoBizIds, h.concurrentLimit); err != nil { + if err := RefreshByBizIds(ctx, dynamicGroupCacheManager, hostTopoBizIds, 
h.concurrentLimit); err != nil { logger.Errorf("refresh dynamic group cache by biz failed: %v", err) } @@ -462,7 +461,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { defer wg.Done() // 刷新服务实例缓存 - if err := serviceInstanceCacheManager.RefreshByBizIds(ctx, serviceInstanceBizIds, h.concurrentLimit); err != nil { + if err := RefreshByBizIds(ctx, serviceInstanceCacheManager, serviceInstanceBizIds, h.concurrentLimit); err != nil { logger.Errorf("refresh service instance cache by biz failed: %v", err) } @@ -472,11 +471,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, strconv.Itoa(key.(int))) return true }) - if err := serviceInstanceCacheManager.CleanPartial(ctx, serviceInstanceCacheKey, cleanFields); err != nil { - logger.Errorf("clean service instance cache partial failed: %v", err) - } - - // todo: 清理hostToServiceInstanceCacheKey缓存 + serviceInstanceCacheManager.CleanPartial(ctx, serviceInstanceCacheKey, cleanFields) // 重置 serviceInstanceCacheManager.Reset() @@ -498,7 +493,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { defer wg.Done() // 刷新集群缓存 - if err := setCacheManager.RefreshByBizIds(ctx, setBizIds, h.concurrentLimit); err != nil { + if err := RefreshByBizIds(ctx, setCacheManager, setBizIds, h.concurrentLimit); err != nil { logger.Errorf("refresh set cache by biz failed: %v", err) } @@ -508,9 +503,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, strconv.Itoa(key.(int))) return true }) - if err := setCacheManager.CleanPartial(ctx, setCacheKey, cleanFields); err != nil { - logger.Errorf("clean set cache partial failed: %v", err) - } + setCacheManager.CleanPartial(ctx, setCacheKey, cleanFields) // 清理setTemplateCacheKey缓存 cleanFields = make([]string, 0) @@ -518,9 +511,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, 
strconv.Itoa(key.(int))) return true }) - if err := setCacheManager.CleanPartial(ctx, setTemplateCacheKey, cleanFields); err != nil { - logger.Errorf("clean set template cache partial failed: %v", err) - } + setCacheManager.CleanPartial(ctx, setTemplateCacheKey, cleanFields) // 重置 setCacheManager.Reset() @@ -541,7 +532,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { defer wg.Done() // 刷新模块缓存 - if err := moduleCacheManager.RefreshByBizIds(ctx, moduleBizIds, h.concurrentLimit); err != nil { + if err := RefreshByBizIds(ctx, moduleCacheManager, moduleBizIds, h.concurrentLimit); err != nil { logger.Errorf("refresh module cache by biz failed: %v", err) } @@ -551,9 +542,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, strconv.Itoa(key.(int))) return true }) - if err := moduleCacheManager.CleanPartial(ctx, moduleCacheKey, cleanFields); err != nil { - logger.Errorf("clean module cache partial failed: %v", err) - } + moduleCacheManager.CleanPartial(ctx, moduleCacheKey, cleanFields) // 清理serviceTemplateCacheKey缓存 cleanFields = make([]string, 0) @@ -561,9 +550,7 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { cleanFields = append(cleanFields, strconv.Itoa(key.(int))) return true }) - if err := moduleCacheManager.CleanPartial(ctx, serviceTemplateCacheKey, cleanFields); err != nil { - logger.Errorf("clean service template cache partial failed: %v", err) - } + moduleCacheManager.CleanPartial(ctx, serviceTemplateCacheKey, cleanFields) // 重置 moduleCacheManager.Reset() diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go index 796bf418f..4843a98af 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go @@ -70,7 +70,7 @@ var CmdbResourceTypeFields = map[CmdbResourceType][]string{ 
CmdbResourceTypeSet: {"bk_biz_id", "bk_set_id", "set_template_id"}, CmdbResourceTypeModule: {"bk_module_id", "bk_biz_id", "service_template_id"}, CmdbResourceTypeMainlineInstance: {"bk_obj_id", "bk_inst_id", "bk_obj_name", "bk_inst_name"}, - CmdbResourceTypeProcess: {"bk_biz_id"}, + //CmdbResourceTypeProcess: {"bk_biz_id", "service_instance_id"}, } // CmdbResourceWatcher cmdb资源监听器 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go index 72bef010b..d322ee034 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go @@ -30,6 +30,7 @@ import ( "strings" "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" "github.com/mitchellh/mapstructure" "github.com/pkg/errors" @@ -200,6 +201,8 @@ func (m *ServiceInstanceCacheManager) RefreshByBiz(ctx context.Context, bkBizId err = m.UpdateHashMapCache(ctx, serviceInstanceCacheKey, serviceInstanceMap) if err != nil { return errors.Wrap(err, "update hashmap cmdb service instance cache failed") + } else { + logger.Infof("refresh service instance cache by biz: %d, instance count: %d", bkBizId, len(serviceInstances)) } // 刷新主机到服务实例缓存 @@ -214,6 +217,8 @@ func (m *ServiceInstanceCacheManager) RefreshByBiz(ctx context.Context, bkBizId err = m.UpdateHashMapCache(ctx, hostToServiceInstanceCacheKey, hostToServiceInstancesStr) if err != nil { return errors.Wrap(err, "update hashmap host to service instance cache failed") + } else { + logger.Infof("refresh host to service instance cache by biz: %d, host count: %d", bkBizId, len(hostToServiceInstances)) } return nil @@ -231,3 +236,92 @@ func (m *ServiceInstanceCacheManager) CleanGlobal(ctx context.Context) error { return nil } + +func (m *ServiceInstanceCacheManager) CleanPartial(ctx context.Context, key string, cleanFields 
[]string) { + if key != serviceInstanceCacheKey || len(cleanFields) == 0 { + return + } + + cacheKey := m.GetCacheKey(key) + needCleanFields := make([]string, 0) + for _, field := range cleanFields { + if _, ok := m.updatedFieldSet[cacheKey][field]; !ok { + needCleanFields = append(needCleanFields, field) + } + } + + logger.Info(fmt.Sprintf("clean partial cache, key: %s, expect clean fields: %v, actual clean fields: %v", key, cleanFields, needCleanFields)) + + if len(needCleanFields) != 0 { + // 查询需要清理的主机ID + results := m.RedisClient.HMGet(ctx, cacheKey, needCleanFields...).Val() + hostIdToCleanServiceInstanceIds := make(map[string][]int) + for _, result := range results { + if result == nil { + continue + } + var serviceInstance map[string]interface{} + if err := json.Unmarshal([]byte(result.(string)), &serviceInstance); err != nil { + logger.Errorf("unmarshal service instance failed, %v", err) + continue + } + hostId := strconv.Itoa(int(serviceInstance["bk_host_id"].(float64))) + hostIdToCleanServiceInstanceIds[hostId] = append(hostIdToCleanServiceInstanceIds[hostId], int(serviceInstance["id"].(float64))) + } + // 清理服务实例缓存 + m.RedisClient.HDel(ctx, cacheKey, needCleanFields...) 
+ + // 清理主机到服务实例缓存 + cacheKey = m.GetCacheKey(hostToServiceInstanceCacheKey) + hostIds := make([]string, len(hostIdToCleanServiceInstanceIds)) + for hostId := range hostIdToCleanServiceInstanceIds { + hostIds = append(hostIds, hostId) + } + + logger.Infof("partial clean host to service instance cache, host ids: %v", hostIds) + + results = m.RedisClient.HMGet(ctx, cacheKey, hostIds...).Val() + for i, result := range results { + if result == nil { + continue + } + + hostId := hostIds[i] + // 查询主机到服务实例缓存 + var existsInstanceIds []int + if err := json.Unmarshal([]byte(result.(string)), &existsInstanceIds); err != nil { + logger.Errorf("unmarshal host to service instance cache failed, %v", err) + continue + } + + // 剔除需要清理的服务实例ID + cleanInstanceIds := hostIdToCleanServiceInstanceIds[hostId] + newInstanceIds := make([]int, 0, len(existsInstanceIds)) + for _, instanceId := range existsInstanceIds { + add := true + for _, id := range cleanInstanceIds { + if id == instanceId { + add = false + break + } + } + if add { + newInstanceIds = append(newInstanceIds, instanceId) + } + } + + // 更新主机到服务实例缓存 + if len(newInstanceIds) == 0 { + m.RedisClient.HDel(ctx, cacheKey, hostId) + } else { + value, err := json.Marshal(newInstanceIds) + if err != nil { + logger.Errorf("marshal host to service instance cache failed, %v", err) + continue + } + m.RedisClient.HSet(ctx, cacheKey, hostId, string(value)) + } + } + + } +} From 96d3d1e3d389fcd8621c144e129859c3358a59ee Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Sun, 1 Sep 2024 14:58:14 +0800 Subject: [PATCH 7/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go | 2 -- 1 
file changed, 2 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go index db5145816..82aa827b5 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_process.go @@ -417,8 +417,6 @@ func (h *CmdbEventHandler) refreshByEvents(ctx context.Context) error { }) hostTopoCacheManager.CleanPartial(ctx, topoCacheKey, cleanFields) - // todo: 清理hostIpCacheKey缓存 - // 重置 hostTopoCacheManager.Reset() }() From 909c3701b47b62faccbca5fb2faafd63e5224ebf Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Sun, 1 Sep 2024 14:58:44 +0800 Subject: [PATCH 8/9] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E6=8B=93=E6=89=91=E5=8F=8A=E6=9C=8D=E5=8A=A1=E5=AE=9E?= =?UTF-8?q?=E4=BE=8B=E5=88=B7=E6=96=B0=E6=97=B6=E6=95=88=E6=80=A7=20#10101?= =?UTF-8?q?58081119404718?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../alarm/cmdbcache/event_watch_test.go | 61 ++++++++++++------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go index 7ef8fcf3c..f531fd637 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch_test.go @@ -22,6 +22,18 @@ package cmdbcache +//import ( +// "context" +// "encoding/json" +// "os" +// "os/signal" +// "sync" +// "syscall" +// "testing" +// +// "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" +//) +// //func TestResourceWatch(t *testing.T) { // redisOptions := redis.Options{ // Mode: "standalone", @@ -30,7 +42,7 @@ package cmdbcache // // // 系统信号 // signalChan := make(chan os.Signal, 1) -// 
signal.Notify(signalChan, os.Interrupt, os.Kill) +// signal.Notify(signalChan, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGKILL, syscall.SIGINT) // // //调用cancel函数取消 // ctx, cancel := context.WithCancel(context.Background()) @@ -45,8 +57,8 @@ package cmdbcache // prefix := t.Name() // // wg := &sync.WaitGroup{} -// wg.Add(1) // +// wg.Add(1) // go func() { // defer cancel() // defer wg.Done() @@ -61,24 +73,31 @@ package cmdbcache // return // } // }() - -//go func() { -// defer cancel() -// defer wg.Done() // -// params := &RefreshTaskParams{ -// Redis: redisOptions, -// Prefix: prefix, -// EventHandleInterval: 60, -// CacheTypes: []string{"host_topo"}, -// FullRefreshIntervals: map[string]int{"host_topo": 1800, "business": 1800, "module": 1800, "set": 1800, "service_instance": 60}, -// } -// payload, _ := json.Marshal(params) -// if err := CacheRefreshTask(ctx, payload); err != nil { -// t.Errorf("TestHandle failed, err: %v", err) -// return -// } -//}() - -//wg.Wait() +// wg.Add(1) +// go func() { +// defer cancel() +// defer wg.Done() +// +// params := &RefreshTaskParams{ +// Redis: redisOptions, +// Prefix: prefix, +// EventHandleInterval: 60, +// FullRefreshIntervals: map[string]int{ +// "host_topo": 3600 * 24, +// "business": 3600 * 24, +// "module": 3600 * 24, +// "set": 3600 * 24, +// "service_instance": 3600 * 24, +// "dynamic_group": 3600 * 24, +// }, +// } +// payload, _ := json.Marshal(params) +// if err := CacheRefreshTask(ctx, payload); err != nil { +// t.Errorf("TestHandle failed, err: %v", err) +// return +// } +// }() +// +// wg.Wait() //} From 0ac34587404f6f54de86b33e8287f8f1d268325c Mon Sep 17 00:00:00 2001 From: lai <11598235+unique0lai@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:48:51 +0800 Subject: [PATCH 9/9] style: sort import --- .../internal/alarm/cmdbcache/dynamic_group.go | 3 +-- pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go | 3 +-- pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go | 3 +-- 
pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go | 3 +-- .../internal/alarm/cmdbcache/service_instance.go | 2 +- pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go | 3 +-- 6 files changed, 6 insertions(+), 11 deletions(-) diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go index 13d9c2379..fc9f4c778 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/dynamic_group.go @@ -30,11 +30,10 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go index 4843a98af..6dbd373c6 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/event_watch.go @@ -31,11 +31,10 @@ import ( "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // CmdbResourceType cmdb监听资源类型 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go index 2101a6b33..946995a75 100644 
--- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/host.go @@ -35,11 +35,10 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) // hostFields 主机字段 diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go index f41b8fdd1..3e805f1c3 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/module.go @@ -33,11 +33,10 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go index d322ee034..e08bf14fd 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/service_instance.go @@ -30,13 +30,13 @@ import ( "strings" "github.com/TencentBlueKing/bk-apigateway-sdks/core/define" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" "github.com/mitchellh/mapstructure" "github.com/pkg/errors" 
"github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const ( diff --git a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go index fb9ca0969..23e25ed7a 100644 --- a/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go +++ b/pkg/bk-monitor-worker/internal/alarm/cmdbcache/set.go @@ -33,11 +33,10 @@ import ( "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/alarm/redis" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bk-monitor-worker/internal/api/cmdb" + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) const (