From b8788c29d4bcc2a5df74181fc368240e7004bc0c Mon Sep 17 00:00:00 2001 From: nannan00 <17491932+nannan00@users.noreply.github.com> Date: Fri, 17 Nov 2023 03:36:56 -0600 Subject: [PATCH] feat(bklogin): monitoring (#1397) --- src/bk-login/bklogin/common/error_codes.py | 5 + .../bklogin/monitoring/healthz/__init__.py | 10 ++ .../bklogin/monitoring/healthz/probes.py | 22 ++++ .../bklogin/monitoring/healthz/urls.py | 18 +++ .../bklogin/monitoring/healthz/views.py | 60 +++++++++ .../bklogin/monitoring/metrics/__init__.py | 10 ++ .../bklogin/monitoring/metrics/urls.py | 15 +++ .../bklogin/monitoring/metrics/views.py | 27 ++++ .../bklogin/monitoring/tracing/__init__.py | 10 ++ .../bklogin/monitoring/tracing/apps.py | 26 ++++ .../bklogin/monitoring/tracing/hooks.py | 120 ++++++++++++++++++ .../monitoring/tracing/instrumentor.py | 59 +++++++++ .../bklogin/monitoring/tracing/otel.py | 85 +++++++++++++ .../bklogin/monitoring/tracing/sentry.py | 57 +++++++++ src/bk-login/bklogin/monitoring/urls.py | 18 +++ src/bk-login/bklogin/settings.py | 39 ++++++ src/bk-login/bklogin/urls.py | 1 + src/bk-login/poetry.lock | 24 +--- src/bk-login/pyproject.toml | 2 +- .../bkuser/monitoring/healthz/probes.py | 2 +- .../bkuser/monitoring/healthz/serializers.py | 2 +- .../bkuser/monitoring/healthz/views.py | 4 +- 22 files changed, 593 insertions(+), 23 deletions(-) create mode 100644 src/bk-login/bklogin/monitoring/healthz/__init__.py create mode 100644 src/bk-login/bklogin/monitoring/healthz/probes.py create mode 100644 src/bk-login/bklogin/monitoring/healthz/urls.py create mode 100644 src/bk-login/bklogin/monitoring/healthz/views.py create mode 100644 src/bk-login/bklogin/monitoring/metrics/__init__.py create mode 100644 src/bk-login/bklogin/monitoring/metrics/urls.py create mode 100644 src/bk-login/bklogin/monitoring/metrics/views.py create mode 100644 src/bk-login/bklogin/monitoring/tracing/__init__.py create mode 100644 src/bk-login/bklogin/monitoring/tracing/apps.py create mode 100644 
src/bk-login/bklogin/monitoring/tracing/hooks.py create mode 100644 src/bk-login/bklogin/monitoring/tracing/instrumentor.py create mode 100644 src/bk-login/bklogin/monitoring/tracing/otel.py create mode 100644 src/bk-login/bklogin/monitoring/tracing/sentry.py create mode 100644 src/bk-login/bklogin/monitoring/urls.py diff --git a/src/bk-login/bklogin/common/error_codes.py b/src/bk-login/bklogin/common/error_codes.py index 370cc270b..929113264 100644 --- a/src/bk-login/bklogin/common/error_codes.py +++ b/src/bk-login/bklogin/common/error_codes.py @@ -45,6 +45,11 @@ class ErrorCodeCategoryEnum(str, StructuredEnum): class ErrorCodes: # 通用 INVALID_ARGUMENT = ErrorCode(_("参数非法")) + UNAUTHENTICATED = ErrorCode( + _("未认证"), + code_category=ErrorCodeCategoryEnum.UNAUTHENTICATED, + status_code=HTTPStatus.UNAUTHORIZED, + ) NO_PERMISSION = ErrorCode( _("无权限"), code_category=ErrorCodeCategoryEnum.NO_PERMISSION, diff --git a/src/bk-login/bklogin/monitoring/healthz/__init__.py b/src/bk-login/bklogin/monitoring/healthz/__init__.py new file mode 100644 index 000000000..1060b7bf4 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/healthz/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+""" diff --git a/src/bk-login/bklogin/monitoring/healthz/probes.py b/src/bk-login/bklogin/monitoring/healthz/probes.py new file mode 100644 index 000000000..469f1d637 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/healthz/probes.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from blue_krill.monitoring.probe.mysql import MySQLProbe, transfer_django_db_settings +from django.conf import settings +from django.utils.module_loading import import_string + + +def get_default_probes(): + return [import_string(p) for p in settings.HEALTHZ_PROBES] + + +class MysqlProbe(MySQLProbe): + name = "bklogin-mysql" + config = transfer_django_db_settings(settings.DATABASES["default"]) diff --git a/src/bk-login/bklogin/monitoring/healthz/urls.py b/src/bk-login/bklogin/monitoring/healthz/urls.py new file mode 100644 index 000000000..d8066e0e6 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/healthz/urls.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from django.urls import path + +from . import views + +urlpatterns = [ + path("ping", views.PingApi.as_view(), name="api.ping"), + path("healthz", views.HealthzApi.as_view(), name="api.healthz"), +] diff --git a/src/bk-login/bklogin/monitoring/healthz/views.py b/src/bk-login/bklogin/monitoring/healthz/views.py new file mode 100644 index 000000000..141fb8f5c --- /dev/null +++ b/src/bk-login/bklogin/monitoring/healthz/views.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+""" +from blue_krill.monitoring.probe.base import ProbeSet +from django.conf import settings +from django.http import HttpResponse +from django.views.generic import View + +from bklogin.common.error_codes import error_codes +from bklogin.common.response import APISuccessResponse + +from .probes import get_default_probes + + +class PingApi(View): + """就绪&存活探测 API""" + + def get(self, request, *args, **kwargs): + return HttpResponse("pong") + + +class HealthzApi(View): + """健康探测 API""" + + def get(self, request, *args, **kwargs): + token = request.GET.get("token", "") + if not settings.HEALTHZ_TOKEN: + raise error_codes.UNAUTHENTICATED.f( + "Healthz token was not configured in settings, request denied", replace=True + ) + if not (token and token == settings.HEALTHZ_TOKEN): + raise error_codes.UNAUTHENTICATED.f("Please provide valid token", replace=True) + + probe_set = ProbeSet(get_default_probes()) + diagnosis_list = probe_set.examination() + + if diagnosis_list.is_death: + # if something deadly exist, we have to make response non-200 which is easier to be found + # by monitor system and make response as a plain text + raise error_codes.SYSTEM_ERROR.f("internal server error", replace=True).set_data( + diagnosis_list.get_fatal_report() + ) + + results = [ + { + "system_name": i.system_name, + "alive": i.alive, + "issues": [{"fatal": j.fatal, "description": j.description} for j in i.issues], + } + for i in diagnosis_list.items + ] + + return APISuccessResponse(data={"results": results}) diff --git a/src/bk-login/bklogin/monitoring/metrics/__init__.py b/src/bk-login/bklogin/monitoring/metrics/__init__.py new file mode 100644 index 000000000..1060b7bf4 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/metrics/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. 
+Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" diff --git a/src/bk-login/bklogin/monitoring/metrics/urls.py b/src/bk-login/bklogin/monitoring/metrics/urls.py new file mode 100644 index 000000000..0a585da85 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/metrics/urls.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from django.urls import path + +from . import views + +urlpatterns = [path("metrics", views.metric_view, name="prometheus-django-metrics")] diff --git a/src/bk-login/bklogin/monitoring/metrics/views.py b/src/bk-login/bklogin/monitoring/metrics/views.py new file mode 100644 index 000000000..7abb1916e --- /dev/null +++ b/src/bk-login/bklogin/monitoring/metrics/views.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available.
+Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from django.conf import settings +from django_prometheus.exports import ExportToDjangoView + +from bklogin.common.error_codes import error_codes + + +def metric_view(request): + """metric view with basic auth""" + token = request.GET.get("token", "") + if not settings.METRIC_TOKEN: + raise error_codes.UNAUTHENTICATED.f( + "Metric token was not configured in settings, request denied", replace=True + ) + if not (token and token == settings.METRIC_TOKEN): + raise error_codes.UNAUTHENTICATED.f("Please provide valid token", replace=True) + + return ExportToDjangoView(request) diff --git a/src/bk-login/bklogin/monitoring/tracing/__init__.py b/src/bk-login/bklogin/monitoring/tracing/__init__.py new file mode 100644 index 000000000..1060b7bf4 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/tracing/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" diff --git a/src/bk-login/bklogin/monitoring/tracing/apps.py b/src/bk-login/bklogin/monitoring/tracing/apps.py new file mode 100644 index 000000000..2b59a0893 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/tracing/apps.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+""" +from django.apps import AppConfig + +from .otel import setup_by_settings +from .sentry import init_sentry_sdk + + +class TracingConfig(AppConfig): + name = "bklogin.monitoring.tracing" + + def ready(self): + setup_by_settings() + init_sentry_sdk( + django_integrated=True, + redis_integrated=False, + celery_integrated=False, + ) diff --git a/src/bk-login/bklogin/monitoring/tracing/hooks.py b/src/bk-login/bklogin/monitoring/tracing/hooks.py new file mode 100644 index 000000000..58c189a2d --- /dev/null +++ b/src/bk-login/bklogin/monitoring/tracing/hooks.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+""" +import json +from typing import Dict + +import requests +from django.http import HttpRequest, HttpResponse +from opentelemetry.trace import Span, StatusCode, format_trace_id + + +def handle_api_error(span: Span, result: Dict): + """统一处理新版 HTTP API 协议中的错误详情""" + if "error" not in result: + return + + err = result["error"] + span.set_attribute("error_code", err.get("code", "")) + span.set_attribute("error_message", err.get("message", "")) + span.set_attribute("error_system", err.get("system", "")) + # 错误详情若存在,则统一存到一个字段中 + if err_details := err.get("details", []): + span.set_attribute("error_details", json.dumps(err_details)) + + +def requests_response_hook(span: Span, request: requests.Request, response: requests.Response): + """用于处理 requests 库发起的请求响应,需要兼容支持新旧 esb,apigw,新版 HTTP 协议""" + if ( + # requests 请求异常, 例如访问超时等 + response is None + # 并非所有返回内容都是 json 格式的, 因此需要根据返回头进行判断, 避免处理二进制格式的内容 + or response.headers.get("Content-Type") != "application/json" + ): + return + + try: + result = json.loads(response.content) + except Exception: # pylint: disable=broad-except + return + if not isinstance(result, dict): + return + + request_id = ( + # new esb and apigateway + response.headers.get("x-bkapi-request-id") + # legacy api + or response.headers.get("X-Request-Id") + # old esb and other + or result.get("request_id", "") + ) + if request_id: + span.set_attribute("request_id", request_id) + + if "message" in result: + span.set_attribute("error_message", result["message"]) + + # 旧版本 API 中,code 为 0/'0'/'00' 表示成功 + code = result.get("code") + if code is not None: + span.set_attribute("error_code", str(code)) + if str(code) in ["0", "00"]: + span.set_status(StatusCode.OK) + else: + span.set_status(StatusCode.ERROR) + + # 后续均为处理新版 API 协议逻辑,因此此处直接 return + return + + # 根据新版本 HTTP API 协议,处理错误详情 + handle_api_error(span, result) + + if 200 <= response.status_code <= 299: # noqa: PLR2004 + span.set_status(StatusCode.OK) + else: + span.set_status(StatusCode.ERROR) + + +def 
django_request_hook(span: Span, request: HttpRequest): + """在 request 注入 trace_id,方便获取""" + trace_id = span.get_span_context().trace_id + request.otel_trace_id = format_trace_id(trace_id) + + +def django_response_hook(span: Span, request: HttpRequest, response: HttpResponse): + """处理 Django 响应,因用户管理已经使用新版本 HTTP 协议,因此仅支持新版协议""" + + if ( + # requests 请求异常, 例如访问超时等 + response is None + # 并非所有返回内容都是 json 格式的, 因此需要根据返回头进行判断, 避免处理二进制格式的内容 + or response.headers.get("Content-Type") != "application/json" + ): + return + + # 新版本协议中按照标准 HTTP 协议,200 <= code < 300 的都是正常 + if 200 <= response.status_code <= 299: # noqa: PLR2004 + span.set_status(StatusCode.OK) + return + + span.set_status(StatusCode.ERROR) + try: + result = json.loads(response.content) + except Exception: # pylint: disable=broad-except + return + if not isinstance(result, dict): + return + + # 若能够获取到 request_id,则一并记录 + request_id = response.headers.get("X-Request-Id") or result.get("request_id") + if request_id: + span.set_attribute("request_id", request_id) + + handle_api_error(span, result) diff --git a/src/bk-login/bklogin/monitoring/tracing/instrumentor.py b/src/bk-login/bklogin/monitoring/tracing/instrumentor.py new file mode 100644 index 000000000..0be9cf539 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/tracing/instrumentor.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations under the License. +""" +import logging +from typing import Collection + +from django.conf import settings +from opentelemetry.instrumentation import dbapi +from opentelemetry.instrumentation.celery import CeleryInstrumentor +from opentelemetry.instrumentation.django import DjangoInstrumentor +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor # type: ignore +from opentelemetry.instrumentation.logging import LoggingInstrumentor +from opentelemetry.instrumentation.redis import RedisInstrumentor +from opentelemetry.instrumentation.requests import RequestsInstrumentor + +from .hooks import django_request_hook, django_response_hook, requests_response_hook + +logger = logging.getLogger(__name__) + + +class BKLoginInstrumentor(BaseInstrumentor): + def instrumentation_dependencies(self) -> Collection[str]: + return [] + + def _instrument(self, **kwargs): + LoggingInstrumentor().instrument() + logger.info("otel instructment: logging") + RequestsInstrumentor().instrument(response_hook=requests_response_hook) + logger.info("otel instructment: requests") + DjangoInstrumentor().instrument(request_hook=django_request_hook, response_hook=django_response_hook) + logger.info("otel instructment: django") + RedisInstrumentor().instrument() + logger.info("otel instructment: redis") + CeleryInstrumentor().instrument() + logger.info("otel instructment: celery") + + if getattr(settings, "OTEL_INSTRUMENT_DB_API", False): + import MySQLdb # noqa + + dbapi.wrap_connect( + __name__, + MySQLdb, + "connect", + "mysql", + {"database": "db", "port": "port", "host": "host", "user": "user"}, + ) + logger.info("otel instructment: database api") + + def _uninstrument(self, **kwargs): # instrumentor classes are singletons: constructing again yields the instances used in _instrument + for instrumentor in (LoggingInstrumentor(), RequestsInstrumentor(), DjangoInstrumentor(), RedisInstrumentor(), CeleryInstrumentor()): + logger.info("otel uninstrument %s", instrumentor) + instrumentor.uninstrument() diff --git a/src/bk-login/bklogin/monitoring/tracing/otel.py
b/src/bk-login/bklogin/monitoring/tracing/otel.py new file mode 100644 index 000000000..9701c1407 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/tracing/otel.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +import threading + +from django.conf import settings +from opentelemetry import trace +from opentelemetry.exporter.jaeger.thrift import JaegerExporter +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.resources import SERVICE_NAME, Resource +from opentelemetry.sdk.trace import ReadableSpan, TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.trace.sampling import _KNOWN_SAMPLERS + +from .instrumentor import BKLoginInstrumentor + + +class LazyBatchSpanProcessor(BatchSpanProcessor): + def __init__(self, *args, **kwargs): + super(LazyBatchSpanProcessor, self).__init__(*args, **kwargs) + # 停止默认线程 + self.done = True + with self.condition: + self.condition.notify_all() + self.worker_thread.join() # type: ignore + self.done = False + self.worker_thread = None # type: ignore + + def on_end(self, span: ReadableSpan) -> None: + if self.worker_thread is None: + self.worker_thread = threading.Thread(name=self.__class__.__name__, target=self.worker, daemon=True) + 
self.worker_thread.start() + super(LazyBatchSpanProcessor, self).on_end(span) + + def shutdown(self) -> None: + # signal the worker thread to finish and then wait for it + self.done = True + with self.condition: + self.condition.notify_all() + if self.worker_thread: + self.worker_thread.join() + self.span_exporter.shutdown() + + +def setup_trace_config(): + if not (settings.OTEL_GRPC_URL and settings.OTEL_DATA_TOKEN): + # local environment, use jaeger as trace service + # docker run -p 16686:16686 -p 6831:6831/udp jaegertracing/all-in-one + trace.set_tracer_provider( + tracer_provider=TracerProvider(resource=Resource.create({SERVICE_NAME: settings.OTEL_SERVICE_NAME})) + ) + jaeger_exporter = JaegerExporter( + agent_host_name="localhost", agent_port=6831, udp_split_oversized_batches=True + ) + trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(jaeger_exporter)) # type: ignore + else: + trace.set_tracer_provider( + tracer_provider=TracerProvider( + resource=Resource.create( + { + "service.name": settings.OTEL_SERVICE_NAME, + "bk.data.token": settings.OTEL_DATA_TOKEN, + }, + ), + sampler=_KNOWN_SAMPLERS[settings.OTEL_SAMPLER], # type: ignore + ) + ) + otlp_exporter = OTLPSpanExporter(endpoint=settings.OTEL_GRPC_URL, insecure=True) + span_processor = LazyBatchSpanProcessor(otlp_exporter) + trace.get_tracer_provider().add_span_processor(span_processor) # type: ignore + + +def setup_by_settings(): + if not settings.ENABLE_OTEL_TRACE: + return + + setup_trace_config() + BKLoginInstrumentor().instrument() diff --git a/src/bk-login/bklogin/monitoring/tracing/sentry.py b/src/bk-login/bklogin/monitoring/tracing/sentry.py new file mode 100644 index 000000000..a8fa65eeb --- /dev/null +++ b/src/bk-login/bklogin/monitoring/tracing/sentry.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from typing import List + +import sentry_sdk +from sentry_sdk.integrations import Integration + + +def init_sentry_sdk(django_integrated: bool = True, redis_integrated: bool = False, celery_integrated: bool = False): + """Register celery error events to sentry""" + from django.conf import settings + + integrations: List[Integration] = [] + if django_integrated: + from sentry_sdk.integrations.django import DjangoIntegration + + integrations.append(DjangoIntegration()) + + if redis_integrated: + from sentry_sdk.integrations.redis import RedisIntegration + + integrations.append(RedisIntegration()) + + if celery_integrated: + from sentry_sdk.integrations.celery import CeleryIntegration + + integrations.append(CeleryIntegration()) + + if settings.SENTRY_DSN: + # 初始化 sentry_sdk + sentry_sdk.init( # type: ignore + # debug=True, + dsn=settings.SENTRY_DSN, + integrations=integrations, + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for performance monitoring. + # We recommend adjusting this value in production, + traces_sample_rate=1.0, + # If you wish to associate users to errors (assuming you are using + # django.contrib.auth) you may enable sending PII data. + send_default_pii=True, + # By default, the SDK will try to use the SENTRY_RELEASE + # environment variable, or infer a git commit + # SHA as release, however you may want to set + # something more human-readable. 
+ # release="myapp@1.0.0", + # Can export the environment + # environment="production", + ) diff --git a/src/bk-login/bklogin/monitoring/urls.py b/src/bk-login/bklogin/monitoring/urls.py new file mode 100644 index 000000000..bd63364d5 --- /dev/null +++ b/src/bk-login/bklogin/monitoring/urls.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+""" +from django.urls import include, path + +urlpatterns = [ + # healthz + path("", include("bklogin.monitoring.healthz.urls")), + # prometheus + path("", include("bklogin.monitoring.metrics.urls")), +] diff --git a/src/bk-login/bklogin/settings.py b/src/bk-login/bklogin/settings.py index 6371eb1fd..402226228 100644 --- a/src/bk-login/bklogin/settings.py +++ b/src/bk-login/bklogin/settings.py @@ -39,6 +39,7 @@ "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", + "django_prometheus", "bklogin.authentication", ] @@ -236,3 +237,41 @@ }, }, } + +# ------------------------------------------ Healthz 配置 ------------------------------------------ + +# 调用 Healthz API 需要的 Token +HEALTHZ_TOKEN = env.str("HEALTHZ_TOKEN", "") +# 服务健康探针配置 +HEALTHZ_PROBES = env.list( + "HEALTHZ_PROBES", + default=[ + "bklogin.monitoring.healthz.probes.MysqlProbe", + ], +) + +# ------------------------------------------ Metric 配置 ------------------------------------------ + +# 调用 Metric API 需要的 Token +METRIC_TOKEN = env.str("METRIC_TOKEN", "") + +# ------------------------------------------ Tracing 配置 ------------------------------------------ + +# Sentry DSN 配置 +SENTRY_DSN = env.str("SENTRY_DSN", "") + +# 是否开启 OTEL 数据上报,默认不启用 +ENABLE_OTEL_TRACE = env.bool("ENABLE_OTEL_TRACE", False) +# 上报数据服务名称,一般使用默认值即可 +OTEL_SERVICE_NAME = env.str("OTEL_SERVICE_NAME", "bk-user") +# sdk 采样规则(always_on / always_off ...) 
+OTEL_SAMPLER = env.str("OTEL_SAMPLER", "always_on") +# OTEL 上报地址(grpc) +OTEL_GRPC_URL = env.str("OTEL_GRPC_URL", "") +# OTEL 上报到监控平台的数据 Token,可通过监控平台上新建应用获得 +OTEL_DATA_TOKEN = env.str("OTEL_DATA_TOKEN", "") +# 是否记录 DB 相关 tracing +OTEL_INSTRUMENT_DB_API = env.bool("OTEL_INSTRUMENT_DB_API", False) + +if ENABLE_OTEL_TRACE or SENTRY_DSN: + INSTALLED_APPS += ("bklogin.monitoring.tracing",) diff --git a/src/bk-login/bklogin/urls.py b/src/bk-login/bklogin/urls.py index 60a1c1ebe..f5522bc0f 100644 --- a/src/bk-login/bklogin/urls.py +++ b/src/bk-login/bklogin/urls.py @@ -12,4 +12,5 @@ urlpatterns = [ path("", include("bklogin.authentication.urls")), + path("", include("bklogin.monitoring.urls")), ] diff --git a/src/bk-login/poetry.lock b/src/bk-login/poetry.lock index 879908390..714cdd159 100644 --- a/src/bk-login/poetry.lock +++ b/src/bk-login/poetry.lock @@ -1951,47 +1951,35 @@ reference = "tencent" [[package]] name = "sentry-sdk" -version = "1.31.0" +version = "1.5.6" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = "*" files = [ - {file = "sentry-sdk-1.31.0.tar.gz", hash = "sha256:6de2e88304873484207fed836388e422aeff000609b104c802749fd89d56ba5b"}, - {file = "sentry_sdk-1.31.0-py2.py3-none-any.whl", hash = "sha256:64a7141005fb775b9db298a30de93e3b83e0ddd1232dc6f36eb38aebc1553291"}, + {file = "sentry-sdk-1.5.6.tar.gz", hash = "sha256:ac2a50128409d57655279817aedcb7800cace1f76b266f3dd62055d5afd6e098"}, + {file = "sentry_sdk-1.5.6-py2.py3-none-any.whl", hash = "sha256:1ab34e3851a34aeb3d1af1a0f77cec73978c4e9698e5210d050e4932953cb241"}, ] [package.dependencies] certifi = "*" -urllib3 = {version = ">=1.26.11", markers = "python_version >= \"3.6\""} +urllib3 = ">=1.10.0" [package.extras] aiohttp = ["aiohttp (>=3.5)"] -arq = ["arq (>=0.23)"] -asyncpg = ["asyncpg (>=0.23)"] beam = ["apache-beam (>=2.12)"] bottle = ["bottle (>=0.12.13)"] celery = ["celery (>=3)"] chalice = ["chalice (>=1.16.0)"] -clickhouse-driver = ["clickhouse-driver 
(>=0.2.0)"] django = ["django (>=1.8)"] falcon = ["falcon (>=1.4)"] -fastapi = ["fastapi (>=0.79.0)"] -flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] -grpcio = ["grpcio (>=1.21.1)"] +flask = ["blinker (>=1.1)", "flask (>=0.11)"] httpx = ["httpx (>=0.16.0)"] -huey = ["huey (>=2)"] -loguru = ["loguru (>=0.5)"] -opentelemetry = ["opentelemetry-distro (>=0.35b0)"] -opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] pure-eval = ["asttokens", "executing", "pure-eval"] -pymongo = ["pymongo (>=3.1)"] pyspark = ["pyspark (>=2.4.4)"] quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] rq = ["rq (>=0.6)"] sanic = ["sanic (>=0.8)"] sqlalchemy = ["sqlalchemy (>=1.2)"] -starlette = ["starlette (>=0.19.1)"] -starlite = ["starlite (>=1.48)"] tornado = ["tornado (>=5)"] [package.source] @@ -2496,4 +2484,4 @@ reference = "tencent" [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "2e9b6e0ba3df54edfc368ef362bd9a5c1ad1370cb383636fc6eb5d145e4ff28e" +content-hash = "ed8e326eafc9cf2a2e247c25428773f17d101b271af049766c0e583c9070c511" diff --git a/src/bk-login/pyproject.toml b/src/bk-login/pyproject.toml index f6d31fdd9..3b66835fe 100644 --- a/src/bk-login/pyproject.toml +++ b/src/bk-login/pyproject.toml @@ -21,7 +21,7 @@ django-environ = "0.8.1" whitenoise = "6.5.0" werkzeug = "2.3.7" python-json-logger = "2.0.7" -sentry-sdk = "1.31.0" +sentry-sdk = "1.5.6" django-prometheus = "2.3.1" opentelemetry-api = "1.20.0" opentelemetry-sdk = "1.20.0" diff --git a/src/bk-user/bkuser/monitoring/healthz/probes.py 
b/src/bk-user/bkuser/monitoring/healthz/probes.py index e60a9f88f..2b5f6955b 100644 --- a/src/bk-user/bkuser/monitoring/healthz/probes.py +++ b/src/bk-user/bkuser/monitoring/healthz/probes.py @@ -46,4 +46,4 @@ def _get_redis_probe_cls() -> Type[_RedisSentinelProbe] | Type[_RedisProbe]: return _RedisProbe -RedisProbe = _get_redis_probe_cls() +RedisProbe = _get_redis_probe_cls() # type: ignore diff --git a/src/bk-user/bkuser/monitoring/healthz/serializers.py b/src/bk-user/bkuser/monitoring/healthz/serializers.py index 594b65b9e..4e0c76f0e 100644 --- a/src/bk-user/bkuser/monitoring/healthz/serializers.py +++ b/src/bk-user/bkuser/monitoring/healthz/serializers.py @@ -16,7 +16,7 @@ class IssueSerializer(serializers.Serializer): description = serializers.CharField(help_text="问题描述", default="") -class DianosisSerializer(serializers.Serializer): +class DiagnosisSerializer(serializers.Serializer): system_name = serializers.CharField(help_text="探测的系统名称") alive = serializers.BooleanField(help_text="探测的系统是否存活", default=True) issues = IssueSerializer(help_text="检查到的问题", many=True) diff --git a/src/bk-user/bkuser/monitoring/healthz/views.py b/src/bk-user/bkuser/monitoring/healthz/views.py index 6e7937738..09779ccfb 100644 --- a/src/bk-user/bkuser/monitoring/healthz/views.py +++ b/src/bk-user/bkuser/monitoring/healthz/views.py @@ -16,7 +16,7 @@ from rest_framework.response import Response from bkuser.monitoring.healthz.probes import get_default_probes -from bkuser.monitoring.healthz.serializers import DianosisSerializer +from bkuser.monitoring.healthz.serializers import DiagnosisSerializer class HealthzApi(viewsets.ViewSet): @@ -48,4 +48,4 @@ def healthz(self, request): # by monitor system and make response as a plain text return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR, data=diagnosis_list.get_fatal_report()) - return Response(data={"results": DianosisSerializer(diagnosis_list.items, many=True).data}) + return Response(data={"results": 
DiagnosisSerializer(diagnosis_list.items, many=True).data})