From fa364f6595665f03a355d4573b77ef577b2a87a7 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Tue, 1 Jun 2021 15:07:53 +0530 Subject: [PATCH 1/2] Fixes #3 - Added new metric 'yupana_processor_dead' --- Makefile | 2 +- docker-compose.yml | 1 + scripts/config/prometheus.yml | 5 ++++- scripts/config/prometheus_rules.yml | 10 ++++++++++ yupana/api/status/view.py | 4 ++++ 5 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 scripts/config/prometheus_rules.yml diff --git a/Makefile b/Makefile index 144f9288..4e271a4d 100644 --- a/Makefile +++ b/Makefile @@ -93,7 +93,7 @@ server-migrate: DJANGO_READ_DOT_ENV_FILE=True $(PYTHON) $(PYDIR)/manage.py migrate -v 3 serve: - DJANGO_READ_DOT_ENV_FILE=True $(PYTHON) $(PYDIR)/manage.py runserver 127.0.0.1:8001 + DJANGO_READ_DOT_ENV_FILE=True $(PYTHON) $(PYDIR)/manage.py runserver 0.0.0.0:8001 server-static: mkdir -p ./yupana/static/client diff --git a/docker-compose.yml b/docker-compose.yml index d1c4d9d5..32f57824 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,4 +33,5 @@ services: - '9090:9090' volumes: - './scripts/config/prometheus.yml:/etc/prometheus/prometheus.yml' + - './scripts/config/prometheus_rules.yml:/etc/prometheus/prometheus_rules.yml' image: prom/prometheus \ No newline at end of file diff --git a/scripts/config/prometheus.yml b/scripts/config/prometheus.yml index 732d910b..ea4c1ffc 100644 --- a/scripts/config/prometheus.yml +++ b/scripts/config/prometheus.yml @@ -20,4 +20,7 @@ scrape_configs: # for docker versions under 18.03 onwards you can use host.docker.internal # to point at your localhost from the container # for version 17.06 - 18.03 we use docker.for.mac.localhost - - targets: ['docker.for.mac.localhost:8001'] # Uses /metrics by default + - targets: ['docker.for.mac.localhost:8001','172.17.0.1:8001'] # Uses /metrics by default + +rule_files: + - "prometheus_rules.yml" \ No newline at end of file diff --git a/scripts/config/prometheus_rules.yml 
b/scripts/config/prometheus_rules.yml new file mode 100644 index 00000000..86044a2d --- /dev/null +++ b/scripts/config/prometheus_rules.yml @@ -0,0 +1,10 @@ +groups: + - name: alerting_rules + rules: + - alert: YupanaProcessorDead + expr: rate(yupana_processor_dead_total[5m]) > 0 + labels: + severity: major + annotations: + summary: "Yupana processor dead" + description: "Yupana processor dead." diff --git a/yupana/api/status/view.py b/yupana/api/status/view.py index c29d4018..216cee10 100644 --- a/yupana/api/status/view.py +++ b/yupana/api/status/view.py @@ -27,8 +27,11 @@ from api.status.model import Status from api.status.serializer import StatusSerializer +from prometheus_client import Counter LOG = logging.getLogger(__name__) +PROCESSOR_DEAD_EXCEPTION = Counter('yupana_processor_dead', + 'Total number of time yupana process thread dies') @api_view(['GET', 'HEAD']) @@ -88,6 +91,7 @@ def status(request): active_threads_names = list_name_of_active_threads() if not all(item in active_threads_names for item in total_processors_names): dead_processors = set(total_processors_names).difference(active_threads_names) + PROCESSOR_DEAD_EXCEPTION.inc() LOG.error(format_message('SERVICE STATUS', 'Dead processors - %s' % dead_processors)) return Response('ERROR: Processor thread exited', status=http_status.HTTP_500_INTERNAL_SERVER_ERROR) From 8fad19538d17a8a0a33f7565784161d5efe6e35a Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Tue, 1 Jun 2021 15:37:01 +0530 Subject: [PATCH 2/2] Fix linting issue --- yupana/api/status/view.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yupana/api/status/view.py b/yupana/api/status/view.py index 216cee10..6b25cde0 100644 --- a/yupana/api/status/view.py +++ b/yupana/api/status/view.py @@ -21,17 +21,17 @@ from processor.processor_utils import (format_message, list_name_of_active_threads, list_name_of_processors) +from prometheus_client import Counter from rest_framework import permissions, status as http_status from 
rest_framework.decorators import api_view, permission_classes from rest_framework.response import Response from api.status.model import Status from api.status.serializer import StatusSerializer -from prometheus_client import Counter LOG = logging.getLogger(__name__) PROCESSOR_DEAD_EXCEPTION = Counter('yupana_processor_dead', - 'Total number of time yupana process thread dies') + 'Total number of time yupana process thread dies') @api_view(['GET', 'HEAD'])