Skip to content
This repository has been archived by the owner on Mar 28, 2023. It is now read-only.

Fixes #3 - Added new metric 'yupana_processor_dead' #348

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ server-migrate:
DJANGO_READ_DOT_ENV_FILE=True $(PYTHON) $(PYDIR)/manage.py migrate -v 3

serve:
DJANGO_READ_DOT_ENV_FILE=True $(PYTHON) $(PYDIR)/manage.py runserver 127.0.0.1:8001
DJANGO_READ_DOT_ENV_FILE=True $(PYTHON) $(PYDIR)/manage.py runserver 0.0.0.0:8001

server-static:
mkdir -p ./yupana/static/client
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ services:
- '9090:9090'
volumes:
- './scripts/config/prometheus.yml:/etc/prometheus/prometheus.yml'
- './scripts/config/prometheus_rules.yml:/etc/prometheus/prometheus_rules.yml'
image: prom/prometheus
5 changes: 4 additions & 1 deletion scripts/config/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ scrape_configs:
# for Docker versions 18.03 and later you can use host.docker.internal
# to point at your localhost from the container;
# for versions 17.06 up to 18.03 we use docker.for.mac.localhost
- targets: ['docker.for.mac.localhost:8001'] # Uses /metrics by default
- targets: ['docker.for.mac.localhost:8001','172.17.0.1:8001'] # Uses /metrics by default

rule_files:
- "prometheus_rules.yml"
10 changes: 10 additions & 0 deletions scripts/config/prometheus_rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Alerting rules loaded by Prometheus through the `rule_files` entry in
# prometheus.yml.
groups:
  - name: alerting_rules
    rules:
      # Fires when the yupana_processor_dead counter has increased at any
      # point during the last 5 minutes (its per-second rate over that
      # window is above zero).
      - alert: YupanaProcessorDead
        # The application registers the counter as `yupana_processor_dead`;
        # the Python Prometheus client exposes counter samples with a
        # `_total` suffix, hence the name queried here.
        expr: rate(yupana_processor_dead_total[5m]) > 0
        labels:
          severity: major
        annotations:
          summary: "Yupana processor dead"
          description: "Yupana processor dead."
4 changes: 4 additions & 0 deletions yupana/api/status/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from processor.processor_utils import (format_message,
list_name_of_active_threads,
list_name_of_processors)
from prometheus_client import Counter
from rest_framework import permissions, status as http_status
from rest_framework.decorators import api_view, permission_classes
from rest_framework.response import Response
Expand All @@ -29,6 +30,8 @@
from api.status.serializer import StatusSerializer

LOG = logging.getLogger(__name__)
# Prometheus counter bumped by the status view each time a status check finds
# that at least one expected processor is missing from the active threads.
# It therefore counts failing status checks, not distinct thread deaths.
PROCESSOR_DEAD_EXCEPTION = Counter('yupana_processor_dead',
'Total number of time yupana process thread dies')


@api_view(['GET', 'HEAD'])
Expand Down Expand Up @@ -88,6 +91,7 @@ def status(request):
active_threads_names = list_name_of_active_threads()
if not all(item in active_threads_names for item in total_processors_names):
dead_processors = set(total_processors_names).difference(active_threads_names)
PROCESSOR_DEAD_EXCEPTION.inc()
LOG.error(format_message('SERVICE STATUS', 'Dead processors - %s' % dead_processors))
return Response('ERROR: Processor thread exited',
status=http_status.HTTP_500_INTERNAL_SERVER_ERROR)
Expand Down