Skip to content

Commit

Permalink
Merge branch 'tezos_signer_forwarder' into midl4
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolasochem committed Oct 25, 2023
2 parents e1c5304 + 133cff2 commit 94f9b48
Show file tree
Hide file tree
Showing 13 changed files with 559 additions and 0 deletions.
6 changes: 6 additions & 0 deletions charts/tezos-signer-forwarder/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Helm chart metadata for the tezos-signer-forwarder chart.
apiVersion: v2
name: tezos-signer-forwarder
description: A chart for tezos-signer-forwarder
type: application
# NOTE(review): 0.0.0 is presumably a placeholder replaced at release time - TODO confirm.
version: 0.0.0
# Available to templates as .Chart.AppVersion; the chart's common labels helper
# emits it as the app.kubernetes.io/version label.
appVersion: "10.0"
3 changes: 3 additions & 0 deletions charts/tezos-signer-forwarder/scripts/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Entrypoint: run sshd in the foreground on the tunnel port.
#   -D  do not detach / daemonize
#   -e  write logs to stderr instead of syslog
#   -p  listen on the port given by TUNNEL_ENDPOINT_PORT (read from the environment)
#
# `exec` replaces this shell with sshd, so when this script is the container's
# main process, signals such as SIGTERM reach sshd directly instead of being
# held by the wrapper shell.
# The expansion is quoted so the value is always passed as a single argument.

exec /usr/sbin/sshd -D -e -p "${TUNNEL_ENDPOINT_PORT}"
53 changes: 53 additions & 0 deletions charts/tezos-signer-forwarder/scripts/signer_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python
import os
from flask import Flask, request, jsonify
import requests

import logging
# Only let ERROR and above through on the 'werkzeug' logger.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

application = Flask(__name__)

# Settings are read from the environment once, at import time.
# An unset variable yields None (or False for the boolean flag).
signer_metrics = os.environ.get("SIGNER_METRICS") == "true"
signer_port = os.environ.get("SIGNER_PORT")
readiness_probe_path = os.environ.get("READINESS_PROBE_PATH")

# https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
# The readiness probe is configured with timeoutSeconds of 5s, so the
# synchronous request to the signer must give up slightly earlier than that.
SIGNER_CONNECT_TIMEOUT = 4.5

@application.route('/metrics', methods=['GET'])
def prometheus_metrics():
    '''
    Prometheus endpoint
    This combines:
    * the metrics from the signer, which themselves are a combination of the
      prometheus node-exporter and custom probes (power status, etc)
    * the `unhealthy_signers_total` metric exported by this script: 0 when the
      signer readiness URL answers with a successful status, 1 when the
      request fails, times out or returns an error status

    The signer's `/healthz` body is only fetched and appended when the
    readiness request succeeded and SIGNER_METRICS is "true".

    Returns the metrics as a plain string in Prometheus text format.
    '''
    base_url = f"http://localhost:{signer_port}"

    try:
        probe = requests.get(
            f"{base_url}{readiness_probe_path}",
            timeout=SIGNER_CONNECT_TIMEOUT,
        )
    except requests.exceptions.RequestException:
        # ConnectTimeout and ReadTimeout are subclasses of RequestException,
        # so this single handler covers timeouts and every other transport
        # failure alike.
        probe = None

    healthz = None
    # A requests.Response is truthy only when its status code is below 400.
    if probe and signer_metrics:
        try:
            # Bound this call too: without a timeout a stalled signer would
            # block the request handler indefinitely.
            healthz = requests.get(
                f"{base_url}/healthz",
                timeout=SIGNER_CONNECT_TIMEOUT,
            ).text
        except requests.exceptions.RequestException:
            healthz = None

    return '''# number of unhealthy signers - should be 0 or 1
unhealthy_signers_total %s
%s''' % (0 if probe else 1, healthz or "")

if __name__ == "__main__":
    # Serve on every interface, port 31732, with Flask's debug mode off.
    application.run(host="0.0.0.0", port=31732, debug=False)
62 changes: 62 additions & 0 deletions charts/tezos-signer-forwarder/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when it is set and .Chart.Name otherwise; the result
is truncated to 63 characters and a trailing "-" is trimmed.
*/}}
{{- define "tezos-signer-forwarder.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Resolution order:
1. .Values.fullnameOverride, when set.
2. The release name alone, when it already contains the chart name (or nameOverride).
3. Otherwise "<release name>-<chart name or nameOverride>".
In every case a trailing "-" left over after truncation is trimmed.
*/}}
{{- define "tezos-signer-forwarder.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name $.Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" $.Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
The value is "<chart name>-<chart version>" with every "+" replaced by "_"
("+" is not a valid character in a Kubernetes label value), truncated to 63
characters with a trailing "-" trimmed.
*/}}
{{- define "tezos-signer-forwarder.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits helm.sh/chart, the selector labels, app.kubernetes.io/version (only when
the chart declares an appVersion) and app.kubernetes.io/managed-by.
*/}}
{{- define "tezos-signer-forwarder.labels" -}}
helm.sh/chart: {{ include "tezos-signer-forwarder.chart" . }}
{{ include "tezos-signer-forwarder.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Emits app.kubernetes.io/name (the chart name helper) and
app.kubernetes.io/instance (the release name).
*/}}
{{- define "tezos-signer-forwarder.selectorLabels" -}}
app.kubernetes.io/name: {{ include "tezos-signer-forwarder.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
When .Values.serviceAccount.create is set: .Values.serviceAccount.name, falling
back to the chart's fullname helper.
Otherwise: .Values.serviceAccount.name, falling back to "default".
*/}}
{{- define "tezos-signer-forwarder.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "tezos-signer-forwarder.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
63 changes: 63 additions & 0 deletions charts/tezos-signer-forwarder/templates/alertmanagerconfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Renders one AlertmanagerConfig per signer endpoint, but only when
# alertmanagerConfig.enabled is set, the signer has a monitoring_email and the
# endpoint has alert_when_down set.
# Each resource routes alerts whose labels match
#   service=tezos-remote-signer-<signer name>,
#   alertType=tezos-remote-signer-alert and
#   tezos_endpoint_name=<endpoint alias>
# to an email receiver addressed to the signer's monitoring_email.
# The backtick-quoted raw strings are emitted verbatim by Helm, which leaves
# the template expressions inside them for Alertmanager to evaluate.
{{- if .Values.alertmanagerConfig.enabled }}
{{- range .Values.signers }}
{{- if .monitoring_email }}
{{ $signer := . }}
{{- range .endpoints }}
{{- if .alert_when_down }}
apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig
metadata:
name: tezos-signer-{{ $signer.name }}-{{ .alias }}-email
labels:
{{- toYaml $.Values.alertmanagerConfig.labels | nindent 4 }}
spec:
route:
groupBy: ['job']
groupWait: 30s
groupInterval: 5m
repeatInterval: 12h
receiver: 'email_{{ $signer.name }}'
matchers:
- name: service
value: tezos-remote-signer-{{ $signer.name }}
regex: false
- name: alertType
value: tezos-remote-signer-alert
regex: false
- name: tezos_endpoint_name
value: {{ .alias }}
regex: false
continue: false

receivers:
- name: 'email_{{ $signer.name }}'
emailConfigs:
- to: "{{ $signer.monitoring_email }}"
sendResolved: true
headers:
- key: subject
value: '{{`[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }}`}}'
html: >-
{{`{{ if eq .Status "firing" }}
Attention Required for Tezos Remote Signer:
{{ else }}
Resolved Alert for Tezos Remote Signer:
{{ end }}
{{ range .Alerts -}}
{{ .Annotations.summary }}
{{ end }}`}}
text: >-
{{`{{ if eq .Status "firing" }}
Attention Required for Tezos Remote Signer:
{{ else }}
Resolved Alert for Tezos Remote Signer:
{{ end }}
{{ range .Alerts -}}
{{ .Annotations.summary }}
{{ end }}`}}
---
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
12 changes: 12 additions & 0 deletions charts/tezos-signer-forwarder/templates/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Renders one ConfigMap per signer endpoint, named
# tezos-signer-forwarder-config-<signer name>-<endpoint index>.
# Its authorized_keys entry is the endpoint's ssh_pubkey followed by the
# key comment "signer".
{{- range .Values.signers }}
{{- $name := .name }}
{{- range $i, $endpoint := .endpoints }}
apiVersion: v1
kind: ConfigMap
metadata:
name: tezos-signer-forwarder-config-{{ $name }}-{{ $i }}
data:
authorized_keys: "{{ $endpoint.ssh_pubkey }} signer"
---
{{- end }}
{{- end }}
51 changes: 51 additions & 0 deletions charts/tezos-signer-forwarder/templates/prometheusrule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Renders two PrometheusRule resources when prometheusRule.enabled is set:
#   * tezos-remote-signer-rules: SignerPowerLoss (power != 0) and
#     SignerWiredNetworkLoss (wired_network != 0)
#   * tezos-remote-signer-reachability-rules: NoRemoteSigner
#     (unhealthy_signers_total != 0)
# Every expression is restricted to the release namespace, must hold for 1m,
# and carries severity=critical and alertType=tezos-remote-signer-alert.
# The backtick-quoted raw strings keep the $labels expressions intact so that
# Prometheus, not Helm, expands them.
{{- if .Values.prometheusRule.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
{{- toYaml .Values.prometheusRule.labels | nindent 4 }}
name: tezos-remote-signer-rules
spec:
groups:
- name: tezos-remote-signer.rules
rules:
- alert: SignerPowerLoss
annotations:
description: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost power'
summary: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost power'
expr: power{namespace="{{ .Release.Namespace }}"} != 0
for: 1m
labels:
severity: critical
alertType: tezos-remote-signer-alert
- alert: SignerWiredNetworkLoss
annotations:
description: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost wired internet connection'
summary: 'Tezos remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost wired internet connection'
expr: wired_network{namespace="{{ .Release.Namespace }}"} != 0
for: 1m
labels:
severity: critical
alertType: tezos-remote-signer-alert
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
{{- toYaml .Values.prometheusRule.labels | nindent 4 }}
name: tezos-remote-signer-reachability-rules
spec:
groups:
- name: tezos-remote-signer.rules
rules:
- alert: NoRemoteSigner
annotations:
description: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" is down'
summary: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" is down or unable to sign.'
expr: unhealthy_signers_total{namespace="{{ .Release.Namespace }}"} != 0
for: 1m
labels:
severity: critical
alertType: tezos-remote-signer-alert
---
{{- end }}
7 changes: 7 additions & 0 deletions charts/tezos-signer-forwarder/templates/secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Secret named tezos-signer-forwarder-secret-<.Values.name> holding one key,
# ssh_host_ecdsa_key. Its value is .Values.secrets.signer_target_host_key with
# a trailing newline appended (println), then base64-encoded.
apiVersion: v1
kind: Secret
metadata:
name: tezos-signer-forwarder-secret-{{ .Values.name }}
data:
ssh_host_ecdsa_key: |
{{ println .Values.secrets.signer_target_host_key | b64enc | indent 4 -}}
50 changes: 50 additions & 0 deletions charts/tezos-signer-forwarder/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Renders two kinds of Service:
#   1. One LoadBalancer, tezos-remote-signer-ssh-ingress-<.Values.name>, with
#      one named port per signer endpoint (its tunnel_endpoint_port).
#   2. One Service per signer, tezos-remote-signer-<signer name>, exposing the
#      signer port and the metrics port 31732, with ClientIP session affinity.
# NOTE(review): the signer name is cut to 9 characters in port names, presumably
# so that "ssh-<name>-<index>" stays within the 15-character limit on port
# names (which would only hold for single-digit indexes) - TODO confirm.
apiVersion: v1
kind: Service
metadata:
name: tezos-remote-signer-ssh-ingress-{{ .Values.name }}
annotations:
{{ toYaml .Values.service_annotations | indent 4 }}
spec:
type: LoadBalancer
selector:
app.kubernetes.io/name: tezos-signer-forwarder
ports:
{{- range .Values.signers }}
{{- $name := .name }}
# undocumented k8s feature to make a service route to different pods
# based on the port - allows to reuse the same public ip in all cloud
# providers. For it to work, ports need to have names.
# https://github.com/kubernetes/kubernetes/issues/24875#issuecomment-794596576
{{- range $i, $endpoint := .endpoints }}
- port: {{ $endpoint.tunnel_endpoint_port }}
name: ssh-{{ trunc 9 $name }}-{{ $i }}
targetPort: ssh-{{ trunc 9 $name }}-{{ $i }}
{{- end }}
{{- end }}
# ensures that remote signers can always ssh
publishNotReadyAddresses: true
{{ if .Values.load_balancer_ip }}
loadBalancerIP: {{ .Values.load_balancer_ip }}
{{ end }}
---
{{- range .Values.signers }}
apiVersion: v1
kind: Service
metadata:
name: tezos-remote-signer-{{ .name }}
labels:
app.kubernetes.io/name: tezos-signer-forwarder
tezos_baker_name: {{ .name }}
spec:
selector:
app.kubernetes.io/name: tezos-signer-forwarder
tezos_baker_name: {{ .name }}
ports:
- port: {{ .signer_port }}
name: signer
- port: 31732
name: metrics
# make sure that the service always targets the same signer, when HA is in use.
sessionAffinity: ClientIP
---
{{- end }}
25 changes: 25 additions & 0 deletions charts/tezos-signer-forwarder/templates/servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Renders one ServiceMonitor per signer when serviceMonitor.enabled is set.
# Each one scrapes /metrics on the port named "metrics" of the Services
# labelled with the signer's tezos_baker_name.
# targetLabels / podTargetLabels copy the Service label tezos_baker_name and
# the Pod label tezos_endpoint_name onto the scraped metrics.
{{- if .Values.serviceMonitor.enabled }}
{{- range .Values.signers }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/name: tezos-signer-forwarder
name: tezos-remote-signer-monitoring-{{ .name }}
spec:
endpoints:
- port: metrics
path: /metrics
# the default scrape timeout of 10s can be too small for remote raspberry pis
scrapeTimeout: "20s"
selector:
matchLabels:
app.kubernetes.io/name: tezos-signer-forwarder
tezos_baker_name: {{ .name }}
targetLabels:
- tezos_baker_name
podTargetLabels:
- tezos_endpoint_name
---
{{- end }}
{{- end }}
Loading

0 comments on commit 94f9b48

Please sign in to comment.