diff --git a/charts/tezos-signer-forwarder/Chart.yaml b/charts/tezos-signer-forwarder/Chart.yaml new file mode 100644 index 000000000..9089b9ac4 --- /dev/null +++ b/charts/tezos-signer-forwarder/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: tezos-signer-forwarder +description: A chart for tezos-signer-forwarder +type: application +version: 0.0.0 +appVersion: "10.0" diff --git a/charts/tezos-signer-forwarder/scripts/entrypoint.sh b/charts/tezos-signer-forwarder/scripts/entrypoint.sh new file mode 100644 index 000000000..41233320e --- /dev/null +++ b/charts/tezos-signer-forwarder/scripts/entrypoint.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +/usr/sbin/sshd -D -e -p ${TUNNEL_ENDPOINT_PORT} diff --git a/charts/tezos-signer-forwarder/scripts/signer_exporter.py b/charts/tezos-signer-forwarder/scripts/signer_exporter.py new file mode 100644 index 000000000..b81362024 --- /dev/null +++ b/charts/tezos-signer-forwarder/scripts/signer_exporter.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +import os +from flask import Flask, request, jsonify +import requests + +import logging +log = logging.getLogger('werkzeug') +log.setLevel(logging.ERROR) + +application = Flask(__name__) + +readiness_probe_path = os.getenv("READINESS_PROBE_PATH") +signer_port = os.getenv("SIGNER_PORT") +signer_metrics = os.getenv("SIGNER_METRICS") == "true" + +# https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +# Configured readiness probe timeoutSeconds is 5s, timeout sync request before that. +SIGNER_CONNECT_TIMEOUT = 4.5 + +@application.route('/metrics', methods=['GET']) +def prometheus_metrics(): + ''' + Prometheus endpoint + This combines: + * the metrics from the signer, which themselves are a combination of the + prometheus node-exporter and custom probes (power status, etc) + * the `unhealthy_signers_total` metric exported by this script, verifying + whether the signer URL configured upstream returns a 200 OK + ''' + + try: + probe = requests.get(f"http://localhost:{signer_port}{readiness_probe_path}", timeout=SIGNER_CONNECT_TIMEOUT) + except requests.exceptions.ConnectTimeout: + #Timeout connect to node + probe = None + except requests.exceptions.ReadTimeout: + #Timeout read from node + probe = None + except requests.exceptions.RequestException: + probe = None + if probe and signer_metrics: + try: + healthz = requests.get(f"http://localhost:{signer_port}/healthz").text + except requests.exceptions.RequestException: + healthz = None + else: + healthz = None + return '''# number of unhealthy signers - should be 0 or 1 +unhealthy_signers_total %s +%s''' % (0 if probe else 1, healthz or "") + +if __name__ == "__main__": + application.run(host = "0.0.0.0", port = 31732, debug = False) diff --git a/charts/tezos-signer-forwarder/templates/_helpers.tpl b/charts/tezos-signer-forwarder/templates/_helpers.tpl new file mode 100644 index 000000000..c3113e365 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "tezos-signer-forwarder.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "tezos-signer-forwarder.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name $.Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" $.Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "tezos-signer-forwarder.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tezos-signer-forwarder.labels" -}} +helm.sh/chart: {{ include "tezos-signer-forwarder.chart" . }} +{{ include "tezos-signer-forwarder.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tezos-signer-forwarder.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tezos-signer-forwarder.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "tezos-signer-forwarder.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "tezos-signer-forwarder.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/tezos-signer-forwarder/templates/alertmanagerconfig.yaml b/charts/tezos-signer-forwarder/templates/alertmanagerconfig.yaml new file mode 100644 index 000000000..5a8701837 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/alertmanagerconfig.yaml @@ -0,0 +1,63 @@ +{{- if .Values.alertmanagerConfig.enabled }} +{{- range .Values.signers }} +{{- if .monitoring_email }} +{{ $signer := . }} +{{- range .endpoints }} +{{- if .alert_when_down }} +apiVersion: monitoring.coreos.com/v1alpha1 +kind: AlertmanagerConfig +metadata: + name: tezos-signer-{{ $signer.name }}-{{ .alias }}-email + labels: +{{- toYaml $.Values.alertmanagerConfig.labels | nindent 4 }} +spec: + route: + groupBy: ['job'] + groupWait: 30s + groupInterval: 5m + repeatInterval: 12h + receiver: 'email_{{ $signer.name }}' + matchers: + - name: service + value: tezos-remote-signer-{{ $signer.name }} + regex: false + - name: alertType + value: tezos-remote-signer-alert + regex: false + - name: tezos_endpoint_name + value: {{ .alias }} + regex: false + continue: false + + receivers: + - name: 'email_{{ $signer.name }}' + emailConfigs: + - to: "{{ $signer.monitoring_email }}" + sendResolved: true + headers: + - key: subject + value: '{{`[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }}`}}' + html: >- + {{`{{ if eq .Status "firing" }} + Attention Required for Tezos Remote Signer: + {{ else }} + Resolved Alert for Tezos Remote Signer: + {{ end }} + {{ range .Alerts -}} + {{ .Annotations.summary }} + {{ end }}`}} + text: >- + {{`{{ if eq .Status "firing" }} + Attention Required for Tezos Remote Signer: + {{ else }} + Resolved Alert for Tezos Remote Signer: + {{ end }} + {{ range .Alerts -}} + {{ .Annotations.summary }} + {{ end }}`}} +--- +{{- end }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/tezos-signer-forwarder/templates/config.yaml b/charts/tezos-signer-forwarder/templates/config.yaml new file mode 100644 index 000000000..975712d18 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/config.yaml @@ -0,0 +1,12 @@ +{{- range .Values.signers }} +{{- $name := .name }} +{{- range $i, $endpoint := .endpoints }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: tezos-signer-forwarder-config-{{ $name }}-{{ $i }} +data: + authorized_keys: "{{ $endpoint.ssh_pubkey }} signer" +--- +{{- end }} +{{- end }} diff --git a/charts/tezos-signer-forwarder/templates/prometheusrule.yaml b/charts/tezos-signer-forwarder/templates/prometheusrule.yaml new file mode 100644 index 000000000..f0090d558 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/prometheusrule.yaml @@ -0,0 +1,51 @@ +{{- if .Values.prometheusRule.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: +{{- toYaml .Values.prometheusRule.labels | nindent 4 }} + name: tezos-remote-signer-rules +spec: + groups: + - name: tezos-remote-signer.rules + rules: + - alert: SignerPowerLoss + annotations: + description: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost power' + summary: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost power' + expr: power{namespace="{{ .Release.Namespace }}"} != 0 + for: 1m + labels: + severity: critical + alertType: tezos-remote-signer-alert + - alert: SignerWiredNetworkLoss + annotations: + description: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost wired internet connection' + summary: 'Tezos remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" has lost wired internet connection' + expr: wired_network{namespace="{{ .Release.Namespace }}"} != 0 + for: 1m + labels: + severity: critical + alertType: tezos-remote-signer-alert +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: +{{- toYaml .Values.prometheusRule.labels | nindent 4 }} + name: tezos-remote-signer-reachability-rules +spec: + groups: + - name: tezos-remote-signer.rules + rules: + - alert: NoRemoteSigner + annotations: + description: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" is down' + summary: 'Remote signer "{{`{{ $labels.tezos_endpoint_name }}`}}" for baker "{{`{{ $labels.tezos_baker_name }}`}}" is down or unable to sign.' + expr: unhealthy_signers_total{namespace="{{ .Release.Namespace }}"} != 0 + for: 1m + labels: + severity: critical + alertType: tezos-remote-signer-alert +--- +{{- end }} diff --git a/charts/tezos-signer-forwarder/templates/secret.yaml b/charts/tezos-signer-forwarder/templates/secret.yaml new file mode 100644 index 000000000..b330b7cdb --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: tezos-signer-forwarder-secret-{{ .Values.name }} +data: + ssh_host_ecdsa_key: | +{{ println .Values.secrets.signer_target_host_key | b64enc | indent 4 -}} diff --git a/charts/tezos-signer-forwarder/templates/service.yaml b/charts/tezos-signer-forwarder/templates/service.yaml new file mode 100644 index 000000000..32d5f4363 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/service.yaml @@ -0,0 +1,50 @@ +apiVersion: v1 +kind: Service +metadata: + name: tezos-remote-signer-ssh-ingress-{{ .Values.name }} + annotations: +{{ toYaml .Values.service_annotations | indent 4 }} +spec: + type: LoadBalancer + selector: + app.kubernetes.io/name: tezos-signer-forwarder + ports: +{{- range .Values.signers }} +{{- $name := .name }} + # undocumented k8s feature to make a service route to different pods + # based on the port - allows to reuse the same public ip in all cloud + # providers. For it to work, ports need to have names. + # https://github.com/kubernetes/kubernetes/issues/24875#issuecomment-794596576 +{{- range $i, $endpoint := .endpoints }} + - port: {{ $endpoint.tunnel_endpoint_port }} + name: ssh-{{ trunc 9 $name }}-{{ $i }} + targetPort: ssh-{{ trunc 9 $name }}-{{ $i }} +{{- end }} +{{- end }} + # ensures that remote signers can always ssh + publishNotReadyAddresses: true +{{ if .Values.load_balancer_ip }} + loadBalancerIP: {{ .Values.load_balancer_ip }} +{{ end }} +--- +{{- range .Values.signers }} +apiVersion: v1 +kind: Service +metadata: + name: tezos-remote-signer-{{ .name }} + labels: + app.kubernetes.io/name: tezos-signer-forwarder + tezos_baker_name: {{ .name }} +spec: + selector: + app.kubernetes.io/name: tezos-signer-forwarder + tezos_baker_name: {{ .name }} + ports: + - port: {{ .signer_port }} + name: signer + - port: 31732 + name: metrics + # make sure that the service always targets the same signer, when HA is in use. + sessionAffinity: ClientIP +--- +{{- end }} diff --git a/charts/tezos-signer-forwarder/templates/servicemonitor.yaml b/charts/tezos-signer-forwarder/templates/servicemonitor.yaml new file mode 100644 index 000000000..759156e09 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/servicemonitor.yaml @@ -0,0 +1,25 @@ +{{- if .Values.serviceMonitor.enabled }} +{{- range .Values.signers }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/name: tezos-signer-forwarder + name: tezos-remote-signer-monitoring-{{ .name }} +spec: + endpoints: + - port: metrics + path: /metrics + # default scrape timeout of 10 can be too small for remote raspberry pis + scrapeTimeout: "20s" + selector: + matchLabels: + app.kubernetes.io/name: tezos-signer-forwarder + tezos_baker_name: {{ .name }} + targetLabels: + - tezos_baker_name + podTargetLabels: + - tezos_endpoint_name +--- +{{- end }} +{{- end }} diff --git a/charts/tezos-signer-forwarder/templates/statefulset.yaml b/charts/tezos-signer-forwarder/templates/statefulset.yaml new file mode 100644 index 000000000..5b50770c9 --- /dev/null +++ b/charts/tezos-signer-forwarder/templates/statefulset.yaml @@ -0,0 +1,98 @@ +{{- range .Values.signers }} +{{- $name := .name }} +{{- $readiness_probe_path := .readiness_probe_path }} +{{- $signer_port := .signer_port }} +{{- $signer_metrics := .signer_metrics }} +{{- range $i, $endpoint := .endpoints }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: tezos-signer-forwarder-{{ $name}}-{{ $i }} + annotations: + "pulumi.com/skipAwait": "true" +spec: + replicas: 1 + serviceName: tezos-remote-signer-{{ $name }} + selector: + matchLabels: + app.kubernetes.io/name: tezos-signer-forwarder + template: + metadata: + annotations: + # ensure that the pod bounces each time endpoint config changes + # https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments + checksum/config: {{ print "$endpoint" | sha256sum }} + {{- with $.Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app.kubernetes.io/name: tezos-signer-forwarder + tezos_baker_name: {{ $name }} + tezos_endpoint_name: {{ $endpoint.alias }} + spec: + volumes: + - name: config-volume + configMap: + name: tezos-signer-forwarder-config-{{ $name }}-{{ $i }} + defaultMode: 0444 + - name: secret-volume + secret: + secretName: tezos-signer-forwarder-secret-{{ $.Values.name }} + defaultMode: 0400 + readOnly: true + containers: + - name: tezos-signer-forwarder + image: {{ $.Values.tezos_k8s_images.tezos_signer_forwarder }} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + args: + - "-c" + - | +{{ tpl ($.Files.Get (print "scripts/entrypoint.sh")) $ | indent 12 }} + volumeMounts: + - name: config-volume + mountPath: /home/signer/.ssh/authorized_keys + subPath: authorized_keys + - name: secret-volume + mountPath: /etc/ssh/ssh_host_ecdsa_key + subPath: ssh_host_ecdsa_key + env: + - name: TUNNEL_ENDPOINT_PORT + value: "{{ $endpoint.tunnel_endpoint_port }}" + ports: + - name: signer + containerPort: {{ $signer_port }} + protocol: TCP + - name: ssh-{{ trunc 9 $name }}-{{ $i }} + containerPort: {{ $endpoint.tunnel_endpoint_port }} + protocol: TCP + readinessProbe: + httpGet: + path: {{ $readiness_probe_path }} + port: {{ $signer_port }} + timeoutSeconds: 5 + - name: prom-exporter + image: {{ $.Values.tezos_k8s_images.utils }} + ports: + - name: metrics + containerPort: 31732 + protocol: TCP + env: + - name: READINESS_PROBE_PATH + value: {{ $readiness_probe_path | quote }} + - name: SIGNER_PORT + value: {{ $signer_port | quote }} + - name: SIGNER_METRICS + value: {{ $signer_metrics | quote }} + command: + - /usr/local/bin/python + args: + - "-c" + - | +{{ tpl ($.Files.Get (print "scripts/signer_exporter.py")) $ | indent 12 }} + nodeSelector: + {{ toYaml $.Values.node_selector | indent 8 }} +--- +{{- end }} +{{- end }} diff --git a/charts/tezos-signer-forwarder/values.yaml b/charts/tezos-signer-forwarder/values.yaml new file mode 100644 index 000000000..aba7c658d --- /dev/null +++ b/charts/tezos-signer-forwarder/values.yaml @@ -0,0 +1,104 @@ +tezos_k8s_images: + utils: ghcr.io/oxheadalpha/tezos-k8s-utils:master + tezos_signer_forwarder: ghcr.io/oxheadalpha/tezos-k8s-signerforwarder:dev + +# List the endpoints below. +# Each endpoint represents a ssh server. +# To handle several endpoints, you can either: +# * instantiate several replicas of this chart, or +# * list several endpoints below. + +# Since this chart instantiates a service of type Loadbalancer, +# it may be the case that each such service comes with its own +# auto-assigned IP, increasing costs. +# Listing several signers below will put all +# associated pods behind the same LoadBalancer service. +# Consequenty, the same IP will be re-used between signers. +# If you prefer to have one IP per signer, instantiate this chart +# several times. +signers: + # signer name - to disambiguate them +- name: mysigner + + # Signer may have several endpoints: signer replicas signing for the same key. + # This is useful for a highly-available setup. + endpoints: + # the public key that the server is expecting. + # The signer should authenticate with the corresponding secret key. + - ssh_pubkey: "ssh-rsa AAAA...." + + # ssh tunnel connection establishes to this port + tunnel_endpoint_port: 50000 + + # Alias to distinguish the endpoints from one another + alias: "my home signer" + + # Whether to send alerts when down. set to false for cold standbys. + alert_when_down: true + + # Set a readiness probe path for your signer. + # By default, it is the known path implemented by every signer "/authorized_keys" + # When using tezos-remote-signer-os, you can set it to a path that performs more + # checks, such as: + # "/statusz/${PUBLIC_BAKING_KEY_HASH}?ledger_url=${LEDGER_AUTHORIZED_PATH_ENCODED}" + readiness_probe_path: /authorized_keys + + # The signer port that is being tunneled by the remote signer. + # When using HA signer, must be identical for all. + signer_port: 6732 + + # Whether the remote signer exposes prometheus metrics. + # Typically these will be hardware metrics of the signer. + # If true, these metrics will be labeled and scraped into + # the cluster's prometheus. + signer_metrics: false + + # Enter email address to send alerts to. + monitoring_email: "" + +# Name that goes into the service +# e.g tezos-signer-mybaker +# useful when one baker bakes for several addresses +# on different remote signers. +name: mybaker + +# to deploy in a specific node pool, put label here +node_selector: {} + +# LoadBalancer service annotations. On some cloud providers, it can +# be used to assign a static ip address. +service_annotations: {} + +# Load Balancer IP to set the ssh service. +# In some cloud providers, it is used to assign static ip. +load_balancer_ip: "" + +secrets: + # The ssh host key must be passed as input. + # Otherwise, when destroying and respinning the infra, + # the signer would not recognize the host and refuse to + # connect. + signer_target_host_key: | + -----BEGIN OPENSSH PRIVATE KEY----- + xxx + -----END OPENSSH PRIVATE KEY----- + +# Prometheus Operator is required in your cluster in order to enable +# serviceMonitor and prometheusRule below. +# Enable service monitor to scrape the /healthz endpoint of your +# remote signer. +# The /healthz endpoint is exposed by tezos-remote-signer-os: +# https://github.com/midl-dev/tezos-remote-signer-os +serviceMonitor: + enabled: false +# Enable Prometheus Rule to be alerted when your hardware remote signer +# provisioned with tezos-remote-signer-os loses power or wired network +# connectivity. +# For rules to be picked up by the Prometheus Operator, proper labels need +# to be set below. Refer to Prometheus operator documentation for details. +prometheusRule: + enabled: false + labels: {} +alertmanagerConfig: + enabled: false + labels: {} diff --git a/signerForwarder/Dockerfile b/signerForwarder/Dockerfile new file mode 100644 index 000000000..7a873468c --- /dev/null +++ b/signerForwarder/Dockerfile @@ -0,0 +1,25 @@ +FROM alpine:3.18.4 + +# add openssh and clean +RUN apk add --no-cache openssh shadow + +RUN adduser --system signer +# * is the hash of the password. Effectively, password login is disabled. +# but I need to do that otherwise sshd says account is locked. see: +# https://unix.stackexchange.com/a/193131/81131 +RUN usermod -p '*' signer + +#allow forwarding +RUN sed -ri 's/^.*GatewayPorts.*$/GatewayPorts yes/g' /etc/ssh/sshd_config +RUN sed -ri 's/^.*AllowTcpForwarding.*$/AllowTcpForwarding yes/g' /etc/ssh/sshd_config +RUN sed -ri 's/^.*PasswordAuthentication.*$/PasswordAuthentication no/g' /etc/ssh/sshd_config +RUN sed -ri 's/^.*ClientAliveInterval.*$/ClientAliveInterval 10/g' /etc/ssh/sshd_config +RUN sed -ri 's/^.*ClientAliveCountMax.*$/ClientAliveCountMax 2/g' /etc/ssh/sshd_config +RUN printf "AllowUsers signer\n" >> /etc/ssh/sshd_config +RUN cat /etc/ssh/sshd_config + +RUN mkdir /home/signer/.ssh && chown -R signer /home/signer + +# Note that the container must run as root. see +# https://superuser.com/a/1548482 +CMD ["/usr/sbin/sshd", "-D", "-e"]