From ea3ced22fd6dee297f528071cc1e5b0dcd977de8 Mon Sep 17 00:00:00 2001
From: Ben Jackson
Date: Sat, 16 Dec 2023 11:39:19 +1100
Subject: [PATCH] feat: add metrics

---
 Makefile                             |  2 +-
 handlers/idler/metrics.go            | 13 ++++++++++++
 handlers/idler/service-kubernetes.go |  5 +++--
 handlers/idler/service.go            | 10 ++++++++++
 handlers/metrics/metrics.go          | 30 ++++++++++++++++++++++++++++
 handlers/unidler/handler.go          | 16 +++++++++++----
 handlers/unidler/metrics.go          | 30 ++++++++++++++++++++++++++++
 handlers/unidler/unidler.go          |  7 +++----
 main.go                              |  6 ++++++
 9 files changed, 108 insertions(+), 11 deletions(-)
 create mode 100644 handlers/idler/metrics.go
 create mode 100644 handlers/metrics/metrics.go

diff --git a/Makefile b/Makefile
index 524dbf0..502bd07 100644
--- a/Makefile
+++ b/Makefile
@@ -71,7 +71,7 @@ ifeq (, $(shell which controller-gen))
 	CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\
 	cd $$CONTROLLER_GEN_TMP_DIR ;\
 	go mod init tmp ;\
-	go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.6.2 ;\
+	go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.6.2 ;\
 	rm -rf $$CONTROLLER_GEN_TMP_DIR ;\
 	}
 CONTROLLER_GEN=$(GOBIN)/controller-gen
diff --git a/handlers/idler/metrics.go b/handlers/idler/metrics.go
new file mode 100644
index 0000000..9b74a17
--- /dev/null
+++ b/handlers/idler/metrics.go
@@ -0,0 +1,13 @@
+package idler
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+var (
+	idleEvents = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_idling_events",
+		Help: "The total number of events that aergia has processed to idle environments",
+	})
+)
diff --git a/handlers/idler/service-kubernetes.go b/handlers/idler/service-kubernetes.go
index 0d3753b..2c416bf 100644
--- a/handlers/idler/service-kubernetes.go
+++ b/handlers/idler/service-kubernetes.go
@@ -267,15 +267,16 @@ func (h *Idler) patchIngress(ctx context.Context, opLog logr.Logger, namespace c
 		}
 	}
 	if patched {
-		// update the namespace to indicate it is not idled
+		// update the namespace to indicate it is idled
 		namespaceCopy := namespace.DeepCopy()
 		mergePatch, _ := json.Marshal(map[string]interface{}{
 			"metadata": map[string]interface{}{
 				"labels": map[string]string{
-					"idling.amazee.io/idled": "false",
+					"idling.amazee.io/idled": "true",
 				},
 			},
 		})
+		idleEvents.Inc()
 		if err := h.Client.Patch(ctx, namespaceCopy, client.RawPatch(types.MergePatchType, mergePatch)); err != nil {
 			return fmt.Errorf(fmt.Sprintf("Error patching namespace %s", namespace.Name))
 		}
diff --git a/handlers/idler/service.go b/handlers/idler/service.go
index fe7bb14..53d4ef2 100644
--- a/handlers/idler/service.go
+++ b/handlers/idler/service.go
@@ -6,6 +6,7 @@ import (
 
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/selection"
 	client "sigs.k8s.io/controller-runtime/pkg/client"
 )
 
@@ -32,6 +33,15 @@ func (h *Idler) ServiceIdler() {
 	// in kubernetes, we can reliably check for the existence of this label so that
 	// we only check namespaces that have been deployed by a lagoon at one point
 	labelRequirements := generateLabelRequirements(h.Selectors.Service.Namespace)
+	// only evaluate namespaces that are not idled
+	selector := generateSelector(idlerSelector{
+		Name:     "idling.amazee.io/idled",
+		Operator: selection.NotEquals,
+		Values: []string{
+			"true",
+		},
+	})
+	labelRequirements = append(labelRequirements, *selector)
 	listOption = (&client.ListOptions{}).ApplyOptions([]client.ListOption{
 		client.MatchingLabelsSelector{
 			Selector: labels.NewSelector().Add(labelRequirements...),
diff --git a/handlers/metrics/metrics.go b/handlers/metrics/metrics.go
new file mode 100644
index 0000000..c98b6e7
--- /dev/null
+++ b/handlers/metrics/metrics.go
@@ -0,0 +1,30 @@
+package metrics
+
+import (
+	"errors"
+	"net/http"
+	"time"
+
+	"github.com/go-logr/logr"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// NewServer starts serving prometheus metrics on addr (path /metrics) in a new
+// goroutine and returns the *http.Server so the caller can defer Shutdown().
+// A listener failure is logged via log; it is not returned to the caller.
+func NewServer(log logr.Logger, addr string) *http.Server {
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", promhttp.Handler())
+	s := http.Server{
+		Addr:         addr,
+		Handler:      mux,
+		ReadTimeout:  16 * time.Second,
+		WriteTimeout: 16 * time.Second,
+	}
+	go func() {
+		if err := s.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
+			log.Error(err, "metrics server did not shut down cleanly")
+		}
+	}()
+	return &s
+}
diff --git a/handlers/unidler/handler.go b/handlers/unidler/handler.go
index 749c110..c179910 100644
--- a/handlers/unidler/handler.go
+++ b/handlers/unidler/handler.go
@@ -92,7 +92,9 @@ func (h *Unidler) ingressHandler(path string) func(http.ResponseWriter, *http.Re
 			if allowUnidle {
 				// if a namespace exists, it means that the custom-http-errors code is defined in the ingress object
 				// so do something with that here, like kickstart the idler process to unidle targets
-				opLog.Info(fmt.Sprintf("Got request in namespace %s", ns))
+				if h.Debug {
+					opLog.Info(fmt.Sprintf("Request for %s verfied: %t from xff:%s; tcip:%s; ua: %s, ", ns, verfied, xForwardedFor, trueClientIP, requestUserAgent))
+				}
 				file := fmt.Sprintf("%v/unidle.html", path)
 
 				forceScaled := h.checkForceScaled(ctx, ns, opLog)
@@ -103,6 +105,7 @@ func (h *Unidler) ingressHandler(path string) func(http.ResponseWriter, *http.Re
 					// only unidle environments that aren't force scaled
 					// actually do the unidling here, lock to prevent multiple unidle operations from running
 					if verfied {
+						allowedRequests.Inc()
 						w.Header().Set("X-Aergia-Allowed", "true")
 						_, ok := h.Locks.Load(ns)
 						if !ok {
@@ -110,7 +113,8 @@ func (h *Unidler) ingressHandler(path string) func(http.ResponseWriter, *http.Re
 							go h.Unidle(ctx, namespace, opLog)
 						}
 					} else {
-						w.Header().Set("X-Aergia-Denied", "true")
+						verificationRequired.Inc()
+						w.Header().Set("X-Aergia-Verification-Required", "true")
 					}
 				}
 				if h.Debug == true {
@@ -133,12 +137,15 @@ func (h *Unidler) ingressHandler(path string) func(http.ResponseWriter, *http.Re
 					Verifier: signedNamespace,
 				})
 			} else {
-				// respond with 503 to match the standard request
+				// respond with forbidden
 				w.Header().Set("X-Aergia-Denied", "true")
-				h.genericError(w, r, opLog, ext, format, path, "", 503)
+				blockedRequests.Inc()
+				h.genericError(w, r, opLog, ext, format, path, "", 403)
 			}
 		} else {
+			w.Header().Set("X-Aergia-Denied", "true")
 			w.Header().Set("X-Aergia-No-Namespace", "true")
+			noNamespaceRequests.Inc()
 			h.genericError(w, r, opLog, ext, format, path, "", code)
 		}
 		h.setMetrics(r, start)
@@ -184,6 +191,7 @@ func (h *Unidler) verifyRequest(r *http.Request, ns *corev1.Namespace) (string,
 		// if hmac verification is enabled, perform the verification of the request
 		signedNamespace := hmacSigner(ns.Name, []byte(h.VerifiedSecret))
 		verifier := r.URL.Query().Get("verifier")
+		verificationRequests.Inc()
 		return signedNamespace, hmacVerifier(ns.Name, verifier, []byte(h.VerifiedSecret))
 	}
 	return "", true
diff --git a/handlers/unidler/metrics.go b/handlers/unidler/metrics.go
index 3fc88fd..f78b094 100644
--- a/handlers/unidler/metrics.go
+++ b/handlers/unidler/metrics.go
@@ -5,6 +5,9 @@ import (
 	"net/http"
 	"strconv"
 	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
 )
 
 func (h *Unidler) setMetrics(r *http.Request, start time.Time) {
@@ -16,3 +19,30 @@ func (h *Unidler) setMetrics(r *http.Request, start time.Time) {
 	h.RequestCount.WithLabelValues(proto).Inc()
 	h.RequestDuration.WithLabelValues(proto).Observe(duration)
 }
+
+var (
+	allowedRequests = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_allowed_requests",
+		Help: "The total number of requests that aergia has allowed",
+	})
+	verificationRequests = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_verification_requests",
+		Help: "The total number of verification requests that aergia has received",
+	})
+	verificationRequired = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_verification_required_requests",
+		Help: "The total number of verification required requests that aergia has received",
+	})
+	blockedRequests = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_blocked_by_block_list",
+		Help: "The total number of requests that aergia has blocked by an allow or block list rule",
+	})
+	noNamespaceRequests = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_no_namespace",
+		Help: "The total number of requests that aergia has received where no namespace was found",
+	})
+	unidleEvents = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "aergia_unidling_events",
+		Help: "The total number of events that aergia has processed to unidle environments",
+	})
+)
diff --git a/handlers/unidler/unidler.go b/handlers/unidler/unidler.go
index 214c817..b8d7bae 100644
--- a/handlers/unidler/unidler.go
+++ b/handlers/unidler/unidler.go
@@ -191,9 +191,7 @@ func (h *Unidler) Unidle(ctx context.Context, namespace *corev1.Namespace, opLog
 	// this could still result in 503 for users until the resulting services/endpoints are active and receiving traffic
 	for _, deploy := range deployments.Items {
 		opLog.Info(fmt.Sprintf("Waiting for %s to be running - %s", deploy.ObjectMeta.Name, namespace.Name))
-		timeout, cancel := context.WithTimeout(ctx, defaultPollTimeout)
-		defer cancel()
-		wait.PollUntilWithContext(timeout, defaultPollDuration, h.hasRunningPod(ctx, namespace.Name, deploy.Name))
+		wait.PollUntilContextTimeout(ctx, defaultPollDuration, defaultPollTimeout, true, h.hasRunningPod(ctx, namespace.Name, deploy.Name))
 	}
 	// remove the 503 code from any ingress objects that have it in this namespace
 	h.removeCodeFromIngress(ctx, namespace.Name, opLog)
@@ -202,10 +200,11 @@ func (h *Unidler) Unidle(ctx context.Context, namespace *corev1.Namespace, opLog
 	mergePatch, _ := json.Marshal(map[string]interface{}{
 		"metadata": map[string]interface{}{
 			"labels": map[string]string{
-				"idling.amazee.io/idled": "true",
+				"idling.amazee.io/idled": "false",
 			},
 		},
 	})
+	unidleEvents.Inc()
 	if err := h.Client.Patch(ctx, namespaceCopy, client.RawPatch(types.MergePatchType, mergePatch)); err != nil {
 		opLog.Info(fmt.Sprintf("Error patching namespace %s", namespace.Name))
 	}
diff --git a/main.go b/main.go
index e74b200..7888641 100644
--- a/main.go
+++ b/main.go
@@ -16,6 +16,7 @@ limitations under the License.
 package main
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"os"
@@ -23,6 +24,7 @@ import (
 
 	"github.com/amazeeio/aergia-controller/controllers"
 	"github.com/amazeeio/aergia-controller/handlers/idler"
+	"github.com/amazeeio/aergia-controller/handlers/metrics"
 	"github.com/amazeeio/aergia-controller/handlers/unidler"
 	u "github.com/amazeeio/aergia-controller/handlers/unidler"
 	prometheusapi "github.com/prometheus/client_golang/api"
@@ -250,6 +252,10 @@ func main() {
 		os.Exit(1)
 	}
 
+	setupLog.Info("starting aergia metrics server")
+	m := metrics.NewServer(setupLog, ":9912")
+	defer m.Shutdown(context.Background())
+
 	setupLog.Info("starting manager")
 	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
 		setupLog.Error(err, "problem running manager")