diff --git a/cmd/ssh-portal-api/serve.go b/cmd/ssh-portal-api/serve.go index feddb18d..aa38f49c 100644 --- a/cmd/ssh-portal-api/serve.go +++ b/cmd/ssh-portal-api/serve.go @@ -13,6 +13,11 @@ import ( "github.com/uselagoon/ssh-portal/internal/metrics" "github.com/uselagoon/ssh-portal/internal/rbac" "github.com/uselagoon/ssh-portal/internal/sshportalapi" + "golang.org/x/sync/errgroup" +) + +const ( + metricsPort = ":9911" ) // ServeCmd represents the serve command. @@ -31,10 +36,6 @@ type ServeCmd struct { // Run the serve command to ssh-portal API requests. func (cmd *ServeCmd) Run(log *slog.Logger) error { - // metrics needs a separate context because deferred Shutdown() will exit - // immediately the context is done, which is the case for ctx on SIGTERM. - m := metrics.NewServer(log, ":9911") - defer m.Shutdown(context.Background()) //nolint:errcheck // get main process context, which cancels on SIGTERM ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM) defer stop() @@ -65,6 +66,14 @@ func (cmd *ServeCmd) Run(log *slog.Logger) error { if err != nil { return fmt.Errorf("couldn't init keycloak client: %v", err) } - // start serving NATS requests - return sshportalapi.ServeNATS(ctx, stop, log, p, l, k, cmd.NATSURL) + // set up goroutine handler + eg, ctx := errgroup.WithContext(ctx) + // start the metrics server + metrics.Serve(ctx, eg, metricsPort) + // start serving SSH token requests + eg.Go(func() error { + // start serving NATS requests + return sshportalapi.ServeNATS(ctx, stop, log, p, l, k, cmd.NATSURL) + }) + return eg.Wait() } diff --git a/cmd/ssh-portal/serve.go b/cmd/ssh-portal/serve.go index 1ef4c827..ae7dd558 100644 --- a/cmd/ssh-portal/serve.go +++ b/cmd/ssh-portal/serve.go @@ -12,6 +12,11 @@ import ( "github.com/uselagoon/ssh-portal/internal/k8s" "github.com/uselagoon/ssh-portal/internal/metrics" "github.com/uselagoon/ssh-portal/internal/sshserver" + "golang.org/x/sync/errgroup" +) + +const ( + metricsPort = ":9912" ) // ServeCmd represents the serve command. @@ -26,10 +31,6 @@ type ServeCmd struct { // Run the serve command to handle SSH connection requests. func (cmd *ServeCmd) Run(log *slog.Logger) error { - // metrics needs a separate context because deferred Shutdown() will exit - // immediately the context is done, which is the case for ctx on SIGTERM. - m := metrics.NewServer(log, ":9912") - defer m.Shutdown(context.Background()) //nolint:errcheck // get main process context, which cancels on SIGTERM ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM) defer stop() @@ -60,6 +61,7 @@ func (cmd *ServeCmd) Run(log *slog.Logger) error { if err != nil { return fmt.Errorf("couldn't listen on port %d: %v", cmd.SSHServerPort, err) } + defer l.Close() // get kubernetes client c, err := k8s.NewClient() if err != nil { @@ -72,6 +74,14 @@ func (cmd *ServeCmd) Run(log *slog.Logger) error { hostkeys = append(hostkeys, []byte(hk)) } } - // start serving SSH connection requests - return sshserver.Serve(ctx, log, nc, l, c, hostkeys, cmd.LogAccessEnabled) + // set up goroutine handler + eg, ctx := errgroup.WithContext(ctx) + // start the metrics server + metrics.Serve(ctx, eg, metricsPort) + // start serving SSH token requests + eg.Go(func() error { + // start serving SSH connection requests + return sshserver.Serve(ctx, log, nc, l, c, hostkeys, cmd.LogAccessEnabled) + }) + return eg.Wait() } diff --git a/cmd/ssh-token/serve.go b/cmd/ssh-token/serve.go index 8bb29799..2e88225f 100644 --- a/cmd/ssh-token/serve.go +++ b/cmd/ssh-token/serve.go @@ -14,6 +14,11 @@ import ( "github.com/uselagoon/ssh-portal/internal/metrics" "github.com/uselagoon/ssh-portal/internal/rbac" "github.com/uselagoon/ssh-portal/internal/sshtoken" + "golang.org/x/sync/errgroup" +) + +const ( + metricsPort = ":9948" ) // ServeCmd represents the serve command. @@ -37,10 +42,6 @@ type ServeCmd struct { // Run the serve command to ssh-portal API requests. func (cmd *ServeCmd) Run(log *slog.Logger) error { - // metrics needs a separate context because deferred Shutdown() will exit - // immediately the context is done, which is the case for ctx on SIGTERM. - m := metrics.NewServer(log, ":9948") - defer m.Shutdown(context.Background()) //nolint:errcheck // get main process context, which cancels on SIGTERM ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM) defer stop() @@ -85,6 +86,7 @@ func (cmd *ServeCmd) Run(log *slog.Logger) error { if err != nil { return fmt.Errorf("couldn't listen on port %d: %v", cmd.SSHServerPort, err) } + defer l.Close() // check for persistent host key arguments var hostkeys [][]byte for _, hk := range []string{cmd.HostKeyECDSA, cmd.HostKeyED25519, @@ -93,7 +95,14 @@ func (cmd *ServeCmd) Run(log *slog.Logger) error { hostkeys = append(hostkeys, []byte(hk)) } } + // set up goroutine handler + eg, ctx := errgroup.WithContext(ctx) + // start the metrics server + metrics.Serve(ctx, eg, metricsPort) // start serving SSH token requests - return sshtoken.Serve(ctx, log, l, p, ldb, keycloakToken, keycloakPermission, - hostkeys) + eg.Go(func() error { + return sshtoken.Serve(ctx, log, l, p, ldb, keycloakToken, keycloakPermission, + hostkeys) + }) + return eg.Wait() } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 1bb27b52..4082db3f 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -2,29 +2,46 @@ package metrics import ( - "log/slog" + "context" + "fmt" "net/http" "time" "github.com/prometheus/client_golang/prometheus/promhttp" + "golang.org/x/sync/errgroup" ) -// NewServer returns a *http.Server serving prometheus metrics in a new -// goroutine. -// Caller should defer Shutdown() for cleanup. -func NewServer(log *slog.Logger, addr string) *http.Server { +const ( + metricsReadTimeout = 2 * time.Second + metricsShutdownTimeout = 2 * time.Second +) + +// Serve runs a prometheus metrics server in goroutines managed by eg. It will +// gracefully exit with a two second timeout. +// Callers should Wait() on eg before exiting. +func Serve(ctx context.Context, eg *errgroup.Group, metricsPort string) { + // configure metrics server mux := http.NewServeMux() mux.Handle("/metrics", promhttp.Handler()) - s := http.Server{ - Addr: addr, + metricsSrv := http.Server{ + Addr: metricsPort, + ReadTimeout: metricsReadTimeout, + WriteTimeout: metricsReadTimeout, Handler: mux, - ReadTimeout: 16 * time.Second, - WriteTimeout: 16 * time.Second, } - go func() { - if err := s.ListenAndServe(); err != http.ErrServerClosed { - log.Error("metrics server did not shut down cleanly", slog.Any("error", err)) + // start metrics server + eg.Go(func() error { + if err := metricsSrv.ListenAndServe(); err != http.ErrServerClosed { + return fmt.Errorf("metrics server exited with error: %v", err) } - }() - return &s + return nil + }) + // start metrics server shutdown handler for graceful shutdown + eg.Go(func() error { + <-ctx.Done() + timeoutCtx, cancel := + context.WithTimeout(context.Background(), metricsShutdownTimeout) + defer cancel() + return metricsSrv.Shutdown(timeoutCtx) + }) } diff --git a/internal/sshtoken/authhandler.go b/internal/sshtoken/authhandler.go index a7258d28..c32ec40c 100644 --- a/internal/sshtoken/authhandler.go +++ b/internal/sshtoken/authhandler.go @@ -59,7 +59,7 @@ func pubKeyAuth(log *slog.Logger, ldb LagoonDBService) ssh.PublicKeyHandler { authnSuccessTotal.Inc() ctx.SetValue(userUUID, user.UUID) log.Info("authentication successful", - slog.String("userID", user.UUID.String())) + slog.String("userUUID", user.UUID.String())) return true } }