diff --git a/pkg/grpcfactory/server.go b/pkg/grpcfactory/server.go index adad711..3fec42e 100644 --- a/pkg/grpcfactory/server.go +++ b/pkg/grpcfactory/server.go @@ -34,9 +34,13 @@ func (s *COSIProvisionerServer) Run(ctx context.Context, registry prometheus.Reg // Function to extract exemplars from the tracing context. exemplarFromContext := func(ctx context.Context) prometheus.Labels { - if span := trace.SpanContextFromContext(ctx); span.IsSampled() { - return prometheus.Labels{"traceID": span.TraceID().String()} + span := trace.SpanContextFromContext(ctx) + if span.IsValid() && span.IsSampled() { + traceID := span.TraceID().String() + klog.V(5).InfoS("Adding exemplar with traceID", "traceID", traceID) + return prometheus.Labels{"traceID": traceID} } + klog.V(5).Info("Traces are not enabled or traceID is nil, no exemplar added") return nil } @@ -75,33 +79,11 @@ func (s *COSIProvisionerServer) Run(ctx context.Context, registry prometheus.Reg // Create the OpenTelemetry stats handler for instrumentation. otelHandler := otelgrpc.NewServerHandler() // Add gRPC server options including OpenTelemetry and Prometheus interceptors. s.listenOpts = append(s.listenOpts, - grpc.StatsHandler(otelHandler), // Register the stats handler for OpenTelemetry. - grpc.ChainUnaryInterceptor( - srvMetrics.UnaryServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext)), // Add traceID as an exemplar. 
- func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp any, err error) { - traceID := trace.SpanContextFromContext(ctx).TraceID().String() - klog.V(3).InfoS("Handling gRPC unary request", "method", info.FullMethod, "traceID", traceID) - resp, err = handler(ctx, req) - if err != nil { - klog.ErrorS(err, "Error handling gRPC unary request", "method", info.FullMethod, "traceID", traceID) - } - return resp, err - }, - ), - grpc.ChainStreamInterceptor( - srvMetrics.StreamServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext)), // Add traceID as an exemplar. - func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { - traceID := trace.SpanContextFromContext(ss.Context()).TraceID().String() - klog.V(3).InfoS("Handling gRPC stream request", "method", info.FullMethod, "traceID", traceID) - err := handler(srv, ss) - if err != nil { - klog.ErrorS(err, "Error handling gRPC stream request", "method", info.FullMethod, "traceID", traceID) - } - return err - }, - ), + grpc.StatsHandler(otelHandler), // Register the stats handler for OpenTelemetry first. + grpc.ChainUnaryInterceptor(srvMetrics.UnaryServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext))), + grpc.ChainStreamInterceptor(srvMetrics.StreamServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext))), ) // Initialize the gRPC server. 
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index ec10aaf..945c44d 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -24,7 +24,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) { Name: "s3_requests_total", Help: "Total number of S3 requests, categorized by action and status.", }, - []string{"action", "status", "trace_id"}, + []string{"action", "status"}, ) S3RequestDuration = prometheus.NewHistogramVec( @@ -34,7 +34,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) { Help: "Duration of S3 requests in seconds, categorized by action and status.", Buckets: prometheus.DefBuckets, }, - []string{"action", "status", "trace_id"}, + []string{"action", "status"}, ) IAMRequestsTotal = prometheus.NewCounterVec( @@ -43,7 +43,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) { Name: "iam_requests_total", Help: "Total number of IAM requests, categorized by action and status.", }, - []string{"action", "status", "trace_id"}, + []string{"action", "status"}, ) IAMRequestDuration = prometheus.NewHistogramVec( @@ -53,7 +53,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) { Help: "Duration of IAM requests in seconds, categorized by action and status.", Buckets: prometheus.DefBuckets, }, - []string{"action", "status", "trace_id"}, + []string{"action", "status"}, ) registry.MustRegister(S3RequestsTotal, S3RequestDuration, IAMRequestsTotal, IAMRequestDuration) diff --git a/pkg/metrics/prometheus_middleware.go b/pkg/metrics/prometheus_middleware.go index 16a4566..457386d 100644 --- a/pkg/metrics/prometheus_middleware.go +++ b/pkg/metrics/prometheus_middleware.go @@ -5,7 +5,6 @@ import ( "github.com/aws/smithy-go/middleware" "github.com/prometheus/client_golang/prometheus" - "go.opentelemetry.io/otel/trace" "k8s.io/klog/v2" ) @@ -24,13 +23,8 @@ func attachPrometheusMiddlewareMetrics(stack *middleware.Stack, requestDuration status = "error" } - traceID := "" 
- // Add traceID if available - if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() { - traceID = span.SpanContext().TraceID().String() - } - requestDuration.WithLabelValues(operationName, status, traceID).Observe(duration) - requestsTotal.WithLabelValues(operationName, status, traceID).Inc() + requestDuration.WithLabelValues(operationName, status).Observe(duration) + requestsTotal.WithLabelValues(operationName, status).Inc() })) defer timer.ObserveDuration()