Skip to content

Commit

Permalink
<review commit> remove traceID from metric labels due to high cardinality
Browse files Browse the repository at this point in the history
  • Loading branch information
anurag4DSB committed Jan 8, 2025
1 parent 438bf10 commit fc7a2f8
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 39 deletions.
37 changes: 10 additions & 27 deletions pkg/grpcfactory/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@ func (s *COSIProvisionerServer) Run(ctx context.Context, registry prometheus.Reg

// Function to extract exemplars from the tracing context.
exemplarFromContext := func(ctx context.Context) prometheus.Labels {
if span := trace.SpanContextFromContext(ctx); span.IsSampled() {
return prometheus.Labels{"traceID": span.TraceID().String()}
span := trace.SpanContextFromContext(ctx)
if span.IsValid() && span.IsSampled() {
traceID := span.TraceID().String()
klog.V(5).InfoS("Adding exemplar with traceID", "traceID", traceID)
return prometheus.Labels{"traceID": traceID}
}
klog.V(5).Info("Traces are not enabled or traceID is nil, no exemplar added")
return nil
}

Expand Down Expand Up @@ -75,33 +79,12 @@ func (s *COSIProvisionerServer) Run(ctx context.Context, registry prometheus.Reg
// Create the OpenTelemetry stats handler for instrumentation.
otelHandler := otelgrpc.NewServerHandler()

// Add gRPC server options including OpenTelemetry and Prometheus interceptors.
// Add gRPC server options including OpenTelemetry and Prometheus interceptors.
s.listenOpts = append(s.listenOpts,
grpc.StatsHandler(otelHandler), // Register the stats handler for OpenTelemetry.
grpc.ChainUnaryInterceptor(
srvMetrics.UnaryServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext)), // Add traceID as an exemplar.
func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp any, err error) {
traceID := trace.SpanContextFromContext(ctx).TraceID().String()
klog.V(3).InfoS("Handling gRPC unary request", "method", info.FullMethod, "traceID", traceID)
resp, err = handler(ctx, req)
if err != nil {
klog.ErrorS(err, "Error handling gRPC unary request", "method", info.FullMethod, "traceID", traceID)
}
return resp, err
},
),
grpc.ChainStreamInterceptor(
srvMetrics.StreamServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext)), // Add traceID as an exemplar.
func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
traceID := trace.SpanContextFromContext(ss.Context()).TraceID().String()
klog.V(3).InfoS("Handling gRPC stream request", "method", info.FullMethod, "traceID", traceID)
err := handler(srv, ss)
if err != nil {
klog.ErrorS(err, "Error handling gRPC stream request", "method", info.FullMethod, "traceID", traceID)
}
return err
},
),
grpc.StatsHandler(otelHandler), // Register the stats handler for OpenTelemetry first.
grpc.ChainUnaryInterceptor(srvMetrics.UnaryServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext))),
grpc.ChainStreamInterceptor(srvMetrics.StreamServerInterceptor(grpcprom.WithExemplarFromContext(exemplarFromContext))),
)

// Initialize the gRPC server.
Expand Down
8 changes: 4 additions & 4 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) {
Name: "s3_requests_total",
Help: "Total number of S3 requests, categorized by action and status.",
},
[]string{"action", "status", "trace_id"},
[]string{"action", "status"},
)

S3RequestDuration = prometheus.NewHistogramVec(
Expand All @@ -34,7 +34,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) {
Help: "Duration of S3 requests in seconds, categorized by action and status.",
Buckets: prometheus.DefBuckets,
},
[]string{"action", "status", "trace_id"},
[]string{"action", "status"},
)

IAMRequestsTotal = prometheus.NewCounterVec(
Expand All @@ -43,7 +43,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) {
Name: "iam_requests_total",
Help: "Total number of IAM requests, categorized by action and status.",
},
[]string{"action", "status", "trace_id"},
[]string{"action", "status"},
)

IAMRequestDuration = prometheus.NewHistogramVec(
Expand All @@ -53,7 +53,7 @@ func InitializeMetrics(prefix string, registry prometheus.Registerer) {
Help: "Duration of IAM requests in seconds, categorized by action and status.",
Buckets: prometheus.DefBuckets,
},
[]string{"action", "status", "trace_id"},
[]string{"action", "status"},
)

registry.MustRegister(S3RequestsTotal, S3RequestDuration, IAMRequestsTotal, IAMRequestDuration)
Expand Down
10 changes: 2 additions & 8 deletions pkg/metrics/prometheus_middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (

"github.com/aws/smithy-go/middleware"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/trace"
"k8s.io/klog/v2"
)

Expand All @@ -24,13 +23,8 @@ func attachPrometheusMiddlewareMetrics(stack *middleware.Stack, requestDuration
status = "error"
}

traceID := ""
// Add traceID if available
if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
traceID = span.SpanContext().TraceID().String()
}
requestDuration.WithLabelValues(operationName, status, traceID).Observe(duration)
requestsTotal.WithLabelValues(operationName, status, traceID).Inc()
requestDuration.WithLabelValues(operationName, status).Observe(duration)
requestsTotal.WithLabelValues(operationName, status).Inc()
}))
defer timer.ObserveDuration()

Expand Down

0 comments on commit fc7a2f8

Please sign in to comment.