diff --git a/disperser/apiserver/server.go b/disperser/apiserver/server.go index 017d628eac..6d0b17f5bf 100644 --- a/disperser/apiserver/server.go +++ b/disperser/apiserver/server.go @@ -24,6 +24,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/reflection" ) @@ -244,7 +245,7 @@ func (s *DispersalServer) disperseBlob(ctx context.Context, blob *core.Blob, aut if err != nil { for _, param := range securityParams { quorumId := string(param.QuorumID) - s.metrics.HandleFailedRequest("400", quorumId, blobSize, "DisperseBlob") + s.metrics.HandleFailedRequest(codes.InvalidArgument.String(), quorumId, blobSize, "DisperseBlob") } return nil, api.NewInvalidArgError(err.Error()) } @@ -255,7 +256,7 @@ func (s *DispersalServer) disperseBlob(ctx context.Context, blob *core.Blob, aut if err != nil { for _, param := range securityParams { quorumId := string(param.QuorumID) - s.metrics.HandleFailedRequest("400", quorumId, blobSize, "DisperseBlob") + s.metrics.HandleFailedRequest(codes.InvalidArgument.String(), quorumId, blobSize, "DisperseBlob") } return nil, api.NewInvalidArgError(err.Error()) } @@ -265,7 +266,7 @@ func (s *DispersalServer) disperseBlob(ctx context.Context, blob *core.Blob, aut if err != nil { rateLimited := errors.Is(err, errSystemBlobRateLimit) || errors.Is(err, errSystemThroughputRateLimit) || errors.Is(err, errAccountBlobRateLimit) || errors.Is(err, errAccountThroughputRateLimit) if !rateLimited { - s.metrics.HandleFailedRequest("500", "", blobSize, "DisperseBlob") + s.metrics.HandleFailedRequest(codes.Internal.String(), "", blobSize, "DisperseBlob") return nil, api.NewInternalError(err.Error()) } return nil, api.NewResourceExhaustedError(err.Error()) @@ -555,14 +556,14 @@ func (s *DispersalServer) RetrieveBlob(ctx context.Context, req *pb.RetrieveBlob if err != nil { s.logger.Error("Failed to retrieve blob metadata", "err", err) // TODO: we need to distinguish NOT_FOUND from actual internal error. - s.metrics.IncrementFailedBlobRequestNum("500", "", "RetrieveBlob") + s.metrics.IncrementFailedBlobRequestNum(codes.Internal.String(), "", "RetrieveBlob") return nil, api.NewInternalError("failed to get blob metadata, please retry") } data, err := s.blobStore.GetBlobContent(ctx, blobMetadata.BlobHash) if err != nil { s.logger.Error("Failed to retrieve blob", "err", err) - s.metrics.HandleFailedRequest("500", "", len(data), "RetrieveBlob") + s.metrics.HandleFailedRequest(codes.Internal.String(), "", len(data), "RetrieveBlob") return nil, api.NewInternalError("failed to get blob data, please retry") } diff --git a/disperser/metrics.go b/disperser/metrics.go index 1202a4647a..5f7324ecb9 100644 --- a/disperser/metrics.go +++ b/disperser/metrics.go @@ -10,6 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + "google.golang.org/grpc/codes" ) type MetricsConfig struct { @@ -82,7 +83,7 @@ func (g *Metrics) ObserveLatency(method string, latencyMs float64) { // IncrementSuccessfulBlobRequestNum increments the number of successful blob requests func (g *Metrics) IncrementSuccessfulBlobRequestNum(quorum string, method string) { g.NumBlobRequests.With(prometheus.Labels{ - "status_code": "200", + "status_code": codes.OK.String(), "status": "success", "quorum": quorum, "method": method, @@ -122,7 +123,7 @@ func (g *Metrics) HandleFailedRequest(statusCode string, quorum string, blobByte // HandleBlobStoreFailedRequest updates the number of requests failed to store blob and the size of the blob func (g *Metrics) HandleBlobStoreFailedRequest(quorum string, blobBytes int, method string) { g.NumBlobRequests.With(prometheus.Labels{ - "status_code": "500", + "status_code": codes.Internal.String(), "status": StoreBlobFailure, "quorum": quorum, "method": method, @@ -137,7 +138,7 @@ func (g *Metrics) HandleBlobStoreFailedRequest(quorum string, blobBytes int, met // HandleSystemRateLimitedRequest updates the number of system rate limited requests and the size of the blob func (g *Metrics) HandleSystemRateLimitedRequest(quorum string, blobBytes int, method string) { g.NumBlobRequests.With(prometheus.Labels{ - "status_code": "429", + "status_code": codes.ResourceExhausted.String(), "status": SystemRateLimitedFailure, "quorum": quorum, "method": method, @@ -152,7 +153,7 @@ func (g *Metrics) HandleSystemRateLimitedRequest(quorum string, blobBytes int, m // HandleAccountRateLimitedRequest updates the number of account rate limited requests and the size of the blob func (g *Metrics) HandleAccountRateLimitedRequest(quorum string, blobBytes int, method string) { g.NumBlobRequests.With(prometheus.Labels{ - "status_code": "429", + "status_code": codes.ResourceExhausted.String(), "status": AccountRateLimitedFailure, "quorum": quorum, "method": method, diff --git a/operators/churner/metrics.go b/operators/churner/metrics.go index 8bcce907dc..61c9ecfec3 100644 --- a/operators/churner/metrics.go +++ b/operators/churner/metrics.go @@ -10,6 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + "google.golang.org/grpc/codes" ) type FailReason string @@ -28,14 +29,14 @@ const ( // Note: statusCodeMap must be maintained in sync with failure reason constants. var statusCodeMap map[FailReason]string = map[FailReason]string{ - FailReasonRateLimitExceeded: "429", - FailReasonInsufficientStakeToRegister: "400", - FailReasonInsufficientStakeToChurn: "400", - FailReasonQuorumIdOutOfRange: "400", - FailReasonPrevApprovalNotExpired: "429", - FailReasonInvalidSignature: "400", - FailReasonProcessChurnRequestFailed: "500", - FailReasonInvalidRequest: "400", + FailReasonRateLimitExceeded: codes.ResourceExhausted.String(), + FailReasonInsufficientStakeToRegister: codes.InvalidArgument.String(), + FailReasonInsufficientStakeToChurn: codes.InvalidArgument.String(), + FailReasonQuorumIdOutOfRange: codes.InvalidArgument.String(), + FailReasonPrevApprovalNotExpired: codes.ResourceExhausted.String(), + FailReasonInvalidSignature: codes.InvalidArgument.String(), + FailReasonProcessChurnRequestFailed: codes.Internal.String(), + FailReasonInvalidRequest: codes.InvalidArgument.String(), } type MetricsConfig struct { @@ -105,7 +106,7 @@ func (g *Metrics) IncrementFailedRequestNum(method string, reason FailReason) { g.logger.Error("cannot map failure reason to status code", "failure reason", reason) // Treat this as an internal server error. This is a conservative approach to // handle a negligence of mapping from failure reason to status code. - code = "500" + code = codes.Internal.String() } g.NumRequests.With(prometheus.Labels{ "status": code,