Skip to content

Commit

Permalink
Not found error handling + metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
pschork committed Apr 26, 2024
1 parent fdc52a8 commit 2734155
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
8 changes: 8 additions & 0 deletions disperser/dataapi/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ func (g *Metrics) IncrementFailedRequestNum(method string) {
}).Inc()
}

// IncrementNotFoundRequestNum bumps the request counter by one for the given
// method, recording the outcome under the "not found" status label.
func (g *Metrics) IncrementNotFoundRequestNum(method string) {
	// Build the label set first so the counter lookup reads as a single step.
	labels := prometheus.Labels{
		"method": method,
		"status": "not found",
	}
	g.NumRequests.With(labels).Inc()
}

// Start starts the metrics server
func (g *Metrics) Start(ctx context.Context) {
g.logger.Info("Starting metrics server at ", "port", g.httpPort)
Expand Down
15 changes: 8 additions & 7 deletions disperser/dataapi/operator_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package dataapi

import (
"context"
"errors"
"net"
"sort"
"time"
Expand Down Expand Up @@ -106,26 +107,26 @@ func checkIsOnlineAndProcessOperator(operatorStatus OperatorOnlineStatus, operat
func (s *server) probeOperatorPorts(ctx context.Context, operatorId string) (*OperatorPortCheckResponse, error) {
operatorInfo, err := s.subgraphClient.QueryOperatorInfoByOperatorId(context.Background(), operatorId)
if err != nil {
s.logger.Error("Failed to fetch operator", "error", err)
return &OperatorPortCheckResponse{}, err
s.logger.Warn("Failed to fetch operator info", "error", err)
return &OperatorPortCheckResponse{}, errors.New("not found")
}

retrieverSocket := core.OperatorSocket(operatorInfo.Socket).GetRetrievalSocket()
retrieverStatus := checkIsOperatorOnline(retrieverSocket)
retrieverOnline := checkIsOperatorOnline(retrieverSocket)

disperserSocket := core.OperatorSocket(operatorInfo.Socket).GetDispersalSocket()
disperserStatus := checkIsOperatorOnline(disperserSocket)
disperserOnline := checkIsOperatorOnline(disperserSocket)

// Log the online status
s.logger.Info("Operator port status", "retrieval", retrieverStatus, "retrieverSocket", retrieverSocket, "disperser", disperserStatus, "disperserSocket", disperserSocket)
s.logger.Info("Operator port status", "retrieverOnline", retrieverOnline, "retrieverSocket", retrieverSocket, "disperserOnline", disperserOnline, "disperserSocket", disperserSocket)

// Create the metadata regardless of online status
portCheckResponse := &OperatorPortCheckResponse{
OperatorId: operatorId,
DisperserSocket: disperserSocket,
RetrieverSocket: retrieverSocket,
DisperserStatus: disperserStatus,
RetrieverStatus: retrieverStatus,
DisperserOnline: disperserOnline,
RetrieverOnline: retrieverOnline,
}

// Send the metadata to the results channel
Expand Down
16 changes: 12 additions & 4 deletions disperser/dataapi/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ type (
OperatorId string `json:"operator_id"`
DisperserSocket string `json:"disperser_socket"`
RetrieverSocket string `json:"retriever_socket"`
DisperserStatus bool `json:"disperser_status"`
RetrieverStatus bool `json:"retriever_status"`
DisperserOnline bool `json:"disperser_online"`
RetrieverOnline bool `json:"retriever_online"`
}
ErrorResponse struct {
Error string `json:"error"`
Expand Down Expand Up @@ -616,10 +616,18 @@ func (s *server) OperatorPortCheck(c *gin.Context) {
}))
defer timer.ObserveDuration()

operatorId := c.DefaultQuery("operatorId", "")
operatorId := c.DefaultQuery("operator_id", "")
s.logger.Info("Checking operator ports", "operatorId", operatorId)
portCheckResponse, err := s.probeOperatorPorts(c.Request.Context(), operatorId)
if err != nil {
s.metrics.IncrementFailedRequestNum("OperatorPortCheck")
if strings.Contains(err.Error(), "not found") {
err = errNotFound
s.logger.Warn("Operator not found", "operatorId", operatorId)
s.metrics.IncrementNotFoundRequestNum("OperatorPortCheck")
} else {
s.logger.Error("Operator port check failed", "error", err)
s.metrics.IncrementFailedRequestNum("OperatorPortCheck")
}
errorResponse(c, err)
return
}
Expand Down

0 comments on commit 2734155

Please sign in to comment.