Skip to content

Commit

Permalink
Merge pull request #202 from orbs-network/feature/improvements
Browse files Browse the repository at this point in the history
Metrics and status improvements
  • Loading branch information
Kirill Maksimov authored Dec 28, 2020
2 parents bd4ad0c + d164a32 commit 148a77b
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v1.11.0
v1.11.1
11 changes: 11 additions & 0 deletions services/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type DiskMetric struct {

type ProcessMetric struct {
Name string
Command string
MemoryUsedMbytes float64
PID int32
ParentPID int32
Expand Down Expand Up @@ -166,12 +167,21 @@ func getProcessMetrics(ctx context.Context) (processMetrics []ProcessMetric, err
} else {
for _, p := range processes {
var name string
var cmdline string
var memoryInfo *process.MemoryInfoStat
var memoryUsed uint64
var parentPID int32

name, _ = p.NameWithContext(ctx)
memoryInfo, _ = p.MemoryInfoWithContext(ctx)
cmdline, _ = p.CmdlineWithContext(ctx)
cmdLineSlice, _ := p.CmdlineSliceWithContext(ctx)

var baseCmdLength float64
if cmdLineSliceLen := len(cmdLineSlice); cmdLineSliceLen >= 1 {
baseCmdLength = float64(len(cmdLineSlice[0]))
}
max50 := int(math.Min(baseCmdLength+50, float64(len(cmdline))))

if memoryInfo != nil {
memoryUsed = memoryInfo.RSS
Expand All @@ -184,6 +194,7 @@ func getProcessMetrics(ctx context.Context) (processMetrics []ProcessMetric, err

processMetric := ProcessMetric{
Name: name,
Command: cmdline[0:max50],
PID: p.Pid,
ParentPID: parentPID,
MemoryUsedMbytes: toMB(memoryUsed),
Expand Down
35 changes: 32 additions & 3 deletions services/report_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package services
import (
"context"
"encoding/json"
"fmt"
"github.com/orbs-network/boyarin/boyar/config"
"github.com/orbs-network/boyarin/strelets/adapter"
"github.com/orbs-network/boyarin/utils"
Expand All @@ -17,6 +18,9 @@ import (
const SERVICE_STATUS_REPORT_PERIOD = 30 * time.Second
const SERVICE_STATUS_REPORT_TIMEOUT = 15 * time.Second

// Resource usage limits, expressed in percent. They are the values quoted in
// the "usage is too high" status errors.
const (
	// MAX_CPU_LOAD is the CPU load percentage reported as the limit.
	MAX_CPU_LOAD = 75
	// MAX_MEMORY_USED is the used-memory percentage reported as the limit.
	MAX_MEMORY_USED = 75
)

func WatchAndReportStatusAndMetrics(ctx context.Context, logger log.Logger, flags *config.Flags) govnr.ShutdownWaiter {
errorHandler := utils.NewLogErrors("service status reporter", logger)
startupTimestamp := time.Now()
Expand Down Expand Up @@ -62,29 +66,37 @@ type StatusResponse struct {
Payload map[string]interface{}
}

func statusResponseWithError(flags *config.Flags, err error) StatusResponse {
// statusResponseWithError builds the StatusResponse used when querying Docker
// Swarm failed.
//
// The response carries a fixed "Failed to query Docker Swarm" status, the
// current time, the text of err, and a payload holding the boyar version, the
// supplied dockerInfo (may be nil) and the flags.
//
// err must be non-nil: err.Error() is called unconditionally.
func statusResponseWithError(flags *config.Flags, dockerInfo interface{}, err error) StatusResponse {
	now := time.Now()
	message := err.Error()

	payload := map[string]interface{}{
		"Version": version.GetVersion(),
		"Docker":  dockerInfo,
		"Config":  flags,
	}

	return StatusResponse{
		Status:    "Failed to query Docker Swarm",
		Timestamp: now,
		Error:     message,
		Payload:   payload,
	}
}

// statusFromMetrics renders a one-line summary of the given metrics: memory
// used (converted to an int number of megabytes), CPU load with two decimals
// followed by a percent sign, and the EFS access time in milliseconds.
func statusFromMetrics(metrics Metrics) string {
	const summaryFormat = "RAM = %dmb, CPU = %.2f%%, EFSAccess = %dms"

	usedMB := int(metrics.MemoryUsedMBytes)

	return fmt.Sprintf(summaryFormat, usedMB, metrics.CPULoadPercent, metrics.EFSAccessTimeMs)
}

func GetStatusAndMetrics(ctx context.Context, logger log.Logger, flags *config.Flags, startupTimestamp time.Time, dockerStatusPeriod time.Duration) (status StatusResponse, metrics Metrics) {
// We really don't need any options here since we're just observing
orchestrator, err := adapter.NewDockerSwarm(&adapter.OrchestratorOptions{}, logger)
if err != nil {
status = statusResponseWithError(flags, err)
status = statusResponseWithError(flags, nil, err)
} else {
defer orchestrator.Close()

dockerInfo, _ := orchestrator.Info(ctx)

containerStatus, err := orchestrator.GetStatus(ctx, dockerStatusPeriod)
if err != nil {
status = statusResponseWithError(flags, err)
status = statusResponseWithError(flags, dockerInfo, err)
} else {
services := make(map[string][]*adapter.ContainerStatus)
for _, s := range containerStatus {
Expand All @@ -96,6 +108,7 @@ func GetStatusAndMetrics(ctx context.Context, logger log.Logger, flags *config.F
Timestamp: time.Now(),
Payload: map[string]interface{}{
"Version": version.GetVersion(),
"Docker": dockerInfo,
"Services": services,
"Config": flags,
},
Expand All @@ -109,7 +122,23 @@ func GetStatusAndMetrics(ctx context.Context, logger log.Logger, flags *config.F
status.Error = err.Error()
}

if metrics.CPULoadPercent >= 75 {
status.Status = "CPU usage is too high"
status.Error = fmt.Sprintf("CPU usage is higher that %d%% (currently at %f%%)",
MAX_CPU_LOAD, metrics.CPULoadPercent)
}

if metrics.MemoryUsedPercent >= 75 {
status.Status = "Memory usage is too high"
status.Error = fmt.Sprintf("Memory usage is higher that %d%% (currently at %f%%)",
MAX_MEMORY_USED, metrics.MemoryUsedPercent)
}

logger.Info("cpu load", log.Float64("cpuLoad", metrics.CPULoadPercent))
logger.Info("memory load", log.Float64("memoryUsed", metrics.MemoryUsedPercent))

status.Payload["Metrics"] = metrics
status.Status = statusFromMetrics(metrics)

return
}
33 changes: 33 additions & 0 deletions services/report_status_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package services

import (
"context"
"encoding/json"
"fmt"
"github.com/orbs-network/boyarin/boyar/config"
"github.com/orbs-network/boyarin/test/helpers"
"github.com/orbs-network/scribe/log"
"github.com/stretchr/testify/require"
"testing"
"time"
)

// TestDockerVersion calls GetStatusAndMetrics and checks that the resulting
// status line has the "RAM ... CPU ... EFSAccess" shape and that the payload
// contains a non-nil "Docker" entry. The whole test is gated by
// helpers.SkipUnlessSwarmIsEnabled.
func TestDockerVersion(t *testing.T) {
	helpers.SkipUnlessSwarmIsEnabled(t)

	helpers.WithContext(func(ctx context.Context) {
		testLogger := log.DefaultTestingLogger(t)
		flags := &config.Flags{
			ConfigUrl: "http://some/fake/url",
		}

		// Only the status is inspected; the returned metrics are discarded.
		report, _ := GetStatusAndMetrics(ctx, testLogger, flags, time.Now(), 5*time.Second)

		require.Regexp(t, "RAM.*CPU.*EFSAccess.*", report.Status)

		dockerInfo := report.Payload["Docker"]
		require.NotNil(t, dockerInfo)

		// Dump the full report for manual inspection; a marshalling error is
		// ignored and would simply print an empty line.
		pretty, _ := json.MarshalIndent(report, "", " ")
		fmt.Println(string(pretty))
	})
}
2 changes: 2 additions & 0 deletions strelets/adapter/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ type Orchestrator interface {
PurgeServiceData(ctx context.Context, containerName string) error
PurgeVirtualChainData(ctx context.Context, nodeAddress string, vcId uint32, containerName string) error

Info(ctx context.Context) (interface{}, error)

io.Closer
}

Expand Down
7 changes: 7 additions & 0 deletions strelets/adapter/orchestrator_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package adapter

import (
"context"
"github.com/docker/docker/api/types"
"github.com/stretchr/testify/mock"
"time"
)
Expand Down Expand Up @@ -59,3 +60,9 @@ func (a *OrchestratorMock) PurgeVirtualChainData(ctx context.Context, nodeAddres
res := a.MethodCalled("PurgeVirtualChainData", ctx, nodeAddress, vcId, containerName)
return res.Error(1)
}

// Info returns a fixed types.Info whose ServerVersion is "Mock" and a nil
// error. The call is not routed through MethodCalled, so no expectation has
// to be registered for it; ctx is unused.
func (a *OrchestratorMock) Info(ctx context.Context) (interface{}, error) {
	mockInfo := types.Info{ServerVersion: "Mock"}
	return mockInfo, nil
}
4 changes: 4 additions & 0 deletions strelets/adapter/swarm.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,7 @@ func (d *dockerSwarmOrchestrator) RemoveService(ctx context.Context, serviceName
func (d *dockerSwarmOrchestrator) Close() error {
return d.client.Close()
}

// Info forwards to the underlying Docker client's Info call and returns its
// result and error unchanged. The first result is returned as-is even when
// the error is non-nil, so callers should check the error before relying on
// it.
func (d *dockerSwarmOrchestrator) Info(ctx context.Context) (interface{}, error) {
	info, err := d.client.Info(ctx)
	return info, err
}

0 comments on commit 148a77b

Please sign in to comment.