diff --git a/container/common/helpers.go b/container/common/helpers.go index 7248f3b0b8..13abf9e33e 100644 --- a/container/common/helpers.go +++ b/container/common/helpers.go @@ -437,3 +437,20 @@ func (m deviceIdentifierMap) Find(major, minor uint64, namer DeviceNamer) string m[d] = s return s } + +// RemoveNetMetrics is used to remove any network metrics from the given MetricSet. +// It returns the original set as is if remove is false, or if there are no metrics +// to remove. +func RemoveNetMetrics(metrics container.MetricSet, remove bool) container.MetricSet { + if !remove { + return metrics + } + + // Check if there is anything we can remove, to avoid useless copying. + if !metrics.HasAny(container.AllNetworkMetrics) { + return metrics + } + + // A copy of all metrics except for network ones. + return metrics.Difference(container.AllNetworkMetrics) +} diff --git a/container/common/helpers_test.go b/container/common/helpers_test.go index 70fd11be1e..b926d53c79 100644 --- a/container/common/helpers_test.go +++ b/container/common/helpers_test.go @@ -16,13 +16,16 @@ package common import ( "errors" + "fmt" "math" "os" "path/filepath" + "reflect" "testing" "github.com/stretchr/testify/assert" + "github.com/google/cadvisor/container" info "github.com/google/cadvisor/info/v1" v2 "github.com/google/cadvisor/info/v2" ) @@ -193,3 +196,54 @@ func TestGetSpecCgroupV2Max(t *testing.T) { assert.EqualValues(t, spec.Processes.Limit, max) } + +func TestRemoveNetMetrics(t *testing.T) { + for _, ts := range []struct { + desc string + in, out container.MetricSet + }{ + { + desc: "nil set", + in: nil, + }, + { + desc: "empty set", + in: container.MetricSet{}, + }, + { + desc: "nothing to remove", + in: container.MetricSet{container.MemoryUsageMetrics: struct{}{}, container.PerfMetrics: struct{}{}}, + }, + { + desc: "also nothing to remove", + in: container.AllMetrics.Difference(container.AllNetworkMetrics), + }, + { + desc: "remove net from all", + in: container.AllMetrics, + out: container.AllMetrics.Difference(container.AllNetworkMetrics), + }, + { + desc: "remove net from some", + in: container.MetricSet{container.MemoryUsageMetrics: struct{}{}, container.NetworkTcpUsageMetrics: struct{}{}}, + out: container.MetricSet{container.MemoryUsageMetrics: struct{}{}}, + }, + } { + for _, remove := range []bool{true, false} { + ts, remove := ts, remove + desc := fmt.Sprintf("%s, remove: %v", ts.desc, remove) + t.Run(desc, func(t *testing.T) { + out := RemoveNetMetrics(ts.in, remove) + if !remove || ts.out == nil { + // Compare the actual underlying pointers. Can't use assert.Same + // because it checks for pointer type, and these are maps. + if reflect.ValueOf(ts.in) != reflect.ValueOf(out) { + t.Errorf("expected original map %p, got %p", ts.in, out) + } + } else { + assert.Equal(t, ts.out, out) + } + }) + } + } +} diff --git a/container/containerd/handler.go b/container/containerd/handler.go index 848550ec6e..57a2d82c67 100644 --- a/container/containerd/handler.go +++ b/container/containerd/handler.go @@ -126,7 +126,14 @@ func newContainerdContainerHandler( Aliases: []string{id, name}, } - libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootfs, int(taskPid), includedMetrics) + // Containers that don't have their own network -- this includes + // containers running in Kubernetes pods that use the network of the + // infrastructure container -- does not need their stats to be + // reported. This stops metrics being reported multiple times for each + // container in a pod. + metrics := common.RemoveNetMetrics(includedMetrics, cntr.Labels["io.cri-containerd.kind"] != "sandbox") + + libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootfs, int(taskPid), metrics) handler := &containerdContainerHandler{ machineInfoFactory: machineInfoFactory, @@ -134,7 +141,7 @@ func newContainerdContainerHandler( fsInfo: fsInfo, envs: make(map[string]string), labels: cntr.Labels, - includedMetrics: includedMetrics, + includedMetrics: metrics, reference: containerReference, libcontainerHandler: libcontainerHandler, } @@ -164,22 +171,12 @@ func (h *containerdContainerHandler) ContainerReference() (info.ContainerReferen return h.reference, nil } -func (h *containerdContainerHandler) needNet() bool { - // Since containerd does not handle networking ideally we need to return based - // on includedMetrics list. Here the assumption is the presence of cri-containerd - // label - if h.includedMetrics.Has(container.NetworkUsageMetrics) { - //TODO change it to exported cri-containerd constants - return h.labels["io.cri-containerd.kind"] == "sandbox" - } - return false -} - func (h *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) { // TODO: Since we dont collect disk usage stats for containerd, we set hasFilesystem // to false. Revisit when we support disk usage stats for containerd hasFilesystem := false - spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem) + hasNet := h.includedMetrics.Has(container.NetworkUsageMetrics) + spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNet, hasFilesystem) spec.Labels = h.labels spec.Envs = h.envs spec.Image = h.image @@ -204,13 +201,6 @@ func (h *containerdContainerHandler) GetStats() (*info.ContainerStats, error) { if err != nil { return stats, err } - // Clean up stats for containers that don't have their own network - this - // includes containers running in Kubernetes pods that use the network of the - // infrastructure container. This stops metrics being reported multiple times - // for each container in a pod. - if !h.needNet() { - stats.Network = info.NetworkStats{} - } // Get filesystem stats. err = h.getFsStats(stats) diff --git a/container/crio/handler.go b/container/crio/handler.go index 0885778565..a68325187e 100644 --- a/container/crio/handler.go +++ b/container/crio/handler.go @@ -148,7 +148,15 @@ func newCrioContainerHandler( Namespace: CrioNamespace, } - libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootFs, cInfo.Pid, includedMetrics) + // Find out if we need network metrics reported for this container. + // Containers that don't have their own network -- this includes + // containers running in Kubernetes pods that use the network of the + // infrastructure container -- does not need their stats to be + // reported. This stops metrics being reported multiple times for each + // container in a pod. + metrics := common.RemoveNetMetrics(includedMetrics, cInfo.Labels["io.kubernetes.container.name"] != "POD") + + libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootFs, cInfo.Pid, metrics) // TODO: extract object mother method handler := &crioContainerHandler{ @@ -161,7 +169,7 @@ func newCrioContainerHandler( rootfsStorageDir: rootfsStorageDir, envs: make(map[string]string), labels: cInfo.Labels, - includedMetrics: includedMetrics, + includedMetrics: metrics, reference: containerReference, libcontainerHandler: libcontainerHandler, cgroupManager: cgroupManager, @@ -210,16 +218,10 @@ func (h *crioContainerHandler) ContainerReference() (info.ContainerReference, er return h.reference, nil } -func (h *crioContainerHandler) needNet() bool { - if h.includedMetrics.Has(container.NetworkUsageMetrics) { - return h.labels["io.kubernetes.container.name"] == "POD" - } - return false -} - func (h *crioContainerHandler) GetSpec() (info.ContainerSpec, error) { hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics) - spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem) + hasNet := h.includedMetrics.Has(container.NetworkUsageMetrics) + spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNet, hasFilesystem) spec.Labels = h.labels spec.Envs = h.envs @@ -306,13 +308,7 @@ func (h *crioContainerHandler) GetStats() (*info.ContainerStats, error) { return stats, err } - if !h.needNet() { - // Clean up stats for containers that don't have their own network - this - // includes containers running in Kubernetes pods that use the network of the - // infrastructure container. This stops metrics being reported multiple times - // for each container in a pod. - stats.Network = info.NetworkStats{} - } else if len(stats.Network.Interfaces) == 0 { + if h.includedMetrics.Has(container.NetworkUsageMetrics) && len(stats.Network.Interfaces) == 0 { // No network related information indicates that the pid of the // container is not longer valid and we need to ask crio to // provide the pid of another container from that pod diff --git a/container/docker/handler.go b/container/docker/handler.go index bfb8e38165..6ba9fa4e25 100644 --- a/container/docker/handler.go +++ b/container/docker/handler.go @@ -33,7 +33,6 @@ import ( "github.com/google/cadvisor/zfs" "github.com/opencontainers/runc/libcontainer/cgroups" - dockercontainer "github.com/docker/docker/api/types/container" docker "github.com/docker/docker/client" "golang.org/x/net/context" "k8s.io/klog/v2" @@ -72,9 +71,6 @@ type dockerContainerHandler struct { // Image name used for this container. image string - // The network mode of the container - networkMode dockercontainer.NetworkMode - // Filesystem handler. fsHandler common.FsHandler @@ -188,6 +184,9 @@ func newDockerContainerHandler( return nil, fmt.Errorf("failed to inspect container %q: %v", id, err) } + // Do not report network metrics for containers that share netns with another container. + metrics := common.RemoveNetMetrics(includedMetrics, ctnr.HostConfig.NetworkMode.IsContainer()) + // TODO: extract object mother method handler := &dockerContainerHandler{ machineInfoFactory: machineInfoFactory, @@ -198,7 +197,7 @@ func newDockerContainerHandler( rootfsStorageDir: rootfsStorageDir, envs: make(map[string]string), labels: ctnr.Config.Labels, - includedMetrics: includedMetrics, + includedMetrics: metrics, zfsParent: zfsParent, } // Timestamp returned by Docker is in time.RFC3339Nano format. @@ -207,7 +206,7 @@ func newDockerContainerHandler( // This should not happen, report the error just in case return nil, fmt.Errorf("failed to parse the create timestamp %q for container %q: %v", ctnr.Created, id, err) } - handler.libcontainerHandler = containerlibcontainer.NewHandler(cgroupManager, rootFs, ctnr.State.Pid, includedMetrics) + handler.libcontainerHandler = containerlibcontainer.NewHandler(cgroupManager, rootFs, ctnr.State.Pid, metrics) // Add the name and bare ID as aliases of the container. handler.reference = info.ContainerReference{ @@ -217,7 +216,6 @@ func newDockerContainerHandler( Namespace: DockerNamespace, } handler.image = ctnr.Config.Image - handler.networkMode = ctnr.HostConfig.NetworkMode // Only adds restartcount label if it's greater than 0 if ctnr.RestartCount > 0 { handler.labels["restartcount"] = strconv.Itoa(ctnr.RestartCount) @@ -344,16 +342,10 @@ func (h *dockerContainerHandler) ContainerReference() (info.ContainerReference, return h.reference, nil } -func (h *dockerContainerHandler) needNet() bool { - if h.includedMetrics.Has(container.NetworkUsageMetrics) { - return !h.networkMode.IsContainer() - } - return false -} - func (h *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) { hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics) - spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem) + hasNetwork := h.includedMetrics.Has(container.NetworkUsageMetrics) + spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNetwork, hasFilesystem) spec.Labels = h.labels spec.Envs = h.envs @@ -462,13 +454,6 @@ func (h *dockerContainerHandler) GetStats() (*info.ContainerStats, error) { if err != nil { return stats, err } - // Clean up stats for containers that don't have their own network - this - // includes containers running in Kubernetes pods that use the network of the - // infrastructure container. This stops metrics being reported multiple times - // for each container in a pod. - if !h.needNet() { - stats.Network = info.NetworkStats{} - } // Get filesystem stats. err = h.getFsStats(stats) diff --git a/container/factory.go b/container/factory.go index 226a6fa6e5..92212daa34 100644 --- a/container/factory.go +++ b/container/factory.go @@ -93,6 +93,14 @@ var AllMetrics = MetricSet{ OOMMetrics: struct{}{}, } +// AllNetworkMetrics represents all network metrics that cAdvisor supports. +var AllNetworkMetrics = MetricSet{ + NetworkUsageMetrics: struct{}{}, + NetworkTcpUsageMetrics: struct{}{}, + NetworkAdvancedTcpUsageMetrics: struct{}{}, + NetworkUdpUsageMetrics: struct{}{}, +} + func (mk MetricKind) String() string { return string(mk) } @@ -104,6 +112,15 @@ func (ms MetricSet) Has(mk MetricKind) bool { return exists } +func (ms MetricSet) HasAny(ms1 MetricSet) bool { + for m := range ms1 { + if _, ok := ms[m]; ok { + return true + } + } + return false +} + func (ms MetricSet) add(mk MetricKind) { ms[mk] = struct{}{} }