diff --git a/.gitignore b/.gitignore
index 0617875..1714718 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
/systemd_exporter
/bin/golangci-lint
+.idea
+coverage.txt
diff --git a/.golangci.yml b/.golangci.yml
new file mode 100644
index 0000000..686f7cc
--- /dev/null
+++ b/.golangci.yml
@@ -0,0 +1,5 @@
+issues:
+ exclude:
+ - "not declared by package utf8"
+ - "unicode/utf8/utf8.go"
+
diff --git a/.travis.yml b/.travis.yml
index 1ca29c5..9011808 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,36 @@
-language: go
+# TODO Use CodeCov 'Flags' to separate coverage for integration tests and unit
+# tests. Near 100% on integration is expected - these tests cast a wide net to
+# catch many issues (but do not easily tell you where the issue is). Near 100%
+# on unit tests is a feat of heroism - these tests identify a specific code
+# chunk with an issue. We mainly care about 100% on unit tests, but 100% on
+# integration is an easy win and a nice to have
+#
+# See https://docs.codecov.io/docs/flags
+# Use go get github.com/stristr/go-acc && go-acc ./...
+# Or use coverpkg=github.com/povilasv/systemd_exporter,github.com/povilasv/systemd_exporter/systemd
+# Setting the language to Go gives us a default build script: Travis installs
+# the requested Go version and then runs make.
+language: go
go:
- "1.x"
+
+# before_script is declared once, further down (mapping keys must be unique in YAML)
+os: linux
+
+go:
+ - 1.x
+
+before_script: systemd --version && systemctl list-units
+
+after_success:
+ - bash <(curl -s https://codecov.io/bash)
+
+jobs:
+ include:
+ - dist: xenial
+ name: xenial-229
+ - dist: bionic
+ name: bionic-237
+
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b34087b..60ea02d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,20 +2,18 @@
### **Breaking changes**
-* `systemd_unit_state` label `type` has new meaning
- Now shows Unit type (`service`, `scope`, etc), not Service Unit types (`simple`, `forking`, etc)
- or mount unit types(`aufs`,`ext3`, etc). Service and mount types have been moved to `systemd_unit_info`
+* `systemd_unit_state` label `type` has new meaning. Previously `type` contained service unit type (`simple`, `forking`, etc) or mount unit types (`aufs`, `ext3`, etc). Now `systemd_unit_state{type}` contains overall unit type (`service`, `scope`, etc) to allow easy PromQL group by clauses. Service and mount types have been moved to `systemd_unit_info`
### Changes
- [FEATURE] Read unit CPU usage from cgroup. Added `systemd_unit_cpu_seconds_total` metric. **Note** - Untested on unified hierarchy
- [FEATURE] Add `systemd_unit_info` with metainformation about units incl. subtype specific info
- [ENHANCEMENT] Added `type` label to all metrics named `systemd_unit-*` to support PromQL grouping
-* [ENHANCEMENT] `systemd_unit_state` works for all unit types, not just service and mount units
-* [ENHANCEMENT] Scrapes are approx 80% faster. If needed, set GOMAXPROCS to limit max concurrency
-* [CHANGE] Start tracking metric cardinality in readme
-* [CHANGE] Expanded default set of unit types monitored. Only device unit types are not enabled by default
-* [BUGFIX] `timer_last_trigger_seconds` metric is now exported as expected for all timers
+- [ENHANCEMENT] `systemd_unit_state` works for all unit types, not just service and mount units
+- [ENHANCEMENT] Scrapes are approx 80% faster. If needed, set GOMAXPROCS to limit max concurrency
+- [CHANGE] Start tracking metric cardinality in readme
+- [CHANGE] Expanded default set of unit types monitored. Only device unit types are not enabled by default
+- [BUGFIX] `timer_last_trigger_seconds` metric is now exported as expected for all timers
## 0.2.0 / 2019-03-20
diff --git a/Makefile b/Makefile
index 5c07f3a..c13eed5 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BRANCH := $(shell git branch | grep \* | cut -d ' ' -f2)
LINT_FLAGS := run --deadline=120s
LINTER := ./bin/golangci-lint
-TESTFLAGS := -v -cover
+TEST_FLAGS := -v -cover -race -coverprofile=coverage.txt -covermode=atomic
GO111MODULE := on
all: $(LINTER) deps test lint build
@@ -23,7 +23,12 @@ deps:
.PHONY: test
test:
- go test $(TESTFLAGS) ./...
+ifdef TRAVIS
+ sudo sh -c 'echo DefaultCPUAccounting=yes >> /etc/systemd/system.conf'
+ sudo sh -c 'echo DefaultMemoryAccounting=yes >> /etc/systemd/system.conf'
+ sudo systemctl daemon-reload
+endif
+ go test $(TEST_FLAGS) ./...
.PHONY: build
build: deps
diff --git a/README.md b/README.md
index 9d52ad1..e336437 100644
--- a/README.md
+++ b/README.md
@@ -79,15 +79,15 @@ Note that a number of unit types are filtered by default
| ----------------------------------------- | ----------- | -------- | ------------------------------------------------------------------ |
| systemd_exporter_build_info | Gauge | UNSTABLE | 1 per systemd-exporter |
| systemd_unit_info | Gauge | UNSTABLE | 1 per service + 1 per mount |
-| systemd_unit_cpu_seconds_total | Gauge | UNSTABLE | 2 per mount/scope/slice/socket/swap {mode="system/user"} |
+| systemd_unit_cpu_seconds_total | Counter | UNSTABLE | <sup>1</sup>2 per mount/scope/slice/socket/swap {mode="system/user"} |
| systemd_unit_state | Gauge | UNSTABLE | 5 per unit {state="activating/active/deactivating/failed/inactive} |
| systemd_unit_tasks_current | Gauge | UNSTABLE | 1 per service |
| systemd_unit_tasks_max | Gauge | UNSTABLE | 1 per service |
| systemd_unit_start_time_seconds | Gauge | UNSTABLE | 1 per service |
-| systemd_service_restart_total | Gauge | UNSTABLE | 1 per service |
+| systemd_service_restart_total | Counter | UNSTABLE | 1 per service |
| systemd_socket_accepted_connections_total | Counter | UNSTABLE | 1 per socket |
| systemd_socket_current_connections | Gauge | UNSTABLE | 1 per socket |
-| systemd_socket_refused_connections_total | Gauge | UNSTABLE | 1 per socket |
+| systemd_socket_refused_connections_total | Counter | UNSTABLE | 1 per socket. Requires systemd>239 |
| systemd_timer_last_trigger_seconds | Gauge | UNSTABLE | 1 per timer |
| systemd_process_resident_memory_bytes | Gauge | UNSTABLE | 1 per service |
| systemd_process_virtual_memory_bytes | Gauge | UNSTABLE | 1 per service |
@@ -95,3 +95,5 @@ Note that a number of unit types are filtered by default
| systemd_process_open_fds | Gauge | UNSTABLE | 1 per service |
| systemd_process_max_fds | Gauge | UNSTABLE | 1 per service |
| systemd_process_cpu_seconds_total | Counter | UNSTABLE | 1 per service |
+
+<sup>1</sup>Only present for units which have systemd `CPUAccounting` enabled
diff --git a/cgroup/Readme.md b/cgroup/Readme.md
new file mode 100644
index 0000000..01dfd92
--- /dev/null
+++ b/cgroup/Readme.md
@@ -0,0 +1,49 @@
+
+This package provides functions to retrieve control group metrics from the pseudo-filesystem mounted at `/sys/fs/cgroup/`.
+
+**WARNING:** This package is a work in progress. Its API may still break in backwards-incompatible ways without warnings. Use it at your own risk.
+
+The Linux kernel supports two APIs for userspace to interact with control groups, the v1 API and the v2 API. See
+[this LWN Article](https://lwn.net/Articles/679786/) or
+[this kernel documentation](https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#deprecated-v1-core-features)
+for background on the two APIs. This package will interact with both v1 and v2 APIs.
+
+
+### Focus on Systemd
+
+This package is initially focused on reading metrics for systemd units. Therefore,
+the following systemd documentation is relevant.
+
+#### Systemd cgroup mount mode
+
+The kernel can mount the cgroupfs in any manner it chooses. However, anyone wanting to use that cgroupfs must know
+where/how it is mounted. When there was only one cgroup API, it was always mounted at `/sys/fs/cgroup`. With the
+transition from v1 to v2, the mounting approach differs per-distro, with some mounting only v2, some mounting only
+v1(all hierarchies), and some mounting a combination. For simplicity, this package initially focuses on the three
+mount "modes" supported by systemd:
+
+via [systemd.io](https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-)
+
+1. Unified — this is the simplest mode, and exposes a pure cgroup v2 logic
+2. Legacy — this is the traditional cgroup v1 mode. In this mode the various controllers each get their own cgroup
+   file system mounted to `/sys/fs/cgroup/<controller>/`
+3. Hybrid — this is a hybrid between the unified and legacy mode. It's set up mostly like legacy
+
+#### Systemd Supported Controllers
+
+The initial target controllers this package aims to read from are the controllers supported by systemd. Reading from
+other controllers may be supported in the future. Systemd guarantees that all v1 hierarchies are kept in sync.
+
+Via [systemd.io](https://systemd.io/CGROUP_DELEGATION/#controller-support):
+
+Systemd supports a number of controllers (but not all). Specifically, supported are:
+
+on cgroup v1: cpu, cpuacct, blkio, memory, devices, pids
+on cgroup v2: cpu, io, memory, pids
+
+It is our intention to natively support all cgroup v2 controllers as they are added
+to the kernel. However, regarding cgroup v1: at this point we will not add support
+for any other controllers anymore. This means systemd currently does not and will
+never manage the following controllers on cgroup v1: freezer, cpuset, net_cls,
+perf_event, net_prio, hugetlb
+
diff --git a/cgroup/cgroup.go b/cgroup/cgroup.go
new file mode 100644
index 0000000..31945b9
--- /dev/null
+++ b/cgroup/cgroup.go
@@ -0,0 +1,175 @@
+package cgroup
+
+import (
+ "fmt"
+ "github.com/pkg/errors"
+ "github.com/prometheus/common/log"
+ "golang.org/x/sys/unix"
+ "os"
+ "path/filepath"
+)
+
+// FS is a handle on the cgroupfs pseudo-filesystem: the directory it is mounted
+// at plus the hierarchy layout (MountMode) used to build paths inside it
+type FS struct {
+ mountPoint string
+
+ // WARNING: the mount mode is stored when the FS is built and not refreshed,
+ // so systemd updates may require restarting systemd-exporter
+ cgroupUnified MountMode
+}
+
+// DefaultMountPoint is the conventional mount point of the cgroupfs filesystem; NewDefaultFS uses it
+const DefaultMountPoint = "/sys/fs/cgroup"
+
+// NewDefaultFS returns a cgroup FS rooted at DefaultMountPoint, using the mount mode detected by
+// cgUnifiedCached. It errors if detection fails, yields MountModeUnknown, or NewFS rejects the path.
+func NewDefaultFS() (FS, error) {
+	mode, err := cgUnifiedCached()
+	if err != nil {
+		return FS{}, fmt.Errorf("could not determine cgroupfs mount mode: %s", err)
+	}
+	if mode == MountModeUnknown {
+		return FS{}, fmt.Errorf("could not determine cgroupfs mount mode: unrecognized layout under %s", DefaultMountPoint)
+	}
+	return NewFS(DefaultMountPoint, mode)
+}
+
+// NewFS builds a cgroup FS for the given mountPoint, which must exist and be a
+// directory. mountMode is stored as supplied; it is not checked against the mount.
+func NewFS(mountPoint string, mountMode MountMode) (FS, error) {
+	stat, err := os.Stat(mountPoint)
+	switch {
+	case err != nil:
+		return FS{}, fmt.Errorf("could not read %s: %s", mountPoint, err)
+	case !stat.IsDir():
+		return FS{}, fmt.Errorf("mount point %s is not a directory", mountPoint)
+	}
+	return FS{mountPoint: mountPoint, cgroupUnified: mountMode}, nil
+}
+
+// path joins the given elements onto the filesystem mount point with
+// filepath.Join, which inserts separators where needed.
+func (fs FS) path(p ...string) string {
+	return filepath.Join(append([]string{fs.mountPoint}, p...)...)
+}
+
+// MountMode describes how the cgroup filesystems are laid out under /sys/fs/cgroup. Kernels using the cgroups-v1
+// API typically expose many controller hierarchies, each its own filesystem with its own mount point, while the
+// cgroups-v2 API exposes a single unified hierarchy. For backward compatibility both are often mounted side by side,
+// so systemd inspects the mounts under /sys/fs/cgroup to decide which layout is in effect (see each constant below).
+// This type corresponds to the unified_cache variable in systemd/src/basic/cgroup-util.c
+type MountMode int8
+
+const (
+	// MountModeUnknown indicates the mount pattern of the cgroup filesystems in /sys/fs/cgroup was
+	// not recognized. systemd source calls this mode CGROUP_UNIFIED_UNKNOWN
+	MountModeUnknown MountMode = iota
+	// MountModeLegacy indicates both systemd and individual cgroups use cgroup-v1 hierarchies, typically one
+	// mount point per hierarchy, with no cgroup-v2 unified hierarchy. systemd calls this CGROUP_UNIFIED_NONE
+	MountModeLegacy
+	// MountModeHybrid indicates the systemd controller uses the cgroup-v2 unified hierarchy to organize processes while all
+	// other cgroups use cgroup-v1 legacy hierarchies. systemd calls this CGROUP_UNIFIED_SYSTEMD (plus a unified_systemd_v232 flag)
+	MountModeHybrid
+	// MountModeUnified indicates cgroup-v2 API is in full usage and there are no cgroup-v1 hierarchies mounted.
+	// Non-updated programs (e.g. container orchestrators such as docker/runc) that rely on cgroup-v1 mounts will break.
+	// systemd source calls this CGROUP_UNIFIED_ALL
+	MountModeUnified
+)
+
+// String returns systemd's short name for the mode; values outside the declared constants render as "MountMode(n)".
+func (c MountMode) String() string {
+	if names := [...]string{"unknown", "none", "systemd", "all"}; c >= 0 && int(c) < len(names) {
+		return names[c]
+	}
+	return fmt.Sprintf("MountMode(%d)", int8(c))
+}
+
+// Filesystem magic numbers compared against Statfs_t.Type; values copied from https://github.com/torvalds/linux/blob/master/include/uapi/linux/magic.h
+const (
+ tmpFsMagic = 0x01021994
+ cgroupSuperMagic = 0x27e0eb
+ cgroup2SuperMagic = 0x63677270
+)
+
+// statfsFunc is the statfs implementation used by cgUnifiedCached. It is a
+// variable so that tests can substitute a fake.
+var statfsFunc = unix.Statfs
+
+// cgUnifiedCached checks the filesystem types mounted under /sys/fs/cgroup to determine
+// which systemd layout (legacy/hybrid/unified) is in use. It returns MountModeUnknown
+// with a non-nil error when a required statfs call fails or reports an unexpected type.
+// We do not bother to track unified_systemd_v232 as our usage does not
+// depend on reading the systemd hierarchy directly, we only focus on reading
+// the controllers. If you care if /sys/fs/cgroup/systemd is v1 or v2 you need
+// to track this
+// NOTE: despite the name, nothing is cached here; every call repeats the statfs probes.
+// Callers that keep the result (e.g. in FS) may need a restart after systemd updates.
+// Equivalent to systemd cgroup-util.c#cg_unified_cached
+func cgUnifiedCached() (MountMode, error) {
+	var fs unix.Statfs_t
+	err := statfsFunc("/sys/fs/cgroup/", &fs)
+	if err != nil {
+		return MountModeUnknown, errors.Wrapf(err, "failed statfs(/sys/fs/cgroup)")
+	}
+
+	switch fs.Type {
+	case cgroup2SuperMagic:
+		log.Debugf("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy")
+		return MountModeUnified, nil
+	case tmpFsMagic:
+		err = statfsFunc("/sys/fs/cgroup/unified/", &fs)
+
+		// Ignore err, we expect path to be missing on v232
+		if err == nil && fs.Type == cgroup2SuperMagic {
+			log.Debugf("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller")
+			return MountModeHybrid, nil
+		}
+
+		err = statfsFunc("/sys/fs/cgroup/systemd/", &fs)
+		if err != nil {
+			return MountModeUnknown, errors.Wrapf(err, "failed statfs(/sys/fs/cgroup/systemd)")
+		}
+
+		switch fs.Type {
+		case cgroup2SuperMagic:
+			log.Debugf("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)")
+			return MountModeHybrid, nil
+		case cgroupSuperMagic:
+			log.Debugf("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy")
+			return MountModeLegacy, nil
+		default:
+			return MountModeUnknown, errors.Errorf("unknown magic number %x for fstype returned by statfs(/sys/fs/cgroup/systemd)", fs.Type)
+		}
+
+	default:
+		return MountModeUnknown, errors.Errorf("unknown magic number %x for fstype returned by statfs(/sys/fs/cgroup)", fs.Type)
+	}
+}
+
+// cgGetPath returns the absolute path for a specific file in a specific controller
+// in the specific cgroup denoted by the passed subpath. It fails when the mount mode
+// stored in fs is unknown or not one of the declared modes.
+// Input examples: ("cpu", "/system.slice", "cpuacct.usage_all")
+// Output example (legacy/hybrid): <mountPoint>/cpu/system.slice/cpuacct.usage_all
+func (fs FS) cgGetPath(controller string, subpath string, suffix string) (string, error) {
+	// Relevant systemd source code is cg_get_path in cgroup-util.[h|c]; like it,
+	// this joins the controller name with the base path.
+	//
+	// TODO Ensure controller name is valid
+	// TODO Convert controller name into guaranteed valid directory name
+	controllerDir := controller
+
+	switch fs.cgroupUnified {
+	case MountModeUnknown:
+		return "", errors.Errorf("Cannot determine path with unknown mounting hierarchy")
+	case MountModeLegacy, MountModeHybrid:
+		// the path includes the controller directory
+		return fs.path(controllerDir, subpath, suffix), nil
+	case MountModeUnified:
+		// no controller directory is added in unified mode
+		return fs.path(subpath, suffix), nil
+	default:
+		return "", errors.Errorf("unknown cgroup mount mode (e.g. unified mode) %d", fs.cgroupUnified)
+	}
+}
diff --git a/cgroup/cgroup_test.go b/cgroup/cgroup_test.go
new file mode 100644
index 0000000..9c76d4a
--- /dev/null
+++ b/cgroup/cgroup_test.go
@@ -0,0 +1,174 @@
+package cgroup
+
+import (
+ "errors"
+ "golang.org/x/sys/unix"
+ "os"
+ "testing"
+)
+
+const (
+ testFixturesHybrid = "fixtures/cgroup-hybrid"
+)
+
+func TestMountModeParsing(t *testing.T) {
+	// This test cannot (easily) use test fixtures, because it relies on being
+	// able to call Statfs on mounted filesystems. So we only run inside
+	// system where we expect to find cgroupfs mounted in a mode systemd expects.
+	// For now, that's only inside TravisCI, but in future we may expand to run
+	// this by default on certain Linux systems
+	if _, inTravisCI := os.LookupEnv("TRAVIS"); !inTravisCI {
+		t.Skip("needs a real cgroupfs mount; only run inside Travis CI")
+	}
+
+	if _, err := NewDefaultFS(); err != nil {
+		t.Errorf("expected success determining mount type inside of travis CI: %s", err)
+	}
+}
+
+
+func TestCgUnifiedCached(t *testing.T) {
+	// Build some functions we will use to simulate various cgroup mounting scenarios
+	noCgroupMount := func(path string, stat *unix.Statfs_t) error {
+		// No fs present on /sys/fs/cgroup/
+		return errors.New("boo")
+	}
+	unknownCgroupMount := func(path string, stat *unix.Statfs_t) error {
+		// Unknown fs type present on /sys/fs/cgroup/
+		stat.Type = 0x0
+		return nil
+	}
+	unifiedMount := func(path string, stat *unix.Statfs_t) error {
+		// unified fs present
+		switch path {
+		case "/sys/fs/cgroup/":
+			stat.Type = cgroup2SuperMagic
+			return nil
+		default:
+			return errors.New("pretend path not found")
+		}
+	}
+	hybridMountSystemdV232 := func(path string, stat *unix.Statfs_t) error {
+		switch path {
+		case "/sys/fs/cgroup/":
+			stat.Type = tmpFsMagic
+		case "/sys/fs/cgroup/systemd/":
+			stat.Type = cgroup2SuperMagic
+		}
+		return nil
+	}
+	hybridMountSystemdV233 := func(path string, stat *unix.Statfs_t) error {
+		switch path {
+		case "/sys/fs/cgroup/":
+			stat.Type = tmpFsMagic
+		case "/sys/fs/cgroup/unified/":
+			stat.Type = cgroup2SuperMagic
+		case "/sys/fs/cgroup/systemd/":
+			stat.Type = cgroupSuperMagic
+		}
+		return nil
+	}
+	legacyMount := func(path string, stat *unix.Statfs_t) error {
+		switch path {
+		case "/sys/fs/cgroup/":
+			stat.Type = tmpFsMagic
+		case "/sys/fs/cgroup/unified/":
+			return errors.New("pretend unified path not found")
+		case "/sys/fs/cgroup/systemd/":
+			stat.Type = cgroupSuperMagic
+		}
+		return nil
+	}
+	missingSystemdFolder := func(path string, stat *unix.Statfs_t) error {
+		switch path {
+		case "/sys/fs/cgroup/":
+			stat.Type = tmpFsMagic
+		case "/sys/fs/cgroup/unified/":
+			return errors.New("pretend unified path not found")
+		case "/sys/fs/cgroup/systemd/":
+			return errors.New("pretend we cannot stat systemd dir")
+		}
+		return nil
+	}
+	unknownSystemdFolderMountType := func(path string, stat *unix.Statfs_t) error {
+		switch path {
+		case "/sys/fs/cgroup/":
+			stat.Type = tmpFsMagic
+		case "/sys/fs/cgroup/unified/":
+			return errors.New("pretend unified path not found")
+		case "/sys/fs/cgroup/systemd/":
+			stat.Type = 0x0
+		}
+		return nil
+	}
+
+	tables := []struct {
+		name         string
+		statFn       func(string, *unix.Statfs_t) error
+		expectedMode MountMode
+		errExpected  bool
+	}{
+		{"NoCgroupMount", noCgroupMount, MountModeUnknown, true},
+		{"UnknownCgroupMountType", unknownCgroupMount, MountModeUnknown, true},
+		{"LegacyMount", legacyMount, MountModeLegacy, false},
+		{"HybridMount, v232", hybridMountSystemdV232, MountModeHybrid, false},
+		{"HybridMount, v233+", hybridMountSystemdV233, MountModeHybrid, false},
+		{"MissingSystemdFolder", missingSystemdFolder, MountModeUnknown, true},
+		{"UnknownSystemdFolderType", unknownSystemdFolderMountType, MountModeUnknown, true},
+		{"UnifiedMount", unifiedMount, MountModeUnified, false},
+	}
+	defer func(orig func(string, *unix.Statfs_t) error) { statfsFunc = orig }(statfsFunc) // put the real statfs back for later tests
+	for _, table := range tables {
+		statfsFunc = table.statFn
+		mode, err := cgUnifiedCached()
+		if table.errExpected && err == nil {
+			t.Errorf("%s: expected an err, but got mode %s with no error", table.name, mode)
+		}
+		if !table.errExpected && err != nil {
+			t.Errorf("%s: expected no error, but got mode %s with err: %s", table.name, mode, err)
+		}
+		if mode != table.expectedMode {
+			t.Errorf("%s: expected mode %s but got mode %s", table.name, table.expectedMode, mode)
+		}
+	}
+}
+
+func TestNewFS(t *testing.T) {
+	if _, err := NewFS("foobar", MountModeUnknown); err == nil {
+		t.Error("NewFS should have failed with non-existing path")
+	}
+	// Must name a regular file that really exists, otherwise this only repeats the check above.
+	if _, err := NewFS("cgroup_test.go", MountModeUnknown); err == nil {
+		t.Error("want NewFS to fail if mount point is not a dir")
+	}
+
+	if _, err := NewFS(testFixturesHybrid, MountModeUnknown); err != nil {
+		t.Error("want NewFS to succeed if mount point exists")
+	}
+}
+
+func getHybridFixtures(t *testing.T) FS {
+	fs, err := NewFS(testFixturesHybrid, MountModeHybrid) // FS over the checked-in hybrid-layout fixture tree
+	if err != nil {
+		t.Fatalf("Unable to create hybrid test fixtures: %s", err)
+	}
+	return fs
+}
+
+func TestCgSubpath(t *testing.T) {
+	fixture := getHybridFixtures(t)
+	// Flip the stored mount mode to exercise the rejecting and the resolving branch in turn.
+	fixture.cgroupUnified = MountModeUnknown
+	if _, err := fixture.cgGetPath("cpu", "/system.slice", "cpuacct.usage_all"); err == nil {
+		t.Error("should not be able to determine path with unknown mount mode")
+	}
+	fixture.cgroupUnified = MountModeHybrid
+	expected := testFixturesHybrid + "/cpu/system.slice/cpuacct.usage_all"
+	got, err := fixture.cgGetPath("cpu", "/system.slice", "cpuacct.usage_all")
+	if err != nil {
+		t.Error("should be able to determine path with systemd mount mode")
+	}
+	if got != expected {
+		t.Errorf("bad response. Wanted %s, got %s", expected, got)
+	}
+}
diff --git a/cgroup/cpuacct.go b/cgroup/cpuacct.go
new file mode 100644
index 0000000..e64eeb7
--- /dev/null
+++ b/cgroup/cpuacct.go
@@ -0,0 +1,147 @@
+package cgroup
+
+import (
+ "bufio"
+ "bytes"
+ "github.com/pkg/errors"
+ "io"
+ "io/ioutil"
+ "os"
+ "strconv"
+ "strings"
+)
+
+// CPUUsage holds the CPU time one core spent on a control group (aka cgroup)
+// of tasks (e.g. both processes and threads), split into user and system nanoseconds.
+// Equivalent to cpuacct.usage_percpu_user and cpuacct.usage_percpu_system
+type CPUUsage struct {
+ CPUId uint32
+ SystemNanosec uint64
+ UserNanosec uint64
+}
+
+// CPUAcct stores CPU accounting information (one CPUUsage entry per core) for a
+// control group (cgroup) of tasks. Equivalent to cpuacct.usage_all
+type CPUAcct struct {
+ CPUs []CPUUsage
+}
+
+// NewCPUAcct reads the kernel's cpu accounting info for the given systemd cgroup
+// subpath from the default cgroup filesystem (see NewDefaultFS).
+func NewCPUAcct(cgSubpath string) (*CPUAcct, error) {
+	defaultFS, err := NewDefaultFS()
+	if err == nil {
+		return defaultFS.NewCPUAcct(cgSubpath)
+	}
+	return nil, err
+}
+
+// UsageUserNanosecs sums the user (e.g. non-kernel) CPU time, in nanoseconds, over every core
+// recorded for this control group, counted from the point CPU accounting was enabled for it.
+func (c *CPUAcct) UsageUserNanosecs() uint64 {
+	total := uint64(0)
+	for i := range c.CPUs {
+		total += c.CPUs[i].UserNanosec
+	}
+	return total
+}
+
+// UsageSystemNanosecs sums the system (e.g. kernel) CPU time, in nanoseconds, over every core
+// recorded for this control group, counted from the point CPU accounting was enabled for it.
+func (c *CPUAcct) UsageSystemNanosecs() uint64 {
+	total := uint64(0)
+	for i := range c.CPUs {
+		total += c.CPUs[i].SystemNanosec
+	}
+	return total
+}
+
+// UsageAllNanosecs sums the total (system plus user) CPU time, in nanoseconds, over every core
+// recorded for this control group, counted from the point CPU accounting was enabled for it.
+func (c *CPUAcct) UsageAllNanosecs() uint64 {
+	total := uint64(0)
+	for i := range c.CPUs {
+		total += c.CPUs[i].SystemNanosec + c.CPUs[i].UserNanosec
+	}
+	return total
+}
+
+// ReadFileNoStat reads the entire contents of filename using ioutil.ReadAll.
+// Unlike ioutil.ReadFile it does not call os.Stat first, because many files
+// in /proc and /sys report incorrect file sizes (either 0 or 4096).
+// At most 512kB is read and anything beyond that is dropped without an error,
+// so a scanner should be used for larger files.
+// COPIED FROM prometheus/procfs WHICH ALSO USES APACHE 2.0
+func ReadFileNoStat(filename string) ([]byte, error) {
+ const maxBufferSize = 1024 * 512
+
+ f, err := os.Open(filename)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ reader := io.LimitReader(f, maxBufferSize)
+ return ioutil.ReadAll(reader)
+}
+
+// NewCPUAcct locates and parses the kernel's cpu accounting file (cpuacct.usage_all) for the provided
+// systemd cgroup subpath. It errors if the file cannot be read, is malformed, or lists no CPUs.
+func (fs FS) NewCPUAcct(cgSubpath string) (*CPUAcct, error) {
+	cgPath, err := fs.cgGetPath("cpu", cgSubpath, "cpuacct.usage_all")
+	if err != nil {
+		return nil, errors.Wrapf(err, "unable to get cpu controller path")
+	}
+
+	// Example cpuacct.usage_all
+	// cpu user system
+	// 0 21165924 0
+	// 1 13334251 0
+	b, err := ReadFileNoStat(cgPath)
+	if err != nil {
+		return nil, errors.Wrapf(err, "unable to read file %s", cgPath)
+	}
+
+	scanner := bufio.NewScanner(bytes.NewReader(b))
+	// The first line is the column header: consume it without parsing.
+	if !scanner.Scan() {
+		if err := scanner.Err(); err != nil {
+			return nil, errors.Wrapf(err, "unable to scan file %s", cgPath)
+		}
+		return nil, errors.Errorf("unable to scan file %s", cgPath)
+	}
+
+	var cpuUsage CPUAcct
+	for scanner.Scan() {
+		// Fields tolerates repeated or trailing whitespace between the three columns.
+		vals := strings.Fields(scanner.Text())
+		if len(vals) != 3 {
+			return nil, errors.Errorf("unable to parse contents of file %s", cgPath)
+		}
+		cpu, err := strconv.ParseUint(vals[0], 10, 32)
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to parse %s as uint32 (from %s)", vals[0], cgPath)
+		}
+		user, err := strconv.ParseUint(vals[1], 10, 64)
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to parse %s as uint64 (from %s)", vals[1], cgPath)
+		}
+		sys, err := strconv.ParseUint(vals[2], 10, 64)
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to parse %s as uint64 (from %s)", vals[2], cgPath)
+		}
+		cpuUsage.CPUs = append(cpuUsage.CPUs, CPUUsage{
+			CPUId:         uint32(cpu),
+			UserNanosec:   user,
+			SystemNanosec: sys,
+		})
+	}
+	// Scan returns false for both EOF and failure, so the scanner error is only meaningful after the loop.
+	if err := scanner.Err(); err != nil {
+		return nil, errors.Wrapf(err, "unable to scan file %s", cgPath)
+	}
+	if len(cpuUsage.CPUs) < 1 {
+		return nil, errors.Errorf("no CPU/core info extracted from %s", cgPath)
+	}
+	return &cpuUsage, nil
+}
diff --git a/cgroup/cpuacct_test.go b/cgroup/cpuacct_test.go
new file mode 100644
index 0000000..fdd4c9a
--- /dev/null
+++ b/cgroup/cpuacct_test.go
@@ -0,0 +1,34 @@
+package cgroup
+
+import "testing"
+
+func TestNewCPUAcct(t *testing.T) {
+	fs := getHybridFixtures(t)
+	cpu, err := fs.NewCPUAcct("/")
+	if err != nil {
+		t.Fatalf("want NewCPUAcct('/') to succeed, got: %s", err) // fatal: cpu is nil here and is dereferenced below
+	}
+
+	if len(cpu.CPUs) != 4 {
+		t.Errorf("Wrong number of CPUs. Wanted %d got %d", 4, len(cpu.CPUs))
+	}
+
+	var expectedUser uint64 = 29531441016368
+	if cpu.UsageUserNanosecs() != expectedUser {
+		t.Errorf("Wrong user nanoseconds. Wanted %d got %d", expectedUser, cpu.UsageUserNanosecs())
+	}
+
+	var expectedSys uint64 = 619186701953
+	if cpu.UsageSystemNanosecs() != expectedSys {
+		t.Errorf("Wrong sys nanoseconds. Wanted %d got %d", expectedSys, cpu.UsageSystemNanosecs())
+	}
+
+	expectedTotal := expectedSys + expectedUser
+	if cpu.UsageAllNanosecs() != expectedTotal {
+		t.Errorf("Wrong total nanoseconds. Wanted %d got %d", expectedTotal, cpu.UsageAllNanosecs())
+	}
+
+	if _, err := fs.NewCPUAcct("foobar"); err == nil {
+		t.Errorf("expected error getting cpu accounting info for bogus cgroup")
+	}
+}
diff --git a/cgroup/fixtures/README.md b/cgroup/fixtures/README.md
new file mode 100644
index 0000000..6f071ae
--- /dev/null
+++ b/cgroup/fixtures/README.md
@@ -0,0 +1,11 @@
+Contains fixed state used as a baseline for running tests. The purpose of these test fixtures
+is to ensure that there is a well known and fixed environment in which tests are run so that
+results are repeatable
+
+Note: including symlinks in fixtures is important for testing. However, this can break
+community toolchains and OSes in unexpected ways. prometheus/procfs addressed this
+issue by using ttar to flatten their fixtures directory into a single standard file, which
+only people running the tests need to unflatten. This prevents symlinks from
+appearing on disk for anyone only doing a git checkout. This may be worth considering if
+we get problem reports. See https://github.com/prometheus/procfs/pull/79
+
diff --git a/cgroup/fixtures/cgroup-hybrid/cpu b/cgroup/fixtures/cgroup-hybrid/cpu
new file mode 120000
index 0000000..c5a8e01
--- /dev/null
+++ b/cgroup/fixtures/cgroup-hybrid/cpu
@@ -0,0 +1 @@
+cpu,cpuacct
\ No newline at end of file
diff --git a/cgroup/fixtures/cgroup-hybrid/cpu,cpuacct/cpuacct.usage_all b/cgroup/fixtures/cgroup-hybrid/cpu,cpuacct/cpuacct.usage_all
new file mode 100644
index 0000000..609c1f0
--- /dev/null
+++ b/cgroup/fixtures/cgroup-hybrid/cpu,cpuacct/cpuacct.usage_all
@@ -0,0 +1,5 @@
+cpu user system
+0 7746241803817 122204678803
+1 7385109326139 107275346559
+2 7307001772824 94093225654
+3 7093088113588 295613450937
diff --git a/cgroup/fixtures/cgroup-hybrid/memory/memory.stat b/cgroup/fixtures/cgroup-hybrid/memory/memory.stat
new file mode 100644
index 0000000..a2fd567
--- /dev/null
+++ b/cgroup/fixtures/cgroup-hybrid/memory/memory.stat
@@ -0,0 +1,36 @@
+cache 69984256
+rss 4866048
+rss_huge 0
+shmem 491520
+mapped_file 9818112
+dirty 8192
+writeback 0
+swap 0
+pgpgin 397887
+pgpgout 379613
+pgfault 541883
+pgmajfault 232
+inactive_anon 4096
+active_anon 5353472
+inactive_file 2621440
+active_file 63873024
+unevictable 2998272
+hierarchical_memory_limit 9223372036854771712
+hierarchical_memsw_limit 9223372036854771712
+total_cache 12469047296
+total_rss 2168885248
+total_rss_huge 10485760
+total_shmem 13168640
+total_mapped_file 228769792
+total_dirty 573440
+total_writeback 0
+total_swap 0
+total_pgpgin 135633232
+total_pgpgout 132074848
+total_pgfault 96879883
+total_pgmajfault 24509
+total_inactive_anon 11632640
+total_active_anon 2134667264
+total_inactive_file 9267785728
+total_active_file 3208708096
+total_unevictable 15052800
diff --git a/cgroup/memory.go b/cgroup/memory.go
new file mode 100644
index 0000000..506e392
--- /dev/null
+++ b/cgroup/memory.go
@@ -0,0 +1,232 @@
+package cgroup
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "github.com/pkg/errors"
+ "io"
+ "strconv"
+ "strings"
+)
+
+// MemStat holds the counters parsed from a cgroup-v1 memory.stat file, which the kernel exports when the memory cgroup controller is enabled.
+// See https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
+type MemStat struct {
+ // cache - bytes of page cache memory
+ CacheBytes uint64
+ // rss - bytes of anon and swap cache, including transparent hugepages.
+ // Note: Only anonymous and swap cache memory is listed as part of 'rss' stat.
+ // This should not be confused with the true 'resident set size' or the
+ // amount of physical memory used by the cgroup. 'rss + mapped_file' will
+ // give you resident set size of cgroup
+ RssBytes uint64
+ // rss_huge - bytes of anonymous transparent hugepages
+ RssHugeBytes uint64
+ // shmem - no kernel documentation. NOTE(review): presumably a byte count like its neighbours - confirm
+ Shmem uint64
+ // mapped_file - bytes of mapped files (includes tmpfs/shmem)
+ MappedFileBytes uint64
+ // pgpgin - number of charging events to the memory cgroup. The charging
+ // event happens each time a page is accounted as either mapped
+ // anon page (RSS) or cache page (page cache) to the cgroup.
+ PgPgIn uint64
+ // pgpgout - # of uncharging events to the memory cgroup. The uncharging
+ // event happens each time a page is unaccounted from the cgroup.
+ PgPgOut uint64
+ // pgfault - no kernel documentation
+ PgFault uint64
+ // pgmajfault - no kernel documentation
+ PgMajFault uint64
+ // swap - # of bytes of swap usage
+ SwapBytes uint64
+ // dirty - # of bytes that are waiting to get written back to the disk.
+ DirtyBytes uint64
+ // writeback - # of bytes of file/anon cache that are queued for syncing to
+ // disk.
+ WritebackBytes uint64
+ // inactive_anon - # of bytes of anonymous and swap cache memory on inactive
+ // LRU list.
+ InactiveAnonBytes uint64
+ // active_anon - # of bytes of anonymous and swap cache memory on active
+ // LRU list.
+ ActiveAnonBytes uint64
+ // inactive_file - # of bytes of file-backed memory on inactive LRU list.
+ InactiveFileBytes uint64
+ // active_file - # of bytes of file-backed memory on active LRU list.
+ ActiveFileBytes uint64
+ // unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
+ UnevictableBytes uint64
+
+ // The remaining fields are the status considering hierarchy (see memory.use_hierarchy settings).
+ // hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
+ // under which the memory cgroup is
+ HierarchialMemoryLimitBytes uint64
+ // hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
+ // hierarchy under which memory cgroup is.
+ HierarchialMemswLimitBytes uint64
+ // total_cache - sum of all children's "cache"
+ TotalCacheBytes uint64
+ // total_dirty - no kernel doc. NOTE(review): presumably sum of all children's "dirty" - confirm
+ TotalDirtyBytes uint64
+ // total_rss - sum of all children's "rss"
+ TotalRssBytes uint64
+ // total_rss_huge - no kernel doc. NOTE(review): presumably sum of all children's "rss_huge" - confirm
+ TotalRssHugeBytes uint64
+ // total_mapped_file - sum of all children's "mapped_file"
+ TotalMappedFileBytes uint64
+ // total_pgfault - no kernel doc. NOTE(review): presumably sum of all children's "pgfault" - confirm
+ TotalPgFault uint64
+ // total_pgmajfault - no kernel doc. NOTE(review): presumably sum of all children's "pgmajfault" - confirm
+ TotalPgMajFault uint64
+ // total_pgpgin - sum of all children's "pgpgin"
+ TotalPgPgIn uint64
+ // total_pgpgout - sum of all children's "pgpgout"
+ TotalPgPgOut uint64
+ // total_shmem - no kernel doc. NOTE(review): presumably sum of all children's "shmem" - confirm
+ TotalShmemBytes uint64
+ // total_swap - sum of all children's "swap"
+ TotalSwapBytes uint64
+ // total_inactive_anon - sum of all children's "inactive_anon"
+ TotalInactiveAnonBytes uint64
+ // total_active_anon - sum of all children's "active_anon"
+ TotalActiveAnonBytes uint64
+ // total_inactive_file - sum of all children's "inactive_file"
+ TotalInactiveFileBytes uint64
+ // total_active_file - sum of all children's "active_file"
+ TotalActiveFileBytes uint64
+ // total_unevictable - sum of all children's "unevictable"
+ TotalUnevictableBytes uint64
+ // total_writeback - no kernel doc. NOTE(review): presumably sum of all children's "writeback" - confirm
+ TotalWritebackBytes uint64
+ // The following additional stats are dependent on CONFIG_DEBUG_VM and have no field in this struct:
+ // inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
+ // recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
+ // recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
+ // recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
+ // recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
+}
+
+// parseMemStat decodes "name value" lines in memory.stat format from r; names without a MemStat field are ignored.
+func parseMemStat(r io.Reader) (*MemStat, error) {
+	var m MemStat
+	s := bufio.NewScanner(r)
+	for s.Scan() {
+		fields := strings.Fields(s.Text()) // each line has at least a name and value
+		if len(fields) < 2 {
+			return nil, fmt.Errorf("malformed memory.stat line: %q", s.Text())
+		}
+		v, err := strconv.ParseUint(fields[1], 10, 64) // base 10 only: never read a leading zero as octal
+		if err != nil {
+			return nil, errors.Wrapf(err, "invalid value for %q in memory.stat", fields[0])
+		}
+		switch fields[0] {
+		case "cache":
+			m.CacheBytes = v
+		case "rss":
+			m.RssBytes = v
+		case "rss_huge":
+			m.RssHugeBytes = v
+		case "shmem":
+			m.Shmem = v
+		case "mapped_file":
+			m.MappedFileBytes = v
+		case "dirty":
+			m.DirtyBytes = v
+		case "writeback":
+			m.WritebackBytes = v
+		case "swap":
+			m.SwapBytes = v
+		case "pgpgin":
+			m.PgPgIn = v
+		case "pgpgout":
+			m.PgPgOut = v
+		case "pgfault":
+			m.PgFault = v
+		case "pgmajfault":
+			m.PgMajFault = v
+		case "inactive_anon":
+			m.InactiveAnonBytes = v
+		case "active_anon":
+			m.ActiveAnonBytes = v
+		case "inactive_file":
+			m.InactiveFileBytes = v
+		case "active_file":
+			m.ActiveFileBytes = v
+		case "unevictable":
+			m.UnevictableBytes = v
+		case "hierarchical_memory_limit":
+			m.HierarchialMemoryLimitBytes = v
+		case "hierarchical_memsw_limit":
+			m.HierarchialMemswLimitBytes = v
+		case "total_cache":
+			m.TotalCacheBytes = v
+		case "total_rss":
+			m.TotalRssBytes = v
+		case "total_rss_huge":
+			m.TotalRssHugeBytes = v
+		case "total_shmem":
+			m.TotalShmemBytes = v
+		case "total_mapped_file":
+			m.TotalMappedFileBytes = v
+		case "total_dirty":
+			m.TotalDirtyBytes = v
+		case "total_writeback":
+			m.TotalWritebackBytes = v
+		case "total_swap":
+			m.TotalSwapBytes = v
+		case "total_pgpgin":
+			m.TotalPgPgIn = v
+		case "total_pgpgout":
+			m.TotalPgPgOut = v
+		case "total_pgfault":
+			m.TotalPgFault = v
+		case "total_pgmajfault":
+			m.TotalPgMajFault = v
+		case "total_inactive_anon":
+			m.TotalInactiveAnonBytes = v
+		case "total_inactive_file":
+			m.TotalInactiveFileBytes = v
+		case "total_active_anon":
+			m.TotalActiveAnonBytes = v
+		case "total_active_file":
+			m.TotalActiveFileBytes = v
+		case "total_unevictable":
+			m.TotalUnevictableBytes = v
+		}
+	}
+	if err := s.Err(); err != nil { // Scan stops silently on read errors and over-long lines
+		return nil, errors.Wrap(err, "unable to scan memory.stat")
+	}
+	return &m, nil
+}
+
+// NewMemStat reads the memory.stat counters of the provided systemd cgroup
+// subpath, using the filesystem handle returned by NewDefaultFS.
+func NewMemStat(cgSubpath string) (MemStat, error) {
+	defaultFS, err := NewDefaultFS()
+	if err == nil {
+		return defaultFS.NewMemStat(cgSubpath)
+	}
+	return MemStat{}, err
+}
+
+// NewMemStat reads and parses the memory controller's memory.stat file for the
+// cgroup identified by cgSubpath (the tests pass "/"). It returns a zero
+// MemStat and a non-nil error if the file cannot be located, read or parsed.
+func (fs FS) NewMemStat(cgSubpath string) (MemStat, error) {
+	cgPath, err := fs.cgGetPath("memory", cgSubpath, "memory.stat")
+	if err != nil {
+		return MemStat{}, errors.Wrapf(err, "unable to get memory controller path")
+	}
+	// Returned as-is (not wrapped), so the read's concrete error type stays visible to callers.
+	b, err := ReadFileNoStat(cgPath)
+	if err != nil {
+		return MemStat{}, err
+	}
+	m, err := parseMemStat(bytes.NewReader(b))
+	if err != nil {
+		return MemStat{}, errors.Wrapf(err, "failed to parse %s", cgPath)
+	}
+	return *m, nil
+}
diff --git a/cgroup/memory_test.go b/cgroup/memory_test.go
new file mode 100644
index 0000000..192f450
--- /dev/null
+++ b/cgroup/memory_test.go
@@ -0,0 +1,58 @@
+package cgroup
+
+import (
+ "reflect"
+ "testing"
+)
+
+// TestMemStat loads memory.stat for the root cgroup ("/") of the hybrid
+// fixtures and compares every parsed field with the values listed below.
+func TestMemStat(t *testing.T) {
+	want := MemStat{
+		CacheBytes:                  69984256,
+		RssBytes:                    4866048,
+		RssHugeBytes:                0,
+		Shmem:                       491520,
+		MappedFileBytes:             9818112,
+		DirtyBytes:                  8192,
+		WritebackBytes:              0,
+		SwapBytes:                   0,
+		PgPgIn:                      397887,
+		PgPgOut:                     379613,
+		PgFault:                     541883,
+		PgMajFault:                  232,
+		InactiveAnonBytes:           4096,
+		ActiveAnonBytes:             5353472,
+		InactiveFileBytes:           2621440,
+		ActiveFileBytes:             63873024,
+		UnevictableBytes:            2998272,
+		HierarchialMemoryLimitBytes: 9223372036854771712,
+		HierarchialMemswLimitBytes:  9223372036854771712,
+		TotalCacheBytes:             12469047296,
+		TotalRssBytes:               2168885248,
+		TotalRssHugeBytes:           10485760,
+		TotalShmemBytes:             13168640,
+		TotalMappedFileBytes:        228769792,
+		TotalDirtyBytes:             573440,
+		TotalWritebackBytes:         0,
+		TotalSwapBytes:              0,
+		TotalPgPgIn:                 135633232,
+		TotalPgPgOut:                132074848,
+		TotalPgFault:                96879883,
+		TotalPgMajFault:             24509,
+		TotalInactiveAnonBytes:      11632640,
+		TotalActiveAnonBytes:        2134667264,
+		TotalInactiveFileBytes:      9267785728,
+		TotalActiveFileBytes:        3208708096,
+		TotalUnevictableBytes:       15052800,
+	}
+	got, err := getHybridFixtures(t).NewMemStat("/")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !reflect.DeepEqual(got, want) {
+		t.Logf("have: %+v", got)
+		t.Logf("expected: %+v", want)
+		t.Errorf("structs are not equal")
+	}
+}
diff --git a/main.go b/main.go
index 16f6eec..190d872 100644
--- a/main.go
+++ b/main.go
@@ -3,6 +3,8 @@ package main
import (
"net/http"
_ "net/http/pprof"
+ "os"
+ "sync"
"github.com/povilasv/prommod"
"github.com/povilasv/systemd_exporter/systemd"
@@ -14,6 +16,40 @@ import (
)
func main() {
+ // mainCore parses the command line (among other setup) and returns the address to listen on.
+ listenAddress := mainCore()
+
+ log.Infoln("Listening on", listenAddress)
+ if err := http.ListenAndServe(listenAddress, nil); err != nil {
+ log.Fatal(err)
+ }
+}
+
+// testMain mirrors main but serves from its own http.Server on a background goroutine, so that tests can shut it
+// down again. wg.Done is called when that goroutine finishes (runServerAndTest calls wg.Add(1) beforehand).
+func testMain(wg *sync.WaitGroup) *http.Server {
+	addr := mainCore()
+	server := &http.Server{Addr: addr}
+
+	// Launch server in background
+	log.Infoln("Queuing test server startup")
+	go func() {
+		defer wg.Done()
+		log.Infoln("Test server listening on", addr)
+		// ErrServerClosed indicates graceful close; anything else is an unexpected error. port in use?
+		err := server.ListenAndServe()
+		if err != http.ErrServerClosed {
+			log.Fatalf("ListenAndServe(): %v", err)
+		}
+
+		// Reset http package
+		http.DefaultServeMux = http.NewServeMux()
+		log.Infoln("Test server shutdown")
+	}()
+	return server
+}
+
+func mainCore() string {
var (
listenAddress = kingpin.Flag(
"web.listen-address",
@@ -37,6 +73,7 @@ func main() {
kingpin.Version(prommod.Print(version.Print("systemd_exporter")))
kingpin.HelpFlag.Short('h')
kingpin.Parse()
+ log.Debugf("Parsed '%s'", os.Args)
log.Infoln("Starting systemd_exporter", version.Info())
log.Infoln("Build context", version.BuildContext())
@@ -85,8 +122,5 @@ func main() {
}
})
- log.Infoln("Listening on", *listenAddress)
- if err := http.ListenAndServe(*listenAddress, nil); err != nil {
- log.Fatal(err)
- }
+ return *listenAddress
}
diff --git a/main_test.go b/main_test.go
new file mode 100644
index 0000000..67241cc
--- /dev/null
+++ b/main_test.go
@@ -0,0 +1,120 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "net/http"
+ "os"
+ "sync"
+ "testing"
+ "time"
+)
+
+var (
+ address = "127.0.0.1:9550" // host:port passed as --web.listen-address and used to build request URLs
+ binaryName = "systemd_exporter" // first element of defaultArgs, i.e. the program name in os.Args
+ defaultArgs = []string{binaryName, fmt.Sprintf("--web.listen-address=%s", address)} // assigned to os.Args by runServerAndTest
+)
+
+// TestMain runs the package's tests and exits with the code m.Run returns. TODO accept arg for listen address
+func TestMain(m *testing.M) {
+ os.Exit(m.Run())
+}
+
+// TestNoop only exists as an example of how you can test: it starts the server, runs nothing, and shuts it down
+func TestNoop(t *testing.T) {
+	noop := func() error { return nil }
+	if err := runServerAndTest(defaultArgs, address, noop); err != nil {
+		t.Errorf("No op failed: %v", err)
+	}
+}
+
+// TestVersionFlag is an example of running a test that does not rely on the server being
+// online. TODO make a reusable runTest() for this use case
+// TODO this is broken. Because runServerAndTest is waiting for the server to come online,
+// but it never does (because our args mean it prints version and exits), we do not exit
+// cleanly. Something hangs, which means test coverage is never written out. Bummer
+// func TestVersionFlag(t *testing.T) {
+// noop := func() error { return nil }
+// runServerAndTest(append(defaultArgs, "--version"), address, noop)
+// }
+
+// TestMetricEndpointReturnsHttp200 starts the exporter and checks that a GET
+// of /metrics is answered with HTTP 200.
+func TestMetricEndpointReturnsHttp200(t *testing.T) {
+	test := func() error {
+		// A transport error takes precedence; otherwise any status but 200 is a failure.
+		resp, err := getMetrics()
+		if err == nil && resp.StatusCode != http.StatusOK {
+			err = fmt.Errorf("wanted status code %d, received %d", http.StatusOK, resp.StatusCode)
+		}
+		return err
+	}
+	if err := runServerAndTest(defaultArgs, address, test); err != nil {
+		t.Errorf("Metric 200 failed: %v", err)
+	}
+}
+
+// runServerAndTest starts the exporter in-process with args as its command
+// line, waits for it to answer on url (a host:port pair), runs fn and then
+// shuts the server down. It returns fn's error, or a startup/shutdown error.
+func runServerAndTest(args []string, url string, fn func() error) error {
+	// Request server startup
+	serverDone := &sync.WaitGroup{}
+	serverDone.Add(1)
+	// TODO it would be cleaner to change main.go to use kingpin.MustParse
+	os.Args = args
+	srv := testMain(serverDone)
+
+	// Ensure server is online before running test: poll up to 10 times, 10ms apart
+	fmt.Println("Waiting on test server startup...")
+	root := fmt.Sprintf("http://%s/", url)
+	for i := 0; i < 10; i++ {
+		if resp, err := getURL(root); err == nil && resp.StatusCode == http.StatusOK {
+			break
+		}
+		time.Sleep(10 * time.Millisecond)
+		if i == 9 {
+			return fmt.Errorf("can't connect to %s - unable to run any tests", root)
+		}
+	}
+	fmt.Println("Test server ready, running test...")
+
+	// Run the test
+	err := fn()
+
+	// Shutdown the server before we return. Always wait for the serving
+	// goroutine, whether or not Shutdown itself reported an error.
+	fmt.Println("Test complete, shutting down server...")
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+	shutdownErr := srv.Shutdown(ctx)
+	serverDone.Wait()
+	if shutdownErr != nil {
+		return fmt.Errorf("failed to shut down test server: %s", shutdownErr)
+	}
+	fmt.Println("Test server shutdown, testcase complete.")
+	return err
+}
+
+func getMetrics() (*http.Response, error) { // GETs the /metrics page of the test server at address
+	return getURL("http://" + address + "/metrics")
+}
+
+// getURL issues a GET request for url and returns the response so callers
+// can inspect its status code.
+//
+// The body is closed before returning so the connection is not leaked; the
+// callers in this file never read it. Do not read resp.Body on the result -
+// if a test needs the payload, read it here before the Close call.
+func getURL(url string) (*http.Response, error) {
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, err
+	}
+	// Only the status matters to callers, so release the connection right away.
+	if err := resp.Body.Close(); err != nil {
+		return nil, err
+	}
+	return resp, nil
+}
diff --git a/systemd/cgroups.go b/systemd/cgroups.go
deleted file mode 100644
index 05399e0..0000000
--- a/systemd/cgroups.go
+++ /dev/null
@@ -1,263 +0,0 @@
-package systemd
-
-import (
- "bufio"
- "bytes"
- "io"
- "io/ioutil"
- "os"
- "path/filepath"
- "strconv"
- "strings"
-
- "github.com/pkg/errors"
- "github.com/prometheus/common/log"
- "golang.org/x/sys/unix"
-)
-
-// cgUnifiedMountMode constant values describe how cgroup filesystems (aka hierarchies) are
-// mounted underneath /sys/fs/cgroup. In cgroups-v1 there are many mounts,
-// one per controller (cpu, blkio, etc) and one for systemd itself. In
-// cgroups-v2 there is only one mount managed entirely by systemd and
-// internally exposing all controller syscalls. As kernel+distros migrate towards
-// cgroups-v2, systemd has a hybrid mode where it mounts v2 and uses
-// that for process management but also mounts all the v1 filesystem
-// hierarchies and uses them for resource accounting and control
-type cgUnifiedMountMode int8
-
-const (
- // unifModeUnknown indicates that we do not know if/how any
- // cgroup filesystems are mounted underneath /sys/fs/cgroup
- unifModeUnknown cgUnifiedMountMode = iota
- // unifModeNone indicates that both systemd and the controllers
- // are using v1 legacy mounts and there is no usage of the v2
- // unified hierarchy. a.k.a "legacy hierarchy"
- unifModeNone cgUnifiedMountMode = iota
- // unifModeSystemd indicates that systemd is using a v2 unified
- // hierarcy for organizing processes into control groups, but all
- // controller interaction is using v1 per-controller hierarchies.
- // a.k.a. "hybrid hierarchy"
- unifModeSystemd cgUnifiedMountMode = iota
- // unifModeAll indicates that v2 API is in full usage and there
- // are no v1 hierarchies exported. Programs (mainly container orchestrators
- // such as docker,runc,etc) that rely on v1 APIs will be broken.
- // a.k.a. "unified hierarchy"
- unifModeAll cgUnifiedMountMode = iota
-)
-
-// WARNING: We only read this data once at process start, systemd updates
-// may require restarting systemd-exporter
-var cgroupUnified cgUnifiedMountMode = unifModeUnknown
-
-// Values copied from https://github.com/torvalds/linux/blob/master/include/uapi/linux/magic.h
-const (
- tmpFsMagic = 0x01021994
- cgroupSuperMagic = 0x27e0eb
- cgroup2SuperMagic = 0x63677270
-)
-
-// cgUnifiedCached checks the filesystem types mounted under /sys/fs/cgroup to determine
-// which systemd layout (legacy/hybrid/unified) is in use.
-// We do not bother to track unified_systemd_v232 as our usage does not
-// depend on reading the systemd hierarchy directly, we only focus on reading
-// the controllers. If you care if /sys/fs/cgroup/systemd is v1 or v2 you need
-// to track this
-// WARNING: We cache this data once at process start. Systemd updates
-// may require restarting systemd-exporter
-func cgUnifiedCached() (cgUnifiedMountMode, error) {
- if cgroupUnified != unifModeUnknown {
- return cgroupUnified, nil
- }
-
- var fs unix.Statfs_t
- err := unix.Statfs("/sys/fs/cgroup/", &fs)
- if err != nil {
- return unifModeUnknown, errors.Wrapf(err, "failed statfs(/sys/fs/cgroup)")
- }
-
- switch fs.Type {
- case cgroup2SuperMagic:
- log.Debugf("Found cgroup2 on /sys/fs/cgroup, full unified hierarchy")
- cgroupUnified = unifModeAll
- case tmpFsMagic:
- err := unix.Statfs("/sys/fs/cgroup/unified", &fs)
-
- // Ignore err, we expect path to be missing on v232
- if err == nil && fs.Type == cgroup2SuperMagic {
- log.Debugf("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller")
- cgroupUnified = unifModeSystemd
- } else {
- err := unix.Statfs("/sys/fs/cgroup/systemd", &fs)
- if err != nil {
- return unifModeUnknown, errors.Wrapf(err, "failed statfs(/sys/fs/cgroup/systemd)")
- }
- switch fs.Type {
- case cgroup2SuperMagic:
- log.Debugf("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)")
- cgroupUnified = unifModeSystemd
- case cgroupSuperMagic:
- log.Debugf("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy")
- cgroupUnified = unifModeNone
- default:
- return unifModeUnknown, errors.Errorf("unknown magic number %x for fstype returned by statfs(/sys/fs/cgroup/systemd)", fs.Type)
- }
- }
- default:
- return unifModeUnknown, errors.Errorf("unknown magic number %x for fstype returned by statfs(/sys/fs/cgroup)", fs.Type)
- }
-
- return cgroupUnified, nil
-}
-
-// cgGetPath returns the absolute path for a specific file in a specific controller
-// in the specific cgroup denoted by the passed subpath.
-// Input examples: ("cpu", "/system.slice", "cpuacct.usage_all)
-func cgGetPath(controller string, subpath string, suffix string) (string, error) {
- // relevant systemd source code in cgroup-util.[h|c] specifically cg_get_path
- // 2. Joins controller name with base path
-
- unified, err := cgUnifiedCached()
- if err != nil {
- return "", errors.Wrapf(err, "failed to determine cgroup mounting hierarchy")
- }
-
- // TODO Ensure controller name is valid
- // TODO Convert controller name into guaranteed valid directory name
- dn := controller
-
- joined := ""
- switch unified {
- case unifModeNone, unifModeSystemd:
- joined = filepath.Join("/sys/fs/cgroup", dn, subpath, suffix)
- case unifModeAll:
- joined = filepath.Join("/sys/fs/cgroup", subpath, suffix)
- default:
- return "", errors.Errorf("unknown cgroup mount mode (e.g. unified mode) %d", unified)
- }
- return joined, nil
-}
-
-// CPUUsage stores one core's worth of CPU usage for a control group
-// (aka cgroup) of tasks (e.g. both processes and threads).
-// Equivalent to cpuacct.usage_percpu_user and cpuacct.usage_percpu_system
-type CPUUsage struct {
- CPUId uint32
- SystemNanosec uint64
- UserNanosec uint64
-}
-
-// CPUAcct stores CPU accounting information (e.g. cpu usage) for a control
-// group (cgroup) of tasks. Equivalent to cpuacct.usage_all
-type CPUAcct struct {
- CPUs []CPUUsage
-}
-
-// UsageUserNanosecs returns user (e.g. non-kernel) cpu consumption in nanoseconds, across all available cpu
-// cores, from the point that CPU accounting was enabled for this control group.
-func (c *CPUAcct) UsageUserNanosecs() uint64 {
- var nanoseconds uint64
- for _, cpu := range c.CPUs {
- nanoseconds += cpu.UserNanosec
- }
- return nanoseconds
-}
-
-// UsageSystemNanosecs returns system (e.g. kernel) cpu consumption in nanoseconds, across all available cpu
-// cores, from the point that CPU accounting was enabled for this control group.
-func (c *CPUAcct) UsageSystemNanosecs() uint64 {
- var nanoseconds uint64
- for _, cpu := range c.CPUs {
- nanoseconds += cpu.SystemNanosec
- }
- return nanoseconds
-}
-
-// UsageAllNanosecs returns total cpu consumption in nanoseconds, across all available cpu
-// cores, from the point that CPU accounting was enabled for this control group.
-func (c *CPUAcct) UsageAllNanosecs() uint64 {
- var nanoseconds uint64
- for _, cpu := range c.CPUs {
- nanoseconds += cpu.SystemNanosec + cpu.UserNanosec
- }
- return nanoseconds
-}
-
-// ReadFileNoStat uses ioutil.ReadAll to read contents of entire file.
-// This is similar to ioutil.ReadFile but without the call to os.Stat, because
-// many files in /proc and /sys report incorrect file sizes (either 0 or 4096).
-// Reads a max file size of 512kB. For files larger than this, a scanner
-// should be used.
-// COPIED FROM prometheus/procfs WHICH ALSO USES APACHE 2.0
-func ReadFileNoStat(filename string) ([]byte, error) {
- const maxBufferSize = 1024 * 512
-
- f, err := os.Open(filename)
- if err != nil {
- return nil, err
- }
- defer f.Close()
-
- reader := io.LimitReader(f, maxBufferSize)
- return ioutil.ReadAll(reader)
-}
-
-// NewCPUAcct will locate and read the kernel's cpu accounting info for
-// the provided systemd cgroup subpath.
-func NewCPUAcct(cgSubpath string) (*CPUAcct, error) {
- var cpuUsage CPUAcct
-
- cgPath, err := cgGetPath("cpu", cgSubpath, "cpuacct.usage_all")
- if err != nil {
- return nil, errors.Wrapf(err, "unable to get cpu controller path")
- }
-
- // Example cpuacct.usage_all
- // cpu user system
- // 0 21165924 0
- // 1 13334251 0
- b, err := ReadFileNoStat(cgPath)
- if err != nil {
- return nil, errors.Wrapf(err, "unable to read file %s", cgPath)
- }
-
- scanner := bufio.NewScanner(bytes.NewReader(b))
- if ok := scanner.Scan(); !ok {
- return nil, errors.Errorf("unable to scan file %s", cgPath)
- }
- if err := scanner.Err(); err != nil {
- return nil, errors.Wrapf(err, "unable to scan file %s", cgPath)
- }
- for scanner.Scan() {
- if err := scanner.Err(); err != nil {
- return nil, errors.Wrapf(err, "unable to scan file %s", cgPath)
- }
- text := scanner.Text()
- vals := strings.Split(text, " ")
- if len(vals) != 3 {
- return nil, errors.Errorf("unable to parse contents of file %s", cgPath)
- }
- cpu, err := strconv.ParseUint(vals[0], 10, 32)
- if err != nil {
- return nil, errors.Wrapf(err, "unable to parse %s as uint32 (from %s)", vals[0], cgPath)
- }
- user, err := strconv.ParseUint(vals[1], 10, 64)
- if err != nil {
- return nil, errors.Wrapf(err, "unable to parse %s as uint64 (from %s)", vals[1], cgPath)
- }
- sys, err := strconv.ParseUint(vals[2], 10, 64)
- if err != nil {
- return nil, errors.Wrapf(err, "unable to parse %s as an in (from %s)", vals[2], cgPath)
- }
- onecpu := CPUUsage{
- CPUId: uint32(cpu),
- UserNanosec: user,
- SystemNanosec: sys,
- }
- cpuUsage.CPUs = append(cpuUsage.CPUs, onecpu)
- }
- if len(cpuUsage.CPUs) < 1 {
- return nil, errors.Errorf("no CPU/core info extracted from %s", cgPath)
- }
-
- return &cpuUsage, nil
-}
diff --git a/systemd/systemd.go b/systemd/systemd.go
index 87af595..e102b38 100644
--- a/systemd/systemd.go
+++ b/systemd/systemd.go
@@ -3,6 +3,8 @@ package systemd
import (
"fmt"
"math"
+ "os"
+
// Register pprof-over-http handlers
_ "net/http/pprof"
"regexp"
@@ -12,6 +14,7 @@ import (
"github.com/coreos/go-systemd/dbus"
"github.com/pkg/errors"
+ "github.com/povilasv/systemd_exporter/cgroup"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"github.com/prometheus/procfs"
@@ -55,11 +58,17 @@ type Collector struct {
socketRefusedConnectionsDesc *prometheus.Desc
cpuTotalDesc *prometheus.Desc
unitCPUTotal *prometheus.Desc
- openFDs *prometheus.Desc
- maxFDs *prometheus.Desc
- vsize *prometheus.Desc
- maxVsize *prometheus.Desc
- rss *prometheus.Desc
+
+ unitMemCache *prometheus.Desc
+ unitMemRss *prometheus.Desc
+ unitMemDirty *prometheus.Desc
+ unitMemShmem *prometheus.Desc
+
+ openFDs *prometheus.Desc
+ maxFDs *prometheus.Desc
+ vsize *prometheus.Desc
+ maxVsize *prometheus.Desc
+ rss *prometheus.Desc
unitWhitelistPattern *regexp.Regexp
unitBlacklistPattern *regexp.Regexp
@@ -134,6 +143,27 @@ func NewCollector(logger log.Logger) (*Collector, error) {
[]string{"name", "type", "mode"}, nil,
)
+ unitMemCache := prometheus.NewDesc(
+ prometheus.BuildFQName(namespace, "", "unit_cached_bytes"),
+ "Unit Page CacheBytes",
+ []string{"name", "type"}, nil,
+ )
+ unitMemRss := prometheus.NewDesc(
+ prometheus.BuildFQName(namespace, "", "unit_rss_bytes"),
+ "Unit anon+swap cache, incl. transparent hugepages. Not true RSS",
+ []string{"name", "type"}, nil,
+ )
+ unitMemDirty := prometheus.NewDesc(
+ prometheus.BuildFQName(namespace, "", "unit_dirty_bytes"),
+ "Unit bytes waiting to get written to disk",
+ []string{"name", "type"}, nil,
+ )
+ unitMemShmem := prometheus.NewDesc(
+ prometheus.BuildFQName(namespace, "", "unit_shmem_bytes"),
+ "",
+ []string{"name", "type"}, nil,
+ )
+
openFDs := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_open_fds"),
"Number of open file descriptors.",
@@ -179,6 +209,10 @@ func NewCollector(logger log.Logger) (*Collector, error) {
socketRefusedConnectionsDesc: socketRefusedConnectionsDesc,
cpuTotalDesc: cpuTotalDesc,
unitCPUTotal: unitCPUTotal,
+ unitMemCache: unitMemCache,
+ unitMemRss: unitMemRss,
+ unitMemDirty: unitMemDirty,
+ unitMemShmem: unitMemShmem,
openFDs: openFDs,
maxFDs: maxFDs,
vsize: vsize,
@@ -222,6 +256,13 @@ func parseUnitType(unit dbus.UnitStatus) string {
return t[len(t)-1]
}
+// parseUnitTypeInterface returns the dbus interface suffix for the interface unique to the passed unit's type.
+// For example, a systemd "service unit" is exposed on dbus as a "service object", and all "service objects"
+// implement the org.freedesktop.systemd1.Service interface. The result is used as input for dbus.GetUnitTypeProperty
+func parseUnitTypeInterface(unit dbus.UnitStatus) string {
+ return strings.Title(parseUnitType(unit))
+}
+
func (c *Collector) collect(ch chan<- prometheus.Metric) error {
begin := time.Now()
conn, err := c.newDbus()
@@ -260,80 +301,78 @@ func (c *Collector) collectUnit(conn *dbus.Conn, ch chan<- prometheus.Metric, un
logger := c.logger.With("unit", unit.Name)
- // Collect unit_state for all
+ // Collect unit_state for all unit types
err := c.collectUnitState(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
// TODO should we continue processing here?
}
- switch {
- case strings.HasSuffix(unit.Name, ".service"):
- err = c.collectServiceMetainfo(conn, ch, unit)
+ // Collect metrics from cgroups
+ switch parseUnitType(unit) {
+ case "service", "mount", "socket", "swap", "slice":
+ cgroupPath, err := c.getControlGroup(conn, unit)
+ if err != nil {
+ logger.Warnf(errUnitMetricsMsg, err)
+ }
+ // Everything below requires a cgroup
+ if cgroupPath == nil {
+ break
+ }
+ err = c.collectUnitCPUMetrics(*cgroupPath, conn, ch, unit)
+ if err != nil {
+ // Most sockets do not have a cpu cgroupfs entry, but a few big ones do (notably docker.socket). Quiet down
+ // error reporting if error came from a socket
+ if parseUnitType(unit) != "socket" {
+ logger.Warnf(errUnitMetricsMsg, err)
+ }
+ }
+ err = c.collectUnitMemMetrics(*cgroupPath, conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
+ }
+ // Collect metrics from dbus
+ switch parseUnitType(unit) {
+ case "service":
+ err = c.collectServiceMetainfo(conn, ch, unit)
+ if err != nil {
+ logger.Warnf(errUnitMetricsMsg, err)
+ }
err = c.collectServiceStartTimeMetrics(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
-
if *enableRestartsMetrics {
err = c.collectServiceRestartCount(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
}
-
err = c.collectServiceTasksMetrics(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
-
err = c.collectServiceProcessMetrics(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
- err = c.collectUnitCPUUsageMetrics("Service", conn, ch, unit)
- if err != nil {
- logger.Warnf(errUnitMetricsMsg, err)
- }
- case strings.HasSuffix(unit.Name, ".mount"):
+ case "mount":
err = c.collectMountMetainfo(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
- err = c.collectUnitCPUUsageMetrics("Mount", conn, ch, unit)
- if err != nil {
- logger.Warnf(errUnitMetricsMsg, err)
- }
- case strings.HasSuffix(unit.Name, ".timer"):
+ case "timer":
err := c.collectTimerTriggerTime(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
- case strings.HasSuffix(unit.Name, ".socket"):
+ case "socket":
err := c.collectSocketConnMetrics(conn, ch, unit)
if err != nil {
logger.Warnf(errUnitMetricsMsg, err)
}
- // Most sockets do not have a cpu cgroupfs entry, but a
- // few do, notably docker.socket
- err = c.collectUnitCPUUsageMetrics("Socket", conn, ch, unit)
- if err != nil {
- logger.Warnf(errUnitMetricsMsg, err)
- }
- case strings.HasSuffix(unit.Name, ".swap"):
- err = c.collectUnitCPUUsageMetrics("Swap", conn, ch, unit)
- if err != nil {
- logger.Warnf(errUnitMetricsMsg, err)
- }
- case strings.HasSuffix(unit.Name, ".slice"):
- err = c.collectUnitCPUUsageMetrics("Slice", conn, ch, unit)
- if err != nil {
- logger.Warnf(errUnitMetricsMsg, err)
- }
default:
c.logger.Debugf(infoUnitNoHandler, unit.Name)
}
@@ -515,16 +554,15 @@ func (c *Collector) mustGetUnitStringTypeProperty(unitType string,
return propVal
}
-// A number of unit types support the 'ControlGroup' property needed to allow us to directly read their
-// resource usage from the kernel's cgroupfs cpu hierarchy. The only change is which dbus item we are querying
-func (c *Collector) collectUnitCPUUsageMetrics(unitType string, conn *dbus.Conn, ch chan<- prometheus.Metric, unit dbus.UnitStatus) error {
- propCGSubpath, err := conn.GetUnitTypeProperty(unit.Name, unitType, "ControlGroup")
+func (c *Collector) getControlGroup(conn *dbus.Conn, unit dbus.UnitStatus) (*string, error) {
+ unitTypeInterface := parseUnitTypeInterface(unit)
+ propCGSubpath, err := conn.GetUnitTypeProperty(unit.Name, unitTypeInterface, "ControlGroup")
if err != nil {
- return errors.Wrapf(err, errGetPropertyMsg, "ControlGroup")
+ return nil, errors.Wrapf(err, errGetPropertyMsg, "ControlGroup")
}
cgSubpath, ok := propCGSubpath.Value.Value().(string)
if !ok {
- return errors.Errorf(errConvertStringPropertyMsg, "ControlGroup", propCGSubpath.Value.Value())
+ return nil, errors.Errorf(errConvertStringPropertyMsg, "ControlGroup", propCGSubpath.Value.Value())
}
switch {
@@ -532,37 +570,32 @@ func (c *Collector) collectUnitCPUUsageMetrics(unitType string, conn *dbus.Conn,
cgSubpath == "" && unit.ActiveState == "failed":
// Expected condition, systemd has cleaned up and
// we have nothing to record
- return nil
+ return nil, nil
case cgSubpath == "" && unit.ActiveState == "active":
// Unexpected. Why is there no cgroup on an active unit?
- subType := c.mustGetUnitStringTypeProperty(unitType, "Type", "unknown", conn, unit)
- slice := c.mustGetUnitStringTypeProperty(unitType, "Slice", "unknown", conn, unit)
- return errors.Errorf("got 'no cgroup' from systemd for active unit (state=%s subtype=%s slice=%s)", unit.ActiveState, subType, slice)
+ subType := c.mustGetUnitStringTypeProperty(unitTypeInterface, "Type", "unknown", conn, unit)
+ slice := c.mustGetUnitStringTypeProperty(unitTypeInterface, "Slice", "unknown", conn, unit)
+ return nil, errors.Errorf("got 'no cgroup' from systemd for active unit (state=%s subtype=%s slice=%s)", unit.ActiveState, subType, slice)
case cgSubpath == "":
// We are likely reading a unit that is currently changing state, so
// we record this and bail
- subType := c.mustGetUnitStringTypeProperty(unitType, "Type", "unknown", conn, unit)
- slice := c.mustGetUnitStringTypeProperty(unitType, "Slice", "unknown", conn, unit)
+ subType := c.mustGetUnitStringTypeProperty(unitTypeInterface, "Type", "unknown", conn, unit)
+ slice := c.mustGetUnitStringTypeProperty(unitTypeInterface, "Slice", "unknown", conn, unit)
log.Debugf("Read 'no cgroup' from unit (name=%s state=%s subtype=%s slice=%s) ", unit.Name, unit.ActiveState, subType, slice)
- return nil
- }
-
- propCPUAcct, err := conn.GetUnitTypeProperty(unit.Name, unitType, "CPUAccounting")
- if err != nil {
- return errors.Wrapf(err, errGetPropertyMsg, "CPUAccounting")
- }
- cpuAcct, ok := propCPUAcct.Value.Value().(bool)
- if !ok {
- return errors.Errorf(errConvertStringPropertyMsg, "CPUAccounting", propCPUAcct.Value.Value())
- }
- if !cpuAcct {
- return nil
+ return nil, nil
+ default:
+ return &cgSubpath, nil
}
+}
- cpuUsage, err := NewCPUAcct(cgSubpath)
+// A number of unit types support the 'ControlGroup' property needed to allow us to directly read their
+// resource usage from the kernel's cgroupfs cpu hierarchy. The only change is which dbus item we are querying
+func (c *Collector) collectUnitCPUMetrics(cgSubpath string, conn *dbus.Conn, ch chan<- prometheus.Metric, unit dbus.UnitStatus) error {
+ // Don't bother reading CPUAccounting prop. It's faster to attempt a file read than to query dbus, and it works
+ // in more situations as well
+ cpuUsage, err := cgroup.NewCPUAcct(cgSubpath)
if err != nil {
- if unitType == "Socket" {
- log.Debugf("unable to read SocketUnit CPU accounting information (unit=%s)", unit.Name)
+ if perr, ok := err.(*os.PathError); ok && perr.Op == "open" {
return nil
}
return errors.Wrapf(err, errControlGroupReadMsg, "CPU usage")
@@ -581,6 +614,36 @@ func (c *Collector) collectUnitCPUUsageMetrics(unitType string, conn *dbus.Conn,
return nil
}
+func (c *Collector) collectUnitMemMetrics(cgSubpath string, conn *dbus.Conn, ch chan<- prometheus.Metric, unit dbus.UnitStatus) error {
+ // Don't bother reading MemoryAccounting prop. It's faster to attempt a file read than to query dbus, and it works
+ // in more situations as well. For example, when the kernel
+ // cmdline has cgroup_enable=memory but systemd still has DefaultMemoryAccounting=no, all cgroups
+ // will have a memory.stat file, but systemd will still report MemoryAccounting=false for most units
+ memStat, err := cgroup.NewMemStat(cgSubpath)
+ if err != nil {
+ if perr, ok := err.(*os.PathError); ok && perr.Op == "open" {
+ return nil
+ }
+ return errors.Wrapf(err, errControlGroupReadMsg, "Memory stat")
+ }
+
+ unitType := parseUnitType(unit)
+ ch <- prometheus.MustNewConstMetric(
+ c.unitMemCache, prometheus.GaugeValue,
+ float64(memStat.CacheBytes), unit.Name, unitType)
+ ch <- prometheus.MustNewConstMetric(
+ c.unitMemRss, prometheus.GaugeValue,
+ float64(memStat.RssBytes), unit.Name, unitType)
+ ch <- prometheus.MustNewConstMetric(
+ c.unitMemDirty, prometheus.GaugeValue,
+ float64(memStat.DirtyBytes), unit.Name, unitType)
+ ch <- prometheus.MustNewConstMetric(
+ c.unitMemShmem, prometheus.GaugeValue,
+ float64(memStat.Shmem), unit.Name, unitType)
+
+ return nil
+}
+
func (c *Collector) collectSocketConnMetrics(conn *dbus.Conn, ch chan<- prometheus.Metric, unit dbus.UnitStatus) error {
acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted")
if err != nil {
@@ -605,7 +668,7 @@ func (c *Collector) collectSocketConnMetrics(conn *dbus.Conn, ch chan<- promethe
return errors.Wrapf(err, errGetPropertyMsg, "NRefused")
}
ch <- prometheus.MustNewConstMetric(
- c.socketRefusedConnectionsDesc, prometheus.GaugeValue,
+ c.socketRefusedConnectionsDesc, prometheus.CounterValue,
float64(refusedConnectionCount.Value.Value().(uint32)), unit.Name)
return nil
diff --git a/systemd/systemd_test.go b/systemd/systemd_test.go
new file mode 100644
index 0000000..3f4f3ee
--- /dev/null
+++ b/systemd/systemd_test.go
@@ -0,0 +1,26 @@
+package systemd
+
+import (
+ "github.com/coreos/go-systemd/dbus"
+ "testing"
+)
+
+func TestParseUnitType(t *testing.T) {
+ x := dbus.UnitStatus{
+ Name: "test.service",
+ Description: "",
+ LoadState: "",
+ ActiveState: "",
+ SubState: "",
+ Followed: "",
+ Path: "",
+ JobId: 0,
+ JobType: "",
+ JobPath: "",
+ }
+ found := parseUnitType(x)
+ if found != "service" {
+ t.Errorf("Bad unit name parsing. Wanted %s got %s", "service", found)
+ }
+
+}