Skip to content

Commit

Permalink
systemd.go: Added watchdog metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Jonathan Davies <[email protected]>
  • Loading branch information
jpds committed Nov 13, 2023
1 parent 25c6295 commit 22901df
Showing 1 changed file with 93 additions and 1 deletion.
94 changes: 93 additions & 1 deletion systemd/systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"context"
"fmt"
"math"
"strconv"

// Register pprof-over-http handlers
_ "net/http/pprof"
Expand All @@ -42,6 +43,7 @@ var (
systemdUser = kingpin.Flag("systemd.collector.user", "Connect to the user systemd instance.").Bool()
enableRestartsMetrics = kingpin.Flag("systemd.collector.enable-restart-count", "Enables service restart count metrics. This feature only works with systemd 235 and above.").Bool()
enableIPAccountingMetrics = kingpin.Flag("systemd.collector.enable-ip-accounting", "Enables service ip accounting metrics. This feature only works with systemd 235 and above.").Bool()
watchdogTimeRE = regexp.MustCompile(`\d+`)
)

// unitStatesName holds a fixed list of unit state names.
// NOTE(review): these look like systemd ActiveState values; confirm against
// the code that iterates over this slice.
var unitStatesName = []string{"active", "activating", "deactivating", "inactive", "failed"}
Expand Down Expand Up @@ -77,6 +79,9 @@ type Collector struct {
ipEgressBytes *prometheus.Desc
ipIngressPackets *prometheus.Desc
ipEgressPackets *prometheus.Desc
watchdogRuntimeSeconds *prometheus.Desc
watchdogLastPingMonotonic *prometheus.Desc
watchdogLastPingTimestamp *prometheus.Desc

unitIncludePattern *regexp.Regexp
unitExcludePattern *regexp.Regexp
Expand Down Expand Up @@ -186,6 +191,21 @@ func NewCollector(logger log.Logger) (*Collector, error) {
"Service unit egress IP accounting in packets.",
[]string{"name"}, nil,
)
watchdogRuntimeSeconds := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "watchdog_runtime_seconds"),
"systemd watchdog runtime seconds",
[]string{"device"}, nil,
)
watchdogLastPingMonotonic := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "watchdog_last_ping_monotonic_seconds"),
"systemd watchdog last ping monotonic seconds",
[]string{"device"}, nil,
)
watchdogLastPingTimestamp := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "watchdog_last_ping_time_seconds"),
"systemd watchdog last ping time seconds",
[]string{"device"}, nil,
)
unitIncludePattern := regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *unitInclude))
unitExcludePattern := regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *unitExclude))

Expand Down Expand Up @@ -215,6 +235,9 @@ func NewCollector(logger log.Logger) (*Collector, error) {
ipEgressPackets: ipEgressPackets,
unitIncludePattern: unitIncludePattern,
unitExcludePattern: unitExcludePattern,
watchdogRuntimeSeconds: watchdogRuntimeSeconds,
watchdogLastPingMonotonic: watchdogLastPingMonotonic,
watchdogLastPingTimestamp: watchdogLastPingTimestamp,
}, nil
}

Expand Down Expand Up @@ -243,7 +266,9 @@ func (c *Collector) Describe(desc chan<- *prometheus.Desc) {
desc <- c.ipEgressBytes
desc <- c.ipIngressPackets
desc <- c.ipEgressPackets

desc <- c.watchdogRuntimeSeconds
desc <- c.watchdogLastPingMonotonic
desc <- c.watchdogLastPingTimestamp
}

func parseUnitType(unit dbus.UnitStatus) string {
Expand All @@ -259,6 +284,11 @@ func (c *Collector) collect(ch chan<- prometheus.Metric) error {
}
defer conn.Close()

err = c.collectWatchdogMetrics(conn, ch)
if err != nil {
level.Debug(c.logger).Log("msg", "Failed to collect watchdog timestamps", "err", err)
}

allUnits, err := conn.ListUnitsContext(c.ctx)
if err != nil {
return errors.Wrap(err, "could not get list of systemd units from dbus")
Expand Down Expand Up @@ -620,3 +650,65 @@ func (c *Collector) filterUnits(units []dbus.UnitStatus, includePattern, exclude

return filtered
}

// collectWatchdogMetrics exports the systemd manager's watchdog state: the
// configured runtime timeout and the monotonic and wall-clock timestamps of
// the last watchdog ping. The manager reports these values in microseconds;
// they are converted to seconds before being sent on ch.
//
// When the manager reports an empty WatchdogDevice, nothing is emitted and nil
// is returned. An error is returned when a manager property cannot be read or
// does not contain a number. All three values are resolved before the first
// send, so a failure never leaves a partial set of watchdog metrics on ch.
func (c *Collector) collectWatchdogMetrics(conn *dbus.Conn, ch chan<- prometheus.Metric) error {
	watchdogDevice, err := conn.GetManagerProperty("WatchdogDevice")
	if err != nil {
		return fmt.Errorf("reading manager property WatchdogDevice: %w", err)
	}

	// The property comes back in its textual variant form, in which a string
	// value is wrapped in double quotes; strip them to get the bare device path.
	device := strings.TrimPrefix(strings.TrimSuffix(watchdogDevice, `"`), `"`)
	if len(device) == 0 {
		level.Debug(c.logger).Log("msg", "No watchdog configured, ignoring metrics")
		return nil
	}

	// usecProperty reads a numeric manager property and returns its value in
	// microseconds. The textual form may carry a type annotation in front of
	// the number, so only the first run of digits is parsed.
	usecProperty := func(name string) (float64, error) {
		raw, err := conn.GetManagerProperty(name)
		if err != nil {
			return 0, fmt.Errorf("reading manager property %s: %w", name, err)
		}
		usec, err := strconv.ParseFloat(watchdogTimeRE.FindString(raw), 64)
		if err != nil {
			return 0, fmt.Errorf("parsing manager property %s value %q: %w", name, raw, err)
		}
		return usec, nil
	}

	runtimeUSec, err := usecProperty("RuntimeWatchdogUSec")
	if err != nil {
		return err
	}
	lastPingMonotonicUSec, err := usecProperty("WatchdogLastPingTimestampMonotonic")
	if err != nil {
		return err
	}
	lastPingTimeUSec, err := usecProperty("WatchdogLastPingTimestamp")
	if err != nil {
		return err
	}

	samples := []struct {
		desc *prometheus.Desc
		usec float64
	}{
		{c.watchdogRuntimeSeconds, runtimeUSec},
		{c.watchdogLastPingMonotonic, lastPingMonotonicUSec},
		{c.watchdogLastPingTimestamp, lastPingTimeUSec},
	}
	for _, s := range samples {
		// Label with the unquoted device path, not the raw quoted property text.
		ch <- prometheus.MustNewConstMetric(s.desc, prometheus.GaugeValue, s.usec/1e6, device)
	}

	return nil
}

0 comments on commit 22901df

Please sign in to comment.