From 3dfc023c1a1742bcf5f009c3088aa2cdc475096d Mon Sep 17 00:00:00 2001 From: Muhammad Adeel Date: Mon, 21 Oct 2024 23:37:28 +0200 Subject: [PATCH] Expose s390x CPU Topology to Prometheus --- info/v1/machine.go | 8 ++++ info/v1/machine_test.go | 2 + machine/info.go | 2 + machine/machine.go | 42 ++++++++++++++++++++- metrics/prometheus_machine.go | 16 ++++++++ metrics/testdata/prometheus_machine_metrics | 6 +++ utils/sysfs/sysfs.go | 6 +++ 7 files changed, 80 insertions(+), 2 deletions(-) diff --git a/info/v1/machine.go b/info/v1/machine.go index 4fa4b0e554..6df86a266e 100644 --- a/info/v1/machine.go +++ b/info/v1/machine.go @@ -193,6 +193,12 @@ type MachineInfo struct { // The number of cpu sockets in this machine. NumSockets int `json:"num_sockets"` + // The number of cpu books in this machine. + NumBooks int `json:"num_books,omitempty"` + + // The number of cpu drawers in this machine. + NumDrawers int `json:"num_drawers,omitempty"` + // Maximum clock speed for the cores, in KHz. CpuFrequency uint64 `json:"cpu_frequency_khz"` @@ -263,6 +269,8 @@ func (m *MachineInfo) Clone() *MachineInfo { NumCores: m.NumCores, NumPhysicalCores: m.NumPhysicalCores, NumSockets: m.NumSockets, + NumBooks: m.NumBooks, + NumDrawers: m.NumDrawers, CpuFrequency: m.CpuFrequency, MemoryCapacity: m.MemoryCapacity, SwapCapacity: m.SwapCapacity, diff --git a/info/v1/machine_test.go b/info/v1/machine_test.go index 7a5750bc26..595000b1c0 100644 --- a/info/v1/machine_test.go +++ b/info/v1/machine_test.go @@ -42,6 +42,8 @@ func getFakeMachineInfo() MachineInfo { NumCores: 1, NumPhysicalCores: 2, NumSockets: 3, + NumBooks: 1, + NumDrawers: 1, CpuFrequency: 4, MemoryCapacity: 5, SwapCapacity: 6, diff --git a/machine/info.go b/machine/info.go index 76d2272f6a..812e3c35ee 100644 --- a/machine/info.go +++ b/machine/info.go @@ -130,6 +130,8 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach NumCores: numCores, NumPhysicalCores: GetPhysicalCores(cpuinfo), NumSockets: 
GetSockets(cpuinfo), + NumBooks: GetBooks(cpuinfo), + NumDrawers: GetDrawers(cpuinfo), CpuFrequency: clockSpeed, MemoryCapacity: memoryCapacity, MemoryByType: memoryByType, diff --git a/machine/machine.go b/machine/machine.go index 0ba161614f..d7e9893ca3 100644 --- a/machine/machine.go +++ b/machine/machine.go @@ -20,6 +20,7 @@ import ( "os" "path" "regexp" + "runtime" "strconv" "strings" @@ -35,8 +36,10 @@ import ( ) var ( - coreRegExp = regexp.MustCompile(`(?m)^core id\s*:\s*([0-9]+)$`) - nodeRegExp = regexp.MustCompile(`(?m)^physical id\s*:\s*([0-9]+)$`) + coreRegExp = regexp.MustCompile(`(?m)^core id\s*:\s*([0-9]+)$`) + nodeRegExp = regexp.MustCompile(`(?m)^physical id\s*:\s*([0-9]+)$`) + bookRegExp = regexp.MustCompile(`(?m)^book id\s*:\s*([0-9]+)$`) + drawerRegExp = regexp.MustCompile(`(?m)^drawer id\s*:\s*([0-9]+)$`) // Power systems have a different format so cater for both cpuClockSpeedMHz = regexp.MustCompile(`(?:cpu MHz|CPU MHz|clock)\s*:\s*([0-9]+\.[0-9]+)(?:MHz)?`) memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`) @@ -96,6 +99,41 @@ func GetSockets(procInfo []byte) int { return numSocket } +// GetBooks returns the number of CPU books on s390x (0 on other architectures), counting 'book id' entries in procInfo and falling back to sysfs. +func GetBooks(procInfo []byte) int { + if runtime.GOARCH != "s390x" { + return 0 + } + numBook := getUniqueMatchesCount(string(procInfo), bookRegExp) + if numBook == 0 { + // read number of books from /sys/bus/cpu/devices/cpu*/topology/book_id to deal with processors + // for which 'book id' is not available in /proc/cpuinfo + numBook = sysfs.GetUniqueCPUPropertyCount(cpuAttributesPath, sysfs.CPUBookID) + } + if numBook == 0 { + klog.Errorf("Cannot read number of books correctly, number of books set to %d", numBook) + } + return numBook +} + +// GetDrawers returns the number of CPU drawers on s390x (0 on other architectures), counting 'drawer id' entries in procInfo and falling back to sysfs. +func GetDrawers(procInfo []byte) int { + if runtime.GOARCH != "s390x" { + return 0 + } + numDrawer := getUniqueMatchesCount(string(procInfo), drawerRegExp) + if 
numDrawer == 0 { + // no 'drawer id' entries were matched in procInfo, so fall back to sysfs: + // read number of drawers from /sys/bus/cpu/devices/cpu*/topology/drawer_id to deal with processors + // for which 'drawer id' is not available in /proc/cpuinfo + numDrawer = sysfs.GetUniqueCPUPropertyCount(cpuAttributesPath, sysfs.CPUDrawerID) + } + if numDrawer == 0 { + klog.Errorf("Cannot read number of drawers correctly, number of drawers set to %d", numDrawer) + } + return numDrawer +} + // GetClockSpeed returns the CPU clock speed, given a []byte formatted as the /proc/cpuinfo file. func GetClockSpeed(procInfo []byte) (uint64, error) { // First look through sys to find a max supported cpu frequency. diff --git a/metrics/prometheus_machine.go b/metrics/prometheus_machine.go index 67e5c13d74..2c3e96a12d 100644 --- a/metrics/prometheus_machine.go +++ b/metrics/prometheus_machine.go @@ -103,6 +103,22 @@ func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.Met return metricValues{{value: float64(machineInfo.NumSockets), timestamp: machineInfo.Timestamp}} }, }, + { + name: "machine_cpu_books", + help: "Number of CPU books.", + valueType: prometheus.GaugeValue, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return metricValues{{value: float64(machineInfo.NumBooks), timestamp: machineInfo.Timestamp}} + }, + }, + { + name: "machine_cpu_drawers", + help: "Number of CPU drawers.", + valueType: prometheus.GaugeValue, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return metricValues{{value: float64(machineInfo.NumDrawers), timestamp: machineInfo.Timestamp}} + }, + }, { name: "machine_memory_bytes", help: "Amount of memory installed on the machine.", diff --git a/metrics/testdata/prometheus_machine_metrics b/metrics/testdata/prometheus_machine_metrics index 1b2ccedb23..60043e5ea0 100644 --- a/metrics/testdata/prometheus_machine_metrics +++ b/metrics/testdata/prometheus_machine_metrics @@ -1,3 
+1,6 @@ +# HELP machine_cpu_books Number of CPU books. +# TYPE machine_cpu_books gauge +machine_cpu_books{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 0 1395066363000 # HELP machine_cpu_cache_capacity_bytes Cache size in bytes assigned to NUMA node and CPU core. # TYPE machine_cpu_cache_capacity_bytes gauge machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="",level="3",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 8.388608e+06 1395066363000 @@ -28,6 +31,9 @@ machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="2",ma # HELP machine_cpu_cores Number of logical CPU cores. # TYPE machine_cpu_cores gauge machine_cpu_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 4 1395066363000 +# HELP machine_cpu_drawers Number of CPU drawers. +# TYPE machine_cpu_drawers gauge +machine_cpu_drawers{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 0 1395066363000 # HELP machine_cpu_physical_cores Number of physical CPU cores. # TYPE machine_cpu_physical_cores gauge machine_cpu_physical_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 1395066363000 diff --git a/utils/sysfs/sysfs.go b/utils/sysfs/sysfs.go index f2aedfc757..a42876e80a 100644 --- a/utils/sysfs/sysfs.go +++ b/utils/sysfs/sysfs.go @@ -49,6 +49,12 @@ const ( // (rather than the kernel's). The actual value is architecture and platform dependent. CPUCoreID = "core_id" + // On some architectures there exist additional book and drawer topology levels. + // CPUBookID is the book ID of cpu#. Typically corresponds to a physical book number. + CPUBookID = "book_id" + // CPUDrawerID is the drawer ID of cpu#. Typically corresponds to a physical drawer number. 
+ CPUDrawerID = "drawer_id" + coreIDFilePath = "/" + sysFsCPUTopology + "/core_id" packageIDFilePath = "/" + sysFsCPUTopology + "/physical_package_id" bookIDFilePath = "/" + sysFsCPUTopology + "/book_id"