Skip to content

Commit

Permalink
Merge pull request #204 from appuio/feat/custom-node-exporter
Browse files Browse the repository at this point in the history
Add support for deploying an additional node-exporter DaemonSet
  • Loading branch information
simu authored Jun 19, 2024
2 parents 197c80b + 6f9e209 commit b330be6
Show file tree
Hide file tree
Showing 8 changed files with 695 additions and 4 deletions.
6 changes: 3 additions & 3 deletions .cruft.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template": "https://github.com/projectsyn/commodore-component-template.git",
"commit": "ea12efff947bce80cf31a3f1ed4412eab40e8b33",
"commit": "26ee71e475cca036551c68a6c6b2285fe86139a0",
"checkout": "main",
"context": {
"cookiecutter": {
Expand All @@ -16,8 +16,8 @@
"automerge_patch": "y",
"automerge_patch_v0": "n",
"automerge_patch_regexp_blocklist": "",
"automerge_patch_v0_regexp_allowlist": "",
"automerge_minor_regexp_allowlist": "",
"automerge_patch_v0_regexp_allowlist": "^quay.io/brancz/kube-rbac-proxy$",
"automerge_minor_regexp_allowlist": "^quay.io/appuio/oc$;^quay.io/brancz/kube-rbac-proxy$;^quay.io/prometheus/node-exporter$",
"auto_release": "y",
"copyright_holder": "VSHN AG <[email protected]>",
"copyright_year": "2021",
Expand Down
21 changes: 20 additions & 1 deletion class/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,14 @@ parameters:
oc:
image: quay.io/appuio/oc
tag: v4.14

node_exporter:
registry: quay.io
repository: prometheus/node-exporter
tag: v1.8.1
kube_rbac_proxy:
registry: quay.io
repository: brancz/kube-rbac-proxy
tag: v0.18.0

capacityAlerts:
enabled: true
Expand Down Expand Up @@ -402,3 +409,15 @@ parameters:
secrets: {}

cronjobs: {}

customNodeExporter:
enabled: false
collectors:
- network_route
args: []
metricRelabelings:
# only keep routes for host interfaces (assumes that host interfaces
# are `ensX` which should hold on RHCOS)
- action: keep
sourceLabels: ['__name__', 'device']
regex: 'node_network_route.*;ens.*'
208 changes: 208 additions & 0 deletions component/custom-node-exporter.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
local com = import 'lib/commodore.libjsonnet';
local kap = import 'lib/kapitan.libjsonnet';
local kube = import 'lib/kube.libjsonnet';

local nodeExporter = import 'github.com/openshift/cluster-monitoring-operator/jsonnet/components/node-exporter.libsonnet';

local inv = kap.inventory();
local params = inv.parameters.openshift4_monitoring;

// Command line flags which explicitly disable every known node-exporter
// collector. The list of collector names is maintained by hand and may need
// to be extended when a new node-exporter release introduces additional
// collectors.
local neDefaultArgs = [
  '--no-collector.%s' % collector
  for collector in [
    'arp',
    'bcache',
    'bonding',
    'btrfs',
    'buddyinfo',
    'cgroups',
    'conntrack',
    'cpu',
    'cpufreq',
    'diskstats',
    'dmi',
    'drbd',
    'drm',
    'edac',
    'entropy',
    'ethtool',
    'fibrechannel',
    'filefd',
    'filesystem',
    'hwmon',
    'infiniband',
    'interrupts',
    'ipvs',
    'ksmd',
    'lnstat',
    'loadavg',
    'logind',
    'mdadm',
    'meminfo',
    'meminfo_numa',
    'mountstats',
    'netclass',
    'netdev',
    'netstat',
    'network_route',
    'nfs',
    'nfsd',
    'ntp',
    'nvme',
    'os',
    'perf',
    'powersupplyclass',
    'pressure',
    'processes',
    'rapl',
    'schedstat',
    'selinux',
    'slabinfo',
    'sockstat',
    'softirqs',
    'softnet',
    'stat',
    'supervisord',
    'sysctl',
    'systemd',
    'tapestats',
    'tcpstat',
    'textfile',
    'thermal_zone',
    'time',
    'timex',
    'udp_queues',
    'uname',
    'vmstat',
    'watchdog',
    'wifi',
    'xfs',
    'zfs',
    'zoneinfo',
  ]
];

// Returns true if `pat` occurs at least once as a substring of `str`.
local containsStr(pat, str) =
  local matches = std.findSubstr(pat, str);
  std.length(matches) != 0;

// Collectors requested through parameter `customNodeExporter.collectors`,
// post-processed by `com.renderArray` (the parameter docs state that entries
// can be removed from the list by prefixing them with `~`).
local enabledCollectors = com.renderArray(
  params.customNodeExporter.collectors
);

// Returns true if the default disable flag `a` (an entry of `neDefaultArgs`)
// must be omitted because the collector it disables has been requested via
// `enabledCollectors`.
//
// The flag is compared for exact equality with `--no-collector.<name>`.
// A substring comparison is not sufficient here: requesting `cpu` would also
// drop `--no-collector.cpufreq`, and requesting `stat` would drop the flags
// for `netstat`, `vmstat`, `sockstat`, etc. Those collectors would then be
// neither disabled nor explicitly requested, leaving them at node-exporter's
// built-in default.
local skipDefaultArg(a) = std.member(
  [
    '--no-collector.%s' % c
    for c in enabledCollectors
  ],
  a
);

// One `--collector.<name>` flag for each collector the user asked for.
local neCollectorArgs = std.map(
  function(collector) '--collector.%s' % collector,
  enabledCollectors
);

// Configuration passed to the cluster-monitoring-operator node-exporter
// Jsonnet for the additional DaemonSet.
local config = {
  // Renders `<registry>/<repository>:<tag>` for an entry of `params.images`.
  local imageRef(img) = '%(registry)s/%(repository)s:%(tag)s' % img,

  name: 'appuio-node-exporter',
  namespace: params.namespace,
  version: params.images.node_exporter.tag,
  port: 9199,
  image: imageRef(params.images.node_exporter),
  kubeRbacProxyImage: imageRef(params.images.kube_rbac_proxy),
  commonLabels: {
    'app.kubernetes.io/part-of': 'openshift4-monitoring',
  },
  // Hidden field with a match-everything regex.
  // NOTE(review): presumably consumed by the upstream node-exporter Jsonnet
  // to build device-exclusion flags -- confirm against the imported library.
  ignoredNetworkDevices:: '^.*$',
};

// Additional node-exporter manifests, rendered with the upstream
// cluster-monitoring-operator Jsonnet and patched so that they don't clash
// with the default node-exporter:
// * the node-exporter container only runs the collectors requested via
//   component parameters,
// * the serving certificate is stored in a dedicated secret,
// * the ServiceMonitor validates the TLS certificate and applies the
//   user-provided metricRelabelings,
// * objects which aren't needed for the additional DaemonSet are hidden.
local ne = nodeExporter(config) {
  // Secret which holds the service-ca issued serving certificate. Derived
  // from the configured name so that the service annotation and the volume
  // below can't drift apart.
  local tlsSecretName = '%s-tls' % config.name,

  // customize node-exporter args. We disable all collectors by default, and
  // only enable the ones requested via component parameters.
  daemonset+: {
    spec+: {
      template+: {
        spec+: {
          containers: std.map(
            function(c)
              if c.name == config.name then
                c {
                  args: [
                    // drop every collector-related flag configured upstream
                    a
                    for a in c.args
                    if !containsStr('collector', a)
                  ] + [
                    // only add the disable args for collectors that the user
                    // hasn't requested, since node-exporter doesn't support
                    // passing a disable and enable flag for the same
                    // collector.
                    a
                    for a in neDefaultArgs
                    if !skipDefaultArg(a)
                  ] + neCollectorArgs + params.customNodeExporter.args,
                  // fixup `date` call to use busybox compatible option
                  command: std.map(
                    function(cmd)
                      std.strReplace(cmd, '--iso-8601=seconds', '-Iseconds'),
                    c.command
                  ),
                }
              else
                c,
            super.containers
          ),
          // Fixup service-ca issued certificate secret name
          volumes: std.map(
            function(v)
              if v.name == 'node-exporter-tls' then
                v {
                  secret: {
                    secretName: tlsSecretName,
                  },
                }
              else
                v,
            super.volumes
          ),
        },
      },
    },
  },
  // Fixup the secret name to use for the service-ca issued cert
  service+: {
    metadata+: {
      annotations+: {
        'service.beta.openshift.io/serving-cert-secret-name': tlsSecretName,
      },
    },
  },
  // patch the service monitor to validate the TLS certificate and configure
  // user-provided custom metricRelabelings.
  serviceMonitor+: {
    spec+: {
      endpoints: std.map(
        function(ep) ep {
          metricRelabelings: params.customNodeExporter.metricRelabelings,
          tlsConfig: {
            ca: {},
            caFile: '/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt',
            cert: {},
            certFile: '/etc/prometheus/secrets/metrics-client-certs/tls.crt',
            keyFile: '/etc/prometheus/secrets/metrics-client-certs/tls.key',
            // The expected server name must follow the namespace the
            // exporter is deployed in (parameter `namespace`); a hard-coded
            // `openshift-monitoring` would fail TLS verification as soon as
            // the namespace is changed.
            serverName: '%s.%s.svc' % [ config.name, config.namespace ],
          },
        },
        super.endpoints
      ),
    },
  },

  // we don't need the networkpolicy
  networkPolicy:: {},
  // we don't need the servicemonitor generated by
  // openshift-cluster-monitoring, we customize the one generated
  // by the kube-prometheus Jsonnet.
  minimalServiceMonitor:: {},
  // we don't need a copy of the SCC for our node-exporter, we can use the one
  // generated by the cluster-monitoring-operator.
  securityContextConstraints:: {},
  // we don't need the default node-exporter prometheus rules
  mixin:: {},
  prometheusRule:: {},
};

// Emit the values of all visible fields of `ne` as a list; the fields hidden
// with `::` above are not part of the output.
[
  ne[field]
  for field in std.objectFields(ne)
]
3 changes: 3 additions & 0 deletions component/main.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,7 @@ local cronjobs = import 'cronjobs.libsonnet';
[if params.capacityAlerts.enabled then 'capacity_rules']: capacity.rules,
[if std.length(customRules.spec.groups) > 0 then 'custom_rules']: customRules,
[if std.length(cronjobs.cronjobs) > 0 then 'cronjobs']: cronjobs.cronjobs,
// only deploy the additional node-exporter if it's enabled
[if params.customNodeExporter.enabled then 'appuio_node_exporter']:
(import 'custom-node-exporter.libsonnet'),
}
56 changes: 56 additions & 0 deletions docs/modules/ROOT/pages/references/parameters.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -812,3 +812,59 @@ cronjobs:
spec:
failedJobsHistoryLimit: 1
----

== `customNodeExporter`

This parameter allows users to deploy an additional node-exporter DaemonSet.
We provide this option, since OpenShift's cluster-monitoring stack currently doesn't allow users to customize the bundled node-exporter DaemonSet.

Currently, the parameter is tailored to allow users to run an additional node-exporter which enables collectors that aren't enabled in the default node exporter.

The configuration is rendered by using the same Jsonnet that's used by the OpenShift cluster-monitoring stack to generate the default node-exporter DaemonSet.
The component further customizes the resulting manifests to ensure that there are no conflicts between the default node-exporter and the additional node-exporter.

The additional node-exporter is deployed in the namespace indicated by parameter `namespace`.
By default this is namespace `openshift-monitoring`.
The component also deploys a `ServiceMonitor` which ensures that the additional node-exporter is scraped by the cluster-monitoring stack's Prometheus.

Users can configure arbitrary recording and alerting rules which use metrics scraped from the additional node-exporter via parameter `rules`.

=== `enabled`

[horizontal]
type:: bool
default:: `false`

Whether to deploy the additional node-exporter.

=== `collectors`

[horizontal]
type:: list
default:: `["network_route"]`

Which collectors to enable in the additional node-exporter.
By default, all collectors are disabled.
Users can remove entries from this list by providing an existing entry prefixed with `~`.

=== `args`

[horizontal]
type:: list
default:: `[]`

Additional command line arguments to pass to the additional node-exporter.
Please note that specifying `--[no-]collector.<name>` here will break the DaemonSet, since `node-exporter` doesn't support specifying these flags multiple times.
Users should use parameter `customNodeExporter.collectors` to enable collectors.

=== `metricRelabelings`

[horizontal]
type:: list
default:: https://github.com/appuio/component-openshift4-monitoring/blob/master/class/defaults.yml[See `class/defaults.yml`]

This parameter allows users to specify the content of field `metricRelabelings` of the `ServiceMonitor` which is created for the additional node-exporter.
By default, the component drops all metrics except `node_network_route*` metrics for host devices prefixed with `ens`.
Since this component only applies to OpenShift 4, we know that any node's host interfaces will use device names that are prefixed with `ens`.

Users are encouraged to extend or overwrite this parameter to ensure all the metrics they're interested in are actually scraped by Prometheus.
33 changes: 33 additions & 0 deletions renovate.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,39 @@
"automerge",
"bump:patch"
]
},
{
"matchUpdateTypes": [
"patch"
],
"matchCurrentVersion": "/^v?0\\./",
"automerge": true,
"platformAutomerge": false,
"labels": [
"dependency",
"automerge",
"bump:patch"
],
"matchPackagePatterns": [
"^quay.io/brancz/kube-rbac-proxy$"
]
},
{
"matchUpdateTypes": [
"minor"
],
"automerge": true,
"platformAutomerge": false,
"labels": [
"dependency",
"automerge",
"bump:minor"
],
"matchPackagePatterns": [
"^quay.io/appuio/oc$",
"^quay.io/brancz/kube-rbac-proxy$",
"^quay.io/prometheus/node-exporter$"
]
}
]
}
3 changes: 3 additions & 0 deletions tests/custom-rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ parameters:
openshift4_monitoring:
manifests_version: release-4.13

customNodeExporter:
enabled: true

alerts:
excludeNamespaces:
- openshift-adp
Expand Down
Loading

0 comments on commit b330be6

Please sign in to comment.