Skip to content

Commit

Permalink
Merge pull request #53 from appuio/feat/syn-metrics
Browse files Browse the repository at this point in the history
Add logic to configure ServiceMonitors
  • Loading branch information
simu authored Jul 4, 2022
2 parents b408e2f + 80d7ecd commit 91f54f9
Show file tree
Hide file tree
Showing 9 changed files with 288 additions and 38 deletions.
10 changes: 10 additions & 0 deletions class/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,17 @@ parameters:
threshold: 85
for: 6h
severity: warning

# TBD: move to `monitoring` key?
ignore_alerts: []
monitoring:
enabled: true
instance: null
enableServiceMonitors:
cluster-logging-operator: true
fluentd: true
elasticsearch-cluster: true

clusterLogging:
managementState: Managed
logStore:
Expand Down
40 changes: 13 additions & 27 deletions component/alertrules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,13 @@ local runbook(alertname) =
'https://hub.syn.tools/openshift4-logging/runbooks/%s.html' % alertname;

assert
std.member(inv.applications, 'openshift4-monitoring')
: 'openshift4-monitoring is not available';

// Function to process an array which supports removing previously added
// elements by prefixing them with ~
local render_array(arr) =
// extract real value of array entry
local realval(v) = std.lstripChars(v, '~');
// Compute whether each element should be included by keeping track of
// whether its last occurrence in the input array was prefixed with ~ or
// not.
local val_state = std.foldl(
function(a, it) a + it,
[
{ [realval(v)]: !std.startsWith(v, '~') }
for v in arr
],
{}
);
// Return filtered array containing only elements whose last occurrence
// wasn't prefixed by ~.
std.filter(
function(val) val_state[val],
std.objectFields(val_state)
);
std.member(inv.applications, 'openshift4-monitoring') ||
std.member(inv.applications, 'prometheus')
: 'neither component `openshift4-monitoring` nor `prometheus` enabled';

// Keep only alerts from params.ignore_alerts for which the last
// array entry wasn't prefixed with `~`.
local user_ignore_alerts = render_array(params.ignore_alerts);
local user_ignore_alerts = com.renderArray(params.ignore_alerts);

// Upstream alerts to ignore
local ignore_alerts = std.set(
Expand All @@ -62,7 +40,15 @@ local patch_alerts = {
// reuse their functionality as a black box to make sure our alerts work
// correctly in the environment into which we're deploying.

local global_alert_params = inv.parameters.openshift4_monitoring.alerts;
// XXX: We'll figure out how we do alert management, when we start working on
// alerting for the vendor-independent monitoring stack based on component
// prometheus.
local global_alert_params =
  // Default used in place of the `openshift4_monitoring` parameters: an
  // `alerts` object with an empty `ignoreNames` list.
  // NOTE(review): presumably `com.getValueOrDefault` returns this default
  // when the key is absent from the inventory -- confirm against the
  // commodore library.
  local monitoringDefaults = { alerts: { ignoreNames: [] } };
  com.getValueOrDefault(
    inv.parameters,
    'openshift4_monitoring',
    monitoringDefaults
  ).alerts;

local filter_patch_rules(g) =
// combine our set of alerts to ignore with the monitoring component's
Expand Down
2 changes: 2 additions & 0 deletions component/main.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ local group = 'operators.coreos.com/';
local clusterLoggingGroupVersion = 'logging.openshift.io/v1';

local alert_rules = import 'alertrules.libsonnet';
local metrics = import 'metrics.libsonnet';

local namespace_groups = (
if std.objectHas(params.clusterLogForwarding, 'namespaces') then
Expand Down Expand Up @@ -205,3 +206,4 @@ local namespace_groups = (
},
'60_prometheus_rules': alert_rules.rules,
} + (import 'kibana-host.libsonnet')
+ (import 'metrics.libsonnet')
117 changes: 117 additions & 0 deletions component/metrics.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
local kap = import 'lib/kapitan.libjsonnet';
local kube = import 'lib/kube.libjsonnet';
local prom = import 'lib/prometheus.libsonnet';

local inv = kap.inventory();
local params = inv.parameters.openshift4_logging;

// Monitoring manifests are only rendered when the component's
// `monitoring.enabled` parameter is true AND component `prometheus` is listed
// in the inventory's applications.
local syn_metrics =
  local prometheusAvailable = std.member(inv.applications, 'prometheus');
  params.monitoring.enabled && prometheusAvailable;

// Namespace which is registered for monitoring and in which the
// ServiceMonitors are created.
local nsName = 'syn-monitoring-openshift4-logging';

// Prometheus instance which should pick up the monitoring configuration:
// the explicitly configured `monitoring.instance` if it is set, otherwise the
// default instance of component `prometheus`.
local promInstance =
  local requested = params.monitoring.instance;
  if requested == null then
    inv.parameters.prometheus.defaultInstance
  else
    requested;

// ServiceMonitors for the OpenShift logging stack: the cluster-logging
// operator, the fluentd collectors and the Elasticsearch cluster.
// Each entry selects services in `params.namespace` (via `targetNamespace`).
// Whether an entry is actually deployed is decided further down based on
// `params.monitoring.enableServiceMonitors`, keyed by the monitor's name.
local serviceMonitors =
  // Build the in-cluster DNS name of a service in the logging namespace.
  // The name is used as TLS server name for HTTPS endpoints, so it must
  // follow `params.namespace` just like `targetNamespace` does instead of
  // hard-coding `openshift-logging`.
  local svcName(service) = '%s.%s.svc' % [ service, params.namespace ];
  [
    prom.ServiceMonitor('cluster-logging-operator') {
      endpoints: {
        operator: {
          interval: '30s',
          metricRelabelings: [
            prom.DropRuntimeMetrics,
          ],
          port: 'http-metrics',
        },
      },
      selector: {
        matchLabels: {
          'control-plane': 'cluster-logging-operator',
        },
      },
      targetNamespace: params.namespace,
    },
    prom.ServiceMonitor('fluentd') {
      endpoints: {
        fluentd:
          prom.ServiceMonitorHttpsEndpoint(svcName('fluentd')) {
            // Fluentd doesn't need bearer token; the hidden field (`::`)
            // keeps `bearerTokenFile` out of the rendered endpoint.
            bearerTokenFile:: '',
            metricRelabelings: [
              {
                // Drop high-cardinality, low-value metrics regarding the amount
                // of logs ingested *per pod & container*.
                action: 'drop',
                regex:
                  '(cluster_logging_collector_input_record_(bytes|total)|' +
                  'log_collected_bytes_total)',
                sourceLabels: [ '__name__' ],
              },
            ],
          },
      },
      targetNamespace: params.namespace,
      selector: {
        matchLabels: {
          'logging-infra': 'support',
        },
      },
    },
    prom.ServiceMonitor('elasticsearch-cluster') {
      endpoints: {
        elasticsearch:
          prom.ServiceMonitorHttpsEndpoint(svcName('elasticsearch-metrics')) {
            // Elasticsearch exposes its metrics on a non-default path and on
            // the `elasticsearch` service port.
            path: '/_prometheus/metrics',
            port: 'elasticsearch',
            metricRelabelings: [
              prom.DropRuntimeMetrics,
            ],
          },
      },
      targetNamespace: params.namespace,
      selector: {
        matchLabels: {
          'cluster-name': 'elasticsearch',
          'scrape-metrics': 'enabled',
        },
      },
    },
  ];

// Result of this file: the monitoring manifests, or an empty object (plus a
// trace message) when monitoring is disabled or component `prometheus` is
// not present.
if !syn_metrics then
  std.trace(
    'Monitoring disabled or component `prometheus` not present, '
    + 'not deploying ServiceMonitors',
    {}
  )
else
  // Per-ServiceMonitor switches, keyed by the ServiceMonitor's name.
  local smEnabled = params.monitoring.enableServiceMonitors;
  {
    '70_monitoring_namespace': prom.RegisterNamespace(
      kube.Namespace(nsName),
      instance=promInstance,
    ),
    // Keep only the ServiceMonitors which are enabled and place them in the
    // monitoring namespace.
    '70_monitoring_servicemonitors': [
      sm {
        metadata+: {
          namespace: nsName,
        },
      }
      for sm in serviceMonitors
      if smEnabled[sm.metadata.name]
    ],
    '70_monitoring_networkpolicy': prom.NetworkPolicy(instance=promInstance) {
      metadata+: {
        // The networkpolicy needs to be in the namespace in which OpenShift
        // logging is deployed.
        namespace: params.namespace,
      },
    },
  }
34 changes: 34 additions & 0 deletions docs/modules/ROOT/pages/references/parameters.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,40 @@ default:: []
This parameter can be used to disable alerts provided by openshift cluster-logging-operator.
The component supports removing entries in this parameter by providing the entry prefixed with `~`.

== `monitoring`

This parameter allows users to enable the component's monitoring configuration.
Currently the component has support for deploying custom `ServiceMonitors` on clusters which use component `prometheus` to manage a custom monitoring stack.

=== `enabled`

[horizontal]
type:: boolean
default:: `true`

Whether to deploy monitoring configurations.
If this parameter is set to `true`, the component checks whether component `prometheus` is enabled for the cluster.
If component `prometheus` is missing, no monitoring configurations are deployed regardless of the value of this parameter.

=== `instance`

[horizontal]
type:: string
default:: `null`

This parameter can be used to indicate which custom Prometheus instance should pick up the configurations managed by the component.

If the parameter is set to `null`, the default instance configured for component `prometheus` will be used.

=== `enableServiceMonitors`

[horizontal]
type:: dictionary
default:: https://github.com/appuio/component-openshift4-logging/blob/master/class/defaults.yml[See `class/defaults.yml`]

A dictionary with the names of service monitors as keys and booleans as values.
Can be used to selectively enable or disable the service monitors `cluster-logging-operator`, `fluentd` and `elasticsearch-cluster`.

== `clusterLogging`

[horizontal]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: Namespace
metadata:
annotations: {}
labels:
monitoring.syn.tools/monitoring: 'true'
name: syn-monitoring-openshift4-logging
name: syn-monitoring-openshift4-logging
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
annotations: {}
labels:
name: allow-from-prometheus-monitoring
name: allow-from-prometheus-monitoring
namespace: openshift-logging
spec:
egress: []
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: syn-monitoring
podSelector: {}
policyTypes:
- Ingress
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
annotations: {}
labels:
name: cluster-logging-operator
name: cluster-logging-operator
namespace: syn-monitoring-openshift4-logging
spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: drop
regex: (go_.*|process_.*|promhttp_.*)
sourceLabels:
- __name__
port: http-metrics
namespaceSelector:
matchNames:
- openshift-logging
selector:
matchLabels:
control-plane: cluster-logging-operator
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
annotations: {}
labels:
name: fluentd
name: fluentd
namespace: syn-monitoring-openshift4-logging
spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: drop
regex: (cluster_logging_collector_input_record_(bytes|total)|log_collected_bytes_total)
sourceLabels:
- __name__
port: metrics
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
serverName: fluentd.openshift-logging.svc
namespaceSelector:
matchNames:
- openshift-logging
selector:
matchLabels:
logging-infra: support
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
annotations: {}
labels:
name: elasticsearch-cluster
name: elasticsearch-cluster
namespace: syn-monitoring-openshift4-logging
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
metricRelabelings:
- action: drop
regex: (go_.*|process_.*|promhttp_.*)
sourceLabels:
- __name__
path: /_prometheus/metrics
port: elasticsearch
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
serverName: elasticsearch-metrics.openshift-logging.svc
namespaceSelector:
matchNames:
- openshift-logging
selector:
matchLabels:
cluster-name: elasticsearch
scrape-metrics: enabled
15 changes: 4 additions & 11 deletions tests/syn-monitoring.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
applications:
- openshift4-operators as openshift-operators-redhat
# TODO: remove this application once we update the component to support the
# new monitoring stack.
- openshift4-monitoring
- prometheus

parameters:
Expand All @@ -11,6 +8,10 @@ parameters:
- type: https
source: https://raw.githubusercontent.com/appuio/component-openshift4-operators/v1.0.2/lib/openshift4-operators.libsonnet
output_path: vendor/lib/openshift4-operators.libsonnet
- type: https
source: https://raw.githubusercontent.com/projectsyn/component-prometheus/master/lib/prometheus.libsonnet
output_path: vendor/lib/prometheus.libsonnet


openshift4_operators:
defaultInstallPlanApproval: Automatic
Expand All @@ -26,11 +27,3 @@ parameters:
namespace: syn-monitoring
prometheus:
enabled: true

openshift4_monitoring:
alerts:
ignoreNames: []

openshift4_logging:
monitoring:
enabled: true

0 comments on commit 91f54f9

Please sign in to comment.