Skip to content

Commit

Permalink
Merge pull request #193 from appuio/release-4.14
Browse files Browse the repository at this point in the history
Support for OCP 4.14
  • Loading branch information
haasad authored Mar 25, 2024
2 parents 696b24c + e2d9f09 commit 3225f0f
Show file tree
Hide file tree
Showing 38 changed files with 3,050 additions and 22 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
- release-4.12
- release-4.13
- team-routing
- release-4.14
defaults:
run:
working-directory: ${{ env.COMPONENT_NAME }}
Expand All @@ -66,6 +67,7 @@ jobs:
- release-4.12
- release-4.13
- team-routing
- release-4.14
defaults:
run:
working-directory: ${{ env.COMPONENT_NAME }}
Expand Down
2 changes: 1 addition & 1 deletion Makefile.vars.mk
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ KUBENT_IMAGE ?= ghcr.io/doitintl/kube-no-trouble:latest
KUBENT_DOCKER ?= $(DOCKER_CMD) $(DOCKER_ARGS) $(root_volume) --entrypoint=/app/kubent $(KUBENT_IMAGE)

instance ?= capacity-alerts
test_instances = tests/capacity-alerts.yml tests/release-4.11.yml tests/remote-write.yml tests/user-workload-monitoring.yml tests/capacity-alerts-with-node-labels.yml tests/vsphere.yml tests/custom-rules.yml tests/release-4.12.yml tests/release-4.13.yml tests/team-routing.yml
test_instances = tests/capacity-alerts.yml tests/release-4.11.yml tests/remote-write.yml tests/user-workload-monitoring.yml tests/capacity-alerts-with-node-labels.yml tests/vsphere.yml tests/custom-rules.yml tests/release-4.12.yml tests/release-4.13.yml tests/team-routing.yml tests/release-4.14.yml
6 changes: 5 additions & 1 deletion class/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,18 @@ parameters:
# rules are fairly similar.
release-4.12: release-4.13
release-4.13: release-4.13
release-4.14: release-4.14
=_etcd_operator_version_map:
release-4.11: release-4.11
release-4.12: release-4.12
release-4.13: release-4.13
release-4.14: release-4.14
# no release branches newer than 4.9 exist
=_operator_lifecycle_manager_map:
release-4.11: release-4.9
release-4.12: release-4.9
release-4.13: release-4.9
release-4.14: release-4.9
jsonnetfile_parameters:
cmo_version: ${openshift4_monitoring:_cluster_monitoring_operator_version_map:${openshift4_monitoring:manifests_version}}
etcd_version: ${openshift4_monitoring:_etcd_operator_version_map:${openshift4_monitoring:manifests_version}}
Expand Down Expand Up @@ -215,6 +218,7 @@ parameters:
release-4.11: {}
release-4.12: {}
release-4.13: {}
release-4.14: {}
# Alerts to ignore for user workload monitoring
ignoreUserWorkload: []

Expand All @@ -241,7 +245,7 @@ parameters:
images:
oc:
image: quay.io/appuio/oc
tag: v4.12
tag: v4.14


capacityAlerts:
Expand Down
11 changes: 11 additions & 0 deletions class/openshift4-monitoring.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ parameters:
api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.13/bindata/assets/alerts/api-usage.yaml
cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.13/bindata/assets/alerts/cpu-utilization.yaml
slos: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.13/bindata/assets/alerts/kube-apiserver-slos-basic.yaml
release-4.14:
api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.14/bindata/assets/alerts/api-usage.yaml
cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.14/bindata/assets/alerts/cpu-utilization.yaml
slos: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.14/bindata/assets/alerts/kube-apiserver-slos-basic.yaml

machine-api-operator:
release-4.11:
Expand All @@ -22,6 +26,8 @@ parameters:
prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.12/install/0000_90_machine-api-operator_04_alertrules.yaml
release-4.13:
prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.13/install/0000_90_machine-api-operator_04_alertrules.yaml
release-4.14:
prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.14/install/0000_90_machine-api-operator_04_alertrules.yaml

ovn-kubernetes:
release-4.11:
Expand All @@ -39,6 +45,11 @@ parameters:
# We use the "self-hosted" variant of the control-plane alerts, so
# we don't have to worry about unresolved gotemplate references.
control_plane: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml
release-4.14:
  common: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/common/alert-rules.yaml
  # We use the "self-hosted" variant of the control-plane alerts, so
  # we don't have to worry about unresolved gotemplate references.
  # The branch follows manifests_version (same as `common`), so both rule
  # files are always fetched from the same upstream release.
  control_plane: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml

kapitan:
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion docs/modules/ROOT/pages/references/parameters.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The parent key for all of the following parameters is `openshift4_monitoring`.

[horizontal]
type:: string
default:: `release-4.13`
default:: `release-4.14`

Select which version of the upstream alerting (and recording) rules should be used by the component.
This parameter must be changed to match the cluster's OCP4 minor version.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ spec:
- alert: SYN_NodeDiskIOSaturation
annotations:
description: |
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.
summary: Disk IO queue is high.
syn_component: openshift4-monitoring
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
configMapKeyRef:
key: silences.json
name: silence
image: quay.io/appuio/oc:v4.12
image: quay.io/appuio/oc:v4.14
imagePullPolicy: IfNotPresent
name: silence
ports: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ spec:
- alert: SYN_NodeDiskIOSaturation
annotations:
description: |
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.
summary: Disk IO queue is high.
syn_component: openshift4-monitoring
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
configMapKeyRef:
key: silences.json
name: silence
image: quay.io/appuio/oc:v4.12
image: quay.io/appuio/oc:v4.14
imagePullPolicy: IfNotPresent
name: silence
ports: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1362,7 +1362,7 @@ spec:
- alert: SYN_NodeDiskIOSaturation
annotations:
description: |
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.
summary: Disk IO queue is high.
syn_component: openshift4-monitoring
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
configMapKeyRef:
key: silences.json
name: silence
image: quay.io/appuio/oc:v4.12
image: quay.io/appuio/oc:v4.14
imagePullPolicy: IfNotPresent
name: silence
ports: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
configMapKeyRef:
key: silences.json
name: silence
image: quay.io/appuio/oc:v4.12
image: quay.io/appuio/oc:v4.14
imagePullPolicy: IfNotPresent
name: silence
ports: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ spec:
- alert: SYN_NodeDiskIOSaturation
annotations:
description: |
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.
summary: Disk IO queue is high.
syn_component: openshift4-monitoring
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
configMapKeyRef:
key: silences.json
name: silence
image: quay.io/appuio/oc:v4.12
image: quay.io/appuio/oc:v4.14
imagePullPolicy: IfNotPresent
name: silence
ports: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ spec:
- alert: SYN_NodeDiskIOSaturation
annotations:
description: |
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.
summary: Disk IO queue is high.
syn_component: openshift4-monitoring
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ spec:
configMapKeyRef:
key: silences.json
name: silence
image: quay.io/appuio/oc:v4.12
image: quay.io/appuio/oc:v4.14
imagePullPolicy: IfNotPresent
name: silence
ports: []
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patch resource (redhatcop.redhat.io/v1alpha1) targeting the
# `openshift-monitoring` Namespace. The patch template sets the label
# `network.openshift.io/policy-group: monitoring` on it and references the
# `patch-sa` service account.
apiVersion: redhatcop.redhat.io/v1alpha1
kind: Patch
metadata:
  annotations:
    argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true
  labels:
    name: namespace-openshift-monitoring-c4273dc15ddfdf7
  name: namespace-openshift-monitoring-c4273dc15ddfdf7
  namespace: syn-patch-operator
spec:
  patches:
    namespace-openshift-monitoring-c4273dc15ddfdf7-patch:
      patchTemplate: |-
        "metadata":
          "labels":
            "network.openshift.io/policy-group": "monitoring"
      patchType: application/strategic-merge-patch+json
      targetObjectRef:
        apiVersion: v1
        kind: Namespace
        name: openshift-monitoring
  serviceAccountRef:
    name: patch-sa
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ClusterRole granting get/list/watch on every resource in the
# `monitoring.coreos.com` API group. It carries the
# `aggregate-to-cluster-reader` label.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  annotations: {}
  labels:
    name: syn-openshift4-monitoring-cluster-reader
    rbac.authorization.k8s.io/aggregate-to-cluster-reader: 'true'
  name: syn-openshift4-monitoring-cluster-reader
rules:
  - apiGroups:
      - monitoring.coreos.com
    resources:
      - '*'
    verbs:
      - get
      - list
      - watch
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Secret holding `alertmanager.yaml` for the `openshift-monitoring` namespace.
# Inhibit rules: a critical alert inhibits warning/info alerts, and a warning
# alert inhibits info alerts, when `namespace` and `alertname` are equal.
# The only receiver defined is `__component_openshift4_monitoring_null`, and
# both child routes point to it.
apiVersion: v1
data: {}
kind: Secret
metadata:
  annotations: {}
  labels:
    name: alertmanager-main
  name: alertmanager-main
  namespace: openshift-monitoring
stringData:
  alertmanager.yaml: |-
    "inhibit_rules":
    - "equal":
      - "namespace"
      - "alertname"
      "source_match":
        "severity": "critical"
      "target_match_re":
        "severity": "warning|info"
    - "equal":
      - "namespace"
      - "alertname"
      "source_match":
        "severity": "warning"
      "target_match_re":
        "severity": "info"
    "receivers":
    - "name": "__component_openshift4_monitoring_null"
    "route":
      "group_interval": "5s"
      "group_wait": "0s"
      "repeat_interval": "10m"
      "routes":
      - "continue": false
        "matchers":
        - "namespace =~ \"\""
        "receiver": "__component_openshift4_monitoring_null"
      - "receiver": "__component_openshift4_monitoring_null"
type: Opaque
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# ConfigMap `cluster-monitoring-config` in `openshift-monitoring`.
# `config.yaml` sets the `node-role.kubernetes.io/infra` node selector on
# every listed component, enables user workload monitoring, and configures
# storage requests (2Gi Alertmanager, 50Gi Prometheus), 8d retention and
# external labels for `prometheusK8s`.
apiVersion: v1
data:
  config.yaml: |-
    "alertmanagerMain":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
      "volumeClaimTemplate":
        "spec":
          "resources":
            "requests":
              "storage": "2Gi"
    "enableUserWorkload": true
    "grafana":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "k8sPrometheusAdapter":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "kubeStateMetrics":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "openshiftStateMetrics":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "prometheusK8s":
      "externalLabels":
        "cluster_id": "c-green-test-1234"
        "cluster_name": "Test Cluster 1234"
        "tenant_id": "t-silent-test-1234"
        "tenant_name": "Test Tenant 1234"
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
      "remoteWrite": []
      "retention": "8d"
      "volumeClaimTemplate":
        "spec":
          "resources":
            "requests":
              "storage": "50Gi"
    "prometheusOperator":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "telemeterClient":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "thanosQuerier":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
kind: ConfigMap
metadata:
  annotations: {}
  labels:
    name: cluster-monitoring-config
  name: cluster-monitoring-config
  namespace: openshift-monitoring
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# ConfigMap `user-workload-monitoring-config` in
# `openshift-user-workload-monitoring`.
# `config.yaml` enables Alertmanager (including AlertmanagerConfig), sets the
# `node-role.kubernetes.io/infra` node selector on every listed component,
# and configures storage requests (2Gi Alertmanager, 50Gi Prometheus),
# 8d retention and external labels for `prometheus`.
apiVersion: v1
data:
  config.yaml: |-
    "alertmanager":
      "enableAlertmanagerConfig": true
      "enabled": true
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
      "volumeClaimTemplate":
        "spec":
          "resources":
            "requests":
              "storage": "2Gi"
    "prometheus":
      "externalLabels":
        "cluster_id": "c-green-test-1234-user-workload"
        "cluster_name": "Test Cluster 1234 User Workload"
        "tenant_id": "t-silent-test-1234"
        "tenant_name": "Test Tenant 1234"
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
      "remoteWrite": []
      "retention": "8d"
      "volumeClaimTemplate":
        "spec":
          "resources":
            "requests":
              "storage": "50Gi"
    "prometheusOperator":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
    "thanosRuler":
      "nodeSelector":
        "node-role.kubernetes.io/infra": ""
kind: ConfigMap
metadata:
  annotations: {}
  labels:
    name: user-workload-monitoring-config
  name: user-workload-monitoring-config
  namespace: openshift-user-workload-monitoring
Loading

0 comments on commit 3225f0f

Please sign in to comment.