diff --git a/class/defaults.yml b/class/defaults.yml index 7a5a066..50cbc84 100644 --- a/class/defaults.yml +++ b/class/defaults.yml @@ -71,20 +71,7 @@ parameters: memory: 128Mi clusterLogging: {} - - clusterLogForwarding: - enabled: false - forwarders: {} - namespace_groups: {} - application_logs: {} - audit_logs: - enabled: false - infrastructure_logs: - enabled: true - json: - enabled: false - typekey: 'kubernetes.labels.logFormat' - typename: 'nologformat' + clusterLogForwarder: {} operatorResources: clusterLogging: diff --git a/component/config_forwarding.libsonnet b/component/config_forwarding.libsonnet index 3004b7a..cc727ac 100644 --- a/component/config_forwarding.libsonnet +++ b/component/config_forwarding.libsonnet @@ -1,3 +1,4 @@ +local com = import 'lib/commodore.libjsonnet'; local kap = import 'lib/kapitan.libjsonnet'; local lib = import 'lib/openshift4-logging.libsonnet'; @@ -8,70 +9,84 @@ local deployLokistack = params.components.lokistack.enabled; local deployElasticsearch = params.components.elasticsearch.enabled; local forwardingOnly = !deployLokistack && !deployElasticsearch; +// ----------------------------------------------------------------------------- +// Legacy Rendering +// ----------------------------------------------------------------------------- + +local legacyConfigSpec = std.get(params, 'clusterLogForwarding', {}); +local legacyConfig = if std.length(legacyConfigSpec) > 0 then std.trace( + 'Parameter `clusterLogForwarding` is deprecated. Please update your config to use `clusterLogForwarder`', + legacyConfigSpec +) else {}; + local pipelineOutputRefs(pipeline) = local default = if forwardingOnly then [] else [ 'default' ]; std.get(pipeline, 'forwarders', []) + default; // Apply default config for application logs. 
-local patchAppLogDefaults = { - local outputRefs = pipelineOutputRefs(params.clusterLogForwarding.application_logs), - local enablePipeline = std.length(outputRefs) > 0, +local patchLegacyAppLogDefaults = { + local pipeline = std.get(legacyConfig, 'application_logs', { enabled: true }), + local pipelineOutputs = pipelineOutputRefs(pipeline), + local pipelineEnabled = std.length(pipelineOutputs) > 0, pipelines: { - [if enablePipeline then 'application-logs']: { + [if pipelineEnabled then 'application-logs']: { inputRefs: [ 'application' ], - outputRefs: outputRefs, + outputRefs: pipelineOutputs, }, }, }; // Apply default config for infra logs. -local patchInfraLogDefaults = { - local outputRefs = pipelineOutputRefs(params.clusterLogForwarding.infrastructure_logs), - local enablePipeline = params.clusterLogForwarding.infrastructure_logs.enabled && std.length(outputRefs) > 0, +local patchLegacyInfraLogDefaults = { + local pipeline = { enabled: true } + std.get(legacyConfig, 'infrastructure_logs', {}), + local pipelineOutputs = pipelineOutputRefs(pipeline), + local pipelineEnabled = pipeline.enabled && std.length(pipelineOutputs) > 0, pipelines: { - [if enablePipeline then 'infrastructure-logs']: { + [if pipelineEnabled then 'infrastructure-logs']: { inputRefs: [ 'infrastructure' ], - outputRefs: outputRefs, + outputRefs: pipelineOutputs, }, }, }; // Apply default config for audit logs. 
-local patchAuditLogDefaults = {
-  local outputRefs = pipelineOutputRefs(params.clusterLogForwarding.audit_logs),
-  local enablePipeline = params.clusterLogForwarding.audit_logs.enabled && std.length(outputRefs) > 0,
+local patchLegacyAuditLogDefaults = {
+  local pipeline = { enabled: false } + std.get(legacyConfig, 'audit_logs', {}),  // merge over the default: a legacy `audit_logs` without `enabled` must not fail on `pipeline.enabled`
+  local pipelineOutputs = pipelineOutputRefs(pipeline),
+  local pipelineEnabled = pipeline.enabled && std.length(pipelineOutputs) > 0,

   pipelines: {
-    [if enablePipeline then 'audit-logs']: {
+    [if pipelineEnabled then 'audit-logs']: {
       inputRefs: [ 'audit' ],
-      outputRefs: outputRefs,
+      outputRefs: pipelineOutputs,
     },
   },
 };

 // Enable json parsing for default pipelines if configured.
-local patchJsonLogging = {
-  local enableAppLogs = std.get(params.clusterLogForwarding.application_logs, 'json', false),
-  local enableInfraLogs = std.get(params.clusterLogForwarding.infrastructure_logs, 'json', false),
+local legacyEnableJson = std.get(std.get(legacyConfig, 'json', {}), 'enabled', false);
+local patchLegacyJsonLogging = {
+  local enableAppLogs = std.get(std.get(legacyConfig, 'application_logs', {}), 'json', false),
+  local enableInfraLogs = std.get(std.get(legacyConfig, 'infrastructure_logs', {}), 'json', false),

   pipelines: {
     [if enableAppLogs then 'application-logs']: { parse: 'json' },
     [if enableInfraLogs then 'infrastructure-logs']: { parse: 'json' },
   },
-  [if deployElasticsearch && params.clusterLogForwarding.json.enabled then 'outputDefaults']: {
+  [if deployElasticsearch && legacyEnableJson then 'outputDefaults']: {
     elasticsearch: {
-      structuredTypeKey: params.clusterLogForwarding.json.typekey,
-      structuredTypeName: params.clusterLogForwarding.json.typename,
+      structuredTypeKey: std.get(legacyConfig.json, 'typekey', 'kubernetes.labels.logFormat'),
+      structuredTypeName: std.get(legacyConfig.json, 'typename', 'nologformat'),
     },
   },
 };

 // Enable detectMultilineErrors for default pipelines if configured.
-local patchMultilineErrors = { - local enableAppLogs = std.get(params.clusterLogForwarding.application_logs, 'detectMultilineErrors', false), - local enableInfraLogs = std.get(params.clusterLogForwarding.infrastructure_logs, 'detectMultilineErrors', false), +local patchLegacyMultilineErrors = { + local enableAppLogs = std.get(std.get(legacyConfig, 'application_logs', {}), 'detectMultilineErrors', false), + local enableInfraLogs = std.get(std.get(legacyConfig, 'infrastructure_logs', {}), 'detectMultilineErrors', false), pipelines: { [if enableAppLogs then 'application-logs']: { detectMultilineErrors: true }, @@ -81,19 +96,19 @@ local patchMultilineErrors = { // --- patch deprecated `clusterLogForwarding.namespace` config local namespaceGroups = ( - if std.objectHas(params.clusterLogForwarding, 'namespaces') then + if std.objectHas(legacyConfig, 'namespaces') then { [ns]: { namespaces: [ ns ], - forwarders: [ params.clusterLogForwarding.namespaces[ns].forwarder ], + forwarders: [ legacyConfig.namespaces[ns].forwarder ], } - for ns in std.objectFields(params.clusterLogForwarding.namespaces) + for ns in std.objectFields(legacyConfig.namespaces) } else {} -) + params.clusterLogForwarding.namespace_groups; +) + std.get(legacyConfig, 'namespace_groups', {}); // --- patch end // Add inputs entry for every namespace_group defined in `clusterLogForwarding.namespace_groups`. -local patchCustomInputs = { +local patchLegacyCustomInputs = { [if std.length(namespaceGroups) > 0 then 'inputs']: { [group]: { application: { @@ -105,7 +120,7 @@ local patchCustomInputs = { }; // Add pipelines entry for every namespace_group defined in `clusterLogForwarding.namespace_groups`. 
-local patchCustomPipelines = { +local patchLegacyCustomPipelines = { [if std.length(namespaceGroups) > 0 then 'pipelines']: { local enableJson = std.get(namespaceGroups[group], 'json', false), local enableMultilineError = std.get(namespaceGroups[group], 'detectMultilineErrors', false), @@ -121,35 +136,39 @@ local patchCustomPipelines = { }; // Add outputs entry for every forwarder defined in `clusterLogForwarding.forwarders`. -local patchCustomOutputs = { - [if std.length(params.clusterLogForwarding.forwarders) > 0 then 'outputs']: { - [name]: params.clusterLogForwarding.forwarders[name] - for name in std.objectFields(params.clusterLogForwarding.forwarders) +local patchLegacyCustomOutputs = { + [if std.length(std.get(legacyConfig, 'forwarders', {})) > 0 then 'outputs']: { + [name]: legacyConfig.forwarders[name] + for name in std.objectFields(legacyConfig.forwarders) }, }; -// ClusterLogForwarderSpecs: +// ----------------------------------------------------------------------------- +// End Legacy Rendering +// ----------------------------------------------------------------------------- + +// clusterLogForwarderSpec: // Consecutively apply patches to result of previous apply. local clusterLogForwarderSpec = std.foldl( // we use std.mergePatch here, because this way we don't need // to make each patch object mergeable by suffixing all keys with a +. 
function(manifest, patch) std.mergePatch(manifest, patch), [ - patchAppLogDefaults, - patchInfraLogDefaults, - patchAuditLogDefaults, - patchJsonLogging, - patchMultilineErrors, - patchCustomInputs, - patchCustomOutputs, - patchCustomPipelines, + patchLegacyAppLogDefaults, + patchLegacyInfraLogDefaults, + patchLegacyAuditLogDefaults, + patchLegacyJsonLogging, + patchLegacyMultilineErrors, + patchLegacyCustomInputs, + patchLegacyCustomOutputs, + patchLegacyCustomPipelines, ], { inputs: {}, outputs: {}, pipelines: {}, - } -); + }, +) + com.makeMergeable(params.clusterLogForwarder); // ClusterLogForwarder: // Create definitive ClusterLogForwarder resource from specs. @@ -176,8 +195,10 @@ local clusterLogForwarder = lib.ClusterLogForwarder(params.namespace, 'instance' }, }; +local enableLogForwarder = std.length(params.clusterLogForwarder) > 0 || std.get(legacyConfig, 'enabled', false); + // Define outputs below -if params.clusterLogForwarding.enabled then +if enableLogForwarder then { '31_cluster_logforwarding': clusterLogForwarder, } diff --git a/tests/elasticsearch.yml b/tests/elasticsearch.yml index 7310ac9..8532afe 100644 --- a/tests/elasticsearch.yml +++ b/tests/elasticsearch.yml @@ -11,20 +11,12 @@ parameters: - type: https source: https://raw.githubusercontent.com/appuio/component-openshift4-monitoring/v2.9.0/lib/openshift4-monitoring-alert-patching.libsonnet output_path: vendor/lib/alert-patching.libsonnet - - type: https - source: https://raw.githubusercontent.com/projectsyn/component-patch-operator/v1.1.0/lib/patch-operator.libsonnet - output_path: vendor/lib/patch-operator.libsonnet compile: - input_type: jsonnet input_paths: - tests/console-patch.jsonnet output_path: console-patching/ - patch_operator: - namespace: syn-patch-operator - patch_serviceaccount: - name: syn-patch-operator - openshift4_operators: defaultInstallPlanApproval: Automatic defaultSource: openshift-operators-redhat diff --git a/tests/forwardingonly.yml b/tests/forwardingonly.yml index 
f3efe3e..cfcbe5d 100644 --- a/tests/forwardingonly.yml +++ b/tests/forwardingonly.yml @@ -28,6 +28,3 @@ parameters: enabled: false elasticsearch: enabled: false - - clusterLogForwarding: - enabled: true diff --git a/tests/golden/forwardingonly/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml b/tests/golden/forwardingonly/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml deleted file mode 100644 index 11b7fde..0000000 --- a/tests/golden/forwardingonly/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: logging.openshift.io/v1 -kind: ClusterLogForwarder -metadata: - annotations: {} - labels: - name: instance - name: instance - namespace: openshift-logging -spec: {} diff --git a/tests/golden/legacy/openshift4-logging/console-patching/openshift4_console_params.yaml b/tests/golden/legacy/openshift4-logging/console-patching/openshift4_console_params.yaml new file mode 100644 index 0000000..f71555a --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/console-patching/openshift4_console_params.yaml @@ -0,0 +1,3 @@ +config: + plugins: + - logging-view-plugin diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/00_namespace.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/00_namespace.yaml new file mode 100644 index 0000000..1b27cf9 --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/00_namespace.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + annotations: + openshift.io/node-selector: '' + labels: + name: openshift-logging + openshift.io/cluster-monitoring: 'true' + name: openshift-logging diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/10_operator_group.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/10_operator_group.yaml new file mode 100644 index 0000000..ff11675 --- /dev/null +++ 
b/tests/golden/legacy/openshift4-logging/openshift4-logging/10_operator_group.yaml @@ -0,0 +1,11 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + annotations: {} + labels: + name: cluster-logging + name: cluster-logging + namespace: openshift-logging +spec: + targetNamespaces: + - openshift-logging diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/20_subscriptions.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/20_subscriptions.yaml new file mode 100644 index 0000000..1f0b7ad --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/20_subscriptions.yaml @@ -0,0 +1,43 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + annotations: {} + labels: + name: cluster-logging + name: cluster-logging + namespace: openshift-logging +spec: + channel: stable-5.9 + config: + resources: + limits: + memory: 256Mi + requests: + cpu: 10m + memory: 128Mi + installPlanApproval: Automatic + name: cluster-logging + source: redhat-operators + sourceNamespace: openshift-operators-redhat +--- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + annotations: {} + labels: + name: loki-operator + name: loki-operator + namespace: openshift-operators-redhat +spec: + channel: stable-5.9 + config: + resources: + limits: + memory: 512Mi + requests: + cpu: 50m + memory: 381Mi + installPlanApproval: Automatic + name: loki-operator + source: openshift-operators-redhat + sourceNamespace: openshift-operators-redhat diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/30_cluster_logging.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/30_cluster_logging.yaml new file mode 100644 index 0000000..307f0ca --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/30_cluster_logging.yaml @@ -0,0 +1,17 @@ +apiVersion: logging.openshift.io/v1 +kind: ClusterLogging +metadata: + annotations: + argocd.argoproj.io/sync-options: 
SkipDryRunOnMissingResource=true + labels: + name: instance + name: instance + namespace: openshift-logging +spec: + collection: + type: vector + logStore: + lokistack: + name: loki + type: lokistack + managementState: Managed diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml new file mode 100644 index 0000000..b6f6990 --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml @@ -0,0 +1,44 @@ +apiVersion: logging.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + annotations: {} + labels: + name: instance + name: instance + namespace: openshift-logging +spec: + inputs: + - application: + namespaces: + - app-one + - app-two + name: my-apps + outputs: + - name: custom-forwarder + type: syslog + - elasticsearch: + version: 8 + name: my-other-forwarder + type: elasticsearch + pipelines: + - inputRefs: + - application + name: application-logs + outputRefs: + - my-other-forwarder + - default + - my-forwarder + parse: json + - detectMultilineErrors: true + inputRefs: + - infrastructure + name: infrastructure-logs + outputRefs: + - default + parse: json + - inputRefs: + - my-apps + name: my-apps + outputRefs: + - custom-forwarder + parse: json diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml new file mode 100644 index 0000000..dcca6fb --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml @@ -0,0 +1,153 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: 
loki-ingester-check + namespace: openshift-logging +rules: + - apiGroups: + - '' + resources: + - pods + - pods/exec + verbs: + - get + - list + - watch + - create + - delete + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: loki-ingester-check +subjects: + - kind: ServiceAccount + name: loki-ingester-check +--- +apiVersion: v1 +data: + wal-check.sh: | + #!/bin/bash + + set -e -o pipefail + + # Check if pod is in stuck state. + function check_pod() { + POD_NAME="loki-ingester-${1}" + echo "checking POD ${POD_NAME}" + PHASE=$(kubectl -n openshift-logging get po ${POD_NAME} -oyaml | yq '.status.phase') + if [ ${PHASE} != "Running" ]; then + return 0 + fi + READY=$(kubectl -n openshift-logging get po ${POD_NAME} -oyaml | yq '.status.conditions[] | select(.type == "ContainersReady") | .status') + if [ ${READY} == "True" ]; then + return 0 + fi + return 1 + } + + # Check directories of pod and remove non-existing checkpoint if present. + function check_dir() { + shopt -s extglob + POD_NAME="loki-ingester-${1}" + echo "checking DIR ${POD_NAME}" + DIR_CHP=$(kubectl -n openshift-logging exec -i ${POD_NAME} -- ls /tmp/wal | grep -o "^checkpoint\.[0-9]*$") + PATTERN=$(echo ${DIR_CHP} | sed 's/[^0-9]*//g') + DIR_WAL=$(kubectl -n openshift-logging exec -i ${POD_NAME} -- ls /tmp/wal | grep -o "^0*${PATTERN}$" || exit 0) + if [ -z $DIR_WAL ]; then + kubectl -n openshift-logging exec -i ${POD_NAME} -- rm -rf /tmp/wal/${DIR_CHP} + kubectl -n openshift-logging delete po ${POD_NAME} + fi + } + + # Check if pods are in stuck state for longer than ${SLEEP_TIME}. + # Only fix 1 pod at a time and immediatly exit if it is fixed. + function fix_pod() { + if ! check_pod $1; then + echo "stuck POD, waiting ${SLEEP_TIME}" + sleep ${SLEEP_TIME} + if ! 
check_pod $1; then + check_dir $1 + exit 0 + fi + fi + } + + fix_pod 0 + fix_pod 1 + + exit 0 +kind: ConfigMap +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 0 + jobTemplate: + spec: + activeDeadlineSeconds: 360 + backoffLimit: 1 + template: + spec: + containers: + - command: + - /usr/local/bin/wal-check.sh + env: + - name: SLEEP_TIME + value: 2m + image: quay.io/appuio/oc:v4.14 + imagePullPolicy: IfNotPresent + name: check-pod + ports: [] + stdin: false + tty: false + volumeMounts: + - mountPath: /usr/local/bin/wal-check.sh + name: wal-check + readOnly: true + subPath: wal-check.sh + nodeSelector: + node-role.kubernetes.io/infra: '' + restartPolicy: Never + serviceAccountName: loki-ingester-check + volumes: + - configMap: + defaultMode: 364 + name: loki-ingester-check + name: wal-check + schedule: '*/10 * * * *' diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_logstore.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_logstore.yaml new file mode 100644 index 0000000..77d8c18 --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_logstore.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +data: {} +kind: Secret +metadata: + annotations: {} + labels: + name: loki-logstore + name: loki-logstore +stringData: + access_key_id: '' + access_key_secret: '' + bucketnames: c-green-test-1234-logstore + endpoint: '' +type: Opaque diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_netpol.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_netpol.yaml new file mode 100644 index 0000000..f2cd3bb --- /dev/null +++ 
b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_netpol.yaml @@ -0,0 +1,54 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + annotations: {} + labels: + name: allow-console-logging-view-plugin + name: allow-console-logging-view-plugin +spec: + ingress: + - from: + - podSelector: + matchLabels: + app: console + component: ui + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshift-console + ports: + - port: 9443 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/created-by: openshift-logging_instance + app.kubernetes.io/name: logging-view-plugin + policyTypes: + - Ingress +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + annotations: {} + labels: + name: allow-console-logging-lokistack-gateway + name: allow-console-logging-lokistack-gateway +spec: + ingress: + - from: + - podSelector: + matchLabels: + app: console + component: ui + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshift-console + ports: + - port: 8080 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/component: lokistack-gateway + app.kubernetes.io/instance: loki + app.kubernetes.io/name: lokistack + policyTypes: + - Ingress diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml new file mode 100644 index 0000000..0b86fe6 --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Secret +metadata: + annotations: + argocd.argoproj.io/sync-options: Prune=false,Delete=false + kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader + labels: + name: loki-operator-controller-manager-metrics-token + name: loki-operator-controller-manager-metrics-token + namespace: openshift-operators-redhat +type: 
kubernetes.io/service-account-token diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_rbac.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_rbac.yaml new file mode 100644 index 0000000..d5dde59 --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_rbac.yaml @@ -0,0 +1,18 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + name: syn-loki-cluster-reader + rbac.authorization.k8s.io/aggregate-to-cluster-reader: 'true' + name: syn:loki:cluster-reader +rules: + - apiGroups: + - loki.grafana.com + resourceNames: + - logs + resources: + - application + - infrastructure + verbs: + - get diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_stack.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_stack.yaml new file mode 100644 index 0000000..259068c --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/50_loki_stack.yaml @@ -0,0 +1,60 @@ +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + annotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + labels: + name: loki + name: loki +spec: + limits: + global: + ingestion: + ingestionBurstSize: 9 + ingestionRate: 5 + size: 1x.demo + storage: + schemas: + - effectiveDate: '2022-06-01' + version: v12 + - effectiveDate: '2024-09-01' + version: v13 + secret: + name: loki-logstore + type: s3 + storageClassName: '' + template: + compactor: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 1 + distributor: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + gateway: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + indexGateway: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + ingester: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + querier: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + 
queryFrontend: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + ruler: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 1 + tenants: + mode: openshift-logging diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/60_collector_alerts.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/60_collector_alerts.yaml new file mode 100644 index 0000000..268663f --- /dev/null +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/60_collector_alerts.yaml @@ -0,0 +1,127 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: syn-collector-rules + name: syn-collector-rules + namespace: openshift-logging +spec: + groups: + - name: logging_collector.alerts + rules: + - alert: SYN_CollectorNodeDown + annotations: + message: Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod + }} collector component for more than 10m. + summary: Collector cannot be scraped + expr: | + up{app_kubernetes_io_component = "collector", app_kubernetes_io_part_of = "cluster-logging"} == 0 + for: 10m + labels: + service: collector + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_CollectorHighErrorRate + annotations: + message: '{{ $value }}% of records have resulted in an error by {{ $labels.namespace + }}/{{ $labels.pod }} collector component.' 
+ summary: '{{ $labels.namespace }}/{{ $labels.pod }} collector component + errors are high' + expr: | + 100 * ( + collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + / + collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + ) > 0.001 + for: 15m + labels: + service: collector + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_CollectorVeryHighErrorRate + annotations: + message: '{{ $value }}% of records have resulted in an error by {{ $labels.namespace + }}/{{ $labels.pod }} collector component.' + summary: '{{ $labels.namespace }}/{{ $labels.pod }} collector component + errors are very high' + expr: | + 100 * ( + collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + / + collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + ) > 0.05 + for: 15m + labels: + service: collector + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_ElasticsearchDeprecation + annotations: + message: The OpenShift Elasticsearch Operator is deprecated and is planned + to be removed in a future release. Red Hat provides bug fixes and support + for this feature during the current release lifecycle, but this feature + no longer receives enhancements. As an alternative to using the OpenShift + Elasticsearch Operator to manage the default log storage, you can use + the Loki Operator. + summary: Detected Elasticsearch as the in-cluster storage which is deprecated + and will be removed in a future release. + expr: | + sum(kube_pod_labels{namespace="openshift-logging",label_component='elasticsearch'}) > 0 + for: 5m + labels: + namespace: openshift-logging + service: storage + severity: Warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_FluentdDeprecation + annotations: + message: Fluentd is deprecated and is planned to be removed in a future + release. 
Red Hat provides bug fixes and support for this feature during + the current release lifecycle, but this feature no longer receives enhancements. + As an alternative to Fluentd, you can use Vector instead. + summary: Detected Fluentd as the collector which is deprecated and will + be removed in a future release. + expr: | + sum(kube_pod_labels{namespace="openshift-logging", label_implementation='fluentd', label_app_kubernetes_io_managed_by="cluster-logging-operator"}) > 0 + for: 5m + labels: + namespace: openshift-logging + service: collector + severity: Warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_KibanaDeprecation + annotations: + message: The Kibana web console is now deprecated and is planned to be + removed in a future logging release. + summary: Detected Kibana as the visualization which is deprecated and + will be removed in a future release. + expr: | + sum(kube_pod_labels{namespace="openshift-logging",label_component='kibana'}) > 0 + for: 5m + labels: + namespace: openshift-logging + service: visualization + severity: Warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_DiskBufferUsage + annotations: + message: 'Collectors potentially consuming too much node disk, {{ $value + }}% ' + summary: Detected consuming too much node disk on $labels.hostname host + expr: "(label_replace(sum by(hostname) (vector_buffer_byte_size{component_kind='sink',\ + \ buffer_type='disk'}), 'instance', '$1', 'hostname', '(.*)') \n/ on(instance)\ + \ group_left() sum by(instance) (node_filesystem_size_bytes{mountpoint='/var'}))\ + \ * 100 > 15\n" + for: 5m + labels: + service: collector + severity: Warning + syn: 'true' + syn_component: openshift4-logging diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml new file mode 100644 index 0000000..65a573e --- /dev/null +++ 
b/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml @@ -0,0 +1,225 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: syn-loki-logging-rules + name: syn-loki-logging-rules + namespace: openshift-logging +spec: + groups: + - name: logging_loki.alerts + rules: + - alert: SYN_LokiRequestErrors + annotations: + message: '{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf + "%.2f" $value }}% errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Errors + summary: At least 10% of requests are responded by 5xx server errors. + expr: | + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m{status_code=~"5.."} + ) by (job, namespace, route) + / + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m + ) by (job, namespace, route) + * 100 + > 10 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStackWriteRequestErrors + annotations: + message: '{{ printf "%.2f" $value }}% of write requests from {{ $labels.job + }} in {{ $labels.namespace }} are returned with server errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#LokiStack-Write-Request-Errors + summary: At least 10% of write requests to the lokistack-gateway are responded + with 5xx server errors. 
+ expr: | + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{code=~"5..", handler="push"} + ) by (job, namespace) + / + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{handler="push"} + ) by (job, namespace) + * 100 + > 10 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStackReadRequestErrors + annotations: + message: '{{ printf "%.2f" $value }}% of query requests from {{ $labels.job + }} in {{ $labels.namespace }} are returned with server errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#LokiStack-Read-Request-Errors + summary: At least 10% of query requests to the lokistack-gateway are responded + with 5xx server errors. + expr: | + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{code=~"5..", handler=~"query|query_range|label|labels|label_values"} + ) by (job, namespace) + / + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{handler=~"query|query_range|label|labels|label_values"} + ) by (job, namespace) + * 100 + > 10 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiRequestPanics + annotations: + message: '{{ $labels.job }} is experiencing an increase of {{ $value }} + panics.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Panics + summary: A panic was triggered. + expr: | + sum( + increase( + loki_panic_total[10m] + ) + ) by (job, namespace) + > 0 + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiRequestLatency + annotations: + message: '{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf + "%.2f" $value }}s 99th percentile latency.' 
+ runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Latency + summary: The 99th percentile is experiencing high latency (higher than + 1 second). + expr: | + histogram_quantile(0.99, + sum( + irate( + loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[1m] + ) + ) by (job, le, namespace, route) + ) + > 1 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiTenantRateLimit + annotations: + message: '{{ $labels.job }} {{ $labels.route }} is experiencing 429 errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Tenant-Rate-Limit + summary: At least 10% of requests are responded with the rate limit error + code. + expr: | + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m{status_code="429"} + ) by (job, namespace, route) + / + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m + ) by (job, namespace, route) + * 100 + > 10 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStorageSlowWrite + annotations: + message: The storage path is experiencing slow write response rates. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Storage-Slow-Write + summary: The storage path is experiencing slow write response rates. + expr: | + histogram_quantile(0.99, + sum( + job_le_namespace_operation:loki_boltdb_shipper_request_duration_seconds_bucket:rate5m{operation="WRITE"} + ) by (job, le, namespace) + ) + > 1 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStorageSlowRead + annotations: + message: The storage path is experiencing slow read response rates. 
+ runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Storage-Slow-Read + summary: The storage path is experiencing slow read response rates. + expr: | + histogram_quantile(0.99, + sum( + job_le_namespace_operation:loki_boltdb_shipper_request_duration_seconds_bucket:rate5m{operation="Shipper.Query"} + ) by (job, le, namespace) + ) + > 5 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiWritePathHighLoad + annotations: + message: The write path is experiencing high load. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Write-Path-High-Load + summary: The write path is experiencing high load, causing backpressure + storage flushing. + expr: | + sum( + loki_ingester_wal_replay_flushing + ) by (job, namespace) + > 0 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiReadPathHighLoad + annotations: + message: The read path is experiencing high load. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Read-Path-High-Load + summary: The read path has high volume of queries, causing longer response + times. + expr: | + histogram_quantile(0.99, + sum( + rate( + loki_logql_querystats_latency_seconds_bucket[5m] + ) + ) by (job, le, namespace) + ) + > 30 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokistackSchemaUpgradesRequired + annotations: + message: |- + The LokiStack "{{ $labels.stack_name }}" in namespace "{{ $labels.stack_namespace }}" is using a storage schema + configuration that does not contain the latest schema version. It is recommended to update the schema + configuration to update the schema version to the latest version in the future. 
+ runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Lokistack-Schema-Upgrades-Required + summary: One or more of the deployed LokiStacks contains an outdated storage + schema configuration. + expr: | + sum ( + lokistack_status_condition{reason="StorageNeedsSchemaUpdate",status="true"} + ) by (stack_namespace, stack_name) + > 0 + for: 1m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging diff --git a/tests/legacy.yml b/tests/legacy.yml index a4da5b7..7d5565b 100644 --- a/tests/legacy.yml +++ b/tests/legacy.yml @@ -1,3 +1,57 @@ -# Overwrite parameters here +applications: + - openshift4-operators as openshift-operators-redhat + - openshift4-monitoring -# parameters: {...} +parameters: + kapitan: + dependencies: + - type: https + source: https://raw.githubusercontent.com/appuio/component-openshift4-operators/v1.0.2/lib/openshift4-operators.libsonnet + output_path: vendor/lib/openshift4-operators.libsonnet + - type: https + source: https://raw.githubusercontent.com/appuio/component-openshift4-monitoring/v2.9.0/lib/openshift4-monitoring-alert-patching.libsonnet + output_path: vendor/lib/alert-patching.libsonnet + compile: + - input_type: jsonnet + input_paths: + - tests/console-patch.jsonnet + output_path: console-patching/ + + openshift4_operators: + defaultInstallPlanApproval: Automatic + defaultSource: openshift-operators-redhat + defaultSourceNamespace: openshift-operators-redhat + + openshift4_logging: + clusterLogForwarding: + enabled: true + forwarders: + custom-forwarder: + type: syslog + my-other-forwarder: + type: elasticsearch + elasticsearch: + version: 8 + namespace_groups: + my-apps: + namespaces: + - app-one + - app-two + forwarders: + - custom-forwarder + json: true + json: + enabled: true + application_logs: + json: true + forwarders: + - my-other-forwarder + infrastructure_logs: + json: true + detectMultilineErrors: true + + clusterLogForwarder: + pipelines: + application-logs: + 
outputRefs: + - my-forwarder diff --git a/tests/master.yml b/tests/master.yml index 71ef248..17c595a 100644 --- a/tests/master.yml +++ b/tests/master.yml @@ -26,16 +26,20 @@ parameters: channel: 'stable' alerts: 'master' - clusterLogForwarding: - enabled: true - forwarders: + clusterLogForwarder: + inputs: + my-apps: + application: + namespaces: + - app-one + - app-two + outputs: custom-forwarder: type: syslog - namespace_groups: + pipelines: my-apps: - namespaces: - - app-one - - app-two - forwarders: + parse: json + inputRefs: + - my-apps + outputRefs: - custom-forwarder - json: true diff --git a/tests/multilineerr.yml b/tests/multilineerr.yml index 2c3067a..b91a325 100644 --- a/tests/multilineerr.yml +++ b/tests/multilineerr.yml @@ -23,8 +23,8 @@ parameters: defaultSourceNamespace: openshift-operators-redhat openshift4_logging: - clusterLogForwarding: - enabled: true - application_logs: - json: true - detectMultilineErrors: true + clusterLogForwarder: + pipelines: + application-logs: + parse: json + detectMultilineErrors: true