diff --git a/services/centralized-kubecost/2.5.0/cosi-storage.yaml b/services/centralized-kubecost/2.5.0/cosi-storage.yaml index 380691256..d3106999f 100644 --- a/services/centralized-kubecost/2.5.0/cosi-storage.yaml +++ b/services/centralized-kubecost/2.5.0/cosi-storage.yaml @@ -21,3 +21,6 @@ spec: postBuild: substitute: releaseNamespace: ${releaseNamespace} + substituteFrom: + - kind: ConfigMap + name: substitution-vars diff --git a/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml b/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml index 2547e9f40..389f527f7 100644 --- a/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml +++ b/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml @@ -28,6 +28,7 @@ spec: valuesFrom: - kind: ConfigMap name: centralized-kubecost-2.5.0-d2iq-defaults + valuesKey: ${kubecostClusterMode:=single-cluster}-values.yaml - kind: ConfigMap name: centralized-kubecost-overrides optional: true diff --git a/services/centralized-kubecost/2.5.0/defaults/cm.yaml b/services/centralized-kubecost/2.5.0/defaults/cm.yaml index aefd023e8..04fc32282 100644 --- a/services/centralized-kubecost/2.5.0/defaults/cm.yaml +++ b/services/centralized-kubecost/2.5.0/defaults/cm.yaml @@ -4,16 +4,11 @@ metadata: name: centralized-kubecost-2.5.0-d2iq-defaults namespace: ${releaseNamespace} data: - values.yaml: | + shared-values.yaml: | --- global: prometheus: - enabled: false - fqdn: http://kubecost-prometheus-server.kommander.svc.cluster.local - - savedReports: - # Refer https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/reports to create reports - enabled: false + enabled: true grafana: enabled: false @@ -24,16 +19,8 @@ data: alertmanager: # If true, allow kubecost to write to alertmanager enabled: true - fqdn: http://kubecost-prometheus-alertmanager.kommander.svc.cluster.local - - kubecostModel: - federatedStorageConfigSecret: "federated-store" # Secret should have a key named 
"federated-store.yaml" with the federated storage credentials kubecostAggregator: - # deployMethod determines how Aggregator is deployed. Current options are - # "singlepod" (within cost-analyzer Pod) "statefulset" (separate - # StatefulSet), and "disabled". - deployMethod: statefulset persistentConfigsStorage: storageClass: "" # default storage class storageRequest: 1Gi @@ -58,10 +45,6 @@ data: image: jaegertracing/all-in-one imageVersion: 1.64.0 # Pin the image here to avoid pulling in latest as that would affect CVE scans - federatedETL: - # Centralized kubecost should not write data to s3 storage. It's read-only. - federatedCluster: false - kubecostFrontend: enabled: true fullImageName: gcr.io/kubecost1/frontend:prod-2.5.0 @@ -100,16 +83,130 @@ data: tls: [] prometheus: + kubeStateMetrics: + enabled: false + kube-state-metrics: + disabled: true + + extraScrapeConfigs: | + - job_name: kubecost + honor_labels: true + scrape_interval: 1m + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + dns_sd_configs: + - names: + - {{ .Release.Name }}-cost-analyzer + type: 'A' + port: 9003 + - job_name: kubecost-networking + kubernetes_sd_configs: + - role: pod + relabel_configs: + # Scrape only the targets matching the following metadata + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: {{ .Release.Name }}-network-costs + server: - fullnameOverride: "kubecost-prometheus-server" + priorityClassName: dkp-high-priority + retention: 14d + image: + repository: quay.io/prometheus/prometheus + tag: v2.55.1 # If clusterIDConfigmap is defined, instead use user-generated configmap with key CLUSTER_ID # to use as unique cluster ID in kubecost cost-analyzer deployment. # This overrides the cluster_id set in prometheus.server.global.external_labels. # NOTE: This does not affect the external_labels set in prometheus config. 
clusterIDConfigmap: kubecost-cluster-info-configmap + extraFlags: + - web.enable-admin-api + - web.enable-lifecycle + - storage.tsdb.wal-compression + resources: + limits: + cpu: 1000m + memory: 2500Mi + requests: + cpu: 300m + memory: 1500Mi global: + scrape_interval: 1m + scrape_timeout: 10s + evaluation_interval: 1m external_labels: cluster_id: $CLUSTER_ID + persistentVolume: + size: 32Gi + enabled: true + extraArgs: + log.level: info + log.format: json + storage.tsdb.min-block-duration: 2h + storage.tsdb.max-block-duration: 2h + query.max-concurrency: 1 + query.max-samples: 100000000 + enableAdminApi: true + service: + gRPC: + enabled: true + configmapReload: + prometheus: + enabled: true + #image: + #repository: ghcr.io/jimmidyson/configmap-reload + #tag: v0.14.0 + alertmanager: + enabled: true + #image: + #repository: ghcr.io/jimmidyson/configmap-reload + #tag: v0.14.0 + alertmanager: + priorityClassName: dkp-high-priority + enabled: true + image: + repository: quay.io/prometheus/alertmanager + tag: v0.27.0 + resources: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 10m + memory: 50Mi + persistentVolume: + enabled: true + pushgateway: + enabled: false + persistentVolume: + enabled: false + serverFiles: + alerts: + groups: + - name: Kubecost + rules: + - alert: kubecostDown + expr: up{job="kubecost"} == 0 + annotations: + message: 'Kubecost metrics endpoint is not being scraped successfully.' + for: 10m + labels: + severity: warning + - alert: kubecostMetricsUnavailable + expr: sum(sum_over_time(node_cpu_hourly_cost[5m])) == 0 + annotations: + message: 'Kubecost metrics are not available in Prometheus.' + for: 10m + labels: + severity: warning + - alert: kubecostRecordingRulesNotEvaluated + expr: avg_over_time(kubecost_cluster_memory_working_set_bytes[5m]) == 0 + annotations: + message: 'Kubecost recording rules are not being successfully evaluated.' 
+ for: 10m + labels: + severity: warning grafana: sidecar: @@ -126,10 +223,26 @@ data: clusterName: "" clusterProfile: production cloudIntegrationSecret: "" + currencyCode: USD productKey: enabled: false #key: YOUR_KEY - + single-cluster-values.yaml: | + --- + kubecostAggregator: + # deployMethod determines how Aggregator is deployed. Current options are + # "singlepod" (within cost-analyzer Pod), "statefulset" (separate + # StatefulSet), and "disabled". + deployMethod: singlepod + multi-cluster-values.yaml: | + --- + kubecostAggregator: + # deployMethod determines how Aggregator is deployed. Current options are "singlepod", "statefulset", and "disabled". + deployMethod: statefulset + federatedETL: + federatedCluster: true + kubecostModel: + federatedStorageConfigSecret: "federated-store" # Secret should have a key named "federated-store.yaml" with the federated storage credentials # COSI related resources bucketClasses: # Cluster scoped resource - name: kubecost-cosi-storage diff --git a/services/centralized-kubecost/2.5.0/kustomization.yaml b/services/centralized-kubecost/2.5.0/kustomization.yaml index c1809a075..0e370440c 100644 --- a/services/centralized-kubecost/2.5.0/kustomization.yaml +++ b/services/centralized-kubecost/2.5.0/kustomization.yaml @@ -3,6 +3,6 @@ kind: Kustomization resources: - move-to-konvoy.yaml - cosi-storage.yaml - - prerequisites.yaml + - pre-install.yaml - release.yaml - - post-install-jobs.yaml + - post-install.yaml diff --git a/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml b/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml index 51d48aae2..93a9fd184 100644 --- a/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml +++ b/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml @@ -5,7 +5,7 @@ metadata: namespace: ${releaseNamespace} spec: force: true - prune: true + prune: false wait: true interval: 6h retryInterval: 1m diff --git a/services/centralized-kubecost/2.5.0/post-install-jobs.yaml b/services/centralized-kubecost/2.5.0/post-install.yaml similarity index 
80% rename from services/centralized-kubecost/2.5.0/post-install-jobs.yaml rename to services/centralized-kubecost/2.5.0/post-install.yaml index 38a6c7b68..855c8f8a6 100644 --- a/services/centralized-kubecost/2.5.0/post-install-jobs.yaml +++ b/services/centralized-kubecost/2.5.0/post-install.yaml @@ -1,7 +1,7 @@ apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: centralized-kubecost-post-install-jobs + name: centralized-kubecost-post-install namespace: ${releaseNamespace} spec: force: true @@ -9,7 +9,7 @@ spec: wait: true interval: 6h retryInterval: 1m - path: ./services/centralized-kubecost/2.5.0/post-install-jobs + path: ./services/centralized-kubecost/2.5.0/post-install dependsOn: - name: centralized-kubecost-release namespace: ${releaseNamespace} diff --git a/services/centralized-kubecost/2.5.0/post-install-jobs/post-install-jobs.yaml b/services/centralized-kubecost/2.5.0/post-install/post-install-jobs.yaml similarity index 100% rename from services/centralized-kubecost/2.5.0/post-install-jobs/post-install-jobs.yaml rename to services/centralized-kubecost/2.5.0/post-install/post-install-jobs.yaml diff --git a/services/centralized-kubecost/2.5.0/prerequisites.yaml b/services/centralized-kubecost/2.5.0/pre-install.yaml similarity index 81% rename from services/centralized-kubecost/2.5.0/prerequisites.yaml rename to services/centralized-kubecost/2.5.0/pre-install.yaml index 58e18268a..03b00e11c 100644 --- a/services/centralized-kubecost/2.5.0/prerequisites.yaml +++ b/services/centralized-kubecost/2.5.0/pre-install.yaml @@ -1,7 +1,7 @@ apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install namespace: ${releaseNamespace} spec: force: true @@ -9,7 +9,7 @@ spec: wait: true interval: 6h retryInterval: 1m - path: ./services/centralized-kubecost/2.5.0/prerequisites + path: ./services/centralized-kubecost/2.5.0/pre-install dependsOn: - name: 
kubecost-cosi-storage namespace: ${releaseNamespace} diff --git a/services/centralized-kubecost/2.5.0/prerequisites/prerequisites.yaml b/services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml similarity index 85% rename from services/centralized-kubecost/2.5.0/prerequisites/prerequisites.yaml rename to services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml index aa989026f..9517679ce 100644 --- a/services/centralized-kubecost/2.5.0/prerequisites/prerequisites.yaml +++ b/services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml @@ -2,13 +2,13 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install namespace: kubecost --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install rules: - apiGroups: [""] resources: ["configmaps", "namespaces"] @@ -20,27 +20,27 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install subjects: - kind: ServiceAccount - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install namespace: kubecost --- apiVersion: batch/v1 kind: Job metadata: - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install namespace: kubecost spec: template: metadata: - name: centralized-kubecost-prerequisites + name: centralized-kubecost-pre-install spec: - serviceAccountName: centralized-kubecost-prerequisites + serviceAccountName: centralized-kubecost-pre-install restartPolicy: OnFailure priorityClassName: dkp-high-priority containers: @@ -72,6 +72,12 @@ spec: set -o errexit set +x + # check the value of kubecostClusterMode and exit early if it 
equals single-cluster. + if [ "${kubecostClusterMode:=single-cluster}" = "single-cluster" ]; then + echo "kubecostClusterMode is single-cluster. Skipping the step." + exit 0 + fi + # Wait until federated-store secret is found. while ! kubectl get secret -n kubecost federated-store; do echo "federated-store secret not found. Waiting for it to be created." @@ -112,4 +118,4 @@ spec: kubectl create secret generic federated-store -n kubecost --from-file=federated-store.yaml="$tmpfile" --dry-run=client -o yaml | kubectl apply -f - kubectl label secret federated-store -n kubecost app.kubernetes.io/processed-by-kommander-centralized-kubecost=true --overwrite rm "$tmpfile" - # TODO(takirala): Test and support azure, nutanix cosi secrets. + # TODO(takirala): Test and support nutanix cosi secrets. diff --git a/services/centralized-kubecost/2.5.0/release.yaml b/services/centralized-kubecost/2.5.0/release.yaml index 74c007209..0085adb0c 100644 --- a/services/centralized-kubecost/2.5.0/release.yaml +++ b/services/centralized-kubecost/2.5.0/release.yaml @@ -20,5 +20,5 @@ spec: - kind: ConfigMap name: substitution-vars dependsOn: - - name: centralized-kubecost-prerequisites + - name: centralized-kubecost-pre-install namespace: ${releaseNamespace} diff --git a/services/centralized-kubecost/2.5.0/release/release.yaml b/services/centralized-kubecost/2.5.0/release/release.yaml index b7daa5833..3351b781a 100644 --- a/services/centralized-kubecost/2.5.0/release/release.yaml +++ b/services/centralized-kubecost/2.5.0/release/release.yaml @@ -28,6 +28,10 @@ spec: valuesFrom: - kind: ConfigMap name: centralized-kubecost-2.5.0-d2iq-defaults + valuesKey: shared-values.yaml + - kind: ConfigMap + name: centralized-kubecost-2.5.0-d2iq-defaults + valuesKey: ${kubecostClusterMode:=single-cluster}-values.yaml - kind: ConfigMap name: centralized-kubecost-overrides optional: true diff --git a/services/kommander/0.14.0/defaults/cm.yaml b/services/kommander/0.14.0/defaults/cm.yaml index 
5bd7c0560..eeb6ddb92 100644 --- a/services/kommander/0.14.0/defaults/cm.yaml +++ b/services/kommander/0.14.0/defaults/cm.yaml @@ -68,8 +68,6 @@ data: repository: ${kommanderLicensingControllerWebhookImageRepository} defaultEnterpriseApps: - "centralized-kubecost" - - "kubecost" - - "kubecost-thanos-traefik" - "centralized-grafana" - "karma" - "karma-traefik" @@ -122,7 +120,6 @@ data: - "kube-prometheus-stack" - "prometheus-adapter" - "prometheus-thanos-traefik" - - "kubecost-thanos-traefik" - "cert-manager" - "karma-traefik" - "gatekeeper"