diff --git a/class/defaults.yml b/class/defaults.yml
index 1c06d40..5ac5abc 100644
--- a/class/defaults.yml
+++ b/class/defaults.yml
@@ -1,4 +1,60 @@
 parameters:
   machine_api_provider_cloudscale:
     =_metadata: {}
-    namespace: syn-machine-api-provider-cloudscale
+    namespace: openshift-machine-api
+
+    # Secrets to create in the component namespace, keyed by secret name.
+    secrets: {}
+
+    # Container images used by the provider deployment.
+    images:
+      provider:
+        registry: ghcr.io
+        image: appuio/machine-api-provider-cloudscale
+        tag: v0.2.1
+      machine_api_controllers_manager:
+        registry: ghcr.io
+        image: appuio/machine-api-provider-cloudscale
+        tag: v0.2.1
+      kube_rbac_proxy:
+        registry: gcr.io
+        image: kubebuilder/kube-rbac-proxy
+        tag: v0.16.0
+
+    # Resource requests and limits for the two manager containers.
+    resources:
+      provider:
+        requests:
+          cpu: '10m'
+          memory: '32Mi'
+        limits:
+          cpu: '100m'
+          memory: '128Mi'
+
+      machine_api_controllers_manager:
+        requests:
+          cpu: '10m'
+          memory: '32Mi'
+        limits:
+          cpu: '100m'
+          memory: '64Mi'
+
+    # Alert rules, keyed by alert name. The expression references the namespace
+    # parameter so that it keeps matching when the namespace is overridden.
+    alerts:
+      MachinesetEndpointNotFound:
+        enabled: true
+        rule:
+          expr: |
+            count(up{namespace="${machine_api_provider_cloudscale:namespace}",endpoint=~"machineset.+"}) < 1
+          for: 15m
+          labels:
+            severity: warning
+          annotations:
+            summary: Expected machineset target not found. Autoscaling and other machine operations might be impacted.
+            description: |
+              No machineset-controller target was found. This can impact machine operations such as autoscaling.
+
+              The machineset controller is deployed by the 'machine-api-controllers-manager' container in the 'appuio-machine-api-provider-cloudscale' deployment.
+              Check for the existence of the 'appuio-machine-api-controllers' deployment and check the logs of the above manager if it does not exist.
+              If the deployment exists, check the deployment and replicaset status and events to check why the pod can't be created.
diff --git a/component/main.jsonnet b/component/main.jsonnet
index ed6019a..0445118 100644
--- a/component/main.jsonnet
+++ b/component/main.jsonnet
@@ -1,10 +1,254 @@
 // main template for machine-api-provider-cloudscale
+local com = import 'lib/commodore.libjsonnet';
 local kap = import 'lib/kapitan.libjsonnet';
 local kube = import 'lib/kube.libjsonnet';
 local inv = kap.inventory();
 // The hiera parameters for the component
 local params = inv.parameters.machine_api_provider_cloudscale;
 
+// Labels shared by the Secrets, ServiceAccount, ClusterRoleBinding and Deployment.
+local commonLabels = {
+  'app.kubernetes.io/name': 'machine-api-provider',
+  'app.kubernetes.io/instance': 'machine-api-provider-cloudscale',
+  'app.kubernetes.io/part-of': 'syn',
+  'app.kubernetes.io/managed-by': 'commodore',
+};
+
+// One Secret per key of `params.secrets`, placed in the component namespace.
+local secrets = com.generateResources(
+  params.secrets,
+  function(name) kube.Secret(name) {
+    metadata+: {
+      namespace: params.namespace,
+      labels+: commonLabels,
+    },
+  }
+);
+
+// Labels added to every alert rule rendered by `alerts` below.
+local alertlabels = {
+  syn: 'true',
+  syn_component: 'machine-api-provider-cloudscale',
+};
+
+// Renders a PrometheusRule `name` with a single group `groupName` holding all
+// entries of `alerts` that have `enabled: true`, sorted by alert name.
+local alerts = function(name, groupName, alerts)
+  com.namespaced(params.namespace, kube._Object('monitoring.coreos.com/v1', 'PrometheusRule', name) {
+    spec+: {
+      groups+: [
+        {
+          name: groupName,
+          rules:
+            std.sort(std.filterMap(
+              function(field) alerts[field].enabled == true,
+              function(field) alerts[field].rule {
+                alert: field,
+                labels+: alertlabels,
+              },
+              std.objectFields(alerts)
+            ), function(x) x.alert),
+        },
+      ],
+    },
+  });
+
+// Service account the provider deployment runs as.
+local serviceAccount = kube.ServiceAccount('appuio-machine-api-provider-cloudscale') {
+  metadata+: {
+    namespace: params.namespace,
+    labels+: commonLabels,
+  },
+};
+
+// Grants the service account the `cluster-admin` ClusterRole.
+local clusterRoleBinding = kube.ClusterRoleBinding('appuio-machine-api-provider-cloudscale') {
+  metadata+: { labels+: commonLabels },
+  subjects_: [ serviceAccount ],
+  roleRef: {
+    apiGroup: 'rbac.authorization.k8s.io',
+    kind: 'ClusterRole',
+    name: 'cluster-admin',
+  },
+};
+
+// kube-rbac-proxy sidecar: listens on `exposePort` (container port `portName`)
+// and forwards to the plain HTTP endpoint on localhost:`upstreamPort`.
+local kubeProxyContainer = function(upstreamPort, portName, exposePort) {
+  args: [
+    '--secure-listen-address=0.0.0.0:%s' % exposePort,
+    '--upstream=http://localhost:%s' % upstreamPort,
+    '--logtostderr=true',
+    '--v=0',
+  ],
+  image: '%(registry)s/%(image)s:%(tag)s' % params.images.kube_rbac_proxy,
+  imagePullPolicy: 'IfNotPresent',
+  name: 'kube-rbac-proxy-%s' % portName,
+  ports: [
+    {
+      containerPort: exposePort,
+      name: portName,
+      protocol: 'TCP',
+    },
+  ],
+  resources: {
+    requests: {
+      cpu: '10m',
+      memory: '20Mi',
+    },
+  },
+  terminationMessagePath: '/dev/termination-log',
+  terminationMessagePolicy: 'File',
+};
+
+// Deployment running both managers plus one kube-rbac-proxy sidecar per metrics endpoint.
+// Liveness probes query `/healthz` and readiness probes `/readyz` on each
+// manager's health-probe port.
+local deployment = kube._Object('apps/v1', 'Deployment', 'appuio-machine-api-provider-cloudscale') {
+  metadata+: {
+    namespace: params.namespace,
+    annotations+: {},
+    labels+: commonLabels,
+  },
+  spec: {
+    progressDeadlineSeconds: 600,
+    replicas: 1,
+    revisionHistoryLimit: 10,
+    selector: {
+      matchLabels: {
+        'app.kubernetes.io/name': commonLabels['app.kubernetes.io/name'],
+        'app.kubernetes.io/instance': commonLabels['app.kubernetes.io/instance'],
+      },
+    },
+    template: {
+      metadata: {
+        annotations: {
+          'target.workload.openshift.io/management': '{"effect": "PreferredDuringScheduling"}',
+        },
+        labels: {
+          'app.kubernetes.io/name': commonLabels['app.kubernetes.io/name'],
+          'app.kubernetes.io/instance': commonLabels['app.kubernetes.io/instance'],
+        },
+      },
+      spec: {
+        containers: [
+          {
+            name: 'manager',
+            command: [
+              'machine-api-provider-cloudscale',
+              '-target=manager',
+            ],
+            args: [
+              '-metrics-bind-address=127.0.0.1:8080',
+              '-health-probe-bind-address=:8081',
+              '-leader-elect=true',
+              '-namespace=%s' % params.namespace,
+            ],
+            image: '%(registry)s/%(image)s:%(tag)s' % params.images.provider,
+            imagePullPolicy: 'IfNotPresent',
+            livenessProbe: {
+              httpGet: {
+                path: '/healthz',
+                port: 8081,
+                scheme: 'HTTP',
+              },
+              periodSeconds: 20,
+              initialDelaySeconds: 15,
+            },
+            readinessProbe: {
+              httpGet: {
+                path: '/readyz',
+                port: 8081,
+                scheme: 'HTTP',
+              },
+              periodSeconds: 10,
+              initialDelaySeconds: 5,
+            },
+            resources: params.resources.provider,
+          },
+          {
+            name: 'machine-api-controllers-manager',
+            command: [
+              'machine-api-provider-cloudscale',
+              '-target=machine-api-controllers-manager',
+            ],
+            args: [
+              '-metrics-bind-address=127.0.0.1:8082',
+              '-health-probe-bind-address=:8083',
+              '-leader-elect=true',
+              '-namespace=%s' % params.namespace,
+            ],
+            image: '%(registry)s/%(image)s:%(tag)s' % params.images.machine_api_controllers_manager,
+            imagePullPolicy: 'IfNotPresent',
+            livenessProbe: {
+              httpGet: {
+                path: '/healthz',
+                port: 8083,
+                scheme: 'HTTP',
+              },
+              periodSeconds: 20,
+              initialDelaySeconds: 15,
+            },
+            readinessProbe: {
+              httpGet: {
+                path: '/readyz',
+                port: 8083,
+                scheme: 'HTTP',
+              },
+              periodSeconds: 10,
+              initialDelaySeconds: 5,
+            },
+            resources: params.resources.machine_api_controllers_manager,
+          },
+          kubeProxyContainer(8080, 'manager-metrics', 8440),
+          kubeProxyContainer(8082, 'mac-metrics', 8442),
+        ],
+        dnsPolicy: 'ClusterFirst',
+        nodeSelector: {
+          'node-role.kubernetes.io/master': '',
+        },
+        priorityClassName: 'system-node-critical',
+        restartPolicy: 'Always',
+        schedulerName: 'default-scheduler',
+        securityContext: {},
+        serviceAccount: serviceAccount.metadata.name,
+        serviceAccountName: serviceAccount.metadata.name,
+        terminationGracePeriodSeconds: 30,
+        tolerations: [
+          {
+            effect: 'NoSchedule',
+            key: 'node-role.kubernetes.io/master',
+          },
+          {
+            key: 'CriticalAddonsOnly',
+            operator: 'Exists',
+          },
+          {
+            effect: 'NoExecute',
+            key: 'node.kubernetes.io/not-ready',
+            operator: 'Exists',
+            tolerationSeconds: 120,
+          },
+          {
+            effect: 'NoExecute',
+            key: 'node.kubernetes.io/unreachable',
+            operator: 'Exists',
+            tolerationSeconds: 120,
+          },
+        ],
+      },
+    },
+  },
+};
+
+
 // Define outputs below
 {
+  '00_secrets': secrets,
+
+  '10_serviceAccount': serviceAccount,
+  '10_clusterRoleBinding': clusterRoleBinding,
+  '11_deployment': deployment,
+
+  '20_alerts': alerts('appuio-machine-api-provider-cloudscale', 'provider.alerts', params.alerts),
 }
diff --git a/docs/modules/ROOT/pages/references/parameters.adoc b/docs/modules/ROOT/pages/references/parameters.adoc
index 9619a09..dabb517 100644
--- a/docs/modules/ROOT/pages/references/parameters.adoc
+++ b/docs/modules/ROOT/pages/references/parameters.adoc
@@ -11,9 +11,110 @@ default:: `syn-machine-api-provider-cloudscale`
 The namespace in which to deploy this component.
 
 
+== `images`
+
+[horizontal]
+type:: dict
+default:: https://github.com/projectsyn/component-machine-api-provider-cloudscale/blob/master/class/defaults.yml[See `class/defaults.yml`]
+
+The images to use for this component.
+
+
+== `secrets`
+
+[horizontal]
+type:: dict
+default:: `{}`
+
+A dictionary of secrets to create.
+The key is the name of the secret, the value is merged into the created secret.
+Namespace is automatically set to the namespace of the component.
+Commodore secret references should be passed as `stringData`.
+
+
+== `resources`
+
+[horizontal]
+type:: dict
+default:: https://github.com/projectsyn/component-machine-api-provider-cloudscale/blob/master/class/defaults.yml[See `class/defaults.yml`]
+
+Resource requests and limits for the containers deployed by this component.
+
+
+== `alerts`
+
+[horizontal]
+type:: dict
+example::
++
+[source,yaml]
+----
+BadThingsHappening:
+  enabled: true
+  rule:
+    annotations:
+      description: Bad things have been happening on {{$labels.node}} for more than 10 minutes.
+      message: Bad things have been happening on {{$labels.node}} for more than 10 minutes.
+      runbook_url: https://hub.syn.tools/machine-api-provider-cloudscale/runbooks/BadThingsHappening.html
+    expr: |
+      bad_thing_happening == 1
+    for: 10m
+    labels:
+      severity: warning
+----
+
+`alerts` defines the alerts to be installed.
+The dictionary key is used as the name of the alert.
+
+
 == Example
 
 [source,yaml]
 ----
-namespace: example-namespace
+secrets:
+  cloudscale-rw-token:
+    stringData:
+      token: XXXX
+  cloudscale-user-data:
+    stringData:
+      ignitionHost: api-int.cluster-1.appuio.cloud
+      ignitionCA: |
+        -----BEGIN CERTIFICATE-----
+        XXXXXXX
+        -----END CERTIFICATE-----
+      userData: |
+        {
+          ignition: {
+            version: '3.1.0',
+            config: {
+              merge: [ {
+                source: 'https://%s:22623/config/%s' % [ context.data.ignitionHost, std.get(context.data, 'ignitionConfigName', 'worker') ],
+              } ],
+            },
+            security: {
+              tls: {
+                certificateAuthorities: [ {
+                  source: 'data:text/plain;charset=utf-8;base64,%s' % [ std.base64(context.data.ignitionCA) ],
+                } ],
+              },
+            },
+          },
+          systemd: {
+            units: [ {
+              name: 'cloudscale-hostkeys.service',
+              enabled: true,
+              contents: "[Unit]\nDescription=Print SSH Public Keys to tty\nAfter=sshd-keygen.target\n\n[Install]\nWantedBy=multi-user.target\n\n[Service]\nType=oneshot\nStandardOutput=tty\nTTYPath=/dev/ttyS0\nExecStart=/bin/sh -c \"echo '-----BEGIN SSH HOST KEY KEYS-----'; cat /etc/ssh/ssh_host_*key.pub; echo '-----END SSH HOST KEY KEYS-----'\"",
+            } ],
+          },
+          storage: {
+            files: [ {
+              filesystem: 'root',
+              path: '/etc/hostname',
+              mode: 420,
+              contents: {
+                source: 'data:,%s' % context.machine.metadata.name,
+              },
+            } ],
+          },
+        }
 ----
diff --git a/tests/defaults.yml b/tests/defaults.yml
index a4da5b7..c003641 100644
--- a/tests/defaults.yml
+++ b/tests/defaults.yml
@@ -1,3 +1,50 @@
 # Overwrite parameters here
 
-# parameters: {...}
+parameters:
+  machine_api_provider_cloudscale:
+    secrets:
+      cloudscale-rw-token:
+        stringData:
+          token: XXXX
+      cloudscale-user-data:
+        stringData:
+          ignitionHost: api-int.cluster-1.appuio.cloud
+          ignitionCA: |
+            -----BEGIN CERTIFICATE-----
+            XXXXXXX
+            -----END CERTIFICATE-----
+          userData: |
+            {
+              ignition: {
+                version: '3.1.0',
+                config: {
+                  merge: [ {
+                    source: 'https://%s:22623/config/%s' % [ context.data.ignitionHost, std.get(context.data, 'ignitionConfigName', 'worker') ],
+                  } ],
+                },
+                security: {
+                  tls: {
+                    certificateAuthorities: [ {
+                      source: 'data:text/plain;charset=utf-8;base64,%s' % [ std.base64(context.data.ignitionCA) ],
+                    } ],
+                  },
+                },
+              },
+              systemd: {
+                units: [ {
+                  name: 'cloudscale-hostkeys.service',
+                  enabled: true,
+                  contents: "[Unit]\nDescription=Print SSH Public Keys to tty\nAfter=sshd-keygen.target\n\n[Install]\nWantedBy=multi-user.target\n\n[Service]\nType=oneshot\nStandardOutput=tty\nTTYPath=/dev/ttyS0\nExecStart=/bin/sh -c \"echo '-----BEGIN SSH HOST KEY KEYS-----'; cat /etc/ssh/ssh_host_*key.pub; echo '-----END SSH HOST KEY KEYS-----'\"",
+                } ],
+              },
+              storage: {
+                files: [ {
+                  filesystem: 'root',
+                  path: '/etc/hostname',
+                  mode: 420,
+                  contents: {
+                    source: 'data:,%s' % context.machine.metadata.name,
+                  },
+                } ],
+              },
+            }
diff --git a/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/00_secrets.yaml b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/00_secrets.yaml
new file mode 100644
index 0000000..1516be2
--- /dev/null
+++ b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/00_secrets.yaml
@@ -0,0 +1,72 @@
+apiVersion: v1
+data: {}
+kind: Secret
+metadata:
+  annotations: {}
+  labels:
+    app.kubernetes.io/instance: machine-api-provider-cloudscale
+    app.kubernetes.io/managed-by: commodore
+    app.kubernetes.io/name: machine-api-provider
+    app.kubernetes.io/part-of: syn
+    name: cloudscale-rw-token
+  name: cloudscale-rw-token
+  namespace: openshift-machine-api
+stringData:
+  token: XXXX
+type: Opaque
+---
+apiVersion: v1
+data: {}
+kind: Secret
+metadata:
+  annotations: {}
+  labels:
+    app.kubernetes.io/instance: machine-api-provider-cloudscale
+    app.kubernetes.io/managed-by: commodore
+    app.kubernetes.io/name: machine-api-provider
+    app.kubernetes.io/part-of: syn
+    name: cloudscale-user-data
+  name: cloudscale-user-data
+  namespace: openshift-machine-api
+stringData:
+  ignitionCA: |
+    -----BEGIN CERTIFICATE-----
+    XXXXXXX
+    -----END CERTIFICATE-----
+  ignitionHost: api-int.cluster-1.appuio.cloud
+  userData: |
+    {
+      ignition: {
+        version: '3.1.0',
+        config: {
+          merge: [ {
+            source: 'https://%s:22623/config/%s' % [ context.data.ignitionHost, std.get(context.data, 'ignitionConfigName', 'worker') ],
+          } ],
+        },
+        security: {
+          tls: {
+            certificateAuthorities: [ {
+              source: 'data:text/plain;charset=utf-8;base64,%s' % [ std.base64(context.data.ignitionCA) ],
+            } ],
+          },
+        },
+      },
+      systemd: {
+        units: [ {
+          name: 'cloudscale-hostkeys.service',
+          enabled: true,
+          contents: "[Unit]\nDescription=Print SSH Public Keys to tty\nAfter=sshd-keygen.target\n\n[Install]\nWantedBy=multi-user.target\n\n[Service]\nType=oneshot\nStandardOutput=tty\nTTYPath=/dev/ttyS0\nExecStart=/bin/sh -c \"echo '-----BEGIN SSH HOST KEY KEYS-----'; cat /etc/ssh/ssh_host_*key.pub; echo '-----END SSH HOST KEY KEYS-----'\"",
+        } ],
+      },
+      storage: {
+        files: [ {
+          filesystem: 'root',
+          path: '/etc/hostname',
+          mode: 420,
+          contents: {
+            source: 'data:,%s' % context.machine.metadata.name,
+          },
+        } ],
+      },
+    }
+type: Opaque
diff --git a/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/10_clusterRoleBinding.yaml b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/10_clusterRoleBinding.yaml
new file mode 100644
index 0000000..ebdf4a7
--- /dev/null
+++ b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/10_clusterRoleBinding.yaml
@@ -0,0 +1,19 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  annotations: {}
+  labels:
+    app.kubernetes.io/instance: machine-api-provider-cloudscale
+    app.kubernetes.io/managed-by: commodore
+    app.kubernetes.io/name: machine-api-provider
+    app.kubernetes.io/part-of: syn
+    name: appuio-machine-api-provider-cloudscale
+  name: appuio-machine-api-provider-cloudscale
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-admin
+subjects:
+  - kind: ServiceAccount
+    name: appuio-machine-api-provider-cloudscale
+    namespace: openshift-machine-api
diff --git a/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/10_serviceAccount.yaml b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/10_serviceAccount.yaml
new file mode 100644
index 0000000..c352c31
--- /dev/null
+++ b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/10_serviceAccount.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  annotations: {}
+  labels:
+    app.kubernetes.io/instance: machine-api-provider-cloudscale
+    app.kubernetes.io/managed-by: commodore
+    app.kubernetes.io/name: machine-api-provider
+    app.kubernetes.io/part-of: syn
+    name: appuio-machine-api-provider-cloudscale
+  name: appuio-machine-api-provider-cloudscale
+  namespace: openshift-machine-api
diff --git a/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/11_deployment.yaml b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/11_deployment.yaml
new file mode 100644
index 0000000..2779675
--- /dev/null
+++ b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/11_deployment.yaml
@@ -0,0 +1,152 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  annotations: {}
+  labels:
+    app.kubernetes.io/instance: machine-api-provider-cloudscale
+    app.kubernetes.io/managed-by: commodore
+    app.kubernetes.io/name: machine-api-provider
+    app.kubernetes.io/part-of: syn
+    name: appuio-machine-api-provider-cloudscale
+  name: appuio-machine-api-provider-cloudscale
+  namespace: openshift-machine-api
+spec:
+  progressDeadlineSeconds: 600
+  replicas: 1
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: machine-api-provider-cloudscale
+      app.kubernetes.io/name: machine-api-provider
+  template:
+    metadata:
+      annotations:
+        target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}'
+      labels:
+        app.kubernetes.io/instance: machine-api-provider-cloudscale
+        app.kubernetes.io/name: machine-api-provider
+    spec:
+      containers:
+        - args:
+            - -metrics-bind-address=127.0.0.1:8080
+            - -health-probe-bind-address=:8081
+            - -leader-elect=true
+            - -namespace=openshift-machine-api
+          command:
+            - machine-api-provider-cloudscale
+            - -target=manager
+          image: ghcr.io/appuio/machine-api-provider-cloudscale:v0.2.1
+          imagePullPolicy: IfNotPresent
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 8081
+              scheme: HTTP
+            initialDelaySeconds: 15
+            periodSeconds: 20
+          name: manager
+          readinessProbe:
+            httpGet:
+              path: /readyz
+              port: 8081
+              scheme: HTTP
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          resources:
+            limits:
+              cpu: 100m
+              memory: 128Mi
+            requests:
+              cpu: 10m
+              memory: 32Mi
+        - args:
+            - -metrics-bind-address=127.0.0.1:8082
+            - -health-probe-bind-address=:8083
+            - -leader-elect=true
+            - -namespace=openshift-machine-api
+          command:
+            - machine-api-provider-cloudscale
+            - -target=machine-api-controllers-manager
+          image: ghcr.io/appuio/machine-api-provider-cloudscale:v0.2.1
+          imagePullPolicy: IfNotPresent
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 8083
+              scheme: HTTP
+            initialDelaySeconds: 15
+            periodSeconds: 20
+          name: machine-api-controllers-manager
+          readinessProbe:
+            httpGet:
+              path: /readyz
+              port: 8083
+              scheme: HTTP
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          resources:
+            limits:
+              cpu: 100m
+              memory: 64Mi
+            requests:
+              cpu: 10m
+              memory: 32Mi
+        - args:
+            - --secure-listen-address=0.0.0.0:8440
+            - --upstream=http://localhost:8080
+            - --logtostderr=true
+            - --v=0
+          image: gcr.io/kubebuilder/kube-rbac-proxy:v0.16.0
+          imagePullPolicy: IfNotPresent
+          name: kube-rbac-proxy-manager-metrics
+          ports:
+            - containerPort: 8440
+              name: manager-metrics
+              protocol: TCP
+          resources:
+            requests:
+              cpu: 10m
+              memory: 20Mi
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+        - args:
+            - --secure-listen-address=0.0.0.0:8442
+            - --upstream=http://localhost:8082
+            - --logtostderr=true
+            - --v=0
+          image: gcr.io/kubebuilder/kube-rbac-proxy:v0.16.0
+          imagePullPolicy: IfNotPresent
+          name: kube-rbac-proxy-mac-metrics
+          ports:
+            - containerPort: 8442
+              name: mac-metrics
+              protocol: TCP
+          resources:
+            requests:
+              cpu: 10m
+              memory: 20Mi
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+      dnsPolicy: ClusterFirst
+      nodeSelector:
+        node-role.kubernetes.io/master: ''
+      priorityClassName: system-node-critical
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      serviceAccount: appuio-machine-api-provider-cloudscale
+      serviceAccountName: appuio-machine-api-provider-cloudscale
+      terminationGracePeriodSeconds: 30
+      tolerations:
+        - effect: NoSchedule
+          key: node-role.kubernetes.io/master
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - effect: NoExecute
+          key: node.kubernetes.io/not-ready
+          operator: Exists
+          tolerationSeconds: 120
+        - effect: NoExecute
+          key: node.kubernetes.io/unreachable
+          operator: Exists
+          tolerationSeconds: 120
diff --git a/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/20_alerts.yaml b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/20_alerts.yaml
new file mode 100644
index 0000000..9416ba7
--- /dev/null
+++ b/tests/golden/defaults/machine-api-provider-cloudscale/machine-api-provider-cloudscale/20_alerts.yaml
@@ -0,0 +1,29 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  annotations: {}
+  labels:
+    name: appuio-machine-api-provider-cloudscale
+  name: appuio-machine-api-provider-cloudscale
+  namespace: openshift-machine-api
+spec:
+  groups:
+    - name: provider.alerts
+      rules:
+        - alert: MachinesetEndpointNotFound
+          annotations:
+            description: |
+              No machineset-controller target was found. This can impact machine operations such as autoscaling.
+
+              The machineset controller is deployed by the 'machine-api-controllers-manager' container in the 'appuio-machine-api-provider-cloudscale' deployment.
+              Check for the existence of the 'appuio-machine-api-controllers' deployment and check the logs of the above manager if it does not exist.
+              If the deployment exists, check the deployment and replicaset status and events to check why the pod can't be created.
+            summary: Expected machineset target not found. Autoscaling and other machine
+              operations might be impacted.
+          expr: |
+            count(up{namespace="openshift-machine-api",endpoint=~"machineset.+"}) < 1
+          for: 15m
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: machine-api-provider-cloudscale