-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Sebastian Hoß <[email protected]>
- Loading branch information
Showing
12 changed files
with
528 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
122 changes: 122 additions & 0 deletions
122
...ode-remediation/self-node-remediation.medik8s.io/v1alpha1/selfnoderemediationconfigs.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
# CustomResourceDefinition for the namespaced SelfNodeRemediationConfig kind
# (group self-node-remediation.medik8s.io, version v1alpha1).
# NOTE(review): the controller-gen annotation below suggests this schema is
# generated; prefer regenerating over hand-editing - confirm with upstream.
apiVersion: "apiextensions.k8s.io/v1"
kind: "CustomResourceDefinition"
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: "v0.14.0"
  name: "selfnoderemediationconfigs.self-node-remediation.medik8s.io"
spec:
  group: "self-node-remediation.medik8s.io"
  names:
    kind: "SelfNodeRemediationConfig"
    listKind: "SelfNodeRemediationConfigList"
    plural: "selfnoderemediationconfigs"
    shortNames:
      - "snrc"
      - "snrconfig"
    singular: "selfnoderemediationconfig"
  scope: "Namespaced"
  versions:
    - name: "v1alpha1"
      schema:
        openAPIV3Schema:
          description: "SelfNodeRemediationConfig is the Schema for the selfnoderemediationconfigs API in which a user can configure the self node remediation agents"
          properties:
            apiVersion:
              description: "APIVersion defines the versioned schema of this representation of an object.\nServers should convert recognized schemas to the latest internal value, and\nmay reject unrecognized values.\nMore info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources"
              type: "string"
            kind:
              description: "Kind is a string value representing the REST resource this object represents.\nServers may infer this from the endpoint the client submits requests to.\nCannot be updated.\nIn CamelCase.\nMore info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"
              type: "string"
            metadata:
              type: "object"
            spec:
              description: "SelfNodeRemediationConfigSpec defines the desired state of SelfNodeRemediationConfig"
              properties:
                # All duration-like fields below share one pattern: it accepts
                # either a bare 0 or one decimal number followed by a single
                # unit (ms, s, m or h).
                apiCheckInterval:
                  default: "15s"
                  description: "the frequency for api-server connectivity check\nValid time units are \"ms\", \"s\", \"m\", \"h\".\nthe frequency for api-server connectivity check"
                  pattern: "^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
                  type: "string"
                apiServerTimeout:
                  default: "5s"
                  description: "Valid time units are \"ms\", \"s\", \"m\", \"h\".\ntimeout for each api-connectivity check"
                  pattern: "^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
                  type: "string"
                customDsTolerations:
                  description: "CustomDsTolerations allows to add custom tolerations snr agents that are running on the ds in order to support remediation for different types of nodes."
                  items:
                    description: "The pod this Toleration is attached to tolerates any taint that matches\nthe triple <key,value,effect> using the matching operator <operator>."
                    properties:
                      effect:
                        description: "Effect indicates the taint effect to match. Empty means match all taint effects.\nWhen specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute."
                        type: "string"
                      key:
                        description: "Key is the taint key that the toleration applies to. Empty means match all taint keys.\nIf the key is empty, operator must be Exists; this combination means to match all values and all keys."
                        type: "string"
                      operator:
                        description: "Operator represents a key's relationship to the value.\nValid operators are Exists and Equal. Defaults to Equal.\nExists is equivalent to wildcard for value, so that a pod can\ntolerate all taints of a particular category."
                        type: "string"
                      tolerationSeconds:
                        description: "TolerationSeconds represents the period of time the toleration (which must be\nof effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,\nit is not set, which means tolerate the taint forever (do not evict). Zero and\nnegative values will be treated as 0 (evict immediately) by the system."
                        format: "int64"
                        type: "integer"
                      value:
                        description: "Value is the taint value the toleration matches to.\nIf the operator is Exists, the value should be empty, otherwise just a regular string."
                        type: "string"
                    type: "object"
                  type: "array"
                endpointHealthCheckUrl:
                  description: "EndpointHealthCheckUrl is an url that self node remediation agents which run on control-plane node will try to access when they can't contact their peers.\nThis is a part of self diagnostics which will decide whether the node should be remediated or not.\nIt will be ignored when empty (which is the default)."
                  type: "string"
                hostPort:
                  default: 30001
                  description: "HostPort is used for internal communication between SNR agents."
                  minimum: 1.0
                  type: "integer"
                isSoftwareRebootEnabled:
                  default: true
                  description: "IsSoftwareRebootEnabled indicates whether self node remediation agent will do software reboot,\nif the watchdog device can not be used or will use watchdog only,\nwithout a fallback to software reboot"
                  type: "boolean"
                maxApiErrorThreshold:
                  default: 3
                  description: "after this threshold, the node will start contacting its peers"
                  minimum: 1.0
                  type: "integer"
                peerApiServerTimeout:
                  default: "5s"
                  description: "Valid time units are \"ms\", \"s\", \"m\", \"h\"."
                  pattern: "^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
                  type: "string"
                peerDialTimeout:
                  default: "5s"
                  description: "Valid time units are \"ms\", \"s\", \"m\", \"h\".\ntimeout for establishing connection to peer"
                  pattern: "^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
                  type: "string"
                peerRequestTimeout:
                  default: "5s"
                  description: "Valid time units are \"ms\", \"s\", \"m\", \"h\".\ntimeout for each peer request"
                  pattern: "^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
                  type: "string"
                peerUpdateInterval:
                  default: "15m"
                  description: "Valid time units are \"ms\", \"s\", \"m\", \"h\"."
                  pattern: "^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
                  type: "string"
                safeTimeToAssumeNodeRebootedSeconds:
                  default: 180
                  description: "SafeTimeToAssumeNodeRebootedSeconds is the time after which the healthy self node remediation\nagents will assume the unhealthy node has been rebooted, and it is safe to recover affected workloads.\nThis is extremely important as starting replacement Pods while they are still running on the failed\nnode will likely lead to data corruption and violation of run-once semantics.\nIn an effort to prevent this, the operator ignores values lower than a minimum calculated from the\nApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields."
                  minimum: 0.0
                  type: "integer"
                watchdogFilePath:
                  default: "/dev/watchdog"
                  description: "WatchdogFilePath is the watchdog file path that should be available on each node, e.g. /dev/watchdog"
                  type: "string"
              type: "object"
            status:
              description: "SelfNodeRemediationConfigStatus defines the observed state of SelfNodeRemediationConfig"
              type: "object"
          type: "object"
      served: true
      storage: true
      # An empty mapping turns on the /status subresource for this version.
      subresources:
        status: {}
109 changes: 109 additions & 0 deletions
109
...self-node-remediation/self-node-remediation.medik8s.io/v1alpha1/selfnoderemediations.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# CustomResourceDefinition for the namespaced SelfNodeRemediation kind
# (group self-node-remediation.medik8s.io, version v1alpha1).
# NOTE(review): the controller-gen annotation below suggests this schema is
# generated; prefer regenerating over hand-editing - confirm with upstream.
apiVersion: "apiextensions.k8s.io/v1"
kind: "CustomResourceDefinition"
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: "v0.14.0"
  name: "selfnoderemediations.self-node-remediation.medik8s.io"
spec:
  group: "self-node-remediation.medik8s.io"
  names:
    kind: "SelfNodeRemediation"
    listKind: "SelfNodeRemediationList"
    plural: "selfnoderemediations"
    shortNames:
      - "snr"
      - "snremediation"
    singular: "selfnoderemediation"
  scope: "Namespaced"
  versions:
    - name: "v1alpha1"
      schema:
        openAPIV3Schema:
          description: "SelfNodeRemediation is the Schema for the selfnoderemediations API"
          properties:
            apiVersion:
              description: "APIVersion defines the versioned schema of this representation of an object.\nServers should convert recognized schemas to the latest internal value, and\nmay reject unrecognized values.\nMore info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources"
              type: "string"
            kind:
              description: "Kind is a string value representing the REST resource this object represents.\nServers may infer this from the endpoint the client submits requests to.\nCannot be updated.\nIn CamelCase.\nMore info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"
              type: "string"
            metadata:
              type: "object"
            spec:
              description: "SelfNodeRemediationSpec defines the desired state of SelfNodeRemediation"
              properties:
                remediationStrategy:
                  default: "Automatic"
                  description: "RemediationStrategy is the remediation method for unhealthy nodes.\nCurrently, it could be either \"Automatic\", \"OutOfServiceTaint\" or \"ResourceDeletion\".\nResourceDeletion will iterate over all pods and VolumeAttachment related to the unhealthy node and delete them.\nOutOfServiceTaint will add the out-of-service taint which is a new well-known taint \"node.kubernetes.io/out-of-service\"\nthat enables automatic deletion of pv-attached pods on failed nodes, \"out-of-service\" taint is only supported on clusters with k8s version 1.26+ or OCP/OKD version 4.13+.\nAutomatic will choose the most appropriate strategy during runtime."
                  enum:
                    - "Automatic"
                    - "ResourceDeletion"
                    - "OutOfServiceTaint"
                  type: "string"
              type: "object"
            status:
              description: "SelfNodeRemediationStatus defines the observed state of SelfNodeRemediation"
              properties:
                conditions:
                  description: "Represents the observations of a SelfNodeRemediation's current state.\nKnown .status.conditions.type are: \"Processing\""
                  items:
                    description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for direct use as an array at the field path .status.conditions. For example,\n\n\n\ttype FooStatus struct{\n\t // Represents the observations of a foo's current state.\n\t // Known .status.conditions.type are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t // other fields\n\t}"
                    properties:
                      lastTransitionTime:
                        description: "lastTransitionTime is the last time the condition transitioned from one status to another.\nThis should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable."
                        format: "date-time"
                        type: "string"
                      message:
                        description: "message is a human readable message indicating details about the transition.\nThis may be an empty string."
                        maxLength: 32768
                        type: "string"
                      observedGeneration:
                        description: "observedGeneration represents the .metadata.generation that the condition was set based upon.\nFor instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date\nwith respect to the current state of the instance."
                        format: "int64"
                        minimum: 0.0
                        type: "integer"
                      reason:
                        description: "reason contains a programmatic identifier indicating the reason for the condition's last transition.\nProducers of specific condition types may define expected values and meanings for this field,\nand whether the values are considered a guaranteed API.\nThe value should be a CamelCase string.\nThis field may not be empty."
                        maxLength: 1024
                        minLength: 1
                        pattern: "^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$"
                        type: "string"
                      status:
                        description: "status of the condition, one of True, False, Unknown."
                        # Quoted on purpose: these are string enum members,
                        # not YAML booleans.
                        enum:
                          - "True"
                          - "False"
                          - "Unknown"
                        type: "string"
                      type:
                        description: "type of condition in CamelCase or in foo.example.com/CamelCase.\n---\nMany .condition.type values are consistent across resources like Available, but because arbitrary conditions can be\nuseful (see .node.status.conditions), the ability to deconflict is important.\nThe regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)"
                        maxLength: 316
                        pattern: "^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$"
                        type: "string"
                    required:
                      - "lastTransitionTime"
                      - "message"
                      - "reason"
                      - "status"
                      - "type"
                    type: "object"
                  type: "array"
                  # The list is treated as a map keyed by each item's "type".
                  x-kubernetes-list-map-keys:
                    - "type"
                  x-kubernetes-list-type: "map"
                lastError:
                  description: "LastError captures the last error that occurred during remediation.\nIf no error occurred it would be empty"
                  type: "string"
                phase:
                  description: "Phase represents the current phase of remediation,\nOne of: TBD"
                  type: "string"
                timeAssumedRebooted:
                  description: "TimeAssumedRebooted is the time by then the unhealthy node assumed to be rebooted"
                  format: "date-time"
                  type: "string"
              type: "object"
          type: "object"
      served: true
      storage: true
      # An empty mapping turns on the /status subresource for this version.
      subresources:
        status: {}
Oops, something went wrong.