Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

etcd controller: detect ghost members and update the EtcdIsVoter condition #11232

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,11 @@ const (

etcdStatusType = v1.NodeConditionType("EtcdIsVoter")

StatusUnjoined MemberStatus = "unjoined"
StatusUnhealthy MemberStatus = "unhealthy"
StatusLearner MemberStatus = "learner"
StatusVoter MemberStatus = "voter"
StatusUnjoined MemberStatus = "unjoined"
StatusUnhealthy MemberStatus = "unhealthy"
StatusLearner MemberStatus = "learner"
StatusVoter MemberStatus = "voter"
StatusUnknownMember MemberStatus = "unknownMember"
)

var (
Expand Down Expand Up @@ -1189,6 +1190,7 @@ func (e *ETCD) manageLearners(ctx context.Context) {
// a map to track if a node is a member of the etcd cluster or not
nodeIsMember := make(map[string]bool)
nodesMap := make(map[string]*v1.Node)
membersWithoutNode := ""
for _, node := range nodes {
nodeIsMember[node.Name] = false
nodesMap[node.Name] = node
Expand All @@ -1214,7 +1216,7 @@ func (e *ETCD) manageLearners(ctx context.Context) {
}
}
if node == nil {
continue
membersWithoutNode += (" " + member.Name)
}

// verify if the member is healthy and set the status
Expand All @@ -1229,7 +1231,12 @@ func (e *ETCD) manageLearners(ctx context.Context) {
}

for nodeName, node := range nodesMap {
if !nodeIsMember[nodeName] {
if len(membersWithoutNode) > 0 {
statusMsg := fmt.Sprintf("unknown etcd members:%s", membersWithoutNode)
if err := e.setEtcdStatusCondition(node, client, nodeName, StatusUnknownMember, statusMsg); err != nil {
logrus.Errorf("Unable to set etcd status condition for node %s: %v", node.Name, err)
}
} else if !nodeIsMember[nodeName] {
if err := e.setEtcdStatusCondition(node, client, nodeName, StatusUnjoined, ""); err != nil {
logrus.Errorf("Unable to set etcd status condition for a node that is not a cluster member %s: %v", nodeName, err)
}
Expand Down Expand Up @@ -1345,6 +1352,13 @@ func (e *ETCD) setEtcdStatusCondition(node *v1.Node, client kubernetes.Interface
Reason: "NotAMember",
Message: "Node is not a member of the etcd cluster",
}
case StatusUnknownMember:
newCondition = v1.NodeCondition{
Type: etcdStatusType,
Status: "False",
Reason: "UnknownMember",
Message: "unknown member detected in etcd cluster",
}
default:
logrus.Warnf("Unknown etcd member status %s", memberStatus)
return nil
Expand Down