Skip to content

Commit

Permalink
Merge pull request rook#13379 from rook/mergify/bp/release-1.13/pr-13360
Browse files Browse the repository at this point in the history
mon: fix mon failover on path change (backport rook#13360)
  • Loading branch information
travisn authored Dec 12, 2023
2 parents 4e647e9 + d52312a commit 4a8aa5d
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 56 deletions.
21 changes: 4 additions & 17 deletions pkg/operator/ceph/cluster/mon/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,32 +367,19 @@ func (c *Cluster) checkHealth(ctx context.Context) error {
}
}

// failover mons running on host path to use persistent volumes if VolumeClaimTemplate is set and vice versa
// failover any mons present in the mon fail over list
for _, mon := range c.ClusterInfo.Monitors {
if c.HasMonPathChanged(mon.Name) {
logger.Infof("fail over mon %q due to change in mon path", mon.Name)
if c.monsToFailover.Has(mon.Name) {
logger.Infof("fail over mon %q from the mon fail over list", mon.Name)
c.failMon(len(c.ClusterInfo.Monitors), desiredMonCount, mon.Name)
c.monsToFailover.Delete(mon.Name)
return nil
}
}

return nil
}

// HasMonPathChanged checks if the mon storage path has changed from host path to persistent volume or vice versa
func (c *Cluster) HasMonPathChanged(mon string) bool {
var monPathChanged bool
if c.mapping.Schedule[mon] != nil && c.spec.Mon.VolumeClaimTemplate != nil {
logger.Infof("mon %q path has changed from host path to persistent volumes", mon)
monPathChanged = true
} else if c.mapping.Schedule[mon] == nil && c.spec.Mon.VolumeClaimTemplate == nil {
logger.Infof("mon %q path has changed from persistent volumes to host path", mon)
monPathChanged = true
}

return monPathChanged
}

func (c *Cluster) trackMonInOrOutOfQuorum(monName string, inQuorum bool) (bool, error) {
updateNeeded := false
var monsOutOfQuorum []string
Expand Down
34 changes: 0 additions & 34 deletions pkg/operator/ceph/cluster/mon/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,37 +645,3 @@ func TestUpdateMonInterval(t *testing.T) {
assert.Equal(t, time.Minute, h.interval)
})
}

func TestHasMonPathChanged(t *testing.T) {
t.Run("mon path changed from pv to hostpath", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.mapping.Schedule["a"] = nil
result := c.HasMonPathChanged("a")
assert.True(t, result)
})

t.Run("mon path has not changed from pv to hostpath", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}}
c.mapping.Schedule["b"] = nil
result := c.HasMonPathChanged("b")
c.spec.Mon.VolumeClaimTemplate = nil
assert.False(t, result)
})

t.Run("mon path changed from hostpath to pv", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.mapping.Schedule["c"] = &opcontroller.MonScheduleInfo{}
c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}}
result := c.HasMonPathChanged("c")
assert.True(t, result)
})

t.Run("mon path has not changed from host path to pv", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.mapping.Schedule["d"] = &opcontroller.MonScheduleInfo{}
result := c.HasMonPathChanged("d")
c.spec.Mon.VolumeClaimTemplate = nil
assert.False(t, result)
})
}
26 changes: 21 additions & 5 deletions pkg/operator/ceph/cluster/mon/mon.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ type Cluster struct {
ownerInfo *k8sutil.OwnerInfo
isUpgrade bool
arbiterMon string
// list of mons to be failed over
monsToFailover sets.Set[string]
}

// monConfig for a single monitor
Expand Down Expand Up @@ -162,6 +164,7 @@ func New(ctx context.Context, clusterdContext *clusterd.Context, namespace strin
ClusterInfo: &cephclient.ClusterInfo{
Context: ctx,
},
monsToFailover: sets.New[string](),
}
}

Expand Down Expand Up @@ -1230,11 +1233,6 @@ var updateDeploymentAndWait = UpdateCephDeploymentAndWait

func (c *Cluster) updateMon(m *monConfig, d *apps.Deployment) error {

if c.HasMonPathChanged(m.DaemonName) {
logger.Infof("path has changed for mon %q. Skip updating mon deployment %q in order to failover the mon", m.DaemonName, d.Name)
return nil
}

// Expand mon PVC if storage request for mon has increased in cephcluster crd
if c.monVolumeClaimTemplate(m) != nil {
desiredPvc, err := c.makeDeploymentPVC(m, false)
Expand Down Expand Up @@ -1339,6 +1337,12 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e

p.ApplyToPodSpec(&d.Spec.Template.Spec)
if deploymentExists {
// skip update if mon path has changed
if hasMonPathChanged(existingDeployment, c.spec.Mon.VolumeClaimTemplate) {
c.monsToFailover.Insert(m.DaemonName)
return nil
}

// the existing deployment may have a node selector. if the cluster
// isn't using host networking and the deployment is using pvc storage,
// then the node selector can be removed. this may happen after
Expand Down Expand Up @@ -1404,6 +1408,18 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e
return nil
}

func hasMonPathChanged(d *apps.Deployment, claim *v1.PersistentVolumeClaim) bool {
if d.Labels["pvc_name"] == "" && claim != nil {
logger.Infof("skipping update for mon %q where path has changed from hostPath to pvc", d.Name)
return true
} else if d.Labels["pvc_name"] != "" && claim == nil {
logger.Infof("skipping update for mon %q where path has changed from pvc to hostPath", d.Name)
return true
}

return false
}

func waitForQuorumWithMons(context *clusterd.Context, clusterInfo *cephclient.ClusterInfo, mons []string, sleepTime int, requireAllInQuorum bool) error {
logger.Infof("waiting for mon quorum with %v", mons)

Expand Down
33 changes: 33 additions & 0 deletions pkg/operator/ceph/cluster/mon/mon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -884,3 +884,36 @@ func TestSkipReconcile(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, 1, result.Len())
}

func TestHasMonPathChanged(t *testing.T) {
monDeployment := &apps.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "rook-ceph-mon-a",
Namespace: "test",
Labels: map[string]string{
"pvc_name": "test-pvc",
},
}}

pvcTemplate := &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "test-claim",
},
}
t.Run("mon path changed from pv to hostpath", func(t *testing.T) {
assert.True(t, hasMonPathChanged(monDeployment, nil))
})

t.Run("mon path not changed from pv to hostpath", func(t *testing.T) {
assert.False(t, hasMonPathChanged(monDeployment, pvcTemplate))
})
t.Run("mon path changed from hostPath to pvc", func(t *testing.T) {
delete(monDeployment.Labels, "pvc_name")
assert.True(t, hasMonPathChanged(monDeployment, pvcTemplate))
})

t.Run("mon path not changed from hostPath to pvc", func(t *testing.T) {
delete(monDeployment.Labels, "pvc_name")
assert.False(t, hasMonPathChanged(monDeployment, nil))
})
}

0 comments on commit 4a8aa5d

Please sign in to comment.