Skip to content

Commit

Permalink
mon: fix mon failover on path change
Browse files Browse the repository at this point in the history
This PR fails over mon when the mon path is changed from hostPath to PVC or vice versa

Signed-off-by: sp98 <[email protected]>
(cherry picked from commit 4b6fd89)
  • Loading branch information
sp98 authored and mergify[bot] committed Dec 12, 2023
1 parent 1647c2a commit d52312a
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 56 deletions.
21 changes: 4 additions & 17 deletions pkg/operator/ceph/cluster/mon/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,32 +367,19 @@ func (c *Cluster) checkHealth(ctx context.Context) error {
}
}

// failover mons running on host path to use persistent volumes if VolumeClaimTemplate is set and vice versa
// failover any mons present in the mon fail over list
for _, mon := range c.ClusterInfo.Monitors {
if c.HasMonPathChanged(mon.Name) {
logger.Infof("fail over mon %q due to change in mon path", mon.Name)
if c.monsToFailover.Has(mon.Name) {
logger.Infof("fail over mon %q from the mon fail over list", mon.Name)
c.failMon(len(c.ClusterInfo.Monitors), desiredMonCount, mon.Name)
c.monsToFailover.Delete(mon.Name)
return nil
}
}

return nil
}

// HasMonPathChanged checks if the mon storage path has changed from host path to persistent volume or vice versa
func (c *Cluster) HasMonPathChanged(mon string) bool {
var monPathChanged bool
if c.mapping.Schedule[mon] != nil && c.spec.Mon.VolumeClaimTemplate != nil {
logger.Infof("mon %q path has changed from host path to persistent volumes", mon)
monPathChanged = true
} else if c.mapping.Schedule[mon] == nil && c.spec.Mon.VolumeClaimTemplate == nil {
logger.Infof("mon %q path has changed from persistent volumes to host path", mon)
monPathChanged = true
}

return monPathChanged
}

func (c *Cluster) trackMonInOrOutOfQuorum(monName string, inQuorum bool) (bool, error) {
updateNeeded := false
var monsOutOfQuorum []string
Expand Down
34 changes: 0 additions & 34 deletions pkg/operator/ceph/cluster/mon/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,37 +645,3 @@ func TestUpdateMonInterval(t *testing.T) {
assert.Equal(t, time.Minute, h.interval)
})
}

func TestHasMonPathChanged(t *testing.T) {
t.Run("mon path changed from pv to hostpath", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.mapping.Schedule["a"] = nil
result := c.HasMonPathChanged("a")
assert.True(t, result)
})

t.Run("mon path has not changed from pv to hostpath", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}}
c.mapping.Schedule["b"] = nil
result := c.HasMonPathChanged("b")
c.spec.Mon.VolumeClaimTemplate = nil
assert.False(t, result)
})

t.Run("mon path changed from hostpath to pv", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.mapping.Schedule["c"] = &opcontroller.MonScheduleInfo{}
c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}}
result := c.HasMonPathChanged("c")
assert.True(t, result)
})

t.Run("mon path has not changed from host path to pv", func(t *testing.T) {
c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)
c.mapping.Schedule["d"] = &opcontroller.MonScheduleInfo{}
result := c.HasMonPathChanged("d")
c.spec.Mon.VolumeClaimTemplate = nil
assert.False(t, result)
})
}
26 changes: 21 additions & 5 deletions pkg/operator/ceph/cluster/mon/mon.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ type Cluster struct {
ownerInfo *k8sutil.OwnerInfo
isUpgrade bool
arbiterMon string
// list of mons to be failed over
monsToFailover sets.Set[string]
}

// monConfig for a single monitor
Expand Down Expand Up @@ -162,6 +164,7 @@ func New(ctx context.Context, clusterdContext *clusterd.Context, namespace strin
ClusterInfo: &cephclient.ClusterInfo{
Context: ctx,
},
monsToFailover: sets.New[string](),
}
}

Expand Down Expand Up @@ -1230,11 +1233,6 @@ var updateDeploymentAndWait = UpdateCephDeploymentAndWait

func (c *Cluster) updateMon(m *monConfig, d *apps.Deployment) error {

if c.HasMonPathChanged(m.DaemonName) {
logger.Infof("path has changed for mon %q. Skip updating mon deployment %q in order to failover the mon", m.DaemonName, d.Name)
return nil
}

// Expand mon PVC if storage request for mon has increased in cephcluster crd
if c.monVolumeClaimTemplate(m) != nil {
desiredPvc, err := c.makeDeploymentPVC(m, false)
Expand Down Expand Up @@ -1339,6 +1337,12 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e

p.ApplyToPodSpec(&d.Spec.Template.Spec)
if deploymentExists {
// skip update if mon path has changed
if hasMonPathChanged(existingDeployment, c.spec.Mon.VolumeClaimTemplate) {
c.monsToFailover.Insert(m.DaemonName)
return nil
}

// the existing deployment may have a node selector. if the cluster
// isn't using host networking and the deployment is using pvc storage,
// then the node selector can be removed. this may happen after
Expand Down Expand Up @@ -1404,6 +1408,18 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e
return nil
}

func hasMonPathChanged(d *apps.Deployment, claim *v1.PersistentVolumeClaim) bool {
if d.Labels["pvc_name"] == "" && claim != nil {
logger.Infof("skipping update for mon %q where path has changed from hostPath to pvc", d.Name)
return true
} else if d.Labels["pvc_name"] != "" && claim == nil {
logger.Infof("skipping update for mon %q where path has changed from pvc to hostPath", d.Name)
return true
}

return false
}

func waitForQuorumWithMons(context *clusterd.Context, clusterInfo *cephclient.ClusterInfo, mons []string, sleepTime int, requireAllInQuorum bool) error {
logger.Infof("waiting for mon quorum with %v", mons)

Expand Down
33 changes: 33 additions & 0 deletions pkg/operator/ceph/cluster/mon/mon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -884,3 +884,36 @@ func TestSkipReconcile(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, 1, result.Len())
}

func TestHasMonPathChanged(t *testing.T) {
monDeployment := &apps.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "rook-ceph-mon-a",
Namespace: "test",
Labels: map[string]string{
"pvc_name": "test-pvc",
},
}}

pvcTemplate := &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "test-claim",
},
}
t.Run("mon path changed from pv to hostpath", func(t *testing.T) {
assert.True(t, hasMonPathChanged(monDeployment, nil))
})

t.Run("mon path not changed from pv to hostpath", func(t *testing.T) {
assert.False(t, hasMonPathChanged(monDeployment, pvcTemplate))
})
t.Run("mon path changed from hostPath to pvc", func(t *testing.T) {
delete(monDeployment.Labels, "pvc_name")
assert.True(t, hasMonPathChanged(monDeployment, pvcTemplate))
})

t.Run("mon path not changed from hostPath to pvc", func(t *testing.T) {
delete(monDeployment.Labels, "pvc_name")
assert.False(t, hasMonPathChanged(monDeployment, nil))
})
}

0 comments on commit d52312a

Please sign in to comment.