diff --git a/pkg/operator/ceph/cluster/mon/health.go b/pkg/operator/ceph/cluster/mon/health.go index 5488e611ad09..88770f330887 100644 --- a/pkg/operator/ceph/cluster/mon/health.go +++ b/pkg/operator/ceph/cluster/mon/health.go @@ -367,11 +367,12 @@ func (c *Cluster) checkHealth(ctx context.Context) error { } } - // failover mons running on host path to use persistent volumes if VolumeClaimTemplate is set and vice versa + // failover any mons present in the mon fail over list for _, mon := range c.ClusterInfo.Monitors { - if c.HasMonPathChanged(mon.Name) { - logger.Infof("fail over mon %q due to change in mon path", mon.Name) + if c.monsToFailover.Has(mon.Name) { + logger.Infof("fail over mon %q from the mon fail over list", mon.Name) c.failMon(len(c.ClusterInfo.Monitors), desiredMonCount, mon.Name) + c.monsToFailover.Delete(mon.Name) return nil } } @@ -379,20 +380,6 @@ func (c *Cluster) checkHealth(ctx context.Context) error { return nil } -// HasMonPathChanged checks if the mon storage path has changed from host path to persistent volume or vice versa -func (c *Cluster) HasMonPathChanged(mon string) bool { - var monPathChanged bool - if c.mapping.Schedule[mon] != nil && c.spec.Mon.VolumeClaimTemplate != nil { - logger.Infof("mon %q path has changed from host path to persistent volumes", mon) - monPathChanged = true - } else if c.mapping.Schedule[mon] == nil && c.spec.Mon.VolumeClaimTemplate == nil { - logger.Infof("mon %q path has changed from persistent volumes to host path", mon) - monPathChanged = true - } - - return monPathChanged -} - func (c *Cluster) trackMonInOrOutOfQuorum(monName string, inQuorum bool) (bool, error) { updateNeeded := false var monsOutOfQuorum []string diff --git a/pkg/operator/ceph/cluster/mon/health_test.go b/pkg/operator/ceph/cluster/mon/health_test.go index bef7fda8d1dd..bc0b7297aab2 100644 --- a/pkg/operator/ceph/cluster/mon/health_test.go +++ b/pkg/operator/ceph/cluster/mon/health_test.go @@ -645,37 +645,3 @@ func TestUpdateMonInterval(t *testing.T) { assert.Equal(t, time.Minute, h.interval) }) } - -func TestHasMonPathChanged(t *testing.T) { - t.Run("mon path changed from pv to hostpath", func(t *testing.T) { - c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil) - c.mapping.Schedule["a"] = nil - result := c.HasMonPathChanged("a") - assert.True(t, result) - }) - - t.Run("mon path has not changed from pv to hostpath", func(t *testing.T) { - c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil) - c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}} - c.mapping.Schedule["b"] = nil - result := c.HasMonPathChanged("b") - c.spec.Mon.VolumeClaimTemplate = nil - assert.False(t, result) - }) - - t.Run("mon path changed from hostpath to pv", func(t *testing.T) { - c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil) - c.mapping.Schedule["c"] = &opcontroller.MonScheduleInfo{} - c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}} - result := c.HasMonPathChanged("c") - assert.True(t, result) - }) - - t.Run("mon path has not changed from host path to pv", func(t *testing.T) { - c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil) - c.mapping.Schedule["d"] = &opcontroller.MonScheduleInfo{} - result := c.HasMonPathChanged("d") - c.spec.Mon.VolumeClaimTemplate = nil - assert.False(t, result) - }) -} diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go index 6340eadf49ed..f3012d668cff 100644 --- a/pkg/operator/ceph/cluster/mon/mon.go +++ b/pkg/operator/ceph/cluster/mon/mon.go @@ -115,6 +115,8 @@ type Cluster struct { ownerInfo *k8sutil.OwnerInfo isUpgrade bool arbiterMon string + // list of mons to be failed over + monsToFailover sets.Set[string] } // monConfig for a single monitor @@ -162,6 +164,7 @@ func New(ctx context.Context, clusterdContext *clusterd.Context, namespace strin ClusterInfo: &cephclient.ClusterInfo{ Context: ctx, }, + monsToFailover: sets.New[string](), } } @@ -1230,11 +1233,6 @@ var updateDeploymentAndWait = UpdateCephDeploymentAndWait func (c *Cluster) updateMon(m *monConfig, d *apps.Deployment) error { - if c.HasMonPathChanged(m.DaemonName) { - logger.Infof("path has changed for mon %q. Skip updating mon deployment %q in order to failover the mon", m.DaemonName, d.Name) - return nil - } - // Expand mon PVC if storage request for mon has increased in cephcluster crd if c.monVolumeClaimTemplate(m) != nil { desiredPvc, err := c.makeDeploymentPVC(m, false) @@ -1339,6 +1337,12 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e p.ApplyToPodSpec(&d.Spec.Template.Spec) if deploymentExists { + // skip update if mon path has changed + if hasMonPathChanged(existingDeployment, c.spec.Mon.VolumeClaimTemplate) { + c.monsToFailover.Insert(m.DaemonName) + return nil + } + // the existing deployment may have a node selector. if the cluster // isn't using host networking and the deployment is using pvc storage, // then the node selector can be removed. this may happen after @@ -1404,6 +1408,18 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e return nil } +func hasMonPathChanged(d *apps.Deployment, claim *v1.PersistentVolumeClaim) bool { + if d.Labels["pvc_name"] == "" && claim != nil { + logger.Infof("skipping update for mon %q where path has changed from hostPath to pvc", d.Name) + return true + } else if d.Labels["pvc_name"] != "" && claim == nil { + logger.Infof("skipping update for mon %q where path has changed from pvc to hostPath", d.Name) + return true + } + + return false +} + func waitForQuorumWithMons(context *clusterd.Context, clusterInfo *cephclient.ClusterInfo, mons []string, sleepTime int, requireAllInQuorum bool) error { logger.Infof("waiting for mon quorum with %v", mons) diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go index eda18f602121..49be715a0bae 100644 --- a/pkg/operator/ceph/cluster/mon/mon_test.go +++ b/pkg/operator/ceph/cluster/mon/mon_test.go @@ -884,3 +884,36 @@ func TestSkipReconcile(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 1, result.Len()) } + +func TestHasMonPathChanged(t *testing.T) { + monDeployment := &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rook-ceph-mon-a", + Namespace: "test", + Labels: map[string]string{ + "pvc_name": "test-pvc", + }, + }} + + pvcTemplate := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + }, + } + t.Run("mon path changed from pv to hostpath", func(t *testing.T) { + assert.True(t, hasMonPathChanged(monDeployment, nil)) + }) + + t.Run("mon path not changed from pv to hostpath", func(t *testing.T) { + assert.False(t, hasMonPathChanged(monDeployment, pvcTemplate)) + }) + t.Run("mon path changed from hostPath to pvc", func(t *testing.T) { + delete(monDeployment.Labels, "pvc_name") + assert.True(t, hasMonPathChanged(monDeployment, pvcTemplate)) + }) + + t.Run("mon path not changed from hostPath to pvc", func(t *testing.T) { + delete(monDeployment.Labels, "pvc_name") + assert.False(t, hasMonPathChanged(monDeployment, nil)) + }) +}