diff --git a/api/v1alpha1/nonadminbackup_types.go b/api/v1alpha1/nonadminbackup_types.go index 646694f..724e3ca 100644 --- a/api/v1alpha1/nonadminbackup_types.go +++ b/api/v1alpha1/nonadminbackup_types.go @@ -103,10 +103,22 @@ type NonAdminBackupStatus struct { // +optional VeleroDeleteBackupRequest *VeleroDeleteBackupRequest `json:"veleroDeleteBackupRequest,omitempty"` + // +optional + QueueInfo *QueueInfo `json:"queueInfo,omitempty"` + Phase NonAdminBackupPhase `json:"phase,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` } +// QueueInfo holds the queue position for a specific VeleroBackup. +// It is used to estimate how many backups are scheduled before the given VeleroBackup in the OADP namespace. +// This number is not guaranteed to be accurate, but it should be close. It's inaccurate for cases when +// Velero pod is not running or being restarted after Backup object were created. +// It counts only VeleroBackups that are still subject to be handled by OADP/Velero. +type QueueInfo struct { + EstimatedQueuePosition int `json:"estimatedQueuePosition"` // Number of backups ahead in the queue (0 if not queued) +} + // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:path=nonadminbackups,shortName=nab diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 0efd366..139228d 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -118,6 +118,11 @@ func (in *NonAdminBackupStatus) DeepCopyInto(out *NonAdminBackupStatus) { *out = new(VeleroDeleteBackupRequest) (*in).DeepCopyInto(*out) } + if in.QueueInfo != nil { + in, out := &in.QueueInfo, &out.QueueInfo + *out = new(QueueInfo) + **out = **in + } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]metav1.Condition, len(*in)) @@ -137,6 +142,21 @@ func (in *NonAdminBackupStatus) DeepCopy() *NonAdminBackupStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *QueueInfo) DeepCopyInto(out *QueueInfo) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QueueInfo. +func (in *QueueInfo) DeepCopy() *QueueInfo { + if in == nil { + return nil + } + out := new(QueueInfo) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VeleroBackup) DeepCopyInto(out *VeleroBackup) { *out = *in diff --git a/config/crd/bases/oadp.openshift.io_nonadminbackups.yaml b/config/crd/bases/oadp.openshift.io_nonadminbackups.yaml index 21510fa..4ac9f1d 100644 --- a/config/crd/bases/oadp.openshift.io_nonadminbackups.yaml +++ b/config/crd/bases/oadp.openshift.io_nonadminbackups.yaml @@ -619,6 +619,19 @@ spec: - Created - Deleting type: string + queueInfo: + description: |- + QueueInfo holds the queue position for a specific VeleroBackup. + It is used to estimate how many backups are scheduled before the given VeleroBackup in the OADP namespace. + This number is not guaranteed to be accurate, but it should be close. It's inaccurate for cases when + Velero pod is not running or being restarted after Backup object were created. + It counts only VeleroBackups that are still subject to be handled by OADP/Velero. + properties: + estimatedQueuePosition: + type: integer + required: + - estimatedQueuePosition + type: object veleroBackup: description: VeleroBackup contains information of the related Velero backup object. diff --git a/docs/design/Non_Admin_Controller_design.md b/docs/design/Non_Admin_Controller_design.md index 92cefc0..0297d39 100644 --- a/docs/design/Non_Admin_Controller_design.md +++ b/docs/design/Non_Admin_Controller_design.md @@ -92,11 +92,13 @@ This design intends to enable non-admin users the ability to perform Backup and - Listen to requests pertaining to Non-Admin Backup CRD - Process requests pertaining to Non-Admin Backup CRD - Update Non-Admin Backup CR status with the status/events from Velero Backup CR + - Update Non-Admin Backup CR status with the estimated queue position from the Velero Backup CRs - Cascade Any actions performed on Non-Admin Backup CR to corresponding Velero backup CR - **Non-Admin Restore (NAR) Controller:** The responsibilities of the NAR controller are: - Listen to requests pertaining to Non-Admin Restore CRD - Process requests pertaining to Non-Admin Restore CRD - Update Non-Admin Backup CR status with the status/events from Velero Restore CR + - Update Non-Admin Backup CR status with the estimated queue position from the Velero Restore CRs - Cascade Any actions performed on Non-Admin Restore CR to corresponding Velero restore CR - **Non-Admin BackupStorageLocation (NABSL) controller:** The responsibilities of the NABSL controller are: - Listen to requests pertaining to Non-Admin BSL CRD diff --git a/docs/design/nab_and_nar_status_update.md b/docs/design/nab_and_nar_status_update.md index e157d94..179c228 100644 --- a/docs/design/nab_and_nar_status_update.md +++ b/docs/design/nab_and_nar_status_update.md @@ -6,12 +6,11 @@ This document outlines the design around updating NonAdminBackup (NAB) and NonAd ## NonAdminBackup and NonAdminRestore Status -The `status` field of NAB and NAR objects contains the following fields: +The `status` field of NAB and NAR objects contains the following fields, which are updated by NAB and NAR controllers: - `phase` - `conditions` - `veleroBackup` for NAB and `veleroRestore` for NAR, which contains name, namespace and status of the related Velero object. - -which are updated by NAB and NAR controllers. +- `queueInfo` contains estimatedQueuePosition, which is best effort estimation of the position of the NAB/NAR in the Velero queue. Any reconciliation function that depends on data stored in the `status` field must ensure it operates on the most recent version of that field from the cluster before proceeding. @@ -118,6 +117,99 @@ status: phase: Created ``` +### Queue Info + +`queueInfo` contains `estimatedQueuePosition`, which represents the number of other Velero backups that need to be processed by Velero before the current NonAdminBackup (NAB) or NonAdminRestore (NAR) is handled. This estimate is accurate when the Velero pod is running continuously. However, it may become very inaccurate if the Velero pod was restarted or started after the Velero backups already existed in the cluster. +In the future the queueInfo may be extended with more fields to provide more information about the Velero queue such as time or size of the backups in the queue. + +```yaml +status: + conditions: + - lastTransitionTime: '2024-11-25T18:34:01Z' + message: backup accepted + reason: BackupAccepted + status: 'True' + type: Accepted + - lastTransitionTime: '2024-11-25T18:34:01Z' + message: Created Velero Backup object + reason: BackupScheduled + status: 'True' + type: Queued + phase: Created + queueInfo: + estimatedQueuePosition: 12 + veleroBackup: + nacuuid: mongo-persistent-anotherte-0d0b7b2c-ee76-412d-a867-2c23b8aa51ab + name: mongo-persistent-anotherte-0d0b7b2c-ee76-412d-a867-2c23b8aa51ab + namespace: openshift-adp + status: {} +``` + +When the Backup is InProgress, the status will be updated with the `estimatedQueuePosition` being set to 1 and the `veleroBackup` phase being set to InProgress. + +```yaml +status: + conditions: + - lastTransitionTime: '2024-11-27T10:47:49Z' + message: backup accepted + reason: BackupAccepted + status: 'True' + type: Accepted + - lastTransitionTime: '2024-11-27T10:47:50Z' + message: Created Velero Backup object + reason: BackupScheduled + status: 'True' + type: Queued + phase: Created + queueInfo: + estimatedQueuePosition: 1 + veleroBackup: + nacuuid: mongo-persistent-c95bc62d-f40c-47b8-8a28-0dd7addb4930 + name: mongo-persistent-anotherte-c95bc62d-f40c-47b8-8a28-0dd7addb4930 + namespace: openshift-adp + status: + expiration: '2024-12-27T10:48:45Z' + formatVersion: 1.1.0 + phase: InProgress + startTimestamp: '2024-11-27T10:48:45Z' + version: 1 +``` +After the Backup is successfull, the `veleroBackup` phase will be set to Completed with additional information about the backup and the `estimatedQueuePosition` will be set to 0. + +```yaml +status: + conditions: + - lastTransitionTime: '2024-11-27T10:47:49Z' + message: backup accepted + reason: BackupAccepted + status: 'True' + type: Accepted + - lastTransitionTime: '2024-11-27T10:47:50Z' + message: Created Velero Backup object + reason: BackupScheduled + status: 'True' + type: Queued + phase: Created + queueInfo: + estimatedQueuePosition: 0 + veleroBackup: + nacuuid: mongo-persistent-anotherte-c95bc62d-f40c-47b8-8a28-0dd7addb4930 + name: mongo-persistent-anotherte-c95bc62d-f40c-47b8-8a28-0dd7addb4930 + namespace: openshift-adp + status: + completionTimestamp: '2024-11-27T10:48:50Z' + expiration: '2024-12-27T10:48:45Z' + formatVersion: 1.1.0 + hookStatus: {} + phase: Completed + progress: + itemsBackedUp: 56 + totalItems: 56 + startTimestamp: '2024-11-27T10:48:45Z' + version: 1 +``` + + ## Status Update scenarios The following graph shows the lifecycle of a NonAdminBackup. @@ -159,7 +251,7 @@ flowchart TD createVB -->|No| createNewVB[Create VeleroBackup] createNewVB --> setCreatedPhase[NAB Phase: **Created**] setCreatedPhase --> setQueuedCondition[NAB Condition:: Queued=True
Reason: BackupScheduled
Message: Created Velero Backup object] - createVB -->|Yes| updateFromVB[Update NAB Status from VeleroBackup:
Phase, Start Time, Completion Time,
Expiration, Errors, Warnings,
Progress, ValidationErrors] + createVB -->|Yes| updateFromVB[Update NAB Status from VeleroBackup:
Phase, Start Time, Completion Time,
Expiration, Errors, Warnings,
Progress, ValidationErrors
Queue Info: estimatedQueuePosition] setQueuedCondition -->|Update Status if Changed
▶ Continue ║No Requeue║| endCreateUpdate[End Create/Update] updateFromVB -->|Update Status if Changed
▶ Continue ║No Requeue║| endCreateUpdate diff --git a/internal/common/constant/constant.go b/internal/common/constant/constant.go index a9f64ae..937d14f 100644 --- a/internal/common/constant/constant.go +++ b/internal/common/constant/constant.go @@ -49,6 +49,9 @@ const NameDelimiter = "-" // TrueString defines a constant for the True string const TrueString = "True" +// NamespaceString defines a constant for the Namespace string +const NamespaceString = "Namespace" + // MaximumNacObjectNameLength represents Generated Non Admin Object Name and // must be below 63 characters, because it's used within object Label Value const MaximumNacObjectNameLength = validation.DNS1123LabelMaxLength diff --git a/internal/common/function/function.go b/internal/common/function/function.go index 40b03ea..09228c3 100644 --- a/internal/common/function/function.go +++ b/internal/common/function/function.go @@ -183,6 +183,79 @@ func GetVeleroBackupByLabel(ctx context.Context, clientInstance client.Client, n } } +// GetActiveVeleroBackupsByLabel retrieves all VeleroBackup objects based on a specified label within a given namespace. +// It returns a slice of VeleroBackup objects or nil if none are found. +func GetActiveVeleroBackupsByLabel(ctx context.Context, clientInstance client.Client, namespace, labelKey, labelValue string) ([]velerov1.Backup, error) { + var veleroBackupList velerov1.BackupList + labelSelector := client.MatchingLabels{labelKey: labelValue} + + if err := clientInstance.List(ctx, &veleroBackupList, client.InNamespace(namespace), labelSelector); err != nil { + return nil, err + } + + // Filter out backups with a CompletionTimestamp + var activeBackups []velerov1.Backup + for _, backup := range veleroBackupList.Items { + if backup.Status.CompletionTimestamp == nil { + activeBackups = append(activeBackups, backup) + } + } + + if len(activeBackups) == 0 { + return nil, nil + } + + return activeBackups, nil +} + +// GetBackupQueueInfo determines the queue position of the specified VeleroBackup. +// It calculates how many queued Backups exist in the namespace that were created before this one. +func GetBackupQueueInfo(ctx context.Context, clientInstance client.Client, namespace string, targetBackup *velerov1.Backup) (nacv1alpha1.QueueInfo, error) { + var queueInfo nacv1alpha1.QueueInfo + + // If the target backup has no valid CreationTimestamp, it means that it's not yet reconciled by OADP/Velero. + // In this case, we can't determine its queue position, so we return nil. + if targetBackup == nil || targetBackup.CreationTimestamp.IsZero() { + return queueInfo, nil + } + + // If the target backup has a CompletionTimestamp, it means that it's already served. + if targetBackup.Status.CompletionTimestamp != nil { + queueInfo.EstimatedQueuePosition = 0 + return queueInfo, nil + } + + // List all Backup objects in the namespace + var backupList velerov1.BackupList + if err := clientInstance.List(ctx, &backupList, client.InNamespace(namespace)); err != nil { + return queueInfo, err + } + + // Extract the target backup's creation timestamp + targetTimestamp := targetBackup.CreationTimestamp.Time + + // The target backup is always in queue at least in the first position + // 0 is reserved for the backups that are already served. + queueInfo.EstimatedQueuePosition = 1 + + // Iterate through backups and calculate position + for i := range backupList.Items { + backup := &backupList.Items[i] + + // Skip backups that have CompletionTimestamp set. This means that the Velero won't be further processing this backup. + if backup.Status.CompletionTimestamp != nil { + continue + } + + // Count backups created earlier than the target backup + if backup.CreationTimestamp.Time.Before(targetTimestamp) { + queueInfo.EstimatedQueuePosition++ + } + } + + return queueInfo, nil +} + // GetVeleroDeleteBackupRequestByLabel retrieves a DeleteBackupRequest object based on a specified label within a given namespace. // It returns the DeleteBackupRequest only when exactly one object is found, throws an error if multiple backups are found, // or returns nil if no matches are found. diff --git a/internal/common/function/function_test.go b/internal/common/function/function_test.go index 64b33de..1270d8b 100644 --- a/internal/common/function/function_test.go +++ b/internal/common/function/function_test.go @@ -47,10 +47,14 @@ import ( var _ = ginkgo.Describe("PLACEHOLDER", func() {}) const ( - testNonAdminBackupNamespace = "non-admin-backup-namespace" - testNonAdminBackupName = "non-admin-backup-name" - testNonAdminBackupUUID = "12345678-1234-1234-1234-123456789abc" - defaultStr = "default" + testNonAdminBackupNamespace = "non-admin-backup-namespace" + testNonAdminBackupName = "non-admin-backup-name" + testNonAdminSecondBackupName = "non-admin-second-backup-name" + testNonAdminBackupUUID = "12345678-1234-1234-1234-123456789abc" + defaultStr = "default" + expectedIntZero = 0 + expectedIntOne = 1 + expectedIntTwo = 2 ) func TestGetNonAdminLabels(t *testing.T) { @@ -455,9 +459,8 @@ func TestGetVeleroBackupByLabel(t *testing.T) { scheme := runtime.NewScheme() const testAppStr = "test-app" - // Register VeleroBackup type with the scheme if err := velerov1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to register VeleroBackup type: %v", err) + t.Fatalf("Failed to register VeleroBackup type in TestGetVeleroBackupByLabel: %v", err) } tests := []struct { @@ -700,9 +703,8 @@ func TestGetVeleroDeleteBackupRequestByLabel(t *testing.T) { scheme := runtime.NewScheme() const testAppStr = "test-app" - // Register DeleteBackupRequest type with the scheme if err := velerov1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to register DeleteBackupRequest type: %v", err) + t.Fatalf("Failed to register DeleteBackupRequest type in TestGetVeleroDeleteBackupRequestByLabel: %v", err) } tests := []struct { @@ -802,3 +804,182 @@ func TestGetVeleroDeleteBackupRequestByLabel(t *testing.T) { }) } } + +func TestGetActiveVeleroBackupsByLabel(t *testing.T) { + log := zap.New(zap.UseDevMode(true)) + ctx := context.Background() + ctx = ctrl.LoggerInto(ctx, log) + scheme := runtime.NewScheme() + + if err := velerov1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to register VeleroBackup type in TestGetActiveVeleroBackupsByLabel: %v", err) + } + + tests := []struct { + name string + namespace string + labelKey string + labelValue string + mockBackups []velerov1.Backup + expectedCount int + }{ + { + name: "No active backups", + namespace: defaultStr, + labelKey: constant.NabOriginNACUUIDLabel, + labelValue: testNonAdminBackupUUID, + mockBackups: []velerov1.Backup{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminBackupName, + Labels: map[string]string{constant.NabOriginNACUUIDLabel: testNonAdminBackupUUID}, + }, + Status: velerov1.BackupStatus{ + CompletionTimestamp: &metav1.Time{Time: time.Now()}, + }, + }, + }, + expectedCount: expectedIntZero, + }, + { + name: "One active backup", + namespace: defaultStr, + labelKey: constant.NabOriginNACUUIDLabel, + labelValue: testNonAdminBackupUUID, + mockBackups: []velerov1.Backup{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminBackupName, + Labels: map[string]string{constant.NabOriginNACUUIDLabel: testNonAdminBackupUUID}, + }, + }, + }, + expectedCount: expectedIntOne, + }, + { + name: "Multiple active backups", + namespace: defaultStr, + labelKey: constant.NabOriginNACUUIDLabel, + labelValue: testNonAdminBackupUUID, + mockBackups: []velerov1.Backup{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminBackupName, + Labels: map[string]string{constant.NabOriginNACUUIDLabel: testNonAdminBackupUUID}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminSecondBackupName, + Labels: map[string]string{constant.NabOriginNACUUIDLabel: testNonAdminBackupUUID}, + }, + }, + }, + expectedCount: expectedIntTwo, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []client.Object + for _, backup := range tt.mockBackups { + backupCopy := backup // Create a copy to avoid memory aliasing + objects = append(objects, &backupCopy) + } + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + + result, err := GetActiveVeleroBackupsByLabel(ctx, client, tt.namespace, tt.labelKey, tt.labelValue) + assert.NoError(t, err) + assert.Equal(t, tt.expectedCount, len(result)) + }) + } +} + +func TestGetBackupQueueInfo(t *testing.T) { + log := zap.New(zap.UseDevMode(true)) + ctx := context.Background() + ctx = ctrl.LoggerInto(ctx, log) + scheme := runtime.NewScheme() + + if err := velerov1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to register VeleroBackup type in TestGetBackupQueueInfo: %v", err) + } + + tests := []struct { + name string + namespace string + targetBackup *velerov1.Backup + mockBackups []velerov1.Backup + expectedQueue int + }{ + { + name: "No backups in queue", + namespace: defaultStr, + targetBackup: &velerov1.Backup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminBackupName, + CreationTimestamp: metav1.Time{Time: time.Now()}, + }, + }, + mockBackups: []velerov1.Backup{}, + expectedQueue: expectedIntOne, + }, + { + name: "One backup ahead in queue", + namespace: defaultStr, + targetBackup: &velerov1.Backup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminSecondBackupName, + CreationTimestamp: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, + }, + }, + mockBackups: []velerov1.Backup{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminBackupName, + CreationTimestamp: metav1.Time{Time: time.Now()}, + }, + }, + }, + expectedQueue: expectedIntTwo, + }, + { + name: "Target backup already completed", + namespace: defaultStr, + targetBackup: &velerov1.Backup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultStr, + Name: testNonAdminBackupName, + CreationTimestamp: metav1.Time{Time: time.Now()}, + }, + Status: velerov1.BackupStatus{ + CompletionTimestamp: &metav1.Time{Time: time.Now()}, + }, + }, + mockBackups: []velerov1.Backup{}, + expectedQueue: expectedIntZero, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []client.Object + for _, backup := range tt.mockBackups { + backupCopy := backup + objects = append(objects, &backupCopy) + } + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + + queueInfo, err := GetBackupQueueInfo(ctx, client, tt.namespace, tt.targetBackup) + assert.NoError(t, err) + assert.Equal(t, tt.expectedQueue, queueInfo.EstimatedQueuePosition) + }) + } +} diff --git a/internal/controller/nonadminbackup_controller.go b/internal/controller/nonadminbackup_controller.go index 00231d3..4b3febe 100644 --- a/internal/controller/nonadminbackup_controller.go +++ b/internal/controller/nonadminbackup_controller.go @@ -642,7 +642,18 @@ func (r *NonAdminBackupReconciler) createVeleroBackupAndSyncWithNonAdminBackup(c logger.Info("VeleroBackup successfully created") } - veleroBackupLogger := logger.WithValues("VeleroBackup", types.NamespacedName{Name: veleroBackupNACUUID, Namespace: r.OADPNamespace}) + updatedQueueInfo := false + + // Determine how many Backups are scheduled before the given VeleroBackup in the OADP namespace. + queueInfo, err := function.GetBackupQueueInfo(ctx, r.Client, r.OADPNamespace, veleroBackup) + if err != nil { + // Log error and continue with the reconciliation, this is not critical error as it's just + // about the Velero Backup queue position information + logger.Error(err, "Failed to get the queue position for the VeleroBackup") + } else { + nab.Status.QueueInfo = &queueInfo + updatedQueueInfo = true + } updatedPhase := updateNonAdminPhase(&nab.Status.Phase, nacv1alpha1.NonAdminBackupPhaseCreated) @@ -655,29 +666,19 @@ func (r *NonAdminBackupReconciler) createVeleroBackupAndSyncWithNonAdminBackup(c }, ) - if updatedPhase || updatedCondition { - if err := r.Status().Update(ctx, nab); err != nil { - logger.Error(err, statusUpdateError) - return false, err - } - logger.V(1).Info(statusUpdateExit) - return false, nil // TODO (migi): We probably can safely continue with the reconciliation here - } - - logger.V(1).Info("NonAdminBackup status unchanged during VeleroBackup reconciliation") - // Ensure that the NonAdminBackup's NonAdminBackupStatus is in sync // with the VeleroBackup. Any required updates to the NonAdminBackup // Status will be applied based on the current state of the VeleroBackup. updated := updateNonAdminBackupVeleroBackupStatus(&nab.Status, veleroBackup) - if updated { + + if updated || updatedPhase || updatedCondition || updatedQueueInfo { if err := r.Status().Update(ctx, nab); err != nil { - veleroBackupLogger.Error(err, "Failed to update NonAdminBackup Status after VeleroBackup reconciliation") + logger.Error(err, statusUpdateError) return false, err } - logger.V(1).Info("NonAdminBackup Status updated successfully") + logger.V(1).Info(statusUpdateExit) } else { - logger.V(1).Info("NonAdminBackup Status unchanged during VeleroBackup reconciliation") + logger.V(1).Info("NonAdminBackup status unchanged during VeleroBackup reconciliation") } return false, nil @@ -689,12 +690,19 @@ func (r *NonAdminBackupReconciler) SetupWithManager(mgr ctrl.Manager) error { For(&nacv1alpha1.NonAdminBackup{}). WithEventFilter(predicate.CompositePredicate{ NonAdminBackupPredicate: predicate.NonAdminBackupPredicate{}, + VeleroBackupQueuePredicate: predicate.VeleroBackupQueuePredicate{ + OADPNamespace: r.OADPNamespace, + }, VeleroBackupPredicate: predicate.VeleroBackupPredicate{ OADPNamespace: r.OADPNamespace, }, }). // handler runs after predicate Watches(&velerov1.Backup{}, &handler.VeleroBackupHandler{}). + Watches(&velerov1.Backup{}, &handler.VeleroBackupQueueHandler{ + Client: r.Client, + OADPNamespace: r.OADPNamespace, + }). Complete(r) } diff --git a/internal/controller/nonadminbackup_controller_test.go b/internal/controller/nonadminbackup_controller_test.go index 495059f..79fd867 100644 --- a/internal/controller/nonadminbackup_controller_test.go +++ b/internal/controller/nonadminbackup_controller_test.go @@ -115,6 +115,13 @@ func checkTestNonAdminBackupStatus(nonAdminBackup *nacv1alpha1.NonAdminBackup, e return fmt.Errorf("NonAdminBackup Status Conditions [%v] Message %v does not contain expected message %v", index, nonAdminBackup.Status.Conditions[index].Message, expectedStatus.Conditions[index].Message) } } + + if nonAdminBackup.Status.QueueInfo != nil && expectedStatus.QueueInfo != nil { + if nonAdminBackup.Status.QueueInfo.EstimatedQueuePosition != expectedStatus.QueueInfo.EstimatedQueuePosition { + return fmt.Errorf("NonAdminBackup Status QueueInfo EstimatedQueuePosition %v is not equal to expected %v", nonAdminBackup.Status.QueueInfo.EstimatedQueuePosition, expectedStatus.QueueInfo.EstimatedQueuePosition) + } + } + return nil } @@ -635,7 +642,7 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func uuidCreatedByReconcile: true, result: reconcile.Result{Requeue: false}, }), - ginkgo.Entry("When triggered by Requeue(NonAdminBackup phase new; Conditions Accepted True; NonAdminBackup Status NACUUID set), should update NonAdminBackup phase to created and Condition to Queued True and Exit", nonAdminBackupSingleReconcileScenario{ + ginkgo.Entry("When triggered by Requeue(NonAdminBackup phase new; Conditions Accepted True; NonAdminBackup Status NACUUID set), should update NonAdminBackup phase to created and Condition to Queued True and Exit, 0 position in queue", nonAdminBackupSingleReconcileScenario{ addFinalizer: true, nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ BackupSpec: &velerov1.BackupSpec{}, @@ -654,6 +661,9 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func }, nonAdminBackupExpectedStatus: nacv1alpha1.NonAdminBackupStatus{ Phase: nacv1alpha1.NonAdminBackupPhaseCreated, + QueueInfo: &nacv1alpha1.QueueInfo{ + EstimatedQueuePosition: 0, + }, Conditions: []metav1.Condition{ { Type: "Accepted", @@ -672,14 +682,17 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func uuidFromTestCase: true, result: reconcile.Result{}, }), - ginkgo.Entry("When triggered by VeleroBackup Update event, should update NonAdminBackup VeleroBackupStatus and Exit", nonAdminBackupSingleReconcileScenario{ + ginkgo.Entry("When triggered by VeleroBackup Update event, should update NonAdminBackup VeleroBackupStatus and Exit, 1st position in queue", nonAdminBackupSingleReconcileScenario{ createVeleroBackup: true, addFinalizer: true, nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ BackupSpec: &velerov1.BackupSpec{}, }, nonAdminBackupPriorStatus: &nacv1alpha1.NonAdminBackupStatus{ - Phase: nacv1alpha1.NonAdminBackupPhaseCreated, + Phase: nacv1alpha1.NonAdminBackupPhaseCreated, + QueueInfo: &nacv1alpha1.QueueInfo{ + EstimatedQueuePosition: 5, + }, VeleroBackup: &nacv1alpha1.VeleroBackup{}, Conditions: []metav1.Condition{ { @@ -700,6 +713,9 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func }, nonAdminBackupExpectedStatus: nacv1alpha1.NonAdminBackupStatus{ Phase: nacv1alpha1.NonAdminBackupPhaseCreated, + QueueInfo: &nacv1alpha1.QueueInfo{ + EstimatedQueuePosition: 1, + }, VeleroBackup: &nacv1alpha1.VeleroBackup{ Status: &velerov1.BackupStatus{}, }, @@ -863,7 +879,8 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", ginkgo.By("Simulating VeleroBackup update to finished state") veleroBackup.Status = velerov1.BackupStatus{ - Phase: velerov1.BackupPhaseCompleted, + Phase: velerov1.BackupPhaseCompleted, + CompletionTimestamp: &metav1.Time{Time: time.Now()}, } // can not call .Status().Update() for veleroBackup object https://github.com/vmware-tanzu/velero/issues/8285 diff --git a/internal/handler/velerobackup_queue_handler.go b/internal/handler/velerobackup_queue_handler.go new file mode 100644 index 0000000..130a65b --- /dev/null +++ b/internal/handler/velerobackup_queue_handler.go @@ -0,0 +1,97 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package handler contains all event handlers of the project +package handler + +import ( + "context" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/migtools/oadp-non-admin/internal/common/constant" + "github.com/migtools/oadp-non-admin/internal/common/function" +) + +// VeleroBackupQueueHandler contains event handlers for Velero Backup objects +type VeleroBackupQueueHandler struct { + Client client.Client + OADPNamespace string +} + +// Create event handler +func (VeleroBackupQueueHandler) Create(_ context.Context, _ event.CreateEvent, _ workqueue.RateLimitingInterface) { + // Create event handler for the Backup object +} + +// Update event handler adds Velero Backup's NonAdminBackup to controller queue +func (h VeleroBackupQueueHandler) Update(ctx context.Context, evt event.UpdateEvent, q workqueue.RateLimitingInterface) { + // Only update to the first in the queue Velero Backup should trigger changes to the + // NonAdminBackup objects. Updates to the Velero Backup 2nd and 3rd does not lower the + // queue. This optimizes the number of times we need to update the NonAdminBackup objects + // and the number of Velero Backup objects we need to react on. + + logger := function.GetLogger(ctx, evt.ObjectNew, "VeleroBackupQueueHandler") + + // Fetching Velero Backups triggered by NonAdminBackup to optimize our reconcile cycles + backups, err := function.GetActiveVeleroBackupsByLabel(ctx, h.Client, h.OADPNamespace, constant.ManagedByLabel, constant.ManagedByLabelValue) + if err != nil { + logger.Error(err, "Failed to get Velero Backups by label") + return + } + + if backups == nil { + // That should't really be the case as our Update event was triggered by a Velero Backup + // object that has a new CompletionTimestamp. + logger.V(1).Info("No pending velero backups found in namespace.", constant.NamespaceString, h.OADPNamespace) + } else { + nabEventAnnotations := evt.ObjectNew.GetAnnotations() + nabEventOriginNamespace := nabEventAnnotations[constant.NabOriginNamespaceAnnotation] + nabEventOriginName := nabEventAnnotations[constant.NabOriginNameAnnotation] + + for _, backup := range backups { + annotations := backup.GetAnnotations() + nabOriginNamespace := annotations[constant.NabOriginNamespaceAnnotation] + nabOriginName := annotations[constant.NabOriginNameAnnotation] + + // This object is within current queue, so there is no need to trigger changes to it. + // The VeleroBackupHandler will serve for that. + if nabOriginNamespace != nabEventOriginNamespace || nabOriginName != nabEventOriginName { + logger.V(1).Info("Processing Queue update for the NonAdmin Backup referenced by Velero Backup", "Name", backup.Name, constant.NamespaceString, backup.Namespace, "CreatedAt", backup.CreationTimestamp) + q.Add(reconcile.Request{NamespacedName: types.NamespacedName{ + Name: nabOriginName, + Namespace: nabOriginNamespace, + }}) + } else { + logger.V(1).Info("Ignoring Queue update for the NonAdmin Backup that triggered this event", "Name", backup.Name, constant.NamespaceString, backup.Namespace, "CreatedAt", backup.CreationTimestamp) + } + } + } +} + +// Delete event handler +func (VeleroBackupQueueHandler) Delete(_ context.Context, _ event.DeleteEvent, _ workqueue.RateLimitingInterface) { + // Delete event handler for the Backup object +} + +// Generic event handler +func (VeleroBackupQueueHandler) Generic(_ context.Context, _ event.GenericEvent, _ workqueue.RateLimitingInterface) { + // Generic event handler for the Backup object +} diff --git a/internal/predicate/composite_predicate.go b/internal/predicate/composite_predicate.go index 19e2499..98fab4f 100644 --- a/internal/predicate/composite_predicate.go +++ b/internal/predicate/composite_predicate.go @@ -28,9 +28,10 @@ import ( // CompositePredicate is a combination of NonAdminBackup and Velero Backup event filters type CompositePredicate struct { - Context context.Context - NonAdminBackupPredicate NonAdminBackupPredicate - VeleroBackupPredicate VeleroBackupPredicate + Context context.Context + NonAdminBackupPredicate NonAdminBackupPredicate + VeleroBackupPredicate VeleroBackupPredicate + VeleroBackupQueuePredicate VeleroBackupQueuePredicate } // Create event filter only accepts NonAdminBackup create events @@ -49,7 +50,7 @@ func (p CompositePredicate) Update(evt event.UpdateEvent) bool { case *nacv1alpha1.NonAdminBackup: return p.NonAdminBackupPredicate.Update(p.Context, evt) case *velerov1.Backup: - return p.VeleroBackupPredicate.Update(p.Context, evt) + return p.VeleroBackupQueuePredicate.Update(p.Context, evt) || p.VeleroBackupPredicate.Update(p.Context, evt) default: return false } diff --git a/internal/predicate/nonadminbackup_predicate.go b/internal/predicate/nonadminbackup_predicate.go index 1657d2c..b06ad18 100644 --- a/internal/predicate/nonadminbackup_predicate.go +++ b/internal/predicate/nonadminbackup_predicate.go @@ -32,7 +32,7 @@ type NonAdminBackupPredicate struct{} // Create event filter accepts all NonAdminBackup create events func (NonAdminBackupPredicate) Create(ctx context.Context, evt event.CreateEvent) bool { logger := function.GetLogger(ctx, evt.Object, nonAdminBackupPredicateKey) - logger.V(1).Info("Accepted Create event") + logger.V(1).Info("Accepted NAB Create event") return true } @@ -41,17 +41,17 @@ func (NonAdminBackupPredicate) Update(ctx context.Context, evt event.UpdateEvent logger := function.GetLogger(ctx, evt.ObjectNew, nonAdminBackupPredicateKey) if evt.ObjectNew.GetGeneration() != evt.ObjectOld.GetGeneration() { - logger.V(1).Info("Accepted Update event") + logger.V(1).Info("Accepted NAB Update event") return true } - logger.V(1).Info("Rejected Update event") + logger.V(1).Info("Rejected NAB Update event") return false } // Delete event filter accepts all NonAdminBackup delete events func (NonAdminBackupPredicate) Delete(ctx context.Context, evt event.DeleteEvent) bool { logger := function.GetLogger(ctx, evt.Object, nonAdminBackupPredicateKey) - logger.V(1).Info("Accepted Delete event") + logger.V(1).Info("Accepted NAB Delete event") return true } diff --git a/internal/predicate/velerobackup_predicate.go b/internal/predicate/velerobackup_predicate.go index 55ecf93..9072d37 100644 --- a/internal/predicate/velerobackup_predicate.go +++ b/internal/predicate/velerobackup_predicate.go @@ -37,11 +37,11 @@ func (p VeleroBackupPredicate) Update(ctx context.Context, evt event.UpdateEvent namespace := evt.ObjectNew.GetNamespace() if namespace == p.OADPNamespace { if function.CheckVeleroBackupMetadata(evt.ObjectNew) { - logger.V(1).Info("Accepted Update event") + logger.V(1).Info("Accepted Backup Update event") return true } } - logger.V(1).Info("Rejected Update event") + logger.V(1).Info("Rejected Backup Update event") return false } diff --git a/internal/predicate/velerobackup_queue_predicate.go b/internal/predicate/velerobackup_queue_predicate.go new file mode 100644 index 0000000..42e8717 --- /dev/null +++ b/internal/predicate/velerobackup_queue_predicate.go @@ -0,0 +1,60 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package predicate + +import ( + "context" + + velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "sigs.k8s.io/controller-runtime/pkg/event" + + "github.com/migtools/oadp-non-admin/internal/common/function" +) + +// VeleroBackupQueuePredicate contains event filters for Velero Backup objects +type VeleroBackupQueuePredicate struct { + OADPNamespace string +} + +// Update event filter only accepts Velero Backup update events from the OADP namespace +// and from Velero Backups that have a new CompletionTimestamp. We are not interested in +// checking if the Velero Backup contains NonAdminBackup metadata, because every Velero Backup +// may change the Queue position of the NonAdminBackup object. +func (p VeleroBackupQueuePredicate) Update(ctx context.Context, evt event.UpdateEvent) bool { + logger := function.GetLogger(ctx, evt.ObjectNew, "VeleroBackupQueuePredicate") + + // Ensure the new and old objects are of the expected type + newBackup, okNew := evt.ObjectNew.(*velerov1.Backup) + oldBackup, okOld := evt.ObjectOld.(*velerov1.Backup) + + if !okNew || !okOld { + logger.V(1).Info("Rejected Backup Update event: invalid object type") + return false + } + + namespace := newBackup.GetNamespace() + + if namespace == p.OADPNamespace { + if oldBackup.Status.CompletionTimestamp == nil && newBackup.Status.CompletionTimestamp != nil { + logger.V(1).Info("Accepted Backup Update event: new completion timestamp") + return true + } + } + + logger.V(1).Info("Rejected Backup Update event: no changes to the CompletionTimestamp in the VeleroBackup object") + return false +}