diff --git a/provider/pkg/resources/customresources/custom_recoveryservices.go b/provider/pkg/resources/customresources/custom_recoveryservices.go index 6a61b7d76858..957e06796296 100644 --- a/provider/pkg/resources/customresources/custom_recoveryservices.go +++ b/provider/pkg/resources/customresources/custom_recoveryservices.go @@ -18,7 +18,8 @@ import ( ) const ( - protectedItemPath = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.RecoveryServices/vaults/{vaultName}/backupFabrics/{fabricName}/protectionContainers/{containerName}/protectedItems/{protectedItemName}" + protectedItemPath = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.RecoveryServices/vaults/{vaultName}/backupFabrics/{fabricName}/protectionContainers/{containerName}/protectedItems/{protectedItemName}" + recoveryServicesAzureStorageFilter = "backupManagementType eq 'AzureStorage'" ) type protectedItem struct { @@ -51,7 +52,8 @@ func recoveryServicesProtectedItem(subscription string, cred azcore.TokenCredent } reader := &systemNameReaderImpl{ - protectableItemsClient: clientFactory.NewBackupProtectableItemsClient(), + protectableItemsClient: clientFactory.NewBackupProtectableItemsClient(), + protectionContainersClient: clientFactory.NewProtectionContainersClient(), } return &CustomResource{ @@ -66,7 +68,7 @@ func recoveryServicesProtectedItem(subscription string, cred azcore.TokenCredent // getIdAndQuery replaces the default implementation of crud.ResourceCrudClient.PrepareAzureRESTIdAndQuery. It doesn't // change queryParams, only the id, to replace the file share's friendly name with the system name. func getIdAndQuery(ctx context.Context, inputs resource.PropertyMap, crudClient crud.ResourceCrudClient, reader systemNameReader) (string, map[string]any, error) { - systemName, err := retrieveSystemName(ctx, inputs, reader) + systemName, err := retrieveSystemName(ctx, inputs, reader, 10, 30*time.Second) if err != nil { return "", nil, fmt.Errorf("failed to retrieve system name: %w", err) } @@ -85,7 +87,7 @@ func getIdAndQuery(ctx context.Context, inputs resource.PropertyMap, crudClient } // retrieveSystemName looks up the system name of a file share protected item in Azure. -func retrieveSystemName(ctx context.Context, input resource.PropertyMap, reader systemNameReader) (string, error) { +func retrieveSystemName(ctx context.Context, input resource.PropertyMap, reader systemNameReader, maxAttempts int, waitBetweenAttempts time.Duration) (string, error) { item, err := extractProtectedItemProperties(input) if err != nil { return "", fmt.Errorf("failed to extract protected item properties from input: %w", err) @@ -96,20 +98,28 @@ func retrieveSystemName(ctx context.Context, input resource.PropertyMap, reader return "", nil } - logging.V(9).Infof("looking up system name for %s", item.itemName) - systemName, err := reader.readSystemNameFromProtectableItem(ctx, item) + err = reader.inquireContainer(ctx, item) if err != nil { - return "", fmt.Errorf("failed to read system name for protectable item: %w", err) + return "", fmt.Errorf("failed to inquire protection container %s: %w", item.containerName, err) } + + logging.V(9).Infof("looking up system name for %s", item.itemName) // Based on observations during testing, the system name is usually, but not always immediately available. - if systemName == "" { - time.Sleep(30 * time.Second) - systemName, err = reader.readSystemNameFromProtectableItem(ctx, item) + // The /inquire request above should be awaited according to the docs, but the SDK doesn't actually support that. + for i := 1; i <= maxAttempts; i++ { + systemName, err := reader.readSystemNameFromProtectableItem(ctx, item) if err != nil { - return "", fmt.Errorf("failed to read system name for protectable item in second attempt: %w", err) + return "", fmt.Errorf("failed to read system name for protectable item: %w", err) + } + if systemName != "" { + logging.V(9).Infof("found system name %s for %s in attempt %d", systemName, item.itemName, i) + return systemName, nil + } + if i < maxAttempts { + time.Sleep(waitBetweenAttempts) } } - return systemName, nil + return "", fmt.Errorf("failed to retrieve system name after %d attempts", maxAttempts) } // systemNameReader is an interface for getting the Azure system name of a protected item. @@ -117,11 +127,15 @@ func retrieveSystemName(ctx context.Context, input resource.PropertyMap, reader // needs to be determined by iterating over the protected items in scope and matching by the human-readable name. // Abstracted into an interface to allow for testing. type systemNameReader interface { + // inquireContainer makes a request to /inquire, a special recovery services API that triggers a background job to + // update the protectable items in scope. + inquireContainer(ctx context.Context, input *protectedItemProperties) error readSystemNameFromProtectableItem(ctx context.Context, input *protectedItemProperties) (string, error) } type systemNameReaderImpl struct { - protectableItemsClient *recovery.BackupProtectableItemsClient + protectableItemsClient *recovery.BackupProtectableItemsClient + protectionContainersClient *recovery.ProtectionContainersClient } func (s *systemNameReaderImpl) readSystemNameFromProtectableItem(ctx context.Context, input *protectedItemProperties) (string, error) { @@ -129,7 +143,7 @@ func (s *systemNameReaderImpl) readSystemNameFromProtectableItem(ctx context.Con storageAccountName := segments[len(segments)-1] protectablePager := s.protectableItemsClient.NewListPager(input.recoveryVaultName, input.resourceGroupName, &recovery.BackupProtectableItemsClientListOptions{ - Filter: to.Ptr("backupManagementType eq 'AzureStorage'"), + Filter: to.Ptr(recoveryServicesAzureStorageFilter), }) for protectablePager.More() { page, err := protectablePager.NextPage(ctx) @@ -147,6 +161,15 @@ func (s *systemNameReaderImpl) readSystemNameFromProtectableItem(ctx context.Con return "", nil } +func (s *systemNameReaderImpl) inquireContainer(ctx context.Context, item *protectedItemProperties) error { + // the first return value is an empty struct, so we can ignore it + _, err := s.protectionContainersClient.Inquire(ctx, item.recoveryVaultName, item.resourceGroupName, item.fabricName, item.containerName, + &recovery.ProtectionContainersClientInquireOptions{ + Filter: to.Ptr(recoveryServicesAzureStorageFilter), + }) + return err +} + // findSystemName finds the system name of the file share protected item by looking through the given protectable items // for one that matches the target friendly name and storage account name. func findSystemName(protectableItems []*recovery.WorkloadProtectableItemResource, targetFriendlyName, storageAccountName string) (string, error) { diff --git a/provider/pkg/resources/customresources/custom_recoveryservices_test.go b/provider/pkg/resources/customresources/custom_recoveryservices_test.go index fc0034378e89..be8fb9dc2af7 100644 --- a/provider/pkg/resources/customresources/custom_recoveryservices_test.go +++ b/provider/pkg/resources/customresources/custom_recoveryservices_test.go @@ -5,6 +5,7 @@ package customresources import ( "context" "testing" + "time" "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" recovery "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/recoveryservices/armrecoveryservicesbackup/v4" @@ -85,7 +86,7 @@ func TestGetIdAndQuery(t *testing.T) { }, } - reader := &mockSystemNameReader{systemName: "azurefileshare;339f9859"} + reader := &mockSystemNameReader{systemNames: []string{"azurefileshare;339f9859"}} azureClient := azure.MockAzureClient{} crudClient := crud.NewResourceCrudClient(&azureClient, nil, nil, "123", &protectedItemResource) @@ -104,18 +105,44 @@ func TestRetrieveSystemName(t *testing.T) { t.Run("happy path", func(t *testing.T) { t.Parallel() - reader := &mockSystemNameReader{systemName: "systemName"} - systemName, err := retrieveSystemName(context.Background(), standardAzureFileshareProtectedItemInput(), reader) + reader := &mockSystemNameReader{systemNames: []string{"systemName"}} + systemName, err := retrieveSystemName(context.Background(), standardAzureFileshareProtectedItemInput(), reader, 1, 0*time.Second) assert.NoError(t, err) assert.Equal(t, "systemName", systemName) + assert.True(t, reader.inquireCalled) }) t.Run("no system name is found", func(t *testing.T) { t.Parallel() - reader := &mockSystemNameReader{systemName: ""} - systemName, err := retrieveSystemName(context.Background(), standardAzureFileshareProtectedItemInput(), reader) + reader := &mockSystemNameReader{systemNames: []string{""}} + systemName, err := retrieveSystemName(context.Background(), standardAzureFileshareProtectedItemInput(), reader, 1, 0*time.Second) + assert.Error(t, err) + assert.Empty(t, systemName) + assert.True(t, reader.inquireCalled) + }) + + t.Run("system name is found after multiple attempts", func(t *testing.T) { + t.Parallel() + reader := &mockSystemNameReader{ + systemNames: []string{"", "", "systemName", ""}, + errors: []error{nil, nil, nil, nil}, + } + systemName, err := retrieveSystemName(context.Background(), standardAzureFileshareProtectedItemInput(), reader, 5, 0*time.Second) assert.NoError(t, err) + assert.Equal(t, "systemName", systemName) + assert.True(t, reader.inquireCalled) + }) + + t.Run("stops after max attempts", func(t *testing.T) { + t.Parallel() + reader := &mockSystemNameReader{ + systemNames: []string{"", "", "systemName"}, + errors: []error{nil, nil, nil}, + } + systemName, err := retrieveSystemName(context.Background(), standardAzureFileshareProtectedItemInput(), reader, 2, 0*time.Second) + assert.Error(t, err) assert.Empty(t, systemName) + assert.True(t, reader.inquireCalled) }) t.Run("no system name for protected item of type other than AzureFileShareProtectedItem", func(t *testing.T) { @@ -123,20 +150,35 @@ func TestRetrieveSystemName(t *testing.T) { input := standardAzureFileshareProtectedItemInput() input["properties"].ObjectValue()["protectedItemType"] = resource.NewStringProperty("SomeOtherProtectedItem") - reader := &mockSystemNameReader{systemName: "test"} - systemName, err := retrieveSystemName(context.Background(), input, reader) + reader := &mockSystemNameReader{systemNames: []string{"test"}, errors: []error{nil}} + systemName, err := retrieveSystemName(context.Background(), input, reader, 1, 0*time.Second) assert.NoError(t, err) assert.Empty(t, systemName) + assert.False(t, reader.inquireCalled) }) } type mockSystemNameReader struct { - systemName string - err error + systemNames []string + errors []error + attempt int + // Was the /inquire API resp. the SDK's ProtectionContainersClient.Inquire called? + inquireCalled bool } func (m *mockSystemNameReader) readSystemNameFromProtectableItem(ctx context.Context, input *protectedItemProperties) (string, error) { - return m.systemName, m.err + name := m.systemNames[m.attempt] + var err error + if len(m.errors) > m.attempt { + err = m.errors[m.attempt] + } + m.attempt++ + return name, err +} + +func (m *mockSystemNameReader) inquireContainer(ctx context.Context, input *protectedItemProperties) error { + m.inquireCalled = true + return nil } func TestFindSystemName(t *testing.T) {