From 5217d70ee90bfbc9b230f4df312eab833ccbc5e8 Mon Sep 17 00:00:00 2001 From: Renan Campos Date: Fri, 13 Sep 2024 14:36:41 -0400 Subject: [PATCH] fix: tolerate required delay between service account creation and configuration It was discovered through testing that service accounts created on GCP need a duration of time between creation and being referenced, otherwise a BadRequest error occurs. A delayed retry logic is introduced to ensure the service account is available before making additional configuration calls. --- cmd/ocm/gcp/gcp-client-shim.go | 18 +++++++++++++----- pkg/gcp/client.go | 13 ++++++++----- pkg/utils/helper.go | 13 +++++++++++++ 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/cmd/ocm/gcp/gcp-client-shim.go b/cmd/ocm/gcp/gcp-client-shim.go index c89c6543..cc90f9e7 100644 --- a/cmd/ocm/gcp/gcp-client-shim.go +++ b/cmd/ocm/gcp/gcp-client-shim.go @@ -6,6 +6,7 @@ import ( "log" "reflect" "strings" + "time" "cloud.google.com/go/iam/admin/apiv1/adminpb" "github.com/googleapis/gax-go/v2/apierror" @@ -17,6 +18,7 @@ import ( "google.golang.org/grpc/codes" "github.com/openshift-online/ocm-cli/pkg/gcp" + "github.com/openshift-online/ocm-cli/pkg/utils" ) type GcpClientWifConfigShim interface { @@ -269,11 +271,17 @@ func (c *shim) bindRolesToServiceAccount( serviceAccountId := serviceAccount.ServiceAccountId() roles := serviceAccount.Roles() - return c.bindRolesToPrincipal( - ctx, - fmt.Sprintf("serviceAccount:%s@%s.iam.gserviceaccount.com", serviceAccountId, c.wifConfig.Gcp().ProjectId()), - roles, - ) + // It was found that there is a window of time between when a service + // account creation call is made that the service account is not available + // in adjacent API calls. The call is therefore wrapped in retry logic to + // be robust to these types of synchronization issues. + return utils.DelayedRetry(func() error { + return c.bindRolesToPrincipal( + ctx, + fmt.Sprintf("serviceAccount:%s@%s.iam.gserviceaccount.com", serviceAccountId, c.wifConfig.Gcp().ProjectId()), + roles, + ) + }, 10, 500*time.Millisecond) } func (c *shim) bindRolesToGroup( diff --git a/pkg/gcp/client.go b/pkg/gcp/client.go index 371734e5..dd227e44 100644 --- a/pkg/gcp/client.go +++ b/pkg/gcp/client.go @@ -17,11 +17,6 @@ import ( ) type GcpClient interface { - /* - ListServiceAccounts(ctx context.Context, project string, filter func(s string) bool) ([]string, error) //nolint:lll - DeleteRole(context.Context, *adminpb.DeleteRoleRequest) (*adminpb.Role, error) - ListRoles(context.Context, *adminpb.ListRolesRequest) (*adminpb.ListRolesResponse, error) - */ AttachImpersonator(ctx context.Context, saId, projectId, impersonatorResourceId string) error AttachWorkloadIdentityPool(ctx context.Context, sa *cmv1.WifServiceAccount, poolId, projectId string) error CreateRole(context.Context, *adminpb.CreateRoleRequest) (*adminpb.Role, error) @@ -32,6 +27,7 @@ type GcpClient interface { DeleteWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.Operation, error) //nolint:lll GetProjectIamPolicy(ctx context.Context, projectName string, request *cloudresourcemanager.GetIamPolicyRequest) (*cloudresourcemanager.Policy, error) //nolint:lll GetRole(context.Context, *adminpb.GetRoleRequest) (*adminpb.Role, error) + GetServiceAccount(ctx context.Context, request *adminpb.GetServiceAccountRequest) (*adminpb.ServiceAccount, error) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) //nolint:lll GetWorkloadIdentityProvider(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPoolProvider, error) //nolint:lll ProjectNumberFromId(ctx context.Context, projectId string) (int64, error) @@ -195,6 +191,13 @@ func (c *gcpClient) GetRole(ctx context.Context, request *adminpb.GetRoleRequest return c.iamClient.GetRole(ctx, request) } +func (c *gcpClient) GetServiceAccount( + ctx context.Context, + request *adminpb.GetServiceAccountRequest, +) (*adminpb.ServiceAccount, error) { + return c.iamClient.GetServiceAccount(ctx, request) +} + //nolint:lll func (c *gcpClient) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) { return c.oldIamClient.Projects.Locations.WorkloadIdentityPools.Get(resource).Context(ctx).Do() diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 77a42ba1..070f975e 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -6,6 +6,7 @@ import ( "net/url" "os" "regexp" + "time" ) // the following regex defines four different patterns: @@ -90,3 +91,15 @@ func HasDuplicates(valSlice []string) (string, bool) { } return "", false } + +func DelayedRetry(f func() error, maxRetries int, delay time.Duration) error { + var err error + for i := 0; i < maxRetries; i++ { + err = f() + if err == nil { + return nil + } + time.Sleep(delay) + } + return fmt.Errorf("Reached max retries. Last error: %s", err.Error()) +}