diff --git a/charts/radix-cluster-cleanup/Chart.yaml b/charts/radix-cluster-cleanup/Chart.yaml index a67d33c..0f8772c 100644 --- a/charts/radix-cluster-cleanup/Chart.yaml +++ b/charts/radix-cluster-cleanup/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v1 description: A Helm chart for Kubernetes name: radix-cluster-cleanup -version: 1.0.16 -appVersion: 1.0.16 +version: 1.0.17 +appVersion: 1.0.17 diff --git a/radix-cluster-cleanup/cmd/deleteRrs.go b/radix-cluster-cleanup/cmd/deleteRrs.go index f7de05c..322d8d8 100644 --- a/radix-cluster-cleanup/cmd/deleteRrs.go +++ b/radix-cluster-cleanup/cmd/deleteRrs.go @@ -65,7 +65,7 @@ func deleteRrs(ctx context.Context) error { return err } for _, rr := range tooInactiveRrs { - err := deleteRr(kubeClient, rr) + err := deleteRr(ctx, kubeClient, rr) if err != nil { return err } @@ -73,8 +73,8 @@ func deleteRrs(ctx context.Context) error { return nil } -func deleteRr(client *kube.Kube, rr v1.RadixRegistration) error { - err := client.RadixClient().RadixV1().RadixRegistrations().Delete(context.TODO(), rr.Name, metav1.DeleteOptions{}) +func deleteRr(ctx context.Context, client *kube.Kube, rr v1.RadixRegistration) error { + err := client.RadixClient().RadixV1().RadixRegistrations().Delete(ctx, rr.Name, metav1.DeleteOptions{}) if err != nil { return err } diff --git a/radix-cluster-cleanup/cmd/root.go b/radix-cluster-cleanup/cmd/root.go index b3d04bf..923e1df 100644 --- a/radix-cluster-cleanup/cmd/root.go +++ b/radix-cluster-cleanup/cmd/root.go @@ -92,6 +92,7 @@ func initZeroLogger(logLevel string, prettyPrint bool) error { if prettyPrint { log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.TimeOnly}) } + zerolog.DefaultContextLogger = &log.Logger return nil } @@ -139,7 +140,7 @@ func getKubernetesClient() (kubernetes.Interface, radixclient.Interface) { log.Fatal().Err(err).Msg("getClusterConfig radix client") } - log.Printf("Successfully constructed k8s client to API server %v", config.Host) + log.Info().Msgf("Successfully constructed k8s client to API server %v", config.Host) return client, radixClient } @@ -153,6 +154,7 @@ func getKubeUtil() (*kube.Kube, error) { } func runFunctionPeriodically(ctx context.Context, someFunc func(ctx context.Context) error) error { + logger := log.Ctx(ctx) cleanupDays, cleanupDaysErr := rootCmd.Flags().GetStringSlice(settings.CleanUpDaysOption) cleanupStart, cleanupStartErr := rootCmd.Flags().GetString(settings.CleanUpStartOption) cleanupEnd, cleanupEndErr := rootCmd.Flags().GetString(settings.CleanUpEndOption) @@ -164,60 +166,64 @@ func runFunctionPeriodically(ctx context.Context, someFunc func(ctx context.Cont timezone := "Local" window, err := timewindow.New(cleanupDays, cleanupStart, cleanupEnd, timezone) if err != nil { - log.Fatal().Err(err).Msg("Failed to build time window") + logger.Fatal().Err(err).Msg("Failed to build time window") } source := rand.NewSource(time.Now().UnixNano()) tick := delaytick.New(source, period) for range tick { pointInTime := time.Now() if window.Contains(pointInTime) { - log.Info().Msgf("Start listing RRs for stop %s", pointInTime) + logger.Info().Msgf("Start listing RRs for stop %s", pointInTime) err := someFunc(ctx) if err != nil { return err } } else { - log.Info().Msgf("%s is outside of window. Continue sleeping", pointInTime) + logger.Info().Msgf("%s is outside of window. Continue sleeping", pointInTime) } } - log.Warn().Msgf("execution reached code which was presumably after an inescapable loop") + logger.Warn().Msgf("execution reached code which was presumably after an inescapable loop") return nil } func getTooInactiveRrs(ctx context.Context, kubeClient *kube.Kube, inactivityLimit time.Duration, action string) ([]v1.RadixRegistration, error) { + logger := log.Ctx(ctx) rrs, err := kubeClient.ListRegistrations(ctx) if err != nil { return nil, err } var rrsForDeletion []v1.RadixRegistration for _, rr := range rrs { + logger := logger.With().Str("appName", rr.Name).Logger() + ctx = logger.WithContext(ctx) + if isWhitelisted(rr) { - log.Debug().Str("appName", rr.Name).Msg("RadixRegistration is whitelisted, skipping") + logger.Debug().Msg("RadixRegistration is whitelisted, skipping") continue } - ra, err := getRadixApplication(kubeClient, rr.Name) + ra, err := getRadixApplication(ctx, kubeClient, rr.Name) if kubeerrors.IsNotFound(err) { - log.Debug().Str("appName", rr.Name).Msg("could not find RadixApplication, continuing...") + logger.Debug().Msg("could not find RadixApplication, continuing...") continue } if err != nil { return nil, err } namespaces := getRuntimeNamespaces(ra) - log.Debug().Str("appName", rr.Name).Msgf("found namespaces %s associated with RadixRegistration", strings.Join(namespaces, ", ")) - rdsForRr, err := getRadixDeploymentsInNamespaces(kubeClient, namespaces) - log.Debug().Str("appName", rr.Name).Msgf("RadixRegistration has %d RadixDeployments", len(rdsForRr)) + logger.Debug().Msgf("found namespaces %s associated with RadixRegistration", strings.Join(namespaces, ", ")) + rdsForRr, err := getRadixDeploymentsInNamespaces(ctx, kubeClient, namespaces) if err != nil { return nil, err } - rjsForRr, err := getRadixJobsInNamespace(kubeClient, utils.GetAppNamespace(rr.Name)) - log.Debug().Str("appName", rr.Name).Msgf("RadixRegistration has %d RadixJobs", len(rdsForRr)) + logger.Debug().Msgf("RadixRegistration has %d RadixDeployments", len(rdsForRr)) + rjsForRr, err := getRadixJobsInNamespace(ctx, kubeClient, utils.GetAppNamespace(rr.Name)) + logger.Debug().Msgf("RadixRegistration has %d RadixJobs", len(rdsForRr)) if err != nil { return nil, err } - log.Debug().Str("appName", rr.Name).Msg("Checking timestamps of RadixDeployments and RadixJobs") - isInactive, err := rrIsInactive(rr.CreationTimestamp, rdsForRr, rjsForRr, inactivityLimit, action) + logger.Debug().Msg("Checking timestamps of RadixDeployments and RadixJobs") + isInactive, err := rrIsInactive(ctx, rr.CreationTimestamp, rdsForRr, rjsForRr, inactivityLimit, action) if err != nil { return nil, err } @@ -228,18 +234,18 @@ func getTooInactiveRrs(ctx context.Context, kubeClient *kube.Kube, inactivityLim return rrsForDeletion, nil } -func getRadixJobsInNamespace(kubeClient *kube.Kube, namespace string) ([]v1.RadixJob, error) { - rjs, err := kubeClient.RadixClient().RadixV1().RadixJobs(namespace).List(context.TODO(), metav1.ListOptions{}) +func getRadixJobsInNamespace(ctx context.Context, kubeClient *kube.Kube, namespace string) ([]v1.RadixJob, error) { + rjs, err := kubeClient.RadixClient().RadixV1().RadixJobs(namespace).List(ctx, metav1.ListOptions{}) if err != nil { return nil, err } return rjs.Items, nil } -func getRadixDeploymentsInNamespaces(kubeClient *kube.Kube, namespaces []string) ([]v1.RadixDeployment, error) { +func getRadixDeploymentsInNamespaces(ctx context.Context, kubeClient *kube.Kube, namespaces []string) ([]v1.RadixDeployment, error) { rdsForRr := make([]v1.RadixDeployment, 0) for _, ns := range namespaces { - rds, err := kubeClient.RadixClient().RadixV1().RadixDeployments(ns).List(context.TODO(), metav1.ListOptions{}) + rds, err := kubeClient.RadixClient().RadixV1().RadixDeployments(ns).List(ctx, metav1.ListOptions{}) if err != nil { return nil, err } @@ -256,8 +262,8 @@ func getRuntimeNamespaces(ra *v1.RadixApplication) []string { return namespaces } -func getRadixApplication(kubeClient *kube.Kube, appName string) (*v1.RadixApplication, error) { - return kubeClient.RadixClient().RadixV1().RadixApplications(utils.GetAppNamespace(appName)).Get(context.TODO(), appName, metav1.GetOptions{}) +func getRadixApplication(ctx context.Context, kubeClient *kube.Kube, appName string) (*v1.RadixApplication, error) { + return kubeClient.RadixClient().RadixV1().RadixApplications(utils.GetAppNamespace(appName)).Get(ctx, appName, metav1.GetOptions{}) } func isWhitelisted(rr *v1.RadixRegistration) bool { @@ -270,20 +276,27 @@ func isWhitelisted(rr *v1.RadixRegistration) bool { return false } -func rrIsInactive(rrCreationTimestamp metav1.Time, rds []v1.RadixDeployment, rjs []v1.RadixJob, inactivityLimit time.Duration, action string) (bool, error) { - if len(rds) == 0 && rrCreationTimestamp.Add(inactivityLimit).Before(time.Now()) { - log.Debug().Msgf("no RadixDeployments found, assuming RadixRegistration is inactive") +func rrIsInactive(ctx context.Context, rrCreationTimestamp metav1.Time, rds []v1.RadixDeployment, rjs []v1.RadixJob, inactivityLimit time.Duration, action string) (bool, error) { + logger := log.Ctx(ctx) + if rrCreationTimestamp.Add(inactivityLimit).After(time.Now()) { + logger.Debug().Msgf("RadixRegistration is newer than inactivity limit, assuming active") + return false, nil + } + + if len(rds) == 0 { + logger.Debug().Msgf("no RadixDeployments found, assuming RadixRegistration is inactive") return true, nil } + latestRadixDeployment := SortDeploymentsByActiveFromTimestampAsc(rds)[len(rds)-1] latestRadixDeploymentTimestamp := latestRadixDeployment.Status.ActiveFrom - log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent radixDeployment is %s, active from %s, %d hours ago", latestRadixDeployment.Name, latestRadixDeploymentTimestamp.Format(time.RFC822), int(time.Since(latestRadixDeploymentTimestamp.Time).Hours())) + logger.Debug().Msgf("most recent radixDeployment is %s, active from %s, %d hours ago", latestRadixDeployment.Name, latestRadixDeploymentTimestamp.Format(time.RFC822), int(time.Since(latestRadixDeploymentTimestamp.Time).Hours())) latestRadixJobTimestamp := metav1.Time{Time: time.Unix(0, 0)} latestRadixJob := getLatestRadixJob(rjs) if latestRadixJob != nil { latestRadixJobTimestamp = *latestRadixJob.Status.Created - log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent radixJob was %s, created %s, %d hours ago", latestRadixJob.Name, latestRadixJobTimestamp.Format(time.RFC822), int(time.Since(latestRadixJobTimestamp.Time).Hours())) + logger.Debug().Msgf("most recent radixJob was %s, created %s, %d hours ago", latestRadixJob.Name, latestRadixJobTimestamp.Format(time.RFC822), int(time.Since(latestRadixJobTimestamp.Time).Hours())) } latestUserMutationTimestamp, err := getLastUserMutationTimestamp(latestRadixDeployment) @@ -291,12 +304,12 @@ func rrIsInactive(rrCreationTimestamp metav1.Time, rds []v1.RadixDeployment, rjs return false, err } - log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent manual user activity was %s, %d hours ago", latestUserMutationTimestamp.Format(time.RFC822), int(time.Since(latestUserMutationTimestamp.Time).Hours())) - log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent creation of RR was %s, %d hours ago", rrCreationTimestamp, int(time.Since(rrCreationTimestamp.Time).Hours())) + logger.Debug().Msgf("most recent manual user activity was %s, %d hours ago", latestUserMutationTimestamp.Format(time.RFC822), int(time.Since(latestUserMutationTimestamp.Time).Hours())) + logger.Debug().Msgf("most recent creation of RR was %s, %d hours ago", rrCreationTimestamp, int(time.Since(rrCreationTimestamp.Time).Hours())) lastActivity := getMostRecentTimestamp(&latestRadixJobTimestamp, latestUserMutationTimestamp, &latestRadixDeploymentTimestamp, &rrCreationTimestamp) - log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("lastActivity was %s, %d hours ago", lastActivity, int(time.Since(lastActivity.Time).Hours())) + logger.Debug().Msgf("lastActivity was %s, %d hours ago", lastActivity, int(time.Since(lastActivity.Time).Hours())) if tooLongInactivity(lastActivity, inactivityLimit) { - log.Info().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("last activity was %d hours ago, which is more than %d hours ago, marking for %s", int(time.Since(lastActivity.Time).Hours()), int(inactivityLimit.Hours()), action) + logger.Debug().Msgf("last activity was %d hours ago, which is more than %d hours ago, marking for %s", int(time.Since(lastActivity.Time).Hours()), int(inactivityLimit.Hours()), action) return true, nil } return false, nil @@ -352,10 +365,12 @@ func isRJ1CreatedAfterRJ2(rj1 *v1.RadixJob, rj2 *v1.RadixJob) bool { } func SortDeploymentsByActiveFromTimestampAsc(rds []v1.RadixDeployment) []v1.RadixDeployment { - sort.Slice(rds, func(i, j int) bool { - return isRD1ActiveAfterRD2(&rds[j], &rds[i]) + target := make([]v1.RadixDeployment, len(rds)) + copy(target, rds) + sort.Slice(target, func(i, j int) bool { + return isRD1ActiveAfterRD2(&target[j], &target[i]) }) - return rds + return target } func isRD1ActiveAfterRD2(rd1 *v1.RadixDeployment, rd2 *v1.RadixDeployment) bool { diff --git a/radix-cluster-cleanup/cmd/stopRrs.go b/radix-cluster-cleanup/cmd/stopRrs.go index 2f0c460..f158b81 100644 --- a/radix-cluster-cleanup/cmd/stopRrs.go +++ b/radix-cluster-cleanup/cmd/stopRrs.go @@ -21,6 +21,7 @@ import ( "github.com/equinor/radix-cluster-cleanup/pkg/settings" "github.com/equinor/radix-common/utils/pointers" + "github.com/equinor/radix-common/utils/slice" "github.com/equinor/radix-operator/pkg/apis/kube" v1 "github.com/equinor/radix-operator/pkg/apis/radix/v1" "github.com/rs/zerolog/log" @@ -62,7 +63,8 @@ func stopRrs(ctx context.Context) error { return err } for _, rr := range tooInactiveRrs { - err := stopRr(kubeClient, rr) + ctx = log.Ctx(ctx).With().Str("appName", rr.Name).Logger().WithContext(ctx) + err := stopRr(ctx, kubeClient, rr) if err != nil { return err } @@ -70,36 +72,36 @@ func stopRrs(ctx context.Context) error { return nil } -func stopRr(kubeClient *kube.Kube, rr v1.RadixRegistration) error { - ra, err := getRadixApplication(kubeClient, rr.Name) +func stopRr(ctx context.Context, kubeClient *kube.Kube, rr v1.RadixRegistration) error { + ra, err := getRadixApplication(ctx, kubeClient, rr.Name) if err != nil { return err } namespaces := getRuntimeNamespaces(ra) - rdsForRr, err := getRadixDeploymentsInNamespaces(kubeClient, namespaces) - for _, rd := range rdsForRr { - isActive := rdIsActive(rd) - if err != nil { + rdsForRr, err := getRadixDeploymentsInNamespaces(ctx, kubeClient, namespaces) + if err != nil { + return err + } + + for _, rd := range slice.FindAll(rdsForRr, rdIsActive) { + ctx = log.Ctx(ctx).With().Str("deployment", rd.Name).Logger().WithContext(ctx) + if err := scaleRdComponentsToZeroReplicas(ctx, kubeClient, rd); err != nil { return err } - if isActive { - err := scaleRdComponentsToZeroReplicas(kubeClient, rd) - if err != nil { - return err - } - } } + return nil } -func scaleRdComponentsToZeroReplicas(kubeClient *kube.Kube, rd v1.RadixDeployment) error { +func scaleRdComponentsToZeroReplicas(ctx context.Context, kubeClient *kube.Kube, rd v1.RadixDeployment) error { + logger := log.Ctx(ctx) componentNames := make([]string, 0) for i := range rd.Spec.Components { rd.Spec.Components[i].ReplicasOverride = pointers.Ptr(0) componentNames = append(componentNames, rd.Spec.Components[i].Name) } - _, err := kubeClient.RadixClient().RadixV1().RadixDeployments(rd.Namespace).Update(context.TODO(), &rd, metav1.UpdateOptions{}) - log.Info().Str("appName", rd.Spec.AppName).Str("deployment", rd.Name).Msgf("scaled component %s in rd %s to 0 replicas", strings.Join(componentNames, ", "), rd.Name) + _, err := kubeClient.RadixClient().RadixV1().RadixDeployments(rd.Namespace).Update(ctx, &rd, metav1.UpdateOptions{}) + logger.Info().Msgf("scaled components %s to 0 replicas", strings.Join(componentNames, ", ")) if err != nil { return err } diff --git a/radix-cluster-cleanup/main.go b/radix-cluster-cleanup/main.go index 4d10c0b..81eec5c 100644 --- a/radix-cluster-cleanup/main.go +++ b/radix-cluster-cleanup/main.go @@ -15,5 +15,4 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGTERM) defer cancel() cmd.Execute(ctx) - <-ctx.Done() }