Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix panic caused by index out of range #67

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions charts/radix-cluster-cleanup/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v1
description: A Helm chart for Kubernetes
name: radix-cluster-cleanup
version: 1.0.16
appVersion: 1.0.16
version: 1.0.17
appVersion: 1.0.17
6 changes: 3 additions & 3 deletions radix-cluster-cleanup/cmd/deleteRrs.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,16 @@ func deleteRrs(ctx context.Context) error {
return err
}
for _, rr := range tooInactiveRrs {
err := deleteRr(kubeClient, rr)
err := deleteRr(ctx, kubeClient, rr)
if err != nil {
return err
}
}
return nil
}

func deleteRr(client *kube.Kube, rr v1.RadixRegistration) error {
err := client.RadixClient().RadixV1().RadixRegistrations().Delete(context.TODO(), rr.Name, metav1.DeleteOptions{})
func deleteRr(ctx context.Context, client *kube.Kube, rr v1.RadixRegistration) error {
err := client.RadixClient().RadixV1().RadixRegistrations().Delete(ctx, rr.Name, metav1.DeleteOptions{})
if err != nil {
return err
}
Expand Down
81 changes: 48 additions & 33 deletions radix-cluster-cleanup/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ func initZeroLogger(logLevel string, prettyPrint bool) error {
if prettyPrint {
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.TimeOnly})
}
zerolog.DefaultContextLogger = &log.Logger
return nil
}

Expand Down Expand Up @@ -139,7 +140,7 @@ func getKubernetesClient() (kubernetes.Interface, radixclient.Interface) {
log.Fatal().Err(err).Msg("getClusterConfig radix client")
}

log.Printf("Successfully constructed k8s client to API server %v", config.Host)
log.Info().Msgf("Successfully constructed k8s client to API server %v", config.Host)
return client, radixClient
}

Expand All @@ -153,6 +154,7 @@ func getKubeUtil() (*kube.Kube, error) {
}

func runFunctionPeriodically(ctx context.Context, someFunc func(ctx context.Context) error) error {
logger := log.Ctx(ctx)
cleanupDays, cleanupDaysErr := rootCmd.Flags().GetStringSlice(settings.CleanUpDaysOption)
cleanupStart, cleanupStartErr := rootCmd.Flags().GetString(settings.CleanUpStartOption)
cleanupEnd, cleanupEndErr := rootCmd.Flags().GetString(settings.CleanUpEndOption)
Expand All @@ -164,60 +166,64 @@ func runFunctionPeriodically(ctx context.Context, someFunc func(ctx context.Cont
timezone := "Local"
window, err := timewindow.New(cleanupDays, cleanupStart, cleanupEnd, timezone)
if err != nil {
log.Fatal().Err(err).Msg("Failed to build time window")
logger.Fatal().Err(err).Msg("Failed to build time window")
}
source := rand.NewSource(time.Now().UnixNano())
tick := delaytick.New(source, period)
for range tick {
pointInTime := time.Now()
if window.Contains(pointInTime) {
log.Info().Msgf("Start listing RRs for stop %s", pointInTime)
logger.Info().Msgf("Start listing RRs for stop %s", pointInTime)
err := someFunc(ctx)
if err != nil {
return err
}
} else {
log.Info().Msgf("%s is outside of window. Continue sleeping", pointInTime)
logger.Info().Msgf("%s is outside of window. Continue sleeping", pointInTime)
}
}
log.Warn().Msgf("execution reached code which was presumably after an inescapable loop")
logger.Warn().Msgf("execution reached code which was presumably after an inescapable loop")
return nil
}

func getTooInactiveRrs(ctx context.Context, kubeClient *kube.Kube, inactivityLimit time.Duration, action string) ([]v1.RadixRegistration, error) {
logger := log.Ctx(ctx)
rrs, err := kubeClient.ListRegistrations(ctx)
if err != nil {
return nil, err
}
var rrsForDeletion []v1.RadixRegistration
for _, rr := range rrs {
logger := logger.With().Str("appName", rr.Name).Logger()
ctx = logger.WithContext(ctx)

if isWhitelisted(rr) {
log.Debug().Str("appName", rr.Name).Msg("RadixRegistration is whitelisted, skipping")
logger.Debug().Msg("RadixRegistration is whitelisted, skipping")
continue
}
ra, err := getRadixApplication(kubeClient, rr.Name)
ra, err := getRadixApplication(ctx, kubeClient, rr.Name)
if kubeerrors.IsNotFound(err) {
log.Debug().Str("appName", rr.Name).Msg("could not find RadixApplication, continuing...")
logger.Debug().Msg("could not find RadixApplication, continuing...")
continue
}
if err != nil {
return nil, err
}
namespaces := getRuntimeNamespaces(ra)
log.Debug().Str("appName", rr.Name).Msgf("found namespaces %s associated with RadixRegistration", strings.Join(namespaces, ", "))
rdsForRr, err := getRadixDeploymentsInNamespaces(kubeClient, namespaces)
log.Debug().Str("appName", rr.Name).Msgf("RadixRegistration has %d RadixDeployments", len(rdsForRr))
logger.Debug().Msgf("found namespaces %s associated with RadixRegistration", strings.Join(namespaces, ", "))
rdsForRr, err := getRadixDeploymentsInNamespaces(ctx, kubeClient, namespaces)
if err != nil {
return nil, err
}
rjsForRr, err := getRadixJobsInNamespace(kubeClient, utils.GetAppNamespace(rr.Name))
log.Debug().Str("appName", rr.Name).Msgf("RadixRegistration has %d RadixJobs", len(rdsForRr))
logger.Debug().Msgf("RadixRegistration has %d RadixDeployments", len(rdsForRr))
rjsForRr, err := getRadixJobsInNamespace(ctx, kubeClient, utils.GetAppNamespace(rr.Name))
logger.Debug().Msgf("RadixRegistration has %d RadixJobs", len(rdsForRr))
if err != nil {
return nil, err
}

log.Debug().Str("appName", rr.Name).Msg("Checking timestamps of RadixDeployments and RadixJobs")
isInactive, err := rrIsInactive(rr.CreationTimestamp, rdsForRr, rjsForRr, inactivityLimit, action)
logger.Debug().Msg("Checking timestamps of RadixDeployments and RadixJobs")
isInactive, err := rrIsInactive(ctx, rr.CreationTimestamp, rdsForRr, rjsForRr, inactivityLimit, action)
if err != nil {
return nil, err
}
Expand All @@ -228,18 +234,18 @@ func getTooInactiveRrs(ctx context.Context, kubeClient *kube.Kube, inactivityLim
return rrsForDeletion, nil
}

func getRadixJobsInNamespace(kubeClient *kube.Kube, namespace string) ([]v1.RadixJob, error) {
rjs, err := kubeClient.RadixClient().RadixV1().RadixJobs(namespace).List(context.TODO(), metav1.ListOptions{})
func getRadixJobsInNamespace(ctx context.Context, kubeClient *kube.Kube, namespace string) ([]v1.RadixJob, error) {
rjs, err := kubeClient.RadixClient().RadixV1().RadixJobs(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, err
}
return rjs.Items, nil
}

func getRadixDeploymentsInNamespaces(kubeClient *kube.Kube, namespaces []string) ([]v1.RadixDeployment, error) {
func getRadixDeploymentsInNamespaces(ctx context.Context, kubeClient *kube.Kube, namespaces []string) ([]v1.RadixDeployment, error) {
rdsForRr := make([]v1.RadixDeployment, 0)
for _, ns := range namespaces {
rds, err := kubeClient.RadixClient().RadixV1().RadixDeployments(ns).List(context.TODO(), metav1.ListOptions{})
rds, err := kubeClient.RadixClient().RadixV1().RadixDeployments(ns).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, err
}
Expand All @@ -256,8 +262,8 @@ func getRuntimeNamespaces(ra *v1.RadixApplication) []string {
return namespaces
}

func getRadixApplication(kubeClient *kube.Kube, appName string) (*v1.RadixApplication, error) {
return kubeClient.RadixClient().RadixV1().RadixApplications(utils.GetAppNamespace(appName)).Get(context.TODO(), appName, metav1.GetOptions{})
func getRadixApplication(ctx context.Context, kubeClient *kube.Kube, appName string) (*v1.RadixApplication, error) {
return kubeClient.RadixClient().RadixV1().RadixApplications(utils.GetAppNamespace(appName)).Get(ctx, appName, metav1.GetOptions{})
}

func isWhitelisted(rr *v1.RadixRegistration) bool {
Expand All @@ -270,33 +276,40 @@ func isWhitelisted(rr *v1.RadixRegistration) bool {
return false
}

func rrIsInactive(rrCreationTimestamp metav1.Time, rds []v1.RadixDeployment, rjs []v1.RadixJob, inactivityLimit time.Duration, action string) (bool, error) {
if len(rds) == 0 && rrCreationTimestamp.Add(inactivityLimit).Before(time.Now()) {
log.Debug().Msgf("no RadixDeployments found, assuming RadixRegistration is inactive")
func rrIsInactive(ctx context.Context, rrCreationTimestamp metav1.Time, rds []v1.RadixDeployment, rjs []v1.RadixJob, inactivityLimit time.Duration, action string) (bool, error) {
logger := log.Ctx(ctx)
if rrCreationTimestamp.Add(inactivityLimit).After(time.Now()) {
logger.Debug().Msgf("RadixRegistration is newer than inactivity limit, assuming active")
return false, nil
}

if len(rds) == 0 {
logger.Debug().Msgf("no RadixDeployments found, assuming RadixRegistration is inactive")
return true, nil
}

latestRadixDeployment := SortDeploymentsByActiveFromTimestampAsc(rds)[len(rds)-1]
latestRadixDeploymentTimestamp := latestRadixDeployment.Status.ActiveFrom
log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent radixDeployment is %s, active from %s, %d hours ago", latestRadixDeployment.Name, latestRadixDeploymentTimestamp.Format(time.RFC822), int(time.Since(latestRadixDeploymentTimestamp.Time).Hours()))
logger.Debug().Msgf("most recent radixDeployment is %s, active from %s, %d hours ago", latestRadixDeployment.Name, latestRadixDeploymentTimestamp.Format(time.RFC822), int(time.Since(latestRadixDeploymentTimestamp.Time).Hours()))

latestRadixJobTimestamp := metav1.Time{Time: time.Unix(0, 0)}
latestRadixJob := getLatestRadixJob(rjs)
if latestRadixJob != nil {
latestRadixJobTimestamp = *latestRadixJob.Status.Created
log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent radixJob was %s, created %s, %d hours ago", latestRadixJob.Name, latestRadixJobTimestamp.Format(time.RFC822), int(time.Since(latestRadixJobTimestamp.Time).Hours()))
logger.Debug().Msgf("most recent radixJob was %s, created %s, %d hours ago", latestRadixJob.Name, latestRadixJobTimestamp.Format(time.RFC822), int(time.Since(latestRadixJobTimestamp.Time).Hours()))
}

latestUserMutationTimestamp, err := getLastUserMutationTimestamp(latestRadixDeployment)
if err != nil {
return false, err
}

log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent manual user activity was %s, %d hours ago", latestUserMutationTimestamp.Format(time.RFC822), int(time.Since(latestUserMutationTimestamp.Time).Hours()))
log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("most recent creation of RR was %s, %d hours ago", rrCreationTimestamp, int(time.Since(rrCreationTimestamp.Time).Hours()))
logger.Debug().Msgf("most recent manual user activity was %s, %d hours ago", latestUserMutationTimestamp.Format(time.RFC822), int(time.Since(latestUserMutationTimestamp.Time).Hours()))
logger.Debug().Msgf("most recent creation of RR was %s, %d hours ago", rrCreationTimestamp, int(time.Since(rrCreationTimestamp.Time).Hours()))
lastActivity := getMostRecentTimestamp(&latestRadixJobTimestamp, latestUserMutationTimestamp, &latestRadixDeploymentTimestamp, &rrCreationTimestamp)
log.Debug().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("lastActivity was %s, %d hours ago", lastActivity, int(time.Since(lastActivity.Time).Hours()))
logger.Debug().Msgf("lastActivity was %s, %d hours ago", lastActivity, int(time.Since(lastActivity.Time).Hours()))
if tooLongInactivity(lastActivity, inactivityLimit) {
log.Info().Str("appName", latestRadixDeployment.Spec.AppName).Msgf("last activity was %d hours ago, which is more than %d hours ago, marking for %s", int(time.Since(lastActivity.Time).Hours()), int(inactivityLimit.Hours()), action)
logger.Debug().Msgf("last activity was %d hours ago, which is more than %d hours ago, marking for %s", int(time.Since(lastActivity.Time).Hours()), int(inactivityLimit.Hours()), action)
return true, nil
}
return false, nil
Expand Down Expand Up @@ -352,10 +365,12 @@ func isRJ1CreatedAfterRJ2(rj1 *v1.RadixJob, rj2 *v1.RadixJob) bool {
}

func SortDeploymentsByActiveFromTimestampAsc(rds []v1.RadixDeployment) []v1.RadixDeployment {
sort.Slice(rds, func(i, j int) bool {
return isRD1ActiveAfterRD2(&rds[j], &rds[i])
target := make([]v1.RadixDeployment, len(rds))
copy(target, rds)
sort.Slice(target, func(i, j int) bool {
return isRD1ActiveAfterRD2(&target[j], &target[i])
})
return rds
return target
}

func isRD1ActiveAfterRD2(rd1 *v1.RadixDeployment, rd2 *v1.RadixDeployment) bool {
Expand Down
34 changes: 18 additions & 16 deletions radix-cluster-cleanup/cmd/stopRrs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/equinor/radix-cluster-cleanup/pkg/settings"
"github.com/equinor/radix-common/utils/pointers"
"github.com/equinor/radix-common/utils/slice"
"github.com/equinor/radix-operator/pkg/apis/kube"
v1 "github.com/equinor/radix-operator/pkg/apis/radix/v1"
"github.com/rs/zerolog/log"
Expand Down Expand Up @@ -62,44 +63,45 @@ func stopRrs(ctx context.Context) error {
return err
}
for _, rr := range tooInactiveRrs {
err := stopRr(kubeClient, rr)
ctx = log.Ctx(ctx).With().Str("appName", rr.Name).Logger().WithContext(ctx)
err := stopRr(ctx, kubeClient, rr)
if err != nil {
return err
}
}
return nil
}

func stopRr(kubeClient *kube.Kube, rr v1.RadixRegistration) error {
ra, err := getRadixApplication(kubeClient, rr.Name)
func stopRr(ctx context.Context, kubeClient *kube.Kube, rr v1.RadixRegistration) error {
ra, err := getRadixApplication(ctx, kubeClient, rr.Name)
if err != nil {
return err
}
namespaces := getRuntimeNamespaces(ra)
rdsForRr, err := getRadixDeploymentsInNamespaces(kubeClient, namespaces)
for _, rd := range rdsForRr {
isActive := rdIsActive(rd)
if err != nil {
rdsForRr, err := getRadixDeploymentsInNamespaces(ctx, kubeClient, namespaces)
if err != nil {
return err
}

for _, rd := range slice.FindAll(rdsForRr, rdIsActive) {
ctx = log.Ctx(ctx).With().Str("deployment", rd.Name).Logger().WithContext(ctx)
if err := scaleRdComponentsToZeroReplicas(ctx, kubeClient, rd); err != nil {
return err
}
if isActive {
err := scaleRdComponentsToZeroReplicas(kubeClient, rd)
if err != nil {
return err
}
}
}

return nil
}

func scaleRdComponentsToZeroReplicas(kubeClient *kube.Kube, rd v1.RadixDeployment) error {
func scaleRdComponentsToZeroReplicas(ctx context.Context, kubeClient *kube.Kube, rd v1.RadixDeployment) error {
logger := log.Ctx(ctx)
componentNames := make([]string, 0)
for i := range rd.Spec.Components {
rd.Spec.Components[i].ReplicasOverride = pointers.Ptr(0)
componentNames = append(componentNames, rd.Spec.Components[i].Name)
}
_, err := kubeClient.RadixClient().RadixV1().RadixDeployments(rd.Namespace).Update(context.TODO(), &rd, metav1.UpdateOptions{})
log.Info().Str("appName", rd.Spec.AppName).Str("deployment", rd.Name).Msgf("scaled component %s in rd %s to 0 replicas", strings.Join(componentNames, ", "), rd.Name)
_, err := kubeClient.RadixClient().RadixV1().RadixDeployments(rd.Namespace).Update(ctx, &rd, metav1.UpdateOptions{})
logger.Info().Msgf("scaled components %s to 0 replicas", strings.Join(componentNames, ", "))
if err != nil {
return err
}
Expand Down
1 change: 0 additions & 1 deletion radix-cluster-cleanup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@ func main() {
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGTERM)
defer cancel()
cmd.Execute(ctx)
<-ctx.Done()
}
Loading