Skip to content

Commit

Permalink
Requeue for Power state polling and ensuring ServerClaim binding
Browse files Browse the repository at this point in the history
- Requeue `Available` and `Reserved` servers to reconcile and poll power states
- To avoid setting PXE boot for all reserved `Servers`, we only set it if the server is powered off, i.e. when a server transitions from `Available` (and is powered off) to `Reserved`
- Reconcile unbound `ServerClaim` when a `Server` (e.g. its label) is changed, to re-check available servers if no server was found for the claim initially
  • Loading branch information
defo89 committed Sep 19, 2024
1 parent d648ac8 commit 945c7a8
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
10 changes: 7 additions & 3 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func main() {
var enforceFirstBoot bool
var enforcePowerOff bool
var serverResyncInterval time.Duration
var claimResyncInterval time.Duration
var powerPollingInterval time.Duration
var powerPollingTimeout time.Duration

Expand All @@ -69,8 +70,10 @@ func main() {
flag.DurationVar(&powerPollingTimeout, "power-polling-timeout", 2*time.Minute, "Timeout for polling power state")
flag.DurationVar(&registryResyncInterval, "registry-resync-interval", 10*time.Second,
"Defines the interval at which the registry is polled for new server information.")
flag.DurationVar(&serverResyncInterval, "server-resync-interval", 30*time.Second,
flag.DurationVar(&serverResyncInterval, "server-resync-interval", 2*time.Minute,
"Defines the interval at which the server is polled.")
flag.DurationVar(&claimResyncInterval, "claim-resync-interval", 2*time.Minute,
"Defines the interval at which the server claim is polled.")
flag.StringVar(&registryURL, "registry-url", "", "The URL of the registry.")
flag.StringVar(&registryProtocol, "registry-protocol", "http", "The protocol to use for the registry.")
flag.IntVar(&registryPort, "registry-port", 10000, "The port to use for the registry.")
Expand Down Expand Up @@ -230,8 +233,9 @@ func main() {
os.Exit(1)
}
if err = (&controller.ServerClaimReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
ResyncInterval: claimResyncInterval,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "ServerClaim")
os.Exit(1)
Expand Down
18 changes: 10 additions & 8 deletions internal/controller/server_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve

requeue, err := r.ensureServerStateTransition(ctx, log, server)
if requeue && err == nil {
return ctrl.Result{Requeue: requeue, RequeueAfter: r.RegistryResyncInterval}, nil
return ctrl.Result{Requeue: requeue, RequeueAfter: r.ResyncInterval}, nil
}
if err != nil && !apierrors.IsNotFound(err) {
return ctrl.Result{}, fmt.Errorf("failed to ensure server state transition: %w", err)
Expand Down Expand Up @@ -318,7 +318,7 @@ func (r *ServerReconciler) handleAvailableState(ctx context.Context, log logr.Lo
return false, fmt.Errorf("failed to ensure server indicator led: %w", err)
}
log.V(1).Info("Reconciled available state")
return false, nil
return true, nil
}

func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
Expand All @@ -328,11 +328,13 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Log
}
log.V(1).Info("Server boot configuration is ready")

if err := r.pxeBootServer(ctx, log, server); err != nil {
return false, fmt.Errorf("failed to boot server: %w", err)
//TODO: handle working Reserved Server that was suddenly powered off but needs to boot from disk
if server.Status.PowerState == metalv1alpha1.ServerOffPowerState {
if err := r.pxeBootServer(ctx, log, server); err != nil {
return false, fmt.Errorf("failed to boot server: %w", err)
}
log.V(1).Info("Server is powered off, booting Server in PXE")
}
log.V(1).Info("Booted Server in PXE")

if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return false, fmt.Errorf("failed to ensure server power state: %w", err)
}
Expand All @@ -341,7 +343,7 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Log
return false, fmt.Errorf("failed to ensure server indicator led: %w", err)
}
log.V(1).Info("Reconciled reserved state")
return false, nil
return true, nil
}

func (r *ServerReconciler) ensureServerBootConfigRef(ctx context.Context, server *metalv1alpha1.Server, config *metalv1alpha1.ServerBootConfiguration) error {
Expand Down Expand Up @@ -890,7 +892,7 @@ func (r *ServerReconciler) SetupWithManager(mgr ctrl.Manager) error {

// Start a goroutine to send events to the channel at the specified interval
go func() {
ticker := time.NewTicker(r.RegistryResyncInterval)
ticker := time.NewTicker(r.ResyncInterval)
defer ticker.Stop()

for {
Expand Down
10 changes: 9 additions & 1 deletion internal/controller/serverclaim_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package controller
import (
"context"
"fmt"
"time"

"k8s.io/apimachinery/pkg/labels"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand All @@ -32,7 +33,8 @@ const (
// ServerClaimReconciler reconciles a ServerClaim object
type ServerClaimReconciler struct {
client.Client
Scheme *runtime.Scheme
Scheme *runtime.Scheme
ResyncInterval time.Duration
}

// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=serverclaims,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -439,6 +441,12 @@ func (r *ServerClaimReconciler) enqueueServerClaimByRefs() handler.EventHandler
})
return req
}
if claim.Spec.ServerRef == nil {
req = append(req, reconcile.Request{
NamespacedName: types.NamespacedName{Namespace: claim.Namespace, Name: claim.Name},
})
return req
}
}
return req
})
Expand Down

0 comments on commit 945c7a8

Please sign in to comment.