Skip to content

Commit

Permalink
Add the dataMoverPrepareTimeout and resourceTimeout to the DPA's Node-Agent
Browse files Browse the repository at this point in the history

Fixes openshift#1368

Adds the following fields to the Node-Agent within the DPA:
 - dataMoverPrepareTimeout
   How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.
 - resourceTimeout
   How long to wait for resource processes which are not covered by other specific
   timeout parameters. Default is 10 minutes.

Signed-off-by: Michal Pryc <[email protected]>
  • Loading branch information
mpryc committed Mar 6, 2025
1 parent 14827fa commit 39905c5
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 12 deletions.
8 changes: 8 additions & 0 deletions api/v1alpha1/dataprotectionapplication_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,19 @@ type NodeAgentCommonFields struct {
// timeout defines the NodeAgent timeout, default value is 1h
// +optional
Timeout string `json:"timeout,omitempty"`
// How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.
// +optional
DataMoverPrepareTimeout *metav1.Duration `json:"dataMoverPrepareTimeout,omitempty"`
// How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.
// +optional
ResourceTimeout *metav1.Duration `json:"resourceTimeout,omitempty"`
// Pod specific configuration
PodConfig *PodConfig `json:"podConfig,omitempty"`
}

// NodeAgentConfig holds the configuration for the Node Agent server.
// https://github.com/openshift/velero/blob/8c8a6cccd78b78bd797e40189b0b9bee46a97f9e/pkg/cmd/cli/nodeagent/server.go#L87-L92
type NodeAgentConfig struct {
// Embedding NodeAgentCommonFields
// +optional
Expand Down
10 changes: 10 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions bundle/manifests/oadp.openshift.io_dataprotectionapplications.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ spec:
nodeAgent:
description: NodeAgent is needed to allow selection between kopia or restic
properties:
dataMoverPrepareTimeout:
description: How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.
type: string
enable:
description: |-
enable defines a boolean pointer whether we want the daemonset to
Expand Down Expand Up @@ -437,6 +440,9 @@ spec:
type: object
type: array
type: object
resourceTimeout:
description: How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.
type: string
supplementalGroups:
description: supplementalGroups defines the linux groups to be applied to the NodeAgent Pod
items:
Expand All @@ -461,6 +467,9 @@ spec:
restic is for backwards compatibility and is replaced by the nodeAgent
restic will be removed in the future
properties:
dataMoverPrepareTimeout:
description: How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.
type: string
enable:
description: |-
enable defines a boolean pointer whether we want the daemonset to
Expand Down Expand Up @@ -692,6 +701,9 @@ spec:
type: object
type: array
type: object
resourceTimeout:
description: How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.
type: string
supplementalGroups:
description: supplementalGroups defines the linux groups to be applied to the NodeAgent Pod
items:
Expand Down
12 changes: 12 additions & 0 deletions config/crd/bases/oadp.openshift.io_dataprotectionapplications.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ spec:
nodeAgent:
description: NodeAgent is needed to allow selection between kopia or restic
properties:
dataMoverPrepareTimeout:
description: How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.
type: string
enable:
description: |-
enable defines a boolean pointer whether we want the daemonset to
Expand Down Expand Up @@ -437,6 +440,9 @@ spec:
type: object
type: array
type: object
resourceTimeout:
description: How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.
type: string
supplementalGroups:
description: supplementalGroups defines the linux groups to be applied to the NodeAgent Pod
items:
Expand All @@ -461,6 +467,9 @@ spec:
restic is for backwards compatibility and is replaced by the nodeAgent
restic will be removed in the future
properties:
dataMoverPrepareTimeout:
description: How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.
type: string
enable:
description: |-
enable defines a boolean pointer whether we want the daemonset to
Expand Down Expand Up @@ -692,6 +701,9 @@ spec:
type: object
type: array
type: object
resourceTimeout:
description: How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.
type: string
supplementalGroups:
description: supplementalGroups defines the linux groups to be applied to the NodeAgent Pod
items:
Expand Down
13 changes: 13 additions & 0 deletions internal/controller/nodeagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,19 @@ func (r *DataProtectionApplicationReconciler) customizeNodeAgentDaemonset(ds *ap
nodeAgentContainer.ImagePullPolicy = imagePullPolicy
setContainerDefaults(nodeAgentContainer)

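// Append the data mover prepare timeout and the resource timeout to the nodeAgent
// container args, each only when it is set on the DPA. Both are skipped entirely
// when useResticConf is true.
// NOTE(review): the CRD also exposes dataMoverPrepareTimeout and resourceTimeout under
// the restic section, where they appear to be ignored by this code path — confirm
// this is intended.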
if !useResticConf {
if dpa.Spec.Configuration.NodeAgent.DataMoverPrepareTimeout != nil {
nodeAgentContainer.Args = append(nodeAgentContainer.Args, fmt.Sprintf("--data-mover-prepare-timeout=%s", dpa.Spec.Configuration.NodeAgent.DataMoverPrepareTimeout.Duration))
}
if dpa.Spec.Configuration.NodeAgent.ResourceTimeout != nil {
nodeAgentContainer.Args = append(nodeAgentContainer.Args, fmt.Sprintf("--resource-timeout=%s", dpa.Spec.Configuration.NodeAgent.ResourceTimeout.Duration))
}
}

// Apply unsupported server args from the specified ConfigMap.
// This will completely override any previously set args for the node-agent server.
// If the ConfigMap exists and is not empty, its key-value pairs will be used as the new CLI arguments.
if configMapName, ok := dpa.Annotations[common.UnsupportedNodeAgentServerArgsAnnotation]; ok {
if configMapName != "" {
unsupportedServerArgsCM := corev1.ConfigMap{}
Expand Down
104 changes: 92 additions & 12 deletions internal/controller/nodeagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package controller

import (
"context"
"fmt"
"os"
"reflect"
"slices"
"testing"
"time"

"github.com/go-logr/logr"
"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -230,18 +232,20 @@ var _ = ginkgo.Describe("Test ReconcileNodeAgentDaemonSet function", func() {
})

// TestBuiltNodeAgentDaemonSetOptions holds the optional overrides passed to
// createTestBuiltNodeAgentDaemonSet when building the DaemonSet a test case
// expects to get back.
type TestBuiltNodeAgentDaemonSetOptions struct {
args []string
labels map[string]string
annotations map[string]string
volumes []corev1.Volume
volumeMounts []corev1.VolumeMount
env []corev1.EnvVar
dnsPolicy corev1.DNSPolicy
dnsConfig *corev1.PodDNSConfig
resourceLimits corev1.ResourceList
resourceRequests corev1.ResourceList
toleration []corev1.Toleration
nodeSelector map[string]string
args []string
labels map[string]string
annotations map[string]string
volumes []corev1.Volume
volumeMounts []corev1.VolumeMount
env []corev1.EnvVar
dnsPolicy corev1.DNSPolicy
dnsConfig *corev1.PodDNSConfig
resourceLimits corev1.ResourceList
resourceRequests corev1.ResourceList
// dataMoverPrepareTimeout, when non-nil, is appended to the first container's
// args as --data-mover-prepare-timeout=<value>.
dataMoverPrepareTimeout *string
// resourceTimeout, when non-nil, is appended to the first container's args as
// --resource-timeout=<value>.
resourceTimeout *string
toleration []corev1.Toleration
nodeSelector map[string]string
}

func createTestBuiltNodeAgentDaemonSet(options TestBuiltNodeAgentDaemonSetOptions) *appsv1.DaemonSet {
Expand Down Expand Up @@ -457,6 +461,14 @@ func createTestBuiltNodeAgentDaemonSet(options TestBuiltNodeAgentDaemonSetOption
testBuiltNodeAgentDaemonSet.Spec.Template.Spec.DNSConfig = options.dnsConfig
}

if options.dataMoverPrepareTimeout != nil {
testBuiltNodeAgentDaemonSet.Spec.Template.Spec.Containers[0].Args = append(testBuiltNodeAgentDaemonSet.Spec.Template.Spec.Containers[0].Args, fmt.Sprintf("--data-mover-prepare-timeout=%s", *options.dataMoverPrepareTimeout))
}

if options.resourceTimeout != nil {
testBuiltNodeAgentDaemonSet.Spec.Template.Spec.Containers[0].Args = append(testBuiltNodeAgentDaemonSet.Spec.Template.Spec.Containers[0].Args, fmt.Sprintf("--resource-timeout=%s", *options.resourceTimeout))
}

return testBuiltNodeAgentDaemonSet
}

Expand Down Expand Up @@ -595,6 +607,74 @@ func TestDPAReconciler_buildNodeAgentDaemonset(t *testing.T) {
annotations: map[string]string{"test-annotation": "awesome annotation"},
}),
},
{
name: "valid DPA CR with DataMoverPrepareTimeout, NodeAgent DaemonSet is built with DataMoverPrepareTimeout",
dpa: createTestDpaWith(
nil,
oadpv1alpha1.DataProtectionApplicationSpec{
Configuration: &oadpv1alpha1.ApplicationConfig{
Velero: &oadpv1alpha1.VeleroConfig{},
NodeAgent: &oadpv1alpha1.NodeAgentConfig{
NodeAgentCommonFields: oadpv1alpha1.NodeAgentCommonFields{
DataMoverPrepareTimeout: &metav1.Duration{Duration: 10 * time.Second},
},
UploaderType: "kopia",
},
},
},
),
clientObjects: []client.Object{testGenericInfrastructure},
nodeAgentDaemonSet: testNodeAgentDaemonSet.DeepCopy(),
wantNodeAgentDaemonSet: createTestBuiltNodeAgentDaemonSet(TestBuiltNodeAgentDaemonSetOptions{
dataMoverPrepareTimeout: ptr.To("10s"),
}),
},
{
name: "valid DPA CR with ResourceTimeout, NodeAgent DaemonSet is built with ResourceTimeout",
dpa: createTestDpaWith(
nil,
oadpv1alpha1.DataProtectionApplicationSpec{
Configuration: &oadpv1alpha1.ApplicationConfig{
Velero: &oadpv1alpha1.VeleroConfig{},
NodeAgent: &oadpv1alpha1.NodeAgentConfig{
NodeAgentCommonFields: oadpv1alpha1.NodeAgentCommonFields{
ResourceTimeout: &metav1.Duration{Duration: 100 * time.Minute},
},
UploaderType: "kopia",
},
},
},
),
clientObjects: []client.Object{testGenericInfrastructure},
nodeAgentDaemonSet: testNodeAgentDaemonSet.DeepCopy(),
wantNodeAgentDaemonSet: createTestBuiltNodeAgentDaemonSet(TestBuiltNodeAgentDaemonSetOptions{
resourceTimeout: ptr.To("1h40m0s"),
}),
},
{
name: "valid DPA CR with DataMoverPrepareTimeout and ResourceTimeout, NodeAgent DaemonSet is built with DataMoverPrepareTimeout and ResourceTimeout",
dpa: createTestDpaWith(
nil,
oadpv1alpha1.DataProtectionApplicationSpec{
Configuration: &oadpv1alpha1.ApplicationConfig{
Velero: &oadpv1alpha1.VeleroConfig{},
NodeAgent: &oadpv1alpha1.NodeAgentConfig{
NodeAgentCommonFields: oadpv1alpha1.NodeAgentCommonFields{
DataMoverPrepareTimeout: &metav1.Duration{Duration: 10 * time.Second},
ResourceTimeout: &metav1.Duration{Duration: 10 * time.Minute},
},
UploaderType: "kopia",
},
},
},
),
clientObjects: []client.Object{testGenericInfrastructure},
nodeAgentDaemonSet: testNodeAgentDaemonSet.DeepCopy(),
wantNodeAgentDaemonSet: createTestBuiltNodeAgentDaemonSet(TestBuiltNodeAgentDaemonSetOptions{
dataMoverPrepareTimeout: ptr.To("10s"),
resourceTimeout: ptr.To("10m0s"),
}),
},
{
name: "valid DPA CR with Unsupported NodeAgent Server Args, NodeAgent DaemonSet is built with Unsupported NodeAgent Server Args",
dpa: createTestDpaWith(
Expand Down

0 comments on commit 39905c5

Please sign in to comment.