From dc5ac0ffbb5a7a457361953f7be7daaa2d05b805 Mon Sep 17 00:00:00 2001
From: Patryk Strusiewicz-Surmacki
Date: Mon, 25 Mar 2024 18:26:48 +0100
Subject: [PATCH] Added support for gradual rollout

Signed-off-by: Patryk Strusiewicz-Surmacki
---
 Makefile                                      | 10 +
 api/v1alpha1/nodeconfig_types.go              | 92 ++++
 api/v1alpha1/nodeconfigprocess_types.go       | 51 ++
 api/v1alpha1/zz_generated.deepcopy.go         | 181 +++++++
 cmd/configurator/main.go                      | 226 +++++++++
 cmd/manager/main.go                           | 31 +-
 config/certmanager/kustomization.yaml         | 3 +
 config/configurator/configurator.yaml         | 66 +++
 config/configurator/kustomization.yaml        | 12 +
 ...schiff.telekom.de_nodeconfigprocesses.yaml | 57 +++
 ...network.schiff.telekom.de_nodeconfigs.yaml | 268 ++++++++++
 config/crd/kustomization.yaml                 | 2 +
 config/default/kustomization.yaml             | 1 +
 config/rbac/role.yaml                         | 38 ++
 configurator.Dockerfile                       | 28 ++
 .../layer2networkconfiguration_controller.go  | 24 +-
 controllers/node_controller.go                | 64 +++
 controllers/nodeconfig_controller.go          | 85 ++++
 controllers/routingtable_controller.go        | 2 +-
 .../vrfrouteconfiguration_controller.go       | 2 +-
 go.mod                                        | 4 +-
 go.sum                                        | 2 +
 pkg/config_manager/config_manager.go          | 419 ++++++++++++++++
 pkg/config_manager/config_manager_test.go     | 438 +++++++++++++++++
 pkg/config_map/config_map.go                  | 63 +++
 pkg/config_map/config_map_test.go             | 80 +++
 pkg/config_map/mock/mock_config_map.go        | 116 +++++
 pkg/healthcheck/healthcheck.go                | 54 ++-
 pkg/healthcheck/healthcheck_test.go           | 50 +-
 pkg/managerconfig/managerconfig_test.go       | 8 +-
 pkg/nodeconfig/mock/mock_nodeconfig.go        | 255 ++++++++++
 pkg/nodeconfig/nodeconfig.go                  | 458 ++++++++++++++++++
 pkg/nodeconfig/nodeconfig_test.go             | 268 ++++++++++
 pkg/reconciler/config_reconciler.go           | 204 ++++++++
 pkg/reconciler/layer2.go                      | 67 +--
 pkg/reconciler/layer3.go                      | 64 ++-
 pkg/reconciler/mock/mock_config_reconciler.go | 51 ++
 pkg/reconciler/mock/mock_node_reconciler.go   | 49 ++
 pkg/reconciler/node_reconciler.go             | 151 ++++++
 pkg/reconciler/reconciler.go                  | 172 ++++++-
 pkg/reconciler/reconciler_test.go             | 141 ++++++
 41 files changed, 4151 insertions(+), 206 deletions(-)
 create mode 100644 api/v1alpha1/nodeconfig_types.go
 create mode 100644 api/v1alpha1/nodeconfigprocess_types.go
 create mode 100644 cmd/configurator/main.go
 create mode 100644 config/configurator/configurator.yaml
 create mode 100644 config/configurator/kustomization.yaml
 create mode 100644 config/crd/bases/network.schiff.telekom.de_nodeconfigprocesses.yaml
 create mode 100644 config/crd/bases/network.schiff.telekom.de_nodeconfigs.yaml
 create mode 100644 configurator.Dockerfile
 create mode 100644 controllers/node_controller.go
 create mode 100644 controllers/nodeconfig_controller.go
 create mode 100644 pkg/config_manager/config_manager.go
 create mode 100644 pkg/config_manager/config_manager_test.go
 create mode 100644 pkg/config_map/config_map.go
 create mode 100644 pkg/config_map/config_map_test.go
 create mode 100644 pkg/config_map/mock/mock_config_map.go
 create mode 100644 pkg/nodeconfig/mock/mock_nodeconfig.go
 create mode 100644 pkg/nodeconfig/nodeconfig.go
 create mode 100644 pkg/nodeconfig/nodeconfig_test.go
 create mode 100644 pkg/reconciler/config_reconciler.go
 create mode 100644 pkg/reconciler/mock/mock_config_reconciler.go
 create mode 100644 pkg/reconciler/mock/mock_node_reconciler.go
 create mode 100644 pkg/reconciler/node_reconciler.go
 create mode 100644 pkg/reconciler/reconciler_test.go

diff --git a/Makefile b/Makefile
index 623026d8..8419c8c6 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,8 @@
 IMG ?= ghcr.io/telekom/das-schiff-network-operator:latest
 # Sidecar image URL to use all building/pushing image targets
 SIDECAR_IMG ?= ghcr.io/telekom/frr-exporter:latest
+# Configurator image URL to use all building/pushing image targets
+CONFIGURATOR_IMG ?= ghcr.io/telekom/configurator:latest
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
 ENVTEST_K8S_VERSION = 1.25
@@ -87,6 +89,10 @@ docker-build: test ## Build docker image with the manager.
 docker-build-sidecar: test ## Build docker image with the manager.
 	docker build -t ${SIDECAR_IMG} -f frr-exporter.Dockerfile .
 
+.PHONY: docker-build-configurator
+docker-build-configurator: test ## Build docker image with the configurator.
+	docker build -t ${CONFIGURATOR_IMG} -f configurator.Dockerfile .
+
 .PHONY: docker-push
 docker-push: ## Push docker image with the manager.
 	docker push ${IMG}
@@ -95,6 +101,9 @@ docker-push: ## Push docker image with the manager.
 docker-push-sidecar: ## Push docker image with the manager.
 	docker push ${SIDECAR_IMG}
 
+.PHONY: docker-push-configurator
+docker-push-configurator: ## Push docker image with the configurator.
+	docker push ${CONFIGURATOR_IMG}
 
 ##@ Release
@@ -135,6 +144,7 @@ uninstall-certs: manifests kustomize ## Uninstall certs
 deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
 	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
 	cd config/manager && $(KUSTOMIZE) edit set image frr-exporter=${SIDECAR_IMG}
+	cd config/configurator && $(KUSTOMIZE) edit set image configurator=${CONFIGURATOR_IMG}
 	$(KUSTOMIZE) build config/default | kubectl apply -f -
 
 .PHONY: undeploy
diff --git a/api/v1alpha1/nodeconfig_types.go b/api/v1alpha1/nodeconfig_types.go
new file mode 100644
index 00000000..f1b8a92d
--- /dev/null
+++ b/api/v1alpha1/nodeconfig_types.go
@@ -0,0 +1,92 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	"reflect"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NodeConfigSpec defines the desired state of NodeConfig.
+type NodeConfigSpec struct {
+	Layer2       []Layer2NetworkConfigurationSpec `json:"layer2"`
+	Vrf          []VRFRouteConfigurationSpec      `json:"vrf"`
+	RoutingTable []RoutingTableSpec               `json:"routingTable"`
+}
+
+// NodeConfigStatus defines the observed state of NodeConfig.
+type NodeConfigStatus struct {
+	ConfigStatus string `json:"configStatus"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+//+kubebuilder:resource:shortName=nc,scope=Cluster
+//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.configStatus`
+
+// NodeConfig is the Schema for the node configuration.
+type NodeConfig struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   NodeConfigSpec   `json:"spec,omitempty"`
+	Status NodeConfigStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// NodeConfigList contains a list of NodeConfig.
+type NodeConfigList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []NodeConfig `json:"items"`
+}
+
+func (nc *NodeConfig) IsEqual(c *NodeConfig) bool {
+	return reflect.DeepEqual(nc.Spec.Layer2, c.Spec.Layer2) && reflect.DeepEqual(nc.Spec.Vrf, c.Spec.Vrf) && reflect.DeepEqual(nc.Spec.RoutingTable, c.Spec.RoutingTable)
+}
+
+func NewEmptyConfig(name string) *NodeConfig {
+	return &NodeConfig{
+		ObjectMeta: metav1.ObjectMeta{Name: name},
+		Spec: NodeConfigSpec{
+			Vrf:          []VRFRouteConfigurationSpec{},
+			Layer2:       []Layer2NetworkConfigurationSpec{},
+			RoutingTable: []RoutingTableSpec{},
+		},
+		Status: NodeConfigStatus{
+			ConfigStatus: "",
+		},
+	}
+}
+
+func CopyNodeConfig(src, dst *NodeConfig, name string) {
+	dst.Spec.Layer2 = make([]Layer2NetworkConfigurationSpec, len(src.Spec.Layer2))
+	dst.Spec.Vrf = make([]VRFRouteConfigurationSpec, len(src.Spec.Vrf))
+	dst.Spec.RoutingTable = make([]RoutingTableSpec, len(src.Spec.RoutingTable))
+	copy(dst.Spec.Layer2, src.Spec.Layer2)
+	copy(dst.Spec.Vrf, src.Spec.Vrf)
+	copy(dst.Spec.RoutingTable, src.Spec.RoutingTable)
+	dst.OwnerReferences = make([]metav1.OwnerReference, len(src.OwnerReferences))
+	copy(dst.OwnerReferences, src.OwnerReferences)
+	dst.Name = name
+}
+
+func init() {
+	SchemeBuilder.Register(&NodeConfig{}, &NodeConfigList{})
+}
diff --git a/api/v1alpha1/nodeconfigprocess_types.go b/api/v1alpha1/nodeconfigprocess_types.go
new file mode 100644
index 00000000..88e61b9c
--- /dev/null
+++ b/api/v1alpha1/nodeconfigprocess_types.go
@@ -0,0 +1,51 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NodeConfigProcessSpec defines the desired state of NodeConfigProcess.
+type NodeConfigProcessSpec struct {
+	State string `json:"state"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:resource:shortName=ncp,scope=Cluster
+//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.spec.state`
+
+// NodeConfigProcess is the Schema for the node configuration process state.
+type NodeConfigProcess struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec NodeConfigProcessSpec `json:"spec,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// NodeConfigProcessList contains a list of NodeConfigProcess.
+type NodeConfigProcessList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []NodeConfigProcess `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&NodeConfigProcess{}, &NodeConfigProcessList{})
+}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index ecaa9148..fbb00b5c 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -129,6 +129,187 @@ func (in *Layer2NetworkConfigurationStatus) DeepCopy() *Layer2NetworkConfigurati
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out.
in must be non-nil. +func (in *NodeConfig) DeepCopyInto(out *NodeConfig) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + out.Status = in.Status +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfig. +func (in *NodeConfig) DeepCopy() *NodeConfig { + if in == nil { + return nil + } + out := new(NodeConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NodeConfig) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeConfigList) DeepCopyInto(out *NodeConfigList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NodeConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfigList. +func (in *NodeConfigList) DeepCopy() *NodeConfigList { + if in == nil { + return nil + } + out := new(NodeConfigList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NodeConfigList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeConfigProcess) DeepCopyInto(out *NodeConfigProcess) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfigProcess. +func (in *NodeConfigProcess) DeepCopy() *NodeConfigProcess { + if in == nil { + return nil + } + out := new(NodeConfigProcess) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NodeConfigProcess) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeConfigProcessList) DeepCopyInto(out *NodeConfigProcessList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NodeConfigProcess, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfigProcessList. +func (in *NodeConfigProcessList) DeepCopy() *NodeConfigProcessList { + if in == nil { + return nil + } + out := new(NodeConfigProcessList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *NodeConfigProcessList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeConfigProcessSpec) DeepCopyInto(out *NodeConfigProcessSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfigProcessSpec. +func (in *NodeConfigProcessSpec) DeepCopy() *NodeConfigProcessSpec { + if in == nil { + return nil + } + out := new(NodeConfigProcessSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeConfigSpec) DeepCopyInto(out *NodeConfigSpec) { + *out = *in + if in.Layer2 != nil { + in, out := &in.Layer2, &out.Layer2 + *out = make([]Layer2NetworkConfigurationSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Vrf != nil { + in, out := &in.Vrf, &out.Vrf + *out = make([]VRFRouteConfigurationSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.RoutingTable != nil { + in, out := &in.RoutingTable, &out.RoutingTable + *out = make([]RoutingTableSpec, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfigSpec. +func (in *NodeConfigSpec) DeepCopy() *NodeConfigSpec { + if in == nil { + return nil + } + out := new(NodeConfigSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeConfigStatus) DeepCopyInto(out *NodeConfigStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConfigStatus. +func (in *NodeConfigStatus) DeepCopy() *NodeConfigStatus { + if in == nil { + return nil + } + out := new(NodeConfigStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RoutingTable) DeepCopyInto(out *RoutingTable) { *out = *in diff --git a/cmd/configurator/main.go b/cmd/configurator/main.go new file mode 100644 index 00000000..0f775f98 --- /dev/null +++ b/cmd/configurator/main.go @@ -0,0 +1,226 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+//nolint:gci
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"time"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+
+	networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1"
+	"github.com/telekom/das-schiff-network-operator/controllers"
+	configmanager "github.com/telekom/das-schiff-network-operator/pkg/config_manager"
+	"github.com/telekom/das-schiff-network-operator/pkg/managerconfig"
+	"github.com/telekom/das-schiff-network-operator/pkg/reconciler"
+
+	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) //nolint:gci
+	// to ensure that exec-entrypoint and run can make use of them.
+	_ "k8s.io/client-go/plugin/pkg/client/auth"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+	//nolint:gci // kubebuilder import
+	//+kubebuilder:scaffold:imports
+)
+
+var (
+	scheme   = runtime.NewScheme()
+	setupLog = ctrl.Log.WithName("setup")
+)
+
+func init() {
+	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
+
+	utilruntime.Must(networkv1alpha1.AddToScheme(scheme))
+	//+kubebuilder:scaffold:scheme
+}
+
+func main() {
+	var configFile string
+	var timeout string
+	var limit int64
+	flag.StringVar(&configFile, "config", "",
+		"The controller will load its initial configuration from this file. "+
+			"Omit this flag to use the default configuration values. "+
+			"Command-line flags override configuration from this file.")
+	flag.StringVar(&timeout, "timeout", reconciler.DefaultTimeout,
+		"Timeout for Kubernetes API connections (default: 60s).")
+	flag.Int64Var(&limit, "update-limit", reconciler.DefaultNodeUpdateLimit,
+		"Defines how many nodes can be configured at once (default: 1).")
+	opts := zap.Options{
+		Development: true,
+	}
+	opts.BindFlags(flag.CommandLine)
+	flag.Parse()
+	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
+
+	options, err := setManagerOptions(configFile)
+	if err != nil {
+		setupLog.Error(err, "error configuring manager options")
+		os.Exit(1)
+	}
+
+	clientConfig := ctrl.GetConfigOrDie()
+	mgr, err := ctrl.NewManager(clientConfig, *options)
+	if err != nil {
+		setupLog.Error(err, "unable to start manager")
+		os.Exit(1)
+	}
+
+	_, _, err = setupReconcilers(mgr, timeout, limit)
+	if err != nil {
+		setupLog.Error(err, "unable to setup reconcilers")
+		os.Exit(1)
+	}
+
+	setupLog.Info("starting manager")
+	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+		setupLog.Error(err, "problem running manager")
+		os.Exit(1)
+	}
+}
+
+func setupReconcilers(mgr manager.Manager, timeout string, limit int64) (*reconciler.ConfigReconciler, *reconciler.NodeReconciler, error) {
+	timeoutVal, err := time.ParseDuration(timeout)
+	if err != nil {
+		return nil, nil, fmt.Errorf("error parsing timeout value %s: %w", timeout, err)
+	}
+
+	cmInfo := make(chan bool)
+	nodeDelInfo := make(chan []string)
+
+	cr, err := reconciler.NewConfigReconciler(mgr.GetClient(), mgr.GetLogger().WithName("ConfigReconciler"), timeoutVal, cmInfo)
+	if err != nil {
+		return nil, nil, fmt.Errorf("unable to create config reconciler: %w", err)
+	}
+
+	nr, err := reconciler.NewNodeReconciler(mgr.GetClient(), mgr.GetLogger().WithName("NodeReconciler"), timeoutVal, cmInfo, nodeDelInfo)
+	if err != nil {
+		return nil, nil, fmt.Errorf("unable to create node reconciler: %w", err)
+	}
+
+	cm := configmanager.New(mgr.GetClient(), cr, nr, mgr.GetLogger().WithName("ConfigManager"), timeoutVal, limit, cmInfo, nodeDelInfo)
+
+	if err := mgr.Add(newOnLeaderElectionEvent(cm)); err != nil {
+		return nil, nil, fmt.Errorf("unable to create OnLeaderElectionEvent: %w", err)
+	}
+
+	if err = (&controllers.VRFRouteConfigurationReconciler{
+		Client:     mgr.GetClient(),
+		Scheme:     mgr.GetScheme(),
+		Reconciler: cr,
+	}).SetupWithManager(mgr); err != nil {
+		return nil, nil, fmt.Errorf("unable to create VRFRouteConfiguration controller: %w", err)
+	}
+
+	if err = (&controllers.Layer2NetworkConfigurationReconciler{
+		Client:     mgr.GetClient(),
+		Scheme:     mgr.GetScheme(),
+		Reconciler: cr,
+	}).SetupWithManager(mgr); err != nil {
+		return nil, nil, fmt.Errorf("unable to create Layer2NetworkConfiguration controller: %w", err)
+	}
+
+	if err = (&controllers.RoutingTableReconciler{
+		Client:     mgr.GetClient(),
+		Scheme:     mgr.GetScheme(),
+		Reconciler: cr,
+	}).SetupWithManager(mgr); err != nil {
+		return nil, nil, fmt.Errorf("unable to create RoutingTable controller: %w", err)
+	}
+
+	if err = (&controllers.NodeReconciler{
+		Client:     mgr.GetClient(),
+		Scheme:     mgr.GetScheme(),
+		Reconciler: nr,
+	}).SetupWithManager(mgr); err != nil {
+		return nil, nil, fmt.Errorf("unable to create Node controller: %w", err)
+	}
+
+	return cr, nr, nil
+}
+
+func setManagerOptions(configFile string) (*manager.Options, error) {
+	var err error
+	var options manager.Options
+	if configFile != "" {
+		options, err = managerconfig.Load(configFile, scheme)
+		if err != nil {
+			return nil, fmt.Errorf("unable to load the config file: %w", err)
+		}
+	} else {
+		options = ctrl.Options{Scheme: scheme}
+	}
+
+	// force leader election
+	options.LeaderElection = true
+	if options.LeaderElectionID == "" {
+		options.LeaderElectionID = "network-operator-configurator"
+	}
+
+	// force turn off metrics server
+	options.MetricsBindAddress = "0"
+
+	return &options, nil
+}
+
+type onLeaderElectionEvent struct {
+	cm *configmanager.ConfigManager
+}
+
+func newOnLeaderElectionEvent(cm *configmanager.ConfigManager) *onLeaderElectionEvent {
+	return &onLeaderElectionEvent{
+		cm: cm,
+	}
+}
+
+func (*onLeaderElectionEvent) NeedLeaderElection() bool {
+	return true
+}
+
+func (e *onLeaderElectionEvent) Start(ctx context.Context) error {
+	setupLog.Info("onLeaderElectionEvent started")
+	if err := e.cm.DirtyStartup(ctx); err != nil {
+		return fmt.Errorf("error while checking previous leader work: %w", err)
+	}
+
+	watchNodesErr := make(chan error)
+	watchConfigsErr := make(chan error)
+	leCtx, cancel := context.WithCancel(ctx)
+	defer cancel()
+	go e.cm.WatchDeletedNodes(leCtx, watchNodesErr)
+	go e.cm.WatchConfigs(leCtx, watchConfigsErr)
+
+	select {
+	case <-leCtx.Done():
+		if err := leCtx.Err(); err != nil {
+			return fmt.Errorf("onLeaderElection context error: %w", err)
+		}
+		return nil
+	case err := <-watchNodesErr:
+		return fmt.Errorf("node watcher error: %w", err)
+	case err := <-watchConfigsErr:
+		return fmt.Errorf("config watcher error: %w", err)
+	}
+}
diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index 536390ab..49f6fb14 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -92,6 +92,7 @@ func main() {
 	var onlyBPFMode bool
 	var configFile string
 	var interfacePrefix string
+	var nodeConfigPath string
 	flag.StringVar(&configFile, "config", "",
 		"The controller will load its initial configuration from this file. "+
 			"Omit this flag to use the default configuration values. "+
 			"Command-line flags override configuration from this file.")
@@ -100,6 +101,8 @@
 		"Only attach BPF to specified interfaces in config. This will not start any reconciliation. Perfect for masters.")
 	flag.StringVar(&interfacePrefix, "macvlan-interface-prefix", "",
 		"Interface prefix for bridge devices for MACVlan sync")
+	flag.StringVar(&nodeConfigPath, "nodeconfig-path", reconciler.DefaultNodeConfigPath,
+		"Path to store working node configuration.")
 	opts := zap.Options{
 		Development: true,
 	}
@@ -146,7 +149,7 @@
 		os.Exit(1)
 	}
 
-	if err := initComponents(mgr, anycastTracker, cfg, clientConfig, onlyBPFMode); err != nil {
+	if err := initComponents(mgr, anycastTracker, cfg, clientConfig, onlyBPFMode, nodeConfigPath); err != nil {
 		setupLog.Error(err, "unable to initialize components")
 		os.Exit(1)
 	}
@@ -163,10 +166,10 @@
 	}
 }
 
-func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *config.Config, clientConfig *rest.Config, onlyBPFMode bool) error {
+func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *config.Config, clientConfig *rest.Config, onlyBPFMode bool, nodeConfigPath string) error {
 	// Start VRFRouteConfigurationReconciler when we are not running in only BPF mode.
 	if !onlyBPFMode {
-		if err := setupReconcilers(mgr, anycastTracker); err != nil {
+		if err := setupReconcilers(mgr, anycastTracker, nodeConfigPath); err != nil {
 			return fmt.Errorf("unable to setup reconcilers: %w", err)
 		}
 	}
@@ -225,13 +228,13 @@ func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *c
 	return nil
 }
 
-func setupReconcilers(mgr manager.Manager, anycastTracker *anycast.Tracker) error {
-	r, err := reconciler.NewReconciler(mgr.GetClient(), anycastTracker, mgr.GetLogger())
+func setupReconcilers(mgr manager.Manager, anycastTracker *anycast.Tracker, nodeConfigPath string) error {
+	r, err := reconciler.NewReconciler(mgr.GetClient(), anycastTracker, mgr.GetLogger(), nodeConfigPath)
 	if err != nil {
 		return fmt.Errorf("unable to create debounced reconciler: %w", err)
 	}
 
-	if err = (&controllers.VRFRouteConfigurationReconciler{
+	if err = (&controllers.NodeConfigReconciler{
 		Client:     mgr.GetClient(),
 		Scheme:     mgr.GetScheme(),
 		Reconciler: r,
@@ -239,22 +242,6 @@ func setupReconcilers(mgr manager.Manager, anycastTracker *anycast.Tracker) erro
-		return fmt.Errorf("unable to create VRFRouteConfiguration controller: %w", err)
+		return fmt.Errorf("unable to create NodeConfig controller: %w", err)
 	}
 
-	if err = (&controllers.Layer2NetworkConfigurationReconciler{
-		Client:     mgr.GetClient(),
-		Scheme:     mgr.GetScheme(),
-		Reconciler: r,
-	}).SetupWithManager(mgr); err != nil {
-		return fmt.Errorf("unable to create Layer2NetworkConfiguration controller: %w", err)
-	}
-
-	if err = (&controllers.RoutingTableReconciler{
-		Client:     mgr.GetClient(),
-		Scheme:     mgr.GetScheme(),
-		Reconciler: r,
-	}).SetupWithManager(mgr); err != nil {
-		return fmt.Errorf("unable to create RoutingTable controller: %w", err)
-	}
-
 	return nil
 }
diff --git a/config/certmanager/kustomization.yaml b/config/certmanager/kustomization.yaml
index bebea5a5..ff414e3c 100644
--- a/config/certmanager/kustomization.yaml
+++ b/config/certmanager/kustomization.yaml
@@ -1,3 +1,6 @@
+# Adds namespace to all resources.
+namespace: kube-system + resources: - certificate.yaml diff --git a/config/configurator/configurator.yaml b/config/configurator/configurator.yaml new file mode 100644 index 00000000..b758d5b5 --- /dev/null +++ b/config/configurator/configurator.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: configurator + namespace: system + labels: + app.kubernetes.io/component: configurator +spec: + selector: + matchLabels: + app.kubernetes.io/component: configurator + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: configurator + labels: + app.kubernetes.io/component: configurator + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: DoesNotExist + tolerations: + - effect: NoSchedule + key: node.schiff.telekom.de/uninitialized + operator: Exists + - key: node.cloudprovider.kubernetes.io/uninitialized + value: "true" + effect: NoSchedule + - key: node.kubernetes.io/not-ready + effect: NoSchedule + operator: Exists + hostNetwork: true + hostPID: true + containers: + - command: + - /configurator + args: + - --update-limit=1 + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + image: configurator:latest + name: configurator + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + volumeMounts: + - mountPath: /var/state + name: state + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 + volumes: + - name: state + hostPath: + path: /var/state + type: DirectoryOrCreate diff --git a/config/configurator/kustomization.yaml b/config/configurator/kustomization.yaml new file mode 100644 index 00000000..3d758a3b --- /dev/null +++ b/config/configurator/kustomization.yaml @@ -0,0 +1,12 @@ +resources: +- configurator.yaml + +generatorOptions: + disableNameSuffixHash: true + +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: configurator + newName: ghcr.io/telekom/configurator + newTag: latest diff --git a/config/crd/bases/network.schiff.telekom.de_nodeconfigprocesses.yaml b/config/crd/bases/network.schiff.telekom.de_nodeconfigprocesses.yaml new file mode 100644 index 00000000..ff2e213a --- /dev/null +++ b/config/crd/bases/network.schiff.telekom.de_nodeconfigprocesses.yaml @@ -0,0 +1,57 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: nodeconfigprocesses.network.schiff.telekom.de +spec: + group: network.schiff.telekom.de + names: + kind: NodeConfigProcess + listKind: NodeConfigProcessList + plural: nodeconfigprocesses + shortNames: + - ncp + singular: nodeconfigprocess + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.state + name: State + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeConfigProcess is the Schema for the node configuration process + state. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NodeConfigSpec defines the desired state of NodeConfig. + properties: + state: + type: string + required: + - state + type: object + type: object + served: true + storage: true + subresources: {} diff --git a/config/crd/bases/network.schiff.telekom.de_nodeconfigs.yaml b/config/crd/bases/network.schiff.telekom.de_nodeconfigs.yaml new file mode 100644 index 00000000..9ccade21 --- /dev/null +++ b/config/crd/bases/network.schiff.telekom.de_nodeconfigs.yaml @@ -0,0 +1,268 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: nodeconfigs.network.schiff.telekom.de +spec: + group: network.schiff.telekom.de + names: + kind: NodeConfig + listKind: NodeConfigList + plural: nodeconfigs + shortNames: + - nc + singular: nodeconfig + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.configStatus + name: Status + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeConfig is the Schema for the node configuration. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NodeConfigSpec defines the desired state of NodeConfig. + properties: + layer2: + items: + description: Layer2NetworkConfigurationSpec defines the desired + state of Layer2NetworkConfiguration. + properties: + advertiseNeighbors: + description: If desired network-operator advertises host routes + for local neighbors + type: boolean + anycastGateways: + description: Anycast Gateway to configure on bridge + items: + type: string + type: array + anycastMac: + description: If anycast is desired, specify anycast gateway + MAC address + pattern: (?:[[:xdigit:]]{2}:){5}[[:xdigit:]]{2} + type: string + createMacVLANInterface: + description: Create MACVLAN attach interface + type: boolean + id: + description: VLAN Id of the layer 2 network + type: integer + mtu: + description: Network interface MTU + maximum: 9000 + minimum: 1000 + type: integer + neighSuppression: + description: Enable ARP / ND suppression + type: boolean + nodeSelector: + description: Select nodes to create Layer2 network on + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. 
+ type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + vni: + description: VXLAN VNI Id for the layer 2 network + maximum: 16777215 + minimum: 1 + type: integer + vrf: + description: VRF to attach Layer2 network to, default if not + set + type: string + required: + - id + - mtu + - vni + type: object + type: array + routingTable: + items: + description: RoutingTableSpec defines the desired state of RoutingTable. + properties: + tableId: + description: TableID is the host table that can be used to export + routes + type: integer + required: + - tableId + type: object + type: array + vrf: + items: + description: VRFRouteConfigurationSpec defines the desired state + of VRFRouteConfiguration. + properties: + aggregate: + description: Aggregate Routes that should be announced + items: + type: string + type: array + community: + description: Community for export, if omitted no community will + be set + type: string + export: + description: Routes exported from the cluster VRF into the specified + VRF + items: + description: VRFRouteConfigurationPrefixItem defines a prefix + item. + properties: + action: + enum: + - permit + - deny + type: string + cidr: + description: CIDR of the leaked network + type: string + ge: + description: Minimum prefix length to be matched + type: integer + le: + description: Maximum prefix length to be matched + type: integer + seq: + description: Sequence in the generated prefix-list, if + omitted will be list index + maximum: 4294967295 + minimum: 1 + type: integer + required: + - action + type: object + maxItems: 4294967295 + type: array + import: + description: Routes imported from this VRF into the cluster + VRF + items: + description: VRFRouteConfigurationPrefixItem defines a prefix + item. 
+ properties: + action: + enum: + - permit + - deny + type: string + cidr: + description: CIDR of the leaked network + type: string + ge: + description: Minimum prefix length to be matched + type: integer + le: + description: Maximum prefix length to be matched + type: integer + seq: + description: Sequence in the generated prefix-list, if + omitted will be list index + maximum: 4294967295 + minimum: 1 + type: integer + required: + - action + type: object + maxItems: 4294967295 + type: array + mtu: + default: 9000 + description: The MTU of the VRF + type: integer + seq: + description: Sequence of the generated route-map, maximum of + 65534 because we sometimes have to set an explicit default-deny + maximum: 65534 + minimum: 1 + type: integer + vrf: + description: VRF this configuration refers to + maxLength: 12 + type: string + required: + - export + - import + - mtu + - seq + type: object + type: array + required: + - layer2 + - routingTable + - vrf + type: object + status: + description: NodeConfigStatus defines the observed state of NodeConfig. + properties: + configStatus: + type: string + required: + - configStatus + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 2ecd84e3..5f210373 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -5,6 +5,8 @@ resources: - bases/network.schiff.telekom.de_vrfrouteconfigurations.yaml - bases/network.schiff.telekom.de_layer2networkconfigurations.yaml - bases/network.schiff.telekom.de_routingtables.yaml +- bases/network.schiff.telekom.de_nodeconfigs.yaml +- bases/network.schiff.telekom.de_nodeconfigprocesses.yaml #+kubebuilder:scaffold:crdkustomizeresource # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 
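To illustrate the NodeConfig API that the CRDs above define, here is a minimal, hypothetical Go sketch that builds a per-node configuration using the v1alpha1 helpers added by this patch. The RoutingTableSpec field name is inferred from the CRD's `tableId` property and is an assumption, not confirmed by this patch.

package main

import (
	"fmt"

	"github.com/telekom/das-schiff-network-operator/api/v1alpha1"
)

func main() {
	// NewEmptyConfig is provided by this patch; it returns a NodeConfig
	// with empty Layer2, Vrf and RoutingTable slices.
	nc := v1alpha1.NewEmptyConfig("example-worker-1")

	// Assumption: the Go field backing the CRD's `tableId` property is TableID.
	nc.Spec.RoutingTable = append(nc.Spec.RoutingTable, v1alpha1.RoutingTableSpec{TableID: 100})

	fmt.Printf("node config %q with %d routing table(s)\n", nc.Name, len(nc.Spec.RoutingTable))
}
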
diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml
index 72d9c77d..47dffc21 100644
--- a/config/default/kustomization.yaml
+++ b/config/default/kustomization.yaml
@@ -43,6 +43,7 @@ resources:
 - ../crd
 - ../rbac
 - ../manager
+- ../configurator
 - ../webhook
 - ../prometheus
 labels:
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index b58d5a05..484af3e6 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -18,7 +18,9 @@ rules:
   resources:
   - pods
   verbs:
+  - get
   - list
+  - watch
 - apiGroups:
   - ""
   resources:
@@ -51,6 +53,42 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - network.schiff.telekom.de
+  resources:
+  - nodeconfigprocesses
+  verbs:
+  - create
+  - get
+  - list
+  - update
+  - watch
+- apiGroups:
+  - network.schiff.telekom.de
+  resources:
+  - nodeconfigs
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - network.schiff.telekom.de
+  resources:
+  - nodeconfigs/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - network.schiff.telekom.de
+  resources:
+  - nodeconfigs/status
+  verbs:
+  - get
+  - patch
+  - update
 - apiGroups:
   - network.schiff.telekom.de
   resources:
diff --git a/configurator.Dockerfile b/configurator.Dockerfile
new file mode 100644
index 00000000..b6586789
--- /dev/null
+++ b/configurator.Dockerfile
@@ -0,0 +1,28 @@
+# Build the configurator binary
+FROM docker.io/library/golang:1.21-alpine as builder
+
+
+WORKDIR /workspace
+# Copy the Go Modules manifests
+COPY go.mod go.mod
+COPY go.sum go.sum
+# cache deps before building and copying source so that we don't need to re-download as much
+# and so that source changes don't invalidate our downloaded layer
+RUN go mod download
+
+# Copy the go source
+COPY cmd/configurator/main.go main.go
+COPY api/ api/
+COPY controllers/ controllers/
+COPY pkg/ pkg/
+
+# Build
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o configurator main.go
+
+FROM alpine:latest
+
+WORKDIR /
+COPY --from=builder /workspace/configurator .
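+# Run as an unprivileged user; 65532 is the conventional "nonroot" UID/GID
+# (assumption: the configurator needs no root privileges at runtime).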
+USER 65532:65532 + +ENTRYPOINT ["/configurator"] diff --git a/controllers/layer2networkconfiguration_controller.go b/controllers/layer2networkconfiguration_controller.go index 54396c27..7c787d12 100644 --- a/controllers/layer2networkconfiguration_controller.go +++ b/controllers/layer2networkconfiguration_controller.go @@ -19,23 +19,14 @@ package controllers import ( "context" "fmt" - "os" "time" - "github.com/google/go-cmp/cmp" networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" - "github.com/telekom/das-schiff-network-operator/pkg/healthcheck" "github.com/telekom/das-schiff-network-operator/pkg/reconciler" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/reconcile" ) const requeueTime = 10 * time.Minute @@ -45,10 +36,9 @@ type Layer2NetworkConfigurationReconciler struct { client.Client Scheme *runtime.Scheme - Reconciler *reconciler.Reconciler + Reconciler *reconciler.ConfigReconciler } -//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;update;watch //+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=layer2networkconfigurations,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=layer2networkconfigurations/status,verbs=get;update;patch //+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=layer2networkconfigurations/finalizers,verbs=update @@ -68,20 +58,8 @@ func (r *Layer2NetworkConfigurationReconciler) Reconcile(ctx context.Context, _ // SetupWithManager sets up the controller with the Manager. func (r *Layer2NetworkConfigurationReconciler) SetupWithManager(mgr ctrl.Manager) error { - // Create empty request for changes to node - nodesMapFn := handler.EnqueueRequestsFromMapFunc(func(_ context.Context, _ client.Object) []reconcile.Request { return []reconcile.Request{{}} }) - nodePredicates := predicate.Funcs{ - CreateFunc: func(_ event.CreateEvent) bool { return false }, - UpdateFunc: func(e event.UpdateEvent) bool { - return os.Getenv(healthcheck.NodenameEnv) == e.ObjectNew.GetName() && !cmp.Equal(e.ObjectNew.GetLabels(), e.ObjectOld.GetLabels()) - }, - DeleteFunc: func(_ event.DeleteEvent) bool { return false }, - GenericFunc: func(_ event.GenericEvent) bool { return false }, - } - err := ctrl.NewControllerManagedBy(mgr). For(&networkv1alpha1.Layer2NetworkConfiguration{}). - Watches(&corev1.Node{}, nodesMapFn, builder.WithPredicates(nodePredicates)). Complete(r) if err != nil { return fmt.Errorf("error creating controller: %w", err) diff --git a/controllers/node_controller.go b/controllers/node_controller.go new file mode 100644 index 00000000..2d4e7ab9 --- /dev/null +++ b/controllers/node_controller.go @@ -0,0 +1,64 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + + "github.com/telekom/das-schiff-network-operator/pkg/reconciler" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// NodeReconciler reconciles a Node object. +type NodeReconciler struct { + client.Client + Scheme *runtime.Scheme + + Reconciler *reconciler.NodeReconciler +} + +//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;update;watch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.4/pkg/reconcile +func (r *NodeReconciler) Reconcile(ctx context.Context, _ ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // Run ReconcileDebounced through debouncer + r.Reconciler.Reconcile(ctx) + + return ctrl.Result{RequeueAfter: requeueTime}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error { + err := ctrl.NewControllerManagedBy(mgr). + For(&corev1.Node{}). + Complete(r) + if err != nil { + return fmt.Errorf("error creating controller: %w", err) + } + return nil +} diff --git a/controllers/nodeconfig_controller.go b/controllers/nodeconfig_controller.go new file mode 100644 index 00000000..86475750 --- /dev/null +++ b/controllers/nodeconfig_controller.go @@ -0,0 +1,85 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + "os" + "strings" + + networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + "github.com/telekom/das-schiff-network-operator/pkg/healthcheck" + "github.com/telekom/das-schiff-network-operator/pkg/reconciler" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +// NodeConfigReconciler reconciles a NodeConfig object. 
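+// It handles only NodeConfig objects whose names contain this node's name
+// (taken from the NODE_NAME environment variable), as enforced by the name
+// predicates set up in SetupWithManager below.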
+type NodeConfigReconciler struct { + client.Client + Scheme *runtime.Scheme + + Reconciler *reconciler.Reconciler +} + +//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch +//+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=nodeconfigs,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=nodeconfigs/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=nodeconfigs/finalizers,verbs=update +//+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=nodeconfigprocesses,verbs=get;list;watch;create;update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.4/pkg/reconcile +func (r *NodeConfigReconciler) Reconcile(ctx context.Context, _ ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // Run ReconcileDebounced through debouncer + r.Reconciler.Reconcile(ctx) + + return ctrl.Result{RequeueAfter: requeueTime}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *NodeConfigReconciler) SetupWithManager(mgr ctrl.Manager) error { + namePredicates := predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return strings.Contains(e.Object.GetName(), os.Getenv(healthcheck.NodenameEnv)) + }, + UpdateFunc: func(e event.UpdateEvent) bool { + return strings.Contains(e.ObjectNew.GetName(), os.Getenv(healthcheck.NodenameEnv)) + }, + DeleteFunc: func(event.DeleteEvent) bool { return false }, + GenericFunc: func(event.GenericEvent) bool { return false }, + } + + err := ctrl.NewControllerManagedBy(mgr). + For(&networkv1alpha1.NodeConfig{}, builder.WithPredicates(namePredicates)). 
+ Complete(r) + if err != nil { + return fmt.Errorf("error creating controller: %w", err) + } + return nil +} diff --git a/controllers/routingtable_controller.go b/controllers/routingtable_controller.go index 47cf8409..903b3e40 100644 --- a/controllers/routingtable_controller.go +++ b/controllers/routingtable_controller.go @@ -33,7 +33,7 @@ type RoutingTableReconciler struct { client.Client Scheme *runtime.Scheme - Reconciler *reconciler.Reconciler + Reconciler *reconciler.ConfigReconciler } //+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=routingtables,verbs=get;list;watch;create;update;patch;delete diff --git a/controllers/vrfrouteconfiguration_controller.go b/controllers/vrfrouteconfiguration_controller.go index f3d6f153..b27b66bd 100644 --- a/controllers/vrfrouteconfiguration_controller.go +++ b/controllers/vrfrouteconfiguration_controller.go @@ -32,7 +32,7 @@ type VRFRouteConfigurationReconciler struct { client.Client Scheme *runtime.Scheme - Reconciler *reconciler.Reconciler + Reconciler *reconciler.ConfigReconciler } //+kubebuilder:rbac:groups=network.schiff.telekom.de,resources=vrfrouteconfigurations,verbs=get;list;watch;create;update;patch;delete diff --git a/go.mod b/go.mod index 2ac3c3f2..336f1f43 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,6 @@ require ( github.com/coreos/go-iptables v0.6.0 github.com/coreos/go-systemd/v22 v22.4.0 github.com/go-logr/logr v1.2.4 - github.com/google/go-cmp v0.5.9 github.com/onsi/ginkgo v1.16.4 github.com/onsi/gomega v1.27.10 github.com/prometheus/client_golang v1.15.1 @@ -23,6 +22,8 @@ require ( sigs.k8s.io/controller-runtime v0.15.1 ) +require github.com/google/go-cmp v0.5.9 // indirect + require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect @@ -62,6 +63,7 @@ require ( go.uber.org/zap v1.24.0 // indirect golang.org/x/net v0.12.0 // indirect golang.org/x/oauth2 v0.5.0 // indirect + golang.org/x/sync v0.6.0 golang.org/x/term v0.10.0 // indirect golang.org/x/text v0.11.0 // indirect golang.org/x/time v0.3.0 // indirect diff --git a/go.sum b/go.sum index 1e12b45e..1f4669de 100644 --- a/go.sum +++ b/go.sum @@ -212,6 +212,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/pkg/config_manager/config_manager.go b/pkg/config_manager/config_manager.go new file mode 100644 index 00000000..c7813d29 --- /dev/null +++ b/pkg/config_manager/config_manager.go @@ -0,0 +1,419 @@ +package configmanager + +import ( + "context" + "errors" + "fmt" + "math" + "strings" + "sync" + "time" + + "github.com/go-logr/logr" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + configmap "github.com/telekom/das-schiff-network-operator/pkg/config_map" + 
"github.com/telekom/das-schiff-network-operator/pkg/nodeconfig" + "github.com/telekom/das-schiff-network-operator/pkg/reconciler" + "golang.org/x/sync/semaphore" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + defaultCooldownTime = time.Millisecond * 100 + processName = "configurator" +) + +type ConfigManager struct { + client client.Client + configsMap configmap.Interface + cr reconciler.ConfigReconcilerInterface + nr reconciler.NodeReconcilerInterface + changes chan bool + deletedNodes chan []string + logger logr.Logger + timeout time.Duration + sem semaphore.Weighted +} + +func New(c client.Client, cr reconciler.ConfigReconcilerInterface, nr reconciler.NodeReconcilerInterface, log logr.Logger, + timeout time.Duration, limit int64, changes chan bool, deleteNodes chan []string) *ConfigManager { + // disable gradual rolllout if limit is < 1 + if limit < 1 { + limit = math.MaxInt64 + } + return &ConfigManager{ + client: c, + configsMap: &configmap.ConfigMap{}, + cr: cr, + nr: nr, + logger: log, + changes: changes, + deletedNodes: deleteNodes, + timeout: timeout, + sem: *semaphore.NewWeighted(limit), + } +} + +// WatchConfigs waits for cm.deletedNodes channel. +func (cm *ConfigManager) WatchDeletedNodes(ctx context.Context, errCh chan error) { + cm.logger.Info("starting watching for deleted nodes...") + for { + select { + case <-ctx.Done(): + if !errors.Is(ctx.Err(), context.Canceled) { + errCh <- fmt.Errorf("error watching configs: %w", ctx.Err()) + } else { + errCh <- nil + } + return + case nodes := <-cm.deletedNodes: + cm.logger.Info("nodes deleted", "nodes", nodes) + for _, n := range nodes { + config, err := cm.configsMap.Get(n) + if err != nil { + cm.logger.Error(err, "error getting config", "node", n) + continue + } + + if config == nil { + cm.logger.Info("no in-memory config found", "node", n) + continue + } + + cm.configsMap.Delete(n) + config.SetActive(false) + cancel := config.GetCancelFunc() + if cancel != nil { + (*cancel)() + } + } + default: + time.Sleep(defaultCooldownTime) + } + } +} + +// WatchConfigs waits for cm.changes channel. +func (cm *ConfigManager) WatchConfigs(ctx context.Context, errCh chan error) { + cm.logger.Info("starting watching for changes...") + for { + select { + case <-ctx.Done(): + if !errors.Is(ctx.Err(), context.Canceled) { + errCh <- fmt.Errorf("error watching configs: %w", ctx.Err()) + } else { + errCh <- nil + } + return + case <-cm.changes: + cm.logger.Info("got notification about changes") + err := cm.updateConfigs() + if err != nil { + errCh <- fmt.Errorf("error updating configs: %w", err) + return + } + err = cm.deployConfigs(ctx) + if err != nil { + if err := cm.restoreBackup(ctx); err != nil { + cm.logger.Error(err, "error restoring backup") + } + } + default: + time.Sleep(defaultCooldownTime) + } + } +} + +// DirtyStartup will load all previously deployed NodeConfigs into current leader. 
+func (cm *ConfigManager) DirtyStartup(ctx context.Context) error { + process, err := cm.getProcess(ctx) + if err != nil { + // process object does not exists - there was no operator running on this cluster before + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("error getting process object: %w", err) + } + + cm.logger.Info("previous leader left cluster in state", "state", process.Spec.State) + cm.logger.Info("using data left by previous leader...") + + // get all known backup data and load it into config manager memory + if err := cm.loadConfigs(ctx); err != nil { + return fmt.Errorf("error loading configs: %w", err) + } + + // prevouos leader left cluster in provisioning state - restore known backups + if process.Spec.State == nodeconfig.StatusProvisioning { + if err := cm.restoreBackup(ctx); err != nil { + return fmt.Errorf("error restoring backup: %w", err) + } + } + return nil +} + +func (cm *ConfigManager) updateConfigs() error { + cm.logger.Info("updating configs...") + currentNodes := cm.nr.GetNodes() + for name := range currentNodes { + n := currentNodes[name] + next, err := cm.cr.CreateConfigForNode(name, n) + if err != nil { + return fmt.Errorf("error creating config for the node %s: %w", name, err) + } + cfg, err := cm.configsMap.Get(name) + if err != nil { + return fmt.Errorf("error getting config for node %s: %w", name, err) + } + if cfg != nil { + cfg.UpdateNext(next) + } else { + cfg = nodeconfig.NewEmpty(name) + cfg.UpdateNext(next) + cm.configsMap.Store(name, cfg) + } + } + return nil +} + +func (cm *ConfigManager) deploy(ctx context.Context, configs []nodeconfig.ConfigInterface) error { + for _, cfg := range configs { + cfg.SetDeployed(false) + } + + if err := cm.validateConfigs(configs); err != nil { + return fmt.Errorf("error validating configs: %w", err) + } + + if err := cm.setProcessStatus(ctx, nodeconfig.StatusProvisioning); err != nil { + return fmt.Errorf("error setting process status: %w", err) + } + + deploymentCtx, deploymentCancel := context.WithCancel(ctx) + defer deploymentCancel() + + wg := &sync.WaitGroup{} + errCh := make(chan error, len(configs)) + for _, cfg := range configs { + wg.Add(1) + go func(config nodeconfig.ConfigInterface) { + defer wg.Done() + + if err := cm.sem.Acquire(ctx, 1); err != nil { + errCh <- fmt.Errorf("error acquring semaphore: %w", err) + return + } + defer cm.sem.Release(1) + + select { + case <-deploymentCtx.Done(): + errCh <- deploymentCtx.Err() + return + default: + err := cm.deployConfig(deploymentCtx, config) + if err != nil { + deploymentCancel() + } + errCh <- err + return + } + }(cfg) + } + + wg.Wait() + close(errCh) + + if err := cm.checkErrors(errCh); err != nil { + return fmt.Errorf("errors occurred: %w", err) + } + + if err := cm.setProcessStatus(ctx, nodeconfig.StatusProvisioned); err != nil { + return fmt.Errorf("error setting process status: %w", err) + } + + return nil +} + +func (cm *ConfigManager) checkErrors(errCh chan error) error { + errCnt := 0 + var firstErr error + for err := range errCh { + if err != nil { + if firstErr == nil { + firstErr = err + } + if !errors.Is(err, context.Canceled) { + cm.logger.Error(err, "depoyment error") + } + errCnt++ + } + } + + if errCnt > 0 { + return fmt.Errorf("%d error(s) occurred while processing configs, please check the logs for details: first known error: %w", errCnt, firstErr) + } + + return nil +} + +// nolint: contextcheck +func (cm *ConfigManager) deployConfig(ctx context.Context, cfg nodeconfig.ConfigInterface) error { + if cfg.GetActive() 
+// nolint: contextcheck
+func (cm *ConfigManager) deployConfig(ctx context.Context, cfg nodeconfig.ConfigInterface) error {
+	if cfg.GetActive() {
+		cfgContext, cfgCancel := context.WithTimeout(ctx, cm.timeout)
+		cfgContext = context.WithValue(cfgContext, nodeconfig.ParentCtx, ctx)
+		cfg.SetCancelFunc(&cfgCancel)
+
+		cm.logger.Info("processing config", "name", cfg.GetName())
+		if err := cfg.Deploy(cfgContext, cm.client, cm.logger, cm.timeout); err != nil {
+			// invalidate the config on a new, separate context, so the invalidation
+			// won't get cancelled if the node update limit is set to more than 1.
+			invalidationCtx, invalidationCancel := context.WithTimeout(context.Background(), cm.timeout)
+			defer invalidationCancel()
+			if err := cfg.CrateInvalid(invalidationCtx, cm.client); err != nil {
+				return fmt.Errorf("error creating invalid config object: %w", err)
+			}
+			return fmt.Errorf("error deploying config %s: %w", cfg.GetName(), err)
+		}
+		cm.logger.Info("deployed", "name", cfg.GetName())
+	}
+	return nil
+}
+
+func (cm *ConfigManager) validateConfigs(configs []nodeconfig.ConfigInterface) error {
+	cm.logger.Info("validating configs...")
+	for _, cfg := range configs {
+		if !cfg.GetActive() {
+			continue
+		}
+
+		next := cfg.GetNext()
+		invalid := cfg.GetInvalid()
+
+		if invalid != nil && next != nil {
+			if next.IsEqual(invalid) {
+				return fmt.Errorf("config for node %s results in invalid config", cfg.GetName())
+			}
+		}
+	}
+	return nil
+}
+
+func (cm *ConfigManager) setProcessStatus(ctx context.Context, status string) error {
+	process, err := cm.getProcess(ctx)
+	if err != nil {
+		if !apierrors.IsNotFound(err) {
+			return fmt.Errorf("error getting process object: %w", err)
+		}
+		// process object does not exist yet - create it
+		process.Spec.State = status
+		if err := cm.client.Create(ctx, process); err != nil {
+			return fmt.Errorf("error creating process object: %w", err)
+		}
+		cm.logger.Info("process status set", "status", status)
+		return nil
+	}
+	process.Spec.State = status
+	if err := cm.client.Update(ctx, process); err != nil {
+		return fmt.Errorf("error updating process object: %w", err)
+	}
+	cm.logger.Info("process status set", "status", status)
+	return nil
+}
+
+func (cm *ConfigManager) deployConfigs(ctx context.Context) error {
+	cm.logger.Info("deploying configs ...")
+	toDeploy, err := cm.configsMap.GetSlice()
+	if err != nil {
+		return fmt.Errorf("error converting config map to slice: %w", err)
+	}
+
+	if err := cm.deploy(ctx, toDeploy); err != nil {
+		return fmt.Errorf("error deploying configs: %w", err)
+	}
+
+	return nil
+}
+
+func (cm *ConfigManager) restoreBackup(ctx context.Context) error {
+	cm.logger.Info("restoring backup...")
+	slice, err := cm.configsMap.GetSlice()
+	if err != nil {
+		return fmt.Errorf("error converting config map to slice: %w", err)
+	}
+	toDeploy := []nodeconfig.ConfigInterface{}
+	for _, cfg := range slice {
+		if cfg.GetDeployed() {
+			if backupAvailable := cfg.SetBackupAsNext(); backupAvailable {
+				toDeploy = append(toDeploy, cfg)
+			}
+		}
+	}
+
+	if err := cm.deploy(ctx, toDeploy); err != nil {
+		return fmt.Errorf("error deploying configs: %w", err)
+	}
+
+	cm.logger.Info("backup restored")
+	return nil
+}
+
+func (cm *ConfigManager) getProcess(ctx context.Context) (*v1alpha1.NodeConfigProcess, error) {
+	process := &v1alpha1.NodeConfigProcess{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: processName,
+		},
+	}
+	if err := cm.client.Get(ctx, client.ObjectKeyFromObject(process), process); err != nil {
+		return process, fmt.Errorf("error getting process object: %w", err)
+	}
+	return process, nil
+}
+
+func (cm *ConfigManager) loadConfigs(ctx context.Context) error {
+	// get all known backup data and load it into config manager memory
+	nodes, err := reconciler.ListNodes(ctx, cm.client)
+	if err != nil {
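+		// cannot match backup configs to nodes without the current node list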
+ return fmt.Errorf("error listing nodes: %w", err) + } + + knownConfigs := &v1alpha1.NodeConfigList{} + if err := cm.client.List(ctx, knownConfigs); err != nil { + return fmt.Errorf("error listing NodeConfigs: %w", err) + } + + cm.createConfigsFromBackup(nodes, knownConfigs) + + return nil +} + +func (cm *ConfigManager) createConfigsFromBackup(nodes map[string]*corev1.Node, knownConfigs *v1alpha1.NodeConfigList) { + for _, node := range nodes { + current, backup, invalid := cm.matchConfigs(knownConfigs, node) + cfg := nodeconfig.New(node.Name, current, backup, invalid) + if backup != nil { + cfg.SetDeployed(true) + } + cm.configsMap.Store(node.Name, cfg) + } +} + +func (cm *ConfigManager) matchConfigs(knownConfigs *v1alpha1.NodeConfigList, node *corev1.Node) (current, backup, invalid *v1alpha1.NodeConfig) { + for i := range knownConfigs.Items { + for j := range knownConfigs.Items[i].OwnerReferences { + if knownConfigs.Items[i].OwnerReferences[j].UID == node.UID { + if knownConfigs.Items[i].Name == node.Name { + cm.logger.Info("found current config", "node", node.Name) + current = &knownConfigs.Items[i] + } + if strings.Contains(knownConfigs.Items[i].Name, nodeconfig.InvalidSuffix) { + cm.logger.Info("found invalid config", "node", node.Name) + invalid = &knownConfigs.Items[i] + } + if strings.Contains(knownConfigs.Items[i].Name, nodeconfig.BackupSuffix) { + cm.logger.Info("found backup config", "node", node.Name) + backup = &knownConfigs.Items[i] + } + } + } + } + return current, backup, invalid +} diff --git a/pkg/config_manager/config_manager_test.go b/pkg/config_manager/config_manager_test.go new file mode 100644 index 00000000..e2097df5 --- /dev/null +++ b/pkg/config_manager/config_manager_test.go @@ -0,0 +1,438 @@ +package configmanager + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/go-logr/logr" + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + mock_config_map "github.com/telekom/das-schiff-network-operator/pkg/config_map/mock" + "github.com/telekom/das-schiff-network-operator/pkg/nodeconfig" + mock_nodeconfig "github.com/telekom/das-schiff-network-operator/pkg/nodeconfig/mock" + mock_reconciler "github.com/telekom/das-schiff-network-operator/pkg/reconciler/mock" + "go.uber.org/mock/gomock" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var ( + ctrl *gomock.Controller + processProvisioning = &v1alpha1.NodeConfigProcess{ + ObjectMeta: metav1.ObjectMeta{ + Name: processName, + }, + Spec: v1alpha1.NodeConfigProcessSpec{ + State: nodeconfig.StatusProvisioning, + }, + } +) + +func TestConfigMap(t *testing.T) { + RegisterFailHandler(Fail) + ctrl = gomock.NewController(t) + defer ctrl.Finish() + RunSpecs(t, + "ConfigManager Suite") +} + +var _ = Describe("ConfigManager", func() { + nodeName := "testNode" + Context("WatchConfigs() should", func() { + It("return no error to errCh if context was cancelled", func() { + cm, _, _ := prepareObjects() + defer close(cm.changes) + defer close(cm.deletedNodes) + + ctx, cancel := context.WithCancel(context.Background()) + err := runContextTest(ctx, cancel, cm.WatchConfigs) + Expect(err).ToNot(HaveOccurred()) + }) + It("return error to errCh if context is done for reason other cancelation", func() { + cm, _, _ := prepareObjects() + defer close(cm.changes) + defer close(cm.deletedNodes) + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + + err := runContextTest(ctx, nil, cm.WatchConfigs) + Expect(err).To(HaveOccurred()) + }) + It("return error to errCh if cannot create config for node", func() { + cm, crm, nrm := prepareObjects() + defer close(cm.changes) + defer close(cm.deletedNodes) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + nrm.EXPECT().GetNodes().Return(map[string]*corev1.Node{nodeName: {}}) + crm.EXPECT().CreateConfigForNode(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("error getting config for node %s", nodeName)) + + errCh := make(chan error) + defer close(errCh) + err := runTest(ctx, cm, errCh, nil) + Expect(err).To(HaveOccurred()) + }) + It("return error to errCh if there was an error getting config for node from memory", func() { + cm, crm, nrm := prepareObjects() + defer close(cm.changes) + defer close(cm.deletedNodes) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + cmi := mock_config_map.NewMockConfigMapInterface(ctrl) + cm.configsMap = cmi + + nrm.EXPECT().GetNodes().Return(map[string]*corev1.Node{nodeName: {ObjectMeta: metav1.ObjectMeta{Name: nodeName}}}) + crm.EXPECT().CreateConfigForNode(gomock.Any(), gomock.Any()).Return(&v1alpha1.NodeConfig{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}, nil) + cmi.EXPECT().Get(gomock.Any()).Return(nil, fmt.Errorf("error gettting config %s", nodeName)) + + errCh := make(chan error) + defer close(errCh) + err := runTest(ctx, cm, errCh, cancel) + Expect(err).To(HaveOccurred()) + }) + }) + Context("updateConfigs() should", func() { + It("return no error if config is being updated", func() { + cm, cr, nr := prepareObjects() + defer close(cm.changes) + defer close(cm.deletedNodes) + + cmi := mock_config_map.NewMockConfigMapInterface(ctrl) + cm.configsMap = cmi + cfg := mock_nodeconfig.NewMockConfigInterface(ctrl) + + 
+			nr.EXPECT().GetNodes().Return(map[string]*corev1.Node{nodeName: {}})
+			cr.EXPECT().CreateConfigForNode(nodeName, gomock.Any()).Return(v1alpha1.NewEmptyConfig(nodeName), nil)
+			cmi.EXPECT().Get(nodeName).Return(cfg, nil)
+			cfg.EXPECT().UpdateNext(gomock.Any())
+
+			err := cm.updateConfigs()
+			Expect(err).ToNot(HaveOccurred())
+		})
+		It("return no error if config is being created", func() {
+			cm, cr, nr := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			cmi := mock_config_map.NewMockConfigMapInterface(ctrl)
+			cm.configsMap = cmi
+
+			nr.EXPECT().GetNodes().Return(map[string]*corev1.Node{nodeName: {}})
+			cr.EXPECT().CreateConfigForNode(nodeName, gomock.Any()).Return(v1alpha1.NewEmptyConfig(nodeName), nil)
+			cmi.EXPECT().Get(nodeName).Return(nil, nil)
+			cmi.EXPECT().Store(gomock.Any(), gomock.Any())
+
+			err := cm.updateConfigs()
+			Expect(err).ToNot(HaveOccurred())
+		})
+	})
+	Context("deployConfigs() should", func() {
+		It("return error if cannot get config slice", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			cmi := mock_config_map.NewMockConfigMapInterface(ctrl)
+			cm.configsMap = cmi
+
+			cmi.EXPECT().GetSlice().Return(nil, fmt.Errorf("error getting config slice"))
+
+			err := cm.deployConfigs(context.Background())
+			Expect(err).To(HaveOccurred())
+		})
+		It("return error if new config is equal to known invalid config", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			cmi := mock_config_map.NewMockConfigMapInterface(ctrl)
+			cm.configsMap = cmi
+			cfg := mock_nodeconfig.NewMockConfigInterface(ctrl)
+
+			cmi.EXPECT().GetSlice().Return([]nodeconfig.ConfigInterface{cfg}, nil)
+			cfg.EXPECT().SetDeployed(false)
+			cfg.EXPECT().GetActive().Return(true)
+			cfg.EXPECT().GetNext().Return(v1alpha1.NewEmptyConfig(nodeName))
+			cfg.EXPECT().GetInvalid().Return(v1alpha1.NewEmptyConfig(nodeName + nodeconfig.InvalidSuffix))
+			cfg.EXPECT().GetName().Return(nodeName)
+
+			err := cm.deployConfigs(context.Background())
+			Expect(err).To(HaveOccurred())
+		})
+		It("return error if deployment failed and the invalid config could not be created", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			cmi := mock_config_map.NewMockConfigMapInterface(ctrl)
+			cm.configsMap = cmi
+			cfg := mock_nodeconfig.NewMockConfigInterface(ctrl)
+
+			cmi.EXPECT().GetSlice().Return([]nodeconfig.ConfigInterface{cfg}, nil)
+			cfg.EXPECT().SetDeployed(false)
+			cfg.EXPECT().GetActive().Return(true)
+			next := v1alpha1.NewEmptyConfig(nodeName)
+			next.Spec.RoutingTable = []v1alpha1.RoutingTableSpec{{TableID: 1}}
+			cfg.EXPECT().GetNext().Return(next)
+			cfg.EXPECT().GetInvalid().Return(v1alpha1.NewEmptyConfig(nodeName + nodeconfig.InvalidSuffix))
+			cfg.EXPECT().GetActive().Return(true)
+			cfg.EXPECT().SetCancelFunc(gomock.Any())
+			cfg.EXPECT().GetName().Return(nodeName)
+			cfg.EXPECT().Deploy(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fmt.Errorf("error deploying config"))
+			cfg.EXPECT().CrateInvalid(gomock.Any(), gomock.Any()).Return(fmt.Errorf("error creating invalid config"))
+
+			err := cm.deployConfigs(context.Background())
+			Expect(err).To(HaveOccurred())
+		})
+		It("return no error on successful deployment", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			cmi := mock_config_map.NewMockConfigMapInterface(ctrl)
+			cm.configsMap = cmi
+			cfg := mock_nodeconfig.NewMockConfigInterface(ctrl)
+
+			cmi.EXPECT().GetSlice().Return([]nodeconfig.ConfigInterface{cfg}, nil)
+			cfg.EXPECT().SetDeployed(false)
+			cfg.EXPECT().GetActive().Return(true)
+			next := v1alpha1.NewEmptyConfig(nodeName)
+			next.Spec.RoutingTable = []v1alpha1.RoutingTableSpec{{TableID: 1}}
+			cfg.EXPECT().GetNext().Return(next)
+			cfg.EXPECT().GetInvalid().Return(v1alpha1.NewEmptyConfig(nodeName + nodeconfig.InvalidSuffix))
+			cfg.EXPECT().GetActive().Return(true)
+			cfg.EXPECT().SetCancelFunc(gomock.Any())
+			cfg.EXPECT().GetName().Return(nodeName)
+			cfg.EXPECT().Deploy(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
+			cfg.EXPECT().GetName().Return(nodeName)
+
+			err := cm.deployConfigs(context.Background())
+			Expect(err).ToNot(HaveOccurred())
+		})
+	})
+	Context("WatchDeletedNodes() should", func() {
+		It("return no error to errCh if context was cancelled", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			ctx, cancel := context.WithCancel(context.Background())
+			err := runContextTest(ctx, cancel, cm.WatchDeletedNodes)
+			Expect(err).ToNot(HaveOccurred())
+		})
+		It("return error to errCh if context is done for a reason other than cancellation", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
+			defer cancel()
+
+			err := runContextTest(ctx, nil, cm.WatchDeletedNodes)
+			Expect(err).To(HaveOccurred())
+		})
+		It("return no error if nodes were deleted successfully", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+
+			cm.configsMap.Store(nodeName, nodeconfig.NewEmpty(nodeName))
+
+			errCh := make(chan error)
+
+			go func() {
+				cm.WatchDeletedNodes(ctx, errCh)
+			}()
+
+			cm.deletedNodes <- []string{nodeName}
+			time.Sleep(time.Millisecond * 20)
+			nodes, err := cm.configsMap.GetSlice()
+			Expect(err).ToNot(HaveOccurred())
+			Expect(nodes).To(BeEmpty())
+			cancel()
+			err = <-errCh
+			Expect(err).ToNot(HaveOccurred())
+		})
+	})
+	Context("DirtyStartup() should", func() {
+		It("return no error if NodeConfigProcess object does not exist", func() {
+			cm, _, _ := prepareObjects()
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			err := cm.DirtyStartup(context.Background())
+			Expect(err).ToNot(HaveOccurred())
+		})
+		It("return no error if there is nothing to restore", func() {
+			cm, _, _ := prepareObjects(processProvisioning)
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			err := cm.DirtyStartup(context.Background())
+			Expect(err).ToNot(HaveOccurred())
+		})
+		It("return error if cannot get slice from ConfigMap", func() {
+			node := &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: nodeName,
+				},
+				Status: corev1.NodeStatus{
+					Conditions: []corev1.NodeCondition{
+						{
+							Type:   corev1.NodeReady,
+							Status: corev1.ConditionTrue,
+						},
+					},
+				},
+			}
+			backupConfig := v1alpha1.NewEmptyConfig(nodeName + nodeconfig.BackupSuffix)
+
+			cm, _, _ := prepareObjects(processProvisioning, node, backupConfig)
+			defer close(cm.changes)
+			defer close(cm.deletedNodes)
+
+			cmi := mock_config_map.NewMockConfigMapInterface(ctrl)
+			cm.configsMap = cmi
+
+			cmi.EXPECT().Store(gomock.Any(), gomock.Any())
+			cmi.EXPECT().GetSlice().Return(nil, fmt.Errorf("error"))
+
+			err := cm.DirtyStartup(context.Background())
+			Expect(err).To(HaveOccurred())
+		})
+		It("return no error if configs were restored successfully", func() {
+			node := &corev1.Node{
+				ObjectMeta: 
metav1.ObjectMeta{ + Name: nodeName, + UID: "7a4eec39-15c5-4d77-b235-78f46740351", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + invalidConfig := v1alpha1.NewEmptyConfig(nodeName + nodeconfig.InvalidSuffix) + invalidConfig.OwnerReferences = []metav1.OwnerReference{ + { + Kind: "node", + UID: "7a4eec39-15c5-4d77-b235-78f46740351", + }, + } + backupConfig := v1alpha1.NewEmptyConfig(nodeName + nodeconfig.BackupSuffix) + backupConfig.OwnerReferences = []metav1.OwnerReference{ + { + Kind: "node", + UID: "7a4eec39-15c5-4d77-b235-78f46740351", + }, + } + currentConfig := v1alpha1.NewEmptyConfig(nodeName) + currentConfig.OwnerReferences = []metav1.OwnerReference{ + { + Kind: "node", + UID: "7a4eec39-15c5-4d77-b235-78f46740351", + }, + } + + cm, _, _ := prepareObjects(processProvisioning, node, invalidConfig, backupConfig, currentConfig) + defer close(cm.changes) + defer close(cm.deletedNodes) + + cmi := mock_config_map.NewMockConfigMapInterface(ctrl) + cm.configsMap = cmi + + cfg := mock_nodeconfig.NewMockConfigInterface(ctrl) + + cmi.EXPECT().Store(gomock.Any(), gomock.Any()) + cmi.EXPECT().GetSlice().Return([]nodeconfig.ConfigInterface{cfg}, nil) + cfg.EXPECT().GetDeployed().Return(true) + cfg.EXPECT().SetBackupAsNext().Return(true) + cfg.EXPECT().SetDeployed(false) + cfg.EXPECT().GetActive().Return(true) + next := v1alpha1.NewEmptyConfig(nodeName) + next.Spec.RoutingTable = []v1alpha1.RoutingTableSpec{{TableID: 1}} + cfg.EXPECT().GetNext().Return(next) + cfg.EXPECT().GetInvalid().Return(v1alpha1.NewEmptyConfig(nodeName + nodeconfig.InvalidSuffix)) + cfg.EXPECT().GetActive().Return(true) + cfg.EXPECT().SetCancelFunc(gomock.Any()) + cfg.EXPECT().GetName().Return(nodeName) + cfg.EXPECT().Deploy(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + cfg.EXPECT().GetName().Return(nodeName) + + err := cm.DirtyStartup(context.Background()) + Expect(err).ToNot(HaveOccurred()) + }) + }) +}) + +func prepareObjects(objects ...runtime.Object) (*ConfigManager, *mock_reconciler.MockConfigReconcilerInterface, *mock_reconciler.MockNodeReconcilerInterface) { + crm := mock_reconciler.NewMockConfigReconcilerInterface(ctrl) + nrm := mock_reconciler.NewMockNodeReconcilerInterface(ctrl) + + s := runtime.NewScheme() + err := corev1.AddToScheme(s) + Expect(err).ToNot(HaveOccurred()) + err = v1alpha1.AddToScheme(s) + Expect(err).ToNot(HaveOccurred()) + c := fake.NewClientBuilder().WithScheme(s). 
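+		// preload the fake client with the provided objects so tests can read them back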
+ WithRuntimeObjects(objects...).Build() + + changes := make(chan bool) + nodesDeleted := make(chan []string) + cm := New(c, crm, nrm, logr.New(nil), time.Second*10, -1, changes, nodesDeleted) + Expect(cm).ToNot(BeNil()) + return cm, crm, nrm +} + +func runTest(ctx context.Context, cm *ConfigManager, errCh chan error, cancel context.CancelFunc) error { + start := make(chan bool) + defer close(start) + go func() { + start <- true + cm.WatchConfigs(ctx, errCh) + }() + startVal := <-start + Expect(startVal).To(BeTrue()) + + time.Sleep(time.Millisecond * 100) + cm.changes <- true + time.Sleep(time.Millisecond * 100) + if cancel != nil { + cancel() + } + err := <-errCh + return err +} + +func runContextTest(ctx context.Context, cancel context.CancelFunc, f func(ctx context.Context, errCh chan error)) error { + errCh := make(chan error) + defer close(errCh) + quit := make(chan bool) + defer close(quit) + go func() { + f(ctx, errCh) + quit <- true + }() + if cancel != nil { + cancel() + } + err := <-errCh + <-quit + return err +} diff --git a/pkg/config_map/config_map.go b/pkg/config_map/config_map.go new file mode 100644 index 00000000..033e3c64 --- /dev/null +++ b/pkg/config_map/config_map.go @@ -0,0 +1,63 @@ +package configmap + +import ( + "fmt" + "sync" + + "github.com/telekom/das-schiff-network-operator/pkg/nodeconfig" +) + +//go:generate mockgen -destination ./mock/mock_config_map.go . ConfigMapInterface +type Interface interface { + Delete(key any) + Get(name string) (nodeconfig.ConfigInterface, error) + GetSlice() ([]nodeconfig.ConfigInterface, error) + Load(key any) (value any, ok bool) + Range(f func(key any, value any) bool) + Store(key any, value any) +} + +type ConfigMap struct { + sync.Map +} + +func (cm *ConfigMap) Get(name string) (nodeconfig.ConfigInterface, error) { + cfg, ok := cm.Load(name) + if !ok { + return nil, nil + } + config, ok := cfg.(nodeconfig.ConfigInterface) + if !ok { + return nil, fmt.Errorf("error converting config for node %s from interface", name) + } + return config, nil +} + +func (cm *ConfigMap) GetSlice() ([]nodeconfig.ConfigInterface, error) { + slice := []nodeconfig.ConfigInterface{} + var err error + cm.Range(func(key any, value any) bool { + name, ok := key.(string) + if !ok { + err = fmt.Errorf("error converting key %v to string", key) + return false + } + + if value == nil { + slice = append(slice, nil) + return true + } + + config, ok := value.(nodeconfig.ConfigInterface) + if !ok { + err = fmt.Errorf("error converting config %s from interface", name) + return false + } + slice = append(slice, config) + return true + }) + if err != nil { + return nil, fmt.Errorf("error converting config map to slice: %w", err) + } + return slice, nil +} diff --git a/pkg/config_map/config_map_test.go b/pkg/config_map/config_map_test.go new file mode 100644 index 00000000..453c62c2 --- /dev/null +++ b/pkg/config_map/config_map_test.go @@ -0,0 +1,80 @@ +package configmap + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + "github.com/telekom/das-schiff-network-operator/pkg/nodeconfig" +) + +func TestConfigMap(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, + "ConfigMap Suite") +} + +var _ = Describe("ConfigMap", func() { + Context("Get() should", func() { + It("return no error and nil value if element does not exist in the map", func() { + m := ConfigMap{} + cfg, err := m.Get("nodeName") + Expect(err).ToNot(HaveOccurred()) + Expect(cfg).To(BeNil()) + }) + It("return error if cannot type assert map element to nodeconfig.Config pointer", func() { + name := "nodeName" + m := ConfigMap{} + m.Store(name, "someInvalidValue") + cfg, err := m.Get(name) + Expect(err).To(HaveOccurred()) + Expect(cfg).To(BeNil()) + }) + It("return no error if can get the value from the map", func() { + name := "nodeName" + m := ConfigMap{} + testCfg := &nodeconfig.Config{} + m.Store(name, testCfg) + cfg, err := m.Get(name) + Expect(err).ToNot(HaveOccurred()) + Expect(cfg).To(Equal(testCfg)) + }) + }) + Context("GetSlice() should", func() { + It("return empty slice if there are no values in the map", func() { + m := ConfigMap{} + slice, err := m.GetSlice() + Expect(err).ToNot(HaveOccurred()) + Expect(slice).To(BeEmpty()) + }) + It("return error if key is not of type string", func() { + m := ConfigMap{} + m.Store(0, &nodeconfig.Config{}) + slice, err := m.GetSlice() + Expect(err).To(HaveOccurred()) + Expect(slice).To(BeNil()) + }) + It("be able to contain nil value", func() { + m := ConfigMap{} + m.Store("nodeName", nil) + slice, err := m.GetSlice() + Expect(err).ToNot(HaveOccurred()) + Expect(slice).To(HaveLen(1)) + }) + It("return error if cannot type assert map element to nodeconfig.Config pointer", func() { + m := ConfigMap{} + m.Store("nodeName", "someInvalidValue") + slice, err := m.GetSlice() + Expect(err).To(HaveOccurred()) + Expect(slice).To(BeNil()) + }) + It("return no error", func() { + m := ConfigMap{} + m.Store("nodeName", &nodeconfig.Config{}) + m.Store("nodeName2", &nodeconfig.Config{}) + slice, err := m.GetSlice() + Expect(err).ToNot(HaveOccurred()) + Expect(slice).To(HaveLen(2)) + }) + }) +}) diff --git a/pkg/config_map/mock/mock_config_map.go b/pkg/config_map/mock/mock_config_map.go new file mode 100644 index 00000000..3f706491 --- /dev/null +++ b/pkg/config_map/mock/mock_config_map.go @@ -0,0 +1,116 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/telekom/das-schiff-network-operator/pkg/config_map (interfaces: ConfigMapInterface) + +// Package mock_config_map is a generated GoMock package. +package mock_config_map + +import ( + reflect "reflect" + + nodeconfig "github.com/telekom/das-schiff-network-operator/pkg/nodeconfig" + gomock "go.uber.org/mock/gomock" +) + +// MockConfigMapInterface is a mock of ConfigMapInterface interface. +type MockConfigMapInterface struct { + ctrl *gomock.Controller + recorder *MockConfigMapInterfaceMockRecorder +} + +// MockConfigMapInterfaceMockRecorder is the mock recorder for MockConfigMapInterface. +type MockConfigMapInterfaceMockRecorder struct { + mock *MockConfigMapInterface +} + +// NewMockConfigMapInterface creates a new mock instance. +func NewMockConfigMapInterface(ctrl *gomock.Controller) *MockConfigMapInterface { + mock := &MockConfigMapInterface{ctrl: ctrl} + mock.recorder = &MockConfigMapInterfaceMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. 
+func (m *MockConfigMapInterface) EXPECT() *MockConfigMapInterfaceMockRecorder { + return m.recorder +} + +// Delete mocks base method. +func (m *MockConfigMapInterface) Delete(arg0 interface{}) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "Delete", arg0) +} + +// Delete indicates an expected call of Delete. +func (mr *MockConfigMapInterfaceMockRecorder) Delete(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Delete", reflect.TypeOf((*MockConfigMapInterface)(nil).Delete), arg0) +} + +// Get mocks base method. +func (m *MockConfigMapInterface) Get(arg0 string) (nodeconfig.ConfigInterface, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Get", arg0) + ret0, _ := ret[0].(nodeconfig.ConfigInterface) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Get indicates an expected call of Get. +func (mr *MockConfigMapInterfaceMockRecorder) Get(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockConfigMapInterface)(nil).Get), arg0) +} + +// GetSlice mocks base method. +func (m *MockConfigMapInterface) GetSlice() ([]nodeconfig.ConfigInterface, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetSlice") + ret0, _ := ret[0].([]nodeconfig.ConfigInterface) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetSlice indicates an expected call of GetSlice. +func (mr *MockConfigMapInterfaceMockRecorder) GetSlice() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSlice", reflect.TypeOf((*MockConfigMapInterface)(nil).GetSlice)) +} + +// Load mocks base method. +func (m *MockConfigMapInterface) Load(arg0 interface{}) (interface{}, bool) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Load", arg0) + ret0, _ := ret[0].(interface{}) + ret1, _ := ret[1].(bool) + return ret0, ret1 +} + +// Load indicates an expected call of Load. +func (mr *MockConfigMapInterfaceMockRecorder) Load(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Load", reflect.TypeOf((*MockConfigMapInterface)(nil).Load), arg0) +} + +// Range mocks base method. +func (m *MockConfigMapInterface) Range(arg0 func(interface{}, interface{}) bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "Range", arg0) +} + +// Range indicates an expected call of Range. +func (mr *MockConfigMapInterfaceMockRecorder) Range(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Range", reflect.TypeOf((*MockConfigMapInterface)(nil).Range), arg0) +} + +// Store mocks base method. +func (m *MockConfigMapInterface) Store(arg0, arg1 interface{}) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "Store", arg0, arg1) +} + +// Store indicates an expected call of Store. +func (mr *MockConfigMapInterfaceMockRecorder) Store(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Store", reflect.TypeOf((*MockConfigMapInterface)(nil).Store), arg0, arg1) +} diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index b3084b4c..207202d7 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -43,8 +43,8 @@ var ( // HealthChecker is a struct that holds data required for networking healthcheck. 
type HealthChecker struct {
-	client              client.Client
-	isNetworkingHealthy bool
+	client        client.Client
+	taintsRemoved bool
 	logr.Logger
 	netConfig *NetHealthcheckConfig
 	toolkit   *Toolkit
@@ -61,18 +61,18 @@ func NewHealthChecker(clusterClient client.Client, toolkit *Toolkit, netconf *Ne
 	}
 
 	return &HealthChecker{
-		client:              clusterClient,
-		isNetworkingHealthy: false,
-		Logger:              log.Log.WithName("HealthCheck"),
-		netConfig:           netconf,
-		toolkit:             toolkit,
-		retries:             retries,
+		client:        clusterClient,
+		taintsRemoved: false,
+		Logger:        log.Log.WithName("HealthCheck"),
+		netConfig:     netconf,
+		toolkit:       toolkit,
+		retries:       retries,
 	}, nil
 }
 
-// IsNetworkingHealthy returns value of isNetworkingHealthly bool.
-func (hc *HealthChecker) IsNetworkingHealthy() bool {
-	return hc.isNetworkingHealthy
+// TaintsRemoved returns the value of the taintsRemoved bool.
+func (hc *HealthChecker) TaintsRemoved() bool {
+	return hc.taintsRemoved
 }
 
 // RemoveTaints removes taint from the node.
@@ -102,7 +102,7 @@ func (hc *HealthChecker) RemoveTaints(ctx context.Context) error {
 		}
 	}
 
-	hc.isNetworkingHealthy = true
+	hc.taintsRemoved = true
 	return nil
 }
 
@@ -137,17 +137,6 @@ func (hc *HealthChecker) CheckInterfaces() error {
 	return nil
 }
 
-func (hc *HealthChecker) checkInterface(intf string) error {
-	link, err := hc.toolkit.linkByName(intf)
-	if err != nil {
-		return err
-	}
-	if link.Attrs().OperState != netlink.OperUp {
-		return errors.New("link " + intf + " is not up - current state: " + link.Attrs().OperState.String())
-	}
-	return nil
-}
-
 // CheckReachability checks if all hosts in Reachability slice are reachable.
 func (hc *HealthChecker) CheckReachability() error {
 	for _, i := range hc.netConfig.Reachability {
@@ -163,6 +152,25 @@ func (hc *HealthChecker) CheckReachability() error {
 	return nil
 }
 
+// CheckAPIServer checks if the Kubernetes API server is reachable from the pod.
+func (hc *HealthChecker) CheckAPIServer(ctx context.Context) error {
+	if err := hc.client.List(ctx, &corev1.PodList{}); err != nil {
+		return fmt.Errorf("unable to reach API server: %w", err)
+	}
+	return nil
+}
+
+func (hc *HealthChecker) checkInterface(intf string) error {
+	link, err := hc.toolkit.linkByName(intf)
+	if err != nil {
+		return err
+	}
+	if link.Attrs().OperState != netlink.OperUp {
+		return errors.New("link " + intf + " is not up - current state: " + link.Attrs().OperState.String())
+	}
+	return nil
+}
+
 func (hc *HealthChecker) checkReachabilityItem(r netReachabilityItem) error {
 	target := r.Host + ":" + strconv.Itoa(r.Port)
 	conn, err := hc.toolkit.tcpDialer.Dial("tcp", target)
diff --git a/pkg/healthcheck/healthcheck_test.go b/pkg/healthcheck/healthcheck_test.go
index 2ef639be..074ef408 100644
--- a/pkg/healthcheck/healthcheck_test.go
+++ b/pkg/healthcheck/healthcheck_test.go
@@ -112,10 +112,10 @@ var _ = Describe("RemoveTaints()", func() {
 		hc, err := NewHealthChecker(c, nil, nc)
 		Expect(err).ToNot(HaveOccurred())
 		Expect(hc).ToNot(BeNil())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 		err = hc.RemoveTaints(context.Background())
 		Expect(err).To(HaveOccurred())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 	})
 	It("returns error when trying to remove taint (update node)", func() {
 		c := &updateErrorClient{}
@@ -123,21 +123,21 @@ var _ = Describe("RemoveTaints()", func() {
 		hc, err := NewHealthChecker(c, nil, nc)
 		Expect(err).ToNot(HaveOccurred())
 		Expect(hc).ToNot(BeNil())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 		err = hc.RemoveTaints(context.Background())
 		Expect(err).To(HaveOccurred())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 	})
-	It("removes taint and set isInitialized true", func() {
+	It("removes taint and sets taintsRemoved to true", func() {
 		c := fake.NewClientBuilder().WithRuntimeObjects(fakeNodes).Build()
 		nc := &NetHealthcheckConfig{}
 		hc, err := NewHealthChecker(c, nil, nc)
 		Expect(err).ToNot(HaveOccurred())
 		Expect(hc).ToNot(BeNil())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 		err = hc.RemoveTaints(context.Background())
 		Expect(err).ToNot(HaveOccurred())
-		Expect(hc.IsNetworkingHealthy()).To(BeTrue())
+		Expect(hc.TaintsRemoved()).To(BeTrue())
 	})
 })
 var _ = Describe("CheckInterfaces()", func() {
@@ -147,10 +147,10 @@ var _ = Describe("CheckInterfaces()", func() {
 		hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeErrorGetByName, &net.Dialer{Timeout: time.Duration(3)}), nc)
 		Expect(err).ToNot(HaveOccurred())
 		Expect(hc).ToNot(BeNil())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 		err = hc.CheckInterfaces()
 		Expect(err).To(HaveOccurred())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 	})
 	It("returns error if interface is not up", func() {
 		c := fake.NewClientBuilder().Build()
@@ -158,10 +158,10 @@ var _ = Describe("CheckInterfaces()", func() {
 		hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeDownGetByName, &net.Dialer{Timeout: time.Duration(3)}), nc)
 		Expect(err).ToNot(HaveOccurred())
 		Expect(hc).ToNot(BeNil())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		Expect(hc.TaintsRemoved()).To(BeFalse())
 		err = hc.CheckInterfaces()
 		Expect(err).To(HaveOccurred())
-		Expect(hc.IsNetworkingHealthy()).To(BeFalse())
+		
Expect(hc.TaintsRemoved()).To(BeFalse()) }) It("returns error if all links are up", func() { c := fake.NewClientBuilder().Build() @@ -169,10 +169,10 @@ var _ = Describe("CheckInterfaces()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, NewTCPDialer("")), nc) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) err = hc.CheckInterfaces() Expect(err).ToNot(HaveOccurred()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) }) }) var _ = Describe("NewTcpDialer()", func() { @@ -181,7 +181,7 @@ var _ = Describe("NewTcpDialer()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, NewTCPDialer("")), &NetHealthcheckConfig{}) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) d := hc.toolkit.tcpDialer.(*net.Dialer) Expect(d.Timeout).To(Equal(time.Second * 3)) }) @@ -190,7 +190,7 @@ var _ = Describe("NewTcpDialer()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, NewTCPDialer("5")), &NetHealthcheckConfig{}) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) d := hc.toolkit.tcpDialer.(*net.Dialer) Expect(d.Timeout).To(Equal(time.Second * 5)) }) @@ -199,7 +199,7 @@ var _ = Describe("NewTcpDialer()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, NewTCPDialer("500ms")), &NetHealthcheckConfig{}) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) d := hc.toolkit.tcpDialer.(*net.Dialer) Expect(d.Timeout).To(Equal(time.Millisecond * 500)) }) @@ -216,7 +216,7 @@ var _ = Describe("CheckReachability()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, dialerMock), nc) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) err = hc.CheckReachability() Expect(err).To(HaveOccurred()) }) @@ -229,7 +229,7 @@ var _ = Describe("CheckReachability()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, dialerMock), nc) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) err = hc.CheckReachability() Expect(err).ToNot(HaveOccurred()) }) @@ -242,7 +242,7 @@ var _ = Describe("CheckReachability()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, dialerMock), nc) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) err = hc.CheckReachability() Expect(err).ToNot(HaveOccurred()) }) @@ -256,11 +256,21 @@ var _ = Describe("CheckReachability()", func() { hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, fakeUpGetByName, dialerMock), nc) Expect(err).ToNot(HaveOccurred()) Expect(hc).ToNot(BeNil()) - Expect(hc.IsNetworkingHealthy()).To(BeFalse()) + Expect(hc.TaintsRemoved()).To(BeFalse()) err = hc.CheckReachability() Expect(err).To(HaveOccurred()) }) }) +var _ = Describe("CheckAPIServer()", func() { + It("should return no error", func() { + c := 
fake.NewClientBuilder().Build() + hc, err := NewHealthChecker(c, NewHealthCheckToolkit(nil, nil, nil), &NetHealthcheckConfig{}) + Expect(err).ToNot(HaveOccurred()) + Expect(hc).ToNot(BeNil()) + err = hc.CheckAPIServer(context.TODO()) + Expect(err).ToNot(HaveOccurred()) + }) +}) func fakeErrorGetByName(_ string) (netlink.Link, error) { return nil, errors.New("Link not found") diff --git a/pkg/managerconfig/managerconfig_test.go b/pkg/managerconfig/managerconfig_test.go index 1ca53f82..c055743e 100644 --- a/pkg/managerconfig/managerconfig_test.go +++ b/pkg/managerconfig/managerconfig_test.go @@ -8,14 +8,10 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -var _ = BeforeSuite(func() { - -}) - -func TestHealthCheck(t *testing.T) { +func TestManagerConfig(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, - "HealthCheck Suite") + "ManagerConfig Suite") } var _ = Describe("Load()", func() { diff --git a/pkg/nodeconfig/mock/mock_nodeconfig.go b/pkg/nodeconfig/mock/mock_nodeconfig.go new file mode 100644 index 00000000..5c71a645 --- /dev/null +++ b/pkg/nodeconfig/mock/mock_nodeconfig.go @@ -0,0 +1,255 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/telekom/das-schiff-network-operator/pkg/nodeconfig (interfaces: ConfigInterface) + +// Package mock_nodeconfig is a generated GoMock package. +package mock_nodeconfig + +import ( + context "context" + reflect "reflect" + time "time" + + logr "github.com/go-logr/logr" + v1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + gomock "go.uber.org/mock/gomock" + client "sigs.k8s.io/controller-runtime/pkg/client" +) + +// MockConfigInterface is a mock of ConfigInterface interface. +type MockConfigInterface struct { + ctrl *gomock.Controller + recorder *MockConfigInterfaceMockRecorder +} + +// MockConfigInterfaceMockRecorder is the mock recorder for MockConfigInterface. +type MockConfigInterfaceMockRecorder struct { + mock *MockConfigInterface +} + +// NewMockConfigInterface creates a new mock instance. +func NewMockConfigInterface(ctrl *gomock.Controller) *MockConfigInterface { + mock := &MockConfigInterface{ctrl: ctrl} + mock.recorder = &MockConfigInterfaceMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockConfigInterface) EXPECT() *MockConfigInterfaceMockRecorder { + return m.recorder +} + +// CrateInvalid mocks base method. +func (m *MockConfigInterface) CrateInvalid(arg0 context.Context, arg1 client.Client) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CrateInvalid", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// CrateInvalid indicates an expected call of CrateInvalid. +func (mr *MockConfigInterfaceMockRecorder) CrateInvalid(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CrateInvalid", reflect.TypeOf((*MockConfigInterface)(nil).CrateInvalid), arg0, arg1) +} + +// CreateBackup mocks base method. +func (m *MockConfigInterface) CreateBackup(arg0 context.Context, arg1 client.Client) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateBackup", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// CreateBackup indicates an expected call of CreateBackup. 
+func (mr *MockConfigInterfaceMockRecorder) CreateBackup(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateBackup", reflect.TypeOf((*MockConfigInterface)(nil).CreateBackup), arg0, arg1) +} + +// Deploy mocks base method. +func (m *MockConfigInterface) Deploy(arg0 context.Context, arg1 client.Client, arg2 logr.Logger, arg3 time.Duration) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Deploy", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// Deploy indicates an expected call of Deploy. +func (mr *MockConfigInterfaceMockRecorder) Deploy(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Deploy", reflect.TypeOf((*MockConfigInterface)(nil).Deploy), arg0, arg1, arg2, arg3) +} + +// GetActive mocks base method. +func (m *MockConfigInterface) GetActive() bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetActive") + ret0, _ := ret[0].(bool) + return ret0 +} + +// GetActive indicates an expected call of GetActive. +func (mr *MockConfigInterfaceMockRecorder) GetActive() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetActive", reflect.TypeOf((*MockConfigInterface)(nil).GetActive)) +} + +// GetCancelFunc mocks base method. +func (m *MockConfigInterface) GetCancelFunc() *context.CancelFunc { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCancelFunc") + ret0, _ := ret[0].(*context.CancelFunc) + return ret0 +} + +// GetCancelFunc indicates an expected call of GetCancelFunc. +func (mr *MockConfigInterfaceMockRecorder) GetCancelFunc() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCancelFunc", reflect.TypeOf((*MockConfigInterface)(nil).GetCancelFunc)) +} + +// GetCurrentConfigStatus mocks base method. +func (m *MockConfigInterface) GetCurrentConfigStatus() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCurrentConfigStatus") + ret0, _ := ret[0].(string) + return ret0 +} + +// GetCurrentConfigStatus indicates an expected call of GetCurrentConfigStatus. +func (mr *MockConfigInterfaceMockRecorder) GetCurrentConfigStatus() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCurrentConfigStatus", reflect.TypeOf((*MockConfigInterface)(nil).GetCurrentConfigStatus)) +} + +// GetDeployed mocks base method. +func (m *MockConfigInterface) GetDeployed() bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetDeployed") + ret0, _ := ret[0].(bool) + return ret0 +} + +// GetDeployed indicates an expected call of GetDeployed. +func (mr *MockConfigInterfaceMockRecorder) GetDeployed() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetDeployed", reflect.TypeOf((*MockConfigInterface)(nil).GetDeployed)) +} + +// GetInvalid mocks base method. +func (m *MockConfigInterface) GetInvalid() *v1alpha1.NodeConfig { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetInvalid") + ret0, _ := ret[0].(*v1alpha1.NodeConfig) + return ret0 +} + +// GetInvalid indicates an expected call of GetInvalid. +func (mr *MockConfigInterfaceMockRecorder) GetInvalid() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetInvalid", reflect.TypeOf((*MockConfigInterface)(nil).GetInvalid)) +} + +// GetName mocks base method. 
+func (m *MockConfigInterface) GetName() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetName") + ret0, _ := ret[0].(string) + return ret0 +} + +// GetName indicates an expected call of GetName. +func (mr *MockConfigInterfaceMockRecorder) GetName() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetName", reflect.TypeOf((*MockConfigInterface)(nil).GetName)) +} + +// GetNext mocks base method. +func (m *MockConfigInterface) GetNext() *v1alpha1.NodeConfig { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetNext") + ret0, _ := ret[0].(*v1alpha1.NodeConfig) + return ret0 +} + +// GetNext indicates an expected call of GetNext. +func (mr *MockConfigInterfaceMockRecorder) GetNext() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNext", reflect.TypeOf((*MockConfigInterface)(nil).GetNext)) +} + +// Prune mocks base method. +func (m *MockConfigInterface) Prune(arg0 context.Context, arg1 client.Client) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Prune", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// Prune indicates an expected call of Prune. +func (mr *MockConfigInterfaceMockRecorder) Prune(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Prune", reflect.TypeOf((*MockConfigInterface)(nil).Prune), arg0, arg1) +} + +// SetActive mocks base method. +func (m *MockConfigInterface) SetActive(arg0 bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetActive", arg0) +} + +// SetActive indicates an expected call of SetActive. +func (mr *MockConfigInterfaceMockRecorder) SetActive(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetActive", reflect.TypeOf((*MockConfigInterface)(nil).SetActive), arg0) +} + +// SetBackupAsNext mocks base method. +func (m *MockConfigInterface) SetBackupAsNext() bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetBackupAsNext") + ret0, _ := ret[0].(bool) + return ret0 +} + +// SetBackupAsNext indicates an expected call of SetBackupAsNext. +func (mr *MockConfigInterfaceMockRecorder) SetBackupAsNext() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetBackupAsNext", reflect.TypeOf((*MockConfigInterface)(nil).SetBackupAsNext)) +} + +// SetCancelFunc mocks base method. +func (m *MockConfigInterface) SetCancelFunc(arg0 *context.CancelFunc) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetCancelFunc", arg0) +} + +// SetCancelFunc indicates an expected call of SetCancelFunc. +func (mr *MockConfigInterfaceMockRecorder) SetCancelFunc(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetCancelFunc", reflect.TypeOf((*MockConfigInterface)(nil).SetCancelFunc), arg0) +} + +// SetDeployed mocks base method. +func (m *MockConfigInterface) SetDeployed(arg0 bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetDeployed", arg0) +} + +// SetDeployed indicates an expected call of SetDeployed. +func (mr *MockConfigInterfaceMockRecorder) SetDeployed(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetDeployed", reflect.TypeOf((*MockConfigInterface)(nil).SetDeployed), arg0) +} + +// UpdateNext mocks base method. 
+func (m *MockConfigInterface) UpdateNext(arg0 *v1alpha1.NodeConfig) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "UpdateNext", arg0) +} + +// UpdateNext indicates an expected call of UpdateNext. +func (mr *MockConfigInterfaceMockRecorder) UpdateNext(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateNext", reflect.TypeOf((*MockConfigInterface)(nil).UpdateNext), arg0) +} diff --git a/pkg/nodeconfig/nodeconfig.go b/pkg/nodeconfig/nodeconfig.go new file mode 100644 index 00000000..085bf4fa --- /dev/null +++ b/pkg/nodeconfig/nodeconfig.go @@ -0,0 +1,458 @@ +package nodeconfig + +import ( + "context" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/go-logr/logr" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +//go:generate mockgen -destination ./mock/mock_nodeconfig.go . ConfigInterface +type ConfigInterface interface { + CrateInvalid(ctx context.Context, c client.Client) error + CreateBackup(ctx context.Context, c client.Client) error + Deploy(ctx context.Context, c client.Client, logger logr.Logger, invalidationTimeout time.Duration) error + GetActive() bool + GetCancelFunc() *context.CancelFunc + GetCurrentConfigStatus() string + GetDeployed() bool + GetInvalid() *v1alpha1.NodeConfig + GetName() string + GetNext() *v1alpha1.NodeConfig + Prune(ctx context.Context, c client.Client) error + SetActive(value bool) + SetBackupAsNext() bool + SetCancelFunc(f *context.CancelFunc) + SetDeployed(value bool) + UpdateNext(next *v1alpha1.NodeConfig) +} + +const ( + StatusProvisioning = "provisioning" + StatusInvalid = "invalid" + StatusProvisioned = "provisioned" + statusEmpty = "" + + DefaultNodeUpdateLimit = 1 + defaultCooldownTime = 100 * time.Millisecond + + InvalidSuffix = "-invalid" + BackupSuffix = "-backup" + + ParentCtx contextKey = "parentCtx" +) + +type Config struct { + name string + current *v1alpha1.NodeConfig + next *v1alpha1.NodeConfig + backup *v1alpha1.NodeConfig + invalid *v1alpha1.NodeConfig + mtx sync.RWMutex + active atomic.Bool + deployed atomic.Bool + cancelFunc atomic.Pointer[context.CancelFunc] +} + +type contextKey string + +func New(name string, current, backup, invalid *v1alpha1.NodeConfig) *Config { + nc := NewEmpty(name) + nc.current = current + nc.backup = backup + nc.invalid = invalid + return nc +} + +func NewEmpty(name string) *Config { + nc := &Config{ + name: name, + current: v1alpha1.NewEmptyConfig(name), + } + nc.active.Store(true) + return nc +} + +func (nc *Config) SetCancelFunc(f *context.CancelFunc) { + nc.cancelFunc.Store(f) +} + +func (nc *Config) GetCancelFunc() *context.CancelFunc { + return nc.cancelFunc.Load() +} + +func (nc *Config) GetName() string { + nc.mtx.RLock() + defer nc.mtx.RUnlock() + return nc.name +} + +func (nc *Config) SetActive(value bool) { + nc.active.Store(value) +} + +func (nc *Config) GetActive() bool { + return nc.active.Load() +} + +func (nc *Config) SetDeployed(value bool) { + nc.mtx.Lock() + defer nc.mtx.Unlock() + nc.deployed.Store(value) +} + +func (nc *Config) GetDeployed() bool { + nc.mtx.RLock() + defer nc.mtx.RUnlock() + return nc.deployed.Load() +} + +func (nc *Config) GetNext() *v1alpha1.NodeConfig { + nc.mtx.RLock() + defer nc.mtx.RUnlock() + return nc.next +} + +func (nc *Config) GetInvalid() *v1alpha1.NodeConfig { + nc.mtx.RLock() + defer nc.mtx.RUnlock() + return nc.invalid +} + +func 
(nc *Config) GetCurrentConfigStatus() string { + nc.mtx.RLock() + defer nc.mtx.RUnlock() + return nc.current.Status.ConfigStatus +} + +func (nc *Config) UpdateNext(next *v1alpha1.NodeConfig) { + nc.mtx.Lock() + defer nc.mtx.Unlock() + if nc.next == nil { + nc.next = v1alpha1.NewEmptyConfig(nc.name) + } + v1alpha1.CopyNodeConfig(next, nc.next, nc.name) +} + +func (nc *Config) Deploy(ctx context.Context, c client.Client, logger logr.Logger, invalidationTimeout time.Duration) error { + skip, err := nc.createAPIObjects(ctx, c, logger) + if err != nil { + return fmt.Errorf("error creating API objects: %w", err) + } + + // either node was deleted or new config equals current config - skip + if skip { + return nil + } + + if err := nc.waitForConfig(ctx, c, nc.current, statusEmpty, false, logger, false, invalidationTimeout); err != nil { + return fmt.Errorf("error waiting for config %s with status %s: %w", nc.name, statusEmpty, err) + } + + if err := nc.updateStatus(ctx, c, nc.current, StatusProvisioning); err != nil { + return fmt.Errorf("error updating status of config %s to %s: %w", nc.name, StatusProvisioning, err) + } + + if err := nc.waitForConfig(ctx, c, nc.current, StatusProvisioning, false, logger, false, invalidationTimeout); err != nil { + return fmt.Errorf("error waiting for config %s with status %s: %w", nc.name, StatusProvisioning, err) + } + + if err := nc.waitForConfig(ctx, c, nc.current, StatusProvisioned, true, logger, true, invalidationTimeout); err != nil { + return fmt.Errorf("error waiting for config %s with status %s: %w", nc.name, StatusProvisioned, err) + } + + return nil +} + +func (nc *Config) createAPIObjects(ctx context.Context, c client.Client, logger logr.Logger) (bool, error) { + nc.mtx.Lock() + defer nc.mtx.Unlock() + + if nc.next == nil { + nc.next = v1alpha1.NewEmptyConfig(nc.name) + } + + if !nc.active.Load() { + return true, nil + } + + if nc.current == nil { + nc.current = v1alpha1.NewEmptyConfig(nc.name) + } + + skip, err := createOrUpdate(ctx, c, nc.current, nc.next, logger) + if err != nil { + return false, fmt.Errorf("error configuring node config object: %w", err) + } + + if skip { + return true, nil + } + + nc.deployed.Store(true) + + if err := nc.CreateBackup(ctx, c); err != nil { + return false, fmt.Errorf("error creating backup config: %w", err) + } + + return false, nil +} + +func createOrUpdate(ctx context.Context, c client.Client, current, next *v1alpha1.NodeConfig, logger logr.Logger) (bool, error) { + if err := c.Get(ctx, client.ObjectKeyFromObject(current), current); err != nil && apierrors.IsNotFound(err) { + v1alpha1.CopyNodeConfig(next, current, current.Name) + // config does not exist - create + if err := c.Create(ctx, current); err != nil { + return false, fmt.Errorf("error creating NodeConfig object: %w", err) + } + } else if err != nil { + return false, fmt.Errorf("error getting current config: %w", err) + } else { + // config already exists - update + // check if new config is equal to existing config + // if so, skip the update as nothing has to be updated + if next.IsEqual(current) { + logger.Info("new config is equal to current config, skipping...", "config", current.Name) + return true, nil + } + v1alpha1.CopyNodeConfig(next, current, current.Name) + if err := updateConfig(ctx, c, current); err != nil { + return false, fmt.Errorf("error updating NodeConfig object: %w", err) + } + } + return false, nil +} + +func (nc *Config) SetBackupAsNext() bool { + nc.mtx.Lock() + defer nc.mtx.Unlock() + if nc.backup != nil { + if nc.next == nil { 
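+			// no next config yet - allocate an empty one to receive the backup copy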
+			nc.next = v1alpha1.NewEmptyConfig(nc.current.Name)
+		}
+		v1alpha1.CopyNodeConfig(nc.backup, nc.next, nc.current.Name)
+		return true
+	}
+	return false
+}
+
+func (nc *Config) CreateBackup(ctx context.Context, c client.Client) error {
+	backupName := nc.name + BackupSuffix
+	createNew := false
+	if nc.backup == nil {
+		nc.backup = v1alpha1.NewEmptyConfig(backupName)
+	}
+	if err := c.Get(ctx, types.NamespacedName{Name: backupName}, nc.backup); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return fmt.Errorf("error getting backup config %s: %w", backupName, err)
+		}
+		createNew = true
+	}
+
+	if nc.current != nil {
+		v1alpha1.CopyNodeConfig(nc.current, nc.backup, backupName)
+	} else {
+		v1alpha1.CopyNodeConfig(v1alpha1.NewEmptyConfig(backupName), nc.backup, backupName)
+	}
+
+	if createNew {
+		if err := c.Create(ctx, nc.backup); err != nil {
+			return fmt.Errorf("error creating backup config: %w", err)
+		}
+	} else {
+		if err := c.Update(ctx, nc.backup); err != nil {
+			return fmt.Errorf("error updating backup config: %w", err)
+		}
+	}
+
+	return nil
+}
+
+func (nc *Config) CrateInvalid(ctx context.Context, c client.Client) error {
+	nc.mtx.Lock()
+	defer nc.mtx.Unlock()
+	invalidName := fmt.Sprintf("%s%s", nc.name, InvalidSuffix)
+
+	if nc.invalid == nil {
+		nc.invalid = v1alpha1.NewEmptyConfig(invalidName)
+	}
+
+	if err := c.Get(ctx, types.NamespacedName{Name: invalidName}, nc.invalid); err != nil {
+		if apierrors.IsNotFound(err) {
+			// invalid config for the node does not exist - create new
+			v1alpha1.CopyNodeConfig(nc.current, nc.invalid, invalidName)
+			if err = c.Create(ctx, nc.invalid); err != nil {
+				return fmt.Errorf("cannot store invalid config for node %s: %w", nc.name, err)
+			}
+			return nil
+		}
+		// other kind of error occurred - abort
+		return fmt.Errorf("error getting invalid config for node %s: %w", nc.name, err)
+	}
+
+	// invalid config for the node exists - update
+	v1alpha1.CopyNodeConfig(nc.current, nc.invalid, invalidName)
+	if err := updateConfig(ctx, c, nc.invalid); err != nil {
+		return fmt.Errorf("error updating invalid config for node %s: %w", nc.name, err)
+	}
+
+	return nil
+}
+
+func (nc *Config) Prune(ctx context.Context, c client.Client) error {
+	nc.mtx.Lock()
+	defer nc.mtx.Unlock()
+
+	if nc.current != nil {
+		if err := c.Delete(ctx, nc.current); err != nil && !apierrors.IsNotFound(err) {
+			return fmt.Errorf("error deleting current config: %w", err)
+		}
+	}
+	if nc.backup != nil {
+		if err := c.Delete(ctx, nc.backup); err != nil && !apierrors.IsNotFound(err) {
+			return fmt.Errorf("error deleting backup config: %w", err)
+		}
+	}
+	if nc.invalid != nil {
+		if err := c.Delete(ctx, nc.invalid); err != nil && !apierrors.IsNotFound(err) {
+			return fmt.Errorf("error deleting invalid config: %w", err)
+		}
+	}
+	return nil
+}
+
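+// waitForConfig polls the NodeConfig until it reaches expectedStatus (an empty
+// expectedStatus accepts any status), the node becomes inactive, or the context ends.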
config.Status.ConfigStatus == StatusInvalid { + return fmt.Errorf("error creating NodeConfig - node %s reported state as %s", config.Name, config.Status.ConfigStatus) + } + time.Sleep(defaultCooldownTime) + } + } +} + +func (nc *Config) handleContextDone(ctx context.Context, c client.Client, config *v1alpha1.NodeConfig, + logger logr.Logger, invalidate bool, invalidationTimeout time.Duration) error { + // context cancelled means that the node was removed + // don't report an error here + if errors.Is(ctx.Err(), context.Canceled) { + return nil + } + + if errors.Is(ctx.Err(), context.DeadlineExceeded) && invalidate { + if err := nc.handleContextDeadline(ctx, c, invalidationTimeout, config, logger); err != nil { + return fmt.Errorf("error while handling config invalidation: %w", err) + } + return fmt.Errorf("context timeout: %w", ctx.Err()) + } + // return an error for anything other than cancellation + return fmt.Errorf("context error: %w", ctx.Err()) +} + +func (nc *Config) apiUpdate(ctx context.Context, c client.Client, config *v1alpha1.NodeConfig) error { + nc.mtx.Lock() + defer nc.mtx.Unlock() + if err := c.Get(ctx, types.NamespacedName{Name: config.Name, Namespace: config.Namespace}, config); err != nil { + if apierrors.IsNotFound(err) { + // discard error - node was deleted + return nil + } + return fmt.Errorf("error getting config %s from API server: %w", config.Name, err) + } + return nil +} + +// old context exceeded its deadline, so a new context is created from the parent +// nolint: contextcheck +func (nc *Config) handleContextDeadline(ctx context.Context, c client.Client, invalidationTimeout time.Duration, config *v1alpha1.NodeConfig, logger logr.Logger) error { + pCtx, ok := ctx.Value(ParentCtx).(context.Context) + if !ok { + return fmt.Errorf("error getting parent context") + } + statusCtx, statusCancel := context.WithTimeout(pCtx, invalidationTimeout) + defer statusCancel() + + if err := nc.updateStatus(statusCtx, c, config, StatusInvalid); err != nil { + return fmt.Errorf("error setting config %s status %s: %w", config.GetName(), StatusInvalid, err) + } + + if err := nc.waitForConfig(statusCtx, c, config, StatusInvalid, false, logger, false, invalidationTimeout); err != nil { + return fmt.Errorf("error waiting for config %s status %s: %w", config.GetName(), StatusInvalid, err) + } + return nil +} + +func (nc *Config) updateStatus(ctx context.Context, c client.Client, config *v1alpha1.NodeConfig, status string) error { + for { + select { + case <-ctx.Done(): + return fmt.Errorf("status update error: %w", ctx.Err()) + default: + nc.mtx.Lock() + config.Status.ConfigStatus = status + err := c.Status().Update(ctx, config) + nc.mtx.Unlock() + if err != nil { + if apierrors.IsConflict(err) { + // if there is a conflict, update local copy of the config + nc.mtx.Lock() + if getErr := c.Get(ctx, client.ObjectKeyFromObject(config), config); getErr != nil { + nc.mtx.Unlock() + return fmt.Errorf("error updating status: %w", getErr) + } + nc.mtx.Unlock() + time.Sleep(defaultCooldownTime) + continue + } + return fmt.Errorf("status update error: %w", err) + } else { + return nil + } + } + } +} + +func updateConfig(ctx context.Context, c client.Client, config *v1alpha1.NodeConfig) error { + for { + select { + case <-ctx.Done(): + return fmt.Errorf("config update error (context): %w", ctx.Err()) + default: + if err := c.Update(ctx, config); err != nil { + if apierrors.IsConflict(err) { + // if there is a conflict, update local copy of the config + if err := c.Get(ctx, client.ObjectKeyFromObject(config),
config); err != nil { + return fmt.Errorf("config update error (conflict): %w", err) + } + time.Sleep(defaultCooldownTime) + continue + } + return fmt.Errorf("config update error (error): %w", err) + } else { + return nil + } + } + } +} diff --git a/pkg/nodeconfig/nodeconfig_test.go b/pkg/nodeconfig/nodeconfig_test.go new file mode 100644 index 00000000..f5c3803e --- /dev/null +++ b/pkg/nodeconfig/nodeconfig_test.go @@ -0,0 +1,268 @@ +package nodeconfig + +import ( + "context" + "encoding/json" + "testing" + "time" + + "github.com/go-logr/logr" + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var ( + testConfigName = "testConfig" + + fakeProcessState = ` + { + "apiVersion": "v1", + "items": [ + { + "apiVersion": "network.schiff.telekom.de/v1alpha1", + "kind": "NodeConfigProcess", + "metadata": { + "creationTimestamp": "2024-04-15T11:19:06Z", + "generation": 12, + "name": "network-operator", + "resourceVersion": "223252", + "uid": "4ad359bb-bb7d-4a1d-bf43-551c04b592d5" + }, + "spec": { + "state": "provisioning" + } + } + ], + "kind": "List", + "metadata": { + "resourceVersion": "" + } + }` + + emptyNodeConfig = ` + { + "apiVersion": "v1", + "items": [ + { + "apiVersion": "network.schiff.telekom.de/v1alpha1", + "kind": "NodeConfig", + "metadata": { + "creationTimestamp": "2024-04-15T11:22:08Z", + "generation": 2, + "name": "testConfig", + "resourceVersion": "222987", + "uid": "fc0376a2-7f6a-4388-8166-298b21cf2f89" + }, + "spec": { + "layer2": [], + "routingTable": [], + "vrf": [] + }, + "status": { + "configStatus": "provisioned" + } + }, + { + "apiVersion": "network.schiff.telekom.de/v1alpha1", + "kind": "NodeConfig", + "metadata": { + "creationTimestamp": "2024-04-15T11:22:08Z", + "generation": 3, + "name": "testConfig-backup", + "resourceVersion": "223106", + "uid": "5b0ed728-47ed-46cb-a678-8e32dda826ee" + }, + "spec": { + "layer2": [], + "routingTable": [], + "vrf": [] + } + } + ], + "kind": "List", + "metadata": { + "resourceVersion": "" + } + }` + + fakeProcess *v1alpha1.NodeConfigProcessList + fakeNodeConfig *v1alpha1.NodeConfigList +) + +var _ = BeforeSuite(func() { + fakeProcess = &v1alpha1.NodeConfigProcessList{} + err := json.Unmarshal([]byte(fakeProcessState), fakeProcess) + Expect(err).ShouldNot(HaveOccurred()) + fakeNodeConfig = &v1alpha1.NodeConfigList{} + err = json.Unmarshal([]byte(emptyNodeConfig), fakeNodeConfig) + Expect(err).ShouldNot(HaveOccurred()) +}) + +func TestNodeConfig(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, + "NodeConfig Suite") +} + +var _ = Describe("NodeConfig", func() { + Context("New() should", func() { + It("create new NodeConfig with given data", func() { + current := &v1alpha1.NodeConfig{Status: v1alpha1.NodeConfigStatus{ConfigStatus: StatusProvisioned}} + backup := &v1alpha1.NodeConfig{} + invalid := &v1alpha1.NodeConfig{} + config := New(testConfigName, current, backup, invalid) + Expect(config).ToNot(BeNil()) + Expect(config.current).To(Equal(current)) + Expect(config.backup).To(Equal(backup)) + Expect(config.GetInvalid()).To(Equal(invalid)) + Expect(config.GetName()).To(Equal(testConfigName)) + Expect(config.GetCurrentConfigStatus()).To(Equal(StatusProvisioned)) + }) + }) + Context("SetCancelFunc()/GetCancelFunc() should", func() { + It("set and return cancel function", func() { + config :=
NewEmpty(testConfigName) + _, cancel := context.WithCancel(context.Background()) + config.SetCancelFunc(&cancel) + setCancel := config.GetCancelFunc() + Expect(setCancel).To(Equal(&cancel)) + }) + }) + Context("SetActive()/GetActive() should", func() { + It("set and return active state", func() { + config := NewEmpty(testConfigName) + config.SetActive(true) + Expect(config.GetActive()).To(BeTrue()) + config.SetActive(false) + Expect(config.GetActive()).To(BeFalse()) + }) + }) + Context("SetDeployed()/GetDeployed() should", func() { + It("set and return deployed state", func() { + config := NewEmpty(testConfigName) + config.SetDeployed(true) + Expect(config.GetDeployed()).To(BeTrue()) + config.SetDeployed(false) + Expect(config.GetDeployed()).To(BeFalse()) + }) + }) + Context("GetNext() should", func() { + It("return next config to be deployed", func() { + config := NewEmpty(testConfigName) + cfg := v1alpha1.NewEmptyConfig(testConfigName) + config.UpdateNext(cfg) + Expect(config.GetNext().IsEqual(cfg)).To(BeTrue()) + }) + }) + Context("SetBackupAsNext() should", func() { + It("copy values of backup config to next config", func() { + config := NewEmpty(testConfigName) + config.backup = v1alpha1.NewEmptyConfig(testConfigName + BackupSuffix) + config.backup.Spec.RoutingTable = append(config.backup.Spec.RoutingTable, v1alpha1.RoutingTableSpec{TableID: 1}) + wasSet := config.SetBackupAsNext() + Expect(wasSet).To(BeTrue()) + Expect(config.GetNext().IsEqual(config.backup)).To(BeTrue()) + }) + }) + Context("Deploy() should", func() { + It("skip the deployment if it is the same as the existing one", func() { + config := NewEmpty(testConfigName) + ctx := context.TODO() + c := createClient(fakeNodeConfig) + err := config.Deploy(ctx, c, logr.New(nil), time.Millisecond*200) + Expect(err).ToNot(HaveOccurred()) + }) + It("return error if context deadline was exceeded when deploying config", func() { + config := NewEmpty(testConfigName) + parent := context.Background() + ctx, cancel := context.WithTimeout(parent, time.Millisecond*200) + defer cancel() + childCtx := context.WithValue(ctx, ParentCtx, parent) + + fakeNodeConfig.Items[0].Spec.RoutingTable = []v1alpha1.RoutingTableSpec{{TableID: 1}} + c := createClient(fakeNodeConfig) + + err := config.Deploy(childCtx, c, logr.New(nil), time.Millisecond*200) + Expect(err).To(HaveOccurred()) + }) + It("return error if context deadline was exceeded when invalidating config", func() { + config := NewEmpty(testConfigName) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*200) + defer cancel() + childCtx := context.WithValue(ctx, ParentCtx, ctx) + fakeNodeConfig.Items[0].Spec.RoutingTable = []v1alpha1.RoutingTableSpec{{TableID: 1}} + c := createClient(fakeNodeConfig) + + err := config.Deploy(childCtx, c, logr.New(nil), time.Millisecond*200) + Expect(err).To(HaveOccurred()) + }) + It("return no error if deployment was successful", func() { + config := NewEmpty(testConfigName) + config.active.Store(true) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*300) + defer cancel() + childCtx := context.WithValue(ctx, ParentCtx, ctx) + fakeNodeConfig.Items[0].Spec.RoutingTable = []v1alpha1.RoutingTableSpec{{TableID: 1}} + c := createClient(fakeNodeConfig) + + quit := make(chan bool) + var deployErr error + go func() { + deployErr = config.Deploy(childCtx, c, logr.New(nil), time.Millisecond*300) + quit <- true + }() + + time.Sleep(time.Millisecond * 100) + err := config.updateStatus(ctx, c, config.current, StatusProvisioned) 
+ Expect(err).ToNot(HaveOccurred()) + + <-quit + Expect(deployErr).ToNot(HaveOccurred()) + }) + }) + Context("CreateBackup() should", func() { + It("return no error if backup config was created", func() { + config := NewEmpty(testConfigName) + c := createClient(fakeNodeConfig) + err := config.CreateBackup(context.Background(), c) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("CreateInvalid() should", func() { + It("return no error if invalid config was created", func() { + config := NewEmpty(testConfigName) + c := createClient(fakeNodeConfig) + err := config.CrateInvalid(context.Background(), c) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("Prune() should", func() { + It("return no error if all configs were deleted", func() { + config := New(testConfigName, + v1alpha1.NewEmptyConfig(testConfigName), + v1alpha1.NewEmptyConfig(testConfigName+BackupSuffix), + v1alpha1.NewEmptyConfig(testConfigName+InvalidSuffix), + ) + c := createClient(fakeNodeConfig) + err := config.Prune(context.Background(), c) + Expect(err).ToNot(HaveOccurred()) + }) + }) +}) + +func createClient(nodeConfigs *v1alpha1.NodeConfigList) client.Client { + s := runtime.NewScheme() + err := corev1.AddToScheme(s) + Expect(err).ToNot(HaveOccurred()) + err = v1alpha1.AddToScheme(s) + Expect(err).ToNot(HaveOccurred()) + return fake.NewClientBuilder().WithScheme(s). + WithRuntimeObjects(nodeConfigs, fakeProcess). + WithStatusSubresource(&fakeNodeConfig.Items[0]). + Build() +} diff --git a/pkg/reconciler/config_reconciler.go b/pkg/reconciler/config_reconciler.go new file mode 100644 index 00000000..a954f941 --- /dev/null +++ b/pkg/reconciler/config_reconciler.go @@ -0,0 +1,204 @@ +package reconciler + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/go-logr/logr" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + "github.com/telekom/das-schiff-network-operator/pkg/debounce" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + DefaultTimeout = "60s" + DefaultNodeUpdateLimit = 1 +) + +//go:generate mockgen -destination ./mock/mock_config_reconciler.go . ConfigReconcilerInterface +type ConfigReconcilerInterface interface { + CreateConfigForNode(string, *corev1.Node) (*v1alpha1.NodeConfig, error) +} + +// ConfigReconciler is responsible for creating NodeConfig objects. +type ConfigReconciler struct { + globalCfg *v1alpha1.NodeConfig + logger logr.Logger + debouncer *debounce.Debouncer + client client.Client + timeout time.Duration + + configManagerInform chan bool +} + +type reconcileConfig struct { + *ConfigReconciler + logr.Logger +} + +// Reconcile starts reconciliation. +func (cr *ConfigReconciler) Reconcile(ctx context.Context) { + cr.debouncer.Debounce(ctx) +} + +// NewConfigReconciler creates a new reconciler that creates NodeConfig objects.
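+// It notifies the config manager through the cmInfo channel once the global configuration has been rebuilt.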
+func NewConfigReconciler(clusterClient client.Client, logger logr.Logger, timeout time.Duration, cmInfo chan bool) (*ConfigReconciler, error) { + reconciler := &ConfigReconciler{ + logger: logger, + timeout: timeout, + client: clusterClient, + configManagerInform: cmInfo, + } + + reconciler.debouncer = debounce.NewDebouncer(reconciler.reconcileDebounced, defaultDebounceTime, logger) + + return reconciler, nil +} + +func (cr *ConfigReconciler) reconcileDebounced(ctx context.Context) error { + r := &reconcileConfig{ + ConfigReconciler: cr, + Logger: cr.logger, + } + + cr.logger.Info("fetching config data...") + + timeoutCtx, cancel := context.WithTimeout(ctx, cr.timeout) + defer cancel() + + // get all configuration objects + var err error + cr.globalCfg, err = r.fetchConfigData(timeoutCtx) + if err != nil { + return fmt.Errorf("error fetching configuration details: %w", err) + } + + // inform config manager that it should update + cr.configManagerInform <- true + + cr.logger.Info("global config updated", "config", *cr.globalCfg) + return nil +} + +func (r *reconcileConfig) fetchConfigData(ctx context.Context) (*v1alpha1.NodeConfig, error) { + // get VRFRouteConfiguration objects + l3vnis, err := r.fetchLayer3(ctx) + if err != nil { + return nil, err + } + + // get Layer2NetworkConfiguration objects + l2vnis, err := r.fetchLayer2(ctx) + if err != nil { + return nil, err + } + + // get RoutingTable objects + taas, err := r.fetchTaas(ctx) + if err != nil { + return nil, err + } + + config := &v1alpha1.NodeConfig{} + + // discard metadata from previously fetched objects + config.Spec.Layer2 = []v1alpha1.Layer2NetworkConfigurationSpec{} + for i := range l2vnis { + config.Spec.Layer2 = append(config.Spec.Layer2, l2vnis[i].Spec) + } + + config.Spec.Vrf = []v1alpha1.VRFRouteConfigurationSpec{} + for i := range l3vnis { + config.Spec.Vrf = append(config.Spec.Vrf, l3vnis[i].Spec) + } + + config.Spec.RoutingTable = []v1alpha1.RoutingTableSpec{} + for i := range taas { + config.Spec.RoutingTable = append(config.Spec.RoutingTable, taas[i].Spec) + } + + return config, nil +} + +func (cr *ConfigReconciler) CreateConfigForNode(name string, node *corev1.Node) (*v1alpha1.NodeConfig, error) { + // create new config + c := &v1alpha1.NodeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + + if cr.globalCfg == nil { + cr.globalCfg = v1alpha1.NewEmptyConfig(name) + } + + v1alpha1.CopyNodeConfig(cr.globalCfg, c, name) + + err := controllerutil.SetOwnerReference(node, c, scheme.Scheme) + if err != nil { + return nil, fmt.Errorf("error setting owner references: %w", err) + } + + // prepare Layer2NetworkConfigurationSpec (l2Spec) for each node. + // Each Layer2NetworkConfigurationSpec from l2Spec has a node selector, + // which should be used to add the config to the proper nodes. + // Each Layer2NetworkConfigurationSpec that doesn't match the node selector + // is removed. + for i := 0; i < len(c.Spec.Layer2); i++ { + if c.Spec.Layer2[i].NodeSelector == nil { + // node selector is not defined for the spec.
+ // Layer2 is global - just continue + continue + } + + // node selector of type metav1.LabelSelector has to be converted + // to labels.Selector type to be used with controller-runtime client + selector, err := convertSelector(c.Spec.Layer2[i].NodeSelector.MatchLabels, c.Spec.Layer2[i].NodeSelector.MatchExpressions) + if err != nil { + return nil, fmt.Errorf("error converting selector: %w", err) + } + + // remove the currently processed Layer2NetworkConfigurationSpec if the node does not match the selector + if !selector.Matches(labels.Set(node.ObjectMeta.Labels)) { + // TODO: is it worth preserving order? + c.Spec.Layer2 = append(c.Spec.Layer2[:i], c.Spec.Layer2[i+1:]...) + i-- + } + } + + // set config as next config for the node + return c, nil +} + +func convertSelector(matchLabels map[string]string, matchExpressions []metav1.LabelSelectorRequirement) (labels.Selector, error) { + selector := labels.NewSelector() + var reqs labels.Requirements + + for key, value := range matchLabels { + requirement, err := labels.NewRequirement(key, selection.Equals, []string{value}) + if err != nil { + return nil, fmt.Errorf("error creating MatchLabel requirement: %w", err) + } + reqs = append(reqs, *requirement) + } + + for _, req := range matchExpressions { + lowercaseOperator := selection.Operator(strings.ToLower(string(req.Operator))) + requirement, err := labels.NewRequirement(req.Key, lowercaseOperator, req.Values) + if err != nil { + return nil, fmt.Errorf("error creating MatchExpression requirement: %w", err) + } + reqs = append(reqs, *requirement) + } + selector = selector.Add(reqs...) + + return selector, nil +} diff --git a/pkg/reconciler/layer2.go b/pkg/reconciler/layer2.go index 941b28f6..be767bd2 100644 --- a/pkg/reconciler/layer2.go +++ b/pkg/reconciler/layer2.go @@ -4,19 +4,12 @@ import ( "context" "fmt" "net" - "os" - "strings" networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" - "github.com/telekom/das-schiff-network-operator/pkg/healthcheck" "github.com/telekom/das-schiff-network-operator/pkg/nl" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/selection" - "k8s.io/apimachinery/pkg/types" ) -func (r *reconcile) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2NetworkConfiguration, error) { +func (r *reconcileConfig) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2NetworkConfiguration, error) { layer2List := &networkv1alpha1.Layer2NetworkConfigurationList{} err := r.client.List(ctx, layer2List) if err != nil { @@ -24,59 +17,17 @@ func (r *reconcile) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2Ne return nil, fmt.Errorf("error getting list of Layer2s from Kubernetes: %w", err) } - nodeName := os.Getenv(healthcheck.NodenameEnv) - node := &corev1.Node{} - err = r.client.Get(ctx, types.NamespacedName{Name: nodeName}, node) - if err != nil { - r.Logger.Error(err, "error getting local node name") - return nil, fmt.Errorf("error getting local node name: %w", err) - } - l2vnis := []networkv1alpha1.Layer2NetworkConfiguration{} - for i := range layer2List.Items { - item := &layer2List.Items[i] - logger := r.Logger.WithValues("name", item.ObjectMeta.Name, "namespace", item.ObjectMeta.Namespace, "vlan", item.Spec.ID, "vni", item.Spec.VNI) - if item.Spec.NodeSelector != nil { - selector := labels.NewSelector() - var reqs labels.Requirements - - for key, value := range item.Spec.NodeSelector.MatchLabels { - requirement, err := labels.NewRequirement(key, selection.Equals, []string{value}) - if err != nil { -
logger.Error(err, "error creating MatchLabel requirement") - return nil, fmt.Errorf("error creating MatchLabel requirement: %w", err) - } - reqs = append(reqs, *requirement) - } - - for _, req := range item.Spec.NodeSelector.MatchExpressions { - lowercaseOperator := selection.Operator(strings.ToLower(string(req.Operator))) - requirement, err := labels.NewRequirement(req.Key, lowercaseOperator, req.Values) - if err != nil { - logger.Error(err, "error creating MatchExpression requirement") - return nil, fmt.Errorf("error creating MatchExpression requirement: %w", err) - } - reqs = append(reqs, *requirement) - } - selector = selector.Add(reqs...) - - if !selector.Matches(labels.Set(node.ObjectMeta.Labels)) { - logger.Info("local node does not match nodeSelector of layer2", "node", nodeName) - continue - } - } - - l2vnis = append(l2vnis, *item) - } + l2vnis = append(l2vnis, layer2List.Items...) - if err := r.checkL2Duplicates(l2vnis); err != nil { + if err := checkL2Duplicates(l2vnis); err != nil { return nil, err } return l2vnis, nil } -func (r *reconcile) reconcileLayer2(l2vnis []networkv1alpha1.Layer2NetworkConfiguration) error { +func (r *reconcile) reconcileLayer2(l2vnis []networkv1alpha1.Layer2NetworkConfigurationSpec) error { desired, err := r.getDesired(l2vnis) if err != nil { return err @@ -145,7 +96,7 @@ func (r *reconcile) createL2(info *nl.Layer2Information, anycastTrackerInterface return nil } -func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfiguration) ([]nl.Layer2Information, error) { +func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfigurationSpec) ([]nl.Layer2Information, error) { availableVrfs, err := r.netlinkManager.ListL3() if err != nil { return nil, fmt.Errorf("error loading available VRFs: %w", err) @@ -153,7 +104,7 @@ func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfigurati desired := []nl.Layer2Information{} for i := range l2vnis { - spec := l2vnis[i].Spec + spec := l2vnis[i] var anycastMAC *net.HardwareAddr if mac, err := net.ParseMAC(spec.AnycastMac); err == nil { @@ -162,7 +113,7 @@ func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfigurati anycastGateways, err := r.netlinkManager.ParseIPAddresses(spec.AnycastGateways) if err != nil { - r.Logger.Error(err, "error parsing anycast gateways", "layer", l2vnis[i].ObjectMeta.Name, "gw", spec.AnycastGateways) + r.Logger.Error(err, "error parsing anycast gateways", "gw", spec.AnycastGateways) return nil, fmt.Errorf("error parsing anycast gateways: %w", err) } @@ -175,7 +126,7 @@ func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfigurati } } if !vrfAvailable { - r.Logger.Error(err, "VRF of Layer2 not found on node", "layer", l2vnis[i].ObjectMeta.Name, "vrf", spec.VRF) + r.Logger.Error(err, "VRF of Layer2 not found on node", "vrf", spec.VRF) continue } } @@ -229,7 +180,7 @@ func (r *reconcile) reconcileExistingLayer(desired, currentConfig *nl.Layer2Info return nil } -func (*reconcile) checkL2Duplicates(configs []networkv1alpha1.Layer2NetworkConfiguration) error { +func checkL2Duplicates(configs []networkv1alpha1.Layer2NetworkConfiguration) error { for i := range configs { for j := i + 1; j < len(configs); j++ { if configs[i].Spec.ID == configs[j].Spec.ID { diff --git a/pkg/reconciler/layer3.go b/pkg/reconciler/layer3.go index 72508743..09651c0e 100644 --- a/pkg/reconciler/layer3.go +++ b/pkg/reconciler/layer3.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net" + "os" "sort" "strconv" "time" @@ -11,35 +12,24 @@ 
import ( networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" "github.com/telekom/das-schiff-network-operator/pkg/config" "github.com/telekom/das-schiff-network-operator/pkg/frr" + "github.com/telekom/das-schiff-network-operator/pkg/healthcheck" "github.com/telekom/das-schiff-network-operator/pkg/nl" + "k8s.io/apimachinery/pkg/types" ) const defaultSleep = 2 * time.Second -func (r *reconcile) fetchLayer3(ctx context.Context) ([]networkv1alpha1.VRFRouteConfiguration, error) { - vrfs := &networkv1alpha1.VRFRouteConfigurationList{} - err := r.client.List(ctx, vrfs) - if err != nil { - r.Logger.Error(err, "error getting list of VRFs from Kubernetes") - return nil, fmt.Errorf("error getting list of VRFs from Kubernetes: %w", err) - } - - return vrfs.Items, nil -} - -func (r *reconcile) fetchTaas(ctx context.Context) ([]networkv1alpha1.RoutingTable, error) { - tables := &networkv1alpha1.RoutingTableList{} - err := r.client.List(ctx, tables) +func (r *reconcile) fetchNodeConfig(ctx context.Context) (*networkv1alpha1.NodeConfig, error) { + cfg := &networkv1alpha1.NodeConfig{} + err := r.client.Get(ctx, types.NamespacedName{Name: os.Getenv(healthcheck.NodenameEnv)}, cfg) if err != nil { - r.Logger.Error(err, "error getting list of TaaS from Kubernetes") - return nil, fmt.Errorf("error getting list of TaaS from Kubernetes: %w", err) + return nil, fmt.Errorf("error getting NodeConfig: %w", err) } - - return tables.Items, nil + return cfg, nil } // nolint: contextcheck // context is not relevant -func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfiguration, taas []networkv1alpha1.RoutingTable) error { +func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurationSpec, taas []networkv1alpha1.RoutingTableSpec) error { vrfConfigMap, err := r.createVrfConfigMap(l3vnis) if err != nil { return err @@ -141,11 +131,11 @@ func (r *reconcile) reloadFRR() error { return nil } -func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfiguration) (map[string]frr.VRFConfiguration, error) { +func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfigurationSpec) (map[string]frr.VRFConfiguration, error) { vrfConfigMap := map[string]frr.VRFConfiguration{} for i := range l3vnis { - spec := l3vnis[i].Spec - logger := r.Logger.WithValues("name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace, "vrf", spec.VRF) + spec := l3vnis[i] + logger := r.Logger.WithValues("vrf", spec.VRF) var vni int var rt string @@ -161,13 +151,13 @@ func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfigur vni = config.SkipVrfTemplateVni } else { err := fmt.Errorf("vrf not in vrf vni map") - r.Logger.Error(err, "VRF does not exist in VRF VNI config, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) + r.Logger.Error(err, "VRF does not exist in VRF VNI config, ignoring", "vrf", spec.VRF) continue } if vni == 0 && vni > 16777215 { err := fmt.Errorf("VNI can not be set to 0") - r.Logger.Error(err, "VNI can not be set to 0, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) + r.Logger.Error(err, "VNI can not be set to 0, ignoring", "vrf", spec.VRF) continue } @@ -181,11 +171,11 @@ func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfigur return vrfConfigMap, nil } -func createVrfFromTaaS(taas []networkv1alpha1.RoutingTable) map[string]frr.VRFConfiguration {
+func createVrfFromTaaS(taas []networkv1alpha1.RoutingTableSpec) map[string]frr.VRFConfiguration { vrfConfigMap := map[string]frr.VRFConfiguration{} for i := range taas { - spec := taas[i].Spec + spec := taas[i] name := fmt.Sprintf("taas.%d", spec.TableID) @@ -418,3 +408,25 @@ func copyPrefixItemToFRRItem(n int, item networkv1alpha1.VrfRouteConfigurationPr LE: item.LE, }, nil } + +func (r *reconcileConfig) fetchLayer3(ctx context.Context) ([]networkv1alpha1.VRFRouteConfiguration, error) { + vrfs := &networkv1alpha1.VRFRouteConfigurationList{} + err := r.client.List(ctx, vrfs) + if err != nil { + r.Logger.Error(err, "error getting list of VRFs from Kubernetes") + return nil, fmt.Errorf("error getting list of VRFs from Kubernetes: %w", err) + } + + return vrfs.Items, nil +} + +func (r *reconcileConfig) fetchTaas(ctx context.Context) ([]networkv1alpha1.RoutingTable, error) { + tables := &networkv1alpha1.RoutingTableList{} + err := r.client.List(ctx, tables) + if err != nil { + r.Logger.Error(err, "error getting list of TaaS from Kubernetes") + return nil, fmt.Errorf("error getting list of TaaS from Kubernetes: %w", err) + } + + return tables.Items, nil +} diff --git a/pkg/reconciler/mock/mock_config_reconciler.go b/pkg/reconciler/mock/mock_config_reconciler.go new file mode 100644 index 00000000..6d7e6920 --- /dev/null +++ b/pkg/reconciler/mock/mock_config_reconciler.go @@ -0,0 +1,51 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/telekom/das-schiff-network-operator/pkg/reconciler (interfaces: ConfigReconcilerInterface) + +// Package mock_reconciler is a generated GoMock package. +package mock_reconciler + +import ( + reflect "reflect" + + v1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + gomock "go.uber.org/mock/gomock" + v1 "k8s.io/api/core/v1" +) + +// MockConfigReconcilerInterface is a mock of ConfigReconcilerInterface interface. +type MockConfigReconcilerInterface struct { + ctrl *gomock.Controller + recorder *MockConfigReconcilerInterfaceMockRecorder +} + +// MockConfigReconcilerInterfaceMockRecorder is the mock recorder for MockConfigReconcilerInterface. +type MockConfigReconcilerInterfaceMockRecorder struct { + mock *MockConfigReconcilerInterface +} + +// NewMockConfigReconcilerInterface creates a new mock instance. +func NewMockConfigReconcilerInterface(ctrl *gomock.Controller) *MockConfigReconcilerInterface { + mock := &MockConfigReconcilerInterface{ctrl: ctrl} + mock.recorder = &MockConfigReconcilerInterfaceMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockConfigReconcilerInterface) EXPECT() *MockConfigReconcilerInterfaceMockRecorder { + return m.recorder +} + +// CreateConfigForNode mocks base method. +func (m *MockConfigReconcilerInterface) CreateConfigForNode(arg0 string, arg1 *v1.Node) (*v1alpha1.NodeConfig, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateConfigForNode", arg0, arg1) + ret0, _ := ret[0].(*v1alpha1.NodeConfig) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CreateConfigForNode indicates an expected call of CreateConfigForNode. 
+func (mr *MockConfigReconcilerInterfaceMockRecorder) CreateConfigForNode(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateConfigForNode", reflect.TypeOf((*MockConfigReconcilerInterface)(nil).CreateConfigForNode), arg0, arg1) +} diff --git a/pkg/reconciler/mock/mock_node_reconciler.go b/pkg/reconciler/mock/mock_node_reconciler.go new file mode 100644 index 00000000..8b4b7297 --- /dev/null +++ b/pkg/reconciler/mock/mock_node_reconciler.go @@ -0,0 +1,49 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/telekom/das-schiff-network-operator/pkg/reconciler (interfaces: NodeReconcilerInterface) + +// Package mock_reconciler is a generated GoMock package. +package mock_reconciler + +import ( + reflect "reflect" + + gomock "go.uber.org/mock/gomock" + v1 "k8s.io/api/core/v1" +) + +// MockNodeReconcilerInterface is a mock of NodeReconcilerInterface interface. +type MockNodeReconcilerInterface struct { + ctrl *gomock.Controller + recorder *MockNodeReconcilerInterfaceMockRecorder +} + +// MockNodeReconcilerInterfaceMockRecorder is the mock recorder for MockNodeReconcilerInterface. +type MockNodeReconcilerInterfaceMockRecorder struct { + mock *MockNodeReconcilerInterface +} + +// NewMockNodeReconcilerInterface creates a new mock instance. +func NewMockNodeReconcilerInterface(ctrl *gomock.Controller) *MockNodeReconcilerInterface { + mock := &MockNodeReconcilerInterface{ctrl: ctrl} + mock.recorder = &MockNodeReconcilerInterfaceMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockNodeReconcilerInterface) EXPECT() *MockNodeReconcilerInterfaceMockRecorder { + return m.recorder +} + +// GetNodes mocks base method. +func (m *MockNodeReconcilerInterface) GetNodes() map[string]*v1.Node { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetNodes") + ret0, _ := ret[0].(map[string]*v1.Node) + return ret0 +} + +// GetNodes indicates an expected call of GetNodes. +func (mr *MockNodeReconcilerInterfaceMockRecorder) GetNodes() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNodes", reflect.TypeOf((*MockNodeReconcilerInterface)(nil).GetNodes)) +} diff --git a/pkg/reconciler/node_reconciler.go b/pkg/reconciler/node_reconciler.go new file mode 100644 index 00000000..dc969169 --- /dev/null +++ b/pkg/reconciler/node_reconciler.go @@ -0,0 +1,151 @@ +package reconciler + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/go-logr/logr" + "github.com/telekom/das-schiff-network-operator/pkg/debounce" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + controlPlaneLabel = "node-role.kubernetes.io/control-plane" + nodeDebouncerTime = time.Second * 5 +) + +//go:generate mockgen -destination ./mock/mock_node_reconciler.go . NodeReconcilerInterface +type NodeReconcilerInterface interface { + GetNodes() map[string]*corev1.Node +} + +// NodeReconciler is responsible for watching node objects. +type NodeReconciler struct { + client client.Client + logger logr.Logger + debouncer *debounce.Debouncer + nodes map[string]*corev1.Node + Mutex sync.RWMutex + timeout time.Duration + + NodeReconcilerReady chan bool + configManagerInform chan bool + deleteNodeInform chan []string +} + +// Reconcile starts reconciliation.
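+// Node events are debounced, so a burst of changes results in a single reconciliation run.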
+func (nr *NodeReconciler) Reconcile(ctx context.Context) { + nr.debouncer.Debounce(ctx) +} + +// NewNodeReconciler creates a new reconciler that watches Node objects. +func NewNodeReconciler(clusterClient client.Client, logger logr.Logger, timeout time.Duration, cmInfo chan bool, nodeDelInfo chan []string) (*NodeReconciler, error) { + reconciler := &NodeReconciler{ + client: clusterClient, + logger: logger, + nodes: make(map[string]*corev1.Node), + timeout: timeout, + configManagerInform: cmInfo, + deleteNodeInform: nodeDelInfo, + } + + reconciler.debouncer = debounce.NewDebouncer(reconciler.reconcileDebounced, nodeDebouncerTime, logger) + + return reconciler, nil +} + +func (nr *NodeReconciler) reconcileDebounced(ctx context.Context) error { + added, deleted, err := nr.update(ctx) + if err != nil { + return fmt.Errorf("error updating node reconciler data: %w", err) + } + + // inform config manager that nodes were deleted + if len(deleted) > 0 { + nr.logger.Info("nodes deleted - inform ConfigManager", "nodes", deleted) + nr.deleteNodeInform <- deleted + } + + // inform config manager that new nodes were added + if len(added) > 0 { + nr.logger.Info("nodes added - inform ConfigManager", "nodes", added) + nr.configManagerInform <- true + } + + return nil +} + +func (nr *NodeReconciler) update(ctx context.Context) (added, deleted []string, err error) { + nr.Mutex.Lock() + defer nr.Mutex.Unlock() + + timeoutCtx, cancel := context.WithTimeout(ctx, nr.timeout) + defer cancel() + + currentNodes, err := ListNodes(timeoutCtx, nr.client) + if err != nil { + return nil, nil, fmt.Errorf("error listing nodes: %w", err) + } + + added, deleted = nr.checkNodeChanges(currentNodes) + // save list of current nodes + nr.nodes = currentNodes + + return added, deleted, nil +} + +func ListNodes(ctx context.Context, c client.Client) (map[string]*corev1.Node, error) { + // list all nodes + list := &corev1.NodeList{} + if err := c.List(ctx, list); err != nil { + return nil, fmt.Errorf("unable to list nodes: %w", err) + } + + // discard control-plane nodes and create map of nodes + nodes := make(map[string]*corev1.Node) + for i := range list.Items { + _, isControlPlane := list.Items[i].Labels[controlPlaneLabel] + if !isControlPlane { + // discard nodes that are not in ready state + for j := range list.Items[i].Status.Conditions { + // TODO: Should taint node.kubernetes.io/not-ready be used instead of Conditions?
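+ // a node is treated as ready when it reports the NodeReady condition with status true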
+ if list.Items[i].Status.Conditions[j].Type == corev1.NodeReady && + list.Items[i].Status.Conditions[j].Status == corev1.ConditionTrue { + nodes[list.Items[i].Name] = &list.Items[i] + break + } + } + } + } + + return nodes, nil +} + +func (nr *NodeReconciler) checkNodeChanges(newState map[string]*corev1.Node) (added, deleted []string) { + added = getDifference(newState, nr.nodes) + deleted = getDifference(nr.nodes, newState) + return added, deleted +} + +func getDifference(first, second map[string]*corev1.Node) []string { + diff := []string{} + for name := range first { + if _, exists := second[name]; !exists { + diff = append(diff, name) + } + } + return diff +} + +func (nr *NodeReconciler) GetNodes() map[string]*corev1.Node { + nr.Mutex.RLock() + defer nr.Mutex.RUnlock() + currentNodes := make(map[string]*corev1.Node) + for k, v := range nr.nodes { + currentNodes[k] = v + } + return currentNodes +} diff --git a/pkg/reconciler/reconciler.go b/pkg/reconciler/reconciler.go index 040d90c8..847a6dea 100644 --- a/pkg/reconciler/reconciler.go +++ b/pkg/reconciler/reconciler.go @@ -2,21 +2,32 @@ package reconciler import ( "context" + "encoding/json" + "errors" "fmt" "os" "time" "github.com/go-logr/logr" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" "github.com/telekom/das-schiff-network-operator/pkg/anycast" "github.com/telekom/das-schiff-network-operator/pkg/config" "github.com/telekom/das-schiff-network-operator/pkg/debounce" "github.com/telekom/das-schiff-network-operator/pkg/frr" "github.com/telekom/das-schiff-network-operator/pkg/healthcheck" "github.com/telekom/das-schiff-network-operator/pkg/nl" + "github.com/telekom/das-schiff-network-operator/pkg/nodeconfig" + apierrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" ) -const defaultDebounceTime = 20 * time.Second +const ( + defaultDebounceTime = 20 * time.Second + defaultNodeDebounceTime = 5 * time.Second + + DefaultNodeConfigPath = "/opt/network-operator/nodeConfig.yaml" + nodeConfigFilePerm = 0o600 +) type Reconciler struct { client client.Client @@ -26,6 +37,8 @@ type Reconciler struct { config *config.Config logger logr.Logger healthChecker *healthcheck.HealthChecker + nodeConfig *v1alpha1.NodeConfig + nodeConfigPath string debouncer *debounce.Debouncer @@ -37,16 +50,17 @@ type reconcile struct { logr.Logger } -func NewReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, logger logr.Logger) (*Reconciler, error) { +func NewReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, logger logr.Logger, nodeConfigPath string) (*Reconciler, error) { reconciler := &Reconciler{ client: clusterClient, netlinkManager: nl.NewManager(&nl.Toolkit{}), frrManager: frr.NewFRRManager(), anycastTracker: anycastTracker, logger: logger, + nodeConfigPath: nodeConfigPath, } - reconciler.debouncer = debounce.NewDebouncer(reconciler.reconcileDebounced, defaultDebounceTime, logger) + reconciler.debouncer = debounce.NewDebouncer(reconciler.reconcileDebounced, defaultNodeDebounceTime, logger) if val := os.Getenv("FRR_CONFIG_FILE"); val != "" { reconciler.frrManager.ConfigPath = val @@ -71,7 +85,12 @@ func NewReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, healthcheck.NewDefaultHealthcheckToolkit(reconciler.frrManager, tcpDialer), nc) if err != nil { - return nil, fmt.Errorf("error creating netwokring healthchecker: %w", err) + return nil, fmt.Errorf("error creating networking healthchecker: %w", err) + } + + reconciler.nodeConfig, err = 
readNodeConfig(reconciler.nodeConfigPath) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("error reading NodeConfig from disk: %w", err) + } return reconciler, nil } @@ -87,24 +106,80 @@ func (reconciler *Reconciler) reconcileDebounced(ctx context.Context) error { Logger: reconciler.logger, } - r.Logger.Info("Reloading config") if err := r.config.ReloadConfig(); err != nil { return fmt.Errorf("error reloading network-operator config: %w", err) } - l3vnis, err := r.fetchLayer3(ctx) + // get NodeConfig from apiserver + cfg, err := r.fetchNodeConfig(ctx) if err != nil { + // discard IsNotFound error + if apierrors.IsNotFound(err) { + return nil + } return err } - l2vnis, err := r.fetchLayer2(ctx) - if err != nil { - return err + + // config is invalid or was already provisioned - discard + if cfg.Status.ConfigStatus != nodeconfig.StatusProvisioning { + return nil } - taas, err := r.fetchTaas(ctx) - if err != nil { - return err + + // reconcile config + if err = doReconciliation(r, cfg); err != nil { + // if reconciliation failed set NodeConfig's status as invalid and restore last known working config + if err := r.invalidateAndRestore(ctx, cfg); err != nil { + return fmt.Errorf("error restoring config: %w", err) + } + + return fmt.Errorf("reconciler error: %w", err) + } + + // check if the node is healthy after reconciliation + if err := reconciler.checkHealth(ctx); err != nil { + // if the node is not healthy, set NodeConfig's status as invalid and restore last known working config + if err := r.invalidateAndRestore(ctx, cfg); err != nil { + return fmt.Errorf("error restoring config: %w", err) + } + + return fmt.Errorf("healthcheck error (previous config restored): %w", err) } + // set config status as provisioned (valid) + cfg.Status.ConfigStatus = nodeconfig.StatusProvisioned + if err = r.client.Status().Update(ctx, cfg); err != nil { + return fmt.Errorf("error updating NodeConfig status: %w", err) + } + + // replace in-memory working config and store it on the disk + reconciler.nodeConfig = cfg + if err = storeNodeConfig(cfg, reconciler.nodeConfigPath); err != nil { + return fmt.Errorf("error saving NodeConfig: %w", err) + } + + return nil +} + +func (r *reconcile) invalidateAndRestore(ctx context.Context, cfg *v1alpha1.NodeConfig) error { + cfg.Status.ConfigStatus = nodeconfig.StatusInvalid + if err := r.client.Status().Update(ctx, cfg); err != nil { + return fmt.Errorf("error updating NodeConfig status: %w", err) + } + + // try to restore previously known good NodeConfig + if err := r.restoreNodeConfig(); err != nil { + return fmt.Errorf("error restoring NodeConfig: %w", err) + } + + return nil +} + +func doReconciliation(r *reconcile, nodeCfg *v1alpha1.NodeConfig) error { + r.logger.Info("config to reconcile", "NodeConfig", *nodeCfg) + l3vnis := nodeCfg.Spec.Vrf + l2vnis := nodeCfg.Spec.Layer2 + taas := nodeCfg.Spec.RoutingTable + + if err := r.reconcileLayer3(l3vnis, taas); err != nil { return err } @@ -112,21 +187,68 @@ func (reconciler *Reconciler) reconcileDebounced(ctx context.Context) error { return err } - if !reconciler.healthChecker.IsNetworkingHealthy() { - _, err := reconciler.healthChecker.IsFRRActive() - if err != nil { - return fmt.Errorf("error checking FRR status: %w", err) - } - if err = reconciler.healthChecker.CheckInterfaces(); err != nil { - return fmt.Errorf("error checking network interfaces: %w", err) - } - if err = reconciler.healthChecker.CheckReachability(); err != nil { - return fmt.Errorf("error checking network reachability: %w", err) -
} - if err = reconciler.healthChecker.RemoveTaints(ctx); err != nil { + return nil +} + +func (r *reconcile) restoreNodeConfig() error { + if r.nodeConfig == nil { + return nil + } + if err := doReconciliation(r, r.nodeConfig); err != nil { + return fmt.Errorf("error restoring configuration: %w", err) + } + + r.logger.Info("restored last known valid config") + + return nil +} + +func readNodeConfig(path string) (*v1alpha1.NodeConfig, error) { + cfg, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("error reading NodeConfig: %w", err) + } + + nodeConfig := &v1alpha1.NodeConfig{} + if err := json.Unmarshal(cfg, nodeConfig); err != nil { + return nil, fmt.Errorf("error unmarshalling NodeConfig: %w", err) + } + + return nodeConfig, nil +} + +func storeNodeConfig(cfg *v1alpha1.NodeConfig, path string) error { + // save working config + c, err := json.MarshalIndent(*cfg, "", " ") + if err != nil { + return fmt.Errorf("error marshalling NodeConfig: %w", err) + } + + if err = os.WriteFile(path, c, nodeConfigFilePerm); err != nil { + return fmt.Errorf("error saving NodeConfig: %w", err) + } + + return nil +} + +func (reconciler *Reconciler) checkHealth(ctx context.Context) error { + _, err := reconciler.healthChecker.IsFRRActive() + if err != nil { + return fmt.Errorf("error checking FRR status: %w", err) + } + if err := reconciler.healthChecker.CheckInterfaces(); err != nil { + return fmt.Errorf("error checking network interfaces: %w", err) + } + if err := reconciler.healthChecker.CheckReachability(); err != nil { + return fmt.Errorf("error checking network reachability: %w", err) + } + if err := reconciler.healthChecker.CheckAPIServer(ctx); err != nil { + return fmt.Errorf("error checking API Server reachability: %w", err) + } + if !reconciler.healthChecker.TaintsRemoved() { + if err := reconciler.healthChecker.RemoveTaints(ctx); err != nil { return fmt.Errorf("error removing taint from the node: %w", err) } } - return nil } diff --git a/pkg/reconciler/reconciler_test.go b/pkg/reconciler/reconciler_test.go new file mode 100644 index 00000000..93352a34 --- /dev/null +++ b/pkg/reconciler/reconciler_test.go @@ -0,0 +1,141 @@ +package reconciler + +import ( + "context" + "testing" + "time" + + "github.com/go-logr/logr" + . "github.com/onsi/ginkgo" + .
"github.com/onsi/gomega" + "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestReconciler(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, + "Reconciler Suite") +} + +var ( + node = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } +) + +var _ = Describe("ConfigReconciler", func() { + Context("NewConfigReconciler() should", func() { + It("return new config reconciler", func() { + c := createClient() + cmInfo := make(chan bool) + r, err := NewConfigReconciler(c, logr.New(nil), time.Millisecond*100, cmInfo) + Expect(r).ToNot(BeNil()) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("reconcileDebounced() should", func() { + It("return no error if fetched data successfully", func() { + c := createClient() + cmInfo := make(chan bool) + defer close(cmInfo) + r, err := NewConfigReconciler(c, logr.New(nil), time.Millisecond, cmInfo) + Expect(r).ToNot(BeNil()) + Expect(err).ToNot(HaveOccurred()) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + go func() { + err = r.reconcileDebounced(ctx) + }() + + <-cmInfo + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("CreateConfigForNode() should", func() { + It("return config for provided node", func() { + c := createClient() + cmInfo := make(chan bool) + defer close(cmInfo) + r, err := NewConfigReconciler(c, logr.New(nil), time.Millisecond, cmInfo) + Expect(err).ToNot(HaveOccurred()) + + r.globalCfg = v1alpha1.NewEmptyConfig("global") + r.globalCfg.Spec.Layer2 = []v1alpha1.Layer2NetworkConfigurationSpec{ + { + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "test"}}, + }, + } + + cfg, err := r.CreateConfigForNode("node", &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node", + Labels: map[string]string{"app": "test"}, + }, + }) + + Expect(cfg).ToNot(BeNil()) + Expect(err).ToNot(HaveOccurred()) + }) + }) +}) + +var _ = Describe("NodeReconciler", func() { + Context("reconcileDebounced() and GetNodes() should", func() { + It("return no error and inform about added and deleted nodes, list known nodes", func() { + c := createClient(node) + cmInfo := make(chan bool) + defer close(cmInfo) + nodeDelInfo := make(chan []string) + defer close(nodeDelInfo) + + r, err := NewNodeReconciler(c, logr.New(nil), time.Second, cmInfo, nodeDelInfo) + Expect(r).ToNot(BeNil()) + Expect(err).ToNot(HaveOccurred()) + + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + go func() { + err = r.reconcileDebounced(ctx) + }() + info := <-cmInfo + Expect(info).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + + nodes := r.GetNodes() + Expect(nodes).To(HaveLen(1)) + + err = c.Delete(context.Background(), node) + Expect(err).ToNot(HaveOccurred()) + + go func() { + err = r.reconcileDebounced(ctx) + }() + deleted := <-nodeDelInfo + Expect(deleted).To(HaveLen(1)) + }) + }) +}) + +func createClient(initObjs ...runtime.Object) client.Client { + s := runtime.NewScheme() + err := corev1.AddToScheme(s) + Expect(err).ToNot(HaveOccurred()) + err = v1alpha1.AddToScheme(s) + Expect(err).ToNot(HaveOccurred()) + return 
fake.NewClientBuilder().WithScheme(s).WithRuntimeObjects(initObjs...).Build() +}