Skip to content

Commit

Permalink
Fix bug which should delete node from store when there is node not fo…
Browse files Browse the repository at this point in the history
…und error

When node is deleted, the phase node.ObjectMeta.DeletionTimestamp being not zero is intermittent,
and it is more possible the node is not found, we should delete the node from store.
  • Loading branch information
zhengxiexie committed Dec 30, 2024
1 parent c729a64 commit 1b7be35
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 90 deletions.
7 changes: 6 additions & 1 deletion pkg/controllers/node/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,15 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
if errors.IsNotFound(err) {
log.Info("node not found", "req", req.NamespacedName)
deleted = true
if err := r.Service.SyncNodeStore(req.NamespacedName.Name, deleted); err != nil {
log.Error(err, "failed to sync node store", "req", req.NamespacedName)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
} else {
log.Error(err, "unable to fetch node", "req", req.NamespacedName)
return common.ResultNormal, client.IgnoreNotFound(err)
}
return common.ResultNormal, client.IgnoreNotFound(err)
}
if common.NodeIsMaster(node) {
// For WCP supervisor cluster, the master node isn't a transport node.
Expand Down
25 changes: 13 additions & 12 deletions pkg/controllers/node/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/agiledragon/gomonkey/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/model"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -17,20 +18,10 @@ import (

"github.com/vmware-tanzu/nsx-operator/pkg/config"
"github.com/vmware-tanzu/nsx-operator/pkg/metrics"
servicecommon "github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/common"
mock "github.com/vmware-tanzu/nsx-operator/pkg/mock/node"
"github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/node"
)

func createMockNodeService() *node.NodeService {
return &node.NodeService{
Service: servicecommon.Service{
NSXConfig: &config.NSXOperatorConfig{
NsxConfig: &config.NsxConfig{},
},
},
}
}

func TestNodeReconciler_Reconcile(t *testing.T) {
// Create a patch for metrics.CounterInc
patch := gomonkey.ApplyFunc(metrics.CounterInc, func(*config.NSXOperatorConfig, *prometheus.CounterVec, string) {
Expand All @@ -46,7 +37,7 @@ func TestNodeReconciler_Reconcile(t *testing.T) {
reconciler := &NodeReconciler{
Client: fake.NewClientBuilder().WithScheme(scheme).Build(),
Scheme: scheme,
Service: createMockNodeService(),
Service: mock.CreateMockNodeService(),

Check failure on line 40 in pkg/controllers/node/node_controller_test.go

View workflow job for this annotation

GitHub Actions / build

cannot use mock.CreateMockNodeService() (value of type *mock.NodeService) as *"github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/node".NodeService value in struct literal (typecheck)
}

// Test cases
Expand Down Expand Up @@ -102,6 +93,16 @@ func TestNodeReconciler_Reconcile(t *testing.T) {
},
}

if tt.name == "Node not found" {
patchesGetByIndex := gomonkey.ApplyFunc((*node.NodeStore).GetByIndex,
func(n *node.NodeStore, key string, value string) []*model.HostTransportNode {
nodes := make([]*model.HostTransportNode, 0)
return nodes
})
defer patchesGetByIndex.Reset()

}

result, err := reconciler.Reconcile(ctx, req)

if tt.expectedErr {
Expand Down
75 changes: 75 additions & 0 deletions pkg/mock/node/node.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package mock

import (
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/infra/sites/enforcement_points"
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/model"
"k8s.io/client-go/tools/cache"

"github.com/vmware-tanzu/nsx-operator/pkg/config"
"github.com/vmware-tanzu/nsx-operator/pkg/nsx"
servicecommon "github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/common"
"github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/node"
)

type NodeStore struct {
servicecommon.ResourceStore
}

type NodeService struct {
servicecommon.Service
NodeStore *NodeStore
}

type fakeHostTransportNodesClient struct {
enforcement_points.HostTransportNodesClient
}

func (f *fakeHostTransportNodesClient) Get(param1 string, param2 string, param3 string) (model.HostTransportNode, error) {
// Do nothing
return model.HostTransportNode{}, nil
}

func (f *fakeHostTransportNodesClient) List(param1 string, param2 string, param3 *string, param4 *string, param5 *bool, param6 *string, param7 *string, param8 *string, param9 *int64, param10 *bool, param11 *string, param12 *string) (model.HostTransportNodeListResult, error) {
nodeName := "test-node"
mockNode := &model.HostTransportNode{
UniqueId: servicecommon.String("test-node"),
NodeDeploymentInfo: &model.FabricHostNode{
Fqdn: &nodeName,
},
}
return model.HostTransportNodeListResult{
Results: []model.HostTransportNode{*mockNode},
}, nil
}

func (f *fakeHostTransportNodesClient) Update(param1 string, param2 string, param3 string, param4 model.HostTransportNode, _ *string, _ *string,
_ *bool, _ *string, _ *bool, _ *string, _ *string) (model.HostTransportNode, error) {
// Do nothing
return model.HostTransportNode{}, nil
}

func CreateMockNodeService() *NodeService {
return &NodeService{
Service: servicecommon.Service{
NSXClient: &nsx.Client{
HostTransPortNodesClient: &fakeHostTransportNodesClient{},
NsxConfig: &config.NSXOperatorConfig{
CoeConfig: &config.CoeConfig{
Cluster: "k8scl-one:test",
},
},
},
},
NodeStore: &NodeStore{
ResourceStore: servicecommon.ResourceStore{
Indexer: cache.NewIndexer(
node.KeyFunc,
cache.Indexers{
servicecommon.IndexKeyNodeName: node.NodeIndexByNodeName,
},
),
BindingType: model.HostTransportNodeBindingType(),
},
},
}
}
9 changes: 4 additions & 5 deletions pkg/nsx/services/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
var (
log = &logger.Log
ResourceTypeNode = servicecommon.ResourceTypeNode
MarkedForDelete = true
)

type NodeService struct {
Expand All @@ -35,9 +34,9 @@ func InitializeNode(service servicecommon.Service) (*NodeService, error) {
NodeStore: &NodeStore{
ResourceStore: servicecommon.ResourceStore{
Indexer: cache.NewIndexer(
keyFunc,
KeyFunc,
cache.Indexers{
servicecommon.IndexKeyNodeName: nodeIndexByNodeName,
servicecommon.IndexKeyNodeName: NodeIndexByNodeName,
},
),
BindingType: model.HostTransportNodeBindingType(),
Expand Down Expand Up @@ -73,9 +72,9 @@ func (service *NodeService) SyncNodeStore(nodeName string, deleted bool) error {
if len(nodes) > 1 {
return fmt.Errorf("multiple nodes found for node name %s", nodeName)
}
// TODO: confirm whether we need to resync the node info from NSX
// TODO: confirm whether we need to re-sync the node info from NSX
if len(nodes) == 1 {
log.Info("node alreay cached", "node.Fqdn", nodes[0].NodeDeploymentInfo.Fqdn, "node.UniqueId", *nodes[0].UniqueId)
log.Info("node already cached", "node.Fqdn", nodes[0].NodeDeploymentInfo.Fqdn, "node.UniqueId", *nodes[0].UniqueId)
// updatedNode, err := service.NSXClient.HostTransPortNodesClient.Get("default", "default", nodes[0].Id)
// if err != nil {
// return fmt.Errorf("failed to get HostTransPortNode for node %s: %s", nodeName, err)
Expand Down
69 changes: 6 additions & 63 deletions pkg/nsx/services/node/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,73 +9,16 @@ import (
"github.com/stretchr/testify/assert"
"github.com/vmware/vsphere-automation-sdk-go/runtime/bindings"
"github.com/vmware/vsphere-automation-sdk-go/runtime/data"
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/infra/sites/enforcement_points"
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/model"
"k8s.io/client-go/tools/cache"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/vmware-tanzu/nsx-operator/pkg/config"
"github.com/vmware-tanzu/nsx-operator/pkg/logger"
"github.com/vmware-tanzu/nsx-operator/pkg/nsx"
mock "github.com/vmware-tanzu/nsx-operator/pkg/mock/node"
servicecommon "github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/common"
)

type fakeHostTransportNodesClient struct {
enforcement_points.HostTransportNodesClient
}

func (f *fakeHostTransportNodesClient) Get(param1 string, param2 string, param3 string) (model.HostTransportNode, error) {
// Do nothing
return model.HostTransportNode{}, nil
}

func (f *fakeHostTransportNodesClient) List(param1 string, param2 string, param3 *string, param4 *string, param5 *bool, param6 *string, param7 *string, param8 *string, param9 *int64, param10 *bool, param11 *string, param12 *string) (model.HostTransportNodeListResult, error) {
nodeName := "test-node"
mockNode := &model.HostTransportNode{
UniqueId: servicecommon.String("test-node"),
NodeDeploymentInfo: &model.FabricHostNode{
Fqdn: &nodeName,
},
}
return model.HostTransportNodeListResult{
Results: []model.HostTransportNode{*mockNode},
}, nil
}

func (f *fakeHostTransportNodesClient) Update(param1 string, param2 string, param3 string, param4 model.HostTransportNode, _ *string, _ *string,
_ *bool, _ *string, _ *bool, _ *string, _ *string) (model.HostTransportNode, error) {
// Do nothing
return model.HostTransportNode{}, nil
}

func createMockNodeService() *NodeService {
return &NodeService{
Service: servicecommon.Service{
NSXClient: &nsx.Client{
HostTransPortNodesClient: &fakeHostTransportNodesClient{},
NsxConfig: &config.NSXOperatorConfig{
CoeConfig: &config.CoeConfig{
Cluster: "k8scl-one:test",
},
},
},
},
NodeStore: &NodeStore{
ResourceStore: servicecommon.ResourceStore{
Indexer: cache.NewIndexer(
keyFunc,
cache.Indexers{
servicecommon.IndexKeyNodeName: nodeIndexByNodeName,
},
),
BindingType: model.HostTransportNodeBindingType(),
},
},
}
}

func TestInitializeNode(t *testing.T) {
mockService := createMockNodeService()
mockService := mock.CreateMockNodeService()

var tc *bindings.TypeConverter
patches := gomonkey.ApplyMethod(reflect.TypeOf(tc), "ConvertToGolang",
Expand Down Expand Up @@ -105,7 +48,7 @@ func TestInitializeNode(t *testing.T) {
}

func TestNodeService_GetNodeByName(t *testing.T) {
service := createMockNodeService()
service := mock.CreateMockNodeService()

nodeName := "test-node"
mockNode := &model.HostTransportNode{
Expand All @@ -124,7 +67,7 @@ func TestNodeService_GetNodeByName(t *testing.T) {

func TestNodeService_SyncNodeStore(t *testing.T) {
logf.SetLogger(logger.ZapLogger(false, 0))
service := createMockNodeService()
service := mock.CreateMockNodeService()
nodeName := "test-node"
// Test case: Node not found
err := service.SyncNodeStore("non-existent-node", false)
Expand Down Expand Up @@ -154,7 +97,7 @@ func TestNodeIndexByNodeName(t *testing.T) {
},
}

indexes, _ := nodeIndexByNodeName(mockNode)
indexes, _ := NodeIndexByNodeName(mockNode)
assert.Len(t, indexes, 1)
assert.Equal(t, nodeName, indexes[0])
}
Expand All @@ -168,7 +111,7 @@ func TestKeyFunc(t *testing.T) {
},
}

key, err := keyFunc(mockNode)
key, err := KeyFunc(mockNode)
assert.NoError(t, err)
assert.Equal(t, nodeName, key)
}
10 changes: 5 additions & 5 deletions pkg/nsx/services/node/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,21 @@ func (nodeStore *NodeStore) GetByIndex(key string, value string) []*model.HostTr
return hostTransportNodes
}

// keyFunc is used to get the key of a resource, usually, which is the ID of the resource
func keyFunc(obj interface{}) (string, error) {
// KeyFunc is used to get the key of a resource, usually, which is the ID of the resource
func KeyFunc(obj interface{}) (string, error) {
switch v := obj.(type) {
case *model.HostTransportNode:
return *v.UniqueId, nil
default:
return "", errors.New("keyFunc doesn't support unknown type")
return "", errors.New("KeyFunc doesn't support unknown type")
}
}

func nodeIndexByNodeName(obj interface{}) ([]string, error) {
func NodeIndexByNodeName(obj interface{}) ([]string, error) {
switch o := obj.(type) {
case *model.HostTransportNode:
return []string{*o.NodeDeploymentInfo.Fqdn}, nil
default:
return nil, errors.New("nodeIndexByNodeName doesn't support unknown type")
return nil, errors.New("NodeIndexByNodeName doesn't support unknown type")
}
}
8 changes: 4 additions & 4 deletions pkg/nsx/services/node/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ func Test_KeyFunc(t *testing.T) {
id := "test_id"
node := model.HostTransportNode{UniqueId: &id}
t.Run("1", func(t *testing.T) {
got, _ := keyFunc(&node)
got, _ := KeyFunc(&node)
if got != "test_id" {
t.Errorf("keyFunc() = %v, want %v", got, "test_id")
t.Errorf("KeyFunc() = %v, want %v", got, "test_id")
}
})
}

func TestSubnetStore_Apply(t *testing.T) {
resourceStore := common.ResourceStore{
Indexer: cache.NewIndexer(
keyFunc,
KeyFunc,
cache.Indexers{
common.IndexKeyNodeName: nodeIndexByNodeName,
common.IndexKeyNodeName: NodeIndexByNodeName,
},
),
BindingType: model.HostTransportNodeBindingType(),
Expand Down

0 comments on commit 1b7be35

Please sign in to comment.