Skip to content

Commit

Permalink
set node status before remove node (#645)
Browse files Browse the repository at this point in the history
* set node status before remove node

* fix metric help message

* fix UT issue
  • Loading branch information
yuyang0 authored Oct 16, 2024
1 parent b9632e7 commit ff909ee
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
6 changes: 6 additions & 0 deletions cluster/calcium/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ func (c *Calcium) RemoveNode(ctx context.Context, nodename string) error {
return utils.Txn(ctx,
// if: remove node metadata
func(ctx context.Context) error {
// we need to set the node status here; consider the following scenario:
// the node is down, so the node status doesn't exist in ETCD;
// if we don't set the node status here, other core instances will not be notified when the node is removed
if err = c.store.SetNodeStatus(ctx, node, 90); err != nil {
logger.Warnf(ctx, "failed to set node status: %s", err)
}
if err := c.store.RemoveNode(ctx, node); err != nil {
return err
}
Expand Down
4 changes: 3 additions & 1 deletion cluster/calcium/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,14 @@ func TestRemoveNode(t *testing.T) {

// fail by store.RemoveNode
store.On("ListNodeWorkloads", mock.Anything, mock.Anything, mock.Anything).Return([]*types.Workload{}, nil)
store.On("SetNodeStatus", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once()
store.On("RemoveNode", mock.Anything, mock.Anything).Return(types.ErrMockError).Once()
assert.Error(t, c.RemoveNode(ctx, name))

// success
store.On("SetNodeStatus", mock.Anything, mock.Anything, mock.Anything).Return(nil).Once()
store.On("RemoveNode", mock.Anything, mock.Anything).Return(nil)
store.On("SetNodeStatus", mock.Anything, mock.Anything, int64(-1)).Return(nil)
store.On("SetNodeStatus", mock.Anything, mock.Anything, int64(-1)).Return(nil).Once()
rmgr := c.rmgr.(*resourcemocks.Manager)
rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil)
assert.NoError(t, c.RemoveNode(ctx, name))
Expand Down
2 changes: 1 addition & 1 deletion metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func InitMetrics(ctx context.Context, config types.Config, metricsDescriptions [

Client.Collectors[podNodeStatusName] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: podNodeStatusName,
Help: "number of up nodes",
Help: "node status",
}, []string{"hostname", "podname", "nodename"})

once.Do(func() {
Expand Down

0 comments on commit ff909ee

Please sign in to comment.