Skip to content

Commit

Permalink
Sync from server repo (52a35146737)
Browse files Browse the repository at this point in the history
  • Loading branch information
releng committed Oct 29, 2024
1 parent a7c3c28 commit d9c30d6
Show file tree
Hide file tree
Showing 11 changed files with 399 additions and 66 deletions.
7 changes: 7 additions & 0 deletions commands/cmd_add_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ func (c *CmdAddNode) setLocalFlags(cmd *cobra.Command) {
"",
"[Use only with support guidance] A comma-separated list of node names that exist in the cluster.",
)
cmd.Flags().StringVar(
&c.addNodeOptions.ComputeGroup,
"compute-group",
"",
util.GetEonFlagMsg("The new or existing compute group for the new nodes. "+
"If specified, the new nodes will be compute-only nodes."),
)
cmd.Flags().IntVar(
&c.addNodeOptions.TimeOut,
"add-node-timeout",
Expand Down
70 changes: 54 additions & 16 deletions vclusterops/add_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package vclusterops

import (
"errors"
"fmt"
"strings"

Expand Down Expand Up @@ -46,6 +47,9 @@ type VAddNodeOptions struct {
// Names of the existing nodes in the cluster. This option can be
// used to remove partially added nodes from catalog.
ExpectedNodeNames []string
// Name of the compute group for the new node(s). If provided, this indicates the new nodes
// will be compute nodes.
ComputeGroup string

// timeout for polling nodes in seconds when we add Nodes
TimeOut int
Expand Down Expand Up @@ -185,7 +189,7 @@ func (vcc VClusterCommands) VAddNode(options *VAddNodeOptions) (VCoordinationDat

// add_node is aborted if requirements are not met.
// Here we check whether the nodes being added already exist
err = checkAddNodeRequirements(&vdb, options.NewHosts)
err = options.checkAddNodeRequirements(&vdb, options.NewHosts)
if err != nil {
return vdb, err
}
Expand All @@ -208,13 +212,18 @@ func (vcc VClusterCommands) VAddNode(options *VAddNodeOptions) (VCoordinationDat
}

// checkAddNodeRequirements returns an error if at least one of the nodes
// to add already exists in db.
func checkAddNodeRequirements(vdb *VCoordinationDatabase, hostsToAdd []string) error {
// to add already exists in db, or if attempting to add compute nodes to
// an enterprise db.
func (options *VAddNodeOptions) checkAddNodeRequirements(vdb *VCoordinationDatabase, hostsToAdd []string) error {
// None of the new hosts may already be part of the db. Only the first
// result of containNodes is used here; the second result is discarded.
if nodes, _ := vdb.containNodes(hostsToAdd); len(nodes) != 0 {
return fmt.Errorf("%s already exist in the database", strings.Join(nodes, ","))
}

// A non-empty ComputeGroup means the new nodes are compute nodes, which
// are rejected when the database is not Eon.
if !vdb.IsEon && options.ComputeGroup != "" {
return errors.New("cannot add compute nodes to an Enterprise mode database")
}

return nil
}

Expand All @@ -225,8 +234,8 @@ func (options *VAddNodeOptions) completeVDBSetting(vdb *VCoordinationDatabase) e
vdb.DepotPrefix = options.DepotPrefix

hostNodeMap := makeVHostNodeMap()
// TODO: we set the depot and data path from /nodes rather than manually
// (VER-92725). This is useful for nmaDeleteDirectoriesOp.
// We could set the depot and data path from /nodes rather than manually.
// This would be useful for nmaDeleteDirectoriesOp.
for h, vnode := range vdb.HostNodeMap {
dataPath := vdb.GenDataPath(vnode.Name)
vnode.StorageLocations = append(vnode.StorageLocations, dataPath)
Expand All @@ -237,6 +246,12 @@ func (options *VAddNodeOptions) completeVDBSetting(vdb *VCoordinationDatabase) e
}
vdb.HostNodeMap = hostNodeMap

// Compute nodes currently do not have depot support, so skip setting up
// the depot for now. This doesn't affect directory preparation.
if options.ComputeGroup != "" {
vdb.UseDepot = false
}

return nil
}

Expand All @@ -260,6 +275,7 @@ func (vcc VClusterCommands) trimNodesInCatalog(vdb *VCoordinationDatabase,
expectedNodeNames[nodeName] = struct{}{}
}

subscribingHostsCount := 0
var aliveHosts []string
var nodesToTrim []string
nodeNamesInCatalog := make(map[string]any)
Expand All @@ -268,13 +284,19 @@ func (vcc VClusterCommands) trimNodesInCatalog(vdb *VCoordinationDatabase,
if _, ok := expectedNodeNames[vnode.Name]; ok { // catalog node is expected
aliveHosts = append(aliveHosts, h)
existingHostNodeMap[h] = vnode
// A DOWN compute node may be wrongly counted towards k-safety here, because
// compute nodes are only recognized by their state. Once compute nodes can be
// identified while down or offline, use that instead of checking state.
if vnode.State != util.NodeComputeState {
subscribingHostsCount++
}
} else if vnode.Sandbox != "" { // add sandbox node to allExistingHostNodeMap as well
existingHostNodeMap[h] = vnode
} else { // main cluster catalog node is not expected, trim it
// cannot trim UP nodes
if vnode.State == util.NodeUpState {
return existingHostNodeMap, fmt.Errorf("cannot trim the UP node %s (address %s)",
vnode.Name, h)
if vnode.State == util.NodeUpState || vnode.State == util.NodeComputeState {
return existingHostNodeMap, fmt.Errorf("cannot trim the %s node %s (address %s)",
vnode.State, vnode.Name, h)
}
nodesToTrim = append(nodesToTrim, vnode.Name)
}
Expand All @@ -295,7 +317,7 @@ func (vcc VClusterCommands) trimNodesInCatalog(vdb *VCoordinationDatabase,
var instructions []clusterOp

// mark k-safety
if len(aliveHosts) < ksafetyThreshold {
if subscribingHostsCount < ksafetyThreshold {
httpsMarkDesignKSafeOp, err := makeHTTPSMarkDesignKSafeOp(initiator,
options.usePassword, options.UserName, options.Password,
ksafeValueZero)
Expand Down Expand Up @@ -385,7 +407,7 @@ func (vcc VClusterCommands) produceAddNodeInstructions(vdb *VCoordinationDatabas
}
nmaNetworkProfileOp := makeNMANetworkProfileOp(vdb.HostList)
httpsCreateNodeOp, err := makeHTTPSCreateNodeOp(newHosts, initiatorHost,
usePassword, username, password, vdb, options.SCName)
usePassword, username, password, vdb, options.SCName, options.ComputeGroup)
if err != nil {
return instructions, err
}
Expand Down Expand Up @@ -413,14 +435,27 @@ func (vcc VClusterCommands) produceAddNodeInstructions(vdb *VCoordinationDatabas
nil /*Sandbox name*/)

nmaStartNewNodesOp := makeNMAStartNodeOpWithVDB(newHosts, options.StartUpConf, vdb)
httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOp(newHosts, usePassword, username, password, options.TimeOut)
if err != nil {
return instructions, err
var pollNodeStateOp clusterOp
if options.ComputeGroup == "" {
// poll normally
httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOp(newHosts, usePassword, username, password, options.TimeOut)
if err != nil {
return instructions, err
}
httpsPollNodeStateOp.cmdType = AddNodeCmd
pollNodeStateOp = &httpsPollNodeStateOp
} else {
// poll indirectly via nodes with catalog access
httpsPollComputeNodeStateOp, err := makeHTTPSPollComputeNodeStateOp(vdb.PrimaryUpNodes, newHosts, usePassword,
username, password, options.TimeOut)
if err != nil {
return instructions, err
}
pollNodeStateOp = &httpsPollComputeNodeStateOp
}
httpsPollNodeStateOp.cmdType = AddNodeCmd
instructions = append(instructions,
&nmaStartNewNodesOp,
&httpsPollNodeStateOp,
pollNodeStateOp,
)

return vcc.prepareAdditionalEonInstructions(vdb, options, instructions,
Expand All @@ -447,7 +482,10 @@ func (vcc VClusterCommands) prepareAdditionalEonInstructions(vdb *VCoordinationD
return instructions, err
}
instructions = append(instructions, &httpsSyncCatalogOp)
if !*options.SkipRebalanceShards {
// Rebalancing shards after only adding compute nodes is pointless as compute nodes only
// have ephemeral subscriptions. However, it may be needed if real nodes were just trimmed.
// So even when the caller did not ask to skip rebalancing, skip it if compute nodes
// were added and no expected node names were given (i.e. nothing was trimmed).
if !*options.SkipRebalanceShards && (options.ComputeGroup == "" || len(options.ExpectedNodeNames) != 0) {
httpsRBSCShardsOp, err := makeHTTPSRebalanceSubclusterShardsOp(
initiatorHost, usePassword, username, options.Password, options.SCName)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions vclusterops/coordinator_database.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type VCoordinationDatabase struct {
Ipv6 bool

PrimaryUpNodes []string
ComputeNodes []string
FirstStartAfterRevive bool
}

Expand Down Expand Up @@ -199,6 +200,7 @@ func (vdb *VCoordinationDatabase) copy(targetHosts []string) VCoordinationDataba
LicensePathOnNode: vdb.LicensePathOnNode,
Ipv6: vdb.Ipv6,
PrimaryUpNodes: util.CopySlice(vdb.PrimaryUpNodes),
ComputeNodes: util.CopySlice(vdb.ComputeNodes),
}

if len(targetHosts) == 0 {
Expand Down
2 changes: 1 addition & 1 deletion vclusterops/create_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ func (vcc VClusterCommands) produceCreateDBWorkerNodesInstructions(
newNodeHosts := util.SliceDiff(hosts, bootstrapHost)
if len(hosts) > 1 {
httpsCreateNodeOp, err := makeHTTPSCreateNodeOp(newNodeHosts, bootstrapHost,
true /* use password auth */, options.UserName, options.Password, vdb, "")
true /* use password auth */, options.UserName, options.Password, vdb, "" /* subcluster */, "" /* compute group */)
if err != nil {
return instructions, err
}
Expand Down
23 changes: 21 additions & 2 deletions vclusterops/https_create_node_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,15 @@ type httpsCreateNodeOp struct {
RequestParams map[string]string
}

// Keys used in httpsCreateNodeOp.RequestParams that are referenced in more
// than one place.
const (
// createNodeSCNameParam is the request parameter key for the subcluster name.
createNodeSCNameParam = "subcluster"
// createNodeCGNameParam is the request parameter key for the compute group name.
createNodeCGNameParam = "compute-group"
)

func makeHTTPSCreateNodeOp(newNodeHosts []string, bootstrapHost []string,
useHTTPPassword bool, userName string, httpsPassword *string,
vdb *VCoordinationDatabase, scName string) (httpsCreateNodeOp, error) {
vdb *VCoordinationDatabase, scName, computeGroupName string) (httpsCreateNodeOp, error) {
op := httpsCreateNodeOp{}
op.name = "HTTPSCreateNodeOp"
op.description = "Create node in catalog"
Expand All @@ -41,7 +47,10 @@ func makeHTTPSCreateNodeOp(newNodeHosts []string, bootstrapHost []string,
op.RequestParams["data-prefix"] = vdb.DataPrefix + "/" + vdb.Name
op.RequestParams["hosts"] = util.ArrayToString(newNodeHosts, ",")
if scName != "" {
op.RequestParams["subcluster"] = scName
op.RequestParams[createNodeSCNameParam] = scName
}
if computeGroupName != "" {
op.RequestParams[createNodeCGNameParam] = computeGroupName
}
err := op.validateAndSetUsernameAndPassword(op.name,
useHTTPPassword, userName, httpsPassword)
Expand Down Expand Up @@ -75,6 +84,16 @@ func (op *httpsCreateNodeOp) updateQueryParams(execContext *opEngineExecContext)
}
op.RequestParams["broadcast"] = profile.Broadcast
}

// if the compute group doesn't exist yet, and the compute node is in a compute group
// of the default subcluster, the sc name is explicitly needed for the create CG DDL
cgName, ok := op.RequestParams[createNodeCGNameParam]
if ok && cgName != "" {
scName, ok := op.RequestParams[createNodeSCNameParam]
if !ok || scName == "" {
op.RequestParams[createNodeSCNameParam] = execContext.defaultSCName
}
}
return nil
}

Expand Down
3 changes: 3 additions & 0 deletions vclusterops/https_get_nodes_info_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func (op *httpsGetNodesInfoOp) processResult(_ *opEngineExecContext) error {
op.vdb.HostNodeMap = makeVHostNodeMap()
op.vdb.HostList = []string{}
op.vdb.PrimaryUpNodes = []string{}
op.vdb.ComputeNodes = []string{}
op.vdb.UnboundNodes = []*VCoordinationNode{}
for _, node := range nodesStates.NodeList {
if node.Database != op.dbName {
Expand All @@ -150,6 +151,8 @@ func (op *httpsGetNodesInfoOp) processResult(_ *opEngineExecContext) error {
vnode := buildVnodeFromNodeStateInfo(node)
if node.IsPrimary && node.State == util.NodeUpState {
op.vdb.PrimaryUpNodes = append(op.vdb.PrimaryUpNodes, node.Address)
} else if node.State == util.NodeComputeState {
op.vdb.ComputeNodes = append(op.vdb.ComputeNodes, node.Address)
}
err := op.vdb.addNode(&vnode)
if err != nil {
Expand Down
Loading

0 comments on commit d9c30d6

Please sign in to comment.