Skip to content

Commit

Permalink
Sync from server repo (16acd2dad43)
Browse files Browse the repository at this point in the history
  • Loading branch information
cchen-vertica committed Jun 17, 2024
1 parent 3a04de0 commit bf13430
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 152 deletions.
6 changes: 0 additions & 6 deletions commands/cmd_restart_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,6 @@ func (c *CmdStartNodes) Run(vcc vclusterops.ClusterCommands) error {
return err
}

// all nodes unreachable, nothing needs to be done.
if len(options.Nodes) == 0 {
vcc.DisplayInfo("No reachable nodes to start")
return nil
}

var hostToStart []string
for _, ip := range options.Nodes {
hostToStart = append(hostToStart, ip)
Expand Down
23 changes: 6 additions & 17 deletions commands/cmd_start_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
}
dbConfig, readConfigErr := readConfig()
if readConfigErr == nil {
options.ReadFromConfig = true
if options.Sandbox != util.MainClusterSandbox || options.MainCluster {
options.RawHosts = filterInputHosts(options, dbConfig)
}
Expand All @@ -239,12 +238,6 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
return err
}

// all nodes unreachable
if len(options.Hosts) == 0 {
vcc.DisplayInfo("No reachable nodes to start database %s", options.DBName)
return nil
}

msg := fmt.Sprintf("Started database %s", options.DBName)
if options.Sandbox != "" {
sandboxMsg := fmt.Sprintf(" on sandbox %s", options.Sandbox)
Expand All @@ -260,7 +253,12 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {

// for Eon database, update config file to fill nodes' subcluster information
if readConfigErr == nil && options.IsEon {
c.UpdateConfigFileForEon(vdb, vcc)
// write db info to vcluster config file
vdb.FirstStartAfterRevive = false
err = writeConfig(vdb, true /*forceOverwrite*/)
if err != nil {
vcc.DisplayWarning("fail to update config file, details: %s", err)
}
}

// write config parameters to vcluster config param file
Expand All @@ -272,15 +270,6 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
return nil
}

// UpdateConfigFileForEon clears vdb.FirstStartAfterRevive and then
// force-overwrites the vcluster config file with the contents of vdb.
// A failed write is not returned to the caller; it is only reported as a
// warning through vcc.
func (c *CmdStartDB) UpdateConfigFileForEon(vdb *vclusterops.VCoordinationDatabase, vcc vclusterops.ClusterCommands) {
	// the flag is reset before the db info is persisted
	vdb.FirstStartAfterRevive = false

	// write db info to vcluster config file
	if writeErr := writeConfig(vdb, true /*forceOverwrite*/); writeErr != nil {
		vcc.DisplayWarning("fail to update config file, details: %s", writeErr)
	}
}

// SetDatabaseOptions will assign a vclusterops.DatabaseOptions instance to the one in CmdStartDB
func (c *CmdStartDB) SetDatabaseOptions(opt *vclusterops.DatabaseOptions) {
c.startDBOptions.DatabaseOptions = *opt
Expand Down
6 changes: 0 additions & 6 deletions commands/cmd_start_subcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,6 @@ func (c *CmdStartSubcluster) Run(vcc vclusterops.ClusterCommands) error {
return err
}

// all nodes unreachable, nothing needs to be done.
if len(options.Nodes) == 0 {
vcc.DisplayInfo("No reachable nodes to start in subcluster %s", options.SCName)
return nil
}

vcc.DisplayInfo("Successfully started subcluster %s for database %s",
options.SCName, options.DBName)

Expand Down
13 changes: 0 additions & 13 deletions vclusterops/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,16 +468,3 @@ func (vcc *VClusterCommands) doReIP(options *DatabaseOptions, scName string,

return nil
}

// getUnreachableHosts runs a single NMA health operation (built with
// makeNMAHealthOpSkipUnreachable) against options.Hosts and returns the
// unreachable hosts recorded in the op engine's execution context.
// If the engine run fails, the error is returned with a nil host list.
func (vcc *VClusterCommands) getUnreachableHosts(options *DatabaseOptions) ([]string, error) {
	healthOp := makeNMAHealthOpSkipUnreachable(options.Hosts)

	// the engine needs the HTTPS credentials carried by the options
	certs := httpsCerts{
		key:    options.Key,
		cert:   options.Cert,
		caCert: options.CaCert,
	}

	engine := makeClusterOpEngine([]clusterOp{&healthOp}, &certs)
	if runErr := engine.run(vcc.Log); runErr != nil {
		return nil, runErr
	}

	return engine.execContext.unreachableHosts, nil
}
20 changes: 7 additions & 13 deletions vclusterops/nma_vertica_version_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ type nmaVerticaVersionOp struct {
scName string
readOnly bool
targetNodeIPs []string // used to filter desired nodes' info
reachableHosts []string // hosts that are reachable through NMA
}

func makeHostVersionMap() hostVersionMap {
Expand Down Expand Up @@ -79,12 +78,10 @@ func makeNMAReadVerticaVersionOp(vdb *VCoordinationDatabase) nmaVerticaVersionOp

// makeNMAVerticaVersionOpWithTargetHosts is used in start_db, VCluster will check Vertica
// version for the subclusters which contain target hosts
func makeNMAVerticaVersionOpWithTargetHosts(sameVersion bool, targetNodeIPs []string) nmaVerticaVersionOp {
func makeNMAVerticaVersionOpWithTargetHosts(sameVersion bool, hosts []string) nmaVerticaVersionOp {
// We set hosts to nil and isEon to false temporarily, and they will get the correct value from execute context in prepare()
op := makeNMACheckVerticaVersionOp(nil /*hosts*/, sameVersion, false /*isEon*/)
op.targetNodeIPs = targetNodeIPs
// start_db target all reachable input hosts
op.reachableHosts = targetNodeIPs
op.targetNodeIPs = hosts
return op
}

Expand All @@ -107,10 +104,9 @@ func makeNMAVerticaVersionOpWithVDB(sameVersion bool, vdb *VCoordinationDatabase

// makeNMAVerticaVersionOpBeforeStartNode is used in start_node, VCluster will check Vertica
// version for the nodes which are in the same cluster(main cluster or sandbox) as the target hosts
func makeNMAVerticaVersionOpBeforeStartNode(vdb *VCoordinationDatabase, reachableHosts, targetNodeIPs []string) nmaVerticaVersionOp {
op := makeNMACheckVerticaVersionOp(nil, true /*sameVersion*/, vdb.IsEon)
op.reachableHosts = reachableHosts
op.targetNodeIPs = targetNodeIPs
func makeNMAVerticaVersionOpBeforeStartNode(vdb *VCoordinationDatabase, hosts []string) nmaVerticaVersionOp {
op := makeNMACheckVerticaVersionOp(nil /*hosts*/, true /*sameVersion*/, vdb.IsEon)
op.targetNodeIPs = hosts
op.vdb = vdb
return op
}
Expand Down Expand Up @@ -346,9 +342,8 @@ func (op *nmaVerticaVersionOp) prepareHostNodeMap(execContext *opEngineExecConte
if err != nil {
return hostNodeMap, err
}
allReachableHostsInTargetSCs := util.SliceCommon(allHostsInTargetSCs, op.reachableHosts)
// get host-node map for all hosts in target subclusters
hostNodeMap = util.FilterMapByKey(execContext.nmaVDatabase.HostNodeMap, allReachableHostsInTargetSCs)
hostNodeMap = util.FilterMapByKey(execContext.nmaVDatabase.HostNodeMap, allHostsInTargetSCs)
}
return hostNodeMap, nil
}
Expand All @@ -370,9 +365,8 @@ func (op *nmaVerticaVersionOp) prepareHostNodeMapWithVDB() (vHostNodeMap, error)
if err != nil {
return hostNodeMap, err
}
allReachableHostsInTargetSCs := util.SliceCommon(allHostsInTargetSCs, op.reachableHosts)
// get host-node map for all hosts in target subclusters
hostNodeMap = util.FilterMapByKey(op.vdb.HostNodeMap, allReachableHostsInTargetSCs)
hostNodeMap = util.FilterMapByKey(op.vdb.HostNodeMap, allHostsInTargetSCs)

return hostNodeMap, nil
}
Expand Down
42 changes: 7 additions & 35 deletions vclusterops/start_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ type VStartDatabaseOptions struct {

// whether this is the first time the database is started after revive
FirstStartAfterRevive bool

// whether input info is read from vcluster config file, used for quorum check
ReadFromConfig bool
}

func VStartDatabaseOptionsFactory() VStartDatabaseOptions {
Expand Down Expand Up @@ -155,10 +152,9 @@ func (vcc VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) (vdbP
vcc.Log.PrintWarning("communal storage location is not specified" + warningMsg)
}
}
numTotalNodes := len(options.Hosts)

// start_db pre-checks and get basic info
err = vcc.runStartDBPrecheck(options, &vdb, numTotalNodes)
err = vcc.runStartDBPrecheck(options, &vdb)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -189,18 +185,7 @@ func (vcc VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) (vdbP
return &updatedVDB, nil
}

func (vcc VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase, numTotalNodes int) error {
// filter out unreachable hosts
unreachableHosts, err := vcc.getUnreachableHosts(&options.DatabaseOptions)
if err != nil {
return err
}
// if it's eon mode and there are unreachable hosts, we cannot perform quorum check due to missing primary node information
// error out here with hint
if options.IsEon && len(unreachableHosts) > 0 {
return fmt.Errorf("cannot start db with unreachable hosts, please check cluster and NMA connectivity on unreachable hosts")
}
options.Hosts = util.SliceDiff(options.Hosts, unreachableHosts)
func (vcc VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase) error {
// pre-instruction to perform basic checks and get basic information
preInstructions, err := vcc.produceStartDBPreCheck(options, vdb, options.TrimHostList)
if err != nil {
Expand All @@ -222,14 +207,6 @@ func (vcc VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, v
options.Hosts = vcc.removeHostsNotInCatalog(&clusterOpEngine.execContext.nmaVDatabase, options.Hosts)
}

// Quorum Check
if options.ReadFromConfig && !options.IsEon {
err = vcc.quorumCheck(numTotalNodes, len(options.Hosts))
if err != nil {
return fmt.Errorf("fail to start database pre-checks: %w", err)
}
}

return nil
}

Expand Down Expand Up @@ -267,6 +244,7 @@ func (vcc VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOption
trimHostList bool) ([]clusterOp, error) {
var instructions []clusterOp

nmaHealthOp := makeNMAHealthOp(options.Hosts)
// need username for https operations
err := options.setUsePasswordAndValidateUsernameIfNeeded(vcc.Log)
if err != nil {
Expand All @@ -278,7 +256,10 @@ func (vcc VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOption
if err != nil {
return instructions, err
}
instructions = append(instructions, &checkDBRunningOp)
instructions = append(instructions,
&nmaHealthOp,
&checkDBRunningOp,
)

// when we cannot get db info from cluster_config.json, we will fetch it from NMA /nodes endpoint.
if len(vdb.HostNodeMap) == 0 {
Expand Down Expand Up @@ -370,12 +351,3 @@ func (vcc VClusterCommands) setOrRotateEncryptionKey(keyType string) clusterOp {
op := makeNMASpreadSecurityOp(vcc.Log, keyType)
return &op
}

// quorumCheck reports whether numReachableHosts forms a strict majority of
// numPrimaryNodes. The required minimum is numPrimaryNodes/2 + 1 (integer
// division). It returns nil when the minimum is met and a descriptive error
// otherwise.
func (vcc VClusterCommands) quorumCheck(numPrimaryNodes, numReachableHosts int) error {
	quorum := numPrimaryNodes/2 + 1

	// happy path: enough reachable hosts
	if numReachableHosts >= quorum {
		return nil
	}

	return fmt.Errorf("quorum not satisfied, number of reachable nodes %d < minimum %d of %d primary nodes",
		numReachableHosts, quorum, numPrimaryNodes)
}
86 changes: 29 additions & 57 deletions vclusterops/start_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,88 +150,56 @@ func (vcc VClusterCommands) startNodePreCheck(vdb *VCoordinationDatabase, option
return nil
}

func (vcc VClusterCommands) removeUnreachableHosts(options *VStartNodesOptions) error {
unreachableHosts, err := vcc.getUnreachableHosts(&options.DatabaseOptions)
// VStartNodes starts the given nodes for a cluster that has not yet lost
// cluster quorum. Returns any error encountered. If necessary, it updates the
// node's IP in the Vertica catalog. If cluster quorum is already lost, use
// VStartDatabase. It will skip any nodes given that no longer exist in the
// catalog.
func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
/*
* - Produce Instructions
* - Create a VClusterOpEngine
* - Give the instructions to the VClusterOpEngine to run
*/

// validate and analyze options
err := options.validateAnalyzeOptions(vcc.Log)
if err != nil {
return err
}
options.Hosts = util.SliceDiff(options.Hosts, unreachableHosts)
for _, unreachableHost := range unreachableHosts {
for name, val := range options.Nodes {
if val == unreachableHost {
delete(options.Nodes, name)
}
}
}
return nil
}

func (vcc VClusterCommands) preStartNodeCheck(options *VStartNodesOptions, vdb *VCoordinationDatabase,
hostNodeNameMap map[string]string, startNodeInfo *VStartNodesInfo) error {
// retrieve database information to execute the command so we do not always rely on some user input
// if VStartNodes is called from VStartSubcluster, we can reuse the vdb from VStartSubcluster
vdb := makeVCoordinationDatabase()
if options.vdb == nil {
err := vcc.getVDBFromRunningDBIncludeSandbox(vdb, &options.DatabaseOptions, AnySandbox)
err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, AnySandbox)
if err != nil {
return err
}
} else {
vdb = *options.vdb
}

hostNodeNameMap := make(map[string]string)
startNodeInfo := new(VStartNodesInfo)
for _, vnode := range vdb.HostNodeMap {
hostNodeNameMap[vnode.Name] = vnode.Address
}

// precheck to make sure the nodes to start are either all sandboxed nodes in one sandbox or all main cluster nodes
err := vcc.startNodePreCheck(vdb, options, hostNodeNameMap, startNodeInfo)
err = vcc.startNodePreCheck(&vdb, options, hostNodeNameMap, startNodeInfo)
if err != nil {
return err
}

// sandboxes may have different catalog from the main cluster, update the vdb build from the sandbox of the nodes to start
err = vcc.getVDBFromRunningDBIncludeSandbox(vdb, &options.DatabaseOptions, startNodeInfo.Sandbox)
err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, startNodeInfo.Sandbox)
if err != nil {
if startNodeInfo.Sandbox != util.MainClusterSandbox {
return errors.Join(err, fmt.Errorf("hint: make sure there is at least one UP node in the sandbox %s", startNodeInfo.Sandbox))
}
return errors.Join(err, fmt.Errorf("hint: make sure there is at least one UP node in the database"))
}
return nil
}

// VStartNodes starts the given nodes for a cluster that has not yet lost
// cluster quorum. Returns any error encountered. If necessary, it updates the
// node's IP in the Vertica catalog. If cluster quorum is already lost, use
// VStartDatabase. It will skip any nodes given that no longer exist in the
// catalog.
func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
/*
* - Produce Instructions
* - Create a VClusterOpEngine
* - Give the instructions to the VClusterOpEngine to run
*/

// validate and analyze options
err := options.validateAnalyzeOptions(vcc.Log)
if err != nil {
return err
}

err = vcc.removeUnreachableHosts(options)
if err != nil || len(options.Nodes) == 0 {
return err
}

vdb := makeVCoordinationDatabase()
if options.vdb != nil {
vdb = *options.vdb
}
hostNodeNameMap := make(map[string]string)
startNodeInfo := new(VStartNodesInfo)

err = vcc.preStartNodeCheck(options, &vdb, hostNodeNameMap, startNodeInfo)
if err != nil {
return err
}

// find out hosts
// - that need to re-ip, and
Expand All @@ -248,7 +216,9 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
// if none of them is down and no other nodes to re-ip,
// we will early stop as there is no need to start them
if !startNodeInfo.hasDownNodeNoNeedToReIP && len(startNodeInfo.ReIPList) == 0 {
vcc.Log.DisplayInfo("The provided nodes are either not in catalog or already up. There is nothing to start.")
const msg = "The provided nodes are either not in catalog or already up. There is nothing to start."
fmt.Println(msg)
vcc.Log.Info(msg)
return nil
}

Expand All @@ -259,7 +229,9 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
// If no nodes are found to start, we can simply exit here. This can happen if
// given a list of nodes that aren't in the catalog any longer.
if len(startNodeInfo.HostsToStart) == 0 {
vcc.Log.DisplayInfo("None of the nodes provided are in the catalog. There is nothing to start.")
const msg = "None of the nodes provided are in the catalog. There is nothing to start."
fmt.Println(msg)
vcc.Log.Info(msg)
return nil
}

Expand Down Expand Up @@ -379,7 +351,7 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN
}

// require to have the same vertica version
nmaVerticaVersionOp := makeNMAVerticaVersionOpBeforeStartNode(vdb, options.Hosts, startNodeInfo.HostsToStart)
nmaVerticaVersionOp := makeNMAVerticaVersionOpBeforeStartNode(vdb, startNodeInfo.HostsToStart)
instructions = append(instructions, &nmaVerticaVersionOp)

// The second parameter (sourceConfHost) in produceTransferConfigOps is set to a nil value in the upload and download step
Expand Down
Loading

0 comments on commit bf13430

Please sign in to comment.