diff --git a/commands/cmd_restart_node.go b/commands/cmd_restart_node.go
index ff2f90f..044e7bb 100644
--- a/commands/cmd_restart_node.go
+++ b/commands/cmd_restart_node.go
@@ -167,12 +167,6 @@ func (c *CmdStartNodes) Run(vcc vclusterops.ClusterCommands) error {
 		return err
 	}
 
-	// all nodes unreachable, nothing need to be done.
-	if len(options.Nodes) == 0 {
-		vcc.DisplayInfo("No reachable nodes to start")
-		return nil
-	}
-
 	var hostToStart []string
 	for _, ip := range options.Nodes {
 		hostToStart = append(hostToStart, ip)
diff --git a/commands/cmd_start_db.go b/commands/cmd_start_db.go
index 5a88a06..f44dfbe 100644
--- a/commands/cmd_start_db.go
+++ b/commands/cmd_start_db.go
@@ -221,7 +221,6 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
 	}
 	dbConfig, readConfigErr := readConfig()
 	if readConfigErr == nil {
-		options.ReadFromConfig = true
 		if options.Sandbox != util.MainClusterSandbox || options.MainCluster {
 			options.RawHosts = filterInputHosts(options, dbConfig)
 		}
@@ -239,12 +238,6 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
 		return err
 	}
 
-	// all nodes unreachable
-	if len(options.Hosts) == 0 {
-		vcc.DisplayInfo("No reachable nodes to start database %s", options.DBName)
-		return nil
-	}
-
 	msg := fmt.Sprintf("Started database %s", options.DBName)
 	if options.Sandbox != "" {
 		sandboxMsg := fmt.Sprintf(" on sandbox %s", options.Sandbox)
@@ -260,7 +253,12 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
 
 	// for Eon database, update config file to fill nodes' subcluster information
 	if readConfigErr == nil && options.IsEon {
-		c.UpdateConfigFileForEon(vdb, vcc)
+		// write db info to vcluster config file
+		vdb.FirstStartAfterRevive = false
+		err = writeConfig(vdb, true /*forceOverwrite*/)
+		if err != nil {
+			vcc.DisplayWarning("fail to update config file, details: %s", err)
+		}
 	}
 
 	// write config parameters to vcluster config param file
@@ -272,15 +270,6 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error {
 	return nil
 }
 
-func (c *CmdStartDB) UpdateConfigFileForEon(vdb *vclusterops.VCoordinationDatabase, vcc vclusterops.ClusterCommands) {
-	// write db info to vcluster config file
-	vdb.FirstStartAfterRevive = false
-	err := writeConfig(vdb, true /*forceOverwrite*/)
-	if err != nil {
-		vcc.DisplayWarning("fail to update config file, details: %s", err)
-	}
-}
-
 // SetDatabaseOptions will assign a vclusterops.DatabaseOptions instance to the one in CmdStartDB
 func (c *CmdStartDB) SetDatabaseOptions(opt *vclusterops.DatabaseOptions) {
 	c.startDBOptions.DatabaseOptions = *opt
diff --git a/commands/cmd_start_subcluster.go b/commands/cmd_start_subcluster.go
index 2d7419c..15f9f82 100644
--- a/commands/cmd_start_subcluster.go
+++ b/commands/cmd_start_subcluster.go
@@ -133,12 +133,6 @@ func (c *CmdStartSubcluster) Run(vcc vclusterops.ClusterCommands) error {
 		return err
 	}
 
-	// all nodes unreachable, nothing need to be done.
-	if len(options.Nodes) == 0 {
-		vcc.DisplayInfo("No reachable nodes to start in subcluster %s", options.SCName)
-		return nil
-	}
-
 	vcc.DisplayInfo("Successfully started subcluster %s for database %s",
 		options.SCName, options.DBName)
 
diff --git a/vclusterops/helpers.go b/vclusterops/helpers.go
index 49d918d..0ff4851 100644
--- a/vclusterops/helpers.go
+++ b/vclusterops/helpers.go
@@ -468,16 +468,3 @@ func (vcc *VClusterCommands) doReIP(options *DatabaseOptions, scName string,
 
 	return nil
 }
-
-func (vcc *VClusterCommands) getUnreachableHosts(options *DatabaseOptions) ([]string, error) {
-	var nmaHealthInstructions []clusterOp
-	nmaHealthOp := makeNMAHealthOpSkipUnreachable(options.Hosts)
-	nmaHealthInstructions = []clusterOp{&nmaHealthOp}
-	certs := httpsCerts{key: options.Key, cert: options.Cert, caCert: options.CaCert}
-	opEng := makeClusterOpEngine(nmaHealthInstructions, &certs)
-	err := opEng.run(vcc.Log)
-	if err != nil {
-		return nil, err
-	}
-	return opEng.execContext.unreachableHosts, nil
-}
diff --git a/vclusterops/nma_vertica_version_op.go b/vclusterops/nma_vertica_version_op.go
index b38bd8d..36a821e 100644
--- a/vclusterops/nma_vertica_version_op.go
+++ b/vclusterops/nma_vertica_version_op.go
@@ -41,7 +41,6 @@ type nmaVerticaVersionOp struct {
 	scName         string
 	readOnly       bool
 	targetNodeIPs  []string // used to filter desired nodes' info
-	reachableHosts []string // hosts that are reachable through NMA
 }
 
 func makeHostVersionMap() hostVersionMap {
@@ -79,12 +78,10 @@ func makeNMAReadVerticaVersionOp(vdb *VCoordinationDatabase) nmaVerticaVersionOp
 
 // makeNMAVerticaVersionOpWithTargetHosts is used in start_db, VCluster will check Vertica
 // version for the subclusters which contain target hosts
-func makeNMAVerticaVersionOpWithTargetHosts(sameVersion bool, targetNodeIPs []string) nmaVerticaVersionOp {
+func makeNMAVerticaVersionOpWithTargetHosts(sameVersion bool, hosts []string) nmaVerticaVersionOp {
 	// We set hosts to nil and isEon to false temporarily, and they will get the correct value from execute context in prepare()
 	op := makeNMACheckVerticaVersionOp(nil /*hosts*/, sameVersion, false /*isEon*/)
-	op.targetNodeIPs = targetNodeIPs
-	// start_db target all reachable input hosts
-	op.reachableHosts = targetNodeIPs
+	op.targetNodeIPs = hosts
 	return op
 }
 
@@ -107,10 +104,9 @@ func makeNMAVerticaVersionOpWithVDB(sameVersion bool, vdb *VCoordinationDatabase
 
 // makeNMAVerticaVersionOpBeforeStartNode is used in start_node, VCluster will check Vertica
 // version for the nodes which are in the same cluster(main cluster or sandbox) as the target hosts
-func makeNMAVerticaVersionOpBeforeStartNode(vdb *VCoordinationDatabase, reachableHosts, targetNodeIPs []string) nmaVerticaVersionOp {
-	op := makeNMACheckVerticaVersionOp(nil, true /*sameVersion*/, vdb.IsEon)
-	op.reachableHosts = reachableHosts
-	op.targetNodeIPs = targetNodeIPs
+func makeNMAVerticaVersionOpBeforeStartNode(vdb *VCoordinationDatabase, hosts []string) nmaVerticaVersionOp {
+	op := makeNMACheckVerticaVersionOp(nil /*hosts*/, true /*sameVersion*/, vdb.IsEon)
+	op.targetNodeIPs = hosts
 	op.vdb = vdb
 	return op
 }
@@ -346,9 +342,8 @@ func (op *nmaVerticaVersionOp) prepareHostNodeMap(execContext *opEngineExecConte
 		if err != nil {
 			return hostNodeMap, err
 		}
-		allReachableHostsInTargetSCs := util.SliceCommon(allHostsInTargetSCs, op.reachableHosts)
 		// get host-node map for all hosts in target subclusters
-		hostNodeMap = util.FilterMapByKey(execContext.nmaVDatabase.HostNodeMap, allReachableHostsInTargetSCs)
+		hostNodeMap = util.FilterMapByKey(execContext.nmaVDatabase.HostNodeMap, allHostsInTargetSCs)
 	}
 	return hostNodeMap, nil
 }
@@ -370,9 +365,8 @@ func (op *nmaVerticaVersionOp) prepareHostNodeMapWithVDB() (vHostNodeMap, error)
 	if err != nil {
 		return hostNodeMap, err
 	}
-	allReachableHostsInTargetSCs := util.SliceCommon(allHostsInTargetSCs, op.reachableHosts)
 	// get host-node map for all hosts in target subclusters
-	hostNodeMap = util.FilterMapByKey(op.vdb.HostNodeMap, allReachableHostsInTargetSCs)
+	hostNodeMap = util.FilterMapByKey(op.vdb.HostNodeMap, allHostsInTargetSCs)
 	return hostNodeMap, nil
 }
 
diff --git a/vclusterops/start_db.go b/vclusterops/start_db.go
index c9a3f89..6c19078 100644
--- a/vclusterops/start_db.go
+++ b/vclusterops/start_db.go
@@ -49,9 +49,6 @@ type VStartDatabaseOptions struct {
 
 	// whether the first time to start the database after revive
 	FirstStartAfterRevive bool
-
-	// whether input info is read from vcluster config file, used for quorum check
-	ReadFromConfig bool
 }
 
 func VStartDatabaseOptionsFactory() VStartDatabaseOptions {
@@ -155,10 +152,9 @@ func (vcc VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) (vdbP
 			vcc.Log.PrintWarning("communal storage location is not specified" + warningMsg)
 		}
 	}
-	numTotalNodes := len(options.Hosts)
 
 	// start_db pre-checks and get basic info
-	err = vcc.runStartDBPrecheck(options, &vdb, numTotalNodes)
+	err = vcc.runStartDBPrecheck(options, &vdb)
 	if err != nil {
 		return nil, err
 	}
@@ -189,18 +185,7 @@ func (vcc VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) (vdbP
 	return &updatedVDB, nil
 }
 
-func (vcc VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase, numTotalNodes int) error {
-	// filter out unreachable hosts
-	unreachableHosts, err := vcc.getUnreachableHosts(&options.DatabaseOptions)
-	if err != nil {
-		return err
-	}
-	// if it's eon mode and there are unreachable hosts, we cannot perform quorum check due to missing primary node information
-	// error out here with hint
-	if options.IsEon && len(unreachableHosts) > 0 {
-		return fmt.Errorf("cannot start db with unreachable hosts, please check cluster and NMA connectivity on unreachable hosts")
-	}
-	options.Hosts = util.SliceDiff(options.Hosts, unreachableHosts)
+func (vcc VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase) error {
 	// pre-instruction to perform basic checks and get basic information
 	preInstructions, err := vcc.produceStartDBPreCheck(options, vdb, options.TrimHostList)
 	if err != nil {
@@ -222,14 +207,6 @@ func (vcc VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, v
 		options.Hosts = vcc.removeHostsNotInCatalog(&clusterOpEngine.execContext.nmaVDatabase, options.Hosts)
 	}
 
-	// Quorum Check
-	if options.ReadFromConfig && !options.IsEon {
-		err = vcc.quorumCheck(numTotalNodes, len(options.Hosts))
-		if err != nil {
-			return fmt.Errorf("fail to start database pre-checks: %w", err)
-		}
-	}
-
 	return nil
 }
 
@@ -267,6 +244,7 @@ func (vcc VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOption
 	trimHostList bool) ([]clusterOp, error) {
 	var instructions []clusterOp
 
+	nmaHealthOp := makeNMAHealthOp(options.Hosts)
 	// need username for https operations
 	err := options.setUsePasswordAndValidateUsernameIfNeeded(vcc.Log)
 	if err != nil {
@@ -278,7 +256,10 @@ func (vcc VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOption
 	if err != nil {
 		return instructions, err
 	}
-	instructions = append(instructions, &checkDBRunningOp)
+	instructions = append(instructions,
+		&nmaHealthOp,
+		&checkDBRunningOp,
+	)
 
 	// when we cannot get db info from cluster_config.json, we will fetch it from NMA /nodes endpoint.
 	if len(vdb.HostNodeMap) == 0 {
@@ -370,12 +351,3 @@ func (vcc VClusterCommands) setOrRotateEncryptionKey(keyType string) clusterOp {
 	op := makeNMASpreadSecurityOp(vcc.Log, keyType)
 	return &op
 }
-
-func (vcc VClusterCommands) quorumCheck(numPrimaryNodes, numReachableHosts int) error {
-	minimumNodesForQuorum := numPrimaryNodes/2 + 1
-	if numReachableHosts < minimumNodesForQuorum {
-		return fmt.Errorf("quorum not satisfied, number of reachable nodes %d < minimum %d of %d primary nodes",
-			numReachableHosts, minimumNodesForQuorum, numPrimaryNodes)
-	}
-	return nil
-}
diff --git a/vclusterops/start_node.go b/vclusterops/start_node.go
index efccd45..3b51f37 100644
--- a/vclusterops/start_node.go
+++ b/vclusterops/start_node.go
@@ -150,88 +150,56 @@ func (vcc VClusterCommands) startNodePreCheck(vdb *VCoordinationDatabase, option
 	return nil
 }
 
-func (vcc VClusterCommands) removeUnreachableHosts(options *VStartNodesOptions) error {
-	unreachableHosts, err := vcc.getUnreachableHosts(&options.DatabaseOptions)
+// VStartNodes starts the given nodes for a cluster that has not yet lost
+// cluster quorum. Returns any error encountered. If necessary, it updates the
+// node's IP in the Vertica catalog. If cluster quorum is already lost, use
+// VStartDatabase. It will skip any nodes given that no longer exist in the
+// catalog.
+func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
+	/*
+	 *   - Produce Instructions
+	 *   - Create a VClusterOpEngine
+	 *   - Give the instructions to the VClusterOpEngine to run
+	 */
+
+	// validate and analyze options
+	err := options.validateAnalyzeOptions(vcc.Log)
 	if err != nil {
 		return err
 	}
-	options.Hosts = util.SliceDiff(options.Hosts, unreachableHosts)
-	for _, unreachableHost := range unreachableHosts {
-		for name, val := range options.Nodes {
-			if val == unreachableHost {
-				delete(options.Nodes, name)
-			}
-		}
-	}
-	return nil
-}
 
-func (vcc VClusterCommands) preStartNodeCheck(options *VStartNodesOptions, vdb *VCoordinationDatabase,
-	hostNodeNameMap map[string]string, startNodeInfo *VStartNodesInfo) error {
 	// retrieve database information to execute the command so we do not always rely on some user input
 	// if VStartNodes is called from VStartSubcluster, we can reuse the vdb from VStartSubcluster
+	vdb := makeVCoordinationDatabase()
 	if options.vdb == nil {
-		err := vcc.getVDBFromRunningDBIncludeSandbox(vdb, &options.DatabaseOptions, AnySandbox)
+		err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, AnySandbox)
 		if err != nil {
 			return err
 		}
+	} else {
+		vdb = *options.vdb
 	}
 
+	hostNodeNameMap := make(map[string]string)
+	startNodeInfo := new(VStartNodesInfo)
 	for _, vnode := range vdb.HostNodeMap {
 		hostNodeNameMap[vnode.Name] = vnode.Address
 	}
 
 	// precheck to make sure the nodes to start are either all sandboxed nodes in one sandbox or all main cluster nodes
-	err := vcc.startNodePreCheck(vdb, options, hostNodeNameMap, startNodeInfo)
+	err = vcc.startNodePreCheck(&vdb, options, hostNodeNameMap, startNodeInfo)
 	if err != nil {
 		return err
 	}
 
	// sandboxes may have different catalog from the main cluster, update the vdb build from the sandbox of the nodes to start
-	err = vcc.getVDBFromRunningDBIncludeSandbox(vdb, &options.DatabaseOptions, startNodeInfo.Sandbox)
+	err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, startNodeInfo.Sandbox)
 	if err != nil {
 		if startNodeInfo.Sandbox != util.MainClusterSandbox {
 			return errors.Join(err, fmt.Errorf("hint: make sure there is at least one UP node in the sandbox %s", startNodeInfo.Sandbox))
 		}
 		return errors.Join(err, fmt.Errorf("hint: make sure there is at least one UP node in the database"))
 	}
-	return nil
-}
-
-// VStartNodes starts the given nodes for a cluster that has not yet lost
-// cluster quorum. Returns any error encountered. If necessary, it updates the
-// node's IP in the Vertica catalog. If cluster quorum is already lost, use
-// VStartDatabase. It will skip any nodes given that no longer exist in the
-// catalog.
-func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
-	/*
-	 *   - Produce Instructions
-	 *   - Create a VClusterOpEngine
-	 *   - Give the instructions to the VClusterOpEngine to run
-	 */
-
-	// validate and analyze options
-	err := options.validateAnalyzeOptions(vcc.Log)
-	if err != nil {
-		return err
-	}
-
-	err = vcc.removeUnreachableHosts(options)
-	if err != nil || len(options.Nodes) == 0 {
-		return err
-	}
-
-	vdb := makeVCoordinationDatabase()
-	if options.vdb != nil {
-		vdb = *options.vdb
-	}
-	hostNodeNameMap := make(map[string]string)
-	startNodeInfo := new(VStartNodesInfo)
-
-	err = vcc.preStartNodeCheck(options, &vdb, hostNodeNameMap, startNodeInfo)
-	if err != nil {
-		return err
-	}
 
 	// find out hosts
 	// - that need to re-ip, and
@@ -248,7 +216,9 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
 	// if none of them is down and no other nodes to re-ip,
 	// we will early stop as there is no need to start them
 	if !startNodeInfo.hasDownNodeNoNeedToReIP && len(startNodeInfo.ReIPList) == 0 {
-		vcc.Log.DisplayInfo("The provided nodes are either not in catalog or already up. There is nothing to start.")
+		const msg = "The provided nodes are either not in catalog or already up. There is nothing to start."
+		fmt.Println(msg)
+		vcc.Log.Info(msg)
 		return nil
 	}
 
@@ -259,7 +229,9 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
 	// If no nodes found to start. We can simply exit here. This can happen if
 	// given a list of nodes that aren't in the catalog any longer.
 	if len(startNodeInfo.HostsToStart) == 0 {
-		vcc.Log.DisplayInfo("None of the nodes provided are in the catalog. There is nothing to start.")
+		const msg = "None of the nodes provided are in the catalog. There is nothing to start."
+		fmt.Println(msg)
+		vcc.Log.Info(msg)
 		return nil
 	}
 
@@ -379,7 +351,7 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN
 	}
 
 	// require to have the same vertica version
-	nmaVerticaVersionOp := makeNMAVerticaVersionOpBeforeStartNode(vdb, options.Hosts, startNodeInfo.HostsToStart)
+	nmaVerticaVersionOp := makeNMAVerticaVersionOpBeforeStartNode(vdb, startNodeInfo.HostsToStart)
 	instructions = append(instructions, &nmaVerticaVersionOp)
 
 	// The second parameter (sourceConfHost) in produceTransferConfigOps is set to a nil value in the upload and download step
diff --git a/vclusterops/start_subcluster.go b/vclusterops/start_subcluster.go
index 9c58566..c425162 100644
--- a/vclusterops/start_subcluster.go
+++ b/vclusterops/start_subcluster.go
@@ -140,12 +140,13 @@ func (vcc VClusterCommands) VStartSubcluster(options *VStartScOptions) error {
 			options.SCName)
 	}
 
-	options.VStartNodesOptions.Nodes = nodesToStart
-	options.VStartNodesOptions.DatabaseOptions = options.DatabaseOptions
-	options.VStartNodesOptions.StatePollingTimeout = options.StatePollingTimeout
-	options.VStartNodesOptions.vdb = &vdb
+	var startNodesOptions VStartNodesOptions
+	startNodesOptions.Nodes = nodesToStart
+	startNodesOptions.DatabaseOptions = options.DatabaseOptions
+	startNodesOptions.StatePollingTimeout = options.StatePollingTimeout
+	startNodesOptions.vdb = &vdb
 
 	vlog.DisplayColorInfo("Starting nodes %v in subcluster %s", maps.Keys(nodesToStart), options.SCName)
 
-	return vcc.VStartNodes(&options.VStartNodesOptions)
+	return vcc.VStartNodes(&startNodesOptions)
 }
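
Note on the quorum logic this patch removes: the deleted quorumCheck helper in vclusterops/start_db.go treated quorum as a simple majority of primary nodes (numPrimaryNodes/2 + 1). The standalone Go sketch below restates that arithmetic for reference only; the function and variable names here are illustrative and are not part of the patch or the vclusterops API.

	// Illustrative sketch (not part of the patch): the majority rule the
	// removed quorumCheck helper applied during non-Eon start_db pre-checks.
	package main

	import "fmt"

	// hasQuorum reports whether enough nodes are reachable to keep quorum,
	// using the same arithmetic as the removed helper: numPrimaryNodes/2 + 1.
	func hasQuorum(numPrimaryNodes, numReachableHosts int) bool {
		minimumNodesForQuorum := numPrimaryNodes/2 + 1
		return numReachableHosts >= minimumNodesForQuorum
	}

	func main() {
		fmt.Println(hasQuorum(5, 3)) // true: 3 >= 5/2+1 = 3
		fmt.Println(hasQuorum(5, 2)) // false: 2 < 3
	}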