diff --git a/commands/cluster_command_launcher.go b/commands/cluster_command_launcher.go index c63ab5a..085efc5 100644 --- a/commands/cluster_command_launcher.go +++ b/commands/cluster_command_launcher.go @@ -156,6 +156,7 @@ const ( removeSCSubCmd = "db_remove_subcluster" stopSCSubCmd = "stop_subcluster" addNodeSubCmd = "db_add_node" + startSCSubCmd = "start_subcluster" stopNodeCmd = "stop_node" removeNodeSubCmd = "db_remove_node" restartNodeSubCmd = "restart_node" @@ -512,6 +513,7 @@ func constructCmds() []*cobra.Command { makeCmdAddSubcluster(), makeCmdRemoveSubcluster(), makeCmdStopSubcluster(), + makeCmdStartSubcluster(), makeCmdSandboxSubcluster(), makeCmdUnsandboxSubcluster(), // node-scope cmds diff --git a/commands/cmd_add_node.go b/commands/cmd_add_node.go index d7f7a42..8281327 100644 --- a/commands/cmd_add_node.go +++ b/commands/cmd_add_node.go @@ -199,7 +199,7 @@ func (c *CmdAddNode) Run(vcc vclusterops.ClusterCommands) error { } // write db info to vcluster config file - err := writeConfig(&vdb, vcc.GetLog()) + err := writeConfig(&vdb) if err != nil { vcc.PrintWarning("fail to write config file, details: %s", err) } diff --git a/commands/cmd_add_subcluster.go b/commands/cmd_add_subcluster.go index e5ce9dd..a660ed6 100644 --- a/commands/cmd_add_subcluster.go +++ b/commands/cmd_add_subcluster.go @@ -220,7 +220,7 @@ func (c *CmdAddSubcluster) Run(vcc vclusterops.ClusterCommands) error { return err } // update db info in the config file - err = writeConfig(&vdb, vcc.GetLog()) + err = writeConfig(&vdb) if err != nil { vcc.PrintWarning("fail to write config file, details: %s", err) } diff --git a/commands/cmd_base.go b/commands/cmd_base.go index 6f8cf48..94cbaa5 100644 --- a/commands/cmd_base.go +++ b/commands/cmd_base.go @@ -283,6 +283,9 @@ func (c *CmdBase) setDBPassword(opt *vclusterops.DatabaseOptions) error { return nil } + if c.passwordFile == "" { + return fmt.Errorf("password file path is empty") + } password, err := 
c.passwordFileHelper(c.passwordFile) if err != nil { return err @@ -292,10 +295,6 @@ func (c *CmdBase) setDBPassword(opt *vclusterops.DatabaseOptions) error { } func (c *CmdBase) passwordFileHelper(passwordFile string) (string, error) { - if passwordFile == "" { - return "", fmt.Errorf("password file path is empty") - } - // Read password from file passwordBytes, err := os.ReadFile(passwordFile) if err != nil { diff --git a/commands/cmd_config_recover.go b/commands/cmd_config_recover.go index 54b6a08..173b2f4 100644 --- a/commands/cmd_config_recover.go +++ b/commands/cmd_config_recover.go @@ -110,7 +110,7 @@ func (c *CmdConfigRecover) Run(vcc vclusterops.ClusterCommands) error { return err } // write db info to vcluster config file - err = writeConfig(&vdb, vcc.GetLog()) + err = writeConfig(&vdb) if err != nil { return fmt.Errorf("fail to write config file, details: %s", err) } diff --git a/commands/cmd_create_db.go b/commands/cmd_create_db.go index 608ca32..7b0e05b 100644 --- a/commands/cmd_create_db.go +++ b/commands/cmd_create_db.go @@ -266,7 +266,7 @@ func (c *CmdCreateDB) Run(vcc vclusterops.ClusterCommands) error { } // write db info to vcluster config file - err := writeConfig(&vdb, vcc.GetLog()) + err := writeConfig(&vdb) if err != nil { vcc.PrintWarning("fail to write config file, details: %s", err) } diff --git a/commands/cmd_drop_db.go b/commands/cmd_drop_db.go index eef77d0..f52fab0 100644 --- a/commands/cmd_drop_db.go +++ b/commands/cmd_drop_db.go @@ -113,7 +113,7 @@ func (c *CmdDropDB) Run(vcc vclusterops.ClusterCommands) error { vcc.PrintInfo("Successfully dropped database %s", c.dropDBOptions.DBName) // if the database is successfully dropped, the config file will be removed // if failed to remove it, we will ask users to manually do it - err = removeConfig(vcc.GetLog()) + err = removeConfig() if err != nil { vcc.PrintWarning("Fail to remove config file %q, "+ "please manually do it. 
Details: %v", c.dropDBOptions.ConfigPath, err) diff --git a/commands/cmd_remove_node.go b/commands/cmd_remove_node.go index 1b105d4..0b59378 100644 --- a/commands/cmd_remove_node.go +++ b/commands/cmd_remove_node.go @@ -144,7 +144,7 @@ func (c *CmdRemoveNode) Run(vcc vclusterops.ClusterCommands) error { } // write db info to vcluster config file - err = writeConfig(&vdb, vcc.GetLog()) + err = writeConfig(&vdb) if err != nil { vcc.PrintWarning("fail to write config file, details: %s", err) } diff --git a/commands/cmd_remove_subcluster.go b/commands/cmd_remove_subcluster.go index 4e4c644..68f6066 100644 --- a/commands/cmd_remove_subcluster.go +++ b/commands/cmd_remove_subcluster.go @@ -134,7 +134,7 @@ func (c *CmdRemoveSubcluster) Run(vcc vclusterops.ClusterCommands) error { } // write db info to vcluster config file - err = writeConfig(&vdb, vcc.GetLog()) + err = writeConfig(&vdb) if err != nil { vcc.PrintWarning("fail to write config file, details: %s", err) } diff --git a/commands/cmd_revive_db.go b/commands/cmd_revive_db.go index 087c301..a011c05 100644 --- a/commands/cmd_revive_db.go +++ b/commands/cmd_revive_db.go @@ -180,7 +180,7 @@ func (c *CmdReviveDB) Run(vcc vclusterops.ClusterCommands) error { } // write db info to vcluster config file - err = writeConfig(vdb, vcc.GetLog()) + err = writeConfig(vdb) if err != nil { vcc.PrintWarning("fail to write config file, details: %s", err) } diff --git a/commands/cmd_start_db.go b/commands/cmd_start_db.go index 6c384aa..832ea61 100644 --- a/commands/cmd_start_db.go +++ b/commands/cmd_start_db.go @@ -177,7 +177,7 @@ func (c *CmdStartDB) Run(vcc vclusterops.ClusterCommands) error { // for Eon database, update config file to fill nodes' subcluster information if options.IsEon { // write db info to vcluster config file - err := writeConfig(vdb, vcc.GetLog()) + err := writeConfig(vdb) if err != nil { vcc.PrintWarning("fail to update config file, details: %s", err) } diff --git a/commands/cmd_start_replication.go 
b/commands/cmd_start_replication.go index 2276a77..cc1a89a 100644 --- a/commands/cmd_start_replication.go +++ b/commands/cmd_start_replication.go @@ -50,7 +50,7 @@ This subcommand copies table or schema data directly from one Eon Mode database's communal storage to another. The --target-conn option serves as a collection file for gathering necessary -target information for replication. You need to run vcluster manage_connection +target information for replication. You need to run vcluster create_connection to generate this connection file in order to use this option. The --sandbox option is used to replicate from a sandbox to a target database @@ -214,6 +214,9 @@ func (c *CmdStartReplication) parseTargetPassword() error { options.TargetPassword = new(string) } + if c.targetPasswordFile == "" { + return fmt.Errorf("target password file path is empty") + } password, err := c.passwordFileHelper(c.targetPasswordFile) if err != nil { return err diff --git a/commands/cmd_start_subcluster.go b/commands/cmd_start_subcluster.go new file mode 100644 index 0000000..57255ec --- /dev/null +++ b/commands/cmd_start_subcluster.go @@ -0,0 +1,142 @@ +/* + (c) Copyright [2023-2024] Open Text. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package commands + +import ( + "github.com/spf13/cobra" + "github.com/spf13/viper" + "github.com/vertica/vcluster/vclusterops" + "github.com/vertica/vcluster/vclusterops/util" + "github.com/vertica/vcluster/vclusterops/vlog" +) + +/* CmdStartSubcluster + * + * Implements ClusterCommand interface + */ +type CmdStartSubcluster struct { + startScOptions *vclusterops.VStartScOptions + + CmdBase +} + +func makeCmdStartSubcluster() *cobra.Command { + // CmdStartSubcluster + newCmd := &CmdStartSubcluster{} + opt := vclusterops.VStartScOptionsFactory() + newCmd.startScOptions = &opt + + cmd := makeBasicCobraCmd( + newCmd, + startSCSubCmd, + "Start a subcluster", + `This subcommand starts a stopped subcluster in a running Eon database. + +You must provide the subcluster name with the --subcluster option. + +Examples: + # Start a subcluster with config file + vcluster start_subcluster --subcluster sc1 \ + --config /opt/vertica/config/vertica_cluster.yaml + + # Start a subcluster with user input + vcluster start_subcluster --db-name test_db \ + --hosts 10.20.30.40,10.20.30.41,10.20.30.42 --subcluster sc1 +`, + []string{dbNameFlag, configFlag, hostsFlag, eonModeFlag, passwordFlag}, + ) + + // local flags + newCmd.setLocalFlags(cmd) + + // require name of subcluster to start + markFlagsRequired(cmd, []string{subclusterFlag}) + + // hide eon mode flag since we expect it to come from config file, not from user input + hideLocalFlags(cmd, []string{eonModeFlag}) + + return cmd +} + +// setLocalFlags will set the local flags the command has +func (c *CmdStartSubcluster) setLocalFlags(cmd *cobra.Command) { + cmd.Flags().StringVar( + &c.startScOptions.SubclusterToStart, + subclusterFlag, + "", + "Name of subcluster to start", + ) + cmd.Flags().IntVar( + &c.startScOptions.StatePollingTimeout, + "timeout", + util.DefaultTimeoutSeconds, + "The timeout (in seconds) to wait for polling node state operation", + ) +} + +func (c *CmdStartSubcluster) Parse(inputArgv []string, logger 
vlog.Printer) error { +	c.argv = inputArgv +	logger.LogMaskedArgParse(c.argv) + +	// reset some options that are not included in user input +	c.ResetUserInputOptions(&c.startScOptions.DatabaseOptions) + +	// start_subcluster only works for an Eon db so we assume the user always runs this subcommand +	// on an Eon db. When Eon mode cannot be found in config file, we set its value to true. +	if !viper.IsSet(eonModeKey) { +		c.startScOptions.IsEon = true +	} +	return c.validateParse(logger) +} + +func (c *CmdStartSubcluster) validateParse(logger vlog.Printer) error { +	logger.Info("Called validateParse()") +	err := c.getCertFilesFromCertPaths(&c.startScOptions.DatabaseOptions) +	if err != nil { +		return err +	} + +	err = c.ValidateParseBaseOptions(&c.startScOptions.DatabaseOptions) +	if err != nil { +		return err +	} +	return c.setDBPassword(&c.startScOptions.DatabaseOptions) +} + +func (c *CmdStartSubcluster) Analyze(_ vlog.Printer) error { +	return nil +} + +func (c *CmdStartSubcluster) Run(vcc vclusterops.ClusterCommands) error { +	vcc.V(1).Info("Called method Run()") + +	options := c.startScOptions + +	err := vcc.VStartSubcluster(options) +	if err != nil { +		return err +	} + +	vcc.PrintInfo("Successfully started subcluster %s for database %s", +		options.SubclusterToStart, options.DBName) + +	return nil +} + +// SetDatabaseOptions will assign a vclusterops.DatabaseOptions instance to the one in CmdStartSubcluster +func (c *CmdStartSubcluster) SetDatabaseOptions(opt *vclusterops.DatabaseOptions) { +	c.startScOptions.DatabaseOptions = *opt +} diff --git a/commands/cmd_stop_db.go b/commands/cmd_stop_db.go index 6e5b334..196c387 100644 --- a/commands/cmd_stop_db.go +++ b/commands/cmd_stop_db.go @@ -76,7 +76,8 @@ func (c *CmdStopDB) setLocalFlags(cmd *cobra.Command) { util.DefaultDrainSeconds, util.GetEonFlagMsg("seconds to wait for user connections to close."+ " Default value is "+strconv.Itoa(util.DefaultDrainSeconds)+" seconds."+ - " When the time expires, connections 
will be forcibly closed and the db will shut down"), + " When the time expires, connections will be forcibly closed and the db will shut down."+ + " Set this to 0 for Eon database, if you want to forcibly stop the database."), ) cmd.Flags().StringVar( &c.stopDBOptions.Sandbox, diff --git a/commands/vcluster_config.go b/commands/vcluster_config.go index e092896..b4c15e7 100644 --- a/commands/vcluster_config.go +++ b/commands/vcluster_config.go @@ -24,7 +24,6 @@ import ( "github.com/spf13/viper" "github.com/vertica/vcluster/vclusterops" "github.com/vertica/vcluster/vclusterops/util" - "github.com/vertica/vcluster/vclusterops/vlog" "gopkg.in/yaml.v3" ) @@ -34,7 +33,6 @@ const ( // default file name that we'll use. defConfigFileName = "vertica_cluster.yaml" currentConfigFileVersion = "1.0" - configBackupName = "vertica_cluster.yaml.backup" configFilePerm = 0600 ) @@ -189,7 +187,7 @@ func loadConfigToViper() error { // writeConfig can write database information to vertica_cluster.yaml. // It will be called in the end of some subcommands that will change the db state. -func writeConfig(vdb *vclusterops.VCoordinationDatabase, logger vlog.Printer) error { +func writeConfig(vdb *vclusterops.VCoordinationDatabase) error { if dbOptions.ConfigPath == "" { return fmt.Errorf("configuration file path is empty") } @@ -199,13 +197,6 @@ func writeConfig(vdb *vclusterops.VCoordinationDatabase, logger vlog.Printer) er return err } - // if the config file exists already, - // create its backup before overwriting it - err = backupConfigFile(dbOptions.ConfigPath, logger) - if err != nil { - return err - } - // update db config with the given database info err = dbConfig.write(dbOptions.ConfigPath) if err != nil { @@ -217,15 +208,10 @@ func writeConfig(vdb *vclusterops.VCoordinationDatabase, logger vlog.Printer) er // removeConfig remove the config file vertica_cluster.yaml. // It will be called in the end of drop_db subcommands. 
-func removeConfig(logger vlog.Printer) error { +func removeConfig() error { if dbOptions.ConfigPath == "" { return fmt.Errorf("configuration file path is empty") } - // back up the old config file - err := backupConfigFile(dbOptions.ConfigPath, logger) - if err != nil { - return err - } // remove the old db config return os.Remove(dbOptions.ConfigPath) @@ -273,24 +259,6 @@ func readVDBToDBConfig(vdb *vclusterops.VCoordinationDatabase) (DatabaseConfig, return dbConfig, nil } -// backupConfigFile backs up config file before we update it. -// This function will add ".backup" suffix to previous config file. -func backupConfigFile(configFilePath string, logger vlog.Printer) error { - if util.CanReadAccessDir(configFilePath) == nil { - // copy file to vertica_cluster.yaml.backup - configDirPath := filepath.Dir(configFilePath) - configFileBackup := filepath.Join(configDirPath, configBackupName) - logger.Info("Configuration file exists and, creating a backup", "config file", configFilePath, - "backup file", configFileBackup) - err := util.CopyFile(configFilePath, configFileBackup, configFilePerm) - if err != nil { - return err - } - } - - return nil -} - // read reads information from configFilePath to a DatabaseConfig object. // It returns any read error encountered. 
func readConfig() (dbConfig *DatabaseConfig, err error) { diff --git a/vclusterops/cluster_op.go b/vclusterops/cluster_op.go index 21eba4d..6c562fa 100644 --- a/vclusterops/cluster_op.go +++ b/vclusterops/cluster_op.go @@ -500,6 +500,7 @@ type ClusterCommands interface { VShowRestorePoints(options *VShowRestorePointsOptions) (restorePoints []RestorePoint, err error) VStartDatabase(options *VStartDatabaseOptions) (vdbPtr *VCoordinationDatabase, err error) VStartNodes(options *VStartNodesOptions) error + VStartSubcluster(startScOpt *VStartScOptions) error VStopDatabase(options *VStopDatabaseOptions) error VReplicateDatabase(options *VReplicationDatabaseOptions) error VFetchCoordinationDatabase(options *VFetchCoordinationDatabaseOptions) (VCoordinationDatabase, error) diff --git a/vclusterops/helpers.go b/vclusterops/helpers.go index d454db5..307c4b7 100644 --- a/vclusterops/helpers.go +++ b/vclusterops/helpers.go @@ -174,18 +174,27 @@ func (vcc VClusterCommands) getVDBFromRunningDBImpl(vdb *VCoordinationDatabase, httpsGetNodesInfoOp, err := makeHTTPSGetNodesInfoOp(options.DBName, options.Hosts, options.usePassword, options.UserName, options.Password, vdb, allowUseSandboxRes, sandbox) if err != nil { - return fmt.Errorf("fail to produce httpsGetNodesInfo instructions while retrieving database configurations, %w", err) + return fmt.Errorf("fail to produce httpsGetNodesInfo instruction while retrieving database configurations, %w", err) } httpsGetClusterInfoOp, err := makeHTTPSGetClusterInfoOp(options.DBName, options.Hosts, options.usePassword, options.UserName, options.Password, vdb) if err != nil { - return fmt.Errorf("fail to produce httpsGetClusterInfo instructions while retrieving database configurations, %w", err) + return fmt.Errorf("fail to produce httpsGetClusterInfo instruction while retrieving database configurations, %w", err) } var instructions []clusterOp instructions = append(instructions, &httpsGetNodesInfoOp, &httpsGetClusterInfoOp) + // update node 
state for sandboxed nodes + if allowUseSandboxRes { + httpsUpdateNodeState, e := makeHTTPSUpdateNodeStateOp(vdb, options.usePassword, options.UserName, options.Password) + if e != nil { + return fmt.Errorf("fail to produce httpsUpdateNodeState instruction while updating node states, %w", e) + } + instructions = append(instructions, &httpsUpdateNodeState) + } + certs := httpsCerts{key: options.Key, cert: options.Cert, caCert: options.CaCert} clusterOpEngine := makeClusterOpEngine(instructions, &certs) err = clusterOpEngine.run(vcc.Log) diff --git a/vclusterops/https_poll_node_state_op.go b/vclusterops/https_poll_node_state_op.go index 3d4e719..6072026 100644 --- a/vclusterops/https_poll_node_state_op.go +++ b/vclusterops/https_poll_node_state_op.go @@ -224,9 +224,9 @@ func (op *httpsPollNodeStateOp) shouldStopPolling() (bool, error) { upNodeCount++ } } else { - // if NMA endpoint cannot function well on any of the hosts, we do not want to retry polling + // if HTTPS endpoint cannot function well on any of the hosts, we do not want to retry polling return true, fmt.Errorf("[%s] expect one node's information, but got %d nodes' information"+ - " from NMA /v1/nodes/{node} endpoint on host %s", + " from HTTPS /v1/nodes/ endpoint on host %s", op.name, len(nodesInformation.NodeList), host) } } diff --git a/vclusterops/https_update_node_state_op.go b/vclusterops/https_update_node_state_op.go new file mode 100644 index 0000000..3523cb6 --- /dev/null +++ b/vclusterops/https_update_node_state_op.go @@ -0,0 +1,136 @@ +/* + (c) Copyright [2023-2024] Open Text. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package vclusterops + +import ( + "fmt" + + "github.com/vertica/vcluster/vclusterops/util" +) + +type httpsUpdateNodeStateOp struct { + opBase + opHTTPSBase + vdb *VCoordinationDatabase +} + +func makeHTTPSUpdateNodeStateOp(vdb *VCoordinationDatabase, + useHTTPPassword bool, + userName string, + httpsPassword *string, +) (httpsUpdateNodeStateOp, error) { + op := httpsUpdateNodeStateOp{} + op.name = "HTTPSUpdateNodeStateOp" + op.description = "Update node state from running database" + op.hosts = vdb.HostList + op.vdb = vdb + op.useHTTPPassword = useHTTPPassword + + err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName) + if err != nil { + return op, err + } + + op.userName = userName + op.httpsPassword = httpsPassword + return op, nil +} + +func (op *httpsUpdateNodeStateOp) setupClusterHTTPRequest(hosts []string) error { + for _, host := range hosts { + httpRequest := hostHTTPRequest{} + httpRequest.Method = GetMethod + httpRequest.buildHTTPSEndpoint("nodes/" + host) + if op.useHTTPPassword { + httpRequest.Password = op.httpsPassword + httpRequest.Username = op.userName + } + op.clusterHTTPRequest.RequestCollection[host] = httpRequest + } + + return nil +} + +func (op *httpsUpdateNodeStateOp) prepare(execContext *opEngineExecContext) error { + execContext.dispatcher.setup(op.hosts) + + return op.setupClusterHTTPRequest(op.hosts) +} + +func (op *httpsUpdateNodeStateOp) execute(execContext *opEngineExecContext) error { + if err := op.runExecute(execContext); err != nil { + return err + } + + return op.processResult(execContext) +} + +func 
(op *httpsUpdateNodeStateOp) processResult(execContext *opEngineExecContext) error { + // VER-93706 may update the error handling in this function + for host, result := range op.clusterHTTPRequest.ResultCollection { + op.logResponse(host, result) + + if result.isUnauthorizedRequest() { + op.logger.PrintError("[%s] unauthorized request: %s", op.name, result.content) + execContext.hostsWithWrongAuth = append(execContext.hostsWithWrongAuth, host) + // return here because we assume that + // we will get the same error across other nodes + return result.err + } + + if !result.isPassing() { + // for failed request, we set the host's state to DOWN + // only if its current state is UNKNOWN + vnode, ok := op.vdb.HostNodeMap[host] + if !ok { + return fmt.Errorf("cannot find host %s in vdb", host) + } + if vnode.State == util.NodeUnknownState { + vnode.State = util.NodeDownState + } + + continue + } + + // parse the /nodes/ endpoint response + nodesInformation := nodesInfo{} + err := op.parseAndCheckResponse(host, result.content, &nodesInformation) + if err != nil { + return fmt.Errorf("[%s] fail to parse result on host %s: %w", + op.name, host, err) + } + + if len(nodesInformation.NodeList) == 1 { + nodeInfo := nodesInformation.NodeList[0] + vnode, ok := op.vdb.HostNodeMap[host] + if !ok { + return fmt.Errorf("cannot find host %s in vdb", host) + } + vnode.State = nodeInfo.State + } else { + // if the result format is wrong on any of the hosts, we should throw an error + return fmt.Errorf("[%s] expect one node's information, but got %d nodes' information"+ + " from HTTPS /v1/nodes/ endpoint on host %s", + op.name, len(nodesInformation.NodeList), host) + } + } + + return nil +} + +func (op *httpsUpdateNodeStateOp) finalize(_ *opEngineExecContext) error { + return nil +} diff --git a/vclusterops/start_node.go b/vclusterops/start_node.go index cc63aa3..70897f2 100644 --- a/vclusterops/start_node.go +++ b/vclusterops/start_node.go @@ -37,6 +37,8 @@ type VStartNodesOptions struct 
{ // you may not want to have both the NMA and Vertica server in the same container. // This feature requires version 24.2.0+. StartUpConf string + + vdb *VCoordinationDatabase } type VStartNodesInfo struct { @@ -51,6 +53,8 @@ type VStartNodesInfo struct { // sandbox that we need to get nodes info from // empty string means that we need to get info from main cluster nodes Sandbox string + // this can help decide whether there are nodes down that do not need to re-ip + hasDownNodeNoNeedToReIP bool } func VStartNodesOptionsFactory() VStartNodesOptions { @@ -154,13 +158,17 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error { } // retrieve database information to execute the command so we do not always rely on some user input + // if VStartNodes is called from VStartSubcluster, we can reuse the vdb from VStartSubcluster vdb := makeVCoordinationDatabase() - err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, AnySandbox) - if err != nil { - return err + if options.vdb == nil { + err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, AnySandbox) + if err != nil { + return err + } + } else { + vdb = *options.vdb } - var hostsNoNeedToReIP []string hostNodeNameMap := make(map[string]string) restartNodeInfo := new(VStartNodesInfo) for _, vnode := range vdb.HostNodeMap { @@ -182,25 +190,19 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error { return errors.Join(err, fmt.Errorf("hint: make sure there is at least one UP node in the database")) } - for nodename, newIP := range options.Nodes { - oldIP, ok := hostNodeNameMap[nodename] - if !ok { - // We can get here if the caller requests a node that we were in the - // middle of removing. Log a warning and continue without starting - // that node. 
- vcc.Log.Info("skipping start of node that doesn't exist in the catalog", - "nodename", nodename, "newIP", newIP) - continue - } - // if the IP that is given is different than the IP in the catalog, a re-ip is necessary - if oldIP != newIP { - restartNodeInfo.ReIPList = append(restartNodeInfo.ReIPList, newIP) - restartNodeInfo.NodeNamesToStart = append(restartNodeInfo.NodeNamesToStart, nodename) - vcc.Log.Info("the nodes need to be re-IP", "nodeNames", restartNodeInfo.NodeNamesToStart, "IPs", restartNodeInfo.ReIPList) - } else { - // otherwise, we don't need to re-ip - hostsNoNeedToReIP = append(hostsNoNeedToReIP, newIP) - } + // find out hosts + // - that need to re-ip, and + // - that don't need to re-ip + hostsNoNeedToReIP := options.separateHostsBasedOnReIPNeed(hostNodeNameMap, restartNodeInfo, &vdb, vcc.Log) + + // for the hosts that don't need to re-ip, + // if none of them is down and no other nodes to re-ip, + // we will early stop as there is no need to start them + if !restartNodeInfo.hasDownNodeNoNeedToReIP && len(restartNodeInfo.ReIPList) == 0 { + const msg = "The provided nodes are either not in catalog or already up. There is nothing to start." + fmt.Println(msg) + vcc.Log.Info(msg) + return nil } // we can proceed to restart both nodes with and without IP changes @@ -210,7 +212,9 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error { // If no nodes found to start. We can simply exit here. This can happen if // given a list of nodes that aren't in the catalog any longer. if len(restartNodeInfo.HostsToStart) == 0 { - vcc.Log.Info("None of the nodes provided are in the catalog. There is nothing to start.") + const msg = "None of the nodes provided are in the catalog. There is nothing to start." 
+ fmt.Println(msg) + vcc.Log.Info(msg) return nil } @@ -345,3 +349,37 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN return instructions, nil } + +func (options *VStartNodesOptions) separateHostsBasedOnReIPNeed( + hostNodeNameMap map[string]string, + restartNodeInfo *VStartNodesInfo, + vdb *VCoordinationDatabase, + logger vlog.Printer) (hostsNoNeedToReIP []string) { + for nodename, newIP := range options.Nodes { + oldIP, ok := hostNodeNameMap[nodename] + if !ok { + // We can get here if the caller requests a node that we were in the + // middle of removing. Log a warning and continue without starting + // that node. + logger.Info("skipping start of node that doesn't exist in the catalog", + "nodename", nodename, "newIP", newIP) + continue + } + // if the IP that is given is different than the IP in the catalog, a re-ip is necessary + if oldIP != newIP { + restartNodeInfo.ReIPList = append(restartNodeInfo.ReIPList, newIP) + restartNodeInfo.NodeNamesToStart = append(restartNodeInfo.NodeNamesToStart, nodename) + logger.Info("the nodes need to be re-IP", "nodeNames", restartNodeInfo.NodeNamesToStart, "IPs", restartNodeInfo.ReIPList) + } else { + // otherwise, we don't need to re-ip + hostsNoNeedToReIP = append(hostsNoNeedToReIP, newIP) + + vnode, ok := vdb.HostNodeMap[newIP] + if ok && vnode.State == util.NodeDownState { + restartNodeInfo.hasDownNodeNoNeedToReIP = true + } + } + } + + return hostsNoNeedToReIP +} diff --git a/vclusterops/start_subcluster.go b/vclusterops/start_subcluster.go new file mode 100644 index 0000000..bc43fb0 --- /dev/null +++ b/vclusterops/start_subcluster.go @@ -0,0 +1,140 @@ +/* + (c) Copyright [2023-2024] Open Text. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package vclusterops + +import ( + "fmt" + + "github.com/vertica/vcluster/vclusterops/util" + "github.com/vertica/vcluster/vclusterops/vlog" + "golang.org/x/exp/maps" +) + +// VStartScOptions represents the available options when you start a subcluster +// from a database. +type VStartScOptions struct { + DatabaseOptions + VStartNodesOptions + SubclusterToStart string // subcluster to start +} + +func VStartScOptionsFactory() VStartScOptions { + opt := VStartScOptions{} + // set default values to the params + opt.setDefaultValues() + + return opt +} + +func (o *VStartScOptions) setDefaultValues() { + o.DatabaseOptions.setDefaultValues() + o.VStartNodesOptions.setDefaultValues() +} + +func (o *VStartScOptions) validateRequiredOptions(logger vlog.Printer) error { + err := o.validateBaseOptions("start_subcluster", logger) + if err != nil { + return err + } + + if o.SubclusterToStart == "" { + return fmt.Errorf("must specify a subcluster name") + } + return nil +} + +func (o *VStartScOptions) validateEonOptions() error { + if !o.IsEon { + return fmt.Errorf(`cannot start subcluster from an enterprise database '%s'`, + o.DBName) + } + return nil +} + +func (o *VStartScOptions) validateParseOptions(logger vlog.Printer) error { + err := o.validateRequiredOptions(logger) + if err != nil { + return err + } + + return o.validateEonOptions() +} + +func (o *VStartScOptions) analyzeOptions() (err error) { + // we analyze host names when it is set in user input, otherwise we use hosts in yaml config + if len(o.RawHosts) > 0 { + // resolve RawHosts to be IP addresses + 
o.Hosts, err = util.ResolveRawHostsToAddresses(o.RawHosts, o.IPv6) + if err != nil { + return err + } + o.normalizePaths() + } + return nil +} + +func (o *VStartScOptions) validateAnalyzeOptions(logger vlog.Printer) error { + if err := o.validateParseOptions(logger); err != nil { + return err + } + err := o.analyzeOptions() + if err != nil { + return err + } + return o.setUsePassword(logger) +} + +// VStartSubcluster start nodes in a subcluster. It returns any error encountered. +// VStartSubcluster has two major phases: +// 1. Pre-check: check the subcluster name and get nodes for the subcluster. +// 2. Start nodes: Optional. If there are any down nodes in the subcluster, runs VStartNodes. +func (vcc VClusterCommands) VStartSubcluster(options *VStartScOptions) error { + err := options.validateAnalyzeOptions(vcc.Log) + if err != nil { + return err + } + + // retrieve database information to execute the command so we do not always rely on some user input + vdb := makeVCoordinationDatabase() + err = vcc.getVDBFromRunningDBIncludeSandbox(&vdb, &options.DatabaseOptions, AnySandbox) + if err != nil { + return err + } + + // node name to host address map + nodesToStart := make(map[string]string) + + // collect down nodes to start in the target subcluster + for _, vnode := range vdb.HostNodeMap { + if vnode.Subcluster == options.SubclusterToStart && vnode.State == util.NodeDownState { + nodesToStart[vnode.Name] = vnode.Address + } + } + + if len(nodesToStart) == 0 { + return fmt.Errorf("cannot find down node to start in subcluster %s", + options.SubclusterToStart) + } + + var startNodesOptions VStartNodesOptions + startNodesOptions.Nodes = nodesToStart + startNodesOptions.DatabaseOptions = options.DatabaseOptions + startNodesOptions.StatePollingTimeout = options.StatePollingTimeout + startNodesOptions.vdb = &vdb + + fmt.Printf("Starting nodes %v in subcluster %s\n", maps.Keys(nodesToStart), options.SubclusterToStart) + return vcc.VStartNodes(&startNodesOptions) +} diff 
--git a/vclusterops/util/defaults.go b/vclusterops/util/defaults.go index 992a116..da08508 100644 --- a/vclusterops/util/defaults.go +++ b/vclusterops/util/defaults.go @@ -41,6 +41,7 @@ const ( DefaultControlSetSize = -1 NodeUpState = "UP" NodeDownState = "DOWN" + NodeUnknownState = "UNKNOWN" // this is for sandbox only SuppressHelp = "SUPPRESS_HELP" MainClusterSandbox = "" )