From fe6d9f15b9adb60842460c3aa5d59763c0bc87d8 Mon Sep 17 00:00:00 2001
From: Matt Spilchen
Date: Sat, 3 Feb 2024 07:54:52 -0400
Subject: [PATCH] Sync from server repo (e7914b99bf1)

---
 commands/cmd_scrutinize.go                | 56 ++++++++++++++---------
 vclusterops/helpers.go                    | 17 +++++++
 vclusterops/nma_check_system_tables_op.go | 24 +++++++---
 vclusterops/nma_get_scrutinize_tar_op.go  | 14 +++---
 vclusterops/nma_stage_system_tables_op.go | 24 +++++++---
 vclusterops/revive_db.go                  |  2 +-
 vclusterops/scrutinize.go                 | 44 ++++++++++++++++--
 7 files changed, 132 insertions(+), 49 deletions(-)

diff --git a/commands/cmd_scrutinize.go b/commands/cmd_scrutinize.go
index 6058ef4..b0c26ba 100644
--- a/commands/cmd_scrutinize.go
+++ b/commands/cmd_scrutinize.go
@@ -98,6 +98,10 @@ func makeCmdScrutinize() *CmdScrutinize {
 	newCmd.ipv6 = newCmd.parser.Bool("ipv6", false, util.GetOptionalFlagMsg("Scrutinize database with IPv6 hosts"))
 
+	// this argument is parsed separately by the cluster command launcher to initialize the logger
+	newCmd.sOptions.LogPath = newCmd.parser.String("log-path", defaultLogPath,
+		util.GetOptionalFlagMsg("File path of the vcluster scrutinize log"))
+
 	return newCmd
 }
 
@@ -149,29 +153,12 @@ func (c *CmdScrutinize) Analyze(logger vlog.Printer) error {
 		return err
 	}
 
-	var allErrs error
-	port, found := os.LookupEnv(kubernetesPort)
-	if found && port != "" && *c.sOptions.HonorUserInput {
-		logger.Info(kubernetesPort, " is set, k8s environment detected", found)
-		dbName, found := os.LookupEnv(databaseName)
-		if !found || dbName == "" {
-			allErrs = errors.Join(allErrs, fmt.Errorf("unable to get database name from environment variable. "))
-		} else {
-			c.sOptions.DBName = &dbName
-			logger.Info("Setting database name from env as", "DBName", *c.sOptions.DBName)
-		}
-
-		catPrefix, found := os.LookupEnv(catalogPathPref)
-		if !found || catPrefix == "" {
-			allErrs = errors.Join(allErrs, fmt.Errorf("unable to get catalog path from environment variable. "))
-		} else {
-			c.sOptions.CatalogPrefix = &catPrefix
-			logger.Info("Setting catalog path from env as", "CatalogPrefix", *c.sOptions.CatalogPrefix)
-		}
-		if allErrs != nil {
-			return allErrs
-		}
+	// get extra options direct from env variables
+	err = c.readOptionsFromK8sEnv(logger)
+	if err != nil {
+		return err
 	}
+
 	return nil
 }
 
@@ -332,6 +319,31 @@ func (c *CmdScrutinize) nmaCertLookupFromEnv(logger vlog.Printer) (bool, error)
 	return true, nil
 }
 
+// readOptionsFromK8sEnv picks up the catalog path and dbname from the environment when on k8s
+// which otherwise would need to be set at the command line or read from a config file.
+func (c *CmdScrutinize) readOptionsFromK8sEnv(logger vlog.Printer) (allErrs error) {
+	port, found := os.LookupEnv(kubernetesPort)
+	if found && port != "" && *c.sOptions.HonorUserInput {
+		logger.Info(kubernetesPort, " is set, k8s environment detected", found)
+		dbName, found := os.LookupEnv(databaseName)
+		if !found || dbName == "" {
+			allErrs = errors.Join(allErrs, fmt.Errorf("unable to get database name from environment variable. "))
+		} else {
+			c.sOptions.DBName = &dbName
+			logger.Info("Setting database name from env as", "DBName", *c.sOptions.DBName)
+		}
+
+		catPrefix, found := os.LookupEnv(catalogPathPref)
+		if !found || catPrefix == "" {
+			allErrs = errors.Join(allErrs, fmt.Errorf("unable to get catalog path from environment variable. "))
+		} else {
+			c.sOptions.CatalogPrefix = &catPrefix
+			logger.Info("Setting catalog path from env as", "CatalogPrefix", *c.sOptions.CatalogPrefix)
+		}
+	}
+	return
+}
+
 // readNonEmptyFile is a helper that reads the contents of a file into a string.
 // It returns an error if the file is empty.
 func readNonEmptyFile(filename string) (string, error) {
diff --git a/vclusterops/helpers.go b/vclusterops/helpers.go
index 4f3d0f6..47e5655 100644
--- a/vclusterops/helpers.go
+++ b/vclusterops/helpers.go
@@ -180,6 +180,23 @@ func getInitiator(hosts []string) string {
 	return hosts[0]
 }
 
+// getInitiator will pick an initiator from the up host list to execute https calls
+// such that the initiator is also among the user provided host list
+func getInitiatorFromUpHosts(upHosts, userProvidedHosts []string) string {
+	// Create a hash set for user-provided hosts
+	userHostsSet := mapset.NewSet[string](userProvidedHosts...)
+
+	// Iterate through upHosts and check if any host is in the userHostsSet
+	for _, upHost := range upHosts {
+		if userHostsSet.Contains(upHost) {
+			return upHost
+		}
+	}
+
+	// Return an empty string if no matching host is found
+	return ""
+}
+
 func cannotFindDBFromConfigErr(dbName string) error {
 	return fmt.Errorf("database %s cannot be found in the config file", dbName)
 }
diff --git a/vclusterops/nma_check_system_tables_op.go b/vclusterops/nma_check_system_tables_op.go
index 232799b..b87203c 100644
--- a/vclusterops/nma_check_system_tables_op.go
+++ b/vclusterops/nma_check_system_tables_op.go
@@ -32,11 +32,13 @@ type checkSystemTablesResponseData struct {
 
 func makeNMACheckSystemTablesOp(logger vlog.Printer,
 	id string,
-	hostNodeNameMap map[string]string) nmaCheckSystemTablesOp {
+	hostNodeNameMap map[string]string,
+	hosts []string) (nmaCheckSystemTablesOp, error) {
 	// base members
 	op := nmaCheckSystemTablesOp{}
 	op.name = "NMACheckSystemTablesOp"
 	op.logger = logger.WithName(op.name)
+	op.hosts = hosts
 
 	// scrutinize members
 	op.id = id
@@ -45,7 +47,13 @@ func makeNMACheckSystemTablesOp(logger vlog.Printer,
 	op.httpMethod = GetMethod
 	op.urlSuffix = "/vs-status"
 
-	return op
+	// the caller is responsible for making sure hosts and maps match up exactly
+	err := validateHostMaps(hosts, hostNodeNameMap)
+	if err != nil {
+		return op, err
+	}
+
+	return op, nil
 }
 
 func (op *nmaCheckSystemTablesOp) getPollingTimeout() int {
@@ -60,14 +68,16 @@ func (op *nmaCheckSystemTablesOp) prepare(execContext *opEngineExecContext) erro
 		op.skipExecute = true
 		return nil
 	}
-	host := getInitiator(execContext.upHosts)
+
+	host := getInitiatorFromUpHosts(execContext.upHosts, op.hosts)
+	if host == "" {
+		op.logger.PrintWarning("no up hosts among user specified hosts to collect system tables from, skipping the operation")
+		op.skipExecute = true
+		return nil
+	}
 
 	// construct host list for interface purposes
 	op.hosts = []string{host}
-	err := validateHostMaps(op.hosts, op.hostNodeNameMap)
-	if err != nil {
-		return err
-	}
 
 	// prepare GET request with no params or body
 	execContext.dispatcher.setup(op.hosts)
diff --git a/vclusterops/nma_get_scrutinize_tar_op.go b/vclusterops/nma_get_scrutinize_tar_op.go
index 31d4b86..1ff59ad 100644
--- a/vclusterops/nma_get_scrutinize_tar_op.go
+++ b/vclusterops/nma_get_scrutinize_tar_op.go
@@ -89,15 +89,15 @@ func (op *nmaGetScrutinizeTarOp) prepare(execContext *opEngineExecContext) error
 			op.skipExecute = true
 			return nil
 		}
-		host := getInitiator(execContext.upHosts)
-		op.hosts = []string{host}
 
-		// the initiator host should have been in the original host list, and already
-		// validated, but let's not assume
-		err := validateHostMaps(op.hosts, op.hostNodeNameMap)
-		if err != nil {
-			return err
+		host := getInitiatorFromUpHosts(execContext.upHosts, op.hosts)
+		if host == "" {
+			op.logger.PrintWarning("no up hosts among user specified hosts to collect system tables from, skipping the operation")
+			op.skipExecute = true
+			return nil
 		}
+
+		op.hosts = []string{host}
 	}
 
 	hostToFilePathsMap := map[string]string{}
diff --git a/vclusterops/nma_stage_system_tables_op.go b/vclusterops/nma_stage_system_tables_op.go
index df55ae3..16856bd 100644
--- a/vclusterops/nma_stage_system_tables_op.go
+++ b/vclusterops/nma_stage_system_tables_op.go
@@ -37,11 +37,13 @@ type stageSystemTablesRequestData struct {
 
 func makeNMAStageSystemTablesOp(logger vlog.Printer,
 	id, username string, password *string,
-	hostNodeNameMap map[string]string) nmaStageSystemTablesOp {
+	hostNodeNameMap map[string]string,
+	hosts []string) (nmaStageSystemTablesOp, error) {
 	// base members
 	op := nmaStageSystemTablesOp{}
 	op.name = "NMAStageSystemTablesOp"
 	op.logger = logger.WithName(op.name)
+	op.hosts = hosts
 
 	// scrutinize members
 	op.id = id
@@ -54,7 +56,13 @@ func makeNMAStageSystemTablesOp(logger vlog.Printer,
 	op.username = username
 	op.password = password
 
-	return op
+	// the caller is responsible for making sure hosts and maps match up exactly
+	err := validateHostMaps(hosts, hostNodeNameMap)
+	if err != nil {
+		return op, err
+	}
+
+	return op, nil
 }
 
 func (op *nmaStageSystemTablesOp) setupRequestBody(host string) error {
@@ -83,7 +91,13 @@ func (op *nmaStageSystemTablesOp) prepare(execContext *opEngineExecContext) erro
 		op.skipExecute = true
 		return nil
 	}
-	host := getInitiator(execContext.upHosts)
+
+	host := getInitiatorFromUpHosts(execContext.upHosts, op.hosts)
+	if host == "" {
+		op.logger.PrintWarning("no up hosts among user specified hosts to collect system tables from, skipping the operation")
+		op.skipExecute = true
+		return nil
+	}
 
 	err := op.setupRequestBody(host)
 	if err != nil {
@@ -92,10 +106,6 @@ func (op *nmaStageSystemTablesOp) prepare(execContext *opEngineExecContext) erro
 
 	// construct host list for interface purposes
 	op.hosts = []string{host}
-	err = validateHostMaps(op.hosts, op.hostNodeNameMap)
-	if err != nil {
-		return err
-	}
 
 	execContext.dispatcher.setup(op.hosts)
 	return op.setupClusterHTTPRequest(op.hosts)
diff --git a/vclusterops/revive_db.go b/vclusterops/revive_db.go
index 8a442e3..ff3827d 100644
--- a/vclusterops/revive_db.go
+++ b/vclusterops/revive_db.go
@@ -104,7 +104,7 @@ func (e *ReviveDBRestorePointNotFoundError) Error() string {
 		indicator = "index"
 		value = fmt.Sprintf("%d", e.InvalidIndex)
 	}
-	return fmt.Sprintf("restore point with %s %s not found in archive %s", indicator, value, e.Archive)
+	return fmt.Sprintf("restore point with %s %s not found in archive %q", indicator, value, e.Archive)
 }
 
 func VReviveDBOptionsFactory() VReviveDatabaseOptions {
diff --git a/vclusterops/scrutinize.go b/vclusterops/scrutinize.go
index 91db09c..bee0112 100644
--- a/vclusterops/scrutinize.go
+++ b/vclusterops/scrutinize.go
@@ -29,9 +29,10 @@ import (
 // const to sync cmd, options parsing, and this
 const VScrutinizeTypeName = "scrutinize"
 
-// folders used by scrutinize
+// files and folders used by scrutinize
 const scrutinizeOutputBasePath = "/tmp/scrutinize"
 const scrutinizeRemoteOutputPath = scrutinizeOutputBasePath + "/remote"
+const scrutinizeLogFileName = "vcluster.log"
 
 // these could be replaced with options later
 const scrutinizeLogAgeHours = 24 // copy archived logs produced in recent 24 hours
@@ -168,6 +169,9 @@ func (vcc *VClusterCommands) VScrutinize(options *VScrutinizeOptions) error {
 		return err
 	}
 
+	// add vcluster log to output
+	options.stageVclusterLog(options.ID, vcc.Log)
+
 	// tar all results
 	if err = tarAndRemoveDirectory(options.ID, vcc.Log); err != nil {
 		vcc.Log.Error(err, "failed to create final scrutinize output tarball")
@@ -177,8 +181,32 @@ func (vcc *VClusterCommands) VScrutinize(options *VScrutinizeOptions) error {
 	return nil
 }
 
+// stageVclusterLog attempts to copy the vcluster log to the scrutinize tarball, as
+// that will contain log entries for this scrutinize run. Any failure shouldn't
+// abort scrutinize, so just prints a warning.
+func (options *VScrutinizeOptions) stageVclusterLog(id string, log vlog.Printer) {
+	// if using vcluster command line, the log path will always be set
+	if options.LogPath == nil {
+		log.PrintWarning("Path to scrutinize log not provided. " +
+			"The log for this scrutinize run will not be included.")
+		return
+	}
+
+	destPath := fmt.Sprintf("%s/%s/%s", scrutinizeRemoteOutputPath, id, scrutinizeLogFileName)
+	sourcePath := *options.LogPath
+
+	// copy the log instead of symlinking to avoid issues with tar
+	log.Info("Copying scrutinize log", "source", sourcePath, "dest", destPath)
+	const logFilePerms = 0700
+	err := util.CopyFile(sourcePath, destPath, logFilePerms)
+	if err != nil {
+		log.PrintWarning("Unable to copy scrutinize log: %s", err.Error())
+	}
+}
+
+// tarAndRemoveDirectory packages the final scrutinize output.
 func tarAndRemoveDirectory(id string, log vlog.Printer) (err error) {
-	tarballPath := "/tmp/scrutinize/" + id + ".tar"
+	tarballPath := scrutinizeOutputBasePath + "/" + id + ".tar"
 	cmd := exec.Command("tar", "cf", tarballPath, "-C", "/tmp/scrutinize/remote", id)
 	log.Info("running command %s with args %v", cmd.Path, cmd.Args)
 	if err = cmd.Run(); err != nil {
@@ -259,8 +287,11 @@ func (vcc *VClusterCommands) produceScrutinizeInstructions(options *VScrutinizeO
 	instructions = append(instructions, &getUpNodesOp)
 
 	// Initiate system table staging early as it may take significantly longer than other ops
-	stageSystemTablesOp := makeNMAStageSystemTablesOp(vcc.Log, options.ID, *options.UserName,
-		options.Password, hostNodeNameMap)
+	stageSystemTablesOp, err := makeNMAStageSystemTablesOp(vcc.Log, options.ID, *options.UserName,
+		options.Password, hostNodeNameMap, options.Hosts)
+	if err != nil {
+		return nil, err
+	}
 	instructions = append(instructions, &stageSystemTablesOp)
 
 	// stage Vertica logs
@@ -306,7 +337,10 @@ func (vcc *VClusterCommands) produceScrutinizeInstructions(options *VScrutinizeO
 	instructions = append(instructions, &getContextTarballOp)
 
 	// check for system tables staging completion before continuing
-	checkSystemTablesOp := makeNMACheckSystemTablesOp(vcc.Log, options.ID, hostNodeNameMap)
+	checkSystemTablesOp, err := makeNMACheckSystemTablesOp(vcc.Log, options.ID, hostNodeNameMap, options.Hosts)
+	if err != nil {
+		return nil, err
+	}
 	instructions = append(instructions, &checkSystemTablesOp)
 
 	// get 'system_tables' batch tarball last, as staging systables can take a long time