From ef75c94b457fb3dd5df538a43da6902482cfc900 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 10:46:06 +0200 Subject: [PATCH 1/8] Enable additional check for orchestrator options in node config --- boyar/config/config_test.go | 2 +- boyar/config/get_configuration.go | 2 +- boyar/config/get_configuration_test.go | 18 ++++++++++++++++++ boyar/config/node_configuration.go | 20 ++++++++++++-------- boyar/config/parse_string_config_test.go | 13 +++++++++++++ boyar/config/reload_time_delay_test.go | 4 ++-- boyar/config/url_configuration_source.go | 4 ++++ services/report_status.go | 2 +- strelets/adapter/purge_data_test.go | 4 ++-- strelets/adapter/swarm.go | 9 ++++++--- strelets/adapter/swarm_get_status_test.go | 6 +++--- strelets/adapter/swarm_volumes.go | 6 +++--- strelets/adapter/swarm_volumes_test.go | 10 +++++----- test/e2e/assert.go | 4 ++-- 14 files changed, 73 insertions(+), 31 deletions(-) diff --git a/boyar/config/config_test.go b/boyar/config/config_test.go index acaecd11..06dcd035 100644 --- a/boyar/config/config_test.go +++ b/boyar/config/config_test.go @@ -53,7 +53,7 @@ func Test_StringConfigurationSource(t *testing.T) { } func Test_StringConfigurationSourceFromEmptyConfig(t *testing.T) { - cfg, err := NewStringConfigurationSource("{}", "", fakeKeyPair, false) + cfg, err := NewStringConfigurationSource(`{"orchestrator":{}}`, "", fakeKeyPair, false) require.NoError(t, err) require.NotEmpty(t, cfg.Hash()) diff --git a/boyar/config/get_configuration.go b/boyar/config/get_configuration.go index 6ff30c92..f31a635c 100644 --- a/boyar/config/get_configuration.go +++ b/boyar/config/get_configuration.go @@ -21,7 +21,7 @@ func GetConfiguration(flags *Flags) (NodeConfiguration, error) { return nil, err } - config.SetOrchestratorOptions(orchestratorOptions) + config.SetOrchestratorOptions(&orchestratorOptions) } if err := config.VerifyConfig(); err != nil { diff --git a/boyar/config/get_configuration_test.go b/boyar/config/get_configuration_test.go index ab883537..35675493 100644 --- a/boyar/config/get_configuration_test.go +++ b/boyar/config/get_configuration_test.go @@ -1,7 +1,9 @@ package config import ( + "github.com/orbs-network/boyarin/test/helpers" "github.com/stretchr/testify/require" + "net/http" "testing" ) @@ -18,3 +20,19 @@ func Test_getOrchestratorOptions(t *testing.T) { require.NotEmpty(t, "amazing-custom-driver", options.StorageDriver) require.Equal(t, "932", options.StorageOptions["size"]) } + +func Test_VerifyConfigWithCorruptConfig(t *testing.T) { + server := helpers.CreateHttpServer("/", 0, func(writer http.ResponseWriter, request *http.Request) { + writer.Write([]byte("{}")) + }) + server.Start() + defer server.Shutdown() + + source, err := GetConfiguration(&Flags{ + ConfigUrl: server.Url(), // FIXME + KeyPairConfigPath: fakeKeyPair, + }) + + require.EqualError(t, err, "config verification failed: config is missing orchestrator options") + require.Nil(t, source) +} diff --git a/boyar/config/node_configuration.go b/boyar/config/node_configuration.go index 714096d0..f1ecc77e 100644 --- a/boyar/config/node_configuration.go +++ b/boyar/config/node_configuration.go @@ -11,7 +11,7 @@ import ( type NodeConfiguration interface { FederationNodes() []*FederationNode Chains() []*VirtualChain - OrchestratorOptions() adapter.OrchestratorOptions + OrchestratorOptions() *adapter.OrchestratorOptions KeyConfigPath() string KeyConfig() KeyConfig ReloadTimeDelay(maxDelay time.Duration) time.Duration @@ -30,16 +30,16 @@ type MutableNodeConfiguration interface { NodeConfiguration SetEthereumEndpoint(ethereumEndpoint string) MutableNodeConfiguration - SetOrchestratorOptions(options adapter.OrchestratorOptions) MutableNodeConfiguration + SetOrchestratorOptions(options *adapter.OrchestratorOptions) MutableNodeConfiguration SetSSLOptions(options adapter.SSLOptions) MutableNodeConfiguration UpdateDefaultServiceConfig() MutableNodeConfiguration } type nodeConfiguration struct { - Chains []*VirtualChain `json:"chains"` - FederationNodes []*FederationNode `json:"network"` - OrchestratorOptions adapter.OrchestratorOptions `json:"orchestrator"` - Services Services `json:"services"` + Chains []*VirtualChain `json:"chains"` + FederationNodes []*FederationNode `json:"network"` + OrchestratorOptions *adapter.OrchestratorOptions `json:"orchestrator"` + Services Services `json:"services"` } type nodeConfigurationContainer struct { @@ -71,7 +71,7 @@ func (c *nodeConfigurationContainer) KeyConfigPath() string { return c.keyConfigPath } -func (c *nodeConfigurationContainer) OrchestratorOptions() adapter.OrchestratorOptions { +func (c *nodeConfigurationContainer) OrchestratorOptions() *adapter.OrchestratorOptions { return c.value.OrchestratorOptions } @@ -86,6 +86,10 @@ func (c *nodeConfigurationContainer) VerifyConfig() error { return err } + if c.OrchestratorOptions() == nil { + return fmt.Errorf("config is missing orchestrator options") + } + return nil } @@ -107,7 +111,7 @@ func (c *nodeConfigurationContainer) SetEthereumEndpoint(ethereumEndpoint string return c } -func (c *nodeConfigurationContainer) SetOrchestratorOptions(options adapter.OrchestratorOptions) MutableNodeConfiguration { +func (c *nodeConfigurationContainer) SetOrchestratorOptions(options *adapter.OrchestratorOptions) MutableNodeConfiguration { c.value.OrchestratorOptions = options return c } diff --git a/boyar/config/parse_string_config_test.go b/boyar/config/parse_string_config_test.go index 82870a49..7d31152d 100644 --- a/boyar/config/parse_string_config_test.go +++ b/boyar/config/parse_string_config_test.go @@ -63,3 +63,16 @@ func TestNewUrlConfigurationSource(t *testing.T) { require.NoError(t, err) verifySource(t, source) } + +func TestNewUrlConfigurationSourceWithFaultyConfig(t *testing.T) { + server := helpers.CreateHttpServer("/", 0, func(writer http.ResponseWriter, request *http.Request) { + writer.WriteHeader(http.StatusInternalServerError) + }) + server.Start() + defer server.Shutdown() + + source, err := NewUrlConfigurationSource(server.Url(), "", fakeKeyPair, false) + + require.EqualError(t, err, "management config url returned with status 500 Internal Server Error") + require.Nil(t, source) +} diff --git a/boyar/config/reload_time_delay_test.go b/boyar/config/reload_time_delay_test.go index 4ee5ab30..0d8ad47f 100644 --- a/boyar/config/reload_time_delay_test.go +++ b/boyar/config/reload_time_delay_test.go @@ -7,7 +7,7 @@ import ( ) func TestNodeConfigurationContainer_ReloadTimeDelay(t *testing.T) { - source, err := NewStringConfigurationSource("{}", "", fakeKeyPair, false) + source, err := NewStringConfigurationSource(`{"orchestrator": {}}`, "", fakeKeyPair, false) require.NoError(t, err) reloadTimeDelay := source.ReloadTimeDelay(15 * time.Minute) @@ -19,7 +19,7 @@ func TestNodeConfigurationContainer_ReloadTimeDelay(t *testing.T) { } func TestNodeConfigurationContainer_ReloadTimeDelayWithNoDelay(t *testing.T) { - source, err := NewStringConfigurationSource("{}", "", fakeKeyPair, false) + source, err := NewStringConfigurationSource(`{"orchestrator": {}}`, "", fakeKeyPair, false) require.NoError(t, err) reloadTimeDelay := source.ReloadTimeDelay(0) diff --git a/boyar/config/url_configuration_source.go b/boyar/config/url_configuration_source.go index 4f6fc94c..4a7d1885 100644 --- a/boyar/config/url_configuration_source.go +++ b/boyar/config/url_configuration_source.go @@ -14,6 +14,10 @@ func NewUrlConfigurationSource(url string, ethereumEndpoint string, keyConfigPat defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("management config url returned with status %s", resp.Status) + } + input, err := ioutil.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("could not read configuration from source: %s", err) diff --git a/services/report_status.go b/services/report_status.go index 5b0c343e..4fd0a17d 100644 --- a/services/report_status.go +++ b/services/report_status.go @@ -76,7 +76,7 @@ func statusResponseWithError(flags *config.Flags, err error) StatusResponse { func GetStatusAndMetrics(ctx context.Context, logger log.Logger, flags *config.Flags, startupTimestamp time.Time, dockerStatusPeriod time.Duration) (status StatusResponse, metrics Metrics) { // We really don't need any options here since we're just observing - orchestrator, err := adapter.NewDockerSwarm(adapter.OrchestratorOptions{}, logger) + orchestrator, err := adapter.NewDockerSwarm(&adapter.OrchestratorOptions{}, logger) if err != nil { status = statusResponseWithError(flags, err) } else { diff --git a/strelets/adapter/purge_data_test.go b/strelets/adapter/purge_data_test.go index 11fbe61b..d86c1883 100644 --- a/strelets/adapter/purge_data_test.go +++ b/strelets/adapter/purge_data_test.go @@ -49,7 +49,7 @@ func TestPurgeServiceData(t *testing.T) { logger := log.GetLogger() orchestrator := &dockerSwarmOrchestrator{ client: helpers.DockerClient(t), - options: OrchestratorOptions{ + options: &OrchestratorOptions{ StorageDriver: LOCAL_DRIVER, StorageMountType: "bind", }, @@ -80,7 +80,7 @@ func TestPurgeVirtualChainData(t *testing.T) { logger := log.GetLogger() orchestrator := &dockerSwarmOrchestrator{ client: helpers.DockerClient(t), - options: OrchestratorOptions{ + options: &OrchestratorOptions{ StorageDriver: LOCAL_DRIVER, StorageMountType: "bind", }, diff --git a/strelets/adapter/swarm.go b/strelets/adapter/swarm.go index 91804751..39593244 100644 --- a/strelets/adapter/swarm.go +++ b/strelets/adapter/swarm.go @@ -14,7 +14,7 @@ import ( type dockerSwarmOrchestrator struct { client *client.Client - options OrchestratorOptions + options *OrchestratorOptions logger log.Logger } @@ -31,9 +31,12 @@ type dockerSwarmNginxSecretsConfig struct { sslPrivateKeyId string } -func NewDockerSwarm(options OrchestratorOptions, logger log.Logger) (Orchestrator, error) { - client, err := client.NewClientWithOpts(client.WithVersion(DOCKER_API_VERSION)) +func NewDockerSwarm(options *OrchestratorOptions, logger log.Logger) (Orchestrator, error) { + if options == nil { + return nil, fmt.Errorf("orchestration options are empty, can't instantiate Docker Swarm orchestrator") + } + client, err := client.NewClientWithOpts(client.WithVersion(DOCKER_API_VERSION)) if err != nil { return nil, err } diff --git a/strelets/adapter/swarm_get_status_test.go b/strelets/adapter/swarm_get_status_test.go index d045fd2f..bf73c983 100644 --- a/strelets/adapter/swarm_get_status_test.go +++ b/strelets/adapter/swarm_get_status_test.go @@ -24,7 +24,7 @@ func TestDockerSwarm_GetStatusIfUnableToStart(t *testing.T) { serviceId := startDefunctContainer(t) defer destroyDefunctContainer(t, serviceId) - swarm, err := NewDockerSwarm(OrchestratorOptions{}, log.GetLogger()) + swarm, err := NewDockerSwarm(&OrchestratorOptions{}, log.GetLogger()) require.NoError(t, err) require.True(t, helpers.Eventually(30*time.Second, func() bool { @@ -50,7 +50,7 @@ func TestDockerSwarm_GetStatusIfExitsImmediately(t *testing.T) { serviceId := startReloadingContainer(t) defer destroyDefunctContainer(t, serviceId) - swarm, err := NewDockerSwarm(OrchestratorOptions{}, log.GetLogger()) + swarm, err := NewDockerSwarm(&OrchestratorOptions{}, log.GetLogger()) require.NoError(t, err) require.True(t, helpers.Eventually(30*time.Second, func() bool { @@ -76,7 +76,7 @@ func TestDockerSwarm_GetStatusOfUnhealthyContainer(t *testing.T) { serviceId := startUnhealthyContainer(t) defer destroyDefunctContainer(t, serviceId) - swarm, err := NewDockerSwarm(OrchestratorOptions{}, log.GetLogger()) + swarm, err := NewDockerSwarm(&OrchestratorOptions{}, log.GetLogger()) require.NoError(t, err) require.True(t, helpers.Eventually(30*time.Second, func() bool { diff --git a/strelets/adapter/swarm_volumes.go b/strelets/adapter/swarm_volumes.go index c5ad9ebb..9d58ad76 100644 --- a/strelets/adapter/swarm_volumes.go +++ b/strelets/adapter/swarm_volumes.go @@ -44,7 +44,7 @@ func (d *dockerSwarmOrchestrator) provisionCacheVolume(ctx context.Context, serv return d.provisionVolume(ctx, getServiceVolumeName(serviceName, "cache"), ORBS_CACHE_TARGET, d.options) } -func (d *dockerSwarmOrchestrator) provisionVolume(ctx context.Context, volumeName string, target string, orchestratorOptions OrchestratorOptions) (mount.Mount, error) { +func (d *dockerSwarmOrchestrator) provisionVolume(ctx context.Context, volumeName string, target string, orchestratorOptions *OrchestratorOptions) (mount.Mount, error) { if orchestratorOptions.StorageDriver == REXRAY_EBS_DRIVER { return mount.Mount{}, errors.Errorf("%s storage driver is no longer supported, please consult how to enable EFS instead", REXRAY_EBS_DRIVER) } @@ -71,7 +71,7 @@ func (d *dockerSwarmOrchestrator) provisionVolume(ctx context.Context, volumeNam }, nil } -func getVolumeOptions(orchestratorOptions OrchestratorOptions, driverName string, driverOptions map[string]string) *mount.VolumeOptions { +func getVolumeOptions(orchestratorOptions *OrchestratorOptions, driverName string, driverOptions map[string]string) *mount.VolumeOptions { switch orchestratorOptions.MountType() { case mount.TypeVolume: return &mount.VolumeOptions{ @@ -85,7 +85,7 @@ func getVolumeOptions(orchestratorOptions OrchestratorOptions, driverName string return nil } -func getVolumeDriverOptions(volumeName string, orchestratorOptions OrchestratorOptions) (string, map[string]string) { +func getVolumeDriverOptions(volumeName string, orchestratorOptions *OrchestratorOptions) (string, map[string]string) { driverOptions := make(map[string]string) for k, v := range orchestratorOptions.StorageOptions { driverOptions[k] = v diff --git a/strelets/adapter/swarm_volumes_test.go b/strelets/adapter/swarm_volumes_test.go index c9ca361c..4066005d 100644 --- a/strelets/adapter/swarm_volumes_test.go +++ b/strelets/adapter/swarm_volumes_test.go @@ -6,7 +6,7 @@ import ( ) func TestDockerSwarm_getVolumeDriverOptionsDefaults(t *testing.T) { - orchestratorOptions := OrchestratorOptions{} + orchestratorOptions := &OrchestratorOptions{} _, options := getVolumeDriverOptions("myVolume", orchestratorOptions) require.Empty(t, options) @@ -17,7 +17,7 @@ func TestDockerSwarm_getVolumeDriverOptionsWithStorageOptions(t *testing.T) { storageOptions["artist"] = "Iggy Pop" storageOptions["song"] = "Passenger" - orchestratorOptions := OrchestratorOptions{ + orchestratorOptions := &OrchestratorOptions{ StorageOptions: storageOptions, } _, options := getVolumeDriverOptions("myVolume", orchestratorOptions) @@ -28,7 +28,7 @@ func TestDockerSwarm_getVolumeDriverOptionsWithStorageOptions(t *testing.T) { } func TestDockerSwarm_getVolumeDriverOptionsWithRexray(t *testing.T) { - orchestratorOptions := OrchestratorOptions{ + orchestratorOptions := &OrchestratorOptions{ StorageDriver: "rexray/ebs", } _, options := getVolumeDriverOptions("myVolume", orchestratorOptions) @@ -40,7 +40,7 @@ func TestDockerSwarm_getVolumeDriverWithLocalNFS(t *testing.T) { storageOptions := make(map[string]string) storageOptions["type"] = "nfs" - orchestratorOptions := OrchestratorOptions{ + orchestratorOptions := &OrchestratorOptions{ StorageOptions: storageOptions, } _, options := getVolumeDriverOptions("myVolume", orchestratorOptions) @@ -54,7 +54,7 @@ func TestDockerSwarm_getVolumeDriverWithBindMounts(t *testing.T) { storageOptions := make(map[string]string) storageOptions["type"] = "nfs" - orchestratorOptions := OrchestratorOptions{ + orchestratorOptions := &OrchestratorOptions{ StorageOptions: storageOptions, StorageMountType: "bind", } diff --git a/test/e2e/assert.go b/test/e2e/assert.go index 4be8e710..f4ea9137 100644 --- a/test/e2e/assert.go +++ b/test/e2e/assert.go @@ -26,7 +26,7 @@ func AssertGossipServer(t helpers.TestingT, vc VChainArgument) { } func AssertServiceUp(t helpers.TestingT, ctx context.Context, serviceName string) { - orchestrator, err := adapter.NewDockerSwarm(adapter.OrchestratorOptions{}, helpers.DefaultTestLogger()) + orchestrator, err := adapter.NewDockerSwarm(&adapter.OrchestratorOptions{}, helpers.DefaultTestLogger()) require.NoError(t, err) statuses, err := orchestrator.GetStatus(ctx, 1*time.Second) @@ -57,7 +57,7 @@ func AssertVolumeExists(t helpers.TestingT, ctx context.Context, volume string) } func AssertServiceDown(t helpers.TestingT, ctx context.Context, serviceName string) { - orchestrator, err := adapter.NewDockerSwarm(adapter.OrchestratorOptions{}, helpers.DefaultTestLogger()) + orchestrator, err := adapter.NewDockerSwarm(&adapter.OrchestratorOptions{}, helpers.DefaultTestLogger()) require.NoError(t, err) statuses, err := orchestrator.GetStatus(ctx, 1*time.Second) From c72b3db995bcf2f99f2edf5b349aebd5ba677d82 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 11:14:45 +0200 Subject: [PATCH 2/8] Add BootstrapResetTimeout parameter --- boyar/config/config_flags.go | 7 +++--- boyar/config/get_configuration_test.go | 2 +- boyar/main/main.go | 1 + services/bootstrap.go | 6 +++-- services/execute.go | 12 +++++++++ services/execute_test.go | 34 ++++++++++++++++++++++++++ 6 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 services/execute_test.go diff --git a/boyar/config/config_flags.go b/boyar/config/config_flags.go index 3c62a406..4d19d3ae 100644 --- a/boyar/config/config_flags.go +++ b/boyar/config/config_flags.go @@ -9,9 +9,10 @@ type Flags struct { SSLCertificatePath string SSLPrivateKeyPath string - PollingInterval time.Duration - Timeout time.Duration - MaxReloadTimeDelay time.Duration + PollingInterval time.Duration + Timeout time.Duration + MaxReloadTimeDelay time.Duration + BootstrapResetTimeout time.Duration EthereumEndpoint string diff --git a/boyar/config/get_configuration_test.go b/boyar/config/get_configuration_test.go index 35675493..0cb9ac53 100644 --- a/boyar/config/get_configuration_test.go +++ b/boyar/config/get_configuration_test.go @@ -29,7 +29,7 @@ func Test_VerifyConfigWithCorruptConfig(t *testing.T) { defer server.Shutdown() source, err := GetConfiguration(&Flags{ - ConfigUrl: server.Url(), // FIXME + ConfigUrl: server.Url(), KeyPairConfigPath: fakeKeyPair, }) diff --git a/boyar/main/main.go b/boyar/main/main.go index 378fbaaa..1430f2a8 100644 --- a/boyar/main/main.go +++ b/boyar/main/main.go @@ -81,6 +81,7 @@ func main() { AutoUpdate: *autoUpdate, ShutdownAfterUpdate: *shutdownAfterUpdate, BoyarBinaryPath: executableWithoutSymlink, + // FIXME set BootstrapResetTimeout parameter } if *showStatus { diff --git a/services/bootstrap.go b/services/bootstrap.go index e11d54aa..95675471 100644 --- a/services/bootstrap.go +++ b/services/bootstrap.go @@ -27,8 +27,10 @@ func Bootstrap(ctx context.Context, flags *config.Flags, logger log.Logger) (*co KeyPairConfigPath: flags.KeyPairConfigPath, - Timeout: flags.Timeout, - PollingInterval: flags.PollingInterval, + Timeout: flags.Timeout, + PollingInterval: flags.PollingInterval, + MaxReloadTimeDelay: flags.MaxReloadTimeDelay, + BootstrapResetTimeout: flags.BootstrapResetTimeout, SSLPrivateKeyPath: flags.SSLPrivateKeyPath, SSLCertificatePath: flags.SSLCertificatePath, diff --git a/services/execute.go b/services/execute.go index 95e620f8..9677b2a7 100644 --- a/services/execute.go +++ b/services/execute.go @@ -8,6 +8,7 @@ import ( "github.com/orbs-network/govnr" "github.com/orbs-network/scribe/log" "os" + "time" ) func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr.ShutdownWaiter, error) { @@ -37,15 +38,26 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr cfgFetcher := NewConfigurationPollService(flags, logger) coreBoyar := NewCoreBoyarService(logger) + configUpdateTimestamp := time.Now() + // wire cfg and boyar supervisor.Supervise(govnr.Forever(ctxWithCancel, "apply config changes", utils.NewLogErrors("apply config changes", logger), func() { var cfg config.NodeConfiguration = nil select { case <-ctx.Done(): return + case <-time.After(flags.BootstrapResetTimeout): case cfg = <-cfgFetcher.Output: } + if cfg == nil { + logger.Error("unexpected empty configuration received and ignored") + + if time.Since(configUpdateTimestamp).Nanoseconds() >= flags.BootstrapResetTimeout.Nanoseconds() { + logger.Error(fmt.Sprintf("did not receive new valid configuratin for %s, shutting down", flags.BootstrapResetTimeout)) + cancelAndExit() + } + return } // random delay when provisioning change (that is, not bootstrap flow or repairing broken system) diff --git a/services/execute_test.go b/services/execute_test.go new file mode 100644 index 00000000..c027024a --- /dev/null +++ b/services/execute_test.go @@ -0,0 +1,34 @@ +package services + +import ( + "context" + "github.com/orbs-network/boyarin/boyar/config" + "github.com/orbs-network/boyarin/test/helpers" + "github.com/orbs-network/scribe/log" + "github.com/stretchr/testify/require" + "testing" + "time" +) + +func TestExecuteWithInvalidConfig(t *testing.T) { + helpers.WithContext(func(ctx context.Context) { + logger := log.GetLogger() + executionCtx := context.Background() + + startTime := time.Now() + + resetTimeout := 1 * time.Second + pollingInterval := 100 * time.Millisecond + waiter, err := Execute(ctx, &config.Flags{ + ConfigUrl: "http://localhost/fake-url", + KeyPairConfigPath: "../boyar/config/test/fake-key-pair.json", + PollingInterval: pollingInterval, + BootstrapResetTimeout: resetTimeout, + }, logger) + require.NoError(t, err) + + waiter.WaitUntilShutdown(executionCtx) + + require.InDelta(t, resetTimeout, time.Since(startTime).Nanoseconds(), float64(2*pollingInterval.Nanoseconds())) + }) +} From bd2b8784a03407d782fc2a1dcebef57216ea5226 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 13:20:27 +0200 Subject: [PATCH 3/8] Add --bootstrap-reset-timeout option --- README.md | 2 ++ boyar/main/main.go | 38 ++++++++++++++++++++------------------ services/execute.go | 2 +- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 57a13af6..a3c5a9f2 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,8 @@ In case you ever need to regenerate the SSL certificate: `--shutdown-after-update` the process shuts down after automatic update is performed and **DOES NOT** restart; recommended to be used with an external process manager (default false) +`--bootstrap-reset-timeout` if the process is unable to receive valid configuration within a limited timeframe (duration: 1s, 1m, 1h, etc), it will exit with an error; recommended to be used with an external process manager (default: 30m) + `--version` show version, git commit and Docker API version ### SSL options diff --git a/boyar/main/main.go b/boyar/main/main.go index 1430f2a8..f313c94b 100644 --- a/boyar/main/main.go +++ b/boyar/main/main.go @@ -50,6 +50,8 @@ func main() { autoUpdate := flag.Bool("auto-update", false, "enables boyar binary auto update") shutdownAfterUpdate := flag.Bool("shutdown-after-update", false, "the process shuts down after automatic update is performed and **DOES NOT** restart; recommended to be used with an external process manager") + bootstrapResetTimeout := flag.Duration("bootstrap-reset-timeout", 30*time.Minute, "if the process is unable to receive valid configuration within a limited timeframe (duration: 1s, 1m, 1h, etc), it will exit with an error; recommended to be used with an external process manager") + flag.Parse() if *showVersion { @@ -64,24 +66,24 @@ func main() { executableWithoutSymlink, _ := filepath.EvalSymlinks(executable) flags := &config.Flags{ - ConfigUrl: *configUrlPtr, - KeyPairConfigPath: *keyPairConfigPathPtr, - LogFilePath: *logFilePath, - StatusFilePath: *statusFilePath, - MetricsFilePath: *metricsFilePath, - PollingInterval: *pollingIntervalPtr, - Timeout: *timeoutPtr, - MaxReloadTimeDelay: *maxReloadTimePtr, - EthereumEndpoint: *ethereumEndpointPtr, - LoggerHttpEndpoint: *loggerHttpEndpointPtr, - OrchestratorOptions: *orchestratorOptionsPtr, - SSLCertificatePath: *sslCertificatePathPtr, - SSLPrivateKeyPath: *sslPrivateKeyPtr, - ManagementConfig: *managementConfig, - AutoUpdate: *autoUpdate, - ShutdownAfterUpdate: *shutdownAfterUpdate, - BoyarBinaryPath: executableWithoutSymlink, - // FIXME set BootstrapResetTimeout parameter + ConfigUrl: *configUrlPtr, + KeyPairConfigPath: *keyPairConfigPathPtr, + LogFilePath: *logFilePath, + StatusFilePath: *statusFilePath, + MetricsFilePath: *metricsFilePath, + PollingInterval: *pollingIntervalPtr, + Timeout: *timeoutPtr, + MaxReloadTimeDelay: *maxReloadTimePtr, + EthereumEndpoint: *ethereumEndpointPtr, + LoggerHttpEndpoint: *loggerHttpEndpointPtr, + OrchestratorOptions: *orchestratorOptionsPtr, + SSLCertificatePath: *sslCertificatePathPtr, + SSLPrivateKeyPath: *sslPrivateKeyPtr, + ManagementConfig: *managementConfig, + AutoUpdate: *autoUpdate, + ShutdownAfterUpdate: *shutdownAfterUpdate, + BoyarBinaryPath: executableWithoutSymlink, + BootstrapResetTimeout: *bootstrapResetTimeout, } if *showStatus { diff --git a/services/execute.go b/services/execute.go index 9677b2a7..f008cc9c 100644 --- a/services/execute.go +++ b/services/execute.go @@ -53,7 +53,7 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr if cfg == nil { logger.Error("unexpected empty configuration received and ignored") - if time.Since(configUpdateTimestamp).Nanoseconds() >= flags.BootstrapResetTimeout.Nanoseconds() { + if resetInNanos := flags.BootstrapResetTimeout.Nanoseconds(); resetInNanos > 0 && time.Since(configUpdateTimestamp).Nanoseconds() >= resetInNanos { logger.Error(fmt.Sprintf("did not receive new valid configuratin for %s, shutting down", flags.BootstrapResetTimeout)) cancelAndExit() } From bcab6240b42a09ea6e696189326fd9ee3fec0e98 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 13:41:17 +0200 Subject: [PATCH 4/8] Fix main.go --- boyar/config/get_logger.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boyar/config/get_logger.go b/boyar/config/get_logger.go index 002cf96e..d0f1d1f2 100644 --- a/boyar/config/get_logger.go +++ b/boyar/config/get_logger.go @@ -40,7 +40,7 @@ func GetLogger(flags *Flags) (log.Logger, error) { WithTags(tags...). WithOutput(outputs...) - cfg, _ := NewStringConfigurationSource("{}", "", flags.KeyPairConfigPath, false) + cfg, _ := NewStringConfigurationSource(`{"orchestrator":{}}`, "", flags.KeyPairConfigPath, false) if err := cfg.VerifyConfig(); err != nil { logger.Error("Invalid configuration", log.Error(err)) return nil, err From 224c4a42e2984b13c13117c523adc8b2f4d32e47 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 15:33:22 +0200 Subject: [PATCH 5/8] Bootstrap mode bugfixes --- services/execute.go | 7 +++++++ services/execute_test.go | 16 ++++++++++++++++ strelets/adapter/swarm_get_status.go | 4 ++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/services/execute.go b/services/execute.go index f008cc9c..f476f184 100644 --- a/services/execute.go +++ b/services/execute.go @@ -29,6 +29,10 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr os.Remove(flags.MetricsFilePath) os.Remove(flags.StatusFilePath) + if flags.BootstrapResetTimeout.Nanoseconds() <= flags.PollingInterval.Nanoseconds() { + return nil, fmt.Errorf("invalid configuration: bootstrap reset timeout is less or equal to config polling interval") + } + if flags.StatusFilePath == "" && flags.MetricsFilePath == "" { logger.Info("status file path and metrics file path are empty, periodical report disabled") } else { @@ -60,6 +64,9 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr return } + + configUpdateTimestamp = time.Now() + // random delay when provisioning change (that is, not bootstrap flow or repairing broken system) if coreBoyar.healthy { maybeDelayConfigUpdate(ctxWithCancel, cfg, flags.MaxReloadTimeDelay, coreBoyar.logger) diff --git a/services/execute_test.go b/services/execute_test.go index c027024a..a19bdd0f 100644 --- a/services/execute_test.go +++ b/services/execute_test.go @@ -10,6 +10,22 @@ import ( "time" ) +func TestExecuteWithInvalidFlags(t *testing.T) { + helpers.WithContext(func(ctx context.Context) { + logger := log.GetLogger() + + resetTimeout := 1 * time.Second + pollingInterval := 2 * time.Second + _, err := Execute(ctx, &config.Flags{ + ConfigUrl: "http://localhost/fake-url", + KeyPairConfigPath: "../boyar/config/test/fake-key-pair.json", + PollingInterval: pollingInterval, + BootstrapResetTimeout: resetTimeout, + }, logger) + require.EqualError(t, err, "invalid configuration: bootstrap reset timeout is less or equal to config polling interval") + }) +} + func TestExecuteWithInvalidConfig(t *testing.T) { helpers.WithContext(func(ctx context.Context) { logger := log.GetLogger() diff --git a/strelets/adapter/swarm_get_status.go b/strelets/adapter/swarm_get_status.go index 0c215f89..95241deb 100644 --- a/strelets/adapter/swarm_get_status.go +++ b/strelets/adapter/swarm_get_status.go @@ -14,8 +14,8 @@ func (d *dockerSwarmOrchestrator) GetStatus(ctx context.Context, since time.Dura return nil, fmt.Errorf("failed to retrieve task list: %s", err) } else { for _, task := range tasks { - name, _ := d.getServiceName(ctx, task.ServiceID) - logs, _ := d.getLogs(ctx, task.ServiceID, since) + name, _ := d.getServiceName(ctx, task.ServiceID) // FIXME handle error for non existing service + logs, _ := d.getLogs(ctx, task.ServiceID, since) // FIXME handle more errors status := &ContainerStatus{ Name: name, From e65cf3ca9f9b1f2761c3592fc4ce4f49811bb076 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 16:20:40 +0200 Subject: [PATCH 6/8] Fix configuration polling service (with intention to remove in the future) --- services/config_poll.go | 13 ++----------- services/execute.go | 12 +++++++++--- services/execute_test.go | 19 +++++++++++++++++-- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/services/config_poll.go b/services/config_poll.go index ed0663ac..b4885c76 100644 --- a/services/config_poll.go +++ b/services/config_poll.go @@ -14,10 +14,10 @@ type ConfigurationPollService struct { logger log.Logger output chan config.NodeConfiguration Output <-chan config.NodeConfiguration - configCache *utils.CacheFilter errorHandler govnr.Errorer } +// FIXME completely remove in the future func NewConfigurationPollService(flags *config.Flags, logger log.Logger) *ConfigurationPollService { output := make(chan config.NodeConfiguration) return &ConfigurationPollService{ @@ -25,7 +25,6 @@ func NewConfigurationPollService(flags *config.Flags, logger log.Logger) *Config logger: logger, output: output, Output: output, - configCache: utils.NewCacheFilter(), errorHandler: utils.NewLogErrors("configuration polling", logger), } } @@ -44,11 +43,7 @@ func (service *ConfigurationPollService) Start(ctx context.Context) govnr.Shutdo return } - if service.configCache.CheckNewValue(cfg) { - service.output <- cfg - } else { - service.logger.Info("configuration has not changed") - } + service.output <- cfg }) go func() { <-handle.Done() @@ -56,7 +51,3 @@ func (service *ConfigurationPollService) Start(ctx context.Context) govnr.Shutdo }() return handle } - -func (service *ConfigurationPollService) Resend() { - service.configCache.Clear() -} diff --git a/services/execute.go b/services/execute.go index f476f184..d582b206 100644 --- a/services/execute.go +++ b/services/execute.go @@ -41,6 +41,7 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr cfgFetcher := NewConfigurationPollService(flags, logger) coreBoyar := NewCoreBoyarService(logger) + configCache := utils.NewCacheFilter() configUpdateTimestamp := time.Now() @@ -51,12 +52,11 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr case <-ctx.Done(): return case <-time.After(flags.BootstrapResetTimeout): + logger.Error("bootstrap reset timeout reached", log.String("configUpdateTimestamp", configUpdateTimestamp.Format(time.RFC3339))) case cfg = <-cfgFetcher.Output: } if cfg == nil { - logger.Error("unexpected empty configuration received and ignored") - if resetInNanos := flags.BootstrapResetTimeout.Nanoseconds(); resetInNanos > 0 && time.Since(configUpdateTimestamp).Nanoseconds() >= resetInNanos { logger.Error(fmt.Sprintf("did not receive new valid configuratin for %s, shutting down", flags.BootstrapResetTimeout)) cancelAndExit() @@ -66,6 +66,12 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr } configUpdateTimestamp = time.Now() + logger.Info("last valid configuration timestamp updated", log.String("configUpdateTimestamp", configUpdateTimestamp.Format(time.RFC3339))) + + if !configCache.CheckNewValue(cfg) { + logger.Info("configuration has not changed") + return + } // random delay when provisioning change (that is, not bootstrap flow or repairing broken system) if coreBoyar.healthy { @@ -86,7 +92,7 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr err := coreBoyar.OnConfigChange(ctxWithTimeout, cfg) if err != nil { logger.Error("error executing configuration", log.Error(err)) - cfgFetcher.Resend() + configCache.Clear() } if ctxWithTimeout.Err() != nil { diff --git a/services/execute_test.go b/services/execute_test.go index a19bdd0f..56ce7889 100644 --- a/services/execute_test.go +++ b/services/execute_test.go @@ -6,6 +6,7 @@ import ( "github.com/orbs-network/boyarin/test/helpers" "github.com/orbs-network/scribe/log" "github.com/stretchr/testify/require" + "net/http" "testing" "time" ) @@ -26,8 +27,22 @@ func TestExecuteWithInvalidFlags(t *testing.T) { }) } +// FIXME get to the bottom of docker socket issues func TestExecuteWithInvalidConfig(t *testing.T) { helpers.WithContext(func(ctx context.Context) { + successfulAttempts := 0 + maxSuccessfulAttempts := 3 + server := helpers.CreateHttpServer("/", 0, func(writer http.ResponseWriter, request *http.Request) { + if successfulAttempts < maxSuccessfulAttempts { + successfulAttempts++ + writer.Write([]byte(`{"orchestrator":{}}`)) + } else { + writer.Write([]byte("{}")) + } + }) + server.Start() + defer server.Shutdown() + logger := log.GetLogger() executionCtx := context.Background() @@ -36,7 +51,7 @@ func TestExecuteWithInvalidConfig(t *testing.T) { resetTimeout := 1 * time.Second pollingInterval := 100 * time.Millisecond waiter, err := Execute(ctx, &config.Flags{ - ConfigUrl: "http://localhost/fake-url", + ConfigUrl: server.Url(), KeyPairConfigPath: "../boyar/config/test/fake-key-pair.json", PollingInterval: pollingInterval, BootstrapResetTimeout: resetTimeout, @@ -45,6 +60,6 @@ func TestExecuteWithInvalidConfig(t *testing.T) { waiter.WaitUntilShutdown(executionCtx) - require.InDelta(t, resetTimeout, time.Since(startTime).Nanoseconds(), float64(2*pollingInterval.Nanoseconds())) + require.InDelta(t, resetTimeout, time.Since(startTime).Nanoseconds(), float64(4*pollingInterval.Nanoseconds())) }) } From cec6156eaa9b872b97cec62b7561f920192f1e10 Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 16:48:19 +0200 Subject: [PATCH 7/8] Remove config poll service --- services/config_poll.go | 53 ----------------------------------------- services/execute.go | 31 ++++++++++++++++-------- 2 files changed, 21 insertions(+), 63 deletions(-) delete mode 100644 services/config_poll.go diff --git a/services/config_poll.go b/services/config_poll.go deleted file mode 100644 index b4885c76..00000000 --- a/services/config_poll.go +++ /dev/null @@ -1,53 +0,0 @@ -package services - -import ( - "context" - "github.com/orbs-network/boyarin/boyar/config" - "github.com/orbs-network/boyarin/utils" - "github.com/orbs-network/govnr" - "github.com/orbs-network/scribe/log" - "time" -) - -type ConfigurationPollService struct { - flags *config.Flags - logger log.Logger - output chan config.NodeConfiguration - Output <-chan config.NodeConfiguration - errorHandler govnr.Errorer -} - -// FIXME completely remove in the future -func NewConfigurationPollService(flags *config.Flags, logger log.Logger) *ConfigurationPollService { - output := make(chan config.NodeConfiguration) - return &ConfigurationPollService{ - flags: flags, - logger: logger, - output: output, - Output: output, - errorHandler: utils.NewLogErrors("configuration polling", logger), - } -} - -func (service *ConfigurationPollService) Start(ctx context.Context) govnr.ShutdownWaiter { - handle := govnr.Forever(ctx, "configuration polling service", service.errorHandler, func() { - defer func() { - select { - case <-ctx.Done(): - case <-time.After(service.flags.PollingInterval): - } - }() - cfg, err := config.GetConfiguration(service.flags) - if err != nil { - service.logger.Error("invalid configuration", log.Error(err)) - return - } - - service.output <- cfg - }) - go func() { - <-handle.Done() - close(service.output) - }() - return handle -} diff --git a/services/execute.go b/services/execute.go index d582b206..d84efab7 100644 --- a/services/execute.go +++ b/services/execute.go @@ -29,7 +29,7 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr os.Remove(flags.MetricsFilePath) os.Remove(flags.StatusFilePath) - if flags.BootstrapResetTimeout.Nanoseconds() <= flags.PollingInterval.Nanoseconds() { + if flags.BootstrapResetTimeout > 0 && flags.BootstrapResetTimeout.Nanoseconds() <= flags.PollingInterval.Nanoseconds() { return nil, fmt.Errorf("invalid configuration: bootstrap reset timeout is less or equal to config polling interval") } @@ -39,7 +39,6 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr supervisor.Supervise(WatchAndReportStatusAndMetrics(ctxWithCancel, logger, flags)) } - cfgFetcher := NewConfigurationPollService(flags, logger) coreBoyar := NewCoreBoyarService(logger) configCache := utils.NewCacheFilter() @@ -48,12 +47,21 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr // wire cfg and boyar supervisor.Supervise(govnr.Forever(ctxWithCancel, "apply config changes", utils.NewLogErrors("apply config changes", logger), func() { var cfg config.NodeConfiguration = nil + var err error + select { case <-ctx.Done(): return - case <-time.After(flags.BootstrapResetTimeout): + case <-timeout(flags.BootstrapResetTimeout): logger.Error("bootstrap reset timeout reached", log.String("configUpdateTimestamp", configUpdateTimestamp.Format(time.RFC3339))) - case cfg = <-cfgFetcher.Output: + case <-time.After(flags.PollingInterval): + cfg, err = config.GetConfiguration(flags) + if err != nil { + logger.Error("invalid configuration", log.Error(err)) + } else { + configUpdateTimestamp = time.Now() + logger.Info("last valid configuration timestamp updated", log.String("configUpdateTimestamp", configUpdateTimestamp.Format(time.RFC3339))) + } } if cfg == nil { @@ -65,9 +73,6 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr return } - configUpdateTimestamp = time.Now() - logger.Info("last valid configuration timestamp updated", log.String("configUpdateTimestamp", configUpdateTimestamp.Format(time.RFC3339))) - if !configCache.CheckNewValue(cfg) { logger.Info("configuration has not changed") return @@ -89,7 +94,7 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr ctxWithTimeout, cancel := context.WithTimeout(ctxWithCancel, flags.Timeout) defer cancel() - err := coreBoyar.OnConfigChange(ctxWithTimeout, cfg) + err = coreBoyar.OnConfigChange(ctxWithTimeout, cfg) if err != nil { logger.Error("error executing configuration", log.Error(err)) configCache.Clear() @@ -101,7 +106,13 @@ func Execute(ctx context.Context, flags *config.Flags, logger log.Logger) (govnr } })) - supervisor.Supervise(cfgFetcher.Start(ctxWithCancel)) - return supervisor, nil } + +func timeout(duration time.Duration) <-chan time.Time { + if duration == 0 { + return make(chan time.Time) // empty channel that nobody for waiting forever + } + + return time.After(duration) +} From d4fccf38970a1becc88849ec4bdd801d48511b7d Mon Sep 17 00:00:00 2001 From: Kirill Maksimov Date: Thu, 5 Nov 2020 16:59:33 +0200 Subject: [PATCH 8/8] Change boostrap reset timeout to 0 for standalone installation compatibility --- boyar/main/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boyar/main/main.go b/boyar/main/main.go index f313c94b..4046aa56 100644 --- a/boyar/main/main.go +++ b/boyar/main/main.go @@ -50,7 +50,7 @@ func main() { autoUpdate := flag.Bool("auto-update", false, "enables boyar binary auto update") shutdownAfterUpdate := flag.Bool("shutdown-after-update", false, "the process shuts down after automatic update is performed and **DOES NOT** restart; recommended to be used with an external process manager") - bootstrapResetTimeout := flag.Duration("bootstrap-reset-timeout", 30*time.Minute, "if the process is unable to receive valid configuration within a limited timeframe (duration: 1s, 1m, 1h, etc), it will exit with an error; recommended to be used with an external process manager") + bootstrapResetTimeout := flag.Duration("bootstrap-reset-timeout", 0, "if the process is unable to receive valid configuration within a limited timeframe (duration: 1s, 1m, 1h, etc), it will exit with an error; recommended to be used with an external process manager, (default 0s, off)") flag.Parse()